1 //===- InstCombineCalls.cpp -----------------------------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the visitCall and visitInvoke functions.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "InstCombineInternal.h"
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/APInt.h"
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/None.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/ADT/Statistic.h"
22 #include "llvm/ADT/Twine.h"
26 #include "llvm/IR/BasicBlock.h"
27 #include "llvm/IR/CallSite.h"
28 #include "llvm/IR/Constant.h"
29 #include "llvm/IR/DataLayout.h"
30 #include "llvm/IR/DerivedTypes.h"
31 #include "llvm/IR/Function.h"
32 #include "llvm/IR/GlobalVariable.h"
33 #include "llvm/IR/InstrTypes.h"
34 #include "llvm/IR/Instruction.h"
35 #include "llvm/IR/Instructions.h"
36 #include "llvm/IR/IntrinsicInst.h"
37 #include "llvm/IR/Intrinsics.h"
38 #include "llvm/IR/LLVMContext.h"
39 #include "llvm/IR/Metadata.h"
40 #include "llvm/IR/PatternMatch.h"
41 #include "llvm/IR/Statepoint.h"
42 #include "llvm/IR/Type.h"
43 #include "llvm/IR/Value.h"
44 #include "llvm/IR/ValueHandle.h"
45 #include "llvm/Support/Casting.h"
46 #include "llvm/Support/Debug.h"
47 #include "llvm/Support/KnownBits.h"
51 #include <algorithm>
52 #include <cassert>
53 #include <cstdint>
54 #include <cstring>
55 #include <vector>
56 
57 using namespace llvm;
58 using namespace PatternMatch;
59 
60 #define DEBUG_TYPE "instcombine"
61 
62 STATISTIC(NumSimplified, "Number of library calls simplified");
63 
64 static cl::opt<unsigned> UnfoldElementAtomicMemcpyMaxElements(
65  "unfold-element-atomic-memcpy-max-elements",
66  cl::init(16),
67  cl::desc("Maximum number of elements in atomic memcpy the optimizer is "
68  "allowed to unfold"));
69 
70 /// Return the specified type promoted as it would be to pass through a va_arg
71 /// area.
72 static Type *getPromotedType(Type *Ty) {
73  if (IntegerType* ITy = dyn_cast<IntegerType>(Ty)) {
74  if (ITy->getBitWidth() < 32)
75  return Type::getInt32Ty(Ty->getContext());
76  }
77  return Ty;
78 }
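// Illustrative note (examples assumed, not in the original source): under the
// promotion rule above, getPromotedType(i8) and getPromotedType(i16) both
// yield i32, while i32, i64 and all non-integer types are returned unchanged.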
79 
80 /// Return a constant boolean vector that has true elements in all positions
81 /// where the input constant data vector has an element with the sign bit set.
82 static Constant *getNegativeIsTrueBoolVec(ConstantDataVector *V) {
83  SmallVector<Constant *, 32> BoolVec;
84  IntegerType *BoolTy = Type::getInt1Ty(V->getContext());
85  for (unsigned I = 0, E = V->getNumElements(); I != E; ++I) {
86  Constant *Elt = V->getElementAsConstant(I);
87  assert((isa<ConstantInt>(Elt) || isa<ConstantFP>(Elt)) &&
88  "Unexpected constant data vector element type");
89  bool Sign = V->getElementType()->isIntegerTy()
90  ? cast<ConstantInt>(Elt)->isNegative()
91  : cast<ConstantFP>(Elt)->isNegative();
92  BoolVec.push_back(ConstantInt::get(BoolTy, Sign));
93  }
94  return ConstantVector::get(BoolVec);
95 }
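// Illustrative sketch (constants chosen for exposition, not from the source):
//   getNegativeIsTrueBoolVec(<4 x i32> <i32 -1, i32 7, i32 -8, i32 0>)
//     -> <4 x i1> <i1 true, i1 false, i1 true, i1 false>
// i.e. an element maps to true exactly when its sign bit is set.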
96 
97 Instruction *InstCombiner::SimplifyElementUnorderedAtomicMemCpy(
98  ElementUnorderedAtomicMemCpyInst *AMI) {
99  // Try to unfold this intrinsic into a sequence of explicit atomic loads and
100  // stores.
101  // First check that number of elements is compile time constant.
102  auto *LengthCI = dyn_cast<ConstantInt>(AMI->getLength());
103  if (!LengthCI)
104  return nullptr;
105 
106  // Check that there are not too many elements.
107  uint64_t LengthInBytes = LengthCI->getZExtValue();
108  uint32_t ElementSizeInBytes = AMI->getElementSizeInBytes();
109  uint64_t NumElements = LengthInBytes / ElementSizeInBytes;
110  if (NumElements >= UnfoldElementAtomicMemcpyMaxElements)
111  return nullptr;
112 
113  // Only expand if there are elements to copy.
114  if (NumElements > 0) {
115  // Don't unfold into illegal integers
116  uint64_t ElementSizeInBits = ElementSizeInBytes * 8;
117  if (!getDataLayout().isLegalInteger(ElementSizeInBits))
118  return nullptr;
119 
120  // Cast source and destination to the correct type. Intrinsic input
121  // arguments are usually represented as i8*. Often operands will be
122  // explicitly casted to i8* and we can just strip those casts instead of
123  // inserting new ones. However it's easier to rely on other InstCombine
124  // rules which will cover trivial cases anyway.
125  Value *Src = AMI->getRawSource();
126  Value *Dst = AMI->getRawDest();
127  Type *ElementPointerType =
128  Type::getIntNPtrTy(AMI->getContext(), ElementSizeInBits,
129  Src->getType()->getPointerAddressSpace());
130 
131  Value *SrcCasted = Builder.CreatePointerCast(Src, ElementPointerType,
132  "memcpy_unfold.src_casted");
133  Value *DstCasted = Builder.CreatePointerCast(Dst, ElementPointerType,
134  "memcpy_unfold.dst_casted");
135 
136  for (uint64_t i = 0; i < NumElements; ++i) {
137  // Get current element addresses
138  ConstantInt *ElementIdxCI =
139  ConstantInt::get(AMI->getContext(), APInt(64, i));
140  Value *SrcElementAddr =
141  Builder.CreateGEP(SrcCasted, ElementIdxCI, "memcpy_unfold.src_addr");
142  Value *DstElementAddr =
143  Builder.CreateGEP(DstCasted, ElementIdxCI, "memcpy_unfold.dst_addr");
144 
145  // Load from the source. Transfer alignment information and mark load as
146  // unordered atomic.
147  LoadInst *Load = Builder.CreateLoad(SrcElementAddr, "memcpy_unfold.val");
148  Load->setAtomic(AtomicOrdering::Unordered);
149  // We know alignment of the first element. It is also guaranteed by the
150  // verifier that the element size is less than or equal to the first element
151  // alignment and that both of these values are powers of two. This means that
152  // all subsequent accesses are at least element size aligned.
153  // TODO: We can infer better alignment but there is no evidence that this
154  // will matter.
155  Load->setAlignment(i == 0 ? AMI->getParamAlignment(1)
156  : ElementSizeInBytes);
157  Load->setDebugLoc(AMI->getDebugLoc());
158 
159  // Store loaded value via unordered atomic store.
160  StoreInst *Store = Builder.CreateStore(Load, DstElementAddr);
161  Store->setAtomic(AtomicOrdering::Unordered);
162  Store->setAlignment(i == 0 ? AMI->getParamAlignment(0)
163  : ElementSizeInBytes);
164  Store->setDebugLoc(AMI->getDebugLoc());
165  }
166  }
167 
168  // Set the number of elements of the copy to 0, it will be deleted on the
169  // next iteration.
170  AMI->setLength(Constant::getNullValue(LengthCI->getType()));
171  return AMI;
172 }
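// Illustrative sketch (assumed IR, not from the original source): an
// llvm.memcpy.element.unordered.atomic call with a constant length of 16 bytes
// and an element size of 4 is unfolded into four pairs of
//   load atomic unordered i32 ... / store atomic unordered i32 ...
// provided i32 is a legal integer for the target and the element count stays
// below the unfold-element-atomic-memcpy-max-elements threshold (16 by
// default); the intrinsic's length is then zeroed so it is erased next pass.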
173 
174 Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
175  unsigned DstAlign = getKnownAlignment(MI->getArgOperand(0), DL, MI, &AC, &DT);
176  unsigned SrcAlign = getKnownAlignment(MI->getArgOperand(1), DL, MI, &AC, &DT);
177  unsigned MinAlign = std::min(DstAlign, SrcAlign);
178  unsigned CopyAlign = MI->getAlignment();
179 
180  if (CopyAlign < MinAlign) {
181  MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), MinAlign, false));
182  return MI;
183  }
184 
185  // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with
186  // load/store.
187  ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getArgOperand(2));
188  if (!MemOpLength) return nullptr;
189 
190  // Source and destination pointer types are always "i8*" for intrinsic. See
191  // if the size is something we can handle with a single primitive load/store.
192  // A single load+store correctly handles overlapping memory in the memmove
193  // case.
194  uint64_t Size = MemOpLength->getLimitedValue();
195  assert(Size && "0-sized memory transferring should be removed already.");
196 
197  if (Size > 8 || (Size&(Size-1)))
198  return nullptr; // If not 1/2/4/8 bytes, exit.
199 
200  // Use an integer load+store unless we can find something better.
201  unsigned SrcAddrSp =
202  cast<PointerType>(MI->getArgOperand(1)->getType())->getAddressSpace();
203  unsigned DstAddrSp =
204  cast<PointerType>(MI->getArgOperand(0)->getType())->getAddressSpace();
205 
206  IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3);
207  Type *NewSrcPtrTy = PointerType::get(IntType, SrcAddrSp);
208  Type *NewDstPtrTy = PointerType::get(IntType, DstAddrSp);
209 
210  // If the memcpy has metadata describing the members, see if we can get the
211  // TBAA tag describing our copy.
212  MDNode *CopyMD = nullptr;
213  if (MDNode *M = MI->getMetadata(LLVMContext::MD_tbaa_struct)) {
214  if (M->getNumOperands() == 3 && M->getOperand(0) &&
215  mdconst::hasa<ConstantInt>(M->getOperand(0)) &&
216  mdconst::extract<ConstantInt>(M->getOperand(0))->isZero() &&
217  M->getOperand(1) &&
218  mdconst::hasa<ConstantInt>(M->getOperand(1)) &&
219  mdconst::extract<ConstantInt>(M->getOperand(1))->getValue() ==
220  Size &&
221  M->getOperand(2) && isa<MDNode>(M->getOperand(2)))
222  CopyMD = cast<MDNode>(M->getOperand(2));
223  }
224 
225  // If the memcpy/memmove provides better alignment info than we can
226  // infer, use it.
227  SrcAlign = std::max(SrcAlign, CopyAlign);
228  DstAlign = std::max(DstAlign, CopyAlign);
229 
230  Value *Src = Builder.CreateBitCast(MI->getArgOperand(1), NewSrcPtrTy);
231  Value *Dest = Builder.CreateBitCast(MI->getArgOperand(0), NewDstPtrTy);
232  LoadInst *L = Builder.CreateLoad(Src, MI->isVolatile());
233  L->setAlignment(SrcAlign);
234  if (CopyMD)
235  L->setMetadata(LLVMContext::MD_tbaa, CopyMD);
236  MDNode *LoopMemParallelMD =
237  MI->getMetadata(LLVMContext::MD_mem_parallel_loop_access);
238  if (LoopMemParallelMD)
239  L->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);
240 
241  StoreInst *S = Builder.CreateStore(L, Dest, MI->isVolatile());
242  S->setAlignment(DstAlign);
243  if (CopyMD)
244  S->setMetadata(LLVMContext::MD_tbaa, CopyMD);
245  if (LoopMemParallelMD)
246  S->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);
247 
248  // Set the size of the copy to 0, it will be deleted on the next iteration.
249  MI->setArgOperand(2, Constant::getNullValue(MemOpLength->getType()));
250  return MI;
251 }
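// Illustrative sketch (assumed IR, not from the original source): an 8-byte
// memcpy with a constant length is rewritten into a single integer copy,
// roughly
//   %s = bitcast i8* %src to i64*
//   %d = bitcast i8* %dst to i64*
//   %v = load i64, i64* %s
//   store i64 %v, i64* %d
// with the best known source/destination alignments attached, after which the
// memcpy's length operand is set to 0 so the call is deleted next iteration.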
252 
253 Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
254  unsigned Alignment = getKnownAlignment(MI->getDest(), DL, MI, &AC, &DT);
255  if (MI->getAlignment() < Alignment) {
256  MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
257  Alignment, false));
258  return MI;
259  }
260 
261  // Extract the length and alignment and fill if they are constant.
262  ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
263  ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
264  if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8))
265  return nullptr;
266  uint64_t Len = LenC->getLimitedValue();
267  Alignment = MI->getAlignment();
268  assert(Len && "0-sized memory setting should be removed already.");
269 
270  // memset(s,c,n) -> store s, c (for n=1,2,4,8)
271  if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
272  Type *ITy = IntegerType::get(MI->getContext(), Len*8); // n=1 -> i8.
273 
274  Value *Dest = MI->getDest();
275  unsigned DstAddrSp = cast<PointerType>(Dest->getType())->getAddressSpace();
276  Type *NewDstPtrTy = PointerType::get(ITy, DstAddrSp);
277  Dest = Builder.CreateBitCast(Dest, NewDstPtrTy);
278 
279  // Alignment 0 is identity for alignment 1 for memset, but not store.
280  if (Alignment == 0) Alignment = 1;
281 
282  // Extract the fill value and store.
283  uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL;
284  StoreInst *S = Builder.CreateStore(ConstantInt::get(ITy, Fill), Dest,
285  MI->isVolatile());
286  S->setAlignment(Alignment);
287 
288  // Set the size of the copy to 0, it will be deleted on the next iteration.
289  MI->setLength(Constant::getNullValue(LenC->getType()));
290  return MI;
291  }
292 
293  return nullptr;
294 }
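// Illustrative sketch (assumed IR, not from the original source): for
//   call void @llvm.memset.p0i8.i64(i8* %p, i8 1, i64 4, i32 4, i1 false)
// the fill byte is splatted across the store width and the call becomes
//   %p.cast = bitcast i8* %p to i32*
//   store i32 16843009, i32* %p.cast, align 4   ; 0x01010101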
295 
296 static Value *simplifyX86immShift(const IntrinsicInst &II,
297  InstCombiner::BuilderTy &Builder) {
298  bool LogicalShift = false;
299  bool ShiftLeft = false;
300 
301  switch (II.getIntrinsicID()) {
302  default: llvm_unreachable("Unexpected intrinsic!");
303  case Intrinsic::x86_sse2_psra_d:
304  case Intrinsic::x86_sse2_psra_w:
305  case Intrinsic::x86_sse2_psrai_d:
306  case Intrinsic::x86_sse2_psrai_w:
307  case Intrinsic::x86_avx2_psra_d:
308  case Intrinsic::x86_avx2_psra_w:
309  case Intrinsic::x86_avx2_psrai_d:
310  case Intrinsic::x86_avx2_psrai_w:
311  case Intrinsic::x86_avx512_psra_q_128:
312  case Intrinsic::x86_avx512_psrai_q_128:
313  case Intrinsic::x86_avx512_psra_q_256:
314  case Intrinsic::x86_avx512_psrai_q_256:
315  case Intrinsic::x86_avx512_psra_d_512:
316  case Intrinsic::x86_avx512_psra_q_512:
317  case Intrinsic::x86_avx512_psra_w_512:
318  case Intrinsic::x86_avx512_psrai_d_512:
319  case Intrinsic::x86_avx512_psrai_q_512:
320  case Intrinsic::x86_avx512_psrai_w_512:
321  LogicalShift = false; ShiftLeft = false;
322  break;
323  case Intrinsic::x86_sse2_psrl_d:
324  case Intrinsic::x86_sse2_psrl_q:
325  case Intrinsic::x86_sse2_psrl_w:
326  case Intrinsic::x86_sse2_psrli_d:
327  case Intrinsic::x86_sse2_psrli_q:
328  case Intrinsic::x86_sse2_psrli_w:
329  case Intrinsic::x86_avx2_psrl_d:
330  case Intrinsic::x86_avx2_psrl_q:
331  case Intrinsic::x86_avx2_psrl_w:
332  case Intrinsic::x86_avx2_psrli_d:
333  case Intrinsic::x86_avx2_psrli_q:
334  case Intrinsic::x86_avx2_psrli_w:
335  case Intrinsic::x86_avx512_psrl_d_512:
336  case Intrinsic::x86_avx512_psrl_q_512:
337  case Intrinsic::x86_avx512_psrl_w_512:
338  case Intrinsic::x86_avx512_psrli_d_512:
339  case Intrinsic::x86_avx512_psrli_q_512:
340  case Intrinsic::x86_avx512_psrli_w_512:
341  LogicalShift = true; ShiftLeft = false;
342  break;
343  case Intrinsic::x86_sse2_psll_d:
344  case Intrinsic::x86_sse2_psll_q:
345  case Intrinsic::x86_sse2_psll_w:
346  case Intrinsic::x86_sse2_pslli_d:
347  case Intrinsic::x86_sse2_pslli_q:
348  case Intrinsic::x86_sse2_pslli_w:
349  case Intrinsic::x86_avx2_psll_d:
350  case Intrinsic::x86_avx2_psll_q:
351  case Intrinsic::x86_avx2_psll_w:
352  case Intrinsic::x86_avx2_pslli_d:
353  case Intrinsic::x86_avx2_pslli_q:
354  case Intrinsic::x86_avx2_pslli_w:
355  case Intrinsic::x86_avx512_psll_d_512:
356  case Intrinsic::x86_avx512_psll_q_512:
357  case Intrinsic::x86_avx512_psll_w_512:
358  case Intrinsic::x86_avx512_pslli_d_512:
359  case Intrinsic::x86_avx512_pslli_q_512:
360  case Intrinsic::x86_avx512_pslli_w_512:
361  LogicalShift = true; ShiftLeft = true;
362  break;
363  }
364  assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left");
365 
366  // Simplify if count is constant.
367  auto Arg1 = II.getArgOperand(1);
368  auto CAZ = dyn_cast<ConstantAggregateZero>(Arg1);
369  auto CDV = dyn_cast<ConstantDataVector>(Arg1);
370  auto CInt = dyn_cast<ConstantInt>(Arg1);
371  if (!CAZ && !CDV && !CInt)
372  return nullptr;
373 
374  APInt Count(64, 0);
375  if (CDV) {
376  // SSE2/AVX2 uses only the bottom 64 bits of the 128-bit vector
377  // operand to compute the shift amount.
378  auto VT = cast<VectorType>(CDV->getType());
379  unsigned BitWidth = VT->getElementType()->getPrimitiveSizeInBits();
380  assert((64 % BitWidth) == 0 && "Unexpected packed shift size");
381  unsigned NumSubElts = 64 / BitWidth;
382 
383  // Concatenate the sub-elements to create the 64-bit value.
384  for (unsigned i = 0; i != NumSubElts; ++i) {
385  unsigned SubEltIdx = (NumSubElts - 1) - i;
386  auto SubElt = cast<ConstantInt>(CDV->getElementAsConstant(SubEltIdx));
387  Count <<= BitWidth;
388  Count |= SubElt->getValue().zextOrTrunc(64);
389  }
390  }
391  else if (CInt)
392  Count = CInt->getValue();
393 
394  auto Vec = II.getArgOperand(0);
395  auto VT = cast<VectorType>(Vec->getType());
396  auto SVT = VT->getElementType();
397  unsigned VWidth = VT->getNumElements();
398  unsigned BitWidth = SVT->getPrimitiveSizeInBits();
399 
400  // If shift-by-zero then just return the original value.
401  if (Count.isNullValue())
402  return Vec;
403 
404  // Handle cases when Shift >= BitWidth.
405  if (Count.uge(BitWidth)) {
406  // If LogicalShift - just return zero.
407  if (LogicalShift)
408  return ConstantAggregateZero::get(VT);
409 
410  // If ArithmeticShift - clamp Shift to (BitWidth - 1).
411  Count = APInt(64, BitWidth - 1);
412  }
413 
414  // Get a constant vector of the same type as the first operand.
415  auto ShiftAmt = ConstantInt::get(SVT, Count.zextOrTrunc(BitWidth));
416  auto ShiftVec = Builder.CreateVectorSplat(VWidth, ShiftAmt);
417 
418  if (ShiftLeft)
419  return Builder.CreateShl(Vec, ShiftVec);
420 
421  if (LogicalShift)
422  return Builder.CreateLShr(Vec, ShiftVec);
423 
424  return Builder.CreateAShr(Vec, ShiftVec);
425 }
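// Illustrative note (examples assumed, not in the original source): with a
// constant count this turns e.g. psrli.d(%v, 3) into the generic IR
//   lshr <4 x i32> %v, <i32 3, i32 3, i32 3, i32 3>
// while a logical shift by 32 or more folds to a zero vector and an
// arithmetic shift is clamped to BitWidth - 1 (splatting the sign bit).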
426 
427 // Attempt to simplify AVX2 per-element shift intrinsics to a generic IR shift.
428 // Unlike the generic IR shifts, the intrinsics have defined behaviour for out
429 // of range shift amounts (logical - set to zero, arithmetic - splat sign bit).
430 static Value *simplifyX86varShift(const IntrinsicInst &II,
431  InstCombiner::BuilderTy &Builder) {
432  bool LogicalShift = false;
433  bool ShiftLeft = false;
434 
435  switch (II.getIntrinsicID()) {
436  default: llvm_unreachable("Unexpected intrinsic!");
437  case Intrinsic::x86_avx2_psrav_d:
438  case Intrinsic::x86_avx2_psrav_d_256:
439  case Intrinsic::x86_avx512_psrav_q_128:
440  case Intrinsic::x86_avx512_psrav_q_256:
441  case Intrinsic::x86_avx512_psrav_d_512:
442  case Intrinsic::x86_avx512_psrav_q_512:
443  case Intrinsic::x86_avx512_psrav_w_128:
444  case Intrinsic::x86_avx512_psrav_w_256:
445  case Intrinsic::x86_avx512_psrav_w_512:
446  LogicalShift = false;
447  ShiftLeft = false;
448  break;
449  case Intrinsic::x86_avx2_psrlv_d:
450  case Intrinsic::x86_avx2_psrlv_d_256:
451  case Intrinsic::x86_avx2_psrlv_q:
452  case Intrinsic::x86_avx2_psrlv_q_256:
453  case Intrinsic::x86_avx512_psrlv_d_512:
454  case Intrinsic::x86_avx512_psrlv_q_512:
455  case Intrinsic::x86_avx512_psrlv_w_128:
456  case Intrinsic::x86_avx512_psrlv_w_256:
457  case Intrinsic::x86_avx512_psrlv_w_512:
458  LogicalShift = true;
459  ShiftLeft = false;
460  break;
461  case Intrinsic::x86_avx2_psllv_d:
462  case Intrinsic::x86_avx2_psllv_d_256:
463  case Intrinsic::x86_avx2_psllv_q:
464  case Intrinsic::x86_avx2_psllv_q_256:
465  case Intrinsic::x86_avx512_psllv_d_512:
466  case Intrinsic::x86_avx512_psllv_q_512:
467  case Intrinsic::x86_avx512_psllv_w_128:
468  case Intrinsic::x86_avx512_psllv_w_256:
469  case Intrinsic::x86_avx512_psllv_w_512:
470  LogicalShift = true;
471  ShiftLeft = true;
472  break;
473  }
474  assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left");
475 
476  // Simplify if all shift amounts are constant/undef.
477  auto *CShift = dyn_cast<Constant>(II.getArgOperand(1));
478  if (!CShift)
479  return nullptr;
480 
481  auto Vec = II.getArgOperand(0);
482  auto VT = cast<VectorType>(II.getType());
483  auto SVT = VT->getVectorElementType();
484  int NumElts = VT->getNumElements();
485  int BitWidth = SVT->getIntegerBitWidth();
486 
487  // Collect each element's shift amount.
488  // We also collect special cases: UNDEF = -1, OUT-OF-RANGE = BitWidth.
489  bool AnyOutOfRange = false;
490  SmallVector<int, 8> ShiftAmts;
491  for (int I = 0; I < NumElts; ++I) {
492  auto *CElt = CShift->getAggregateElement(I);
493  if (CElt && isa<UndefValue>(CElt)) {
494  ShiftAmts.push_back(-1);
495  continue;
496  }
497 
498  auto *COp = dyn_cast_or_null<ConstantInt>(CElt);
499  if (!COp)
500  return nullptr;
501 
502  // Handle out of range shifts.
503  // If LogicalShift - set to BitWidth (special case).
504  // If ArithmeticShift - set to (BitWidth - 1) (sign splat).
505  APInt ShiftVal = COp->getValue();
506  if (ShiftVal.uge(BitWidth)) {
507  AnyOutOfRange = LogicalShift;
508  ShiftAmts.push_back(LogicalShift ? BitWidth : BitWidth - 1);
509  continue;
510  }
511 
512  ShiftAmts.push_back((int)ShiftVal.getZExtValue());
513  }
514 
515  // If all elements out of range or UNDEF, return vector of zeros/undefs.
516  // ArithmeticShift should only hit this if they are all UNDEF.
517  auto OutOfRange = [&](int Idx) { return (Idx < 0) || (BitWidth <= Idx); };
518  if (all_of(ShiftAmts, OutOfRange)) {
519  SmallVector<Constant *, 8> ConstantVec;
520  for (int Idx : ShiftAmts) {
521  if (Idx < 0) {
522  ConstantVec.push_back(UndefValue::get(SVT));
523  } else {
524  assert(LogicalShift && "Logical shift expected");
525  ConstantVec.push_back(ConstantInt::getNullValue(SVT));
526  }
527  }
528  return ConstantVector::get(ConstantVec);
529  }
530 
531  // We can't handle only some out of range values with generic logical shifts.
532  if (AnyOutOfRange)
533  return nullptr;
534 
535  // Build the shift amount constant vector.
536  SmallVector<Constant *, 8> ShiftVecAmts;
537  for (int Idx : ShiftAmts) {
538  if (Idx < 0)
539  ShiftVecAmts.push_back(UndefValue::get(SVT));
540  else
541  ShiftVecAmts.push_back(ConstantInt::get(SVT, Idx));
542  }
543  auto ShiftVec = ConstantVector::get(ShiftVecAmts);
544 
545  if (ShiftLeft)
546  return Builder.CreateShl(Vec, ShiftVec);
547 
548  if (LogicalShift)
549  return Builder.CreateLShr(Vec, ShiftVec);
550 
551  return Builder.CreateAShr(Vec, ShiftVec);
552 }
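// Illustrative note (examples assumed, not in the original source): a
// per-element shift such as psllv.d(%v, <i32 1, i32 2, i32 3, i32 4>) becomes
//   shl <4 x i32> %v, <i32 1, i32 2, i32 3, i32 4>
// Constant amounts that mix in-range and out-of-range lanes are left alone for
// logical shifts, because generic IR shifts make out-of-range lanes undefined
// rather than zero.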
553 
554 static Value *simplifyX86muldq(const IntrinsicInst &II,
555  InstCombiner::BuilderTy &Builder) {
556  Value *Arg0 = II.getArgOperand(0);
557  Value *Arg1 = II.getArgOperand(1);
558  Type *ResTy = II.getType();
559  assert(Arg0->getType()->getScalarSizeInBits() == 32 &&
560  Arg1->getType()->getScalarSizeInBits() == 32 &&
561  ResTy->getScalarSizeInBits() == 64 && "Unexpected muldq/muludq types");
562 
563  // muldq/muludq(undef, undef) -> zero (matches generic mul behavior)
564  if (isa<UndefValue>(Arg0) || isa<UndefValue>(Arg1))
565  return ConstantAggregateZero::get(ResTy);
566 
567  // Constant folding.
568  // PMULDQ = (mul(vXi64 sext(shuffle<0,2,..>(Arg0)),
569  // vXi64 sext(shuffle<0,2,..>(Arg1))))
570  // PMULUDQ = (mul(vXi64 zext(shuffle<0,2,..>(Arg0)),
571  // vXi64 zext(shuffle<0,2,..>(Arg1))))
572  if (!isa<Constant>(Arg0) || !isa<Constant>(Arg1))
573  return nullptr;
574 
575  unsigned NumElts = ResTy->getVectorNumElements();
576  assert(Arg0->getType()->getVectorNumElements() == (2 * NumElts) &&
577  Arg1->getType()->getVectorNumElements() == (2 * NumElts) &&
578  "Unexpected muldq/muludq types");
579 
580  unsigned IntrinsicID = II.getIntrinsicID();
581  bool IsSigned = (Intrinsic::x86_sse41_pmuldq == IntrinsicID ||
582  Intrinsic::x86_avx2_pmul_dq == IntrinsicID ||
583  Intrinsic::x86_avx512_pmul_dq_512 == IntrinsicID);
584 
585  SmallVector<unsigned, 16> ShuffleMask;
586  for (unsigned i = 0; i != NumElts; ++i)
587  ShuffleMask.push_back(i * 2);
588 
589  auto *LHS = Builder.CreateShuffleVector(Arg0, Arg0, ShuffleMask);
590  auto *RHS = Builder.CreateShuffleVector(Arg1, Arg1, ShuffleMask);
591 
592  if (IsSigned) {
593  LHS = Builder.CreateSExt(LHS, ResTy);
594  RHS = Builder.CreateSExt(RHS, ResTy);
595  } else {
596  LHS = Builder.CreateZExt(LHS, ResTy);
597  RHS = Builder.CreateZExt(RHS, ResTy);
598  }
599 
600  return Builder.CreateMul(LHS, RHS);
601 }
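// Illustrative sketch (assumed IR, not from the original source): for constant
// operands, pmuludq on <4 x i32> inputs is rewritten roughly as
//   %a = shufflevector <4 x i32> %x, <4 x i32> %x, <2 x i32> <i32 0, i32 2>
//   %b = shufflevector <4 x i32> %y, <4 x i32> %y, <2 x i32> <i32 0, i32 2>
//   mul <2 x i64> (zext %a to <2 x i64>), (zext %b to <2 x i64>)
// with sext instead of zext for the signed pmuldq variants, so generic
// constant folding and mul combines can take over.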
602 
603 static Value *simplifyX86pack(IntrinsicInst &II, bool IsSigned) {
604  Value *Arg0 = II.getArgOperand(0);
605  Value *Arg1 = II.getArgOperand(1);
606  Type *ResTy = II.getType();
607 
608  // Fast all undef handling.
609  if (isa<UndefValue>(Arg0) && isa<UndefValue>(Arg1))
610  return UndefValue::get(ResTy);
611 
612  Type *ArgTy = Arg0->getType();
613  unsigned NumLanes = ResTy->getPrimitiveSizeInBits() / 128;
614  unsigned NumDstElts = ResTy->getVectorNumElements();
615  unsigned NumSrcElts = ArgTy->getVectorNumElements();
616  assert(NumDstElts == (2 * NumSrcElts) && "Unexpected packing types");
617 
618  unsigned NumDstEltsPerLane = NumDstElts / NumLanes;
619  unsigned NumSrcEltsPerLane = NumSrcElts / NumLanes;
620  unsigned DstScalarSizeInBits = ResTy->getScalarSizeInBits();
621  assert(ArgTy->getScalarSizeInBits() == (2 * DstScalarSizeInBits) &&
622  "Unexpected packing types");
623 
624  // Constant folding.
625  auto *Cst0 = dyn_cast<Constant>(Arg0);
626  auto *Cst1 = dyn_cast<Constant>(Arg1);
627  if (!Cst0 || !Cst1)
628  return nullptr;
629 
630  SmallVector<Constant *, 32> Vals;
631  for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
632  for (unsigned Elt = 0; Elt != NumDstEltsPerLane; ++Elt) {
633  unsigned SrcIdx = Lane * NumSrcEltsPerLane + Elt % NumSrcEltsPerLane;
634  auto *Cst = (Elt >= NumSrcEltsPerLane) ? Cst1 : Cst0;
635  auto *COp = Cst->getAggregateElement(SrcIdx);
636  if (COp && isa<UndefValue>(COp)) {
637  Vals.push_back(UndefValue::get(ResTy->getScalarType()));
638  continue;
639  }
640 
641  auto *CInt = dyn_cast_or_null<ConstantInt>(COp);
642  if (!CInt)
643  return nullptr;
644 
645  APInt Val = CInt->getValue();
646  assert(Val.getBitWidth() == ArgTy->getScalarSizeInBits() &&
647  "Unexpected constant bitwidth");
648 
649  if (IsSigned) {
650  // PACKSS: Truncate signed value with signed saturation.
651  // Source values less than dst minint are saturated to minint.
652  // Source values greater than dst maxint are saturated to maxint.
653  if (Val.isSignedIntN(DstScalarSizeInBits))
654  Val = Val.trunc(DstScalarSizeInBits);
655  else if (Val.isNegative())
656  Val = APInt::getSignedMinValue(DstScalarSizeInBits);
657  else
658  Val = APInt::getSignedMaxValue(DstScalarSizeInBits);
659  } else {
660  // PACKUS: Truncate signed value with unsigned saturation.
661  // Source values less than zero are saturated to zero.
662  // Source values greater than dst maxuint are saturated to maxuint.
663  if (Val.isIntN(DstScalarSizeInBits))
664  Val = Val.trunc(DstScalarSizeInBits);
665  else if (Val.isNegative())
666  Val = APInt::getNullValue(DstScalarSizeInBits);
667  else
668  Val = APInt::getAllOnesValue(DstScalarSizeInBits);
669  }
670 
671  Vals.push_back(ConstantInt::get(ResTy->getScalarType(), Val));
672  }
673  }
674 
675  return ConstantVector::get(Vals);
676 }
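// Illustrative note (examples assumed, not in the original source): the
// constant folding above mirrors the hardware saturation, e.g. packssdw
// saturates a 32-bit source value of 100000 to 32767 (i16 signed max) and
// -100000 to -32768, while packuswb clamps negative 16-bit values to 0 and
// values above 255 to 255.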
677 
678 static Value *simplifyX86movmsk(const IntrinsicInst &II) {
679  Value *Arg = II.getArgOperand(0);
680  Type *ResTy = II.getType();
681  Type *ArgTy = Arg->getType();
682 
683  // movmsk(undef) -> zero as we must ensure the upper bits are zero.
684  if (isa<UndefValue>(Arg))
685  return Constant::getNullValue(ResTy);
686 
687  // We can't easily peek through x86_mmx types.
688  if (!ArgTy->isVectorTy())
689  return nullptr;
690 
691  auto *C = dyn_cast<Constant>(Arg);
692  if (!C)
693  return nullptr;
694 
695  // Extract signbits of the vector input and pack into integer result.
696  APInt Result(ResTy->getPrimitiveSizeInBits(), 0);
697  for (unsigned I = 0, E = ArgTy->getVectorNumElements(); I != E; ++I) {
698  auto *COp = C->getAggregateElement(I);
699  if (!COp)
700  return nullptr;
701  if (isa<UndefValue>(COp))
702  continue;
703 
704  auto *CInt = dyn_cast<ConstantInt>(COp);
705  auto *CFp = dyn_cast<ConstantFP>(COp);
706  if (!CInt && !CFp)
707  return nullptr;
708 
709  if ((CInt && CInt->isNegative()) || (CFp && CFp->isNegative()))
710  Result.setBit(I);
711  }
712 
713  return Constant::getIntegerValue(ResTy, Result);
714 }
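// Illustrative sketch (constants chosen for exposition, not from the source):
//   movmsk.ps(<4 x float> <float -1.0, float 1.0, float -2.0, float 0.0>)
// folds to i32 5 (binary 0101): bit I of the result is set exactly when
// element I of the constant input has its sign bit set.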
715 
716 static Value *simplifyX86insertps(const IntrinsicInst &II,
717  InstCombiner::BuilderTy &Builder) {
718  auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2));
719  if (!CInt)
720  return nullptr;
721 
722  VectorType *VecTy = cast<VectorType>(II.getType());
723  assert(VecTy->getNumElements() == 4 && "insertps with wrong vector type");
724 
725  // The immediate permute control byte looks like this:
726  // [3:0] - zero mask for each 32-bit lane
727  // [5:4] - select one 32-bit destination lane
728  // [7:6] - select one 32-bit source lane
729 
730  uint8_t Imm = CInt->getZExtValue();
731  uint8_t ZMask = Imm & 0xf;
732  uint8_t DestLane = (Imm >> 4) & 0x3;
733  uint8_t SourceLane = (Imm >> 6) & 0x3;
734 
735  ConstantAggregateZero *ZeroVector = ConstantAggregateZero::get(VecTy);
736 
737  // If all zero mask bits are set, this was just a weird way to
738  // generate a zero vector.
739  if (ZMask == 0xf)
740  return ZeroVector;
741 
742  // Initialize by passing all of the first source bits through.
743  uint32_t ShuffleMask[4] = { 0, 1, 2, 3 };
744 
745  // We may replace the second operand with the zero vector.
746  Value *V1 = II.getArgOperand(1);
747 
748  if (ZMask) {
749  // If the zero mask is being used with a single input or the zero mask
750  // overrides the destination lane, this is a shuffle with the zero vector.
751  if ((II.getArgOperand(0) == II.getArgOperand(1)) ||
752  (ZMask & (1 << DestLane))) {
753  V1 = ZeroVector;
754  // We may still move 32-bits of the first source vector from one lane
755  // to another.
756  ShuffleMask[DestLane] = SourceLane;
757  // The zero mask may override the previous insert operation.
758  for (unsigned i = 0; i < 4; ++i)
759  if ((ZMask >> i) & 0x1)
760  ShuffleMask[i] = i + 4;
761  } else {
762  // TODO: Model this case as 2 shuffles or a 'logical and' plus shuffle?
763  return nullptr;
764  }
765  } else {
766  // Replace the selected destination lane with the selected source lane.
767  ShuffleMask[DestLane] = SourceLane + 4;
768  }
769 
770  return Builder.CreateShuffleVector(II.getArgOperand(0), V1, ShuffleMask);
771 }
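// Illustrative note (examples assumed, not in the original source): with an
// immediate of 0x40 (source lane 1, destination lane 0, empty zero mask) the
// call becomes
//   shufflevector <4 x float> %a, <4 x float> %b, <i32 5, i32 1, i32 2, i32 3>
// where %a and %b are the first and second operands, and an immediate whose
// low four bits are all set folds directly to a zero vector.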
772 
773 /// Attempt to simplify SSE4A EXTRQ/EXTRQI instructions using constant folding
774 /// or conversion to a shuffle vector.
775 static Value *simplifyX86extrq(IntrinsicInst &II, Value *Op0,
776  ConstantInt *CILength, ConstantInt *CIIndex,
777  InstCombiner::BuilderTy &Builder) {
778  auto LowConstantHighUndef = [&](uint64_t Val) {
779  Type *IntTy64 = Type::getInt64Ty(II.getContext());
780  Constant *Args[] = {ConstantInt::get(IntTy64, Val),
781  UndefValue::get(IntTy64)};
782  return ConstantVector::get(Args);
783  };
784 
785  // See if we're dealing with constant values.
786  Constant *C0 = dyn_cast<Constant>(Op0);
787  ConstantInt *CI0 =
788  C0 ? dyn_cast_or_null<ConstantInt>(C0->getAggregateElement((unsigned)0))
789  : nullptr;
790 
791  // Attempt to constant fold.
792  if (CILength && CIIndex) {
793  // From AMD documentation: "The bit index and field length are each six
794  // bits in length other bits of the field are ignored."
795  APInt APIndex = CIIndex->getValue().zextOrTrunc(6);
796  APInt APLength = CILength->getValue().zextOrTrunc(6);
797 
798  unsigned Index = APIndex.getZExtValue();
799 
800  // From AMD documentation: "a value of zero in the field length is
801  // defined as length of 64".
802  unsigned Length = APLength == 0 ? 64 : APLength.getZExtValue();
803 
804  // From AMD documentation: "If the sum of the bit index + length field
805  // is greater than 64, the results are undefined".
806  unsigned End = Index + Length;
807 
808  // Note that both field index and field length are 8-bit quantities.
809  // Since variables 'Index' and 'Length' are unsigned values
810  // obtained from zero-extending field index and field length
811  // respectively, their sum should never wrap around.
812  if (End > 64)
813  return UndefValue::get(II.getType());
814 
815  // If we are inserting whole bytes, we can convert this to a shuffle.
816  // Lowering can recognize EXTRQI shuffle masks.
817  if ((Length % 8) == 0 && (Index % 8) == 0) {
818  // Convert bit indices to byte indices.
819  Length /= 8;
820  Index /= 8;
821 
822  Type *IntTy8 = Type::getInt8Ty(II.getContext());
823  Type *IntTy32 = Type::getInt32Ty(II.getContext());
824  VectorType *ShufTy = VectorType::get(IntTy8, 16);
825 
826  SmallVector<Constant *, 16> ShuffleMask;
827  for (int i = 0; i != (int)Length; ++i)
828  ShuffleMask.push_back(
829  Constant::getIntegerValue(IntTy32, APInt(32, i + Index)));
830  for (int i = Length; i != 8; ++i)
831  ShuffleMask.push_back(
832  Constant::getIntegerValue(IntTy32, APInt(32, i + 16)));
833  for (int i = 8; i != 16; ++i)
834  ShuffleMask.push_back(UndefValue::get(IntTy32));
835 
836  Value *SV = Builder.CreateShuffleVector(
837  Builder.CreateBitCast(Op0, ShufTy),
838  ConstantAggregateZero::get(ShufTy), ConstantVector::get(ShuffleMask));
839  return Builder.CreateBitCast(SV, II.getType());
840  }
841 
842  // Constant Fold - shift Index'th bit to lowest position and mask off
843  // Length bits.
844  if (CI0) {
845  APInt Elt = CI0->getValue();
846  Elt.lshrInPlace(Index);
847  Elt = Elt.zextOrTrunc(Length);
848  return LowConstantHighUndef(Elt.getZExtValue());
849  }
850 
851  // If we were an EXTRQ call, we'll save registers if we convert to EXTRQI.
852  if (II.getIntrinsicID() == Intrinsic::x86_sse4a_extrq) {
853  Value *Args[] = {Op0, CILength, CIIndex};
854  Module *M = II.getModule();
855  Value *F = Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_extrqi);
856  return Builder.CreateCall(F, Args);
857  }
858  }
859 
860  // Constant Fold - extraction from zero is always {zero, undef}.
861  if (CI0 && CI0->isZero())
862  return LowConstantHighUndef(0);
863 
864  return nullptr;
865 }
866 
867 /// Attempt to simplify SSE4A INSERTQ/INSERTQI instructions using constant
868 /// folding or conversion to a shuffle vector.
869 static Value *simplifyX86insertq(IntrinsicInst &II, Value *Op0, Value *Op1,
870  APInt APLength, APInt APIndex,
871  InstCombiner::BuilderTy &Builder) {
872  // From AMD documentation: "The bit index and field length are each six bits
873  // in length other bits of the field are ignored."
874  APIndex = APIndex.zextOrTrunc(6);
875  APLength = APLength.zextOrTrunc(6);
876 
877  // Attempt to constant fold.
878  unsigned Index = APIndex.getZExtValue();
879 
880  // From AMD documentation: "a value of zero in the field length is
881  // defined as length of 64".
882  unsigned Length = APLength == 0 ? 64 : APLength.getZExtValue();
883 
884  // From AMD documentation: "If the sum of the bit index + length field
885  // is greater than 64, the results are undefined".
886  unsigned End = Index + Length;
887 
888  // Note that both field index and field length are 8-bit quantities.
889  // Since variables 'Index' and 'Length' are unsigned values
890  // obtained from zero-extending field index and field length
891  // respectively, their sum should never wrap around.
892  if (End > 64)
893  return UndefValue::get(II.getType());
894 
895  // If we are inserting whole bytes, we can convert this to a shuffle.
896  // Lowering can recognize INSERTQI shuffle masks.
897  if ((Length % 8) == 0 && (Index % 8) == 0) {
898  // Convert bit indices to byte indices.
899  Length /= 8;
900  Index /= 8;
901 
902  Type *IntTy8 = Type::getInt8Ty(II.getContext());
903  Type *IntTy32 = Type::getInt32Ty(II.getContext());
904  VectorType *ShufTy = VectorType::get(IntTy8, 16);
905 
906  SmallVector<Constant *, 16> ShuffleMask;
907  for (int i = 0; i != (int)Index; ++i)
908  ShuffleMask.push_back(Constant::getIntegerValue(IntTy32, APInt(32, i)));
909  for (int i = 0; i != (int)Length; ++i)
910  ShuffleMask.push_back(
911  Constant::getIntegerValue(IntTy32, APInt(32, i + 16)));
912  for (int i = Index + Length; i != 8; ++i)
913  ShuffleMask.push_back(Constant::getIntegerValue(IntTy32, APInt(32, i)));
914  for (int i = 8; i != 16; ++i)
915  ShuffleMask.push_back(UndefValue::get(IntTy32));
916 
917  Value *SV = Builder.CreateShuffleVector(Builder.CreateBitCast(Op0, ShufTy),
918  Builder.CreateBitCast(Op1, ShufTy),
919  ConstantVector::get(ShuffleMask));
920  return Builder.CreateBitCast(SV, II.getType());
921  }
922 
923  // See if we're dealing with constant values.
924  Constant *C0 = dyn_cast<Constant>(Op0);
925  Constant *C1 = dyn_cast<Constant>(Op1);
926  ConstantInt *CI00 =
927  C0 ? dyn_cast_or_null<ConstantInt>(C0->getAggregateElement((unsigned)0))
928  : nullptr;
929  ConstantInt *CI10 =
930  C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)0))
931  : nullptr;
932 
933  // Constant Fold - insert bottom Length bits starting at the Index'th bit.
934  if (CI00 && CI10) {
935  APInt V00 = CI00->getValue();
936  APInt V10 = CI10->getValue();
937  APInt Mask = APInt::getLowBitsSet(64, Length).shl(Index);
938  V00 = V00 & ~Mask;
939  V10 = V10.zextOrTrunc(Length).zextOrTrunc(64).shl(Index);
940  APInt Val = V00 | V10;
941  Type *IntTy64 = Type::getInt64Ty(II.getContext());
942  Constant *Args[] = {ConstantInt::get(IntTy64, Val.getZExtValue()),
943  UndefValue::get(IntTy64)};
944  return ConstantVector::get(Args);
945  }
946 
947  // If we were an INSERTQ call, we'll save demanded elements if we convert to
948  // INSERTQI.
949  if (II.getIntrinsicID() == Intrinsic::x86_sse4a_insertq) {
950  Type *IntTy8 = Type::getInt8Ty(II.getContext());
951  Constant *CILength = ConstantInt::get(IntTy8, Length, false);
952  Constant *CIIndex = ConstantInt::get(IntTy8, Index, false);
953 
954  Value *Args[] = {Op0, Op1, CILength, CIIndex};
955  Module *M = II.getModule();
956  Value *F = Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_insertqi);
957  return Builder.CreateCall(F, Args);
958  }
959 
960  return nullptr;
961 }
962 
963 /// Attempt to convert pshufb* to shufflevector if the mask is constant.
964 static Value *simplifyX86pshufb(const IntrinsicInst &II,
965  InstCombiner::BuilderTy &Builder) {
966  Constant *V = dyn_cast<Constant>(II.getArgOperand(1));
967  if (!V)
968  return nullptr;
969 
970  auto *VecTy = cast<VectorType>(II.getType());
971  auto *MaskEltTy = Type::getInt32Ty(II.getContext());
972  unsigned NumElts = VecTy->getNumElements();
973  assert((NumElts == 16 || NumElts == 32 || NumElts == 64) &&
974  "Unexpected number of elements in shuffle mask!");
975 
976  // Construct a shuffle mask from constant integers or UNDEFs.
977  Constant *Indexes[64] = {nullptr};
978 
979  // Each byte in the shuffle control mask forms an index to permute the
980  // corresponding byte in the destination operand.
981  for (unsigned I = 0; I < NumElts; ++I) {
982  Constant *COp = V->getAggregateElement(I);
983  if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
984  return nullptr;
985 
986  if (isa<UndefValue>(COp)) {
987  Indexes[I] = UndefValue::get(MaskEltTy);
988  continue;
989  }
990 
991  int8_t Index = cast<ConstantInt>(COp)->getValue().getZExtValue();
992 
993  // If the most significant bit (bit[7]) of each byte of the shuffle
994  // control mask is set, then zero is written in the result byte.
995  // The zero vector is in the right-hand side of the resulting
996  // shufflevector.
997 
998  // The value of each index for the high 128-bit lane is the least
999  // significant 4 bits of the respective shuffle control byte.
1000  Index = ((Index < 0) ? NumElts : Index & 0x0F) + (I & 0xF0);
1001  Indexes[I] = ConstantInt::get(MaskEltTy, Index);
1002  }
1003 
1004  auto ShuffleMask = ConstantVector::get(makeArrayRef(Indexes, NumElts));
1005  auto V1 = II.getArgOperand(0);
1006  auto V2 = Constant::getNullValue(VecTy);
1007  return Builder.CreateShuffleVector(V1, V2, ShuffleMask);
1008 }
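// Illustrative note (examples assumed, not in the original source): a constant
// pshufb mask becomes a shufflevector against a zero vector, e.g. a 16-byte
// mask of all zeros splats byte 0 of the input, and any mask byte with bit 7
// set selects from the zero operand so that result byte becomes 0.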
1009 
1010 /// Attempt to convert vpermilvar* to shufflevector if the mask is constant.
1011 static Value *simplifyX86vpermilvar(const IntrinsicInst &II,
1012  InstCombiner::BuilderTy &Builder) {
1013  Constant *V = dyn_cast<Constant>(II.getArgOperand(1));
1014  if (!V)
1015  return nullptr;
1016 
1017  auto *VecTy = cast<VectorType>(II.getType());
1018  auto *MaskEltTy = Type::getInt32Ty(II.getContext());
1019  unsigned NumElts = VecTy->getVectorNumElements();
1020  bool IsPD = VecTy->getScalarType()->isDoubleTy();
1021  unsigned NumLaneElts = IsPD ? 2 : 4;
1022  assert(NumElts == 16 || NumElts == 8 || NumElts == 4 || NumElts == 2);
1023 
1024  // Construct a shuffle mask from constant integers or UNDEFs.
1025  Constant *Indexes[16] = {nullptr};
1026 
1027  // The intrinsics only read one or two bits, clear the rest.
1028  for (unsigned I = 0; I < NumElts; ++I) {
1029  Constant *COp = V->getAggregateElement(I);
1030  if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
1031  return nullptr;
1032 
1033  if (isa<UndefValue>(COp)) {
1034  Indexes[I] = UndefValue::get(MaskEltTy);
1035  continue;
1036  }
1037 
1038  APInt Index = cast<ConstantInt>(COp)->getValue();
1039  Index = Index.zextOrTrunc(32).getLoBits(2);
1040 
1041  // The PD variants use bit 1 to select the per-lane element index, so
1042  // shift down to convert to generic shuffle mask index.
1043  if (IsPD)
1044  Index.lshrInPlace(1);
1045 
1046  // The _256 variants are a bit trickier since the mask bits always index
1047  // into the corresponding 128-bit half. In order to convert to a generic
1048  // shuffle, we have to make that explicit.
1049  Index += APInt(32, (I / NumLaneElts) * NumLaneElts);
1050 
1051  Indexes[I] = ConstantInt::get(MaskEltTy, Index);
1052  }
1053 
1054  auto ShuffleMask = ConstantVector::get(makeArrayRef(Indexes, NumElts));
1055  auto V1 = II.getArgOperand(0);
1056  auto V2 = UndefValue::get(V1->getType());
1057  return Builder.CreateShuffleVector(V1, V2, ShuffleMask);
1058 }
1059 
1060 /// Attempt to convert vpermd/vpermps to shufflevector if the mask is constant.
1061 static Value *simplifyX86vpermv(const IntrinsicInst &II,
1062  InstCombiner::BuilderTy &Builder) {
1063  auto *V = dyn_cast<Constant>(II.getArgOperand(1));
1064  if (!V)
1065  return nullptr;
1066 
1067  auto *VecTy = cast<VectorType>(II.getType());
1068  auto *MaskEltTy = Type::getInt32Ty(II.getContext());
1069  unsigned Size = VecTy->getNumElements();
1070  assert((Size == 4 || Size == 8 || Size == 16 || Size == 32 || Size == 64) &&
1071  "Unexpected shuffle mask size");
1072 
1073  // Construct a shuffle mask from constant integers or UNDEFs.
1074  Constant *Indexes[64] = {nullptr};
1075 
1076  for (unsigned I = 0; I < Size; ++I) {
1077  Constant *COp = V->getAggregateElement(I);
1078  if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
1079  return nullptr;
1080 
1081  if (isa<UndefValue>(COp)) {
1082  Indexes[I] = UndefValue::get(MaskEltTy);
1083  continue;
1084  }
1085 
1086  uint32_t Index = cast<ConstantInt>(COp)->getZExtValue();
1087  Index &= Size - 1;
1088  Indexes[I] = ConstantInt::get(MaskEltTy, Index);
1089  }
1090 
1091  auto ShuffleMask = ConstantVector::get(makeArrayRef(Indexes, Size));
1092  auto V1 = II.getArgOperand(0);
1093  auto V2 = UndefValue::get(VecTy);
1094  return Builder.CreateShuffleVector(V1, V2, ShuffleMask);
1095 }
1096 
1097 /// The shuffle mask for a perm2*128 selects any two halves of two 256-bit
1098 /// source vectors, unless a zero bit is set. If a zero bit is set,
1099 /// then ignore that half of the mask and clear that half of the vector.
1100 static Value *simplifyX86vperm2(const IntrinsicInst &II,
1101  InstCombiner::BuilderTy &Builder) {
1102  auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2));
1103  if (!CInt)
1104  return nullptr;
1105 
1106  VectorType *VecTy = cast<VectorType>(II.getType());
1107  ConstantAggregateZero *ZeroVector = ConstantAggregateZero::get(VecTy);
1108 
1109  // The immediate permute control byte looks like this:
1110  // [1:0] - select 128 bits from sources for low half of destination
1111  // [2] - ignore
1112  // [3] - zero low half of destination
1113  // [5:4] - select 128 bits from sources for high half of destination
1114  // [6] - ignore
1115  // [7] - zero high half of destination
1116 
1117  uint8_t Imm = CInt->getZExtValue();
1118 
1119  bool LowHalfZero = Imm & 0x08;
1120  bool HighHalfZero = Imm & 0x80;
1121 
1122  // If both zero mask bits are set, this was just a weird way to
1123  // generate a zero vector.
1124  if (LowHalfZero && HighHalfZero)
1125  return ZeroVector;
1126 
1127  // If 0 or 1 zero mask bits are set, this is a simple shuffle.
1128  unsigned NumElts = VecTy->getNumElements();
1129  unsigned HalfSize = NumElts / 2;
1130  SmallVector<uint32_t, 8> ShuffleMask(NumElts);
1131 
1132  // The high bit of the selection field chooses the 1st or 2nd operand.
1133  bool LowInputSelect = Imm & 0x02;
1134  bool HighInputSelect = Imm & 0x20;
1135 
1136  // The low bit of the selection field chooses the low or high half
1137  // of the selected operand.
1138  bool LowHalfSelect = Imm & 0x01;
1139  bool HighHalfSelect = Imm & 0x10;
1140 
1141  // Determine which operand(s) are actually in use for this instruction.
1142  Value *V0 = LowInputSelect ? II.getArgOperand(1) : II.getArgOperand(0);
1143  Value *V1 = HighInputSelect ? II.getArgOperand(1) : II.getArgOperand(0);
1144 
1145  // If needed, replace operands based on zero mask.
1146  V0 = LowHalfZero ? ZeroVector : V0;
1147  V1 = HighHalfZero ? ZeroVector : V1;
1148 
1149  // Permute low half of result.
1150  unsigned StartIndex = LowHalfSelect ? HalfSize : 0;
1151  for (unsigned i = 0; i < HalfSize; ++i)
1152  ShuffleMask[i] = StartIndex + i;
1153 
1154  // Permute high half of result.
1155  StartIndex = HighHalfSelect ? HalfSize : 0;
1156  StartIndex += NumElts;
1157  for (unsigned i = 0; i < HalfSize; ++i)
1158  ShuffleMask[i + HalfSize] = StartIndex + i;
1159 
1160  return Builder.CreateShuffleVector(V0, V1, ShuffleMask);
1161 }
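// Illustrative note (examples assumed, not in the original source): for
// <8 x i32> operands an immediate of 0x31 selects the high 128 bits of the
// first source for the low half and the high 128 bits of the second source
// for the high half, i.e.
//   shufflevector <8 x i32> %a, <8 x i32> %b,
//                 <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>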
1162 
1163 /// Decode XOP integer vector comparison intrinsics.
1164 static Value *simplifyX86vpcom(const IntrinsicInst &II,
1165  InstCombiner::BuilderTy &Builder,
1166  bool IsSigned) {
1167  if (auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
1168  uint64_t Imm = CInt->getZExtValue() & 0x7;
1169  VectorType *VecTy = cast<VectorType>(II.getType());
1170  ICmpInst::Predicate Pred = ICmpInst::ICMP_EQ;
1171 
1172  switch (Imm) {
1173  case 0x0:
1174  Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
1175  break;
1176  case 0x1:
1177  Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
1178  break;
1179  case 0x2:
1180  Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
1181  break;
1182  case 0x3:
1183  Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
1184  break;
1185  case 0x4:
1186  Pred = ICmpInst::ICMP_EQ; break;
1187  case 0x5:
1188  Pred = ICmpInst::ICMP_NE; break;
1189  case 0x6:
1190  return ConstantInt::getSigned(VecTy, 0); // FALSE
1191  case 0x7:
1192  return ConstantInt::getSigned(VecTy, -1); // TRUE
1193  }
1194 
1195  if (Value *Cmp = Builder.CreateICmp(Pred, II.getArgOperand(0),
1196  II.getArgOperand(1)))
1197  return Builder.CreateSExtOrTrunc(Cmp, VecTy);
1198  }
1199  return nullptr;
1200 }
1201 
1202 // Emit a select instruction and appropriate bitcasts to help simplify
1203 // masked intrinsics.
1204 static Value *emitX86MaskSelect(Value *Mask, Value *Op0, Value *Op1,
1205  InstCombiner::BuilderTy &Builder) {
1206  unsigned VWidth = Op0->getType()->getVectorNumElements();
1207 
1208  // If the mask is all ones we don't need the select. But we need to check
1209  // only the bits that will be used in case VWidth is less than 8.
1210  if (auto *C = dyn_cast<ConstantInt>(Mask))
1211  if (C->getValue().zextOrTrunc(VWidth).isAllOnesValue())
1212  return Op0;
1213 
1214  auto *MaskTy = VectorType::get(Builder.getInt1Ty(),
1215  cast<IntegerType>(Mask->getType())->getBitWidth());
1216  Mask = Builder.CreateBitCast(Mask, MaskTy);
1217 
1218  // If we have less than 8 elements, then the starting mask was an i8 and
1219  // we need to extract down to the right number of elements.
1220  if (VWidth < 8) {
1221  uint32_t Indices[4];
1222  for (unsigned i = 0; i != VWidth; ++i)
1223  Indices[i] = i;
1224  Mask = Builder.CreateShuffleVector(Mask, Mask,
1225  makeArrayRef(Indices, VWidth),
1226  "extract");
1227  }
1228 
1229  return Builder.CreateSelect(Mask, Op0, Op1);
1230 }
1231 
1232 static Value *simplifyMinnumMaxnum(const IntrinsicInst &II) {
1233  Value *Arg0 = II.getArgOperand(0);
1234  Value *Arg1 = II.getArgOperand(1);
1235 
1236  // fmin(x, x) -> x
1237  if (Arg0 == Arg1)
1238  return Arg0;
1239 
1240  const auto *C1 = dyn_cast<ConstantFP>(Arg1);
1241 
1242  // fmin(x, nan) -> x
1243  if (C1 && C1->isNaN())
1244  return Arg0;
1245 
1246  // This is the value because if undef were NaN, we would return the other
1247  // value and cannot return a NaN unless both operands are.
1248  //
1249  // fmin(undef, x) -> x
1250  if (isa<UndefValue>(Arg0))
1251  return Arg1;
1252 
1253  // fmin(x, undef) -> x
1254  if (isa<UndefValue>(Arg1))
1255  return Arg0;
1256 
1257  Value *X = nullptr;
1258  Value *Y = nullptr;
1259  if (II.getIntrinsicID() == Intrinsic::minnum) {
1260  // fmin(x, fmin(x, y)) -> fmin(x, y)
1261  // fmin(y, fmin(x, y)) -> fmin(x, y)
1262  if (match(Arg1, m_FMin(m_Value(X), m_Value(Y)))) {
1263  if (Arg0 == X || Arg0 == Y)
1264  return Arg1;
1265  }
1266 
1267  // fmin(fmin(x, y), x) -> fmin(x, y)
1268  // fmin(fmin(x, y), y) -> fmin(x, y)
1269  if (match(Arg0, m_FMin(m_Value(X), m_Value(Y)))) {
1270  if (Arg1 == X || Arg1 == Y)
1271  return Arg0;
1272  }
1273 
1274  // TODO: fmin(nnan x, inf) -> x
1275  // TODO: fmin(nnan ninf x, flt_max) -> x
1276  if (C1 && C1->isInfinity()) {
1277  // fmin(x, -inf) -> -inf
1278  if (C1->isNegative())
1279  return Arg1;
1280  }
1281  } else {
1283  // fmax(x, fmax(x, y)) -> fmax(x, y)
1284  // fmax(y, fmax(x, y)) -> fmax(x, y)
1285  if (match(Arg1, m_FMax(m_Value(X), m_Value(Y)))) {
1286  if (Arg0 == X || Arg0 == Y)
1287  return Arg1;
1288  }
1289 
1290  // fmax(fmax(x, y), x) -> fmax(x, y)
1291  // fmax(fmax(x, y), y) -> fmax(x, y)
1292  if (match(Arg0, m_FMax(m_Value(X), m_Value(Y)))) {
1293  if (Arg1 == X || Arg1 == Y)
1294  return Arg0;
1295  }
1296 
1297  // TODO: fmax(nnan x, -inf) -> x
1298  // TODO: fmax(nnan ninf x, -flt_max) -> x
1299  if (C1 && C1->isInfinity()) {
1300  // fmax(x, inf) -> inf
1301  if (!C1->isNegative())
1302  return Arg1;
1303  }
1304  }
1305  return nullptr;
1306 }
1307 
1308 static bool maskIsAllOneOrUndef(Value *Mask) {
1309  auto *ConstMask = dyn_cast<Constant>(Mask);
1310  if (!ConstMask)
1311  return false;
1312  if (ConstMask->isAllOnesValue() || isa<UndefValue>(ConstMask))
1313  return true;
1314  for (unsigned I = 0, E = ConstMask->getType()->getVectorNumElements(); I != E;
1315  ++I) {
1316  if (auto *MaskElt = ConstMask->getAggregateElement(I))
1317  if (MaskElt->isAllOnesValue() || isa<UndefValue>(MaskElt))
1318  continue;
1319  return false;
1320  }
1321  return true;
1322 }
1323 
1324 static Value *simplifyMaskedLoad(const IntrinsicInst &II,
1325  InstCombiner::BuilderTy &Builder) {
1326  // If the mask is all ones or undefs, this is a plain vector load of the 1st
1327  // argument.
1328  if (maskIsAllOneOrUndef(II.getArgOperand(2))) {
1329  Value *LoadPtr = II.getArgOperand(0);
1330  unsigned Alignment = cast<ConstantInt>(II.getArgOperand(1))->getZExtValue();
1331  return Builder.CreateAlignedLoad(LoadPtr, Alignment, "unmaskedload");
1332  }
1333 
1334  return nullptr;
1335 }
1336 
1337 static Instruction *simplifyMaskedStore(IntrinsicInst &II, InstCombiner &IC) {
1338  auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(3));
1339  if (!ConstMask)
1340  return nullptr;
1341 
1342  // If the mask is all zeros, this instruction does nothing.
1343  if (ConstMask->isNullValue())
1344  return IC.eraseInstFromFunction(II);
1345 
1346  // If the mask is all ones, this is a plain vector store of the 1st argument.
1347  if (ConstMask->isAllOnesValue()) {
1348  Value *StorePtr = II.getArgOperand(1);
1349  unsigned Alignment = cast<ConstantInt>(II.getArgOperand(2))->getZExtValue();
1350  return new StoreInst(II.getArgOperand(0), StorePtr, false, Alignment);
1351  }
1352 
1353  return nullptr;
1354 }
1355 
1356 static Instruction *simplifyMaskedGather(IntrinsicInst &II, InstCombiner &IC) {
1357  // If the mask is all zeros, return the "passthru" argument of the gather.
1358  auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(2));
1359  if (ConstMask && ConstMask->isNullValue())
1360  return IC.replaceInstUsesWith(II, II.getArgOperand(3));
1361 
1362  return nullptr;
1363 }
1364 
1365 static Instruction *simplifyMaskedScatter(IntrinsicInst &II, InstCombiner &IC) {
1366  // If the mask is all zeros, a scatter does nothing.
1367  auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(3));
1368  if (ConstMask && ConstMask->isNullValue())
1369  return IC.eraseInstFromFunction(II);
1370 
1371  return nullptr;
1372 }
1373 
1374 static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombiner &IC) {
1375  assert((II.getIntrinsicID() == Intrinsic::cttz ||
1376  II.getIntrinsicID() == Intrinsic::ctlz) &&
1377  "Expected cttz or ctlz intrinsic");
1378  Value *Op0 = II.getArgOperand(0);
1379 
1380  KnownBits Known = IC.computeKnownBits(Op0, 0, &II);
1381 
1382  // Create a mask for bits above (ctlz) or below (cttz) the first known one.
1383  bool IsTZ = II.getIntrinsicID() == Intrinsic::cttz;
1384  unsigned PossibleZeros = IsTZ ? Known.countMaxTrailingZeros()
1385  : Known.countMaxLeadingZeros();
1386  unsigned DefiniteZeros = IsTZ ? Known.countMinTrailingZeros()
1387  : Known.countMinLeadingZeros();
1388 
1389  // If all bits above (ctlz) or below (cttz) the first known one are known
1390  // zero, this value is constant.
1391  // FIXME: This should be in InstSimplify because we're replacing an
1392  // instruction with a constant.
1393  if (PossibleZeros == DefiniteZeros) {
1394  auto *C = ConstantInt::get(Op0->getType(), DefiniteZeros);
1395  return IC.replaceInstUsesWith(II, C);
1396  }
1397 
1398  // If the input to cttz/ctlz is known to be non-zero,
1399  // then change the 'ZeroIsUndef' parameter to 'true'
1400  // because we know the zero behavior can't affect the result.
1401  if (!Known.One.isNullValue() ||
1402  isKnownNonZero(Op0, IC.getDataLayout(), 0, &IC.getAssumptionCache(), &II,
1403  &IC.getDominatorTree())) {
1404  if (!match(II.getArgOperand(1), m_One())) {
1405  II.setOperand(1, IC.Builder.getTrue());
1406  return &II;
1407  }
1408  }
1409 
1410  // Add range metadata since known bits can't completely reflect what we know.
1411  // TODO: Handle splat vectors.
1412  auto *IT = dyn_cast<IntegerType>(Op0->getType());
1413  if (IT && IT->getBitWidth() != 1 && !II.getMetadata(LLVMContext::MD_range)) {
1414  Metadata *LowAndHigh[] = {
1415  ConstantAsMetadata::get(ConstantInt::get(IT, DefiniteZeros)),
1416  ConstantAsMetadata::get(ConstantInt::get(IT, PossibleZeros + 1))};
1417  II.setMetadata(LLVMContext::MD_range,
1418  MDNode::get(II.getContext(), LowAndHigh));
1419  return &II;
1420  }
1421 
1422  return nullptr;
1423 }
1424 
1425 static Instruction *foldCtpop(IntrinsicInst &II, InstCombiner &IC) {
1426  assert(II.getIntrinsicID() == Intrinsic::ctpop &&
1427  "Expected ctpop intrinsic");
1428  Value *Op0 = II.getArgOperand(0);
1429  // FIXME: Try to simplify vectors of integers.
1430  auto *IT = dyn_cast<IntegerType>(Op0->getType());
1431  if (!IT)
1432  return nullptr;
1433 
1434  unsigned BitWidth = IT->getBitWidth();
1435  KnownBits Known(BitWidth);
1436  IC.computeKnownBits(Op0, Known, 0, &II);
1437 
1438  unsigned MinCount = Known.countMinPopulation();
1439  unsigned MaxCount = Known.countMaxPopulation();
1440 
1441  // Add range metadata since known bits can't completely reflect what we know.
1442  if (IT->getBitWidth() != 1 && !II.getMetadata(LLVMContext::MD_range)) {
1443  Metadata *LowAndHigh[] = {
1444  ConstantAsMetadata::get(ConstantInt::get(IT, MinCount)),
1445  ConstantAsMetadata::get(ConstantInt::get(IT, MaxCount + 1))};
1446  II.setMetadata(LLVMContext::MD_range,
1447  MDNode::get(II.getContext(), LowAndHigh));
1448  return &II;
1449  }
1450 
1451  return nullptr;
1452 }
1453 
1454 // TODO: If the x86 backend knew how to convert a bool vector mask back to an
1455 // XMM register mask efficiently, we could transform all x86 masked intrinsics
1456 // to LLVM masked intrinsics and remove the x86 masked intrinsic defs.
1457 static Instruction *simplifyX86MaskedLoad(IntrinsicInst &II, InstCombiner &IC) {
1458  Value *Ptr = II.getOperand(0);
1459  Value *Mask = II.getOperand(1);
1460  Constant *ZeroVec = Constant::getNullValue(II.getType());
1461 
1462  // Special case a zero mask since that's not a ConstantDataVector.
1463  // This masked load instruction creates a zero vector.
1464  if (isa<ConstantAggregateZero>(Mask))
1465  return IC.replaceInstUsesWith(II, ZeroVec);
1466 
1467  auto *ConstMask = dyn_cast<ConstantDataVector>(Mask);
1468  if (!ConstMask)
1469  return nullptr;
1470 
1471  // The mask is constant. Convert this x86 intrinsic to the LLVM intrinsic
1472  // to allow target-independent optimizations.
1473 
1474  // First, cast the x86 intrinsic scalar pointer to a vector pointer to match
1475  // the LLVM intrinsic definition for the pointer argument.
1476  unsigned AddrSpace = cast<PointerType>(Ptr->getType())->getAddressSpace();
1477  PointerType *VecPtrTy = PointerType::get(II.getType(), AddrSpace);
1478  Value *PtrCast = IC.Builder.CreateBitCast(Ptr, VecPtrTy, "castvec");
1479 
1480  // Second, convert the x86 XMM integer vector mask to a vector of bools based
1481  // on each element's most significant bit (the sign bit).
1482  Constant *BoolMask = getNegativeIsTrueBoolVec(ConstMask);
1483 
1484  // The pass-through vector for an x86 masked load is a zero vector.
1485  CallInst *NewMaskedLoad =
1486  IC.Builder.CreateMaskedLoad(PtrCast, 1, BoolMask, ZeroVec);
1487  return IC.replaceInstUsesWith(II, NewMaskedLoad);
1488 }
1489 
1490 // TODO: If the x86 backend knew how to convert a bool vector mask back to an
1491 // XMM register mask efficiently, we could transform all x86 masked intrinsics
1492 // to LLVM masked intrinsics and remove the x86 masked intrinsic defs.
1493 static bool simplifyX86MaskedStore(IntrinsicInst &II, InstCombiner &IC) {
1494  Value *Ptr = II.getOperand(0);
1495  Value *Mask = II.getOperand(1);
1496  Value *Vec = II.getOperand(2);
1497 
1498  // Special case a zero mask since that's not a ConstantDataVector:
1499  // this masked store instruction does nothing.
1500  if (isa<ConstantAggregateZero>(Mask)) {
1501  IC.eraseInstFromFunction(II);
1502  return true;
1503  }
1504 
1505  // The SSE2 version is too weird (eg, unaligned but non-temporal) to do
1506  // anything else at this level.
1507  if (II.getIntrinsicID() == Intrinsic::x86_sse2_maskmov_dqu)
1508  return false;
1509 
1510  auto *ConstMask = dyn_cast<ConstantDataVector>(Mask);
1511  if (!ConstMask)
1512  return false;
1513 
1514  // The mask is constant. Convert this x86 intrinsic to the LLVM intrinsic
1515  // to allow target-independent optimizations.
1516 
1517  // First, cast the x86 intrinsic scalar pointer to a vector pointer to match
1518  // the LLVM intrinsic definition for the pointer argument.
1519  unsigned AddrSpace = cast<PointerType>(Ptr->getType())->getAddressSpace();
1520  PointerType *VecPtrTy = PointerType::get(Vec->getType(), AddrSpace);
1521  Value *PtrCast = IC.Builder.CreateBitCast(Ptr, VecPtrTy, "castvec");
1522 
1523  // Second, convert the x86 XMM integer vector mask to a vector of bools based
1524  // on each element's most significant bit (the sign bit).
1525  Constant *BoolMask = getNegativeIsTrueBoolVec(ConstMask);
1526 
1527  IC.Builder.CreateMaskedStore(Vec, PtrCast, 1, BoolMask);
1528 
1529  // 'Replace uses' doesn't work for stores. Erase the original masked store.
1530  IC.eraseInstFromFunction(II);
1531  return true;
1532 }
1533 
1534 // Constant fold llvm.amdgcn.fmed3 intrinsics for standard inputs.
1535 //
1536 // A single NaN input is folded to minnum, so we rely on that folding for
1537 // handling NaNs.
1538 static APFloat fmed3AMDGCN(const APFloat &Src0, const APFloat &Src1,
1539  const APFloat &Src2) {
1540  APFloat Max3 = maxnum(maxnum(Src0, Src1), Src2);
1541 
1542  APFloat::cmpResult Cmp0 = Max3.compare(Src0);
1543  assert(Cmp0 != APFloat::cmpUnordered && "nans handled separately");
1544  if (Cmp0 == APFloat::cmpEqual)
1545  return maxnum(Src1, Src2);
1546 
1547  APFloat::cmpResult Cmp1 = Max3.compare(Src1);
1548  assert(Cmp1 != APFloat::cmpUnordered && "nans handled separately");
1549  if (Cmp1 == APFloat::cmpEqual)
1550  return maxnum(Src0, Src2);
1551 
1552  return maxnum(Src0, Src1);
1553 }
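// Worked example (illustrative): fmed3(1.0, 5.0, 3.0). Max3 = 5.0 compares
// equal to Src1, so the result is maxnum(Src0, Src2) = maxnum(1.0, 3.0) = 3.0,
// i.e. the median of the three inputs.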
1554 
1555 // Returns true iff the 2 intrinsics have the same operands, limiting the
1556 // comparison to the first NumOperands.
1557 static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E,
1558  unsigned NumOperands) {
1559  assert(I.getNumArgOperands() >= NumOperands && "Not enough operands");
1560  assert(E.getNumArgOperands() >= NumOperands && "Not enough operands");
1561  for (unsigned i = 0; i < NumOperands; i++)
1562  if (I.getArgOperand(i) != E.getArgOperand(i))
1563  return false;
1564  return true;
1565 }
1566 
1567 // Remove trivially empty start/end intrinsic ranges, i.e. a start
1568 // immediately followed by an end (ignoring debuginfo or other
1569 // start/end intrinsics in between). As this handles only the most trivial
1570 // cases, tracking the nesting level is not needed:
1571 //
1572 // call @llvm.foo.start(i1 0) ; &I
1573 // call @llvm.foo.start(i1 0)
1574 // call @llvm.foo.end(i1 0) ; This one will not be skipped: it will be removed
1575 // call @llvm.foo.end(i1 0)
1576 static bool removeTriviallyEmptyRange(IntrinsicInst &I, unsigned StartID,
1577  unsigned EndID, InstCombiner &IC) {
1578  assert(I.getIntrinsicID() == StartID &&
1579  "Start intrinsic does not have expected ID");
1580  BasicBlock::iterator BI(I), BE(I.getParent()->end());
1581  for (++BI; BI != BE; ++BI) {
1582  if (auto *E = dyn_cast<IntrinsicInst>(BI)) {
1583  if (isa<DbgInfoIntrinsic>(E) || E->getIntrinsicID() == StartID)
1584  continue;
1585  if (E->getIntrinsicID() == EndID &&
1586  haveSameOperands(I, *E, E->getNumArgOperands())) {
1587  IC.eraseInstFromFunction(*E);
1588  IC.eraseInstFromFunction(I);
1589  return true;
1590  }
1591  }
1592  break;
1593  }
1594 
1595  return false;
1596 }
1597 
1598 // Convert NVVM intrinsics to target-generic LLVM code where possible.
1599 static Instruction *SimplifyNVVMIntrinsic(IntrinsicInst *II, InstCombiner &IC) {
1600  // Each NVVM intrinsic we can simplify can be replaced with one of:
1601  //
1602  // * an LLVM intrinsic,
1603  // * an LLVM cast operation,
1604  // * an LLVM binary operation, or
1605  // * ad-hoc LLVM IR for the particular operation.
1606 
1607  // Some transformations are only valid when the module's
1608  // flush-denormals-to-zero (ftz) setting is true/false, whereas other
1609  // transformations are valid regardless of the module's ftz setting.
1610  enum FtzRequirementTy {
1611  FTZ_Any, // Any ftz setting is ok.
1612  FTZ_MustBeOn, // Transformation is valid only if ftz is on.
1613  FTZ_MustBeOff, // Transformation is valid only if ftz is off.
1614  };
1615  // Classes of NVVM intrinsics that can't be replaced one-to-one with a
1616  // target-generic intrinsic, cast op, or binary op but that we can nonetheless
1617  // simplify.
1618  enum SpecialCase {
1619  SPC_Reciprocal,
1620  };
1621 
1622  // SimplifyAction is a poor-man's variant (plus an additional flag) that
1623  // represents how to replace an NVVM intrinsic with target-generic LLVM IR.
1624  struct SimplifyAction {
1625  // Invariant: At most one of these Optionals has a value.
1626  Optional<Intrinsic::ID> IID;
1627  Optional<Instruction::CastOps> CastOp;
1628  Optional<Instruction::BinaryOps> BinaryOp;
1629  Optional<SpecialCase> Special;
1630 
1631  FtzRequirementTy FtzRequirement = FTZ_Any;
1632 
1633  SimplifyAction() = default;
1634 
1635  SimplifyAction(Intrinsic::ID IID, FtzRequirementTy FtzReq)
1636  : IID(IID), FtzRequirement(FtzReq) {}
1637 
1638  // Cast operations don't have anything to do with FTZ, so we skip that
1639  // argument.
1640  SimplifyAction(Instruction::CastOps CastOp) : CastOp(CastOp) {}
1641 
1642  SimplifyAction(Instruction::BinaryOps BinaryOp, FtzRequirementTy FtzReq)
1643  : BinaryOp(BinaryOp), FtzRequirement(FtzReq) {}
1644 
1645  SimplifyAction(SpecialCase Special, FtzRequirementTy FtzReq)
1646  : Special(Special), FtzRequirement(FtzReq) {}
1647  };
1648 
1649  // Try to generate a SimplifyAction describing how to replace our
1650  // IntrinsicInstr with target-generic LLVM IR.
1651  const SimplifyAction Action = [II]() -> SimplifyAction {
1652  switch (II->getIntrinsicID()) {
1653 
1654  // NVVM intrinsics that map directly to LLVM intrinsics.
1655  case Intrinsic::nvvm_ceil_d:
1656  return {Intrinsic::ceil, FTZ_Any};
1657  case Intrinsic::nvvm_ceil_f:
1658  return {Intrinsic::ceil, FTZ_MustBeOff};
1659  case Intrinsic::nvvm_ceil_ftz_f:
1660  return {Intrinsic::ceil, FTZ_MustBeOn};
1661  case Intrinsic::nvvm_fabs_d:
1662  return {Intrinsic::fabs, FTZ_Any};
1663  case Intrinsic::nvvm_fabs_f:
1664  return {Intrinsic::fabs, FTZ_MustBeOff};
1665  case Intrinsic::nvvm_fabs_ftz_f:
1666  return {Intrinsic::fabs, FTZ_MustBeOn};
1667  case Intrinsic::nvvm_floor_d:
1668  return {Intrinsic::floor, FTZ_Any};
1669  case Intrinsic::nvvm_floor_f:
1670  return {Intrinsic::floor, FTZ_MustBeOff};
1671  case Intrinsic::nvvm_floor_ftz_f:
1672  return {Intrinsic::floor, FTZ_MustBeOn};
1673  case Intrinsic::nvvm_fma_rn_d:
1674  return {Intrinsic::fma, FTZ_Any};
1675  case Intrinsic::nvvm_fma_rn_f:
1676  return {Intrinsic::fma, FTZ_MustBeOff};
1677  case Intrinsic::nvvm_fma_rn_ftz_f:
1678  return {Intrinsic::fma, FTZ_MustBeOn};
1679  case Intrinsic::nvvm_fmax_d:
1680  return {Intrinsic::maxnum, FTZ_Any};
1681  case Intrinsic::nvvm_fmax_f:
1682  return {Intrinsic::maxnum, FTZ_MustBeOff};
1683  case Intrinsic::nvvm_fmax_ftz_f:
1684  return {Intrinsic::maxnum, FTZ_MustBeOn};
1685  case Intrinsic::nvvm_fmin_d:
1686  return {Intrinsic::minnum, FTZ_Any};
1687  case Intrinsic::nvvm_fmin_f:
1688  return {Intrinsic::minnum, FTZ_MustBeOff};
1689  case Intrinsic::nvvm_fmin_ftz_f:
1690  return {Intrinsic::minnum, FTZ_MustBeOn};
1691  case Intrinsic::nvvm_round_d:
1692  return {Intrinsic::round, FTZ_Any};
1693  case Intrinsic::nvvm_round_f:
1694  return {Intrinsic::round, FTZ_MustBeOff};
1695  case Intrinsic::nvvm_round_ftz_f:
1696  return {Intrinsic::round, FTZ_MustBeOn};
1697  case Intrinsic::nvvm_sqrt_rn_d:
1698  return {Intrinsic::sqrt, FTZ_Any};
1699  case Intrinsic::nvvm_sqrt_f:
1700  // nvvm_sqrt_f is a special case. For most intrinsics, foo_ftz_f is the
1701  // ftz version, and foo_f is the non-ftz version. But nvvm_sqrt_f adopts
1702  // the ftz-ness of the surrounding code. sqrt_rn_f and sqrt_rn_ftz_f are
1703  // the versions with explicit ftz-ness.
1704  return {Intrinsic::sqrt, FTZ_Any};
1705  case Intrinsic::nvvm_sqrt_rn_f:
1706  return {Intrinsic::sqrt, FTZ_MustBeOff};
1707  case Intrinsic::nvvm_sqrt_rn_ftz_f:
1708  return {Intrinsic::sqrt, FTZ_MustBeOn};
1709  case Intrinsic::nvvm_trunc_d:
1710  return {Intrinsic::trunc, FTZ_Any};
1711  case Intrinsic::nvvm_trunc_f:
1712  return {Intrinsic::trunc, FTZ_MustBeOff};
1713  case Intrinsic::nvvm_trunc_ftz_f:
1714  return {Intrinsic::trunc, FTZ_MustBeOn};
1715 
1716  // NVVM intrinsics that map to LLVM cast operations.
1717  //
1718  // Note that llvm's target-generic conversion operators correspond to the rz
1719  // (round to zero) versions of the nvvm conversion intrinsics, even though
1720  // almost everything else here uses the rn (round to nearest even) nvvm ops.
1721  case Intrinsic::nvvm_d2i_rz:
1722  case Intrinsic::nvvm_f2i_rz:
1723  case Intrinsic::nvvm_d2ll_rz:
1724  case Intrinsic::nvvm_f2ll_rz:
1725  return {Instruction::FPToSI};
1726  case Intrinsic::nvvm_d2ui_rz:
1727  case Intrinsic::nvvm_f2ui_rz:
1728  case Intrinsic::nvvm_d2ull_rz:
1729  case Intrinsic::nvvm_f2ull_rz:
1730  return {Instruction::FPToUI};
1731  case Intrinsic::nvvm_i2d_rz:
1732  case Intrinsic::nvvm_i2f_rz:
1733  case Intrinsic::nvvm_ll2d_rz:
1734  case Intrinsic::nvvm_ll2f_rz:
1735  return {Instruction::SIToFP};
1736  case Intrinsic::nvvm_ui2d_rz:
1737  case Intrinsic::nvvm_ui2f_rz:
1738  case Intrinsic::nvvm_ull2d_rz:
1739  case Intrinsic::nvvm_ull2f_rz:
1740  return {Instruction::UIToFP};
1741 
1742  // NVVM intrinsics that map to LLVM binary ops.
1743  case Intrinsic::nvvm_add_rn_d:
1744  return {Instruction::FAdd, FTZ_Any};
1745  case Intrinsic::nvvm_add_rn_f:
1746  return {Instruction::FAdd, FTZ_MustBeOff};
1747  case Intrinsic::nvvm_add_rn_ftz_f:
1748  return {Instruction::FAdd, FTZ_MustBeOn};
1749  case Intrinsic::nvvm_mul_rn_d:
1750  return {Instruction::FMul, FTZ_Any};
1751  case Intrinsic::nvvm_mul_rn_f:
1752  return {Instruction::FMul, FTZ_MustBeOff};
1753  case Intrinsic::nvvm_mul_rn_ftz_f:
1754  return {Instruction::FMul, FTZ_MustBeOn};
1755  case Intrinsic::nvvm_div_rn_d:
1756  return {Instruction::FDiv, FTZ_Any};
1757  case Intrinsic::nvvm_div_rn_f:
1758  return {Instruction::FDiv, FTZ_MustBeOff};
1759  case Intrinsic::nvvm_div_rn_ftz_f:
1760  return {Instruction::FDiv, FTZ_MustBeOn};
1761 
1762  // The remainder of cases are NVVM intrinsics that map to LLVM idioms, but
1763  // need special handling.
1764  //
1765  // We seem to be missing intrinsics for rcp.approx.{ftz.}f32, which is just
1766  // as well.
1767  case Intrinsic::nvvm_rcp_rn_d:
1768  return {SPC_Reciprocal, FTZ_Any};
1769  case Intrinsic::nvvm_rcp_rn_f:
1770  return {SPC_Reciprocal, FTZ_MustBeOff};
1771  case Intrinsic::nvvm_rcp_rn_ftz_f:
1772  return {SPC_Reciprocal, FTZ_MustBeOn};
1773 
1774  // We do not currently simplify intrinsics that give an approximate answer.
1775  // These include:
1776  //
1777  // - nvvm_cos_approx_{f,ftz_f}
1778  // - nvvm_ex2_approx_{d,f,ftz_f}
1779  // - nvvm_lg2_approx_{d,f,ftz_f}
1780  // - nvvm_sin_approx_{f,ftz_f}
1781  // - nvvm_sqrt_approx_{f,ftz_f}
1782  // - nvvm_rsqrt_approx_{d,f,ftz_f}
1783  // - nvvm_div_approx_{ftz_d,ftz_f,f}
1784  // - nvvm_rcp_approx_ftz_d
1785  //
1786  // Ideally we'd encode them as e.g. "fast call @llvm.cos", where "fast"
1787  // means that fastmath is enabled in the intrinsic. Unfortunately only
1788  // binary operators (currently) have a fastmath bit in SelectionDAG, so this
1789  // information gets lost and we can't select on it.
1790  //
1791  // TODO: div and rcp are lowered to a binary op, so in theory we could
1792  // lower them to "fast fdiv".
1793 
1794  default:
1795  return {};
1796  }
1797  }();
1798 
1799  // If Action.FtzRequirement is not satisfied by the module's ftz state, we
1800  // can bail out now. (Notice that in the case that IID is not an NVVM
1801  // intrinsic, we don't have to look up any module metadata, as
1802  // FtzRequirementTy will be FTZ_Any.)
1803  if (Action.FtzRequirement != FTZ_Any) {
1804  bool FtzEnabled =
1805  II->getFunction()->getFnAttribute("nvptx-f32ftz").getValueAsString() ==
1806  "true";
1807 
1808  if (FtzEnabled != (Action.FtzRequirement == FTZ_MustBeOn))
1809  return nullptr;
1810  }
1811 
1812  // Simplify to target-generic intrinsic.
1813  if (Action.IID) {
1814  SmallVector<Value *, 4> Args(II->arg_operands());
1815  // All the target-generic intrinsics currently of interest to us have one
1816  // type argument, equal to that of the nvvm intrinsic's argument.
1817  Type *Tys[] = {II->getArgOperand(0)->getType()};
1818  return CallInst::Create(
1819  Intrinsic::getDeclaration(II->getModule(), *Action.IID, Tys), Args);
1820  }
1821 
1822  // Simplify to target-generic binary op.
1823  if (Action.BinaryOp)
1824  return BinaryOperator::Create(*Action.BinaryOp, II->getArgOperand(0),
1825  II->getArgOperand(1), II->getName());
1826 
1827  // Simplify to target-generic cast op.
1828  if (Action.CastOp)
1829  return CastInst::Create(*Action.CastOp, II->getArgOperand(0), II->getType(),
1830  II->getName());
1831 
1832  // All that's left are the special cases.
1833  if (!Action.Special)
1834  return nullptr;
1835 
1836  switch (*Action.Special) {
1837  case SPC_Reciprocal:
1838  // Simplify reciprocal.
1839  return BinaryOperator::Create(
1840  Instruction::FDiv, ConstantFP::get(II->getArgOperand(0)->getType(), 1),
1841  II->getArgOperand(0), II->getName());
1842  }
1843  llvm_unreachable("All SpecialCase enumerators should be handled in switch.");
1844 }
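// Illustrative sketch (assuming the function's "nvptx-f32ftz" attribute is
// not "true", so the FTZ_MustBeOff transforms apply):
//
//   %r = call float @llvm.nvvm.ceil.f(float %x)
//
// becomes the target-generic
//
//   %r = call float @llvm.ceil.f32(float %x)
//
// and, via the SPC_Reciprocal special case,
//
//   %r = call float @llvm.nvvm.rcp.rn.f(float %x)
//
// becomes
//
//   %r = fdiv float 1.000000e+00, %x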
1845 
1846 Instruction *InstCombiner::visitVAStartInst(VAStartInst &I) {
1847  removeTriviallyEmptyRange(I, Intrinsic::vastart, Intrinsic::vaend, *this);
1848  return nullptr;
1849 }
1850 
1851 Instruction *InstCombiner::visitVACopyInst(VACopyInst &I) {
1852  removeTriviallyEmptyRange(I, Intrinsic::vacopy, Intrinsic::vaend, *this);
1853  return nullptr;
1854 }
1855 
1856 /// CallInst simplification. This mostly only handles folding of intrinsic
1857 /// instructions. For normal calls, it allows visitCallSite to do the heavy
1858 /// lifting.
1859 Instruction *InstCombiner::visitCallInst(CallInst &CI) {
1860  auto Args = CI.arg_operands();
1861  if (Value *V = SimplifyCall(&CI, CI.getCalledValue(), Args.begin(),
1862  Args.end(), SQ.getWithInstruction(&CI)))
1863  return replaceInstUsesWith(CI, V);
1864 
1865  if (isFreeCall(&CI, &TLI))
1866  return visitFree(CI);
1867 
1868  // If the caller function is nounwind, mark the call as nounwind, even if the
1869  // callee isn't.
1870  if (CI.getFunction()->doesNotThrow() && !CI.doesNotThrow()) {
1871  CI.setDoesNotThrow();
1872  return &CI;
1873  }
1874 
1875  IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI);
1876  if (!II) return visitCallSite(&CI);
1877 
1878  // Intrinsics cannot occur in an invoke, so handle them here instead of in
1879  // visitCallSite.
1880  if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(II)) {
1881  bool Changed = false;
1882 
1883  // memmove/cpy/set of zero bytes is a noop.
1884  if (Constant *NumBytes = dyn_cast<Constant>(MI->getLength())) {
1885  if (NumBytes->isNullValue())
1886  return eraseInstFromFunction(CI);
1887 
1888  if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes))
1889  if (CI->getZExtValue() == 1) {
1890  // Replace the instruction with just byte operations. We would
1891  // transform other cases to loads/stores, but we don't know if
1892  // alignment is sufficient.
1893  }
1894  }
1895 
1896  // No other transformations apply to volatile transfers.
1897  if (MI->isVolatile())
1898  return nullptr;
1899 
1900  // If we have a memmove and the source operation is a constant global,
1901  // then the source and dest pointers can't alias, so we can change this
1902  // into a call to memcpy.
1903  if (MemMoveInst *MMI = dyn_cast<MemMoveInst>(MI)) {
1904  if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(MMI->getSource()))
1905  if (GVSrc->isConstant()) {
1906  Module *M = CI.getModule();
1907  Intrinsic::ID MemCpyID = Intrinsic::memcpy;
1908  Type *Tys[3] = { CI.getArgOperand(0)->getType(),
1909  CI.getArgOperand(1)->getType(),
1910  CI.getArgOperand(2)->getType() };
1911  CI.setCalledFunction(Intrinsic::getDeclaration(M, MemCpyID, Tys));
1912  Changed = true;
1913  }
1914  }
1915 
1916  if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
1917  // memmove(x,x,size) -> noop.
1918  if (MTI->getSource() == MTI->getDest())
1919  return eraseInstFromFunction(CI);
1920  }
1921 
1922  // If we can determine a pointer alignment that is bigger than currently
1923  // set, update the alignment.
1924  if (isa<MemTransferInst>(MI)) {
1925  if (Instruction *I = SimplifyMemTransfer(MI))
1926  return I;
1927  } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(MI)) {
1928  if (Instruction *I = SimplifyMemSet(MSI))
1929  return I;
1930  }
1931 
1932  if (Changed) return II;
1933  }
1934 
1935  if (auto *AMI = dyn_cast<ElementUnorderedAtomicMemCpyInst>(II)) {
1936  if (Constant *C = dyn_cast<Constant>(AMI->getLength()))
1937  if (C->isNullValue())
1938  return eraseInstFromFunction(*AMI);
1939 
1940  if (Instruction *I = SimplifyElementUnorderedAtomicMemCpy(AMI))
1941  return I;
1942  }
1943 
1944  if (Instruction *I = SimplifyNVVMIntrinsic(II, *this))
1945  return I;
1946 
1947  auto SimplifyDemandedVectorEltsLow = [this](Value *Op, unsigned Width,
1948  unsigned DemandedWidth) {
1949  APInt UndefElts(Width, 0);
1950  APInt DemandedElts = APInt::getLowBitsSet(Width, DemandedWidth);
1951  return SimplifyDemandedVectorElts(Op, DemandedElts, UndefElts);
1952  };
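  // For example (illustrative), SimplifyDemandedVectorEltsLow(Op, 4, 1) asks
  // whether only lane 0 of a 4-element operand is actually used; if so, the
  // upper lanes can be simplified (e.g. to undef) and the new value returned.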
1953 
1954  switch (II->getIntrinsicID()) {
1955  default: break;
1956  case Intrinsic::objectsize:
1957  if (ConstantInt *N =
1958  lowerObjectSizeCall(II, DL, &TLI, /*MustSucceed=*/false))
1959  return replaceInstUsesWith(CI, N);
1960  return nullptr;
1961 
1962  case Intrinsic::bswap: {
1963  Value *IIOperand = II->getArgOperand(0);
1964  Value *X = nullptr;
1965 
1966  // TODO should this be in InstSimplify?
1967  // bswap(bswap(x)) -> x
1968  if (match(IIOperand, m_BSwap(m_Value(X))))
1969  return replaceInstUsesWith(CI, X);
1970 
1971  // bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
1972  if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) {
1973  unsigned C = X->getType()->getPrimitiveSizeInBits() -
1974  IIOperand->getType()->getPrimitiveSizeInBits();
1975  Value *CV = ConstantInt::get(X->getType(), C);
1976  Value *V = Builder.CreateLShr(X, CV);
1977  return new TruncInst(V, IIOperand->getType());
1978  }
1979  break;
1980  }
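  // Illustrative example of the trunc fold (assuming %x is i32 and the outer
  // bswap is i16): bswap(trunc(bswap(%x))) produces the top two bytes of %x,
  // so with C = 32 - 16 = 16 it becomes trunc(lshr(%x, 16)) to i16.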
1981 
1982  case Intrinsic::bitreverse: {
1983  Value *IIOperand = II->getArgOperand(0);
1984  Value *X = nullptr;
1985 
1986  // TODO should this be in InstSimplify?
1987  // bitreverse(bitreverse(x)) -> x
1988  if (match(IIOperand, m_BitReverse(m_Value(X))))
1989  return replaceInstUsesWith(CI, X);
1990  break;
1991  }
1992 
1993  case Intrinsic::masked_load:
1994  if (Value *SimplifiedMaskedOp = simplifyMaskedLoad(*II, Builder))
1995  return replaceInstUsesWith(CI, SimplifiedMaskedOp);
1996  break;
1997  case Intrinsic::masked_store:
1998  return simplifyMaskedStore(*II, *this);
1999  case Intrinsic::masked_gather:
2000  return simplifyMaskedGather(*II, *this);
2001  case Intrinsic::masked_scatter:
2002  return simplifyMaskedScatter(*II, *this);
2003 
2004  case Intrinsic::powi:
2005  if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
2006  // powi(x, 0) -> 1.0
2007  if (Power->isZero())
2008  return replaceInstUsesWith(CI, ConstantFP::get(CI.getType(), 1.0));
2009  // powi(x, 1) -> x
2010  if (Power->isOne())
2011  return replaceInstUsesWith(CI, II->getArgOperand(0));
2012  // powi(x, -1) -> 1/x
2013  if (Power->isMinusOne())
2014  return BinaryOperator::CreateFDiv(ConstantFP::get(CI.getType(), 1.0),
2015  II->getArgOperand(0));
2016  }
2017  break;
2018 
2019  case Intrinsic::cttz:
2020  case Intrinsic::ctlz:
2021  if (auto *I = foldCttzCtlz(*II, *this))
2022  return I;
2023  break;
2024 
2025  case Intrinsic::ctpop:
2026  if (auto *I = foldCtpop(*II, *this))
2027  return I;
2028  break;
2029 
2030  case Intrinsic::uadd_with_overflow:
2031  case Intrinsic::sadd_with_overflow:
2032  case Intrinsic::umul_with_overflow:
2033  case Intrinsic::smul_with_overflow:
2034  if (isa<Constant>(II->getArgOperand(0)) &&
2035  !isa<Constant>(II->getArgOperand(1))) {
2036  // Canonicalize constants into the RHS.
2037  Value *LHS = II->getArgOperand(0);
2038  II->setArgOperand(0, II->getArgOperand(1));
2039  II->setArgOperand(1, LHS);
2040  return II;
2041  }
2042  LLVM_FALLTHROUGH;
2043 
2044  case Intrinsic::usub_with_overflow:
2045  case Intrinsic::ssub_with_overflow: {
2046  OverflowCheckFlavor OCF =
2047      IntrinsicIDToOverflowCheckFlavor(II->getIntrinsicID());
2048  assert(OCF != OCF_INVALID && "unexpected!");
2049 
2050  Value *OperationResult = nullptr;
2051  Constant *OverflowResult = nullptr;
2052  if (OptimizeOverflowCheck(OCF, II->getArgOperand(0), II->getArgOperand(1),
2053  *II, OperationResult, OverflowResult))
2054  return CreateOverflowTuple(II, OperationResult, OverflowResult);
2055 
2056  break;
2057  }
2058 
2059  case Intrinsic::minnum:
2060  case Intrinsic::maxnum: {
2061  Value *Arg0 = II->getArgOperand(0);
2062  Value *Arg1 = II->getArgOperand(1);
2063  // Canonicalize constants to the RHS.
2064  if (isa<ConstantFP>(Arg0) && !isa<ConstantFP>(Arg1)) {
2065  II->setArgOperand(0, Arg1);
2066  II->setArgOperand(1, Arg0);
2067  return II;
2068  }
2069  if (Value *V = simplifyMinnumMaxnum(*II))
2070  return replaceInstUsesWith(*II, V);
2071  break;
2072  }
2073  case Intrinsic::fmuladd: {
2074  // Canonicalize fast fmuladd to the separate fmul + fadd.
2075  if (II->hasUnsafeAlgebra()) {
2076  BuilderTy::FastMathFlagGuard Guard(Builder);
2077  Builder.setFastMathFlags(II->getFastMathFlags());
2078  Value *Mul = Builder.CreateFMul(II->getArgOperand(0),
2079  II->getArgOperand(1));
2080  Value *Add = Builder.CreateFAdd(Mul, II->getArgOperand(2));
2081  Add->takeName(II);
2082  return replaceInstUsesWith(*II, Add);
2083  }
2084 
2085  LLVM_FALLTHROUGH;
2086  }
2087  case Intrinsic::fma: {
2088  Value *Src0 = II->getArgOperand(0);
2089  Value *Src1 = II->getArgOperand(1);
2090 
2091  // Canonicalize constants into the RHS.
2092  if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
2093  II->setArgOperand(0, Src1);
2094  II->setArgOperand(1, Src0);
2095  std::swap(Src0, Src1);
2096  }
2097 
2098  Value *LHS = nullptr;
2099  Value *RHS = nullptr;
2100 
2101  // fma fneg(x), fneg(y), z -> fma x, y, z
2102  if (match(Src0, m_FNeg(m_Value(LHS))) &&
2103  match(Src1, m_FNeg(m_Value(RHS)))) {
2104  II->setArgOperand(0, LHS);
2105  II->setArgOperand(1, RHS);
2106  return II;
2107  }
2108 
2109  // fma fabs(x), fabs(x), z -> fma x, x, z
2110  if (match(Src0, m_Intrinsic<Intrinsic::fabs>(m_Value(LHS))) &&
2111  match(Src1, m_Intrinsic<Intrinsic::fabs>(m_Value(RHS))) && LHS == RHS) {
2112  II->setArgOperand(0, LHS);
2113  II->setArgOperand(1, RHS);
2114  return II;
2115  }
2116 
2117  // fma x, 1, z -> fadd x, z
2118  if (match(Src1, m_FPOne())) {
2119  Instruction *RI = BinaryOperator::CreateFAdd(Src0, II->getArgOperand(2));
2120  RI->copyFastMathFlags(II);
2121  return RI;
2122  }
2123 
2124  break;
2125  }
2126  case Intrinsic::fabs: {
2127  Value *Cond;
2128  Constant *LHS, *RHS;
2129  if (match(II->getArgOperand(0),
2130  m_Select(m_Value(Cond), m_Constant(LHS), m_Constant(RHS)))) {
2131  CallInst *Call0 = Builder.CreateCall(II->getCalledFunction(), {LHS});
2132  CallInst *Call1 = Builder.CreateCall(II->getCalledFunction(), {RHS});
2133  return SelectInst::Create(Cond, Call0, Call1);
2134  }
2135 
2136  LLVM_FALLTHROUGH;
2137  }
2138  case Intrinsic::ceil:
2139  case Intrinsic::floor:
2140  case Intrinsic::round:
2141  case Intrinsic::nearbyint:
2142  case Intrinsic::rint:
2143  case Intrinsic::trunc: {
2144  Value *ExtSrc;
2145  if (match(II->getArgOperand(0), m_FPExt(m_Value(ExtSrc))) &&
2146  II->getArgOperand(0)->hasOneUse()) {
2147  // fabs (fpext x) -> fpext (fabs x)
2148  Value *F = Intrinsic::getDeclaration(II->getModule(), II->getIntrinsicID(),
2149  { ExtSrc->getType() });
2150  CallInst *NewFabs = Builder.CreateCall(F, ExtSrc);
2151  NewFabs->copyFastMathFlags(II);
2152  NewFabs->takeName(II);
2153  return new FPExtInst(NewFabs, II->getType());
2154  }
2155 
2156  break;
2157  }
2158  case Intrinsic::cos:
2159  case Intrinsic::amdgcn_cos: {
2160  Value *SrcSrc;
2161  Value *Src = II->getArgOperand(0);
2162  if (match(Src, m_FNeg(m_Value(SrcSrc))) ||
2163  match(Src, m_Intrinsic<Intrinsic::fabs>(m_Value(SrcSrc)))) {
2164  // cos(-x) -> cos(x)
2165  // cos(fabs(x)) -> cos(x)
2166  II->setArgOperand(0, SrcSrc);
2167  return II;
2168  }
2169 
2170  break;
2171  }
2172  case Intrinsic::ppc_altivec_lvx:
2173  case Intrinsic::ppc_altivec_lvxl:
2174  // Turn PPC lvx -> load if the pointer is known aligned.
2175  if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, &AC,
2176  &DT) >= 16) {
2177  Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0),
2178  PointerType::getUnqual(II->getType()));
2179  return new LoadInst(Ptr);
2180  }
2181  break;
2182  case Intrinsic::ppc_vsx_lxvw4x:
2183  case Intrinsic::ppc_vsx_lxvd2x: {
2184  // Turn PPC VSX loads into normal loads.
2185  Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0),
2186  PointerType::getUnqual(II->getType()));
2187  return new LoadInst(Ptr, Twine(""), false, 1);
2188  }
2189  case Intrinsic::ppc_altivec_stvx:
2190  case Intrinsic::ppc_altivec_stvxl:
2191  // Turn stvx -> store if the pointer is known aligned.
2192  if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, II, &AC,
2193  &DT) >= 16) {
2194  Type *OpPtrTy =
2195  PointerType::getUnqual(II->getArgOperand(0)->getType());
2196  Value *Ptr = Builder.CreateBitCast(II->getArgOperand(1), OpPtrTy);
2197  return new StoreInst(II->getArgOperand(0), Ptr);
2198  }
2199  break;
2200  case Intrinsic::ppc_vsx_stxvw4x:
2201  case Intrinsic::ppc_vsx_stxvd2x: {
2202  // Turn PPC VSX stores into normal stores.
2203  Type *OpPtrTy = PointerType::getUnqual(II->getArgOperand(0)->getType());
2204  Value *Ptr = Builder.CreateBitCast(II->getArgOperand(1), OpPtrTy);
2205  return new StoreInst(II->getArgOperand(0), Ptr, false, 1);
2206  }
2207  case Intrinsic::ppc_qpx_qvlfs:
2208  // Turn PPC QPX qvlfs -> load if the pointer is known aligned.
2209  if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, &AC,
2210  &DT) >= 16) {
2211  Type *VTy = VectorType::get(Builder.getFloatTy(),
2212  II->getType()->getVectorNumElements());
2213  Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0),
2214  PointerType::getUnqual(VTy));
2215  Value *Load = Builder.CreateLoad(Ptr);
2216  return new FPExtInst(Load, II->getType());
2217  }
2218  break;
2219  case Intrinsic::ppc_qpx_qvlfd:
2220  // Turn PPC QPX qvlfd -> load if the pointer is known aligned.
2221  if (getOrEnforceKnownAlignment(II->getArgOperand(0), 32, DL, II, &AC,
2222  &DT) >= 32) {
2223  Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0),
2224  PointerType::getUnqual(II->getType()));
2225  return new LoadInst(Ptr);
2226  }
2227  break;
2228  case Intrinsic::ppc_qpx_qvstfs:
2229  // Turn PPC QPX qvstfs -> store if the pointer is known aligned.
2230  if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, II, &AC,
2231  &DT) >= 16) {
2232  Type *VTy = VectorType::get(Builder.getFloatTy(),
2233  II->getArgOperand(0)->getType()->getVectorNumElements());
2234  Value *TOp = Builder.CreateFPTrunc(II->getArgOperand(0), VTy);
2235  Type *OpPtrTy = PointerType::getUnqual(VTy);
2236  Value *Ptr = Builder.CreateBitCast(II->getArgOperand(1), OpPtrTy);
2237  return new StoreInst(TOp, Ptr);
2238  }
2239  break;
2240  case Intrinsic::ppc_qpx_qvstfd:
2241  // Turn PPC QPX qvstfd -> store if the pointer is known aligned.
2242  if (getOrEnforceKnownAlignment(II->getArgOperand(1), 32, DL, II, &AC,
2243  &DT) >= 32) {
2244  Type *OpPtrTy =
2245  PointerType::getUnqual(II->getArgOperand(0)->getType());
2246  Value *Ptr = Builder.CreateBitCast(II->getArgOperand(1), OpPtrTy);
2247  return new StoreInst(II->getArgOperand(0), Ptr);
2248  }
2249  break;
2250 
2251  case Intrinsic::x86_vcvtph2ps_128:
2252  case Intrinsic::x86_vcvtph2ps_256: {
2253  auto Arg = II->getArgOperand(0);
2254  auto ArgType = cast<VectorType>(Arg->getType());
2255  auto RetType = cast<VectorType>(II->getType());
2256  unsigned ArgWidth = ArgType->getNumElements();
2257  unsigned RetWidth = RetType->getNumElements();
2258  assert(RetWidth <= ArgWidth && "Unexpected input/return vector widths");
2259  assert(ArgType->isIntOrIntVectorTy() &&
2260  ArgType->getScalarSizeInBits() == 16 &&
2261  "CVTPH2PS input type should be 16-bit integer vector");
2262  assert(RetType->getScalarType()->isFloatTy() &&
2263  "CVTPH2PS output type should be 32-bit float vector");
2264 
2265  // Constant folding: Convert to generic half to single conversion.
2266  if (isa<ConstantAggregateZero>(Arg))
2267  return replaceInstUsesWith(*II, ConstantAggregateZero::get(RetType));
2268 
2269  if (isa<ConstantDataVector>(Arg)) {
2270  auto VectorHalfAsShorts = Arg;
2271  if (RetWidth < ArgWidth) {
2272  SmallVector<uint32_t, 8> SubVecMask;
2273  for (unsigned i = 0; i != RetWidth; ++i)
2274  SubVecMask.push_back((int)i);
2275  VectorHalfAsShorts = Builder.CreateShuffleVector(
2276  Arg, UndefValue::get(ArgType), SubVecMask);
2277  }
2278 
2279  auto VectorHalfType =
2280  VectorType::get(Type::getHalfTy(II->getContext()), RetWidth);
2281  auto VectorHalfs =
2282  Builder.CreateBitCast(VectorHalfAsShorts, VectorHalfType);
2283  auto VectorFloats = Builder.CreateFPExt(VectorHalfs, RetType);
2284  return replaceInstUsesWith(*II, VectorFloats);
2285  }
2286 
2287  // We only use the lowest lanes of the argument.
2288  if (Value *V = SimplifyDemandedVectorEltsLow(Arg, ArgWidth, RetWidth)) {
2289  II->setArgOperand(0, V);
2290  return II;
2291  }
2292  break;
2293  }
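  // Illustrative note for the 128-bit form (assuming a constant argument):
  // the input is <8 x i16> but only 4 results are produced, so the constant
  // is shuffled down to its low 4 lanes, bitcast to <4 x half>, and fpext'ed
  // to <4 x float>, letting ordinary constant folding finish the conversion.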
2294 
2295  case Intrinsic::x86_sse_cvtss2si:
2296  case Intrinsic::x86_sse_cvtss2si64:
2297  case Intrinsic::x86_sse_cvttss2si:
2298  case Intrinsic::x86_sse_cvttss2si64:
2299  case Intrinsic::x86_sse2_cvtsd2si:
2300  case Intrinsic::x86_sse2_cvtsd2si64:
2301  case Intrinsic::x86_sse2_cvttsd2si:
2302  case Intrinsic::x86_sse2_cvttsd2si64:
2303  case Intrinsic::x86_avx512_vcvtss2si32:
2304  case Intrinsic::x86_avx512_vcvtss2si64:
2305  case Intrinsic::x86_avx512_vcvtss2usi32:
2306  case Intrinsic::x86_avx512_vcvtss2usi64:
2307  case Intrinsic::x86_avx512_vcvtsd2si32:
2308  case Intrinsic::x86_avx512_vcvtsd2si64:
2309  case Intrinsic::x86_avx512_vcvtsd2usi32:
2310  case Intrinsic::x86_avx512_vcvtsd2usi64:
2311  case Intrinsic::x86_avx512_cvttss2si:
2312  case Intrinsic::x86_avx512_cvttss2si64:
2313  case Intrinsic::x86_avx512_cvttss2usi:
2314  case Intrinsic::x86_avx512_cvttss2usi64:
2315  case Intrinsic::x86_avx512_cvttsd2si:
2316  case Intrinsic::x86_avx512_cvttsd2si64:
2317  case Intrinsic::x86_avx512_cvttsd2usi:
2318  case Intrinsic::x86_avx512_cvttsd2usi64: {
2319  // These intrinsics only demand the 0th element of their input vectors. If
2320  // we can simplify the input based on that, do so now.
2321  Value *Arg = II->getArgOperand(0);
2322  unsigned VWidth = Arg->getType()->getVectorNumElements();
2323  if (Value *V = SimplifyDemandedVectorEltsLow(Arg, VWidth, 1)) {
2324  II->setArgOperand(0, V);
2325  return II;
2326  }
2327  break;
2328  }
2329 
2330  case Intrinsic::x86_mmx_pmovmskb:
2331  case Intrinsic::x86_sse_movmsk_ps:
2332  case Intrinsic::x86_sse2_movmsk_pd:
2333  case Intrinsic::x86_sse2_pmovmskb_128:
2334  case Intrinsic::x86_avx_movmsk_pd_256:
2335  case Intrinsic::x86_avx_movmsk_ps_256:
2336  case Intrinsic::x86_avx2_pmovmskb: {
2337  if (Value *V = simplifyX86movmsk(*II))
2338  return replaceInstUsesWith(*II, V);
2339  break;
2340  }
2341 
2342  case Intrinsic::x86_sse_comieq_ss:
2343  case Intrinsic::x86_sse_comige_ss:
2344  case Intrinsic::x86_sse_comigt_ss:
2345  case Intrinsic::x86_sse_comile_ss:
2346  case Intrinsic::x86_sse_comilt_ss:
2347  case Intrinsic::x86_sse_comineq_ss:
2348  case Intrinsic::x86_sse_ucomieq_ss:
2349  case Intrinsic::x86_sse_ucomige_ss:
2350  case Intrinsic::x86_sse_ucomigt_ss:
2351  case Intrinsic::x86_sse_ucomile_ss:
2352  case Intrinsic::x86_sse_ucomilt_ss:
2353  case Intrinsic::x86_sse_ucomineq_ss:
2354  case Intrinsic::x86_sse2_comieq_sd:
2355  case Intrinsic::x86_sse2_comige_sd:
2356  case Intrinsic::x86_sse2_comigt_sd:
2357  case Intrinsic::x86_sse2_comile_sd:
2358  case Intrinsic::x86_sse2_comilt_sd:
2359  case Intrinsic::x86_sse2_comineq_sd:
2360  case Intrinsic::x86_sse2_ucomieq_sd:
2361  case Intrinsic::x86_sse2_ucomige_sd:
2362  case Intrinsic::x86_sse2_ucomigt_sd:
2363  case Intrinsic::x86_sse2_ucomile_sd:
2364  case Intrinsic::x86_sse2_ucomilt_sd:
2365  case Intrinsic::x86_sse2_ucomineq_sd:
2366  case Intrinsic::x86_avx512_vcomi_ss:
2367  case Intrinsic::x86_avx512_vcomi_sd:
2368  case Intrinsic::x86_avx512_mask_cmp_ss:
2369  case Intrinsic::x86_avx512_mask_cmp_sd: {
2370  // These intrinsics only demand the 0th element of their input vectors. If
2371  // we can simplify the input based on that, do so now.
2372  bool MadeChange = false;
2373  Value *Arg0 = II->getArgOperand(0);
2374  Value *Arg1 = II->getArgOperand(1);
2375  unsigned VWidth = Arg0->getType()->getVectorNumElements();
2376  if (Value *V = SimplifyDemandedVectorEltsLow(Arg0, VWidth, 1)) {
2377  II->setArgOperand(0, V);
2378  MadeChange = true;
2379  }
2380  if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, 1)) {
2381  II->setArgOperand(1, V);
2382  MadeChange = true;
2383  }
2384  if (MadeChange)
2385  return II;
2386  break;
2387  }
2388  case Intrinsic::x86_avx512_mask_cmp_pd_128:
2389  case Intrinsic::x86_avx512_mask_cmp_pd_256:
2390  case Intrinsic::x86_avx512_mask_cmp_pd_512:
2391  case Intrinsic::x86_avx512_mask_cmp_ps_128:
2392  case Intrinsic::x86_avx512_mask_cmp_ps_256:
2393  case Intrinsic::x86_avx512_mask_cmp_ps_512: {
2394  // Folding cmp(sub(a,b),0) -> cmp(a,b) and cmp(0,sub(a,b)) -> cmp(b,a)
2395  Value *Arg0 = II->getArgOperand(0);
2396  Value *Arg1 = II->getArgOperand(1);
2397  bool Arg0IsZero = match(Arg0, m_Zero());
2398  if (Arg0IsZero)
2399  std::swap(Arg0, Arg1);
2400  Value *A, *B;
2401  // This fold requires only the NINF (no +/- inf) fast-math flag, since inf
2402  // minus inf is nan.
2403  // NSZ(No Signed Zeros) is not needed because zeros of any sign are
2404  // equal for both compares.
2405  // NNAN is not needed because nans compare the same for both compares.
2406  // The compare intrinsic uses the above assumptions and therefore
2407  // doesn't require additional flags.
2408  if ((match(Arg0, m_OneUse(m_FSub(m_Value(A), m_Value(B)))) &&
2409  match(Arg1, m_Zero()) &&
2410  cast<Instruction>(Arg0)->getFastMathFlags().noInfs())) {
2411  if (Arg0IsZero)
2412  std::swap(A, B);
2413  II->setArgOperand(0, A);
2414  II->setArgOperand(1, B);
2415  return II;
2416  }
2417  break;
2418  }
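  // Illustrative sketch (assuming the fsub carries the ninf flag):
  //   %s = fsub ninf <16 x float> %a, %b
  //   %c = call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %s,
  //            <16 x float> zeroinitializer, i32 0, i16 -1, i32 4)
  // is rewritten to compare %a against %b directly, dropping the subtraction.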
2419 
2420  case Intrinsic::x86_avx512_mask_add_ps_512:
2421  case Intrinsic::x86_avx512_mask_div_ps_512:
2422  case Intrinsic::x86_avx512_mask_mul_ps_512:
2423  case Intrinsic::x86_avx512_mask_sub_ps_512:
2424  case Intrinsic::x86_avx512_mask_add_pd_512:
2425  case Intrinsic::x86_avx512_mask_div_pd_512:
2426  case Intrinsic::x86_avx512_mask_mul_pd_512:
2427  case Intrinsic::x86_avx512_mask_sub_pd_512:
2428  // If the rounding mode is CUR_DIRECTION(4) we can turn these into regular
2429  // IR operations.
2430  if (auto *R = dyn_cast<ConstantInt>(II->getArgOperand(4))) {
2431  if (R->getValue() == 4) {
2432  Value *Arg0 = II->getArgOperand(0);
2433  Value *Arg1 = II->getArgOperand(1);
2434 
2435  Value *V;
2436  switch (II->getIntrinsicID()) {
2437  default: llvm_unreachable("Case stmts out of sync!");
2438  case Intrinsic::x86_avx512_mask_add_ps_512:
2439  case Intrinsic::x86_avx512_mask_add_pd_512:
2440  V = Builder.CreateFAdd(Arg0, Arg1);
2441  break;
2442  case Intrinsic::x86_avx512_mask_sub_ps_512:
2443  case Intrinsic::x86_avx512_mask_sub_pd_512:
2444  V = Builder.CreateFSub(Arg0, Arg1);
2445  break;
2446  case Intrinsic::x86_avx512_mask_mul_ps_512:
2447  case Intrinsic::x86_avx512_mask_mul_pd_512:
2448  V = Builder.CreateFMul(Arg0, Arg1);
2449  break;
2450  case Intrinsic::x86_avx512_mask_div_ps_512:
2451  case Intrinsic::x86_avx512_mask_div_pd_512:
2452  V = Builder.CreateFDiv(Arg0, Arg1);
2453  break;
2454  }
2455 
2456  // Create a select for the masking.
2457  V = emitX86MaskSelect(II->getArgOperand(3), V, II->getArgOperand(2),
2458  Builder);
2459  return replaceInstUsesWith(*II, V);
2460  }
2461  }
2462  break;
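  // Illustrative example (CUR_DIRECTION rounding, i.e. immediate 4):
  //   %r = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a,
  //            <16 x float> %b, <16 x float> %src, i16 %k, i32 4)
  // becomes a plain fadd of %a and %b followed by a lane select against %src
  // under the mask %k (emitX86MaskSelect).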
2463 
2464  case Intrinsic::x86_avx512_mask_add_ss_round:
2465  case Intrinsic::x86_avx512_mask_div_ss_round:
2466  case Intrinsic::x86_avx512_mask_mul_ss_round:
2467  case Intrinsic::x86_avx512_mask_sub_ss_round:
2468  case Intrinsic::x86_avx512_mask_add_sd_round:
2469  case Intrinsic::x86_avx512_mask_div_sd_round:
2470  case Intrinsic::x86_avx512_mask_mul_sd_round:
2471  case Intrinsic::x86_avx512_mask_sub_sd_round:
2472  // If the rounding mode is CUR_DIRECTION(4) we can turn these into regular
2473  // IR operations.
2474  if (auto *R = dyn_cast<ConstantInt>(II->getArgOperand(4))) {
2475  if (R->getValue() == 4) {
2476  // Extract the element as scalars.
2477  Value *Arg0 = II->getArgOperand(0);
2478  Value *Arg1 = II->getArgOperand(1);
2479  Value *LHS = Builder.CreateExtractElement(Arg0, (uint64_t)0);
2480  Value *RHS = Builder.CreateExtractElement(Arg1, (uint64_t)0);
2481 
2482  Value *V;
2483  switch (II->getIntrinsicID()) {
2484  default: llvm_unreachable("Case stmts out of sync!");
2485  case Intrinsic::x86_avx512_mask_add_ss_round:
2486  case Intrinsic::x86_avx512_mask_add_sd_round:
2487  V = Builder.CreateFAdd(LHS, RHS);
2488  break;
2489  case Intrinsic::x86_avx512_mask_sub_ss_round:
2490  case Intrinsic::x86_avx512_mask_sub_sd_round:
2491  V = Builder.CreateFSub(LHS, RHS);
2492  break;
2493  case Intrinsic::x86_avx512_mask_mul_ss_round:
2494  case Intrinsic::x86_avx512_mask_mul_sd_round:
2495  V = Builder.CreateFMul(LHS, RHS);
2496  break;
2497  case Intrinsic::x86_avx512_mask_div_ss_round:
2498  case Intrinsic::x86_avx512_mask_div_sd_round:
2499  V = Builder.CreateFDiv(LHS, RHS);
2500  break;
2501  }
2502 
2503  // Handle the masking aspect of the intrinsic.
2504  Value *Mask = II->getArgOperand(3);
2505  auto *C = dyn_cast<ConstantInt>(Mask);
2506  // We don't need a select if we know the mask bit is a 1.
2507  if (!C || !C->getValue()[0]) {
2508  // Cast the mask to an i1 vector and then extract the lowest element.
2509  auto *MaskTy = VectorType::get(Builder.getInt1Ty(),
2510  cast<IntegerType>(Mask->getType())->getBitWidth());
2511  Mask = Builder.CreateBitCast(Mask, MaskTy);
2512  Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
2513  // Extract the lowest element from the passthru operand.
2514  Value *Passthru = Builder.CreateExtractElement(II->getArgOperand(2),
2515  (uint64_t)0);
2516  V = Builder.CreateSelect(Mask, V, Passthru);
2517  }
2518 
2519  // Insert the result back into the original argument 0.
2520  V = Builder.CreateInsertElement(Arg0, V, (uint64_t)0);
2521 
2522  return replaceInstUsesWith(*II, V);
2523  }
2524  }
2525  LLVM_FALLTHROUGH;
2526 
2527  // X86 scalar intrinsics simplified with SimplifyDemandedVectorElts.
2528  case Intrinsic::x86_avx512_mask_max_ss_round:
2529  case Intrinsic::x86_avx512_mask_min_ss_round:
2530  case Intrinsic::x86_avx512_mask_max_sd_round:
2531  case Intrinsic::x86_avx512_mask_min_sd_round:
2532  case Intrinsic::x86_avx512_mask_vfmadd_ss:
2533  case Intrinsic::x86_avx512_mask_vfmadd_sd:
2534  case Intrinsic::x86_avx512_maskz_vfmadd_ss:
2535  case Intrinsic::x86_avx512_maskz_vfmadd_sd:
2536  case Intrinsic::x86_avx512_mask3_vfmadd_ss:
2537  case Intrinsic::x86_avx512_mask3_vfmadd_sd:
2538  case Intrinsic::x86_avx512_mask3_vfmsub_ss:
2539  case Intrinsic::x86_avx512_mask3_vfmsub_sd:
2540  case Intrinsic::x86_avx512_mask3_vfnmsub_ss:
2541  case Intrinsic::x86_avx512_mask3_vfnmsub_sd:
2542  case Intrinsic::x86_fma_vfmadd_ss:
2543  case Intrinsic::x86_fma_vfmsub_ss:
2544  case Intrinsic::x86_fma_vfnmadd_ss:
2545  case Intrinsic::x86_fma_vfnmsub_ss:
2546  case Intrinsic::x86_fma_vfmadd_sd:
2547  case Intrinsic::x86_fma_vfmsub_sd:
2548  case Intrinsic::x86_fma_vfnmadd_sd:
2549  case Intrinsic::x86_fma_vfnmsub_sd:
2550  case Intrinsic::x86_sse_cmp_ss:
2551  case Intrinsic::x86_sse_min_ss:
2552  case Intrinsic::x86_sse_max_ss:
2553  case Intrinsic::x86_sse2_cmp_sd:
2554  case Intrinsic::x86_sse2_min_sd:
2555  case Intrinsic::x86_sse2_max_sd:
2556  case Intrinsic::x86_sse41_round_ss:
2557  case Intrinsic::x86_sse41_round_sd:
2558  case Intrinsic::x86_xop_vfrcz_ss:
2559  case Intrinsic::x86_xop_vfrcz_sd: {
2560  unsigned VWidth = II->getType()->getVectorNumElements();
2561  APInt UndefElts(VWidth, 0);
2562  APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
2563  if (Value *V = SimplifyDemandedVectorElts(II, AllOnesEltMask, UndefElts)) {
2564  if (V != II)
2565  return replaceInstUsesWith(*II, V);
2566  return II;
2567  }
2568  break;
2569  }
2570 
2571  // Constant fold ashr( <A x Bi>, Ci ).
2572  // Constant fold lshr( <A x Bi>, Ci ).
2573  // Constant fold shl( <A x Bi>, Ci ).
2574  case Intrinsic::x86_sse2_psrai_d:
2575  case Intrinsic::x86_sse2_psrai_w:
2576  case Intrinsic::x86_avx2_psrai_d:
2577  case Intrinsic::x86_avx2_psrai_w:
2578  case Intrinsic::x86_avx512_psrai_q_128:
2579  case Intrinsic::x86_avx512_psrai_q_256:
2580  case Intrinsic::x86_avx512_psrai_d_512:
2581  case Intrinsic::x86_avx512_psrai_q_512:
2582  case Intrinsic::x86_avx512_psrai_w_512:
2583  case Intrinsic::x86_sse2_psrli_d:
2584  case Intrinsic::x86_sse2_psrli_q:
2585  case Intrinsic::x86_sse2_psrli_w:
2586  case Intrinsic::x86_avx2_psrli_d:
2587  case Intrinsic::x86_avx2_psrli_q:
2588  case Intrinsic::x86_avx2_psrli_w:
2589  case Intrinsic::x86_avx512_psrli_d_512:
2590  case Intrinsic::x86_avx512_psrli_q_512:
2591  case Intrinsic::x86_avx512_psrli_w_512:
2592  case Intrinsic::x86_sse2_pslli_d:
2593  case Intrinsic::x86_sse2_pslli_q:
2594  case Intrinsic::x86_sse2_pslli_w:
2595  case Intrinsic::x86_avx2_pslli_d:
2596  case Intrinsic::x86_avx2_pslli_q:
2597  case Intrinsic::x86_avx2_pslli_w:
2598  case Intrinsic::x86_avx512_pslli_d_512:
2599  case Intrinsic::x86_avx512_pslli_q_512:
2600  case Intrinsic::x86_avx512_pslli_w_512:
2601  if (Value *V = simplifyX86immShift(*II, Builder))
2602  return replaceInstUsesWith(*II, V);
2603  break;
2604 
2605  case Intrinsic::x86_sse2_psra_d:
2606  case Intrinsic::x86_sse2_psra_w:
2607  case Intrinsic::x86_avx2_psra_d:
2608  case Intrinsic::x86_avx2_psra_w:
2609  case Intrinsic::x86_avx512_psra_q_128:
2610  case Intrinsic::x86_avx512_psra_q_256:
2611  case Intrinsic::x86_avx512_psra_d_512:
2612  case Intrinsic::x86_avx512_psra_q_512:
2613  case Intrinsic::x86_avx512_psra_w_512:
2614  case Intrinsic::x86_sse2_psrl_d:
2615  case Intrinsic::x86_sse2_psrl_q:
2616  case Intrinsic::x86_sse2_psrl_w:
2617  case Intrinsic::x86_avx2_psrl_d:
2618  case Intrinsic::x86_avx2_psrl_q:
2619  case Intrinsic::x86_avx2_psrl_w:
2620  case Intrinsic::x86_avx512_psrl_d_512:
2621  case Intrinsic::x86_avx512_psrl_q_512:
2622  case Intrinsic::x86_avx512_psrl_w_512:
2623  case Intrinsic::x86_sse2_psll_d:
2624  case Intrinsic::x86_sse2_psll_q:
2625  case Intrinsic::x86_sse2_psll_w:
2626  case Intrinsic::x86_avx2_psll_d:
2627  case Intrinsic::x86_avx2_psll_q:
2628  case Intrinsic::x86_avx2_psll_w:
2629  case Intrinsic::x86_avx512_psll_d_512:
2630  case Intrinsic::x86_avx512_psll_q_512:
2631  case Intrinsic::x86_avx512_psll_w_512: {
2632  if (Value *V = simplifyX86immShift(*II, Builder))
2633  return replaceInstUsesWith(*II, V);
2634 
2635  // SSE2/AVX2 uses only the first 64-bits of the 128-bit vector
2636  // operand to compute the shift amount.
2637  Value *Arg1 = II->getArgOperand(1);
2638  assert(Arg1->getType()->getPrimitiveSizeInBits() == 128 &&
2639  "Unexpected packed shift size");
2640  unsigned VWidth = Arg1->getType()->getVectorNumElements();
2641 
2642  if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, VWidth / 2)) {
2643  II->setArgOperand(1, V);
2644  return II;
2645  }
2646  break;
2647  }
2648 
2649  case Intrinsic::x86_avx2_psllv_d:
2650  case Intrinsic::x86_avx2_psllv_d_256:
2651  case Intrinsic::x86_avx2_psllv_q:
2652  case Intrinsic::x86_avx2_psllv_q_256:
2653  case Intrinsic::x86_avx512_psllv_d_512:
2654  case Intrinsic::x86_avx512_psllv_q_512:
2655  case Intrinsic::x86_avx512_psllv_w_128:
2656  case Intrinsic::x86_avx512_psllv_w_256:
2657  case Intrinsic::x86_avx512_psllv_w_512:
2658  case Intrinsic::x86_avx2_psrav_d:
2659  case Intrinsic::x86_avx2_psrav_d_256:
2660  case Intrinsic::x86_avx512_psrav_q_128:
2661  case Intrinsic::x86_avx512_psrav_q_256:
2662  case Intrinsic::x86_avx512_psrav_d_512:
2663  case Intrinsic::x86_avx512_psrav_q_512:
2664  case Intrinsic::x86_avx512_psrav_w_128:
2665  case Intrinsic::x86_avx512_psrav_w_256:
2666  case Intrinsic::x86_avx512_psrav_w_512:
2667  case Intrinsic::x86_avx2_psrlv_d:
2668  case Intrinsic::x86_avx2_psrlv_d_256:
2669  case Intrinsic::x86_avx2_psrlv_q:
2670  case Intrinsic::x86_avx2_psrlv_q_256:
2671  case Intrinsic::x86_avx512_psrlv_d_512:
2672  case Intrinsic::x86_avx512_psrlv_q_512:
2673  case Intrinsic::x86_avx512_psrlv_w_128:
2674  case Intrinsic::x86_avx512_psrlv_w_256:
2675  case Intrinsic::x86_avx512_psrlv_w_512:
2676  if (Value *V = simplifyX86varShift(*II, Builder))
2677  return replaceInstUsesWith(*II, V);
2678  break;
2679 
2680  case Intrinsic::x86_sse2_pmulu_dq:
2681  case Intrinsic::x86_sse41_pmuldq:
2682  case Intrinsic::x86_avx2_pmul_dq:
2683  case Intrinsic::x86_avx2_pmulu_dq:
2684  case Intrinsic::x86_avx512_pmul_dq_512:
2685  case Intrinsic::x86_avx512_pmulu_dq_512: {
2686  if (Value *V = simplifyX86muldq(*II, Builder))
2687  return replaceInstUsesWith(*II, V);
2688 
2689  unsigned VWidth = II->getType()->getVectorNumElements();
2690  APInt UndefElts(VWidth, 0);
2691  APInt DemandedElts = APInt::getAllOnesValue(VWidth);
2692  if (Value *V = SimplifyDemandedVectorElts(II, DemandedElts, UndefElts)) {
2693  if (V != II)
2694  return replaceInstUsesWith(*II, V);
2695  return II;
2696  }
2697  break;
2698  }
2699 
2700  case Intrinsic::x86_sse2_packssdw_128:
2701  case Intrinsic::x86_sse2_packsswb_128:
2702  case Intrinsic::x86_avx2_packssdw:
2703  case Intrinsic::x86_avx2_packsswb:
2704  case Intrinsic::x86_avx512_packssdw_512:
2705  case Intrinsic::x86_avx512_packsswb_512:
2706  if (Value *V = simplifyX86pack(*II, true))
2707  return replaceInstUsesWith(*II, V);
2708  break;
2709 
2710  case Intrinsic::x86_sse2_packuswb_128:
2711  case Intrinsic::x86_sse41_packusdw:
2712  case Intrinsic::x86_avx2_packusdw:
2713  case Intrinsic::x86_avx2_packuswb:
2714  case Intrinsic::x86_avx512_packusdw_512:
2715  case Intrinsic::x86_avx512_packuswb_512:
2716  if (Value *V = simplifyX86pack(*II, false))
2717  return replaceInstUsesWith(*II, V);
2718  break;
2719 
2720  case Intrinsic::x86_pclmulqdq: {
2721  if (auto *C = dyn_cast<ConstantInt>(II->getArgOperand(2))) {
2722  unsigned Imm = C->getZExtValue();
2723 
2724  bool MadeChange = false;
2725  Value *Arg0 = II->getArgOperand(0);
2726  Value *Arg1 = II->getArgOperand(1);
2727  unsigned VWidth = Arg0->getType()->getVectorNumElements();
2728  APInt DemandedElts(VWidth, 0);
2729 
2730  APInt UndefElts1(VWidth, 0);
2731  DemandedElts = (Imm & 0x01) ? 2 : 1;
2732  if (Value *V = SimplifyDemandedVectorElts(Arg0, DemandedElts,
2733  UndefElts1)) {
2734  II->setArgOperand(0, V);
2735  MadeChange = true;
2736  }
2737 
2738  APInt UndefElts2(VWidth, 0);
2739  DemandedElts = (Imm & 0x10) ? 2 : 1;
2740  if (Value *V = SimplifyDemandedVectorElts(Arg1, DemandedElts,
2741  UndefElts2)) {
2742  II->setArgOperand(1, V);
2743  MadeChange = true;
2744  }
2745 
2746  // If either of the demanded input elements is undef, the result is zero.
2747  if (UndefElts1[(Imm & 0x01) ? 1 : 0] ||
2748  UndefElts2[(Imm & 0x10) ? 1 : 0])
2749  return replaceInstUsesWith(*II,
2750  ConstantAggregateZero::get(II->getType()));
2751 
2752  if (MadeChange)
2753  return II;
2754  }
2755  break;
2756  }
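  // Illustrative example: with Imm = 0x11 both bit 0 and bit 4 are set, so
  // only element 1 of each <2 x i64> operand is demanded and element 0 of
  // either operand may be simplified away (e.g. replaced with undef).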
2757 
2758  case Intrinsic::x86_sse41_insertps:
2759  if (Value *V = simplifyX86insertps(*II, Builder))
2760  return replaceInstUsesWith(*II, V);
2761  break;
2762 
2763  case Intrinsic::x86_sse4a_extrq: {
2764  Value *Op0 = II->getArgOperand(0);
2765  Value *Op1 = II->getArgOperand(1);
2766  unsigned VWidth0 = Op0->getType()->getVectorNumElements();
2767  unsigned VWidth1 = Op1->getType()->getVectorNumElements();
2768  assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
2769  Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth0 == 2 &&
2770  VWidth1 == 16 && "Unexpected operand sizes");
2771 
2772  // See if we're dealing with constant values.
2773  Constant *C1 = dyn_cast<Constant>(Op1);
2774  ConstantInt *CILength =
2775  C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)0))
2776  : nullptr;
2777  ConstantInt *CIIndex =
2778  C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)1))
2779  : nullptr;
2780 
2781  // Attempt to simplify to a constant, shuffle vector or EXTRQI call.
2782  if (Value *V = simplifyX86extrq(*II, Op0, CILength, CIIndex, Builder))
2783  return replaceInstUsesWith(*II, V);
2784 
2785  // EXTRQ only uses the lowest 64-bits of the first 128-bit vector
2786  // operands and the lowest 16-bits of the second.
2787  bool MadeChange = false;
2788  if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
2789  II->setArgOperand(0, V);
2790  MadeChange = true;
2791  }
2792  if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 2)) {
2793  II->setArgOperand(1, V);
2794  MadeChange = true;
2795  }
2796  if (MadeChange)
2797  return II;
2798  break;
2799  }
2800 
2801  case Intrinsic::x86_sse4a_extrqi: {
2802  // EXTRQI: Extract Length bits starting from Index. Zero pad the remaining
2803  // bits of the lower 64-bits. The upper 64-bits are undefined.
2804  Value *Op0 = II->getArgOperand(0);
2805  unsigned VWidth = Op0->getType()->getVectorNumElements();
2806  assert(Op0->getType()->getPrimitiveSizeInBits() == 128 && VWidth == 2 &&
2807  "Unexpected operand size");
2808 
2809  // See if we're dealing with constant values.
2810  ConstantInt *CILength = dyn_cast<ConstantInt>(II->getArgOperand(1));
2811  ConstantInt *CIIndex = dyn_cast<ConstantInt>(II->getArgOperand(2));
2812 
2813  // Attempt to simplify to a constant or shuffle vector.
2814  if (Value *V = simplifyX86extrq(*II, Op0, CILength, CIIndex, Builder))
2815  return replaceInstUsesWith(*II, V);
2816 
2817  // EXTRQI only uses the lowest 64-bits of the first 128-bit vector
2818  // operand.
2819  if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) {
2820  II->setArgOperand(0, V);
2821  return II;
2822  }
2823  break;
2824  }
2825 
2826  case Intrinsic::x86_sse4a_insertq: {
2827  Value *Op0 = II->getArgOperand(0);
2828  Value *Op1 = II->getArgOperand(1);
2829  unsigned VWidth = Op0->getType()->getVectorNumElements();
2830  assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
2831  Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth == 2 &&
2832  Op1->getType()->getVectorNumElements() == 2 &&
2833  "Unexpected operand size");
2834 
2835  // See if we're dealing with constant values.
2836  Constant *C1 = dyn_cast<Constant>(Op1);
2837  ConstantInt *CI11 =
2838  C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)1))
2839  : nullptr;
2840 
2841  // Attempt to simplify to a constant, shuffle vector or INSERTQI call.
2842  if (CI11) {
2843  const APInt &V11 = CI11->getValue();
2844  APInt Len = V11.zextOrTrunc(6);
2845  APInt Idx = V11.lshr(8).zextOrTrunc(6);
2846  if (Value *V = simplifyX86insertq(*II, Op0, Op1, Len, Idx, Builder))
2847  return replaceInstUsesWith(*II, V);
2848  }
2849 
2850  // INSERTQ only uses the lowest 64-bits of the first 128-bit vector
2851  // operand.
2852  if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) {
2853  II->setArgOperand(0, V);
2854  return II;
2855  }
2856  break;
2857  }
2858 
2859  case Intrinsic::x86_sse4a_insertqi: {
2860  // INSERTQI: Extract lowest Length bits from lower half of second source and
2861  // insert over first source starting at Index bit. The upper 64-bits are
2862  // undefined.
2863  Value *Op0 = II->getArgOperand(0);
2864  Value *Op1 = II->getArgOperand(1);
2865  unsigned VWidth0 = Op0->getType()->getVectorNumElements();
2866  unsigned VWidth1 = Op1->getType()->getVectorNumElements();
2867  assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
2868  Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth0 == 2 &&
2869  VWidth1 == 2 && "Unexpected operand sizes");
2870 
2871  // See if we're dealing with constant values.
2872  ConstantInt *CILength = dyn_cast<ConstantInt>(II->getArgOperand(2));
2873  ConstantInt *CIIndex = dyn_cast<ConstantInt>(II->getArgOperand(3));
2874 
2875  // Attempt to simplify to a constant or shuffle vector.
2876  if (CILength && CIIndex) {
2877  APInt Len = CILength->getValue().zextOrTrunc(6);
2878  APInt Idx = CIIndex->getValue().zextOrTrunc(6);
2879  if (Value *V = simplifyX86insertq(*II, Op0, Op1, Len, Idx, Builder))
2880  return replaceInstUsesWith(*II, V);
2881  }
2882 
2883  // INSERTQI only uses the lowest 64-bits of the first two 128-bit vector
2884  // operands.
2885  bool MadeChange = false;
2886  if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
2887  II->setArgOperand(0, V);
2888  MadeChange = true;
2889  }
2890  if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 1)) {
2891  II->setArgOperand(1, V);
2892  MadeChange = true;
2893  }
2894  if (MadeChange)
2895  return II;
2896  break;
2897  }
2898 
2899  case Intrinsic::x86_sse41_pblendvb:
2900  case Intrinsic::x86_sse41_blendvps:
2901  case Intrinsic::x86_sse41_blendvpd:
2902  case Intrinsic::x86_avx_blendv_ps_256:
2903  case Intrinsic::x86_avx_blendv_pd_256:
2904  case Intrinsic::x86_avx2_pblendvb: {
2905  // Convert blendv* to vector selects if the mask is constant.
2906  // This optimization is convoluted because the intrinsic is defined as
2907  // getting a vector of floats or doubles for the ps and pd versions.
2908  // FIXME: That should be changed.
2909 
2910  Value *Op0 = II->getArgOperand(0);
2911  Value *Op1 = II->getArgOperand(1);
2912  Value *Mask = II->getArgOperand(2);
2913 
2914  // fold (blend A, A, Mask) -> A
2915  if (Op0 == Op1)
2916  return replaceInstUsesWith(CI, Op0);
2917 
2918  // Zero Mask - select 1st argument.
2919  if (isa<ConstantAggregateZero>(Mask))
2920  return replaceInstUsesWith(CI, Op0);
2921 
2922  // Constant Mask - select 1st/2nd argument lane based on top bit of mask.
2923  if (auto *ConstantMask = dyn_cast<ConstantDataVector>(Mask)) {
2924  Constant *NewSelector = getNegativeIsTrueBoolVec(ConstantMask);
2925  return SelectInst::Create(NewSelector, Op1, Op0, "blendv");
2926  }
2927  break;
2928  }
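  // Illustrative example (assuming a constant <4 x float> mask where lanes 0
  // and 2 have the sign bit set):
  //   %r = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a,
  //            <4 x float> %b, <4 x float> %mask)
  // becomes
  //   %r = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>,
  //               <4 x float> %b, <4 x float> %a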
2929 
2930  case Intrinsic::x86_ssse3_pshuf_b_128:
2931  case Intrinsic::x86_avx2_pshuf_b:
2932  case Intrinsic::x86_avx512_pshuf_b_512:
2933  if (Value *V = simplifyX86pshufb(*II, Builder))
2934  return replaceInstUsesWith(*II, V);
2935  break;
2936 
2937  case Intrinsic::x86_avx_vpermilvar_ps:
2938  case Intrinsic::x86_avx_vpermilvar_ps_256:
2939  case Intrinsic::x86_avx512_vpermilvar_ps_512:
2940  case Intrinsic::x86_avx_vpermilvar_pd:
2941  case Intrinsic::x86_avx_vpermilvar_pd_256:
2942  case Intrinsic::x86_avx512_vpermilvar_pd_512:
2943  if (Value *V = simplifyX86vpermilvar(*II, Builder))
2944  return replaceInstUsesWith(*II, V);
2945  break;
2946 
2947  case Intrinsic::x86_avx2_permd:
2948  case Intrinsic::x86_avx2_permps:
2949  if (Value *V = simplifyX86vpermv(*II, Builder))
2950  return replaceInstUsesWith(*II, V);
2951  break;
2952 
2953  case Intrinsic::x86_avx512_mask_permvar_df_256:
2954  case Intrinsic::x86_avx512_mask_permvar_df_512:
2955  case Intrinsic::x86_avx512_mask_permvar_di_256:
2956  case Intrinsic::x86_avx512_mask_permvar_di_512:
2957  case Intrinsic::x86_avx512_mask_permvar_hi_128:
2958  case Intrinsic::x86_avx512_mask_permvar_hi_256:
2959  case Intrinsic::x86_avx512_mask_permvar_hi_512:
2960  case Intrinsic::x86_avx512_mask_permvar_qi_128:
2961  case Intrinsic::x86_avx512_mask_permvar_qi_256:
2962  case Intrinsic::x86_avx512_mask_permvar_qi_512:
2963  case Intrinsic::x86_avx512_mask_permvar_sf_256:
2964  case Intrinsic::x86_avx512_mask_permvar_sf_512:
2965  case Intrinsic::x86_avx512_mask_permvar_si_256:
2966  case Intrinsic::x86_avx512_mask_permvar_si_512:
2967  if (Value *V = simplifyX86vpermv(*II, Builder)) {
2968  // We simplified the permuting, now create a select for the masking.
2969  V = emitX86MaskSelect(II->getArgOperand(3), V, II->getArgOperand(2),
2970  Builder);
2971  return replaceInstUsesWith(*II, V);
2972  }
2973  break;
2974 
2975  case Intrinsic::x86_avx_vperm2f128_pd_256:
2976  case Intrinsic::x86_avx_vperm2f128_ps_256:
2977  case Intrinsic::x86_avx_vperm2f128_si_256:
2978  case Intrinsic::x86_avx2_vperm2i128:
2979  if (Value *V = simplifyX86vperm2(*II, Builder))
2980  return replaceInstUsesWith(*II, V);
2981  break;
2982 
2983  case Intrinsic::x86_avx_maskload_ps:
2984  case Intrinsic::x86_avx_maskload_pd:
2985  case Intrinsic::x86_avx_maskload_ps_256:
2986  case Intrinsic::x86_avx_maskload_pd_256:
2987  case Intrinsic::x86_avx2_maskload_d:
2988  case Intrinsic::x86_avx2_maskload_q:
2989  case Intrinsic::x86_avx2_maskload_d_256:
2990  case Intrinsic::x86_avx2_maskload_q_256:
2991  if (Instruction *I = simplifyX86MaskedLoad(*II, *this))
2992  return I;
2993  break;
2994 
2995  case Intrinsic::x86_sse2_maskmov_dqu:
2996  case Intrinsic::x86_avx_maskstore_ps:
2997  case Intrinsic::x86_avx_maskstore_pd:
2998  case Intrinsic::x86_avx_maskstore_ps_256:
2999  case Intrinsic::x86_avx_maskstore_pd_256:
3000  case Intrinsic::x86_avx2_maskstore_d:
3001  case Intrinsic::x86_avx2_maskstore_q:
3002  case Intrinsic::x86_avx2_maskstore_d_256:
3003  case Intrinsic::x86_avx2_maskstore_q_256:
3004  if (simplifyX86MaskedStore(*II, *this))
3005  return nullptr;
3006  break;
3007 
3008  case Intrinsic::x86_xop_vpcomb:
3009  case Intrinsic::x86_xop_vpcomd:
3010  case Intrinsic::x86_xop_vpcomq:
3011  case Intrinsic::x86_xop_vpcomw:
3012  if (Value *V = simplifyX86vpcom(*II, Builder, true))
3013  return replaceInstUsesWith(*II, V);
3014  break;
3015 
3016  case Intrinsic::x86_xop_vpcomub:
3017  case Intrinsic::x86_xop_vpcomud:
3018  case Intrinsic::x86_xop_vpcomuq:
3019  case Intrinsic::x86_xop_vpcomuw:
3020  if (Value *V = simplifyX86vpcom(*II, Builder, false))
3021  return replaceInstUsesWith(*II, V);
3022  break;
3023 
3024  case Intrinsic::ppc_altivec_vperm:
3025  // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
3026  // Note that ppc_altivec_vperm has a big-endian bias, so when creating
3027  // a vector shuffle for little endian, we must undo the transformation
3028  // performed on vec_perm in altivec.h. That is, we must complement
3029  // the permutation mask with respect to 31 and reverse the order of
3030  // V1 and V2.
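 // Illustrative note (editor's sketch, not an original source comment): with a
 // fully constant mask the permute below is rewritten as a series of
 // extractelement/insertelement operations on the byte-cast sources; on
 // little-endian targets each mask byte m is remapped to 31 - m and the two
 // sources swap roles, undoing the adjustment altivec.h makes for vec_perm.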
3031  if (Constant *Mask = dyn_cast<Constant>(II->getArgOperand(2))) {
3032  assert(Mask->getType()->getVectorNumElements() == 16 &&
3033  "Bad type for intrinsic!");
3034 
3035  // Check that all of the elements are integer constants or undefs.
3036  bool AllEltsOk = true;
3037  for (unsigned i = 0; i != 16; ++i) {
3038  Constant *Elt = Mask->getAggregateElement(i);
3039  if (!Elt || !(isa<ConstantInt>(Elt) || isa<UndefValue>(Elt))) {
3040  AllEltsOk = false;
3041  break;
3042  }
3043  }
3044 
3045  if (AllEltsOk) {
3046  // Cast the input vectors to byte vectors.
3047  Value *Op0 = Builder.CreateBitCast(II->getArgOperand(0),
3048  Mask->getType());
3049  Value *Op1 = Builder.CreateBitCast(II->getArgOperand(1),
3050  Mask->getType());
3051  Value *Result = UndefValue::get(Op0->getType());
3052 
3053  // Only extract each element once.
3054  Value *ExtractedElts[32];
3055  memset(ExtractedElts, 0, sizeof(ExtractedElts));
3056 
3057  for (unsigned i = 0; i != 16; ++i) {
3058  if (isa<UndefValue>(Mask->getAggregateElement(i)))
3059  continue;
3060  unsigned Idx =
3061  cast<ConstantInt>(Mask->getAggregateElement(i))->getZExtValue();
3062  Idx &= 31; // Match the hardware behavior.
3063  if (DL.isLittleEndian())
3064  Idx = 31 - Idx;
3065 
3066  if (!ExtractedElts[Idx]) {
3067  Value *Op0ToUse = (DL.isLittleEndian()) ? Op1 : Op0;
3068  Value *Op1ToUse = (DL.isLittleEndian()) ? Op0 : Op1;
3069  ExtractedElts[Idx] =
3070  Builder.CreateExtractElement(Idx < 16 ? Op0ToUse : Op1ToUse,
3071  Builder.getInt32(Idx&15));
3072  }
3073 
3074  // Insert this value into the result vector.
3075  Result = Builder.CreateInsertElement(Result, ExtractedElts[Idx],
3076  Builder.getInt32(i));
3077  }
3078  return CastInst::Create(Instruction::BitCast, Result, CI.getType());
3079  }
3080  }
3081  break;
3082 
3083  case Intrinsic::arm_neon_vld1:
3084  case Intrinsic::arm_neon_vld2:
3085  case Intrinsic::arm_neon_vld3:
3086  case Intrinsic::arm_neon_vld4:
3087  case Intrinsic::arm_neon_vld2lane:
3088  case Intrinsic::arm_neon_vld3lane:
3089  case Intrinsic::arm_neon_vld4lane:
3090  case Intrinsic::arm_neon_vst1:
3091  case Intrinsic::arm_neon_vst2:
3092  case Intrinsic::arm_neon_vst3:
3093  case Intrinsic::arm_neon_vst4:
3094  case Intrinsic::arm_neon_vst2lane:
3095  case Intrinsic::arm_neon_vst3lane:
3096  case Intrinsic::arm_neon_vst4lane: {
3097  unsigned MemAlign =
3098  getKnownAlignment(II->getArgOperand(0), DL, II, &AC, &DT);
3099  unsigned AlignArg = II->getNumArgOperands() - 1;
3100  ConstantInt *IntrAlign = dyn_cast<ConstantInt>(II->getArgOperand(AlignArg));
3101  if (IntrAlign && IntrAlign->getZExtValue() < MemAlign) {
3102  II->setArgOperand(AlignArg,
3103  ConstantInt::get(Type::getInt32Ty(II->getContext()),
3104  MemAlign, false));
3105  return II;
3106  }
3107  break;
3108  }
3109 
3110  case Intrinsic::arm_neon_vmulls:
3111  case Intrinsic::arm_neon_vmullu:
3112  case Intrinsic::aarch64_neon_smull:
3113  case Intrinsic::aarch64_neon_umull: {
3114  Value *Arg0 = II->getArgOperand(0);
3115  Value *Arg1 = II->getArgOperand(1);
3116 
3117  // Handle mul by zero first:
3118  if (isa<ConstantAggregateZero>(Arg0) || isa<ConstantAggregateZero>(Arg1)) {
3119  return replaceInstUsesWith(CI, ConstantAggregateZero::get(II->getType()));
3120  }
3121 
3122  // Check for constant LHS & RHS - in this case we just simplify.
3123  bool Zext = (II->getIntrinsicID() == Intrinsic::arm_neon_vmullu ||
3124  II->getIntrinsicID() == Intrinsic::aarch64_neon_umull);
3125  VectorType *NewVT = cast<VectorType>(II->getType());
3126  if (Constant *CV0 = dyn_cast<Constant>(Arg0)) {
3127  if (Constant *CV1 = dyn_cast<Constant>(Arg1)) {
3128  CV0 = ConstantExpr::getIntegerCast(CV0, NewVT, /*isSigned=*/!Zext);
3129  CV1 = ConstantExpr::getIntegerCast(CV1, NewVT, /*isSigned=*/!Zext);
3130 
3131  return replaceInstUsesWith(CI, ConstantExpr::getMul(CV0, CV1));
3132  }
3133 
3134  // Couldn't simplify - canonicalize constant to the RHS.
3135  std::swap(Arg0, Arg1);
3136  }
3137 
3138  // Handle mul by one:
3139  if (Constant *CV1 = dyn_cast<Constant>(Arg1))
3140  if (ConstantInt *Splat =
3141  dyn_cast_or_null<ConstantInt>(CV1->getSplatValue()))
3142  if (Splat->isOne())
3143  return CastInst::CreateIntegerCast(Arg0, II->getType(),
3144  /*isSigned=*/!Zext);
3145 
3146  break;
3147  }
3148  case Intrinsic::amdgcn_rcp: {
3149  Value *Src = II->getArgOperand(0);
3150 
3151  // TODO: Move to ConstantFolding/InstSimplify?
3152  if (isa<UndefValue>(Src))
3153  return replaceInstUsesWith(CI, Src);
3154 
3155  if (const ConstantFP *C = dyn_cast<ConstantFP>(Src)) {
3156  const APFloat &ArgVal = C->getValueAPF();
3157  APFloat Val(ArgVal.getSemantics(), 1.0);
3158  APFloat::opStatus Status = Val.divide(ArgVal,
3159  APFloat::rmNearestTiesToEven);
3160  // Only do this if it was exact and therefore not dependent on the
3161  // rounding mode.
3162  if (Status == APFloat::opOK)
3163  return replaceInstUsesWith(CI, ConstantFP::get(II->getContext(), Val));
3164  }
3165 
3166  break;
3167  }
3168  case Intrinsic::amdgcn_rsq: {
3169  Value *Src = II->getArgOperand(0);
3170 
3171  // TODO: Move to ConstantFolding/InstSimplify?
3172  if (isa<UndefValue>(Src))
3173  return replaceInstUsesWith(CI, Src);
3174  break;
3175  }
3176  case Intrinsic::amdgcn_frexp_mant:
3177  case Intrinsic::amdgcn_frexp_exp: {
3178  Value *Src = II->getArgOperand(0);
3179  if (const ConstantFP *C = dyn_cast<ConstantFP>(Src)) {
3180  int Exp;
3181  APFloat Significand = frexp(C->getValueAPF(), Exp,
3182  APFloat::rmNearestTiesToEven);
3183 
3184  if (II->getIntrinsicID() == Intrinsic::amdgcn_frexp_mant) {
3185  return replaceInstUsesWith(CI, ConstantFP::get(II->getContext(),
3186  Significand));
3187  }
3188 
3189  // Match instruction special case behavior.
3190  if (Exp == APFloat::IEK_NaN || Exp == APFloat::IEK_Inf)
3191  Exp = 0;
3192 
3193  return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), Exp));
3194  }
3195 
3196  if (isa<UndefValue>(Src))
3197  return replaceInstUsesWith(CI, UndefValue::get(II->getType()));
3198 
3199  break;
3200  }
3201  case Intrinsic::amdgcn_class: {
3202  enum {
3203  S_NAN = 1 << 0, // Signaling NaN
3204  Q_NAN = 1 << 1, // Quiet NaN
3205  N_INFINITY = 1 << 2, // Negative infinity
3206  N_NORMAL = 1 << 3, // Negative normal
3207  N_SUBNORMAL = 1 << 4, // Negative subnormal
3208  N_ZERO = 1 << 5, // Negative zero
3209  P_ZERO = 1 << 6, // Positive zero
3210  P_SUBNORMAL = 1 << 7, // Positive subnormal
3211  P_NORMAL = 1 << 8, // Positive normal
3212  P_INFINITY = 1 << 9 // Positive infinity
3213  };
3214 
3215  const uint32_t FullMask = S_NAN | Q_NAN | N_INFINITY | N_NORMAL |
3216  N_SUBNORMAL | N_ZERO | P_ZERO | P_SUBNORMAL | P_NORMAL | P_INFINITY;
3217 
3218  Value *Src0 = II->getArgOperand(0);
3219  Value *Src1 = II->getArgOperand(1);
3220  const ConstantInt *CMask = dyn_cast<ConstantInt>(Src1);
3221  if (!CMask) {
3222  if (isa<UndefValue>(Src0))
3223  return replaceInstUsesWith(*II, UndefValue::get(II->getType()));
3224 
3225  if (isa<UndefValue>(Src1))
3226  return replaceInstUsesWith(*II, ConstantInt::get(II->getType(), false));
3227  break;
3228  }
3229 
3230  uint32_t Mask = CMask->getZExtValue();
3231 
3232  // If all tests are made, it doesn't matter what the value is.
3233  if ((Mask & FullMask) == FullMask)
3234  return replaceInstUsesWith(*II, ConstantInt::get(II->getType(), true));
3235 
3236  if ((Mask & FullMask) == 0)
3237  return replaceInstUsesWith(*II, ConstantInt::get(II->getType(), false));
3238 
3239  if (Mask == (S_NAN | Q_NAN)) {
3240  // Equivalent of isnan. Replace with standard fcmp.
3241  Value *FCmp = Builder.CreateFCmpUNO(Src0, Src0);
3242  FCmp->takeName(II);
3243  return replaceInstUsesWith(*II, FCmp);
3244  }
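 // Illustrative note (editor's sketch): e.g. a class mask of 3 (S_NAN | Q_NAN)
 // applied to %x becomes the generic "fcmp uno float %x, %x", the canonical
 // isnan test, which the ordinary fcmp folds can then handle.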
3245 
3246  const ConstantFP *CVal = dyn_cast<ConstantFP>(Src0);
3247  if (!CVal) {
3248  if (isa<UndefValue>(Src0))
3249  return replaceInstUsesWith(*II, UndefValue::get(II->getType()));
3250 
3251  // Clamp mask to used bits
3252  if ((Mask & FullMask) != Mask) {
3253  CallInst *NewCall = Builder.CreateCall(II->getCalledFunction(),
3254  { Src0, ConstantInt::get(Src1->getType(), Mask & FullMask) }
3255  );
3256 
3257  NewCall->takeName(II);
3258  return replaceInstUsesWith(*II, NewCall);
3259  }
3260 
3261  break;
3262  }
3263 
3264  const APFloat &Val = CVal->getValueAPF();
3265 
3266  bool Result =
3267  ((Mask & S_NAN) && Val.isNaN() && Val.isSignaling()) ||
3268  ((Mask & Q_NAN) && Val.isNaN() && !Val.isSignaling()) ||
3269  ((Mask & N_INFINITY) && Val.isInfinity() && Val.isNegative()) ||
3270  ((Mask & N_NORMAL) && Val.isNormal() && Val.isNegative()) ||
3271  ((Mask & N_SUBNORMAL) && Val.isDenormal() && Val.isNegative()) ||
3272  ((Mask & N_ZERO) && Val.isZero() && Val.isNegative()) ||
3273  ((Mask & P_ZERO) && Val.isZero() && !Val.isNegative()) ||
3274  ((Mask & P_SUBNORMAL) && Val.isDenormal() && !Val.isNegative()) ||
3275  ((Mask & P_NORMAL) && Val.isNormal() && !Val.isNegative()) ||
3276  ((Mask & P_INFINITY) && Val.isInfinity() && !Val.isNegative());
3277 
3278  return replaceInstUsesWith(*II, ConstantInt::get(II->getType(), Result));
3279  }
3280  case Intrinsic::amdgcn_cvt_pkrtz: {
3281  Value *Src0 = II->getArgOperand(0);
3282  Value *Src1 = II->getArgOperand(1);
3283  if (const ConstantFP *C0 = dyn_cast<ConstantFP>(Src0)) {
3284  if (const ConstantFP *C1 = dyn_cast<ConstantFP>(Src1)) {
3285  const fltSemantics &HalfSem
3286  = II->getType()->getScalarType()->getFltSemantics();
3287  bool LosesInfo;
3288  APFloat Val0 = C0->getValueAPF();
3289  APFloat Val1 = C1->getValueAPF();
3290  Val0.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo);
3291  Val1.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo);
3292 
3293  Constant *Folded = ConstantVector::get({
3294  ConstantFP::get(II->getContext(), Val0),
3295  ConstantFP::get(II->getContext(), Val1) });
3296  return replaceInstUsesWith(*II, Folded);
3297  }
3298  }
3299 
3300  if (isa<UndefValue>(Src0) && isa<UndefValue>(Src1))
3301  return replaceInstUsesWith(*II, UndefValue::get(II->getType()));
3302 
3303  break;
3304  }
3305  case Intrinsic::amdgcn_ubfe:
3306  case Intrinsic::amdgcn_sbfe: {
3307  // Decompose simple cases into standard shifts.
3308  Value *Src = II->getArgOperand(0);
3309  if (isa<UndefValue>(Src))
3310  return replaceInstUsesWith(*II, Src);
3311 
3312  unsigned Width;
3313  Type *Ty = II->getType();
3314  unsigned IntSize = Ty->getIntegerBitWidth();
3315 
3316  ConstantInt *CWidth = dyn_cast<ConstantInt>(II->getArgOperand(2));
3317  if (CWidth) {
3318  Width = CWidth->getZExtValue();
3319  if ((Width & (IntSize - 1)) == 0)
3320  return replaceInstUsesWith(*II, ConstantInt::getNullValue(Ty));
3321 
3322  if (Width >= IntSize) {
3323  // Hardware ignores high bits, so remove those.
3324  II->setArgOperand(2, ConstantInt::get(CWidth->getType(),
3325  Width & (IntSize - 1)));
3326  return II;
3327  }
3328  }
3329 
3330  unsigned Offset;
3331  ConstantInt *COffset = dyn_cast<ConstantInt>(II->getArgOperand(1));
3332  if (COffset) {
3333  Offset = COffset->getZExtValue();
3334  if (Offset >= IntSize) {
3335  II->setArgOperand(1, ConstantInt::get(COffset->getType(),
3336  Offset & (IntSize - 1)));
3337  return II;
3338  }
3339  }
3340 
3341  bool Signed = II->getIntrinsicID() == Intrinsic::amdgcn_sbfe;
3342 
3343  // TODO: Also emit sub if only width is constant.
3344  if (!CWidth && COffset && Offset == 0) {
3345  Constant *KSize = ConstantInt::get(COffset->getType(), IntSize);
3346  Value *ShiftVal = Builder.CreateSub(KSize, II->getArgOperand(2));
3347  ShiftVal = Builder.CreateZExt(ShiftVal, II->getType());
3348 
3349  Value *Shl = Builder.CreateShl(Src, ShiftVal);
3350  Value *RightShift = Signed ? Builder.CreateAShr(Shl, ShiftVal)
3351  : Builder.CreateLShr(Shl, ShiftVal);
3352  RightShift->takeName(II);
3353  return replaceInstUsesWith(*II, RightShift);
3354  }
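 // Illustrative example (editor's sketch): for a 32-bit ubfe with a zero
 // offset and a non-constant width %w, the code above emits roughly
 //   %amt = sub i32 32, %w
 //   %shl = shl i32 %src, %amt
 //   %res = lshr i32 %shl, %amt   ; ashr instead of lshr for the signed sbfe
 // which keeps exactly the low %w bits of %src.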
3355 
3356  if (!CWidth || !COffset)
3357  break;
3358 
3359  // TODO: This allows folding to undef when the hardware has specific
3360  // behavior?
3361  if (Offset + Width < IntSize) {
3362  Value *Shl = Builder.CreateShl(Src, IntSize - Offset - Width);
3363  Value *RightShift = Signed ? Builder.CreateAShr(Shl, IntSize - Width)
3364  : Builder.CreateLShr(Shl, IntSize - Width);
3365  RightShift->takeName(II);
3366  return replaceInstUsesWith(*II, RightShift);
3367  }
3368 
3369  Value *RightShift = Signed ? Builder.CreateAShr(Src, Offset)
3370  : Builder.CreateLShr(Src, Offset);
3371 
3372  RightShift->takeName(II);
3373  return replaceInstUsesWith(*II, RightShift);
3374  }
3375  case Intrinsic::amdgcn_exp:
3376  case Intrinsic::amdgcn_exp_compr: {
3377  ConstantInt *En = dyn_cast<ConstantInt>(II->getArgOperand(1));
3378  if (!En) // Illegal.
3379  break;
3380 
3381  unsigned EnBits = En->getZExtValue();
3382  if (EnBits == 0xf)
3383  break; // All inputs enabled.
3384 
3385  bool IsCompr = II->getIntrinsicID() == Intrinsic::amdgcn_exp_compr;
3386  bool Changed = false;
3387  for (int I = 0; I < (IsCompr ? 2 : 4); ++I) {
3388  if ((!IsCompr && (EnBits & (1 << I)) == 0) ||
3389  (IsCompr && ((EnBits & (0x3 << (2 * I))) == 0))) {
3390  Value *Src = II->getArgOperand(I + 2);
3391  if (!isa<UndefValue>(Src)) {
3392  II->setArgOperand(I + 2, UndefValue::get(Src->getType()));
3393  Changed = true;
3394  }
3395  }
3396  }
3397 
3398  if (Changed)
3399  return II;
3400 
3401  break;
3402 
3403  }
3404  case Intrinsic::amdgcn_fmed3: {
3405  // Note this does not preserve proper sNaN behavior if IEEE-mode is enabled
3406  // for the shader.
3407 
3408  Value *Src0 = II->getArgOperand(0);
3409  Value *Src1 = II->getArgOperand(1);
3410  Value *Src2 = II->getArgOperand(2);
3411 
3412  bool Swap = false;
3413  // Canonicalize constants to RHS operands.
3414  //
3415  // fmed3(c0, x, c1) -> fmed3(x, c0, c1)
3416  if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
3417  std::swap(Src0, Src1);
3418  Swap = true;
3419  }
3420 
3421  if (isa<Constant>(Src1) && !isa<Constant>(Src2)) {
3422  std::swap(Src1, Src2);
3423  Swap = true;
3424  }
3425 
3426  if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
3427  std::swap(Src0, Src1);
3428  Swap = true;
3429  }
3430 
3431  if (Swap) {
3432  II->setArgOperand(0, Src0);
3433  II->setArgOperand(1, Src1);
3434  II->setArgOperand(2, Src2);
3435  return II;
3436  }
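 // Illustrative example (editor's sketch): fmed3(2.0, %x, 4.0) is re-ordered
 // here to fmed3(%x, 2.0, 4.0) so constant operands end up on the right-hand
 // side for the folds below.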
3437 
3438  if (match(Src2, m_NaN()) || isa<UndefValue>(Src2)) {
3439  CallInst *NewCall = Builder.CreateMinNum(Src0, Src1);
3440  NewCall->copyFastMathFlags(II);
3441  NewCall->takeName(II);
3442  return replaceInstUsesWith(*II, NewCall);
3443  }
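 // Illustrative note (editor's sketch): a NaN (or undef) third operand lets
 // the call be folded to llvm.minnum of the first two operands, matching how
 // the hardware med3 treats a NaN input (subject to the IEEE-mode caveat
 // noted above).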
3444 
3445  if (const ConstantFP *C0 = dyn_cast<ConstantFP>(Src0)) {
3446  if (const ConstantFP *C1 = dyn_cast<ConstantFP>(Src1)) {
3447  if (const ConstantFP *C2 = dyn_cast<ConstantFP>(Src2)) {
3448  APFloat Result = fmed3AMDGCN(C0->getValueAPF(), C1->getValueAPF(),
3449  C2->getValueAPF());
3450  return replaceInstUsesWith(*II,
3451  ConstantFP::get(Builder.getContext(), Result));
3452  }
3453  }
3454  }
3455 
3456  break;
3457  }
3458  case Intrinsic::amdgcn_icmp:
3459  case Intrinsic::amdgcn_fcmp: {
3460  const ConstantInt *CC = dyn_cast<ConstantInt>(II->getArgOperand(2));
3461  if (!CC)
3462  break;
3463 
3464  // Guard against invalid arguments.
3465  int64_t CCVal = CC->getZExtValue();
3466  bool IsInteger = II->getIntrinsicID() == Intrinsic::amdgcn_icmp;
3467  if ((IsInteger && (CCVal < CmpInst::FIRST_ICMP_PREDICATE ||
3468  CCVal > CmpInst::LAST_ICMP_PREDICATE)) ||
3469  (!IsInteger && (CCVal < CmpInst::FIRST_FCMP_PREDICATE ||
3470  CCVal > CmpInst::LAST_FCMP_PREDICATE)))
3471  break;
3472 
3473  Value *Src0 = II->getArgOperand(0);
3474  Value *Src1 = II->getArgOperand(1);
3475 
3476  if (auto *CSrc0 = dyn_cast<Constant>(Src0)) {
3477  if (auto *CSrc1 = dyn_cast<Constant>(Src1)) {
3478  Constant *CCmp = ConstantExpr::getCompare(CCVal, CSrc0, CSrc1);
3479  if (CCmp->isNullValue()) {
3480  return replaceInstUsesWith(
3481  *II, ConstantExpr::getSExt(CCmp, II->getType()));
3482  }
3483 
3484  // The result of V_ICMP/V_FCMP assembly instructions (which this
3485  // intrinsic exposes) is one bit per thread, masked with the EXEC
3486  // register (which contains the bitmask of live threads). So a
3487  // comparison that always returns true is the same as a read of the
3488  // EXEC register.
3489  Value *NewF = Intrinsic::getDeclaration(
3490  II->getModule(), Intrinsic::read_register, II->getType());
3491  Metadata *MDArgs[] = {MDString::get(II->getContext(), "exec")};
3492  MDNode *MD = MDNode::get(II->getContext(), MDArgs);
3493  Value *Args[] = {MetadataAsValue::get(II->getContext(), MD)};
3494  CallInst *NewCall = Builder.CreateCall(NewF, Args);
3495  NewCall->addAttribute(AttributeList::FunctionIndex,
3496  Attribute::Convergent);
3497  NewCall->takeName(II);
3498  return replaceInstUsesWith(*II, NewCall);
3499  }
3500 
3501  // Canonicalize constants to RHS.
3502  CmpInst::Predicate SwapPred
3503  = CmpInst::getSwappedPredicate(static_cast<CmpInst::Predicate>(CCVal));
3504  II->setArgOperand(0, Src1);
3505  II->setArgOperand(1, Src0);
3506  II->setArgOperand(2, ConstantInt::get(CC->getType(),
3507  static_cast<int>(SwapPred)));
3508  return II;
3509  }
3510 
3511  if (CCVal != CmpInst::ICMP_EQ && CCVal != CmpInst::ICMP_NE)
3512  break;
3513 
3514  // Canonicalize compare eq with true value to compare != 0
3515  // llvm.amdgcn.icmp(zext (i1 x), 1, eq)
3516  // -> llvm.amdgcn.icmp(zext (i1 x), 0, ne)
3517  // llvm.amdgcn.icmp(sext (i1 x), -1, eq)
3518  // -> llvm.amdgcn.icmp(sext (i1 x), 0, ne)
3519  Value *ExtSrc;
3520  if (CCVal == CmpInst::ICMP_EQ &&
3521  ((match(Src1, m_One()) && match(Src0, m_ZExt(m_Value(ExtSrc)))) ||
3522  (match(Src1, m_AllOnes()) && match(Src0, m_SExt(m_Value(ExtSrc))))) &&
3523  ExtSrc->getType()->isIntegerTy(1)) {
3524  II->setArgOperand(1, ConstantInt::getNullValue(Src1->getType()));
3525  II->setArgOperand(2, ConstantInt::get(CC->getType(), CmpInst::ICMP_NE));
3526  return II;
3527  }
3528 
3529  CmpInst::Predicate SrcPred;
3530  Value *SrcLHS;
3531  Value *SrcRHS;
3532 
3533  // Fold compare eq/ne with 0 from a compare result as the predicate to the
3534  // intrinsic. The typical use is a wave vote function in the library, which
3535  // will be fed from a user code condition compared with 0. Fold in the
3536  // redundant compare.
3537 
3538  // llvm.amdgcn.icmp([sz]ext ([if]cmp pred a, b), 0, ne)
3539  // -> llvm.amdgcn.[if]cmp(a, b, pred)
3540  //
3541  // llvm.amdgcn.icmp([sz]ext ([if]cmp pred a, b), 0, eq)
3542  // -> llvm.amdgcn.[if]cmp(a, b, inv pred)
3543  if (match(Src1, m_Zero()) &&
3544  match(Src0,
3545  m_ZExtOrSExt(m_Cmp(SrcPred, m_Value(SrcLHS), m_Value(SrcRHS))))) {
3546  if (CCVal == CmpInst::ICMP_EQ)
3547  SrcPred = CmpInst::getInversePredicate(SrcPred);
3548 
3549  Intrinsic::ID NewIID = CmpInst::isFPPredicate(SrcPred) ?
3550  Intrinsic::amdgcn_fcmp : Intrinsic::amdgcn_icmp;
3551 
3552  Value *NewF = Intrinsic::getDeclaration(II->getModule(), NewIID,
3553  SrcLHS->getType());
3554  Value *Args[] = { SrcLHS, SrcRHS,
3555  ConstantInt::get(CC->getType(), SrcPred) };
3556  CallInst *NewCall = Builder.CreateCall(NewF, Args);
3557  NewCall->takeName(II);
3558  return replaceInstUsesWith(*II, NewCall);
3559  }
3560 
3561  break;
3562  }
3563  case Intrinsic::stackrestore: {
3564  // If the save is right next to the restore, remove the restore. This can
3565  // happen when variable allocas are DCE'd.
3566  if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
3567  if (SS->getIntrinsicID() == Intrinsic::stacksave) {
3568  if (&*++SS->getIterator() == II)
3569  return eraseInstFromFunction(CI);
3570  }
3571  }
3572 
3573  // Scan down this block to see if there is another stack restore in the
3574  // same block without an intervening call/alloca.
3575  BasicBlock::iterator BI(II);
3576  TerminatorInst *TI = II->getParent()->getTerminator();
3577  bool CannotRemove = false;
3578  for (++BI; &*BI != TI; ++BI) {
3579  if (isa<AllocaInst>(BI)) {
3580  CannotRemove = true;
3581  break;
3582  }
3583  if (CallInst *BCI = dyn_cast<CallInst>(BI)) {
3584  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(BCI)) {
3585  // If there is a stackrestore below this one, remove this one.
3586  if (II->getIntrinsicID() == Intrinsic::stackrestore)
3587  return eraseInstFromFunction(CI);
3588 
3589  // Bail if we cross over an intrinsic with side effects, such as
3590  // llvm.stacksave, llvm.read_register, or llvm.setjmp.
3591  if (II->mayHaveSideEffects()) {
3592  CannotRemove = true;
3593  break;
3594  }
3595  } else {
3596  // If we found a non-intrinsic call, we can't remove the stack
3597  // restore.
3598  CannotRemove = true;
3599  break;
3600  }
3601  }
3602  }
3603 
3604  // If the stack restore is in a return, resume, or unwind block and if there
3605  // are no allocas or calls between the restore and the return, nuke the
3606  // restore.
3607  if (!CannotRemove && (isa<ReturnInst>(TI) || isa<ResumeInst>(TI)))
3608  return eraseInstFromFunction(CI);
3609  break;
3610  }
3611  case Intrinsic::lifetime_start:
3612  // Asan needs to poison memory to detect invalid accesses, which are possible
3613  // even for an empty lifetime range.
3614  if (II->getFunction()->hasFnAttribute(Attribute::SanitizeAddress))
3615  break;
3616 
3617  if (removeTriviallyEmptyRange(*II, Intrinsic::lifetime_start,
3618  Intrinsic::lifetime_end, *this))
3619  return nullptr;
3620  break;
3621  case Intrinsic::assume: {
3622  Value *IIOperand = II->getArgOperand(0);
3623  // Remove an assume if it is immediately followed by an identical assume.
3624  if (match(II->getNextNode(),
3625  m_Intrinsic<Intrinsic::assume>(m_Specific(IIOperand))))
3626  return eraseInstFromFunction(CI);
3627 
3628  // Canonicalize assume(a && b) -> assume(a); assume(b);
3629  // Note: New assumption intrinsics created here are registered by
3630  // the InstCombineIRInserter object.
3631  Value *AssumeIntrinsic = II->getCalledValue(), *A, *B;
3632  if (match(IIOperand, m_And(m_Value(A), m_Value(B)))) {
3633  Builder.CreateCall(AssumeIntrinsic, A, II->getName());
3634  Builder.CreateCall(AssumeIntrinsic, B, II->getName());
3635  return eraseInstFromFunction(*II);
3636  }
3637  // assume(!(a || b)) -> assume(!a); assume(!b);
3638  if (match(IIOperand, m_Not(m_Or(m_Value(A), m_Value(B))))) {
3639  Builder.CreateCall(AssumeIntrinsic, Builder.CreateNot(A), II->getName());
3640  Builder.CreateCall(AssumeIntrinsic, Builder.CreateNot(B), II->getName());
3641  return eraseInstFromFunction(*II);
3642  }
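 // Illustrative example (editor's sketch): both rewrites split a single
 // assume on a combined condition, e.g.
 //   %c = and i1 %a, %b
 //   call void @llvm.assume(i1 %c)
 // into one assume on %a and one on %b, so each fact feeds value tracking
 // independently.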
3643 
3644  // assume( (load addr) != null ) -> add 'nonnull' metadata to load
3645  // (if assume is valid at the load)
3646  CmpInst::Predicate Pred;
3647  Instruction *LHS;
3648  if (match(IIOperand, m_ICmp(Pred, m_Instruction(LHS), m_Zero())) &&
3649  Pred == ICmpInst::ICMP_NE && LHS->getOpcode() == Instruction::Load &&
3650  LHS->getType()->isPointerTy() &&
3651  isValidAssumeForContext(II, LHS, &DT)) {
3652  MDNode *MD = MDNode::get(II->getContext(), None);
3653  LHS->setMetadata(LLVMContext::MD_nonnull, MD);
3654  return eraseInstFromFunction(*II);
3655 
3656  // TODO: apply nonnull return attributes to calls and invokes
3657  // TODO: apply range metadata for range check patterns?
3658  }
3659 
3660  // If there is a dominating assume with the same condition as this one,
3661  // then this one is redundant, and should be removed.
3662  KnownBits Known(1);
3663  computeKnownBits(IIOperand, Known, 0, II);
3664  if (Known.isAllOnes())
3665  return eraseInstFromFunction(*II);
3666 
3667  // Update the cache of affected values for this assumption (we might be
3668  // here because we just simplified the condition).
3669  AC.updateAffectedValues(II);
3670  break;
3671  }
3672  case Intrinsic::experimental_gc_relocate: {
3673  // Translate facts known about a pointer before relocating into
3674  // facts about the relocate value, while being careful to
3675  // preserve relocation semantics.
3676  Value *DerivedPtr = cast<GCRelocateInst>(II)->getDerivedPtr();
3677 
3678  // Remove the relocation if unused; note that this check is required
3679  // to prevent the cases below from looping forever.
3680  if (II->use_empty())
3681  return eraseInstFromFunction(*II);
3682 
3683  // Undef is undef, even after relocation.
3684  // TODO: provide a hook for this in GCStrategy. This is clearly legal for
3685  // most practical collectors, but there was discussion in the review thread
3686  // about whether it was legal for all possible collectors.
3687  if (isa<UndefValue>(DerivedPtr))
3688  // Use undef of gc_relocate's type to replace it.
3689  return replaceInstUsesWith(*II, UndefValue::get(II->getType()));
3690 
3691  if (auto *PT = dyn_cast<PointerType>(II->getType())) {
3692  // The relocation of null will be null for most any collector.
3693  // TODO: provide a hook for this in GCStrategy. There might be some
3694  // weird collector this property does not hold for.
3695  if (isa<ConstantPointerNull>(DerivedPtr))
3696  // Use null-pointer of gc_relocate's type to replace it.
3697  return replaceInstUsesWith(*II, ConstantPointerNull::get(PT));
3698 
3699  // isKnownNonNull -> nonnull attribute
3700  if (isKnownNonNullAt(DerivedPtr, II, &DT))
3701  II->addAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
3702  }
3703 
3704  // TODO: bitcast(relocate(p)) -> relocate(bitcast(p))
3705  // Canonicalize on the type from the uses to the defs
3706 
3707  // TODO: relocate((gep p, C, C2, ...)) -> gep(relocate(p), C, C2, ...)
3708  break;
3709  }
3710 
3711  case Intrinsic::experimental_guard: {
3712  // Is this guard followed by another guard?
3713  Instruction *NextInst = II->getNextNode();
3714  Value *NextCond = nullptr;
3715  if (match(NextInst,
3716  m_Intrinsic<Intrinsic::experimental_guard>(m_Value(NextCond)))) {
3717  Value *CurrCond = II->getArgOperand(0);
3718 
3719  // Remove a guard that is immediately preceded by an identical guard.
3720  if (CurrCond == NextCond)
3721  return eraseInstFromFunction(*NextInst);
3722 
3723  // Otherwise canonicalize guard(a); guard(b) -> guard(a & b).
3724  II->setArgOperand(0, Builder.CreateAnd(CurrCond, NextCond));
3725  return eraseInstFromFunction(*NextInst);
3726  }
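 // Illustrative example (editor's sketch): two adjacent calls to
 // @llvm.experimental.guard on %a and then on %b are merged into a single
 // guard on "and i1 %a, %b" (or the second guard is simply dropped when the
 // conditions are identical).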
3727  break;
3728  }
3729  }
3730  return visitCallSite(II);
3731 }
3732 
3733 // Fence instruction simplification
3734 Instruction *InstCombiner::visitFenceInst(FenceInst &FI) {
3735  // Remove identical consecutive fences.
3736  if (auto *NFI = dyn_cast<FenceInst>(FI.getNextNode()))
3737  if (FI.isIdenticalTo(NFI))
3738  return eraseInstFromFunction(FI);
3739  return nullptr;
3740 }
3741 
3742 // InvokeInst simplification
3743 //
3744 Instruction *InstCombiner::visitInvokeInst(InvokeInst &II) {
3745  return visitCallSite(&II);
3746 }
3747 
3748 /// If this cast does not affect the value passed through the varargs area, we
3749 /// can eliminate the use of the cast.
3750 static bool isSafeToEliminateVarargsCast(CallSite CS,
3751  const DataLayout &DL,
3752  const CastInst *const CI,
3753  const int ix) {
3754  if (!CI->isLosslessCast())
3755  return false;
3756 
3757  // If this is a GC intrinsic, avoid munging types. We need types for
3758  // statepoint reconstruction in SelectionDAG.
3759  // TODO: This is probably something which should be expanded to all
3760  // intrinsics since the entire point of intrinsics is that
3761  // they are understandable by the optimizer.
3762  if (isStatepoint(CS) || isGCRelocate(CS) || isGCResult(CS))
3763  return false;
3764 
3765  // The size of ByVal or InAlloca arguments is derived from the type, so we
3766  // can't change to a type with a different size. If the size were
3767  // passed explicitly we could avoid this check.
3768  if (!CS.isByValOrInAllocaArgument(ix))
3769  return true;
3770 
3771  Type* SrcTy =
3772  cast<PointerType>(CI->getOperand(0)->getType())->getElementType();
3773  Type* DstTy = cast<PointerType>(CI->getType())->getElementType();
3774  if (!SrcTy->isSized() || !DstTy->isSized())
3775  return false;
3776  if (DL.getTypeAllocSize(SrcTy) != DL.getTypeAllocSize(DstTy))
3777  return false;
3778  return true;
3779 }
3780 
3781 Instruction *InstCombiner::tryOptimizeCall(CallInst *CI) {
3782  if (!CI->getCalledFunction()) return nullptr;
3783 
3784  auto InstCombineRAUW = [this](Instruction *From, Value *With) {
3785  replaceInstUsesWith(*From, With);
3786  };
3787  LibCallSimplifier Simplifier(DL, &TLI, InstCombineRAUW);
3788  if (Value *With = Simplifier.optimizeCall(CI)) {
3789  ++NumSimplified;
3790  return CI->use_empty() ? CI : replaceInstUsesWith(*CI, With);
3791  }
3792 
3793  return nullptr;
3794 }
3795 
3796 static IntrinsicInst *findInitTrampolineFromAlloca(Value *TrampMem) {
3797  // Strip off at most one level of pointer casts, looking for an alloca. This
3798  // is good enough in practice and simpler than handling any number of casts.
3799  Value *Underlying = TrampMem->stripPointerCasts();
3800  if (Underlying != TrampMem &&
3801  (!Underlying->hasOneUse() || Underlying->user_back() != TrampMem))
3802  return nullptr;
3803  if (!isa<AllocaInst>(Underlying))
3804  return nullptr;
3805 
3806  IntrinsicInst *InitTrampoline = nullptr;
3807  for (User *U : TrampMem->users()) {
3808  IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
3809  if (!II)
3810  return nullptr;
3811  if (II->getIntrinsicID() == Intrinsic::init_trampoline) {
3812  if (InitTrampoline)
3813  // More than one init_trampoline writes to this value. Give up.
3814  return nullptr;
3815  InitTrampoline = II;
3816  continue;
3817  }
3818  if (II->getIntrinsicID() == Intrinsic::adjust_trampoline)
3819  // Allow any number of calls to adjust.trampoline.
3820  continue;
3821  return nullptr;
3822  }
3823 
3824  // No call to init.trampoline found.
3825  if (!InitTrampoline)
3826  return nullptr;
3827 
3828  // Check that the alloca is being used in the expected way.
3829  if (InitTrampoline->getOperand(0) != TrampMem)
3830  return nullptr;
3831 
3832  return InitTrampoline;
3833 }
3834 
3835 static IntrinsicInst *findInitTrampolineFromBB(IntrinsicInst *AdjustTramp,
3836  Value *TrampMem) {
3837  // Visit all the previous instructions in the basic block, and try to find a
3838  // init.trampoline which has a direct path to the adjust.trampoline.
3839  for (BasicBlock::iterator I = AdjustTramp->getIterator(),
3840  E = AdjustTramp->getParent()->begin();
3841  I != E;) {
3842  Instruction *Inst = &*--I;
3843  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
3844  if (II->getIntrinsicID() == Intrinsic::init_trampoline &&
3845  II->getOperand(0) == TrampMem)
3846  return II;
3847  if (Inst->mayWriteToMemory())
3848  return nullptr;
3849  }
3850  return nullptr;
3851 }
3852 
3853 // Given a call to llvm.adjust.trampoline, find and return the corresponding
3854 // call to llvm.init.trampoline if the call to the trampoline can be optimized
3855 // to a direct call to a function. Otherwise return NULL.
3856 //
3857 static IntrinsicInst *findInitTrampoline(Value *Callee) {
3858  Callee = Callee->stripPointerCasts();
3859  IntrinsicInst *AdjustTramp = dyn_cast<IntrinsicInst>(Callee);
3860  if (!AdjustTramp ||
3861  AdjustTramp->getIntrinsicID() != Intrinsic::adjust_trampoline)
3862  return nullptr;
3863 
3864  Value *TrampMem = AdjustTramp->getOperand(0);
3865 
3866  if (IntrinsicInst *IT = findInitTrampolineFromAlloca(TrampMem))
3867  return IT;
3868  if (IntrinsicInst *IT = findInitTrampolineFromBB(AdjustTramp, TrampMem))
3869  return IT;
3870  return nullptr;
3871 }
3872 
3873 /// Improvements for call and invoke instructions.
3874 Instruction *InstCombiner::visitCallSite(CallSite CS) {
3875  if (isAllocLikeFn(CS.getInstruction(), &TLI))
3876  return visitAllocSite(*CS.getInstruction());
3877 
3878  bool Changed = false;
3879 
3880  // Mark any parameters that are known to be non-null with the nonnull
3881  // attribute. This is helpful for inlining calls to functions with null
3882  // checks on their arguments.
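 // Illustrative example (editor's sketch): for "call void @f(i8* %p)" where
 // %p can be proven non-null at this call site, the argument is tagged as
 // "i8* nonnull %p", which helps later passes (e.g. after inlining) remove
 // redundant null checks.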
3883  SmallVector<unsigned, 4> ArgNos;
3884  unsigned ArgNo = 0;
3885 
3886  for (Value *V : CS.args()) {
3887  if (V->getType()->isPointerTy() &&
3888  !CS.paramHasAttr(ArgNo, Attribute::NonNull) &&
3889  isKnownNonNullAt(V, CS.getInstruction(), &DT))
3890  ArgNos.push_back(ArgNo);
3891  ArgNo++;
3892  }
3893 
3894  assert(ArgNo == CS.arg_size() && "sanity check");
3895 
3896  if (!ArgNos.empty()) {
3897  AttributeList AS = CS.getAttributes();
3898  LLVMContext &Ctx = CS.getInstruction()->getContext();
3899  AS = AS.addParamAttribute(Ctx, ArgNos,
3900  Attribute::get(Ctx, Attribute::NonNull));
3901  CS.setAttributes(AS);
3902  Changed = true;
3903  }
3904 
3905  // If the callee is a pointer to a function, attempt to move any casts to the
3906  // arguments of the call/invoke.
3907  Value *Callee = CS.getCalledValue();
3908  if (!isa<Function>(Callee) && transformConstExprCastCall(CS))
3909  return nullptr;
3910 
3911  if (Function *CalleeF = dyn_cast<Function>(Callee)) {
3912  // Remove the convergent attr on calls when the callee is not convergent.
3913  if (CS.isConvergent() && !CalleeF->isConvergent() &&
3914  !CalleeF->isIntrinsic()) {
3915  DEBUG(dbgs() << "Removing convergent attr from instr "
3916  << CS.getInstruction() << "\n");
3917  CS.setNotConvergent();
3918  return CS.getInstruction();
3919  }
3920 
3921  // If the call and callee calling conventions don't match, this call must
3922  // be unreachable, as the call is undefined.
3923  if (CalleeF->getCallingConv() != CS.getCallingConv() &&
3924  // Only do this for calls to a function with a body. A prototype may
3925  // not actually end up matching the implementation's calling conv for a
3926  // variety of reasons (e.g. it may be written in assembly).
3927  !CalleeF->isDeclaration()) {
3928  Instruction *OldCall = CS.getInstruction();
3929  new StoreInst(ConstantInt::getTrue(Callee->getContext()),
3930  UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
3931  OldCall);
3932  // If OldCall does not return void then replaceAllUsesWith undef.
3933  // This allows ValueHandlers and custom metadata to adjust themselves.
3934  if (!OldCall->getType()->isVoidTy())
3935  replaceInstUsesWith(*OldCall, UndefValue::get(OldCall->getType()));
3936  if (isa<CallInst>(OldCall))
3937  return eraseInstFromFunction(*OldCall);
3938 
3939  // We cannot remove an invoke, because it would change the CFG, just
3940  // change the callee to a null pointer.
3941  cast<InvokeInst>(OldCall)->setCalledFunction(
3942  Constant::getNullValue(CalleeF->getType()));
3943  return nullptr;
3944  }
3945  }
3946 
3947  if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) {
3948  // If CS does not return void then replaceAllUsesWith undef.
3949  // This allows ValueHandlers and custom metadata to adjust themselves.
3950  if (!CS.getInstruction()->getType()->isVoidTy())
3951  replaceInstUsesWith(*CS.getInstruction(),
3952  UndefValue::get(CS.getInstruction()->getType()));
3953 
3954  if (isa<InvokeInst>(CS.getInstruction())) {
3955  // Can't remove an invoke because we cannot change the CFG.
3956  return nullptr;
3957  }
3958 
3959  // This instruction is not reachable, just remove it. We insert a store to
3960  // undef so that we know that this code is not reachable, despite the fact
3961  // that we can't modify the CFG here.
3962  new StoreInst(ConstantInt::getTrue(Callee->getContext()),
3963  UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
3964  CS.getInstruction());
3965 
3966  return eraseInstFromFunction(*CS.getInstruction());
3967  }
3968 
3969  if (IntrinsicInst *II = findInitTrampoline(Callee))
3970  return transformCallThroughTrampoline(CS, II);
3971 
3972  PointerType *PTy = cast<PointerType>(Callee->getType());
3973  FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
3974  if (FTy->isVarArg()) {
3975  int ix = FTy->getNumParams();
3976  // See if we can optimize any arguments passed through the varargs area of
3977  // the call.
3978  for (CallSite::arg_iterator I = CS.arg_begin() + FTy->getNumParams(),
3979  E = CS.arg_end(); I != E; ++I, ++ix) {
3980  CastInst *CI = dyn_cast<CastInst>(*I);
3981  if (CI && isSafeToEliminateVarargsCast(CS, DL, CI, ix)) {
3982  *I = CI->getOperand(0);
3983  Changed = true;
3984  }
3985  }
3986  }
3987 
3988  if (isa<InlineAsm>(Callee) && !CS.doesNotThrow()) {
3989  // Inline asm calls cannot throw - mark them 'nounwind'.
3990  CS.setDoesNotThrow();
3991  Changed = true;
3992  }
3993 
3994  // Try to optimize the call if possible, we require DataLayout for most of
3995  // this. None of these calls are seen as possibly dead so go ahead and
3996  // delete the instruction now.
3997  if (CallInst *CI = dyn_cast<CallInst>(CS.getInstruction())) {
3998  Instruction *I = tryOptimizeCall(CI);
3999  // If we changed something return the result, etc. Otherwise let
4000  // the fallthrough check.
4001  if (I) return eraseInstFromFunction(*I);
4002  }
4003 
4004  return Changed ? CS.getInstruction() : nullptr;
4005 }
4006 
4007 /// If the callee is a constexpr cast of a function, attempt to move the cast to
4008 /// the arguments of the call/invoke.
4009 bool InstCombiner::transformConstExprCastCall(CallSite CS) {
4010  auto *Callee = dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
4011  if (!Callee)
4012  return false;
4013 
4014  // The prototype of a thunk is a lie. Don't directly call such a function.
4015  if (Callee->hasFnAttribute("thunk"))
4016  return false;
4017 
4018  Instruction *Caller = CS.getInstruction();
4019  const AttributeList &CallerPAL = CS.getAttributes();
4020 
4021  // Okay, this is a cast from a function to a different type. Unless doing so
4022  // would cause a type conversion of one of our arguments, change this call to
4023  // be a direct call with arguments casted to the appropriate types.
4024  //
4025  FunctionType *FT = Callee->getFunctionType();
4026  Type *OldRetTy = Caller->getType();
4027  Type *NewRetTy = FT->getReturnType();
4028 
4029  // Check to see if we are changing the return type...
4030  if (OldRetTy != NewRetTy) {
4031 
4032  if (NewRetTy->isStructTy())
4033  return false; // TODO: Handle multiple return values.
4034 
4035  if (!CastInst::isBitOrNoopPointerCastable(NewRetTy, OldRetTy, DL)) {
4036  if (Callee->isDeclaration())
4037  return false; // Cannot transform this return value.
4038 
4039  if (!Caller->use_empty() &&
4040  // void -> non-void is handled specially
4041  !NewRetTy->isVoidTy())
4042  return false; // Cannot transform this return value.
4043  }
4044 
4045  if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
4046  AttrBuilder RAttrs(CallerPAL, AttributeList::ReturnIndex);
4047  if (RAttrs.overlaps(AttributeFuncs::typeIncompatible(NewRetTy)))
4048  return false; // Attribute not compatible with transformed value.
4049  }
4050 
4051  // If the callsite is an invoke instruction, and the return value is used by
4052  // a PHI node in a successor, we cannot change the return type of the call
4053  // because there is no place to put the cast instruction (without breaking
4054  // the critical edge). Bail out in this case.
4055  if (!Caller->use_empty())
4056  if (InvokeInst *II = dyn_cast<InvokeInst>(Caller))
4057  for (User *U : II->users())
4058  if (PHINode *PN = dyn_cast<PHINode>(U))
4059  if (PN->getParent() == II->getNormalDest() ||
4060  PN->getParent() == II->getUnwindDest())
4061  return false;
4062  }
4063 
4064  unsigned NumActualArgs = CS.arg_size();
4065  unsigned NumCommonArgs = std::min(FT->getNumParams(), NumActualArgs);
4066 
4067  // Prevent us turning:
4068  // declare void @takes_i32_inalloca(i32* inalloca)
4069  // call void bitcast (void (i32*)* @takes_i32_inalloca to void (i32)*)(i32 0)
4070  //
4071  // into:
4072  // call void @takes_i32_inalloca(i32* null)
4073  //
4074  // Similarly, avoid folding away bitcasts of byval calls.
4075  if (Callee->getAttributes().hasAttrSomewhere(Attribute::InAlloca) ||
4076  Callee->getAttributes().hasAttrSomewhere(Attribute::ByVal))
4077  return false;
4078 
4080  for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) {
4081  Type *ParamTy = FT->getParamType(i);
4082  Type *ActTy = (*AI)->getType();
4083 
4084  if (!CastInst::isBitOrNoopPointerCastable(ActTy, ParamTy, DL))
4085  return false; // Cannot transform this parameter value.
4086 
4087  if (AttrBuilder(CallerPAL.getParamAttributes(i))
4088  .overlaps(AttributeFuncs::typeIncompatible(ParamTy)))
4089  return false; // Attribute not compatible with transformed value.
4090 
4091  if (CS.isInAllocaArgument(i))
4092  return false; // Cannot transform to and from inalloca.
4093 
4094  // If the parameter is passed as a byval argument, then we have to have a
4095  // sized type and the sized type has to have the same size as the old type.
4096  if (ParamTy != ActTy && CallerPAL.hasParamAttribute(i, Attribute::ByVal)) {
4097  PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy);
4098  if (!ParamPTy || !ParamPTy->getElementType()->isSized())
4099  return false;
4100 
4101  Type *CurElTy = ActTy->getPointerElementType();
4102  if (DL.getTypeAllocSize(CurElTy) !=
4103  DL.getTypeAllocSize(ParamPTy->getElementType()))
4104  return false;
4105  }
4106  }
4107 
4108  if (Callee->isDeclaration()) {
4109  // Do not delete arguments unless we have a function body.
4110  if (FT->getNumParams() < NumActualArgs && !FT->isVarArg())
4111  return false;
4112 
4113  // If the callee is just a declaration, don't change the varargsness of the
4114  // call. We don't want to introduce a varargs call where one doesn't
4115  // already exist.
4116  PointerType *APTy = cast<PointerType>(CS.getCalledValue()->getType());
4117  if (FT->isVarArg()!=cast<FunctionType>(APTy->getElementType())->isVarArg())
4118  return false;
4119 
4120  // If both the callee and the cast type are varargs, we still have to make
4121  // sure the number of fixed parameters are the same or we have the same
4122  // ABI issues as if we introduce a varargs call.
4123  if (FT->isVarArg() &&
4124  cast<FunctionType>(APTy->getElementType())->isVarArg() &&
4125  FT->getNumParams() !=
4126  cast<FunctionType>(APTy->getElementType())->getNumParams())
4127  return false;
4128  }
4129 
4130  if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
4131  !CallerPAL.isEmpty()) {
4132  // In this case we have more arguments than the new function type, but we
4133  // won't be dropping them. Check that these extra arguments have attributes
4134  // that are compatible with being a vararg call argument.
4135  unsigned SRetIdx;
4136  if (CallerPAL.hasAttrSomewhere(Attribute::StructRet, &SRetIdx) &&
4137  SRetIdx > FT->getNumParams())
4138  return false;
4139  }
4140 
4141  // Okay, we decided that this is a safe thing to do: go ahead and start
4142  // inserting cast instructions as necessary.
4143  SmallVector<Value *, 8> Args;
4144  SmallVector<AttributeSet, 8> ArgAttrs;
4145  Args.reserve(NumActualArgs);
4146  ArgAttrs.reserve(NumActualArgs);
4147 
4148  // Get any return attributes.
4149  AttrBuilder RAttrs(CallerPAL, AttributeList::ReturnIndex);
4150 
4151  // If the return value is not being used, the type may not be compatible
4152  // with the existing attributes. Wipe out any problematic attributes.
4153  RAttrs.remove(AttributeFuncs::typeIncompatible(NewRetTy));
4154 
4155  AI = CS.arg_begin();
4156  for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
4157  Type *ParamTy = FT->getParamType(i);
4158 
4159  Value *NewArg = *AI;
4160  if ((*AI)->getType() != ParamTy)
4161  NewArg = Builder.CreateBitOrPointerCast(*AI, ParamTy);
4162  Args.push_back(NewArg);
4163 
4164  // Add any parameter attributes.
4165  ArgAttrs.push_back(CallerPAL.getParamAttributes(i));
4166  }
4167 
4168  // If the function takes more arguments than the call was taking, add them
4169  // now.
4170  for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i) {
4171  Args.push_back(Constant::getNullValue(FT->getParamType(i)));
4172  ArgAttrs.push_back(AttributeSet());
4173  }
4174 
4175  // If we are removing arguments to the function, emit an obnoxious warning.
4176  if (FT->getNumParams() < NumActualArgs) {
4177  // TODO: if (!FT->isVarArg()) this call may be unreachable. PR14722
4178  if (FT->isVarArg()) {
4179  // Add all of the arguments in their promoted form to the arg list.
4180  for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) {
4181  Type *PTy = getPromotedType((*AI)->getType());
4182  Value *NewArg = *AI;
4183  if (PTy != (*AI)->getType()) {
4184  // Must promote to pass through va_arg area!
4185  Instruction::CastOps opcode =
4186  CastInst::getCastOpcode(*AI, false, PTy, false);
4187  NewArg = Builder.CreateCast(opcode, *AI, PTy);
4188  }
4189  Args.push_back(NewArg);
4190 
4191  // Add any parameter attributes.
4192  ArgAttrs.push_back(CallerPAL.getParamAttributes(i));
4193  }
4194  }
4195  }
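 // Illustrative note (editor's sketch): integer arguments narrower than 32
 // bits that land in the variadic portion are widened here (e.g. i8 or i16 to
 // i32), mirroring C's default argument promotions for values passed through
 // a va_arg area.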
4196 
4197  AttributeSet FnAttrs = CallerPAL.getFnAttributes();
4198 
4199  if (NewRetTy->isVoidTy())
4200  Caller->setName(""); // Void type should not have a name.
4201 
4202  assert((ArgAttrs.size() == FT->getNumParams() || FT->isVarArg()) &&
4203  "missing argument attributes");
4204  LLVMContext &Ctx = Callee->getContext();
4205  AttributeList NewCallerPAL = AttributeList::get(
4206  Ctx, FnAttrs, AttributeSet::get(Ctx, RAttrs), ArgAttrs);
4207 
4208  SmallVector<OperandBundleDef, 1> OpBundles;
4209  CS.getOperandBundlesAsDefs(OpBundles);
4210 
4211  CallSite NewCS;
4212  if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
4213  NewCS = Builder.CreateInvoke(Callee, II->getNormalDest(),
4214  II->getUnwindDest(), Args, OpBundles);
4215  } else {
4216  NewCS = Builder.CreateCall(Callee, Args, OpBundles);
4217  cast<CallInst>(NewCS.getInstruction())
4218  ->setTailCallKind(cast<CallInst>(Caller)->getTailCallKind());
4219  }
4220  NewCS->takeName(Caller);
4221  NewCS.setCallingConv(CS.getCallingConv());
4222  NewCS.setAttributes(NewCallerPAL);
4223 
4224  // Preserve the weight metadata for the new call instruction. The metadata
4225  // is used by SamplePGO to check callsite's hotness.
4226  uint64_t W;
4227  if (Caller->extractProfTotalWeight(W))
4228  NewCS->setProfWeight(W);
4229 
4230  // Insert a cast of the return type as necessary.
4231  Instruction *NC = NewCS.getInstruction();
4232  Value *NV = NC;
4233  if (OldRetTy != NV->getType() && !Caller->use_empty()) {
4234  if (!NV->getType()->isVoidTy()) {
4235  NV = NC = CastInst::CreateBitOrPointerCast(NC, OldRetTy);
4236  NC->setDebugLoc(Caller->getDebugLoc());
4237 
4238  // If this is an invoke instruction, we should insert it after the first
4239  // non-phi instruction in the normal successor block.
4240  if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
4241  BasicBlock::iterator I = II->getNormalDest()->getFirstInsertionPt();
4242  InsertNewInstBefore(NC, *I);
4243  } else {
4244  // Otherwise, it's a call, just insert cast right after the call.
4245  InsertNewInstBefore(NC, *Caller);
4246  }
4247  Worklist.AddUsersToWorkList(*Caller);
4248  } else {
4249  NV = UndefValue::get(Caller->getType());
4250  }
4251  }
4252 
4253  if (!Caller->use_empty())
4254  replaceInstUsesWith(*Caller, NV);
4255  else if (Caller->hasValueHandle()) {
4256  if (OldRetTy == NV->getType())
4257  ValueHandleBase::ValueIsRAUWd(Caller, NV);
4258  else
4259  // We cannot call ValueIsRAUWd with a different type, and the
4260  // actual tracked value will disappear.
4261  ValueHandleBase::ValueIsDeleted(Caller);
4262  }
4263 
4264  eraseInstFromFunction(*Caller);
4265  return true;
4266 }
4267 
4268 /// Turn a call to a function created by init_trampoline / adjust_trampoline
4269 /// intrinsic pair into a direct call to the underlying function.
4270 Instruction *
4271 InstCombiner::transformCallThroughTrampoline(CallSite CS,
4272  IntrinsicInst *Tramp) {
4273  Value *Callee = CS.getCalledValue();
4274  PointerType *PTy = cast<PointerType>(Callee->getType());
4275  FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
4276  AttributeList Attrs = CS.getAttributes();
4277 
4278  // If the call already has the 'nest' attribute somewhere then give up -
4279  // otherwise 'nest' would occur twice after splicing in the chain.
4280  if (Attrs.hasAttrSomewhere(Attribute::Nest))
4281  return nullptr;
4282 
4283  assert(Tramp &&
4284  "transformCallThroughTrampoline called with incorrect CallSite.");
4285 
4286  Function *NestF =cast<Function>(Tramp->getArgOperand(1)->stripPointerCasts());
4287  FunctionType *NestFTy = cast<FunctionType>(NestF->getValueType());
4288 
4289  AttributeList NestAttrs = NestF->getAttributes();
4290  if (!NestAttrs.isEmpty()) {
4291  unsigned NestArgNo = 0;
4292  Type *NestTy = nullptr;
4293  AttributeSet NestAttr;
4294 
4295  // Look for a parameter marked with the 'nest' attribute.
4296  for (FunctionType::param_iterator I = NestFTy->param_begin(),
4297  E = NestFTy->param_end();
4298  I != E; ++NestArgNo, ++I) {
4299  AttributeSet AS = NestAttrs.getParamAttributes(NestArgNo);
4300  if (AS.hasAttribute(Attribute::Nest)) {
4301  // Record the parameter type and any other attributes.
4302  NestTy = *I;
4303  NestAttr = AS;
4304  break;
4305  }
4306  }
4307 
4308  if (NestTy) {
4309  Instruction *Caller = CS.getInstruction();
4310  std::vector<Value*> NewArgs;
4311  std::vector<AttributeSet> NewArgAttrs;
4312  NewArgs.reserve(CS.arg_size() + 1);
4313  NewArgAttrs.reserve(CS.arg_size());
4314 
4315  // Insert the nest argument into the call argument list, which may
4316  // mean appending it. Likewise for attributes.
4317 
4318  {
4319  unsigned ArgNo = 0;
4320  CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
4321  do {
4322  if (ArgNo == NestArgNo) {
4323  // Add the chain argument and attributes.
4324  Value *NestVal = Tramp->getArgOperand(2);
4325  if (NestVal->getType() != NestTy)
4326  NestVal = Builder.CreateBitCast(NestVal, NestTy, "nest");
4327  NewArgs.push_back(NestVal);
4328  NewArgAttrs.push_back(NestAttr);
4329  }
4330 
4331  if (I == E)
4332  break;
4333 
4334  // Add the original argument and attributes.
4335  NewArgs.push_back(*I);
4336  NewArgAttrs.push_back(Attrs.getParamAttributes(ArgNo));
4337 
4338  ++ArgNo;
4339  ++I;
4340  } while (true);
4341  }
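 // Illustrative example (editor's sketch): if the nest parameter of the
 // underlying function is its first argument and the trampoline call was
 // "call i32 %tramp(i32 7)", the rebuilt argument list becomes the nest value
 // (taken from the third operand of llvm.init.trampoline) followed by i32 7,
 // with the 'nest' attribute attached to the inserted argument.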
4342 
4343  // The trampoline may have been bitcast to a bogus type (FTy).
4344  // Handle this by synthesizing a new function type, equal to FTy
4345  // with the chain parameter inserted.
4346 
4347  std::vector<Type*> NewTypes;
4348  NewTypes.reserve(FTy->getNumParams()+1);
4349 
4350  // Insert the chain's type into the list of parameter types, which may
4351  // mean appending it.
4352  {
4353  unsigned ArgNo = 0;
4354  FunctionType::param_iterator I = FTy->param_begin(),
4355  E = FTy->param_end();
4356 
4357  do {
4358  if (ArgNo == NestArgNo)
4359  // Add the chain's type.
4360  NewTypes.push_back(NestTy);
4361 
4362  if (I == E)
4363  break;
4364 
4365  // Add the original type.
4366  NewTypes.push_back(*I);
4367 
4368  ++ArgNo;
4369  ++I;
4370  } while (true);
4371  }
4372 
4373  // Replace the trampoline call with a direct call. Let the generic
4374  // code sort out any function type mismatches.
4375  FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(), NewTypes,
4376  FTy->isVarArg());
4377  Constant *NewCallee =
4378  NestF->getType() == PointerType::getUnqual(NewFTy) ?
4379  NestF : ConstantExpr::getBitCast(NestF,
4380  PointerType::getUnqual(NewFTy));
4381  AttributeList NewPAL =
4382  AttributeList::get(FTy->getContext(), Attrs.getFnAttributes(),
4383  Attrs.getRetAttributes(), NewArgAttrs);
4384 
4385  SmallVector<OperandBundleDef, 1> OpBundles;
4386  CS.getOperandBundlesAsDefs(OpBundles);
4387 
4388  Instruction *NewCaller;
4389  if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
4390  NewCaller = InvokeInst::Create(NewCallee,
4391  II->getNormalDest(), II->getUnwindDest(),
4392  NewArgs, OpBundles);
4393  cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv());
4394  cast<InvokeInst>(NewCaller)->setAttributes(NewPAL);
4395  } else {
4396  NewCaller = CallInst::Create(NewCallee, NewArgs, OpBundles);
4397  cast<CallInst>(NewCaller)->setTailCallKind(
4398  cast<CallInst>(Caller)->getTailCallKind());
4399  cast<CallInst>(NewCaller)->setCallingConv(
4400  cast<CallInst>(Caller)->getCallingConv());
4401  cast<CallInst>(NewCaller)->setAttributes(NewPAL);
4402  }
4403 
4404  return NewCaller;
4405  }
4406  }
4407 
4408  // Replace the trampoline call with a direct call. Since there is no 'nest'
4409  // parameter, there is no need to adjust the argument list. Let the generic
4410  // code sort out any function type mismatches.
4411  Constant *NewCallee =
4412  NestF->getType() == PTy ? NestF :
4413  ConstantExpr::getBitCast(NestF, PTy);
4414  CS.setCalledFunction(NewCallee);
4415  return CS.getInstruction();
4416 }
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
bool isFPPredicate() const
Definition: InstrTypes.h:951
const NoneType None
Definition: None.h:24
A vector constant whose element type is a simple 1/2/4/8-byte integer or float/double, and whose elements are just simple data values (i.e.
Definition: Constants.h:735
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
Definition: PatternMatch.h:550
uint64_t CallInst * C
User::op_iterator arg_iterator
The type of iterator to use when looping over actual arguments at this call site. ...
Definition: CallSite.h:210
LibCallSimplifier - This class implements a collection of optimizations that replace well formed call...
IntegerType * getType() const
getType - Specialize the getType() method to always return an IntegerType, which reduces the amount o...
Definition: Constants.h:172
m_Intrinsic_Ty< Opnd0, Opnd1 >::Ty m_FMin(const Opnd0 &Op0, const Opnd1 &Op1)
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:109
void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth, const Instruction *CxtI) const
void copyFastMathFlags(FastMathFlags FMF)
Convenience function for transferring all fast-math flag values to this instruction, which must be an operator which supports these flags.
void setDoesNotThrow()
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:72
static void ValueIsDeleted(Value *V)
Definition: Value.cpp:824
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1634
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
bool isZero() const
Definition: APFloat.h:1128
static IntegerType * getInt1Ty(LLVMContext &C)
Definition: Type.cpp:173
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
Definition: PatternMatch.h:80
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1542
unsigned getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to ensure that the alignment of V is at least PrefAlign bytes.
Definition: Local.cpp:1034
static Value * simplifyX86immShift(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
Value * getDest() const
This is just like getRawDest, but it strips off any cast instructions that feed it, giving the original input.
static APInt getAllOnesValue(unsigned numBits)
Get the all-ones value.
Definition: APInt.h:555
DiagnosticInfoOptimizationBase::Argument NV
unsigned arg_size() const
Definition: CallSite.h:216
CallingConv::ID getCallingConv() const
Get the calling convention of the call.
Definition: CallSite.h:309
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition: ilist_node.h:289
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
Type * getParamType(unsigned i) const
Parameter type accessors.
Definition: DerivedTypes.h:135
Constant * getElementAsConstant(unsigned i) const
Return a Constant for a specified index&#39;s element.
Definition: Constants.cpp:2645
unsigned countMinPopulation() const
Returns the number of bits known to be one.
Definition: KnownBits.h:188
bool isInAllocaArgument(unsigned ArgNo) const
Determine whether this argument is passed in an alloca.
Definition: CallSite.h:577
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:63
m_Intrinsic_Ty< Opnd0 >::Ty m_BitReverse(const Opnd0 &Op0)
Instruction * visitCallInst(CallInst &CI)
CallInst simplification.
m_Intrinsic_Ty< Opnd0, Opnd1 >::Ty m_FMax(const Opnd0 &Op0, const Opnd1 &Op1)
bool isSized(SmallPtrSetImpl< Type *> *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition: Type.h:262
LLVM_ATTRIBUTE_ALWAYS_INLINE size_type size() const
Definition: SmallVector.h:136
An instruction for ordering other memory operations.
Definition: Instructions.h:440
match_zero m_Zero()
Match an arbitrary zero/null constant.
Definition: PatternMatch.h:145
static MDString * get(LLVMContext &Context, StringRef Str)
Definition: Metadata.cpp:446
Instruction * visitVACopyInst(VACopyInst &I)
static ConstantAggregateZero * get(Type *Ty)
Definition: Constants.cpp:1237
static bool simplifyX86MaskedStore(IntrinsicInst &II, InstCombiner &IC)
This class represents a function call, abstracting a target machine&#39;s calling convention.
This file contains the declarations for metadata subclasses.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Get a value with low bits set.
Definition: APInt.h:641
void setOrdering(AtomicOrdering Ordering)
Sets the ordering constraint of this load instruction.
Definition: Instructions.h:239
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
Definition: PatternMatch.h:91
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space...
Definition: Type.cpp:617
iterator_range< IterTy > args() const
Definition: CallSite.h:212
static uint64_t round(uint64_t Acc, uint64_t Input)
Definition: xxhash.cpp:57
m_Intrinsic_Ty< Opnd0 >::Ty m_BSwap(const Opnd0 &Op0)
bool hasValueHandle() const
Return true if there is a value handle associated with this value.
Definition: Value.h:481
unsigned less or equal
Definition: InstrTypes.h:886
bool mayWriteToMemory() const
Return true if this instruction may modify memory.
unsigned less than
Definition: InstrTypes.h:885
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", Instruction *InsertBefore=nullptr, Instruction *MDFrom=nullptr)
static Instruction * foldCttzCtlz(IntrinsicInst &II, InstCombiner &IC)
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:697
static CastInst * CreateBitOrPointerCast(Value *S, Type *Ty, const Twine &Name="", Instruction *InsertBefore=nullptr)
Create a BitCast, a PtrToInt, or an IntToPtr cast instruction.
This class wraps the llvm.memset intrinsic.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:818
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:818
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1386
bool isValidAssumeForContext(const Instruction *I, const Instruction *CxtI, const DominatorTree *DT=nullptr)
Return true if it is valid to use the assumptions provided by an assume intrinsic, I, at the point in the control-flow identified by the context instruction, CxtI.
STATISTIC(NumFunctions, "Total number of functions")
Metadata node.
Definition: Metadata.h:862
static CallInst * Create(Value *Func, ArrayRef< Value *> Args, ArrayRef< OperandBundleDef > Bundles=None, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
const fltSemantics & getSemantics() const
Definition: APFloat.h:1140
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:503
BinaryOp_match< LHS, RHS, Instruction::FSub > m_FSub(const LHS &L, const RHS &R)
Definition: PatternMatch.h:496
An instruction for reading from memory.
Definition: Instructions.h:164
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:177
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:883
static Constant * getCompare(unsigned short pred, Constant *C1, Constant *C2, bool OnlyIfReduced=false)
Return an ICmp or FCmp comparison operator constant expression.
Definition: Constants.cpp:1832
unsigned countMaxTrailingZeros() const
Returns the maximum number of trailing zero bits possible.
Definition: KnownBits.h:168
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:227
static OverflowCheckFlavor IntrinsicIDToOverflowCheckFlavor(unsigned ID)
Returns the OverflowCheckFlavor corresponding to an overflow_with_op intrinsic.
fneg_match< LHS > m_FNeg(const LHS &L)
Match a floating point negate.
void reserve(size_type N)
Definition: SmallVector.h:380
static Instruction * simplifyMaskedStore(IntrinsicInst &II, InstCombiner &IC)
Instruction * visitVAStartInst(VAStartInst &I)
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition: APInt.h:528
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1488
bool isGCRelocate(ImmutableCallSite CS)
Definition: Statepoint.cpp:43
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:130
const CallInst * isFreeCall(const Value *I, const TargetLibraryInfo *TLI)
isFreeCall - Returns non-null if the value is a call to the builtin free()
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:207
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition: KnownBits.h:138
static bool isBitOrNoopPointerCastable(Type *SrcTy, Type *DestTy, const DataLayout &DL)
Check whether a bitcast, inttoptr, or ptrtoint cast between these types is valid and a no-op...
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:252
bool isIdenticalTo(const Instruction *I) const
Return true if the specified instruction is exactly identical to the current one. ...
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
opStatus divide(const APFloat &RHS, roundingMode RM)
Definition: APFloat.h:968
static Instruction * SimplifyNVVMIntrinsic(IntrinsicInst *II, InstCombiner &IC)
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
Instruction * visitInvokeInst(InvokeInst &II)
static Constant * getIntegerCast(Constant *C, Type *Ty, bool isSigned)
Create a ZExt, Bitcast or Trunc for integer -> integer casts.
Definition: Constants.cpp:1518
APInt getLoBits(unsigned numBits) const
Compute an APInt containing numBits lowbits from this APInt.
Definition: APInt.cpp:515
static APFloat fmed3AMDGCN(const APFloat &Src0, const APFloat &Src1, const APFloat &Src2)
Type * getPointerElementType() const
Definition: Type.h:373
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE, etc.
Definition: InstrTypes.h:958
OverflowCheckFlavor
Specific patterns of overflow check idioms that we match.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
unsigned getNumArgOperands() const
Return the number of call arguments.
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:560
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:451
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition: APInt.h:981
This class wraps the llvm.memmove intrinsic.
AttributeList addParamAttribute(LLVMContext &C, unsigned ArgNo, Attribute::AttrKind Kind) const
Add an argument attribute to the list.
Definition: Attributes.h:398
Value * SimplifyCall(ImmutableCallSite CS, Value *V, User::op_iterator ArgBegin, User::op_iterator ArgEnd, const SimplifyQuery &Q)
Given a function and iterators over arguments, fold the result or return null.
IterTy arg_end() const
Definition: CallSite.h:549
Instruction * eraseInstFromFunction(Instruction &I)
Combiner aware instruction erasure.
CastClass_match< OpTy, Instruction::Trunc > m_Trunc(const OpTy &Op)
Matches Trunc.
Definition: PatternMatch.h:888
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:197
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:664
The core instruction combiner logic.
static bool isSafeToEliminateVarargsCast(const CallSite CS, const DataLayout &DL, const CastInst *const CI, const int ix)
If this cast does not affect the value passed through the varargs area, we can eliminate the use of t...
bool hasUnsafeAlgebra() const
Determine whether the unsafe-algebra flag is set.
InstrTy * getInstruction() const
Definition: CallSite.h:89
static Constant * getSExt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:1556
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:284
uint64_t getNumElements() const
Definition: DerivedTypes.h:359
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:966
This file implements a class to represent arbitrary precision integral constant values and operations...
not_match< LHS > m_Not(const LHS &L)
Definition: PatternMatch.h:961
All zero aggregate value.
Definition: Constants.h:332
static Value * simplifyX86vpermv(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
Attempt to convert vpermd/vpermps to shufflevector if the mask is constant.
Metadata * LowAndHigh[]
ValTy * getCalledValue() const
Return the pointer to function that is being called.
Definition: CallSite.h:97
static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E, unsigned NumOperands)
DominatorTree & getDominatorTree() const
unsigned countMaxPopulation() const
Returns the maximum number of bits that could be one.
Definition: KnownBits.h:193
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:86
Class to represent function types.
Definition: DerivedTypes.h:103
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1444
bool isInfinity() const
Definition: APFloat.h:1129
FastMathFlags getFastMathFlags() const
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
#define F(x, y, z)
Definition: MD5.cpp:55
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
Value * CreateSExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a SExt or Trunc from the integer value V to DestTy.
Definition: IRBuilder.h:1405
This represents the llvm.va_start intrinsic.
CastClass_match< OpTy, Instruction::FPExt > m_FPExt(const OpTy &Op)
Matches FPExt.
Definition: PatternMatch.h:931
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition: APFloat.cpp:4441
CastClass_match< OpTy, Instruction::ZExt > m_ZExt(const OpTy &Op)
Matches ZExt.
Definition: PatternMatch.h:900
AttributeSet getParamAttributes(unsigned ArgNo) const
The attributes for the argument or parameter at the given index are returned.
bool isVarArg() const
Definition: DerivedTypes.h:123
bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Return true if the call or the callee has the given attribute.
Definition: CallSite.h:374
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:190
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains V broadcast into all NumElts elements (a splat).
Definition: IRBuilder.h:1835
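A one-line sketch of the splat helper described above, assuming an in-scope IRBuilder; the function name splat4 and the lane count are made up for illustration:

  #include "llvm/IR/IRBuilder.h"
  using namespace llvm;

  // Broadcast a scalar into a 4-element vector (insertelement + shufflevector
  // under the hood).
  static Value *splat4(IRBuilder<> &Builder, Value *Scalar) {
    return Builder.CreateVectorSplat(/*NumElts=*/4, Scalar, "splat");
  }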
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:138
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:121
AttrBuilder & remove(const AttrBuilder &B)
Remove the attributes from the builder.
static Value * simplifyX86pack(IntrinsicInst &II, bool IsSigned)
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:197
cmpResult
IEEE-754R 5.11: Floating Point Comparison Relations.
Definition: APFloat.h:166
An instruction for storing to memory.
Definition: Instructions.h:306
bool extractProfTotalWeight(uint64_t &TotalVal) const
Retrieve total raw weight values of a branch.
Definition: Metadata.cpp:1328
SelectClass_match< Cond, LHS, RHS > m_Select(const Cond &C, const LHS &L, const RHS &R)
Definition: PatternMatch.h:845
static void ValueIsRAUWd(Value *Old, Value *New)
Definition: Value.cpp:877
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1383
static Value * simplifyX86vpcom(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder, bool IsSigned)
Decode XOP integer vector comparison intrinsics.
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:290
static ConstantAsMetadata * get(Constant *C)
Definition: Metadata.h:408
static Value * simplifyX86movmsk(const IntrinsicInst &II)
Function * getDeclaration(Module *M, ID id, ArrayRef< Type *> Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:975
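A short sketch of how a declaration obtained this way is typically paired with IRBuilder::CreateCall; the choice of llvm.ctpop and the helper name emitPopCount are assumptions for illustration only:

  #include "llvm/IR/IRBuilder.h"
  #include "llvm/IR/Intrinsics.h"
  #include "llvm/IR/Module.h"
  using namespace llvm;

  // Emit a call to llvm.ctpop on the integer value V.
  static Value *emitPopCount(IRBuilder<> &Builder, Module *M, Value *V) {
    Function *Ctpop =
        Intrinsic::getDeclaration(M, Intrinsic::ctpop, {V->getType()});
    return Builder.CreateCall(Ctpop, {V});
  }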
This class represents a truncation of integer types.
static unsigned getKnownAlignment(Value *V, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to infer an alignment for the specified pointer.
Definition: Local.h:188
Type * getElementType() const
Return the element type of the array/vector.
Definition: Constants.cpp:2271
Value * getOperand(unsigned i) const
Definition: User.h:154
Class to represent pointers.
Definition: DerivedTypes.h:467
static Value * simplifyX86vperm2(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
The shuffle mask for a perm2*128 selects any two halves of two 256-bit source vectors, unless a zero bit is set.
bool hasAttribute(Attribute::AttrKind Kind) const
Return true if the attribute exists in this set.
Definition: Attributes.cpp:571
Constant * getAggregateElement(unsigned Elt) const
For aggregates (struct/array/vector) return the constant that corresponds to the specified element if...
Definition: Constants.cpp:277
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:301
const DataLayout & getDataLayout() const
static MetadataAsValue * get(LLVMContext &Context, Metadata *MD)
Definition: Metadata.cpp:106
static Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:1678
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:141
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
A and B are either alignments or offsets.
Definition: MathExtras.h:602
bool hasAttrSomewhere(Attribute::AttrKind Kind, unsigned *Index=nullptr) const
Return true if the specified attribute is set for at least one parameter or for the return value...
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
OneUse_match< T > m_OneUse(const T &SubPattern)
Definition: PatternMatch.h:63
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata *> MDs)
Definition: Metadata.h:1164
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:357
void setAttributes(AttributeList PAL)
Set the parameter attributes of the call.
Definition: CallSite.h:330
Instruction * visitFenceInst(FenceInst &FI)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:404
static Instruction * simplifyMaskedScatter(IntrinsicInst &II, InstCombiner &IC)
const Value * getCalledValue() const
Get a pointer to the function that is invoked by this instruction.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:149
Subclasses of this class are all able to terminate a basic block.
Definition: InstrTypes.h:54
static AttributeSet get(LLVMContext &C, const AttrBuilder &B)
Definition: Attributes.cpp:501
bool isNegative() const
Definition: APFloat.h:1132
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:277
static ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
Definition: Constants.cpp:1306
Value * CreateAShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1045
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:421
ConstantInt * lowerObjectSizeCall(IntrinsicInst *ObjectSize, const DataLayout &DL, const TargetLibraryInfo *TLI, bool MustSucceed)
Try to turn a call to .objectsize into an integer value of the given Type.
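A hedged sketch of the usual calling pattern: try to fold an llvm.objectsize call to a constant and let the caller substitute it. The helper name tryFoldObjectSize and the MustSucceed=false choice are illustrative, not this file's code:

  #include "llvm/Analysis/MemoryBuiltins.h"
  #include "llvm/IR/DataLayout.h"
  #include "llvm/IR/IntrinsicInst.h"
  using namespace llvm;

  // If the pointed-to object's size can be computed, return it as a constant
  // the caller may use to replace the intrinsic; otherwise return null.
  static Value *tryFoldObjectSize(IntrinsicInst *ObjSize, const DataLayout &DL,
                                  const TargetLibraryInfo *TLI) {
    return lowerObjectSizeCall(ObjSize, DL, TLI, /*MustSucceed=*/false);
  }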
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
Definition: PatternMatch.h:556
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
ConstantInt * getTrue()
Get the constant value for i1 true.
Definition: IRBuilder.h:288
bool isNaN() const
Definition: APFloat.h:1130
This is an important base class in LLVM.
Definition: Constant.h:42
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.h:1689
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:221
static cl::opt< unsigned > UnfoldElementAtomicMemcpyMaxElements("unfold-element-atomic-memcpy-max-elements", cl::init(16), cl::desc("Maximum number of elements in atomic memcpy the optimizer is " "allowed to unfold"))
unsigned getNumParams() const
Return the number of fixed parameters this function type requires.
Definition: DerivedTypes.h:139
#define A
Definition: LargeTest.cpp:12
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:264
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:358
unsigned getParamAlignment(unsigned ArgNo) const
Extract the alignment for a call or parameter (0=unknown).
This file declares a class to represent arbitrary precision floating point values and provide a varie...
std::underlying_type< E >::type Underlying(E Val)
Check that Val is in range for E, and return Val cast to E's underlying type.
Definition: BitmaskEnum.h:91
static IntrinsicInst * findInitTrampolineFromBB(IntrinsicInst *AdjustTramp, Value *TrampMem)
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:860
static const unsigned End
match_combine_or< CastClass_match< OpTy, Instruction::ZExt >, CastClass_match< OpTy, Instruction::SExt > > m_ZExtOrSExt(const OpTy &Op)
Definition: PatternMatch.h:907
bool isAllOnes() const
Returns true if value is all one bits.
Definition: KnownBits.h:78
void setCallingConv(CallingConv::ID CC)
Set the calling convention of the call.
Definition: CallSite.h:313
bool isGCResult(ImmutableCallSite CS)
Definition: Statepoint.cpp:53
static FunctionType * get(Type *Result, ArrayRef< Type *> Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
Definition: Type.cpp:297
self_iterator getIterator()
Definition: ilist_node.h:82
Class to represent integer types.
Definition: DerivedTypes.h:40
bool isIntN(unsigned N) const
Check if this APInt has an N-bits unsigned integer value.
Definition: APInt.h:443
void setNotConvergent()
Definition: CallSite.h:501
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:61
void setAlignment(unsigned Align)
static Value * simplifyX86varShift(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1320
const AMDGPUAS & AS
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs, and aliases.
Definition: Value.cpp:527
bool isVolatile() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1214
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE maxNum semantics.
Definition: APFloat.h:1223
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:933
static InvokeInst * Create(Value *Func, BasicBlock *IfNormal, BasicBlock *IfException, ArrayRef< Value *> Args, const Twine &NameStr, Instruction *InsertBefore=nullptr)
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:959
static Value * simplifyX86muldq(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
signed greater than
Definition: InstrTypes.h:887
static Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
Definition: Constants.cpp:244
static Value * simplifyX86extrq(IntrinsicInst &II, Value *Op0, ConstantInt *CILength, ConstantInt *CIIndex, InstCombiner::BuilderTy &Builder)
Attempt to simplify SSE4A EXTRQ/EXTRQI instructions using constant folding or conversion to a shuffle...
bool doesNotThrow() const
Determine if the call cannot unwind.
const APFloat & getValueAPF() const
Definition: Constants.h:294
CastClass_match< OpTy, Instruction::SExt > m_SExt(const OpTy &Op)
Matches SExt.
Definition: PatternMatch.h:894
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:51
bool doesNotThrow() const
Determine if the function cannot unwind.
Definition: Function.h:444
static Type * getHalfTy(LLVMContext &C)
Definition: Type.cpp:163
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:240
static CastInst * CreateIntegerCast(Value *S, Type *Ty, bool isSigned, const Twine &Name="", Instruction *InsertBefore=nullptr)
Create a ZExt, BitCast, or Trunc for int -> int casts.
This is the common base class for memset/memcpy/memmove.
Iterator for intrusive lists based on ilist_node.
unsigned countMaxLeadingZeros() const
Returns the maximum number of leading zero bits possible.
Definition: KnownBits.h:178
bool hasParamAttribute(unsigned ArgNo, Attribute::AttrKind Kind) const
Equivalent to hasAttribute(ArgNo + FirstArgIndex, Kind).
static PointerType * getInt1PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:216
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
getLimitedValue - If the value is smaller than the specified limit, return it, otherwise return the l...
Definition: Constants.h:251
#define E
Definition: LargeTest.cpp:27
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the generic address space (address sp...
Definition: DerivedTypes.h:482
This is the shared class of boolean and integer constants.
Definition: Constants.h:84
#define B
Definition: LargeTest.cpp:24
static Value * simplifyX86vpermilvar(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
Attempt to convert vpermilvar* to shufflevector if the mask is constant.
iterator end()
Definition: BasicBlock.h:254
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type...
Definition: Type.cpp:130
IterTy arg_begin() const
Definition: CallSite.h:545
static IntrinsicInst * findInitTrampolineFromAlloca(Value *TrampMem)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:864
Type::subtype_iterator param_iterator
Definition: DerivedTypes.h:126
bool overlaps(const AttrBuilder &B) const
Return true if the builder has any attribute that's in the specified builder.
static Instruction * simplifyMaskedGather(IntrinsicInst &II, InstCombiner &IC)
void setDoesNotThrow()
Definition: CallSite.h:482
signed less than
Definition: InstrTypes.h:889
Type * getReturnType() const
Definition: DerivedTypes.h:124
const size_t N
CallInst * CreateMaskedStore(Value *Val, Value *Ptr, unsigned Align, Value *Mask)
Create a call to Masked Store intrinsic.
Definition: IRBuilder.cpp:373
APFloat frexp(const APFloat &X, int &Exp, APFloat::roundingMode RM)
Equivalent of C standard library function.
Definition: APFloat.h:1190
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:1736
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:560
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
Definition: Constants.cpp:574
static Constant * get(Type *Ty, double V)
This returns a ConstantFP, or a vector containing a splat of a ConstantFP, for the specified value in...
Definition: Constants.cpp:623
#define NC
Definition: regutils.h:42
CallInst * CreateMaskedLoad(Value *Ptr, unsigned Align, Value *Mask, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Load intrinsic.
Definition: IRBuilder.cpp:353
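A small sketch of building such a masked load, assuming an in-scope IRBuilder; the 16-byte alignment, the pass-through value, and the helper name are illustrative assumptions:

  #include "llvm/IR/IRBuilder.h"
  using namespace llvm;

  // Load through Ptr only in lanes where Mask is true; disabled lanes take
  // the corresponding lane of Passthru.
  static Value *emitGuardedLoad(IRBuilder<> &Builder, Value *Ptr, Value *Mask,
                                Value *Passthru) {
    return Builder.CreateMaskedLoad(Ptr, /*Align=*/16, Mask, Passthru,
                                    "guarded.load");
  }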
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1272
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:516
bool isDenormal() const
Definition: APFloat.h:1133
void setOperand(unsigned i, Value *Val)
Definition: User.h:159
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
This class represents the atomic memcpy intrinsic. TODO: Integrate this class into MemIntrinsic hierarchy;...
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:923
unsigned getVectorNumElements() const
Definition: DerivedTypes.h:462
signed less or equal
Definition: InstrTypes.h:890
Class to represent vector types.
Definition: DerivedTypes.h:393
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
Definition: Instruction.cpp:57
Class for arbitrary precision integers.
Definition: APInt.h:69
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), Instruction *InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
iterator_range< user_iterator > users()
Definition: Value.h:395
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1008
static Value * simplifyX86pshufb(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
Attempt to convert pshufb* to shufflevector if the mask is constant.
static cl::opt< bool > FtzEnabled("nvptx-f32ftz", cl::ZeroOrMore, cl::Hidden, cl::desc("NVPTX Specific: Flush f32 subnormals to sign-preserving zero."), cl::init(false))
constexpr char Size[]
Key for Kernel::Arg::Metadata::mSize.
IntegerType * getInt1Ty()
Fetch the type representing a single bit.
Definition: IRBuilder.h:333
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::ZeroOrMore, cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate IT block based on arch"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow deprecated IT based on ARMv8"), clEnumValN(NoRestrictedIT, "arm-no-restrict-it", "Allow IT blocks based on ARMv7")))
uint64_t getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:405
specific_fpval m_FPOne()
Match a float 1.0 or vector with all elements equal to 1.0.
Definition: PatternMatch.h:383
Function * getCalledFunction() const
Return the function called, or null if this is an indirect function invocation.
static CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", Instruction *InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
void setOrdering(AtomicOrdering Ordering)
Sets the ordering constraint of this store instruction.
Definition: Instructions.h:364
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition: Lint.cpp:529
opStatus
IEEE-754R 7: Default exception handling.
Definition: APFloat.h:185
static Value * simplifyMinnumMaxnum(const IntrinsicInst &II)
void setCalledFunction(Value *Fn)
Set the function called.
This class wraps the llvm.memcpy/memmove intrinsics.
static Value * simplifyMaskedLoad(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
static Instruction * simplifyX86MaskedLoad(IntrinsicInst &II, InstCombiner &IC)
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:280
static bool maskIsAllOneOrUndef(Value *Mask)
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:176
unsigned getIntegerBitWidth() const
Definition: DerivedTypes.h:97
OverflowResult
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:61
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:195
unsigned greater or equal
Definition: InstrTypes.h:884
match_one m_One()
Match an integer 1 or a vector with all elements equal to 1.
Definition: PatternMatch.h:194
void getOperandBundlesAsDefs(SmallVectorImpl< OperandBundleDef > &Defs) const
Definition: CallSite.h:556
Value * getArgOperand(unsigned i) const
getArgOperand/setArgOperand - Return/set the i-th call argument.
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:218
static Instruction::CastOps getCastOpcode(const Value *Val, bool SrcIsSigned, Type *Ty, bool DstIsSigned)
Returns the opcode necessary to cast Val into Ty using usual casting rules.
#define I(x, y, z)
Definition: MD5.cpp:58
bool doesNotThrow() const
Determine if the call cannot unwind.
Definition: CallSite.h:479
void setArgOperand(unsigned i, Value *v)
bool isNormal() const
Definition: APFloat.h:1136
bool isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI, bool LookThroughBitCast=false)
Tests if a value is a call or invoke to a library function that allocates memory (either malloc...
Value * optimizeCall(CallInst *CI)
optimizeCall - Take the given call instruction and return a more optimal value to replace the instruc...
static bool removeTriviallyEmptyRange(IntrinsicInst &I, unsigned StartID, unsigned EndID, InstCombiner &IC)
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
Type * getValueType() const
Definition: GlobalValue.h:262
bool isKnownNonNullAt(const Value *V, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr)
Return true if this pointer couldn't possibly be null.
static IntrinsicInst * findInitTrampoline(Value *Callee)
bool isByValOrInAllocaArgument(unsigned ArgNo) const
Determine whether this argument is passed by value or in an alloca.
Definition: CallSite.h:582
static Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
Definition: Attributes.cpp:81
AssumptionCache & getAssumptionCache() const
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition: APInt.h:449
static PointerType * getIntNPtrTy(LLVMContext &C, unsigned N, unsigned AS=0)
Definition: Type.cpp:212
static Value * simplifyX86insertq(IntrinsicInst &II, Value *Op0, Value *Op1, APInt APLength, APInt APIndex, InstCombiner::BuilderTy &Builder)
Attempt to simplify SSE4A INSERTQ/INSERTQI instructions using constant folding or conversion to a shu...
bool isStatepoint(ImmutableCallSite CS)
Definition: Statepoint.cpp:27
static Constant * getNegativeIsTrueBoolVec(ConstantDataVector *V)
Return a constant boolean vector that has true elements in all positions where the input constant dat...
iterator_range< op_iterator > arg_operands()
Iteration adapter for range-for loops.
static Value * emitX86MaskSelect(Value *Mask, Value *Op0, Value *Op1, InstCombiner::BuilderTy &Builder)
This represents the llvm.va_copy intrinsic.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:538
match_all_ones m_AllOnes()
Match an integer or vector with all bits set to true.
Definition: PatternMatch.h:205
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
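A sketch of the idiom this routine supports inside InstCombine visitors (only compilable within the InstCombine sources, since InstCombineInternal.h is an internal header; the helper foldToValue is hypothetical):

  #include "InstCombineInternal.h"
  using namespace llvm;

  // If a better value for the call was found, rewrite all users to it and
  // queue them for revisiting; returning the original instruction tells the
  // InstCombine driver it can now be erased.
  static Instruction *foldToValue(InstCombiner &IC, CallInst &CI,
                                  Value *Better) {
    if (!Better)
      return nullptr;
    return IC.replaceInstUsesWith(CI, Better);
  }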
LoadInst * CreateAlignedLoad(Value *Ptr, unsigned Align, const char *Name)
Definition: IRBuilder.h:1182
static Instruction * foldCtpop(IntrinsicInst &II, InstCombiner &IC)
Value * getLength() const
unsigned getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition: Type.cpp:115
LLVM Value Representation.
Definition: Value.h:73
void setAlignment(unsigned Align)
This file provides internal interfaces used to implement the InstCombine.
static VectorType * get(Type *ElementType, unsigned NumElements)
This static method is the primary way to construct an VectorType.
Definition: Type.cpp:593
Value * getRawSource() const
Return the arguments to the instruction.
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:235
AttrBuilder typeIncompatible(Type *Ty)
Which attributes cannot be applied to a type.
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:81
AttributeSet getFnAttributes() const
The function attributes are returned.
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.h:262
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1027
Invoke instruction.
#define DEBUG(X)
Definition: Debug.h:118
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition: KnownBits.h:148
bool isKnownNonZero(const Value *V, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr)
Return true if the given value is known to be non-zero when defined.
IRTranslator LLVM IR MI
bool hasOneUse() const
Return true if there is exactly one user of this value.
Definition: Value.h:408
unsigned greater than
Definition: InstrTypes.h:883
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition: InstrTypes.h:974
void addAttribute(unsigned i, Attribute::AttrKind Kind)
adds the attribute to the list of attributes.
AttributeList getAttributes() const
Get the parameter attributes of the call.
Definition: CallSite.h:326
unsigned getNumElements() const
Return the number of elements in the array or vector.
Definition: Constants.cpp:2294
bool isConvergent() const
Determine if the call is convergent.
Definition: CallSite.h:495
static APInt getNullValue(unsigned numBits)
Get the &#39;0&#39; value.
Definition: APInt.h:562
int * Ptr
match_nan m_NaN()
Match an arbitrary NaN constant. This includes quiet and signalling nans.
Definition: PatternMatch.h:183
const TerminatorInst * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:120
static Constant * getMul(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
Definition: Constants.cpp:2137
static Value * simplifyX86insertps(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
This class represents an extension of floating point types.
bool isEmpty() const
Return true if there are no attributes.
Definition: Attributes.h:646
Root of the metadata hierarchy.
Definition: Metadata.h:58
void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
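A minimal sketch of a typical query, assuming V is an integer-typed value; the helper name knownMultipleOf4 is made up for illustration:

  #include "llvm/Analysis/ValueTracking.h"
  #include "llvm/IR/DataLayout.h"
  #include "llvm/IR/Value.h"
  #include "llvm/Support/KnownBits.h"
  using namespace llvm;

  // Is V provably a multiple of 4, i.e. are at least two trailing bits known
  // to be zero?
  static bool knownMultipleOf4(const Value *V, const DataLayout &DL) {
    KnownBits Known(V->getType()->getScalarSizeInBits());
    computeKnownBits(V, Known, DL);
    return Known.countMinTrailingZeros() >= 2;
  }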
static IntegerType * getInt8Ty(LLVMContext &C)
Definition: Type.cpp:174
void setCalledFunction(Value *V)
Set the callee to the specified value.
Definition: CallSite.h:123
bool isSignaling() const
Definition: APFloat.h:1134
static Type * getPromotedType(Type *Ty)
Return the specified type promoted as it would be to pass through a va_arg area.
bool use_empty() const
Definition: Value.h:322
static Constant * get(ArrayRef< Constant *> V)
Definition: Constants.cpp:984
Type * getElementType() const
Definition: DerivedTypes.h:486
LLVM_READONLY APFloat minnum(const APFloat &A, const APFloat &B)
Implements IEEE minNum semantics.
Definition: APFloat.h:1212
PointerType * getType() const
Global values are always pointers.
Definition: GlobalValue.h:260
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
Definition: PatternMatch.h:335
static AttributeList get(LLVMContext &C, ArrayRef< std::pair< unsigned, Attribute >> Attrs)
Create an AttributeList with the specified parameters in it.
Definition: Attributes.cpp:868
bool isLosslessCast() const
A lossless cast is one that does not alter the basic value.
bool isNullValue() const
Determine if all bits are clear.
Definition: APInt.h:399
bool isStructTy() const
True if this is an instance of StructType.
Definition: Type.h:215
signed greater or equal
Definition: InstrTypes.h:888
User * user_back()
Definition: Value.h:381
cmpResult compare(const APFloat &RHS) const
Definition: APFloat.h:1102
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:44
const BasicBlock * getParent() const
Definition: Instruction.h:66
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate > m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
Definition: PatternMatch.h:813
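A short sketch of the PatternMatch style used throughout this file: capture an integer compare against zero along with its predicate and operand. The helper name isCompareWithZero is illustrative:

  #include "llvm/IR/Instructions.h"
  #include "llvm/IR/PatternMatch.h"
  #include "llvm/IR/Value.h"
  using namespace llvm;
  using namespace PatternMatch;

  // Recognize "icmp <pred> X, 0", binding the predicate and the left operand.
  static bool isCompareWithZero(Value *V, ICmpInst::Predicate &Pred,
                                Value *&X) {
    return match(V, m_ICmp(Pred, m_Value(X), m_Zero()));
  }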
CallInst * CreateCall(Value *Callee, ArrayRef< Value *> Args=None, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1659