1 //===- InstCombineCalls.cpp -----------------------------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the visitCall and visitInvoke functions.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "InstCombineInternal.h"
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/APInt.h"
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/None.h"
19 #include "llvm/ADT/Optional.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/ADT/Statistic.h"
23 #include "llvm/ADT/Twine.h"
28 #include "llvm/IR/Attributes.h"
29 #include "llvm/IR/BasicBlock.h"
30 #include "llvm/IR/CallSite.h"
31 #include "llvm/IR/Constant.h"
32 #include "llvm/IR/Constants.h"
33 #include "llvm/IR/DataLayout.h"
34 #include "llvm/IR/DerivedTypes.h"
35 #include "llvm/IR/Function.h"
36 #include "llvm/IR/GlobalVariable.h"
37 #include "llvm/IR/InstrTypes.h"
38 #include "llvm/IR/Instruction.h"
39 #include "llvm/IR/Instructions.h"
40 #include "llvm/IR/IntrinsicInst.h"
41 #include "llvm/IR/Intrinsics.h"
42 #include "llvm/IR/LLVMContext.h"
43 #include "llvm/IR/Metadata.h"
44 #include "llvm/IR/PatternMatch.h"
45 #include "llvm/IR/Statepoint.h"
46 #include "llvm/IR/Type.h"
47 #include "llvm/IR/User.h"
48 #include "llvm/IR/Value.h"
49 #include "llvm/IR/ValueHandle.h"
51 #include "llvm/Support/Casting.h"
53 #include "llvm/Support/Compiler.h"
54 #include "llvm/Support/Debug.h"
56 #include "llvm/Support/KnownBits.h"
62 #include <algorithm>
63 #include <cassert>
64 #include <cstdint>
65 #include <cstring>
66 #include <utility>
67 #include <vector>
68 
69 using namespace llvm;
70 using namespace PatternMatch;
71 
72 #define DEBUG_TYPE "instcombine"
73 
74 STATISTIC(NumSimplified, "Number of library calls simplified");
75 
76 static cl::opt<unsigned> UnfoldElementAtomicMemcpyMaxElements(
77  "unfold-element-atomic-memcpy-max-elements",
78  cl::init(16),
79  cl::desc("Maximum number of elements in atomic memcpy the optimizer is "
80  "allowed to unfold"));
81 
82 /// Return the specified type promoted as it would be to pass through a va_arg
83 /// area.
84 static Type *getPromotedType(Type *Ty) {
85  if (IntegerType* ITy = dyn_cast<IntegerType>(Ty)) {
86  if (ITy->getBitWidth() < 32)
87  return Type::getInt32Ty(Ty->getContext());
88  }
89  return Ty;
90 }
91 
92 /// Return a constant boolean vector that has true elements in all positions
93 /// where the input constant data vector has an element with the sign bit set.
94 static Constant *getNegativeIsTrueBoolVec(ConstantDataVector *V) {
95  SmallVector<Constant *, 32> BoolVec;
96  IntegerType *BoolTy = Type::getInt1Ty(V->getContext());
97  for (unsigned I = 0, E = V->getNumElements(); I != E; ++I) {
98  Constant *Elt = V->getElementAsConstant(I);
99  assert((isa<ConstantInt>(Elt) || isa<ConstantFP>(Elt)) &&
100  "Unexpected constant data vector element type");
101  bool Sign = V->getElementType()->isIntegerTy()
102  ? cast<ConstantInt>(Elt)->isNegative()
103  : cast<ConstantFP>(Elt)->isNegative();
104  BoolVec.push_back(ConstantInt::get(BoolTy, Sign));
105  }
106  return ConstantVector::get(BoolVec);
107 }
108 
109 Instruction *
110 InstCombiner::SimplifyElementUnorderedAtomicMemCpy(AtomicMemCpyInst *AMI) {
111  // Try to unfold this intrinsic into sequence of explicit atomic loads and
112  // stores.
113  // First check that number of elements is compile time constant.
114  auto *LengthCI = dyn_cast<ConstantInt>(AMI->getLength());
115  if (!LengthCI)
116  return nullptr;
117 
118  // Check that there are not too many elements.
119  uint64_t LengthInBytes = LengthCI->getZExtValue();
120  uint32_t ElementSizeInBytes = AMI->getElementSizeInBytes();
121  uint64_t NumElements = LengthInBytes / ElementSizeInBytes;
122  if (NumElements >= UnfoldElementAtomicMemcpyMaxElements)
123  return nullptr;
124 
125  // Only expand if there are elements to copy.
126  if (NumElements > 0) {
127  // Don't unfold into illegal integers
128  uint64_t ElementSizeInBits = ElementSizeInBytes * 8;
129  if (!getDataLayout().isLegalInteger(ElementSizeInBits))
130  return nullptr;
131 
132  // Cast source and destination to the correct type. Intrinsic input
133  // arguments are usually represented as i8*. Often operands will be
134  // explicitly cast to i8*, and we can just strip those casts instead of
135  // inserting new ones. However, it's easier to rely on other InstCombine
136  // rules which will cover trivial cases anyway.
137  Value *Src = AMI->getRawSource();
138  Value *Dst = AMI->getRawDest();
139  Type *ElementPointerType =
140  Type::getIntNPtrTy(AMI->getContext(), ElementSizeInBits,
141  Src->getType()->getPointerAddressSpace());
142 
143  Value *SrcCasted = Builder.CreatePointerCast(Src, ElementPointerType,
144  "memcpy_unfold.src_casted");
145  Value *DstCasted = Builder.CreatePointerCast(Dst, ElementPointerType,
146  "memcpy_unfold.dst_casted");
147 
148  for (uint64_t i = 0; i < NumElements; ++i) {
149  // Get current element addresses
150  ConstantInt *ElementIdxCI =
151  ConstantInt::get(AMI->getContext(), APInt(64, i));
152  Value *SrcElementAddr =
153  Builder.CreateGEP(SrcCasted, ElementIdxCI, "memcpy_unfold.src_addr");
154  Value *DstElementAddr =
155  Builder.CreateGEP(DstCasted, ElementIdxCI, "memcpy_unfold.dst_addr");
156 
157  // Load from the source. Transfer alignment information and mark load as
158  // unordered atomic.
159  LoadInst *Load = Builder.CreateLoad(SrcElementAddr, "memcpy_unfold.val");
160  Load->setAtomic(AtomicOrdering::Unordered);
161  // We know the alignment of the first element. The verifier also guarantees
162  // that the element size is less than or equal to the first element's
163  // alignment and that both of these values are powers of two. This means
164  // that all subsequent accesses are at least element size aligned.
165  // TODO: We can infer better alignment but there is no evidence that this
166  // will matter.
167  Load->setAlignment(i == 0 ? AMI->getParamAlignment(1)
168  : ElementSizeInBytes);
169  Load->setDebugLoc(AMI->getDebugLoc());
170 
171  // Store loaded value via unordered atomic store.
172  StoreInst *Store = Builder.CreateStore(Load, DstElementAddr);
173  Store->setAtomic(AtomicOrdering::Unordered);
174  Store->setAlignment(i == 0 ? AMI->getParamAlignment(0)
175  : ElementSizeInBytes);
176  Store->setDebugLoc(AMI->getDebugLoc());
177  }
178  }
179 
180  // Set the number of elements of the copy to 0, it will be deleted on the
181  // next iteration.
182  AMI->setLength(Constant::getNullValue(LengthCI->getType()));
183  return AMI;
184 }
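// Example of the unfolding above (illustrative IR sketch, not taken from this
// file): an element-atomic memcpy of 8 bytes with an element size of 4 becomes
// two unordered atomic i32 load/store pairs, roughly:
//
//   call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(
//       i8* align 4 %dst, i8* align 4 %src, i32 8, i32 4)
//     =>
//   %s  = bitcast i8* %src to i32*
//   %d  = bitcast i8* %dst to i32*
//   %v0 = load atomic i32, i32* %s unordered, align 4
//   store atomic i32 %v0, i32* %d unordered, align 4
//   ; ...and the same for the element at index 1 via getelementptr.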
185 
186 Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
187  unsigned DstAlign = getKnownAlignment(MI->getArgOperand(0), DL, MI, &AC, &DT);
188  unsigned SrcAlign = getKnownAlignment(MI->getArgOperand(1), DL, MI, &AC, &DT);
189  unsigned MinAlign = std::min(DstAlign, SrcAlign);
190  unsigned CopyAlign = MI->getAlignment();
191 
192  if (CopyAlign < MinAlign) {
193  MI->setAlignment(MinAlign);
194  return MI;
195  }
196 
197  // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with
198  // load/store.
199  ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getArgOperand(2));
200  if (!MemOpLength) return nullptr;
201 
202  // Source and destination pointer types are always "i8*" for intrinsic. See
203  // if the size is something we can handle with a single primitive load/store.
204  // A single load+store correctly handles overlapping memory in the memmove
205  // case.
206  uint64_t Size = MemOpLength->getLimitedValue();
207  assert(Size && "0-sized memory transferring should be removed already.");
208 
209  if (Size > 8 || (Size&(Size-1)))
210  return nullptr; // If not 1/2/4/8 bytes, exit.
211 
212  // Use an integer load+store unless we can find something better.
213  unsigned SrcAddrSp =
214  cast<PointerType>(MI->getArgOperand(1)->getType())->getAddressSpace();
215  unsigned DstAddrSp =
216  cast<PointerType>(MI->getArgOperand(0)->getType())->getAddressSpace();
217 
218  IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3);
219  Type *NewSrcPtrTy = PointerType::get(IntType, SrcAddrSp);
220  Type *NewDstPtrTy = PointerType::get(IntType, DstAddrSp);
221 
222  // If the memcpy has metadata describing the members, see if we can get the
223  // TBAA tag describing our copy.
224  MDNode *CopyMD = nullptr;
225  if (MDNode *M = MI->getMetadata(LLVMContext::MD_tbaa_struct)) {
226  if (M->getNumOperands() == 3 && M->getOperand(0) &&
227  mdconst::hasa<ConstantInt>(M->getOperand(0)) &&
228  mdconst::extract<ConstantInt>(M->getOperand(0))->isZero() &&
229  M->getOperand(1) &&
230  mdconst::hasa<ConstantInt>(M->getOperand(1)) &&
231  mdconst::extract<ConstantInt>(M->getOperand(1))->getValue() ==
232  Size &&
233  M->getOperand(2) && isa<MDNode>(M->getOperand(2)))
234  CopyMD = cast<MDNode>(M->getOperand(2));
235  }
236 
237  // If the memcpy/memmove provides better alignment info than we can
238  // infer, use it.
239  SrcAlign = std::max(SrcAlign, CopyAlign);
240  DstAlign = std::max(DstAlign, CopyAlign);
241 
242  Value *Src = Builder.CreateBitCast(MI->getArgOperand(1), NewSrcPtrTy);
243  Value *Dest = Builder.CreateBitCast(MI->getArgOperand(0), NewDstPtrTy);
244  LoadInst *L = Builder.CreateLoad(Src, MI->isVolatile());
245  L->setAlignment(SrcAlign);
246  if (CopyMD)
247  L->setMetadata(LLVMContext::MD_tbaa, CopyMD);
248  MDNode *LoopMemParallelMD =
249  MI->getMetadata(LLVMContext::MD_mem_parallel_loop_access);
250  if (LoopMemParallelMD)
251  L->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);
252 
253  StoreInst *S = Builder.CreateStore(L, Dest, MI->isVolatile());
254  S->setAlignment(DstAlign);
255  if (CopyMD)
256  S->setMetadata(LLVMContext::MD_tbaa, CopyMD);
257  if (LoopMemParallelMD)
258  S->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);
259 
260  // Set the size of the copy to 0, it will be deleted on the next iteration.
261  MI->setArgOperand(2, Constant::getNullValue(MemOpLength->getType()));
262  return MI;
263 }
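// Example of the transform above (illustrative IR sketch, not taken from this
// file): a memcpy with a constant 4-byte length collapses to one load/store
// pair, with alignment and volatility carried over, roughly:
//
//   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 4, ...)
//     =>
//   %sp = bitcast i8* %s to i32*
//   %dp = bitcast i8* %d to i32*
//   %v  = load i32, i32* %sp
//   store i32 %v, i32* %dp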
264 
265 Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
266  unsigned Alignment = getKnownAlignment(MI->getDest(), DL, MI, &AC, &DT);
267  if (MI->getAlignment() < Alignment) {
268  MI->setAlignment(Alignment);
269  return MI;
270  }
271 
272  // Extract the length and alignment and fill if they are constant.
273  ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
274  ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
275  if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8))
276  return nullptr;
277  uint64_t Len = LenC->getLimitedValue();
278  Alignment = MI->getAlignment();
279  assert(Len && "0-sized memory setting should be removed already.");
280 
281  // memset(s,c,n) -> store s, c (for n=1,2,4,8)
282  if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
283  Type *ITy = IntegerType::get(MI->getContext(), Len*8); // n=1 -> i8.
284 
285  Value *Dest = MI->getDest();
286  unsigned DstAddrSp = cast<PointerType>(Dest->getType())->getAddressSpace();
287  Type *NewDstPtrTy = PointerType::get(ITy, DstAddrSp);
288  Dest = Builder.CreateBitCast(Dest, NewDstPtrTy);
289 
290  // Alignment 0 is identity for alignment 1 for memset, but not store.
291  if (Alignment == 0) Alignment = 1;
292 
293  // Extract the fill value and store.
294  uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL;
295  StoreInst *S = Builder.CreateStore(ConstantInt::get(ITy, Fill), Dest,
296  MI->isVolatile());
297  S->setAlignment(Alignment);
298 
299  // Set the size of the copy to 0, it will be deleted on the next iteration.
300  MI->setLength(Constant::getNullValue(LenC->getType()));
301  return MI;
302  }
303 
304  return nullptr;
305 }
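// Example of the transform above (illustrative IR sketch, not taken from this
// file): a 4-byte memset with the constant fill value 1 becomes a single i32
// store of the splatted byte pattern 0x01010101, roughly:
//
//   call void @llvm.memset.p0i8.i64(i8* %p, i8 1, i64 4, ...)
//     =>
//   %dp = bitcast i8* %p to i32*
//   store i32 16843009, i32* %dp, align 1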
306 
307 static Value *simplifyX86immShift(const IntrinsicInst &II,
308  InstCombiner::BuilderTy &Builder) {
309  bool LogicalShift = false;
310  bool ShiftLeft = false;
311 
312  switch (II.getIntrinsicID()) {
313  default: llvm_unreachable("Unexpected intrinsic!");
314  case Intrinsic::x86_sse2_psra_d:
315  case Intrinsic::x86_sse2_psra_w:
316  case Intrinsic::x86_sse2_psrai_d:
317  case Intrinsic::x86_sse2_psrai_w:
318  case Intrinsic::x86_avx2_psra_d:
319  case Intrinsic::x86_avx2_psra_w:
320  case Intrinsic::x86_avx2_psrai_d:
321  case Intrinsic::x86_avx2_psrai_w:
322  case Intrinsic::x86_avx512_psra_q_128:
323  case Intrinsic::x86_avx512_psrai_q_128:
324  case Intrinsic::x86_avx512_psra_q_256:
325  case Intrinsic::x86_avx512_psrai_q_256:
326  case Intrinsic::x86_avx512_psra_d_512:
327  case Intrinsic::x86_avx512_psra_q_512:
328  case Intrinsic::x86_avx512_psra_w_512:
329  case Intrinsic::x86_avx512_psrai_d_512:
330  case Intrinsic::x86_avx512_psrai_q_512:
331  case Intrinsic::x86_avx512_psrai_w_512:
332  LogicalShift = false; ShiftLeft = false;
333  break;
334  case Intrinsic::x86_sse2_psrl_d:
335  case Intrinsic::x86_sse2_psrl_q:
336  case Intrinsic::x86_sse2_psrl_w:
337  case Intrinsic::x86_sse2_psrli_d:
338  case Intrinsic::x86_sse2_psrli_q:
339  case Intrinsic::x86_sse2_psrli_w:
340  case Intrinsic::x86_avx2_psrl_d:
341  case Intrinsic::x86_avx2_psrl_q:
342  case Intrinsic::x86_avx2_psrl_w:
343  case Intrinsic::x86_avx2_psrli_d:
344  case Intrinsic::x86_avx2_psrli_q:
345  case Intrinsic::x86_avx2_psrli_w:
346  case Intrinsic::x86_avx512_psrl_d_512:
347  case Intrinsic::x86_avx512_psrl_q_512:
348  case Intrinsic::x86_avx512_psrl_w_512:
349  case Intrinsic::x86_avx512_psrli_d_512:
350  case Intrinsic::x86_avx512_psrli_q_512:
351  case Intrinsic::x86_avx512_psrli_w_512:
352  LogicalShift = true; ShiftLeft = false;
353  break;
354  case Intrinsic::x86_sse2_psll_d:
355  case Intrinsic::x86_sse2_psll_q:
356  case Intrinsic::x86_sse2_psll_w:
357  case Intrinsic::x86_sse2_pslli_d:
358  case Intrinsic::x86_sse2_pslli_q:
359  case Intrinsic::x86_sse2_pslli_w:
360  case Intrinsic::x86_avx2_psll_d:
361  case Intrinsic::x86_avx2_psll_q:
362  case Intrinsic::x86_avx2_psll_w:
363  case Intrinsic::x86_avx2_pslli_d:
364  case Intrinsic::x86_avx2_pslli_q:
365  case Intrinsic::x86_avx2_pslli_w:
366  case Intrinsic::x86_avx512_psll_d_512:
367  case Intrinsic::x86_avx512_psll_q_512:
368  case Intrinsic::x86_avx512_psll_w_512:
369  case Intrinsic::x86_avx512_pslli_d_512:
370  case Intrinsic::x86_avx512_pslli_q_512:
371  case Intrinsic::x86_avx512_pslli_w_512:
372  LogicalShift = true; ShiftLeft = true;
373  break;
374  }
375  assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left");
376 
377  // Simplify if count is constant.
378  auto Arg1 = II.getArgOperand(1);
379  auto CAZ = dyn_cast<ConstantAggregateZero>(Arg1);
380  auto CDV = dyn_cast<ConstantDataVector>(Arg1);
381  auto CInt = dyn_cast<ConstantInt>(Arg1);
382  if (!CAZ && !CDV && !CInt)
383  return nullptr;
384 
385  APInt Count(64, 0);
386  if (CDV) {
387  // SSE2/AVX2 uses all the first 64-bits of the 128-bit vector
388  // operand to compute the shift amount.
389  auto VT = cast<VectorType>(CDV->getType());
390  unsigned BitWidth = VT->getElementType()->getPrimitiveSizeInBits();
391  assert((64 % BitWidth) == 0 && "Unexpected packed shift size");
392  unsigned NumSubElts = 64 / BitWidth;
393 
394  // Concatenate the sub-elements to create the 64-bit value.
395  for (unsigned i = 0; i != NumSubElts; ++i) {
396  unsigned SubEltIdx = (NumSubElts - 1) - i;
397  auto SubElt = cast<ConstantInt>(CDV->getElementAsConstant(SubEltIdx));
398  Count <<= BitWidth;
399  Count |= SubElt->getValue().zextOrTrunc(64);
400  }
401  }
402  else if (CInt)
403  Count = CInt->getValue();
404 
405  auto Vec = II.getArgOperand(0);
406  auto VT = cast<VectorType>(Vec->getType());
407  auto SVT = VT->getElementType();
408  unsigned VWidth = VT->getNumElements();
409  unsigned BitWidth = SVT->getPrimitiveSizeInBits();
410 
411  // If shift-by-zero then just return the original value.
412  if (Count.isNullValue())
413  return Vec;
414 
415  // Handle cases when Shift >= BitWidth.
416  if (Count.uge(BitWidth)) {
417  // If LogicalShift - just return zero.
418  if (LogicalShift)
419  return ConstantAggregateZero::get(VT);
420 
421  // If ArithmeticShift - clamp Shift to (BitWidth - 1).
422  Count = APInt(64, BitWidth - 1);
423  }
424 
425  // Get a constant vector of the same type as the first operand.
426  auto ShiftAmt = ConstantInt::get(SVT, Count.zextOrTrunc(BitWidth));
427  auto ShiftVec = Builder.CreateVectorSplat(VWidth, ShiftAmt);
428 
429  if (ShiftLeft)
430  return Builder.CreateShl(Vec, ShiftVec);
431 
432  if (LogicalShift)
433  return Builder.CreateLShr(Vec, ShiftVec);
434 
435  return Builder.CreateAShr(Vec, ShiftVec);
436 }
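// Example of the transform above (illustrative IR sketch, not taken from this
// file): an SSE2 arithmetic shift by a constant in-range count becomes a
// generic vector ashr by a splat; a logical shift by a count >= the bit width
// folds to zero instead:
//
//   %r = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 3)
//     =>
//   %r = ashr <4 x i32> %v, <i32 3, i32 3, i32 3, i32 3>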
437 
438 // Attempt to simplify AVX2 per-element shift intrinsics to a generic IR shift.
439 // Unlike the generic IR shifts, the intrinsics have defined behaviour for out
440 // of range shift amounts (logical - set to zero, arithmetic - splat sign bit).
441 static Value *simplifyX86varShift(const IntrinsicInst &II,
442  InstCombiner::BuilderTy &Builder) {
443  bool LogicalShift = false;
444  bool ShiftLeft = false;
445 
446  switch (II.getIntrinsicID()) {
447  default: llvm_unreachable("Unexpected intrinsic!");
448  case Intrinsic::x86_avx2_psrav_d:
449  case Intrinsic::x86_avx2_psrav_d_256:
450  case Intrinsic::x86_avx512_psrav_q_128:
451  case Intrinsic::x86_avx512_psrav_q_256:
452  case Intrinsic::x86_avx512_psrav_d_512:
453  case Intrinsic::x86_avx512_psrav_q_512:
454  case Intrinsic::x86_avx512_psrav_w_128:
455  case Intrinsic::x86_avx512_psrav_w_256:
456  case Intrinsic::x86_avx512_psrav_w_512:
457  LogicalShift = false;
458  ShiftLeft = false;
459  break;
460  case Intrinsic::x86_avx2_psrlv_d:
461  case Intrinsic::x86_avx2_psrlv_d_256:
462  case Intrinsic::x86_avx2_psrlv_q:
463  case Intrinsic::x86_avx2_psrlv_q_256:
464  case Intrinsic::x86_avx512_psrlv_d_512:
465  case Intrinsic::x86_avx512_psrlv_q_512:
466  case Intrinsic::x86_avx512_psrlv_w_128:
467  case Intrinsic::x86_avx512_psrlv_w_256:
468  case Intrinsic::x86_avx512_psrlv_w_512:
469  LogicalShift = true;
470  ShiftLeft = false;
471  break;
472  case Intrinsic::x86_avx2_psllv_d:
473  case Intrinsic::x86_avx2_psllv_d_256:
474  case Intrinsic::x86_avx2_psllv_q:
475  case Intrinsic::x86_avx2_psllv_q_256:
476  case Intrinsic::x86_avx512_psllv_d_512:
477  case Intrinsic::x86_avx512_psllv_q_512:
478  case Intrinsic::x86_avx512_psllv_w_128:
479  case Intrinsic::x86_avx512_psllv_w_256:
480  case Intrinsic::x86_avx512_psllv_w_512:
481  LogicalShift = true;
482  ShiftLeft = true;
483  break;
484  }
485  assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left");
486 
487  // Simplify if all shift amounts are constant/undef.
488  auto *CShift = dyn_cast<Constant>(II.getArgOperand(1));
489  if (!CShift)
490  return nullptr;
491 
492  auto Vec = II.getArgOperand(0);
493  auto VT = cast<VectorType>(II.getType());
494  auto SVT = VT->getVectorElementType();
495  int NumElts = VT->getNumElements();
496  int BitWidth = SVT->getIntegerBitWidth();
497 
498  // Collect each element's shift amount.
499  // We also collect special cases: UNDEF = -1, OUT-OF-RANGE = BitWidth.
500  bool AnyOutOfRange = false;
501  SmallVector<int, 8> ShiftAmts;
502  for (int I = 0; I < NumElts; ++I) {
503  auto *CElt = CShift->getAggregateElement(I);
504  if (CElt && isa<UndefValue>(CElt)) {
505  ShiftAmts.push_back(-1);
506  continue;
507  }
508 
509  auto *COp = dyn_cast_or_null<ConstantInt>(CElt);
510  if (!COp)
511  return nullptr;
512 
513  // Handle out of range shifts.
514  // If LogicalShift - set to BitWidth (special case).
515  // If ArithmeticShift - set to (BitWidth - 1) (sign splat).
516  APInt ShiftVal = COp->getValue();
517  if (ShiftVal.uge(BitWidth)) {
518  AnyOutOfRange = LogicalShift;
519  ShiftAmts.push_back(LogicalShift ? BitWidth : BitWidth - 1);
520  continue;
521  }
522 
523  ShiftAmts.push_back((int)ShiftVal.getZExtValue());
524  }
525 
526  // If all elements out of range or UNDEF, return vector of zeros/undefs.
527  // ArithmeticShift should only hit this if they are all UNDEF.
528  auto OutOfRange = [&](int Idx) { return (Idx < 0) || (BitWidth <= Idx); };
529  if (llvm::all_of(ShiftAmts, OutOfRange)) {
530  SmallVector<Constant *, 8> ConstantVec;
531  for (int Idx : ShiftAmts) {
532  if (Idx < 0) {
533  ConstantVec.push_back(UndefValue::get(SVT));
534  } else {
535  assert(LogicalShift && "Logical shift expected");
536  ConstantVec.push_back(ConstantInt::getNullValue(SVT));
537  }
538  }
539  return ConstantVector::get(ConstantVec);
540  }
541 
542  // We can't handle only some out of range values with generic logical shifts.
543  if (AnyOutOfRange)
544  return nullptr;
545 
546  // Build the shift amount constant vector.
547  SmallVector<Constant *, 8> ShiftVecAmts;
548  for (int Idx : ShiftAmts) {
549  if (Idx < 0)
550  ShiftVecAmts.push_back(UndefValue::get(SVT));
551  else
552  ShiftVecAmts.push_back(ConstantInt::get(SVT, Idx));
553  }
554  auto ShiftVec = ConstantVector::get(ShiftVecAmts);
555 
556  if (ShiftLeft)
557  return Builder.CreateShl(Vec, ShiftVec);
558 
559  if (LogicalShift)
560  return Builder.CreateLShr(Vec, ShiftVec);
561 
562  return Builder.CreateAShr(Vec, ShiftVec);
563 }
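// Example of the transform above (illustrative IR sketch, not taken from this
// file): a per-element logical shift whose shift amounts are all constant and
// in range becomes a generic lshr:
//
//   %r = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v,
//                           <4 x i32> <i32 1, i32 2, i32 3, i32 4>)
//     =>
//   %r = lshr <4 x i32> %v, <i32 1, i32 2, i32 3, i32 4>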
564 
565 static Value *simplifyX86muldq(const IntrinsicInst &II,
566  InstCombiner::BuilderTy &Builder) {
567  Value *Arg0 = II.getArgOperand(0);
568  Value *Arg1 = II.getArgOperand(1);
569  Type *ResTy = II.getType();
570  assert(Arg0->getType()->getScalarSizeInBits() == 32 &&
571  Arg1->getType()->getScalarSizeInBits() == 32 &&
572  ResTy->getScalarSizeInBits() == 64 && "Unexpected muldq/muludq types");
573 
574  // muldq/muludq(undef, undef) -> zero (matches generic mul behavior)
575  if (isa<UndefValue>(Arg0) || isa<UndefValue>(Arg1))
576  return ConstantAggregateZero::get(ResTy);
577 
578  // Constant folding.
579  // PMULDQ = (mul(vXi64 sext(shuffle<0,2,..>(Arg0)),
580  // vXi64 sext(shuffle<0,2,..>(Arg1))))
581  // PMULUDQ = (mul(vXi64 zext(shuffle<0,2,..>(Arg0)),
582  // vXi64 zext(shuffle<0,2,..>(Arg1))))
583  if (!isa<Constant>(Arg0) || !isa<Constant>(Arg1))
584  return nullptr;
585 
586  unsigned NumElts = ResTy->getVectorNumElements();
587  assert(Arg0->getType()->getVectorNumElements() == (2 * NumElts) &&
588  Arg1->getType()->getVectorNumElements() == (2 * NumElts) &&
589  "Unexpected muldq/muludq types");
590 
591  unsigned IntrinsicID = II.getIntrinsicID();
592  bool IsSigned = (Intrinsic::x86_sse41_pmuldq == IntrinsicID ||
593  Intrinsic::x86_avx2_pmul_dq == IntrinsicID ||
594  Intrinsic::x86_avx512_pmul_dq_512 == IntrinsicID);
595 
596  SmallVector<unsigned, 16> ShuffleMask;
597  for (unsigned i = 0; i != NumElts; ++i)
598  ShuffleMask.push_back(i * 2);
599 
600  auto *LHS = Builder.CreateShuffleVector(Arg0, Arg0, ShuffleMask);
601  auto *RHS = Builder.CreateShuffleVector(Arg1, Arg1, ShuffleMask);
602 
603  if (IsSigned) {
604  LHS = Builder.CreateSExt(LHS, ResTy);
605  RHS = Builder.CreateSExt(RHS, ResTy);
606  } else {
607  LHS = Builder.CreateZExt(LHS, ResTy);
608  RHS = Builder.CreateZExt(RHS, ResTy);
609  }
610 
611  return Builder.CreateMul(LHS, RHS);
612 }
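// Example of the expansion above (illustrative IR sketch, not taken from this
// file): with both operands constant, pmuludq is rewritten as an even-lane
// shuffle, a zero extension and a generic multiply, which then constant folds:
//
//   %r = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %a, <4 x i32> %b)
//     =>
//   %a02 = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 0, i32 2>
//   %b02 = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 0, i32 2>
//   %za  = zext <2 x i32> %a02 to <2 x i64>
//   %zb  = zext <2 x i32> %b02 to <2 x i64>
//   %r   = mul <2 x i64> %za, %zb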
613 
614 static Value *simplifyX86pack(IntrinsicInst &II, bool IsSigned) {
615  Value *Arg0 = II.getArgOperand(0);
616  Value *Arg1 = II.getArgOperand(1);
617  Type *ResTy = II.getType();
618 
619  // Fast all undef handling.
620  if (isa<UndefValue>(Arg0) && isa<UndefValue>(Arg1))
621  return UndefValue::get(ResTy);
622 
623  Type *ArgTy = Arg0->getType();
624  unsigned NumLanes = ResTy->getPrimitiveSizeInBits() / 128;
625  unsigned NumDstElts = ResTy->getVectorNumElements();
626  unsigned NumSrcElts = ArgTy->getVectorNumElements();
627  assert(NumDstElts == (2 * NumSrcElts) && "Unexpected packing types");
628 
629  unsigned NumDstEltsPerLane = NumDstElts / NumLanes;
630  unsigned NumSrcEltsPerLane = NumSrcElts / NumLanes;
631  unsigned DstScalarSizeInBits = ResTy->getScalarSizeInBits();
632  assert(ArgTy->getScalarSizeInBits() == (2 * DstScalarSizeInBits) &&
633  "Unexpected packing types");
634 
635  // Constant folding.
636  auto *Cst0 = dyn_cast<Constant>(Arg0);
637  auto *Cst1 = dyn_cast<Constant>(Arg1);
638  if (!Cst0 || !Cst1)
639  return nullptr;
640 
641  SmallVector<Constant *, 32> Vals;
642  for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
643  for (unsigned Elt = 0; Elt != NumDstEltsPerLane; ++Elt) {
644  unsigned SrcIdx = Lane * NumSrcEltsPerLane + Elt % NumSrcEltsPerLane;
645  auto *Cst = (Elt >= NumSrcEltsPerLane) ? Cst1 : Cst0;
646  auto *COp = Cst->getAggregateElement(SrcIdx);
647  if (COp && isa<UndefValue>(COp)) {
648  Vals.push_back(UndefValue::get(ResTy->getScalarType()));
649  continue;
650  }
651 
652  auto *CInt = dyn_cast_or_null<ConstantInt>(COp);
653  if (!CInt)
654  return nullptr;
655 
656  APInt Val = CInt->getValue();
657  assert(Val.getBitWidth() == ArgTy->getScalarSizeInBits() &&
658  "Unexpected constant bitwidth");
659 
660  if (IsSigned) {
661  // PACKSS: Truncate signed value with signed saturation.
662  // Source values less than dst minint are saturated to minint.
663  // Source values greater than dst maxint are saturated to maxint.
664  if (Val.isSignedIntN(DstScalarSizeInBits))
665  Val = Val.trunc(DstScalarSizeInBits);
666  else if (Val.isNegative())
667  Val = APInt::getSignedMinValue(DstScalarSizeInBits);
668  else
669  Val = APInt::getSignedMaxValue(DstScalarSizeInBits);
670  } else {
671  // PACKUS: Truncate signed value with unsigned saturation.
672  // Source values less than zero are saturated to zero.
673  // Source values greater than dst maxuint are saturated to maxuint.
674  if (Val.isIntN(DstScalarSizeInBits))
675  Val = Val.trunc(DstScalarSizeInBits);
676  else if (Val.isNegative())
677  Val = APInt::getNullValue(DstScalarSizeInBits);
678  else
679  Val = APInt::getAllOnesValue(DstScalarSizeInBits);
680  }
681 
682  Vals.push_back(ConstantInt::get(ResTy->getScalarType(), Val));
683  }
684  }
685 
686  return ConstantVector::get(Vals);
687 }
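// Example of the constant folding above (illustrative, not taken from this
// file): packssdw of constant vectors folds each i32 element to i16 with
// signed saturation, e.g. 100000 saturates to 32767 and -100000 to -32768:
//
//   call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> <i32 100000, ...>,
//                                              <4 x i32> <i32 -100000, ...>)
//     => <8 x i16> <i16 32767, ..., i16 -32768, ...>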
688 
689 static Value *simplifyX86movmsk(const IntrinsicInst &II) {
690  Value *Arg = II.getArgOperand(0);
691  Type *ResTy = II.getType();
692  Type *ArgTy = Arg->getType();
693 
694  // movmsk(undef) -> zero as we must ensure the upper bits are zero.
695  if (isa<UndefValue>(Arg))
696  return Constant::getNullValue(ResTy);
697 
698  // We can't easily peek through x86_mmx types.
699  if (!ArgTy->isVectorTy())
700  return nullptr;
701 
702  auto *C = dyn_cast<Constant>(Arg);
703  if (!C)
704  return nullptr;
705 
706  // Extract signbits of the vector input and pack into integer result.
707  APInt Result(ResTy->getPrimitiveSizeInBits(), 0);
708  for (unsigned I = 0, E = ArgTy->getVectorNumElements(); I != E; ++I) {
709  auto *COp = C->getAggregateElement(I);
710  if (!COp)
711  return nullptr;
712  if (isa<UndefValue>(COp))
713  continue;
714 
715  auto *CInt = dyn_cast<ConstantInt>(COp);
716  auto *CFp = dyn_cast<ConstantFP>(COp);
717  if (!CInt && !CFp)
718  return nullptr;
719 
720  if ((CInt && CInt->isNegative()) || (CFp && CFp->isNegative()))
721  Result.setBit(I);
722  }
723 
724  return Constant::getIntegerValue(ResTy, Result);
725 }
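// Example of the constant folding above (illustrative, not taken from this
// file): movmskps packs the sign bit of each element into the low bits of the
// integer result:
//
//   call i32 @llvm.x86.sse.movmsk.ps(<4 x float> <float -1.0, float 1.0,
//                                                 float -2.0, float 3.0>)
//     => i32 5   ; elements 0 and 2 are negative, so bits 0 and 2 are set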
726 
727 static Value *simplifyX86insertps(const IntrinsicInst &II,
728  InstCombiner::BuilderTy &Builder) {
729  auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2));
730  if (!CInt)
731  return nullptr;
732 
733  VectorType *VecTy = cast<VectorType>(II.getType());
734  assert(VecTy->getNumElements() == 4 && "insertps with wrong vector type");
735 
736  // The immediate permute control byte looks like this:
737  // [3:0] - zero mask for each 32-bit lane
738  // [5:4] - select one 32-bit destination lane
739  // [7:6] - select one 32-bit source lane
740 
741  uint8_t Imm = CInt->getZExtValue();
742  uint8_t ZMask = Imm & 0xf;
743  uint8_t DestLane = (Imm >> 4) & 0x3;
744  uint8_t SourceLane = (Imm >> 6) & 0x3;
745 
746  ConstantAggregateZero *ZeroVector = ConstantAggregateZero::get(VecTy);
747 
748  // If all zero mask bits are set, this was just a weird way to
749  // generate a zero vector.
750  if (ZMask == 0xf)
751  return ZeroVector;
752 
753  // Initialize by passing all of the first source bits through.
754  uint32_t ShuffleMask[4] = { 0, 1, 2, 3 };
755 
756  // We may replace the second operand with the zero vector.
757  Value *V1 = II.getArgOperand(1);
758 
759  if (ZMask) {
760  // If the zero mask is being used with a single input or the zero mask
761  // overrides the destination lane, this is a shuffle with the zero vector.
762  if ((II.getArgOperand(0) == II.getArgOperand(1)) ||
763  (ZMask & (1 << DestLane))) {
764  V1 = ZeroVector;
765  // We may still move 32-bits of the first source vector from one lane
766  // to another.
767  ShuffleMask[DestLane] = SourceLane;
768  // The zero mask may override the previous insert operation.
769  for (unsigned i = 0; i < 4; ++i)
770  if ((ZMask >> i) & 0x1)
771  ShuffleMask[i] = i + 4;
772  } else {
773  // TODO: Model this case as 2 shuffles or a 'logical and' plus shuffle?
774  return nullptr;
775  }
776  } else {
777  // Replace the selected destination lane with the selected source lane.
778  ShuffleMask[DestLane] = SourceLane + 4;
779  }
780 
781  return Builder.CreateShuffleVector(II.getArgOperand(0), V1, ShuffleMask);
782 }
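// Example of the transform above (illustrative IR sketch, not taken from this
// file): with a zero mask of 0, the immediate only selects lanes, so insertps
// becomes a two-operand shuffle. Immediate 0x40 selects source lane 1 and
// destination lane 0:
//
//   %r = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a,
//                                                  <4 x float> %b, i8 64)
//     =>
//   %r = shufflevector <4 x float> %a, <4 x float> %b,
//                      <4 x i32> <i32 5, i32 1, i32 2, i32 3>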
783 
784 /// Attempt to simplify SSE4A EXTRQ/EXTRQI instructions using constant folding
785 /// or conversion to a shuffle vector.
786 static Value *simplifyX86extrq(IntrinsicInst &II, Value *Op0,
787  ConstantInt *CILength, ConstantInt *CIIndex,
788  InstCombiner::BuilderTy &Builder) {
789  auto LowConstantHighUndef = [&](uint64_t Val) {
790  Type *IntTy64 = Type::getInt64Ty(II.getContext());
791  Constant *Args[] = {ConstantInt::get(IntTy64, Val),
792  UndefValue::get(IntTy64)};
793  return ConstantVector::get(Args);
794  };
795 
796  // See if we're dealing with constant values.
797  Constant *C0 = dyn_cast<Constant>(Op0);
798  ConstantInt *CI0 =
799  C0 ? dyn_cast_or_null<ConstantInt>(C0->getAggregateElement((unsigned)0))
800  : nullptr;
801 
802  // Attempt to constant fold.
803  if (CILength && CIIndex) {
804  // From AMD documentation: "The bit index and field length are each six
805  // bits in length other bits of the field are ignored."
806  APInt APIndex = CIIndex->getValue().zextOrTrunc(6);
807  APInt APLength = CILength->getValue().zextOrTrunc(6);
808 
809  unsigned Index = APIndex.getZExtValue();
810 
811  // From AMD documentation: "a value of zero in the field length is
812  // defined as length of 64".
813  unsigned Length = APLength == 0 ? 64 : APLength.getZExtValue();
814 
815  // From AMD documentation: "If the sum of the bit index + length field
816  // is greater than 64, the results are undefined".
817  unsigned End = Index + Length;
818 
819  // Note that both field index and field length are 8-bit quantities.
820  // Since variables 'Index' and 'Length' are unsigned values
821  // obtained from zero-extending field index and field length
822  // respectively, their sum should never wrap around.
823  if (End > 64)
824  return UndefValue::get(II.getType());
825 
826  // If we are inserting whole bytes, we can convert this to a shuffle.
827  // Lowering can recognize EXTRQI shuffle masks.
828  if ((Length % 8) == 0 && (Index % 8) == 0) {
829  // Convert bit indices to byte indices.
830  Length /= 8;
831  Index /= 8;
832 
833  Type *IntTy8 = Type::getInt8Ty(II.getContext());
834  Type *IntTy32 = Type::getInt32Ty(II.getContext());
835  VectorType *ShufTy = VectorType::get(IntTy8, 16);
836 
837  SmallVector<Constant *, 16> ShuffleMask;
838  for (int i = 0; i != (int)Length; ++i)
839  ShuffleMask.push_back(
840  Constant::getIntegerValue(IntTy32, APInt(32, i + Index)));
841  for (int i = Length; i != 8; ++i)
842  ShuffleMask.push_back(
843  Constant::getIntegerValue(IntTy32, APInt(32, i + 16)));
844  for (int i = 8; i != 16; ++i)
845  ShuffleMask.push_back(UndefValue::get(IntTy32));
846 
847  Value *SV = Builder.CreateShuffleVector(
848  Builder.CreateBitCast(Op0, ShufTy),
849  ConstantAggregateZero::get(ShufTy), ConstantVector::get(ShuffleMask));
850  return Builder.CreateBitCast(SV, II.getType());
851  }
852 
853  // Constant Fold - shift Index'th bit to lowest position and mask off
854  // Length bits.
855  if (CI0) {
856  APInt Elt = CI0->getValue();
857  Elt.lshrInPlace(Index);
858  Elt = Elt.zextOrTrunc(Length);
859  return LowConstantHighUndef(Elt.getZExtValue());
860  }
861 
862  // If we were an EXTRQ call, we'll save registers if we convert to EXTRQI.
863  if (II.getIntrinsicID() == Intrinsic::x86_sse4a_extrq) {
864  Value *Args[] = {Op0, CILength, CIIndex};
865  Module *M = II.getModule();
866  Value *F = Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_extrqi);
867  return Builder.CreateCall(F, Args);
868  }
869  }
870 
871  // Constant Fold - extraction from zero is always {zero, undef}.
872  if (CI0 && CI0->isZero())
873  return LowConstantHighUndef(0);
874 
875  return nullptr;
876 }
877 
878 /// Attempt to simplify SSE4A INSERTQ/INSERTQI instructions using constant
879 /// folding or conversion to a shuffle vector.
880 static Value *simplifyX86insertq(IntrinsicInst &II, Value *Op0, Value *Op1,
881  APInt APLength, APInt APIndex,
882  InstCombiner::BuilderTy &Builder) {
883  // From AMD documentation: "The bit index and field length are each six bits
884  // in length other bits of the field are ignored."
885  APIndex = APIndex.zextOrTrunc(6);
886  APLength = APLength.zextOrTrunc(6);
887 
888  // Attempt to constant fold.
889  unsigned Index = APIndex.getZExtValue();
890 
891  // From AMD documentation: "a value of zero in the field length is
892  // defined as length of 64".
893  unsigned Length = APLength == 0 ? 64 : APLength.getZExtValue();
894 
895  // From AMD documentation: "If the sum of the bit index + length field
896  // is greater than 64, the results are undefined".
897  unsigned End = Index + Length;
898 
899  // Note that both field index and field length are 8-bit quantities.
900  // Since variables 'Index' and 'Length' are unsigned values
901  // obtained from zero-extending field index and field length
902  // respectively, their sum should never wrap around.
903  if (End > 64)
904  return UndefValue::get(II.getType());
905 
906  // If we are inserting whole bytes, we can convert this to a shuffle.
907  // Lowering can recognize INSERTQI shuffle masks.
908  if ((Length % 8) == 0 && (Index % 8) == 0) {
909  // Convert bit indices to byte indices.
910  Length /= 8;
911  Index /= 8;
912 
913  Type *IntTy8 = Type::getInt8Ty(II.getContext());
914  Type *IntTy32 = Type::getInt32Ty(II.getContext());
915  VectorType *ShufTy = VectorType::get(IntTy8, 16);
916 
917  SmallVector<Constant *, 16> ShuffleMask;
918  for (int i = 0; i != (int)Index; ++i)
919  ShuffleMask.push_back(Constant::getIntegerValue(IntTy32, APInt(32, i)));
920  for (int i = 0; i != (int)Length; ++i)
921  ShuffleMask.push_back(
922  Constant::getIntegerValue(IntTy32, APInt(32, i + 16)));
923  for (int i = Index + Length; i != 8; ++i)
924  ShuffleMask.push_back(Constant::getIntegerValue(IntTy32, APInt(32, i)));
925  for (int i = 8; i != 16; ++i)
926  ShuffleMask.push_back(UndefValue::get(IntTy32));
927 
928  Value *SV = Builder.CreateShuffleVector(Builder.CreateBitCast(Op0, ShufTy),
929  Builder.CreateBitCast(Op1, ShufTy),
930  ConstantVector::get(ShuffleMask));
931  return Builder.CreateBitCast(SV, II.getType());
932  }
933 
934  // See if we're dealing with constant values.
935  Constant *C0 = dyn_cast<Constant>(Op0);
936  Constant *C1 = dyn_cast<Constant>(Op1);
937  ConstantInt *CI00 =
938  C0 ? dyn_cast_or_null<ConstantInt>(C0->getAggregateElement((unsigned)0))
939  : nullptr;
940  ConstantInt *CI10 =
941  C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)0))
942  : nullptr;
943 
944  // Constant Fold - insert bottom Length bits starting at the Index'th bit.
945  if (CI00 && CI10) {
946  APInt V00 = CI00->getValue();
947  APInt V10 = CI10->getValue();
948  APInt Mask = APInt::getLowBitsSet(64, Length).shl(Index);
949  V00 = V00 & ~Mask;
950  V10 = V10.zextOrTrunc(Length).zextOrTrunc(64).shl(Index);
951  APInt Val = V00 | V10;
952  Type *IntTy64 = Type::getInt64Ty(II.getContext());
953  Constant *Args[] = {ConstantInt::get(IntTy64, Val.getZExtValue()),
954  UndefValue::get(IntTy64)};
955  return ConstantVector::get(Args);
956  }
957 
958  // If we were an INSERTQ call, we'll save demanded elements if we convert to
959  // INSERTQI.
960  if (II.getIntrinsicID() == Intrinsic::x86_sse4a_insertq) {
961  Type *IntTy8 = Type::getInt8Ty(II.getContext());
962  Constant *CILength = ConstantInt::get(IntTy8, Length, false);
963  Constant *CIIndex = ConstantInt::get(IntTy8, Index, false);
964 
965  Value *Args[] = {Op0, Op1, CILength, CIIndex};
966  Module *M = II.getModule();
967  Value *F = Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_insertqi);
968  return Builder.CreateCall(F, Args);
969  }
970 
971  return nullptr;
972 }
973 
974 /// Attempt to convert pshufb* to shufflevector if the mask is constant.
975 static Value *simplifyX86pshufb(const IntrinsicInst &II,
976  InstCombiner::BuilderTy &Builder) {
977  Constant *V = dyn_cast<Constant>(II.getArgOperand(1));
978  if (!V)
979  return nullptr;
980 
981  auto *VecTy = cast<VectorType>(II.getType());
982  auto *MaskEltTy = Type::getInt32Ty(II.getContext());
983  unsigned NumElts = VecTy->getNumElements();
984  assert((NumElts == 16 || NumElts == 32 || NumElts == 64) &&
985  "Unexpected number of elements in shuffle mask!");
986 
987  // Construct a shuffle mask from constant integers or UNDEFs.
988  Constant *Indexes[64] = {nullptr};
989 
990  // Each byte in the shuffle control mask forms an index to permute the
991  // corresponding byte in the destination operand.
992  for (unsigned I = 0; I < NumElts; ++I) {
993  Constant *COp = V->getAggregateElement(I);
994  if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
995  return nullptr;
996 
997  if (isa<UndefValue>(COp)) {
998  Indexes[I] = UndefValue::get(MaskEltTy);
999  continue;
1000  }
1001 
1002  int8_t Index = cast<ConstantInt>(COp)->getValue().getZExtValue();
1003 
1004  // If the most significant bit (bit[7]) of each byte of the shuffle
1005  // control mask is set, then zero is written in the result byte.
1006  // The zero vector is in the right-hand side of the resulting
1007  // shufflevector.
1008 
1009  // The value of each index for the high 128-bit lane is the least
1010  // significant 4 bits of the respective shuffle control byte.
1011  Index = ((Index < 0) ? NumElts : Index & 0x0F) + (I & 0xF0);
1012  Indexes[I] = ConstantInt::get(MaskEltTy, Index);
1013  }
1014 
1015  auto ShuffleMask = ConstantVector::get(makeArrayRef(Indexes, NumElts));
1016  auto V1 = II.getArgOperand(0);
1017  auto V2 = Constant::getNullValue(VecTy);
1018  return Builder.CreateShuffleVector(V1, V2, ShuffleMask);
1019 }
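// Example of the transform above (illustrative IR sketch, not taken from this
// file): a pshufb with a constant control mask becomes a shufflevector against
// a zero vector; control bytes with bit 7 set select the zero operand:
//
//   call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %v,
//       <16 x i8> <i8 1, i8 0, i8 -128, ...>)
//     =>
//   shufflevector <16 x i8> %v, <16 x i8> zeroinitializer,
//                 <16 x i32> <i32 1, i32 0, i32 16, ...>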
1020 
1021 /// Attempt to convert vpermilvar* to shufflevector if the mask is constant.
1022 static Value *simplifyX86vpermilvar(const IntrinsicInst &II,
1023  InstCombiner::BuilderTy &Builder) {
1024  Constant *V = dyn_cast<Constant>(II.getArgOperand(1));
1025  if (!V)
1026  return nullptr;
1027 
1028  auto *VecTy = cast<VectorType>(II.getType());
1029  auto *MaskEltTy = Type::getInt32Ty(II.getContext());
1030  unsigned NumElts = VecTy->getVectorNumElements();
1031  bool IsPD = VecTy->getScalarType()->isDoubleTy();
1032  unsigned NumLaneElts = IsPD ? 2 : 4;
1033  assert(NumElts == 16 || NumElts == 8 || NumElts == 4 || NumElts == 2);
1034 
1035  // Construct a shuffle mask from constant integers or UNDEFs.
1036  Constant *Indexes[16] = {nullptr};
1037 
1038  // The intrinsics only read one or two bits, clear the rest.
1039  for (unsigned I = 0; I < NumElts; ++I) {
1040  Constant *COp = V->getAggregateElement(I);
1041  if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
1042  return nullptr;
1043 
1044  if (isa<UndefValue>(COp)) {
1045  Indexes[I] = UndefValue::get(MaskEltTy);
1046  continue;
1047  }
1048 
1049  APInt Index = cast<ConstantInt>(COp)->getValue();
1050  Index = Index.zextOrTrunc(32).getLoBits(2);
1051 
1052  // The PD variants use bit 1 to select the per-lane element index, so
1053  // shift down to convert to generic shuffle mask index.
1054  if (IsPD)
1055  Index.lshrInPlace(1);
1056 
1057  // The _256 variants are a bit trickier since the mask bits always index
1058  // into the corresponding 128-bit half. In order to convert to a generic
1059  // shuffle, we have to make that explicit.
1060  Index += APInt(32, (I / NumLaneElts) * NumLaneElts);
1061 
1062  Indexes[I] = ConstantInt::get(MaskEltTy, Index);
1063  }
1064 
1065  auto ShuffleMask = ConstantVector::get(makeArrayRef(Indexes, NumElts));
1066  auto V1 = II.getArgOperand(0);
1067  auto V2 = UndefValue::get(V1->getType());
1068  return Builder.CreateShuffleVector(V1, V2, ShuffleMask);
1069 }
1070 
1071 /// Attempt to convert vpermd/vpermps to shufflevector if the mask is constant.
1072 static Value *simplifyX86vpermv(const IntrinsicInst &II,
1073  InstCombiner::BuilderTy &Builder) {
1074  auto *V = dyn_cast<Constant>(II.getArgOperand(1));
1075  if (!V)
1076  return nullptr;
1077 
1078  auto *VecTy = cast<VectorType>(II.getType());
1079  auto *MaskEltTy = Type::getInt32Ty(II.getContext());
1080  unsigned Size = VecTy->getNumElements();
1081  assert((Size == 4 || Size == 8 || Size == 16 || Size == 32 || Size == 64) &&
1082  "Unexpected shuffle mask size");
1083 
1084  // Construct a shuffle mask from constant integers or UNDEFs.
1085  Constant *Indexes[64] = {nullptr};
1086 
1087  for (unsigned I = 0; I < Size; ++I) {
1088  Constant *COp = V->getAggregateElement(I);
1089  if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
1090  return nullptr;
1091 
1092  if (isa<UndefValue>(COp)) {
1093  Indexes[I] = UndefValue::get(MaskEltTy);
1094  continue;
1095  }
1096 
1097  uint32_t Index = cast<ConstantInt>(COp)->getZExtValue();
1098  Index &= Size - 1;
1099  Indexes[I] = ConstantInt::get(MaskEltTy, Index);
1100  }
1101 
1102  auto ShuffleMask = ConstantVector::get(makeArrayRef(Indexes, Size));
1103  auto V1 = II.getArgOperand(0);
1104  auto V2 = UndefValue::get(VecTy);
1105  return Builder.CreateShuffleVector(V1, V2, ShuffleMask);
1106 }
1107 
1108 /// Decode XOP integer vector comparison intrinsics.
1109 static Value *simplifyX86vpcom(const IntrinsicInst &II,
1110  InstCombiner::BuilderTy &Builder,
1111  bool IsSigned) {
1112  if (auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
1113  uint64_t Imm = CInt->getZExtValue() & 0x7;
1114  VectorType *VecTy = cast<VectorType>(II.getType());
1115  ICmpInst::Predicate Pred = ICmpInst::ICMP_EQ;
1116 
1117  switch (Imm) {
1118  case 0x0:
1119  Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
1120  break;
1121  case 0x1:
1122  Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
1123  break;
1124  case 0x2:
1125  Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
1126  break;
1127  case 0x3:
1128  Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
1129  break;
1130  case 0x4:
1131  Pred = ICmpInst::ICMP_EQ; break;
1132  case 0x5:
1133  Pred = ICmpInst::ICMP_NE; break;
1134  case 0x6:
1135  return ConstantInt::getSigned(VecTy, 0); // FALSE
1136  case 0x7:
1137  return ConstantInt::getSigned(VecTy, -1); // TRUE
1138  }
1139 
1140  if (Value *Cmp = Builder.CreateICmp(Pred, II.getArgOperand(0),
1141  II.getArgOperand(1)))
1142  return Builder.CreateSExtOrTrunc(Cmp, VecTy);
1143  }
1144  return nullptr;
1145 }
1146 
1147 // Emit a select instruction and appropriate bitcasts to help simplify
1148 // masked intrinsics.
1149 static Value *emitX86MaskSelect(Value *Mask, Value *Op0, Value *Op1,
1150  InstCombiner::BuilderTy &Builder) {
1151  unsigned VWidth = Op0->getType()->getVectorNumElements();
1152 
1153  // If the mask is all ones we don't need the select. But we need to check
1154  // only the bits that will be used in case VWidth is less than 8.
1155  if (auto *C = dyn_cast<ConstantInt>(Mask))
1156  if (C->getValue().zextOrTrunc(VWidth).isAllOnesValue())
1157  return Op0;
1158 
1159  auto *MaskTy = VectorType::get(Builder.getInt1Ty(),
1160  cast<IntegerType>(Mask->getType())->getBitWidth());
1161  Mask = Builder.CreateBitCast(Mask, MaskTy);
1162 
1163  // If we have less than 8 elements, then the starting mask was an i8 and
1164  // we need to extract down to the right number of elements.
1165  if (VWidth < 8) {
1166  uint32_t Indices[4];
1167  for (unsigned i = 0; i != VWidth; ++i)
1168  Indices[i] = i;
1169  Mask = Builder.CreateShuffleVector(Mask, Mask,
1170  makeArrayRef(Indices, VWidth),
1171  "extract");
1172  }
1173 
1174  return Builder.CreateSelect(Mask, Op0, Op1);
1175 }
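// Example of the expansion above (illustrative IR sketch, not taken from this
// file): for a 4-element operation guarded by an i8 mask, the mask is bitcast
// to <8 x i1>, the low four bits are extracted, and a select blends the two
// operands (the element type here is just for illustration):
//
//   %m8 = bitcast i8 %mask to <8 x i1>
//   %m4 = shufflevector <8 x i1> %m8, <8 x i1> %m8,
//                       <4 x i32> <i32 0, i32 1, i32 2, i32 3>
//   %r  = select <4 x i1> %m4, <4 x float> %op0, <4 x float> %op1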
1176 
1177 static Value *simplifyMinnumMaxnum(const IntrinsicInst &II) {
1178  Value *Arg0 = II.getArgOperand(0);
1179  Value *Arg1 = II.getArgOperand(1);
1180 
1181  // fmin(x, x) -> x
1182  if (Arg0 == Arg1)
1183  return Arg0;
1184 
1185  const auto *C1 = dyn_cast<ConstantFP>(Arg1);
1186 
1187  // fmin(x, nan) -> x
1188  if (C1 && C1->isNaN())
1189  return Arg0;
1190 
1191  // Treat undef as the other value: if undef were NaN, we would return the
1192  // other value anyway, and we cannot return NaN unless both operands are.
1193  //
1194  // fmin(undef, x) -> x
1195  if (isa<UndefValue>(Arg0))
1196  return Arg1;
1197 
1198  // fmin(x, undef) -> x
1199  if (isa<UndefValue>(Arg1))
1200  return Arg0;
1201 
1202  Value *X = nullptr;
1203  Value *Y = nullptr;
1204  if (II.getIntrinsicID() == Intrinsic::minnum) {
1205  // fmin(x, fmin(x, y)) -> fmin(x, y)
1206  // fmin(y, fmin(x, y)) -> fmin(x, y)
1207  if (match(Arg1, m_FMin(m_Value(X), m_Value(Y)))) {
1208  if (Arg0 == X || Arg0 == Y)
1209  return Arg1;
1210  }
1211 
1212  // fmin(fmin(x, y), x) -> fmin(x, y)
1213  // fmin(fmin(x, y), y) -> fmin(x, y)
1214  if (match(Arg0, m_FMin(m_Value(X), m_Value(Y)))) {
1215  if (Arg1 == X || Arg1 == Y)
1216  return Arg0;
1217  }
1218 
1219  // TODO: fmin(nnan x, inf) -> x
1220  // TODO: fmin(nnan ninf x, flt_max) -> x
1221  if (C1 && C1->isInfinity()) {
1222  // fmin(x, -inf) -> -inf
1223  if (C1->isNegative())
1224  return Arg1;
1225  }
1226  } else {
1227  assert(II.getIntrinsicID() == Intrinsic::maxnum && "unexpected intrinsic");
1228  // fmax(x, fmax(x, y)) -> fmax(x, y)
1229  // fmax(y, fmax(x, y)) -> fmax(x, y)
1230  if (match(Arg1, m_FMax(m_Value(X), m_Value(Y)))) {
1231  if (Arg0 == X || Arg0 == Y)
1232  return Arg1;
1233  }
1234 
1235  // fmax(fmax(x, y), x) -> fmax(x, y)
1236  // fmax(fmax(x, y), y) -> fmax(x, y)
1237  if (match(Arg0, m_FMax(m_Value(X), m_Value(Y)))) {
1238  if (Arg1 == X || Arg1 == Y)
1239  return Arg0;
1240  }
1241 
1242  // TODO: fmax(nnan x, -inf) -> x
1243  // TODO: fmax(nnan ninf x, -flt_max) -> x
1244  if (C1 && C1->isInfinity()) {
1245  // fmax(x, inf) -> inf
1246  if (!C1->isNegative())
1247  return Arg1;
1248  }
1249  }
1250  return nullptr;
1251 }
1252 
1253 static bool maskIsAllOneOrUndef(Value *Mask) {
1254  auto *ConstMask = dyn_cast<Constant>(Mask);
1255  if (!ConstMask)
1256  return false;
1257  if (ConstMask->isAllOnesValue() || isa<UndefValue>(ConstMask))
1258  return true;
1259  for (unsigned I = 0, E = ConstMask->getType()->getVectorNumElements(); I != E;
1260  ++I) {
1261  if (auto *MaskElt = ConstMask->getAggregateElement(I))
1262  if (MaskElt->isAllOnesValue() || isa<UndefValue>(MaskElt))
1263  continue;
1264  return false;
1265  }
1266  return true;
1267 }
1268 
1269 static Value *simplifyMaskedLoad(const IntrinsicInst &II,
1270  InstCombiner::BuilderTy &Builder) {
1271  // If the mask is all ones or undefs, this is a plain vector load of the 1st
1272  // argument.
1273  if (maskIsAllOneOrUndef(II.getArgOperand(2))) {
1274  Value *LoadPtr = II.getArgOperand(0);
1275  unsigned Alignment = cast<ConstantInt>(II.getArgOperand(1))->getZExtValue();
1276  return Builder.CreateAlignedLoad(LoadPtr, Alignment, "unmaskedload");
1277  }
1278 
1279  return nullptr;
1280 }
1281 
1282 static Instruction *simplifyMaskedStore(IntrinsicInst &II, InstCombiner &IC) {
1283  auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(3));
1284  if (!ConstMask)
1285  return nullptr;
1286 
1287  // If the mask is all zeros, this instruction does nothing.
1288  if (ConstMask->isNullValue())
1289  return IC.eraseInstFromFunction(II);
1290 
1291  // If the mask is all ones, this is a plain vector store of the 1st argument.
1292  if (ConstMask->isAllOnesValue()) {
1293  Value *StorePtr = II.getArgOperand(1);
1294  unsigned Alignment = cast<ConstantInt>(II.getArgOperand(2))->getZExtValue();
1295  return new StoreInst(II.getArgOperand(0), StorePtr, false, Alignment);
1296  }
1297 
1298  return nullptr;
1299 }
1300 
1301 static Instruction *simplifyMaskedGather(IntrinsicInst &II, InstCombiner &IC) {
1302  // If the mask is all zeros, return the "passthru" argument of the gather.
1303  auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(2));
1304  if (ConstMask && ConstMask->isNullValue())
1305  return IC.replaceInstUsesWith(II, II.getArgOperand(3));
1306 
1307  return nullptr;
1308 }
1309 
1310 static Instruction *simplifyMaskedScatter(IntrinsicInst &II, InstCombiner &IC) {
1311  // If the mask is all zeros, a scatter does nothing.
1312  auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(3));
1313  if (ConstMask && ConstMask->isNullValue())
1314  return IC.eraseInstFromFunction(II);
1315 
1316  return nullptr;
1317 }
1318 
1319 static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombiner &IC) {
1320  assert((II.getIntrinsicID() == Intrinsic::cttz ||
1321  II.getIntrinsicID() == Intrinsic::ctlz) &&
1322  "Expected cttz or ctlz intrinsic");
1323  Value *Op0 = II.getArgOperand(0);
1324 
1325  KnownBits Known = IC.computeKnownBits(Op0, 0, &II);
1326 
1327  // Create a mask for bits above (ctlz) or below (cttz) the first known one.
1328  bool IsTZ = II.getIntrinsicID() == Intrinsic::cttz;
1329  unsigned PossibleZeros = IsTZ ? Known.countMaxTrailingZeros()
1330  : Known.countMaxLeadingZeros();
1331  unsigned DefiniteZeros = IsTZ ? Known.countMinTrailingZeros()
1332  : Known.countMinLeadingZeros();
1333 
1334  // If all bits above (ctlz) or below (cttz) the first known one are known
1335  // zero, this value is constant.
1336  // FIXME: This should be in InstSimplify because we're replacing an
1337  // instruction with a constant.
1338  if (PossibleZeros == DefiniteZeros) {
1339  auto *C = ConstantInt::get(Op0->getType(), DefiniteZeros);
1340  return IC.replaceInstUsesWith(II, C);
1341  }
1342 
1343  // If the input to cttz/ctlz is known to be non-zero,
1344  // then change the 'ZeroIsUndef' parameter to 'true'
1345  // because we know the zero behavior can't affect the result.
1346  if (!Known.One.isNullValue() ||
1347  isKnownNonZero(Op0, IC.getDataLayout(), 0, &IC.getAssumptionCache(), &II,
1348  &IC.getDominatorTree())) {
1349  if (!match(II.getArgOperand(1), m_One())) {
1350  II.setOperand(1, IC.Builder.getTrue());
1351  return &II;
1352  }
1353  }
1354 
1355  // Add range metadata since known bits can't completely reflect what we know.
1356  // TODO: Handle splat vectors.
1357  auto *IT = dyn_cast<IntegerType>(Op0->getType());
1358  if (IT && IT->getBitWidth() != 1 && !II.getMetadata(LLVMContext::MD_range)) {
1359  Metadata *LowAndHigh[] = {
1360  ConstantAsMetadata::get(ConstantInt::get(IT, DefiniteZeros)),
1361  ConstantAsMetadata::get(ConstantInt::get(IT, PossibleZeros + 1))};
1362  II.setMetadata(LLVMContext::MD_range,
1363  MDNode::get(II.getContext(), LowAndHigh));
1364  return &II;
1365  }
1366 
1367  return nullptr;
1368 }
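// Example of the folds above (illustrative, not taken from this file): if %x
// is known to have one of its low 8 bits set, the input is known non-zero, so
// the is_zero_undef flag can be set to true and the result bounded by range
// metadata:
//
//   %c = call i32 @llvm.cttz.i32(i32 %x, i1 true), !range !0
//   !0 = !{i32 0, i32 8}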
1369 
1370 static Instruction *foldCtpop(IntrinsicInst &II, InstCombiner &IC) {
1371  assert(II.getIntrinsicID() == Intrinsic::ctpop &&
1372  "Expected ctpop intrinsic");
1373  Value *Op0 = II.getArgOperand(0);
1374  // FIXME: Try to simplify vectors of integers.
1375  auto *IT = dyn_cast<IntegerType>(Op0->getType());
1376  if (!IT)
1377  return nullptr;
1378 
1379  unsigned BitWidth = IT->getBitWidth();
1380  KnownBits Known(BitWidth);
1381  IC.computeKnownBits(Op0, Known, 0, &II);
1382 
1383  unsigned MinCount = Known.countMinPopulation();
1384  unsigned MaxCount = Known.countMaxPopulation();
1385 
1386  // Add range metadata since known bits can't completely reflect what we know.
1387  if (IT->getBitWidth() != 1 && !II.getMetadata(LLVMContext::MD_range)) {
1388  Metadata *LowAndHigh[] = {
1389  ConstantAsMetadata::get(ConstantInt::get(IT, MinCount)),
1390  ConstantAsMetadata::get(ConstantInt::get(IT, MaxCount + 1))};
1391  II.setMetadata(LLVMContext::MD_range,
1392  MDNode::get(II.getContext(), LowAndHigh));
1393  return &II;
1394  }
1395 
1396  return nullptr;
1397 }
1398 
1399 // TODO: If the x86 backend knew how to convert a bool vector mask back to an
1400 // XMM register mask efficiently, we could transform all x86 masked intrinsics
1401 // to LLVM masked intrinsics and remove the x86 masked intrinsic defs.
1402 static Instruction *simplifyX86MaskedLoad(IntrinsicInst &II, InstCombiner &IC) {
1403  Value *Ptr = II.getOperand(0);
1404  Value *Mask = II.getOperand(1);
1405  Constant *ZeroVec = Constant::getNullValue(II.getType());
1406 
1407  // Special case a zero mask since that's not a ConstantDataVector.
1408  // This masked load instruction creates a zero vector.
1409  if (isa<ConstantAggregateZero>(Mask))
1410  return IC.replaceInstUsesWith(II, ZeroVec);
1411 
1412  auto *ConstMask = dyn_cast<ConstantDataVector>(Mask);
1413  if (!ConstMask)
1414  return nullptr;
1415 
1416  // The mask is constant. Convert this x86 intrinsic to the LLVM intrinsic
1417  // to allow target-independent optimizations.
1418 
1419  // First, cast the x86 intrinsic scalar pointer to a vector pointer to match
1420  // the LLVM intrinsic definition for the pointer argument.
1421  unsigned AddrSpace = cast<PointerType>(Ptr->getType())->getAddressSpace();
1422  PointerType *VecPtrTy = PointerType::get(II.getType(), AddrSpace);
1423  Value *PtrCast = IC.Builder.CreateBitCast(Ptr, VecPtrTy, "castvec");
1424 
1425  // Second, convert the x86 XMM integer vector mask to a vector of bools based
1426  // on each element's most significant bit (the sign bit).
1427  Constant *BoolMask = getNegativeIsTrueBoolVec(ConstMask);
1428 
1429  // The pass-through vector for an x86 masked load is a zero vector.
1430  CallInst *NewMaskedLoad =
1431  IC.Builder.CreateMaskedLoad(PtrCast, 1, BoolMask, ZeroVec);
1432  return IC.replaceInstUsesWith(II, NewMaskedLoad);
1433 }
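// Example of the transform above (illustrative IR sketch, not taken from this
// file): an AVX2 maskload with a constant mask becomes the generic masked load
// intrinsic, with an i1 mask derived from the sign bits and a zero passthru:
//
//   %r = call <4 x i32> @llvm.x86.avx2.maskload.d(i8* %p,
//            <4 x i32> <i32 -1, i32 0, i32 -1, i32 0>)
//     =>
//   %vp = bitcast i8* %p to <4 x i32>*
//   %r  = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %vp, i32 1,
//             <4 x i1> <i1 true, i1 false, i1 true, i1 false>,
//             <4 x i32> zeroinitializer)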
1434 
1435 // TODO: If the x86 backend knew how to convert a bool vector mask back to an
1436 // XMM register mask efficiently, we could transform all x86 masked intrinsics
1437 // to LLVM masked intrinsics and remove the x86 masked intrinsic defs.
1438 static bool simplifyX86MaskedStore(IntrinsicInst &II, InstCombiner &IC) {
1439  Value *Ptr = II.getOperand(0);
1440  Value *Mask = II.getOperand(1);
1441  Value *Vec = II.getOperand(2);
1442 
1443  // Special case a zero mask since that's not a ConstantDataVector:
1444  // this masked store instruction does nothing.
1445  if (isa<ConstantAggregateZero>(Mask)) {
1446  IC.eraseInstFromFunction(II);
1447  return true;
1448  }
1449 
1450  // The SSE2 version is too weird (eg, unaligned but non-temporal) to do
1451  // anything else at this level.
1452  if (II.getIntrinsicID() == Intrinsic::x86_sse2_maskmov_dqu)
1453  return false;
1454 
1455  auto *ConstMask = dyn_cast<ConstantDataVector>(Mask);
1456  if (!ConstMask)
1457  return false;
1458 
1459  // The mask is constant. Convert this x86 intrinsic to the LLVM intrinsic
1460  // to allow target-independent optimizations.
1461 
1462  // First, cast the x86 intrinsic scalar pointer to a vector pointer to match
1463  // the LLVM intrinsic definition for the pointer argument.
1464  unsigned AddrSpace = cast<PointerType>(Ptr->getType())->getAddressSpace();
1465  PointerType *VecPtrTy = PointerType::get(Vec->getType(), AddrSpace);
1466  Value *PtrCast = IC.Builder.CreateBitCast(Ptr, VecPtrTy, "castvec");
1467 
1468  // Second, convert the x86 XMM integer vector mask to a vector of bools based
1469  // on each element's most significant bit (the sign bit).
1470  Constant *BoolMask = getNegativeIsTrueBoolVec(ConstMask);
1471 
1472  IC.Builder.CreateMaskedStore(Vec, PtrCast, 1, BoolMask);
1473 
1474  // 'Replace uses' doesn't work for stores. Erase the original masked store.
1475  IC.eraseInstFromFunction(II);
1476  return true;
1477 }
1478 
1479 // Constant fold llvm.amdgcn.fmed3 intrinsics for standard inputs.
1480 //
1481 // A single NaN input is folded to minnum, so we rely on that folding for
1482 // handling NaNs.
1483 static APFloat fmed3AMDGCN(const APFloat &Src0, const APFloat &Src1,
1484  const APFloat &Src2) {
1485  APFloat Max3 = maxnum(maxnum(Src0, Src1), Src2);
1486 
1487  APFloat::cmpResult Cmp0 = Max3.compare(Src0);
1488  assert(Cmp0 != APFloat::cmpUnordered && "nans handled separately");
1489  if (Cmp0 == APFloat::cmpEqual)
1490  return maxnum(Src1, Src2);
1491 
1492  APFloat::cmpResult Cmp1 = Max3.compare(Src1);
1493  assert(Cmp1 != APFloat::cmpUnordered && "nans handled separately");
1494  if (Cmp1 == APFloat::cmpEqual)
1495  return maxnum(Src0, Src2);
1496 
1497  return maxnum(Src0, Src1);
1498 }
1499 
1500 // Returns true iff the 2 intrinsics have the same operands, limiting the
1501 // comparison to the first NumOperands.
1502 static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E,
1503  unsigned NumOperands) {
1504  assert(I.getNumArgOperands() >= NumOperands && "Not enough operands");
1505  assert(E.getNumArgOperands() >= NumOperands && "Not enough operands");
1506  for (unsigned i = 0; i < NumOperands; i++)
1507  if (I.getArgOperand(i) != E.getArgOperand(i))
1508  return false;
1509  return true;
1510 }
1511 
1512 // Remove trivially empty start/end intrinsic ranges, i.e. a start
1513 // immediately followed by an end (ignoring debuginfo or other
1514 // start/end intrinsics in between). As this handles only the most trivial
1515 // cases, tracking the nesting level is not needed:
1516 //
1517 // call @llvm.foo.start(i1 0) ; &I
1518 // call @llvm.foo.start(i1 0)
1519 // call @llvm.foo.end(i1 0) ; This one will not be skipped: it will be removed
1520 // call @llvm.foo.end(i1 0)
1521 static bool removeTriviallyEmptyRange(IntrinsicInst &I, unsigned StartID,
1522  unsigned EndID, InstCombiner &IC) {
1523  assert(I.getIntrinsicID() == StartID &&
1524  "Start intrinsic does not have expected ID");
1525  BasicBlock::iterator BI(I), BE(I.getParent()->end());
1526  for (++BI; BI != BE; ++BI) {
1527  if (auto *E = dyn_cast<IntrinsicInst>(BI)) {
1528  if (isa<DbgInfoIntrinsic>(E) || E->getIntrinsicID() == StartID)
1529  continue;
1530  if (E->getIntrinsicID() == EndID &&
1531  haveSameOperands(I, *E, E->getNumArgOperands())) {
1532  IC.eraseInstFromFunction(*E);
1533  IC.eraseInstFromFunction(I);
1534  return true;
1535  }
1536  }
1537  break;
1538  }
1539 
1540  return false;
1541 }
1542 
1543 // Convert NVVM intrinsics to target-generic LLVM code where possible.
1544 static Instruction *simplifyNVVMIntrinsic(IntrinsicInst *II, InstCombiner &IC) {
1545  // Each NVVM intrinsic we can simplify can be replaced with one of:
1546  //
1547  // * an LLVM intrinsic,
1548  // * an LLVM cast operation,
1549  // * an LLVM binary operation, or
1550  // * ad-hoc LLVM IR for the particular operation.
1551 
1552  // Some transformations are only valid when the module's
1553  // flush-denormals-to-zero (ftz) setting is true/false, whereas other
1554  // transformations are valid regardless of the module's ftz setting.
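  // For example, nvvm.floor.d can be replaced by llvm.floor regardless of the
  // ftz setting, nvvm.floor.f only when ftz is off, and nvvm.floor.ftz.f only
  // when ftz is on (see the switch below).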
1555  enum FtzRequirementTy {
1556  FTZ_Any, // Any ftz setting is ok.
1557  FTZ_MustBeOn, // Transformation is valid only if ftz is on.
1558  FTZ_MustBeOff, // Transformation is valid only if ftz is off.
1559  };
1560  // Classes of NVVM intrinsics that can't be replaced one-to-one with a
1561  // target-generic intrinsic, cast op, or binary op but that we can nonetheless
1562  // simplify.
1563  enum SpecialCase {
1564  SPC_Reciprocal,
1565  };
1566 
1567  // SimplifyAction is a poor-man's variant (plus an additional flag) that
1568  // represents how to replace an NVVM intrinsic with target-generic LLVM IR.
1569  struct SimplifyAction {
1570  // Invariant: At most one of these Optionals has a value.
1571   Optional<Intrinsic::ID> IID;
1572   Optional<Instruction::CastOps> CastOp;
1573   Optional<Instruction::BinaryOps> BinaryOp;
1574   Optional<SpecialCase> Special;
1575 
1576  FtzRequirementTy FtzRequirement = FTZ_Any;
1577 
1578  SimplifyAction() = default;
1579 
1580  SimplifyAction(Intrinsic::ID IID, FtzRequirementTy FtzReq)
1581  : IID(IID), FtzRequirement(FtzReq) {}
1582 
1583  // Cast operations don't have anything to do with FTZ, so we skip that
1584  // argument.
1585  SimplifyAction(Instruction::CastOps CastOp) : CastOp(CastOp) {}
1586 
1587  SimplifyAction(Instruction::BinaryOps BinaryOp, FtzRequirementTy FtzReq)
1588  : BinaryOp(BinaryOp), FtzRequirement(FtzReq) {}
1589 
1590  SimplifyAction(SpecialCase Special, FtzRequirementTy FtzReq)
1591  : Special(Special), FtzRequirement(FtzReq) {}
1592  };
1593 
1594  // Try to generate a SimplifyAction describing how to replace our
1595  // IntrinsicInstr with target-generic LLVM IR.
1596  const SimplifyAction Action = [II]() -> SimplifyAction {
1597  switch (II->getIntrinsicID()) {
1598  // NVVM intrinsics that map directly to LLVM intrinsics.
1599  case Intrinsic::nvvm_ceil_d:
1600  return {Intrinsic::ceil, FTZ_Any};
1601  case Intrinsic::nvvm_ceil_f:
1602  return {Intrinsic::ceil, FTZ_MustBeOff};
1603  case Intrinsic::nvvm_ceil_ftz_f:
1604  return {Intrinsic::ceil, FTZ_MustBeOn};
1605  case Intrinsic::nvvm_fabs_d:
1606  return {Intrinsic::fabs, FTZ_Any};
1607  case Intrinsic::nvvm_fabs_f:
1608  return {Intrinsic::fabs, FTZ_MustBeOff};
1609  case Intrinsic::nvvm_fabs_ftz_f:
1610  return {Intrinsic::fabs, FTZ_MustBeOn};
1611  case Intrinsic::nvvm_floor_d:
1612  return {Intrinsic::floor, FTZ_Any};
1613  case Intrinsic::nvvm_floor_f:
1614  return {Intrinsic::floor, FTZ_MustBeOff};
1615  case Intrinsic::nvvm_floor_ftz_f:
1616  return {Intrinsic::floor, FTZ_MustBeOn};
1617  case Intrinsic::nvvm_fma_rn_d:
1618  return {Intrinsic::fma, FTZ_Any};
1619  case Intrinsic::nvvm_fma_rn_f:
1620  return {Intrinsic::fma, FTZ_MustBeOff};
1621  case Intrinsic::nvvm_fma_rn_ftz_f:
1622  return {Intrinsic::fma, FTZ_MustBeOn};
1623  case Intrinsic::nvvm_fmax_d:
1624  return {Intrinsic::maxnum, FTZ_Any};
1625  case Intrinsic::nvvm_fmax_f:
1626  return {Intrinsic::maxnum, FTZ_MustBeOff};
1627  case Intrinsic::nvvm_fmax_ftz_f:
1628  return {Intrinsic::maxnum, FTZ_MustBeOn};
1629  case Intrinsic::nvvm_fmin_d:
1630  return {Intrinsic::minnum, FTZ_Any};
1631  case Intrinsic::nvvm_fmin_f:
1632  return {Intrinsic::minnum, FTZ_MustBeOff};
1633  case Intrinsic::nvvm_fmin_ftz_f:
1634  return {Intrinsic::minnum, FTZ_MustBeOn};
1635  case Intrinsic::nvvm_round_d:
1636  return {Intrinsic::round, FTZ_Any};
1637  case Intrinsic::nvvm_round_f:
1638  return {Intrinsic::round, FTZ_MustBeOff};
1639  case Intrinsic::nvvm_round_ftz_f:
1640  return {Intrinsic::round, FTZ_MustBeOn};
1641  case Intrinsic::nvvm_sqrt_rn_d:
1642  return {Intrinsic::sqrt, FTZ_Any};
1643  case Intrinsic::nvvm_sqrt_f:
1644  // nvvm_sqrt_f is a special case. For most intrinsics, foo_ftz_f is the
1645  // ftz version, and foo_f is the non-ftz version. But nvvm_sqrt_f adopts
1646  // the ftz-ness of the surrounding code. sqrt_rn_f and sqrt_rn_ftz_f are
1647  // the versions with explicit ftz-ness.
1648  return {Intrinsic::sqrt, FTZ_Any};
1649  case Intrinsic::nvvm_sqrt_rn_f:
1650  return {Intrinsic::sqrt, FTZ_MustBeOff};
1651  case Intrinsic::nvvm_sqrt_rn_ftz_f:
1652  return {Intrinsic::sqrt, FTZ_MustBeOn};
1653  case Intrinsic::nvvm_trunc_d:
1654  return {Intrinsic::trunc, FTZ_Any};
1655  case Intrinsic::nvvm_trunc_f:
1656  return {Intrinsic::trunc, FTZ_MustBeOff};
1657  case Intrinsic::nvvm_trunc_ftz_f:
1658  return {Intrinsic::trunc, FTZ_MustBeOn};
1659 
1660  // NVVM intrinsics that map to LLVM cast operations.
1661  //
1662  // Note that llvm's target-generic conversion operators correspond to the rz
1663  // (round to zero) versions of the nvvm conversion intrinsics, even though
1664  // most everything else here uses the rn (round to nearest even) nvvm ops.
1665  case Intrinsic::nvvm_d2i_rz:
1666  case Intrinsic::nvvm_f2i_rz:
1667  case Intrinsic::nvvm_d2ll_rz:
1668  case Intrinsic::nvvm_f2ll_rz:
1669  return {Instruction::FPToSI};
1670  case Intrinsic::nvvm_d2ui_rz:
1671  case Intrinsic::nvvm_f2ui_rz:
1672  case Intrinsic::nvvm_d2ull_rz:
1673  case Intrinsic::nvvm_f2ull_rz:
1674  return {Instruction::FPToUI};
1675  case Intrinsic::nvvm_i2d_rz:
1676  case Intrinsic::nvvm_i2f_rz:
1677  case Intrinsic::nvvm_ll2d_rz:
1678  case Intrinsic::nvvm_ll2f_rz:
1679  return {Instruction::SIToFP};
1680  case Intrinsic::nvvm_ui2d_rz:
1681  case Intrinsic::nvvm_ui2f_rz:
1682  case Intrinsic::nvvm_ull2d_rz:
1683  case Intrinsic::nvvm_ull2f_rz:
1684  return {Instruction::UIToFP};
1685 
1686  // NVVM intrinsics that map to LLVM binary ops.
1687  case Intrinsic::nvvm_add_rn_d:
1688  return {Instruction::FAdd, FTZ_Any};
1689  case Intrinsic::nvvm_add_rn_f:
1690  return {Instruction::FAdd, FTZ_MustBeOff};
1691  case Intrinsic::nvvm_add_rn_ftz_f:
1692  return {Instruction::FAdd, FTZ_MustBeOn};
1693  case Intrinsic::nvvm_mul_rn_d:
1694  return {Instruction::FMul, FTZ_Any};
1695  case Intrinsic::nvvm_mul_rn_f:
1696  return {Instruction::FMul, FTZ_MustBeOff};
1697  case Intrinsic::nvvm_mul_rn_ftz_f:
1698  return {Instruction::FMul, FTZ_MustBeOn};
1699  case Intrinsic::nvvm_div_rn_d:
1700  return {Instruction::FDiv, FTZ_Any};
1701  case Intrinsic::nvvm_div_rn_f:
1702  return {Instruction::FDiv, FTZ_MustBeOff};
1703  case Intrinsic::nvvm_div_rn_ftz_f:
1704  return {Instruction::FDiv, FTZ_MustBeOn};
1705 
1706  // The remainder of cases are NVVM intrinsics that map to LLVM idioms, but
1707  // need special handling.
1708  //
1709  // We seem to be missing intrinsics for rcp.approx.{ftz.}f32, which is just
1710  // as well.
1711  case Intrinsic::nvvm_rcp_rn_d:
1712  return {SPC_Reciprocal, FTZ_Any};
1713  case Intrinsic::nvvm_rcp_rn_f:
1714  return {SPC_Reciprocal, FTZ_MustBeOff};
1715  case Intrinsic::nvvm_rcp_rn_ftz_f:
1716  return {SPC_Reciprocal, FTZ_MustBeOn};
1717 
1718  // We do not currently simplify intrinsics that give an approximate answer.
1719  // These include:
1720  //
1721  // - nvvm_cos_approx_{f,ftz_f}
1722  // - nvvm_ex2_approx_{d,f,ftz_f}
1723  // - nvvm_lg2_approx_{d,f,ftz_f}
1724  // - nvvm_sin_approx_{f,ftz_f}
1725  // - nvvm_sqrt_approx_{f,ftz_f}
1726  // - nvvm_rsqrt_approx_{d,f,ftz_f}
1727  // - nvvm_div_approx_{ftz_d,ftz_f,f}
1728  // - nvvm_rcp_approx_ftz_d
1729  //
1730  // Ideally we'd encode them as e.g. "fast call @llvm.cos", where "fast"
1731  // means that fastmath is enabled in the intrinsic. Unfortunately only
1732  // binary operators (currently) have a fastmath bit in SelectionDAG, so this
1733  // information gets lost and we can't select on it.
1734  //
1735  // TODO: div and rcp are lowered to a binary op, so these we could in theory
1736  // lower them to "fast fdiv".
1737 
1738  default:
1739  return {};
1740  }
1741  }();
1742 
 1743  // If Action.FtzRequirement is not satisfied by the function's ftz attribute,
 1744  // we can bail out now. (Notice that in the case that IID is not an NVVM
 1745  // intrinsic, we don't have to look up the attribute, as FtzRequirement
 1746  // will be FTZ_Any.)
1747  if (Action.FtzRequirement != FTZ_Any) {
1748  bool FtzEnabled =
1749  II->getFunction()->getFnAttribute("nvptx-f32ftz").getValueAsString() ==
1750  "true";
1751 
1752  if (FtzEnabled != (Action.FtzRequirement == FTZ_MustBeOn))
1753  return nullptr;
1754  }
1755 
1756  // Simplify to target-generic intrinsic.
1757  if (Action.IID) {
1758     SmallVector<Value *, 4> Args(II->arg_operands());
1759  // All the target-generic intrinsics currently of interest to us have one
1760  // type argument, equal to that of the nvvm intrinsic's argument.
1761  Type *Tys[] = {II->getArgOperand(0)->getType()};
1762  return CallInst::Create(
1763  Intrinsic::getDeclaration(II->getModule(), *Action.IID, Tys), Args);
1764  }
1765 
1766  // Simplify to target-generic binary op.
1767  if (Action.BinaryOp)
1768  return BinaryOperator::Create(*Action.BinaryOp, II->getArgOperand(0),
1769  II->getArgOperand(1), II->getName());
1770 
1771  // Simplify to target-generic cast op.
1772  if (Action.CastOp)
1773  return CastInst::Create(*Action.CastOp, II->getArgOperand(0), II->getType(),
1774  II->getName());
1775 
1776  // All that's left are the special cases.
1777  if (!Action.Special)
1778  return nullptr;
1779 
1780  switch (*Action.Special) {
1781  case SPC_Reciprocal:
1782  // Simplify reciprocal.
1783  return BinaryOperator::Create(
1784  Instruction::FDiv, ConstantFP::get(II->getArgOperand(0)->getType(), 1),
1785  II->getArgOperand(0), II->getName());
1786  }
1787  llvm_unreachable("All SpecialCase enumerators should be handled in switch.");
1788 }
1789 
1790 Instruction *InstCombiner::visitVAStartInst(VAStartInst &I) {
1791  removeTriviallyEmptyRange(I, Intrinsic::vastart, Intrinsic::vaend, *this);
1792  return nullptr;
1793 }
1794 
1795 Instruction *InstCombiner::visitVACopyInst(VACopyInst &I) {
1796  removeTriviallyEmptyRange(I, Intrinsic::vacopy, Intrinsic::vaend, *this);
1797  return nullptr;
1798 }
1799 
1800 /// CallInst simplification. This mostly only handles folding of intrinsic
1801 /// instructions. For normal calls, it allows visitCallSite to do the heavy
1802 /// lifting.
1803 Instruction *InstCombiner::visitCallInst(CallInst &CI) {
1804  if (Value *V = SimplifyCall(&CI, SQ.getWithInstruction(&CI)))
1805  return replaceInstUsesWith(CI, V);
1806 
1807  if (isFreeCall(&CI, &TLI))
1808  return visitFree(CI);
1809 
1810  // If the caller function is nounwind, mark the call as nounwind, even if the
1811  // callee isn't.
1812  if (CI.getFunction()->doesNotThrow() && !CI.doesNotThrow()) {
1813  CI.setDoesNotThrow();
1814  return &CI;
1815  }
1816 
1817  IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI);
1818  if (!II) return visitCallSite(&CI);
1819 
1820  // Intrinsics cannot occur in an invoke, so handle them here instead of in
1821  // visitCallSite.
1822  if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(II)) {
1823  bool Changed = false;
1824 
1825  // memmove/cpy/set of zero bytes is a noop.
1826  if (Constant *NumBytes = dyn_cast<Constant>(MI->getLength())) {
1827  if (NumBytes->isNullValue())
1828  return eraseInstFromFunction(CI);
1829 
1830  if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes))
1831  if (CI->getZExtValue() == 1) {
1832  // Replace the instruction with just byte operations. We would
1833  // transform other cases to loads/stores, but we don't know if
1834  // alignment is sufficient.
1835  }
1836  }
1837 
1838  // No other transformations apply to volatile transfers.
1839  if (MI->isVolatile())
1840  return nullptr;
1841 
1842  // If we have a memmove and the source operation is a constant global,
1843  // then the source and dest pointers can't alias, so we can change this
1844  // into a call to memcpy.
1845  if (MemMoveInst *MMI = dyn_cast<MemMoveInst>(MI)) {
1846  if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(MMI->getSource()))
1847  if (GVSrc->isConstant()) {
1848  Module *M = CI.getModule();
1849  Intrinsic::ID MemCpyID = Intrinsic::memcpy;
1850  Type *Tys[3] = { CI.getArgOperand(0)->getType(),
1851  CI.getArgOperand(1)->getType(),
1852  CI.getArgOperand(2)->getType() };
1853  CI.setCalledFunction(Intrinsic::getDeclaration(M, MemCpyID, Tys));
1854  Changed = true;
1855  }
1856  }
1857 
1858  if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
1859  // memmove(x,x,size) -> noop.
1860  if (MTI->getSource() == MTI->getDest())
1861  return eraseInstFromFunction(CI);
1862  }
1863 
1864  // If we can determine a pointer alignment that is bigger than currently
1865  // set, update the alignment.
1866  if (isa<MemTransferInst>(MI)) {
1867  if (Instruction *I = SimplifyMemTransfer(MI))
1868  return I;
1869  } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(MI)) {
1870  if (Instruction *I = SimplifyMemSet(MSI))
1871  return I;
1872  }
1873 
1874  if (Changed) return II;
1875  }
1876 
1877  if (auto *AMI = dyn_cast<AtomicMemCpyInst>(II)) {
1878  if (Constant *C = dyn_cast<Constant>(AMI->getLength()))
1879  if (C->isNullValue())
1880  return eraseInstFromFunction(*AMI);
1881 
1882  if (Instruction *I = SimplifyElementUnorderedAtomicMemCpy(AMI))
1883  return I;
1884  }
1885 
1886  if (Instruction *I = SimplifyNVVMIntrinsic(II, *this))
1887  return I;
1888 
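  // Helper: ask SimplifyDemandedVectorElts to keep only the lowest
  // DemandedWidth lanes of a Width-lane vector operand; it returns a
  // simplified value if any of the upper lanes could be dropped.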
1889  auto SimplifyDemandedVectorEltsLow = [this](Value *Op, unsigned Width,
1890  unsigned DemandedWidth) {
1891  APInt UndefElts(Width, 0);
1892  APInt DemandedElts = APInt::getLowBitsSet(Width, DemandedWidth);
1893  return SimplifyDemandedVectorElts(Op, DemandedElts, UndefElts);
1894  };
1895 
1896  switch (II->getIntrinsicID()) {
1897  default: break;
1898  case Intrinsic::objectsize:
1899  if (ConstantInt *N =
1900  lowerObjectSizeCall(II, DL, &TLI, /*MustSucceed=*/false))
1901  return replaceInstUsesWith(CI, N);
1902  return nullptr;
1903  case Intrinsic::bswap: {
1904  Value *IIOperand = II->getArgOperand(0);
1905  Value *X = nullptr;
1906 
1907  // bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
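  // For example, for i32 %x truncated to i16: the two high bytes of %x survive
  // in their original order, so the result is trunc(lshr(%x, 16)), with
  // c = 32 - 16.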
1908  if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) {
1909  unsigned C = X->getType()->getPrimitiveSizeInBits() -
1910  IIOperand->getType()->getPrimitiveSizeInBits();
1911  Value *CV = ConstantInt::get(X->getType(), C);
1912  Value *V = Builder.CreateLShr(X, CV);
1913  return new TruncInst(V, IIOperand->getType());
1914  }
1915  break;
1916  }
1917  case Intrinsic::masked_load:
1918  if (Value *SimplifiedMaskedOp = simplifyMaskedLoad(*II, Builder))
1919  return replaceInstUsesWith(CI, SimplifiedMaskedOp);
1920  break;
1921  case Intrinsic::masked_store:
1922  return simplifyMaskedStore(*II, *this);
1923  case Intrinsic::masked_gather:
1924  return simplifyMaskedGather(*II, *this);
1925  case Intrinsic::masked_scatter:
1926  return simplifyMaskedScatter(*II, *this);
1927 
1928  case Intrinsic::powi:
1929  if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
1930  // 0 and 1 are handled in instsimplify
1931 
1932  // powi(x, -1) -> 1/x
1933  if (Power->isMinusOne())
1934  return BinaryOperator::CreateFDiv(ConstantFP::get(CI.getType(), 1.0),
1935  II->getArgOperand(0));
1936  // powi(x, 2) -> x*x
1937  if (Power->equalsInt(2))
1938  return BinaryOperator::CreateFMul(II->getArgOperand(0),
1939  II->getArgOperand(0));
1940  }
1941  break;
1942 
1943  case Intrinsic::cttz:
1944  case Intrinsic::ctlz:
1945  if (auto *I = foldCttzCtlz(*II, *this))
1946  return I;
1947  break;
1948 
1949  case Intrinsic::ctpop:
1950  if (auto *I = foldCtpop(*II, *this))
1951  return I;
1952  break;
1953 
1954  case Intrinsic::uadd_with_overflow:
1955  case Intrinsic::sadd_with_overflow:
1956  case Intrinsic::umul_with_overflow:
1957  case Intrinsic::smul_with_overflow:
1958  if (isa<Constant>(II->getArgOperand(0)) &&
1959  !isa<Constant>(II->getArgOperand(1))) {
1960  // Canonicalize constants into the RHS.
1961  Value *LHS = II->getArgOperand(0);
1962  II->setArgOperand(0, II->getArgOperand(1));
1963  II->setArgOperand(1, LHS);
1964  return II;
1965  }
1966     LLVM_FALLTHROUGH;
1967 
1968  case Intrinsic::usub_with_overflow:
1969  case Intrinsic::ssub_with_overflow: {
1970  OverflowCheckFlavor OCF =
1971         IntrinsicIDToOverflowCheckFlavor(II->getIntrinsicID());
1972  assert(OCF != OCF_INVALID && "unexpected!");
1973 
1974  Value *OperationResult = nullptr;
1975  Constant *OverflowResult = nullptr;
1976  if (OptimizeOverflowCheck(OCF, II->getArgOperand(0), II->getArgOperand(1),
1977  *II, OperationResult, OverflowResult))
1978  return CreateOverflowTuple(II, OperationResult, OverflowResult);
1979 
1980  break;
1981  }
1982 
1983  case Intrinsic::minnum:
1984  case Intrinsic::maxnum: {
1985  Value *Arg0 = II->getArgOperand(0);
1986  Value *Arg1 = II->getArgOperand(1);
1987  // Canonicalize constants to the RHS.
1988  if (isa<ConstantFP>(Arg0) && !isa<ConstantFP>(Arg1)) {
1989  II->setArgOperand(0, Arg1);
1990  II->setArgOperand(1, Arg0);
1991  return II;
1992  }
1993  if (Value *V = simplifyMinnumMaxnum(*II))
1994  return replaceInstUsesWith(*II, V);
1995  break;
1996  }
1997  case Intrinsic::fmuladd: {
1998  // Canonicalize fast fmuladd to the separate fmul + fadd.
1999  if (II->isFast()) {
2000  BuilderTy::FastMathFlagGuard Guard(Builder);
2001  Builder.setFastMathFlags(II->getFastMathFlags());
2002  Value *Mul = Builder.CreateFMul(II->getArgOperand(0),
2003  II->getArgOperand(1));
2004  Value *Add = Builder.CreateFAdd(Mul, II->getArgOperand(2));
2005  Add->takeName(II);
2006  return replaceInstUsesWith(*II, Add);
2007  }
2008 
2009     LLVM_FALLTHROUGH;
2010  }
2011  case Intrinsic::fma: {
2012  Value *Src0 = II->getArgOperand(0);
2013  Value *Src1 = II->getArgOperand(1);
2014 
2015  // Canonicalize constants into the RHS.
2016  if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
2017  II->setArgOperand(0, Src1);
2018  II->setArgOperand(1, Src0);
2019  std::swap(Src0, Src1);
2020  }
2021 
2022  Value *LHS = nullptr;
2023  Value *RHS = nullptr;
2024 
2025  // fma fneg(x), fneg(y), z -> fma x, y, z
2026  if (match(Src0, m_FNeg(m_Value(LHS))) &&
2027  match(Src1, m_FNeg(m_Value(RHS)))) {
2028  II->setArgOperand(0, LHS);
2029  II->setArgOperand(1, RHS);
2030  return II;
2031  }
2032 
2033  // fma fabs(x), fabs(x), z -> fma x, x, z
2034  if (match(Src0, m_Intrinsic<Intrinsic::fabs>(m_Value(LHS))) &&
2035  match(Src1, m_Intrinsic<Intrinsic::fabs>(m_Value(RHS))) && LHS == RHS) {
2036  II->setArgOperand(0, LHS);
2037  II->setArgOperand(1, RHS);
2038  return II;
2039  }
2040 
2041  // fma x, 1, z -> fadd x, z
2042  if (match(Src1, m_FPOne())) {
2043  Instruction *RI = BinaryOperator::CreateFAdd(Src0, II->getArgOperand(2));
2044  RI->copyFastMathFlags(II);
2045  return RI;
2046  }
2047 
2048  break;
2049  }
2050  case Intrinsic::fabs: {
2051  Value *Cond;
2052  Constant *LHS, *RHS;
2053  if (match(II->getArgOperand(0),
2054  m_Select(m_Value(Cond), m_Constant(LHS), m_Constant(RHS)))) {
2055  CallInst *Call0 = Builder.CreateCall(II->getCalledFunction(), {LHS});
2056  CallInst *Call1 = Builder.CreateCall(II->getCalledFunction(), {RHS});
2057  return SelectInst::Create(Cond, Call0, Call1);
2058  }
2059 
2060     LLVM_FALLTHROUGH;
2061  }
2062  case Intrinsic::ceil:
2063  case Intrinsic::floor:
2064  case Intrinsic::round:
2065  case Intrinsic::nearbyint:
2066  case Intrinsic::rint:
2067  case Intrinsic::trunc: {
2068  Value *ExtSrc;
2069  if (match(II->getArgOperand(0), m_FPExt(m_Value(ExtSrc))) &&
2070  II->getArgOperand(0)->hasOneUse()) {
2071  // fabs (fpext x) -> fpext (fabs x)
2072       Value *F = Intrinsic::getDeclaration(II->getModule(), II->getIntrinsicID(),
2073  { ExtSrc->getType() });
2074  CallInst *NewFabs = Builder.CreateCall(F, ExtSrc);
2075  NewFabs->copyFastMathFlags(II);
2076  NewFabs->takeName(II);
2077  return new FPExtInst(NewFabs, II->getType());
2078  }
2079 
2080  break;
2081  }
2082  case Intrinsic::cos:
2083  case Intrinsic::amdgcn_cos: {
2084  Value *SrcSrc;
2085  Value *Src = II->getArgOperand(0);
2086  if (match(Src, m_FNeg(m_Value(SrcSrc))) ||
2087  match(Src, m_Intrinsic<Intrinsic::fabs>(m_Value(SrcSrc)))) {
2088  // cos(-x) -> cos(x)
2089  // cos(fabs(x)) -> cos(x)
2090  II->setArgOperand(0, SrcSrc);
2091  return II;
2092  }
2093 
2094  break;
2095  }
2096  case Intrinsic::ppc_altivec_lvx:
2097  case Intrinsic::ppc_altivec_lvxl:
2098  // Turn PPC lvx -> load if the pointer is known aligned.
2099  if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, &AC,
2100  &DT) >= 16) {
2101  Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0),
2102  PointerType::getUnqual(II->getType()));
2103  return new LoadInst(Ptr);
2104  }
2105  break;
2106  case Intrinsic::ppc_vsx_lxvw4x:
2107  case Intrinsic::ppc_vsx_lxvd2x: {
2108  // Turn PPC VSX loads into normal loads.
2109  Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0),
2110  PointerType::getUnqual(II->getType()));
2111  return new LoadInst(Ptr, Twine(""), false, 1);
2112  }
2113  case Intrinsic::ppc_altivec_stvx:
2114  case Intrinsic::ppc_altivec_stvxl:
2115  // Turn stvx -> store if the pointer is known aligned.
2116  if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, II, &AC,
2117  &DT) >= 16) {
2118  Type *OpPtrTy =
2119  PointerType::getUnqual(II->getArgOperand(0)->getType());
2120  Value *Ptr = Builder.CreateBitCast(II->getArgOperand(1), OpPtrTy);
2121  return new StoreInst(II->getArgOperand(0), Ptr);
2122  }
2123  break;
2124  case Intrinsic::ppc_vsx_stxvw4x:
2125  case Intrinsic::ppc_vsx_stxvd2x: {
2126  // Turn PPC VSX stores into normal stores.
2127  Type *OpPtrTy = PointerType::getUnqual(II->getArgOperand(0)->getType());
2128  Value *Ptr = Builder.CreateBitCast(II->getArgOperand(1), OpPtrTy);
2129  return new StoreInst(II->getArgOperand(0), Ptr, false, 1);
2130  }
2131  case Intrinsic::ppc_qpx_qvlfs:
2132  // Turn PPC QPX qvlfs -> load if the pointer is known aligned.
2133  if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, &AC,
2134  &DT) >= 16) {
2135  Type *VTy = VectorType::get(Builder.getFloatTy(),
2136  II->getType()->getVectorNumElements());
2137  Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0),
2138  PointerType::getUnqual(VTy));
2139  Value *Load = Builder.CreateLoad(Ptr);
2140  return new FPExtInst(Load, II->getType());
2141  }
2142  break;
2143  case Intrinsic::ppc_qpx_qvlfd:
2144  // Turn PPC QPX qvlfd -> load if the pointer is known aligned.
2145  if (getOrEnforceKnownAlignment(II->getArgOperand(0), 32, DL, II, &AC,
2146  &DT) >= 32) {
2147  Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0),
2148  PointerType::getUnqual(II->getType()));
2149  return new LoadInst(Ptr);
2150  }
2151  break;
2152  case Intrinsic::ppc_qpx_qvstfs:
2153  // Turn PPC QPX qvstfs -> store if the pointer is known aligned.
2154  if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, II, &AC,
2155  &DT) >= 16) {
2156  Type *VTy = VectorType::get(Builder.getFloatTy(),
2157  II->getArgOperand(0)->getType()->getVectorNumElements());
2158  Value *TOp = Builder.CreateFPTrunc(II->getArgOperand(0), VTy);
2159  Type *OpPtrTy = PointerType::getUnqual(VTy);
2160  Value *Ptr = Builder.CreateBitCast(II->getArgOperand(1), OpPtrTy);
2161  return new StoreInst(TOp, Ptr);
2162  }
2163  break;
2164  case Intrinsic::ppc_qpx_qvstfd:
2165  // Turn PPC QPX qvstfd -> store if the pointer is known aligned.
2166  if (getOrEnforceKnownAlignment(II->getArgOperand(1), 32, DL, II, &AC,
2167  &DT) >= 32) {
2168  Type *OpPtrTy =
2169  PointerType::getUnqual(II->getArgOperand(0)->getType());
2170  Value *Ptr = Builder.CreateBitCast(II->getArgOperand(1), OpPtrTy);
2171  return new StoreInst(II->getArgOperand(0), Ptr);
2172  }
2173  break;
2174 
2175  case Intrinsic::x86_bmi_bextr_32:
2176  case Intrinsic::x86_bmi_bextr_64:
2177  case Intrinsic::x86_tbm_bextri_u32:
2178  case Intrinsic::x86_tbm_bextri_u64:
2179  // If the RHS is a constant we can try some simplifications.
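  // The control operand packs the starting bit in bits [7:0] and the field
  // length in bits [15:8]; e.g. a control of 0x0404 extracts 4 bits starting
  // at bit 4, which constant-folds to (x >> 4) & 0xf below.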
2180  if (auto *C = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
2181  uint64_t Shift = C->getZExtValue();
2182  uint64_t Length = (Shift >> 8) & 0xff;
2183  Shift &= 0xff;
2184  unsigned BitWidth = II->getType()->getIntegerBitWidth();
2185  // If the length is 0 or the shift is out of range, replace with zero.
2186  if (Length == 0 || Shift >= BitWidth)
2187  return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), 0));
2188  // If the LHS is also a constant, we can completely constant fold this.
2189  if (auto *InC = dyn_cast<ConstantInt>(II->getArgOperand(0))) {
2190  uint64_t Result = InC->getZExtValue() >> Shift;
2191  if (Length > BitWidth)
2192  Length = BitWidth;
2193  Result &= maskTrailingOnes<uint64_t>(Length);
2194  return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), Result));
2195  }
2196  // TODO should we turn this into 'and' if shift is 0? Or 'shl' if we
2197  // are only masking bits that a shift already cleared?
2198  }
2199  break;
2200 
2201  case Intrinsic::x86_bmi_bzhi_32:
2202  case Intrinsic::x86_bmi_bzhi_64:
2203  // If the RHS is a constant we can try some simplifications.
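  // BZHI zeroes all bits at positions >= the index (the low byte of the second
  // operand); e.g. bzhi(x, 8) == x & 0xff, and an index >= the bit width
  // leaves x unchanged.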
2204  if (auto *C = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
2205  uint64_t Index = C->getZExtValue() & 0xff;
2206  unsigned BitWidth = II->getType()->getIntegerBitWidth();
2207  if (Index >= BitWidth)
2208  return replaceInstUsesWith(CI, II->getArgOperand(0));
2209  if (Index == 0)
2210  return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), 0));
2211  // If the LHS is also a constant, we can completely constant fold this.
2212  if (auto *InC = dyn_cast<ConstantInt>(II->getArgOperand(0))) {
2213  uint64_t Result = InC->getZExtValue();
2214  Result &= maskTrailingOnes<uint64_t>(Index);
2215  return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), Result));
2216  }
2217  // TODO should we convert this to an AND if the RHS is constant?
2218  }
2219  break;
2220 
2221  case Intrinsic::x86_vcvtph2ps_128:
2222  case Intrinsic::x86_vcvtph2ps_256: {
2223  auto Arg = II->getArgOperand(0);
2224  auto ArgType = cast<VectorType>(Arg->getType());
2225  auto RetType = cast<VectorType>(II->getType());
2226  unsigned ArgWidth = ArgType->getNumElements();
2227  unsigned RetWidth = RetType->getNumElements();
2228  assert(RetWidth <= ArgWidth && "Unexpected input/return vector widths");
2229  assert(ArgType->isIntOrIntVectorTy() &&
2230  ArgType->getScalarSizeInBits() == 16 &&
2231  "CVTPH2PS input type should be 16-bit integer vector");
2232  assert(RetType->getScalarType()->isFloatTy() &&
2233  "CVTPH2PS output type should be 32-bit float vector");
2234 
2235  // Constant folding: Convert to generic half to single conversion.
2236  if (isa<ConstantAggregateZero>(Arg))
2237  return replaceInstUsesWith(*II, ConstantAggregateZero::get(RetType));
2238 
2239  if (isa<ConstantDataVector>(Arg)) {
2240  auto VectorHalfAsShorts = Arg;
2241  if (RetWidth < ArgWidth) {
2242  SmallVector<uint32_t, 8> SubVecMask;
2243  for (unsigned i = 0; i != RetWidth; ++i)
2244  SubVecMask.push_back((int)i);
2245  VectorHalfAsShorts = Builder.CreateShuffleVector(
2246  Arg, UndefValue::get(ArgType), SubVecMask);
2247  }
2248 
2249  auto VectorHalfType =
2250  VectorType::get(Type::getHalfTy(II->getContext()), RetWidth);
2251  auto VectorHalfs =
2252  Builder.CreateBitCast(VectorHalfAsShorts, VectorHalfType);
2253  auto VectorFloats = Builder.CreateFPExt(VectorHalfs, RetType);
2254  return replaceInstUsesWith(*II, VectorFloats);
2255  }
2256 
2257  // We only use the lowest lanes of the argument.
2258  if (Value *V = SimplifyDemandedVectorEltsLow(Arg, ArgWidth, RetWidth)) {
2259  II->setArgOperand(0, V);
2260  return II;
2261  }
2262  break;
2263  }
2264 
2265  case Intrinsic::x86_sse_cvtss2si:
2266  case Intrinsic::x86_sse_cvtss2si64:
2267  case Intrinsic::x86_sse_cvttss2si:
2268  case Intrinsic::x86_sse_cvttss2si64:
2269  case Intrinsic::x86_sse2_cvtsd2si:
2270  case Intrinsic::x86_sse2_cvtsd2si64:
2271  case Intrinsic::x86_sse2_cvttsd2si:
2272  case Intrinsic::x86_sse2_cvttsd2si64:
2273  case Intrinsic::x86_avx512_vcvtss2si32:
2274  case Intrinsic::x86_avx512_vcvtss2si64:
2275  case Intrinsic::x86_avx512_vcvtss2usi32:
2276  case Intrinsic::x86_avx512_vcvtss2usi64:
2277  case Intrinsic::x86_avx512_vcvtsd2si32:
2278  case Intrinsic::x86_avx512_vcvtsd2si64:
2279  case Intrinsic::x86_avx512_vcvtsd2usi32:
2280  case Intrinsic::x86_avx512_vcvtsd2usi64:
2281  case Intrinsic::x86_avx512_cvttss2si:
2282  case Intrinsic::x86_avx512_cvttss2si64:
2283  case Intrinsic::x86_avx512_cvttss2usi:
2284  case Intrinsic::x86_avx512_cvttss2usi64:
2285  case Intrinsic::x86_avx512_cvttsd2si:
2286  case Intrinsic::x86_avx512_cvttsd2si64:
2287  case Intrinsic::x86_avx512_cvttsd2usi:
2288  case Intrinsic::x86_avx512_cvttsd2usi64: {
2289  // These intrinsics only demand the 0th element of their input vectors. If
2290  // we can simplify the input based on that, do so now.
2291  Value *Arg = II->getArgOperand(0);
2292  unsigned VWidth = Arg->getType()->getVectorNumElements();
2293  if (Value *V = SimplifyDemandedVectorEltsLow(Arg, VWidth, 1)) {
2294  II->setArgOperand(0, V);
2295  return II;
2296  }
2297  break;
2298  }
2299 
2300  case Intrinsic::x86_mmx_pmovmskb:
2301  case Intrinsic::x86_sse_movmsk_ps:
2302  case Intrinsic::x86_sse2_movmsk_pd:
2303  case Intrinsic::x86_sse2_pmovmskb_128:
2304  case Intrinsic::x86_avx_movmsk_pd_256:
2305  case Intrinsic::x86_avx_movmsk_ps_256:
2306  case Intrinsic::x86_avx2_pmovmskb:
2307  if (Value *V = simplifyX86movmsk(*II))
2308  return replaceInstUsesWith(*II, V);
2309  break;
2310 
2311  case Intrinsic::x86_sse_comieq_ss:
2312  case Intrinsic::x86_sse_comige_ss:
2313  case Intrinsic::x86_sse_comigt_ss:
2314  case Intrinsic::x86_sse_comile_ss:
2315  case Intrinsic::x86_sse_comilt_ss:
2316  case Intrinsic::x86_sse_comineq_ss:
2317  case Intrinsic::x86_sse_ucomieq_ss:
2318  case Intrinsic::x86_sse_ucomige_ss:
2319  case Intrinsic::x86_sse_ucomigt_ss:
2320  case Intrinsic::x86_sse_ucomile_ss:
2321  case Intrinsic::x86_sse_ucomilt_ss:
2322  case Intrinsic::x86_sse_ucomineq_ss:
2323  case Intrinsic::x86_sse2_comieq_sd:
2324  case Intrinsic::x86_sse2_comige_sd:
2325  case Intrinsic::x86_sse2_comigt_sd:
2326  case Intrinsic::x86_sse2_comile_sd:
2327  case Intrinsic::x86_sse2_comilt_sd:
2328  case Intrinsic::x86_sse2_comineq_sd:
2329  case Intrinsic::x86_sse2_ucomieq_sd:
2330  case Intrinsic::x86_sse2_ucomige_sd:
2331  case Intrinsic::x86_sse2_ucomigt_sd:
2332  case Intrinsic::x86_sse2_ucomile_sd:
2333  case Intrinsic::x86_sse2_ucomilt_sd:
2334  case Intrinsic::x86_sse2_ucomineq_sd:
2335  case Intrinsic::x86_avx512_vcomi_ss:
2336  case Intrinsic::x86_avx512_vcomi_sd:
2337  case Intrinsic::x86_avx512_mask_cmp_ss:
2338  case Intrinsic::x86_avx512_mask_cmp_sd: {
2339  // These intrinsics only demand the 0th element of their input vectors. If
2340  // we can simplify the input based on that, do so now.
2341  bool MadeChange = false;
2342  Value *Arg0 = II->getArgOperand(0);
2343  Value *Arg1 = II->getArgOperand(1);
2344  unsigned VWidth = Arg0->getType()->getVectorNumElements();
2345  if (Value *V = SimplifyDemandedVectorEltsLow(Arg0, VWidth, 1)) {
2346  II->setArgOperand(0, V);
2347  MadeChange = true;
2348  }
2349  if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, 1)) {
2350  II->setArgOperand(1, V);
2351  MadeChange = true;
2352  }
2353  if (MadeChange)
2354  return II;
2355  break;
2356  }
2357  case Intrinsic::x86_avx512_mask_cmp_pd_128:
2358  case Intrinsic::x86_avx512_mask_cmp_pd_256:
2359  case Intrinsic::x86_avx512_mask_cmp_pd_512:
2360  case Intrinsic::x86_avx512_mask_cmp_ps_128:
2361  case Intrinsic::x86_avx512_mask_cmp_ps_256:
2362  case Intrinsic::x86_avx512_mask_cmp_ps_512: {
2363  // Folding cmp(sub(a,b),0) -> cmp(a,b) and cmp(0,sub(a,b)) -> cmp(b,a)
2364  Value *Arg0 = II->getArgOperand(0);
2365  Value *Arg1 = II->getArgOperand(1);
2366  bool Arg0IsZero = match(Arg0, m_Zero());
2367  if (Arg0IsZero)
2368  std::swap(Arg0, Arg1);
2369  Value *A, *B;
 2370  // This fold requires only NINF (no +/- infinities), since inf minus
 2371  // inf is nan.
2372  // NSZ(No Signed Zeros) is not needed because zeros of any sign are
2373  // equal for both compares.
2374  // NNAN is not needed because nans compare the same for both compares.
2375  // The compare intrinsic uses the above assumptions and therefore
2376  // doesn't require additional flags.
2377  if ((match(Arg0, m_OneUse(m_FSub(m_Value(A), m_Value(B)))) &&
2378  match(Arg1, m_Zero()) && isa<Instruction>(Arg0) &&
2379  cast<Instruction>(Arg0)->getFastMathFlags().noInfs())) {
2380  if (Arg0IsZero)
2381  std::swap(A, B);
2382  II->setArgOperand(0, A);
2383  II->setArgOperand(1, B);
2384  return II;
2385  }
2386  break;
2387  }
2388 
2389  case Intrinsic::x86_avx512_mask_add_ps_512:
2390  case Intrinsic::x86_avx512_mask_div_ps_512:
2391  case Intrinsic::x86_avx512_mask_mul_ps_512:
2392  case Intrinsic::x86_avx512_mask_sub_ps_512:
2393  case Intrinsic::x86_avx512_mask_add_pd_512:
2394  case Intrinsic::x86_avx512_mask_div_pd_512:
2395  case Intrinsic::x86_avx512_mask_mul_pd_512:
2396  case Intrinsic::x86_avx512_mask_sub_pd_512:
2397  // If the rounding mode is CUR_DIRECTION(4) we can turn these into regular
2398  // IR operations.
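  // (Rounding-mode operand 4 corresponds to _MM_FROUND_CUR_DIRECTION, i.e. use
  // the current MXCSR rounding mode, so the operation can be expressed as the
  // ordinary IR instruction.)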
2399  if (auto *R = dyn_cast<ConstantInt>(II->getArgOperand(4))) {
2400  if (R->getValue() == 4) {
2401  Value *Arg0 = II->getArgOperand(0);
2402  Value *Arg1 = II->getArgOperand(1);
2403 
2404  Value *V;
2405  switch (II->getIntrinsicID()) {
2406  default: llvm_unreachable("Case stmts out of sync!");
2407  case Intrinsic::x86_avx512_mask_add_ps_512:
2408  case Intrinsic::x86_avx512_mask_add_pd_512:
2409  V = Builder.CreateFAdd(Arg0, Arg1);
2410  break;
2411  case Intrinsic::x86_avx512_mask_sub_ps_512:
2412  case Intrinsic::x86_avx512_mask_sub_pd_512:
2413  V = Builder.CreateFSub(Arg0, Arg1);
2414  break;
2415  case Intrinsic::x86_avx512_mask_mul_ps_512:
2416  case Intrinsic::x86_avx512_mask_mul_pd_512:
2417  V = Builder.CreateFMul(Arg0, Arg1);
2418  break;
2419  case Intrinsic::x86_avx512_mask_div_ps_512:
2420  case Intrinsic::x86_avx512_mask_div_pd_512:
2421  V = Builder.CreateFDiv(Arg0, Arg1);
2422  break;
2423  }
2424 
2425  // Create a select for the masking.
2426  V = emitX86MaskSelect(II->getArgOperand(3), V, II->getArgOperand(2),
2427  Builder);
2428  return replaceInstUsesWith(*II, V);
2429  }
2430  }
2431  break;
2432 
2433  case Intrinsic::x86_avx512_mask_add_ss_round:
2434  case Intrinsic::x86_avx512_mask_div_ss_round:
2435  case Intrinsic::x86_avx512_mask_mul_ss_round:
2436  case Intrinsic::x86_avx512_mask_sub_ss_round:
2437  case Intrinsic::x86_avx512_mask_add_sd_round:
2438  case Intrinsic::x86_avx512_mask_div_sd_round:
2439  case Intrinsic::x86_avx512_mask_mul_sd_round:
2440  case Intrinsic::x86_avx512_mask_sub_sd_round:
2441  // If the rounding mode is CUR_DIRECTION(4) we can turn these into regular
2442  // IR operations.
2443  if (auto *R = dyn_cast<ConstantInt>(II->getArgOperand(4))) {
2444  if (R->getValue() == 4) {
2445  // Extract the element as scalars.
2446  Value *Arg0 = II->getArgOperand(0);
2447  Value *Arg1 = II->getArgOperand(1);
2448  Value *LHS = Builder.CreateExtractElement(Arg0, (uint64_t)0);
2449  Value *RHS = Builder.CreateExtractElement(Arg1, (uint64_t)0);
2450 
2451  Value *V;
2452  switch (II->getIntrinsicID()) {
2453  default: llvm_unreachable("Case stmts out of sync!");
2454  case Intrinsic::x86_avx512_mask_add_ss_round:
2455  case Intrinsic::x86_avx512_mask_add_sd_round:
2456  V = Builder.CreateFAdd(LHS, RHS);
2457  break;
2458  case Intrinsic::x86_avx512_mask_sub_ss_round:
2459  case Intrinsic::x86_avx512_mask_sub_sd_round:
2460  V = Builder.CreateFSub(LHS, RHS);
2461  break;
2462  case Intrinsic::x86_avx512_mask_mul_ss_round:
2463  case Intrinsic::x86_avx512_mask_mul_sd_round:
2464  V = Builder.CreateFMul(LHS, RHS);
2465  break;
2466  case Intrinsic::x86_avx512_mask_div_ss_round:
2467  case Intrinsic::x86_avx512_mask_div_sd_round:
2468  V = Builder.CreateFDiv(LHS, RHS);
2469  break;
2470  }
2471 
2472  // Handle the masking aspect of the intrinsic.
2473  Value *Mask = II->getArgOperand(3);
2474  auto *C = dyn_cast<ConstantInt>(Mask);
2475  // We don't need a select if we know the mask bit is a 1.
2476  if (!C || !C->getValue()[0]) {
2477  // Cast the mask to an i1 vector and then extract the lowest element.
2478  auto *MaskTy = VectorType::get(Builder.getInt1Ty(),
2479  cast<IntegerType>(Mask->getType())->getBitWidth());
2480  Mask = Builder.CreateBitCast(Mask, MaskTy);
2481  Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
2482  // Extract the lowest element from the passthru operand.
2483  Value *Passthru = Builder.CreateExtractElement(II->getArgOperand(2),
2484  (uint64_t)0);
2485  V = Builder.CreateSelect(Mask, V, Passthru);
2486  }
2487 
2488  // Insert the result back into the original argument 0.
2489  V = Builder.CreateInsertElement(Arg0, V, (uint64_t)0);
2490 
2491  return replaceInstUsesWith(*II, V);
2492  }
2493  }
2494     LLVM_FALLTHROUGH;
2495 
2496  // X86 scalar intrinsics simplified with SimplifyDemandedVectorElts.
2497  case Intrinsic::x86_avx512_mask_max_ss_round:
2498  case Intrinsic::x86_avx512_mask_min_ss_round:
2499  case Intrinsic::x86_avx512_mask_max_sd_round:
2500  case Intrinsic::x86_avx512_mask_min_sd_round:
2501  case Intrinsic::x86_avx512_mask_vfmadd_ss:
2502  case Intrinsic::x86_avx512_mask_vfmadd_sd:
2503  case Intrinsic::x86_avx512_maskz_vfmadd_ss:
2504  case Intrinsic::x86_avx512_maskz_vfmadd_sd:
2505  case Intrinsic::x86_avx512_mask3_vfmadd_ss:
2506  case Intrinsic::x86_avx512_mask3_vfmadd_sd:
2507  case Intrinsic::x86_avx512_mask3_vfmsub_ss:
2508  case Intrinsic::x86_avx512_mask3_vfmsub_sd:
2509  case Intrinsic::x86_avx512_mask3_vfnmsub_ss:
2510  case Intrinsic::x86_avx512_mask3_vfnmsub_sd:
2511  case Intrinsic::x86_fma_vfmadd_ss:
2512  case Intrinsic::x86_fma_vfmsub_ss:
2513  case Intrinsic::x86_fma_vfnmadd_ss:
2514  case Intrinsic::x86_fma_vfnmsub_ss:
2515  case Intrinsic::x86_fma_vfmadd_sd:
2516  case Intrinsic::x86_fma_vfmsub_sd:
2517  case Intrinsic::x86_fma_vfnmadd_sd:
2518  case Intrinsic::x86_fma_vfnmsub_sd:
2519  case Intrinsic::x86_sse_cmp_ss:
2520  case Intrinsic::x86_sse_min_ss:
2521  case Intrinsic::x86_sse_max_ss:
2522  case Intrinsic::x86_sse2_cmp_sd:
2523  case Intrinsic::x86_sse2_min_sd:
2524  case Intrinsic::x86_sse2_max_sd:
2525  case Intrinsic::x86_sse41_round_ss:
2526  case Intrinsic::x86_sse41_round_sd:
2527  case Intrinsic::x86_xop_vfrcz_ss:
2528  case Intrinsic::x86_xop_vfrcz_sd: {
2529  unsigned VWidth = II->getType()->getVectorNumElements();
2530  APInt UndefElts(VWidth, 0);
2531  APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
2532  if (Value *V = SimplifyDemandedVectorElts(II, AllOnesEltMask, UndefElts)) {
2533  if (V != II)
2534  return replaceInstUsesWith(*II, V);
2535  return II;
2536  }
2537  break;
2538  }
2539 
2540  // Constant fold ashr( <A x Bi>, Ci ).
2541  // Constant fold lshr( <A x Bi>, Ci ).
2542  // Constant fold shl( <A x Bi>, Ci ).
2543  case Intrinsic::x86_sse2_psrai_d:
2544  case Intrinsic::x86_sse2_psrai_w:
2545  case Intrinsic::x86_avx2_psrai_d:
2546  case Intrinsic::x86_avx2_psrai_w:
2547  case Intrinsic::x86_avx512_psrai_q_128:
2548  case Intrinsic::x86_avx512_psrai_q_256:
2549  case Intrinsic::x86_avx512_psrai_d_512:
2550  case Intrinsic::x86_avx512_psrai_q_512:
2551  case Intrinsic::x86_avx512_psrai_w_512:
2552  case Intrinsic::x86_sse2_psrli_d:
2553  case Intrinsic::x86_sse2_psrli_q:
2554  case Intrinsic::x86_sse2_psrli_w:
2555  case Intrinsic::x86_avx2_psrli_d:
2556  case Intrinsic::x86_avx2_psrli_q:
2557  case Intrinsic::x86_avx2_psrli_w:
2558  case Intrinsic::x86_avx512_psrli_d_512:
2559  case Intrinsic::x86_avx512_psrli_q_512:
2560  case Intrinsic::x86_avx512_psrli_w_512:
2561  case Intrinsic::x86_sse2_pslli_d:
2562  case Intrinsic::x86_sse2_pslli_q:
2563  case Intrinsic::x86_sse2_pslli_w:
2564  case Intrinsic::x86_avx2_pslli_d:
2565  case Intrinsic::x86_avx2_pslli_q:
2566  case Intrinsic::x86_avx2_pslli_w:
2567  case Intrinsic::x86_avx512_pslli_d_512:
2568  case Intrinsic::x86_avx512_pslli_q_512:
2569  case Intrinsic::x86_avx512_pslli_w_512:
2570  if (Value *V = simplifyX86immShift(*II, Builder))
2571  return replaceInstUsesWith(*II, V);
2572  break;
2573 
2574  case Intrinsic::x86_sse2_psra_d:
2575  case Intrinsic::x86_sse2_psra_w:
2576  case Intrinsic::x86_avx2_psra_d:
2577  case Intrinsic::x86_avx2_psra_w:
2578  case Intrinsic::x86_avx512_psra_q_128:
2579  case Intrinsic::x86_avx512_psra_q_256:
2580  case Intrinsic::x86_avx512_psra_d_512:
2581  case Intrinsic::x86_avx512_psra_q_512:
2582  case Intrinsic::x86_avx512_psra_w_512:
2583  case Intrinsic::x86_sse2_psrl_d:
2584  case Intrinsic::x86_sse2_psrl_q:
2585  case Intrinsic::x86_sse2_psrl_w:
2586  case Intrinsic::x86_avx2_psrl_d:
2587  case Intrinsic::x86_avx2_psrl_q:
2588  case Intrinsic::x86_avx2_psrl_w:
2589  case Intrinsic::x86_avx512_psrl_d_512:
2590  case Intrinsic::x86_avx512_psrl_q_512:
2591  case Intrinsic::x86_avx512_psrl_w_512:
2592  case Intrinsic::x86_sse2_psll_d:
2593  case Intrinsic::x86_sse2_psll_q:
2594  case Intrinsic::x86_sse2_psll_w:
2595  case Intrinsic::x86_avx2_psll_d:
2596  case Intrinsic::x86_avx2_psll_q:
2597  case Intrinsic::x86_avx2_psll_w:
2598  case Intrinsic::x86_avx512_psll_d_512:
2599  case Intrinsic::x86_avx512_psll_q_512:
2600  case Intrinsic::x86_avx512_psll_w_512: {
2601  if (Value *V = simplifyX86immShift(*II, Builder))
2602  return replaceInstUsesWith(*II, V);
2603 
2604  // SSE2/AVX2 uses only the first 64-bits of the 128-bit vector
2605  // operand to compute the shift amount.
2606  Value *Arg1 = II->getArgOperand(1);
2607  assert(Arg1->getType()->getPrimitiveSizeInBits() == 128 &&
2608  "Unexpected packed shift size");
2609  unsigned VWidth = Arg1->getType()->getVectorNumElements();
2610 
2611  if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, VWidth / 2)) {
2612  II->setArgOperand(1, V);
2613  return II;
2614  }
2615  break;
2616  }
2617 
2618  case Intrinsic::x86_avx2_psllv_d:
2619  case Intrinsic::x86_avx2_psllv_d_256:
2620  case Intrinsic::x86_avx2_psllv_q:
2621  case Intrinsic::x86_avx2_psllv_q_256:
2622  case Intrinsic::x86_avx512_psllv_d_512:
2623  case Intrinsic::x86_avx512_psllv_q_512:
2624  case Intrinsic::x86_avx512_psllv_w_128:
2625  case Intrinsic::x86_avx512_psllv_w_256:
2626  case Intrinsic::x86_avx512_psllv_w_512:
2627  case Intrinsic::x86_avx2_psrav_d:
2628  case Intrinsic::x86_avx2_psrav_d_256:
2629  case Intrinsic::x86_avx512_psrav_q_128:
2630  case Intrinsic::x86_avx512_psrav_q_256:
2631  case Intrinsic::x86_avx512_psrav_d_512:
2632  case Intrinsic::x86_avx512_psrav_q_512:
2633  case Intrinsic::x86_avx512_psrav_w_128:
2634  case Intrinsic::x86_avx512_psrav_w_256:
2635  case Intrinsic::x86_avx512_psrav_w_512:
2636  case Intrinsic::x86_avx2_psrlv_d:
2637  case Intrinsic::x86_avx2_psrlv_d_256:
2638  case Intrinsic::x86_avx2_psrlv_q:
2639  case Intrinsic::x86_avx2_psrlv_q_256:
2640  case Intrinsic::x86_avx512_psrlv_d_512:
2641  case Intrinsic::x86_avx512_psrlv_q_512:
2642  case Intrinsic::x86_avx512_psrlv_w_128:
2643  case Intrinsic::x86_avx512_psrlv_w_256:
2644  case Intrinsic::x86_avx512_psrlv_w_512:
2645  if (Value *V = simplifyX86varShift(*II, Builder))
2646  return replaceInstUsesWith(*II, V);
2647  break;
2648 
2649  case Intrinsic::x86_sse2_pmulu_dq:
2650  case Intrinsic::x86_sse41_pmuldq:
2651  case Intrinsic::x86_avx2_pmul_dq:
2652  case Intrinsic::x86_avx2_pmulu_dq:
2653  case Intrinsic::x86_avx512_pmul_dq_512:
2654  case Intrinsic::x86_avx512_pmulu_dq_512: {
2655  if (Value *V = simplifyX86muldq(*II, Builder))
2656  return replaceInstUsesWith(*II, V);
2657 
2658  unsigned VWidth = II->getType()->getVectorNumElements();
2659  APInt UndefElts(VWidth, 0);
2660  APInt DemandedElts = APInt::getAllOnesValue(VWidth);
2661  if (Value *V = SimplifyDemandedVectorElts(II, DemandedElts, UndefElts)) {
2662  if (V != II)
2663  return replaceInstUsesWith(*II, V);
2664  return II;
2665  }
2666  break;
2667  }
2668 
2669  case Intrinsic::x86_sse2_packssdw_128:
2670  case Intrinsic::x86_sse2_packsswb_128:
2671  case Intrinsic::x86_avx2_packssdw:
2672  case Intrinsic::x86_avx2_packsswb:
2673  case Intrinsic::x86_avx512_packssdw_512:
2674  case Intrinsic::x86_avx512_packsswb_512:
2675  if (Value *V = simplifyX86pack(*II, true))
2676  return replaceInstUsesWith(*II, V);
2677  break;
2678 
2679  case Intrinsic::x86_sse2_packuswb_128:
2680  case Intrinsic::x86_sse41_packusdw:
2681  case Intrinsic::x86_avx2_packusdw:
2682  case Intrinsic::x86_avx2_packuswb:
2683  case Intrinsic::x86_avx512_packusdw_512:
2684  case Intrinsic::x86_avx512_packuswb_512:
2685  if (Value *V = simplifyX86pack(*II, false))
2686  return replaceInstUsesWith(*II, V);
2687  break;
2688 
2689  case Intrinsic::x86_pclmulqdq: {
2690  if (auto *C = dyn_cast<ConstantInt>(II->getArgOperand(2))) {
2691  unsigned Imm = C->getZExtValue();
2692 
2693  bool MadeChange = false;
2694  Value *Arg0 = II->getArgOperand(0);
2695  Value *Arg1 = II->getArgOperand(1);
2696  unsigned VWidth = Arg0->getType()->getVectorNumElements();
2697  APInt DemandedElts(VWidth, 0);
2698 
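  // PCLMULQDQ multiplies one 64-bit element from each source: bit 0 of the
  // immediate selects the element of the first operand and bit 4 selects the
  // element of the second, so only that element of each vector is demanded.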
2699  APInt UndefElts1(VWidth, 0);
2700  DemandedElts = (Imm & 0x01) ? 2 : 1;
2701  if (Value *V = SimplifyDemandedVectorElts(Arg0, DemandedElts,
2702  UndefElts1)) {
2703  II->setArgOperand(0, V);
2704  MadeChange = true;
2705  }
2706 
2707  APInt UndefElts2(VWidth, 0);
2708  DemandedElts = (Imm & 0x10) ? 2 : 1;
2709  if (Value *V = SimplifyDemandedVectorElts(Arg1, DemandedElts,
2710  UndefElts2)) {
2711  II->setArgOperand(1, V);
2712  MadeChange = true;
2713  }
2714 
 2715  // If either demanded input element is undef, fold the result to zero.
2716  if (UndefElts1[(Imm & 0x01) ? 1 : 0] ||
2717  UndefElts2[(Imm & 0x10) ? 1 : 0])
2718  return replaceInstUsesWith(*II,
2719  ConstantAggregateZero::get(II->getType()));
2720 
2721  if (MadeChange)
2722  return II;
2723  }
2724  break;
2725  }
2726 
2727  case Intrinsic::x86_sse41_insertps:
2728  if (Value *V = simplifyX86insertps(*II, Builder))
2729  return replaceInstUsesWith(*II, V);
2730  break;
2731 
2732  case Intrinsic::x86_sse4a_extrq: {
2733  Value *Op0 = II->getArgOperand(0);
2734  Value *Op1 = II->getArgOperand(1);
2735  unsigned VWidth0 = Op0->getType()->getVectorNumElements();
2736  unsigned VWidth1 = Op1->getType()->getVectorNumElements();
2737  assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
2738  Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth0 == 2 &&
2739  VWidth1 == 16 && "Unexpected operand sizes");
2740 
2741  // See if we're dealing with constant values.
2742  Constant *C1 = dyn_cast<Constant>(Op1);
2743  ConstantInt *CILength =
2744  C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)0))
2745  : nullptr;
2746  ConstantInt *CIIndex =
2747  C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)1))
2748  : nullptr;
2749 
2750  // Attempt to simplify to a constant, shuffle vector or EXTRQI call.
2751  if (Value *V = simplifyX86extrq(*II, Op0, CILength, CIIndex, Builder))
2752  return replaceInstUsesWith(*II, V);
2753 
2754  // EXTRQ only uses the lowest 64-bits of the first 128-bit vector
2755  // operands and the lowest 16-bits of the second.
2756  bool MadeChange = false;
2757  if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
2758  II->setArgOperand(0, V);
2759  MadeChange = true;
2760  }
2761  if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 2)) {
2762  II->setArgOperand(1, V);
2763  MadeChange = true;
2764  }
2765  if (MadeChange)
2766  return II;
2767  break;
2768  }
2769 
2770  case Intrinsic::x86_sse4a_extrqi: {
2771  // EXTRQI: Extract Length bits starting from Index. Zero pad the remaining
2772  // bits of the lower 64-bits. The upper 64-bits are undefined.
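  // For example, extrqi(x, 8, 16) yields ((low 64 bits of x) >> 16) & 0xff in
  // the low 64 bits of the result.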
2773  Value *Op0 = II->getArgOperand(0);
2774  unsigned VWidth = Op0->getType()->getVectorNumElements();
2775  assert(Op0->getType()->getPrimitiveSizeInBits() == 128 && VWidth == 2 &&
2776  "Unexpected operand size");
2777 
2778  // See if we're dealing with constant values.
2779  ConstantInt *CILength = dyn_cast<ConstantInt>(II->getArgOperand(1));
2780  ConstantInt *CIIndex = dyn_cast<ConstantInt>(II->getArgOperand(2));
2781 
2782  // Attempt to simplify to a constant or shuffle vector.
2783  if (Value *V = simplifyX86extrq(*II, Op0, CILength, CIIndex, Builder))
2784  return replaceInstUsesWith(*II, V);
2785 
2786  // EXTRQI only uses the lowest 64-bits of the first 128-bit vector
2787  // operand.
2788  if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) {
2789  II->setArgOperand(0, V);
2790  return II;
2791  }
2792  break;
2793  }
2794 
2795  case Intrinsic::x86_sse4a_insertq: {
2796  Value *Op0 = II->getArgOperand(0);
2797  Value *Op1 = II->getArgOperand(1);
2798  unsigned VWidth = Op0->getType()->getVectorNumElements();
2799  assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
2800  Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth == 2 &&
2801  Op1->getType()->getVectorNumElements() == 2 &&
2802  "Unexpected operand size");
2803 
2804  // See if we're dealing with constant values.
2805  Constant *C1 = dyn_cast<Constant>(Op1);
2806  ConstantInt *CI11 =
2807  C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)1))
2808  : nullptr;
2809 
2810  // Attempt to simplify to a constant, shuffle vector or INSERTQI call.
2811  if (CI11) {
2812  const APInt &V11 = CI11->getValue();
2813  APInt Len = V11.zextOrTrunc(6);
2814  APInt Idx = V11.lshr(8).zextOrTrunc(6);
2815  if (Value *V = simplifyX86insertq(*II, Op0, Op1, Len, Idx, Builder))
2816  return replaceInstUsesWith(*II, V);
2817  }
2818 
2819  // INSERTQ only uses the lowest 64-bits of the first 128-bit vector
2820  // operand.
2821  if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) {
2822  II->setArgOperand(0, V);
2823  return II;
2824  }
2825  break;
2826  }
2827 
2828  case Intrinsic::x86_sse4a_insertqi: {
2829  // INSERTQI: Extract lowest Length bits from lower half of second source and
2830  // insert over first source starting at Index bit. The upper 64-bits are
2831  // undefined.
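  // For example, insertqi(x, y, 16, 8) copies the low 16 bits of y into bits
  // [23:8] of x's low 64 bits.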
2832  Value *Op0 = II->getArgOperand(0);
2833  Value *Op1 = II->getArgOperand(1);
2834  unsigned VWidth0 = Op0->getType()->getVectorNumElements();
2835  unsigned VWidth1 = Op1->getType()->getVectorNumElements();
2836  assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
2837  Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth0 == 2 &&
2838  VWidth1 == 2 && "Unexpected operand sizes");
2839 
2840  // See if we're dealing with constant values.
2841  ConstantInt *CILength = dyn_cast<ConstantInt>(II->getArgOperand(2));
2842  ConstantInt *CIIndex = dyn_cast<ConstantInt>(II->getArgOperand(3));
2843 
2844  // Attempt to simplify to a constant or shuffle vector.
2845  if (CILength && CIIndex) {
2846  APInt Len = CILength->getValue().zextOrTrunc(6);
2847  APInt Idx = CIIndex->getValue().zextOrTrunc(6);
2848  if (Value *V = simplifyX86insertq(*II, Op0, Op1, Len, Idx, Builder))
2849  return replaceInstUsesWith(*II, V);
2850  }
2851 
2852  // INSERTQI only uses the lowest 64-bits of the first two 128-bit vector
2853  // operands.
2854  bool MadeChange = false;
2855  if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
2856  II->setArgOperand(0, V);
2857  MadeChange = true;
2858  }
2859  if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 1)) {
2860  II->setArgOperand(1, V);
2861  MadeChange = true;
2862  }
2863  if (MadeChange)
2864  return II;
2865  break;
2866  }
2867 
2868  case Intrinsic::x86_sse41_pblendvb:
2869  case Intrinsic::x86_sse41_blendvps:
2870  case Intrinsic::x86_sse41_blendvpd:
2871  case Intrinsic::x86_avx_blendv_ps_256:
2872  case Intrinsic::x86_avx_blendv_pd_256:
2873  case Intrinsic::x86_avx2_pblendvb: {
2874  // Convert blendv* to vector selects if the mask is constant.
2875  // This optimization is convoluted because the intrinsic is defined as
2876  // getting a vector of floats or doubles for the ps and pd versions.
2877  // FIXME: That should be changed.
2878 
2879  Value *Op0 = II->getArgOperand(0);
2880  Value *Op1 = II->getArgOperand(1);
2881  Value *Mask = II->getArgOperand(2);
2882 
2883  // fold (blend A, A, Mask) -> A
2884  if (Op0 == Op1)
2885  return replaceInstUsesWith(CI, Op0);
2886 
2887  // Zero Mask - select 1st argument.
2888  if (isa<ConstantAggregateZero>(Mask))
2889  return replaceInstUsesWith(CI, Op0);
2890 
2891  // Constant Mask - select 1st/2nd argument lane based on top bit of mask.
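  // getNegativeIsTrueBoolVec maps sign-bit-set mask lanes to true, and true
  // lanes of the select take Op1, matching the hardware rule that a set mask
  // MSB selects the second source.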
2892  if (auto *ConstantMask = dyn_cast<ConstantDataVector>(Mask)) {
2893  Constant *NewSelector = getNegativeIsTrueBoolVec(ConstantMask);
2894  return SelectInst::Create(NewSelector, Op1, Op0, "blendv");
2895  }
2896  break;
2897  }
2898 
2899  case Intrinsic::x86_ssse3_pshuf_b_128:
2900  case Intrinsic::x86_avx2_pshuf_b:
2901  case Intrinsic::x86_avx512_pshuf_b_512:
2902  if (Value *V = simplifyX86pshufb(*II, Builder))
2903  return replaceInstUsesWith(*II, V);
2904  break;
2905 
2906  case Intrinsic::x86_avx_vpermilvar_ps:
2907  case Intrinsic::x86_avx_vpermilvar_ps_256:
2908  case Intrinsic::x86_avx512_vpermilvar_ps_512:
2909  case Intrinsic::x86_avx_vpermilvar_pd:
2910  case Intrinsic::x86_avx_vpermilvar_pd_256:
2911  case Intrinsic::x86_avx512_vpermilvar_pd_512:
2912  if (Value *V = simplifyX86vpermilvar(*II, Builder))
2913  return replaceInstUsesWith(*II, V);
2914  break;
2915 
2916  case Intrinsic::x86_avx2_permd:
2917  case Intrinsic::x86_avx2_permps:
2918  if (Value *V = simplifyX86vpermv(*II, Builder))
2919  return replaceInstUsesWith(*II, V);
2920  break;
2921 
2922  case Intrinsic::x86_avx512_mask_permvar_df_256:
2923  case Intrinsic::x86_avx512_mask_permvar_df_512:
2924  case Intrinsic::x86_avx512_mask_permvar_di_256:
2925  case Intrinsic::x86_avx512_mask_permvar_di_512:
2926  case Intrinsic::x86_avx512_mask_permvar_hi_128:
2927  case Intrinsic::x86_avx512_mask_permvar_hi_256:
2928  case Intrinsic::x86_avx512_mask_permvar_hi_512:
2929  case Intrinsic::x86_avx512_mask_permvar_qi_128:
2930  case Intrinsic::x86_avx512_mask_permvar_qi_256:
2931  case Intrinsic::x86_avx512_mask_permvar_qi_512:
2932  case Intrinsic::x86_avx512_mask_permvar_sf_256:
2933  case Intrinsic::x86_avx512_mask_permvar_sf_512:
2934  case Intrinsic::x86_avx512_mask_permvar_si_256:
2935  case Intrinsic::x86_avx512_mask_permvar_si_512:
2936  if (Value *V = simplifyX86vpermv(*II, Builder)) {
2937  // We simplified the permuting, now create a select for the masking.
2938  V = emitX86MaskSelect(II->getArgOperand(3), V, II->getArgOperand(2),
2939  Builder);
2940  return replaceInstUsesWith(*II, V);
2941  }
2942  break;
2943 
2944  case Intrinsic::x86_avx_maskload_ps:
2945  case Intrinsic::x86_avx_maskload_pd:
2946  case Intrinsic::x86_avx_maskload_ps_256:
2947  case Intrinsic::x86_avx_maskload_pd_256:
2948  case Intrinsic::x86_avx2_maskload_d:
2949  case Intrinsic::x86_avx2_maskload_q:
2950  case Intrinsic::x86_avx2_maskload_d_256:
2951  case Intrinsic::x86_avx2_maskload_q_256:
2952  if (Instruction *I = simplifyX86MaskedLoad(*II, *this))
2953  return I;
2954  break;
2955 
2956  case Intrinsic::x86_sse2_maskmov_dqu:
2957  case Intrinsic::x86_avx_maskstore_ps:
2958  case Intrinsic::x86_avx_maskstore_pd:
2959  case Intrinsic::x86_avx_maskstore_ps_256:
2960  case Intrinsic::x86_avx_maskstore_pd_256:
2961  case Intrinsic::x86_avx2_maskstore_d:
2962  case Intrinsic::x86_avx2_maskstore_q:
2963  case Intrinsic::x86_avx2_maskstore_d_256:
2964  case Intrinsic::x86_avx2_maskstore_q_256:
2965  if (simplifyX86MaskedStore(*II, *this))
2966  return nullptr;
2967  break;
2968 
2969  case Intrinsic::x86_xop_vpcomb:
2970  case Intrinsic::x86_xop_vpcomd:
2971  case Intrinsic::x86_xop_vpcomq:
2972  case Intrinsic::x86_xop_vpcomw:
2973  if (Value *V = simplifyX86vpcom(*II, Builder, true))
2974  return replaceInstUsesWith(*II, V);
2975  break;
2976 
2977  case Intrinsic::x86_xop_vpcomub:
2978  case Intrinsic::x86_xop_vpcomud:
2979  case Intrinsic::x86_xop_vpcomuq:
2980  case Intrinsic::x86_xop_vpcomuw:
2981  if (Value *V = simplifyX86vpcom(*II, Builder, false))
2982  return replaceInstUsesWith(*II, V);
2983  break;
2984 
2985  case Intrinsic::ppc_altivec_vperm:
2986  // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
2987  // Note that ppc_altivec_vperm has a big-endian bias, so when creating
 2988  // a vector shuffle for little endian, we must undo the transformation
2989  // performed on vec_perm in altivec.h. That is, we must complement
2990  // the permutation mask with respect to 31 and reverse the order of
2991  // V1 and V2.
2992  if (Constant *Mask = dyn_cast<Constant>(II->getArgOperand(2))) {
2993  assert(Mask->getType()->getVectorNumElements() == 16 &&
2994  "Bad type for intrinsic!");
2995 
2996  // Check that all of the elements are integer constants or undefs.
2997  bool AllEltsOk = true;
2998  for (unsigned i = 0; i != 16; ++i) {
2999  Constant *Elt = Mask->getAggregateElement(i);
3000  if (!Elt || !(isa<ConstantInt>(Elt) || isa<UndefValue>(Elt))) {
3001  AllEltsOk = false;
3002  break;
3003  }
3004  }
3005 
3006  if (AllEltsOk) {
3007  // Cast the input vectors to byte vectors.
3008  Value *Op0 = Builder.CreateBitCast(II->getArgOperand(0),
3009  Mask->getType());
3010  Value *Op1 = Builder.CreateBitCast(II->getArgOperand(1),
3011  Mask->getType());
3012  Value *Result = UndefValue::get(Op0->getType());
3013 
3014  // Only extract each element once.
3015  Value *ExtractedElts[32];
3016  memset(ExtractedElts, 0, sizeof(ExtractedElts));
3017 
3018  for (unsigned i = 0; i != 16; ++i) {
3019  if (isa<UndefValue>(Mask->getAggregateElement(i)))
3020  continue;
3021  unsigned Idx =
3022  cast<ConstantInt>(Mask->getAggregateElement(i))->getZExtValue();
3023  Idx &= 31; // Match the hardware behavior.
3024  if (DL.isLittleEndian())
3025  Idx = 31 - Idx;
3026 
3027  if (!ExtractedElts[Idx]) {
3028  Value *Op0ToUse = (DL.isLittleEndian()) ? Op1 : Op0;
3029  Value *Op1ToUse = (DL.isLittleEndian()) ? Op0 : Op1;
3030  ExtractedElts[Idx] =
3031  Builder.CreateExtractElement(Idx < 16 ? Op0ToUse : Op1ToUse,
3032  Builder.getInt32(Idx&15));
3033  }
3034 
3035  // Insert this value into the result vector.
3036  Result = Builder.CreateInsertElement(Result, ExtractedElts[Idx],
3037  Builder.getInt32(i));
3038  }
3039  return CastInst::Create(Instruction::BitCast, Result, CI.getType());
3040  }
3041  }
3042  break;
3043 
3044  case Intrinsic::arm_neon_vld1:
3045  case Intrinsic::arm_neon_vld2:
3046  case Intrinsic::arm_neon_vld3:
3047  case Intrinsic::arm_neon_vld4:
3048  case Intrinsic::arm_neon_vld2lane:
3049  case Intrinsic::arm_neon_vld3lane:
3050  case Intrinsic::arm_neon_vld4lane:
3051  case Intrinsic::arm_neon_vst1:
3052  case Intrinsic::arm_neon_vst2:
3053  case Intrinsic::arm_neon_vst3:
3054  case Intrinsic::arm_neon_vst4:
3055  case Intrinsic::arm_neon_vst2lane:
3056  case Intrinsic::arm_neon_vst3lane:
3057  case Intrinsic::arm_neon_vst4lane: {
3058  unsigned MemAlign =
3059  getKnownAlignment(II->getArgOperand(0), DL, II, &AC, &DT);
3060  unsigned AlignArg = II->getNumArgOperands() - 1;
3061  ConstantInt *IntrAlign = dyn_cast<ConstantInt>(II->getArgOperand(AlignArg));
3062  if (IntrAlign && IntrAlign->getZExtValue() < MemAlign) {
3063  II->setArgOperand(AlignArg,
3064  ConstantInt::get(Type::getInt32Ty(II->getContext()),
3065  MemAlign, false));
3066  return II;
3067  }
3068  break;
3069  }
3070 
3071  case Intrinsic::arm_neon_vmulls:
3072  case Intrinsic::arm_neon_vmullu:
3073  case Intrinsic::aarch64_neon_smull:
3074  case Intrinsic::aarch64_neon_umull: {
3075  Value *Arg0 = II->getArgOperand(0);
3076  Value *Arg1 = II->getArgOperand(1);
3077 
3078  // Handle mul by zero first:
3079  if (isa<ConstantAggregateZero>(Arg0) || isa<ConstantAggregateZero>(Arg1)) {
3080  return replaceInstUsesWith(CI, ConstantAggregateZero::get(II->getType()));
3081  }
3082 
3083  // Check for constant LHS & RHS - in this case we just simplify.
3084  bool Zext = (II->getIntrinsicID() == Intrinsic::arm_neon_vmullu ||
3085  II->getIntrinsicID() == Intrinsic::aarch64_neon_umull);
3086  VectorType *NewVT = cast<VectorType>(II->getType());
3087  if (Constant *CV0 = dyn_cast<Constant>(Arg0)) {
3088  if (Constant *CV1 = dyn_cast<Constant>(Arg1)) {
3089  CV0 = ConstantExpr::getIntegerCast(CV0, NewVT, /*isSigned=*/!Zext);
3090  CV1 = ConstantExpr::getIntegerCast(CV1, NewVT, /*isSigned=*/!Zext);
3091 
3092  return replaceInstUsesWith(CI, ConstantExpr::getMul(CV0, CV1));
3093  }
3094 
3095  // Couldn't simplify - canonicalize constant to the RHS.
3096  std::swap(Arg0, Arg1);
3097  }
3098 
3099  // Handle mul by one:
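  // Illustrative sketch (not part of the original source): the unsigned
  // vmull of x and a splat of 1 becomes a zext of x to the wider result
  // type; the signed variants use sext instead.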
3100  if (Constant *CV1 = dyn_cast<Constant>(Arg1))
3101  if (ConstantInt *Splat =
3102  dyn_cast_or_null<ConstantInt>(CV1->getSplatValue()))
3103  if (Splat->isOne())
3104  return CastInst::CreateIntegerCast(Arg0, II->getType(),
3105  /*isSigned=*/!Zext);
3106 
3107  break;
3108  }
3109  case Intrinsic::amdgcn_rcp: {
3110  Value *Src = II->getArgOperand(0);
3111 
3112  // TODO: Move to ConstantFolding/InstSimplify?
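  // Illustrative behavior (not part of the original source):
  //   llvm.amdgcn.rcp(float 2.0) folds to 0.5 below, while rcp(float 3.0)
  //   is left alone because 1.0/3.0 is inexact and its result would depend
  //   on the rounding mode.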
3113  if (isa<UndefValue>(Src))
3114  return replaceInstUsesWith(CI, Src);
3115 
3116  if (const ConstantFP *C = dyn_cast<ConstantFP>(Src)) {
3117  const APFloat &ArgVal = C->getValueAPF();
3118  APFloat Val(ArgVal.getSemantics(), 1.0);
3119  APFloat::opStatus Status = Val.divide(ArgVal,
3120  APFloat::rmNearestTiesToEven);
3121  // Only do this if it was exact and therefore not dependent on the
3122  // rounding mode.
3123  if (Status == APFloat::opOK)
3124  return replaceInstUsesWith(CI, ConstantFP::get(II->getContext(), Val));
3125  }
3126 
3127  break;
3128  }
3129  case Intrinsic::amdgcn_rsq: {
3130  Value *Src = II->getArgOperand(0);
3131 
3132  // TODO: Move to ConstantFolding/InstSimplify?
3133  if (isa<UndefValue>(Src))
3134  return replaceInstUsesWith(CI, Src);
3135  break;
3136  }
3137  case Intrinsic::amdgcn_frexp_mant:
3138  case Intrinsic::amdgcn_frexp_exp: {
3139  Value *Src = II->getArgOperand(0);
3140  if (const ConstantFP *C = dyn_cast<ConstantFP>(Src)) {
3141  int Exp;
3142  APFloat Significand = frexp(C->getValueAPF(), Exp,
3143  APFloat::rmNearestTiesToEven);
3144 
3145  if (II->getIntrinsicID() == Intrinsic::amdgcn_frexp_mant) {
3146  return replaceInstUsesWith(CI, ConstantFP::get(II->getContext(),
3147  Significand));
3148  }
3149 
3150  // Match instruction special case behavior.
3151  if (Exp == APFloat::IEK_NaN || Exp == APFloat::IEK_Inf)
3152  Exp = 0;
3153 
3154  return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), Exp));
3155  }
3156 
3157  if (isa<UndefValue>(Src))
3158  return replaceInstUsesWith(CI, UndefValue::get(II->getType()));
3159 
3160  break;
3161  }
3162  case Intrinsic::amdgcn_class: {
3163  enum {
3164  S_NAN = 1 << 0, // Signaling NaN
3165  Q_NAN = 1 << 1, // Quiet NaN
3166  N_INFINITY = 1 << 2, // Negative infinity
3167  N_NORMAL = 1 << 3, // Negative normal
3168  N_SUBNORMAL = 1 << 4, // Negative subnormal
3169  N_ZERO = 1 << 5, // Negative zero
3170  P_ZERO = 1 << 6, // Positive zero
3171  P_SUBNORMAL = 1 << 7, // Positive subnormal
3172  P_NORMAL = 1 << 8, // Positive normal
3173  P_INFINITY = 1 << 9 // Positive infinity
3174  };
3175 
3176  const uint32_t FullMask = S_NAN | Q_NAN | N_INFINITY | N_NORMAL |
3177  N_SUBNORMAL | N_ZERO | P_ZERO | P_SUBNORMAL | P_NORMAL | P_INFINITY;
3178 
3179  Value *Src0 = II->getArgOperand(0);
3180  Value *Src1 = II->getArgOperand(1);
3181  const ConstantInt *CMask = dyn_cast<ConstantInt>(Src1);
3182  if (!CMask) {
3183  if (isa<UndefValue>(Src0))
3184  return replaceInstUsesWith(*II, UndefValue::get(II->getType()));
3185 
3186  if (isa<UndefValue>(Src1))
3187  return replaceInstUsesWith(*II, ConstantInt::get(II->getType(), false));
3188  break;
3189  }
3190 
3191  uint32_t Mask = CMask->getZExtValue();
3192 
3193  // If every class bit is set, the result is true regardless of the value.
3194  if ((Mask & FullMask) == FullMask)
3195  return replaceInstUsesWith(*II, ConstantInt::get(II->getType(), true));
3196 
3197  if ((Mask & FullMask) == 0)
3198  return replaceInstUsesWith(*II, ConstantInt::get(II->getType(), false));
3199 
3200  if (Mask == (S_NAN | Q_NAN)) {
3201  // Equivalent of isnan. Replace with standard fcmp.
3202  Value *FCmp = Builder.CreateFCmpUNO(Src0, Src0);
3203  FCmp->takeName(II);
3204  return replaceInstUsesWith(*II, FCmp);
3205  }
3206 
3207  const ConstantFP *CVal = dyn_cast<ConstantFP>(Src0);
3208  if (!CVal) {
3209  if (isa<UndefValue>(Src0))
3210  return replaceInstUsesWith(*II, UndefValue::get(II->getType()));
3211 
3212  // Clamp mask to used bits
3213  if ((Mask & FullMask) != Mask) {
3214  CallInst *NewCall = Builder.CreateCall(II->getCalledFunction(),
3215  { Src0, ConstantInt::get(Src1->getType(), Mask & FullMask) }
3216  );
3217 
3218  NewCall->takeName(II);
3219  return replaceInstUsesWith(*II, NewCall);
3220  }
3221 
3222  break;
3223  }
3224 
3225  const APFloat &Val = CVal->getValueAPF();
3226 
3227  bool Result =
3228  ((Mask & S_NAN) && Val.isNaN() && Val.isSignaling()) ||
3229  ((Mask & Q_NAN) && Val.isNaN() && !Val.isSignaling()) ||
3230  ((Mask & N_INFINITY) && Val.isInfinity() && Val.isNegative()) ||
3231  ((Mask & N_NORMAL) && Val.isNormal() && Val.isNegative()) ||
3232  ((Mask & N_SUBNORMAL) && Val.isDenormal() && Val.isNegative()) ||
3233  ((Mask & N_ZERO) && Val.isZero() && Val.isNegative()) ||
3234  ((Mask & P_ZERO) && Val.isZero() && !Val.isNegative()) ||
3235  ((Mask & P_SUBNORMAL) && Val.isDenormal() && !Val.isNegative()) ||
3236  ((Mask & P_NORMAL) && Val.isNormal() && !Val.isNegative()) ||
3237  ((Mask & P_INFINITY) && Val.isInfinity() && !Val.isNegative());
3238 
3239  return replaceInstUsesWith(*II, ConstantInt::get(II->getType(), Result));
3240  }
3241  case Intrinsic::amdgcn_cvt_pkrtz: {
3242  Value *Src0 = II->getArgOperand(0);
3243  Value *Src1 = II->getArgOperand(1);
3244  if (const ConstantFP *C0 = dyn_cast<ConstantFP>(Src0)) {
3245  if (const ConstantFP *C1 = dyn_cast<ConstantFP>(Src1)) {
3246  const fltSemantics &HalfSem
3247  = II->getType()->getScalarType()->getFltSemantics();
3248  bool LosesInfo;
3249  APFloat Val0 = C0->getValueAPF();
3250  APFloat Val1 = C1->getValueAPF();
3251  Val0.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo);
3252  Val1.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo);
3253 
3254  Constant *Folded = ConstantVector::get({
3255  ConstantFP::get(II->getContext(), Val0),
3256  ConstantFP::get(II->getContext(), Val1) });
3257  return replaceInstUsesWith(*II, Folded);
3258  }
3259  }
3260 
3261  if (isa<UndefValue>(Src0) && isa<UndefValue>(Src1))
3262  return replaceInstUsesWith(*II, UndefValue::get(II->getType()));
3263 
3264  break;
3265  }
3266  case Intrinsic::amdgcn_ubfe:
3267  case Intrinsic::amdgcn_sbfe: {
3268  // Decompose simple cases into standard shifts.
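  // Illustrative sketch (not part of the original source): with offset == 0
  // and a (possibly variable) width w on an IntSize-bit type,
  //   ubfe(x, 0, w)  ->  lshr(shl(x, IntSize - w), IntSize - w)
  // and sbfe uses ashr for the right shift, as built below.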
3269  Value *Src = II->getArgOperand(0);
3270  if (isa<UndefValue>(Src))
3271  return replaceInstUsesWith(*II, Src);
3272 
3273  unsigned Width;
3274  Type *Ty = II->getType();
3275  unsigned IntSize = Ty->getIntegerBitWidth();
3276 
3277  ConstantInt *CWidth = dyn_cast<ConstantInt>(II->getArgOperand(2));
3278  if (CWidth) {
3279  Width = CWidth->getZExtValue();
3280  if ((Width & (IntSize - 1)) == 0)
3281  return replaceInstUsesWith(*II, ConstantInt::getNullValue(Ty));
3282 
3283  if (Width >= IntSize) {
3284  // Hardware ignores high bits, so remove those.
3285  II->setArgOperand(2, ConstantInt::get(CWidth->getType(),
3286  Width & (IntSize - 1)));
3287  return II;
3288  }
3289  }
3290 
3291  unsigned Offset;
3292  ConstantInt *COffset = dyn_cast<ConstantInt>(II->getArgOperand(1));
3293  if (COffset) {
3294  Offset = COffset->getZExtValue();
3295  if (Offset >= IntSize) {
3296  II->setArgOperand(1, ConstantInt::get(COffset->getType(),
3297  Offset & (IntSize - 1)));
3298  return II;
3299  }
3300  }
3301 
3302  bool Signed = II->getIntrinsicID() == Intrinsic::amdgcn_sbfe;
3303 
3304  // TODO: Also emit sub if only width is constant.
3305  if (!CWidth && COffset && Offset == 0) {
3306  Constant *KSize = ConstantInt::get(COffset->getType(), IntSize);
3307  Value *ShiftVal = Builder.CreateSub(KSize, II->getArgOperand(2));
3308  ShiftVal = Builder.CreateZExt(ShiftVal, II->getType());
3309 
3310  Value *Shl = Builder.CreateShl(Src, ShiftVal);
3311  Value *RightShift = Signed ? Builder.CreateAShr(Shl, ShiftVal)
3312  : Builder.CreateLShr(Shl, ShiftVal);
3313  RightShift->takeName(II);
3314  return replaceInstUsesWith(*II, RightShift);
3315  }
3316 
3317  if (!CWidth || !COffset)
3318  break;
3319 
3320  // TODO: This allows folding to undef when the hardware has specific
3321  // behavior?
3322  if (Offset + Width < IntSize) {
3323  Value *Shl = Builder.CreateShl(Src, IntSize - Offset - Width);
3324  Value *RightShift = Signed ? Builder.CreateAShr(Shl, IntSize - Width)
3325  : Builder.CreateLShr(Shl, IntSize - Width);
3326  RightShift->takeName(II);
3327  return replaceInstUsesWith(*II, RightShift);
3328  }
3329 
3330  Value *RightShift = Signed ? Builder.CreateAShr(Src, Offset)
3331  : Builder.CreateLShr(Src, Offset);
3332 
3333  RightShift->takeName(II);
3334  return replaceInstUsesWith(*II, RightShift);
3335  }
3336  case Intrinsic::amdgcn_exp:
3337  case Intrinsic::amdgcn_exp_compr: {
3338  ConstantInt *En = dyn_cast<ConstantInt>(II->getArgOperand(1));
3339  if (!En) // Illegal.
3340  break;
3341 
3342  unsigned EnBits = En->getZExtValue();
3343  if (EnBits == 0xf)
3344  break; // All inputs enabled.
3345 
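  // Channels whose enable bit(s) are clear are not read by the export, so
  // (as a sketch of the intent, not original commentary) their source
  // operands can be replaced with undef below; in the compressed form each
  // operand covers two channels, hence the 0x3 mask per operand.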
3346  bool IsCompr = II->getIntrinsicID() == Intrinsic::amdgcn_exp_compr;
3347  bool Changed = false;
3348  for (int I = 0; I < (IsCompr ? 2 : 4); ++I) {
3349  if ((!IsCompr && (EnBits & (1 << I)) == 0) ||
3350  (IsCompr && ((EnBits & (0x3 << (2 * I))) == 0))) {
3351  Value *Src = II->getArgOperand(I + 2);
3352  if (!isa<UndefValue>(Src)) {
3353  II->setArgOperand(I + 2, UndefValue::get(Src->getType()));
3354  Changed = true;
3355  }
3356  }
3357  }
3358 
3359  if (Changed)
3360  return II;
3361 
3362  break;
3363  }
3364  case Intrinsic::amdgcn_fmed3: {
3365  // Note this does not preserve proper sNaN behavior if IEEE-mode is enabled
3366  // for the shader.
3367 
3368  Value *Src0 = II->getArgOperand(0);
3369  Value *Src1 = II->getArgOperand(1);
3370  Value *Src2 = II->getArgOperand(2);
3371 
3372  bool Swap = false;
3373  // Canonicalize constants to RHS operands.
3374  //
3375  // fmed3(c0, x, c1) -> fmed3(x, c0, c1)
3376  if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
3377  std::swap(Src0, Src1);
3378  Swap = true;
3379  }
3380 
3381  if (isa<Constant>(Src1) && !isa<Constant>(Src2)) {
3382  std::swap(Src1, Src2);
3383  Swap = true;
3384  }
3385 
3386  if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
3387  std::swap(Src0, Src1);
3388  Swap = true;
3389  }
3390 
3391  if (Swap) {
3392  II->setArgOperand(0, Src0);
3393  II->setArgOperand(1, Src1);
3394  II->setArgOperand(2, Src2);
3395  return II;
3396  }
3397 
3398  if (match(Src2, m_NaN()) || isa<UndefValue>(Src2)) {
3399  CallInst *NewCall = Builder.CreateMinNum(Src0, Src1);
3400  NewCall->copyFastMathFlags(II);
3401  NewCall->takeName(II);
3402  return replaceInstUsesWith(*II, NewCall);
3403  }
3404 
3405  if (const ConstantFP *C0 = dyn_cast<ConstantFP>(Src0)) {
3406  if (const ConstantFP *C1 = dyn_cast<ConstantFP>(Src1)) {
3407  if (const ConstantFP *C2 = dyn_cast<ConstantFP>(Src2)) {
3408  APFloat Result = fmed3AMDGCN(C0->getValueAPF(), C1->getValueAPF(),
3409  C2->getValueAPF());
3410  return replaceInstUsesWith(*II,
3411  ConstantFP::get(Builder.getContext(), Result));
3412  }
3413  }
3414  }
3415 
3416  break;
3417  }
3418  case Intrinsic::amdgcn_icmp:
3419  case Intrinsic::amdgcn_fcmp: {
3420  const ConstantInt *CC = dyn_cast<ConstantInt>(II->getArgOperand(2));
3421  if (!CC)
3422  break;
3423 
3424  // Guard against invalid arguments.
3425  int64_t CCVal = CC->getZExtValue();
3426  bool IsInteger = II->getIntrinsicID() == Intrinsic::amdgcn_icmp;
3427  if ((IsInteger && (CCVal < CmpInst::FIRST_ICMP_PREDICATE ||
3428  CCVal > CmpInst::LAST_ICMP_PREDICATE)) ||
3429  (!IsInteger && (CCVal < CmpInst::FIRST_FCMP_PREDICATE ||
3430  CCVal > CmpInst::LAST_FCMP_PREDICATE)))
3431  break;
3432 
3433  Value *Src0 = II->getArgOperand(0);
3434  Value *Src1 = II->getArgOperand(1);
3435 
3436  if (auto *CSrc0 = dyn_cast<Constant>(Src0)) {
3437  if (auto *CSrc1 = dyn_cast<Constant>(Src1)) {
3438  Constant *CCmp = ConstantExpr::getCompare(CCVal, CSrc0, CSrc1);
3439  if (CCmp->isNullValue()) {
3440  return replaceInstUsesWith(
3441  *II, ConstantExpr::getSExt(CCmp, II->getType()));
3442  }
3443 
3444  // The result of V_ICMP/V_FCMP assembly instructions (which this
3445  // intrinsic exposes) is one bit per thread, masked with the EXEC
3446  // register (which contains the bitmask of live threads). So a
3447  // comparison that always returns true is the same as a read of the
3448  // EXEC register.
3449  Value *NewF = Intrinsic::getDeclaration(
3450  II->getModule(), Intrinsic::read_register, II->getType());
3451  Metadata *MDArgs[] = {MDString::get(II->getContext(), "exec")};
3452  MDNode *MD = MDNode::get(II->getContext(), MDArgs);
3453  Value *Args[] = {MetadataAsValue::get(II->getContext(), MD)};
3454  CallInst *NewCall = Builder.CreateCall(NewF, Args);
3455  NewCall->addAttribute(AttributeList::FunctionIndex,
3456  Attribute::Convergent);
3457  NewCall->takeName(II);
3458  return replaceInstUsesWith(*II, NewCall);
3459  }
3460 
3461  // Canonicalize constants to RHS.
3462  CmpInst::Predicate SwapPred
3463  = CmpInst::getSwappedPredicate(static_cast<CmpInst::Predicate>(CCVal));
3464  II->setArgOperand(0, Src1);
3465  II->setArgOperand(1, Src0);
3466  II->setArgOperand(2, ConstantInt::get(CC->getType(),
3467  static_cast<int>(SwapPred)));
3468  return II;
3469  }
3470 
3471  if (CCVal != CmpInst::ICMP_EQ && CCVal != CmpInst::ICMP_NE)
3472  break;
3473 
3474  // Canonicalize compare eq with true value to compare != 0
3475  // llvm.amdgcn.icmp(zext (i1 x), 1, eq)
3476  // -> llvm.amdgcn.icmp(zext (i1 x), 0, ne)
3477  // llvm.amdgcn.icmp(sext (i1 x), -1, eq)
3478  // -> llvm.amdgcn.icmp(sext (i1 x), 0, ne)
3479  Value *ExtSrc;
3480  if (CCVal == CmpInst::ICMP_EQ &&
3481  ((match(Src1, m_One()) && match(Src0, m_ZExt(m_Value(ExtSrc)))) ||
3482  (match(Src1, m_AllOnes()) && match(Src0, m_SExt(m_Value(ExtSrc))))) &&
3483  ExtSrc->getType()->isIntegerTy(1)) {
3484  II->setArgOperand(1, ConstantInt::getNullValue(Src1->getType()));
3485  II->setArgOperand(2, ConstantInt::get(CC->getType(), CmpInst::ICMP_NE));
3486  return II;
3487  }
3488 
3489  CmpInst::Predicate SrcPred;
3490  Value *SrcLHS;
3491  Value *SrcRHS;
3492 
3493  // Fold compare eq/ne with 0 from a compare result as the predicate to the
3494  // intrinsic. The typical use is a wave vote function in the library, which
3495  // will be fed from a user code condition compared with 0. Fold in the
3496  // redundant compare.
3497 
3498  // llvm.amdgcn.icmp([sz]ext ([if]cmp pred a, b), 0, ne)
3499  // -> llvm.amdgcn.[if]cmp(a, b, pred)
3500  //
3501  // llvm.amdgcn.icmp([sz]ext ([if]cmp pred a, b), 0, eq)
3502  // -> llvm.amdgcn.[if]cmp(a, b, inv pred)
3503  if (match(Src1, m_Zero()) &&
3504  match(Src0,
3505  m_ZExtOrSExt(m_Cmp(SrcPred, m_Value(SrcLHS), m_Value(SrcRHS))))) {
3506  if (CCVal == CmpInst::ICMP_EQ)
3507  SrcPred = CmpInst::getInversePredicate(SrcPred);
3508 
3509  Intrinsic::ID NewIID = CmpInst::isFPPredicate(SrcPred) ?
3510  Intrinsic::amdgcn_fcmp : Intrinsic::amdgcn_icmp;
3511 
3512  Value *NewF = Intrinsic::getDeclaration(II->getModule(), NewIID,
3513  SrcLHS->getType());
3514  Value *Args[] = { SrcLHS, SrcRHS,
3515  ConstantInt::get(CC->getType(), SrcPred) };
3516  CallInst *NewCall = Builder.CreateCall(NewF, Args);
3517  NewCall->takeName(II);
3518  return replaceInstUsesWith(*II, NewCall);
3519  }
3520 
3521  break;
3522  }
3523  case Intrinsic::amdgcn_wqm_vote: {
3524  // wqm_vote is identity when the argument is constant.
3525  if (!isa<Constant>(II->getArgOperand(0)))
3526  break;
3527 
3528  return replaceInstUsesWith(*II, II->getArgOperand(0));
3529  }
3530  case Intrinsic::amdgcn_kill: {
3531  const ConstantInt *C = dyn_cast<ConstantInt>(II->getArgOperand(0));
3532  if (!C || !C->getZExtValue())
3533  break;
3534 
3535  // amdgcn.kill(i1 1) is a no-op
3536  return eraseInstFromFunction(CI);
3537  }
3538  case Intrinsic::stackrestore: {
3539  // If the save is right next to the restore, remove the restore. This can
3540  // happen when variable allocas are DCE'd.
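  // Illustrative IR (not part of the original source):
  //   %sp = call i8* @llvm.stacksave()
  //   call void @llvm.stackrestore(i8* %sp)   ; adjacent pair, restore removed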
3541  if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
3542  if (SS->getIntrinsicID() == Intrinsic::stacksave) {
3543  if (&*++SS->getIterator() == II)
3544  return eraseInstFromFunction(CI);
3545  }
3546  }
3547 
3548  // Scan down this block to see if there is another stack restore in the
3549  // same block without an intervening call/alloca.
3550  BasicBlock::iterator BI(II);
3551  TerminatorInst *TI = II->getParent()->getTerminator();
3552  bool CannotRemove = false;
3553  for (++BI; &*BI != TI; ++BI) {
3554  if (isa<AllocaInst>(BI)) {
3555  CannotRemove = true;
3556  break;
3557  }
3558  if (CallInst *BCI = dyn_cast<CallInst>(BI)) {
3559  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(BCI)) {
3560  // If there is a stackrestore below this one, remove this one.
3561  if (II->getIntrinsicID() == Intrinsic::stackrestore)
3562  return eraseInstFromFunction(CI);
3563 
3564  // Bail if we cross over an intrinsic with side effects, such as
3565  // llvm.stacksave, llvm.read_register, or llvm.setjmp.
3566  if (II->mayHaveSideEffects()) {
3567  CannotRemove = true;
3568  break;
3569  }
3570  } else {
3571  // If we found a non-intrinsic call, we can't remove the stack
3572  // restore.
3573  CannotRemove = true;
3574  break;
3575  }
3576  }
3577  }
3578 
3579  // If the stack restore is in a return, resume, or unwind block and if there
3580  // are no allocas or calls between the restore and the return, nuke the
3581  // restore.
3582  if (!CannotRemove && (isa<ReturnInst>(TI) || isa<ResumeInst>(TI)))
3583  return eraseInstFromFunction(CI);
3584  break;
3585  }
3586  case Intrinsic::lifetime_start:
3587  // Asan needs to poison memory to detect invalid access which is possible
3588  // even for an empty lifetime range.
3589  if (II->getFunction()->hasFnAttribute(Attribute::SanitizeAddress) ||
3590  II->getFunction()->hasFnAttribute(Attribute::SanitizeHWAddress))
3591  break;
3592 
3593  if (removeTriviallyEmptyRange(*II, Intrinsic::lifetime_start,
3594  Intrinsic::lifetime_end, *this))
3595  return nullptr;
3596  break;
3597  case Intrinsic::assume: {
3598  Value *IIOperand = II->getArgOperand(0);
3599  // Remove an assume if it is immediately followed by an identical assume.
3600  if (match(II->getNextNode(),
3601  m_Intrinsic<Intrinsic::assume>(m_Specific(IIOperand))))
3602  return eraseInstFromFunction(CI);
3603 
3604  // Canonicalize assume(a && b) -> assume(a); assume(b);
3605  // Note: New assumption intrinsics created here are registered by
3606  // the InstCombineIRInserter object.
3607  Value *AssumeIntrinsic = II->getCalledValue(), *A, *B;
3608  if (match(IIOperand, m_And(m_Value(A), m_Value(B)))) {
3609  Builder.CreateCall(AssumeIntrinsic, A, II->getName());
3610  Builder.CreateCall(AssumeIntrinsic, B, II->getName());
3611  return eraseInstFromFunction(*II);
3612  }
3613  // assume(!(a || b)) -> assume(!a); assume(!b);
3614  if (match(IIOperand, m_Not(m_Or(m_Value(A), m_Value(B))))) {
3615  Builder.CreateCall(AssumeIntrinsic, Builder.CreateNot(A), II->getName());
3616  Builder.CreateCall(AssumeIntrinsic, Builder.CreateNot(B), II->getName());
3617  return eraseInstFromFunction(*II);
3618  }
3619 
3620  // assume( (load addr) != null ) -> add 'nonnull' metadata to load
3621  // (if assume is valid at the load)
3622  CmpInst::Predicate Pred;
3623  Instruction *LHS;
3624  if (match(IIOperand, m_ICmp(Pred, m_Instruction(LHS), m_Zero())) &&
3625  Pred == ICmpInst::ICMP_NE && LHS->getOpcode() == Instruction::Load &&
3626  LHS->getType()->isPointerTy() &&
3627  isValidAssumeForContext(II, LHS, &DT)) {
3628  MDNode *MD = MDNode::get(II->getContext(), None);
3629  LHS->setMetadata(LLVMContext::MD_nonnull, MD);
3630  return eraseInstFromFunction(*II);
3631 
3632  // TODO: apply nonnull return attributes to calls and invokes
3633  // TODO: apply range metadata for range check patterns?
3634  }
3635 
3636  // If there is a dominating assume with the same condition as this one,
3637  // then this one is redundant, and should be removed.
3638  KnownBits Known(1);
3639  computeKnownBits(IIOperand, Known, 0, II);
3640  if (Known.isAllOnes())
3641  return eraseInstFromFunction(*II);
3642 
3643  // Update the cache of affected values for this assumption (we might be
3644  // here because we just simplified the condition).
3645  AC.updateAffectedValues(II);
3646  break;
3647  }
3648  case Intrinsic::experimental_gc_relocate: {
3649  // Translate facts known about a pointer before relocating into
3650  // facts about the relocate value, while being careful to
3651  // preserve relocation semantics.
3652  Value *DerivedPtr = cast<GCRelocateInst>(II)->getDerivedPtr();
3653 
3654  // Remove the relocation if unused; note that this check is required
3655  // to prevent the cases below from looping forever.
3656  if (II->use_empty())
3657  return eraseInstFromFunction(*II);
3658 
3659  // Undef is undef, even after relocation.
3660  // TODO: provide a hook for this in GCStrategy. This is clearly legal for
3661  // most practical collectors, but there was discussion in the review thread
3662  // about whether it was legal for all possible collectors.
3663  if (isa<UndefValue>(DerivedPtr))
3664  // Use undef of gc_relocate's type to replace it.
3665  return replaceInstUsesWith(*II, UndefValue::get(II->getType()));
3666 
3667  if (auto *PT = dyn_cast<PointerType>(II->getType())) {
3668  // The relocation of null will be null for most any collector.
3669  // TODO: provide a hook for this in GCStrategy. There might be some
3670  // weird collector this property does not hold for.
3671  if (isa<ConstantPointerNull>(DerivedPtr))
3672  // Use null-pointer of gc_relocate's type to replace it.
3673  return replaceInstUsesWith(*II, ConstantPointerNull::get(PT));
3674 
3675  // isKnownNonNull -> nonnull attribute
3676  if (isKnownNonZero(DerivedPtr, DL, 0, &AC, II, &DT))
3677  II->addAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
3678  }
3679 
3680  // TODO: bitcast(relocate(p)) -> relocate(bitcast(p))
3681  // Canonicalize on the type from the uses to the defs
3682 
3683  // TODO: relocate((gep p, C, C2, ...)) -> gep(relocate(p), C, C2, ...)
3684  break;
3685  }
3686 
3687  case Intrinsic::experimental_guard: {
3688  // Is this guard followed by another guard?
3689  Instruction *NextInst = II->getNextNode();
3690  Value *NextCond = nullptr;
3691  if (match(NextInst,
3692  m_Intrinsic<Intrinsic::experimental_guard>(m_Value(NextCond)))) {
3693  Value *CurrCond = II->getArgOperand(0);
3694 
3695  // Remove a guard that is immediately preceded by an identical guard.
3696  if (CurrCond == NextCond)
3697  return eraseInstFromFunction(*NextInst);
3698 
3699  // Otherwise canonicalize guard(a); guard(b) -> guard(a & b).
3700  II->setArgOperand(0, Builder.CreateAnd(CurrCond, NextCond));
3701  return eraseInstFromFunction(*NextInst);
3702  }
3703  break;
3704  }
3705  }
3706  return visitCallSite(II);
3707 }
3708 
3709 // Fence instruction simplification
3710 Instruction *InstCombiner::visitFenceInst(FenceInst &FI) {
3711  // Remove identical consecutive fences.
3712  if (auto *NFI = dyn_cast<FenceInst>(FI.getNextNode()))
3713  if (FI.isIdenticalTo(NFI))
3714  return eraseInstFromFunction(FI);
3715  return nullptr;
3716 }
3717 
3718 // InvokeInst simplification
3719 Instruction *InstCombiner::visitInvokeInst(InvokeInst &II) {
3720  return visitCallSite(&II);
3721 }
3722 
3723 /// If this cast does not affect the value passed through the varargs area, we
3724 /// can eliminate the use of the cast.
3725 static bool isSafeToEliminateVarargsCast(CallSite CS,
3726  const DataLayout &DL,
3727  const CastInst *const CI,
3728  const int ix) {
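  // Illustrative sketch (not part of the original source): for a varargs
  // call such as
  //   %c = bitcast i32* %p to i8*
  //   call i32 (i8*, ...) @printf(i8* %fmt, i8* %c)
  // the bitcast is lossless and the argument only passes through the varargs
  // area, so %p can be passed directly and the cast dropped.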
3729  if (!CI->isLosslessCast())
3730  return false;
3731 
3732  // If this is a GC intrinsic, avoid munging types. We need types for
3733  // statepoint reconstruction in SelectionDAG.
3734  // TODO: This is probably something which should be expanded to all
3735  // intrinsics since the entire point of intrinsics is that
3736  // they are understandable by the optimizer.
3737  if (isStatepoint(CS) || isGCRelocate(CS) || isGCResult(CS))
3738  return false;
3739 
3740  // The size of ByVal or InAlloca arguments is derived from the type, so we
3741  // can't change to a type with a different size. If the size were
3742  // passed explicitly we could avoid this check.
3743  if (!CS.isByValOrInAllocaArgument(ix))
3744  return true;
3745 
3746  Type* SrcTy =
3747  cast<PointerType>(CI->getOperand(0)->getType())->getElementType();
3748  Type* DstTy = cast<PointerType>(CI->getType())->getElementType();
3749  if (!SrcTy->isSized() || !DstTy->isSized())
3750  return false;
3751  if (DL.getTypeAllocSize(SrcTy) != DL.getTypeAllocSize(DstTy))
3752  return false;
3753  return true;
3754 }
3755 
3756 Instruction *InstCombiner::tryOptimizeCall(CallInst *CI) {
3757  if (!CI->getCalledFunction()) return nullptr;
3758 
3759  auto InstCombineRAUW = [this](Instruction *From, Value *With) {
3760  replaceInstUsesWith(*From, With);
3761  };
3762  LibCallSimplifier Simplifier(DL, &TLI, ORE, InstCombineRAUW);
3763  if (Value *With = Simplifier.optimizeCall(CI)) {
3764  ++NumSimplified;
3765  return CI->use_empty() ? CI : replaceInstUsesWith(*CI, With);
3766  }
3767 
3768  return nullptr;
3769 }
3770 
3771 static IntrinsicInst *findInitTrampolineFromAlloca(Value *TrampMem) {
3772  // Strip off at most one level of pointer casts, looking for an alloca. This
3773  // is good enough in practice and simpler than handling any number of casts.
3774  Value *Underlying = TrampMem->stripPointerCasts();
3775  if (Underlying != TrampMem &&
3776  (!Underlying->hasOneUse() || Underlying->user_back() != TrampMem))
3777  return nullptr;
3778  if (!isa<AllocaInst>(Underlying))
3779  return nullptr;
3780 
3781  IntrinsicInst *InitTrampoline = nullptr;
3782  for (User *U : TrampMem->users()) {
3783  IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
3784  if (!II)
3785  return nullptr;
3786  if (II->getIntrinsicID() == Intrinsic::init_trampoline) {
3787  if (InitTrampoline)
3788  // More than one init_trampoline writes to this value. Give up.
3789  return nullptr;
3790  InitTrampoline = II;
3791  continue;
3792  }
3793  if (II->getIntrinsicID() == Intrinsic::adjust_trampoline)
3794  // Allow any number of calls to adjust.trampoline.
3795  continue;
3796  return nullptr;
3797  }
3798 
3799  // No call to init.trampoline found.
3800  if (!InitTrampoline)
3801  return nullptr;
3802 
3803  // Check that the alloca is being used in the expected way.
3804  if (InitTrampoline->getOperand(0) != TrampMem)
3805  return nullptr;
3806 
3807  return InitTrampoline;
3808 }
3809 
3810 static IntrinsicInst *findInitTrampolineFromBB(IntrinsicInst *AdjustTramp,
3811  Value *TrampMem) {
3812  // Visit all the previous instructions in the basic block, and try to find an
3813  // init.trampoline which has a direct path to the adjust.trampoline.
3814  for (BasicBlock::iterator I = AdjustTramp->getIterator(),
3815  E = AdjustTramp->getParent()->begin();
3816  I != E;) {
3817  Instruction *Inst = &*--I;
3818  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
3819  if (II->getIntrinsicID() == Intrinsic::init_trampoline &&
3820  II->getOperand(0) == TrampMem)
3821  return II;
3822  if (Inst->mayWriteToMemory())
3823  return nullptr;
3824  }
3825  return nullptr;
3826 }
3827 
3828 // Given a call to llvm.adjust.trampoline, find and return the corresponding
3829 // call to llvm.init.trampoline if the call to the trampoline can be optimized
3830 // to a direct call to a function. Otherwise return NULL.
3831 static IntrinsicInst *findInitTrampoline(Value *Callee) {
3832  Callee = Callee->stripPointerCasts();
3833  IntrinsicInst *AdjustTramp = dyn_cast<IntrinsicInst>(Callee);
3834  if (!AdjustTramp ||
3835  AdjustTramp->getIntrinsicID() != Intrinsic::adjust_trampoline)
3836  return nullptr;
3837 
3838  Value *TrampMem = AdjustTramp->getOperand(0);
3839 
3840  if (IntrinsicInst *IT = findInitTrampolineFromAlloca(TrampMem))
3841  return IT;
3842  if (IntrinsicInst *IT = findInitTrampolineFromBB(AdjustTramp, TrampMem))
3843  return IT;
3844  return nullptr;
3845 }
3846 
3847 /// Improvements for call and invoke instructions.
3848 Instruction *InstCombiner::visitCallSite(CallSite CS) {
3849  if (isAllocLikeFn(CS.getInstruction(), &TLI))
3850  return visitAllocSite(*CS.getInstruction());
3851 
3852  bool Changed = false;
3853 
3854  // Mark any parameters that are known to be non-null with the nonnull
3855  // attribute. This is helpful for inlining calls to functions with null
3856  // checks on their arguments.
3857  SmallVector<unsigned, 4> ArgNos;
3858  unsigned ArgNo = 0;
3859 
3860  for (Value *V : CS.args()) {
3861  if (V->getType()->isPointerTy() &&
3862  !CS.paramHasAttr(ArgNo, Attribute::NonNull) &&
3863  isKnownNonZero(V, DL, 0, &AC, CS.getInstruction(), &DT))
3864  ArgNos.push_back(ArgNo);
3865  ArgNo++;
3866  }
3867 
3868  assert(ArgNo == CS.arg_size() && "sanity check");
3869 
3870  if (!ArgNos.empty()) {
3871  AttributeList AS = CS.getAttributes();
3872  LLVMContext &Ctx = CS.getInstruction()->getContext();
3873  AS = AS.addParamAttribute(Ctx, ArgNos,
3874  Attribute::get(Ctx, Attribute::NonNull));
3875  CS.setAttributes(AS);
3876  Changed = true;
3877  }
3878 
3879  // If the callee is a pointer to a function, attempt to move any casts to the
3880  // arguments of the call/invoke.
3881  Value *Callee = CS.getCalledValue();
3882  if (!isa<Function>(Callee) && transformConstExprCastCall(CS))
3883  return nullptr;
3884 
3885  if (Function *CalleeF = dyn_cast<Function>(Callee)) {
3886  // Remove the convergent attr on calls when the callee is not convergent.
3887  if (CS.isConvergent() && !CalleeF->isConvergent() &&
3888  !CalleeF->isIntrinsic()) {
3889  DEBUG(dbgs() << "Removing convergent attr from instr "
3890  << CS.getInstruction() << "\n");
3891  CS.setNotConvergent();
3892  return CS.getInstruction();
3893  }
3894 
3895  // If the call and callee calling conventions don't match, this call must
3896  // be unreachable, as the call is undefined.
3897  if (CalleeF->getCallingConv() != CS.getCallingConv() &&
3898  // Only do this for calls to a function with a body. A prototype may
3899  // not actually end up matching the implementation's calling conv for a
3900  // variety of reasons (e.g. it may be written in assembly).
3901  !CalleeF->isDeclaration()) {
3902  Instruction *OldCall = CS.getInstruction();
3903  new StoreInst(ConstantInt::getTrue(Callee->getContext()),
3904  UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
3905  OldCall);
3906  // If OldCall does not return void then replaceAllUsesWith undef.
3907  // This allows ValueHandlers and custom metadata to adjust themselves.
3908  if (!OldCall->getType()->isVoidTy())
3909  replaceInstUsesWith(*OldCall, UndefValue::get(OldCall->getType()));
3910  if (isa<CallInst>(OldCall))
3911  return eraseInstFromFunction(*OldCall);
3912 
3913  // We cannot remove an invoke, because it would change the CFG, just
3914  // change the callee to a null pointer.
3915  cast<InvokeInst>(OldCall)->setCalledFunction(
3916  Constant::getNullValue(CalleeF->getType()));
3917  return nullptr;
3918  }
3919  }
3920 
3921  if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) {
3922  // If CS does not return void then replaceAllUsesWith undef.
3923  // This allows ValueHandlers and custom metadata to adjust themselves.
3924  if (!CS.getInstruction()->getType()->isVoidTy())
3925  replaceInstUsesWith(*CS.getInstruction(),
3926  UndefValue::get(CS.getInstruction()->getType()));
3927 
3928  if (isa<InvokeInst>(CS.getInstruction())) {
3929  // Can't remove an invoke because we cannot change the CFG.
3930  return nullptr;
3931  }
3932 
3933  // This instruction is not reachable, just remove it. We insert a store to
3934  // undef so that we know that this code is not reachable, despite the fact
3935  // that we can't modify the CFG here.
3936  new StoreInst(ConstantInt::getTrue(Callee->getContext()),
3937  UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
3938  CS.getInstruction());
3939 
3940  return eraseInstFromFunction(*CS.getInstruction());
3941  }
3942 
3943  if (IntrinsicInst *II = findInitTrampoline(Callee))
3944  return transformCallThroughTrampoline(CS, II);
3945 
3946  PointerType *PTy = cast<PointerType>(Callee->getType());
3947  FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
3948  if (FTy->isVarArg()) {
3949  int ix = FTy->getNumParams();
3950  // See if we can optimize any arguments passed through the varargs area of
3951  // the call.
3952  for (CallSite::arg_iterator I = CS.arg_begin() + FTy->getNumParams(),
3953  E = CS.arg_end(); I != E; ++I, ++ix) {
3954  CastInst *CI = dyn_cast<CastInst>(*I);
3955  if (CI && isSafeToEliminateVarargsCast(CS, DL, CI, ix)) {
3956  *I = CI->getOperand(0);
3957  Changed = true;
3958  }
3959  }
3960  }
3961 
3962  if (isa<InlineAsm>(Callee) && !CS.doesNotThrow()) {
3963  // Inline asm calls cannot throw - mark them 'nounwind'.
3964  CS.setDoesNotThrow();
3965  Changed = true;
3966  }
3967 
3968  // Try to optimize the call if possible, we require DataLayout for most of
3969  // this. None of these calls are seen as possibly dead so go ahead and
3970  // delete the instruction now.
3971  if (CallInst *CI = dyn_cast<CallInst>(CS.getInstruction())) {
3972  Instruction *I = tryOptimizeCall(CI);
3973  // If we changed something, return the result; otherwise let
3974  // the fallthrough checks below run.
3975  if (I) return eraseInstFromFunction(*I);
3976  }
3977 
3978  return Changed ? CS.getInstruction() : nullptr;
3979 }
3980 
3981 /// If the callee is a constexpr cast of a function, attempt to move the cast to
3982 /// the arguments of the call/invoke.
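3982 /// Illustrative sketch (not part of the original source):
3982 ///   call void bitcast (void (i8*)* @f to void (i32*)*)(i32* %p)
3982 /// becomes, when the checks below pass,
3982 ///   %c = bitcast i32* %p to i8*
3982 ///   call void @f(i8* %c)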
3983 bool InstCombiner::transformConstExprCastCall(CallSite CS) {
3984  Function *Callee = dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
3985  if (!Callee)
3986  return false;
3987 
3988  // The prototype of a thunk is a lie. Don't directly call such a function.
3989  if (Callee->hasFnAttribute("thunk"))
3990  return false;
3991 
3992  Instruction *Caller = CS.getInstruction();
3993  const AttributeList &CallerPAL = CS.getAttributes();
3994 
3995  // Okay, this is a cast from a function to a different type. Unless doing so
3996  // would cause a type conversion of one of our arguments, change this call to
3997  // be a direct call with arguments cast to the appropriate types.
3998  FunctionType *FT = Callee->getFunctionType();
3999  Type *OldRetTy = Caller->getType();
4000  Type *NewRetTy = FT->getReturnType();
4001 
4002  // Check to see if we are changing the return type...
4003  if (OldRetTy != NewRetTy) {
4004 
4005  if (NewRetTy->isStructTy())
4006  return false; // TODO: Handle multiple return values.
4007 
4008  if (!CastInst::isBitOrNoopPointerCastable(NewRetTy, OldRetTy, DL)) {
4009  if (Callee->isDeclaration())
4010  return false; // Cannot transform this return value.
4011 
4012  if (!Caller->use_empty() &&
4013  // void -> non-void is handled specially
4014  !NewRetTy->isVoidTy())
4015  return false; // Cannot transform this return value.
4016  }
4017 
4018  if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
4019  AttrBuilder RAttrs(CallerPAL, AttributeList::ReturnIndex);
4020  if (RAttrs.overlaps(AttributeFuncs::typeIncompatible(NewRetTy)))
4021  return false; // Attribute not compatible with transformed value.
4022  }
4023 
4024  // If the callsite is an invoke instruction, and the return value is used by
4025  // a PHI node in a successor, we cannot change the return type of the call
4026  // because there is no place to put the cast instruction (without breaking
4027  // the critical edge). Bail out in this case.
4028  if (!Caller->use_empty())
4029  if (InvokeInst *II = dyn_cast<InvokeInst>(Caller))
4030  for (User *U : II->users())
4031  if (PHINode *PN = dyn_cast<PHINode>(U))
4032  if (PN->getParent() == II->getNormalDest() ||
4033  PN->getParent() == II->getUnwindDest())
4034  return false;
4035  }
4036 
4037  unsigned NumActualArgs = CS.arg_size();
4038  unsigned NumCommonArgs = std::min(FT->getNumParams(), NumActualArgs);
4039 
4040  // Prevent us turning:
4041  // declare void @takes_i32_inalloca(i32* inalloca)
4042  // call void bitcast (void (i32*)* @takes_i32_inalloca to void (i32)*)(i32 0)
4043  //
4044  // into:
4045  // call void @takes_i32_inalloca(i32* null)
4046  //
4047  // Similarly, avoid folding away bitcasts of byval calls.
4048  if (Callee->getAttributes().hasAttrSomewhere(Attribute::InAlloca) ||
4049  Callee->getAttributes().hasAttrSomewhere(Attribute::ByVal))
4050  return false;
4051 
4052  CallSite::arg_iterator AI = CS.arg_begin();
4053  for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) {
4054  Type *ParamTy = FT->getParamType(i);
4055  Type *ActTy = (*AI)->getType();
4056 
4057  if (!CastInst::isBitOrNoopPointerCastable(ActTy, ParamTy, DL))
4058  return false; // Cannot transform this parameter value.
4059 
4060  if (AttrBuilder(CallerPAL.getParamAttributes(i))
4061  .overlaps(AttributeFuncs::typeIncompatible(ParamTy)))
4062  return false; // Attribute not compatible with transformed value.
4063 
4064  if (CS.isInAllocaArgument(i))
4065  return false; // Cannot transform to and from inalloca.
4066 
4067  // If the parameter is passed as a byval argument, then we have to have a
4068  // sized type and the sized type has to have the same size as the old type.
4069  if (ParamTy != ActTy && CallerPAL.hasParamAttribute(i, Attribute::ByVal)) {
4070  PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy);
4071  if (!ParamPTy || !ParamPTy->getElementType()->isSized())
4072  return false;
4073 
4074  Type *CurElTy = ActTy->getPointerElementType();
4075  if (DL.getTypeAllocSize(CurElTy) !=
4076  DL.getTypeAllocSize(ParamPTy->getElementType()))
4077  return false;
4078  }
4079  }
4080 
4081  if (Callee->isDeclaration()) {
4082  // Do not delete arguments unless we have a function body.
4083  if (FT->getNumParams() < NumActualArgs && !FT->isVarArg())
4084  return false;
4085 
4086  // If the callee is just a declaration, don't change the varargsness of the
4087  // call. We don't want to introduce a varargs call where one doesn't
4088  // already exist.
4089  PointerType *APTy = cast<PointerType>(CS.getCalledValue()->getType());
4090  if (FT->isVarArg()!=cast<FunctionType>(APTy->getElementType())->isVarArg())
4091  return false;
4092 
4093  // If both the callee and the cast type are varargs, we still have to make
4094  // sure the number of fixed parameters is the same or we have the same
4095  // ABI issues as if we introduce a varargs call.
4096  if (FT->isVarArg() &&
4097  cast<FunctionType>(APTy->getElementType())->isVarArg() &&
4098  FT->getNumParams() !=
4099  cast<FunctionType>(APTy->getElementType())->getNumParams())
4100  return false;
4101  }
4102 
4103  if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
4104  !CallerPAL.isEmpty()) {
4105  // In this case we have more arguments than the new function type, but we
4106  // won't be dropping them. Check that these extra arguments have attributes
4107  // that are compatible with being a vararg call argument.
4108  unsigned SRetIdx;
4109  if (CallerPAL.hasAttrSomewhere(Attribute::StructRet, &SRetIdx) &&
4110  SRetIdx > FT->getNumParams())
4111  return false;
4112  }
4113 
4114  // Okay, we decided that this is a safe thing to do: go ahead and start
4115  // inserting cast instructions as necessary.
4116  SmallVector<Value *, 8> Args;
4117  SmallVector<AttributeSet, 8> ArgAttrs;
4118  Args.reserve(NumActualArgs);
4119  ArgAttrs.reserve(NumActualArgs);
4120 
4121  // Get any return attributes.
4122  AttrBuilder RAttrs(CallerPAL, AttributeList::ReturnIndex);
4123 
4124  // If the return value is not being used, the type may not be compatible
4125  // with the existing attributes. Wipe out any problematic attributes.
4126  RAttrs.remove(AttributeFuncs::typeIncompatible(NewRetTy));
4127 
4128  AI = CS.arg_begin();
4129  for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
4130  Type *ParamTy = FT->getParamType(i);
4131 
4132  Value *NewArg = *AI;
4133  if ((*AI)->getType() != ParamTy)
4134  NewArg = Builder.CreateBitOrPointerCast(*AI, ParamTy);
4135  Args.push_back(NewArg);
4136 
4137  // Add any parameter attributes.
4138  ArgAttrs.push_back(CallerPAL.getParamAttributes(i));
4139  }
4140 
4141  // If the function takes more arguments than the call was taking, add them
4142  // now.
4143  for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i) {
4144  Args.push_back(Constant::getNullValue(FT->getParamType(i)));
4145  ArgAttrs.push_back(AttributeSet());
4146  }
4147 
4148  // If we are removing arguments to the function, emit an obnoxious warning.
4149  if (FT->getNumParams() < NumActualArgs) {
4150  // TODO: if (!FT->isVarArg()) this call may be unreachable. PR14722
4151  if (FT->isVarArg()) {
4152  // Add all of the arguments in their promoted form to the arg list.
4153  for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) {
4154  Type *PTy = getPromotedType((*AI)->getType());
4155  Value *NewArg = *AI;
4156  if (PTy != (*AI)->getType()) {
4157  // Must promote to pass through va_arg area!
4158  Instruction::CastOps opcode =
4159  CastInst::getCastOpcode(*AI, false, PTy, false);
4160  NewArg = Builder.CreateCast(opcode, *AI, PTy);
4161  }
4162  Args.push_back(NewArg);
4163 
4164  // Add any parameter attributes.
4165  ArgAttrs.push_back(CallerPAL.getParamAttributes(i));
4166  }
4167  }
4168  }
4169 
4170  AttributeSet FnAttrs = CallerPAL.getFnAttributes();
4171 
4172  if (NewRetTy->isVoidTy())
4173  Caller->setName(""); // Void type should not have a name.
4174 
4175  assert((ArgAttrs.size() == FT->getNumParams() || FT->isVarArg()) &&
4176  "missing argument attributes");
4177  LLVMContext &Ctx = Callee->getContext();
4178  AttributeList NewCallerPAL = AttributeList::get(
4179  Ctx, FnAttrs, AttributeSet::get(Ctx, RAttrs), ArgAttrs);
4180 
4181  SmallVector<OperandBundleDef, 1> OpBundles;
4182  CS.getOperandBundlesAsDefs(OpBundles);
4183 
4184  CallSite NewCS;
4185  if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
4186  NewCS = Builder.CreateInvoke(Callee, II->getNormalDest(),
4187  II->getUnwindDest(), Args, OpBundles);
4188  } else {
4189  NewCS = Builder.CreateCall(Callee, Args, OpBundles);
4190  cast<CallInst>(NewCS.getInstruction())
4191  ->setTailCallKind(cast<CallInst>(Caller)->getTailCallKind());
4192  }
4193  NewCS->takeName(Caller);
4194  NewCS.setCallingConv(CS.getCallingConv());
4195  NewCS.setAttributes(NewCallerPAL);
4196 
4197  // Preserve the weight metadata for the new call instruction. The metadata
4198  // is used by SamplePGO to check callsite's hotness.
4199  uint64_t W;
4200  if (Caller->extractProfTotalWeight(W))
4201  NewCS->setProfWeight(W);
4202 
4203  // Insert a cast of the return type as necessary.
4204  Instruction *NC = NewCS.getInstruction();
4205  Value *NV = NC;
4206  if (OldRetTy != NV->getType() && !Caller->use_empty()) {
4207  if (!NV->getType()->isVoidTy()) {
4208  NV = NC = CastInst::CreateBitOrPointerCast(NC, OldRetTy);
4209  NC->setDebugLoc(Caller->getDebugLoc());
4210 
4211  // If this is an invoke instruction, we should insert it after the first
4212  // non-phi instruction in the normal successor block.
4213  if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
4214  BasicBlock::iterator I = II->getNormalDest()->getFirstInsertionPt();
4215  InsertNewInstBefore(NC, *I);
4216  } else {
4217  // Otherwise, it's a call, just insert cast right after the call.
4218  InsertNewInstBefore(NC, *Caller);
4219  }
4220  Worklist.AddUsersToWorkList(*Caller);
4221  } else {
4222  NV = UndefValue::get(Caller->getType());
4223  }
4224  }
4225 
4226  if (!Caller->use_empty())
4227  replaceInstUsesWith(*Caller, NV);
4228  else if (Caller->hasValueHandle()) {
4229  if (OldRetTy == NV->getType())
4230  ValueHandleBase::ValueIsRAUWd(Caller, NV);
4231  else
4232  // We cannot call ValueIsRAUWd with a different type, and the
4233  // actual tracked value will disappear.
4234  ValueHandleBase::ValueIsDeleted(Caller);
4235  }
4236 
4237  eraseInstFromFunction(*Caller);
4238  return true;
4239 }
4240 
4241 /// Turn a call to a function created by init_trampoline / adjust_trampoline
4242 /// intrinsic pair into a direct call to the underlying function.
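4242 /// Illustrative sketch (not part of the original source): if %tramp was set
4242 /// up by
4242 ///   call void @llvm.init.trampoline(i8* %tramp,
4242 ///                                   i8* bitcast (void (i8*, i32)* @f to i8*),
4242 ///                                   i8* %nest)
4242 /// then a call through the pointer returned by @llvm.adjust.trampoline(%tramp)
4242 /// is rewritten below as a direct call to @f with %nest spliced in as the
4242 /// 'nest' parameter.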
4243 Instruction *
4244 InstCombiner::transformCallThroughTrampoline(CallSite CS,
4245  IntrinsicInst *Tramp) {
4246  Value *Callee = CS.getCalledValue();
4247  PointerType *PTy = cast<PointerType>(Callee->getType());
4248  FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
4249  AttributeList Attrs = CS.getAttributes();
4250 
4251  // If the call already has the 'nest' attribute somewhere then give up -
4252  // otherwise 'nest' would occur twice after splicing in the chain.
4253  if (Attrs.hasAttrSomewhere(Attribute::Nest))
4254  return nullptr;
4255 
4256  assert(Tramp &&
4257  "transformCallThroughTrampoline called with incorrect CallSite.");
4258 
4259  Function *NestF =cast<Function>(Tramp->getArgOperand(1)->stripPointerCasts());
4260  FunctionType *NestFTy = cast<FunctionType>(NestF->getValueType());
4261 
4262  AttributeList NestAttrs = NestF->getAttributes();
4263  if (!NestAttrs.isEmpty()) {
4264  unsigned NestArgNo = 0;
4265  Type *NestTy = nullptr;
4266  AttributeSet NestAttr;
4267 
4268  // Look for a parameter marked with the 'nest' attribute.
4269  for (FunctionType::param_iterator I = NestFTy->param_begin(),
4270  E = NestFTy->param_end();
4271  I != E; ++NestArgNo, ++I) {
4272  AttributeSet AS = NestAttrs.getParamAttributes(NestArgNo);
4273  if (AS.hasAttribute(Attribute::Nest)) {
4274  // Record the parameter type and any other attributes.
4275  NestTy = *I;
4276  NestAttr = AS;
4277  break;
4278  }
4279  }
4280 
4281  if (NestTy) {
4282  Instruction *Caller = CS.getInstruction();
4283  std::vector<Value*> NewArgs;
4284  std::vector<AttributeSet> NewArgAttrs;
4285  NewArgs.reserve(CS.arg_size() + 1);
4286  NewArgAttrs.reserve(CS.arg_size());
4287 
4288  // Insert the nest argument into the call argument list, which may
4289  // mean appending it. Likewise for attributes.
4290 
4291  {
4292  unsigned ArgNo = 0;
4293  CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
4294  do {
4295  if (ArgNo == NestArgNo) {
4296  // Add the chain argument and attributes.
4297  Value *NestVal = Tramp->getArgOperand(2);
4298  if (NestVal->getType() != NestTy)
4299  NestVal = Builder.CreateBitCast(NestVal, NestTy, "nest");
4300  NewArgs.push_back(NestVal);
4301  NewArgAttrs.push_back(NestAttr);
4302  }
4303 
4304  if (I == E)
4305  break;
4306 
4307  // Add the original argument and attributes.
4308  NewArgs.push_back(*I);
4309  NewArgAttrs.push_back(Attrs.getParamAttributes(ArgNo));
4310 
4311  ++ArgNo;
4312  ++I;
4313  } while (true);
4314  }
4315 
4316  // The trampoline may have been bitcast to a bogus type (FTy).
4317  // Handle this by synthesizing a new function type, equal to FTy
4318  // with the chain parameter inserted.
4319 
4320  std::vector<Type*> NewTypes;
4321  NewTypes.reserve(FTy->getNumParams()+1);
4322 
4323  // Insert the chain's type into the list of parameter types, which may
4324  // mean appending it.
4325  {
4326  unsigned ArgNo = 0;
4327  FunctionType::param_iterator I = FTy->param_begin(),
4328  E = FTy->param_end();
4329 
4330  do {
4331  if (ArgNo == NestArgNo)
4332  // Add the chain's type.
4333  NewTypes.push_back(NestTy);
4334 
4335  if (I == E)
4336  break;
4337 
4338  // Add the original type.
4339  NewTypes.push_back(*I);
4340 
4341  ++ArgNo;
4342  ++I;
4343  } while (true);
4344  }
4345 
4346  // Replace the trampoline call with a direct call. Let the generic
4347  // code sort out any function type mismatches.
4348  FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(), NewTypes,
4349  FTy->isVarArg());
4350  Constant *NewCallee =
4351  NestF->getType() == PointerType::getUnqual(NewFTy) ?
4352  NestF : ConstantExpr::getBitCast(NestF,
4353  PointerType::getUnqual(NewFTy));
4354  AttributeList NewPAL =
4355  AttributeList::get(FTy->getContext(), Attrs.getFnAttributes(),
4356  Attrs.getRetAttributes(), NewArgAttrs);
4357 
4358  SmallVector<OperandBundleDef, 1> OpBundles;
4359  CS.getOperandBundlesAsDefs(OpBundles);
4360 
4361  Instruction *NewCaller;
4362  if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
4363  NewCaller = InvokeInst::Create(NewCallee,
4364  II->getNormalDest(), II->getUnwindDest(),
4365  NewArgs, OpBundles);
4366  cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv());
4367  cast<InvokeInst>(NewCaller)->setAttributes(NewPAL);
4368  } else {
4369  NewCaller = CallInst::Create(NewCallee, NewArgs, OpBundles);
4370  cast<CallInst>(NewCaller)->setTailCallKind(
4371  cast<CallInst>(Caller)->getTailCallKind());
4372  cast<CallInst>(NewCaller)->setCallingConv(
4373  cast<CallInst>(Caller)->getCallingConv());
4374  cast<CallInst>(NewCaller)->setAttributes(NewPAL);
4375  }
4376  NewCaller->setDebugLoc(Caller->getDebugLoc());
4377 
4378  return NewCaller;
4379  }
4380  }
4381 
4382  // Replace the trampoline call with a direct call. Since there is no 'nest'
4383  // parameter, there is no need to adjust the argument list. Let the generic
4384  // code sort out any function type mismatches.
4385  Constant *NewCallee =
4386  NestF->getType() == PTy ? NestF :
4387  ConstantExpr::getBitCast(NestF, PTy);
4388  CS.setCalledFunction(NewCallee);
4389  return CS.getInstruction();
4390 }
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
bool isFPPredicate() const
Definition: InstrTypes.h:944
const NoneType None
Definition: None.h:24
A vector constant whose element type is a simple 1/2/4/8-byte integer or float/double, and whose elements are just simple data values (i.e.
Definition: Constants.h:735
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
Definition: PatternMatch.h:574
uint64_t CallInst * C
User::op_iterator arg_iterator
The type of iterator to use when looping over actual arguments at this call site. ...
Definition: CallSite.h:213
LibCallSimplifier - This class implements a collection of optimizations that replace well formed call...
IntegerType * getType() const
getType - Specialize the getType() method to always return an IntegerType, which reduces the amount o...
Definition: Constants.h:172
m_Intrinsic_Ty< Opnd0, Opnd1 >::Ty m_FMin(const Opnd0 &Op0, const Opnd1 &Op1)
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:109
void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth, const Instruction *CxtI) const
void copyFastMathFlags(FastMathFlags FMF)
Convenience function for transferring all fast-math flag values to this instruction, which must be an operator which supports these flags.
void setDoesNotThrow()
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:72
static void ValueIsDeleted(Value *V)
Definition: Value.cpp:865
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1638
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
bool isZero() const
Definition: APFloat.h:1143
static IntegerType * getInt1Ty(LLVMContext &C)
Definition: Type.cpp:173
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
Definition: PatternMatch.h:80
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1542
unsigned getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to ensure that the alignment of V is at least PrefAlign bytes.
Definition: Local.cpp:1143
static Value * simplifyX86immShift(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
static APInt getAllOnesValue(unsigned numBits)
Get the all-ones value.
Definition: APInt.h:555
DiagnosticInfoOptimizationBase::Argument NV
unsigned arg_size() const
Definition: CallSite.h:219
CallingConv::ID getCallingConv() const
Get the calling convention of the call.
Definition: CallSite.h:312
Atomic ordering constants.
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition: ilist_node.h:289
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
Type * getParamType(unsigned i) const
Parameter type accessors.
Definition: DerivedTypes.h:135
Constant * getElementAsConstant(unsigned i) const
Return a Constant for a specified index&#39;s element.
Definition: Constants.cpp:2644
unsigned countMinPopulation() const
Returns the number of bits known to be one.
Definition: KnownBits.h:186
bool isInAllocaArgument(unsigned ArgNo) const
Determine whether this argument is passed in an alloca.
Definition: CallSite.h:603
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:63
Instruction * visitCallInst(CallInst &CI)
CallInst simplification.
m_Intrinsic_Ty< Opnd0, Opnd1 >::Ty m_FMax(const Opnd0 &Op0, const Opnd1 &Op1)
bool isSized(SmallPtrSetImpl< Type *> *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition: Type.h:262
LLVM_ATTRIBUTE_ALWAYS_INLINE size_type size() const
Definition: SmallVector.h:136
An instruction for ordering other memory operations.
Definition: Instructions.h:440
match_zero m_Zero()
Match an arbitrary zero/null constant.
Definition: PatternMatch.h:145
static MDString * get(LLVMContext &Context, StringRef Str)
Definition: Metadata.cpp:446
Instruction * visitVACopyInst(VACopyInst &I)
static ConstantAggregateZero * get(Type *Ty)
Definition: Constants.cpp:1236
static bool simplifyX86MaskedStore(IntrinsicInst &II, InstCombiner &IC)
This class represents a function call, abstracting a target machine&#39;s calling convention.
This file contains the declarations for metadata subclasses.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Get a value with low bits set.
Definition: APInt.h:641
void setOrdering(AtomicOrdering Ordering)
Sets the ordering constraint of this load instruction.
Definition: Instructions.h:239
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
Definition: PatternMatch.h:91
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space...
Definition: Type.cpp:617
iterator_range< IterTy > args() const
Definition: CallSite.h:215
static uint64_t round(uint64_t Acc, uint64_t Input)
Definition: xxhash.cpp:57
m_Intrinsic_Ty< Opnd0 >::Ty m_BSwap(const Opnd0 &Op0)
bool hasValueHandle() const
Return true if there is a value handle associated with this value.
Definition: Value.h:491
unsigned less or equal
Definition: InstrTypes.h:879
bool mayWriteToMemory() const
Return true if this instruction may modify memory.
unsigned less than
Definition: InstrTypes.h:878
This class represents the atomic memcpy intrinsic i.e.
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", Instruction *InsertBefore=nullptr, Instruction *MDFrom=nullptr)
static Instruction * foldCttzCtlz(IntrinsicInst &II, InstCombiner &IC)
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:738
static CastInst * CreateBitOrPointerCast(Value *S, Type *Ty, const Twine &Name="", Instruction *InsertBefore=nullptr)
Create a BitCast, a PtrToInt, or an IntToPtr cast instruction.
This class wraps the llvm.memset intrinsic.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:813
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:818
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1390
bool isValidAssumeForContext(const Instruction *I, const Instruction *CxtI, const DominatorTree *DT=nullptr)
Return true if it is valid to use the assumptions provided by an assume intrinsic, I, at the point in the control-flow identified by the context instruction, CxtI.
STATISTIC(NumFunctions, "Total number of functions")
Metadata node.
Definition: Metadata.h:862
F(f)
static CallInst * Create(Value *Func, ArrayRef< Value *> Args, ArrayRef< OperandBundleDef > Bundles=None, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
const fltSemantics & getSemantics() const
Definition: APFloat.h:1155
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:503
BinaryOp_match< LHS, RHS, Instruction::FSub > m_FSub(const LHS &L, const RHS &R)
Definition: PatternMatch.h:520
An instruction for reading from memory.
Definition: Instructions.h:164
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:177
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:883
static Constant * getCompare(unsigned short pred, Constant *C1, Constant *C2, bool OnlyIfReduced=false)
Return an ICmp or FCmp comparison operator constant expression.
Definition: Constants.cpp:1831
unsigned countMaxTrailingZeros() const
Returns the maximum number of trailing zero bits possible.
Definition: KnownBits.h:166
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:227
static OverflowCheckFlavor IntrinsicIDToOverflowCheckFlavor(unsigned ID)
Returns the OverflowCheckFlavor corresponding to an overflow_with_op intrinsic.
fneg_match< LHS > m_FNeg(const LHS &L)
Match a floating point negate.
void reserve(size_type N)
Definition: SmallVector.h:378
Value * getLength() const
static Instruction * simplifyMaskedStore(IntrinsicInst &II, InstCombiner &IC)
Instruction * visitVAStartInst(VAStartInst &I)
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition: APInt.h:528
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1488
bool isGCRelocate(ImmutableCallSite CS)
Definition: Statepoint.cpp:43
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:130
const CallInst * isFreeCall(const Value *I, const TargetLibraryInfo *TLI)
isFreeCall - Returns non-null if the value is a call to the builtin free()
static Constant * getNullValue(Type *Ty)
Constructor to create a &#39;0&#39; constant of arbitrary type.
Definition: Constants.cpp:206
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition: KnownBits.h:136
static bool isBitOrNoopPointerCastable(Type *SrcTy, Type *DestTy, const DataLayout &DL)
Check whether a bitcast, inttoptr, or ptrtoint cast between these types is valid and a no-op...
Value * getDest() const
This is just like getRawDest, but it strips off any cast instructions (including addrspacecast) that ...
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:252
bool isIdenticalTo(const Instruction *I) const
Return true if the specified instruction is exactly identical to the current one. ...
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
opStatus divide(const APFloat &RHS, roundingMode RM)
Definition: APFloat.h:968
static Instruction * SimplifyNVVMIntrinsic(IntrinsicInst *II, InstCombiner &IC)
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
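The matcher combinators listed throughout this page (m_Value, m_ICmp, m_Select, m_Specific, ...) all feed into this match() entry point. A minimal sketch of the idiom, using only matchers that appear on this page; the helper name and surrounding function are illustrative and not part of this file (requires llvm/IR/PatternMatch.h and llvm/IR/Instructions.h):
  // Recognise "select (icmp Pred A, B), A, B" -- the raw form of a
  // min/max idiom -- on an arbitrary Value.
  static bool isCmpSelectOverSameOperands(llvm::Value *V) {
    using namespace llvm;
    using namespace llvm::PatternMatch;
    Value *A, *B, *TVal, *FVal;
    ICmpInst::Predicate Pred;
    return match(V, m_Select(m_ICmp(Pred, m_Value(A), m_Value(B)),
                             m_Value(TVal), m_Value(FVal))) &&
           TVal == A && FVal == B;
  }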
Instruction * visitInvokeInst(InvokeInst &II)
static Constant * getIntegerCast(Constant *C, Type *Ty, bool isSigned)
Create a ZExt, Bitcast or Trunc for integer -> integer casts.
Definition: Constants.cpp:1517
APInt getLoBits(unsigned numBits) const
Compute an APInt containing numBits lowbits from this APInt.
Definition: APInt.cpp:515
static APFloat fmed3AMDGCN(const APFloat &Src0, const APFloat &Src1, const APFloat &Src2)
Type * getPointerElementType() const
Definition: Type.h:373
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE, etc.
Definition: InstrTypes.h:951
OverflowCheckFlavor
Specific patterns of overflow check idioms that we match.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
unsigned getNumArgOperands() const
Return the number of call arguments.
Value * getRawSource() const
Return the raw source pointer argument of the instruction.
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:560
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:451
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition: APInt.h:981
This class wraps the llvm.memmove intrinsic.
AttributeList addParamAttribute(LLVMContext &C, unsigned ArgNo, Attribute::AttrKind Kind) const
Add an argument attribute to the list.
Definition: Attributes.h:398
IterTy arg_end() const
Definition: CallSite.h:575
Instruction * eraseInstFromFunction(Instruction &I)
Combiner aware instruction erasure.
CastClass_match< OpTy, Instruction::Trunc > m_Trunc(const OpTy &Op)
Matches Trunc.
Definition: PatternMatch.h:912
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:197
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:668
The core instruction combiner logic.
static bool isSafeToEliminateVarargsCast(const CallSite CS, const DataLayout &DL, const CastInst *const CI, const int ix)
If this cast does not affect the value passed through the varargs area, we can eliminate the use of t...
This file contains the simple types necessary to represent the attributes associated with functions a...
InstrTy * getInstruction() const
Definition: CallSite.h:92
static Constant * getSExt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:1555
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:295
uint64_t getNumElements() const
Definition: DerivedTypes.h:359
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:966
This file implements a class to represent arbitrary precision integral constant values and operations...
not_match< LHS > m_Not(const LHS &L)
All zero aggregate value.
Definition: Constants.h:332
static Value * simplifyX86vpermv(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
Attempt to convert vpermd/vpermps to shufflevector if the mask is constant.
Metadata * LowAndHigh[]
ValTy * getCalledValue() const
Return the pointer to function that is being called.
Definition: CallSite.h:100
static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E, unsigned NumOperands)
DominatorTree & getDominatorTree() const
unsigned countMaxPopulation() const
Returns the maximum number of bits that could be one.
Definition: KnownBits.h:191
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:85
Class to represent function types.
Definition: DerivedTypes.h:103
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1448
bool isInfinity() const
Definition: APFloat.h:1144
FastMathFlags getFastMathFlags() const
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
Value * CreateSExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a SExt or Trunc from the integer value V to DestTy.
Definition: IRBuilder.h:1409
This represents the llvm.va_start intrinsic.
CastClass_match< OpTy, Instruction::FPExt > m_FPExt(const OpTy &Op)
Matches FPExt.
Definition: PatternMatch.h:955
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition: APFloat.cpp:4441
CastClass_match< OpTy, Instruction::ZExt > m_ZExt(const OpTy &Op)
Matches ZExt.
Definition: PatternMatch.h:924
void setLength(Value *L)
AttributeSet getParamAttributes(unsigned ArgNo) const
The attributes for the argument or parameter at the given index are returned.
bool isVarArg() const
Definition: DerivedTypes.h:123
bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Return true if the call or the callee has the given attribute.
Definition: CallSite.h:377
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:195
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains V broadcasted to NumElts elements.
Definition: IRBuilder.h:1841
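A one-line sketch of the splat helper above; Builder and Scalar are assumed to already exist (an IRBuilder and an i32 Value, respectively):
  // Broadcast the scalar into an 8-element vector; the element type is
  // taken from Scalar, so this yields a <8 x i32> value here.
  llvm::Value *Splat = Builder.CreateVectorSplat(/*NumElts=*/8, Scalar, "splat");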
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:138
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:126
AttrBuilder & remove(const AttrBuilder &B)
Remove the attributes from the builder.
static Value * simplifyX86pack(IntrinsicInst &II, bool IsSigned)
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:205
cmpResult
IEEE-754R 5.11: Floating Point Comparison Relations.
Definition: APFloat.h:166
An instruction for storing to memory.
Definition: Instructions.h:306
bool extractProfTotalWeight(uint64_t &TotalVal) const
Retrieve the total raw weight values of a branch.
Definition: Metadata.cpp:1328
SelectClass_match< Cond, LHS, RHS > m_Select(const Cond &C, const LHS &L, const RHS &R)
Definition: PatternMatch.h:869
static void ValueIsRAUWd(Value *Old, Value *New)
Definition: Value.cpp:918
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1387
static Value * simplifyX86vpcom(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder, bool IsSigned)
Decode XOP integer vector comparison intrinsics.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:301
static ConstantAsMetadata * get(Constant *C)
Definition: Metadata.h:408
static Value * simplifyX86movmsk(const IntrinsicInst &II)
Function * getDeclaration(Module *M, ID id, ArrayRef< Type *> Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:979
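Together with the CreateCall entry further down, this is the usual way a transform materialises a call to a different intrinsic. A minimal sketch, assuming M is the current Module, Builder an IRBuilder positioned at the insertion point, and X an existing i32 Value:
  // Declare (or look up) llvm.ctlz.i32 in M and emit a call to it; the
  // trailing i1 false operand means the result is defined for a zero input.
  llvm::Type *I32 = Builder.getInt32Ty();
  llvm::Function *Ctlz =
      llvm::Intrinsic::getDeclaration(M, llvm::Intrinsic::ctlz, I32);
  llvm::Value *Lz = Builder.CreateCall(Ctlz, {X, Builder.getFalse()}, "lz");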
This class represents a truncation of integer types.
Type * getElementType() const
Return the element type of the array/vector.
Definition: Constants.cpp:2270
Value * getOperand(unsigned i) const
Definition: User.h:154
Class to represent pointers.
Definition: DerivedTypes.h:467
bool hasAttribute(Attribute::AttrKind Kind) const
Return true if the attribute exists in this set.
Definition: Attributes.cpp:575
Constant * getAggregateElement(unsigned Elt) const
For aggregates (struct/array/vector) return the constant that corresponds to the specified element if...
Definition: Constants.cpp:276
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:301
const DataLayout & getDataLayout() const
static MetadataAsValue * get(LLVMContext &Context, Metadata *MD)
Definition: Metadata.cpp:106
static Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:1677
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:141
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
A and B are either alignments or offsets. Return the minimum alignment that may be assumed after adding the two together.
Definition: MathExtras.h:602
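Equivalently, MinAlign returns the largest power of two that divides both arguments, which is why it is safe to mix alignments and offsets. A tiny illustration (MinAlign is constexpr, so the claim can be checked at compile time):
  // 16 and 24 are both multiples of 8 but not of 16, so a pointer that may
  // sit at either offset can only be assumed 8-byte aligned.
  static_assert(llvm::MinAlign(16, 24) == 8, "conservative common alignment");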
bool hasAttrSomewhere(Attribute::AttrKind Kind, unsigned *Index=nullptr) const
Return true if the specified attribute is set for at least one parameter or for the return value...
OneUse_match< T > m_OneUse(const T &SubPattern)
Definition: PatternMatch.h:63
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata *> MDs)
Definition: Metadata.h:1164
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:357
void setAttributes(AttributeList PAL)
Set the parameter attributes of the call.
Definition: CallSite.h:333
Instruction * visitFenceInst(FenceInst &FI)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:406
static Instruction * simplifyMaskedScatter(IntrinsicInst &II, InstCombiner &IC)
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:149
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Subclasses of this class are all able to terminate a basic block.
Definition: InstrTypes.h:54
static AttributeSet get(LLVMContext &C, const AttrBuilder &B)
Definition: Attributes.cpp:505
bool isNegative() const
Definition: APFloat.h:1147
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:282
static ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
Definition: Constants.cpp:1305
Value * CreateAShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1049
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:421
ConstantInt * lowerObjectSizeCall(IntrinsicInst *ObjectSize, const DataLayout &DL, const TargetLibraryInfo *TLI, bool MustSucceed)
Try to turn a call to llvm.objectsize into an integer value of the given Type.
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
Definition: PatternMatch.h:580
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:69
ConstantInt * getTrue()
Get the constant value for i1 true.
Definition: IRBuilder.h:288
bool isNaN() const
Definition: APFloat.h:1145
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This is an important base class in LLVM.
Definition: Constant.h:42
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.h:1693
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:221
static cl::opt< unsigned > UnfoldElementAtomicMemcpyMaxElements("unfold-element-atomic-memcpy-max-elements", cl::init(16), cl::desc("Maximum number of elements in atomic memcpy the optimizer is " "allowed to unfold"))
unsigned getNumParams() const
Return the number of fixed parameters this function type requires.
Definition: DerivedTypes.h:139
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:264
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:382
unsigned getParamAlignment(unsigned ArgNo) const
Extract the alignment for a call or parameter (0=unknown).
This file declares a class to represent arbitrary precision floating point values and provide a varie...
bool isFast() const
Determine whether all fast-math-flags are set.
std::underlying_type< E >::type Underlying(E Val)
Check that Val is in range for E, and return Val cast to E's underlying type.
Definition: BitmaskEnum.h:91
static IntrinsicInst * findInitTrampolineFromBB(IntrinsicInst *AdjustTramp, Value *TrampMem)
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:853
static const unsigned End
match_combine_or< CastClass_match< OpTy, Instruction::ZExt >, CastClass_match< OpTy, Instruction::SExt > > m_ZExtOrSExt(const OpTy &Op)
Definition: PatternMatch.h:931
bool isAllOnes() const
Returns true if value is all one bits.
Definition: KnownBits.h:78
void setCallingConv(CallingConv::ID CC)
Set the calling convention of the call.
Definition: CallSite.h:316
bool isGCResult(ImmutableCallSite CS)
Definition: Statepoint.cpp:53
static FunctionType * get(Type *Result, ArrayRef< Type *> Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
Definition: Type.cpp:297
self_iterator getIterator()
Definition: ilist_node.h:82
Class to represent integer types.
Definition: DerivedTypes.h:40
bool isIntN(unsigned N) const
Check if this APInt has an N-bits unsigned integer value.
Definition: APInt.h:443
void setNotConvergent()
Definition: CallSite.h:527
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:59
void setAlignment(unsigned Align)
static Value * simplifyX86varShift(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
static UndefValue * get(Type *T)
Static factory methods - Return an &#39;undef&#39; object of the specified type.
Definition: Constants.cpp:1319
const AMDGPUAS & AS
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs, and aliases.
Definition: Value.cpp:567
uint32_t getElementSizeInBytes() const
bool isVolatile() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1214
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE maxNum semantics.
Definition: APFloat.h:1238
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:937
static InvokeInst * Create(Value *Func, BasicBlock *IfNormal, BasicBlock *IfException, ArrayRef< Value *> Args, const Twine &NameStr, Instruction *InsertBefore=nullptr)
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:959
static Value * simplifyX86muldq(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
signed greater than
Definition: InstrTypes.h:880
static Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
Definition: Constants.cpp:243
static Value * simplifyX86extrq(IntrinsicInst &II, Value *Op0, ConstantInt *CILength, ConstantInt *CIIndex, InstCombiner::BuilderTy &Builder)
Attempt to simplify SSE4A EXTRQ/EXTRQI instructions using constant folding or conversion to a shuffle...
bool doesNotThrow() const
Determine if the call cannot unwind.
const APFloat & getValueAPF() const
Definition: Constants.h:294
CastClass_match< OpTy, Instruction::SExt > m_SExt(const OpTy &Op)
Matches SExt.
Definition: PatternMatch.h:918
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:51
bool doesNotThrow() const
Determine if the function cannot unwind.
Definition: Function.h:466
static Type * getHalfTy(LLVMContext &C)
Definition: Type.cpp:163
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:240
static CastInst * CreateIntegerCast(Value *S, Type *Ty, bool isSigned, const Twine &Name="", Instruction *InsertBefore=nullptr)
Create a ZExt, BitCast, or Trunc for int -> int casts.
This is the common base class for memset/memcpy/memmove.
Iterator for intrusive lists based on ilist_node.
unsigned countMaxLeadingZeros() const
Returns the maximum number of leading zero bits possible.
Definition: KnownBits.h:176
bool hasParamAttribute(unsigned ArgNo, Attribute::AttrKind Kind) const
Equivalent to hasAttribute(ArgNo + FirstArgIndex, Kind).
static PointerType * getInt1PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:216
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
getLimitedValue - If the value is smaller than the specified limit, return it, otherwise return the l...
Definition: Constants.h:251
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the generic address space (address sp...
Definition: DerivedTypes.h:482
This is the shared class of boolean and integer constants.
Definition: Constants.h:84
static Value * simplifyX86vpermilvar(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
Attempt to convert vpermilvar* to shufflevector if the mask is constant.
iterator end()
Definition: BasicBlock.h:254
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type...
Definition: Type.cpp:130
IterTy arg_begin() const
Definition: CallSite.h:571
static IntrinsicInst * findInitTrampolineFromAlloca(Value *TrampMem)
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:862
Type::subtype_iterator param_iterator
Definition: DerivedTypes.h:126
bool overlaps(const AttrBuilder &B) const
Return true if the builder has any attribute that's in the specified builder.
static Instruction * simplifyMaskedGather(IntrinsicInst &II, InstCombiner &IC)
void setDoesNotThrow()
Definition: CallSite.h:508
signed less than
Definition: InstrTypes.h:882
Type * getReturnType() const
Definition: DerivedTypes.h:124
CallInst * CreateMaskedStore(Value *Val, Value *Ptr, unsigned Align, Value *Mask)
Create a call to Masked Store intrinsic.
Definition: IRBuilder.cpp:383
APFloat frexp(const APFloat &X, int &Exp, APFloat::roundingMode RM)
Equivalent of C standard library function.
Definition: APFloat.h:1205
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:1740
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:559
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
Definition: Constants.cpp:573
static Constant * get(Type *Ty, double V)
This returns a ConstantFP, or a vector containing a splat of a ConstantFP, for the specified value in...
Definition: Constants.cpp:622
#define NC
Definition: regutils.h:42
CallInst * CreateMaskedLoad(Value *Ptr, unsigned Align, Value *Mask, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Load intrinsic.
Definition: IRBuilder.cpp:362
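A minimal sketch of the masked-load builder call matching the signature above; Ptr, Mask, and PassThru are assumed to be existing values (a vector pointer, an i1 vector, and a matching vector, respectively):
  // Load through Ptr in every lane where Mask is true and take the
  // corresponding lane of PassThru where it is false.
  llvm::CallInst *Ld =
      Builder.CreateMaskedLoad(Ptr, /*Align=*/16, Mask, PassThru, "masked.ld");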
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1272
Value * SimplifyCall(ImmutableCallSite CS, const SimplifyQuery &Q)
Given a callsite, fold the result or return null.
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:515
bool isDenormal() const
Definition: APFloat.h:1148
void setOperand(unsigned i, Value *Val)
Definition: User.h:159
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:923
unsigned getVectorNumElements() const
Definition: DerivedTypes.h:462
signed less or equal
Definition: InstrTypes.h:883
Class to represent vector types.
Definition: DerivedTypes.h:393
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
Definition: Instruction.cpp:55
Class for arbitrary precision integers.
Definition: APInt.h:69
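Most of the APInt helpers listed on this page (getLowBitsSet, lshr, trunc, zextOrTrunc, ...) are plain value operations on this class; a small self-contained sketch, requiring llvm/ADT/APInt.h and <cassert>:
  // Build 0x000000FF, shift and narrow it, and check that widening again
  // recovers the shifted value.
  llvm::APInt Mask = llvm::APInt::getLowBitsSet(/*numBits=*/32, /*loBitsSet=*/8);
  llvm::APInt Shifted = Mask.lshr(4);        // 0x0000000F, still 32 bits wide
  llvm::APInt Narrow  = Shifted.trunc(8);    // same value in an 8-bit APInt
  assert(Narrow.zextOrTrunc(32) == Shifted && "round-trips for small values");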
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), Instruction *InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
iterator_range< user_iterator > users()
Definition: Value.h:405
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1012
static Value * simplifyX86pshufb(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
Attempt to convert pshufb* to shufflevector if the mask is constant.
static cl::opt< bool > FtzEnabled("nvptx-f32ftz", cl::ZeroOrMore, cl::Hidden, cl::desc("NVPTX Specific: Flush f32 subnormals to sign-preserving zero."), cl::init(false))
IntegerType * getInt1Ty()
Fetch the type representing a single bit.
Definition: IRBuilder.h:333
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::ZeroOrMore, cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate IT block based on arch"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow deprecated IT based on ARMv8"), clEnumValN(NoRestrictedIT, "arm-no-restrict-it", "Allow IT blocks based on ARMv7")))
uint64_t getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:403
specific_fpval m_FPOne()
Match a float 1.0 or vector with all elements equal to 1.0.
Definition: PatternMatch.h:407
Function * getCalledFunction() const
Return the function called, or null if this is an indirect function invocation.
static CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", Instruction *InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
void setOrdering(AtomicOrdering Ordering)
Sets the ordering constraint of this store instruction.
Definition: Instructions.h:364
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition: Lint.cpp:546
opStatus
IEEE-754R 7: Default exception handling.
Definition: APFloat.h:185
static Value * simplifyMinnumMaxnum(const IntrinsicInst &II)
void setCalledFunction(Value *Fn)
Set the function called.
This class wraps the llvm.memcpy/memmove intrinsics.
static Value * simplifyMaskedLoad(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
static Instruction * simplifyX86MaskedLoad(IntrinsicInst &II, InstCombiner &IC)
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:285
static bool maskIsAllOneOrUndef(Value *Mask)
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:176
unsigned getIntegerBitWidth() const
Definition: DerivedTypes.h:97
OverflowResult
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:61
StringRef getValueAsString() const
Return the attribute&#39;s value as a string.
Definition: Attributes.cpp:195
unsigned greater or equal
Definition: InstrTypes.h:877
match_one m_One()
Match an integer 1 or a vector with all elements equal to 1.
Definition: PatternMatch.h:194
void getOperandBundlesAsDefs(SmallVectorImpl< OperandBundleDef > &Defs) const
Definition: CallSite.h:582
Value * getArgOperand(unsigned i) const
getArgOperand/setArgOperand - Return/set the i-th call argument.
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:224
static Instruction::CastOps getCastOpcode(const Value *Val, bool SrcIsSigned, Type *Ty, bool DstIsSigned)
Returns the opcode necessary to cast Val into Ty using usual casting rules.
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
bool doesNotThrow() const
Determine if the call cannot unwind.
Definition: CallSite.h:505
void setArgOperand(unsigned i, Value *v)
bool isNormal() const
Definition: APFloat.h:1151
bool isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI, bool LookThroughBitCast=false)
Tests if a value is a call or invoke to a library function that allocates memory (either malloc...
Value * optimizeCall(CallInst *CI)
optimizeCall - Take the given call instruction and return a more optimal value to replace the instruc...
static bool removeTriviallyEmptyRange(IntrinsicInst &I, unsigned StartID, unsigned EndID, InstCombiner &IC)
unsigned getKnownAlignment(Value *V, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to infer an alignment for the specified pointer.
Definition: Local.h:246
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
Type * getValueType() const
Definition: GlobalValue.h:268
static IntrinsicInst * findInitTrampoline(Value *Callee)
bool isByValOrInAllocaArgument(unsigned ArgNo) const
Determine whether this argument is passed by value or in an alloca.
Definition: CallSite.h:608
static Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
Definition: Attributes.cpp:81
AssumptionCache & getAssumptionCache() const
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition: APInt.h:449
static PointerType * getIntNPtrTy(LLVMContext &C, unsigned N, unsigned AS=0)
Definition: Type.cpp:212
static Value * simplifyX86insertq(IntrinsicInst &II, Value *Op0, Value *Op1, APInt APLength, APInt APIndex, InstCombiner::BuilderTy &Builder)
Attempt to simplify SSE4A INSERTQ/INSERTQI instructions using constant folding or conversion to a shu...
bool isStatepoint(ImmutableCallSite CS)
Definition: Statepoint.cpp:27
static Constant * getNegativeIsTrueBoolVec(ConstantDataVector *V)
Return a constant boolean vector that has true elements in all positions where the input constant dat...
iterator_range< op_iterator > arg_operands()
Iteration adapter for range-for loops.
static Value * emitX86MaskSelect(Value *Mask, Value *Op0, Value *Op1, InstCombiner::BuilderTy &Builder)
This represents the llvm.va_copy intrinsic.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:538
match_all_ones m_AllOnes()
Match an integer or vector with all bits set to true.
Definition: PatternMatch.h:205
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
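replaceInstUsesWith and the eraseInstFromFunction entry above are the combiner-aware counterparts of plain RAUW/erase: they also keep the InstCombine worklist consistent. A sketch of the usual shape of a fold, with the actual simplification elided; the helper name is purely illustrative and not part of this file:
  // NewV is a simplified replacement computed by some fold (or nullptr if
  // the fold does not apply).  Returning the result of replaceInstUsesWith
  // tells the driver that CI itself is now dead.
  static llvm::Instruction *finishFold(llvm::InstCombiner &IC,
                                       llvm::CallInst &CI, llvm::Value *NewV) {
    if (!NewV)
      return nullptr;               // fold did not apply; keep looking
    return IC.replaceInstUsesWith(CI, NewV);
  }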
LoadInst * CreateAlignedLoad(Value *Ptr, unsigned Align, const char *Name)
Definition: IRBuilder.h:1186
static Instruction * foldCtpop(IntrinsicInst &II, InstCombiner &IC)
unsigned getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition: Type.cpp:115
LLVM Value Representation.
Definition: Value.h:73
void setAlignment(unsigned Align)
This file provides internal interfaces used to implement the InstCombine.
constexpr char Size[]
Key for Kernel::Arg::Metadata::mSize.
static VectorType * get(Type *ElementType, unsigned NumElements)
This static method is the primary way to construct an VectorType.
Definition: Type.cpp:593
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:235
AttrBuilder typeIncompatible(Type *Ty)
Which attributes cannot be applied to a type.
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:81
AttributeSet getFnAttributes() const
The function attributes are returned.
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.h:280
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1031
Invoke instruction.
#define DEBUG(X)
Definition: Debug.h:118
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition: KnownBits.h:146
bool isKnownNonZero(const Value *V, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr)
Return true if the given value is known to be non-zero when defined.
bool hasOneUse() const
Return true if there is exactly one user of this value.
Definition: Value.h:418
unsigned greater than
Definition: InstrTypes.h:876
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition: InstrTypes.h:967
void addAttribute(unsigned i, Attribute::AttrKind Kind)
Adds the attribute to the list of attributes.
AttributeList getAttributes() const
Get the parameter attributes of the call.
Definition: CallSite.h:329
unsigned getNumElements() const
Return the number of elements in the array or vector.
Definition: Constants.cpp:2293
bool isConvergent() const
Determine if the call is convergent.
Definition: CallSite.h:521
static APInt getNullValue(unsigned numBits)
Get the &#39;0&#39; value.
Definition: APInt.h:562
match_nan m_NaN()
Match an arbitrary NaN constant. This includes quiet and signaling NaNs.
Definition: PatternMatch.h:183
const TerminatorInst * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:120
static Constant * getMul(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
Definition: Constants.cpp:2136
static Value * simplifyX86insertps(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
This class represents an extension of floating point types.
bool isEmpty() const
Return true if there are no attributes.
Definition: Attributes.h:646
Root of the metadata hierarchy.
Definition: Metadata.h:58
void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
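computeKnownBits (declared in llvm/Analysis/ValueTracking.h) fills in a KnownBits whose accessors (countMinTrailingZeros, countMaxPopulation, isAllOnes, ...) also appear on this page. A minimal sketch, assuming V is an integer-typed Value and DL the module's DataLayout; the helper is illustrative only:
  // Returns true when the analysis proves the low two bits of V are zero,
  // i.e. V is a multiple of 4.
  static bool knownMultipleOfFour(const llvm::Value *V, const llvm::DataLayout &DL) {
    llvm::KnownBits Known(V->getType()->getScalarSizeInBits());
    llvm::computeKnownBits(V, Known, DL);
    return Known.countMinTrailingZeros() >= 2;
  }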
static IntegerType * getInt8Ty(LLVMContext &C)
Definition: Type.cpp:174
void setCalledFunction(Value *V)
Set the callee to the specified value.
Definition: CallSite.h:126
bool isSignaling() const
Definition: APFloat.h:1149
Value * getRawDest() const
static Type * getPromotedType(Type *Ty)
Return the specified type promoted as it would be to pass through a va_arg area.
bool use_empty() const
Definition: Value.h:328
static Constant * get(ArrayRef< Constant *> V)
Definition: Constants.cpp:983
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Type * getElementType() const
Definition: DerivedTypes.h:486
LLVM_READONLY APFloat minnum(const APFloat &A, const APFloat &B)
Implements IEEE minNum semantics.
Definition: APFloat.h:1227
PointerType * getType() const
Global values are always pointers.
Definition: GlobalValue.h:266
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
Definition: PatternMatch.h:359
static AttributeList get(LLVMContext &C, ArrayRef< std::pair< unsigned, Attribute >> Attrs)
Create an AttributeList with the specified parameters in it.
Definition: Attributes.cpp:870
bool isLosslessCast() const
A lossless cast is one that does not alter the basic value.
bool isNullValue() const
Determine if all bits are clear.
Definition: APInt.h:399
bool isStructTy() const
True if this is an instance of StructType.
Definition: Type.h:215
signed greater or equal
Definition: InstrTypes.h:881
User * user_back()
Definition: Value.h:391
cmpResult compare(const APFloat &RHS) const
Definition: APFloat.h:1102
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:44
const BasicBlock * getParent() const
Definition: Instruction.h:67
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate > m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
Definition: PatternMatch.h:837
CallInst * CreateCall(Value *Callee, ArrayRef< Value *> Args=None, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1663