1 //===- InstCombineCalls.cpp -----------------------------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the visitCall and visitInvoke functions.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "InstCombineInternal.h"
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/APInt.h"
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/None.h"
19 #include "llvm/ADT/Optional.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/ADT/Statistic.h"
23 #include "llvm/ADT/Twine.h"
29 #include "llvm/IR/Attributes.h"
30 #include "llvm/IR/BasicBlock.h"
31 #include "llvm/IR/CallSite.h"
32 #include "llvm/IR/Constant.h"
33 #include "llvm/IR/Constants.h"
34 #include "llvm/IR/DataLayout.h"
35 #include "llvm/IR/DerivedTypes.h"
36 #include "llvm/IR/Function.h"
37 #include "llvm/IR/GlobalVariable.h"
38 #include "llvm/IR/InstrTypes.h"
39 #include "llvm/IR/Instruction.h"
40 #include "llvm/IR/Instructions.h"
41 #include "llvm/IR/IntrinsicInst.h"
42 #include "llvm/IR/Intrinsics.h"
43 #include "llvm/IR/LLVMContext.h"
44 #include "llvm/IR/Metadata.h"
45 #include "llvm/IR/PatternMatch.h"
46 #include "llvm/IR/Statepoint.h"
47 #include "llvm/IR/Type.h"
48 #include "llvm/IR/User.h"
49 #include "llvm/IR/Value.h"
50 #include "llvm/IR/ValueHandle.h"
52 #include "llvm/Support/Casting.h"
54 #include "llvm/Support/Compiler.h"
55 #include "llvm/Support/Debug.h"
57 #include "llvm/Support/KnownBits.h"
62 #include <algorithm>
63 #include <cassert>
64 #include <cstdint>
65 #include <cstring>
66 #include <utility>
67 #include <vector>
68 
69 using namespace llvm;
70 using namespace PatternMatch;
71 
72 #define DEBUG_TYPE "instcombine"
73 
74 STATISTIC(NumSimplified, "Number of library calls simplified");
75 
76 static cl::opt<unsigned> GuardWideningWindow(
77  "instcombine-guard-widening-window",
78  cl::init(3),
79  cl::desc("How wide an instruction window to bypass looking for "
80  "another guard"));
81 
82 /// Return the specified type promoted as it would be to pass through a
83 /// va_arg area.
84 static Type *getPromotedType(Type *Ty) {
85  if (IntegerType* ITy = dyn_cast<IntegerType>(Ty)) {
86  if (ITy->getBitWidth() < 32)
87  return Type::getInt32Ty(Ty->getContext());
88  }
89  return Ty;
90 }
91 
92 /// Return a constant boolean vector that has true elements in all positions
93 /// where the input constant data vector has an element with the sign bit set.
94 static Constant *getNegativeIsTrueBoolVec(ConstantDataVector *V) {
95  SmallVector<Constant *, 32> BoolVec;
96  IntegerType *BoolTy = Type::getInt1Ty(V->getContext());
97  for (unsigned I = 0, E = V->getNumElements(); I != E; ++I) {
98  Constant *Elt = V->getElementAsConstant(I);
99  assert((isa<ConstantInt>(Elt) || isa<ConstantFP>(Elt)) &&
100  "Unexpected constant data vector element type");
101  bool Sign = V->getElementType()->isIntegerTy()
102  ? cast<ConstantInt>(Elt)->isNegative()
103  : cast<ConstantFP>(Elt)->isNegative();
104  BoolVec.push_back(ConstantInt::get(BoolTy, Sign));
105  }
106  return ConstantVector::get(BoolVec);
107 }
108 
109 Instruction *InstCombiner::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {
110  unsigned DstAlign = getKnownAlignment(MI->getRawDest(), DL, MI, &AC, &DT);
111  unsigned CopyDstAlign = MI->getDestAlignment();
112  if (CopyDstAlign < DstAlign){
113  MI->setDestAlignment(DstAlign);
114  return MI;
115  }
116 
117  unsigned SrcAlign = getKnownAlignment(MI->getRawSource(), DL, MI, &AC, &DT);
118  unsigned CopySrcAlign = MI->getSourceAlignment();
119  if (CopySrcAlign < SrcAlign) {
120  MI->setSourceAlignment(SrcAlign);
121  return MI;
122  }
123 
124  // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with
125  // load/store.
126  ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getLength());
127  if (!MemOpLength) return nullptr;
128 
129  // Source and destination pointer types are always "i8*" for the intrinsic. See
130  // if the size is something we can handle with a single primitive load/store.
131  // A single load+store correctly handles overlapping memory in the memmove
132  // case.
133  uint64_t Size = MemOpLength->getLimitedValue();
134  assert(Size && "0-sized memory transferring should be removed already.");
135 
136  if (Size > 8 || (Size&(Size-1)))
137  return nullptr; // If not 1/2/4/8 bytes, exit.
138 
139  // Use an integer load+store unless we can find something better.
140  unsigned SrcAddrSp =
141  cast<PointerType>(MI->getArgOperand(1)->getType())->getAddressSpace();
142  unsigned DstAddrSp =
143  cast<PointerType>(MI->getArgOperand(0)->getType())->getAddressSpace();
144 
145  IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3);
146  Type *NewSrcPtrTy = PointerType::get(IntType, SrcAddrSp);
147  Type *NewDstPtrTy = PointerType::get(IntType, DstAddrSp);
148 
149  // If the memcpy has metadata describing the members, see if we can get the
150  // TBAA tag describing our copy.
151  MDNode *CopyMD = nullptr;
152  if (MDNode *M = MI->getMetadata(LLVMContext::MD_tbaa)) {
153  CopyMD = M;
154  } else if (MDNode *M = MI->getMetadata(LLVMContext::MD_tbaa_struct)) {
155  if (M->getNumOperands() == 3 && M->getOperand(0) &&
156  mdconst::hasa<ConstantInt>(M->getOperand(0)) &&
157  mdconst::extract<ConstantInt>(M->getOperand(0))->isZero() &&
158  M->getOperand(1) &&
159  mdconst::hasa<ConstantInt>(M->getOperand(1)) &&
160  mdconst::extract<ConstantInt>(M->getOperand(1))->getValue() ==
161  Size &&
162  M->getOperand(2) && isa<MDNode>(M->getOperand(2)))
163  CopyMD = cast<MDNode>(M->getOperand(2));
164  }
165 
166  Value *Src = Builder.CreateBitCast(MI->getArgOperand(1), NewSrcPtrTy);
167  Value *Dest = Builder.CreateBitCast(MI->getArgOperand(0), NewDstPtrTy);
168  LoadInst *L = Builder.CreateLoad(Src);
169  // Alignment from the mem intrinsic will be better, so use it.
170  L->setAlignment(CopySrcAlign);
171  if (CopyMD)
172  L->setMetadata(LLVMContext::MD_tbaa, CopyMD);
173  MDNode *LoopMemParallelMD =
174  MI->getMetadata(LLVMContext::MD_mem_parallel_loop_access);
175  if (LoopMemParallelMD)
176  L->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);
177 
178  StoreInst *S = Builder.CreateStore(L, Dest);
179  // Alignment from the mem intrinsic will be better, so use it.
180  S->setAlignment(CopyDstAlign);
181  if (CopyMD)
182  S->setMetadata(LLVMContext::MD_tbaa, CopyMD);
183  if (LoopMemParallelMD)
184  S->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);
185 
186  if (auto *MT = dyn_cast<MemTransferInst>(MI)) {
187  // non-atomics can be volatile
188  L->setVolatile(MT->isVolatile());
189  S->setVolatile(MT->isVolatile());
190  }
191  if (isa<AtomicMemTransferInst>(MI)) {
192  // atomics have to be unordered
193  L->setOrdering(AtomicOrdering::Unordered);
194  S->setOrdering(AtomicOrdering::Unordered);
195  }
196 
197  // Set the size of the copy to 0; it will be deleted on the next iteration.
198  MI->setLength(Constant::getNullValue(MemOpLength->getType()));
199  return MI;
200 }
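// Illustrative example: an 8-byte copy such as
//   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 8, i1 false)
// becomes roughly
//   %s = bitcast i8* %src to i64*
//   %v = load i64, i64* %s, align 1
//   %d = bitcast i8* %dst to i64*
//   store i64 %v, i64* %d, align 1
// after which the intrinsic's length is zeroed so the dead memcpy is erased
// on a later iteration.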
201 
202 Instruction *InstCombiner::SimplifyAnyMemSet(AnyMemSetInst *MI) {
203  unsigned Alignment = getKnownAlignment(MI->getDest(), DL, MI, &AC, &DT);
204  if (MI->getDestAlignment() < Alignment) {
205  MI->setDestAlignment(Alignment);
206  return MI;
207  }
208 
209  // Extract the length and alignment and fill if they are constant.
210  ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
211  ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
212  if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8))
213  return nullptr;
214  uint64_t Len = LenC->getLimitedValue();
215  Alignment = MI->getDestAlignment();
216  assert(Len && "0-sized memory setting should be removed already.");
217 
218  // memset(s,c,n) -> store s, c (for n=1,2,4,8)
219  if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
220  Type *ITy = IntegerType::get(MI->getContext(), Len*8); // n=1 -> i8.
221 
222  Value *Dest = MI->getDest();
223  unsigned DstAddrSp = cast<PointerType>(Dest->getType())->getAddressSpace();
224  Type *NewDstPtrTy = PointerType::get(ITy, DstAddrSp);
225  Dest = Builder.CreateBitCast(Dest, NewDstPtrTy);
226 
227  // Alignment 0 is equivalent to alignment 1 for memset, but not for store.
228  if (Alignment == 0) Alignment = 1;
229 
230  // Extract the fill value and store.
231  uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL;
232  StoreInst *S = Builder.CreateStore(ConstantInt::get(ITy, Fill), Dest,
233  MI->isVolatile());
234  S->setAlignment(Alignment);
235  if (isa<AtomicMemSetInst>(MI))
236  S->setOrdering(AtomicOrdering::Unordered);
237 
238  // Set the size of the memset to 0; it will be deleted on the next iteration.
239  MI->setLength(Constant::getNullValue(LenC->getType()));
240  return MI;
241  }
242 
243  return nullptr;
244 }
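// Illustrative example: with a constant length of 4 and a constant fill byte
// of 7, the splat multiply above yields 0x07070707, so
//   call void @llvm.memset.p0i8.i64(i8* %p, i8 7, i64 4, i1 false)
// becomes roughly
//   %d = bitcast i8* %p to i32*
//   store i32 117901063, i32* %d, align 1   ; 0x07070707
// and the memset's length is zeroed so it can be erased.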
245 
246 static Value *simplifyX86immShift(const IntrinsicInst &II,
247  InstCombiner::BuilderTy &Builder) {
248  bool LogicalShift = false;
249  bool ShiftLeft = false;
250 
251  switch (II.getIntrinsicID()) {
252  default: llvm_unreachable("Unexpected intrinsic!");
253  case Intrinsic::x86_sse2_psra_d:
254  case Intrinsic::x86_sse2_psra_w:
255  case Intrinsic::x86_sse2_psrai_d:
256  case Intrinsic::x86_sse2_psrai_w:
257  case Intrinsic::x86_avx2_psra_d:
258  case Intrinsic::x86_avx2_psra_w:
259  case Intrinsic::x86_avx2_psrai_d:
260  case Intrinsic::x86_avx2_psrai_w:
261  case Intrinsic::x86_avx512_psra_q_128:
262  case Intrinsic::x86_avx512_psrai_q_128:
263  case Intrinsic::x86_avx512_psra_q_256:
264  case Intrinsic::x86_avx512_psrai_q_256:
265  case Intrinsic::x86_avx512_psra_d_512:
266  case Intrinsic::x86_avx512_psra_q_512:
267  case Intrinsic::x86_avx512_psra_w_512:
268  case Intrinsic::x86_avx512_psrai_d_512:
269  case Intrinsic::x86_avx512_psrai_q_512:
270  case Intrinsic::x86_avx512_psrai_w_512:
271  LogicalShift = false; ShiftLeft = false;
272  break;
273  case Intrinsic::x86_sse2_psrl_d:
274  case Intrinsic::x86_sse2_psrl_q:
275  case Intrinsic::x86_sse2_psrl_w:
276  case Intrinsic::x86_sse2_psrli_d:
277  case Intrinsic::x86_sse2_psrli_q:
278  case Intrinsic::x86_sse2_psrli_w:
279  case Intrinsic::x86_avx2_psrl_d:
280  case Intrinsic::x86_avx2_psrl_q:
281  case Intrinsic::x86_avx2_psrl_w:
282  case Intrinsic::x86_avx2_psrli_d:
283  case Intrinsic::x86_avx2_psrli_q:
284  case Intrinsic::x86_avx2_psrli_w:
285  case Intrinsic::x86_avx512_psrl_d_512:
286  case Intrinsic::x86_avx512_psrl_q_512:
287  case Intrinsic::x86_avx512_psrl_w_512:
288  case Intrinsic::x86_avx512_psrli_d_512:
289  case Intrinsic::x86_avx512_psrli_q_512:
290  case Intrinsic::x86_avx512_psrli_w_512:
291  LogicalShift = true; ShiftLeft = false;
292  break;
293  case Intrinsic::x86_sse2_psll_d:
294  case Intrinsic::x86_sse2_psll_q:
295  case Intrinsic::x86_sse2_psll_w:
296  case Intrinsic::x86_sse2_pslli_d:
297  case Intrinsic::x86_sse2_pslli_q:
298  case Intrinsic::x86_sse2_pslli_w:
299  case Intrinsic::x86_avx2_psll_d:
300  case Intrinsic::x86_avx2_psll_q:
301  case Intrinsic::x86_avx2_psll_w:
302  case Intrinsic::x86_avx2_pslli_d:
303  case Intrinsic::x86_avx2_pslli_q:
304  case Intrinsic::x86_avx2_pslli_w:
305  case Intrinsic::x86_avx512_psll_d_512:
306  case Intrinsic::x86_avx512_psll_q_512:
307  case Intrinsic::x86_avx512_psll_w_512:
308  case Intrinsic::x86_avx512_pslli_d_512:
309  case Intrinsic::x86_avx512_pslli_q_512:
310  case Intrinsic::x86_avx512_pslli_w_512:
311  LogicalShift = true; ShiftLeft = true;
312  break;
313  }
314  assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left");
315 
316  // Simplify if count is constant.
317  auto Arg1 = II.getArgOperand(1);
318  auto CAZ = dyn_cast<ConstantAggregateZero>(Arg1);
319  auto CDV = dyn_cast<ConstantDataVector>(Arg1);
320  auto CInt = dyn_cast<ConstantInt>(Arg1);
321  if (!CAZ && !CDV && !CInt)
322  return nullptr;
323 
324  APInt Count(64, 0);
325  if (CDV) {
326  // SSE2/AVX2 uses the entire lower 64 bits of the 128-bit vector
327  // operand to compute the shift amount.
328  auto VT = cast<VectorType>(CDV->getType());
329  unsigned BitWidth = VT->getElementType()->getPrimitiveSizeInBits();
330  assert((64 % BitWidth) == 0 && "Unexpected packed shift size");
331  unsigned NumSubElts = 64 / BitWidth;
332 
333  // Concatenate the sub-elements to create the 64-bit value.
334  for (unsigned i = 0; i != NumSubElts; ++i) {
335  unsigned SubEltIdx = (NumSubElts - 1) - i;
336  auto SubElt = cast<ConstantInt>(CDV->getElementAsConstant(SubEltIdx));
337  Count <<= BitWidth;
338  Count |= SubElt->getValue().zextOrTrunc(64);
339  }
340  }
341  else if (CInt)
342  Count = CInt->getValue();
343 
344  auto Vec = II.getArgOperand(0);
345  auto VT = cast<VectorType>(Vec->getType());
346  auto SVT = VT->getElementType();
347  unsigned VWidth = VT->getNumElements();
348  unsigned BitWidth = SVT->getPrimitiveSizeInBits();
349 
350  // If shift-by-zero then just return the original value.
351  if (Count.isNullValue())
352  return Vec;
353 
354  // Handle cases when Shift >= BitWidth.
355  if (Count.uge(BitWidth)) {
356  // If LogicalShift - just return zero.
357  if (LogicalShift)
358  return ConstantAggregateZero::get(VT);
359 
360  // If ArithmeticShift - clamp Shift to (BitWidth - 1).
361  Count = APInt(64, BitWidth - 1);
362  }
363 
364  // Get a constant vector of the same type as the first operand.
365  auto ShiftAmt = ConstantInt::get(SVT, Count.zextOrTrunc(BitWidth));
366  auto ShiftVec = Builder.CreateVectorSplat(VWidth, ShiftAmt);
367 
368  if (ShiftLeft)
369  return Builder.CreateShl(Vec, ShiftVec);
370 
371  if (LogicalShift)
372  return Builder.CreateLShr(Vec, ShiftVec);
373 
374  return Builder.CreateAShr(Vec, ShiftVec);
375 }
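// Illustrative example: a constant shift amount such as
//   %r = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 3)
// is replaced by a generic IR shift with a splatted constant amount,
//   %r = ashr <4 x i32> %v, <i32 3, i32 3, i32 3, i32 3>
// while a logical shift whose amount is >= the element bit width folds
// straight to zeroinitializer.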
376 
377 // Attempt to simplify AVX2 per-element shift intrinsics to a generic IR shift.
378 // Unlike the generic IR shifts, the intrinsics have defined behaviour for out
379 // of range shift amounts (logical - set to zero, arithmetic - splat sign bit).
380 static Value *simplifyX86varShift(const IntrinsicInst &II,
381  InstCombiner::BuilderTy &Builder) {
382  bool LogicalShift = false;
383  bool ShiftLeft = false;
384 
385  switch (II.getIntrinsicID()) {
386  default: llvm_unreachable("Unexpected intrinsic!");
387  case Intrinsic::x86_avx2_psrav_d:
388  case Intrinsic::x86_avx2_psrav_d_256:
389  case Intrinsic::x86_avx512_psrav_q_128:
390  case Intrinsic::x86_avx512_psrav_q_256:
391  case Intrinsic::x86_avx512_psrav_d_512:
392  case Intrinsic::x86_avx512_psrav_q_512:
393  case Intrinsic::x86_avx512_psrav_w_128:
394  case Intrinsic::x86_avx512_psrav_w_256:
395  case Intrinsic::x86_avx512_psrav_w_512:
396  LogicalShift = false;
397  ShiftLeft = false;
398  break;
399  case Intrinsic::x86_avx2_psrlv_d:
400  case Intrinsic::x86_avx2_psrlv_d_256:
401  case Intrinsic::x86_avx2_psrlv_q:
402  case Intrinsic::x86_avx2_psrlv_q_256:
403  case Intrinsic::x86_avx512_psrlv_d_512:
404  case Intrinsic::x86_avx512_psrlv_q_512:
405  case Intrinsic::x86_avx512_psrlv_w_128:
406  case Intrinsic::x86_avx512_psrlv_w_256:
407  case Intrinsic::x86_avx512_psrlv_w_512:
408  LogicalShift = true;
409  ShiftLeft = false;
410  break;
411  case Intrinsic::x86_avx2_psllv_d:
412  case Intrinsic::x86_avx2_psllv_d_256:
413  case Intrinsic::x86_avx2_psllv_q:
414  case Intrinsic::x86_avx2_psllv_q_256:
415  case Intrinsic::x86_avx512_psllv_d_512:
416  case Intrinsic::x86_avx512_psllv_q_512:
417  case Intrinsic::x86_avx512_psllv_w_128:
418  case Intrinsic::x86_avx512_psllv_w_256:
419  case Intrinsic::x86_avx512_psllv_w_512:
420  LogicalShift = true;
421  ShiftLeft = true;
422  break;
423  }
424  assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left");
425 
426  // Simplify if all shift amounts are constant/undef.
427  auto *CShift = dyn_cast<Constant>(II.getArgOperand(1));
428  if (!CShift)
429  return nullptr;
430 
431  auto Vec = II.getArgOperand(0);
432  auto VT = cast<VectorType>(II.getType());
433  auto SVT = VT->getVectorElementType();
434  int NumElts = VT->getNumElements();
435  int BitWidth = SVT->getIntegerBitWidth();
436 
437  // Collect each element's shift amount.
438  // We also collect special cases: UNDEF = -1, OUT-OF-RANGE = BitWidth.
439  bool AnyOutOfRange = false;
440  SmallVector<int, 8> ShiftAmts;
441  for (int I = 0; I < NumElts; ++I) {
442  auto *CElt = CShift->getAggregateElement(I);
443  if (CElt && isa<UndefValue>(CElt)) {
444  ShiftAmts.push_back(-1);
445  continue;
446  }
447 
448  auto *COp = dyn_cast_or_null<ConstantInt>(CElt);
449  if (!COp)
450  return nullptr;
451 
452  // Handle out of range shifts.
453  // If LogicalShift - set to BitWidth (special case).
454  // If ArithmeticShift - set to (BitWidth - 1) (sign splat).
455  APInt ShiftVal = COp->getValue();
456  if (ShiftVal.uge(BitWidth)) {
457  AnyOutOfRange = LogicalShift;
458  ShiftAmts.push_back(LogicalShift ? BitWidth : BitWidth - 1);
459  continue;
460  }
461 
462  ShiftAmts.push_back((int)ShiftVal.getZExtValue());
463  }
464 
465  // If all elements out of range or UNDEF, return vector of zeros/undefs.
466  // ArithmeticShift should only hit this if they are all UNDEF.
467  auto OutOfRange = [&](int Idx) { return (Idx < 0) || (BitWidth <= Idx); };
468  if (llvm::all_of(ShiftAmts, OutOfRange)) {
469  SmallVector<Constant *, 8> ConstantVec;
470  for (int Idx : ShiftAmts) {
471  if (Idx < 0) {
472  ConstantVec.push_back(UndefValue::get(SVT));
473  } else {
474  assert(LogicalShift && "Logical shift expected");
475  ConstantVec.push_back(ConstantInt::getNullValue(SVT));
476  }
477  }
478  return ConstantVector::get(ConstantVec);
479  }
480 
481  // We can't handle only some out of range values with generic logical shifts.
482  if (AnyOutOfRange)
483  return nullptr;
484 
485  // Build the shift amount constant vector.
486  SmallVector<Constant *, 8> ShiftVecAmts;
487  for (int Idx : ShiftAmts) {
488  if (Idx < 0)
489  ShiftVecAmts.push_back(UndefValue::get(SVT));
490  else
491  ShiftVecAmts.push_back(ConstantInt::get(SVT, Idx));
492  }
493  auto ShiftVec = ConstantVector::get(ShiftVecAmts);
494 
495  if (ShiftLeft)
496  return Builder.CreateShl(Vec, ShiftVec);
497 
498  if (LogicalShift)
499  return Builder.CreateLShr(Vec, ShiftVec);
500 
501  return Builder.CreateAShr(Vec, ShiftVec);
502 }
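// Illustrative example: when every per-element shift amount is a constant in
// range,
//   %r = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v,
//                                              <4 x i32> <i32 1, i32 2, i32 3, i32 4>)
// becomes the generic
//   %r = lshr <4 x i32> %v, <i32 1, i32 2, i32 3, i32 4>
// If all amounts are out of range (or undef) the call folds to a constant;
// a mix of in-range and out-of-range amounts is left untouched.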
503 
504 static Value *simplifyX86pack(IntrinsicInst &II, bool IsSigned) {
505  Value *Arg0 = II.getArgOperand(0);
506  Value *Arg1 = II.getArgOperand(1);
507  Type *ResTy = II.getType();
508 
509  // Fast all undef handling.
510  if (isa<UndefValue>(Arg0) && isa<UndefValue>(Arg1))
511  return UndefValue::get(ResTy);
512 
513  Type *ArgTy = Arg0->getType();
514  unsigned NumLanes = ResTy->getPrimitiveSizeInBits() / 128;
515  unsigned NumDstElts = ResTy->getVectorNumElements();
516  unsigned NumSrcElts = ArgTy->getVectorNumElements();
517  assert(NumDstElts == (2 * NumSrcElts) && "Unexpected packing types");
518 
519  unsigned NumDstEltsPerLane = NumDstElts / NumLanes;
520  unsigned NumSrcEltsPerLane = NumSrcElts / NumLanes;
521  unsigned DstScalarSizeInBits = ResTy->getScalarSizeInBits();
522  assert(ArgTy->getScalarSizeInBits() == (2 * DstScalarSizeInBits) &&
523  "Unexpected packing types");
524 
525  // Constant folding.
526  auto *Cst0 = dyn_cast<Constant>(Arg0);
527  auto *Cst1 = dyn_cast<Constant>(Arg1);
528  if (!Cst0 || !Cst1)
529  return nullptr;
530 
531  SmallVector<Constant *, 32> Vals;
532  for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
533  for (unsigned Elt = 0; Elt != NumDstEltsPerLane; ++Elt) {
534  unsigned SrcIdx = Lane * NumSrcEltsPerLane + Elt % NumSrcEltsPerLane;
535  auto *Cst = (Elt >= NumSrcEltsPerLane) ? Cst1 : Cst0;
536  auto *COp = Cst->getAggregateElement(SrcIdx);
537  if (COp && isa<UndefValue>(COp)) {
538  Vals.push_back(UndefValue::get(ResTy->getScalarType()));
539  continue;
540  }
541 
542  auto *CInt = dyn_cast_or_null<ConstantInt>(COp);
543  if (!CInt)
544  return nullptr;
545 
546  APInt Val = CInt->getValue();
547  assert(Val.getBitWidth() == ArgTy->getScalarSizeInBits() &&
548  "Unexpected constant bitwidth");
549 
550  if (IsSigned) {
551  // PACKSS: Truncate signed value with signed saturation.
552  // Source values less than dst minint are saturated to minint.
553  // Source values greater than dst maxint are saturated to maxint.
554  if (Val.isSignedIntN(DstScalarSizeInBits))
555  Val = Val.trunc(DstScalarSizeInBits);
556  else if (Val.isNegative())
557  Val = APInt::getSignedMinValue(DstScalarSizeInBits);
558  else
559  Val = APInt::getSignedMaxValue(DstScalarSizeInBits);
560  } else {
561  // PACKUS: Truncate signed value with unsigned saturation.
562  // Source values less than zero are saturated to zero.
563  // Source values greater than dst maxuint are saturated to maxuint.
564  if (Val.isIntN(DstScalarSizeInBits))
565  Val = Val.trunc(DstScalarSizeInBits);
566  else if (Val.isNegative())
567  Val = APInt::getNullValue(DstScalarSizeInBits);
568  else
569  Val = APInt::getAllOnesValue(DstScalarSizeInBits);
570  }
571 
572  Vals.push_back(ConstantInt::get(ResTy->getScalarType(), Val));
573  }
574  }
575 
576  return ConstantVector::get(Vals);
577 }
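// Illustrative example of the constant folding above: packing the i32
// elements <70000, -70000, 5, -5> with PACKSSDW (signed saturation) yields
// <32767, -32768, 5, -5> in the corresponding i16 lanes of the result, while
// PACKUSDW (unsigned saturation) would yield <65535, 0, 5, 0>.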
578 
579 // Replace X86-specific intrinsics with generic floor-ceil where applicable.
580 static Value *simplifyX86round(IntrinsicInst &II,
581  InstCombiner::BuilderTy &Builder) {
582  ConstantInt *Arg = nullptr;
583  Intrinsic::ID IntrinsicID = II.getIntrinsicID();
584 
585  if (IntrinsicID == Intrinsic::x86_sse41_round_ss ||
586  IntrinsicID == Intrinsic::x86_sse41_round_sd)
587  Arg = dyn_cast<ConstantInt>(II.getArgOperand(2));
588  else if (IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ss ||
589  IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_sd)
590  Arg = dyn_cast<ConstantInt>(II.getArgOperand(4));
591  else
592  Arg = dyn_cast<ConstantInt>(II.getArgOperand(1));
593  if (!Arg)
594  return nullptr;
595  unsigned RoundControl = Arg->getZExtValue();
596 
597  Arg = nullptr;
598  unsigned SAE = 0;
599  if (IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ps_512 ||
600  IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_pd_512)
601  Arg = dyn_cast<ConstantInt>(II.getArgOperand(4));
602  else if (IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ss ||
603  IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_sd)
604  Arg = dyn_cast<ConstantInt>(II.getArgOperand(5));
605  else
606  SAE = 4;
607  if (!SAE) {
608  if (!Arg)
609  return nullptr;
610  SAE = Arg->getZExtValue();
611  }
612 
613  if (SAE != 4 || (RoundControl != 2 /*ceil*/ && RoundControl != 1 /*floor*/))
614  return nullptr;
615 
616  Value *Src, *Dst, *Mask;
617  bool IsScalar = false;
618  if (IntrinsicID == Intrinsic::x86_sse41_round_ss ||
619  IntrinsicID == Intrinsic::x86_sse41_round_sd ||
620  IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ss ||
621  IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_sd) {
622  IsScalar = true;
623  if (IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ss ||
624  IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_sd) {
625  Mask = II.getArgOperand(3);
626  Value *Zero = Constant::getNullValue(Mask->getType());
627  Mask = Builder.CreateAnd(Mask, 1);
628  Mask = Builder.CreateICmp(ICmpInst::ICMP_NE, Mask, Zero);
629  Dst = II.getArgOperand(2);
630  } else
631  Dst = II.getArgOperand(0);
632  Src = Builder.CreateExtractElement(II.getArgOperand(1), (uint64_t)0);
633  } else {
634  Src = II.getArgOperand(0);
635  if (IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ps_128 ||
636  IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ps_256 ||
637  IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ps_512 ||
638  IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_pd_128 ||
639  IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_pd_256 ||
640  IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_pd_512) {
641  Dst = II.getArgOperand(2);
642  Mask = II.getArgOperand(3);
643  } else {
644  Dst = Src;
645  Mask = ConstantInt::getAllOnesValue(
646  Builder.getIntNTy(Src->getType()->getVectorNumElements()));
647  }
648  }
649 
650  Intrinsic::ID ID = (RoundControl == 2) ? Intrinsic::ceil : Intrinsic::floor;
651  Value *Res = Builder.CreateIntrinsic(ID, {Src}, &II);
652  if (!IsScalar) {
653  if (auto *C = dyn_cast<Constant>(Mask))
654  if (C->isAllOnesValue())
655  return Res;
656  auto *MaskTy = VectorType::get(
657  Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
658  Mask = Builder.CreateBitCast(Mask, MaskTy);
659  unsigned Width = Src->getType()->getVectorNumElements();
660  if (MaskTy->getVectorNumElements() > Width) {
661  uint32_t Indices[4];
662  for (unsigned i = 0; i != Width; ++i)
663  Indices[i] = i;
664  Mask = Builder.CreateShuffleVector(Mask, Mask,
665  makeArrayRef(Indices, Width));
666  }
667  return Builder.CreateSelect(Mask, Res, Dst);
668  }
669  if (IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ss ||
670  IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_sd) {
671  Dst = Builder.CreateExtractElement(Dst, (uint64_t)0);
672  Res = Builder.CreateSelect(Mask, Res, Dst);
673  Dst = II.getArgOperand(0);
674  }
675  return Builder.CreateInsertElement(Dst, Res, (uint64_t)0);
676 }
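// Illustrative example: with the default exception behaviour and a
// round-control immediate of 1 (round down),
//   %r = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %dst,
//                                                  <4 x float> %src, i32 1)
// is rewritten to a target-independent floor of the low element:
//   %s = extractelement <4 x float> %src, i64 0
//   %f = call float @llvm.floor.f32(float %s)
//   %r = insertelement <4 x float> %dst, float %f, i64 0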
677 
678 static Value *simplifyX86movmsk(const IntrinsicInst &II) {
679  Value *Arg = II.getArgOperand(0);
680  Type *ResTy = II.getType();
681  Type *ArgTy = Arg->getType();
682 
683  // movmsk(undef) -> zero as we must ensure the upper bits are zero.
684  if (isa<UndefValue>(Arg))
685  return Constant::getNullValue(ResTy);
686 
687  // We can't easily peek through x86_mmx types.
688  if (!ArgTy->isVectorTy())
689  return nullptr;
690 
691  auto *C = dyn_cast<Constant>(Arg);
692  if (!C)
693  return nullptr;
694 
695  // Extract signbits of the vector input and pack into integer result.
696  APInt Result(ResTy->getPrimitiveSizeInBits(), 0);
697  for (unsigned I = 0, E = ArgTy->getVectorNumElements(); I != E; ++I) {
698  auto *COp = C->getAggregateElement(I);
699  if (!COp)
700  return nullptr;
701  if (isa<UndefValue>(COp))
702  continue;
703 
704  auto *CInt = dyn_cast<ConstantInt>(COp);
705  auto *CFp = dyn_cast<ConstantFP>(COp);
706  if (!CInt && !CFp)
707  return nullptr;
708 
709  if ((CInt && CInt->isNegative()) || (CFp && CFp->isNegative()))
710  Result.setBit(I);
711  }
712 
713  return Constant::getIntegerValue(ResTy, Result);
714 }
715 
716 static Value *simplifyX86insertps(const IntrinsicInst &II,
717  InstCombiner::BuilderTy &Builder) {
718  auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2));
719  if (!CInt)
720  return nullptr;
721 
722  VectorType *VecTy = cast<VectorType>(II.getType());
723  assert(VecTy->getNumElements() == 4 && "insertps with wrong vector type");
724 
725  // The immediate permute control byte looks like this:
726  // [3:0] - zero mask for each 32-bit lane
727  // [5:4] - select one 32-bit destination lane
728  // [7:6] - select one 32-bit source lane
729 
730  uint8_t Imm = CInt->getZExtValue();
731  uint8_t ZMask = Imm & 0xf;
732  uint8_t DestLane = (Imm >> 4) & 0x3;
733  uint8_t SourceLane = (Imm >> 6) & 0x3;
734 
735  ConstantAggregateZero *ZeroVector = ConstantAggregateZero::get(VecTy);
736 
737  // If all zero mask bits are set, this was just a weird way to
738  // generate a zero vector.
739  if (ZMask == 0xf)
740  return ZeroVector;
741 
742  // Initialize by passing all of the first source bits through.
743  uint32_t ShuffleMask[4] = { 0, 1, 2, 3 };
744 
745  // We may replace the second operand with the zero vector.
746  Value *V1 = II.getArgOperand(1);
747 
748  if (ZMask) {
749  // If the zero mask is being used with a single input or the zero mask
750  // overrides the destination lane, this is a shuffle with the zero vector.
751  if ((II.getArgOperand(0) == II.getArgOperand(1)) ||
752  (ZMask & (1 << DestLane))) {
753  V1 = ZeroVector;
754  // We may still move 32-bits of the first source vector from one lane
755  // to another.
756  ShuffleMask[DestLane] = SourceLane;
757  // The zero mask may override the previous insert operation.
758  for (unsigned i = 0; i < 4; ++i)
759  if ((ZMask >> i) & 0x1)
760  ShuffleMask[i] = i + 4;
761  } else {
762  // TODO: Model this case as 2 shuffles or a 'logical and' plus shuffle?
763  return nullptr;
764  }
765  } else {
766  // Replace the selected destination lane with the selected source lane.
767  ShuffleMask[DestLane] = SourceLane + 4;
768  }
769 
770  return Builder.CreateShuffleVector(II.getArgOperand(0), V1, ShuffleMask);
771 }
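// Illustrative example: an insertps immediate of 0x10 (zero mask 0, source
// lane 0, destination lane 1) copies element 0 of the second operand into
// lane 1 of the first, so
//   %r = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a,
//                                                  <4 x float> %b, i8 16)
// becomes
//   %r = shufflevector <4 x float> %a, <4 x float> %b,
//                      <4 x i32> <i32 0, i32 4, i32 2, i32 3>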
772 
773 /// Attempt to simplify SSE4A EXTRQ/EXTRQI instructions using constant folding
774 /// or conversion to a shuffle vector.
775 static Value *simplifyX86extrq(IntrinsicInst &II, Value *Op0,
776  ConstantInt *CILength, ConstantInt *CIIndex,
777  InstCombiner::BuilderTy &Builder) {
778  auto LowConstantHighUndef = [&](uint64_t Val) {
779  Type *IntTy64 = Type::getInt64Ty(II.getContext());
780  Constant *Args[] = {ConstantInt::get(IntTy64, Val),
781  UndefValue::get(IntTy64)};
782  return ConstantVector::get(Args);
783  };
784 
785  // See if we're dealing with constant values.
786  Constant *C0 = dyn_cast<Constant>(Op0);
787  ConstantInt *CI0 =
788  C0 ? dyn_cast_or_null<ConstantInt>(C0->getAggregateElement((unsigned)0))
789  : nullptr;
790 
791  // Attempt to constant fold.
792  if (CILength && CIIndex) {
793  // From AMD documentation: "The bit index and field length are each six
794  // bits in length other bits of the field are ignored."
795  APInt APIndex = CIIndex->getValue().zextOrTrunc(6);
796  APInt APLength = CILength->getValue().zextOrTrunc(6);
797 
798  unsigned Index = APIndex.getZExtValue();
799 
800  // From AMD documentation: "a value of zero in the field length is
801  // defined as length of 64".
802  unsigned Length = APLength == 0 ? 64 : APLength.getZExtValue();
803 
804  // From AMD documentation: "If the sum of the bit index + length field
805  // is greater than 64, the results are undefined".
806  unsigned End = Index + Length;
807 
808  // Note that both field index and field length are 8-bit quantities.
809  // Since variables 'Index' and 'Length' are unsigned values
810  // obtained from zero-extending field index and field length
811  // respectively, their sum should never wrap around.
812  if (End > 64)
813  return UndefValue::get(II.getType());
814 
815  // If we are inserting whole bytes, we can convert this to a shuffle.
816  // Lowering can recognize EXTRQI shuffle masks.
817  if ((Length % 8) == 0 && (Index % 8) == 0) {
818  // Convert bit indices to byte indices.
819  Length /= 8;
820  Index /= 8;
821 
822  Type *IntTy8 = Type::getInt8Ty(II.getContext());
823  Type *IntTy32 = Type::getInt32Ty(II.getContext());
824  VectorType *ShufTy = VectorType::get(IntTy8, 16);
825 
826  SmallVector<Constant *, 16> ShuffleMask;
827  for (int i = 0; i != (int)Length; ++i)
828  ShuffleMask.push_back(
829  Constant::getIntegerValue(IntTy32, APInt(32, i + Index)));
830  for (int i = Length; i != 8; ++i)
831  ShuffleMask.push_back(
832  Constant::getIntegerValue(IntTy32, APInt(32, i + 16)));
833  for (int i = 8; i != 16; ++i)
834  ShuffleMask.push_back(UndefValue::get(IntTy32));
835 
836  Value *SV = Builder.CreateShuffleVector(
837  Builder.CreateBitCast(Op0, ShufTy),
838  ConstantAggregateZero::get(ShufTy), ConstantVector::get(ShuffleMask));
839  return Builder.CreateBitCast(SV, II.getType());
840  }
841 
842  // Constant Fold - shift Index'th bit to lowest position and mask off
843  // Length bits.
844  if (CI0) {
845  APInt Elt = CI0->getValue();
846  Elt.lshrInPlace(Index);
847  Elt = Elt.zextOrTrunc(Length);
848  return LowConstantHighUndef(Elt.getZExtValue());
849  }
850 
851  // If we were an EXTRQ call, we'll save registers if we convert to EXTRQI.
852  if (II.getIntrinsicID() == Intrinsic::x86_sse4a_extrq) {
853  Value *Args[] = {Op0, CILength, CIIndex};
854  Module *M = II.getModule();
855  Value *F = Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_extrqi);
856  return Builder.CreateCall(F, Args);
857  }
858  }
859 
860  // Constant Fold - extraction from zero is always {zero, undef}.
861  if (CI0 && CI0->isZero())
862  return LowConstantHighUndef(0);
863 
864  return nullptr;
865 }
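// Illustrative example: extracting a byte-aligned field, e.g. EXTRQI with a
// length of 16 bits and an index of 8 bits, becomes a shufflevector over the
// <16 x i8> form of the operand: bytes 1-2 of the source land in bytes 0-1 of
// the result, the rest of the low 64 bits come from a zero vector, and the
// upper 64 bits are left undef, matching the instruction's semantics.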
866 
867 /// Attempt to simplify SSE4A INSERTQ/INSERTQI instructions using constant
868 /// folding or conversion to a shuffle vector.
869 static Value *simplifyX86insertq(IntrinsicInst &II, Value *Op0, Value *Op1,
870  APInt APLength, APInt APIndex,
871  InstCombiner::BuilderTy &Builder) {
872  // From AMD documentation: "The bit index and field length are each six bits
873  // in length other bits of the field are ignored."
874  APIndex = APIndex.zextOrTrunc(6);
875  APLength = APLength.zextOrTrunc(6);
876 
877  // Attempt to constant fold.
878  unsigned Index = APIndex.getZExtValue();
879 
880  // From AMD documentation: "a value of zero in the field length is
881  // defined as length of 64".
882  unsigned Length = APLength == 0 ? 64 : APLength.getZExtValue();
883 
884  // From AMD documentation: "If the sum of the bit index + length field
885  // is greater than 64, the results are undefined".
886  unsigned End = Index + Length;
887 
888  // Note that both field index and field length are 8-bit quantities.
889  // Since variables 'Index' and 'Length' are unsigned values
890  // obtained from zero-extending field index and field length
891  // respectively, their sum should never wrap around.
892  if (End > 64)
893  return UndefValue::get(II.getType());
894 
895  // If we are inserting whole bytes, we can convert this to a shuffle.
896  // Lowering can recognize INSERTQI shuffle masks.
897  if ((Length % 8) == 0 && (Index % 8) == 0) {
898  // Convert bit indices to byte indices.
899  Length /= 8;
900  Index /= 8;
901 
902  Type *IntTy8 = Type::getInt8Ty(II.getContext());
903  Type *IntTy32 = Type::getInt32Ty(II.getContext());
904  VectorType *ShufTy = VectorType::get(IntTy8, 16);
905 
906  SmallVector<Constant *, 16> ShuffleMask;
907  for (int i = 0; i != (int)Index; ++i)
908  ShuffleMask.push_back(Constant::getIntegerValue(IntTy32, APInt(32, i)));
909  for (int i = 0; i != (int)Length; ++i)
910  ShuffleMask.push_back(
911  Constant::getIntegerValue(IntTy32, APInt(32, i + 16)));
912  for (int i = Index + Length; i != 8; ++i)
913  ShuffleMask.push_back(Constant::getIntegerValue(IntTy32, APInt(32, i)));
914  for (int i = 8; i != 16; ++i)
915  ShuffleMask.push_back(UndefValue::get(IntTy32));
916 
917  Value *SV = Builder.CreateShuffleVector(Builder.CreateBitCast(Op0, ShufTy),
918  Builder.CreateBitCast(Op1, ShufTy),
919  ConstantVector::get(ShuffleMask));
920  return Builder.CreateBitCast(SV, II.getType());
921  }
922 
923  // See if we're dealing with constant values.
924  Constant *C0 = dyn_cast<Constant>(Op0);
925  Constant *C1 = dyn_cast<Constant>(Op1);
926  ConstantInt *CI00 =
927  C0 ? dyn_cast_or_null<ConstantInt>(C0->getAggregateElement((unsigned)0))
928  : nullptr;
929  ConstantInt *CI10 =
930  C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)0))
931  : nullptr;
932 
933  // Constant Fold - insert bottom Length bits starting at the Index'th bit.
934  if (CI00 && CI10) {
935  APInt V00 = CI00->getValue();
936  APInt V10 = CI10->getValue();
937  APInt Mask = APInt::getLowBitsSet(64, Length).shl(Index);
938  V00 = V00 & ~Mask;
939  V10 = V10.zextOrTrunc(Length).zextOrTrunc(64).shl(Index);
940  APInt Val = V00 | V10;
941  Type *IntTy64 = Type::getInt64Ty(II.getContext());
942  Constant *Args[] = {ConstantInt::get(IntTy64, Val.getZExtValue()),
943  UndefValue::get(IntTy64)};
944  return ConstantVector::get(Args);
945  }
946 
947  // If we were an INSERTQ call, we'll save demanded elements if we convert to
948  // INSERTQI.
949  if (II.getIntrinsicID() == Intrinsic::x86_sse4a_insertq) {
950  Type *IntTy8 = Type::getInt8Ty(II.getContext());
951  Constant *CILength = ConstantInt::get(IntTy8, Length, false);
952  Constant *CIIndex = ConstantInt::get(IntTy8, Index, false);
953 
954  Value *Args[] = {Op0, Op1, CILength, CIIndex};
955  Module *M = II.getModule();
956  Value *F = Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_insertqi);
957  return Builder.CreateCall(F, Args);
958  }
959 
960  return nullptr;
961 }
962 
963 /// Attempt to convert pshufb* to shufflevector if the mask is constant.
964 static Value *simplifyX86pshufb(const IntrinsicInst &II,
965  InstCombiner::BuilderTy &Builder) {
966  Constant *V = dyn_cast<Constant>(II.getArgOperand(1));
967  if (!V)
968  return nullptr;
969 
970  auto *VecTy = cast<VectorType>(II.getType());
971  auto *MaskEltTy = Type::getInt32Ty(II.getContext());
972  unsigned NumElts = VecTy->getNumElements();
973  assert((NumElts == 16 || NumElts == 32 || NumElts == 64) &&
974  "Unexpected number of elements in shuffle mask!");
975 
976  // Construct a shuffle mask from constant integers or UNDEFs.
977  Constant *Indexes[64] = {nullptr};
978 
979  // Each byte in the shuffle control mask forms an index to permute the
980  // corresponding byte in the destination operand.
981  for (unsigned I = 0; I < NumElts; ++I) {
982  Constant *COp = V->getAggregateElement(I);
983  if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
984  return nullptr;
985 
986  if (isa<UndefValue>(COp)) {
987  Indexes[I] = UndefValue::get(MaskEltTy);
988  continue;
989  }
990 
991  int8_t Index = cast<ConstantInt>(COp)->getValue().getZExtValue();
992 
993  // If the most significant bit (bit[7]) of each byte of the shuffle
994  // control mask is set, then zero is written in the result byte.
995  // The zero vector is in the right-hand side of the resulting
996  // shufflevector.
997 
998  // The value of each index for the high 128-bit lane is the least
999  // significant 4 bits of the respective shuffle control byte.
1000  Index = ((Index < 0) ? NumElts : Index & 0x0F) + (I & 0xF0);
1001  Indexes[I] = ConstantInt::get(MaskEltTy, Index);
1002  }
1003 
1004  auto ShuffleMask = ConstantVector::get(makeArrayRef(Indexes, NumElts));
1005  auto V1 = II.getArgOperand(0);
1006  auto V2 = Constant::getNullValue(VecTy);
1007  return Builder.CreateShuffleVector(V1, V2, ShuffleMask);
1008 }
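// Illustrative example: when the pshufb control vector is constant, each
// control byte is turned into a shufflevector index. A byte with bit 7 set
// maps to an index >= NumElts and so selects an element of the zero vector
// used as the second shuffle operand; any other byte contributes its low
// 4 bits plus its 128-bit lane offset. A <16 x i8> control of all zeros
// therefore just splats element 0 of the source.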
1009 
1010 /// Attempt to convert vpermilvar* to shufflevector if the mask is constant.
1011 static Value *simplifyX86vpermilvar(const IntrinsicInst &II,
1012  InstCombiner::BuilderTy &Builder) {
1013  Constant *V = dyn_cast<Constant>(II.getArgOperand(1));
1014  if (!V)
1015  return nullptr;
1016 
1017  auto *VecTy = cast<VectorType>(II.getType());
1018  auto *MaskEltTy = Type::getInt32Ty(II.getContext());
1019  unsigned NumElts = VecTy->getVectorNumElements();
1020  bool IsPD = VecTy->getScalarType()->isDoubleTy();
1021  unsigned NumLaneElts = IsPD ? 2 : 4;
1022  assert(NumElts == 16 || NumElts == 8 || NumElts == 4 || NumElts == 2);
1023 
1024  // Construct a shuffle mask from constant integers or UNDEFs.
1025  Constant *Indexes[16] = {nullptr};
1026 
1027  // The intrinsics only read one or two bits, clear the rest.
1028  for (unsigned I = 0; I < NumElts; ++I) {
1029  Constant *COp = V->getAggregateElement(I);
1030  if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
1031  return nullptr;
1032 
1033  if (isa<UndefValue>(COp)) {
1034  Indexes[I] = UndefValue::get(MaskEltTy);
1035  continue;
1036  }
1037 
1038  APInt Index = cast<ConstantInt>(COp)->getValue();
1039  Index = Index.zextOrTrunc(32).getLoBits(2);
1040 
1041  // The PD variants use bit 1 to select the per-lane element index, so
1042  // shift down to convert to a generic shuffle mask index.
1043  if (IsPD)
1044  Index.lshrInPlace(1);
1045 
1046  // The _256 variants are a bit trickier since the mask bits always index
1047  // into the corresponding 128-bit half. In order to convert to a generic
1048  // shuffle, we have to make that explicit.
1049  Index += APInt(32, (I / NumLaneElts) * NumLaneElts);
1050 
1051  Indexes[I] = ConstantInt::get(MaskEltTy, Index);
1052  }
1053 
1054  auto ShuffleMask = ConstantVector::get(makeArrayRef(Indexes, NumElts));
1055  auto V1 = II.getArgOperand(0);
1056  auto V2 = UndefValue::get(V1->getType());
1057  return Builder.CreateShuffleVector(V1, V2, ShuffleMask);
1058 }
1059 
1060 /// Attempt to convert vpermd/vpermps to shufflevector if the mask is constant.
1061 static Value *simplifyX86vpermv(const IntrinsicInst &II,
1062  InstCombiner::BuilderTy &Builder) {
1063  auto *V = dyn_cast<Constant>(II.getArgOperand(1));
1064  if (!V)
1065  return nullptr;
1066 
1067  auto *VecTy = cast<VectorType>(II.getType());
1068  auto *MaskEltTy = Type::getInt32Ty(II.getContext());
1069  unsigned Size = VecTy->getNumElements();
1070  assert((Size == 4 || Size == 8 || Size == 16 || Size == 32 || Size == 64) &&
1071  "Unexpected shuffle mask size");
1072 
1073  // Construct a shuffle mask from constant integers or UNDEFs.
1074  Constant *Indexes[64] = {nullptr};
1075 
1076  for (unsigned I = 0; I < Size; ++I) {
1077  Constant *COp = V->getAggregateElement(I);
1078  if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
1079  return nullptr;
1080 
1081  if (isa<UndefValue>(COp)) {
1082  Indexes[I] = UndefValue::get(MaskEltTy);
1083  continue;
1084  }
1085 
1086  uint32_t Index = cast<ConstantInt>(COp)->getZExtValue();
1087  Index &= Size - 1;
1088  Indexes[I] = ConstantInt::get(MaskEltTy, Index);
1089  }
1090 
1091  auto ShuffleMask = ConstantVector::get(makeArrayRef(Indexes, Size));
1092  auto V1 = II.getArgOperand(0);
1093  auto V2 = UndefValue::get(VecTy);
1094  return Builder.CreateShuffleVector(V1, V2, ShuffleMask);
1095 }
1096 
1097 /// Decode XOP integer vector comparison intrinsics.
1098 static Value *simplifyX86vpcom(const IntrinsicInst &II,
1099  InstCombiner::BuilderTy &Builder,
1100  bool IsSigned) {
1101  if (auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
1102  uint64_t Imm = CInt->getZExtValue() & 0x7;
1103  VectorType *VecTy = cast<VectorType>(II.getType());
1104  ICmpInst::Predicate Pred = ICmpInst::ICMP_EQ;
1105 
1106  switch (Imm) {
1107  case 0x0:
1108  Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
1109  break;
1110  case 0x1:
1111  Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
1112  break;
1113  case 0x2:
1114  Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
1115  break;
1116  case 0x3:
1117  Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
1118  break;
1119  case 0x4:
1120  Pred = ICmpInst::ICMP_EQ; break;
1121  case 0x5:
1122  Pred = ICmpInst::ICMP_NE; break;
1123  case 0x6:
1124  return ConstantInt::getSigned(VecTy, 0); // FALSE
1125  case 0x7:
1126  return ConstantInt::getSigned(VecTy, -1); // TRUE
1127  }
1128 
1129  if (Value *Cmp = Builder.CreateICmp(Pred, II.getArgOperand(0),
1130  II.getArgOperand(1)))
1131  return Builder.CreateSExtOrTrunc(Cmp, VecTy);
1132  }
1133  return nullptr;
1134 }
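// Illustrative example: an XOP unsigned byte comparison with immediate 0,
//   %r = call <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8> %a, <16 x i8> %b, i8 0)
// is lowered (IsSigned == false) to a generic compare plus sign extension:
//   %c = icmp ult <16 x i8> %a, %b
//   %r = sext <16 x i1> %c to <16 x i8>
// while immediates 6 and 7 fold directly to all-zeros and all-ones vectors.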
1135 
1136 static Value *simplifyMinnumMaxnum(const IntrinsicInst &II) {
1137  Value *Arg0 = II.getArgOperand(0);
1138  Value *Arg1 = II.getArgOperand(1);
1139 
1140  // fmin(x, x) -> x
1141  if (Arg0 == Arg1)
1142  return Arg0;
1143 
1144  const auto *C1 = dyn_cast<ConstantFP>(Arg1);
1145 
1146  // fmin(x, nan) -> x
1147  if (C1 && C1->isNaN())
1148  return Arg0;
1149 
1150  // Returning the other value is safe: if undef were NaN we would return the
1151  // other value anyway, and a NaN can only result if both operands are NaN.
1152  //
1153  // fmin(undef, x) -> x
1154  if (isa<UndefValue>(Arg0))
1155  return Arg1;
1156 
1157  // fmin(x, undef) -> x
1158  if (isa<UndefValue>(Arg1))
1159  return Arg0;
1160 
1161  Value *X = nullptr;
1162  Value *Y = nullptr;
1163  if (II.getIntrinsicID() == Intrinsic::minnum) {
1164  // fmin(x, fmin(x, y)) -> fmin(x, y)
1165  // fmin(y, fmin(x, y)) -> fmin(x, y)
1166  if (match(Arg1, m_FMin(m_Value(X), m_Value(Y)))) {
1167  if (Arg0 == X || Arg0 == Y)
1168  return Arg1;
1169  }
1170 
1171  // fmin(fmin(x, y), x) -> fmin(x, y)
1172  // fmin(fmin(x, y), y) -> fmin(x, y)
1173  if (match(Arg0, m_FMin(m_Value(X), m_Value(Y)))) {
1174  if (Arg1 == X || Arg1 == Y)
1175  return Arg0;
1176  }
1177 
1178  // TODO: fmin(nnan x, inf) -> x
1179  // TODO: fmin(nnan ninf x, flt_max) -> x
1180  if (C1 && C1->isInfinity()) {
1181  // fmin(x, -inf) -> -inf
1182  if (C1->isNegative())
1183  return Arg1;
1184  }
1185  } else {
1187  // fmax(x, fmax(x, y)) -> fmax(x, y)
1188  // fmax(y, fmax(x, y)) -> fmax(x, y)
1189  if (match(Arg1, m_FMax(m_Value(X), m_Value(Y)))) {
1190  if (Arg0 == X || Arg0 == Y)
1191  return Arg1;
1192  }
1193 
1194  // fmax(fmax(x, y), x) -> fmax(x, y)
1195  // fmax(fmax(x, y), y) -> fmax(x, y)
1196  if (match(Arg0, m_FMax(m_Value(X), m_Value(Y)))) {
1197  if (Arg1 == X || Arg1 == Y)
1198  return Arg0;
1199  }
1200 
1201  // TODO: fmax(nnan x, -inf) -> x
1202  // TODO: fmax(nnan ninf x, -flt_max) -> x
1203  if (C1 && C1->isInfinity()) {
1204  // fmax(x, inf) -> inf
1205  if (!C1->isNegative())
1206  return Arg1;
1207  }
1208  }
1209  return nullptr;
1210 }
1211 
1212 static bool maskIsAllOneOrUndef(Value *Mask) {
1213  auto *ConstMask = dyn_cast<Constant>(Mask);
1214  if (!ConstMask)
1215  return false;
1216  if (ConstMask->isAllOnesValue() || isa<UndefValue>(ConstMask))
1217  return true;
1218  for (unsigned I = 0, E = ConstMask->getType()->getVectorNumElements(); I != E;
1219  ++I) {
1220  if (auto *MaskElt = ConstMask->getAggregateElement(I))
1221  if (MaskElt->isAllOnesValue() || isa<UndefValue>(MaskElt))
1222  continue;
1223  return false;
1224  }
1225  return true;
1226 }
1227 
1228 static Value *simplifyMaskedLoad(const IntrinsicInst &II,
1229  InstCombiner::BuilderTy &Builder) {
1230  // If the mask is all ones or undefs, this is a plain vector load of the 1st
1231  // argument.
1232  if (maskIsAllOneOrUndef(II.getArgOperand(2))) {
1233  Value *LoadPtr = II.getArgOperand(0);
1234  unsigned Alignment = cast<ConstantInt>(II.getArgOperand(1))->getZExtValue();
1235  return Builder.CreateAlignedLoad(LoadPtr, Alignment, "unmaskedload");
1236  }
1237 
1238  return nullptr;
1239 }
1240 
1241 static Instruction *simplifyMaskedStore(IntrinsicInst &II, InstCombiner &IC) {
1242  auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(3));
1243  if (!ConstMask)
1244  return nullptr;
1245 
1246  // If the mask is all zeros, this instruction does nothing.
1247  if (ConstMask->isNullValue())
1248  return IC.eraseInstFromFunction(II);
1249 
1250  // If the mask is all ones, this is a plain vector store of the 1st argument.
1251  if (ConstMask->isAllOnesValue()) {
1252  Value *StorePtr = II.getArgOperand(1);
1253  unsigned Alignment = cast<ConstantInt>(II.getArgOperand(2))->getZExtValue();
1254  return new StoreInst(II.getArgOperand(0), StorePtr, false, Alignment);
1255  }
1256 
1257  return nullptr;
1258 }
1259 
1260 static Instruction *simplifyMaskedGather(IntrinsicInst &II, InstCombiner &IC) {
1261  // If the mask is all zeros, return the "passthru" argument of the gather.
1262  auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(2));
1263  if (ConstMask && ConstMask->isNullValue())
1264  return IC.replaceInstUsesWith(II, II.getArgOperand(3));
1265 
1266  return nullptr;
1267 }
1268 
1269 static Instruction *simplifyMaskedScatter(IntrinsicInst &II, InstCombiner &IC) {
1270  // If the mask is all zeros, a scatter does nothing.
1271  auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(3));
1272  if (ConstMask && ConstMask->isNullValue())
1273  return IC.eraseInstFromFunction(II);
1274 
1275  return nullptr;
1276 }
1277 
1278 static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombiner &IC) {
1279  assert((II.getIntrinsicID() == Intrinsic::cttz ||
1280  II.getIntrinsicID() == Intrinsic::ctlz) &&
1281  "Expected cttz or ctlz intrinsic");
1282  Value *Op0 = II.getArgOperand(0);
1283 
1284  KnownBits Known = IC.computeKnownBits(Op0, 0, &II);
1285 
1286  // Create a mask for bits above (ctlz) or below (cttz) the first known one.
1287  bool IsTZ = II.getIntrinsicID() == Intrinsic::cttz;
1288  unsigned PossibleZeros = IsTZ ? Known.countMaxTrailingZeros()
1289  : Known.countMaxLeadingZeros();
1290  unsigned DefiniteZeros = IsTZ ? Known.countMinTrailingZeros()
1291  : Known.countMinLeadingZeros();
1292 
1293  // If all bits above (ctlz) or below (cttz) the first known one are known
1294  // zero, this value is constant.
1295  // FIXME: This should be in InstSimplify because we're replacing an
1296  // instruction with a constant.
1297  if (PossibleZeros == DefiniteZeros) {
1298  auto *C = ConstantInt::get(Op0->getType(), DefiniteZeros);
1299  return IC.replaceInstUsesWith(II, C);
1300  }
1301 
1302  // If the input to cttz/ctlz is known to be non-zero,
1303  // then change the 'ZeroIsUndef' parameter to 'true'
1304  // because we know the zero behavior can't affect the result.
1305  if (!Known.One.isNullValue() ||
1306  isKnownNonZero(Op0, IC.getDataLayout(), 0, &IC.getAssumptionCache(), &II,
1307  &IC.getDominatorTree())) {
1308  if (!match(II.getArgOperand(1), m_One())) {
1309  II.setOperand(1, IC.Builder.getTrue());
1310  return &II;
1311  }
1312  }
1313 
1314  // Add range metadata since known bits can't completely reflect what we know.
1315  // TODO: Handle splat vectors.
1316  auto *IT = dyn_cast<IntegerType>(Op0->getType());
1317  if (IT && IT->getBitWidth() != 1 && !II.getMetadata(LLVMContext::MD_range)) {
1318  Metadata *LowAndHigh[] = {
1319  ConstantAsMetadata::get(ConstantInt::get(IT, DefiniteZeros)),
1320  ConstantAsMetadata::get(ConstantInt::get(IT, PossibleZeros + 1))};
1321  II.setMetadata(LLVMContext::MD_range,
1322  MDNode::get(II.getContext(), LowAndHigh));
1323  return &II;
1324  }
1325 
1326  return nullptr;
1327 }
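// Illustrative example: for
//   %r = call i32 @llvm.cttz.i32(i32 %x, i1 false)
// where some bit of %x is known to be one, the is_zero_undef flag is flipped
// to true first; on a later visit, if e.g. bit 3 is the lowest known-one bit
// and the bits below it are unknown, range metadata !{i32 0, i32 4} is
// attached because the result must lie between 0 and 3.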
1328 
1329 static Instruction *foldCtpop(IntrinsicInst &II, InstCombiner &IC) {
1330  assert(II.getIntrinsicID() == Intrinsic::ctpop &&
1331  "Expected ctpop intrinsic");
1332  Value *Op0 = II.getArgOperand(0);
1333  // FIXME: Try to simplify vectors of integers.
1334  auto *IT = dyn_cast<IntegerType>(Op0->getType());
1335  if (!IT)
1336  return nullptr;
1337 
1338  unsigned BitWidth = IT->getBitWidth();
1339  KnownBits Known(BitWidth);
1340  IC.computeKnownBits(Op0, Known, 0, &II);
1341 
1342  unsigned MinCount = Known.countMinPopulation();
1343  unsigned MaxCount = Known.countMaxPopulation();
1344 
1345  // Add range metadata since known bits can't completely reflect what we know.
1346  if (IT->getBitWidth() != 1 && !II.getMetadata(LLVMContext::MD_range)) {
1347  Metadata *LowAndHigh[] = {
1348  ConstantAsMetadata::get(ConstantInt::get(IT, MinCount)),
1349  ConstantAsMetadata::get(ConstantInt::get(IT, MaxCount + 1))};
1350  II.setMetadata(LLVMContext::MD_range,
1351  MDNode::get(II.getContext(), LowAndHigh));
1352  return &II;
1353  }
1354 
1355  return nullptr;
1356 }
1357 
1358 // TODO: If the x86 backend knew how to convert a bool vector mask back to an
1359 // XMM register mask efficiently, we could transform all x86 masked intrinsics
1360 // to LLVM masked intrinsics and remove the x86 masked intrinsic defs.
1361 static Instruction *simplifyX86MaskedLoad(IntrinsicInst &II, InstCombiner &IC) {
1362  Value *Ptr = II.getOperand(0);
1363  Value *Mask = II.getOperand(1);
1364  Constant *ZeroVec = Constant::getNullValue(II.getType());
1365 
1366  // Special case a zero mask since that's not a ConstantDataVector.
1367  // This masked load instruction creates a zero vector.
1368  if (isa<ConstantAggregateZero>(Mask))
1369  return IC.replaceInstUsesWith(II, ZeroVec);
1370 
1371  auto *ConstMask = dyn_cast<ConstantDataVector>(Mask);
1372  if (!ConstMask)
1373  return nullptr;
1374 
1375  // The mask is constant. Convert this x86 intrinsic to the LLVM intrinsic
1376  // to allow target-independent optimizations.
1377 
1378  // First, cast the x86 intrinsic scalar pointer to a vector pointer to match
1379  // the LLVM intrinsic definition for the pointer argument.
1380  unsigned AddrSpace = cast<PointerType>(Ptr->getType())->getAddressSpace();
1381  PointerType *VecPtrTy = PointerType::get(II.getType(), AddrSpace);
1382  Value *PtrCast = IC.Builder.CreateBitCast(Ptr, VecPtrTy, "castvec");
1383 
1384  // Second, convert the x86 XMM integer vector mask to a vector of bools based
1385  // on each element's most significant bit (the sign bit).
1386  Constant *BoolMask = getNegativeIsTrueBoolVec(ConstMask);
1387 
1388  // The pass-through vector for an x86 masked load is a zero vector.
1389  CallInst *NewMaskedLoad =
1390  IC.Builder.CreateMaskedLoad(PtrCast, 1, BoolMask, ZeroVec);
1391  return IC.replaceInstUsesWith(II, NewMaskedLoad);
1392 }
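// Illustrative example: an x86 maskload whose mask operand is the constant
// <4 x i32> <i32 -1, i32 0, i32 -1, i32 0> is rewritten into the generic
//   %v = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %ptr, i32 1,
//            <4 x i1> <i1 true, i1 false, i1 true, i1 false>,
//            <4 x i32> zeroinitializer)
// where the i1 mask comes from the sign bit of each original mask element and
// the pass-through value is a zero vector.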
1393 
1394 // TODO: If the x86 backend knew how to convert a bool vector mask back to an
1395 // XMM register mask efficiently, we could transform all x86 masked intrinsics
1396 // to LLVM masked intrinsics and remove the x86 masked intrinsic defs.
1397 static bool simplifyX86MaskedStore(IntrinsicInst &II, InstCombiner &IC) {
1398  Value *Ptr = II.getOperand(0);
1399  Value *Mask = II.getOperand(1);
1400  Value *Vec = II.getOperand(2);
1401 
1402  // Special case a zero mask since that's not a ConstantDataVector:
1403  // this masked store instruction does nothing.
1404  if (isa<ConstantAggregateZero>(Mask)) {
1405  IC.eraseInstFromFunction(II);
1406  return true;
1407  }
1408 
1409  // The SSE2 version is too weird (e.g., unaligned but non-temporal) to do
1410  // anything else at this level.
1411  if (II.getIntrinsicID() == Intrinsic::x86_sse2_maskmov_dqu)
1412  return false;
1413 
1414  auto *ConstMask = dyn_cast<ConstantDataVector>(Mask);
1415  if (!ConstMask)
1416  return false;
1417 
1418  // The mask is constant. Convert this x86 intrinsic to the LLVM intrinsic
1419  // to allow target-independent optimizations.
1420 
1421  // First, cast the x86 intrinsic scalar pointer to a vector pointer to match
1422  // the LLVM intrinsic definition for the pointer argument.
1423  unsigned AddrSpace = cast<PointerType>(Ptr->getType())->getAddressSpace();
1424  PointerType *VecPtrTy = PointerType::get(Vec->getType(), AddrSpace);
1425  Value *PtrCast = IC.Builder.CreateBitCast(Ptr, VecPtrTy, "castvec");
1426 
1427  // Second, convert the x86 XMM integer vector mask to a vector of bools based
1428  // on each element's most significant bit (the sign bit).
1429  Constant *BoolMask = getNegativeIsTrueBoolVec(ConstMask);
1430 
1431  IC.Builder.CreateMaskedStore(Vec, PtrCast, 1, BoolMask);
1432 
1433  // 'Replace uses' doesn't work for stores. Erase the original masked store.
1434  IC.eraseInstFromFunction(II);
1435  return true;
1436 }
1437 
1438 // Constant fold llvm.amdgcn.fmed3 intrinsics for standard inputs.
1439 //
1440 // A single NaN input is folded to minnum, so we rely on that folding for
1441 // handling NaNs.
1442 static APFloat fmed3AMDGCN(const APFloat &Src0, const APFloat &Src1,
1443  const APFloat &Src2) {
1444  APFloat Max3 = maxnum(maxnum(Src0, Src1), Src2);
1445 
1446  APFloat::cmpResult Cmp0 = Max3.compare(Src0);
1447  assert(Cmp0 != APFloat::cmpUnordered && "nans handled separately");
1448  if (Cmp0 == APFloat::cmpEqual)
1449  return maxnum(Src1, Src2);
1450 
1451  APFloat::cmpResult Cmp1 = Max3.compare(Src1);
1452  assert(Cmp1 != APFloat::cmpUnordered && "nans handled separately");
1453  if (Cmp1 == APFloat::cmpEqual)
1454  return maxnum(Src0, Src2);
1455 
1456  return maxnum(Src0, Src1);
1457 }
1458 
1459 /// Convert a table lookup to shufflevector if the mask is constant.
1460 /// This could benefit tbl1 if the mask is { 7,6,5,4,3,2,1,0 }, in
1461 /// which case we could lower the shufflevector with rev64 instructions
1462 /// as it's actually a byte reverse.
1463 static Value *simplifyNeonTbl1(const IntrinsicInst &II,
1464  InstCombiner::BuilderTy &Builder) {
1465  // Bail out if the mask is not a constant.
1466  auto *C = dyn_cast<Constant>(II.getArgOperand(1));
1467  if (!C)
1468  return nullptr;
1469 
1470  auto *VecTy = cast<VectorType>(II.getType());
1471  unsigned NumElts = VecTy->getNumElements();
1472 
1473  // Only perform this transformation for <8 x i8> vector types.
1474  if (!VecTy->getElementType()->isIntegerTy(8) || NumElts != 8)
1475  return nullptr;
1476 
1477  uint32_t Indexes[8];
1478 
1479  for (unsigned I = 0; I < NumElts; ++I) {
1480  Constant *COp = C->getAggregateElement(I);
1481 
1482  if (!COp || !isa<ConstantInt>(COp))
1483  return nullptr;
1484 
1485  Indexes[I] = cast<ConstantInt>(COp)->getLimitedValue();
1486 
1487  // Make sure the mask indices are in range.
1488  if (Indexes[I] >= NumElts)
1489  return nullptr;
1490  }
1491 
1492  auto *ShuffleMask = ConstantDataVector::get(II.getContext(),
1493  makeArrayRef(Indexes));
1494  auto *V1 = II.getArgOperand(0);
1495  auto *V2 = Constant::getNullValue(V1->getType());
1496  return Builder.CreateShuffleVector(V1, V2, ShuffleMask);
1497 }
1498 
1499 /// Convert a vector load intrinsic into a simple llvm load instruction.
1500 /// This is beneficial when the underlying object being addressed comes
1501 /// from a constant, since we get constant-folding for free.
1502 static Value *simplifyNeonVld1(const IntrinsicInst &II,
1503  unsigned MemAlign,
1504  InstCombiner::BuilderTy &Builder) {
1505  auto *IntrAlign = dyn_cast<ConstantInt>(II.getArgOperand(1));
1506 
1507  if (!IntrAlign)
1508  return nullptr;
1509 
1510  unsigned Alignment = IntrAlign->getLimitedValue() < MemAlign ?
1511  MemAlign : IntrAlign->getLimitedValue();
1512 
1513  if (!isPowerOf2_32(Alignment))
1514  return nullptr;
1515 
1516  auto *BCastInst = Builder.CreateBitCast(II.getArgOperand(0),
1517  PointerType::get(II.getType(), 0));
1518  return Builder.CreateAlignedLoad(BCastInst, Alignment);
1519 }
1520 
1521 // Returns true iff the 2 intrinsics have the same operands, limiting the
1522 // comparison to the first NumOperands.
1523 static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E,
1524  unsigned NumOperands) {
1525  assert(I.getNumArgOperands() >= NumOperands && "Not enough operands");
1526  assert(E.getNumArgOperands() >= NumOperands && "Not enough operands");
1527  for (unsigned i = 0; i < NumOperands; i++)
1528  if (I.getArgOperand(i) != E.getArgOperand(i))
1529  return false;
1530  return true;
1531 }
1532 
1533 // Remove trivially empty start/end intrinsic ranges, i.e. a start
1534 // immediately followed by an end (ignoring debuginfo or other
1535 // start/end intrinsics in between). As this handles only the most trivial
1536 // cases, tracking the nesting level is not needed:
1537 //
1538 // call @llvm.foo.start(i1 0) ; &I
1539 // call @llvm.foo.start(i1 0)
1540 // call @llvm.foo.end(i1 0) ; This one will not be skipped: it will be removed
1541 // call @llvm.foo.end(i1 0)
1542 static bool removeTriviallyEmptyRange(IntrinsicInst &I, unsigned StartID,
1543  unsigned EndID, InstCombiner &IC) {
1544  assert(I.getIntrinsicID() == StartID &&
1545  "Start intrinsic does not have expected ID");
1546  BasicBlock::iterator BI(I), BE(I.getParent()->end());
1547  for (++BI; BI != BE; ++BI) {
1548  if (auto *E = dyn_cast<IntrinsicInst>(BI)) {
1549  if (isa<DbgInfoIntrinsic>(E) || E->getIntrinsicID() == StartID)
1550  continue;
1551  if (E->getIntrinsicID() == EndID &&
1552  haveSameOperands(I, *E, E->getNumArgOperands())) {
1553  IC.eraseInstFromFunction(*E);
1554  IC.eraseInstFromFunction(I);
1555  return true;
1556  }
1557  }
1558  break;
1559  }
1560 
1561  return false;
1562 }
1563 
1564 // Convert NVVM intrinsics to target-generic LLVM code where possible.
1565 static Instruction *SimplifyNVVMIntrinsic(IntrinsicInst *II, InstCombiner &IC) {
1566  // Each NVVM intrinsic we can simplify can be replaced with one of:
1567  //
1568  // * an LLVM intrinsic,
1569  // * an LLVM cast operation,
1570  // * an LLVM binary operation, or
1571  // * ad-hoc LLVM IR for the particular operation.
1572 
1573  // Some transformations are only valid when the module's
1574  // flush-denormals-to-zero (ftz) setting is true/false, whereas other
1575  // transformations are valid regardless of the module's ftz setting.
1576  enum FtzRequirementTy {
1577  FTZ_Any, // Any ftz setting is ok.
1578  FTZ_MustBeOn, // Transformation is valid only if ftz is on.
1579  FTZ_MustBeOff, // Transformation is valid only if ftz is off.
1580  };
1581  // Classes of NVVM intrinsics that can't be replaced one-to-one with a
1582  // target-generic intrinsic, cast op, or binary op but that we can nonetheless
1583  // simplify.
1584  enum SpecialCase {
1585  SPC_Reciprocal,
1586  };
1587 
1588  // SimplifyAction is a poor-man's variant (plus an additional flag) that
1589  // represents how to replace an NVVM intrinsic with target-generic LLVM IR.
1590  struct SimplifyAction {
1591  // Invariant: At most one of these Optionals has a value.
1592  Optional<Intrinsic::ID> IID;
1593  Optional<Instruction::CastOps> CastOp;
1594  Optional<Instruction::BinaryOps> BinaryOp;
1595  Optional<SpecialCase> Special;
1596 
1597  FtzRequirementTy FtzRequirement = FTZ_Any;
1598 
1599  SimplifyAction() = default;
1600 
1601  SimplifyAction(Intrinsic::ID IID, FtzRequirementTy FtzReq)
1602  : IID(IID), FtzRequirement(FtzReq) {}
1603 
1604  // Cast operations don't have anything to do with FTZ, so we skip that
1605  // argument.
1606  SimplifyAction(Instruction::CastOps CastOp) : CastOp(CastOp) {}
1607 
1608  SimplifyAction(Instruction::BinaryOps BinaryOp, FtzRequirementTy FtzReq)
1609  : BinaryOp(BinaryOp), FtzRequirement(FtzReq) {}
1610 
1611  SimplifyAction(SpecialCase Special, FtzRequirementTy FtzReq)
1612  : Special(Special), FtzRequirement(FtzReq) {}
1613  };
1614 
1615  // Try to generate a SimplifyAction describing how to replace our
1616  // IntrinsicInstr with target-generic LLVM IR.
1617  const SimplifyAction Action = [II]() -> SimplifyAction {
1618  switch (II->getIntrinsicID()) {
1619  // NVVM intrinsics that map directly to LLVM intrinsics.
1620  case Intrinsic::nvvm_ceil_d:
1621  return {Intrinsic::ceil, FTZ_Any};
1622  case Intrinsic::nvvm_ceil_f:
1623  return {Intrinsic::ceil, FTZ_MustBeOff};
1624  case Intrinsic::nvvm_ceil_ftz_f:
1625  return {Intrinsic::ceil, FTZ_MustBeOn};
1626  case Intrinsic::nvvm_fabs_d:
1627  return {Intrinsic::fabs, FTZ_Any};
1628  case Intrinsic::nvvm_fabs_f:
1629  return {Intrinsic::fabs, FTZ_MustBeOff};
1630  case Intrinsic::nvvm_fabs_ftz_f:
1631  return {Intrinsic::fabs, FTZ_MustBeOn};
1632  case Intrinsic::nvvm_floor_d:
1633  return {Intrinsic::floor, FTZ_Any};
1634  case Intrinsic::nvvm_floor_f:
1635  return {Intrinsic::floor, FTZ_MustBeOff};
1636  case Intrinsic::nvvm_floor_ftz_f:
1637  return {Intrinsic::floor, FTZ_MustBeOn};
1638  case Intrinsic::nvvm_fma_rn_d:
1639  return {Intrinsic::fma, FTZ_Any};
1640  case Intrinsic::nvvm_fma_rn_f:
1641  return {Intrinsic::fma, FTZ_MustBeOff};
1642  case Intrinsic::nvvm_fma_rn_ftz_f:
1643  return {Intrinsic::fma, FTZ_MustBeOn};
1644  case Intrinsic::nvvm_fmax_d:
1645  return {Intrinsic::maxnum, FTZ_Any};
1646  case Intrinsic::nvvm_fmax_f:
1647  return {Intrinsic::maxnum, FTZ_MustBeOff};
1648  case Intrinsic::nvvm_fmax_ftz_f:
1649  return {Intrinsic::maxnum, FTZ_MustBeOn};
1650  case Intrinsic::nvvm_fmin_d:
1651  return {Intrinsic::minnum, FTZ_Any};
1652  case Intrinsic::nvvm_fmin_f:
1653  return {Intrinsic::minnum, FTZ_MustBeOff};
1654  case Intrinsic::nvvm_fmin_ftz_f:
1655  return {Intrinsic::minnum, FTZ_MustBeOn};
1656  case Intrinsic::nvvm_round_d:
1657  return {Intrinsic::round, FTZ_Any};
1658  case Intrinsic::nvvm_round_f:
1659  return {Intrinsic::round, FTZ_MustBeOff};
1660  case Intrinsic::nvvm_round_ftz_f:
1661  return {Intrinsic::round, FTZ_MustBeOn};
1662  case Intrinsic::nvvm_sqrt_rn_d:
1663  return {Intrinsic::sqrt, FTZ_Any};
1664  case Intrinsic::nvvm_sqrt_f:
1665  // nvvm_sqrt_f is a special case. For most intrinsics, foo_ftz_f is the
1666  // ftz version, and foo_f is the non-ftz version. But nvvm_sqrt_f adopts
1667  // the ftz-ness of the surrounding code. sqrt_rn_f and sqrt_rn_ftz_f are
1668  // the versions with explicit ftz-ness.
1669  return {Intrinsic::sqrt, FTZ_Any};
1670  case Intrinsic::nvvm_sqrt_rn_f:
1671  return {Intrinsic::sqrt, FTZ_MustBeOff};
1672  case Intrinsic::nvvm_sqrt_rn_ftz_f:
1673  return {Intrinsic::sqrt, FTZ_MustBeOn};
1674  case Intrinsic::nvvm_trunc_d:
1675  return {Intrinsic::trunc, FTZ_Any};
1676  case Intrinsic::nvvm_trunc_f:
1677  return {Intrinsic::trunc, FTZ_MustBeOff};
1678  case Intrinsic::nvvm_trunc_ftz_f:
1679  return {Intrinsic::trunc, FTZ_MustBeOn};
1680 
1681  // NVVM intrinsics that map to LLVM cast operations.
1682  //
1683  // Note that llvm's target-generic conversion operators correspond to the rz
1684  // (round to zero) versions of the nvvm conversion intrinsics, even though
1685  // most everything else here uses the rn (round to nearest even) nvvm ops.
1686  case Intrinsic::nvvm_d2i_rz:
1687  case Intrinsic::nvvm_f2i_rz:
1688  case Intrinsic::nvvm_d2ll_rz:
1689  case Intrinsic::nvvm_f2ll_rz:
1690  return {Instruction::FPToSI};
1691  case Intrinsic::nvvm_d2ui_rz:
1692  case Intrinsic::nvvm_f2ui_rz:
1693  case Intrinsic::nvvm_d2ull_rz:
1694  case Intrinsic::nvvm_f2ull_rz:
1695  return {Instruction::FPToUI};
1696  case Intrinsic::nvvm_i2d_rz:
1697  case Intrinsic::nvvm_i2f_rz:
1698  case Intrinsic::nvvm_ll2d_rz:
1699  case Intrinsic::nvvm_ll2f_rz:
1700  return {Instruction::SIToFP};
1701  case Intrinsic::nvvm_ui2d_rz:
1702  case Intrinsic::nvvm_ui2f_rz:
1703  case Intrinsic::nvvm_ull2d_rz:
1704  case Intrinsic::nvvm_ull2f_rz:
1705  return {Instruction::UIToFP};
1706 
1707  // NVVM intrinsics that map to LLVM binary ops.
1708  case Intrinsic::nvvm_add_rn_d:
1709  return {Instruction::FAdd, FTZ_Any};
1710  case Intrinsic::nvvm_add_rn_f:
1711  return {Instruction::FAdd, FTZ_MustBeOff};
1712  case Intrinsic::nvvm_add_rn_ftz_f:
1713  return {Instruction::FAdd, FTZ_MustBeOn};
1714  case Intrinsic::nvvm_mul_rn_d:
1715  return {Instruction::FMul, FTZ_Any};
1716  case Intrinsic::nvvm_mul_rn_f:
1717  return {Instruction::FMul, FTZ_MustBeOff};
1718  case Intrinsic::nvvm_mul_rn_ftz_f:
1719  return {Instruction::FMul, FTZ_MustBeOn};
1720  case Intrinsic::nvvm_div_rn_d:
1721  return {Instruction::FDiv, FTZ_Any};
1722  case Intrinsic::nvvm_div_rn_f:
1723  return {Instruction::FDiv, FTZ_MustBeOff};
1724  case Intrinsic::nvvm_div_rn_ftz_f:
1725  return {Instruction::FDiv, FTZ_MustBeOn};
1726 
1727  // The remainder of cases are NVVM intrinsics that map to LLVM idioms, but
1728  // need special handling.
1729  //
1730  // We seem to be missing intrinsics for rcp.approx.{ftz.}f32, which is just
1731  // as well.
1732  case Intrinsic::nvvm_rcp_rn_d:
1733  return {SPC_Reciprocal, FTZ_Any};
1734  case Intrinsic::nvvm_rcp_rn_f:
1735  return {SPC_Reciprocal, FTZ_MustBeOff};
1736  case Intrinsic::nvvm_rcp_rn_ftz_f:
1737  return {SPC_Reciprocal, FTZ_MustBeOn};
1738 
1739  // We do not currently simplify intrinsics that give an approximate answer.
1740  // These include:
1741  //
1742  // - nvvm_cos_approx_{f,ftz_f}
1743  // - nvvm_ex2_approx_{d,f,ftz_f}
1744  // - nvvm_lg2_approx_{d,f,ftz_f}
1745  // - nvvm_sin_approx_{f,ftz_f}
1746  // - nvvm_sqrt_approx_{f,ftz_f}
1747  // - nvvm_rsqrt_approx_{d,f,ftz_f}
1748  // - nvvm_div_approx_{ftz_d,ftz_f,f}
1749  // - nvvm_rcp_approx_ftz_d
1750  //
1751  // Ideally we'd encode them as e.g. "fast call @llvm.cos", where "fast"
1752  // means that fastmath is enabled in the intrinsic. Unfortunately only
1753  // binary operators (currently) have a fastmath bit in SelectionDAG, so this
1754  // information gets lost and we can't select on it.
1755  //
1756  // TODO: div and rcp are lowered to a binary op, so these we could in theory
1757  // lower them to "fast fdiv".
1758 
1759  default:
1760  return {};
1761  }
1762  }();
1763 
1764  // If Action.FtzRequirementTy is not satisfied by the module's ftz state, we
1765  // can bail out now. (Notice that in the case that IID is not an NVVM
1766  // intrinsic, we don't have to look up any module metadata, as
1767  // FtzRequirementTy will be FTZ_Any.)
1768  if (Action.FtzRequirement != FTZ_Any) {
1769  bool FtzEnabled =
1770  II->getFunction()->getFnAttribute("nvptx-f32ftz").getValueAsString() ==
1771  "true";
1772 
1773  if (FtzEnabled != (Action.FtzRequirement == FTZ_MustBeOn))
1774  return nullptr;
1775  }
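 // Illustratively: @llvm.nvvm.floor.ftz.f is only simplified when the function
 // carries "nvptx-f32ftz"="true", while @llvm.nvvm.floor.f requires that
 // attribute to be absent or not "true". Double-precision variants are FTZ_Any
 // and skip this check entirely.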
1776 
1777  // Simplify to target-generic intrinsic.
1778  if (Action.IID) {
1779  SmallVector<Value *, 4> Args(II->arg_operands());
1780  // All the target-generic intrinsics currently of interest to us have one
1781  // type argument, equal to that of the nvvm intrinsic's argument.
1782  Type *Tys[] = {II->getArgOperand(0)->getType()};
1783  return CallInst::Create(
1784  Intrinsic::getDeclaration(II->getModule(), *Action.IID, Tys), Args);
1785  }
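 // For example (illustrative), @llvm.nvvm.ceil.d(double %x) is rebuilt here as a
 // call to @llvm.ceil.f64(double %x), keeping the original argument list.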
1786 
1787  // Simplify to target-generic binary op.
1788  if (Action.BinaryOp)
1789  return BinaryOperator::Create(*Action.BinaryOp, II->getArgOperand(0),
1790  II->getArgOperand(1), II->getName());
1791 
1792  // Simplify to target-generic cast op.
1793  if (Action.CastOp)
1794  return CastInst::Create(*Action.CastOp, II->getArgOperand(0), II->getType(),
1795  II->getName());
1796 
1797  // All that's left are the special cases.
1798  if (!Action.Special)
1799  return nullptr;
1800 
1801  switch (*Action.Special) {
1802  case SPC_Reciprocal:
1803  // Simplify reciprocal.
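 // (Illustratively, nvvm_rcp_rn_d(%x) becomes "fdiv double 1.0, %x".)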
1804  return BinaryOperator::Create(
1805  Instruction::FDiv, ConstantFP::get(II->getArgOperand(0)->getType(), 1),
1806  II->getArgOperand(0), II->getName());
1807  }
1808  llvm_unreachable("All SpecialCase enumerators should be handled in switch.");
1809 }
1810 
1811 Instruction *InstCombiner::visitVAStartInst(VAStartInst &I) {
1812  removeTriviallyEmptyRange(I, Intrinsic::vastart, Intrinsic::vaend, *this);
1813  return nullptr;
1814 }
1815 
1817  removeTriviallyEmptyRange(I, Intrinsic::vacopy, Intrinsic::vaend, *this);
1818  return nullptr;
1819 }
1820 
1821 /// CallInst simplification. This mostly only handles folding of intrinsic
1822 /// instructions. For normal calls, it allows visitCallSite to do the heavy
1823 /// lifting.
1824 Instruction *InstCombiner::visitCallInst(CallInst &CI) {
1825  if (Value *V = SimplifyCall(&CI, SQ.getWithInstruction(&CI)))
1826  return replaceInstUsesWith(CI, V);
1827 
1828  if (isFreeCall(&CI, &TLI))
1829  return visitFree(CI);
1830 
1831  // If the caller function is nounwind, mark the call as nounwind, even if the
1832  // callee isn't.
1833  if (CI.getFunction()->doesNotThrow() && !CI.doesNotThrow()) {
1834  CI.setDoesNotThrow();
1835  return &CI;
1836  }
1837 
1838  IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI);
1839  if (!II) return visitCallSite(&CI);
1840 
1841  // Intrinsics cannot occur in an invoke, so handle them here instead of in
1842  // visitCallSite.
1843  if (auto *MI = dyn_cast<AnyMemIntrinsic>(II)) {
1844  bool Changed = false;
1845 
1846  // memmove/cpy/set of zero bytes is a noop.
1847  if (Constant *NumBytes = dyn_cast<Constant>(MI->getLength())) {
1848  if (NumBytes->isNullValue())
1849  return eraseInstFromFunction(CI);
1850 
1851  if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes))
1852  if (CI->getZExtValue() == 1) {
1853  // Replace the instruction with just byte operations. We would
1854  // transform other cases to loads/stores, but we don't know if
1855  // alignment is sufficient.
1856  }
1857  }
1858 
1859  // No other transformations apply to volatile transfers.
1860  if (auto *M = dyn_cast<MemIntrinsic>(MI))
1861  if (M->isVolatile())
1862  return nullptr;
1863 
1864  // If we have a memmove and the source operation is a constant global,
1865  // then the source and dest pointers can't alias, so we can change this
1866  // into a call to memcpy.
1867  if (auto *MMI = dyn_cast<AnyMemMoveInst>(MI)) {
1868  if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(MMI->getSource()))
1869  if (GVSrc->isConstant()) {
1870  Module *M = CI.getModule();
1871  Intrinsic::ID MemCpyID =
1872  isa<AtomicMemMoveInst>(MMI)
1873  ? Intrinsic::memcpy_element_unordered_atomic
1874  : Intrinsic::memcpy;
1875  Type *Tys[3] = { CI.getArgOperand(0)->getType(),
1876  CI.getArgOperand(1)->getType(),
1877  CI.getArgOperand(2)->getType() };
1878  CI.setCalledFunction(Intrinsic::getDeclaration(M, MemCpyID, Tys));
1879  Changed = true;
1880  }
1881  }
1882 
1883  if (AnyMemTransferInst *MTI = dyn_cast<AnyMemTransferInst>(MI)) {
1884  // memmove(x,x,size) -> noop.
1885  if (MTI->getSource() == MTI->getDest())
1886  return eraseInstFromFunction(CI);
1887  }
1888 
1889  // If we can determine a pointer alignment that is bigger than currently
1890  // set, update the alignment.
1891  if (auto *MTI = dyn_cast<AnyMemTransferInst>(MI)) {
1892  if (Instruction *I = SimplifyAnyMemTransfer(MTI))
1893  return I;
1894  } else if (auto *MSI = dyn_cast<AnyMemSetInst>(MI)) {
1895  if (Instruction *I = SimplifyAnyMemSet(MSI))
1896  return I;
1897  }
1898 
1899  if (Changed) return II;
1900  }
1901 
1902  if (Instruction *I = SimplifyNVVMIntrinsic(II, *this))
1903  return I;
1904 
1905  auto SimplifyDemandedVectorEltsLow = [this](Value *Op, unsigned Width,
1906  unsigned DemandedWidth) {
1907  APInt UndefElts(Width, 0);
1908  APInt DemandedElts = APInt::getLowBitsSet(Width, DemandedWidth);
1909  return SimplifyDemandedVectorElts(Op, DemandedElts, UndefElts);
1910  };
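 // Illustrative use of the lambda above: calling it with Width = 4 and
 // DemandedWidth = 1 tells SimplifyDemandedVectorElts that only element 0 of Op
 // is actually used, which is how the scalar x86 intrinsics below are narrowed.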
1911 
1912  switch (II->getIntrinsicID()) {
1913  default: break;
1914  case Intrinsic::objectsize:
1915  if (ConstantInt *N =
1916  lowerObjectSizeCall(II, DL, &TLI, /*MustSucceed=*/false))
1917  return replaceInstUsesWith(CI, N);
1918  return nullptr;
1919  case Intrinsic::bswap: {
1920  Value *IIOperand = II->getArgOperand(0);
1921  Value *X = nullptr;
1922 
1923  // bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
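 // Worked example (illustrative): if %x is i64 and the trunc result is i32,
 // then C = 64 - 32 = 32 and the whole expression folds to trunc(lshr(%x, 32)),
 // i.e. the high 32 bits of %x.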
1924  if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) {
1925  unsigned C = X->getType()->getPrimitiveSizeInBits() -
1926  IIOperand->getType()->getPrimitiveSizeInBits();
1927  Value *CV = ConstantInt::get(X->getType(), C);
1928  Value *V = Builder.CreateLShr(X, CV);
1929  return new TruncInst(V, IIOperand->getType());
1930  }
1931  break;
1932  }
1933  case Intrinsic::masked_load:
1934  if (Value *SimplifiedMaskedOp = simplifyMaskedLoad(*II, Builder))
1935  return replaceInstUsesWith(CI, SimplifiedMaskedOp);
1936  break;
1937  case Intrinsic::masked_store:
1938  return simplifyMaskedStore(*II, *this);
1939  case Intrinsic::masked_gather:
1940  return simplifyMaskedGather(*II, *this);
1941  case Intrinsic::masked_scatter:
1942  return simplifyMaskedScatter(*II, *this);
1943 
1944  case Intrinsic::powi:
1945  if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
1946  // 0 and 1 are handled in instsimplify
1947 
1948  // powi(x, -1) -> 1/x
1949  if (Power->isMinusOne())
1950  return BinaryOperator::CreateFDiv(ConstantFP::get(CI.getType(), 1.0),
1951  II->getArgOperand(0));
1952  // powi(x, 2) -> x*x
1953  if (Power->equalsInt(2))
1954  return BinaryOperator::CreateFMul(II->getArgOperand(0),
1955  II->getArgOperand(0));
1956  }
1957  break;
1958 
1959  case Intrinsic::cttz:
1960  case Intrinsic::ctlz:
1961  if (auto *I = foldCttzCtlz(*II, *this))
1962  return I;
1963  break;
1964 
1965  case Intrinsic::ctpop:
1966  if (auto *I = foldCtpop(*II, *this))
1967  return I;
1968  break;
1969 
1970  case Intrinsic::uadd_with_overflow:
1971  case Intrinsic::sadd_with_overflow:
1972  case Intrinsic::umul_with_overflow:
1973  case Intrinsic::smul_with_overflow:
1974  if (isa<Constant>(II->getArgOperand(0)) &&
1975  !isa<Constant>(II->getArgOperand(1))) {
1976  // Canonicalize constants into the RHS.
1977  Value *LHS = II->getArgOperand(0);
1978  II->setArgOperand(0, II->getArgOperand(1));
1979  II->setArgOperand(1, LHS);
1980  return II;
1981  }
1982  LLVM_FALLTHROUGH;
1983 
1984  case Intrinsic::usub_with_overflow:
1985  case Intrinsic::ssub_with_overflow: {
1986  OverflowCheckFlavor OCF =
1987  IntrinsicIDToOverflowCheckFlavor(II->getIntrinsicID());
1988  assert(OCF != OCF_INVALID && "unexpected!");
1989 
1990  Value *OperationResult = nullptr;
1991  Constant *OverflowResult = nullptr;
1992  if (OptimizeOverflowCheck(OCF, II->getArgOperand(0), II->getArgOperand(1),
1993  *II, OperationResult, OverflowResult))
1994  return CreateOverflowTuple(II, OperationResult, OverflowResult);
1995 
1996  break;
1997  }
1998 
1999  case Intrinsic::minnum:
2000  case Intrinsic::maxnum: {
2001  Value *Arg0 = II->getArgOperand(0);
2002  Value *Arg1 = II->getArgOperand(1);
2003  // Canonicalize constants to the RHS.
2004  if (isa<ConstantFP>(Arg0) && !isa<ConstantFP>(Arg1)) {
2005  II->setArgOperand(0, Arg1);
2006  II->setArgOperand(1, Arg0);
2007  return II;
2008  }
2009 
2010  // FIXME: Simplifications should be in instsimplify.
2011  if (Value *V = simplifyMinnumMaxnum(*II))
2012  return replaceInstUsesWith(*II, V);
2013 
2014  Value *X, *Y;
2015  if (match(Arg0, m_FNeg(m_Value(X))) && match(Arg1, m_FNeg(m_Value(Y))) &&
2016  (Arg0->hasOneUse() || Arg1->hasOneUse())) {
2017  // If both operands are negated, invert the call and negate the result:
2018  // minnum(-X, -Y) --> -(maxnum(X, Y))
2019  // maxnum(-X, -Y) --> -(minnum(X, Y))
2020  Intrinsic::ID NewIID = II->getIntrinsicID() == Intrinsic::maxnum ?
2021  Intrinsic::minnum : Intrinsic::maxnum;
2022  Value *NewCall = Builder.CreateIntrinsic(NewIID, { X, Y }, II);
2023  Instruction *FNeg = BinaryOperator::CreateFNeg(NewCall);
2024  FNeg->copyIRFlags(II);
2025  return FNeg;
2026  }
2027  break;
2028  }
2029  case Intrinsic::fmuladd: {
2030  // Canonicalize fast fmuladd to the separate fmul + fadd.
2031  if (II->isFast()) {
2032  BuilderTy::FastMathFlagGuard Guard(Builder);
2033  Builder.setFastMathFlags(II->getFastMathFlags());
2034  Value *Mul = Builder.CreateFMul(II->getArgOperand(0),
2035  II->getArgOperand(1));
2036  Value *Add = Builder.CreateFAdd(Mul, II->getArgOperand(2));
2037  Add->takeName(II);
2038  return replaceInstUsesWith(*II, Add);
2039  }
2040 
2041  LLVM_FALLTHROUGH;
2042  }
2043  case Intrinsic::fma: {
2044  Value *Src0 = II->getArgOperand(0);
2045  Value *Src1 = II->getArgOperand(1);
2046 
2047  // Canonicalize constant multiply operand to Src1.
2048  if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
2049  II->setArgOperand(0, Src1);
2050  II->setArgOperand(1, Src0);
2051  std::swap(Src0, Src1);
2052  }
2053 
2054  // fma fneg(x), fneg(y), z -> fma x, y, z
2055  Value *X, *Y;
2056  if (match(Src0, m_FNeg(m_Value(X))) && match(Src1, m_FNeg(m_Value(Y)))) {
2057  II->setArgOperand(0, X);
2058  II->setArgOperand(1, Y);
2059  return II;
2060  }
2061 
2062  // fma fabs(x), fabs(x), z -> fma x, x, z
2063  if (match(Src0, m_Intrinsic<Intrinsic::fabs>(m_Value(X))) &&
2064  match(Src1, m_Intrinsic<Intrinsic::fabs>(m_Specific(X)))) {
2065  II->setArgOperand(0, X);
2066  II->setArgOperand(1, X);
2067  return II;
2068  }
2069 
2070  // fma x, 1, z -> fadd x, z
2071  if (match(Src1, m_FPOne())) {
2072  auto *FAdd = BinaryOperator::CreateFAdd(Src0, II->getArgOperand(2));
2073  FAdd->copyFastMathFlags(II);
2074  return FAdd;
2075  }
2076 
2077  break;
2078  }
2079  case Intrinsic::fabs: {
2080  Value *Cond;
2081  Constant *LHS, *RHS;
2082  if (match(II->getArgOperand(0),
2083  m_Select(m_Value(Cond), m_Constant(LHS), m_Constant(RHS)))) {
2084  CallInst *Call0 = Builder.CreateCall(II->getCalledFunction(), {LHS});
2085  CallInst *Call1 = Builder.CreateCall(II->getCalledFunction(), {RHS});
2086  return SelectInst::Create(Cond, Call0, Call1);
2087  }
2088 
2089  LLVM_FALLTHROUGH;
2090  }
2091  case Intrinsic::ceil:
2092  case Intrinsic::floor:
2093  case Intrinsic::round:
2094  case Intrinsic::nearbyint:
2095  case Intrinsic::rint:
2096  case Intrinsic::trunc: {
2097  Value *ExtSrc;
2098  if (match(II->getArgOperand(0), m_OneUse(m_FPExt(m_Value(ExtSrc))))) {
2099  // Narrow the call: intrinsic (fpext x) -> fpext (intrinsic x)
2100  Value *NarrowII = Builder.CreateIntrinsic(II->getIntrinsicID(),
2101  { ExtSrc }, II);
2102  return new FPExtInst(NarrowII, II->getType());
2103  }
2104  break;
2105  }
2106  case Intrinsic::cos:
2107  case Intrinsic::amdgcn_cos: {
2108  Value *SrcSrc;
2109  Value *Src = II->getArgOperand(0);
2110  if (match(Src, m_FNeg(m_Value(SrcSrc))) ||
2111  match(Src, m_Intrinsic<Intrinsic::fabs>(m_Value(SrcSrc)))) {
2112  // cos(-x) -> cos(x)
2113  // cos(fabs(x)) -> cos(x)
2114  II->setArgOperand(0, SrcSrc);
2115  return II;
2116  }
2117 
2118  break;
2119  }
2120  case Intrinsic::ppc_altivec_lvx:
2121  case Intrinsic::ppc_altivec_lvxl:
2122  // Turn PPC lvx -> load if the pointer is known aligned.
2123  if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, &AC,
2124  &DT) >= 16) {
2125  Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0),
2126  PointerType::getUnqual(II->getType()));
2127  return new LoadInst(Ptr);
2128  }
2129  break;
2130  case Intrinsic::ppc_vsx_lxvw4x:
2131  case Intrinsic::ppc_vsx_lxvd2x: {
2132  // Turn PPC VSX loads into normal loads.
2133  Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0),
2134  PointerType::getUnqual(II->getType()));
2135  return new LoadInst(Ptr, Twine(""), false, 1);
2136  }
2137  case Intrinsic::ppc_altivec_stvx:
2138  case Intrinsic::ppc_altivec_stvxl:
2139  // Turn stvx -> store if the pointer is known aligned.
2140  if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, II, &AC,
2141  &DT) >= 16) {
2142  Type *OpPtrTy =
2143  PointerType::getUnqual(II->getArgOperand(0)->getType());
2144  Value *Ptr = Builder.CreateBitCast(II->getArgOperand(1), OpPtrTy);
2145  return new StoreInst(II->getArgOperand(0), Ptr);
2146  }
2147  break;
2148  case Intrinsic::ppc_vsx_stxvw4x:
2149  case Intrinsic::ppc_vsx_stxvd2x: {
2150  // Turn PPC VSX stores into normal stores.
2151  Type *OpPtrTy = PointerType::getUnqual(II->getArgOperand(0)->getType());
2152  Value *Ptr = Builder.CreateBitCast(II->getArgOperand(1), OpPtrTy);
2153  return new StoreInst(II->getArgOperand(0), Ptr, false, 1);
2154  }
2155  case Intrinsic::ppc_qpx_qvlfs:
2156  // Turn PPC QPX qvlfs -> load if the pointer is known aligned.
2157  if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, &AC,
2158  &DT) >= 16) {
2159  Type *VTy = VectorType::get(Builder.getFloatTy(),
2160  II->getType()->getVectorNumElements());
2161  Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0),
2162  PointerType::getUnqual(VTy));
2163  Value *Load = Builder.CreateLoad(Ptr);
2164  return new FPExtInst(Load, II->getType());
2165  }
2166  break;
2167  case Intrinsic::ppc_qpx_qvlfd:
2168  // Turn PPC QPX qvlfd -> load if the pointer is known aligned.
2169  if (getOrEnforceKnownAlignment(II->getArgOperand(0), 32, DL, II, &AC,
2170  &DT) >= 32) {
2171  Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0),
2172  PointerType::getUnqual(II->getType()));
2173  return new LoadInst(Ptr);
2174  }
2175  break;
2176  case Intrinsic::ppc_qpx_qvstfs:
2177  // Turn PPC QPX qvstfs -> store if the pointer is known aligned.
2178  if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, II, &AC,
2179  &DT) >= 16) {
2180  Type *VTy = VectorType::get(Builder.getFloatTy(),
2181  II->getArgOperand(0)->getType()->getVectorNumElements());
2182  Value *TOp = Builder.CreateFPTrunc(II->getArgOperand(0), VTy);
2183  Type *OpPtrTy = PointerType::getUnqual(VTy);
2184  Value *Ptr = Builder.CreateBitCast(II->getArgOperand(1), OpPtrTy);
2185  return new StoreInst(TOp, Ptr);
2186  }
2187  break;
2188  case Intrinsic::ppc_qpx_qvstfd:
2189  // Turn PPC QPX qvstfd -> store if the pointer is known aligned.
2190  if (getOrEnforceKnownAlignment(II->getArgOperand(1), 32, DL, II, &AC,
2191  &DT) >= 32) {
2192  Type *OpPtrTy =
2193  PointerType::getUnqual(II->getArgOperand(0)->getType());
2194  Value *Ptr = Builder.CreateBitCast(II->getArgOperand(1), OpPtrTy);
2195  return new StoreInst(II->getArgOperand(0), Ptr);
2196  }
2197  break;
2198 
2199  case Intrinsic::x86_bmi_bextr_32:
2200  case Intrinsic::x86_bmi_bextr_64:
2201  case Intrinsic::x86_tbm_bextri_u32:
2202  case Intrinsic::x86_tbm_bextri_u64:
2203  // If the RHS is a constant we can try some simplifications.
2204  if (auto *C = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
2205  uint64_t Shift = C->getZExtValue();
2206  uint64_t Length = (Shift >> 8) & 0xff;
2207  Shift &= 0xff;
2208  unsigned BitWidth = II->getType()->getIntegerBitWidth();
2209  // If the length is 0 or the shift is out of range, replace with zero.
2210  if (Length == 0 || Shift >= BitWidth)
2211  return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), 0));
2212  // If the LHS is also a constant, we can completely constant fold this.
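 // (Illustrative: bextr(0x12345678, 0x0404) has Shift = 4 and Length = 4, and
 //  folds to (0x12345678 >> 4) & 0xF = 0x7.)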
2213  if (auto *InC = dyn_cast<ConstantInt>(II->getArgOperand(0))) {
2214  uint64_t Result = InC->getZExtValue() >> Shift;
2215  if (Length > BitWidth)
2216  Length = BitWidth;
2217  Result &= maskTrailingOnes<uint64_t>(Length);
2218  return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), Result));
2219  }
2220  // TODO should we turn this into 'and' if shift is 0? Or 'shl' if we
2221  // are only masking bits that a shift already cleared?
2222  }
2223  break;
2224 
2225  case Intrinsic::x86_bmi_bzhi_32:
2226  case Intrinsic::x86_bmi_bzhi_64:
2227  // If the RHS is a constant we can try some simplifications.
2228  if (auto *C = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
2229  uint64_t Index = C->getZExtValue() & 0xff;
2230  unsigned BitWidth = II->getType()->getIntegerBitWidth();
2231  if (Index >= BitWidth)
2232  return replaceInstUsesWith(CI, II->getArgOperand(0));
2233  if (Index == 0)
2234  return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), 0));
2235  // If the LHS is also a constant, we can completely constant fold this.
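 // (Illustrative: bzhi(0x12345678, 8) keeps only the low 8 bits and folds to
 //  0x78.)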
2236  if (auto *InC = dyn_cast<ConstantInt>(II->getArgOperand(0))) {
2237  uint64_t Result = InC->getZExtValue();
2238  Result &= maskTrailingOnes<uint64_t>(Index);
2239  return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), Result));
2240  }
2241  // TODO should we convert this to an AND if the RHS is constant?
2242  }
2243  break;
2244 
2245  case Intrinsic::x86_vcvtph2ps_128:
2246  case Intrinsic::x86_vcvtph2ps_256: {
2247  auto Arg = II->getArgOperand(0);
2248  auto ArgType = cast<VectorType>(Arg->getType());
2249  auto RetType = cast<VectorType>(II->getType());
2250  unsigned ArgWidth = ArgType->getNumElements();
2251  unsigned RetWidth = RetType->getNumElements();
2252  assert(RetWidth <= ArgWidth && "Unexpected input/return vector widths");
2253  assert(ArgType->isIntOrIntVectorTy() &&
2254  ArgType->getScalarSizeInBits() == 16 &&
2255  "CVTPH2PS input type should be 16-bit integer vector");
2256  assert(RetType->getScalarType()->isFloatTy() &&
2257  "CVTPH2PS output type should be 32-bit float vector");
2258 
2259  // Constant folding: Convert to generic half to single conversion.
2260  if (isa<ConstantAggregateZero>(Arg))
2261  return replaceInstUsesWith(*II, ConstantAggregateZero::get(RetType));
2262 
2263  if (isa<ConstantDataVector>(Arg)) {
2264  auto VectorHalfAsShorts = Arg;
2265  if (RetWidth < ArgWidth) {
2266  SmallVector<uint32_t, 8> SubVecMask;
2267  for (unsigned i = 0; i != RetWidth; ++i)
2268  SubVecMask.push_back((int)i);
2269  VectorHalfAsShorts = Builder.CreateShuffleVector(
2270  Arg, UndefValue::get(ArgType), SubVecMask);
2271  }
2272 
2273  auto VectorHalfType =
2274  VectorType::get(Type::getHalfTy(II->getContext()), RetWidth);
2275  auto VectorHalfs =
2276  Builder.CreateBitCast(VectorHalfAsShorts, VectorHalfType);
2277  auto VectorFloats = Builder.CreateFPExt(VectorHalfs, RetType);
2278  return replaceInstUsesWith(*II, VectorFloats);
2279  }
2280 
2281  // We only use the lowest lanes of the argument.
2282  if (Value *V = SimplifyDemandedVectorEltsLow(Arg, ArgWidth, RetWidth)) {
2283  II->setArgOperand(0, V);
2284  return II;
2285  }
2286  break;
2287  }
2288 
2289  case Intrinsic::x86_sse_cvtss2si:
2290  case Intrinsic::x86_sse_cvtss2si64:
2291  case Intrinsic::x86_sse_cvttss2si:
2292  case Intrinsic::x86_sse_cvttss2si64:
2293  case Intrinsic::x86_sse2_cvtsd2si:
2294  case Intrinsic::x86_sse2_cvtsd2si64:
2295  case Intrinsic::x86_sse2_cvttsd2si:
2296  case Intrinsic::x86_sse2_cvttsd2si64:
2297  case Intrinsic::x86_avx512_vcvtss2si32:
2298  case Intrinsic::x86_avx512_vcvtss2si64:
2299  case Intrinsic::x86_avx512_vcvtss2usi32:
2300  case Intrinsic::x86_avx512_vcvtss2usi64:
2301  case Intrinsic::x86_avx512_vcvtsd2si32:
2302  case Intrinsic::x86_avx512_vcvtsd2si64:
2303  case Intrinsic::x86_avx512_vcvtsd2usi32:
2304  case Intrinsic::x86_avx512_vcvtsd2usi64:
2305  case Intrinsic::x86_avx512_cvttss2si:
2306  case Intrinsic::x86_avx512_cvttss2si64:
2307  case Intrinsic::x86_avx512_cvttss2usi:
2308  case Intrinsic::x86_avx512_cvttss2usi64:
2309  case Intrinsic::x86_avx512_cvttsd2si:
2310  case Intrinsic::x86_avx512_cvttsd2si64:
2311  case Intrinsic::x86_avx512_cvttsd2usi:
2312  case Intrinsic::x86_avx512_cvttsd2usi64: {
2313  // These intrinsics only demand the 0th element of their input vectors. If
2314  // we can simplify the input based on that, do so now.
2315  Value *Arg = II->getArgOperand(0);
2316  unsigned VWidth = Arg->getType()->getVectorNumElements();
2317  if (Value *V = SimplifyDemandedVectorEltsLow(Arg, VWidth, 1)) {
2318  II->setArgOperand(0, V);
2319  return II;
2320  }
2321  break;
2322  }
2323 
2324  case Intrinsic::x86_sse41_round_ps:
2325  case Intrinsic::x86_sse41_round_pd:
2326  case Intrinsic::x86_avx_round_ps_256:
2327  case Intrinsic::x86_avx_round_pd_256:
2328  case Intrinsic::x86_avx512_mask_rndscale_ps_128:
2329  case Intrinsic::x86_avx512_mask_rndscale_ps_256:
2330  case Intrinsic::x86_avx512_mask_rndscale_ps_512:
2331  case Intrinsic::x86_avx512_mask_rndscale_pd_128:
2332  case Intrinsic::x86_avx512_mask_rndscale_pd_256:
2333  case Intrinsic::x86_avx512_mask_rndscale_pd_512:
2334  case Intrinsic::x86_avx512_mask_rndscale_ss:
2335  case Intrinsic::x86_avx512_mask_rndscale_sd:
2336  if (Value *V = simplifyX86round(*II, Builder))
2337  return replaceInstUsesWith(*II, V);
2338  break;
2339 
2340  case Intrinsic::x86_mmx_pmovmskb:
2341  case Intrinsic::x86_sse_movmsk_ps:
2342  case Intrinsic::x86_sse2_movmsk_pd:
2343  case Intrinsic::x86_sse2_pmovmskb_128:
2344  case Intrinsic::x86_avx_movmsk_pd_256:
2345  case Intrinsic::x86_avx_movmsk_ps_256:
2346  case Intrinsic::x86_avx2_pmovmskb:
2347  if (Value *V = simplifyX86movmsk(*II))
2348  return replaceInstUsesWith(*II, V);
2349  break;
2350 
2351  case Intrinsic::x86_sse_comieq_ss:
2352  case Intrinsic::x86_sse_comige_ss:
2353  case Intrinsic::x86_sse_comigt_ss:
2354  case Intrinsic::x86_sse_comile_ss:
2355  case Intrinsic::x86_sse_comilt_ss:
2356  case Intrinsic::x86_sse_comineq_ss:
2357  case Intrinsic::x86_sse_ucomieq_ss:
2358  case Intrinsic::x86_sse_ucomige_ss:
2359  case Intrinsic::x86_sse_ucomigt_ss:
2360  case Intrinsic::x86_sse_ucomile_ss:
2361  case Intrinsic::x86_sse_ucomilt_ss:
2362  case Intrinsic::x86_sse_ucomineq_ss:
2363  case Intrinsic::x86_sse2_comieq_sd:
2364  case Intrinsic::x86_sse2_comige_sd:
2365  case Intrinsic::x86_sse2_comigt_sd:
2366  case Intrinsic::x86_sse2_comile_sd:
2367  case Intrinsic::x86_sse2_comilt_sd:
2368  case Intrinsic::x86_sse2_comineq_sd:
2369  case Intrinsic::x86_sse2_ucomieq_sd:
2370  case Intrinsic::x86_sse2_ucomige_sd:
2371  case Intrinsic::x86_sse2_ucomigt_sd:
2372  case Intrinsic::x86_sse2_ucomile_sd:
2373  case Intrinsic::x86_sse2_ucomilt_sd:
2374  case Intrinsic::x86_sse2_ucomineq_sd:
2375  case Intrinsic::x86_avx512_vcomi_ss:
2376  case Intrinsic::x86_avx512_vcomi_sd:
2377  case Intrinsic::x86_avx512_mask_cmp_ss:
2378  case Intrinsic::x86_avx512_mask_cmp_sd: {
2379  // These intrinsics only demand the 0th element of their input vectors. If
2380  // we can simplify the input based on that, do so now.
2381  bool MadeChange = false;
2382  Value *Arg0 = II->getArgOperand(0);
2383  Value *Arg1 = II->getArgOperand(1);
2384  unsigned VWidth = Arg0->getType()->getVectorNumElements();
2385  if (Value *V = SimplifyDemandedVectorEltsLow(Arg0, VWidth, 1)) {
2386  II->setArgOperand(0, V);
2387  MadeChange = true;
2388  }
2389  if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, 1)) {
2390  II->setArgOperand(1, V);
2391  MadeChange = true;
2392  }
2393  if (MadeChange)
2394  return II;
2395  break;
2396  }
2397  case Intrinsic::x86_avx512_mask_cmp_pd_128:
2398  case Intrinsic::x86_avx512_mask_cmp_pd_256:
2399  case Intrinsic::x86_avx512_mask_cmp_pd_512:
2400  case Intrinsic::x86_avx512_mask_cmp_ps_128:
2401  case Intrinsic::x86_avx512_mask_cmp_ps_256:
2402  case Intrinsic::x86_avx512_mask_cmp_ps_512: {
2403  // Folding cmp(sub(a,b),0) -> cmp(a,b) and cmp(0,sub(a,b)) -> cmp(b,a)
2404  Value *Arg0 = II->getArgOperand(0);
2405  Value *Arg1 = II->getArgOperand(1);
2406  bool Arg0IsZero = match(Arg0, m_PosZeroFP());
2407  if (Arg0IsZero)
2408  std::swap(Arg0, Arg1);
2409  Value *A, *B;
2410  // This fold requires only the NINF(not +/- inf) since inf minus
2411  // inf is nan.
2412  // NSZ(No Signed Zeros) is not needed because zeros of any sign are
2413  // equal for both compares.
2414  // NNAN is not needed because nans compare the same for both compares.
2415  // The compare intrinsic uses the above assumptions and therefore
2416  // doesn't require additional flags.
2417  if ((match(Arg0, m_OneUse(m_FSub(m_Value(A), m_Value(B)))) &&
2418  match(Arg1, m_PosZeroFP()) && isa<Instruction>(Arg0) &&
2419  cast<Instruction>(Arg0)->getFastMathFlags().noInfs())) {
2420  if (Arg0IsZero)
2421  std::swap(A, B);
2422  II->setArgOperand(0, A);
2423  II->setArgOperand(1, B);
2424  return II;
2425  }
2426  break;
2427  }
2428 
2429  case Intrinsic::x86_avx512_add_ps_512:
2430  case Intrinsic::x86_avx512_div_ps_512:
2431  case Intrinsic::x86_avx512_mul_ps_512:
2432  case Intrinsic::x86_avx512_sub_ps_512:
2433  case Intrinsic::x86_avx512_add_pd_512:
2434  case Intrinsic::x86_avx512_div_pd_512:
2435  case Intrinsic::x86_avx512_mul_pd_512:
2436  case Intrinsic::x86_avx512_sub_pd_512:
2437  // If the rounding mode is CUR_DIRECTION(4) we can turn these into regular
2438  // IR operations.
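 // (Illustrative: add.ps.512(%a, %b, i32 4) becomes a plain
 //  "fadd <16 x float> %a, %b".)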
2439  if (auto *R = dyn_cast<ConstantInt>(II->getArgOperand(2))) {
2440  if (R->getValue() == 4) {
2441  Value *Arg0 = II->getArgOperand(0);
2442  Value *Arg1 = II->getArgOperand(1);
2443 
2444  Value *V;
2445  switch (II->getIntrinsicID()) {
2446  default: llvm_unreachable("Case stmts out of sync!");
2447  case Intrinsic::x86_avx512_add_ps_512:
2448  case Intrinsic::x86_avx512_add_pd_512:
2449  V = Builder.CreateFAdd(Arg0, Arg1);
2450  break;
2451  case Intrinsic::x86_avx512_sub_ps_512:
2452  case Intrinsic::x86_avx512_sub_pd_512:
2453  V = Builder.CreateFSub(Arg0, Arg1);
2454  break;
2455  case Intrinsic::x86_avx512_mul_ps_512:
2456  case Intrinsic::x86_avx512_mul_pd_512:
2457  V = Builder.CreateFMul(Arg0, Arg1);
2458  break;
2459  case Intrinsic::x86_avx512_div_ps_512:
2460  case Intrinsic::x86_avx512_div_pd_512:
2461  V = Builder.CreateFDiv(Arg0, Arg1);
2462  break;
2463  }
2464 
2465  return replaceInstUsesWith(*II, V);
2466  }
2467  }
2468  break;
2469 
2470  case Intrinsic::x86_avx512_mask_add_ss_round:
2471  case Intrinsic::x86_avx512_mask_div_ss_round:
2472  case Intrinsic::x86_avx512_mask_mul_ss_round:
2473  case Intrinsic::x86_avx512_mask_sub_ss_round:
2474  case Intrinsic::x86_avx512_mask_add_sd_round:
2475  case Intrinsic::x86_avx512_mask_div_sd_round:
2476  case Intrinsic::x86_avx512_mask_mul_sd_round:
2477  case Intrinsic::x86_avx512_mask_sub_sd_round:
2478  // If the rounding mode is CUR_DIRECTION(4) we can turn these into regular
2479  // IR operations.
2480  if (auto *R = dyn_cast<ConstantInt>(II->getArgOperand(4))) {
2481  if (R->getValue() == 4) {
2482  // Extract the element as scalars.
2483  Value *Arg0 = II->getArgOperand(0);
2484  Value *Arg1 = II->getArgOperand(1);
2485  Value *LHS = Builder.CreateExtractElement(Arg0, (uint64_t)0);
2486  Value *RHS = Builder.CreateExtractElement(Arg1, (uint64_t)0);
2487 
2488  Value *V;
2489  switch (II->getIntrinsicID()) {
2490  default: llvm_unreachable("Case stmts out of sync!");
2491  case Intrinsic::x86_avx512_mask_add_ss_round:
2492  case Intrinsic::x86_avx512_mask_add_sd_round:
2493  V = Builder.CreateFAdd(LHS, RHS);
2494  break;
2495  case Intrinsic::x86_avx512_mask_sub_ss_round:
2496  case Intrinsic::x86_avx512_mask_sub_sd_round:
2497  V = Builder.CreateFSub(LHS, RHS);
2498  break;
2499  case Intrinsic::x86_avx512_mask_mul_ss_round:
2500  case Intrinsic::x86_avx512_mask_mul_sd_round:
2501  V = Builder.CreateFMul(LHS, RHS);
2502  break;
2503  case Intrinsic::x86_avx512_mask_div_ss_round:
2504  case Intrinsic::x86_avx512_mask_div_sd_round:
2505  V = Builder.CreateFDiv(LHS, RHS);
2506  break;
2507  }
2508 
2509  // Handle the masking aspect of the intrinsic.
2510  Value *Mask = II->getArgOperand(3);
2511  auto *C = dyn_cast<ConstantInt>(Mask);
2512  // We don't need a select if we know the mask bit is a 1.
2513  if (!C || !C->getValue()[0]) {
2514  // Cast the mask to an i1 vector and then extract the lowest element.
2515  auto *MaskTy = VectorType::get(Builder.getInt1Ty(),
2516  cast<IntegerType>(Mask->getType())->getBitWidth());
2517  Mask = Builder.CreateBitCast(Mask, MaskTy);
2518  Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
2519  // Extract the lowest element from the passthru operand.
2520  Value *Passthru = Builder.CreateExtractElement(II->getArgOperand(2),
2521  (uint64_t)0);
2522  V = Builder.CreateSelect(Mask, V, Passthru);
2523  }
2524 
2525  // Insert the result back into the original argument 0.
2526  V = Builder.CreateInsertElement(Arg0, V, (uint64_t)0);
2527 
2528  return replaceInstUsesWith(*II, V);
2529  }
2530  }
2531  LLVM_FALLTHROUGH;
2532 
2533  // X86 scalar intrinsics simplified with SimplifyDemandedVectorElts.
2534  case Intrinsic::x86_avx512_mask_max_ss_round:
2535  case Intrinsic::x86_avx512_mask_min_ss_round:
2536  case Intrinsic::x86_avx512_mask_max_sd_round:
2537  case Intrinsic::x86_avx512_mask_min_sd_round:
2538  case Intrinsic::x86_avx512_mask_vfmadd_ss:
2539  case Intrinsic::x86_avx512_mask_vfmadd_sd:
2540  case Intrinsic::x86_avx512_maskz_vfmadd_ss:
2541  case Intrinsic::x86_avx512_maskz_vfmadd_sd:
2542  case Intrinsic::x86_avx512_mask3_vfmadd_ss:
2543  case Intrinsic::x86_avx512_mask3_vfmadd_sd:
2544  case Intrinsic::x86_avx512_mask3_vfmsub_ss:
2545  case Intrinsic::x86_avx512_mask3_vfmsub_sd:
2546  case Intrinsic::x86_avx512_mask3_vfnmsub_ss:
2547  case Intrinsic::x86_avx512_mask3_vfnmsub_sd:
2548  case Intrinsic::x86_fma_vfmadd_ss:
2549  case Intrinsic::x86_fma_vfmadd_sd:
2550  case Intrinsic::x86_sse_cmp_ss:
2551  case Intrinsic::x86_sse_min_ss:
2552  case Intrinsic::x86_sse_max_ss:
2553  case Intrinsic::x86_sse2_cmp_sd:
2554  case Intrinsic::x86_sse2_min_sd:
2555  case Intrinsic::x86_sse2_max_sd:
2556  case Intrinsic::x86_xop_vfrcz_ss:
2557  case Intrinsic::x86_xop_vfrcz_sd: {
2558  unsigned VWidth = II->getType()->getVectorNumElements();
2559  APInt UndefElts(VWidth, 0);
2560  APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
2561  if (Value *V = SimplifyDemandedVectorElts(II, AllOnesEltMask, UndefElts)) {
2562  if (V != II)
2563  return replaceInstUsesWith(*II, V);
2564  return II;
2565  }
2566  break;
2567  }
2568  case Intrinsic::x86_sse41_round_ss:
2569  case Intrinsic::x86_sse41_round_sd: {
2570  unsigned VWidth = II->getType()->getVectorNumElements();
2571  APInt UndefElts(VWidth, 0);
2572  APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
2573  if (Value *V = SimplifyDemandedVectorElts(II, AllOnesEltMask, UndefElts)) {
2574  if (V != II)
2575  return replaceInstUsesWith(*II, V);
2576  return II;
2577  } else if (Value *V = simplifyX86round(*II, Builder))
2578  return replaceInstUsesWith(*II, V);
2579  break;
2580  }
2581 
2582  // Constant fold ashr( <A x Bi>, Ci ).
2583  // Constant fold lshr( <A x Bi>, Ci ).
2584  // Constant fold shl( <A x Bi>, Ci ).
2585  case Intrinsic::x86_sse2_psrai_d:
2586  case Intrinsic::x86_sse2_psrai_w:
2587  case Intrinsic::x86_avx2_psrai_d:
2588  case Intrinsic::x86_avx2_psrai_w:
2589  case Intrinsic::x86_avx512_psrai_q_128:
2590  case Intrinsic::x86_avx512_psrai_q_256:
2591  case Intrinsic::x86_avx512_psrai_d_512:
2592  case Intrinsic::x86_avx512_psrai_q_512:
2593  case Intrinsic::x86_avx512_psrai_w_512:
2594  case Intrinsic::x86_sse2_psrli_d:
2595  case Intrinsic::x86_sse2_psrli_q:
2596  case Intrinsic::x86_sse2_psrli_w:
2597  case Intrinsic::x86_avx2_psrli_d:
2598  case Intrinsic::x86_avx2_psrli_q:
2599  case Intrinsic::x86_avx2_psrli_w:
2600  case Intrinsic::x86_avx512_psrli_d_512:
2601  case Intrinsic::x86_avx512_psrli_q_512:
2602  case Intrinsic::x86_avx512_psrli_w_512:
2603  case Intrinsic::x86_sse2_pslli_d:
2604  case Intrinsic::x86_sse2_pslli_q:
2605  case Intrinsic::x86_sse2_pslli_w:
2606  case Intrinsic::x86_avx2_pslli_d:
2607  case Intrinsic::x86_avx2_pslli_q:
2608  case Intrinsic::x86_avx2_pslli_w:
2609  case Intrinsic::x86_avx512_pslli_d_512:
2610  case Intrinsic::x86_avx512_pslli_q_512:
2611  case Intrinsic::x86_avx512_pslli_w_512:
2612  if (Value *V = simplifyX86immShift(*II, Builder))
2613  return replaceInstUsesWith(*II, V);
2614  break;
2615 
2616  case Intrinsic::x86_sse2_psra_d:
2617  case Intrinsic::x86_sse2_psra_w:
2618  case Intrinsic::x86_avx2_psra_d:
2619  case Intrinsic::x86_avx2_psra_w:
2620  case Intrinsic::x86_avx512_psra_q_128:
2621  case Intrinsic::x86_avx512_psra_q_256:
2622  case Intrinsic::x86_avx512_psra_d_512:
2623  case Intrinsic::x86_avx512_psra_q_512:
2624  case Intrinsic::x86_avx512_psra_w_512:
2625  case Intrinsic::x86_sse2_psrl_d:
2626  case Intrinsic::x86_sse2_psrl_q:
2627  case Intrinsic::x86_sse2_psrl_w:
2628  case Intrinsic::x86_avx2_psrl_d:
2629  case Intrinsic::x86_avx2_psrl_q:
2630  case Intrinsic::x86_avx2_psrl_w:
2631  case Intrinsic::x86_avx512_psrl_d_512:
2632  case Intrinsic::x86_avx512_psrl_q_512:
2633  case Intrinsic::x86_avx512_psrl_w_512:
2634  case Intrinsic::x86_sse2_psll_d:
2635  case Intrinsic::x86_sse2_psll_q:
2636  case Intrinsic::x86_sse2_psll_w:
2637  case Intrinsic::x86_avx2_psll_d:
2638  case Intrinsic::x86_avx2_psll_q:
2639  case Intrinsic::x86_avx2_psll_w:
2640  case Intrinsic::x86_avx512_psll_d_512:
2641  case Intrinsic::x86_avx512_psll_q_512:
2642  case Intrinsic::x86_avx512_psll_w_512: {
2643  if (Value *V = simplifyX86immShift(*II, Builder))
2644  return replaceInstUsesWith(*II, V);
2645 
2646  // SSE2/AVX2 uses only the first 64-bits of the 128-bit vector
2647  // operand to compute the shift amount.
2648  Value *Arg1 = II->getArgOperand(1);
2649  assert(Arg1->getType()->getPrimitiveSizeInBits() == 128 &&
2650  "Unexpected packed shift size");
2651  unsigned VWidth = Arg1->getType()->getVectorNumElements();
2652 
2653  if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, VWidth / 2)) {
2654  II->setArgOperand(1, V);
2655  return II;
2656  }
2657  break;
2658  }
2659 
2660  case Intrinsic::x86_avx2_psllv_d:
2661  case Intrinsic::x86_avx2_psllv_d_256:
2662  case Intrinsic::x86_avx2_psllv_q:
2663  case Intrinsic::x86_avx2_psllv_q_256:
2664  case Intrinsic::x86_avx512_psllv_d_512:
2665  case Intrinsic::x86_avx512_psllv_q_512:
2666  case Intrinsic::x86_avx512_psllv_w_128:
2667  case Intrinsic::x86_avx512_psllv_w_256:
2668  case Intrinsic::x86_avx512_psllv_w_512:
2669  case Intrinsic::x86_avx2_psrav_d:
2670  case Intrinsic::x86_avx2_psrav_d_256:
2671  case Intrinsic::x86_avx512_psrav_q_128:
2672  case Intrinsic::x86_avx512_psrav_q_256:
2673  case Intrinsic::x86_avx512_psrav_d_512:
2674  case Intrinsic::x86_avx512_psrav_q_512:
2675  case Intrinsic::x86_avx512_psrav_w_128:
2676  case Intrinsic::x86_avx512_psrav_w_256:
2677  case Intrinsic::x86_avx512_psrav_w_512:
2678  case Intrinsic::x86_avx2_psrlv_d:
2679  case Intrinsic::x86_avx2_psrlv_d_256:
2680  case Intrinsic::x86_avx2_psrlv_q:
2681  case Intrinsic::x86_avx2_psrlv_q_256:
2682  case Intrinsic::x86_avx512_psrlv_d_512:
2683  case Intrinsic::x86_avx512_psrlv_q_512:
2684  case Intrinsic::x86_avx512_psrlv_w_128:
2685  case Intrinsic::x86_avx512_psrlv_w_256:
2686  case Intrinsic::x86_avx512_psrlv_w_512:
2687  if (Value *V = simplifyX86varShift(*II, Builder))
2688  return replaceInstUsesWith(*II, V);
2689  break;
2690 
2691  case Intrinsic::x86_sse2_packssdw_128:
2692  case Intrinsic::x86_sse2_packsswb_128:
2693  case Intrinsic::x86_avx2_packssdw:
2694  case Intrinsic::x86_avx2_packsswb:
2695  case Intrinsic::x86_avx512_packssdw_512:
2696  case Intrinsic::x86_avx512_packsswb_512:
2697  if (Value *V = simplifyX86pack(*II, true))
2698  return replaceInstUsesWith(*II, V);
2699  break;
2700 
2701  case Intrinsic::x86_sse2_packuswb_128:
2702  case Intrinsic::x86_sse41_packusdw:
2703  case Intrinsic::x86_avx2_packusdw:
2704  case Intrinsic::x86_avx2_packuswb:
2705  case Intrinsic::x86_avx512_packusdw_512:
2706  case Intrinsic::x86_avx512_packuswb_512:
2707  if (Value *V = simplifyX86pack(*II, false))
2708  return replaceInstUsesWith(*II, V);
2709  break;
2710 
2711  case Intrinsic::x86_pclmulqdq:
2712  case Intrinsic::x86_pclmulqdq_256:
2713  case Intrinsic::x86_pclmulqdq_512: {
2714  if (auto *C = dyn_cast<ConstantInt>(II->getArgOperand(2))) {
2715  unsigned Imm = C->getZExtValue();
2716 
2717  bool MadeChange = false;
2718  Value *Arg0 = II->getArgOperand(0);
2719  Value *Arg1 = II->getArgOperand(1);
2720  unsigned VWidth = Arg0->getType()->getVectorNumElements();
2721 
2722  APInt UndefElts1(VWidth, 0);
2723  APInt DemandedElts1 = APInt::getSplat(VWidth,
2724  APInt(2, (Imm & 0x01) ? 2 : 1));
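 // (Illustrative: with bit 0 of the immediate clear, the splat demands the
 //  even/low i64 element of each 128-bit lane of Arg0; with it set, the
 //  odd/high element. Bit 4 plays the same role for Arg1 below.)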
2725  if (Value *V = SimplifyDemandedVectorElts(Arg0, DemandedElts1,
2726  UndefElts1)) {
2727  II->setArgOperand(0, V);
2728  MadeChange = true;
2729  }
2730 
2731  APInt UndefElts2(VWidth, 0);
2732  APInt DemandedElts2 = APInt::getSplat(VWidth,
2733  APInt(2, (Imm & 0x10) ? 2 : 1));
2734  if (Value *V = SimplifyDemandedVectorElts(Arg1, DemandedElts2,
2735  UndefElts2)) {
2736  II->setArgOperand(1, V);
2737  MadeChange = true;
2738  }
2739 
2740  // If either input elements are undef, the result is zero.
2741  if (DemandedElts1.isSubsetOf(UndefElts1) ||
2742  DemandedElts2.isSubsetOf(UndefElts2))
2743  return replaceInstUsesWith(*II,
2744  ConstantAggregateZero::get(II->getType()));
2745 
2746  if (MadeChange)
2747  return II;
2748  }
2749  break;
2750  }
2751 
2752  case Intrinsic::x86_sse41_insertps:
2753  if (Value *V = simplifyX86insertps(*II, Builder))
2754  return replaceInstUsesWith(*II, V);
2755  break;
2756 
2757  case Intrinsic::x86_sse4a_extrq: {
2758  Value *Op0 = II->getArgOperand(0);
2759  Value *Op1 = II->getArgOperand(1);
2760  unsigned VWidth0 = Op0->getType()->getVectorNumElements();
2761  unsigned VWidth1 = Op1->getType()->getVectorNumElements();
2762  assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
2763  Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth0 == 2 &&
2764  VWidth1 == 16 && "Unexpected operand sizes");
2765 
2766  // See if we're dealing with constant values.
2767  Constant *C1 = dyn_cast<Constant>(Op1);
2768  ConstantInt *CILength =
2769  C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)0))
2770  : nullptr;
2771  ConstantInt *CIIndex =
2772  C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)1))
2773  : nullptr;
2774 
2775  // Attempt to simplify to a constant, shuffle vector or EXTRQI call.
2776  if (Value *V = simplifyX86extrq(*II, Op0, CILength, CIIndex, Builder))
2777  return replaceInstUsesWith(*II, V);
2778 
2779  // EXTRQ only uses the lowest 64-bits of the first 128-bit vector
2780  // operands and the lowest 16-bits of the second.
2781  bool MadeChange = false;
2782  if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
2783  II->setArgOperand(0, V);
2784  MadeChange = true;
2785  }
2786  if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 2)) {
2787  II->setArgOperand(1, V);
2788  MadeChange = true;
2789  }
2790  if (MadeChange)
2791  return II;
2792  break;
2793  }
2794 
2795  case Intrinsic::x86_sse4a_extrqi: {
2796  // EXTRQI: Extract Length bits starting from Index. Zero pad the remaining
2797  // bits of the lower 64-bits. The upper 64-bits are undefined.
2798  Value *Op0 = II->getArgOperand(0);
2799  unsigned VWidth = Op0->getType()->getVectorNumElements();
2800  assert(Op0->getType()->getPrimitiveSizeInBits() == 128 && VWidth == 2 &&
2801  "Unexpected operand size");
2802 
2803  // See if we're dealing with constant values.
2804  ConstantInt *CILength = dyn_cast<ConstantInt>(II->getArgOperand(1));
2805  ConstantInt *CIIndex = dyn_cast<ConstantInt>(II->getArgOperand(2));
2806 
2807  // Attempt to simplify to a constant or shuffle vector.
2808  if (Value *V = simplifyX86extrq(*II, Op0, CILength, CIIndex, Builder))
2809  return replaceInstUsesWith(*II, V);
2810 
2811  // EXTRQI only uses the lowest 64-bits of the first 128-bit vector
2812  // operand.
2813  if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) {
2814  II->setArgOperand(0, V);
2815  return II;
2816  }
2817  break;
2818  }
2819 
2820  case Intrinsic::x86_sse4a_insertq: {
2821  Value *Op0 = II->getArgOperand(0);
2822  Value *Op1 = II->getArgOperand(1);
2823  unsigned VWidth = Op0->getType()->getVectorNumElements();
2824  assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
2825  Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth == 2 &&
2826  Op1->getType()->getVectorNumElements() == 2 &&
2827  "Unexpected operand size");
2828 
2829  // See if we're dealing with constant values.
2830  Constant *C1 = dyn_cast<Constant>(Op1);
2831  ConstantInt *CI11 =
2832  C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)1))
2833  : nullptr;
2834 
2835  // Attempt to simplify to a constant, shuffle vector or INSERTQI call.
2836  if (CI11) {
2837  const APInt &V11 = CI11->getValue();
2838  APInt Len = V11.zextOrTrunc(6);
2839  APInt Idx = V11.lshr(8).zextOrTrunc(6);
2840  if (Value *V = simplifyX86insertq(*II, Op0, Op1, Len, Idx, Builder))
2841  return replaceInstUsesWith(*II, V);
2842  }
2843 
2844  // INSERTQ only uses the lowest 64-bits of the first 128-bit vector
2845  // operand.
2846  if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) {
2847  II->setArgOperand(0, V);
2848  return II;
2849  }
2850  break;
2851  }
2852 
2853  case Intrinsic::x86_sse4a_insertqi: {
2854  // INSERTQI: Extract lowest Length bits from lower half of second source and
2855  // insert over first source starting at Index bit. The upper 64-bits are
2856  // undefined.
2857  Value *Op0 = II->getArgOperand(0);
2858  Value *Op1 = II->getArgOperand(1);
2859  unsigned VWidth0 = Op0->getType()->getVectorNumElements();
2860  unsigned VWidth1 = Op1->getType()->getVectorNumElements();
2861  assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
2862  Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth0 == 2 &&
2863  VWidth1 == 2 && "Unexpected operand sizes");
2864 
2865  // See if we're dealing with constant values.
2866  ConstantInt *CILength = dyn_cast<ConstantInt>(II->getArgOperand(2));
2867  ConstantInt *CIIndex = dyn_cast<ConstantInt>(II->getArgOperand(3));
2868 
2869  // Attempt to simplify to a constant or shuffle vector.
2870  if (CILength && CIIndex) {
2871  APInt Len = CILength->getValue().zextOrTrunc(6);
2872  APInt Idx = CIIndex->getValue().zextOrTrunc(6);
2873  if (Value *V = simplifyX86insertq(*II, Op0, Op1, Len, Idx, Builder))
2874  return replaceInstUsesWith(*II, V);
2875  }
2876 
2877  // INSERTQI only uses the lowest 64-bits of the first two 128-bit vector
2878  // operands.
2879  bool MadeChange = false;
2880  if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
2881  II->setArgOperand(0, V);
2882  MadeChange = true;
2883  }
2884  if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 1)) {
2885  II->setArgOperand(1, V);
2886  MadeChange = true;
2887  }
2888  if (MadeChange)
2889  return II;
2890  break;
2891  }
2892 
2893  case Intrinsic::x86_sse41_pblendvb:
2894  case Intrinsic::x86_sse41_blendvps:
2895  case Intrinsic::x86_sse41_blendvpd:
2896  case Intrinsic::x86_avx_blendv_ps_256:
2897  case Intrinsic::x86_avx_blendv_pd_256:
2898  case Intrinsic::x86_avx2_pblendvb: {
2899  // Convert blendv* to vector selects if the mask is constant.
2900  // This optimization is convoluted because the intrinsic is defined as
2901  // getting a vector of floats or doubles for the ps and pd versions.
2902  // FIXME: That should be changed.
2903 
2904  Value *Op0 = II->getArgOperand(0);
2905  Value *Op1 = II->getArgOperand(1);
2906  Value *Mask = II->getArgOperand(2);
2907 
2908  // fold (blend A, A, Mask) -> A
2909  if (Op0 == Op1)
2910  return replaceInstUsesWith(CI, Op0);
2911 
2912  // Zero Mask - select 1st argument.
2913  if (isa<ConstantAggregateZero>(Mask))
2914  return replaceInstUsesWith(CI, Op0);
2915 
2916  // Constant Mask - select 1st/2nd argument lane based on top bit of mask.
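 // (Illustrative: a constant mask lane of 0x80 has its sign bit set, so that
 //  lane is taken from Op1; a lane of 0x00 keeps the lane from Op0.)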
2917  if (auto *ConstantMask = dyn_cast<ConstantDataVector>(Mask)) {
2918  Constant *NewSelector = getNegativeIsTrueBoolVec(ConstantMask);
2919  return SelectInst::Create(NewSelector, Op1, Op0, "blendv");
2920  }
2921  break;
2922  }
2923 
2924  case Intrinsic::x86_ssse3_pshuf_b_128:
2925  case Intrinsic::x86_avx2_pshuf_b:
2926  case Intrinsic::x86_avx512_pshuf_b_512:
2927  if (Value *V = simplifyX86pshufb(*II, Builder))
2928  return replaceInstUsesWith(*II, V);
2929  break;
2930 
2931  case Intrinsic::x86_avx_vpermilvar_ps:
2932  case Intrinsic::x86_avx_vpermilvar_ps_256:
2933  case Intrinsic::x86_avx512_vpermilvar_ps_512:
2934  case Intrinsic::x86_avx_vpermilvar_pd:
2935  case Intrinsic::x86_avx_vpermilvar_pd_256:
2936  case Intrinsic::x86_avx512_vpermilvar_pd_512:
2937  if (Value *V = simplifyX86vpermilvar(*II, Builder))
2938  return replaceInstUsesWith(*II, V);
2939  break;
2940 
2941  case Intrinsic::x86_avx2_permd:
2942  case Intrinsic::x86_avx2_permps:
2943  case Intrinsic::x86_avx512_permvar_df_256:
2944  case Intrinsic::x86_avx512_permvar_df_512:
2945  case Intrinsic::x86_avx512_permvar_di_256:
2946  case Intrinsic::x86_avx512_permvar_di_512:
2947  case Intrinsic::x86_avx512_permvar_hi_128:
2948  case Intrinsic::x86_avx512_permvar_hi_256:
2949  case Intrinsic::x86_avx512_permvar_hi_512:
2950  case Intrinsic::x86_avx512_permvar_qi_128:
2951  case Intrinsic::x86_avx512_permvar_qi_256:
2952  case Intrinsic::x86_avx512_permvar_qi_512:
2953  case Intrinsic::x86_avx512_permvar_sf_512:
2954  case Intrinsic::x86_avx512_permvar_si_512:
2955  if (Value *V = simplifyX86vpermv(*II, Builder))
2956  return replaceInstUsesWith(*II, V);
2957  break;
2958 
2959  case Intrinsic::x86_avx_maskload_ps:
2960  case Intrinsic::x86_avx_maskload_pd:
2961  case Intrinsic::x86_avx_maskload_ps_256:
2962  case Intrinsic::x86_avx_maskload_pd_256:
2963  case Intrinsic::x86_avx2_maskload_d:
2964  case Intrinsic::x86_avx2_maskload_q:
2965  case Intrinsic::x86_avx2_maskload_d_256:
2966  case Intrinsic::x86_avx2_maskload_q_256:
2967  if (Instruction *I = simplifyX86MaskedLoad(*II, *this))
2968  return I;
2969  break;
2970 
2971  case Intrinsic::x86_sse2_maskmov_dqu:
2972  case Intrinsic::x86_avx_maskstore_ps:
2973  case Intrinsic::x86_avx_maskstore_pd:
2974  case Intrinsic::x86_avx_maskstore_ps_256:
2975  case Intrinsic::x86_avx_maskstore_pd_256:
2976  case Intrinsic::x86_avx2_maskstore_d:
2977  case Intrinsic::x86_avx2_maskstore_q:
2978  case Intrinsic::x86_avx2_maskstore_d_256:
2979  case Intrinsic::x86_avx2_maskstore_q_256:
2980  if (simplifyX86MaskedStore(*II, *this))
2981  return nullptr;
2982  break;
2983 
2984  case Intrinsic::x86_xop_vpcomb:
2985  case Intrinsic::x86_xop_vpcomd:
2986  case Intrinsic::x86_xop_vpcomq:
2987  case Intrinsic::x86_xop_vpcomw:
2988  if (Value *V = simplifyX86vpcom(*II, Builder, true))
2989  return replaceInstUsesWith(*II, V);
2990  break;
2991 
2992  case Intrinsic::x86_xop_vpcomub:
2993  case Intrinsic::x86_xop_vpcomud:
2994  case Intrinsic::x86_xop_vpcomuq:
2995  case Intrinsic::x86_xop_vpcomuw:
2996  if (Value *V = simplifyX86vpcom(*II, Builder, false))
2997  return replaceInstUsesWith(*II, V);
2998  break;
2999 
3000  case Intrinsic::ppc_altivec_vperm:
3001  // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
3002  // Note that ppc_altivec_vperm has a big-endian bias, so when creating
3003  // a vectorshuffle for little endian, we must undo the transformation
3004  // performed on vec_perm in altivec.h. That is, we must complement
3005  // the permutation mask with respect to 31 and reverse the order of
3006  // V1 and V2.
3007  if (Constant *Mask = dyn_cast<Constant>(II->getArgOperand(2))) {
3008  assert(Mask->getType()->getVectorNumElements() == 16 &&
3009  "Bad type for intrinsic!");
3010 
3011  // Check that all of the elements are integer constants or undefs.
3012  bool AllEltsOk = true;
3013  for (unsigned i = 0; i != 16; ++i) {
3014  Constant *Elt = Mask->getAggregateElement(i);
3015  if (!Elt || !(isa<ConstantInt>(Elt) || isa<UndefValue>(Elt))) {
3016  AllEltsOk = false;
3017  break;
3018  }
3019  }
3020 
3021  if (AllEltsOk) {
3022  // Cast the input vectors to byte vectors.
3023  Value *Op0 = Builder.CreateBitCast(II->getArgOperand(0),
3024  Mask->getType());
3025  Value *Op1 = Builder.CreateBitCast(II->getArgOperand(1),
3026  Mask->getType());
3027  Value *Result = UndefValue::get(Op0->getType());
3028 
3029  // Only extract each element once.
3030  Value *ExtractedElts[32];
3031  memset(ExtractedElts, 0, sizeof(ExtractedElts));
3032 
3033  for (unsigned i = 0; i != 16; ++i) {
3034  if (isa<UndefValue>(Mask->getAggregateElement(i)))
3035  continue;
3036  unsigned Idx =
3037  cast<ConstantInt>(Mask->getAggregateElement(i))->getZExtValue();
3038  Idx &= 31; // Match the hardware behavior.
3039  if (DL.isLittleEndian())
3040  Idx = 31 - Idx;
3041 
3042  if (!ExtractedElts[Idx]) {
3043  Value *Op0ToUse = (DL.isLittleEndian()) ? Op1 : Op0;
3044  Value *Op1ToUse = (DL.isLittleEndian()) ? Op0 : Op1;
3045  ExtractedElts[Idx] =
3046  Builder.CreateExtractElement(Idx < 16 ? Op0ToUse : Op1ToUse,
3047  Builder.getInt32(Idx&15));
3048  }
3049 
3050  // Insert this value into the result vector.
3051  Result = Builder.CreateInsertElement(Result, ExtractedElts[Idx],
3052  Builder.getInt32(i));
3053  }
3054  return CastInst::Create(Instruction::BitCast, Result, CI.getType());
3055  }
3056  }
3057  break;
3058 
3059  case Intrinsic::arm_neon_vld1: {
3060  unsigned MemAlign = getKnownAlignment(II->getArgOperand(0),
3061  DL, II, &AC, &DT);
3062  if (Value *V = simplifyNeonVld1(*II, MemAlign, Builder))
3063  return replaceInstUsesWith(*II, V);
3064  break;
3065  }
3066 
3067  case Intrinsic::arm_neon_vld2:
3068  case Intrinsic::arm_neon_vld3:
3069  case Intrinsic::arm_neon_vld4:
3070  case Intrinsic::arm_neon_vld2lane:
3071  case Intrinsic::arm_neon_vld3lane:
3072  case Intrinsic::arm_neon_vld4lane:
3073  case Intrinsic::arm_neon_vst1:
3074  case Intrinsic::arm_neon_vst2:
3075  case Intrinsic::arm_neon_vst3:
3076  case Intrinsic::arm_neon_vst4:
3077  case Intrinsic::arm_neon_vst2lane:
3078  case Intrinsic::arm_neon_vst3lane:
3079  case Intrinsic::arm_neon_vst4lane: {
3080  unsigned MemAlign =
3081  getKnownAlignment(II->getArgOperand(0), DL, II, &AC, &DT);
3082  unsigned AlignArg = II->getNumArgOperands() - 1;
3083  ConstantInt *IntrAlign = dyn_cast<ConstantInt>(II->getArgOperand(AlignArg));
3084  if (IntrAlign && IntrAlign->getZExtValue() < MemAlign) {
3085  II->setArgOperand(AlignArg,
3086  ConstantInt::get(Type::getInt32Ty(II->getContext()),
3087  MemAlign, false));
3088  return II;
3089  }
3090  break;
3091  }
3092 
3093  case Intrinsic::arm_neon_vtbl1:
3094  case Intrinsic::aarch64_neon_tbl1:
3095  if (Value *V = simplifyNeonTbl1(*II, Builder))
3096  return replaceInstUsesWith(*II, V);
3097  break;
3098 
3099  case Intrinsic::arm_neon_vmulls:
3100  case Intrinsic::arm_neon_vmullu:
3101  case Intrinsic::aarch64_neon_smull:
3102  case Intrinsic::aarch64_neon_umull: {
3103  Value *Arg0 = II->getArgOperand(0);
3104  Value *Arg1 = II->getArgOperand(1);
3105 
3106  // Handle mul by zero first:
3107  if (isa<ConstantAggregateZero>(Arg0) || isa<ConstantAggregateZero>(Arg1)) {
3108  return replaceInstUsesWith(CI, ConstantAggregateZero::get(II->getType()));
3109  }
3110 
3111  // Check for constant LHS & RHS - in this case we just simplify.
3112  bool Zext = (II->getIntrinsicID() == Intrinsic::arm_neon_vmullu ||
3113  II->getIntrinsicID() == Intrinsic::aarch64_neon_umull);
3114  VectorType *NewVT = cast<VectorType>(II->getType());
3115  if (Constant *CV0 = dyn_cast<Constant>(Arg0)) {
3116  if (Constant *CV1 = dyn_cast<Constant>(Arg1)) {
3117  CV0 = ConstantExpr::getIntegerCast(CV0, NewVT, /*isSigned=*/!Zext);
3118  CV1 = ConstantExpr::getIntegerCast(CV1, NewVT, /*isSigned=*/!Zext);
3119 
3120  return replaceInstUsesWith(CI, ConstantExpr::getMul(CV0, CV1));
3121  }
3122 
3123  // Couldn't simplify - canonicalize constant to the RHS.
3124  std::swap(Arg0, Arg1);
3125  }
3126 
3127  // Handle mul by one:
3128  if (Constant *CV1 = dyn_cast<Constant>(Arg1))
3129  if (ConstantInt *Splat =
3130  dyn_cast_or_null<ConstantInt>(CV1->getSplatValue()))
3131  if (Splat->isOne())
3132  return CastInst::CreateIntegerCast(Arg0, II->getType(),
3133  /*isSigned=*/!Zext);
3134 
3135  break;
3136  }
3137  case Intrinsic::arm_neon_aesd:
3138  case Intrinsic::arm_neon_aese:
3139  case Intrinsic::aarch64_crypto_aesd:
3140  case Intrinsic::aarch64_crypto_aese: {
3141  Value *DataArg = II->getArgOperand(0);
3142  Value *KeyArg = II->getArgOperand(1);
3143 
3144  // Try to use the builtin XOR in AESE and AESD to eliminate a prior XOR
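  // Illustrative example (not from the original source): AESE/AESD xor the
  // data with the key before the table lookups, so when the key operand is
  // zero a prior xor can be absorbed:
  //   aese(xor(%x, %k), 0) --> aese(%x, %k)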
3145  Value *Data, *Key;
3146  if (match(KeyArg, m_ZeroInt()) &&
3147  match(DataArg, m_Xor(m_Value(Data), m_Value(Key)))) {
3148  II->setArgOperand(0, Data);
3149  II->setArgOperand(1, Key);
3150  return II;
3151  }
3152  break;
3153  }
3154  case Intrinsic::amdgcn_rcp: {
3155  Value *Src = II->getArgOperand(0);
3156 
3157  // TODO: Move to ConstantFolding/InstSimplify?
3158  if (isa<UndefValue>(Src))
3159  return replaceInstUsesWith(CI, Src);
3160 
3161  if (const ConstantFP *C = dyn_cast<ConstantFP>(Src)) {
3162  const APFloat &ArgVal = C->getValueAPF();
3163  APFloat Val(ArgVal.getSemantics(), 1.0);
3164  APFloat::opStatus Status = Val.divide(ArgVal,
3165  APFloat::rmNearestTiesToEven);
3166  // Only do this if it was exact and therefore not dependent on the
3167  // rounding mode.
3168  if (Status == APFloat::opOK)
3169  return replaceInstUsesWith(CI, ConstantFP::get(II->getContext(), Val));
3170  }
3171 
3172  break;
3173  }
3174  case Intrinsic::amdgcn_rsq: {
3175  Value *Src = II->getArgOperand(0);
3176 
3177  // TODO: Move to ConstantFolding/InstSimplify?
3178  if (isa<UndefValue>(Src))
3179  return replaceInstUsesWith(CI, Src);
3180  break;
3181  }
3182  case Intrinsic::amdgcn_frexp_mant:
3183  case Intrinsic::amdgcn_frexp_exp: {
3184  Value *Src = II->getArgOperand(0);
3185  if (const ConstantFP *C = dyn_cast<ConstantFP>(Src)) {
3186  int Exp;
3187  APFloat Significand = frexp(C->getValueAPF(), Exp,
3188  APFloat::rmNearestTiesToEven);
3189 
3190  if (II->getIntrinsicID() == Intrinsic::amdgcn_frexp_mant) {
3191  return replaceInstUsesWith(CI, ConstantFP::get(II->getContext(),
3192  Significand));
3193  }
3194 
3195  // Match instruction special case behavior.
3196  if (Exp == APFloat::IEK_NaN || Exp == APFloat::IEK_Inf)
3197  Exp = 0;
3198 
3199  return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), Exp));
3200  }
3201 
3202  if (isa<UndefValue>(Src))
3203  return replaceInstUsesWith(CI, UndefValue::get(II->getType()));
3204 
3205  break;
3206  }
3207  case Intrinsic::amdgcn_class: {
3208  enum {
3209  S_NAN = 1 << 0, // Signaling NaN
3210  Q_NAN = 1 << 1, // Quiet NaN
3211  N_INFINITY = 1 << 2, // Negative infinity
3212  N_NORMAL = 1 << 3, // Negative normal
3213  N_SUBNORMAL = 1 << 4, // Negative subnormal
3214  N_ZERO = 1 << 5, // Negative zero
3215  P_ZERO = 1 << 6, // Positive zero
3216  P_SUBNORMAL = 1 << 7, // Positive subnormal
3217  P_NORMAL = 1 << 8, // Positive normal
3218  P_INFINITY = 1 << 9 // Positive infinity
3219  };
3220 
3221  const uint32_t FullMask = S_NAN | Q_NAN | N_INFINITY | N_NORMAL |
3222  N_SUBNORMAL | N_ZERO | P_ZERO | P_SUBNORMAL | P_NORMAL | P_INFINITY;
3223 
3224  Value *Src0 = II->getArgOperand(0);
3225  Value *Src1 = II->getArgOperand(1);
3226  const ConstantInt *CMask = dyn_cast<ConstantInt>(Src1);
3227  if (!CMask) {
3228  if (isa<UndefValue>(Src0))
3229  return replaceInstUsesWith(*II, UndefValue::get(II->getType()));
3230 
3231  if (isa<UndefValue>(Src1))
3232  return replaceInstUsesWith(*II, ConstantInt::get(II->getType(), false));
3233  break;
3234  }
3235 
3236  uint32_t Mask = CMask->getZExtValue();
3237 
3238  // If all possible tests are enabled, the result is true regardless of the value.
3239  if ((Mask & FullMask) == FullMask)
3240  return replaceInstUsesWith(*II, ConstantInt::get(II->getType(), true));
3241 
3242  if ((Mask & FullMask) == 0)
3243  return replaceInstUsesWith(*II, ConstantInt::get(II->getType(), false));
3244 
3245  if (Mask == (S_NAN | Q_NAN)) {
3246  // Equivalent of isnan. Replace with standard fcmp.
3247  Value *FCmp = Builder.CreateFCmpUNO(Src0, Src0);
3248  FCmp->takeName(II);
3249  return replaceInstUsesWith(*II, FCmp);
3250  }
3251 
3252  const ConstantFP *CVal = dyn_cast<ConstantFP>(Src0);
3253  if (!CVal) {
3254  if (isa<UndefValue>(Src0))
3255  return replaceInstUsesWith(*II, UndefValue::get(II->getType()));
3256 
3257  // Clamp mask to used bits
3258  if ((Mask & FullMask) != Mask) {
3259  CallInst *NewCall = Builder.CreateCall(II->getCalledFunction(),
3260  { Src0, ConstantInt::get(Src1->getType(), Mask & FullMask) }
3261  );
3262 
3263  NewCall->takeName(II);
3264  return replaceInstUsesWith(*II, NewCall);
3265  }
3266 
3267  break;
3268  }
3269 
3270  const APFloat &Val = CVal->getValueAPF();
3271 
3272  bool Result =
3273  ((Mask & S_NAN) && Val.isNaN() && Val.isSignaling()) ||
3274  ((Mask & Q_NAN) && Val.isNaN() && !Val.isSignaling()) ||
3275  ((Mask & N_INFINITY) && Val.isInfinity() && Val.isNegative()) ||
3276  ((Mask & N_NORMAL) && Val.isNormal() && Val.isNegative()) ||
3277  ((Mask & N_SUBNORMAL) && Val.isDenormal() && Val.isNegative()) ||
3278  ((Mask & N_ZERO) && Val.isZero() && Val.isNegative()) ||
3279  ((Mask & P_ZERO) && Val.isZero() && !Val.isNegative()) ||
3280  ((Mask & P_SUBNORMAL) && Val.isDenormal() && !Val.isNegative()) ||
3281  ((Mask & P_NORMAL) && Val.isNormal() && !Val.isNegative()) ||
3282  ((Mask & P_INFINITY) && Val.isInfinity() && !Val.isNegative());
3283 
3284  return replaceInstUsesWith(*II, ConstantInt::get(II->getType(), Result));
3285  }
3286  case Intrinsic::amdgcn_cvt_pkrtz: {
3287  Value *Src0 = II->getArgOperand(0);
3288  Value *Src1 = II->getArgOperand(1);
3289  if (const ConstantFP *C0 = dyn_cast<ConstantFP>(Src0)) {
3290  if (const ConstantFP *C1 = dyn_cast<ConstantFP>(Src1)) {
3291  const fltSemantics &HalfSem
3292  = II->getType()->getScalarType()->getFltSemantics();
3293  bool LosesInfo;
3294  APFloat Val0 = C0->getValueAPF();
3295  APFloat Val1 = C1->getValueAPF();
3296  Val0.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo);
3297  Val1.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo);
3298 
3299  Constant *Folded = ConstantVector::get({
3300  ConstantFP::get(II->getContext(), Val0),
3301  ConstantFP::get(II->getContext(), Val1) });
3302  return replaceInstUsesWith(*II, Folded);
3303  }
3304  }
3305 
3306  if (isa<UndefValue>(Src0) && isa<UndefValue>(Src1))
3307  return replaceInstUsesWith(*II, UndefValue::get(II->getType()));
3308 
3309  break;
3310  }
3311  case Intrinsic::amdgcn_cvt_pknorm_i16:
3312  case Intrinsic::amdgcn_cvt_pknorm_u16:
3313  case Intrinsic::amdgcn_cvt_pk_i16:
3314  case Intrinsic::amdgcn_cvt_pk_u16: {
3315  Value *Src0 = II->getArgOperand(0);
3316  Value *Src1 = II->getArgOperand(1);
3317 
3318  if (isa<UndefValue>(Src0) && isa<UndefValue>(Src1))
3319  return replaceInstUsesWith(*II, UndefValue::get(II->getType()));
3320 
3321  break;
3322  }
3323  case Intrinsic::amdgcn_ubfe:
3324  case Intrinsic::amdgcn_sbfe: {
3325  // Decompose simple cases into standard shifts.
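  // Illustrative example (not from the original source), for a 32-bit ubfe
  // with a zero offset and a variable width %w:
  //   ubfe(%x, 0, %w) --> lshr(shl(%x, 32 - %w), 32 - %w)
  // sbfe uses ashr instead of lshr so the extracted field is sign-extended.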
3326  Value *Src = II->getArgOperand(0);
3327  if (isa<UndefValue>(Src))
3328  return replaceInstUsesWith(*II, Src);
3329 
3330  unsigned Width;
3331  Type *Ty = II->getType();
3332  unsigned IntSize = Ty->getIntegerBitWidth();
3333 
3334  ConstantInt *CWidth = dyn_cast<ConstantInt>(II->getArgOperand(2));
3335  if (CWidth) {
3336  Width = CWidth->getZExtValue();
3337  if ((Width & (IntSize - 1)) == 0)
3338  return replaceInstUsesWith(*II, ConstantInt::getNullValue(Ty));
3339 
3340  if (Width >= IntSize) {
3341  // Hardware ignores high bits, so remove those.
3342  II->setArgOperand(2, ConstantInt::get(CWidth->getType(),
3343  Width & (IntSize - 1)));
3344  return II;
3345  }
3346  }
3347 
3348  unsigned Offset;
3349  ConstantInt *COffset = dyn_cast<ConstantInt>(II->getArgOperand(1));
3350  if (COffset) {
3351  Offset = COffset->getZExtValue();
3352  if (Offset >= IntSize) {
3353  II->setArgOperand(1, ConstantInt::get(COffset->getType(),
3354  Offset & (IntSize - 1)));
3355  return II;
3356  }
3357  }
3358 
3359  bool Signed = II->getIntrinsicID() == Intrinsic::amdgcn_sbfe;
3360 
3361  // TODO: Also emit sub if only width is constant.
3362  if (!CWidth && COffset && Offset == 0) {
3363  Constant *KSize = ConstantInt::get(COffset->getType(), IntSize);
3364  Value *ShiftVal = Builder.CreateSub(KSize, II->getArgOperand(2));
3365  ShiftVal = Builder.CreateZExt(ShiftVal, II->getType());
3366 
3367  Value *Shl = Builder.CreateShl(Src, ShiftVal);
3368  Value *RightShift = Signed ? Builder.CreateAShr(Shl, ShiftVal)
3369  : Builder.CreateLShr(Shl, ShiftVal);
3370  RightShift->takeName(II);
3371  return replaceInstUsesWith(*II, RightShift);
3372  }
3373 
3374  if (!CWidth || !COffset)
3375  break;
3376 
3377  // TODO: This allows folding to undef when the hardware has specific
3378  // behavior?
3379  if (Offset + Width < IntSize) {
3380  Value *Shl = Builder.CreateShl(Src, IntSize - Offset - Width);
3381  Value *RightShift = Signed ? Builder.CreateAShr(Shl, IntSize - Width)
3382  : Builder.CreateLShr(Shl, IntSize - Width);
3383  RightShift->takeName(II);
3384  return replaceInstUsesWith(*II, RightShift);
3385  }
3386 
3387  Value *RightShift = Signed ? Builder.CreateAShr(Src, Offset)
3388  : Builder.CreateLShr(Src, Offset);
3389 
3390  RightShift->takeName(II);
3391  return replaceInstUsesWith(*II, RightShift);
3392  }
3393  case Intrinsic::amdgcn_exp:
3394  case Intrinsic::amdgcn_exp_compr: {
3395  ConstantInt *En = dyn_cast<ConstantInt>(II->getArgOperand(1));
3396  if (!En) // Illegal.
3397  break;
3398 
3399  unsigned EnBits = En->getZExtValue();
3400  if (EnBits == 0xf)
3401  break; // All inputs enabled.
3402 
3403  bool IsCompr = II->getIntrinsicID() == Intrinsic::amdgcn_exp_compr;
3404  bool Changed = false;
3405  for (int I = 0; I < (IsCompr ? 2 : 4); ++I) {
3406  if ((!IsCompr && (EnBits & (1 << I)) == 0) ||
3407  (IsCompr && ((EnBits & (0x3 << (2 * I))) == 0))) {
3408  Value *Src = II->getArgOperand(I + 2);
3409  if (!isa<UndefValue>(Src)) {
3410  II->setArgOperand(I + 2, UndefValue::get(Src->getType()));
3411  Changed = true;
3412  }
3413  }
3414  }
3415 
3416  if (Changed)
3417  return II;
3418 
3419  break;
3420  }
3421  case Intrinsic::amdgcn_fmed3: {
3422  // Note this does not preserve proper sNaN behavior if IEEE-mode is enabled
3423  // for the shader.
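  // Rough summary of the folds below (illustrative, not from the original
  // source): constants are rotated toward the last operands, and
  //   fmed3(%x, %y, NaN) --> minnum(%x, %y)
  // because a NaN (or undef) third operand reduces the median to the minimum
  // of the other two operands.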
3424 
3425  Value *Src0 = II->getArgOperand(0);
3426  Value *Src1 = II->getArgOperand(1);
3427  Value *Src2 = II->getArgOperand(2);
3428 
3429  bool Swap = false;
3430  // Canonicalize constants to RHS operands.
3431  //
3432  // fmed3(c0, x, c1) -> fmed3(x, c0, c1)
3433  if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
3434  std::swap(Src0, Src1);
3435  Swap = true;
3436  }
3437 
3438  if (isa<Constant>(Src1) && !isa<Constant>(Src2)) {
3439  std::swap(Src1, Src2);
3440  Swap = true;
3441  }
3442 
3443  if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
3444  std::swap(Src0, Src1);
3445  Swap = true;
3446  }
3447 
3448  if (Swap) {
3449  II->setArgOperand(0, Src0);
3450  II->setArgOperand(1, Src1);
3451  II->setArgOperand(2, Src2);
3452  return II;
3453  }
3454 
3455  if (match(Src2, m_NaN()) || isa<UndefValue>(Src2)) {
3456  CallInst *NewCall = Builder.CreateMinNum(Src0, Src1);
3457  NewCall->copyFastMathFlags(II);
3458  NewCall->takeName(II);
3459  return replaceInstUsesWith(*II, NewCall);
3460  }
3461 
3462  if (const ConstantFP *C0 = dyn_cast<ConstantFP>(Src0)) {
3463  if (const ConstantFP *C1 = dyn_cast<ConstantFP>(Src1)) {
3464  if (const ConstantFP *C2 = dyn_cast<ConstantFP>(Src2)) {
3465  APFloat Result = fmed3AMDGCN(C0->getValueAPF(), C1->getValueAPF(),
3466  C2->getValueAPF());
3467  return replaceInstUsesWith(*II,
3468  ConstantFP::get(Builder.getContext(), Result));
3469  }
3470  }
3471  }
3472 
3473  break;
3474  }
3475  case Intrinsic::amdgcn_icmp:
3476  case Intrinsic::amdgcn_fcmp: {
3477  const ConstantInt *CC = dyn_cast<ConstantInt>(II->getArgOperand(2));
3478  if (!CC)
3479  break;
3480 
3481  // Guard against invalid arguments.
3482  int64_t CCVal = CC->getZExtValue();
3483  bool IsInteger = II->getIntrinsicID() == Intrinsic::amdgcn_icmp;
3484  if ((IsInteger && (CCVal < CmpInst::FIRST_ICMP_PREDICATE ||
3485  CCVal > CmpInst::LAST_ICMP_PREDICATE)) ||
3486  (!IsInteger && (CCVal < CmpInst::FIRST_FCMP_PREDICATE ||
3487  CCVal > CmpInst::LAST_FCMP_PREDICATE)))
3488  break;
3489 
3490  Value *Src0 = II->getArgOperand(0);
3491  Value *Src1 = II->getArgOperand(1);
3492 
3493  if (auto *CSrc0 = dyn_cast<Constant>(Src0)) {
3494  if (auto *CSrc1 = dyn_cast<Constant>(Src1)) {
3495  Constant *CCmp = ConstantExpr::getCompare(CCVal, CSrc0, CSrc1);
3496  if (CCmp->isNullValue()) {
3497  return replaceInstUsesWith(
3498  *II, ConstantExpr::getSExt(CCmp, II->getType()));
3499  }
3500 
3501  // The result of V_ICMP/V_FCMP assembly instructions (which this
3502  // intrinsic exposes) is one bit per thread, masked with the EXEC
3503  // register (which contains the bitmask of live threads). So a
3504  // comparison that always returns true is the same as a read of the
3505  // EXEC register.
3506  Value *NewF = Intrinsic::getDeclaration(
3507  II->getModule(), Intrinsic::read_register, II->getType());
3508  Metadata *MDArgs[] = {MDString::get(II->getContext(), "exec")};
3509  MDNode *MD = MDNode::get(II->getContext(), MDArgs);
3510  Value *Args[] = {MetadataAsValue::get(II->getContext(), MD)};
3511  CallInst *NewCall = Builder.CreateCall(NewF, Args);
3512  NewCall->addAttribute(AttributeList::FunctionIndex,
3513  Attribute::Convergent);
3514  NewCall->takeName(II);
3515  return replaceInstUsesWith(*II, NewCall);
3516  }
3517 
3518  // Canonicalize constants to RHS.
3519  CmpInst::Predicate SwapPred
3520  = CmpInst::getSwappedPredicate(static_cast<CmpInst::Predicate>(CCVal));
3521  II->setArgOperand(0, Src1);
3522  II->setArgOperand(1, Src0);
3523  II->setArgOperand(2, ConstantInt::get(CC->getType(),
3524  static_cast<int>(SwapPred)));
3525  return II;
3526  }
3527 
3528  if (CCVal != CmpInst::ICMP_EQ && CCVal != CmpInst::ICMP_NE)
3529  break;
3530 
3531  // Canonicalize compare eq with true value to compare != 0
3532  // llvm.amdgcn.icmp(zext (i1 x), 1, eq)
3533  // -> llvm.amdgcn.icmp(zext (i1 x), 0, ne)
3534  // llvm.amdgcn.icmp(sext (i1 x), -1, eq)
3535  // -> llvm.amdgcn.icmp(sext (i1 x), 0, ne)
3536  Value *ExtSrc;
3537  if (CCVal == CmpInst::ICMP_EQ &&
3538  ((match(Src1, m_One()) && match(Src0, m_ZExt(m_Value(ExtSrc)))) ||
3539  (match(Src1, m_AllOnes()) && match(Src0, m_SExt(m_Value(ExtSrc))))) &&
3540  ExtSrc->getType()->isIntegerTy(1)) {
3541  II->setArgOperand(1, ConstantInt::getNullValue(Src1->getType()));
3542  II->setArgOperand(2, ConstantInt::get(CC->getType(), CmpInst::ICMP_NE));
3543  return II;
3544  }
3545 
3546  CmpInst::Predicate SrcPred;
3547  Value *SrcLHS;
3548  Value *SrcRHS;
3549 
3550  // Fold compare eq/ne with 0 from a compare result as the predicate to the
3551  // intrinsic. The typical use is a wave vote function in the library, which
3552  // will be fed from a user code condition compared with 0. Fold in the
3553  // redundant compare.
3554 
3555  // llvm.amdgcn.icmp([sz]ext ([if]cmp pred a, b), 0, ne)
3556  // -> llvm.amdgcn.[if]cmp(a, b, pred)
3557  //
3558  // llvm.amdgcn.icmp([sz]ext ([if]cmp pred a, b), 0, eq)
3559  // -> llvm.amdgcn.[if]cmp(a, b, inv pred)
3560  if (match(Src1, m_Zero()) &&
3561  match(Src0,
3562  m_ZExtOrSExt(m_Cmp(SrcPred, m_Value(SrcLHS), m_Value(SrcRHS))))) {
3563  if (CCVal == CmpInst::ICMP_EQ)
3564  SrcPred = CmpInst::getInversePredicate(SrcPred);
3565 
3566  Intrinsic::ID NewIID = CmpInst::isFPPredicate(SrcPred) ?
3567  Intrinsic::amdgcn_fcmp : Intrinsic::amdgcn_icmp;
3568 
3569  Value *NewF = Intrinsic::getDeclaration(II->getModule(), NewIID,
3570  SrcLHS->getType());
3571  Value *Args[] = { SrcLHS, SrcRHS,
3572  ConstantInt::get(CC->getType(), SrcPred) };
3573  CallInst *NewCall = Builder.CreateCall(NewF, Args);
3574  NewCall->takeName(II);
3575  return replaceInstUsesWith(*II, NewCall);
3576  }
3577 
3578  break;
3579  }
3580  case Intrinsic::amdgcn_wqm_vote: {
3581  // wqm_vote is identity when the argument is constant.
3582  if (!isa<Constant>(II->getArgOperand(0)))
3583  break;
3584 
3585  return replaceInstUsesWith(*II, II->getArgOperand(0));
3586  }
3587  case Intrinsic::amdgcn_kill: {
3588  const ConstantInt *C = dyn_cast<ConstantInt>(II->getArgOperand(0));
3589  if (!C || !C->getZExtValue())
3590  break;
3591 
3592  // amdgcn.kill(i1 1) is a no-op
3593  return eraseInstFromFunction(CI);
3594  }
3595  case Intrinsic::amdgcn_update_dpp: {
3596  Value *Old = II->getArgOperand(0);
3597 
3598  auto BC = dyn_cast<ConstantInt>(II->getArgOperand(5));
3599  auto RM = dyn_cast<ConstantInt>(II->getArgOperand(3));
3600  auto BM = dyn_cast<ConstantInt>(II->getArgOperand(4));
3601  if (!BC || !RM || !BM ||
3602  BC->isZeroValue() ||
3603  RM->getZExtValue() != 0xF ||
3604  BM->getZExtValue() != 0xF ||
3605  isa<UndefValue>(Old))
3606  break;
3607 
3608  // If bound_ctrl = 1 and row mask = bank mask = 0xf, we can omit the old value.
3609  II->setOperand(0, UndefValue::get(Old->getType()));
3610  return II;
3611  }
3612  case Intrinsic::stackrestore: {
3613  // If the save is right next to the restore, remove the restore. This can
3614  // happen when variable allocas are DCE'd.
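  // Illustrative example (not from the original source):
  //   %sp = call i8* @llvm.stacksave()
  //   call void @llvm.stackrestore(i8* %sp)
  // With nothing between the two calls, the restore has no effect.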
3615  if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
3616  if (SS->getIntrinsicID() == Intrinsic::stacksave) {
3617  // Skip over debug info.
3618  if (SS->getNextNonDebugInstruction() == II) {
3619  return eraseInstFromFunction(CI);
3620  }
3621  }
3622  }
3623 
3624  // Scan down this block to see if there is another stack restore in the
3625  // same block without an intervening call/alloca.
3626  BasicBlock::iterator BI(II);
3627  TerminatorInst *TI = II->getParent()->getTerminator();
3628  bool CannotRemove = false;
3629  for (++BI; &*BI != TI; ++BI) {
3630  if (isa<AllocaInst>(BI)) {
3631  CannotRemove = true;
3632  break;
3633  }
3634  if (CallInst *BCI = dyn_cast<CallInst>(BI)) {
3635  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(BCI)) {
3636  // If there is a stackrestore below this one, remove this one.
3637  if (II->getIntrinsicID() == Intrinsic::stackrestore)
3638  return eraseInstFromFunction(CI);
3639 
3640  // Bail if we cross over an intrinsic with side effects, such as
3641  // llvm.stacksave, llvm.read_register, or llvm.setjmp.
3642  if (II->mayHaveSideEffects()) {
3643  CannotRemove = true;
3644  break;
3645  }
3646  } else {
3647  // If we found a non-intrinsic call, we can't remove the stack
3648  // restore.
3649  CannotRemove = true;
3650  break;
3651  }
3652  }
3653  }
3654 
3655  // If the stack restore is in a return, resume, or unwind block and if there
3656  // are no allocas or calls between the restore and the return, nuke the
3657  // restore.
3658  if (!CannotRemove && (isa<ReturnInst>(TI) || isa<ResumeInst>(TI)))
3659  return eraseInstFromFunction(CI);
3660  break;
3661  }
3662  case Intrinsic::lifetime_start:
3663  // ASan needs to poison memory to detect invalid accesses, which is
3664  // possible even for an empty lifetime range.
3665  if (II->getFunction()->hasFnAttribute(Attribute::SanitizeAddress) ||
3666  II->getFunction()->hasFnAttribute(Attribute::SanitizeHWAddress))
3667  break;
3668 
3669  if (removeTriviallyEmptyRange(*II, Intrinsic::lifetime_start,
3670  Intrinsic::lifetime_end, *this))
3671  return nullptr;
3672  break;
3673  case Intrinsic::assume: {
3674  Value *IIOperand = II->getArgOperand(0);
3675  // Remove an assume if it is immediately followed by an identical assume.
3676  if (match(II->getNextNode(),
3677  m_Intrinsic<Intrinsic::assume>(m_Specific(IIOperand))))
3678  return eraseInstFromFunction(CI);
3679 
3680  // Canonicalize assume(a && b) -> assume(a); assume(b);
3681  // Note: New assumption intrinsics created here are registered by
3682  // the InstCombineIRInserter object.
3683  Value *AssumeIntrinsic = II->getCalledValue(), *A, *B;
3684  if (match(IIOperand, m_And(m_Value(A), m_Value(B)))) {
3685  Builder.CreateCall(AssumeIntrinsic, A, II->getName());
3686  Builder.CreateCall(AssumeIntrinsic, B, II->getName());
3687  return eraseInstFromFunction(*II);
3688  }
3689  // assume(!(a || b)) -> assume(!a); assume(!b);
3690  if (match(IIOperand, m_Not(m_Or(m_Value(A), m_Value(B))))) {
3691  Builder.CreateCall(AssumeIntrinsic, Builder.CreateNot(A), II->getName());
3692  Builder.CreateCall(AssumeIntrinsic, Builder.CreateNot(B), II->getName());
3693  return eraseInstFromFunction(*II);
3694  }
3695 
3696  // assume( (load addr) != null ) -> add 'nonnull' metadata to load
3697  // (if assume is valid at the load)
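  // Illustrative example (not from the original source):
  //   %p = load i8*, i8** %addr
  //   %c = icmp ne i8* %p, null
  //   call void @llvm.assume(i1 %c)
  // becomes a load tagged with !nonnull metadata, and the assume is erased.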
3698  CmpInst::Predicate Pred;
3699  Instruction *LHS;
3700  if (match(IIOperand, m_ICmp(Pred, m_Instruction(LHS), m_Zero())) &&
3701  Pred == ICmpInst::ICMP_NE && LHS->getOpcode() == Instruction::Load &&
3702  LHS->getType()->isPointerTy() &&
3703  isValidAssumeForContext(II, LHS, &DT)) {
3704  MDNode *MD = MDNode::get(II->getContext(), None);
3705  LHS->setMetadata(LLVMContext::MD_nonnull, MD);
3706  return eraseInstFromFunction(*II);
3707 
3708  // TODO: apply nonnull return attributes to calls and invokes
3709  // TODO: apply range metadata for range check patterns?
3710  }
3711 
3712  // If there is a dominating assume with the same condition as this one,
3713  // then this one is redundant, and should be removed.
3714  KnownBits Known(1);
3715  computeKnownBits(IIOperand, Known, 0, II);
3716  if (Known.isAllOnes())
3717  return eraseInstFromFunction(*II);
3718 
3719  // Update the cache of affected values for this assumption (we might be
3720  // here because we just simplified the condition).
3721  AC.updateAffectedValues(II);
3722  break;
3723  }
3724  case Intrinsic::experimental_gc_relocate: {
3725  // Translate facts known about a pointer before relocating into
3726  // facts about the relocate value, while being careful to
3727  // preserve relocation semantics.
3728  Value *DerivedPtr = cast<GCRelocateInst>(II)->getDerivedPtr();
3729 
3730  // Remove the relocation if unused; note that this check is required
3731  // to prevent the cases below from looping forever.
3732  if (II->use_empty())
3733  return eraseInstFromFunction(*II);
3734 
3735  // Undef is undef, even after relocation.
3736  // TODO: provide a hook for this in GCStrategy. This is clearly legal for
3737  // most practical collectors, but there was discussion in the review thread
3738  // about whether it was legal for all possible collectors.
3739  if (isa<UndefValue>(DerivedPtr))
3740  // Use undef of gc_relocate's type to replace it.
3741  return replaceInstUsesWith(*II, UndefValue::get(II->getType()));
3742 
3743  if (auto *PT = dyn_cast<PointerType>(II->getType())) {
3744  // The relocation of null will be null for most any collector.
3745  // TODO: provide a hook for this in GCStrategy. There might be some
3746  // weird collector this property does not hold for.
3747  if (isa<ConstantPointerNull>(DerivedPtr))
3748  // Use null-pointer of gc_relocate's type to replace it.
3749  return replaceInstUsesWith(*II, ConstantPointerNull::get(PT));
3750 
3751  // isKnownNonNull -> nonnull attribute
3752  if (isKnownNonZero(DerivedPtr, DL, 0, &AC, II, &DT))
3753  II->addAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
3754  }
3755 
3756  // TODO: bitcast(relocate(p)) -> relocate(bitcast(p))
3757  // Canonicalize on the type from the uses to the defs
3758 
3759  // TODO: relocate((gep p, C, C2, ...)) -> gep(relocate(p), C, C2, ...)
3760  break;
3761  }
3762 
3763  case Intrinsic::experimental_guard: {
3764  // Is this guard followed by another guard? We scan forward over a small
3765  // fixed window of instructions to handle common cases with conditions
3766  // computed between guards.
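  // Illustrative example (not from the original source), with %b computed
  // between the two guards:
  //   guard(%a); %b = icmp ult i32 %x, 10; guard(%b)
  // The icmp is safe to speculate, so it is moved above the first guard and
  // the two guards merge into a single guard(%a & %b).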
3767  Instruction *NextInst = II->getNextNode();
3768  for (unsigned i = 0; i < GuardWideningWindow; i++) {
3769  // Note: Using context-free form to avoid compile time blow up
3770  if (!isSafeToSpeculativelyExecute(NextInst))
3771  break;
3772  NextInst = NextInst->getNextNode();
3773  }
3774  Value *NextCond = nullptr;
3775  if (match(NextInst,
3776  m_Intrinsic<Intrinsic::experimental_guard>(m_Value(NextCond)))) {
3777  Value *CurrCond = II->getArgOperand(0);
3778 
3779  // Remove a guard that is immediately preceded by an identical guard.
3780  if (CurrCond == NextCond)
3781  return eraseInstFromFunction(*NextInst);
3782 
3783  // Otherwise canonicalize guard(a); guard(b) -> guard(a & b).
3784  Instruction* MoveI = II->getNextNode();
3785  while (MoveI != NextInst) {
3786  auto *Temp = MoveI;
3787  MoveI = MoveI->getNextNode();
3788  Temp->moveBefore(II);
3789  }
3790  II->setArgOperand(0, Builder.CreateAnd(CurrCond, NextCond));
3791  return eraseInstFromFunction(*NextInst);
3792  }
3793  break;
3794  }
3795  }
3796  return visitCallSite(II);
3797 }
3798 
3799 // Fence instruction simplification
3800 Instruction *InstCombiner::visitFenceInst(FenceInst &FI) {
3801  // Remove identical consecutive fences.
3802  Instruction *Next = FI.getNextNonDebugInstruction();
3803  if (auto *NFI = dyn_cast<FenceInst>(Next))
3804  if (FI.isIdenticalTo(NFI))
3805  return eraseInstFromFunction(FI);
3806  return nullptr;
3807 }
3808 
3809 // InvokeInst simplification
3810 Instruction *InstCombiner::visitInvokeInst(InvokeInst &II) {
3811  return visitCallSite(&II);
3812 }
3813 
3814 /// If this cast does not affect the value passed through the varargs area, we
3815 /// can eliminate the use of the cast.
3816 static bool isSafeToEliminateVarargsCast(CallSite CS,
3817  const DataLayout &DL,
3818  const CastInst *const CI,
3819  const int ix) {
3820  if (!CI->isLosslessCast())
3821  return false;
3822 
3823  // If this is a GC intrinsic, avoid munging types. We need types for
3824  // statepoint reconstruction in SelectionDAG.
3825  // TODO: This is probably something which should be expanded to all
3826  // intrinsics since the entire point of intrinsics is that
3827  // they are understandable by the optimizer.
3828  if (isStatepoint(CS) || isGCRelocate(CS) || isGCResult(CS))
3829  return false;
3830 
3831  // The size of ByVal or InAlloca arguments is derived from the type, so we
3832  // can't change to a type with a different size. If the size were
3833  // passed explicitly we could avoid this check.
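  // Illustrative example (not from the original source): a lossless bitcast
  // of an i32* vararg to i8* can simply be dropped, but for a byval argument
  // the pointee sizes must match because the callee copies that many bytes.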
3834  if (!CS.isByValOrInAllocaArgument(ix))
3835  return true;
3836 
3837  Type* SrcTy =
3838  cast<PointerType>(CI->getOperand(0)->getType())->getElementType();
3839  Type* DstTy = cast<PointerType>(CI->getType())->getElementType();
3840  if (!SrcTy->isSized() || !DstTy->isSized())
3841  return false;
3842  if (DL.getTypeAllocSize(SrcTy) != DL.getTypeAllocSize(DstTy))
3843  return false;
3844  return true;
3845 }
3846 
3847 Instruction *InstCombiner::tryOptimizeCall(CallInst *CI) {
3848  if (!CI->getCalledFunction()) return nullptr;
3849 
3850  auto InstCombineRAUW = [this](Instruction *From, Value *With) {
3851  replaceInstUsesWith(*From, With);
3852  };
3853  LibCallSimplifier Simplifier(DL, &TLI, ORE, InstCombineRAUW);
3854  if (Value *With = Simplifier.optimizeCall(CI)) {
3855  ++NumSimplified;
3856  return CI->use_empty() ? CI : replaceInstUsesWith(*CI, With);
3857  }
3858 
3859  return nullptr;
3860 }
3861 
3862 static IntrinsicInst *findInitTrampolineFromAlloca(Value *TrampMem) {
3863  // Strip off at most one level of pointer casts, looking for an alloca. This
3864  // is good enough in practice and simpler than handling any number of casts.
3865  Value *Underlying = TrampMem->stripPointerCasts();
3866  if (Underlying != TrampMem &&
3867  (!Underlying->hasOneUse() || Underlying->user_back() != TrampMem))
3868  return nullptr;
3869  if (!isa<AllocaInst>(Underlying))
3870  return nullptr;
3871 
3872  IntrinsicInst *InitTrampoline = nullptr;
3873  for (User *U : TrampMem->users()) {
3874  IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
3875  if (!II)
3876  return nullptr;
3877  if (II->getIntrinsicID() == Intrinsic::init_trampoline) {
3878  if (InitTrampoline)
3879  // More than one init_trampoline writes to this value. Give up.
3880  return nullptr;
3881  InitTrampoline = II;
3882  continue;
3883  }
3884  if (II->getIntrinsicID() == Intrinsic::adjust_trampoline)
3885  // Allow any number of calls to adjust.trampoline.
3886  continue;
3887  return nullptr;
3888  }
3889 
3890  // No call to init.trampoline found.
3891  if (!InitTrampoline)
3892  return nullptr;
3893 
3894  // Check that the alloca is being used in the expected way.
3895  if (InitTrampoline->getOperand(0) != TrampMem)
3896  return nullptr;
3897 
3898  return InitTrampoline;
3899 }
3900 
3901 static IntrinsicInst *findInitTrampolineFromBB(IntrinsicInst *AdjustTramp,
3902  Value *TrampMem) {
3903  // Visit all the previous instructions in the basic block, and try to find an
3904  // init.trampoline which has a direct path to the adjust.trampoline.
3905  for (BasicBlock::iterator I = AdjustTramp->getIterator(),
3906  E = AdjustTramp->getParent()->begin();
3907  I != E;) {
3908  Instruction *Inst = &*--I;
3909  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
3910  if (II->getIntrinsicID() == Intrinsic::init_trampoline &&
3911  II->getOperand(0) == TrampMem)
3912  return II;
3913  if (Inst->mayWriteToMemory())
3914  return nullptr;
3915  }
3916  return nullptr;
3917 }
3918 
3919 // Given a call to llvm.adjust.trampoline, find and return the corresponding
3920 // call to llvm.init.trampoline if the call to the trampoline can be optimized
3921 // to a direct call to a function. Otherwise return NULL.
3922 static IntrinsicInst *findInitTrampoline(Value *Callee) {
3923  Callee = Callee->stripPointerCasts();
3924  IntrinsicInst *AdjustTramp = dyn_cast<IntrinsicInst>(Callee);
3925  if (!AdjustTramp ||
3926  AdjustTramp->getIntrinsicID() != Intrinsic::adjust_trampoline)
3927  return nullptr;
3928 
3929  Value *TrampMem = AdjustTramp->getOperand(0);
3930 
3931  if (IntrinsicInst *IT = findInitTrampolineFromAlloca(TrampMem))
3932  return IT;
3933  if (IntrinsicInst *IT = findInitTrampolineFromBB(AdjustTramp, TrampMem))
3934  return IT;
3935  return nullptr;
3936 }
3937 
3938 /// Improvements for call and invoke instructions.
3939 Instruction *InstCombiner::visitCallSite(CallSite CS) {
3940  if (isAllocLikeFn(CS.getInstruction(), &TLI))
3941  return visitAllocSite(*CS.getInstruction());
3942 
3943  bool Changed = false;
3944 
3945  // Mark any parameters that are known to be non-null with the nonnull
3946  // attribute. This is helpful for inlining calls to functions with null
3947  // checks on their arguments.
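  // Illustrative example (not from the original source): if %p is an alloca,
  // or is otherwise provably non-null at this call,
  //   call void @use(i8* %p)  -->  call void @use(i8* nonnull %p)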
3948  SmallVector<unsigned, 4> ArgNos;
3949  unsigned ArgNo = 0;
3950 
3951  for (Value *V : CS.args()) {
3952  if (V->getType()->isPointerTy() &&
3953  !CS.paramHasAttr(ArgNo, Attribute::NonNull) &&
3954  isKnownNonZero(V, DL, 0, &AC, CS.getInstruction(), &DT))
3955  ArgNos.push_back(ArgNo);
3956  ArgNo++;
3957  }
3958 
3959  assert(ArgNo == CS.arg_size() && "sanity check");
3960 
3961  if (!ArgNos.empty()) {
3962  AttributeList AS = CS.getAttributes();
3963  LLVMContext &Ctx = CS.getInstruction()->getContext();
3964  AS = AS.addParamAttribute(Ctx, ArgNos,
3965  Attribute::get(Ctx, Attribute::NonNull));
3966  CS.setAttributes(AS);
3967  Changed = true;
3968  }
3969 
3970  // If the callee is a pointer to a function, attempt to move any casts to the
3971  // arguments of the call/invoke.
3972  Value *Callee = CS.getCalledValue();
3973  if (!isa<Function>(Callee) && transformConstExprCastCall(CS))
3974  return nullptr;
3975 
3976  if (Function *CalleeF = dyn_cast<Function>(Callee)) {
3977  // Remove the convergent attr on calls when the callee is not convergent.
3978  if (CS.isConvergent() && !CalleeF->isConvergent() &&
3979  !CalleeF->isIntrinsic()) {
3980  LLVM_DEBUG(dbgs() << "Removing convergent attr from instr "
3981  << CS.getInstruction() << "\n");
3982  CS.setNotConvergent();
3983  return CS.getInstruction();
3984  }
3985 
3986  // If the call and callee calling conventions don't match, this call must
3987  // be unreachable, as the call is undefined.
3988  if (CalleeF->getCallingConv() != CS.getCallingConv() &&
3989  // Only do this for calls to a function with a body. A prototype may
3990  // not actually end up matching the implementation's calling conv for a
3991  // variety of reasons (e.g. it may be written in assembly).
3992  !CalleeF->isDeclaration()) {
3993  Instruction *OldCall = CS.getInstruction();
3994  new StoreInst(ConstantInt::getTrue(Callee->getContext()),
3995  UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
3996  OldCall);
3997  // If OldCall does not return void then replaceAllUsesWith undef.
3998  // This allows ValueHandlers and custom metadata to adjust themselves.
3999  if (!OldCall->getType()->isVoidTy())
4000  replaceInstUsesWith(*OldCall, UndefValue::get(OldCall->getType()));
4001  if (isa<CallInst>(OldCall))
4002  return eraseInstFromFunction(*OldCall);
4003 
4004  // We cannot remove an invoke, because it would change the CFG, just
4005  // change the callee to a null pointer.
4006  cast<InvokeInst>(OldCall)->setCalledFunction(
4007  Constant::getNullValue(CalleeF->getType()));
4008  return nullptr;
4009  }
4010  }
4011 
4012  if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) {
4013  // If CS does not return void then replaceAllUsesWith undef.
4014  // This allows ValueHandlers and custom metadata to adjust themselves.
4015  if (!CS.getInstruction()->getType()->isVoidTy())
4016  replaceInstUsesWith(*CS.getInstruction(),
4017  UndefValue::get(CS.getInstruction()->getType()));
4018 
4019  if (isa<InvokeInst>(CS.getInstruction())) {
4020  // Can't remove an invoke because we cannot change the CFG.
4021  return nullptr;
4022  }
4023 
4024  // This instruction is not reachable, just remove it. We insert a store to
4025  // undef so that we know that this code is not reachable, despite the fact
4026  // that we can't modify the CFG here.
4027  new StoreInst(ConstantInt::getTrue(Callee->getContext()),
4028  UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
4029  CS.getInstruction());
4030 
4031  return eraseInstFromFunction(*CS.getInstruction());
4032  }
4033 
4034  if (IntrinsicInst *II = findInitTrampoline(Callee))
4035  return transformCallThroughTrampoline(CS, II);
4036 
4037  PointerType *PTy = cast<PointerType>(Callee->getType());
4038  FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
4039  if (FTy->isVarArg()) {
4040  int ix = FTy->getNumParams();
4041  // See if we can optimize any arguments passed through the varargs area of
4042  // the call.
4043  for (CallSite::arg_iterator I = CS.arg_begin() + FTy->getNumParams(),
4044  E = CS.arg_end(); I != E; ++I, ++ix) {
4045  CastInst *CI = dyn_cast<CastInst>(*I);
4046  if (CI && isSafeToEliminateVarargsCast(CS, DL, CI, ix)) {
4047  *I = CI->getOperand(0);
4048  Changed = true;
4049  }
4050  }
4051  }
4052 
4053  if (isa<InlineAsm>(Callee) && !CS.doesNotThrow()) {
4054  // Inline asm calls cannot throw - mark them 'nounwind'.
4055  CS.setDoesNotThrow();
4056  Changed = true;
4057  }
4058 
4059  // Try to optimize the call if possible; we require DataLayout for most of
4060  // this. None of these calls are seen as possibly dead so go ahead and
4061  // delete the instruction now.
4062  if (CallInst *CI = dyn_cast<CallInst>(CS.getInstruction())) {
4063  Instruction *I = tryOptimizeCall(CI);
4064  // If we changed something return the result, etc. Otherwise let
4065  // the fallthrough check.
4066  if (I) return eraseInstFromFunction(*I);
4067  }
4068 
4069  return Changed ? CS.getInstruction() : nullptr;
4070 }
4071 
4072 /// If the callee is a constexpr cast of a function, attempt to move the cast to
4073 /// the arguments of the call/invoke.
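/// For example (illustrative, not from the original source):
///   %r = call i32 bitcast (i32 (i8*)* @f to i32 (i32*)*)(i32* %p)
/// can become a direct call "call i32 @f(i8* %c)" after inserting the no-op
/// pointer cast "%c = bitcast i32* %p to i8*", provided the attributes stay
/// compatible with the new types.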
4074 bool InstCombiner::transformConstExprCastCall(CallSite CS) {
4075  Function *Callee = dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
4076  if (!Callee)
4077  return false;
4078 
4079  // If this is a call to a thunk function, don't remove the cast. Thunks are
4080  // used to transparently forward all incoming parameters and outgoing return
4081  // values, so it's important to leave the cast in place.
4082  if (Callee->hasFnAttribute("thunk"))
4083  return false;
4084 
4085  // If this is a musttail call, the callee's prototype must match the caller's
4086  // prototype with the exception of pointee types. The code below doesn't
4087  // implement that, so we can't do this transform.
4088  // TODO: Do the transform if it only requires adding pointer casts.
4089  if (CS.isMustTailCall())
4090  return false;
4091 
4092  Instruction *Caller = CS.getInstruction();
4093  const AttributeList &CallerPAL = CS.getAttributes();
4094 
4095  // Okay, this is a cast from a function to a different type. Unless doing so
4096  // would cause a type conversion of one of our arguments, change this call to
4097  // be a direct call with arguments casted to the appropriate types.
4098  FunctionType *FT = Callee->getFunctionType();
4099  Type *OldRetTy = Caller->getType();
4100  Type *NewRetTy = FT->getReturnType();
4101 
4102  // Check to see if we are changing the return type...
4103  if (OldRetTy != NewRetTy) {
4104 
4105  if (NewRetTy->isStructTy())
4106  return false; // TODO: Handle multiple return values.
4107 
4108  if (!CastInst::isBitOrNoopPointerCastable(NewRetTy, OldRetTy, DL)) {
4109  if (Callee->isDeclaration())
4110  return false; // Cannot transform this return value.
4111 
4112  if (!Caller->use_empty() &&
4113  // void -> non-void is handled specially
4114  !NewRetTy->isVoidTy())
4115  return false; // Cannot transform this return value.
4116  }
4117 
4118  if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
4119  AttrBuilder RAttrs(CallerPAL, AttributeList::ReturnIndex);
4120  if (RAttrs.overlaps(AttributeFuncs::typeIncompatible(NewRetTy)))
4121  return false; // Attribute not compatible with transformed value.
4122  }
4123 
4124  // If the callsite is an invoke instruction, and the return value is used by
4125  // a PHI node in a successor, we cannot change the return type of the call
4126  // because there is no place to put the cast instruction (without breaking
4127  // the critical edge). Bail out in this case.
4128  if (!Caller->use_empty())
4129  if (InvokeInst *II = dyn_cast<InvokeInst>(Caller))
4130  for (User *U : II->users())
4131  if (PHINode *PN = dyn_cast<PHINode>(U))
4132  if (PN->getParent() == II->getNormalDest() ||
4133  PN->getParent() == II->getUnwindDest())
4134  return false;
4135  }
4136 
4137  unsigned NumActualArgs = CS.arg_size();
4138  unsigned NumCommonArgs = std::min(FT->getNumParams(), NumActualArgs);
4139 
4140  // Prevent us turning:
4141  // declare void @takes_i32_inalloca(i32* inalloca)
4142  // call void bitcast (void (i32*)* @takes_i32_inalloca to void (i32)*)(i32 0)
4143  //
4144  // into:
4145  // call void @takes_i32_inalloca(i32* null)
4146  //
4147  // Similarly, avoid folding away bitcasts of byval calls.
4148  if (Callee->getAttributes().hasAttrSomewhere(Attribute::InAlloca) ||
4149  Callee->getAttributes().hasAttrSomewhere(Attribute::ByVal))
4150  return false;
4151 
4152  CallSite::arg_iterator AI = CS.arg_begin();
4153  for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) {
4154  Type *ParamTy = FT->getParamType(i);
4155  Type *ActTy = (*AI)->getType();
4156 
4157  if (!CastInst::isBitOrNoopPointerCastable(ActTy, ParamTy, DL))
4158  return false; // Cannot transform this parameter value.
4159 
4160  if (AttrBuilder(CallerPAL.getParamAttributes(i))
4161  .overlaps(AttributeFuncs::typeIncompatible(ParamTy)))
4162  return false; // Attribute not compatible with transformed value.
4163 
4164  if (CS.isInAllocaArgument(i))
4165  return false; // Cannot transform to and from inalloca.
4166 
4167  // If the parameter is passed as a byval argument, then we have to have a
4168  // sized type and the sized type has to have the same size as the old type.
4169  if (ParamTy != ActTy && CallerPAL.hasParamAttribute(i, Attribute::ByVal)) {
4170  PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy);
4171  if (!ParamPTy || !ParamPTy->getElementType()->isSized())
4172  return false;
4173 
4174  Type *CurElTy = ActTy->getPointerElementType();
4175  if (DL.getTypeAllocSize(CurElTy) !=
4176  DL.getTypeAllocSize(ParamPTy->getElementType()))
4177  return false;
4178  }
4179  }
4180 
4181  if (Callee->isDeclaration()) {
4182  // Do not delete arguments unless we have a function body.
4183  if (FT->getNumParams() < NumActualArgs && !FT->isVarArg())
4184  return false;
4185 
4186  // If the callee is just a declaration, don't change the varargsness of the
4187  // call. We don't want to introduce a varargs call where one doesn't
4188  // already exist.
4189  PointerType *APTy = cast<PointerType>(CS.getCalledValue()->getType());
4190  if (FT->isVarArg()!=cast<FunctionType>(APTy->getElementType())->isVarArg())
4191  return false;
4192 
4193  // If both the callee and the cast type are varargs, we still have to make
4194  // sure the number of fixed parameters is the same, or we have the same
4195  // ABI issues as if we introduce a varargs call.
4196  if (FT->isVarArg() &&
4197  cast<FunctionType>(APTy->getElementType())->isVarArg() &&
4198  FT->getNumParams() !=
4199  cast<FunctionType>(APTy->getElementType())->getNumParams())
4200  return false;
4201  }
4202 
4203  if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
4204  !CallerPAL.isEmpty()) {
4205  // In this case we have more arguments than the new function type, but we
4206  // won't be dropping them. Check that these extra arguments have attributes
4207  // that are compatible with being a vararg call argument.
4208  unsigned SRetIdx;
4209  if (CallerPAL.hasAttrSomewhere(Attribute::StructRet, &SRetIdx) &&
4210  SRetIdx > FT->getNumParams())
4211  return false;
4212  }
4213 
4214  // Okay, we decided that this is a safe thing to do: go ahead and start
4215  // inserting cast instructions as necessary.
4216  SmallVector<Value *, 8> Args;
4217  SmallVector<AttributeSet, 8> ArgAttrs;
4218  Args.reserve(NumActualArgs);
4219  ArgAttrs.reserve(NumActualArgs);
4220 
4221  // Get any return attributes.
4222  AttrBuilder RAttrs(CallerPAL, AttributeList::ReturnIndex);
4223 
4224  // If the return value is not being used, the type may not be compatible
4225  // with the existing attributes. Wipe out any problematic attributes.
4226  RAttrs.remove(AttributeFuncs::typeIncompatible(NewRetTy));
4227 
4228  AI = CS.arg_begin();
4229  for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
4230  Type *ParamTy = FT->getParamType(i);
4231 
4232  Value *NewArg = *AI;
4233  if ((*AI)->getType() != ParamTy)
4234  NewArg = Builder.CreateBitOrPointerCast(*AI, ParamTy);
4235  Args.push_back(NewArg);
4236 
4237  // Add any parameter attributes.
4238  ArgAttrs.push_back(CallerPAL.getParamAttributes(i));
4239  }
4240 
4241  // If the function takes more arguments than the call was taking, add them
4242  // now.
4243  for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i) {
4244  Args.push_back(Constant::getNullValue(FT->getParamType(i)));
4245  ArgAttrs.push_back(AttributeSet());
4246  }
4247 
4248  // If we are removing arguments to the function, emit an obnoxious warning.
4249  if (FT->getNumParams() < NumActualArgs) {
4250  // TODO: if (!FT->isVarArg()) this call may be unreachable. PR14722
4251  if (FT->isVarArg()) {
4252  // Add all of the arguments in their promoted form to the arg list.
4253  for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) {
4254  Type *PTy = getPromotedType((*AI)->getType());
4255  Value *NewArg = *AI;
4256  if (PTy != (*AI)->getType()) {
4257  // Must promote to pass through va_arg area!
4258  Instruction::CastOps opcode =
4259  CastInst::getCastOpcode(*AI, false, PTy, false);
4260  NewArg = Builder.CreateCast(opcode, *AI, PTy);
4261  }
4262  Args.push_back(NewArg);
4263 
4264  // Add any parameter attributes.
4265  ArgAttrs.push_back(CallerPAL.getParamAttributes(i));
4266  }
4267  }
4268  }
4269 
4270  AttributeSet FnAttrs = CallerPAL.getFnAttributes();
4271 
4272  if (NewRetTy->isVoidTy())
4273  Caller->setName(""); // Void type should not have a name.
4274 
4275  assert((ArgAttrs.size() == FT->getNumParams() || FT->isVarArg()) &&
4276  "missing argument attributes");
4277  LLVMContext &Ctx = Callee->getContext();
4278  AttributeList NewCallerPAL = AttributeList::get(
4279  Ctx, FnAttrs, AttributeSet::get(Ctx, RAttrs), ArgAttrs);
4280 
4281  SmallVector<OperandBundleDef, 1> OpBundles;
4282  CS.getOperandBundlesAsDefs(OpBundles);
4283 
4284  CallSite NewCS;
4285  if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
4286  NewCS = Builder.CreateInvoke(Callee, II->getNormalDest(),
4287  II->getUnwindDest(), Args, OpBundles);
4288  } else {
4289  NewCS = Builder.CreateCall(Callee, Args, OpBundles);
4290  cast<CallInst>(NewCS.getInstruction())
4291  ->setTailCallKind(cast<CallInst>(Caller)->getTailCallKind());
4292  }
4293  NewCS->takeName(Caller);
4294  NewCS.setCallingConv(CS.getCallingConv());
4295  NewCS.setAttributes(NewCallerPAL);
4296 
4297  // Preserve the weight metadata for the new call instruction. The metadata
4298  // is used by SamplePGO to check callsite's hotness.
4299  uint64_t W;
4300  if (Caller->extractProfTotalWeight(W))
4301  NewCS->setProfWeight(W);
4302 
4303  // Insert a cast of the return type as necessary.
4304  Instruction *NC = NewCS.getInstruction();
4305  Value *NV = NC;
4306  if (OldRetTy != NV->getType() && !Caller->use_empty()) {
4307  if (!NV->getType()->isVoidTy()) {
4308  NV = NC = CastInst::CreateBitOrPointerCast(NC, OldRetTy);
4309  NC->setDebugLoc(Caller->getDebugLoc());
4310 
4311  // If this is an invoke instruction, we should insert it after the first
4312  // non-phi instruction in the normal successor block.
4313  if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
4314  BasicBlock::iterator I = II->getNormalDest()->getFirstInsertionPt();
4315  InsertNewInstBefore(NC, *I);
4316  } else {
4317  // Otherwise, it's a call, just insert cast right after the call.
4318  InsertNewInstBefore(NC, *Caller);
4319  }
4320  Worklist.AddUsersToWorkList(*Caller);
4321  } else {
4322  NV = UndefValue::get(Caller->getType());
4323  }
4324  }
4325 
4326  if (!Caller->use_empty())
4327  replaceInstUsesWith(*Caller, NV);
4328  else if (Caller->hasValueHandle()) {
4329  if (OldRetTy == NV->getType())
4330  ValueHandleBase::ValueIsRAUWd(Caller, NV);
4331  else
4332  // We cannot call ValueIsRAUWd with a different type, and the
4333  // actual tracked value will disappear.
4334  ValueHandleBase::ValueIsDeleted(Caller);
4335  }
4336 
4337  eraseInstFromFunction(*Caller);
4338  return true;
4339 }
4340 
4341 /// Turn a call to a function created by init_trampoline / adjust_trampoline
4342 /// intrinsic pair into a direct call to the underlying function.
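/// Roughly (illustrative, not from the original source): a call made through
/// the pointer returned by llvm.adjust.trampoline(%tramp) is rewritten as a
/// direct call to the trampoline's target function, with the captured chain
/// value passed in the parameter marked 'nest'.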
4343 Instruction *
4344 InstCombiner::transformCallThroughTrampoline(CallSite CS,
4345  IntrinsicInst *Tramp) {
4346  Value *Callee = CS.getCalledValue();
4347  PointerType *PTy = cast<PointerType>(Callee->getType());
4348  FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
4349  AttributeList Attrs = CS.getAttributes();
4350 
4351  // If the call already has the 'nest' attribute somewhere then give up -
4352  // otherwise 'nest' would occur twice after splicing in the chain.
4353  if (Attrs.hasAttrSomewhere(Attribute::Nest))
4354  return nullptr;
4355 
4356  assert(Tramp &&
4357  "transformCallThroughTrampoline called with incorrect CallSite.");
4358 
4359  Function *NestF =cast<Function>(Tramp->getArgOperand(1)->stripPointerCasts());
4360  FunctionType *NestFTy = cast<FunctionType>(NestF->getValueType());
4361 
4362  AttributeList NestAttrs = NestF->getAttributes();
4363  if (!NestAttrs.isEmpty()) {
4364  unsigned NestArgNo = 0;
4365  Type *NestTy = nullptr;
4366  AttributeSet NestAttr;
4367 
4368  // Look for a parameter marked with the 'nest' attribute.
4369  for (FunctionType::param_iterator I = NestFTy->param_begin(),
4370  E = NestFTy->param_end();
4371  I != E; ++NestArgNo, ++I) {
4372  AttributeSet AS = NestAttrs.getParamAttributes(NestArgNo);
4373  if (AS.hasAttribute(Attribute::Nest)) {
4374  // Record the parameter type and any other attributes.
4375  NestTy = *I;
4376  NestAttr = AS;
4377  break;
4378  }
4379  }
4380 
4381  if (NestTy) {
4382  Instruction *Caller = CS.getInstruction();
4383  std::vector<Value*> NewArgs;
4384  std::vector<AttributeSet> NewArgAttrs;
4385  NewArgs.reserve(CS.arg_size() + 1);
4386  NewArgAttrs.reserve(CS.arg_size());
4387 
4388  // Insert the nest argument into the call argument list, which may
4389  // mean appending it. Likewise for attributes.
4390 
4391  {
4392  unsigned ArgNo = 0;
4393  CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
4394  do {
4395  if (ArgNo == NestArgNo) {
4396  // Add the chain argument and attributes.
4397  Value *NestVal = Tramp->getArgOperand(2);
4398  if (NestVal->getType() != NestTy)
4399  NestVal = Builder.CreateBitCast(NestVal, NestTy, "nest");
4400  NewArgs.push_back(NestVal);
4401  NewArgAttrs.push_back(NestAttr);
4402  }
4403 
4404  if (I == E)
4405  break;
4406 
4407  // Add the original argument and attributes.
4408  NewArgs.push_back(*I);
4409  NewArgAttrs.push_back(Attrs.getParamAttributes(ArgNo));
4410 
4411  ++ArgNo;
4412  ++I;
4413  } while (true);
4414  }
4415 
4416  // The trampoline may have been bitcast to a bogus type (FTy).
4417  // Handle this by synthesizing a new function type, equal to FTy
4418  // with the chain parameter inserted.
4419 
4420  std::vector<Type*> NewTypes;
4421  NewTypes.reserve(FTy->getNumParams()+1);
4422 
4423  // Insert the chain's type into the list of parameter types, which may
4424  // mean appending it.
4425  {
4426  unsigned ArgNo = 0;
4427  FunctionType::param_iterator I = FTy->param_begin(),
4428  E = FTy->param_end();
4429 
4430  do {
4431  if (ArgNo == NestArgNo)
4432  // Add the chain's type.
4433  NewTypes.push_back(NestTy);
4434 
4435  if (I == E)
4436  break;
4437 
4438  // Add the original type.
4439  NewTypes.push_back(*I);
4440 
4441  ++ArgNo;
4442  ++I;
4443  } while (true);
4444  }
4445 
4446  // Replace the trampoline call with a direct call. Let the generic
4447  // code sort out any function type mismatches.
4448  FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(), NewTypes,
4449  FTy->isVarArg());
4450  Constant *NewCallee =
4451  NestF->getType() == PointerType::getUnqual(NewFTy) ?
4452  NestF : ConstantExpr::getBitCast(NestF,
4453  PointerType::getUnqual(NewFTy));
4454  AttributeList NewPAL =
4455  AttributeList::get(FTy->getContext(), Attrs.getFnAttributes(),
4456  Attrs.getRetAttributes(), NewArgAttrs);
4457 
4458  SmallVector<OperandBundleDef, 1> OpBundles;
4459  CS.getOperandBundlesAsDefs(OpBundles);
4460 
4461  Instruction *NewCaller;
4462  if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
4463  NewCaller = InvokeInst::Create(NewCallee,
4464  II->getNormalDest(), II->getUnwindDest(),
4465  NewArgs, OpBundles);
4466  cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv());
4467  cast<InvokeInst>(NewCaller)->setAttributes(NewPAL);
4468  } else {
4469  NewCaller = CallInst::Create(NewCallee, NewArgs, OpBundles);
4470  cast<CallInst>(NewCaller)->setTailCallKind(
4471  cast<CallInst>(Caller)->getTailCallKind());
4472  cast<CallInst>(NewCaller)->setCallingConv(
4473  cast<CallInst>(Caller)->getCallingConv());
4474  cast<CallInst>(NewCaller)->setAttributes(NewPAL);
4475  }
4476  NewCaller->setDebugLoc(Caller->getDebugLoc());
4477 
4478  return NewCaller;
4479  }
4480  }
4481 
4482  // Replace the trampoline call with a direct call. Since there is no 'nest'
4483  // parameter, there is no need to adjust the argument list. Let the generic
4484  // code sort out any function type mismatches.
4485  Constant *NewCallee =
4486  NestF->getType() == PTy ? NestF :
4487  ConstantExpr::getBitCast(NestF, PTy);
4488  CS.setCalledFunction(NewCallee);
4489  return CS.getInstruction();
4490 }
static Instruction * foldCttzCtlz(IntrinsicInst &II, InstCombiner &IC)
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:713
static CastInst * CreateBitOrPointerCast(Value *S, Type *Ty, const Twine &Name="", Instruction *InsertBefore=nullptr)
Create a BitCast, a PtrToInt, or an IntToPTr cast instruction.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:908
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:817
bool isValidAssumeForContext(const Instruction *I, const Instruction *CxtI, const DominatorTree *DT=nullptr)
Return true if it is valid to use the assumptions provided by an assume intrinsic, I, at the point in the control-flow identified by the context instruction, CxtI.
STATISTIC(NumFunctions, "Total number of functions")
Metadata node.
Definition: Metadata.h:862
F(f)
static CallInst * Create(Value *Func, ArrayRef< Value *> Args, ArrayRef< OperandBundleDef > Bundles=None, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
const fltSemantics & getSemantics() const
Definition: APFloat.h:1155
BinaryOp_match< LHS, RHS, Instruction::FSub > m_FSub(const LHS &L, const RHS &R)
Definition: PatternMatch.h:648
An instruction for reading from memory.
Definition: Instructions.h:164
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:177
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:882
static Constant * getCompare(unsigned short pred, Constant *C1, Constant *C2, bool OnlyIfReduced=false)
Return an ICmp or FCmp comparison operator constant expression.
Definition: Constants.cpp:1894
unsigned countMaxTrailingZeros() const
Returns the maximum number of trailing zero bits possible.
Definition: KnownBits.h:166
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:227
static OverflowCheckFlavor IntrinsicIDToOverflowCheckFlavor(unsigned ID)
Returns the OverflowCheckFlavor corresponding to a overflow_with_op intrinsic.
void reserve(size_type N)
Definition: SmallVector.h:377
Value * getLength() const
void copyIRFlags(const Value *V, bool IncludeWrapFlags=true)
Convenience method to copy supported exact, fast-math, and (optionally) wrapping flags from V to this...
static Instruction * simplifyMaskedStore(IntrinsicInst &II, InstCombiner &IC)
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
Definition: PatternMatch.h:361
Instruction * visitVAStartInst(VAStartInst &I)
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition: APInt.h:528
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1493
bool isGCRelocate(ImmutableCallSite CS)
Definition: Statepoint.cpp:43
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:130
const CallInst * isFreeCall(const Value *I, const TargetLibraryInfo *TLI)
isFreeCall - Returns non-null if the value is a call to the builtin free()
static Constant * getNullValue(Type *Ty)
Constructor to create a &#39;0&#39; constant of arbitrary type.
Definition: Constants.cpp:258
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition: KnownBits.h:136
static bool isBitOrNoopPointerCastable(Type *SrcTy, Type *DestTy, const DataLayout &DL)
Check whether a bitcast, inttoptr, or ptrtoint cast between these types is valid and a no-op...
Value * getDest() const
This is just like getRawDest, but it strips off any cast instructions (including addrspacecast) that ...
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:264
bool isIdenticalTo(const Instruction *I) const
Return true if the specified instruction is exactly identical to the current one. ...
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
opStatus divide(const APFloat &RHS, roundingMode RM)
Definition: APFloat.h:968
static Instruction * SimplifyNVVMIntrinsic(IntrinsicInst *II, InstCombiner &IC)
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
Instruction * visitInvokeInst(InvokeInst &II)
static Constant * getIntegerCast(Constant *C, Type *Ty, bool isSigned)
Create a ZExt, Bitcast or Trunc for integer -> integer casts.
Definition: Constants.cpp:1580
APInt getLoBits(unsigned numBits) const
Compute an APInt containing numBits lowbits from this APInt.
Definition: APInt.cpp:514
static APFloat fmed3AMDGCN(const APFloat &Src0, const APFloat &Src1, const APFloat &Src2)
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
Definition: PatternMatch.h:721
Type * getPointerElementType() const
Definition: Type.h:373
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE, etc.
Definition: InstrTypes.h:983
OverflowCheckFlavor
Specific patterns of overflow check idioms that we match.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:592
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:451
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition: APInt.h:986
AttributeList addParamAttribute(LLVMContext &C, unsigned ArgNo, Attribute::AttrKind Kind) const
Add an argument attribute to the list.
Definition: Attributes.h:397
static Value * simplifyNeonTbl1(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
Convert a table lookup to shufflevector if the mask is constant.
IterTy arg_end() const
Definition: CallSite.h:575
Instruction * eraseInstFromFunction(Instruction &I)
Combiner aware instruction erasure.
CastClass_match< OpTy, Instruction::Trunc > m_Trunc(const OpTy &Op)
Matches Trunc.
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:197
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:731
The core instruction combiner logic.
static bool isSafeToEliminateVarargsCast(const CallSite CS, const DataLayout &DL, const CastInst *const CI, const int ix)
If this cast does not affect the value passed through the varargs area, we can eliminate the use of t...
This file contains the simple types necessary to represent the attributes associated with functions a...
InstrTy * getInstruction() const
Definition: CallSite.h:92
static Constant * getSExt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:1618
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:295
uint64_t getNumElements() const
Definition: DerivedTypes.h:359
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:971
This file implements a class to represent arbitrary precision integral constant values and operations...
All zero aggregate value.
Definition: Constants.h:337
static Value * simplifyX86vpermv(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
Attempt to convert vpermd/vpermps to shufflevector if the mask is constant.
Metadata * LowAndHigh[]
ValTy * getCalledValue() const
Return the pointer to function that is being called.
Definition: CallSite.h:100
static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E, unsigned NumOperands)
DominatorTree & getDominatorTree() const
unsigned countMaxPopulation() const
Returns the maximum number of bits that could be one.
Definition: KnownBits.h:191
Key
PAL metadata keys.
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:85
Class to represent function types.
Definition: DerivedTypes.h:103
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1629
bool isInfinity() const
Definition: APFloat.h:1144
FastMathFlags getFastMathFlags() const
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
Value * CreateSExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a SExt or Trunc from the integer value V to DestTy.
Definition: IRBuilder.h:1581
cstfp_pred_ty< is_nan > m_NaN()
Match an arbitrary NaN constant.
Definition: PatternMatch.h:415
This represents the llvm.va_start intrinsic.
CastClass_match< OpTy, Instruction::FPExt > m_FPExt(const OpTy &Op)
Matches FPExt.
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition: APFloat.cpp:4444
CastClass_match< OpTy, Instruction::ZExt > m_ZExt(const OpTy &Op)
Matches ZExt.
AttributeSet getParamAttributes(unsigned ArgNo) const
The attributes for the argument or parameter at the given index are returned.
bool isVarArg() const
Definition: DerivedTypes.h:123
bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Return true if the call or the callee has the given attribute.
Definition: CallSite.h:377
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:195
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
Definition: IRBuilder.h:2047
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:138
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:126
cstfp_pred_ty< is_pos_zero_fp > m_PosZeroFP()
Match a floating-point positive zero.
Definition: PatternMatch.h:433
AttrBuilder & remove(const AttrBuilder &B)
Remove the attributes from the builder.
static Value * simplifyX86pack(IntrinsicInst &II, bool IsSigned)
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:210
cmpResult
IEEE-754R 5.11: Floating Point Comparison Relations.
Definition: APFloat.h:166
An instruction for storing to memory.
Definition: Instructions.h:306
bool extractProfTotalWeight(uint64_t &TotalVal) const
Retrieve total raw weight values of a branch.
Definition: Metadata.cpp:1340
SelectClass_match< Cond, LHS, RHS > m_Select(const Cond &C, const LHS &L, const RHS &R)
static void ValueIsRAUWd(Value *Old, Value *New)
Definition: Value.cpp:893
static Value * simplifyX86vpcom(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder, bool IsSigned)
Decode XOP integer vector comparison intrinsics.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:301
static ConstantAsMetadata * get(Constant *C)
Definition: Metadata.h:408
static Value * simplifyX86movmsk(const IntrinsicInst &II)
amdgpu Simplify well known AMD library false Value * Callee
Function * getDeclaration(Module *M, ID id, ArrayRef< Type *> Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1001
This class represents a truncation of integer types.
Type * getElementType() const
Return the element type of the array/vector.
Definition: Constants.cpp:2333
Value * getOperand(unsigned i) const
Definition: User.h:170
Class to represent pointers.
Definition: DerivedTypes.h:467
bool hasAttribute(Attribute::AttrKind Kind) const
Return true if the attribute exists in this set.
Definition: Attributes.cpp:576
Constant * getAggregateElement(unsigned Elt) const
For aggregates (struct/array/vector) return the constant that corresponds to the specified element if...
Definition: Constants.cpp:328
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return &#39;this&#39;.
Definition: Type.h:301
Value * getOperand(unsigned i_nocapture) const
const DataLayout & getDataLayout() const
static MetadataAsValue * get(LLVMContext &Context, Metadata *MD)
Definition: Metadata.cpp:106
static Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:1740
bool isVoidTy() const
Return true if this is &#39;void&#39;.
Definition: Type.h:141
bool hasAttrSomewhere(Attribute::AttrKind Kind, unsigned *Index=nullptr) const
Return true if the specified attribute is set for at least one parameter or for the return value...
OneUse_match< T > m_OneUse(const T &SubPattern)
Definition: PatternMatch.h:63
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata *> MDs)
Definition: Metadata.h:1164
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:357
void setAttributes(AttributeList PAL)
Set the parameter attributes of the call.
Definition: CallSite.h:333
bool doesNotThrow() const
Determine if the call cannot unwind.
Instruction * visitFenceInst(FenceInst &FI)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:410
static Instruction * simplifyMaskedScatter(IntrinsicInst &II, InstCombiner &IC)
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:149
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Subclasses of this class are all able to terminate a basic block.
Definition: InstrTypes.h:55
static AttributeSet get(LLVMContext &C, const AttrBuilder &B)
Definition: Attributes.cpp:511
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:282
bool isNegative() const
Definition: APFloat.h:1147
static ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
Definition: Constants.cpp:1368
Value * CreateAShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1092
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:421
ConstantInt * lowerObjectSizeCall(IntrinsicInst *ObjectSize, const DataLayout &DL, const TargetLibraryInfo *TLI, bool MustSucceed)
Try to turn a call to @llvm.objectsize into an integer value of the given Type.
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
Definition: PatternMatch.h:715
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:69
ConstantInt * getTrue()
Get the constant value for i1 true.
Definition: IRBuilder.h:287
CallInst * CreateIntrinsic(Intrinsic::ID ID, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with no operands.
Definition: IRBuilder.cpp:741
bool isNaN() const
Definition: APFloat.h:1145
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This is an important base class in LLVM.
Definition: Constant.h:42
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.h:1901
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:221
unsigned getNumParams() const
Return the number of fixed parameters this function type requires.
Definition: DerivedTypes.h:139
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:264
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
Definition: PatternMatch.h:306
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:490
const Instruction * getNextNonDebugInstruction() const
Return a pointer to the next non-debug instruction in the same basic block as &#39;this&#39;, or nullptr if no such instruction exists.
This file declares a class to represent arbitrary precision floating point values and provide a varie...
bool isFast() const
Determine whether all fast-math-flags are set.
std::underlying_type< E >::type Underlying(E Val)
Check that Val is in range for E, and return Val cast to E&#39;s underlying type.
Definition: BitmaskEnum.h:91
static IntrinsicInst * findInitTrampolineFromBB(IntrinsicInst *AdjustTramp, Value *TrampMem)
void setCalledFunction(Value *Fn)
Set the function called.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:885
static const unsigned End
match_combine_or< CastClass_match< OpTy, Instruction::ZExt >, CastClass_match< OpTy, Instruction::SExt > > m_ZExtOrSExt(const OpTy &Op)
bool isAllOnes() const
Returns true if value is all one bits.
Definition: KnownBits.h:78
void setCallingConv(CallingConv::ID CC)
Set the calling convention of the call.
Definition: CallSite.h:316
bool isGCResult(ImmutableCallSite CS)
Definition: Statepoint.cpp:53
This class represents any memset intrinsic.
static FunctionType * get(Type *Result, ArrayRef< Type *> Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
Definition: Type.cpp:297
void setArgOperand(unsigned i, Value *v)
self_iterator getIterator()
Definition: ilist_node.h:82
Class to represent integer types.
Definition: DerivedTypes.h:40
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Definition: IRBuilder.h:360
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:1921
bool isIntN(unsigned N) const
Check if this APInt has an N-bits unsigned integer value.
Definition: APInt.h:443
void setNotConvergent()
Definition: CallSite.h:527
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:60
void setAlignment(unsigned Align)
static Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:312
static Value * simplifyX86varShift(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
static UndefValue * get(Type *T)
Static factory methods - Return an &#39;undef&#39; object of the specified type.
Definition: Constants.cpp:1382
const AMDGPUAS & AS
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs, and aliases.
Definition: Value.cpp:538
iterator_range< User::op_iterator > arg_operands()
Iteration adapter for range-for loops.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1226
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE maxNum semantics.
Definition: APFloat.h:1238
static InvokeInst * Create(Value *Func, BasicBlock *IfNormal, BasicBlock *IfException, ArrayRef< Value *> Args, const Twine &NameStr, Instruction *InsertBefore=nullptr)
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:964
signed greater than
Definition: InstrTypes.h:912
static Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
Definition: Constants.cpp:295
static Value * simplifyX86extrq(IntrinsicInst &II, Value *Op0, ConstantInt *CILength, ConstantInt *CIIndex, InstCombiner::BuilderTy &Builder)
Attempt to simplify SSE4A EXTRQ/EXTRQI instructions using constant folding or conversion to a shuffle...
const APFloat & getValueAPF() const
Definition: Constants.h:299
CastClass_match< OpTy, Instruction::SExt > m_SExt(const OpTy &Op)
Matches SExt.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:51
bool doesNotThrow() const
Determine if the function cannot unwind.
Definition: Function.h:506
static BinaryOperator * CreateFNeg(Value *Op, const Twine &Name="", Instruction *InsertBefore=nullptr)
static Type * getHalfTy(LLVMContext &C)
Definition: Type.cpp:163
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:240
static CastInst * CreateIntegerCast(Value *S, Type *Ty, bool isSigned, const Twine &Name="", Instruction *InsertBefore=nullptr)
Create a ZExt, BitCast, or Trunc for int -> int casts.
Iterator for intrusive lists based on ilist_node.
unsigned countMaxLeadingZeros() const
Returns the maximum number of leading zero bits possible.
Definition: KnownBits.h:176
bool hasParamAttribute(unsigned ArgNo, Attribute::AttrKind Kind) const
Equivalent to hasAttribute(ArgNo + FirstArgIndex, Kind).
static PointerType * getInt1PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:216
static cl::opt< unsigned > GuardWideningWindow("instcombine-guard-widening-window", cl::init(3), cl::desc("How wide an instruction window to bypass looking for " "another guard"))
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
getLimitedValue - If the value is smaller than the specified limit, return it, otherwise return the l...
Definition: Constants.h:251
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the generic address space (address sp...
Definition: DerivedTypes.h:482
This is the shared class of boolean and integer constants.
Definition: Constants.h:84
static Value * simplifyX86vpermilvar(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
Attempt to convert vpermilvar* to shufflevector if the mask is constant.
iterator end()
Definition: BasicBlock.h:266
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type...
Definition: Type.cpp:130
IterTy arg_begin() const
Definition: CallSite.h:571
static IntrinsicInst * findInitTrampolineFromAlloca(Value *TrampMem)
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:861
Value * CreateInsertElement(Value *Vec, Value *NewElt, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:1934
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition: APInt.cpp:521
Type::subtype_iterator param_iterator
Definition: DerivedTypes.h:126
bool overlaps(const AttrBuilder &B) const
Return true if the builder has any attribute that&#39;s in the specified builder.
static Value * simplifyNeonVld1(const IntrinsicInst &II, unsigned MemAlign, InstCombiner::BuilderTy &Builder)
Convert a vector load intrinsic into a simple llvm load instruction.
static Instruction * simplifyMaskedGather(IntrinsicInst &II, InstCombiner &IC)
void setDoesNotThrow()
Definition: CallSite.h:508
signed less than
Definition: InstrTypes.h:914
Type * getReturnType() const
Definition: DerivedTypes.h:124
CallInst * CreateMaskedStore(Value *Val, Value *Ptr, unsigned Align, Value *Mask)
Create a call to Masked Store intrinsic.
Definition: IRBuilder.cpp:491
APFloat frexp(const APFloat &X, int &Exp, APFloat::roundingMode RM)
Equivalent of C standard library function.
Definition: APFloat.h:1205
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:1948
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:611
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
Definition: Constants.cpp:625
static Constant * get(Type *Ty, double V)
This returns a ConstantFP, or a vector containing a splat of a ConstantFP, for the specified value in...
Definition: Constants.cpp:674
#define NC
Definition: regutils.h:42
CallInst * CreateMaskedLoad(Value *Ptr, unsigned Align, Value *Mask, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Load intrinsic.
Definition: IRBuilder.cpp:470
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1277
Value * SimplifyCall(ImmutableCallSite CS, const SimplifyQuery &Q)
Given a callsite, fold the result or return null.
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:567
bool isDenormal() const
Definition: APFloat.h:1148
void setOperand(unsigned i, Value *Val)
Definition: User.h:175
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:924
unsigned getVectorNumElements() const
Definition: DerivedTypes.h:462
signed less or equal
Definition: InstrTypes.h:915
void setOperand(unsigned i_nocapture, Value *Val_nocapture)
Class to represent vector types.
Definition: DerivedTypes.h:393
void setVolatile(bool V)
Specify whether this is a volatile store or not.
Definition: Instructions.h:342
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:56
Class for arbitrary precision integers.
Definition: APInt.h:69
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), Instruction *InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
iterator_range< user_iterator > users()
Definition: Value.h:399
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1051
static Value * simplifyX86pshufb(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
Attempt to convert pshufb* to shufflevector if the mask is constant.
static cl::opt< bool > FtzEnabled("nvptx-f32ftz", cl::ZeroOrMore, cl::Hidden, cl::desc("NVPTX Specific: Flush f32 subnormals to sign-preserving zero."), cl::init(false))
amdgpu Simplify well known AMD library false Value Value * Arg
IntegerType * getInt1Ty()
Fetch the type representing a single bit.
Definition: IRBuilder.h:332
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::ZeroOrMore, cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate IT block based on arch"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow deprecated IT based on ARMv8"), clEnumValN(NoRestrictedIT, "arm-no-restrict-it", "Allow IT blocks based on ARMv7")))
uint64_t getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:428
specific_fpval m_FPOne()
Match a float 1.0 or vector with all elements equal to 1.0.
Definition: PatternMatch.h:531
static CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", Instruction *InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass&#39;s ...
void setOrdering(AtomicOrdering Ordering)
Sets the ordering constraint of this store instruction.
Definition: Instructions.h:364
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition: Lint.cpp:546
opStatus
IEEE-754R 7: Default exception handling.
Definition: APFloat.h:185
static Value * simplifyMinnumMaxnum(const IntrinsicInst &II)
static Value * simplifyMaskedLoad(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
static Instruction * simplifyX86MaskedLoad(IntrinsicInst &II, InstCombiner &IC)
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:285
static bool maskIsAllOneOrUndef(Value *Mask)
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:176
unsigned getIntegerBitWidth() const
Definition: DerivedTypes.h:97
OverflowResult
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:62
unsigned getNumArgOperands() const
Return the number of call arguments.
StringRef getValueAsString() const
Return the attribute&#39;s value as a string.
Definition: Attributes.cpp:195
unsigned greater or equal
Definition: InstrTypes.h:909
void getOperandBundlesAsDefs(SmallVectorImpl< OperandBundleDef > &Defs) const
Definition: CallSite.h:582
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:224
static Instruction::CastOps getCastOpcode(const Value *Val, bool SrcIsSigned, Type *Ty, bool DstIsSigned)
Returns the opcode necessary to cast Val into Ty using usual casting rules.
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
bool doesNotThrow() const
Determine if the call cannot unwind.
Definition: CallSite.h:505
bool isNormal() const
Definition: APFloat.h:1151
bool isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI, bool LookThroughBitCast=false)
Tests if a value is a call or invoke to a library function that allocates memory (either malloc...
void setDoesNotThrow()
Value * optimizeCall(CallInst *CI)
optimizeCall - Take the given call instruction and return a more optimal value to replace the instruc...
static bool removeTriviallyEmptyRange(IntrinsicInst &I, unsigned StartID, unsigned EndID, InstCombiner &IC)
unsigned getKnownAlignment(Value *V, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to infer an alignment for the specified pointer.
Definition: Local.h:259
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
Type * getValueType() const
Definition: GlobalValue.h:275
static IntrinsicInst * findInitTrampoline(Value *Callee)
bool isByValOrInAllocaArgument(unsigned ArgNo) const
Determine whether this argument is passed by value or in an alloca.
Definition: CallSite.h:608
static Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
Definition: Attributes.cpp:81
AssumptionCache & getAssumptionCache() const
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition: APInt.h:449
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1112
static Value * simplifyX86insertq(IntrinsicInst &II, Value *Op0, Value *Op1, APInt APLength, APInt APIndex, InstCombiner::BuilderTy &Builder)
Attempt to simplify SSE4A INSERTQ/INSERTQI instructions using constant folding or conversion to a shu...
bool isStatepoint(ImmutableCallSite CS)
Definition: Statepoint.cpp:27
static Constant * getNegativeIsTrueBoolVec(ConstantDataVector *V)
Return a constant boolean vector that has true elements in all positions where the input constant dat...
This represents the llvm.va_copy intrinsic.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:538
Value * getArgOperand(unsigned i) const
getArgOperand/setArgOperand - Return/set the i-th call argument.
static Value * simplifyX86round(IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
bool isSafeToSpeculativelyExecute(const Value *V, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr)
Return true if the instruction does not have any effects besides calculating the result and does not ...
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
LoadInst * CreateAlignedLoad(Value *Ptr, unsigned Align, const char *Name)
Provided to resolve &#39;CreateAlignedLoad(Ptr, Align, "...")&#39; correctly, instead of converting the strin...
Definition: IRBuilder.h:1328
static Instruction * foldCtpop(IntrinsicInst &II, InstCombiner &IC)
unsigned getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition: Type.cpp:115
LLVM Value Representation.
Definition: Value.h:73
void setAlignment(unsigned Align)
This file provides internal interfaces used to implement the InstCombine.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
Definition: PatternMatch.h:352
constexpr char Size[]
Key for Kernel::Arg::Metadata::mSize.
static VectorType * get(Type *ElementType, unsigned NumElements)
This static method is the primary way to construct an VectorType.
Definition: Type.cpp:593
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:235
AttrBuilder typeIncompatible(Type *Ty)
Which attributes cannot be applied to a type.
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E&#39;s largest value.
Definition: BitmaskEnum.h:81
AttributeSet getFnAttributes() const
The function attributes are returned.
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
Definition: Instruction.cpp:87
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.h:317
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1072
Invoke instruction.
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition: KnownBits.h:146
bool isKnownNonZero(const Value *V, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr)
Return true if the given value is known to be non-zero when defined.
IRTranslator LLVM IR MI
bool hasOneUse() const
Return true if there is exactly one user of this value.
Definition: Value.h:412
unsigned greater than
Definition: InstrTypes.h:908
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition: InstrTypes.h:999
AttributeList getAttributes() const
Get the parameter attributes of the call.
Definition: CallSite.h:329
unsigned getNumElements() const
Return the number of elements in the array or vector.
Definition: Constants.cpp:2356
bool isConvergent() const
Determine if the call is convergent.
Definition: CallSite.h:521
static APInt getNullValue(unsigned numBits)
Get the &#39;0&#39; value.
Definition: APInt.h:562
const TerminatorInst * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:138
static Constant * getMul(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
Definition: Constants.cpp:2199
static Constant * get(LLVMContext &Context, ArrayRef< uint8_t > Elts)
get() constructors - Return a constant with vector type with an element count and element type matchi...
Definition: Constants.cpp:2495
#define LLVM_DEBUG(X)
Definition: Debug.h:119
static Value * simplifyX86insertps(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
This class represents an extension of floating point types.
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
Definition: CallSite.h:271
void setVolatile(bool V)
Specify whether this is a volatile load or not.
Definition: Instructions.h:220
bool isEmpty() const
Return true if there are no attributes.
Definition: Attributes.h:645
Root of the metadata hierarchy.
Definition: Metadata.h:58
void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
static IntegerType * getInt8Ty(LLVMContext &C)
Definition: Type.cpp:174
void setCalledFunction(Value *V)
Set the callee to the specified value.
Definition: CallSite.h:126
bool isSignaling() const
Definition: APFloat.h:1149
Value * getRawDest() const
static Type * getPromotedType(Type *Ty)
Return the specified type promoted as it would be to pass though a va_arg area.
bool use_empty() const
Definition: Value.h:322
static Constant * get(ArrayRef< Constant *> V)
Definition: Constants.cpp:1046