1 //===- InstCombineCalls.cpp -----------------------------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the visitCall and visitInvoke functions.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "InstCombineInternal.h"
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/APInt.h"
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/None.h"
19 #include "llvm/ADT/Optional.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/ADT/Statistic.h"
23 #include "llvm/ADT/Twine.h"
29 #include "llvm/IR/Attributes.h"
30 #include "llvm/IR/BasicBlock.h"
31 #include "llvm/IR/CallSite.h"
32 #include "llvm/IR/Constant.h"
33 #include "llvm/IR/Constants.h"
34 #include "llvm/IR/DataLayout.h"
35 #include "llvm/IR/DerivedTypes.h"
36 #include "llvm/IR/Function.h"
37 #include "llvm/IR/GlobalVariable.h"
38 #include "llvm/IR/InstrTypes.h"
39 #include "llvm/IR/Instruction.h"
40 #include "llvm/IR/Instructions.h"
41 #include "llvm/IR/IntrinsicInst.h"
42 #include "llvm/IR/Intrinsics.h"
43 #include "llvm/IR/LLVMContext.h"
44 #include "llvm/IR/Metadata.h"
45 #include "llvm/IR/PatternMatch.h"
46 #include "llvm/IR/Statepoint.h"
47 #include "llvm/IR/Type.h"
48 #include "llvm/IR/User.h"
49 #include "llvm/IR/Value.h"
50 #include "llvm/IR/ValueHandle.h"
52 #include "llvm/Support/Casting.h"
54 #include "llvm/Support/Compiler.h"
55 #include "llvm/Support/Debug.h"
57 #include "llvm/Support/KnownBits.h"
62 #include <algorithm>
63 #include <cassert>
64 #include <cstdint>
65 #include <cstring>
66 #include <utility>
67 #include <vector>
68 
69 using namespace llvm;
70 using namespace PatternMatch;
71 
72 #define DEBUG_TYPE "instcombine"
73 
74 STATISTIC(NumSimplified, "Number of library calls simplified");
75 
76 static cl::opt<unsigned> GuardWideningWindow(
77  "instcombine-guard-widening-window",
78  cl::init(3),
79  cl::desc("How wide an instruction window to bypass looking for "
80  "another guard"));
81 
82 /// Return the specified type promoted as it would be to pass through a va_arg
83 /// area.
84 static Type *getPromotedType(Type *Ty) {
85  if (IntegerType* ITy = dyn_cast<IntegerType>(Ty)) {
86  if (ITy->getBitWidth() < 32)
87  return Type::getInt32Ty(Ty->getContext());
88  }
89  return Ty;
90 }
91 
92 /// Return a constant boolean vector that has true elements in all positions
93 /// where the input constant data vector has an element with the sign bit set.
94 static Constant *getNegativeIsTrueBoolVec(ConstantDataVector *V) {
95  SmallVector<Constant *, 32> BoolVec;
96  IntegerType *BoolTy = Type::getInt1Ty(V->getContext());
97  for (unsigned I = 0, E = V->getNumElements(); I != E; ++I) {
98  Constant *Elt = V->getElementAsConstant(I);
99  assert((isa<ConstantInt>(Elt) || isa<ConstantFP>(Elt)) &&
100  "Unexpected constant data vector element type");
101  bool Sign = V->getElementType()->isIntegerTy()
102  ? cast<ConstantInt>(Elt)->isNegative()
103  : cast<ConstantFP>(Elt)->isNegative();
104  BoolVec.push_back(ConstantInt::get(BoolTy, Sign));
105  }
106  return ConstantVector::get(BoolVec);
107 }
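// For example (illustrative values): a ConstantDataVector such as
//   <4 x i32> <i32 -1, i32 2, i32 -3, i32 4>
// yields the boolean vector
//   <4 x i1> <i1 true, i1 false, i1 true, i1 false>
// since only the elements with the sign bit set map to true.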
108 
109 Instruction *InstCombiner::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {
110  unsigned DstAlign = getKnownAlignment(MI->getRawDest(), DL, MI, &AC, &DT);
111  unsigned CopyDstAlign = MI->getDestAlignment();
112  if (CopyDstAlign < DstAlign){
113  MI->setDestAlignment(DstAlign);
114  return MI;
115  }
116 
117  unsigned SrcAlign = getKnownAlignment(MI->getRawSource(), DL, MI, &AC, &DT);
118  unsigned CopySrcAlign = MI->getSourceAlignment();
119  if (CopySrcAlign < SrcAlign) {
120  MI->setSourceAlignment(SrcAlign);
121  return MI;
122  }
123 
124  // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with
125  // load/store.
126  ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getLength());
127  if (!MemOpLength) return nullptr;
128 
129  // Source and destination pointer types are always "i8*" for intrinsic. See
130  // if the size is something we can handle with a single primitive load/store.
131  // A single load+store correctly handles overlapping memory in the memmove
132  // case.
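 // For example, a constant 4-byte copy such as
 //   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 4, i1 false)
 // is rewritten below roughly as (illustrative IR, placeholder names):
 //   %s = bitcast i8* %src to i32*
 //   %d = bitcast i8* %dst to i32*
 //   %v = load i32, i32* %s
 //   store i32 %v, i32* %d
 // with the load/store alignments taken from the intrinsic's source and
 // destination alignments.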
133  uint64_t Size = MemOpLength->getLimitedValue();
134  assert(Size && "0-sized memory transferring should be removed already.");
135 
136  if (Size > 8 || (Size&(Size-1)))
137  return nullptr; // If not 1/2/4/8 bytes, exit.
138 
139  // Use an integer load+store unless we can find something better.
140  unsigned SrcAddrSp =
141  cast<PointerType>(MI->getArgOperand(1)->getType())->getAddressSpace();
142  unsigned DstAddrSp =
143  cast<PointerType>(MI->getArgOperand(0)->getType())->getAddressSpace();
144 
145  IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3);
146  Type *NewSrcPtrTy = PointerType::get(IntType, SrcAddrSp);
147  Type *NewDstPtrTy = PointerType::get(IntType, DstAddrSp);
148 
149  // If the memcpy has metadata describing the members, see if we can get the
150  // TBAA tag describing our copy.
151  MDNode *CopyMD = nullptr;
152  if (MDNode *M = MI->getMetadata(LLVMContext::MD_tbaa)) {
153  CopyMD = M;
154  } else if (MDNode *M = MI->getMetadata(LLVMContext::MD_tbaa_struct)) {
155  if (M->getNumOperands() == 3 && M->getOperand(0) &&
156  mdconst::hasa<ConstantInt>(M->getOperand(0)) &&
157  mdconst::extract<ConstantInt>(M->getOperand(0))->isZero() &&
158  M->getOperand(1) &&
159  mdconst::hasa<ConstantInt>(M->getOperand(1)) &&
160  mdconst::extract<ConstantInt>(M->getOperand(1))->getValue() ==
161  Size &&
162  M->getOperand(2) && isa<MDNode>(M->getOperand(2)))
163  CopyMD = cast<MDNode>(M->getOperand(2));
164  }
165 
166  Value *Src = Builder.CreateBitCast(MI->getArgOperand(1), NewSrcPtrTy);
167  Value *Dest = Builder.CreateBitCast(MI->getArgOperand(0), NewDstPtrTy);
168  LoadInst *L = Builder.CreateLoad(Src);
169  // Alignment from the mem intrinsic will be better, so use it.
170  L->setAlignment(CopySrcAlign);
171  if (CopyMD)
172  L->setMetadata(LLVMContext::MD_tbaa, CopyMD);
173  MDNode *LoopMemParallelMD =
174    MI->getMetadata(LLVMContext::MD_mem_parallel_loop_access);
175  if (LoopMemParallelMD)
176    L->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);
177 
178  StoreInst *S = Builder.CreateStore(L, Dest);
179  // Alignment from the mem intrinsic will be better, so use it.
180  S->setAlignment(CopyDstAlign);
181  if (CopyMD)
182  S->setMetadata(LLVMContext::MD_tbaa, CopyMD);
183  if (LoopMemParallelMD)
184    S->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);
185 
186  if (auto *MT = dyn_cast<MemTransferInst>(MI)) {
187  // non-atomics can be volatile
188  L->setVolatile(MT->isVolatile());
189  S->setVolatile(MT->isVolatile());
190  }
191  if (isa<AtomicMemTransferInst>(MI)) {
192  // atomics have to be unordered
193    L->setOrdering(AtomicOrdering::Unordered);
194    S->setOrdering(AtomicOrdering::Unordered);
195  }
196 
197  // Set the size of the copy to 0, it will be deleted on the next iteration.
198  MI->setLength(Constant::getNullValue(MemOpLength->getType()));
199  return MI;
200 }
201 
202 Instruction *InstCombiner::SimplifyAnyMemSet(AnyMemSetInst *MI) {
203  unsigned Alignment = getKnownAlignment(MI->getDest(), DL, MI, &AC, &DT);
204  if (MI->getDestAlignment() < Alignment) {
205  MI->setDestAlignment(Alignment);
206  return MI;
207  }
208 
209  // Extract the length and alignment and fill if they are constant.
210  ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
211  ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
212  if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8))
213  return nullptr;
214  uint64_t Len = LenC->getLimitedValue();
215  Alignment = MI->getDestAlignment();
216  assert(Len && "0-sized memory setting should be removed already.");
217 
218  // memset(s,c,n) -> store s, c (for n=1,2,4,8)
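 // For example (illustrative): memset(%p, 0xAB, 4) becomes a single
 //   store i32 0xABABABAB, i32* %p
 // where the fill byte is splatted across the wider integer by the
 // 0x0101010101010101 multiply below.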
219  if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
220  Type *ITy = IntegerType::get(MI->getContext(), Len*8); // n=1 -> i8.
221 
222  Value *Dest = MI->getDest();
223  unsigned DstAddrSp = cast<PointerType>(Dest->getType())->getAddressSpace();
224  Type *NewDstPtrTy = PointerType::get(ITy, DstAddrSp);
225  Dest = Builder.CreateBitCast(Dest, NewDstPtrTy);
226 
227  // Alignment 0 is identity for alignment 1 for memset, but not store.
228  if (Alignment == 0) Alignment = 1;
229 
230  // Extract the fill value and store.
231  uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL;
232  StoreInst *S = Builder.CreateStore(ConstantInt::get(ITy, Fill), Dest,
233  MI->isVolatile());
234  S->setAlignment(Alignment);
235  if (isa<AtomicMemSetInst>(MI))
236    S->setOrdering(AtomicOrdering::Unordered);
237 
238  // Set the size of the copy to 0, it will be deleted on the next iteration.
239  MI->setLength(Constant::getNullValue(LenC->getType()));
240  return MI;
241  }
242 
243  return nullptr;
244 }
245 
246 static Value *simplifyX86AddsSubs(const IntrinsicInst &II,
247  InstCombiner::BuilderTy &Builder) {
248  bool IsAddition;
249 
250  switch (II.getIntrinsicID()) {
251  default: llvm_unreachable("Unexpected intrinsic!");
252  case Intrinsic::x86_sse2_padds_b:
253  case Intrinsic::x86_sse2_padds_w:
254  case Intrinsic::x86_avx2_padds_b:
255  case Intrinsic::x86_avx2_padds_w:
256  case Intrinsic::x86_avx512_padds_b_512:
257  case Intrinsic::x86_avx512_padds_w_512:
258  IsAddition = true;
259  break;
260  case Intrinsic::x86_sse2_psubs_b:
261  case Intrinsic::x86_sse2_psubs_w:
262  case Intrinsic::x86_avx2_psubs_b:
263  case Intrinsic::x86_avx2_psubs_w:
264  case Intrinsic::x86_avx512_psubs_b_512:
265  case Intrinsic::x86_avx512_psubs_w_512:
266  IsAddition = false;
267  break;
268  }
269 
270  auto *Arg0 = dyn_cast<Constant>(II.getOperand(0));
271  auto *Arg1 = dyn_cast<Constant>(II.getOperand(1));
272  auto VT = cast<VectorType>(II.getType());
273  auto SVT = VT->getElementType();
274  unsigned NumElems = VT->getNumElements();
275 
276  if (!Arg0 || !Arg1)
277  return nullptr;
278 
279  SmallVector<Constant *, 64> Result;
280 
281  APInt MaxValue = APInt::getSignedMaxValue(SVT->getIntegerBitWidth());
282  APInt MinValue = APInt::getSignedMinValue(SVT->getIntegerBitWidth());
283  for (unsigned i = 0; i < NumElems; ++i) {
284  auto *Elt0 = Arg0->getAggregateElement(i);
285  auto *Elt1 = Arg1->getAggregateElement(i);
286  if (isa<UndefValue>(Elt0) || isa<UndefValue>(Elt1)) {
287  Result.push_back(UndefValue::get(SVT));
288  continue;
289  }
290 
291  if (!isa<ConstantInt>(Elt0) || !isa<ConstantInt>(Elt1))
292  return nullptr;
293 
294  const APInt &Val0 = cast<ConstantInt>(Elt0)->getValue();
295  const APInt &Val1 = cast<ConstantInt>(Elt1)->getValue();
296  bool Overflow = false;
297  APInt ResultElem = IsAddition ? Val0.sadd_ov(Val1, Overflow)
298  : Val0.ssub_ov(Val1, Overflow);
299  if (Overflow)
300  ResultElem = Val0.isNegative() ? MinValue : MaxValue;
301  Result.push_back(Constant::getIntegerValue(SVT, ResultElem));
302  }
303 
304  return ConstantVector::get(Result);
305 }
306 
307 static Value *simplifyX86immShift(const IntrinsicInst &II,
308  InstCombiner::BuilderTy &Builder) {
309  bool LogicalShift = false;
310  bool ShiftLeft = false;
311 
312  switch (II.getIntrinsicID()) {
313  default: llvm_unreachable("Unexpected intrinsic!");
314  case Intrinsic::x86_sse2_psra_d:
315  case Intrinsic::x86_sse2_psra_w:
316  case Intrinsic::x86_sse2_psrai_d:
317  case Intrinsic::x86_sse2_psrai_w:
318  case Intrinsic::x86_avx2_psra_d:
319  case Intrinsic::x86_avx2_psra_w:
320  case Intrinsic::x86_avx2_psrai_d:
321  case Intrinsic::x86_avx2_psrai_w:
322  case Intrinsic::x86_avx512_psra_q_128:
323  case Intrinsic::x86_avx512_psrai_q_128:
324  case Intrinsic::x86_avx512_psra_q_256:
325  case Intrinsic::x86_avx512_psrai_q_256:
326  case Intrinsic::x86_avx512_psra_d_512:
327  case Intrinsic::x86_avx512_psra_q_512:
328  case Intrinsic::x86_avx512_psra_w_512:
329  case Intrinsic::x86_avx512_psrai_d_512:
330  case Intrinsic::x86_avx512_psrai_q_512:
331  case Intrinsic::x86_avx512_psrai_w_512:
332  LogicalShift = false; ShiftLeft = false;
333  break;
334  case Intrinsic::x86_sse2_psrl_d:
335  case Intrinsic::x86_sse2_psrl_q:
336  case Intrinsic::x86_sse2_psrl_w:
337  case Intrinsic::x86_sse2_psrli_d:
338  case Intrinsic::x86_sse2_psrli_q:
339  case Intrinsic::x86_sse2_psrli_w:
340  case Intrinsic::x86_avx2_psrl_d:
341  case Intrinsic::x86_avx2_psrl_q:
342  case Intrinsic::x86_avx2_psrl_w:
343  case Intrinsic::x86_avx2_psrli_d:
344  case Intrinsic::x86_avx2_psrli_q:
345  case Intrinsic::x86_avx2_psrli_w:
346  case Intrinsic::x86_avx512_psrl_d_512:
347  case Intrinsic::x86_avx512_psrl_q_512:
348  case Intrinsic::x86_avx512_psrl_w_512:
349  case Intrinsic::x86_avx512_psrli_d_512:
350  case Intrinsic::x86_avx512_psrli_q_512:
351  case Intrinsic::x86_avx512_psrli_w_512:
352  LogicalShift = true; ShiftLeft = false;
353  break;
354  case Intrinsic::x86_sse2_psll_d:
355  case Intrinsic::x86_sse2_psll_q:
356  case Intrinsic::x86_sse2_psll_w:
357  case Intrinsic::x86_sse2_pslli_d:
358  case Intrinsic::x86_sse2_pslli_q:
359  case Intrinsic::x86_sse2_pslli_w:
360  case Intrinsic::x86_avx2_psll_d:
361  case Intrinsic::x86_avx2_psll_q:
362  case Intrinsic::x86_avx2_psll_w:
363  case Intrinsic::x86_avx2_pslli_d:
364  case Intrinsic::x86_avx2_pslli_q:
365  case Intrinsic::x86_avx2_pslli_w:
366  case Intrinsic::x86_avx512_psll_d_512:
367  case Intrinsic::x86_avx512_psll_q_512:
368  case Intrinsic::x86_avx512_psll_w_512:
369  case Intrinsic::x86_avx512_pslli_d_512:
370  case Intrinsic::x86_avx512_pslli_q_512:
371  case Intrinsic::x86_avx512_pslli_w_512:
372  LogicalShift = true; ShiftLeft = true;
373  break;
374  }
375  assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left");
376 
377  // Simplify if count is constant.
378  auto Arg1 = II.getArgOperand(1);
379  auto CAZ = dyn_cast<ConstantAggregateZero>(Arg1);
380  auto CDV = dyn_cast<ConstantDataVector>(Arg1);
381  auto CInt = dyn_cast<ConstantInt>(Arg1);
382  if (!CAZ && !CDV && !CInt)
383  return nullptr;
384 
385  APInt Count(64, 0);
386  if (CDV) {
387  // SSE2/AVX2 uses all the first 64-bits of the 128-bit vector
388  // operand to compute the shift amount.
389  auto VT = cast<VectorType>(CDV->getType());
390  unsigned BitWidth = VT->getElementType()->getPrimitiveSizeInBits();
391  assert((64 % BitWidth) == 0 && "Unexpected packed shift size");
392  unsigned NumSubElts = 64 / BitWidth;
393 
394  // Concatenate the sub-elements to create the 64-bit value.
395  for (unsigned i = 0; i != NumSubElts; ++i) {
396  unsigned SubEltIdx = (NumSubElts - 1) - i;
397  auto SubElt = cast<ConstantInt>(CDV->getElementAsConstant(SubEltIdx));
398  Count <<= BitWidth;
399  Count |= SubElt->getValue().zextOrTrunc(64);
400  }
401  }
402  else if (CInt)
403  Count = CInt->getValue();
404 
405  auto Vec = II.getArgOperand(0);
406  auto VT = cast<VectorType>(Vec->getType());
407  auto SVT = VT->getElementType();
408  unsigned VWidth = VT->getNumElements();
409  unsigned BitWidth = SVT->getPrimitiveSizeInBits();
410 
411  // If shift-by-zero then just return the original value.
412  if (Count.isNullValue())
413  return Vec;
414 
415  // Handle cases when Shift >= BitWidth.
416  if (Count.uge(BitWidth)) {
417  // If LogicalShift - just return zero.
418  if (LogicalShift)
419  return ConstantAggregateZero::get(VT);
420 
421  // If ArithmeticShift - clamp Shift to (BitWidth - 1).
422  Count = APInt(64, BitWidth - 1);
423  }
424 
425  // Get a constant vector of the same type as the first operand.
426  auto ShiftAmt = ConstantInt::get(SVT, Count.zextOrTrunc(BitWidth));
427  auto ShiftVec = Builder.CreateVectorSplat(VWidth, ShiftAmt);
428 
429  if (ShiftLeft)
430  return Builder.CreateShl(Vec, ShiftVec);
431 
432  if (LogicalShift)
433  return Builder.CreateLShr(Vec, ShiftVec);
434 
435  return Builder.CreateAShr(Vec, ShiftVec);
436 }
437 
438 // Attempt to simplify AVX2 per-element shift intrinsics to a generic IR shift.
439 // Unlike the generic IR shifts, the intrinsics have defined behaviour for out
440 // of range shift amounts (logical - set to zero, arithmetic - splat sign bit).
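// For example (illustrative): for @llvm.x86.avx2.psrlv.d a per-element shift
// amount of 33 on an i32 element yields 0, whereas for the arithmetic
// @llvm.x86.avx2.psrav.d it yields the splatted sign bit (the element shifted
// right by 31).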
441 static Value *simplifyX86varShift(const IntrinsicInst &II,
442  InstCombiner::BuilderTy &Builder) {
443  bool LogicalShift = false;
444  bool ShiftLeft = false;
445 
446  switch (II.getIntrinsicID()) {
447  default: llvm_unreachable("Unexpected intrinsic!");
448  case Intrinsic::x86_avx2_psrav_d:
449  case Intrinsic::x86_avx2_psrav_d_256:
450  case Intrinsic::x86_avx512_psrav_q_128:
451  case Intrinsic::x86_avx512_psrav_q_256:
452  case Intrinsic::x86_avx512_psrav_d_512:
453  case Intrinsic::x86_avx512_psrav_q_512:
454  case Intrinsic::x86_avx512_psrav_w_128:
455  case Intrinsic::x86_avx512_psrav_w_256:
456  case Intrinsic::x86_avx512_psrav_w_512:
457  LogicalShift = false;
458  ShiftLeft = false;
459  break;
460  case Intrinsic::x86_avx2_psrlv_d:
461  case Intrinsic::x86_avx2_psrlv_d_256:
462  case Intrinsic::x86_avx2_psrlv_q:
463  case Intrinsic::x86_avx2_psrlv_q_256:
464  case Intrinsic::x86_avx512_psrlv_d_512:
465  case Intrinsic::x86_avx512_psrlv_q_512:
466  case Intrinsic::x86_avx512_psrlv_w_128:
467  case Intrinsic::x86_avx512_psrlv_w_256:
468  case Intrinsic::x86_avx512_psrlv_w_512:
469  LogicalShift = true;
470  ShiftLeft = false;
471  break;
472  case Intrinsic::x86_avx2_psllv_d:
473  case Intrinsic::x86_avx2_psllv_d_256:
474  case Intrinsic::x86_avx2_psllv_q:
475  case Intrinsic::x86_avx2_psllv_q_256:
476  case Intrinsic::x86_avx512_psllv_d_512:
477  case Intrinsic::x86_avx512_psllv_q_512:
478  case Intrinsic::x86_avx512_psllv_w_128:
479  case Intrinsic::x86_avx512_psllv_w_256:
480  case Intrinsic::x86_avx512_psllv_w_512:
481  LogicalShift = true;
482  ShiftLeft = true;
483  break;
484  }
485  assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left");
486 
487  // Simplify if all shift amounts are constant/undef.
488  auto *CShift = dyn_cast<Constant>(II.getArgOperand(1));
489  if (!CShift)
490  return nullptr;
491 
492  auto Vec = II.getArgOperand(0);
493  auto VT = cast<VectorType>(II.getType());
494  auto SVT = VT->getVectorElementType();
495  int NumElts = VT->getNumElements();
496  int BitWidth = SVT->getIntegerBitWidth();
497 
498  // Collect each element's shift amount.
499  // We also collect special cases: UNDEF = -1, OUT-OF-RANGE = BitWidth.
500  bool AnyOutOfRange = false;
501  SmallVector<int, 8> ShiftAmts;
502  for (int I = 0; I < NumElts; ++I) {
503  auto *CElt = CShift->getAggregateElement(I);
504  if (CElt && isa<UndefValue>(CElt)) {
505  ShiftAmts.push_back(-1);
506  continue;
507  }
508 
509  auto *COp = dyn_cast_or_null<ConstantInt>(CElt);
510  if (!COp)
511  return nullptr;
512 
513  // Handle out of range shifts.
514  // If LogicalShift - set to BitWidth (special case).
515  // If ArithmeticShift - set to (BitWidth - 1) (sign splat).
516  APInt ShiftVal = COp->getValue();
517  if (ShiftVal.uge(BitWidth)) {
518  AnyOutOfRange = LogicalShift;
519  ShiftAmts.push_back(LogicalShift ? BitWidth : BitWidth - 1);
520  continue;
521  }
522 
523  ShiftAmts.push_back((int)ShiftVal.getZExtValue());
524  }
525 
526  // If all elements out of range or UNDEF, return vector of zeros/undefs.
527  // ArithmeticShift should only hit this if they are all UNDEF.
528  auto OutOfRange = [&](int Idx) { return (Idx < 0) || (BitWidth <= Idx); };
529  if (llvm::all_of(ShiftAmts, OutOfRange)) {
530  SmallVector<Constant *, 8> ConstantVec;
531  for (int Idx : ShiftAmts) {
532  if (Idx < 0) {
533  ConstantVec.push_back(UndefValue::get(SVT));
534  } else {
535  assert(LogicalShift && "Logical shift expected");
536  ConstantVec.push_back(ConstantInt::getNullValue(SVT));
537  }
538  }
539  return ConstantVector::get(ConstantVec);
540  }
541 
542  // We can't handle only some out of range values with generic logical shifts.
543  if (AnyOutOfRange)
544  return nullptr;
545 
546  // Build the shift amount constant vector.
547  SmallVector<Constant *, 8> ShiftVecAmts;
548  for (int Idx : ShiftAmts) {
549  if (Idx < 0)
550  ShiftVecAmts.push_back(UndefValue::get(SVT));
551  else
552  ShiftVecAmts.push_back(ConstantInt::get(SVT, Idx));
553  }
554  auto ShiftVec = ConstantVector::get(ShiftVecAmts);
555 
556  if (ShiftLeft)
557  return Builder.CreateShl(Vec, ShiftVec);
558 
559  if (LogicalShift)
560  return Builder.CreateLShr(Vec, ShiftVec);
561 
562  return Builder.CreateAShr(Vec, ShiftVec);
563 }
564 
565 static Value *simplifyX86pack(IntrinsicInst &II, bool IsSigned) {
566  Value *Arg0 = II.getArgOperand(0);
567  Value *Arg1 = II.getArgOperand(1);
568  Type *ResTy = II.getType();
569 
570  // Fast all undef handling.
571  if (isa<UndefValue>(Arg0) && isa<UndefValue>(Arg1))
572  return UndefValue::get(ResTy);
573 
574  Type *ArgTy = Arg0->getType();
575  unsigned NumLanes = ResTy->getPrimitiveSizeInBits() / 128;
576  unsigned NumDstElts = ResTy->getVectorNumElements();
577  unsigned NumSrcElts = ArgTy->getVectorNumElements();
578  assert(NumDstElts == (2 * NumSrcElts) && "Unexpected packing types");
579 
580  unsigned NumDstEltsPerLane = NumDstElts / NumLanes;
581  unsigned NumSrcEltsPerLane = NumSrcElts / NumLanes;
582  unsigned DstScalarSizeInBits = ResTy->getScalarSizeInBits();
583  assert(ArgTy->getScalarSizeInBits() == (2 * DstScalarSizeInBits) &&
584  "Unexpected packing types");
585 
586  // Constant folding.
587  auto *Cst0 = dyn_cast<Constant>(Arg0);
588  auto *Cst1 = dyn_cast<Constant>(Arg1);
589  if (!Cst0 || !Cst1)
590  return nullptr;
591 
592  SmallVector<Constant *, 32> Vals;
593  for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
594  for (unsigned Elt = 0; Elt != NumDstEltsPerLane; ++Elt) {
595  unsigned SrcIdx = Lane * NumSrcEltsPerLane + Elt % NumSrcEltsPerLane;
596  auto *Cst = (Elt >= NumSrcEltsPerLane) ? Cst1 : Cst0;
597  auto *COp = Cst->getAggregateElement(SrcIdx);
598  if (COp && isa<UndefValue>(COp)) {
599  Vals.push_back(UndefValue::get(ResTy->getScalarType()));
600  continue;
601  }
602 
603  auto *CInt = dyn_cast_or_null<ConstantInt>(COp);
604  if (!CInt)
605  return nullptr;
606 
607  APInt Val = CInt->getValue();
608  assert(Val.getBitWidth() == ArgTy->getScalarSizeInBits() &&
609  "Unexpected constant bitwidth");
610 
611  if (IsSigned) {
612  // PACKSS: Truncate signed value with signed saturation.
613  // Source values less than dst minint are saturated to minint.
614  // Source values greater than dst maxint are saturated to maxint.
615  if (Val.isSignedIntN(DstScalarSizeInBits))
616  Val = Val.trunc(DstScalarSizeInBits);
617  else if (Val.isNegative())
618  Val = APInt::getSignedMinValue(DstScalarSizeInBits);
619  else
620  Val = APInt::getSignedMaxValue(DstScalarSizeInBits);
621  } else {
622  // PACKUS: Truncate signed value with unsigned saturation.
623  // Source values less than zero are saturated to zero.
624  // Source values greater than dst maxuint are saturated to maxuint.
625  if (Val.isIntN(DstScalarSizeInBits))
626  Val = Val.trunc(DstScalarSizeInBits);
627  else if (Val.isNegative())
628  Val = APInt::getNullValue(DstScalarSizeInBits);
629  else
630  Val = APInt::getAllOnesValue(DstScalarSizeInBits);
631  }
632 
633  Vals.push_back(ConstantInt::get(ResTy->getScalarType(), Val));
634  }
635  }
636 
637  return ConstantVector::get(Vals);
638 }
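// Illustrative saturation examples for the constant folding above:
//   PACKSS (signed):   i16 300 -> i8 127,  i16 -200 -> i8 -128,  i16 5 -> i8 5
//   PACKUS (unsigned): i16 300 -> i8 255,  i16 -200 -> i8 0,     i16 5 -> i8 5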
639 
640 // Replace X86-specific intrinsics with generic floor-ceil where applicable.
641 static Value *simplifyX86round(IntrinsicInst &II,
642  InstCombiner::BuilderTy &Builder) {
643  ConstantInt *Arg = nullptr;
644  Intrinsic::ID IntrinsicID = II.getIntrinsicID();
645 
646  if (IntrinsicID == Intrinsic::x86_sse41_round_ss ||
647  IntrinsicID == Intrinsic::x86_sse41_round_sd)
648  Arg = dyn_cast<ConstantInt>(II.getArgOperand(2));
649  else if (IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ss ||
650  IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_sd)
651  Arg = dyn_cast<ConstantInt>(II.getArgOperand(4));
652  else
653  Arg = dyn_cast<ConstantInt>(II.getArgOperand(1));
654  if (!Arg)
655  return nullptr;
656  unsigned RoundControl = Arg->getZExtValue();
657 
658  Arg = nullptr;
659  unsigned SAE = 0;
660  if (IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ps_512 ||
661  IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_pd_512)
662  Arg = dyn_cast<ConstantInt>(II.getArgOperand(4));
663  else if (IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ss ||
664  IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_sd)
665  Arg = dyn_cast<ConstantInt>(II.getArgOperand(5));
666  else
667  SAE = 4;
668  if (!SAE) {
669  if (!Arg)
670  return nullptr;
671  SAE = Arg->getZExtValue();
672  }
673 
674  if (SAE != 4 || (RoundControl != 2 /*ceil*/ && RoundControl != 1 /*floor*/))
675  return nullptr;
676 
677  Value *Src, *Dst, *Mask;
678  bool IsScalar = false;
679  if (IntrinsicID == Intrinsic::x86_sse41_round_ss ||
680  IntrinsicID == Intrinsic::x86_sse41_round_sd ||
681  IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ss ||
682  IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_sd) {
683  IsScalar = true;
684  if (IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ss ||
685  IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_sd) {
686  Mask = II.getArgOperand(3);
687  Value *Zero = Constant::getNullValue(Mask->getType());
688  Mask = Builder.CreateAnd(Mask, 1);
689  Mask = Builder.CreateICmp(ICmpInst::ICMP_NE, Mask, Zero);
690  Dst = II.getArgOperand(2);
691  } else
692  Dst = II.getArgOperand(0);
693  Src = Builder.CreateExtractElement(II.getArgOperand(1), (uint64_t)0);
694  } else {
695  Src = II.getArgOperand(0);
696  if (IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ps_128 ||
697  IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ps_256 ||
698  IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ps_512 ||
699  IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_pd_128 ||
700  IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_pd_256 ||
701  IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_pd_512) {
702  Dst = II.getArgOperand(2);
703  Mask = II.getArgOperand(3);
704  } else {
705  Dst = Src;
706  Mask = ConstantInt::getAllOnesValue(
707  Builder.getIntNTy(Src->getType()->getVectorNumElements()));
708  }
709  }
710 
711  Intrinsic::ID ID = (RoundControl == 2) ? Intrinsic::ceil : Intrinsic::floor;
712  Value *Res = Builder.CreateIntrinsic(ID, {Src}, &II);
713  if (!IsScalar) {
714  if (auto *C = dyn_cast<Constant>(Mask))
715  if (C->isAllOnesValue())
716  return Res;
717  auto *MaskTy = VectorType::get(
718  Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
719  Mask = Builder.CreateBitCast(Mask, MaskTy);
720  unsigned Width = Src->getType()->getVectorNumElements();
721  if (MaskTy->getVectorNumElements() > Width) {
722  uint32_t Indices[4];
723  for (unsigned i = 0; i != Width; ++i)
724  Indices[i] = i;
725  Mask = Builder.CreateShuffleVector(Mask, Mask,
726  makeArrayRef(Indices, Width));
727  }
728  return Builder.CreateSelect(Mask, Res, Dst);
729  }
730  if (IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ss ||
731  IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_sd) {
732  Dst = Builder.CreateExtractElement(Dst, (uint64_t)0);
733  Res = Builder.CreateSelect(Mask, Res, Dst);
734  Dst = II.getArgOperand(0);
735  }
736  return Builder.CreateInsertElement(Dst, Res, (uint64_t)0);
737 }
738 
739 static Value *simplifyX86movmsk(const IntrinsicInst &II) {
740  Value *Arg = II.getArgOperand(0);
741  Type *ResTy = II.getType();
742  Type *ArgTy = Arg->getType();
743 
744  // movmsk(undef) -> zero as we must ensure the upper bits are zero.
745  if (isa<UndefValue>(Arg))
746  return Constant::getNullValue(ResTy);
747 
748  // We can't easily peek through x86_mmx types.
749  if (!ArgTy->isVectorTy())
750  return nullptr;
751 
752  auto *C = dyn_cast<Constant>(Arg);
753  if (!C)
754  return nullptr;
755 
756  // Extract signbits of the vector input and pack into integer result.
757  APInt Result(ResTy->getPrimitiveSizeInBits(), 0);
758  for (unsigned I = 0, E = ArgTy->getVectorNumElements(); I != E; ++I) {
759  auto *COp = C->getAggregateElement(I);
760  if (!COp)
761  return nullptr;
762  if (isa<UndefValue>(COp))
763  continue;
764 
765  auto *CInt = dyn_cast<ConstantInt>(COp);
766  auto *CFp = dyn_cast<ConstantFP>(COp);
767  if (!CInt && !CFp)
768  return nullptr;
769 
770  if ((CInt && CInt->isNegative()) || (CFp && CFp->isNegative()))
771  Result.setBit(I);
772  }
773 
774  return Constant::getIntegerValue(ResTy, Result);
775 }
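// For example (illustrative): movmsk on the constant vector
//   <4 x float> <float -1.0, float 2.0, float -3.0, float 4.0>
// sets result bits 0 and 2 (the negative elements), folding to i32 5.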
776 
777 static Value *simplifyX86insertps(const IntrinsicInst &II,
778  InstCombiner::BuilderTy &Builder) {
779  auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2));
780  if (!CInt)
781  return nullptr;
782 
783  VectorType *VecTy = cast<VectorType>(II.getType());
784  assert(VecTy->getNumElements() == 4 && "insertps with wrong vector type");
785 
786  // The immediate permute control byte looks like this:
787  // [3:0] - zero mask for each 32-bit lane
788  // [5:4] - select one 32-bit destination lane
789  // [7:6] - select one 32-bit source lane
790 
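 // For example (illustrative): with Imm = 0xC0 (ZMask = 0, DestLane = 0,
 // SourceLane = 3) the call becomes a shufflevector with mask <7, 1, 2, 3>,
 // i.e. lane 0 takes element 3 of the second operand and the remaining lanes
 // come from the first operand; Imm = 0x0F would instead return the zero
 // vector.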
791  uint8_t Imm = CInt->getZExtValue();
792  uint8_t ZMask = Imm & 0xf;
793  uint8_t DestLane = (Imm >> 4) & 0x3;
794  uint8_t SourceLane = (Imm >> 6) & 0x3;
795 
796  ConstantAggregateZero *ZeroVector = ConstantAggregateZero::get(VecTy);
797 
798  // If all zero mask bits are set, this was just a weird way to
799  // generate a zero vector.
800  if (ZMask == 0xf)
801  return ZeroVector;
802 
803  // Initialize by passing all of the first source bits through.
804  uint32_t ShuffleMask[4] = { 0, 1, 2, 3 };
805 
806  // We may replace the second operand with the zero vector.
807  Value *V1 = II.getArgOperand(1);
808 
809  if (ZMask) {
810  // If the zero mask is being used with a single input or the zero mask
811  // overrides the destination lane, this is a shuffle with the zero vector.
812  if ((II.getArgOperand(0) == II.getArgOperand(1)) ||
813  (ZMask & (1 << DestLane))) {
814  V1 = ZeroVector;
815  // We may still move 32-bits of the first source vector from one lane
816  // to another.
817  ShuffleMask[DestLane] = SourceLane;
818  // The zero mask may override the previous insert operation.
819  for (unsigned i = 0; i < 4; ++i)
820  if ((ZMask >> i) & 0x1)
821  ShuffleMask[i] = i + 4;
822  } else {
823  // TODO: Model this case as 2 shuffles or a 'logical and' plus shuffle?
824  return nullptr;
825  }
826  } else {
827  // Replace the selected destination lane with the selected source lane.
828  ShuffleMask[DestLane] = SourceLane + 4;
829  }
830 
831  return Builder.CreateShuffleVector(II.getArgOperand(0), V1, ShuffleMask);
832 }
833 
834 /// Attempt to simplify SSE4A EXTRQ/EXTRQI instructions using constant folding
835 /// or conversion to a shuffle vector.
836 static Value *simplifyX86extrq(IntrinsicInst &II, Value *Op0,
837  ConstantInt *CILength, ConstantInt *CIIndex,
838  InstCombiner::BuilderTy &Builder) {
839  auto LowConstantHighUndef = [&](uint64_t Val) {
840  Type *IntTy64 = Type::getInt64Ty(II.getContext());
841  Constant *Args[] = {ConstantInt::get(IntTy64, Val),
842  UndefValue::get(IntTy64)};
843  return ConstantVector::get(Args);
844  };
845 
846  // See if we're dealing with constant values.
847  Constant *C0 = dyn_cast<Constant>(Op0);
848  ConstantInt *CI0 =
849  C0 ? dyn_cast_or_null<ConstantInt>(C0->getAggregateElement((unsigned)0))
850  : nullptr;
851 
852  // Attempt to constant fold.
853  if (CILength && CIIndex) {
854  // From AMD documentation: "The bit index and field length are each six
855  // bits in length other bits of the field are ignored."
856  APInt APIndex = CIIndex->getValue().zextOrTrunc(6);
857  APInt APLength = CILength->getValue().zextOrTrunc(6);
858 
859  unsigned Index = APIndex.getZExtValue();
860 
861  // From AMD documentation: "a value of zero in the field length is
862  // defined as length of 64".
863  unsigned Length = APLength == 0 ? 64 : APLength.getZExtValue();
864 
865  // From AMD documentation: "If the sum of the bit index + length field
866  // is greater than 64, the results are undefined".
867  unsigned End = Index + Length;
868 
869  // Note that both field index and field length are 8-bit quantities.
870  // Since variables 'Index' and 'Length' are unsigned values
871  // obtained from zero-extending field index and field length
872  // respectively, their sum should never wrap around.
873  if (End > 64)
874  return UndefValue::get(II.getType());
875 
876  // If we are inserting whole bytes, we can convert this to a shuffle.
877  // Lowering can recognize EXTRQI shuffle masks.
878  if ((Length % 8) == 0 && (Index % 8) == 0) {
879  // Convert bit indices to byte indices.
880  Length /= 8;
881  Index /= 8;
882 
883  Type *IntTy8 = Type::getInt8Ty(II.getContext());
884  Type *IntTy32 = Type::getInt32Ty(II.getContext());
885  VectorType *ShufTy = VectorType::get(IntTy8, 16);
886 
887  SmallVector<Constant *, 16> ShuffleMask;
888  for (int i = 0; i != (int)Length; ++i)
889  ShuffleMask.push_back(
890  Constant::getIntegerValue(IntTy32, APInt(32, i + Index)));
891  for (int i = Length; i != 8; ++i)
892  ShuffleMask.push_back(
893  Constant::getIntegerValue(IntTy32, APInt(32, i + 16)));
894  for (int i = 8; i != 16; ++i)
895  ShuffleMask.push_back(UndefValue::get(IntTy32));
896 
897  Value *SV = Builder.CreateShuffleVector(
898  Builder.CreateBitCast(Op0, ShufTy),
899  ConstantAggregateZero::get(ShufTy), ConstantVector::get(ShuffleMask));
900  return Builder.CreateBitCast(SV, II.getType());
901  }
902 
903  // Constant Fold - shift Index'th bit to lowest position and mask off
904  // Length bits.
905  if (CI0) {
906  APInt Elt = CI0->getValue();
907  Elt.lshrInPlace(Index);
908  Elt = Elt.zextOrTrunc(Length);
909  return LowConstantHighUndef(Elt.getZExtValue());
910  }
911 
912  // If we were an EXTRQ call, we'll save registers if we convert to EXTRQI.
913  if (II.getIntrinsicID() == Intrinsic::x86_sse4a_extrq) {
914  Value *Args[] = {Op0, CILength, CIIndex};
915  Module *M = II.getModule();
916  Value *F = Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_extrqi);
917  return Builder.CreateCall(F, Args);
918  }
919  }
920 
921  // Constant Fold - extraction from zero is always {zero, undef}.
922  if (CI0 && CI0->isZero())
923  return LowConstantHighUndef(0);
924 
925  return nullptr;
926 }
927 
928 /// Attempt to simplify SSE4A INSERTQ/INSERTQI instructions using constant
929 /// folding or conversion to a shuffle vector.
930 static Value *simplifyX86insertq(IntrinsicInst &II, Value *Op0, Value *Op1,
931  APInt APLength, APInt APIndex,
932  InstCombiner::BuilderTy &Builder) {
933  // From AMD documentation: "The bit index and field length are each six bits
934  // in length other bits of the field are ignored."
935  APIndex = APIndex.zextOrTrunc(6);
936  APLength = APLength.zextOrTrunc(6);
937 
938  // Attempt to constant fold.
939  unsigned Index = APIndex.getZExtValue();
940 
941  // From AMD documentation: "a value of zero in the field length is
942  // defined as length of 64".
943  unsigned Length = APLength == 0 ? 64 : APLength.getZExtValue();
944 
945  // From AMD documentation: "If the sum of the bit index + length field
946  // is greater than 64, the results are undefined".
947  unsigned End = Index + Length;
948 
949  // Note that both field index and field length are 8-bit quantities.
950  // Since variables 'Index' and 'Length' are unsigned values
951  // obtained from zero-extending field index and field length
952  // respectively, their sum should never wrap around.
953  if (End > 64)
954  return UndefValue::get(II.getType());
955 
956  // If we are inserting whole bytes, we can convert this to a shuffle.
957  // Lowering can recognize INSERTQI shuffle masks.
958  if ((Length % 8) == 0 && (Index % 8) == 0) {
959  // Convert bit indices to byte indices.
960  Length /= 8;
961  Index /= 8;
962 
963  Type *IntTy8 = Type::getInt8Ty(II.getContext());
964  Type *IntTy32 = Type::getInt32Ty(II.getContext());
965  VectorType *ShufTy = VectorType::get(IntTy8, 16);
966 
967  SmallVector<Constant *, 16> ShuffleMask;
968  for (int i = 0; i != (int)Index; ++i)
969  ShuffleMask.push_back(Constant::getIntegerValue(IntTy32, APInt(32, i)));
970  for (int i = 0; i != (int)Length; ++i)
971  ShuffleMask.push_back(
972  Constant::getIntegerValue(IntTy32, APInt(32, i + 16)));
973  for (int i = Index + Length; i != 8; ++i)
974  ShuffleMask.push_back(Constant::getIntegerValue(IntTy32, APInt(32, i)));
975  for (int i = 8; i != 16; ++i)
976  ShuffleMask.push_back(UndefValue::get(IntTy32));
977 
978  Value *SV = Builder.CreateShuffleVector(Builder.CreateBitCast(Op0, ShufTy),
979  Builder.CreateBitCast(Op1, ShufTy),
980  ConstantVector::get(ShuffleMask));
981  return Builder.CreateBitCast(SV, II.getType());
982  }
983 
984  // See if we're dealing with constant values.
985  Constant *C0 = dyn_cast<Constant>(Op0);
986  Constant *C1 = dyn_cast<Constant>(Op1);
987  ConstantInt *CI00 =
988  C0 ? dyn_cast_or_null<ConstantInt>(C0->getAggregateElement((unsigned)0))
989  : nullptr;
990  ConstantInt *CI10 =
991  C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)0))
992  : nullptr;
993 
994  // Constant Fold - insert bottom Length bits starting at the Index'th bit.
995  if (CI00 && CI10) {
996  APInt V00 = CI00->getValue();
997  APInt V10 = CI10->getValue();
998  APInt Mask = APInt::getLowBitsSet(64, Length).shl(Index);
999  V00 = V00 & ~Mask;
1000  V10 = V10.zextOrTrunc(Length).zextOrTrunc(64).shl(Index);
1001  APInt Val = V00 | V10;
1002  Type *IntTy64 = Type::getInt64Ty(II.getContext());
1003  Constant *Args[] = {ConstantInt::get(IntTy64, Val.getZExtValue()),
1004  UndefValue::get(IntTy64)};
1005  return ConstantVector::get(Args);
1006  }
1007 
1008  // If we were an INSERTQ call, we'll save demanded elements if we convert to
1009  // INSERTQI.
1010  if (II.getIntrinsicID() == Intrinsic::x86_sse4a_insertq) {
1011  Type *IntTy8 = Type::getInt8Ty(II.getContext());
1012  Constant *CILength = ConstantInt::get(IntTy8, Length, false);
1013  Constant *CIIndex = ConstantInt::get(IntTy8, Index, false);
1014 
1015  Value *Args[] = {Op0, Op1, CILength, CIIndex};
1016  Module *M = II.getModule();
1017  Value *F = Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_insertqi);
1018  return Builder.CreateCall(F, Args);
1019  }
1020 
1021  return nullptr;
1022 }
1023 
1024 /// Attempt to convert pshufb* to shufflevector if the mask is constant.
1025 static Value *simplifyX86pshufb(const IntrinsicInst &II,
1026  InstCombiner::BuilderTy &Builder) {
1027  Constant *V = dyn_cast<Constant>(II.getArgOperand(1));
1028  if (!V)
1029  return nullptr;
1030 
1031  auto *VecTy = cast<VectorType>(II.getType());
1032  auto *MaskEltTy = Type::getInt32Ty(II.getContext());
1033  unsigned NumElts = VecTy->getNumElements();
1034  assert((NumElts == 16 || NumElts == 32 || NumElts == 64) &&
1035  "Unexpected number of elements in shuffle mask!");
1036 
1037  // Construct a shuffle mask from constant integers or UNDEFs.
1038  Constant *Indexes[64] = {nullptr};
1039 
1040  // Each byte in the shuffle control mask forms an index to permute the
1041  // corresponding byte in the destination operand.
1042  for (unsigned I = 0; I < NumElts; ++I) {
1043  Constant *COp = V->getAggregateElement(I);
1044  if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
1045  return nullptr;
1046 
1047  if (isa<UndefValue>(COp)) {
1048  Indexes[I] = UndefValue::get(MaskEltTy);
1049  continue;
1050  }
1051 
1052  int8_t Index = cast<ConstantInt>(COp)->getValue().getZExtValue();
1053 
1054  // If the most significant bit (bit[7]) of each byte of the shuffle
1055  // control mask is set, then zero is written in the result byte.
1056  // The zero vector is in the right-hand side of the resulting
1057  // shufflevector.
1058 
1059  // The value of each index for the high 128-bit lane is the least
1060  // significant 4 bits of the respective shuffle control byte.
1061  Index = ((Index < 0) ? NumElts : Index & 0x0F) + (I & 0xF0);
1062  Indexes[I] = ConstantInt::get(MaskEltTy, Index);
1063  }
1064 
1065  auto ShuffleMask = ConstantVector::get(makeArrayRef(Indexes, NumElts));
1066  auto V1 = II.getArgOperand(0);
1067  auto V2 = Constant::getNullValue(VecTy);
1068  return Builder.CreateShuffleVector(V1, V2, ShuffleMask);
1069 }
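// For example (illustrative): a pshufb control byte of 0x05 selects byte 5 of
// the source within its 128-bit lane, while a byte with the high bit set
// (e.g. 0x80) selects the zero vector placed on the right-hand side of the
// shufflevector built above.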
1070 
1071 /// Attempt to convert vpermilvar* to shufflevector if the mask is constant.
1072 static Value *simplifyX86vpermilvar(const IntrinsicInst &II,
1073  InstCombiner::BuilderTy &Builder) {
1074  Constant *V = dyn_cast<Constant>(II.getArgOperand(1));
1075  if (!V)
1076  return nullptr;
1077 
1078  auto *VecTy = cast<VectorType>(II.getType());
1079  auto *MaskEltTy = Type::getInt32Ty(II.getContext());
1080  unsigned NumElts = VecTy->getVectorNumElements();
1081  bool IsPD = VecTy->getScalarType()->isDoubleTy();
1082  unsigned NumLaneElts = IsPD ? 2 : 4;
1083  assert(NumElts == 16 || NumElts == 8 || NumElts == 4 || NumElts == 2);
1084 
1085  // Construct a shuffle mask from constant integers or UNDEFs.
1086  Constant *Indexes[16] = {nullptr};
1087 
1088  // The intrinsics only read one or two bits, clear the rest.
1089  for (unsigned I = 0; I < NumElts; ++I) {
1090  Constant *COp = V->getAggregateElement(I);
1091  if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
1092  return nullptr;
1093 
1094  if (isa<UndefValue>(COp)) {
1095  Indexes[I] = UndefValue::get(MaskEltTy);
1096  continue;
1097  }
1098 
1099  APInt Index = cast<ConstantInt>(COp)->getValue();
1100  Index = Index.zextOrTrunc(32).getLoBits(2);
1101 
1102  // The PD variants use bit 1 to select the per-lane element index, so
1103  // shift down to convert to generic shuffle mask index.
1104  if (IsPD)
1105  Index.lshrInPlace(1);
1106 
1107  // The _256 variants are a bit trickier since the mask bits always index
1108  // into the corresponding 128 half. In order to convert to a generic
1109  // shuffle, we have to make that explicit.
1110  Index += APInt(32, (I / NumLaneElts) * NumLaneElts);
1111 
1112  Indexes[I] = ConstantInt::get(MaskEltTy, Index);
1113  }
1114 
1115  auto ShuffleMask = ConstantVector::get(makeArrayRef(Indexes, NumElts));
1116  auto V1 = II.getArgOperand(0);
1117  auto V2 = UndefValue::get(V1->getType());
1118  return Builder.CreateShuffleVector(V1, V2, ShuffleMask);
1119 }
1120 
1121 /// Attempt to convert vpermd/vpermps to shufflevector if the mask is constant.
1122 static Value *simplifyX86vpermv(const IntrinsicInst &II,
1123  InstCombiner::BuilderTy &Builder) {
1124  auto *V = dyn_cast<Constant>(II.getArgOperand(1));
1125  if (!V)
1126  return nullptr;
1127 
1128  auto *VecTy = cast<VectorType>(II.getType());
1129  auto *MaskEltTy = Type::getInt32Ty(II.getContext());
1130  unsigned Size = VecTy->getNumElements();
1131  assert((Size == 4 || Size == 8 || Size == 16 || Size == 32 || Size == 64) &&
1132  "Unexpected shuffle mask size");
1133 
1134  // Construct a shuffle mask from constant integers or UNDEFs.
1135  Constant *Indexes[64] = {nullptr};
1136 
1137  for (unsigned I = 0; I < Size; ++I) {
1138  Constant *COp = V->getAggregateElement(I);
1139  if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
1140  return nullptr;
1141 
1142  if (isa<UndefValue>(COp)) {
1143  Indexes[I] = UndefValue::get(MaskEltTy);
1144  continue;
1145  }
1146 
1147  uint32_t Index = cast<ConstantInt>(COp)->getZExtValue();
1148  Index &= Size - 1;
1149  Indexes[I] = ConstantInt::get(MaskEltTy, Index);
1150  }
1151 
1152  auto ShuffleMask = ConstantVector::get(makeArrayRef(Indexes, Size));
1153  auto V1 = II.getArgOperand(0);
1154  auto V2 = UndefValue::get(VecTy);
1155  return Builder.CreateShuffleVector(V1, V2, ShuffleMask);
1156 }
1157 
1158 /// Decode XOP integer vector comparison intrinsics.
1159 static Value *simplifyX86vpcom(const IntrinsicInst &II,
1160  InstCombiner::BuilderTy &Builder,
1161  bool IsSigned) {
1162  if (auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
1163  uint64_t Imm = CInt->getZExtValue() & 0x7;
1164  VectorType *VecTy = cast<VectorType>(II.getType());
1165  CmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
1166 
1167  switch (Imm) {
1168  case 0x0:
1169  Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
1170  break;
1171  case 0x1:
1172  Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
1173  break;
1174  case 0x2:
1175  Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
1176  break;
1177  case 0x3:
1178  Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
1179  break;
1180  case 0x4:
1181  Pred = ICmpInst::ICMP_EQ; break;
1182  case 0x5:
1183  Pred = ICmpInst::ICMP_NE; break;
1184  case 0x6:
1185  return ConstantInt::getSigned(VecTy, 0); // FALSE
1186  case 0x7:
1187  return ConstantInt::getSigned(VecTy, -1); // TRUE
1188  }
1189 
1190  if (Value *Cmp = Builder.CreateICmp(Pred, II.getArgOperand(0),
1191  II.getArgOperand(1)))
1192  return Builder.CreateSExtOrTrunc(Cmp, VecTy);
1193  }
1194  return nullptr;
1195 }
1196 
1197 static bool maskIsAllOneOrUndef(Value *Mask) {
1198  auto *ConstMask = dyn_cast<Constant>(Mask);
1199  if (!ConstMask)
1200  return false;
1201  if (ConstMask->isAllOnesValue() || isa<UndefValue>(ConstMask))
1202  return true;
1203  for (unsigned I = 0, E = ConstMask->getType()->getVectorNumElements(); I != E;
1204  ++I) {
1205  if (auto *MaskElt = ConstMask->getAggregateElement(I))
1206  if (MaskElt->isAllOnesValue() || isa<UndefValue>(MaskElt))
1207  continue;
1208  return false;
1209  }
1210  return true;
1211 }
1212 
1213 static Value *simplifyMaskedLoad(const IntrinsicInst &II,
1214  InstCombiner::BuilderTy &Builder) {
1215  // If the mask is all ones or undefs, this is a plain vector load of the 1st
1216  // argument.
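 // For example (illustrative, simplified mangling):
 //   llvm.masked.load(%p, align 4, <all-true mask>, %passthru)
 // becomes
 //   load <4 x i32>, <4 x i32>* %p, align 4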
1217  if (maskIsAllOneOrUndef(II.getArgOperand(2))) {
1218  Value *LoadPtr = II.getArgOperand(0);
1219  unsigned Alignment = cast<ConstantInt>(II.getArgOperand(1))->getZExtValue();
1220  return Builder.CreateAlignedLoad(LoadPtr, Alignment, "unmaskedload");
1221  }
1222 
1223  return nullptr;
1224 }
1225 
1226 static Instruction *simplifyMaskedStore(IntrinsicInst &II, InstCombiner &IC) {
1227  auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(3));
1228  if (!ConstMask)
1229  return nullptr;
1230 
1231  // If the mask is all zeros, this instruction does nothing.
1232  if (ConstMask->isNullValue())
1233  return IC.eraseInstFromFunction(II);
1234 
1235  // If the mask is all ones, this is a plain vector store of the 1st argument.
1236  if (ConstMask->isAllOnesValue()) {
1237  Value *StorePtr = II.getArgOperand(1);
1238  unsigned Alignment = cast<ConstantInt>(II.getArgOperand(2))->getZExtValue();
1239  return new StoreInst(II.getArgOperand(0), StorePtr, false, Alignment);
1240  }
1241 
1242  return nullptr;
1243 }
1244 
1245 static Instruction *simplifyMaskedGather(IntrinsicInst &II, InstCombiner &IC) {
1246  // If the mask is all zeros, return the "passthru" argument of the gather.
1247  auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(2));
1248  if (ConstMask && ConstMask->isNullValue())
1249  return IC.replaceInstUsesWith(II, II.getArgOperand(3));
1250 
1251  return nullptr;
1252 }
1253 
1254 /// This function transforms launder.invariant.group and strip.invariant.group
1255 /// like:
1256 /// launder(launder(%x)) -> launder(%x) (the result is not the argument)
1257 /// launder(strip(%x)) -> launder(%x)
1258 /// strip(strip(%x)) -> strip(%x) (the result is not the argument)
1259 /// strip(launder(%x)) -> strip(%x)
1260 /// This is legal because it preserves the most recent information about
1261 /// the presence or absence of invariant.group.
1262 static Instruction *simplifyInvariantGroupIntrinsic(IntrinsicInst &II,
1263  InstCombiner &IC) {
1264  auto *Arg = II.getArgOperand(0);
1265  auto *StrippedArg = Arg->stripPointerCasts();
1266  auto *StrippedInvariantGroupsArg = Arg->stripPointerCastsAndInvariantGroups();
1267  if (StrippedArg == StrippedInvariantGroupsArg)
1268  return nullptr; // No launders/strips to remove.
1269 
1270  Value *Result = nullptr;
1271 
1272  if (II.getIntrinsicID() == Intrinsic::launder_invariant_group)
1273  Result = IC.Builder.CreateLaunderInvariantGroup(StrippedInvariantGroupsArg);
1274  else if (II.getIntrinsicID() == Intrinsic::strip_invariant_group)
1275  Result = IC.Builder.CreateStripInvariantGroup(StrippedInvariantGroupsArg);
1276  else
1277  llvm_unreachable(
1278  "simplifyInvariantGroupIntrinsic only handles launder and strip");
1279  if (Result->getType()->getPointerAddressSpace() !=
1280  II.getType()->getPointerAddressSpace())
1281  Result = IC.Builder.CreateAddrSpaceCast(Result, II.getType());
1282  if (Result->getType() != II.getType())
1283  Result = IC.Builder.CreateBitCast(Result, II.getType());
1284 
1285  return cast<Instruction>(Result);
1286 }
1287 
1288 static Instruction *simplifyMaskedScatter(IntrinsicInst &II, InstCombiner &IC) {
1289  // If the mask is all zeros, a scatter does nothing.
1290  auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(3));
1291  if (ConstMask && ConstMask->isNullValue())
1292  return IC.eraseInstFromFunction(II);
1293 
1294  return nullptr;
1295 }
1296 
1297 static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombiner &IC) {
1298  assert((II.getIntrinsicID() == Intrinsic::cttz ||
1299  II.getIntrinsicID() == Intrinsic::ctlz) &&
1300  "Expected cttz or ctlz intrinsic");
1301  Value *Op0 = II.getArgOperand(0);
1302 
1303  KnownBits Known = IC.computeKnownBits(Op0, 0, &II);
1304 
1305  // Create a mask for bits above (ctlz) or below (cttz) the first known one.
1306  bool IsTZ = II.getIntrinsicID() == Intrinsic::cttz;
1307  unsigned PossibleZeros = IsTZ ? Known.countMaxTrailingZeros()
1308  : Known.countMaxLeadingZeros();
1309  unsigned DefiniteZeros = IsTZ ? Known.countMinTrailingZeros()
1310  : Known.countMinLeadingZeros();
1311 
1312  // If all bits above (ctlz) or below (cttz) the first known one are known
1313  // zero, this value is constant.
1314  // FIXME: This should be in InstSimplify because we're replacing an
1315  // instruction with a constant.
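 // For example (illustrative): if the low bits of the operand are known to be
 // ...1000 (bits 0-2 known zero, bit 3 known one), the minimum and maximum
 // trailing-zero counts both equal 3 and cttz folds to the constant 3.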
1316  if (PossibleZeros == DefiniteZeros) {
1317  auto *C = ConstantInt::get(Op0->getType(), DefiniteZeros);
1318  return IC.replaceInstUsesWith(II, C);
1319  }
1320 
1321  // If the input to cttz/ctlz is known to be non-zero,
1322  // then change the 'ZeroIsUndef' parameter to 'true'
1323  // because we know the zero behavior can't affect the result.
1324  if (!Known.One.isNullValue() ||
1325  isKnownNonZero(Op0, IC.getDataLayout(), 0, &IC.getAssumptionCache(), &II,
1326  &IC.getDominatorTree())) {
1327  if (!match(II.getArgOperand(1), m_One())) {
1328  II.setOperand(1, IC.Builder.getTrue());
1329  return &II;
1330  }
1331  }
1332 
1333  // Add range metadata since known bits can't completely reflect what we know.
1334  // TODO: Handle splat vectors.
1335  auto *IT = dyn_cast<IntegerType>(Op0->getType());
1336  if (IT && IT->getBitWidth() != 1 && !II.getMetadata(LLVMContext::MD_range)) {
1337  Metadata *LowAndHigh[] = {
1338  ConstantAsMetadata::get(ConstantInt::get(IT, DefiniteZeros)),
1339  ConstantAsMetadata::get(ConstantInt::get(IT, PossibleZeros + 1))};
1340  II.setMetadata(LLVMContext::MD_range,
1341  MDNode::get(II.getContext(), LowAndHigh));
1342  return &II;
1343  }
1344 
1345  return nullptr;
1346 }
1347 
1348 static Instruction *foldCtpop(IntrinsicInst &II, InstCombiner &IC) {
1349  assert(II.getIntrinsicID() == Intrinsic::ctpop &&
1350  "Expected ctpop intrinsic");
1351  Value *Op0 = II.getArgOperand(0);
1352  // FIXME: Try to simplify vectors of integers.
1353  auto *IT = dyn_cast<IntegerType>(Op0->getType());
1354  if (!IT)
1355  return nullptr;
1356 
1357  unsigned BitWidth = IT->getBitWidth();
1358  KnownBits Known(BitWidth);
1359  IC.computeKnownBits(Op0, Known, 0, &II);
1360 
1361  unsigned MinCount = Known.countMinPopulation();
1362  unsigned MaxCount = Known.countMaxPopulation();
1363 
1364  // Add range metadata since known bits can't completely reflect what we know.
1365  if (IT->getBitWidth() != 1 && !II.getMetadata(LLVMContext::MD_range)) {
1366  Metadata *LowAndHigh[] = {
1367  ConstantAsMetadata::get(ConstantInt::get(IT, MinCount)),
1368  ConstantAsMetadata::get(ConstantInt::get(IT, MaxCount + 1))};
1369  II.setMetadata(LLVMContext::MD_range,
1370  MDNode::get(II.getContext(), LowAndHigh));
1371  return &II;
1372  }
1373 
1374  return nullptr;
1375 }
1376 
1377 // TODO: If the x86 backend knew how to convert a bool vector mask back to an
1378 // XMM register mask efficiently, we could transform all x86 masked intrinsics
1379 // to LLVM masked intrinsics and remove the x86 masked intrinsic defs.
1380 static Instruction *simplifyX86MaskedLoad(IntrinsicInst &II, InstCombiner &IC) {
1381  Value *Ptr = II.getOperand(0);
1382  Value *Mask = II.getOperand(1);
1383  Constant *ZeroVec = Constant::getNullValue(II.getType());
1384 
1385  // Special case a zero mask since that's not a ConstantDataVector.
1386  // This masked load instruction creates a zero vector.
1387  if (isa<ConstantAggregateZero>(Mask))
1388  return IC.replaceInstUsesWith(II, ZeroVec);
1389 
1390  auto *ConstMask = dyn_cast<ConstantDataVector>(Mask);
1391  if (!ConstMask)
1392  return nullptr;
1393 
1394  // The mask is constant. Convert this x86 intrinsic to the LLVM intrinsic
1395  // to allow target-independent optimizations.
1396 
1397  // First, cast the x86 intrinsic scalar pointer to a vector pointer to match
1398  // the LLVM intrinsic definition for the pointer argument.
1399  unsigned AddrSpace = cast<PointerType>(Ptr->getType())->getAddressSpace();
1400  PointerType *VecPtrTy = PointerType::get(II.getType(), AddrSpace);
1401  Value *PtrCast = IC.Builder.CreateBitCast(Ptr, VecPtrTy, "castvec");
1402 
1403  // Second, convert the x86 XMM integer vector mask to a vector of bools based
1404  // on each element's most significant bit (the sign bit).
1405  Constant *BoolMask = getNegativeIsTrueBoolVec(ConstMask);
1406 
1407  // The pass-through vector for an x86 masked load is a zero vector.
1408  CallInst *NewMaskedLoad =
1409  IC.Builder.CreateMaskedLoad(PtrCast, 1, BoolMask, ZeroVec);
1410  return IC.replaceInstUsesWith(II, NewMaskedLoad);
1411 }
1412 
1413 // TODO: If the x86 backend knew how to convert a bool vector mask back to an
1414 // XMM register mask efficiently, we could transform all x86 masked intrinsics
1415 // to LLVM masked intrinsics and remove the x86 masked intrinsic defs.
1416 static bool simplifyX86MaskedStore(IntrinsicInst &II, InstCombiner &IC) {
1417  Value *Ptr = II.getOperand(0);
1418  Value *Mask = II.getOperand(1);
1419  Value *Vec = II.getOperand(2);
1420 
1421  // Special case a zero mask since that's not a ConstantDataVector:
1422  // this masked store instruction does nothing.
1423  if (isa<ConstantAggregateZero>(Mask)) {
1424  IC.eraseInstFromFunction(II);
1425  return true;
1426  }
1427 
1428  // The SSE2 version is too weird (eg, unaligned but non-temporal) to do
1429  // anything else at this level.
1430  if (II.getIntrinsicID() == Intrinsic::x86_sse2_maskmov_dqu)
1431  return false;
1432 
1433  auto *ConstMask = dyn_cast<ConstantDataVector>(Mask);
1434  if (!ConstMask)
1435  return false;
1436 
1437  // The mask is constant. Convert this x86 intrinsic to the LLVM intrinsic
1438  // to allow target-independent optimizations.
1439 
1440  // First, cast the x86 intrinsic scalar pointer to a vector pointer to match
1441  // the LLVM intrinsic definition for the pointer argument.
1442  unsigned AddrSpace = cast<PointerType>(Ptr->getType())->getAddressSpace();
1443  PointerType *VecPtrTy = PointerType::get(Vec->getType(), AddrSpace);
1444  Value *PtrCast = IC.Builder.CreateBitCast(Ptr, VecPtrTy, "castvec");
1445 
1446  // Second, convert the x86 XMM integer vector mask to a vector of bools based
1447  // on each element's most significant bit (the sign bit).
1448  Constant *BoolMask = getNegativeIsTrueBoolVec(ConstMask);
1449 
1450  IC.Builder.CreateMaskedStore(Vec, PtrCast, 1, BoolMask);
1451 
1452  // 'Replace uses' doesn't work for stores. Erase the original masked store.
1453  IC.eraseInstFromFunction(II);
1454  return true;
1455 }
1456 
1457 // Constant fold llvm.amdgcn.fmed3 intrinsics for standard inputs.
1458 //
1459 // A single NaN input is folded to minnum, so we rely on that folding for
1460 // handling NaNs.
1461 static APFloat fmed3AMDGCN(const APFloat &Src0, const APFloat &Src1,
1462  const APFloat &Src2) {
1463  APFloat Max3 = maxnum(maxnum(Src0, Src1), Src2);
1464 
1465  APFloat::cmpResult Cmp0 = Max3.compare(Src0);
1466  assert(Cmp0 != APFloat::cmpUnordered && "nans handled separately");
1467  if (Cmp0 == APFloat::cmpEqual)
1468  return maxnum(Src1, Src2);
1469 
1470  APFloat::cmpResult Cmp1 = Max3.compare(Src1);
1471  assert(Cmp1 != APFloat::cmpUnordered && "nans handled separately");
1472  if (Cmp1 == APFloat::cmpEqual)
1473  return maxnum(Src0, Src2);
1474 
1475  return maxnum(Src0, Src1);
1476 }
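// Worked example (illustrative): fmed3(1.0, 5.0, 3.0): Max3 = 5.0 compares
// equal to Src1, so the result is maxnum(Src0, Src2) = 3.0, the median of the
// three inputs.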
1477 
1478 /// Convert a table lookup to shufflevector if the mask is constant.
1479 /// This could benefit tbl1 if the mask is { 7,6,5,4,3,2,1,0 }, in
1480 /// which case we could lower the shufflevector with rev64 instructions
1481 /// as it's actually a byte reverse.
1482 static Value *simplifyNeonTbl1(const IntrinsicInst &II,
1483  InstCombiner::BuilderTy &Builder) {
1484  // Bail out if the mask is not a constant.
1485  auto *C = dyn_cast<Constant>(II.getArgOperand(1));
1486  if (!C)
1487  return nullptr;
1488 
1489  auto *VecTy = cast<VectorType>(II.getType());
1490  unsigned NumElts = VecTy->getNumElements();
1491 
1492  // Only perform this transformation for <8 x i8> vector types.
1493  if (!VecTy->getElementType()->isIntegerTy(8) || NumElts != 8)
1494  return nullptr;
1495 
1496  uint32_t Indexes[8];
1497 
1498  for (unsigned I = 0; I < NumElts; ++I) {
1499  Constant *COp = C->getAggregateElement(I);
1500 
1501  if (!COp || !isa<ConstantInt>(COp))
1502  return nullptr;
1503 
1504  Indexes[I] = cast<ConstantInt>(COp)->getLimitedValue();
1505 
1506  // Make sure the mask indices are in range.
1507  if (Indexes[I] >= NumElts)
1508  return nullptr;
1509  }
1510 
1511  auto *ShuffleMask = ConstantDataVector::get(II.getContext(),
1512  makeArrayRef(Indexes));
1513  auto *V1 = II.getArgOperand(0);
1514  auto *V2 = Constant::getNullValue(V1->getType());
1515  return Builder.CreateShuffleVector(V1, V2, ShuffleMask);
1516 }
1517 
1518 /// Convert a vector load intrinsic into a simple llvm load instruction.
1519 /// This is beneficial when the underlying object being addressed comes
1520 /// from a constant, since we get constant-folding for free.
1521 static Value *simplifyNeonVld1(const IntrinsicInst &II,
1522  unsigned MemAlign,
1523  InstCombiner::BuilderTy &Builder) {
1524  auto *IntrAlign = dyn_cast<ConstantInt>(II.getArgOperand(1));
1525 
1526  if (!IntrAlign)
1527  return nullptr;
1528 
1529  unsigned Alignment = IntrAlign->getLimitedValue() < MemAlign ?
1530  MemAlign : IntrAlign->getLimitedValue();
1531 
1532  if (!isPowerOf2_32(Alignment))
1533  return nullptr;
1534 
1535  auto *BCastInst = Builder.CreateBitCast(II.getArgOperand(0),
1536  PointerType::get(II.getType(), 0));
1537  return Builder.CreateAlignedLoad(BCastInst, Alignment);
1538 }
1539 
1540 // Returns true iff the 2 intrinsics have the same operands, limiting the
1541 // comparison to the first NumOperands.
1542 static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E,
1543  unsigned NumOperands) {
1544  assert(I.getNumArgOperands() >= NumOperands && "Not enough operands");
1545  assert(E.getNumArgOperands() >= NumOperands && "Not enough operands");
1546  for (unsigned i = 0; i < NumOperands; i++)
1547  if (I.getArgOperand(i) != E.getArgOperand(i))
1548  return false;
1549  return true;
1550 }
1551 
1552 // Remove trivially empty start/end intrinsic ranges, i.e. a start
1553 // immediately followed by an end (ignoring debuginfo or other
1554 // start/end intrinsics in between). As this handles only the most trivial
1555 // cases, tracking the nesting level is not needed:
1556 //
1557 // call @llvm.foo.start(i1 0) ; &I
1558 // call @llvm.foo.start(i1 0)
1559 // call @llvm.foo.end(i1 0) ; This one will not be skipped: it will be removed
1560 // call @llvm.foo.end(i1 0)
1561 static bool removeTriviallyEmptyRange(IntrinsicInst &I, unsigned StartID,
1562  unsigned EndID, InstCombiner &IC) {
1563  assert(I.getIntrinsicID() == StartID &&
1564  "Start intrinsic does not have expected ID");
1565  BasicBlock::iterator BI(I), BE(I.getParent()->end());
1566  for (++BI; BI != BE; ++BI) {
1567  if (auto *E = dyn_cast<IntrinsicInst>(BI)) {
1568  if (isa<DbgInfoIntrinsic>(E) || E->getIntrinsicID() == StartID)
1569  continue;
1570  if (E->getIntrinsicID() == EndID &&
1571  haveSameOperands(I, *E, E->getNumArgOperands())) {
1572  IC.eraseInstFromFunction(*E);
1573  IC.eraseInstFromFunction(I);
1574  return true;
1575  }
1576  }
1577  break;
1578  }
1579 
1580  return false;
1581 }
1582 
1583 // Convert NVVM intrinsics to target-generic LLVM code where possible.
1584 static Instruction *SimplifyNVVMIntrinsic(IntrinsicInst *II, InstCombiner &IC) {
1585  // Each NVVM intrinsic we can simplify can be replaced with one of:
1586  //
1587  // * an LLVM intrinsic,
1588  // * an LLVM cast operation,
1589  // * an LLVM binary operation, or
1590  // * ad-hoc LLVM IR for the particular operation.
1591 
1592  // Some transformations are only valid when the module's
1593  // flush-denormals-to-zero (ftz) setting is true/false, whereas other
1594  // transformations are valid regardless of the module's ftz setting.
1595  enum FtzRequirementTy {
1596  FTZ_Any, // Any ftz setting is ok.
1597  FTZ_MustBeOn, // Transformation is valid only if ftz is on.
1598  FTZ_MustBeOff, // Transformation is valid only if ftz is off.
1599  };
1600  // Classes of NVVM intrinsics that can't be replaced one-to-one with a
1601  // target-generic intrinsic, cast op, or binary op but that we can nonetheless
1602  // simplify.
1603  enum SpecialCase {
1604  SPC_Reciprocal,
1605  };
1606 
1607  // SimplifyAction is a poor-man's variant (plus an additional flag) that
1608  // represents how to replace an NVVM intrinsic with target-generic LLVM IR.
1609  struct SimplifyAction {
1610  // Invariant: At most one of these Optionals has a value.
1611  Optional<Intrinsic::ID> IID;
1612  Optional<Instruction::CastOps> CastOp;
1613  Optional<Instruction::BinaryOps> BinaryOp;
1614  Optional<SpecialCase> Special;
1615 
1616  FtzRequirementTy FtzRequirement = FTZ_Any;
1617 
1618  SimplifyAction() = default;
1619 
1620  SimplifyAction(Intrinsic::ID IID, FtzRequirementTy FtzReq)
1621  : IID(IID), FtzRequirement(FtzReq) {}
1622 
1623  // Cast operations don't have anything to do with FTZ, so we skip that
1624  // argument.
1625  SimplifyAction(Instruction::CastOps CastOp) : CastOp(CastOp) {}
1626 
1627  SimplifyAction(Instruction::BinaryOps BinaryOp, FtzRequirementTy FtzReq)
1628  : BinaryOp(BinaryOp), FtzRequirement(FtzReq) {}
1629 
1630  SimplifyAction(SpecialCase Special, FtzRequirementTy FtzReq)
1631  : Special(Special), FtzRequirement(FtzReq) {}
1632  };
1633 
1634  // Try to generate a SimplifyAction describing how to replace our
1635  // IntrinsicInstr with target-generic LLVM IR.
1636  const SimplifyAction Action = [II]() -> SimplifyAction {
1637  switch (II->getIntrinsicID()) {
1638  // NVVM intrinsics that map directly to LLVM intrinsics.
1639  case Intrinsic::nvvm_ceil_d:
1640  return {Intrinsic::ceil, FTZ_Any};
1641  case Intrinsic::nvvm_ceil_f:
1642  return {Intrinsic::ceil, FTZ_MustBeOff};
1643  case Intrinsic::nvvm_ceil_ftz_f:
1644  return {Intrinsic::ceil, FTZ_MustBeOn};
1645  case Intrinsic::nvvm_fabs_d:
1646  return {Intrinsic::fabs, FTZ_Any};
1647  case Intrinsic::nvvm_fabs_f:
1648  return {Intrinsic::fabs, FTZ_MustBeOff};
1649  case Intrinsic::nvvm_fabs_ftz_f:
1650  return {Intrinsic::fabs, FTZ_MustBeOn};
1651  case Intrinsic::nvvm_floor_d:
1652  return {Intrinsic::floor, FTZ_Any};
1653  case Intrinsic::nvvm_floor_f:
1654  return {Intrinsic::floor, FTZ_MustBeOff};
1655  case Intrinsic::nvvm_floor_ftz_f:
1656  return {Intrinsic::floor, FTZ_MustBeOn};
1657  case Intrinsic::nvvm_fma_rn_d:
1658  return {Intrinsic::fma, FTZ_Any};
1659  case Intrinsic::nvvm_fma_rn_f:
1660  return {Intrinsic::fma, FTZ_MustBeOff};
1661  case Intrinsic::nvvm_fma_rn_ftz_f:
1662  return {Intrinsic::fma, FTZ_MustBeOn};
1663  case Intrinsic::nvvm_fmax_d:
1664  return {Intrinsic::maxnum, FTZ_Any};
1665  case Intrinsic::nvvm_fmax_f:
1666  return {Intrinsic::maxnum, FTZ_MustBeOff};
1667  case Intrinsic::nvvm_fmax_ftz_f:
1668  return {Intrinsic::maxnum, FTZ_MustBeOn};
1669  case Intrinsic::nvvm_fmin_d:
1670  return {Intrinsic::minnum, FTZ_Any};
1671  case Intrinsic::nvvm_fmin_f:
1672  return {Intrinsic::minnum, FTZ_MustBeOff};
1673  case Intrinsic::nvvm_fmin_ftz_f:
1674  return {Intrinsic::minnum, FTZ_MustBeOn};
1675  case Intrinsic::nvvm_round_d:
1676  return {Intrinsic::round, FTZ_Any};
1677  case Intrinsic::nvvm_round_f:
1678  return {Intrinsic::round, FTZ_MustBeOff};
1679  case Intrinsic::nvvm_round_ftz_f:
1680  return {Intrinsic::round, FTZ_MustBeOn};
1681  case Intrinsic::nvvm_sqrt_rn_d:
1682  return {Intrinsic::sqrt, FTZ_Any};
1683  case Intrinsic::nvvm_sqrt_f:
1684  // nvvm_sqrt_f is a special case. For most intrinsics, foo_ftz_f is the
1685  // ftz version, and foo_f is the non-ftz version. But nvvm_sqrt_f adopts
1686  // the ftz-ness of the surrounding code. sqrt_rn_f and sqrt_rn_ftz_f are
1687  // the versions with explicit ftz-ness.
1688  return {Intrinsic::sqrt, FTZ_Any};
1689  case Intrinsic::nvvm_sqrt_rn_f:
1690  return {Intrinsic::sqrt, FTZ_MustBeOff};
1691  case Intrinsic::nvvm_sqrt_rn_ftz_f:
1692  return {Intrinsic::sqrt, FTZ_MustBeOn};
1693  case Intrinsic::nvvm_trunc_d:
1694  return {Intrinsic::trunc, FTZ_Any};
1695  case Intrinsic::nvvm_trunc_f:
1696  return {Intrinsic::trunc, FTZ_MustBeOff};
1697  case Intrinsic::nvvm_trunc_ftz_f:
1698  return {Intrinsic::trunc, FTZ_MustBeOn};
1699 
1700  // NVVM intrinsics that map to LLVM cast operations.
1701  //
1702  // Note that llvm's target-generic conversion operators correspond to the rz
1703  // (round to zero) versions of the nvvm conversion intrinsics, even though
1704  // most everything else here uses the rn (round to nearest even) nvvm ops.
1705  case Intrinsic::nvvm_d2i_rz:
1706  case Intrinsic::nvvm_f2i_rz:
1707  case Intrinsic::nvvm_d2ll_rz:
1708  case Intrinsic::nvvm_f2ll_rz:
1709  return {Instruction::FPToSI};
1710  case Intrinsic::nvvm_d2ui_rz:
1711  case Intrinsic::nvvm_f2ui_rz:
1712  case Intrinsic::nvvm_d2ull_rz:
1713  case Intrinsic::nvvm_f2ull_rz:
1714  return {Instruction::FPToUI};
1715  case Intrinsic::nvvm_i2d_rz:
1716  case Intrinsic::nvvm_i2f_rz:
1717  case Intrinsic::nvvm_ll2d_rz:
1718  case Intrinsic::nvvm_ll2f_rz:
1719  return {Instruction::SIToFP};
1720  case Intrinsic::nvvm_ui2d_rz:
1721  case Intrinsic::nvvm_ui2f_rz:
1722  case Intrinsic::nvvm_ull2d_rz:
1723  case Intrinsic::nvvm_ull2f_rz:
1724  return {Instruction::UIToFP};
1725 
1726  // NVVM intrinsics that map to LLVM binary ops.
1727  case Intrinsic::nvvm_add_rn_d:
1728  return {Instruction::FAdd, FTZ_Any};
1729  case Intrinsic::nvvm_add_rn_f:
1730  return {Instruction::FAdd, FTZ_MustBeOff};
1731  case Intrinsic::nvvm_add_rn_ftz_f:
1732  return {Instruction::FAdd, FTZ_MustBeOn};
1733  case Intrinsic::nvvm_mul_rn_d:
1734  return {Instruction::FMul, FTZ_Any};
1735  case Intrinsic::nvvm_mul_rn_f:
1736  return {Instruction::FMul, FTZ_MustBeOff};
1737  case Intrinsic::nvvm_mul_rn_ftz_f:
1738  return {Instruction::FMul, FTZ_MustBeOn};
1739  case Intrinsic::nvvm_div_rn_d:
1740  return {Instruction::FDiv, FTZ_Any};
1741  case Intrinsic::nvvm_div_rn_f:
1742  return {Instruction::FDiv, FTZ_MustBeOff};
1743  case Intrinsic::nvvm_div_rn_ftz_f:
1744  return {Instruction::FDiv, FTZ_MustBeOn};
1745 
1746  // The remainder of cases are NVVM intrinsics that map to LLVM idioms, but
1747  // need special handling.
1748  //
1749  // We seem to be missing intrinsics for rcp.approx.{ftz.}f32, which is just
1750  // as well.
1751  case Intrinsic::nvvm_rcp_rn_d:
1752  return {SPC_Reciprocal, FTZ_Any};
1753  case Intrinsic::nvvm_rcp_rn_f:
1754  return {SPC_Reciprocal, FTZ_MustBeOff};
1755  case Intrinsic::nvvm_rcp_rn_ftz_f:
1756  return {SPC_Reciprocal, FTZ_MustBeOn};
1757 
1758  // We do not currently simplify intrinsics that give an approximate answer.
1759  // These include:
1760  //
1761  // - nvvm_cos_approx_{f,ftz_f}
1762  // - nvvm_ex2_approx_{d,f,ftz_f}
1763  // - nvvm_lg2_approx_{d,f,ftz_f}
1764  // - nvvm_sin_approx_{f,ftz_f}
1765  // - nvvm_sqrt_approx_{f,ftz_f}
1766  // - nvvm_rsqrt_approx_{d,f,ftz_f}
1767  // - nvvm_div_approx_{ftz_d,ftz_f,f}
1768  // - nvvm_rcp_approx_ftz_d
1769  //
1770  // Ideally we'd encode them as e.g. "fast call @llvm.cos", where "fast"
1771  // means that fastmath is enabled in the intrinsic. Unfortunately only
1772  // binary operators (currently) have a fastmath bit in SelectionDAG, so this
1773  // information gets lost and we can't select on it.
1774  //
1775  // TODO: div and rcp are lowered to a binary op, so in theory we could
1776  // lower these to "fast fdiv".
1777 
1778  default:
1779  return {};
1780  }
1781  }();
1782 
1783  // If Action.FtzRequirementTy is not satisfied by the module's ftz state, we
1784  // can bail out now. (Notice that in the case that IID is not an NVVM
1785  // intrinsic, we don't have to look up any module metadata, as
1786  // FtzRequirementTy will be FTZ_Any.)
1787  if (Action.FtzRequirement != FTZ_Any) {
1788  bool FtzEnabled =
1789  II->getFunction()->getFnAttribute("nvptx-f32ftz").getValueAsString() ==
1790  "true";
1791 
1792  if (FtzEnabled != (Action.FtzRequirement == FTZ_MustBeOn))
1793  return nullptr;
1794  }
1795 
1796  // Simplify to target-generic intrinsic.
1797  if (Action.IID) {
1798  SmallVector<Value *, 4> Args(II->arg_operands());
1799  // All the target-generic intrinsics currently of interest to us have one
1800  // type argument, equal to that of the nvvm intrinsic's argument.
1801  Type *Tys[] = {II->getArgOperand(0)->getType()};
1802  return CallInst::Create(
1803  Intrinsic::getDeclaration(II->getModule(), *Action.IID, Tys), Args);
1804  }
1805 
1806  // Simplify to target-generic binary op.
1807  if (Action.BinaryOp)
1808  return BinaryOperator::Create(*Action.BinaryOp, II->getArgOperand(0),
1809  II->getArgOperand(1), II->getName());
1810 
1811  // Simplify to target-generic cast op.
1812  if (Action.CastOp)
1813  return CastInst::Create(*Action.CastOp, II->getArgOperand(0), II->getType(),
1814  II->getName());
1815 
1816  // All that's left are the special cases.
1817  if (!Action.Special)
1818  return nullptr;
1819 
1820  switch (*Action.Special) {
1821  case SPC_Reciprocal:
1822  // Simplify reciprocal.
1823  return BinaryOperator::Create(
1824  Instruction::FDiv, ConstantFP::get(II->getArgOperand(0)->getType(), 1),
1825  II->getArgOperand(0), II->getName());
1826  }
1827  llvm_unreachable("All SpecialCase enumerators should be handled in switch.");
1828 }
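// Two illustrative results of the mapping above (value names assumed): a call
// to @llvm.nvvm.ceil.d(double %x) is rewritten as @llvm.ceil.f64(double %x),
// with FTZ_Any so no module ftz check is needed, while @llvm.nvvm.rcp.rn.d is
// rewritten via the SPC_Reciprocal idiom as "fdiv double 1.0, %x".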
1829 
1830 Instruction *InstCombiner::visitVAStartInst(VAStartInst &I) {
1831  removeTriviallyEmptyRange(I, Intrinsic::vastart, Intrinsic::vaend, *this);
1832  return nullptr;
1833 }
1834 
1835 Instruction *InstCombiner::visitVACopyInst(VACopyInst &I) {
1836  removeTriviallyEmptyRange(I, Intrinsic::vacopy, Intrinsic::vaend, *this);
1837  return nullptr;
1838 }
1839 
1840 /// CallInst simplification. This mostly only handles folding of intrinsic
1841 /// instructions. For normal calls, it allows visitCallSite to do the heavy
1842 /// lifting.
1843 Instruction *InstCombiner::visitCallInst(CallInst &CI) {
1844  if (Value *V = SimplifyCall(&CI, SQ.getWithInstruction(&CI)))
1845  return replaceInstUsesWith(CI, V);
1846 
1847  if (isFreeCall(&CI, &TLI))
1848  return visitFree(CI);
1849 
1850  // If the caller function is nounwind, mark the call as nounwind, even if the
1851  // callee isn't.
1852  if (CI.getFunction()->doesNotThrow() && !CI.doesNotThrow()) {
1853  CI.setDoesNotThrow();
1854  return &CI;
1855  }
1856 
1857  IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI);
1858  if (!II) return visitCallSite(&CI);
1859 
1860  // Intrinsics cannot occur in an invoke, so handle them here instead of in
1861  // visitCallSite.
1862  if (auto *MI = dyn_cast<AnyMemIntrinsic>(II)) {
1863  bool Changed = false;
1864 
1865  // memmove/cpy/set of zero bytes is a noop.
1866  if (Constant *NumBytes = dyn_cast<Constant>(MI->getLength())) {
1867  if (NumBytes->isNullValue())
1868  return eraseInstFromFunction(CI);
1869 
1870  if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes))
1871  if (CI->getZExtValue() == 1) {
1872  // Replace the instruction with just byte operations. We would
1873  // transform other cases to loads/stores, but we don't know if
1874  // alignment is sufficient.
1875  }
1876  }
1877 
1878  // No other transformations apply to volatile transfers.
1879  if (auto *M = dyn_cast<MemIntrinsic>(MI))
1880  if (M->isVolatile())
1881  return nullptr;
1882 
1883  // If we have a memmove and the source operation is a constant global,
1884  // then the source and dest pointers can't alias, so we can change this
1885  // into a call to memcpy.
1886  if (auto *MMI = dyn_cast<AnyMemMoveInst>(MI)) {
1887  if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(MMI->getSource()))
1888  if (GVSrc->isConstant()) {
1889  Module *M = CI.getModule();
1890  Intrinsic::ID MemCpyID =
1891  isa<AtomicMemMoveInst>(MMI)
1892  ? Intrinsic::memcpy_element_unordered_atomic
1893  : Intrinsic::memcpy;
1894  Type *Tys[3] = { CI.getArgOperand(0)->getType(),
1895  CI.getArgOperand(1)->getType(),
1896  CI.getArgOperand(2)->getType() };
1897  CI.setCalledFunction(Intrinsic::getDeclaration(M, MemCpyID, Tys));
1898  Changed = true;
1899  }
1900  }
1901 
1902  if (AnyMemTransferInst *MTI = dyn_cast<AnyMemTransferInst>(MI)) {
1903  // memmove(x,x,size) -> noop.
1904  if (MTI->getSource() == MTI->getDest())
1905  return eraseInstFromFunction(CI);
1906  }
1907 
1908  // If we can determine a pointer alignment that is bigger than currently
1909  // set, update the alignment.
1910  if (auto *MTI = dyn_cast<AnyMemTransferInst>(MI)) {
1911  if (Instruction *I = SimplifyAnyMemTransfer(MTI))
1912  return I;
1913  } else if (auto *MSI = dyn_cast<AnyMemSetInst>(MI)) {
1914  if (Instruction *I = SimplifyAnyMemSet(MSI))
1915  return I;
1916  }
1917 
1918  if (Changed) return II;
1919  }
1920 
1921  if (Instruction *I = SimplifyNVVMIntrinsic(II, *this))
1922  return I;
1923 
1924  auto SimplifyDemandedVectorEltsLow = [this](Value *Op, unsigned Width,
1925  unsigned DemandedWidth) {
1926  APInt UndefElts(Width, 0);
1927  APInt DemandedElts = APInt::getLowBitsSet(Width, DemandedWidth);
1928  return SimplifyDemandedVectorElts(Op, DemandedElts, UndefElts);
1929  };
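 // In other words, SimplifyDemandedVectorEltsLow(Op, 8, 1) asks whether Op can
 // be simplified when only lane 0 of its 8 lanes is actually used; the unused
 // upper lanes may then be replaced with undef by SimplifyDemandedVectorElts.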
1930 
1931  switch (II->getIntrinsicID()) {
1932  default: break;
1933  case Intrinsic::objectsize:
1934  if (ConstantInt *N =
1935  lowerObjectSizeCall(II, DL, &TLI, /*MustSucceed=*/false))
1936  return replaceInstUsesWith(CI, N);
1937  return nullptr;
1938  case Intrinsic::bswap: {
1939  Value *IIOperand = II->getArgOperand(0);
1940  Value *X = nullptr;
1941 
1942  // bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
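 // e.g. with a 32-bit source truncated to i16, c = 32 - 16 = 16, so the whole
 // expression bswap.i16(trunc(bswap.i32(%x))) simplifies to trunc(lshr(%x, 16)):
 // the two byte swaps cancel and only the original high half of %x survives
 // the truncation (%x is an illustrative name).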
1943  if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) {
1944  unsigned C = X->getType()->getPrimitiveSizeInBits() -
1945  IIOperand->getType()->getPrimitiveSizeInBits();
1946  Value *CV = ConstantInt::get(X->getType(), C);
1947  Value *V = Builder.CreateLShr(X, CV);
1948  return new TruncInst(V, IIOperand->getType());
1949  }
1950  break;
1951  }
1952  case Intrinsic::masked_load:
1953  if (Value *SimplifiedMaskedOp = simplifyMaskedLoad(*II, Builder))
1954  return replaceInstUsesWith(CI, SimplifiedMaskedOp);
1955  break;
1956  case Intrinsic::masked_store:
1957  return simplifyMaskedStore(*II, *this);
1958  case Intrinsic::masked_gather:
1959  return simplifyMaskedGather(*II, *this);
1960  case Intrinsic::masked_scatter:
1961  return simplifyMaskedScatter(*II, *this);
1962  case Intrinsic::launder_invariant_group:
1963  case Intrinsic::strip_invariant_group:
1964  if (auto *SkippedBarrier = simplifyInvariantGroupIntrinsic(*II, *this))
1965  return replaceInstUsesWith(*II, SkippedBarrier);
1966  break;
1967  case Intrinsic::powi:
1968  if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
1969  // 0 and 1 are handled in instsimplify
1970 
1971  // powi(x, -1) -> 1/x
1972  if (Power->isMinusOne())
1973  return BinaryOperator::CreateFDiv(ConstantFP::get(CI.getType(), 1.0),
1974  II->getArgOperand(0));
1975  // powi(x, 2) -> x*x
1976  if (Power->equalsInt(2))
1977  return BinaryOperator::CreateFMul(II->getArgOperand(0),
1978  II->getArgOperand(0));
1979  }
1980  break;
1981 
1982  case Intrinsic::cttz:
1983  case Intrinsic::ctlz:
1984  if (auto *I = foldCttzCtlz(*II, *this))
1985  return I;
1986  break;
1987 
1988  case Intrinsic::ctpop:
1989  if (auto *I = foldCtpop(*II, *this))
1990  return I;
1991  break;
1992 
1993  case Intrinsic::uadd_with_overflow:
1994  case Intrinsic::sadd_with_overflow:
1995  case Intrinsic::umul_with_overflow:
1996  case Intrinsic::smul_with_overflow:
1997  if (isa<Constant>(II->getArgOperand(0)) &&
1998  !isa<Constant>(II->getArgOperand(1))) {
1999  // Canonicalize constants into the RHS.
2000  Value *LHS = II->getArgOperand(0);
2001  II->setArgOperand(0, II->getArgOperand(1));
2002  II->setArgOperand(1, LHS);
2003  return II;
2004  }
2005  LLVM_FALLTHROUGH;
2006 
2007  case Intrinsic::usub_with_overflow:
2008  case Intrinsic::ssub_with_overflow: {
2009  OverflowCheckFlavor OCF =
2010  IntrinsicIDToOverflowCheckFlavor(II->getIntrinsicID());
2011  assert(OCF != OCF_INVALID && "unexpected!");
2012 
2013  Value *OperationResult = nullptr;
2014  Constant *OverflowResult = nullptr;
2015  if (OptimizeOverflowCheck(OCF, II->getArgOperand(0), II->getArgOperand(1),
2016  *II, OperationResult, OverflowResult))
2017  return CreateOverflowTuple(II, OperationResult, OverflowResult);
2018 
2019  break;
2020  }
2021 
2022  case Intrinsic::minnum:
2023  case Intrinsic::maxnum: {
2024  Value *Arg0 = II->getArgOperand(0);
2025  Value *Arg1 = II->getArgOperand(1);
2026  // Canonicalize constants to the RHS.
2027  if (isa<ConstantFP>(Arg0) && !isa<ConstantFP>(Arg1)) {
2028  II->setArgOperand(0, Arg1);
2029  II->setArgOperand(1, Arg0);
2030  return II;
2031  }
2032 
2033  Value *X, *Y;
2034  if (match(Arg0, m_FNeg(m_Value(X))) && match(Arg1, m_FNeg(m_Value(Y))) &&
2035  (Arg0->hasOneUse() || Arg1->hasOneUse())) {
2036  // If both operands are negated, invert the call and negate the result:
2037  // minnum(-X, -Y) --> -(maxnum(X, Y))
2038  // maxnum(-X, -Y) --> -(minnum(X, Y))
2039  Intrinsic::ID NewIID = II->getIntrinsicID() == Intrinsic::maxnum ?
2040  Intrinsic::minnum : Intrinsic::maxnum;
2041  Value *NewCall = Builder.CreateIntrinsic(NewIID, { X, Y }, II);
2042  Instruction *FNeg = BinaryOperator::CreateFNeg(NewCall);
2043  FNeg->copyIRFlags(II);
2044  return FNeg;
2045  }
2046  break;
2047  }
2048  case Intrinsic::fmuladd: {
2049  // Canonicalize fast fmuladd to the separate fmul + fadd.
2050  if (II->isFast()) {
2051  BuilderTy::FastMathFlagGuard Guard(Builder);
2052  Builder.setFastMathFlags(II->getFastMathFlags());
2053  Value *Mul = Builder.CreateFMul(II->getArgOperand(0),
2054  II->getArgOperand(1));
2055  Value *Add = Builder.CreateFAdd(Mul, II->getArgOperand(2));
2056  Add->takeName(II);
2057  return replaceInstUsesWith(*II, Add);
2058  }
2059 
2060  LLVM_FALLTHROUGH;
2061  }
2062  case Intrinsic::fma: {
2063  Value *Src0 = II->getArgOperand(0);
2064  Value *Src1 = II->getArgOperand(1);
2065 
2066  // Canonicalize constant multiply operand to Src1.
2067  if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
2068  II->setArgOperand(0, Src1);
2069  II->setArgOperand(1, Src0);
2070  std::swap(Src0, Src1);
2071  }
2072 
2073  // fma fneg(x), fneg(y), z -> fma x, y, z
2074  Value *X, *Y;
2075  if (match(Src0, m_FNeg(m_Value(X))) && match(Src1, m_FNeg(m_Value(Y)))) {
2076  II->setArgOperand(0, X);
2077  II->setArgOperand(1, Y);
2078  return II;
2079  }
2080 
2081  // fma fabs(x), fabs(x), z -> fma x, x, z
2082  if (match(Src0, m_FAbs(m_Value(X))) &&
2083  match(Src1, m_FAbs(m_Specific(X)))) {
2084  II->setArgOperand(0, X);
2085  II->setArgOperand(1, X);
2086  return II;
2087  }
2088 
2089  // fma x, 1, z -> fadd x, z
2090  if (match(Src1, m_FPOne())) {
2091  auto *FAdd = BinaryOperator::CreateFAdd(Src0, II->getArgOperand(2));
2092  FAdd->copyFastMathFlags(II);
2093  return FAdd;
2094  }
2095 
2096  break;
2097  }
2098  case Intrinsic::fabs: {
2099  Value *Cond;
2100  Constant *LHS, *RHS;
2101  if (match(II->getArgOperand(0),
2102  m_Select(m_Value(Cond), m_Constant(LHS), m_Constant(RHS)))) {
2103  CallInst *Call0 = Builder.CreateCall(II->getCalledFunction(), {LHS});
2104  CallInst *Call1 = Builder.CreateCall(II->getCalledFunction(), {RHS});
2105  return SelectInst::Create(Cond, Call0, Call1);
2106  }
2107 
2108  LLVM_FALLTHROUGH;
2109  }
2110  case Intrinsic::ceil:
2111  case Intrinsic::floor:
2112  case Intrinsic::round:
2113  case Intrinsic::nearbyint:
2114  case Intrinsic::rint:
2115  case Intrinsic::trunc: {
2116  Value *ExtSrc;
2117  if (match(II->getArgOperand(0), m_OneUse(m_FPExt(m_Value(ExtSrc))))) {
2118  // Narrow the call: intrinsic (fpext x) -> fpext (intrinsic x)
2119  Value *NarrowII = Builder.CreateIntrinsic(II->getIntrinsicID(),
2120  { ExtSrc }, II);
2121  return new FPExtInst(NarrowII, II->getType());
2122  }
2123  break;
2124  }
2125  case Intrinsic::cos:
2126  case Intrinsic::amdgcn_cos: {
2127  Value *X;
2128  Value *Src = II->getArgOperand(0);
2129  if (match(Src, m_FNeg(m_Value(X))) || match(Src, m_FAbs(m_Value(X)))) {
2130  // cos(-x) -> cos(x)
2131  // cos(fabs(x)) -> cos(x)
2132  II->setArgOperand(0, X);
2133  return II;
2134  }
2135  break;
2136  }
2137  case Intrinsic::sin: {
2138  Value *X;
2139  if (match(II->getArgOperand(0), m_OneUse(m_FNeg(m_Value(X))))) {
2140  // sin(-x) --> -sin(x)
2141  Value *NewSin = Builder.CreateIntrinsic(Intrinsic::sin, { X }, II);
2142  Instruction *FNeg = BinaryOperator::CreateFNeg(NewSin);
2143  FNeg->copyFastMathFlags(II);
2144  return FNeg;
2145  }
2146  break;
2147  }
2148  case Intrinsic::ppc_altivec_lvx:
2149  case Intrinsic::ppc_altivec_lvxl:
2150  // Turn PPC lvx -> load if the pointer is known aligned.
2151  if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, &AC,
2152  &DT) >= 16) {
2153  Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0),
2154  PointerType::getUnqual(II->getType()));
2155  return new LoadInst(Ptr);
2156  }
2157  break;
2158  case Intrinsic::ppc_vsx_lxvw4x:
2159  case Intrinsic::ppc_vsx_lxvd2x: {
2160  // Turn PPC VSX loads into normal loads.
2161  Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0),
2162  PointerType::getUnqual(II->getType()));
2163  return new LoadInst(Ptr, Twine(""), false, 1);
2164  }
2165  case Intrinsic::ppc_altivec_stvx:
2166  case Intrinsic::ppc_altivec_stvxl:
2167  // Turn stvx -> store if the pointer is known aligned.
2168  if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, II, &AC,
2169  &DT) >= 16) {
2170  Type *OpPtrTy =
2171  PointerType::getUnqual(II->getArgOperand(0)->getType());
2172  Value *Ptr = Builder.CreateBitCast(II->getArgOperand(1), OpPtrTy);
2173  return new StoreInst(II->getArgOperand(0), Ptr);
2174  }
2175  break;
2176  case Intrinsic::ppc_vsx_stxvw4x:
2177  case Intrinsic::ppc_vsx_stxvd2x: {
2178  // Turn PPC VSX stores into normal stores.
2179  Type *OpPtrTy = PointerType::getUnqual(II->getArgOperand(0)->getType());
2180  Value *Ptr = Builder.CreateBitCast(II->getArgOperand(1), OpPtrTy);
2181  return new StoreInst(II->getArgOperand(0), Ptr, false, 1);
2182  }
2183  case Intrinsic::ppc_qpx_qvlfs:
2184  // Turn PPC QPX qvlfs -> load if the pointer is known aligned.
2185  if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, &AC,
2186  &DT) >= 16) {
2187  Type *VTy = VectorType::get(Builder.getFloatTy(),
2188  II->getType()->getVectorNumElements());
2189  Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0),
2190  PointerType::getUnqual(VTy));
2191  Value *Load = Builder.CreateLoad(Ptr);
2192  return new FPExtInst(Load, II->getType());
2193  }
2194  break;
2195  case Intrinsic::ppc_qpx_qvlfd:
2196  // Turn PPC QPX qvlfd -> load if the pointer is known aligned.
2197  if (getOrEnforceKnownAlignment(II->getArgOperand(0), 32, DL, II, &AC,
2198  &DT) >= 32) {
2199  Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0),
2200  PointerType::getUnqual(II->getType()));
2201  return new LoadInst(Ptr);
2202  }
2203  break;
2204  case Intrinsic::ppc_qpx_qvstfs:
2205  // Turn PPC QPX qvstfs -> store if the pointer is known aligned.
2206  if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, II, &AC,
2207  &DT) >= 16) {
2208  Type *VTy = VectorType::get(Builder.getFloatTy(),
2209  II->getArgOperand(0)->getType()->getVectorNumElements());
2210  Value *TOp = Builder.CreateFPTrunc(II->getArgOperand(0), VTy);
2211  Type *OpPtrTy = PointerType::getUnqual(VTy);
2212  Value *Ptr = Builder.CreateBitCast(II->getArgOperand(1), OpPtrTy);
2213  return new StoreInst(TOp, Ptr);
2214  }
2215  break;
2216  case Intrinsic::ppc_qpx_qvstfd:
2217  // Turn PPC QPX qvstfd -> store if the pointer is known aligned.
2218  if (getOrEnforceKnownAlignment(II->getArgOperand(1), 32, DL, II, &AC,
2219  &DT) >= 32) {
2220  Type *OpPtrTy =
2221  PointerType::getUnqual(II->getArgOperand(0)->getType());
2222  Value *Ptr = Builder.CreateBitCast(II->getArgOperand(1), OpPtrTy);
2223  return new StoreInst(II->getArgOperand(0), Ptr);
2224  }
2225  break;
2226 
2227  case Intrinsic::x86_bmi_bextr_32:
2228  case Intrinsic::x86_bmi_bextr_64:
2229  case Intrinsic::x86_tbm_bextri_u32:
2230  case Intrinsic::x86_tbm_bextri_u64:
2231  // If the RHS is a constant we can try some simplifications.
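 // For instance (illustrative constants): a control value of 0x0804 encodes
 // Length = 8 and Shift = 4, so with a constant LHS of 0xABCD the call folds
 // below to (0xABCD >> 4) & 0xFF = 0xBC.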
2232  if (auto *C = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
2233  uint64_t Shift = C->getZExtValue();
2234  uint64_t Length = (Shift >> 8) & 0xff;
2235  Shift &= 0xff;
2236  unsigned BitWidth = II->getType()->getIntegerBitWidth();
2237  // If the length is 0 or the shift is out of range, replace with zero.
2238  if (Length == 0 || Shift >= BitWidth)
2239  return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), 0));
2240  // If the LHS is also a constant, we can completely constant fold this.
2241  if (auto *InC = dyn_cast<ConstantInt>(II->getArgOperand(0))) {
2242  uint64_t Result = InC->getZExtValue() >> Shift;
2243  if (Length > BitWidth)
2244  Length = BitWidth;
2245  Result &= maskTrailingOnes<uint64_t>(Length);
2246  return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), Result));
2247  }
2248  // TODO should we turn this into 'and' if shift is 0? Or 'shl' if we
2249  // are only masking bits that a shift already cleared?
2250  }
2251  break;
2252 
2253  case Intrinsic::x86_bmi_bzhi_32:
2254  case Intrinsic::x86_bmi_bzhi_64:
2255  // If the RHS is a constant we can try some simplifications.
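 // bzhi zeroes all bits at or above bit Index. For instance (illustrative
 // constants): Index = 4 turns a constant LHS of 0xFF into 0x0F, while an
 // Index of at least the bit width leaves the first operand unchanged.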
2256  if (auto *C = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
2257  uint64_t Index = C->getZExtValue() & 0xff;
2258  unsigned BitWidth = II->getType()->getIntegerBitWidth();
2259  if (Index >= BitWidth)
2260  return replaceInstUsesWith(CI, II->getArgOperand(0));
2261  if (Index == 0)
2262  return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), 0));
2263  // If the LHS is also a constant, we can completely constant fold this.
2264  if (auto *InC = dyn_cast<ConstantInt>(II->getArgOperand(0))) {
2265  uint64_t Result = InC->getZExtValue();
2266  Result &= maskTrailingOnes<uint64_t>(Index);
2267  return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), Result));
2268  }
2269  // TODO should we convert this to an AND if the RHS is constant?
2270  }
2271  break;
2272 
2273  case Intrinsic::x86_vcvtph2ps_128:
2274  case Intrinsic::x86_vcvtph2ps_256: {
2275  auto Arg = II->getArgOperand(0);
2276  auto ArgType = cast<VectorType>(Arg->getType());
2277  auto RetType = cast<VectorType>(II->getType());
2278  unsigned ArgWidth = ArgType->getNumElements();
2279  unsigned RetWidth = RetType->getNumElements();
2280  assert(RetWidth <= ArgWidth && "Unexpected input/return vector widths");
2281  assert(ArgType->isIntOrIntVectorTy() &&
2282  ArgType->getScalarSizeInBits() == 16 &&
2283  "CVTPH2PS input type should be 16-bit integer vector");
2284  assert(RetType->getScalarType()->isFloatTy() &&
2285  "CVTPH2PS output type should be 32-bit float vector");
2286 
2287  // Constant folding: Convert to generic half to single conversion.
2288  if (isa<ConstantAggregateZero>(Arg))
2289  return replaceInstUsesWith(*II, ConstantAggregateZero::get(RetType));
2290 
2291  if (isa<ConstantDataVector>(Arg)) {
2292  auto VectorHalfAsShorts = Arg;
2293  if (RetWidth < ArgWidth) {
2294  SmallVector<uint32_t, 8> SubVecMask;
2295  for (unsigned i = 0; i != RetWidth; ++i)
2296  SubVecMask.push_back((int)i);
2297  VectorHalfAsShorts = Builder.CreateShuffleVector(
2298  Arg, UndefValue::get(ArgType), SubVecMask);
2299  }
2300 
2301  auto VectorHalfType =
2302  VectorType::get(Type::getHalfTy(II->getContext()), RetWidth);
2303  auto VectorHalfs =
2304  Builder.CreateBitCast(VectorHalfAsShorts, VectorHalfType);
2305  auto VectorFloats = Builder.CreateFPExt(VectorHalfs, RetType);
2306  return replaceInstUsesWith(*II, VectorFloats);
2307  }
2308 
2309  // We only use the lowest lanes of the argument.
2310  if (Value *V = SimplifyDemandedVectorEltsLow(Arg, ArgWidth, RetWidth)) {
2311  II->setArgOperand(0, V);
2312  return II;
2313  }
2314  break;
2315  }
2316 
2317  case Intrinsic::x86_sse_cvtss2si:
2318  case Intrinsic::x86_sse_cvtss2si64:
2319  case Intrinsic::x86_sse_cvttss2si:
2320  case Intrinsic::x86_sse_cvttss2si64:
2321  case Intrinsic::x86_sse2_cvtsd2si:
2322  case Intrinsic::x86_sse2_cvtsd2si64:
2323  case Intrinsic::x86_sse2_cvttsd2si:
2324  case Intrinsic::x86_sse2_cvttsd2si64:
2325  case Intrinsic::x86_avx512_vcvtss2si32:
2326  case Intrinsic::x86_avx512_vcvtss2si64:
2327  case Intrinsic::x86_avx512_vcvtss2usi32:
2328  case Intrinsic::x86_avx512_vcvtss2usi64:
2329  case Intrinsic::x86_avx512_vcvtsd2si32:
2330  case Intrinsic::x86_avx512_vcvtsd2si64:
2331  case Intrinsic::x86_avx512_vcvtsd2usi32:
2332  case Intrinsic::x86_avx512_vcvtsd2usi64:
2333  case Intrinsic::x86_avx512_cvttss2si:
2334  case Intrinsic::x86_avx512_cvttss2si64:
2335  case Intrinsic::x86_avx512_cvttss2usi:
2336  case Intrinsic::x86_avx512_cvttss2usi64:
2337  case Intrinsic::x86_avx512_cvttsd2si:
2338  case Intrinsic::x86_avx512_cvttsd2si64:
2339  case Intrinsic::x86_avx512_cvttsd2usi:
2340  case Intrinsic::x86_avx512_cvttsd2usi64: {
2341  // These intrinsics only demand the 0th element of their input vectors. If
2342  // we can simplify the input based on that, do so now.
2343  Value *Arg = II->getArgOperand(0);
2344  unsigned VWidth = Arg->getType()->getVectorNumElements();
2345  if (Value *V = SimplifyDemandedVectorEltsLow(Arg, VWidth, 1)) {
2346  II->setArgOperand(0, V);
2347  return II;
2348  }
2349  break;
2350  }
2351 
2352  case Intrinsic::x86_sse41_round_ps:
2353  case Intrinsic::x86_sse41_round_pd:
2354  case Intrinsic::x86_avx_round_ps_256:
2355  case Intrinsic::x86_avx_round_pd_256:
2356  case Intrinsic::x86_avx512_mask_rndscale_ps_128:
2357  case Intrinsic::x86_avx512_mask_rndscale_ps_256:
2358  case Intrinsic::x86_avx512_mask_rndscale_ps_512:
2359  case Intrinsic::x86_avx512_mask_rndscale_pd_128:
2360  case Intrinsic::x86_avx512_mask_rndscale_pd_256:
2361  case Intrinsic::x86_avx512_mask_rndscale_pd_512:
2362  case Intrinsic::x86_avx512_mask_rndscale_ss:
2363  case Intrinsic::x86_avx512_mask_rndscale_sd:
2364  if (Value *V = simplifyX86round(*II, Builder))
2365  return replaceInstUsesWith(*II, V);
2366  break;
2367 
2368  case Intrinsic::x86_mmx_pmovmskb:
2369  case Intrinsic::x86_sse_movmsk_ps:
2370  case Intrinsic::x86_sse2_movmsk_pd:
2371  case Intrinsic::x86_sse2_pmovmskb_128:
2372  case Intrinsic::x86_avx_movmsk_pd_256:
2373  case Intrinsic::x86_avx_movmsk_ps_256:
2374  case Intrinsic::x86_avx2_pmovmskb:
2375  if (Value *V = simplifyX86movmsk(*II))
2376  return replaceInstUsesWith(*II, V);
2377  break;
2378 
2379  case Intrinsic::x86_sse_comieq_ss:
2380  case Intrinsic::x86_sse_comige_ss:
2381  case Intrinsic::x86_sse_comigt_ss:
2382  case Intrinsic::x86_sse_comile_ss:
2383  case Intrinsic::x86_sse_comilt_ss:
2384  case Intrinsic::x86_sse_comineq_ss:
2385  case Intrinsic::x86_sse_ucomieq_ss:
2386  case Intrinsic::x86_sse_ucomige_ss:
2387  case Intrinsic::x86_sse_ucomigt_ss:
2388  case Intrinsic::x86_sse_ucomile_ss:
2389  case Intrinsic::x86_sse_ucomilt_ss:
2390  case Intrinsic::x86_sse_ucomineq_ss:
2391  case Intrinsic::x86_sse2_comieq_sd:
2392  case Intrinsic::x86_sse2_comige_sd:
2393  case Intrinsic::x86_sse2_comigt_sd:
2394  case Intrinsic::x86_sse2_comile_sd:
2395  case Intrinsic::x86_sse2_comilt_sd:
2396  case Intrinsic::x86_sse2_comineq_sd:
2397  case Intrinsic::x86_sse2_ucomieq_sd:
2398  case Intrinsic::x86_sse2_ucomige_sd:
2399  case Intrinsic::x86_sse2_ucomigt_sd:
2400  case Intrinsic::x86_sse2_ucomile_sd:
2401  case Intrinsic::x86_sse2_ucomilt_sd:
2402  case Intrinsic::x86_sse2_ucomineq_sd:
2403  case Intrinsic::x86_avx512_vcomi_ss:
2404  case Intrinsic::x86_avx512_vcomi_sd:
2405  case Intrinsic::x86_avx512_mask_cmp_ss:
2406  case Intrinsic::x86_avx512_mask_cmp_sd: {
2407  // These intrinsics only demand the 0th element of their input vectors. If
2408  // we can simplify the input based on that, do so now.
2409  bool MadeChange = false;
2410  Value *Arg0 = II->getArgOperand(0);
2411  Value *Arg1 = II->getArgOperand(1);
2412  unsigned VWidth = Arg0->getType()->getVectorNumElements();
2413  if (Value *V = SimplifyDemandedVectorEltsLow(Arg0, VWidth, 1)) {
2414  II->setArgOperand(0, V);
2415  MadeChange = true;
2416  }
2417  if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, 1)) {
2418  II->setArgOperand(1, V);
2419  MadeChange = true;
2420  }
2421  if (MadeChange)
2422  return II;
2423  break;
2424  }
2425  case Intrinsic::x86_avx512_cmp_pd_128:
2426  case Intrinsic::x86_avx512_cmp_pd_256:
2427  case Intrinsic::x86_avx512_cmp_pd_512:
2428  case Intrinsic::x86_avx512_cmp_ps_128:
2429  case Intrinsic::x86_avx512_cmp_ps_256:
2430  case Intrinsic::x86_avx512_cmp_ps_512: {
2431  // Folding cmp(sub(a,b),0) -> cmp(a,b) and cmp(0,sub(a,b)) -> cmp(b,a)
2432  Value *Arg0 = II->getArgOperand(0);
2433  Value *Arg1 = II->getArgOperand(1);
2434  bool Arg0IsZero = match(Arg0, m_PosZeroFP());
2435  if (Arg0IsZero)
2436  std::swap(Arg0, Arg1);
2437  Value *A, *B;
2438  // This fold requires only the NINF (no infs) flag, since inf minus
2439  // inf is nan.
2440  // NSZ(No Signed Zeros) is not needed because zeros of any sign are
2441  // equal for both compares.
2442  // NNAN is not needed because nans compare the same for both compares.
2443  // The compare intrinsic uses the above assumptions and therefore
2444  // doesn't require additional flags.
2445  if ((match(Arg0, m_OneUse(m_FSub(m_Value(A), m_Value(B)))) &&
2446  match(Arg1, m_PosZeroFP()) && isa<Instruction>(Arg0) &&
2447  cast<Instruction>(Arg0)->getFastMathFlags().noInfs())) {
2448  if (Arg0IsZero)
2449  std::swap(A, B);
2450  II->setArgOperand(0, A);
2451  II->setArgOperand(1, B);
2452  return II;
2453  }
2454  break;
2455  }
2456 
2457  case Intrinsic::x86_avx512_add_ps_512:
2458  case Intrinsic::x86_avx512_div_ps_512:
2459  case Intrinsic::x86_avx512_mul_ps_512:
2460  case Intrinsic::x86_avx512_sub_ps_512:
2461  case Intrinsic::x86_avx512_add_pd_512:
2462  case Intrinsic::x86_avx512_div_pd_512:
2463  case Intrinsic::x86_avx512_mul_pd_512:
2464  case Intrinsic::x86_avx512_sub_pd_512:
2465  // If the rounding mode is CUR_DIRECTION(4) we can turn these into regular
2466  // IR operations.
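 // For example (illustrative operands), a call such as
 //   @llvm.x86.avx512.add.ps.512(<16 x float> %a, <16 x float> %b, i32 4)
 // becomes a plain "fadd <16 x float> %a, %b".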
2467  if (auto *R = dyn_cast<ConstantInt>(II->getArgOperand(2))) {
2468  if (R->getValue() == 4) {
2469  Value *Arg0 = II->getArgOperand(0);
2470  Value *Arg1 = II->getArgOperand(1);
2471 
2472  Value *V;
2473  switch (II->getIntrinsicID()) {
2474  default: llvm_unreachable("Case stmts out of sync!");
2475  case Intrinsic::x86_avx512_add_ps_512:
2476  case Intrinsic::x86_avx512_add_pd_512:
2477  V = Builder.CreateFAdd(Arg0, Arg1);
2478  break;
2479  case Intrinsic::x86_avx512_sub_ps_512:
2480  case Intrinsic::x86_avx512_sub_pd_512:
2481  V = Builder.CreateFSub(Arg0, Arg1);
2482  break;
2483  case Intrinsic::x86_avx512_mul_ps_512:
2484  case Intrinsic::x86_avx512_mul_pd_512:
2485  V = Builder.CreateFMul(Arg0, Arg1);
2486  break;
2487  case Intrinsic::x86_avx512_div_ps_512:
2488  case Intrinsic::x86_avx512_div_pd_512:
2489  V = Builder.CreateFDiv(Arg0, Arg1);
2490  break;
2491  }
2492 
2493  return replaceInstUsesWith(*II, V);
2494  }
2495  }
2496  break;
2497 
2498  case Intrinsic::x86_avx512_mask_add_ss_round:
2499  case Intrinsic::x86_avx512_mask_div_ss_round:
2500  case Intrinsic::x86_avx512_mask_mul_ss_round:
2501  case Intrinsic::x86_avx512_mask_sub_ss_round:
2502  case Intrinsic::x86_avx512_mask_add_sd_round:
2503  case Intrinsic::x86_avx512_mask_div_sd_round:
2504  case Intrinsic::x86_avx512_mask_mul_sd_round:
2505  case Intrinsic::x86_avx512_mask_sub_sd_round:
2506  // If the rounding mode is CUR_DIRECTION(4) we can turn these into regular
2507  // IR operations.
2508  if (auto *R = dyn_cast<ConstantInt>(II->getArgOperand(4))) {
2509  if (R->getValue() == 4) {
2510  // Extract the element as scalars.
2511  Value *Arg0 = II->getArgOperand(0);
2512  Value *Arg1 = II->getArgOperand(1);
2513  Value *LHS = Builder.CreateExtractElement(Arg0, (uint64_t)0);
2514  Value *RHS = Builder.CreateExtractElement(Arg1, (uint64_t)0);
2515 
2516  Value *V;
2517  switch (II->getIntrinsicID()) {
2518  default: llvm_unreachable("Case stmts out of sync!");
2519  case Intrinsic::x86_avx512_mask_add_ss_round:
2520  case Intrinsic::x86_avx512_mask_add_sd_round:
2521  V = Builder.CreateFAdd(LHS, RHS);
2522  break;
2523  case Intrinsic::x86_avx512_mask_sub_ss_round:
2524  case Intrinsic::x86_avx512_mask_sub_sd_round:
2525  V = Builder.CreateFSub(LHS, RHS);
2526  break;
2527  case Intrinsic::x86_avx512_mask_mul_ss_round:
2528  case Intrinsic::x86_avx512_mask_mul_sd_round:
2529  V = Builder.CreateFMul(LHS, RHS);
2530  break;
2531  case Intrinsic::x86_avx512_mask_div_ss_round:
2532  case Intrinsic::x86_avx512_mask_div_sd_round:
2533  V = Builder.CreateFDiv(LHS, RHS);
2534  break;
2535  }
2536 
2537  // Handle the masking aspect of the intrinsic.
2538  Value *Mask = II->getArgOperand(3);
2539  auto *C = dyn_cast<ConstantInt>(Mask);
2540  // We don't need a select if we know the mask bit is a 1.
2541  if (!C || !C->getValue()[0]) {
2542  // Cast the mask to an i1 vector and then extract the lowest element.
2543  auto *MaskTy = VectorType::get(Builder.getInt1Ty(),
2544  cast<IntegerType>(Mask->getType())->getBitWidth());
2545  Mask = Builder.CreateBitCast(Mask, MaskTy);
2546  Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
2547  // Extract the lowest element from the passthru operand.
2548  Value *Passthru = Builder.CreateExtractElement(II->getArgOperand(2),
2549  (uint64_t)0);
2550  V = Builder.CreateSelect(Mask, V, Passthru);
2551  }
2552 
2553  // Insert the result back into the original argument 0.
2554  V = Builder.CreateInsertElement(Arg0, V, (uint64_t)0);
2555 
2556  return replaceInstUsesWith(*II, V);
2557  }
2558  }
2559  LLVM_FALLTHROUGH;
2560 
2561  // X86 scalar intrinsics simplified with SimplifyDemandedVectorElts.
2562  case Intrinsic::x86_avx512_mask_max_ss_round:
2563  case Intrinsic::x86_avx512_mask_min_ss_round:
2564  case Intrinsic::x86_avx512_mask_max_sd_round:
2565  case Intrinsic::x86_avx512_mask_min_sd_round:
2566  case Intrinsic::x86_sse_cmp_ss:
2567  case Intrinsic::x86_sse_min_ss:
2568  case Intrinsic::x86_sse_max_ss:
2569  case Intrinsic::x86_sse2_cmp_sd:
2570  case Intrinsic::x86_sse2_min_sd:
2571  case Intrinsic::x86_sse2_max_sd:
2572  case Intrinsic::x86_xop_vfrcz_ss:
2573  case Intrinsic::x86_xop_vfrcz_sd: {
2574  unsigned VWidth = II->getType()->getVectorNumElements();
2575  APInt UndefElts(VWidth, 0);
2576  APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
2577  if (Value *V = SimplifyDemandedVectorElts(II, AllOnesEltMask, UndefElts)) {
2578  if (V != II)
2579  return replaceInstUsesWith(*II, V);
2580  return II;
2581  }
2582  break;
2583  }
2584  case Intrinsic::x86_sse41_round_ss:
2585  case Intrinsic::x86_sse41_round_sd: {
2586  unsigned VWidth = II->getType()->getVectorNumElements();
2587  APInt UndefElts(VWidth, 0);
2588  APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
2589  if (Value *V = SimplifyDemandedVectorElts(II, AllOnesEltMask, UndefElts)) {
2590  if (V != II)
2591  return replaceInstUsesWith(*II, V);
2592  return II;
2593  } else if (Value *V = simplifyX86round(*II, Builder))
2594  return replaceInstUsesWith(*II, V);
2595  break;
2596  }
2597 
2598  // Constant fold add/sub with saturation intrinsics.
2599  case Intrinsic::x86_sse2_padds_b:
2600  case Intrinsic::x86_sse2_padds_w:
2601  case Intrinsic::x86_sse2_psubs_b:
2602  case Intrinsic::x86_sse2_psubs_w:
2603  case Intrinsic::x86_avx2_padds_b:
2604  case Intrinsic::x86_avx2_padds_w:
2605  case Intrinsic::x86_avx2_psubs_b:
2606  case Intrinsic::x86_avx2_psubs_w:
2607  case Intrinsic::x86_avx512_padds_b_512:
2608  case Intrinsic::x86_avx512_padds_w_512:
2609  case Intrinsic::x86_avx512_psubs_b_512:
2610  case Intrinsic::x86_avx512_psubs_w_512:
2611  if (Value *V = simplifyX86AddsSubs(*II, Builder))
2612  return replaceInstUsesWith(*II, V);
2613  break;
2614 
2615  // Constant fold ashr( <A x Bi>, Ci ).
2616  // Constant fold lshr( <A x Bi>, Ci ).
2617  // Constant fold shl( <A x Bi>, Ci ).
2618  case Intrinsic::x86_sse2_psrai_d:
2619  case Intrinsic::x86_sse2_psrai_w:
2620  case Intrinsic::x86_avx2_psrai_d:
2621  case Intrinsic::x86_avx2_psrai_w:
2622  case Intrinsic::x86_avx512_psrai_q_128:
2623  case Intrinsic::x86_avx512_psrai_q_256:
2624  case Intrinsic::x86_avx512_psrai_d_512:
2625  case Intrinsic::x86_avx512_psrai_q_512:
2626  case Intrinsic::x86_avx512_psrai_w_512:
2627  case Intrinsic::x86_sse2_psrli_d:
2628  case Intrinsic::x86_sse2_psrli_q:
2629  case Intrinsic::x86_sse2_psrli_w:
2630  case Intrinsic::x86_avx2_psrli_d:
2631  case Intrinsic::x86_avx2_psrli_q:
2632  case Intrinsic::x86_avx2_psrli_w:
2633  case Intrinsic::x86_avx512_psrli_d_512:
2634  case Intrinsic::x86_avx512_psrli_q_512:
2635  case Intrinsic::x86_avx512_psrli_w_512:
2636  case Intrinsic::x86_sse2_pslli_d:
2637  case Intrinsic::x86_sse2_pslli_q:
2638  case Intrinsic::x86_sse2_pslli_w:
2639  case Intrinsic::x86_avx2_pslli_d:
2640  case Intrinsic::x86_avx2_pslli_q:
2641  case Intrinsic::x86_avx2_pslli_w:
2642  case Intrinsic::x86_avx512_pslli_d_512:
2643  case Intrinsic::x86_avx512_pslli_q_512:
2644  case Intrinsic::x86_avx512_pslli_w_512:
2645  if (Value *V = simplifyX86immShift(*II, Builder))
2646  return replaceInstUsesWith(*II, V);
2647  break;
2648 
2649  case Intrinsic::x86_sse2_psra_d:
2650  case Intrinsic::x86_sse2_psra_w:
2651  case Intrinsic::x86_avx2_psra_d:
2652  case Intrinsic::x86_avx2_psra_w:
2653  case Intrinsic::x86_avx512_psra_q_128:
2654  case Intrinsic::x86_avx512_psra_q_256:
2655  case Intrinsic::x86_avx512_psra_d_512:
2656  case Intrinsic::x86_avx512_psra_q_512:
2657  case Intrinsic::x86_avx512_psra_w_512:
2658  case Intrinsic::x86_sse2_psrl_d:
2659  case Intrinsic::x86_sse2_psrl_q:
2660  case Intrinsic::x86_sse2_psrl_w:
2661  case Intrinsic::x86_avx2_psrl_d:
2662  case Intrinsic::x86_avx2_psrl_q:
2663  case Intrinsic::x86_avx2_psrl_w:
2664  case Intrinsic::x86_avx512_psrl_d_512:
2665  case Intrinsic::x86_avx512_psrl_q_512:
2666  case Intrinsic::x86_avx512_psrl_w_512:
2667  case Intrinsic::x86_sse2_psll_d:
2668  case Intrinsic::x86_sse2_psll_q:
2669  case Intrinsic::x86_sse2_psll_w:
2670  case Intrinsic::x86_avx2_psll_d:
2671  case Intrinsic::x86_avx2_psll_q:
2672  case Intrinsic::x86_avx2_psll_w:
2673  case Intrinsic::x86_avx512_psll_d_512:
2674  case Intrinsic::x86_avx512_psll_q_512:
2675  case Intrinsic::x86_avx512_psll_w_512: {
2676  if (Value *V = simplifyX86immShift(*II, Builder))
2677  return replaceInstUsesWith(*II, V);
2678 
2679  // SSE2/AVX2 uses only the first 64-bits of the 128-bit vector
2680  // operand to compute the shift amount.
2681  Value *Arg1 = II->getArgOperand(1);
2682  assert(Arg1->getType()->getPrimitiveSizeInBits() == 128 &&
2683  "Unexpected packed shift size");
2684  unsigned VWidth = Arg1->getType()->getVectorNumElements();
2685 
2686  if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, VWidth / 2)) {
2687  II->setArgOperand(1, V);
2688  return II;
2689  }
2690  break;
2691  }
2692 
2693  case Intrinsic::x86_avx2_psllv_d:
2694  case Intrinsic::x86_avx2_psllv_d_256:
2695  case Intrinsic::x86_avx2_psllv_q:
2696  case Intrinsic::x86_avx2_psllv_q_256:
2697  case Intrinsic::x86_avx512_psllv_d_512:
2698  case Intrinsic::x86_avx512_psllv_q_512:
2699  case Intrinsic::x86_avx512_psllv_w_128:
2700  case Intrinsic::x86_avx512_psllv_w_256:
2701  case Intrinsic::x86_avx512_psllv_w_512:
2702  case Intrinsic::x86_avx2_psrav_d:
2703  case Intrinsic::x86_avx2_psrav_d_256:
2704  case Intrinsic::x86_avx512_psrav_q_128:
2705  case Intrinsic::x86_avx512_psrav_q_256:
2706  case Intrinsic::x86_avx512_psrav_d_512:
2707  case Intrinsic::x86_avx512_psrav_q_512:
2708  case Intrinsic::x86_avx512_psrav_w_128:
2709  case Intrinsic::x86_avx512_psrav_w_256:
2710  case Intrinsic::x86_avx512_psrav_w_512:
2711  case Intrinsic::x86_avx2_psrlv_d:
2712  case Intrinsic::x86_avx2_psrlv_d_256:
2713  case Intrinsic::x86_avx2_psrlv_q:
2714  case Intrinsic::x86_avx2_psrlv_q_256:
2715  case Intrinsic::x86_avx512_psrlv_d_512:
2716  case Intrinsic::x86_avx512_psrlv_q_512:
2717  case Intrinsic::x86_avx512_psrlv_w_128:
2718  case Intrinsic::x86_avx512_psrlv_w_256:
2719  case Intrinsic::x86_avx512_psrlv_w_512:
2720  if (Value *V = simplifyX86varShift(*II, Builder))
2721  return replaceInstUsesWith(*II, V);
2722  break;
2723 
2724  case Intrinsic::x86_sse2_packssdw_128:
2725  case Intrinsic::x86_sse2_packsswb_128:
2726  case Intrinsic::x86_avx2_packssdw:
2727  case Intrinsic::x86_avx2_packsswb:
2728  case Intrinsic::x86_avx512_packssdw_512:
2729  case Intrinsic::x86_avx512_packsswb_512:
2730  if (Value *V = simplifyX86pack(*II, true))
2731  return replaceInstUsesWith(*II, V);
2732  break;
2733 
2734  case Intrinsic::x86_sse2_packuswb_128:
2735  case Intrinsic::x86_sse41_packusdw:
2736  case Intrinsic::x86_avx2_packusdw:
2737  case Intrinsic::x86_avx2_packuswb:
2738  case Intrinsic::x86_avx512_packusdw_512:
2739  case Intrinsic::x86_avx512_packuswb_512:
2740  if (Value *V = simplifyX86pack(*II, false))
2741  return replaceInstUsesWith(*II, V);
2742  break;
2743 
2744  case Intrinsic::x86_pclmulqdq:
2745  case Intrinsic::x86_pclmulqdq_256:
2746  case Intrinsic::x86_pclmulqdq_512: {
2747  if (auto *C = dyn_cast<ConstantInt>(II->getArgOperand(2))) {
2748  unsigned Imm = C->getZExtValue();
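 // The immediate picks which 64-bit half of each source is multiplied:
 // bit 0 selects the high (set) or low (clear) element of Arg0, and bit 4
 // does the same for Arg1, which is what the demanded-element masks below
 // encode.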
2749 
2750  bool MadeChange = false;
2751  Value *Arg0 = II->getArgOperand(0);
2752  Value *Arg1 = II->getArgOperand(1);
2753  unsigned VWidth = Arg0->getType()->getVectorNumElements();
2754 
2755  APInt UndefElts1(VWidth, 0);
2756  APInt DemandedElts1 = APInt::getSplat(VWidth,
2757  APInt(2, (Imm & 0x01) ? 2 : 1));
2758  if (Value *V = SimplifyDemandedVectorElts(Arg0, DemandedElts1,
2759  UndefElts1)) {
2760  II->setArgOperand(0, V);
2761  MadeChange = true;
2762  }
2763 
2764  APInt UndefElts2(VWidth, 0);
2765  APInt DemandedElts2 = APInt::getSplat(VWidth,
2766  APInt(2, (Imm & 0x10) ? 2 : 1));
2767  if (Value *V = SimplifyDemandedVectorElts(Arg1, DemandedElts2,
2768  UndefElts2)) {
2769  II->setArgOperand(1, V);
2770  MadeChange = true;
2771  }
2772 
2773  // If the demanded elements of either input are undef, the result is zero.
2774  if (DemandedElts1.isSubsetOf(UndefElts1) ||
2775  DemandedElts2.isSubsetOf(UndefElts2))
2776  return replaceInstUsesWith(*II,
2777  ConstantAggregateZero::get(II->getType()));
2778 
2779  if (MadeChange)
2780  return II;
2781  }
2782  break;
2783  }
2784 
2785  case Intrinsic::x86_sse41_insertps:
2786  if (Value *V = simplifyX86insertps(*II, Builder))
2787  return replaceInstUsesWith(*II, V);
2788  break;
2789 
2790  case Intrinsic::x86_sse4a_extrq: {
2791  Value *Op0 = II->getArgOperand(0);
2792  Value *Op1 = II->getArgOperand(1);
2793  unsigned VWidth0 = Op0->getType()->getVectorNumElements();
2794  unsigned VWidth1 = Op1->getType()->getVectorNumElements();
2795  assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
2796  Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth0 == 2 &&
2797  VWidth1 == 16 && "Unexpected operand sizes");
2798 
2799  // See if we're dealing with constant values.
2800  Constant *C1 = dyn_cast<Constant>(Op1);
2801  ConstantInt *CILength =
2802  C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)0))
2803  : nullptr;
2804  ConstantInt *CIIndex =
2805  C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)1))
2806  : nullptr;
2807 
2808  // Attempt to simplify to a constant, shuffle vector or EXTRQI call.
2809  if (Value *V = simplifyX86extrq(*II, Op0, CILength, CIIndex, Builder))
2810  return replaceInstUsesWith(*II, V);
2811 
2812  // EXTRQ only uses the lowest 64-bits of the first 128-bit vector
2813  // operand and the lowest 16-bits of the second.
2814  bool MadeChange = false;
2815  if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
2816  II->setArgOperand(0, V);
2817  MadeChange = true;
2818  }
2819  if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 2)) {
2820  II->setArgOperand(1, V);
2821  MadeChange = true;
2822  }
2823  if (MadeChange)
2824  return II;
2825  break;
2826  }
2827 
2828  case Intrinsic::x86_sse4a_extrqi: {
2829  // EXTRQI: Extract Length bits starting from Index. Zero pad the remaining
2830  // bits of the lower 64-bits. The upper 64-bits are undefined.
2831  Value *Op0 = II->getArgOperand(0);
2832  unsigned VWidth = Op0->getType()->getVectorNumElements();
2833  assert(Op0->getType()->getPrimitiveSizeInBits() == 128 && VWidth == 2 &&
2834  "Unexpected operand size");
2835 
2836  // See if we're dealing with constant values.
2837  ConstantInt *CILength = dyn_cast<ConstantInt>(II->getArgOperand(1));
2838  ConstantInt *CIIndex = dyn_cast<ConstantInt>(II->getArgOperand(2));
2839 
2840  // Attempt to simplify to a constant or shuffle vector.
2841  if (Value *V = simplifyX86extrq(*II, Op0, CILength, CIIndex, Builder))
2842  return replaceInstUsesWith(*II, V);
2843 
2844  // EXTRQI only uses the lowest 64-bits of the first 128-bit vector
2845  // operand.
2846  if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) {
2847  II->setArgOperand(0, V);
2848  return II;
2849  }
2850  break;
2851  }
2852 
2853  case Intrinsic::x86_sse4a_insertq: {
2854  Value *Op0 = II->getArgOperand(0);
2855  Value *Op1 = II->getArgOperand(1);
2856  unsigned VWidth = Op0->getType()->getVectorNumElements();
2857  assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
2858  Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth == 2 &&
2859  Op1->getType()->getVectorNumElements() == 2 &&
2860  "Unexpected operand size");
2861 
2862  // See if we're dealing with constant values.
2863  Constant *C1 = dyn_cast<Constant>(Op1);
2864  ConstantInt *CI11 =
2865  C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)1))
2866  : nullptr;
2867 
2868  // Attempt to simplify to a constant, shuffle vector or INSERTQI call.
2869  if (CI11) {
2870  const APInt &V11 = CI11->getValue();
2871  APInt Len = V11.zextOrTrunc(6);
2872  APInt Idx = V11.lshr(8).zextOrTrunc(6);
2873  if (Value *V = simplifyX86insertq(*II, Op0, Op1, Len, Idx, Builder))
2874  return replaceInstUsesWith(*II, V);
2875  }
2876 
2877  // INSERTQ only uses the lowest 64-bits of the first 128-bit vector
2878  // operand.
2879  if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) {
2880  II->setArgOperand(0, V);
2881  return II;
2882  }
2883  break;
2884  }
2885 
2886  case Intrinsic::x86_sse4a_insertqi: {
2887  // INSERTQI: Extract lowest Length bits from lower half of second source and
2888  // insert over first source starting at Index bit. The upper 64-bits are
2889  // undefined.
2890  Value *Op0 = II->getArgOperand(0);
2891  Value *Op1 = II->getArgOperand(1);
2892  unsigned VWidth0 = Op0->getType()->getVectorNumElements();
2893  unsigned VWidth1 = Op1->getType()->getVectorNumElements();
2894  assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
2895  Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth0 == 2 &&
2896  VWidth1 == 2 && "Unexpected operand sizes");
2897 
2898  // See if we're dealing with constant values.
2899  ConstantInt *CILength = dyn_cast<ConstantInt>(II->getArgOperand(2));
2900  ConstantInt *CIIndex = dyn_cast<ConstantInt>(II->getArgOperand(3));
2901 
2902  // Attempt to simplify to a constant or shuffle vector.
2903  if (CILength && CIIndex) {
2904  APInt Len = CILength->getValue().zextOrTrunc(6);
2905  APInt Idx = CIIndex->getValue().zextOrTrunc(6);
2906  if (Value *V = simplifyX86insertq(*II, Op0, Op1, Len, Idx, Builder))
2907  return replaceInstUsesWith(*II, V);
2908  }
2909 
2910  // INSERTQI only uses the lowest 64-bits of the first two 128-bit vector
2911  // operands.
2912  bool MadeChange = false;
2913  if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
2914  II->setArgOperand(0, V);
2915  MadeChange = true;
2916  }
2917  if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 1)) {
2918  II->setArgOperand(1, V);
2919  MadeChange = true;
2920  }
2921  if (MadeChange)
2922  return II;
2923  break;
2924  }
2925 
2926  case Intrinsic::x86_sse41_pblendvb:
2927  case Intrinsic::x86_sse41_blendvps:
2928  case Intrinsic::x86_sse41_blendvpd:
2929  case Intrinsic::x86_avx_blendv_ps_256:
2930  case Intrinsic::x86_avx_blendv_pd_256:
2931  case Intrinsic::x86_avx2_pblendvb: {
2932  // fold (blend A, A, Mask) -> A
2933  Value *Op0 = II->getArgOperand(0);
2934  Value *Op1 = II->getArgOperand(1);
2935  Value *Mask = II->getArgOperand(2);
2936  if (Op0 == Op1)
2937  return replaceInstUsesWith(CI, Op0);
2938 
2939  // Zero Mask - select 1st argument.
2940  if (isa<ConstantAggregateZero>(Mask))
2941  return replaceInstUsesWith(CI, Op0);
2942 
2943  // Constant Mask - select 1st/2nd argument lane based on top bit of mask.
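 // For example (illustrative mask), a <16 x i8> mask of <0x80, 0, ...> yields
 // a boolean vector <i1 true, i1 false, ...>, so the call becomes
 // "select <16 x i1> %bool, Op1, Op0"; lanes whose mask sign bit is set take
 // their value from Op1.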
2944  if (auto *ConstantMask = dyn_cast<ConstantDataVector>(Mask)) {
2945  Constant *NewSelector = getNegativeIsTrueBoolVec(ConstantMask);
2946  return SelectInst::Create(NewSelector, Op1, Op0, "blendv");
2947  }
2948 
2949  // Convert to a vector select if we can bypass casts and find a boolean
2950  // vector condition value.
2951  Value *BoolVec;
2952  if (match(peekThroughBitcast(Mask), m_SExt(m_Value(BoolVec)))) {
2953  auto *VTy = dyn_cast<VectorType>(BoolVec->getType());
2954  if (VTy && VTy->getScalarSizeInBits() == 1 &&
2955  VTy->getVectorNumElements() == II->getType()->getVectorNumElements())
2956  return SelectInst::Create(BoolVec, Op1, Op0);
2957  // TODO: If we can find a boolean vector condition with fewer elements,
2958  // then we can form a vector select by bitcasting Op0/Op1 to a
2959  // vector type with wider elements and bitcasting the result.
2960  }
2961 
2962  break;
2963  }
2964 
2965  case Intrinsic::x86_ssse3_pshuf_b_128:
2966  case Intrinsic::x86_avx2_pshuf_b:
2967  case Intrinsic::x86_avx512_pshuf_b_512:
2968  if (Value *V = simplifyX86pshufb(*II, Builder))
2969  return replaceInstUsesWith(*II, V);
2970  break;
2971 
2972  case Intrinsic::x86_avx_vpermilvar_ps:
2973  case Intrinsic::x86_avx_vpermilvar_ps_256:
2974  case Intrinsic::x86_avx512_vpermilvar_ps_512:
2975  case Intrinsic::x86_avx_vpermilvar_pd:
2976  case Intrinsic::x86_avx_vpermilvar_pd_256:
2977  case Intrinsic::x86_avx512_vpermilvar_pd_512:
2978  if (Value *V = simplifyX86vpermilvar(*II, Builder))
2979  return replaceInstUsesWith(*II, V);
2980  break;
2981 
2982  case Intrinsic::x86_avx2_permd:
2983  case Intrinsic::x86_avx2_permps:
2984  case Intrinsic::x86_avx512_permvar_df_256:
2985  case Intrinsic::x86_avx512_permvar_df_512:
2986  case Intrinsic::x86_avx512_permvar_di_256:
2987  case Intrinsic::x86_avx512_permvar_di_512:
2988  case Intrinsic::x86_avx512_permvar_hi_128:
2989  case Intrinsic::x86_avx512_permvar_hi_256:
2990  case Intrinsic::x86_avx512_permvar_hi_512:
2991  case Intrinsic::x86_avx512_permvar_qi_128:
2992  case Intrinsic::x86_avx512_permvar_qi_256:
2993  case Intrinsic::x86_avx512_permvar_qi_512:
2994  case Intrinsic::x86_avx512_permvar_sf_512:
2995  case Intrinsic::x86_avx512_permvar_si_512:
2996  if (Value *V = simplifyX86vpermv(*II, Builder))
2997  return replaceInstUsesWith(*II, V);
2998  break;
2999 
3000  case Intrinsic::x86_avx_maskload_ps:
3001  case Intrinsic::x86_avx_maskload_pd:
3002  case Intrinsic::x86_avx_maskload_ps_256:
3003  case Intrinsic::x86_avx_maskload_pd_256:
3004  case Intrinsic::x86_avx2_maskload_d:
3005  case Intrinsic::x86_avx2_maskload_q:
3006  case Intrinsic::x86_avx2_maskload_d_256:
3007  case Intrinsic::x86_avx2_maskload_q_256:
3008  if (Instruction *I = simplifyX86MaskedLoad(*II, *this))
3009  return I;
3010  break;
3011 
3012  case Intrinsic::x86_sse2_maskmov_dqu:
3013  case Intrinsic::x86_avx_maskstore_ps:
3014  case Intrinsic::x86_avx_maskstore_pd:
3015  case Intrinsic::x86_avx_maskstore_ps_256:
3016  case Intrinsic::x86_avx_maskstore_pd_256:
3017  case Intrinsic::x86_avx2_maskstore_d:
3018  case Intrinsic::x86_avx2_maskstore_q:
3019  case Intrinsic::x86_avx2_maskstore_d_256:
3020  case Intrinsic::x86_avx2_maskstore_q_256:
3021  if (simplifyX86MaskedStore(*II, *this))
3022  return nullptr;
3023  break;
3024 
3025  case Intrinsic::x86_xop_vpcomb:
3026  case Intrinsic::x86_xop_vpcomd:
3027  case Intrinsic::x86_xop_vpcomq:
3028  case Intrinsic::x86_xop_vpcomw:
3029  if (Value *V = simplifyX86vpcom(*II, Builder, true))
3030  return replaceInstUsesWith(*II, V);
3031  break;
3032 
3033  case Intrinsic::x86_xop_vpcomub:
3034  case Intrinsic::x86_xop_vpcomud:
3035  case Intrinsic::x86_xop_vpcomuq:
3036  case Intrinsic::x86_xop_vpcomuw:
3037  if (Value *V = simplifyX86vpcom(*II, Builder, false))
3038  return replaceInstUsesWith(*II, V);
3039  break;
3040 
3041  case Intrinsic::ppc_altivec_vperm:
3042  // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
3043  // Note that ppc_altivec_vperm has a big-endian bias, so when creating
3044  // a vector shuffle for little endian, we must undo the transformation
3045  // performed on vec_perm in altivec.h. That is, we must complement
3046  // the permutation mask with respect to 31 and reverse the order of
3047  // V1 and V2.
3048  if (Constant *Mask = dyn_cast<Constant>(II->getArgOperand(2))) {
3049  assert(Mask->getType()->getVectorNumElements() == 16 &&
3050  "Bad type for intrinsic!");
3051 
3052  // Check that all of the elements are integer constants or undefs.
3053  bool AllEltsOk = true;
3054  for (unsigned i = 0; i != 16; ++i) {
3055  Constant *Elt = Mask->getAggregateElement(i);
3056  if (!Elt || !(isa<ConstantInt>(Elt) || isa<UndefValue>(Elt))) {
3057  AllEltsOk = false;
3058  break;
3059  }
3060  }
3061 
3062  if (AllEltsOk) {
3063  // Cast the input vectors to byte vectors.
3064  Value *Op0 = Builder.CreateBitCast(II->getArgOperand(0),
3065  Mask->getType());
3066  Value *Op1 = Builder.CreateBitCast(II->getArgOperand(1),
3067  Mask->getType());
3068  Value *Result = UndefValue::get(Op0->getType());
3069 
3070  // Only extract each element once.
3071  Value *ExtractedElts[32];
3072  memset(ExtractedElts, 0, sizeof(ExtractedElts));
3073 
3074  for (unsigned i = 0; i != 16; ++i) {
3075  if (isa<UndefValue>(Mask->getAggregateElement(i)))
3076  continue;
3077  unsigned Idx =
3078  cast<ConstantInt>(Mask->getAggregateElement(i))->getZExtValue();
3079  Idx &= 31; // Match the hardware behavior.
3080  if (DL.isLittleEndian())
3081  Idx = 31 - Idx;
3082 
3083  if (!ExtractedElts[Idx]) {
3084  Value *Op0ToUse = (DL.isLittleEndian()) ? Op1 : Op0;
3085  Value *Op1ToUse = (DL.isLittleEndian()) ? Op0 : Op1;
3086  ExtractedElts[Idx] =
3087  Builder.CreateExtractElement(Idx < 16 ? Op0ToUse : Op1ToUse,
3088  Builder.getInt32(Idx&15));
3089  }
3090 
3091  // Insert this value into the result vector.
3092  Result = Builder.CreateInsertElement(Result, ExtractedElts[Idx],
3093  Builder.getInt32(i));
3094  }
3095  return CastInst::Create(Instruction::BitCast, Result, CI.getType());
3096  }
3097  }
3098  break;
3099 
3100  case Intrinsic::arm_neon_vld1: {
3101  unsigned MemAlign = getKnownAlignment(II->getArgOperand(0),
3102  DL, II, &AC, &DT);
3103  if (Value *V = simplifyNeonVld1(*II, MemAlign, Builder))
3104  return replaceInstUsesWith(*II, V);
3105  break;
3106  }
3107 
3108  case Intrinsic::arm_neon_vld2:
3109  case Intrinsic::arm_neon_vld3:
3110  case Intrinsic::arm_neon_vld4:
3111  case Intrinsic::arm_neon_vld2lane:
3112  case Intrinsic::arm_neon_vld3lane:
3113  case Intrinsic::arm_neon_vld4lane:
3114  case Intrinsic::arm_neon_vst1:
3115  case Intrinsic::arm_neon_vst2:
3116  case Intrinsic::arm_neon_vst3:
3117  case Intrinsic::arm_neon_vst4:
3118  case Intrinsic::arm_neon_vst2lane:
3119  case Intrinsic::arm_neon_vst3lane:
3120  case Intrinsic::arm_neon_vst4lane: {
3121  unsigned MemAlign =
3122  getKnownAlignment(II->getArgOperand(0), DL, II, &AC, &DT);
3123  unsigned AlignArg = II->getNumArgOperands() - 1;
3124  ConstantInt *IntrAlign = dyn_cast<ConstantInt>(II->getArgOperand(AlignArg));
3125  if (IntrAlign && IntrAlign->getZExtValue() < MemAlign) {
3126  II->setArgOperand(AlignArg,
3127  ConstantInt::get(Type::getInt32Ty(II->getContext()),
3128  MemAlign, false));
3129  return II;
3130  }
3131  break;
3132  }
3133 
3134  case Intrinsic::arm_neon_vtbl1:
3135  case Intrinsic::aarch64_neon_tbl1:
3136  if (Value *V = simplifyNeonTbl1(*II, Builder))
3137  return replaceInstUsesWith(*II, V);
3138  break;
3139 
3140  case Intrinsic::arm_neon_vmulls:
3141  case Intrinsic::arm_neon_vmullu:
3142  case Intrinsic::aarch64_neon_smull:
3143  case Intrinsic::aarch64_neon_umull: {
3144  Value *Arg0 = II->getArgOperand(0);
3145  Value *Arg1 = II->getArgOperand(1);
3146 
3147  // Handle mul by zero first:
3148  if (isa<ConstantAggregateZero>(Arg0) || isa<ConstantAggregateZero>(Arg1)) {
3149  return replaceInstUsesWith(CI, ConstantAggregateZero::get(II->getType()));
3150  }
3151 
3152  // Check for constant LHS & RHS - in this case we just simplify.
3153  bool Zext = (II->getIntrinsicID() == Intrinsic::arm_neon_vmullu ||
3154  II->getIntrinsicID() == Intrinsic::aarch64_neon_umull);
3155  VectorType *NewVT = cast<VectorType>(II->getType());
3156  if (Constant *CV0 = dyn_cast<Constant>(Arg0)) {
3157  if (Constant *CV1 = dyn_cast<Constant>(Arg1)) {
3158  CV0 = ConstantExpr::getIntegerCast(CV0, NewVT, /*isSigned=*/!Zext);
3159  CV1 = ConstantExpr::getIntegerCast(CV1, NewVT, /*isSigned=*/!Zext);
3160 
3161  return replaceInstUsesWith(CI, ConstantExpr::getMul(CV0, CV1));
3162  }
3163 
3164  // Couldn't simplify - canonicalize constant to the RHS.
3165  std::swap(Arg0, Arg1);
3166  }
3167 
3168  // Handle mul by one:
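  // e.g. vmull(%x, splat(1)) is just a lane-wise widening of %x, so it can be
  // replaced with a sext (signed) or zext (unsigned) of %x to the result type.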
3169  if (Constant *CV1 = dyn_cast<Constant>(Arg1))
3170  if (ConstantInt *Splat =
3171  dyn_cast_or_null<ConstantInt>(CV1->getSplatValue()))
3172  if (Splat->isOne())
3173  return CastInst::CreateIntegerCast(Arg0, II->getType(),
3174  /*isSigned=*/!Zext);
3175 
3176  break;
3177  }
3178  case Intrinsic::arm_neon_aesd:
3179  case Intrinsic::arm_neon_aese:
3180  case Intrinsic::aarch64_crypto_aesd:
3181  case Intrinsic::aarch64_crypto_aese: {
3182  Value *DataArg = II->getArgOperand(0);
3183  Value *KeyArg = II->getArgOperand(1);
3184 
3185  // Try to use the builtin XOR in AESE and AESD to eliminate a prior XOR
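  // AESE/AESD xor the data and key operands before the core rounds, so e.g.
  // aese(xor(%d, %k), 0) computes the same value as aese(%d, %k).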
3186  Value *Data, *Key;
3187  if (match(KeyArg, m_ZeroInt()) &&
3188  match(DataArg, m_Xor(m_Value(Data), m_Value(Key)))) {
3189  II->setArgOperand(0, Data);
3190  II->setArgOperand(1, Key);
3191  return II;
3192  }
3193  break;
3194  }
3195  case Intrinsic::amdgcn_rcp: {
3196  Value *Src = II->getArgOperand(0);
3197 
3198  // TODO: Move to ConstantFolding/InstSimplify?
3199  if (isa<UndefValue>(Src))
3200  return replaceInstUsesWith(CI, Src);
3201 
3202  if (const ConstantFP *C = dyn_cast<ConstantFP>(Src)) {
3203  const APFloat &ArgVal = C->getValueAPF();
3204  APFloat Val(ArgVal.getSemantics(), 1.0);
3205  APFloat::opStatus Status = Val.divide(ArgVal,
3206  APFloat::rmNearestTiesToEven);
3207  // Only do this if it was exact and therefore not dependent on the
3208  // rounding mode.
3209  if (Status == APFloat::opOK)
3210  return replaceInstUsesWith(CI, ConstantFP::get(II->getContext(), Val));
3211  }
3212 
3213  break;
3214  }
3215  case Intrinsic::amdgcn_rsq: {
3216  Value *Src = II->getArgOperand(0);
3217 
3218  // TODO: Move to ConstantFolding/InstSimplify?
3219  if (isa<UndefValue>(Src))
3220  return replaceInstUsesWith(CI, Src);
3221  break;
3222  }
3223  case Intrinsic::amdgcn_frexp_mant:
3224  case Intrinsic::amdgcn_frexp_exp: {
3225  Value *Src = II->getArgOperand(0);
3226  if (const ConstantFP *C = dyn_cast<ConstantFP>(Src)) {
3227  int Exp;
3228  APFloat Significand = frexp(C->getValueAPF(), Exp,
3229  APFloat::rmNearestTiesToEven);
3230 
3231  if (II->getIntrinsicID() == Intrinsic::amdgcn_frexp_mant) {
3232  return replaceInstUsesWith(CI, ConstantFP::get(II->getContext(),
3233  Significand));
3234  }
3235 
3236  // Match instruction special case behavior.
3237  if (Exp == APFloat::IEK_NaN || Exp == APFloat::IEK_Inf)
3238  Exp = 0;
3239 
3240  return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), Exp));
3241  }
3242 
3243  if (isa<UndefValue>(Src))
3244  return replaceInstUsesWith(CI, UndefValue::get(II->getType()));
3245 
3246  break;
3247  }
3248  case Intrinsic::amdgcn_class: {
3249  enum {
3250  S_NAN = 1 << 0, // Signaling NaN
3251  Q_NAN = 1 << 1, // Quiet NaN
3252  N_INFINITY = 1 << 2, // Negative infinity
3253  N_NORMAL = 1 << 3, // Negative normal
3254  N_SUBNORMAL = 1 << 4, // Negative subnormal
3255  N_ZERO = 1 << 5, // Negative zero
3256  P_ZERO = 1 << 6, // Positive zero
3257  P_SUBNORMAL = 1 << 7, // Positive subnormal
3258  P_NORMAL = 1 << 8, // Positive normal
3259  P_INFINITY = 1 << 9 // Positive infinity
3260  };
3261 
3262  const uint32_t FullMask = S_NAN | Q_NAN | N_INFINITY | N_NORMAL |
3263  N_SUBNORMAL | N_ZERO | P_ZERO | P_SUBNORMAL | P_NORMAL | P_INFINITY;
3264 
3265  Value *Src0 = II->getArgOperand(0);
3266  Value *Src1 = II->getArgOperand(1);
3267  const ConstantInt *CMask = dyn_cast<ConstantInt>(Src1);
3268  if (!CMask) {
3269  if (isa<UndefValue>(Src0))
3270  return replaceInstUsesWith(*II, UndefValue::get(II->getType()));
3271 
3272  if (isa<UndefValue>(Src1))
3273  return replaceInstUsesWith(*II, ConstantInt::get(II->getType(), false));
3274  break;
3275  }
3276 
3277  uint32_t Mask = CMask->getZExtValue();
3278 
3279  // If all tests are made, it doesn't matter what the value is.
3280  if ((Mask & FullMask) == FullMask)
3281  return replaceInstUsesWith(*II, ConstantInt::get(II->getType(), true));
3282 
3283  if ((Mask & FullMask) == 0)
3284  return replaceInstUsesWith(*II, ConstantInt::get(II->getType(), false));
3285 
3286  if (Mask == (S_NAN | Q_NAN)) {
3287  // Equivalent of isnan. Replace with standard fcmp.
3288  Value *FCmp = Builder.CreateFCmpUNO(Src0, Src0);
3289  FCmp->takeName(II);
3290  return replaceInstUsesWith(*II, FCmp);
3291  }
3292 
3293  if (Mask == (N_ZERO | P_ZERO)) {
3294  // Equivalent of == 0.
3295  Value *FCmp = Builder.CreateFCmpOEQ(
3296  Src0, ConstantFP::get(Src0->getType(), 0.0));
3297 
3298  FCmp->takeName(II);
3299  return replaceInstUsesWith(*II, FCmp);
3300  }
3301 
3302  // fp_class (nnan x), qnan|snan|other -> fp_class (nnan x), other
3303  if (((Mask & S_NAN) || (Mask & Q_NAN)) && isKnownNeverNaN(Src0, &TLI)) {
3304  II->setArgOperand(1, ConstantInt::get(Src1->getType(),
3305  Mask & ~(S_NAN | Q_NAN)));
3306  return II;
3307  }
3308 
3309  const ConstantFP *CVal = dyn_cast<ConstantFP>(Src0);
3310  if (!CVal) {
3311  if (isa<UndefValue>(Src0))
3312  return replaceInstUsesWith(*II, UndefValue::get(II->getType()));
3313 
3314  // Clamp mask to used bits
3315  if ((Mask & FullMask) != Mask) {
3316  CallInst *NewCall = Builder.CreateCall(II->getCalledFunction(),
3317  { Src0, ConstantInt::get(Src1->getType(), Mask & FullMask) }
3318  );
3319 
3320  NewCall->takeName(II);
3321  return replaceInstUsesWith(*II, NewCall);
3322  }
3323 
3324  break;
3325  }
3326 
3327  const APFloat &Val = CVal->getValueAPF();
3328 
3329  bool Result =
3330  ((Mask & S_NAN) && Val.isNaN() && Val.isSignaling()) ||
3331  ((Mask & Q_NAN) && Val.isNaN() && !Val.isSignaling()) ||
3332  ((Mask & N_INFINITY) && Val.isInfinity() && Val.isNegative()) ||
3333  ((Mask & N_NORMAL) && Val.isNormal() && Val.isNegative()) ||
3334  ((Mask & N_SUBNORMAL) && Val.isDenormal() && Val.isNegative()) ||
3335  ((Mask & N_ZERO) && Val.isZero() && Val.isNegative()) ||
3336  ((Mask & P_ZERO) && Val.isZero() && !Val.isNegative()) ||
3337  ((Mask & P_SUBNORMAL) && Val.isDenormal() && !Val.isNegative()) ||
3338  ((Mask & P_NORMAL) && Val.isNormal() && !Val.isNegative()) ||
3339  ((Mask & P_INFINITY) && Val.isInfinity() && !Val.isNegative());
3340 
3341  return replaceInstUsesWith(*II, ConstantInt::get(II->getType(), Result));
3342  }
3343  case Intrinsic::amdgcn_cvt_pkrtz: {
3344  Value *Src0 = II->getArgOperand(0);
3345  Value *Src1 = II->getArgOperand(1);
3346  if (const ConstantFP *C0 = dyn_cast<ConstantFP>(Src0)) {
3347  if (const ConstantFP *C1 = dyn_cast<ConstantFP>(Src1)) {
3348  const fltSemantics &HalfSem
3349  = II->getType()->getScalarType()->getFltSemantics();
3350  bool LosesInfo;
3351  APFloat Val0 = C0->getValueAPF();
3352  APFloat Val1 = C1->getValueAPF();
3353  Val0.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo);
3354  Val1.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo);
3355 
3356  Constant *Folded = ConstantVector::get({
3357  ConstantFP::get(II->getContext(), Val0),
3358  ConstantFP::get(II->getContext(), Val1) });
3359  return replaceInstUsesWith(*II, Folded);
3360  }
3361  }
3362 
3363  if (isa<UndefValue>(Src0) && isa<UndefValue>(Src1))
3364  return replaceInstUsesWith(*II, UndefValue::get(II->getType()));
3365 
3366  break;
3367  }
3368  case Intrinsic::amdgcn_cvt_pknorm_i16:
3369  case Intrinsic::amdgcn_cvt_pknorm_u16:
3370  case Intrinsic::amdgcn_cvt_pk_i16:
3371  case Intrinsic::amdgcn_cvt_pk_u16: {
3372  Value *Src0 = II->getArgOperand(0);
3373  Value *Src1 = II->getArgOperand(1);
3374 
3375  if (isa<UndefValue>(Src0) && isa<UndefValue>(Src1))
3376  return replaceInstUsesWith(*II, UndefValue::get(II->getType()));
3377 
3378  break;
3379  }
3380  case Intrinsic::amdgcn_ubfe:
3381  case Intrinsic::amdgcn_sbfe: {
3382  // Decompose simple cases into standard shifts.
3383  Value *Src = II->getArgOperand(0);
3384  if (isa<UndefValue>(Src))
3385  return replaceInstUsesWith(*II, Src);
3386 
3387  unsigned Width;
3388  Type *Ty = II->getType();
3389  unsigned IntSize = Ty->getIntegerBitWidth();
3390 
3391  ConstantInt *CWidth = dyn_cast<ConstantInt>(II->getArgOperand(2));
3392  if (CWidth) {
3393  Width = CWidth->getZExtValue();
3394  if ((Width & (IntSize - 1)) == 0)
3395  return replaceInstUsesWith(*II, ConstantInt::getNullValue(Ty));
3396 
3397  if (Width >= IntSize) {
3398  // Hardware ignores high bits, so remove those.
3399  II->setArgOperand(2, ConstantInt::get(CWidth->getType(),
3400  Width & (IntSize - 1)));
3401  return II;
3402  }
3403  }
3404 
3405  unsigned Offset;
3406  ConstantInt *COffset = dyn_cast<ConstantInt>(II->getArgOperand(1));
3407  if (COffset) {
3408  Offset = COffset->getZExtValue();
3409  if (Offset >= IntSize) {
3410  II->setArgOperand(1, ConstantInt::get(COffset->getType(),
3411  Offset & (IntSize - 1)));
3412  return II;
3413  }
3414  }
3415 
3416  bool Signed = II->getIntrinsicID() == Intrinsic::amdgcn_sbfe;
3417 
3418  // TODO: Also emit sub if only width is constant.
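  // e.g. for a 32-bit ubfe with offset 0 and a variable width %w, the field is
  // extracted as (%src << (32 - %w)) >>u (32 - %w); sbfe uses an arithmetic shift.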
3419  if (!CWidth && COffset && Offset == 0) {
3420  Constant *KSize = ConstantInt::get(COffset->getType(), IntSize);
3421  Value *ShiftVal = Builder.CreateSub(KSize, II->getArgOperand(2));
3422  ShiftVal = Builder.CreateZExt(ShiftVal, II->getType());
3423 
3424  Value *Shl = Builder.CreateShl(Src, ShiftVal);
3425  Value *RightShift = Signed ? Builder.CreateAShr(Shl, ShiftVal)
3426  : Builder.CreateLShr(Shl, ShiftVal);
3427  RightShift->takeName(II);
3428  return replaceInstUsesWith(*II, RightShift);
3429  }
3430 
3431  if (!CWidth || !COffset)
3432  break;
3433 
3434  // TODO: This allows folding to undef when the hardware has specific
3435  // behavior?
3436  if (Offset + Width < IntSize) {
3437  Value *Shl = Builder.CreateShl(Src, IntSize - Offset - Width);
3438  Value *RightShift = Signed ? Builder.CreateAShr(Shl, IntSize - Width)
3439  : Builder.CreateLShr(Shl, IntSize - Width);
3440  RightShift->takeName(II);
3441  return replaceInstUsesWith(*II, RightShift);
3442  }
3443 
3444  Value *RightShift = Signed ? Builder.CreateAShr(Src, Offset)
3445  : Builder.CreateLShr(Src, Offset);
3446 
3447  RightShift->takeName(II);
3448  return replaceInstUsesWith(*II, RightShift);
3449  }
3450  case Intrinsic::amdgcn_exp:
3451  case Intrinsic::amdgcn_exp_compr: {
3452  ConstantInt *En = dyn_cast<ConstantInt>(II->getArgOperand(1));
3453  if (!En) // Illegal.
3454  break;
3455 
3456  unsigned EnBits = En->getZExtValue();
3457  if (EnBits == 0xf)
3458  break; // All inputs enabled.
3459 
3460  bool IsCompr = II->getIntrinsicID() == Intrinsic::amdgcn_exp_compr;
3461  bool Changed = false;
3462  for (int I = 0; I < (IsCompr ? 2 : 4); ++I) {
3463  if ((!IsCompr && (EnBits & (1 << I)) == 0) ||
3464  (IsCompr && ((EnBits & (0x3 << (2 * I))) == 0))) {
3465  Value *Src = II->getArgOperand(I + 2);
3466  if (!isa<UndefValue>(Src)) {
3467  II->setArgOperand(I + 2, UndefValue::get(Src->getType()));
3468  Changed = true;
3469  }
3470  }
3471  }
3472 
3473  if (Changed)
3474  return II;
3475 
3476  break;
3477  }
3478  case Intrinsic::amdgcn_fmed3: {
3479  // Note this does not preserve proper sNaN behavior if IEEE-mode is enabled
3480  // for the shader.
3481 
3482  Value *Src0 = II->getArgOperand(0);
3483  Value *Src1 = II->getArgOperand(1);
3484  Value *Src2 = II->getArgOperand(2);
3485 
3486  // Checking for NaN before canonicalization provides better fidelity when
3487  // mapping other operations onto fmed3 since the order of operands is
3488  // unchanged.
3489  CallInst *NewCall = nullptr;
3490  if (match(Src0, m_NaN()) || isa<UndefValue>(Src0)) {
3491  NewCall = Builder.CreateMinNum(Src1, Src2);
3492  } else if (match(Src1, m_NaN()) || isa<UndefValue>(Src1)) {
3493  NewCall = Builder.CreateMinNum(Src0, Src2);
3494  } else if (match(Src2, m_NaN()) || isa<UndefValue>(Src2)) {
3495  NewCall = Builder.CreateMaxNum(Src0, Src1);
3496  }
3497 
3498  if (NewCall) {
3499  NewCall->copyFastMathFlags(II);
3500  NewCall->takeName(II);
3501  return replaceInstUsesWith(*II, NewCall);
3502  }
3503 
3504  bool Swap = false;
3505  // Canonicalize constants to RHS operands.
3506  //
3507  // fmed3(c0, x, c1) -> fmed3(x, c0, c1)
3508  if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
3509  std::swap(Src0, Src1);
3510  Swap = true;
3511  }
3512 
3513  if (isa<Constant>(Src1) && !isa<Constant>(Src2)) {
3514  std::swap(Src1, Src2);
3515  Swap = true;
3516  }
3517 
3518  if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
3519  std::swap(Src0, Src1);
3520  Swap = true;
3521  }
3522 
3523  if (Swap) {
3524  II->setArgOperand(0, Src0);
3525  II->setArgOperand(1, Src1);
3526  II->setArgOperand(2, Src2);
3527  return II;
3528  }
3529 
3530  if (const ConstantFP *C0 = dyn_cast<ConstantFP>(Src0)) {
3531  if (const ConstantFP *C1 = dyn_cast<ConstantFP>(Src1)) {
3532  if (const ConstantFP *C2 = dyn_cast<ConstantFP>(Src2)) {
3533  APFloat Result = fmed3AMDGCN(C0->getValueAPF(), C1->getValueAPF(),
3534  C2->getValueAPF());
3535  return replaceInstUsesWith(*II,
3536  ConstantFP::get(Builder.getContext(), Result));
3537  }
3538  }
3539  }
3540 
3541  break;
3542  }
3543  case Intrinsic::amdgcn_icmp:
3544  case Intrinsic::amdgcn_fcmp: {
3545  const ConstantInt *CC = dyn_cast<ConstantInt>(II->getArgOperand(2));
3546  if (!CC)
3547  break;
3548 
3549  // Guard against invalid arguments.
3550  int64_t CCVal = CC->getZExtValue();
3551  bool IsInteger = II->getIntrinsicID() == Intrinsic::amdgcn_icmp;
3552  if ((IsInteger && (CCVal < CmpInst::FIRST_ICMP_PREDICATE ||
3553  CCVal > CmpInst::LAST_ICMP_PREDICATE)) ||
3554  (!IsInteger && (CCVal < CmpInst::FIRST_FCMP_PREDICATE ||
3555  CCVal > CmpInst::LAST_FCMP_PREDICATE)))
3556  break;
3557 
3558  Value *Src0 = II->getArgOperand(0);
3559  Value *Src1 = II->getArgOperand(1);
3560 
3561  if (auto *CSrc0 = dyn_cast<Constant>(Src0)) {
3562  if (auto *CSrc1 = dyn_cast<Constant>(Src1)) {
3563  Constant *CCmp = ConstantExpr::getCompare(CCVal, CSrc0, CSrc1);
3564  if (CCmp->isNullValue()) {
3565  return replaceInstUsesWith(
3566  *II, ConstantExpr::getSExt(CCmp, II->getType()));
3567  }
3568 
3569  // The result of V_ICMP/V_FCMP assembly instructions (which this
3570  // intrinsic exposes) is one bit per thread, masked with the EXEC
3571  // register (which contains the bitmask of live threads). So a
3572  // comparison that always returns true is the same as a read of the
3573  // EXEC register.
3574  Value *NewF = Intrinsic::getDeclaration(
3575  II->getModule(), Intrinsic::read_register, II->getType());
3576  Metadata *MDArgs[] = {MDString::get(II->getContext(), "exec")};
3577  MDNode *MD = MDNode::get(II->getContext(), MDArgs);
3578  Value *Args[] = {MetadataAsValue::get(II->getContext(), MD)};
3579  CallInst *NewCall = Builder.CreateCall(NewF, Args);
3580  NewCall->addAttribute(AttributeList::FunctionIndex,
3581  Attribute::Convergent);
3582  NewCall->takeName(II);
3583  return replaceInstUsesWith(*II, NewCall);
3584  }
3585 
3586  // Canonicalize constants to RHS.
3587  CmpInst::Predicate SwapPred
3588  = CmpInst::getSwappedPredicate(static_cast<CmpInst::Predicate>(CCVal));
3589  II->setArgOperand(0, Src1);
3590  II->setArgOperand(1, Src0);
3591  II->setArgOperand(2, ConstantInt::get(CC->getType(),
3592  static_cast<int>(SwapPred)));
3593  return II;
3594  }
3595 
3596  if (CCVal != CmpInst::ICMP_EQ && CCVal != CmpInst::ICMP_NE)
3597  break;
3598 
3599  // Canonicalize compare eq with true value to compare != 0
3600  // llvm.amdgcn.icmp(zext (i1 x), 1, eq)
3601  // -> llvm.amdgcn.icmp(zext (i1 x), 0, ne)
3602  // llvm.amdgcn.icmp(sext (i1 x), -1, eq)
3603  // -> llvm.amdgcn.icmp(sext (i1 x), 0, ne)
3604  Value *ExtSrc;
3605  if (CCVal == CmpInst::ICMP_EQ &&
3606  ((match(Src1, m_One()) && match(Src0, m_ZExt(m_Value(ExtSrc)))) ||
3607  (match(Src1, m_AllOnes()) && match(Src0, m_SExt(m_Value(ExtSrc))))) &&
3608  ExtSrc->getType()->isIntegerTy(1)) {
3609  II->setArgOperand(1, ConstantInt::getNullValue(Src1->getType()));
3610  II->setArgOperand(2, ConstantInt::get(CC->getType(), CmpInst::ICMP_NE));
3611  return II;
3612  }
3613 
3614  CmpInst::Predicate SrcPred;
3615  Value *SrcLHS;
3616  Value *SrcRHS;
3617 
3618  // Fold compare eq/ne with 0 from a compare result as the predicate to the
3619  // intrinsic. The typical use is a wave vote function in the library, which
3620  // will be fed from a user code condition compared with 0. Fold in the
3621  // redundant compare.
3622 
3623  // llvm.amdgcn.icmp([sz]ext ([if]cmp pred a, b), 0, ne)
3624  // -> llvm.amdgcn.[if]cmp(a, b, pred)
3625  //
3626  // llvm.amdgcn.icmp([sz]ext ([if]cmp pred a, b), 0, eq)
3627  // -> llvm.amdgcn.[if]cmp(a, b, inv pred)
3628  if (match(Src1, m_Zero()) &&
3629  match(Src0,
3630  m_ZExtOrSExt(m_Cmp(SrcPred, m_Value(SrcLHS), m_Value(SrcRHS))))) {
3631  if (CCVal == CmpInst::ICMP_EQ)
3632  SrcPred = CmpInst::getInversePredicate(SrcPred);
3633 
3634  Intrinsic::ID NewIID = CmpInst::isFPPredicate(SrcPred) ?
3635  Intrinsic::amdgcn_fcmp : Intrinsic::amdgcn_icmp;
3636 
3637  Type *Ty = SrcLHS->getType();
3638  if (auto *CmpType = dyn_cast<IntegerType>(Ty)) {
3639  // Promote to next legal integer type.
3640  unsigned Width = CmpType->getBitWidth();
3641  unsigned NewWidth = Width;
3642  if (Width <= 16)
3643  NewWidth = 16;
3644  else if (Width <= 32)
3645  NewWidth = 32;
3646  else if (Width <= 64)
3647  NewWidth = 64;
3648  else if (Width > 64)
3649  break; // Can't handle this.
3650 
3651  if (Width != NewWidth) {
3652  IntegerType *CmpTy = Builder.getIntNTy(NewWidth);
3653  if (CmpInst::isSigned(SrcPred)) {
3654  SrcLHS = Builder.CreateSExt(SrcLHS, CmpTy);
3655  SrcRHS = Builder.CreateSExt(SrcRHS, CmpTy);
3656  } else {
3657  SrcLHS = Builder.CreateZExt(SrcLHS, CmpTy);
3658  SrcRHS = Builder.CreateZExt(SrcRHS, CmpTy);
3659  }
3660  }
3661  } else if (!Ty->isFloatTy() && !Ty->isDoubleTy() && !Ty->isHalfTy())
3662  break;
3663 
3664  Value *NewF = Intrinsic::getDeclaration(II->getModule(), NewIID,
3665  SrcLHS->getType());
3666  Value *Args[] = { SrcLHS, SrcRHS,
3667  ConstantInt::get(CC->getType(), SrcPred) };
3668  CallInst *NewCall = Builder.CreateCall(NewF, Args);
3669  NewCall->takeName(II);
3670  return replaceInstUsesWith(*II, NewCall);
3671  }
3672 
3673  break;
3674  }
3675  case Intrinsic::amdgcn_wqm_vote: {
3676  // wqm_vote is identity when the argument is constant.
3677  if (!isa<Constant>(II->getArgOperand(0)))
3678  break;
3679 
3680  return replaceInstUsesWith(*II, II->getArgOperand(0));
3681  }
3682  case Intrinsic::amdgcn_kill: {
3683  const ConstantInt *C = dyn_cast<ConstantInt>(II->getArgOperand(0));
3684  if (!C || !C->getZExtValue())
3685  break;
3686 
3687  // amdgcn.kill(i1 1) is a no-op
3688  return eraseInstFromFunction(CI);
3689  }
3690  case Intrinsic::amdgcn_update_dpp: {
3691  Value *Old = II->getArgOperand(0);
3692 
3693  auto BC = dyn_cast<ConstantInt>(II->getArgOperand(5));
3694  auto RM = dyn_cast<ConstantInt>(II->getArgOperand(3));
3695  auto BM = dyn_cast<ConstantInt>(II->getArgOperand(4));
3696  if (!BC || !RM || !BM ||
3697  BC->isZeroValue() ||
3698  RM->getZExtValue() != 0xF ||
3699  BM->getZExtValue() != 0xF ||
3700  isa<UndefValue>(Old))
3701  break;
3702 
3703  // If bound_ctrl = 1 and row mask = bank mask = 0xf, we can omit the old value.
3704  II->setOperand(0, UndefValue::get(Old->getType()));
3705  return II;
3706  }
3707  case Intrinsic::stackrestore: {
3708  // If the save is right next to the restore, remove the restore. This can
3709  // happen when variable allocas are DCE'd.
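  // e.g. %sp = call i8* @llvm.stacksave() immediately followed by
  // call void @llvm.stackrestore(i8* %sp) restores a stack pointer that has
  // not changed, so the restore can be dropped.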
3710  if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
3711  if (SS->getIntrinsicID() == Intrinsic::stacksave) {
3712  // Skip over debug info.
3713  if (SS->getNextNonDebugInstruction() == II) {
3714  return eraseInstFromFunction(CI);
3715  }
3716  }
3717  }
3718 
3719  // Scan down this block to see if there is another stack restore in the
3720  // same block without an intervening call/alloca.
3721  BasicBlock::iterator BI(II);
3722  TerminatorInst *TI = II->getParent()->getTerminator();
3723  bool CannotRemove = false;
3724  for (++BI; &*BI != TI; ++BI) {
3725  if (isa<AllocaInst>(BI)) {
3726  CannotRemove = true;
3727  break;
3728  }
3729  if (CallInst *BCI = dyn_cast<CallInst>(BI)) {
3730  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(BCI)) {
3731  // If there is a stackrestore below this one, remove this one.
3732  if (II->getIntrinsicID() == Intrinsic::stackrestore)
3733  return eraseInstFromFunction(CI);
3734 
3735  // Bail if we cross over an intrinsic with side effects, such as
3736  // llvm.stacksave, llvm.read_register, or llvm.setjmp.
3737  if (II->mayHaveSideEffects()) {
3738  CannotRemove = true;
3739  break;
3740  }
3741  } else {
3742  // If we found a non-intrinsic call, we can't remove the stack
3743  // restore.
3744  CannotRemove = true;
3745  break;
3746  }
3747  }
3748  }
3749 
3750  // If the stack restore is in a return, resume, or unwind block and if there
3751  // are no allocas or calls between the restore and the return, nuke the
3752  // restore.
3753  if (!CannotRemove && (isa<ReturnInst>(TI) || isa<ResumeInst>(TI)))
3754  return eraseInstFromFunction(CI);
3755  break;
3756  }
3757  case Intrinsic::lifetime_start:
3758  // ASan needs to poison memory to detect invalid accesses, which are possible
3759  // even for an empty lifetime range.
3760  if (II->getFunction()->hasFnAttribute(Attribute::SanitizeAddress) ||
3761  II->getFunction()->hasFnAttribute(Attribute::SanitizeHWAddress))
3762  break;
3763 
3764  if (removeTriviallyEmptyRange(*II, Intrinsic::lifetime_start,
3765  Intrinsic::lifetime_end, *this))
3766  return nullptr;
3767  break;
3768  case Intrinsic::assume: {
3769  Value *IIOperand = II->getArgOperand(0);
3770  // Remove an assume if it is followed by an identical assume.
3771  // TODO: Do we need this? Unless there are conflicting assumptions, the
3772  // computeKnownBits(IIOperand) below here eliminates redundant assumes.
3773  Instruction *Next = II->getNextNonDebugInstruction();
3774  if (match(Next, m_Intrinsic<Intrinsic::assume>(m_Specific(IIOperand))))
3775  return eraseInstFromFunction(CI);
3776 
3777  // Canonicalize assume(a && b) -> assume(a); assume(b);
3778  // Note: New assumption intrinsics created here are registered by
3779  // the InstCombineIRInserter object.
3780  Value *AssumeIntrinsic = II->getCalledValue(), *A, *B;
3781  if (match(IIOperand, m_And(m_Value(A), m_Value(B)))) {
3782  Builder.CreateCall(AssumeIntrinsic, A, II->getName());
3783  Builder.CreateCall(AssumeIntrinsic, B, II->getName());
3784  return eraseInstFromFunction(*II);
3785  }
3786  // assume(!(a || b)) -> assume(!a); assume(!b);
3787  if (match(IIOperand, m_Not(m_Or(m_Value(A), m_Value(B))))) {
3788  Builder.CreateCall(AssumeIntrinsic, Builder.CreateNot(A), II->getName());
3789  Builder.CreateCall(AssumeIntrinsic, Builder.CreateNot(B), II->getName());
3790  return eraseInstFromFunction(*II);
3791  }
3792 
3793  // assume( (load addr) != null ) -> add 'nonnull' metadata to load
3794  // (if assume is valid at the load)
3795  CmpInst::Predicate Pred;
3796  Instruction *LHS;
3797  if (match(IIOperand, m_ICmp(Pred, m_Instruction(LHS), m_Zero())) &&
3798  Pred == ICmpInst::ICMP_NE && LHS->getOpcode() == Instruction::Load &&
3799  LHS->getType()->isPointerTy() &&
3800  isValidAssumeForContext(II, LHS, &DT)) {
3801  MDNode *MD = MDNode::get(II->getContext(), None);
3802  LHS->setMetadata(LLVMContext::MD_nonnull, MD);
3803  return eraseInstFromFunction(*II);
3804 
3805  // TODO: apply nonnull return attributes to calls and invokes
3806  // TODO: apply range metadata for range check patterns?
3807  }
3808 
3809  // If there is a dominating assume with the same condition as this one,
3810  // then this one is redundant, and should be removed.
3811  KnownBits Known(1);
3812  computeKnownBits(IIOperand, Known, 0, II);
3813  if (Known.isAllOnes())
3814  return eraseInstFromFunction(*II);
3815 
3816  // Update the cache of affected values for this assumption (we might be
3817  // here because we just simplified the condition).
3818  AC.updateAffectedValues(II);
3819  break;
3820  }
3821  case Intrinsic::experimental_gc_relocate: {
3822  // Translate facts known about a pointer before relocating into
3823  // facts about the relocate value, while being careful to
3824  // preserve relocation semantics.
3825  Value *DerivedPtr = cast<GCRelocateInst>(II)->getDerivedPtr();
3826 
3827  // Remove the relocation if unused; note that this check is required
3828  // to prevent the cases below from looping forever.
3829  if (II->use_empty())
3830  return eraseInstFromFunction(*II);
3831 
3832  // Undef is undef, even after relocation.
3833  // TODO: provide a hook for this in GCStrategy. This is clearly legal for
3834  // most practical collectors, but there was discussion in the review thread
3835  // about whether it was legal for all possible collectors.
3836  if (isa<UndefValue>(DerivedPtr))
3837  // Use undef of gc_relocate's type to replace it.
3838  return replaceInstUsesWith(*II, UndefValue::get(II->getType()));
3839 
3840  if (auto *PT = dyn_cast<PointerType>(II->getType())) {
3841  // The relocation of null will be null for most any collector.
3842  // TODO: provide a hook for this in GCStrategy. There might be some
3843  // weird collector this property does not hold for.
3844  if (isa<ConstantPointerNull>(DerivedPtr))
3845  // Use null-pointer of gc_relocate's type to replace it.
3846  return replaceInstUsesWith(*II, ConstantPointerNull::get(PT));
3847 
3848  // isKnownNonNull -> nonnull attribute
3849  if (isKnownNonZero(DerivedPtr, DL, 0, &AC, II, &DT))
3850  II->addAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
3851  }
3852 
3853  // TODO: bitcast(relocate(p)) -> relocate(bitcast(p))
3854  // Canonicalize on the type from the uses to the defs
3855 
3856  // TODO: relocate((gep p, C, C2, ...)) -> gep(relocate(p), C, C2, ...)
3857  break;
3858  }
3859 
3860  case Intrinsic::experimental_guard: {
3861  // Is this guard followed by another guard? We scan forward over a small
3862  // fixed window of instructions to handle common cases with conditions
3863  // computed between guards.
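  // e.g. guard(%a); %b = icmp ...; guard(%b) -- the icmp is hoisted above the
  // first guard and the two guards merge into guard(and %a, %b).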
3864  Instruction *NextInst = II->getNextNode();
3865  for (unsigned i = 0; i < GuardWideningWindow; i++) {
3866  // Note: Using context-free form to avoid compile time blow up
3867  if (!isSafeToSpeculativelyExecute(NextInst))
3868  break;
3869  NextInst = NextInst->getNextNode();
3870  }
3871  Value *NextCond = nullptr;
3872  if (match(NextInst,
3873  m_Intrinsic<Intrinsic::experimental_guard>(m_Value(NextCond)))) {
3874  Value *CurrCond = II->getArgOperand(0);
3875 
3876  // Remove a guard that is immediately preceded by an identical guard.
3877  if (CurrCond == NextCond)
3878  return eraseInstFromFunction(*NextInst);
3879 
3880  // Otherwise canonicalize guard(a); guard(b) -> guard(a & b).
3881  Instruction* MoveI = II->getNextNode();
3882  while (MoveI != NextInst) {
3883  auto *Temp = MoveI;
3884  MoveI = MoveI->getNextNode();
3885  Temp->moveBefore(II);
3886  }
3887  II->setArgOperand(0, Builder.CreateAnd(CurrCond, NextCond));
3888  return eraseInstFromFunction(*NextInst);
3889  }
3890  break;
3891  }
3892  }
3893  return visitCallSite(II);
3894 }
3895 
3896 // Fence instruction simplification
3897 Instruction *InstCombiner::visitFenceInst(FenceInst &FI) {
3898  // Remove identical consecutive fences.
3899  Instruction *Next = FI.getNextNonDebugInstruction();
3900  if (auto *NFI = dyn_cast<FenceInst>(Next))
3901  if (FI.isIdenticalTo(NFI))
3902  return eraseInstFromFunction(FI);
3903  return nullptr;
3904 }
3905 
3906 // InvokeInst simplification
3907 Instruction *InstCombiner::visitInvokeInst(InvokeInst &II) {
3908  return visitCallSite(&II);
3909 }
3910 
3911 /// If this cast does not affect the value passed through the varargs area, we
3912 /// can eliminate the use of the cast.
3913 static bool isSafeToEliminateVarargsCast(const CallSite CS,
3914  const DataLayout &DL,
3915  const CastInst *const CI,
3916  const int ix) {
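  // e.g. passing "bitcast (i32* %p to i8*)" through the varargs area can pass
  // %p directly, as long as the cast is lossless and, for byval/inalloca
  // arguments, both pointee types are sized and have the same allocation size.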
3917  if (!CI->isLosslessCast())
3918  return false;
3919 
3920  // If this is a GC intrinsic, avoid munging types. We need types for
3921  // statepoint reconstruction in SelectionDAG.
3922  // TODO: This is probably something which should be expanded to all
3923  // intrinsics since the entire point of intrinsics is that
3924  // they are understandable by the optimizer.
3925  if (isStatepoint(CS) || isGCRelocate(CS) || isGCResult(CS))
3926  return false;
3927 
3928  // The size of ByVal or InAlloca arguments is derived from the type, so we
3929  // can't change to a type with a different size. If the size were
3930  // passed explicitly we could avoid this check.
3931  if (!CS.isByValOrInAllocaArgument(ix))
3932  return true;
3933 
3934  Type* SrcTy =
3935  cast<PointerType>(CI->getOperand(0)->getType())->getElementType();
3936  Type* DstTy = cast<PointerType>(CI->getType())->getElementType();
3937  if (!SrcTy->isSized() || !DstTy->isSized())
3938  return false;
3939  if (DL.getTypeAllocSize(SrcTy) != DL.getTypeAllocSize(DstTy))
3940  return false;
3941  return true;
3942 }
3943 
3944 Instruction *InstCombiner::tryOptimizeCall(CallInst *CI) {
3945  if (!CI->getCalledFunction()) return nullptr;
3946 
3947  auto InstCombineRAUW = [this](Instruction *From, Value *With) {
3948  replaceInstUsesWith(*From, With);
3949  };
3950  LibCallSimplifier Simplifier(DL, &TLI, ORE, InstCombineRAUW);
3951  if (Value *With = Simplifier.optimizeCall(CI)) {
3952  ++NumSimplified;
3953  return CI->use_empty() ? CI : replaceInstUsesWith(*CI, With);
3954  }
3955 
3956  return nullptr;
3957 }
3958 
3959 static IntrinsicInst *findInitTrampolineFromAlloca(Value *TrampMem) {
3960  // Strip off at most one level of pointer casts, looking for an alloca. This
3961  // is good enough in practice and simpler than handling any number of casts.
3962  Value *Underlying = TrampMem->stripPointerCasts();
3963  if (Underlying != TrampMem &&
3964  (!Underlying->hasOneUse() || Underlying->user_back() != TrampMem))
3965  return nullptr;
3966  if (!isa<AllocaInst>(Underlying))
3967  return nullptr;
3968 
3969  IntrinsicInst *InitTrampoline = nullptr;
3970  for (User *U : TrampMem->users()) {
3971  IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
3972  if (!II)
3973  return nullptr;
3974  if (II->getIntrinsicID() == Intrinsic::init_trampoline) {
3975  if (InitTrampoline)
3976  // More than one init_trampoline writes to this value. Give up.
3977  return nullptr;
3978  InitTrampoline = II;
3979  continue;
3980  }
3981  if (II->getIntrinsicID() == Intrinsic::adjust_trampoline)
3982  // Allow any number of calls to adjust.trampoline.
3983  continue;
3984  return nullptr;
3985  }
3986 
3987  // No call to init.trampoline found.
3988  if (!InitTrampoline)
3989  return nullptr;
3990 
3991  // Check that the alloca is being used in the expected way.
3992  if (InitTrampoline->getOperand(0) != TrampMem)
3993  return nullptr;
3994 
3995  return InitTrampoline;
3996 }
3997 
3998 static IntrinsicInst *findInitTrampolineFromBB(IntrinsicInst *AdjustTramp,
3999  Value *TrampMem) {
4000  // Visit all the previous instructions in the basic block, and try to find a
4001  // init.trampoline which has a direct path to the adjust.trampoline.
4002  for (BasicBlock::iterator I = AdjustTramp->getIterator(),
4003  E = AdjustTramp->getParent()->begin();
4004  I != E;) {
4005  Instruction *Inst = &*--I;
4006  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
4007  if (II->getIntrinsicID() == Intrinsic::init_trampoline &&
4008  II->getOperand(0) == TrampMem)
4009  return II;
4010  if (Inst->mayWriteToMemory())
4011  return nullptr;
4012  }
4013  return nullptr;
4014 }
4015 
4016 // Given a call to llvm.adjust.trampoline, find and return the corresponding
4017 // call to llvm.init.trampoline if the call to the trampoline can be optimized
4018 // to a direct call to a function. Otherwise return NULL.
4019 static IntrinsicInst *findInitTrampoline(Value *Callee) {
4020  Callee = Callee->stripPointerCasts();
4021  IntrinsicInst *AdjustTramp = dyn_cast<IntrinsicInst>(Callee);
4022  if (!AdjustTramp ||
4023  AdjustTramp->getIntrinsicID() != Intrinsic::adjust_trampoline)
4024  return nullptr;
4025 
4026  Value *TrampMem = AdjustTramp->getOperand(0);
4027 
4028  if (IntrinsicInst *IT = findInitTrampolineFromAlloca(TrampMem))
4029  return IT;
4030  if (IntrinsicInst *IT = findInitTrampolineFromBB(AdjustTramp, TrampMem))
4031  return IT;
4032  return nullptr;
4033 }
4034 
4035 /// Improvements for call and invoke instructions.
4036 Instruction *InstCombiner::visitCallSite(CallSite CS) {
4037  if (isAllocLikeFn(CS.getInstruction(), &TLI))
4038  return visitAllocSite(*CS.getInstruction());
4039 
4040  bool Changed = false;
4041 
4042  // Mark any parameters that are known to be non-null with the nonnull
4043  // attribute. This is helpful for inlining calls to functions with null
4044  // checks on their arguments.
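  // e.g. passing the address of an alloca, which is known to be non-null,
  // lets us rewrite foo(i8* %p) as foo(i8* nonnull %p).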
4045  SmallVector<unsigned, 4> ArgNos;
4046  unsigned ArgNo = 0;
4047 
4048  for (Value *V : CS.args()) {
4049  if (V->getType()->isPointerTy() &&
4050  !CS.paramHasAttr(ArgNo, Attribute::NonNull) &&
4051  isKnownNonZero(V, DL, 0, &AC, CS.getInstruction(), &DT))
4052  ArgNos.push_back(ArgNo);
4053  ArgNo++;
4054  }
4055 
4056  assert(ArgNo == CS.arg_size() && "sanity check");
4057 
4058  if (!ArgNos.empty()) {
4059  AttributeList AS = CS.getAttributes();
4060  LLVMContext &Ctx = CS.getInstruction()->getContext();
4061  AS = AS.addParamAttribute(Ctx, ArgNos,
4062  Attribute::get(Ctx, Attribute::NonNull));
4063  CS.setAttributes(AS);
4064  Changed = true;
4065  }
4066 
4067  // If the callee is a pointer to a function, attempt to move any casts to the
4068  // arguments of the call/invoke.
4069  Value *Callee = CS.getCalledValue();
4070  if (!isa<Function>(Callee) && transformConstExprCastCall(CS))
4071  return nullptr;
4072 
4073  if (Function *CalleeF = dyn_cast<Function>(Callee)) {
4074  // Remove the convergent attr on calls when the callee is not convergent.
4075  if (CS.isConvergent() && !CalleeF->isConvergent() &&
4076  !CalleeF->isIntrinsic()) {
4077  LLVM_DEBUG(dbgs() << "Removing convergent attr from instr "
4078  << CS.getInstruction() << "\n");
4079  CS.setNotConvergent();
4080  return CS.getInstruction();
4081  }
4082 
4083  // If the call and callee calling conventions don't match, this call must
4084  // be unreachable, as the call is undefined.
4085  if (CalleeF->getCallingConv() != CS.getCallingConv() &&
4086  // Only do this for calls to a function with a body. A prototype may
4087  // not actually end up matching the implementation's calling conv for a
4088  // variety of reasons (e.g. it may be written in assembly).
4089  !CalleeF->isDeclaration()) {
4090  Instruction *OldCall = CS.getInstruction();
4091  new StoreInst(ConstantInt::getTrue(Callee->getContext()),
4092  UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
4093  OldCall);
4094  // If OldCall does not return void then replaceAllUsesWith undef.
4095  // This allows ValueHandlers and custom metadata to adjust themselves.
4096  if (!OldCall->getType()->isVoidTy())
4097  replaceInstUsesWith(*OldCall, UndefValue::get(OldCall->getType()));
4098  if (isa<CallInst>(OldCall))
4099  return eraseInstFromFunction(*OldCall);
4100 
4101  // We cannot remove an invoke, because it would change the CFG, just
4102  // change the callee to a null pointer.
4103  cast<InvokeInst>(OldCall)->setCalledFunction(
4104  Constant::getNullValue(CalleeF->getType()));
4105  return nullptr;
4106  }
4107  }
4108 
4109  if ((isa<ConstantPointerNull>(Callee) &&
4110  !NullPointerIsDefined(CS.getInstruction()->getFunction())) ||
4111  isa<UndefValue>(Callee)) {
4112  // If CS does not return void then replaceAllUsesWith undef.
4113  // This allows ValueHandlers and custom metadata to adjust themselves.
4114  if (!CS.getInstruction()->getType()->isVoidTy())
4115  replaceInstUsesWith(*CS.getInstruction(),
4116  UndefValue::get(CS.getInstruction()->getType()));
4117 
4118  if (isa<InvokeInst>(CS.getInstruction())) {
4119  // Can't remove an invoke because we cannot change the CFG.
4120  return nullptr;
4121  }
4122 
4123  // This instruction is not reachable, just remove it. We insert a store to
4124  // undef so that we know that this code is not reachable, despite the fact
4125  // that we can't modify the CFG here.
4126  new StoreInst(ConstantInt::getTrue(Callee->getContext()),
4127  UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
4128  CS.getInstruction());
4129 
4130  return eraseInstFromFunction(*CS.getInstruction());
4131  }
4132 
4133  if (IntrinsicInst *II = findInitTrampoline(Callee))
4134  return transformCallThroughTrampoline(CS, II);
4135 
4136  PointerType *PTy = cast<PointerType>(Callee->getType());
4137  FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
4138  if (FTy->isVarArg()) {
4139  int ix = FTy->getNumParams();
4140  // See if we can optimize any arguments passed through the varargs area of
4141  // the call.
4142  for (CallSite::arg_iterator I = CS.arg_begin() + FTy->getNumParams(),
4143  E = CS.arg_end(); I != E; ++I, ++ix) {
4144  CastInst *CI = dyn_cast<CastInst>(*I);
4145  if (CI && isSafeToEliminateVarargsCast(CS, DL, CI, ix)) {
4146  *I = CI->getOperand(0);
4147  Changed = true;
4148  }
4149  }
4150  }
4151 
4152  if (isa<InlineAsm>(Callee) && !CS.doesNotThrow()) {
4153  // Inline asm calls cannot throw - mark them 'nounwind'.
4154  CS.setDoesNotThrow();
4155  Changed = true;
4156  }
4157 
4158  // Try to optimize the call if possible, we require DataLayout for most of
4159  // this. None of these calls are seen as possibly dead so go ahead and
4160  // delete the instruction now.
4161  if (CallInst *CI = dyn_cast<CallInst>(CS.getInstruction())) {
4162  Instruction *I = tryOptimizeCall(CI);
4163  // If we changed something, return the result. Otherwise, fall through to
4164  // the generic handling below.
4165  if (I) return eraseInstFromFunction(*I);
4166  }
4167 
4168  return Changed ? CS.getInstruction() : nullptr;
4169 }
4170 
4171 /// If the callee is a constexpr cast of a function, attempt to move the cast to
4172 /// the arguments of the call/invoke.
4173 bool InstCombiner::transformConstExprCastCall(CallSite CS) {
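  // e.g. call i32 bitcast (i32 (i8*)* @f to i32 (i32*)*)(i32* %p) can become a
  // direct call to @f with %p cast to i8* and the result cast back if needed.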
4174  auto *Callee = dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
4175  if (!Callee)
4176  return false;
4177 
4178  // If this is a call to a thunk function, don't remove the cast. Thunks are
4179  // used to transparently forward all incoming parameters and outgoing return
4180  // values, so it's important to leave the cast in place.
4181  if (Callee->hasFnAttribute("thunk"))
4182  return false;
4183 
4184  // If this is a musttail call, the callee's prototype must match the caller's
4185  // prototype with the exception of pointee types. The code below doesn't
4186  // implement that, so we can't do this transform.
4187  // TODO: Do the transform if it only requires adding pointer casts.
4188  if (CS.isMustTailCall())
4189  return false;
4190 
4191  Instruction *Caller = CS.getInstruction();
4192  const AttributeList &CallerPAL = CS.getAttributes();
4193 
4194  // Okay, this is a cast from a function to a different type. Unless doing so
4195  // would cause a type conversion of one of our arguments, change this call to
4196  // be a direct call with arguments cast to the appropriate types.
4197  FunctionType *FT = Callee->getFunctionType();
4198  Type *OldRetTy = Caller->getType();
4199  Type *NewRetTy = FT->getReturnType();
4200 
4201  // Check to see if we are changing the return type...
4202  if (OldRetTy != NewRetTy) {
4203 
4204  if (NewRetTy->isStructTy())
4205  return false; // TODO: Handle multiple return values.
4206 
4207  if (!CastInst::isBitOrNoopPointerCastable(NewRetTy, OldRetTy, DL)) {
4208  if (Callee->isDeclaration())
4209  return false; // Cannot transform this return value.
4210 
4211  if (!Caller->use_empty() &&
4212  // void -> non-void is handled specially
4213  !NewRetTy->isVoidTy())
4214  return false; // Cannot transform this return value.
4215  }
4216 
4217  if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
4218  AttrBuilder RAttrs(CallerPAL, AttributeList::ReturnIndex);
4219  if (RAttrs.overlaps(AttributeFuncs::typeIncompatible(NewRetTy)))
4220  return false; // Attribute not compatible with transformed value.
4221  }
4222 
4223  // If the callsite is an invoke instruction, and the return value is used by
4224  // a PHI node in a successor, we cannot change the return type of the call
4225  // because there is no place to put the cast instruction (without breaking
4226  // the critical edge). Bail out in this case.
4227  if (!Caller->use_empty())
4228  if (InvokeInst *II = dyn_cast<InvokeInst>(Caller))
4229  for (User *U : II->users())
4230  if (PHINode *PN = dyn_cast<PHINode>(U))
4231  if (PN->getParent() == II->getNormalDest() ||
4232  PN->getParent() == II->getUnwindDest())
4233  return false;
4234  }
4235 
4236  unsigned NumActualArgs = CS.arg_size();
4237  unsigned NumCommonArgs = std::min(FT->getNumParams(), NumActualArgs);
4238 
4239  // Prevent us turning:
4240  // declare void @takes_i32_inalloca(i32* inalloca)
4241  // call void bitcast (void (i32*)* @takes_i32_inalloca to void (i32)*)(i32 0)
4242  //
4243  // into:
4244  // call void @takes_i32_inalloca(i32* null)
4245  //
4246  // Similarly, avoid folding away bitcasts of byval calls.
4247  if (Callee->getAttributes().hasAttrSomewhere(Attribute::InAlloca) ||
4248  Callee->getAttributes().hasAttrSomewhere(Attribute::ByVal))
4249  return false;
4250 
4251  CallSite::arg_iterator AI = CS.arg_begin();
4252  for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) {
4253  Type *ParamTy = FT->getParamType(i);
4254  Type *ActTy = (*AI)->getType();
4255 
4256  if (!CastInst::isBitOrNoopPointerCastable(ActTy, ParamTy, DL))
4257  return false; // Cannot transform this parameter value.
4258 
4259  if (AttrBuilder(CallerPAL.getParamAttributes(i))
4260  .overlaps(AttributeFuncs::typeIncompatible(ParamTy)))
4261  return false; // Attribute not compatible with transformed value.
4262 
4263  if (CS.isInAllocaArgument(i))
4264  return false; // Cannot transform to and from inalloca.
4265 
4266  // If the parameter is passed as a byval argument, then we have to have a
4267  // sized type and the sized type has to have the same size as the old type.
4268  if (ParamTy != ActTy && CallerPAL.hasParamAttribute(i, Attribute::ByVal)) {
4269  PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy);
4270  if (!ParamPTy || !ParamPTy->getElementType()->isSized())
4271  return false;
4272 
4273  Type *CurElTy = ActTy->getPointerElementType();
4274  if (DL.getTypeAllocSize(CurElTy) !=
4275  DL.getTypeAllocSize(ParamPTy->getElementType()))
4276  return false;
4277  }
4278  }
4279 
4280  if (Callee->isDeclaration()) {
4281  // Do not delete arguments unless we have a function body.
4282  if (FT->getNumParams() < NumActualArgs && !FT->isVarArg())
4283  return false;
4284 
4285  // If the callee is just a declaration, don't change the varargsness of the
4286  // call. We don't want to introduce a varargs call where one doesn't
4287  // already exist.
4288  PointerType *APTy = cast<PointerType>(CS.getCalledValue()->getType());
4289  if (FT->isVarArg()!=cast<FunctionType>(APTy->getElementType())->isVarArg())
4290  return false;
4291 
4292  // If both the callee and the cast type are varargs, we still have to make
4293  // sure the number of fixed parameters is the same, or we have the same
4294  // ABI issues as if we introduce a varargs call.
4295  if (FT->isVarArg() &&
4296  cast<FunctionType>(APTy->getElementType())->isVarArg() &&
4297  FT->getNumParams() !=
4298  cast<FunctionType>(APTy->getElementType())->getNumParams())
4299  return false;
4300  }
4301 
4302  if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
4303  !CallerPAL.isEmpty()) {
4304  // In this case we have more arguments than the new function type, but we
4305  // won't be dropping them. Check that these extra arguments have attributes
4306  // that are compatible with being a vararg call argument.
4307  unsigned SRetIdx;
4308  if (CallerPAL.hasAttrSomewhere(Attribute::StructRet, &SRetIdx) &&
4309  SRetIdx > FT->getNumParams())
4310  return false;
4311  }
4312 
4313  // Okay, we decided that this is a safe thing to do: go ahead and start
4314  // inserting cast instructions as necessary.
4315  SmallVector<Value *, 8> Args;
4316  SmallVector<AttributeSet, 8> ArgAttrs;
4317  Args.reserve(NumActualArgs);
4318  ArgAttrs.reserve(NumActualArgs);
4319 
4320  // Get any return attributes.
4321  AttrBuilder RAttrs(CallerPAL, AttributeList::ReturnIndex);
4322 
4323  // If the return value is not being used, the type may not be compatible
4324  // with the existing attributes. Wipe out any problematic attributes.
4325  RAttrs.remove(AttributeFuncs::typeIncompatible(NewRetTy));
4326 
4327  AI = CS.arg_begin();
4328  for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
4329  Type *ParamTy = FT->getParamType(i);
4330 
4331  Value *NewArg = *AI;
4332  if ((*AI)->getType() != ParamTy)
4333  NewArg = Builder.CreateBitOrPointerCast(*AI, ParamTy);
4334  Args.push_back(NewArg);
4335 
4336  // Add any parameter attributes.
4337  ArgAttrs.push_back(CallerPAL.getParamAttributes(i));
4338  }
4339 
4340  // If the function takes more arguments than the call was taking, add them
4341  // now.
4342  for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i) {
4343  Args.push_back(Constant::getNullValue(FT->getParamType(i)));
4344  ArgAttrs.push_back(AttributeSet());
4345  }
4346 
4347  // If we are removing arguments to the function, emit an obnoxious warning.
4348  if (FT->getNumParams() < NumActualArgs) {
4349  // TODO: if (!FT->isVarArg()) this call may be unreachable. PR14722
4350  if (FT->isVarArg()) {
4351  // Add all of the arguments in their promoted form to the arg list.
4352  for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) {
4353  Type *PTy = getPromotedType((*AI)->getType());
4354  Value *NewArg = *AI;
4355  if (PTy != (*AI)->getType()) {
4356  // Must promote to pass through va_arg area!
4357  Instruction::CastOps opcode =
4358  CastInst::getCastOpcode(*AI, false, PTy, false);
4359  NewArg = Builder.CreateCast(opcode, *AI, PTy);
4360  }
4361  Args.push_back(NewArg);
4362 
4363  // Add any parameter attributes.
4364  ArgAttrs.push_back(CallerPAL.getParamAttributes(i));
4365  }
4366  }
4367  }
4368 
4369  AttributeSet FnAttrs = CallerPAL.getFnAttributes();
4370 
4371  if (NewRetTy->isVoidTy())
4372  Caller->setName(""); // Void type should not have a name.
4373 
4374  assert((ArgAttrs.size() == FT->getNumParams() || FT->isVarArg()) &&
4375  "missing argument attributes");
4376  LLVMContext &Ctx = Callee->getContext();
4377  AttributeList NewCallerPAL = AttributeList::get(
4378  Ctx, FnAttrs, AttributeSet::get(Ctx, RAttrs), ArgAttrs);
4379 
4380  SmallVector<OperandBundleDef, 1> OpBundles;
4381  CS.getOperandBundlesAsDefs(OpBundles);
4382 
4383  CallSite NewCS;
4384  if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
4385  NewCS = Builder.CreateInvoke(Callee, II->getNormalDest(),
4386  II->getUnwindDest(), Args, OpBundles);
4387  } else {
4388  NewCS = Builder.CreateCall(Callee, Args, OpBundles);
4389  cast<CallInst>(NewCS.getInstruction())
4390  ->setTailCallKind(cast<CallInst>(Caller)->getTailCallKind());
4391  }
4392  NewCS->takeName(Caller);
4393  NewCS.setCallingConv(CS.getCallingConv());
4394  NewCS.setAttributes(NewCallerPAL);
4395 
4396  // Preserve the weight metadata for the new call instruction. The metadata
4397  // is used by SamplePGO to check callsite's hotness.
4398  uint64_t W;
4399  if (Caller->extractProfTotalWeight(W))
4400  NewCS->setProfWeight(W);
4401 
4402  // Insert a cast of the return type as necessary.
4403  Instruction *NC = NewCS.getInstruction();
4404  Value *NV = NC;
4405  if (OldRetTy != NV->getType() && !Caller->use_empty()) {
4406  if (!NV->getType()->isVoidTy()) {
4407  NV = NC = CastInst::CreateBitOrPointerCast(NC, OldRetTy);
4408  NC->setDebugLoc(Caller->getDebugLoc());
4409 
4410  // If this is an invoke instruction, we should insert it after the first
4411  // non-phi instruction in the normal successor block.
4412  if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
4413  BasicBlock::iterator I = II->getNormalDest()->getFirstInsertionPt();
4414  InsertNewInstBefore(NC, *I);
4415  } else {
4416  // Otherwise, it's a call, just insert cast right after the call.
4417  InsertNewInstBefore(NC, *Caller);
4418  }
4419  Worklist.AddUsersToWorkList(*Caller);
4420  } else {
4421  NV = UndefValue::get(Caller->getType());
4422  }
4423  }
4424 
4425  if (!Caller->use_empty())
4426  replaceInstUsesWith(*Caller, NV);
4427  else if (Caller->hasValueHandle()) {
4428  if (OldRetTy == NV->getType())
4429  ValueHandleBase::ValueIsRAUWd(Caller, NV);
4430  else
4431  // We cannot call ValueIsRAUWd with a different type, and the
4432  // actual tracked value will disappear.
4433  ValueHandleBase::ValueIsDeleted(Caller);
4434  }
4435 
4436  eraseInstFromFunction(*Caller);
4437  return true;
4438 }
4439 
4440 /// Turn a call to a function created by init_trampoline / adjust_trampoline
4441 /// intrinsic pair into a direct call to the underlying function.
4442 Instruction *
4443 InstCombiner::transformCallThroughTrampoline(CallSite CS,
4444  IntrinsicInst *Tramp) {
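  // e.g. a call through the pointer returned by llvm.adjust.trampoline(%tramp),
  // where %tramp was filled in by llvm.init.trampoline(%tramp, @f, %nval),
  // becomes a direct call to @f with %nval passed as the 'nest' argument.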
4445  Value *Callee = CS.getCalledValue();
4446  PointerType *PTy = cast<PointerType>(Callee->getType());
4447  FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
4448  AttributeList Attrs = CS.getAttributes();
4449 
4450  // If the call already has the 'nest' attribute somewhere then give up -
4451  // otherwise 'nest' would occur twice after splicing in the chain.
4452  if (Attrs.hasAttrSomewhere(Attribute::Nest))
4453  return nullptr;
4454 
4455  assert(Tramp &&
4456  "transformCallThroughTrampoline called with incorrect CallSite.");
4457 
4458  Function *NestF =cast<Function>(Tramp->getArgOperand(1)->stripPointerCasts());
4459  FunctionType *NestFTy = cast<FunctionType>(NestF->getValueType());
4460 
4461  AttributeList NestAttrs = NestF->getAttributes();
4462  if (!NestAttrs.isEmpty()) {
4463  unsigned NestArgNo = 0;
4464  Type *NestTy = nullptr;
4465  AttributeSet NestAttr;
4466 
4467  // Look for a parameter marked with the 'nest' attribute.
4468  for (FunctionType::param_iterator I = NestFTy->param_begin(),
4469  E = NestFTy->param_end();
4470  I != E; ++NestArgNo, ++I) {
4471  AttributeSet AS = NestAttrs.getParamAttributes(NestArgNo);
4472  if (AS.hasAttribute(Attribute::Nest)) {
4473  // Record the parameter type and any other attributes.
4474  NestTy = *I;
4475  NestAttr = AS;
4476  break;
4477  }
4478  }
4479 
4480  if (NestTy) {
4481  Instruction *Caller = CS.getInstruction();
4482  std::vector<Value*> NewArgs;
4483  std::vector<AttributeSet> NewArgAttrs;
4484  NewArgs.reserve(CS.arg_size() + 1);
4485  NewArgAttrs.reserve(CS.arg_size());
4486 
4487  // Insert the nest argument into the call argument list, which may
4488  // mean appending it. Likewise for attributes.
4489 
4490  {
4491  unsigned ArgNo = 0;
4492  CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
4493  do {
4494  if (ArgNo == NestArgNo) {
4495  // Add the chain argument and attributes.
4496  Value *NestVal = Tramp->getArgOperand(2);
4497  if (NestVal->getType() != NestTy)
4498  NestVal = Builder.CreateBitCast(NestVal, NestTy, "nest");
4499  NewArgs.push_back(NestVal);
4500  NewArgAttrs.push_back(NestAttr);
4501  }
4502 
4503  if (I == E)
4504  break;
4505 
4506  // Add the original argument and attributes.
4507  NewArgs.push_back(*I);
4508  NewArgAttrs.push_back(Attrs.getParamAttributes(ArgNo));
4509 
4510  ++ArgNo;
4511  ++I;
4512  } while (true);
4513  }
4514 
4515  // The trampoline may have been bitcast to a bogus type (FTy).
4516  // Handle this by synthesizing a new function type, equal to FTy
4517  // with the chain parameter inserted.
4518 
4519  std::vector<Type*> NewTypes;
4520  NewTypes.reserve(FTy->getNumParams()+1);
4521 
4522  // Insert the chain's type into the list of parameter types, which may
4523  // mean appending it.
4524  {
4525  unsigned ArgNo = 0;
4526  FunctionType::param_iterator I = FTy->param_begin(),
4527  E = FTy->param_end();
4528 
4529  do {
4530  if (ArgNo == NestArgNo)
4531  // Add the chain's type.
4532  NewTypes.push_back(NestTy);
4533 
4534  if (I == E)
4535  break;
4536 
4537  // Add the original type.
4538  NewTypes.push_back(*I);
4539 
4540  ++ArgNo;
4541  ++I;
4542  } while (true);
4543  }
4544 
4545  // Replace the trampoline call with a direct call. Let the generic
4546  // code sort out any function type mismatches.
4547  FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(), NewTypes,
4548  FTy->isVarArg());
4549  Constant *NewCallee =
4550  NestF->getType() == PointerType::getUnqual(NewFTy) ?
4551  NestF : ConstantExpr::getBitCast(NestF,
4552  PointerType::getUnqual(NewFTy));
4553  AttributeList NewPAL =
4554  AttributeList::get(FTy->getContext(), Attrs.getFnAttributes(),
4555  Attrs.getRetAttributes(), NewArgAttrs);
4556 
4557  SmallVector<OperandBundleDef, 1> OpBundles;
4558  CS.getOperandBundlesAsDefs(OpBundles);
4559 
4560  Instruction *NewCaller;
4561  if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
4562  NewCaller = InvokeInst::Create(NewCallee,
4563  II->getNormalDest(), II->getUnwindDest(),
4564  NewArgs, OpBundles);
4565  cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv());
4566  cast<InvokeInst>(NewCaller)->setAttributes(NewPAL);
4567  } else {
4568  NewCaller = CallInst::Create(NewCallee, NewArgs, OpBundles);
4569  cast<CallInst>(NewCaller)->setTailCallKind(
4570  cast<CallInst>(Caller)->getTailCallKind());
4571  cast<CallInst>(NewCaller)->setCallingConv(
4572  cast<CallInst>(Caller)->getCallingConv());
4573  cast<CallInst>(NewCaller)->setAttributes(NewPAL);
4574  }
4575  NewCaller->setDebugLoc(Caller->getDebugLoc());
4576 
4577  return NewCaller;
4578  }
4579  }
4580 
4581  // Replace the trampoline call with a direct call. Since there is no 'nest'
4582  // parameter, there is no need to adjust the argument list. Let the generic
4583  // code sort out any function type mismatches.
4584  Constant *NewCallee =
4585  NestF->getType() == PTy ? NestF :
4586  ConstantExpr::getBitCast(NestF, PTy);
4587  CS.setCalledFunction(NewCallee);
4588  return CS.getInstruction();
4589 }