using namespace PatternMatch;

#define DEBUG_TYPE "instcombine"

STATISTIC(NumSimplified, "Number of library calls simplified");
if (ITy->getBitWidth() < 32)
if (StructType *STy = dyn_cast<StructType>(T)) {
  if (STy->getNumElements() == 1)
    T = STy->getElementType(0);
} else if (ArrayType *ATy = dyn_cast<ArrayType>(T)) {
  if (ATy->getNumElements() == 1)
    T = ATy->getElementType();
assert((isa<ConstantInt>(Elt) || isa<ConstantFP>(Elt)) &&
       "Unexpected constant data vector element type");
           ? cast<ConstantInt>(Elt)->isNegative()
           : cast<ConstantFP>(Elt)->isNegative();
unsigned CopyAlign = MI->getAlignment();

if (CopyAlign < MinAlign) {
if (!MemOpLength) return nullptr;

assert(Size && "0-sized memory transferring should be removed already.");

if (Size > 8 || (Size & (Size - 1)))
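  // (Size & (Size - 1)) is nonzero for non-powers-of-two, so only 1-, 2-, 4-
  // and 8-byte copies fall through to the single load/store expansion below;
  // anything else is left as a memcpy/memmove call.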
    cast<PointerType>(MI->getArgOperand(1)->getType())->getAddressSpace();
    cast<PointerType>(MI->getArgOperand(0)->getType())->getAddressSpace();
if (StrippedDest != MI->getArgOperand(0)) {
  Type *SrcETy = cast<PointerType>(StrippedDest->getType())->getElementType();
  if (SrcETy->isSized() && DL.getTypeStoreSize(SrcETy) == Size) {
if (M->getNumOperands() == 3 && M->getOperand(0) &&
    mdconst::hasa<ConstantInt>(M->getOperand(0)) &&
    mdconst::extract<ConstantInt>(M->getOperand(0))->isNullValue() &&
    M->getOperand(1) && mdconst::hasa<ConstantInt>(M->getOperand(1)) &&
    mdconst::extract<ConstantInt>(M->getOperand(1))->getValue() == Size &&
    M->getOperand(2) && isa<MDNode>(M->getOperand(2)))
  CopyMD = cast<MDNode>(M->getOperand(2));
SrcAlign = std::max(SrcAlign, CopyAlign);
DstAlign = std::max(DstAlign, CopyAlign);

Value *Src = Builder->CreateBitCast(MI->getArgOperand(1), NewSrcPtrTy);
Value *Dest = Builder->CreateBitCast(MI->getArgOperand(0), NewDstPtrTy);
LoadInst *L = Builder->CreateLoad(Src, MI->isVolatile());

MDNode *LoopMemParallelMD =
    MI->getMetadata(LLVMContext::MD_mem_parallel_loop_access);
if (LoopMemParallelMD)

StoreInst *S = Builder->CreateStore(L, Dest, MI->isVolatile());

if (LoopMemParallelMD)
if (MI->getAlignment() < Alignment) {

Alignment = MI->getAlignment();
assert(Len && "0-sized memory setting should be removed already.");
Value *Dest = MI->getDest();
unsigned DstAddrSp = cast<PointerType>(Dest->getType())->getAddressSpace();

Dest = Builder->CreateBitCast(Dest, NewDstPtrTy);

if (Alignment == 0) Alignment = 1;

uint64_t Fill = FillC->getZExtValue() * 0x0101010101010101ULL;
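// Multiplying the 8-bit fill value by 0x0101010101010101 splats it across all
// eight bytes (e.g. 0xAB becomes 0xABABABABABABABAB), so a small constant
// memset can be emitted as a single integer store.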
bool LogicalShift = false;
bool ShiftLeft = false;
case Intrinsic::x86_sse2_psra_d:
case Intrinsic::x86_sse2_psra_w:
case Intrinsic::x86_sse2_psrai_d:
case Intrinsic::x86_sse2_psrai_w:
case Intrinsic::x86_avx2_psra_d:
case Intrinsic::x86_avx2_psra_w:
case Intrinsic::x86_avx2_psrai_d:
case Intrinsic::x86_avx2_psrai_w:
case Intrinsic::x86_avx512_psra_q_128:
case Intrinsic::x86_avx512_psrai_q_128:
case Intrinsic::x86_avx512_psra_q_256:
case Intrinsic::x86_avx512_psrai_q_256:
case Intrinsic::x86_avx512_psra_d_512:
case Intrinsic::x86_avx512_psra_q_512:
case Intrinsic::x86_avx512_psra_w_512:
case Intrinsic::x86_avx512_psrai_d_512:
case Intrinsic::x86_avx512_psrai_q_512:
case Intrinsic::x86_avx512_psrai_w_512:
  LogicalShift = false; ShiftLeft = false;
case Intrinsic::x86_sse2_psrl_d:
case Intrinsic::x86_sse2_psrl_q:
case Intrinsic::x86_sse2_psrl_w:
case Intrinsic::x86_sse2_psrli_d:
case Intrinsic::x86_sse2_psrli_q:
case Intrinsic::x86_sse2_psrli_w:
case Intrinsic::x86_avx2_psrl_d:
case Intrinsic::x86_avx2_psrl_q:
case Intrinsic::x86_avx2_psrl_w:
case Intrinsic::x86_avx2_psrli_d:
case Intrinsic::x86_avx2_psrli_q:
case Intrinsic::x86_avx2_psrli_w:
case Intrinsic::x86_avx512_psrl_d_512:
case Intrinsic::x86_avx512_psrl_q_512:
case Intrinsic::x86_avx512_psrl_w_512:
case Intrinsic::x86_avx512_psrli_d_512:
case Intrinsic::x86_avx512_psrli_q_512:
case Intrinsic::x86_avx512_psrli_w_512:
  LogicalShift = true; ShiftLeft = false;
case Intrinsic::x86_sse2_psll_d:
case Intrinsic::x86_sse2_psll_q:
case Intrinsic::x86_sse2_psll_w:
case Intrinsic::x86_sse2_pslli_d:
case Intrinsic::x86_sse2_pslli_q:
case Intrinsic::x86_sse2_pslli_w:
case Intrinsic::x86_avx2_psll_d:
case Intrinsic::x86_avx2_psll_q:
case Intrinsic::x86_avx2_psll_w:
case Intrinsic::x86_avx2_pslli_d:
case Intrinsic::x86_avx2_pslli_q:
case Intrinsic::x86_avx2_pslli_w:
case Intrinsic::x86_avx512_psll_d_512:
case Intrinsic::x86_avx512_psll_q_512:
case Intrinsic::x86_avx512_psll_w_512:
case Intrinsic::x86_avx512_pslli_d_512:
case Intrinsic::x86_avx512_pslli_q_512:
case Intrinsic::x86_avx512_pslli_w_512:
  LogicalShift = true; ShiftLeft = true;
assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left");
if (!CAZ && !CDV && !CInt)
auto VT = cast<VectorType>(CDV->getType());
unsigned BitWidth = VT->getElementType()->getPrimitiveSizeInBits();
assert((64 % BitWidth) == 0 && "Unexpected packed shift size");
unsigned NumSubElts = 64 / BitWidth;
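// The hardware packed shifts only read the low 64 bits of the count operand,
// so the constant count vector is folded into a single 64-bit APInt here.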
for (unsigned i = 0; i != NumSubElts; ++i) {
  unsigned SubEltIdx = (NumSubElts - 1) - i;
  auto SubElt = cast<ConstantInt>(CDV->getElementAsConstant(SubEltIdx));
  Count = Count.shl(BitWidth);

Count = CInt->getValue();
auto VT = cast<VectorType>(Vec->getType());
auto SVT = VT->getElementType();
unsigned VWidth = VT->getNumElements();
unsigned BitWidth = SVT->getPrimitiveSizeInBits();
if (Count.uge(BitWidth)) {

  Count = APInt(64, BitWidth - 1);
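  // An out-of-range arithmetic shift fills with the sign bit, which is the
  // same as shifting by exactly BitWidth - 1, so the count is clamped for the
  // ashr; logical shifts by >= BitWidth simply produce zero.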
bool LogicalShift = false;
bool ShiftLeft = false;
case Intrinsic::x86_avx2_psrav_d:
case Intrinsic::x86_avx2_psrav_d_256:
case Intrinsic::x86_avx512_psrav_q_128:
case Intrinsic::x86_avx512_psrav_q_256:
case Intrinsic::x86_avx512_psrav_d_512:
case Intrinsic::x86_avx512_psrav_q_512:
case Intrinsic::x86_avx512_psrav_w_128:
case Intrinsic::x86_avx512_psrav_w_256:
case Intrinsic::x86_avx512_psrav_w_512:
  LogicalShift = false;
case Intrinsic::x86_avx2_psrlv_d:
case Intrinsic::x86_avx2_psrlv_d_256:
case Intrinsic::x86_avx2_psrlv_q:
case Intrinsic::x86_avx2_psrlv_q_256:
case Intrinsic::x86_avx512_psrlv_d_512:
case Intrinsic::x86_avx512_psrlv_q_512:
case Intrinsic::x86_avx512_psrlv_w_128:
case Intrinsic::x86_avx512_psrlv_w_256:
case Intrinsic::x86_avx512_psrlv_w_512:
case Intrinsic::x86_avx2_psllv_d:
case Intrinsic::x86_avx2_psllv_d_256:
case Intrinsic::x86_avx2_psllv_q:
case Intrinsic::x86_avx2_psllv_q_256:
case Intrinsic::x86_avx512_psllv_d_512:
case Intrinsic::x86_avx512_psllv_q_512:
case Intrinsic::x86_avx512_psllv_w_128:
case Intrinsic::x86_avx512_psllv_w_256:
case Intrinsic::x86_avx512_psllv_w_512:
assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left");
auto VT = cast<VectorType>(II.getType());
auto SVT = VT->getVectorElementType();
int NumElts = VT->getNumElements();
int BitWidth = SVT->getIntegerBitWidth();

bool AnyOutOfRange = false;
for (int I = 0; I < NumElts; ++I) {
  auto *CElt = CShift->getAggregateElement(I);
  if (CElt && isa<UndefValue>(CElt)) {

  auto *COp = dyn_cast_or_null<ConstantInt>(CElt);

  APInt ShiftVal = COp->getValue();
  if (ShiftVal.uge(BitWidth)) {
    AnyOutOfRange = LogicalShift;
    ShiftAmts.push_back(LogicalShift ? BitWidth : BitWidth - 1);
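    // Per-element counts >= BitWidth yield zero for logical shifts (flagged
    // via AnyOutOfRange) but saturate at BitWidth - 1 for arithmetic shifts,
    // which keeps replicating the sign bit.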
auto OutOfRange = [&](int Idx) { return (Idx < 0) || (BitWidth <= Idx); };
if (all_of(ShiftAmts, OutOfRange)) {
  for (int Idx : ShiftAmts) {

assert(LogicalShift && "Logical shift expected");

for (int Idx : ShiftAmts) {
if (isa<UndefValue>(Arg))

  auto *COp = C->getAggregateElement(I);

  if (isa<UndefValue>(COp))

  if ((CInt && CInt->isNegative()) || (CFp && CFp->isNegative()))
uint8_t Imm = CInt->getZExtValue();
uint8_t ZMask = Imm & 0xf;
uint8_t DestLane = (Imm >> 4) & 0x3;
uint8_t SourceLane = (Imm >> 6) & 0x3;
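// insertps imm8 layout: bits [3:0] are the zero mask, bits [5:4] select the
// destination element, and bits [7:6] select the source element.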
uint32_t ShuffleMask[4] = { 0, 1, 2, 3 };

    (ZMask & (1 << DestLane))) {

  ShuffleMask[DestLane] = SourceLane;
  for (unsigned i = 0; i < 4; ++i)
    if ((ZMask >> i) & 0x1)
      ShuffleMask[i] = i + 4;

  ShuffleMask[DestLane] = SourceLane + 4;
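  // Shuffle indices 0-3 pick elements of the first operand; indices 4-7 reach
  // into the second operand, which is either the other source vector or a
  // constant zero vector for the zero-masked lanes.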
auto LowConstantHighUndef = [&](uint64_t Val) {

if (CILength && CIIndex) {

  unsigned Length = APLength == 0 ? 64 : APLength.getZExtValue();

  unsigned End = Index + Length;

  if ((Length % 8) == 0 && (Index % 8) == 0) {
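    // EXTRQ encodes a length of 0 as a full 64-bit extract. When both the
    // field length and index are byte-aligned, the extraction can be
    // rewritten as a byte shuffle instead of bit arithmetic.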
    for (int i = 0; i != (int)Length; ++i)

    for (int i = Length; i != 8; ++i)

    for (int i = 8; i != 16; ++i)

  APInt Elt = CI0->getValue();

Value *Args[] = {Op0, CILength, CIIndex};

if (CI0 && CI0->equalsInt(0))
  return LowConstantHighUndef(0);
  unsigned Length = APLength == 0 ? 64 : APLength.getZExtValue();

  unsigned End = Index + Length;

  if ((Length % 8) == 0 && (Index % 8) == 0) {

    for (int i = 0; i != (int)Index; ++i)

    for (int i = 0; i != (int)Length; ++i)

    for (int i = Index + Length; i != 8; ++i)

    for (int i = 8; i != 16; ++i)
    C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)0))

  APInt V00 = CI00->getValue();
  APInt V10 = CI10->getValue();

  APInt Val = V00 | V10;

Value *Args[] = {Op0, Op1, CILength, CIIndex};
auto *VecTy = cast<VectorType>(II.getType());

unsigned NumElts = VecTy->getNumElements();
assert((NumElts == 16 || NumElts == 32 || NumElts == 64) &&
       "Unexpected number of elements in shuffle mask!");
for (unsigned I = 0; I < NumElts; ++I) {

  if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))

  if (isa<UndefValue>(COp)) {

  int8_t Index = cast<ConstantInt>(COp)->getValue().getZExtValue();

  Index = ((Index < 0) ? NumElts : Index & 0x0F) + (I & 0xF0);
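  // pshufb zeroes a lane when the control byte's top bit is set; a negative
  // index therefore maps to NumElts, i.e. into a constant-zero second shuffle
  // operand, while (I & 0xF0) keeps each byte within its own 128-bit lane.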
auto *VecTy = cast<VectorType>(II.getType());

bool IsPD = VecTy->getScalarType()->isDoubleTy();
unsigned NumLaneElts = IsPD ? 2 : 4;
assert(NumElts == 16 || NumElts == 8 || NumElts == 4 || NumElts == 2);
for (unsigned I = 0; I < NumElts; ++I) {

  if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))

  if (isa<UndefValue>(COp)) {

  APInt Index = cast<ConstantInt>(COp)->getValue();

    Index = Index.lshr(1);

  Index += APInt(32, (I / NumLaneElts) * NumLaneElts);
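  // vpermilvar_pd selects on bit 1 of each control element (hence the lshr by
  // one), and every index is then rebased into its own 128-bit lane, since
  // the instruction never crosses lanes.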
auto *VecTy = cast<VectorType>(II.getType());

unsigned Size = VecTy->getNumElements();
assert((Size == 4 || Size == 8 || Size == 16 || Size == 32 || Size == 64) &&
       "Unexpected shuffle mask size");
for (unsigned I = 0; I < Size; ++I) {

  if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))

  if (isa<UndefValue>(COp)) {

  uint32_t Index = cast<ConstantInt>(COp)->getZExtValue();
uint8_t Imm = CInt->getZExtValue();

bool LowHalfZero = Imm & 0x08;
bool HighHalfZero = Imm & 0x80;

if (LowHalfZero && HighHalfZero)

unsigned HalfSize = NumElts / 2;

bool LowInputSelect = Imm & 0x02;
bool HighInputSelect = Imm & 0x20;

bool LowHalfSelect = Imm & 0x01;
bool HighHalfSelect = Imm & 0x10;

V0 = LowHalfZero ? ZeroVector : V0;
V1 = HighHalfZero ? ZeroVector : V1;
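// vperm2f128/vperm2i128 imm8: bits 0-1 choose which 128-bit half of which
// source feeds the low half of the result, bits 4-5 do the same for the high
// half, and bits 3/7 force the corresponding half to zero, modeled here by
// substituting a zero vector.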
unsigned StartIndex = LowHalfSelect ? HalfSize : 0;
for (unsigned i = 0; i < HalfSize; ++i)
  ShuffleMask[i] = StartIndex + i;

StartIndex = HighHalfSelect ? HalfSize : 0;
StartIndex += NumElts;
for (unsigned i = 0; i < HalfSize; ++i)
  ShuffleMask[i + HalfSize] = StartIndex + i;
if (auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
  uint64_t Imm = CInt->getZExtValue() & 0x7;
if (auto *C = dyn_cast<ConstantInt>(Mask))
  if (C->getValue().zextOrTrunc(VWidth).isAllOnesValue())

for (unsigned i = 0; i != VWidth; ++i)
if (C1 && C1->isNaN())

if (isa<UndefValue>(Arg0))

if (isa<UndefValue>(Arg1))

if (Arg0 == X || Arg0 == Y)

if (Arg1 == X || Arg1 == Y)

if (C1 && C1->isInfinity()) {
  if (C1->isNegative())

if (Arg0 == X || Arg0 == Y)

if (Arg1 == X || Arg1 == Y)

if (C1 && C1->isInfinity()) {
  if (!C1->isNegative())
if (ConstMask->isAllOnesValue() || isa<UndefValue>(ConstMask))

for (unsigned I = 0, E = ConstMask->getType()->getVectorNumElements(); I != E;

  if (auto *MaskElt = ConstMask->getAggregateElement(I))
    if (MaskElt->isAllOnesValue() || isa<UndefValue>(MaskElt))
unsigned Alignment = cast<ConstantInt>(II.getArgOperand(1))->getZExtValue();

if (ConstMask->isNullValue())

if (ConstMask->isAllOnesValue()) {
  unsigned Alignment = cast<ConstantInt>(II.getArgOperand(2))->getZExtValue();

if (ConstMask && ConstMask->isNullValue())

if (ConstMask && ConstMask->isNullValue())
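// A masked store, gather, or scatter whose mask is constant zero touches no
// memory and can be erased; an all-ones mask degenerates to the ordinary
// load or store instruction.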
1212 "Expected cttz or ctlz intrinsic");
1219 unsigned BitWidth =
IT->getBitWidth();
1220 APInt KnownZero(BitWidth, 0);
1221 APInt KnownOne(BitWidth, 0);
if ((Mask & KnownZero) == Mask) {

if (isa<ConstantAggregateZero>(Mask))

unsigned AddrSpace = cast<PointerType>(Ptr->getType())->getAddressSpace();

if (isa<ConstantAggregateZero>(Mask)) {

unsigned AddrSpace = cast<PointerType>(Ptr->getType())->getAddressSpace();
                             unsigned NumOperands) {

for (unsigned i = 0; i < NumOperands; i++)

       "Start intrinsic does not have expected ID");
for (++BI; BI != BE; ++BI) {
  if (auto *E = dyn_cast<IntrinsicInst>(BI)) {
    if (isa<DbgInfoIntrinsic>(E) || E->getIntrinsicID() == StartID)

    if (E->getIntrinsicID() == EndID &&
  return replaceInstUsesWith(CI, V);

  return visitFree(CI);

if (!II) return visitCallSite(&CI);

bool Changed = false;
if (Constant *NumBytes = dyn_cast<Constant>(MI->getLength())) {
  if (NumBytes->isNullValue())
    return eraseInstFromFunction(CI);

  if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes))
    if (CI->getZExtValue() == 1) {
if (MI->isVolatile())

if (MemMoveInst *MMI = dyn_cast<MemMoveInst>(MI)) {
  if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(MMI->getSource()))
    if (GVSrc->isConstant()) {

if (MTI->getSource() == MTI->getDest())
  return eraseInstFromFunction(CI);
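// A memmove from a constant global cannot alias its destination, so it can
// be strengthened to memcpy; a transfer whose source and destination are the
// same pointer is simply dead.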
if (isa<MemTransferInst>(MI)) {

} else if (MemSetInst *MSI = dyn_cast<MemSetInst>(MI)) {

if (Changed) return II;
auto SimplifyDemandedVectorEltsLow = [this](Value *Op, unsigned Width,
                                            unsigned DemandedWidth) {
  APInt UndefElts(Width, 0);

  return SimplifyDemandedVectorElts(Op, DemandedElts, UndefElts);
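// Helper used by the scalar SSE/AVX cases below: only the low DemandedWidth
// elements of Op are demanded, so computation feeding the upper elements can
// be simplified away.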
case Intrinsic::objectsize:

    return replaceInstUsesWith(CI, N);

case Intrinsic::bswap: {

    return replaceInstUsesWith(CI, X);

  Value *V = Builder->CreateLShr(X, CV);
case Intrinsic::bitreverse: {

  if (match(IIOperand, m_Intrinsic<Intrinsic::bitreverse>(m_Value(X))))
    return replaceInstUsesWith(CI, X);
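  // bitreverse(bitreverse(x)) == x, mirroring the bswap-of-bswap fold above.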
case Intrinsic::masked_load:
    return replaceInstUsesWith(CI, SimplifiedMaskedOp);
case Intrinsic::masked_store:
case Intrinsic::masked_gather:
case Intrinsic::masked_scatter:

case Intrinsic::powi:
    if (Power->isZero())
    if (Power->isAllOnesValue())
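    // powi(x, 0) folds to 1.0; powi(x, -1) (an all-ones exponent) folds to
    // the single division 1.0 / x.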
case Intrinsic::cttz:
case Intrinsic::ctlz:
case Intrinsic::uadd_with_overflow:
case Intrinsic::sadd_with_overflow:
case Intrinsic::umul_with_overflow:
case Intrinsic::smul_with_overflow:

case Intrinsic::usub_with_overflow:
case Intrinsic::ssub_with_overflow: {

  Value *OperationResult = nullptr;

  return CreateOverflowTuple(II, OperationResult, OverflowResult);
  if (isa<ConstantFP>(Arg0) && !isa<ConstantFP>(Arg1)) {

  return replaceInstUsesWith(*II, V);
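  // Canonicalize a constant operand of minnum/maxnum to the RHS so the folds
  // that follow only need to inspect Arg1.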
case Intrinsic::fma:
case Intrinsic::fmuladd: {

  if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {

  Value *LHS = nullptr;
  Value *RHS = nullptr;

  if (match(Src0, m_Intrinsic<Intrinsic::fabs>(m_Value(LHS))) &&
      match(Src1, m_Intrinsic<Intrinsic::fabs>(m_Value(RHS))) && LHS == RHS) {
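    // fma(fabs(x), fabs(x), z) has the same value as fma(x, x, z): the
    // product of a value with itself is already non-negative.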
case Intrinsic::fabs: {

case Intrinsic::cos:
case Intrinsic::amdgcn_cos: {

      match(Src, m_Intrinsic<Intrinsic::fabs>(m_Value(SrcSrc)))) {
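    // cos(-x) == cos(x) and cos(fabs(x)) == cos(x): the argument's sign is
    // irrelevant, so an inner fneg/fabs is stripped.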
case Intrinsic::ppc_altivec_lvx:
case Intrinsic::ppc_altivec_lvxl:

    Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
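    // When the pointer is known to be at least 16-byte aligned, these VMX
    // load intrinsics behave like a plain aligned vector load through a
    // bitcast pointer.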
case Intrinsic::ppc_vsx_lxvw4x:
case Intrinsic::ppc_vsx_lxvd2x: {

  Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
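  // The VSX intrinsics perform an unaligned load, so they can always be
  // rewritten as a normal load with alignment 1.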
case Intrinsic::ppc_altivec_stvx:
case Intrinsic::ppc_altivec_stvxl:

    Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy);
case Intrinsic::ppc_vsx_stxvw4x:
case Intrinsic::ppc_vsx_stxvd2x: {

  Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy);
  return new StoreInst(II->getArgOperand(0), Ptr, false, 1);
case Intrinsic::ppc_qpx_qvlfs:

                                   II->getType()->getVectorNumElements());
    Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),

case Intrinsic::ppc_qpx_qvlfd:

    Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
case Intrinsic::ppc_qpx_qvstfs:

        II->getArgOperand(0)->getType()->getVectorNumElements());
    Value *TOp = Builder->CreateFPTrunc(II->getArgOperand(0), VTy);

    Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy);

case Intrinsic::ppc_qpx_qvstfd:

    Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy);
case Intrinsic::x86_vcvtph2ps_128:
case Intrinsic::x86_vcvtph2ps_256: {
  auto Arg = II->getArgOperand(0);
  auto ArgType = cast<VectorType>(Arg->getType());
  auto RetType = cast<VectorType>(II->getType());
  unsigned ArgWidth = ArgType->getNumElements();
  unsigned RetWidth = RetType->getNumElements();
  assert(RetWidth <= ArgWidth && "Unexpected input/return vector widths");
  assert(ArgType->isIntOrIntVectorTy() &&
         ArgType->getScalarSizeInBits() == 16 &&
         "CVTPH2PS input type should be 16-bit integer vector");
  assert(RetType->getScalarType()->isFloatTy() &&
         "CVTPH2PS output type should be 32-bit float vector");
  if (isa<ConstantAggregateZero>(Arg))

  if (isa<ConstantDataVector>(Arg)) {
    auto VectorHalfAsShorts = Arg;
    if (RetWidth < ArgWidth) {

      for (unsigned i = 0; i != RetWidth; ++i)

      VectorHalfAsShorts = Builder->CreateShuffleVector(

    auto VectorHalfType =

        Builder->CreateBitCast(VectorHalfAsShorts, VectorHalfType);
    auto VectorFloats = Builder->CreateFPExt(VectorHalfs, RetType);
    return replaceInstUsesWith(*II, VectorFloats);
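    // Constant-folds cvtph2ps by reinterpreting the i16 payload as an IEEE
    // half vector and letting a plain fpext widen it to float.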
  if (Value *V = SimplifyDemandedVectorEltsLow(Arg, ArgWidth, RetWidth)) {
    II->setArgOperand(0, V);
case Intrinsic::x86_sse_cvtss2si:
case Intrinsic::x86_sse_cvtss2si64:
case Intrinsic::x86_sse_cvttss2si:
case Intrinsic::x86_sse_cvttss2si64:
case Intrinsic::x86_sse2_cvtsd2si:
case Intrinsic::x86_sse2_cvtsd2si64:
case Intrinsic::x86_sse2_cvttsd2si:
case Intrinsic::x86_sse2_cvttsd2si64:
case Intrinsic::x86_avx512_vcvtss2si32:
case Intrinsic::x86_avx512_vcvtss2si64:
case Intrinsic::x86_avx512_vcvtss2usi32:
case Intrinsic::x86_avx512_vcvtss2usi64:
case Intrinsic::x86_avx512_vcvtsd2si32:
case Intrinsic::x86_avx512_vcvtsd2si64:
case Intrinsic::x86_avx512_vcvtsd2usi32:
case Intrinsic::x86_avx512_vcvtsd2usi64:
case Intrinsic::x86_avx512_cvttss2si:
case Intrinsic::x86_avx512_cvttss2si64:
case Intrinsic::x86_avx512_cvttss2usi:
case Intrinsic::x86_avx512_cvttss2usi64:
case Intrinsic::x86_avx512_cvttsd2si:
case Intrinsic::x86_avx512_cvttsd2si64:
case Intrinsic::x86_avx512_cvttsd2usi:
case Intrinsic::x86_avx512_cvttsd2usi64: {
  Value *Arg = II->getArgOperand(0);

  if (Value *V = SimplifyDemandedVectorEltsLow(Arg, VWidth, 1)) {
    II->setArgOperand(0, V);
case Intrinsic::x86_mmx_pmovmskb:
case Intrinsic::x86_sse_movmsk_ps:
case Intrinsic::x86_sse2_movmsk_pd:
case Intrinsic::x86_sse2_pmovmskb_128:
case Intrinsic::x86_avx_movmsk_pd_256:
case Intrinsic::x86_avx_movmsk_ps_256:
case Intrinsic::x86_avx2_pmovmskb: {

    return replaceInstUsesWith(*II, V);
case Intrinsic::x86_sse_comieq_ss:
case Intrinsic::x86_sse_comige_ss:
case Intrinsic::x86_sse_comigt_ss:
case Intrinsic::x86_sse_comile_ss:
case Intrinsic::x86_sse_comilt_ss:
case Intrinsic::x86_sse_comineq_ss:
case Intrinsic::x86_sse_ucomieq_ss:
case Intrinsic::x86_sse_ucomige_ss:
case Intrinsic::x86_sse_ucomigt_ss:
case Intrinsic::x86_sse_ucomile_ss:
case Intrinsic::x86_sse_ucomilt_ss:
case Intrinsic::x86_sse_ucomineq_ss:
case Intrinsic::x86_sse2_comieq_sd:
case Intrinsic::x86_sse2_comige_sd:
case Intrinsic::x86_sse2_comigt_sd:
case Intrinsic::x86_sse2_comile_sd:
case Intrinsic::x86_sse2_comilt_sd:
case Intrinsic::x86_sse2_comineq_sd:
case Intrinsic::x86_sse2_ucomieq_sd:
case Intrinsic::x86_sse2_ucomige_sd:
case Intrinsic::x86_sse2_ucomigt_sd:
case Intrinsic::x86_sse2_ucomile_sd:
case Intrinsic::x86_sse2_ucomilt_sd:
case Intrinsic::x86_sse2_ucomineq_sd:
case Intrinsic::x86_avx512_vcomi_ss:
case Intrinsic::x86_avx512_vcomi_sd:
case Intrinsic::x86_avx512_mask_cmp_ss:
case Intrinsic::x86_avx512_mask_cmp_sd: {

  bool MadeChange = false;
  Value *Arg0 = II->getArgOperand(0);
  Value *Arg1 = II->getArgOperand(1);

  if (Value *V = SimplifyDemandedVectorEltsLow(Arg0, VWidth, 1)) {
    II->setArgOperand(0, V);

  if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, 1)) {
    II->setArgOperand(1, V);
case Intrinsic::x86_avx512_mask_add_ps_512:
case Intrinsic::x86_avx512_mask_div_ps_512:
case Intrinsic::x86_avx512_mask_mul_ps_512:
case Intrinsic::x86_avx512_mask_sub_ps_512:
case Intrinsic::x86_avx512_mask_add_pd_512:
case Intrinsic::x86_avx512_mask_div_pd_512:
case Intrinsic::x86_avx512_mask_mul_pd_512:
case Intrinsic::x86_avx512_mask_sub_pd_512:
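  // Operand 4 is the rounding-mode immediate; the value 4 means
  // CUR_DIRECTION, in which case the intrinsic behaves like the default IR
  // floating-point operation and can be replaced by it.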
  if (auto *R = dyn_cast<ConstantInt>(II->getArgOperand(4))) {
    if (R->getValue() == 4) {
      Value *Arg0 = II->getArgOperand(0);
      Value *Arg1 = II->getArgOperand(1);

      switch (II->getIntrinsicID()) {

      case Intrinsic::x86_avx512_mask_add_ps_512:
      case Intrinsic::x86_avx512_mask_add_pd_512:
        V = Builder->CreateFAdd(Arg0, Arg1);
      case Intrinsic::x86_avx512_mask_sub_ps_512:
      case Intrinsic::x86_avx512_mask_sub_pd_512:
        V = Builder->CreateFSub(Arg0, Arg1);
      case Intrinsic::x86_avx512_mask_mul_ps_512:
      case Intrinsic::x86_avx512_mask_mul_pd_512:
        V = Builder->CreateFMul(Arg0, Arg1);
      case Intrinsic::x86_avx512_mask_div_ps_512:
      case Intrinsic::x86_avx512_mask_div_pd_512:
        V = Builder->CreateFDiv(Arg0, Arg1);

      return replaceInstUsesWith(*II, V);
case Intrinsic::x86_avx512_mask_add_ss_round:
case Intrinsic::x86_avx512_mask_div_ss_round:
case Intrinsic::x86_avx512_mask_mul_ss_round:
case Intrinsic::x86_avx512_mask_sub_ss_round:
case Intrinsic::x86_avx512_mask_add_sd_round:
case Intrinsic::x86_avx512_mask_div_sd_round:
case Intrinsic::x86_avx512_mask_mul_sd_round:
case Intrinsic::x86_avx512_mask_sub_sd_round:
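  // Same idea as the packed forms above, but the arithmetic happens on
  // element 0 only: the scalars are extracted, operated on, blended with the
  // passthru under the mask, and reinserted into the vector.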
  if (auto *R = dyn_cast<ConstantInt>(II->getArgOperand(4))) {
    if (R->getValue() == 4) {

      Value *Arg0 = II->getArgOperand(0);
      Value *Arg1 = II->getArgOperand(1);
      Value *LHS = Builder->CreateExtractElement(Arg0, (uint64_t)0);
      Value *RHS = Builder->CreateExtractElement(Arg1, (uint64_t)0);

      switch (II->getIntrinsicID()) {

      case Intrinsic::x86_avx512_mask_add_ss_round:
      case Intrinsic::x86_avx512_mask_add_sd_round:
        V = Builder->CreateFAdd(LHS, RHS);
      case Intrinsic::x86_avx512_mask_sub_ss_round:
      case Intrinsic::x86_avx512_mask_sub_sd_round:
        V = Builder->CreateFSub(LHS, RHS);
      case Intrinsic::x86_avx512_mask_mul_ss_round:
      case Intrinsic::x86_avx512_mask_mul_sd_round:
        V = Builder->CreateFMul(LHS, RHS);
      case Intrinsic::x86_avx512_mask_div_ss_round:
      case Intrinsic::x86_avx512_mask_div_sd_round:
        V = Builder->CreateFDiv(LHS, RHS);
      if (!C || !C->getValue()[0]) {

        Mask = Builder->CreateBitCast(Mask, MaskTy);
        Mask = Builder->CreateExtractElement(Mask, (uint64_t)0);

        Value *Passthru = Builder->CreateExtractElement(II->getArgOperand(2),

        V = Builder->CreateSelect(Mask, V, Passthru);

      V = Builder->CreateInsertElement(Arg0, V, (uint64_t)0);

      return replaceInstUsesWith(*II, V);
case Intrinsic::x86_avx512_mask_max_ss_round:
case Intrinsic::x86_avx512_mask_min_ss_round:
case Intrinsic::x86_avx512_mask_max_sd_round:
case Intrinsic::x86_avx512_mask_min_sd_round:
case Intrinsic::x86_avx512_mask_vfmadd_ss:
case Intrinsic::x86_avx512_mask_vfmadd_sd:
case Intrinsic::x86_avx512_maskz_vfmadd_ss:
case Intrinsic::x86_avx512_maskz_vfmadd_sd:
case Intrinsic::x86_avx512_mask3_vfmadd_ss:
case Intrinsic::x86_avx512_mask3_vfmadd_sd:
case Intrinsic::x86_avx512_mask3_vfmsub_ss:
case Intrinsic::x86_avx512_mask3_vfmsub_sd:
case Intrinsic::x86_avx512_mask3_vfnmsub_ss:
case Intrinsic::x86_avx512_mask3_vfnmsub_sd:
case Intrinsic::x86_fma_vfmadd_ss:
case Intrinsic::x86_fma_vfmsub_ss:
case Intrinsic::x86_fma_vfnmadd_ss:
case Intrinsic::x86_fma_vfnmsub_ss:
case Intrinsic::x86_fma_vfmadd_sd:
case Intrinsic::x86_fma_vfmsub_sd:
case Intrinsic::x86_fma_vfnmadd_sd:
case Intrinsic::x86_fma_vfnmsub_sd:
case Intrinsic::x86_sse_cmp_ss:
case Intrinsic::x86_sse_min_ss:
case Intrinsic::x86_sse_max_ss:
case Intrinsic::x86_sse2_cmp_sd:
case Intrinsic::x86_sse2_min_sd:
case Intrinsic::x86_sse2_max_sd:
case Intrinsic::x86_sse41_round_ss:
case Intrinsic::x86_sse41_round_sd:
case Intrinsic::x86_xop_vfrcz_ss:
case Intrinsic::x86_xop_vfrcz_sd: {
  unsigned VWidth = II->getType()->getVectorNumElements();
  APInt UndefElts(VWidth, 0);

  if (Value *V = SimplifyDemandedVectorElts(II, AllOnesEltMask, UndefElts)) {

    return replaceInstUsesWith(*II, V);
case Intrinsic::x86_sse2_psrai_d:
case Intrinsic::x86_sse2_psrai_w:
case Intrinsic::x86_avx2_psrai_d:
case Intrinsic::x86_avx2_psrai_w:
case Intrinsic::x86_avx512_psrai_q_128:
case Intrinsic::x86_avx512_psrai_q_256:
case Intrinsic::x86_avx512_psrai_d_512:
case Intrinsic::x86_avx512_psrai_q_512:
case Intrinsic::x86_avx512_psrai_w_512:
case Intrinsic::x86_sse2_psrli_d:
case Intrinsic::x86_sse2_psrli_q:
case Intrinsic::x86_sse2_psrli_w:
case Intrinsic::x86_avx2_psrli_d:
case Intrinsic::x86_avx2_psrli_q:
case Intrinsic::x86_avx2_psrli_w:
case Intrinsic::x86_avx512_psrli_d_512:
case Intrinsic::x86_avx512_psrli_q_512:
case Intrinsic::x86_avx512_psrli_w_512:
case Intrinsic::x86_sse2_pslli_d:
case Intrinsic::x86_sse2_pslli_q:
case Intrinsic::x86_sse2_pslli_w:
case Intrinsic::x86_avx2_pslli_d:
case Intrinsic::x86_avx2_pslli_q:
case Intrinsic::x86_avx2_pslli_w:
case Intrinsic::x86_avx512_pslli_d_512:
case Intrinsic::x86_avx512_pslli_q_512:
case Intrinsic::x86_avx512_pslli_w_512:
    return replaceInstUsesWith(*II, V);
case Intrinsic::x86_sse2_psra_d:
case Intrinsic::x86_sse2_psra_w:
case Intrinsic::x86_avx2_psra_d:
case Intrinsic::x86_avx2_psra_w:
case Intrinsic::x86_avx512_psra_q_128:
case Intrinsic::x86_avx512_psra_q_256:
case Intrinsic::x86_avx512_psra_d_512:
case Intrinsic::x86_avx512_psra_q_512:
case Intrinsic::x86_avx512_psra_w_512:
case Intrinsic::x86_sse2_psrl_d:
case Intrinsic::x86_sse2_psrl_q:
case Intrinsic::x86_sse2_psrl_w:
case Intrinsic::x86_avx2_psrl_d:
case Intrinsic::x86_avx2_psrl_q:
case Intrinsic::x86_avx2_psrl_w:
case Intrinsic::x86_avx512_psrl_d_512:
case Intrinsic::x86_avx512_psrl_q_512:
case Intrinsic::x86_avx512_psrl_w_512:
case Intrinsic::x86_sse2_psll_d:
case Intrinsic::x86_sse2_psll_q:
case Intrinsic::x86_sse2_psll_w:
case Intrinsic::x86_avx2_psll_d:
case Intrinsic::x86_avx2_psll_q:
case Intrinsic::x86_avx2_psll_w:
case Intrinsic::x86_avx512_psll_d_512:
case Intrinsic::x86_avx512_psll_q_512:
case Intrinsic::x86_avx512_psll_w_512: {

    return replaceInstUsesWith(*II, V);

  Value *Arg1 = II->getArgOperand(1);

         "Unexpected packed shift size");

  if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, VWidth / 2)) {
    II->setArgOperand(1, V);
case Intrinsic::x86_avx2_psllv_d:
case Intrinsic::x86_avx2_psllv_d_256:
case Intrinsic::x86_avx2_psllv_q:
case Intrinsic::x86_avx2_psllv_q_256:
case Intrinsic::x86_avx512_psllv_d_512:
case Intrinsic::x86_avx512_psllv_q_512:
case Intrinsic::x86_avx512_psllv_w_128:
case Intrinsic::x86_avx512_psllv_w_256:
case Intrinsic::x86_avx512_psllv_w_512:
case Intrinsic::x86_avx2_psrav_d:
case Intrinsic::x86_avx2_psrav_d_256:
case Intrinsic::x86_avx512_psrav_q_128:
case Intrinsic::x86_avx512_psrav_q_256:
case Intrinsic::x86_avx512_psrav_d_512:
case Intrinsic::x86_avx512_psrav_q_512:
case Intrinsic::x86_avx512_psrav_w_128:
case Intrinsic::x86_avx512_psrav_w_256:
case Intrinsic::x86_avx512_psrav_w_512:
case Intrinsic::x86_avx2_psrlv_d:
case Intrinsic::x86_avx2_psrlv_d_256:
case Intrinsic::x86_avx2_psrlv_q:
case Intrinsic::x86_avx2_psrlv_q_256:
case Intrinsic::x86_avx512_psrlv_d_512:
case Intrinsic::x86_avx512_psrlv_q_512:
case Intrinsic::x86_avx512_psrlv_w_128:
case Intrinsic::x86_avx512_psrlv_w_256:
case Intrinsic::x86_avx512_psrlv_w_512:
    return replaceInstUsesWith(*II, V);
case Intrinsic::x86_sse2_pmulu_dq:
case Intrinsic::x86_sse41_pmuldq:
case Intrinsic::x86_avx2_pmul_dq:
case Intrinsic::x86_avx2_pmulu_dq:
case Intrinsic::x86_avx512_pmul_dq_512:
case Intrinsic::x86_avx512_pmulu_dq_512: {
  unsigned VWidth = II->getType()->getVectorNumElements();
  APInt UndefElts(VWidth, 0);

  if (Value *V = SimplifyDemandedVectorElts(II, DemandedElts, UndefElts)) {

    return replaceInstUsesWith(*II, V);
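  // pmuldq/pmuludq only read the even-numbered 32-bit elements of each
  // source vector, so the odd elements can be marked undemanded.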
case Intrinsic::x86_sse41_insertps:
    return replaceInstUsesWith(*II, V);

case Intrinsic::x86_sse4a_extrq: {
  Value *Op0 = II->getArgOperand(0);
  Value *Op1 = II->getArgOperand(1);

         VWidth1 == 16 && "Unexpected operand sizes");

      C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)0))

      C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)1))

    return replaceInstUsesWith(*II, V);

  bool MadeChange = false;
  if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
    II->setArgOperand(0, V);

  if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 2)) {
    II->setArgOperand(1, V);
case Intrinsic::x86_sse4a_extrqi: {

  Value *Op0 = II->getArgOperand(0);

         "Unexpected operand size");

    return replaceInstUsesWith(*II, V);

  if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) {
    II->setArgOperand(0, V);
case Intrinsic::x86_sse4a_insertq: {
  Value *Op0 = II->getArgOperand(0);
  Value *Op1 = II->getArgOperand(1);

         "Unexpected operand size");

      C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)1))

    const APInt &V11 = CI11->getValue();

      return replaceInstUsesWith(*II, V);

  if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) {
    II->setArgOperand(0, V);
case Intrinsic::x86_sse4a_insertqi: {

  Value *Op0 = II->getArgOperand(0);
  Value *Op1 = II->getArgOperand(1);

         VWidth1 == 2 && "Unexpected operand sizes");

  if (CILength && CIIndex) {
    APInt Len = CILength->getValue().zextOrTrunc(6);

      return replaceInstUsesWith(*II, V);

  bool MadeChange = false;
  if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
    II->setArgOperand(0, V);

  if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 1)) {
    II->setArgOperand(1, V);
case Intrinsic::x86_sse41_pblendvb:
case Intrinsic::x86_sse41_blendvps:
case Intrinsic::x86_sse41_blendvpd:
case Intrinsic::x86_avx_blendv_ps_256:
case Intrinsic::x86_avx_blendv_pd_256:
case Intrinsic::x86_avx2_pblendvb: {

  Value *Op0 = II->getArgOperand(0);
  Value *Op1 = II->getArgOperand(1);

    return replaceInstUsesWith(CI, Op0);

  if (isa<ConstantAggregateZero>(Mask))
    return replaceInstUsesWith(CI, Op0);

  if (auto *ConstantMask = dyn_cast<ConstantDataVector>(Mask)) {
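    // blendv selects on the sign bit of each mask element, so a constant mask
    // converts directly to an IR select whose condition is "element is
    // negative".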
case Intrinsic::x86_ssse3_pshuf_b_128:
case Intrinsic::x86_avx2_pshuf_b:
case Intrinsic::x86_avx512_pshuf_b_512:
    return replaceInstUsesWith(*II, V);

case Intrinsic::x86_avx_vpermilvar_ps:
case Intrinsic::x86_avx_vpermilvar_ps_256:
case Intrinsic::x86_avx512_vpermilvar_ps_512:
case Intrinsic::x86_avx_vpermilvar_pd:
case Intrinsic::x86_avx_vpermilvar_pd_256:
case Intrinsic::x86_avx512_vpermilvar_pd_512:
    return replaceInstUsesWith(*II, V);

case Intrinsic::x86_avx2_permd:
case Intrinsic::x86_avx2_permps:
    return replaceInstUsesWith(*II, V);
case Intrinsic::x86_avx512_mask_permvar_df_256:
case Intrinsic::x86_avx512_mask_permvar_df_512:
case Intrinsic::x86_avx512_mask_permvar_di_256:
case Intrinsic::x86_avx512_mask_permvar_di_512:
case Intrinsic::x86_avx512_mask_permvar_hi_128:
case Intrinsic::x86_avx512_mask_permvar_hi_256:
case Intrinsic::x86_avx512_mask_permvar_hi_512:
case Intrinsic::x86_avx512_mask_permvar_qi_128:
case Intrinsic::x86_avx512_mask_permvar_qi_256:
case Intrinsic::x86_avx512_mask_permvar_qi_512:
case Intrinsic::x86_avx512_mask_permvar_sf_256:
case Intrinsic::x86_avx512_mask_permvar_sf_512:
case Intrinsic::x86_avx512_mask_permvar_si_256:
case Intrinsic::x86_avx512_mask_permvar_si_512:
    return replaceInstUsesWith(*II, V);
case Intrinsic::x86_avx_vperm2f128_pd_256:
case Intrinsic::x86_avx_vperm2f128_ps_256:
case Intrinsic::x86_avx_vperm2f128_si_256:
case Intrinsic::x86_avx2_vperm2i128:
    return replaceInstUsesWith(*II, V);

case Intrinsic::x86_avx_maskload_ps:
case Intrinsic::x86_avx_maskload_pd:
case Intrinsic::x86_avx_maskload_ps_256:
case Intrinsic::x86_avx_maskload_pd_256:
case Intrinsic::x86_avx2_maskload_d:
case Intrinsic::x86_avx2_maskload_q:
case Intrinsic::x86_avx2_maskload_d_256:
case Intrinsic::x86_avx2_maskload_q_256:
case Intrinsic::x86_sse2_maskmov_dqu:
case Intrinsic::x86_avx_maskstore_ps:
case Intrinsic::x86_avx_maskstore_pd:
case Intrinsic::x86_avx_maskstore_ps_256:
case Intrinsic::x86_avx_maskstore_pd_256:
case Intrinsic::x86_avx2_maskstore_d:
case Intrinsic::x86_avx2_maskstore_q:
case Intrinsic::x86_avx2_maskstore_d_256:
case Intrinsic::x86_avx2_maskstore_q_256:

case Intrinsic::x86_xop_vpcomb:
case Intrinsic::x86_xop_vpcomd:
case Intrinsic::x86_xop_vpcomq:
case Intrinsic::x86_xop_vpcomw:
    return replaceInstUsesWith(*II, V);

case Intrinsic::x86_xop_vpcomub:
case Intrinsic::x86_xop_vpcomud:
case Intrinsic::x86_xop_vpcomuq:
case Intrinsic::x86_xop_vpcomuw:
    return replaceInstUsesWith(*II, V);
case Intrinsic::ppc_altivec_vperm:

  if (Constant *Mask = dyn_cast<Constant>(II->getArgOperand(2))) {
    assert(Mask->getType()->getVectorNumElements() == 16 &&
           "Bad type for intrinsic!");

    bool AllEltsOk = true;
    for (unsigned i = 0; i != 16; ++i) {
      if (!Elt || !(isa<ConstantInt>(Elt) || isa<UndefValue>(Elt))) {

      Value *Op0 = Builder->CreateBitCast(II->getArgOperand(0),
      Value *Op1 = Builder->CreateBitCast(II->getArgOperand(1),

      Value *ExtractedElts[32];
      memset(ExtractedElts, 0, sizeof(ExtractedElts));

      for (unsigned i = 0; i != 16; ++i) {
        if (isa<UndefValue>(Mask->getAggregateElement(i)))

            cast<ConstantInt>(Mask->getAggregateElement(i))->getZExtValue();
        if (DL.isLittleEndian())

        if (!ExtractedElts[Idx]) {
          Value *Op0ToUse = (DL.isLittleEndian()) ? Op1 : Op0;
          Value *Op1ToUse = (DL.isLittleEndian()) ? Op0 : Op1;
          ExtractedElts[Idx] =
              Builder->CreateExtractElement(Idx < 16 ? Op0ToUse : Op1ToUse,
                                            Builder->getInt32(Idx & 15));

        Result = Builder->CreateInsertElement(Result, ExtractedElts[Idx],
                                              Builder->getInt32(i));
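        // altivec vperm numbers bytes big-endian; on little-endian targets
        // the two source operands are swapped (and the index adjusted, not
        // shown here) so each extracted element still lands correctly.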
case Intrinsic::arm_neon_vld1:
case Intrinsic::arm_neon_vld2:
case Intrinsic::arm_neon_vld3:
case Intrinsic::arm_neon_vld4:
case Intrinsic::arm_neon_vld2lane:
case Intrinsic::arm_neon_vld3lane:
case Intrinsic::arm_neon_vld4lane:
case Intrinsic::arm_neon_vst1:
case Intrinsic::arm_neon_vst2:
case Intrinsic::arm_neon_vst3:
case Intrinsic::arm_neon_vst4:
case Intrinsic::arm_neon_vst2lane:
case Intrinsic::arm_neon_vst3lane:
case Intrinsic::arm_neon_vst4lane: {

  unsigned AlignArg = II->getNumArgOperands() - 1;

  if (IntrAlign && IntrAlign->getZExtValue() < MemAlign) {
    II->setArgOperand(AlignArg,
case Intrinsic::arm_neon_vmulls:
case Intrinsic::arm_neon_vmullu:
case Intrinsic::aarch64_neon_smull:
case Intrinsic::aarch64_neon_umull: {
  Value *Arg0 = II->getArgOperand(0);
  Value *Arg1 = II->getArgOperand(1);

  if (isa<ConstantAggregateZero>(Arg0) || isa<ConstantAggregateZero>(Arg1)) {

  bool Zext = (II->getIntrinsicID() == Intrinsic::arm_neon_vmullu ||
               II->getIntrinsicID() == Intrinsic::aarch64_neon_umull);
  VectorType *NewVT = cast<VectorType>(II->getType());
  if (Constant *CV0 = dyn_cast<Constant>(Arg0)) {
    if (Constant *CV1 = dyn_cast<Constant>(Arg1)) {

  if (Constant *CV1 = dyn_cast<Constant>(Arg1))

        dyn_cast_or_null<ConstantInt>(CV1->getSplatValue()))
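  // vmull multiplies after widening, so a zero operand folds the whole call
  // to zero, and two constant operands constant-fold to ext(CV0) * ext(CV1),
  // with the extension kind (zext vs sext) chosen by the unsigned/signed
  // variant of the intrinsic.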
case Intrinsic::amdgcn_rcp: {
  if (const ConstantFP *C = dyn_cast<ConstantFP>(II->getArgOperand(0))) {
    const APFloat &ArgVal = C->getValueAPF();

      return replaceInstUsesWith(CI, ConstantFP::get(II->getContext(), Val));
case Intrinsic::amdgcn_frexp_mant:
case Intrinsic::amdgcn_frexp_exp: {
  Value *Src = II->getArgOperand(0);
  if (const ConstantFP *C = dyn_cast<ConstantFP>(Src)) {

    if (II->getIntrinsicID() == Intrinsic::amdgcn_frexp_mant) {

  if (isa<UndefValue>(Src))
case Intrinsic::amdgcn_class: {

  Value *Src0 = II->getArgOperand(0);
  Value *Src1 = II->getArgOperand(1);

    if (isa<UndefValue>(Src0))

    if (isa<UndefValue>(Src1))

  if ((Mask & FullMask) == FullMask)

  if ((Mask & FullMask) == 0)

    Value *FCmp = Builder->CreateFCmpUNO(Src0, Src0);

    return replaceInstUsesWith(*II, FCmp);

  if (isa<UndefValue>(Src0))

  if ((Mask & FullMask) != Mask) {
    CallInst *NewCall = Builder->CreateCall(II->getCalledFunction(),

    return replaceInstUsesWith(*II, NewCall);
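  // llvm.amdgcn.class tests a bitmask of FP classes: a full mask is always
  // true, an empty mask always false, and a mask covering only the NaN bits
  // reduces to "fcmp uno" of the value with itself.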
case Intrinsic::stackrestore: {

  if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
    if (SS->getIntrinsicID() == Intrinsic::stacksave) {
      if (&*++SS->getIterator() == II)
        return eraseInstFromFunction(CI);

  bool CannotRemove = false;
  for (++BI; &*BI != TI; ++BI) {
    if (isa<AllocaInst>(BI)) {
      CannotRemove = true;

    if (CallInst *BCI = dyn_cast<CallInst>(BI)) {

        if (II->getIntrinsicID() == Intrinsic::stackrestore)
          return eraseInstFromFunction(CI);

        if (II->mayHaveSideEffects()) {
          CannotRemove = true;

      CannotRemove = true;

  if (!CannotRemove && (isa<ReturnInst>(TI) || isa<ResumeInst>(TI)))
    return eraseInstFromFunction(CI);
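  // A stackrestore is dead when it immediately follows its stacksave, when a
  // later stackrestore in the same block shadows it, or when the block ends
  // in a return/resume with no intervening alloca or side-effecting call.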
case Intrinsic::lifetime_start:

  if (II->getFunction()->hasFnAttribute(Attribute::SanitizeAddress))

                                Intrinsic::lifetime_end, *this))
case Intrinsic::assume: {
  Value *IIOperand = II->getArgOperand(0);

  if (match(II->getNextNode(),
            m_Intrinsic<Intrinsic::assume>(m_Specific(IIOperand))))
    return eraseInstFromFunction(CI);

  Value *AssumeIntrinsic = II->getCalledValue(), *A, *B;

    Builder->CreateCall(AssumeIntrinsic, A, II->getName());
    Builder->CreateCall(AssumeIntrinsic, B, II->getName());
    return eraseInstFromFunction(*II);

    Builder->CreateCall(AssumeIntrinsic, Builder->CreateNot(A),

    Builder->CreateCall(AssumeIntrinsic, Builder->CreateNot(B),

    return eraseInstFromFunction(*II);

    return eraseInstFromFunction(*II);

  APInt KnownZero(1, 0), KnownOne(1, 0);

    return eraseInstFromFunction(*II);

  AC.updateAffectedValues(II);
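  // An assume identical to its immediate successor is redundant. Conjunctions
  // are split: assume(a && b) becomes two assumes, and assume(!(a || b))
  // becomes assume(!a) plus assume(!b); smaller facts are easier for value
  // tracking to consume.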
case Intrinsic::experimental_gc_relocate: {

  Value *DerivedPtr = cast<GCRelocateInst>(II)->getDerivedPtr();

  if (II->use_empty())
    return eraseInstFromFunction(*II);

  if (isa<UndefValue>(DerivedPtr))

  if (auto *PT = dyn_cast<PointerType>(II->getType())) {

    if (isa<ConstantPointerNull>(DerivedPtr))

return visitCallSite(II);
return visitCallSite(&II);

Type *DstTy = cast<PointerType>(CI->getType())->getElementType();
    replaceInstUsesWith(*From, With);

if (Value *With = Simplifier.optimizeCall(CI)) {

  return CI->use_empty() ? CI : replaceInstUsesWith(*CI, With);
if (Underlying != TrampMem &&

  if (!isa<AllocaInst>(Underlying))

    InitTrampoline = II;

if (!InitTrampoline)

if (InitTrampoline->getOperand(0) != TrampMem)

  return InitTrampoline;
bool Changed = false;

if (V->getType()->isPointerTy() &&

if (!Indices.empty()) {
if (!isa<Function>(Callee) && transformConstExprCastCall(CS))

if (Function *CalleeF = dyn_cast<Function>(Callee)) {

      !CalleeF->isIntrinsic()) {
    DEBUG(dbgs() << "Removing convergent attr from instr "

      !CalleeF->isDeclaration()) {

    if (isa<CallInst>(OldCall))
      return eraseInstFromFunction(*OldCall);

    cast<InvokeInst>(OldCall)->setCalledFunction(
if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) {

    return transformCallThroughTrampoline(CS, II);

if (FTy->isVarArg()) {

if (I) return eraseInstFromFunction(*I);
bool InstCombiner::transformConstExprCastCall(CallSite CS) {

  if (Callee->hasFnAttribute("thunk"))
  if (OldRetTy != NewRetTy) {

    if (Callee->isDeclaration())

    if (InvokeInst *II = dyn_cast<InvokeInst>(Caller))

      if (PHINode *PN = dyn_cast<PHINode>(U))
        if (PN->getParent() == II->getNormalDest() ||

  unsigned NumActualArgs = CS.arg_size();

  if (Callee->getAttributes().hasAttrSomewhere(Attribute::InAlloca) ||
      Callee->getAttributes().hasAttrSomewhere(Attribute::ByVal))
  for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) {
    Type *ActTy = (*AI)->getType();

    if (ParamTy != ActTy &&
                    Attribute::ByVal)) {

      if (DL.getTypeAllocSize(CurElTy) !=

  if (Callee->isDeclaration()) {

  if (Index <= FT->getNumParams())
  std::vector<Value*> Args;
  Args.reserve(NumActualArgs);

  attrVec.reserve(NumCommonArgs);

  if (RAttrs.hasAttributes())
  for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {

    if ((*AI)->getType() == ParamTy) {
      Args.push_back(*AI);

      Args.push_back(Builder->CreateBitOrPointerCast(*AI, ParamTy));
    for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) {

      if (PTy != (*AI)->getType()) {

        Args.push_back(Builder->CreateCast(opcode, *AI, PTy));

        Args.push_back(*AI);
  if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
    NC = Builder->CreateInvoke(Callee, II->getNormalDest(), II->getUnwindDest(),

    cast<InvokeInst>(NC)->setAttributes(NewCallerPAL);

    CallInst *CI = cast<CallInst>(Caller);
    NC = Builder->CreateCall(Callee, Args, OpBundles);

    cast<CallInst>(NC)->setAttributes(NewCallerPAL);
    if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {

      InsertNewInstBefore(NC, *I);

      InsertNewInstBefore(NC, *Caller);

    Worklist.AddUsersToWorkList(*Caller);

    replaceInstUsesWith(*Caller, NV);

  if (OldRetTy == NV->getType())

  eraseInstFromFunction(*Caller);
InstCombiner::transformCallThroughTrampoline(CallSite CS,

  if (Attrs.hasAttrSomewhere(Attribute::Nest))

         "transformCallThroughTrampoline called with incorrect CallSite.");
  if (!NestAttrs.isEmpty()) {
    unsigned NestIdx = 1;
    Type *NestTy = nullptr;

         E = NestFTy->param_end(); I != E; ++NestIdx, ++I)
      if (NestAttrs.hasAttribute(NestIdx, Attribute::Nest)) {
      std::vector<Value*> NewArgs;
      NewArgs.reserve(CS.arg_size() + 1);

      NewAttrs.reserve(Attrs.getNumSlots() + 1);

                                     Attrs.getRetAttributes()));
          if (Idx == NestIdx) {

            if (NestVal->getType() != NestTy)
              NestVal = Builder->CreateBitCast(NestVal, NestTy, "nest");
            NewArgs.push_back(NestVal);

          NewArgs.push_back(*I);

                              Idx + (Idx >= NestIdx), B));
      NewAttrs.push_back(AttributeSet::get(FTy->getContext(),
                                           Attrs.getFnAttributes()));
      std::vector<Type*> NewTypes;
      NewTypes.reserve(FTy->getNumParams() + 1);

           E = FTy->param_end();

        NewTypes.push_back(NestTy);

        NewTypes.push_back(*I);

          AttributeSet::get(FTy->getContext(), NewAttrs);
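      // The nest argument (the trampoline's captured pointer) is spliced into
      // the argument, attribute, and parameter-type lists at NestIdx,
      // shifting every later entry by one.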
      if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {

                                 II->getNormalDest(), II->getUnwindDest(),
                                 NewArgs, OpBundles);
        cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv());
        cast<InvokeInst>(NewCaller)->setAttributes(NewPAL);

        cast<CallInst>(NewCaller)->setTailCallKind(
            cast<CallInst>(Caller)->getTailCallKind());
        cast<CallInst>(NewCaller)->setCallingConv(
            cast<CallInst>(Caller)->getCallingConv());
        cast<CallInst>(NewCaller)->setAttributes(NewPAL);
      NestF->getType() == PTy ? NestF :
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type (if unknown returns 0).
A vector constant whose element type is a simple 1/2/4/8-byte integer or float/double, and whose elements are just simple data values (i.e.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::ZeroOrMore, cl::values(clEnumValN(DefaultIT,"arm-default-it","Generate IT block based on arch"), clEnumValN(RestrictedIT,"arm-restrict-it","Disallow deprecated IT based on ARMv8"), clEnumValN(NoRestrictedIT,"arm-no-restrict-it","Allow IT blocks based on ARMv7")))
LibCallSimplifier - This class implements a collection of optimizations that replace well formed call...
const Value * getCalledValue() const
Get a pointer to the function that is invoked by this instruction.
void computeKnownBits(const Value *V, APInt &KnownZero, APInt &KnownOne, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
void push_back(const T &Elt)
m_Intrinsic_Ty< Opnd0, Opnd1 >::Ty m_FMin(const Opnd0 &Op0, const Opnd1 &Op1)
A parsed version of the target data layout string in and methods for querying it. ...
void copyFastMathFlags(FastMathFlags FMF)
Convenience function for transferring all fast-math flag values to this instruction, which must be an operator which supports these flags.
IntegerType * getType() const
getType - Specialize the getType() method to always return an IntegerType, which reduces the amount o...
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
static void ValueIsDeleted(Value *V)
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
static IntegerType * getInt1Ty(LLVMContext &C)
Value * SimplifyCall(Value *V, User::op_iterator ArgBegin, User::op_iterator ArgEnd, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const DominatorTree *DT=nullptr, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr)
Given a function and iterators over arguments, fold the result or return null.
unsigned getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to ensure that the alignment of V is at least PrefAlign bytes.
void getOperandBundlesAsDefs(SmallVectorImpl< OperandBundleDef > &Defs) const
static Value * simplifyX86immShift(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
static APInt getAllOnesValue(unsigned numBits)
Get the all-ones value.
uint64_t getZExtValue() const
Get zero extended value.
DiagnosticInfoOptimizationBase::Argument NV
STATISTIC(NumFunctions,"Total number of functions")
AttributeSet getParamAttributes(unsigned Index) const
The attributes for the specified index are returned.
A Module instance is used to store all the information related to an LLVM module. ...
unsigned getNumParams() const
Return the number of fixed parameters this function type requires.
Instruction * visitCallInst(CallInst &CI)
CallInst simplification.
m_Intrinsic_Ty< Opnd0, Opnd1 >::Ty m_FMax(const Opnd0 &Op0, const Opnd1 &Op1)
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
match_zero m_Zero()
Match an arbitrary zero/null constant.
Instruction * visitVACopyInst(VACopyInst &I)
static ConstantAggregateZero * get(Type *Ty)
Type::subtype_iterator param_iterator
Constant * getElementAsConstant(unsigned i) const
Return a Constant for a specified index's element.
Type * getValueType() const
static bool simplifyX86MaskedStore(IntrinsicInst &II, InstCombiner &IC)
This class represents a function call, abstracting a target machine's calling convention.
bool isConvergent() const
Determine if the call is convergent.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Get a value with low bits set.
void setAttributes(AttributeSet PAL)
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space...
m_Intrinsic_Ty< Opnd0 >::Ty m_BSwap(const Opnd0 &Op0)
const DataLayout & getDataLayout() const
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", Instruction *InsertBefore=nullptr, Instruction *MDFrom=nullptr)
static Instruction * foldCttzCtlz(IntrinsicInst &II, InstCombiner &IC)
static CastInst * CreateBitOrPointerCast(Value *S, Type *Ty, const Twine &Name="", Instruction *InsertBefore=nullptr)
Create a BitCast, a PtrToInt, or an IntToPTr cast instruction.
This class wraps the llvm.memset intrinsic.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
bool isValidAssumeForContext(const Instruction *I, const Instruction *CxtI, const DominatorTree *DT=nullptr)
Return true if it is valid to use the assumptions provided by an assume intrinsic, I, at the point in the control-flow identified by the context instruction, CxtI.
An instruction for reading from memory.
static IntegerType * getInt64Ty(LLVMContext &C)
static OverflowCheckFlavor IntrinsicIDToOverflowCheckFlavor(unsigned ID)
Returns the OverflowCheckFlavor corresponding to a overflow_with_op intrinsic.
fneg_match< LHS > m_FNeg(const LHS &L)
Match a floating point negate.
Type * getElementType() const
void reserve(size_type N)
bool hasAttribute(unsigned Index, Attribute::AttrKind Kind) const
Return true if the attribute exists at the given index.
bool isByValOrInAllocaArgument(unsigned ArgNo) const
Determine whether this argument is passed by value or in an alloca.
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
static Instruction * simplifyMaskedStore(IntrinsicInst &II, InstCombiner &IC)
Instruction * visitVAStartInst(VAStartInst &I)
void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, unsigned Depth, Instruction *CxtI) const
bool isGCRelocate(ImmutableCallSite CS)
Type * getPointerElementType() const
unsigned arg_size() const
const CallInst * isFreeCall(const Value *I, const TargetLibraryInfo *TLI)
isFreeCall - Returns non-null if the value is a call to the builtin free()
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
StringRef getName() const
Return a constant reference to the value's name.
static bool isBitOrNoopPointerCastable(Type *SrcTy, Type *DestTy, const DataLayout &DL)
Check whether a bitcast, inttoptr, or ptrtoint cast between these types is valid and a no-op...
iterator begin()
Instruction iterator methods.
bool isSingleValueType() const
Return true if the type is a valid type for a register in codegen.
opStatus divide(const APFloat &RHS, roundingMode RM)
bool match(Val *V, const Pattern &P)
Instruction * visitInvokeInst(InvokeInst &II)
static Constant * getIntegerCast(Constant *C, Type *Ty, bool isSigned)
Create a ZExt, Bitcast or Trunc for integer -> integer casts.
bool doesNotThrow() const
Determine if the function cannot unwind.
OverflowCheckFlavor
Specific patterns of overflow check idioms that we match.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
static Value * simplifyX86movmsk(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
This is the base class for all instructions that perform data casts.
const APInt & getValue() const
Return the constant as an APInt value reference.
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
This class wraps the llvm.memmove intrinsic.
Class to represent struct types.
ValTy * getCalledValue() const
getCalledValue - Return the pointer to function that is being called.
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
unsigned getNumArgOperands() const
Return the number of call arguments.
Instruction * eraseInstFromFunction(Instruction &I)
Combiner aware instruction erasure.
CastClass_match< OpTy, Instruction::Trunc > m_Trunc(const OpTy &Op)
Matches Trunc.
static Constant * get(ArrayRef< Constant * > V)
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
The core instruction combiner logic.
static bool isSafeToEliminateVarargsCast(const CallSite CS, const DataLayout &DL, const CastInst *const CI, const int ix)
If this cast does not affect the value passed through the varargs area, we can eliminate the use of t...
void setName(const Twine &Name)
Change the name of the value.
This file implements a class to represent arbitrary precision integral constant values and operations...
not_match< LHS > m_Not(const LHS &L)
LLVM_NODISCARD bool empty() const
bool doesNotThrow() const
Determine if the call cannot unwind.
All zero aggregate value.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
static Value * simplifyX86vpermv(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
Attempt to convert vpermd/vpermps to shufflevector if the mask is constant.
static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E, unsigned NumOperands)
APInt shl(unsigned shiftAmt) const
Left-shift function.
Class to represent function types.
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
CallingConv::ID getCallingConv() const
getCallingConv/setCallingConv - get or set the calling convention of the call.
Value * CreateSExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a SExt or Trunc from the integer value V to DestTy.
This represents the llvm.va_start intrinsic.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Class to represent array types.
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
getLimitedValue - If the value is smaller than the specified limit, return it, otherwise return the l...
static FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
static GCRegistry::Add< OcamlGC > B("ocaml","ocaml 3.10-compatible GC")
const Function * getFunction() const
Return the function this instruction belongs to.
An instruction for storing to memory.
SelectClass_match< Cond, LHS, RHS > m_Select(const Cond &C, const LHS &L, const RHS &R)
static void ValueIsRAUWd(Value *Old, Value *New)
static Value * simplifyX86vpcom(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder, bool IsSigned)
Decode XOP integer vector comparison intrinsics.
void takeName(Value *V)
Transfer the name from V to this value.
This class represents a truncation of integer types.
static unsigned getKnownAlignment(Value *V, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to infer an alignment for the specified pointer.
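A sketch of the usual pairing with CreateAlignedLoad (listed further below); loadWithKnownAlign is hypothetical, and the optional AssumptionCache/DominatorTree arguments are left at their defaults:

#include "llvm/IR/IRBuilder.h"
#include "llvm/Transforms/Utils/Local.h"

// Let value tracking prove the strongest alignment it can for Ptr,
// then emit a load carrying that alignment.
static llvm::LoadInst *loadWithKnownAlign(llvm::Value *Ptr,
                                          const llvm::DataLayout &DL,
                                          llvm::Instruction *CtxI,
                                          llvm::IRBuilder<> &Builder) {
  unsigned Align = llvm::getKnownAlignment(Ptr, DL, CtxI);
  return Builder.CreateAlignedLoad(Ptr, Align, "aligned.load");
}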
Class to represent pointers.
static Value * simplifyX86vperm2(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
The shuffle mask for a perm2*128 selects any two halves of two 256-bit source vectors, unless a zero bit is set.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Get a value with high bits set.
static Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
MinAlign - A and B are either alignments or offsets. Return the minimum alignment that may be assumed after adding the two together.
unsigned getNumSlots() const
Return the number of slots used in this attribute list.
Type * getParamType(unsigned i) const
Parameter type accessors.
bool isLosslessCast() const
A lossless cast is one that does not alter the basic value.
iterator_range< IterTy > args() const
static Instruction * simplifyMaskedScatter(IntrinsicInst &II, InstCombiner &IC)
Subclasses of this class are all able to terminate a basic block.
static ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
Value * CreateAShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
constexpr bool isPowerOf2_32(uint32_t Value)
isPowerOf2_32 - This function returns true if the argument is a power of two > 0 (e.g. 16).
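Since both isPowerOf2_32 and MinAlign (listed a few entries above) are constexpr, their behavior can be pinned down at compile time; a small sanity sketch:

#include "llvm/Support/MathExtras.h"

// MinAlign yields the largest power of two dividing both arguments.
static_assert(llvm::MinAlign(8, 12) == 4, "8 and 12 share alignment 4");
static_assert(llvm::isPowerOf2_32(64), "64 is a power of two > 0");
static_assert(!llvm::isPowerOf2_32(0), "zero is explicitly excluded");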
ConstantInt * lowerObjectSizeCall(IntrinsicInst *ObjectSize, const DataLayout &DL, const TargetLibraryInfo *TLI, bool MustSucceed)
Try to turn a call to @llvm.objectsize into an integer value of the given Type.
The instances of the Type class are immutable: once they are created, they are never changed...
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
This is an important class for using LLVM in a threaded context.
ConstantInt * getTrue()
Get the constant value for i1 true.
AttributeSet getSlotAttributes(unsigned Slot) const
Return the attributes at the given slot.
bool isVectorTy() const
True if this is an instance of VectorType.
AttributeSet addAttribute(LLVMContext &C, unsigned Index, Attribute::AttrKind Kind) const
Add an attribute to the attribute set at the given index.
static Type * reduceToSingleValueType(Type *T)
Given an aggregate type which ultimately holds a single scalar element, like {{{type}}} or [1 x type], return type.
This is an important base class in LLVM.
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
ConstantFP - Floating Point Values [float, double].
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
APInt getLoBits(unsigned numBits) const
Compute an APInt containing numBits lowbits from this APInt.
This file declares a class to represent arbitrary precision floating point values and provide a variety of arithmetic operations on them.
std::underlying_type< E >::type Underlying(E Val)
Check that Val is in range for E, and return Val cast to E's underlying type.
static IntrinsicInst * findInitTrampolineFromBB(IntrinsicInst *AdjustTramp, Value *TrampMem)
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
bool isGCResult(ImmutableCallSite CS)
uint64_t getNumElements() const
Value * getOperand(unsigned i) const
self_iterator getIterator()
Class to represent integer types.
bool hasValueHandle() const
Return true if there is a value handle associated with this value.
Constant * getAggregateElement(unsigned Elt) const
For aggregates (struct/array/vector) return the constant that corresponds to the specified element if possible, or null if not.
void setAlignment(unsigned Align)
static Value * simplifyX86varShift(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
bool isPointerTy() const
True if this is an instance of PointerType.
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
LLVMContext & getContext() const
All values hold a context through their type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
User::op_iterator arg_iterator
arg_iterator - The type of iterator to use when looping over actual arguments at this call site.
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE maxNum semantics.
bool mayWriteToMemory() const
Return true if this instruction may modify memory.
static Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value.
static Value * simplifyX86extrq(IntrinsicInst &II, Value *Op0, ConstantInt *CILength, ConstantInt *CIIndex, InstCombiner::BuilderTy &Builder)
Attempt to simplify SSE4A EXTRQ/EXTRQI instructions using constant folding or conversion to a shuffle vector.
static InvokeInst * Create(Value *Func, BasicBlock *IfNormal, BasicBlock *IfException, ArrayRef< Value * > Args, const Twine &NameStr, Instruction *InsertBefore=nullptr)
AttributeSet getAttributes() const
getAttributes/setAttributes - get or set the parameter attributes of the call.
unsigned countTrailingZeros() const
Count the number of trailing zero bits.
static Type * getHalfTy(LLVMContext &C)
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
static CastInst * CreateIntegerCast(Value *S, Type *Ty, bool isSigned, const Twine &Name="", Instruction *InsertBefore=nullptr)
Create a ZExt, BitCast, or Trunc for int -> int casts.
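A sketch of the opcode selection this performs (widenToI64 and InsertPt are hypothetical): widening with isSigned=false picks ZExt, a narrower destination picks Trunc, and equal widths reduce to a BitCast.

#include "llvm/IR/InstrTypes.h"

// Widen an integer value to i64 with an automatically chosen cast opcode.
static llvm::CastInst *widenToI64(llvm::Value *V, llvm::Instruction *InsertPt) {
  return llvm::CastInst::CreateIntegerCast(
      V, llvm::Type::getInt64Ty(V->getContext()), /*isSigned=*/false,
      "widened", InsertPt);
}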
This is the common base class for memset/memcpy/memmove.
Iterator for intrusive lists based on ilist_node.
static PointerType * getInt1PtrTy(LLVMContext &C, unsigned AS=0)
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the generic address space (address space zero).
This is the shared class of boolean and integer constants.
InstrTy * getInstruction() const
static Value * simplifyX86vpermilvar(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
Attempt to convert vpermilvar* to shufflevector if the mask is constant.
uint64_t getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment padding.
Value * getDest() const
This is just like getRawDest, but it strips off any cast instructions that feed it, giving the original input.
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not belong to a module.
static IntrinsicInst * findInitTrampolineFromAlloca(Value *TrampMem)
static CallInst * Create(Value *Func, ArrayRef< Value * > Args, ArrayRef< OperandBundleDef > Bundles=None, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Type * getType() const
All values are typed, get the type of this value.
TailCallKind getTailCallKind() const
static Instruction * simplifyMaskedGather(IntrinsicInst &II, InstCombiner &IC)
CallInst * CreateMaskedStore(Value *Val, Value *Ptr, unsigned Align, Value *Mask)
Create a call to Masked Store intrinsic.
APFloat frexp(const APFloat &X, int &Exp, APFloat::roundingMode RM)
Equivalent of C standard library function.
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
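Several of the simplifyX86* helpers above lower constant-mask intrinsics to exactly this call. A hypothetical lane-reversing shuffle for a 4-element vector, as a sketch:

#include "llvm/IR/IRBuilder.h"

// Build the constant mask <3,2,1,0> and shuffle against undef.
static llvm::Value *reverseLanes(llvm::Value *V, llvm::IRBuilder<> &Builder) {
  llvm::SmallVector<llvm::Constant *, 4> Idxs;
  for (unsigned I = 0; I != 4; ++I)
    Idxs.push_back(Builder.getInt32(3 - I));
  return Builder.CreateShuffleVector(V, llvm::UndefValue::get(V->getType()),
                                     llvm::ConstantVector::get(Idxs), "rev");
}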
Value * stripPointerCasts()
Strip off pointer casts, all-zero GEPs, and aliases.
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
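The splat behavior matters for the vector combines in this file; a sketch (allOnesV4I32 is hypothetical):

#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"

// Handing ConstantInt::get a vector type splats the scalar across it.
static llvm::Constant *allOnesV4I32(llvm::LLVMContext &Ctx) {
  llvm::Type *V4I32 = llvm::VectorType::get(llvm::Type::getInt32Ty(Ctx), 4);
  return llvm::ConstantInt::get(V4I32, -1, /*isSigned=*/true); // <-1,-1,-1,-1>
}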
Function * getCalledFunction() const
Return the function called, or null if this is an indirect function invocation.
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
static Constant * get(Type *Ty, double V)
This returns a ConstantFP, or a vector containing a splat of a ConstantFP, for the specified value in the specified type.
CallInst * CreateMaskedLoad(Value *Ptr, unsigned Align, Value *Mask, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Load intrinsic.
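A sketch combining the two masked-memory builders with the signatures listed here (copyMasked and all of its parameters are assumptions):

#include "llvm/IR/IRBuilder.h"

// Masked load of the selected lanes, then a masked store of the same lanes.
static void copyMasked(llvm::Value *Src, llvm::Value *Dst, llvm::Value *Mask,
                       llvm::Value *PassThru, llvm::IRBuilder<> &Builder) {
  llvm::CallInst *L =
      Builder.CreateMaskedLoad(Src, /*Align=*/16, Mask, PassThru, "mload");
  Builder.CreateMaskedStore(L, Dst, /*Align=*/16, Mask);
}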
static ConstantInt * getTrue(LLVMContext &Context)
void setOperand(unsigned i, Value *Val)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
AttributeSet getAttributes() const
Return the attribute list for this Function.
Value * getArgOperand(unsigned i) const
getArgOperand/setArgOperand - Return/set the i-th call argument.
Class to represent vector types.
Class for arbitrary precision integers.
bool isIntegerTy() const
True if this is an instance of IntegerType.
iterator_range< user_iterator > users()
unsigned getVectorNumElements() const
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
static Value * simplifyX86pshufb(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
Attempt to convert pshufb* to shufflevector if the mask is constant.
bool isStructTy() const
True if this is an instance of StructType.
IntegerType * getInt1Ty()
Fetch the type representing a single bit.
bool isInAllocaArgument(unsigned ArgNo) const
Determine whether this argument is passed in an alloca.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
specific_fpval m_FPOne()
Match a float 1.0 or vector with all elements equal to 1.0.
bool isAllOnesValue() const
Determine if all bits are set.
static CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", Instruction *InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's constructor.
opStatus
IEEE-754R 7: Default exception handling.
static Value * simplifyMinnumMaxnum(const IntrinsicInst &II)
PointerType * getType() const
Global values are always pointers.
void setCalledFunction(Value *Fn)
Set the function called.
This class wraps the llvm.memcpy/memmove intrinsics.
static Value * simplifyMaskedLoad(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
static Instruction * simplifyX86MaskedLoad(IntrinsicInst &II, InstCombiner &IC)
static bool maskIsAllOneOrUndef(Value *Mask)
static IntegerType * getInt32Ty(LLVMContext &C)
unsigned getSlotIndex(unsigned Slot) const
Return the index for the given slot.
unsigned greater or equal
bool hasAttributes(unsigned Index) const
Return true if attribute exists at the given index.
static Instruction::CastOps getCastOpcode(const Value *Val, bool SrcIsSigned, Type *Ty, bool DstIsSigned)
Returns the opcode necessary to cast Val into Ty using usual casting rules.
TerminatorInst * getTerminator()
Returns the terminator instruction if the block is well formed, or null if the block is not well formed.
bool hasOneUse() const
Return true if there is exactly one user of this value.
void setArgOperand(unsigned i, Value *v)
bool paramHasAttr(unsigned i, Attribute::AttrKind Kind) const
Return true if the call or the callee has the given attribute.
bool isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI, bool LookThroughBitCast=false)
Tests if a value is a call or invoke to a library function that allocates memory (either malloc, calloc, or strdup like).
unsigned getNumElements() const
Return the number of elements in the array or vector.
CallInst * CreateCall(Value *Callee, ArrayRef< Value * > Args=None, const Twine &Name="", MDNode *FPMathTag=nullptr)
static bool removeTriviallyEmptyRange(IntrinsicInst &I, unsigned StartID, unsigned EndID, InstCombiner &IC)
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
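dyn_cast is the workhorse of nearly every combine in this file; the idiom, for reference (isAllOnesConstantInt is a hypothetical predicate):

#include "llvm/IR/Constants.h"
#include "llvm/Support/Casting.h"

// dyn_cast returns null instead of asserting, so the type test and the
// cast happen in one step.
static bool isAllOnesConstantInt(const llvm::Value *V) {
  if (const auto *CI = llvm::dyn_cast<llvm::ConstantInt>(V))
    return CI->isAllOnesValue();
  return false;
}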
bool isKnownNonNullAt(const Value *V, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr)
Return true if this pointer couldn't possibly be null.
static IntrinsicInst * findInitTrampoline(Value *Callee)
static Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
const APFloat & getValueAPF() const
static Value * simplifyX86insertq(IntrinsicInst &II, Value *Op0, Value *Op1, APInt APLength, APInt APIndex, InstCombiner::BuilderTy &Builder)
Attempt to simplify SSE4A INSERTQ/INSERTQI instructions using constant folding or conversion to a shuffle vector.
bool isStatepoint(ImmutableCallSite CS)
static Constant * getNegativeIsTrueBoolVec(ConstantDataVector *V)
Return a constant boolean vector that has true elements in all positions where the input constant data vector has an element with the sign bit set.
Type * getElementType() const
Return the element type of the array/vector.
iterator_range< op_iterator > arg_operands()
Iteration adapter for range-for loops.
static Value * emitX86MaskSelect(Value *Mask, Value *Op0, Value *Op1, InstCombiner::BuilderTy &Builder)
This represents the llvm.va_copy intrinsic.
Type * getReturnType() const
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
LoadInst * CreateAlignedLoad(Value *Ptr, unsigned Align, const char *Name)
unsigned getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVM Value Representation.
void setAlignment(unsigned Align)
This file provides internal interfaces used to implement the InstCombine.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
static VectorType * get(Type *ElementType, unsigned NumElements)
This static method is the primary way to construct an VectorType.
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
AttrBuilder typeIncompatible(Type *Ty)
Which attributes cannot be applied to a type.
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
unsigned countLeadingZeros() const
The APInt version of the countLeadingZeros functions in MathExtras.h.
bool isKnownNonZero(const Value *V, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr)
Return true if the given value is known to be non-zero when defined.
CallingConv::ID getCallingConv() const
getCallingConv/setCallingConv - Get or set the calling convention of this function call...
static Constant * getMul(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
static Value * simplifyX86insertps(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
This class represents an extension of floating point types.
static IntegerType * getInt8Ty(LLVMContext &C)
void setCalledFunction(Value *V)
setCalledFunction - Set the callee to the specified value.
bool isEmpty() const
Return true if there are no attributes.
const fltSemantics & getSemantics() const
static Type * getPromotedType(Type *Ty)
Return the specified type promoted as it would be to pass through a va_arg area.
const BasicBlock * getParent() const
bool doesNotThrow() const
Determine if the call cannot unwind.
LLVM_READONLY APFloat minnum(const APFloat &A, const APFloat &B)
Implements IEEE minNum semantics.
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
A wrapper class for inspecting calls to intrinsic functions.
bool isVoidTy() const
Return true if this is 'void'.
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate > m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
AttributeSet getFnAttributes() const
The function attributes are returned.