21 using namespace llvm::PatternMatch;
23 #define DEBUG_TYPE "instcombine"
30 assert(I &&
"No instruction?");
31 assert(OpNo < I->getNumOperands() &&
"Operand index too large");
35 if (!OpC)
return false;
39 if ((~Demanded & OpC->
getValue()) == 0)
53 bool InstCombiner::SimplifyDemandedInstructionBits(
Instruction &Inst) {
55 APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
58 Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask, KnownZero, KnownOne,
61 if (V == &Inst)
return true;
62 replaceInstUsesWith(Inst, V);
69 bool InstCombiner::SimplifyDemandedBits(
Use &U,
const APInt &DemandedMask,
73 Value *NewVal = SimplifyDemandedUseBits(U.
get(), DemandedMask, KnownZero,
74 KnownOne,
Depth, UserI);
75 if (!NewVal)
return false;
103 Value *InstCombiner::SimplifyDemandedUseBits(
Value *V,
APInt DemandedMask,
107 assert(V !=
nullptr &&
"Null pointer of Value???");
108 assert(Depth <= 6 &&
"Limit Search Depth");
115 "Value *V, DemandedMask, KnownZero and KnownOne "
116 "must have same BitWidth");
119 KnownOne = CI->getValue() & DemandedMask;
120 KnownZero = ~KnownOne & DemandedMask;
123 if (isa<ConstantPointerNull>(V)) {
126 KnownZero = DemandedMask;
132 if (DemandedMask == 0) {
133 if (isa<UndefValue>(V))
141 APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
142 APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
168 if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) ==
169 (DemandedMask & ~LHSKnownZero))
171 if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) ==
172 (DemandedMask & ~RHSKnownZero))
176 if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask)
192 if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) ==
193 (DemandedMask & ~LHSKnownOne))
195 if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) ==
196 (DemandedMask & ~RHSKnownOne))
201 if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) ==
202 (DemandedMask & (~RHSKnownZero)))
204 if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) ==
205 (DemandedMask & (~LHSKnownZero)))
218 if ((DemandedMask & RHSKnownZero) == DemandedMask)
220 if ((DemandedMask & LHSKnownZero) == DemandedMask)
242 if (SimplifyDemandedBits(I->
getOperandUse(1), DemandedMask, RHSKnownZero,
243 RHSKnownOne, Depth + 1) ||
244 SimplifyDemandedBits(I->
getOperandUse(0), DemandedMask & ~RHSKnownZero,
245 LHSKnownZero, LHSKnownOne, Depth + 1))
247 assert(!(RHSKnownZero & RHSKnownOne) &&
"Bits known to be one AND zero?");
248 assert(!(LHSKnownZero & LHSKnownOne) &&
"Bits known to be one AND zero?");
252 if ((DemandedMask & ((RHSKnownZero | LHSKnownZero)|
253 (RHSKnownOne & LHSKnownOne))) == DemandedMask)
258 if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) ==
259 (DemandedMask & ~LHSKnownZero))
261 if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) ==
262 (DemandedMask & ~RHSKnownZero))
266 if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask)
274 KnownOne = RHSKnownOne & LHSKnownOne;
276 KnownZero = RHSKnownZero | LHSKnownZero;
280 if (SimplifyDemandedBits(I->
getOperandUse(1), DemandedMask, RHSKnownZero,
281 RHSKnownOne, Depth + 1) ||
282 SimplifyDemandedBits(I->
getOperandUse(0), DemandedMask & ~RHSKnownOne,
283 LHSKnownZero, LHSKnownOne, Depth + 1))
285 assert(!(RHSKnownZero & RHSKnownOne) &&
"Bits known to be one AND zero?");
286 assert(!(LHSKnownZero & LHSKnownOne) &&
"Bits known to be one AND zero?");
290 if ((DemandedMask & ((RHSKnownZero & LHSKnownZero)|
291 (RHSKnownOne | LHSKnownOne))) == DemandedMask)
296 if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) ==
297 (DemandedMask & ~LHSKnownOne))
299 if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) ==
300 (DemandedMask & ~RHSKnownOne))
305 if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) ==
306 (DemandedMask & (~RHSKnownZero)))
308 if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) ==
309 (DemandedMask & (~LHSKnownZero)))
317 KnownZero = RHSKnownZero & LHSKnownZero;
319 KnownOne = RHSKnownOne | LHSKnownOne;
322 if (SimplifyDemandedBits(I->
getOperandUse(1), DemandedMask, RHSKnownZero,
323 RHSKnownOne, Depth + 1) ||
324 SimplifyDemandedBits(I->
getOperandUse(0), DemandedMask, LHSKnownZero,
325 LHSKnownOne, Depth + 1))
327 assert(!(RHSKnownZero & RHSKnownOne) &&
"Bits known to be one AND zero?");
328 assert(!(LHSKnownZero & LHSKnownOne) &&
"Bits known to be one AND zero?");
331 APInt IKnownZero = (RHSKnownZero & LHSKnownZero) |
332 (RHSKnownOne & LHSKnownOne);
334 APInt IKnownOne = (RHSKnownZero & LHSKnownOne) |
335 (RHSKnownOne & LHSKnownZero);
339 if ((DemandedMask & (IKnownZero|IKnownOne)) == DemandedMask)
344 if ((DemandedMask & RHSKnownZero) == DemandedMask)
346 if ((DemandedMask & LHSKnownZero) == DemandedMask)
352 if ((DemandedMask & ~RHSKnownZero & ~LHSKnownZero) == 0) {
356 return InsertNewInstWith(Or, *I);
363 if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask) {
365 if ((RHSKnownOne & LHSKnownOne) == RHSKnownOne) {
367 ~RHSKnownOne & DemandedMask);
369 return InsertNewInstWith(And, *I);
385 isa<ConstantInt>(LHSInst->getOperand(1)) &&
386 (LHSKnownOne & RHSKnownOne & DemandedMask) != 0) {
387 ConstantInt *AndRHS = cast<ConstantInt>(LHSInst->getOperand(1));
389 APInt NewMask = ~(LHSKnownOne & RHSKnownOne & DemandedMask);
394 InsertNewInstWith(NewAnd, *I);
398 Instruction *NewXor = BinaryOperator::CreateXor(NewAnd, XorC);
399 return InsertNewInstWith(NewXor, *I);
403 KnownZero= (RHSKnownZero & LHSKnownZero) | (RHSKnownOne & LHSKnownOne);
405 KnownOne = (RHSKnownZero & LHSKnownOne) | (RHSKnownOne & LHSKnownZero);
415 if (SimplifyDemandedBits(I->
getOperandUse(2), DemandedMask, RHSKnownZero,
416 RHSKnownOne, Depth + 1) ||
417 SimplifyDemandedBits(I->
getOperandUse(1), DemandedMask, LHSKnownZero,
418 LHSKnownOne, Depth + 1))
420 assert(!(RHSKnownZero & RHSKnownOne) &&
"Bits known to be one AND zero?");
421 assert(!(LHSKnownZero & LHSKnownOne) &&
"Bits known to be one AND zero?");
429 KnownOne = RHSKnownOne & LHSKnownOne;
430 KnownZero = RHSKnownZero & LHSKnownZero;
432 case Instruction::Trunc: {
434 DemandedMask = DemandedMask.
zext(truncBf);
435 KnownZero = KnownZero.
zext(truncBf);
436 KnownOne = KnownOne.
zext(truncBf);
437 if (SimplifyDemandedBits(I->
getOperandUse(0), DemandedMask, KnownZero,
438 KnownOne, Depth + 1))
440 DemandedMask = DemandedMask.
trunc(BitWidth);
441 KnownZero = KnownZero.
trunc(BitWidth);
442 KnownOne = KnownOne.
trunc(BitWidth);
443 assert(!(KnownZero & KnownOne) &&
"Bits known to be one AND zero?");
446 case Instruction::BitCast:
453 if (DstVTy->getNumElements() != SrcVTy->getNumElements())
463 if (SimplifyDemandedBits(I->
getOperandUse(0), DemandedMask, KnownZero,
464 KnownOne, Depth + 1))
466 assert(!(KnownZero & KnownOne) &&
"Bits known to be one AND zero?");
468 case Instruction::ZExt: {
472 DemandedMask = DemandedMask.
trunc(SrcBitWidth);
473 KnownZero = KnownZero.
trunc(SrcBitWidth);
474 KnownOne = KnownOne.
trunc(SrcBitWidth);
475 if (SimplifyDemandedBits(I->
getOperandUse(0), DemandedMask, KnownZero,
476 KnownOne, Depth + 1))
478 DemandedMask = DemandedMask.
zext(BitWidth);
479 KnownZero = KnownZero.
zext(BitWidth);
480 KnownOne = KnownOne.
zext(BitWidth);
481 assert(!(KnownZero & KnownOne) &&
"Bits known to be one AND zero?");
486 case Instruction::SExt: {
490 APInt InputDemandedBits = DemandedMask &
496 if ((NewBits & DemandedMask) != 0)
497 InputDemandedBits.
setBit(SrcBitWidth-1);
499 InputDemandedBits = InputDemandedBits.
trunc(SrcBitWidth);
500 KnownZero = KnownZero.
trunc(SrcBitWidth);
501 KnownOne = KnownOne.
trunc(SrcBitWidth);
502 if (SimplifyDemandedBits(I->
getOperandUse(0), InputDemandedBits, KnownZero,
503 KnownOne, Depth + 1))
505 InputDemandedBits = InputDemandedBits.
zext(BitWidth);
506 KnownZero = KnownZero.
zext(BitWidth);
507 KnownOne = KnownOne.
zext(BitWidth);
508 assert(!(KnownZero & KnownOne) &&
"Bits known to be one AND zero?");
515 if (KnownZero[SrcBitWidth-1] || (NewBits & ~DemandedMask) == NewBits) {
518 return InsertNewInstWith(NewCast, *I);
519 }
else if (KnownOne[SrcBitWidth-1]) {
525 case Instruction::Sub: {
528 unsigned NLZ = DemandedMask.countLeadingZeros();
533 if (SimplifyDemandedBits(I->
getOperandUse(0), DemandedFromOps,
534 LHSKnownZero, LHSKnownOne, Depth + 1) ||
537 LHSKnownZero, LHSKnownOne, Depth + 1)) {
553 case Instruction::Shl:
559 Value *R = SimplifyShrShlDemandedBits(Shr, I, DemandedMask,
560 KnownZero, KnownOne);
566 uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
567 APInt DemandedMaskIn(DemandedMask.lshr(ShiftAmt));
571 if (IOp->hasNoSignedWrap())
573 else if (IOp->hasNoUnsignedWrap())
576 if (SimplifyDemandedBits(I->
getOperandUse(0), DemandedMaskIn, KnownZero,
577 KnownOne, Depth + 1))
579 assert(!(KnownZero & KnownOne) &&
"Bits known to be one AND zero?");
580 KnownZero <<= ShiftAmt;
581 KnownOne <<= ShiftAmt;
587 case Instruction::LShr:
590 uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
593 APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
597 if (cast<LShrOperator>(I)->isExact())
600 if (SimplifyDemandedBits(I->
getOperandUse(0), DemandedMaskIn, KnownZero,
601 KnownOne, Depth + 1))
603 assert(!(KnownZero & KnownOne) &&
"Bits known to be one AND zero?");
609 KnownZero |= HighBits;
613 case Instruction::AShr:
618 if (DemandedMask == 1) {
622 return InsertNewInstWith(NewVal, *I);
627 if (DemandedMask.isSignBit())
631 uint32_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
634 APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
637 if (DemandedMask.countLeadingZeros() <= ShiftAmt)
638 DemandedMaskIn.
setBit(BitWidth-1);
642 if (cast<AShrOperator>(I)->isExact())
645 if (SimplifyDemandedBits(I->
getOperandUse(0), DemandedMaskIn, KnownZero,
646 KnownOne, Depth + 1))
648 assert(!(KnownZero & KnownOne) &&
"Bits known to be one AND zero?");
661 if (BitWidth <= ShiftAmt || KnownZero[BitWidth-ShiftAmt-1] ||
662 (HighBits & ~DemandedMask) == HighBits) {
666 NewVal->
setIsExact(cast<BinaryOperator>(I)->isExact());
667 return InsertNewInstWith(NewVal, *I);
668 }
else if ((KnownOne & SignBit) != 0) {
669 KnownOne |= HighBits;
673 case Instruction::SRem:
677 if (Rem->isAllOnesValue())
681 if (DemandedMask.ult(RA))
684 APInt LowBits = RA - 1;
686 if (SimplifyDemandedBits(I->
getOperandUse(0), Mask2, LHSKnownZero,
687 LHSKnownOne, Depth + 1))
691 KnownZero = LHSKnownZero & LowBits;
692 KnownOne = LHSKnownOne & LowBits;
696 if (LHSKnownZero[BitWidth-1] || ((LHSKnownZero & LowBits) == LowBits))
697 KnownZero |= ~LowBits;
701 if (LHSKnownOne[BitWidth-1] && ((LHSKnownOne & LowBits) != 0))
702 KnownOne |= ~LowBits;
704 assert(!(KnownZero & KnownOne) &&
"Bits known to be one AND zero?");
710 if (DemandedMask.isNegative() && KnownZero.isNonNegative()) {
711 APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
715 if (LHSKnownZero.isNegative())
716 KnownZero.setBit(KnownZero.getBitWidth() - 1);
719 case Instruction::URem: {
720 APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0);
722 if (SimplifyDemandedBits(I->
getOperandUse(0), AllOnes, KnownZero2,
723 KnownOne2, Depth + 1) ||
724 SimplifyDemandedBits(I->
getOperandUse(1), AllOnes, KnownZero2,
725 KnownOne2, Depth + 1))
728 unsigned Leaders = KnownZero2.countLeadingOnes();
729 Leaders = std::max(Leaders,
730 KnownZero2.countLeadingOnes());
736 switch (II->getIntrinsicID()) {
738 case Intrinsic::bswap: {
741 unsigned NLZ = DemandedMask.countLeadingZeros();
742 unsigned NTZ = DemandedMask.countTrailingZeros();
750 if (BitWidth-NLZ-NTZ == 8) {
751 unsigned ResultBit = NTZ;
752 unsigned InputBit = BitWidth-NTZ-8;
757 if (InputBit > ResultBit)
758 NewVal = BinaryOperator::CreateLShr(II->getArgOperand(0),
761 NewVal = BinaryOperator::CreateShl(II->getArgOperand(0),
764 return InsertNewInstWith(NewVal, *I);
770 case Intrinsic::x86_mmx_pmovmskb:
771 case Intrinsic::x86_sse_movmsk_ps:
772 case Intrinsic::x86_sse2_movmsk_pd:
773 case Intrinsic::x86_sse2_pmovmskb_128:
774 case Intrinsic::x86_avx_movmsk_ps_256:
775 case Intrinsic::x86_avx_movmsk_pd_256:
776 case Intrinsic::x86_avx2_pmovmskb: {
780 if (II->getIntrinsicID() == Intrinsic::x86_mmx_pmovmskb) {
783 auto Arg = II->getArgOperand(0);
784 auto ArgType = cast<VectorType>(Arg->getType());
785 ArgWidth = ArgType->getNumElements();
791 if (DemandedElts == 0)
798 case Intrinsic::x86_sse42_crc32_64_64:
809 if ((DemandedMask & (KnownZero|KnownOne)) == DemandedMask)
833 const APInt &DemandedMask,
837 const APInt &ShlOp1 = cast<ConstantInt>(Shl->
getOperand(1))->getValue();
838 const APInt &ShrOp1 = cast<ConstantInt>(Shr->
getOperand(1))->getValue();
839 if (!ShlOp1 || !ShrOp1)
845 if (ShlOp1.
uge(BitWidth) || ShrOp1.
uge(BitWidth))
853 KnownZero &= DemandedMask;
858 bool isLshr = (Shr->
getOpcode() == Instruction::LShr);
859 BitMask1 = isLshr ? (BitMask1.lshr(ShrAmt) << ShlAmt) :
860 (BitMask1.ashr(ShrAmt) << ShlAmt);
862 if (ShrAmt <= ShlAmt) {
863 BitMask2 <<= (ShlAmt - ShrAmt);
865 BitMask2 = isLshr ? BitMask2.lshr(ShrAmt - ShlAmt):
866 BitMask2.ashr(ShrAmt - ShlAmt);
870 if ((BitMask1 & DemandedMask) == (BitMask2 & DemandedMask)) {
871 if (ShrAmt == ShlAmt)
878 if (ShrAmt < ShlAmt) {
880 New = BinaryOperator::CreateShl(VarX, Amt);
886 New = isLshr ? BinaryOperator::CreateLShr(VarX, Amt) :
888 if (cast<BinaryOperator>(Shr)->isExact())
892 return InsertNewInstWith(New, *Shl);
906 Value *InstCombiner::SimplifyDemandedVectorElts(
Value *V,
APInt DemandedElts,
911 assert((DemandedElts & ~EltMask) == 0 &&
"Invalid DemandedElts!");
913 if (isa<UndefValue>(V)) {
919 if (DemandedElts == 0) {
927 if (
Constant *
C = dyn_cast<Constant>(V)) {
933 Type *EltTy = cast<VectorType>(V->
getType())->getElementType();
937 for (
unsigned i = 0;
i != VWidth; ++
i) {
938 if (!DemandedElts[
i]) {
944 Constant *Elt =
C->getAggregateElement(i);
945 if (!Elt)
return nullptr;
947 if (isa<UndefValue>(Elt)) {
957 return NewCV !=
C ? NewCV :
nullptr;
976 DemandedElts = EltMask;
980 if (!I)
return nullptr;
982 bool MadeChange =
false;
983 APInt UndefElts2(VWidth, 0);
984 APInt UndefElts3(VWidth, 0);
989 case Instruction::InsertElement: {
996 TmpV = SimplifyDemandedVectorElts(I->
getOperand(0), DemandedElts,
997 UndefElts2, Depth + 1);
998 if (TmpV) { I->
setOperand(0, TmpV); MadeChange =
true; }
1005 if (IdxNo >= VWidth || !DemandedElts[IdxNo]) {
1012 APInt DemandedElts2 = DemandedElts;
1014 TmpV = SimplifyDemandedVectorElts(I->
getOperand(0), DemandedElts2,
1015 UndefElts, Depth + 1);
1016 if (TmpV) { I->
setOperand(0, TmpV); MadeChange =
true; }
1019 UndefElts.clearBit(IdxNo);
1022 case Instruction::ShuffleVector: {
1024 unsigned LHSVWidth =
1026 APInt LeftDemanded(LHSVWidth, 0), RightDemanded(LHSVWidth, 0);
1027 for (
unsigned i = 0; i < VWidth; i++) {
1028 if (DemandedElts[i]) {
1030 if (MaskVal != -1u) {
1031 assert(MaskVal < LHSVWidth * 2 &&
1032 "shufflevector mask index out of range!");
1033 if (MaskVal < LHSVWidth)
1034 LeftDemanded.setBit(MaskVal);
1036 RightDemanded.setBit(MaskVal - LHSVWidth);
1041 APInt LHSUndefElts(LHSVWidth, 0);
1042 TmpV = SimplifyDemandedVectorElts(I->
getOperand(0), LeftDemanded,
1043 LHSUndefElts, Depth + 1);
1044 if (TmpV) { I->
setOperand(0, TmpV); MadeChange =
true; }
1046 APInt RHSUndefElts(LHSVWidth, 0);
1047 TmpV = SimplifyDemandedVectorElts(I->
getOperand(1), RightDemanded,
1048 RHSUndefElts, Depth + 1);
1049 if (TmpV) { I->
setOperand(1, TmpV); MadeChange =
true; }
1051 bool NewUndefElts =
false;
1052 unsigned LHSIdx = -1u, LHSValIdx = -1u;
1053 unsigned RHSIdx = -1u, RHSValIdx = -1u;
1054 bool LHSUniform =
true;
1055 bool RHSUniform =
true;
1056 for (
unsigned i = 0; i < VWidth; i++) {
1058 if (MaskVal == -1u) {
1059 UndefElts.setBit(i);
1060 }
else if (!DemandedElts[i]) {
1061 NewUndefElts =
true;
1062 UndefElts.setBit(i);
1063 }
else if (MaskVal < LHSVWidth) {
1064 if (LHSUndefElts[MaskVal]) {
1065 NewUndefElts =
true;
1066 UndefElts.setBit(i);
1068 LHSIdx = LHSIdx == -1u ? i : LHSVWidth;
1069 LHSValIdx = LHSValIdx == -1u ? MaskVal : LHSVWidth;
1070 LHSUniform = LHSUniform && (MaskVal ==
i);
1073 if (RHSUndefElts[MaskVal - LHSVWidth]) {
1074 NewUndefElts =
true;
1075 UndefElts.setBit(i);
1077 RHSIdx = RHSIdx == -1u ? i : LHSVWidth;
1078 RHSValIdx = RHSValIdx == -1u ? MaskVal - LHSVWidth : LHSVWidth;
1079 RHSUniform = RHSUniform && (MaskVal - LHSVWidth ==
i);
1094 if (LHSIdx < LHSVWidth && RHSUniform) {
1095 if (
auto *CV = dyn_cast<ConstantVector>(Shuffle->
getOperand(0))) {
1101 if (RHSIdx < LHSVWidth && LHSUniform) {
1102 if (
auto *CV = dyn_cast<ConstantVector>(Shuffle->
getOperand(1))) {
1113 InsertNewInstWith(New, *Shuffle);
1120 for (
unsigned i = 0; i < VWidth; ++
i) {
1133 APInt LeftDemanded(DemandedElts), RightDemanded(DemandedElts);
1135 for (
unsigned i = 0; i < VWidth; i++) {
1140 if (isa<ConstantExpr>(CElt))
1143 LeftDemanded.clearBit(i);
1145 RightDemanded.clearBit(i);
1149 TmpV = SimplifyDemandedVectorElts(I->
getOperand(1), LeftDemanded, UndefElts,
1151 if (TmpV) { I->
setOperand(1, TmpV); MadeChange =
true; }
1153 TmpV = SimplifyDemandedVectorElts(I->
getOperand(2), RightDemanded,
1154 UndefElts2, Depth + 1);
1155 if (TmpV) { I->
setOperand(2, TmpV); MadeChange =
true; }
1158 UndefElts &= UndefElts2;
1161 case Instruction::BitCast: {
1166 APInt InputDemandedElts(InVWidth, 0);
1167 UndefElts2 =
APInt(InVWidth, 0);
1170 if (VWidth == InVWidth) {
1174 InputDemandedElts = DemandedElts;
1175 }
else if ((VWidth % InVWidth) == 0) {
1179 Ratio = VWidth / InVWidth;
1180 for (
unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx)
1181 if (DemandedElts[OutIdx])
1182 InputDemandedElts.setBit(OutIdx / Ratio);
1183 }
else if ((InVWidth % VWidth) == 0) {
1187 Ratio = InVWidth / VWidth;
1188 for (
unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
1189 if (DemandedElts[InIdx / Ratio])
1190 InputDemandedElts.setBit(InIdx);
1197 TmpV = SimplifyDemandedVectorElts(I->
getOperand(0), InputDemandedElts,
1198 UndefElts2, Depth + 1);
1204 if (VWidth == InVWidth) {
1205 UndefElts = UndefElts2;
1206 }
else if ((VWidth % InVWidth) == 0) {
1210 for (
unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx)
1211 if (UndefElts2[OutIdx / Ratio])
1212 UndefElts.setBit(OutIdx);
1213 }
else if ((InVWidth % VWidth) == 0) {
1217 for (
unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) {
1220 UndefElts.
setBit(OutIdx);
1231 case Instruction::Sub:
1232 case Instruction::Mul:
1234 TmpV = SimplifyDemandedVectorElts(I->
getOperand(0), DemandedElts, UndefElts,
1236 if (TmpV) { I->
setOperand(0, TmpV); MadeChange =
true; }
1237 TmpV = SimplifyDemandedVectorElts(I->
getOperand(1), DemandedElts,
1238 UndefElts2, Depth + 1);
1239 if (TmpV) { I->
setOperand(1, TmpV); MadeChange =
true; }
1243 UndefElts &= UndefElts2;
1245 case Instruction::FPTrunc:
1246 case Instruction::FPExt:
1247 TmpV = SimplifyDemandedVectorElts(I->
getOperand(0), DemandedElts, UndefElts,
1249 if (TmpV) { I->
setOperand(0, TmpV); MadeChange =
true; }
1258 case Intrinsic::x86_xop_vfrcz_ss:
1259 case Intrinsic::x86_xop_vfrcz_sd:
1264 if (!DemandedElts[0]) {
1271 TmpV = SimplifyDemandedVectorElts(II->
getArgOperand(0), DemandedElts,
1272 UndefElts, Depth + 1);
1273 if (TmpV) { II->
setArgOperand(0, TmpV); MadeChange =
true; }
1276 UndefElts = UndefElts[0];
1280 case Intrinsic::x86_sse_rcp_ss:
1281 case Intrinsic::x86_sse_rsqrt_ss:
1282 case Intrinsic::x86_sse_sqrt_ss:
1283 case Intrinsic::x86_sse2_sqrt_sd:
1284 TmpV = SimplifyDemandedVectorElts(II->
getArgOperand(0), DemandedElts,
1285 UndefElts, Depth + 1);
1286 if (TmpV) { II->
setArgOperand(0, TmpV); MadeChange =
true; }
1289 if (!DemandedElts[0]) {
1300 case Intrinsic::x86_sse_min_ss:
1301 case Intrinsic::x86_sse_max_ss:
1302 case Intrinsic::x86_sse_cmp_ss:
1303 case Intrinsic::x86_sse2_min_sd:
1304 case Intrinsic::x86_sse2_max_sd:
1305 case Intrinsic::x86_sse2_cmp_sd: {
1306 TmpV = SimplifyDemandedVectorElts(II->
getArgOperand(0), DemandedElts,
1307 UndefElts, Depth + 1);
1308 if (TmpV) { II->
setArgOperand(0, TmpV); MadeChange =
true; }
1311 if (!DemandedElts[0]) {
1318 TmpV = SimplifyDemandedVectorElts(II->
getArgOperand(1), DemandedElts,
1319 UndefElts2, Depth + 1);
1320 if (TmpV) { II->
setArgOperand(1, TmpV); MadeChange =
true; }
1325 UndefElts.clearBit(0);
1332 case Intrinsic::x86_sse41_round_ss:
1333 case Intrinsic::x86_sse41_round_sd: {
1335 APInt DemandedElts2 = DemandedElts;
1337 TmpV = SimplifyDemandedVectorElts(II->
getArgOperand(0), DemandedElts2,
1338 UndefElts, Depth + 1);
1339 if (TmpV) { II->
setArgOperand(0, TmpV); MadeChange =
true; }
1342 if (!DemandedElts[0]) {
1349 TmpV = SimplifyDemandedVectorElts(II->
getArgOperand(1), DemandedElts,
1350 UndefElts2, Depth + 1);
1351 if (TmpV) { II->
setArgOperand(1, TmpV); MadeChange =
true; }
1355 UndefElts.clearBit(0);
1356 UndefElts |= UndefElts2[0];
1363 case Intrinsic::x86_avx512_mask_add_ss_round:
1364 case Intrinsic::x86_avx512_mask_div_ss_round:
1365 case Intrinsic::x86_avx512_mask_mul_ss_round:
1366 case Intrinsic::x86_avx512_mask_sub_ss_round:
1367 case Intrinsic::x86_avx512_mask_max_ss_round:
1368 case Intrinsic::x86_avx512_mask_min_ss_round:
1369 case Intrinsic::x86_avx512_mask_add_sd_round:
1370 case Intrinsic::x86_avx512_mask_div_sd_round:
1371 case Intrinsic::x86_avx512_mask_mul_sd_round:
1372 case Intrinsic::x86_avx512_mask_sub_sd_round:
1373 case Intrinsic::x86_avx512_mask_max_sd_round:
1374 case Intrinsic::x86_avx512_mask_min_sd_round:
1375 case Intrinsic::x86_fma_vfmadd_ss:
1376 case Intrinsic::x86_fma_vfmsub_ss:
1377 case Intrinsic::x86_fma_vfnmadd_ss:
1378 case Intrinsic::x86_fma_vfnmsub_ss:
1379 case Intrinsic::x86_fma_vfmadd_sd:
1380 case Intrinsic::x86_fma_vfmsub_sd:
1381 case Intrinsic::x86_fma_vfnmadd_sd:
1382 case Intrinsic::x86_fma_vfnmsub_sd:
1383 case Intrinsic::x86_avx512_mask_vfmadd_ss:
1384 case Intrinsic::x86_avx512_mask_vfmadd_sd:
1385 case Intrinsic::x86_avx512_maskz_vfmadd_ss:
1386 case Intrinsic::x86_avx512_maskz_vfmadd_sd:
1387 TmpV = SimplifyDemandedVectorElts(II->
getArgOperand(0), DemandedElts,
1388 UndefElts, Depth + 1);
1389 if (TmpV) { II->
setArgOperand(0, TmpV); MadeChange =
true; }
1392 if (!DemandedElts[0]) {
1399 TmpV = SimplifyDemandedVectorElts(II->
getArgOperand(1), DemandedElts,
1400 UndefElts2, Depth + 1);
1401 if (TmpV) { II->
setArgOperand(1, TmpV); MadeChange =
true; }
1402 TmpV = SimplifyDemandedVectorElts(II->
getArgOperand(2), DemandedElts,
1403 UndefElts3, Depth + 1);
1404 if (TmpV) { II->
setArgOperand(2, TmpV); MadeChange =
true; }
1408 if (!UndefElts2[0] || !UndefElts3[0])
1409 UndefElts.clearBit(0);
1413 case Intrinsic::x86_avx512_mask3_vfmadd_ss:
1414 case Intrinsic::x86_avx512_mask3_vfmadd_sd:
1415 case Intrinsic::x86_avx512_mask3_vfmsub_ss:
1416 case Intrinsic::x86_avx512_mask3_vfmsub_sd:
1417 case Intrinsic::x86_avx512_mask3_vfnmsub_ss:
1418 case Intrinsic::x86_avx512_mask3_vfnmsub_sd:
1420 TmpV = SimplifyDemandedVectorElts(II->
getArgOperand(2), DemandedElts,
1421 UndefElts, Depth + 1);
1422 if (TmpV) { II->
setArgOperand(2, TmpV); MadeChange =
true; }
1425 if (!DemandedElts[0]) {
1432 TmpV = SimplifyDemandedVectorElts(II->
getArgOperand(0), DemandedElts,
1433 UndefElts2, Depth + 1);
1434 if (TmpV) { II->
setArgOperand(0, TmpV); MadeChange =
true; }
1435 TmpV = SimplifyDemandedVectorElts(II->
getArgOperand(1), DemandedElts,
1436 UndefElts3, Depth + 1);
1437 if (TmpV) { II->
setArgOperand(1, TmpV); MadeChange =
true; }
1441 if (!UndefElts2[0] || !UndefElts3[0])
1442 UndefElts.clearBit(0);
1446 case Intrinsic::x86_sse2_pmulu_dq:
1447 case Intrinsic::x86_sse41_pmuldq:
1448 case Intrinsic::x86_avx2_pmul_dq:
1449 case Intrinsic::x86_avx2_pmulu_dq:
1450 case Intrinsic::x86_avx512_pmul_dq_512:
1451 case Intrinsic::x86_avx512_pmulu_dq_512: {
1455 assert((VWidth * 2) == InnerVWidth &&
"Unexpected input size");
1457 APInt InnerDemandedElts(InnerVWidth, 0);
1458 for (
unsigned i = 0; i != VWidth; ++
i)
1459 if (DemandedElts[i])
1460 InnerDemandedElts.setBit(i * 2);
1462 UndefElts2 =
APInt(InnerVWidth, 0);
1463 TmpV = SimplifyDemandedVectorElts(Op0, InnerDemandedElts, UndefElts2,
1465 if (TmpV) { II->
setArgOperand(0, TmpV); MadeChange =
true; }
1467 UndefElts3 =
APInt(InnerVWidth, 0);
1468 TmpV = SimplifyDemandedVectorElts(Op1, InnerDemandedElts, UndefElts3,
1470 if (TmpV) { II->
setArgOperand(1, TmpV); MadeChange =
true; }
1477 case Intrinsic::x86_sse4a_extrq:
1478 case Intrinsic::x86_sse4a_extrqi:
1479 case Intrinsic::x86_sse4a_insertq:
1480 case Intrinsic::x86_sse4a_insertqi:
1487 return MadeChange ? I :
nullptr;
void clearAllBits()
Set every bit to 0.
const Use & getOperandUse(unsigned i) const
void computeKnownBits(const Value *V, APInt &KnownZero, APInt &KnownOne, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOne bit sets.
void push_back(const T &Elt)
IntegerType * getType() const
getType - Specialize the getType() method to always return an IntegerType, which reduces the amount of casting needed in parts of the compiler.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
static APInt getSignBit(unsigned BitWidth)
Get the SignBit for a specific bit width.
static APInt getAllOnesValue(unsigned numBits)
Get the all-ones value.
uint64_t getZExtValue() const
Get zero extended value.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
void setBit(unsigned bitPosition)
Set a given bit to 1.
This class represents zero extension of integer types.
static ConstantAggregateZero * get(Type *Ty)
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Get a value with low bits set.
This instruction constructs a fixed permutation of two input vectors.
static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, APInt Demanded)
Check to see if the specified operand of the specified instruction is a constant integer.
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
StringRef getName() const
Return a constant reference to the value's name.
bool match(Val *V, const Pattern &P)
static InsertElementInst * Create(Value *Vec, Value *NewElt, Value *Idx, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
This is the base class for all instructions that perform data casts.
const APInt & getValue() const
Return the constant as an APInt value reference.
BinOp2_match< LHS, RHS, Instruction::LShr, Instruction::AShr > m_Shr(const LHS &L, const RHS &R)
Matches LShr or AShr.
A Use represents the edge between a Value definition and its users.
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
static Constant * get(ArrayRef< Constant * > V)
APInt lshr(const APInt &LHS, unsigned shiftAmt)
Logical right-shift function.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
void setIsExact(bool b=true)
Set or clear the exact flag on this instruction, which must be an operator which supports this flag...
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
void takeName(Value *V)
Transfer the name from V to this value.
SelectPatternResult matchSelectPattern(Value *V, Value *&LHS, Value *&RHS, Instruction::CastOps *CastOp=nullptr)
Pattern match integer [SU]MIN, [SU]MAX and ABS idioms, returning the kind and providing the out param...
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Get a value with high bits set.
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
APInt trunc(unsigned width) const
Truncate to new width.
The instances of the Type class are immutable: once they are created, they are never changed...
bool isVectorTy() const
True if this is an instance of VectorType.
This is an important base class in LLVM.
APInt Or(const APInt &LHS, const APInt &RHS)
Bitwise OR function for APInt.
bool hasNoSignedWrap() const
Determine whether the no signed wrap flag is set.
APInt Xor(const APInt &LHS, const APInt &RHS)
Bitwise XOR function for APInt.
unsigned getBitWidth() const
Return the number of bits in the APInt.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
uint64_t getNumElements() const
User * getUser() const
Returns the User that contains this Use.
Value * getOperand(unsigned i) const
unsigned getIntegerBitWidth() const
Constant Vector Declarations.
unsigned countPopulation() const
Count the number of bits set.
Constant * getAggregateElement(unsigned Elt) const
For aggregates (struct/array/vector) return the constant that corresponds to the specified element if possible, or null if not.
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
LLVMContext & getContext() const
All values hold a context through their type.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag...
This is the shared class of boolean and integer constants.
SelectPatternFlavor Flavor
bool hasNoUnsignedWrap() const
Determine whether the no unsigned wrap flag is set.
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Type * getType() const
All values are typed, get the type of this value.
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
static GCRegistry::Add< ShadowStackGC > C("shadow-stack","Very portable GC for uncooperative code generators")
void setOperand(unsigned i, Value *Val)
Value * getArgOperand(unsigned i) const
getArgOperand/setArgOperand - Return/set the i-th call argument.
Class to represent vector types.
Class for arbitrary precision integers.
unsigned getVectorNumElements() const
APInt And(const APInt &LHS, const APInt &RHS)
Bitwise AND function for APInt.
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
bool isAllOnesValue() const
Determine if all bits are set.
static int getMaskValue(Constant *Mask, unsigned Elt)
Return the shuffle mask value for the specified element of the mask.
static IntegerType * getInt32Ty(LLVMContext &C)
void clearBit(unsigned bitPosition)
Set a given bit to 0.
bool hasOneUse() const
Return true if there is exactly one user of this value.
void setArgOperand(unsigned i, Value *v)
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
VectorType * getType() const
Overload to return most specific vector type.
void setHasNoUnsignedWrap(bool b=true)
Set or clear the nuw flag on this instruction, which must be an operator which supports this flag...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVM Value Representation.
This file provides internal interfaces used to implement the InstCombine.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
APInt zext(unsigned width) const
Zero extend to a new width.
APInt abs() const
Get the absolute value;.
A wrapper class for inspecting calls to intrinsic functions.