#include "llvm/IR/IntrinsicsX86.h"
#define DEBUG_TYPE "x86tti"
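// X86-specific InstCombine folds for target intrinsics: packed shifts, packs,
// multiplies, shuffles, masked loads/stores, ternary logic, and a few scalar
// bit-manipulation instructions are simplified or lowered to generic IR here.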
  VectorType *IntTy = VectorType::getInteger(cast<VectorType>(V->getType()));
  assert(V && "Vector must be foldable");
  if (auto *ConstantMask = dyn_cast<ConstantDataVector>(Mask))
  if (isa<ConstantAggregateZero>(Mask))
      II.getType(), Ptr, Align(1), BoolMask, ZeroVec);
  if (isa<ConstantAggregateZero>(Mask)) {
  if (II.getIntrinsicID() == Intrinsic::x86_sse2_maskmov_dqu)
  unsigned AddrSpace = cast<PointerType>(Ptr->getType())->getAddressSpace();
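// simplifyX86immShift: handles the shift-by-immediate and shift-by-scalar
// forms. The switch below only classifies the intrinsic (arithmetic vs.
// logical, left vs. right); the actual lowering to generic IR shifts follows.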
  bool LogicalShift = false;
  bool ShiftLeft = false;
  switch (II.getIntrinsicID()) {
124 case Intrinsic::x86_sse2_psrai_d:
125 case Intrinsic::x86_sse2_psrai_w:
126 case Intrinsic::x86_avx2_psrai_d:
127 case Intrinsic::x86_avx2_psrai_w:
128 case Intrinsic::x86_avx512_psrai_q_128:
129 case Intrinsic::x86_avx512_psrai_q_256:
130 case Intrinsic::x86_avx512_psrai_d_512:
131 case Intrinsic::x86_avx512_psrai_q_512:
132 case Intrinsic::x86_avx512_psrai_w_512:
135 case Intrinsic::x86_sse2_psra_d:
136 case Intrinsic::x86_sse2_psra_w:
137 case Intrinsic::x86_avx2_psra_d:
138 case Intrinsic::x86_avx2_psra_w:
139 case Intrinsic::x86_avx512_psra_q_128:
140 case Intrinsic::x86_avx512_psra_q_256:
141 case Intrinsic::x86_avx512_psra_d_512:
142 case Intrinsic::x86_avx512_psra_q_512:
143 case Intrinsic::x86_avx512_psra_w_512:
    LogicalShift = false;
147 case Intrinsic::x86_sse2_psrli_d:
148 case Intrinsic::x86_sse2_psrli_q:
149 case Intrinsic::x86_sse2_psrli_w:
150 case Intrinsic::x86_avx2_psrli_d:
151 case Intrinsic::x86_avx2_psrli_q:
152 case Intrinsic::x86_avx2_psrli_w:
153 case Intrinsic::x86_avx512_psrli_d_512:
154 case Intrinsic::x86_avx512_psrli_q_512:
155 case Intrinsic::x86_avx512_psrli_w_512:
158 case Intrinsic::x86_sse2_psrl_d:
159 case Intrinsic::x86_sse2_psrl_q:
160 case Intrinsic::x86_sse2_psrl_w:
161 case Intrinsic::x86_avx2_psrl_d:
162 case Intrinsic::x86_avx2_psrl_q:
163 case Intrinsic::x86_avx2_psrl_w:
164 case Intrinsic::x86_avx512_psrl_d_512:
165 case Intrinsic::x86_avx512_psrl_q_512:
166 case Intrinsic::x86_avx512_psrl_w_512:
170 case Intrinsic::x86_sse2_pslli_d:
171 case Intrinsic::x86_sse2_pslli_q:
172 case Intrinsic::x86_sse2_pslli_w:
173 case Intrinsic::x86_avx2_pslli_d:
174 case Intrinsic::x86_avx2_pslli_q:
175 case Intrinsic::x86_avx2_pslli_w:
176 case Intrinsic::x86_avx512_pslli_d_512:
177 case Intrinsic::x86_avx512_pslli_q_512:
178 case Intrinsic::x86_avx512_pslli_w_512:
181 case Intrinsic::x86_sse2_psll_d:
182 case Intrinsic::x86_sse2_psll_q:
183 case Intrinsic::x86_sse2_psll_w:
184 case Intrinsic::x86_avx2_psll_d:
185 case Intrinsic::x86_avx2_psll_q:
186 case Intrinsic::x86_avx2_psll_w:
187 case Intrinsic::x86_avx512_psll_d_512:
188 case Intrinsic::x86_avx512_psll_q_512:
189 case Intrinsic::x86_avx512_psll_w_512:
  assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left");
  Value *Vec = II.getArgOperand(0);
  Value *Amt = II.getArgOperand(1);
  auto *VT = cast<FixedVectorType>(Vec->getType());
  Type *SVT = VT->getElementType();
  unsigned VWidth = VT->getNumElements();
    return (LogicalShift ? (ShiftLeft ? Builder.CreateShl(Vec, Amt)
    Amt = ConstantInt::get(SVT, BitWidth - 1);
           cast<VectorType>(AmtVT)->getElementType() == SVT &&
           "Unexpected shift-by-scalar type");
    unsigned NumAmtElts = cast<FixedVectorType>(AmtVT)->getNumElements();
        Amt, DemandedLower, II.getDataLayout());
        Amt, DemandedUpper, II.getDataLayout());
      return (LogicalShift ? (ShiftLeft ? Builder.CreateShl(Vec, Amt)
  auto *CDV = dyn_cast<ConstantDataVector>(Amt);
         cast<VectorType>(AmtVT)->getElementType() == SVT &&
         "Unexpected shift-by-scalar type");
  for (unsigned i = 0, NumSubElts = 64 / BitWidth; i != NumSubElts; ++i) {
    unsigned SubEltIdx = (NumSubElts - 1) - i;
    auto *SubElt = cast<ConstantInt>(CDV->getElementAsConstant(SubEltIdx));
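// simplifyX86varShift: the per-element (variable) shift intrinsics
// (psllv/psrlv/psrav). With a constant amount vector they can be lowered to
// generic IR shifts; out-of-range logical shift amounts produce zero elements.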
  bool LogicalShift = false;
  bool ShiftLeft = false;
  switch (II.getIntrinsicID()) {
305 case Intrinsic::x86_avx2_psrav_d:
306 case Intrinsic::x86_avx2_psrav_d_256:
307 case Intrinsic::x86_avx512_psrav_q_128:
308 case Intrinsic::x86_avx512_psrav_q_256:
309 case Intrinsic::x86_avx512_psrav_d_512:
310 case Intrinsic::x86_avx512_psrav_q_512:
311 case Intrinsic::x86_avx512_psrav_w_128:
312 case Intrinsic::x86_avx512_psrav_w_256:
313 case Intrinsic::x86_avx512_psrav_w_512:
    LogicalShift = false;
317 case Intrinsic::x86_avx2_psrlv_d:
318 case Intrinsic::x86_avx2_psrlv_d_256:
319 case Intrinsic::x86_avx2_psrlv_q:
320 case Intrinsic::x86_avx2_psrlv_q_256:
321 case Intrinsic::x86_avx512_psrlv_d_512:
322 case Intrinsic::x86_avx512_psrlv_q_512:
323 case Intrinsic::x86_avx512_psrlv_w_128:
324 case Intrinsic::x86_avx512_psrlv_w_256:
325 case Intrinsic::x86_avx512_psrlv_w_512:
329 case Intrinsic::x86_avx2_psllv_d:
330 case Intrinsic::x86_avx2_psllv_d_256:
331 case Intrinsic::x86_avx2_psllv_q:
332 case Intrinsic::x86_avx2_psllv_q_256:
333 case Intrinsic::x86_avx512_psllv_d_512:
334 case Intrinsic::x86_avx512_psllv_q_512:
335 case Intrinsic::x86_avx512_psllv_w_128:
336 case Intrinsic::x86_avx512_psllv_w_256:
337 case Intrinsic::x86_avx512_psllv_w_512:
  assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left");
  Value *Vec = II.getArgOperand(0);
  Value *Amt = II.getArgOperand(1);
  auto *VT = cast<FixedVectorType>(II.getType());
  Type *SVT = VT->getElementType();
  int NumElts = VT->getNumElements();
    return (LogicalShift ? (ShiftLeft ? Builder.CreateShl(Vec, Amt)
  auto *CShift = dyn_cast<Constant>(Amt);
  bool AnyOutOfRange = false;
  for (int I = 0; I < NumElts; ++I) {
    auto *CElt = CShift->getAggregateElement(I);
    if (isa_and_nonnull<UndefValue>(CElt)) {
    auto *COp = dyn_cast_or_null<ConstantInt>(CElt);
    APInt ShiftVal = COp->getValue();
      AnyOutOfRange = LogicalShift;
  for (int Idx : ShiftAmts) {
      assert(LogicalShift && "Logical shift expected");
      ConstantVec.push_back(ConstantInt::getNullValue(SVT));
  for (int Idx : ShiftAmts) {
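// simplifyX86pack: PACKSS/PACKUS with constant operands are folded by clamping
// each source element to the destination range, truncating, and interleaving
// the two sources per 128-bit lane via the PackMask built below.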
  Value *Arg0 = II.getArgOperand(0);
  Value *Arg1 = II.getArgOperand(1);
  Type *ResTy = II.getType();
  if (isa<UndefValue>(Arg0) && isa<UndefValue>(Arg1))
  auto *ArgTy = cast<FixedVectorType>(Arg0->getType());
  unsigned NumSrcElts = ArgTy->getNumElements();
         "Unexpected packing types");
  unsigned NumSrcEltsPerLane = NumSrcElts / NumLanes;
  unsigned SrcScalarSizeInBits = ArgTy->getScalarSizeInBits();
  assert(SrcScalarSizeInBits == (2 * DstScalarSizeInBits) &&
         "Unexpected packing types");
  if (!isa<Constant>(Arg0) || !isa<Constant>(Arg1))
  APInt MinValue, MaxValue;
  for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
    for (unsigned Elt = 0; Elt != NumSrcEltsPerLane; ++Elt)
      PackMask.push_back(Elt + (Lane * NumSrcEltsPerLane));
    for (unsigned Elt = 0; Elt != NumSrcEltsPerLane; ++Elt)
      PackMask.push_back(Elt + (Lane * NumSrcEltsPerLane) + NumSrcElts);
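// simplifyX86pmulh: PMULHW/PMULHUW/PMULHRSW. Undef or zero operands fold
// directly; otherwise constant operands are widened, multiplied, and the high
// 16 bits of each product are taken (including the PMULHRSW rounding step).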
  Value *Arg0 = II.getArgOperand(0);
  Value *Arg1 = II.getArgOperand(1);
  auto *ResTy = cast<FixedVectorType>(II.getType());
  auto *ArgTy = cast<FixedVectorType>(Arg0->getType());
  assert(ArgTy == ResTy && ResTy->getScalarSizeInBits() == 16 &&
         "Unexpected PMULH types");
  assert((!IsRounding || IsSigned) && "PMULHRS instruction must be signed");
  if (isa<UndefValue>(Arg0) || isa<UndefValue>(Arg1))
  if (isa<ConstantAggregateZero>(Arg0) || isa<ConstantAggregateZero>(Arg1))
    return IsSigned ? Builder.CreateAShr(Arg1, 15)
    return IsSigned ? Builder.CreateAShr(Arg0, 15)
  if (!isa<Constant>(Arg0) || !isa<Constant>(Arg1))
      IsSigned ? Instruction::CastOps::SExt : Instruction::CastOps::ZExt;
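// simplifyX86pmadd: PMADDWD/PMADDUBSW with constant operands. The interleaved
// even/odd element pairs are split out, extended (signed for PMADDWD, the
// unsigned/signed mix for PMADDUBSW), multiplied, and the adjacent products
// are added to form each destination element.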
  Value *Arg0 = II.getArgOperand(0);
  Value *Arg1 = II.getArgOperand(1);
  auto *ResTy = cast<FixedVectorType>(II.getType());
  [[maybe_unused]] auto *ArgTy = cast<FixedVectorType>(Arg0->getType());
  unsigned NumDstElts = ResTy->getNumElements();
  assert(ArgTy->getNumElements() == (2 * NumDstElts) &&
         ResTy->getScalarSizeInBits() == (2 * ArgTy->getScalarSizeInBits()) &&
         "Unexpected PMADD types");
  if (isa<UndefValue>(Arg0) || isa<UndefValue>(Arg1))
  if (isa<ConstantAggregateZero>(Arg0) || isa<ConstantAggregateZero>(Arg1))
  if (!isa<Constant>(Arg0) || !isa<Constant>(Arg1))
  for (unsigned I = 0; I != NumDstElts; ++I) {
      IsPMADDWD ? Instruction::CastOps::SExt : Instruction::CastOps::ZExt;
  LHSLo = Builder.CreateCast(LHSCast, LHSLo, ResTy);
  LHSHi = Builder.CreateCast(LHSCast, LHSHi, ResTy);
  RHSLo = Builder.CreateCast(Instruction::CastOps::SExt, RHSLo, ResTy);
  RHSHi = Builder.CreateCast(Instruction::CastOps::SExt, RHSHi, ResTy);
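// simplifyX86movmsk: an undef operand folds to zero; otherwise the sign-bit
// extraction is rewritten as a signbit test (compare against zero) followed by
// a bitcast of the resulting bool vector to an integer.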
  Value *Arg = II.getArgOperand(0);
  Type *ResTy = II.getType();
  if (isa<UndefValue>(Arg))
  if (II.getIntrinsicID() == Intrinsic::x86_mmx_pmovmskb)
  auto *ArgTy = cast<FixedVectorType>(Arg->getType());
  unsigned NumElts = ArgTy->getNumElements();
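// simplifyX86addcarry: addcarry.32/64 with a known-zero carry-in reduces to
// llvm.uadd.with.overflow, with the {carry, sum} result pair rebuilt via
// insertvalue.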
  Value *CarryIn = II.getArgOperand(0);
  Value *Op1 = II.getArgOperand(1);
  Value *Op2 = II.getArgOperand(2);
  assert(RetTy->getStructElementType(0)->isIntegerTy(8) &&
         RetTy->getStructElementType(1) == OpTy && OpTy == Op2->getType() &&
         "Unexpected types for x86 addcarry");
  auto *ArgImm = dyn_cast<ConstantInt>(II.getArgOperand(3));
  if (!ArgImm || ArgImm->getValue().uge(256))
  Value *ArgA = II.getArgOperand(0);
  Value *ArgB = II.getArgOperand(1);
  Value *ArgC = II.getArgOperand(2);
  auto Or = [&](auto Lhs, auto Rhs) -> std::pair<Value *, uint8_t> {
    return {Builder.CreateOr(Lhs.first, Rhs.first), Lhs.second | Rhs.second};
  auto Xor = [&](auto Lhs, auto Rhs) -> std::pair<Value *, uint8_t> {
    return {Builder.CreateXor(Lhs.first, Rhs.first), Lhs.second ^ Rhs.second};
  auto And = [&](auto Lhs, auto Rhs) -> std::pair<Value *, uint8_t> {
    return {Builder.CreateAnd(Lhs.first, Rhs.first), Lhs.second & Rhs.second};
  auto Not = [&](auto V) -> std::pair<Value *, uint8_t> {
    return {Builder.CreateNot(V.first), ~V.second};
  auto Nor = [&](auto Lhs, auto Rhs) { return Not(Or(Lhs, Rhs)); };
  auto Xnor = [&](auto Lhs, auto Rhs) { return Not(Xor(Lhs, Rhs)); };
  auto Nand = [&](auto Lhs, auto Rhs) { return Not(And(Lhs, Rhs)); };
  bool ABIsConst = AIsConst && BIsConst;
  bool ACIsConst = AIsConst && CIsConst;
  bool BCIsConst = BIsConst && CIsConst;
  bool ABCIsConst = AIsConst && BIsConst && CIsConst;
  std::pair<Value *, uint8_t> A = {ArgA, 0xf0};
  std::pair<Value *, uint8_t> B = {ArgB, 0xcc};
  std::pair<Value *, uint8_t> C = {ArgC, 0xaa};
  std::pair<Value *, uint8_t> Res = {nullptr, 0};
  uint8_t Imm = ArgImm->getValue().getZExtValue();
    Res = Nor(Or(A, B), C);
    Res = Nor(A, Xnor(B, C));
    Res = Nor(A, Nand(B, C));
    Res = Nor(A, Not(C));
    Res = Nor(A, Nor(C, Not(B)));
    Res = Nor(A, Not(B));
    Res = Nor(A, Nor(B, Not(C)));
    Res = Nor(A, Nor(B, C));
    Res = Nor(Xnor(A, C), B);
    Res = Nor(Xnor(A, B), C);
    Res = Nor(Xnor(A, B), Xnor(A, C));
    Res = And(Nand(A, B), Xnor(B, C));
    Res = Nand(A, Or(B, C));
    Res = Nor(Nand(A, C), B);
    Res = Nor(B, Not(C));
    Res = Nor(B, Nor(C, Not(A)));
    Res = Nor(Xnor(A, B), Xor(A, C));
    Res = Xor(A, Nand(Nand(A, B), C));
    Res = Nor(Xnor(A, B), Nor(B, C));
    Res = Nand(A, Or(B, Not(C)));
    Res = Nor(B, Not(A));
    Res = Nor(Nor(A, Not(C)), B);
    Res = Nor(Nor(A, C), B);
    Res = Nand(Or(A, C), B);
    Res = Nor(Xnor(A, B), Nor(A, C));
    Res = Nand(Or(A, Not(C)), B);
    Res = Nor(Nand(A, B), C);
    Res = Nor(Xor(A, B), Xnor(A, C));
    Res = Xor(A, Nand(Nand(A, C), B));
    Res = Nor(C, Not(B));
    Res = Nor(Nor(B, Not(A)), C);
    Res = Nor(Xnor(A, C), Nor(B, C));
    Res = And(Nand(A, C), B);
    Res = Nand(A, Nand(B, Not(C)));
    Res = Nor(C, Not(A));
    Res = Nor(Nor(A, Not(B)), C);
    Res = Nor(Nor(A, B), C);
    Res = Nand(Or(A, B), C);
    Res = Nor(Nor(A, B), Xnor(A, C));
    Res = Nand(Or(A, Not(B)), C);
    Res = Nor(Nor(A, C), Xnor(B, C));
    Res = Nor(Nor(A, B), Xnor(B, C));
    Res = Xor(Xnor(A, B), C);
    Res = Nand(A, Xnor(B, C));
    Res = And(A, Nand(B, C));
    Res = Nand(Nand(A, Not(C)), B);
    Res = Nand(Nand(A, Not(B)), C);
    Res = Nand(Xnor(A, C), B);
    Res = Nand(Xnor(A, B), C);
    Res = Nand(And(A, B), C);
    Res = And(Xnor(A, B), C);
    Res = Nor(Xor(A, B), Nor(C, Not(A)));
    Res = And(Xnor(A, C), B);
    Res = Nor(Xor(A, C), Nor(B, Not(A)));
    Res = Xor(Nor(Xnor(A, B), Nor(B, C)), C);
    Res = Xor(A, Nand(B, C));
    Res = Xor(B, Nor(Nor(B, Not(A)), C));
    Res = And(Nand(A, Not(B)), C);
    Res = And(Nand(A, Not(C)), B);
    Res = Nand(A, Nand(B, C));
    Res = And(A, Xnor(B, C));
    Res = Nor(Nor(A, Not(B)), Xor(B, C));
    Res = Xor(Nor(Xnor(A, B), Nor(A, C)), C);
    Res = Xor(Nand(A, C), B);
    Res = Nor(Nor(A, B), Xor(Xnor(A, B), C));
    Res = Xor(Nand(A, B), C);
    Res = Nor(Nor(A, B), Xor(B, C));
    Res = Xor(Nor(B, Not(A)), C);
    Res = Or(Nor(A, B), Xnor(B, C));
    Res = Xor(B, Nor(C, Not(A)));
    Res = Or(Nor(A, C), Xnor(B, C));
    Res = Nand(A, Xor(B, C));
    Res = Xor(A, Nor(Nor(A, Not(B)), C));
    Res = Xor(A, Nor(Nor(A, B), C));
    Res = Xor(Nor(A, Not(B)), C);
    Res = Or(Nor(A, B), Xnor(A, C));
    Res = Or(Nor(A, B), C);
    Res = Xor(Nor(Xnor(B, C), A), C);
    Res = Or(Nor(A, Not(B)), C);
    Res = Or(C, Not(A));
    Res = And(A, Nand(B, Not(C)));
    Res = Nand(Nand(A, C), B);
    Res = Xor(A, Nor(C, Not(B)));
    Res = Or(Xnor(A, C), Nor(B, C));
    Res = Nand(Xor(A, C), B);
    Res = Xor(Nor(Xnor(A, C), B), C);
    Res = Or(Nor(B, Not(A)), C);
    Res = Or(C, Not(B));
    Res = Or(Nand(A, B), C);
    Res = Xor(A, Nor(Nor(A, Not(C)), B));
    Res = Xor(A, Nor(Nor(A, C), B));
    Res = Xor(Nor(A, Not(C)), B);
    Res = Or(Xnor(A, B), Nor(A, C));
    Res = Xor(B, Nor(A, Xnor(B, C)));
    Res = Or(Nor(A, C), B);
    Res = Or(Nor(A, Not(C)), B);
    Res = Or(B, Not(A));
    Res = Xor(A, Nor(B, Not(C)));
    Res = Or(Xnor(A, B), Nor(B, C));
    Res = Nand(Nand(A, B), C);
    Res = Nand(Xor(A, B), C);
    Res = Xor(Nor(Xnor(A, B), C), B);
    Res = Or(B, Nor(C, Not(A)));
    Res = Or(B, Not(C));
    Res = Or(Nand(A, C), B);
    Res = Xor(A, Nor(Xnor(A, C), B));
    Res = Xor(A, Nor(Xnor(A, B), C));
    Res = Or(Xnor(A, B), Xnor(A, C));
    Res = Or(Xnor(A, B), C);
    Res = Or(Xnor(A, C), B);
    Res = Nand(A, Nor(B, C));
    Res = Or(A, Nor(B, C));
    Res = Or(A, Nor(B, Not(C)));
    Res = Or(A, Not(B));
    Res = Or(A, Nor(C, Not(B)));
    Res = Or(A, Not(C));
    Res = Or(A, Nand(B, C));
    Res = Or(A, Xnor(B, C));
    Res = Nand(Nor(A, C), B);
    Res = Nand(Nor(A, B), C);
  assert((Res.first == nullptr || Res.second == Imm) &&
         "Simplification of ternary logic does not verify!");
  auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2));
  auto *VecTy = cast<FixedVectorType>(II.getType());
  assert(VecTy->getNumElements() == 4 && "insertps with wrong vector type");
  uint8_t Imm = CInt->getZExtValue();
  uint8_t DestLane = (Imm >> 4) & 0x3;
  uint8_t SourceLane = (Imm >> 6) & 0x3;
  int ShuffleMask[4] = {0, 1, 2, 3};
  Value *V1 = II.getArgOperand(1);
  if ((II.getArgOperand(0) == II.getArgOperand(1)) ||
      (ZMask & (1 << DestLane))) {
    ShuffleMask[DestLane] = SourceLane;
    for (unsigned i = 0; i < 4; ++i)
      if ((ZMask >> i) & 0x1)
        ShuffleMask[i] = i + 4;
    ShuffleMask[DestLane] = SourceLane + 4;
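// simplifyX86extrq / simplifyX86insertq: SSE4A EXTRQ/EXTRQI and
// INSERTQ/INSERTQI. Fully-constant inputs are folded directly, byte-aligned
// lengths and indices are converted into shufflevectors, and the register
// forms are canonicalized to their immediate (EXTRQI/INSERTQI) counterparts.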
  auto LowConstantHighUndef = [&](uint64_t Val) {
    Constant *Args[] = {ConstantInt::get(IntTy64, Val),
  auto *C0 = dyn_cast<Constant>(Op0);
      C0 ? dyn_cast_or_null<ConstantInt>(C0->getAggregateElement((unsigned)0))
  if (CILength && CIIndex) {
    if ((Length % 8) == 0 && (Index % 8) == 0) {
      for (int i = 0; i != (int)Length; ++i)
      for (int i = Length; i != 8; ++i)
      for (int i = 8; i != 16; ++i)
      APInt Elt = CI0->getValue();
  if (II.getIntrinsicID() == Intrinsic::x86_sse4a_extrq) {
    Value *Args[] = {Op0, CILength, CIIndex};
    return Builder.CreateIntrinsic(Intrinsic::x86_sse4a_extrqi, {}, Args);
  if (CI0 && CI0->isZero())
    return LowConstantHighUndef(0);
    if ((Length % 8) == 0 && (Index % 8) == 0) {
      for (int i = 0; i != (int)Index; ++i)
      for (int i = 0; i != (int)Length; ++i)
      for (int i = Index + Length; i != 8; ++i)
      for (int i = 8; i != 16; ++i)
  auto *C0 = dyn_cast<Constant>(Op0);
  auto *C1 = dyn_cast<Constant>(Op1);
      C0 ? dyn_cast_or_null<ConstantInt>(C0->getAggregateElement((unsigned)0))
      C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)0))
    APInt V00 = CI00->getValue();
    APInt V10 = CI10->getValue();
    APInt Val = V00 | V10;
  if (II.getIntrinsicID() == Intrinsic::x86_sse4a_insertq) {
    Constant *CIIndex = ConstantInt::get(IntTy8, Index, false);
    Value *Args[] = {Op0, Op1, CILength, CIIndex};
    return Builder.CreateIntrinsic(Intrinsic::x86_sse4a_insertqi, {}, Args);
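// simplifyX86pshufb: with a constant mask, PSHUFB becomes a shufflevector.
// A mask byte with its sign bit set selects from the zero second operand;
// otherwise the low nibble indexes within the byte's own 16-byte lane.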
  auto *V = dyn_cast<Constant>(II.getArgOperand(1));
  auto *VecTy = cast<FixedVectorType>(II.getType());
  unsigned NumElts = VecTy->getNumElements();
  assert((NumElts == 16 || NumElts == 32 || NumElts == 64) &&
         "Unexpected number of elements in shuffle mask!");
  for (unsigned I = 0; I < NumElts; ++I) {
    Constant *COp = V->getAggregateElement(I);
    if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
    if (isa<UndefValue>(COp)) {
    int8_t Index = cast<ConstantInt>(COp)->getValue().getZExtValue();
    Index = ((Index < 0) ? NumElts : Index & 0x0F) + (I & 0xF0);
  auto V1 = II.getArgOperand(0);
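// simplifyX86vpermilvar: with a constant selector vector, VPERMILPS/VPERMILPD
// becomes an in-lane shufflevector (PD uses bit 1 of each selector, hence the
// lshr; the lane base is added back so indices never cross a 128-bit lane).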
  auto *V = dyn_cast<Constant>(II.getArgOperand(1));
  auto *VecTy = cast<FixedVectorType>(II.getType());
  unsigned NumElts = VecTy->getNumElements();
  bool IsPD = VecTy->getScalarType()->isDoubleTy();
  unsigned NumLaneElts = IsPD ? 2 : 4;
  assert(NumElts == 16 || NumElts == 8 || NumElts == 4 || NumElts == 2);
  for (unsigned I = 0; I < NumElts; ++I) {
    Constant *COp = V->getAggregateElement(I);
    if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
    if (isa<UndefValue>(COp)) {
    APInt Index = cast<ConstantInt>(COp)->getValue();
    Index = Index.zextOrTrunc(32).getLoBits(2);
      Index.lshrInPlace(1);
    Index += APInt(32, (I / NumLaneElts) * NumLaneElts);
    Indexes[I] = Index.getZExtValue();
  auto V1 = II.getArgOperand(0);
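// simplifyX86vpermv: with a constant index vector, VPERMD/VPERMPS and the
// AVX-512 permvar forms become a single-source shufflevector.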
  auto *V = dyn_cast<Constant>(II.getArgOperand(1));
  auto *VecTy = cast<FixedVectorType>(II.getType());
  unsigned Size = VecTy->getNumElements();
         "Unexpected shuffle mask size");
  for (unsigned I = 0; I < Size; ++I) {
    Constant *COp = V->getAggregateElement(I);
    if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
    if (isa<UndefValue>(COp)) {
    uint32_t Index = cast<ConstantInt>(COp)->getZExtValue();
  auto V1 = II.getArgOperand(0);
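// simplifyX86vpermv3: with a constant index vector, the VPERMI2/VPERMT2
// intrinsics become a two-source shufflevector; indices are masked to
// 2 * NumElts and select between the two data operands (arguments 0 and 2).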
  auto *V = dyn_cast<Constant>(II.getArgOperand(1));
  auto *VecTy = cast<FixedVectorType>(II.getType());
  unsigned Size = VecTy->getNumElements();
         "Unexpected shuffle mask size");
  for (unsigned I = 0; I < Size; ++I) {
    Constant *COp = V->getAggregateElement(I);
    if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
    if (isa<UndefValue>(COp)) {
    uint32_t Index = cast<ConstantInt>(COp)->getZExtValue();
    Index &= (2 * Size) - 1;
  auto V1 = II.getArgOperand(0);
  auto V2 = II.getArgOperand(2);
  auto *VecTy = cast<FixedVectorType>(II->getType());
  unsigned EltSizeInBits = VecTy->getScalarSizeInBits();
  unsigned NumElts = VecTy->getNumElements();
         "Unexpected shuffle mask size");
  unsigned IdxSizeInBits = Log2_32(IsBinary ? (2 * NumElts) : NumElts);
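// instCombineIntrinsic: the per-intrinsic dispatch. It applies the helpers
// above, constant-folds BEXTR/BZHI/PEXT/PDEP, demands only the low vector
// element of scalar conversions and COMI compares, and rewrites AVX-512
// arithmetic-with-rounding intrinsics as plain IR FP ops when the rounding
// operand is the default CUR_DIRECTION value.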
std::optional<Instruction *>
  auto SimplifyDemandedVectorEltsLow = [&IC](Value *Op, unsigned Width,
                                             unsigned DemandedWidth) {
    APInt UndefElts(Width, 0);
2164 case Intrinsic::x86_bmi_bextr_32:
2165 case Intrinsic::x86_bmi_bextr_64:
2166 case Intrinsic::x86_tbm_bextri_u32:
2167 case Intrinsic::x86_tbm_bextri_u64:
    if (auto *C = dyn_cast<ConstantInt>(II.getArgOperand(1))) {
      unsigned BitWidth = II.getType()->getIntegerBitWidth();
      if (auto *InC = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
        uint64_t Result = InC->getZExtValue() >> Shift;
        Result &= maskTrailingOnes<uint64_t>(Length);
            ConstantInt::get(II.getType(), Result));
2192 case Intrinsic::x86_bmi_bzhi_32:
2193 case Intrinsic::x86_bmi_bzhi_64:
    if (auto *C = dyn_cast<ConstantInt>(II.getArgOperand(1))) {
      uint64_t Index = C->getZExtValue() & 0xff;
      unsigned BitWidth = II.getType()->getIntegerBitWidth();
      if (auto *InC = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
        uint64_t Result = InC->getZExtValue();
        Result &= maskTrailingOnes<uint64_t>(Index);
            ConstantInt::get(II.getType(), Result));
2214 case Intrinsic::x86_bmi_pext_32:
2215 case Intrinsic::x86_bmi_pext_64:
    if (auto *MaskC = dyn_cast<ConstantInt>(II.getArgOperand(1))) {
      if (MaskC->isNullValue()) {
      if (MaskC->isAllOnesValue()) {
      unsigned MaskIdx, MaskLen;
      if (MaskC->getValue().isShiftedMask(MaskIdx, MaskLen)) {
        Value *Input = II.getArgOperand(0);
        Value *ShiftAmt = ConstantInt::get(II.getType(), MaskIdx);
      if (auto *SrcC = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
        uint64_t Src = SrcC->getZExtValue();
        uint64_t Mask = MaskC->getZExtValue();
          if (BitToTest & Src)
            ConstantInt::get(II.getType(), Result));
2258 case Intrinsic::x86_bmi_pdep_32:
2259 case Intrinsic::x86_bmi_pdep_64:
    if (auto *MaskC = dyn_cast<ConstantInt>(II.getArgOperand(1))) {
      if (MaskC->isNullValue()) {
      if (MaskC->isAllOnesValue()) {
      unsigned MaskIdx, MaskLen;
      if (MaskC->getValue().isShiftedMask(MaskIdx, MaskLen)) {
        Value *Input = II.getArgOperand(0);
        Value *ShiftAmt = ConstantInt::get(II.getType(), MaskIdx);
      if (auto *SrcC = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
        uint64_t Src = SrcC->getZExtValue();
        uint64_t Mask = MaskC->getZExtValue();
          if (BitToTest & Src)
            ConstantInt::get(II.getType(), Result));
2303 case Intrinsic::x86_sse_cvtss2si:
2304 case Intrinsic::x86_sse_cvtss2si64:
2305 case Intrinsic::x86_sse_cvttss2si:
2306 case Intrinsic::x86_sse_cvttss2si64:
2307 case Intrinsic::x86_sse2_cvtsd2si:
2308 case Intrinsic::x86_sse2_cvtsd2si64:
2309 case Intrinsic::x86_sse2_cvttsd2si:
2310 case Intrinsic::x86_sse2_cvttsd2si64:
2311 case Intrinsic::x86_avx512_vcvtss2si32:
2312 case Intrinsic::x86_avx512_vcvtss2si64:
2313 case Intrinsic::x86_avx512_vcvtss2usi32:
2314 case Intrinsic::x86_avx512_vcvtss2usi64:
2315 case Intrinsic::x86_avx512_vcvtsd2si32:
2316 case Intrinsic::x86_avx512_vcvtsd2si64:
2317 case Intrinsic::x86_avx512_vcvtsd2usi32:
2318 case Intrinsic::x86_avx512_vcvtsd2usi64:
2319 case Intrinsic::x86_avx512_cvttss2si:
2320 case Intrinsic::x86_avx512_cvttss2si64:
2321 case Intrinsic::x86_avx512_cvttss2usi:
2322 case Intrinsic::x86_avx512_cvttss2usi64:
2323 case Intrinsic::x86_avx512_cvttsd2si:
2324 case Intrinsic::x86_avx512_cvttsd2si64:
2325 case Intrinsic::x86_avx512_cvttsd2usi:
2326 case Intrinsic::x86_avx512_cvttsd2usi64: {
    Value *Arg = II.getArgOperand(0);
    unsigned VWidth = cast<FixedVectorType>(Arg->getType())->getNumElements();
    if (Value *V = SimplifyDemandedVectorEltsLow(Arg, VWidth, 1)) {
2337 case Intrinsic::x86_mmx_pmovmskb:
2338 case Intrinsic::x86_sse_movmsk_ps:
2339 case Intrinsic::x86_sse2_movmsk_pd:
2340 case Intrinsic::x86_sse2_pmovmskb_128:
2341 case Intrinsic::x86_avx_movmsk_pd_256:
2342 case Intrinsic::x86_avx_movmsk_ps_256:
2343 case Intrinsic::x86_avx2_pmovmskb:
2349 case Intrinsic::x86_sse_comieq_ss:
2350 case Intrinsic::x86_sse_comige_ss:
2351 case Intrinsic::x86_sse_comigt_ss:
2352 case Intrinsic::x86_sse_comile_ss:
2353 case Intrinsic::x86_sse_comilt_ss:
2354 case Intrinsic::x86_sse_comineq_ss:
2355 case Intrinsic::x86_sse_ucomieq_ss:
2356 case Intrinsic::x86_sse_ucomige_ss:
2357 case Intrinsic::x86_sse_ucomigt_ss:
2358 case Intrinsic::x86_sse_ucomile_ss:
2359 case Intrinsic::x86_sse_ucomilt_ss:
2360 case Intrinsic::x86_sse_ucomineq_ss:
2361 case Intrinsic::x86_sse2_comieq_sd:
2362 case Intrinsic::x86_sse2_comige_sd:
2363 case Intrinsic::x86_sse2_comigt_sd:
2364 case Intrinsic::x86_sse2_comile_sd:
2365 case Intrinsic::x86_sse2_comilt_sd:
2366 case Intrinsic::x86_sse2_comineq_sd:
2367 case Intrinsic::x86_sse2_ucomieq_sd:
2368 case Intrinsic::x86_sse2_ucomige_sd:
2369 case Intrinsic::x86_sse2_ucomigt_sd:
2370 case Intrinsic::x86_sse2_ucomile_sd:
2371 case Intrinsic::x86_sse2_ucomilt_sd:
2372 case Intrinsic::x86_sse2_ucomineq_sd:
2373 case Intrinsic::x86_avx512_vcomi_ss:
2374 case Intrinsic::x86_avx512_vcomi_sd:
2375 case Intrinsic::x86_avx512_mask_cmp_ss:
2376 case Intrinsic::x86_avx512_mask_cmp_sd: {
    bool MadeChange = false;
    Value *Arg0 = II.getArgOperand(0);
    Value *Arg1 = II.getArgOperand(1);
    unsigned VWidth = cast<FixedVectorType>(Arg0->getType())->getNumElements();
    if (Value *V = SimplifyDemandedVectorEltsLow(Arg0, VWidth, 1)) {
    if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, 1)) {
2397 case Intrinsic::x86_avx512_add_ps_512:
2398 case Intrinsic::x86_avx512_div_ps_512:
2399 case Intrinsic::x86_avx512_mul_ps_512:
2400 case Intrinsic::x86_avx512_sub_ps_512:
2401 case Intrinsic::x86_avx512_add_pd_512:
2402 case Intrinsic::x86_avx512_div_pd_512:
2403 case Intrinsic::x86_avx512_mul_pd_512:
2404 case Intrinsic::x86_avx512_sub_pd_512:
    if (auto *R = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
      if (R->getValue() == 4) {
        Value *Arg0 = II.getArgOperand(0);
        Value *Arg1 = II.getArgOperand(1);
2416 case Intrinsic::x86_avx512_add_ps_512:
2417 case Intrinsic::x86_avx512_add_pd_512:
2420 case Intrinsic::x86_avx512_sub_ps_512:
2421 case Intrinsic::x86_avx512_sub_pd_512:
2424 case Intrinsic::x86_avx512_mul_ps_512:
2425 case Intrinsic::x86_avx512_mul_pd_512:
2428 case Intrinsic::x86_avx512_div_ps_512:
2429 case Intrinsic::x86_avx512_div_pd_512:
2439 case Intrinsic::x86_avx512_mask_add_ss_round:
2440 case Intrinsic::x86_avx512_mask_div_ss_round:
2441 case Intrinsic::x86_avx512_mask_mul_ss_round:
2442 case Intrinsic::x86_avx512_mask_sub_ss_round:
2443 case Intrinsic::x86_avx512_mask_add_sd_round:
2444 case Intrinsic::x86_avx512_mask_div_sd_round:
2445 case Intrinsic::x86_avx512_mask_mul_sd_round:
2446 case Intrinsic::x86_avx512_mask_sub_sd_round:
    if (auto *R = dyn_cast<ConstantInt>(II.getArgOperand(4))) {
      if (R->getValue() == 4) {
        Value *Arg0 = II.getArgOperand(0);
        Value *Arg1 = II.getArgOperand(1);
2461 case Intrinsic::x86_avx512_mask_add_ss_round:
2462 case Intrinsic::x86_avx512_mask_add_sd_round:
2465 case Intrinsic::x86_avx512_mask_sub_ss_round:
2466 case Intrinsic::x86_avx512_mask_sub_sd_round:
2469 case Intrinsic::x86_avx512_mask_mul_ss_round:
2470 case Intrinsic::x86_avx512_mask_mul_sd_round:
2473 case Intrinsic::x86_avx512_mask_div_ss_round:
2474 case Intrinsic::x86_avx512_mask_div_sd_round:
        Value *Mask = II.getArgOperand(3);
        auto *C = dyn_cast<ConstantInt>(Mask);
        if (!C || !C->getValue()[0]) {
              cast<IntegerType>(Mask->getType())->getBitWidth());
2507 case Intrinsic::x86_sse2_psrai_d:
2508 case Intrinsic::x86_sse2_psrai_w:
2509 case Intrinsic::x86_avx2_psrai_d:
2510 case Intrinsic::x86_avx2_psrai_w:
2511 case Intrinsic::x86_avx512_psrai_q_128:
2512 case Intrinsic::x86_avx512_psrai_q_256:
2513 case Intrinsic::x86_avx512_psrai_d_512:
2514 case Intrinsic::x86_avx512_psrai_q_512:
2515 case Intrinsic::x86_avx512_psrai_w_512:
2516 case Intrinsic::x86_sse2_psrli_d:
2517 case Intrinsic::x86_sse2_psrli_q:
2518 case Intrinsic::x86_sse2_psrli_w:
2519 case Intrinsic::x86_avx2_psrli_d:
2520 case Intrinsic::x86_avx2_psrli_q:
2521 case Intrinsic::x86_avx2_psrli_w:
2522 case Intrinsic::x86_avx512_psrli_d_512:
2523 case Intrinsic::x86_avx512_psrli_q_512:
2524 case Intrinsic::x86_avx512_psrli_w_512:
2525 case Intrinsic::x86_sse2_pslli_d:
2526 case Intrinsic::x86_sse2_pslli_q:
2527 case Intrinsic::x86_sse2_pslli_w:
2528 case Intrinsic::x86_avx2_pslli_d:
2529 case Intrinsic::x86_avx2_pslli_q:
2530 case Intrinsic::x86_avx2_pslli_w:
2531 case Intrinsic::x86_avx512_pslli_d_512:
2532 case Intrinsic::x86_avx512_pslli_q_512:
2533 case Intrinsic::x86_avx512_pslli_w_512:
2539 case Intrinsic::x86_sse2_psra_d:
2540 case Intrinsic::x86_sse2_psra_w:
2541 case Intrinsic::x86_avx2_psra_d:
2542 case Intrinsic::x86_avx2_psra_w:
2543 case Intrinsic::x86_avx512_psra_q_128:
2544 case Intrinsic::x86_avx512_psra_q_256:
2545 case Intrinsic::x86_avx512_psra_d_512:
2546 case Intrinsic::x86_avx512_psra_q_512:
2547 case Intrinsic::x86_avx512_psra_w_512:
2548 case Intrinsic::x86_sse2_psrl_d:
2549 case Intrinsic::x86_sse2_psrl_q:
2550 case Intrinsic::x86_sse2_psrl_w:
2551 case Intrinsic::x86_avx2_psrl_d:
2552 case Intrinsic::x86_avx2_psrl_q:
2553 case Intrinsic::x86_avx2_psrl_w:
2554 case Intrinsic::x86_avx512_psrl_d_512:
2555 case Intrinsic::x86_avx512_psrl_q_512:
2556 case Intrinsic::x86_avx512_psrl_w_512:
2557 case Intrinsic::x86_sse2_psll_d:
2558 case Intrinsic::x86_sse2_psll_q:
2559 case Intrinsic::x86_sse2_psll_w:
2560 case Intrinsic::x86_avx2_psll_d:
2561 case Intrinsic::x86_avx2_psll_q:
2562 case Intrinsic::x86_avx2_psll_w:
2563 case Intrinsic::x86_avx512_psll_d_512:
2564 case Intrinsic::x86_avx512_psll_q_512:
2565 case Intrinsic::x86_avx512_psll_w_512: {
    Value *Arg1 = II.getArgOperand(1);
           "Unexpected packed shift size");
    unsigned VWidth = cast<FixedVectorType>(Arg1->getType())->getNumElements();
    if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, VWidth / 2)) {
2583 case Intrinsic::x86_avx2_psllv_d:
2584 case Intrinsic::x86_avx2_psllv_d_256:
2585 case Intrinsic::x86_avx2_psllv_q:
2586 case Intrinsic::x86_avx2_psllv_q_256:
2587 case Intrinsic::x86_avx512_psllv_d_512:
2588 case Intrinsic::x86_avx512_psllv_q_512:
2589 case Intrinsic::x86_avx512_psllv_w_128:
2590 case Intrinsic::x86_avx512_psllv_w_256:
2591 case Intrinsic::x86_avx512_psllv_w_512:
2592 case Intrinsic::x86_avx2_psrav_d:
2593 case Intrinsic::x86_avx2_psrav_d_256:
2594 case Intrinsic::x86_avx512_psrav_q_128:
2595 case Intrinsic::x86_avx512_psrav_q_256:
2596 case Intrinsic::x86_avx512_psrav_d_512:
2597 case Intrinsic::x86_avx512_psrav_q_512:
2598 case Intrinsic::x86_avx512_psrav_w_128:
2599 case Intrinsic::x86_avx512_psrav_w_256:
2600 case Intrinsic::x86_avx512_psrav_w_512:
2601 case Intrinsic::x86_avx2_psrlv_d:
2602 case Intrinsic::x86_avx2_psrlv_d_256:
2603 case Intrinsic::x86_avx2_psrlv_q:
2604 case Intrinsic::x86_avx2_psrlv_q_256:
2605 case Intrinsic::x86_avx512_psrlv_d_512:
2606 case Intrinsic::x86_avx512_psrlv_q_512:
2607 case Intrinsic::x86_avx512_psrlv_w_128:
2608 case Intrinsic::x86_avx512_psrlv_w_256:
2609 case Intrinsic::x86_avx512_psrlv_w_512:
2615 case Intrinsic::x86_sse2_packssdw_128:
2616 case Intrinsic::x86_sse2_packsswb_128:
2617 case Intrinsic::x86_avx2_packssdw:
2618 case Intrinsic::x86_avx2_packsswb:
2619 case Intrinsic::x86_avx512_packssdw_512:
2620 case Intrinsic::x86_avx512_packsswb_512:
2626 case Intrinsic::x86_sse2_packuswb_128:
2627 case Intrinsic::x86_sse41_packusdw:
2628 case Intrinsic::x86_avx2_packusdw:
2629 case Intrinsic::x86_avx2_packuswb:
2630 case Intrinsic::x86_avx512_packusdw_512:
2631 case Intrinsic::x86_avx512_packuswb_512:
2637 case Intrinsic::x86_sse2_pmulh_w:
2638 case Intrinsic::x86_avx2_pmulh_w:
2639 case Intrinsic::x86_avx512_pmulh_w_512:
2645 case Intrinsic::x86_sse2_pmulhu_w:
2646 case Intrinsic::x86_avx2_pmulhu_w:
2647 case Intrinsic::x86_avx512_pmulhu_w_512:
2653 case Intrinsic::x86_ssse3_pmul_hr_sw_128:
2654 case Intrinsic::x86_avx2_pmul_hr_sw:
2655 case Intrinsic::x86_avx512_pmul_hr_sw_512:
2661 case Intrinsic::x86_sse2_pmadd_wd:
2662 case Intrinsic::x86_avx2_pmadd_wd:
2663 case Intrinsic::x86_avx512_pmaddw_d_512:
2669 case Intrinsic::x86_ssse3_pmadd_ub_sw_128:
2670 case Intrinsic::x86_avx2_pmadd_ub_sw:
2671 case Intrinsic::x86_avx512_pmaddubs_w_512:
2677 case Intrinsic::x86_pclmulqdq:
2678 case Intrinsic::x86_pclmulqdq_256:
2679 case Intrinsic::x86_pclmulqdq_512: {
    if (auto *C = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
      unsigned Imm = C->getZExtValue();
      bool MadeChange = false;
      Value *Arg0 = II.getArgOperand(0);
      Value *Arg1 = II.getArgOperand(1);
          cast<FixedVectorType>(Arg0->getType())->getNumElements();
      APInt UndefElts1(VWidth, 0);
      APInt DemandedElts1 =
      APInt UndefElts2(VWidth, 0);
      APInt DemandedElts2 =
2721 case Intrinsic::x86_sse41_insertps:
2727 case Intrinsic::x86_sse4a_extrq: {
    Value *Op0 = II.getArgOperand(0);
    Value *Op1 = II.getArgOperand(1);
    unsigned VWidth0 = cast<FixedVectorType>(Op0->getType())->getNumElements();
    unsigned VWidth1 = cast<FixedVectorType>(Op1->getType())->getNumElements();
           VWidth1 == 16 && "Unexpected operand sizes");
    auto *C1 = dyn_cast<Constant>(Op1);
        C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)0))
        C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)1))
    bool MadeChange = false;
    if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
    if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 2)) {
2767 case Intrinsic::x86_sse4a_extrqi: {
    Value *Op0 = II.getArgOperand(0);
    unsigned VWidth = cast<FixedVectorType>(Op0->getType())->getNumElements();
           "Unexpected operand size");
    auto *CILength = dyn_cast<ConstantInt>(II.getArgOperand(1));
    auto *CIIndex = dyn_cast<ConstantInt>(II.getArgOperand(2));
    if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) {
2792 case Intrinsic::x86_sse4a_insertq: {
    Value *Op0 = II.getArgOperand(0);
    Value *Op1 = II.getArgOperand(1);
    unsigned VWidth = cast<FixedVectorType>(Op0->getType())->getNumElements();
           cast<FixedVectorType>(Op1->getType())->getNumElements() == 2 &&
           "Unexpected operand size");
    auto *C1 = dyn_cast<Constant>(Op1);
        C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)1))
    const APInt &V11 = CI11->getValue();
    if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) {
2825 case Intrinsic::x86_sse4a_insertqi: {
    Value *Op0 = II.getArgOperand(0);
    Value *Op1 = II.getArgOperand(1);
    unsigned VWidth0 = cast<FixedVectorType>(Op0->getType())->getNumElements();
    unsigned VWidth1 = cast<FixedVectorType>(Op1->getType())->getNumElements();
           VWidth1 == 2 && "Unexpected operand sizes");
    auto *CILength = dyn_cast<ConstantInt>(II.getArgOperand(2));
    auto *CIIndex = dyn_cast<ConstantInt>(II.getArgOperand(3));
    if (CILength && CIIndex) {
      APInt Len = CILength->getValue().zextOrTrunc(6);
      APInt Idx = CIIndex->getValue().zextOrTrunc(6);
    bool MadeChange = false;
    if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
    if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 1)) {
2867 case Intrinsic::x86_sse41_pblendvb:
2868 case Intrinsic::x86_sse41_blendvps:
2869 case Intrinsic::x86_sse41_blendvpd:
2870 case Intrinsic::x86_avx_blendv_ps_256:
2871 case Intrinsic::x86_avx_blendv_pd_256:
2872 case Intrinsic::x86_avx2_pblendvb: {
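    // A blendv selects each element on the sign bit of the mask: an all-zero
    // mask returns Op0 directly, a constant mask becomes a select on a
    // constant bool vector, and a mask known to be a sign-extended bool vector
    // (possibly via a shuffle) becomes an IR select on that bool vector.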
    Value *Op0 = II.getArgOperand(0);
    Value *Op1 = II.getArgOperand(1);
    Value *Mask = II.getArgOperand(2);
    if (isa<ConstantAggregateZero>(Mask)) {
    if (auto *ConstantMask = dyn_cast<ConstantDataVector>(Mask)) {
    Value *MaskSrc = nullptr;
                                   m_Mask(ShuffleMask))))) {
      int NumElts = cast<FixedVectorType>(MaskSrc->getType())->getNumElements();
                 [NumElts](int M) { return M < 0 || M >= NumElts; }))
    auto *MaskTy = cast<FixedVectorType>(Mask->getType());
    auto *OpTy = cast<FixedVectorType>(II.getType());
    unsigned NumMaskElts = MaskTy->getNumElements();
    unsigned NumOperandElts = OpTy->getNumElements();
      unsigned NumMaskSrcElts =
          cast<FixedVectorType>(MaskSrc->getType())->getNumElements();
      NumMaskElts = (ShuffleMask.size() * NumMaskElts) / NumMaskSrcElts;
      if (NumMaskElts > NumOperandElts)
    assert(MaskTy->getPrimitiveSizeInBits() == OpTy->getPrimitiveSizeInBits() &&
           "Not expecting mask and operands with different sizes");
    if (NumMaskElts == NumOperandElts) {
    if (NumMaskElts < NumOperandElts) {
2957 case Intrinsic::x86_ssse3_pshuf_b_128:
2958 case Intrinsic::x86_avx2_pshuf_b:
2959 case Intrinsic::x86_avx512_pshuf_b_512: {
2970 case Intrinsic::x86_avx_vpermilvar_ps:
2971 case Intrinsic::x86_avx_vpermilvar_ps_256:
2972 case Intrinsic::x86_avx512_vpermilvar_ps_512: {
2983 case Intrinsic::x86_avx_vpermilvar_pd:
2984 case Intrinsic::x86_avx_vpermilvar_pd_256:
2985 case Intrinsic::x86_avx512_vpermilvar_pd_512: {
2996 case Intrinsic::x86_avx2_permd:
2997 case Intrinsic::x86_avx2_permps:
2998 case Intrinsic::x86_avx512_permvar_df_256:
2999 case Intrinsic::x86_avx512_permvar_df_512:
3000 case Intrinsic::x86_avx512_permvar_di_256:
3001 case Intrinsic::x86_avx512_permvar_di_512:
3002 case Intrinsic::x86_avx512_permvar_hi_128:
3003 case Intrinsic::x86_avx512_permvar_hi_256:
3004 case Intrinsic::x86_avx512_permvar_hi_512:
3005 case Intrinsic::x86_avx512_permvar_qi_128:
3006 case Intrinsic::x86_avx512_permvar_qi_256:
3007 case Intrinsic::x86_avx512_permvar_qi_512:
3008 case Intrinsic::x86_avx512_permvar_sf_512:
3009 case Intrinsic::x86_avx512_permvar_si_512:
3017 case Intrinsic::x86_avx512_vpermi2var_d_128:
3018 case Intrinsic::x86_avx512_vpermi2var_d_256:
3019 case Intrinsic::x86_avx512_vpermi2var_d_512:
3020 case Intrinsic::x86_avx512_vpermi2var_hi_128:
3021 case Intrinsic::x86_avx512_vpermi2var_hi_256:
3022 case Intrinsic::x86_avx512_vpermi2var_hi_512:
3023 case Intrinsic::x86_avx512_vpermi2var_pd_128:
3024 case Intrinsic::x86_avx512_vpermi2var_pd_256:
3025 case Intrinsic::x86_avx512_vpermi2var_pd_512:
3026 case Intrinsic::x86_avx512_vpermi2var_ps_128:
3027 case Intrinsic::x86_avx512_vpermi2var_ps_256:
3028 case Intrinsic::x86_avx512_vpermi2var_ps_512:
3029 case Intrinsic::x86_avx512_vpermi2var_q_128:
3030 case Intrinsic::x86_avx512_vpermi2var_q_256:
3031 case Intrinsic::x86_avx512_vpermi2var_q_512:
3032 case Intrinsic::x86_avx512_vpermi2var_qi_128:
3033 case Intrinsic::x86_avx512_vpermi2var_qi_256:
3034 case Intrinsic::x86_avx512_vpermi2var_qi_512:
3042 case Intrinsic::x86_avx_maskload_ps:
3043 case Intrinsic::x86_avx_maskload_pd:
3044 case Intrinsic::x86_avx_maskload_ps_256:
3045 case Intrinsic::x86_avx_maskload_pd_256:
3046 case Intrinsic::x86_avx2_maskload_d:
3047 case Intrinsic::x86_avx2_maskload_q:
3048 case Intrinsic::x86_avx2_maskload_d_256:
3049 case Intrinsic::x86_avx2_maskload_q_256:
3055 case Intrinsic::x86_sse2_maskmov_dqu:
3056 case Intrinsic::x86_avx_maskstore_ps:
3057 case Intrinsic::x86_avx_maskstore_pd:
3058 case Intrinsic::x86_avx_maskstore_ps_256:
3059 case Intrinsic::x86_avx_maskstore_pd_256:
3060 case Intrinsic::x86_avx2_maskstore_d:
3061 case Intrinsic::x86_avx2_maskstore_q:
3062 case Intrinsic::x86_avx2_maskstore_d_256:
3063 case Intrinsic::x86_avx2_maskstore_q_256:
3069 case Intrinsic::x86_addcarry_32:
3070 case Intrinsic::x86_addcarry_64:
3076 case Intrinsic::x86_avx512_pternlog_d_128:
3077 case Intrinsic::x86_avx512_pternlog_d_256:
3078 case Intrinsic::x86_avx512_pternlog_d_512:
3079 case Intrinsic::x86_avx512_pternlog_q_128:
3080 case Intrinsic::x86_avx512_pternlog_q_256:
3081 case Intrinsic::x86_avx512_pternlog_q_512:
3089 return std::nullopt;
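// simplifyDemandedUseBitsIntrinsic: for the MOVMSK family only the low
// ArgWidth result bits can be set, so the upper bits are known zero and the
// call folds to a constant when none of the low bits are demanded.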
    bool &KnownBitsComputed) const {
  switch (II.getIntrinsicID()) {
3098 case Intrinsic::x86_mmx_pmovmskb:
3099 case Intrinsic::x86_sse_movmsk_ps:
3100 case Intrinsic::x86_sse2_movmsk_pd:
3101 case Intrinsic::x86_sse2_pmovmskb_128:
3102 case Intrinsic::x86_avx_movmsk_ps_256:
3103 case Intrinsic::x86_avx_movmsk_pd_256:
3104 case Intrinsic::x86_avx2_pmovmskb: {
    if (II.getIntrinsicID() == Intrinsic::x86_mmx_pmovmskb) {
      auto *ArgType = cast<FixedVectorType>(II.getArgOperand(0)->getType());
      ArgWidth = ArgType->getNumElements();
    Type *VTy = II.getType();
    if (DemandedElts.isZero()) {
    KnownBitsComputed = true;
3129 return std::nullopt;
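// simplifyDemandedVectorEltsIntrinsic: propagates demanded-element information
// through the scalar (ss/sd) operations, ADDSUB, variable shifts, multiplies,
// the PACK/PMADD lane structure, and the constant-mask shuffle intrinsics.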
    simplifyAndSetOp) const {
  unsigned VWidth = cast<FixedVectorType>(II.getType())->getNumElements();
  switch (II.getIntrinsicID()) {
3141 case Intrinsic::x86_xop_vfrcz_ss:
3142 case Intrinsic::x86_xop_vfrcz_sd:
    if (!DemandedElts[0]) {
    simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
    UndefElts = UndefElts[0];
3161 case Intrinsic::x86_sse_rcp_ss:
3162 case Intrinsic::x86_sse_rsqrt_ss:
    simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
    if (!DemandedElts[0]) {
      return II.getArgOperand(0);
3177 case Intrinsic::x86_sse_min_ss:
3178 case Intrinsic::x86_sse_max_ss:
3179 case Intrinsic::x86_sse_cmp_ss:
3180 case Intrinsic::x86_sse2_min_sd:
3181 case Intrinsic::x86_sse2_max_sd:
3182 case Intrinsic::x86_sse2_cmp_sd: {
    simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
    if (!DemandedElts[0]) {
      return II.getArgOperand(0);
    simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
3205 case Intrinsic::x86_sse41_round_ss:
3206 case Intrinsic::x86_sse41_round_sd: {
    APInt DemandedElts2 = DemandedElts;
    simplifyAndSetOp(&II, 0, DemandedElts2, UndefElts);
    if (!DemandedElts[0]) {
      return II.getArgOperand(0);
    simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
    UndefElts |= UndefElts2[0];
3232 case Intrinsic::x86_avx512_mask_add_ss_round:
3233 case Intrinsic::x86_avx512_mask_div_ss_round:
3234 case Intrinsic::x86_avx512_mask_mul_ss_round:
3235 case Intrinsic::x86_avx512_mask_sub_ss_round:
3236 case Intrinsic::x86_avx512_mask_max_ss_round:
3237 case Intrinsic::x86_avx512_mask_min_ss_round:
3238 case Intrinsic::x86_avx512_mask_add_sd_round:
3239 case Intrinsic::x86_avx512_mask_div_sd_round:
3240 case Intrinsic::x86_avx512_mask_mul_sd_round:
3241 case Intrinsic::x86_avx512_mask_sub_sd_round:
3242 case Intrinsic::x86_avx512_mask_max_sd_round:
3243 case Intrinsic::x86_avx512_mask_min_sd_round:
    simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
    if (!DemandedElts[0]) {
      return II.getArgOperand(0);
    simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
    simplifyAndSetOp(&II, 2, DemandedElts, UndefElts3);
    if (!UndefElts2[0] || !UndefElts3[0])
3264 case Intrinsic::x86_sse3_addsub_pd:
3265 case Intrinsic::x86_sse3_addsub_ps:
3266 case Intrinsic::x86_avx_addsub_pd_256:
3267 case Intrinsic::x86_avx_addsub_ps_256: {
    bool IsSubOnly = DemandedElts.isSubsetOf(SubMask);
    bool IsAddOnly = DemandedElts.isSubsetOf(AddMask);
    if (IsSubOnly || IsAddOnly) {
      assert((IsSubOnly ^ IsAddOnly) && "Can't be both add-only and sub-only");
      Value *Arg0 = II.getArgOperand(0), *Arg1 = II.getArgOperand(1);
          IsSubOnly ? Instruction::FSub : Instruction::FAdd, Arg0, Arg1);
    simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
    simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
    UndefElts &= UndefElts2;
3290 case Intrinsic::x86_avx2_psllv_d:
3291 case Intrinsic::x86_avx2_psllv_d_256:
3292 case Intrinsic::x86_avx2_psllv_q:
3293 case Intrinsic::x86_avx2_psllv_q_256:
3294 case Intrinsic::x86_avx2_psrlv_d:
3295 case Intrinsic::x86_avx2_psrlv_d_256:
3296 case Intrinsic::x86_avx2_psrlv_q:
3297 case Intrinsic::x86_avx2_psrlv_q_256:
3298 case Intrinsic::x86_avx2_psrav_d:
3299 case Intrinsic::x86_avx2_psrav_d_256: {
    simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
    simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
    UndefElts &= UndefElts2;
3306 case Intrinsic::x86_sse2_pmulh_w:
3307 case Intrinsic::x86_avx2_pmulh_w:
3308 case Intrinsic::x86_avx512_pmulh_w_512:
3309 case Intrinsic::x86_sse2_pmulhu_w:
3310 case Intrinsic::x86_avx2_pmulhu_w:
3311 case Intrinsic::x86_avx512_pmulhu_w_512:
3312 case Intrinsic::x86_ssse3_pmul_hr_sw_128:
3313 case Intrinsic::x86_avx2_pmul_hr_sw:
3314 case Intrinsic::x86_avx512_pmul_hr_sw_512: {
    simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
    simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
3321 case Intrinsic::x86_sse2_packssdw_128:
3322 case Intrinsic::x86_sse2_packsswb_128:
3323 case Intrinsic::x86_sse2_packuswb_128:
3324 case Intrinsic::x86_sse41_packusdw:
3325 case Intrinsic::x86_avx2_packssdw:
3326 case Intrinsic::x86_avx2_packsswb:
3327 case Intrinsic::x86_avx2_packusdw:
3328 case Intrinsic::x86_avx2_packuswb:
3329 case Intrinsic::x86_avx512_packssdw_512:
3330 case Intrinsic::x86_avx512_packsswb_512:
3331 case Intrinsic::x86_avx512_packusdw_512:
3332 case Intrinsic::x86_avx512_packuswb_512: {
    auto *Ty0 = II.getArgOperand(0)->getType();
    unsigned InnerVWidth = cast<FixedVectorType>(Ty0)->getNumElements();
    assert(VWidth == (InnerVWidth * 2) && "Unexpected input size");
    unsigned NumLanes = Ty0->getPrimitiveSizeInBits() / 128;
    unsigned VWidthPerLane = VWidth / NumLanes;
    unsigned InnerVWidthPerLane = InnerVWidth / NumLanes;
    for (int OpNum = 0; OpNum != 2; ++OpNum) {
      APInt OpDemandedElts(InnerVWidth, 0);
      for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
        unsigned LaneIdx = Lane * VWidthPerLane;
        for (unsigned Elt = 0; Elt != InnerVWidthPerLane; ++Elt) {
          unsigned Idx = LaneIdx + Elt + InnerVWidthPerLane * OpNum;
          if (DemandedElts[Idx])
            OpDemandedElts.setBit((Lane * InnerVWidthPerLane) + Elt);
      APInt OpUndefElts(InnerVWidth, 0);
      simplifyAndSetOp(&II, OpNum, OpDemandedElts, OpUndefElts);
      OpUndefElts = OpUndefElts.zext(VWidth);
      for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
        APInt LaneElts = OpUndefElts.lshr(InnerVWidthPerLane * Lane);
        LaneElts = LaneElts.getLoBits(InnerVWidthPerLane);
        LaneElts <<= InnerVWidthPerLane * (2 * Lane + OpNum);
        UndefElts |= LaneElts;
3372 case Intrinsic::x86_sse2_pmadd_wd:
3373 case Intrinsic::x86_avx2_pmadd_wd:
3374 case Intrinsic::x86_avx512_pmaddw_d_512:
3375 case Intrinsic::x86_ssse3_pmadd_ub_sw_128:
3376 case Intrinsic::x86_avx2_pmadd_ub_sw:
3377 case Intrinsic::x86_avx512_pmaddubs_w_512: {
    auto *ArgTy = II.getArgOperand(0)->getType();
    unsigned InnerVWidth = cast<FixedVectorType>(ArgTy)->getNumElements();
    assert((VWidth * 2) == InnerVWidth && "Unexpected input size");
    APInt Op0UndefElts(InnerVWidth, 0);
    APInt Op1UndefElts(InnerVWidth, 0);
    simplifyAndSetOp(&II, 0, OpDemandedElts, Op0UndefElts);
    simplifyAndSetOp(&II, 1, OpDemandedElts, Op1UndefElts);
3392 case Intrinsic::x86_ssse3_pshuf_b_128:
3393 case Intrinsic::x86_avx2_pshuf_b:
3394 case Intrinsic::x86_avx512_pshuf_b_512:
3396 case Intrinsic::x86_avx_vpermilvar_ps:
3397 case Intrinsic::x86_avx_vpermilvar_ps_256:
3398 case Intrinsic::x86_avx512_vpermilvar_ps_512:
3399 case Intrinsic::x86_avx_vpermilvar_pd:
3400 case Intrinsic::x86_avx_vpermilvar_pd_256:
3401 case Intrinsic::x86_avx512_vpermilvar_pd_512:
3403 case Intrinsic::x86_avx2_permd:
3404 case Intrinsic::x86_avx2_permps: {
    simplifyAndSetOp(&II, 1, DemandedElts, UndefElts);
3411 case Intrinsic::x86_sse4a_extrq:
3412 case Intrinsic::x86_sse4a_extrqi:
3413 case Intrinsic::x86_sse4a_insertq:
3414 case Intrinsic::x86_sse4a_insertqi:
3418 return std::nullopt;
static Value * simplifyTernarylogic(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
static Instruction * simplifyX86MaskedLoad(IntrinsicInst &II, InstCombiner &IC)
static Value * simplifyX86immShift(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
static Value * simplifyX86insertq(IntrinsicInst &II, Value *Op0, Value *Op1, APInt APLength, APInt APIndex, InstCombiner::BuilderTy &Builder)
Attempt to simplify SSE4A INSERTQ/INSERTQI instructions using constant folding or conversion to a shu...
static Value * simplifyX86addcarry(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
static Value * simplifyX86pack(IntrinsicInst &II, InstCombiner::BuilderTy &Builder, bool IsSigned)
static Constant * getNegativeIsTrueBoolVec(Constant *V, const DataLayout &DL)
Return a constant boolean vector that has true elements in all positions where the input constant dat...
static Value * simplifyX86pshufb(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
Attempt to convert pshufb* to shufflevector if the mask is constant.
static Value * simplifyX86vpermv3(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
Attempt to convert vpermi2/vpermt2 to shufflevector if the mask is constant.
static bool simplifyX86MaskedStore(IntrinsicInst &II, InstCombiner &IC)
static Value * simplifyX86vpermilvar(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
Attempt to convert vpermilvar* to shufflevector if the mask is constant.
static Value * simplifyX86pmulh(IntrinsicInst &II, InstCombiner::BuilderTy &Builder, bool IsSigned, bool IsRounding)
static Value * simplifyX86movmsk(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
static Value * simplifyX86vpermv(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
Attempt to convert vpermd/vpermps to shufflevector if the mask is constant.
static Value * simplifyX86pmadd(IntrinsicInst &II, InstCombiner::BuilderTy &Builder, bool IsPMADDWD)
static Value * simplifyX86insertps(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
static bool simplifyX86VPERMMask(Instruction *II, bool IsBinary, InstCombiner &IC)
static Value * simplifyX86extrq(IntrinsicInst &II, Value *Op0, ConstantInt *CILength, ConstantInt *CIIndex, InstCombiner::BuilderTy &Builder)
Attempt to simplify SSE4A EXTRQ/EXTRQI instructions using constant folding or conversion to a shuffle...
static Value * getBoolVecFromMask(Value *Mask, const DataLayout &DL)
Convert the x86 XMM integer vector mask to a vector of bools based on each element's most significant...
static Value * simplifyX86varShift(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)