#include "llvm/IR/IntrinsicsX86.h"

#define DEBUG_TYPE "x86tti"
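// getNegativeIsTrueBoolVec: fold a constant vector to a boolean vector that
// is true exactly where each element's sign bit is set. The fold is done by
// constant-folding an ICMP_SGT of zero against the integer-cast input
// (0 > V), so no new instructions are created.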
  VectorType *IntTy = VectorType::getInteger(cast<VectorType>(V->getType()));
  assert(V && "Vector must be foldable");

  if (auto *ConstantMask = dyn_cast<ConstantDataVector>(Mask))

  if (isa<ConstantAggregateZero>(Mask))
  unsigned AddrSpace = cast<PointerType>(Ptr->getType())->getAddressSpace();

  if (isa<ConstantAggregateZero>(Mask)) {
  unsigned AddrSpace = cast<PointerType>(Ptr->getType())->getAddressSpace();
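// The masked load/store fragments above (simplifyX86MaskedLoad /
// simplifyX86MaskedStore): a zero mask touches no lanes, so the load folds to
// a zero vector and the store can simply be erased; otherwise a constant
// sign-bit mask is converted to a bool vector so the x86 intrinsic can be
// rewritten as a generic llvm.masked.load / llvm.masked.store at the
// pointer's original address space.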
  bool LogicalShift = false;
  bool ShiftLeft = false;

  case Intrinsic::x86_sse2_psrai_d:
  case Intrinsic::x86_sse2_psrai_w:
  case Intrinsic::x86_avx2_psrai_d:
  case Intrinsic::x86_avx2_psrai_w:
  case Intrinsic::x86_avx512_psrai_q_128:
  case Intrinsic::x86_avx512_psrai_q_256:
  case Intrinsic::x86_avx512_psrai_d_512:
  case Intrinsic::x86_avx512_psrai_q_512:
  case Intrinsic::x86_avx512_psrai_w_512:
  case Intrinsic::x86_sse2_psra_d:
  case Intrinsic::x86_sse2_psra_w:
  case Intrinsic::x86_avx2_psra_d:
  case Intrinsic::x86_avx2_psra_w:
  case Intrinsic::x86_avx512_psra_q_128:
  case Intrinsic::x86_avx512_psra_q_256:
  case Intrinsic::x86_avx512_psra_d_512:
  case Intrinsic::x86_avx512_psra_q_512:
  case Intrinsic::x86_avx512_psra_w_512:
    LogicalShift = false;
  case Intrinsic::x86_sse2_psrli_d:
  case Intrinsic::x86_sse2_psrli_q:
  case Intrinsic::x86_sse2_psrli_w:
  case Intrinsic::x86_avx2_psrli_d:
  case Intrinsic::x86_avx2_psrli_q:
  case Intrinsic::x86_avx2_psrli_w:
  case Intrinsic::x86_avx512_psrli_d_512:
  case Intrinsic::x86_avx512_psrli_q_512:
  case Intrinsic::x86_avx512_psrli_w_512:
  case Intrinsic::x86_sse2_psrl_d:
  case Intrinsic::x86_sse2_psrl_q:
  case Intrinsic::x86_sse2_psrl_w:
  case Intrinsic::x86_avx2_psrl_d:
  case Intrinsic::x86_avx2_psrl_q:
  case Intrinsic::x86_avx2_psrl_w:
  case Intrinsic::x86_avx512_psrl_d_512:
  case Intrinsic::x86_avx512_psrl_q_512:
  case Intrinsic::x86_avx512_psrl_w_512:
  case Intrinsic::x86_sse2_pslli_d:
  case Intrinsic::x86_sse2_pslli_q:
  case Intrinsic::x86_sse2_pslli_w:
  case Intrinsic::x86_avx2_pslli_d:
  case Intrinsic::x86_avx2_pslli_q:
  case Intrinsic::x86_avx2_pslli_w:
  case Intrinsic::x86_avx512_pslli_d_512:
  case Intrinsic::x86_avx512_pslli_q_512:
  case Intrinsic::x86_avx512_pslli_w_512:
  case Intrinsic::x86_sse2_psll_d:
  case Intrinsic::x86_sse2_psll_q:
  case Intrinsic::x86_sse2_psll_w:
  case Intrinsic::x86_avx2_psll_d:
  case Intrinsic::x86_avx2_psll_q:
  case Intrinsic::x86_avx2_psll_w:
  case Intrinsic::x86_avx512_psll_d_512:
  case Intrinsic::x86_avx512_psll_q_512:
  case Intrinsic::x86_avx512_psll_w_512:
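// When the shift amount is a known constant, the shift-by-immediate and
// shift-by-scalar intrinsics above can be rewritten as generic IR shifts.
// x86 semantics only differ for out-of-range amounts: a logical shift then
// produces zero and an arithmetic shift behaves as if shifted by BitWidth-1,
// which the clamping below reproduces. Illustrative IR, roughly:
//   %r = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 3)
// becomes
//   %r = lshr <4 x i32> %v, <i32 3, i32 3, i32 3, i32 3>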
  assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left");

  auto *VT = cast<FixedVectorType>(Vec->getType());
  Type *SVT = VT->getElementType();
  unsigned VWidth = VT->getNumElements();

  return (LogicalShift ? (ShiftLeft ? Builder.CreateShl(Vec, Amt)
                                    : Builder.CreateLShr(Vec, Amt))
                       : Builder.CreateAShr(Vec, Amt));

  Amt = ConstantInt::get(SVT, BitWidth - 1);

  assert(cast<VectorType>(AmtVT)->getElementType() == SVT &&
         "Unexpected shift-by-scalar type");
  unsigned NumAmtElts = cast<FixedVectorType>(AmtVT)->getNumElements();

  return (LogicalShift ? (ShiftLeft ? Builder.CreateShl(Vec, Amt)
                                    : Builder.CreateLShr(Vec, Amt))
                       : Builder.CreateAShr(Vec, Amt));

  auto *CDV = dyn_cast<ConstantDataVector>(Amt);
  assert(cast<VectorType>(AmtVT)->getElementType() == SVT &&
         "Unexpected shift-by-scalar type");
  for (unsigned i = 0, NumSubElts = 64 / BitWidth; i != NumSubElts; ++i) {
    unsigned SubEltIdx = (NumSubElts - 1) - i;
    auto *SubElt = cast<ConstantInt>(CDV->getElementAsConstant(SubEltIdx));
  bool LogicalShift = false;
  bool ShiftLeft = false;

  case Intrinsic::x86_avx2_psrav_d:
  case Intrinsic::x86_avx2_psrav_d_256:
  case Intrinsic::x86_avx512_psrav_q_128:
  case Intrinsic::x86_avx512_psrav_q_256:
  case Intrinsic::x86_avx512_psrav_d_512:
  case Intrinsic::x86_avx512_psrav_q_512:
  case Intrinsic::x86_avx512_psrav_w_128:
  case Intrinsic::x86_avx512_psrav_w_256:
  case Intrinsic::x86_avx512_psrav_w_512:
    LogicalShift = false;
  case Intrinsic::x86_avx2_psrlv_d:
  case Intrinsic::x86_avx2_psrlv_d_256:
  case Intrinsic::x86_avx2_psrlv_q:
  case Intrinsic::x86_avx2_psrlv_q_256:
  case Intrinsic::x86_avx512_psrlv_d_512:
  case Intrinsic::x86_avx512_psrlv_q_512:
  case Intrinsic::x86_avx512_psrlv_w_128:
  case Intrinsic::x86_avx512_psrlv_w_256:
  case Intrinsic::x86_avx512_psrlv_w_512:
  case Intrinsic::x86_avx2_psllv_d:
  case Intrinsic::x86_avx2_psllv_d_256:
  case Intrinsic::x86_avx2_psllv_q:
  case Intrinsic::x86_avx2_psllv_q_256:
  case Intrinsic::x86_avx512_psllv_d_512:
  case Intrinsic::x86_avx512_psllv_q_512:
  case Intrinsic::x86_avx512_psllv_w_128:
  case Intrinsic::x86_avx512_psllv_w_256:
  case Intrinsic::x86_avx512_psllv_w_512:
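// Per-element variable shifts. With a constant shift-amount vector every
// lane's behaviour is known, so the intrinsic folds to a generic IR shift;
// lanes whose amount is out of range become zero for logical shifts (tracked
// by AnyOutOfRange below), while arithmetic shifts clamp the amount to
// BitWidth-1.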
  assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left");
  auto *VT = cast<FixedVectorType>(II.getType());
  Type *SVT = VT->getElementType();
  int NumElts = VT->getNumElements();

  return (LogicalShift ? (ShiftLeft ? Builder.CreateShl(Vec, Amt)
                                    : Builder.CreateLShr(Vec, Amt))
                       : Builder.CreateAShr(Vec, Amt));

  auto *CShift = dyn_cast<Constant>(Amt);

  bool AnyOutOfRange = false;
  for (int I = 0; I < NumElts; ++I) {
    auto *CElt = CShift->getAggregateElement(I);
    if (isa_and_nonnull<UndefValue>(CElt)) {
    auto *COp = dyn_cast_or_null<ConstantInt>(CElt);
    APInt ShiftVal = COp->getValue();
      AnyOutOfRange = LogicalShift;

  for (int Idx : ShiftAmts) {
      assert(LogicalShift && "Logical shift expected");
      ConstantVec.push_back(ConstantInt::getNullValue(SVT));
  for (int Idx : ShiftAmts) {
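// simplifyX86pack: PACKSS/PACKUS saturate each source element to the
// narrower destination type and concatenate the two sources per 128-bit
// lane. Constant inputs are folded outright; inputs already proven to be in
// range become a truncate feeding the lane-interleaving shuffle mask built
// below.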
  if (isa<UndefValue>(Arg0) && isa<UndefValue>(Arg1))

  auto *ArgTy = cast<FixedVectorType>(Arg0->getType());
  unsigned NumSrcElts = ArgTy->getNumElements();
  assert(cast<FixedVectorType>(ResTy)->getNumElements() == (2 * NumSrcElts) &&
         "Unexpected packing types");

  unsigned NumSrcEltsPerLane = NumSrcElts / NumLanes;
  unsigned SrcScalarSizeInBits = ArgTy->getScalarSizeInBits();
  assert(SrcScalarSizeInBits == (2 * DstScalarSizeInBits) &&
         "Unexpected packing types");

  if (!isa<Constant>(Arg0) || !isa<Constant>(Arg1))

  APInt MinValue, MaxValue;

  for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
    for (unsigned Elt = 0; Elt != NumSrcEltsPerLane; ++Elt)
      PackMask.push_back(Elt + (Lane * NumSrcEltsPerLane));
    for (unsigned Elt = 0; Elt != NumSrcEltsPerLane; ++Elt)
      PackMask.push_back(Elt + (Lane * NumSrcEltsPerLane) + NumSrcElts);

  if (isa<UndefValue>(Arg))

  auto *ArgTy = dyn_cast<FixedVectorType>(Arg->getType());
  unsigned NumElts = ArgTy->getNumElements();
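// simplifyX86movmsk: MOVMSK collects the sign bit of every element. This is
// expressible in generic IR as a signed compare against zero, a bitcast of
// the resulting <N x i1> to an iN integer, and a zext to the intrinsic's
// scalar return type, which exposes the mask to further combines.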
  assert(RetTy->getStructElementType(0)->isIntegerTy(8) &&
         RetTy->getStructElementType(1) == OpTy && OpTy == Op2->getType() &&
         "Unexpected types for x86 addcarry");
  if (!ArgImm || ArgImm->getValue().uge(256))

  auto Or = [&](auto Lhs, auto Rhs) -> std::pair<Value *, uint8_t> {
    return {Builder.CreateOr(Lhs.first, Rhs.first), Lhs.second | Rhs.second};
  };
  auto Xor = [&](auto Lhs, auto Rhs) -> std::pair<Value *, uint8_t> {
    return {Builder.CreateXor(Lhs.first, Rhs.first), Lhs.second ^ Rhs.second};
  };
  auto And = [&](auto Lhs, auto Rhs) -> std::pair<Value *, uint8_t> {
    return {Builder.CreateAnd(Lhs.first, Rhs.first), Lhs.second & Rhs.second};
  };
  auto Not = [&](auto V) -> std::pair<Value *, uint8_t> {
    return {Builder.CreateNot(V.first), ~V.second};
  };
  auto Nor = [&](auto Lhs, auto Rhs) { return Not(Or(Lhs, Rhs)); };
  auto Xnor = [&](auto Lhs, auto Rhs) { return Not(Xor(Lhs, Rhs)); };
  auto Nand = [&](auto Lhs, auto Rhs) { return Not(And(Lhs, Rhs)); };

  bool ABIsConst = AIsConst && BIsConst;
  bool ACIsConst = AIsConst && CIsConst;
  bool BCIsConst = BIsConst && CIsConst;
  bool ABCIsConst = AIsConst && BIsConst && CIsConst;

  std::pair<Value *, uint8_t> A = {ArgA, 0xf0};
  std::pair<Value *, uint8_t> B = {ArgB, 0xcc};
  std::pair<Value *, uint8_t> C = {ArgC, 0xaa};
  std::pair<Value *, uint8_t> Res = {nullptr, 0};
  uint8_t Imm = ArgImm->getValue().getZExtValue();
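// Truth-table encoding: each input carries the byte a ternlog immediate
// consisting of just that input would have: A = 0xf0, B = 0xcc, C = 0xaa.
// Every combinator mirrors its IR operation on the byte, so the byte of any
// expression tree equals the immediate that tree implements. E.g. the first
// expression below, Nor(Or(A, B), C), yields ~((0xf0 | 0xcc) | 0xaa) = 0x01,
// so it is the expansion used when Imm is 0x01. The assert at the end of the
// switch re-checks this invariant.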
    Res = Nor(Or(A, B), C);
    Res = Nor(A, Xnor(B, C));
    Res = Nor(A, Nand(B, C));
    Res = Nor(A, Not(C));
    Res = Nor(A, Nor(C, Not(B)));
    Res = Nor(A, Not(B));
    Res = Nor(A, Nor(B, Not(C)));
    Res = Nor(A, Nor(B, C));
    Res = Nor(Xnor(A, C), B);
    Res = Nor(Xnor(A, B), C);
    Res = Nor(Xnor(A, B), Xnor(A, C));
    Res = And(Nand(A, B), Xnor(B, C));
    Res = Nand(A, Or(B, C));
    Res = Nor(Nand(A, C), B);
    Res = Nor(B, Not(C));
    Res = Nor(B, Nor(C, Not(A)));
    Res = Nor(Xnor(A, B), Xor(A, C));
    Res = Xor(A, Nand(Nand(A, B), C));
    Res = Nor(Xnor(A, B), Nor(B, C));
    Res = Nand(A, Or(B, Not(C)));
    Res = Nor(B, Not(A));
    Res = Nor(Nor(A, Not(C)), B);
    Res = Nor(Nor(A, C), B);
    Res = Nand(Or(A, C), B);
    Res = Nor(Xnor(A, B), Nor(A, C));
    Res = Nand(Or(A, Not(C)), B);
    Res = Nor(Nand(A, B), C);
    Res = Nor(Xor(A, B), Xnor(A, C));
    Res = Xor(A, Nand(Nand(A, C), B));
    Res = Nor(C, Not(B));
    Res = Nor(Nor(B, Not(A)), C);
    Res = Nor(Xnor(A, C), Nor(B, C));
    Res = Nand(A, Nand(B, Not(C)));
    Res = Nor(C, Not(A));
    Res = Nor(Nor(A, Not(B)), C);
    Res = Nor(Nor(A, B), C);
    Res = Nand(Or(A, B), C);
    Res = Nor(Nor(A, B), Xnor(A, C));
    Res = Nand(Or(A, Not(B)), C);
    Res = Nor(Nor(A, C), Xnor(B, C));
    Res = Nor(Nor(A, B), Xnor(B, C));
    Res = Xor(Xnor(A, B), C);
    Res = Nand(A, Xnor(B, C));
    Res = And(A, Nand(B, C));
    Res = Nand(Nand(A, Not(C)), B);
    Res = Nand(Nand(A, Not(B)), C);
    Res = Nand(Xnor(A, C), B);
    Res = Nand(Xnor(A, B), C);
    Res = Nand(And(A, B), C);
    Res = And(Xnor(A, B), C);
    Res = Nor(Xor(A, B), Nor(C, Not(A)));
    Res = And(Xnor(A, C), B);
    Res = Nor(Xor(A, C), Nor(B, Not(A)));
    Res = Xor(Nor(Xnor(A, B), Nor(B, C)), C);
    Res = Xor(A, Nand(B, C));
    Res = Xor(B, Nor(Nor(B, Not(A)), C));
    Res = And(Nand(A, Not(B)), C);
    Res = And(Nand(A, Not(C)), B);
    Res = Nand(A, Nand(B, C));
    Res = And(A, Xnor(B, C));
    Res = Nor(Nor(A, Not(B)), Xor(B, C));
    Res = Xor(Nor(Xnor(A, B), Nor(A, C)), C);
    Res = Xor(Nand(A, C), B);
    Res = Nor(Nor(A, B), Xor(Xnor(A, B), C));
    Res = Xor(Nand(A, B), C);
    Res = Nor(Nor(A, B), Xor(B, C));
    Res = Xor(Nor(B, Not(A)), C);
    Res = Or(Nor(A, B), Xnor(B, C));
    Res = Xor(B, Nor(C, Not(A)));
    Res = Or(Nor(A, C), Xnor(B, C));
    Res = Nand(A, Xor(B, C));
    Res = Xor(A, Nor(Nor(A, Not(B)), C));
    Res = Xor(A, Nor(Nor(A, B), C));
    Res = Xor(Nor(A, Not(B)), C);
    Res = Or(Nor(A, B), Xnor(A, C));
    Res = Or(Nor(A, B), C);
    Res = Xor(Nor(Xnor(B, C), A), C);
    Res = Or(Nor(A, Not(B)), C);
    Res = Or(C, Not(A));
    Res = And(A, Nand(B, Not(C)));
    Res = Nand(Nand(A, C), B);
    Res = Xor(A, Nor(C, Not(B)));
    Res = Or(Xnor(A, C), Nor(B, C));
    Res = Nand(Xor(A, C), B);
    Res = Xor(Nor(Xnor(A, C), B), C);
    Res = Or(Nor(B, Not(A)), C);
    Res = Or(C, Not(B));
    Res = Or(Nand(A, B), C);
    Res = Xor(A, Nor(Nor(A, Not(C)), B));
    Res = Xor(A, Nor(Nor(A, C), B));
    Res = Xor(Nor(A, Not(C)), B);
    Res = Or(Xnor(A, B), Nor(A, C));
    Res = Xor(B, Nor(A, Xnor(B, C)));
    Res = Or(Nor(A, C), B);
    Res = Or(Nor(A, Not(C)), B);
    Res = Or(B, Not(A));
    Res = Xor(A, Nor(B, Not(C)));
    Res = Or(Xnor(A, B), Nor(B, C));
    Res = Nand(Nand(A, B), C);
    Res = Nand(Xor(A, B), C);
    Res = Xor(Nor(Xnor(A, B), C), B);
    Res = Or(B, Nor(C, Not(A)));
    Res = Or(B, Not(C));
    Res = Or(Nand(A, C), B);
    Res = Xor(A, Nor(Xnor(A, C), B));
    Res = Xor(A, Nor(Xnor(A, B), C));
    Res = Or(Xnor(A, B), Xnor(A, C));
    Res = Or(Xnor(A, B), C);
    Res = Or(Xnor(A, C), B);
    Res = Nand(A, Nor(B, C));
    Res = Or(A, Nor(B, C));
    Res = Or(A, Nor(B, Not(C)));
    Res = Or(A, Not(B));
    Res = Or(A, Nor(C, Not(B)));
    Res = Or(A, Not(C));
    Res = Or(A, Nand(B, C));
    Res = Or(A, Xnor(B, C));
    Res = Nand(Nor(A, C), B);
    Res = Nand(Nor(A, B), C);

  assert((Res.first == nullptr || Res.second == Imm) &&
         "Simplification of ternary logic does not verify!");
  auto *VecTy = cast<FixedVectorType>(II.getType());
  assert(VecTy->getNumElements() == 4 && "insertps with wrong vector type");

  uint8_t Imm = CInt->getZExtValue();
  uint8_t ZMask = Imm & 0xf;
  uint8_t DestLane = (Imm >> 4) & 0x3;
  uint8_t SourceLane = (Imm >> 6) & 0x3;

  int ShuffleMask[4] = {0, 1, 2, 3};

      (ZMask & (1 << DestLane))) {
    ShuffleMask[DestLane] = SourceLane;
    for (unsigned i = 0; i < 4; ++i)
      if ((ZMask >> i) & 0x1)
        ShuffleMask[i] = i + 4;

    ShuffleMask[DestLane] = SourceLane + 4;

  auto LowConstantHighUndef = [&](uint64_t Val) {
    Constant *Args[] = {ConstantInt::get(IntTy64, Val),
                        UndefValue::get(IntTy64)};

  auto *C0 = dyn_cast<Constant>(Op0);
      C0 ? dyn_cast_or_null<ConstantInt>(C0->getAggregateElement((unsigned)0))

  if (CILength && CIIndex) {
    for (int i = 0; i != (int)Length; ++i)
    for (int i = Length; i != 8; ++i)
    for (int i = 8; i != 16; ++i)

    APInt Elt = CI0->getValue();

    Value *Args[] = {Op0, CILength, CIIndex};

  if (CI0 && CI0->isZero())
    return LowConstantHighUndef(0);
    for (int i = 0; i != (int)Index; ++i)
    for (int i = 0; i != (int)Length; ++i)
    for (int i = 8; i != 16; ++i)

  auto *C0 = dyn_cast<Constant>(Op0);
  auto *C1 = dyn_cast<Constant>(Op1);
      C0 ? dyn_cast_or_null<ConstantInt>(C0->getAggregateElement((unsigned)0))
      C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)0))

    APInt V00 = CI00->getValue();
    APInt V10 = CI10->getValue();

    APInt Val = V00 | V10;

    Value *Args[] = {Op0, Op1, CILength, CIIndex};
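// simplifyX86pshufb: with a constant control vector every PSHUFB byte select
// is known. A control byte with its high bit set writes zero; otherwise its
// low bits index within the byte's own 16-byte lane. The loop below lowers
// this to a constant shufflevector mask.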
  auto *VecTy = cast<FixedVectorType>(II.getType());
  unsigned NumElts = VecTy->getNumElements();
  assert((NumElts == 16 || NumElts == 32 || NumElts == 64) &&
         "Unexpected number of elements in shuffle mask!");

  for (unsigned I = 0; I < NumElts; ++I) {
    Constant *COp = V->getAggregateElement(I);
    if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
    if (isa<UndefValue>(COp)) {
    int8_t Index = cast<ConstantInt>(COp)->getValue().getZExtValue();
  auto *VecTy = cast<FixedVectorType>(II.getType());
  unsigned NumElts = VecTy->getNumElements();
  bool IsPD = VecTy->getScalarType()->isDoubleTy();
  unsigned NumLaneElts = IsPD ? 2 : 4;
  assert(NumElts == 16 || NumElts == 8 || NumElts == 4 || NumElts == 2);

  for (unsigned I = 0; I < NumElts; ++I) {
    Constant *COp = V->getAggregateElement(I);
    if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
    if (isa<UndefValue>(COp)) {
    APInt Index = cast<ConstantInt>(COp)->getValue();

    Index += APInt(32, (I / NumLaneElts) * NumLaneElts);

    Indexes[I] = Index.getZExtValue();
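// VPERMILVAR only indexes within a 128-bit lane, which is why the lane base
// (I / NumLaneElts) * NumLaneElts is added back above. VPERMD/VPERMPS and
// the AVX-512 PERMVAR forms handled next index across the whole vector, so
// their constant masks map directly to shufflevector indices.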
  auto *VecTy = cast<FixedVectorType>(II.getType());
  unsigned Size = VecTy->getNumElements();
  assert(cast<FixedVectorType>(V->getType())->getNumElements() == Size &&
         "Unexpected shuffle mask size");

  for (unsigned I = 0; I < Size; ++I) {
    Constant *COp = V->getAggregateElement(I);
    if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
    if (isa<UndefValue>(COp)) {
std::optional<Instruction *>
X86TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
  auto SimplifyDemandedVectorEltsLow = [&IC](Value *Op, unsigned Width,
                                             unsigned DemandedWidth) {
    APInt UndefElts(Width, 0);
  case Intrinsic::x86_bmi_bextr_32:
  case Intrinsic::x86_bmi_bextr_64:
  case Intrinsic::x86_tbm_bextri_u32:
  case Intrinsic::x86_tbm_bextri_u64:
    if (auto *InC = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
      uint64_t Result = InC->getZExtValue() >> Shift;
      Result &= maskTrailingOnes<uint64_t>(Length);
      return IC.replaceInstUsesWith(II,
                                    ConstantInt::get(II.getType(), Result));

  case Intrinsic::x86_bmi_bzhi_32:
  case Intrinsic::x86_bmi_bzhi_64:
    if (auto *InC = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
      uint64_t Result = InC->getZExtValue();
      Result &= maskTrailingOnes<uint64_t>(Index);
      return IC.replaceInstUsesWith(II,
                                    ConstantInt::get(II.getType(), Result));
  case Intrinsic::x86_bmi_pext_32:
  case Intrinsic::x86_bmi_pext_64:
    if (auto *MaskC = dyn_cast<ConstantInt>(II.getArgOperand(1))) {
      if (MaskC->isNullValue()) {
      if (MaskC->isAllOnesValue()) {

      unsigned MaskIdx, MaskLen;
      if (MaskC->getValue().isShiftedMask(MaskIdx, MaskLen)) {
        Value *ShiftAmt = ConstantInt::get(II.getType(), MaskIdx);

      if (auto *SrcC = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
        uint64_t Src = SrcC->getZExtValue();
        uint64_t Mask = MaskC->getZExtValue();

          if (BitToTest & Src)

        return IC.replaceInstUsesWith(II,
                                      ConstantInt::get(II.getType(), Result));

  case Intrinsic::x86_bmi_pdep_32:
  case Intrinsic::x86_bmi_pdep_64:
    if (auto *MaskC = dyn_cast<ConstantInt>(II.getArgOperand(1))) {
      if (MaskC->isNullValue()) {
      if (MaskC->isAllOnesValue()) {

      unsigned MaskIdx, MaskLen;
      if (MaskC->getValue().isShiftedMask(MaskIdx, MaskLen)) {
        Value *ShiftAmt = ConstantInt::get(II.getType(), MaskIdx);

      if (auto *SrcC = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
        uint64_t Src = SrcC->getZExtValue();
        uint64_t Mask = MaskC->getZExtValue();

          if (BitToTest & Src)

        return IC.replaceInstUsesWith(II,
                                      ConstantInt::get(II.getType(), Result));
  case Intrinsic::x86_sse_cvtss2si:
  case Intrinsic::x86_sse_cvtss2si64:
  case Intrinsic::x86_sse_cvttss2si:
  case Intrinsic::x86_sse_cvttss2si64:
  case Intrinsic::x86_sse2_cvtsd2si:
  case Intrinsic::x86_sse2_cvtsd2si64:
  case Intrinsic::x86_sse2_cvttsd2si:
  case Intrinsic::x86_sse2_cvttsd2si64:
  case Intrinsic::x86_avx512_vcvtss2si32:
  case Intrinsic::x86_avx512_vcvtss2si64:
  case Intrinsic::x86_avx512_vcvtss2usi32:
  case Intrinsic::x86_avx512_vcvtss2usi64:
  case Intrinsic::x86_avx512_vcvtsd2si32:
  case Intrinsic::x86_avx512_vcvtsd2si64:
  case Intrinsic::x86_avx512_vcvtsd2usi32:
  case Intrinsic::x86_avx512_vcvtsd2usi64:
  case Intrinsic::x86_avx512_cvttss2si:
  case Intrinsic::x86_avx512_cvttss2si64:
  case Intrinsic::x86_avx512_cvttss2usi:
  case Intrinsic::x86_avx512_cvttss2usi64:
  case Intrinsic::x86_avx512_cvttsd2si:
  case Intrinsic::x86_avx512_cvttsd2si64:
  case Intrinsic::x86_avx512_cvttsd2usi:
  case Intrinsic::x86_avx512_cvttsd2usi64: {
    unsigned VWidth = cast<FixedVectorType>(Arg->getType())->getNumElements();
    if (Value *V = SimplifyDemandedVectorEltsLow(Arg, VWidth, 1)) {
  case Intrinsic::x86_mmx_pmovmskb:
  case Intrinsic::x86_sse_movmsk_ps:
  case Intrinsic::x86_sse2_movmsk_pd:
  case Intrinsic::x86_sse2_pmovmskb_128:
  case Intrinsic::x86_avx_movmsk_pd_256:
  case Intrinsic::x86_avx_movmsk_ps_256:
  case Intrinsic::x86_avx2_pmovmskb:

  case Intrinsic::x86_sse_comieq_ss:
  case Intrinsic::x86_sse_comige_ss:
  case Intrinsic::x86_sse_comigt_ss:
  case Intrinsic::x86_sse_comile_ss:
  case Intrinsic::x86_sse_comilt_ss:
  case Intrinsic::x86_sse_comineq_ss:
  case Intrinsic::x86_sse_ucomieq_ss:
  case Intrinsic::x86_sse_ucomige_ss:
  case Intrinsic::x86_sse_ucomigt_ss:
  case Intrinsic::x86_sse_ucomile_ss:
  case Intrinsic::x86_sse_ucomilt_ss:
  case Intrinsic::x86_sse_ucomineq_ss:
  case Intrinsic::x86_sse2_comieq_sd:
  case Intrinsic::x86_sse2_comige_sd:
  case Intrinsic::x86_sse2_comigt_sd:
  case Intrinsic::x86_sse2_comile_sd:
  case Intrinsic::x86_sse2_comilt_sd:
  case Intrinsic::x86_sse2_comineq_sd:
  case Intrinsic::x86_sse2_ucomieq_sd:
  case Intrinsic::x86_sse2_ucomige_sd:
  case Intrinsic::x86_sse2_ucomigt_sd:
  case Intrinsic::x86_sse2_ucomile_sd:
  case Intrinsic::x86_sse2_ucomilt_sd:
  case Intrinsic::x86_sse2_ucomineq_sd:
  case Intrinsic::x86_avx512_vcomi_ss:
  case Intrinsic::x86_avx512_vcomi_sd:
  case Intrinsic::x86_avx512_mask_cmp_ss:
  case Intrinsic::x86_avx512_mask_cmp_sd: {
    bool MadeChange = false;
    unsigned VWidth = cast<FixedVectorType>(Arg0->getType())->getNumElements();
    if (Value *V = SimplifyDemandedVectorEltsLow(Arg0, VWidth, 1)) {
    if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, 1)) {
  case Intrinsic::x86_avx512_add_ps_512:
  case Intrinsic::x86_avx512_div_ps_512:
  case Intrinsic::x86_avx512_mul_ps_512:
  case Intrinsic::x86_avx512_sub_ps_512:
  case Intrinsic::x86_avx512_add_pd_512:
  case Intrinsic::x86_avx512_div_pd_512:
  case Intrinsic::x86_avx512_mul_pd_512:
  case Intrinsic::x86_avx512_sub_pd_512:
    if (R->getValue() == 4) {
      case Intrinsic::x86_avx512_add_ps_512:
      case Intrinsic::x86_avx512_add_pd_512:
      case Intrinsic::x86_avx512_sub_ps_512:
      case Intrinsic::x86_avx512_sub_pd_512:
      case Intrinsic::x86_avx512_mul_ps_512:
      case Intrinsic::x86_avx512_mul_pd_512:
      case Intrinsic::x86_avx512_div_ps_512:
      case Intrinsic::x86_avx512_div_pd_512:

  case Intrinsic::x86_avx512_mask_add_ss_round:
  case Intrinsic::x86_avx512_mask_div_ss_round:
  case Intrinsic::x86_avx512_mask_mul_ss_round:
  case Intrinsic::x86_avx512_mask_sub_ss_round:
  case Intrinsic::x86_avx512_mask_add_sd_round:
  case Intrinsic::x86_avx512_mask_div_sd_round:
  case Intrinsic::x86_avx512_mask_mul_sd_round:
  case Intrinsic::x86_avx512_mask_sub_sd_round:
    if (R->getValue() == 4) {
      case Intrinsic::x86_avx512_mask_add_ss_round:
      case Intrinsic::x86_avx512_mask_add_sd_round:
      case Intrinsic::x86_avx512_mask_sub_ss_round:
      case Intrinsic::x86_avx512_mask_sub_sd_round:
      case Intrinsic::x86_avx512_mask_mul_ss_round:
      case Intrinsic::x86_avx512_mask_mul_sd_round:
      case Intrinsic::x86_avx512_mask_div_ss_round:
      case Intrinsic::x86_avx512_mask_div_sd_round:

      auto *C = dyn_cast<ConstantInt>(Mask);
      if (!C || !C->getValue()[0]) {
            cast<IntegerType>(Mask->getType())->getBitWidth());
  case Intrinsic::x86_sse2_psrai_d:
  case Intrinsic::x86_sse2_psrai_w:
  case Intrinsic::x86_avx2_psrai_d:
  case Intrinsic::x86_avx2_psrai_w:
  case Intrinsic::x86_avx512_psrai_q_128:
  case Intrinsic::x86_avx512_psrai_q_256:
  case Intrinsic::x86_avx512_psrai_d_512:
  case Intrinsic::x86_avx512_psrai_q_512:
  case Intrinsic::x86_avx512_psrai_w_512:
  case Intrinsic::x86_sse2_psrli_d:
  case Intrinsic::x86_sse2_psrli_q:
  case Intrinsic::x86_sse2_psrli_w:
  case Intrinsic::x86_avx2_psrli_d:
  case Intrinsic::x86_avx2_psrli_q:
  case Intrinsic::x86_avx2_psrli_w:
  case Intrinsic::x86_avx512_psrli_d_512:
  case Intrinsic::x86_avx512_psrli_q_512:
  case Intrinsic::x86_avx512_psrli_w_512:
  case Intrinsic::x86_sse2_pslli_d:
  case Intrinsic::x86_sse2_pslli_q:
  case Intrinsic::x86_sse2_pslli_w:
  case Intrinsic::x86_avx2_pslli_d:
  case Intrinsic::x86_avx2_pslli_q:
  case Intrinsic::x86_avx2_pslli_w:
  case Intrinsic::x86_avx512_pslli_d_512:
  case Intrinsic::x86_avx512_pslli_q_512:
  case Intrinsic::x86_avx512_pslli_w_512:

  case Intrinsic::x86_sse2_psra_d:
  case Intrinsic::x86_sse2_psra_w:
  case Intrinsic::x86_avx2_psra_d:
  case Intrinsic::x86_avx2_psra_w:
  case Intrinsic::x86_avx512_psra_q_128:
  case Intrinsic::x86_avx512_psra_q_256:
  case Intrinsic::x86_avx512_psra_d_512:
  case Intrinsic::x86_avx512_psra_q_512:
  case Intrinsic::x86_avx512_psra_w_512:
  case Intrinsic::x86_sse2_psrl_d:
  case Intrinsic::x86_sse2_psrl_q:
  case Intrinsic::x86_sse2_psrl_w:
  case Intrinsic::x86_avx2_psrl_d:
  case Intrinsic::x86_avx2_psrl_q:
  case Intrinsic::x86_avx2_psrl_w:
  case Intrinsic::x86_avx512_psrl_d_512:
  case Intrinsic::x86_avx512_psrl_q_512:
  case Intrinsic::x86_avx512_psrl_w_512:
  case Intrinsic::x86_sse2_psll_d:
  case Intrinsic::x86_sse2_psll_q:
  case Intrinsic::x86_sse2_psll_w:
  case Intrinsic::x86_avx2_psll_d:
  case Intrinsic::x86_avx2_psll_q:
  case Intrinsic::x86_avx2_psll_w:
  case Intrinsic::x86_avx512_psll_d_512:
  case Intrinsic::x86_avx512_psll_q_512:
  case Intrinsic::x86_avx512_psll_w_512: {
           "Unexpected packed shift size");
    unsigned VWidth = cast<FixedVectorType>(Arg1->getType())->getNumElements();

    if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, VWidth / 2)) {
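// A shift-by-scalar intrinsic reads only the low 64 bits of its second
// operand, so just the lower half of that vector is demanded here; the upper
// elements can be simplified away without changing the shift amount.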
  case Intrinsic::x86_avx2_psllv_d:
  case Intrinsic::x86_avx2_psllv_d_256:
  case Intrinsic::x86_avx2_psllv_q:
  case Intrinsic::x86_avx2_psllv_q_256:
  case Intrinsic::x86_avx512_psllv_d_512:
  case Intrinsic::x86_avx512_psllv_q_512:
  case Intrinsic::x86_avx512_psllv_w_128:
  case Intrinsic::x86_avx512_psllv_w_256:
  case Intrinsic::x86_avx512_psllv_w_512:
  case Intrinsic::x86_avx2_psrav_d:
  case Intrinsic::x86_avx2_psrav_d_256:
  case Intrinsic::x86_avx512_psrav_q_128:
  case Intrinsic::x86_avx512_psrav_q_256:
  case Intrinsic::x86_avx512_psrav_d_512:
  case Intrinsic::x86_avx512_psrav_q_512:
  case Intrinsic::x86_avx512_psrav_w_128:
  case Intrinsic::x86_avx512_psrav_w_256:
  case Intrinsic::x86_avx512_psrav_w_512:
  case Intrinsic::x86_avx2_psrlv_d:
  case Intrinsic::x86_avx2_psrlv_d_256:
  case Intrinsic::x86_avx2_psrlv_q:
  case Intrinsic::x86_avx2_psrlv_q_256:
  case Intrinsic::x86_avx512_psrlv_d_512:
  case Intrinsic::x86_avx512_psrlv_q_512:
  case Intrinsic::x86_avx512_psrlv_w_128:
  case Intrinsic::x86_avx512_psrlv_w_256:
  case Intrinsic::x86_avx512_psrlv_w_512:

  case Intrinsic::x86_sse2_packssdw_128:
  case Intrinsic::x86_sse2_packsswb_128:
  case Intrinsic::x86_avx2_packssdw:
  case Intrinsic::x86_avx2_packsswb:
  case Intrinsic::x86_avx512_packssdw_512:
  case Intrinsic::x86_avx512_packsswb_512:

  case Intrinsic::x86_sse2_packuswb_128:
  case Intrinsic::x86_sse41_packusdw:
  case Intrinsic::x86_avx2_packusdw:
  case Intrinsic::x86_avx2_packuswb:
  case Intrinsic::x86_avx512_packusdw_512:
  case Intrinsic::x86_avx512_packuswb_512:

  case Intrinsic::x86_pclmulqdq:
  case Intrinsic::x86_pclmulqdq_256:
  case Intrinsic::x86_pclmulqdq_512: {
    unsigned Imm = C->getZExtValue();

    bool MadeChange = false;
    unsigned VWidth =
        cast<FixedVectorType>(Arg0->getType())->getNumElements();

    APInt UndefElts1(VWidth, 0);
    APInt DemandedElts1 =

    APInt UndefElts2(VWidth, 0);
    APInt DemandedElts2 =

  case Intrinsic::x86_sse41_insertps:

  case Intrinsic::x86_sse4a_extrq: {
    unsigned VWidth0 = cast<FixedVectorType>(Op0->getType())->getNumElements();
    unsigned VWidth1 = cast<FixedVectorType>(Op1->getType())->getNumElements();
           VWidth1 == 16 && "Unexpected operand sizes");

    auto *C1 = dyn_cast<Constant>(Op1);
        C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)0))
        C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)1))

    bool MadeChange = false;
    if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
    if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 2)) {
  case Intrinsic::x86_sse4a_extrqi: {
    unsigned VWidth = cast<FixedVectorType>(Op0->getType())->getNumElements();
           "Unexpected operand size");

    auto *CILength = dyn_cast<ConstantInt>(II.getArgOperand(1));

    if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) {

  case Intrinsic::x86_sse4a_insertq: {
    unsigned VWidth = cast<FixedVectorType>(Op0->getType())->getNumElements();
           cast<FixedVectorType>(Op1->getType())->getNumElements() == 2 &&
           "Unexpected operand size");

    auto *C1 = dyn_cast<Constant>(Op1);
        C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)1))

    const APInt &V11 = CI11->getValue();

    if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) {

  case Intrinsic::x86_sse4a_insertqi: {
    unsigned VWidth0 = cast<FixedVectorType>(Op0->getType())->getNumElements();
    unsigned VWidth1 = cast<FixedVectorType>(Op1->getType())->getNumElements();
           VWidth1 == 2 && "Unexpected operand sizes");

    auto *CILength = dyn_cast<ConstantInt>(II.getArgOperand(2));

    if (CILength && CIIndex) {
      APInt Len = CILength->getValue().zextOrTrunc(6);
      APInt Idx = CIIndex->getValue().zextOrTrunc(6);

    bool MadeChange = false;
    if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
    if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 1)) {
  case Intrinsic::x86_sse41_pblendvb:
  case Intrinsic::x86_sse41_blendvps:
  case Intrinsic::x86_sse41_blendvpd:
  case Intrinsic::x86_avx_blendv_ps_256:
  case Intrinsic::x86_avx_blendv_pd_256:
  case Intrinsic::x86_avx2_pblendvb: {
    if (isa<ConstantAggregateZero>(Mask)) {
    if (auto *ConstantMask = dyn_cast<ConstantDataVector>(Mask)) {

    assert(Mask->getType()->getPrimitiveSizeInBits() ==
           "Not expecting mask and operands with different sizes");

    unsigned NumMaskElts =
        cast<FixedVectorType>(Mask->getType())->getNumElements();
    unsigned NumOperandElts =
        cast<FixedVectorType>(II.getType())->getNumElements();
    if (NumMaskElts == NumOperandElts) {
    if (NumMaskElts < NumOperandElts) {
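// BLENDV selects per element on the sign bit of the mask. A zero mask
// returns the first operand unchanged, a constant mask folds to a
// shufflevector, and a mask that is a sign-extended <N x i1> vector becomes
// a plain IR select (bitcasting first when the mask and operand element
// counts differ).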
  case Intrinsic::x86_ssse3_pshuf_b_128:
  case Intrinsic::x86_avx2_pshuf_b:
  case Intrinsic::x86_avx512_pshuf_b_512:

  case Intrinsic::x86_avx_vpermilvar_ps:
  case Intrinsic::x86_avx_vpermilvar_ps_256:
  case Intrinsic::x86_avx512_vpermilvar_ps_512:
  case Intrinsic::x86_avx_vpermilvar_pd:
  case Intrinsic::x86_avx_vpermilvar_pd_256:
  case Intrinsic::x86_avx512_vpermilvar_pd_512:

  case Intrinsic::x86_avx2_permd:
  case Intrinsic::x86_avx2_permps:
  case Intrinsic::x86_avx512_permvar_df_256:
  case Intrinsic::x86_avx512_permvar_df_512:
  case Intrinsic::x86_avx512_permvar_di_256:
  case Intrinsic::x86_avx512_permvar_di_512:
  case Intrinsic::x86_avx512_permvar_hi_128:
  case Intrinsic::x86_avx512_permvar_hi_256:
  case Intrinsic::x86_avx512_permvar_hi_512:
  case Intrinsic::x86_avx512_permvar_qi_128:
  case Intrinsic::x86_avx512_permvar_qi_256:
  case Intrinsic::x86_avx512_permvar_qi_512:
  case Intrinsic::x86_avx512_permvar_sf_512:
  case Intrinsic::x86_avx512_permvar_si_512:

  case Intrinsic::x86_avx_maskload_ps:
  case Intrinsic::x86_avx_maskload_pd:
  case Intrinsic::x86_avx_maskload_ps_256:
  case Intrinsic::x86_avx_maskload_pd_256:
  case Intrinsic::x86_avx2_maskload_d:
  case Intrinsic::x86_avx2_maskload_q:
  case Intrinsic::x86_avx2_maskload_d_256:
  case Intrinsic::x86_avx2_maskload_q_256:

  case Intrinsic::x86_sse2_maskmov_dqu:
  case Intrinsic::x86_avx_maskstore_ps:
  case Intrinsic::x86_avx_maskstore_pd:
  case Intrinsic::x86_avx_maskstore_ps_256:
  case Intrinsic::x86_avx_maskstore_pd_256:
  case Intrinsic::x86_avx2_maskstore_d:
  case Intrinsic::x86_avx2_maskstore_q:
  case Intrinsic::x86_avx2_maskstore_d_256:
  case Intrinsic::x86_avx2_maskstore_q_256:

  case Intrinsic::x86_addcarry_32:
  case Intrinsic::x86_addcarry_64:

  case Intrinsic::x86_avx512_pternlog_d_128:
  case Intrinsic::x86_avx512_pternlog_d_256:
  case Intrinsic::x86_avx512_pternlog_d_512:
  case Intrinsic::x86_avx512_pternlog_q_128:
  case Intrinsic::x86_avx512_pternlog_q_256:
  case Intrinsic::x86_avx512_pternlog_q_512:

  return std::nullopt;
    bool &KnownBitsComputed) const {
  case Intrinsic::x86_mmx_pmovmskb:
  case Intrinsic::x86_sse_movmsk_ps:
  case Intrinsic::x86_sse2_movmsk_pd:
  case Intrinsic::x86_sse2_pmovmskb_128:
  case Intrinsic::x86_avx_movmsk_ps_256:
  case Intrinsic::x86_avx_movmsk_pd_256:
  case Intrinsic::x86_avx2_pmovmskb: {
      ArgWidth = ArgType->getNumElements();
    if (DemandedElts.isZero()) {
    KnownBitsComputed = true;
  return std::nullopt;

    simplifyAndSetOp) const {
  unsigned VWidth = cast<FixedVectorType>(II.getType())->getNumElements();
  case Intrinsic::x86_xop_vfrcz_ss:
  case Intrinsic::x86_xop_vfrcz_sd:
    if (!DemandedElts[0]) {
    simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
    UndefElts = UndefElts[0];
  case Intrinsic::x86_sse_rcp_ss:
  case Intrinsic::x86_sse_rsqrt_ss:
    simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
    if (!DemandedElts[0]) {

  case Intrinsic::x86_sse_min_ss:
  case Intrinsic::x86_sse_max_ss:
  case Intrinsic::x86_sse_cmp_ss:
  case Intrinsic::x86_sse2_min_sd:
  case Intrinsic::x86_sse2_max_sd:
  case Intrinsic::x86_sse2_cmp_sd: {
    simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
    if (!DemandedElts[0]) {
    simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);

  case Intrinsic::x86_sse41_round_ss:
  case Intrinsic::x86_sse41_round_sd: {
    APInt DemandedElts2 = DemandedElts;
    simplifyAndSetOp(&II, 0, DemandedElts2, UndefElts);
    if (!DemandedElts[0]) {
    simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
    UndefElts |= UndefElts2[0];

  case Intrinsic::x86_avx512_mask_add_ss_round:
  case Intrinsic::x86_avx512_mask_div_ss_round:
  case Intrinsic::x86_avx512_mask_mul_ss_round:
  case Intrinsic::x86_avx512_mask_sub_ss_round:
  case Intrinsic::x86_avx512_mask_max_ss_round:
  case Intrinsic::x86_avx512_mask_min_ss_round:
  case Intrinsic::x86_avx512_mask_add_sd_round:
  case Intrinsic::x86_avx512_mask_div_sd_round:
  case Intrinsic::x86_avx512_mask_mul_sd_round:
  case Intrinsic::x86_avx512_mask_sub_sd_round:
  case Intrinsic::x86_avx512_mask_max_sd_round:
  case Intrinsic::x86_avx512_mask_min_sd_round:
    simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
    if (!DemandedElts[0]) {
    simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
    simplifyAndSetOp(&II, 2, DemandedElts, UndefElts3);
    if (!UndefElts2[0] || !UndefElts3[0])

  case Intrinsic::x86_sse3_addsub_pd:
  case Intrinsic::x86_sse3_addsub_ps:
  case Intrinsic::x86_avx_addsub_pd_256:
  case Intrinsic::x86_avx_addsub_ps_256: {
    bool IsSubOnly = DemandedElts.isSubsetOf(SubMask);
    bool IsAddOnly = DemandedElts.isSubsetOf(AddMask);
    if (IsSubOnly || IsAddOnly) {
      assert((IsSubOnly ^ IsAddOnly) && "Can't be both add-only and sub-only");
        IsSubOnly ? Instruction::FSub : Instruction::FAdd, Arg0, Arg1);

    simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
    simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
    UndefElts &= UndefElts2;
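// ADDSUB subtracts in the even elements and adds in the odd elements. If the
// demanded elements fall entirely on one side (DemandedElts is a subset of
// SubMask or AddMask), the intrinsic collapses to a single vector fsub or
// fadd over the same operands.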
  case Intrinsic::x86_avx2_psllv_d:
  case Intrinsic::x86_avx2_psllv_d_256:
  case Intrinsic::x86_avx2_psllv_q:
  case Intrinsic::x86_avx2_psllv_q_256:
  case Intrinsic::x86_avx2_psrlv_d:
  case Intrinsic::x86_avx2_psrlv_d_256:
  case Intrinsic::x86_avx2_psrlv_q:
  case Intrinsic::x86_avx2_psrlv_q_256:
  case Intrinsic::x86_avx2_psrav_d:
  case Intrinsic::x86_avx2_psrav_d_256: {
    simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
    simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
    UndefElts &= UndefElts2;

  case Intrinsic::x86_sse2_packssdw_128:
  case Intrinsic::x86_sse2_packsswb_128:
  case Intrinsic::x86_sse2_packuswb_128:
  case Intrinsic::x86_sse41_packusdw:
  case Intrinsic::x86_avx2_packssdw:
  case Intrinsic::x86_avx2_packsswb:
  case Intrinsic::x86_avx2_packusdw:
  case Intrinsic::x86_avx2_packuswb:
  case Intrinsic::x86_avx512_packssdw_512:
  case Intrinsic::x86_avx512_packsswb_512:
  case Intrinsic::x86_avx512_packusdw_512:
  case Intrinsic::x86_avx512_packuswb_512: {
    unsigned InnerVWidth = cast<FixedVectorType>(Ty0)->getNumElements();
    assert(VWidth == (InnerVWidth * 2) && "Unexpected input size");

    unsigned NumLanes = Ty0->getPrimitiveSizeInBits() / 128;
    unsigned VWidthPerLane = VWidth / NumLanes;
    unsigned InnerVWidthPerLane = InnerVWidth / NumLanes;

    for (int OpNum = 0; OpNum != 2; ++OpNum) {
      APInt OpDemandedElts(InnerVWidth, 0);
      for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
        unsigned LaneIdx = Lane * VWidthPerLane;
        for (unsigned Elt = 0; Elt != InnerVWidthPerLane; ++Elt) {
          unsigned Idx = LaneIdx + Elt + InnerVWidthPerLane * OpNum;
          if (DemandedElts[Idx])
            OpDemandedElts.setBit((Lane * InnerVWidthPerLane) + Elt);

      APInt OpUndefElts(InnerVWidth, 0);
      simplifyAndSetOp(&II, OpNum, OpDemandedElts, OpUndefElts);

      OpUndefElts = OpUndefElts.zext(VWidth);
      for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
        APInt LaneElts = OpUndefElts.lshr(InnerVWidthPerLane * Lane);
        LaneElts = LaneElts.getLoBits(InnerVWidthPerLane);
        LaneElts <<= InnerVWidthPerLane * (2 * Lane + OpNum);
        UndefElts |= LaneElts;
  case Intrinsic::x86_ssse3_pshuf_b_128:
  case Intrinsic::x86_avx2_pshuf_b:
  case Intrinsic::x86_avx512_pshuf_b_512:
  case Intrinsic::x86_avx_vpermilvar_ps:
  case Intrinsic::x86_avx_vpermilvar_ps_256:
  case Intrinsic::x86_avx512_vpermilvar_ps_512:
  case Intrinsic::x86_avx_vpermilvar_pd:
  case Intrinsic::x86_avx_vpermilvar_pd_256:
  case Intrinsic::x86_avx512_vpermilvar_pd_512:
  case Intrinsic::x86_avx2_permd:
  case Intrinsic::x86_avx2_permps: {
    simplifyAndSetOp(&II, 1, DemandedElts, UndefElts);

  case Intrinsic::x86_sse4a_extrq:
  case Intrinsic::x86_sse4a_extrqi:
  case Intrinsic::x86_sse4a_insertq:
  case Intrinsic::x86_sse4a_insertqi:

  return std::nullopt;