#include "llvm/IR/IntrinsicsX86.h"

#define DEBUG_TYPE "x86tti"

  VectorType *IntTy = VectorType::getInteger(cast<VectorType>(V->getType()));
  if (auto *ConstantMask = dyn_cast<ConstantDataVector>(Mask))
  if (isa<ConstantAggregateZero>(Mask))
  unsigned AddrSpace = cast<PointerType>(Ptr->getType())->getAddressSpace();
  if (isa<ConstantAggregateZero>(Mask)) {
  unsigned AddrSpace = cast<PointerType>(Ptr->getType())->getAddressSpace();
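  // The masked load/store fragments above (simplifyX86MaskedLoad/Store)
  // rewrite the x86-specific masked intrinsics as the generic
  // llvm.masked.load/store once the vector mask has been converted to a
  // vector of booleans; an all-zero mask folds the load to a zero vector and
  // lets the store be erased.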
  bool LogicalShift = false;
  bool ShiftLeft = false;
  case Intrinsic::x86_sse2_psrai_d:
  case Intrinsic::x86_sse2_psrai_w:
  case Intrinsic::x86_avx2_psrai_d:
  case Intrinsic::x86_avx2_psrai_w:
  case Intrinsic::x86_avx512_psrai_q_128:
  case Intrinsic::x86_avx512_psrai_q_256:
  case Intrinsic::x86_avx512_psrai_d_512:
  case Intrinsic::x86_avx512_psrai_q_512:
  case Intrinsic::x86_avx512_psrai_w_512:
  case Intrinsic::x86_sse2_psra_d:
  case Intrinsic::x86_sse2_psra_w:
  case Intrinsic::x86_avx2_psra_d:
  case Intrinsic::x86_avx2_psra_w:
  case Intrinsic::x86_avx512_psra_q_128:
  case Intrinsic::x86_avx512_psra_q_256:
  case Intrinsic::x86_avx512_psra_d_512:
  case Intrinsic::x86_avx512_psra_q_512:
  case Intrinsic::x86_avx512_psra_w_512:
    LogicalShift = false;
  case Intrinsic::x86_sse2_psrli_d:
  case Intrinsic::x86_sse2_psrli_q:
  case Intrinsic::x86_sse2_psrli_w:
  case Intrinsic::x86_avx2_psrli_d:
  case Intrinsic::x86_avx2_psrli_q:
  case Intrinsic::x86_avx2_psrli_w:
  case Intrinsic::x86_avx512_psrli_d_512:
  case Intrinsic::x86_avx512_psrli_q_512:
  case Intrinsic::x86_avx512_psrli_w_512:
  case Intrinsic::x86_sse2_psrl_d:
  case Intrinsic::x86_sse2_psrl_q:
  case Intrinsic::x86_sse2_psrl_w:
  case Intrinsic::x86_avx2_psrl_d:
  case Intrinsic::x86_avx2_psrl_q:
  case Intrinsic::x86_avx2_psrl_w:
  case Intrinsic::x86_avx512_psrl_d_512:
  case Intrinsic::x86_avx512_psrl_q_512:
  case Intrinsic::x86_avx512_psrl_w_512:
  case Intrinsic::x86_sse2_pslli_d:
  case Intrinsic::x86_sse2_pslli_q:
  case Intrinsic::x86_sse2_pslli_w:
  case Intrinsic::x86_avx2_pslli_d:
  case Intrinsic::x86_avx2_pslli_q:
  case Intrinsic::x86_avx2_pslli_w:
  case Intrinsic::x86_avx512_pslli_d_512:
  case Intrinsic::x86_avx512_pslli_q_512:
  case Intrinsic::x86_avx512_pslli_w_512:
  case Intrinsic::x86_sse2_psll_d:
  case Intrinsic::x86_sse2_psll_q:
  case Intrinsic::x86_sse2_psll_w:
  case Intrinsic::x86_avx2_psll_d:
  case Intrinsic::x86_avx2_psll_q:
  case Intrinsic::x86_avx2_psll_w:
  case Intrinsic::x86_avx512_psll_d_512:
  case Intrinsic::x86_avx512_psll_q_512:
  case Intrinsic::x86_avx512_psll_w_512:
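  // psrai/psra -> arithmetic right shift (LogicalShift = false),
  // psrli/psrl -> logical right shift, pslli/psll -> logical left shift.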
  assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left");
  auto *VT = cast<FixedVectorType>(Vec->getType());
  Type *SVT = VT->getElementType();
  unsigned VWidth = VT->getNumElements();
  return (LogicalShift ? (ShiftLeft ? Builder.CreateShl(Vec, Amt)
         cast<VectorType>(AmtVT)->getElementType() == SVT &&
         "Unexpected shift-by-scalar type");
  unsigned NumAmtElts = cast<FixedVectorType>(AmtVT)->getNumElements();
  return (LogicalShift ? (ShiftLeft ? Builder.CreateShl(Vec, Amt)
  auto *CDV = dyn_cast<ConstantDataVector>(Amt);
         cast<VectorType>(AmtVT)->getElementType() == SVT &&
         "Unexpected shift-by-scalar type");
  for (unsigned i = 0, NumSubElts = 64 / BitWidth; i != NumSubElts; ++i) {
    unsigned SubEltIdx = (NumSubElts - 1) - i;
    auto *SubElt = cast<ConstantInt>(CDV->getElementAsConstant(SubEltIdx));
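  // For the shift-by-scalar forms the shift amount lives in the low 64 bits of
  // the second vector operand; when that operand is a ConstantDataVector the
  // loop above reassembles those 64 bits into a single constant shift amount.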
  bool LogicalShift = false;
  bool ShiftLeft = false;
  case Intrinsic::x86_avx2_psrav_d:
  case Intrinsic::x86_avx2_psrav_d_256:
  case Intrinsic::x86_avx512_psrav_q_128:
  case Intrinsic::x86_avx512_psrav_q_256:
  case Intrinsic::x86_avx512_psrav_d_512:
  case Intrinsic::x86_avx512_psrav_q_512:
  case Intrinsic::x86_avx512_psrav_w_128:
  case Intrinsic::x86_avx512_psrav_w_256:
  case Intrinsic::x86_avx512_psrav_w_512:
    LogicalShift = false;
  case Intrinsic::x86_avx2_psrlv_d:
  case Intrinsic::x86_avx2_psrlv_d_256:
  case Intrinsic::x86_avx2_psrlv_q:
  case Intrinsic::x86_avx2_psrlv_q_256:
  case Intrinsic::x86_avx512_psrlv_d_512:
  case Intrinsic::x86_avx512_psrlv_q_512:
  case Intrinsic::x86_avx512_psrlv_w_128:
  case Intrinsic::x86_avx512_psrlv_w_256:
  case Intrinsic::x86_avx512_psrlv_w_512:
  case Intrinsic::x86_avx2_psllv_d:
  case Intrinsic::x86_avx2_psllv_d_256:
  case Intrinsic::x86_avx2_psllv_q:
  case Intrinsic::x86_avx2_psllv_q_256:
  case Intrinsic::x86_avx512_psllv_d_512:
  case Intrinsic::x86_avx512_psllv_q_512:
  case Intrinsic::x86_avx512_psllv_w_128:
  case Intrinsic::x86_avx512_psllv_w_256:
  case Intrinsic::x86_avx512_psllv_w_512:
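  // psrav -> per-element arithmetic right shift, psrlv -> per-element logical
  // right shift, psllv -> per-element logical left shift.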
  assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left");
  auto *VT = cast<FixedVectorType>(II.getType());
  Type *SVT = VT->getElementType();
  int NumElts = VT->getNumElements();
  return (LogicalShift ? (ShiftLeft ? Builder.CreateShl(Vec, Amt)
  auto *CShift = dyn_cast<Constant>(Amt);
  bool AnyOutOfRange = false;
  for (int I = 0; I < NumElts; ++I) {
    auto *CElt = CShift->getAggregateElement(I);
    if (isa_and_nonnull<UndefValue>(CElt)) {
    auto *COp = dyn_cast_or_null<ConstantInt>(CElt);
    APInt ShiftVal = COp->getValue();
    AnyOutOfRange = LogicalShift;
  for (int Idx : ShiftAmts) {
    assert(LogicalShift && "Logical shift expected");
    ConstantVec.push_back(ConstantInt::getNullValue(SVT));
  for (int Idx : ShiftAmts) {
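  // Out-of-range constant shift amounts are well defined for these
  // intrinsics: a logical shift by >= BitWidth produces zero (hence the null
  // elements pushed above), while an arithmetic shift saturates to
  // BitWidth - 1, i.e. a splat of the sign bit.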
  if (isa<UndefValue>(Arg0) && isa<UndefValue>(Arg1))
  auto *ArgTy = cast<FixedVectorType>(Arg0->getType());
  unsigned NumSrcElts = ArgTy->getNumElements();
  assert(cast<FixedVectorType>(ResTy)->getNumElements() == (2 * NumSrcElts) &&
         "Unexpected packing types");
  unsigned NumSrcEltsPerLane = NumSrcElts / NumLanes;
  unsigned SrcScalarSizeInBits = ArgTy->getScalarSizeInBits();
  assert(SrcScalarSizeInBits == (2 * DstScalarSizeInBits) &&
         "Unexpected packing types");
  if (!isa<Constant>(Arg0) || !isa<Constant>(Arg1))
  APInt MinValue, MaxValue;
  for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
    for (unsigned Elt = 0; Elt != NumSrcEltsPerLane; ++Elt)
      PackMask.push_back(Elt + (Lane * NumSrcEltsPerLane));
    for (unsigned Elt = 0; Elt != NumSrcEltsPerLane; ++Elt)
      PackMask.push_back(Elt + (Lane * NumSrcEltsPerLane) + NumSrcElts);
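  // With both sources constant, the pack is rebuilt as: clamp each source to
  // the destination's signed/unsigned range, interleave the two sources
  // 128-bit lane by 128-bit lane with PackMask (the PACKSS/PACKUS element
  // order), then truncate to the destination element type.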
  if (isa<UndefValue>(Arg))
  auto *ArgTy = dyn_cast<FixedVectorType>(Arg->getType());
  unsigned NumElts = ArgTy->getNumElements();
  assert(RetTy->getStructElementType(0)->isIntegerTy(8) &&
         RetTy->getStructElementType(1) == OpTy && OpTy == Op2->getType() &&
         "Unexpected types for x86 addcarry");
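  // When the carry-in of x86 addcarry is known to be zero, the intrinsic can
  // be rebuilt from llvm.uadd.with.overflow: the {i8, iN} result struct is
  // assembled from the overflow bit (zero-extended to i8) and the sum.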
  if (!ArgImm || ArgImm->getValue().uge(256))
  auto Or = [&](auto Lhs, auto Rhs) -> std::pair<Value *, uint8_t> {
    return {Builder.CreateOr(Lhs.first, Rhs.first), Lhs.second | Rhs.second};
  auto Xor = [&](auto Lhs, auto Rhs) -> std::pair<Value *, uint8_t> {
    return {Builder.CreateXor(Lhs.first, Rhs.first), Lhs.second ^ Rhs.second};
  auto And = [&](auto Lhs, auto Rhs) -> std::pair<Value *, uint8_t> {
    return {Builder.CreateAnd(Lhs.first, Rhs.first), Lhs.second & Rhs.second};
  auto Not = [&](auto V) -> std::pair<Value *, uint8_t> {
    return {Builder.CreateNot(V.first), ~V.second};
  auto Nor = [&](auto Lhs, auto Rhs) { return Not(Or(Lhs, Rhs)); };
  auto Xnor = [&](auto Lhs, auto Rhs) { return Not(Xor(Lhs, Rhs)); };
  auto Nand = [&](auto Lhs, auto Rhs) { return Not(And(Lhs, Rhs)); };
  bool ABIsConst = AIsConst && BIsConst;
  bool ACIsConst = AIsConst && CIsConst;
  bool BCIsConst = BIsConst && CIsConst;
  bool ABCIsConst = AIsConst && BIsConst && CIsConst;
  std::pair<Value *, uint8_t> A = {ArgA, 0xf0};
  std::pair<Value *, uint8_t> B = {ArgB, 0xcc};
  std::pair<Value *, uint8_t> C = {ArgC, 0xaa};
  std::pair<Value *, uint8_t> Res = {nullptr, 0};
  uint8_t Imm = ArgImm->getValue().getZExtValue();
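  // Each operand is paired with its 8-bit truth table over (A, B, C):
  // A = 0xf0, B = 0xcc, C = 0xaa. The logic lambdas combine both halves of
  // the pair, so the truth table of the rebuilt expression can later be
  // checked against the pternlog immediate Imm.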
    Res = Nor(Or(A, B), C);
    Res = Nor(A, Xnor(B, C));
    Res = Nor(A, Nand(B, C));
    Res = Nor(A, Not(C));
    Res = Nor(A, Nor(C, Not(B)));
    Res = Nor(A, Not(B));
    Res = Nor(A, Nor(B, Not(C)));
    Res = Nor(A, Nor(B, C));
    Res = Nor(Xnor(A, C), B);
    Res = Nor(Xnor(A, B), C);
    Res = Nor(Xnor(A, B), Xnor(A, C));
    Res = And(Nand(A, B), Xnor(B, C));
    Res = Nand(A, Or(B, C));
    Res = Nor(Nand(A, C), B);
    Res = Nor(B, Not(C));
    Res = Nor(B, Nor(C, Not(A)));
    Res = Nor(Xnor(A, B), Xor(A, C));
    Res = Xor(A, Nand(Nand(A, B), C));
    Res = Nor(Xnor(A, B), Nor(B, C));
    Res = Nand(A, Or(B, Not(C)));
    Res = Nor(B, Not(A));
    Res = Nor(Nor(A, Not(C)), B);
    Res = Nor(Nor(A, C), B);
    Res = Nand(Or(A, C), B);
    Res = Nor(Xnor(A, B), Nor(A, C));
    Res = Nand(Or(A, Not(C)), B);
    Res = Nor(Nand(A, B), C);
    Res = Nor(Xor(A, B), Xnor(A, C));
    Res = Xor(A, Nand(Nand(A, C), B));
    Res = Nor(C, Not(B));
    Res = Nor(Nor(B, Not(A)), C);
    Res = Nor(Xnor(A, C), Nor(B, C));
    Res = Nand(A, Nand(B, Not(C)));
    Res = Nor(C, Not(A));
    Res = Nor(Nor(A, Not(B)), C);
    Res = Nor(Nor(A, B), C);
    Res = Nand(Or(A, B), C);
    Res = Nor(Nor(A, B), Xnor(A, C));
    Res = Nand(Or(A, Not(B)), C);
    Res = Nor(Nor(A, C), Xnor(B, C));
    Res = Nor(Nor(A, B), Xnor(B, C));
    Res = Xor(Xnor(A, B), C);
    Res = Nand(A, Xnor(B, C));
    Res = And(A, Nand(B, C));
    Res = Nand(Nand(A, Not(C)), B);
    Res = Nand(Nand(A, Not(B)), C);
    Res = Nand(Xnor(A, C), B);
    Res = Nand(Xnor(A, B), C);
    Res = Nand(And(A, B), C);
    Res = And(Xnor(A, B), C);
    Res = Nor(Xor(A, B), Nor(C, Not(A)));
    Res = And(Xnor(A, C), B);
    Res = Nor(Xor(A, C), Nor(B, Not(A)));
    Res = Xor(Nor(Xnor(A, B), Nor(B, C)), C);
    Res = Xor(A, Nand(B, C));
    Res = Xor(B, Nor(Nor(B, Not(A)), C));
    Res = And(Nand(A, Not(B)), C);
    Res = And(Nand(A, Not(C)), B);
    Res = Nand(A, Nand(B, C));
    Res = And(A, Xnor(B, C));
    Res = Nor(Nor(A, Not(B)), Xor(B, C));
    Res = Xor(Nor(Xnor(A, B), Nor(A, C)), C);
    Res = Xor(Nand(A, C), B);
    Res = Nor(Nor(A, B), Xor(Xnor(A, B), C));
    Res = Xor(Nand(A, B), C);
    Res = Nor(Nor(A, B), Xor(B, C));
    Res = Xor(Nor(B, Not(A)), C);
    Res = Or(Nor(A, B), Xnor(B, C));
    Res = Xor(B, Nor(C, Not(A)));
    Res = Or(Nor(A, C), Xnor(B, C));
    Res = Nand(A, Xor(B, C));
    Res = Xor(A, Nor(Nor(A, Not(B)), C));
    Res = Xor(A, Nor(Nor(A, B), C));
    Res = Xor(Nor(A, Not(B)), C);
    Res = Or(Nor(A, B), Xnor(A, C));
    Res = Or(Nor(A, B), C);
    Res = Xor(Nor(Xnor(B, C), A), C);
    Res = Or(Nor(A, Not(B)), C);
    Res = Or(C, Not(A));
    Res = And(A, Nand(B, Not(C)));
    Res = Nand(Nand(A, C), B);
    Res = Xor(A, Nor(C, Not(B)));
    Res = Or(Xnor(A, C), Nor(B, C));
    Res = Nand(Xor(A, C), B);
    Res = Xor(Nor(Xnor(A, C), B), C);
    Res = Or(Nor(B, Not(A)), C);
    Res = Or(C, Not(B));
    Res = Or(Nand(A, B), C);
    Res = Xor(A, Nor(Nor(A, Not(C)), B));
    Res = Xor(A, Nor(Nor(A, C), B));
    Res = Xor(Nor(A, Not(C)), B);
    Res = Or(Xnor(A, B), Nor(A, C));
    Res = Xor(B, Nor(A, Xnor(B, C)));
    Res = Or(Nor(A, C), B);
    Res = Or(Nor(A, Not(C)), B);
    Res = Or(B, Not(A));
    Res = Xor(A, Nor(B, Not(C)));
    Res = Or(Xnor(A, B), Nor(B, C));
    Res = Nand(Nand(A, B), C);
    Res = Nand(Xor(A, B), C);
    Res = Xor(Nor(Xnor(A, B), C), B);
    Res = Or(B, Nor(C, Not(A)));
    Res = Or(B, Not(C));
    Res = Or(Nand(A, C), B);
    Res = Xor(A, Nor(Xnor(A, C), B));
    Res = Xor(A, Nor(Xnor(A, B), C));
    Res = Or(Xnor(A, B), Xnor(A, C));
    Res = Or(Xnor(A, B), C);
    Res = Or(Xnor(A, C), B);
    Res = Nand(A, Nor(B, C));
    Res = Or(A, Nor(B, C));
    Res = Or(A, Nor(B, Not(C)));
    Res = Or(A, Not(B));
    Res = Or(A, Nor(C, Not(B)));
    Res = Or(A, Not(C));
    Res = Or(A, Nand(B, C));
    Res = Or(A, Xnor(B, C));
    Res = Nand(Nor(A, C), B);
    Res = Nand(Nor(A, B), C);
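    // The full switch covers all 256 pternlog immediates; each arm rebuilds
    // the selected boolean function from a handful of logic instructions,
    // mostly guarded by the *IsConst flags so the multi-instruction
    // expansions are only emitted when the operands will constant-fold.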
  assert((Res.first == nullptr || Res.second == Imm) &&
         "Simplification of ternary logic does not verify!");
  auto *VecTy = cast<FixedVectorType>(II.getType());
  assert(VecTy->getNumElements() == 4 && "insertps with wrong vector type");
  uint8_t Imm = CInt->getZExtValue();
  uint8_t ZMask = Imm & 0xf;
  uint8_t DestLane = (Imm >> 4) & 0x3;
  uint8_t SourceLane = (Imm >> 6) & 0x3;
  int ShuffleMask[4] = {0, 1, 2, 3};
      (ZMask & (1 << DestLane))) {
    ShuffleMask[DestLane] = SourceLane;
    for (unsigned i = 0; i < 4; ++i)
      if ((ZMask >> i) & 0x1)
        ShuffleMask[i] = i + 4;
    ShuffleMask[DestLane] = SourceLane + 4;
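  // INSERTPS immediate layout: bits [7:6] select the source element, [5:4]
  // the destination element, and [3:0] are a per-element zero mask. Shuffle
  // indices >= 4 pick elements from the second shuffle operand (a zero vector
  // or the second source), so the intrinsic folds to one shufflevector.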
  auto LowConstantHighUndef = [&](uint64_t Val) {
  auto *C0 = dyn_cast<Constant>(Op0);
      C0 ? dyn_cast_or_null<ConstantInt>(C0->getAggregateElement((unsigned)0))
  if (CILength && CIIndex) {
    for (int i = 0; i != (int)Length; ++i)
    for (int i = Length; i != 8; ++i)
    for (int i = 8; i != 16; ++i)
    APInt Elt = CI0->getValue();
    Value *Args[] = {Op0, CILength, CIIndex};
  if (CI0 && CI0->isZero())
    return LowConstantHighUndef(0);
    for (int i = 0; i != (int)Index; ++i)
    for (int i = 0; i != (int)Length; ++i)
    for (int i = 8; i != 16; ++i)
  auto *C0 = dyn_cast<Constant>(Op0);
  auto *C1 = dyn_cast<Constant>(Op1);
      C0 ? dyn_cast_or_null<ConstantInt>(C0->getAggregateElement((unsigned)0))
      C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)0))
    APInt V00 = CI00->getValue();
    APInt V10 = CI10->getValue();
    APInt Val = V00 | V10;
    Value *Args[] = {Op0, Op1, CILength, CIIndex};
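  // SSE4A EXTRQ/INSERTQ take the bit length and bit index either from
  // immediate operands or from the low bytes of the second vector operand.
  // With constant length/index the i16 shuffle masks built above express the
  // extraction or insertion as a shufflevector; otherwise the calls are
  // canonicalized to the immediate (extrqi/insertqi) forms.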
  auto *VecTy = cast<FixedVectorType>(II.getType());
  unsigned NumElts = VecTy->getNumElements();
  assert((NumElts == 16 || NumElts == 32 || NumElts == 64) &&
         "Unexpected number of elements in shuffle mask!");
  for (unsigned I = 0; I < NumElts; ++I) {
    Constant *COp = V->getAggregateElement(I);
    if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
    if (isa<UndefValue>(COp)) {
    int8_t Index = cast<ConstantInt>(COp)->getValue().getZExtValue();
  auto *VecTy = cast<FixedVectorType>(II.getType());
  unsigned NumElts = VecTy->getNumElements();
  bool IsPD = VecTy->getScalarType()->isDoubleTy();
  unsigned NumLaneElts = IsPD ? 2 : 4;
  assert(NumElts == 16 || NumElts == 8 || NumElts == 4 || NumElts == 2);
  for (unsigned I = 0; I < NumElts; ++I) {
    Constant *COp = V->getAggregateElement(I);
    if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
    if (isa<UndefValue>(COp)) {
    APInt Index = cast<ConstantInt>(COp)->getValue();
    Index += APInt(32, (I / NumLaneElts) * NumLaneElts);
    Indexes[I] = Index.getZExtValue();
  auto *VecTy = cast<FixedVectorType>(II.getType());
  unsigned Size = VecTy->getNumElements();
         "Unexpected shuffle mask size");
  for (unsigned I = 0; I < Size; ++I) {
    Constant *COp = V->getAggregateElement(I);
    if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
    if (isa<UndefValue>(COp)) {
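  // The three mask-decoding loops (pshufb, vpermilvar, vpermd/vpermps/permvar)
  // follow the same pattern: bail out unless every mask element is a constant
  // or undef, then translate each selector into a shufflevector index. For
  // pshufb a set sign bit means "write zero"; for vpermilvar the index is kept
  // inside its 128-bit lane by adding the lane base computed above.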
std::optional<Instruction *>
  auto SimplifyDemandedVectorEltsLow = [&IC](Value *Op, unsigned Width,
                                             unsigned DemandedWidth) {
    APInt UndefElts(Width, 0);
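  // Helper used throughout instCombineIntrinsic: ask
  // SimplifyDemandedVectorElts to keep only the lowest DemandedWidth elements
  // of Op, since many of these intrinsics only read the scalar in element 0
  // (or the low half of a vector operand).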
  case Intrinsic::x86_bmi_bextr_32:
  case Intrinsic::x86_bmi_bextr_64:
  case Intrinsic::x86_tbm_bextri_u32:
  case Intrinsic::x86_tbm_bextri_u64:
    if (auto *InC = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
      uint64_t Result = InC->getZExtValue() >> Shift;
      Result &= maskTrailingOnes<uint64_t>(Length);
  case Intrinsic::x86_bmi_bzhi_32:
  case Intrinsic::x86_bmi_bzhi_64:
    if (auto *InC = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
      uint64_t Result = InC->getZExtValue();
      Result &= maskTrailingOnes<uint64_t>(Index);
  case Intrinsic::x86_bmi_pext_32:
  case Intrinsic::x86_bmi_pext_64:
    if (auto *MaskC = dyn_cast<ConstantInt>(II.getArgOperand(1))) {
      if (MaskC->isNullValue()) {
      if (MaskC->isAllOnesValue()) {
      unsigned MaskIdx, MaskLen;
      if (MaskC->getValue().isShiftedMask(MaskIdx, MaskLen)) {
      if (auto *SrcC = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
        uint64_t Src = SrcC->getZExtValue();
        uint64_t Mask = MaskC->getZExtValue();
        if (BitToTest & Src)
  case Intrinsic::x86_bmi_pdep_32:
  case Intrinsic::x86_bmi_pdep_64:
    if (auto *MaskC = dyn_cast<ConstantInt>(II.getArgOperand(1))) {
      if (MaskC->isNullValue()) {
      if (MaskC->isAllOnesValue()) {
      unsigned MaskIdx, MaskLen;
      if (MaskC->getValue().isShiftedMask(MaskIdx, MaskLen)) {
      if (auto *SrcC = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
        uint64_t Src = SrcC->getZExtValue();
        uint64_t Mask = MaskC->getZExtValue();
        if (BitToTest & Src)
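  // BEXTR/BZHI fold entirely once their inputs are constant. For PEXT/PDEP a
  // zero mask gives zero, an all-ones mask is the identity, a contiguous
  // (shifted) mask degenerates into a plain shift-and-mask, and with both
  // operands constant the bit-by-bit loops above fold the whole call.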
  case Intrinsic::x86_sse_cvtss2si:
  case Intrinsic::x86_sse_cvtss2si64:
  case Intrinsic::x86_sse_cvttss2si:
  case Intrinsic::x86_sse_cvttss2si64:
  case Intrinsic::x86_sse2_cvtsd2si:
  case Intrinsic::x86_sse2_cvtsd2si64:
  case Intrinsic::x86_sse2_cvttsd2si:
  case Intrinsic::x86_sse2_cvttsd2si64:
  case Intrinsic::x86_avx512_vcvtss2si32:
  case Intrinsic::x86_avx512_vcvtss2si64:
  case Intrinsic::x86_avx512_vcvtss2usi32:
  case Intrinsic::x86_avx512_vcvtss2usi64:
  case Intrinsic::x86_avx512_vcvtsd2si32:
  case Intrinsic::x86_avx512_vcvtsd2si64:
  case Intrinsic::x86_avx512_vcvtsd2usi32:
  case Intrinsic::x86_avx512_vcvtsd2usi64:
  case Intrinsic::x86_avx512_cvttss2si:
  case Intrinsic::x86_avx512_cvttss2si64:
  case Intrinsic::x86_avx512_cvttss2usi:
  case Intrinsic::x86_avx512_cvttss2usi64:
  case Intrinsic::x86_avx512_cvttsd2si:
  case Intrinsic::x86_avx512_cvttsd2si64:
  case Intrinsic::x86_avx512_cvttsd2usi:
  case Intrinsic::x86_avx512_cvttsd2usi64: {
    unsigned VWidth = cast<FixedVectorType>(Arg->getType())->getNumElements();
    if (Value *V = SimplifyDemandedVectorEltsLow(Arg, VWidth, 1)) {
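    // These scalar float-to-int conversions only read element 0 of their
    // vector operand, so everything above the low element can be dropped.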
  case Intrinsic::x86_mmx_pmovmskb:
  case Intrinsic::x86_sse_movmsk_ps:
  case Intrinsic::x86_sse2_movmsk_pd:
  case Intrinsic::x86_sse2_pmovmskb_128:
  case Intrinsic::x86_avx_movmsk_pd_256:
  case Intrinsic::x86_avx_movmsk_ps_256:
  case Intrinsic::x86_avx2_pmovmskb:
  case Intrinsic::x86_sse_comieq_ss:
  case Intrinsic::x86_sse_comige_ss:
  case Intrinsic::x86_sse_comigt_ss:
  case Intrinsic::x86_sse_comile_ss:
  case Intrinsic::x86_sse_comilt_ss:
  case Intrinsic::x86_sse_comineq_ss:
  case Intrinsic::x86_sse_ucomieq_ss:
  case Intrinsic::x86_sse_ucomige_ss:
  case Intrinsic::x86_sse_ucomigt_ss:
  case Intrinsic::x86_sse_ucomile_ss:
  case Intrinsic::x86_sse_ucomilt_ss:
  case Intrinsic::x86_sse_ucomineq_ss:
  case Intrinsic::x86_sse2_comieq_sd:
  case Intrinsic::x86_sse2_comige_sd:
  case Intrinsic::x86_sse2_comigt_sd:
  case Intrinsic::x86_sse2_comile_sd:
  case Intrinsic::x86_sse2_comilt_sd:
  case Intrinsic::x86_sse2_comineq_sd:
  case Intrinsic::x86_sse2_ucomieq_sd:
  case Intrinsic::x86_sse2_ucomige_sd:
  case Intrinsic::x86_sse2_ucomigt_sd:
  case Intrinsic::x86_sse2_ucomile_sd:
  case Intrinsic::x86_sse2_ucomilt_sd:
  case Intrinsic::x86_sse2_ucomineq_sd:
  case Intrinsic::x86_avx512_vcomi_ss:
  case Intrinsic::x86_avx512_vcomi_sd:
  case Intrinsic::x86_avx512_mask_cmp_ss:
  case Intrinsic::x86_avx512_mask_cmp_sd: {
    bool MadeChange = false;
    unsigned VWidth = cast<FixedVectorType>(Arg0->getType())->getNumElements();
    if (Value *V = SimplifyDemandedVectorEltsLow(Arg0, VWidth, 1)) {
    if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, 1)) {
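    // The COMI/UCOMI and masked scalar compare intrinsics likewise compare
    // only the low element of each operand, so both operands get the same
    // demanded-elements treatment.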
  case Intrinsic::x86_avx512_add_ps_512:
  case Intrinsic::x86_avx512_div_ps_512:
  case Intrinsic::x86_avx512_mul_ps_512:
  case Intrinsic::x86_avx512_sub_ps_512:
  case Intrinsic::x86_avx512_add_pd_512:
  case Intrinsic::x86_avx512_div_pd_512:
  case Intrinsic::x86_avx512_mul_pd_512:
  case Intrinsic::x86_avx512_sub_pd_512:
    if (R->getValue() == 4) {
      case Intrinsic::x86_avx512_add_ps_512:
      case Intrinsic::x86_avx512_add_pd_512:
      case Intrinsic::x86_avx512_sub_ps_512:
      case Intrinsic::x86_avx512_sub_pd_512:
      case Intrinsic::x86_avx512_mul_ps_512:
      case Intrinsic::x86_avx512_mul_pd_512:
      case Intrinsic::x86_avx512_div_ps_512:
      case Intrinsic::x86_avx512_div_pd_512:
  case Intrinsic::x86_avx512_mask_add_ss_round:
  case Intrinsic::x86_avx512_mask_div_ss_round:
  case Intrinsic::x86_avx512_mask_mul_ss_round:
  case Intrinsic::x86_avx512_mask_sub_ss_round:
  case Intrinsic::x86_avx512_mask_add_sd_round:
  case Intrinsic::x86_avx512_mask_div_sd_round:
  case Intrinsic::x86_avx512_mask_mul_sd_round:
  case Intrinsic::x86_avx512_mask_sub_sd_round:
    if (R->getValue() == 4) {
      case Intrinsic::x86_avx512_mask_add_ss_round:
      case Intrinsic::x86_avx512_mask_add_sd_round:
      case Intrinsic::x86_avx512_mask_sub_ss_round:
      case Intrinsic::x86_avx512_mask_sub_sd_round:
      case Intrinsic::x86_avx512_mask_mul_ss_round:
      case Intrinsic::x86_avx512_mask_mul_sd_round:
      case Intrinsic::x86_avx512_mask_div_ss_round:
      case Intrinsic::x86_avx512_mask_div_sd_round:
      auto *C = dyn_cast<ConstantInt>(Mask);
      if (!C || !C->getValue()[0]) {
          cast<IntegerType>(Mask->getType())->getBitWidth());
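      // A rounding-mode immediate of 4 means "current direction" (no embedded
      // rounding), so these AVX-512 arithmetic intrinsics lower to ordinary
      // IR fadd/fsub/fmul/fdiv. For the masked scalar variants the low mask
      // bit selects between the computed value and the passthru element.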
  case Intrinsic::x86_sse2_psrai_d:
  case Intrinsic::x86_sse2_psrai_w:
  case Intrinsic::x86_avx2_psrai_d:
  case Intrinsic::x86_avx2_psrai_w:
  case Intrinsic::x86_avx512_psrai_q_128:
  case Intrinsic::x86_avx512_psrai_q_256:
  case Intrinsic::x86_avx512_psrai_d_512:
  case Intrinsic::x86_avx512_psrai_q_512:
  case Intrinsic::x86_avx512_psrai_w_512:
  case Intrinsic::x86_sse2_psrli_d:
  case Intrinsic::x86_sse2_psrli_q:
  case Intrinsic::x86_sse2_psrli_w:
  case Intrinsic::x86_avx2_psrli_d:
  case Intrinsic::x86_avx2_psrli_q:
  case Intrinsic::x86_avx2_psrli_w:
  case Intrinsic::x86_avx512_psrli_d_512:
  case Intrinsic::x86_avx512_psrli_q_512:
  case Intrinsic::x86_avx512_psrli_w_512:
  case Intrinsic::x86_sse2_pslli_d:
  case Intrinsic::x86_sse2_pslli_q:
  case Intrinsic::x86_sse2_pslli_w:
  case Intrinsic::x86_avx2_pslli_d:
  case Intrinsic::x86_avx2_pslli_q:
  case Intrinsic::x86_avx2_pslli_w:
  case Intrinsic::x86_avx512_pslli_d_512:
  case Intrinsic::x86_avx512_pslli_q_512:
  case Intrinsic::x86_avx512_pslli_w_512:
  case Intrinsic::x86_sse2_psra_d:
  case Intrinsic::x86_sse2_psra_w:
  case Intrinsic::x86_avx2_psra_d:
  case Intrinsic::x86_avx2_psra_w:
  case Intrinsic::x86_avx512_psra_q_128:
  case Intrinsic::x86_avx512_psra_q_256:
  case Intrinsic::x86_avx512_psra_d_512:
  case Intrinsic::x86_avx512_psra_q_512:
  case Intrinsic::x86_avx512_psra_w_512:
  case Intrinsic::x86_sse2_psrl_d:
  case Intrinsic::x86_sse2_psrl_q:
  case Intrinsic::x86_sse2_psrl_w:
  case Intrinsic::x86_avx2_psrl_d:
  case Intrinsic::x86_avx2_psrl_q:
  case Intrinsic::x86_avx2_psrl_w:
  case Intrinsic::x86_avx512_psrl_d_512:
  case Intrinsic::x86_avx512_psrl_q_512:
  case Intrinsic::x86_avx512_psrl_w_512:
  case Intrinsic::x86_sse2_psll_d:
  case Intrinsic::x86_sse2_psll_q:
  case Intrinsic::x86_sse2_psll_w:
  case Intrinsic::x86_avx2_psll_d:
  case Intrinsic::x86_avx2_psll_q:
  case Intrinsic::x86_avx2_psll_w:
  case Intrinsic::x86_avx512_psll_d_512:
  case Intrinsic::x86_avx512_psll_q_512:
  case Intrinsic::x86_avx512_psll_w_512: {
           "Unexpected packed shift size");
    unsigned VWidth = cast<FixedVectorType>(Arg1->getType())->getNumElements();
    if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, VWidth / 2)) {
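    // The shift-by-scalar (non-immediate) forms only read the low 64 bits of
    // the count operand, hence only the lower half of Arg1's elements are
    // demanded.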
  case Intrinsic::x86_avx2_psllv_d:
  case Intrinsic::x86_avx2_psllv_d_256:
  case Intrinsic::x86_avx2_psllv_q:
  case Intrinsic::x86_avx2_psllv_q_256:
  case Intrinsic::x86_avx512_psllv_d_512:
  case Intrinsic::x86_avx512_psllv_q_512:
  case Intrinsic::x86_avx512_psllv_w_128:
  case Intrinsic::x86_avx512_psllv_w_256:
  case Intrinsic::x86_avx512_psllv_w_512:
  case Intrinsic::x86_avx2_psrav_d:
  case Intrinsic::x86_avx2_psrav_d_256:
  case Intrinsic::x86_avx512_psrav_q_128:
  case Intrinsic::x86_avx512_psrav_q_256:
  case Intrinsic::x86_avx512_psrav_d_512:
  case Intrinsic::x86_avx512_psrav_q_512:
  case Intrinsic::x86_avx512_psrav_w_128:
  case Intrinsic::x86_avx512_psrav_w_256:
  case Intrinsic::x86_avx512_psrav_w_512:
  case Intrinsic::x86_avx2_psrlv_d:
  case Intrinsic::x86_avx2_psrlv_d_256:
  case Intrinsic::x86_avx2_psrlv_q:
  case Intrinsic::x86_avx2_psrlv_q_256:
  case Intrinsic::x86_avx512_psrlv_d_512:
  case Intrinsic::x86_avx512_psrlv_q_512:
  case Intrinsic::x86_avx512_psrlv_w_128:
  case Intrinsic::x86_avx512_psrlv_w_256:
  case Intrinsic::x86_avx512_psrlv_w_512:
  case Intrinsic::x86_sse2_packssdw_128:
  case Intrinsic::x86_sse2_packsswb_128:
  case Intrinsic::x86_avx2_packssdw:
  case Intrinsic::x86_avx2_packsswb:
  case Intrinsic::x86_avx512_packssdw_512:
  case Intrinsic::x86_avx512_packsswb_512:
  case Intrinsic::x86_sse2_packuswb_128:
  case Intrinsic::x86_sse41_packusdw:
  case Intrinsic::x86_avx2_packusdw:
  case Intrinsic::x86_avx2_packuswb:
  case Intrinsic::x86_avx512_packusdw_512:
  case Intrinsic::x86_avx512_packuswb_512:
  case Intrinsic::x86_pclmulqdq:
  case Intrinsic::x86_pclmulqdq_256:
  case Intrinsic::x86_pclmulqdq_512: {
    unsigned Imm = C->getZExtValue();
    bool MadeChange = false;
        cast<FixedVectorType>(Arg0->getType())->getNumElements();
    APInt UndefElts1(VWidth, 0);
    APInt DemandedElts1 =
    APInt UndefElts2(VWidth, 0);
    APInt DemandedElts2 =
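    // PCLMULQDQ's immediate selects one 64-bit half of each 128-bit lane of
    // each operand; the DemandedElts masks built here mark only those halves
    // as used, so the other halves can be simplified to undef.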
  case Intrinsic::x86_sse41_insertps:
  case Intrinsic::x86_sse4a_extrq: {
    unsigned VWidth0 = cast<FixedVectorType>(Op0->getType())->getNumElements();
    unsigned VWidth1 = cast<FixedVectorType>(Op1->getType())->getNumElements();
           VWidth1 == 16 && "Unexpected operand sizes");
    auto *C1 = dyn_cast<Constant>(Op1);
        C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)0))
        C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)1))
    bool MadeChange = false;
    if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
    if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 2)) {
  case Intrinsic::x86_sse4a_extrqi: {
    unsigned VWidth = cast<FixedVectorType>(Op0->getType())->getNumElements();
           "Unexpected operand size");
    auto *CILength = dyn_cast<ConstantInt>(II.getArgOperand(1));
    if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) {
  case Intrinsic::x86_sse4a_insertq: {
    unsigned VWidth = cast<FixedVectorType>(Op0->getType())->getNumElements();
           cast<FixedVectorType>(Op1->getType())->getNumElements() == 2 &&
           "Unexpected operand size");
    auto *C1 = dyn_cast<Constant>(Op1);
        C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)1))
    const APInt &V11 = CI11->getValue();
    if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) {
  case Intrinsic::x86_sse4a_insertqi: {
    unsigned VWidth0 = cast<FixedVectorType>(Op0->getType())->getNumElements();
    unsigned VWidth1 = cast<FixedVectorType>(Op1->getType())->getNumElements();
           VWidth1 == 2 && "Unexpected operand sizes");
    auto *CILength = dyn_cast<ConstantInt>(II.getArgOperand(2));
    if (CILength && CIIndex) {
      APInt Len = CILength->getValue().zextOrTrunc(6);
      APInt Idx = CIIndex->getValue().zextOrTrunc(6);
    bool MadeChange = false;
    if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
    if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 1)) {
  case Intrinsic::x86_sse41_pblendvb:
  case Intrinsic::x86_sse41_blendvps:
  case Intrinsic::x86_sse41_blendvpd:
  case Intrinsic::x86_avx_blendv_ps_256:
  case Intrinsic::x86_avx_blendv_pd_256:
  case Intrinsic::x86_avx2_pblendvb: {
    if (isa<ConstantAggregateZero>(Mask)) {
    if (auto *ConstantMask = dyn_cast<ConstantDataVector>(Mask)) {
    assert(Mask->getType()->getPrimitiveSizeInBits() ==
           "Not expecting mask and operands with different sizes");
    unsigned NumMaskElts =
        cast<FixedVectorType>(Mask->getType())->getNumElements();
    unsigned NumOperandElts =
        cast<FixedVectorType>(II.getType())->getNumElements();
    if (NumMaskElts == NumOperandElts) {
    if (NumMaskElts < NumOperandElts) {
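    // BLENDV selects on the sign bit of each mask element: an all-zero mask
    // returns the first operand, a constant mask becomes a select on a bool
    // vector, and a mask that is the sign-extension of an i1 vector becomes a
    // select on that vector directly (bitcasting the operands first when the
    // mask has fewer, wider elements than the result).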
  case Intrinsic::x86_ssse3_pshuf_b_128:
  case Intrinsic::x86_avx2_pshuf_b:
  case Intrinsic::x86_avx512_pshuf_b_512:
  case Intrinsic::x86_avx_vpermilvar_ps:
  case Intrinsic::x86_avx_vpermilvar_ps_256:
  case Intrinsic::x86_avx512_vpermilvar_ps_512:
  case Intrinsic::x86_avx_vpermilvar_pd:
  case Intrinsic::x86_avx_vpermilvar_pd_256:
  case Intrinsic::x86_avx512_vpermilvar_pd_512:
  case Intrinsic::x86_avx2_permd:
  case Intrinsic::x86_avx2_permps:
  case Intrinsic::x86_avx512_permvar_df_256:
  case Intrinsic::x86_avx512_permvar_df_512:
  case Intrinsic::x86_avx512_permvar_di_256:
  case Intrinsic::x86_avx512_permvar_di_512:
  case Intrinsic::x86_avx512_permvar_hi_128:
  case Intrinsic::x86_avx512_permvar_hi_256:
  case Intrinsic::x86_avx512_permvar_hi_512:
  case Intrinsic::x86_avx512_permvar_qi_128:
  case Intrinsic::x86_avx512_permvar_qi_256:
  case Intrinsic::x86_avx512_permvar_qi_512:
  case Intrinsic::x86_avx512_permvar_sf_512:
  case Intrinsic::x86_avx512_permvar_si_512:
  case Intrinsic::x86_avx_maskload_ps:
  case Intrinsic::x86_avx_maskload_pd:
  case Intrinsic::x86_avx_maskload_ps_256:
  case Intrinsic::x86_avx_maskload_pd_256:
  case Intrinsic::x86_avx2_maskload_d:
  case Intrinsic::x86_avx2_maskload_q:
  case Intrinsic::x86_avx2_maskload_d_256:
  case Intrinsic::x86_avx2_maskload_q_256:
  case Intrinsic::x86_sse2_maskmov_dqu:
  case Intrinsic::x86_avx_maskstore_ps:
  case Intrinsic::x86_avx_maskstore_pd:
  case Intrinsic::x86_avx_maskstore_ps_256:
  case Intrinsic::x86_avx_maskstore_pd_256:
  case Intrinsic::x86_avx2_maskstore_d:
  case Intrinsic::x86_avx2_maskstore_q:
  case Intrinsic::x86_avx2_maskstore_d_256:
  case Intrinsic::x86_avx2_maskstore_q_256:
  case Intrinsic::x86_addcarry_32:
  case Intrinsic::x86_addcarry_64:
  case Intrinsic::x86_avx512_pternlog_d_128:
  case Intrinsic::x86_avx512_pternlog_d_256:
  case Intrinsic::x86_avx512_pternlog_d_512:
  case Intrinsic::x86_avx512_pternlog_q_128:
  case Intrinsic::x86_avx512_pternlog_q_256:
  case Intrinsic::x86_avx512_pternlog_q_512:
  return std::nullopt;
    bool &KnownBitsComputed) const {
  case Intrinsic::x86_mmx_pmovmskb:
  case Intrinsic::x86_sse_movmsk_ps:
  case Intrinsic::x86_sse2_movmsk_pd:
  case Intrinsic::x86_sse2_pmovmskb_128:
  case Intrinsic::x86_avx_movmsk_ps_256:
  case Intrinsic::x86_avx_movmsk_pd_256:
  case Intrinsic::x86_avx2_pmovmskb: {
    ArgWidth = ArgType->getNumElements();
    if (DemandedElts.isZero()) {
    KnownBitsComputed = true;
  return std::nullopt;
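  // MOVMSK produces one result bit per input element, so every bit above
  // ArgWidth is known zero; if none of the element bits is actually demanded
  // the call can be replaced with zero outright.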
    simplifyAndSetOp) const {
  unsigned VWidth = cast<FixedVectorType>(II.getType())->getNumElements();
  case Intrinsic::x86_xop_vfrcz_ss:
  case Intrinsic::x86_xop_vfrcz_sd:
    if (!DemandedElts[0]) {
    simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
    UndefElts = UndefElts[0];
  case Intrinsic::x86_sse_rcp_ss:
  case Intrinsic::x86_sse_rsqrt_ss:
    simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
    if (!DemandedElts[0]) {
  case Intrinsic::x86_sse_min_ss:
  case Intrinsic::x86_sse_max_ss:
  case Intrinsic::x86_sse_cmp_ss:
  case Intrinsic::x86_sse2_min_sd:
  case Intrinsic::x86_sse2_max_sd:
  case Intrinsic::x86_sse2_cmp_sd: {
    simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
    if (!DemandedElts[0]) {
    simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
  case Intrinsic::x86_sse41_round_ss:
  case Intrinsic::x86_sse41_round_sd: {
    APInt DemandedElts2 = DemandedElts;
    simplifyAndSetOp(&II, 0, DemandedElts2, UndefElts);
    if (!DemandedElts[0]) {
    simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
    UndefElts |= UndefElts2[0];
  case Intrinsic::x86_avx512_mask_add_ss_round:
  case Intrinsic::x86_avx512_mask_div_ss_round:
  case Intrinsic::x86_avx512_mask_mul_ss_round:
  case Intrinsic::x86_avx512_mask_sub_ss_round:
  case Intrinsic::x86_avx512_mask_max_ss_round:
  case Intrinsic::x86_avx512_mask_min_ss_round:
  case Intrinsic::x86_avx512_mask_add_sd_round:
  case Intrinsic::x86_avx512_mask_div_sd_round:
  case Intrinsic::x86_avx512_mask_mul_sd_round:
  case Intrinsic::x86_avx512_mask_sub_sd_round:
  case Intrinsic::x86_avx512_mask_max_sd_round:
  case Intrinsic::x86_avx512_mask_min_sd_round:
    simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
    if (!DemandedElts[0]) {
    simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
    simplifyAndSetOp(&II, 2, DemandedElts, UndefElts3);
    if (!UndefElts2[0] || !UndefElts3[0])
  case Intrinsic::x86_sse3_addsub_pd:
  case Intrinsic::x86_sse3_addsub_ps:
  case Intrinsic::x86_avx_addsub_pd_256:
  case Intrinsic::x86_avx_addsub_ps_256: {
    bool IsSubOnly = DemandedElts.isSubsetOf(SubMask);
    bool IsAddOnly = DemandedElts.isSubsetOf(AddMask);
    if (IsSubOnly || IsAddOnly) {
      assert((IsSubOnly ^ IsAddOnly) && "Can't be both add-only and sub-only");
          IsSubOnly ? Instruction::FSub : Instruction::FAdd, Arg0, Arg1);
    simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
    simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
    UndefElts &= UndefElts2;
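    // ADDSUB subtracts in the even lanes and adds in the odd lanes, so if only
    // the even lanes are demanded the whole intrinsic becomes a plain fsub,
    // and if only the odd lanes are demanded it becomes a plain fadd.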
  case Intrinsic::x86_avx2_psllv_d:
  case Intrinsic::x86_avx2_psllv_d_256:
  case Intrinsic::x86_avx2_psllv_q:
  case Intrinsic::x86_avx2_psllv_q_256:
  case Intrinsic::x86_avx2_psrlv_d:
  case Intrinsic::x86_avx2_psrlv_d_256:
  case Intrinsic::x86_avx2_psrlv_q:
  case Intrinsic::x86_avx2_psrlv_q_256:
  case Intrinsic::x86_avx2_psrav_d:
  case Intrinsic::x86_avx2_psrav_d_256: {
    simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
    simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
    UndefElts &= UndefElts2;
  case Intrinsic::x86_sse2_packssdw_128:
  case Intrinsic::x86_sse2_packsswb_128:
  case Intrinsic::x86_sse2_packuswb_128:
  case Intrinsic::x86_sse41_packusdw:
  case Intrinsic::x86_avx2_packssdw:
  case Intrinsic::x86_avx2_packsswb:
  case Intrinsic::x86_avx2_packusdw:
  case Intrinsic::x86_avx2_packuswb:
  case Intrinsic::x86_avx512_packssdw_512:
  case Intrinsic::x86_avx512_packsswb_512:
  case Intrinsic::x86_avx512_packusdw_512:
  case Intrinsic::x86_avx512_packuswb_512: {
    unsigned InnerVWidth = cast<FixedVectorType>(Ty0)->getNumElements();
    assert(VWidth == (InnerVWidth * 2) && "Unexpected input size");
    unsigned NumLanes = Ty0->getPrimitiveSizeInBits() / 128;
    unsigned VWidthPerLane = VWidth / NumLanes;
    unsigned InnerVWidthPerLane = InnerVWidth / NumLanes;
    for (int OpNum = 0; OpNum != 2; ++OpNum) {
      APInt OpDemandedElts(InnerVWidth, 0);
      for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
        unsigned LaneIdx = Lane * VWidthPerLane;
        for (unsigned Elt = 0; Elt != InnerVWidthPerLane; ++Elt) {
          unsigned Idx = LaneIdx + Elt + InnerVWidthPerLane * OpNum;
          if (DemandedElts[Idx])
            OpDemandedElts.setBit((Lane * InnerVWidthPerLane) + Elt);
      APInt OpUndefElts(InnerVWidth, 0);
      simplifyAndSetOp(&II, OpNum, OpDemandedElts, OpUndefElts);
      OpUndefElts = OpUndefElts.zext(VWidth);
      for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
        APInt LaneElts = OpUndefElts.lshr(InnerVWidthPerLane * Lane);
        LaneElts = LaneElts.getLoBits(InnerVWidthPerLane);
        LaneElts <<= InnerVWidthPerLane * (2 * Lane + OpNum);
        UndefElts |= LaneElts;
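      // Pack interleaves its two sources per 128-bit lane, so demanded output
      // elements are mapped back to per-operand, per-lane demanded inputs,
      // and the resulting per-operand undef elements are mapped forward into
      // the output the same way.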
  case Intrinsic::x86_ssse3_pshuf_b_128:
  case Intrinsic::x86_avx2_pshuf_b:
  case Intrinsic::x86_avx512_pshuf_b_512:
  case Intrinsic::x86_avx_vpermilvar_ps:
  case Intrinsic::x86_avx_vpermilvar_ps_256:
  case Intrinsic::x86_avx512_vpermilvar_ps_512:
  case Intrinsic::x86_avx_vpermilvar_pd:
  case Intrinsic::x86_avx_vpermilvar_pd_256:
  case Intrinsic::x86_avx512_vpermilvar_pd_512:
  case Intrinsic::x86_avx2_permd:
  case Intrinsic::x86_avx2_permps: {
    simplifyAndSetOp(&II, 1, DemandedElts, UndefElts);
  case Intrinsic::x86_sse4a_extrq:
  case Intrinsic::x86_sse4a_extrqi:
  case Intrinsic::x86_sse4a_insertq:
  case Intrinsic::x86_sse4a_insertqi:
  return std::nullopt;