18#include "llvm/IR/IntrinsicsX86.h"
25#define DEBUG_TYPE "x86tti"
30 VectorType *IntTy = VectorType::getInteger(cast<VectorType>(V->getType()));
34 assert(V &&
"Vector must be foldable");
42 if (
auto *ConstantMask = dyn_cast<ConstantDataVector>(Mask))
64 if (isa<ConstantAggregateZero>(Mask))
72 unsigned AddrSpace = cast<PointerType>(
Ptr->getType())->getAddressSpace();
73 PointerType *VecPtrTy = PointerType::get(
II.getType(), AddrSpace);
78 II.getType(), PtrCast,
Align(1), BoolMask, ZeroVec);
94 if (isa<ConstantAggregateZero>(Mask)) {
101 if (
II.getIntrinsicID() == Intrinsic::x86_sse2_maskmov_dqu)
107 unsigned AddrSpace = cast<PointerType>(
Ptr->getType())->getAddressSpace();
123 bool LogicalShift =
false;
124 bool ShiftLeft =
false;
127 switch (
II.getIntrinsicID()) {
130 case Intrinsic::x86_sse2_psrai_d:
131 case Intrinsic::x86_sse2_psrai_w:
132 case Intrinsic::x86_avx2_psrai_d:
133 case Intrinsic::x86_avx2_psrai_w:
134 case Intrinsic::x86_avx512_psrai_q_128:
135 case Intrinsic::x86_avx512_psrai_q_256:
136 case Intrinsic::x86_avx512_psrai_d_512:
137 case Intrinsic::x86_avx512_psrai_q_512:
138 case Intrinsic::x86_avx512_psrai_w_512:
141 case Intrinsic::x86_sse2_psra_d:
142 case Intrinsic::x86_sse2_psra_w:
143 case Intrinsic::x86_avx2_psra_d:
144 case Intrinsic::x86_avx2_psra_w:
145 case Intrinsic::x86_avx512_psra_q_128:
146 case Intrinsic::x86_avx512_psra_q_256:
147 case Intrinsic::x86_avx512_psra_d_512:
148 case Intrinsic::x86_avx512_psra_q_512:
149 case Intrinsic::x86_avx512_psra_w_512:
150 LogicalShift =
false;
153 case Intrinsic::x86_sse2_psrli_d:
154 case Intrinsic::x86_sse2_psrli_q:
155 case Intrinsic::x86_sse2_psrli_w:
156 case Intrinsic::x86_avx2_psrli_d:
157 case Intrinsic::x86_avx2_psrli_q:
158 case Intrinsic::x86_avx2_psrli_w:
159 case Intrinsic::x86_avx512_psrli_d_512:
160 case Intrinsic::x86_avx512_psrli_q_512:
161 case Intrinsic::x86_avx512_psrli_w_512:
164 case Intrinsic::x86_sse2_psrl_d:
165 case Intrinsic::x86_sse2_psrl_q:
166 case Intrinsic::x86_sse2_psrl_w:
167 case Intrinsic::x86_avx2_psrl_d:
168 case Intrinsic::x86_avx2_psrl_q:
169 case Intrinsic::x86_avx2_psrl_w:
170 case Intrinsic::x86_avx512_psrl_d_512:
171 case Intrinsic::x86_avx512_psrl_q_512:
172 case Intrinsic::x86_avx512_psrl_w_512:
176 case Intrinsic::x86_sse2_pslli_d:
177 case Intrinsic::x86_sse2_pslli_q:
178 case Intrinsic::x86_sse2_pslli_w:
179 case Intrinsic::x86_avx2_pslli_d:
180 case Intrinsic::x86_avx2_pslli_q:
181 case Intrinsic::x86_avx2_pslli_w:
182 case Intrinsic::x86_avx512_pslli_d_512:
183 case Intrinsic::x86_avx512_pslli_q_512:
184 case Intrinsic::x86_avx512_pslli_w_512:
187 case Intrinsic::x86_sse2_psll_d:
188 case Intrinsic::x86_sse2_psll_q:
189 case Intrinsic::x86_sse2_psll_w:
190 case Intrinsic::x86_avx2_psll_d:
191 case Intrinsic::x86_avx2_psll_q:
192 case Intrinsic::x86_avx2_psll_w:
193 case Intrinsic::x86_avx512_psll_d_512:
194 case Intrinsic::x86_avx512_psll_q_512:
195 case Intrinsic::x86_avx512_psll_w_512:
200 assert((LogicalShift || !ShiftLeft) &&
"Only logical shifts can shift left");
202 Value *Vec =
II.getArgOperand(0);
203 Value *Amt =
II.getArgOperand(1);
204 auto *VT = cast<FixedVectorType>(Vec->
getType());
205 Type *SVT = VT->getElementType();
207 unsigned VWidth = VT->getNumElements();
220 return (LogicalShift ? (ShiftLeft ? Builder.
CreateShl(Vec, Amt)
227 Amt = ConstantInt::get(SVT,
BitWidth - 1);
234 cast<VectorType>(AmtVT)->getElementType() == SVT &&
235 "Unexpected shift-by-scalar type");
236 unsigned NumAmtElts = cast<FixedVectorType>(AmtVT)->getNumElements();
240 Amt, DemandedLower,
II.getDataLayout());
242 Amt, DemandedUpper,
II.getDataLayout());
247 return (LogicalShift ? (ShiftLeft ? Builder.
CreateShl(Vec, Amt)
254 auto *CDV = dyn_cast<ConstantDataVector>(Amt);
261 cast<VectorType>(AmtVT)->getElementType() == SVT &&
262 "Unexpected shift-by-scalar type");
266 for (
unsigned i = 0, NumSubElts = 64 /
BitWidth; i != NumSubElts; ++i) {
267 unsigned SubEltIdx = (NumSubElts - 1) - i;
268 auto *SubElt = cast<ConstantInt>(CDV->getElementAsConstant(SubEltIdx));
305 bool LogicalShift =
false;
306 bool ShiftLeft =
false;
308 switch (
II.getIntrinsicID()) {
311 case Intrinsic::x86_avx2_psrav_d:
312 case Intrinsic::x86_avx2_psrav_d_256:
313 case Intrinsic::x86_avx512_psrav_q_128:
314 case Intrinsic::x86_avx512_psrav_q_256:
315 case Intrinsic::x86_avx512_psrav_d_512:
316 case Intrinsic::x86_avx512_psrav_q_512:
317 case Intrinsic::x86_avx512_psrav_w_128:
318 case Intrinsic::x86_avx512_psrav_w_256:
319 case Intrinsic::x86_avx512_psrav_w_512:
320 LogicalShift =
false;
323 case Intrinsic::x86_avx2_psrlv_d:
324 case Intrinsic::x86_avx2_psrlv_d_256:
325 case Intrinsic::x86_avx2_psrlv_q:
326 case Intrinsic::x86_avx2_psrlv_q_256:
327 case Intrinsic::x86_avx512_psrlv_d_512:
328 case Intrinsic::x86_avx512_psrlv_q_512:
329 case Intrinsic::x86_avx512_psrlv_w_128:
330 case Intrinsic::x86_avx512_psrlv_w_256:
331 case Intrinsic::x86_avx512_psrlv_w_512:
335 case Intrinsic::x86_avx2_psllv_d:
336 case Intrinsic::x86_avx2_psllv_d_256:
337 case Intrinsic::x86_avx2_psllv_q:
338 case Intrinsic::x86_avx2_psllv_q_256:
339 case Intrinsic::x86_avx512_psllv_d_512:
340 case Intrinsic::x86_avx512_psllv_q_512:
341 case Intrinsic::x86_avx512_psllv_w_128:
342 case Intrinsic::x86_avx512_psllv_w_256:
343 case Intrinsic::x86_avx512_psllv_w_512:
348 assert((LogicalShift || !ShiftLeft) &&
"Only logical shifts can shift left");
350 Value *Vec =
II.getArgOperand(0);
351 Value *Amt =
II.getArgOperand(1);
352 auto *VT = cast<FixedVectorType>(
II.getType());
353 Type *SVT = VT->getElementType();
354 int NumElts = VT->getNumElements();
362 return (LogicalShift ? (ShiftLeft ? Builder.
CreateShl(Vec, Amt)
368 auto *CShift = dyn_cast<Constant>(Amt);
374 bool AnyOutOfRange =
false;
376 for (
int I = 0;
I < NumElts; ++
I) {
377 auto *CElt = CShift->getAggregateElement(
I);
378 if (isa_and_nonnull<UndefValue>(CElt)) {
383 auto *COp = dyn_cast_or_null<ConstantInt>(CElt);
390 APInt ShiftVal = COp->getValue();
392 AnyOutOfRange = LogicalShift;
405 for (
int Idx : ShiftAmts) {
409 assert(LogicalShift &&
"Logical shift expected");
410 ConstantVec.
push_back(ConstantInt::getNullValue(SVT));
422 for (
int Idx : ShiftAmts) {
441 Value *Arg0 =
II.getArgOperand(0);
442 Value *Arg1 =
II.getArgOperand(1);
443 Type *ResTy =
II.getType();
446 if (isa<UndefValue>(Arg0) && isa<UndefValue>(Arg1))
449 auto *ArgTy = cast<FixedVectorType>(Arg0->
getType());
451 unsigned NumSrcElts = ArgTy->getNumElements();
452 assert(cast<FixedVectorType>(ResTy)->getNumElements() == (2 * NumSrcElts) &&
453 "Unexpected packing types");
455 unsigned NumSrcEltsPerLane = NumSrcElts / NumLanes;
457 unsigned SrcScalarSizeInBits = ArgTy->getScalarSizeInBits();
458 assert(SrcScalarSizeInBits == (2 * DstScalarSizeInBits) &&
459 "Unexpected packing types");
462 if (!isa<Constant>(Arg0) || !isa<Constant>(Arg1))
467 APInt MinValue, MaxValue;
493 for (
unsigned Lane = 0; Lane != NumLanes; ++Lane) {
494 for (
unsigned Elt = 0; Elt != NumSrcEltsPerLane; ++Elt)
495 PackMask.
push_back(Elt + (Lane * NumSrcEltsPerLane));
496 for (
unsigned Elt = 0; Elt != NumSrcEltsPerLane; ++Elt)
497 PackMask.
push_back(Elt + (Lane * NumSrcEltsPerLane) + NumSrcElts);
508 Value *Arg0 =
II.getArgOperand(0);
509 Value *Arg1 =
II.getArgOperand(1);
510 auto *ResTy = cast<FixedVectorType>(
II.getType());
511 [[maybe_unused]]
auto *ArgTy = cast<FixedVectorType>(Arg0->
getType());
513 unsigned NumDstElts = ResTy->getNumElements();
514 assert(ArgTy->getNumElements() == (2 * NumDstElts) &&
515 ResTy->getScalarSizeInBits() == (2 * ArgTy->getScalarSizeInBits()) &&
516 "Unexpected PMADD types");
519 if (isa<UndefValue>(Arg0) || isa<UndefValue>(Arg1))
523 if (isa<ConstantAggregateZero>(Arg0) || isa<ConstantAggregateZero>(Arg1))
527 if (!isa<Constant>(Arg0) || !isa<Constant>(Arg1))
536 for (
unsigned I = 0;
I != NumDstElts; ++
I) {
547 IsPMADDWD ? Instruction::CastOps::SExt : Instruction::CastOps::ZExt;
548 LHSLo = Builder.
CreateCast(LHSCast, LHSLo, ResTy);
549 LHSHi = Builder.
CreateCast(LHSCast, LHSHi, ResTy);
550 RHSLo = Builder.
CreateCast(Instruction::CastOps::SExt, RHSLo, ResTy);
551 RHSHi = Builder.
CreateCast(Instruction::CastOps::SExt, RHSHi, ResTy);
561 Value *Arg =
II.getArgOperand(0);
562 Type *ResTy =
II.getType();
565 if (isa<UndefValue>(Arg))
568 auto *ArgTy = dyn_cast<FixedVectorType>(Arg->
getType());
578 unsigned NumElts = ArgTy->getNumElements();
590 Value *CarryIn =
II.getArgOperand(0);
591 Value *Op1 =
II.getArgOperand(1);
592 Value *Op2 =
II.getArgOperand(2);
595 assert(
RetTy->getStructElementType(0)->isIntegerTy(8) &&
596 RetTy->getStructElementType(1) == OpTy && OpTy == Op2->
getType() &&
597 "Unexpected types for x86 addcarry");
618 auto *ArgImm = dyn_cast<ConstantInt>(
II.getArgOperand(3));
619 if (!ArgImm || ArgImm->getValue().uge(256))
622 Value *ArgA =
II.getArgOperand(0);
623 Value *ArgB =
II.getArgOperand(1);
624 Value *ArgC =
II.getArgOperand(2);
628 auto Or = [&](
auto Lhs,
auto Rhs) -> std::pair<Value *, uint8_t> {
629 return {Builder.
CreateOr(Lhs.first, Rhs.first), Lhs.second | Rhs.second};
631 auto Xor = [&](
auto Lhs,
auto Rhs) -> std::pair<Value *, uint8_t> {
632 return {Builder.
CreateXor(Lhs.first, Rhs.first), Lhs.second ^ Rhs.second};
634 auto And = [&](
auto Lhs,
auto Rhs) -> std::pair<Value *, uint8_t> {
635 return {Builder.
CreateAnd(Lhs.first, Rhs.first), Lhs.second & Rhs.second};
637 auto Not = [&](
auto V) -> std::pair<Value *, uint8_t> {
638 return {Builder.
CreateNot(V.first), ~V.second};
640 auto Nor = [&](
auto Lhs,
auto Rhs) {
return Not(
Or(Lhs, Rhs)); };
641 auto Xnor = [&](
auto Lhs,
auto Rhs) {
return Not(
Xor(Lhs, Rhs)); };
642 auto Nand = [&](
auto Lhs,
auto Rhs) {
return Not(
And(Lhs, Rhs)); };
648 bool ABIsConst = AIsConst && BIsConst;
649 bool ACIsConst = AIsConst && CIsConst;
650 bool BCIsConst = BIsConst && CIsConst;
651 bool ABCIsConst = AIsConst && BIsConst && CIsConst;
657 std::pair<Value *, uint8_t>
A = {ArgA, 0xf0};
658 std::pair<Value *, uint8_t>
B = {ArgB, 0xcc};
659 std::pair<Value *, uint8_t>
C = {ArgC, 0xaa};
660 std::pair<Value *, uint8_t> Res = {
nullptr, 0};
667 uint8_t Imm = ArgImm->getValue().getZExtValue();
674 Res = Nor(
Or(
A,
B),
C);
694 Res = Nor(
A, Xnor(
B,
C));
702 Res = Nor(
A, Nand(
B,
C));
710 Res = Nor(
A, Not(
C));
714 Res = Nor(
A, Nor(
C, Not(
B)));
718 Res = Nor(
A, Not(
B));
722 Res = Nor(
A, Nor(
B, Not(
C)));
726 Res = Nor(
A, Nor(
B,
C));
741 Res = Nor(Xnor(
A,
C),
B);
749 Res = Nor(Xnor(
A,
B),
C);
765 Res = Nor(Xnor(
A,
B), Xnor(
A,
C));
769 Res =
And(Nand(
A,
B), Xnor(
B,
C));
793 Res = Nand(
A,
Or(
B,
C));
797 Res = Nor(Nand(
A,
C),
B);
805 Res = Nor(
B, Not(
C));
809 Res = Nor(
B, Nor(
C, Not(
A)));
813 Res = Nor(Xnor(
A,
B),
Xor(
A,
C));
817 Res =
Xor(
A, Nand(Nand(
A,
B),
C));
845 Res = Nor(Xnor(
A,
B), Nor(
B,
C));
857 Res = Nand(
A,
Or(
B, Not(
C)));
861 Res = Nor(
B, Not(
A));
865 Res = Nor(Nor(
A, Not(
C)),
B);
869 Res = Nor(Nor(
A,
C),
B);
888 Res = Nand(
Or(
A,
C),
B);
892 Res = Nor(Xnor(
A,
B), Nor(
A,
C));
904 Res = Nand(
Or(
A, Not(
C)),
B);
923 Res = Nor(Nand(
A,
B),
C);
931 Res = Nor(
Xor(
A,
B), Xnor(
A,
C));
935 Res =
Xor(
A, Nand(Nand(
A,
C),
B));
939 Res = Nor(
C, Not(
B));
943 Res = Nor(Nor(
B, Not(
A)),
C);
963 Res = Nor(Xnor(
A,
C), Nor(
B,
C));
983 Res = Nand(
A, Nand(
B, Not(
C)));
987 Res = Nor(
C, Not(
A));
991 Res = Nor(Nor(
A, Not(
B)),
C);
1003 Res = Nor(Nor(
A,
B),
C);
1014 Res = Nand(
Or(
A,
B),
C);
1018 Res = Nor(Nor(
A,
B), Xnor(
A,
C));
1037 Res = Nand(
Or(
A, Not(
B)),
C);
1057 Res = Nor(Nor(
A,
C), Xnor(
B,
C));
1065 Res = Nor(Nor(
A,
B), Xnor(
B,
C));
1084 Res =
Xor(Xnor(
A,
B),
C);
1108 Res = Nand(
A, Xnor(
B,
C));
1112 Res =
And(
A, Nand(
B,
C));
1124 Res = Nand(Nand(
A, Not(
C)),
B);
1132 Res = Nand(Nand(
A, Not(
B)),
C);
1156 Res = Nand(Xnor(
A,
C),
B);
1164 Res = Nand(Xnor(
A,
B),
C);
1172 Res = Nand(
And(
A,
B),
C);
1184 Res =
And(Xnor(
A,
B),
C);
1188 Res = Nor(
Xor(
A,
B), Nor(
C, Not(
A)));
1192 Res =
And(Xnor(
A,
C),
B);
1196 Res = Nor(
Xor(
A,
C), Nor(
B, Not(
A)));
1200 Res =
Xor(Nor(Xnor(
A,
B), Nor(
B,
C)),
C);
1204 Res =
Xor(
A, Nand(
B,
C));
1211 Res =
Xor(
B, Nor(Nor(
B, Not(
A)),
C));
1215 Res =
And(Nand(
A, Not(
B)),
C);
1223 Res =
And(Nand(
A, Not(
C)),
B);
1235 Res = Nand(
A, Nand(
B,
C));
1239 Res =
And(
A, Xnor(
B,
C));
1243 Res = Nor(Nor(
A, Not(
B)),
Xor(
B,
C));
1247 Res =
Xor(Nor(Xnor(
A,
B), Nor(
A,
C)),
C);
1251 Res =
Xor(Nand(
A,
C),
B);
1255 Res = Nor(Nor(
A,
B),
Xor(Xnor(
A,
B),
C));
1259 Res =
Xor(Nand(
A,
B),
C);
1271 Res = Nor(Nor(
A,
B),
Xor(
B,
C));
1279 Res =
Xor(Nor(
B, Not(
A)),
C);
1283 Res =
Or(Nor(
A,
B), Xnor(
B,
C));
1287 Res =
Xor(
B, Nor(
C, Not(
A)));
1291 Res =
Or(Nor(
A,
C), Xnor(
B,
C));
1299 Res = Nand(
A,
Xor(
B,
C));
1306 Res =
Xor(
A, Nor(Nor(
A, Not(
B)),
C));
1318 Res =
Xor(
A, Nor(Nor(
A,
B),
C));
1326 Res =
Xor(Nor(
A, Not(
B)),
C);
1330 Res =
Or(Nor(
A,
B), Xnor(
A,
C));
1345 Res =
Or(Nor(
A,
B),
C);
1349 Res =
Xor(Nor(Xnor(
B,
C),
A),
C);
1357 Res =
Or(Nor(
A, Not(
B)),
C);
1361 Res =
Or(
C, Not(
A));
1365 Res =
And(
A, Nand(
B, Not(
C)));
1377 Res = Nand(Nand(
A,
C),
B);
1381 Res =
Xor(
A, Nor(
C, Not(
B)));
1385 Res =
Or(Xnor(
A,
C), Nor(
B,
C));
1393 Res = Nand(
Xor(
A,
C),
B);
1397 Res =
Xor(Nor(Xnor(
A,
C),
B),
C);
1405 Res =
Or(Nor(
B, Not(
A)),
C);
1409 Res =
Or(
C, Not(
B));
1425 Res =
Or(Nand(
A,
B),
C);
1432 Res =
Xor(
A, Nor(Nor(
A, Not(
C)),
B));
1436 Res =
Xor(
A, Nor(Nor(
A,
C),
B));
1452 Res =
Xor(Nor(
A, Not(
C)),
B);
1456 Res =
Or(Xnor(
A,
B), Nor(
A,
C));
1468 Res =
Xor(
B, Nor(
A, Xnor(
B,
C)));
1479 Res =
Or(Nor(
A,
C),
B);
1483 Res =
Or(Nor(
A, Not(
C)),
B);
1487 Res =
Or(
B, Not(
A));
1499 Res =
Xor(
A, Nor(
B, Not(
C)));
1503 Res =
Or(Xnor(
A,
B), Nor(
B,
C));
1511 Res = Nand(Nand(
A,
B),
C);
1519 Res = Nand(
Xor(
A,
B),
C);
1523 Res =
Xor(Nor(Xnor(
A,
B),
C),
B);
1539 Res =
Or(
B, Nor(
C, Not(
A)));
1543 Res =
Or(
B, Not(
C));
1551 Res =
Or(Nand(
A,
C),
B);
1563 Res =
Xor(
A, Nor(Xnor(
A,
C),
B));
1571 Res =
Xor(
A, Nor(Xnor(
A,
B),
C));
1583 Res =
Or(Xnor(
A,
B), Xnor(
A,
C));
1599 Res =
Or(Xnor(
A,
B),
C);
1607 Res =
Or(Xnor(
A,
C),
B);
1614 Res = Nand(
A, Nor(
B,
C));
1621 Res =
Or(
A, Nor(
B,
C));
1625 Res =
Or(
A, Nor(
B, Not(
C)));
1629 Res =
Or(
A, Not(
B));
1633 Res =
Or(
A, Nor(
C, Not(
B)));
1637 Res =
Or(
A, Not(
C));
1645 Res =
Or(
A, Nand(
B,
C));
1653 Res =
Or(
A, Xnor(
B,
C));
1660 Res = Nand(Nor(
A,
C),
B);
1667 Res = Nand(Nor(
A,
B),
C);
1678 assert((Res.first ==
nullptr || Res.second == Imm) &&
1679 "Simplification of ternary logic does not verify!");
1685 auto *CInt = dyn_cast<ConstantInt>(
II.getArgOperand(2));
1689 auto *VecTy = cast<FixedVectorType>(
II.getType());
1690 assert(VecTy->getNumElements() == 4 &&
"insertps with wrong vector type");
1697 uint8_t Imm = CInt->getZExtValue();
1698 uint8_t ZMask = Imm & 0xf;
1699 uint8_t DestLane = (Imm >> 4) & 0x3;
1700 uint8_t SourceLane = (Imm >> 6) & 0x3;
1710 int ShuffleMask[4] = {0, 1, 2, 3};
1713 Value *V1 =
II.getArgOperand(1);
1718 if ((
II.getArgOperand(0) ==
II.getArgOperand(1)) ||
1719 (ZMask & (1 << DestLane))) {
1723 ShuffleMask[DestLane] = SourceLane;
1725 for (
unsigned i = 0; i < 4; ++i)
1726 if ((ZMask >> i) & 0x1)
1727 ShuffleMask[i] = i + 4;
1734 ShuffleMask[DestLane] = SourceLane + 4;
1745 auto LowConstantHighUndef = [&](
uint64_t Val) {
1747 Constant *Args[] = {ConstantInt::get(IntTy64, Val),
1753 auto *C0 = dyn_cast<Constant>(Op0);
1755 C0 ? dyn_cast_or_null<ConstantInt>(C0->getAggregateElement((
unsigned)0))
1759 if (CILength && CIIndex) {
1793 for (
int i = 0; i != (int)
Length; ++i)
1795 for (
int i =
Length; i != 8; ++i)
1797 for (
int i = 8; i != 16; ++i)
1809 APInt Elt = CI0->getValue();
1816 if (
II.getIntrinsicID() == Intrinsic::x86_sse4a_extrq) {
1817 Value *Args[] = {Op0, CILength, CIIndex};
1825 if (CI0 && CI0->isZero())
1826 return LowConstantHighUndef(0);
1870 for (
int i = 0; i != (int)
Index; ++i)
1872 for (
int i = 0; i != (int)
Length; ++i)
1876 for (
int i = 8; i != 16; ++i)
1886 auto *C0 = dyn_cast<Constant>(Op0);
1887 auto *C1 = dyn_cast<Constant>(Op1);
1889 C0 ? dyn_cast_or_null<ConstantInt>(C0->getAggregateElement((
unsigned)0))
1892 C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((
unsigned)0))
1897 APInt V00 = CI00->getValue();
1898 APInt V10 = CI10->getValue();
1902 APInt Val = V00 | V10;
1911 if (
II.getIntrinsicID() == Intrinsic::x86_sse4a_insertq) {
1916 Value *Args[] = {Op0, Op1, CILength, CIIndex};
1928 auto *V = dyn_cast<Constant>(
II.getArgOperand(1));
1932 auto *VecTy = cast<FixedVectorType>(
II.getType());
1933 unsigned NumElts = VecTy->getNumElements();
1934 assert((NumElts == 16 || NumElts == 32 || NumElts == 64) &&
1935 "Unexpected number of elements in shuffle mask!");
1942 for (
unsigned I = 0;
I < NumElts; ++
I) {
1943 Constant *COp = V->getAggregateElement(
I);
1944 if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
1947 if (isa<UndefValue>(COp)) {
1952 int8_t
Index = cast<ConstantInt>(COp)->getValue().getZExtValue();
1965 auto V1 =
II.getArgOperand(0);
1973 auto *V = dyn_cast<Constant>(
II.getArgOperand(1));
1977 auto *VecTy = cast<FixedVectorType>(
II.getType());
1978 unsigned NumElts = VecTy->getNumElements();
1979 bool IsPD = VecTy->getScalarType()->isDoubleTy();
1980 unsigned NumLaneElts = IsPD ? 2 : 4;
1981 assert(NumElts == 16 || NumElts == 8 || NumElts == 4 || NumElts == 2);
1987 for (
unsigned I = 0;
I < NumElts; ++
I) {
1988 Constant *COp = V->getAggregateElement(
I);
1989 if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
1992 if (isa<UndefValue>(COp)) {
1997 APInt Index = cast<ConstantInt>(COp)->getValue();
2008 Index +=
APInt(32, (
I / NumLaneElts) * NumLaneElts);
2010 Indexes[
I] =
Index.getZExtValue();
2013 auto V1 =
II.getArgOperand(0);
2020 auto *V = dyn_cast<Constant>(
II.getArgOperand(1));
2024 auto *VecTy = cast<FixedVectorType>(
II.getType());
2025 unsigned Size = VecTy->getNumElements();
2027 "Unexpected shuffle mask size");
2032 for (
unsigned I = 0;
I <
Size; ++
I) {
2033 Constant *COp = V->getAggregateElement(
I);
2034 if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
2037 if (isa<UndefValue>(COp)) {
2047 auto V1 =
II.getArgOperand(0);
2051std::optional<Instruction *>
2053 auto SimplifyDemandedVectorEltsLow = [&IC](
Value *
Op,
unsigned Width,
2054 unsigned DemandedWidth) {
2055 APInt UndefElts(Width, 0);
2062 case Intrinsic::x86_bmi_bextr_32:
2063 case Intrinsic::x86_bmi_bextr_64:
2064 case Intrinsic::x86_tbm_bextri_u32:
2065 case Intrinsic::x86_tbm_bextri_u64:
2067 if (
auto *
C = dyn_cast<ConstantInt>(
II.getArgOperand(1))) {
2071 unsigned BitWidth =
II.getType()->getIntegerBitWidth();
2077 if (
auto *InC = dyn_cast<ConstantInt>(
II.getArgOperand(0))) {
2078 uint64_t Result = InC->getZExtValue() >> Shift;
2081 Result &= maskTrailingOnes<uint64_t>(
Length);
2083 ConstantInt::get(
II.getType(), Result));
2090 case Intrinsic::x86_bmi_bzhi_32:
2091 case Intrinsic::x86_bmi_bzhi_64:
2093 if (
auto *
C = dyn_cast<ConstantInt>(
II.getArgOperand(1))) {
2095 unsigned BitWidth =
II.getType()->getIntegerBitWidth();
2103 if (
auto *InC = dyn_cast<ConstantInt>(
II.getArgOperand(0))) {
2104 uint64_t Result = InC->getZExtValue();
2105 Result &= maskTrailingOnes<uint64_t>(
Index);
2107 ConstantInt::get(
II.getType(), Result));
2112 case Intrinsic::x86_bmi_pext_32:
2113 case Intrinsic::x86_bmi_pext_64:
2114 if (
auto *MaskC = dyn_cast<ConstantInt>(
II.getArgOperand(1))) {
2115 if (MaskC->isNullValue()) {
2118 if (MaskC->isAllOnesValue()) {
2122 unsigned MaskIdx, MaskLen;
2123 if (MaskC->getValue().isShiftedMask(MaskIdx, MaskLen)) {
2127 Value *Input =
II.getArgOperand(0);
2129 Value *ShiftAmt = ConstantInt::get(
II.getType(), MaskIdx);
2134 if (
auto *SrcC = dyn_cast<ConstantInt>(
II.getArgOperand(0))) {
2135 uint64_t Src = SrcC->getZExtValue();
2136 uint64_t Mask = MaskC->getZExtValue();
2143 if (BitToTest & Src)
2152 ConstantInt::get(
II.getType(), Result));
2156 case Intrinsic::x86_bmi_pdep_32:
2157 case Intrinsic::x86_bmi_pdep_64:
2158 if (
auto *MaskC = dyn_cast<ConstantInt>(
II.getArgOperand(1))) {
2159 if (MaskC->isNullValue()) {
2162 if (MaskC->isAllOnesValue()) {
2166 unsigned MaskIdx, MaskLen;
2167 if (MaskC->getValue().isShiftedMask(MaskIdx, MaskLen)) {
2171 Value *Input =
II.getArgOperand(0);
2172 Value *ShiftAmt = ConstantInt::get(
II.getType(), MaskIdx);
2178 if (
auto *SrcC = dyn_cast<ConstantInt>(
II.getArgOperand(0))) {
2179 uint64_t Src = SrcC->getZExtValue();
2180 uint64_t Mask = MaskC->getZExtValue();
2187 if (BitToTest & Src)
2196 ConstantInt::get(
II.getType(), Result));
2201 case Intrinsic::x86_sse_cvtss2si:
2202 case Intrinsic::x86_sse_cvtss2si64:
2203 case Intrinsic::x86_sse_cvttss2si:
2204 case Intrinsic::x86_sse_cvttss2si64:
2205 case Intrinsic::x86_sse2_cvtsd2si:
2206 case Intrinsic::x86_sse2_cvtsd2si64:
2207 case Intrinsic::x86_sse2_cvttsd2si:
2208 case Intrinsic::x86_sse2_cvttsd2si64:
2209 case Intrinsic::x86_avx512_vcvtss2si32:
2210 case Intrinsic::x86_avx512_vcvtss2si64:
2211 case Intrinsic::x86_avx512_vcvtss2usi32:
2212 case Intrinsic::x86_avx512_vcvtss2usi64:
2213 case Intrinsic::x86_avx512_vcvtsd2si32:
2214 case Intrinsic::x86_avx512_vcvtsd2si64:
2215 case Intrinsic::x86_avx512_vcvtsd2usi32:
2216 case Intrinsic::x86_avx512_vcvtsd2usi64:
2217 case Intrinsic::x86_avx512_cvttss2si:
2218 case Intrinsic::x86_avx512_cvttss2si64:
2219 case Intrinsic::x86_avx512_cvttss2usi:
2220 case Intrinsic::x86_avx512_cvttss2usi64:
2221 case Intrinsic::x86_avx512_cvttsd2si:
2222 case Intrinsic::x86_avx512_cvttsd2si64:
2223 case Intrinsic::x86_avx512_cvttsd2usi:
2224 case Intrinsic::x86_avx512_cvttsd2usi64: {
2227 Value *Arg =
II.getArgOperand(0);
2228 unsigned VWidth = cast<FixedVectorType>(Arg->
getType())->getNumElements();
2229 if (
Value *V = SimplifyDemandedVectorEltsLow(Arg, VWidth, 1)) {
2235 case Intrinsic::x86_mmx_pmovmskb:
2236 case Intrinsic::x86_sse_movmsk_ps:
2237 case Intrinsic::x86_sse2_movmsk_pd:
2238 case Intrinsic::x86_sse2_pmovmskb_128:
2239 case Intrinsic::x86_avx_movmsk_pd_256:
2240 case Intrinsic::x86_avx_movmsk_ps_256:
2241 case Intrinsic::x86_avx2_pmovmskb:
2247 case Intrinsic::x86_sse_comieq_ss:
2248 case Intrinsic::x86_sse_comige_ss:
2249 case Intrinsic::x86_sse_comigt_ss:
2250 case Intrinsic::x86_sse_comile_ss:
2251 case Intrinsic::x86_sse_comilt_ss:
2252 case Intrinsic::x86_sse_comineq_ss:
2253 case Intrinsic::x86_sse_ucomieq_ss:
2254 case Intrinsic::x86_sse_ucomige_ss:
2255 case Intrinsic::x86_sse_ucomigt_ss:
2256 case Intrinsic::x86_sse_ucomile_ss:
2257 case Intrinsic::x86_sse_ucomilt_ss:
2258 case Intrinsic::x86_sse_ucomineq_ss:
2259 case Intrinsic::x86_sse2_comieq_sd:
2260 case Intrinsic::x86_sse2_comige_sd:
2261 case Intrinsic::x86_sse2_comigt_sd:
2262 case Intrinsic::x86_sse2_comile_sd:
2263 case Intrinsic::x86_sse2_comilt_sd:
2264 case Intrinsic::x86_sse2_comineq_sd:
2265 case Intrinsic::x86_sse2_ucomieq_sd:
2266 case Intrinsic::x86_sse2_ucomige_sd:
2267 case Intrinsic::x86_sse2_ucomigt_sd:
2268 case Intrinsic::x86_sse2_ucomile_sd:
2269 case Intrinsic::x86_sse2_ucomilt_sd:
2270 case Intrinsic::x86_sse2_ucomineq_sd:
2271 case Intrinsic::x86_avx512_vcomi_ss:
2272 case Intrinsic::x86_avx512_vcomi_sd:
2273 case Intrinsic::x86_avx512_mask_cmp_ss:
2274 case Intrinsic::x86_avx512_mask_cmp_sd: {
2277 bool MadeChange =
false;
2278 Value *Arg0 =
II.getArgOperand(0);
2279 Value *Arg1 =
II.getArgOperand(1);
2280 unsigned VWidth = cast<FixedVectorType>(Arg0->
getType())->getNumElements();
2281 if (
Value *V = SimplifyDemandedVectorEltsLow(Arg0, VWidth, 1)) {
2285 if (
Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, 1)) {
2295 case Intrinsic::x86_avx512_add_ps_512:
2296 case Intrinsic::x86_avx512_div_ps_512:
2297 case Intrinsic::x86_avx512_mul_ps_512:
2298 case Intrinsic::x86_avx512_sub_ps_512:
2299 case Intrinsic::x86_avx512_add_pd_512:
2300 case Intrinsic::x86_avx512_div_pd_512:
2301 case Intrinsic::x86_avx512_mul_pd_512:
2302 case Intrinsic::x86_avx512_sub_pd_512:
2305 if (
auto *R = dyn_cast<ConstantInt>(
II.getArgOperand(2))) {
2306 if (R->getValue() == 4) {
2307 Value *Arg0 =
II.getArgOperand(0);
2308 Value *Arg1 =
II.getArgOperand(1);
2314 case Intrinsic::x86_avx512_add_ps_512:
2315 case Intrinsic::x86_avx512_add_pd_512:
2318 case Intrinsic::x86_avx512_sub_ps_512:
2319 case Intrinsic::x86_avx512_sub_pd_512:
2322 case Intrinsic::x86_avx512_mul_ps_512:
2323 case Intrinsic::x86_avx512_mul_pd_512:
2326 case Intrinsic::x86_avx512_div_ps_512:
2327 case Intrinsic::x86_avx512_div_pd_512:
2337 case Intrinsic::x86_avx512_mask_add_ss_round:
2338 case Intrinsic::x86_avx512_mask_div_ss_round:
2339 case Intrinsic::x86_avx512_mask_mul_ss_round:
2340 case Intrinsic::x86_avx512_mask_sub_ss_round:
2341 case Intrinsic::x86_avx512_mask_add_sd_round:
2342 case Intrinsic::x86_avx512_mask_div_sd_round:
2343 case Intrinsic::x86_avx512_mask_mul_sd_round:
2344 case Intrinsic::x86_avx512_mask_sub_sd_round:
2347 if (
auto *R = dyn_cast<ConstantInt>(
II.getArgOperand(4))) {
2348 if (R->getValue() == 4) {
2350 Value *Arg0 =
II.getArgOperand(0);
2351 Value *Arg1 =
II.getArgOperand(1);
2359 case Intrinsic::x86_avx512_mask_add_ss_round:
2360 case Intrinsic::x86_avx512_mask_add_sd_round:
2363 case Intrinsic::x86_avx512_mask_sub_ss_round:
2364 case Intrinsic::x86_avx512_mask_sub_sd_round:
2367 case Intrinsic::x86_avx512_mask_mul_ss_round:
2368 case Intrinsic::x86_avx512_mask_mul_sd_round:
2371 case Intrinsic::x86_avx512_mask_div_ss_round:
2372 case Intrinsic::x86_avx512_mask_div_sd_round:
2378 Value *Mask =
II.getArgOperand(3);
2379 auto *
C = dyn_cast<ConstantInt>(Mask);
2381 if (!
C || !
C->getValue()[0]) {
2385 cast<IntegerType>(Mask->getType())->
getBitWidth());
2405 case Intrinsic::x86_sse2_psrai_d:
2406 case Intrinsic::x86_sse2_psrai_w:
2407 case Intrinsic::x86_avx2_psrai_d:
2408 case Intrinsic::x86_avx2_psrai_w:
2409 case Intrinsic::x86_avx512_psrai_q_128:
2410 case Intrinsic::x86_avx512_psrai_q_256:
2411 case Intrinsic::x86_avx512_psrai_d_512:
2412 case Intrinsic::x86_avx512_psrai_q_512:
2413 case Intrinsic::x86_avx512_psrai_w_512:
2414 case Intrinsic::x86_sse2_psrli_d:
2415 case Intrinsic::x86_sse2_psrli_q:
2416 case Intrinsic::x86_sse2_psrli_w:
2417 case Intrinsic::x86_avx2_psrli_d:
2418 case Intrinsic::x86_avx2_psrli_q:
2419 case Intrinsic::x86_avx2_psrli_w:
2420 case Intrinsic::x86_avx512_psrli_d_512:
2421 case Intrinsic::x86_avx512_psrli_q_512:
2422 case Intrinsic::x86_avx512_psrli_w_512:
2423 case Intrinsic::x86_sse2_pslli_d:
2424 case Intrinsic::x86_sse2_pslli_q:
2425 case Intrinsic::x86_sse2_pslli_w:
2426 case Intrinsic::x86_avx2_pslli_d:
2427 case Intrinsic::x86_avx2_pslli_q:
2428 case Intrinsic::x86_avx2_pslli_w:
2429 case Intrinsic::x86_avx512_pslli_d_512:
2430 case Intrinsic::x86_avx512_pslli_q_512:
2431 case Intrinsic::x86_avx512_pslli_w_512:
2437 case Intrinsic::x86_sse2_psra_d:
2438 case Intrinsic::x86_sse2_psra_w:
2439 case Intrinsic::x86_avx2_psra_d:
2440 case Intrinsic::x86_avx2_psra_w:
2441 case Intrinsic::x86_avx512_psra_q_128:
2442 case Intrinsic::x86_avx512_psra_q_256:
2443 case Intrinsic::x86_avx512_psra_d_512:
2444 case Intrinsic::x86_avx512_psra_q_512:
2445 case Intrinsic::x86_avx512_psra_w_512:
2446 case Intrinsic::x86_sse2_psrl_d:
2447 case Intrinsic::x86_sse2_psrl_q:
2448 case Intrinsic::x86_sse2_psrl_w:
2449 case Intrinsic::x86_avx2_psrl_d:
2450 case Intrinsic::x86_avx2_psrl_q:
2451 case Intrinsic::x86_avx2_psrl_w:
2452 case Intrinsic::x86_avx512_psrl_d_512:
2453 case Intrinsic::x86_avx512_psrl_q_512:
2454 case Intrinsic::x86_avx512_psrl_w_512:
2455 case Intrinsic::x86_sse2_psll_d:
2456 case Intrinsic::x86_sse2_psll_q:
2457 case Intrinsic::x86_sse2_psll_w:
2458 case Intrinsic::x86_avx2_psll_d:
2459 case Intrinsic::x86_avx2_psll_q:
2460 case Intrinsic::x86_avx2_psll_w:
2461 case Intrinsic::x86_avx512_psll_d_512:
2462 case Intrinsic::x86_avx512_psll_q_512:
2463 case Intrinsic::x86_avx512_psll_w_512: {
2470 Value *Arg1 =
II.getArgOperand(1);
2472 "Unexpected packed shift size");
2473 unsigned VWidth = cast<FixedVectorType>(Arg1->
getType())->getNumElements();
2475 if (
Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, VWidth / 2)) {
2481 case Intrinsic::x86_avx2_psllv_d:
2482 case Intrinsic::x86_avx2_psllv_d_256:
2483 case Intrinsic::x86_avx2_psllv_q:
2484 case Intrinsic::x86_avx2_psllv_q_256:
2485 case Intrinsic::x86_avx512_psllv_d_512:
2486 case Intrinsic::x86_avx512_psllv_q_512:
2487 case Intrinsic::x86_avx512_psllv_w_128:
2488 case Intrinsic::x86_avx512_psllv_w_256:
2489 case Intrinsic::x86_avx512_psllv_w_512:
2490 case Intrinsic::x86_avx2_psrav_d:
2491 case Intrinsic::x86_avx2_psrav_d_256:
2492 case Intrinsic::x86_avx512_psrav_q_128:
2493 case Intrinsic::x86_avx512_psrav_q_256:
2494 case Intrinsic::x86_avx512_psrav_d_512:
2495 case Intrinsic::x86_avx512_psrav_q_512:
2496 case Intrinsic::x86_avx512_psrav_w_128:
2497 case Intrinsic::x86_avx512_psrav_w_256:
2498 case Intrinsic::x86_avx512_psrav_w_512:
2499 case Intrinsic::x86_avx2_psrlv_d:
2500 case Intrinsic::x86_avx2_psrlv_d_256:
2501 case Intrinsic::x86_avx2_psrlv_q:
2502 case Intrinsic::x86_avx2_psrlv_q_256:
2503 case Intrinsic::x86_avx512_psrlv_d_512:
2504 case Intrinsic::x86_avx512_psrlv_q_512:
2505 case Intrinsic::x86_avx512_psrlv_w_128:
2506 case Intrinsic::x86_avx512_psrlv_w_256:
2507 case Intrinsic::x86_avx512_psrlv_w_512:
2513 case Intrinsic::x86_sse2_packssdw_128:
2514 case Intrinsic::x86_sse2_packsswb_128:
2515 case Intrinsic::x86_avx2_packssdw:
2516 case Intrinsic::x86_avx2_packsswb:
2517 case Intrinsic::x86_avx512_packssdw_512:
2518 case Intrinsic::x86_avx512_packsswb_512:
2524 case Intrinsic::x86_sse2_packuswb_128:
2525 case Intrinsic::x86_sse41_packusdw:
2526 case Intrinsic::x86_avx2_packusdw:
2527 case Intrinsic::x86_avx2_packuswb:
2528 case Intrinsic::x86_avx512_packusdw_512:
2529 case Intrinsic::x86_avx512_packuswb_512:
2535 case Intrinsic::x86_sse2_pmadd_wd:
2536 case Intrinsic::x86_avx2_pmadd_wd:
2537 case Intrinsic::x86_avx512_pmaddw_d_512:
2543 case Intrinsic::x86_ssse3_pmadd_ub_sw_128:
2544 case Intrinsic::x86_avx2_pmadd_ub_sw:
2545 case Intrinsic::x86_avx512_pmaddubs_w_512:
2551 case Intrinsic::x86_pclmulqdq:
2552 case Intrinsic::x86_pclmulqdq_256:
2553 case Intrinsic::x86_pclmulqdq_512: {
2554 if (
auto *
C = dyn_cast<ConstantInt>(
II.getArgOperand(2))) {
2555 unsigned Imm =
C->getZExtValue();
2557 bool MadeChange =
false;
2558 Value *Arg0 =
II.getArgOperand(0);
2559 Value *Arg1 =
II.getArgOperand(1);
2561 cast<FixedVectorType>(Arg0->
getType())->getNumElements();
2563 APInt UndefElts1(VWidth, 0);
2564 APInt DemandedElts1 =
2572 APInt UndefElts2(VWidth, 0);
2573 APInt DemandedElts2 =
2595 case Intrinsic::x86_sse41_insertps:
2601 case Intrinsic::x86_sse4a_extrq: {
2602 Value *Op0 =
II.getArgOperand(0);
2603 Value *Op1 =
II.getArgOperand(1);
2604 unsigned VWidth0 = cast<FixedVectorType>(Op0->
getType())->getNumElements();
2605 unsigned VWidth1 = cast<FixedVectorType>(Op1->
getType())->getNumElements();
2608 VWidth1 == 16 &&
"Unexpected operand sizes");
2611 auto *C1 = dyn_cast<Constant>(Op1);
2613 C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((
unsigned)0))
2616 C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((
unsigned)1))
2626 bool MadeChange =
false;
2627 if (
Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
2631 if (
Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 2)) {
2641 case Intrinsic::x86_sse4a_extrqi: {
2644 Value *Op0 =
II.getArgOperand(0);
2645 unsigned VWidth = cast<FixedVectorType>(Op0->
getType())->getNumElements();
2647 "Unexpected operand size");
2650 auto *CILength = dyn_cast<ConstantInt>(
II.getArgOperand(1));
2651 auto *CIIndex = dyn_cast<ConstantInt>(
II.getArgOperand(2));
2660 if (
Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) {
2666 case Intrinsic::x86_sse4a_insertq: {
2667 Value *Op0 =
II.getArgOperand(0);
2668 Value *Op1 =
II.getArgOperand(1);
2669 unsigned VWidth = cast<FixedVectorType>(Op0->
getType())->getNumElements();
2672 cast<FixedVectorType>(Op1->
getType())->getNumElements() == 2 &&
2673 "Unexpected operand size");
2676 auto *C1 = dyn_cast<Constant>(Op1);
2678 C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((
unsigned)1))
2683 const APInt &V11 = CI11->getValue();
2693 if (
Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) {
2699 case Intrinsic::x86_sse4a_insertqi: {
2703 Value *Op0 =
II.getArgOperand(0);
2704 Value *Op1 =
II.getArgOperand(1);
2705 unsigned VWidth0 = cast<FixedVectorType>(Op0->
getType())->getNumElements();
2706 unsigned VWidth1 = cast<FixedVectorType>(Op1->
getType())->getNumElements();
2709 VWidth1 == 2 &&
"Unexpected operand sizes");
2712 auto *CILength = dyn_cast<ConstantInt>(
II.getArgOperand(2));
2713 auto *CIIndex = dyn_cast<ConstantInt>(
II.getArgOperand(3));
2716 if (CILength && CIIndex) {
2717 APInt Len = CILength->getValue().zextOrTrunc(6);
2718 APInt Idx = CIIndex->getValue().zextOrTrunc(6);
2726 bool MadeChange =
false;
2727 if (
Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
2731 if (
Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 1)) {
2741 case Intrinsic::x86_sse41_pblendvb:
2742 case Intrinsic::x86_sse41_blendvps:
2743 case Intrinsic::x86_sse41_blendvpd:
2744 case Intrinsic::x86_avx_blendv_ps_256:
2745 case Intrinsic::x86_avx_blendv_pd_256:
2746 case Intrinsic::x86_avx2_pblendvb: {
2748 Value *Op0 =
II.getArgOperand(0);
2749 Value *Op1 =
II.getArgOperand(1);
2750 Value *Mask =
II.getArgOperand(2);
2756 if (isa<ConstantAggregateZero>(Mask)) {
2761 if (
auto *ConstantMask = dyn_cast<ConstantDataVector>(Mask)) {
2774 auto *MaskTy = cast<FixedVectorType>(Mask->getType());
2775 auto *OpTy = cast<FixedVectorType>(
II.getType());
2776 assert(MaskTy->getPrimitiveSizeInBits() ==
2777 OpTy->getPrimitiveSizeInBits() &&
2778 "Not expecting mask and operands with different sizes");
2779 unsigned NumMaskElts = MaskTy->getNumElements();
2780 unsigned NumOperandElts = OpTy->getNumElements();
2782 if (NumMaskElts == NumOperandElts) {
2788 if (NumMaskElts < NumOperandElts) {
2799 case Intrinsic::x86_ssse3_pshuf_b_128:
2800 case Intrinsic::x86_avx2_pshuf_b:
2801 case Intrinsic::x86_avx512_pshuf_b_512:
2807 case Intrinsic::x86_avx_vpermilvar_ps:
2808 case Intrinsic::x86_avx_vpermilvar_ps_256:
2809 case Intrinsic::x86_avx512_vpermilvar_ps_512:
2810 case Intrinsic::x86_avx_vpermilvar_pd:
2811 case Intrinsic::x86_avx_vpermilvar_pd_256:
2812 case Intrinsic::x86_avx512_vpermilvar_pd_512:
2818 case Intrinsic::x86_avx2_permd:
2819 case Intrinsic::x86_avx2_permps:
2820 case Intrinsic::x86_avx512_permvar_df_256:
2821 case Intrinsic::x86_avx512_permvar_df_512:
2822 case Intrinsic::x86_avx512_permvar_di_256:
2823 case Intrinsic::x86_avx512_permvar_di_512:
2824 case Intrinsic::x86_avx512_permvar_hi_128:
2825 case Intrinsic::x86_avx512_permvar_hi_256:
2826 case Intrinsic::x86_avx512_permvar_hi_512:
2827 case Intrinsic::x86_avx512_permvar_qi_128:
2828 case Intrinsic::x86_avx512_permvar_qi_256:
2829 case Intrinsic::x86_avx512_permvar_qi_512:
2830 case Intrinsic::x86_avx512_permvar_sf_512:
2831 case Intrinsic::x86_avx512_permvar_si_512:
2837 case Intrinsic::x86_avx_maskload_ps:
2838 case Intrinsic::x86_avx_maskload_pd:
2839 case Intrinsic::x86_avx_maskload_ps_256:
2840 case Intrinsic::x86_avx_maskload_pd_256:
2841 case Intrinsic::x86_avx2_maskload_d:
2842 case Intrinsic::x86_avx2_maskload_q:
2843 case Intrinsic::x86_avx2_maskload_d_256:
2844 case Intrinsic::x86_avx2_maskload_q_256:
2850 case Intrinsic::x86_sse2_maskmov_dqu:
2851 case Intrinsic::x86_avx_maskstore_ps:
2852 case Intrinsic::x86_avx_maskstore_pd:
2853 case Intrinsic::x86_avx_maskstore_ps_256:
2854 case Intrinsic::x86_avx_maskstore_pd_256:
2855 case Intrinsic::x86_avx2_maskstore_d:
2856 case Intrinsic::x86_avx2_maskstore_q:
2857 case Intrinsic::x86_avx2_maskstore_d_256:
2858 case Intrinsic::x86_avx2_maskstore_q_256:
2864 case Intrinsic::x86_addcarry_32:
2865 case Intrinsic::x86_addcarry_64:
2871 case Intrinsic::x86_avx512_pternlog_d_128:
2872 case Intrinsic::x86_avx512_pternlog_d_256:
2873 case Intrinsic::x86_avx512_pternlog_d_512:
2874 case Intrinsic::x86_avx512_pternlog_q_128:
2875 case Intrinsic::x86_avx512_pternlog_q_256:
2876 case Intrinsic::x86_avx512_pternlog_q_512:
2884 return std::nullopt;
2889 bool &KnownBitsComputed)
const {
2890 switch (
II.getIntrinsicID()) {
2893 case Intrinsic::x86_mmx_pmovmskb:
2894 case Intrinsic::x86_sse_movmsk_ps:
2895 case Intrinsic::x86_sse2_movmsk_pd:
2896 case Intrinsic::x86_sse2_pmovmskb_128:
2897 case Intrinsic::x86_avx_movmsk_ps_256:
2898 case Intrinsic::x86_avx_movmsk_pd_256:
2899 case Intrinsic::x86_avx2_pmovmskb: {
2903 if (
II.getIntrinsicID() == Intrinsic::x86_mmx_pmovmskb) {
2906 auto *ArgType = cast<FixedVectorType>(
II.getArgOperand(0)->getType());
2907 ArgWidth = ArgType->getNumElements();
2913 Type *VTy =
II.getType();
2914 if (DemandedElts.
isZero()) {
2920 KnownBitsComputed =
true;
2924 return std::nullopt;
2931 simplifyAndSetOp)
const {
2932 unsigned VWidth = cast<FixedVectorType>(
II.getType())->getNumElements();
2933 switch (
II.getIntrinsicID()) {
2936 case Intrinsic::x86_xop_vfrcz_ss:
2937 case Intrinsic::x86_xop_vfrcz_sd:
2942 if (!DemandedElts[0]) {
2949 simplifyAndSetOp(&
II, 0, DemandedElts, UndefElts);
2952 UndefElts = UndefElts[0];
2956 case Intrinsic::x86_sse_rcp_ss:
2957 case Intrinsic::x86_sse_rsqrt_ss:
2958 simplifyAndSetOp(&
II, 0, DemandedElts, UndefElts);
2961 if (!DemandedElts[0]) {
2963 return II.getArgOperand(0);
2972 case Intrinsic::x86_sse_min_ss:
2973 case Intrinsic::x86_sse_max_ss:
2974 case Intrinsic::x86_sse_cmp_ss:
2975 case Intrinsic::x86_sse2_min_sd:
2976 case Intrinsic::x86_sse2_max_sd:
2977 case Intrinsic::x86_sse2_cmp_sd: {
2978 simplifyAndSetOp(&
II, 0, DemandedElts, UndefElts);
2981 if (!DemandedElts[0]) {
2983 return II.getArgOperand(0);
2988 simplifyAndSetOp(&
II, 1, DemandedElts, UndefElts2);
3000 case Intrinsic::x86_sse41_round_ss:
3001 case Intrinsic::x86_sse41_round_sd: {
3003 APInt DemandedElts2 = DemandedElts;
3005 simplifyAndSetOp(&
II, 0, DemandedElts2, UndefElts);
3008 if (!DemandedElts[0]) {
3010 return II.getArgOperand(0);
3015 simplifyAndSetOp(&
II, 1, DemandedElts, UndefElts2);
3020 UndefElts |= UndefElts2[0];
3027 case Intrinsic::x86_avx512_mask_add_ss_round:
3028 case Intrinsic::x86_avx512_mask_div_ss_round:
3029 case Intrinsic::x86_avx512_mask_mul_ss_round:
3030 case Intrinsic::x86_avx512_mask_sub_ss_round:
3031 case Intrinsic::x86_avx512_mask_max_ss_round:
3032 case Intrinsic::x86_avx512_mask_min_ss_round:
3033 case Intrinsic::x86_avx512_mask_add_sd_round:
3034 case Intrinsic::x86_avx512_mask_div_sd_round:
3035 case Intrinsic::x86_avx512_mask_mul_sd_round:
3036 case Intrinsic::x86_avx512_mask_sub_sd_round:
3037 case Intrinsic::x86_avx512_mask_max_sd_round:
3038 case Intrinsic::x86_avx512_mask_min_sd_round:
3039 simplifyAndSetOp(&
II, 0, DemandedElts, UndefElts);
3042 if (!DemandedElts[0]) {
3044 return II.getArgOperand(0);
3049 simplifyAndSetOp(&
II, 1, DemandedElts, UndefElts2);
3050 simplifyAndSetOp(&
II, 2, DemandedElts, UndefElts3);
3054 if (!UndefElts2[0] || !UndefElts3[0])
3059 case Intrinsic::x86_sse3_addsub_pd:
3060 case Intrinsic::x86_sse3_addsub_ps:
3061 case Intrinsic::x86_avx_addsub_pd_256:
3062 case Intrinsic::x86_avx_addsub_ps_256: {
3067 bool IsSubOnly = DemandedElts.
isSubsetOf(SubMask);
3068 bool IsAddOnly = DemandedElts.
isSubsetOf(AddMask);
3069 if (IsSubOnly || IsAddOnly) {
3070 assert((IsSubOnly ^ IsAddOnly) &&
"Can't be both add-only and sub-only");
3073 Value *Arg0 =
II.getArgOperand(0), *Arg1 =
II.getArgOperand(1);
3075 IsSubOnly ? Instruction::FSub : Instruction::FAdd, Arg0, Arg1);
3078 simplifyAndSetOp(&
II, 0, DemandedElts, UndefElts);
3079 simplifyAndSetOp(&
II, 1, DemandedElts, UndefElts2);
3080 UndefElts &= UndefElts2;
3085 case Intrinsic::x86_avx2_psllv_d:
3086 case Intrinsic::x86_avx2_psllv_d_256:
3087 case Intrinsic::x86_avx2_psllv_q:
3088 case Intrinsic::x86_avx2_psllv_q_256:
3089 case Intrinsic::x86_avx2_psrlv_d:
3090 case Intrinsic::x86_avx2_psrlv_d_256:
3091 case Intrinsic::x86_avx2_psrlv_q:
3092 case Intrinsic::x86_avx2_psrlv_q_256:
3093 case Intrinsic::x86_avx2_psrav_d:
3094 case Intrinsic::x86_avx2_psrav_d_256: {
3095 simplifyAndSetOp(&
II, 0, DemandedElts, UndefElts);
3096 simplifyAndSetOp(&
II, 1, DemandedElts, UndefElts2);
3097 UndefElts &= UndefElts2;
3101 case Intrinsic::x86_sse2_packssdw_128:
3102 case Intrinsic::x86_sse2_packsswb_128:
3103 case Intrinsic::x86_sse2_packuswb_128:
3104 case Intrinsic::x86_sse41_packusdw:
3105 case Intrinsic::x86_avx2_packssdw:
3106 case Intrinsic::x86_avx2_packsswb:
3107 case Intrinsic::x86_avx2_packusdw:
3108 case Intrinsic::x86_avx2_packuswb:
3109 case Intrinsic::x86_avx512_packssdw_512:
3110 case Intrinsic::x86_avx512_packsswb_512:
3111 case Intrinsic::x86_avx512_packusdw_512:
3112 case Intrinsic::x86_avx512_packuswb_512: {
3113 auto *Ty0 =
II.getArgOperand(0)->getType();
3114 unsigned InnerVWidth = cast<FixedVectorType>(Ty0)->getNumElements();
3115 assert(VWidth == (InnerVWidth * 2) &&
"Unexpected input size");
3117 unsigned NumLanes = Ty0->getPrimitiveSizeInBits() / 128;
3118 unsigned VWidthPerLane = VWidth / NumLanes;
3119 unsigned InnerVWidthPerLane = InnerVWidth / NumLanes;
3125 for (
int OpNum = 0; OpNum != 2; ++OpNum) {
3126 APInt OpDemandedElts(InnerVWidth, 0);
3127 for (
unsigned Lane = 0; Lane != NumLanes; ++Lane) {
3128 unsigned LaneIdx = Lane * VWidthPerLane;
3129 for (
unsigned Elt = 0; Elt != InnerVWidthPerLane; ++Elt) {
3130 unsigned Idx = LaneIdx + Elt + InnerVWidthPerLane * OpNum;
3131 if (DemandedElts[
Idx])
3132 OpDemandedElts.
setBit((Lane * InnerVWidthPerLane) + Elt);
3137 APInt OpUndefElts(InnerVWidth, 0);
3138 simplifyAndSetOp(&
II, OpNum, OpDemandedElts, OpUndefElts);
3141 OpUndefElts = OpUndefElts.
zext(VWidth);
3142 for (
unsigned Lane = 0; Lane != NumLanes; ++Lane) {
3143 APInt LaneElts = OpUndefElts.
lshr(InnerVWidthPerLane * Lane);
3144 LaneElts = LaneElts.
getLoBits(InnerVWidthPerLane);
3145 LaneElts <<= InnerVWidthPerLane * (2 * Lane + OpNum);
3146 UndefElts |= LaneElts;
3152 case Intrinsic::x86_sse2_pmadd_wd:
3153 case Intrinsic::x86_avx2_pmadd_wd:
3154 case Intrinsic::x86_avx512_pmaddw_d_512:
3155 case Intrinsic::x86_ssse3_pmadd_ub_sw_128:
3156 case Intrinsic::x86_avx2_pmadd_ub_sw:
3157 case Intrinsic::x86_avx512_pmaddubs_w_512: {
3159 auto *ArgTy =
II.getArgOperand(0)->getType();
3160 unsigned InnerVWidth = cast<FixedVectorType>(ArgTy)->getNumElements();
3161 assert((VWidth * 2) == InnerVWidth &&
"Unexpected input size");
3163 APInt Op0UndefElts(InnerVWidth, 0);
3164 APInt Op1UndefElts(InnerVWidth, 0);
3165 simplifyAndSetOp(&
II, 0, OpDemandedElts, Op0UndefElts);
3166 simplifyAndSetOp(&
II, 1, OpDemandedElts, Op1UndefElts);
3171 case Intrinsic::x86_ssse3_pshuf_b_128:
3172 case Intrinsic::x86_avx2_pshuf_b:
3173 case Intrinsic::x86_avx512_pshuf_b_512:
3175 case Intrinsic::x86_avx_vpermilvar_ps:
3176 case Intrinsic::x86_avx_vpermilvar_ps_256:
3177 case Intrinsic::x86_avx512_vpermilvar_ps_512:
3178 case Intrinsic::x86_avx_vpermilvar_pd:
3179 case Intrinsic::x86_avx_vpermilvar_pd_256:
3180 case Intrinsic::x86_avx512_vpermilvar_pd_512:
3182 case Intrinsic::x86_avx2_permd:
3183 case Intrinsic::x86_avx2_permps: {
3184 simplifyAndSetOp(&
II, 1, DemandedElts, UndefElts);
3190 case Intrinsic::x86_sse4a_extrq:
3191 case Intrinsic::x86_sse4a_extrqi:
3192 case Intrinsic::x86_sse4a_insertq:
3193 case Intrinsic::x86_sse4a_insertqi:
3197 return std::nullopt;
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
This file provides the interface for the instcombine pass implementation.
uint64_t IntrinsicInst * II
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static Value * simplifyTernarylogic(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
static Instruction * simplifyX86MaskedLoad(IntrinsicInst &II, InstCombiner &IC)
static Value * simplifyX86immShift(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
static Value * simplifyX86insertq(IntrinsicInst &II, Value *Op0, Value *Op1, APInt APLength, APInt APIndex, InstCombiner::BuilderTy &Builder)
Attempt to simplify SSE4A INSERTQ/INSERTQI instructions using constant folding or conversion to a shuffle vector.
static Value * simplifyX86addcarry(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
static Value * simplifyX86pack(IntrinsicInst &II, InstCombiner::BuilderTy &Builder, bool IsSigned)
static Constant * getNegativeIsTrueBoolVec(Constant *V, const DataLayout &DL)
Return a constant boolean vector that has true elements in all positions where the input constant data vector has an element with the sign bit set.
static Value * simplifyX86pshufb(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
Attempt to convert pshufb* to shufflevector if the mask is constant.
static bool simplifyX86MaskedStore(IntrinsicInst &II, InstCombiner &IC)
static Value * simplifyX86vpermilvar(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
Attempt to convert vpermilvar* to shufflevector if the mask is constant.
static Value * simplifyX86movmsk(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
static Value * simplifyX86vpermv(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
Attempt to convert vpermd/vpermps to shufflevector if the mask is constant.
static Value * simplifyX86pmadd(IntrinsicInst &II, InstCombiner::BuilderTy &Builder, bool IsPMADDWD)
static Value * simplifyX86insertps(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
static Value * simplifyX86extrq(IntrinsicInst &II, Value *Op0, ConstantInt *CILength, ConstantInt *CIIndex, InstCombiner::BuilderTy &Builder)
Attempt to simplify SSE4A EXTRQ/EXTRQI instructions using constant folding or conversion to a shuffle vector.
static Value * getBoolVecFromMask(Value *Mask, const DataLayout &DL)
Convert the x86 XMM integer vector mask to a vector of bools based on each element's most significant bit (the sign bit).
static Value * simplifyX86varShift(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
support::ulittle16_t & Lo
support::ulittle16_t & Hi
Class for arbitrary precision integers.
APInt getLoBits(unsigned numBits) const
Compute an APInt containing numBits lowbits from this APInt.
void clearBit(unsigned BitPosition)
Set a given bit to 0.
APInt zext(unsigned width) const
Zero extend to a new width.
uint64_t getZExtValue() const
Get zero extended value.
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
bool ult(const APInt &RHS) const
Unsigned less than comparison.
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
APInt sext(unsigned width) const
Sign extend to a new width.
APInt shl(unsigned shiftAmt) const
Left-shift function.
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
This class represents a no-op cast from one type to another.
This class represents a function call, abstracting a target machine's calling convention.
@ ICMP_SGT
signed greater than
All zero aggregate value.
static ConstantAggregateZero * get(Type *Ty)
static Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
This is the shared class of boolean and integer constants.
const APInt & getValue() const
Return the constant as an APInt value reference.
static Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
static Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value.
static Constant * getAllOnesValue(Type *Ty)
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Value * CreateFSub(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
IntegerType * getInt1Ty()
Fetch the type representing a single bit.
Value * CreateInsertValue(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &Name="")
Value * CreateFDiv(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Value * CreateICmpSGT(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
Value * CreateFAdd(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains V broadcasted to NumElts elements.
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
CallInst * CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment, Value *Mask, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Load intrinsic.
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Value * CreateNot(Value *V, const Twine &Name="")
Value * CreateIsNeg(Value *Arg, const Twine &Name="")
Return a boolean value testing if Arg < 0.
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
CallInst * CreateMaskedStore(Value *Val, Value *Ptr, Align Alignment, Value *Mask)
Create a call to Masked Store intrinsic.
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateICmpSLT(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="")
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateAShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Value * CreateXor(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateFMul(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
The core instruction combiner logic.
const DataLayout & getDataLayout() const
virtual Instruction * eraseInstFromFunction(Instruction &I)=0
Combiner aware instruction erasure.
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
virtual Value * SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, APInt &UndefElts, unsigned Depth=0, bool AllowMultipleUsers=false)=0
static Value * peekThroughBitcast(Value *V, bool OneUseOnly=false)
Return the source operand of a potentially bitcasted value while optionally checking if it has one use.
void addToWorklist(Instruction *I)
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
A wrapper class for inspecting calls to intrinsic functions.
A Module instance is used to store all the information related to an LLVM module.
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", InsertPosition InsertBefore=nullptr, Instruction *MDFrom=nullptr)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
The instances of the Type class are immutable: once they are created, they are never changed.
unsigned getIntegerBitWidth() const
bool isVectorTy() const
True if this is an instance of VectorType.
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static IntegerType * getInt8Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const
std::optional< Value * > simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed) const
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
APInt ScaleBitMask(const APInt &A, unsigned NewBitWidth, bool MatchAllBits=false)
Splat/Merge neighboring bits to widen/narrow the bitmask represented by.
@ C
The default llvm calling convention, compatible with C.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
bool match(Val *V, const Pattern &P)
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
match_combine_and< class_match< Constant >, match_unless< constantexpr_match > > m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
This is an optimization pass for GlobalISel generic memory operations.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
@ Or
Bitwise or logical OR of integers.
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOne bit sets.
DWARFExpression::Operation Op
constexpr unsigned BitWidth
This struct is a compact representation of a valid (non-zero power of two) alignment.
bool isZero() const
Returns true if value is all zero.
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
APInt getMinValue() const
Return the minimal unsigned value possible given these KnownBits.