#include "llvm/IR/IntrinsicsX86.h"

#define DEBUG_TYPE "x86tti"
VectorType *IntTy = VectorType::getInteger(cast<VectorType>(V->getType()));
assert(V && "Vector must be foldable");
if (auto *ConstantMask = dyn_cast<ConstantDataVector>(Mask))
if (isa<ConstantAggregateZero>(Mask))
unsigned AddrSpace = cast<PointerType>(Ptr->getType())->getAddressSpace();
PointerType *VecPtrTy = PointerType::get(II.getType(), AddrSpace);
    II.getType(), PtrCast, Align(1), BoolMask, ZeroVec);
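// Illustrative sketch (not from the original source): the rewrite above
// turns the target-specific masked load into the generic one, e.g.
//   %r = call <4 x float> @llvm.x86.avx.maskload.ps(ptr %p, <4 x i32> %m)
// becomes, once %m is known to be a sign-extended bool vector %b,
//   %r = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %p, i32 1,
//                                <4 x i1> %b, <4 x float> zeroinitializer)
// The pass-through operand is a zero vector because x86 maskload semantics
// guarantee zero for masked-off lanes.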
if (isa<ConstantAggregateZero>(Mask)) {
if (II.getIntrinsicID() == Intrinsic::x86_sse2_maskmov_dqu)
unsigned AddrSpace = cast<PointerType>(Ptr->getType())->getAddressSpace();
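// Store-side counterpart (hedged reading of the fragment above): an
// all-zero mask means the store writes nothing and the call can simply be
// erased, while a constant or sign-extended bool mask lets it be rewritten
// as @llvm.masked.store with the Align(1) the x86 intrinsic implies.
// MASKMOVDQU is left alone because its unaligned, non-temporal semantics
// do not map cleanly onto a plain masked store.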
bool LogicalShift = false;
bool ShiftLeft = false;
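// Rough idea of what follows (illustrative, not a spec): classify the
// intrinsic as arithmetic/logical and left/right, then, when the shift
// amount is a known in-range constant, emit an ordinary IR shift. For
// example (hypothetical values), psrli.d by 3 on <4 x i32> can become
//   %r = lshr <4 x i32> %v, <i32 3, i32 3, i32 3, i32 3>
// while an out-of-range amount yields the architecturally defined result:
// all zeros for logical shifts, a sign fill for arithmetic shifts.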
switch (II.getIntrinsicID()) {
case Intrinsic::x86_sse2_psrai_d:
case Intrinsic::x86_sse2_psrai_w:
case Intrinsic::x86_avx2_psrai_d:
case Intrinsic::x86_avx2_psrai_w:
case Intrinsic::x86_avx512_psrai_q_128:
case Intrinsic::x86_avx512_psrai_q_256:
case Intrinsic::x86_avx512_psrai_d_512:
case Intrinsic::x86_avx512_psrai_q_512:
case Intrinsic::x86_avx512_psrai_w_512:
case Intrinsic::x86_sse2_psra_d:
case Intrinsic::x86_sse2_psra_w:
case Intrinsic::x86_avx2_psra_d:
case Intrinsic::x86_avx2_psra_w:
case Intrinsic::x86_avx512_psra_q_128:
case Intrinsic::x86_avx512_psra_q_256:
case Intrinsic::x86_avx512_psra_d_512:
case Intrinsic::x86_avx512_psra_q_512:
case Intrinsic::x86_avx512_psra_w_512:
  LogicalShift = false;
case Intrinsic::x86_sse2_psrli_d:
case Intrinsic::x86_sse2_psrli_q:
case Intrinsic::x86_sse2_psrli_w:
case Intrinsic::x86_avx2_psrli_d:
case Intrinsic::x86_avx2_psrli_q:
case Intrinsic::x86_avx2_psrli_w:
case Intrinsic::x86_avx512_psrli_d_512:
case Intrinsic::x86_avx512_psrli_q_512:
case Intrinsic::x86_avx512_psrli_w_512:
case Intrinsic::x86_sse2_psrl_d:
case Intrinsic::x86_sse2_psrl_q:
case Intrinsic::x86_sse2_psrl_w:
case Intrinsic::x86_avx2_psrl_d:
case Intrinsic::x86_avx2_psrl_q:
case Intrinsic::x86_avx2_psrl_w:
case Intrinsic::x86_avx512_psrl_d_512:
case Intrinsic::x86_avx512_psrl_q_512:
case Intrinsic::x86_avx512_psrl_w_512:
case Intrinsic::x86_sse2_pslli_d:
case Intrinsic::x86_sse2_pslli_q:
case Intrinsic::x86_sse2_pslli_w:
case Intrinsic::x86_avx2_pslli_d:
case Intrinsic::x86_avx2_pslli_q:
case Intrinsic::x86_avx2_pslli_w:
case Intrinsic::x86_avx512_pslli_d_512:
case Intrinsic::x86_avx512_pslli_q_512:
case Intrinsic::x86_avx512_pslli_w_512:
case Intrinsic::x86_sse2_psll_d:
case Intrinsic::x86_sse2_psll_q:
case Intrinsic::x86_sse2_psll_w:
case Intrinsic::x86_avx2_psll_d:
case Intrinsic::x86_avx2_psll_q:
case Intrinsic::x86_avx2_psll_w:
case Intrinsic::x86_avx512_psll_d_512:
case Intrinsic::x86_avx512_psll_q_512:
case Intrinsic::x86_avx512_psll_w_512:
assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left");
Value *Vec = II.getArgOperand(0);
Value *Amt = II.getArgOperand(1);
auto *VT = cast<FixedVectorType>(Vec->getType());
Type *SVT = VT->getElementType();
unsigned VWidth = VT->getNumElements();
return (LogicalShift ? (ShiftLeft ? Builder.CreateShl(Vec, Amt)
Amt = ConstantInt::get(SVT, BitWidth - 1);
       cast<VectorType>(AmtVT)->getElementType() == SVT &&
       "Unexpected shift-by-scalar type");
unsigned NumAmtElts = cast<FixedVectorType>(AmtVT)->getNumElements();
    Amt, DemandedLower, II.getDataLayout());
    Amt, DemandedUpper, II.getDataLayout());
return (LogicalShift ? (ShiftLeft ? Builder.CreateShl(Vec, Amt)
auto *CDV = dyn_cast<ConstantDataVector>(Amt);
       cast<VectorType>(AmtVT)->getElementType() == SVT &&
       "Unexpected shift-by-scalar type");
for (unsigned i = 0, NumSubElts = 64 / BitWidth; i != NumSubElts; ++i) {
unsigned SubEltIdx = (NumSubElts - 1) - i;
auto *SubElt = cast<ConstantInt>(CDV->getElementAsConstant(SubEltIdx));
bool LogicalShift = false;
bool ShiftLeft = false;
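// Hedged summary of the variable-shift handling below: if every constant
// per-element shift amount is in range, the whole intrinsic becomes a
// single vector shl/lshr/ashr; elements whose amount is out of range are
// replaced by the value the hardware would produce (zero for logical
// shifts), which is what the AnyOutOfRange bookkeeping tracks.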
switch (II.getIntrinsicID()) {
case Intrinsic::x86_avx2_psrav_d:
case Intrinsic::x86_avx2_psrav_d_256:
case Intrinsic::x86_avx512_psrav_q_128:
case Intrinsic::x86_avx512_psrav_q_256:
case Intrinsic::x86_avx512_psrav_d_512:
case Intrinsic::x86_avx512_psrav_q_512:
case Intrinsic::x86_avx512_psrav_w_128:
case Intrinsic::x86_avx512_psrav_w_256:
case Intrinsic::x86_avx512_psrav_w_512:
  LogicalShift = false;
case Intrinsic::x86_avx2_psrlv_d:
case Intrinsic::x86_avx2_psrlv_d_256:
case Intrinsic::x86_avx2_psrlv_q:
case Intrinsic::x86_avx2_psrlv_q_256:
case Intrinsic::x86_avx512_psrlv_d_512:
case Intrinsic::x86_avx512_psrlv_q_512:
case Intrinsic::x86_avx512_psrlv_w_128:
case Intrinsic::x86_avx512_psrlv_w_256:
case Intrinsic::x86_avx512_psrlv_w_512:
case Intrinsic::x86_avx2_psllv_d:
case Intrinsic::x86_avx2_psllv_d_256:
case Intrinsic::x86_avx2_psllv_q:
case Intrinsic::x86_avx2_psllv_q_256:
case Intrinsic::x86_avx512_psllv_d_512:
case Intrinsic::x86_avx512_psllv_q_512:
case Intrinsic::x86_avx512_psllv_w_128:
case Intrinsic::x86_avx512_psllv_w_256:
case Intrinsic::x86_avx512_psllv_w_512:
assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left");
Value *Vec = II.getArgOperand(0);
Value *Amt = II.getArgOperand(1);
auto *VT = cast<FixedVectorType>(II.getType());
Type *SVT = VT->getElementType();
int NumElts = VT->getNumElements();
return (LogicalShift ? (ShiftLeft ? Builder.CreateShl(Vec, Amt)
auto *CShift = dyn_cast<Constant>(Amt);
bool AnyOutOfRange = false;
for (int I = 0; I < NumElts; ++I) {
auto *CElt = CShift->getAggregateElement(I);
if (isa_and_nonnull<UndefValue>(CElt)) {
auto *COp = dyn_cast_or_null<ConstantInt>(CElt);
APInt ShiftVal = COp->getValue();
AnyOutOfRange = LogicalShift;
for (int Idx : ShiftAmts) {
assert(LogicalShift && "Logical shift expected");
ConstantVec.push_back(ConstantInt::getNullValue(SVT));
for (int Idx : ShiftAmts) {
Value *Arg0 = II.getArgOperand(0);
Value *Arg1 = II.getArgOperand(1);
Type *ResTy = II.getType();
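// Sketch of the PACKSS/PACKUS folding below (an interpretation, not a
// spec): once both inputs are constants, each source element is clamped to
// the destination's saturation range, truncated, and the two clamped
// inputs are interleaved lane by lane - the PackMask loop later in the
// function builds exactly that per-128-bit-lane interleaving shuffle.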
if (isa<UndefValue>(Arg0) && isa<UndefValue>(Arg1))
auto *ArgTy = cast<FixedVectorType>(Arg0->getType());
unsigned NumSrcElts = ArgTy->getNumElements();
       "Unexpected packing types");
unsigned NumSrcEltsPerLane = NumSrcElts / NumLanes;
unsigned SrcScalarSizeInBits = ArgTy->getScalarSizeInBits();
assert(SrcScalarSizeInBits == (2 * DstScalarSizeInBits) &&
       "Unexpected packing types");
if (!isa<Constant>(Arg0) || !isa<Constant>(Arg1))
APInt MinValue, MaxValue;
for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
for (unsigned Elt = 0; Elt != NumSrcEltsPerLane; ++Elt)
  PackMask.push_back(Elt + (Lane * NumSrcEltsPerLane));
for (unsigned Elt = 0; Elt != NumSrcEltsPerLane; ++Elt)
  PackMask.push_back(Elt + (Lane * NumSrcEltsPerLane) + NumSrcElts);
Value *Arg0 = II.getArgOperand(0);
Value *Arg1 = II.getArgOperand(1);
auto *ResTy = cast<FixedVectorType>(II.getType());
auto *ArgTy = cast<FixedVectorType>(Arg0->getType());
assert(ArgTy == ResTy && ResTy->getScalarSizeInBits() == 16 &&
       "Unexpected PMULH types");
assert((!IsRounding || IsSigned) && "PMULHRS instruction must be signed");
if (isa<UndefValue>(Arg0) || isa<UndefValue>(Arg1))
if (isa<ConstantAggregateZero>(Arg0) || isa<ConstantAggregateZero>(Arg1))
return IsSigned ? Builder.CreateAShr(Arg1, 15)
return IsSigned ? Builder.CreateAShr(Arg0, 15)
if (!isa<Constant>(Arg0) || !isa<Constant>(Arg1))
    IsSigned ? Instruction::CastOps::SExt : Instruction::CastOps::ZExt;
Value *Arg0 = II.getArgOperand(0);
Value *Arg1 = II.getArgOperand(1);
auto *ResTy = cast<FixedVectorType>(II.getType());
[[maybe_unused]] auto *ArgTy = cast<FixedVectorType>(Arg0->getType());
unsigned NumDstElts = ResTy->getNumElements();
assert(ArgTy->getNumElements() == (2 * NumDstElts) &&
       ResTy->getScalarSizeInBits() == (2 * ArgTy->getScalarSizeInBits()) &&
       "Unexpected PMADD types");
if (isa<UndefValue>(Arg0) || isa<UndefValue>(Arg1))
if (isa<ConstantAggregateZero>(Arg0) || isa<ConstantAggregateZero>(Arg1))
if (!isa<Constant>(Arg0) || !isa<Constant>(Arg1))
for (unsigned I = 0; I != NumDstElts; ++I) {
    IsPMADDWD ? Instruction::CastOps::SExt : Instruction::CastOps::ZExt;
LHSLo = Builder.CreateCast(LHSCast, LHSLo, ResTy);
LHSHi = Builder.CreateCast(LHSCast, LHSHi, ResTy);
RHSLo = Builder.CreateCast(Instruction::CastOps::SExt, RHSLo, ResTy);
RHSHi = Builder.CreateCast(Instruction::CastOps::SExt, RHSHi, ResTy);
Value *Arg = II.getArgOperand(0);
Type *ResTy = II.getType();
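// Sketch (hedged): MOVMSK only reads each element's sign bit, so the body
// below expands it to generic IR, roughly
//   %cmp = icmp slt <16 x i8> %x, zeroinitializer
//   %int = bitcast <16 x i1> %cmp to i16
//   %res = zext i16 %int to i32
// for PMOVMSKB, while movmsk of undef folds to zero (the upper result bits
// must stay zero) and the MMX form is deliberately left alone.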
if (isa<UndefValue>(Arg))
if (II.getIntrinsicID() == Intrinsic::x86_mmx_pmovmskb)
auto *ArgTy = cast<FixedVectorType>(Arg->getType());
unsigned NumElts = ArgTy->getNumElements();
Value *CarryIn = II.getArgOperand(0);
Value *Op1 = II.getArgOperand(1);
Value *Op2 = II.getArgOperand(2);
assert(RetTy->getStructElementType(0)->isIntegerTy(8) &&
       RetTy->getStructElementType(1) == OpTy && OpTy == Op2->getType() &&
       "Unexpected types for x86 addcarry");
auto *ArgImm = dyn_cast<ConstantInt>(II.getArgOperand(3));
if (!ArgImm || ArgImm->getValue().uge(256))
Value *ArgA = II.getArgOperand(0);
Value *ArgB = II.getArgOperand(1);
Value *ArgC = II.getArgOperand(2);
auto Or = [&](auto Lhs, auto Rhs) -> std::pair<Value *, uint8_t> {
  return {Builder.CreateOr(Lhs.first, Rhs.first), Lhs.second | Rhs.second};
auto Xor = [&](auto Lhs, auto Rhs) -> std::pair<Value *, uint8_t> {
  return {Builder.CreateXor(Lhs.first, Rhs.first), Lhs.second ^ Rhs.second};
auto And = [&](auto Lhs, auto Rhs) -> std::pair<Value *, uint8_t> {
  return {Builder.CreateAnd(Lhs.first, Rhs.first), Lhs.second & Rhs.second};
auto Not = [&](auto V) -> std::pair<Value *, uint8_t> {
  return {Builder.CreateNot(V.first), ~V.second};
auto Nor = [&](auto Lhs, auto Rhs) { return Not(Or(Lhs, Rhs)); };
auto Xnor = [&](auto Lhs, auto Rhs) { return Not(Xor(Lhs, Rhs)); };
auto Nand = [&](auto Lhs, auto Rhs) { return Not(And(Lhs, Rhs)); };
bool ABIsConst = AIsConst && BIsConst;
bool ACIsConst = AIsConst && CIsConst;
bool BCIsConst = BIsConst && CIsConst;
bool ABCIsConst = AIsConst && BIsConst && CIsConst;
std::pair<Value *, uint8_t> A = {ArgA, 0xf0};
std::pair<Value *, uint8_t> B = {ArgB, 0xcc};
std::pair<Value *, uint8_t> C = {ArgC, 0xaa};
std::pair<Value *, uint8_t> Res = {nullptr, 0};
uint8_t Imm = ArgImm->getValue().getZExtValue();
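// Worked example (illustrative): 0xf0, 0xcc and 0xaa are the three input
// columns of the 8-row ternary-logic truth table, so combining them with
// the helper lambdas recomputes the immediate of the expression being
// built. For instance And(A, Or(B, C)) gives 0xf0 & (0xcc | 0xaa) = 0xe0,
// so the switch arm for Imm == 0xe0 can emit exactly that tree; the assert
// after the switch re-checks that Res.second matches Imm.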
Res = Nor(Or(A, B), C);
Res = Nor(A, Xnor(B, C));
Res = Nor(A, Nand(B, C));
Res = Nor(A, Not(C));
Res = Nor(A, Nor(C, Not(B)));
Res = Nor(A, Not(B));
Res = Nor(A, Nor(B, Not(C)));
Res = Nor(A, Nor(B, C));
Res = Nor(Xnor(A, C), B);
Res = Nor(Xnor(A, B), C);
Res = Nor(Xnor(A, B), Xnor(A, C));
Res = And(Nand(A, B), Xnor(B, C));
Res = Nand(A, Or(B, C));
Res = Nor(Nand(A, C), B);
Res = Nor(B, Not(C));
Res = Nor(B, Nor(C, Not(A)));
Res = Nor(Xnor(A, B), Xor(A, C));
Res = Xor(A, Nand(Nand(A, B), C));
Res = Nor(Xnor(A, B), Nor(B, C));
Res = Nand(A, Or(B, Not(C)));
Res = Nor(B, Not(A));
Res = Nor(Nor(A, Not(C)), B);
Res = Nor(Nor(A, C), B);
Res = Nand(Or(A, C), B);
Res = Nor(Xnor(A, B), Nor(A, C));
Res = Nand(Or(A, Not(C)), B);
Res = Nor(Nand(A, B), C);
Res = Nor(Xor(A, B), Xnor(A, C));
Res = Xor(A, Nand(Nand(A, C), B));
Res = Nor(C, Not(B));
Res = Nor(Nor(B, Not(A)), C);
Res = Nor(Xnor(A, C), Nor(B, C));
Res = And(Nand(A, C), B);
Res = Nand(A, Nand(B, Not(C)));
Res = Nor(C, Not(A));
Res = Nor(Nor(A, Not(B)), C);
Res = Nor(Nor(A, B), C);
Res = Nand(Or(A, B), C);
Res = Nor(Nor(A, B), Xnor(A, C));
Res = Nand(Or(A, Not(B)), C);
Res = Nor(Nor(A, C), Xnor(B, C));
Res = Nor(Nor(A, B), Xnor(B, C));
Res = Xor(Xnor(A, B), C);
Res = Nand(A, Xnor(B, C));
Res = And(A, Nand(B, C));
Res = Nand(Nand(A, Not(C)), B);
Res = Nand(Nand(A, Not(B)), C);
Res = Nand(Xnor(A, C), B);
Res = Nand(Xnor(A, B), C);
Res = Nand(And(A, B), C);
Res = And(Xnor(A, B), C);
Res = Nor(Xor(A, B), Nor(C, Not(A)));
Res = And(Xnor(A, C), B);
Res = Nor(Xor(A, C), Nor(B, Not(A)));
Res = Xor(Nor(Xnor(A, B), Nor(B, C)), C);
Res = Xor(A, Nand(B, C));
Res = Xor(B, Nor(Nor(B, Not(A)), C));
Res = And(Nand(A, Not(B)), C);
Res = And(Nand(A, Not(C)), B);
Res = Nand(A, Nand(B, C));
Res = And(A, Xnor(B, C));
Res = Nor(Nor(A, Not(B)), Xor(B, C));
Res = Xor(Nor(Xnor(A, B), Nor(A, C)), C);
Res = Xor(Nand(A, C), B);
Res = Nor(Nor(A, B), Xor(Xnor(A, B), C));
Res = Xor(Nand(A, B), C);
Res = Nor(Nor(A, B), Xor(B, C));
Res = Xor(Nor(B, Not(A)), C);
Res = Or(Nor(A, B), Xnor(B, C));
Res = Xor(B, Nor(C, Not(A)));
Res = Or(Nor(A, C), Xnor(B, C));
Res = Nand(A, Xor(B, C));
Res = Xor(A, Nor(Nor(A, Not(B)), C));
Res = Xor(A, Nor(Nor(A, B), C));
Res = Xor(Nor(A, Not(B)), C);
Res = Or(Nor(A, B), Xnor(A, C));
Res = Or(Nor(A, B), C);
Res = Xor(Nor(Xnor(B, C), A), C);
Res = Or(Nor(A, Not(B)), C);
Res = Or(C, Not(A));
Res = And(A, Nand(B, Not(C)));
Res = Nand(Nand(A, C), B);
Res = Xor(A, Nor(C, Not(B)));
Res = Or(Xnor(A, C), Nor(B, C));
Res = Nand(Xor(A, C), B);
Res = Xor(Nor(Xnor(A, C), B), C);
Res = Or(Nor(B, Not(A)), C);
Res = Or(C, Not(B));
Res = Or(Nand(A, B), C);
Res = Xor(A, Nor(Nor(A, Not(C)), B));
Res = Xor(A, Nor(Nor(A, C), B));
Res = Xor(Nor(A, Not(C)), B);
Res = Or(Xnor(A, B), Nor(A, C));
Res = Xor(B, Nor(A, Xnor(B, C)));
Res = Or(Nor(A, C), B);
Res = Or(Nor(A, Not(C)), B);
Res = Or(B, Not(A));
Res = Xor(A, Nor(B, Not(C)));
Res = Or(Xnor(A, B), Nor(B, C));
Res = Nand(Nand(A, B), C);
Res = Nand(Xor(A, B), C);
Res = Xor(Nor(Xnor(A, B), C), B);
Res = Or(B, Nor(C, Not(A)));
Res = Or(B, Not(C));
Res = Or(Nand(A, C), B);
Res = Xor(A, Nor(Xnor(A, C), B));
Res = Xor(A, Nor(Xnor(A, B), C));
Res = Or(Xnor(A, B), Xnor(A, C));
Res = Or(Xnor(A, B), C);
Res = Or(Xnor(A, C), B);
Res = Nand(A, Nor(B, C));
Res = Or(A, Nor(B, C));
Res = Or(A, Nor(B, Not(C)));
Res = Or(A, Not(B));
Res = Or(A, Nor(C, Not(B)));
Res = Or(A, Not(C));
Res = Or(A, Nand(B, C));
Res = Or(A, Xnor(B, C));
Res = Nand(Nor(A, C), B);
Res = Nand(Nor(A, B), C);
assert((Res.first == nullptr || Res.second == Imm) &&
       "Simplification of ternary logic does not verify!");
auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2));
auto *VecTy = cast<FixedVectorType>(II.getType());
assert(VecTy->getNumElements() == 4 && "insertps with wrong vector type");
uint8_t Imm = CInt->getZExtValue();
uint8_t ZMask = Imm & 0xf;
uint8_t DestLane = (Imm >> 4) & 0x3;
uint8_t SourceLane = (Imm >> 6) & 0x3;
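// Quick decode reminder (illustrative): the INSERTPS immediate packs three
// fields - bits [7:6] pick the source element, bits [5:4] the destination
// lane, and bits [3:0] are a zero mask. A hypothetical Imm = 0x4A means
// "copy source element 1 into lane 0, then zero lanes 1 and 3", which the
// ShuffleMask below expresses by pointing zeroed lanes at the second
// (zero) shuffle operand via indices 4..7.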
int ShuffleMask[4] = {0, 1, 2, 3};
Value *V1 = II.getArgOperand(1);
if ((II.getArgOperand(0) == II.getArgOperand(1)) ||
    (ZMask & (1 << DestLane))) {
ShuffleMask[DestLane] = SourceLane;
for (unsigned i = 0; i < 4; ++i)
  if ((ZMask >> i) & 0x1)
    ShuffleMask[i] = i + 4;
ShuffleMask[DestLane] = SourceLane + 4;
auto LowConstantHighUndef = [&](uint64_t Val) {
Constant *Args[] = {ConstantInt::get(IntTy64, Val),
auto *C0 = dyn_cast<Constant>(Op0);
    C0 ? dyn_cast_or_null<ConstantInt>(C0->getAggregateElement((unsigned)0))
if (CILength && CIIndex) {
for (int i = 0; i != (int)Length; ++i)
for (int i = Length; i != 8; ++i)
for (int i = 8; i != 16; ++i)
APInt Elt = CI0->getValue();
if (II.getIntrinsicID() == Intrinsic::x86_sse4a_extrq) {
Value *Args[] = {Op0, CILength, CIIndex};
if (CI0 && CI0->isZero())
  return LowConstantHighUndef(0);
for (int i = 0; i != (int)Index; ++i)
for (int i = 0; i != (int)Length; ++i)
for (int i = 8; i != 16; ++i)
auto *C0 = dyn_cast<Constant>(Op0);
auto *C1 = dyn_cast<Constant>(Op1);
    C0 ? dyn_cast_or_null<ConstantInt>(C0->getAggregateElement((unsigned)0))
    C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)0))
APInt V00 = CI00->getValue();
APInt V10 = CI10->getValue();
APInt Val = V00 | V10;
if (II.getIntrinsicID() == Intrinsic::x86_sse4a_insertq) {
Value *Args[] = {Op0, Op1, CILength, CIIndex};
auto *V = dyn_cast<Constant>(II.getArgOperand(1));
auto *VecTy = cast<FixedVectorType>(II.getType());
unsigned NumElts = VecTy->getNumElements();
assert((NumElts == 16 || NumElts == 32 || NumElts == 64) &&
       "Unexpected number of elements in shuffle mask!");
for (unsigned I = 0; I < NumElts; ++I) {
Constant *COp = V->getAggregateElement(I);
if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
if (isa<UndefValue>(COp)) {
int8_t Index = cast<ConstantInt>(COp)->getValue().getZExtValue();
auto V1 = II.getArgOperand(0);
auto *V = dyn_cast<Constant>(II.getArgOperand(1));
auto *VecTy = cast<FixedVectorType>(II.getType());
unsigned NumElts = VecTy->getNumElements();
bool IsPD = VecTy->getScalarType()->isDoubleTy();
unsigned NumLaneElts = IsPD ? 2 : 4;
assert(NumElts == 16 || NumElts == 8 || NumElts == 4 || NumElts == 2);
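// Reading of the VPERMILVAR handling (hedged): each control element only
// encodes a within-lane index (bit 1 for the PD forms, bits 0-1 for the PS
// forms), and indices are relative to their own 128-bit lane, so the loop
// below extracts that field and re-adds the lane base before emitting a
// plain shufflevector.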
for (unsigned I = 0; I < NumElts; ++I) {
Constant *COp = V->getAggregateElement(I);
if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
if (isa<UndefValue>(COp)) {
APInt Index = cast<ConstantInt>(COp)->getValue();
Index += APInt(32, (I / NumLaneElts) * NumLaneElts);
Indexes[I] = Index.getZExtValue();
auto V1 = II.getArgOperand(0);
auto *V = dyn_cast<Constant>(II.getArgOperand(1));
auto *VecTy = cast<FixedVectorType>(II.getType());
unsigned Size = VecTy->getNumElements();
       "Unexpected shuffle mask size");
for (unsigned I = 0; I < Size; ++I) {
Constant *COp = V->getAggregateElement(I);
if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
if (isa<UndefValue>(COp)) {
auto V1 = II.getArgOperand(0);
auto *V = dyn_cast<Constant>(II.getArgOperand(1));
auto *VecTy = cast<FixedVectorType>(II.getType());
unsigned Size = VecTy->getNumElements();
       "Unexpected shuffle mask size");
for (unsigned I = 0; I < Size; ++I) {
Constant *COp = V->getAggregateElement(I);
if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
if (isa<UndefValue>(COp)) {
auto V1 = II.getArgOperand(0);
auto V2 = II.getArgOperand(2);
std::optional<Instruction *>
auto SimplifyDemandedVectorEltsLow = [&IC](Value *Op, unsigned Width,
                                           unsigned DemandedWidth) {
APInt UndefElts(Width, 0);
case Intrinsic::x86_bmi_bextr_32:
case Intrinsic::x86_bmi_bextr_64:
case Intrinsic::x86_tbm_bextri_u32:
case Intrinsic::x86_tbm_bextri_u64:
if (auto *C = dyn_cast<ConstantInt>(II.getArgOperand(1))) {
unsigned BitWidth = II.getType()->getIntegerBitWidth();
if (auto *InC = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
uint64_t Result = InC->getZExtValue() >> Shift;
Result &= maskTrailingOnes<uint64_t>(Length);
    ConstantInt::get(II.getType(), Result));
case Intrinsic::x86_bmi_bzhi_32:
case Intrinsic::x86_bmi_bzhi_64:
if (auto *C = dyn_cast<ConstantInt>(II.getArgOperand(1))) {
unsigned BitWidth = II.getType()->getIntegerBitWidth();
if (auto *InC = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
uint64_t Result = InC->getZExtValue();
Result &= maskTrailingOnes<uint64_t>(Index);
    ConstantInt::get(II.getType(), Result));
case Intrinsic::x86_bmi_pext_32:
case Intrinsic::x86_bmi_pext_64:
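// Worked example of the PEXT constant folding below (values hypothetical;
// PDEP is the mirror image): pext(src, mask) walks the set bits of mask
// from low to high and packs the corresponding src bits contiguously, so
// pext(0b101101, 0b001110) == 0b110. A zero mask always yields 0, an
// all-ones mask is the identity, and a single contiguous run of mask bits
// reduces to an and/shift pair - which is why those cases are peeled off
// before the general bit-by-bit loop.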
if (auto *MaskC = dyn_cast<ConstantInt>(II.getArgOperand(1))) {
if (MaskC->isNullValue()) {
if (MaskC->isAllOnesValue()) {
unsigned MaskIdx, MaskLen;
if (MaskC->getValue().isShiftedMask(MaskIdx, MaskLen)) {
Value *Input = II.getArgOperand(0);
Value *ShiftAmt = ConstantInt::get(II.getType(), MaskIdx);
if (auto *SrcC = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
uint64_t Src = SrcC->getZExtValue();
uint64_t Mask = MaskC->getZExtValue();
if (BitToTest & Src)
    ConstantInt::get(II.getType(), Result));
case Intrinsic::x86_bmi_pdep_32:
case Intrinsic::x86_bmi_pdep_64:
if (auto *MaskC = dyn_cast<ConstantInt>(II.getArgOperand(1))) {
if (MaskC->isNullValue()) {
if (MaskC->isAllOnesValue()) {
unsigned MaskIdx, MaskLen;
if (MaskC->getValue().isShiftedMask(MaskIdx, MaskLen)) {
Value *Input = II.getArgOperand(0);
Value *ShiftAmt = ConstantInt::get(II.getType(), MaskIdx);
if (auto *SrcC = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
uint64_t Src = SrcC->getZExtValue();
uint64_t Mask = MaskC->getZExtValue();
if (BitToTest & Src)
    ConstantInt::get(II.getType(), Result));
case Intrinsic::x86_sse_cvtss2si:
case Intrinsic::x86_sse_cvtss2si64:
case Intrinsic::x86_sse_cvttss2si:
case Intrinsic::x86_sse_cvttss2si64:
case Intrinsic::x86_sse2_cvtsd2si:
case Intrinsic::x86_sse2_cvtsd2si64:
case Intrinsic::x86_sse2_cvttsd2si:
case Intrinsic::x86_sse2_cvttsd2si64:
case Intrinsic::x86_avx512_vcvtss2si32:
case Intrinsic::x86_avx512_vcvtss2si64:
case Intrinsic::x86_avx512_vcvtss2usi32:
case Intrinsic::x86_avx512_vcvtss2usi64:
case Intrinsic::x86_avx512_vcvtsd2si32:
case Intrinsic::x86_avx512_vcvtsd2si64:
case Intrinsic::x86_avx512_vcvtsd2usi32:
case Intrinsic::x86_avx512_vcvtsd2usi64:
case Intrinsic::x86_avx512_cvttss2si:
case Intrinsic::x86_avx512_cvttss2si64:
case Intrinsic::x86_avx512_cvttss2usi:
case Intrinsic::x86_avx512_cvttss2usi64:
case Intrinsic::x86_avx512_cvttsd2si:
case Intrinsic::x86_avx512_cvttsd2si64:
case Intrinsic::x86_avx512_cvttsd2usi:
case Intrinsic::x86_avx512_cvttsd2usi64: {
Value *Arg = II.getArgOperand(0);
unsigned VWidth = cast<FixedVectorType>(Arg->getType())->getNumElements();
if (Value *V = SimplifyDemandedVectorEltsLow(Arg, VWidth, 1)) {
case Intrinsic::x86_mmx_pmovmskb:
case Intrinsic::x86_sse_movmsk_ps:
case Intrinsic::x86_sse2_movmsk_pd:
case Intrinsic::x86_sse2_pmovmskb_128:
case Intrinsic::x86_avx_movmsk_pd_256:
case Intrinsic::x86_avx_movmsk_ps_256:
case Intrinsic::x86_avx2_pmovmskb:
case Intrinsic::x86_sse_comieq_ss:
case Intrinsic::x86_sse_comige_ss:
case Intrinsic::x86_sse_comigt_ss:
case Intrinsic::x86_sse_comile_ss:
case Intrinsic::x86_sse_comilt_ss:
case Intrinsic::x86_sse_comineq_ss:
case Intrinsic::x86_sse_ucomieq_ss:
case Intrinsic::x86_sse_ucomige_ss:
case Intrinsic::x86_sse_ucomigt_ss:
case Intrinsic::x86_sse_ucomile_ss:
case Intrinsic::x86_sse_ucomilt_ss:
case Intrinsic::x86_sse_ucomineq_ss:
case Intrinsic::x86_sse2_comieq_sd:
case Intrinsic::x86_sse2_comige_sd:
case Intrinsic::x86_sse2_comigt_sd:
case Intrinsic::x86_sse2_comile_sd:
case Intrinsic::x86_sse2_comilt_sd:
case Intrinsic::x86_sse2_comineq_sd:
case Intrinsic::x86_sse2_ucomieq_sd:
case Intrinsic::x86_sse2_ucomige_sd:
case Intrinsic::x86_sse2_ucomigt_sd:
case Intrinsic::x86_sse2_ucomile_sd:
case Intrinsic::x86_sse2_ucomilt_sd:
case Intrinsic::x86_sse2_ucomineq_sd:
case Intrinsic::x86_avx512_vcomi_ss:
case Intrinsic::x86_avx512_vcomi_sd:
case Intrinsic::x86_avx512_mask_cmp_ss:
case Intrinsic::x86_avx512_mask_cmp_sd: {
bool MadeChange = false;
Value *Arg0 = II.getArgOperand(0);
Value *Arg1 = II.getArgOperand(1);
unsigned VWidth = cast<FixedVectorType>(Arg0->getType())->getNumElements();
if (Value *V = SimplifyDemandedVectorEltsLow(Arg0, VWidth, 1)) {
if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, 1)) {
case Intrinsic::x86_avx512_add_ps_512:
case Intrinsic::x86_avx512_div_ps_512:
case Intrinsic::x86_avx512_mul_ps_512:
case Intrinsic::x86_avx512_sub_ps_512:
case Intrinsic::x86_avx512_add_pd_512:
case Intrinsic::x86_avx512_div_pd_512:
case Intrinsic::x86_avx512_mul_pd_512:
case Intrinsic::x86_avx512_sub_pd_512:
if (auto *R = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
if (R->getValue() == 4) {
Value *Arg0 = II.getArgOperand(0);
Value *Arg1 = II.getArgOperand(1);
case Intrinsic::x86_avx512_add_ps_512:
case Intrinsic::x86_avx512_add_pd_512:
case Intrinsic::x86_avx512_sub_ps_512:
case Intrinsic::x86_avx512_sub_pd_512:
case Intrinsic::x86_avx512_mul_ps_512:
case Intrinsic::x86_avx512_mul_pd_512:
case Intrinsic::x86_avx512_div_ps_512:
case Intrinsic::x86_avx512_div_pd_512:
case Intrinsic::x86_avx512_mask_add_ss_round:
case Intrinsic::x86_avx512_mask_div_ss_round:
case Intrinsic::x86_avx512_mask_mul_ss_round:
case Intrinsic::x86_avx512_mask_sub_ss_round:
case Intrinsic::x86_avx512_mask_add_sd_round:
case Intrinsic::x86_avx512_mask_div_sd_round:
case Intrinsic::x86_avx512_mask_mul_sd_round:
case Intrinsic::x86_avx512_mask_sub_sd_round:
if (auto *R = dyn_cast<ConstantInt>(II.getArgOperand(4))) {
if (R->getValue() == 4) {
Value *Arg0 = II.getArgOperand(0);
Value *Arg1 = II.getArgOperand(1);
case Intrinsic::x86_avx512_mask_add_ss_round:
case Intrinsic::x86_avx512_mask_add_sd_round:
case Intrinsic::x86_avx512_mask_sub_ss_round:
case Intrinsic::x86_avx512_mask_sub_sd_round:
case Intrinsic::x86_avx512_mask_mul_ss_round:
case Intrinsic::x86_avx512_mask_mul_sd_round:
case Intrinsic::x86_avx512_mask_div_ss_round:
case Intrinsic::x86_avx512_mask_div_sd_round:
Value *Mask = II.getArgOperand(3);
auto *C = dyn_cast<ConstantInt>(Mask);
if (!C || !C->getValue()[0]) {
    cast<IntegerType>(Mask->getType())->getBitWidth());
case Intrinsic::x86_sse2_psrai_d:
case Intrinsic::x86_sse2_psrai_w:
case Intrinsic::x86_avx2_psrai_d:
case Intrinsic::x86_avx2_psrai_w:
case Intrinsic::x86_avx512_psrai_q_128:
case Intrinsic::x86_avx512_psrai_q_256:
case Intrinsic::x86_avx512_psrai_d_512:
case Intrinsic::x86_avx512_psrai_q_512:
case Intrinsic::x86_avx512_psrai_w_512:
case Intrinsic::x86_sse2_psrli_d:
case Intrinsic::x86_sse2_psrli_q:
case Intrinsic::x86_sse2_psrli_w:
case Intrinsic::x86_avx2_psrli_d:
case Intrinsic::x86_avx2_psrli_q:
case Intrinsic::x86_avx2_psrli_w:
case Intrinsic::x86_avx512_psrli_d_512:
case Intrinsic::x86_avx512_psrli_q_512:
case Intrinsic::x86_avx512_psrli_w_512:
case Intrinsic::x86_sse2_pslli_d:
case Intrinsic::x86_sse2_pslli_q:
case Intrinsic::x86_sse2_pslli_w:
case Intrinsic::x86_avx2_pslli_d:
case Intrinsic::x86_avx2_pslli_q:
case Intrinsic::x86_avx2_pslli_w:
case Intrinsic::x86_avx512_pslli_d_512:
case Intrinsic::x86_avx512_pslli_q_512:
case Intrinsic::x86_avx512_pslli_w_512:
case Intrinsic::x86_sse2_psra_d:
case Intrinsic::x86_sse2_psra_w:
case Intrinsic::x86_avx2_psra_d:
case Intrinsic::x86_avx2_psra_w:
case Intrinsic::x86_avx512_psra_q_128:
case Intrinsic::x86_avx512_psra_q_256:
case Intrinsic::x86_avx512_psra_d_512:
case Intrinsic::x86_avx512_psra_q_512:
case Intrinsic::x86_avx512_psra_w_512:
case Intrinsic::x86_sse2_psrl_d:
case Intrinsic::x86_sse2_psrl_q:
case Intrinsic::x86_sse2_psrl_w:
case Intrinsic::x86_avx2_psrl_d:
case Intrinsic::x86_avx2_psrl_q:
case Intrinsic::x86_avx2_psrl_w:
case Intrinsic::x86_avx512_psrl_d_512:
case Intrinsic::x86_avx512_psrl_q_512:
case Intrinsic::x86_avx512_psrl_w_512:
case Intrinsic::x86_sse2_psll_d:
case Intrinsic::x86_sse2_psll_q:
case Intrinsic::x86_sse2_psll_w:
case Intrinsic::x86_avx2_psll_d:
case Intrinsic::x86_avx2_psll_q:
case Intrinsic::x86_avx2_psll_w:
case Intrinsic::x86_avx512_psll_d_512:
case Intrinsic::x86_avx512_psll_q_512:
case Intrinsic::x86_avx512_psll_w_512: {
Value *Arg1 = II.getArgOperand(1);
       "Unexpected packed shift size");
unsigned VWidth = cast<FixedVectorType>(Arg1->getType())->getNumElements();
if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, VWidth / 2)) {
case Intrinsic::x86_avx2_psllv_d:
case Intrinsic::x86_avx2_psllv_d_256:
case Intrinsic::x86_avx2_psllv_q:
case Intrinsic::x86_avx2_psllv_q_256:
case Intrinsic::x86_avx512_psllv_d_512:
case Intrinsic::x86_avx512_psllv_q_512:
case Intrinsic::x86_avx512_psllv_w_128:
case Intrinsic::x86_avx512_psllv_w_256:
case Intrinsic::x86_avx512_psllv_w_512:
case Intrinsic::x86_avx2_psrav_d:
case Intrinsic::x86_avx2_psrav_d_256:
case Intrinsic::x86_avx512_psrav_q_128:
case Intrinsic::x86_avx512_psrav_q_256:
case Intrinsic::x86_avx512_psrav_d_512:
case Intrinsic::x86_avx512_psrav_q_512:
case Intrinsic::x86_avx512_psrav_w_128:
case Intrinsic::x86_avx512_psrav_w_256:
case Intrinsic::x86_avx512_psrav_w_512:
case Intrinsic::x86_avx2_psrlv_d:
case Intrinsic::x86_avx2_psrlv_d_256:
case Intrinsic::x86_avx2_psrlv_q:
case Intrinsic::x86_avx2_psrlv_q_256:
case Intrinsic::x86_avx512_psrlv_d_512:
case Intrinsic::x86_avx512_psrlv_q_512:
case Intrinsic::x86_avx512_psrlv_w_128:
case Intrinsic::x86_avx512_psrlv_w_256:
case Intrinsic::x86_avx512_psrlv_w_512:
case Intrinsic::x86_sse2_packssdw_128:
case Intrinsic::x86_sse2_packsswb_128:
case Intrinsic::x86_avx2_packssdw:
case Intrinsic::x86_avx2_packsswb:
case Intrinsic::x86_avx512_packssdw_512:
case Intrinsic::x86_avx512_packsswb_512:
case Intrinsic::x86_sse2_packuswb_128:
case Intrinsic::x86_sse41_packusdw:
case Intrinsic::x86_avx2_packusdw:
case Intrinsic::x86_avx2_packuswb:
case Intrinsic::x86_avx512_packusdw_512:
case Intrinsic::x86_avx512_packuswb_512:
case Intrinsic::x86_sse2_pmulh_w:
case Intrinsic::x86_avx2_pmulh_w:
case Intrinsic::x86_avx512_pmulh_w_512:
case Intrinsic::x86_sse2_pmulhu_w:
case Intrinsic::x86_avx2_pmulhu_w:
case Intrinsic::x86_avx512_pmulhu_w_512:
case Intrinsic::x86_ssse3_pmul_hr_sw_128:
case Intrinsic::x86_avx2_pmul_hr_sw:
case Intrinsic::x86_avx512_pmul_hr_sw_512:
case Intrinsic::x86_sse2_pmadd_wd:
case Intrinsic::x86_avx2_pmadd_wd:
case Intrinsic::x86_avx512_pmaddw_d_512:
case Intrinsic::x86_ssse3_pmadd_ub_sw_128:
case Intrinsic::x86_avx2_pmadd_ub_sw:
case Intrinsic::x86_avx512_pmaddubs_w_512:
case Intrinsic::x86_pclmulqdq:
case Intrinsic::x86_pclmulqdq_256:
case Intrinsic::x86_pclmulqdq_512: {
if (auto *C = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
unsigned Imm = C->getZExtValue();
bool MadeChange = false;
Value *Arg0 = II.getArgOperand(0);
Value *Arg1 = II.getArgOperand(1);
    cast<FixedVectorType>(Arg0->getType())->getNumElements();
APInt UndefElts1(VWidth, 0);
APInt DemandedElts1 =
APInt UndefElts2(VWidth, 0);
APInt DemandedElts2 =
case Intrinsic::x86_sse41_insertps:
case Intrinsic::x86_sse4a_extrq: {
Value *Op0 = II.getArgOperand(0);
Value *Op1 = II.getArgOperand(1);
unsigned VWidth0 = cast<FixedVectorType>(Op0->getType())->getNumElements();
unsigned VWidth1 = cast<FixedVectorType>(Op1->getType())->getNumElements();
       VWidth1 == 16 && "Unexpected operand sizes");
auto *C1 = dyn_cast<Constant>(Op1);
    C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)0))
    C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)1))
bool MadeChange = false;
if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 2)) {
case Intrinsic::x86_sse4a_extrqi: {
Value *Op0 = II.getArgOperand(0);
unsigned VWidth = cast<FixedVectorType>(Op0->getType())->getNumElements();
       "Unexpected operand size");
auto *CILength = dyn_cast<ConstantInt>(II.getArgOperand(1));
auto *CIIndex = dyn_cast<ConstantInt>(II.getArgOperand(2));
if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) {
case Intrinsic::x86_sse4a_insertq: {
Value *Op0 = II.getArgOperand(0);
Value *Op1 = II.getArgOperand(1);
unsigned VWidth = cast<FixedVectorType>(Op0->getType())->getNumElements();
       cast<FixedVectorType>(Op1->getType())->getNumElements() == 2 &&
       "Unexpected operand size");
auto *C1 = dyn_cast<Constant>(Op1);
    C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)1))
const APInt &V11 = CI11->getValue();
if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) {
case Intrinsic::x86_sse4a_insertqi: {
Value *Op0 = II.getArgOperand(0);
Value *Op1 = II.getArgOperand(1);
unsigned VWidth0 = cast<FixedVectorType>(Op0->getType())->getNumElements();
unsigned VWidth1 = cast<FixedVectorType>(Op1->getType())->getNumElements();
       VWidth1 == 2 && "Unexpected operand sizes");
auto *CILength = dyn_cast<ConstantInt>(II.getArgOperand(2));
auto *CIIndex = dyn_cast<ConstantInt>(II.getArgOperand(3));
if (CILength && CIIndex) {
APInt Len = CILength->getValue().zextOrTrunc(6);
APInt Idx = CIIndex->getValue().zextOrTrunc(6);
bool MadeChange = false;
if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 1)) {
case Intrinsic::x86_sse41_pblendvb:
case Intrinsic::x86_sse41_blendvps:
case Intrinsic::x86_sse41_blendvpd:
case Intrinsic::x86_avx_blendv_ps_256:
case Intrinsic::x86_avx_blendv_pd_256:
case Intrinsic::x86_avx2_pblendvb: {
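// Hedged summary of the BLENDV handling below: an all-zero mask selects
// Op0 outright, a constant mask with known sign bits becomes a plain
// select, and a mask built by sign-extending a vector of i1 (possibly
// through a shuffle and/or bitcast) is peeled back so the blend can become
// a select on the original bool vector.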
Value *Op0 = II.getArgOperand(0);
Value *Op1 = II.getArgOperand(1);
Value *Mask = II.getArgOperand(2);
if (isa<ConstantAggregateZero>(Mask)) {
if (auto *ConstantMask = dyn_cast<ConstantDataVector>(Mask)) {
Value *MaskSrc = nullptr;
                 m_Mask(ShuffleMask))))) {
int NumElts = cast<FixedVectorType>(MaskSrc->getType())->getNumElements();
        [NumElts](int M) { return M < 0 || M >= NumElts; }))
auto *MaskTy = cast<FixedVectorType>(Mask->getType());
auto *OpTy = cast<FixedVectorType>(II.getType());
unsigned NumMaskElts = MaskTy->getNumElements();
unsigned NumOperandElts = OpTy->getNumElements();
unsigned NumMaskSrcElts =
    cast<FixedVectorType>(MaskSrc->getType())->getNumElements();
NumMaskElts = (ShuffleMask.size() * NumMaskElts) / NumMaskSrcElts;
if (NumMaskElts > NumOperandElts)
assert(MaskTy->getPrimitiveSizeInBits() == OpTy->getPrimitiveSizeInBits() &&
       "Not expecting mask and operands with different sizes");
if (NumMaskElts == NumOperandElts) {
if (NumMaskElts < NumOperandElts) {
case Intrinsic::x86_ssse3_pshuf_b_128:
case Intrinsic::x86_avx2_pshuf_b:
case Intrinsic::x86_avx512_pshuf_b_512:
case Intrinsic::x86_avx_vpermilvar_ps:
case Intrinsic::x86_avx_vpermilvar_ps_256:
case Intrinsic::x86_avx512_vpermilvar_ps_512:
case Intrinsic::x86_avx_vpermilvar_pd:
case Intrinsic::x86_avx_vpermilvar_pd_256:
case Intrinsic::x86_avx512_vpermilvar_pd_512:
case Intrinsic::x86_avx2_permd:
case Intrinsic::x86_avx2_permps:
case Intrinsic::x86_avx512_permvar_df_256:
case Intrinsic::x86_avx512_permvar_df_512:
case Intrinsic::x86_avx512_permvar_di_256:
case Intrinsic::x86_avx512_permvar_di_512:
case Intrinsic::x86_avx512_permvar_hi_128:
case Intrinsic::x86_avx512_permvar_hi_256:
case Intrinsic::x86_avx512_permvar_hi_512:
case Intrinsic::x86_avx512_permvar_qi_128:
case Intrinsic::x86_avx512_permvar_qi_256:
case Intrinsic::x86_avx512_permvar_qi_512:
case Intrinsic::x86_avx512_permvar_sf_512:
case Intrinsic::x86_avx512_permvar_si_512:
case Intrinsic::x86_avx512_vpermi2var_d_128:
case Intrinsic::x86_avx512_vpermi2var_d_256:
case Intrinsic::x86_avx512_vpermi2var_d_512:
case Intrinsic::x86_avx512_vpermi2var_hi_128:
case Intrinsic::x86_avx512_vpermi2var_hi_256:
case Intrinsic::x86_avx512_vpermi2var_hi_512:
case Intrinsic::x86_avx512_vpermi2var_pd_128:
case Intrinsic::x86_avx512_vpermi2var_pd_256:
case Intrinsic::x86_avx512_vpermi2var_pd_512:
case Intrinsic::x86_avx512_vpermi2var_ps_128:
case Intrinsic::x86_avx512_vpermi2var_ps_256:
case Intrinsic::x86_avx512_vpermi2var_ps_512:
case Intrinsic::x86_avx512_vpermi2var_q_128:
case Intrinsic::x86_avx512_vpermi2var_q_256:
case Intrinsic::x86_avx512_vpermi2var_q_512:
case Intrinsic::x86_avx512_vpermi2var_qi_128:
case Intrinsic::x86_avx512_vpermi2var_qi_256:
case Intrinsic::x86_avx512_vpermi2var_qi_512:
case Intrinsic::x86_avx_maskload_ps:
case Intrinsic::x86_avx_maskload_pd:
case Intrinsic::x86_avx_maskload_ps_256:
case Intrinsic::x86_avx_maskload_pd_256:
case Intrinsic::x86_avx2_maskload_d:
case Intrinsic::x86_avx2_maskload_q:
case Intrinsic::x86_avx2_maskload_d_256:
case Intrinsic::x86_avx2_maskload_q_256:
case Intrinsic::x86_sse2_maskmov_dqu:
case Intrinsic::x86_avx_maskstore_ps:
case Intrinsic::x86_avx_maskstore_pd:
case Intrinsic::x86_avx_maskstore_ps_256:
case Intrinsic::x86_avx_maskstore_pd_256:
case Intrinsic::x86_avx2_maskstore_d:
case Intrinsic::x86_avx2_maskstore_q:
case Intrinsic::x86_avx2_maskstore_d_256:
case Intrinsic::x86_avx2_maskstore_q_256:
case Intrinsic::x86_addcarry_32:
case Intrinsic::x86_addcarry_64:
case Intrinsic::x86_avx512_pternlog_d_128:
case Intrinsic::x86_avx512_pternlog_d_256:
case Intrinsic::x86_avx512_pternlog_d_512:
case Intrinsic::x86_avx512_pternlog_q_128:
case Intrinsic::x86_avx512_pternlog_q_256:
case Intrinsic::x86_avx512_pternlog_q_512:
return std::nullopt;
    bool &KnownBitsComputed) const {
switch (II.getIntrinsicID()) {
case Intrinsic::x86_mmx_pmovmskb:
case Intrinsic::x86_sse_movmsk_ps:
case Intrinsic::x86_sse2_movmsk_pd:
case Intrinsic::x86_sse2_pmovmskb_128:
case Intrinsic::x86_avx_movmsk_ps_256:
case Intrinsic::x86_avx_movmsk_pd_256:
case Intrinsic::x86_avx2_pmovmskb: {
if (II.getIntrinsicID() == Intrinsic::x86_mmx_pmovmskb) {
auto *ArgType = cast<FixedVectorType>(II.getArgOperand(0)->getType());
ArgWidth = ArgType->getNumElements();
Type *VTy = II.getType();
if (DemandedElts.isZero()) {
KnownBitsComputed = true;
return std::nullopt;
    simplifyAndSetOp) const {
unsigned VWidth = cast<FixedVectorType>(II.getType())->getNumElements();
switch (II.getIntrinsicID()) {
case Intrinsic::x86_xop_vfrcz_ss:
case Intrinsic::x86_xop_vfrcz_sd:
if (!DemandedElts[0]) {
simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
UndefElts = UndefElts[0];
case Intrinsic::x86_sse_rcp_ss:
case Intrinsic::x86_sse_rsqrt_ss:
simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
if (!DemandedElts[0]) {
return II.getArgOperand(0);
case Intrinsic::x86_sse_min_ss:
case Intrinsic::x86_sse_max_ss:
case Intrinsic::x86_sse_cmp_ss:
case Intrinsic::x86_sse2_min_sd:
case Intrinsic::x86_sse2_max_sd:
case Intrinsic::x86_sse2_cmp_sd: {
simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
if (!DemandedElts[0]) {
return II.getArgOperand(0);
simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
case Intrinsic::x86_sse41_round_ss:
case Intrinsic::x86_sse41_round_sd: {
APInt DemandedElts2 = DemandedElts;
simplifyAndSetOp(&II, 0, DemandedElts2, UndefElts);
if (!DemandedElts[0]) {
return II.getArgOperand(0);
simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
UndefElts |= UndefElts2[0];
case Intrinsic::x86_avx512_mask_add_ss_round:
case Intrinsic::x86_avx512_mask_div_ss_round:
case Intrinsic::x86_avx512_mask_mul_ss_round:
case Intrinsic::x86_avx512_mask_sub_ss_round:
case Intrinsic::x86_avx512_mask_max_ss_round:
case Intrinsic::x86_avx512_mask_min_ss_round:
case Intrinsic::x86_avx512_mask_add_sd_round:
case Intrinsic::x86_avx512_mask_div_sd_round:
case Intrinsic::x86_avx512_mask_mul_sd_round:
case Intrinsic::x86_avx512_mask_sub_sd_round:
case Intrinsic::x86_avx512_mask_max_sd_round:
case Intrinsic::x86_avx512_mask_min_sd_round:
simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
if (!DemandedElts[0]) {
return II.getArgOperand(0);
simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
simplifyAndSetOp(&II, 2, DemandedElts, UndefElts3);
if (!UndefElts2[0] || !UndefElts3[0])
case Intrinsic::x86_sse3_addsub_pd:
case Intrinsic::x86_sse3_addsub_ps:
case Intrinsic::x86_avx_addsub_pd_256:
case Intrinsic::x86_avx_addsub_ps_256: {
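// Hedged note on ADDSUB: even lanes subtract and odd lanes add, so if the
// demanded elements are a subset of the even lanes the whole intrinsic can
// become a single FSub (or FAdd when only odd lanes are demanded), which
// is what the SubMask/AddMask checks below implement.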
bool IsSubOnly = DemandedElts.isSubsetOf(SubMask);
bool IsAddOnly = DemandedElts.isSubsetOf(AddMask);
if (IsSubOnly || IsAddOnly) {
assert((IsSubOnly ^ IsAddOnly) && "Can't be both add-only and sub-only");
Value *Arg0 = II.getArgOperand(0), *Arg1 = II.getArgOperand(1);
    IsSubOnly ? Instruction::FSub : Instruction::FAdd, Arg0, Arg1);
simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
UndefElts &= UndefElts2;
case Intrinsic::x86_avx2_psllv_d:
case Intrinsic::x86_avx2_psllv_d_256:
case Intrinsic::x86_avx2_psllv_q:
case Intrinsic::x86_avx2_psllv_q_256:
case Intrinsic::x86_avx2_psrlv_d:
case Intrinsic::x86_avx2_psrlv_d_256:
case Intrinsic::x86_avx2_psrlv_q:
case Intrinsic::x86_avx2_psrlv_q_256:
case Intrinsic::x86_avx2_psrav_d:
case Intrinsic::x86_avx2_psrav_d_256: {
simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
UndefElts &= UndefElts2;
case Intrinsic::x86_sse2_pmulh_w:
case Intrinsic::x86_avx2_pmulh_w:
case Intrinsic::x86_avx512_pmulh_w_512:
case Intrinsic::x86_sse2_pmulhu_w:
case Intrinsic::x86_avx2_pmulhu_w:
case Intrinsic::x86_avx512_pmulhu_w_512:
case Intrinsic::x86_ssse3_pmul_hr_sw_128:
case Intrinsic::x86_avx2_pmul_hr_sw:
case Intrinsic::x86_avx512_pmul_hr_sw_512: {
simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
case Intrinsic::x86_sse2_packssdw_128:
case Intrinsic::x86_sse2_packsswb_128:
case Intrinsic::x86_sse2_packuswb_128:
case Intrinsic::x86_sse41_packusdw:
case Intrinsic::x86_avx2_packssdw:
case Intrinsic::x86_avx2_packsswb:
case Intrinsic::x86_avx2_packusdw:
case Intrinsic::x86_avx2_packuswb:
case Intrinsic::x86_avx512_packssdw_512:
case Intrinsic::x86_avx512_packsswb_512:
case Intrinsic::x86_avx512_packusdw_512:
case Intrinsic::x86_avx512_packuswb_512: {
auto *Ty0 = II.getArgOperand(0)->getType();
unsigned InnerVWidth = cast<FixedVectorType>(Ty0)->getNumElements();
assert(VWidth == (InnerVWidth * 2) && "Unexpected input size");
unsigned NumLanes = Ty0->getPrimitiveSizeInBits() / 128;
unsigned VWidthPerLane = VWidth / NumLanes;
unsigned InnerVWidthPerLane = InnerVWidth / NumLanes;
for (int OpNum = 0; OpNum != 2; ++OpNum) {
APInt OpDemandedElts(InnerVWidth, 0);
for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
unsigned LaneIdx = Lane * VWidthPerLane;
for (unsigned Elt = 0; Elt != InnerVWidthPerLane; ++Elt) {
unsigned Idx = LaneIdx + Elt + InnerVWidthPerLane * OpNum;
if (DemandedElts[Idx])
  OpDemandedElts.setBit((Lane * InnerVWidthPerLane) + Elt);
APInt OpUndefElts(InnerVWidth, 0);
simplifyAndSetOp(&II, OpNum, OpDemandedElts, OpUndefElts);
OpUndefElts = OpUndefElts.zext(VWidth);
for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
APInt LaneElts = OpUndefElts.lshr(InnerVWidthPerLane * Lane);
LaneElts = LaneElts.getLoBits(InnerVWidthPerLane);
LaneElts <<= InnerVWidthPerLane * (2 * Lane + OpNum);
UndefElts |= LaneElts;
case Intrinsic::x86_sse2_pmadd_wd:
case Intrinsic::x86_avx2_pmadd_wd:
case Intrinsic::x86_avx512_pmaddw_d_512:
case Intrinsic::x86_ssse3_pmadd_ub_sw_128:
case Intrinsic::x86_avx2_pmadd_ub_sw:
case Intrinsic::x86_avx512_pmaddubs_w_512: {
auto *ArgTy = II.getArgOperand(0)->getType();
unsigned InnerVWidth = cast<FixedVectorType>(ArgTy)->getNumElements();
assert((VWidth * 2) == InnerVWidth && "Unexpected input size");
APInt Op0UndefElts(InnerVWidth, 0);
APInt Op1UndefElts(InnerVWidth, 0);
simplifyAndSetOp(&II, 0, OpDemandedElts, Op0UndefElts);
simplifyAndSetOp(&II, 1, OpDemandedElts, Op1UndefElts);
case Intrinsic::x86_ssse3_pshuf_b_128:
case Intrinsic::x86_avx2_pshuf_b:
case Intrinsic::x86_avx512_pshuf_b_512:
case Intrinsic::x86_avx_vpermilvar_ps:
case Intrinsic::x86_avx_vpermilvar_ps_256:
case Intrinsic::x86_avx512_vpermilvar_ps_512:
case Intrinsic::x86_avx_vpermilvar_pd:
case Intrinsic::x86_avx_vpermilvar_pd_256:
case Intrinsic::x86_avx512_vpermilvar_pd_512:
case Intrinsic::x86_avx2_permd:
case Intrinsic::x86_avx2_permps: {
simplifyAndSetOp(&II, 1, DemandedElts, UndefElts);
case Intrinsic::x86_sse4a_extrq:
case Intrinsic::x86_sse4a_extrqi:
case Intrinsic::x86_sse4a_insertq:
case Intrinsic::x86_sse4a_insertqi:
return std::nullopt;