#include "llvm/IR/IntrinsicsX86.h"

#define DEBUG_TYPE "x86tti"
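// From getNegativeIsTrueBoolVec(Constant *V, const DataLayout &DL): returns a
// constant boolean vector that is true in every lane where the input constant
// vector has its sign bit set (i.e. the element compares signed-less-than zero).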
VectorType *IntTy = VectorType::getInteger(cast<VectorType>(V->getType()));
assert(V && "Vector must be foldable");
if (auto *ConstantMask = dyn_cast<ConstantDataVector>(Mask))
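// From simplifyX86MaskedLoad(II, IC): folds the AVX/AVX2 masked-load intrinsics
// away when the mask is all-zero, and converts them to a generic
// llvm.masked.load (with a zero pass-through vector) when the mask's sign bits
// can be turned into a boolean vector.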
if (isa<ConstantAggregateZero>(Mask))

unsigned AddrSpace = cast<PointerType>(Ptr->getType())->getAddressSpace();
PointerType *VecPtrTy = PointerType::get(II.getType(), AddrSpace);

II.getType(), PtrCast, Align(1), BoolMask, ZeroVec);
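// From simplifyX86MaskedStore(II, IC): the store-side counterpart; an all-zero
// mask lets the store be erased, and a foldable mask turns the intrinsic into a
// generic llvm.masked.store (MASKMOVDQU is left alone because of its
// non-temporal hint).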
if (isa<ConstantAggregateZero>(Mask)) {

if (II.getIntrinsicID() == Intrinsic::x86_sse2_maskmov_dqu)

unsigned AddrSpace = cast<PointerType>(Ptr->getType())->getAddressSpace();
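// From simplifyX86immShift(II, Builder): lowers the SSE2/AVX2/AVX-512 packed
// shift intrinsics (shift by immediate and shift by the low scalar of an XMM
// operand) to plain IR shl/lshr/ashr when the shift amount is known.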
bool LogicalShift = false;
bool ShiftLeft = false;

switch (II.getIntrinsicID()) {
case Intrinsic::x86_sse2_psrai_d:
case Intrinsic::x86_sse2_psrai_w:
case Intrinsic::x86_avx2_psrai_d:
case Intrinsic::x86_avx2_psrai_w:
case Intrinsic::x86_avx512_psrai_q_128:
case Intrinsic::x86_avx512_psrai_q_256:
case Intrinsic::x86_avx512_psrai_d_512:
case Intrinsic::x86_avx512_psrai_q_512:
case Intrinsic::x86_avx512_psrai_w_512:
case Intrinsic::x86_sse2_psra_d:
case Intrinsic::x86_sse2_psra_w:
case Intrinsic::x86_avx2_psra_d:
case Intrinsic::x86_avx2_psra_w:
case Intrinsic::x86_avx512_psra_q_128:
case Intrinsic::x86_avx512_psra_q_256:
case Intrinsic::x86_avx512_psra_d_512:
case Intrinsic::x86_avx512_psra_q_512:
case Intrinsic::x86_avx512_psra_w_512:
  LogicalShift = false;
case Intrinsic::x86_sse2_psrli_d:
case Intrinsic::x86_sse2_psrli_q:
case Intrinsic::x86_sse2_psrli_w:
case Intrinsic::x86_avx2_psrli_d:
case Intrinsic::x86_avx2_psrli_q:
case Intrinsic::x86_avx2_psrli_w:
case Intrinsic::x86_avx512_psrli_d_512:
case Intrinsic::x86_avx512_psrli_q_512:
case Intrinsic::x86_avx512_psrli_w_512:
case Intrinsic::x86_sse2_psrl_d:
case Intrinsic::x86_sse2_psrl_q:
case Intrinsic::x86_sse2_psrl_w:
case Intrinsic::x86_avx2_psrl_d:
case Intrinsic::x86_avx2_psrl_q:
case Intrinsic::x86_avx2_psrl_w:
case Intrinsic::x86_avx512_psrl_d_512:
case Intrinsic::x86_avx512_psrl_q_512:
case Intrinsic::x86_avx512_psrl_w_512:
case Intrinsic::x86_sse2_pslli_d:
case Intrinsic::x86_sse2_pslli_q:
case Intrinsic::x86_sse2_pslli_w:
case Intrinsic::x86_avx2_pslli_d:
case Intrinsic::x86_avx2_pslli_q:
case Intrinsic::x86_avx2_pslli_w:
case Intrinsic::x86_avx512_pslli_d_512:
case Intrinsic::x86_avx512_pslli_q_512:
case Intrinsic::x86_avx512_pslli_w_512:
case Intrinsic::x86_sse2_psll_d:
case Intrinsic::x86_sse2_psll_q:
case Intrinsic::x86_sse2_psll_w:
case Intrinsic::x86_avx2_psll_d:
case Intrinsic::x86_avx2_psll_q:
case Intrinsic::x86_avx2_psll_w:
case Intrinsic::x86_avx512_psll_d_512:
case Intrinsic::x86_avx512_psll_q_512:
case Intrinsic::x86_avx512_psll_w_512:

assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left");

Value *Vec = II.getArgOperand(0);
Value *Amt = II.getArgOperand(1);
auto *VT = cast<FixedVectorType>(Vec->getType());
Type *SVT = VT->getElementType();
unsigned VWidth = VT->getNumElements();

return (LogicalShift ? (ShiftLeft ? Builder.CreateShl(Vec, Amt)

Amt = ConstantInt::get(SVT, BitWidth - 1);

cast<VectorType>(AmtVT)->getElementType() == SVT &&
       "Unexpected shift-by-scalar type");
unsigned NumAmtElts = cast<FixedVectorType>(AmtVT)->getNumElements();

Amt, DemandedLower, II.getDataLayout());
Amt, DemandedUpper, II.getDataLayout());

return (LogicalShift ? (ShiftLeft ? Builder.CreateShl(Vec, Amt)

auto *CDV = dyn_cast<ConstantDataVector>(Amt);

cast<VectorType>(AmtVT)->getElementType() == SVT &&
       "Unexpected shift-by-scalar type");

for (unsigned i = 0, NumSubElts = 64 / BitWidth; i != NumSubElts; ++i) {
  unsigned SubEltIdx = (NumSubElts - 1) - i;
  auto *SubElt = cast<ConstantInt>(CDV->getElementAsConstant(SubEltIdx));
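// From simplifyX86varShift(II, Builder): the per-element (VPSRAV/VPSRLV/VPSLLV)
// variant; if every lane's shift amount is a known constant the call becomes a
// plain vector shift, with out-of-range logical shift amounts folded to zero.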
bool LogicalShift = false;
bool ShiftLeft = false;

switch (II.getIntrinsicID()) {
case Intrinsic::x86_avx2_psrav_d:
case Intrinsic::x86_avx2_psrav_d_256:
case Intrinsic::x86_avx512_psrav_q_128:
case Intrinsic::x86_avx512_psrav_q_256:
case Intrinsic::x86_avx512_psrav_d_512:
case Intrinsic::x86_avx512_psrav_q_512:
case Intrinsic::x86_avx512_psrav_w_128:
case Intrinsic::x86_avx512_psrav_w_256:
case Intrinsic::x86_avx512_psrav_w_512:
  LogicalShift = false;
case Intrinsic::x86_avx2_psrlv_d:
case Intrinsic::x86_avx2_psrlv_d_256:
case Intrinsic::x86_avx2_psrlv_q:
case Intrinsic::x86_avx2_psrlv_q_256:
case Intrinsic::x86_avx512_psrlv_d_512:
case Intrinsic::x86_avx512_psrlv_q_512:
case Intrinsic::x86_avx512_psrlv_w_128:
case Intrinsic::x86_avx512_psrlv_w_256:
case Intrinsic::x86_avx512_psrlv_w_512:
case Intrinsic::x86_avx2_psllv_d:
case Intrinsic::x86_avx2_psllv_d_256:
case Intrinsic::x86_avx2_psllv_q:
case Intrinsic::x86_avx2_psllv_q_256:
case Intrinsic::x86_avx512_psllv_d_512:
case Intrinsic::x86_avx512_psllv_q_512:
case Intrinsic::x86_avx512_psllv_w_128:
case Intrinsic::x86_avx512_psllv_w_256:
case Intrinsic::x86_avx512_psllv_w_512:

assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left");

Value *Vec = II.getArgOperand(0);
Value *Amt = II.getArgOperand(1);
auto *VT = cast<FixedVectorType>(II.getType());
Type *SVT = VT->getElementType();
int NumElts = VT->getNumElements();

return (LogicalShift ? (ShiftLeft ? Builder.CreateShl(Vec, Amt)

auto *CShift = dyn_cast<Constant>(Amt);

bool AnyOutOfRange = false;
for (int I = 0; I < NumElts; ++I) {
  auto *CElt = CShift->getAggregateElement(I);
  if (isa_and_nonnull<UndefValue>(CElt)) {

  auto *COp = dyn_cast_or_null<ConstantInt>(CElt);

  APInt ShiftVal = COp->getValue();

  AnyOutOfRange = LogicalShift;

for (int Idx : ShiftAmts) {

  assert(LogicalShift && "Logical shift expected");
  ConstantVec.push_back(ConstantInt::getNullValue(SVT));

for (int Idx : ShiftAmts) {
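// From simplifyX86pack(II, Builder, IsSigned): constant-folds PACKSS/PACKUS by
// clamping each source element to the destination's signed or unsigned range
// and then interleaving the two operands lane by lane via a shuffle mask.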
Value *Arg0 = II.getArgOperand(0);
Value *Arg1 = II.getArgOperand(1);
Type *ResTy = II.getType();

if (isa<UndefValue>(Arg0) && isa<UndefValue>(Arg1))

auto *ArgTy = cast<FixedVectorType>(Arg0->getType());
unsigned NumSrcElts = ArgTy->getNumElements();
       "Unexpected packing types");
unsigned NumSrcEltsPerLane = NumSrcElts / NumLanes;
unsigned SrcScalarSizeInBits = ArgTy->getScalarSizeInBits();
assert(SrcScalarSizeInBits == (2 * DstScalarSizeInBits) &&
       "Unexpected packing types");

if (!isa<Constant>(Arg0) || !isa<Constant>(Arg1))

APInt MinValue, MaxValue;

for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
  for (unsigned Elt = 0; Elt != NumSrcEltsPerLane; ++Elt)
    PackMask.push_back(Elt + (Lane * NumSrcEltsPerLane));
  for (unsigned Elt = 0; Elt != NumSrcEltsPerLane; ++Elt)
    PackMask.push_back(Elt + (Lane * NumSrcEltsPerLane) + NumSrcElts);
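// From simplifyX86pmulh(II, Builder, IsSigned, IsRounding): PMULH(U)W and
// PMULHRSW return only the high 16 bits of a widened 16x16 multiply, so trivial
// operands (undef, zero, or a splat of one) let the call be folded without
// materializing the multiply.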
Value *Arg0 = II.getArgOperand(0);
Value *Arg1 = II.getArgOperand(1);
auto *ResTy = cast<FixedVectorType>(II.getType());
auto *ArgTy = cast<FixedVectorType>(Arg0->getType());
assert(ArgTy == ResTy && ResTy->getScalarSizeInBits() == 16 &&
       "Unexpected PMULH types");
assert((!IsRounding || IsSigned) && "PMULHRS instruction must be signed");

if (isa<UndefValue>(Arg0) || isa<UndefValue>(Arg1))

if (isa<ConstantAggregateZero>(Arg0) || isa<ConstantAggregateZero>(Arg1))

return IsSigned ? Builder.CreateAShr(Arg1, 15)
return IsSigned ? Builder.CreateAShr(Arg0, 15)

if (!isa<Constant>(Arg0) || !isa<Constant>(Arg1))

    IsSigned ? Instruction::CastOps::SExt : Instruction::CastOps::ZExt;
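// From simplifyX86pmadd(II, Builder, IsPMADDWD): PMADDWD / PMADDUBSW multiply
// adjacent element pairs and add the products; e.g. for PMADDWD each i32 result
// lane is sext(a[2i]) * sext(b[2i]) + sext(a[2i+1]) * sext(b[2i+1]). With
// constant operands the even/odd halves are split out, extended, multiplied and
// re-added as plain IR.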
Value *Arg0 = II.getArgOperand(0);
Value *Arg1 = II.getArgOperand(1);
auto *ResTy = cast<FixedVectorType>(II.getType());
[[maybe_unused]] auto *ArgTy = cast<FixedVectorType>(Arg0->getType());

unsigned NumDstElts = ResTy->getNumElements();
assert(ArgTy->getNumElements() == (2 * NumDstElts) &&
       ResTy->getScalarSizeInBits() == (2 * ArgTy->getScalarSizeInBits()) &&
       "Unexpected PMADD types");

if (isa<UndefValue>(Arg0) || isa<UndefValue>(Arg1))

if (isa<ConstantAggregateZero>(Arg0) || isa<ConstantAggregateZero>(Arg1))

if (!isa<Constant>(Arg0) || !isa<Constant>(Arg1))

for (unsigned I = 0; I != NumDstElts; ++I) {

    IsPMADDWD ? Instruction::CastOps::SExt : Instruction::CastOps::ZExt;
LHSLo = Builder.CreateCast(LHSCast, LHSLo, ResTy);
LHSHi = Builder.CreateCast(LHSCast, LHSHi, ResTy);
RHSLo = Builder.CreateCast(Instruction::CastOps::SExt, RHSLo, ResTy);
RHSHi = Builder.CreateCast(Instruction::CastOps::SExt, RHSHi, ResTy);
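// From simplifyX86movmsk(II, Builder): MOVMSK collects the sign bit of every
// element into the low bits of a scalar; with a vector argument it is rewritten
// as a signed-less-than-zero compare, a bitcast of the i1 vector to an integer,
// and a zext to the result width.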
Value *Arg = II.getArgOperand(0);
Type *ResTy = II.getType();

if (isa<UndefValue>(Arg))

if (II.getIntrinsicID() == Intrinsic::x86_mmx_pmovmskb)

auto *ArgTy = cast<FixedVectorType>(Arg->getType());
unsigned NumElts = ArgTy->getNumElements();
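// From simplifyX86addcarry(II, Builder): when the incoming carry is known to be
// zero, llvm.x86.addcarry.* degenerates to llvm.uadd.with.overflow, and the
// {i8 carry-out, result} struct is rebuilt from that call.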
Value *CarryIn = II.getArgOperand(0);
Value *Op1 = II.getArgOperand(1);
Value *Op2 = II.getArgOperand(2);

assert(RetTy->getStructElementType(0)->isIntegerTy(8) &&
       RetTy->getStructElementType(1) == OpTy && OpTy == Op2->getType() &&
       "Unexpected types for x86 addcarry");
auto *ArgImm = dyn_cast<ConstantInt>(II.getArgOperand(3));
if (!ArgImm || ArgImm->getValue().uge(256))

Value *ArgA = II.getArgOperand(0);
Value *ArgB = II.getArgOperand(1);
Value *ArgC = II.getArgOperand(2);

auto Or = [&](auto Lhs, auto Rhs) -> std::pair<Value *, uint8_t> {
  return {Builder.CreateOr(Lhs.first, Rhs.first), Lhs.second | Rhs.second};
};
auto Xor = [&](auto Lhs, auto Rhs) -> std::pair<Value *, uint8_t> {
  return {Builder.CreateXor(Lhs.first, Rhs.first), Lhs.second ^ Rhs.second};
};
auto And = [&](auto Lhs, auto Rhs) -> std::pair<Value *, uint8_t> {
  return {Builder.CreateAnd(Lhs.first, Rhs.first), Lhs.second & Rhs.second};
};
auto Not = [&](auto V) -> std::pair<Value *, uint8_t> {
  return {Builder.CreateNot(V.first), ~V.second};
};
auto Nor = [&](auto Lhs, auto Rhs) { return Not(Or(Lhs, Rhs)); };
auto Xnor = [&](auto Lhs, auto Rhs) { return Not(Xor(Lhs, Rhs)); };
auto Nand = [&](auto Lhs, auto Rhs) { return Not(And(Lhs, Rhs)); };

bool ABIsConst = AIsConst && BIsConst;
bool ACIsConst = AIsConst && CIsConst;
bool BCIsConst = BIsConst && CIsConst;
bool ABCIsConst = AIsConst && BIsConst && CIsConst;

std::pair<Value *, uint8_t> A = {ArgA, 0xf0};
std::pair<Value *, uint8_t> B = {ArgB, 0xcc};
std::pair<Value *, uint8_t> C = {ArgC, 0xaa};
std::pair<Value *, uint8_t> Res = {nullptr, 0};

uint8_t Imm = ArgImm->getValue().getZExtValue();
Res = Nor(Or(A, B), C);
Res = Nor(A, Xnor(B, C));
Res = Nor(A, Nand(B, C));
Res = Nor(A, Not(C));
Res = Nor(A, Nor(C, Not(B)));
Res = Nor(A, Not(B));
Res = Nor(A, Nor(B, Not(C)));
Res = Nor(A, Nor(B, C));
Res = Nor(Xnor(A, C), B);
Res = Nor(Xnor(A, B), C);
Res = Nor(Xnor(A, B), Xnor(A, C));
Res = And(Nand(A, B), Xnor(B, C));
Res = Nand(A, Or(B, C));
Res = Nor(Nand(A, C), B);
Res = Nor(B, Not(C));
Res = Nor(B, Nor(C, Not(A)));
Res = Nor(Xnor(A, B), Xor(A, C));
Res = Xor(A, Nand(Nand(A, B), C));
Res = Nor(Xnor(A, B), Nor(B, C));
Res = Nand(A, Or(B, Not(C)));
Res = Nor(B, Not(A));
Res = Nor(Nor(A, Not(C)), B);
Res = Nor(Nor(A, C), B);
Res = Nand(Or(A, C), B);
Res = Nor(Xnor(A, B), Nor(A, C));
Res = Nand(Or(A, Not(C)), B);
Res = Nor(Nand(A, B), C);
Res = Nor(Xor(A, B), Xnor(A, C));
Res = Xor(A, Nand(Nand(A, C), B));
Res = Nor(C, Not(B));
Res = Nor(Nor(B, Not(A)), C);
Res = Nor(Xnor(A, C), Nor(B, C));
Res = And(Nand(A, C), B);
Res = Nand(A, Nand(B, Not(C)));
Res = Nor(C, Not(A));
Res = Nor(Nor(A, Not(B)), C);
Res = Nor(Nor(A, B), C);
Res = Nand(Or(A, B), C);
Res = Nor(Nor(A, B), Xnor(A, C));
Res = Nand(Or(A, Not(B)), C);
Res = Nor(Nor(A, C), Xnor(B, C));
Res = Nor(Nor(A, B), Xnor(B, C));
Res = Xor(Xnor(A, B), C);
Res = Nand(A, Xnor(B, C));
Res = And(A, Nand(B, C));
Res = Nand(Nand(A, Not(C)), B);
Res = Nand(Nand(A, Not(B)), C);
Res = Nand(Xnor(A, C), B);
Res = Nand(Xnor(A, B), C);
Res = Nand(And(A, B), C);
Res = And(Xnor(A, B), C);
Res = Nor(Xor(A, B), Nor(C, Not(A)));
Res = And(Xnor(A, C), B);
Res = Nor(Xor(A, C), Nor(B, Not(A)));
Res = Xor(Nor(Xnor(A, B), Nor(B, C)), C);
Res = Xor(A, Nand(B, C));
Res = Xor(B, Nor(Nor(B, Not(A)), C));
Res = And(Nand(A, Not(B)), C);
Res = And(Nand(A, Not(C)), B);
Res = Nand(A, Nand(B, C));
Res = And(A, Xnor(B, C));
Res = Nor(Nor(A, Not(B)), Xor(B, C));
Res = Xor(Nor(Xnor(A, B), Nor(A, C)), C);
Res = Xor(Nand(A, C), B);
Res = Nor(Nor(A, B), Xor(Xnor(A, B), C));
Res = Xor(Nand(A, B), C);
Res = Nor(Nor(A, B), Xor(B, C));
Res = Xor(Nor(B, Not(A)), C);
Res = Or(Nor(A, B), Xnor(B, C));
Res = Xor(B, Nor(C, Not(A)));
Res = Or(Nor(A, C), Xnor(B, C));
Res = Nand(A, Xor(B, C));
Res = Xor(A, Nor(Nor(A, Not(B)), C));
Res = Xor(A, Nor(Nor(A, B), C));
Res = Xor(Nor(A, Not(B)), C);
Res = Or(Nor(A, B), Xnor(A, C));
Res = Or(Nor(A, B), C);
Res = Xor(Nor(Xnor(B, C), A), C);
Res = Or(Nor(A, Not(B)), C);
Res = Or(C, Not(A));
Res = And(A, Nand(B, Not(C)));
Res = Nand(Nand(A, C), B);
Res = Xor(A, Nor(C, Not(B)));
Res = Or(Xnor(A, C), Nor(B, C));
Res = Nand(Xor(A, C), B);
Res = Xor(Nor(Xnor(A, C), B), C);
Res = Or(Nor(B, Not(A)), C);
Res = Or(C, Not(B));
Res = Or(Nand(A, B), C);
Res = Xor(A, Nor(Nor(A, Not(C)), B));
Res = Xor(A, Nor(Nor(A, C), B));
Res = Xor(Nor(A, Not(C)), B);
Res = Or(Xnor(A, B), Nor(A, C));
Res = Xor(B, Nor(A, Xnor(B, C)));
Res = Or(Nor(A, C), B);
Res = Or(Nor(A, Not(C)), B);
Res = Or(B, Not(A));
Res = Xor(A, Nor(B, Not(C)));
Res = Or(Xnor(A, B), Nor(B, C));
Res = Nand(Nand(A, B), C);
Res = Nand(Xor(A, B), C);
Res = Xor(Nor(Xnor(A, B), C), B);
Res = Or(B, Nor(C, Not(A)));
Res = Or(B, Not(C));
Res = Or(Nand(A, C), B);
Res = Xor(A, Nor(Xnor(A, C), B));
Res = Xor(A, Nor(Xnor(A, B), C));
Res = Or(Xnor(A, B), Xnor(A, C));
Res = Or(Xnor(A, B), C);
Res = Or(Xnor(A, C), B);
Res = Nand(A, Nor(B, C));
Res = Or(A, Nor(B, C));
Res = Or(A, Nor(B, Not(C)));
Res = Or(A, Not(B));
Res = Or(A, Nor(C, Not(B)));
Res = Or(A, Not(C));
Res = Or(A, Nand(B, C));
Res = Or(A, Xnor(B, C));
Res = Nand(Nor(A, C), B);
Res = Nand(Nor(A, B), C);

assert((Res.first == nullptr || Res.second == Imm) &&
       "Simplification of ternary logic does not verify!");
auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2));

auto *VecTy = cast<FixedVectorType>(II.getType());
assert(VecTy->getNumElements() == 4 && "insertps with wrong vector type");

uint8_t Imm = CInt->getZExtValue();
uint8_t DestLane = (Imm >> 4) & 0x3;
uint8_t SourceLane = (Imm >> 6) & 0x3;

int ShuffleMask[4] = {0, 1, 2, 3};

Value *V1 = II.getArgOperand(1);

if ((II.getArgOperand(0) == II.getArgOperand(1)) ||
    (ZMask & (1 << DestLane))) {

  ShuffleMask[DestLane] = SourceLane;

  for (unsigned i = 0; i < 4; ++i)
    if ((ZMask >> i) & 0x1)
      ShuffleMask[i] = i + 4;

  ShuffleMask[DestLane] = SourceLane + 4;
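// From simplifyX86extrq(II, Op0, CILength, CIIndex, Builder): attempts to
// simplify SSE4A EXTRQ/EXTRQI using constant folding or, when the bit length
// and index are byte aligned, conversion to a byte shuffle.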
auto LowConstantHighUndef = [&](uint64_t Val) {
  Constant *Args[] = {ConstantInt::get(IntTy64, Val),

auto *C0 = dyn_cast<Constant>(Op0);
    C0 ? dyn_cast_or_null<ConstantInt>(C0->getAggregateElement((unsigned)0))

if (CILength && CIIndex) {

  if ((Length % 8) == 0 && (Index % 8) == 0) {

    for (int i = 0; i != (int)Length; ++i)
    for (int i = Length; i != 8; ++i)
    for (int i = 8; i != 16; ++i)

  APInt Elt = CI0->getValue();

if (II.getIntrinsicID() == Intrinsic::x86_sse4a_extrq) {
  Value *Args[] = {Op0, CILength, CIIndex};
  return Builder.CreateIntrinsic(Intrinsic::x86_sse4a_extrqi, {}, Args);

if (CI0 && CI0->isZero())
  return LowConstantHighUndef(0);
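// From simplifyX86insertq(II, Op0, Op1, APLength, APIndex, Builder): attempts to
// simplify SSE4A INSERTQ/INSERTQI using constant folding or conversion to a
// byte shuffle, and canonicalizes INSERTQ with constant length/index to
// INSERTQI.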
if ((Length % 8) == 0 && (Index % 8) == 0) {

  for (int i = 0; i != (int)Index; ++i)
  for (int i = 0; i != (int)Length; ++i)
  for (int i = Index + Length; i != 8; ++i)
  for (int i = 8; i != 16; ++i)

auto *C0 = dyn_cast<Constant>(Op0);
auto *C1 = dyn_cast<Constant>(Op1);
    C0 ? dyn_cast_or_null<ConstantInt>(C0->getAggregateElement((unsigned)0))
    C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)0))

  APInt V00 = CI00->getValue();
  APInt V10 = CI10->getValue();
  APInt Val = V00 | V10;

if (II.getIntrinsicID() == Intrinsic::x86_sse4a_insertq) {
  Constant *CIIndex = ConstantInt::get(IntTy8, Index, false);
  Value *Args[] = {Op0, Op1, CILength, CIIndex};
  return Builder.CreateIntrinsic(Intrinsic::x86_sse4a_insertqi, {}, Args);
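// From simplifyX86pshufb(II, Builder): attempts to convert PSHUFB with a
// constant mask into a shufflevector; the high bit of a mask byte selects zero
// and the low nibble selects a byte within the same 16-byte lane.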
auto *V = dyn_cast<Constant>(II.getArgOperand(1));

auto *VecTy = cast<FixedVectorType>(II.getType());
unsigned NumElts = VecTy->getNumElements();
assert((NumElts == 16 || NumElts == 32 || NumElts == 64) &&
       "Unexpected number of elements in shuffle mask!");

for (unsigned I = 0; I < NumElts; ++I) {
  Constant *COp = V->getAggregateElement(I);
  if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))

  if (isa<UndefValue>(COp)) {

  int8_t Index = cast<ConstantInt>(COp)->getValue().getZExtValue();

  Index = ((Index < 0) ? NumElts : Index & 0x0F) + (I & 0xF0);

auto V1 = II.getArgOperand(0);
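// From simplifyX86vpermilvar(II, Builder): attempts to convert VPERMILPS /
// VPERMILPD with a constant selector into a shufflevector, keeping every
// selection within its own 128-bit lane.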
auto *V = dyn_cast<Constant>(II.getArgOperand(1));

auto *VecTy = cast<FixedVectorType>(II.getType());
unsigned NumElts = VecTy->getNumElements();
bool IsPD = VecTy->getScalarType()->isDoubleTy();
unsigned NumLaneElts = IsPD ? 2 : 4;
assert(NumElts == 16 || NumElts == 8 || NumElts == 4 || NumElts == 2);

for (unsigned I = 0; I < NumElts; ++I) {
  Constant *COp = V->getAggregateElement(I);
  if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))

  if (isa<UndefValue>(COp)) {

  APInt Index = cast<ConstantInt>(COp)->getValue();
  Index = Index.zextOrTrunc(32).getLoBits(2);

  Index.lshrInPlace(1);

  Index += APInt(32, (I / NumLaneElts) * NumLaneElts);

  Indexes[I] = Index.getZExtValue();

auto V1 = II.getArgOperand(0);
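// From simplifyX86vpermv(II, Builder): attempts to convert VPERMD/VPERMPS (and
// the other full-width VPERMW/VPERMB/VPERMQ/VPERMPD variable permutes) with a
// constant index vector into a shufflevector.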
auto *V = dyn_cast<Constant>(II.getArgOperand(1));

auto *VecTy = cast<FixedVectorType>(II.getType());
unsigned Size = VecTy->getNumElements();
       "Unexpected shuffle mask size");

for (unsigned I = 0; I < Size; ++I) {
  Constant *COp = V->getAggregateElement(I);
  if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))

  if (isa<UndefValue>(COp)) {

  uint32_t Index = cast<ConstantInt>(COp)->getZExtValue();

auto V1 = II.getArgOperand(0);
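// From simplifyX86vpermv3(II, Builder): attempts to convert VPERMI2/VPERMT2 with
// a constant index vector into a two-source shufflevector; each index selects
// from the concatenation of both data operands, hence the (2 * Size) - 1 mask.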
auto *V = dyn_cast<Constant>(II.getArgOperand(1));

auto *VecTy = cast<FixedVectorType>(II.getType());
unsigned Size = VecTy->getNumElements();
       "Unexpected shuffle mask size");

for (unsigned I = 0; I < Size; ++I) {
  Constant *COp = V->getAggregateElement(I);
  if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))

  if (isa<UndefValue>(COp)) {

  uint32_t Index = cast<ConstantInt>(COp)->getZExtValue();
  Index &= (2 * Size) - 1;

auto V1 = II.getArgOperand(0);
auto V2 = II.getArgOperand(2);
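// From simplifyX86VPERMMask(II, IsBinary, IC): for the variable permutes, only
// log2(NumElts) (or log2(2 * NumElts) for the two-source forms) low bits of
// each index element matter, so the remaining index bits are dropped through
// SimplifyDemandedBits.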
auto *VecTy = cast<FixedVectorType>(II->getType());
unsigned EltSizeInBits = VecTy->getScalarSizeInBits();
unsigned NumElts = VecTy->getNumElements();
       "Unexpected shuffle mask size");

unsigned IdxSizeInBits = Log2_32(IsBinary ? (2 * NumElts) : NumElts);
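// X86TTIImpl::instCombineIntrinsic(IC, II): the target hook that dispatches on
// the intrinsic ID and applies the helpers above, plus a number of scalar BMI
// and conversion folds handled inline in the switch below.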
std::optional<Instruction *>
X86TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
  auto SimplifyDemandedVectorEltsLow = [&IC](Value *Op, unsigned Width,
                                             unsigned DemandedWidth) {
    APInt UndefElts(Width, 0);
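  // BEXTR/BEXTRI: the control operand packs a start bit and a field length;
  // with a constant control (and, below, a constant source) the extraction
  // folds to a shift-and-mask or all the way to a constant.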
  case Intrinsic::x86_bmi_bextr_32:
  case Intrinsic::x86_bmi_bextr_64:
  case Intrinsic::x86_tbm_bextri_u32:
  case Intrinsic::x86_tbm_bextri_u64:
    if (auto *C = dyn_cast<ConstantInt>(II.getArgOperand(1))) {
      unsigned BitWidth = II.getType()->getIntegerBitWidth();

      if (auto *InC = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
        uint64_t Result = InC->getZExtValue() >> Shift;
        Result &= maskTrailingOnes<uint64_t>(Length);
        ConstantInt::get(II.getType(), Result));
  case Intrinsic::x86_bmi_bzhi_32:
  case Intrinsic::x86_bmi_bzhi_64:
    if (auto *C = dyn_cast<ConstantInt>(II.getArgOperand(1))) {
      uint64_t Index = C->getZExtValue() & 0xff;
      unsigned BitWidth = II.getType()->getIntegerBitWidth();

      if (auto *InC = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
        uint64_t Result = InC->getZExtValue();
        Result &= maskTrailingOnes<uint64_t>(Index);
        ConstantInt::get(II.getType(), Result));
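  // PEXT gathers the source bits selected by the mask into contiguous low
  // bits; PDEP scatters contiguous low source bits out to the mask positions.
  // A worked example (computed here, not from the source):
  //   pext(0b101100, mask 0b111000) == 0b101
  //   pdep(0b101,    mask 0b111000) == 0b101000
  // Besides full constant folding, a mask that is a single shifted run of ones
  // reduces PEXT to lshr+and and PDEP to shl+and.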
  case Intrinsic::x86_bmi_pext_32:
  case Intrinsic::x86_bmi_pext_64:
    if (auto *MaskC = dyn_cast<ConstantInt>(II.getArgOperand(1))) {
      if (MaskC->isNullValue()) {
      if (MaskC->isAllOnesValue()) {

      unsigned MaskIdx, MaskLen;
      if (MaskC->getValue().isShiftedMask(MaskIdx, MaskLen)) {
        Value *Input = II.getArgOperand(0);
        Value *ShiftAmt = ConstantInt::get(II.getType(), MaskIdx);

      if (auto *SrcC = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
        uint64_t Src = SrcC->getZExtValue();
        uint64_t Mask = MaskC->getZExtValue();

        if (BitToTest & Src)

        ConstantInt::get(II.getType(), Result));
  case Intrinsic::x86_bmi_pdep_32:
  case Intrinsic::x86_bmi_pdep_64:
    if (auto *MaskC = dyn_cast<ConstantInt>(II.getArgOperand(1))) {
      if (MaskC->isNullValue()) {
      if (MaskC->isAllOnesValue()) {

      unsigned MaskIdx, MaskLen;
      if (MaskC->getValue().isShiftedMask(MaskIdx, MaskLen)) {
        Value *Input = II.getArgOperand(0);
        Value *ShiftAmt = ConstantInt::get(II.getType(), MaskIdx);

      if (auto *SrcC = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
        uint64_t Src = SrcC->getZExtValue();
        uint64_t Mask = MaskC->getZExtValue();

        if (BitToTest & Src)

        ConstantInt::get(II.getType(), Result));
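  // The scalar SSE/AVX-512 float-to-int conversions read only the low element
  // of their vector operand, so every other lane can be marked undemanded.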
  case Intrinsic::x86_sse_cvtss2si:
  case Intrinsic::x86_sse_cvtss2si64:
  case Intrinsic::x86_sse_cvttss2si:
  case Intrinsic::x86_sse_cvttss2si64:
  case Intrinsic::x86_sse2_cvtsd2si:
  case Intrinsic::x86_sse2_cvtsd2si64:
  case Intrinsic::x86_sse2_cvttsd2si:
  case Intrinsic::x86_sse2_cvttsd2si64:
  case Intrinsic::x86_avx512_vcvtss2si32:
  case Intrinsic::x86_avx512_vcvtss2si64:
  case Intrinsic::x86_avx512_vcvtss2usi32:
  case Intrinsic::x86_avx512_vcvtss2usi64:
  case Intrinsic::x86_avx512_vcvtsd2si32:
  case Intrinsic::x86_avx512_vcvtsd2si64:
  case Intrinsic::x86_avx512_vcvtsd2usi32:
  case Intrinsic::x86_avx512_vcvtsd2usi64:
  case Intrinsic::x86_avx512_cvttss2si:
  case Intrinsic::x86_avx512_cvttss2si64:
  case Intrinsic::x86_avx512_cvttss2usi:
  case Intrinsic::x86_avx512_cvttss2usi64:
  case Intrinsic::x86_avx512_cvttsd2si:
  case Intrinsic::x86_avx512_cvttsd2si64:
  case Intrinsic::x86_avx512_cvttsd2usi:
  case Intrinsic::x86_avx512_cvttsd2usi64: {
    Value *Arg = II.getArgOperand(0);
    unsigned VWidth = cast<FixedVectorType>(Arg->getType())->getNumElements();
    if (Value *V = SimplifyDemandedVectorEltsLow(Arg, VWidth, 1)) {
  case Intrinsic::x86_mmx_pmovmskb:
  case Intrinsic::x86_sse_movmsk_ps:
  case Intrinsic::x86_sse2_movmsk_pd:
  case Intrinsic::x86_sse2_pmovmskb_128:
  case Intrinsic::x86_avx_movmsk_pd_256:
  case Intrinsic::x86_avx_movmsk_ps_256:
  case Intrinsic::x86_avx2_pmovmskb:
  case Intrinsic::x86_sse_comieq_ss:
  case Intrinsic::x86_sse_comige_ss:
  case Intrinsic::x86_sse_comigt_ss:
  case Intrinsic::x86_sse_comile_ss:
  case Intrinsic::x86_sse_comilt_ss:
  case Intrinsic::x86_sse_comineq_ss:
  case Intrinsic::x86_sse_ucomieq_ss:
  case Intrinsic::x86_sse_ucomige_ss:
  case Intrinsic::x86_sse_ucomigt_ss:
  case Intrinsic::x86_sse_ucomile_ss:
  case Intrinsic::x86_sse_ucomilt_ss:
  case Intrinsic::x86_sse_ucomineq_ss:
  case Intrinsic::x86_sse2_comieq_sd:
  case Intrinsic::x86_sse2_comige_sd:
  case Intrinsic::x86_sse2_comigt_sd:
  case Intrinsic::x86_sse2_comile_sd:
  case Intrinsic::x86_sse2_comilt_sd:
  case Intrinsic::x86_sse2_comineq_sd:
  case Intrinsic::x86_sse2_ucomieq_sd:
  case Intrinsic::x86_sse2_ucomige_sd:
  case Intrinsic::x86_sse2_ucomigt_sd:
  case Intrinsic::x86_sse2_ucomile_sd:
  case Intrinsic::x86_sse2_ucomilt_sd:
  case Intrinsic::x86_sse2_ucomineq_sd:
  case Intrinsic::x86_avx512_vcomi_ss:
  case Intrinsic::x86_avx512_vcomi_sd:
  case Intrinsic::x86_avx512_mask_cmp_ss:
  case Intrinsic::x86_avx512_mask_cmp_sd: {
    bool MadeChange = false;
    Value *Arg0 = II.getArgOperand(0);
    Value *Arg1 = II.getArgOperand(1);
    unsigned VWidth = cast<FixedVectorType>(Arg0->getType())->getNumElements();
    if (Value *V = SimplifyDemandedVectorEltsLow(Arg0, VWidth, 1)) {
    if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, 1)) {
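  // For the AVX-512 arithmetic intrinsics that carry an explicit rounding-mode
  // operand, a value of 4 (_MM_FROUND_CUR_DIRECTION) means the default rounding
  // mode, so the call can be replaced with an ordinary fadd/fsub/fmul/fdiv.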
  case Intrinsic::x86_avx512_add_ps_512:
  case Intrinsic::x86_avx512_div_ps_512:
  case Intrinsic::x86_avx512_mul_ps_512:
  case Intrinsic::x86_avx512_sub_ps_512:
  case Intrinsic::x86_avx512_add_pd_512:
  case Intrinsic::x86_avx512_div_pd_512:
  case Intrinsic::x86_avx512_mul_pd_512:
  case Intrinsic::x86_avx512_sub_pd_512:
    if (auto *R = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
      if (R->getValue() == 4) {
        Value *Arg0 = II.getArgOperand(0);
        Value *Arg1 = II.getArgOperand(1);

        case Intrinsic::x86_avx512_add_ps_512:
        case Intrinsic::x86_avx512_add_pd_512:
        case Intrinsic::x86_avx512_sub_ps_512:
        case Intrinsic::x86_avx512_sub_pd_512:
        case Intrinsic::x86_avx512_mul_ps_512:
        case Intrinsic::x86_avx512_mul_pd_512:
        case Intrinsic::x86_avx512_div_ps_512:
        case Intrinsic::x86_avx512_div_pd_512:

  case Intrinsic::x86_avx512_mask_add_ss_round:
  case Intrinsic::x86_avx512_mask_div_ss_round:
  case Intrinsic::x86_avx512_mask_mul_ss_round:
  case Intrinsic::x86_avx512_mask_sub_ss_round:
  case Intrinsic::x86_avx512_mask_add_sd_round:
  case Intrinsic::x86_avx512_mask_div_sd_round:
  case Intrinsic::x86_avx512_mask_mul_sd_round:
  case Intrinsic::x86_avx512_mask_sub_sd_round:
    if (auto *R = dyn_cast<ConstantInt>(II.getArgOperand(4))) {
      if (R->getValue() == 4) {
        Value *Arg0 = II.getArgOperand(0);
        Value *Arg1 = II.getArgOperand(1);

        case Intrinsic::x86_avx512_mask_add_ss_round:
        case Intrinsic::x86_avx512_mask_add_sd_round:
        case Intrinsic::x86_avx512_mask_sub_ss_round:
        case Intrinsic::x86_avx512_mask_sub_sd_round:
        case Intrinsic::x86_avx512_mask_mul_ss_round:
        case Intrinsic::x86_avx512_mask_mul_sd_round:
        case Intrinsic::x86_avx512_mask_div_ss_round:
        case Intrinsic::x86_avx512_mask_div_sd_round:

        Value *Mask = II.getArgOperand(3);
        auto *C = dyn_cast<ConstantInt>(Mask);
        if (!C || !C->getValue()[0]) {
            cast<IntegerType>(Mask->getType())->getBitWidth());
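  // For the non-immediate packed shifts the second operand is a full vector,
  // but only its low 64 bits are read as the shift count, so only the lower
  // half of that operand's elements are demanded.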
  case Intrinsic::x86_sse2_psrai_d:
  case Intrinsic::x86_sse2_psrai_w:
  case Intrinsic::x86_avx2_psrai_d:
  case Intrinsic::x86_avx2_psrai_w:
  case Intrinsic::x86_avx512_psrai_q_128:
  case Intrinsic::x86_avx512_psrai_q_256:
  case Intrinsic::x86_avx512_psrai_d_512:
  case Intrinsic::x86_avx512_psrai_q_512:
  case Intrinsic::x86_avx512_psrai_w_512:
  case Intrinsic::x86_sse2_psrli_d:
  case Intrinsic::x86_sse2_psrli_q:
  case Intrinsic::x86_sse2_psrli_w:
  case Intrinsic::x86_avx2_psrli_d:
  case Intrinsic::x86_avx2_psrli_q:
  case Intrinsic::x86_avx2_psrli_w:
  case Intrinsic::x86_avx512_psrli_d_512:
  case Intrinsic::x86_avx512_psrli_q_512:
  case Intrinsic::x86_avx512_psrli_w_512:
  case Intrinsic::x86_sse2_pslli_d:
  case Intrinsic::x86_sse2_pslli_q:
  case Intrinsic::x86_sse2_pslli_w:
  case Intrinsic::x86_avx2_pslli_d:
  case Intrinsic::x86_avx2_pslli_q:
  case Intrinsic::x86_avx2_pslli_w:
  case Intrinsic::x86_avx512_pslli_d_512:
  case Intrinsic::x86_avx512_pslli_q_512:
  case Intrinsic::x86_avx512_pslli_w_512:

  case Intrinsic::x86_sse2_psra_d:
  case Intrinsic::x86_sse2_psra_w:
  case Intrinsic::x86_avx2_psra_d:
  case Intrinsic::x86_avx2_psra_w:
  case Intrinsic::x86_avx512_psra_q_128:
  case Intrinsic::x86_avx512_psra_q_256:
  case Intrinsic::x86_avx512_psra_d_512:
  case Intrinsic::x86_avx512_psra_q_512:
  case Intrinsic::x86_avx512_psra_w_512:
  case Intrinsic::x86_sse2_psrl_d:
  case Intrinsic::x86_sse2_psrl_q:
  case Intrinsic::x86_sse2_psrl_w:
  case Intrinsic::x86_avx2_psrl_d:
  case Intrinsic::x86_avx2_psrl_q:
  case Intrinsic::x86_avx2_psrl_w:
  case Intrinsic::x86_avx512_psrl_d_512:
  case Intrinsic::x86_avx512_psrl_q_512:
  case Intrinsic::x86_avx512_psrl_w_512:
  case Intrinsic::x86_sse2_psll_d:
  case Intrinsic::x86_sse2_psll_q:
  case Intrinsic::x86_sse2_psll_w:
  case Intrinsic::x86_avx2_psll_d:
  case Intrinsic::x86_avx2_psll_q:
  case Intrinsic::x86_avx2_psll_w:
  case Intrinsic::x86_avx512_psll_d_512:
  case Intrinsic::x86_avx512_psll_q_512:
  case Intrinsic::x86_avx512_psll_w_512: {
    Value *Arg1 = II.getArgOperand(1);
           "Unexpected packed shift size");
    unsigned VWidth = cast<FixedVectorType>(Arg1->getType())->getNumElements();

    if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, VWidth / 2)) {

  case Intrinsic::x86_avx2_psllv_d:
  case Intrinsic::x86_avx2_psllv_d_256:
  case Intrinsic::x86_avx2_psllv_q:
  case Intrinsic::x86_avx2_psllv_q_256:
  case Intrinsic::x86_avx512_psllv_d_512:
  case Intrinsic::x86_avx512_psllv_q_512:
  case Intrinsic::x86_avx512_psllv_w_128:
  case Intrinsic::x86_avx512_psllv_w_256:
  case Intrinsic::x86_avx512_psllv_w_512:
  case Intrinsic::x86_avx2_psrav_d:
  case Intrinsic::x86_avx2_psrav_d_256:
  case Intrinsic::x86_avx512_psrav_q_128:
  case Intrinsic::x86_avx512_psrav_q_256:
  case Intrinsic::x86_avx512_psrav_d_512:
  case Intrinsic::x86_avx512_psrav_q_512:
  case Intrinsic::x86_avx512_psrav_w_128:
  case Intrinsic::x86_avx512_psrav_w_256:
  case Intrinsic::x86_avx512_psrav_w_512:
  case Intrinsic::x86_avx2_psrlv_d:
  case Intrinsic::x86_avx2_psrlv_d_256:
  case Intrinsic::x86_avx2_psrlv_q:
  case Intrinsic::x86_avx2_psrlv_q_256:
  case Intrinsic::x86_avx512_psrlv_d_512:
  case Intrinsic::x86_avx512_psrlv_q_512:
  case Intrinsic::x86_avx512_psrlv_w_128:
  case Intrinsic::x86_avx512_psrlv_w_256:
  case Intrinsic::x86_avx512_psrlv_w_512:

  case Intrinsic::x86_sse2_packssdw_128:
  case Intrinsic::x86_sse2_packsswb_128:
  case Intrinsic::x86_avx2_packssdw:
  case Intrinsic::x86_avx2_packsswb:
  case Intrinsic::x86_avx512_packssdw_512:
  case Intrinsic::x86_avx512_packsswb_512:

  case Intrinsic::x86_sse2_packuswb_128:
  case Intrinsic::x86_sse41_packusdw:
  case Intrinsic::x86_avx2_packusdw:
  case Intrinsic::x86_avx2_packuswb:
  case Intrinsic::x86_avx512_packusdw_512:
  case Intrinsic::x86_avx512_packuswb_512:

  case Intrinsic::x86_sse2_pmulh_w:
  case Intrinsic::x86_avx2_pmulh_w:
  case Intrinsic::x86_avx512_pmulh_w_512:

  case Intrinsic::x86_sse2_pmulhu_w:
  case Intrinsic::x86_avx2_pmulhu_w:
  case Intrinsic::x86_avx512_pmulhu_w_512:

  case Intrinsic::x86_ssse3_pmul_hr_sw_128:
  case Intrinsic::x86_avx2_pmul_hr_sw:
  case Intrinsic::x86_avx512_pmul_hr_sw_512:

  case Intrinsic::x86_sse2_pmadd_wd:
  case Intrinsic::x86_avx2_pmadd_wd:
  case Intrinsic::x86_avx512_pmaddw_d_512:

  case Intrinsic::x86_ssse3_pmadd_ub_sw_128:
  case Intrinsic::x86_avx2_pmadd_ub_sw:
  case Intrinsic::x86_avx512_pmaddubs_w_512:

  case Intrinsic::x86_pclmulqdq:
  case Intrinsic::x86_pclmulqdq_256:
  case Intrinsic::x86_pclmulqdq_512: {
    if (auto *C = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
      unsigned Imm = C->getZExtValue();

      bool MadeChange = false;
      Value *Arg0 = II.getArgOperand(0);
      Value *Arg1 = II.getArgOperand(1);
          cast<FixedVectorType>(Arg0->getType())->getNumElements();

      APInt UndefElts1(VWidth, 0);
      APInt DemandedElts1 =

      APInt UndefElts2(VWidth, 0);
      APInt DemandedElts2 =
  case Intrinsic::x86_sse41_insertps:

  case Intrinsic::x86_sse4a_extrq: {
    Value *Op0 = II.getArgOperand(0);
    Value *Op1 = II.getArgOperand(1);
    unsigned VWidth0 = cast<FixedVectorType>(Op0->getType())->getNumElements();
    unsigned VWidth1 = cast<FixedVectorType>(Op1->getType())->getNumElements();
           VWidth1 == 16 && "Unexpected operand sizes");

    auto *C1 = dyn_cast<Constant>(Op1);
        C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)0))
        C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)1))

    bool MadeChange = false;
    if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
    if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 2)) {

  case Intrinsic::x86_sse4a_extrqi: {
    Value *Op0 = II.getArgOperand(0);
    unsigned VWidth = cast<FixedVectorType>(Op0->getType())->getNumElements();
           "Unexpected operand size");

    auto *CILength = dyn_cast<ConstantInt>(II.getArgOperand(1));
    auto *CIIndex = dyn_cast<ConstantInt>(II.getArgOperand(2));

    if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) {

  case Intrinsic::x86_sse4a_insertq: {
    Value *Op0 = II.getArgOperand(0);
    Value *Op1 = II.getArgOperand(1);
    unsigned VWidth = cast<FixedVectorType>(Op0->getType())->getNumElements();
           cast<FixedVectorType>(Op1->getType())->getNumElements() == 2 &&
           "Unexpected operand size");

    auto *C1 = dyn_cast<Constant>(Op1);
        C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)1))

    const APInt &V11 = CI11->getValue();

    if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) {

  case Intrinsic::x86_sse4a_insertqi: {
    Value *Op0 = II.getArgOperand(0);
    Value *Op1 = II.getArgOperand(1);
    unsigned VWidth0 = cast<FixedVectorType>(Op0->getType())->getNumElements();
    unsigned VWidth1 = cast<FixedVectorType>(Op1->getType())->getNumElements();
           VWidth1 == 2 && "Unexpected operand sizes");

    auto *CILength = dyn_cast<ConstantInt>(II.getArgOperand(2));
    auto *CIIndex = dyn_cast<ConstantInt>(II.getArgOperand(3));

    if (CILength && CIIndex) {
      APInt Len = CILength->getValue().zextOrTrunc(6);
      APInt Idx = CIIndex->getValue().zextOrTrunc(6);

    bool MadeChange = false;
    if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
    if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 1)) {
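  // The variable blends (BLENDVPS/BLENDVPD/PBLENDVB) select per element on the
  // sign bit of the mask; a constant mask folds to a select on its sign bits,
  // and a mask that is a sign-extended condition vector (possibly widened or
  // narrowed through a shuffle) becomes a plain IR select.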
  case Intrinsic::x86_sse41_pblendvb:
  case Intrinsic::x86_sse41_blendvps:
  case Intrinsic::x86_sse41_blendvpd:
  case Intrinsic::x86_avx_blendv_ps_256:
  case Intrinsic::x86_avx_blendv_pd_256:
  case Intrinsic::x86_avx2_pblendvb: {
    Value *Op0 = II.getArgOperand(0);
    Value *Op1 = II.getArgOperand(1);
    Value *Mask = II.getArgOperand(2);

    if (isa<ConstantAggregateZero>(Mask)) {

    if (auto *ConstantMask = dyn_cast<ConstantDataVector>(Mask)) {

    Value *MaskSrc = nullptr;
                            m_Mask(ShuffleMask))))) {

      int NumElts = cast<FixedVectorType>(MaskSrc->getType())->getNumElements();
                 [NumElts](int M) { return M < 0 || M >= NumElts; }))

    auto *MaskTy = cast<FixedVectorType>(Mask->getType());
    auto *OpTy = cast<FixedVectorType>(II.getType());
    unsigned NumMaskElts = MaskTy->getNumElements();
    unsigned NumOperandElts = OpTy->getNumElements();

      unsigned NumMaskSrcElts =
          cast<FixedVectorType>(MaskSrc->getType())->getNumElements();
      NumMaskElts = (ShuffleMask.size() * NumMaskElts) / NumMaskSrcElts;

      if (NumMaskElts > NumOperandElts)

    assert(MaskTy->getPrimitiveSizeInBits() ==
               OpTy->getPrimitiveSizeInBits() &&
           "Not expecting mask and operands with different sizes");

    if (NumMaskElts == NumOperandElts) {
    if (NumMaskElts < NumOperandElts) {

  case Intrinsic::x86_ssse3_pshuf_b_128:
  case Intrinsic::x86_avx2_pshuf_b:
  case Intrinsic::x86_avx512_pshuf_b_512: {

  case Intrinsic::x86_avx_vpermilvar_ps:
  case Intrinsic::x86_avx_vpermilvar_ps_256:
  case Intrinsic::x86_avx512_vpermilvar_ps_512: {

  case Intrinsic::x86_avx_vpermilvar_pd:
  case Intrinsic::x86_avx_vpermilvar_pd_256:
  case Intrinsic::x86_avx512_vpermilvar_pd_512: {

  case Intrinsic::x86_avx2_permd:
  case Intrinsic::x86_avx2_permps:
  case Intrinsic::x86_avx512_permvar_df_256:
  case Intrinsic::x86_avx512_permvar_df_512:
  case Intrinsic::x86_avx512_permvar_di_256:
  case Intrinsic::x86_avx512_permvar_di_512:
  case Intrinsic::x86_avx512_permvar_hi_128:
  case Intrinsic::x86_avx512_permvar_hi_256:
  case Intrinsic::x86_avx512_permvar_hi_512:
  case Intrinsic::x86_avx512_permvar_qi_128:
  case Intrinsic::x86_avx512_permvar_qi_256:
  case Intrinsic::x86_avx512_permvar_qi_512:
  case Intrinsic::x86_avx512_permvar_sf_512:
  case Intrinsic::x86_avx512_permvar_si_512:

  case Intrinsic::x86_avx512_vpermi2var_d_128:
  case Intrinsic::x86_avx512_vpermi2var_d_256:
  case Intrinsic::x86_avx512_vpermi2var_d_512:
  case Intrinsic::x86_avx512_vpermi2var_hi_128:
  case Intrinsic::x86_avx512_vpermi2var_hi_256:
  case Intrinsic::x86_avx512_vpermi2var_hi_512:
  case Intrinsic::x86_avx512_vpermi2var_pd_128:
  case Intrinsic::x86_avx512_vpermi2var_pd_256:
  case Intrinsic::x86_avx512_vpermi2var_pd_512:
  case Intrinsic::x86_avx512_vpermi2var_ps_128:
  case Intrinsic::x86_avx512_vpermi2var_ps_256:
  case Intrinsic::x86_avx512_vpermi2var_ps_512:
  case Intrinsic::x86_avx512_vpermi2var_q_128:
  case Intrinsic::x86_avx512_vpermi2var_q_256:
  case Intrinsic::x86_avx512_vpermi2var_q_512:
  case Intrinsic::x86_avx512_vpermi2var_qi_128:
  case Intrinsic::x86_avx512_vpermi2var_qi_256:
  case Intrinsic::x86_avx512_vpermi2var_qi_512:

  case Intrinsic::x86_avx_maskload_ps:
  case Intrinsic::x86_avx_maskload_pd:
  case Intrinsic::x86_avx_maskload_ps_256:
  case Intrinsic::x86_avx_maskload_pd_256:
  case Intrinsic::x86_avx2_maskload_d:
  case Intrinsic::x86_avx2_maskload_q:
  case Intrinsic::x86_avx2_maskload_d_256:
  case Intrinsic::x86_avx2_maskload_q_256:

  case Intrinsic::x86_sse2_maskmov_dqu:
  case Intrinsic::x86_avx_maskstore_ps:
  case Intrinsic::x86_avx_maskstore_pd:
  case Intrinsic::x86_avx_maskstore_ps_256:
  case Intrinsic::x86_avx_maskstore_pd_256:
  case Intrinsic::x86_avx2_maskstore_d:
  case Intrinsic::x86_avx2_maskstore_q:
  case Intrinsic::x86_avx2_maskstore_d_256:
  case Intrinsic::x86_avx2_maskstore_q_256:

  case Intrinsic::x86_addcarry_32:
  case Intrinsic::x86_addcarry_64:

  case Intrinsic::x86_avx512_pternlog_d_128:
  case Intrinsic::x86_avx512_pternlog_d_256:
  case Intrinsic::x86_avx512_pternlog_d_512:
  case Intrinsic::x86_avx512_pternlog_q_128:
  case Intrinsic::x86_avx512_pternlog_q_256:
  case Intrinsic::x86_avx512_pternlog_q_512:

  return std::nullopt;
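// X86TTIImpl::simplifyDemandedUseBitsIntrinsic: the MOVMSK family produces one
// result bit per source element, so all higher result bits are known zero and
// a fully undemanded argument lets the call fold to zero.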
std::optional<Value *> X86TTIImpl::simplifyDemandedUseBitsIntrinsic(
    InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known,
    bool &KnownBitsComputed) const {
  switch (II.getIntrinsicID()) {
  case Intrinsic::x86_mmx_pmovmskb:
  case Intrinsic::x86_sse_movmsk_ps:
  case Intrinsic::x86_sse2_movmsk_pd:
  case Intrinsic::x86_sse2_pmovmskb_128:
  case Intrinsic::x86_avx_movmsk_ps_256:
  case Intrinsic::x86_avx_movmsk_pd_256:
  case Intrinsic::x86_avx2_pmovmskb: {

    if (II.getIntrinsicID() == Intrinsic::x86_mmx_pmovmskb) {

    auto *ArgType = cast<FixedVectorType>(II.getArgOperand(0)->getType());
    ArgWidth = ArgType->getNumElements();

    Type *VTy = II.getType();
    if (DemandedElts.isZero()) {

    KnownBitsComputed = true;

  return std::nullopt;
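// X86TTIImpl::simplifyDemandedVectorEltsIntrinsic: propagates demanded-element
// information through the scalar-only, per-lane and packing intrinsics so that
// unused lanes of their operands can be simplified away or reported as undef.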
std::optional<Value *> X86TTIImpl::simplifyDemandedVectorEltsIntrinsic(
    InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
    APInt &UndefElts2, APInt &UndefElts3,
    std::function<void(Instruction *, unsigned, APInt, APInt &)>
        simplifyAndSetOp) const {
  unsigned VWidth = cast<FixedVectorType>(II.getType())->getNumElements();
  switch (II.getIntrinsicID()) {
  case Intrinsic::x86_xop_vfrcz_ss:
  case Intrinsic::x86_xop_vfrcz_sd:
    if (!DemandedElts[0]) {

    simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);

    UndefElts = UndefElts[0];

  case Intrinsic::x86_sse_rcp_ss:
  case Intrinsic::x86_sse_rsqrt_ss:
    simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);

    if (!DemandedElts[0]) {
      return II.getArgOperand(0);

  case Intrinsic::x86_sse_min_ss:
  case Intrinsic::x86_sse_max_ss:
  case Intrinsic::x86_sse_cmp_ss:
  case Intrinsic::x86_sse2_min_sd:
  case Intrinsic::x86_sse2_max_sd:
  case Intrinsic::x86_sse2_cmp_sd: {
    simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);

    if (!DemandedElts[0]) {
      return II.getArgOperand(0);

    simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);

  case Intrinsic::x86_sse41_round_ss:
  case Intrinsic::x86_sse41_round_sd: {
    APInt DemandedElts2 = DemandedElts;
    simplifyAndSetOp(&II, 0, DemandedElts2, UndefElts);

    if (!DemandedElts[0]) {
      return II.getArgOperand(0);

    simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);

    UndefElts |= UndefElts2[0];

  case Intrinsic::x86_avx512_mask_add_ss_round:
  case Intrinsic::x86_avx512_mask_div_ss_round:
  case Intrinsic::x86_avx512_mask_mul_ss_round:
  case Intrinsic::x86_avx512_mask_sub_ss_round:
  case Intrinsic::x86_avx512_mask_max_ss_round:
  case Intrinsic::x86_avx512_mask_min_ss_round:
  case Intrinsic::x86_avx512_mask_add_sd_round:
  case Intrinsic::x86_avx512_mask_div_sd_round:
  case Intrinsic::x86_avx512_mask_mul_sd_round:
  case Intrinsic::x86_avx512_mask_sub_sd_round:
  case Intrinsic::x86_avx512_mask_max_sd_round:
  case Intrinsic::x86_avx512_mask_min_sd_round:
    simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);

    if (!DemandedElts[0]) {
      return II.getArgOperand(0);

    simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
    simplifyAndSetOp(&II, 2, DemandedElts, UndefElts3);

    if (!UndefElts2[0] || !UndefElts3[0])

  case Intrinsic::x86_sse3_addsub_pd:
  case Intrinsic::x86_sse3_addsub_ps:
  case Intrinsic::x86_avx_addsub_pd_256:
  case Intrinsic::x86_avx_addsub_ps_256: {

    bool IsSubOnly = DemandedElts.isSubsetOf(SubMask);
    bool IsAddOnly = DemandedElts.isSubsetOf(AddMask);
    if (IsSubOnly || IsAddOnly) {
      assert((IsSubOnly ^ IsAddOnly) && "Can't be both add-only and sub-only");

      Value *Arg0 = II.getArgOperand(0), *Arg1 = II.getArgOperand(1);
          IsSubOnly ? Instruction::FSub : Instruction::FAdd, Arg0, Arg1);

    simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
    simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
    UndefElts &= UndefElts2;
  case Intrinsic::x86_avx2_psllv_d:
  case Intrinsic::x86_avx2_psllv_d_256:
  case Intrinsic::x86_avx2_psllv_q:
  case Intrinsic::x86_avx2_psllv_q_256:
  case Intrinsic::x86_avx2_psrlv_d:
  case Intrinsic::x86_avx2_psrlv_d_256:
  case Intrinsic::x86_avx2_psrlv_q:
  case Intrinsic::x86_avx2_psrlv_q_256:
  case Intrinsic::x86_avx2_psrav_d:
  case Intrinsic::x86_avx2_psrav_d_256: {
    simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
    simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
    UndefElts &= UndefElts2;

  case Intrinsic::x86_sse2_pmulh_w:
  case Intrinsic::x86_avx2_pmulh_w:
  case Intrinsic::x86_avx512_pmulh_w_512:
  case Intrinsic::x86_sse2_pmulhu_w:
  case Intrinsic::x86_avx2_pmulhu_w:
  case Intrinsic::x86_avx512_pmulhu_w_512:
  case Intrinsic::x86_ssse3_pmul_hr_sw_128:
  case Intrinsic::x86_avx2_pmul_hr_sw:
  case Intrinsic::x86_avx512_pmul_hr_sw_512: {
    simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
    simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
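  // For the pack instructions each 128-bit result lane takes its low half from
  // operand 0 and its high half from operand 1, so the demanded result elements
  // must be translated into per-operand, per-lane demanded elements before
  // recursing, and the reported undef lanes mapped back the same way.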
  case Intrinsic::x86_sse2_packssdw_128:
  case Intrinsic::x86_sse2_packsswb_128:
  case Intrinsic::x86_sse2_packuswb_128:
  case Intrinsic::x86_sse41_packusdw:
  case Intrinsic::x86_avx2_packssdw:
  case Intrinsic::x86_avx2_packsswb:
  case Intrinsic::x86_avx2_packusdw:
  case Intrinsic::x86_avx2_packuswb:
  case Intrinsic::x86_avx512_packssdw_512:
  case Intrinsic::x86_avx512_packsswb_512:
  case Intrinsic::x86_avx512_packusdw_512:
  case Intrinsic::x86_avx512_packuswb_512: {
    auto *Ty0 = II.getArgOperand(0)->getType();
    unsigned InnerVWidth = cast<FixedVectorType>(Ty0)->getNumElements();
    assert(VWidth == (InnerVWidth * 2) && "Unexpected input size");

    unsigned NumLanes = Ty0->getPrimitiveSizeInBits() / 128;
    unsigned VWidthPerLane = VWidth / NumLanes;
    unsigned InnerVWidthPerLane = InnerVWidth / NumLanes;

    for (int OpNum = 0; OpNum != 2; ++OpNum) {
      APInt OpDemandedElts(InnerVWidth, 0);
      for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
        unsigned LaneIdx = Lane * VWidthPerLane;
        for (unsigned Elt = 0; Elt != InnerVWidthPerLane; ++Elt) {
          unsigned Idx = LaneIdx + Elt + InnerVWidthPerLane * OpNum;
          if (DemandedElts[Idx])
            OpDemandedElts.setBit((Lane * InnerVWidthPerLane) + Elt);

      APInt OpUndefElts(InnerVWidth, 0);
      simplifyAndSetOp(&II, OpNum, OpDemandedElts, OpUndefElts);

      OpUndefElts = OpUndefElts.zext(VWidth);
      for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
        APInt LaneElts = OpUndefElts.lshr(InnerVWidthPerLane * Lane);
        LaneElts = LaneElts.getLoBits(InnerVWidthPerLane);
        LaneElts <<= InnerVWidthPerLane * (2 * Lane + OpNum);
        UndefElts |= LaneElts;
  case Intrinsic::x86_sse2_pmadd_wd:
  case Intrinsic::x86_avx2_pmadd_wd:
  case Intrinsic::x86_avx512_pmaddw_d_512:
  case Intrinsic::x86_ssse3_pmadd_ub_sw_128:
  case Intrinsic::x86_avx2_pmadd_ub_sw:
  case Intrinsic::x86_avx512_pmaddubs_w_512: {
    auto *ArgTy = II.getArgOperand(0)->getType();
    unsigned InnerVWidth = cast<FixedVectorType>(ArgTy)->getNumElements();
    assert((VWidth * 2) == InnerVWidth && "Unexpected input size");

    APInt Op0UndefElts(InnerVWidth, 0);
    APInt Op1UndefElts(InnerVWidth, 0);
    simplifyAndSetOp(&II, 0, OpDemandedElts, Op0UndefElts);
    simplifyAndSetOp(&II, 1, OpDemandedElts, Op1UndefElts);
  case Intrinsic::x86_ssse3_pshuf_b_128:
  case Intrinsic::x86_avx2_pshuf_b:
  case Intrinsic::x86_avx512_pshuf_b_512:
  case Intrinsic::x86_avx_vpermilvar_ps:
  case Intrinsic::x86_avx_vpermilvar_ps_256:
  case Intrinsic::x86_avx512_vpermilvar_ps_512:
  case Intrinsic::x86_avx_vpermilvar_pd:
  case Intrinsic::x86_avx_vpermilvar_pd_256:
  case Intrinsic::x86_avx512_vpermilvar_pd_512:
  case Intrinsic::x86_avx2_permd:
  case Intrinsic::x86_avx2_permps: {
    simplifyAndSetOp(&II, 1, DemandedElts, UndefElts);

  case Intrinsic::x86_sse4a_extrq:
  case Intrinsic::x86_sse4a_extrqi:
  case Intrinsic::x86_sse4a_insertq:
  case Intrinsic::x86_sse4a_insertqi:

  return std::nullopt;