45#define DEBUG_TYPE "vector-combine"
51STATISTIC(NumVecLoad,
"Number of vector loads formed");
52STATISTIC(NumVecCmp,
"Number of vector compares formed");
53STATISTIC(NumVecBO,
"Number of vector binops formed");
54STATISTIC(NumVecCmpBO,
"Number of vector compare + binop formed");
55STATISTIC(NumShufOfBitcast,
"Number of shuffles moved after bitcast");
56STATISTIC(NumScalarOps,
"Number of scalar unary + binary ops formed");
57STATISTIC(NumScalarCmp,
"Number of scalar compares formed");
58STATISTIC(NumScalarIntrinsic,
"Number of scalar intrinsic calls formed");
62 cl::desc(
"Disable all vector combine transforms"));
66 cl::desc(
"Disable binop extract to shuffle transforms"));
70 cl::desc(
"Max number of instructions to scan for vector combining."));
72static const unsigned InvalidIndex = std::numeric_limits<unsigned>::max();
80 bool TryEarlyFoldsOnly)
83 SQ(*
DL, nullptr, &DT, &AC),
84 TryEarlyFoldsOnly(TryEarlyFoldsOnly) {}
91 const TargetTransformInfo &TTI;
92 const DominatorTree &DT;
96 const SimplifyQuery SQ;
100 bool TryEarlyFoldsOnly;
102 InstructionWorklist Worklist;
111 bool vectorizeLoadInsert(Instruction &
I);
112 bool widenSubvectorLoad(Instruction &
I);
113 ExtractElementInst *getShuffleExtract(ExtractElementInst *Ext0,
114 ExtractElementInst *Ext1,
115 unsigned PreferredExtractIndex)
const;
116 bool isExtractExtractCheap(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
117 const Instruction &
I,
118 ExtractElementInst *&ConvertToShuffle,
119 unsigned PreferredExtractIndex);
122 bool foldExtractExtract(Instruction &
I);
123 bool foldInsExtFNeg(Instruction &
I);
124 bool foldInsExtBinop(Instruction &
I);
125 bool foldInsExtVectorToShuffle(Instruction &
I);
126 bool foldBitOpOfCastops(Instruction &
I);
127 bool foldBitOpOfCastConstant(Instruction &
I);
128 bool foldBitcastShuffle(Instruction &
I);
129 bool scalarizeOpOrCmp(Instruction &
I);
130 bool scalarizeVPIntrinsic(Instruction &
I);
131 bool foldExtractedCmps(Instruction &
I);
132 bool foldSelectsFromBitcast(Instruction &
I);
133 bool foldBinopOfReductions(Instruction &
I);
134 bool foldSingleElementStore(Instruction &
I);
135 bool scalarizeLoad(Instruction &
I);
136 bool scalarizeLoadExtract(LoadInst *LI, VectorType *VecTy,
Value *Ptr);
137 bool scalarizeLoadBitcast(LoadInst *LI, VectorType *VecTy,
Value *Ptr);
138 bool scalarizeExtExtract(Instruction &
I);
139 bool foldConcatOfBoolMasks(Instruction &
I);
140 bool foldPermuteOfBinops(Instruction &
I);
141 bool foldShuffleOfBinops(Instruction &
I);
142 bool foldShuffleOfSelects(Instruction &
I);
143 bool foldShuffleOfCastops(Instruction &
I);
144 bool foldShuffleOfShuffles(Instruction &
I);
145 bool foldPermuteOfIntrinsic(Instruction &
I);
146 bool foldShufflesOfLengthChangingShuffles(Instruction &
I);
147 bool foldShuffleOfIntrinsics(Instruction &
I);
148 bool foldShuffleToIdentity(Instruction &
I);
149 bool foldShuffleFromReductions(Instruction &
I);
150 bool foldShuffleChainsToReduce(Instruction &
I);
151 bool foldCastFromReductions(Instruction &
I);
152 bool foldSignBitReductionCmp(Instruction &
I);
153 bool foldICmpEqZeroVectorReduce(Instruction &
I);
154 bool foldEquivalentReductionCmp(Instruction &
I);
155 bool foldReduceAddCmpZero(Instruction &
I);
156 bool foldSelectShuffle(Instruction &
I,
bool FromReduction =
false);
157 bool foldInterleaveIntrinsics(Instruction &
I);
158 bool foldDeinterleaveIntrinsics(Instruction &
I);
159 bool foldBitcastOfVPLoad(Instruction &
I);
160 bool shrinkType(Instruction &
I);
161 bool shrinkLoadForShuffles(Instruction &
I);
162 bool shrinkPhiOfShuffles(Instruction &
I);
164 void replaceValue(Instruction &Old,
Value &New,
bool Erase =
true) {
170 Worklist.pushUsersToWorkList(*NewI);
171 Worklist.pushValue(NewI);
188 SmallPtrSet<Value *, 4> Visited;
193 OpI,
nullptr,
nullptr, [&](
Value *V) {
198 NextInst = NextInst->getNextNode();
203 Worklist.pushUsersToWorkList(*OpI);
204 Worklist.pushValue(OpI);
224 if (!Load || !Load->isSimple() || !Load->hasOneUse() ||
225 Load->getFunction()->hasFnAttribute(Attribute::SanitizeMemTag) ||
231 Type *ScalarTy = Load->getType()->getScalarType();
233 unsigned MinVectorSize =
TTI.getMinVectorRegisterBitWidth();
234 if (!ScalarSize || !MinVectorSize || MinVectorSize % ScalarSize != 0 ||
241bool VectorCombine::vectorizeLoadInsert(
Instruction &
I) {
267 Value *SrcPtr =
Load->getPointerOperand()->stripPointerCasts();
270 unsigned MinVecNumElts = MinVectorSize / ScalarSize;
271 auto *MinVecTy = VectorType::get(ScalarTy, MinVecNumElts,
false);
272 unsigned OffsetEltIndex = 0;
280 unsigned OffsetBitWidth =
DL->getIndexTypeSizeInBits(SrcPtr->
getType());
281 APInt
Offset(OffsetBitWidth, 0);
291 uint64_t ScalarSizeInBytes = ScalarSize / 8;
292 if (
Offset.urem(ScalarSizeInBytes) != 0)
296 OffsetEltIndex =
Offset.udiv(ScalarSizeInBytes).getZExtValue();
297 if (OffsetEltIndex >= MinVecNumElts)
314 unsigned AS =
Load->getPointerAddressSpace();
333 unsigned OutputNumElts = Ty->getNumElements();
335 assert(OffsetEltIndex < MinVecNumElts &&
"Address offset too big");
336 Mask[0] = OffsetEltIndex;
343 if (OldCost < NewCost || !NewCost.
isValid())
354 replaceValue(
I, *VecLd);
362bool VectorCombine::widenSubvectorLoad(Instruction &
I) {
365 if (!Shuf->isIdentityWithPadding())
371 unsigned OpIndex =
any_of(Shuf->getShuffleMask(), [&NumOpElts](
int M) {
372 return M >= (int)(NumOpElts);
383 Value *SrcPtr =
Load->getPointerOperand()->stripPointerCasts();
392 unsigned AS =
Load->getPointerAddressSpace();
407 if (OldCost < NewCost || !NewCost.
isValid())
414 replaceValue(
I, *VecLd);
421ExtractElementInst *VectorCombine::getShuffleExtract(
422 ExtractElementInst *Ext0, ExtractElementInst *Ext1,
426 assert(Index0C && Index1C &&
"Expected constant extract indexes");
428 unsigned Index0 = Index0C->getZExtValue();
429 unsigned Index1 = Index1C->getZExtValue();
432 if (Index0 == Index1)
456 if (PreferredExtractIndex == Index0)
458 if (PreferredExtractIndex == Index1)
462 return Index0 > Index1 ? Ext0 : Ext1;
470bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
471 ExtractElementInst *Ext1,
472 const Instruction &
I,
473 ExtractElementInst *&ConvertToShuffle,
474 unsigned PreferredExtractIndex) {
477 assert(Ext0IndexC && Ext1IndexC &&
"Expected constant extract indexes");
479 unsigned Opcode =
I.getOpcode();
492 assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
493 "Expected a compare");
503 unsigned Ext0Index = Ext0IndexC->getZExtValue();
504 unsigned Ext1Index = Ext1IndexC->getZExtValue();
518 unsigned BestExtIndex = Extract0Cost > Extract1Cost ? Ext0Index : Ext1Index;
519 unsigned BestInsIndex = Extract0Cost > Extract1Cost ? Ext1Index : Ext0Index;
520 InstructionCost CheapExtractCost = std::min(Extract0Cost, Extract1Cost);
525 if (Ext0Src == Ext1Src && Ext0Index == Ext1Index) {
530 bool HasUseTax = Ext0 == Ext1 ? !Ext0->
hasNUses(2)
532 OldCost = CheapExtractCost + ScalarOpCost;
533 NewCost = VectorOpCost + CheapExtractCost + HasUseTax * CheapExtractCost;
537 OldCost = Extract0Cost + Extract1Cost + ScalarOpCost;
538 NewCost = VectorOpCost + CheapExtractCost +
543 ConvertToShuffle = getShuffleExtract(Ext0, Ext1, PreferredExtractIndex);
544 if (ConvertToShuffle) {
556 SmallVector<int> ShuffleMask(FixedVecTy->getNumElements(),
558 ShuffleMask[BestInsIndex] = BestExtIndex;
560 VecTy, VecTy, ShuffleMask,
CostKind, 0,
561 nullptr, {ConvertToShuffle});
564 VecTy, VecTy, {},
CostKind, 0,
nullptr,
572 return OldCost < NewCost;
584 ShufMask[NewIndex] = OldIndex;
585 return Builder.CreateShuffleVector(Vec, ShufMask,
"shift");
637 V1,
"foldExtExtBinop");
642 VecBOInst->copyIRFlags(&
I);
648bool VectorCombine::foldExtractExtract(Instruction &
I) {
669 unsigned NumElts = FixedVecTy->getNumElements();
670 if (C0 >= NumElts || C1 >= NumElts)
686 ExtractElementInst *ExtractToChange;
687 if (isExtractExtractCheap(Ext0, Ext1,
I, ExtractToChange, InsertIndex))
693 if (ExtractToChange) {
694 unsigned CheapExtractIdx = ExtractToChange == Ext0 ? C1 : C0;
699 if (ExtractToChange == Ext0)
708 ? foldExtExtCmp(ExtOp0, ExtOp1, ExtIndex,
I)
709 : foldExtExtBinop(ExtOp0, ExtOp1, ExtIndex,
I);
712 replaceValue(
I, *NewExt);
718bool VectorCombine::foldInsExtFNeg(Instruction &
I) {
721 uint64_t ExtIdx, InsIdx;
736 auto *DstVecScalarTy = DstVecTy->getScalarType();
738 if (!SrcVecTy || DstVecScalarTy != SrcVecTy->getScalarType())
743 unsigned NumDstElts = DstVecTy->getNumElements();
744 unsigned NumSrcElts = SrcVecTy->getNumElements();
745 if (ExtIdx > NumSrcElts || InsIdx >= NumDstElts || NumDstElts == 1)
751 SmallVector<int>
Mask(NumDstElts);
752 std::iota(
Mask.begin(),
Mask.end(), 0);
753 Mask[InsIdx] = (ExtIdx % NumDstElts) + NumDstElts;
769 bool NeedLenChg = SrcVecTy->getNumElements() != NumDstElts;
772 SmallVector<int> SrcMask;
775 SrcMask[ExtIdx % NumDstElts] = ExtIdx;
777 DstVecTy, SrcVecTy, SrcMask,
CostKind);
781 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
783 if (NewCost > OldCost)
786 Value *NewShuf, *LenChgShuf =
nullptr;
800 replaceValue(
I, *NewShuf);
806bool VectorCombine::foldInsExtBinop(Instruction &
I) {
807 BinaryOperator *VecBinOp, *SclBinOp;
839 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
841 if (NewCost > OldCost)
852 NewInst->copyIRFlags(VecBinOp);
853 NewInst->andIRFlags(SclBinOp);
858 replaceValue(
I, *NewBO);
864bool VectorCombine::foldBitOpOfCastops(Instruction &
I) {
867 if (!BinOp || !BinOp->isBitwiseLogicOp())
873 if (!LHSCast || !RHSCast) {
874 LLVM_DEBUG(
dbgs() <<
" One or both operands are not cast instructions\n");
880 if (CastOpcode != RHSCast->getOpcode())
884 switch (CastOpcode) {
885 case Instruction::BitCast:
886 case Instruction::Trunc:
887 case Instruction::SExt:
888 case Instruction::ZExt:
894 Value *LHSSrc = LHSCast->getOperand(0);
895 Value *RHSSrc = RHSCast->getOperand(0);
901 auto *SrcTy = LHSSrc->
getType();
902 auto *DstTy =
I.getType();
905 if (CastOpcode != Instruction::BitCast &&
910 if (!SrcTy->getScalarType()->isIntegerTy() ||
911 !DstTy->getScalarType()->isIntegerTy())
926 LHSCastCost + RHSCastCost;
937 if (!LHSCast->hasOneUse())
938 NewCost += LHSCastCost;
939 if (!RHSCast->hasOneUse())
940 NewCost += RHSCastCost;
943 <<
" NewCost=" << NewCost <<
"\n");
945 if (NewCost > OldCost)
950 BinOp->getName() +
".inner");
952 NewBinOp->copyIRFlags(BinOp);
966 replaceValue(
I, *Result);
975bool VectorCombine::foldBitOpOfCastConstant(Instruction &
I) {
991 switch (CastOpcode) {
992 case Instruction::BitCast:
993 case Instruction::ZExt:
994 case Instruction::SExt:
995 case Instruction::Trunc:
1001 Value *LHSSrc = LHSCast->getOperand(0);
1003 auto *SrcTy = LHSSrc->
getType();
1004 auto *DstTy =
I.getType();
1007 if (CastOpcode != Instruction::BitCast &&
1012 if (!SrcTy->getScalarType()->isIntegerTy() ||
1013 !DstTy->getScalarType()->isIntegerTy())
1017 PreservedCastFlags RHSFlags;
1042 if (!LHSCast->hasOneUse())
1043 NewCost += LHSCastCost;
1045 LLVM_DEBUG(
dbgs() <<
"foldBitOpOfCastConstant: OldCost=" << OldCost
1046 <<
" NewCost=" << NewCost <<
"\n");
1048 if (NewCost > OldCost)
1053 LHSSrc, InvC,
I.getName() +
".inner");
1055 NewBinOp->copyIRFlags(&
I);
1075 replaceValue(
I, *Result);
1082bool VectorCombine::foldBitcastShuffle(Instruction &
I) {
1096 if (!DestTy || !SrcTy)
1099 unsigned DestEltSize = DestTy->getScalarSizeInBits();
1100 unsigned SrcEltSize = SrcTy->getScalarSizeInBits();
1101 if (SrcTy->getPrimitiveSizeInBits() % DestEltSize != 0)
1111 if (!(BCTy0 && BCTy0->getElementType() == DestTy->getElementType()) &&
1112 !(BCTy1 && BCTy1->getElementType() == DestTy->getElementType()))
1116 SmallVector<int, 16> NewMask;
1117 if (DestEltSize <= SrcEltSize) {
1120 if (SrcEltSize % DestEltSize != 0)
1122 unsigned ScaleFactor = SrcEltSize / DestEltSize;
1127 if (DestEltSize % SrcEltSize != 0)
1129 unsigned ScaleFactor = DestEltSize / SrcEltSize;
1136 unsigned NumSrcElts = SrcTy->getPrimitiveSizeInBits() / DestEltSize;
1137 auto *NewShuffleTy =
1139 auto *OldShuffleTy =
1141 unsigned NumOps = IsUnary ? 1 : 2;
1151 TargetTransformInfo::CastContextHint::None,
1156 TargetTransformInfo::CastContextHint::None,
1159 LLVM_DEBUG(
dbgs() <<
"Found a bitcasted shuffle: " <<
I <<
"\n OldCost: "
1160 << OldCost <<
" vs NewCost: " << NewCost <<
"\n");
1162 if (NewCost > OldCost || !NewCost.
isValid())
1170 replaceValue(
I, *Shuf);
1177bool VectorCombine::scalarizeVPIntrinsic(Instruction &
I) {
1191 if (!ScalarOp0 || !ScalarOp1)
1199 auto IsAllTrueMask = [](
Value *MaskVal) {
1202 return ConstValue->isAllOnesValue();
1216 SmallVector<int>
Mask;
1218 Mask.resize(FVTy->getNumElements(), 0);
1227 Args.push_back(
V->getType());
1228 IntrinsicCostAttributes
Attrs(IntrID, VecTy, Args);
1233 std::optional<unsigned> FunctionalOpcode =
1235 std::optional<Intrinsic::ID> ScalarIntrID = std::nullopt;
1236 if (!FunctionalOpcode) {
1245 IntrinsicCostAttributes
Attrs(*ScalarIntrID, VecTy->getScalarType(), Args);
1255 InstructionCost NewCost = ScalarOpCost + SplatCost + CostToKeepSplats;
1257 LLVM_DEBUG(
dbgs() <<
"Found a VP Intrinsic to scalarize: " << VPI
1260 <<
", Cost of scalarizing:" << NewCost <<
"\n");
1263 if (OldCost < NewCost || !NewCost.
isValid())
1274 bool SafeToSpeculate;
1280 *FunctionalOpcode, &VPI,
nullptr, SQ.
AC, SQ.
DT);
1281 if (!SafeToSpeculate &&
1288 {ScalarOp0, ScalarOp1})
1290 ScalarOp0, ScalarOp1);
1299bool VectorCombine::scalarizeOpOrCmp(Instruction &
I) {
1304 if (!UO && !BO && !CI && !
II)
1312 if (Arg->getType() !=
II->getType() &&
1322 for (User *U :
I.users())
1329 std::optional<uint64_t>
Index;
1331 auto Ops =
II ?
II->args() :
I.operands();
1335 uint64_t InsIdx = 0;
1340 if (OpTy->getElementCount().getKnownMinValue() <= InsIdx)
1346 else if (InsIdx != *Index)
1363 if (!
Index.has_value())
1367 Type *ScalarTy = VecTy->getScalarType();
1368 assert(VecTy->isVectorTy() &&
1371 "Unexpected types for insert element into binop or cmp");
1373 unsigned Opcode =
I.getOpcode();
1381 }
else if (UO || BO) {
1385 IntrinsicCostAttributes ScalarICA(
1386 II->getIntrinsicID(), ScalarTy,
1389 IntrinsicCostAttributes VectorICA(
1390 II->getIntrinsicID(), VecTy,
1397 Value *NewVecC =
nullptr;
1399 NewVecC =
simplifyCmpInst(CI->getPredicate(), VecCs[0], VecCs[1], SQ);
1402 simplifyUnOp(UO->getOpcode(), VecCs[0], UO->getFastMathFlags(), SQ);
1404 NewVecC =
simplifyBinOp(BO->getOpcode(), VecCs[0], VecCs[1], SQ);
1418 for (
auto [Idx,
Op, VecC, Scalar] :
enumerate(
Ops, VecCs, ScalarOps)) {
1420 II->getIntrinsicID(), Idx, &
TTI)))
1423 Instruction::InsertElement, VecTy,
CostKind, *Index, VecC, Scalar);
1424 OldCost += InsertCost;
1425 NewCost += !
Op->hasOneUse() * InsertCost;
1429 if (OldCost < NewCost || !NewCost.
isValid())
1439 ++NumScalarIntrinsic;
1449 Scalar = Builder.
CreateCmp(CI->getPredicate(), ScalarOps[0], ScalarOps[1]);
1455 Scalar->setName(
I.getName() +
".scalar");
1460 ScalarInst->copyIRFlags(&
I);
1463 replaceValue(
I, *Insert);
1470bool VectorCombine::foldExtractedCmps(Instruction &
I) {
1475 if (!BI || !
I.getType()->isIntegerTy(1))
1480 Value *B0 =
I.getOperand(0), *B1 =
I.getOperand(1);
1483 CmpPredicate
P0,
P1;
1495 uint64_t Index0, Index1;
1502 ExtractElementInst *ConvertToShuf = getShuffleExtract(Ext0, Ext1,
CostKind);
1505 assert((ConvertToShuf == Ext0 || ConvertToShuf == Ext1) &&
1506 "Unknown ExtractElementInst");
1511 unsigned CmpOpcode =
1526 Ext0Cost + Ext1Cost + CmpCost * 2 +
1532 int CheapIndex = ConvertToShuf == Ext0 ? Index1 : Index0;
1533 int ExpensiveIndex = ConvertToShuf == Ext0 ? Index0 : Index1;
1538 ShufMask[CheapIndex] = ExpensiveIndex;
1543 NewCost += Ext0->
hasOneUse() ? 0 : Ext0Cost;
1544 NewCost += Ext1->
hasOneUse() ? 0 : Ext1Cost;
1549 if (OldCost < NewCost || !NewCost.
isValid())
1559 Value *
LHS = ConvertToShuf == Ext0 ? Shuf : VCmp;
1560 Value *
RHS = ConvertToShuf == Ext0 ? VCmp : Shuf;
1563 replaceValue(
I, *NewExt);
1590bool VectorCombine::foldSelectsFromBitcast(Instruction &
I) {
1597 if (!SrcVecTy || !DstVecTy)
1607 if (SrcEltBits != 32 && SrcEltBits != 64)
1610 if (!DstEltTy->
isIntegerTy() || DstEltBits >= SrcEltBits)
1627 if (!ScalarSelCost.
isValid() || ScalarSelCost == 0)
1630 unsigned MinSelects = (VecSelCost.
getValue() / ScalarSelCost.
getValue()) + 1;
1633 if (!BC->hasNUsesOrMore(MinSelects))
1638 DenseMap<Value *, SmallVector<SelectInst *, 8>> CondToSelects;
1640 for (User *U : BC->users()) {
1645 for (User *ExtUser : Ext->users()) {
1649 Cond->getType()->isIntegerTy(1))
1654 if (CondToSelects.
empty())
1657 bool MadeChange =
false;
1658 Value *SrcVec = BC->getOperand(0);
1661 for (
auto [
Cond, Selects] : CondToSelects) {
1663 if (Selects.size() < MinSelects) {
1664 LLVM_DEBUG(
dbgs() <<
"VectorCombine: foldSelectsFromBitcast not "
1665 <<
"profitable (VecCost=" << VecSelCost
1666 <<
", ScalarCost=" << ScalarSelCost
1667 <<
", NumSelects=" << Selects.size() <<
")\n");
1672 auto InsertPt = std::next(BC->getIterator());
1676 InsertPt = std::next(CondInst->getIterator());
1684 for (SelectInst *Sel : Selects) {
1686 Value *Idx = Ext->getIndexOperand();
1690 replaceValue(*Sel, *NewExt);
1695 <<
" selects into vector select\n");
1709 unsigned ReductionOpc =
1715 CostBeforeReduction =
1716 TTI.getCastInstrCost(RedOp->getOpcode(), VecRedTy, ExtType,
1718 CostAfterReduction =
1719 TTI.getExtendedReductionCost(ReductionOpc, IsUnsigned,
II.getType(),
1723 if (RedOp &&
II.getIntrinsicID() == Intrinsic::vector_reduce_add &&
1729 (Op0->
getOpcode() == RedOp->getOpcode() || Op0 == Op1)) {
1736 TTI.getCastInstrCost(Op0->
getOpcode(), MulType, ExtType,
1739 TTI.getArithmeticInstrCost(Instruction::Mul, MulType,
CostKind);
1741 TTI.getCastInstrCost(RedOp->getOpcode(), VecRedTy, MulType,
1744 CostBeforeReduction = ExtCost * 2 + MulCost + Ext2Cost;
1745 CostAfterReduction =
TTI.getMulAccReductionCost(
1746 IsUnsigned, ReductionOpc,
II.getType(), ExtType,
CostKind);
1749 CostAfterReduction =
TTI.getArithmeticReductionCost(ReductionOpc, VecRedTy,
1753bool VectorCombine::foldBinopOfReductions(Instruction &
I) {
1756 if (BinOpOpc == Instruction::Sub)
1757 ReductionIID = Intrinsic::vector_reduce_add;
1761 if (ReductionIID == Intrinsic::vector_reduce_fadd ||
1762 ReductionIID == Intrinsic::vector_reduce_fmul)
1765 auto checkIntrinsicAndGetItsArgument = [](
Value *
V,
1770 if (
II->getIntrinsicID() == IID &&
II->hasOneUse())
1771 return II->getArgOperand(0);
1775 Value *V0 = checkIntrinsicAndGetItsArgument(
I.getOperand(0), ReductionIID);
1778 Value *
V1 = checkIntrinsicAndGetItsArgument(
I.getOperand(1), ReductionIID);
1783 if (
V1->getType() != VTy)
1787 unsigned ReductionOpc =
1800 CostOfRedOperand0 + CostOfRedOperand1 +
1803 if (NewCost >= OldCost || !NewCost.
isValid())
1807 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
1810 if (BinOpOpc == Instruction::Or)
1817 replaceValue(
I, *Rdx);
1825 unsigned NumScanned = 0;
1826 return std::any_of(Begin, End, [&](
const Instruction &Instr) {
1835class ScalarizationResult {
1836 enum class StatusTy { Unsafe, Safe, SafeWithFreeze };
1841 ScalarizationResult(StatusTy Status,
Value *ToFreeze =
nullptr)
1842 : Status(Status), ToFreeze(ToFreeze) {}
1845 ScalarizationResult(
const ScalarizationResult &
Other) =
default;
1846 ~ScalarizationResult() {
1847 assert(!ToFreeze &&
"freeze() not called with ToFreeze being set");
1850 static ScalarizationResult unsafe() {
return {StatusTy::Unsafe}; }
1851 static ScalarizationResult safe() {
return {StatusTy::Safe}; }
1852 static ScalarizationResult safeWithFreeze(
Value *ToFreeze) {
1853 return {StatusTy::SafeWithFreeze, ToFreeze};
1857 bool isSafe()
const {
return Status == StatusTy::Safe; }
1859 bool isUnsafe()
const {
return Status == StatusTy::Unsafe; }
1862 bool isSafeWithFreeze()
const {
return Status == StatusTy::SafeWithFreeze; }
1867 Status = StatusTy::Unsafe;
1871 void freeze(IRBuilderBase &Builder, Instruction &UserI) {
1872 assert(isSafeWithFreeze() &&
1873 "should only be used when freezing is required");
1875 "UserI must be a user of ToFreeze");
1876 IRBuilder<>::InsertPointGuard Guard(Builder);
1881 if (
U.get() == ToFreeze)
1896 uint64_t NumElements = VecTy->getElementCount().getKnownMinValue();
1900 if (
C->getValue().ult(NumElements))
1901 return ScalarizationResult::safe();
1902 return ScalarizationResult::unsafe();
1907 return ScalarizationResult::unsafe();
1909 APInt Zero(IntWidth, 0);
1910 APInt MaxElts(IntWidth, NumElements);
1917 return ScalarizationResult::safe();
1918 return ScalarizationResult::unsafe();
1931 if (ValidIndices.
contains(IdxRange))
1932 return ScalarizationResult::safeWithFreeze(IdxBase);
1933 return ScalarizationResult::unsafe();
1945 C->getZExtValue() *
DL.getTypeStoreSize(ScalarType));
1957bool VectorCombine::foldSingleElementStore(Instruction &
I) {
1969 if (!
match(
SI->getValueOperand(),
1976 Value *SrcAddr =
Load->getPointerOperand()->stripPointerCasts();
1979 if (!
Load->isSimple() ||
Load->getParent() !=
SI->getParent() ||
1980 !
DL->typeSizeEqualsStoreSize(
Load->getType()->getScalarType()) ||
1981 SrcAddr !=
SI->getPointerOperand()->stripPointerCasts())
1984 auto ScalarizableIdx =
1986 if (ScalarizableIdx.isUnsafe() ||
1993 Worklist.
push(Load);
1995 if (ScalarizableIdx.isSafeWithFreeze())
1998 SI->getValueOperand()->getType(),
SI->getPointerOperand(),
1999 {ConstantInt::get(Idx->getType(), 0), Idx});
2003 std::max(
SI->getAlign(),
Load->getAlign()), NewElement->
getType(), Idx,
2006 replaceValue(
I, *NSI);
2016bool VectorCombine::scalarizeLoad(Instruction &
I) {
2026 if (!LI->isSimple() || !
DL->typeSizeEqualsStoreSize(VecTy->getScalarType()))
2029 bool AllExtracts =
true;
2030 bool AllBitcasts =
true;
2032 unsigned NumInstChecked = 0;
2037 for (User *U : LI->users()) {
2039 if (!UI || UI->getParent() != LI->getParent())
2044 if (UI->use_empty())
2048 AllExtracts =
false;
2050 AllBitcasts =
false;
2054 for (Instruction &
I :
2055 make_range(std::next(LI->getIterator()), UI->getIterator())) {
2062 LastCheckedInst = UI;
2067 return scalarizeLoadExtract(LI, VecTy, Ptr);
2069 return scalarizeLoadBitcast(LI, VecTy, Ptr);
2074bool VectorCombine::scalarizeLoadExtract(LoadInst *LI, VectorType *VecTy,
2079 DenseMap<ExtractElementInst *, ScalarizationResult> NeedFreeze;
2082 for (
auto &Pair : NeedFreeze)
2083 Pair.second.discard();
2091 for (User *U : LI->
users()) {
2096 if (ScalarIdx.isUnsafe())
2098 if (ScalarIdx.isSafeWithFreeze()) {
2099 NeedFreeze.try_emplace(UI, ScalarIdx);
2100 ScalarIdx.discard();
2106 Index ?
Index->getZExtValue() : -1);
2114 LLVM_DEBUG(
dbgs() <<
"Found all extractions of a vector load: " << *LI
2115 <<
"\n LoadExtractCost: " << OriginalCost
2116 <<
" vs ScalarizedCost: " << ScalarizedCost <<
"\n");
2118 if (ScalarizedCost >= OriginalCost)
2125 Type *ElemType = VecTy->getElementType();
2128 for (User *U : LI->
users()) {
2130 Value *Idx = EI->getIndexOperand();
2133 auto It = NeedFreeze.find(EI);
2134 if (It != NeedFreeze.end())
2141 Builder.
CreateLoad(ElemType,
GEP, EI->getName() +
".scalar"));
2143 Align ScalarOpAlignment =
2145 NewLoad->setAlignment(ScalarOpAlignment);
2148 size_t Offset = ConstIdx->getZExtValue() *
DL->getTypeStoreSize(ElemType);
2153 replaceValue(*EI, *NewLoad,
false);
2156 FailureGuard.release();
2161bool VectorCombine::scalarizeLoadBitcast(LoadInst *LI, VectorType *VecTy,
2167 Type *TargetScalarType =
nullptr;
2168 unsigned VecBitWidth =
DL->getTypeSizeInBits(VecTy);
2170 for (User *U : LI->
users()) {
2173 Type *DestTy = BC->getDestTy();
2177 unsigned DestBitWidth =
DL->getTypeSizeInBits(DestTy);
2178 if (DestBitWidth != VecBitWidth)
2182 if (!TargetScalarType)
2183 TargetScalarType = DestTy;
2184 else if (TargetScalarType != DestTy)
2192 if (!TargetScalarType)
2200 LLVM_DEBUG(
dbgs() <<
"Found vector load feeding only bitcasts: " << *LI
2201 <<
"\n OriginalCost: " << OriginalCost
2202 <<
" vs ScalarizedCost: " << ScalarizedCost <<
"\n");
2204 if (ScalarizedCost >= OriginalCost)
2215 ScalarLoad->copyMetadata(*LI);
2218 for (User *U : LI->
users()) {
2220 replaceValue(*BC, *ScalarLoad,
false);
2226bool VectorCombine::scalarizeExtExtract(Instruction &
I) {
2241 Type *ScalarDstTy = DstTy->getElementType();
2242 if (
DL->getTypeSizeInBits(SrcTy) !=
DL->getTypeSizeInBits(ScalarDstTy))
2248 unsigned ExtCnt = 0;
2249 bool ExtLane0 =
false;
2250 for (User *U : Ext->users()) {
2264 Instruction::And, ScalarDstTy,
CostKind,
2267 (ExtCnt - ExtLane0) *
2269 Instruction::LShr, ScalarDstTy,
CostKind,
2272 if (ScalarCost > VectorCost)
2275 Value *ScalarV = Ext->getOperand(0);
2282 SmallDenseSet<ConstantInt *, 8> ExtractedLanes;
2283 bool AllExtractsTriggerUB =
true;
2284 ExtractElementInst *LastExtract =
nullptr;
2286 for (User *U : Ext->users()) {
2289 AllExtractsTriggerUB =
false;
2293 if (!LastExtract || LastExtract->
comesBefore(Extract))
2294 LastExtract = Extract;
2296 if (ExtractedLanes.
size() != DstTy->getNumElements() ||
2297 !AllExtractsTriggerUB ||
2305 uint64_t SrcEltSizeInBits =
DL->getTypeSizeInBits(SrcTy->getElementType());
2306 uint64_t TotalBits =
DL->getTypeSizeInBits(SrcTy);
2309 Value *
Mask = ConstantInt::get(PackedTy, EltBitMask);
2310 for (User *U : Ext->users()) {
2316 ? (TotalBits - SrcEltSizeInBits - Idx * SrcEltSizeInBits)
2317 : (Idx * SrcEltSizeInBits);
2320 U->replaceAllUsesWith(
And);
2328bool VectorCombine::foldConcatOfBoolMasks(Instruction &
I) {
2329 Type *Ty =
I.getType();
2334 if (
DL->isBigEndian())
2345 uint64_t ShAmtX = 0;
2353 uint64_t ShAmtY = 0;
2361 if (ShAmtX > ShAmtY) {
2369 uint64_t ShAmtDiff = ShAmtY - ShAmtX;
2370 unsigned NumSHL = (ShAmtX > 0) + (ShAmtY > 0);
2375 MaskTy->getNumElements() != ShAmtDiff ||
2376 MaskTy->getNumElements() > (
BitWidth / 2))
2381 Type::getIntNTy(Ty->
getContext(), ConcatTy->getNumElements());
2382 auto *MaskIntTy = Type::getIntNTy(Ty->
getContext(), ShAmtDiff);
2385 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
2402 if (Ty != ConcatIntTy)
2408 LLVM_DEBUG(
dbgs() <<
"Found a concatenation of bitcasted bool masks: " <<
I
2409 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2412 if (NewCost > OldCost)
2422 if (Ty != ConcatIntTy) {
2432 replaceValue(
I, *Result);
2438bool VectorCombine::foldPermuteOfBinops(Instruction &
I) {
2439 BinaryOperator *BinOp;
2440 ArrayRef<int> OuterMask;
2448 Value *Op00, *Op01, *Op10, *Op11;
2449 ArrayRef<int> Mask0, Mask1;
2454 if (!Match0 && !Match1)
2467 if (!ShuffleDstTy || !BinOpTy || !Op0Ty || !Op1Ty)
2470 unsigned NumSrcElts = BinOpTy->getNumElements();
2475 any_of(OuterMask, [NumSrcElts](
int M) {
return M >= (int)NumSrcElts; }))
2479 SmallVector<int> NewMask0, NewMask1;
2480 for (
int M : OuterMask) {
2481 if (M < 0 || M >= (
int)NumSrcElts) {
2485 NewMask0.
push_back(Match0 ? Mask0[M] : M);
2486 NewMask1.
push_back(Match1 ? Mask1[M] : M);
2490 unsigned NumOpElts = Op0Ty->getNumElements();
2491 bool IsIdentity0 = ShuffleDstTy == Op0Ty &&
2492 all_of(NewMask0, [NumOpElts](
int M) {
return M < (int)NumOpElts; }) &&
2494 bool IsIdentity1 = ShuffleDstTy == Op1Ty &&
2495 all_of(NewMask1, [NumOpElts](
int M) {
return M < (int)NumOpElts; }) &&
2504 ShuffleDstTy, BinOpTy, OuterMask,
CostKind,
2505 0,
nullptr, {BinOp}, &
I);
2507 NewCost += BinOpCost;
2513 OldCost += Shuf0Cost;
2515 NewCost += Shuf0Cost;
2521 OldCost += Shuf1Cost;
2523 NewCost += Shuf1Cost;
2531 Op0Ty, NewMask0,
CostKind, 0,
nullptr, {Op00, Op01});
2535 Op1Ty, NewMask1,
CostKind, 0,
nullptr, {Op10, Op11});
2537 LLVM_DEBUG(
dbgs() <<
"Found a shuffle feeding a shuffled binop: " <<
I
2538 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2542 if (NewCost > OldCost)
2553 NewInst->copyIRFlags(BinOp);
2557 replaceValue(
I, *NewBO);
2563bool VectorCombine::foldShuffleOfBinops(Instruction &
I) {
2564 ArrayRef<int> OldMask;
2571 if (
LHS->getOpcode() !=
RHS->getOpcode())
2575 bool IsCommutative =
false;
2584 IsCommutative = BinaryOperator::isCommutative(BO->getOpcode());
2595 if (!ShuffleDstTy || !BinResTy || !BinOpTy ||
X->getType() !=
Z->getType())
2598 bool SameBinOp =
LHS ==
RHS;
2599 unsigned NumSrcElts = BinOpTy->getNumElements();
2602 if (IsCommutative &&
X != Z &&
Y != W && (
X == W ||
Y == Z))
2605 auto ConvertToUnary = [NumSrcElts](
int &
M) {
2606 if (M >= (
int)NumSrcElts)
2610 SmallVector<int> NewMask0(OldMask);
2619 SmallVector<int> NewMask1(OldMask);
2638 ShuffleDstTy, BinResTy, OldMask,
CostKind, 0,
2648 ArrayRef<int> InnerMask;
2650 m_Mask(InnerMask)))) &&
2653 [NumSrcElts](
int M) {
return M < (int)NumSrcElts; })) {
2665 bool ReducedInstCount =
false;
2666 ReducedInstCount |= MergeInner(
X, 0, NewMask0,
CostKind);
2667 ReducedInstCount |= MergeInner(
Y, 0, NewMask1,
CostKind);
2668 ReducedInstCount |= MergeInner(Z, NumSrcElts, NewMask0,
CostKind);
2669 ReducedInstCount |= MergeInner(W, NumSrcElts, NewMask1,
CostKind);
2670 bool SingleSrcBinOp = (
X ==
Y) && (Z == W) && (NewMask0 == NewMask1);
2675 auto *ShuffleCmpTy =
2678 SK0, ShuffleCmpTy, BinOpTy, NewMask0,
CostKind, 0,
nullptr, {
X,
Z});
2679 if (!SingleSrcBinOp)
2689 PredLHS,
CostKind, Op0Info, Op1Info);
2699 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2706 if (ReducedInstCount ? (NewCost > OldCost) : (NewCost >= OldCost))
2715 : Builder.
CreateCmp(PredLHS, Shuf0, Shuf1);
2719 NewInst->copyIRFlags(
LHS);
2720 NewInst->andIRFlags(
RHS);
2725 replaceValue(
I, *NewBO);
2732bool VectorCombine::foldShuffleOfSelects(Instruction &
I) {
2734 Value *C1, *
T1, *F1, *C2, *T2, *F2;
2745 if (!C1VecTy || !C2VecTy || C1VecTy != C2VecTy)
2751 if (((SI0FOp ==
nullptr) != (SI1FOp ==
nullptr)) ||
2752 ((SI0FOp !=
nullptr) &&
2753 (SI0FOp->getFastMathFlags() != SI1FOp->getFastMathFlags())))
2759 auto SelOp = Instruction::Select;
2767 CostSel1 + CostSel2 +
2769 {
I.getOperand(0),
I.getOperand(1)}, &
I);
2773 Mask,
CostKind, 0,
nullptr, {C1, C2});
2783 if (!Sel1->hasOneUse())
2784 NewCost += CostSel1;
2785 if (!Sel2->hasOneUse())
2786 NewCost += CostSel2;
2789 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2791 if (NewCost > OldCost)
2800 NewSel = Builder.
CreateSelectFMF(ShuffleCmp, ShuffleTrue, ShuffleFalse,
2801 SI0FOp->getFastMathFlags());
2803 NewSel = Builder.
CreateSelect(ShuffleCmp, ShuffleTrue, ShuffleFalse);
2808 replaceValue(
I, *NewSel);
2814bool VectorCombine::foldShuffleOfCastops(Instruction &
I) {
2816 ArrayRef<int> OldMask;
2825 if (!C0 || (IsBinaryShuffle && !C1))
2832 if (!IsBinaryShuffle && Opcode == Instruction::BitCast)
2835 if (IsBinaryShuffle) {
2836 if (C0->getSrcTy() != C1->getSrcTy())
2839 if (Opcode != C1->getOpcode()) {
2841 Opcode = Instruction::SExt;
2850 if (!ShuffleDstTy || !CastDstTy || !CastSrcTy)
2853 unsigned NumSrcElts = CastSrcTy->getNumElements();
2854 unsigned NumDstElts = CastDstTy->getNumElements();
2855 assert((NumDstElts == NumSrcElts || Opcode == Instruction::BitCast) &&
2856 "Only bitcasts expected to alter src/dst element counts");
2860 if (NumDstElts != NumSrcElts && (NumSrcElts % NumDstElts) != 0 &&
2861 (NumDstElts % NumSrcElts) != 0)
2864 SmallVector<int, 16> NewMask;
2865 if (NumSrcElts >= NumDstElts) {
2868 assert(NumSrcElts % NumDstElts == 0 &&
"Unexpected shuffle mask");
2869 unsigned ScaleFactor = NumSrcElts / NumDstElts;
2874 assert(NumDstElts % NumSrcElts == 0 &&
"Unexpected shuffle mask");
2875 unsigned ScaleFactor = NumDstElts / NumSrcElts;
2880 auto *NewShuffleDstTy =
2889 if (IsBinaryShuffle)
2904 if (IsBinaryShuffle) {
2914 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2916 if (NewCost > OldCost)
2920 if (IsBinaryShuffle)
2930 NewInst->copyIRFlags(C0);
2931 if (IsBinaryShuffle)
2932 NewInst->andIRFlags(C1);
2936 replaceValue(
I, *Cast);
2946bool VectorCombine::foldShuffleOfShuffles(Instruction &
I) {
2947 ArrayRef<int> OuterMask;
2948 Value *OuterV0, *OuterV1;
2953 ArrayRef<int> InnerMask0, InnerMask1;
2954 Value *X0, *X1, *Y0, *Y1;
2959 if (!Match0 && !Match1)
2964 SmallVector<int, 16> PoisonMask1;
2969 InnerMask1 = PoisonMask1;
2973 X0 = Match0 ? X0 : OuterV0;
2974 Y0 = Match0 ? Y0 : OuterV0;
2975 X1 = Match1 ? X1 : OuterV1;
2976 Y1 = Match1 ? Y1 : OuterV1;
2980 if (!ShuffleDstTy || !ShuffleSrcTy || !ShuffleImmTy ||
2984 unsigned NumSrcElts = ShuffleSrcTy->getNumElements();
2985 unsigned NumImmElts = ShuffleImmTy->getNumElements();
2990 SmallVector<int, 16> NewMask(OuterMask);
2991 Value *NewX =
nullptr, *NewY =
nullptr;
2992 for (
int &M : NewMask) {
2993 Value *Src =
nullptr;
2994 if (0 <= M && M < (
int)NumImmElts) {
2998 Src =
M >= (int)NumSrcElts ? Y0 : X0;
2999 M =
M >= (int)NumSrcElts ? (M - NumSrcElts) :
M;
3001 }
else if (M >= (
int)NumImmElts) {
3006 Src =
M >= (int)NumSrcElts ? Y1 : X1;
3007 M =
M >= (int)NumSrcElts ? (M - NumSrcElts) :
M;
3011 assert(0 <= M && M < (
int)NumSrcElts &&
"Unexpected shuffle mask index");
3020 if (!NewX || NewX == Src) {
3024 if (!NewY || NewY == Src) {
3043 replaceValue(
I, *NewX);
3060 bool IsUnary =
all_of(NewMask, [&](
int M) {
return M < (int)NumSrcElts; });
3066 nullptr, {NewX, NewY});
3068 NewCost += InnerCost0;
3070 NewCost += InnerCost1;
3073 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
3075 if (NewCost > OldCost)
3079 replaceValue(
I, *Shuf);
3095bool VectorCombine::foldShufflesOfLengthChangingShuffles(Instruction &
I) {
3100 unsigned ChainLength = 0;
3101 SmallVector<int>
Mask;
3102 SmallVector<int> YMask;
3112 ArrayRef<int> OuterMask;
3113 Value *OuterV0, *OuterV1;
3114 if (ChainLength != 0 && !Trunk->
hasOneUse())
3117 m_Mask(OuterMask))))
3119 if (OuterV0->
getType() != TrunkType) {
3125 ArrayRef<int> InnerMask0, InnerMask1;
3126 Value *A0, *A1, *B0, *B1;
3131 bool Match0Leaf = Match0 && A0->
getType() !=
I.getType();
3132 bool Match1Leaf = Match1 && A1->
getType() !=
I.getType();
3133 if (Match0Leaf == Match1Leaf) {
3139 SmallVector<int> CommutedOuterMask;
3146 for (
int &M : CommutedOuterMask) {
3149 if (M < (
int)NumTrunkElts)
3154 OuterMask = CommutedOuterMask;
3173 int NumLeafElts = YType->getNumElements();
3174 SmallVector<int> LocalYMask(InnerMask1);
3175 for (
int &M : LocalYMask) {
3176 if (M >= NumLeafElts)
3186 Mask.assign(OuterMask);
3187 YMask.
assign(LocalYMask);
3188 OldCost = NewCost = LocalOldCost;
3195 SmallVector<int> NewYMask(YMask);
3197 for (
auto [CombinedM, LeafM] :
llvm::zip(NewYMask, LocalYMask)) {
3198 if (LeafM == -1 || CombinedM == LeafM)
3200 if (CombinedM == -1) {
3210 SmallVector<int> NewMask;
3211 NewMask.
reserve(NumTrunkElts);
3212 for (
int M : Mask) {
3213 if (M < 0 || M >=
static_cast<int>(NumTrunkElts))
3228 if (LocalNewCost >= NewCost && LocalOldCost < LocalNewCost - NewCost)
3232 if (ChainLength == 1) {
3233 dbgs() <<
"Found chain of shuffles fed by length-changing shuffles: "
3236 dbgs() <<
" next chain link: " << *Trunk <<
'\n'
3237 <<
" old cost: " << (OldCost + LocalOldCost)
3238 <<
" new cost: " << LocalNewCost <<
'\n';
3243 OldCost += LocalOldCost;
3244 NewCost = LocalNewCost;
3248 if (ChainLength <= 1)
3252 return M < 0 || M >=
static_cast<int>(NumTrunkElts);
3255 for (
int &M : Mask) {
3256 if (M >=
static_cast<int>(NumTrunkElts))
3257 M = YMask[
M - NumTrunkElts];
3261 replaceValue(
I, *Root);
3268 replaceValue(
I, *Root);
3274bool VectorCombine::foldShuffleOfIntrinsics(Instruction &
I) {
3276 ArrayRef<int> OldMask;
3286 if (IID != II1->getIntrinsicID())
3295 if (!ShuffleDstTy || !II0Ty)
3301 for (
unsigned I = 0,
E = II0->arg_size();
I !=
E; ++
I) {
3302 Value *Arg0 = II0->getArgOperand(
I);
3303 Value *Arg1 = II1->getArgOperand(
I);
3320 II0Ty, OldMask,
CostKind, 0,
nullptr, {II0, II1}, &
I);
3324 SmallDenseSet<std::pair<Value *, Value *>> SeenOperandPairs;
3325 for (
unsigned I = 0,
E = II0->arg_size();
I !=
E; ++
I) {
3327 NewArgsTy.
push_back(II0->getArgOperand(
I)->getType());
3331 ShuffleDstTy->getNumElements());
3333 std::pair<Value *, Value *> OperandPair =
3334 std::make_pair(II0->getArgOperand(
I), II1->getArgOperand(
I));
3335 if (!SeenOperandPairs.
insert(OperandPair).second) {
3341 CostKind, 0,
nullptr, {II0->getArgOperand(
I), II1->getArgOperand(
I)});
3344 IntrinsicCostAttributes NewAttr(IID, ShuffleDstTy, NewArgsTy);
3347 if (!II0->hasOneUse())
3349 if (II1 != II0 && !II1->hasOneUse())
3353 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
3356 if (NewCost > OldCost)
3360 SmallDenseMap<std::pair<Value *, Value *>,
Value *> ShuffleCache;
3361 for (
unsigned I = 0,
E = II0->arg_size();
I !=
E; ++
I)
3365 std::pair<Value *, Value *> OperandPair =
3366 std::make_pair(II0->getArgOperand(
I), II1->getArgOperand(
I));
3367 auto It = ShuffleCache.
find(OperandPair);
3368 if (It != ShuffleCache.
end()) {
3374 II1->getArgOperand(
I), OldMask);
3375 ShuffleCache[OperandPair] = Shuf;
3383 NewInst->copyIRFlags(II0);
3384 NewInst->andIRFlags(II1);
3387 replaceValue(
I, *NewIntrinsic);
3393bool VectorCombine::foldPermuteOfIntrinsic(Instruction &
I) {
3405 if (!ShuffleDstTy || !IntrinsicSrcTy)
3409 unsigned NumSrcElts = IntrinsicSrcTy->getNumElements();
3410 if (
any_of(Mask, [NumSrcElts](
int M) {
return M >= (int)NumSrcElts; }))
3423 IntrinsicSrcTy, Mask,
CostKind, 0,
nullptr, {V0}, &
I);
3427 for (
unsigned I = 0,
E = II0->arg_size();
I !=
E; ++
I) {
3429 NewArgsTy.
push_back(II0->getArgOperand(
I)->getType());
3433 ShuffleDstTy->getNumElements());
3436 ArgTy, VecTy, Mask,
CostKind, 0,
nullptr,
3437 {II0->getArgOperand(
I)});
3440 IntrinsicCostAttributes NewAttr(IID, ShuffleDstTy, NewArgsTy);
3445 if (!II0->hasOneUse())
3448 LLVM_DEBUG(
dbgs() <<
"Found a permute of intrinsic: " <<
I <<
"\n OldCost: "
3449 << OldCost <<
" vs NewCost: " << NewCost <<
"\n");
3451 if (NewCost > OldCost)
3456 for (
unsigned I = 0,
E = II0->arg_size();
I !=
E; ++
I) {
3471 replaceValue(
I, *NewIntrinsic);
3481 int M = SV->getMaskValue(Lane);
3484 if (
static_cast<unsigned>(M) < NumElts) {
3485 V = SV->getOperand(0);
3488 V = SV->getOperand(1);
3499 auto [U, Lane] = IL;
3512 unsigned NumElts = Ty->getNumElements();
3513 if (Item.
size() == NumElts || NumElts == 1 || Item.
size() % NumElts != 0)
3519 std::iota(ConcatMask.
begin(), ConcatMask.
end(), 0);
3525 unsigned NumSlices = Item.
size() / NumElts;
3530 for (
unsigned Slice = 0; Slice < NumSlices; ++Slice) {
3531 Value *SliceV = Item[Slice * NumElts].first;
3532 if (!SliceV || SliceV->
getType() != Ty)
3534 for (
unsigned Elt = 0; Elt < NumElts; ++Elt) {
3535 auto [V, Lane] = Item[Slice * NumElts + Elt];
3536 if (Lane !=
static_cast<int>(Elt) || SliceV != V)
3545 const DenseSet<std::pair<Value *, Use *>> &IdentityLeafs,
3546 const DenseSet<std::pair<Value *, Use *>> &SplatLeafs,
3547 const DenseSet<std::pair<Value *, Use *>> &ConcatLeafs,
3549 auto [FrontV, FrontLane] = Item.
front();
3551 if (IdentityLeafs.contains(std::make_pair(FrontV, From))) {
3554 if (SplatLeafs.contains(std::make_pair(FrontV, From))) {
3556 return Builder.CreateShuffleVector(FrontV, Mask);
3558 if (ConcatLeafs.contains(std::make_pair(FrontV, From))) {
3562 for (
unsigned S = 0; S < Values.
size(); ++S)
3563 Values[S] = Item[S * NumElts].first;
3565 while (Values.
size() > 1) {
3568 std::iota(Mask.begin(), Mask.end(), 0);
3570 for (
unsigned S = 0; S < NewValues.
size(); ++S)
3572 Builder.CreateShuffleVector(Values[S * 2], Values[S * 2 + 1], Mask);
3580 unsigned NumOps =
I->getNumOperands() - (
II ? 1 : 0);
3582 for (
unsigned Idx = 0; Idx <
NumOps; Idx++) {
3585 Ops[Idx] =
II->getOperand(Idx);
3589 &
I->getOperandUse(Idx), Ty, IdentityLeafs,
3590 SplatLeafs, ConcatLeafs, Builder,
TTI);
3594 for (
const auto &Lane : Item)
3607 auto *
Value = Builder.CreateCmp(CI->getPredicate(),
Ops[0],
Ops[1]);
3617 auto *
Value = Builder.CreateCast(CI->getOpcode(),
Ops[0], DstTy);
3622 auto *
Value = Builder.CreateIntrinsic(DstTy,
II->getIntrinsicID(),
Ops);
3636bool VectorCombine::foldShuffleToIdentity(Instruction &
I) {
3638 if (!Ty ||
I.use_empty())
3642 for (
unsigned M = 0,
E = Ty->getNumElements(); M <
E; ++M)
3646 Worklist.
push_back(std::make_pair(Start, &*
I.use_begin()));
3647 DenseSet<std::pair<Value *, Use *>> IdentityLeafs, SplatLeafs, ConcatLeafs;
3648 unsigned NumVisited = 0;
3650 while (!Worklist.
empty()) {
3655 auto Item = ItemFrom.first;
3656 auto From = ItemFrom.second;
3657 auto [FrontV, FrontLane] = Item.front();
3665 return X->getType() ==
Y->getType() &&
3670 if (FrontLane == 0 &&
3672 Ty->getNumElements() &&
3674 Value *FrontV = Item.front().first;
3675 return !
E.value().first || (IsEquiv(
E.value().first, FrontV) &&
3676 E.value().second == (int)
E.index());
3678 IdentityLeafs.
insert(std::make_pair(FrontV, From));
3683 C &&
C->getSplatValue() &&
3685 Value *FrontV = Item.front().first;
3691 SplatLeafs.
insert(std::make_pair(FrontV, From));
3696 auto [FrontV, FrontLane] = Item.front();
3697 auto [
V, Lane] = IL;
3698 return !
V || (
V == FrontV && Lane == FrontLane);
3700 SplatLeafs.
insert(std::make_pair(FrontV, From));
3706 auto CheckLaneIsEquivalentToFirst = [Item](
InstLane IL) {
3707 Value *FrontV = Item.front().first;
3716 if (CI->getPredicate() !=
cast<CmpInst>(FrontV)->getPredicate())
3719 if (CI->getSrcTy()->getScalarType() !=
3724 SI->getOperand(0)->getType() !=
3731 II->getIntrinsicID() ==
3733 !
II->hasOperandBundles());
3740 BO && BO->isIntDivRem())
3747 }
else if (
isa<UnaryOperator, TruncInst, ZExtInst, SExtInst, FPToSIInst,
3748 FPToUIInst, SIToFPInst, UIToFPInst>(FrontV)) {
3756 if (DstTy && SrcTy &&
3757 SrcTy->getNumElements() == DstTy->getNumElements()) {
3759 &BitCast->getOperandUse(0));
3764 &Sel->getOperandUse(0));
3766 &Sel->getOperandUse(1));
3768 &Sel->getOperandUse(2));
3772 !
II->hasOperandBundles()) {
3773 for (
unsigned Op = 0,
E =
II->getNumOperands() - 1;
Op <
E;
Op++) {
3777 Value *FrontV = Item.front().first;
3793 ConcatLeafs.
insert(std::make_pair(FrontV, From));
3800 if (NumVisited <= 1)
3803 LLVM_DEBUG(
dbgs() <<
"Found a superfluous identity shuffle: " <<
I <<
"\n");
3809 SplatLeafs, ConcatLeafs, Builder, &
TTI);
3810 replaceValue(
I, *V);
3817bool VectorCombine::foldShuffleFromReductions(Instruction &
I) {
3821 switch (
II->getIntrinsicID()) {
3822 case Intrinsic::vector_reduce_add:
3823 case Intrinsic::vector_reduce_mul:
3824 case Intrinsic::vector_reduce_and:
3825 case Intrinsic::vector_reduce_or:
3826 case Intrinsic::vector_reduce_xor:
3827 case Intrinsic::vector_reduce_smin:
3828 case Intrinsic::vector_reduce_smax:
3829 case Intrinsic::vector_reduce_umin:
3830 case Intrinsic::vector_reduce_umax:
3839 std::queue<Value *> Worklist;
3840 SmallPtrSet<Value *, 4> Visited;
3841 ShuffleVectorInst *Shuffle =
nullptr;
3845 while (!Worklist.empty()) {
3846 Value *CV = Worklist.front();
3858 if (CI->isBinaryOp()) {
3859 for (
auto *
Op : CI->operand_values())
3863 if (Shuffle && Shuffle != SV)
3880 for (
auto *V : Visited)
3881 for (
auto *U :
V->users())
3882 if (!Visited.contains(U) && U != &
I)
3885 FixedVectorType *VecType =
3889 FixedVectorType *ShuffleInputType =
3891 if (!ShuffleInputType)
3897 SmallVector<int> ConcatMask;
3899 sort(ConcatMask, [](
int X,
int Y) {
return (
unsigned)
X < (unsigned)
Y; });
3900 bool UsesSecondVec =
3901 any_of(ConcatMask, [&](
int M) {
return M >= (int)NumInputElts; });
3908 ShuffleInputType, ConcatMask,
CostKind);
3910 LLVM_DEBUG(
dbgs() <<
"Found a reduction feeding from a shuffle: " << *Shuffle
3912 LLVM_DEBUG(
dbgs() <<
" OldCost: " << OldCost <<
" vs NewCost: " << NewCost
3914 bool MadeChanges =
false;
3915 if (NewCost < OldCost) {
3919 LLVM_DEBUG(
dbgs() <<
"Created new shuffle: " << *NewShuffle <<
"\n");
3920 replaceValue(*Shuffle, *NewShuffle);
3926 MadeChanges |= foldSelectShuffle(*Shuffle,
true);
3972bool VectorCombine::foldShuffleChainsToReduce(Instruction &
I) {
3974 std::queue<Value *> InstWorklist;
3978 std::optional<unsigned int> CommonCallOp = std::nullopt;
3979 std::optional<Instruction::BinaryOps> CommonBinOp = std::nullopt;
3982 FastMathFlags CommonFMF;
3983 bool IsFloatReduction =
false;
3985 bool IsFirstCallOrBinInst =
true;
3986 bool ShouldBeCallOrBinInst =
true;
3992 SmallVector<Value *, 2> PrevVecV(2,
nullptr);
4002 int64_t
VecSize = FVT->getNumElements();
4008 unsigned int NumLevels =
Log2_64_Ceil(VecSize), VisitedCnt = 0;
4009 int64_t ShuffleMaskHalf = 1, ExpectedParityMask = 0;
4019 for (
int Cur = VecSize, Mask = NumLevels - 1; Cur > 1;
4020 Cur = (Cur + 1) / 2, --
Mask) {
4022 ExpectedParityMask |= (1ll <<
Mask);
4025 InstWorklist.push(VecOpEE);
4027 bool IsPartialReduction =
false;
4028 bool HasLaneDuplication =
false;
4030 while (!InstWorklist.empty()) {
4031 Value *CI = InstWorklist.front();
4035 if (!ShouldBeCallOrBinInst)
4038 if (!IsFirstCallOrBinInst &&
any_of(PrevVecV,
equal_to(
nullptr)))
4043 if (
II != (IsFirstCallOrBinInst ? VecOpEE : PrevVecV[0]))
4045 IsFirstCallOrBinInst =
false;
4048 CommonCallOp =
II->getIntrinsicID();
4049 if (
II->getIntrinsicID() != *CommonCallOp)
4052 switch (
II->getIntrinsicID()) {
4053 case Intrinsic::umin:
4054 case Intrinsic::umax:
4055 case Intrinsic::smin:
4056 case Intrinsic::smax: {
4057 auto *Op0 =
II->getOperand(0);
4058 auto *Op1 =
II->getOperand(1);
4066 ShouldBeCallOrBinInst ^= 1;
4068 IntrinsicCostAttributes ICA(
4069 *CommonCallOp,
II->getType(),
4070 {PrevVecV[0]->getType(), PrevVecV[1]->getType()});
4077 InstWorklist.push(PrevVecV[1]);
4078 InstWorklist.push(PrevVecV[0]);
4082 if (!ShouldBeCallOrBinInst)
4085 if (!IsFirstCallOrBinInst &&
any_of(PrevVecV,
equal_to(
nullptr)))
4088 if (BinOp != (IsFirstCallOrBinInst ? VecOpEE : PrevVecV[0]))
4090 IsFirstCallOrBinInst =
false;
4098 switch (*CommonBinOp) {
4099 case BinaryOperator::Add:
4100 case BinaryOperator::Mul:
4101 case BinaryOperator::Or:
4102 case BinaryOperator::And:
4103 case BinaryOperator::Xor:
4104 case BinaryOperator::FAdd:
4105 case BinaryOperator::FMul: {
4117 if (*CommonBinOp == Instruction::FAdd ||
4118 *CommonBinOp == Instruction::FMul) {
4121 if (!IsFloatReduction) {
4123 IsFloatReduction =
true;
4129 ShouldBeCallOrBinInst ^= 1;
4136 InstWorklist.push(PrevVecV[1]);
4137 InstWorklist.push(PrevVecV[0]);
4141 if (ShouldBeCallOrBinInst ||
any_of(PrevVecV,
equal_to(
nullptr)))
4144 if (SVInst != PrevVecV[1])
4147 ArrayRef<int> CurMask;
4153 for (
int Mask = 0, MaskSize = CurMask.
size(); Mask != MaskSize; ++Mask) {
4154 if (Mask < ShuffleMaskHalf &&
4155 CurMask[Mask] != ShuffleMaskHalf + Mask - (ExpectedParityMask & 1))
4157 if (Mask >= ShuffleMaskHalf && CurMask[Mask] != -1)
4162 ShuffleMaskHalf *= 2;
4163 ShuffleMaskHalf -= (ExpectedParityMask & 1);
4164 HasLaneDuplication |= (ExpectedParityMask & 1) != 0;
4165 ExpectedParityMask >>= 1;
4168 SVInst->getType(), SVInst->getType(),
4172 if (!ExpectedParityMask && VisitedCnt == NumLevels)
4175 ShouldBeCallOrBinInst ^= 1;
4182 if (ShouldBeCallOrBinInst && VisitedCnt >= 1 && CI == PrevVecV[0] &&
4184 IsPartialReduction =
true;
4193 if (ShouldBeCallOrBinInst && !IsPartialReduction)
4198 if (HasLaneDuplication && CommonBinOp &&
4202 assert(VecSize != -1 &&
"Expected Match for Vector Size");
4204 Value *FinalVecV = PrevVecV[0];
4217 FixedVectorType *ReduceVecTy = FinalVecVTy;
4218 SmallVector<int> ExtractMask;
4220 if (IsPartialReduction) {
4221 unsigned SubVecSize = ShuffleMaskHalf;
4223 ExtractMask.
resize(SubVecSize);
4224 std::iota(ExtractMask.
begin(), ExtractMask.
end(), 0);
4226 ReduceVecTy, FinalVecVTy, ExtractMask,
4230 IntrinsicCostAttributes ICA(
4235 IsFloatReduction ? CommonFMF : FastMathFlags());
4238 LLVM_DEBUG(
dbgs() <<
"Found reduction shuffle chain: " <<
I <<
"\n OldCost : "
4239 << OrigCost <<
" vs NewCost: " << NewCost <<
"\n");
4241 if (VecOpEE->
hasOneUse() ? (NewCost > OrigCost) : (NewCost >= OrigCost))
4244 Value *ReduceInput = FinalVecV;
4245 if (IsPartialReduction)
4248 CallInst *ReducedResult;
4249 if (IsFloatReduction) {
4254 {Identity, ReduceInput});
4260 replaceValue(
I, *ReducedResult);
4269bool VectorCombine::foldCastFromReductions(Instruction &
I) {
4274 bool TruncOnly =
false;
4277 case Intrinsic::vector_reduce_add:
4278 case Intrinsic::vector_reduce_mul:
4281 case Intrinsic::vector_reduce_and:
4282 case Intrinsic::vector_reduce_or:
4283 case Intrinsic::vector_reduce_xor:
4290 Value *ReductionSrc =
I.getOperand(0);
4302 Type *ResultTy =
I.getType();
4305 ReductionOpc, ReductionSrcTy, std::nullopt,
CostKind);
4315 if (OldCost <= NewCost || !NewCost.
isValid())
4319 II->getIntrinsicID(), {Src});
4321 replaceValue(
I, *NewCast);
4349bool VectorCombine::foldSignBitReductionCmp(Instruction &
I) {
4351 IntrinsicInst *ReduceOp;
4352 const APInt *CmpVal;
4359 case Intrinsic::vector_reduce_or:
4360 case Intrinsic::vector_reduce_umax:
4361 case Intrinsic::vector_reduce_and:
4362 case Intrinsic::vector_reduce_umin:
4363 case Intrinsic::vector_reduce_add:
4374 unsigned BitWidth = VecTy->getScalarSizeInBits();
4378 unsigned NumElts = VecTy->getNumElements();
4387 case Intrinsic::vector_reduce_or:
4388 case Intrinsic::vector_reduce_umax:
4389 TreeOpcode = Instruction::Or;
4391 case Intrinsic::vector_reduce_and:
4392 case Intrinsic::vector_reduce_umin:
4393 TreeOpcode = Instruction::And;
4395 case Intrinsic::vector_reduce_add:
4396 TreeOpcode = Instruction::Add;
4404 SmallVector<Value *, 8> Worklist;
4405 SmallVector<Value *, 8> Sources;
4407 std::optional<bool> IsAShr;
4408 constexpr unsigned MaxSources = 8;
4413 while (!Worklist.
empty() && Worklist.
size() <= MaxSources &&
4414 Sources.
size() <= MaxSources) {
4423 bool ThisIsAShr = Shr->getOpcode() == Instruction::AShr;
4425 IsAShr = ThisIsAShr;
4426 else if (*IsAShr != ThisIsAShr)
4452 if (Sources.
empty() || Sources.
size() > MaxSources ||
4453 Worklist.
size() > MaxSources || !IsAShr)
4456 unsigned NumSources = Sources.
size();
4460 if (OrigIID == Intrinsic::vector_reduce_add &&
4468 (OrigIID == Intrinsic::vector_reduce_add) ? NumSources * NumElts : 1;
4471 NegativeVal.negate();
4503 TestsNegative =
false;
4504 }
else if (*CmpVal == NegativeVal) {
4505 TestsNegative =
true;
4509 IsEq = Pred == ICmpInst::ICMP_EQ;
4510 }
else if (Pred == ICmpInst::ICMP_SLT && *CmpVal == RangeHigh) {
4512 TestsNegative = (RangeHigh == NegativeVal);
4513 }
else if (Pred == ICmpInst::ICMP_SGT && *CmpVal == RangeHigh - 1) {
4515 TestsNegative = (RangeHigh == NegativeVal);
4516 }
else if (Pred == ICmpInst::ICMP_SGT && *CmpVal == RangeLow) {
4518 TestsNegative = (RangeLow == NegativeVal);
4519 }
else if (Pred == ICmpInst::ICMP_SLT && *CmpVal == RangeLow + 1) {
4521 TestsNegative = (RangeLow == NegativeVal);
4564 enum CheckKind :
unsigned {
4571 auto RequiresOr = [](CheckKind
C) ->
bool {
return C & 0b100; };
4573 auto IsNegativeCheck = [](CheckKind
C) ->
bool {
return C & 0b010; };
4575 auto Invert = [](CheckKind
C) {
return CheckKind(
C ^ 0b011); };
4579 case Intrinsic::vector_reduce_or:
4580 case Intrinsic::vector_reduce_umax:
4581 Base = TestsNegative ? AnyNeg : AllNonNeg;
4583 case Intrinsic::vector_reduce_and:
4584 case Intrinsic::vector_reduce_umin:
4585 Base = TestsNegative ? AllNeg : AnyNonNeg;
4587 case Intrinsic::vector_reduce_add:
4588 Base = TestsNegative ? AllNeg : AllNonNeg;
4603 return ArithCost <= MinMaxCost ? std::make_pair(Arith, ArithCost)
4604 : std::make_pair(MinMax, MinMaxCost);
4608 auto [NewIID, NewCost] = RequiresOr(
Check)
4609 ? PickCheaper(Intrinsic::vector_reduce_or,
4610 Intrinsic::vector_reduce_umax)
4611 : PickCheaper(
Intrinsic::vector_reduce_and,
4615 if (NumSources > 1) {
4616 unsigned CombineOpc =
4617 RequiresOr(
Check) ? Instruction::Or : Instruction::And;
4622 LLVM_DEBUG(
dbgs() <<
"Found sign-bit reduction cmp: " <<
I <<
"\n OldCost: "
4623 << OldCost <<
" vs NewCost: " << NewCost <<
"\n");
4625 if (NewCost > OldCost)
4630 Type *ScalarTy = VecTy->getScalarType();
4633 if (NumSources == 1) {
4644 replaceValue(
I, *NewCmp);
4669bool VectorCombine::foldICmpEqZeroVectorReduce(Instruction &
I) {
4680 switch (
II->getIntrinsicID()) {
4681 case Intrinsic::vector_reduce_add:
4682 case Intrinsic::vector_reduce_or:
4683 case Intrinsic::vector_reduce_umin:
4684 case Intrinsic::vector_reduce_umax:
4685 case Intrinsic::vector_reduce_smin:
4686 case Intrinsic::vector_reduce_smax:
4692 Value *InnerOp =
II->getArgOperand(0);
4735 switch (
II->getIntrinsicID()) {
4736 case Intrinsic::vector_reduce_add: {
4741 unsigned NumElems = XTy->getNumElements();
4747 if (LeadingZerosX <= LostBits || LeadingZerosFX <= LostBits)
4755 case Intrinsic::vector_reduce_smin:
4756 case Intrinsic::vector_reduce_smax:
4766 LLVM_DEBUG(
dbgs() <<
"Found a reduction to 0 comparison with removable op: "
4782 case Intrinsic::vector_reduce_add:
4783 case Intrinsic::vector_reduce_or:
4789 case Intrinsic::vector_reduce_umin:
4790 case Intrinsic::vector_reduce_umax:
4791 case Intrinsic::vector_reduce_smin:
4792 case Intrinsic::vector_reduce_smax:
4804 NewReduceCost + (InnerOp->
hasOneUse() ? 0 : ExtCost);
4806 LLVM_DEBUG(
dbgs() <<
"Found a removable extension before reduction: "
4807 << *InnerOp <<
"\n OldCost: " << OldCost
4808 <<
" vs NewCost: " << NewCost <<
"\n");
4814 if (NewCost > OldCost)
4823 Builder.
CreateICmp(Pred, NewReduce, ConstantInt::getNullValue(Ty));
4824 replaceValue(
I, *NewCmp);
4855bool VectorCombine::foldEquivalentReductionCmp(Instruction &
I) {
4858 const APInt *CmpVal;
4863 if (!
II || !
II->hasOneUse())
4866 const auto IsValidOrUmaxCmp = [&]() {
4875 bool IsPositive = CmpVal->
isAllOnes() && Pred == ICmpInst::ICMP_SGT;
4877 bool IsNegative = (CmpVal->
isZero() || CmpVal->
isOne() || *CmpVal == 2) &&
4878 Pred == ICmpInst::ICMP_SLT;
4879 return IsEquality || IsPositive || IsNegative;
4882 const auto IsValidAndUminCmp = [&]() {
4887 const auto LeadingOnes = CmpVal->
countl_one();
4894 bool IsNegative = CmpVal->
isZero() && Pred == ICmpInst::ICMP_SLT;
4903 ((*CmpVal)[0] || (*CmpVal)[1]) && Pred == ICmpInst::ICMP_SGT;
4904 return IsEquality || IsNegative || IsPositive;
4912 switch (OriginalIID) {
4913 case Intrinsic::vector_reduce_or:
4914 if (!IsValidOrUmaxCmp())
4916 AlternativeIID = Intrinsic::vector_reduce_umax;
4918 case Intrinsic::vector_reduce_umax:
4919 if (!IsValidOrUmaxCmp())
4921 AlternativeIID = Intrinsic::vector_reduce_or;
4923 case Intrinsic::vector_reduce_and:
4924 if (!IsValidAndUminCmp())
4926 AlternativeIID = Intrinsic::vector_reduce_umin;
4928 case Intrinsic::vector_reduce_umin:
4929 if (!IsValidAndUminCmp())
4931 AlternativeIID = Intrinsic::vector_reduce_and;
4944 if (ReductionOpc != Instruction::ICmp)
4955 <<
"\n OrigCost: " << OrigCost
4956 <<
" vs AltCost: " << AltCost <<
"\n");
4958 if (AltCost >= OrigCost)
4962 Type *ScalarTy = VecTy->getScalarType();
4965 Builder.
CreateICmp(Pred, NewReduce, ConstantInt::get(ScalarTy, *CmpVal));
4967 replaceValue(
I, *NewCmp);
4981 unsigned Depth = 0) {
4982 constexpr unsigned MaxLocalDepth = 2;
4983 if (
Depth > MaxLocalDepth)
4986 auto NumSignBits = [&](
const Value *
X) {
4989 if (NumSignBits(V) == V->getType()->getScalarSizeInBits())
4994 return NumSignBits(
A) >= 2 && NumSignBits(
B) >= 2 &&
5005bool VectorCombine::foldReduceAddCmpZero(Instruction &
I) {
5015 if (!VecTy || VecTy->getNumElements() < 2)
5021 if (!IsNonNegative && !IsNonPositive)
5026 unsigned NumElts = VecTy->getNumElements();
5028 if (
Log2_32(NumElts) >= NumSignBits)
5031 ICmpInst::Predicate NewPred;
5033 case ICmpInst::ICMP_EQ:
5034 case ICmpInst::ICMP_ULE:
5035 case ICmpInst::ICMP_SLE:
5036 case ICmpInst::ICMP_SGE:
5037 NewPred = ICmpInst::ICMP_EQ;
5039 case ICmpInst::ICMP_NE:
5040 case ICmpInst::ICMP_UGT:
5041 case ICmpInst::ICMP_SGT:
5042 case ICmpInst::ICMP_SLT:
5043 NewPred = ICmpInst::ICMP_NE;
5053 if (!IsNonNegative &&
5054 (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SLE))
5056 if (!IsNonPositive &&
5057 (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SGE))
5059 if ((Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SLE ||
5060 Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SGE) &&
5061 Log2_32(NumElts) >= NumSignBits - 1)
5065 Instruction::Add, VecTy, std::nullopt,
CostKind);
5067 Instruction::Or, VecTy, std::nullopt,
CostKind);
5069 Intrinsic::umax, VecTy, FastMathFlags(),
CostKind);
5072 bool UseOr = OrCost.
isValid() && (!UmaxCost.
isValid() || OrCost <= UmaxCost);
5074 if (AltCost > OrigCost)
5080 Intrinsic::vector_reduce_umax, {VecTy}, {Vec});
5081 Worklist.pushValue(NewReduce);
5083 NewPred, NewReduce, ConstantInt::getNullValue(VecTy->getScalarType()));
5084 replaceValue(
I, *NewCmp);
5093 constexpr unsigned MaxVisited = 32;
5096 bool FoundReduction =
false;
5099 while (!WorkList.
empty()) {
5101 for (
User *U :
I->users()) {
5103 if (!UI || !Visited.
insert(UI).second)
5105 if (Visited.
size() > MaxVisited)
5111 switch (
II->getIntrinsicID()) {
5112 case Intrinsic::vector_reduce_add:
5113 case Intrinsic::vector_reduce_mul:
5114 case Intrinsic::vector_reduce_and:
5115 case Intrinsic::vector_reduce_or:
5116 case Intrinsic::vector_reduce_xor:
5117 case Intrinsic::vector_reduce_smin:
5118 case Intrinsic::vector_reduce_smax:
5119 case Intrinsic::vector_reduce_umin:
5120 case Intrinsic::vector_reduce_umax:
5121 FoundReduction =
true;
5134 return FoundReduction;
5147bool VectorCombine::foldSelectShuffle(Instruction &
I,
bool FromReduction) {
5152 if (!Op0 || !Op1 || Op0 == Op1 || !Op0->isBinaryOp() || !Op1->isBinaryOp() ||
5160 SmallPtrSet<Instruction *, 4> InputShuffles({SVI0A, SVI0B, SVI1A, SVI1B});
5162 if (!
I ||
I->getOperand(0)->getType() != VT)
5164 return any_of(
I->users(), [&](User *U) {
5165 return U != Op0 && U != Op1 &&
5166 !(isa<ShuffleVectorInst>(U) &&
5167 (InputShuffles.contains(cast<Instruction>(U)) ||
5168 isInstructionTriviallyDead(cast<Instruction>(U))));
5171 if (checkSVNonOpUses(SVI0A) || checkSVNonOpUses(SVI0B) ||
5172 checkSVNonOpUses(SVI1A) || checkSVNonOpUses(SVI1B))
5180 for (
auto *U :
I->users()) {
5182 if (!SV || SV->getType() != VT)
5184 if ((SV->getOperand(0) != Op0 && SV->getOperand(0) != Op1) ||
5185 (SV->getOperand(1) != Op0 && SV->getOperand(1) != Op1))
5192 if (!collectShuffles(Op0) || !collectShuffles(Op1))
5196 if (FromReduction && Shuffles.
size() > 1)
5201 if (!FromReduction) {
5202 for (
size_t Idx = 0,
E = Shuffles.
size(); Idx !=
E; ++Idx) {
5203 for (
auto *U : Shuffles[Idx]->
users()) {
5218 int MaxV1Elt = 0, MaxV2Elt = 0;
5219 unsigned NumElts = VT->getNumElements();
5220 for (ShuffleVectorInst *SVN : Shuffles) {
5221 SmallVector<int>
Mask;
5222 SVN->getShuffleMask(Mask);
5226 Value *SVOp0 = SVN->getOperand(0);
5227 Value *SVOp1 = SVN->getOperand(1);
5232 for (
int &Elem : Mask) {
5238 if (SVOp0 == Op1 && SVOp1 == Op0) {
5242 if (SVOp0 != Op0 || SVOp1 != Op1)
5248 SmallVector<int> ReconstructMask;
5249 for (
unsigned I = 0;
I <
Mask.size();
I++) {
5252 }
else if (Mask[
I] <
static_cast<int>(NumElts)) {
5253 MaxV1Elt = std::max(MaxV1Elt, Mask[
I]);
5254 auto It =
find_if(
V1, [&](
const std::pair<int, int> &
A) {
5255 return Mask[
I] ==
A.first;
5261 V1.emplace_back(Mask[
I],
V1.size());
5264 MaxV2Elt = std::max<int>(MaxV2Elt, Mask[
I] - NumElts);
5265 auto It =
find_if(V2, [&](
const std::pair<int, int> &
A) {
5266 return Mask[
I] -
static_cast<int>(NumElts) ==
A.first;
5280 sort(ReconstructMask);
5281 OrigReconstructMasks.
push_back(std::move(ReconstructMask));
5288 if (
V1.empty() || V2.
empty() ||
5289 (MaxV1Elt ==
static_cast<int>(
V1.size()) - 1 &&
5290 MaxV2Elt ==
static_cast<int>(V2.
size()) - 1))
5302 if (InputShuffles.contains(SSV))
5304 return SV->getMaskValue(M);
5312 std::pair<int, int>
Y) {
5313 int MXA = GetBaseMaskValue(
A,
X.first);
5314 int MYA = GetBaseMaskValue(
A,
Y.first);
5318 return SortBase(SVI0A,
A,
B);
5320 stable_sort(V2, [&](std::pair<int, int>
A, std::pair<int, int>
B) {
5321 return SortBase(SVI1A,
A,
B);
5326 for (
const auto &Mask : OrigReconstructMasks) {
5327 SmallVector<int> ReconstructMask;
5328 for (
int M : Mask) {
5330 auto It =
find_if(V, [M](
auto A) {
return A.second ==
M; });
5331 assert(It !=
V.end() &&
"Expected all entries in Mask");
5332 return std::distance(
V.begin(), It);
5336 else if (M <
static_cast<int>(NumElts)) {
5339 ReconstructMask.
push_back(NumElts + FindIndex(V2, M));
5342 ReconstructMasks.
push_back(std::move(ReconstructMask));
5347 SmallVector<int> V1A, V1B, V2A, V2B;
5348 for (
unsigned I = 0;
I <
V1.size();
I++) {
5352 for (
unsigned I = 0;
I < V2.
size();
I++) {
5353 V2A.
push_back(GetBaseMaskValue(SVI1A, V2[
I].first));
5354 V2B.
push_back(GetBaseMaskValue(SVI1B, V2[
I].first));
5356 while (V1A.
size() < NumElts) {
5360 while (V2A.
size() < NumElts) {
5372 VT, VT, SV->getShuffleMask(),
CostKind);
5379 unsigned ElementSize = VT->getElementType()->getPrimitiveSizeInBits();
5380 unsigned MaxVectorSize =
5382 unsigned MaxElementsInVector = MaxVectorSize / ElementSize;
5383 if (MaxElementsInVector == 0)
5392 std::set<SmallVector<int, 4>> UniqueShuffles;
5397 unsigned NumFullVectors =
Mask.size() / MaxElementsInVector;
5398 if (NumFullVectors < 2)
5399 return C + ShuffleCost;
5400 SmallVector<int, 4> SubShuffle(MaxElementsInVector);
5401 unsigned NumUniqueGroups = 0;
5402 unsigned NumGroups =
Mask.size() / MaxElementsInVector;
5405 for (
unsigned I = 0;
I < NumFullVectors; ++
I) {
5406 for (
unsigned J = 0; J < MaxElementsInVector; ++J)
5407 SubShuffle[J] = Mask[MaxElementsInVector *
I + J];
5408 if (UniqueShuffles.insert(SubShuffle).second)
5409 NumUniqueGroups += 1;
5411 return C + ShuffleCost * NumUniqueGroups / NumGroups;
5417 SmallVector<int, 16>
Mask;
5418 SV->getShuffleMask(Mask);
5419 return AddShuffleMaskAdjustedCost(
C, Mask);
5422 auto AllShufflesHaveSameOperands =
5423 [](SmallPtrSetImpl<Instruction *> &InputShuffles) {
5424 if (InputShuffles.size() < 2)
5426 ShuffleVectorInst *FirstSV =
5433 std::next(InputShuffles.begin()), InputShuffles.end(),
5434 [&](Instruction *
I) {
5435 ShuffleVectorInst *SV = dyn_cast<ShuffleVectorInst>(I);
5436 return SV && SV->getOperand(0) == In0 && SV->getOperand(1) == In1;
5445 CostBefore += std::accumulate(Shuffles.begin(), Shuffles.end(),
5447 if (AllShufflesHaveSameOperands(InputShuffles)) {
5448 UniqueShuffles.clear();
5449 CostBefore += std::accumulate(InputShuffles.begin(), InputShuffles.end(),
5452 CostBefore += std::accumulate(InputShuffles.begin(), InputShuffles.end(),
5458 FixedVectorType *Op0SmallVT =
5460 FixedVectorType *Op1SmallVT =
5465 UniqueShuffles.clear();
5466 CostAfter += std::accumulate(ReconstructMasks.begin(), ReconstructMasks.end(),
5468 std::set<SmallVector<int>> OutputShuffleMasks({V1A, V1B, V2A, V2B});
5470 std::accumulate(OutputShuffleMasks.begin(), OutputShuffleMasks.end(),
5473 LLVM_DEBUG(
dbgs() <<
"Found a binop select shuffle pattern: " <<
I <<
"\n");
5475 <<
" vs CostAfter: " << CostAfter <<
"\n");
5476 if (CostBefore < CostAfter ||
5487 if (InputShuffles.contains(SSV))
5489 return SV->getOperand(
Op);
5493 GetShuffleOperand(SVI0A, 1), V1A);
5496 GetShuffleOperand(SVI0B, 1), V1B);
5499 GetShuffleOperand(SVI1A, 1), V2A);
5502 GetShuffleOperand(SVI1B, 1), V2B);
5507 I->copyIRFlags(Op0,
true);
5512 I->copyIRFlags(Op1,
true);
5514 for (
int S = 0,
E = ReconstructMasks.size(); S !=
E; S++) {
5517 replaceValue(*Shuffles[S], *NSV,
false);
5520 Worklist.pushValue(NSV0A);
5521 Worklist.pushValue(NSV0B);
5522 Worklist.pushValue(NSV1A);
5523 Worklist.pushValue(NSV1B);
5533bool VectorCombine::shrinkType(Instruction &
I) {
5534 Value *ZExted, *OtherOperand;
5540 Value *ZExtOperand =
I.getOperand(
I.getOperand(0) == OtherOperand ? 1 : 0);
5544 unsigned BW = SmallTy->getElementType()->getPrimitiveSizeInBits();
5546 if (
I.getOpcode() == Instruction::LShr) {
5563 Instruction::ZExt, BigTy, SmallTy,
5564 TargetTransformInfo::CastContextHint::None,
CostKind);
5569 for (User *U : ZExtOperand->
users()) {
5576 ShrinkCost += ZExtCost;
5591 ShrinkCost += ZExtCost;
5598 Instruction::Trunc, SmallTy, BigTy,
5599 TargetTransformInfo::CastContextHint::None,
CostKind);
5604 if (ShrinkCost > CurrentCost)
5608 Value *Op0 = ZExted;
5611 if (
I.getOperand(0) == OtherOperand)
5618 replaceValue(
I, *NewZExtr);
5624bool VectorCombine::foldInsExtVectorToShuffle(Instruction &
I) {
5625 Value *DstVec, *SrcVec;
5626 uint64_t ExtIdx, InsIdx;
5636 if (!DstVecTy || !SrcVecTy ||
5642 if (InsIdx >= NumDstElts || ExtIdx >= NumSrcElts || NumDstElts == 1)
5649 bool NeedExpOrNarrow = NumSrcElts != NumDstElts;
5651 if (NeedDstSrcSwap) {
5653 Mask[InsIdx] = ExtIdx % NumDstElts;
5657 std::iota(
Mask.begin(),
Mask.end(), 0);
5658 Mask[InsIdx] = (ExtIdx % NumDstElts) + NumDstElts;
5671 SmallVector<int> ExtToVecMask;
5672 if (!NeedExpOrNarrow) {
5677 nullptr, {DstVec, SrcVec});
5683 ExtToVecMask[ExtIdx % NumDstElts] = ExtIdx;
5686 DstVecTy, SrcVecTy, ExtToVecMask,
CostKind);
5690 if (!Ext->hasOneUse())
5693 LLVM_DEBUG(
dbgs() <<
"Found a insert/extract shuffle-like pair: " <<
I
5694 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
5697 if (OldCost < NewCost)
5700 if (NeedExpOrNarrow) {
5701 if (!NeedDstSrcSwap)
5714 replaceValue(
I, *Shuf);
5723bool VectorCombine::foldInterleaveIntrinsics(Instruction &
I) {
5724 const APInt *SplatVal0, *SplatVal1;
5734 auto *ExtVTy = VectorType::getExtendedElementVectorType(VTy);
5735 unsigned Width = VTy->getElementType()->getIntegerBitWidth();
5744 LLVM_DEBUG(
dbgs() <<
"VC: The cost to cast from " << *ExtVTy <<
" to "
5745 << *
I.getType() <<
" is too high.\n");
5749 APInt NewSplatVal = SplatVal1->
zext(Width * 2);
5750 NewSplatVal <<= Width;
5751 NewSplatVal |= SplatVal0->
zext(Width * 2);
5753 ExtVTy->getElementCount(), ConstantInt::get(
F.getContext(), NewSplatVal));
5788bool VectorCombine::foldDeinterleaveIntrinsics(Instruction &
I) {
5790 if (
DL->isBigEndian())
5793 using namespace PatternMatch;
5794 Value *DeinterleavedVal;
5805 unsigned HalfElementWidth = ElementWidth / 2;
5809 std::array<ExtractValueInst *, 2> OrigFields{};
5810 for (User *Usr :
I.users()) {
5813 if (!
E ||
E->getNumIndices() != 1)
5815 unsigned Idx = *
E->idx_begin();
5817 if (Idx >= 2 || OrigFields[Idx] || !
E->hasNUses(2))
5819 OrigFields[Idx] =
E;
5823 SmallVector<Instruction *, 2> MergeInsts;
5824 for (
auto *FieldUsr : OrigFields[0]->
users()) {
5832 auto MatchMerge = [&](void) ->
bool {
5835 return match(MergeInsts[0],
5839 match(MergeInsts[1],
5844 if (!MatchMerge()) {
5845 std::swap(MergeInsts[0], MergeInsts[1]);
5860 auto *NewFieldTy = VecTy->getWithNewBitWidth(HalfElementWidth);
5870 if (OldCost <= NewCost || !NewCost.
isValid()) {
5872 dbgs() <<
"VC: New deinterleave2 sequence cost (" << NewCost <<
")"
5873 <<
" is higher than that of the old one (" << OldCost <<
")\n");
5881 Intrinsic::vector_deinterleave2, {NewVecTy}, {NewVecCast});
5882 for (
auto [Idx, MergeInst] :
enumerate(MergeInsts)) {
5884 NewField = Builder.
CreateBitCast(NewField, MergeInst->getType());
5885 replaceValue(*MergeInst, *NewField);
5891bool VectorCombine::foldBitcastOfVPLoad(Instruction &
I) {
5892 const DataLayout &
DL =
I.getDataLayout();
5907 DL.getValueOrABITypeAlignment(
II->getPointerAlignment(), OrigVecTy);
5908 ElementCount OrigVecCnt = OrigVecTy->getElementCount();
5910 ElementCount NewVecCnt = NewVecTy->getElementCount();
5922 II->getMemoryPointerParam(),
false,
5928 {Intrinsic::vp_load, NewVecTy,
II->getMemoryPointerParam(),
false,
5932 <<
" NewCost=" << NewCost <<
"\n");
5933 if (NewCost > OldCost || !NewCost.
isValid())
5941 {
II->getMemoryPointerParam(), NewMask, NewEVL});
5944 0, AttrBuilder(
II->getContext()).addAlignmentAttr(OrigAlign));
5945 replaceValue(*Cast, *NewVP);
5950bool VectorCombine::shrinkLoadForShuffles(Instruction &
I) {
5952 if (!OldLoad || !OldLoad->isSimple())
5959 unsigned const OldNumElements = OldLoadTy->getNumElements();
5965 using IndexRange = std::pair<int, int>;
5966 auto GetIndexRangeInShuffles = [&]() -> std::optional<IndexRange> {
5967 IndexRange OutputRange = IndexRange(OldNumElements, -1);
5968 for (llvm::Use &Use :
I.uses()) {
5970 User *Shuffle =
Use.getUser();
5975 return std::nullopt;
5982 for (
int Index : Mask) {
5983 if (Index >= 0 && Index <
static_cast<int>(OldNumElements)) {
5984 OutputRange.first = std::min(Index, OutputRange.first);
5985 OutputRange.second = std::max(Index, OutputRange.second);
5990 if (OutputRange.second < OutputRange.first)
5991 return std::nullopt;
5997 if (std::optional<IndexRange> Indices = GetIndexRangeInShuffles()) {
5998 unsigned const NewNumElements = Indices->second + 1u;
6002 if (NewNumElements < OldNumElements) {
6007 Type *ElemTy = OldLoadTy->getElementType();
6009 Value *PtrOp = OldLoad->getPointerOperand();
6012 Instruction::Load, OldLoad->getType(), OldLoad->getAlign(),
6013 OldLoad->getPointerAddressSpace(),
CostKind);
6016 OldLoad->getPointerAddressSpace(),
CostKind);
6018 using UseEntry = std::pair<ShuffleVectorInst *, std::vector<int>>;
6020 unsigned const MaxIndex = NewNumElements * 2u;
6022 for (llvm::Use &Use :
I.uses()) {
6029 ArrayRef<int> OldMask = Shuffle->getShuffleMask();
6035 for (
int Index : OldMask) {
6036 if (Index >=
static_cast<int>(MaxIndex))
6050 dbgs() <<
"Found a load used only by shufflevector instructions: "
6051 <<
I <<
"\n OldCost: " << OldCost
6052 <<
" vs NewCost: " << NewCost <<
"\n");
6054 if (OldCost < NewCost || !NewCost.
isValid())
6060 NewLoad->copyMetadata(
I);
6063 for (UseEntry &Use : NewUses) {
6064 ShuffleVectorInst *Shuffle =
Use.first;
6065 std::vector<int> &NewMask =
Use.second;
6072 replaceValue(*Shuffle, *NewShuffle,
false);
6085bool VectorCombine::shrinkPhiOfShuffles(Instruction &
I) {
6087 if (!Phi ||
Phi->getNumIncomingValues() != 2u)
6091 ArrayRef<int> Mask0;
6092 ArrayRef<int> Mask1;
6105 auto const InputNumElements = InputVT->getNumElements();
6107 if (InputNumElements >= ResultVT->getNumElements())
6112 SmallVector<int, 16> NewMask;
6115 for (
auto [
M0,
M1] :
zip(Mask0, Mask1)) {
6116 if (
M0 >= 0 &&
M1 >= 0)
6118 else if (
M0 == -1 &&
M1 == -1)
6131 int MaskOffset = NewMask[0
u];
6132 unsigned Index = (InputNumElements + MaskOffset) % InputNumElements;
6135 for (
unsigned I = 0u;
I < InputNumElements; ++
I) {
6149 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
6152 if (NewCost > OldCost)
6164 auto *NewPhi = Builder.
CreatePHI(NewShuf0->getType(), 2u);
6166 NewPhi->addIncoming(
Op,
Phi->getIncomingBlock(1u));
6172 replaceValue(*Phi, *NewShuf1);
6178bool VectorCombine::run() {
6192 auto Opcode =
I.getOpcode();
6200 if (IsFixedVectorType) {
6202 case Instruction::InsertElement:
6203 if (vectorizeLoadInsert(
I))
6206 case Instruction::ShuffleVector:
6207 if (widenSubvectorLoad(
I))
6218 if (scalarizeOpOrCmp(
I))
6220 if (scalarizeLoad(
I))
6222 if (scalarizeExtExtract(
I))
6224 if (scalarizeVPIntrinsic(
I))
6226 if (foldInterleaveIntrinsics(
I))
6228 if (foldBitcastOfVPLoad(
I))
6232 if (foldDeinterleaveIntrinsics(
I))
6235 if (Opcode == Instruction::Store)
6236 if (foldSingleElementStore(
I))
6240 if (TryEarlyFoldsOnly)
6247 if (IsFixedVectorType) {
6249 case Instruction::InsertElement:
6250 if (foldInsExtFNeg(
I))
6252 if (foldInsExtBinop(
I))
6254 if (foldInsExtVectorToShuffle(
I))
6257 case Instruction::ShuffleVector:
6258 if (foldPermuteOfBinops(
I))
6260 if (foldShuffleOfBinops(
I))
6262 if (foldShuffleOfSelects(
I))
6264 if (foldShuffleOfCastops(
I))
6266 if (foldShuffleOfShuffles(
I))
6268 if (foldPermuteOfIntrinsic(
I))
6270 if (foldShufflesOfLengthChangingShuffles(
I))
6272 if (foldShuffleOfIntrinsics(
I))
6274 if (foldSelectShuffle(
I))
6276 if (foldShuffleToIdentity(
I))
6279 case Instruction::Load:
6280 if (shrinkLoadForShuffles(
I))
6283 case Instruction::BitCast:
6284 if (foldBitcastShuffle(
I))
6286 if (foldSelectsFromBitcast(
I))
6289 case Instruction::And:
6290 case Instruction::Or:
6291 case Instruction::Xor:
6292 if (foldBitOpOfCastops(
I))
6294 if (foldBitOpOfCastConstant(
I))
6297 case Instruction::PHI:
6298 if (shrinkPhiOfShuffles(
I))
6308 case Instruction::Call:
6309 if (foldShuffleFromReductions(
I))
6311 if (foldCastFromReductions(
I))
6314 case Instruction::ExtractElement:
6315 if (foldShuffleChainsToReduce(
I))
6318 case Instruction::ICmp:
6319 if (foldSignBitReductionCmp(
I))
6321 if (foldICmpEqZeroVectorReduce(
I))
6323 if (foldEquivalentReductionCmp(
I))
6325 if (foldReduceAddCmpZero(
I))
6328 case Instruction::FCmp:
6329 if (foldExtractExtract(
I))
6332 case Instruction::Or:
6333 if (foldConcatOfBoolMasks(
I))
6338 if (foldExtractExtract(
I))
6340 if (foldExtractedCmps(
I))
6342 if (foldBinopOfReductions(
I))
6351 bool MadeChange =
false;
6352 for (BasicBlock &BB :
F) {
6364 if (!
I->isDebugOrPseudoInst())
6365 MadeChange |= FoldInst(*
I);
6372 while (!Worklist.isEmpty()) {
6382 MadeChange |= FoldInst(*
I);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< unsigned > MaxInstrsToScan("aggressive-instcombine-max-scan-instrs", cl::init(64), cl::Hidden, cl::desc("Max number of instructions to scan for aggressive instcombine."))
This is the interface for LLVM's primary stateless and local alias analysis.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
static cl::opt< IntrinsicCostStrategy > IntrinsicCost("intrinsic-cost-strategy", cl::desc("Costing strategy for intrinsic instructions"), cl::init(IntrinsicCostStrategy::InstructionCost), cl::values(clEnumValN(IntrinsicCostStrategy::InstructionCost, "instruction-cost", "Use TargetTransformInfo::getInstructionCost"), clEnumValN(IntrinsicCostStrategy::IntrinsicCost, "intrinsic-cost", "Use TargetTransformInfo::getIntrinsicInstrCost"), clEnumValN(IntrinsicCostStrategy::TypeBasedIntrinsicCost, "type-based-intrinsic-cost", "Calculate the intrinsic cost based only on argument types")))
This file defines the DenseMap class.
This is the interface for a simple mod/ref and alias analysis over globals.
static Value * getOpcode(Value &V, Type &Ty, InstrumentationConfig &IConf, InstrumentorIRBuilderTy &IIRB)
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
FunctionAnalysisManager FAM
const SmallVectorImpl< MachineOperand > & Cond
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static SymbolRef::Type getType(const Symbol *Sym)
static bool isFreeConcat(ArrayRef< InstLane > Item, TTI::TargetCostKind CostKind, const TargetTransformInfo &TTI)
Detect concat of multiple values into a vector.
static void analyzeCostOfVecReduction(const IntrinsicInst &II, TTI::TargetCostKind CostKind, const TargetTransformInfo &TTI, InstructionCost &CostBeforeReduction, InstructionCost &CostAfterReduction)
static SmallVector< InstLane > generateInstLaneVectorFromOperand(ArrayRef< InstLane > Item, int Op)
static Value * createShiftShuffle(Value *Vec, unsigned OldIndex, unsigned NewIndex, IRBuilderBase &Builder)
Create a shuffle that translates (shifts) 1 element from the input vector to a new element location.
static Value * generateNewInstTree(ArrayRef< InstLane > Item, Use *From, FixedVectorType *Ty, const DenseSet< std::pair< Value *, Use * > > &IdentityLeafs, const DenseSet< std::pair< Value *, Use * > > &SplatLeafs, const DenseSet< std::pair< Value *, Use * > > &ConcatLeafs, IRBuilderBase &Builder, const TargetTransformInfo *TTI)
std::pair< Value *, int > InstLane
static bool isKnownNonPositive(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Used by foldReduceAddCmpZero to check if we can prove that a value is non-positive.
static Align computeAlignmentAfterScalarization(Align VectorAlignment, Type *ScalarType, Value *Idx, const DataLayout &DL)
The memory operation on a vector of ScalarType had alignment of VectorAlignment.
static bool feedsIntoVectorReduction(ShuffleVectorInst *SVI)
Returns true if this ShuffleVectorInst eventually feeds into a vector reduction intrinsic (e....
static cl::opt< bool > DisableVectorCombine("disable-vector-combine", cl::init(false), cl::Hidden, cl::desc("Disable all vector combine transforms"))
static bool canWidenLoad(LoadInst *Load, const TargetTransformInfo &TTI)
static const unsigned InvalidIndex
static Value * translateExtract(ExtractElementInst *ExtElt, unsigned NewIndex, IRBuilderBase &Builder)
Given an extract element instruction with constant index operand, shuffle the source vector (shift th...
static ScalarizationResult canScalarizeAccess(VectorType *VecTy, Value *Idx, const SimplifyQuery &SQ)
Check if it is legal to scalarize a memory access to VecTy at index Idx.
static cl::opt< unsigned > MaxInstrsToScan("vector-combine-max-scan-instrs", cl::init(30), cl::Hidden, cl::desc("Max number of instructions to scan for vector combining."))
static cl::opt< bool > DisableBinopExtractShuffle("disable-binop-extract-shuffle", cl::init(false), cl::Hidden, cl::desc("Disable binop extract to shuffle transforms"))
static InstLane lookThroughShuffles(Value *V, int Lane)
static bool isMemModifiedBetween(BasicBlock::iterator Begin, BasicBlock::iterator End, const MemoryLocation &Loc, AAResults &AA)
static constexpr int Concat[]
A manager for alias analyses.
Class for arbitrary precision integers.
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
unsigned getBitWidth() const
Return the number of bits in the APInt.
bool isNegative() const
Determine sign of this APInt.
unsigned countl_one() const
Count the number of leading one bits.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
bool isOne() const
Determine if this is a value of 1.
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & front() const
Get the first element.
size_t size() const
Get the array size.
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM_ABI bool hasAttribute(Attribute::AttrKind Kind) const
Return true if the attribute exists in this set.
InstListType::iterator iterator
Instruction iterators...
BinaryOps getOpcode() const
Represents analyses that only rely on functions' control flow.
Value * getArgOperand(unsigned i) const
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
void addParamAttrs(unsigned ArgNo, const AttrBuilder &B)
Adds attributes to the indicated argument.
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
bool isFPPredicate() const
static LLVM_ABI std::optional< CmpPredicate > getMatching(CmpPredicate A, CmpPredicate B)
Compares two CmpPredicates taking samesign into account and returns the canonicalized CmpPredicate if...
static LLVM_ABI Constant * getExtractElement(Constant *Vec, Constant *Idx, Type *OnlyIfReducedTy=nullptr)
static LLVM_ABI Constant * getBinOpIdentity(unsigned Opcode, Type *Ty, bool AllowRHSConstant=false, bool NSZ=false)
Return the identity constant for a binary opcode.
This is the shared class of boolean and integer constants.
const APInt & getValue() const
Return the constant as an APInt value reference.
This class represents a range of values.
LLVM_ABI ConstantRange urem(const ConstantRange &Other) const
Return a new range representing the possible values resulting from an unsigned remainder operation of...
LLVM_ABI ConstantRange binaryAnd(const ConstantRange &Other) const
Return a new range representing the possible values resulting from a binary-and of a value in this ra...
LLVM_ABI bool contains(const APInt &Val) const
Return true if the specified value is in the set.
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
iterator find(const_arg_type_t< KeyT > Val)
Implements a dense probed hash-table based set.
Analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
LLVM_ABI bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Convenience struct for specifying and reasoning about fast-math flags.
bool noSignedZeros() const
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static FixedVectorType * getDoubleElementsVectorType(FixedVectorType *VTy)
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Predicate getSignedPredicate() const
For example, EQ->EQ, SLE->SLE, UGT->SGT, etc.
bool isEquality() const
Return true if this predicate is either EQ or NE.
Common base class shared among various IRBuilders.
Value * CreateNUWMul(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
LLVM_ABI Value * CreateSelectFMF(Value *C, Value *True, Value *False, FMFSource FMFSource, const Twine &Name="", Instruction *MDFrom=nullptr)
LLVM_ABI Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
ConstantInt * getTrue()
Get the constant value for i1 true.
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > OverloadTypes, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="", ArrayRef< OperandBundleDef > OpBundles={})
Create a call to intrinsic ID with Args, mangled using OverloadTypes.
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Value * CreateFreeze(Value *V, const Twine &Name="")
void SetCurrentDebugLocation(const DebugLoc &L)
Set location information used by debugging information.
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="", MDNode *FPMathTag=nullptr, FMFSource FMFSource={})
Value * CreateIsNotNeg(Value *Arg, const Twine &Name="")
Return a boolean value testing if Arg > -1.
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Value * CreatePointerBitCastOrAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
LLVM_ABI CallInst * CreateOrReduce(Value *Src)
Create a vector int OR reduction intrinsic of the source vector.
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
InstTy * Insert(InstTy *I, const Twine &Name="") const
Insert and return the specified instruction.
Value * CreateIsNeg(Value *Arg, const Twine &Name="")
Return a boolean value testing if Arg < 0.
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
LLVM_ABI Value * CreateNAryOp(unsigned Opc, ArrayRef< Value * > Ops, const Twine &Name="", MDNode *FPMathTag=nullptr)
Create either a UnaryOperator or BinaryOperator depending on Opc.
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Value * CreateFNegFMF(Value *V, FMFSource FMFSource, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
CostType getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
void push(Instruction *I)
Push the instruction onto the worklist stack.
LLVM_ABI void setHasNoUnsignedWrap(bool b=true)
Set or clear the nuw flag on this instruction, which must be an operator which supports this flag.
LLVM_ABI void copyIRFlags(const Value *V, bool IncludeWrapFlags=true)
Convenience method to copy supported exact, fast-math, and (optionally) wrapping flags from V to this...
LLVM_ABI void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI void andIRFlags(const Value *V)
Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction.
LLVM_ABI void setFastMathFlags(FastMathFlags FMF)
Convenience function for setting multiple fast-math flags on this instruction, which must be an opera...
LLVM_ABI void setNonNeg(bool b=true)
Set or clear the nneg flag on this instruction, which must be a zext instruction.
LLVM_ABI bool comesBefore(const Instruction *Other) const
Given an instruction Other in the same basic block as this instruction, return true if this instructi...
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
LLVM_ABI AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
bool isIdempotent() const
Return true if the instruction is idempotent:
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
LLVM_ABI bool hasAllowReassoc() const LLVM_READONLY
Determine whether the allow-reassociation flag is set.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
An instruction for reading from memory.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
void setAlignment(Align Align)
Type * getPointerOperandType() const
Align getAlign() const
Return the alignment of the access that is being performed.
Representation for a specific memory location.
static LLVM_ABI MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
const SDValue & getOperand(unsigned Num) const
This instruction constructs a fixed permutation of two input vectors.
int getMaskValue(unsigned Elt) const
Return the shuffle mask value of this instruction for the given element index.
VectorType * getType() const
Overload to return most specific vector type.
static LLVM_ABI void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static LLVM_ABI bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
static void commuteShuffleMask(MutableArrayRef< int > Mask, unsigned InVecNumElts)
Change values in a shuffle permute mask assuming the two vector operands of length InVecNumElts have ...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
void assign(size_type NumElts, ValueParamT Elt)
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
void setAlignment(Align Align)
Analysis pass providing the TargetTransformInfo.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isPointerTy() const
True if this is an instance of PointerType.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
A Use represents the edge between a Value definition and its users.
Value * getOperand(unsigned i) const
static LLVM_ABI bool isVPBinOp(Intrinsic::ID ID)
std::optional< unsigned > getFunctionalIntrinsicID() const
std::optional< unsigned > getFunctionalOpcode() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
This is a wrapper around stripAndAccumulateConstantOffsets with the in-bounds requirement set to fals...
LLVM_ABI bool hasOneUser() const
Return true if there is exactly one user of this value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
unsigned getValueID() const
Return an ID for the concrete type of this object.
LLVM_ABI bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &)
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Type * getElementType() const
std::pair< iterator, bool > insert(const ValueT &V)
constexpr bool hasKnownScalarFactor(const FixedOrScalableQuantity &RHS) const
Returns true if there exists a value X where RHS.multiplyCoefficientBy(X) will result in a value whos...
constexpr ScalarTy getKnownScalarFactor(const FixedOrScalableQuantity &RHS) const
Returns a value X where RHS.multiplyCoefficientBy(X) will result in a value whose quantity matches ou...
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
const ParentTy * getParent() const
self_iterator getIterator()
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Abstract Attribute helper functions.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
const APInt & smin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be signed.
const APInt & smax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be signed.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
LLVM_ABI AttributeSet getFnAttributes(LLVMContext &C, ID id)
Return the function attributes for an intrinsic.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
match_combine_and< Ty... > m_CombineAnd(const Ty &...Ps)
Combine pattern matchers matching all of Ps patterns.
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
auto m_Cmp()
Matches any compare instruction and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::URem > m_URem(const LHS &L, const RHS &R)
auto m_Poison()
Match an arbitrary poison constant.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
match_bind< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
DisjointOr_match< LHS, RHS > m_DisjointOr(const LHS &L, const RHS &R)
BinOpPred_match< LHS, RHS, is_right_shift_op > m_Shr(const LHS &L, const RHS &R)
Matches logical shift operations.
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
auto m_BinOp()
Match an arbitrary binary operation and ignore it.
auto m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
auto m_Constant()
Match an arbitrary Constant and ignore it.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
cst_pred_ty< is_non_zero_int > m_NonZeroInt()
Match a non-zero integer or a vector with all non-zero elements.
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Shl, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWShl(const LHS &L, const RHS &R)
auto m_AnyIntrinsic()
Matches any intrinsic call and ignore it.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Mul, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWMul(const LHS &L, const RHS &R)
BinOpPred_match< LHS, RHS, is_bitwiselogic_op, true > m_c_BitwiseLogic(const LHS &L, const RHS &R)
Matches bitwise logic operations in either order.
CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
match_combine_or< CastInst_match< OpTy, SExtInst >, NNegZExt_match< OpTy > > m_SExtLike(const OpTy &Op)
Match either "sext" or "zext nneg".
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
m_Intrinsic_Ty< Opnd >::Ty m_Deinterleave2(const Opnd &Op)
auto m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
@ Valid
The data is already valid.
initializer< Ty > init(const Ty &Val)
DXILDebugInfoMap run(Module &M)
@ User
could "use" a pointer
NodeAddr< PhiNode * > Phi
NodeAddr< UseNode * > Use
friend class Instruction
Iterator for Instructions in a `BasicBlock.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
FunctionAddr VTableAddr Value
void stable_sort(R &&Range)
UnaryFunction for_each(R &&Range, UnaryFunction F)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI Intrinsic::ID getMinMaxReductionIntrinsicOp(Intrinsic::ID RdxID)
Returns the min/max intrinsic used when expanding a min/max reduction.
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
unsigned Log2_64_Ceil(uint64_t Value)
Return the ceil log base 2 of the specified value, 64 if the value is zero.
LLVM_ABI Value * simplifyUnOp(unsigned Opcode, Value *Op, const SimplifyQuery &Q)
Given operand for a UnaryOperator, fold the result or return null.
scope_exit(Callable) -> scope_exit< Callable >
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI unsigned getArithmeticReductionInstruction(Intrinsic::ID RdxID)
Returns the arithmetic instruction opcode used when expanding a reduction.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
LLVM_ABI Value * simplifyCall(CallBase *Call, Value *Callee, ArrayRef< Value * > Args, const SimplifyQuery &Q)
Given a callsite, callee, and arguments, fold the result or return null.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
LLVM_ABI bool mustSuppressSpeculation(const LoadInst &LI)
Return true if speculation of the given load must be suppressed to avoid ordering or interfering with...
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like a...
unsigned M1(unsigned Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
bool isModSet(const ModRefInfo MRI)
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI bool programUndefinedIfPoison(const Instruction *Inst)
LLVM_ABI bool isSafeToLoadUnconditionally(Value *V, Align Alignment, const APInt &Size, const DataLayout &DL, Instruction *ScanFrom, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if we know that executing a load from this value cannot trap.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
FunctionAddr VTableAddr Count
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ABI void propagateIRFlags(Value *I, ArrayRef< Value * > VL, Value *OpValue=nullptr, bool IncludeWrapFlags=true)
Get the intersection (logical and) of all of the potential IR flags of each scalar operation (VL) tha...
LLVM_ABI bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
constexpr int PoisonMaskElem
LLVM_ABI bool isSafeToSpeculativelyExecuteWithOpcode(unsigned Opcode, const Instruction *Inst, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
This returns the same result as isSafeToSpeculativelyExecute if Opcode is the actual opcode of Inst.
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
LLVM_ABI Value * simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a BinaryOperator, fold the result or return null.
LLVM_ABI void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
LLVM_ABI Intrinsic::ID getReductionForBinop(Instruction::BinaryOps Opc)
Returns the reduction intrinsic id corresponding to the binary operation.
@ And
Bitwise or logical AND of integers.
LLVM_ABI bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
LLVM_ABI unsigned ComputeNumSignBits(const Value *Op, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Return the number of times the sign bit of the register is replicated into the other bits.
constexpr unsigned BitWidth
LLVM_ABI bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
LLVM_ABI Constant * getLosslessInvCast(Constant *C, Type *InvCastTo, unsigned CastOp, const DataLayout &DL, PreservedCastFlags *Flags=nullptr)
Try to cast C to InvC losslessly, satisfying CastOp(InvC) equals C, or CastOp(InvC) is a refined valu...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
constexpr bool isIntN(unsigned N, int64_t x)
Checks if an signed integer fits into the given (dynamic) bit width.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list.
LLVM_ABI Value * simplifyCmpInst(CmpPredicate Predicate, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a CmpInst, fold the result or return null.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI bool isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Returns true if V cannot be poison, but may be undef.
LLVM_ABI bool isKnownNonNegative(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Returns true if the give value is known to be non-negative.
LLVM_ABI bool isTriviallyVectorizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially vectorizable.
LLVM_ABI Intrinsic::ID getMinMaxReductionIntrinsicID(Intrinsic::ID IID)
Returns the llvm.vector.reduce min/max intrinsic that corresponds to the intrinsic op.
LLVM_ABI ConstantRange computeConstantRange(const Value *V, bool ForSigned, const SimplifyQuery &SQ, unsigned Depth=0)
Determine the possible constant range of an integer or vector of integer value.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
LLVM_ABI AAMDNodes adjustForAccess(unsigned AccessSize)
Create a new AAMDNode for accessing AccessSize bytes of this AAMDNode.
This struct is a compact representation of a valid (non-zero power of two) alignment.
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
SimplifyQuery getWithInstruction(const Instruction *I) const