45#define DEBUG_TYPE "vector-combine"
51STATISTIC(NumVecLoad,
"Number of vector loads formed");
52STATISTIC(NumVecCmp,
"Number of vector compares formed");
53STATISTIC(NumVecBO,
"Number of vector binops formed");
54STATISTIC(NumVecCmpBO,
"Number of vector compare + binop formed");
55STATISTIC(NumShufOfBitcast,
"Number of shuffles moved after bitcast");
56STATISTIC(NumScalarOps,
"Number of scalar unary + binary ops formed");
57STATISTIC(NumScalarCmp,
"Number of scalar compares formed");
58STATISTIC(NumScalarIntrinsic,
"Number of scalar intrinsic calls formed");
62 cl::desc(
"Disable all vector combine transforms"));
66 cl::desc(
"Disable binop extract to shuffle transforms"));
70 cl::desc(
"Max number of instructions to scan for vector combining."));
72static const unsigned InvalidIndex = std::numeric_limits<unsigned>::max();
80 bool TryEarlyFoldsOnly)
83 SQ(*
DL, nullptr, &DT, &AC),
84 TryEarlyFoldsOnly(TryEarlyFoldsOnly) {}
91 const TargetTransformInfo &TTI;
92 const DominatorTree &DT;
96 const SimplifyQuery SQ;
100 bool TryEarlyFoldsOnly;
102 InstructionWorklist Worklist;
111 bool vectorizeLoadInsert(Instruction &
I);
112 bool widenSubvectorLoad(Instruction &
I);
113 ExtractElementInst *getShuffleExtract(ExtractElementInst *Ext0,
114 ExtractElementInst *Ext1,
115 unsigned PreferredExtractIndex)
const;
116 bool isExtractExtractCheap(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
117 const Instruction &
I,
118 ExtractElementInst *&ConvertToShuffle,
119 unsigned PreferredExtractIndex);
122 bool foldExtractExtract(Instruction &
I);
123 bool foldInsExtFNeg(Instruction &
I);
124 bool foldInsExtBinop(Instruction &
I);
125 bool foldInsExtVectorToShuffle(Instruction &
I);
126 bool foldBitOpOfCastops(Instruction &
I);
127 bool foldBitOpOfCastConstant(Instruction &
I);
128 bool foldBitcastShuffle(Instruction &
I);
129 bool scalarizeOpOrCmp(Instruction &
I);
130 bool scalarizeVPIntrinsic(Instruction &
I);
131 bool foldExtractedCmps(Instruction &
I);
132 bool foldSelectsFromBitcast(Instruction &
I);
133 bool foldBinopOfReductions(Instruction &
I);
134 bool foldSingleElementStore(Instruction &
I);
135 bool scalarizeLoad(Instruction &
I);
136 bool scalarizeLoadExtract(LoadInst *LI, VectorType *VecTy,
Value *Ptr);
137 bool scalarizeLoadBitcast(LoadInst *LI, VectorType *VecTy,
Value *Ptr);
138 bool scalarizeExtExtract(Instruction &
I);
139 bool foldConcatOfBoolMasks(Instruction &
I);
140 bool foldPermuteOfBinops(Instruction &
I);
141 bool foldShuffleOfBinops(Instruction &
I);
142 bool foldShuffleOfSelects(Instruction &
I);
143 bool foldShuffleOfCastops(Instruction &
I);
144 bool foldShuffleOfShuffles(Instruction &
I);
145 bool foldPermuteOfIntrinsic(Instruction &
I);
146 bool foldShufflesOfLengthChangingShuffles(Instruction &
I);
147 bool foldShuffleOfIntrinsics(Instruction &
I);
148 bool foldShuffleToIdentity(Instruction &
I);
149 bool foldShuffleFromReductions(Instruction &
I);
150 bool foldShuffleChainsToReduce(Instruction &
I);
151 bool foldCastFromReductions(Instruction &
I);
152 bool foldSignBitReductionCmp(Instruction &
I);
153 bool foldICmpEqZeroVectorReduce(Instruction &
I);
154 bool foldEquivalentReductionCmp(Instruction &
I);
155 bool foldReduceAddCmpZero(Instruction &
I);
156 bool foldSelectShuffle(Instruction &
I,
bool FromReduction =
false);
157 bool foldInterleaveIntrinsics(Instruction &
I);
158 bool foldDeinterleaveIntrinsics(Instruction &
I);
159 bool foldBitcastOfVPLoad(Instruction &
I);
160 bool shrinkType(Instruction &
I);
161 bool shrinkLoadForShuffles(Instruction &
I);
162 bool shrinkPhiOfShuffles(Instruction &
I);
164 void replaceValue(Instruction &Old,
Value &New,
bool Erase =
true) {
170 Worklist.pushUsersToWorkList(*NewI);
171 Worklist.pushValue(NewI);
188 SmallPtrSet<Value *, 4> Visited;
193 OpI,
nullptr,
nullptr, [&](
Value *V) {
198 NextInst = NextInst->getNextNode();
203 Worklist.pushUsersToWorkList(*OpI);
204 Worklist.pushValue(OpI);
224 if (!Load || !Load->isSimple() || !Load->hasOneUse() ||
225 Load->getFunction()->hasFnAttribute(Attribute::SanitizeMemTag) ||
231 Type *ScalarTy = Load->getType()->getScalarType();
233 unsigned MinVectorSize =
TTI.getMinVectorRegisterBitWidth();
234 if (!ScalarSize || !MinVectorSize || MinVectorSize % ScalarSize != 0 ||
241bool VectorCombine::vectorizeLoadInsert(
Instruction &
I) {
267 Value *SrcPtr =
Load->getPointerOperand()->stripPointerCasts();
270 unsigned MinVecNumElts = MinVectorSize / ScalarSize;
271 auto *MinVecTy = VectorType::get(ScalarTy, MinVecNumElts,
false);
272 unsigned OffsetEltIndex = 0;
280 unsigned OffsetBitWidth =
DL->getIndexTypeSizeInBits(SrcPtr->
getType());
281 APInt
Offset(OffsetBitWidth, 0);
291 uint64_t ScalarSizeInBytes = ScalarSize / 8;
292 if (
Offset.urem(ScalarSizeInBytes) != 0)
296 OffsetEltIndex =
Offset.udiv(ScalarSizeInBytes).getZExtValue();
297 if (OffsetEltIndex >= MinVecNumElts)
314 unsigned AS =
Load->getPointerAddressSpace();
333 unsigned OutputNumElts = Ty->getNumElements();
335 assert(OffsetEltIndex < MinVecNumElts &&
"Address offset too big");
336 Mask[0] = OffsetEltIndex;
343 if (OldCost < NewCost || !NewCost.
isValid())
354 replaceValue(
I, *VecLd);
362bool VectorCombine::widenSubvectorLoad(Instruction &
I) {
365 if (!Shuf->isIdentityWithPadding())
371 unsigned OpIndex =
any_of(Shuf->getShuffleMask(), [&NumOpElts](
int M) {
372 return M >= (int)(NumOpElts);
383 Value *SrcPtr =
Load->getPointerOperand()->stripPointerCasts();
392 unsigned AS =
Load->getPointerAddressSpace();
407 if (OldCost < NewCost || !NewCost.
isValid())
414 replaceValue(
I, *VecLd);
421ExtractElementInst *VectorCombine::getShuffleExtract(
422 ExtractElementInst *Ext0, ExtractElementInst *Ext1,
426 assert(Index0C && Index1C &&
"Expected constant extract indexes");
428 unsigned Index0 = Index0C->getZExtValue();
429 unsigned Index1 = Index1C->getZExtValue();
432 if (Index0 == Index1)
456 if (PreferredExtractIndex == Index0)
458 if (PreferredExtractIndex == Index1)
462 return Index0 > Index1 ? Ext0 : Ext1;
470bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
471 ExtractElementInst *Ext1,
472 const Instruction &
I,
473 ExtractElementInst *&ConvertToShuffle,
474 unsigned PreferredExtractIndex) {
477 assert(Ext0IndexC && Ext1IndexC &&
"Expected constant extract indexes");
479 unsigned Opcode =
I.getOpcode();
492 assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
493 "Expected a compare");
503 unsigned Ext0Index = Ext0IndexC->getZExtValue();
504 unsigned Ext1Index = Ext1IndexC->getZExtValue();
518 unsigned BestExtIndex = Extract0Cost > Extract1Cost ? Ext0Index : Ext1Index;
519 unsigned BestInsIndex = Extract0Cost > Extract1Cost ? Ext1Index : Ext0Index;
520 InstructionCost CheapExtractCost = std::min(Extract0Cost, Extract1Cost);
525 if (Ext0Src == Ext1Src && Ext0Index == Ext1Index) {
530 bool HasUseTax = Ext0 == Ext1 ? !Ext0->
hasNUses(2)
532 OldCost = CheapExtractCost + ScalarOpCost;
533 NewCost = VectorOpCost + CheapExtractCost + HasUseTax * CheapExtractCost;
537 OldCost = Extract0Cost + Extract1Cost + ScalarOpCost;
538 NewCost = VectorOpCost + CheapExtractCost +
543 ConvertToShuffle = getShuffleExtract(Ext0, Ext1, PreferredExtractIndex);
544 if (ConvertToShuffle) {
556 SmallVector<int> ShuffleMask(FixedVecTy->getNumElements(),
558 ShuffleMask[BestInsIndex] = BestExtIndex;
560 VecTy, VecTy, ShuffleMask,
CostKind, 0,
561 nullptr, {ConvertToShuffle});
564 VecTy, VecTy, {},
CostKind, 0,
nullptr,
572 return OldCost < NewCost;
584 ShufMask[NewIndex] = OldIndex;
585 return Builder.CreateShuffleVector(Vec, ShufMask,
"shift");
637 V1,
"foldExtExtBinop");
642 VecBOInst->copyIRFlags(&
I);
648bool VectorCombine::foldExtractExtract(Instruction &
I) {
669 unsigned NumElts = FixedVecTy->getNumElements();
670 if (C0 >= NumElts || C1 >= NumElts)
686 ExtractElementInst *ExtractToChange;
687 if (isExtractExtractCheap(Ext0, Ext1,
I, ExtractToChange, InsertIndex))
693 if (ExtractToChange) {
694 unsigned CheapExtractIdx = ExtractToChange == Ext0 ? C1 : C0;
699 if (ExtractToChange == Ext0)
708 ? foldExtExtCmp(ExtOp0, ExtOp1, ExtIndex,
I)
709 : foldExtExtBinop(ExtOp0, ExtOp1, ExtIndex,
I);
712 replaceValue(
I, *NewExt);
718bool VectorCombine::foldInsExtFNeg(Instruction &
I) {
721 uint64_t ExtIdx, InsIdx;
736 auto *DstVecScalarTy = DstVecTy->getScalarType();
738 if (!SrcVecTy || DstVecScalarTy != SrcVecTy->getScalarType())
743 unsigned NumDstElts = DstVecTy->getNumElements();
744 unsigned NumSrcElts = SrcVecTy->getNumElements();
745 if (ExtIdx > NumSrcElts || InsIdx >= NumDstElts || NumDstElts == 1)
751 SmallVector<int>
Mask(NumDstElts);
752 std::iota(
Mask.begin(),
Mask.end(), 0);
753 Mask[InsIdx] = (ExtIdx % NumDstElts) + NumDstElts;
769 bool NeedLenChg = SrcVecTy->getNumElements() != NumDstElts;
772 SmallVector<int> SrcMask;
775 SrcMask[ExtIdx % NumDstElts] = ExtIdx;
777 DstVecTy, SrcVecTy, SrcMask,
CostKind);
781 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
783 if (NewCost > OldCost)
786 Value *NewShuf, *LenChgShuf =
nullptr;
800 replaceValue(
I, *NewShuf);
806bool VectorCombine::foldInsExtBinop(Instruction &
I) {
807 BinaryOperator *VecBinOp, *SclBinOp;
839 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
841 if (NewCost > OldCost)
852 NewInst->copyIRFlags(VecBinOp);
853 NewInst->andIRFlags(SclBinOp);
858 replaceValue(
I, *NewBO);
864bool VectorCombine::foldBitOpOfCastops(Instruction &
I) {
867 if (!BinOp || !BinOp->isBitwiseLogicOp())
873 if (!LHSCast || !RHSCast) {
874 LLVM_DEBUG(
dbgs() <<
" One or both operands are not cast instructions\n");
880 if (CastOpcode != RHSCast->getOpcode())
884 switch (CastOpcode) {
885 case Instruction::BitCast:
886 case Instruction::Trunc:
887 case Instruction::SExt:
888 case Instruction::ZExt:
894 Value *LHSSrc = LHSCast->getOperand(0);
895 Value *RHSSrc = RHSCast->getOperand(0);
901 auto *SrcTy = LHSSrc->
getType();
902 auto *DstTy =
I.getType();
905 if (CastOpcode != Instruction::BitCast &&
910 if (!SrcTy->getScalarType()->isIntegerTy() ||
911 !DstTy->getScalarType()->isIntegerTy())
926 LHSCastCost + RHSCastCost;
937 if (!LHSCast->hasOneUse())
938 NewCost += LHSCastCost;
939 if (!RHSCast->hasOneUse())
940 NewCost += RHSCastCost;
943 <<
" NewCost=" << NewCost <<
"\n");
945 if (NewCost > OldCost)
950 BinOp->getName() +
".inner");
952 NewBinOp->copyIRFlags(BinOp);
966 replaceValue(
I, *Result);
975bool VectorCombine::foldBitOpOfCastConstant(Instruction &
I) {
991 switch (CastOpcode) {
992 case Instruction::BitCast:
993 case Instruction::ZExt:
994 case Instruction::SExt:
995 case Instruction::Trunc:
1001 Value *LHSSrc = LHSCast->getOperand(0);
1003 auto *SrcTy = LHSSrc->
getType();
1004 auto *DstTy =
I.getType();
1007 if (CastOpcode != Instruction::BitCast &&
1012 if (!SrcTy->getScalarType()->isIntegerTy() ||
1013 !DstTy->getScalarType()->isIntegerTy())
1017 PreservedCastFlags RHSFlags;
1042 if (!LHSCast->hasOneUse())
1043 NewCost += LHSCastCost;
1045 LLVM_DEBUG(
dbgs() <<
"foldBitOpOfCastConstant: OldCost=" << OldCost
1046 <<
" NewCost=" << NewCost <<
"\n");
1048 if (NewCost > OldCost)
1053 LHSSrc, InvC,
I.getName() +
".inner");
1055 NewBinOp->copyIRFlags(&
I);
1075 replaceValue(
I, *Result);
1082bool VectorCombine::foldBitcastShuffle(Instruction &
I) {
1096 if (!DestTy || !SrcTy)
1099 unsigned DestEltSize = DestTy->getScalarSizeInBits();
1100 unsigned SrcEltSize = SrcTy->getScalarSizeInBits();
1101 if (SrcTy->getPrimitiveSizeInBits() % DestEltSize != 0)
1111 if (!(BCTy0 && BCTy0->getElementType() == DestTy->getElementType()) &&
1112 !(BCTy1 && BCTy1->getElementType() == DestTy->getElementType()))
1116 SmallVector<int, 16> NewMask;
1117 if (DestEltSize <= SrcEltSize) {
1120 if (SrcEltSize % DestEltSize != 0)
1122 unsigned ScaleFactor = SrcEltSize / DestEltSize;
1127 if (DestEltSize % SrcEltSize != 0)
1129 unsigned ScaleFactor = DestEltSize / SrcEltSize;
1136 unsigned NumSrcElts = SrcTy->getPrimitiveSizeInBits() / DestEltSize;
1137 auto *NewShuffleTy =
1139 auto *OldShuffleTy =
1141 unsigned NumOps = IsUnary ? 1 : 2;
1151 TargetTransformInfo::CastContextHint::None,
1156 TargetTransformInfo::CastContextHint::None,
1159 LLVM_DEBUG(
dbgs() <<
"Found a bitcasted shuffle: " <<
I <<
"\n OldCost: "
1160 << OldCost <<
" vs NewCost: " << NewCost <<
"\n");
1162 if (NewCost > OldCost || !NewCost.
isValid())
1170 replaceValue(
I, *Shuf);
1177bool VectorCombine::scalarizeVPIntrinsic(Instruction &
I) {
1191 if (!ScalarOp0 || !ScalarOp1)
1199 auto IsAllTrueMask = [](
Value *MaskVal) {
1202 return ConstValue->isAllOnesValue();
1216 SmallVector<int>
Mask;
1218 Mask.resize(FVTy->getNumElements(), 0);
1227 Args.push_back(
V->getType());
1228 IntrinsicCostAttributes
Attrs(IntrID, VecTy, Args);
1233 std::optional<unsigned> FunctionalOpcode =
1235 std::optional<Intrinsic::ID> ScalarIntrID = std::nullopt;
1236 if (!FunctionalOpcode) {
1245 IntrinsicCostAttributes
Attrs(*ScalarIntrID, VecTy->getScalarType(), Args);
1255 InstructionCost NewCost = ScalarOpCost + SplatCost + CostToKeepSplats;
1257 LLVM_DEBUG(
dbgs() <<
"Found a VP Intrinsic to scalarize: " << VPI
1260 <<
", Cost of scalarizing:" << NewCost <<
"\n");
1263 if (OldCost < NewCost || !NewCost.
isValid())
1274 bool SafeToSpeculate;
1280 *FunctionalOpcode, &VPI,
nullptr, SQ.
AC, SQ.
DT);
1281 if (!SafeToSpeculate &&
1288 {ScalarOp0, ScalarOp1})
1290 ScalarOp0, ScalarOp1);
1299bool VectorCombine::scalarizeOpOrCmp(Instruction &
I) {
1304 if (!UO && !BO && !CI && !
II)
1312 if (Arg->getType() !=
II->getType() &&
1322 for (User *U :
I.users())
1329 std::optional<uint64_t>
Index;
1331 auto Ops =
II ?
II->args() :
I.operands();
1335 uint64_t InsIdx = 0;
1340 if (OpTy->getElementCount().getKnownMinValue() <= InsIdx)
1346 else if (InsIdx != *Index)
1363 if (!
Index.has_value())
1367 Type *ScalarTy = VecTy->getScalarType();
1368 assert(VecTy->isVectorTy() &&
1371 "Unexpected types for insert element into binop or cmp");
1373 unsigned Opcode =
I.getOpcode();
1381 }
else if (UO || BO) {
1385 IntrinsicCostAttributes ScalarICA(
1386 II->getIntrinsicID(), ScalarTy,
1389 IntrinsicCostAttributes VectorICA(
1390 II->getIntrinsicID(), VecTy,
1397 Value *NewVecC =
nullptr;
1399 NewVecC =
simplifyCmpInst(CI->getPredicate(), VecCs[0], VecCs[1], SQ);
1402 simplifyUnOp(UO->getOpcode(), VecCs[0], UO->getFastMathFlags(), SQ);
1404 NewVecC =
simplifyBinOp(BO->getOpcode(), VecCs[0], VecCs[1], SQ);
1418 for (
auto [Idx,
Op, VecC, Scalar] :
enumerate(
Ops, VecCs, ScalarOps)) {
1420 II->getIntrinsicID(), Idx, &
TTI)))
1423 Instruction::InsertElement, VecTy,
CostKind, *Index, VecC, Scalar);
1424 OldCost += InsertCost;
1425 NewCost += !
Op->hasOneUse() * InsertCost;
1429 if (OldCost < NewCost || !NewCost.
isValid())
1439 ++NumScalarIntrinsic;
1449 Scalar = Builder.
CreateCmp(CI->getPredicate(), ScalarOps[0], ScalarOps[1]);
1455 Scalar->setName(
I.getName() +
".scalar");
1460 ScalarInst->copyIRFlags(&
I);
1463 replaceValue(
I, *Insert);
1470bool VectorCombine::foldExtractedCmps(Instruction &
I) {
1475 if (!BI || !
I.getType()->isIntegerTy(1))
1480 Value *B0 =
I.getOperand(0), *B1 =
I.getOperand(1);
1483 CmpPredicate
P0,
P1;
1495 uint64_t Index0, Index1;
1502 ExtractElementInst *ConvertToShuf = getShuffleExtract(Ext0, Ext1,
CostKind);
1505 assert((ConvertToShuf == Ext0 || ConvertToShuf == Ext1) &&
1506 "Unknown ExtractElementInst");
1511 unsigned CmpOpcode =
1526 Ext0Cost + Ext1Cost + CmpCost * 2 +
1532 int CheapIndex = ConvertToShuf == Ext0 ? Index1 : Index0;
1533 int ExpensiveIndex = ConvertToShuf == Ext0 ? Index0 : Index1;
1538 ShufMask[CheapIndex] = ExpensiveIndex;
1543 NewCost += Ext0->
hasOneUse() ? 0 : Ext0Cost;
1544 NewCost += Ext1->
hasOneUse() ? 0 : Ext1Cost;
1549 if (OldCost < NewCost || !NewCost.
isValid())
1559 Value *
LHS = ConvertToShuf == Ext0 ? Shuf : VCmp;
1560 Value *
RHS = ConvertToShuf == Ext0 ? VCmp : Shuf;
1563 replaceValue(
I, *NewExt);
1590bool VectorCombine::foldSelectsFromBitcast(Instruction &
I) {
1597 if (!SrcVecTy || !DstVecTy)
1607 if (SrcEltBits != 32 && SrcEltBits != 64)
1610 if (!DstEltTy->
isIntegerTy() || DstEltBits >= SrcEltBits)
1627 if (!ScalarSelCost.
isValid() || ScalarSelCost == 0)
1630 unsigned MinSelects = (VecSelCost.
getValue() / ScalarSelCost.
getValue()) + 1;
1633 if (!BC->hasNUsesOrMore(MinSelects))
1638 DenseMap<Value *, SmallVector<SelectInst *, 8>> CondToSelects;
1640 for (User *U : BC->users()) {
1645 for (User *ExtUser : Ext->users()) {
1649 Cond->getType()->isIntegerTy(1))
1654 if (CondToSelects.
empty())
1657 bool MadeChange =
false;
1658 Value *SrcVec = BC->getOperand(0);
1661 for (
auto [
Cond, Selects] : CondToSelects) {
1663 if (Selects.size() < MinSelects) {
1664 LLVM_DEBUG(
dbgs() <<
"VectorCombine: foldSelectsFromBitcast not "
1665 <<
"profitable (VecCost=" << VecSelCost
1666 <<
", ScalarCost=" << ScalarSelCost
1667 <<
", NumSelects=" << Selects.size() <<
")\n");
1672 auto InsertPt = std::next(BC->getIterator());
1676 InsertPt = std::next(CondInst->getIterator());
1684 for (SelectInst *Sel : Selects) {
1686 Value *Idx = Ext->getIndexOperand();
1690 replaceValue(*Sel, *NewExt);
1695 <<
" selects into vector select\n");
1709 unsigned ReductionOpc =
1715 CostBeforeReduction =
1716 TTI.getCastInstrCost(RedOp->getOpcode(), VecRedTy, ExtType,
1718 CostAfterReduction =
1719 TTI.getExtendedReductionCost(ReductionOpc, IsUnsigned,
II.getType(),
1723 if (RedOp &&
II.getIntrinsicID() == Intrinsic::vector_reduce_add &&
1729 (Op0->
getOpcode() == RedOp->getOpcode() || Op0 == Op1)) {
1736 TTI.getCastInstrCost(Op0->
getOpcode(), MulType, ExtType,
1739 TTI.getArithmeticInstrCost(Instruction::Mul, MulType,
CostKind);
1741 TTI.getCastInstrCost(RedOp->getOpcode(), VecRedTy, MulType,
1744 CostBeforeReduction = ExtCost * 2 + MulCost + Ext2Cost;
1745 CostAfterReduction =
TTI.getMulAccReductionCost(
1746 IsUnsigned, ReductionOpc,
II.getType(), ExtType,
CostKind);
1749 CostAfterReduction =
TTI.getArithmeticReductionCost(ReductionOpc, VecRedTy,
1753bool VectorCombine::foldBinopOfReductions(Instruction &
I) {
1756 if (BinOpOpc == Instruction::Sub)
1757 ReductionIID = Intrinsic::vector_reduce_add;
1761 auto checkIntrinsicAndGetItsArgument = [](
Value *
V,
1766 if (
II->getIntrinsicID() == IID &&
II->hasOneUse())
1767 return II->getArgOperand(0);
1771 Value *V0 = checkIntrinsicAndGetItsArgument(
I.getOperand(0), ReductionIID);
1774 Value *V1 = checkIntrinsicAndGetItsArgument(
I.getOperand(1), ReductionIID);
1783 unsigned ReductionOpc =
1796 CostOfRedOperand0 + CostOfRedOperand1 +
1799 if (NewCost >= OldCost || !NewCost.
isValid())
1803 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
1806 if (BinOpOpc == Instruction::Or)
1807 VectorBO = Builder.
CreateOr(V0, V1,
"",
1813 replaceValue(
I, *Rdx);
1821 unsigned NumScanned = 0;
1822 return std::any_of(Begin, End, [&](
const Instruction &Instr) {
1831class ScalarizationResult {
1832 enum class StatusTy { Unsafe, Safe, SafeWithFreeze };
1837 ScalarizationResult(StatusTy Status,
Value *ToFreeze =
nullptr)
1838 : Status(Status), ToFreeze(ToFreeze) {}
1841 ScalarizationResult(
const ScalarizationResult &
Other) =
default;
1842 ~ScalarizationResult() {
1843 assert(!ToFreeze &&
"freeze() not called with ToFreeze being set");
1846 static ScalarizationResult unsafe() {
return {StatusTy::Unsafe}; }
1847 static ScalarizationResult safe() {
return {StatusTy::Safe}; }
1848 static ScalarizationResult safeWithFreeze(
Value *ToFreeze) {
1849 return {StatusTy::SafeWithFreeze, ToFreeze};
1853 bool isSafe()
const {
return Status == StatusTy::Safe; }
1855 bool isUnsafe()
const {
return Status == StatusTy::Unsafe; }
1858 bool isSafeWithFreeze()
const {
return Status == StatusTy::SafeWithFreeze; }
1863 Status = StatusTy::Unsafe;
1867 void freeze(IRBuilderBase &Builder, Instruction &UserI) {
1868 assert(isSafeWithFreeze() &&
1869 "should only be used when freezing is required");
1871 "UserI must be a user of ToFreeze");
1872 IRBuilder<>::InsertPointGuard Guard(Builder);
1877 if (
U.get() == ToFreeze)
1892 uint64_t NumElements = VecTy->getElementCount().getKnownMinValue();
1896 if (
C->getValue().ult(NumElements))
1897 return ScalarizationResult::safe();
1898 return ScalarizationResult::unsafe();
1903 return ScalarizationResult::unsafe();
1905 APInt Zero(IntWidth, 0);
1906 APInt MaxElts(IntWidth, NumElements);
1913 return ScalarizationResult::safe();
1914 return ScalarizationResult::unsafe();
1927 if (ValidIndices.
contains(IdxRange))
1928 return ScalarizationResult::safeWithFreeze(IdxBase);
1929 return ScalarizationResult::unsafe();
1941 C->getZExtValue() *
DL.getTypeStoreSize(ScalarType));
1953bool VectorCombine::foldSingleElementStore(Instruction &
I) {
1965 if (!
match(
SI->getValueOperand(),
1972 Value *SrcAddr =
Load->getPointerOperand()->stripPointerCasts();
1975 if (!
Load->isSimple() ||
Load->getParent() !=
SI->getParent() ||
1976 !
DL->typeSizeEqualsStoreSize(
Load->getType()->getScalarType()) ||
1977 SrcAddr !=
SI->getPointerOperand()->stripPointerCasts())
1980 auto ScalarizableIdx =
1982 if (ScalarizableIdx.isUnsafe() ||
1989 Worklist.
push(Load);
1991 if (ScalarizableIdx.isSafeWithFreeze())
1994 SI->getValueOperand()->getType(),
SI->getPointerOperand(),
1995 {ConstantInt::get(Idx->getType(), 0), Idx});
1999 std::max(
SI->getAlign(),
Load->getAlign()), NewElement->
getType(), Idx,
2002 replaceValue(
I, *NSI);
2012bool VectorCombine::scalarizeLoad(Instruction &
I) {
2022 if (!LI->isSimple() || !
DL->typeSizeEqualsStoreSize(VecTy->getScalarType()))
2025 bool AllExtracts =
true;
2026 bool AllBitcasts =
true;
2028 unsigned NumInstChecked = 0;
2033 for (User *U : LI->users()) {
2035 if (!UI || UI->getParent() != LI->getParent())
2040 if (UI->use_empty())
2044 AllExtracts =
false;
2046 AllBitcasts =
false;
2050 for (Instruction &
I :
2051 make_range(std::next(LI->getIterator()), UI->getIterator())) {
2058 LastCheckedInst = UI;
2063 return scalarizeLoadExtract(LI, VecTy, Ptr);
2065 return scalarizeLoadBitcast(LI, VecTy, Ptr);
2070bool VectorCombine::scalarizeLoadExtract(LoadInst *LI, VectorType *VecTy,
2075 DenseMap<ExtractElementInst *, ScalarizationResult> NeedFreeze;
2078 for (
auto &Pair : NeedFreeze)
2079 Pair.second.discard();
2087 for (User *U : LI->
users()) {
2092 if (ScalarIdx.isUnsafe())
2094 if (ScalarIdx.isSafeWithFreeze()) {
2095 NeedFreeze.try_emplace(UI, ScalarIdx);
2096 ScalarIdx.discard();
2102 Index ?
Index->getZExtValue() : -1);
2110 LLVM_DEBUG(
dbgs() <<
"Found all extractions of a vector load: " << *LI
2111 <<
"\n LoadExtractCost: " << OriginalCost
2112 <<
" vs ScalarizedCost: " << ScalarizedCost <<
"\n");
2114 if (ScalarizedCost >= OriginalCost)
2121 Type *ElemType = VecTy->getElementType();
2124 for (User *U : LI->
users()) {
2126 Value *Idx = EI->getIndexOperand();
2129 auto It = NeedFreeze.find(EI);
2130 if (It != NeedFreeze.end())
2137 Builder.
CreateLoad(ElemType,
GEP, EI->getName() +
".scalar"));
2139 Align ScalarOpAlignment =
2141 NewLoad->setAlignment(ScalarOpAlignment);
2144 size_t Offset = ConstIdx->getZExtValue() *
DL->getTypeStoreSize(ElemType);
2149 replaceValue(*EI, *NewLoad,
false);
2152 FailureGuard.release();
2157bool VectorCombine::scalarizeLoadBitcast(LoadInst *LI, VectorType *VecTy,
2163 Type *TargetScalarType =
nullptr;
2164 unsigned VecBitWidth =
DL->getTypeSizeInBits(VecTy);
2166 for (User *U : LI->
users()) {
2169 Type *DestTy = BC->getDestTy();
2173 unsigned DestBitWidth =
DL->getTypeSizeInBits(DestTy);
2174 if (DestBitWidth != VecBitWidth)
2178 if (!TargetScalarType)
2179 TargetScalarType = DestTy;
2180 else if (TargetScalarType != DestTy)
2188 if (!TargetScalarType)
2196 LLVM_DEBUG(
dbgs() <<
"Found vector load feeding only bitcasts: " << *LI
2197 <<
"\n OriginalCost: " << OriginalCost
2198 <<
" vs ScalarizedCost: " << ScalarizedCost <<
"\n");
2200 if (ScalarizedCost >= OriginalCost)
2211 ScalarLoad->copyMetadata(*LI);
2214 for (User *U : LI->
users()) {
2216 replaceValue(*BC, *ScalarLoad,
false);
2222bool VectorCombine::scalarizeExtExtract(Instruction &
I) {
2237 Type *ScalarDstTy = DstTy->getElementType();
2238 if (
DL->getTypeSizeInBits(SrcTy) !=
DL->getTypeSizeInBits(ScalarDstTy))
2244 unsigned ExtCnt = 0;
2245 bool ExtLane0 =
false;
2246 for (User *U : Ext->users()) {
2260 Instruction::And, ScalarDstTy,
CostKind,
2263 (ExtCnt - ExtLane0) *
2265 Instruction::LShr, ScalarDstTy,
CostKind,
2268 if (ScalarCost > VectorCost)
2271 Value *ScalarV = Ext->getOperand(0);
2278 SmallDenseSet<ConstantInt *, 8> ExtractedLanes;
2279 bool AllExtractsTriggerUB =
true;
2280 ExtractElementInst *LastExtract =
nullptr;
2282 for (User *U : Ext->users()) {
2285 AllExtractsTriggerUB =
false;
2289 if (!LastExtract || LastExtract->
comesBefore(Extract))
2290 LastExtract = Extract;
2292 if (ExtractedLanes.
size() != DstTy->getNumElements() ||
2293 !AllExtractsTriggerUB ||
2301 uint64_t SrcEltSizeInBits =
DL->getTypeSizeInBits(SrcTy->getElementType());
2302 uint64_t TotalBits =
DL->getTypeSizeInBits(SrcTy);
2305 Value *
Mask = ConstantInt::get(PackedTy, EltBitMask);
2306 for (User *U : Ext->users()) {
2312 ? (TotalBits - SrcEltSizeInBits - Idx * SrcEltSizeInBits)
2313 : (Idx * SrcEltSizeInBits);
2316 U->replaceAllUsesWith(
And);
2324bool VectorCombine::foldConcatOfBoolMasks(Instruction &
I) {
2325 Type *Ty =
I.getType();
2330 if (
DL->isBigEndian())
2341 uint64_t ShAmtX = 0;
2349 uint64_t ShAmtY = 0;
2357 if (ShAmtX > ShAmtY) {
2365 uint64_t ShAmtDiff = ShAmtY - ShAmtX;
2366 unsigned NumSHL = (ShAmtX > 0) + (ShAmtY > 0);
2371 MaskTy->getNumElements() != ShAmtDiff ||
2372 MaskTy->getNumElements() > (
BitWidth / 2))
2377 Type::getIntNTy(Ty->
getContext(), ConcatTy->getNumElements());
2378 auto *MaskIntTy = Type::getIntNTy(Ty->
getContext(), ShAmtDiff);
2381 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
2398 if (Ty != ConcatIntTy)
2404 LLVM_DEBUG(
dbgs() <<
"Found a concatenation of bitcasted bool masks: " <<
I
2405 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2408 if (NewCost > OldCost)
2418 if (Ty != ConcatIntTy) {
2428 replaceValue(
I, *Result);
2434bool VectorCombine::foldPermuteOfBinops(Instruction &
I) {
2435 BinaryOperator *BinOp;
2436 ArrayRef<int> OuterMask;
2444 Value *Op00, *Op01, *Op10, *Op11;
2445 ArrayRef<int> Mask0, Mask1;
2450 if (!Match0 && !Match1)
2463 if (!ShuffleDstTy || !BinOpTy || !Op0Ty || !Op1Ty)
2466 unsigned NumSrcElts = BinOpTy->getNumElements();
2471 any_of(OuterMask, [NumSrcElts](
int M) {
return M >= (int)NumSrcElts; }))
2475 SmallVector<int> NewMask0, NewMask1;
2476 for (
int M : OuterMask) {
2477 if (M < 0 || M >= (
int)NumSrcElts) {
2481 NewMask0.
push_back(Match0 ? Mask0[M] : M);
2482 NewMask1.
push_back(Match1 ? Mask1[M] : M);
2486 unsigned NumOpElts = Op0Ty->getNumElements();
2487 bool IsIdentity0 = ShuffleDstTy == Op0Ty &&
2488 all_of(NewMask0, [NumOpElts](
int M) {
return M < (int)NumOpElts; }) &&
2490 bool IsIdentity1 = ShuffleDstTy == Op1Ty &&
2491 all_of(NewMask1, [NumOpElts](
int M) {
return M < (int)NumOpElts; }) &&
2500 ShuffleDstTy, BinOpTy, OuterMask,
CostKind,
2501 0,
nullptr, {BinOp}, &
I);
2503 NewCost += BinOpCost;
2509 OldCost += Shuf0Cost;
2511 NewCost += Shuf0Cost;
2517 OldCost += Shuf1Cost;
2519 NewCost += Shuf1Cost;
2527 Op0Ty, NewMask0,
CostKind, 0,
nullptr, {Op00, Op01});
2531 Op1Ty, NewMask1,
CostKind, 0,
nullptr, {Op10, Op11});
2533 LLVM_DEBUG(
dbgs() <<
"Found a shuffle feeding a shuffled binop: " <<
I
2534 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2538 if (NewCost > OldCost)
2549 NewInst->copyIRFlags(BinOp);
2553 replaceValue(
I, *NewBO);
2559bool VectorCombine::foldShuffleOfBinops(Instruction &
I) {
2560 ArrayRef<int> OldMask;
2567 if (
LHS->getOpcode() !=
RHS->getOpcode())
2571 bool IsCommutative =
false;
2580 IsCommutative = BinaryOperator::isCommutative(BO->getOpcode());
2591 if (!ShuffleDstTy || !BinResTy || !BinOpTy ||
X->getType() !=
Z->getType())
2594 bool SameBinOp =
LHS ==
RHS;
2595 unsigned NumSrcElts = BinOpTy->getNumElements();
2598 if (IsCommutative &&
X != Z &&
Y != W && (
X == W ||
Y == Z))
2601 auto ConvertToUnary = [NumSrcElts](
int &
M) {
2602 if (M >= (
int)NumSrcElts)
2606 SmallVector<int> NewMask0(OldMask);
2615 SmallVector<int> NewMask1(OldMask);
2634 ShuffleDstTy, BinResTy, OldMask,
CostKind, 0,
2644 ArrayRef<int> InnerMask;
2646 m_Mask(InnerMask)))) &&
2649 [NumSrcElts](
int M) {
return M < (int)NumSrcElts; })) {
2661 bool ReducedInstCount =
false;
2662 ReducedInstCount |= MergeInner(
X, 0, NewMask0,
CostKind);
2663 ReducedInstCount |= MergeInner(
Y, 0, NewMask1,
CostKind);
2664 ReducedInstCount |= MergeInner(Z, NumSrcElts, NewMask0,
CostKind);
2665 ReducedInstCount |= MergeInner(W, NumSrcElts, NewMask1,
CostKind);
2666 bool SingleSrcBinOp = (
X ==
Y) && (Z == W) && (NewMask0 == NewMask1);
2671 auto *ShuffleCmpTy =
2674 SK0, ShuffleCmpTy, BinOpTy, NewMask0,
CostKind, 0,
nullptr, {
X,
Z});
2675 if (!SingleSrcBinOp)
2685 PredLHS,
CostKind, Op0Info, Op1Info);
2695 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2702 if (ReducedInstCount ? (NewCost > OldCost) : (NewCost >= OldCost))
2711 : Builder.
CreateCmp(PredLHS, Shuf0, Shuf1);
2715 NewInst->copyIRFlags(
LHS);
2716 NewInst->andIRFlags(
RHS);
2721 replaceValue(
I, *NewBO);
2728bool VectorCombine::foldShuffleOfSelects(Instruction &
I) {
2730 Value *C1, *
T1, *F1, *C2, *T2, *F2;
2741 if (!C1VecTy || !C2VecTy || C1VecTy != C2VecTy)
2747 if (((SI0FOp ==
nullptr) != (SI1FOp ==
nullptr)) ||
2748 ((SI0FOp !=
nullptr) &&
2749 (SI0FOp->getFastMathFlags() != SI1FOp->getFastMathFlags())))
2755 auto SelOp = Instruction::Select;
2763 CostSel1 + CostSel2 +
2765 {
I.getOperand(0),
I.getOperand(1)}, &
I);
2769 Mask,
CostKind, 0,
nullptr, {C1, C2});
2779 if (!Sel1->hasOneUse())
2780 NewCost += CostSel1;
2781 if (!Sel2->hasOneUse())
2782 NewCost += CostSel2;
2785 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2787 if (NewCost > OldCost)
2796 NewSel = Builder.
CreateSelectFMF(ShuffleCmp, ShuffleTrue, ShuffleFalse,
2797 SI0FOp->getFastMathFlags());
2799 NewSel = Builder.
CreateSelect(ShuffleCmp, ShuffleTrue, ShuffleFalse);
2804 replaceValue(
I, *NewSel);
2810bool VectorCombine::foldShuffleOfCastops(Instruction &
I) {
2812 ArrayRef<int> OldMask;
2821 if (!C0 || (IsBinaryShuffle && !C1))
2828 if (!IsBinaryShuffle && Opcode == Instruction::BitCast)
2831 if (IsBinaryShuffle) {
2832 if (C0->getSrcTy() != C1->getSrcTy())
2835 if (Opcode != C1->getOpcode()) {
2837 Opcode = Instruction::SExt;
2846 if (!ShuffleDstTy || !CastDstTy || !CastSrcTy)
2849 unsigned NumSrcElts = CastSrcTy->getNumElements();
2850 unsigned NumDstElts = CastDstTy->getNumElements();
2851 assert((NumDstElts == NumSrcElts || Opcode == Instruction::BitCast) &&
2852 "Only bitcasts expected to alter src/dst element counts");
2856 if (NumDstElts != NumSrcElts && (NumSrcElts % NumDstElts) != 0 &&
2857 (NumDstElts % NumSrcElts) != 0)
2860 SmallVector<int, 16> NewMask;
2861 if (NumSrcElts >= NumDstElts) {
2864 assert(NumSrcElts % NumDstElts == 0 &&
"Unexpected shuffle mask");
2865 unsigned ScaleFactor = NumSrcElts / NumDstElts;
2870 assert(NumDstElts % NumSrcElts == 0 &&
"Unexpected shuffle mask");
2871 unsigned ScaleFactor = NumDstElts / NumSrcElts;
2876 auto *NewShuffleDstTy =
2885 if (IsBinaryShuffle)
2900 if (IsBinaryShuffle) {
2910 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2912 if (NewCost > OldCost)
2916 if (IsBinaryShuffle)
2926 NewInst->copyIRFlags(C0);
2927 if (IsBinaryShuffle)
2928 NewInst->andIRFlags(C1);
2932 replaceValue(
I, *Cast);
2942bool VectorCombine::foldShuffleOfShuffles(Instruction &
I) {
2943 ArrayRef<int> OuterMask;
2944 Value *OuterV0, *OuterV1;
2949 ArrayRef<int> InnerMask0, InnerMask1;
2950 Value *X0, *X1, *Y0, *Y1;
2955 if (!Match0 && !Match1)
2960 SmallVector<int, 16> PoisonMask1;
2965 InnerMask1 = PoisonMask1;
2969 X0 = Match0 ? X0 : OuterV0;
2970 Y0 = Match0 ? Y0 : OuterV0;
2971 X1 = Match1 ? X1 : OuterV1;
2972 Y1 = Match1 ? Y1 : OuterV1;
2976 if (!ShuffleDstTy || !ShuffleSrcTy || !ShuffleImmTy ||
2980 unsigned NumSrcElts = ShuffleSrcTy->getNumElements();
2981 unsigned NumImmElts = ShuffleImmTy->getNumElements();
2986 SmallVector<int, 16> NewMask(OuterMask);
2987 Value *NewX =
nullptr, *NewY =
nullptr;
2988 for (
int &M : NewMask) {
2989 Value *Src =
nullptr;
2990 if (0 <= M && M < (
int)NumImmElts) {
2994 Src =
M >= (int)NumSrcElts ? Y0 : X0;
2995 M =
M >= (int)NumSrcElts ? (M - NumSrcElts) :
M;
2997 }
else if (M >= (
int)NumImmElts) {
3002 Src =
M >= (int)NumSrcElts ? Y1 : X1;
3003 M =
M >= (int)NumSrcElts ? (M - NumSrcElts) :
M;
3007 assert(0 <= M && M < (
int)NumSrcElts &&
"Unexpected shuffle mask index");
3016 if (!NewX || NewX == Src) {
3020 if (!NewY || NewY == Src) {
3036 replaceValue(
I, *NewX);
3053 bool IsUnary =
all_of(NewMask, [&](
int M) {
return M < (int)NumSrcElts; });
3059 nullptr, {NewX, NewY});
3061 NewCost += InnerCost0;
3063 NewCost += InnerCost1;
3066 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
3068 if (NewCost > OldCost)
3072 replaceValue(
I, *Shuf);
3088bool VectorCombine::foldShufflesOfLengthChangingShuffles(Instruction &
I) {
3093 unsigned ChainLength = 0;
3094 SmallVector<int>
Mask;
3095 SmallVector<int> YMask;
3105 ArrayRef<int> OuterMask;
3106 Value *OuterV0, *OuterV1;
3107 if (ChainLength != 0 && !Trunk->
hasOneUse())
3110 m_Mask(OuterMask))))
3112 if (OuterV0->
getType() != TrunkType) {
3118 ArrayRef<int> InnerMask0, InnerMask1;
3119 Value *A0, *A1, *B0, *B1;
3124 bool Match0Leaf = Match0 && A0->
getType() !=
I.getType();
3125 bool Match1Leaf = Match1 && A1->
getType() !=
I.getType();
3126 if (Match0Leaf == Match1Leaf) {
3132 SmallVector<int> CommutedOuterMask;
3139 for (
int &M : CommutedOuterMask) {
3142 if (M < (
int)NumTrunkElts)
3147 OuterMask = CommutedOuterMask;
3166 int NumLeafElts = YType->getNumElements();
3167 SmallVector<int> LocalYMask(InnerMask1);
3168 for (
int &M : LocalYMask) {
3169 if (M >= NumLeafElts)
3179 Mask.assign(OuterMask);
3180 YMask.
assign(LocalYMask);
3181 OldCost = NewCost = LocalOldCost;
3188 SmallVector<int> NewYMask(YMask);
3190 for (
auto [CombinedM, LeafM] :
llvm::zip(NewYMask, LocalYMask)) {
3191 if (LeafM == -1 || CombinedM == LeafM)
3193 if (CombinedM == -1) {
3203 SmallVector<int> NewMask;
3204 NewMask.
reserve(NumTrunkElts);
3205 for (
int M : Mask) {
3206 if (M < 0 || M >=
static_cast<int>(NumTrunkElts))
3221 if (LocalNewCost >= NewCost && LocalOldCost < LocalNewCost - NewCost)
3225 if (ChainLength == 1) {
3226 dbgs() <<
"Found chain of shuffles fed by length-changing shuffles: "
3229 dbgs() <<
" next chain link: " << *Trunk <<
'\n'
3230 <<
" old cost: " << (OldCost + LocalOldCost)
3231 <<
" new cost: " << LocalNewCost <<
'\n';
3236 OldCost += LocalOldCost;
3237 NewCost = LocalNewCost;
3241 if (ChainLength <= 1)
3245 return M < 0 || M >=
static_cast<int>(NumTrunkElts);
3248 for (
int &M : Mask) {
3249 if (M >=
static_cast<int>(NumTrunkElts))
3250 M = YMask[
M - NumTrunkElts];
3254 replaceValue(
I, *Root);
3261 replaceValue(
I, *Root);
3267bool VectorCombine::foldShuffleOfIntrinsics(Instruction &
I) {
3269 ArrayRef<int> OldMask;
3279 if (IID != II1->getIntrinsicID())
3288 if (!ShuffleDstTy || !II0Ty)
3294 for (
unsigned I = 0,
E = II0->arg_size();
I !=
E; ++
I)
3296 II0->getArgOperand(
I) != II1->getArgOperand(
I))
3302 II0Ty, OldMask,
CostKind, 0,
nullptr, {II0, II1}, &
I);
3306 SmallDenseSet<std::pair<Value *, Value *>> SeenOperandPairs;
3307 for (
unsigned I = 0,
E = II0->arg_size();
I !=
E; ++
I) {
3309 NewArgsTy.
push_back(II0->getArgOperand(
I)->getType());
3313 ShuffleDstTy->getNumElements());
3315 std::pair<Value *, Value *> OperandPair =
3316 std::make_pair(II0->getArgOperand(
I), II1->getArgOperand(
I));
3317 if (!SeenOperandPairs.
insert(OperandPair).second) {
3323 CostKind, 0,
nullptr, {II0->getArgOperand(
I), II1->getArgOperand(
I)});
3326 IntrinsicCostAttributes NewAttr(IID, ShuffleDstTy, NewArgsTy);
3329 if (!II0->hasOneUse())
3331 if (II1 != II0 && !II1->hasOneUse())
3335 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
3338 if (NewCost > OldCost)
3342 SmallDenseMap<std::pair<Value *, Value *>,
Value *> ShuffleCache;
3343 for (
unsigned I = 0,
E = II0->arg_size();
I !=
E; ++
I)
3347 std::pair<Value *, Value *> OperandPair =
3348 std::make_pair(II0->getArgOperand(
I), II1->getArgOperand(
I));
3349 auto It = ShuffleCache.
find(OperandPair);
3350 if (It != ShuffleCache.
end()) {
3356 II1->getArgOperand(
I), OldMask);
3357 ShuffleCache[OperandPair] = Shuf;
3365 NewInst->copyIRFlags(II0);
3366 NewInst->andIRFlags(II1);
3369 replaceValue(
I, *NewIntrinsic);
3375bool VectorCombine::foldPermuteOfIntrinsic(Instruction &
I) {
3387 if (!ShuffleDstTy || !IntrinsicSrcTy)
3391 unsigned NumSrcElts = IntrinsicSrcTy->getNumElements();
3392 if (
any_of(Mask, [NumSrcElts](
int M) {
return M >= (int)NumSrcElts; }))
3405 IntrinsicSrcTy, Mask,
CostKind, 0,
nullptr, {V0}, &
I);
3409 for (
unsigned I = 0,
E = II0->arg_size();
I !=
E; ++
I) {
3411 NewArgsTy.
push_back(II0->getArgOperand(
I)->getType());
3415 ShuffleDstTy->getNumElements());
3418 ArgTy, VecTy, Mask,
CostKind, 0,
nullptr,
3419 {II0->getArgOperand(
I)});
3422 IntrinsicCostAttributes NewAttr(IID, ShuffleDstTy, NewArgsTy);
3427 if (!II0->hasOneUse())
3430 LLVM_DEBUG(
dbgs() <<
"Found a permute of intrinsic: " <<
I <<
"\n OldCost: "
3431 << OldCost <<
" vs NewCost: " << NewCost <<
"\n");
3433 if (NewCost > OldCost)
3438 for (
unsigned I = 0,
E = II0->arg_size();
I !=
E; ++
I) {
3453 replaceValue(
I, *NewIntrinsic);
3463 int M = SV->getMaskValue(Lane);
3466 if (
static_cast<unsigned>(M) < NumElts) {
3467 V = SV->getOperand(0);
3470 V = SV->getOperand(1);
3481 auto [U, Lane] = IL;
3494 unsigned NumElts = Ty->getNumElements();
3495 if (Item.
size() == NumElts || NumElts == 1 || Item.
size() % NumElts != 0)
3501 std::iota(ConcatMask.
begin(), ConcatMask.
end(), 0);
3507 unsigned NumSlices = Item.
size() / NumElts;
3512 for (
unsigned Slice = 0; Slice < NumSlices; ++Slice) {
3513 Value *SliceV = Item[Slice * NumElts].first;
3514 if (!SliceV || SliceV->
getType() != Ty)
3516 for (
unsigned Elt = 0; Elt < NumElts; ++Elt) {
3517 auto [V, Lane] = Item[Slice * NumElts + Elt];
3518 if (Lane !=
static_cast<int>(Elt) || SliceV != V)
3527 const DenseSet<std::pair<Value *, Use *>> &IdentityLeafs,
3528 const DenseSet<std::pair<Value *, Use *>> &SplatLeafs,
3529 const DenseSet<std::pair<Value *, Use *>> &ConcatLeafs,
3531 auto [FrontV, FrontLane] = Item.
front();
3533 if (IdentityLeafs.contains(std::make_pair(FrontV, From))) {
3536 if (SplatLeafs.contains(std::make_pair(FrontV, From))) {
3538 return Builder.CreateShuffleVector(FrontV, Mask);
3540 if (ConcatLeafs.contains(std::make_pair(FrontV, From))) {
3544 for (
unsigned S = 0; S < Values.
size(); ++S)
3545 Values[S] = Item[S * NumElts].first;
3547 while (Values.
size() > 1) {
3550 std::iota(Mask.begin(), Mask.end(), 0);
3552 for (
unsigned S = 0; S < NewValues.
size(); ++S)
3554 Builder.CreateShuffleVector(Values[S * 2], Values[S * 2 + 1], Mask);
3562 unsigned NumOps =
I->getNumOperands() - (
II ? 1 : 0);
3564 for (
unsigned Idx = 0; Idx <
NumOps; Idx++) {
3567 Ops[Idx] =
II->getOperand(Idx);
3571 &
I->getOperandUse(Idx), Ty, IdentityLeafs,
3572 SplatLeafs, ConcatLeafs, Builder,
TTI);
3576 for (
const auto &Lane : Item)
3589 auto *
Value = Builder.CreateCmp(CI->getPredicate(),
Ops[0],
Ops[1]);
3599 auto *
Value = Builder.CreateCast(CI->getOpcode(),
Ops[0], DstTy);
3604 auto *
Value = Builder.CreateIntrinsic(DstTy,
II->getIntrinsicID(),
Ops);
3618bool VectorCombine::foldShuffleToIdentity(Instruction &
I) {
3620 if (!Ty ||
I.use_empty())
3624 for (
unsigned M = 0,
E = Ty->getNumElements(); M <
E; ++M)
3628 Worklist.
push_back(std::make_pair(Start, &*
I.use_begin()));
3629 DenseSet<std::pair<Value *, Use *>> IdentityLeafs, SplatLeafs, ConcatLeafs;
3630 unsigned NumVisited = 0;
3632 while (!Worklist.
empty()) {
3637 auto Item = ItemFrom.first;
3638 auto From = ItemFrom.second;
3639 auto [FrontV, FrontLane] = Item.front();
3647 return X->getType() ==
Y->getType() &&
3652 if (FrontLane == 0 &&
3654 Ty->getNumElements() &&
3656 Value *FrontV = Item.front().first;
3657 return !
E.value().first || (IsEquiv(
E.value().first, FrontV) &&
3658 E.value().second == (int)
E.index());
3660 IdentityLeafs.
insert(std::make_pair(FrontV, From));
3665 C &&
C->getSplatValue() &&
3667 Value *FrontV = Item.front().first;
3673 SplatLeafs.
insert(std::make_pair(FrontV, From));
3678 auto [FrontV, FrontLane] = Item.front();
3679 auto [
V, Lane] = IL;
3680 return !
V || (
V == FrontV && Lane == FrontLane);
3682 SplatLeafs.
insert(std::make_pair(FrontV, From));
3688 auto CheckLaneIsEquivalentToFirst = [Item](
InstLane IL) {
3689 Value *FrontV = Item.front().first;
3698 if (CI->getPredicate() !=
cast<CmpInst>(FrontV)->getPredicate())
3701 if (CI->getSrcTy()->getScalarType() !=
3706 SI->getOperand(0)->getType() !=
3713 II->getIntrinsicID() ==
3715 !
II->hasOperandBundles());
3722 BO && BO->isIntDivRem())
3729 }
else if (
isa<UnaryOperator, TruncInst, ZExtInst, SExtInst, FPToSIInst,
3730 FPToUIInst, SIToFPInst, UIToFPInst>(FrontV)) {
3738 if (DstTy && SrcTy &&
3739 SrcTy->getNumElements() == DstTy->getNumElements()) {
3741 &BitCast->getOperandUse(0));
3746 &Sel->getOperandUse(0));
3748 &Sel->getOperandUse(1));
3750 &Sel->getOperandUse(2));
3754 !
II->hasOperandBundles()) {
3755 for (
unsigned Op = 0,
E =
II->getNumOperands() - 1;
Op <
E;
Op++) {
3759 Value *FrontV = Item.front().first;
3775 ConcatLeafs.
insert(std::make_pair(FrontV, From));
3782 if (NumVisited <= 1)
3785 LLVM_DEBUG(
dbgs() <<
"Found a superfluous identity shuffle: " <<
I <<
"\n");
3791 SplatLeafs, ConcatLeafs, Builder, &
TTI);
3792 replaceValue(
I, *V);
3799bool VectorCombine::foldShuffleFromReductions(Instruction &
I) {
3803 switch (
II->getIntrinsicID()) {
3804 case Intrinsic::vector_reduce_add:
3805 case Intrinsic::vector_reduce_mul:
3806 case Intrinsic::vector_reduce_and:
3807 case Intrinsic::vector_reduce_or:
3808 case Intrinsic::vector_reduce_xor:
3809 case Intrinsic::vector_reduce_smin:
3810 case Intrinsic::vector_reduce_smax:
3811 case Intrinsic::vector_reduce_umin:
3812 case Intrinsic::vector_reduce_umax:
3821 std::queue<Value *> Worklist;
3822 SmallPtrSet<Value *, 4> Visited;
3823 ShuffleVectorInst *Shuffle =
nullptr;
3827 while (!Worklist.empty()) {
3828 Value *CV = Worklist.front();
3840 if (CI->isBinaryOp()) {
3841 for (
auto *
Op : CI->operand_values())
3845 if (Shuffle && Shuffle != SV)
3862 for (
auto *V : Visited)
3863 for (
auto *U :
V->users())
3864 if (!Visited.contains(U) && U != &
I)
3867 FixedVectorType *VecType =
3871 FixedVectorType *ShuffleInputType =
3873 if (!ShuffleInputType)
3879 SmallVector<int> ConcatMask;
3881 sort(ConcatMask, [](
int X,
int Y) {
return (
unsigned)
X < (unsigned)
Y; });
3882 bool UsesSecondVec =
3883 any_of(ConcatMask, [&](
int M) {
return M >= (int)NumInputElts; });
3890 ShuffleInputType, ConcatMask,
CostKind);
3892 LLVM_DEBUG(
dbgs() <<
"Found a reduction feeding from a shuffle: " << *Shuffle
3894 LLVM_DEBUG(
dbgs() <<
" OldCost: " << OldCost <<
" vs NewCost: " << NewCost
3896 bool MadeChanges =
false;
3897 if (NewCost < OldCost) {
3901 LLVM_DEBUG(
dbgs() <<
"Created new shuffle: " << *NewShuffle <<
"\n");
3902 replaceValue(*Shuffle, *NewShuffle);
3908 MadeChanges |= foldSelectShuffle(*Shuffle,
true);
3954bool VectorCombine::foldShuffleChainsToReduce(Instruction &
I) {
3956 std::queue<Value *> InstWorklist;
3960 std::optional<unsigned int> CommonCallOp = std::nullopt;
3961 std::optional<Instruction::BinaryOps> CommonBinOp = std::nullopt;
3963 bool IsFirstCallOrBinInst =
true;
3964 bool ShouldBeCallOrBinInst =
true;
3970 SmallVector<Value *, 2> PrevVecV(2,
nullptr);
3980 int64_t
VecSize = FVT->getNumElements();
3986 unsigned int NumLevels =
Log2_64_Ceil(VecSize), VisitedCnt = 0;
3987 int64_t ShuffleMaskHalf = 1, ExpectedParityMask = 0;
3997 for (
int Cur = VecSize, Mask = NumLevels - 1; Cur > 1;
3998 Cur = (Cur + 1) / 2, --
Mask) {
4000 ExpectedParityMask |= (1ll <<
Mask);
4003 InstWorklist.push(VecOpEE);
4005 bool IsPartialReduction =
false;
4007 while (!InstWorklist.empty()) {
4008 Value *CI = InstWorklist.front();
4012 if (!ShouldBeCallOrBinInst)
4015 if (!IsFirstCallOrBinInst &&
any_of(PrevVecV,
equal_to(
nullptr)))
4020 if (
II != (IsFirstCallOrBinInst ? VecOpEE : PrevVecV[0]))
4022 IsFirstCallOrBinInst =
false;
4025 CommonCallOp =
II->getIntrinsicID();
4026 if (
II->getIntrinsicID() != *CommonCallOp)
4029 switch (
II->getIntrinsicID()) {
4030 case Intrinsic::umin:
4031 case Intrinsic::umax:
4032 case Intrinsic::smin:
4033 case Intrinsic::smax: {
4034 auto *Op0 =
II->getOperand(0);
4035 auto *Op1 =
II->getOperand(1);
4043 ShouldBeCallOrBinInst ^= 1;
4045 IntrinsicCostAttributes ICA(
4046 *CommonCallOp,
II->getType(),
4047 {PrevVecV[0]->getType(), PrevVecV[1]->getType()});
4054 InstWorklist.push(PrevVecV[1]);
4055 InstWorklist.push(PrevVecV[0]);
4059 if (!ShouldBeCallOrBinInst)
4062 if (!IsFirstCallOrBinInst &&
any_of(PrevVecV,
equal_to(
nullptr)))
4065 if (BinOp != (IsFirstCallOrBinInst ? VecOpEE : PrevVecV[0]))
4067 IsFirstCallOrBinInst =
false;
4075 switch (*CommonBinOp) {
4076 case BinaryOperator::Add:
4077 case BinaryOperator::Mul:
4078 case BinaryOperator::Or:
4079 case BinaryOperator::And:
4080 case BinaryOperator::Xor: {
4090 ShouldBeCallOrBinInst ^= 1;
4097 InstWorklist.push(PrevVecV[1]);
4098 InstWorklist.push(PrevVecV[0]);
4102 if (ShouldBeCallOrBinInst ||
any_of(PrevVecV,
equal_to(
nullptr)))
4105 if (SVInst != PrevVecV[1])
4108 ArrayRef<int> CurMask;
4114 for (
int Mask = 0, MaskSize = CurMask.
size(); Mask != MaskSize; ++Mask) {
4115 if (Mask < ShuffleMaskHalf &&
4116 CurMask[Mask] != ShuffleMaskHalf + Mask - (ExpectedParityMask & 1))
4118 if (Mask >= ShuffleMaskHalf && CurMask[Mask] != -1)
4123 ShuffleMaskHalf *= 2;
4124 ShuffleMaskHalf -= (ExpectedParityMask & 1);
4125 ExpectedParityMask >>= 1;
4128 SVInst->getType(), SVInst->getType(),
4132 if (!ExpectedParityMask && VisitedCnt == NumLevels)
4135 ShouldBeCallOrBinInst ^= 1;
4142 if (ShouldBeCallOrBinInst && VisitedCnt >= 1 && CI == PrevVecV[0] &&
4144 IsPartialReduction =
true;
4153 if (ShouldBeCallOrBinInst && !IsPartialReduction)
4156 assert(VecSize != -1 &&
"Expected Match for Vector Size");
4158 Value *FinalVecV = PrevVecV[0];
4171 FixedVectorType *ReduceVecTy = FinalVecVTy;
4172 SmallVector<int> ExtractMask;
4174 if (IsPartialReduction) {
4175 unsigned SubVecSize = ShuffleMaskHalf;
4177 ExtractMask.
resize(SubVecSize);
4178 std::iota(ExtractMask.
begin(), ExtractMask.
end(), 0);
4180 ReduceVecTy, FinalVecVTy, ExtractMask,
4184 IntrinsicCostAttributes ICA(ReducedOp, ReduceVecTy, {ReduceVecTy});
4187 LLVM_DEBUG(
dbgs() <<
"Found reduction shuffle chain: " <<
I <<
"\n OldCost : "
4188 << OrigCost <<
" vs NewCost: " << NewCost <<
"\n");
4190 if (VecOpEE->
hasOneUse() ? (NewCost > OrigCost) : (NewCost >= OrigCost))
4193 Value *ReduceInput = FinalVecV;
4194 if (IsPartialReduction)
4198 ReducedOp, {ReduceInput->
getType()}, {ReduceInput});
4199 replaceValue(
I, *ReducedResult);
4208bool VectorCombine::foldCastFromReductions(Instruction &
I) {
4213 bool TruncOnly =
false;
4216 case Intrinsic::vector_reduce_add:
4217 case Intrinsic::vector_reduce_mul:
4220 case Intrinsic::vector_reduce_and:
4221 case Intrinsic::vector_reduce_or:
4222 case Intrinsic::vector_reduce_xor:
4229 Value *ReductionSrc =
I.getOperand(0);
4241 Type *ResultTy =
I.getType();
4244 ReductionOpc, ReductionSrcTy, std::nullopt,
CostKind);
4254 if (OldCost <= NewCost || !NewCost.
isValid())
4258 II->getIntrinsicID(), {Src});
4260 replaceValue(
I, *NewCast);
4288bool VectorCombine::foldSignBitReductionCmp(Instruction &
I) {
4290 IntrinsicInst *ReduceOp;
4291 const APInt *CmpVal;
4298 case Intrinsic::vector_reduce_or:
4299 case Intrinsic::vector_reduce_umax:
4300 case Intrinsic::vector_reduce_and:
4301 case Intrinsic::vector_reduce_umin:
4302 case Intrinsic::vector_reduce_add:
4313 unsigned BitWidth = VecTy->getScalarSizeInBits();
4317 unsigned NumElts = VecTy->getNumElements();
4326 case Intrinsic::vector_reduce_or:
4327 case Intrinsic::vector_reduce_umax:
4328 TreeOpcode = Instruction::Or;
4330 case Intrinsic::vector_reduce_and:
4331 case Intrinsic::vector_reduce_umin:
4332 TreeOpcode = Instruction::And;
4334 case Intrinsic::vector_reduce_add:
4335 TreeOpcode = Instruction::Add;
4343 SmallVector<Value *, 8> Worklist;
4344 SmallVector<Value *, 8> Sources;
4346 std::optional<bool> IsAShr;
4347 constexpr unsigned MaxSources = 8;
4352 while (!Worklist.
empty() && Worklist.
size() <= MaxSources &&
4353 Sources.
size() <= MaxSources) {
4362 bool ThisIsAShr = Shr->getOpcode() == Instruction::AShr;
4364 IsAShr = ThisIsAShr;
4365 else if (*IsAShr != ThisIsAShr)
4391 if (Sources.
empty() || Sources.
size() > MaxSources ||
4392 Worklist.
size() > MaxSources || !IsAShr)
4395 unsigned NumSources = Sources.
size();
4399 if (OrigIID == Intrinsic::vector_reduce_add &&
4407 (OrigIID == Intrinsic::vector_reduce_add) ? NumSources * NumElts : 1;
4410 NegativeVal.negate();
4442 TestsNegative =
false;
4443 }
else if (*CmpVal == NegativeVal) {
4444 TestsNegative =
true;
4448 IsEq = Pred == ICmpInst::ICMP_EQ;
4449 }
else if (Pred == ICmpInst::ICMP_SLT && *CmpVal == RangeHigh) {
4451 TestsNegative = (RangeHigh == NegativeVal);
4452 }
else if (Pred == ICmpInst::ICMP_SGT && *CmpVal == RangeHigh - 1) {
4454 TestsNegative = (RangeHigh == NegativeVal);
4455 }
else if (Pred == ICmpInst::ICMP_SGT && *CmpVal == RangeLow) {
4457 TestsNegative = (RangeLow == NegativeVal);
4458 }
else if (Pred == ICmpInst::ICMP_SLT && *CmpVal == RangeLow + 1) {
4460 TestsNegative = (RangeLow == NegativeVal);
4503 enum CheckKind :
unsigned {
4510 auto RequiresOr = [](CheckKind
C) ->
bool {
return C & 0b100; };
4512 auto IsNegativeCheck = [](CheckKind
C) ->
bool {
return C & 0b010; };
4514 auto Invert = [](CheckKind
C) {
return CheckKind(
C ^ 0b011); };
4518 case Intrinsic::vector_reduce_or:
4519 case Intrinsic::vector_reduce_umax:
4520 Base = TestsNegative ? AnyNeg : AllNonNeg;
4522 case Intrinsic::vector_reduce_and:
4523 case Intrinsic::vector_reduce_umin:
4524 Base = TestsNegative ? AllNeg : AnyNonNeg;
4526 case Intrinsic::vector_reduce_add:
4527 Base = TestsNegative ? AllNeg : AllNonNeg;
4542 return ArithCost <= MinMaxCost ? std::make_pair(Arith, ArithCost)
4543 : std::make_pair(MinMax, MinMaxCost);
4547 auto [NewIID, NewCost] = RequiresOr(
Check)
4548 ? PickCheaper(Intrinsic::vector_reduce_or,
4549 Intrinsic::vector_reduce_umax)
4550 : PickCheaper(
Intrinsic::vector_reduce_and,
4554 if (NumSources > 1) {
4555 unsigned CombineOpc =
4556 RequiresOr(
Check) ? Instruction::Or : Instruction::And;
4561 LLVM_DEBUG(
dbgs() <<
"Found sign-bit reduction cmp: " <<
I <<
"\n OldCost: "
4562 << OldCost <<
" vs NewCost: " << NewCost <<
"\n");
4564 if (NewCost > OldCost)
4569 Type *ScalarTy = VecTy->getScalarType();
4572 if (NumSources == 1) {
4583 replaceValue(
I, *NewCmp);
4608bool VectorCombine::foldICmpEqZeroVectorReduce(Instruction &
I) {
4619 switch (
II->getIntrinsicID()) {
4620 case Intrinsic::vector_reduce_add:
4621 case Intrinsic::vector_reduce_or:
4622 case Intrinsic::vector_reduce_umin:
4623 case Intrinsic::vector_reduce_umax:
4624 case Intrinsic::vector_reduce_smin:
4625 case Intrinsic::vector_reduce_smax:
4631 Value *InnerOp =
II->getArgOperand(0);
4674 switch (
II->getIntrinsicID()) {
4675 case Intrinsic::vector_reduce_add: {
4680 unsigned NumElems = XTy->getNumElements();
4686 if (LeadingZerosX <= LostBits || LeadingZerosFX <= LostBits)
4694 case Intrinsic::vector_reduce_smin:
4695 case Intrinsic::vector_reduce_smax:
4705 LLVM_DEBUG(
dbgs() <<
"Found a reduction to 0 comparison with removable op: "
4721 case Intrinsic::vector_reduce_add:
4722 case Intrinsic::vector_reduce_or:
4728 case Intrinsic::vector_reduce_umin:
4729 case Intrinsic::vector_reduce_umax:
4730 case Intrinsic::vector_reduce_smin:
4731 case Intrinsic::vector_reduce_smax:
4743 NewReduceCost + (InnerOp->
hasOneUse() ? 0 : ExtCost);
4745 LLVM_DEBUG(
dbgs() <<
"Found a removable extension before reduction: "
4746 << *InnerOp <<
"\n OldCost: " << OldCost
4747 <<
" vs NewCost: " << NewCost <<
"\n");
4753 if (NewCost > OldCost)
4762 Builder.
CreateICmp(Pred, NewReduce, ConstantInt::getNullValue(Ty));
4763 replaceValue(
I, *NewCmp);
4794bool VectorCombine::foldEquivalentReductionCmp(Instruction &
I) {
4797 const APInt *CmpVal;
4802 if (!
II || !
II->hasOneUse())
4805 const auto IsValidOrUmaxCmp = [&]() {
4814 bool IsPositive = CmpVal->
isAllOnes() && Pred == ICmpInst::ICMP_SGT;
4816 bool IsNegative = (CmpVal->
isZero() || CmpVal->
isOne() || *CmpVal == 2) &&
4817 Pred == ICmpInst::ICMP_SLT;
4818 return IsEquality || IsPositive || IsNegative;
4821 const auto IsValidAndUminCmp = [&]() {
4826 const auto LeadingOnes = CmpVal->
countl_one();
4833 bool IsNegative = CmpVal->
isZero() && Pred == ICmpInst::ICMP_SLT;
4842 ((*CmpVal)[0] || (*CmpVal)[1]) && Pred == ICmpInst::ICMP_SGT;
4843 return IsEquality || IsNegative || IsPositive;
4851 switch (OriginalIID) {
4852 case Intrinsic::vector_reduce_or:
4853 if (!IsValidOrUmaxCmp())
4855 AlternativeIID = Intrinsic::vector_reduce_umax;
4857 case Intrinsic::vector_reduce_umax:
4858 if (!IsValidOrUmaxCmp())
4860 AlternativeIID = Intrinsic::vector_reduce_or;
4862 case Intrinsic::vector_reduce_and:
4863 if (!IsValidAndUminCmp())
4865 AlternativeIID = Intrinsic::vector_reduce_umin;
4867 case Intrinsic::vector_reduce_umin:
4868 if (!IsValidAndUminCmp())
4870 AlternativeIID = Intrinsic::vector_reduce_and;
4883 if (ReductionOpc != Instruction::ICmp)
4894 <<
"\n OrigCost: " << OrigCost
4895 <<
" vs AltCost: " << AltCost <<
"\n");
4897 if (AltCost >= OrigCost)
4901 Type *ScalarTy = VecTy->getScalarType();
4904 Builder.
CreateICmp(Pred, NewReduce, ConstantInt::get(ScalarTy, *CmpVal));
4906 replaceValue(
I, *NewCmp);
4920 unsigned Depth = 0) {
4921 constexpr unsigned MaxLocalDepth = 2;
4922 if (
Depth > MaxLocalDepth)
4925 auto NumSignBits = [&](
const Value *
X) {
4928 if (NumSignBits(V) == V->getType()->getScalarSizeInBits())
4933 return NumSignBits(
A) >= 2 && NumSignBits(
B) >= 2 &&
4944bool VectorCombine::foldReduceAddCmpZero(Instruction &
I) {
4954 if (!VecTy || VecTy->getNumElements() < 2)
4960 if (!IsNonNegative && !IsNonPositive)
4965 unsigned NumElts = VecTy->getNumElements();
4967 if (
Log2_32(NumElts) >= NumSignBits)
4970 ICmpInst::Predicate NewPred;
4972 case ICmpInst::ICMP_EQ:
4973 case ICmpInst::ICMP_ULE:
4974 case ICmpInst::ICMP_SLE:
4975 case ICmpInst::ICMP_SGE:
4976 NewPred = ICmpInst::ICMP_EQ;
4978 case ICmpInst::ICMP_NE:
4979 case ICmpInst::ICMP_UGT:
4980 case ICmpInst::ICMP_SGT:
4981 case ICmpInst::ICMP_SLT:
4982 NewPred = ICmpInst::ICMP_NE;
4992 if (!IsNonNegative &&
4993 (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SLE))
4995 if (!IsNonPositive &&
4996 (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SGE))
4998 if ((Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SLE ||
4999 Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SGE) &&
5000 Log2_32(NumElts) >= NumSignBits - 1)
5004 Instruction::Add, VecTy, std::nullopt,
CostKind);
5006 Instruction::Or, VecTy, std::nullopt,
CostKind);
5008 Intrinsic::umax, VecTy, FastMathFlags(),
CostKind);
5011 bool UseOr = OrCost.
isValid() && (!UmaxCost.
isValid() || OrCost <= UmaxCost);
5013 if (AltCost > OrigCost)
5019 Intrinsic::vector_reduce_umax, {VecTy}, {Vec});
5020 Worklist.pushValue(NewReduce);
5022 NewPred, NewReduce, ConstantInt::getNullValue(VecTy->getScalarType()));
5023 replaceValue(
I, *NewCmp);
5032 constexpr unsigned MaxVisited = 32;
5035 bool FoundReduction =
false;
5038 while (!WorkList.
empty()) {
5040 for (
User *U :
I->users()) {
5042 if (!UI || !Visited.
insert(UI).second)
5044 if (Visited.
size() > MaxVisited)
5050 switch (
II->getIntrinsicID()) {
5051 case Intrinsic::vector_reduce_add:
5052 case Intrinsic::vector_reduce_mul:
5053 case Intrinsic::vector_reduce_and:
5054 case Intrinsic::vector_reduce_or:
5055 case Intrinsic::vector_reduce_xor:
5056 case Intrinsic::vector_reduce_smin:
5057 case Intrinsic::vector_reduce_smax:
5058 case Intrinsic::vector_reduce_umin:
5059 case Intrinsic::vector_reduce_umax:
5060 FoundReduction =
true;
5073 return FoundReduction;
5086bool VectorCombine::foldSelectShuffle(Instruction &
I,
bool FromReduction) {
5091 if (!Op0 || !Op1 || Op0 == Op1 || !Op0->isBinaryOp() || !Op1->isBinaryOp() ||
5099 SmallPtrSet<Instruction *, 4> InputShuffles({SVI0A, SVI0B, SVI1A, SVI1B});
5101 if (!
I ||
I->getOperand(0)->getType() != VT)
5103 return any_of(
I->users(), [&](User *U) {
5104 return U != Op0 && U != Op1 &&
5105 !(isa<ShuffleVectorInst>(U) &&
5106 (InputShuffles.contains(cast<Instruction>(U)) ||
5107 isInstructionTriviallyDead(cast<Instruction>(U))));
5110 if (checkSVNonOpUses(SVI0A) || checkSVNonOpUses(SVI0B) ||
5111 checkSVNonOpUses(SVI1A) || checkSVNonOpUses(SVI1B))
5119 for (
auto *U :
I->users()) {
5121 if (!SV || SV->getType() != VT)
5123 if ((SV->getOperand(0) != Op0 && SV->getOperand(0) != Op1) ||
5124 (SV->getOperand(1) != Op0 && SV->getOperand(1) != Op1))
5131 if (!collectShuffles(Op0) || !collectShuffles(Op1))
5135 if (FromReduction && Shuffles.
size() > 1)
5140 if (!FromReduction) {
5141 for (
size_t Idx = 0,
E = Shuffles.
size(); Idx !=
E; ++Idx) {
5142 for (
auto *U : Shuffles[Idx]->
users()) {
5157 int MaxV1Elt = 0, MaxV2Elt = 0;
5158 unsigned NumElts = VT->getNumElements();
5159 for (ShuffleVectorInst *SVN : Shuffles) {
5160 SmallVector<int>
Mask;
5161 SVN->getShuffleMask(Mask);
5165 Value *SVOp0 = SVN->getOperand(0);
5166 Value *SVOp1 = SVN->getOperand(1);
5171 for (
int &Elem : Mask) {
5177 if (SVOp0 == Op1 && SVOp1 == Op0) {
5181 if (SVOp0 != Op0 || SVOp1 != Op1)
5187 SmallVector<int> ReconstructMask;
5188 for (
unsigned I = 0;
I <
Mask.size();
I++) {
5191 }
else if (Mask[
I] <
static_cast<int>(NumElts)) {
5192 MaxV1Elt = std::max(MaxV1Elt, Mask[
I]);
5193 auto It =
find_if(V1, [&](
const std::pair<int, int> &
A) {
5194 return Mask[
I] ==
A.first;
5203 MaxV2Elt = std::max<int>(MaxV2Elt, Mask[
I] - NumElts);
5204 auto It =
find_if(V2, [&](
const std::pair<int, int> &
A) {
5205 return Mask[
I] -
static_cast<int>(NumElts) ==
A.first;
5219 sort(ReconstructMask);
5220 OrigReconstructMasks.
push_back(std::move(ReconstructMask));
5228 (MaxV1Elt ==
static_cast<int>(V1.
size()) - 1 &&
5229 MaxV2Elt ==
static_cast<int>(V2.
size()) - 1))
5241 if (InputShuffles.contains(SSV))
5243 return SV->getMaskValue(M);
5251 std::pair<int, int>
Y) {
5252 int MXA = GetBaseMaskValue(
A,
X.first);
5253 int MYA = GetBaseMaskValue(
A,
Y.first);
5256 stable_sort(V1, [&](std::pair<int, int>
A, std::pair<int, int>
B) {
5257 return SortBase(SVI0A,
A,
B);
5259 stable_sort(V2, [&](std::pair<int, int>
A, std::pair<int, int>
B) {
5260 return SortBase(SVI1A,
A,
B);
5265 for (
const auto &Mask : OrigReconstructMasks) {
5266 SmallVector<int> ReconstructMask;
5267 for (
int M : Mask) {
5269 auto It =
find_if(V, [M](
auto A) {
return A.second ==
M; });
5270 assert(It !=
V.end() &&
"Expected all entries in Mask");
5271 return std::distance(
V.begin(), It);
5275 else if (M <
static_cast<int>(NumElts)) {
5276 ReconstructMask.
push_back(FindIndex(V1, M));
5278 ReconstructMask.
push_back(NumElts + FindIndex(V2, M));
5281 ReconstructMasks.
push_back(std::move(ReconstructMask));
5286 SmallVector<int> V1A, V1B, V2A, V2B;
5287 for (
unsigned I = 0;
I < V1.
size();
I++) {
5288 V1A.
push_back(GetBaseMaskValue(SVI0A, V1[
I].first));
5289 V1B.
push_back(GetBaseMaskValue(SVI0B, V1[
I].first));
5291 for (
unsigned I = 0;
I < V2.
size();
I++) {
5292 V2A.
push_back(GetBaseMaskValue(SVI1A, V2[
I].first));
5293 V2B.
push_back(GetBaseMaskValue(SVI1B, V2[
I].first));
5295 while (V1A.
size() < NumElts) {
5299 while (V2A.
size() < NumElts) {
5311 VT, VT, SV->getShuffleMask(),
CostKind);
5318 unsigned ElementSize = VT->getElementType()->getPrimitiveSizeInBits();
5319 unsigned MaxVectorSize =
5321 unsigned MaxElementsInVector = MaxVectorSize / ElementSize;
5322 if (MaxElementsInVector == 0)
5331 std::set<SmallVector<int, 4>> UniqueShuffles;
5336 unsigned NumFullVectors =
Mask.size() / MaxElementsInVector;
5337 if (NumFullVectors < 2)
5338 return C + ShuffleCost;
5339 SmallVector<int, 4> SubShuffle(MaxElementsInVector);
5340 unsigned NumUniqueGroups = 0;
5341 unsigned NumGroups =
Mask.size() / MaxElementsInVector;
5344 for (
unsigned I = 0;
I < NumFullVectors; ++
I) {
5345 for (
unsigned J = 0; J < MaxElementsInVector; ++J)
5346 SubShuffle[J] = Mask[MaxElementsInVector *
I + J];
5347 if (UniqueShuffles.insert(SubShuffle).second)
5348 NumUniqueGroups += 1;
5350 return C + ShuffleCost * NumUniqueGroups / NumGroups;
5356 SmallVector<int, 16>
Mask;
5357 SV->getShuffleMask(Mask);
5358 return AddShuffleMaskAdjustedCost(
C, Mask);
5361 auto AllShufflesHaveSameOperands =
5362 [](SmallPtrSetImpl<Instruction *> &InputShuffles) {
5363 if (InputShuffles.size() < 2)
5365 ShuffleVectorInst *FirstSV =
5372 std::next(InputShuffles.begin()), InputShuffles.end(),
5373 [&](Instruction *
I) {
5374 ShuffleVectorInst *SV = dyn_cast<ShuffleVectorInst>(I);
5375 return SV && SV->getOperand(0) == In0 && SV->getOperand(1) == In1;
5384 CostBefore += std::accumulate(Shuffles.begin(), Shuffles.end(),
5386 if (AllShufflesHaveSameOperands(InputShuffles)) {
5387 UniqueShuffles.clear();
5388 CostBefore += std::accumulate(InputShuffles.begin(), InputShuffles.end(),
5391 CostBefore += std::accumulate(InputShuffles.begin(), InputShuffles.end(),
5397 FixedVectorType *Op0SmallVT =
5399 FixedVectorType *Op1SmallVT =
5404 UniqueShuffles.clear();
5405 CostAfter += std::accumulate(ReconstructMasks.begin(), ReconstructMasks.end(),
5407 std::set<SmallVector<int>> OutputShuffleMasks({V1A, V1B, V2A, V2B});
5409 std::accumulate(OutputShuffleMasks.begin(), OutputShuffleMasks.end(),
5412 LLVM_DEBUG(
dbgs() <<
"Found a binop select shuffle pattern: " <<
I <<
"\n");
5414 <<
" vs CostAfter: " << CostAfter <<
"\n");
5415 if (CostBefore < CostAfter ||
5426 if (InputShuffles.contains(SSV))
5428 return SV->getOperand(
Op);
5432 GetShuffleOperand(SVI0A, 1), V1A);
5435 GetShuffleOperand(SVI0B, 1), V1B);
5438 GetShuffleOperand(SVI1A, 1), V2A);
5441 GetShuffleOperand(SVI1B, 1), V2B);
5446 I->copyIRFlags(Op0,
true);
5451 I->copyIRFlags(Op1,
true);
5453 for (
int S = 0,
E = ReconstructMasks.size(); S !=
E; S++) {
5456 replaceValue(*Shuffles[S], *NSV,
false);
5459 Worklist.pushValue(NSV0A);
5460 Worklist.pushValue(NSV0B);
5461 Worklist.pushValue(NSV1A);
5462 Worklist.pushValue(NSV1B);
5472bool VectorCombine::shrinkType(Instruction &
I) {
5473 Value *ZExted, *OtherOperand;
5479 Value *ZExtOperand =
I.getOperand(
I.getOperand(0) == OtherOperand ? 1 : 0);
5483 unsigned BW = SmallTy->getElementType()->getPrimitiveSizeInBits();
5485 if (
I.getOpcode() == Instruction::LShr) {
5502 Instruction::ZExt, BigTy, SmallTy,
5503 TargetTransformInfo::CastContextHint::None,
CostKind);
5508 for (User *U : ZExtOperand->
users()) {
5515 ShrinkCost += ZExtCost;
5530 ShrinkCost += ZExtCost;
5537 Instruction::Trunc, SmallTy, BigTy,
5538 TargetTransformInfo::CastContextHint::None,
CostKind);
5543 if (ShrinkCost > CurrentCost)
5547 Value *Op0 = ZExted;
5550 if (
I.getOperand(0) == OtherOperand)
5557 replaceValue(
I, *NewZExtr);
5563bool VectorCombine::foldInsExtVectorToShuffle(Instruction &
I) {
5564 Value *DstVec, *SrcVec;
5565 uint64_t ExtIdx, InsIdx;
5575 if (!DstVecTy || !SrcVecTy ||
5581 if (InsIdx >= NumDstElts || ExtIdx >= NumSrcElts || NumDstElts == 1)
5588 bool NeedExpOrNarrow = NumSrcElts != NumDstElts;
5590 if (NeedDstSrcSwap) {
5592 Mask[InsIdx] = ExtIdx % NumDstElts;
5596 std::iota(
Mask.begin(),
Mask.end(), 0);
5597 Mask[InsIdx] = (ExtIdx % NumDstElts) + NumDstElts;
5610 SmallVector<int> ExtToVecMask;
5611 if (!NeedExpOrNarrow) {
5616 nullptr, {DstVec, SrcVec});
5622 ExtToVecMask[ExtIdx % NumDstElts] = ExtIdx;
5625 DstVecTy, SrcVecTy, ExtToVecMask,
CostKind);
5629 if (!Ext->hasOneUse())
5632 LLVM_DEBUG(
dbgs() <<
"Found a insert/extract shuffle-like pair: " <<
I
5633 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
5636 if (OldCost < NewCost)
5639 if (NeedExpOrNarrow) {
5640 if (!NeedDstSrcSwap)
5653 replaceValue(
I, *Shuf);
5662bool VectorCombine::foldInterleaveIntrinsics(Instruction &
I) {
5663 const APInt *SplatVal0, *SplatVal1;
5673 auto *ExtVTy = VectorType::getExtendedElementVectorType(VTy);
5674 unsigned Width = VTy->getElementType()->getIntegerBitWidth();
5683 LLVM_DEBUG(
dbgs() <<
"VC: The cost to cast from " << *ExtVTy <<
" to "
5684 << *
I.getType() <<
" is too high.\n");
5688 APInt NewSplatVal = SplatVal1->
zext(Width * 2);
5689 NewSplatVal <<= Width;
5690 NewSplatVal |= SplatVal0->
zext(Width * 2);
5692 ExtVTy->getElementCount(), ConstantInt::get(
F.getContext(), NewSplatVal));
5727bool VectorCombine::foldDeinterleaveIntrinsics(Instruction &
I) {
5729 if (
DL->isBigEndian())
5732 using namespace PatternMatch;
5733 Value *DeinterleavedVal;
5744 unsigned HalfElementWidth = ElementWidth / 2;
5748 std::array<ExtractValueInst *, 2> OrigFields{};
5749 for (User *Usr :
I.users()) {
5752 if (!
E ||
E->getNumIndices() != 1)
5754 unsigned Idx = *
E->idx_begin();
5756 if (Idx >= 2 || OrigFields[Idx] || !
E->hasNUses(2))
5758 OrigFields[Idx] =
E;
5762 SmallVector<Instruction *, 2> MergeInsts;
5763 for (
auto *FieldUsr : OrigFields[0]->
users()) {
5771 auto MatchMerge = [&](void) ->
bool {
5774 return match(MergeInsts[0],
5778 match(MergeInsts[1],
5783 if (!MatchMerge()) {
5784 std::swap(MergeInsts[0], MergeInsts[1]);
5799 auto *NewFieldTy = VecTy->getWithNewBitWidth(HalfElementWidth);
5809 if (OldCost <= NewCost || !NewCost.
isValid()) {
5811 dbgs() <<
"VC: New deinterleave2 sequence cost (" << NewCost <<
")"
5812 <<
" is higher than that of the old one (" << OldCost <<
")\n");
5820 Intrinsic::vector_deinterleave2, {NewVecTy}, {NewVecCast});
5821 for (
auto [Idx, MergeInst] :
enumerate(MergeInsts)) {
5823 NewField = Builder.
CreateBitCast(NewField, MergeInst->getType());
5824 replaceValue(*MergeInst, *NewField);
5830bool VectorCombine::foldBitcastOfVPLoad(Instruction &
I) {
5831 const DataLayout &
DL =
I.getDataLayout();
5846 DL.getValueOrABITypeAlignment(
II->getPointerAlignment(), OrigVecTy);
5847 ElementCount OrigVecCnt = OrigVecTy->getElementCount();
5849 ElementCount NewVecCnt = NewVecTy->getElementCount();
5861 II->getMemoryPointerParam(),
false,
5867 {Intrinsic::vp_load, NewVecTy,
II->getMemoryPointerParam(),
false,
5871 <<
" NewCost=" << NewCost <<
"\n");
5872 if (NewCost > OldCost || !NewCost.
isValid())
5880 {
II->getMemoryPointerParam(), NewMask, NewEVL});
5883 0, AttrBuilder(
II->getContext()).addAlignmentAttr(OrigAlign));
5884 replaceValue(*Cast, *NewVP);
5889bool VectorCombine::shrinkLoadForShuffles(Instruction &
I) {
5891 if (!OldLoad || !OldLoad->isSimple())
5898 unsigned const OldNumElements = OldLoadTy->getNumElements();
5904 using IndexRange = std::pair<int, int>;
5905 auto GetIndexRangeInShuffles = [&]() -> std::optional<IndexRange> {
5906 IndexRange OutputRange = IndexRange(OldNumElements, -1);
5907 for (llvm::Use &Use :
I.uses()) {
5909 User *Shuffle =
Use.getUser();
5914 return std::nullopt;
5921 for (
int Index : Mask) {
5922 if (Index >= 0 && Index <
static_cast<int>(OldNumElements)) {
5923 OutputRange.first = std::min(Index, OutputRange.first);
5924 OutputRange.second = std::max(Index, OutputRange.second);
5929 if (OutputRange.second < OutputRange.first)
5930 return std::nullopt;
5936 if (std::optional<IndexRange> Indices = GetIndexRangeInShuffles()) {
5937 unsigned const NewNumElements = Indices->second + 1u;
5941 if (NewNumElements < OldNumElements) {
5946 Type *ElemTy = OldLoadTy->getElementType();
5948 Value *PtrOp = OldLoad->getPointerOperand();
5951 Instruction::Load, OldLoad->getType(), OldLoad->getAlign(),
5952 OldLoad->getPointerAddressSpace(),
CostKind);
5955 OldLoad->getPointerAddressSpace(),
CostKind);
5957 using UseEntry = std::pair<ShuffleVectorInst *, std::vector<int>>;
5959 unsigned const MaxIndex = NewNumElements * 2u;
5961 for (llvm::Use &Use :
I.uses()) {
5968 ArrayRef<int> OldMask = Shuffle->getShuffleMask();
5974 for (
int Index : OldMask) {
5975 if (Index >=
static_cast<int>(MaxIndex))
5989 dbgs() <<
"Found a load used only by shufflevector instructions: "
5990 <<
I <<
"\n OldCost: " << OldCost
5991 <<
" vs NewCost: " << NewCost <<
"\n");
5993 if (OldCost < NewCost || !NewCost.
isValid())
5999 NewLoad->copyMetadata(
I);
6002 for (UseEntry &Use : NewUses) {
6003 ShuffleVectorInst *Shuffle =
Use.first;
6004 std::vector<int> &NewMask =
Use.second;
6011 replaceValue(*Shuffle, *NewShuffle,
false);
6024bool VectorCombine::shrinkPhiOfShuffles(Instruction &
I) {
6026 if (!Phi ||
Phi->getNumIncomingValues() != 2u)
6030 ArrayRef<int> Mask0;
6031 ArrayRef<int> Mask1;
6044 auto const InputNumElements = InputVT->getNumElements();
6046 if (InputNumElements >= ResultVT->getNumElements())
6051 SmallVector<int, 16> NewMask;
6054 for (
auto [
M0,
M1] :
zip(Mask0, Mask1)) {
6055 if (
M0 >= 0 &&
M1 >= 0)
6057 else if (
M0 == -1 &&
M1 == -1)
6070 int MaskOffset = NewMask[0
u];
6071 unsigned Index = (InputNumElements + MaskOffset) % InputNumElements;
6074 for (
unsigned I = 0u;
I < InputNumElements; ++
I) {
6088 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
6091 if (NewCost > OldCost)
6103 auto *NewPhi = Builder.
CreatePHI(NewShuf0->getType(), 2u);
6105 NewPhi->addIncoming(
Op,
Phi->getIncomingBlock(1u));
6111 replaceValue(*Phi, *NewShuf1);
6117bool VectorCombine::run() {
6131 auto Opcode =
I.getOpcode();
6139 if (IsFixedVectorType) {
6141 case Instruction::InsertElement:
6142 if (vectorizeLoadInsert(
I))
6145 case Instruction::ShuffleVector:
6146 if (widenSubvectorLoad(
I))
6157 if (scalarizeOpOrCmp(
I))
6159 if (scalarizeLoad(
I))
6161 if (scalarizeExtExtract(
I))
6163 if (scalarizeVPIntrinsic(
I))
6165 if (foldInterleaveIntrinsics(
I))
6167 if (foldBitcastOfVPLoad(
I))
6171 if (foldDeinterleaveIntrinsics(
I))
6174 if (Opcode == Instruction::Store)
6175 if (foldSingleElementStore(
I))
6179 if (TryEarlyFoldsOnly)
6186 if (IsFixedVectorType) {
6188 case Instruction::InsertElement:
6189 if (foldInsExtFNeg(
I))
6191 if (foldInsExtBinop(
I))
6193 if (foldInsExtVectorToShuffle(
I))
6196 case Instruction::ShuffleVector:
6197 if (foldPermuteOfBinops(
I))
6199 if (foldShuffleOfBinops(
I))
6201 if (foldShuffleOfSelects(
I))
6203 if (foldShuffleOfCastops(
I))
6205 if (foldShuffleOfShuffles(
I))
6207 if (foldPermuteOfIntrinsic(
I))
6209 if (foldShufflesOfLengthChangingShuffles(
I))
6211 if (foldShuffleOfIntrinsics(
I))
6213 if (foldSelectShuffle(
I))
6215 if (foldShuffleToIdentity(
I))
6218 case Instruction::Load:
6219 if (shrinkLoadForShuffles(
I))
6222 case Instruction::BitCast:
6223 if (foldBitcastShuffle(
I))
6225 if (foldSelectsFromBitcast(
I))
6228 case Instruction::And:
6229 case Instruction::Or:
6230 case Instruction::Xor:
6231 if (foldBitOpOfCastops(
I))
6233 if (foldBitOpOfCastConstant(
I))
6236 case Instruction::PHI:
6237 if (shrinkPhiOfShuffles(
I))
6247 case Instruction::Call:
6248 if (foldShuffleFromReductions(
I))
6250 if (foldCastFromReductions(
I))
6253 case Instruction::ExtractElement:
6254 if (foldShuffleChainsToReduce(
I))
6257 case Instruction::ICmp:
6258 if (foldSignBitReductionCmp(
I))
6260 if (foldICmpEqZeroVectorReduce(
I))
6262 if (foldEquivalentReductionCmp(
I))
6264 if (foldReduceAddCmpZero(
I))
6267 case Instruction::FCmp:
6268 if (foldExtractExtract(
I))
6271 case Instruction::Or:
6272 if (foldConcatOfBoolMasks(
I))
6277 if (foldExtractExtract(
I))
6279 if (foldExtractedCmps(
I))
6281 if (foldBinopOfReductions(
I))
6290 bool MadeChange =
false;
6291 for (BasicBlock &BB :
F) {
6303 if (!
I->isDebugOrPseudoInst())
6304 MadeChange |= FoldInst(*
I);
6311 while (!Worklist.isEmpty()) {
6321 MadeChange |= FoldInst(*
I);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< unsigned > MaxInstrsToScan("aggressive-instcombine-max-scan-instrs", cl::init(64), cl::Hidden, cl::desc("Max number of instructions to scan for aggressive instcombine."))
This is the interface for LLVM's primary stateless and local alias analysis.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
static cl::opt< IntrinsicCostStrategy > IntrinsicCost("intrinsic-cost-strategy", cl::desc("Costing strategy for intrinsic instructions"), cl::init(IntrinsicCostStrategy::InstructionCost), cl::values(clEnumValN(IntrinsicCostStrategy::InstructionCost, "instruction-cost", "Use TargetTransformInfo::getInstructionCost"), clEnumValN(IntrinsicCostStrategy::IntrinsicCost, "intrinsic-cost", "Use TargetTransformInfo::getIntrinsicInstrCost"), clEnumValN(IntrinsicCostStrategy::TypeBasedIntrinsicCost, "type-based-intrinsic-cost", "Calculate the intrinsic cost based only on argument types")))
This file defines the DenseMap class.
This is the interface for a simple mod/ref and alias analysis over globals.
static Value * getOpcode(Value &V, Type &Ty, InstrumentationConfig &IConf, InstrumentorIRBuilderTy &IIRB)
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
FunctionAnalysisManager FAM
const SmallVectorImpl< MachineOperand > & Cond
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static SymbolRef::Type getType(const Symbol *Sym)
static bool isFreeConcat(ArrayRef< InstLane > Item, TTI::TargetCostKind CostKind, const TargetTransformInfo &TTI)
Detect concat of multiple values into a vector.
static void analyzeCostOfVecReduction(const IntrinsicInst &II, TTI::TargetCostKind CostKind, const TargetTransformInfo &TTI, InstructionCost &CostBeforeReduction, InstructionCost &CostAfterReduction)
static SmallVector< InstLane > generateInstLaneVectorFromOperand(ArrayRef< InstLane > Item, int Op)
static Value * createShiftShuffle(Value *Vec, unsigned OldIndex, unsigned NewIndex, IRBuilderBase &Builder)
Create a shuffle that translates (shifts) 1 element from the input vector to a new element location.
static Value * generateNewInstTree(ArrayRef< InstLane > Item, Use *From, FixedVectorType *Ty, const DenseSet< std::pair< Value *, Use * > > &IdentityLeafs, const DenseSet< std::pair< Value *, Use * > > &SplatLeafs, const DenseSet< std::pair< Value *, Use * > > &ConcatLeafs, IRBuilderBase &Builder, const TargetTransformInfo *TTI)
std::pair< Value *, int > InstLane
static bool isKnownNonPositive(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Used by foldReduceAddCmpZero to check if we can prove that a value is non-positive.
static Align computeAlignmentAfterScalarization(Align VectorAlignment, Type *ScalarType, Value *Idx, const DataLayout &DL)
The memory operation on a vector of ScalarType had alignment of VectorAlignment.
static bool feedsIntoVectorReduction(ShuffleVectorInst *SVI)
Returns true if this ShuffleVectorInst eventually feeds into a vector reduction intrinsic (e....
static cl::opt< bool > DisableVectorCombine("disable-vector-combine", cl::init(false), cl::Hidden, cl::desc("Disable all vector combine transforms"))
static bool canWidenLoad(LoadInst *Load, const TargetTransformInfo &TTI)
static const unsigned InvalidIndex
static Value * translateExtract(ExtractElementInst *ExtElt, unsigned NewIndex, IRBuilderBase &Builder)
Given an extract element instruction with constant index operand, shuffle the source vector (shift th...
static ScalarizationResult canScalarizeAccess(VectorType *VecTy, Value *Idx, const SimplifyQuery &SQ)
Check if it is legal to scalarize a memory access to VecTy at index Idx.
static cl::opt< unsigned > MaxInstrsToScan("vector-combine-max-scan-instrs", cl::init(30), cl::Hidden, cl::desc("Max number of instructions to scan for vector combining."))
static cl::opt< bool > DisableBinopExtractShuffle("disable-binop-extract-shuffle", cl::init(false), cl::Hidden, cl::desc("Disable binop extract to shuffle transforms"))
static InstLane lookThroughShuffles(Value *V, int Lane)
static bool isMemModifiedBetween(BasicBlock::iterator Begin, BasicBlock::iterator End, const MemoryLocation &Loc, AAResults &AA)
static constexpr int Concat[]
A manager for alias analyses.
Class for arbitrary precision integers.
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
unsigned getBitWidth() const
Return the number of bits in the APInt.
bool isNegative() const
Determine sign of this APInt.
unsigned countl_one() const
Count the number of leading one bits.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
bool isOne() const
Determine if this is a value of 1.
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & front() const
Get the first element.
size_t size() const
Get the array size.
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM_ABI bool hasAttribute(Attribute::AttrKind Kind) const
Return true if the attribute exists in this set.
InstListType::iterator iterator
Instruction iterators...
BinaryOps getOpcode() const
Represents analyses that only rely on functions' control flow.
Value * getArgOperand(unsigned i) const
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
void addParamAttrs(unsigned ArgNo, const AttrBuilder &B)
Adds attributes to the indicated argument.
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
bool isFPPredicate() const
static LLVM_ABI std::optional< CmpPredicate > getMatching(CmpPredicate A, CmpPredicate B)
Compares two CmpPredicates taking samesign into account and returns the canonicalized CmpPredicate if...
static LLVM_ABI Constant * getExtractElement(Constant *Vec, Constant *Idx, Type *OnlyIfReducedTy=nullptr)
This is the shared class of boolean and integer constants.
const APInt & getValue() const
Return the constant as an APInt value reference.
This class represents a range of values.
LLVM_ABI ConstantRange urem(const ConstantRange &Other) const
Return a new range representing the possible values resulting from an unsigned remainder operation of...
LLVM_ABI ConstantRange binaryAnd(const ConstantRange &Other) const
Return a new range representing the possible values resulting from a binary-and of a value in this ra...
LLVM_ABI bool contains(const APInt &Val) const
Return true if the specified value is in the set.
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
iterator find(const_arg_type_t< KeyT > Val)
Implements a dense probed hash-table based set.
Analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
LLVM_ABI bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Convenience struct for specifying and reasoning about fast-math flags.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static FixedVectorType * getDoubleElementsVectorType(FixedVectorType *VTy)
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Predicate getSignedPredicate() const
For example, EQ->EQ, SLE->SLE, UGT->SGT, etc.
bool isEquality() const
Return true if this predicate is either EQ or NE.
Common base class shared among various IRBuilders.
Value * CreateNUWMul(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
LLVM_ABI Value * CreateSelectFMF(Value *C, Value *True, Value *False, FMFSource FMFSource, const Twine &Name="", Instruction *MDFrom=nullptr)
LLVM_ABI Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
ConstantInt * getTrue()
Get the constant value for i1 true.
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > OverloadTypes, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="", ArrayRef< OperandBundleDef > OpBundles={})
Create a call to intrinsic ID with Args, mangled using OverloadTypes.
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Value * CreateFreeze(Value *V, const Twine &Name="")
void SetCurrentDebugLocation(const DebugLoc &L)
Set location information used by debugging information.
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="", MDNode *FPMathTag=nullptr, FMFSource FMFSource={})
Value * CreateIsNotNeg(Value *Arg, const Twine &Name="")
Return a boolean value testing if Arg > -1.
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Value * CreatePointerBitCastOrAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
LLVM_ABI CallInst * CreateOrReduce(Value *Src)
Create a vector int OR reduction intrinsic of the source vector.
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
InstTy * Insert(InstTy *I, const Twine &Name="") const
Insert and return the specified instruction.
Value * CreateIsNeg(Value *Arg, const Twine &Name="")
Return a boolean value testing if Arg < 0.
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
LLVM_ABI Value * CreateNAryOp(unsigned Opc, ArrayRef< Value * > Ops, const Twine &Name="", MDNode *FPMathTag=nullptr)
Create either a UnaryOperator or BinaryOperator depending on Opc.
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Value * CreateFNegFMF(Value *V, FMFSource FMFSource, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
CostType getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
void push(Instruction *I)
Push the instruction onto the worklist stack.
LLVM_ABI void setHasNoUnsignedWrap(bool b=true)
Set or clear the nuw flag on this instruction, which must be an operator which supports this flag.
LLVM_ABI void copyIRFlags(const Value *V, bool IncludeWrapFlags=true)
Convenience method to copy supported exact, fast-math, and (optionally) wrapping flags from V to this...
LLVM_ABI void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI void andIRFlags(const Value *V)
Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction.
LLVM_ABI void setNonNeg(bool b=true)
Set or clear the nneg flag on this instruction, which must be a zext instruction.
LLVM_ABI bool comesBefore(const Instruction *Other) const
Given an instruction Other in the same basic block as this instruction, return true if this instructi...
LLVM_ABI AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
An instruction for reading from memory.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
void setAlignment(Align Align)
Type * getPointerOperandType() const
Align getAlign() const
Return the alignment of the access that is being performed.
Representation for a specific memory location.
static LLVM_ABI MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
const SDValue & getOperand(unsigned Num) const
This instruction constructs a fixed permutation of two input vectors.
int getMaskValue(unsigned Elt) const
Return the shuffle mask value of this instruction for the given element index.
VectorType * getType() const
Overload to return most specific vector type.
static LLVM_ABI void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static LLVM_ABI bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
static void commuteShuffleMask(MutableArrayRef< int > Mask, unsigned InVecNumElts)
Change values in a shuffle permute mask assuming the two vector operands of length InVecNumElts have ...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
void assign(size_type NumElts, ValueParamT Elt)
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
void setAlignment(Align Align)
Analysis pass providing the TargetTransformInfo.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isPointerTy() const
True if this is an instance of PointerType.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
A Use represents the edge between a Value definition and its users.
Value * getOperand(unsigned i) const
static LLVM_ABI bool isVPBinOp(Intrinsic::ID ID)
std::optional< unsigned > getFunctionalIntrinsicID() const
std::optional< unsigned > getFunctionalOpcode() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
This is a wrapper around stripAndAccumulateConstantOffsets with the in-bounds requirement set to fals...
LLVM_ABI bool hasOneUser() const
Return true if there is exactly one user of this value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
unsigned getValueID() const
Return an ID for the concrete type of this object.
LLVM_ABI bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &)
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Type * getElementType() const
std::pair< iterator, bool > insert(const ValueT &V)
constexpr bool hasKnownScalarFactor(const FixedOrScalableQuantity &RHS) const
Returns true if there exists a value X where RHS.multiplyCoefficientBy(X) will result in a value whos...
constexpr ScalarTy getKnownScalarFactor(const FixedOrScalableQuantity &RHS) const
Returns a value X where RHS.multiplyCoefficientBy(X) will result in a value whose quantity matches ou...
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
const ParentTy * getParent() const
self_iterator getIterator()
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Abstract Attribute helper functions.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
const APInt & smin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be signed.
const APInt & smax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be signed.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
LLVM_ABI AttributeSet getFnAttributes(LLVMContext &C, ID id)
Return the function attributes for an intrinsic.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
match_combine_and< Ty... > m_CombineAnd(const Ty &...Ps)
Combine pattern matchers matching all of Ps patterns.
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
auto m_Cmp()
Matches any compare instruction and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::URem > m_URem(const LHS &L, const RHS &R)
auto m_Poison()
Match an arbitrary poison constant.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
match_bind< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
DisjointOr_match< LHS, RHS > m_DisjointOr(const LHS &L, const RHS &R)
BinOpPred_match< LHS, RHS, is_right_shift_op > m_Shr(const LHS &L, const RHS &R)
Matches logical shift operations.
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
auto m_BinOp()
Match an arbitrary binary operation and ignore it.
auto m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
auto m_Constant()
Match an arbitrary Constant and ignore it.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
cst_pred_ty< is_non_zero_int > m_NonZeroInt()
Match a non-zero integer or a vector with all non-zero elements.
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Shl, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWShl(const LHS &L, const RHS &R)
auto m_AnyIntrinsic()
Matches any intrinsic call and ignore it.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Mul, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWMul(const LHS &L, const RHS &R)
BinOpPred_match< LHS, RHS, is_bitwiselogic_op, true > m_c_BitwiseLogic(const LHS &L, const RHS &R)
Matches bitwise logic operations in either order.
CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
match_combine_or< CastInst_match< OpTy, SExtInst >, NNegZExt_match< OpTy > > m_SExtLike(const OpTy &Op)
Match either "sext" or "zext nneg".
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
m_Intrinsic_Ty< Opnd >::Ty m_Deinterleave2(const Opnd &Op)
auto m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
@ Valid
The data is already valid.
initializer< Ty > init(const Ty &Val)
DXILDebugInfoMap run(Module &M)
@ User
could "use" a pointer
NodeAddr< PhiNode * > Phi
NodeAddr< UseNode * > Use
friend class Instruction
Iterator for Instructions in a `BasicBlock.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
FunctionAddr VTableAddr Value
void stable_sort(R &&Range)
UnaryFunction for_each(R &&Range, UnaryFunction F)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI Intrinsic::ID getMinMaxReductionIntrinsicOp(Intrinsic::ID RdxID)
Returns the min/max intrinsic used when expanding a min/max reduction.
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
unsigned Log2_64_Ceil(uint64_t Value)
Return the ceil log base 2 of the specified value, 64 if the value is zero.
LLVM_ABI Value * simplifyUnOp(unsigned Opcode, Value *Op, const SimplifyQuery &Q)
Given operand for a UnaryOperator, fold the result or return null.
scope_exit(Callable) -> scope_exit< Callable >
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI unsigned getArithmeticReductionInstruction(Intrinsic::ID RdxID)
Returns the arithmetic instruction opcode used when expanding a reduction.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
LLVM_ABI Value * simplifyCall(CallBase *Call, Value *Callee, ArrayRef< Value * > Args, const SimplifyQuery &Q)
Given a callsite, callee, and arguments, fold the result or return null.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
LLVM_ABI bool mustSuppressSpeculation(const LoadInst &LI)
Return true if speculation of the given load must be suppressed to avoid ordering or interfering with...
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like a...
unsigned M1(unsigned Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
bool isModSet(const ModRefInfo MRI)
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI bool programUndefinedIfPoison(const Instruction *Inst)
LLVM_ABI bool isSafeToLoadUnconditionally(Value *V, Align Alignment, const APInt &Size, const DataLayout &DL, Instruction *ScanFrom, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if we know that executing a load from this value cannot trap.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
FunctionAddr VTableAddr Count
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ABI void propagateIRFlags(Value *I, ArrayRef< Value * > VL, Value *OpValue=nullptr, bool IncludeWrapFlags=true)
Get the intersection (logical and) of all of the potential IR flags of each scalar operation (VL) tha...
LLVM_ABI bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
constexpr int PoisonMaskElem
LLVM_ABI bool isSafeToSpeculativelyExecuteWithOpcode(unsigned Opcode, const Instruction *Inst, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
This returns the same result as isSafeToSpeculativelyExecute if Opcode is the actual opcode of Inst.
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
LLVM_ABI Value * simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a BinaryOperator, fold the result or return null.
LLVM_ABI void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
LLVM_ABI Intrinsic::ID getReductionForBinop(Instruction::BinaryOps Opc)
Returns the reduction intrinsic id corresponding to the binary operation.
@ And
Bitwise or logical AND of integers.
LLVM_ABI bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
LLVM_ABI unsigned ComputeNumSignBits(const Value *Op, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Return the number of times the sign bit of the register is replicated into the other bits.
constexpr unsigned BitWidth
LLVM_ABI bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
LLVM_ABI Constant * getLosslessInvCast(Constant *C, Type *InvCastTo, unsigned CastOp, const DataLayout &DL, PreservedCastFlags *Flags=nullptr)
Try to cast C to InvC losslessly, satisfying CastOp(InvC) equals C, or CastOp(InvC) is a refined valu...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
constexpr bool isIntN(unsigned N, int64_t x)
Checks if an signed integer fits into the given (dynamic) bit width.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list.
LLVM_ABI Value * simplifyCmpInst(CmpPredicate Predicate, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a CmpInst, fold the result or return null.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI bool isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Returns true if V cannot be poison, but may be undef.
LLVM_ABI bool isKnownNonNegative(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Returns true if the give value is known to be non-negative.
LLVM_ABI bool isTriviallyVectorizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially vectorizable.
LLVM_ABI Intrinsic::ID getMinMaxReductionIntrinsicID(Intrinsic::ID IID)
Returns the llvm.vector.reduce min/max intrinsic that corresponds to the intrinsic op.
LLVM_ABI ConstantRange computeConstantRange(const Value *V, bool ForSigned, const SimplifyQuery &SQ, unsigned Depth=0)
Determine the possible constant range of an integer or vector of integer value.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
LLVM_ABI AAMDNodes adjustForAccess(unsigned AccessSize)
Create a new AAMDNode for accessing AccessSize bytes of this AAMDNode.
This struct is a compact representation of a valid (non-zero power of two) alignment.
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
SimplifyQuery getWithInstruction(const Instruction *I) const