45#define DEBUG_TYPE "vector-combine"
51STATISTIC(NumVecLoad,
"Number of vector loads formed");
52STATISTIC(NumVecCmp,
"Number of vector compares formed");
53STATISTIC(NumVecBO,
"Number of vector binops formed");
54STATISTIC(NumVecCmpBO,
"Number of vector compare + binop formed");
55STATISTIC(NumShufOfBitcast,
"Number of shuffles moved after bitcast");
56STATISTIC(NumScalarOps,
"Number of scalar unary + binary ops formed");
57STATISTIC(NumScalarCmp,
"Number of scalar compares formed");
58STATISTIC(NumScalarIntrinsic,
"Number of scalar intrinsic calls formed");
62 cl::desc(
"Disable all vector combine transforms"));
66 cl::desc(
"Disable binop extract to shuffle transforms"));
70 cl::desc(
"Max number of instructions to scan for vector combining."));
72static const unsigned InvalidIndex = std::numeric_limits<unsigned>::max();
80 bool TryEarlyFoldsOnly)
83 SQ(*
DL, nullptr, &DT, &AC),
84 TryEarlyFoldsOnly(TryEarlyFoldsOnly) {}
91 const TargetTransformInfo &TTI;
92 const DominatorTree &DT;
96 const SimplifyQuery SQ;
100 bool TryEarlyFoldsOnly;
102 InstructionWorklist Worklist;
111 bool vectorizeLoadInsert(Instruction &
I);
112 bool widenSubvectorLoad(Instruction &
I);
113 ExtractElementInst *getShuffleExtract(ExtractElementInst *Ext0,
114 ExtractElementInst *Ext1,
115 unsigned PreferredExtractIndex)
const;
116 bool isExtractExtractCheap(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
117 const Instruction &
I,
118 ExtractElementInst *&ConvertToShuffle,
119 unsigned PreferredExtractIndex);
122 bool foldExtractExtract(Instruction &
I);
123 bool foldInsExtFNeg(Instruction &
I);
124 bool foldInsExtBinop(Instruction &
I);
125 bool foldInsExtVectorToShuffle(Instruction &
I);
126 bool foldBitOpOfCastops(Instruction &
I);
127 bool foldBitOpOfCastConstant(Instruction &
I);
128 bool foldBitcastShuffle(Instruction &
I);
129 bool scalarizeOpOrCmp(Instruction &
I);
130 bool scalarizeVPIntrinsic(Instruction &
I);
131 bool foldExtractedCmps(Instruction &
I);
132 bool foldSelectsFromBitcast(Instruction &
I);
133 bool foldBinopOfReductions(Instruction &
I);
134 bool foldSingleElementStore(Instruction &
I);
135 bool scalarizeLoad(Instruction &
I);
136 bool scalarizeLoadExtract(LoadInst *LI, VectorType *VecTy,
Value *Ptr);
137 bool scalarizeLoadBitcast(LoadInst *LI, VectorType *VecTy,
Value *Ptr);
138 bool scalarizeExtExtract(Instruction &
I);
139 bool foldConcatOfBoolMasks(Instruction &
I);
140 bool foldPermuteOfBinops(Instruction &
I);
141 bool foldShuffleOfBinops(Instruction &
I);
142 bool foldShuffleOfSelects(Instruction &
I);
143 bool foldShuffleOfCastops(Instruction &
I);
144 bool foldShuffleOfShuffles(Instruction &
I);
145 bool foldPermuteOfIntrinsic(Instruction &
I);
146 bool foldShufflesOfLengthChangingShuffles(Instruction &
I);
147 bool foldShuffleOfIntrinsics(Instruction &
I);
148 bool foldShuffleToIdentity(Instruction &
I);
149 bool foldShuffleFromReductions(Instruction &
I);
150 bool foldShuffleChainsToReduce(Instruction &
I);
151 bool foldCastFromReductions(Instruction &
I);
152 bool foldSignBitReductionCmp(Instruction &
I);
153 bool foldReductionZeroTest(Instruction &
I);
154 bool foldICmpEqZeroVectorReduce(Instruction &
I);
155 bool foldEquivalentReductionCmp(Instruction &
I);
156 bool foldReduceAddCmpZero(Instruction &
I);
157 bool foldSelectShuffle(Instruction &
I,
bool FromReduction =
false);
158 bool foldInterleaveIntrinsics(Instruction &
I);
159 bool foldDeinterleaveIntrinsics(Instruction &
I);
160 bool foldBitcastOfVPLoad(Instruction &
I);
161 bool foldBitOrderReverseAndSwap(Instruction &
I);
162 bool shrinkType(Instruction &
I);
163 bool shrinkLoadForShuffles(Instruction &
I);
164 bool shrinkPhiOfShuffles(Instruction &
I);
166 void replaceValue(Instruction &Old,
Value &New,
bool Erase =
true) {
172 Worklist.pushUsersToWorkList(*NewI);
173 Worklist.pushValue(NewI);
190 SmallPtrSet<Value *, 4> Visited;
195 OpI,
nullptr,
nullptr, [&](
Value *V) {
200 NextInst = NextInst->getNextNode();
205 Worklist.pushUsersToWorkList(*OpI);
206 Worklist.pushValue(OpI);
226 if (!Load || !Load->isSimple() || !Load->hasOneUse() ||
227 Load->getFunction()->hasFnAttribute(Attribute::SanitizeMemTag) ||
233 Type *ScalarTy = Load->getType()->getScalarType();
235 unsigned MinVectorSize =
TTI.getMinVectorRegisterBitWidth();
236 if (!ScalarSize || !MinVectorSize || MinVectorSize % ScalarSize != 0 ||
243bool VectorCombine::vectorizeLoadInsert(
Instruction &
I) {
269 Value *SrcPtr =
Load->getPointerOperand()->stripPointerCasts();
272 unsigned MinVecNumElts = MinVectorSize / ScalarSize;
273 auto *MinVecTy = VectorType::get(ScalarTy, MinVecNumElts,
false);
274 unsigned OffsetEltIndex = 0;
282 unsigned OffsetBitWidth =
DL->getIndexTypeSizeInBits(SrcPtr->
getType());
283 APInt
Offset(OffsetBitWidth, 0);
293 uint64_t ScalarSizeInBytes = ScalarSize / 8;
294 if (
Offset.urem(ScalarSizeInBytes) != 0)
298 OffsetEltIndex =
Offset.udiv(ScalarSizeInBytes).getZExtValue();
299 if (OffsetEltIndex >= MinVecNumElts)
316 unsigned AS =
Load->getPointerAddressSpace();
335 unsigned OutputNumElts = Ty->getNumElements();
337 assert(OffsetEltIndex < MinVecNumElts &&
"Address offset too big");
338 Mask[0] = OffsetEltIndex;
345 if (OldCost < NewCost || !NewCost.
isValid())
356 replaceValue(
I, *VecLd);
364bool VectorCombine::widenSubvectorLoad(Instruction &
I) {
367 if (!Shuf->isIdentityWithPadding())
373 unsigned OpIndex =
any_of(Shuf->getShuffleMask(), [&NumOpElts](
int M) {
374 return M >= (int)(NumOpElts);
385 Value *SrcPtr =
Load->getPointerOperand()->stripPointerCasts();
394 unsigned AS =
Load->getPointerAddressSpace();
409 if (OldCost < NewCost || !NewCost.
isValid())
416 replaceValue(
I, *VecLd);
423ExtractElementInst *VectorCombine::getShuffleExtract(
424 ExtractElementInst *Ext0, ExtractElementInst *Ext1,
428 assert(Index0C && Index1C &&
"Expected constant extract indexes");
430 unsigned Index0 = Index0C->getZExtValue();
431 unsigned Index1 = Index1C->getZExtValue();
434 if (Index0 == Index1)
458 if (PreferredExtractIndex == Index0)
460 if (PreferredExtractIndex == Index1)
464 return Index0 > Index1 ? Ext0 : Ext1;
472bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
473 ExtractElementInst *Ext1,
474 const Instruction &
I,
475 ExtractElementInst *&ConvertToShuffle,
476 unsigned PreferredExtractIndex) {
479 assert(Ext0IndexC && Ext1IndexC &&
"Expected constant extract indexes");
481 unsigned Opcode =
I.getOpcode();
494 assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
495 "Expected a compare");
505 unsigned Ext0Index = Ext0IndexC->getZExtValue();
506 unsigned Ext1Index = Ext1IndexC->getZExtValue();
520 unsigned BestExtIndex = Extract0Cost > Extract1Cost ? Ext0Index : Ext1Index;
521 unsigned BestInsIndex = Extract0Cost > Extract1Cost ? Ext1Index : Ext0Index;
522 InstructionCost CheapExtractCost = std::min(Extract0Cost, Extract1Cost);
527 if (Ext0Src == Ext1Src && Ext0Index == Ext1Index) {
532 bool HasUseTax = Ext0 == Ext1 ? !Ext0->
hasNUses(2)
534 OldCost = CheapExtractCost + ScalarOpCost;
535 NewCost = VectorOpCost + CheapExtractCost + HasUseTax * CheapExtractCost;
539 OldCost = Extract0Cost + Extract1Cost + ScalarOpCost;
540 NewCost = VectorOpCost + CheapExtractCost +
545 ConvertToShuffle = getShuffleExtract(Ext0, Ext1, PreferredExtractIndex);
546 if (ConvertToShuffle) {
558 SmallVector<int> ShuffleMask(FixedVecTy->getNumElements(),
560 ShuffleMask[BestInsIndex] = BestExtIndex;
562 VecTy, VecTy, ShuffleMask,
CostKind, 0,
563 nullptr, {ConvertToShuffle});
566 VecTy, VecTy, {},
CostKind, 0,
nullptr,
571 LLVM_DEBUG(
dbgs() <<
"Found a binop of extractions: " <<
I <<
"\n OldCost: "
572 << OldCost <<
" vs NewCost: " << NewCost <<
"\n");
577 return OldCost < NewCost;
589 ShufMask[NewIndex] = OldIndex;
590 return Builder.CreateShuffleVector(Vec, ShufMask,
"shift");
642 V1,
"foldExtExtBinop");
647 VecBOInst->copyIRFlags(&
I);
653bool VectorCombine::foldExtractExtract(Instruction &
I) {
674 unsigned NumElts = FixedVecTy->getNumElements();
675 if (C0 >= NumElts || C1 >= NumElts)
691 ExtractElementInst *ExtractToChange;
692 if (isExtractExtractCheap(Ext0, Ext1,
I, ExtractToChange, InsertIndex))
698 if (ExtractToChange) {
699 unsigned CheapExtractIdx = ExtractToChange == Ext0 ? C1 : C0;
704 if (ExtractToChange == Ext0)
713 ? foldExtExtCmp(ExtOp0, ExtOp1, ExtIndex,
I)
714 : foldExtExtBinop(ExtOp0, ExtOp1, ExtIndex,
I);
717 replaceValue(
I, *NewExt);
723bool VectorCombine::foldInsExtFNeg(Instruction &
I) {
726 uint64_t ExtIdx, InsIdx;
741 auto *DstVecScalarTy = DstVecTy->getScalarType();
743 if (!SrcVecTy || DstVecScalarTy != SrcVecTy->getScalarType())
748 unsigned NumDstElts = DstVecTy->getNumElements();
749 unsigned NumSrcElts = SrcVecTy->getNumElements();
750 if (ExtIdx > NumSrcElts || InsIdx >= NumDstElts || NumDstElts == 1)
756 SmallVector<int>
Mask(NumDstElts);
757 std::iota(
Mask.begin(),
Mask.end(), 0);
758 Mask[InsIdx] = (ExtIdx % NumDstElts) + NumDstElts;
774 bool NeedLenChg = SrcVecTy->getNumElements() != NumDstElts;
777 SmallVector<int> SrcMask;
780 SrcMask[ExtIdx % NumDstElts] = ExtIdx;
782 DstVecTy, SrcVecTy, SrcMask,
CostKind);
786 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
788 if (NewCost > OldCost)
791 Value *NewShuf, *LenChgShuf =
nullptr;
805 replaceValue(
I, *NewShuf);
811bool VectorCombine::foldInsExtBinop(Instruction &
I) {
812 BinaryOperator *VecBinOp, *SclBinOp;
844 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
846 if (NewCost > OldCost)
857 NewInst->copyIRFlags(VecBinOp);
858 NewInst->andIRFlags(SclBinOp);
863 replaceValue(
I, *NewBO);
869bool VectorCombine::foldBitOpOfCastops(Instruction &
I) {
872 if (!BinOp || !BinOp->isBitwiseLogicOp())
878 if (!LHSCast || !RHSCast) {
879 LLVM_DEBUG(
dbgs() <<
" One or both operands are not cast instructions\n");
885 if (CastOpcode != RHSCast->getOpcode())
889 switch (CastOpcode) {
890 case Instruction::BitCast:
891 case Instruction::Trunc:
892 case Instruction::SExt:
893 case Instruction::ZExt:
899 Value *LHSSrc = LHSCast->getOperand(0);
900 Value *RHSSrc = RHSCast->getOperand(0);
906 auto *SrcTy = LHSSrc->
getType();
907 auto *DstTy =
I.getType();
910 if (CastOpcode != Instruction::BitCast &&
915 if (!SrcTy->getScalarType()->isIntegerTy() ||
916 !DstTy->getScalarType()->isIntegerTy())
931 LHSCastCost + RHSCastCost;
942 if (!LHSCast->hasOneUse())
943 NewCost += LHSCastCost;
944 if (!RHSCast->hasOneUse())
945 NewCost += RHSCastCost;
948 <<
" NewCost=" << NewCost <<
"\n");
950 if (NewCost > OldCost)
955 BinOp->getName() +
".inner");
957 NewBinOp->copyIRFlags(BinOp);
971 replaceValue(
I, *Result);
980bool VectorCombine::foldBitOpOfCastConstant(Instruction &
I) {
996 switch (CastOpcode) {
997 case Instruction::BitCast:
998 case Instruction::ZExt:
999 case Instruction::SExt:
1000 case Instruction::Trunc:
1006 Value *LHSSrc = LHSCast->getOperand(0);
1008 auto *SrcTy = LHSSrc->
getType();
1009 auto *DstTy =
I.getType();
1012 if (CastOpcode != Instruction::BitCast &&
1017 if (!SrcTy->getScalarType()->isIntegerTy() ||
1018 !DstTy->getScalarType()->isIntegerTy())
1022 PreservedCastFlags RHSFlags;
1047 if (!LHSCast->hasOneUse())
1048 NewCost += LHSCastCost;
1050 LLVM_DEBUG(
dbgs() <<
"foldBitOpOfCastConstant: OldCost=" << OldCost
1051 <<
" NewCost=" << NewCost <<
"\n");
1053 if (NewCost > OldCost)
1058 LHSSrc, InvC,
I.getName() +
".inner");
1060 NewBinOp->copyIRFlags(&
I);
1080 replaceValue(
I, *Result);
1087bool VectorCombine::foldBitcastShuffle(Instruction &
I) {
1101 if (!DestTy || !SrcTy)
1104 unsigned DestEltSize = DestTy->getScalarSizeInBits();
1105 unsigned SrcEltSize = SrcTy->getScalarSizeInBits();
1106 if (SrcTy->getPrimitiveSizeInBits() % DestEltSize != 0)
1116 if (!(BCTy0 && BCTy0->getElementType() == DestTy->getElementType()) &&
1117 !(BCTy1 && BCTy1->getElementType() == DestTy->getElementType()))
1121 SmallVector<int, 16> NewMask;
1122 if (DestEltSize <= SrcEltSize) {
1125 if (SrcEltSize % DestEltSize != 0)
1127 unsigned ScaleFactor = SrcEltSize / DestEltSize;
1132 if (DestEltSize % SrcEltSize != 0)
1134 unsigned ScaleFactor = DestEltSize / SrcEltSize;
1141 unsigned NumSrcElts = SrcTy->getPrimitiveSizeInBits() / DestEltSize;
1142 auto *NewShuffleTy =
1144 auto *OldShuffleTy =
1146 unsigned NumOps = IsUnary ? 1 : 2;
1156 TargetTransformInfo::CastContextHint::None,
1161 TargetTransformInfo::CastContextHint::None,
1164 LLVM_DEBUG(
dbgs() <<
"Found a bitcasted shuffle: " <<
I <<
"\n OldCost: "
1165 << OldCost <<
" vs NewCost: " << NewCost <<
"\n");
1167 if (NewCost > OldCost || !NewCost.
isValid())
1175 replaceValue(
I, *Shuf);
1182bool VectorCombine::scalarizeVPIntrinsic(Instruction &
I) {
1196 if (!ScalarOp0 || !ScalarOp1)
1204 auto IsAllTrueMask = [](
Value *MaskVal) {
1207 return ConstValue->isAllOnesValue();
1221 SmallVector<int>
Mask;
1223 Mask.resize(FVTy->getNumElements(), 0);
1232 Args.push_back(
V->getType());
1233 IntrinsicCostAttributes
Attrs(IntrID, VecTy, Args);
1238 std::optional<unsigned> FunctionalOpcode =
1240 std::optional<Intrinsic::ID> ScalarIntrID = std::nullopt;
1241 if (!FunctionalOpcode) {
1250 IntrinsicCostAttributes
Attrs(*ScalarIntrID, VecTy->getScalarType(), Args);
1260 InstructionCost NewCost = ScalarOpCost + SplatCost + CostToKeepSplats;
1262 LLVM_DEBUG(
dbgs() <<
"Found a VP Intrinsic to scalarize: " << VPI
1265 <<
", Cost of scalarizing:" << NewCost <<
"\n");
1268 if (OldCost < NewCost || !NewCost.
isValid())
1279 bool SafeToSpeculate;
1285 *FunctionalOpcode, &VPI,
nullptr, SQ.
AC, SQ.
DT);
1286 if (!SafeToSpeculate &&
1293 {ScalarOp0, ScalarOp1})
1295 ScalarOp0, ScalarOp1);
1304bool VectorCombine::scalarizeOpOrCmp(Instruction &
I) {
1309 if (!UO && !BO && !CI && !
II)
1317 if (Arg->getType() !=
II->getType() &&
1327 for (User *U :
I.users())
1334 std::optional<uint64_t>
Index;
1336 auto Ops =
II ?
II->args() :
I.operands();
1340 uint64_t InsIdx = 0;
1345 if (OpTy->getElementCount().getKnownMinValue() <= InsIdx)
1351 else if (InsIdx != *Index)
1368 if (!
Index.has_value())
1372 Type *ScalarTy = VecTy->getScalarType();
1373 assert(VecTy->isVectorTy() &&
1376 "Unexpected types for insert element into binop or cmp");
1378 unsigned Opcode =
I.getOpcode();
1386 }
else if (UO || BO) {
1390 IntrinsicCostAttributes ScalarICA(
1391 II->getIntrinsicID(), ScalarTy,
1394 IntrinsicCostAttributes VectorICA(
1395 II->getIntrinsicID(), VecTy,
1402 Value *NewVecC =
nullptr;
1404 NewVecC =
simplifyCmpInst(CI->getPredicate(), VecCs[0], VecCs[1], SQ);
1407 simplifyUnOp(UO->getOpcode(), VecCs[0], UO->getFastMathFlags(), SQ);
1409 NewVecC =
simplifyBinOp(BO->getOpcode(), VecCs[0], VecCs[1], SQ);
1423 for (
auto [Idx,
Op, VecC, Scalar] :
enumerate(
Ops, VecCs, ScalarOps)) {
1425 II->getIntrinsicID(), Idx, &
TTI)))
1428 Instruction::InsertElement, VecTy,
CostKind, *Index, VecC, Scalar);
1429 OldCost += InsertCost;
1430 NewCost += !
Op->hasOneUse() * InsertCost;
1434 if (OldCost < NewCost || !NewCost.
isValid())
1444 ++NumScalarIntrinsic;
1454 Scalar = Builder.
CreateCmp(CI->getPredicate(), ScalarOps[0], ScalarOps[1]);
1460 Scalar->setName(
I.getName() +
".scalar");
1465 ScalarInst->copyIRFlags(&
I);
1468 replaceValue(
I, *Insert);
1475bool VectorCombine::foldExtractedCmps(Instruction &
I) {
1480 if (!BI || !
I.getType()->isIntegerTy(1))
1485 Value *B0 =
I.getOperand(0), *B1 =
I.getOperand(1);
1488 CmpPredicate
P0,
P1;
1500 uint64_t Index0, Index1;
1507 ExtractElementInst *ConvertToShuf = getShuffleExtract(Ext0, Ext1,
CostKind);
1510 assert((ConvertToShuf == Ext0 || ConvertToShuf == Ext1) &&
1511 "Unknown ExtractElementInst");
1516 unsigned CmpOpcode =
1522 if (Index0 >= VecTy->getNumElements() || Index1 >= VecTy->getNumElements())
1534 Ext0Cost + Ext1Cost + CmpCost * 2 +
1540 int CheapIndex = ConvertToShuf == Ext0 ? Index1 : Index0;
1541 int ExpensiveIndex = ConvertToShuf == Ext0 ? Index0 : Index1;
1546 ShufMask[CheapIndex] = ExpensiveIndex;
1551 NewCost += Ext0->
hasOneUse() ? 0 : Ext0Cost;
1552 NewCost += Ext1->
hasOneUse() ? 0 : Ext1Cost;
1557 if (OldCost < NewCost || !NewCost.
isValid())
1567 Value *
LHS = ConvertToShuf == Ext0 ? Shuf : VCmp;
1568 Value *
RHS = ConvertToShuf == Ext0 ? VCmp : Shuf;
1571 replaceValue(
I, *NewExt);
1598bool VectorCombine::foldSelectsFromBitcast(Instruction &
I) {
1605 if (!SrcVecTy || !DstVecTy)
1615 if (SrcEltBits != 32 && SrcEltBits != 64)
1618 if (!DstEltTy->
isIntegerTy() || DstEltBits >= SrcEltBits)
1635 if (!ScalarSelCost.
isValid() || ScalarSelCost == 0)
1638 unsigned MinSelects = (VecSelCost.
getValue() / ScalarSelCost.
getValue()) + 1;
1641 if (!BC->hasNUsesOrMore(MinSelects))
1646 DenseMap<Value *, SmallVector<SelectInst *, 8>> CondToSelects;
1648 for (User *U : BC->users()) {
1653 for (User *ExtUser : Ext->users()) {
1657 Cond->getType()->isIntegerTy(1))
1662 if (CondToSelects.
empty())
1665 bool MadeChange =
false;
1666 Value *SrcVec = BC->getOperand(0);
1669 for (
auto [
Cond, Selects] : CondToSelects) {
1671 if (Selects.size() < MinSelects) {
1672 LLVM_DEBUG(
dbgs() <<
"VectorCombine: foldSelectsFromBitcast not "
1673 <<
"profitable (VecCost=" << VecSelCost
1674 <<
", ScalarCost=" << ScalarSelCost
1675 <<
", NumSelects=" << Selects.size() <<
")\n");
1680 auto InsertPt = std::next(BC->getIterator());
1684 InsertPt = std::next(CondInst->getIterator());
1692 for (SelectInst *Sel : Selects) {
1694 Value *Idx = Ext->getIndexOperand();
1698 replaceValue(*Sel, *NewExt);
1703 <<
" selects into vector select\n");
1717 unsigned ReductionOpc =
1723 CostBeforeReduction =
1724 TTI.getCastInstrCost(RedOp->getOpcode(), VecRedTy, ExtType,
1726 CostAfterReduction =
1727 TTI.getExtendedReductionCost(ReductionOpc, IsUnsigned,
II.getType(),
1731 if (RedOp &&
II.getIntrinsicID() == Intrinsic::vector_reduce_add &&
1737 (Op0->
getOpcode() == RedOp->getOpcode() || Op0 == Op1)) {
1744 TTI.getCastInstrCost(Op0->
getOpcode(), MulType, ExtType,
1747 TTI.getArithmeticInstrCost(Instruction::Mul, MulType,
CostKind);
1749 TTI.getCastInstrCost(RedOp->getOpcode(), VecRedTy, MulType,
1752 CostBeforeReduction = ExtCost * 2 + MulCost + Ext2Cost;
1753 CostAfterReduction =
TTI.getMulAccReductionCost(
1754 IsUnsigned, ReductionOpc,
II.getType(), ExtType,
CostKind);
1757 CostAfterReduction =
TTI.getArithmeticReductionCost(ReductionOpc, VecRedTy,
1761bool VectorCombine::foldBinopOfReductions(Instruction &
I) {
1764 if (BinOpOpc == Instruction::Sub)
1765 ReductionIID = Intrinsic::vector_reduce_add;
1769 if (ReductionIID == Intrinsic::vector_reduce_fadd ||
1770 ReductionIID == Intrinsic::vector_reduce_fmul)
1773 auto checkIntrinsicAndGetItsArgument = [](
Value *
V,
1778 if (
II->getIntrinsicID() == IID &&
II->hasOneUse())
1779 return II->getArgOperand(0);
1783 Value *V0 = checkIntrinsicAndGetItsArgument(
I.getOperand(0), ReductionIID);
1786 Value *
V1 = checkIntrinsicAndGetItsArgument(
I.getOperand(1), ReductionIID);
1791 if (
V1->getType() != VTy)
1795 unsigned ReductionOpc =
1808 CostOfRedOperand0 + CostOfRedOperand1 +
1811 if (NewCost >= OldCost || !NewCost.
isValid())
1815 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
1818 if (BinOpOpc == Instruction::Or)
1825 replaceValue(
I, *Rdx);
1833 unsigned NumScanned = 0;
1834 return std::any_of(Begin, End, [&](
const Instruction &Instr) {
1843class ScalarizationResult {
1844 enum class StatusTy { Unsafe, Safe, SafeWithFreeze };
1849 ScalarizationResult(StatusTy Status,
Value *ToFreeze =
nullptr)
1850 : Status(Status), ToFreeze(ToFreeze) {}
1853 ScalarizationResult(
const ScalarizationResult &
Other) =
default;
1854 ~ScalarizationResult() {
1855 assert(!ToFreeze &&
"freeze() not called with ToFreeze being set");
1858 static ScalarizationResult unsafe() {
return {StatusTy::Unsafe}; }
1859 static ScalarizationResult safe() {
return {StatusTy::Safe}; }
1860 static ScalarizationResult safeWithFreeze(
Value *ToFreeze) {
1861 return {StatusTy::SafeWithFreeze, ToFreeze};
1865 bool isSafe()
const {
return Status == StatusTy::Safe; }
1867 bool isUnsafe()
const {
return Status == StatusTy::Unsafe; }
1870 bool isSafeWithFreeze()
const {
return Status == StatusTy::SafeWithFreeze; }
1875 Status = StatusTy::Unsafe;
1879 void freeze(IRBuilderBase &Builder, Instruction &UserI) {
1880 assert(isSafeWithFreeze() &&
1881 "should only be used when freezing is required");
1883 "UserI must be a user of ToFreeze");
1884 IRBuilder<>::InsertPointGuard Guard(Builder);
1889 if (
U.get() == ToFreeze)
1904 uint64_t NumElements = VecTy->getElementCount().getKnownMinValue();
1908 if (
C->getValue().ult(NumElements))
1909 return ScalarizationResult::safe();
1910 return ScalarizationResult::unsafe();
1915 return ScalarizationResult::unsafe();
1917 APInt Zero(IntWidth, 0);
1918 APInt MaxElts(IntWidth, NumElements);
1925 return ScalarizationResult::safe();
1926 return ScalarizationResult::unsafe();
1939 if (ValidIndices.
contains(IdxRange))
1940 return ScalarizationResult::safeWithFreeze(IdxBase);
1941 return ScalarizationResult::unsafe();
1953 C->getZExtValue() *
DL.getTypeStoreSize(ScalarType));
1965bool VectorCombine::foldSingleElementStore(Instruction &
I) {
1977 if (!
match(
SI->getValueOperand(),
1984 Value *SrcAddr =
Load->getPointerOperand()->stripPointerCasts();
1987 if (!
Load->isSimple() ||
Load->getParent() !=
SI->getParent() ||
1988 !
DL->typeSizeEqualsStoreSize(
Load->getType()->getScalarType()) ||
1989 SrcAddr !=
SI->getPointerOperand()->stripPointerCasts())
1992 auto ScalarizableIdx =
1994 if (ScalarizableIdx.isUnsafe() ||
2001 Worklist.
push(Load);
2003 if (ScalarizableIdx.isSafeWithFreeze())
2006 SI->getValueOperand()->getType(),
SI->getPointerOperand(),
2007 {ConstantInt::get(Idx->getType(), 0), Idx});
2011 std::max(
SI->getAlign(),
Load->getAlign()), NewElement->
getType(), Idx,
2014 replaceValue(
I, *NSI);
2024bool VectorCombine::scalarizeLoad(Instruction &
I) {
2034 if (!LI->isSimple() || !
DL->typeSizeEqualsStoreSize(VecTy->getScalarType()))
2037 bool AllExtracts =
true;
2038 bool AllBitcasts =
true;
2040 unsigned NumInstChecked = 0;
2045 for (User *U : LI->users()) {
2047 if (!UI || UI->getParent() != LI->getParent())
2052 if (UI->use_empty())
2056 AllExtracts =
false;
2058 AllBitcasts =
false;
2062 for (Instruction &
I :
2063 make_range(std::next(LI->getIterator()), UI->getIterator())) {
2070 LastCheckedInst = UI;
2075 return scalarizeLoadExtract(LI, VecTy, Ptr);
2077 return scalarizeLoadBitcast(LI, VecTy, Ptr);
2082bool VectorCombine::scalarizeLoadExtract(LoadInst *LI, VectorType *VecTy,
2087 DenseMap<ExtractElementInst *, ScalarizationResult> NeedFreeze;
2090 for (
auto &Pair : NeedFreeze)
2091 Pair.second.discard();
2099 for (User *U : LI->
users()) {
2104 if (ScalarIdx.isUnsafe())
2106 if (ScalarIdx.isSafeWithFreeze()) {
2107 NeedFreeze.try_emplace(UI, ScalarIdx);
2108 ScalarIdx.discard();
2114 Index ?
Index->getZExtValue() : -1);
2122 LLVM_DEBUG(
dbgs() <<
"Found all extractions of a vector load: " << *LI
2123 <<
"\n LoadExtractCost: " << OriginalCost
2124 <<
" vs ScalarizedCost: " << ScalarizedCost <<
"\n");
2126 if (ScalarizedCost >= OriginalCost)
2133 Type *ElemType = VecTy->getElementType();
2136 for (User *U : LI->
users()) {
2138 Value *Idx = EI->getIndexOperand();
2141 auto It = NeedFreeze.find(EI);
2142 if (It != NeedFreeze.end())
2149 Builder.
CreateLoad(ElemType,
GEP, EI->getName() +
".scalar"));
2151 Align ScalarOpAlignment =
2153 NewLoad->setAlignment(ScalarOpAlignment);
2156 size_t Offset = ConstIdx->getZExtValue() *
DL->getTypeStoreSize(ElemType);
2161 replaceValue(*EI, *NewLoad,
false);
2164 FailureGuard.release();
2169bool VectorCombine::scalarizeLoadBitcast(LoadInst *LI, VectorType *VecTy,
2175 Type *TargetScalarType =
nullptr;
2176 unsigned VecBitWidth =
DL->getTypeSizeInBits(VecTy);
2178 for (User *U : LI->
users()) {
2181 Type *DestTy = BC->getDestTy();
2185 unsigned DestBitWidth =
DL->getTypeSizeInBits(DestTy);
2186 if (DestBitWidth != VecBitWidth)
2190 if (!TargetScalarType)
2191 TargetScalarType = DestTy;
2192 else if (TargetScalarType != DestTy)
2200 if (!TargetScalarType)
2208 LLVM_DEBUG(
dbgs() <<
"Found vector load feeding only bitcasts: " << *LI
2209 <<
"\n OriginalCost: " << OriginalCost
2210 <<
" vs ScalarizedCost: " << ScalarizedCost <<
"\n");
2212 if (ScalarizedCost >= OriginalCost)
2223 ScalarLoad->copyMetadata(*LI);
2226 for (User *U : LI->
users()) {
2228 replaceValue(*BC, *ScalarLoad,
false);
2234bool VectorCombine::scalarizeExtExtract(Instruction &
I) {
2249 Type *ScalarDstTy = DstTy->getElementType();
2250 if (
DL->getTypeSizeInBits(SrcTy) !=
DL->getTypeSizeInBits(ScalarDstTy))
2256 unsigned ExtCnt = 0;
2257 bool ExtLane0 =
false;
2258 for (User *U : Ext->users()) {
2272 Instruction::And, ScalarDstTy,
CostKind,
2275 (ExtCnt - ExtLane0) *
2277 Instruction::LShr, ScalarDstTy,
CostKind,
2280 if (ScalarCost > VectorCost)
2283 Value *ScalarV = Ext->getOperand(0);
2290 SmallDenseSet<ConstantInt *, 8> ExtractedLanes;
2291 bool AllExtractsTriggerUB =
true;
2292 ExtractElementInst *LastExtract =
nullptr;
2294 for (User *U : Ext->users()) {
2297 AllExtractsTriggerUB =
false;
2301 if (!LastExtract || LastExtract->
comesBefore(Extract))
2302 LastExtract = Extract;
2304 if (ExtractedLanes.
size() != DstTy->getNumElements() ||
2305 !AllExtractsTriggerUB ||
2313 uint64_t SrcEltSizeInBits =
DL->getTypeSizeInBits(SrcTy->getElementType());
2314 uint64_t TotalBits =
DL->getTypeSizeInBits(SrcTy);
2317 Value *
Mask = ConstantInt::get(PackedTy, EltBitMask);
2318 for (User *U : Ext->users()) {
2324 ? (TotalBits - SrcEltSizeInBits - Idx * SrcEltSizeInBits)
2325 : (Idx * SrcEltSizeInBits);
2328 U->replaceAllUsesWith(
And);
2336bool VectorCombine::foldConcatOfBoolMasks(Instruction &
I) {
2337 Type *Ty =
I.getType();
2342 if (
DL->isBigEndian())
2353 uint64_t ShAmtX = 0;
2361 uint64_t ShAmtY = 0;
2369 if (ShAmtX > ShAmtY) {
2377 uint64_t ShAmtDiff = ShAmtY - ShAmtX;
2378 unsigned NumSHL = (ShAmtX > 0) + (ShAmtY > 0);
2383 MaskTy->getNumElements() != ShAmtDiff ||
2384 MaskTy->getNumElements() > (
BitWidth / 2))
2389 Type::getIntNTy(Ty->
getContext(), ConcatTy->getNumElements());
2390 auto *MaskIntTy = Type::getIntNTy(Ty->
getContext(), ShAmtDiff);
2393 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
2410 if (Ty != ConcatIntTy)
2416 LLVM_DEBUG(
dbgs() <<
"Found a concatenation of bitcasted bool masks: " <<
I
2417 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2420 if (NewCost > OldCost)
2430 if (Ty != ConcatIntTy) {
2440 replaceValue(
I, *Result);
2446bool VectorCombine::foldPermuteOfBinops(Instruction &
I) {
2447 BinaryOperator *BinOp;
2448 ArrayRef<int> OuterMask;
2456 Value *Op00, *Op01, *Op10, *Op11;
2457 ArrayRef<int> Mask0, Mask1;
2462 if (!Match0 && !Match1)
2475 if (!ShuffleDstTy || !BinOpTy || !Op0Ty || !Op1Ty)
2478 unsigned NumSrcElts = BinOpTy->getNumElements();
2483 any_of(OuterMask, [NumSrcElts](
int M) {
return M >= (int)NumSrcElts; }))
2487 SmallVector<int> NewMask0, NewMask1;
2488 for (
int M : OuterMask) {
2489 if (M < 0 || M >= (
int)NumSrcElts) {
2493 NewMask0.
push_back(Match0 ? Mask0[M] : M);
2494 NewMask1.
push_back(Match1 ? Mask1[M] : M);
2498 unsigned NumOpElts = Op0Ty->getNumElements();
2499 bool IsIdentity0 = ShuffleDstTy == Op0Ty &&
2500 all_of(NewMask0, [NumOpElts](
int M) {
return M < (int)NumOpElts; }) &&
2502 bool IsIdentity1 = ShuffleDstTy == Op1Ty &&
2503 all_of(NewMask1, [NumOpElts](
int M) {
return M < (int)NumOpElts; }) &&
2512 ShuffleDstTy, BinOpTy, OuterMask,
CostKind,
2513 0,
nullptr, {BinOp}, &
I);
2515 NewCost += BinOpCost;
2521 OldCost += Shuf0Cost;
2523 NewCost += Shuf0Cost;
2529 OldCost += Shuf1Cost;
2531 NewCost += Shuf1Cost;
2539 Op0Ty, NewMask0,
CostKind, 0,
nullptr, {Op00, Op01});
2543 Op1Ty, NewMask1,
CostKind, 0,
nullptr, {Op10, Op11});
2545 LLVM_DEBUG(
dbgs() <<
"Found a shuffle feeding a shuffled binop: " <<
I
2546 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2550 if (NewCost > OldCost)
2561 NewInst->copyIRFlags(BinOp);
2565 replaceValue(
I, *NewBO);
2571bool VectorCombine::foldShuffleOfBinops(Instruction &
I) {
2572 ArrayRef<int> OldMask;
2579 if (
LHS->getOpcode() !=
RHS->getOpcode())
2583 bool IsCommutative =
false;
2592 IsCommutative = BinaryOperator::isCommutative(BO->getOpcode());
2603 if (!ShuffleDstTy || !BinResTy || !BinOpTy ||
X->getType() !=
Z->getType())
2606 bool SameBinOp =
LHS ==
RHS;
2607 unsigned NumSrcElts = BinOpTy->getNumElements();
2610 if (IsCommutative &&
X != Z &&
Y != W && (
X == W ||
Y == Z))
2613 auto ConvertToUnary = [NumSrcElts](
int &
M) {
2614 if (M >= (
int)NumSrcElts)
2618 SmallVector<int> NewMask0(OldMask);
2627 SmallVector<int> NewMask1(OldMask);
2646 ShuffleDstTy, BinResTy, OldMask,
CostKind, 0,
2656 ArrayRef<int> InnerMask;
2658 m_Mask(InnerMask)))) &&
2661 [NumSrcElts](
int M) {
return M < (int)NumSrcElts; })) {
2673 bool ReducedInstCount =
false;
2674 ReducedInstCount |= MergeInner(
X, 0, NewMask0,
CostKind);
2675 ReducedInstCount |= MergeInner(
Y, 0, NewMask1,
CostKind);
2676 ReducedInstCount |= MergeInner(Z, NumSrcElts, NewMask0,
CostKind);
2677 ReducedInstCount |= MergeInner(W, NumSrcElts, NewMask1,
CostKind);
2678 bool SingleSrcBinOp = (
X ==
Y) && (Z == W) && (NewMask0 == NewMask1);
2683 auto *ShuffleCmpTy =
2686 SK0, ShuffleCmpTy, BinOpTy, NewMask0,
CostKind, 0,
nullptr, {
X,
Z});
2687 if (!SingleSrcBinOp)
2697 PredLHS,
CostKind, Op0Info, Op1Info);
2707 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2714 if (ReducedInstCount ? (NewCost > OldCost) : (NewCost >= OldCost))
2723 : Builder.
CreateCmp(PredLHS, Shuf0, Shuf1);
2727 NewInst->copyIRFlags(
LHS);
2728 NewInst->andIRFlags(
RHS);
2733 replaceValue(
I, *NewBO);
2740bool VectorCombine::foldShuffleOfSelects(Instruction &
I) {
2742 Value *C1, *
T1, *F1, *C2, *T2, *F2;
2753 if (!C1VecTy || !C2VecTy || C1VecTy != C2VecTy)
2759 if (((SI0FOp ==
nullptr) != (SI1FOp ==
nullptr)) ||
2760 ((SI0FOp !=
nullptr) &&
2761 (SI0FOp->getFastMathFlags() != SI1FOp->getFastMathFlags())))
2767 auto SelOp = Instruction::Select;
2775 CostSel1 + CostSel2 +
2777 {
I.getOperand(0),
I.getOperand(1)}, &
I);
2781 Mask,
CostKind, 0,
nullptr, {C1, C2});
2791 if (!Sel1->hasOneUse())
2792 NewCost += CostSel1;
2793 if (!Sel2->hasOneUse())
2794 NewCost += CostSel2;
2797 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2799 if (NewCost > OldCost)
2808 NewSel = Builder.
CreateSelectFMF(ShuffleCmp, ShuffleTrue, ShuffleFalse,
2809 SI0FOp->getFastMathFlags());
2811 NewSel = Builder.
CreateSelect(ShuffleCmp, ShuffleTrue, ShuffleFalse);
2816 replaceValue(
I, *NewSel);
2822bool VectorCombine::foldShuffleOfCastops(Instruction &
I) {
2824 ArrayRef<int> OldMask;
2833 if (!C0 || (IsBinaryShuffle && !C1))
2840 if (!IsBinaryShuffle && Opcode == Instruction::BitCast)
2843 if (IsBinaryShuffle) {
2844 if (C0->getSrcTy() != C1->getSrcTy())
2847 if (Opcode != C1->getOpcode()) {
2849 Opcode = Instruction::SExt;
2858 if (!ShuffleDstTy || !CastDstTy || !CastSrcTy)
2861 unsigned NumSrcElts = CastSrcTy->getNumElements();
2862 unsigned NumDstElts = CastDstTy->getNumElements();
2863 assert((NumDstElts == NumSrcElts || Opcode == Instruction::BitCast) &&
2864 "Only bitcasts expected to alter src/dst element counts");
2868 if (NumDstElts != NumSrcElts && (NumSrcElts % NumDstElts) != 0 &&
2869 (NumDstElts % NumSrcElts) != 0)
2872 SmallVector<int, 16> NewMask;
2873 if (NumSrcElts >= NumDstElts) {
2876 assert(NumSrcElts % NumDstElts == 0 &&
"Unexpected shuffle mask");
2877 unsigned ScaleFactor = NumSrcElts / NumDstElts;
2882 assert(NumDstElts % NumSrcElts == 0 &&
"Unexpected shuffle mask");
2883 unsigned ScaleFactor = NumDstElts / NumSrcElts;
2888 auto *NewShuffleDstTy =
2897 if (IsBinaryShuffle)
2912 if (IsBinaryShuffle) {
2922 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2924 if (NewCost > OldCost)
2928 if (IsBinaryShuffle)
2938 NewInst->copyIRFlags(C0);
2939 if (IsBinaryShuffle)
2940 NewInst->andIRFlags(C1);
2944 replaceValue(
I, *Cast);
2954bool VectorCombine::foldShuffleOfShuffles(Instruction &
I) {
2955 ArrayRef<int> OuterMask;
2956 Value *OuterV0, *OuterV1;
2961 ArrayRef<int> InnerMask0, InnerMask1;
2962 Value *X0, *X1, *Y0, *Y1;
2967 if (!Match0 && !Match1)
2972 SmallVector<int, 16> PoisonMask1;
2977 InnerMask1 = PoisonMask1;
2981 X0 = Match0 ? X0 : OuterV0;
2982 Y0 = Match0 ? Y0 : OuterV0;
2983 X1 = Match1 ? X1 : OuterV1;
2984 Y1 = Match1 ? Y1 : OuterV1;
2988 if (!ShuffleDstTy || !ShuffleSrcTy || !ShuffleImmTy ||
2992 unsigned NumSrcElts = ShuffleSrcTy->getNumElements();
2993 unsigned NumImmElts = ShuffleImmTy->getNumElements();
2998 SmallVector<int, 16> NewMask(OuterMask);
2999 Value *NewX =
nullptr, *NewY =
nullptr;
3000 for (
int &M : NewMask) {
3001 Value *Src =
nullptr;
3002 if (0 <= M && M < (
int)NumImmElts) {
3006 Src =
M >= (int)NumSrcElts ? Y0 : X0;
3007 M =
M >= (int)NumSrcElts ? (M - NumSrcElts) :
M;
3009 }
else if (M >= (
int)NumImmElts) {
3014 Src =
M >= (int)NumSrcElts ? Y1 : X1;
3015 M =
M >= (int)NumSrcElts ? (M - NumSrcElts) :
M;
3019 assert(0 <= M && M < (
int)NumSrcElts &&
"Unexpected shuffle mask index");
3028 if (!NewX || NewX == Src) {
3032 if (!NewY || NewY == Src) {
3051 replaceValue(
I, *NewX);
3068 bool IsUnary =
all_of(NewMask, [&](
int M) {
return M < (int)NumSrcElts; });
3074 nullptr, {NewX, NewY});
3076 NewCost += InnerCost0;
3078 NewCost += InnerCost1;
3081 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
3083 if (NewCost > OldCost)
3087 replaceValue(
I, *Shuf);
3103bool VectorCombine::foldShufflesOfLengthChangingShuffles(Instruction &
I) {
3108 unsigned ChainLength = 0;
3109 SmallVector<int>
Mask;
3110 SmallVector<int> YMask;
3120 ArrayRef<int> OuterMask;
3121 Value *OuterV0, *OuterV1;
3122 if (ChainLength != 0 && !Trunk->
hasOneUse())
3125 m_Mask(OuterMask))))
3127 if (OuterV0->
getType() != TrunkType) {
3133 ArrayRef<int> InnerMask0, InnerMask1;
3134 Value *A0, *A1, *B0, *B1;
3139 bool Match0Leaf = Match0 && A0->
getType() !=
I.getType();
3140 bool Match1Leaf = Match1 && A1->
getType() !=
I.getType();
3141 if (Match0Leaf == Match1Leaf) {
3147 SmallVector<int> CommutedOuterMask;
3154 for (
int &M : CommutedOuterMask) {
3157 if (M < (
int)NumTrunkElts)
3162 OuterMask = CommutedOuterMask;
3181 int NumLeafElts = YType->getNumElements();
3182 SmallVector<int> LocalYMask(InnerMask1);
3183 for (
int &M : LocalYMask) {
3184 if (M >= NumLeafElts)
3194 Mask.assign(OuterMask);
3195 YMask.
assign(LocalYMask);
3196 OldCost = NewCost = LocalOldCost;
3203 SmallVector<int> NewYMask(YMask);
3205 for (
auto [CombinedM, LeafM] :
llvm::zip(NewYMask, LocalYMask)) {
3206 if (LeafM == -1 || CombinedM == LeafM)
3208 if (CombinedM == -1) {
3218 SmallVector<int> NewMask;
3219 NewMask.
reserve(NumTrunkElts);
3220 for (
int M : Mask) {
3221 if (M < 0 || M >=
static_cast<int>(NumTrunkElts))
3236 if (LocalNewCost >= NewCost && LocalOldCost < LocalNewCost - NewCost)
3240 if (ChainLength == 1) {
3241 dbgs() <<
"Found chain of shuffles fed by length-changing shuffles: "
3244 dbgs() <<
" next chain link: " << *Trunk <<
'\n'
3245 <<
" old cost: " << (OldCost + LocalOldCost)
3246 <<
" new cost: " << LocalNewCost <<
'\n';
3251 OldCost += LocalOldCost;
3252 NewCost = LocalNewCost;
3256 if (ChainLength <= 1)
3264 return M < 0 || M >=
static_cast<int>(NumTrunkElts);
3267 for (
int &M : Mask) {
3268 if (M >=
static_cast<int>(NumTrunkElts))
3269 M = YMask[
M - NumTrunkElts];
3273 replaceValue(
I, *Root);
3280 replaceValue(
I, *Root);
3286bool VectorCombine::foldShuffleOfIntrinsics(Instruction &
I) {
3288 ArrayRef<int> OldMask;
3298 if (IID != II1->getIntrinsicID())
3307 if (!ShuffleDstTy || !II0Ty)
3313 for (
unsigned I = 0,
E = II0->arg_size();
I !=
E; ++
I) {
3314 Value *Arg0 = II0->getArgOperand(
I);
3315 Value *Arg1 = II1->getArgOperand(
I);
3332 II0Ty, OldMask,
CostKind, 0,
nullptr, {II0, II1}, &
I);
3336 SmallDenseSet<std::pair<Value *, Value *>> SeenOperandPairs;
3337 for (
unsigned I = 0,
E = II0->arg_size();
I !=
E; ++
I) {
3339 NewArgsTy.
push_back(II0->getArgOperand(
I)->getType());
3343 ShuffleDstTy->getNumElements());
3345 std::pair<Value *, Value *> OperandPair =
3346 std::make_pair(II0->getArgOperand(
I), II1->getArgOperand(
I));
3347 if (!SeenOperandPairs.
insert(OperandPair).second) {
3353 CostKind, 0,
nullptr, {II0->getArgOperand(
I), II1->getArgOperand(
I)});
3356 IntrinsicCostAttributes NewAttr(IID, ShuffleDstTy, NewArgsTy);
3359 if (!II0->hasOneUse())
3361 if (II1 != II0 && !II1->hasOneUse())
3365 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
3368 if (NewCost > OldCost)
3372 SmallDenseMap<std::pair<Value *, Value *>,
Value *> ShuffleCache;
3373 for (
unsigned I = 0,
E = II0->arg_size();
I !=
E; ++
I)
3377 std::pair<Value *, Value *> OperandPair =
3378 std::make_pair(II0->getArgOperand(
I), II1->getArgOperand(
I));
3379 auto It = ShuffleCache.
find(OperandPair);
3380 if (It != ShuffleCache.
end()) {
3386 II1->getArgOperand(
I), OldMask);
3387 ShuffleCache[OperandPair] = Shuf;
3395 NewInst->copyIRFlags(II0);
3396 NewInst->andIRFlags(II1);
3399 replaceValue(
I, *NewIntrinsic);
3405bool VectorCombine::foldPermuteOfIntrinsic(Instruction &
I) {
3417 if (!ShuffleDstTy || !IntrinsicSrcTy)
3421 unsigned NumSrcElts = IntrinsicSrcTy->getNumElements();
3422 if (
any_of(Mask, [NumSrcElts](
int M) {
return M >= (int)NumSrcElts; }))
3435 IntrinsicSrcTy, Mask,
CostKind, 0,
nullptr, {V0}, &
I);
3439 for (
unsigned I = 0,
E = II0->arg_size();
I !=
E; ++
I) {
3441 NewArgsTy.
push_back(II0->getArgOperand(
I)->getType());
3445 ShuffleDstTy->getNumElements());
3448 ArgTy, VecTy, Mask,
CostKind, 0,
nullptr,
3449 {II0->getArgOperand(
I)});
3452 IntrinsicCostAttributes NewAttr(IID, ShuffleDstTy, NewArgsTy);
3457 if (!II0->hasOneUse())
3460 LLVM_DEBUG(
dbgs() <<
"Found a permute of intrinsic: " <<
I <<
"\n OldCost: "
3461 << OldCost <<
" vs NewCost: " << NewCost <<
"\n");
3463 if (NewCost > OldCost)
3468 for (
unsigned I = 0,
E = II0->arg_size();
I !=
E; ++
I) {
3481 NewInst->copyIRFlags(II0);
3483 replaceValue(
I, *NewIntrinsic);
3493 int M = SV->getMaskValue(Lane);
3496 if (
static_cast<unsigned>(M) < NumElts) {
3497 V = SV->getOperand(0);
3500 V = SV->getOperand(1);
3511 auto [U, Lane] = IL;
3524 unsigned NumElts = Ty->getNumElements();
3525 if (Item.
size() == NumElts || NumElts == 1 || Item.
size() % NumElts != 0)
3531 std::iota(ConcatMask.
begin(), ConcatMask.
end(), 0);
3537 unsigned NumSlices = Item.
size() / NumElts;
3542 for (
unsigned Slice = 0; Slice < NumSlices; ++Slice) {
3543 Value *SliceV = Item[Slice * NumElts].first;
3544 if (!SliceV || SliceV->
getType() != Ty)
3546 for (
unsigned Elt = 0; Elt < NumElts; ++Elt) {
3547 auto [V, Lane] = Item[Slice * NumElts + Elt];
3548 if (Lane !=
static_cast<int>(Elt) || SliceV != V)
3557 const DenseSet<std::pair<Value *, Use *>> &IdentityLeafs,
3558 const DenseSet<std::pair<Value *, Use *>> &SplatLeafs,
3559 const DenseSet<std::pair<Value *, Use *>> &ConcatLeafs,
3561 auto [FrontV, FrontLane] = Item.
front();
3563 if (IdentityLeafs.contains(std::make_pair(FrontV, From))) {
3566 if (SplatLeafs.contains(std::make_pair(FrontV, From))) {
3568 return Builder.CreateShuffleVector(FrontV, Mask);
3570 if (ConcatLeafs.contains(std::make_pair(FrontV, From))) {
3574 for (
unsigned S = 0; S <
Values.size(); ++S)
3575 Values[S] = Item[S * NumElts].first;
3577 while (
Values.size() > 1) {
3580 std::iota(Mask.begin(), Mask.end(), 0);
3582 for (
unsigned S = 0; S < NewValues.
size(); ++S)
3584 Builder.CreateShuffleVector(
Values[S * 2],
Values[S * 2 + 1], Mask);
3592 unsigned NumOps =
I->getNumOperands() - (
II ? 1 : 0);
3594 for (
unsigned Idx = 0; Idx <
NumOps; Idx++) {
3597 Ops[Idx] =
II->getOperand(Idx);
3601 &
I->getOperandUse(Idx), Ty, IdentityLeafs,
3602 SplatLeafs, ConcatLeafs, Builder,
TTI);
3606 for (
const auto &Lane : Item)
3619 auto *
Value = Builder.CreateCmp(CI->getPredicate(),
Ops[0],
Ops[1]);
3629 auto *
Value = Builder.CreateCast(CI->getOpcode(),
Ops[0], DstTy);
3634 auto *
Value = Builder.CreateIntrinsic(DstTy,
II->getIntrinsicID(),
Ops);
3648bool VectorCombine::foldShuffleToIdentity(Instruction &
I) {
3650 if (!Ty ||
I.use_empty())
3654 for (
unsigned M = 0,
E = Ty->getNumElements(); M <
E; ++M)
3658 Worklist.
push_back(std::make_pair(Start, &*
I.use_begin()));
3659 DenseSet<std::pair<Value *, Use *>> IdentityLeafs, SplatLeafs, ConcatLeafs;
3660 unsigned NumVisited = 0;
3662 while (!Worklist.
empty()) {
3667 auto Item = ItemFrom.first;
3668 auto From = ItemFrom.second;
3669 auto [FrontV, FrontLane] = Item.front();
3677 return X->getType() ==
Y->getType() &&
3682 if (FrontLane == 0 &&
3684 Ty->getNumElements() &&
3686 Value *FrontV = Item.front().first;
3687 return !
E.value().first || (IsEquiv(
E.value().first, FrontV) &&
3688 E.value().second == (int)
E.index());
3690 IdentityLeafs.
insert(std::make_pair(FrontV, From));
3695 C &&
C->getSplatValue() &&
3697 Value *FrontV = Item.front().first;
3703 SplatLeafs.
insert(std::make_pair(FrontV, From));
3708 auto [FrontV, FrontLane] = Item.front();
3709 auto [
V, Lane] = IL;
3710 return !
V || (
V == FrontV && Lane == FrontLane);
3712 SplatLeafs.
insert(std::make_pair(FrontV, From));
3718 auto CheckLaneIsEquivalentToFirst = [Item](
InstLane IL) {
3719 Value *FrontV = Item.front().first;
3728 if (CI->getPredicate() !=
cast<CmpInst>(FrontV)->getPredicate())
3731 if (CI->getSrcTy()->getScalarType() !=
3736 SI->getOperand(0)->getType() !=
3743 II->getIntrinsicID() ==
3745 !
II->hasOperandBundles());
3752 BO && BO->isIntDivRem())
3759 }
else if (
isa<UnaryOperator, TruncInst, ZExtInst, SExtInst, FPToSIInst,
3760 FPToUIInst, SIToFPInst, UIToFPInst>(FrontV)) {
3768 if (DstTy && SrcTy &&
3769 SrcTy->getNumElements() == DstTy->getNumElements()) {
3771 &BitCast->getOperandUse(0));
3776 &Sel->getOperandUse(0));
3778 &Sel->getOperandUse(1));
3780 &Sel->getOperandUse(2));
3784 !
II->hasOperandBundles()) {
3785 for (
unsigned Op = 0,
E =
II->getNumOperands() - 1;
Op <
E;
Op++) {
3789 Value *FrontV = Item.front().first;
3805 ConcatLeafs.
insert(std::make_pair(FrontV, From));
3812 if (NumVisited <= 1)
3815 LLVM_DEBUG(
dbgs() <<
"Found a superfluous identity shuffle: " <<
I <<
"\n");
3821 SplatLeafs, ConcatLeafs, Builder, &
TTI);
3822 replaceValue(
I, *V);
3829bool VectorCombine::foldShuffleFromReductions(Instruction &
I) {
3833 switch (
II->getIntrinsicID()) {
3834 case Intrinsic::vector_reduce_add:
3835 case Intrinsic::vector_reduce_mul:
3836 case Intrinsic::vector_reduce_and:
3837 case Intrinsic::vector_reduce_or:
3838 case Intrinsic::vector_reduce_xor:
3839 case Intrinsic::vector_reduce_smin:
3840 case Intrinsic::vector_reduce_smax:
3841 case Intrinsic::vector_reduce_umin:
3842 case Intrinsic::vector_reduce_umax:
3851 std::queue<Value *> Worklist;
3852 SmallPtrSet<Value *, 4> Visited;
3853 ShuffleVectorInst *Shuffle =
nullptr;
3857 while (!Worklist.empty()) {
3858 Value *CV = Worklist.front();
3870 if (CI->isBinaryOp()) {
3871 for (
auto *
Op : CI->operand_values())
3875 if (Shuffle && Shuffle != SV)
3892 for (
auto *V : Visited)
3893 for (
auto *U :
V->users())
3894 if (!Visited.contains(U) && U != &
I)
3897 FixedVectorType *VecType =
3901 FixedVectorType *ShuffleInputType =
3903 if (!ShuffleInputType)
3909 SmallVector<int> ConcatMask;
3911 sort(ConcatMask, [](
int X,
int Y) {
return (
unsigned)
X < (unsigned)
Y; });
3912 bool UsesSecondVec =
3913 any_of(ConcatMask, [&](
int M) {
return M >= (int)NumInputElts; });
3920 ShuffleInputType, ConcatMask,
CostKind);
3922 LLVM_DEBUG(
dbgs() <<
"Found a reduction feeding from a shuffle: " << *Shuffle
3924 LLVM_DEBUG(
dbgs() <<
" OldCost: " << OldCost <<
" vs NewCost: " << NewCost
3926 bool MadeChanges =
false;
3927 if (NewCost < OldCost) {
3931 LLVM_DEBUG(
dbgs() <<
"Created new shuffle: " << *NewShuffle <<
"\n");
3932 replaceValue(*Shuffle, *NewShuffle);
3938 MadeChanges |= foldSelectShuffle(*Shuffle,
true);
3959bool VectorCombine::foldShuffleChainsToReduce(Instruction &
I) {
3968 if (FVT->getNumElements() < 2)
3971 std::optional<Instruction::BinaryOps> CommonBinOp;
3972 std::optional<Intrinsic::ID> CommonCallOp;
3977 CommonBinOp = BO->getOpcode();
3979 CommonCallOp = MMI->getIntrinsicID();
3985 FastMathFlags CommonFMF;
3986 bool IsFloatReduction =
false;
3990 auto IsChainNode = [&](
Value *
V) {
3992 return CommonBinOp && BO->getOpcode() == *CommonBinOp;
3994 return CommonCallOp && MMI->getIntrinsicID() == *CommonCallOp;
4002 constexpr unsigned MaxChainNodes = 32;
4003 SmallSetVector<Value *, 16> Nodes;
4004 SmallSetVector<Value *, 4> Sources;
4005 unsigned NumVisited = 0;
4006 auto AddSource = [&](
Value *
V) {
4012 auto Walk = [&](
Value *
V,
auto &&Walk) ->
bool {
4015 if (++NumVisited > MaxChainNodes)
4017 if (!IsChainNode(V))
4018 return AddSource(V);
4023 if (!Walk(
U->getOperand(
I), Walk))
4032 return AddSource(V);
4034 if (!Walk(VecOpEE, Walk) || Nodes.
empty())
4041 for (
Value *V : Nodes) {
4047 if (!IsFloatReduction) {
4049 IsFloatReduction =
true;
4063 DenseMap<Value *, Demand> Demands;
4064 auto DemandOf = [&](
Value *
V) -> Demand & {
4066 Demand &
D = Demands[
V];
4067 if (
D.Lanes.getBitWidth() !=
N)
4071 DemandOf(VecOpEE).Lanes.setBit(0);
4073 Demand DV = Demands.
lookup(V);
4074 if (DV.Lanes.isZero())
4077 ArrayRef<int>
Mask = SVI->getShuffleMask();
4078 Demand &
DS = DemandOf(SVI->getOperand(0));
4079 for (
unsigned I = 0,
E =
Mask.size();
I !=
E; ++
I) {
4081 if (!DV.Lanes[
I] || Mask[
I] < 0 ||
4082 (
unsigned)Mask[
I] >=
DS.Lanes.getBitWidth())
4084 if (
DS.Lanes[Mask[
I]] || DV.Duplicates[
I])
4085 DS.Duplicates.setBit(Mask[
I]);
4086 DS.Lanes.setBit(Mask[
I]);
4090 for (
Value *
Op : {
U->getOperand(0),
U->getOperand(1)}) {
4091 Demand &DOp = DemandOf(
Op);
4093 DOp.Duplicates |= DV.Duplicates | (DOp.Lanes & DV.Lanes);
4094 DOp.Lanes |= DV.Lanes;
4101 auto CoversChain = [&](
Value *
V) {
4102 SmallVector<Value *, 8> Worklist(1, VecOpEE);
4103 SmallPtrSet<Value *, 8> Seen;
4105 while (!Worklist.empty()) {
4108 for (
unsigned I = 0;
I !=
NumOps; ++
I) {
4112 if (!Nodes.contains(
Op))
4114 Worklist.push_back(
Op);
4122 struct ReductionCut {
4126 std::optional<ReductionCut> Cut;
4127 for (
Value *S : Sources) {
4128 auto It = Demands.
find(S);
4129 if (It == Demands.
end() || It->second.Lanes.isZero())
4131 if (Cut || (!IsIdempotent && !It->second.Duplicates.isZero())) {
4135 Cut = ReductionCut{S, It->second.Lanes};
4138 for (
Value *V : Nodes) {
4141 auto It = Demands.
find(V);
4142 if (It == Demands.
end() || !It->second.Lanes.isAllOnes())
4144 if (!IsIdempotent && !It->second.Duplicates.isZero())
4146 if (!CoversChain(V))
4148 Cut = ReductionCut{
V, It->second.Lanes};
4153 if (!Cut || Cut->Elts.popcount() < 2)
4163 for (
Value *V : Nodes)
4167 bool IsPartialReduction = !Cut->Elts.isAllOnes();
4168 FixedVectorType *ReduceVecTy =
4173 SmallVector<int> ExtractMask;
4175 if (IsPartialReduction) {
4176 for (
unsigned I = 0,
E = Cut->Elts.getBitWidth();
I !=
E; ++
I)
4178 ExtractMask.push_back(
I);
4179 unsigned SubIdx = 0, SubLen;
4180 auto SK = Cut->Elts.isShiftedMask(SubIdx, SubLen)
4184 SubIdx, ReduceVecTy);
4187 IntrinsicCostAttributes ICA(
4188 ReducedOp, ReduceVecTy->getElementType(),
4192 IsFloatReduction ? CommonFMF : FastMathFlags());
4195 LLVM_DEBUG(
dbgs() <<
"Found reduction shuffle chain: " <<
I <<
"\n OldCost : "
4196 << OrigCost <<
" vs NewCost: " << NewCost <<
"\n");
4201 if (VecOpEE->
hasOneUse() ? (NewCost > OrigCost) : (NewCost >= OrigCost))
4204 Value *ReduceInput = Cut->Src;
4205 if (IsPartialReduction)
4208 Value *ReducedResult;
4209 if (IsFloatReduction) {
4211 *CommonBinOp, ReduceVecTy->getElementType(),
false,
4214 {Identity, ReduceInput}, CommonFMF);
4219 replaceValue(
I, *ReducedResult);
4228bool VectorCombine::foldCastFromReductions(Instruction &
I) {
4233 bool TruncOnly =
false;
4236 case Intrinsic::vector_reduce_add:
4237 case Intrinsic::vector_reduce_mul:
4240 case Intrinsic::vector_reduce_and:
4241 case Intrinsic::vector_reduce_or:
4242 case Intrinsic::vector_reduce_xor:
4249 Value *ReductionSrc =
I.getOperand(0);
4261 Type *ResultTy =
I.getType();
4264 ReductionOpc, ReductionSrcTy, std::nullopt,
CostKind);
4274 if (OldCost <= NewCost || !NewCost.
isValid())
4278 II->getIntrinsicID(), {Src});
4280 replaceValue(
I, *NewCast);
4308bool VectorCombine::foldSignBitReductionCmp(Instruction &
I) {
4310 IntrinsicInst *ReduceOp;
4311 const APInt *CmpVal;
4318 case Intrinsic::vector_reduce_or:
4319 case Intrinsic::vector_reduce_umax:
4320 case Intrinsic::vector_reduce_and:
4321 case Intrinsic::vector_reduce_umin:
4322 case Intrinsic::vector_reduce_add:
4333 unsigned BitWidth = VecTy->getScalarSizeInBits();
4337 unsigned NumElts = VecTy->getNumElements();
4346 case Intrinsic::vector_reduce_or:
4347 case Intrinsic::vector_reduce_umax:
4348 TreeOpcode = Instruction::Or;
4350 case Intrinsic::vector_reduce_and:
4351 case Intrinsic::vector_reduce_umin:
4352 TreeOpcode = Instruction::And;
4354 case Intrinsic::vector_reduce_add:
4355 TreeOpcode = Instruction::Add;
4363 SmallVector<Value *, 8> Worklist;
4364 SmallVector<Value *, 8> Sources;
4366 std::optional<bool> IsAShr;
4367 constexpr unsigned MaxSources = 8;
4372 while (!Worklist.
empty() && Worklist.
size() <= MaxSources &&
4373 Sources.
size() <= MaxSources) {
4382 bool ThisIsAShr = Shr->getOpcode() == Instruction::AShr;
4384 IsAShr = ThisIsAShr;
4385 else if (*IsAShr != ThisIsAShr)
4411 if (Sources.
empty() || Sources.
size() > MaxSources ||
4412 Worklist.
size() > MaxSources || !IsAShr)
4415 unsigned NumSources = Sources.
size();
4419 if (OrigIID == Intrinsic::vector_reduce_add &&
4427 (OrigIID == Intrinsic::vector_reduce_add) ? NumSources * NumElts : 1;
4430 NegativeVal.negate();
4462 TestsNegative =
false;
4463 }
else if (*CmpVal == NegativeVal) {
4464 TestsNegative =
true;
4468 IsEq = Pred == ICmpInst::ICMP_EQ;
4469 }
else if (Pred == ICmpInst::ICMP_SLT && *CmpVal == RangeHigh) {
4471 TestsNegative = (RangeHigh == NegativeVal);
4472 }
else if (Pred == ICmpInst::ICMP_SGT && *CmpVal == RangeHigh - 1) {
4474 TestsNegative = (RangeHigh == NegativeVal);
4475 }
else if (Pred == ICmpInst::ICMP_SGT && *CmpVal == RangeLow) {
4477 TestsNegative = (RangeLow == NegativeVal);
4478 }
else if (Pred == ICmpInst::ICMP_SLT && *CmpVal == RangeLow + 1) {
4480 TestsNegative = (RangeLow == NegativeVal);
4523 enum CheckKind :
unsigned {
4530 auto RequiresOr = [](CheckKind
C) ->
bool {
return C & 0b100; };
4532 auto IsNegativeCheck = [](CheckKind
C) ->
bool {
return C & 0b010; };
4534 auto Invert = [](CheckKind
C) {
return CheckKind(
C ^ 0b011); };
4538 case Intrinsic::vector_reduce_or:
4539 case Intrinsic::vector_reduce_umax:
4540 Base = TestsNegative ? AnyNeg : AllNonNeg;
4542 case Intrinsic::vector_reduce_and:
4543 case Intrinsic::vector_reduce_umin:
4544 Base = TestsNegative ? AllNeg : AnyNonNeg;
4546 case Intrinsic::vector_reduce_add:
4547 Base = TestsNegative ? AllNeg : AllNonNeg;
4562 return ArithCost <= MinMaxCost ? std::make_pair(Arith, ArithCost)
4563 : std::make_pair(MinMax, MinMaxCost);
4567 auto [NewIID, NewCost] = RequiresOr(
Check)
4568 ? PickCheaper(Intrinsic::vector_reduce_or,
4569 Intrinsic::vector_reduce_umax)
4570 : PickCheaper(
Intrinsic::vector_reduce_and,
4574 if (NumSources > 1) {
4575 unsigned CombineOpc =
4576 RequiresOr(
Check) ? Instruction::Or : Instruction::And;
4581 LLVM_DEBUG(
dbgs() <<
"Found sign-bit reduction cmp: " <<
I <<
"\n OldCost: "
4582 << OldCost <<
" vs NewCost: " << NewCost <<
"\n");
4584 if (NewCost > OldCost)
4589 Type *ScalarTy = VecTy->getScalarType();
4592 if (NumSources == 1) {
4603 replaceValue(
I, *NewCmp);
4634bool VectorCombine::foldReductionZeroTest(Instruction &
I) {
4643 if (!
II || !
II->hasOneUse())
4646 auto ReduceID =
II->getIntrinsicID();
4647 if (ReduceID != Intrinsic::vector_reduce_or &&
4648 ReduceID != Intrinsic::vector_reduce_umax)
4651 Value *Vec =
II->getArgOperand(0);
4653 if (!VecTy || !VecTy->getElementType()->isIntegerTy())
4658 ? Intrinsic::vector_reduce_or
4673 LLVM_DEBUG(
dbgs() <<
"Found a reduction zero test: " <<
I <<
"\n OldCost: "
4674 << OldCost <<
" vs NewCost: " << NewCost <<
"\n");
4676 if (!OldCost.
isValid() || !NewCost.
isValid() || NewCost > OldCost)
4682 replaceValue(
I, *NewReduce);
4707bool VectorCombine::foldICmpEqZeroVectorReduce(Instruction &
I) {
4718 switch (
II->getIntrinsicID()) {
4719 case Intrinsic::vector_reduce_add:
4720 case Intrinsic::vector_reduce_or:
4721 case Intrinsic::vector_reduce_umin:
4722 case Intrinsic::vector_reduce_umax:
4723 case Intrinsic::vector_reduce_smin:
4724 case Intrinsic::vector_reduce_smax:
4730 Value *InnerOp =
II->getArgOperand(0);
4773 switch (
II->getIntrinsicID()) {
4774 case Intrinsic::vector_reduce_add: {
4779 unsigned NumElems = XTy->getNumElements();
4785 if (LeadingZerosX <= LostBits || LeadingZerosFX <= LostBits)
4793 case Intrinsic::vector_reduce_smin:
4794 case Intrinsic::vector_reduce_smax:
4804 LLVM_DEBUG(
dbgs() <<
"Found a reduction to 0 comparison with removable op: "
4820 case Intrinsic::vector_reduce_add:
4821 case Intrinsic::vector_reduce_or:
4827 case Intrinsic::vector_reduce_umin:
4828 case Intrinsic::vector_reduce_umax:
4829 case Intrinsic::vector_reduce_smin:
4830 case Intrinsic::vector_reduce_smax:
4842 NewReduceCost + (InnerOp->
hasOneUse() ? 0 : ExtCost);
4844 LLVM_DEBUG(
dbgs() <<
"Found a removable extension before reduction: "
4845 << *InnerOp <<
"\n OldCost: " << OldCost
4846 <<
" vs NewCost: " << NewCost <<
"\n");
4852 if (NewCost > OldCost)
4861 Builder.
CreateICmp(Pred, NewReduce, ConstantInt::getNullValue(Ty));
4862 replaceValue(
I, *NewCmp);
4893bool VectorCombine::foldEquivalentReductionCmp(Instruction &
I) {
4896 const APInt *CmpVal;
4901 if (!
II || !
II->hasOneUse())
4904 const auto IsValidOrUmaxCmp = [&]() {
4913 bool IsPositive = CmpVal->
isAllOnes() && Pred == ICmpInst::ICMP_SGT;
4915 bool IsNegative = (CmpVal->
isZero() || CmpVal->
isOne() || *CmpVal == 2) &&
4916 Pred == ICmpInst::ICMP_SLT;
4917 return IsEquality || IsPositive || IsNegative;
4920 const auto IsValidAndUminCmp = [&]() {
4925 const auto LeadingOnes = CmpVal->
countl_one();
4932 bool IsNegative = CmpVal->
isZero() && Pred == ICmpInst::ICMP_SLT;
4941 ((*CmpVal)[0] || (*CmpVal)[1]) && Pred == ICmpInst::ICMP_SGT;
4942 return IsEquality || IsNegative || IsPositive;
4950 switch (OriginalIID) {
4951 case Intrinsic::vector_reduce_or:
4952 if (!IsValidOrUmaxCmp())
4954 AlternativeIID = Intrinsic::vector_reduce_umax;
4956 case Intrinsic::vector_reduce_umax:
4957 if (!IsValidOrUmaxCmp())
4959 AlternativeIID = Intrinsic::vector_reduce_or;
4961 case Intrinsic::vector_reduce_and:
4962 if (!IsValidAndUminCmp())
4964 AlternativeIID = Intrinsic::vector_reduce_umin;
4966 case Intrinsic::vector_reduce_umin:
4967 if (!IsValidAndUminCmp())
4969 AlternativeIID = Intrinsic::vector_reduce_and;
4982 if (ReductionOpc != Instruction::ICmp)
4993 <<
"\n OrigCost: " << OrigCost
4994 <<
" vs AltCost: " << AltCost <<
"\n");
4996 if (AltCost >= OrigCost)
5000 Type *ScalarTy = VecTy->getScalarType();
5003 Builder.
CreateICmp(Pred, NewReduce, ConstantInt::get(ScalarTy, *CmpVal));
5005 replaceValue(
I, *NewCmp);
5019 unsigned Depth = 0) {
5020 constexpr unsigned MaxLocalDepth = 2;
5021 if (
Depth > MaxLocalDepth)
5024 auto NumSignBits = [&](
const Value *
X) {
5027 if (NumSignBits(V) == V->getType()->getScalarSizeInBits())
5032 return NumSignBits(
A) >= 2 && NumSignBits(
B) >= 2 &&
5043bool VectorCombine::foldReduceAddCmpZero(Instruction &
I) {
5053 if (!VecTy || VecTy->getNumElements() < 2)
5059 if (!IsNonNegative && !IsNonPositive)
5064 unsigned NumElts = VecTy->getNumElements();
5066 if (
Log2_32(NumElts) >= NumSignBits)
5069 ICmpInst::Predicate NewPred;
5071 case ICmpInst::ICMP_EQ:
5072 case ICmpInst::ICMP_ULE:
5073 case ICmpInst::ICMP_SLE:
5074 case ICmpInst::ICMP_SGE:
5075 NewPred = ICmpInst::ICMP_EQ;
5077 case ICmpInst::ICMP_NE:
5078 case ICmpInst::ICMP_UGT:
5079 case ICmpInst::ICMP_SGT:
5080 case ICmpInst::ICMP_SLT:
5081 NewPred = ICmpInst::ICMP_NE;
5091 if (!IsNonNegative &&
5092 (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SLE))
5094 if (!IsNonPositive &&
5095 (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SGE))
5097 if ((Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SLE ||
5098 Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SGE) &&
5099 Log2_32(NumElts) >= NumSignBits - 1)
5103 Instruction::Add, VecTy, std::nullopt,
CostKind);
5105 Instruction::Or, VecTy, std::nullopt,
CostKind);
5107 Intrinsic::umax, VecTy, FastMathFlags(),
CostKind);
5110 bool UseOr = OrCost.
isValid() && (!UmaxCost.
isValid() || OrCost <= UmaxCost);
5112 if (AltCost > OrigCost)
5118 Intrinsic::vector_reduce_umax, {VecTy}, {Vec});
5119 Worklist.pushValue(NewReduce);
5121 NewPred, NewReduce, ConstantInt::getNullValue(VecTy->getScalarType()));
5122 replaceValue(
I, *NewCmp);
5131 constexpr unsigned MaxVisited = 32;
5134 bool FoundReduction =
false;
5137 while (!WorkList.
empty()) {
5139 for (
User *U :
I->users()) {
5141 if (!UI || !Visited.
insert(UI).second)
5143 if (Visited.
size() > MaxVisited)
5149 switch (
II->getIntrinsicID()) {
5150 case Intrinsic::vector_reduce_add:
5151 case Intrinsic::vector_reduce_mul:
5152 case Intrinsic::vector_reduce_and:
5153 case Intrinsic::vector_reduce_or:
5154 case Intrinsic::vector_reduce_xor:
5155 case Intrinsic::vector_reduce_smin:
5156 case Intrinsic::vector_reduce_smax:
5157 case Intrinsic::vector_reduce_umin:
5158 case Intrinsic::vector_reduce_umax:
5159 FoundReduction =
true;
5172 return FoundReduction;
5185bool VectorCombine::foldSelectShuffle(Instruction &
I,
bool FromReduction) {
5190 if (!Op0 || !Op1 || Op0 == Op1 || !Op0->isBinaryOp() || !Op1->isBinaryOp() ||
5198 SmallPtrSet<Instruction *, 4> InputShuffles({SVI0A, SVI0B, SVI1A, SVI1B});
5200 if (!
I ||
I->getOperand(0)->getType() != VT)
5202 return any_of(
I->users(), [&](User *U) {
5203 return U != Op0 && U != Op1 &&
5204 !(isa<ShuffleVectorInst>(U) &&
5205 (InputShuffles.contains(cast<Instruction>(U)) ||
5206 isInstructionTriviallyDead(cast<Instruction>(U))));
5209 if (checkSVNonOpUses(SVI0A) || checkSVNonOpUses(SVI0B) ||
5210 checkSVNonOpUses(SVI1A) || checkSVNonOpUses(SVI1B))
5218 for (
auto *U :
I->users()) {
5220 if (!SV || SV->getType() != VT)
5222 if ((SV->getOperand(0) != Op0 && SV->getOperand(0) != Op1) ||
5223 (SV->getOperand(1) != Op0 && SV->getOperand(1) != Op1))
5230 if (!collectShuffles(Op0) || !collectShuffles(Op1))
5234 if (FromReduction && Shuffles.
size() > 1)
5239 if (!FromReduction) {
5240 for (
size_t Idx = 0,
E = Shuffles.
size(); Idx !=
E; ++Idx) {
5241 for (
auto *U : Shuffles[Idx]->
users()) {
5256 int MaxV1Elt = 0, MaxV2Elt = 0;
5257 unsigned NumElts = VT->getNumElements();
5258 for (ShuffleVectorInst *SVN : Shuffles) {
5259 SmallVector<int>
Mask;
5260 SVN->getShuffleMask(Mask);
5264 Value *SVOp0 = SVN->getOperand(0);
5265 Value *SVOp1 = SVN->getOperand(1);
5270 for (
int &Elem : Mask) {
5276 if (SVOp0 == Op1 && SVOp1 == Op0) {
5280 if (SVOp0 != Op0 || SVOp1 != Op1)
5286 SmallVector<int> ReconstructMask;
5287 for (
unsigned I = 0;
I <
Mask.size();
I++) {
5290 }
else if (Mask[
I] <
static_cast<int>(NumElts)) {
5291 MaxV1Elt = std::max(MaxV1Elt, Mask[
I]);
5292 auto It =
find_if(
V1, [&](
const std::pair<int, int> &
A) {
5293 return Mask[
I] ==
A.first;
5299 V1.emplace_back(Mask[
I],
V1.size());
5302 MaxV2Elt = std::max<int>(MaxV2Elt, Mask[
I] - NumElts);
5303 auto It =
find_if(V2, [&](
const std::pair<int, int> &
A) {
5304 return Mask[
I] -
static_cast<int>(NumElts) ==
A.first;
5318 sort(ReconstructMask);
5319 OrigReconstructMasks.
push_back(std::move(ReconstructMask));
5326 if (
V1.empty() || V2.
empty() ||
5327 (MaxV1Elt ==
static_cast<int>(
V1.size()) - 1 &&
5328 MaxV2Elt ==
static_cast<int>(V2.
size()) - 1))
5340 if (InputShuffles.contains(SSV))
5342 return SV->getMaskValue(M);
5350 std::pair<int, int>
Y) {
5351 int MXA = GetBaseMaskValue(
A,
X.first);
5352 int MYA = GetBaseMaskValue(
A,
Y.first);
5356 return SortBase(SVI0A,
A,
B);
5358 stable_sort(V2, [&](std::pair<int, int>
A, std::pair<int, int>
B) {
5359 return SortBase(SVI1A,
A,
B);
5364 for (
const auto &Mask : OrigReconstructMasks) {
5365 SmallVector<int> ReconstructMask;
5366 for (
int M : Mask) {
5368 auto It =
find_if(V, [M](
auto A) {
return A.second ==
M; });
5369 assert(It !=
V.end() &&
"Expected all entries in Mask");
5370 return std::distance(
V.begin(), It);
5374 else if (M <
static_cast<int>(NumElts)) {
5377 ReconstructMask.
push_back(NumElts + FindIndex(V2, M));
5380 ReconstructMasks.
push_back(std::move(ReconstructMask));
5385 SmallVector<int> V1A, V1B, V2A, V2B;
5386 for (
unsigned I = 0;
I <
V1.size();
I++) {
5390 for (
unsigned I = 0;
I < V2.
size();
I++) {
5391 V2A.
push_back(GetBaseMaskValue(SVI1A, V2[
I].first));
5392 V2B.
push_back(GetBaseMaskValue(SVI1B, V2[
I].first));
5394 while (V1A.
size() < NumElts) {
5398 while (V2A.
size() < NumElts) {
5410 VT, VT, SV->getShuffleMask(),
CostKind);
5417 unsigned ElementSize = VT->getElementType()->getPrimitiveSizeInBits();
5418 unsigned MaxVectorSize =
5420 unsigned MaxElementsInVector = MaxVectorSize / ElementSize;
5421 if (MaxElementsInVector == 0)
5430 std::set<SmallVector<int, 4>> UniqueShuffles;
5435 unsigned NumFullVectors =
Mask.size() / MaxElementsInVector;
5436 if (NumFullVectors < 2)
5437 return C + ShuffleCost;
5438 SmallVector<int, 4> SubShuffle(MaxElementsInVector);
5439 unsigned NumUniqueGroups = 0;
5440 unsigned NumGroups =
Mask.size() / MaxElementsInVector;
5443 for (
unsigned I = 0;
I < NumFullVectors; ++
I) {
5444 for (
unsigned J = 0; J < MaxElementsInVector; ++J)
5445 SubShuffle[J] = Mask[MaxElementsInVector *
I + J];
5446 if (UniqueShuffles.insert(SubShuffle).second)
5447 NumUniqueGroups += 1;
5449 return C + ShuffleCost * NumUniqueGroups / NumGroups;
5455 SmallVector<int, 16>
Mask;
5456 SV->getShuffleMask(Mask);
5457 return AddShuffleMaskAdjustedCost(
C, Mask);
5460 auto AllShufflesHaveSameOperands =
5461 [](SmallPtrSetImpl<Instruction *> &InputShuffles) {
5462 if (InputShuffles.size() < 2)
5464 ShuffleVectorInst *FirstSV =
5471 std::next(InputShuffles.begin()), InputShuffles.end(),
5472 [&](Instruction *
I) {
5473 ShuffleVectorInst *SV = dyn_cast<ShuffleVectorInst>(I);
5474 return SV && SV->getOperand(0) == In0 && SV->getOperand(1) == In1;
5483 CostBefore += std::accumulate(Shuffles.begin(), Shuffles.end(),
5485 if (AllShufflesHaveSameOperands(InputShuffles)) {
5486 UniqueShuffles.clear();
5487 CostBefore += std::accumulate(InputShuffles.begin(), InputShuffles.end(),
5490 CostBefore += std::accumulate(InputShuffles.begin(), InputShuffles.end(),
5496 FixedVectorType *Op0SmallVT =
5498 FixedVectorType *Op1SmallVT =
5503 UniqueShuffles.clear();
5504 CostAfter += std::accumulate(ReconstructMasks.begin(), ReconstructMasks.end(),
5506 std::set<SmallVector<int>> OutputShuffleMasks({V1A, V1B, V2A, V2B});
5508 std::accumulate(OutputShuffleMasks.begin(), OutputShuffleMasks.end(),
5511 LLVM_DEBUG(
dbgs() <<
"Found a binop select shuffle pattern: " <<
I <<
"\n");
5513 <<
" vs CostAfter: " << CostAfter <<
"\n");
5514 if (CostBefore < CostAfter ||
5525 if (InputShuffles.contains(SSV))
5527 return SV->getOperand(
Op);
5531 GetShuffleOperand(SVI0A, 1), V1A);
5534 GetShuffleOperand(SVI0B, 1), V1B);
5537 GetShuffleOperand(SVI1A, 1), V2A);
5540 GetShuffleOperand(SVI1B, 1), V2B);
5545 I->copyIRFlags(Op0,
true);
5550 I->copyIRFlags(Op1,
true);
5552 for (
int S = 0,
E = ReconstructMasks.size(); S !=
E; S++) {
5555 replaceValue(*Shuffles[S], *NSV,
false);
5558 Worklist.pushValue(NSV0A);
5559 Worklist.pushValue(NSV0B);
5560 Worklist.pushValue(NSV1A);
5561 Worklist.pushValue(NSV1B);
5571bool VectorCombine::shrinkType(Instruction &
I) {
5572 Value *ZExted, *OtherOperand;
5578 Value *ZExtOperand =
I.getOperand(
I.getOperand(0) == OtherOperand ? 1 : 0);
5582 unsigned BW = SmallTy->getElementType()->getPrimitiveSizeInBits();
5584 if (
I.getOpcode() == Instruction::LShr) {
5601 Instruction::ZExt, BigTy, SmallTy,
5602 TargetTransformInfo::CastContextHint::None,
CostKind);
5607 for (User *U : ZExtOperand->
users()) {
5614 ShrinkCost += ZExtCost;
5629 ShrinkCost += ZExtCost;
5636 Instruction::Trunc, SmallTy, BigTy,
5637 TargetTransformInfo::CastContextHint::None,
CostKind);
5642 if (ShrinkCost > CurrentCost)
5646 Value *Op0 = ZExted;
5649 if (
I.getOperand(0) == OtherOperand)
5656 replaceValue(
I, *NewZExtr);
5662bool VectorCombine::foldInsExtVectorToShuffle(Instruction &
I) {
5663 Value *DstVec, *SrcVec;
5664 uint64_t ExtIdx, InsIdx;
5674 if (!DstVecTy || !SrcVecTy ||
5680 if (InsIdx >= NumDstElts || ExtIdx >= NumSrcElts || NumDstElts == 1)
5687 bool NeedExpOrNarrow = NumSrcElts != NumDstElts;
5689 if (NeedDstSrcSwap) {
5691 Mask[InsIdx] = ExtIdx % NumDstElts;
5695 std::iota(
Mask.begin(),
Mask.end(), 0);
5696 Mask[InsIdx] = (ExtIdx % NumDstElts) + NumDstElts;
5709 SmallVector<int> ExtToVecMask;
5710 if (!NeedExpOrNarrow) {
5715 nullptr, {DstVec, SrcVec});
5721 ExtToVecMask[ExtIdx % NumDstElts] = ExtIdx;
5724 DstVecTy, SrcVecTy, ExtToVecMask,
CostKind);
5728 if (!Ext->hasOneUse())
5731 LLVM_DEBUG(
dbgs() <<
"Found a insert/extract shuffle-like pair: " <<
I
5732 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
5735 if (OldCost < NewCost)
5738 if (NeedExpOrNarrow) {
5739 if (!NeedDstSrcSwap)
5752 replaceValue(
I, *Shuf);
5761bool VectorCombine::foldInterleaveIntrinsics(Instruction &
I) {
5762 const APInt *SplatVal0, *SplatVal1;
5772 auto *ExtVTy = VectorType::getExtendedElementVectorType(VTy);
5773 unsigned Width = VTy->getElementType()->getIntegerBitWidth();
5782 LLVM_DEBUG(
dbgs() <<
"VC: The cost to cast from " << *ExtVTy <<
" to "
5783 << *
I.getType() <<
" is too high.\n");
5787 APInt NewSplatVal = SplatVal1->
zext(Width * 2);
5788 NewSplatVal <<= Width;
5789 NewSplatVal |= SplatVal0->
zext(Width * 2);
5791 ExtVTy->getElementCount(), ConstantInt::get(
F.getContext(), NewSplatVal));
5826bool VectorCombine::foldDeinterleaveIntrinsics(Instruction &
I) {
5828 if (
DL->isBigEndian())
5831 using namespace PatternMatch;
5832 Value *DeinterleavedVal;
5843 unsigned HalfElementWidth = ElementWidth / 2;
5847 std::array<ExtractValueInst *, 2> OrigFields{};
5848 for (User *Usr :
I.users()) {
5851 if (!
E ||
E->getNumIndices() != 1)
5853 unsigned Idx = *
E->idx_begin();
5855 if (Idx >= 2 || OrigFields[Idx] || !
E->hasNUses(2))
5857 OrigFields[Idx] =
E;
5861 SmallVector<Instruction *, 2> MergeInsts;
5862 for (
auto *FieldUsr : OrigFields[0]->
users()) {
5870 auto MatchMerge = [&](void) ->
bool {
5873 return match(MergeInsts[0],
5877 match(MergeInsts[1],
5882 if (!MatchMerge()) {
5883 std::swap(MergeInsts[0], MergeInsts[1]);
5898 auto *NewFieldTy = VecTy->getWithNewBitWidth(HalfElementWidth);
5908 if (OldCost <= NewCost || !NewCost.
isValid()) {
5910 dbgs() <<
"VC: New deinterleave2 sequence cost (" << NewCost <<
")"
5911 <<
" is higher than that of the old one (" << OldCost <<
")\n");
5919 Intrinsic::vector_deinterleave2, {NewVecTy}, {NewVecCast});
5920 for (
auto [Idx, MergeInst] :
enumerate(MergeInsts)) {
5922 NewField = Builder.
CreateBitCast(NewField, MergeInst->getType());
5923 replaceValue(*MergeInst, *NewField);
5929bool VectorCombine::foldBitcastOfVPLoad(Instruction &
I) {
5930 const DataLayout &
DL =
I.getDataLayout();
5945 DL.getValueOrABITypeAlignment(
II->getPointerAlignment(), OrigVecTy);
5946 ElementCount OrigVecCnt = OrigVecTy->getElementCount();
5948 ElementCount NewVecCnt = NewVecTy->getElementCount();
5960 II->getMemoryPointerParam(),
false,
5966 {Intrinsic::vp_load, NewVecTy,
II->getMemoryPointerParam(),
false,
5970 <<
" NewCost=" << NewCost <<
"\n");
5971 if (NewCost > OldCost || !NewCost.
isValid())
5978 NewVecTy, Intrinsic::vp_load,
5979 {
II->getMemoryPointerParam(), NewMask, NewEVL});
5982 0, AttrBuilder(
II->getContext()).addAlignmentAttr(OrigAlign));
5983 replaceValue(*Cast, *NewVP);
5991bool VectorCombine::foldBitOrderReverseAndSwap(Instruction &
I) {
5997 Type *Ty =
I.getType();
5999 TypeSize ElementSize =
DL->getTypeStoreSize(Ty);
6002 Type *NewVecTy = VectorType::get(I8Ty, NewVecCnt);
6018 IntrinsicCostAttributes ICANew(Intrinsic::bitreverse, NewVecTy, {NewVecTy});
6021 InstructionCost NewCost = CastToVecCost + NewIntrinsicCost + CastToOrigCost;
6023 if (!InnerII->hasOneUse())
6027 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
6029 if (!NewCost.
isValid() || NewCost >= OldCost)
6038 replaceValue(
I, *CastToOrig);
6043bool VectorCombine::shrinkLoadForShuffles(Instruction &
I) {
6045 if (!OldLoad || !OldLoad->isSimple())
6052 unsigned const OldNumElements = OldLoadTy->getNumElements();
6058 using IndexRange = std::pair<int, int>;
6059 auto GetIndexRangeInShuffles = [&]() -> std::optional<IndexRange> {
6060 IndexRange OutputRange = IndexRange(OldNumElements, -1);
6061 for (llvm::Use &Use :
I.uses()) {
6063 User *Shuffle =
Use.getUser();
6068 return std::nullopt;
6075 for (
int Index : Mask) {
6076 if (Index >= 0 && Index <
static_cast<int>(OldNumElements)) {
6077 OutputRange.first = std::min(Index, OutputRange.first);
6078 OutputRange.second = std::max(Index, OutputRange.second);
6083 if (OutputRange.second < OutputRange.first)
6084 return std::nullopt;
6090 if (std::optional<IndexRange> Indices = GetIndexRangeInShuffles()) {
6091 unsigned const NewNumElements = Indices->second + 1u;
6095 if (NewNumElements < OldNumElements) {
6100 Type *ElemTy = OldLoadTy->getElementType();
6102 Value *PtrOp = OldLoad->getPointerOperand();
6105 Instruction::Load, OldLoad->getType(), OldLoad->getAlign(),
6106 OldLoad->getPointerAddressSpace(),
CostKind);
6109 OldLoad->getPointerAddressSpace(),
CostKind);
6111 using UseEntry = std::pair<ShuffleVectorInst *, std::vector<int>>;
6113 unsigned const MaxIndex = NewNumElements * 2u;
6115 for (llvm::Use &Use :
I.uses()) {
6122 ArrayRef<int> OldMask = Shuffle->getShuffleMask();
6128 for (
int Index : OldMask) {
6129 if (Index >=
static_cast<int>(MaxIndex))
6143 dbgs() <<
"Found a load used only by shufflevector instructions: "
6144 <<
I <<
"\n OldCost: " << OldCost
6145 <<
" vs NewCost: " << NewCost <<
"\n");
6147 if (OldCost < NewCost || !NewCost.
isValid())
6153 NewLoad->copyMetadata(
I);
6156 for (UseEntry &Use : NewUses) {
6157 ShuffleVectorInst *Shuffle =
Use.first;
6158 std::vector<int> &NewMask =
Use.second;
6165 replaceValue(*Shuffle, *NewShuffle,
false);
6178bool VectorCombine::shrinkPhiOfShuffles(Instruction &
I) {
6180 if (!Phi ||
Phi->getNumIncomingValues() != 2u)
6184 ArrayRef<int> Mask0;
6185 ArrayRef<int> Mask1;
6198 auto const InputNumElements = InputVT->getNumElements();
6200 if (InputNumElements >= ResultVT->getNumElements())
6205 SmallVector<int, 16> NewMask;
6208 for (
auto [
M0,
M1] :
zip(Mask0, Mask1)) {
6209 if (
M0 >= 0 &&
M1 >= 0)
6211 else if (
M0 == -1 &&
M1 == -1)
6224 int MaskOffset = NewMask[0
u];
6225 unsigned Index = (InputNumElements + MaskOffset) % InputNumElements;
6228 for (
unsigned I = 0u;
I < InputNumElements; ++
I) {
6242 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
6245 if (NewCost > OldCost)
6257 auto *NewPhi = Builder.
CreatePHI(NewShuf0->getType(), 2u);
6259 NewPhi->addIncoming(
Op,
Phi->getIncomingBlock(1u));
6265 replaceValue(*Phi, *NewShuf1);
6271bool VectorCombine::run() {
6285 auto Opcode =
I.getOpcode();
6293 if (IsFixedVectorType) {
6295 case Instruction::InsertElement:
6296 if (vectorizeLoadInsert(
I))
6299 case Instruction::ShuffleVector:
6300 if (widenSubvectorLoad(
I))
6311 if (scalarizeOpOrCmp(
I))
6313 if (scalarizeLoad(
I))
6315 if (scalarizeExtExtract(
I))
6317 if (scalarizeVPIntrinsic(
I))
6319 if (foldInterleaveIntrinsics(
I))
6321 if (foldBitcastOfVPLoad(
I))
6325 if (foldDeinterleaveIntrinsics(
I))
6328 if (Opcode == Instruction::Store)
6329 if (foldSingleElementStore(
I))
6333 if (TryEarlyFoldsOnly)
6336 if (Opcode == Instruction::Call)
6337 if (foldBitOrderReverseAndSwap(
I))
6344 if (IsFixedVectorType) {
6346 case Instruction::InsertElement:
6347 if (foldInsExtFNeg(
I))
6349 if (foldInsExtBinop(
I))
6351 if (foldInsExtVectorToShuffle(
I))
6354 case Instruction::ShuffleVector:
6355 if (foldPermuteOfBinops(
I))
6357 if (foldShuffleOfBinops(
I))
6359 if (foldShuffleOfSelects(
I))
6361 if (foldShuffleOfCastops(
I))
6363 if (foldShuffleOfShuffles(
I))
6365 if (foldPermuteOfIntrinsic(
I))
6367 if (foldShufflesOfLengthChangingShuffles(
I))
6369 if (foldShuffleOfIntrinsics(
I))
6371 if (foldSelectShuffle(
I))
6373 if (foldShuffleToIdentity(
I))
6376 case Instruction::Load:
6377 if (shrinkLoadForShuffles(
I))
6380 case Instruction::BitCast:
6381 if (foldBitcastShuffle(
I))
6383 if (foldSelectsFromBitcast(
I))
6386 case Instruction::And:
6387 case Instruction::Or:
6388 case Instruction::Xor:
6389 if (foldBitOpOfCastops(
I))
6391 if (foldBitOpOfCastConstant(
I))
6394 case Instruction::PHI:
6395 if (shrinkPhiOfShuffles(
I))
6405 case Instruction::Call:
6406 if (foldShuffleFromReductions(
I))
6408 if (foldCastFromReductions(
I))
6411 case Instruction::ExtractElement:
6412 if (foldShuffleChainsToReduce(
I))
6415 case Instruction::ICmp:
6416 if (foldSignBitReductionCmp(
I))
6418 if (foldICmpEqZeroVectorReduce(
I))
6420 if (foldReductionZeroTest(
I))
6422 if (foldEquivalentReductionCmp(
I))
6424 if (foldReduceAddCmpZero(
I))
6427 case Instruction::FCmp:
6428 if (foldExtractExtract(
I))
6431 case Instruction::Or:
6432 if (foldConcatOfBoolMasks(
I))
6437 if (foldExtractExtract(
I))
6439 if (foldExtractedCmps(
I))
6441 if (foldBinopOfReductions(
I))
6450 bool MadeChange =
false;
6451 for (BasicBlock &BB :
F) {
6463 if (!
I->isDebugOrPseudoInst())
6464 MadeChange |= FoldInst(*
I);
6471 while (!Worklist.isEmpty()) {
6481 MadeChange |= FoldInst(*
I);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< unsigned > MaxInstrsToScan("aggressive-instcombine-max-scan-instrs", cl::init(64), cl::Hidden, cl::desc("Max number of instructions to scan for aggressive instcombine."))
This is the interface for LLVM's primary stateless and local alias analysis.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
static cl::opt< IntrinsicCostStrategy > IntrinsicCost("intrinsic-cost-strategy", cl::desc("Costing strategy for intrinsic instructions"), cl::init(IntrinsicCostStrategy::InstructionCost), cl::values(clEnumValN(IntrinsicCostStrategy::InstructionCost, "instruction-cost", "Use TargetTransformInfo::getInstructionCost"), clEnumValN(IntrinsicCostStrategy::IntrinsicCost, "intrinsic-cost", "Use TargetTransformInfo::getIntrinsicInstrCost"), clEnumValN(IntrinsicCostStrategy::TypeBasedIntrinsicCost, "type-based-intrinsic-cost", "Calculate the intrinsic cost based only on argument types")))
This file defines the DenseMap class.
This is the interface for a simple mod/ref and alias analysis over globals.
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
FunctionAnalysisManager FAM
const SmallVectorImpl< MachineOperand > & Cond
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static SymbolRef::Type getType(const Symbol *Sym)
static bool isFreeConcat(ArrayRef< InstLane > Item, TTI::TargetCostKind CostKind, const TargetTransformInfo &TTI)
Detect concat of multiple values into a vector.
static void analyzeCostOfVecReduction(const IntrinsicInst &II, TTI::TargetCostKind CostKind, const TargetTransformInfo &TTI, InstructionCost &CostBeforeReduction, InstructionCost &CostAfterReduction)
static SmallVector< InstLane > generateInstLaneVectorFromOperand(ArrayRef< InstLane > Item, int Op)
static Value * createShiftShuffle(Value *Vec, unsigned OldIndex, unsigned NewIndex, IRBuilderBase &Builder)
Create a shuffle that translates (shifts) 1 element from the input vector to a new element location.
static Value * generateNewInstTree(ArrayRef< InstLane > Item, Use *From, FixedVectorType *Ty, const DenseSet< std::pair< Value *, Use * > > &IdentityLeafs, const DenseSet< std::pair< Value *, Use * > > &SplatLeafs, const DenseSet< std::pair< Value *, Use * > > &ConcatLeafs, IRBuilderBase &Builder, const TargetTransformInfo *TTI)
std::pair< Value *, int > InstLane
static bool isKnownNonPositive(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Used by foldReduceAddCmpZero to check if we can prove that a value is non-positive.
static Align computeAlignmentAfterScalarization(Align VectorAlignment, Type *ScalarType, Value *Idx, const DataLayout &DL)
The memory operation on a vector of ScalarType had alignment of VectorAlignment.
static bool feedsIntoVectorReduction(ShuffleVectorInst *SVI)
Returns true if this ShuffleVectorInst eventually feeds into a vector reduction intrinsic (e....
static cl::opt< bool > DisableVectorCombine("disable-vector-combine", cl::init(false), cl::Hidden, cl::desc("Disable all vector combine transforms"))
static bool canWidenLoad(LoadInst *Load, const TargetTransformInfo &TTI)
static const unsigned InvalidIndex
static Value * translateExtract(ExtractElementInst *ExtElt, unsigned NewIndex, IRBuilderBase &Builder)
Given an extract element instruction with constant index operand, shuffle the source vector (shift th...
static ScalarizationResult canScalarizeAccess(VectorType *VecTy, Value *Idx, const SimplifyQuery &SQ)
Check if it is legal to scalarize a memory access to VecTy at index Idx.
static cl::opt< unsigned > MaxInstrsToScan("vector-combine-max-scan-instrs", cl::init(30), cl::Hidden, cl::desc("Max number of instructions to scan for vector combining."))
static cl::opt< bool > DisableBinopExtractShuffle("disable-binop-extract-shuffle", cl::init(false), cl::Hidden, cl::desc("Disable binop extract to shuffle transforms"))
static InstLane lookThroughShuffles(Value *V, int Lane)
static bool isMemModifiedBetween(BasicBlock::iterator Begin, BasicBlock::iterator End, const MemoryLocation &Loc, AAResults &AA)
static constexpr int Concat[]
A manager for alias analyses.
Class for arbitrary precision integers.
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
unsigned getBitWidth() const
Return the number of bits in the APInt.
bool isNegative() const
Determine sign of this APInt.
unsigned countl_one() const
Count the number of leading one bits.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
bool isOne() const
Determine if this is a value of 1.
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & front() const
Get the first element.
size_t size() const
Get the array size.
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM_ABI bool hasAttribute(Attribute::AttrKind Kind) const
Return true if the attribute exists in this set.
InstListType::iterator iterator
Instruction iterators...
BinaryOps getOpcode() const
Represents analyses that only rely on functions' control flow.
Value * getArgOperand(unsigned i) const
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
void addParamAttrs(unsigned ArgNo, const AttrBuilder &B)
Adds attributes to the indicated argument.
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
bool isFPPredicate() const
static LLVM_ABI std::optional< CmpPredicate > getMatching(CmpPredicate A, CmpPredicate B)
Compares two CmpPredicates taking samesign into account and returns the canonicalized CmpPredicate if...
static LLVM_ABI Constant * getExtractElement(Constant *Vec, Constant *Idx, Type *OnlyIfReducedTy=nullptr)
static LLVM_ABI Constant * getBinOpIdentity(unsigned Opcode, Type *Ty, bool AllowRHSConstant=false, bool NSZ=false)
Return the identity constant for a binary opcode.
This is the shared class of boolean and integer constants.
const APInt & getValue() const
Return the constant as an APInt value reference.
This class represents a range of values.
LLVM_ABI ConstantRange urem(const ConstantRange &Other) const
Return a new range representing the possible values resulting from an unsigned remainder operation of...
LLVM_ABI ConstantRange binaryAnd(const ConstantRange &Other) const
Return a new range representing the possible values resulting from a binary-and of a value in this ra...
LLVM_ABI bool contains(const APInt &Val) const
Return true if the specified value is in the set.
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
ValueT lookup(const_arg_type_t< KeyT > Val) const
Return the entry for the specified key, or a default constructed value if no such entry exists.
iterator find(const_arg_type_t< KeyT > Val)
Implements a dense probed hash-table based set.
Analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
LLVM_ABI bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
Convenience struct for specifying and reasoning about fast-math flags.
bool noSignedZeros() const
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static FixedVectorType * getDoubleElementsVectorType(FixedVectorType *VTy)
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Predicate getSignedPredicate() const
For example, EQ->EQ, SLE->SLE, UGT->SGT, etc.
bool isEquality() const
Return true if this predicate is either EQ or NE.
Common base class shared among various IRBuilders.
LLVM_ABI CallInst * CreateIntrinsicWithoutFolding(Intrinsic::ID ID, ArrayRef< Type * > OverloadTypes, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="", ArrayRef< OperandBundleDef > OpBundles={})
Create a call to intrinsic ID with Args, mangled using OverloadTypes.
Value * CreateNUWMul(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
LLVM_ABI Value * CreateSelectFMF(Value *C, Value *True, Value *False, FMFSource FMFSource, const Twine &Name="", Instruction *MDFrom=nullptr)
LLVM_ABI Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
ConstantInt * getTrue()
Get the constant value for i1 true.
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Value * CreateFreeze(Value *V, const Twine &Name="")
void SetCurrentDebugLocation(const DebugLoc &L)
Set location information used by debugging information.
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="", MDNode *FPMathTag=nullptr, FMFSource FMFSource={})
Value * CreateIsNotNeg(Value *Arg, const Twine &Name="")
Return a boolean value testing if Arg > -1.
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Value * CreatePointerBitCastOrAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
LLVM_ABI Value * CreateOrReduce(Value *Src)
Create a vector int OR reduction intrinsic of the source vector.
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
InstTy * Insert(InstTy *I, const Twine &Name="") const
Insert and return the specified instruction.
Value * CreateIsNeg(Value *Arg, const Twine &Name="")
Return a boolean value testing if Arg < 0.
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
LLVM_ABI Value * CreateNAryOp(unsigned Opc, ArrayRef< Value * > Ops, const Twine &Name="", MDNode *FPMathTag=nullptr)
Create either a UnaryOperator or BinaryOperator depending on Opc.
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
LLVM_ABI Value * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > OverloadTypes, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="", ArrayRef< OperandBundleDef > OpBundles={}, function_ref< void(CallInst *)> SetFn=[](CallInst *) {})
Variant to create a possibly constant-folded intrinsic.
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Value * CreateFNegFMF(Value *V, FMFSource FMFSource, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
LLVM_ABI Value * CreateUnaryIntrinsic(Intrinsic::ID ID, Value *Op, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with 1 operand which is mangled on its type.
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
CostType getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
void push(Instruction *I)
Push the instruction onto the worklist stack.
LLVM_ABI void setHasNoUnsignedWrap(bool b=true)
Set or clear the nuw flag on this instruction, which must be an operator which supports this flag.
LLVM_ABI void copyIRFlags(const Value *V, bool IncludeWrapFlags=true)
Convenience method to copy supported exact, fast-math, and (optionally) wrapping flags from V to this...
LLVM_ABI void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI void andIRFlags(const Value *V)
Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction.
LLVM_ABI void setNonNeg(bool b=true)
Set or clear the nneg flag on this instruction, which must be a zext instruction.
LLVM_ABI bool comesBefore(const Instruction *Other) const
Given an instruction Other in the same basic block as this instruction, return true if this instructi...
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
LLVM_ABI AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
bool isIdempotent() const
Return true if the instruction is idempotent:
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
LLVM_ABI bool hasAllowReassoc() const LLVM_READONLY
Determine whether the allow-reassociation flag is set.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
An instruction for reading from memory.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
void setAlignment(Align Align)
Type * getPointerOperandType() const
Align getAlign() const
Return the alignment of the access that is being performed.
Representation for a specific memory location.
static LLVM_ABI MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
const SDValue & getOperand(unsigned Num) const
bool contains(const_arg_type key) const
Check if the SetVector contains the given key.
bool empty() const
Determine if the SetVector is empty or not.
bool insert(const value_type &X)
Insert a new element into the SetVector.
This instruction constructs a fixed permutation of two input vectors.
int getMaskValue(unsigned Elt) const
Return the shuffle mask value of this instruction for the given element index.
VectorType * getType() const
Overload to return most specific vector type.
static LLVM_ABI void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static LLVM_ABI bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
static void commuteShuffleMask(MutableArrayRef< int > Mask, unsigned InVecNumElts)
Change values in a shuffle permute mask assuming the two vector operands of length InVecNumElts have ...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
void assign(size_type NumElts, ValueParamT Elt)
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
void setAlignment(Align Align)
Analysis pass providing the TargetTransformInfo.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isPointerTy() const
True if this is an instance of PointerType.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
A Use represents the edge between a Value definition and its users.
Value * getOperand(unsigned i) const
static LLVM_ABI bool isVPBinOp(Intrinsic::ID ID)
std::optional< unsigned > getFunctionalIntrinsicID() const
std::optional< unsigned > getFunctionalOpcode() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
This is a wrapper around stripAndAccumulateConstantOffsets with the in-bounds requirement set to fals...
LLVM_ABI bool hasOneUser() const
Return true if there is exactly one user of this value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
unsigned getValueID() const
Return an ID for the concrete type of this object.
LLVM_ABI bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &)
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Type * getElementType() const
std::pair< iterator, bool > insert(const ValueT &V)
constexpr bool hasKnownScalarFactor(const FixedOrScalableQuantity &RHS) const
Returns true if there exists a value X where RHS.multiplyCoefficientBy(X) will result in a value whos...
constexpr ScalarTy getKnownScalarFactor(const FixedOrScalableQuantity &RHS) const
Returns a value X where RHS.multiplyCoefficientBy(X) will result in a value whose quantity matches ou...
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
const ParentTy * getParent() const
self_iterator getIterator()
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Abstract Attribute helper functions.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
const APInt & smin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be signed.
const APInt & smax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be signed.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
LLVM_ABI AttributeSet getFnAttributes(LLVMContext &C, ID id)
Return the function attributes for an intrinsic.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
match_combine_and< Ty... > m_CombineAnd(const Ty &...Ps)
Combine pattern matchers matching all of Ps patterns.
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
auto m_Cmp()
Matches any compare instruction and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
m_Intrinsic_Ty< Opnd0 >::Ty m_BitReverse(const Opnd0 &Op0)
BinaryOp_match< LHS, RHS, Instruction::URem > m_URem(const LHS &L, const RHS &R)
auto m_Poison()
Match an arbitrary poison constant.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
match_bind< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
DisjointOr_match< LHS, RHS > m_DisjointOr(const LHS &L, const RHS &R)
BinOpPred_match< LHS, RHS, is_right_shift_op > m_Shr(const LHS &L, const RHS &R)
Matches logical shift operations.
CmpClass_match< LHS, RHS, ICmpInst, true > m_c_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
Matches an ICmp with a predicate over LHS and RHS in either order.
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
auto m_BinOp()
Match an arbitrary binary operation and ignore it.
auto m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
auto m_Constant()
Match an arbitrary Constant and ignore it.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
cst_pred_ty< is_non_zero_int > m_NonZeroInt()
Match a non-zero integer or a vector with all non-zero elements.
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Shl, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWShl(const LHS &L, const RHS &R)
auto m_AnyIntrinsic()
Matches any intrinsic call and ignore it.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Mul, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWMul(const LHS &L, const RHS &R)
BinOpPred_match< LHS, RHS, is_bitwiselogic_op, true > m_c_BitwiseLogic(const LHS &L, const RHS &R)
Matches bitwise logic operations in either order.
CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
match_combine_or< CastInst_match< OpTy, SExtInst >, NNegZExt_match< OpTy > > m_SExtLike(const OpTy &Op)
Match either "sext" or "zext nneg".
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
m_Intrinsic_Ty< Opnd0 >::Ty m_BSwap(const Opnd0 &Op0)
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
m_Intrinsic_Ty< Opnd >::Ty m_Deinterleave2(const Opnd &Op)
auto m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
@ Valid
The data is already valid.
initializer< Ty > init(const Ty &Val)
DXILDebugInfoMap run(Module &M)
@ User
could "use" a pointer
NodeAddr< PhiNode * > Phi
NodeAddr< UseNode * > Use
friend class Instruction
Iterator for Instructions in a `BasicBlock.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
void stable_sort(R &&Range)
UnaryFunction for_each(R &&Range, UnaryFunction F)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI Intrinsic::ID getMinMaxReductionIntrinsicOp(Intrinsic::ID RdxID)
Returns the min/max intrinsic used when expanding a min/max reduction.
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
RelativeUniformCounterPtr Values
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI Value * simplifyUnOp(unsigned Opcode, Value *Op, const SimplifyQuery &Q)
Given operand for a UnaryOperator, fold the result or return null.
scope_exit(Callable) -> scope_exit< Callable >
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI unsigned getArithmeticReductionInstruction(Intrinsic::ID RdxID)
Returns the arithmetic instruction opcode used when expanding a reduction.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
LLVM_ABI Value * simplifyCall(CallBase *Call, Value *Callee, ArrayRef< Value * > Args, const SimplifyQuery &Q)
Given a callsite, callee, and arguments, fold the result or return null.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
LLVM_ABI bool mustSuppressSpeculation(const LoadInst &LI)
Return true if speculation of the given load must be suppressed to avoid ordering or interfering with...
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
RelativeUniformCounterPtr ValuesPtrExpr VTableAddr Value
unsigned M1(unsigned Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
auto reverse(ContainerTy &&C)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
bool isModSet(const ModRefInfo MRI)
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI bool programUndefinedIfPoison(const Instruction *Inst)
LLVM_ABI bool isSafeToLoadUnconditionally(Value *V, Align Alignment, const APInt &Size, const DataLayout &DL, Instruction *ScanFrom, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if we know that executing a load from this value cannot trap.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ABI void propagateIRFlags(Value *I, ArrayRef< Value * > VL, Value *OpValue=nullptr, bool IncludeWrapFlags=true)
Get the intersection (logical and) of all of the potential IR flags of each scalar operation (VL) tha...
LLVM_ABI bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
constexpr int PoisonMaskElem
LLVM_ABI bool isSafeToSpeculativelyExecuteWithOpcode(unsigned Opcode, const Instruction *Inst, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
This returns the same result as isSafeToSpeculativelyExecute if Opcode is the actual opcode of Inst.
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
LLVM_ABI Value * simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a BinaryOperator, fold the result or return null.
LLVM_ABI void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
LLVM_ABI Intrinsic::ID getReductionForBinop(Instruction::BinaryOps Opc)
Returns the reduction intrinsic id corresponding to the binary operation.
@ And
Bitwise or logical AND of integers.
LLVM_ABI bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
RelativeUniformCounterPtr ValuesPtrExpr VTableAddr Count
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
LLVM_ABI unsigned ComputeNumSignBits(const Value *Op, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Return the number of times the sign bit of the register is replicated into the other bits.
constexpr unsigned BitWidth
LLVM_ABI bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
LLVM_ABI Constant * getLosslessInvCast(Constant *C, Type *InvCastTo, unsigned CastOp, const DataLayout &DL, PreservedCastFlags *Flags=nullptr)
Try to cast C to InvC losslessly, satisfying CastOp(InvC) equals C, or CastOp(InvC) is a refined valu...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
constexpr bool isIntN(unsigned N, int64_t x)
Checks if an signed integer fits into the given (dynamic) bit width.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list.
LLVM_ABI Value * simplifyCmpInst(CmpPredicate Predicate, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a CmpInst, fold the result or return null.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI bool isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Returns true if V cannot be poison, but may be undef.
LLVM_ABI bool isKnownNonNegative(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Returns true if the give value is known to be non-negative.
LLVM_ABI bool isTriviallyVectorizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially vectorizable.
LLVM_ABI Intrinsic::ID getMinMaxReductionIntrinsicID(Intrinsic::ID IID)
Returns the llvm.vector.reduce min/max intrinsic that corresponds to the intrinsic op.
LLVM_ABI ConstantRange computeConstantRange(const Value *V, bool ForSigned, const SimplifyQuery &SQ, unsigned Depth=0)
Determine the possible constant range of an integer or vector of integer value.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
LLVM_ABI AAMDNodes adjustForAccess(unsigned AccessSize)
Create a new AAMDNode for accessing AccessSize bytes of this AAMDNode.
This struct is a compact representation of a valid (non-zero power of two) alignment.
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
SimplifyQuery getWithInstruction(const Instruction *I) const