#define DEBUG_TYPE "vector-combine"

STATISTIC(NumVecLoad, "Number of vector loads formed");
STATISTIC(NumVecCmp, "Number of vector compares formed");
STATISTIC(NumVecBO, "Number of vector binops formed");
STATISTIC(NumVecCmpBO, "Number of vector compare + binop formed");
STATISTIC(NumShufOfBitcast, "Number of shuffles moved after bitcast");
STATISTIC(NumScalarOps, "Number of scalar unary + binary ops formed");
STATISTIC(NumScalarCmp, "Number of scalar compares formed");
STATISTIC(NumScalarIntrinsic, "Number of scalar intrinsic calls formed");

static cl::opt<bool> DisableVectorCombine(
    "disable-vector-combine", cl::init(false), cl::Hidden,
    cl::desc("Disable all vector combine transforms"));

static cl::opt<bool> DisableBinopExtractShuffle(
    "disable-binop-extract-shuffle", cl::init(false), cl::Hidden,
    cl::desc("Disable binop extract to shuffle transforms"));

static cl::opt<unsigned> MaxInstrsToScan(
    "vector-combine-max-scan-instrs", cl::init(30), cl::Hidden,
    cl::desc("Max number of instructions to scan for vector combining."));

static const unsigned InvalidIndex = std::numeric_limits<unsigned>::max();
class VectorCombine {
public:
  VectorCombine(/* ... */ TTI::TargetCostKind CostKind, bool TryEarlyFoldsOnly)
      : /* ... */ CostKind(CostKind), TryEarlyFoldsOnly(TryEarlyFoldsOnly) {}

  bool run();

private:
  // ...
  const TargetTransformInfo &TTI;
  const DominatorTree &DT;
  // ...
  const SimplifyQuery SQ;
  // If true, only run the cheap early folds and skip the rest.
  bool TryEarlyFoldsOnly;
  InstructionWorklist Worklist;
  bool vectorizeLoadInsert(Instruction &I);
  bool widenSubvectorLoad(Instruction &I);
  ExtractElementInst *getShuffleExtract(ExtractElementInst *Ext0,
                                        ExtractElementInst *Ext1,
                                        unsigned PreferredExtractIndex) const;
  bool isExtractExtractCheap(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
                             const Instruction &I,
                             ExtractElementInst *&ConvertToShuffle,
                             unsigned PreferredExtractIndex);
  bool foldExtractExtract(Instruction &I);
  bool foldInsExtFNeg(Instruction &I);
  bool foldInsExtBinop(Instruction &I);
  bool foldInsExtVectorToShuffle(Instruction &I);
  bool foldBitOpOfCastops(Instruction &I);
  bool foldBitOpOfCastConstant(Instruction &I);
  bool foldBitcastShuffle(Instruction &I);
  bool scalarizeOpOrCmp(Instruction &I);
  bool scalarizeVPIntrinsic(Instruction &I);
  bool foldExtractedCmps(Instruction &I);
  bool foldSelectsFromBitcast(Instruction &I);
  bool foldBinopOfReductions(Instruction &I);
  bool foldSingleElementStore(Instruction &I);
  bool scalarizeLoad(Instruction &I);
  bool scalarizeLoadExtract(LoadInst *LI, VectorType *VecTy, Value *Ptr);
  bool scalarizeLoadBitcast(LoadInst *LI, VectorType *VecTy, Value *Ptr);
  bool scalarizeExtExtract(Instruction &I);
  bool foldConcatOfBoolMasks(Instruction &I);
  bool foldPermuteOfBinops(Instruction &I);
  bool foldShuffleOfBinops(Instruction &I);
  bool foldShuffleOfSelects(Instruction &I);
  bool foldShuffleOfCastops(Instruction &I);
  bool foldShuffleOfShuffles(Instruction &I);
  bool foldPermuteOfIntrinsic(Instruction &I);
  bool foldShufflesOfLengthChangingShuffles(Instruction &I);
  bool foldShuffleOfIntrinsics(Instruction &I);
  bool foldShuffleToIdentity(Instruction &I);
  bool foldShuffleFromReductions(Instruction &I);
  bool foldShuffleChainsToReduce(Instruction &I);
  bool foldCastFromReductions(Instruction &I);
  bool foldSelectShuffle(Instruction &I, bool FromReduction = false);
  bool foldInterleaveIntrinsics(Instruction &I);
  bool shrinkType(Instruction &I);
  bool shrinkLoadForShuffles(Instruction &I);
  bool shrinkPhiOfShuffles(Instruction &I);
  void replaceValue(Instruction &Old, Value &New, bool Erase = true) {
    // ...
    if (auto *NewI = dyn_cast<Instruction>(&New)) {
      Worklist.pushUsersToWorkList(*NewI);
      Worklist.pushValue(NewI);
    }
    // ...
  }

  void eraseInstruction(Instruction &I) {
    // ...
    // Push remaining users of the operands, then the operands themselves, so
    // folds previously blocked by one-use limits can now fire.
    SmallPtrSet<Value *, 4> Visited;
    // ...
    RecursivelyDeleteTriviallyDeadInstructions(OpI, nullptr, nullptr,
                                               [&](Value *V) {
      // ...
      NextInst = NextInst->getNextNode();
      // ...
    });
    // ...
    Worklist.pushUsersToWorkList(*OpI);
    Worklist.pushValue(OpI);
    // ...
  }
};
static bool canWidenLoad(LoadInst *Load, const TargetTransformInfo &TTI) {
  // Reject volatile/atomic or multi-use loads, and loads in functions where
  // widening could trip memory-tagging sanitizers.
  if (!Load || !Load->isSimple() || !Load->hasOneUse() ||
      Load->getFunction()->hasFnAttribute(Attribute::SanitizeMemTag) ||
      /* ... */)
    return false;

  Type *ScalarTy = Load->getType()->getScalarType();
  uint64_t ScalarSize = ScalarTy->getPrimitiveSizeInBits();
  unsigned MinVectorSize = TTI.getMinVectorRegisterBitWidth();
  if (!ScalarSize || !MinVectorSize || MinVectorSize % ScalarSize != 0 ||
      ScalarSize % 8 != 0)
    return false;
  return true;
}
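// Illustrative sketch of the fold below (assuming a 128-bit minimum vector
// register width, i.e. a <4 x float> widened load):
//
//   %s = load float, ptr %p
//   %r = insertelement <4 x float> poison, float %s, i64 0
// -->
//   %v = load <4 x float>, ptr %p
//   %r = shufflevector <4 x float> %v, <4 x float> poison,
//                      <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>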
bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
  // Match an insert of a loaded scalar into an undef/poison vector.
  // ...
  Value *SrcPtr = Load->getPointerOperand()->stripPointerCasts();
  // ...
  unsigned MinVecNumElts = MinVectorSize / ScalarSize;
  auto *MinVecTy = VectorType::get(ScalarTy, MinVecNumElts, false);
  unsigned OffsetEltIndex = 0;
  // ...
  unsigned OffsetBitWidth = DL->getIndexTypeSizeInBits(SrcPtr->getType());
  APInt Offset(OffsetBitWidth, 0);
  // ...
  // The pointer offset must address a whole element of the wider vector.
  uint64_t ScalarSizeInBytes = ScalarSize / 8;
  if (Offset.urem(ScalarSizeInBytes) != 0)
    return false;
  OffsetEltIndex = Offset.udiv(ScalarSizeInBytes).getZExtValue();
  if (OffsetEltIndex >= MinVecNumElts)
    return false;
  // ...
  unsigned AS = Load->getPointerAddressSpace();
  // ...
  unsigned OutputNumElts = Ty->getNumElements();
  // ...
  assert(OffsetEltIndex < MinVecNumElts && "Address offset too big");
  Mask[0] = OffsetEltIndex;
  // ...
  if (OldCost < NewCost || !NewCost.isValid())
    return false;
  // ...
  replaceValue(I, *VecLd);
  return true;
}
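// Illustrative sketch of the fold below: a narrow load padded out by an
// identity-with-padding shuffle can become one wider load (valid only when
// the extra bytes are known dereferenceable):
//
//   %l = load <2 x float>, ptr %p
//   %s = shufflevector <2 x float> %l, <2 x float> poison,
//                      <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
// -->
//   %s = load <4 x float>, ptr %p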
bool VectorCombine::widenSubvectorLoad(Instruction &I) {
  // ...
  if (!Shuf->isIdentityWithPadding())
    return false;
  // ...
  unsigned OpIndex = any_of(Shuf->getShuffleMask(), [&NumOpElts](int M) {
    return M >= (int)(NumOpElts);
  });
  // ...
  Value *SrcPtr = Load->getPointerOperand()->stripPointerCasts();
  // ...
  unsigned AS = Load->getPointerAddressSpace();
  // ...
  if (OldCost < NewCost || !NewCost.isValid())
    return false;
  // ...
  replaceValue(I, *VecLd);
  return true;
}
ExtractElementInst *VectorCombine::getShuffleExtract(
    ExtractElementInst *Ext0, ExtractElementInst *Ext1,
    unsigned PreferredExtractIndex) const {
  // ...
  assert(Index0C && Index1C && "Expected constant extract indexes");
  unsigned Index0 = Index0C->getZExtValue();
  unsigned Index1 = Index1C->getZExtValue();

  // If both extracts read the same lane, there is nothing to trade.
  if (Index0 == Index1)
    return nullptr;
  // ...
  // Prefer converting the extract that does not match the preferred index.
  if (PreferredExtractIndex == Index0)
    return Ext1;
  if (PreferredExtractIndex == Index1)
    return Ext0;
  // ...
  return Index0 > Index1 ? Ext0 : Ext1;
}
bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
                                          ExtractElementInst *Ext1,
                                          const Instruction &I,
                                          ExtractElementInst *&ConvertToShuffle,
                                          unsigned PreferredExtractIndex) {
  // ...
  assert(Ext0IndexC && Ext1IndexC && "Expected constant extract indexes");
  // ...
  unsigned Opcode = I.getOpcode();
  // ...
  assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
         "Expected a compare");
  // ...
  unsigned Ext0Index = Ext0IndexC->getZExtValue();
  unsigned Ext1Index = Ext1IndexC->getZExtValue();
  // ...
  unsigned BestExtIndex = Extract0Cost > Extract1Cost ? Ext0Index : Ext1Index;
  unsigned BestInsIndex = Extract0Cost > Extract1Cost ? Ext1Index : Ext0Index;
  InstructionCost CheapExtractCost = std::min(Extract0Cost, Extract1Cost);

  if (Ext0Src == Ext1Src && Ext0Index == Ext1Index) {
    // Both extracts read the same lane of the same vector; an extra extract
    // is only needed if the scalar values have other uses.
    bool HasUseTax = Ext0 == Ext1 ? !Ext0->hasNUses(2)
                                  : !Ext0->hasOneUse() || !Ext1->hasOneUse();
    OldCost = CheapExtractCost + ScalarOpCost;
    NewCost = VectorOpCost + CheapExtractCost + HasUseTax * CheapExtractCost;
  } else {
    // ...
    OldCost = Extract0Cost + Extract1Cost + ScalarOpCost;
    NewCost = VectorOpCost + CheapExtractCost + /* ... */;
  }

  ConvertToShuffle = getShuffleExtract(Ext0, Ext1, PreferredExtractIndex);
  if (ConvertToShuffle) {
    // ...
    SmallVector<int> ShuffleMask(FixedVecTy->getNumElements(), PoisonMaskElem);
    ShuffleMask[BestInsIndex] = BestExtIndex;
    NewCost += TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
                                  VecTy, VecTy, ShuffleMask, CostKind, 0,
                                  nullptr, {ConvertToShuffle});
    // ...
    NewCost += TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
                                  VecTy, VecTy, {}, CostKind, 0, nullptr,
                                  {ConvertToShuffle});
  }
  // ...
  return OldCost < NewCost;
}
// Create a shuffle that translates one element from its original lane to a
// new lane, leaving the other lanes poison.
static Value *createShiftShuffle(Value *Vec, unsigned OldIndex,
                                 unsigned NewIndex, IRBuilderBase &Builder) {
  // ...
  ShufMask[NewIndex] = OldIndex;
  return Builder.CreateShuffleVector(Vec, ShufMask, "shift");
}

// (from foldExtExtBinop: perform the binop on whole vectors, extract once)
// ...
  Value *VecBO = Builder.CreateBinOp(Opcode, V0, V1, "foldExtExtBinop");
  if (auto *VecBOInst = dyn_cast<Instruction>(VecBO))
    VecBOInst->copyIRFlags(&I);
// ...
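// Illustrative sketch of the extract/extract fold implemented below:
//
//   %e0 = extractelement <4 x i32> %x, i32 0
//   %e1 = extractelement <4 x i32> %y, i32 0
//   %r  = add i32 %e0, %e1
// -->
//   %v = add <4 x i32> %x, %y
//   %r = extractelement <4 x i32> %v, i32 0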
bool VectorCombine::foldExtractExtract(Instruction &I) {
  // ...
  ExtractElementInst *ExtractToChange;
  if (isExtractExtractCheap(Ext0, Ext1, I, ExtractToChange, InsertIndex))
    return false;
  // ...
  if (ExtractToChange) {
    unsigned CheapExtractIdx = ExtractToChange == Ext0 ? C1 : C0;
    // ...
    if (ExtractToChange == Ext0)
      // ...
  }
  // ...
  Value *NewExt = IsCmp ? foldExtExtCmp(ExtOp0, ExtOp1, ExtIndex, I)
                        : foldExtExtBinop(ExtOp0, ExtOp1, ExtIndex, I);
  // ...
  replaceValue(I, *NewExt);
  return true;
}
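// Illustrative sketch of the insert/extract-of-fneg fold below:
//
//   %e = extractelement <4 x float> %src, i64 2
//   %n = fneg float %e
//   %r = insertelement <4 x float> %dst, float %n, i64 2
// -->
//   %nv = fneg <4 x float> %src
//   %r  = shufflevector <4 x float> %dst, <4 x float> %nv,
//                       <4 x i32> <i32 0, i32 1, i32 6, i32 3>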
bool VectorCombine::foldInsExtFNeg(Instruction &I) {
  // ...
  uint64_t ExtIdx, InsIdx;
  // ...
  auto *DstVecScalarTy = DstVecTy->getScalarType();
  // ...
  if (!SrcVecTy || DstVecScalarTy != SrcVecTy->getScalarType())
    return false;
  // ...
  unsigned NumDstElts = DstVecTy->getNumElements();
  unsigned NumSrcElts = SrcVecTy->getNumElements();
  if (ExtIdx > NumSrcElts || InsIdx >= NumDstElts || NumDstElts == 1)
    return false;

  // Select the negated source lane into the destination vector.
  SmallVector<int> Mask(NumDstElts);
  std::iota(Mask.begin(), Mask.end(), 0);
  Mask[InsIdx] = (ExtIdx % NumDstElts) + NumDstElts;
  // ...
  bool NeedLenChg = SrcVecTy->getNumElements() != NumDstElts;
  // ...
  SmallVector<int> SrcMask;
  if (NeedLenChg) {
    // ...
    SrcMask[ExtIdx % NumDstElts] = ExtIdx;
    NewCost += TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
                                  DstVecTy, SrcVecTy, SrcMask, CostKind);
  }

  LLVM_DEBUG(dbgs() << "Found an insertion of an extracted fneg: " << I
                    << "\n  OldCost: " << OldCost << " vs NewCost: " << NewCost
                    << "\n");
  if (NewCost > OldCost)
    return false;

  Value *NewShuf, *LenChgShuf = nullptr;
  // ...
  replaceValue(I, *NewShuf);
  return true;
}
bool VectorCombine::foldInsExtBinop(Instruction &I) {
  BinaryOperator *VecBinOp, *SclBinOp;
  // ...
  LLVM_DEBUG(dbgs() << "Found an insert/extract binop pair: " << I
                    << "\n  OldCost: " << OldCost << " vs NewCost: " << NewCost
                    << "\n");
  if (NewCost > OldCost)
    return false;
  // ...
  NewInst->copyIRFlags(VecBinOp);
  NewInst->andIRFlags(SclBinOp);
  // ...
  replaceValue(I, *NewBO);
  return true;
}
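// Illustrative sketch of the cast-reassociation fold below: performing the
// bitwise op in the (usually narrower) source type needs only one cast:
//
//   %a = zext <4 x i16> %x to <4 x i32>
//   %b = zext <4 x i16> %y to <4 x i32>
//   %r = and <4 x i32> %a, %b
// -->
//   %i = and <4 x i16> %x, %y
//   %r = zext <4 x i16> %i to <4 x i32>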
bool VectorCombine::foldBitOpOfCastops(Instruction &I) {
  auto *BinOp = dyn_cast<BinaryOperator>(&I);
  if (!BinOp || !BinOp->isBitwiseLogicOp())
    return false;
  // ...
  if (!LHSCast || !RHSCast) {
    LLVM_DEBUG(dbgs() << "  One or both operands are not cast instructions\n");
    return false;
  }
  // ...
  if (CastOpcode != RHSCast->getOpcode())
    return false;

  // Only handle supported cast kinds.
  switch (CastOpcode) {
  case Instruction::BitCast:
  case Instruction::Trunc:
  case Instruction::SExt:
  case Instruction::ZExt:
    break;
  default:
    return false;
  }

  Value *LHSSrc = LHSCast->getOperand(0);
  Value *RHSSrc = RHSCast->getOperand(0);
  // ...
  auto *SrcTy = LHSSrc->getType();
  auto *DstTy = I.getType();
  // ...
  if (CastOpcode != Instruction::BitCast && /* ... */)
    return false;
  if (!SrcTy->getScalarType()->isIntegerTy() ||
      !DstTy->getScalarType()->isIntegerTy())
    return false;

  // Old cost includes both casts; in the new sequence a cast is only kept
  // when it still has other users.
  InstructionCost OldCost = /* ... */ LHSCastCost + RHSCastCost;
  // ...
  if (!LHSCast->hasOneUse())
    NewCost += LHSCastCost;
  if (!RHSCast->hasOneUse())
    NewCost += RHSCastCost;

  LLVM_DEBUG(dbgs() << "foldBitOpOfCastops: OldCost=" << OldCost
                    << " NewCost=" << NewCost << "\n");
  if (NewCost > OldCost)
    return false;

  Value *NewOp = Builder.CreateBinOp(BinOp->getOpcode(), LHSSrc, RHSSrc,
                                     BinOp->getName() + ".inner");
  if (auto *NewBinOp = dyn_cast<BinaryOperator>(NewOp))
    NewBinOp->copyIRFlags(BinOp);
  // ...
  replaceValue(I, *Result);
  return true;
}
bool VectorCombine::foldBitOpOfCastConstant(Instruction &I) {
  // Like foldBitOpOfCastops, but the RHS is a constant that can be
  // inverse-cast into the source type.
  // ...
  switch (CastOpcode) {
  case Instruction::BitCast:
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::Trunc:
    break;
  default:
    return false;
  }

  Value *LHSSrc = LHSCast->getOperand(0);
  // ...
  auto *SrcTy = LHSSrc->getType();
  auto *DstTy = I.getType();
  // ...
  if (CastOpcode != Instruction::BitCast && /* ... */)
    return false;
  if (!SrcTy->getScalarType()->isIntegerTy() ||
      !DstTy->getScalarType()->isIntegerTy())
    return false;
  // ...
  PreservedCastFlags RHSFlags;
  // ...
  if (!LHSCast->hasOneUse())
    NewCost += LHSCastCost;

  LLVM_DEBUG(dbgs() << "foldBitOpOfCastConstant: OldCost=" << OldCost
                    << " NewCost=" << NewCost << "\n");
  if (NewCost > OldCost)
    return false;

  Value *NewOp = Builder.CreateBinOp(/* ... */ LHSSrc, InvC,
                                     I.getName() + ".inner");
  if (auto *NewBinOp = dyn_cast<BinaryOperator>(NewOp))
    NewBinOp->copyIRFlags(&I);
  // ...
  replaceValue(I, *Result);
  return true;
}
bool VectorCombine::foldBitcastShuffle(Instruction &I) {
  // Move a bitcast ahead of a shuffle by rescaling the shuffle mask.
  // ...
  if (!DestTy || !SrcTy)
    return false;

  unsigned DestEltSize = DestTy->getScalarSizeInBits();
  unsigned SrcEltSize = SrcTy->getScalarSizeInBits();
  if (SrcTy->getPrimitiveSizeInBits() % DestEltSize != 0)
    return false;
  // ...
  if (!(BCTy0 && BCTy0->getElementType() == DestTy->getElementType()) &&
      !(BCTy1 && BCTy1->getElementType() == DestTy->getElementType()))
    return false;

  SmallVector<int, 16> NewMask;
  if (DestEltSize <= SrcEltSize) {
    // The bitcast is from wide to narrow/equal elements.
    assert(SrcEltSize % DestEltSize == 0 && "Unexpected shuffle mask");
    unsigned ScaleFactor = SrcEltSize / DestEltSize;
    // ...
  } else {
    // The bitcast is from narrow to wide elements.
    assert(DestEltSize % SrcEltSize == 0 && "Unexpected shuffle mask");
    unsigned ScaleFactor = DestEltSize / SrcEltSize;
    // ...
  }
  // ...
  unsigned NumSrcElts = SrcTy->getPrimitiveSizeInBits() / DestEltSize;
  auto *NewShuffleTy = /* ... */;
  auto *OldShuffleTy = /* ... */;
  unsigned NumOps = IsUnary ? 1 : 2;
  // ...
  // Both cost queries use TargetTransformInfo::CastContextHint::None.
  // ...
  LLVM_DEBUG(dbgs() << "Found a bitcasted shuffle: " << I << "\n  OldCost: "
                    << OldCost << " vs NewCost: " << NewCost << "\n");
  if (NewCost > OldCost || !NewCost.isValid())
    return false;
  // ...
  replaceValue(I, *Shuf);
  return true;
}
bool VectorCombine::scalarizeVPIntrinsic(Instruction &I) {
  // ...
  if (!ScalarOp0 || !ScalarOp1)
    return false;

  // The VP mask must be a splat of true for the scalarization to be valid.
  auto IsAllTrueMask = [](Value *MaskVal) {
    if (Value *SplattedVal = getSplatValue(MaskVal))
      if (auto *ConstValue = dyn_cast<Constant>(SplattedVal))
        return ConstValue->isAllOnesValue();
    return false;
  };
  // ...
  SmallVector<int> Mask;
  if (auto *FVTy = dyn_cast<FixedVectorType>(VecTy))
    Mask.resize(FVTy->getNumElements(), 0);
  // ...
  for (Value *V : VPI.args())
    Args.push_back(V->getType());
  IntrinsicCostAttributes Attrs(IntrID, VecTy, Args);
  // ...
  std::optional<unsigned> FunctionalOpcode = VPI.getFunctionalOpcode();
  std::optional<Intrinsic::ID> ScalarIntrID = std::nullopt;
  if (!FunctionalOpcode) {
    // ...
  }
  // ...
  IntrinsicCostAttributes Attrs(*ScalarIntrID, VecTy->getScalarType(), Args);
  // ...
  InstructionCost NewCost = ScalarOpCost + SplatCost + CostToKeepSplats;

  LLVM_DEBUG(dbgs() << "Found a VP Intrinsic to scalarize: " << VPI
                    << "\n  Cost of Intrinsic: " << OldCost
                    << ", Cost of scalarizing:" << NewCost << "\n");
  if (OldCost < NewCost || !NewCost.isValid())
    return false;

  // The scalar op may trap where the vector op would not; only speculate when
  // it is provably safe.
  bool SafeToSpeculate;
  // ...
  SafeToSpeculate = isSafeToSpeculativelyExecuteWithOpcode(
      *FunctionalOpcode, &VPI, nullptr, &AC, &DT);
  if (!SafeToSpeculate && /* ... */)
    return false;

  Value *ScalarVal =
      ScalarIntrID
          ? Builder.CreateIntrinsic(VecTy->getScalarType(), *ScalarIntrID,
                                    {ScalarOp0, ScalarOp1})
          : Builder.CreateBinOp((Instruction::BinaryOps)(*FunctionalOpcode),
                                ScalarOp0, ScalarOp1);
  // ...
  return true;
}
bool VectorCombine::scalarizeOpOrCmp(Instruction &I) {
  // ...
  if (!UO && !BO && !CI && !II)
    return false;
  // Intrinsic arguments may have different types than the intrinsic result.
  if (Arg->getType() != II->getType() && /* ... */)
    return false;
  // ...
  for (User *U : I.users())
    // ...

  // All operands must be constants or insertelements at one common index.
  std::optional<uint64_t> Index;
  // ...
  auto Ops = II ? II->args() : I.operands();
  // ...
  uint64_t InsIdx = 0;
  // ...
  if (OpTy->getElementCount().getKnownMinValue() <= InsIdx)
    return false;
  // ...
  else if (InsIdx != *Index)
    return false;
  // ...
  if (!Index.has_value())
    return false;
  // ...
  Type *ScalarTy = VecTy->getScalarType();
  assert(VecTy->isVectorTy() &&
         /* ... */
         "Unexpected types for insert element into binop or cmp");

  unsigned Opcode = I.getOpcode();
  if (CI) {
    // ...
  } else if (UO || BO) {
    // ...
  } else {
    IntrinsicCostAttributes ScalarICA(II->getIntrinsicID(), ScalarTy,
                                      /* ... */);
    // ...
    IntrinsicCostAttributes VectorICA(II->getIntrinsicID(), VecTy,
                                      /* ... */);
    // ...
  }

  // Try to constant fold the folded vector operands.
  Value *NewVecC = nullptr;
  if (CI)
    NewVecC = simplifyCmpInst(CI->getPredicate(), VecCs[0], VecCs[1], SQ);
  else if (UO)
    NewVecC =
        simplifyUnOp(UO->getOpcode(), VecCs[0], UO->getFastMathFlags(), SQ);
  else if (BO)
    NewVecC = simplifyBinOp(BO->getOpcode(), VecCs[0], VecCs[1], SQ);
  // ...
  for (auto [Idx, Op, VecC, Scalar] : enumerate(Ops, VecCs, ScalarOps)) {
    if (II && isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(), Idx,
                                                 &TTI))
      continue;
    InstructionCost InsertCost = TTI.getVectorInstrCost(
        Instruction::InsertElement, VecTy, CostKind, *Index, VecC, Scalar);
    OldCost += InsertCost;
    NewCost += !Op->hasOneUse() * InsertCost;
  }

  if (OldCost < NewCost || !NewCost.isValid())
    return false;
  // ...
  ++NumScalarIntrinsic;
  // ...
  if (CI)
    Scalar = Builder.CreateCmp(CI->getPredicate(), ScalarOps[0], ScalarOps[1]);
  // ...
  Scalar->setName(I.getName() + ".scalar");
  // ...
  if (auto *ScalarInst = dyn_cast<Instruction>(Scalar))
    ScalarInst->copyIRFlags(&I);
  // ...
  replaceValue(I, *Insert);
  return true;
}
bool VectorCombine::foldExtractedCmps(Instruction &I) {
  // Match a bitwise logic op of two compares of extracted elements.
  // ...
  if (!BI || !I.getType()->isIntegerTy(1))
    return false;
  // ...
  Value *B0 = I.getOperand(0), *B1 = I.getOperand(1);
  // ...
  CmpPredicate P0, P1;
  // ...
  uint64_t Index0, Index1;
  // ...
  ExtractElementInst *ConvertToShuf = getShuffleExtract(Ext0, Ext1, CostKind);
  if (!ConvertToShuf)
    return false;
  assert((ConvertToShuf == Ext0 || ConvertToShuf == Ext1) &&
         "Unknown ExtractElementInst");
  // ...
  unsigned CmpOpcode = /* ... */;
  // ...
  InstructionCost OldCost =
      Ext0Cost + Ext1Cost + CmpCost * 2 + /* ... */;
  // ...
  int CheapIndex = ConvertToShuf == Ext0 ? Index1 : Index0;
  int ExpensiveIndex = ConvertToShuf == Ext0 ? Index0 : Index1;
  // ...
  ShufMask[CheapIndex] = ExpensiveIndex;
  // ...
  NewCost += Ext0->hasOneUse() ? 0 : Ext0Cost;
  NewCost += Ext1->hasOneUse() ? 0 : Ext1Cost;
  // ...
  if (OldCost < NewCost || !NewCost.isValid())
    return false;
  // ...
  Value *LHS = ConvertToShuf == Ext0 ? Shuf : VCmp;
  Value *RHS = ConvertToShuf == Ext0 ? VCmp : Shuf;
  // ...
  replaceValue(I, *NewExt);
  return true;
}
bool VectorCombine::foldSelectsFromBitcast(Instruction &I) {
  // ...
  if (!SrcVecTy || !DstVecTy)
    return false;
  // ...
  if (SrcEltBits != 32 && SrcEltBits != 64)
    return false;
  // ...
  if (!DstEltTy->isIntegerTy() || DstEltBits >= SrcEltBits)
    return false;
  // ...
  if (!ScalarSelCost.isValid() || ScalarSelCost == 0)
    return false;

  // The fold pays off once enough scalar selects share a single condition.
  unsigned MinSelects = (VecSelCost.getValue() / ScalarSelCost.getValue()) + 1;
  // ...
  if (!BC->hasNUsesOrMore(MinSelects))
    return false;

  // Group the selects fed by the bitcast's extracts by their condition.
  DenseMap<Value *, SmallVector<SelectInst *, 8>> CondToSelects;
  // ...
  for (User *U : BC->users()) {
    // ...
    for (User *ExtUser : Ext->users()) {
      // ...
      if (/* ... */ Cond->getType()->isIntegerTy(1))
        // ...
    }
  }
  if (CondToSelects.empty())
    return false;

  bool MadeChange = false;
  Value *SrcVec = BC->getOperand(0);
  // ...
  for (auto [Cond, Selects] : CondToSelects) {
    if (Selects.size() < MinSelects) {
      LLVM_DEBUG(dbgs() << "VectorCombine: foldSelectsFromBitcast not "
                        << "profitable (VecCost=" << VecSelCost
                        << ", ScalarCost=" << ScalarSelCost
                        << ", NumSelects=" << Selects.size() << ")\n");
      continue;
    }
    // ...
    for (SelectInst *Sel : Selects) {
      // ...
      Value *Idx = Ext->getIndexOperand();
      // ...
      replaceValue(*Sel, *NewExt);
    }
    // ...
    LLVM_DEBUG(dbgs() << "VectorCombine: folded " << Selects.size()
                      << " selects into vector select\n");
    MadeChange = true;
  }
  return MadeChange;
}
static void analyzeCostOfVecReduction(const IntrinsicInst &II,
                                      TTI::TargetCostKind CostKind,
                                      const TargetTransformInfo &TTI,
                                      InstructionCost &CostBeforeReduction,
                                      InstructionCost &CostAfterReduction) {
  // ...
  unsigned ReductionOpc = /* ... */;
  // An extend feeding the reduction may fold into an extended reduction.
  // ...
  CostBeforeReduction =
      TTI.getCastInstrCost(RedOp->getOpcode(), VecRedTy, ExtType,
                           TTI::CastContextHint::None, CostKind);
  CostAfterReduction =
      TTI.getExtendedReductionCost(ReductionOpc, IsUnsigned, II.getType(),
                                   /* ... */ CostKind);
  // Recognize an extended multiply-accumulate (dot-product style) reduction.
  if (RedOp && II.getIntrinsicID() == Intrinsic::vector_reduce_add &&
      /* ... */
      (Op0->getOpcode() == RedOp->getOpcode() || Op0 == Op1)) {
    // ...
    InstructionCost ExtCost =
        TTI.getCastInstrCost(Op0->getOpcode(), MulType, ExtType,
                             TTI::CastContextHint::None, CostKind);
    InstructionCost MulCost =
        TTI.getArithmeticInstrCost(Instruction::Mul, MulType, CostKind);
    InstructionCost Ext2Cost =
        TTI.getCastInstrCost(RedOp->getOpcode(), VecRedTy, MulType,
                             TTI::CastContextHint::None, CostKind);
    CostBeforeReduction = ExtCost * 2 + MulCost + Ext2Cost;
    CostAfterReduction = TTI.getMulAccReductionCost(
        IsUnsigned, ReductionOpc, II.getType(), ExtType, CostKind);
    return;
  }
  CostAfterReduction = TTI.getArithmeticReductionCost(ReductionOpc, VecRedTy,
                                                      /* ... */ CostKind);
}
bool VectorCombine::foldBinopOfReductions(Instruction &I) {
  // ...
  if (BinOpOpc == Instruction::Sub)
    ReductionIID = Intrinsic::vector_reduce_add;
  // ...
  auto checkIntrinsicAndGetItsArgument = [](Value *V,
                                            Intrinsic::ID IID) -> Value * {
    auto *II = dyn_cast<IntrinsicInst>(V);
    if (!II)
      return nullptr;
    if (II->getIntrinsicID() == IID && II->hasOneUse())
      return II->getArgOperand(0);
    return nullptr;
  };

  Value *V0 = checkIntrinsicAndGetItsArgument(I.getOperand(0), ReductionIID);
  if (!V0)
    return false;
  Value *V1 = checkIntrinsicAndGetItsArgument(I.getOperand(1), ReductionIID);
  if (!V1)
    return false;
  // ...
  unsigned ReductionOpc = /* ... */;
  // ...
  InstructionCost NewCost =
      CostOfRedOperand0 + CostOfRedOperand1 + /* ... */;
  if (NewCost >= OldCost || !NewCost.isValid())
    return false;

  LLVM_DEBUG(dbgs() << "Found two mergeable reductions: " << I
                    << "\n  OldCost: " << OldCost << " vs NewCost: " << NewCost
                    << "\n");
  // ...
  if (BinOpOpc == Instruction::Or)
    VectorBO = Builder.CreateOr(V0, V1, "", /* ... */);
  // ...
  replaceValue(I, *Rdx);
  return true;
}
static bool isMemModifiedBetween(BasicBlock::iterator Begin,
                                 BasicBlock::iterator End,
                                 const MemoryLocation &Loc, AAResults &AA) {
  unsigned NumScanned = 0;
  return std::any_of(Begin, End, [&](const Instruction &Instr) {
    return isModSet(AA.getModRefInfo(&Instr, Loc)) ||
           ++NumScanned > MaxInstrsToScan;
  });
}
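// ScalarizationResult below classifies a dynamic vector index for
// scalarization: Safe (provably in bounds), Unsafe, or SafeWithFreeze (in
// bounds only after the index is frozen to block poison propagation).
// Typical usage, as a sketch:
//
//   ScalarizationResult R = canScalarizeAccess(VecTy, Idx, &I, AC, DT);
//   if (R.isUnsafe())
//     return false;
//   if (R.isSafeWithFreeze())
//     R.freeze(Builder, *cast<Instruction>(Idx)); // inserts freeze of Idx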
class ScalarizationResult {
  enum class StatusTy { Unsafe, Safe, SafeWithFreeze };

  StatusTy Status;
  Value *ToFreeze;

  ScalarizationResult(StatusTy Status, Value *ToFreeze = nullptr)
      : Status(Status), ToFreeze(ToFreeze) {}

public:
  ScalarizationResult(const ScalarizationResult &Other) = default;
  ~ScalarizationResult() {
    assert(!ToFreeze && "freeze() not called with ToFreeze being set");
  }

  static ScalarizationResult unsafe() { return {StatusTy::Unsafe}; }
  static ScalarizationResult safe() { return {StatusTy::Safe}; }
  static ScalarizationResult safeWithFreeze(Value *ToFreeze) {
    return {StatusTy::SafeWithFreeze, ToFreeze};
  }

  bool isSafe() const { return Status == StatusTy::Safe; }
  bool isUnsafe() const { return Status == StatusTy::Unsafe; }
  bool isSafeWithFreeze() const { return Status == StatusTy::SafeWithFreeze; }

  /// Reset the state to Unsafe and clear ToFreeze if set.
  void discard() {
    ToFreeze = nullptr;
    Status = StatusTy::Unsafe;
  }

  /// Freeze the ToFreeze value and update the matching use in \p UserI.
  void freeze(IRBuilderBase &Builder, Instruction &UserI) {
    assert(isSafeWithFreeze() &&
           "should only be used when freezing is required");
    assert(is_contained(ToFreeze->users(), &UserI) &&
           "UserI must be a user of ToFreeze");
    IRBuilder<>::InsertPointGuard Guard(Builder);
    // ...
    for (Use &U : UserI.operands())
      if (U.get() == ToFreeze)
        U.set(Frozen);
    ToFreeze = nullptr;
  }
};
static ScalarizationResult canScalarizeAccess(VectorType *VecTy, Value *Idx,
                                              Instruction *CtxI,
                                              AssumptionCache &AC,
                                              const DominatorTree &DT) {
  // For scalable vectors this is the minimum number of elements.
  uint64_t NumElements = VecTy->getElementCount().getKnownMinValue();

  if (auto *C = dyn_cast<ConstantInt>(Idx)) {
    if (C->getValue().ult(NumElements))
      return ScalarizationResult::safe();
    return ScalarizationResult::unsafe();
  }
  // ...
  if (!IntWidth)
    return ScalarizationResult::unsafe();
  APInt Zero(IntWidth, 0);
  APInt MaxElts(IntWidth, NumElements);
  ConstantRange ValidIndices(Zero, MaxElts);
  // ...
  if (ValidIndices.contains(computeConstantRange(Idx, /*ForSigned=*/false,
                                                 true, &AC, CtxI, &DT)))
    return ScalarizationResult::safe();
  return ScalarizationResult::unsafe();
  // ...
  // If the index may be poison but is masked into range, it is safe only
  // after inserting a freeze of the base index.
  if (ValidIndices.contains(IdxRange))
    return ScalarizationResult::safeWithFreeze(IdxBase);
  return ScalarizationResult::unsafe();
}

static Align computeAlignmentAfterScalarization(Align VectorAlignment,
                                                Type *ScalarType, Value *Idx,
                                                const DataLayout &DL) {
  if (auto *C = dyn_cast<ConstantInt>(Idx))
    return commonAlignment(VectorAlignment,
                           C->getZExtValue() * DL.getTypeStoreSize(ScalarType));
  return commonAlignment(VectorAlignment, DL.getTypeStoreSize(ScalarType));
}
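// Illustrative sketch of the store scalarization below:
//
//   %v = load <4 x i32>, ptr %p
//   %i = insertelement <4 x i32> %v, i32 %s, i64 1
//   store <4 x i32> %i, ptr %p
// -->
//   %g = getelementptr inbounds <4 x i32>, ptr %p, i32 0, i64 1
//   store i32 %s, ptr %g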
bool VectorCombine::foldSingleElementStore(Instruction &I) {
  // ...
  if (!match(SI->getValueOperand(),
             m_InsertElt(m_Value(Source), m_Value(NewElement), m_Value(Idx))))
    return false;

  if (auto *Load = dyn_cast<LoadInst>(Source)) {
    // ...
    Value *SrcAddr = Load->getPointerOperand()->stripPointerCasts();
    // Don't optimize for atomic/volatile loads or stores, and require the
    // load and store to access the same address in the same block.
    if (!Load->isSimple() || Load->getParent() != SI->getParent() ||
        !DL->typeSizeEqualsStoreSize(Load->getType()->getScalarType()) ||
        SrcAddr != SI->getPointerOperand()->stripPointerCasts())
      return false;
    // ...
    if (ScalarizableIdx.isUnsafe() ||
        isMemModifiedBetween(Load->getIterator(), SI->getIterator(),
                             MemoryLocation::get(SI), AA))
      return false;

    // Push the load back onto the worklist before its users, so erasures
    // happen in the correct order.
    Worklist.push(Load);

    if (ScalarizableIdx.isSafeWithFreeze())
      ScalarizableIdx.freeze(Builder, *cast<Instruction>(Idx));
    Value *GEP = Builder.CreateInBoundsGEP(
        SI->getValueOperand()->getType(), SI->getPointerOperand(),
        {ConstantInt::get(Idx->getType(), 0), Idx});
    // ...
    Align ScalarOpAlignment = computeAlignmentAfterScalarization(
        std::max(SI->getAlign(), Load->getAlign()), NewElement->getType(), Idx,
        *DL);
    // ...
    replaceValue(I, *NSI);
    return true;
  }
  return false;
}
bool VectorCombine::scalarizeLoad(Instruction &I) {
  // ...
  if (LI->isVolatile() || !DL->typeSizeEqualsStoreSize(VecTy->getScalarType()))
    return false;

  // Classify the load's users: either all extractelements or all bitcasts.
  bool AllExtracts = true;
  bool AllBitcasts = true;
  // ...
  unsigned NumInstChecked = 0;
  // ...
  for (User *U : LI->users()) {
    auto *UI = dyn_cast<Instruction>(U);
    if (!UI || UI->getParent() != LI->getParent())
      return false;
    // ...
    if (UI->use_empty())
      continue;
    // ...
      AllExtracts = false;
    // ...
      AllBitcasts = false;
    // Make sure memory is not modified between the load and its user.
    for (Instruction &I :
         make_range(std::next(LI->getIterator()), UI->getIterator())) {
      // ...
    }
    LastCheckedInst = UI;
  }

  if (AllExtracts)
    return scalarizeLoadExtract(LI, VecTy, Ptr);
  if (AllBitcasts)
    return scalarizeLoadBitcast(LI, VecTy, Ptr);
  return false;
}
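// Illustrative sketch of the load-extract scalarization below:
//
//   %v  = load <4 x i32>, ptr %p
//   %e0 = extractelement <4 x i32> %v, i64 0
//   %e2 = extractelement <4 x i32> %v, i64 2
// -->
//   %g0 = getelementptr inbounds <4 x i32>, ptr %p, i32 0, i64 0
//   %e0 = load i32, ptr %g0
//   %g2 = getelementptr inbounds <4 x i32>, ptr %p, i32 0, i64 2
//   %e2 = load i32, ptr %g2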
bool VectorCombine::scalarizeLoadExtract(LoadInst *LI, VectorType *VecTy,
                                         Value *Ptr) {
  // Track extracts whose index needs a freeze; discard that state on failure.
  DenseMap<ExtractElementInst *, ScalarizationResult> NeedFreeze;
  auto FailureGuard = make_scope_exit([&]() {
    for (auto &Pair : NeedFreeze)
      Pair.second.discard();
  });
  // ...
  for (User *U : LI->users()) {
    // ...
    if (ScalarIdx.isUnsafe())
      return false;
    if (ScalarIdx.isSafeWithFreeze()) {
      NeedFreeze.try_emplace(UI, ScalarIdx);
      ScalarIdx.discard();
    }
    // ...
    ScalarizedCost += TTI.getVectorInstrCost(
        Instruction::Load, VecTy->getElementType(), CostKind,
        Index ? Index->getZExtValue() : -1);
    // ...
  }

  LLVM_DEBUG(dbgs() << "Found all extractions of a vector load: " << *LI
                    << "\n  LoadExtractCost: " << OriginalCost
                    << " vs ScalarizedCost: " << ScalarizedCost << "\n");
  if (ScalarizedCost >= OriginalCost)
    return false;
  // ...
  Type *ElemType = VecTy->getElementType();
  // Replace extracts with narrow scalar loads.
  for (User *U : LI->users()) {
    auto *EI = cast<ExtractElementInst>(U);
    Value *Idx = EI->getIndexOperand();
    // Insert a freeze for indices that need it.
    auto It = NeedFreeze.find(EI);
    if (It != NeedFreeze.end())
      It->second.freeze(Builder, *cast<Instruction>(Idx));
    // ...
    auto *NewLoad = cast<LoadInst>(
        Builder.CreateLoad(ElemType, GEP, EI->getName() + ".scalar"));
    Align ScalarOpAlignment = computeAlignmentAfterScalarization(
        LI->getAlign(), ElemType, Idx, *DL);
    NewLoad->setAlignment(ScalarOpAlignment);
    // ...
    size_t Offset = ConstIdx->getZExtValue() * DL->getTypeStoreSize(ElemType);
    // ...
    replaceValue(*EI, *NewLoad, false);
  }

  FailureGuard.release();
  return true;
}
bool VectorCombine::scalarizeLoadBitcast(LoadInst *LI, VectorType *VecTy,
                                         Value *Ptr) {
  // All users must bitcast the load to the same same-sized scalar type.
  Type *TargetScalarType = nullptr;
  unsigned VecBitWidth = DL->getTypeSizeInBits(VecTy);

  for (User *U : LI->users()) {
    auto *BC = cast<BitCastInst>(U);
    Type *DestTy = BC->getDestTy();
    // ...
    unsigned DestBitWidth = DL->getTypeSizeInBits(DestTy);
    if (DestBitWidth != VecBitWidth)
      return false;
    // ...
    if (!TargetScalarType)
      TargetScalarType = DestTy;
    else if (TargetScalarType != DestTy)
      return false;
  }
  if (!TargetScalarType)
    return false;
  // ...
  LLVM_DEBUG(dbgs() << "Found vector load feeding only bitcasts: " << *LI
                    << "\n  OriginalCost: " << OriginalCost
                    << " vs ScalarizedCost: " << ScalarizedCost << "\n");
  if (ScalarizedCost >= OriginalCost)
    return false;
  // ...
  ScalarLoad->copyMetadata(*LI);
  // ...
  for (User *U : LI->users()) {
    auto *BC = cast<BitCastInst>(U);
    replaceValue(*BC, *ScalarLoad, false);
  }
  return true;
}
bool VectorCombine::scalarizeExtExtract(Instruction &I) {
  // Replace extends of a vector that is only extracted from with scalar
  // shift-and-mask operations on the packed integer value.
  // ...
  Type *ScalarDstTy = DstTy->getElementType();
  if (DL->getTypeSizeInBits(SrcTy) != DL->getTypeSizeInBits(ScalarDstTy))
    return false;
  // ...
  unsigned ExtCnt = 0;
  bool ExtLane0 = false;
  for (User *U : Ext->users()) {
    // ...
  }
  // ...
  InstructionCost ScalarCost =
      ExtCnt * TTI.getArithmeticInstrCost(Instruction::And, ScalarDstTy,
                                          CostKind, /* ... */) +
      (ExtCnt - ExtLane0) *
          TTI.getArithmeticInstrCost(Instruction::LShr, ScalarDstTy,
                                     CostKind, /* ... */);
  if (ScalarCost > VectorCost)
    return false;

  Value *ScalarV = Ext->getOperand(0);
  // ...
  // All destination lanes must be extracted, and the extracts must be
  // positioned so a poison source would already trigger UB.
  SmallDenseSet<ConstantInt *, 8> ExtractedLanes;
  bool AllExtractsTriggerUB = true;
  ExtractElementInst *LastExtract = nullptr;
  // ...
  for (User *U : Ext->users()) {
    // ...
    AllExtractsTriggerUB = false;
    // ...
    if (!LastExtract || LastExtract->comesBefore(Extract))
      LastExtract = Extract;
  }
  if (ExtractedLanes.size() != DstTy->getNumElements() ||
      !AllExtractsTriggerUB || /* ... */)
    // ...

  uint64_t SrcEltSizeInBits = DL->getTypeSizeInBits(SrcTy->getElementType());
  uint64_t TotalBits = DL->getTypeSizeInBits(SrcTy);
  // ...
  Value *Mask = ConstantInt::get(PackedTy, EltBitMask);
  for (User *U : Ext->users()) {
    // ...
    uint64_t ShiftAmt =
        /* ... */
            ? (TotalBits - SrcEltSizeInBits - Idx * SrcEltSizeInBits)
            : (Idx * SrcEltSizeInBits);
    // ...
    U->replaceAllUsesWith(And);
  }
  return true;
}
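// Illustrative sketch of the bool-mask concatenation fold below
// (little-endian layouts only):
//
//   %x  = bitcast <8 x i1> %m0 to i8
//   %y  = bitcast <8 x i1> %m1 to i8
//   %zx = zext i8 %x to i16
//   %zy = zext i8 %y to i16
//   %sh = shl i16 %zy, 8
//   %r  = or i16 %zx, %sh
// -->
//   %c = shufflevector <8 x i1> %m0, <8 x i1> %m1,
//                      <16 x i32> <i32 0, ..., i32 15>   ; concat mask
//   %r = bitcast <16 x i1> %c to i16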
bool VectorCombine::foldConcatOfBoolMasks(Instruction &I) {
  Type *Ty = I.getType();
  // ...
  if (DL->isBigEndian())
    return false;
  // ...
  uint64_t ShAmtX = 0;
  // ...
  uint64_t ShAmtY = 0;
  // ...
  // Canonicalize so the shift amounts are ordered.
  if (ShAmtX > ShAmtY) {
    std::swap(X, Y);
    std::swap(ShAmtX, ShAmtY);
  }
  // ...
  uint64_t ShAmtDiff = ShAmtY - ShAmtX;
  unsigned NumSHL = (ShAmtX > 0) + (ShAmtY > 0);
  // ...
  if (/* ... */ MaskTy->getNumElements() != ShAmtDiff ||
      MaskTy->getNumElements() > (BitWidth / 2))
    return false;

  auto *ConcatIntTy =
      Type::getIntNTy(Ty->getContext(), ConcatTy->getNumElements());
  auto *MaskIntTy = Type::getIntNTy(Ty->getContext(), ShAmtDiff);

  SmallVector<int, 32> ConcatMask(ConcatTy->getNumElements());
  std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
  // ...
  if (Ty != ConcatIntTy)
    // ...

  LLVM_DEBUG(dbgs() << "Found a concatenation of bitcasted bool masks: " << I
                    << "\n  OldCost: " << OldCost << " vs NewCost: " << NewCost
                    << "\n");
  if (NewCost > OldCost)
    return false;
  // ...
  if (Ty != ConcatIntTy) {
    // ...
  }
  // ...
  replaceValue(I, *Result);
  return true;
}
bool VectorCombine::foldPermuteOfBinops(Instruction &I) {
  BinaryOperator *BinOp;
  ArrayRef<int> OuterMask;
  // ...
  Value *Op00, *Op01, *Op10, *Op11;
  ArrayRef<int> Mask0, Mask1;
  // ...
  if (!Match0 && !Match1)
    return false;
  // ...
  if (!ShuffleDstTy || !BinOpTy || !Op0Ty || !Op1Ty)
    return false;

  unsigned NumSrcElts = BinOpTy->getNumElements();
  // ...
  if (/* ... */
      any_of(OuterMask, [NumSrcElts](int M) { return M >= (int)NumSrcElts; }))
    return false;

  // Merge the outer permute into each operand's shuffle mask.
  SmallVector<int> NewMask0, NewMask1;
  for (int M : OuterMask) {
    if (M < 0 || M >= (int)NumSrcElts) {
      // ...
    } else {
      NewMask0.push_back(Match0 ? Mask0[M] : M);
      NewMask1.push_back(Match1 ? Mask1[M] : M);
    }
  }

  unsigned NumOpElts = Op0Ty->getNumElements();
  bool IsIdentity0 =
      ShuffleDstTy == Op0Ty &&
      all_of(NewMask0, [NumOpElts](int M) { return M < (int)NumOpElts; }) &&
      /* ... */;
  bool IsIdentity1 =
      ShuffleDstTy == Op1Ty &&
      all_of(NewMask1, [NumOpElts](int M) { return M < (int)NumOpElts; }) &&
      /* ... */;

  InstructionCost OldCost = TTI.getShuffleCost(
      TargetTransformInfo::SK_PermuteSingleSrc,
      ShuffleDstTy, BinOpTy, OuterMask, CostKind,
      0, nullptr, {BinOp}, &I);
  // ...
  NewCost += BinOpCost;
  // Account for inner shuffles that stay live because of extra uses.
  // ...
  OldCost += Shuf0Cost;
  // ...
  NewCost += Shuf0Cost;
  // ...
  OldCost += Shuf1Cost;
  // ...
  NewCost += Shuf1Cost;
  // ...
  if (!IsIdentity0)
    NewCost += TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc,
                                  ShuffleDstTy, Op0Ty, NewMask0,
                                  CostKind, 0, nullptr, {Op00, Op01});
  if (!IsIdentity1)
    NewCost += TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc,
                                  ShuffleDstTy, Op1Ty, NewMask1,
                                  CostKind, 0, nullptr, {Op10, Op11});

  LLVM_DEBUG(dbgs() << "Found a shuffle feeding a shuffled binop: " << I
                    << "\n  OldCost: " << OldCost << " vs NewCost: " << NewCost
                    << "\n");
  if (NewCost > OldCost)
    return false;
  // ...
  if (auto *NewInst = dyn_cast<Instruction>(NewBO))
    NewInst->copyIRFlags(BinOp);
  // ...
  replaceValue(I, *NewBO);
  return true;
}
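// Illustrative sketch of the shuffle-of-binops fold below:
//
//   %a = add <4 x i32> %x, %y
//   %b = add <4 x i32> %z, %w
//   %s = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> %mask
// -->
//   %s0 = shufflevector <4 x i32> %x, <4 x i32> %z, <4 x i32> %mask
//   %s1 = shufflevector <4 x i32> %y, <4 x i32> %w, <4 x i32> %mask
//   %s  = add <4 x i32> %s0, %s1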
bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
  ArrayRef<int> OldMask;
  // ...
  if (LHS->getOpcode() != RHS->getOpcode())
    return false;
  // ...
  bool IsCommutative = false;
  // ...
  IsCommutative = BinaryOperator::isCommutative(BO->getOpcode());
  // ...
  if (!ShuffleDstTy || !BinResTy || !BinOpTy || X->getType() != Z->getType())
    return false;

  unsigned NumSrcElts = BinOpTy->getNumElements();
  // If we have something like "add X, Y" and "add Z, X", swap operands.
  if (IsCommutative && X != Z && Y != W && (X == W || Y == Z))
    std::swap(X, Y);

  auto ConvertToUnary = [NumSrcElts](int &M) {
    if (M >= (int)NumSrcElts)
      M -= NumSrcElts;
  };

  SmallVector<int> NewMask0(OldMask);
  // ...
  SmallVector<int> NewMask1(OldMask);
  // ...
  // Try to merge a still-live inner shuffle into the new masks.
  ArrayRef<int> InnerMask;
  if (match(/* ... */ m_Mask(InnerMask)) &&
      /* ... */
      all_of(InnerMask,
             [NumSrcElts](int M) { return M < (int)NumSrcElts; })) {
    // ...
  }
  // ...
  bool ReducedInstCount = false;
  ReducedInstCount |= MergeInner(X, 0, NewMask0, CostKind);
  ReducedInstCount |= MergeInner(Y, 0, NewMask1, CostKind);
  ReducedInstCount |= MergeInner(Z, NumSrcElts, NewMask0, CostKind);
  ReducedInstCount |= MergeInner(W, NumSrcElts, NewMask1, CostKind);

  bool SingleSrcBinOp = (X == Y) && (Z == W) && (NewMask0 == NewMask1);
  ReducedInstCount |= SingleSrcBinOp;
  // ...
  auto *ShuffleCmpTy = /* ... */;
  InstructionCost NewCost = TTI.getShuffleCost(
      SK0, ShuffleCmpTy, BinOpTy, NewMask0, CostKind, 0, nullptr, {X, Z});
  if (!SingleSrcBinOp)
    NewCost += /* ... */;

  LLVM_DEBUG(dbgs() << "Found a shuffle feeding two binops: " << I
                    << "\n  OldCost: " << OldCost << " vs NewCost: " << NewCost
                    << "\n");
  // If either shuffle will constant fold away, accept equal cost since the
  // instruction count shrinks.
  if (ReducedInstCount ? (NewCost > OldCost) : (NewCost >= OldCost))
    return false;
  // ...
  Value *NewBO = IsBinOp ? Builder.CreateBinOp(/* ... */)
                         : Builder.CreateCmp(PredLHS, Shuf0, Shuf1);
  // ...
  if (auto *NewInst = dyn_cast<Instruction>(NewBO)) {
    NewInst->copyIRFlags(LHS);
    NewInst->andIRFlags(RHS);
  }
  // ...
  replaceValue(I, *NewBO);
  return true;
}
bool VectorCombine::foldShuffleOfSelects(Instruction &I) {
  // ...
  Value *C1, *T1, *F1, *C2, *T2, *F2;
  // ...
  if (!C1VecTy || !C2VecTy || C1VecTy != C2VecTy)
    return false;

  // FP selects must agree on fast-math flags.
  if (((SI0FOp == nullptr) != (SI1FOp == nullptr)) ||
      ((SI0FOp != nullptr) &&
       (SI0FOp->getFastMathFlags() != SI1FOp->getFastMathFlags())))
    return false;
  // ...
  auto SelOp = Instruction::Select;
  // ...
  InstructionCost OldCost =
      CostSel1 + CostSel2 +
      TTI.getShuffleCost(/* ... */, Mask, CostKind, 0, nullptr,
                         {I.getOperand(0), I.getOperand(1)}, &I);
  InstructionCost NewCost = TTI.getShuffleCost(
      /* ... */, Mask, CostKind, 0, nullptr, {C1, C2});
  // ...
  if (!Sel1->hasOneUse())
    NewCost += CostSel1;
  if (!Sel2->hasOneUse())
    NewCost += CostSel2;

  LLVM_DEBUG(dbgs() << "Found a shuffle feeding two selects: " << I
                    << "\n  OldCost: " << OldCost << " vs NewCost: " << NewCost
                    << "\n");
  if (NewCost > OldCost)
    return false;
  // ...
  Value *NewSel;
  if (SI0FOp)
    NewSel = Builder.CreateSelectFMF(ShuffleCmp, ShuffleTrue, ShuffleFalse,
                                     SI0FOp->getFastMathFlags());
  else
    NewSel = Builder.CreateSelect(ShuffleCmp, ShuffleTrue, ShuffleFalse);
  // ...
  replaceValue(I, *NewSel);
  return true;
}
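// Illustrative sketch of the shuffle-of-castops fold below:
//
//   %a = zext <4 x i16> %x to <4 x i32>
//   %b = zext <4 x i16> %y to <4 x i32>
//   %s = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> %mask
// -->
//   %s = shufflevector <4 x i16> %x, <4 x i16> %y, <8 x i32> %mask
//   %r = zext <8 x i16> %s to <8 x i32>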
bool VectorCombine::foldShuffleOfCastops(Instruction &I) {
  // ...
  ArrayRef<int> OldMask;
  // ...
  if (!C0 || (IsBinaryShuffle && !C1))
    return false;
  // Unary shuffles of bitcasts are handled by foldBitcastShuffle.
  if (!IsBinaryShuffle && Opcode == Instruction::BitCast)
    return false;

  if (IsBinaryShuffle) {
    if (C0->getSrcTy() != C1->getSrcTy())
      return false;
    if (Opcode != C1->getOpcode()) {
      // A zext/sext pair can be handled as a single sext.
      // ...
      Opcode = Instruction::SExt;
      // ...
    }
  }
  // ...
  if (!ShuffleDstTy || !CastDstTy || !CastSrcTy)
    return false;

  unsigned NumSrcElts = CastSrcTy->getNumElements();
  unsigned NumDstElts = CastDstTy->getNumElements();
  assert((NumDstElts == NumSrcElts || Opcode == Instruction::BitCast) &&
         "Only bitcasts expected to alter src/dst element counts");

  // Only handle bitcasts with exact scale factors.
  if (NumDstElts != NumSrcElts && (NumSrcElts % NumDstElts) != 0 &&
      (NumDstElts % NumSrcElts) != 0)
    return false;

  SmallVector<int, 16> NewMask;
  if (NumSrcElts >= NumDstElts) {
    // The bitcast is from wide to narrow/equal elements; the mask can always
    // be rescaled.
    assert(NumSrcElts % NumDstElts == 0 && "Unexpected shuffle mask");
    unsigned ScaleFactor = NumSrcElts / NumDstElts;
    // ...
  } else {
    // The bitcast is from narrow to wide elements; the mask must pick
    // consecutive elements so the cast can happen first.
    assert(NumDstElts % NumSrcElts == 0 && "Unexpected shuffle mask");
    unsigned ScaleFactor = NumDstElts / NumSrcElts;
    // ...
  }

  auto *NewShuffleDstTy = /* ... */;
  // ...
  if (IsBinaryShuffle)
    // ...
  // ...
  if (IsBinaryShuffle) {
    // ...
  }

  LLVM_DEBUG(dbgs() << "Found a shuffle feeding casts: " << I
                    << "\n  OldCost: " << OldCost << " vs NewCost: " << NewCost
                    << "\n");
  if (NewCost > OldCost)
    return false;
  // ...
  if (IsBinaryShuffle)
    // ...
  // ...
  if (auto *NewInst = dyn_cast<Instruction>(Cast)) {
    NewInst->copyIRFlags(C0);
    if (IsBinaryShuffle)
      NewInst->andIRFlags(C1);
  }
  // ...
  replaceValue(I, *Cast);
  return true;
}
bool VectorCombine::foldShuffleOfShuffles(Instruction &I) {
  ArrayRef<int> OuterMask;
  Value *OuterV0, *OuterV1;
  // ...
  ArrayRef<int> InnerMask0, InnerMask1;
  Value *X0, *X1, *Y0, *Y1;
  // ...
  if (!Match0 && !Match1)
    return false;

  // Synthesize an all-poison inner mask for any outer operand that is not
  // itself a shuffle.
  SmallVector<int, 16> PoisonMask1;
  // ...
  InnerMask1 = PoisonMask1;
  // ...
  X0 = Match0 ? X0 : OuterV0;
  Y0 = Match0 ? Y0 : OuterV0;
  X1 = Match1 ? X1 : OuterV1;
  Y1 = Match1 ? Y1 : OuterV1;
  // ...
  if (!ShuffleDstTy || !ShuffleSrcTy || !ShuffleImmTy || /* ... */)
    return false;

  unsigned NumSrcElts = ShuffleSrcTy->getNumElements();
  unsigned NumImmElts = ShuffleImmTy->getNumElements();

  // Attempt to merge the shuffles into a single mask over at most two new
  // source operands.
  SmallVector<int, 16> NewMask(OuterMask);
  Value *NewX = nullptr, *NewY = nullptr;
  for (int &M : NewMask) {
    Value *Src = nullptr;
    if (0 <= M && M < (int)NumImmElts) {
      // ...
      Src = M >= (int)NumSrcElts ? Y0 : X0;
      M = M >= (int)NumSrcElts ? (M - NumSrcElts) : M;
      // ...
    } else if (M >= (int)NumImmElts) {
      // ...
      Src = M >= (int)NumSrcElts ? Y1 : X1;
      M = M >= (int)NumSrcElts ? (M - NumSrcElts) : M;
      // ...
    }
    if (Src) {
      assert(0 <= M && M < (int)NumSrcElts && "Unexpected shuffle mask index");
      // ...
      if (!NewX || NewX == Src) {
        NewX = Src;
        continue;
      }
      if (!NewY || NewY == Src) {
        // ...
      }
      // ...
    }
  }
  // ...
  // If the merged mask degenerates to an identity of one source, use it
  // directly.
  replaceValue(I, *NewX);
  return true;
  // ...
  bool IsUnary = all_of(NewMask, [&](int M) { return M < (int)NumSrcElts; });
  // ...
  InstructionCost NewCost = TTI.getShuffleCost(
      /* ... */, CostKind, 0, nullptr, {NewX, NewY});
  // Inner shuffles that stay live because of extra uses keep their cost.
  // ...
  NewCost += InnerCost0;
  // ...
  NewCost += InnerCost1;

  LLVM_DEBUG(dbgs() << "Found a shuffle feeding shuffles: " << I
                    << "\n  OldCost: " << OldCost << " vs NewCost: " << NewCost
                    << "\n");
  if (NewCost > OldCost)
    return false;
  // ...
  replaceValue(I, *Shuf);
  return true;
}
bool VectorCombine::foldShufflesOfLengthChangingShuffles(Instruction &I) {
  // ...
  unsigned ChainLength = 0;
  SmallVector<int> Mask;
  SmallVector<int> YMask;
  // ...
  // Walk down a chain of binary shuffles, peeling one length-changing leaf
  // shuffle per step and accumulating a combined mask.
  for (;;) {
    ArrayRef<int> OuterMask;
    Value *OuterV0, *OuterV1;
    if (ChainLength != 0 && !Trunk->hasOneUse())
      break;
    if (!match(Trunk, m_Shuffle(m_Value(OuterV0), m_Value(OuterV1),
                                m_Mask(OuterMask))))
      break;
    if (OuterV0->getType() != TrunkType) {
      // ...
    }

    ArrayRef<int> InnerMask0, InnerMask1;
    Value *A0, *A1, *B0, *B1;
    // ...
    bool Match0Leaf = Match0 && A0->getType() != I.getType();
    bool Match1Leaf = Match1 && A1->getType() != I.getType();
    if (Match0Leaf == Match1Leaf) {
      // ...
      break;
    }

    // Commute operands so the leaf shuffle is always operand 1.
    SmallVector<int> CommutedOuterMask;
    // ...
    for (int &M : CommutedOuterMask) {
      // ...
      if (M < (int)NumTrunkElts) {
        // ...
      }
    }
    OuterMask = CommutedOuterMask;
    // ...
    int NumLeafElts = YType->getNumElements();
    SmallVector<int> LocalYMask(InnerMask1);
    for (int &M : LocalYMask) {
      if (M >= NumLeafElts) {
        // ...
      }
    }

    if (ChainLength == 0) {
      Mask.assign(OuterMask);
      YMask.assign(LocalYMask);
      OldCost = NewCost = LocalOldCost;
    } else {
      // Merge the new leaf mask into the accumulated one.
      SmallVector<int> NewYMask(YMask);
      for (auto [CombinedM, LeafM] : llvm::zip(NewYMask, LocalYMask)) {
        if (LeafM == -1 || CombinedM == LeafM)
          continue;
        if (CombinedM == -1) {
          // ...
        }
        // ...
      }
      // ...
      SmallVector<int> NewMask;
      NewMask.reserve(NumTrunkElts);
      for (int M : Mask) {
        if (M < 0 || M >= static_cast<int>(NumTrunkElts)) {
          // ...
        }
        // ...
      }
      // ...
      // Stop when extending the chain no longer pays for itself.
      if (LocalNewCost >= NewCost && LocalOldCost < LocalNewCost - NewCost)
        break;

      LLVM_DEBUG({
        if (ChainLength == 1) {
          dbgs() << "Found chain of shuffles fed by length-changing shuffles: "
                 << I << '\n';
        }
        dbgs() << "  next chain link: " << *Trunk << '\n'
               << "  old cost: " << (OldCost + LocalOldCost)
               << "  new cost: " << LocalNewCost << '\n';
      });
      // ...
      OldCost += LocalOldCost;
      NewCost = LocalNewCost;
    }
    // ...
  }

  if (ChainLength <= 1)
    return false;

  if (all_of(Mask, [&](int M) {
        return M < 0 || M >= static_cast<int>(NumTrunkElts);
      })) {
    // ...
    replaceValue(I, *Root);
    return true;
  }

  // Fold the accumulated leaf mask into the trunk mask.
  for (int &M : Mask) {
    if (M >= static_cast<int>(NumTrunkElts))
      M = YMask[M - NumTrunkElts];
  }
  // ...
  replaceValue(I, *Root);
  return true;
}
bool VectorCombine::foldShuffleOfIntrinsics(Instruction &I) {
  // ...
  ArrayRef<int> OldMask;
  // ...
  if (IID != II1->getIntrinsicID())
    return false;
  // ...
  if (!ShuffleDstTy || !II0Ty)
    return false;

  // Scalar-typed arguments must match exactly between the two calls.
  for (unsigned I = 0, E = II0->arg_size(); I != E; ++I)
    if (isVectorIntrinsicWithScalarOpAtArg(IID, I, &TTI) &&
        II0->getArgOperand(I) != II1->getArgOperand(I))
      return false;
  // ...
  InstructionCost OldCost =
      /* ... */ +
      TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, ShuffleDstTy,
                         II0Ty, OldMask, CostKind, 0, nullptr, {II0, II1}, &I);

  SmallVector<Type *> NewArgsTy;
  // Identical operand pairs need (and are costed for) only one shuffle.
  SmallDenseSet<std::pair<Value *, Value *>> SeenOperandPairs;
  for (unsigned I = 0, E = II0->arg_size(); I != E; ++I) {
    if (isVectorIntrinsicWithScalarOpAtArg(IID, I, &TTI)) {
      NewArgsTy.push_back(II0->getArgOperand(I)->getType());
    } else {
      // ...
      auto *ArgTy = FixedVectorType::get(VecTy->getElementType(),
                                         ShuffleDstTy->getNumElements());
      NewArgsTy.push_back(ArgTy);
      std::pair<Value *, Value *> OperandPair =
          std::make_pair(II0->getArgOperand(I), II1->getArgOperand(I));
      if (!SeenOperandPairs.insert(OperandPair).second) {
        // ...
        continue;
      }
      NewCost += TTI.getShuffleCost(
          TargetTransformInfo::SK_PermuteTwoSrc, ArgTy, VecTy, OldMask,
          CostKind, 0, nullptr,
          {II0->getArgOperand(I), II1->getArgOperand(I)});
    }
  }
  IntrinsicCostAttributes NewAttr(IID, ShuffleDstTy, NewArgsTy);
  // ...
  if (!II0->hasOneUse())
    NewCost += /* ... */;
  if (II1 != II0 && !II1->hasOneUse())
    NewCost += /* ... */;

  LLVM_DEBUG(dbgs() << "Found a shuffle feeding two intrinsics: " << I
                    << "\n  OldCost: " << OldCost << " vs NewCost: " << NewCost
                    << "\n");
  if (NewCost > OldCost)
    return false;

  // Cache shuffles so identical operand pairs share one shuffle.
  SmallDenseMap<std::pair<Value *, Value *>, Value *> ShuffleCache;
  for (unsigned I = 0, E = II0->arg_size(); I != E; ++I) {
    // ...
    std::pair<Value *, Value *> OperandPair =
        std::make_pair(II0->getArgOperand(I), II1->getArgOperand(I));
    auto It = ShuffleCache.find(OperandPair);
    if (It != ShuffleCache.end()) {
      // ...
      continue;
    }
    Value *Shuf = Builder.CreateShuffleVector(II0->getArgOperand(I),
                                              II1->getArgOperand(I), OldMask);
    ShuffleCache[OperandPair] = Shuf;
    // ...
  }
  // ...
  if (auto *NewInst = dyn_cast<Instruction>(NewIntrinsic)) {
    NewInst->copyIRFlags(II0);
    NewInst->andIRFlags(II1);
  }
  // ...
  replaceValue(I, *NewIntrinsic);
  return true;
}
bool VectorCombine::foldPermuteOfIntrinsic(Instruction &I) {
  // ...
  if (!ShuffleDstTy || !IntrinsicSrcTy)
    return false;

  unsigned NumSrcElts = IntrinsicSrcTy->getNumElements();
  if (any_of(Mask, [NumSrcElts](int M) { return M >= (int)NumSrcElts; }))
    return false;
  // ...
  InstructionCost OldCost =
      /* ... */ +
      TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
                         ShuffleDstTy, IntrinsicSrcTy, Mask, CostKind, 0,
                         nullptr, {V0}, &I);

  SmallVector<Type *> NewArgsTy;
  for (unsigned I = 0, E = II0->arg_size(); I != E; ++I) {
    if (isVectorIntrinsicWithScalarOpAtArg(IID, I, &TTI)) {
      NewArgsTy.push_back(II0->getArgOperand(I)->getType());
    } else {
      // ...
      auto *ArgTy = FixedVectorType::get(VecTy->getElementType(),
                                         ShuffleDstTy->getNumElements());
      NewArgsTy.push_back(ArgTy);
      NewCost += TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
                                    ArgTy, VecTy, Mask, CostKind, 0, nullptr,
                                    {II0->getArgOperand(I)});
    }
  }
  IntrinsicCostAttributes NewAttr(IID, ShuffleDstTy, NewArgsTy);
  // ...
  if (!II0->hasOneUse())
    NewCost += /* ... */;

  LLVM_DEBUG(dbgs() << "Found a permute of intrinsic: " << I << "\n  OldCost: "
                    << OldCost << " vs NewCost: " << NewCost << "\n");
  if (NewCost > OldCost)
    return false;
  // ...
  for (unsigned I = 0, E = II0->arg_size(); I != E; ++I) {
    // ...
  }
  // ...
  replaceValue(I, *NewIntrinsic);
  return true;
}
using InstLane = std::pair<Use *, int>;

// Walk up through a chain of shuffles to find the original value/lane pair
// feeding the given lane.
static InstLane lookThroughShuffles(Use *U, int Lane) {
  while (auto *SV = dyn_cast<ShuffleVectorInst>(U->get())) {
    // ...
    int M = SV->getMaskValue(Lane);
    // ...
    if (static_cast<unsigned>(M) < NumElts) {
      U = &SV->getOperandUse(0);
      Lane = M;
    } else {
      U = &SV->getOperandUse(1);
      Lane = M - NumElts;
    }
  }
  return InstLane{U, Lane};
}

static SmallVector<InstLane>
generateInstLaneVectorFromOperand(ArrayRef<InstLane> Item, int Op) {
  SmallVector<InstLane> NItem;
  for (InstLane IL : Item) {
    auto [U, Lane] = IL;
    // ...
  }
  return NItem;
}

// Check whether the item lanes form a free concatenation of whole vectors.
static bool isFreeConcat(ArrayRef<InstLane> Item, /* ... */) {
  // ...
  unsigned NumElts = Ty->getNumElements();
  if (Item.size() == NumElts || NumElts == 1 || Item.size() % NumElts != 0)
    return false;
  // ...
  std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
  // ...
  unsigned NumSlices = Item.size() / NumElts;
  // ...
  for (unsigned Slice = 0; Slice < NumSlices; ++Slice) {
    Use *SliceV = Item[Slice * NumElts].first;
    if (!SliceV || SliceV->get()->getType() != Ty)
      return false;
    for (unsigned Elt = 0; Elt < NumElts; ++Elt) {
      auto [V, Lane] = Item[Slice * NumElts + Elt];
      if (Lane != static_cast<int>(Elt) || SliceV->get() != V->get())
        return false;
    }
  }
  return true;
}

static Value *generateNewInstTree(ArrayRef<InstLane> Item, FixedVectorType *Ty,
                                  /* ... */
                                  IRBuilderBase &Builder,
                                  const TargetTransformInfo *TTI) {
  auto [FrontU, FrontLane] = Item.front();

  if (IdentityLeafs.contains(FrontU)) {
    return FrontU->get();
  }
  if (SplatLeafs.contains(FrontU)) {
    // ...
    return Builder.CreateShuffleVector(FrontU->get(), Mask);
  }
  if (ConcatLeafs.contains(FrontU)) {
    // ...
    for (unsigned S = 0; S < Values.size(); ++S)
      Values[S] = Item[S * NumElts].first->get();
    // Build a tree of two-operand concatenating shuffles.
    while (Values.size() > 1) {
      // ...
      std::iota(Mask.begin(), Mask.end(), 0);
      // ...
      for (unsigned S = 0; S < NewValues.size(); ++S)
        NewValues[S] =
            Builder.CreateShuffleVector(Values[S * 2], Values[S * 2 + 1], Mask);
      // ...
    }
    return Values[0];
  }

  // Otherwise, rebuild the instruction over the per-operand lane vectors.
  unsigned NumOps = I->getNumOperands() - (II ? 1 : 0);
  SmallVector<Value *> Ops(NumOps);
  for (unsigned Idx = 0; Idx < NumOps; Idx++) {
    if (II && /* scalar op at Idx */) {
      Ops[Idx] = II->getOperand(Idx);
      continue;
    }
    Ops[Idx] = generateNewInstTree(generateInstLaneVectorFromOperand(Item, Idx),
                                   Ty, IdentityLeafs, SplatLeafs, ConcatLeafs,
                                   Builder, TTI);
  }

  for (const auto &Lane : Item)
    // ...

  if (auto *CI = dyn_cast<CmpInst>(I)) {
    auto *Value = Builder.CreateCmp(CI->getPredicate(), Ops[0], Ops[1]);
    // ...
    return Value;
  }
  if (auto *CI = dyn_cast<CastInst>(I)) {
    auto *Value = Builder.CreateCast(CI->getOpcode(), Ops[0], DstTy);
    // ...
    return Value;
  }
  if (II) {
    auto *Value = Builder.CreateIntrinsic(DstTy, II->getIntrinsicID(), Ops);
    // ...
    return Value;
  }
  // ...
}
bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
  auto *Ty = dyn_cast<FixedVectorType>(I.getType());
  if (!Ty || I.use_empty())
    return false;
  // ...
  for (unsigned M = 0, E = Ty->getNumElements(); M < E; ++M)
    // ...

  SmallPtrSet<Use *, 4> IdentityLeafs, SplatLeafs, ConcatLeafs;
  unsigned NumVisited = 0;
  // ...
  while (!Worklist.empty()) {
    // ...
    auto [FrontU, FrontLane] = Item.front();
    // ...
    auto IsEquiv = [&](Value *X, Value *Y) {
      return X->getType() == Y->getType() && /* ... */;
    };

    // An identity leaf: every lane reads the matching index of one vector.
    if (FrontLane == 0 &&
        /* element count matches */ Ty->getNumElements() &&
        all_of(drop_begin(enumerate(Item)), [&](const auto &E) {
          return !E.value().first || (IsEquiv(E.value().first->get(), FrontV) &&
                                      E.value().second == (int)E.index());
        })) {
      IdentityLeafs.insert(FrontU);
      continue;
    }
    // A splat constant leaf.
    if (auto *C = dyn_cast<Constant>(FrontV);
        C && C->getSplatValue() && /* ... */) {
      SplatLeafs.insert(FrontU);
      continue;
    }
    // A splat leaf: every lane reads the same value and lane.
    if (all_of(drop_begin(Item), [Item](InstLane &IL) {
          auto [FrontU, FrontLane] = Item.front();
          auto [U, Lane] = IL;
          return !U || (U->get() == FrontU->get() && Lane == FrontLane);
        })) {
      SplatLeafs.insert(FrontU);
      continue;
    }

    // Check that each lane holds an instruction equivalent to the first.
    auto CheckLaneIsEquivalentToFirst = [Item](InstLane IL) {
      // ...
      Value *V = IL.first->get();
      // ...
      if (auto *CI = dyn_cast<CmpInst>(V))
        if (CI->getPredicate() != cast<CmpInst>(FrontV)->getPredicate())
          return false;
      if (auto *CI = dyn_cast<CastInst>(V))
        if (CI->getSrcTy()->getScalarType() !=
            /* ... */)
          return false;
      if (auto *SI = dyn_cast<SelectInst>(V))
        if (/* ... */
            SI->getOperand(0)->getType() != /* ... */)
          return false;
      // ...
      return !II || (isa<IntrinsicInst>(FrontV) &&
                     II->getIntrinsicID() == /* ... */ &&
                     !II->hasOperandBundles());
    };

    // ...
    if (auto *BO = dyn_cast<BinaryOperator>(FrontU->get());
        BO && BO->isIntDivRem()) {
      // Integer div/rem cannot be speculated across poison lanes.
      // ...
    } else if (isa<UnaryOperator, TruncInst, ZExtInst, SExtInst, FPToSIInst,
                   FPToUIInst, SIToFPInst, UIToFPInst>(FrontU)) {
      // ...
      if (DstTy && SrcTy &&
          SrcTy->getNumElements() == DstTy->getNumElements()) {
        // ...
      }
    } else if (auto *II = dyn_cast<IntrinsicInst>(FrontU->get());
               II && /* ... */ !II->hasOperandBundles()) {
      for (unsigned Op = 0, E = II->getNumOperands() - 1; Op < E; Op++) {
        // ...
      }
      // ...
    }
    // ...
    ConcatLeafs.insert(FrontU);
    // ...
  }

  if (NumVisited <= 1)
    return false;

  LLVM_DEBUG(dbgs() << "Found a superfluous identity shuffle: " << I << "\n");
  // ...
  Value *V = generateNewInstTree(Start, Ty, IdentityLeafs, SplatLeafs,
                                 ConcatLeafs, Builder, &TTI);
  replaceValue(I, *V);
  return true;
}
bool VectorCombine::foldShuffleFromReductions(Instruction &I) {
  auto *II = dyn_cast<IntrinsicInst>(&I);
  if (!II)
    return false;
  switch (II->getIntrinsicID()) {
  case Intrinsic::vector_reduce_add:
  case Intrinsic::vector_reduce_mul:
  case Intrinsic::vector_reduce_and:
  case Intrinsic::vector_reduce_or:
  case Intrinsic::vector_reduce_xor:
  case Intrinsic::vector_reduce_smin:
  case Intrinsic::vector_reduce_smax:
  case Intrinsic::vector_reduce_umin:
  case Intrinsic::vector_reduce_umax:
    break;
  default:
    return false;
  }

  // Find all inputs of the reduction, looking through binops, and check for a
  // single shuffle that can be moved to after the reduction ops.
  std::queue<Value *> Worklist;
  SmallPtrSet<Value *, 4> Visited;
  ShuffleVectorInst *Shuffle = nullptr;
  // ...
  while (!Worklist.empty()) {
    Value *CV = Worklist.front();
    // ...
    if (CI->isBinaryOp()) {
      for (auto *Op : CI->operand_values())
        Worklist.push(Op);
      // ...
    } else if (auto *SV = dyn_cast<ShuffleVectorInst>(CI)) {
      if (Shuffle && Shuffle != SV)
        return false;
      // ...
    }
    // ...
  }
  // ...
  // Every use of a visited value must stay inside the visited set.
  for (auto *V : Visited)
    for (auto *U : V->users())
      if (!Visited.contains(U) && U != &I)
        return false;

  FixedVectorType *VecType = /* ... */;
  // ...
  FixedVectorType *ShuffleInputType =
      dyn_cast<FixedVectorType>(Shuffle->getOperand(0)->getType());
  if (!ShuffleInputType)
    return false;
  // ...
  SmallVector<int> ConcatMask;
  // ...
  sort(ConcatMask, [](int X, int Y) { return (unsigned)X < (unsigned)Y; });
  bool UsesSecondVec =
      any_of(ConcatMask, [&](int M) { return M >= (int)NumInputElts; });
  // ...
  InstructionCost NewCost = TTI.getShuffleCost(
      /* ... */, ShuffleInputType, ConcatMask, CostKind);

  LLVM_DEBUG(dbgs() << "Found a reduction feeding from a shuffle: " << *Shuffle
                    << "\n");
  LLVM_DEBUG(dbgs() << "  OldCost: " << OldCost << " vs NewCost: " << NewCost
                    << "\n");
  bool MadeChanges = false;
  if (NewCost < OldCost) {
    // ...
    LLVM_DEBUG(dbgs() << "Created new shuffle: " << *NewShuffle << "\n");
    replaceValue(*Shuffle, *NewShuffle);
    MadeChanges = true;
  }

  // See if foldSelectShuffle can reduce the cost further.
  MadeChanges |= foldSelectShuffle(*Shuffle, true);
  return MadeChanges;
}
bool VectorCombine::foldShuffleChainsToReduce(Instruction &I) {
  // ...
  std::queue<Value *> InstWorklist;
  // ...
  std::optional<unsigned int> CommonCallOp = std::nullopt;
  std::optional<Instruction::BinaryOps> CommonBinOp = std::nullopt;

  bool IsFirstCallOrBinInst = true;
  bool ShouldBeCallOrBinInst = true;
  // ...
  SmallVector<Value *, 2> PrevVecV(2, nullptr);
  // ...
  int64_t VecSize = FVT->getNumElements();
  // ...
  unsigned int NumLevels = Log2_64_Ceil(VecSize), VisitedCnt = 0;
  int64_t ShuffleMaskHalf = 1, ExpectedParityMask = 0;

  // For non-power-of-2 sizes, some reduction levels act on an odd number of
  // elements; record which levels via a parity bitmask.
  for (int Cur = VecSize, Mask = NumLevels - 1; Cur > 1;
       Cur = (Cur + 1) / 2, --Mask) {
    if (Cur & 1)
      ExpectedParityMask |= (1ll << Mask);
  }

  InstWorklist.push(VecOpEE);

  while (!InstWorklist.empty()) {
    Value *CI = InstWorklist.front();
    // ...
    if (auto *II = dyn_cast<IntrinsicInst>(CI)) {
      if (!ShouldBeCallOrBinInst)
        return false;
      if (!IsFirstCallOrBinInst && any_of(PrevVecV, equal_to(nullptr)))
        return false;
      if (II != (IsFirstCallOrBinInst ? VecOpEE : PrevVecV[0]))
        return false;
      IsFirstCallOrBinInst = false;
      // ...
      CommonCallOp = II->getIntrinsicID();
      if (II->getIntrinsicID() != *CommonCallOp)
        return false;

      switch (II->getIntrinsicID()) {
      case Intrinsic::umin:
      case Intrinsic::umax:
      case Intrinsic::smin:
      case Intrinsic::smax: {
        auto *Op0 = II->getOperand(0);
        auto *Op1 = II->getOperand(1);
        // ...
        break;
      }
      default:
        return false;
      }
      ShouldBeCallOrBinInst ^= 1;

      IntrinsicCostAttributes ICA(
          *CommonCallOp, II->getType(),
          {PrevVecV[0]->getType(), PrevVecV[1]->getType()});
      // ...
      InstWorklist.push(PrevVecV[1]);
      InstWorklist.push(PrevVecV[0]);
    } else if (auto *BinOp = dyn_cast<BinaryOperator>(CI)) {
      if (!ShouldBeCallOrBinInst)
        return false;
      if (!IsFirstCallOrBinInst && any_of(PrevVecV, equal_to(nullptr)))
        return false;
      if (BinOp != (IsFirstCallOrBinInst ? VecOpEE : PrevVecV[0]))
        return false;
      IsFirstCallOrBinInst = false;
      // ...
      switch (*CommonBinOp) {
      case BinaryOperator::Add:
      case BinaryOperator::Mul:
      case BinaryOperator::Or:
      case BinaryOperator::And:
      case BinaryOperator::Xor: {
        // ...
        break;
      }
      default:
        return false;
      }
      ShouldBeCallOrBinInst ^= 1;
      // ...
      InstWorklist.push(PrevVecV[1]);
      InstWorklist.push(PrevVecV[0]);
    } else if (auto *SVInst = dyn_cast<ShuffleVectorInst>(CI)) {
      if (ShouldBeCallOrBinInst || any_of(PrevVecV, equal_to(nullptr)))
        return false;
      if (SVInst != PrevVecV[1])
        return false;
      // ...
      ArrayRef<int> CurMask;
      // ...
      // Check the expected "halving" mask at this reduction level.
      for (int Mask = 0, MaskSize = CurMask.size(); Mask != MaskSize; ++Mask) {
        if (Mask < ShuffleMaskHalf &&
            CurMask[Mask] != ShuffleMaskHalf + Mask - (ExpectedParityMask & 1))
          return false;
        if (Mask >= ShuffleMaskHalf && CurMask[Mask] != -1)
          return false;
      }
      ShuffleMaskHalf *= 2;
      ShuffleMaskHalf -= (ExpectedParityMask & 1);
      ExpectedParityMask >>= 1;

      OrigCost += TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
                                     SVInst->getType(), SVInst->getType(),
                                     CurMask, CostKind);
      VisitedCnt += 1;
      if (!ExpectedParityMask && VisitedCnt == NumLevels)
        break;
      ShouldBeCallOrBinInst ^= 1;
    } else {
      return false;
    }
  }

  if (ShouldBeCallOrBinInst)
    return false;

  assert(VecSize != -1 && "Expected Match for Vector Size");

  Value *FinalVecV = PrevVecV[0];
  // ...
  IntrinsicCostAttributes ICA(ReducedOp, FinalVecVTy, {FinalVecV});
  // ...
  if (NewCost >= OrigCost)
    return false;

  auto *ReducedResult =
      Builder.CreateIntrinsic(ReducedOp, {FinalVecV->getType()}, {FinalVecV});
  replaceValue(I, *ReducedResult);
  return true;
}
bool VectorCombine::foldCastFromReductions(Instruction &I) {
  // ...
  bool TruncOnly = false;
  switch (IID) {
  case Intrinsic::vector_reduce_add:
  case Intrinsic::vector_reduce_mul:
    TruncOnly = true;
    break;
  case Intrinsic::vector_reduce_and:
  case Intrinsic::vector_reduce_or:
  case Intrinsic::vector_reduce_xor:
    break;
  default:
    return false;
  }
  // ...
  Value *ReductionSrc = I.getOperand(0);
  // ...
  Type *ResultTy = I.getType();
  // ...
  InstructionCost NewCost = TTI.getArithmeticReductionCost(
      ReductionOpc, ReductionSrcTy, std::nullopt, CostKind);
  // ...
  if (OldCost <= NewCost || !NewCost.isValid())
    return false;

  Value *NewReduction = Builder.CreateIntrinsic(SrcTy->getScalarType(),
                                                II->getIntrinsicID(), {Src});
  // ...
  replaceValue(I, *NewCast);
  return true;
}
// Walk the users of a shuffle (transitively, up to a small limit) and return
// true if they feed a vector reduction intrinsic without any disqualifying
// user. Used by foldSelectShuffle to accept equal-cost folds that help
// reductions.
static bool feedsIntoVectorReduction(ShuffleVectorInst *SVI) {
  constexpr unsigned MaxVisited = 32;
  // ...
  bool FoundReduction = false;
  // ...
  while (!WorkList.empty()) {
    // ...
    for (User *U : I->users()) {
      auto *UI = dyn_cast<Instruction>(U);
      if (!UI || !Visited.insert(UI).second)
        continue;
      if (Visited.size() > MaxVisited)
        return false;
      if (auto *II = dyn_cast<IntrinsicInst>(UI)) {
        switch (II->getIntrinsicID()) {
        case Intrinsic::vector_reduce_add:
        case Intrinsic::vector_reduce_mul:
        case Intrinsic::vector_reduce_and:
        case Intrinsic::vector_reduce_or:
        case Intrinsic::vector_reduce_xor:
        case Intrinsic::vector_reduce_smin:
        case Intrinsic::vector_reduce_smax:
        case Intrinsic::vector_reduce_umin:
        case Intrinsic::vector_reduce_umax:
          FoundReduction = true;
          continue;
        default:
          return false;
        }
      }
      // ...
    }
  }
  return FoundReduction;
}
bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) {
  // ...
  if (!Op0 || !Op1 || Op0 == Op1 || !Op0->isBinaryOp() || !Op1->isBinaryOp() ||
      VT != Op0->getType())
    return false;
  // ...
  SmallPtrSet<Instruction *, 4> InputShuffles({SVI0A, SVI0B, SVI1A, SVI1B});
  // Only the two binops (or dead/input shuffles) may use the input shuffles.
  auto checkSVNonOpUses = [&](Instruction *I) {
    if (!I || I->getOperand(0)->getType() != VT)
      return true;
    return any_of(I->users(), [&](User *U) {
      return U != Op0 && U != Op1 &&
             !(isa<ShuffleVectorInst>(U) &&
               (InputShuffles.contains(cast<Instruction>(U)) ||
                isInstructionTriviallyDead(cast<Instruction>(U))));
    });
  };
  if (checkSVNonOpUses(SVI0A) || checkSVNonOpUses(SVI0B) ||
      checkSVNonOpUses(SVI1A) || checkSVNonOpUses(SVI1B))
    return false;

  // Collect all output shuffles of the two binops; there may be a group.
  // ...
  if (!collectShuffles(Op0) || !collectShuffles(Op1))
    return false;
  // From a reduction we must be processing a single shuffle, otherwise the
  // other uses will not be lane-invariant.
  if (FromReduction && Shuffles.size() > 1)
    return false;

  // Include shuffle uses of the found shuffles in the cost calculation.
  if (!FromReduction) {
    for (ShuffleVectorInst *SV : Shuffles) {
      for (auto *U : SV->users()) {
        // ...
        Shuffles.push_back(SSV);
      }
    }
  }

  // For each output shuffle, pack the used first-vector elements to the
  // front and the second-vector elements to the back, recording the
  // ReconstructMask needed to restore the original lane order.
  int MaxV1Elt = 0, MaxV2Elt = 0;
  unsigned NumElts = VT->getNumElements();
  for (ShuffleVectorInst *SVN : Shuffles) {
    SmallVector<int> Mask;
    SVN->getShuffleMask(Mask);

    // Operands must match the originals, possibly after commuting the mask.
    Value *SVOp0 = SVN->getOperand(0);
    Value *SVOp1 = SVN->getOperand(1);
    // ...
    for (int &Elem : Mask) {
      // ...
    }
    if (SVOp0 == Op1 && SVOp1 == Op0) {
      // ...
    }
    if (SVOp0 != Op0 || SVOp1 != Op1)
      return false;

    SmallVector<int> ReconstructMask;
    for (unsigned I = 0; I < Mask.size(); I++) {
      if (Mask[I] < 0) {
        // ...
      } else if (Mask[I] < static_cast<int>(NumElts)) {
        MaxV1Elt = std::max(MaxV1Elt, Mask[I]);
        auto It = find_if(V1, [&](const std::pair<int, int> &A) {
          return Mask[I] == A.first;
        });
        // ...
      } else {
        MaxV2Elt = std::max<int>(MaxV2Elt, Mask[I] - NumElts);
        auto It = find_if(V2, [&](const std::pair<int, int> &A) {
          return Mask[I] - static_cast<int>(NumElts) == A.first;
        });
        // ...
      }
    }

    // For reductions, lane order does not change the result, and in-order
    // masks help the shuffle fold away.
    if (FromReduction)
      sort(ReconstructMask);
    OrigReconstructMasks.push_back(std::move(ReconstructMask));
  }

  // If the elements are already packed, the transform cannot help further
  // (and re-applying it could cycle).
  if (/* ... */
      (MaxV1Elt == static_cast<int>(V1.size()) - 1 &&
       MaxV2Elt == static_cast<int>(V2.size()) - 1))
    return false;

  // Look through an input shuffle to the base value/mask element.
  auto GetBaseMaskValue = [&](Instruction *I, int M) {
    // ...
    if (InputShuffles.contains(SSV))
      // ...
    return SV->getMaskValue(M);
  };
  auto SortBase = [&](Instruction *A, std::pair<int, int> X,
                      std::pair<int, int> Y) {
    int MXA = GetBaseMaskValue(A, X.first);
    int MYA = GetBaseMaskValue(A, Y.first);
    return MXA < MYA;
  };
  stable_sort(V1, [&](std::pair<int, int> A, std::pair<int, int> B) {
    return SortBase(SVI0A, A, B);
  });
  stable_sort(V2, [&](std::pair<int, int> A, std::pair<int, int> B) {
    return SortBase(SVI1A, A, B);
  });

  // Recompute the ReconstructMasks against the sorted packing order.
  for (const auto &Mask : OrigReconstructMasks) {
    SmallVector<int> ReconstructMask;
    for (int M : Mask) {
      auto FindIndex = [](const SmallVector<std::pair<int, int>> &V, int M) {
        auto It = find_if(V, [M](auto A) { return A.second == M; });
        assert(It != V.end() && "Expected all entries in Mask");
        return std::distance(V.begin(), It);
      };
      if (M < 0) {
        // ...
      } else if (M < static_cast<int>(NumElts)) {
        ReconstructMask.push_back(FindIndex(V1, M));
      } else {
        ReconstructMask.push_back(NumElts + FindIndex(V2, M));
      }
    }
    ReconstructMasks.push_back(std::move(ReconstructMask));
  }

  // Calculate the masks for the new input shuffles, padded with poison.
  SmallVector<int> V1A, V1B, V2A, V2B;
  for (unsigned I = 0; I < V1.size(); I++) {
    V1A.push_back(GetBaseMaskValue(SVI0A, V1[I].first));
    V1B.push_back(GetBaseMaskValue(SVI0B, V1[I].first));
  }
  for (unsigned I = 0; I < V2.size(); I++) {
    V2A.push_back(GetBaseMaskValue(SVI1A, V2[I].first));
    V2B.push_back(GetBaseMaskValue(SVI1B, V2[I].first));
  }
  while (V1A.size() < NumElts) {
    // ...
  }
  while (V2A.size() < NumElts) {
    // ...
  }

  auto AddShuffleCost = [&](InstructionCost C, Instruction *I) {
    // ...
    return C + TTI.getShuffleCost(/* ... */, VT, VT, SV->getShuffleMask(),
                                  CostKind);
  };

  // When vectors legalize to multiple registers, count only the unique
  // per-register sub-shuffles.
  unsigned ElementSize = VT->getElementType()->getPrimitiveSizeInBits();
  unsigned MaxVectorSize = /* ... */;
  unsigned MaxElementsInVector = MaxVectorSize / ElementSize;
  if (MaxElementsInVector == 0)
    return false;
  // ...
  std::set<SmallVector<int, 4>> UniqueShuffles;
  auto AddShuffleMaskAdjustedCost = [&](InstructionCost C, ArrayRef<int> Mask) {
    // ...
    unsigned NumFullVectors = Mask.size() / MaxElementsInVector;
    if (NumFullVectors < 2)
      return C + ShuffleCost;
    SmallVector<int, 4> SubShuffle(MaxElementsInVector);
    unsigned NumUniqueGroups = 0;
    unsigned NumGroups = Mask.size() / MaxElementsInVector;
    // Check each group of MaxElementsInVector contiguous elements for a
    // pattern not seen before.
    for (unsigned I = 0; I < NumFullVectors; ++I) {
      for (unsigned J = 0; J < MaxElementsInVector; ++J)
        SubShuffle[J] = Mask[MaxElementsInVector * I + J];
      if (UniqueShuffles.insert(SubShuffle).second)
        NumUniqueGroups += 1;
    }
    return C + ShuffleCost * NumUniqueGroups / NumGroups;
  };
  auto AddShuffleAdjustedCost = [&](InstructionCost C, Instruction *I) {
    // ...
    SmallVector<int, 16> Mask;
    SV->getShuffleMask(Mask);
    return AddShuffleMaskAdjustedCost(C, Mask);
  };
  // Check whether all input shuffles read the same two operands.
  auto AllShufflesHaveSameOperands =
      [](SmallPtrSetImpl<Instruction *> &InputShuffles) {
        if (InputShuffles.size() < 2)
          return false;
        ShuffleVectorInst *FirstSV =
            dyn_cast<ShuffleVectorInst>(*InputShuffles.begin());
        // ...
        return std::all_of(
            std::next(InputShuffles.begin()), InputShuffles.end(),
            [&](Instruction *I) {
              ShuffleVectorInst *SV = dyn_cast<ShuffleVectorInst>(I);
              return SV && SV->getOperand(0) == In0 && SV->getOperand(1) == In1;
            });
      };

  // Get the costs of the shuffles + binops before and after.
  // ...
  CostBefore += std::accumulate(Shuffles.begin(), Shuffles.end(),
                                InstructionCost(0), AddShuffleCost);
  if (AllShufflesHaveSameOperands(InputShuffles)) {
    UniqueShuffles.clear();
    CostBefore += std::accumulate(InputShuffles.begin(), InputShuffles.end(),
                                  InstructionCost(0), AddShuffleAdjustedCost);
  } else {
    CostBefore += std::accumulate(InputShuffles.begin(), InputShuffles.end(),
                                  InstructionCost(0), AddShuffleCost);
  }

  // The new binops are dead for lanes past the used shuffle lengths.
  FixedVectorType *Op0SmallVT =
      FixedVectorType::get(VT->getScalarType(), V1.size());
  FixedVectorType *Op1SmallVT =
      FixedVectorType::get(VT->getScalarType(), V2.size());
  // ...
  UniqueShuffles.clear();
  CostAfter += std::accumulate(ReconstructMasks.begin(), ReconstructMasks.end(),
                               InstructionCost(0), AddShuffleMaskAdjustedCost);
  std::set<SmallVector<int>> OutputShuffleMasks({V1A, V1B, V2A, V2B});
  CostAfter +=
      std::accumulate(OutputShuffleMasks.begin(), OutputShuffleMasks.end(),
                      InstructionCost(0), AddShuffleMaskCost);

  LLVM_DEBUG(dbgs() << "Found a binop select shuffle pattern: " << I << "\n");
  LLVM_DEBUG(dbgs() << "  CostBefore: " << CostBefore
                    << " vs CostAfter: " << CostAfter << "\n");
  if (CostBefore < CostAfter ||
      (CostBefore == CostAfter && !feedsIntoVectorReduction(SVI)))
    return false;

  // The cost model has passed; create the new instructions.
  auto GetShuffleOperand = [&](Instruction *I, unsigned Op) -> Value * {
    // ...
    if (InputShuffles.contains(SSV))
      // ...
    return SV->getOperand(Op);
  };
  Value *NSV0A = Builder.CreateShuffleVector(GetShuffleOperand(SVI0A, 0),
                                             GetShuffleOperand(SVI0A, 1), V1A);
  Value *NSV0B = Builder.CreateShuffleVector(GetShuffleOperand(SVI0B, 0),
                                             GetShuffleOperand(SVI0B, 1), V1B);
  Value *NSV1A = Builder.CreateShuffleVector(GetShuffleOperand(SVI1A, 0),
                                             GetShuffleOperand(SVI1A, 1), V2A);
  Value *NSV1B = Builder.CreateShuffleVector(GetShuffleOperand(SVI1B, 0),
                                             GetShuffleOperand(SVI1B, 1), V2B);
  // ...
  if (auto *I = dyn_cast<Instruction>(NOp0))
    I->copyIRFlags(Op0, true);
  // ...
  if (auto *I = dyn_cast<Instruction>(NOp1))
    I->copyIRFlags(Op1, true);

  for (int S = 0, E = ReconstructMasks.size(); S != E; S++) {
    Value *NSV = Builder.CreateShuffleVector(NOp0, NOp1, ReconstructMasks[S]);
    replaceValue(*Shuffles[S], *NSV, false);
  }

  Worklist.pushValue(NSV0A);
  Worklist.pushValue(NSV0B);
  Worklist.pushValue(NSV1A);
  Worklist.pushValue(NSV1B);
  // ...
  return true;
}
bool VectorCombine::shrinkType(Instruction &I) {
  Value *ZExted, *OtherOperand;
  // ...
  Value *ZExtOperand = I.getOperand(I.getOperand(0) == OtherOperand ? 1 : 0);
  // ...
  unsigned BW = SmallTy->getElementType()->getPrimitiveSizeInBits();

  if (I.getOpcode() == Instruction::LShr) {
    // The shift amount must be less than the width of the smaller type,
    // otherwise the narrow lshr would produce poison.
    // ...
  }
  // ...
  InstructionCost ZExtCost = TTI.getCastInstrCost(
      Instruction::ZExt, BigTy, SmallTy,
      TargetTransformInfo::CastContextHint::None, CostKind);
  // ...
  // Check that we can sink the zext past every user of its operand.
  for (User *U : ZExtOperand->users()) {
    // ...
    ShrinkCost += ZExtCost;
    // ...
  }
  // ...
  ShrinkCost += ZExtCost;
  // ...
  InstructionCost TruncCost = TTI.getCastInstrCost(
      Instruction::Trunc, SmallTy, BigTy,
      TargetTransformInfo::CastContextHint::None, CostKind);
  // ...
  if (ShrinkCost > CurrentCost)
    return false;

  Value *Op0 = ZExted;
  // ...
  // Keep the order of operands the same.
  if (I.getOperand(0) == OtherOperand)
    std::swap(Op0, Op1);
  // ...
  replaceValue(I, *NewZExtr);
  return true;
}
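// Illustrative sketch of the insert/extract-to-shuffle fold below:
//
//   %e = extractelement <4 x i32> %src, i64 3
//   %r = insertelement <4 x i32> %dst, i32 %e, i64 0
// -->
//   %r = shufflevector <4 x i32> %dst, <4 x i32> %src,
//                      <4 x i32> <i32 7, i32 1, i32 2, i32 3>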
bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) {
  Value *DstVec, *SrcVec;
  uint64_t ExtIdx, InsIdx;
  // ...
  if (!DstVecTy || !SrcVecTy ||
      SrcVecTy->getElementType() != DstVecTy->getElementType())
    return false;
  // ...
  if (InsIdx >= NumDstElts || ExtIdx >= NumSrcElts || NumDstElts == 1)
    return false;
  // ...
  bool NeedExpOrNarrow = NumSrcElts != NumDstElts;
  // ...
  if (NeedDstSrcSwap) {
    // ...
    Mask[InsIdx] = ExtIdx % NumDstElts;
    // ...
  } else {
    std::iota(Mask.begin(), Mask.end(), 0);
    Mask[InsIdx] = (ExtIdx % NumDstElts) + NumDstElts;
  }
  // ...
  SmallVector<int> ExtToVecMask;
  if (!NeedExpOrNarrow) {
    NewCost = TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc,
                                 DstVecTy, DstVecTy, Mask, CostKind, 0,
                                 nullptr, {DstVec, SrcVec});
  } else {
    // When the source vector has a different length, first shuffle it to the
    // destination length.
    // ...
    ExtToVecMask[ExtIdx % NumDstElts] = ExtIdx;
    NewCost += TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
                                  DstVecTy, SrcVecTy, ExtToVecMask, CostKind);
  }
  // ...
  if (!Ext->hasOneUse())
    NewCost += /* ... */;

  LLVM_DEBUG(dbgs() << "Found an insert/extract shuffle-like pair: " << I
                    << "\n  OldCost: " << OldCost << " vs NewCost: " << NewCost
                    << "\n");
  if (OldCost < NewCost)
    return false;

  if (NeedExpOrNarrow) {
    if (!NeedDstSrcSwap)
      // ...
  }
  // ...
  replaceValue(I, *Shuf);
  return true;
}
bool VectorCombine::foldInterleaveIntrinsics(Instruction &I) {
  // Fold an interleave of two constant splats into one double-width splat.
  const APInt *SplatVal0, *SplatVal1;
  // ...
  auto *ExtVTy = VectorType::getExtendedElementVectorType(VTy);
  unsigned Width = VTy->getElementType()->getIntegerBitWidth();
  // ...
  LLVM_DEBUG(dbgs() << "VC: The cost to cast from " << *ExtVTy << " to "
                    << *I.getType() << " is too high.\n");
  // ...
  // Pack the two splat values: the second splat occupies the high half of
  // each extended element.
  APInt NewSplatVal = SplatVal1->zext(Width * 2);
  NewSplatVal <<= Width;
  NewSplatVal |= SplatVal0->zext(Width * 2);
  auto *NewSplat = ConstantVector::getSplat(
      ExtVTy->getElementCount(), ConstantInt::get(F.getContext(), NewSplatVal));
  // ...
  return true;
}
bool VectorCombine::shrinkLoadForShuffles(Instruction &I) {
  auto *OldLoad = dyn_cast<LoadInst>(&I);
  if (!OldLoad || !OldLoad->isSimple())
    return false;
  // ...
  unsigned const OldNumElements = OldLoadTy->getNumElements();

  // Search all uses: if every use is a shufflevector with a poison second
  // operand, find the min/max element indices referenced by all masks.
  using IndexRange = std::pair<int, int>;
  auto GetIndexRangeInShuffles = [&]() -> std::optional<IndexRange> {
    IndexRange OutputRange = IndexRange(OldNumElements, -1);
    for (llvm::Use &Use : I.uses()) {
      User *Shuffle = Use.getUser();
      // ...
      if (/* not the required pattern */)
        return std::nullopt;
      // ...
      // Ignore sentinel (-1) mask values.
      for (int Index : Mask) {
        if (Index >= 0 && Index < static_cast<int>(OldNumElements)) {
          OutputRange.first = std::min(Index, OutputRange.first);
          OutputRange.second = std::max(Index, OutputRange.second);
        }
      }
    }
    if (OutputRange.second < OutputRange.first)
      return std::nullopt;
    return OutputRange;
  };

  if (std::optional<IndexRange> Indices = GetIndexRangeInShuffles()) {
    unsigned const NewNumElements = Indices->second + 1u;

    // If the used range is smaller than the full load, try a smaller load.
    if (NewNumElements < OldNumElements) {
      // ...
      Type *ElemTy = OldLoadTy->getElementType();
      // ...
      Value *PtrOp = OldLoad->getPointerOperand();

      InstructionCost OldCost = TTI.getMemoryOpCost(
          Instruction::Load, OldLoad->getType(), OldLoad->getAlign(),
          OldLoad->getPointerAddressSpace(), CostKind);
      InstructionCost NewCost = TTI.getMemoryOpCost(
          Instruction::Load, NewLoadTy, OldLoad->getAlign(),
          OldLoad->getPointerAddressSpace(), CostKind);

      using UseEntry = std::pair<ShuffleVectorInst *, std::vector<int>>;
      // ...
      unsigned const MaxIndex = NewNumElements * 2u;

      for (llvm::Use &Use : I.uses()) {
        auto *Shuffle = cast<ShuffleVectorInst>(Use.getUser());
        ArrayRef<int> OldMask = Shuffle->getShuffleMask();
        // ...
        for (int Index : OldMask) {
          if (Index >= static_cast<int>(MaxIndex)) {
            // ...
          }
          // ...
        }
        // ...
      }

      LLVM_DEBUG(
          dbgs() << "Found a load used only by shufflevector instructions: "
                 << I << "\n  OldCost: " << OldCost
                 << " vs NewCost: " << NewCost << "\n");
      if (OldCost < NewCost || !NewCost.isValid())
        return false;

      // Create the smaller load and rewrite the shuffle uses.
      // ...
      NewLoad->copyMetadata(I);
      // ...
      for (UseEntry &Use : NewUses) {
        ShuffleVectorInst *Shuffle = Use.first;
        std::vector<int> &NewMask = Use.second;
        // ...
        replaceValue(*Shuffle, *NewShuffle, false);
      }
      return true;
    }
  }
  return false;
}
bool VectorCombine::shrinkPhiOfShuffles(Instruction &I) {
  auto *Phi = dyn_cast<PHINode>(&I);
  if (!Phi || Phi->getNumIncomingValues() != 2u)
    return false;

  // Both incoming values must be one-use, length-widening shuffles of the
  // same narrow input vector.
  ArrayRef<int> Mask0;
  ArrayRef<int> Mask1;
  // ...
  auto const InputNumElements = InputVT->getNumElements();
  // ...
  if (InputNumElements >= ResultVT->getNumElements())
    return false;

  // Take the difference of the two shuffle masks at each index; lanes that
  // are poison in both masks stay poison.
  SmallVector<int, 16> NewMask;
  for (auto [M0, M1] : zip(Mask0, Mask1)) {
    if (M0 >= 0 && M1 >= 0)
      NewMask.push_back(M0 - M1);
    else if (M0 == -1 && M1 == -1)
      NewMask.push_back(PoisonMaskElem);
    else
      return false;
  }
  // ... (all non-poison differences must agree for the fold to apply)
  int MaskOffset = NewMask[0u];
  unsigned Index = (InputNumElements + MaskOffset) % InputNumElements;

  // Build the rotated mask for the replacement shuffle.
  for (unsigned I = 0u; I < InputNumElements; ++I) {
    // ...
  }
  // ...
  LLVM_DEBUG(dbgs() << /* ... */ "\n  OldCost: " << OldCost
                    << " vs NewCost: " << NewCost /* ... */);

  if (NewCost > OldCost)
    return false;

  // Perform the phi on the narrow values and emit a single shuffle after it.
  auto *NewPhi = Builder.CreatePHI(NewShuf0->getType(), 2u);
  NewPhi->addIncoming(NewShuf0, Phi->getIncomingBlock(0u));
  NewPhi->addIncoming(Op, Phi->getIncomingBlock(1u));
  // ...
  replaceValue(*Phi, *NewShuf1);
  return true;
}
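// Shape of the rewrite (illustrative, not from the source): instead of
//   %phi = phi <8 x i32> [ %widen.shuf.a, %bb0 ], [ %widen.shuf.b, %bb1 ]
// the merge happens on the narrow <InputNumElements x ty> values and one
// widening shuffle is emitted after the phi, removing a shuffle from each
// incoming edge at the cost of a single shuffle of the merged value.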
bool VectorCombine::run() {
  // ...
  auto FoldInst = [this](Instruction &I) {
    Builder.SetInsertPoint(&I);
    bool IsVectorType = isa<VectorType>(I.getType());
    bool IsFixedVectorType = isa<FixedVectorType>(I.getType());
    auto Opcode = I.getOpcode();
    // ...
    // These folds should be beneficial regardless of where this pass runs in
    // the optimization pipeline.
    if (IsFixedVectorType) {
      switch (Opcode) {
      case Instruction::InsertElement:
        if (vectorizeLoadInsert(I))
          return true;
        break;
      case Instruction::ShuffleVector:
        if (widenSubvectorLoad(I))
          return true;
        break;
      default:
        break;
      }
    }

    // These folds also work with scalable vectors.
    if (IsVectorType) {
      if (scalarizeOpOrCmp(I))
        return true;
      if (scalarizeLoad(I))
        return true;
      if (scalarizeExtExtract(I))
        return true;
      if (scalarizeVPIntrinsic(I))
        return true;
      if (foldInterleaveIntrinsics(I))
        return true;
    }

    if (Opcode == Instruction::Store)
      if (foldSingleElementStore(I))
        return true;

    // If this is an early pipeline invocation of this pass, we are done.
    if (TryEarlyFoldsOnly)
      return false;

    // Otherwise, try folds that improve codegen but may interfere with early
    // IR canonicalizations.
    if (IsFixedVectorType) {
      switch (Opcode) {
      case Instruction::InsertElement:
        if (foldInsExtFNeg(I))
          return true;
        if (foldInsExtBinop(I))
          return true;
        if (foldInsExtVectorToShuffle(I))
          return true;
        break;
      case Instruction::ShuffleVector:
        if (foldPermuteOfBinops(I))
          return true;
        if (foldShuffleOfBinops(I))
          return true;
        if (foldShuffleOfSelects(I))
          return true;
        if (foldShuffleOfCastops(I))
          return true;
        if (foldShuffleOfShuffles(I))
          return true;
        if (foldPermuteOfIntrinsic(I))
          return true;
        if (foldShufflesOfLengthChangingShuffles(I))
          return true;
        if (foldShuffleOfIntrinsics(I))
          return true;
        if (foldSelectShuffle(I))
          return true;
        if (foldShuffleToIdentity(I))
          return true;
        break;
      case Instruction::Load:
        if (shrinkLoadForShuffles(I))
          return true;
        break;
      case Instruction::BitCast:
        if (foldBitcastShuffle(I))
          return true;
        if (foldSelectsFromBitcast(I))
          return true;
        break;
      case Instruction::And:
      case Instruction::Or:
      case Instruction::Xor:
        if (foldBitOpOfCastops(I))
          return true;
        if (foldBitOpOfCastConstant(I))
          return true;
        break;
      case Instruction::PHI:
        if (shrinkPhiOfShuffles(I))
          return true;
        break;
      default:
        break;
      }
    } else {
      switch (Opcode) {
      case Instruction::Call:
        if (foldShuffleFromReductions(I))
          return true;
        if (foldCastFromReductions(I))
          return true;
        break;
      case Instruction::ExtractElement:
        if (foldShuffleChainsToReduce(I))
          return true;
        break;
      case Instruction::ICmp:
      case Instruction::FCmp:
        if (foldExtractExtract(I))
          return true;
        break;
      case Instruction::Or:
        if (foldConcatOfBoolMasks(I))
          return true;
        [[fallthrough]];
      default:
        if (Instruction::isBinaryOp(Opcode)) {
          if (foldExtractExtract(I))
            return true;
          if (foldExtractedCmps(I))
            return true;
          if (foldBinopOfReductions(I))
            return true;
        }
        break;
      }
    }
    return false;
  };

  bool MadeChange = false;
  for (BasicBlock &BB : F) {
    // ... (skip unreachable blocks; advance the iterator before folding so
    //      instructions can be erased mid-walk)
    if (!I->isDebugOrPseudoInst())
      MadeChange |= FoldInst(*I);
    // ...
  }

  // Keep folding until the worklist of instructions touched by earlier
  // transforms is exhausted.
  while (!Worklist.isEmpty()) {
    // ... (pop the next instruction; erase it instead if trivially dead)
    MadeChange |= FoldInst(*I);
  }

  return MadeChange;
}