43#define DEBUG_TYPE "vector-combine"
// Pass statistics: one counter per kind of transform this pass performs;
// each description string names the transform being counted.
49STATISTIC(NumVecLoad,
"Number of vector loads formed");
50STATISTIC(NumVecCmp,
"Number of vector compares formed");
51STATISTIC(NumVecBO,
"Number of vector binops formed");
52STATISTIC(NumVecCmpBO,
"Number of vector compare + binop formed");
53STATISTIC(NumShufOfBitcast,
"Number of shuffles moved after bitcast");
54STATISTIC(NumScalarOps,
"Number of scalar unary + binary ops formed");
55STATISTIC(NumScalarCmp,
"Number of scalar compares formed");
56STATISTIC(NumScalarIntrinsic,
"Number of scalar intrinsic calls formed");
60 cl::desc(
"Disable all vector combine transforms"));
64 cl::desc(
"Disable binop extract to shuffle transforms"));
68 cl::desc(
"Max number of instructions to scan for vector combining."));
// Sentinel marking an extract/insert index as unknown or invalid
// (max unsigned cannot be a real vector element index here).
70static const unsigned InvalidIndex = std::numeric_limits<unsigned>::max();
78 bool TryEarlyFoldsOnly)
81 TryEarlyFoldsOnly(TryEarlyFoldsOnly) {}
88 const TargetTransformInfo &TTI;
89 const DominatorTree &DT;
94 const SimplifyQuery SQ;
98 bool TryEarlyFoldsOnly;
100 InstructionWorklist Worklist;
109 bool vectorizeLoadInsert(Instruction &
I);
110 bool widenSubvectorLoad(Instruction &
I);
111 ExtractElementInst *getShuffleExtract(ExtractElementInst *Ext0,
112 ExtractElementInst *Ext1,
113 unsigned PreferredExtractIndex)
const;
114 bool isExtractExtractCheap(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
115 const Instruction &
I,
116 ExtractElementInst *&ConvertToShuffle,
117 unsigned PreferredExtractIndex);
120 bool foldExtractExtract(Instruction &
I);
121 bool foldInsExtFNeg(Instruction &
I);
122 bool foldInsExtBinop(Instruction &
I);
123 bool foldInsExtVectorToShuffle(Instruction &
I);
124 bool foldBitOpOfCastops(Instruction &
I);
125 bool foldBitOpOfCastConstant(Instruction &
I);
126 bool foldBitcastShuffle(Instruction &
I);
127 bool scalarizeOpOrCmp(Instruction &
I);
128 bool scalarizeVPIntrinsic(Instruction &
I);
129 bool foldExtractedCmps(Instruction &
I);
130 bool foldBinopOfReductions(Instruction &
I);
131 bool foldSingleElementStore(Instruction &
I);
132 bool scalarizeLoad(Instruction &
I);
133 bool scalarizeLoadExtract(LoadInst *LI, VectorType *VecTy,
Value *Ptr);
134 bool scalarizeLoadBitcast(LoadInst *LI, VectorType *VecTy,
Value *Ptr);
135 bool scalarizeExtExtract(Instruction &
I);
136 bool foldConcatOfBoolMasks(Instruction &
I);
137 bool foldPermuteOfBinops(Instruction &
I);
138 bool foldShuffleOfBinops(Instruction &
I);
139 bool foldShuffleOfSelects(Instruction &
I);
140 bool foldShuffleOfCastops(Instruction &
I);
141 bool foldShuffleOfShuffles(Instruction &
I);
142 bool foldShuffleOfIntrinsics(Instruction &
I);
143 bool foldShuffleToIdentity(Instruction &
I);
144 bool foldShuffleFromReductions(Instruction &
I);
145 bool foldShuffleChainsToReduce(Instruction &
I);
146 bool foldCastFromReductions(Instruction &
I);
147 bool foldSelectShuffle(Instruction &
I,
bool FromReduction =
false);
148 bool foldInterleaveIntrinsics(Instruction &
I);
149 bool shrinkType(Instruction &
I);
150 bool shrinkLoadForShuffles(Instruction &
I);
151 bool shrinkPhiOfShuffles(Instruction &
I);
153 void replaceValue(Instruction &Old,
Value &New,
bool Erase =
true) {
159 Worklist.pushUsersToWorkList(*NewI);
160 Worklist.pushValue(NewI);
177 SmallPtrSet<Value *, 4> Visited;
182 OpI,
nullptr,
nullptr, [&](
Value *V) {
187 NextInst = NextInst->getNextNode();
192 Worklist.pushUsersToWorkList(*OpI);
193 Worklist.pushValue(OpI);
213 if (!Load || !Load->isSimple() || !Load->hasOneUse() ||
214 Load->getFunction()->hasFnAttribute(Attribute::SanitizeMemTag) ||
220 Type *ScalarTy = Load->getType()->getScalarType();
222 unsigned MinVectorSize =
TTI.getMinVectorRegisterBitWidth();
223 if (!ScalarSize || !MinVectorSize || MinVectorSize % ScalarSize != 0 ||
230bool VectorCombine::vectorizeLoadInsert(
Instruction &
I) {
256 Value *SrcPtr =
Load->getPointerOperand()->stripPointerCasts();
259 unsigned MinVecNumElts = MinVectorSize / ScalarSize;
260 auto *MinVecTy = VectorType::get(ScalarTy, MinVecNumElts,
false);
261 unsigned OffsetEltIndex = 0;
269 unsigned OffsetBitWidth =
DL->getIndexTypeSizeInBits(SrcPtr->
getType());
270 APInt
Offset(OffsetBitWidth, 0);
280 uint64_t ScalarSizeInBytes = ScalarSize / 8;
281 if (
Offset.urem(ScalarSizeInBytes) != 0)
285 OffsetEltIndex =
Offset.udiv(ScalarSizeInBytes).getZExtValue();
286 if (OffsetEltIndex >= MinVecNumElts)
303 unsigned AS =
Load->getPointerAddressSpace();
322 unsigned OutputNumElts = Ty->getNumElements();
324 assert(OffsetEltIndex < MinVecNumElts &&
"Address offset too big");
325 Mask[0] = OffsetEltIndex;
332 if (OldCost < NewCost || !NewCost.
isValid())
343 replaceValue(
I, *VecLd);
351bool VectorCombine::widenSubvectorLoad(Instruction &
I) {
354 if (!Shuf->isIdentityWithPadding())
360 unsigned OpIndex =
any_of(Shuf->getShuffleMask(), [&NumOpElts](
int M) {
361 return M >= (int)(NumOpElts);
372 Value *SrcPtr =
Load->getPointerOperand()->stripPointerCasts();
380 unsigned AS =
Load->getPointerAddressSpace();
395 if (OldCost < NewCost || !NewCost.
isValid())
402 replaceValue(
I, *VecLd);
409ExtractElementInst *VectorCombine::getShuffleExtract(
410 ExtractElementInst *Ext0, ExtractElementInst *Ext1,
414 assert(Index0C && Index1C &&
"Expected constant extract indexes");
416 unsigned Index0 = Index0C->getZExtValue();
417 unsigned Index1 = Index1C->getZExtValue();
420 if (Index0 == Index1)
444 if (PreferredExtractIndex == Index0)
446 if (PreferredExtractIndex == Index1)
450 return Index0 > Index1 ? Ext0 : Ext1;
458bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
459 ExtractElementInst *Ext1,
460 const Instruction &
I,
461 ExtractElementInst *&ConvertToShuffle,
462 unsigned PreferredExtractIndex) {
465 assert(Ext0IndexC && Ext1IndexC &&
"Expected constant extract indexes");
467 unsigned Opcode =
I.getOpcode();
480 assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
481 "Expected a compare");
491 unsigned Ext0Index = Ext0IndexC->getZExtValue();
492 unsigned Ext1Index = Ext1IndexC->getZExtValue();
506 unsigned BestExtIndex = Extract0Cost > Extract1Cost ? Ext0Index : Ext1Index;
507 unsigned BestInsIndex = Extract0Cost > Extract1Cost ? Ext1Index : Ext0Index;
508 InstructionCost CheapExtractCost = std::min(Extract0Cost, Extract1Cost);
513 if (Ext0Src == Ext1Src && Ext0Index == Ext1Index) {
518 bool HasUseTax = Ext0 == Ext1 ? !Ext0->
hasNUses(2)
520 OldCost = CheapExtractCost + ScalarOpCost;
521 NewCost = VectorOpCost + CheapExtractCost + HasUseTax * CheapExtractCost;
525 OldCost = Extract0Cost + Extract1Cost + ScalarOpCost;
526 NewCost = VectorOpCost + CheapExtractCost +
531 ConvertToShuffle = getShuffleExtract(Ext0, Ext1, PreferredExtractIndex);
532 if (ConvertToShuffle) {
544 SmallVector<int> ShuffleMask(FixedVecTy->getNumElements(),
546 ShuffleMask[BestInsIndex] = BestExtIndex;
548 VecTy, VecTy, ShuffleMask,
CostKind, 0,
549 nullptr, {ConvertToShuffle});
552 VecTy, VecTy, {},
CostKind, 0,
nullptr,
560 return OldCost < NewCost;
572 ShufMask[NewIndex] = OldIndex;
573 return Builder.CreateShuffleVector(Vec, ShufMask,
"shift");
625 V1,
"foldExtExtBinop");
630 VecBOInst->copyIRFlags(&
I);
636bool VectorCombine::foldExtractExtract(Instruction &
I) {
667 ExtractElementInst *ExtractToChange;
668 if (isExtractExtractCheap(Ext0, Ext1,
I, ExtractToChange, InsertIndex))
674 if (ExtractToChange) {
675 unsigned CheapExtractIdx = ExtractToChange == Ext0 ? C1 : C0;
680 if (ExtractToChange == Ext0)
689 ? foldExtExtCmp(ExtOp0, ExtOp1, ExtIndex,
I)
690 : foldExtExtBinop(ExtOp0, ExtOp1, ExtIndex,
I);
693 replaceValue(
I, *NewExt);
699bool VectorCombine::foldInsExtFNeg(Instruction &
I) {
702 uint64_t ExtIdx, InsIdx;
717 auto *DstVecScalarTy = DstVecTy->getScalarType();
719 if (!SrcVecTy || DstVecScalarTy != SrcVecTy->getScalarType())
724 unsigned NumDstElts = DstVecTy->getNumElements();
725 unsigned NumSrcElts = SrcVecTy->getNumElements();
726 if (ExtIdx > NumSrcElts || InsIdx >= NumDstElts || NumDstElts == 1)
732 SmallVector<int>
Mask(NumDstElts);
733 std::iota(
Mask.begin(),
Mask.end(), 0);
734 Mask[InsIdx] = (ExtIdx % NumDstElts) + NumDstElts;
750 bool NeedLenChg = SrcVecTy->getNumElements() != NumDstElts;
753 SmallVector<int> SrcMask;
756 SrcMask[ExtIdx % NumDstElts] = ExtIdx;
758 DstVecTy, SrcVecTy, SrcMask,
CostKind);
762 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
764 if (NewCost > OldCost)
767 Value *NewShuf, *LenChgShuf =
nullptr;
781 replaceValue(
I, *NewShuf);
787bool VectorCombine::foldInsExtBinop(Instruction &
I) {
788 BinaryOperator *VecBinOp, *SclBinOp;
820 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
822 if (NewCost > OldCost)
833 NewInst->copyIRFlags(VecBinOp);
834 NewInst->andIRFlags(SclBinOp);
839 replaceValue(
I, *NewBO);
845bool VectorCombine::foldBitOpOfCastops(Instruction &
I) {
848 if (!BinOp || !BinOp->isBitwiseLogicOp())
854 if (!LHSCast || !RHSCast) {
855 LLVM_DEBUG(
dbgs() <<
" One or both operands are not cast instructions\n");
861 if (CastOpcode != RHSCast->getOpcode())
865 switch (CastOpcode) {
866 case Instruction::BitCast:
867 case Instruction::Trunc:
868 case Instruction::SExt:
869 case Instruction::ZExt:
875 Value *LHSSrc = LHSCast->getOperand(0);
876 Value *RHSSrc = RHSCast->getOperand(0);
882 auto *SrcTy = LHSSrc->
getType();
883 auto *DstTy =
I.getType();
886 if (CastOpcode != Instruction::BitCast &&
891 if (!SrcTy->getScalarType()->isIntegerTy() ||
892 !DstTy->getScalarType()->isIntegerTy())
907 LHSCastCost + RHSCastCost;
918 if (!LHSCast->hasOneUse())
919 NewCost += LHSCastCost;
920 if (!RHSCast->hasOneUse())
921 NewCost += RHSCastCost;
924 <<
" NewCost=" << NewCost <<
"\n");
926 if (NewCost > OldCost)
931 BinOp->getName() +
".inner");
933 NewBinOp->copyIRFlags(BinOp);
947 replaceValue(
I, *Result);
956bool VectorCombine::foldBitOpOfCastConstant(Instruction &
I) {
972 switch (CastOpcode) {
973 case Instruction::BitCast:
974 case Instruction::ZExt:
975 case Instruction::SExt:
976 case Instruction::Trunc:
982 Value *LHSSrc = LHSCast->getOperand(0);
984 auto *SrcTy = LHSSrc->
getType();
985 auto *DstTy =
I.getType();
988 if (CastOpcode != Instruction::BitCast &&
993 if (!SrcTy->getScalarType()->isIntegerTy() ||
994 !DstTy->getScalarType()->isIntegerTy())
998 PreservedCastFlags RHSFlags;
1023 if (!LHSCast->hasOneUse())
1024 NewCost += LHSCastCost;
1026 LLVM_DEBUG(
dbgs() <<
"foldBitOpOfCastConstant: OldCost=" << OldCost
1027 <<
" NewCost=" << NewCost <<
"\n");
1029 if (NewCost > OldCost)
1034 LHSSrc, InvC,
I.getName() +
".inner");
1036 NewBinOp->copyIRFlags(&
I);
1056 replaceValue(
I, *Result);
1063bool VectorCombine::foldBitcastShuffle(Instruction &
I) {
1077 if (!DestTy || !SrcTy)
1080 unsigned DestEltSize = DestTy->getScalarSizeInBits();
1081 unsigned SrcEltSize = SrcTy->getScalarSizeInBits();
1082 if (SrcTy->getPrimitiveSizeInBits() % DestEltSize != 0)
1092 if (!(BCTy0 && BCTy0->getElementType() == DestTy->getElementType()) &&
1093 !(BCTy1 && BCTy1->getElementType() == DestTy->getElementType()))
1097 SmallVector<int, 16> NewMask;
1098 if (DestEltSize <= SrcEltSize) {
1101 assert(SrcEltSize % DestEltSize == 0 &&
"Unexpected shuffle mask");
1102 unsigned ScaleFactor = SrcEltSize / DestEltSize;
1107 assert(DestEltSize % SrcEltSize == 0 &&
"Unexpected shuffle mask");
1108 unsigned ScaleFactor = DestEltSize / SrcEltSize;
1115 unsigned NumSrcElts = SrcTy->getPrimitiveSizeInBits() / DestEltSize;
1116 auto *NewShuffleTy =
1118 auto *OldShuffleTy =
1120 unsigned NumOps = IsUnary ? 1 : 2;
1130 TargetTransformInfo::CastContextHint::None,
1135 TargetTransformInfo::CastContextHint::None,
1138 LLVM_DEBUG(
dbgs() <<
"Found a bitcasted shuffle: " <<
I <<
"\n OldCost: "
1139 << OldCost <<
" vs NewCost: " << NewCost <<
"\n");
1141 if (NewCost > OldCost || !NewCost.
isValid())
1149 replaceValue(
I, *Shuf);
1156bool VectorCombine::scalarizeVPIntrinsic(Instruction &
I) {
1170 if (!ScalarOp0 || !ScalarOp1)
1178 auto IsAllTrueMask = [](
Value *MaskVal) {
1181 return ConstValue->isAllOnesValue();
1195 SmallVector<int>
Mask;
1197 Mask.resize(FVTy->getNumElements(), 0);
1206 Args.push_back(
V->getType());
1207 IntrinsicCostAttributes
Attrs(IntrID, VecTy, Args);
1212 std::optional<unsigned> FunctionalOpcode =
1214 std::optional<Intrinsic::ID> ScalarIntrID = std::nullopt;
1215 if (!FunctionalOpcode) {
1224 IntrinsicCostAttributes
Attrs(*ScalarIntrID, VecTy->getScalarType(), Args);
1234 InstructionCost NewCost = ScalarOpCost + SplatCost + CostToKeepSplats;
1236 LLVM_DEBUG(
dbgs() <<
"Found a VP Intrinsic to scalarize: " << VPI
1239 <<
", Cost of scalarizing:" << NewCost <<
"\n");
1242 if (OldCost < NewCost || !NewCost.
isValid())
1253 bool SafeToSpeculate;
1259 *FunctionalOpcode, &VPI,
nullptr, &AC, &DT);
1260 if (!SafeToSpeculate &&
1267 {ScalarOp0, ScalarOp1})
1269 ScalarOp0, ScalarOp1);
1278bool VectorCombine::scalarizeOpOrCmp(Instruction &
I) {
1283 if (!UO && !BO && !CI && !
II)
1291 if (Arg->getType() !=
II->getType() &&
1301 for (User *U :
I.users())
1308 std::optional<uint64_t>
Index;
1310 auto Ops =
II ?
II->args() :
I.operands();
1314 uint64_t InsIdx = 0;
1319 if (OpTy->getElementCount().getKnownMinValue() <= InsIdx)
1325 else if (InsIdx != *Index)
1342 if (!
Index.has_value())
1346 Type *ScalarTy = VecTy->getScalarType();
1347 assert(VecTy->isVectorTy() &&
1350 "Unexpected types for insert element into binop or cmp");
1352 unsigned Opcode =
I.getOpcode();
1360 }
else if (UO || BO) {
1364 IntrinsicCostAttributes ScalarICA(
1365 II->getIntrinsicID(), ScalarTy,
1368 IntrinsicCostAttributes VectorICA(
1369 II->getIntrinsicID(), VecTy,
1376 Value *NewVecC =
nullptr;
1378 NewVecC =
simplifyCmpInst(CI->getPredicate(), VecCs[0], VecCs[1], SQ);
1381 simplifyUnOp(UO->getOpcode(), VecCs[0], UO->getFastMathFlags(), SQ);
1383 NewVecC =
simplifyBinOp(BO->getOpcode(), VecCs[0], VecCs[1], SQ);
1397 for (
auto [Idx,
Op, VecC, Scalar] :
enumerate(
Ops, VecCs, ScalarOps)) {
1399 II->getIntrinsicID(), Idx, &
TTI)))
1402 Instruction::InsertElement, VecTy,
CostKind, *Index, VecC, Scalar);
1403 OldCost += InsertCost;
1404 NewCost += !
Op->hasOneUse() * InsertCost;
1408 if (OldCost < NewCost || !NewCost.
isValid())
1418 ++NumScalarIntrinsic;
1428 Scalar = Builder.
CreateCmp(CI->getPredicate(), ScalarOps[0], ScalarOps[1]);
1434 Scalar->setName(
I.getName() +
".scalar");
1439 ScalarInst->copyIRFlags(&
I);
1442 replaceValue(
I, *Insert);
1449bool VectorCombine::foldExtractedCmps(Instruction &
I) {
1454 if (!BI || !
I.getType()->isIntegerTy(1))
1459 Value *B0 =
I.getOperand(0), *B1 =
I.getOperand(1);
1462 CmpPredicate
P0,
P1;
1474 uint64_t Index0, Index1;
1481 ExtractElementInst *ConvertToShuf = getShuffleExtract(Ext0, Ext1,
CostKind);
1484 assert((ConvertToShuf == Ext0 || ConvertToShuf == Ext1) &&
1485 "Unknown ExtractElementInst");
1490 unsigned CmpOpcode =
1505 Ext0Cost + Ext1Cost + CmpCost * 2 +
1511 int CheapIndex = ConvertToShuf == Ext0 ? Index1 : Index0;
1512 int ExpensiveIndex = ConvertToShuf == Ext0 ? Index0 : Index1;
1517 ShufMask[CheapIndex] = ExpensiveIndex;
1522 NewCost += Ext0->
hasOneUse() ? 0 : Ext0Cost;
1523 NewCost += Ext1->
hasOneUse() ? 0 : Ext1Cost;
1528 if (OldCost < NewCost || !NewCost.
isValid())
1538 Value *
LHS = ConvertToShuf == Ext0 ? Shuf : VCmp;
1539 Value *
RHS = ConvertToShuf == Ext0 ? VCmp : Shuf;
1542 replaceValue(
I, *NewExt);
1555 unsigned ReductionOpc =
1561 CostBeforeReduction =
1562 TTI.getCastInstrCost(RedOp->getOpcode(), VecRedTy, ExtType,
1564 CostAfterReduction =
1565 TTI.getExtendedReductionCost(ReductionOpc, IsUnsigned,
II.getType(),
1569 if (RedOp &&
II.getIntrinsicID() == Intrinsic::vector_reduce_add &&
1575 (Op0->
getOpcode() == RedOp->getOpcode() || Op0 == Op1)) {
1582 TTI.getCastInstrCost(Op0->
getOpcode(), MulType, ExtType,
1585 TTI.getArithmeticInstrCost(Instruction::Mul, MulType,
CostKind);
1587 TTI.getCastInstrCost(RedOp->getOpcode(), VecRedTy, MulType,
1590 CostBeforeReduction = ExtCost * 2 + MulCost + Ext2Cost;
1591 CostAfterReduction =
TTI.getMulAccReductionCost(
1592 IsUnsigned, ReductionOpc,
II.getType(), ExtType,
CostKind);
1595 CostAfterReduction =
TTI.getArithmeticReductionCost(ReductionOpc, VecRedTy,
1599bool VectorCombine::foldBinopOfReductions(Instruction &
I) {
1602 if (BinOpOpc == Instruction::Sub)
1603 ReductionIID = Intrinsic::vector_reduce_add;
1607 auto checkIntrinsicAndGetItsArgument = [](
Value *
V,
1612 if (
II->getIntrinsicID() == IID &&
II->hasOneUse())
1613 return II->getArgOperand(0);
1617 Value *V0 = checkIntrinsicAndGetItsArgument(
I.getOperand(0), ReductionIID);
1620 Value *V1 = checkIntrinsicAndGetItsArgument(
I.getOperand(1), ReductionIID);
1629 unsigned ReductionOpc =
1642 CostOfRedOperand0 + CostOfRedOperand1 +
1645 if (NewCost >= OldCost || !NewCost.
isValid())
1649 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
1652 if (BinOpOpc == Instruction::Or)
1653 VectorBO = Builder.
CreateOr(V0, V1,
"",
1659 replaceValue(
I, *Rdx);
1667 unsigned NumScanned = 0;
1668 return std::any_of(Begin, End, [&](
const Instruction &Instr) {
1677class ScalarizationResult {
1678 enum class StatusTy { Unsafe, Safe, SafeWithFreeze };
1683 ScalarizationResult(StatusTy Status,
Value *ToFreeze =
nullptr)
1684 : Status(Status), ToFreeze(ToFreeze) {}
1687 ScalarizationResult(
const ScalarizationResult &
Other) =
default;
1688 ~ScalarizationResult() {
1689 assert(!ToFreeze &&
"freeze() not called with ToFreeze being set");
1692 static ScalarizationResult unsafe() {
return {StatusTy::Unsafe}; }
1693 static ScalarizationResult safe() {
return {StatusTy::Safe}; }
1694 static ScalarizationResult safeWithFreeze(
Value *ToFreeze) {
1695 return {StatusTy::SafeWithFreeze, ToFreeze};
1699 bool isSafe()
const {
return Status == StatusTy::Safe; }
1701 bool isUnsafe()
const {
return Status == StatusTy::Unsafe; }
1704 bool isSafeWithFreeze()
const {
return Status == StatusTy::SafeWithFreeze; }
1709 Status = StatusTy::Unsafe;
1713 void freeze(IRBuilderBase &Builder, Instruction &UserI) {
1714 assert(isSafeWithFreeze() &&
1715 "should only be used when freezing is required");
1717 "UserI must be a user of ToFreeze");
1718 IRBuilder<>::InsertPointGuard Guard(Builder);
1723 if (
U.get() == ToFreeze)
1740 uint64_t NumElements = VecTy->getElementCount().getKnownMinValue();
1744 if (
C->getValue().ult(NumElements))
1745 return ScalarizationResult::safe();
1746 return ScalarizationResult::unsafe();
1751 return ScalarizationResult::unsafe();
1753 APInt Zero(IntWidth, 0);
1754 APInt MaxElts(IntWidth, NumElements);
1760 true, &AC, CtxI, &DT)))
1761 return ScalarizationResult::safe();
1762 return ScalarizationResult::unsafe();
1775 if (ValidIndices.
contains(IdxRange))
1776 return ScalarizationResult::safeWithFreeze(IdxBase);
1777 return ScalarizationResult::unsafe();
1789 C->getZExtValue() *
DL.getTypeStoreSize(ScalarType));
1801bool VectorCombine::foldSingleElementStore(Instruction &
I) {
1813 if (!
match(
SI->getValueOperand(),
1820 Value *SrcAddr =
Load->getPointerOperand()->stripPointerCasts();
1823 if (!
Load->isSimple() ||
Load->getParent() !=
SI->getParent() ||
1824 !
DL->typeSizeEqualsStoreSize(
Load->getType()->getScalarType()) ||
1825 SrcAddr !=
SI->getPointerOperand()->stripPointerCasts())
1829 if (ScalarizableIdx.isUnsafe() ||
1836 Worklist.
push(Load);
1838 if (ScalarizableIdx.isSafeWithFreeze())
1841 SI->getValueOperand()->getType(),
SI->getPointerOperand(),
1842 {ConstantInt::get(Idx->getType(), 0), Idx});
1846 std::max(
SI->getAlign(),
Load->getAlign()), NewElement->
getType(), Idx,
1849 replaceValue(
I, *NSI);
1859bool VectorCombine::scalarizeLoad(Instruction &
I) {
1866 if (LI->isVolatile() || !
DL->typeSizeEqualsStoreSize(VecTy->getScalarType()))
1869 bool AllExtracts =
true;
1870 bool AllBitcasts =
true;
1872 unsigned NumInstChecked = 0;
1877 for (User *U : LI->users()) {
1879 if (!UI || UI->getParent() != LI->getParent())
1884 if (UI->use_empty())
1888 AllExtracts =
false;
1890 AllBitcasts =
false;
1894 for (Instruction &
I :
1895 make_range(std::next(LI->getIterator()), UI->getIterator())) {
1902 LastCheckedInst = UI;
1907 return scalarizeLoadExtract(LI, VecTy, Ptr);
1909 return scalarizeLoadBitcast(LI, VecTy, Ptr);
1914bool VectorCombine::scalarizeLoadExtract(LoadInst *LI, VectorType *VecTy,
1919 DenseMap<ExtractElementInst *, ScalarizationResult> NeedFreeze;
1922 for (
auto &Pair : NeedFreeze)
1923 Pair.second.discard();
1931 for (User *U : LI->
users()) {
1936 if (ScalarIdx.isUnsafe())
1938 if (ScalarIdx.isSafeWithFreeze()) {
1939 NeedFreeze.try_emplace(UI, ScalarIdx);
1940 ScalarIdx.discard();
1946 Index ?
Index->getZExtValue() : -1);
1954 LLVM_DEBUG(
dbgs() <<
"Found all extractions of a vector load: " << *LI
1955 <<
"\n LoadExtractCost: " << OriginalCost
1956 <<
" vs ScalarizedCost: " << ScalarizedCost <<
"\n");
1958 if (ScalarizedCost >= OriginalCost)
1965 Type *ElemType = VecTy->getElementType();
1968 for (User *U : LI->
users()) {
1970 Value *Idx = EI->getIndexOperand();
1973 auto It = NeedFreeze.find(EI);
1974 if (It != NeedFreeze.end())
1981 Builder.
CreateLoad(ElemType,
GEP, EI->getName() +
".scalar"));
1983 Align ScalarOpAlignment =
1985 NewLoad->setAlignment(ScalarOpAlignment);
1988 size_t Offset = ConstIdx->getZExtValue() *
DL->getTypeStoreSize(ElemType);
1993 replaceValue(*EI, *NewLoad,
false);
1996 FailureGuard.release();
2001bool VectorCombine::scalarizeLoadBitcast(LoadInst *LI, VectorType *VecTy,
2007 Type *TargetScalarType =
nullptr;
2008 unsigned VecBitWidth =
DL->getTypeSizeInBits(VecTy);
2010 for (User *U : LI->
users()) {
2013 Type *DestTy = BC->getDestTy();
2017 unsigned DestBitWidth =
DL->getTypeSizeInBits(DestTy);
2018 if (DestBitWidth != VecBitWidth)
2022 if (!TargetScalarType)
2023 TargetScalarType = DestTy;
2024 else if (TargetScalarType != DestTy)
2032 if (!TargetScalarType)
2040 LLVM_DEBUG(
dbgs() <<
"Found vector load feeding only bitcasts: " << *LI
2041 <<
"\n OriginalCost: " << OriginalCost
2042 <<
" vs ScalarizedCost: " << ScalarizedCost <<
"\n");
2044 if (ScalarizedCost >= OriginalCost)
2055 ScalarLoad->copyMetadata(*LI);
2058 for (User *U : LI->
users()) {
2060 replaceValue(*BC, *ScalarLoad,
false);
2066bool VectorCombine::scalarizeExtExtract(Instruction &
I) {
2081 Type *ScalarDstTy = DstTy->getElementType();
2082 if (
DL->getTypeSizeInBits(SrcTy) !=
DL->getTypeSizeInBits(ScalarDstTy))
2088 unsigned ExtCnt = 0;
2089 bool ExtLane0 =
false;
2090 for (User *U :
Ext->users()) {
2104 Instruction::And, ScalarDstTy,
CostKind,
2107 (ExtCnt - ExtLane0) *
2109 Instruction::LShr, ScalarDstTy,
CostKind,
2112 if (ScalarCost > VectorCost)
2115 Value *ScalarV =
Ext->getOperand(0);
2122 SmallDenseSet<ConstantInt *, 8> ExtractedLanes;
2123 bool AllExtractsTriggerUB =
true;
2124 ExtractElementInst *LastExtract =
nullptr;
2126 for (User *U :
Ext->users()) {
2129 AllExtractsTriggerUB =
false;
2133 if (!LastExtract || LastExtract->
comesBefore(Extract))
2134 LastExtract = Extract;
2136 if (ExtractedLanes.
size() != DstTy->getNumElements() ||
2137 !AllExtractsTriggerUB ||
2145 uint64_t SrcEltSizeInBits =
DL->getTypeSizeInBits(SrcTy->getElementType());
2146 uint64_t EltBitMask = (1ull << SrcEltSizeInBits) - 1;
2147 uint64_t TotalBits =
DL->getTypeSizeInBits(SrcTy);
2149 Value *
Mask = ConstantInt::get(PackedTy, EltBitMask);
2150 for (User *U :
Ext->users()) {
2156 ? (TotalBits - SrcEltSizeInBits - Idx * SrcEltSizeInBits)
2157 : (Idx * SrcEltSizeInBits);
2160 U->replaceAllUsesWith(
And);
2168bool VectorCombine::foldConcatOfBoolMasks(Instruction &
I) {
2169 Type *Ty =
I.getType();
2174 if (
DL->isBigEndian())
2185 uint64_t ShAmtX = 0;
2193 uint64_t ShAmtY = 0;
2201 if (ShAmtX > ShAmtY) {
2209 uint64_t ShAmtDiff = ShAmtY - ShAmtX;
2210 unsigned NumSHL = (ShAmtX > 0) + (ShAmtY > 0);
2215 MaskTy->getNumElements() != ShAmtDiff ||
2216 MaskTy->getNumElements() > (
BitWidth / 2))
2221 Type::getIntNTy(Ty->
getContext(), ConcatTy->getNumElements());
2222 auto *MaskIntTy = Type::getIntNTy(Ty->
getContext(), ShAmtDiff);
2225 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
2242 if (Ty != ConcatIntTy)
2248 LLVM_DEBUG(
dbgs() <<
"Found a concatenation of bitcasted bool masks: " <<
I
2249 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2252 if (NewCost > OldCost)
2262 if (Ty != ConcatIntTy) {
2272 replaceValue(
I, *Result);
2278bool VectorCombine::foldPermuteOfBinops(Instruction &
I) {
2279 BinaryOperator *BinOp;
2280 ArrayRef<int> OuterMask;
2289 Value *Op00, *Op01, *Op10, *Op11;
2290 ArrayRef<int> Mask0, Mask1;
2297 if (!Match0 && !Match1)
2310 if (!ShuffleDstTy || !BinOpTy || !Op0Ty || !Op1Ty)
2313 unsigned NumSrcElts = BinOpTy->getNumElements();
2318 any_of(OuterMask, [NumSrcElts](
int M) {
return M >= (int)NumSrcElts; }))
2322 SmallVector<int> NewMask0, NewMask1;
2323 for (
int M : OuterMask) {
2324 if (M < 0 || M >= (
int)NumSrcElts) {
2328 NewMask0.
push_back(Match0 ? Mask0[M] : M);
2329 NewMask1.
push_back(Match1 ? Mask1[M] : M);
2333 unsigned NumOpElts = Op0Ty->getNumElements();
2334 bool IsIdentity0 = ShuffleDstTy == Op0Ty &&
2335 all_of(NewMask0, [NumOpElts](
int M) {
return M < (int)NumOpElts; }) &&
2337 bool IsIdentity1 = ShuffleDstTy == Op1Ty &&
2338 all_of(NewMask1, [NumOpElts](
int M) {
return M < (int)NumOpElts; }) &&
2345 BinOpTy, OuterMask,
CostKind, 0,
nullptr, {BinOp}, &
I);
2361 Op0Ty, NewMask0,
CostKind, 0,
nullptr, {Op00, Op01});
2365 Op1Ty, NewMask1,
CostKind, 0,
nullptr, {Op10, Op11});
2367 LLVM_DEBUG(
dbgs() <<
"Found a shuffle feeding a shuffled binop: " <<
I
2368 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2372 if (NewCost > OldCost)
2383 NewInst->copyIRFlags(BinOp);
2387 replaceValue(
I, *NewBO);
2393bool VectorCombine::foldShuffleOfBinops(Instruction &
I) {
2394 ArrayRef<int> OldMask;
2401 if (
LHS->getOpcode() !=
RHS->getOpcode())
2405 bool IsCommutative =
false;
2414 IsCommutative = BinaryOperator::isCommutative(BO->getOpcode());
2425 if (!ShuffleDstTy || !BinResTy || !BinOpTy ||
X->getType() !=
Z->getType())
2428 unsigned NumSrcElts = BinOpTy->getNumElements();
2431 if (IsCommutative &&
X != Z &&
Y != W && (
X == W ||
Y == Z))
2434 auto ConvertToUnary = [NumSrcElts](
int &
M) {
2435 if (M >= (
int)NumSrcElts)
2439 SmallVector<int> NewMask0(OldMask);
2447 SmallVector<int> NewMask1(OldMask);
2470 ArrayRef<int> InnerMask;
2472 m_Mask(InnerMask)))) &&
2475 [NumSrcElts](
int M) {
return M < (int)NumSrcElts; })) {
2487 bool ReducedInstCount =
false;
2488 ReducedInstCount |= MergeInner(
X, 0, NewMask0,
CostKind);
2489 ReducedInstCount |= MergeInner(
Y, 0, NewMask1,
CostKind);
2490 ReducedInstCount |= MergeInner(Z, NumSrcElts, NewMask0,
CostKind);
2491 ReducedInstCount |= MergeInner(W, NumSrcElts, NewMask1,
CostKind);
2493 auto *ShuffleCmpTy =
2510 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2517 if (ReducedInstCount ? (NewCost > OldCost) : (NewCost >= OldCost))
2525 : Builder.
CreateCmp(PredLHS, Shuf0, Shuf1);
2529 NewInst->copyIRFlags(
LHS);
2530 NewInst->andIRFlags(
RHS);
2535 replaceValue(
I, *NewBO);
2542bool VectorCombine::foldShuffleOfSelects(Instruction &
I) {
2544 Value *C1, *
T1, *F1, *C2, *T2, *F2;
2553 if (!C1VecTy || !C2VecTy || C1VecTy != C2VecTy)
2559 if (((SI0FOp ==
nullptr) != (SI1FOp ==
nullptr)) ||
2560 ((SI0FOp !=
nullptr) &&
2561 (SI0FOp->getFastMathFlags() != SI1FOp->getFastMathFlags())))
2567 auto SelOp = Instruction::Select;
2574 {
I.getOperand(0),
I.getOperand(1)}, &
I);
2578 Mask,
CostKind, 0,
nullptr, {C1, C2});
2584 toVectorTy(Type::getInt1Ty(
I.getContext()), DstVecTy->getNumElements()));
2589 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2591 if (NewCost > OldCost)
2600 NewSel = Builder.
CreateSelectFMF(ShuffleCmp, ShuffleTrue, ShuffleFalse,
2601 SI0FOp->getFastMathFlags());
2603 NewSel = Builder.
CreateSelect(ShuffleCmp, ShuffleTrue, ShuffleFalse);
2608 replaceValue(
I, *NewSel);
2614bool VectorCombine::foldShuffleOfCastops(Instruction &
I) {
2616 ArrayRef<int> OldMask;
2625 if (!C0 || (IsBinaryShuffle && !C1))
2632 if (!IsBinaryShuffle && Opcode == Instruction::BitCast)
2635 if (IsBinaryShuffle) {
2636 if (C0->getSrcTy() != C1->getSrcTy())
2639 if (Opcode != C1->getOpcode()) {
2641 Opcode = Instruction::SExt;
2650 if (!ShuffleDstTy || !CastDstTy || !CastSrcTy)
2653 unsigned NumSrcElts = CastSrcTy->getNumElements();
2654 unsigned NumDstElts = CastDstTy->getNumElements();
2655 assert((NumDstElts == NumSrcElts || Opcode == Instruction::BitCast) &&
2656 "Only bitcasts expected to alter src/dst element counts");
2660 if (NumDstElts != NumSrcElts && (NumSrcElts % NumDstElts) != 0 &&
2661 (NumDstElts % NumSrcElts) != 0)
2664 SmallVector<int, 16> NewMask;
2665 if (NumSrcElts >= NumDstElts) {
2668 assert(NumSrcElts % NumDstElts == 0 &&
"Unexpected shuffle mask");
2669 unsigned ScaleFactor = NumSrcElts / NumDstElts;
2674 assert(NumDstElts % NumSrcElts == 0 &&
"Unexpected shuffle mask");
2675 unsigned ScaleFactor = NumDstElts / NumSrcElts;
2680 auto *NewShuffleDstTy =
2689 if (IsBinaryShuffle)
2704 if (IsBinaryShuffle) {
2714 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2716 if (NewCost > OldCost)
2720 if (IsBinaryShuffle)
2730 NewInst->copyIRFlags(C0);
2731 if (IsBinaryShuffle)
2732 NewInst->andIRFlags(C1);
2736 replaceValue(
I, *Cast);
2746bool VectorCombine::foldShuffleOfShuffles(Instruction &
I) {
2747 ArrayRef<int> OuterMask;
2748 Value *OuterV0, *OuterV1;
2753 ArrayRef<int> InnerMask0, InnerMask1;
2754 Value *X0, *X1, *Y0, *Y1;
2759 if (!Match0 && !Match1)
2764 SmallVector<int, 16> PoisonMask1;
2769 InnerMask1 = PoisonMask1;
2773 X0 = Match0 ? X0 : OuterV0;
2774 Y0 = Match0 ? Y0 : OuterV0;
2775 X1 = Match1 ? X1 : OuterV1;
2776 Y1 = Match1 ? Y1 : OuterV1;
2780 if (!ShuffleDstTy || !ShuffleSrcTy || !ShuffleImmTy ||
2784 unsigned NumSrcElts = ShuffleSrcTy->getNumElements();
2785 unsigned NumImmElts = ShuffleImmTy->getNumElements();
2790 SmallVector<int, 16> NewMask(OuterMask);
2791 Value *NewX =
nullptr, *NewY =
nullptr;
2792 for (
int &M : NewMask) {
2793 Value *Src =
nullptr;
2794 if (0 <= M && M < (
int)NumImmElts) {
2798 Src =
M >= (int)NumSrcElts ? Y0 : X0;
2799 M =
M >= (int)NumSrcElts ? (M - NumSrcElts) :
M;
2801 }
else if (M >= (
int)NumImmElts) {
2806 Src =
M >= (int)NumSrcElts ? Y1 : X1;
2807 M =
M >= (int)NumSrcElts ? (M - NumSrcElts) :
M;
2811 assert(0 <= M && M < (
int)NumSrcElts &&
"Unexpected shuffle mask index");
2820 if (!NewX || NewX == Src) {
2824 if (!NewY || NewY == Src) {
2840 replaceValue(
I, *NewX);
2857 bool IsUnary =
all_of(NewMask, [&](
int M) {
return M < (int)NumSrcElts; });
2863 nullptr, {NewX, NewY});
2865 NewCost += InnerCost0;
2867 NewCost += InnerCost1;
2870 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2872 if (NewCost > OldCost)
2876 replaceValue(
I, *Shuf);
2882bool VectorCombine::foldShuffleOfIntrinsics(Instruction &
I) {
2884 ArrayRef<int> OldMask;
2895 if (IID != II1->getIntrinsicID())
2900 if (!ShuffleDstTy || !II0Ty)
2906 for (
unsigned I = 0,
E = II0->arg_size();
I !=
E; ++
I)
2908 II0->getArgOperand(
I) != II1->getArgOperand(
I))
2915 II0Ty, OldMask,
CostKind, 0,
nullptr, {II0, II1}, &
I);
2919 for (
unsigned I = 0,
E = II0->arg_size();
I !=
E; ++
I) {
2921 NewArgsTy.
push_back(II0->getArgOperand(
I)->getType());
2925 ShuffleDstTy->getNumElements());
2929 CostKind, 0,
nullptr, {II0->getArgOperand(
I), II1->getArgOperand(
I)});
2932 IntrinsicCostAttributes NewAttr(IID, ShuffleDstTy, NewArgsTy);
2936 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2939 if (NewCost > OldCost)
2943 for (
unsigned I = 0,
E = II0->arg_size();
I !=
E; ++
I)
2948 II1->getArgOperand(
I), OldMask);
2956 NewInst->copyIRFlags(II0);
2957 NewInst->andIRFlags(II1);
2960 replaceValue(
I, *NewIntrinsic);
2970 int M = SV->getMaskValue(Lane);
2973 if (
static_cast<unsigned>(M) < NumElts) {
2974 U = &SV->getOperandUse(0);
2977 U = &SV->getOperandUse(1);
2988 auto [U, Lane] = IL;
3002 unsigned NumElts = Ty->getNumElements();
3003 if (Item.
size() == NumElts || NumElts == 1 || Item.
size() % NumElts != 0)
3009 std::iota(ConcatMask.
begin(), ConcatMask.
end(), 0);
3015 unsigned NumSlices = Item.
size() / NumElts;
3020 for (
unsigned Slice = 0; Slice < NumSlices; ++Slice) {
3021 Use *SliceV = Item[Slice * NumElts].first;
3022 if (!SliceV || SliceV->get()->
getType() != Ty)
3024 for (
unsigned Elt = 0; Elt < NumElts; ++Elt) {
3025 auto [V, Lane] = Item[Slice * NumElts + Elt];
3026 if (Lane !=
static_cast<int>(Elt) || SliceV->get() != V->get())
3039 auto [FrontU, FrontLane] = Item.
front();
3041 if (IdentityLeafs.
contains(FrontU)) {
3042 return FrontU->get();
3046 return Builder.CreateShuffleVector(FrontU->get(), Mask);
3048 if (ConcatLeafs.
contains(FrontU)) {
3052 for (
unsigned S = 0; S < Values.
size(); ++S)
3053 Values[S] = Item[S * NumElts].first->get();
3055 while (Values.
size() > 1) {
3058 std::iota(Mask.begin(), Mask.end(), 0);
3060 for (
unsigned S = 0; S < NewValues.
size(); ++S)
3062 Builder.CreateShuffleVector(Values[S * 2], Values[S * 2 + 1], Mask);
3070 unsigned NumOps =
I->getNumOperands() - (
II ? 1 : 0);
3072 for (
unsigned Idx = 0; Idx <
NumOps; Idx++) {
3075 Ops[Idx] =
II->getOperand(Idx);
3079 Ty, IdentityLeafs, SplatLeafs, ConcatLeafs,
3084 for (
const auto &Lane : Item)
3097 auto *
Value = Builder.CreateCmp(CI->getPredicate(),
Ops[0],
Ops[1]);
3107 auto *
Value = Builder.CreateCast(CI->getOpcode(),
Ops[0], DstTy);
3112 auto *
Value = Builder.CreateIntrinsic(DstTy,
II->getIntrinsicID(),
Ops);
3126bool VectorCombine::foldShuffleToIdentity(Instruction &
I) {
3128 if (!Ty ||
I.use_empty())
3132 for (
unsigned M = 0,
E = Ty->getNumElements(); M <
E; ++M)
3137 SmallPtrSet<Use *, 4> IdentityLeafs, SplatLeafs, ConcatLeafs;
3138 unsigned NumVisited = 0;
3140 while (!Worklist.
empty()) {
3145 auto [FrontU, FrontLane] = Item.
front();
3153 return X->getType() ==
Y->getType() &&
3158 if (FrontLane == 0 &&
3160 Ty->getNumElements() &&
3163 return !
E.value().first || (IsEquiv(
E.value().first->get(), FrontV) &&
3164 E.value().second == (int)
E.index());
3166 IdentityLeafs.
insert(FrontU);
3171 C &&
C->getSplatValue() &&
3179 SplatLeafs.
insert(FrontU);
3184 auto [FrontU, FrontLane] = Item.
front();
3185 auto [
U, Lane] = IL;
3186 return !
U || (
U->get() == FrontU->get() && Lane == FrontLane);
3188 SplatLeafs.
insert(FrontU);
3194 auto CheckLaneIsEquivalentToFirst = [Item](
InstLane IL) {
3198 Value *
V = IL.first->get();
3204 if (CI->getPredicate() !=
cast<CmpInst>(FrontV)->getPredicate())
3207 if (CI->getSrcTy()->getScalarType() !=
3212 SI->getOperand(0)->getType() !=
3219 II->getIntrinsicID() ==
3221 !
II->hasOperandBundles());
3228 BO && BO->isIntDivRem())
3233 }
else if (
isa<UnaryOperator, TruncInst, ZExtInst, SExtInst, FPToSIInst,
3234 FPToUIInst, SIToFPInst, UIToFPInst>(FrontU)) {
3241 if (DstTy && SrcTy &&
3242 SrcTy->getNumElements() == DstTy->getNumElements()) {
3253 !
II->hasOperandBundles()) {
3254 for (
unsigned Op = 0,
E =
II->getNumOperands() - 1;
Op <
E;
Op++) {
3273 ConcatLeafs.
insert(FrontU);
3280 if (NumVisited <= 1)
3283 LLVM_DEBUG(
dbgs() <<
"Found a superfluous identity shuffle: " <<
I <<
"\n");
3289 ConcatLeafs, Builder, &
TTI);
3290 replaceValue(
I, *V);
3297bool VectorCombine::foldShuffleFromReductions(Instruction &
I) {
3301 switch (
II->getIntrinsicID()) {
3302 case Intrinsic::vector_reduce_add:
3303 case Intrinsic::vector_reduce_mul:
3304 case Intrinsic::vector_reduce_and:
3305 case Intrinsic::vector_reduce_or:
3306 case Intrinsic::vector_reduce_xor:
3307 case Intrinsic::vector_reduce_smin:
3308 case Intrinsic::vector_reduce_smax:
3309 case Intrinsic::vector_reduce_umin:
3310 case Intrinsic::vector_reduce_umax:
3319 std::queue<Value *> Worklist;
3320 SmallPtrSet<Value *, 4> Visited;
3321 ShuffleVectorInst *Shuffle =
nullptr;
3325 while (!Worklist.empty()) {
3326 Value *CV = Worklist.front();
3338 if (CI->isBinaryOp()) {
3339 for (
auto *
Op : CI->operand_values())
3343 if (Shuffle && Shuffle != SV)
3360 for (
auto *V : Visited)
3361 for (
auto *U :
V->users())
3362 if (!Visited.contains(U) && U != &
I)
3365 FixedVectorType *VecType =
3369 FixedVectorType *ShuffleInputType =
3371 if (!ShuffleInputType)
3377 SmallVector<int> ConcatMask;
3379 sort(ConcatMask, [](
int X,
int Y) {
return (
unsigned)
X < (unsigned)
Y; });
3380 bool UsesSecondVec =
3381 any_of(ConcatMask, [&](
int M) {
return M >= (int)NumInputElts; });
3388 ShuffleInputType, ConcatMask,
CostKind);
3390 LLVM_DEBUG(
dbgs() <<
"Found a reduction feeding from a shuffle: " << *Shuffle
3392 LLVM_DEBUG(
dbgs() <<
" OldCost: " << OldCost <<
" vs NewCost: " << NewCost
3394 bool MadeChanges =
false;
3395 if (NewCost < OldCost) {
3399 LLVM_DEBUG(
dbgs() <<
"Created new shuffle: " << *NewShuffle <<
"\n");
3400 replaceValue(*Shuffle, *NewShuffle);
3406 MadeChanges |= foldSelectShuffle(*Shuffle,
true);
3452bool VectorCombine::foldShuffleChainsToReduce(Instruction &
I) {
3454 std::queue<Value *> InstWorklist;
3458 std::optional<unsigned int> CommonCallOp = std::nullopt;
3459 std::optional<Instruction::BinaryOps> CommonBinOp = std::nullopt;
3461 bool IsFirstCallOrBinInst =
true;
3462 bool ShouldBeCallOrBinInst =
true;
3468 SmallVector<Value *, 2> PrevVecV(2,
nullptr);
3478 int64_t
VecSize = FVT->getNumElements();
3484 unsigned int NumLevels =
Log2_64_Ceil(VecSize), VisitedCnt = 0;
3485 int64_t ShuffleMaskHalf = 1, ExpectedParityMask = 0;
3495 for (
int Cur = VecSize, Mask = NumLevels - 1; Cur > 1;
3496 Cur = (Cur + 1) / 2, --
Mask) {
3498 ExpectedParityMask |= (1ll <<
Mask);
3501 InstWorklist.push(VecOpEE);
3503 while (!InstWorklist.empty()) {
3504 Value *CI = InstWorklist.front();
3508 if (!ShouldBeCallOrBinInst)
3511 if (!IsFirstCallOrBinInst &&
3512 any_of(PrevVecV, [](
Value *VecV) {
return VecV ==
nullptr; }))
3517 if (
II != (IsFirstCallOrBinInst ? VecOpEE : PrevVecV[0]))
3519 IsFirstCallOrBinInst =
false;
3522 CommonCallOp =
II->getIntrinsicID();
3523 if (
II->getIntrinsicID() != *CommonCallOp)
3526 switch (
II->getIntrinsicID()) {
3527 case Intrinsic::umin:
3528 case Intrinsic::umax:
3529 case Intrinsic::smin:
3530 case Intrinsic::smax: {
3531 auto *Op0 =
II->getOperand(0);
3532 auto *Op1 =
II->getOperand(1);
3540 ShouldBeCallOrBinInst ^= 1;
3542 IntrinsicCostAttributes ICA(
3543 *CommonCallOp,
II->getType(),
3544 {PrevVecV[0]->getType(), PrevVecV[1]->getType()});
3551 InstWorklist.push(PrevVecV[1]);
3552 InstWorklist.push(PrevVecV[0]);
3556 if (!ShouldBeCallOrBinInst)
3559 if (!IsFirstCallOrBinInst &&
3560 any_of(PrevVecV, [](
Value *VecV) {
return VecV ==
nullptr; }))
3563 if (BinOp != (IsFirstCallOrBinInst ? VecOpEE : PrevVecV[0]))
3565 IsFirstCallOrBinInst =
false;
3573 switch (*CommonBinOp) {
3574 case BinaryOperator::Add:
3575 case BinaryOperator::Mul:
3576 case BinaryOperator::Or:
3577 case BinaryOperator::And:
3578 case BinaryOperator::Xor: {
3588 ShouldBeCallOrBinInst ^= 1;
3595 InstWorklist.push(PrevVecV[1]);
3596 InstWorklist.push(PrevVecV[0]);
3600 if (ShouldBeCallOrBinInst ||
3601 any_of(PrevVecV, [](
Value *VecV) {
return VecV ==
nullptr; }))
3604 if (SVInst != PrevVecV[1])
3607 ArrayRef<int> CurMask;
3613 for (
int Mask = 0, MaskSize = CurMask.
size(); Mask != MaskSize; ++Mask) {
3614 if (Mask < ShuffleMaskHalf &&
3615 CurMask[Mask] != ShuffleMaskHalf + Mask - (ExpectedParityMask & 1))
3617 if (Mask >= ShuffleMaskHalf && CurMask[Mask] != -1)
3622 ShuffleMaskHalf *= 2;
3623 ShuffleMaskHalf -= (ExpectedParityMask & 1);
3624 ExpectedParityMask >>= 1;
3627 SVInst->getType(), SVInst->getType(),
3631 if (!ExpectedParityMask && VisitedCnt == NumLevels)
3634 ShouldBeCallOrBinInst ^= 1;
3641 if (ShouldBeCallOrBinInst)
3644 assert(VecSize != -1 &&
"Expected Match for Vector Size");
3646 Value *FinalVecV = PrevVecV[0];
3658 IntrinsicCostAttributes ICA(ReducedOp, FinalVecVTy, {FinalVecV});
3661 if (NewCost >= OrigCost)
3664 auto *ReducedResult =
3666 replaceValue(
I, *ReducedResult);
3675bool VectorCombine::foldCastFromReductions(Instruction &
I) {
3680 bool TruncOnly =
false;
3683 case Intrinsic::vector_reduce_add:
3684 case Intrinsic::vector_reduce_mul:
3687 case Intrinsic::vector_reduce_and:
3688 case Intrinsic::vector_reduce_or:
3689 case Intrinsic::vector_reduce_xor:
3696 Value *ReductionSrc =
I.getOperand(0);
3708 Type *ResultTy =
I.getType();
3711 ReductionOpc, ReductionSrcTy, std::nullopt,
CostKind);
3721 if (OldCost <= NewCost || !NewCost.
isValid())
3725 II->getIntrinsicID(), {Src});
3727 replaceValue(
I, *NewCast);
3736 constexpr unsigned MaxVisited = 32;
3739 bool FoundReduction =
false;
3742 while (!WorkList.
empty()) {
3744 for (
User *U :
I->users()) {
3746 if (!UI || !Visited.
insert(UI).second)
3748 if (Visited.
size() > MaxVisited)
3754 switch (
II->getIntrinsicID()) {
3755 case Intrinsic::vector_reduce_add:
3756 case Intrinsic::vector_reduce_mul:
3757 case Intrinsic::vector_reduce_and:
3758 case Intrinsic::vector_reduce_or:
3759 case Intrinsic::vector_reduce_xor:
3760 case Intrinsic::vector_reduce_smin:
3761 case Intrinsic::vector_reduce_smax:
3762 case Intrinsic::vector_reduce_umin:
3763 case Intrinsic::vector_reduce_umax:
3764 FoundReduction =
true;
3777 return FoundReduction;
3790bool VectorCombine::foldSelectShuffle(Instruction &
I,
bool FromReduction) {
3795 if (!Op0 || !Op1 || Op0 == Op1 || !Op0->isBinaryOp() || !Op1->isBinaryOp() ||
3803 SmallPtrSet<Instruction *, 4> InputShuffles({SVI0A, SVI0B, SVI1A, SVI1B});
3805 if (!
I ||
I->getOperand(0)->getType() != VT)
3807 return any_of(
I->users(), [&](User *U) {
3808 return U != Op0 && U != Op1 &&
3809 !(isa<ShuffleVectorInst>(U) &&
3810 (InputShuffles.contains(cast<Instruction>(U)) ||
3811 isInstructionTriviallyDead(cast<Instruction>(U))));
3814 if (checkSVNonOpUses(SVI0A) || checkSVNonOpUses(SVI0B) ||
3815 checkSVNonOpUses(SVI1A) || checkSVNonOpUses(SVI1B))
3823 for (
auto *U :
I->users()) {
3825 if (!SV || SV->getType() != VT)
3827 if ((SV->getOperand(0) != Op0 && SV->getOperand(0) != Op1) ||
3828 (SV->getOperand(1) != Op0 && SV->getOperand(1) != Op1))
3835 if (!collectShuffles(Op0) || !collectShuffles(Op1))
3839 if (FromReduction && Shuffles.
size() > 1)
3844 if (!FromReduction) {
3845 for (ShuffleVectorInst *SV : Shuffles) {
3846 for (
auto *U : SV->users()) {
3849 Shuffles.push_back(SSV);
3861 int MaxV1Elt = 0, MaxV2Elt = 0;
3862 unsigned NumElts = VT->getNumElements();
3863 for (ShuffleVectorInst *SVN : Shuffles) {
3864 SmallVector<int>
Mask;
3865 SVN->getShuffleMask(Mask);
3869 Value *SVOp0 = SVN->getOperand(0);
3870 Value *SVOp1 = SVN->getOperand(1);
3875 for (
int &Elem : Mask) {
3881 if (SVOp0 == Op1 && SVOp1 == Op0) {
3885 if (SVOp0 != Op0 || SVOp1 != Op1)
3891 SmallVector<int> ReconstructMask;
3892 for (
unsigned I = 0;
I <
Mask.size();
I++) {
3895 }
else if (Mask[
I] <
static_cast<int>(NumElts)) {
3896 MaxV1Elt = std::max(MaxV1Elt, Mask[
I]);
3897 auto It =
find_if(V1, [&](
const std::pair<int, int> &
A) {
3898 return Mask[
I] ==
A.first;
3907 MaxV2Elt = std::max<int>(MaxV2Elt, Mask[
I] - NumElts);
3908 auto It =
find_if(V2, [&](
const std::pair<int, int> &
A) {
3909 return Mask[
I] -
static_cast<int>(NumElts) ==
A.first;
3923 sort(ReconstructMask);
3924 OrigReconstructMasks.
push_back(std::move(ReconstructMask));
3932 (MaxV1Elt ==
static_cast<int>(V1.
size()) - 1 &&
3933 MaxV2Elt ==
static_cast<int>(V2.
size()) - 1))
3945 if (InputShuffles.contains(SSV))
3947 return SV->getMaskValue(M);
3955 std::pair<int, int>
Y) {
3956 int MXA = GetBaseMaskValue(
A,
X.first);
3957 int MYA = GetBaseMaskValue(
A,
Y.first);
3960 stable_sort(V1, [&](std::pair<int, int>
A, std::pair<int, int>
B) {
3961 return SortBase(SVI0A,
A,
B);
3963 stable_sort(V2, [&](std::pair<int, int>
A, std::pair<int, int>
B) {
3964 return SortBase(SVI1A,
A,
B);
3969 for (
const auto &Mask : OrigReconstructMasks) {
3970 SmallVector<int> ReconstructMask;
3971 for (
int M : Mask) {
3973 auto It =
find_if(V, [M](
auto A) {
return A.second ==
M; });
3974 assert(It !=
V.end() &&
"Expected all entries in Mask");
3975 return std::distance(
V.begin(), It);
3979 else if (M <
static_cast<int>(NumElts)) {
3980 ReconstructMask.
push_back(FindIndex(V1, M));
3982 ReconstructMask.
push_back(NumElts + FindIndex(V2, M));
3985 ReconstructMasks.
push_back(std::move(ReconstructMask));
3990 SmallVector<int> V1A, V1B, V2A, V2B;
3991 for (
unsigned I = 0;
I < V1.
size();
I++) {
3992 V1A.
push_back(GetBaseMaskValue(SVI0A, V1[
I].first));
3993 V1B.
push_back(GetBaseMaskValue(SVI0B, V1[
I].first));
3995 for (
unsigned I = 0;
I < V2.
size();
I++) {
3996 V2A.
push_back(GetBaseMaskValue(SVI1A, V2[
I].first));
3997 V2B.
push_back(GetBaseMaskValue(SVI1B, V2[
I].first));
3999 while (V1A.
size() < NumElts) {
4003 while (V2A.
size() < NumElts) {
4015 VT, VT, SV->getShuffleMask(),
CostKind);
4022 unsigned ElementSize = VT->getElementType()->getPrimitiveSizeInBits();
4023 unsigned MaxVectorSize =
4025 unsigned MaxElementsInVector = MaxVectorSize / ElementSize;
4026 if (MaxElementsInVector == 0)
4035 std::set<SmallVector<int, 4>> UniqueShuffles;
4040 unsigned NumFullVectors =
Mask.size() / MaxElementsInVector;
4041 if (NumFullVectors < 2)
4042 return C + ShuffleCost;
4043 SmallVector<int, 4> SubShuffle(MaxElementsInVector);
4044 unsigned NumUniqueGroups = 0;
4045 unsigned NumGroups =
Mask.size() / MaxElementsInVector;
4048 for (
unsigned I = 0;
I < NumFullVectors; ++
I) {
4049 for (
unsigned J = 0; J < MaxElementsInVector; ++J)
4050 SubShuffle[J] = Mask[MaxElementsInVector *
I + J];
4051 if (UniqueShuffles.insert(SubShuffle).second)
4052 NumUniqueGroups += 1;
4054 return C + ShuffleCost * NumUniqueGroups / NumGroups;
4060 SmallVector<int, 16>
Mask;
4061 SV->getShuffleMask(Mask);
4062 return AddShuffleMaskAdjustedCost(
C, Mask);
4065 auto AllShufflesHaveSameOperands =
4066 [](SmallPtrSetImpl<Instruction *> &InputShuffles) {
4067 if (InputShuffles.size() < 2)
4069 ShuffleVectorInst *FirstSV =
4076 std::next(InputShuffles.begin()), InputShuffles.end(),
4077 [&](Instruction *
I) {
4078 ShuffleVectorInst *SV = dyn_cast<ShuffleVectorInst>(I);
4079 return SV && SV->getOperand(0) == In0 && SV->getOperand(1) == In1;
4088 CostBefore += std::accumulate(Shuffles.begin(), Shuffles.end(),
4090 if (AllShufflesHaveSameOperands(InputShuffles)) {
4091 UniqueShuffles.clear();
4092 CostBefore += std::accumulate(InputShuffles.begin(), InputShuffles.end(),
4095 CostBefore += std::accumulate(InputShuffles.begin(), InputShuffles.end(),
4101 FixedVectorType *Op0SmallVT =
4103 FixedVectorType *Op1SmallVT =
4108 UniqueShuffles.clear();
4109 CostAfter += std::accumulate(ReconstructMasks.begin(), ReconstructMasks.end(),
4111 std::set<SmallVector<int>> OutputShuffleMasks({V1A, V1B, V2A, V2B});
4113 std::accumulate(OutputShuffleMasks.begin(), OutputShuffleMasks.end(),
4116 LLVM_DEBUG(
dbgs() <<
"Found a binop select shuffle pattern: " <<
I <<
"\n");
4118 <<
" vs CostAfter: " << CostAfter <<
"\n");
4119 if (CostBefore < CostAfter ||
4130 if (InputShuffles.contains(SSV))
4132 return SV->getOperand(
Op);
4136 GetShuffleOperand(SVI0A, 1), V1A);
4139 GetShuffleOperand(SVI0B, 1), V1B);
4142 GetShuffleOperand(SVI1A, 1), V2A);
4145 GetShuffleOperand(SVI1B, 1), V2B);
4150 I->copyIRFlags(Op0,
true);
4155 I->copyIRFlags(Op1,
true);
4157 for (
int S = 0,
E = ReconstructMasks.size(); S !=
E; S++) {
4160 replaceValue(*Shuffles[S], *NSV,
false);
4163 Worklist.pushValue(NSV0A);
4164 Worklist.pushValue(NSV0B);
4165 Worklist.pushValue(NSV1A);
4166 Worklist.pushValue(NSV1B);
4176bool VectorCombine::shrinkType(Instruction &
I) {
4177 Value *ZExted, *OtherOperand;
4183 Value *ZExtOperand =
I.getOperand(
I.getOperand(0) == OtherOperand ? 1 : 0);
4187 unsigned BW = SmallTy->getElementType()->getPrimitiveSizeInBits();
4189 if (
I.getOpcode() == Instruction::LShr) {
4206 Instruction::ZExt, BigTy, SmallTy,
4207 TargetTransformInfo::CastContextHint::None,
CostKind);
4212 for (User *U : ZExtOperand->
users()) {
4219 ShrinkCost += ZExtCost;
4234 ShrinkCost += ZExtCost;
4241 Instruction::Trunc, SmallTy, BigTy,
4242 TargetTransformInfo::CastContextHint::None,
CostKind);
4247 if (ShrinkCost > CurrentCost)
4251 Value *Op0 = ZExted;
4254 if (
I.getOperand(0) == OtherOperand)
4261 replaceValue(
I, *NewZExtr);
4267bool VectorCombine::foldInsExtVectorToShuffle(Instruction &
I) {
4268 Value *DstVec, *SrcVec;
4269 uint64_t ExtIdx, InsIdx;
4279 if (!DstVecTy || !SrcVecTy ||
4280 SrcVecTy->getElementType() != DstVecTy->getElementType())
4283 unsigned NumDstElts = DstVecTy->getNumElements();
4284 unsigned NumSrcElts = SrcVecTy->getNumElements();
4285 if (InsIdx >= NumDstElts || ExtIdx >= NumSrcElts || NumDstElts == 1)
4292 bool NeedExpOrNarrow = NumSrcElts != NumDstElts;
4293 bool IsExtIdxInBounds = ExtIdx < NumDstElts;
4295 if (NeedDstSrcSwap) {
4297 if (!IsExtIdxInBounds && NeedExpOrNarrow)
4300 Mask[InsIdx] = ExtIdx;
4304 std::iota(
Mask.begin(),
Mask.end(), 0);
4305 if (!IsExtIdxInBounds && NeedExpOrNarrow)
4306 Mask[InsIdx] = NumDstElts;
4308 Mask[InsIdx] = ExtIdx + NumDstElts;
4321 SmallVector<int> ExtToVecMask;
4322 if (!NeedExpOrNarrow) {
4327 nullptr, {DstVec, SrcVec});
4333 if (IsExtIdxInBounds)
4334 ExtToVecMask[ExtIdx] = ExtIdx;
4336 ExtToVecMask[0] = ExtIdx;
4339 DstVecTy, SrcVecTy, ExtToVecMask,
CostKind);
4343 if (!
Ext->hasOneUse())
4346 LLVM_DEBUG(
dbgs() <<
"Found a insert/extract shuffle-like pair: " <<
I
4347 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
4350 if (OldCost < NewCost)
4353 if (NeedExpOrNarrow) {
4354 if (!NeedDstSrcSwap)
4367 replaceValue(
I, *Shuf);
4376bool VectorCombine::foldInterleaveIntrinsics(Instruction &
I) {
4377 const APInt *SplatVal0, *SplatVal1;
4387 auto *ExtVTy = VectorType::getExtendedElementVectorType(VTy);
4388 unsigned Width = VTy->getElementType()->getIntegerBitWidth();
4397 LLVM_DEBUG(
dbgs() <<
"VC: The cost to cast from " << *ExtVTy <<
" to "
4398 << *
I.getType() <<
" is too high.\n");
4402 APInt NewSplatVal = SplatVal1->
zext(Width * 2);
4403 NewSplatVal <<= Width;
4404 NewSplatVal |= SplatVal0->
zext(Width * 2);
4406 ExtVTy->getElementCount(), ConstantInt::get(
F.getContext(), NewSplatVal));
4414bool VectorCombine::shrinkLoadForShuffles(Instruction &
I) {
4416 if (!OldLoad || !OldLoad->isSimple())
4423 unsigned const OldNumElements = OldLoadTy->getNumElements();
4429 using IndexRange = std::pair<int, int>;
4430 auto GetIndexRangeInShuffles = [&]() -> std::optional<IndexRange> {
4431 IndexRange OutputRange = IndexRange(OldNumElements, -1);
4432 for (llvm::Use &Use :
I.uses()) {
4434 User *Shuffle =
Use.getUser();
4439 return std::nullopt;
4446 for (
int Index : Mask) {
4447 if (Index >= 0 && Index <
static_cast<int>(OldNumElements)) {
4448 OutputRange.first = std::min(Index, OutputRange.first);
4449 OutputRange.second = std::max(Index, OutputRange.second);
4454 if (OutputRange.second < OutputRange.first)
4455 return std::nullopt;
4461 if (std::optional<IndexRange> Indices = GetIndexRangeInShuffles()) {
4462 unsigned const NewNumElements = Indices->second + 1u;
4466 if (NewNumElements < OldNumElements) {
4471 Type *ElemTy = OldLoadTy->getElementType();
4473 Value *PtrOp = OldLoad->getPointerOperand();
4476 Instruction::Load, OldLoad->getType(), OldLoad->getAlign(),
4477 OldLoad->getPointerAddressSpace(),
CostKind);
4480 OldLoad->getPointerAddressSpace(),
CostKind);
4482 using UseEntry = std::pair<ShuffleVectorInst *, std::vector<int>>;
4484 unsigned const MaxIndex = NewNumElements * 2u;
4486 for (llvm::Use &Use :
I.uses()) {
4488 ArrayRef<int> OldMask = Shuffle->getShuffleMask();
4494 for (
int Index : OldMask) {
4495 if (Index >=
static_cast<int>(MaxIndex))
4509 dbgs() <<
"Found a load used only by shufflevector instructions: "
4510 <<
I <<
"\n OldCost: " << OldCost
4511 <<
" vs NewCost: " << NewCost <<
"\n");
4513 if (OldCost < NewCost || !NewCost.
isValid())
4519 NewLoad->copyMetadata(
I);
4522 for (UseEntry &Use : NewUses) {
4523 ShuffleVectorInst *Shuffle =
Use.first;
4524 std::vector<int> &NewMask =
Use.second;
4531 replaceValue(*Shuffle, *NewShuffle,
false);
4544bool VectorCombine::shrinkPhiOfShuffles(Instruction &
I) {
4546 if (!Phi ||
Phi->getNumIncomingValues() != 2u)
4550 ArrayRef<int> Mask0;
4551 ArrayRef<int> Mask1;
4564 auto const InputNumElements = InputVT->getNumElements();
4566 if (InputNumElements >= ResultVT->getNumElements())
4571 SmallVector<int, 16> NewMask;
4574 for (
auto [
M0,
M1] :
zip(Mask0, Mask1)) {
4575 if (
M0 >= 0 &&
M1 >= 0)
4577 else if (
M0 == -1 &&
M1 == -1)
4590 int MaskOffset = NewMask[0
u];
4591 unsigned Index = (InputNumElements + MaskOffset) % InputNumElements;
4594 for (
unsigned I = 0u;
I < InputNumElements; ++
I) {
4608 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
4611 if (NewCost > OldCost)
4623 auto *NewPhi = Builder.
CreatePHI(NewShuf0->getType(), 2u);
4625 NewPhi->addIncoming(
Op,
Phi->getIncomingBlock(1u));
4631 replaceValue(*Phi, *NewShuf1);
4637bool VectorCombine::run() {
4651 auto Opcode =
I.getOpcode();
4659 if (IsFixedVectorType) {
4661 case Instruction::InsertElement:
4662 if (vectorizeLoadInsert(
I))
4665 case Instruction::ShuffleVector:
4666 if (widenSubvectorLoad(
I))
4677 if (scalarizeOpOrCmp(
I))
4679 if (scalarizeLoad(
I))
4681 if (scalarizeExtExtract(
I))
4683 if (scalarizeVPIntrinsic(
I))
4685 if (foldInterleaveIntrinsics(
I))
4689 if (Opcode == Instruction::Store)
4690 if (foldSingleElementStore(
I))
4694 if (TryEarlyFoldsOnly)
4701 if (IsFixedVectorType) {
4703 case Instruction::InsertElement:
4704 if (foldInsExtFNeg(
I))
4706 if (foldInsExtBinop(
I))
4708 if (foldInsExtVectorToShuffle(
I))
4711 case Instruction::ShuffleVector:
4712 if (foldPermuteOfBinops(
I))
4714 if (foldShuffleOfBinops(
I))
4716 if (foldShuffleOfSelects(
I))
4718 if (foldShuffleOfCastops(
I))
4720 if (foldShuffleOfShuffles(
I))
4722 if (foldShuffleOfIntrinsics(
I))
4724 if (foldSelectShuffle(
I))
4726 if (foldShuffleToIdentity(
I))
4729 case Instruction::Load:
4730 if (shrinkLoadForShuffles(
I))
4733 case Instruction::BitCast:
4734 if (foldBitcastShuffle(
I))
4737 case Instruction::And:
4738 case Instruction::Or:
4739 case Instruction::Xor:
4740 if (foldBitOpOfCastops(
I))
4742 if (foldBitOpOfCastConstant(
I))
4745 case Instruction::PHI:
4746 if (shrinkPhiOfShuffles(
I))
4756 case Instruction::Call:
4757 if (foldShuffleFromReductions(
I))
4759 if (foldCastFromReductions(
I))
4762 case Instruction::ExtractElement:
4763 if (foldShuffleChainsToReduce(
I))
4766 case Instruction::ICmp:
4767 case Instruction::FCmp:
4768 if (foldExtractExtract(
I))
4771 case Instruction::Or:
4772 if (foldConcatOfBoolMasks(
I))
4777 if (foldExtractExtract(
I))
4779 if (foldExtractedCmps(
I))
4781 if (foldBinopOfReductions(
I))
4790 bool MadeChange =
false;
4791 for (BasicBlock &BB :
F) {
4803 if (!
I->isDebugOrPseudoInst())
4804 MadeChange |= FoldInst(*
I);
4811 while (!Worklist.isEmpty()) {
4821 MadeChange |= FoldInst(*
I);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< unsigned > MaxInstrsToScan("aggressive-instcombine-max-scan-instrs", cl::init(64), cl::Hidden, cl::desc("Max number of instructions to scan for aggressive instcombine."))
This is the interface for LLVM's primary stateless and local alias analysis.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
This file defines the DenseMap class.
This is the interface for a simple mod/ref and alias analysis over globals.
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
FunctionAnalysisManager FAM
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static SymbolRef::Type getType(const Symbol *Sym)
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
static Value * generateNewInstTree(ArrayRef< InstLane > Item, FixedVectorType *Ty, const SmallPtrSet< Use *, 4 > &IdentityLeafs, const SmallPtrSet< Use *, 4 > &SplatLeafs, const SmallPtrSet< Use *, 4 > &ConcatLeafs, IRBuilderBase &Builder, const TargetTransformInfo *TTI)
static bool isFreeConcat(ArrayRef< InstLane > Item, TTI::TargetCostKind CostKind, const TargetTransformInfo &TTI)
Detect concat of multiple values into a vector.
static void analyzeCostOfVecReduction(const IntrinsicInst &II, TTI::TargetCostKind CostKind, const TargetTransformInfo &TTI, InstructionCost &CostBeforeReduction, InstructionCost &CostAfterReduction)
static SmallVector< InstLane > generateInstLaneVectorFromOperand(ArrayRef< InstLane > Item, int Op)
static Value * createShiftShuffle(Value *Vec, unsigned OldIndex, unsigned NewIndex, IRBuilderBase &Builder)
Create a shuffle that translates (shifts) 1 element from the input vector to a new element location.
static Align computeAlignmentAfterScalarization(Align VectorAlignment, Type *ScalarType, Value *Idx, const DataLayout &DL)
The memory operation on a vector of ScalarType had alignment of VectorAlignment.
static bool feedsIntoVectorReduction(ShuffleVectorInst *SVI)
Returns true if this ShuffleVectorInst eventually feeds into a vector reduction intrinsic (e....
static ScalarizationResult canScalarizeAccess(VectorType *VecTy, Value *Idx, Instruction *CtxI, AssumptionCache &AC, const DominatorTree &DT)
Check if it is legal to scalarize a memory access to VecTy at index Idx.
static cl::opt< bool > DisableVectorCombine("disable-vector-combine", cl::init(false), cl::Hidden, cl::desc("Disable all vector combine transforms"))
static InstLane lookThroughShuffles(Use *U, int Lane)
static bool canWidenLoad(LoadInst *Load, const TargetTransformInfo &TTI)
static const unsigned InvalidIndex
std::pair< Use *, int > InstLane
static Value * translateExtract(ExtractElementInst *ExtElt, unsigned NewIndex, IRBuilderBase &Builder)
Given an extract element instruction with constant index operand, shuffle the source vector (shift th...
static cl::opt< unsigned > MaxInstrsToScan("vector-combine-max-scan-instrs", cl::init(30), cl::Hidden, cl::desc("Max number of instructions to scan for vector combining."))
static cl::opt< bool > DisableBinopExtractShuffle("disable-binop-extract-shuffle", cl::init(false), cl::Hidden, cl::desc("Disable binop extract to shuffle transforms"))
static bool isMemModifiedBetween(BasicBlock::iterator Begin, BasicBlock::iterator End, const MemoryLocation &Loc, AAResults &AA)
static constexpr int Concat[]
A manager for alias analyses.
Class for arbitrary precision integers.
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & front() const
front - Get the first element.
size_t size() const
size - Get the array size.
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM_ABI bool hasAttribute(Attribute::AttrKind Kind) const
Return true if the attribute exists in this set.
InstListType::iterator iterator
Instruction iterators...
BinaryOps getOpcode() const
Represents analyses that only rely on functions' control flow.
Value * getArgOperand(unsigned i) const
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
bool isFPPredicate() const
static LLVM_ABI std::optional< CmpPredicate > getMatching(CmpPredicate A, CmpPredicate B)
Compares two CmpPredicates taking samesign into account and returns the canonicalized CmpPredicate if...
static LLVM_ABI Constant * getExtractElement(Constant *Vec, Constant *Idx, Type *OnlyIfReducedTy=nullptr)
This is the shared class of boolean and integer constants.
const APInt & getValue() const
Return the constant as an APInt value reference.
This class represents a range of values.
LLVM_ABI ConstantRange urem(const ConstantRange &Other) const
Return a new range representing the possible values resulting from an unsigned remainder operation of...
LLVM_ABI ConstantRange binaryAnd(const ConstantRange &Other) const
Return a new range representing the possible values resulting from a binary-and of a value in this ra...
LLVM_ABI bool contains(const APInt &Val) const
Return true if the specified value is in the set.
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
A parsed version of the target data layout string in and methods for querying it.
Analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
LLVM_ABI bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Convenience struct for specifying and reasoning about fast-math flags.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static FixedVectorType * getDoubleElementsVectorType(FixedVectorType *VTy)
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Common base class shared among various IRBuilders.
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
LLVM_ABI Value * CreateSelectFMF(Value *C, Value *True, Value *False, FMFSource FMFSource, const Twine &Name="", Instruction *MDFrom=nullptr)
LLVM_ABI Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Value * CreateFreeze(Value *V, const Twine &Name="")
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="", MDNode *FPMathTag=nullptr, FMFSource FMFSource={})
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Value * CreatePointerBitCastOrAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
InstTy * Insert(InstTy *I, const Twine &Name="") const
Insert and return the specified instruction.
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
LLVM_ABI Value * CreateNAryOp(unsigned Opc, ArrayRef< Value * > Ops, const Twine &Name="", MDNode *FPMathTag=nullptr)
Create either a UnaryOperator or BinaryOperator depending on Opc.
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Value * CreateFNegFMF(Value *V, FMFSource FMFSource, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
void push(Instruction *I)
Push the instruction onto the worklist stack.
LLVM_ABI void setHasNoUnsignedWrap(bool b=true)
Set or clear the nuw flag on this instruction, which must be an operator which supports this flag.
LLVM_ABI void copyIRFlags(const Value *V, bool IncludeWrapFlags=true)
Convenience method to copy supported exact, fast-math, and (optionally) wrapping flags from V to this...
LLVM_ABI void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI void andIRFlags(const Value *V)
Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction.
LLVM_ABI void setNonNeg(bool b=true)
Set or clear the nneg flag on this instruction, which must be a zext instruction.
LLVM_ABI bool comesBefore(const Instruction *Other) const
Given an instruction Other in the same basic block as this instruction, return true if this instructi...
LLVM_ABI AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
An instruction for reading from memory.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
void setAlignment(Align Align)
Type * getPointerOperandType() const
Align getAlign() const
Return the alignment of the access that is being performed.
Representation for a specific memory location.
static LLVM_ABI MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
const SDValue & getOperand(unsigned Num) const
This instruction constructs a fixed permutation of two input vectors.
int getMaskValue(unsigned Elt) const
Return the shuffle mask value of this instruction for the given element index.
VectorType * getType() const
Overload to return most specific vector type.
static LLVM_ABI void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static LLVM_ABI bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
static void commuteShuffleMask(MutableArrayRef< int > Mask, unsigned InVecNumElts)
Change values in a shuffle permute mask assuming the two vector operands of length InVecNumElts have ...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
void assign(size_type NumElts, ValueParamT Elt)
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
void setAlignment(Align Align)
Analysis pass providing the TargetTransformInfo.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isPointerTy() const
True if this is an instance of PointerType.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
A Use represents the edge between a Value definition and its users.
Value * getOperand(unsigned i) const
static LLVM_ABI bool isVPBinOp(Intrinsic::ID ID)
std::optional< unsigned > getFunctionalIntrinsicID() const
std::optional< unsigned > getFunctionalOpcode() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
This is a wrapper around stripAndAccumulateConstantOffsets with the in-bounds requirement set to fals...
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
unsigned getValueID() const
Return an ID for the concrete type of this object.
LLVM_ABI bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &)
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
std::pair< iterator, bool > insert(const ValueT &V)
const ParentTy * getParent() const
self_iterator getIterator()
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Abstract Attribute helper functions.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
LLVM_ABI AttributeSet getFnAttributes(LLVMContext &C, ID id)
Return the function attributes for an intrinsic.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
class_match< PoisonValue > m_Poison()
Match an arbitrary poison constant.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
BinaryOp_match< LHS, RHS, Instruction::URem > m_URem(const LHS &L, const RHS &R)
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
DisjointOr_match< LHS, RHS > m_DisjointOr(const LHS &L, const RHS &R)
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
match_combine_and< LTy, RTy > m_CombineAnd(const LTy &L, const RTy &R)
Combine two pattern matchers matching L && R.
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
BinOpPred_match< LHS, RHS, is_bitwiselogic_op, true > m_c_BitwiseLogic(const LHS &L, const RHS &R)
Matches bitwise logic operations in either order.
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
match_combine_or< CastInst_match< OpTy, SExtInst >, NNegZExt_match< OpTy > > m_SExtLike(const OpTy &Op)
Match either "sext" or "zext nneg".
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
@ User
could "use" a pointer
NodeAddr< PhiNode * > Phi
NodeAddr< UseNode * > Use
friend class Instruction
Iterator for Instructions in a `BasicBlock.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
FunctionAddr VTableAddr Value
void stable_sort(R &&Range)
UnaryFunction for_each(R &&Range, UnaryFunction F)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
detail::scope_exit< std::decay_t< Callable > > make_scope_exit(Callable &&F)
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
unsigned Log2_64_Ceil(uint64_t Value)
Return the ceil log base 2 of the specified value, 64 if the value is zero.
LLVM_ABI Value * simplifyUnOp(unsigned Opcode, Value *Op, const SimplifyQuery &Q)
Given operand for a UnaryOperator, fold the result or return null.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI unsigned getArithmeticReductionInstruction(Intrinsic::ID RdxID)
Returns the arithmetic instruction opcode used when expanding a reduction.
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
LLVM_ABI Value * simplifyCall(CallBase *Call, Value *Callee, ArrayRef< Value * > Args, const SimplifyQuery &Q)
Given a callsite, callee, and arguments, fold the result or return null.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
LLVM_ABI bool mustSuppressSpeculation(const LoadInst &LI)
Return true if speculation of the given load must be suppressed to avoid ordering or interfering with...
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
LLVM_ABI ConstantRange computeConstantRange(const Value *V, bool ForSigned, bool UseInstrInfo=true, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Determine the possible constant range of an integer or vector of integer value.
unsigned M1(unsigned Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
bool isModSet(const ModRefInfo MRI)
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI bool programUndefinedIfPoison(const Instruction *Inst)
LLVM_ABI bool isSafeToLoadUnconditionally(Value *V, Align Alignment, const APInt &Size, const DataLayout &DL, Instruction *ScanFrom, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if we know that executing a load from this value cannot trap.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ABI void propagateIRFlags(Value *I, ArrayRef< Value * > VL, Value *OpValue=nullptr, bool IncludeWrapFlags=true)
Get the intersection (logical and) of all of the potential IR flags of each scalar operation (VL) tha...
LLVM_ABI bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
constexpr int PoisonMaskElem
LLVM_ABI bool isSafeToSpeculativelyExecuteWithOpcode(unsigned Opcode, const Instruction *Inst, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
This returns the same result as isSafeToSpeculativelyExecute if Opcode is the actual opcode of Inst.
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
LLVM_ABI Value * simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a BinaryOperator, fold the result or return null.
LLVM_ABI void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
LLVM_ABI Intrinsic::ID getReductionForBinop(Instruction::BinaryOps Opc)
Returns the reduction intrinsic id corresponding to the binary operation.
@ And
Bitwise or logical AND of integers.
LLVM_ABI bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
constexpr unsigned BitWidth
LLVM_ABI bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
LLVM_ABI Constant * getLosslessInvCast(Constant *C, Type *InvCastTo, unsigned CastOp, const DataLayout &DL, PreservedCastFlags *Flags=nullptr)
Try to cast C to InvC losslessly, satisfying CastOp(InvC) equals C, or CastOp(InvC) is a refined valu...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list.
LLVM_ABI Value * simplifyCmpInst(CmpPredicate Predicate, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a CmpInst, fold the result or return null.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI bool isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Returns true if V cannot be poison, but may be undef.
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
LLVM_ABI bool isTriviallyVectorizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially vectorizable.
LLVM_ABI Intrinsic::ID getMinMaxReductionIntrinsicID(Intrinsic::ID IID)
Returns the llvm.vector.reduce min/max intrinsic that corresponds to the intrinsic op.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
LLVM_ABI AAMDNodes adjustForAccess(unsigned AccessSize)
Create a new AAMDNode for accessing AccessSize bytes of this AAMDNode.
This struct is a compact representation of a valid (non-zero power of two) alignment.
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.