#define DEBUG_TYPE "vector-combine"

STATISTIC(NumVecLoad, "Number of vector loads formed");
STATISTIC(NumVecCmp, "Number of vector compares formed");
STATISTIC(NumVecBO, "Number of vector binops formed");
STATISTIC(NumVecCmpBO, "Number of vector compare + binop formed");
STATISTIC(NumShufOfBitcast, "Number of shuffles moved after bitcast");
STATISTIC(NumScalarOps, "Number of scalar unary + binary ops formed");
STATISTIC(NumScalarCmp, "Number of scalar compares formed");
STATISTIC(NumScalarIntrinsic, "Number of scalar intrinsic calls formed");
static cl::opt<bool> DisableVectorCombine(
    "disable-vector-combine", cl::init(false), cl::Hidden,
    cl::desc("Disable all vector combine transforms"));

static cl::opt<bool> DisableBinopExtractShuffle(
    "disable-binop-extract-shuffle", cl::init(false), cl::Hidden,
    cl::desc("Disable binop extract to shuffle transforms"));

static cl::opt<unsigned> MaxInstrsToScan(
    "vector-combine-max-scan-instrs", cl::init(30), cl::Hidden,
    cl::desc("Max number of instructions to scan for vector combining."));

static const unsigned InvalidIndex = std::numeric_limits<unsigned>::max();
                bool TryEarlyFoldsOnly)
        TryEarlyFoldsOnly(TryEarlyFoldsOnly) {}

  const TargetTransformInfo &TTI;
  const DominatorTree &DT;
  const SimplifyQuery SQ;
  bool TryEarlyFoldsOnly;
  InstructionWorklist Worklist;
  bool vectorizeLoadInsert(Instruction &I);
  bool widenSubvectorLoad(Instruction &I);
  ExtractElementInst *getShuffleExtract(ExtractElementInst *Ext0,
                                        ExtractElementInst *Ext1,
                                        unsigned PreferredExtractIndex) const;
  bool isExtractExtractCheap(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
                             const Instruction &I,
                             ExtractElementInst *&ConvertToShuffle,
                             unsigned PreferredExtractIndex);
  bool foldExtractExtract(Instruction &I);
  bool foldInsExtFNeg(Instruction &I);
  bool foldInsExtBinop(Instruction &I);
  bool foldInsExtVectorToShuffle(Instruction &I);
  bool foldBitOpOfCastops(Instruction &I);
  bool foldBitOpOfCastConstant(Instruction &I);
  bool foldBitcastShuffle(Instruction &I);
  bool scalarizeOpOrCmp(Instruction &I);
  bool scalarizeVPIntrinsic(Instruction &I);
  bool foldExtractedCmps(Instruction &I);
  bool foldBinopOfReductions(Instruction &I);
  bool foldSingleElementStore(Instruction &I);
  bool scalarizeLoad(Instruction &I);
  bool scalarizeLoadExtract(LoadInst *LI, VectorType *VecTy, Value *Ptr);
  bool scalarizeLoadBitcast(LoadInst *LI, VectorType *VecTy, Value *Ptr);
  bool scalarizeExtExtract(Instruction &I);
  bool foldConcatOfBoolMasks(Instruction &I);
  bool foldPermuteOfBinops(Instruction &I);
  bool foldShuffleOfBinops(Instruction &I);
  bool foldShuffleOfSelects(Instruction &I);
  bool foldShuffleOfCastops(Instruction &I);
  bool foldShuffleOfShuffles(Instruction &I);
  bool foldPermuteOfIntrinsic(Instruction &I);
  bool foldShuffleOfIntrinsics(Instruction &I);
  bool foldShuffleToIdentity(Instruction &I);
  bool foldShuffleFromReductions(Instruction &I);
  bool foldShuffleChainsToReduce(Instruction &I);
  bool foldCastFromReductions(Instruction &I);
  bool foldSelectShuffle(Instruction &I, bool FromReduction = false);
  bool foldInterleaveIntrinsics(Instruction &I);
  bool shrinkType(Instruction &I);
  bool shrinkLoadForShuffles(Instruction &I);
  bool shrinkPhiOfShuffles(Instruction &I);

  void replaceValue(Instruction &Old, Value &New, bool Erase = true) {
      Worklist.pushUsersToWorkList(*NewI);
      Worklist.pushValue(NewI);
    SmallPtrSet<Value *, 4> Visited;
        OpI, nullptr, nullptr, [&](Value *V) {
      NextInst = NextInst->getNextNode();
      Worklist.pushUsersToWorkList(*OpI);
      Worklist.pushValue(OpI);
  if (!Load || !Load->isSimple() || !Load->hasOneUse() ||
      Load->getFunction()->hasFnAttribute(Attribute::SanitizeMemTag) ||
  Type *ScalarTy = Load->getType()->getScalarType();
  unsigned MinVectorSize = TTI.getMinVectorRegisterBitWidth();
  if (!ScalarSize || !MinVectorSize || MinVectorSize % ScalarSize != 0 ||
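// Illustrative sketch, not from the original source: the IR shape this fold
// targets is a scalar load feeding an insertelement into lane 0, e.g.
//   %s = load float, ptr %p, align 16
//   %r = insertelement <4 x float> poison, float %s, i64 0
// which, when the cost model agrees, is rewritten as a full vector load plus
// a shuffle that keeps only the loaded lane:
//   %ld = load <4 x float>, ptr %p, align 16
//   %r  = shufflevector <4 x float> %ld, <4 x float> poison,
//                       <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
// The value names and types here are hypothetical.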
bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
  Value *SrcPtr = Load->getPointerOperand()->stripPointerCasts();
  unsigned MinVecNumElts = MinVectorSize / ScalarSize;
  auto *MinVecTy = VectorType::get(ScalarTy, MinVecNumElts, false);
  unsigned OffsetEltIndex = 0;
  unsigned OffsetBitWidth = DL->getIndexTypeSizeInBits(SrcPtr->getType());
  APInt Offset(OffsetBitWidth, 0);
  uint64_t ScalarSizeInBytes = ScalarSize / 8;
  if (Offset.urem(ScalarSizeInBytes) != 0)
  OffsetEltIndex = Offset.udiv(ScalarSizeInBytes).getZExtValue();
  if (OffsetEltIndex >= MinVecNumElts)
  unsigned AS = Load->getPointerAddressSpace();
  unsigned OutputNumElts = Ty->getNumElements();
  assert(OffsetEltIndex < MinVecNumElts && "Address offset too big");
  Mask[0] = OffsetEltIndex;
  if (OldCost < NewCost || !NewCost.isValid())
  replaceValue(I, *VecLd);
bool VectorCombine::widenSubvectorLoad(Instruction &I) {
  if (!Shuf->isIdentityWithPadding())
  unsigned OpIndex = any_of(Shuf->getShuffleMask(), [&NumOpElts](int M) {
    return M >= (int)(NumOpElts);
  Value *SrcPtr = Load->getPointerOperand()->stripPointerCasts();
  unsigned AS = Load->getPointerAddressSpace();
  if (OldCost < NewCost || !NewCost.isValid())
  replaceValue(I, *VecLd);
ExtractElementInst *VectorCombine::getShuffleExtract(
    ExtractElementInst *Ext0, ExtractElementInst *Ext1,
  assert(Index0C && Index1C && "Expected constant extract indexes");
  unsigned Index0 = Index0C->getZExtValue();
  unsigned Index1 = Index1C->getZExtValue();
  if (Index0 == Index1)
  if (PreferredExtractIndex == Index0)
  if (PreferredExtractIndex == Index1)
  return Index0 > Index1 ? Ext0 : Ext1;
bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
                                          ExtractElementInst *Ext1,
                                          const Instruction &I,
                                          ExtractElementInst *&ConvertToShuffle,
                                          unsigned PreferredExtractIndex) {
  assert(Ext0IndexC && Ext1IndexC && "Expected constant extract indexes");
  unsigned Opcode = I.getOpcode();
    assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
           "Expected a compare");
  unsigned Ext0Index = Ext0IndexC->getZExtValue();
  unsigned Ext1Index = Ext1IndexC->getZExtValue();
  unsigned BestExtIndex = Extract0Cost > Extract1Cost ? Ext0Index : Ext1Index;
  unsigned BestInsIndex = Extract0Cost > Extract1Cost ? Ext1Index : Ext0Index;
  InstructionCost CheapExtractCost = std::min(Extract0Cost, Extract1Cost);
  if (Ext0Src == Ext1Src && Ext0Index == Ext1Index) {
    bool HasUseTax = Ext0 == Ext1 ? !Ext0->hasNUses(2)
    OldCost = CheapExtractCost + ScalarOpCost;
    NewCost = VectorOpCost + CheapExtractCost + HasUseTax * CheapExtractCost;
    OldCost = Extract0Cost + Extract1Cost + ScalarOpCost;
    NewCost = VectorOpCost + CheapExtractCost +
  ConvertToShuffle = getShuffleExtract(Ext0, Ext1, PreferredExtractIndex);
  if (ConvertToShuffle) {
    SmallVector<int> ShuffleMask(FixedVecTy->getNumElements(),
    ShuffleMask[BestInsIndex] = BestExtIndex;
        VecTy, VecTy, ShuffleMask, CostKind, 0, nullptr, {ConvertToShuffle});
        VecTy, VecTy, {}, CostKind, 0, nullptr,
  return OldCost < NewCost;
  ShufMask[NewIndex] = OldIndex;
  return Builder.CreateShuffleVector(Vec, ShufMask, "shift");
                                     V1, "foldExtExtBinop");
    VecBOInst->copyIRFlags(&I);
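// Illustrative sketch, not from the original source: foldExtractExtract
// targets a scalar binop/cmp of two extracts with constant indices, e.g.
//   %e0 = extractelement <4 x i32> %x, i64 1
//   %e1 = extractelement <4 x i32> %y, i64 1
//   %r  = add i32 %e0, %e1
// and, when profitable, performs the operation in the vector domain instead:
//   %v = add <4 x i32> %x, %y
//   %r = extractelement <4 x i32> %v, i64 1
// If the two extract indices differ, one operand is first shuffled so the
// lanes line up. Names and types are hypothetical.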
bool VectorCombine::foldExtractExtract(Instruction &I) {
  ExtractElementInst *ExtractToChange;
  if (isExtractExtractCheap(Ext0, Ext1, I, ExtractToChange, InsertIndex))
  if (ExtractToChange) {
    unsigned CheapExtractIdx = ExtractToChange == Ext0 ? C1 : C0;
    if (ExtractToChange == Ext0)
          ? foldExtExtCmp(ExtOp0, ExtOp1, ExtIndex, I)
          : foldExtExtBinop(ExtOp0, ExtOp1, ExtIndex, I);
  replaceValue(I, *NewExt);
bool VectorCombine::foldInsExtFNeg(Instruction &I) {
  uint64_t ExtIdx, InsIdx;
  auto *DstVecScalarTy = DstVecTy->getScalarType();
  if (!SrcVecTy || DstVecScalarTy != SrcVecTy->getScalarType())
  unsigned NumDstElts = DstVecTy->getNumElements();
  unsigned NumSrcElts = SrcVecTy->getNumElements();
  if (ExtIdx > NumSrcElts || InsIdx >= NumDstElts || NumDstElts == 1)
  SmallVector<int> Mask(NumDstElts);
  std::iota(Mask.begin(), Mask.end(), 0);
  Mask[InsIdx] = (ExtIdx % NumDstElts) + NumDstElts;
  bool NeedLenChg = SrcVecTy->getNumElements() != NumDstElts;
  SmallVector<int> SrcMask;
    SrcMask[ExtIdx % NumDstElts] = ExtIdx;
        DstVecTy, SrcVecTy, SrcMask, CostKind);
             << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (NewCost > OldCost)
  Value *NewShuf, *LenChgShuf = nullptr;
  replaceValue(I, *NewShuf);
bool VectorCombine::foldInsExtBinop(Instruction &I) {
  BinaryOperator *VecBinOp, *SclBinOp;
             << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (NewCost > OldCost)
    NewInst->copyIRFlags(VecBinOp);
    NewInst->andIRFlags(SclBinOp);
  replaceValue(I, *NewBO);
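// Illustrative sketch, not from the original source: foldBitOpOfCastops
// handles a bitwise logic op whose operands are the same kind of cast, e.g.
//   %a = zext <4 x i16> %x to <4 x i32>
//   %b = zext <4 x i16> %y to <4 x i32>
//   %r = and <4 x i32> %a, %b
// and, when the cost model agrees, performs the logic op on the narrow
// sources and casts once:
//   %t = and <4 x i16> %x, %y
//   %r = zext <4 x i16> %t to <4 x i32>
// Names and element types are hypothetical.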
bool VectorCombine::foldBitOpOfCastops(Instruction &I) {
  if (!BinOp || !BinOp->isBitwiseLogicOp())
  if (!LHSCast || !RHSCast) {
    LLVM_DEBUG(dbgs() << " One or both operands are not cast instructions\n");
  if (CastOpcode != RHSCast->getOpcode())
  switch (CastOpcode) {
  case Instruction::BitCast:
  case Instruction::Trunc:
  case Instruction::SExt:
  case Instruction::ZExt:
  Value *LHSSrc = LHSCast->getOperand(0);
  Value *RHSSrc = RHSCast->getOperand(0);
  auto *SrcTy = LHSSrc->getType();
  auto *DstTy = I.getType();
  if (CastOpcode != Instruction::BitCast &&
  if (!SrcTy->getScalarType()->isIntegerTy() ||
      !DstTy->getScalarType()->isIntegerTy())
      LHSCastCost + RHSCastCost;
  if (!LHSCast->hasOneUse())
    NewCost += LHSCastCost;
  if (!RHSCast->hasOneUse())
    NewCost += RHSCastCost;
                    << " NewCost=" << NewCost << "\n");
  if (NewCost > OldCost)
                                        BinOp->getName() + ".inner");
    NewBinOp->copyIRFlags(BinOp);
  replaceValue(I, *Result);
bool VectorCombine::foldBitOpOfCastConstant(Instruction &I) {
  switch (CastOpcode) {
  case Instruction::BitCast:
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::Trunc:
  Value *LHSSrc = LHSCast->getOperand(0);
  auto *SrcTy = LHSSrc->getType();
  auto *DstTy = I.getType();
  if (CastOpcode != Instruction::BitCast &&
  if (!SrcTy->getScalarType()->isIntegerTy() ||
      !DstTy->getScalarType()->isIntegerTy())
  PreservedCastFlags RHSFlags;
  if (!LHSCast->hasOneUse())
    NewCost += LHSCastCost;
  LLVM_DEBUG(dbgs() << "foldBitOpOfCastConstant: OldCost=" << OldCost
                    << " NewCost=" << NewCost << "\n");
  if (NewCost > OldCost)
                                        LHSSrc, InvC, I.getName() + ".inner");
    NewBinOp->copyIRFlags(&I);
  replaceValue(I, *Result);
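// Illustrative sketch, not from the original source: foldBitcastShuffle moves
// a shuffle to after a bitcast, scaling the mask to the new element size, e.g.
//   %s = shufflevector <2 x i64> %x, <2 x i64> poison, <2 x i32> <i32 1, i32 0>
//   %r = bitcast <2 x i64> %s to <4 x i32>
// becomes (little-endian lane numbering assumed here):
//   %b = bitcast <2 x i64> %x to <4 x i32>
//   %r = shufflevector <4 x i32> %b, <4 x i32> poison,
//                      <4 x i32> <i32 2, i32 3, i32 0, i32 1>
// Names and types are hypothetical.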
bool VectorCombine::foldBitcastShuffle(Instruction &I) {
  if (!DestTy || !SrcTy)
  unsigned DestEltSize = DestTy->getScalarSizeInBits();
  unsigned SrcEltSize = SrcTy->getScalarSizeInBits();
  if (SrcTy->getPrimitiveSizeInBits() % DestEltSize != 0)
  if (!(BCTy0 && BCTy0->getElementType() == DestTy->getElementType()) &&
      !(BCTy1 && BCTy1->getElementType() == DestTy->getElementType()))
  SmallVector<int, 16> NewMask;
  if (DestEltSize <= SrcEltSize) {
    assert(SrcEltSize % DestEltSize == 0 && "Unexpected shuffle mask");
    unsigned ScaleFactor = SrcEltSize / DestEltSize;
    assert(DestEltSize % SrcEltSize == 0 && "Unexpected shuffle mask");
    unsigned ScaleFactor = DestEltSize / SrcEltSize;
  unsigned NumSrcElts = SrcTy->getPrimitiveSizeInBits() / DestEltSize;
  auto *NewShuffleTy =
  auto *OldShuffleTy =
  unsigned NumOps = IsUnary ? 1 : 2;
                               TargetTransformInfo::CastContextHint::None,
                               TargetTransformInfo::CastContextHint::None,
  LLVM_DEBUG(dbgs() << "Found a bitcasted shuffle: " << I << "\n OldCost: "
                    << OldCost << " vs NewCost: " << NewCost << "\n");
  if (NewCost > OldCost || !NewCost.isValid())
  replaceValue(I, *Shuf);
bool VectorCombine::scalarizeVPIntrinsic(Instruction &I) {
  if (!ScalarOp0 || !ScalarOp1)
  auto IsAllTrueMask = [](Value *MaskVal) {
      return ConstValue->isAllOnesValue();
  SmallVector<int> Mask;
    Mask.resize(FVTy->getNumElements(), 0);
    Args.push_back(V->getType());
  IntrinsicCostAttributes Attrs(IntrID, VecTy, Args);
  std::optional<unsigned> FunctionalOpcode =
  std::optional<Intrinsic::ID> ScalarIntrID = std::nullopt;
  if (!FunctionalOpcode) {
    IntrinsicCostAttributes Attrs(*ScalarIntrID, VecTy->getScalarType(), Args);
  InstructionCost NewCost = ScalarOpCost + SplatCost + CostToKeepSplats;
  LLVM_DEBUG(dbgs() << "Found a VP Intrinsic to scalarize: " << VPI
                    << ", Cost of scalarizing:" << NewCost << "\n");
  if (OldCost < NewCost || !NewCost.isValid())
  bool SafeToSpeculate;
        *FunctionalOpcode, &VPI, nullptr, &AC, &DT);
  if (!SafeToSpeculate &&
                                  {ScalarOp0, ScalarOp1})
                                  ScalarOp0, ScalarOp1);
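// Illustrative sketch, not from the original source: scalarizeOpOrCmp targets
// a vector op whose operands are insertelements of scalars into constant
// vectors at the same lane, e.g.
//   %x = insertelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 %a, i64 0
//   %y = insertelement <4 x i32> <i32 4, i32 4, i32 4, i32 4>, i32 %b, i64 0
//   %r = add <4 x i32> %x, %y
// and performs the operation on the scalars, folding the constant lanes:
//   %s = add i32 %a, %b
//   %r = insertelement <4 x i32> <i32 4, i32 5, i32 6, i32 7>, i32 %s, i64 0
// Names and constants are hypothetical.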
bool VectorCombine::scalarizeOpOrCmp(Instruction &I) {
  if (!UO && !BO && !CI && !II)
      if (Arg->getType() != II->getType() &&
  for (User *U : I.users())
  std::optional<uint64_t> Index;
  auto Ops = II ? II->args() : I.operands();
    uint64_t InsIdx = 0;
      if (OpTy->getElementCount().getKnownMinValue() <= InsIdx)
    else if (InsIdx != *Index)
  if (!Index.has_value())
  Type *ScalarTy = VecTy->getScalarType();
  assert(VecTy->isVectorTy() &&
         "Unexpected types for insert element into binop or cmp");
  unsigned Opcode = I.getOpcode();
  } else if (UO || BO) {
    IntrinsicCostAttributes ScalarICA(
        II->getIntrinsicID(), ScalarTy,
    IntrinsicCostAttributes VectorICA(
        II->getIntrinsicID(), VecTy,
  Value *NewVecC = nullptr;
    NewVecC = simplifyCmpInst(CI->getPredicate(), VecCs[0], VecCs[1], SQ);
        simplifyUnOp(UO->getOpcode(), VecCs[0], UO->getFastMathFlags(), SQ);
    NewVecC = simplifyBinOp(BO->getOpcode(), VecCs[0], VecCs[1], SQ);
  for (auto [Idx, Op, VecC, Scalar] : enumerate(Ops, VecCs, ScalarOps)) {
                             II->getIntrinsicID(), Idx, &TTI)))
        Instruction::InsertElement, VecTy, CostKind, *Index, VecC, Scalar);
    OldCost += InsertCost;
    NewCost += !Op->hasOneUse() * InsertCost;
  if (OldCost < NewCost || !NewCost.isValid())
    ++NumScalarIntrinsic;
    Scalar = Builder.CreateCmp(CI->getPredicate(), ScalarOps[0], ScalarOps[1]);
  Scalar->setName(I.getName() + ".scalar");
    ScalarInst->copyIRFlags(&I);
  replaceValue(I, *Insert);
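// Illustrative sketch, not from the original source: foldExtractedCmps targets
// a logic op combining two compares of elements extracted from one vector, e.g.
//   %e0 = extractelement <4 x i32> %x, i64 0
//   %c0 = icmp sgt i32 %e0, 10
//   %e1 = extractelement <4 x i32> %x, i64 1
//   %c1 = icmp sgt i32 %e1, 20
//   %r  = and i1 %c0, %c1
// and, when profitable, rewrites it as a single vector compare against a
// constant vector, a shuffle to line the two lanes up, a vector logic op, and
// one final extractelement of the i1 result. Names and constants are
// hypothetical.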
bool VectorCombine::foldExtractedCmps(Instruction &I) {
  if (!BI || !I.getType()->isIntegerTy(1))
  Value *B0 = I.getOperand(0), *B1 = I.getOperand(1);
  CmpPredicate P0, P1;
  uint64_t Index0, Index1;
  ExtractElementInst *ConvertToShuf = getShuffleExtract(Ext0, Ext1, CostKind);
  assert((ConvertToShuf == Ext0 || ConvertToShuf == Ext1) &&
         "Unknown ExtractElementInst");
  unsigned CmpOpcode =
      Ext0Cost + Ext1Cost + CmpCost * 2 +
  int CheapIndex = ConvertToShuf == Ext0 ? Index1 : Index0;
  int ExpensiveIndex = ConvertToShuf == Ext0 ? Index0 : Index1;
  ShufMask[CheapIndex] = ExpensiveIndex;
  NewCost += Ext0->hasOneUse() ? 0 : Ext0Cost;
  NewCost += Ext1->hasOneUse() ? 0 : Ext1Cost;
  if (OldCost < NewCost || !NewCost.isValid())
  Value *LHS = ConvertToShuf == Ext0 ? Shuf : VCmp;
  Value *RHS = ConvertToShuf == Ext0 ? VCmp : Shuf;
  replaceValue(I, *NewExt);
  unsigned ReductionOpc =
    CostBeforeReduction =
        TTI.getCastInstrCost(RedOp->getOpcode(), VecRedTy, ExtType,
    CostAfterReduction =
        TTI.getExtendedReductionCost(ReductionOpc, IsUnsigned, II.getType(),
  if (RedOp && II.getIntrinsicID() == Intrinsic::vector_reduce_add &&
      (Op0->getOpcode() == RedOp->getOpcode() || Op0 == Op1)) {
        TTI.getCastInstrCost(Op0->getOpcode(), MulType, ExtType,
        TTI.getArithmeticInstrCost(Instruction::Mul, MulType, CostKind);
        TTI.getCastInstrCost(RedOp->getOpcode(), VecRedTy, MulType,
    CostBeforeReduction = ExtCost * 2 + MulCost + Ext2Cost;
    CostAfterReduction = TTI.getMulAccReductionCost(
        IsUnsigned, ReductionOpc, II.getType(), ExtType, CostKind);
  CostAfterReduction = TTI.getArithmeticReductionCost(ReductionOpc, VecRedTy,
bool VectorCombine::foldBinopOfReductions(Instruction &I) {
  if (BinOpOpc == Instruction::Sub)
    ReductionIID = Intrinsic::vector_reduce_add;
  auto checkIntrinsicAndGetItsArgument = [](Value *V,
    if (II->getIntrinsicID() == IID && II->hasOneUse())
      return II->getArgOperand(0);
  Value *V0 = checkIntrinsicAndGetItsArgument(I.getOperand(0), ReductionIID);
  Value *V1 = checkIntrinsicAndGetItsArgument(I.getOperand(1), ReductionIID);
  unsigned ReductionOpc =
      CostOfRedOperand0 + CostOfRedOperand1 +
  if (NewCost >= OldCost || !NewCost.isValid())
             << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (BinOpOpc == Instruction::Or)
    VectorBO = Builder.CreateOr(V0, V1, "",
  replaceValue(I, *Rdx);
  unsigned NumScanned = 0;
  return std::any_of(Begin, End, [&](const Instruction &Instr) {
class ScalarizationResult {
  enum class StatusTy { Unsafe, Safe, SafeWithFreeze };

  ScalarizationResult(StatusTy Status, Value *ToFreeze = nullptr)
      : Status(Status), ToFreeze(ToFreeze) {}

  ScalarizationResult(const ScalarizationResult &Other) = default;
  ~ScalarizationResult() {
    assert(!ToFreeze && "freeze() not called with ToFreeze being set");

  static ScalarizationResult unsafe() { return {StatusTy::Unsafe}; }
  static ScalarizationResult safe() { return {StatusTy::Safe}; }
  static ScalarizationResult safeWithFreeze(Value *ToFreeze) {
    return {StatusTy::SafeWithFreeze, ToFreeze};

  bool isSafe() const { return Status == StatusTy::Safe; }
  bool isUnsafe() const { return Status == StatusTy::Unsafe; }
  bool isSafeWithFreeze() const { return Status == StatusTy::SafeWithFreeze; }

    Status = StatusTy::Unsafe;

  void freeze(IRBuilderBase &Builder, Instruction &UserI) {
    assert(isSafeWithFreeze() &&
           "should only be used when freezing is required");
           "UserI must be a user of ToFreeze");
    IRBuilder<>::InsertPointGuard Guard(Builder);
      if (U.get() == ToFreeze)

  uint64_t NumElements = VecTy->getElementCount().getKnownMinValue();
    if (C->getValue().ult(NumElements))
      return ScalarizationResult::safe();
    return ScalarizationResult::unsafe();
    return ScalarizationResult::unsafe();
  APInt Zero(IntWidth, 0);
  APInt MaxElts(IntWidth, NumElements);
                 true, &AC, CtxI, &DT)))
    return ScalarizationResult::safe();
  return ScalarizationResult::unsafe();
  if (ValidIndices.contains(IdxRange))
    return ScalarizationResult::safeWithFreeze(IdxBase);
  return ScalarizationResult::unsafe();
      C->getZExtValue() * DL.getTypeStoreSize(ScalarType));
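// Illustrative sketch, not from the original source: foldSingleElementStore
// targets a store of a vector that was loaded from the same address and had
// a single lane replaced, e.g.
//   %v  = load <4 x i32>, ptr %p
//   %v1 = insertelement <4 x i32> %v, i32 %s, i64 1
//   store <4 x i32> %v1, ptr %p
// and rewrites it as a scalar store of just the modified element:
//   %gep = getelementptr inbounds <4 x i32>, ptr %p, i32 0, i32 1
//   store i32 %s, ptr %gep
// Names and types are hypothetical.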
bool VectorCombine::foldSingleElementStore(Instruction &I) {
  if (!match(SI->getValueOperand(),
  Value *SrcAddr = Load->getPointerOperand()->stripPointerCasts();
  if (!Load->isSimple() || Load->getParent() != SI->getParent() ||
      !DL->typeSizeEqualsStoreSize(Load->getType()->getScalarType()) ||
      SrcAddr != SI->getPointerOperand()->stripPointerCasts())
  if (ScalarizableIdx.isUnsafe() ||
    Worklist.push(Load);
    if (ScalarizableIdx.isSafeWithFreeze())
        SI->getValueOperand()->getType(), SI->getPointerOperand(),
        {ConstantInt::get(Idx->getType(), 0), Idx});
        std::max(SI->getAlign(), Load->getAlign()), NewElement->getType(), Idx,
    replaceValue(I, *NSI);
bool VectorCombine::scalarizeLoad(Instruction &I) {
  if (LI->isVolatile() || !DL->typeSizeEqualsStoreSize(VecTy->getScalarType()))
  bool AllExtracts = true;
  bool AllBitcasts = true;
  unsigned NumInstChecked = 0;
  for (User *U : LI->users()) {
    if (!UI || UI->getParent() != LI->getParent())
    if (UI->use_empty())
      AllExtracts = false;
      AllBitcasts = false;
    for (Instruction &I :
         make_range(std::next(LI->getIterator()), UI->getIterator())) {
    LastCheckedInst = UI;
    return scalarizeLoadExtract(LI, VecTy, Ptr);
  return scalarizeLoadBitcast(LI, VecTy, Ptr);
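// Illustrative sketch, not from the original source: scalarizeLoadExtract
// targets a vector load whose only users are extractelements, e.g.
//   %v  = load <4 x i32>, ptr %p
//   %e0 = extractelement <4 x i32> %v, i64 0
//   %e2 = extractelement <4 x i32> %v, i64 2
// and, when the cost model agrees, replaces each extract with a narrow load
// through a GEP to the corresponding element:
//   %p0 = getelementptr inbounds <4 x i32>, ptr %p, i32 0, i32 0
//   %e0 = load i32, ptr %p0
//   %p2 = getelementptr inbounds <4 x i32>, ptr %p, i32 0, i32 2
//   %e2 = load i32, ptr %p2
// Names and types are hypothetical.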
bool VectorCombine::scalarizeLoadExtract(LoadInst *LI, VectorType *VecTy,
  DenseMap<ExtractElementInst *, ScalarizationResult> NeedFreeze;
    for (auto &Pair : NeedFreeze)
      Pair.second.discard();
  for (User *U : LI->users()) {
    if (ScalarIdx.isUnsafe())
    if (ScalarIdx.isSafeWithFreeze()) {
      NeedFreeze.try_emplace(UI, ScalarIdx);
      ScalarIdx.discard();
        Index ? Index->getZExtValue() : -1);
  LLVM_DEBUG(dbgs() << "Found all extractions of a vector load: " << *LI
                    << "\n LoadExtractCost: " << OriginalCost
                    << " vs ScalarizedCost: " << ScalarizedCost << "\n");
  if (ScalarizedCost >= OriginalCost)
  Type *ElemType = VecTy->getElementType();
  for (User *U : LI->users()) {
    Value *Idx = EI->getIndexOperand();
    auto It = NeedFreeze.find(EI);
    if (It != NeedFreeze.end())
        Builder.CreateLoad(ElemType, GEP, EI->getName() + ".scalar"));
    Align ScalarOpAlignment =
    NewLoad->setAlignment(ScalarOpAlignment);
      size_t Offset = ConstIdx->getZExtValue() * DL->getTypeStoreSize(ElemType);
    replaceValue(*EI, *NewLoad, false);
  FailureGuard.release();
bool VectorCombine::scalarizeLoadBitcast(LoadInst *LI, VectorType *VecTy,
  Type *TargetScalarType = nullptr;
  unsigned VecBitWidth = DL->getTypeSizeInBits(VecTy);
  for (User *U : LI->users()) {
    Type *DestTy = BC->getDestTy();
    unsigned DestBitWidth = DL->getTypeSizeInBits(DestTy);
    if (DestBitWidth != VecBitWidth)
    if (!TargetScalarType)
      TargetScalarType = DestTy;
    else if (TargetScalarType != DestTy)
  if (!TargetScalarType)
  LLVM_DEBUG(dbgs() << "Found vector load feeding only bitcasts: " << *LI
                    << "\n OriginalCost: " << OriginalCost
                    << " vs ScalarizedCost: " << ScalarizedCost << "\n");
  if (ScalarizedCost >= OriginalCost)
  ScalarLoad->copyMetadata(*LI);
  for (User *U : LI->users()) {
    replaceValue(*BC, *ScalarLoad, false);
bool VectorCombine::scalarizeExtExtract(Instruction &I) {
  Type *ScalarDstTy = DstTy->getElementType();
  if (DL->getTypeSizeInBits(SrcTy) != DL->getTypeSizeInBits(ScalarDstTy))
  unsigned ExtCnt = 0;
  bool ExtLane0 = false;
  for (User *U : Ext->users()) {
          Instruction::And, ScalarDstTy, CostKind,
      (ExtCnt - ExtLane0) *
          Instruction::LShr, ScalarDstTy, CostKind,
  if (ScalarCost > VectorCost)
  Value *ScalarV = Ext->getOperand(0);
  SmallDenseSet<ConstantInt *, 8> ExtractedLanes;
  bool AllExtractsTriggerUB = true;
  ExtractElementInst *LastExtract = nullptr;
  for (User *U : Ext->users()) {
      AllExtractsTriggerUB = false;
    if (!LastExtract || LastExtract->comesBefore(Extract))
      LastExtract = Extract;
  if (ExtractedLanes.size() != DstTy->getNumElements() ||
      !AllExtractsTriggerUB ||
  uint64_t SrcEltSizeInBits = DL->getTypeSizeInBits(SrcTy->getElementType());
  uint64_t EltBitMask = (1ull << SrcEltSizeInBits) - 1;
  uint64_t TotalBits = DL->getTypeSizeInBits(SrcTy);
  Value *Mask = ConstantInt::get(PackedTy, EltBitMask);
  for (User *U : Ext->users()) {
            ? (TotalBits - SrcEltSizeInBits - Idx * SrcEltSizeInBits)
            : (Idx * SrcEltSizeInBits);
    U->replaceAllUsesWith(And);
bool VectorCombine::foldConcatOfBoolMasks(Instruction &I) {
  Type *Ty = I.getType();
  if (DL->isBigEndian())
  uint64_t ShAmtX = 0;
  uint64_t ShAmtY = 0;
  if (ShAmtX > ShAmtY) {
  uint64_t ShAmtDiff = ShAmtY - ShAmtX;
  unsigned NumSHL = (ShAmtX > 0) + (ShAmtY > 0);
      MaskTy->getNumElements() != ShAmtDiff ||
      MaskTy->getNumElements() > (BitWidth / 2))
      Type::getIntNTy(Ty->getContext(), ConcatTy->getNumElements());
  auto *MaskIntTy = Type::getIntNTy(Ty->getContext(), ShAmtDiff);
  std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
  if (Ty != ConcatIntTy)
  LLVM_DEBUG(dbgs() << "Found a concatenation of bitcasted bool masks: " << I
                    << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (NewCost > OldCost)
  if (Ty != ConcatIntTy) {
  replaceValue(I, *Result);
2279bool VectorCombine::foldPermuteOfBinops(Instruction &
I) {
2280 BinaryOperator *BinOp;
2281 ArrayRef<int> OuterMask;
2290 Value *Op00, *Op01, *Op10, *Op11;
2291 ArrayRef<int> Mask0, Mask1;
2298 if (!Match0 && !Match1)
2311 if (!ShuffleDstTy || !BinOpTy || !Op0Ty || !Op1Ty)
2314 unsigned NumSrcElts = BinOpTy->getNumElements();
2319 any_of(OuterMask, [NumSrcElts](
int M) {
return M >= (int)NumSrcElts; }))
2323 SmallVector<int> NewMask0, NewMask1;
2324 for (
int M : OuterMask) {
2325 if (M < 0 || M >= (
int)NumSrcElts) {
2329 NewMask0.
push_back(Match0 ? Mask0[M] : M);
2330 NewMask1.
push_back(Match1 ? Mask1[M] : M);
2334 unsigned NumOpElts = Op0Ty->getNumElements();
2335 bool IsIdentity0 = ShuffleDstTy == Op0Ty &&
2336 all_of(NewMask0, [NumOpElts](
int M) {
return M < (int)NumOpElts; }) &&
2338 bool IsIdentity1 = ShuffleDstTy == Op1Ty &&
2339 all_of(NewMask1, [NumOpElts](
int M) {
return M < (int)NumOpElts; }) &&
2346 BinOpTy, OuterMask,
CostKind, 0,
nullptr, {BinOp}, &
I);
2362 Op0Ty, NewMask0,
CostKind, 0,
nullptr, {Op00, Op01});
2366 Op1Ty, NewMask1,
CostKind, 0,
nullptr, {Op10, Op11});
2368 LLVM_DEBUG(
dbgs() <<
"Found a shuffle feeding a shuffled binop: " <<
I
2369 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2373 if (NewCost > OldCost)
2384 NewInst->copyIRFlags(BinOp);
2388 replaceValue(
I, *NewBO);
2394bool VectorCombine::foldShuffleOfBinops(Instruction &
I) {
2395 ArrayRef<int> OldMask;
2402 if (
LHS->getOpcode() !=
RHS->getOpcode())
2406 bool IsCommutative =
false;
2415 IsCommutative = BinaryOperator::isCommutative(BO->getOpcode());
2426 if (!ShuffleDstTy || !BinResTy || !BinOpTy ||
X->getType() !=
Z->getType())
2429 unsigned NumSrcElts = BinOpTy->getNumElements();
2432 if (IsCommutative &&
X != Z &&
Y != W && (
X == W ||
Y == Z))
2435 auto ConvertToUnary = [NumSrcElts](
int &
M) {
2436 if (M >= (
int)NumSrcElts)
2440 SmallVector<int> NewMask0(OldMask);
2448 SmallVector<int> NewMask1(OldMask);
2471 ArrayRef<int> InnerMask;
2473 m_Mask(InnerMask)))) &&
2476 [NumSrcElts](
int M) {
return M < (int)NumSrcElts; })) {
2488 bool ReducedInstCount =
false;
2489 ReducedInstCount |= MergeInner(
X, 0, NewMask0,
CostKind);
2490 ReducedInstCount |= MergeInner(
Y, 0, NewMask1,
CostKind);
2491 ReducedInstCount |= MergeInner(Z, NumSrcElts, NewMask0,
CostKind);
2492 ReducedInstCount |= MergeInner(W, NumSrcElts, NewMask1,
CostKind);
2494 auto *ShuffleCmpTy =
2511 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2518 if (ReducedInstCount ? (NewCost > OldCost) : (NewCost >= OldCost))
2526 : Builder.
CreateCmp(PredLHS, Shuf0, Shuf1);
2530 NewInst->copyIRFlags(
LHS);
2531 NewInst->andIRFlags(
RHS);
2536 replaceValue(
I, *NewBO);
2543bool VectorCombine::foldShuffleOfSelects(Instruction &
I) {
2545 Value *C1, *
T1, *F1, *C2, *T2, *F2;
2554 if (!C1VecTy || !C2VecTy || C1VecTy != C2VecTy)
2560 if (((SI0FOp ==
nullptr) != (SI1FOp ==
nullptr)) ||
2561 ((SI0FOp !=
nullptr) &&
2562 (SI0FOp->getFastMathFlags() != SI1FOp->getFastMathFlags())))
2568 auto SelOp = Instruction::Select;
2575 {
I.getOperand(0),
I.getOperand(1)}, &
I);
2579 Mask,
CostKind, 0,
nullptr, {C1, C2});
2585 toVectorTy(Type::getInt1Ty(
I.getContext()), DstVecTy->getNumElements()));
2590 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2592 if (NewCost > OldCost)
2601 NewSel = Builder.
CreateSelectFMF(ShuffleCmp, ShuffleTrue, ShuffleFalse,
2602 SI0FOp->getFastMathFlags());
2604 NewSel = Builder.
CreateSelect(ShuffleCmp, ShuffleTrue, ShuffleFalse);
2609 replaceValue(
I, *NewSel);
2615bool VectorCombine::foldShuffleOfCastops(Instruction &
I) {
2617 ArrayRef<int> OldMask;
2626 if (!C0 || (IsBinaryShuffle && !C1))
2633 if (!IsBinaryShuffle && Opcode == Instruction::BitCast)
2636 if (IsBinaryShuffle) {
2637 if (C0->getSrcTy() != C1->getSrcTy())
2640 if (Opcode != C1->getOpcode()) {
2642 Opcode = Instruction::SExt;
2651 if (!ShuffleDstTy || !CastDstTy || !CastSrcTy)
2654 unsigned NumSrcElts = CastSrcTy->getNumElements();
2655 unsigned NumDstElts = CastDstTy->getNumElements();
2656 assert((NumDstElts == NumSrcElts || Opcode == Instruction::BitCast) &&
2657 "Only bitcasts expected to alter src/dst element counts");
2661 if (NumDstElts != NumSrcElts && (NumSrcElts % NumDstElts) != 0 &&
2662 (NumDstElts % NumSrcElts) != 0)
2665 SmallVector<int, 16> NewMask;
2666 if (NumSrcElts >= NumDstElts) {
2669 assert(NumSrcElts % NumDstElts == 0 &&
"Unexpected shuffle mask");
2670 unsigned ScaleFactor = NumSrcElts / NumDstElts;
2675 assert(NumDstElts % NumSrcElts == 0 &&
"Unexpected shuffle mask");
2676 unsigned ScaleFactor = NumDstElts / NumSrcElts;
2681 auto *NewShuffleDstTy =
2690 if (IsBinaryShuffle)
2705 if (IsBinaryShuffle) {
2715 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2717 if (NewCost > OldCost)
2721 if (IsBinaryShuffle)
2731 NewInst->copyIRFlags(C0);
2732 if (IsBinaryShuffle)
2733 NewInst->andIRFlags(C1);
2737 replaceValue(
I, *Cast);
2747bool VectorCombine::foldShuffleOfShuffles(Instruction &
I) {
2748 ArrayRef<int> OuterMask;
2749 Value *OuterV0, *OuterV1;
2754 ArrayRef<int> InnerMask0, InnerMask1;
2755 Value *X0, *X1, *Y0, *Y1;
2760 if (!Match0 && !Match1)
2765 SmallVector<int, 16> PoisonMask1;
2770 InnerMask1 = PoisonMask1;
2774 X0 = Match0 ? X0 : OuterV0;
2775 Y0 = Match0 ? Y0 : OuterV0;
2776 X1 = Match1 ? X1 : OuterV1;
2777 Y1 = Match1 ? Y1 : OuterV1;
2781 if (!ShuffleDstTy || !ShuffleSrcTy || !ShuffleImmTy ||
2785 unsigned NumSrcElts = ShuffleSrcTy->getNumElements();
2786 unsigned NumImmElts = ShuffleImmTy->getNumElements();
2791 SmallVector<int, 16> NewMask(OuterMask);
2792 Value *NewX =
nullptr, *NewY =
nullptr;
2793 for (
int &M : NewMask) {
2794 Value *Src =
nullptr;
2795 if (0 <= M && M < (
int)NumImmElts) {
2799 Src =
M >= (int)NumSrcElts ? Y0 : X0;
2800 M =
M >= (int)NumSrcElts ? (M - NumSrcElts) :
M;
2802 }
else if (M >= (
int)NumImmElts) {
2807 Src =
M >= (int)NumSrcElts ? Y1 : X1;
2808 M =
M >= (int)NumSrcElts ? (M - NumSrcElts) :
M;
2812 assert(0 <= M && M < (
int)NumSrcElts &&
"Unexpected shuffle mask index");
2821 if (!NewX || NewX == Src) {
2825 if (!NewY || NewY == Src) {
2841 replaceValue(
I, *NewX);
2858 bool IsUnary =
all_of(NewMask, [&](
int M) {
return M < (int)NumSrcElts; });
2864 nullptr, {NewX, NewY});
2866 NewCost += InnerCost0;
2868 NewCost += InnerCost1;
2871 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2873 if (NewCost > OldCost)
2877 replaceValue(
I, *Shuf);
2883bool VectorCombine::foldShuffleOfIntrinsics(Instruction &
I) {
2885 ArrayRef<int> OldMask;
2896 if (IID != II1->getIntrinsicID())
2901 if (!ShuffleDstTy || !II0Ty)
2907 for (
unsigned I = 0,
E = II0->arg_size();
I !=
E; ++
I)
2909 II0->getArgOperand(
I) != II1->getArgOperand(
I))
2916 II0Ty, OldMask,
CostKind, 0,
nullptr, {II0, II1}, &
I);
2920 for (
unsigned I = 0,
E = II0->arg_size();
I !=
E; ++
I) {
2922 NewArgsTy.
push_back(II0->getArgOperand(
I)->getType());
2926 ShuffleDstTy->getNumElements());
2930 CostKind, 0,
nullptr, {II0->getArgOperand(
I), II1->getArgOperand(
I)});
2933 IntrinsicCostAttributes NewAttr(IID, ShuffleDstTy, NewArgsTy);
2937 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2940 if (NewCost > OldCost)
2944 for (
unsigned I = 0,
E = II0->arg_size();
I !=
E; ++
I)
2949 II1->getArgOperand(
I), OldMask);
2957 NewInst->copyIRFlags(II0);
2958 NewInst->andIRFlags(II1);
2961 replaceValue(
I, *NewIntrinsic);
2967bool VectorCombine::foldPermuteOfIntrinsic(Instruction &
I) {
2979 if (!ShuffleDstTy || !IntrinsicSrcTy)
2983 unsigned NumSrcElts = IntrinsicSrcTy->getNumElements();
2984 if (
any_of(Mask, [NumSrcElts](
int M) {
return M >= (int)NumSrcElts; }))
2995 IntrinsicSrcTy, Mask,
CostKind, 0,
nullptr, {V0}, &
I);
2999 for (
unsigned I = 0,
E = II0->arg_size();
I !=
E; ++
I) {
3001 NewArgsTy.
push_back(II0->getArgOperand(
I)->getType());
3005 ShuffleDstTy->getNumElements());
3008 ArgTy, VecTy, Mask,
CostKind, 0,
nullptr,
3009 {II0->getArgOperand(
I)});
3012 IntrinsicCostAttributes NewAttr(IID, ShuffleDstTy, NewArgsTy);
3015 LLVM_DEBUG(
dbgs() <<
"Found a permute of intrinsic: " <<
I <<
"\n OldCost: "
3016 << OldCost <<
" vs NewCost: " << NewCost <<
"\n");
3018 if (NewCost > OldCost)
3023 for (
unsigned I = 0,
E = II0->arg_size();
I !=
E; ++
I) {
3038 replaceValue(
I, *NewIntrinsic);
3048 int M = SV->getMaskValue(Lane);
3051 if (
static_cast<unsigned>(M) < NumElts) {
3052 U = &SV->getOperandUse(0);
3055 U = &SV->getOperandUse(1);
3066 auto [U, Lane] = IL;
3080 unsigned NumElts = Ty->getNumElements();
3081 if (Item.
size() == NumElts || NumElts == 1 || Item.
size() % NumElts != 0)
3087 std::iota(ConcatMask.
begin(), ConcatMask.
end(), 0);
3093 unsigned NumSlices = Item.
size() / NumElts;
3098 for (
unsigned Slice = 0; Slice < NumSlices; ++Slice) {
3099 Use *SliceV = Item[Slice * NumElts].first;
3100 if (!SliceV || SliceV->get()->
getType() != Ty)
3102 for (
unsigned Elt = 0; Elt < NumElts; ++Elt) {
3103 auto [V, Lane] = Item[Slice * NumElts + Elt];
3104 if (Lane !=
static_cast<int>(Elt) || SliceV->get() != V->get())
3117 auto [FrontU, FrontLane] = Item.
front();
3119 if (IdentityLeafs.
contains(FrontU)) {
3120 return FrontU->get();
3124 return Builder.CreateShuffleVector(FrontU->get(), Mask);
3126 if (ConcatLeafs.
contains(FrontU)) {
3130 for (
unsigned S = 0; S < Values.
size(); ++S)
3131 Values[S] = Item[S * NumElts].first->get();
3133 while (Values.
size() > 1) {
3136 std::iota(Mask.begin(), Mask.end(), 0);
3138 for (
unsigned S = 0; S < NewValues.
size(); ++S)
3140 Builder.CreateShuffleVector(Values[S * 2], Values[S * 2 + 1], Mask);
3148 unsigned NumOps =
I->getNumOperands() - (
II ? 1 : 0);
3150 for (
unsigned Idx = 0; Idx <
NumOps; Idx++) {
3153 Ops[Idx] =
II->getOperand(Idx);
3157 Ty, IdentityLeafs, SplatLeafs, ConcatLeafs,
3162 for (
const auto &Lane : Item)
3175 auto *
Value = Builder.CreateCmp(CI->getPredicate(),
Ops[0],
Ops[1]);
3185 auto *
Value = Builder.CreateCast(CI->getOpcode(),
Ops[0], DstTy);
3190 auto *
Value = Builder.CreateIntrinsic(DstTy,
II->getIntrinsicID(),
Ops);
3204bool VectorCombine::foldShuffleToIdentity(Instruction &
I) {
3206 if (!Ty ||
I.use_empty())
3210 for (
unsigned M = 0,
E = Ty->getNumElements(); M <
E; ++M)
3215 SmallPtrSet<Use *, 4> IdentityLeafs, SplatLeafs, ConcatLeafs;
3216 unsigned NumVisited = 0;
3218 while (!Worklist.
empty()) {
3223 auto [FrontU, FrontLane] = Item.
front();
3231 return X->getType() ==
Y->getType() &&
3236 if (FrontLane == 0 &&
3238 Ty->getNumElements() &&
3241 return !
E.value().first || (IsEquiv(
E.value().first->get(), FrontV) &&
3242 E.value().second == (int)
E.index());
3244 IdentityLeafs.
insert(FrontU);
3249 C &&
C->getSplatValue() &&
3257 SplatLeafs.
insert(FrontU);
3262 auto [FrontU, FrontLane] = Item.
front();
3263 auto [
U, Lane] = IL;
3264 return !
U || (
U->get() == FrontU->get() && Lane == FrontLane);
3266 SplatLeafs.
insert(FrontU);
3272 auto CheckLaneIsEquivalentToFirst = [Item](
InstLane IL) {
3276 Value *
V = IL.first->get();
3282 if (CI->getPredicate() !=
cast<CmpInst>(FrontV)->getPredicate())
3285 if (CI->getSrcTy()->getScalarType() !=
3290 SI->getOperand(0)->getType() !=
3297 II->getIntrinsicID() ==
3299 !
II->hasOperandBundles());
3306 BO && BO->isIntDivRem())
3311 }
else if (
isa<UnaryOperator, TruncInst, ZExtInst, SExtInst, FPToSIInst,
3312 FPToUIInst, SIToFPInst, UIToFPInst>(FrontU)) {
3319 if (DstTy && SrcTy &&
3320 SrcTy->getNumElements() == DstTy->getNumElements()) {
3331 !
II->hasOperandBundles()) {
3332 for (
unsigned Op = 0,
E =
II->getNumOperands() - 1;
Op <
E;
Op++) {
3351 ConcatLeafs.
insert(FrontU);
3358 if (NumVisited <= 1)
3361 LLVM_DEBUG(
dbgs() <<
"Found a superfluous identity shuffle: " <<
I <<
"\n");
3367 ConcatLeafs, Builder, &
TTI);
3368 replaceValue(
I, *V);
3375bool VectorCombine::foldShuffleFromReductions(Instruction &
I) {
3379 switch (
II->getIntrinsicID()) {
3380 case Intrinsic::vector_reduce_add:
3381 case Intrinsic::vector_reduce_mul:
3382 case Intrinsic::vector_reduce_and:
3383 case Intrinsic::vector_reduce_or:
3384 case Intrinsic::vector_reduce_xor:
3385 case Intrinsic::vector_reduce_smin:
3386 case Intrinsic::vector_reduce_smax:
3387 case Intrinsic::vector_reduce_umin:
3388 case Intrinsic::vector_reduce_umax:
3397 std::queue<Value *> Worklist;
3398 SmallPtrSet<Value *, 4> Visited;
3399 ShuffleVectorInst *Shuffle =
nullptr;
3403 while (!Worklist.empty()) {
3404 Value *CV = Worklist.front();
3416 if (CI->isBinaryOp()) {
3417 for (
auto *
Op : CI->operand_values())
3421 if (Shuffle && Shuffle != SV)
3438 for (
auto *V : Visited)
3439 for (
auto *U :
V->users())
3440 if (!Visited.contains(U) && U != &
I)
3443 FixedVectorType *VecType =
3447 FixedVectorType *ShuffleInputType =
3449 if (!ShuffleInputType)
3455 SmallVector<int> ConcatMask;
3457 sort(ConcatMask, [](
int X,
int Y) {
return (
unsigned)
X < (unsigned)
Y; });
3458 bool UsesSecondVec =
3459 any_of(ConcatMask, [&](
int M) {
return M >= (int)NumInputElts; });
3466 ShuffleInputType, ConcatMask,
CostKind);
3468 LLVM_DEBUG(
dbgs() <<
"Found a reduction feeding from a shuffle: " << *Shuffle
3470 LLVM_DEBUG(
dbgs() <<
" OldCost: " << OldCost <<
" vs NewCost: " << NewCost
3472 bool MadeChanges =
false;
3473 if (NewCost < OldCost) {
3477 LLVM_DEBUG(
dbgs() <<
"Created new shuffle: " << *NewShuffle <<
"\n");
3478 replaceValue(*Shuffle, *NewShuffle);
3484 MadeChanges |= foldSelectShuffle(*Shuffle,
true);
3530bool VectorCombine::foldShuffleChainsToReduce(Instruction &
I) {
3532 std::queue<Value *> InstWorklist;
3536 std::optional<unsigned int> CommonCallOp = std::nullopt;
3537 std::optional<Instruction::BinaryOps> CommonBinOp = std::nullopt;
3539 bool IsFirstCallOrBinInst =
true;
3540 bool ShouldBeCallOrBinInst =
true;
3546 SmallVector<Value *, 2> PrevVecV(2,
nullptr);
3556 int64_t
VecSize = FVT->getNumElements();
3562 unsigned int NumLevels =
Log2_64_Ceil(VecSize), VisitedCnt = 0;
3563 int64_t ShuffleMaskHalf = 1, ExpectedParityMask = 0;
3573 for (
int Cur = VecSize, Mask = NumLevels - 1; Cur > 1;
3574 Cur = (Cur + 1) / 2, --
Mask) {
3576 ExpectedParityMask |= (1ll <<
Mask);
3579 InstWorklist.push(VecOpEE);
3581 while (!InstWorklist.empty()) {
3582 Value *CI = InstWorklist.front();
3586 if (!ShouldBeCallOrBinInst)
3589 if (!IsFirstCallOrBinInst &&
3590 any_of(PrevVecV, [](
Value *VecV) {
return VecV ==
nullptr; }))
3595 if (
II != (IsFirstCallOrBinInst ? VecOpEE : PrevVecV[0]))
3597 IsFirstCallOrBinInst =
false;
3600 CommonCallOp =
II->getIntrinsicID();
3601 if (
II->getIntrinsicID() != *CommonCallOp)
3604 switch (
II->getIntrinsicID()) {
3605 case Intrinsic::umin:
3606 case Intrinsic::umax:
3607 case Intrinsic::smin:
3608 case Intrinsic::smax: {
3609 auto *Op0 =
II->getOperand(0);
3610 auto *Op1 =
II->getOperand(1);
3618 ShouldBeCallOrBinInst ^= 1;
3620 IntrinsicCostAttributes ICA(
3621 *CommonCallOp,
II->getType(),
3622 {PrevVecV[0]->getType(), PrevVecV[1]->getType()});
3629 InstWorklist.push(PrevVecV[1]);
3630 InstWorklist.push(PrevVecV[0]);
3634 if (!ShouldBeCallOrBinInst)
3637 if (!IsFirstCallOrBinInst &&
3638 any_of(PrevVecV, [](
Value *VecV) {
return VecV ==
nullptr; }))
3641 if (BinOp != (IsFirstCallOrBinInst ? VecOpEE : PrevVecV[0]))
3643 IsFirstCallOrBinInst =
false;
3651 switch (*CommonBinOp) {
3652 case BinaryOperator::Add:
3653 case BinaryOperator::Mul:
3654 case BinaryOperator::Or:
3655 case BinaryOperator::And:
3656 case BinaryOperator::Xor: {
3666 ShouldBeCallOrBinInst ^= 1;
3673 InstWorklist.push(PrevVecV[1]);
3674 InstWorklist.push(PrevVecV[0]);
3678 if (ShouldBeCallOrBinInst ||
3679 any_of(PrevVecV, [](
Value *VecV) {
return VecV ==
nullptr; }))
3682 if (SVInst != PrevVecV[1])
3685 ArrayRef<int> CurMask;
3691 for (
int Mask = 0, MaskSize = CurMask.
size(); Mask != MaskSize; ++Mask) {
3692 if (Mask < ShuffleMaskHalf &&
3693 CurMask[Mask] != ShuffleMaskHalf + Mask - (ExpectedParityMask & 1))
3695 if (Mask >= ShuffleMaskHalf && CurMask[Mask] != -1)
3700 ShuffleMaskHalf *= 2;
3701 ShuffleMaskHalf -= (ExpectedParityMask & 1);
3702 ExpectedParityMask >>= 1;
3705 SVInst->getType(), SVInst->getType(),
3709 if (!ExpectedParityMask && VisitedCnt == NumLevels)
3712 ShouldBeCallOrBinInst ^= 1;
3719 if (ShouldBeCallOrBinInst)
3722 assert(VecSize != -1 &&
"Expected Match for Vector Size");
3724 Value *FinalVecV = PrevVecV[0];
3736 IntrinsicCostAttributes ICA(ReducedOp, FinalVecVTy, {FinalVecV});
3739 if (NewCost >= OrigCost)
3742 auto *ReducedResult =
3744 replaceValue(
I, *ReducedResult);
3753bool VectorCombine::foldCastFromReductions(Instruction &
I) {
3758 bool TruncOnly =
false;
3761 case Intrinsic::vector_reduce_add:
3762 case Intrinsic::vector_reduce_mul:
3765 case Intrinsic::vector_reduce_and:
3766 case Intrinsic::vector_reduce_or:
3767 case Intrinsic::vector_reduce_xor:
3774 Value *ReductionSrc =
I.getOperand(0);
3786 Type *ResultTy =
I.getType();
3789 ReductionOpc, ReductionSrcTy, std::nullopt,
CostKind);
3799 if (OldCost <= NewCost || !NewCost.
isValid())
3803 II->getIntrinsicID(), {Src});
3805 replaceValue(
I, *NewCast);
3814 constexpr unsigned MaxVisited = 32;
3817 bool FoundReduction =
false;
3820 while (!WorkList.
empty()) {
3822 for (
User *U :
I->users()) {
3824 if (!UI || !Visited.
insert(UI).second)
3826 if (Visited.
size() > MaxVisited)
3832 switch (
II->getIntrinsicID()) {
3833 case Intrinsic::vector_reduce_add:
3834 case Intrinsic::vector_reduce_mul:
3835 case Intrinsic::vector_reduce_and:
3836 case Intrinsic::vector_reduce_or:
3837 case Intrinsic::vector_reduce_xor:
3838 case Intrinsic::vector_reduce_smin:
3839 case Intrinsic::vector_reduce_smax:
3840 case Intrinsic::vector_reduce_umin:
3841 case Intrinsic::vector_reduce_umax:
3842 FoundReduction =
true;
3855 return FoundReduction;
3868bool VectorCombine::foldSelectShuffle(Instruction &
I,
bool FromReduction) {
3873 if (!Op0 || !Op1 || Op0 == Op1 || !Op0->isBinaryOp() || !Op1->isBinaryOp() ||
3881 SmallPtrSet<Instruction *, 4> InputShuffles({SVI0A, SVI0B, SVI1A, SVI1B});
3883 if (!
I ||
I->getOperand(0)->getType() != VT)
3885 return any_of(
I->users(), [&](User *U) {
3886 return U != Op0 && U != Op1 &&
3887 !(isa<ShuffleVectorInst>(U) &&
3888 (InputShuffles.contains(cast<Instruction>(U)) ||
3889 isInstructionTriviallyDead(cast<Instruction>(U))));
3892 if (checkSVNonOpUses(SVI0A) || checkSVNonOpUses(SVI0B) ||
3893 checkSVNonOpUses(SVI1A) || checkSVNonOpUses(SVI1B))
3901 for (
auto *U :
I->users()) {
3903 if (!SV || SV->getType() != VT)
3905 if ((SV->getOperand(0) != Op0 && SV->getOperand(0) != Op1) ||
3906 (SV->getOperand(1) != Op0 && SV->getOperand(1) != Op1))
3913 if (!collectShuffles(Op0) || !collectShuffles(Op1))
3917 if (FromReduction && Shuffles.
size() > 1)
3922 if (!FromReduction) {
3923 for (ShuffleVectorInst *SV : Shuffles) {
3924 for (
auto *U : SV->users()) {
3927 Shuffles.push_back(SSV);
3939 int MaxV1Elt = 0, MaxV2Elt = 0;
3940 unsigned NumElts = VT->getNumElements();
3941 for (ShuffleVectorInst *SVN : Shuffles) {
3942 SmallVector<int>
Mask;
3943 SVN->getShuffleMask(Mask);
3947 Value *SVOp0 = SVN->getOperand(0);
3948 Value *SVOp1 = SVN->getOperand(1);
3953 for (
int &Elem : Mask) {
3959 if (SVOp0 == Op1 && SVOp1 == Op0) {
3963 if (SVOp0 != Op0 || SVOp1 != Op1)
3969 SmallVector<int> ReconstructMask;
3970 for (
unsigned I = 0;
I <
Mask.size();
I++) {
3973 }
else if (Mask[
I] <
static_cast<int>(NumElts)) {
3974 MaxV1Elt = std::max(MaxV1Elt, Mask[
I]);
3975 auto It =
find_if(V1, [&](
const std::pair<int, int> &
A) {
3976 return Mask[
I] ==
A.first;
3985 MaxV2Elt = std::max<int>(MaxV2Elt, Mask[
I] - NumElts);
3986 auto It =
find_if(V2, [&](
const std::pair<int, int> &
A) {
3987 return Mask[
I] -
static_cast<int>(NumElts) ==
A.first;
4001 sort(ReconstructMask);
4002 OrigReconstructMasks.
push_back(std::move(ReconstructMask));
4010 (MaxV1Elt ==
static_cast<int>(V1.
size()) - 1 &&
4011 MaxV2Elt ==
static_cast<int>(V2.
size()) - 1))
4023 if (InputShuffles.contains(SSV))
4025 return SV->getMaskValue(M);
4033 std::pair<int, int>
Y) {
4034 int MXA = GetBaseMaskValue(
A,
X.first);
4035 int MYA = GetBaseMaskValue(
A,
Y.first);
4038 stable_sort(V1, [&](std::pair<int, int>
A, std::pair<int, int>
B) {
4039 return SortBase(SVI0A,
A,
B);
4041 stable_sort(V2, [&](std::pair<int, int>
A, std::pair<int, int>
B) {
4042 return SortBase(SVI1A,
A,
B);
4047 for (
const auto &Mask : OrigReconstructMasks) {
4048 SmallVector<int> ReconstructMask;
4049 for (
int M : Mask) {
4051 auto It =
find_if(V, [M](
auto A) {
return A.second ==
M; });
4052 assert(It !=
V.end() &&
"Expected all entries in Mask");
4053 return std::distance(
V.begin(), It);
4057 else if (M <
static_cast<int>(NumElts)) {
4058 ReconstructMask.
push_back(FindIndex(V1, M));
4060 ReconstructMask.
push_back(NumElts + FindIndex(V2, M));
4063 ReconstructMasks.
push_back(std::move(ReconstructMask));
4068 SmallVector<int> V1A, V1B, V2A, V2B;
4069 for (
unsigned I = 0;
I < V1.
size();
I++) {
4070 V1A.
push_back(GetBaseMaskValue(SVI0A, V1[
I].first));
4071 V1B.
push_back(GetBaseMaskValue(SVI0B, V1[
I].first));
4073 for (
unsigned I = 0;
I < V2.
size();
I++) {
4074 V2A.
push_back(GetBaseMaskValue(SVI1A, V2[
I].first));
4075 V2B.
push_back(GetBaseMaskValue(SVI1B, V2[
I].first));
4077 while (V1A.
size() < NumElts) {
4081 while (V2A.
size() < NumElts) {
4093 VT, VT, SV->getShuffleMask(),
CostKind);
4100 unsigned ElementSize = VT->getElementType()->getPrimitiveSizeInBits();
4101 unsigned MaxVectorSize =
4103 unsigned MaxElementsInVector = MaxVectorSize / ElementSize;
4104 if (MaxElementsInVector == 0)
4113 std::set<SmallVector<int, 4>> UniqueShuffles;
4118 unsigned NumFullVectors =
Mask.size() / MaxElementsInVector;
4119 if (NumFullVectors < 2)
4120 return C + ShuffleCost;
4121 SmallVector<int, 4> SubShuffle(MaxElementsInVector);
4122 unsigned NumUniqueGroups = 0;
4123 unsigned NumGroups =
Mask.size() / MaxElementsInVector;
4126 for (
unsigned I = 0;
I < NumFullVectors; ++
I) {
4127 for (
unsigned J = 0; J < MaxElementsInVector; ++J)
4128 SubShuffle[J] = Mask[MaxElementsInVector *
I + J];
4129 if (UniqueShuffles.insert(SubShuffle).second)
4130 NumUniqueGroups += 1;
4132 return C + ShuffleCost * NumUniqueGroups / NumGroups;
4138 SmallVector<int, 16>
Mask;
4139 SV->getShuffleMask(Mask);
4140 return AddShuffleMaskAdjustedCost(
C, Mask);
4143 auto AllShufflesHaveSameOperands =
4144 [](SmallPtrSetImpl<Instruction *> &InputShuffles) {
4145 if (InputShuffles.size() < 2)
4147 ShuffleVectorInst *FirstSV =
4154 std::next(InputShuffles.begin()), InputShuffles.end(),
4155 [&](Instruction *
I) {
4156 ShuffleVectorInst *SV = dyn_cast<ShuffleVectorInst>(I);
4157 return SV && SV->getOperand(0) == In0 && SV->getOperand(1) == In1;
4166 CostBefore += std::accumulate(Shuffles.begin(), Shuffles.end(),
4168 if (AllShufflesHaveSameOperands(InputShuffles)) {
4169 UniqueShuffles.clear();
4170 CostBefore += std::accumulate(InputShuffles.begin(), InputShuffles.end(),
4173 CostBefore += std::accumulate(InputShuffles.begin(), InputShuffles.end(),
4179 FixedVectorType *Op0SmallVT =
4181 FixedVectorType *Op1SmallVT =
4186 UniqueShuffles.clear();
4187 CostAfter += std::accumulate(ReconstructMasks.begin(), ReconstructMasks.end(),
4189 std::set<SmallVector<int>> OutputShuffleMasks({V1A, V1B, V2A, V2B});
4191 std::accumulate(OutputShuffleMasks.begin(), OutputShuffleMasks.end(),
4194 LLVM_DEBUG(
dbgs() <<
"Found a binop select shuffle pattern: " <<
I <<
"\n");
4196 <<
" vs CostAfter: " << CostAfter <<
"\n");
4197 if (CostBefore < CostAfter ||
4208 if (InputShuffles.contains(SSV))
4210 return SV->getOperand(
Op);
4214 GetShuffleOperand(SVI0A, 1), V1A);
4217 GetShuffleOperand(SVI0B, 1), V1B);
4220 GetShuffleOperand(SVI1A, 1), V2A);
4223 GetShuffleOperand(SVI1B, 1), V2B);
4228 I->copyIRFlags(Op0,
true);
4233 I->copyIRFlags(Op1,
true);
4235 for (
int S = 0,
E = ReconstructMasks.size(); S !=
E; S++) {
4238 replaceValue(*Shuffles[S], *NSV,
false);
4241 Worklist.pushValue(NSV0A);
4242 Worklist.pushValue(NSV0B);
4243 Worklist.pushValue(NSV1A);
4244 Worklist.pushValue(NSV1B);
4254bool VectorCombine::shrinkType(Instruction &
I) {
4255 Value *ZExted, *OtherOperand;
4261 Value *ZExtOperand =
I.getOperand(
I.getOperand(0) == OtherOperand ? 1 : 0);
4265 unsigned BW = SmallTy->getElementType()->getPrimitiveSizeInBits();
4267 if (
I.getOpcode() == Instruction::LShr) {
4284 Instruction::ZExt, BigTy, SmallTy,
4285 TargetTransformInfo::CastContextHint::None,
CostKind);
4290 for (User *U : ZExtOperand->
users()) {
4297 ShrinkCost += ZExtCost;
4312 ShrinkCost += ZExtCost;
4319 Instruction::Trunc, SmallTy, BigTy,
4320 TargetTransformInfo::CastContextHint::None,
CostKind);
4325 if (ShrinkCost > CurrentCost)
4329 Value *Op0 = ZExted;
4332 if (
I.getOperand(0) == OtherOperand)
4339 replaceValue(
I, *NewZExtr);
4345bool VectorCombine::foldInsExtVectorToShuffle(Instruction &
I) {
4346 Value *DstVec, *SrcVec;
4347 uint64_t ExtIdx, InsIdx;
4357 if (!DstVecTy || !SrcVecTy ||
4358 SrcVecTy->getElementType() != DstVecTy->getElementType())
4361 unsigned NumDstElts = DstVecTy->getNumElements();
4362 unsigned NumSrcElts = SrcVecTy->getNumElements();
4363 if (InsIdx >= NumDstElts || ExtIdx >= NumSrcElts || NumDstElts == 1)
4370 bool NeedExpOrNarrow = NumSrcElts != NumDstElts;
4371 bool IsExtIdxInBounds = ExtIdx < NumDstElts;
4373 if (NeedDstSrcSwap) {
4375 if (!IsExtIdxInBounds && NeedExpOrNarrow)
4378 Mask[InsIdx] = ExtIdx;
4382 std::iota(
Mask.begin(),
Mask.end(), 0);
4383 if (!IsExtIdxInBounds && NeedExpOrNarrow)
4384 Mask[InsIdx] = NumDstElts;
4386 Mask[InsIdx] = ExtIdx + NumDstElts;
4399 SmallVector<int> ExtToVecMask;
4400 if (!NeedExpOrNarrow) {
4405 nullptr, {DstVec, SrcVec});
4411 if (IsExtIdxInBounds)
4412 ExtToVecMask[ExtIdx] = ExtIdx;
4414 ExtToVecMask[0] = ExtIdx;
4417 DstVecTy, SrcVecTy, ExtToVecMask,
CostKind);
4421 if (!Ext->hasOneUse())
4424 LLVM_DEBUG(
dbgs() <<
"Found a insert/extract shuffle-like pair: " <<
I
4425 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
4428 if (OldCost < NewCost)
4431 if (NeedExpOrNarrow) {
4432 if (!NeedDstSrcSwap)
4445 replaceValue(
I, *Shuf);
4454bool VectorCombine::foldInterleaveIntrinsics(Instruction &
I) {
4455 const APInt *SplatVal0, *SplatVal1;
4465 auto *ExtVTy = VectorType::getExtendedElementVectorType(VTy);
4466 unsigned Width = VTy->getElementType()->getIntegerBitWidth();
4475 LLVM_DEBUG(
dbgs() <<
"VC: The cost to cast from " << *ExtVTy <<
" to "
4476 << *
I.getType() <<
" is too high.\n");
4480 APInt NewSplatVal = SplatVal1->
zext(Width * 2);
4481 NewSplatVal <<= Width;
4482 NewSplatVal |= SplatVal0->
zext(Width * 2);
4484 ExtVTy->getElementCount(), ConstantInt::get(
F.getContext(), NewSplatVal));
4492bool VectorCombine::shrinkLoadForShuffles(Instruction &
I) {
4494 if (!OldLoad || !OldLoad->isSimple())
4501 unsigned const OldNumElements = OldLoadTy->getNumElements();
4507 using IndexRange = std::pair<int, int>;
4508 auto GetIndexRangeInShuffles = [&]() -> std::optional<IndexRange> {
4509 IndexRange OutputRange = IndexRange(OldNumElements, -1);
4510 for (llvm::Use &Use :
I.uses()) {
4512 User *Shuffle =
Use.getUser();
4517 return std::nullopt;
4524 for (
int Index : Mask) {
4525 if (Index >= 0 && Index <
static_cast<int>(OldNumElements)) {
4526 OutputRange.first = std::min(Index, OutputRange.first);
4527 OutputRange.second = std::max(Index, OutputRange.second);
4532 if (OutputRange.second < OutputRange.first)
4533 return std::nullopt;
4539 if (std::optional<IndexRange> Indices = GetIndexRangeInShuffles()) {
4540 unsigned const NewNumElements = Indices->second + 1u;
4544 if (NewNumElements < OldNumElements) {
4549 Type *ElemTy = OldLoadTy->getElementType();
4551 Value *PtrOp = OldLoad->getPointerOperand();
4554 Instruction::Load, OldLoad->getType(), OldLoad->getAlign(),
4555 OldLoad->getPointerAddressSpace(),
CostKind);
4558 OldLoad->getPointerAddressSpace(),
CostKind);
4560 using UseEntry = std::pair<ShuffleVectorInst *, std::vector<int>>;
4562 unsigned const MaxIndex = NewNumElements * 2u;
4564 for (llvm::Use &Use :
I.uses()) {
4566 ArrayRef<int> OldMask = Shuffle->getShuffleMask();
4572 for (
int Index : OldMask) {
4573 if (Index >=
static_cast<int>(MaxIndex))
4587 dbgs() <<
"Found a load used only by shufflevector instructions: "
4588 <<
I <<
"\n OldCost: " << OldCost
4589 <<
" vs NewCost: " << NewCost <<
"\n");
4591 if (OldCost < NewCost || !NewCost.
isValid())
4597 NewLoad->copyMetadata(
I);
4600 for (UseEntry &Use : NewUses) {
4601 ShuffleVectorInst *Shuffle =
Use.first;
4602 std::vector<int> &NewMask =
Use.second;
4609 replaceValue(*Shuffle, *NewShuffle,
false);
4622bool VectorCombine::shrinkPhiOfShuffles(Instruction &
I) {
4624 if (!Phi ||
Phi->getNumIncomingValues() != 2u)
4628 ArrayRef<int> Mask0;
4629 ArrayRef<int> Mask1;
4642 auto const InputNumElements = InputVT->getNumElements();
4644 if (InputNumElements >= ResultVT->getNumElements())
4649 SmallVector<int, 16> NewMask;
4652 for (
auto [
M0,
M1] :
zip(Mask0, Mask1)) {
4653 if (
M0 >= 0 &&
M1 >= 0)
4655 else if (
M0 == -1 &&
M1 == -1)
4668 int MaskOffset = NewMask[0
u];
4669 unsigned Index = (InputNumElements + MaskOffset) % InputNumElements;
4672 for (
unsigned I = 0u;
I < InputNumElements; ++
I) {
4686 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
4689 if (NewCost > OldCost)
4701 auto *NewPhi = Builder.
CreatePHI(NewShuf0->getType(), 2u);
4703 NewPhi->addIncoming(
Op,
Phi->getIncomingBlock(1u));
4709 replaceValue(*Phi, *NewShuf1);
bool VectorCombine::run() {
    auto Opcode = I.getOpcode();
    if (IsFixedVectorType) {
      case Instruction::InsertElement:
        if (vectorizeLoadInsert(I))
      case Instruction::ShuffleVector:
        if (widenSubvectorLoad(I))
      if (scalarizeOpOrCmp(I))
      if (scalarizeLoad(I))
      if (scalarizeExtExtract(I))
      if (scalarizeVPIntrinsic(I))
      if (foldInterleaveIntrinsics(I))
    if (Opcode == Instruction::Store)
      if (foldSingleElementStore(I))
    if (TryEarlyFoldsOnly)
    if (IsFixedVectorType) {
      case Instruction::InsertElement:
        if (foldInsExtFNeg(I))
        if (foldInsExtBinop(I))
        if (foldInsExtVectorToShuffle(I))
      case Instruction::ShuffleVector:
        if (foldPermuteOfBinops(I))
        if (foldShuffleOfBinops(I))
        if (foldShuffleOfSelects(I))
        if (foldShuffleOfCastops(I))
        if (foldShuffleOfShuffles(I))
        if (foldPermuteOfIntrinsic(I))
        if (foldShuffleOfIntrinsics(I))
        if (foldSelectShuffle(I))
        if (foldShuffleToIdentity(I))
      case Instruction::Load:
        if (shrinkLoadForShuffles(I))
      case Instruction::BitCast:
        if (foldBitcastShuffle(I))
      case Instruction::And:
      case Instruction::Or:
      case Instruction::Xor:
        if (foldBitOpOfCastops(I))
        if (foldBitOpOfCastConstant(I))
      case Instruction::PHI:
        if (shrinkPhiOfShuffles(I))
      case Instruction::Call:
        if (foldShuffleFromReductions(I))
        if (foldCastFromReductions(I))
      case Instruction::ExtractElement:
        if (foldShuffleChainsToReduce(I))
      case Instruction::ICmp:
      case Instruction::FCmp:
        if (foldExtractExtract(I))
      case Instruction::Or:
        if (foldConcatOfBoolMasks(I))
      if (foldExtractExtract(I))
      if (foldExtractedCmps(I))
      if (foldBinopOfReductions(I))
  bool MadeChange = false;
  for (BasicBlock &BB : F) {
      if (!I->isDebugOrPseudoInst())
        MadeChange |= FoldInst(*I);
  while (!Worklist.isEmpty()) {
    MadeChange |= FoldInst(*I);
Zero extend to a new width.
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & front() const
front - Get the first element.
size_t size() const
size - Get the array size.
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM_ABI bool hasAttribute(Attribute::AttrKind Kind) const
Return true if the attribute exists in this set.
InstListType::iterator iterator
Instruction iterators...
BinaryOps getOpcode() const
Represents analyses that only rely on functions' control flow.
Value * getArgOperand(unsigned i) const
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
bool isFPPredicate() const
static LLVM_ABI std::optional< CmpPredicate > getMatching(CmpPredicate A, CmpPredicate B)
Compares two CmpPredicates taking samesign into account and returns the canonicalized CmpPredicate if...
static LLVM_ABI Constant * getExtractElement(Constant *Vec, Constant *Idx, Type *OnlyIfReducedTy=nullptr)
This is the shared class of boolean and integer constants.
const APInt & getValue() const
Return the constant as an APInt value reference.
This class represents a range of values.
LLVM_ABI ConstantRange urem(const ConstantRange &Other) const
Return a new range representing the possible values resulting from an unsigned remainder operation of...
LLVM_ABI ConstantRange binaryAnd(const ConstantRange &Other) const
Return a new range representing the possible values resulting from a binary-and of a value in this ra...
LLVM_ABI bool contains(const APInt &Val) const
Return true if the specified value is in the set.
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
A parsed version of the target data layout string in and methods for querying it.
Analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
LLVM_ABI bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Convenience struct for specifying and reasoning about fast-math flags.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static FixedVectorType * getDoubleElementsVectorType(FixedVectorType *VTy)
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Common base class shared among various IRBuilders.
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
LLVM_ABI Value * CreateSelectFMF(Value *C, Value *True, Value *False, FMFSource FMFSource, const Twine &Name="", Instruction *MDFrom=nullptr)
LLVM_ABI Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Value * CreateFreeze(Value *V, const Twine &Name="")
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="", MDNode *FPMathTag=nullptr, FMFSource FMFSource={})
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Value * CreatePointerBitCastOrAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
InstTy * Insert(InstTy *I, const Twine &Name="") const
Insert and return the specified instruction.
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
LLVM_ABI Value * CreateNAryOp(unsigned Opc, ArrayRef< Value * > Ops, const Twine &Name="", MDNode *FPMathTag=nullptr)
Create either a UnaryOperator or BinaryOperator depending on Opc.
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Value * CreateFNegFMF(Value *V, FMFSource FMFSource, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
void push(Instruction *I)
Push the instruction onto the worklist stack.
LLVM_ABI void setHasNoUnsignedWrap(bool b=true)
Set or clear the nuw flag on this instruction, which must be an operator which supports this flag.
LLVM_ABI void copyIRFlags(const Value *V, bool IncludeWrapFlags=true)
Convenience method to copy supported exact, fast-math, and (optionally) wrapping flags from V to this...
LLVM_ABI void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI void andIRFlags(const Value *V)
Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction.
LLVM_ABI void setNonNeg(bool b=true)
Set or clear the nneg flag on this instruction, which must be a zext instruction.
LLVM_ABI bool comesBefore(const Instruction *Other) const
Given an instruction Other in the same basic block as this instruction, return true if this instructi...
LLVM_ABI AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
An instruction for reading from memory.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
void setAlignment(Align Align)
Type * getPointerOperandType() const
Align getAlign() const
Return the alignment of the access that is being performed.
Representation for a specific memory location.
static LLVM_ABI MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
const SDValue & getOperand(unsigned Num) const
This instruction constructs a fixed permutation of two input vectors.
int getMaskValue(unsigned Elt) const
Return the shuffle mask value of this instruction for the given element index.
VectorType * getType() const
Overload to return most specific vector type.
static LLVM_ABI void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static LLVM_ABI bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
static void commuteShuffleMask(MutableArrayRef< int > Mask, unsigned InVecNumElts)
Change values in a shuffle permute mask assuming the two vector operands of length InVecNumElts have ...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
void assign(size_type NumElts, ValueParamT Elt)
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
void setAlignment(Align Align)
Analysis pass providing the TargetTransformInfo.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isPointerTy() const
True if this is an instance of PointerType.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
A Use represents the edge between a Value definition and its users.
Value * getOperand(unsigned i) const
static LLVM_ABI bool isVPBinOp(Intrinsic::ID ID)
std::optional< unsigned > getFunctionalIntrinsicID() const
std::optional< unsigned > getFunctionalOpcode() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
This is a wrapper around stripAndAccumulateConstantOffsets with the in-bounds requirement set to fals...
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
unsigned getValueID() const
Return an ID for the concrete type of this object.
LLVM_ABI bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &)
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
std::pair< iterator, bool > insert(const ValueT &V)
const ParentTy * getParent() const
self_iterator getIterator()
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Abstract Attribute helper functions.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
LLVM_ABI AttributeSet getFnAttributes(LLVMContext &C, ID id)
Return the function attributes for an intrinsic.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
class_match< PoisonValue > m_Poison()
Match an arbitrary poison constant.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
BinaryOp_match< LHS, RHS, Instruction::URem > m_URem(const LHS &L, const RHS &R)
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
DisjointOr_match< LHS, RHS > m_DisjointOr(const LHS &L, const RHS &R)
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
match_combine_and< LTy, RTy > m_CombineAnd(const LTy &L, const RTy &R)
Combine two pattern matchers matching L && R.
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
BinOpPred_match< LHS, RHS, is_bitwiselogic_op, true > m_c_BitwiseLogic(const LHS &L, const RHS &R)
Matches bitwise logic operations in either order.
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
match_combine_or< CastInst_match< OpTy, SExtInst >, NNegZExt_match< OpTy > > m_SExtLike(const OpTy &Op)
Match either "sext" or "zext nneg".
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
@ User
could "use" a pointer
NodeAddr< PhiNode * > Phi
NodeAddr< UseNode * > Use
friend class Instruction
Iterator for Instructions in a `BasicBlock.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
FunctionAddr VTableAddr Value
void stable_sort(R &&Range)
UnaryFunction for_each(R &&Range, UnaryFunction F)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
detail::scope_exit< std::decay_t< Callable > > make_scope_exit(Callable &&F)
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
unsigned Log2_64_Ceil(uint64_t Value)
Return the ceil log base 2 of the specified value, 64 if the value is zero.
LLVM_ABI Value * simplifyUnOp(unsigned Opcode, Value *Op, const SimplifyQuery &Q)
Given operand for a UnaryOperator, fold the result or return null.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI unsigned getArithmeticReductionInstruction(Intrinsic::ID RdxID)
Returns the arithmetic instruction opcode used when expanding a reduction.
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
LLVM_ABI Value * simplifyCall(CallBase *Call, Value *Callee, ArrayRef< Value * > Args, const SimplifyQuery &Q)
Given a callsite, callee, and arguments, fold the result or return null.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
LLVM_ABI bool mustSuppressSpeculation(const LoadInst &LI)
Return true if speculation of the given load must be suppressed to avoid ordering or interfering with...
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
LLVM_ABI ConstantRange computeConstantRange(const Value *V, bool ForSigned, bool UseInstrInfo=true, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Determine the possible constant range of an integer or vector of integer value.
unsigned M1(unsigned Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
bool isModSet(const ModRefInfo MRI)
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI bool programUndefinedIfPoison(const Instruction *Inst)
LLVM_ABI bool isSafeToLoadUnconditionally(Value *V, Align Alignment, const APInt &Size, const DataLayout &DL, Instruction *ScanFrom, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if we know that executing a load from this value cannot trap.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ABI void propagateIRFlags(Value *I, ArrayRef< Value * > VL, Value *OpValue=nullptr, bool IncludeWrapFlags=true)
Get the intersection (logical and) of all of the potential IR flags of each scalar operation (VL) tha...
LLVM_ABI bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
constexpr int PoisonMaskElem
LLVM_ABI bool isSafeToSpeculativelyExecuteWithOpcode(unsigned Opcode, const Instruction *Inst, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
This returns the same result as isSafeToSpeculativelyExecute if Opcode is the actual opcode of Inst.
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
LLVM_ABI Value * simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a BinaryOperator, fold the result or return null.
LLVM_ABI void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
LLVM_ABI Intrinsic::ID getReductionForBinop(Instruction::BinaryOps Opc)
Returns the reduction intrinsic id corresponding to the binary operation.
@ And
Bitwise or logical AND of integers.
LLVM_ABI bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
constexpr unsigned BitWidth
LLVM_ABI bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
LLVM_ABI Constant * getLosslessInvCast(Constant *C, Type *InvCastTo, unsigned CastOp, const DataLayout &DL, PreservedCastFlags *Flags=nullptr)
Try to cast C to InvC losslessly, satisfying CastOp(InvC) equals C, or CastOp(InvC) is a refined valu...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list.
LLVM_ABI Value * simplifyCmpInst(CmpPredicate Predicate, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a CmpInst, fold the result or return null.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI bool isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Returns true if V cannot be poison, but may be undef.
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
LLVM_ABI bool isTriviallyVectorizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially vectorizable.
LLVM_ABI Intrinsic::ID getMinMaxReductionIntrinsicID(Intrinsic::ID IID)
Returns the llvm.vector.reduce min/max intrinsic that corresponds to the intrinsic op.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
LLVM_ABI AAMDNodes adjustForAccess(unsigned AccessSize)
Create a new AAMDNode for accessing AccessSize bytes of this AAMDNode.
This struct is a compact representation of a valid (non-zero power of two) alignment.
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.