44#define DEBUG_TYPE "vector-combine"
50STATISTIC(NumVecLoad,
"Number of vector loads formed");
51STATISTIC(NumVecCmp,
"Number of vector compares formed");
52STATISTIC(NumVecBO,
"Number of vector binops formed");
53STATISTIC(NumVecCmpBO,
"Number of vector compare + binop formed");
54STATISTIC(NumShufOfBitcast,
"Number of shuffles moved after bitcast");
55STATISTIC(NumScalarOps,
"Number of scalar unary + binary ops formed");
56STATISTIC(NumScalarCmp,
"Number of scalar compares formed");
57STATISTIC(NumScalarIntrinsic,
"Number of scalar intrinsic calls formed");
61 cl::desc(
"Disable all vector combine transforms"));
65 cl::desc(
"Disable binop extract to shuffle transforms"));
69 cl::desc(
"Max number of instructions to scan for vector combining."));
71static const unsigned InvalidIndex = std::numeric_limits<unsigned>::max();
79 bool TryEarlyFoldsOnly)
82 SQ(*
DL, nullptr, &DT, &AC),
83 TryEarlyFoldsOnly(TryEarlyFoldsOnly) {}
90 const TargetTransformInfo &TTI;
91 const DominatorTree &DT;
95 const SimplifyQuery SQ;
99 bool TryEarlyFoldsOnly;
101 InstructionWorklist Worklist;
110 bool vectorizeLoadInsert(Instruction &
I);
111 bool widenSubvectorLoad(Instruction &
I);
112 ExtractElementInst *getShuffleExtract(ExtractElementInst *Ext0,
113 ExtractElementInst *Ext1,
114 unsigned PreferredExtractIndex)
const;
115 bool isExtractExtractCheap(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
116 const Instruction &
I,
117 ExtractElementInst *&ConvertToShuffle,
118 unsigned PreferredExtractIndex);
121 bool foldExtractExtract(Instruction &
I);
122 bool foldInsExtFNeg(Instruction &
I);
123 bool foldInsExtBinop(Instruction &
I);
124 bool foldInsExtVectorToShuffle(Instruction &
I);
125 bool foldBitOpOfCastops(Instruction &
I);
126 bool foldBitOpOfCastConstant(Instruction &
I);
127 bool foldBitcastShuffle(Instruction &
I);
128 bool scalarizeOpOrCmp(Instruction &
I);
129 bool scalarizeVPIntrinsic(Instruction &
I);
130 bool foldExtractedCmps(Instruction &
I);
131 bool foldSelectsFromBitcast(Instruction &
I);
132 bool foldBinopOfReductions(Instruction &
I);
133 bool foldSingleElementStore(Instruction &
I);
134 bool scalarizeLoad(Instruction &
I);
135 bool scalarizeLoadExtract(LoadInst *LI, VectorType *VecTy,
Value *Ptr);
136 bool scalarizeLoadBitcast(LoadInst *LI, VectorType *VecTy,
Value *Ptr);
137 bool scalarizeExtExtract(Instruction &
I);
138 bool foldConcatOfBoolMasks(Instruction &
I);
139 bool foldPermuteOfBinops(Instruction &
I);
140 bool foldShuffleOfBinops(Instruction &
I);
141 bool foldShuffleOfSelects(Instruction &
I);
142 bool foldShuffleOfCastops(Instruction &
I);
143 bool foldShuffleOfShuffles(Instruction &
I);
144 bool foldPermuteOfIntrinsic(Instruction &
I);
145 bool foldShufflesOfLengthChangingShuffles(Instruction &
I);
146 bool foldShuffleOfIntrinsics(Instruction &
I);
147 bool foldShuffleToIdentity(Instruction &
I);
148 bool foldShuffleFromReductions(Instruction &
I);
149 bool foldShuffleChainsToReduce(Instruction &
I);
150 bool foldCastFromReductions(Instruction &
I);
151 bool foldSignBitReductionCmp(Instruction &
I);
152 bool foldICmpEqZeroVectorReduce(Instruction &
I);
153 bool foldEquivalentReductionCmp(Instruction &
I);
154 bool foldSelectShuffle(Instruction &
I,
bool FromReduction =
false);
155 bool foldInterleaveIntrinsics(Instruction &
I);
156 bool shrinkType(Instruction &
I);
157 bool shrinkLoadForShuffles(Instruction &
I);
158 bool shrinkPhiOfShuffles(Instruction &
I);
160 void replaceValue(Instruction &Old,
Value &New,
bool Erase =
true) {
166 Worklist.pushUsersToWorkList(*NewI);
167 Worklist.pushValue(NewI);
184 SmallPtrSet<Value *, 4> Visited;
189 OpI,
nullptr,
nullptr, [&](
Value *V) {
194 NextInst = NextInst->getNextNode();
199 Worklist.pushUsersToWorkList(*OpI);
200 Worklist.pushValue(OpI);
220 if (!Load || !Load->isSimple() || !Load->hasOneUse() ||
221 Load->getFunction()->hasFnAttribute(Attribute::SanitizeMemTag) ||
227 Type *ScalarTy = Load->getType()->getScalarType();
229 unsigned MinVectorSize =
TTI.getMinVectorRegisterBitWidth();
230 if (!ScalarSize || !MinVectorSize || MinVectorSize % ScalarSize != 0 ||
237bool VectorCombine::vectorizeLoadInsert(
Instruction &
I) {
263 Value *SrcPtr =
Load->getPointerOperand()->stripPointerCasts();
266 unsigned MinVecNumElts = MinVectorSize / ScalarSize;
267 auto *MinVecTy = VectorType::get(ScalarTy, MinVecNumElts,
false);
268 unsigned OffsetEltIndex = 0;
276 unsigned OffsetBitWidth =
DL->getIndexTypeSizeInBits(SrcPtr->
getType());
277 APInt
Offset(OffsetBitWidth, 0);
287 uint64_t ScalarSizeInBytes = ScalarSize / 8;
288 if (
Offset.urem(ScalarSizeInBytes) != 0)
292 OffsetEltIndex =
Offset.udiv(ScalarSizeInBytes).getZExtValue();
293 if (OffsetEltIndex >= MinVecNumElts)
310 unsigned AS =
Load->getPointerAddressSpace();
329 unsigned OutputNumElts = Ty->getNumElements();
331 assert(OffsetEltIndex < MinVecNumElts &&
"Address offset too big");
332 Mask[0] = OffsetEltIndex;
339 if (OldCost < NewCost || !NewCost.
isValid())
350 replaceValue(
I, *VecLd);
358bool VectorCombine::widenSubvectorLoad(Instruction &
I) {
361 if (!Shuf->isIdentityWithPadding())
367 unsigned OpIndex =
any_of(Shuf->getShuffleMask(), [&NumOpElts](
int M) {
368 return M >= (int)(NumOpElts);
379 Value *SrcPtr =
Load->getPointerOperand()->stripPointerCasts();
388 unsigned AS =
Load->getPointerAddressSpace();
403 if (OldCost < NewCost || !NewCost.
isValid())
410 replaceValue(
I, *VecLd);
417ExtractElementInst *VectorCombine::getShuffleExtract(
418 ExtractElementInst *Ext0, ExtractElementInst *Ext1,
422 assert(Index0C && Index1C &&
"Expected constant extract indexes");
424 unsigned Index0 = Index0C->getZExtValue();
425 unsigned Index1 = Index1C->getZExtValue();
428 if (Index0 == Index1)
452 if (PreferredExtractIndex == Index0)
454 if (PreferredExtractIndex == Index1)
458 return Index0 > Index1 ? Ext0 : Ext1;
466bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
467 ExtractElementInst *Ext1,
468 const Instruction &
I,
469 ExtractElementInst *&ConvertToShuffle,
470 unsigned PreferredExtractIndex) {
473 assert(Ext0IndexC && Ext1IndexC &&
"Expected constant extract indexes");
475 unsigned Opcode =
I.getOpcode();
488 assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
489 "Expected a compare");
499 unsigned Ext0Index = Ext0IndexC->getZExtValue();
500 unsigned Ext1Index = Ext1IndexC->getZExtValue();
514 unsigned BestExtIndex = Extract0Cost > Extract1Cost ? Ext0Index : Ext1Index;
515 unsigned BestInsIndex = Extract0Cost > Extract1Cost ? Ext1Index : Ext0Index;
516 InstructionCost CheapExtractCost = std::min(Extract0Cost, Extract1Cost);
521 if (Ext0Src == Ext1Src && Ext0Index == Ext1Index) {
526 bool HasUseTax = Ext0 == Ext1 ? !Ext0->
hasNUses(2)
528 OldCost = CheapExtractCost + ScalarOpCost;
529 NewCost = VectorOpCost + CheapExtractCost + HasUseTax * CheapExtractCost;
533 OldCost = Extract0Cost + Extract1Cost + ScalarOpCost;
534 NewCost = VectorOpCost + CheapExtractCost +
539 ConvertToShuffle = getShuffleExtract(Ext0, Ext1, PreferredExtractIndex);
540 if (ConvertToShuffle) {
552 SmallVector<int> ShuffleMask(FixedVecTy->getNumElements(),
554 ShuffleMask[BestInsIndex] = BestExtIndex;
556 VecTy, VecTy, ShuffleMask,
CostKind, 0,
557 nullptr, {ConvertToShuffle});
560 VecTy, VecTy, {},
CostKind, 0,
nullptr,
568 return OldCost < NewCost;
580 ShufMask[NewIndex] = OldIndex;
581 return Builder.CreateShuffleVector(Vec, ShufMask,
"shift");
633 V1,
"foldExtExtBinop");
638 VecBOInst->copyIRFlags(&
I);
644bool VectorCombine::foldExtractExtract(Instruction &
I) {
665 unsigned NumElts = FixedVecTy->getNumElements();
666 if (C0 >= NumElts || C1 >= NumElts)
682 ExtractElementInst *ExtractToChange;
683 if (isExtractExtractCheap(Ext0, Ext1,
I, ExtractToChange, InsertIndex))
689 if (ExtractToChange) {
690 unsigned CheapExtractIdx = ExtractToChange == Ext0 ? C1 : C0;
695 if (ExtractToChange == Ext0)
704 ? foldExtExtCmp(ExtOp0, ExtOp1, ExtIndex,
I)
705 : foldExtExtBinop(ExtOp0, ExtOp1, ExtIndex,
I);
708 replaceValue(
I, *NewExt);
714bool VectorCombine::foldInsExtFNeg(Instruction &
I) {
717 uint64_t ExtIdx, InsIdx;
732 auto *DstVecScalarTy = DstVecTy->getScalarType();
734 if (!SrcVecTy || DstVecScalarTy != SrcVecTy->getScalarType())
739 unsigned NumDstElts = DstVecTy->getNumElements();
740 unsigned NumSrcElts = SrcVecTy->getNumElements();
741 if (ExtIdx > NumSrcElts || InsIdx >= NumDstElts || NumDstElts == 1)
747 SmallVector<int>
Mask(NumDstElts);
748 std::iota(
Mask.begin(),
Mask.end(), 0);
749 Mask[InsIdx] = (ExtIdx % NumDstElts) + NumDstElts;
765 bool NeedLenChg = SrcVecTy->getNumElements() != NumDstElts;
768 SmallVector<int> SrcMask;
771 SrcMask[ExtIdx % NumDstElts] = ExtIdx;
773 DstVecTy, SrcVecTy, SrcMask,
CostKind);
777 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
779 if (NewCost > OldCost)
782 Value *NewShuf, *LenChgShuf =
nullptr;
796 replaceValue(
I, *NewShuf);
802bool VectorCombine::foldInsExtBinop(Instruction &
I) {
803 BinaryOperator *VecBinOp, *SclBinOp;
835 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
837 if (NewCost > OldCost)
848 NewInst->copyIRFlags(VecBinOp);
849 NewInst->andIRFlags(SclBinOp);
854 replaceValue(
I, *NewBO);
860bool VectorCombine::foldBitOpOfCastops(Instruction &
I) {
863 if (!BinOp || !BinOp->isBitwiseLogicOp())
869 if (!LHSCast || !RHSCast) {
870 LLVM_DEBUG(
dbgs() <<
" One or both operands are not cast instructions\n");
876 if (CastOpcode != RHSCast->getOpcode())
880 switch (CastOpcode) {
881 case Instruction::BitCast:
882 case Instruction::Trunc:
883 case Instruction::SExt:
884 case Instruction::ZExt:
890 Value *LHSSrc = LHSCast->getOperand(0);
891 Value *RHSSrc = RHSCast->getOperand(0);
897 auto *SrcTy = LHSSrc->
getType();
898 auto *DstTy =
I.getType();
901 if (CastOpcode != Instruction::BitCast &&
906 if (!SrcTy->getScalarType()->isIntegerTy() ||
907 !DstTy->getScalarType()->isIntegerTy())
922 LHSCastCost + RHSCastCost;
933 if (!LHSCast->hasOneUse())
934 NewCost += LHSCastCost;
935 if (!RHSCast->hasOneUse())
936 NewCost += RHSCastCost;
939 <<
" NewCost=" << NewCost <<
"\n");
941 if (NewCost > OldCost)
946 BinOp->getName() +
".inner");
948 NewBinOp->copyIRFlags(BinOp);
962 replaceValue(
I, *Result);
971bool VectorCombine::foldBitOpOfCastConstant(Instruction &
I) {
987 switch (CastOpcode) {
988 case Instruction::BitCast:
989 case Instruction::ZExt:
990 case Instruction::SExt:
991 case Instruction::Trunc:
997 Value *LHSSrc = LHSCast->getOperand(0);
999 auto *SrcTy = LHSSrc->
getType();
1000 auto *DstTy =
I.getType();
1003 if (CastOpcode != Instruction::BitCast &&
1008 if (!SrcTy->getScalarType()->isIntegerTy() ||
1009 !DstTy->getScalarType()->isIntegerTy())
1013 PreservedCastFlags RHSFlags;
1038 if (!LHSCast->hasOneUse())
1039 NewCost += LHSCastCost;
1041 LLVM_DEBUG(
dbgs() <<
"foldBitOpOfCastConstant: OldCost=" << OldCost
1042 <<
" NewCost=" << NewCost <<
"\n");
1044 if (NewCost > OldCost)
1049 LHSSrc, InvC,
I.getName() +
".inner");
1051 NewBinOp->copyIRFlags(&
I);
1071 replaceValue(
I, *Result);
1078bool VectorCombine::foldBitcastShuffle(Instruction &
I) {
1092 if (!DestTy || !SrcTy)
1095 unsigned DestEltSize = DestTy->getScalarSizeInBits();
1096 unsigned SrcEltSize = SrcTy->getScalarSizeInBits();
1097 if (SrcTy->getPrimitiveSizeInBits() % DestEltSize != 0)
1107 if (!(BCTy0 && BCTy0->getElementType() == DestTy->getElementType()) &&
1108 !(BCTy1 && BCTy1->getElementType() == DestTy->getElementType()))
1112 SmallVector<int, 16> NewMask;
1113 if (DestEltSize <= SrcEltSize) {
1116 if (SrcEltSize % DestEltSize != 0)
1118 unsigned ScaleFactor = SrcEltSize / DestEltSize;
1123 if (DestEltSize % SrcEltSize != 0)
1125 unsigned ScaleFactor = DestEltSize / SrcEltSize;
1132 unsigned NumSrcElts = SrcTy->getPrimitiveSizeInBits() / DestEltSize;
1133 auto *NewShuffleTy =
1135 auto *OldShuffleTy =
1137 unsigned NumOps = IsUnary ? 1 : 2;
1147 TargetTransformInfo::CastContextHint::None,
1152 TargetTransformInfo::CastContextHint::None,
1155 LLVM_DEBUG(
dbgs() <<
"Found a bitcasted shuffle: " <<
I <<
"\n OldCost: "
1156 << OldCost <<
" vs NewCost: " << NewCost <<
"\n");
1158 if (NewCost > OldCost || !NewCost.
isValid())
1166 replaceValue(
I, *Shuf);
1173bool VectorCombine::scalarizeVPIntrinsic(Instruction &
I) {
1187 if (!ScalarOp0 || !ScalarOp1)
1195 auto IsAllTrueMask = [](
Value *MaskVal) {
1198 return ConstValue->isAllOnesValue();
1212 SmallVector<int>
Mask;
1214 Mask.resize(FVTy->getNumElements(), 0);
1223 Args.push_back(
V->getType());
1224 IntrinsicCostAttributes
Attrs(IntrID, VecTy, Args);
1229 std::optional<unsigned> FunctionalOpcode =
1231 std::optional<Intrinsic::ID> ScalarIntrID = std::nullopt;
1232 if (!FunctionalOpcode) {
1241 IntrinsicCostAttributes
Attrs(*ScalarIntrID, VecTy->getScalarType(), Args);
1251 InstructionCost NewCost = ScalarOpCost + SplatCost + CostToKeepSplats;
1253 LLVM_DEBUG(
dbgs() <<
"Found a VP Intrinsic to scalarize: " << VPI
1256 <<
", Cost of scalarizing:" << NewCost <<
"\n");
1259 if (OldCost < NewCost || !NewCost.
isValid())
1270 bool SafeToSpeculate;
1276 *FunctionalOpcode, &VPI,
nullptr, SQ.
AC, SQ.
DT);
1277 if (!SafeToSpeculate &&
1284 {ScalarOp0, ScalarOp1})
1286 ScalarOp0, ScalarOp1);
1295bool VectorCombine::scalarizeOpOrCmp(Instruction &
I) {
1300 if (!UO && !BO && !CI && !
II)
1308 if (Arg->getType() !=
II->getType() &&
1318 for (User *U :
I.users())
1325 std::optional<uint64_t>
Index;
1327 auto Ops =
II ?
II->args() :
I.operands();
1331 uint64_t InsIdx = 0;
1336 if (OpTy->getElementCount().getKnownMinValue() <= InsIdx)
1342 else if (InsIdx != *Index)
1359 if (!
Index.has_value())
1363 Type *ScalarTy = VecTy->getScalarType();
1364 assert(VecTy->isVectorTy() &&
1367 "Unexpected types for insert element into binop or cmp");
1369 unsigned Opcode =
I.getOpcode();
1377 }
else if (UO || BO) {
1381 IntrinsicCostAttributes ScalarICA(
1382 II->getIntrinsicID(), ScalarTy,
1385 IntrinsicCostAttributes VectorICA(
1386 II->getIntrinsicID(), VecTy,
1393 Value *NewVecC =
nullptr;
1395 NewVecC =
simplifyCmpInst(CI->getPredicate(), VecCs[0], VecCs[1], SQ);
1398 simplifyUnOp(UO->getOpcode(), VecCs[0], UO->getFastMathFlags(), SQ);
1400 NewVecC =
simplifyBinOp(BO->getOpcode(), VecCs[0], VecCs[1], SQ);
1414 for (
auto [Idx,
Op, VecC, Scalar] :
enumerate(
Ops, VecCs, ScalarOps)) {
1416 II->getIntrinsicID(), Idx, &
TTI)))
1419 Instruction::InsertElement, VecTy,
CostKind, *Index, VecC, Scalar);
1420 OldCost += InsertCost;
1421 NewCost += !
Op->hasOneUse() * InsertCost;
1425 if (OldCost < NewCost || !NewCost.
isValid())
1435 ++NumScalarIntrinsic;
1445 Scalar = Builder.
CreateCmp(CI->getPredicate(), ScalarOps[0], ScalarOps[1]);
1451 Scalar->setName(
I.getName() +
".scalar");
1456 ScalarInst->copyIRFlags(&
I);
1459 replaceValue(
I, *Insert);
1466bool VectorCombine::foldExtractedCmps(Instruction &
I) {
1471 if (!BI || !
I.getType()->isIntegerTy(1))
1476 Value *B0 =
I.getOperand(0), *B1 =
I.getOperand(1);
1479 CmpPredicate
P0,
P1;
1491 uint64_t Index0, Index1;
1498 ExtractElementInst *ConvertToShuf = getShuffleExtract(Ext0, Ext1,
CostKind);
1501 assert((ConvertToShuf == Ext0 || ConvertToShuf == Ext1) &&
1502 "Unknown ExtractElementInst");
1507 unsigned CmpOpcode =
1522 Ext0Cost + Ext1Cost + CmpCost * 2 +
1528 int CheapIndex = ConvertToShuf == Ext0 ? Index1 : Index0;
1529 int ExpensiveIndex = ConvertToShuf == Ext0 ? Index0 : Index1;
1534 ShufMask[CheapIndex] = ExpensiveIndex;
1539 NewCost += Ext0->
hasOneUse() ? 0 : Ext0Cost;
1540 NewCost += Ext1->
hasOneUse() ? 0 : Ext1Cost;
1545 if (OldCost < NewCost || !NewCost.
isValid())
1555 Value *
LHS = ConvertToShuf == Ext0 ? Shuf : VCmp;
1556 Value *
RHS = ConvertToShuf == Ext0 ? VCmp : Shuf;
1559 replaceValue(
I, *NewExt);
1586bool VectorCombine::foldSelectsFromBitcast(Instruction &
I) {
1593 if (!SrcVecTy || !DstVecTy)
1603 if (SrcEltBits != 32 && SrcEltBits != 64)
1606 if (!DstEltTy->
isIntegerTy() || DstEltBits >= SrcEltBits)
1623 if (!ScalarSelCost.
isValid() || ScalarSelCost == 0)
1626 unsigned MinSelects = (VecSelCost.
getValue() / ScalarSelCost.
getValue()) + 1;
1629 if (!BC->hasNUsesOrMore(MinSelects))
1634 DenseMap<Value *, SmallVector<SelectInst *, 8>> CondToSelects;
1636 for (User *U : BC->users()) {
1641 for (User *ExtUser : Ext->users()) {
1645 Cond->getType()->isIntegerTy(1))
1650 if (CondToSelects.
empty())
1653 bool MadeChange =
false;
1654 Value *SrcVec = BC->getOperand(0);
1657 for (
auto [
Cond, Selects] : CondToSelects) {
1659 if (Selects.size() < MinSelects) {
1660 LLVM_DEBUG(
dbgs() <<
"VectorCombine: foldSelectsFromBitcast not "
1661 <<
"profitable (VecCost=" << VecSelCost
1662 <<
", ScalarCost=" << ScalarSelCost
1663 <<
", NumSelects=" << Selects.size() <<
")\n");
1668 auto InsertPt = std::next(BC->getIterator());
1672 InsertPt = std::next(CondInst->getIterator());
1680 for (SelectInst *Sel : Selects) {
1682 Value *Idx = Ext->getIndexOperand();
1686 replaceValue(*Sel, *NewExt);
1691 <<
" selects into vector select\n");
1705 unsigned ReductionOpc =
1711 CostBeforeReduction =
1712 TTI.getCastInstrCost(RedOp->getOpcode(), VecRedTy, ExtType,
1714 CostAfterReduction =
1715 TTI.getExtendedReductionCost(ReductionOpc, IsUnsigned,
II.getType(),
1719 if (RedOp &&
II.getIntrinsicID() == Intrinsic::vector_reduce_add &&
1725 (Op0->
getOpcode() == RedOp->getOpcode() || Op0 == Op1)) {
1732 TTI.getCastInstrCost(Op0->
getOpcode(), MulType, ExtType,
1735 TTI.getArithmeticInstrCost(Instruction::Mul, MulType,
CostKind);
1737 TTI.getCastInstrCost(RedOp->getOpcode(), VecRedTy, MulType,
1740 CostBeforeReduction = ExtCost * 2 + MulCost + Ext2Cost;
1741 CostAfterReduction =
TTI.getMulAccReductionCost(
1742 IsUnsigned, ReductionOpc,
II.getType(), ExtType,
CostKind);
1745 CostAfterReduction =
TTI.getArithmeticReductionCost(ReductionOpc, VecRedTy,
1749bool VectorCombine::foldBinopOfReductions(Instruction &
I) {
1752 if (BinOpOpc == Instruction::Sub)
1753 ReductionIID = Intrinsic::vector_reduce_add;
1757 auto checkIntrinsicAndGetItsArgument = [](
Value *
V,
1762 if (
II->getIntrinsicID() == IID &&
II->hasOneUse())
1763 return II->getArgOperand(0);
1767 Value *V0 = checkIntrinsicAndGetItsArgument(
I.getOperand(0), ReductionIID);
1770 Value *V1 = checkIntrinsicAndGetItsArgument(
I.getOperand(1), ReductionIID);
1779 unsigned ReductionOpc =
1792 CostOfRedOperand0 + CostOfRedOperand1 +
1795 if (NewCost >= OldCost || !NewCost.
isValid())
1799 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
1802 if (BinOpOpc == Instruction::Or)
1803 VectorBO = Builder.
CreateOr(V0, V1,
"",
1809 replaceValue(
I, *Rdx);
1817 unsigned NumScanned = 0;
1818 return std::any_of(Begin, End, [&](
const Instruction &Instr) {
1827class ScalarizationResult {
1828 enum class StatusTy { Unsafe, Safe, SafeWithFreeze };
1833 ScalarizationResult(StatusTy Status,
Value *ToFreeze =
nullptr)
1834 : Status(Status), ToFreeze(ToFreeze) {}
1837 ScalarizationResult(
const ScalarizationResult &
Other) =
default;
1838 ~ScalarizationResult() {
1839 assert(!ToFreeze &&
"freeze() not called with ToFreeze being set");
1842 static ScalarizationResult unsafe() {
return {StatusTy::Unsafe}; }
1843 static ScalarizationResult safe() {
return {StatusTy::Safe}; }
1844 static ScalarizationResult safeWithFreeze(
Value *ToFreeze) {
1845 return {StatusTy::SafeWithFreeze, ToFreeze};
1849 bool isSafe()
const {
return Status == StatusTy::Safe; }
1851 bool isUnsafe()
const {
return Status == StatusTy::Unsafe; }
1854 bool isSafeWithFreeze()
const {
return Status == StatusTy::SafeWithFreeze; }
1859 Status = StatusTy::Unsafe;
1863 void freeze(IRBuilderBase &Builder, Instruction &UserI) {
1864 assert(isSafeWithFreeze() &&
1865 "should only be used when freezing is required");
1867 "UserI must be a user of ToFreeze");
1868 IRBuilder<>::InsertPointGuard Guard(Builder);
1873 if (
U.get() == ToFreeze)
1888 uint64_t NumElements = VecTy->getElementCount().getKnownMinValue();
1892 if (
C->getValue().ult(NumElements))
1893 return ScalarizationResult::safe();
1894 return ScalarizationResult::unsafe();
1899 return ScalarizationResult::unsafe();
1901 APInt Zero(IntWidth, 0);
1902 APInt MaxElts(IntWidth, NumElements);
1909 return ScalarizationResult::safe();
1910 return ScalarizationResult::unsafe();
1923 if (ValidIndices.
contains(IdxRange))
1924 return ScalarizationResult::safeWithFreeze(IdxBase);
1925 return ScalarizationResult::unsafe();
1937 C->getZExtValue() *
DL.getTypeStoreSize(ScalarType));
1949bool VectorCombine::foldSingleElementStore(Instruction &
I) {
1961 if (!
match(
SI->getValueOperand(),
1968 Value *SrcAddr =
Load->getPointerOperand()->stripPointerCasts();
1971 if (!
Load->isSimple() ||
Load->getParent() !=
SI->getParent() ||
1972 !
DL->typeSizeEqualsStoreSize(
Load->getType()->getScalarType()) ||
1973 SrcAddr !=
SI->getPointerOperand()->stripPointerCasts())
1976 auto ScalarizableIdx =
1978 if (ScalarizableIdx.isUnsafe() ||
1985 Worklist.
push(Load);
1987 if (ScalarizableIdx.isSafeWithFreeze())
1990 SI->getValueOperand()->getType(),
SI->getPointerOperand(),
1991 {ConstantInt::get(Idx->getType(), 0), Idx});
1995 std::max(
SI->getAlign(),
Load->getAlign()), NewElement->
getType(), Idx,
1998 replaceValue(
I, *NSI);
2008bool VectorCombine::scalarizeLoad(Instruction &
I) {
2015 if (LI->isVolatile() || !
DL->typeSizeEqualsStoreSize(VecTy->getScalarType()))
2018 bool AllExtracts =
true;
2019 bool AllBitcasts =
true;
2021 unsigned NumInstChecked = 0;
2026 for (User *U : LI->users()) {
2028 if (!UI || UI->getParent() != LI->getParent())
2033 if (UI->use_empty())
2037 AllExtracts =
false;
2039 AllBitcasts =
false;
2043 for (Instruction &
I :
2044 make_range(std::next(LI->getIterator()), UI->getIterator())) {
2051 LastCheckedInst = UI;
2056 return scalarizeLoadExtract(LI, VecTy, Ptr);
2058 return scalarizeLoadBitcast(LI, VecTy, Ptr);
2063bool VectorCombine::scalarizeLoadExtract(LoadInst *LI, VectorType *VecTy,
2068 DenseMap<ExtractElementInst *, ScalarizationResult> NeedFreeze;
2071 for (
auto &Pair : NeedFreeze)
2072 Pair.second.discard();
2080 for (User *U : LI->
users()) {
2085 if (ScalarIdx.isUnsafe())
2087 if (ScalarIdx.isSafeWithFreeze()) {
2088 NeedFreeze.try_emplace(UI, ScalarIdx);
2089 ScalarIdx.discard();
2095 Index ?
Index->getZExtValue() : -1);
2103 LLVM_DEBUG(
dbgs() <<
"Found all extractions of a vector load: " << *LI
2104 <<
"\n LoadExtractCost: " << OriginalCost
2105 <<
" vs ScalarizedCost: " << ScalarizedCost <<
"\n");
2107 if (ScalarizedCost >= OriginalCost)
2114 Type *ElemType = VecTy->getElementType();
2117 for (User *U : LI->
users()) {
2119 Value *Idx = EI->getIndexOperand();
2122 auto It = NeedFreeze.find(EI);
2123 if (It != NeedFreeze.end())
2130 Builder.
CreateLoad(ElemType,
GEP, EI->getName() +
".scalar"));
2132 Align ScalarOpAlignment =
2134 NewLoad->setAlignment(ScalarOpAlignment);
2137 size_t Offset = ConstIdx->getZExtValue() *
DL->getTypeStoreSize(ElemType);
2142 replaceValue(*EI, *NewLoad,
false);
2145 FailureGuard.release();
2150bool VectorCombine::scalarizeLoadBitcast(LoadInst *LI, VectorType *VecTy,
2156 Type *TargetScalarType =
nullptr;
2157 unsigned VecBitWidth =
DL->getTypeSizeInBits(VecTy);
2159 for (User *U : LI->
users()) {
2162 Type *DestTy = BC->getDestTy();
2166 unsigned DestBitWidth =
DL->getTypeSizeInBits(DestTy);
2167 if (DestBitWidth != VecBitWidth)
2171 if (!TargetScalarType)
2172 TargetScalarType = DestTy;
2173 else if (TargetScalarType != DestTy)
2181 if (!TargetScalarType)
2189 LLVM_DEBUG(
dbgs() <<
"Found vector load feeding only bitcasts: " << *LI
2190 <<
"\n OriginalCost: " << OriginalCost
2191 <<
" vs ScalarizedCost: " << ScalarizedCost <<
"\n");
2193 if (ScalarizedCost >= OriginalCost)
2204 ScalarLoad->copyMetadata(*LI);
2207 for (User *U : LI->
users()) {
2209 replaceValue(*BC, *ScalarLoad,
false);
2215bool VectorCombine::scalarizeExtExtract(Instruction &
I) {
2230 Type *ScalarDstTy = DstTy->getElementType();
2231 if (
DL->getTypeSizeInBits(SrcTy) !=
DL->getTypeSizeInBits(ScalarDstTy))
2237 unsigned ExtCnt = 0;
2238 bool ExtLane0 =
false;
2239 for (User *U : Ext->users()) {
2253 Instruction::And, ScalarDstTy,
CostKind,
2256 (ExtCnt - ExtLane0) *
2258 Instruction::LShr, ScalarDstTy,
CostKind,
2261 if (ScalarCost > VectorCost)
2264 Value *ScalarV = Ext->getOperand(0);
2271 SmallDenseSet<ConstantInt *, 8> ExtractedLanes;
2272 bool AllExtractsTriggerUB =
true;
2273 ExtractElementInst *LastExtract =
nullptr;
2275 for (User *U : Ext->users()) {
2278 AllExtractsTriggerUB =
false;
2282 if (!LastExtract || LastExtract->
comesBefore(Extract))
2283 LastExtract = Extract;
2285 if (ExtractedLanes.
size() != DstTy->getNumElements() ||
2286 !AllExtractsTriggerUB ||
2294 uint64_t SrcEltSizeInBits =
DL->getTypeSizeInBits(SrcTy->getElementType());
2295 uint64_t TotalBits =
DL->getTypeSizeInBits(SrcTy);
2298 Value *
Mask = ConstantInt::get(PackedTy, EltBitMask);
2299 for (User *U : Ext->users()) {
2305 ? (TotalBits - SrcEltSizeInBits - Idx * SrcEltSizeInBits)
2306 : (Idx * SrcEltSizeInBits);
2309 U->replaceAllUsesWith(
And);
2317bool VectorCombine::foldConcatOfBoolMasks(Instruction &
I) {
2318 Type *Ty =
I.getType();
2323 if (
DL->isBigEndian())
2334 uint64_t ShAmtX = 0;
2342 uint64_t ShAmtY = 0;
2350 if (ShAmtX > ShAmtY) {
2358 uint64_t ShAmtDiff = ShAmtY - ShAmtX;
2359 unsigned NumSHL = (ShAmtX > 0) + (ShAmtY > 0);
2364 MaskTy->getNumElements() != ShAmtDiff ||
2365 MaskTy->getNumElements() > (
BitWidth / 2))
2370 Type::getIntNTy(Ty->
getContext(), ConcatTy->getNumElements());
2371 auto *MaskIntTy = Type::getIntNTy(Ty->
getContext(), ShAmtDiff);
2374 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
2391 if (Ty != ConcatIntTy)
2397 LLVM_DEBUG(
dbgs() <<
"Found a concatenation of bitcasted bool masks: " <<
I
2398 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2401 if (NewCost > OldCost)
2411 if (Ty != ConcatIntTy) {
2421 replaceValue(
I, *Result);
2427bool VectorCombine::foldPermuteOfBinops(Instruction &
I) {
2428 BinaryOperator *BinOp;
2429 ArrayRef<int> OuterMask;
2437 Value *Op00, *Op01, *Op10, *Op11;
2438 ArrayRef<int> Mask0, Mask1;
2443 if (!Match0 && !Match1)
2456 if (!ShuffleDstTy || !BinOpTy || !Op0Ty || !Op1Ty)
2459 unsigned NumSrcElts = BinOpTy->getNumElements();
2464 any_of(OuterMask, [NumSrcElts](
int M) {
return M >= (int)NumSrcElts; }))
2468 SmallVector<int> NewMask0, NewMask1;
2469 for (
int M : OuterMask) {
2470 if (M < 0 || M >= (
int)NumSrcElts) {
2474 NewMask0.
push_back(Match0 ? Mask0[M] : M);
2475 NewMask1.
push_back(Match1 ? Mask1[M] : M);
2479 unsigned NumOpElts = Op0Ty->getNumElements();
2480 bool IsIdentity0 = ShuffleDstTy == Op0Ty &&
2481 all_of(NewMask0, [NumOpElts](
int M) {
return M < (int)NumOpElts; }) &&
2483 bool IsIdentity1 = ShuffleDstTy == Op1Ty &&
2484 all_of(NewMask1, [NumOpElts](
int M) {
return M < (int)NumOpElts; }) &&
2493 ShuffleDstTy, BinOpTy, OuterMask,
CostKind,
2494 0,
nullptr, {BinOp}, &
I);
2496 NewCost += BinOpCost;
2502 OldCost += Shuf0Cost;
2504 NewCost += Shuf0Cost;
2510 OldCost += Shuf1Cost;
2512 NewCost += Shuf1Cost;
2520 Op0Ty, NewMask0,
CostKind, 0,
nullptr, {Op00, Op01});
2524 Op1Ty, NewMask1,
CostKind, 0,
nullptr, {Op10, Op11});
2526 LLVM_DEBUG(
dbgs() <<
"Found a shuffle feeding a shuffled binop: " <<
I
2527 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2531 if (NewCost > OldCost)
2542 NewInst->copyIRFlags(BinOp);
2546 replaceValue(
I, *NewBO);
2552bool VectorCombine::foldShuffleOfBinops(Instruction &
I) {
2553 ArrayRef<int> OldMask;
2560 if (
LHS->getOpcode() !=
RHS->getOpcode())
2564 bool IsCommutative =
false;
2573 IsCommutative = BinaryOperator::isCommutative(BO->getOpcode());
2584 if (!ShuffleDstTy || !BinResTy || !BinOpTy ||
X->getType() !=
Z->getType())
2587 bool SameBinOp =
LHS ==
RHS;
2588 unsigned NumSrcElts = BinOpTy->getNumElements();
2591 if (IsCommutative &&
X != Z &&
Y != W && (
X == W ||
Y == Z))
2594 auto ConvertToUnary = [NumSrcElts](
int &
M) {
2595 if (M >= (
int)NumSrcElts)
2599 SmallVector<int> NewMask0(OldMask);
2608 SmallVector<int> NewMask1(OldMask);
2627 ShuffleDstTy, BinResTy, OldMask,
CostKind, 0,
2637 ArrayRef<int> InnerMask;
2639 m_Mask(InnerMask)))) &&
2642 [NumSrcElts](
int M) {
return M < (int)NumSrcElts; })) {
2654 bool ReducedInstCount =
false;
2655 ReducedInstCount |= MergeInner(
X, 0, NewMask0,
CostKind);
2656 ReducedInstCount |= MergeInner(
Y, 0, NewMask1,
CostKind);
2657 ReducedInstCount |= MergeInner(Z, NumSrcElts, NewMask0,
CostKind);
2658 ReducedInstCount |= MergeInner(W, NumSrcElts, NewMask1,
CostKind);
2659 bool SingleSrcBinOp = (
X ==
Y) && (Z == W) && (NewMask0 == NewMask1);
2664 auto *ShuffleCmpTy =
2667 SK0, ShuffleCmpTy, BinOpTy, NewMask0,
CostKind, 0,
nullptr, {
X,
Z});
2668 if (!SingleSrcBinOp)
2678 PredLHS,
CostKind, Op0Info, Op1Info);
2688 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2695 if (ReducedInstCount ? (NewCost > OldCost) : (NewCost >= OldCost))
2704 : Builder.
CreateCmp(PredLHS, Shuf0, Shuf1);
2708 NewInst->copyIRFlags(
LHS);
2709 NewInst->andIRFlags(
RHS);
2714 replaceValue(
I, *NewBO);
2721bool VectorCombine::foldShuffleOfSelects(Instruction &
I) {
2723 Value *C1, *
T1, *F1, *C2, *T2, *F2;
2734 if (!C1VecTy || !C2VecTy || C1VecTy != C2VecTy)
2740 if (((SI0FOp ==
nullptr) != (SI1FOp ==
nullptr)) ||
2741 ((SI0FOp !=
nullptr) &&
2742 (SI0FOp->getFastMathFlags() != SI1FOp->getFastMathFlags())))
2748 auto SelOp = Instruction::Select;
2756 CostSel1 + CostSel2 +
2758 {
I.getOperand(0),
I.getOperand(1)}, &
I);
2762 Mask,
CostKind, 0,
nullptr, {C1, C2});
2772 if (!Sel1->hasOneUse())
2773 NewCost += CostSel1;
2774 if (!Sel2->hasOneUse())
2775 NewCost += CostSel2;
2778 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2780 if (NewCost > OldCost)
2789 NewSel = Builder.
CreateSelectFMF(ShuffleCmp, ShuffleTrue, ShuffleFalse,
2790 SI0FOp->getFastMathFlags());
2792 NewSel = Builder.
CreateSelect(ShuffleCmp, ShuffleTrue, ShuffleFalse);
2797 replaceValue(
I, *NewSel);
2803bool VectorCombine::foldShuffleOfCastops(Instruction &
I) {
2805 ArrayRef<int> OldMask;
2814 if (!C0 || (IsBinaryShuffle && !C1))
2821 if (!IsBinaryShuffle && Opcode == Instruction::BitCast)
2824 if (IsBinaryShuffle) {
2825 if (C0->getSrcTy() != C1->getSrcTy())
2828 if (Opcode != C1->getOpcode()) {
2830 Opcode = Instruction::SExt;
2839 if (!ShuffleDstTy || !CastDstTy || !CastSrcTy)
2842 unsigned NumSrcElts = CastSrcTy->getNumElements();
2843 unsigned NumDstElts = CastDstTy->getNumElements();
2844 assert((NumDstElts == NumSrcElts || Opcode == Instruction::BitCast) &&
2845 "Only bitcasts expected to alter src/dst element counts");
2849 if (NumDstElts != NumSrcElts && (NumSrcElts % NumDstElts) != 0 &&
2850 (NumDstElts % NumSrcElts) != 0)
2853 SmallVector<int, 16> NewMask;
2854 if (NumSrcElts >= NumDstElts) {
2857 assert(NumSrcElts % NumDstElts == 0 &&
"Unexpected shuffle mask");
2858 unsigned ScaleFactor = NumSrcElts / NumDstElts;
2863 assert(NumDstElts % NumSrcElts == 0 &&
"Unexpected shuffle mask");
2864 unsigned ScaleFactor = NumDstElts / NumSrcElts;
2869 auto *NewShuffleDstTy =
2878 if (IsBinaryShuffle)
2893 if (IsBinaryShuffle) {
2903 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2905 if (NewCost > OldCost)
2909 if (IsBinaryShuffle)
2919 NewInst->copyIRFlags(C0);
2920 if (IsBinaryShuffle)
2921 NewInst->andIRFlags(C1);
2925 replaceValue(
I, *Cast);
2935bool VectorCombine::foldShuffleOfShuffles(Instruction &
I) {
2936 ArrayRef<int> OuterMask;
2937 Value *OuterV0, *OuterV1;
2942 ArrayRef<int> InnerMask0, InnerMask1;
2943 Value *X0, *X1, *Y0, *Y1;
2948 if (!Match0 && !Match1)
2953 SmallVector<int, 16> PoisonMask1;
2958 InnerMask1 = PoisonMask1;
2962 X0 = Match0 ? X0 : OuterV0;
2963 Y0 = Match0 ? Y0 : OuterV0;
2964 X1 = Match1 ? X1 : OuterV1;
2965 Y1 = Match1 ? Y1 : OuterV1;
2969 if (!ShuffleDstTy || !ShuffleSrcTy || !ShuffleImmTy ||
2973 unsigned NumSrcElts = ShuffleSrcTy->getNumElements();
2974 unsigned NumImmElts = ShuffleImmTy->getNumElements();
2979 SmallVector<int, 16> NewMask(OuterMask);
2980 Value *NewX =
nullptr, *NewY =
nullptr;
2981 for (
int &M : NewMask) {
2982 Value *Src =
nullptr;
2983 if (0 <= M && M < (
int)NumImmElts) {
2987 Src =
M >= (int)NumSrcElts ? Y0 : X0;
2988 M =
M >= (int)NumSrcElts ? (M - NumSrcElts) :
M;
2990 }
else if (M >= (
int)NumImmElts) {
2995 Src =
M >= (int)NumSrcElts ? Y1 : X1;
2996 M =
M >= (int)NumSrcElts ? (M - NumSrcElts) :
M;
3000 assert(0 <= M && M < (
int)NumSrcElts &&
"Unexpected shuffle mask index");
3009 if (!NewX || NewX == Src) {
3013 if (!NewY || NewY == Src) {
3029 replaceValue(
I, *NewX);
3046 bool IsUnary =
all_of(NewMask, [&](
int M) {
return M < (int)NumSrcElts; });
3052 nullptr, {NewX, NewY});
3054 NewCost += InnerCost0;
3056 NewCost += InnerCost1;
3059 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
3061 if (NewCost > OldCost)
3065 replaceValue(
I, *Shuf);
3081bool VectorCombine::foldShufflesOfLengthChangingShuffles(Instruction &
I) {
3086 unsigned ChainLength = 0;
3087 SmallVector<int>
Mask;
3088 SmallVector<int> YMask;
3098 ArrayRef<int> OuterMask;
3099 Value *OuterV0, *OuterV1;
3100 if (ChainLength != 0 && !Trunk->
hasOneUse())
3103 m_Mask(OuterMask))))
3105 if (OuterV0->
getType() != TrunkType) {
3111 ArrayRef<int> InnerMask0, InnerMask1;
3112 Value *A0, *A1, *B0, *B1;
3117 bool Match0Leaf = Match0 && A0->
getType() !=
I.getType();
3118 bool Match1Leaf = Match1 && A1->
getType() !=
I.getType();
3119 if (Match0Leaf == Match1Leaf) {
3125 SmallVector<int> CommutedOuterMask;
3132 for (
int &M : CommutedOuterMask) {
3135 if (M < (
int)NumTrunkElts)
3140 OuterMask = CommutedOuterMask;
3159 int NumLeafElts = YType->getNumElements();
3160 SmallVector<int> LocalYMask(InnerMask1);
3161 for (
int &M : LocalYMask) {
3162 if (M >= NumLeafElts)
3172 Mask.assign(OuterMask);
3173 YMask.
assign(LocalYMask);
3174 OldCost = NewCost = LocalOldCost;
3181 SmallVector<int> NewYMask(YMask);
3183 for (
auto [CombinedM, LeafM] :
llvm::zip(NewYMask, LocalYMask)) {
3184 if (LeafM == -1 || CombinedM == LeafM)
3186 if (CombinedM == -1) {
3196 SmallVector<int> NewMask;
3197 NewMask.
reserve(NumTrunkElts);
3198 for (
int M : Mask) {
3199 if (M < 0 || M >=
static_cast<int>(NumTrunkElts))
3214 if (LocalNewCost >= NewCost && LocalOldCost < LocalNewCost - NewCost)
3218 if (ChainLength == 1) {
3219 dbgs() <<
"Found chain of shuffles fed by length-changing shuffles: "
3222 dbgs() <<
" next chain link: " << *Trunk <<
'\n'
3223 <<
" old cost: " << (OldCost + LocalOldCost)
3224 <<
" new cost: " << LocalNewCost <<
'\n';
3229 OldCost += LocalOldCost;
3230 NewCost = LocalNewCost;
3234 if (ChainLength <= 1)
3238 return M < 0 || M >=
static_cast<int>(NumTrunkElts);
3241 for (
int &M : Mask) {
3242 if (M >=
static_cast<int>(NumTrunkElts))
3243 M = YMask[
M - NumTrunkElts];
3247 replaceValue(
I, *Root);
3254 replaceValue(
I, *Root);
3260bool VectorCombine::foldShuffleOfIntrinsics(Instruction &
I) {
3262 ArrayRef<int> OldMask;
3272 if (IID != II1->getIntrinsicID())
3281 if (!ShuffleDstTy || !II0Ty)
3287 for (
unsigned I = 0,
E = II0->arg_size();
I !=
E; ++
I)
3289 II0->getArgOperand(
I) != II1->getArgOperand(
I))
3295 II0Ty, OldMask,
CostKind, 0,
nullptr, {II0, II1}, &
I);
3299 SmallDenseSet<std::pair<Value *, Value *>> SeenOperandPairs;
3300 for (
unsigned I = 0,
E = II0->arg_size();
I !=
E; ++
I) {
3302 NewArgsTy.
push_back(II0->getArgOperand(
I)->getType());
3306 ShuffleDstTy->getNumElements());
3308 std::pair<Value *, Value *> OperandPair =
3309 std::make_pair(II0->getArgOperand(
I), II1->getArgOperand(
I));
3310 if (!SeenOperandPairs.
insert(OperandPair).second) {
3316 CostKind, 0,
nullptr, {II0->getArgOperand(
I), II1->getArgOperand(
I)});
3319 IntrinsicCostAttributes NewAttr(IID, ShuffleDstTy, NewArgsTy);
3322 if (!II0->hasOneUse())
3324 if (II1 != II0 && !II1->hasOneUse())
3328 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
3331 if (NewCost > OldCost)
3335 SmallDenseMap<std::pair<Value *, Value *>,
Value *> ShuffleCache;
3336 for (
unsigned I = 0,
E = II0->arg_size();
I !=
E; ++
I)
3340 std::pair<Value *, Value *> OperandPair =
3341 std::make_pair(II0->getArgOperand(
I), II1->getArgOperand(
I));
3342 auto It = ShuffleCache.
find(OperandPair);
3343 if (It != ShuffleCache.
end()) {
3349 II1->getArgOperand(
I), OldMask);
3350 ShuffleCache[OperandPair] = Shuf;
3358 NewInst->copyIRFlags(II0);
3359 NewInst->andIRFlags(II1);
3362 replaceValue(
I, *NewIntrinsic);
3368bool VectorCombine::foldPermuteOfIntrinsic(Instruction &
I) {
3380 if (!ShuffleDstTy || !IntrinsicSrcTy)
3384 unsigned NumSrcElts = IntrinsicSrcTy->getNumElements();
3385 if (
any_of(Mask, [NumSrcElts](
int M) {
return M >= (int)NumSrcElts; }))
3398 IntrinsicSrcTy, Mask,
CostKind, 0,
nullptr, {V0}, &
I);
3402 for (
unsigned I = 0,
E = II0->arg_size();
I !=
E; ++
I) {
3404 NewArgsTy.
push_back(II0->getArgOperand(
I)->getType());
3408 ShuffleDstTy->getNumElements());
3411 ArgTy, VecTy, Mask,
CostKind, 0,
nullptr,
3412 {II0->getArgOperand(
I)});
3415 IntrinsicCostAttributes NewAttr(IID, ShuffleDstTy, NewArgsTy);
3420 if (!II0->hasOneUse())
3423 LLVM_DEBUG(
dbgs() <<
"Found a permute of intrinsic: " <<
I <<
"\n OldCost: "
3424 << OldCost <<
" vs NewCost: " << NewCost <<
"\n");
3426 if (NewCost > OldCost)
3431 for (
unsigned I = 0,
E = II0->arg_size();
I !=
E; ++
I) {
3446 replaceValue(
I, *NewIntrinsic);
3456 int M = SV->getMaskValue(Lane);
3459 if (
static_cast<unsigned>(M) < NumElts) {
3460 V = SV->getOperand(0);
3463 V = SV->getOperand(1);
3474 auto [U, Lane] = IL;
3487 unsigned NumElts = Ty->getNumElements();
3488 if (Item.
size() == NumElts || NumElts == 1 || Item.
size() % NumElts != 0)
3494 std::iota(ConcatMask.
begin(), ConcatMask.
end(), 0);
3500 unsigned NumSlices = Item.
size() / NumElts;
3505 for (
unsigned Slice = 0; Slice < NumSlices; ++Slice) {
3506 Value *SliceV = Item[Slice * NumElts].first;
3507 if (!SliceV || SliceV->
getType() != Ty)
3509 for (
unsigned Elt = 0; Elt < NumElts; ++Elt) {
3510 auto [V, Lane] = Item[Slice * NumElts + Elt];
3511 if (Lane !=
static_cast<int>(Elt) || SliceV != V)
3520 const DenseSet<std::pair<Value *, Use *>> &IdentityLeafs,
3521 const DenseSet<std::pair<Value *, Use *>> &SplatLeafs,
3522 const DenseSet<std::pair<Value *, Use *>> &ConcatLeafs,
3524 auto [FrontV, FrontLane] = Item.
front();
3526 if (IdentityLeafs.contains(std::make_pair(FrontV, From))) {
3529 if (SplatLeafs.contains(std::make_pair(FrontV, From))) {
3531 return Builder.CreateShuffleVector(FrontV, Mask);
3533 if (ConcatLeafs.contains(std::make_pair(FrontV, From))) {
3537 for (
unsigned S = 0; S < Values.
size(); ++S)
3538 Values[S] = Item[S * NumElts].first;
3540 while (Values.
size() > 1) {
3543 std::iota(Mask.begin(), Mask.end(), 0);
3545 for (
unsigned S = 0; S < NewValues.
size(); ++S)
3547 Builder.CreateShuffleVector(Values[S * 2], Values[S * 2 + 1], Mask);
3555 unsigned NumOps =
I->getNumOperands() - (
II ? 1 : 0);
3557 for (
unsigned Idx = 0; Idx <
NumOps; Idx++) {
3560 Ops[Idx] =
II->getOperand(Idx);
3564 &
I->getOperandUse(Idx), Ty, IdentityLeafs,
3565 SplatLeafs, ConcatLeafs, Builder,
TTI);
3569 for (
const auto &Lane : Item)
3582 auto *
Value = Builder.CreateCmp(CI->getPredicate(),
Ops[0],
Ops[1]);
3592 auto *
Value = Builder.CreateCast(CI->getOpcode(),
Ops[0], DstTy);
3597 auto *
Value = Builder.CreateIntrinsic(DstTy,
II->getIntrinsicID(),
Ops);
3611bool VectorCombine::foldShuffleToIdentity(Instruction &
I) {
3613 if (!Ty ||
I.use_empty())
3617 for (
unsigned M = 0,
E = Ty->getNumElements(); M <
E; ++M)
3621 Worklist.
push_back(std::make_pair(Start, &*
I.use_begin()));
3622 DenseSet<std::pair<Value *, Use *>> IdentityLeafs, SplatLeafs, ConcatLeafs;
3623 unsigned NumVisited = 0;
3625 while (!Worklist.
empty()) {
3630 auto Item = ItemFrom.first;
3631 auto From = ItemFrom.second;
3632 auto [FrontV, FrontLane] = Item.front();
3640 return X->getType() ==
Y->getType() &&
3645 if (FrontLane == 0 &&
3647 Ty->getNumElements() &&
3649 Value *FrontV = Item.front().first;
3650 return !
E.value().first || (IsEquiv(
E.value().first, FrontV) &&
3651 E.value().second == (int)
E.index());
3653 IdentityLeafs.
insert(std::make_pair(FrontV, From));
3658 C &&
C->getSplatValue() &&
3660 Value *FrontV = Item.front().first;
3666 SplatLeafs.
insert(std::make_pair(FrontV, From));
3671 auto [FrontV, FrontLane] = Item.front();
3672 auto [
V, Lane] = IL;
3673 return !
V || (
V == FrontV && Lane == FrontLane);
3675 SplatLeafs.
insert(std::make_pair(FrontV, From));
3681 auto CheckLaneIsEquivalentToFirst = [Item](
InstLane IL) {
3682 Value *FrontV = Item.front().first;
3691 if (CI->getPredicate() !=
cast<CmpInst>(FrontV)->getPredicate())
3694 if (CI->getSrcTy()->getScalarType() !=
3699 SI->getOperand(0)->getType() !=
3706 II->getIntrinsicID() ==
3708 !
II->hasOperandBundles());
3715 BO && BO->isIntDivRem())
3722 }
else if (
isa<UnaryOperator, TruncInst, ZExtInst, SExtInst, FPToSIInst,
3723 FPToUIInst, SIToFPInst, UIToFPInst>(FrontV)) {
3731 if (DstTy && SrcTy &&
3732 SrcTy->getNumElements() == DstTy->getNumElements()) {
3734 &BitCast->getOperandUse(0));
3739 &Sel->getOperandUse(0));
3741 &Sel->getOperandUse(1));
3743 &Sel->getOperandUse(2));
3747 !
II->hasOperandBundles()) {
3748 for (
unsigned Op = 0,
E =
II->getNumOperands() - 1;
Op <
E;
Op++) {
3752 Value *FrontV = Item.front().first;
3768 ConcatLeafs.
insert(std::make_pair(FrontV, From));
3775 if (NumVisited <= 1)
3778 LLVM_DEBUG(
dbgs() <<
"Found a superfluous identity shuffle: " <<
I <<
"\n");
3784 SplatLeafs, ConcatLeafs, Builder, &
TTI);
3785 replaceValue(
I, *V);
3792bool VectorCombine::foldShuffleFromReductions(Instruction &
I) {
3796 switch (
II->getIntrinsicID()) {
3797 case Intrinsic::vector_reduce_add:
3798 case Intrinsic::vector_reduce_mul:
3799 case Intrinsic::vector_reduce_and:
3800 case Intrinsic::vector_reduce_or:
3801 case Intrinsic::vector_reduce_xor:
3802 case Intrinsic::vector_reduce_smin:
3803 case Intrinsic::vector_reduce_smax:
3804 case Intrinsic::vector_reduce_umin:
3805 case Intrinsic::vector_reduce_umax:
3814 std::queue<Value *> Worklist;
3815 SmallPtrSet<Value *, 4> Visited;
3816 ShuffleVectorInst *Shuffle =
nullptr;
3820 while (!Worklist.empty()) {
3821 Value *CV = Worklist.front();
3833 if (CI->isBinaryOp()) {
3834 for (
auto *
Op : CI->operand_values())
3838 if (Shuffle && Shuffle != SV)
3855 for (
auto *V : Visited)
3856 for (
auto *U :
V->users())
3857 if (!Visited.contains(U) && U != &
I)
3860 FixedVectorType *VecType =
3864 FixedVectorType *ShuffleInputType =
3866 if (!ShuffleInputType)
3872 SmallVector<int> ConcatMask;
3874 sort(ConcatMask, [](
int X,
int Y) {
return (
unsigned)
X < (unsigned)
Y; });
3875 bool UsesSecondVec =
3876 any_of(ConcatMask, [&](
int M) {
return M >= (int)NumInputElts; });
3883 ShuffleInputType, ConcatMask,
CostKind);
3885 LLVM_DEBUG(
dbgs() <<
"Found a reduction feeding from a shuffle: " << *Shuffle
3887 LLVM_DEBUG(
dbgs() <<
" OldCost: " << OldCost <<
" vs NewCost: " << NewCost
3889 bool MadeChanges =
false;
3890 if (NewCost < OldCost) {
3894 LLVM_DEBUG(
dbgs() <<
"Created new shuffle: " << *NewShuffle <<
"\n");
3895 replaceValue(*Shuffle, *NewShuffle);
3901 MadeChanges |= foldSelectShuffle(*Shuffle,
true);
3947bool VectorCombine::foldShuffleChainsToReduce(Instruction &
I) {
3949 std::queue<Value *> InstWorklist;
3953 std::optional<unsigned int> CommonCallOp = std::nullopt;
3954 std::optional<Instruction::BinaryOps> CommonBinOp = std::nullopt;
3956 bool IsFirstCallOrBinInst =
true;
3957 bool ShouldBeCallOrBinInst =
true;
3963 SmallVector<Value *, 2> PrevVecV(2,
nullptr);
3973 int64_t
VecSize = FVT->getNumElements();
3979 unsigned int NumLevels =
Log2_64_Ceil(VecSize), VisitedCnt = 0;
3980 int64_t ShuffleMaskHalf = 1, ExpectedParityMask = 0;
3990 for (
int Cur = VecSize, Mask = NumLevels - 1; Cur > 1;
3991 Cur = (Cur + 1) / 2, --
Mask) {
3993 ExpectedParityMask |= (1ll <<
Mask);
3996 InstWorklist.push(VecOpEE);
3998 while (!InstWorklist.empty()) {
3999 Value *CI = InstWorklist.front();
4003 if (!ShouldBeCallOrBinInst)
4006 if (!IsFirstCallOrBinInst &&
any_of(PrevVecV,
equal_to(
nullptr)))
4011 if (
II != (IsFirstCallOrBinInst ? VecOpEE : PrevVecV[0]))
4013 IsFirstCallOrBinInst =
false;
4016 CommonCallOp =
II->getIntrinsicID();
4017 if (
II->getIntrinsicID() != *CommonCallOp)
4020 switch (
II->getIntrinsicID()) {
4021 case Intrinsic::umin:
4022 case Intrinsic::umax:
4023 case Intrinsic::smin:
4024 case Intrinsic::smax: {
4025 auto *Op0 =
II->getOperand(0);
4026 auto *Op1 =
II->getOperand(1);
4034 ShouldBeCallOrBinInst ^= 1;
4036 IntrinsicCostAttributes ICA(
4037 *CommonCallOp,
II->getType(),
4038 {PrevVecV[0]->getType(), PrevVecV[1]->getType()});
4045 InstWorklist.push(PrevVecV[1]);
4046 InstWorklist.push(PrevVecV[0]);
4050 if (!ShouldBeCallOrBinInst)
4053 if (!IsFirstCallOrBinInst &&
any_of(PrevVecV,
equal_to(
nullptr)))
4056 if (BinOp != (IsFirstCallOrBinInst ? VecOpEE : PrevVecV[0]))
4058 IsFirstCallOrBinInst =
false;
4066 switch (*CommonBinOp) {
4067 case BinaryOperator::Add:
4068 case BinaryOperator::Mul:
4069 case BinaryOperator::Or:
4070 case BinaryOperator::And:
4071 case BinaryOperator::Xor: {
4081 ShouldBeCallOrBinInst ^= 1;
4088 InstWorklist.push(PrevVecV[1]);
4089 InstWorklist.push(PrevVecV[0]);
4093 if (ShouldBeCallOrBinInst ||
any_of(PrevVecV,
equal_to(
nullptr)))
4096 if (SVInst != PrevVecV[1])
4099 ArrayRef<int> CurMask;
4105 for (
int Mask = 0, MaskSize = CurMask.
size(); Mask != MaskSize; ++Mask) {
4106 if (Mask < ShuffleMaskHalf &&
4107 CurMask[Mask] != ShuffleMaskHalf + Mask - (ExpectedParityMask & 1))
4109 if (Mask >= ShuffleMaskHalf && CurMask[Mask] != -1)
4114 ShuffleMaskHalf *= 2;
4115 ShuffleMaskHalf -= (ExpectedParityMask & 1);
4116 ExpectedParityMask >>= 1;
4119 SVInst->getType(), SVInst->getType(),
4123 if (!ExpectedParityMask && VisitedCnt == NumLevels)
4126 ShouldBeCallOrBinInst ^= 1;
4133 if (ShouldBeCallOrBinInst)
4136 assert(VecSize != -1 &&
"Expected Match for Vector Size");
4138 Value *FinalVecV = PrevVecV[0];
4150 IntrinsicCostAttributes ICA(ReducedOp, FinalVecVTy, {FinalVecV});
4153 if (NewCost >= OrigCost)
4156 auto *ReducedResult =
4158 replaceValue(
I, *ReducedResult);
4167bool VectorCombine::foldCastFromReductions(Instruction &
I) {
4172 bool TruncOnly =
false;
4175 case Intrinsic::vector_reduce_add:
4176 case Intrinsic::vector_reduce_mul:
4179 case Intrinsic::vector_reduce_and:
4180 case Intrinsic::vector_reduce_or:
4181 case Intrinsic::vector_reduce_xor:
4188 Value *ReductionSrc =
I.getOperand(0);
4200 Type *ResultTy =
I.getType();
4203 ReductionOpc, ReductionSrcTy, std::nullopt,
CostKind);
4213 if (OldCost <= NewCost || !NewCost.
isValid())
4217 II->getIntrinsicID(), {Src});
4219 replaceValue(
I, *NewCast);
4247bool VectorCombine::foldSignBitReductionCmp(Instruction &
I) {
4249 IntrinsicInst *ReduceOp;
4250 const APInt *CmpVal;
4257 case Intrinsic::vector_reduce_or:
4258 case Intrinsic::vector_reduce_umax:
4259 case Intrinsic::vector_reduce_and:
4260 case Intrinsic::vector_reduce_umin:
4261 case Intrinsic::vector_reduce_add:
4272 unsigned BitWidth = VecTy->getScalarSizeInBits();
4276 unsigned NumElts = VecTy->getNumElements();
4285 case Intrinsic::vector_reduce_or:
4286 case Intrinsic::vector_reduce_umax:
4287 TreeOpcode = Instruction::Or;
4289 case Intrinsic::vector_reduce_and:
4290 case Intrinsic::vector_reduce_umin:
4291 TreeOpcode = Instruction::And;
4293 case Intrinsic::vector_reduce_add:
4294 TreeOpcode = Instruction::Add;
4302 SmallVector<Value *, 8> Worklist;
4303 SmallVector<Value *, 8> Sources;
4305 std::optional<bool> IsAShr;
4306 constexpr unsigned MaxSources = 8;
4311 while (!Worklist.
empty() && Worklist.
size() <= MaxSources &&
4312 Sources.
size() <= MaxSources) {
4321 bool ThisIsAShr = Shr->getOpcode() == Instruction::AShr;
4323 IsAShr = ThisIsAShr;
4324 else if (*IsAShr != ThisIsAShr)
4350 if (Sources.
empty() || Sources.
size() > MaxSources ||
4351 Worklist.
size() > MaxSources || !IsAShr)
4354 unsigned NumSources = Sources.
size();
4358 if (OrigIID == Intrinsic::vector_reduce_add &&
4366 (OrigIID == Intrinsic::vector_reduce_add) ? NumSources * NumElts : 1;
4369 NegativeVal.negate();
4401 TestsNegative =
false;
4402 }
else if (*CmpVal == NegativeVal) {
4403 TestsNegative =
true;
4407 IsEq = Pred == ICmpInst::ICMP_EQ;
4408 }
else if (Pred == ICmpInst::ICMP_SLT && *CmpVal == RangeHigh) {
4410 TestsNegative = (RangeHigh == NegativeVal);
4411 }
else if (Pred == ICmpInst::ICMP_SGT && *CmpVal == RangeHigh - 1) {
4413 TestsNegative = (RangeHigh == NegativeVal);
4414 }
else if (Pred == ICmpInst::ICMP_SGT && *CmpVal == RangeLow) {
4416 TestsNegative = (RangeLow == NegativeVal);
4417 }
else if (Pred == ICmpInst::ICMP_SLT && *CmpVal == RangeLow + 1) {
4419 TestsNegative = (RangeLow == NegativeVal);
4462 enum CheckKind :
unsigned {
4469 auto RequiresOr = [](CheckKind
C) ->
bool {
return C & 0b100; };
4471 auto IsNegativeCheck = [](CheckKind
C) ->
bool {
return C & 0b010; };
4473 auto Invert = [](CheckKind
C) {
return CheckKind(
C ^ 0b011); };
4477 case Intrinsic::vector_reduce_or:
4478 case Intrinsic::vector_reduce_umax:
4479 Base = TestsNegative ? AnyNeg : AllNonNeg;
4481 case Intrinsic::vector_reduce_and:
4482 case Intrinsic::vector_reduce_umin:
4483 Base = TestsNegative ? AllNeg : AnyNonNeg;
4485 case Intrinsic::vector_reduce_add:
4486 Base = TestsNegative ? AllNeg : AllNonNeg;
4501 return ArithCost <= MinMaxCost ? std::make_pair(Arith, ArithCost)
4502 : std::make_pair(MinMax, MinMaxCost);
4506 auto [NewIID, NewCost] = RequiresOr(
Check)
4507 ? PickCheaper(Intrinsic::vector_reduce_or,
4508 Intrinsic::vector_reduce_umax)
4509 : PickCheaper(
Intrinsic::vector_reduce_and,
4513 if (NumSources > 1) {
4514 unsigned CombineOpc =
4515 RequiresOr(
Check) ? Instruction::Or : Instruction::And;
4520 LLVM_DEBUG(
dbgs() <<
"Found sign-bit reduction cmp: " <<
I <<
"\n OldCost: "
4521 << OldCost <<
" vs NewCost: " << NewCost <<
"\n");
4523 if (NewCost > OldCost)
4528 Type *ScalarTy = VecTy->getScalarType();
4531 if (NumSources == 1) {
4542 replaceValue(
I, *NewCmp);
4567bool VectorCombine::foldICmpEqZeroVectorReduce(Instruction &
I) {
4578 switch (
II->getIntrinsicID()) {
4579 case Intrinsic::vector_reduce_add:
4580 case Intrinsic::vector_reduce_or:
4581 case Intrinsic::vector_reduce_umin:
4582 case Intrinsic::vector_reduce_umax:
4583 case Intrinsic::vector_reduce_smin:
4584 case Intrinsic::vector_reduce_smax:
4590 Value *InnerOp =
II->getArgOperand(0);
4633 switch (
II->getIntrinsicID()) {
4634 case Intrinsic::vector_reduce_add: {
4639 unsigned NumElems = XTy->getNumElements();
4645 if (LeadingZerosX <= LostBits || LeadingZerosFX <= LostBits)
4653 case Intrinsic::vector_reduce_smin:
4654 case Intrinsic::vector_reduce_smax:
4664 LLVM_DEBUG(
dbgs() <<
"Found a reduction to 0 comparison with removable op: "
4680 case Intrinsic::vector_reduce_add:
4681 case Intrinsic::vector_reduce_or:
4687 case Intrinsic::vector_reduce_umin:
4688 case Intrinsic::vector_reduce_umax:
4689 case Intrinsic::vector_reduce_smin:
4690 case Intrinsic::vector_reduce_smax:
4702 NewReduceCost + (InnerOp->
hasOneUse() ? 0 : ExtCost);
4704 LLVM_DEBUG(
dbgs() <<
"Found a removable extension before reduction: "
4705 << *InnerOp <<
"\n OldCost: " << OldCost
4706 <<
" vs NewCost: " << NewCost <<
"\n");
4712 if (NewCost > OldCost)
4721 Builder.
CreateICmp(Pred, NewReduce, ConstantInt::getNullValue(Ty));
4722 replaceValue(
I, *NewCmp);
4753bool VectorCombine::foldEquivalentReductionCmp(Instruction &
I) {
4756 const APInt *CmpVal;
4761 if (!
II || !
II->hasOneUse())
4764 const auto IsValidOrUmaxCmp = [&]() {
4773 bool IsPositive = CmpVal->
isAllOnes() && Pred == ICmpInst::ICMP_SGT;
4775 bool IsNegative = (CmpVal->
isZero() || CmpVal->
isOne() || *CmpVal == 2) &&
4776 Pred == ICmpInst::ICMP_SLT;
4777 return IsEquality || IsPositive || IsNegative;
4780 const auto IsValidAndUminCmp = [&]() {
4785 const auto LeadingOnes = CmpVal->
countl_one();
4792 bool IsNegative = CmpVal->
isZero() && Pred == ICmpInst::ICMP_SLT;
4801 ((*CmpVal)[0] || (*CmpVal)[1]) && Pred == ICmpInst::ICMP_SGT;
4802 return IsEquality || IsNegative || IsPositive;
4810 switch (OriginalIID) {
4811 case Intrinsic::vector_reduce_or:
4812 if (!IsValidOrUmaxCmp())
4814 AlternativeIID = Intrinsic::vector_reduce_umax;
4816 case Intrinsic::vector_reduce_umax:
4817 if (!IsValidOrUmaxCmp())
4819 AlternativeIID = Intrinsic::vector_reduce_or;
4821 case Intrinsic::vector_reduce_and:
4822 if (!IsValidAndUminCmp())
4824 AlternativeIID = Intrinsic::vector_reduce_umin;
4826 case Intrinsic::vector_reduce_umin:
4827 if (!IsValidAndUminCmp())
4829 AlternativeIID = Intrinsic::vector_reduce_and;
4842 if (ReductionOpc != Instruction::ICmp)
4853 <<
"\n OrigCost: " << OrigCost
4854 <<
" vs AltCost: " << AltCost <<
"\n");
4856 if (AltCost >= OrigCost)
4860 Type *ScalarTy = VecTy->getScalarType();
4863 Builder.
CreateICmp(Pred, NewReduce, ConstantInt::get(ScalarTy, *CmpVal));
4865 replaceValue(
I, *NewCmp);
4874 constexpr unsigned MaxVisited = 32;
4877 bool FoundReduction =
false;
4880 while (!WorkList.
empty()) {
4882 for (
User *U :
I->users()) {
4884 if (!UI || !Visited.
insert(UI).second)
4886 if (Visited.
size() > MaxVisited)
4892 switch (
II->getIntrinsicID()) {
4893 case Intrinsic::vector_reduce_add:
4894 case Intrinsic::vector_reduce_mul:
4895 case Intrinsic::vector_reduce_and:
4896 case Intrinsic::vector_reduce_or:
4897 case Intrinsic::vector_reduce_xor:
4898 case Intrinsic::vector_reduce_smin:
4899 case Intrinsic::vector_reduce_smax:
4900 case Intrinsic::vector_reduce_umin:
4901 case Intrinsic::vector_reduce_umax:
4902 FoundReduction =
true;
4915 return FoundReduction;
4928bool VectorCombine::foldSelectShuffle(Instruction &
I,
bool FromReduction) {
4933 if (!Op0 || !Op1 || Op0 == Op1 || !Op0->isBinaryOp() || !Op1->isBinaryOp() ||
4941 SmallPtrSet<Instruction *, 4> InputShuffles({SVI0A, SVI0B, SVI1A, SVI1B});
4943 if (!
I ||
I->getOperand(0)->getType() != VT)
4945 return any_of(
I->users(), [&](User *U) {
4946 return U != Op0 && U != Op1 &&
4947 !(isa<ShuffleVectorInst>(U) &&
4948 (InputShuffles.contains(cast<Instruction>(U)) ||
4949 isInstructionTriviallyDead(cast<Instruction>(U))));
4952 if (checkSVNonOpUses(SVI0A) || checkSVNonOpUses(SVI0B) ||
4953 checkSVNonOpUses(SVI1A) || checkSVNonOpUses(SVI1B))
4961 for (
auto *U :
I->users()) {
4963 if (!SV || SV->getType() != VT)
4965 if ((SV->getOperand(0) != Op0 && SV->getOperand(0) != Op1) ||
4966 (SV->getOperand(1) != Op0 && SV->getOperand(1) != Op1))
4973 if (!collectShuffles(Op0) || !collectShuffles(Op1))
4977 if (FromReduction && Shuffles.
size() > 1)
4982 if (!FromReduction) {
4983 for (ShuffleVectorInst *SV : Shuffles) {
4984 for (
auto *U : SV->users()) {
4987 Shuffles.push_back(SSV);
4999 int MaxV1Elt = 0, MaxV2Elt = 0;
5000 unsigned NumElts = VT->getNumElements();
5001 for (ShuffleVectorInst *SVN : Shuffles) {
5002 SmallVector<int>
Mask;
5003 SVN->getShuffleMask(Mask);
5007 Value *SVOp0 = SVN->getOperand(0);
5008 Value *SVOp1 = SVN->getOperand(1);
5013 for (
int &Elem : Mask) {
5019 if (SVOp0 == Op1 && SVOp1 == Op0) {
5023 if (SVOp0 != Op0 || SVOp1 != Op1)
5029 SmallVector<int> ReconstructMask;
5030 for (
unsigned I = 0;
I <
Mask.size();
I++) {
5033 }
else if (Mask[
I] <
static_cast<int>(NumElts)) {
5034 MaxV1Elt = std::max(MaxV1Elt, Mask[
I]);
5035 auto It =
find_if(V1, [&](
const std::pair<int, int> &
A) {
5036 return Mask[
I] ==
A.first;
5045 MaxV2Elt = std::max<int>(MaxV2Elt, Mask[
I] - NumElts);
5046 auto It =
find_if(V2, [&](
const std::pair<int, int> &
A) {
5047 return Mask[
I] -
static_cast<int>(NumElts) ==
A.first;
5061 sort(ReconstructMask);
5062 OrigReconstructMasks.
push_back(std::move(ReconstructMask));
5070 (MaxV1Elt ==
static_cast<int>(V1.
size()) - 1 &&
5071 MaxV2Elt ==
static_cast<int>(V2.
size()) - 1))
5083 if (InputShuffles.contains(SSV))
5085 return SV->getMaskValue(M);
5093 std::pair<int, int>
Y) {
5094 int MXA = GetBaseMaskValue(
A,
X.first);
5095 int MYA = GetBaseMaskValue(
A,
Y.first);
5098 stable_sort(V1, [&](std::pair<int, int>
A, std::pair<int, int>
B) {
5099 return SortBase(SVI0A,
A,
B);
5101 stable_sort(V2, [&](std::pair<int, int>
A, std::pair<int, int>
B) {
5102 return SortBase(SVI1A,
A,
B);
5107 for (
const auto &Mask : OrigReconstructMasks) {
5108 SmallVector<int> ReconstructMask;
5109 for (
int M : Mask) {
5111 auto It =
find_if(V, [M](
auto A) {
return A.second ==
M; });
5112 assert(It !=
V.end() &&
"Expected all entries in Mask");
5113 return std::distance(
V.begin(), It);
5117 else if (M <
static_cast<int>(NumElts)) {
5118 ReconstructMask.
push_back(FindIndex(V1, M));
5120 ReconstructMask.
push_back(NumElts + FindIndex(V2, M));
5123 ReconstructMasks.
push_back(std::move(ReconstructMask));
5128 SmallVector<int> V1A, V1B, V2A, V2B;
5129 for (
unsigned I = 0;
I < V1.
size();
I++) {
5130 V1A.
push_back(GetBaseMaskValue(SVI0A, V1[
I].first));
5131 V1B.
push_back(GetBaseMaskValue(SVI0B, V1[
I].first));
5133 for (
unsigned I = 0;
I < V2.
size();
I++) {
5134 V2A.
push_back(GetBaseMaskValue(SVI1A, V2[
I].first));
5135 V2B.
push_back(GetBaseMaskValue(SVI1B, V2[
I].first));
5137 while (V1A.
size() < NumElts) {
5141 while (V2A.
size() < NumElts) {
5153 VT, VT, SV->getShuffleMask(),
CostKind);
5160 unsigned ElementSize = VT->getElementType()->getPrimitiveSizeInBits();
5161 unsigned MaxVectorSize =
5163 unsigned MaxElementsInVector = MaxVectorSize / ElementSize;
5164 if (MaxElementsInVector == 0)
5173 std::set<SmallVector<int, 4>> UniqueShuffles;
5178 unsigned NumFullVectors =
Mask.size() / MaxElementsInVector;
5179 if (NumFullVectors < 2)
5180 return C + ShuffleCost;
5181 SmallVector<int, 4> SubShuffle(MaxElementsInVector);
5182 unsigned NumUniqueGroups = 0;
5183 unsigned NumGroups =
Mask.size() / MaxElementsInVector;
5186 for (
unsigned I = 0;
I < NumFullVectors; ++
I) {
5187 for (
unsigned J = 0; J < MaxElementsInVector; ++J)
5188 SubShuffle[J] = Mask[MaxElementsInVector *
I + J];
5189 if (UniqueShuffles.insert(SubShuffle).second)
5190 NumUniqueGroups += 1;
5192 return C + ShuffleCost * NumUniqueGroups / NumGroups;
5198 SmallVector<int, 16>
Mask;
5199 SV->getShuffleMask(Mask);
5200 return AddShuffleMaskAdjustedCost(
C, Mask);
5203 auto AllShufflesHaveSameOperands =
5204 [](SmallPtrSetImpl<Instruction *> &InputShuffles) {
5205 if (InputShuffles.size() < 2)
5207 ShuffleVectorInst *FirstSV =
5214 std::next(InputShuffles.begin()), InputShuffles.end(),
5215 [&](Instruction *
I) {
5216 ShuffleVectorInst *SV = dyn_cast<ShuffleVectorInst>(I);
5217 return SV && SV->getOperand(0) == In0 && SV->getOperand(1) == In1;
5226 CostBefore += std::accumulate(Shuffles.begin(), Shuffles.end(),
5228 if (AllShufflesHaveSameOperands(InputShuffles)) {
5229 UniqueShuffles.clear();
5230 CostBefore += std::accumulate(InputShuffles.begin(), InputShuffles.end(),
5233 CostBefore += std::accumulate(InputShuffles.begin(), InputShuffles.end(),
5239 FixedVectorType *Op0SmallVT =
5241 FixedVectorType *Op1SmallVT =
5246 UniqueShuffles.clear();
5247 CostAfter += std::accumulate(ReconstructMasks.begin(), ReconstructMasks.end(),
5249 std::set<SmallVector<int>> OutputShuffleMasks({V1A, V1B, V2A, V2B});
5251 std::accumulate(OutputShuffleMasks.begin(), OutputShuffleMasks.end(),
5254 LLVM_DEBUG(
dbgs() <<
"Found a binop select shuffle pattern: " <<
I <<
"\n");
5256 <<
" vs CostAfter: " << CostAfter <<
"\n");
5257 if (CostBefore < CostAfter ||
5268 if (InputShuffles.contains(SSV))
5270 return SV->getOperand(
Op);
5274 GetShuffleOperand(SVI0A, 1), V1A);
5277 GetShuffleOperand(SVI0B, 1), V1B);
5280 GetShuffleOperand(SVI1A, 1), V2A);
5283 GetShuffleOperand(SVI1B, 1), V2B);
5288 I->copyIRFlags(Op0,
true);
5293 I->copyIRFlags(Op1,
true);
5295 for (
int S = 0,
E = ReconstructMasks.size(); S !=
E; S++) {
5298 replaceValue(*Shuffles[S], *NSV,
false);
5301 Worklist.pushValue(NSV0A);
5302 Worklist.pushValue(NSV0B);
5303 Worklist.pushValue(NSV1A);
5304 Worklist.pushValue(NSV1B);
5314bool VectorCombine::shrinkType(Instruction &
I) {
5315 Value *ZExted, *OtherOperand;
5321 Value *ZExtOperand =
I.getOperand(
I.getOperand(0) == OtherOperand ? 1 : 0);
5325 unsigned BW = SmallTy->getElementType()->getPrimitiveSizeInBits();
5327 if (
I.getOpcode() == Instruction::LShr) {
5344 Instruction::ZExt, BigTy, SmallTy,
5345 TargetTransformInfo::CastContextHint::None,
CostKind);
5350 for (User *U : ZExtOperand->
users()) {
5357 ShrinkCost += ZExtCost;
5372 ShrinkCost += ZExtCost;
5379 Instruction::Trunc, SmallTy, BigTy,
5380 TargetTransformInfo::CastContextHint::None,
CostKind);
5385 if (ShrinkCost > CurrentCost)
5389 Value *Op0 = ZExted;
5392 if (
I.getOperand(0) == OtherOperand)
5399 replaceValue(
I, *NewZExtr);
5405bool VectorCombine::foldInsExtVectorToShuffle(Instruction &
I) {
5406 Value *DstVec, *SrcVec;
5407 uint64_t ExtIdx, InsIdx;
5417 if (!DstVecTy || !SrcVecTy ||
5423 if (InsIdx >= NumDstElts || ExtIdx >= NumSrcElts || NumDstElts == 1)
5430 bool NeedExpOrNarrow = NumSrcElts != NumDstElts;
5432 if (NeedDstSrcSwap) {
5434 Mask[InsIdx] = ExtIdx % NumDstElts;
5438 std::iota(
Mask.begin(),
Mask.end(), 0);
5439 Mask[InsIdx] = (ExtIdx % NumDstElts) + NumDstElts;
5452 SmallVector<int> ExtToVecMask;
5453 if (!NeedExpOrNarrow) {
5458 nullptr, {DstVec, SrcVec});
5464 ExtToVecMask[ExtIdx % NumDstElts] = ExtIdx;
5467 DstVecTy, SrcVecTy, ExtToVecMask,
CostKind);
5471 if (!Ext->hasOneUse())
5474 LLVM_DEBUG(
dbgs() <<
"Found a insert/extract shuffle-like pair: " <<
I
5475 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
5478 if (OldCost < NewCost)
5481 if (NeedExpOrNarrow) {
5482 if (!NeedDstSrcSwap)
5495 replaceValue(
I, *Shuf);
5504bool VectorCombine::foldInterleaveIntrinsics(Instruction &
I) {
5505 const APInt *SplatVal0, *SplatVal1;
5515 auto *ExtVTy = VectorType::getExtendedElementVectorType(VTy);
5516 unsigned Width = VTy->getElementType()->getIntegerBitWidth();
5525 LLVM_DEBUG(
dbgs() <<
"VC: The cost to cast from " << *ExtVTy <<
" to "
5526 << *
I.getType() <<
" is too high.\n");
5530 APInt NewSplatVal = SplatVal1->
zext(Width * 2);
5531 NewSplatVal <<= Width;
5532 NewSplatVal |= SplatVal0->
zext(Width * 2);
5534 ExtVTy->getElementCount(), ConstantInt::get(
F.getContext(), NewSplatVal));
5542bool VectorCombine::shrinkLoadForShuffles(Instruction &
I) {
5544 if (!OldLoad || !OldLoad->isSimple())
5551 unsigned const OldNumElements = OldLoadTy->getNumElements();
5557 using IndexRange = std::pair<int, int>;
5558 auto GetIndexRangeInShuffles = [&]() -> std::optional<IndexRange> {
5559 IndexRange OutputRange = IndexRange(OldNumElements, -1);
5560 for (llvm::Use &Use :
I.uses()) {
5562 User *Shuffle =
Use.getUser();
5567 return std::nullopt;
5574 for (
int Index : Mask) {
5575 if (Index >= 0 && Index <
static_cast<int>(OldNumElements)) {
5576 OutputRange.first = std::min(Index, OutputRange.first);
5577 OutputRange.second = std::max(Index, OutputRange.second);
5582 if (OutputRange.second < OutputRange.first)
5583 return std::nullopt;
5589 if (std::optional<IndexRange> Indices = GetIndexRangeInShuffles()) {
5590 unsigned const NewNumElements = Indices->second + 1u;
5594 if (NewNumElements < OldNumElements) {
5599 Type *ElemTy = OldLoadTy->getElementType();
5601 Value *PtrOp = OldLoad->getPointerOperand();
5604 Instruction::Load, OldLoad->getType(), OldLoad->getAlign(),
5605 OldLoad->getPointerAddressSpace(),
CostKind);
5608 OldLoad->getPointerAddressSpace(),
CostKind);
5610 using UseEntry = std::pair<ShuffleVectorInst *, std::vector<int>>;
5612 unsigned const MaxIndex = NewNumElements * 2u;
5614 for (llvm::Use &Use :
I.uses()) {
5621 ArrayRef<int> OldMask = Shuffle->getShuffleMask();
5627 for (
int Index : OldMask) {
5628 if (Index >=
static_cast<int>(MaxIndex))
5642 dbgs() <<
"Found a load used only by shufflevector instructions: "
5643 <<
I <<
"\n OldCost: " << OldCost
5644 <<
" vs NewCost: " << NewCost <<
"\n");
5646 if (OldCost < NewCost || !NewCost.
isValid())
5652 NewLoad->copyMetadata(
I);
5655 for (UseEntry &Use : NewUses) {
5656 ShuffleVectorInst *Shuffle =
Use.first;
5657 std::vector<int> &NewMask =
Use.second;
5664 replaceValue(*Shuffle, *NewShuffle,
false);
5677bool VectorCombine::shrinkPhiOfShuffles(Instruction &
I) {
5679 if (!Phi ||
Phi->getNumIncomingValues() != 2u)
5683 ArrayRef<int> Mask0;
5684 ArrayRef<int> Mask1;
5697 auto const InputNumElements = InputVT->getNumElements();
5699 if (InputNumElements >= ResultVT->getNumElements())
5704 SmallVector<int, 16> NewMask;
5707 for (
auto [
M0,
M1] :
zip(Mask0, Mask1)) {
5708 if (
M0 >= 0 &&
M1 >= 0)
5710 else if (
M0 == -1 &&
M1 == -1)
5723 int MaskOffset = NewMask[0
u];
5724 unsigned Index = (InputNumElements + MaskOffset) % InputNumElements;
5727 for (
unsigned I = 0u;
I < InputNumElements; ++
I) {
5741 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
5744 if (NewCost > OldCost)
5756 auto *NewPhi = Builder.
CreatePHI(NewShuf0->getType(), 2u);
5758 NewPhi->addIncoming(
Op,
Phi->getIncomingBlock(1u));
5764 replaceValue(*Phi, *NewShuf1);
5770bool VectorCombine::run() {
5784 auto Opcode =
I.getOpcode();
5792 if (IsFixedVectorType) {
5794 case Instruction::InsertElement:
5795 if (vectorizeLoadInsert(
I))
5798 case Instruction::ShuffleVector:
5799 if (widenSubvectorLoad(
I))
5810 if (scalarizeOpOrCmp(
I))
5812 if (scalarizeLoad(
I))
5814 if (scalarizeExtExtract(
I))
5816 if (scalarizeVPIntrinsic(
I))
5818 if (foldInterleaveIntrinsics(
I))
5822 if (Opcode == Instruction::Store)
5823 if (foldSingleElementStore(
I))
5827 if (TryEarlyFoldsOnly)
5834 if (IsFixedVectorType) {
5836 case Instruction::InsertElement:
5837 if (foldInsExtFNeg(
I))
5839 if (foldInsExtBinop(
I))
5841 if (foldInsExtVectorToShuffle(
I))
5844 case Instruction::ShuffleVector:
5845 if (foldPermuteOfBinops(
I))
5847 if (foldShuffleOfBinops(
I))
5849 if (foldShuffleOfSelects(
I))
5851 if (foldShuffleOfCastops(
I))
5853 if (foldShuffleOfShuffles(
I))
5855 if (foldPermuteOfIntrinsic(
I))
5857 if (foldShufflesOfLengthChangingShuffles(
I))
5859 if (foldShuffleOfIntrinsics(
I))
5861 if (foldSelectShuffle(
I))
5863 if (foldShuffleToIdentity(
I))
5866 case Instruction::Load:
5867 if (shrinkLoadForShuffles(
I))
5870 case Instruction::BitCast:
5871 if (foldBitcastShuffle(
I))
5873 if (foldSelectsFromBitcast(
I))
5876 case Instruction::And:
5877 case Instruction::Or:
5878 case Instruction::Xor:
5879 if (foldBitOpOfCastops(
I))
5881 if (foldBitOpOfCastConstant(
I))
5884 case Instruction::PHI:
5885 if (shrinkPhiOfShuffles(
I))
5895 case Instruction::Call:
5896 if (foldShuffleFromReductions(
I))
5898 if (foldCastFromReductions(
I))
5901 case Instruction::ExtractElement:
5902 if (foldShuffleChainsToReduce(
I))
5905 case Instruction::ICmp:
5906 if (foldSignBitReductionCmp(
I))
5908 if (foldICmpEqZeroVectorReduce(
I))
5910 if (foldEquivalentReductionCmp(
I))
5913 case Instruction::FCmp:
5914 if (foldExtractExtract(
I))
5917 case Instruction::Or:
5918 if (foldConcatOfBoolMasks(
I))
5923 if (foldExtractExtract(
I))
5925 if (foldExtractedCmps(
I))
5927 if (foldBinopOfReductions(
I))
5936 bool MadeChange =
false;
5937 for (BasicBlock &BB :
F) {
5949 if (!
I->isDebugOrPseudoInst())
5950 MadeChange |= FoldInst(*
I);
5957 while (!Worklist.isEmpty()) {
5967 MadeChange |= FoldInst(*
I);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< unsigned > MaxInstrsToScan("aggressive-instcombine-max-scan-instrs", cl::init(64), cl::Hidden, cl::desc("Max number of instructions to scan for aggressive instcombine."))
This is the interface for LLVM's primary stateless and local alias analysis.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
static cl::opt< IntrinsicCostStrategy > IntrinsicCost("intrinsic-cost-strategy", cl::desc("Costing strategy for intrinsic instructions"), cl::init(IntrinsicCostStrategy::InstructionCost), cl::values(clEnumValN(IntrinsicCostStrategy::InstructionCost, "instruction-cost", "Use TargetTransformInfo::getInstructionCost"), clEnumValN(IntrinsicCostStrategy::IntrinsicCost, "intrinsic-cost", "Use TargetTransformInfo::getIntrinsicInstrCost"), clEnumValN(IntrinsicCostStrategy::TypeBasedIntrinsicCost, "type-based-intrinsic-cost", "Calculate the intrinsic cost based only on argument types")))
This file defines the DenseMap class.
This is the interface for a simple mod/ref and alias analysis over globals.
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
FunctionAnalysisManager FAM
const SmallVectorImpl< MachineOperand > & Cond
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static SymbolRef::Type getType(const Symbol *Sym)
static bool isFreeConcat(ArrayRef< InstLane > Item, TTI::TargetCostKind CostKind, const TargetTransformInfo &TTI)
Detect concat of multiple values into a vector.
static void analyzeCostOfVecReduction(const IntrinsicInst &II, TTI::TargetCostKind CostKind, const TargetTransformInfo &TTI, InstructionCost &CostBeforeReduction, InstructionCost &CostAfterReduction)
static SmallVector< InstLane > generateInstLaneVectorFromOperand(ArrayRef< InstLane > Item, int Op)
static Value * createShiftShuffle(Value *Vec, unsigned OldIndex, unsigned NewIndex, IRBuilderBase &Builder)
Create a shuffle that translates (shifts) 1 element from the input vector to a new element location.
static Value * generateNewInstTree(ArrayRef< InstLane > Item, Use *From, FixedVectorType *Ty, const DenseSet< std::pair< Value *, Use * > > &IdentityLeafs, const DenseSet< std::pair< Value *, Use * > > &SplatLeafs, const DenseSet< std::pair< Value *, Use * > > &ConcatLeafs, IRBuilderBase &Builder, const TargetTransformInfo *TTI)
std::pair< Value *, int > InstLane
static Align computeAlignmentAfterScalarization(Align VectorAlignment, Type *ScalarType, Value *Idx, const DataLayout &DL)
The memory operation on a vector of ScalarType had alignment of VectorAlignment.
static bool feedsIntoVectorReduction(ShuffleVectorInst *SVI)
Returns true if this ShuffleVectorInst eventually feeds into a vector reduction intrinsic (e....
static cl::opt< bool > DisableVectorCombine("disable-vector-combine", cl::init(false), cl::Hidden, cl::desc("Disable all vector combine transforms"))
static bool canWidenLoad(LoadInst *Load, const TargetTransformInfo &TTI)
static const unsigned InvalidIndex
static Value * translateExtract(ExtractElementInst *ExtElt, unsigned NewIndex, IRBuilderBase &Builder)
Given an extract element instruction with constant index operand, shuffle the source vector (shift th...
static ScalarizationResult canScalarizeAccess(VectorType *VecTy, Value *Idx, const SimplifyQuery &SQ)
Check if it is legal to scalarize a memory access to VecTy at index Idx.
static cl::opt< unsigned > MaxInstrsToScan("vector-combine-max-scan-instrs", cl::init(30), cl::Hidden, cl::desc("Max number of instructions to scan for vector combining."))
static cl::opt< bool > DisableBinopExtractShuffle("disable-binop-extract-shuffle", cl::init(false), cl::Hidden, cl::desc("Disable binop extract to shuffle transforms"))
static InstLane lookThroughShuffles(Value *V, int Lane)
static bool isMemModifiedBetween(BasicBlock::iterator Begin, BasicBlock::iterator End, const MemoryLocation &Loc, AAResults &AA)
static constexpr int Concat[]
A manager for alias analyses.
Class for arbitrary precision integers.
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
unsigned getBitWidth() const
Return the number of bits in the APInt.
bool isNegative() const
Determine sign of this APInt.
unsigned countl_one() const
Count the number of leading one bits.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
bool isOne() const
Determine if this is a value of 1.
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & front() const
front - Get the first element.
size_t size() const
size - Get the array size.
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM_ABI bool hasAttribute(Attribute::AttrKind Kind) const
Return true if the attribute exists in this set.
InstListType::iterator iterator
Instruction iterators...
BinaryOps getOpcode() const
Represents analyses that only rely on functions' control flow.
Value * getArgOperand(unsigned i) const
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
bool isFPPredicate() const
static LLVM_ABI std::optional< CmpPredicate > getMatching(CmpPredicate A, CmpPredicate B)
Compares two CmpPredicates taking samesign into account and returns the canonicalized CmpPredicate if...
static LLVM_ABI Constant * getExtractElement(Constant *Vec, Constant *Idx, Type *OnlyIfReducedTy=nullptr)
This is the shared class of boolean and integer constants.
const APInt & getValue() const
Return the constant as an APInt value reference.
This class represents a range of values.
LLVM_ABI ConstantRange urem(const ConstantRange &Other) const
Return a new range representing the possible values resulting from an unsigned remainder operation of...
LLVM_ABI ConstantRange binaryAnd(const ConstantRange &Other) const
Return a new range representing the possible values resulting from a binary-and of a value in this ra...
LLVM_ABI bool contains(const APInt &Val) const
Return true if the specified value is in the set.
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
iterator find(const_arg_type_t< KeyT > Val)
Implements a dense probed hash-table based set.
Analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
LLVM_ABI bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Convenience struct for specifying and reasoning about fast-math flags.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static FixedVectorType * getDoubleElementsVectorType(FixedVectorType *VTy)
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Predicate getSignedPredicate() const
For example, EQ->EQ, SLE->SLE, UGT->SGT, etc.
bool isEquality() const
Return true if this predicate is either EQ or NE.
Common base class shared among various IRBuilders.
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
LLVM_ABI Value * CreateSelectFMF(Value *C, Value *True, Value *False, FMFSource FMFSource, const Twine &Name="", Instruction *MDFrom=nullptr)
LLVM_ABI Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Value * CreateFreeze(Value *V, const Twine &Name="")
void SetCurrentDebugLocation(const DebugLoc &L)
Set location information used by debugging information.
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="", MDNode *FPMathTag=nullptr, FMFSource FMFSource={})
Value * CreateIsNotNeg(Value *Arg, const Twine &Name="")
Return a boolean value testing if Arg > -1.
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Value * CreatePointerBitCastOrAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > OverloadTypes, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using OverloadTypes.
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
InstTy * Insert(InstTy *I, const Twine &Name="") const
Insert and return the specified instruction.
Value * CreateIsNeg(Value *Arg, const Twine &Name="")
Return a boolean value testing if Arg < 0.
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
LLVM_ABI Value * CreateNAryOp(unsigned Opc, ArrayRef< Value * > Ops, const Twine &Name="", MDNode *FPMathTag=nullptr)
Create either a UnaryOperator or BinaryOperator depending on Opc.
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Value * CreateFNegFMF(Value *V, FMFSource FMFSource, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
CostType getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
void push(Instruction *I)
Push the instruction onto the worklist stack.
LLVM_ABI void setHasNoUnsignedWrap(bool b=true)
Set or clear the nuw flag on this instruction, which must be an operator which supports this flag.
LLVM_ABI void copyIRFlags(const Value *V, bool IncludeWrapFlags=true)
Convenience method to copy supported exact, fast-math, and (optionally) wrapping flags from V to this...
LLVM_ABI void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI void andIRFlags(const Value *V)
Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction.
LLVM_ABI void setNonNeg(bool b=true)
Set or clear the nneg flag on this instruction, which must be a zext instruction.
LLVM_ABI bool comesBefore(const Instruction *Other) const
Given an instruction Other in the same basic block as this instruction, return true if this instructi...
LLVM_ABI AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
An instruction for reading from memory.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
void setAlignment(Align Align)
Type * getPointerOperandType() const
Align getAlign() const
Return the alignment of the access that is being performed.
Representation for a specific memory location.
static LLVM_ABI MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
const SDValue & getOperand(unsigned Num) const
This instruction constructs a fixed permutation of two input vectors.
int getMaskValue(unsigned Elt) const
Return the shuffle mask value of this instruction for the given element index.
VectorType * getType() const
Overload to return most specific vector type.
static LLVM_ABI void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static LLVM_ABI bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
static void commuteShuffleMask(MutableArrayRef< int > Mask, unsigned InVecNumElts)
Change values in a shuffle permute mask assuming the two vector operands of length InVecNumElts have ...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
void assign(size_type NumElts, ValueParamT Elt)
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
void setAlignment(Align Align)
Analysis pass providing the TargetTransformInfo.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isPointerTy() const
True if this is an instance of PointerType.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
A Use represents the edge between a Value definition and its users.
Value * getOperand(unsigned i) const
static LLVM_ABI bool isVPBinOp(Intrinsic::ID ID)
std::optional< unsigned > getFunctionalIntrinsicID() const
std::optional< unsigned > getFunctionalOpcode() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
This is a wrapper around stripAndAccumulateConstantOffsets with the in-bounds requirement set to fals...
LLVM_ABI bool hasOneUser() const
Return true if there is exactly one user of this value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
unsigned getValueID() const
Return an ID for the concrete type of this object.
LLVM_ABI bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &)
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Type * getElementType() const
std::pair< iterator, bool > insert(const ValueT &V)
const ParentTy * getParent() const
self_iterator getIterator()
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Abstract Attribute helper functions.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
const APInt & smin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be signed.
const APInt & smax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be signed.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
LLVM_ABI AttributeSet getFnAttributes(LLVMContext &C, ID id)
Return the function attributes for an intrinsic.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
match_combine_and< Ty... > m_CombineAnd(const Ty &...Ps)
Combine pattern matchers matching all of Ps patterns.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
auto m_Cmp()
Matches any compare instruction and ignore it.
BinaryOp_match< LHS, RHS, Instruction::URem > m_URem(const LHS &L, const RHS &R)
auto m_Poison()
Match an arbitrary poison constant.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
match_bind< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
DisjointOr_match< LHS, RHS > m_DisjointOr(const LHS &L, const RHS &R)
BinOpPred_match< LHS, RHS, is_right_shift_op > m_Shr(const LHS &L, const RHS &R)
Matches logical shift operations.
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
auto m_BinOp()
Match an arbitrary binary operation and ignore it.
auto m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
auto m_Constant()
Match an arbitrary Constant and ignore it.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
cst_pred_ty< is_non_zero_int > m_NonZeroInt()
Match a non-zero integer or a vector with all non-zero elements.
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Shl, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWShl(const LHS &L, const RHS &R)
auto m_AnyIntrinsic()
Matches any intrinsic call and ignore it.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Mul, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWMul(const LHS &L, const RHS &R)
BinOpPred_match< LHS, RHS, is_bitwiselogic_op, true > m_c_BitwiseLogic(const LHS &L, const RHS &R)
Matches bitwise logic operations in either order.
CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
match_combine_or< CastInst_match< OpTy, SExtInst >, NNegZExt_match< OpTy > > m_SExtLike(const OpTy &Op)
Match either "sext" or "zext nneg".
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
auto m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
@ Valid
The data is already valid.
initializer< Ty > init(const Ty &Val)
DXILDebugInfoMap run(Module &M)
@ User
could "use" a pointer
NodeAddr< PhiNode * > Phi
NodeAddr< UseNode * > Use
friend class Instruction
Iterator for Instructions in a `BasicBlock.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
FunctionAddr VTableAddr Value
void stable_sort(R &&Range)
UnaryFunction for_each(R &&Range, UnaryFunction F)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI Intrinsic::ID getMinMaxReductionIntrinsicOp(Intrinsic::ID RdxID)
Returns the min/max intrinsic used when expanding a min/max reduction.
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
unsigned Log2_64_Ceil(uint64_t Value)
Return the ceil log base 2 of the specified value, 64 if the value is zero.
LLVM_ABI Value * simplifyUnOp(unsigned Opcode, Value *Op, const SimplifyQuery &Q)
Given operand for a UnaryOperator, fold the result or return null.
scope_exit(Callable) -> scope_exit< Callable >
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI unsigned getArithmeticReductionInstruction(Intrinsic::ID RdxID)
Returns the arithmetic instruction opcode used when expanding a reduction.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
LLVM_ABI Value * simplifyCall(CallBase *Call, Value *Callee, ArrayRef< Value * > Args, const SimplifyQuery &Q)
Given a callsite, callee, and arguments, fold the result or return null.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
LLVM_ABI bool mustSuppressSpeculation(const LoadInst &LI)
Return true if speculation of the given load must be suppressed to avoid ordering or interfering with...
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like a...
unsigned M1(unsigned Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
bool isModSet(const ModRefInfo MRI)
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI bool programUndefinedIfPoison(const Instruction *Inst)
LLVM_ABI bool isSafeToLoadUnconditionally(Value *V, Align Alignment, const APInt &Size, const DataLayout &DL, Instruction *ScanFrom, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if we know that executing a load from this value cannot trap.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
FunctionAddr VTableAddr Count
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ABI void propagateIRFlags(Value *I, ArrayRef< Value * > VL, Value *OpValue=nullptr, bool IncludeWrapFlags=true)
Get the intersection (logical and) of all of the potential IR flags of each scalar operation (VL) tha...
LLVM_ABI bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
constexpr int PoisonMaskElem
LLVM_ABI bool isSafeToSpeculativelyExecuteWithOpcode(unsigned Opcode, const Instruction *Inst, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
This returns the same result as isSafeToSpeculativelyExecute if Opcode is the actual opcode of Inst.
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
LLVM_ABI Value * simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a BinaryOperator, fold the result or return null.
LLVM_ABI void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
LLVM_ABI Intrinsic::ID getReductionForBinop(Instruction::BinaryOps Opc)
Returns the reduction intrinsic id corresponding to the binary operation.
@ And
Bitwise or logical AND of integers.
LLVM_ABI bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
constexpr unsigned BitWidth
LLVM_ABI bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
LLVM_ABI Constant * getLosslessInvCast(Constant *C, Type *InvCastTo, unsigned CastOp, const DataLayout &DL, PreservedCastFlags *Flags=nullptr)
Try to cast C to InvC losslessly, satisfying CastOp(InvC) equals C, or CastOp(InvC) is a refined valu...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
constexpr bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list is empty.
LLVM_ABI Value * simplifyCmpInst(CmpPredicate Predicate, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a CmpInst, fold the result or return null.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI bool isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Returns true if V cannot be poison, but may be undef.
LLVM_ABI bool isKnownNonNegative(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Returns true if the given value is known to be non-negative.
LLVM_ABI bool isTriviallyVectorizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially vectorizable.
LLVM_ABI Intrinsic::ID getMinMaxReductionIntrinsicID(Intrinsic::ID IID)
Returns the llvm.vector.reduce min/max intrinsic that corresponds to the intrinsic op.
LLVM_ABI ConstantRange computeConstantRange(const Value *V, bool ForSigned, const SimplifyQuery &SQ, unsigned Depth=0)
Determine the possible constant range of an integer or vector of integer value.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
LLVM_ABI AAMDNodes adjustForAccess(unsigned AccessSize)
Create a new AAMDNode for accessing AccessSize bytes of this AAMDNode.
This struct is a compact representation of a valid (non-zero power of two) alignment.
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
SimplifyQuery getWithInstruction(const Instruction *I) const