44#define DEBUG_TYPE "vector-combine"
// Pass-wide statistics, reported under -stats. Each STATISTIC counter below is
// bumped by the corresponding VectorCombine transform when it fires; the
// string is the description printed in the statistics report.
// NOTE(review): the digits fused to the start of these lines ("44", "50", ...)
// look like line-number artifacts from an extraction, not source tokens --
// confirm against the original file before attempting to compile this chunk.
50STATISTIC(NumVecLoad,
"Number of vector loads formed");
51STATISTIC(NumVecCmp,
"Number of vector compares formed");
52STATISTIC(NumVecBO,
"Number of vector binops formed");
53STATISTIC(NumVecCmpBO,
"Number of vector compare + binop formed");
54STATISTIC(NumShufOfBitcast,
"Number of shuffles moved after bitcast");
55STATISTIC(NumScalarOps,
"Number of scalar unary + binary ops formed");
56STATISTIC(NumScalarCmp,
"Number of scalar compares formed");
57STATISTIC(NumScalarIntrinsic,
"Number of scalar intrinsic calls formed");
61 cl::desc(
"Disable all vector combine transforms"));
65 cl::desc(
"Disable binop extract to shuffle transforms"));
69 cl::desc(
"Max number of instructions to scan for vector combining."));
// Sentinel for "no/unknown element index": the maximum value representable in
// an unsigned, which can never be a real extract/insert lane number.
71static const unsigned InvalidIndex = std::numeric_limits<unsigned>::max();
79 bool TryEarlyFoldsOnly)
82 TryEarlyFoldsOnly(TryEarlyFoldsOnly) {}
89 const TargetTransformInfo &TTI;
90 const DominatorTree &DT;
95 const SimplifyQuery SQ;
99 bool TryEarlyFoldsOnly;
101 InstructionWorklist Worklist;
110 bool vectorizeLoadInsert(Instruction &
I);
111 bool widenSubvectorLoad(Instruction &
I);
112 ExtractElementInst *getShuffleExtract(ExtractElementInst *Ext0,
113 ExtractElementInst *Ext1,
114 unsigned PreferredExtractIndex)
const;
115 bool isExtractExtractCheap(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
116 const Instruction &
I,
117 ExtractElementInst *&ConvertToShuffle,
118 unsigned PreferredExtractIndex);
121 bool foldExtractExtract(Instruction &
I);
122 bool foldInsExtFNeg(Instruction &
I);
123 bool foldInsExtBinop(Instruction &
I);
124 bool foldInsExtVectorToShuffle(Instruction &
I);
125 bool foldBitOpOfCastops(Instruction &
I);
126 bool foldBitOpOfCastConstant(Instruction &
I);
127 bool foldBitcastShuffle(Instruction &
I);
128 bool scalarizeOpOrCmp(Instruction &
I);
129 bool scalarizeVPIntrinsic(Instruction &
I);
130 bool foldExtractedCmps(Instruction &
I);
131 bool foldSelectsFromBitcast(Instruction &
I);
132 bool foldBinopOfReductions(Instruction &
I);
133 bool foldSingleElementStore(Instruction &
I);
134 bool scalarizeLoad(Instruction &
I);
135 bool scalarizeLoadExtract(LoadInst *LI, VectorType *VecTy,
Value *Ptr);
136 bool scalarizeLoadBitcast(LoadInst *LI, VectorType *VecTy,
Value *Ptr);
137 bool scalarizeExtExtract(Instruction &
I);
138 bool foldConcatOfBoolMasks(Instruction &
I);
139 bool foldPermuteOfBinops(Instruction &
I);
140 bool foldShuffleOfBinops(Instruction &
I);
141 bool foldShuffleOfSelects(Instruction &
I);
142 bool foldShuffleOfCastops(Instruction &
I);
143 bool foldShuffleOfShuffles(Instruction &
I);
144 bool foldPermuteOfIntrinsic(Instruction &
I);
145 bool foldShufflesOfLengthChangingShuffles(Instruction &
I);
146 bool foldShuffleOfIntrinsics(Instruction &
I);
147 bool foldShuffleToIdentity(Instruction &
I);
148 bool foldShuffleFromReductions(Instruction &
I);
149 bool foldShuffleChainsToReduce(Instruction &
I);
150 bool foldCastFromReductions(Instruction &
I);
151 bool foldSignBitReductionCmp(Instruction &
I);
152 bool foldICmpEqZeroVectorReduce(Instruction &
I);
153 bool foldEquivalentReductionCmp(Instruction &
I);
154 bool foldSelectShuffle(Instruction &
I,
bool FromReduction =
false);
155 bool foldInterleaveIntrinsics(Instruction &
I);
156 bool shrinkType(Instruction &
I);
157 bool shrinkLoadForShuffles(Instruction &
I);
158 bool shrinkPhiOfShuffles(Instruction &
I);
160 void replaceValue(Instruction &Old,
Value &New,
bool Erase =
true) {
166 Worklist.pushUsersToWorkList(*NewI);
167 Worklist.pushValue(NewI);
184 SmallPtrSet<Value *, 4> Visited;
189 OpI,
nullptr,
nullptr, [&](
Value *V) {
194 NextInst = NextInst->getNextNode();
199 Worklist.pushUsersToWorkList(*OpI);
200 Worklist.pushValue(OpI);
220 if (!Load || !Load->isSimple() || !Load->hasOneUse() ||
221 Load->getFunction()->hasFnAttribute(Attribute::SanitizeMemTag) ||
227 Type *ScalarTy = Load->getType()->getScalarType();
229 unsigned MinVectorSize =
TTI.getMinVectorRegisterBitWidth();
230 if (!ScalarSize || !MinVectorSize || MinVectorSize % ScalarSize != 0 ||
237bool VectorCombine::vectorizeLoadInsert(
Instruction &
I) {
263 Value *SrcPtr =
Load->getPointerOperand()->stripPointerCasts();
266 unsigned MinVecNumElts = MinVectorSize / ScalarSize;
267 auto *MinVecTy = VectorType::get(ScalarTy, MinVecNumElts,
false);
268 unsigned OffsetEltIndex = 0;
276 unsigned OffsetBitWidth =
DL->getIndexTypeSizeInBits(SrcPtr->
getType());
277 APInt
Offset(OffsetBitWidth, 0);
287 uint64_t ScalarSizeInBytes = ScalarSize / 8;
288 if (
Offset.urem(ScalarSizeInBytes) != 0)
292 OffsetEltIndex =
Offset.udiv(ScalarSizeInBytes).getZExtValue();
293 if (OffsetEltIndex >= MinVecNumElts)
310 unsigned AS =
Load->getPointerAddressSpace();
329 unsigned OutputNumElts = Ty->getNumElements();
331 assert(OffsetEltIndex < MinVecNumElts &&
"Address offset too big");
332 Mask[0] = OffsetEltIndex;
339 if (OldCost < NewCost || !NewCost.
isValid())
350 replaceValue(
I, *VecLd);
358bool VectorCombine::widenSubvectorLoad(Instruction &
I) {
361 if (!Shuf->isIdentityWithPadding())
367 unsigned OpIndex =
any_of(Shuf->getShuffleMask(), [&NumOpElts](
int M) {
368 return M >= (int)(NumOpElts);
379 Value *SrcPtr =
Load->getPointerOperand()->stripPointerCasts();
387 unsigned AS =
Load->getPointerAddressSpace();
402 if (OldCost < NewCost || !NewCost.
isValid())
409 replaceValue(
I, *VecLd);
416ExtractElementInst *VectorCombine::getShuffleExtract(
417 ExtractElementInst *Ext0, ExtractElementInst *Ext1,
421 assert(Index0C && Index1C &&
"Expected constant extract indexes");
423 unsigned Index0 = Index0C->getZExtValue();
424 unsigned Index1 = Index1C->getZExtValue();
427 if (Index0 == Index1)
451 if (PreferredExtractIndex == Index0)
453 if (PreferredExtractIndex == Index1)
457 return Index0 > Index1 ? Ext0 : Ext1;
465bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
466 ExtractElementInst *Ext1,
467 const Instruction &
I,
468 ExtractElementInst *&ConvertToShuffle,
469 unsigned PreferredExtractIndex) {
472 assert(Ext0IndexC && Ext1IndexC &&
"Expected constant extract indexes");
474 unsigned Opcode =
I.getOpcode();
487 assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
488 "Expected a compare");
498 unsigned Ext0Index = Ext0IndexC->getZExtValue();
499 unsigned Ext1Index = Ext1IndexC->getZExtValue();
513 unsigned BestExtIndex = Extract0Cost > Extract1Cost ? Ext0Index : Ext1Index;
514 unsigned BestInsIndex = Extract0Cost > Extract1Cost ? Ext1Index : Ext0Index;
515 InstructionCost CheapExtractCost = std::min(Extract0Cost, Extract1Cost);
520 if (Ext0Src == Ext1Src && Ext0Index == Ext1Index) {
525 bool HasUseTax = Ext0 == Ext1 ? !Ext0->
hasNUses(2)
527 OldCost = CheapExtractCost + ScalarOpCost;
528 NewCost = VectorOpCost + CheapExtractCost + HasUseTax * CheapExtractCost;
532 OldCost = Extract0Cost + Extract1Cost + ScalarOpCost;
533 NewCost = VectorOpCost + CheapExtractCost +
538 ConvertToShuffle = getShuffleExtract(Ext0, Ext1, PreferredExtractIndex);
539 if (ConvertToShuffle) {
551 SmallVector<int> ShuffleMask(FixedVecTy->getNumElements(),
553 ShuffleMask[BestInsIndex] = BestExtIndex;
555 VecTy, VecTy, ShuffleMask,
CostKind, 0,
556 nullptr, {ConvertToShuffle});
559 VecTy, VecTy, {},
CostKind, 0,
nullptr,
567 return OldCost < NewCost;
579 ShufMask[NewIndex] = OldIndex;
580 return Builder.CreateShuffleVector(Vec, ShufMask,
"shift");
632 V1,
"foldExtExtBinop");
637 VecBOInst->copyIRFlags(&
I);
643bool VectorCombine::foldExtractExtract(Instruction &
I) {
674 ExtractElementInst *ExtractToChange;
675 if (isExtractExtractCheap(Ext0, Ext1,
I, ExtractToChange, InsertIndex))
681 if (ExtractToChange) {
682 unsigned CheapExtractIdx = ExtractToChange == Ext0 ? C1 : C0;
687 if (ExtractToChange == Ext0)
696 ? foldExtExtCmp(ExtOp0, ExtOp1, ExtIndex,
I)
697 : foldExtExtBinop(ExtOp0, ExtOp1, ExtIndex,
I);
700 replaceValue(
I, *NewExt);
706bool VectorCombine::foldInsExtFNeg(Instruction &
I) {
709 uint64_t ExtIdx, InsIdx;
724 auto *DstVecScalarTy = DstVecTy->getScalarType();
726 if (!SrcVecTy || DstVecScalarTy != SrcVecTy->getScalarType())
731 unsigned NumDstElts = DstVecTy->getNumElements();
732 unsigned NumSrcElts = SrcVecTy->getNumElements();
733 if (ExtIdx > NumSrcElts || InsIdx >= NumDstElts || NumDstElts == 1)
739 SmallVector<int>
Mask(NumDstElts);
740 std::iota(
Mask.begin(),
Mask.end(), 0);
741 Mask[InsIdx] = (ExtIdx % NumDstElts) + NumDstElts;
757 bool NeedLenChg = SrcVecTy->getNumElements() != NumDstElts;
760 SmallVector<int> SrcMask;
763 SrcMask[ExtIdx % NumDstElts] = ExtIdx;
765 DstVecTy, SrcVecTy, SrcMask,
CostKind);
769 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
771 if (NewCost > OldCost)
774 Value *NewShuf, *LenChgShuf =
nullptr;
788 replaceValue(
I, *NewShuf);
794bool VectorCombine::foldInsExtBinop(Instruction &
I) {
795 BinaryOperator *VecBinOp, *SclBinOp;
827 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
829 if (NewCost > OldCost)
840 NewInst->copyIRFlags(VecBinOp);
841 NewInst->andIRFlags(SclBinOp);
846 replaceValue(
I, *NewBO);
852bool VectorCombine::foldBitOpOfCastops(Instruction &
I) {
855 if (!BinOp || !BinOp->isBitwiseLogicOp())
861 if (!LHSCast || !RHSCast) {
862 LLVM_DEBUG(
dbgs() <<
" One or both operands are not cast instructions\n");
868 if (CastOpcode != RHSCast->getOpcode())
872 switch (CastOpcode) {
873 case Instruction::BitCast:
874 case Instruction::Trunc:
875 case Instruction::SExt:
876 case Instruction::ZExt:
882 Value *LHSSrc = LHSCast->getOperand(0);
883 Value *RHSSrc = RHSCast->getOperand(0);
889 auto *SrcTy = LHSSrc->
getType();
890 auto *DstTy =
I.getType();
893 if (CastOpcode != Instruction::BitCast &&
898 if (!SrcTy->getScalarType()->isIntegerTy() ||
899 !DstTy->getScalarType()->isIntegerTy())
914 LHSCastCost + RHSCastCost;
925 if (!LHSCast->hasOneUse())
926 NewCost += LHSCastCost;
927 if (!RHSCast->hasOneUse())
928 NewCost += RHSCastCost;
931 <<
" NewCost=" << NewCost <<
"\n");
933 if (NewCost > OldCost)
938 BinOp->getName() +
".inner");
940 NewBinOp->copyIRFlags(BinOp);
954 replaceValue(
I, *Result);
963bool VectorCombine::foldBitOpOfCastConstant(Instruction &
I) {
979 switch (CastOpcode) {
980 case Instruction::BitCast:
981 case Instruction::ZExt:
982 case Instruction::SExt:
983 case Instruction::Trunc:
989 Value *LHSSrc = LHSCast->getOperand(0);
991 auto *SrcTy = LHSSrc->
getType();
992 auto *DstTy =
I.getType();
995 if (CastOpcode != Instruction::BitCast &&
1000 if (!SrcTy->getScalarType()->isIntegerTy() ||
1001 !DstTy->getScalarType()->isIntegerTy())
1005 PreservedCastFlags RHSFlags;
1030 if (!LHSCast->hasOneUse())
1031 NewCost += LHSCastCost;
1033 LLVM_DEBUG(
dbgs() <<
"foldBitOpOfCastConstant: OldCost=" << OldCost
1034 <<
" NewCost=" << NewCost <<
"\n");
1036 if (NewCost > OldCost)
1041 LHSSrc, InvC,
I.getName() +
".inner");
1043 NewBinOp->copyIRFlags(&
I);
1063 replaceValue(
I, *Result);
1070bool VectorCombine::foldBitcastShuffle(Instruction &
I) {
1084 if (!DestTy || !SrcTy)
1087 unsigned DestEltSize = DestTy->getScalarSizeInBits();
1088 unsigned SrcEltSize = SrcTy->getScalarSizeInBits();
1089 if (SrcTy->getPrimitiveSizeInBits() % DestEltSize != 0)
1099 if (!(BCTy0 && BCTy0->getElementType() == DestTy->getElementType()) &&
1100 !(BCTy1 && BCTy1->getElementType() == DestTy->getElementType()))
1104 SmallVector<int, 16> NewMask;
1105 if (DestEltSize <= SrcEltSize) {
1108 if (SrcEltSize % DestEltSize != 0)
1110 unsigned ScaleFactor = SrcEltSize / DestEltSize;
1115 if (DestEltSize % SrcEltSize != 0)
1117 unsigned ScaleFactor = DestEltSize / SrcEltSize;
1124 unsigned NumSrcElts = SrcTy->getPrimitiveSizeInBits() / DestEltSize;
1125 auto *NewShuffleTy =
1127 auto *OldShuffleTy =
1129 unsigned NumOps = IsUnary ? 1 : 2;
1139 TargetTransformInfo::CastContextHint::None,
1144 TargetTransformInfo::CastContextHint::None,
1147 LLVM_DEBUG(
dbgs() <<
"Found a bitcasted shuffle: " <<
I <<
"\n OldCost: "
1148 << OldCost <<
" vs NewCost: " << NewCost <<
"\n");
1150 if (NewCost > OldCost || !NewCost.
isValid())
1158 replaceValue(
I, *Shuf);
1165bool VectorCombine::scalarizeVPIntrinsic(Instruction &
I) {
1179 if (!ScalarOp0 || !ScalarOp1)
1187 auto IsAllTrueMask = [](
Value *MaskVal) {
1190 return ConstValue->isAllOnesValue();
1204 SmallVector<int>
Mask;
1206 Mask.resize(FVTy->getNumElements(), 0);
1215 Args.push_back(
V->getType());
1216 IntrinsicCostAttributes
Attrs(IntrID, VecTy, Args);
1221 std::optional<unsigned> FunctionalOpcode =
1223 std::optional<Intrinsic::ID> ScalarIntrID = std::nullopt;
1224 if (!FunctionalOpcode) {
1233 IntrinsicCostAttributes
Attrs(*ScalarIntrID, VecTy->getScalarType(), Args);
1243 InstructionCost NewCost = ScalarOpCost + SplatCost + CostToKeepSplats;
1245 LLVM_DEBUG(
dbgs() <<
"Found a VP Intrinsic to scalarize: " << VPI
1248 <<
", Cost of scalarizing:" << NewCost <<
"\n");
1251 if (OldCost < NewCost || !NewCost.
isValid())
1262 bool SafeToSpeculate;
1268 *FunctionalOpcode, &VPI,
nullptr, &AC, &DT);
1269 if (!SafeToSpeculate &&
1276 {ScalarOp0, ScalarOp1})
1278 ScalarOp0, ScalarOp1);
1287bool VectorCombine::scalarizeOpOrCmp(Instruction &
I) {
1292 if (!UO && !BO && !CI && !
II)
1300 if (Arg->getType() !=
II->getType() &&
1310 for (User *U :
I.users())
1317 std::optional<uint64_t>
Index;
1319 auto Ops =
II ?
II->args() :
I.operands();
1323 uint64_t InsIdx = 0;
1328 if (OpTy->getElementCount().getKnownMinValue() <= InsIdx)
1334 else if (InsIdx != *Index)
1351 if (!
Index.has_value())
1355 Type *ScalarTy = VecTy->getScalarType();
1356 assert(VecTy->isVectorTy() &&
1359 "Unexpected types for insert element into binop or cmp");
1361 unsigned Opcode =
I.getOpcode();
1369 }
else if (UO || BO) {
1373 IntrinsicCostAttributes ScalarICA(
1374 II->getIntrinsicID(), ScalarTy,
1377 IntrinsicCostAttributes VectorICA(
1378 II->getIntrinsicID(), VecTy,
1385 Value *NewVecC =
nullptr;
1387 NewVecC =
simplifyCmpInst(CI->getPredicate(), VecCs[0], VecCs[1], SQ);
1390 simplifyUnOp(UO->getOpcode(), VecCs[0], UO->getFastMathFlags(), SQ);
1392 NewVecC =
simplifyBinOp(BO->getOpcode(), VecCs[0], VecCs[1], SQ);
1406 for (
auto [Idx,
Op, VecC, Scalar] :
enumerate(
Ops, VecCs, ScalarOps)) {
1408 II->getIntrinsicID(), Idx, &
TTI)))
1411 Instruction::InsertElement, VecTy,
CostKind, *Index, VecC, Scalar);
1412 OldCost += InsertCost;
1413 NewCost += !
Op->hasOneUse() * InsertCost;
1417 if (OldCost < NewCost || !NewCost.
isValid())
1427 ++NumScalarIntrinsic;
1437 Scalar = Builder.
CreateCmp(CI->getPredicate(), ScalarOps[0], ScalarOps[1]);
1443 Scalar->setName(
I.getName() +
".scalar");
1448 ScalarInst->copyIRFlags(&
I);
1451 replaceValue(
I, *Insert);
1458bool VectorCombine::foldExtractedCmps(Instruction &
I) {
1463 if (!BI || !
I.getType()->isIntegerTy(1))
1468 Value *B0 =
I.getOperand(0), *B1 =
I.getOperand(1);
1471 CmpPredicate
P0,
P1;
1483 uint64_t Index0, Index1;
1490 ExtractElementInst *ConvertToShuf = getShuffleExtract(Ext0, Ext1,
CostKind);
1493 assert((ConvertToShuf == Ext0 || ConvertToShuf == Ext1) &&
1494 "Unknown ExtractElementInst");
1499 unsigned CmpOpcode =
1514 Ext0Cost + Ext1Cost + CmpCost * 2 +
1520 int CheapIndex = ConvertToShuf == Ext0 ? Index1 : Index0;
1521 int ExpensiveIndex = ConvertToShuf == Ext0 ? Index0 : Index1;
1526 ShufMask[CheapIndex] = ExpensiveIndex;
1531 NewCost += Ext0->
hasOneUse() ? 0 : Ext0Cost;
1532 NewCost += Ext1->
hasOneUse() ? 0 : Ext1Cost;
1537 if (OldCost < NewCost || !NewCost.
isValid())
1547 Value *
LHS = ConvertToShuf == Ext0 ? Shuf : VCmp;
1548 Value *
RHS = ConvertToShuf == Ext0 ? VCmp : Shuf;
1551 replaceValue(
I, *NewExt);
1578bool VectorCombine::foldSelectsFromBitcast(Instruction &
I) {
1585 if (!SrcVecTy || !DstVecTy)
1595 if (SrcEltBits != 32 && SrcEltBits != 64)
1598 if (!DstEltTy->
isIntegerTy() || DstEltBits >= SrcEltBits)
1615 if (!ScalarSelCost.
isValid() || ScalarSelCost == 0)
1618 unsigned MinSelects = (VecSelCost.
getValue() / ScalarSelCost.
getValue()) + 1;
1621 if (!BC->hasNUsesOrMore(MinSelects))
1626 DenseMap<Value *, SmallVector<SelectInst *, 8>> CondToSelects;
1628 for (User *U : BC->users()) {
1633 for (User *ExtUser : Ext->users()) {
1637 Cond->getType()->isIntegerTy(1))
1642 if (CondToSelects.
empty())
1645 bool MadeChange =
false;
1646 Value *SrcVec = BC->getOperand(0);
1649 for (
auto [
Cond, Selects] : CondToSelects) {
1651 if (Selects.size() < MinSelects) {
1652 LLVM_DEBUG(
dbgs() <<
"VectorCombine: foldSelectsFromBitcast not "
1653 <<
"profitable (VecCost=" << VecSelCost
1654 <<
", ScalarCost=" << ScalarSelCost
1655 <<
", NumSelects=" << Selects.size() <<
")\n");
1660 auto InsertPt = std::next(BC->getIterator());
1664 InsertPt = std::next(CondInst->getIterator());
1672 for (SelectInst *Sel : Selects) {
1674 Value *Idx = Ext->getIndexOperand();
1678 replaceValue(*Sel, *NewExt);
1683 <<
" selects into vector select\n");
1697 unsigned ReductionOpc =
1703 CostBeforeReduction =
1704 TTI.getCastInstrCost(RedOp->getOpcode(), VecRedTy, ExtType,
1706 CostAfterReduction =
1707 TTI.getExtendedReductionCost(ReductionOpc, IsUnsigned,
II.getType(),
1711 if (RedOp &&
II.getIntrinsicID() == Intrinsic::vector_reduce_add &&
1717 (Op0->
getOpcode() == RedOp->getOpcode() || Op0 == Op1)) {
1724 TTI.getCastInstrCost(Op0->
getOpcode(), MulType, ExtType,
1727 TTI.getArithmeticInstrCost(Instruction::Mul, MulType,
CostKind);
1729 TTI.getCastInstrCost(RedOp->getOpcode(), VecRedTy, MulType,
1732 CostBeforeReduction = ExtCost * 2 + MulCost + Ext2Cost;
1733 CostAfterReduction =
TTI.getMulAccReductionCost(
1734 IsUnsigned, ReductionOpc,
II.getType(), ExtType,
CostKind);
1737 CostAfterReduction =
TTI.getArithmeticReductionCost(ReductionOpc, VecRedTy,
1741bool VectorCombine::foldBinopOfReductions(Instruction &
I) {
1744 if (BinOpOpc == Instruction::Sub)
1745 ReductionIID = Intrinsic::vector_reduce_add;
1749 auto checkIntrinsicAndGetItsArgument = [](
Value *
V,
1754 if (
II->getIntrinsicID() == IID &&
II->hasOneUse())
1755 return II->getArgOperand(0);
1759 Value *V0 = checkIntrinsicAndGetItsArgument(
I.getOperand(0), ReductionIID);
1762 Value *V1 = checkIntrinsicAndGetItsArgument(
I.getOperand(1), ReductionIID);
1771 unsigned ReductionOpc =
1784 CostOfRedOperand0 + CostOfRedOperand1 +
1787 if (NewCost >= OldCost || !NewCost.
isValid())
1791 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
1794 if (BinOpOpc == Instruction::Or)
1795 VectorBO = Builder.
CreateOr(V0, V1,
"",
1801 replaceValue(
I, *Rdx);
1809 unsigned NumScanned = 0;
1810 return std::any_of(Begin, End, [&](
const Instruction &Instr) {
// Tri-state answer to "is it safe to scalarize this vector element access?".
// SafeWithFreeze additionally carries the Value (an index, judging by the
// callers visible below) that must be frozen before the scalarized access.
// NOTE(review): this chunk is a sampled extraction -- the class's field
// declarations and access specifiers are missing from view here.
1819class ScalarizationResult {
// The three possible analysis outcomes.
1820 enum class StatusTy { Unsafe, Safe, SafeWithFreeze };
// Private-looking ctor; clients use the unsafe()/safe()/safeWithFreeze()
// factories below instead.
1825 ScalarizationResult(StatusTy Status,
Value *ToFreeze =
nullptr)
1826 : Status(Status), ToFreeze(ToFreeze) {}
1829 ScalarizationResult(
const ScalarizationResult &
Other) =
default;
// Destructor asserts that a pending freeze was not silently dropped:
// whoever got a SafeWithFreeze result must call freeze() (or discard it).
1830 ~ScalarizationResult() {
1831 assert(!ToFreeze &&
"freeze() not called with ToFreeze being set");
// Factory helpers for the three states.
1834 static ScalarizationResult unsafe() {
return {StatusTy::Unsafe}; }
1835 static ScalarizationResult safe() {
return {StatusTy::Safe}; }
1836 static ScalarizationResult safeWithFreeze(
Value *ToFreeze) {
1837 return {StatusTy::SafeWithFreeze, ToFreeze};
// State predicates.
1841 bool isSafe()
const {
return Status == StatusTy::Safe; }
1843 bool isUnsafe()
const {
return Status == StatusTy::Unsafe; }
1846 bool isSafeWithFreeze()
const {
return Status == StatusTy::SafeWithFreeze; }
// NOTE(review): the enclosing member here (presumably a discard()/reset
// helper that also clears ToFreeze) is not visible in this extraction.
1851 Status = StatusTy::Unsafe;
// freeze(): materialize the required freeze of ToFreeze via Builder and
// rewrite UserI's matching operand; only legal in the SafeWithFreeze state
// (enforced by the asserts). Uses InsertPointGuard so the caller's builder
// insertion point is restored afterwards.
1855 void freeze(IRBuilderBase &Builder, Instruction &UserI) {
1856 assert(isSafeWithFreeze() &&
1857 "should only be used when freezing is required");
1859 "UserI must be a user of ToFreeze");
1860 IRBuilder<>::InsertPointGuard Guard(Builder);
1865 if (
U.get() == ToFreeze)
1882 uint64_t NumElements = VecTy->getElementCount().getKnownMinValue();
1886 if (
C->getValue().ult(NumElements))
1887 return ScalarizationResult::safe();
1888 return ScalarizationResult::unsafe();
1893 return ScalarizationResult::unsafe();
1895 APInt Zero(IntWidth, 0);
1896 APInt MaxElts(IntWidth, NumElements);
1902 true, &AC, CtxI, &DT)))
1903 return ScalarizationResult::safe();
1904 return ScalarizationResult::unsafe();
1917 if (ValidIndices.
contains(IdxRange))
1918 return ScalarizationResult::safeWithFreeze(IdxBase);
1919 return ScalarizationResult::unsafe();
1931 C->getZExtValue() *
DL.getTypeStoreSize(ScalarType));
1943bool VectorCombine::foldSingleElementStore(Instruction &
I) {
1955 if (!
match(
SI->getValueOperand(),
1962 Value *SrcAddr =
Load->getPointerOperand()->stripPointerCasts();
1965 if (!
Load->isSimple() ||
Load->getParent() !=
SI->getParent() ||
1966 !
DL->typeSizeEqualsStoreSize(
Load->getType()->getScalarType()) ||
1967 SrcAddr !=
SI->getPointerOperand()->stripPointerCasts())
1971 if (ScalarizableIdx.isUnsafe() ||
1978 Worklist.
push(Load);
1980 if (ScalarizableIdx.isSafeWithFreeze())
1983 SI->getValueOperand()->getType(),
SI->getPointerOperand(),
1984 {ConstantInt::get(Idx->getType(), 0), Idx});
1988 std::max(
SI->getAlign(),
Load->getAlign()), NewElement->
getType(), Idx,
1991 replaceValue(
I, *NSI);
2001bool VectorCombine::scalarizeLoad(Instruction &
I) {
2008 if (LI->isVolatile() || !
DL->typeSizeEqualsStoreSize(VecTy->getScalarType()))
2011 bool AllExtracts =
true;
2012 bool AllBitcasts =
true;
2014 unsigned NumInstChecked = 0;
2019 for (User *U : LI->users()) {
2021 if (!UI || UI->getParent() != LI->getParent())
2026 if (UI->use_empty())
2030 AllExtracts =
false;
2032 AllBitcasts =
false;
2036 for (Instruction &
I :
2037 make_range(std::next(LI->getIterator()), UI->getIterator())) {
2044 LastCheckedInst = UI;
2049 return scalarizeLoadExtract(LI, VecTy, Ptr);
2051 return scalarizeLoadBitcast(LI, VecTy, Ptr);
2056bool VectorCombine::scalarizeLoadExtract(LoadInst *LI, VectorType *VecTy,
2061 DenseMap<ExtractElementInst *, ScalarizationResult> NeedFreeze;
2064 for (
auto &Pair : NeedFreeze)
2065 Pair.second.discard();
2073 for (User *U : LI->
users()) {
2078 if (ScalarIdx.isUnsafe())
2080 if (ScalarIdx.isSafeWithFreeze()) {
2081 NeedFreeze.try_emplace(UI, ScalarIdx);
2082 ScalarIdx.discard();
2088 Index ?
Index->getZExtValue() : -1);
2096 LLVM_DEBUG(
dbgs() <<
"Found all extractions of a vector load: " << *LI
2097 <<
"\n LoadExtractCost: " << OriginalCost
2098 <<
" vs ScalarizedCost: " << ScalarizedCost <<
"\n");
2100 if (ScalarizedCost >= OriginalCost)
2107 Type *ElemType = VecTy->getElementType();
2110 for (User *U : LI->
users()) {
2112 Value *Idx = EI->getIndexOperand();
2115 auto It = NeedFreeze.find(EI);
2116 if (It != NeedFreeze.end())
2123 Builder.
CreateLoad(ElemType,
GEP, EI->getName() +
".scalar"));
2125 Align ScalarOpAlignment =
2127 NewLoad->setAlignment(ScalarOpAlignment);
2130 size_t Offset = ConstIdx->getZExtValue() *
DL->getTypeStoreSize(ElemType);
2135 replaceValue(*EI, *NewLoad,
false);
2138 FailureGuard.release();
2143bool VectorCombine::scalarizeLoadBitcast(LoadInst *LI, VectorType *VecTy,
2149 Type *TargetScalarType =
nullptr;
2150 unsigned VecBitWidth =
DL->getTypeSizeInBits(VecTy);
2152 for (User *U : LI->
users()) {
2155 Type *DestTy = BC->getDestTy();
2159 unsigned DestBitWidth =
DL->getTypeSizeInBits(DestTy);
2160 if (DestBitWidth != VecBitWidth)
2164 if (!TargetScalarType)
2165 TargetScalarType = DestTy;
2166 else if (TargetScalarType != DestTy)
2174 if (!TargetScalarType)
2182 LLVM_DEBUG(
dbgs() <<
"Found vector load feeding only bitcasts: " << *LI
2183 <<
"\n OriginalCost: " << OriginalCost
2184 <<
" vs ScalarizedCost: " << ScalarizedCost <<
"\n");
2186 if (ScalarizedCost >= OriginalCost)
2197 ScalarLoad->copyMetadata(*LI);
2200 for (User *U : LI->
users()) {
2202 replaceValue(*BC, *ScalarLoad,
false);
2208bool VectorCombine::scalarizeExtExtract(Instruction &
I) {
2223 Type *ScalarDstTy = DstTy->getElementType();
2224 if (
DL->getTypeSizeInBits(SrcTy) !=
DL->getTypeSizeInBits(ScalarDstTy))
2230 unsigned ExtCnt = 0;
2231 bool ExtLane0 =
false;
2232 for (User *U : Ext->users()) {
2246 Instruction::And, ScalarDstTy,
CostKind,
2249 (ExtCnt - ExtLane0) *
2251 Instruction::LShr, ScalarDstTy,
CostKind,
2254 if (ScalarCost > VectorCost)
2257 Value *ScalarV = Ext->getOperand(0);
2264 SmallDenseSet<ConstantInt *, 8> ExtractedLanes;
2265 bool AllExtractsTriggerUB =
true;
2266 ExtractElementInst *LastExtract =
nullptr;
2268 for (User *U : Ext->users()) {
2271 AllExtractsTriggerUB =
false;
2275 if (!LastExtract || LastExtract->
comesBefore(Extract))
2276 LastExtract = Extract;
2278 if (ExtractedLanes.
size() != DstTy->getNumElements() ||
2279 !AllExtractsTriggerUB ||
2287 uint64_t SrcEltSizeInBits =
DL->getTypeSizeInBits(SrcTy->getElementType());
2288 uint64_t TotalBits =
DL->getTypeSizeInBits(SrcTy);
2291 Value *
Mask = ConstantInt::get(PackedTy, EltBitMask);
2292 for (User *U : Ext->users()) {
2298 ? (TotalBits - SrcEltSizeInBits - Idx * SrcEltSizeInBits)
2299 : (Idx * SrcEltSizeInBits);
2302 U->replaceAllUsesWith(
And);
2310bool VectorCombine::foldConcatOfBoolMasks(Instruction &
I) {
2311 Type *Ty =
I.getType();
2316 if (
DL->isBigEndian())
2327 uint64_t ShAmtX = 0;
2335 uint64_t ShAmtY = 0;
2343 if (ShAmtX > ShAmtY) {
2351 uint64_t ShAmtDiff = ShAmtY - ShAmtX;
2352 unsigned NumSHL = (ShAmtX > 0) + (ShAmtY > 0);
2357 MaskTy->getNumElements() != ShAmtDiff ||
2358 MaskTy->getNumElements() > (
BitWidth / 2))
2363 Type::getIntNTy(Ty->
getContext(), ConcatTy->getNumElements());
2364 auto *MaskIntTy = Type::getIntNTy(Ty->
getContext(), ShAmtDiff);
2367 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
2384 if (Ty != ConcatIntTy)
2390 LLVM_DEBUG(
dbgs() <<
"Found a concatenation of bitcasted bool masks: " <<
I
2391 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2394 if (NewCost > OldCost)
2404 if (Ty != ConcatIntTy) {
2414 replaceValue(
I, *Result);
2420bool VectorCombine::foldPermuteOfBinops(Instruction &
I) {
2421 BinaryOperator *BinOp;
2422 ArrayRef<int> OuterMask;
2430 Value *Op00, *Op01, *Op10, *Op11;
2431 ArrayRef<int> Mask0, Mask1;
2436 if (!Match0 && !Match1)
2449 if (!ShuffleDstTy || !BinOpTy || !Op0Ty || !Op1Ty)
2452 unsigned NumSrcElts = BinOpTy->getNumElements();
2457 any_of(OuterMask, [NumSrcElts](
int M) {
return M >= (int)NumSrcElts; }))
2461 SmallVector<int> NewMask0, NewMask1;
2462 for (
int M : OuterMask) {
2463 if (M < 0 || M >= (
int)NumSrcElts) {
2467 NewMask0.
push_back(Match0 ? Mask0[M] : M);
2468 NewMask1.
push_back(Match1 ? Mask1[M] : M);
2472 unsigned NumOpElts = Op0Ty->getNumElements();
2473 bool IsIdentity0 = ShuffleDstTy == Op0Ty &&
2474 all_of(NewMask0, [NumOpElts](
int M) {
return M < (int)NumOpElts; }) &&
2476 bool IsIdentity1 = ShuffleDstTy == Op1Ty &&
2477 all_of(NewMask1, [NumOpElts](
int M) {
return M < (int)NumOpElts; }) &&
2486 ShuffleDstTy, BinOpTy, OuterMask,
CostKind,
2487 0,
nullptr, {BinOp}, &
I);
2489 NewCost += BinOpCost;
2495 OldCost += Shuf0Cost;
2497 NewCost += Shuf0Cost;
2503 OldCost += Shuf1Cost;
2505 NewCost += Shuf1Cost;
2513 Op0Ty, NewMask0,
CostKind, 0,
nullptr, {Op00, Op01});
2517 Op1Ty, NewMask1,
CostKind, 0,
nullptr, {Op10, Op11});
2519 LLVM_DEBUG(
dbgs() <<
"Found a shuffle feeding a shuffled binop: " <<
I
2520 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2524 if (NewCost > OldCost)
2535 NewInst->copyIRFlags(BinOp);
2539 replaceValue(
I, *NewBO);
2545bool VectorCombine::foldShuffleOfBinops(Instruction &
I) {
2546 ArrayRef<int> OldMask;
2553 if (
LHS->getOpcode() !=
RHS->getOpcode())
2557 bool IsCommutative =
false;
2566 IsCommutative = BinaryOperator::isCommutative(BO->getOpcode());
2577 if (!ShuffleDstTy || !BinResTy || !BinOpTy ||
X->getType() !=
Z->getType())
2580 bool SameBinOp =
LHS ==
RHS;
2581 unsigned NumSrcElts = BinOpTy->getNumElements();
2584 if (IsCommutative &&
X != Z &&
Y != W && (
X == W ||
Y == Z))
2587 auto ConvertToUnary = [NumSrcElts](
int &
M) {
2588 if (M >= (
int)NumSrcElts)
2592 SmallVector<int> NewMask0(OldMask);
2601 SmallVector<int> NewMask1(OldMask);
2620 ShuffleDstTy, BinResTy, OldMask,
CostKind, 0,
2630 ArrayRef<int> InnerMask;
2632 m_Mask(InnerMask)))) &&
2635 [NumSrcElts](
int M) {
return M < (int)NumSrcElts; })) {
2647 bool ReducedInstCount =
false;
2648 ReducedInstCount |= MergeInner(
X, 0, NewMask0,
CostKind);
2649 ReducedInstCount |= MergeInner(
Y, 0, NewMask1,
CostKind);
2650 ReducedInstCount |= MergeInner(Z, NumSrcElts, NewMask0,
CostKind);
2651 ReducedInstCount |= MergeInner(W, NumSrcElts, NewMask1,
CostKind);
2652 bool SingleSrcBinOp = (
X ==
Y) && (Z == W) && (NewMask0 == NewMask1);
2657 auto *ShuffleCmpTy =
2660 SK0, ShuffleCmpTy, BinOpTy, NewMask0,
CostKind, 0,
nullptr, {
X,
Z});
2661 if (!SingleSrcBinOp)
2671 PredLHS,
CostKind, Op0Info, Op1Info);
2681 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2688 if (ReducedInstCount ? (NewCost > OldCost) : (NewCost >= OldCost))
2697 : Builder.
CreateCmp(PredLHS, Shuf0, Shuf1);
2701 NewInst->copyIRFlags(
LHS);
2702 NewInst->andIRFlags(
RHS);
2707 replaceValue(
I, *NewBO);
2714bool VectorCombine::foldShuffleOfSelects(Instruction &
I) {
2716 Value *C1, *
T1, *F1, *C2, *T2, *F2;
2727 if (!C1VecTy || !C2VecTy || C1VecTy != C2VecTy)
2733 if (((SI0FOp ==
nullptr) != (SI1FOp ==
nullptr)) ||
2734 ((SI0FOp !=
nullptr) &&
2735 (SI0FOp->getFastMathFlags() != SI1FOp->getFastMathFlags())))
2741 auto SelOp = Instruction::Select;
2749 CostSel1 + CostSel2 +
2751 {
I.getOperand(0),
I.getOperand(1)}, &
I);
2755 Mask,
CostKind, 0,
nullptr, {C1, C2});
2765 if (!Sel1->hasOneUse())
2766 NewCost += CostSel1;
2767 if (!Sel2->hasOneUse())
2768 NewCost += CostSel2;
2771 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2773 if (NewCost > OldCost)
2782 NewSel = Builder.
CreateSelectFMF(ShuffleCmp, ShuffleTrue, ShuffleFalse,
2783 SI0FOp->getFastMathFlags());
2785 NewSel = Builder.
CreateSelect(ShuffleCmp, ShuffleTrue, ShuffleFalse);
2790 replaceValue(
I, *NewSel);
2796bool VectorCombine::foldShuffleOfCastops(Instruction &
I) {
2798 ArrayRef<int> OldMask;
2807 if (!C0 || (IsBinaryShuffle && !C1))
2814 if (!IsBinaryShuffle && Opcode == Instruction::BitCast)
2817 if (IsBinaryShuffle) {
2818 if (C0->getSrcTy() != C1->getSrcTy())
2821 if (Opcode != C1->getOpcode()) {
2823 Opcode = Instruction::SExt;
2832 if (!ShuffleDstTy || !CastDstTy || !CastSrcTy)
2835 unsigned NumSrcElts = CastSrcTy->getNumElements();
2836 unsigned NumDstElts = CastDstTy->getNumElements();
2837 assert((NumDstElts == NumSrcElts || Opcode == Instruction::BitCast) &&
2838 "Only bitcasts expected to alter src/dst element counts");
2842 if (NumDstElts != NumSrcElts && (NumSrcElts % NumDstElts) != 0 &&
2843 (NumDstElts % NumSrcElts) != 0)
2846 SmallVector<int, 16> NewMask;
2847 if (NumSrcElts >= NumDstElts) {
2850 assert(NumSrcElts % NumDstElts == 0 &&
"Unexpected shuffle mask");
2851 unsigned ScaleFactor = NumSrcElts / NumDstElts;
2856 assert(NumDstElts % NumSrcElts == 0 &&
"Unexpected shuffle mask");
2857 unsigned ScaleFactor = NumDstElts / NumSrcElts;
2862 auto *NewShuffleDstTy =
2871 if (IsBinaryShuffle)
2886 if (IsBinaryShuffle) {
2896 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2898 if (NewCost > OldCost)
2902 if (IsBinaryShuffle)
2912 NewInst->copyIRFlags(C0);
2913 if (IsBinaryShuffle)
2914 NewInst->andIRFlags(C1);
2918 replaceValue(
I, *Cast);
2928bool VectorCombine::foldShuffleOfShuffles(Instruction &
I) {
2929 ArrayRef<int> OuterMask;
2930 Value *OuterV0, *OuterV1;
2935 ArrayRef<int> InnerMask0, InnerMask1;
2936 Value *X0, *X1, *Y0, *Y1;
2941 if (!Match0 && !Match1)
2946 SmallVector<int, 16> PoisonMask1;
2951 InnerMask1 = PoisonMask1;
2955 X0 = Match0 ? X0 : OuterV0;
2956 Y0 = Match0 ? Y0 : OuterV0;
2957 X1 = Match1 ? X1 : OuterV1;
2958 Y1 = Match1 ? Y1 : OuterV1;
2962 if (!ShuffleDstTy || !ShuffleSrcTy || !ShuffleImmTy ||
2966 unsigned NumSrcElts = ShuffleSrcTy->getNumElements();
2967 unsigned NumImmElts = ShuffleImmTy->getNumElements();
2972 SmallVector<int, 16> NewMask(OuterMask);
2973 Value *NewX =
nullptr, *NewY =
nullptr;
2974 for (
int &M : NewMask) {
2975 Value *Src =
nullptr;
2976 if (0 <= M && M < (
int)NumImmElts) {
2980 Src =
M >= (int)NumSrcElts ? Y0 : X0;
2981 M =
M >= (int)NumSrcElts ? (M - NumSrcElts) :
M;
2983 }
else if (M >= (
int)NumImmElts) {
2988 Src =
M >= (int)NumSrcElts ? Y1 : X1;
2989 M =
M >= (int)NumSrcElts ? (M - NumSrcElts) :
M;
2993 assert(0 <= M && M < (
int)NumSrcElts &&
"Unexpected shuffle mask index");
3002 if (!NewX || NewX == Src) {
3006 if (!NewY || NewY == Src) {
3022 replaceValue(
I, *NewX);
3039 bool IsUnary =
all_of(NewMask, [&](
int M) {
return M < (int)NumSrcElts; });
3045 nullptr, {NewX, NewY});
3047 NewCost += InnerCost0;
3049 NewCost += InnerCost1;
3052 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
3054 if (NewCost > OldCost)
3058 replaceValue(
I, *Shuf);
3074bool VectorCombine::foldShufflesOfLengthChangingShuffles(Instruction &
I) {
3079 unsigned ChainLength = 0;
3080 SmallVector<int>
Mask;
3081 SmallVector<int> YMask;
3091 ArrayRef<int> OuterMask;
3092 Value *OuterV0, *OuterV1;
3093 if (ChainLength != 0 && !Trunk->
hasOneUse())
3096 m_Mask(OuterMask))))
3098 if (OuterV0->
getType() != TrunkType) {
3104 ArrayRef<int> InnerMask0, InnerMask1;
3105 Value *A0, *A1, *B0, *B1;
3110 bool Match0Leaf = Match0 && A0->
getType() !=
I.getType();
3111 bool Match1Leaf = Match1 && A1->
getType() !=
I.getType();
3112 if (Match0Leaf == Match1Leaf) {
3118 SmallVector<int> CommutedOuterMask;
3125 for (
int &M : CommutedOuterMask) {
3128 if (M < (
int)NumTrunkElts)
3133 OuterMask = CommutedOuterMask;
3152 int NumLeafElts = YType->getNumElements();
3153 SmallVector<int> LocalYMask(InnerMask1);
3154 for (
int &M : LocalYMask) {
3155 if (M >= NumLeafElts)
3165 Mask.assign(OuterMask);
3166 YMask.
assign(LocalYMask);
3167 OldCost = NewCost = LocalOldCost;
3174 SmallVector<int> NewYMask(YMask);
3176 for (
auto [CombinedM, LeafM] :
llvm::zip(NewYMask, LocalYMask)) {
3177 if (LeafM == -1 || CombinedM == LeafM)
3179 if (CombinedM == -1) {
3189 SmallVector<int> NewMask;
3190 NewMask.
reserve(NumTrunkElts);
3191 for (
int M : Mask) {
3192 if (M < 0 || M >=
static_cast<int>(NumTrunkElts))
3207 if (LocalNewCost >= NewCost && LocalOldCost < LocalNewCost - NewCost)
3211 if (ChainLength == 1) {
3212 dbgs() <<
"Found chain of shuffles fed by length-changing shuffles: "
3215 dbgs() <<
" next chain link: " << *Trunk <<
'\n'
3216 <<
" old cost: " << (OldCost + LocalOldCost)
3217 <<
" new cost: " << LocalNewCost <<
'\n';
3222 OldCost += LocalOldCost;
3223 NewCost = LocalNewCost;
3227 if (ChainLength <= 1)
3231 return M < 0 || M >=
static_cast<int>(NumTrunkElts);
3234 for (
int &M : Mask) {
3235 if (M >=
static_cast<int>(NumTrunkElts))
3236 M = YMask[
M - NumTrunkElts];
3240 replaceValue(
I, *Root);
3247 replaceValue(
I, *Root);
3253bool VectorCombine::foldShuffleOfIntrinsics(Instruction &
I) {
3255 ArrayRef<int> OldMask;
3265 if (IID != II1->getIntrinsicID())
3274 if (!ShuffleDstTy || !II0Ty)
3280 for (
unsigned I = 0,
E = II0->arg_size();
I !=
E; ++
I)
3282 II0->getArgOperand(
I) != II1->getArgOperand(
I))
3288 II0Ty, OldMask,
CostKind, 0,
nullptr, {II0, II1}, &
I);
3292 SmallDenseSet<std::pair<Value *, Value *>> SeenOperandPairs;
3293 for (
unsigned I = 0,
E = II0->arg_size();
I !=
E; ++
I) {
3295 NewArgsTy.
push_back(II0->getArgOperand(
I)->getType());
3299 ShuffleDstTy->getNumElements());
3301 std::pair<Value *, Value *> OperandPair =
3302 std::make_pair(II0->getArgOperand(
I), II1->getArgOperand(
I));
3303 if (!SeenOperandPairs.
insert(OperandPair).second) {
3309 CostKind, 0,
nullptr, {II0->getArgOperand(
I), II1->getArgOperand(
I)});
3312 IntrinsicCostAttributes NewAttr(IID, ShuffleDstTy, NewArgsTy);
3315 if (!II0->hasOneUse())
3317 if (II1 != II0 && !II1->hasOneUse())
3321 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
3324 if (NewCost > OldCost)
3328 SmallDenseMap<std::pair<Value *, Value *>,
Value *> ShuffleCache;
3329 for (
unsigned I = 0,
E = II0->arg_size();
I !=
E; ++
I)
3333 std::pair<Value *, Value *> OperandPair =
3334 std::make_pair(II0->getArgOperand(
I), II1->getArgOperand(
I));
3335 auto It = ShuffleCache.
find(OperandPair);
3336 if (It != ShuffleCache.
end()) {
3342 II1->getArgOperand(
I), OldMask);
3343 ShuffleCache[OperandPair] = Shuf;
3351 NewInst->copyIRFlags(II0);
3352 NewInst->andIRFlags(II1);
3355 replaceValue(
I, *NewIntrinsic);
3361bool VectorCombine::foldPermuteOfIntrinsic(Instruction &
I) {
3373 if (!ShuffleDstTy || !IntrinsicSrcTy)
3377 unsigned NumSrcElts = IntrinsicSrcTy->getNumElements();
3378 if (
any_of(Mask, [NumSrcElts](
int M) {
return M >= (int)NumSrcElts; }))
3391 IntrinsicSrcTy, Mask,
CostKind, 0,
nullptr, {V0}, &
I);
3395 for (
unsigned I = 0,
E = II0->arg_size();
I !=
E; ++
I) {
3397 NewArgsTy.
push_back(II0->getArgOperand(
I)->getType());
3401 ShuffleDstTy->getNumElements());
3404 ArgTy, VecTy, Mask,
CostKind, 0,
nullptr,
3405 {II0->getArgOperand(
I)});
3408 IntrinsicCostAttributes NewAttr(IID, ShuffleDstTy, NewArgsTy);
3413 if (!II0->hasOneUse())
3416 LLVM_DEBUG(
dbgs() <<
"Found a permute of intrinsic: " <<
I <<
"\n OldCost: "
3417 << OldCost <<
" vs NewCost: " << NewCost <<
"\n");
3419 if (NewCost > OldCost)
3424 for (
unsigned I = 0,
E = II0->arg_size();
I !=
E; ++
I) {
3439 replaceValue(
I, *NewIntrinsic);
3449 int M = SV->getMaskValue(Lane);
3452 if (
static_cast<unsigned>(M) < NumElts) {
3453 V = SV->getOperand(0);
3456 V = SV->getOperand(1);
3467 auto [U, Lane] = IL;
3480 unsigned NumElts = Ty->getNumElements();
3481 if (Item.
size() == NumElts || NumElts == 1 || Item.
size() % NumElts != 0)
3487 std::iota(ConcatMask.
begin(), ConcatMask.
end(), 0);
3493 unsigned NumSlices = Item.
size() / NumElts;
3498 for (
unsigned Slice = 0; Slice < NumSlices; ++Slice) {
3499 Value *SliceV = Item[Slice * NumElts].first;
3500 if (!SliceV || SliceV->
getType() != Ty)
3502 for (
unsigned Elt = 0; Elt < NumElts; ++Elt) {
3503 auto [V, Lane] = Item[Slice * NumElts + Elt];
3504 if (Lane !=
static_cast<int>(Elt) || SliceV != V)
3513 const DenseSet<std::pair<Value *, Use *>> &IdentityLeafs,
3514 const DenseSet<std::pair<Value *, Use *>> &SplatLeafs,
3515 const DenseSet<std::pair<Value *, Use *>> &ConcatLeafs,
3517 auto [FrontV, FrontLane] = Item.
front();
3519 if (IdentityLeafs.contains(std::make_pair(FrontV, From))) {
3522 if (SplatLeafs.contains(std::make_pair(FrontV, From))) {
3524 return Builder.CreateShuffleVector(FrontV, Mask);
3526 if (ConcatLeafs.contains(std::make_pair(FrontV, From))) {
3530 for (
unsigned S = 0; S < Values.
size(); ++S)
3531 Values[S] = Item[S * NumElts].first;
3533 while (Values.
size() > 1) {
3536 std::iota(Mask.begin(), Mask.end(), 0);
3538 for (
unsigned S = 0; S < NewValues.
size(); ++S)
3540 Builder.CreateShuffleVector(Values[S * 2], Values[S * 2 + 1], Mask);
3548 unsigned NumOps =
I->getNumOperands() - (
II ? 1 : 0);
3550 for (
unsigned Idx = 0; Idx <
NumOps; Idx++) {
3553 Ops[Idx] =
II->getOperand(Idx);
3557 &
I->getOperandUse(Idx), Ty, IdentityLeafs,
3558 SplatLeafs, ConcatLeafs, Builder,
TTI);
3562 for (
const auto &Lane : Item)
3575 auto *
Value = Builder.CreateCmp(CI->getPredicate(),
Ops[0],
Ops[1]);
3585 auto *
Value = Builder.CreateCast(CI->getOpcode(),
Ops[0], DstTy);
3590 auto *
Value = Builder.CreateIntrinsic(DstTy,
II->getIntrinsicID(),
Ops);
3604bool VectorCombine::foldShuffleToIdentity(Instruction &
I) {
3606 if (!Ty ||
I.use_empty())
3610 for (
unsigned M = 0,
E = Ty->getNumElements(); M <
E; ++M)
3614 Worklist.
push_back(std::make_pair(Start, &*
I.use_begin()));
3615 DenseSet<std::pair<Value *, Use *>> IdentityLeafs, SplatLeafs, ConcatLeafs;
3616 unsigned NumVisited = 0;
3618 while (!Worklist.
empty()) {
3623 auto Item = ItemFrom.first;
3624 auto From = ItemFrom.second;
3625 auto [FrontV, FrontLane] = Item.front();
3633 return X->getType() ==
Y->getType() &&
3638 if (FrontLane == 0 &&
3640 Ty->getNumElements() &&
3642 Value *FrontV = Item.front().first;
3643 return !
E.value().first || (IsEquiv(
E.value().first, FrontV) &&
3644 E.value().second == (int)
E.index());
3646 IdentityLeafs.
insert(std::make_pair(FrontV, From));
3651 C &&
C->getSplatValue() &&
3653 Value *FrontV = Item.front().first;
3659 SplatLeafs.
insert(std::make_pair(FrontV, From));
3664 auto [FrontV, FrontLane] = Item.front();
3665 auto [
V, Lane] = IL;
3666 return !
V || (
V == FrontV && Lane == FrontLane);
3668 SplatLeafs.
insert(std::make_pair(FrontV, From));
3674 auto CheckLaneIsEquivalentToFirst = [Item](
InstLane IL) {
3675 Value *FrontV = Item.front().first;
3684 if (CI->getPredicate() !=
cast<CmpInst>(FrontV)->getPredicate())
3687 if (CI->getSrcTy()->getScalarType() !=
3692 SI->getOperand(0)->getType() !=
3699 II->getIntrinsicID() ==
3701 !
II->hasOperandBundles());
3708 BO && BO->isIntDivRem())
3715 }
else if (
isa<UnaryOperator, TruncInst, ZExtInst, SExtInst, FPToSIInst,
3716 FPToUIInst, SIToFPInst, UIToFPInst>(FrontV)) {
3724 if (DstTy && SrcTy &&
3725 SrcTy->getNumElements() == DstTy->getNumElements()) {
3727 &BitCast->getOperandUse(0));
3732 &Sel->getOperandUse(0));
3734 &Sel->getOperandUse(1));
3736 &Sel->getOperandUse(2));
3740 !
II->hasOperandBundles()) {
3741 for (
unsigned Op = 0,
E =
II->getNumOperands() - 1;
Op <
E;
Op++) {
3745 Value *FrontV = Item.front().first;
3761 ConcatLeafs.
insert(std::make_pair(FrontV, From));
3768 if (NumVisited <= 1)
3771 LLVM_DEBUG(
dbgs() <<
"Found a superfluous identity shuffle: " <<
I <<
"\n");
3777 SplatLeafs, ConcatLeafs, Builder, &
TTI);
3778 replaceValue(
I, *V);
3785bool VectorCombine::foldShuffleFromReductions(Instruction &
I) {
3789 switch (
II->getIntrinsicID()) {
3790 case Intrinsic::vector_reduce_add:
3791 case Intrinsic::vector_reduce_mul:
3792 case Intrinsic::vector_reduce_and:
3793 case Intrinsic::vector_reduce_or:
3794 case Intrinsic::vector_reduce_xor:
3795 case Intrinsic::vector_reduce_smin:
3796 case Intrinsic::vector_reduce_smax:
3797 case Intrinsic::vector_reduce_umin:
3798 case Intrinsic::vector_reduce_umax:
3807 std::queue<Value *> Worklist;
3808 SmallPtrSet<Value *, 4> Visited;
3809 ShuffleVectorInst *Shuffle =
nullptr;
3813 while (!Worklist.empty()) {
3814 Value *CV = Worklist.front();
3826 if (CI->isBinaryOp()) {
3827 for (
auto *
Op : CI->operand_values())
3831 if (Shuffle && Shuffle != SV)
3848 for (
auto *V : Visited)
3849 for (
auto *U :
V->users())
3850 if (!Visited.contains(U) && U != &
I)
3853 FixedVectorType *VecType =
3857 FixedVectorType *ShuffleInputType =
3859 if (!ShuffleInputType)
3865 SmallVector<int> ConcatMask;
3867 sort(ConcatMask, [](
int X,
int Y) {
return (
unsigned)
X < (unsigned)
Y; });
3868 bool UsesSecondVec =
3869 any_of(ConcatMask, [&](
int M) {
return M >= (int)NumInputElts; });
3876 ShuffleInputType, ConcatMask,
CostKind);
3878 LLVM_DEBUG(
dbgs() <<
"Found a reduction feeding from a shuffle: " << *Shuffle
3880 LLVM_DEBUG(
dbgs() <<
" OldCost: " << OldCost <<
" vs NewCost: " << NewCost
3882 bool MadeChanges =
false;
3883 if (NewCost < OldCost) {
3887 LLVM_DEBUG(
dbgs() <<
"Created new shuffle: " << *NewShuffle <<
"\n");
3888 replaceValue(*Shuffle, *NewShuffle);
3894 MadeChanges |= foldSelectShuffle(*Shuffle,
true);
3940bool VectorCombine::foldShuffleChainsToReduce(Instruction &
I) {
3942 std::queue<Value *> InstWorklist;
3946 std::optional<unsigned int> CommonCallOp = std::nullopt;
3947 std::optional<Instruction::BinaryOps> CommonBinOp = std::nullopt;
3949 bool IsFirstCallOrBinInst =
true;
3950 bool ShouldBeCallOrBinInst =
true;
3956 SmallVector<Value *, 2> PrevVecV(2,
nullptr);
3966 int64_t
VecSize = FVT->getNumElements();
3972 unsigned int NumLevels =
Log2_64_Ceil(VecSize), VisitedCnt = 0;
3973 int64_t ShuffleMaskHalf = 1, ExpectedParityMask = 0;
3983 for (
int Cur = VecSize, Mask = NumLevels - 1; Cur > 1;
3984 Cur = (Cur + 1) / 2, --
Mask) {
3986 ExpectedParityMask |= (1ll <<
Mask);
3989 InstWorklist.push(VecOpEE);
3991 while (!InstWorklist.empty()) {
3992 Value *CI = InstWorklist.front();
3996 if (!ShouldBeCallOrBinInst)
3999 if (!IsFirstCallOrBinInst &&
any_of(PrevVecV,
equal_to(
nullptr)))
4004 if (
II != (IsFirstCallOrBinInst ? VecOpEE : PrevVecV[0]))
4006 IsFirstCallOrBinInst =
false;
4009 CommonCallOp =
II->getIntrinsicID();
4010 if (
II->getIntrinsicID() != *CommonCallOp)
4013 switch (
II->getIntrinsicID()) {
4014 case Intrinsic::umin:
4015 case Intrinsic::umax:
4016 case Intrinsic::smin:
4017 case Intrinsic::smax: {
4018 auto *Op0 =
II->getOperand(0);
4019 auto *Op1 =
II->getOperand(1);
4027 ShouldBeCallOrBinInst ^= 1;
4029 IntrinsicCostAttributes ICA(
4030 *CommonCallOp,
II->getType(),
4031 {PrevVecV[0]->getType(), PrevVecV[1]->getType()});
4038 InstWorklist.push(PrevVecV[1]);
4039 InstWorklist.push(PrevVecV[0]);
4043 if (!ShouldBeCallOrBinInst)
4046 if (!IsFirstCallOrBinInst &&
any_of(PrevVecV,
equal_to(
nullptr)))
4049 if (BinOp != (IsFirstCallOrBinInst ? VecOpEE : PrevVecV[0]))
4051 IsFirstCallOrBinInst =
false;
4059 switch (*CommonBinOp) {
4060 case BinaryOperator::Add:
4061 case BinaryOperator::Mul:
4062 case BinaryOperator::Or:
4063 case BinaryOperator::And:
4064 case BinaryOperator::Xor: {
4074 ShouldBeCallOrBinInst ^= 1;
4081 InstWorklist.push(PrevVecV[1]);
4082 InstWorklist.push(PrevVecV[0]);
4086 if (ShouldBeCallOrBinInst ||
any_of(PrevVecV,
equal_to(
nullptr)))
4089 if (SVInst != PrevVecV[1])
4092 ArrayRef<int> CurMask;
4098 for (
int Mask = 0, MaskSize = CurMask.
size(); Mask != MaskSize; ++Mask) {
4099 if (Mask < ShuffleMaskHalf &&
4100 CurMask[Mask] != ShuffleMaskHalf + Mask - (ExpectedParityMask & 1))
4102 if (Mask >= ShuffleMaskHalf && CurMask[Mask] != -1)
4107 ShuffleMaskHalf *= 2;
4108 ShuffleMaskHalf -= (ExpectedParityMask & 1);
4109 ExpectedParityMask >>= 1;
4112 SVInst->getType(), SVInst->getType(),
4116 if (!ExpectedParityMask && VisitedCnt == NumLevels)
4119 ShouldBeCallOrBinInst ^= 1;
4126 if (ShouldBeCallOrBinInst)
4129 assert(VecSize != -1 &&
"Expected Match for Vector Size");
4131 Value *FinalVecV = PrevVecV[0];
4143 IntrinsicCostAttributes ICA(ReducedOp, FinalVecVTy, {FinalVecV});
4146 if (NewCost >= OrigCost)
4149 auto *ReducedResult =
4151 replaceValue(
I, *ReducedResult);
4160bool VectorCombine::foldCastFromReductions(Instruction &
I) {
4165 bool TruncOnly =
false;
4168 case Intrinsic::vector_reduce_add:
4169 case Intrinsic::vector_reduce_mul:
4172 case Intrinsic::vector_reduce_and:
4173 case Intrinsic::vector_reduce_or:
4174 case Intrinsic::vector_reduce_xor:
4181 Value *ReductionSrc =
I.getOperand(0);
4193 Type *ResultTy =
I.getType();
4196 ReductionOpc, ReductionSrcTy, std::nullopt,
CostKind);
4206 if (OldCost <= NewCost || !NewCost.
isValid())
4210 II->getIntrinsicID(), {Src});
4212 replaceValue(
I, *NewCast);
4240bool VectorCombine::foldSignBitReductionCmp(Instruction &
I) {
4242 IntrinsicInst *ReduceOp;
4243 const APInt *CmpVal;
4250 case Intrinsic::vector_reduce_or:
4251 case Intrinsic::vector_reduce_umax:
4252 case Intrinsic::vector_reduce_and:
4253 case Intrinsic::vector_reduce_umin:
4254 case Intrinsic::vector_reduce_add:
4265 unsigned BitWidth = VecTy->getScalarSizeInBits();
4269 unsigned NumElts = VecTy->getNumElements();
4278 case Intrinsic::vector_reduce_or:
4279 case Intrinsic::vector_reduce_umax:
4280 TreeOpcode = Instruction::Or;
4282 case Intrinsic::vector_reduce_and:
4283 case Intrinsic::vector_reduce_umin:
4284 TreeOpcode = Instruction::And;
4286 case Intrinsic::vector_reduce_add:
4287 TreeOpcode = Instruction::Add;
4295 SmallVector<Value *, 8> Worklist;
4296 SmallVector<Value *, 8> Sources;
4298 std::optional<bool> IsAShr;
4299 constexpr unsigned MaxSources = 8;
4304 while (!Worklist.
empty() && Worklist.
size() <= MaxSources &&
4305 Sources.
size() <= MaxSources) {
4314 bool ThisIsAShr = Shr->getOpcode() == Instruction::AShr;
4316 IsAShr = ThisIsAShr;
4317 else if (*IsAShr != ThisIsAShr)
4343 if (Sources.
empty() || Sources.
size() > MaxSources ||
4344 Worklist.
size() > MaxSources || !IsAShr)
4347 unsigned NumSources = Sources.
size();
4351 if (OrigIID == Intrinsic::vector_reduce_add &&
4359 (OrigIID == Intrinsic::vector_reduce_add) ? NumSources * NumElts : 1;
4362 NegativeVal.negate();
4394 TestsNegative =
false;
4395 }
else if (*CmpVal == NegativeVal) {
4396 TestsNegative =
true;
4400 IsEq = Pred == ICmpInst::ICMP_EQ;
4401 }
else if (Pred == ICmpInst::ICMP_SLT && *CmpVal == RangeHigh) {
4403 TestsNegative = (RangeHigh == NegativeVal);
4404 }
else if (Pred == ICmpInst::ICMP_SGT && *CmpVal == RangeHigh - 1) {
4406 TestsNegative = (RangeHigh == NegativeVal);
4407 }
else if (Pred == ICmpInst::ICMP_SGT && *CmpVal == RangeLow) {
4409 TestsNegative = (RangeLow == NegativeVal);
4410 }
else if (Pred == ICmpInst::ICMP_SLT && *CmpVal == RangeLow + 1) {
4412 TestsNegative = (RangeLow == NegativeVal);
4455 enum CheckKind :
unsigned {
4462 auto RequiresOr = [](CheckKind
C) ->
bool {
return C & 0b100; };
4464 auto IsNegativeCheck = [](CheckKind
C) ->
bool {
return C & 0b010; };
4466 auto Invert = [](CheckKind
C) {
return CheckKind(
C ^ 0b011); };
4470 case Intrinsic::vector_reduce_or:
4471 case Intrinsic::vector_reduce_umax:
4472 Base = TestsNegative ? AnyNeg : AllNonNeg;
4474 case Intrinsic::vector_reduce_and:
4475 case Intrinsic::vector_reduce_umin:
4476 Base = TestsNegative ? AllNeg : AnyNonNeg;
4478 case Intrinsic::vector_reduce_add:
4479 Base = TestsNegative ? AllNeg : AllNonNeg;
4494 return ArithCost <= MinMaxCost ? std::make_pair(Arith, ArithCost)
4495 : std::make_pair(MinMax, MinMaxCost);
4499 auto [NewIID, NewCost] = RequiresOr(
Check)
4500 ? PickCheaper(Intrinsic::vector_reduce_or,
4501 Intrinsic::vector_reduce_umax)
4502 : PickCheaper(
Intrinsic::vector_reduce_and,
4506 if (NumSources > 1) {
4507 unsigned CombineOpc =
4508 RequiresOr(
Check) ? Instruction::Or : Instruction::And;
4513 LLVM_DEBUG(
dbgs() <<
"Found sign-bit reduction cmp: " <<
I <<
"\n OldCost: "
4514 << OldCost <<
" vs NewCost: " << NewCost <<
"\n");
4516 if (NewCost > OldCost)
4521 Type *ScalarTy = VecTy->getScalarType();
4524 if (NumSources == 1) {
4535 replaceValue(
I, *NewCmp);
4560bool VectorCombine::foldICmpEqZeroVectorReduce(Instruction &
I) {
4571 switch (
II->getIntrinsicID()) {
4572 case Intrinsic::vector_reduce_add:
4573 case Intrinsic::vector_reduce_or:
4574 case Intrinsic::vector_reduce_umin:
4575 case Intrinsic::vector_reduce_umax:
4576 case Intrinsic::vector_reduce_smin:
4577 case Intrinsic::vector_reduce_smax:
4583 Value *InnerOp =
II->getArgOperand(0);
4626 switch (
II->getIntrinsicID()) {
4627 case Intrinsic::vector_reduce_add: {
4632 unsigned NumElems = XTy->getNumElements();
4638 if (LeadingZerosX <= LostBits || LeadingZerosFX <= LostBits)
4646 case Intrinsic::vector_reduce_smin:
4647 case Intrinsic::vector_reduce_smax:
4657 LLVM_DEBUG(
dbgs() <<
"Found a reduction to 0 comparison with removable op: "
4673 case Intrinsic::vector_reduce_add:
4674 case Intrinsic::vector_reduce_or:
4680 case Intrinsic::vector_reduce_umin:
4681 case Intrinsic::vector_reduce_umax:
4682 case Intrinsic::vector_reduce_smin:
4683 case Intrinsic::vector_reduce_smax:
4695 NewReduceCost + (InnerOp->
hasOneUse() ? 0 : ExtCost);
4697 LLVM_DEBUG(
dbgs() <<
"Found a removable extension before reduction: "
4698 << *InnerOp <<
"\n OldCost: " << OldCost
4699 <<
" vs NewCost: " << NewCost <<
"\n");
4705 if (NewCost > OldCost)
4714 Builder.
CreateICmp(Pred, NewReduce, ConstantInt::getNullValue(Ty));
4715 replaceValue(
I, *NewCmp);
4746bool VectorCombine::foldEquivalentReductionCmp(Instruction &
I) {
4749 const APInt *CmpVal;
4754 if (!
II || !
II->hasOneUse())
4757 const auto IsValidOrUmaxCmp = [&]() {
4766 bool IsPositive = CmpVal->
isAllOnes() && Pred == ICmpInst::ICMP_SGT;
4768 bool IsNegative = (CmpVal->
isZero() || CmpVal->
isOne() || *CmpVal == 2) &&
4769 Pred == ICmpInst::ICMP_SLT;
4770 return IsEquality || IsPositive || IsNegative;
4773 const auto IsValidAndUminCmp = [&]() {
4778 const auto LeadingOnes = CmpVal->
countl_one();
4785 bool IsNegative = CmpVal->
isZero() && Pred == ICmpInst::ICMP_SLT;
4794 ((*CmpVal)[0] || (*CmpVal)[1]) && Pred == ICmpInst::ICMP_SGT;
4795 return IsEquality || IsNegative || IsPositive;
4803 switch (OriginalIID) {
4804 case Intrinsic::vector_reduce_or:
4805 if (!IsValidOrUmaxCmp())
4807 AlternativeIID = Intrinsic::vector_reduce_umax;
4809 case Intrinsic::vector_reduce_umax:
4810 if (!IsValidOrUmaxCmp())
4812 AlternativeIID = Intrinsic::vector_reduce_or;
4814 case Intrinsic::vector_reduce_and:
4815 if (!IsValidAndUminCmp())
4817 AlternativeIID = Intrinsic::vector_reduce_umin;
4819 case Intrinsic::vector_reduce_umin:
4820 if (!IsValidAndUminCmp())
4822 AlternativeIID = Intrinsic::vector_reduce_and;
4835 if (ReductionOpc != Instruction::ICmp)
4846 <<
"\n OrigCost: " << OrigCost
4847 <<
" vs AltCost: " << AltCost <<
"\n");
4849 if (AltCost >= OrigCost)
4853 Type *ScalarTy = VecTy->getScalarType();
4856 Builder.
CreateICmp(Pred, NewReduce, ConstantInt::get(ScalarTy, *CmpVal));
4858 replaceValue(
I, *NewCmp);
4867 constexpr unsigned MaxVisited = 32;
4870 bool FoundReduction =
false;
4873 while (!WorkList.
empty()) {
4875 for (
User *U :
I->users()) {
4877 if (!UI || !Visited.
insert(UI).second)
4879 if (Visited.
size() > MaxVisited)
4885 switch (
II->getIntrinsicID()) {
4886 case Intrinsic::vector_reduce_add:
4887 case Intrinsic::vector_reduce_mul:
4888 case Intrinsic::vector_reduce_and:
4889 case Intrinsic::vector_reduce_or:
4890 case Intrinsic::vector_reduce_xor:
4891 case Intrinsic::vector_reduce_smin:
4892 case Intrinsic::vector_reduce_smax:
4893 case Intrinsic::vector_reduce_umin:
4894 case Intrinsic::vector_reduce_umax:
4895 FoundReduction =
true;
4908 return FoundReduction;
4921bool VectorCombine::foldSelectShuffle(Instruction &
I,
bool FromReduction) {
4926 if (!Op0 || !Op1 || Op0 == Op1 || !Op0->isBinaryOp() || !Op1->isBinaryOp() ||
4934 SmallPtrSet<Instruction *, 4> InputShuffles({SVI0A, SVI0B, SVI1A, SVI1B});
4936 if (!
I ||
I->getOperand(0)->getType() != VT)
4938 return any_of(
I->users(), [&](User *U) {
4939 return U != Op0 && U != Op1 &&
4940 !(isa<ShuffleVectorInst>(U) &&
4941 (InputShuffles.contains(cast<Instruction>(U)) ||
4942 isInstructionTriviallyDead(cast<Instruction>(U))));
4945 if (checkSVNonOpUses(SVI0A) || checkSVNonOpUses(SVI0B) ||
4946 checkSVNonOpUses(SVI1A) || checkSVNonOpUses(SVI1B))
4954 for (
auto *U :
I->users()) {
4956 if (!SV || SV->getType() != VT)
4958 if ((SV->getOperand(0) != Op0 && SV->getOperand(0) != Op1) ||
4959 (SV->getOperand(1) != Op0 && SV->getOperand(1) != Op1))
4966 if (!collectShuffles(Op0) || !collectShuffles(Op1))
4970 if (FromReduction && Shuffles.
size() > 1)
4975 if (!FromReduction) {
4976 for (ShuffleVectorInst *SV : Shuffles) {
4977 for (
auto *U : SV->users()) {
4980 Shuffles.push_back(SSV);
4992 int MaxV1Elt = 0, MaxV2Elt = 0;
4993 unsigned NumElts = VT->getNumElements();
4994 for (ShuffleVectorInst *SVN : Shuffles) {
4995 SmallVector<int>
Mask;
4996 SVN->getShuffleMask(Mask);
5000 Value *SVOp0 = SVN->getOperand(0);
5001 Value *SVOp1 = SVN->getOperand(1);
5006 for (
int &Elem : Mask) {
5012 if (SVOp0 == Op1 && SVOp1 == Op0) {
5016 if (SVOp0 != Op0 || SVOp1 != Op1)
5022 SmallVector<int> ReconstructMask;
5023 for (
unsigned I = 0;
I <
Mask.size();
I++) {
5026 }
else if (Mask[
I] <
static_cast<int>(NumElts)) {
5027 MaxV1Elt = std::max(MaxV1Elt, Mask[
I]);
5028 auto It =
find_if(V1, [&](
const std::pair<int, int> &
A) {
5029 return Mask[
I] ==
A.first;
5038 MaxV2Elt = std::max<int>(MaxV2Elt, Mask[
I] - NumElts);
5039 auto It =
find_if(V2, [&](
const std::pair<int, int> &
A) {
5040 return Mask[
I] -
static_cast<int>(NumElts) ==
A.first;
5054 sort(ReconstructMask);
5055 OrigReconstructMasks.
push_back(std::move(ReconstructMask));
5063 (MaxV1Elt ==
static_cast<int>(V1.
size()) - 1 &&
5064 MaxV2Elt ==
static_cast<int>(V2.
size()) - 1))
5076 if (InputShuffles.contains(SSV))
5078 return SV->getMaskValue(M);
5086 std::pair<int, int>
Y) {
5087 int MXA = GetBaseMaskValue(
A,
X.first);
5088 int MYA = GetBaseMaskValue(
A,
Y.first);
5091 stable_sort(V1, [&](std::pair<int, int>
A, std::pair<int, int>
B) {
5092 return SortBase(SVI0A,
A,
B);
5094 stable_sort(V2, [&](std::pair<int, int>
A, std::pair<int, int>
B) {
5095 return SortBase(SVI1A,
A,
B);
5100 for (
const auto &Mask : OrigReconstructMasks) {
5101 SmallVector<int> ReconstructMask;
5102 for (
int M : Mask) {
5104 auto It =
find_if(V, [M](
auto A) {
return A.second ==
M; });
5105 assert(It !=
V.end() &&
"Expected all entries in Mask");
5106 return std::distance(
V.begin(), It);
5110 else if (M <
static_cast<int>(NumElts)) {
5111 ReconstructMask.
push_back(FindIndex(V1, M));
5113 ReconstructMask.
push_back(NumElts + FindIndex(V2, M));
5116 ReconstructMasks.
push_back(std::move(ReconstructMask));
5121 SmallVector<int> V1A, V1B, V2A, V2B;
5122 for (
unsigned I = 0;
I < V1.
size();
I++) {
5123 V1A.
push_back(GetBaseMaskValue(SVI0A, V1[
I].first));
5124 V1B.
push_back(GetBaseMaskValue(SVI0B, V1[
I].first));
5126 for (
unsigned I = 0;
I < V2.
size();
I++) {
5127 V2A.
push_back(GetBaseMaskValue(SVI1A, V2[
I].first));
5128 V2B.
push_back(GetBaseMaskValue(SVI1B, V2[
I].first));
5130 while (V1A.
size() < NumElts) {
5134 while (V2A.
size() < NumElts) {
5146 VT, VT, SV->getShuffleMask(),
CostKind);
5153 unsigned ElementSize = VT->getElementType()->getPrimitiveSizeInBits();
5154 unsigned MaxVectorSize =
5156 unsigned MaxElementsInVector = MaxVectorSize / ElementSize;
5157 if (MaxElementsInVector == 0)
5166 std::set<SmallVector<int, 4>> UniqueShuffles;
5171 unsigned NumFullVectors =
Mask.size() / MaxElementsInVector;
5172 if (NumFullVectors < 2)
5173 return C + ShuffleCost;
5174 SmallVector<int, 4> SubShuffle(MaxElementsInVector);
5175 unsigned NumUniqueGroups = 0;
5176 unsigned NumGroups =
Mask.size() / MaxElementsInVector;
5179 for (
unsigned I = 0;
I < NumFullVectors; ++
I) {
5180 for (
unsigned J = 0; J < MaxElementsInVector; ++J)
5181 SubShuffle[J] = Mask[MaxElementsInVector *
I + J];
5182 if (UniqueShuffles.insert(SubShuffle).second)
5183 NumUniqueGroups += 1;
5185 return C + ShuffleCost * NumUniqueGroups / NumGroups;
5191 SmallVector<int, 16>
Mask;
5192 SV->getShuffleMask(Mask);
5193 return AddShuffleMaskAdjustedCost(
C, Mask);
5196 auto AllShufflesHaveSameOperands =
5197 [](SmallPtrSetImpl<Instruction *> &InputShuffles) {
5198 if (InputShuffles.size() < 2)
5200 ShuffleVectorInst *FirstSV =
5207 std::next(InputShuffles.begin()), InputShuffles.end(),
5208 [&](Instruction *
I) {
5209 ShuffleVectorInst *SV = dyn_cast<ShuffleVectorInst>(I);
5210 return SV && SV->getOperand(0) == In0 && SV->getOperand(1) == In1;
5219 CostBefore += std::accumulate(Shuffles.begin(), Shuffles.end(),
5221 if (AllShufflesHaveSameOperands(InputShuffles)) {
5222 UniqueShuffles.clear();
5223 CostBefore += std::accumulate(InputShuffles.begin(), InputShuffles.end(),
5226 CostBefore += std::accumulate(InputShuffles.begin(), InputShuffles.end(),
5232 FixedVectorType *Op0SmallVT =
5234 FixedVectorType *Op1SmallVT =
5239 UniqueShuffles.clear();
5240 CostAfter += std::accumulate(ReconstructMasks.begin(), ReconstructMasks.end(),
5242 std::set<SmallVector<int>> OutputShuffleMasks({V1A, V1B, V2A, V2B});
5244 std::accumulate(OutputShuffleMasks.begin(), OutputShuffleMasks.end(),
5247 LLVM_DEBUG(
dbgs() <<
"Found a binop select shuffle pattern: " <<
I <<
"\n");
5249 <<
" vs CostAfter: " << CostAfter <<
"\n");
5250 if (CostBefore < CostAfter ||
5261 if (InputShuffles.contains(SSV))
5263 return SV->getOperand(
Op);
5267 GetShuffleOperand(SVI0A, 1), V1A);
5270 GetShuffleOperand(SVI0B, 1), V1B);
5273 GetShuffleOperand(SVI1A, 1), V2A);
5276 GetShuffleOperand(SVI1B, 1), V2B);
5281 I->copyIRFlags(Op0,
true);
5286 I->copyIRFlags(Op1,
true);
5288 for (
int S = 0,
E = ReconstructMasks.size(); S !=
E; S++) {
5291 replaceValue(*Shuffles[S], *NSV,
false);
5294 Worklist.pushValue(NSV0A);
5295 Worklist.pushValue(NSV0B);
5296 Worklist.pushValue(NSV1A);
5297 Worklist.pushValue(NSV1B);
5307bool VectorCombine::shrinkType(Instruction &
I) {
5308 Value *ZExted, *OtherOperand;
5314 Value *ZExtOperand =
I.getOperand(
I.getOperand(0) == OtherOperand ? 1 : 0);
5318 unsigned BW = SmallTy->getElementType()->getPrimitiveSizeInBits();
5320 if (
I.getOpcode() == Instruction::LShr) {
5337 Instruction::ZExt, BigTy, SmallTy,
5338 TargetTransformInfo::CastContextHint::None,
CostKind);
5343 for (User *U : ZExtOperand->
users()) {
5350 ShrinkCost += ZExtCost;
5365 ShrinkCost += ZExtCost;
5372 Instruction::Trunc, SmallTy, BigTy,
5373 TargetTransformInfo::CastContextHint::None,
CostKind);
5378 if (ShrinkCost > CurrentCost)
5382 Value *Op0 = ZExted;
5385 if (
I.getOperand(0) == OtherOperand)
5392 replaceValue(
I, *NewZExtr);
5398bool VectorCombine::foldInsExtVectorToShuffle(Instruction &
I) {
5399 Value *DstVec, *SrcVec;
5400 uint64_t ExtIdx, InsIdx;
5410 if (!DstVecTy || !SrcVecTy ||
5416 if (InsIdx >= NumDstElts || ExtIdx >= NumSrcElts || NumDstElts == 1)
5423 bool NeedExpOrNarrow = NumSrcElts != NumDstElts;
5425 if (NeedDstSrcSwap) {
5427 Mask[InsIdx] = ExtIdx % NumDstElts;
5431 std::iota(
Mask.begin(),
Mask.end(), 0);
5432 Mask[InsIdx] = (ExtIdx % NumDstElts) + NumDstElts;
5445 SmallVector<int> ExtToVecMask;
5446 if (!NeedExpOrNarrow) {
5451 nullptr, {DstVec, SrcVec});
5457 ExtToVecMask[ExtIdx % NumDstElts] = ExtIdx;
5460 DstVecTy, SrcVecTy, ExtToVecMask,
CostKind);
5464 if (!Ext->hasOneUse())
5467 LLVM_DEBUG(
dbgs() <<
"Found a insert/extract shuffle-like pair: " <<
I
5468 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
5471 if (OldCost < NewCost)
5474 if (NeedExpOrNarrow) {
5475 if (!NeedDstSrcSwap)
5488 replaceValue(
I, *Shuf);
5497bool VectorCombine::foldInterleaveIntrinsics(Instruction &
I) {
5498 const APInt *SplatVal0, *SplatVal1;
5508 auto *ExtVTy = VectorType::getExtendedElementVectorType(VTy);
5509 unsigned Width = VTy->getElementType()->getIntegerBitWidth();
5518 LLVM_DEBUG(
dbgs() <<
"VC: The cost to cast from " << *ExtVTy <<
" to "
5519 << *
I.getType() <<
" is too high.\n");
5523 APInt NewSplatVal = SplatVal1->
zext(Width * 2);
5524 NewSplatVal <<= Width;
5525 NewSplatVal |= SplatVal0->
zext(Width * 2);
5527 ExtVTy->getElementCount(), ConstantInt::get(
F.getContext(), NewSplatVal));
5535bool VectorCombine::shrinkLoadForShuffles(Instruction &
I) {
5537 if (!OldLoad || !OldLoad->isSimple())
5544 unsigned const OldNumElements = OldLoadTy->getNumElements();
5550 using IndexRange = std::pair<int, int>;
5551 auto GetIndexRangeInShuffles = [&]() -> std::optional<IndexRange> {
5552 IndexRange OutputRange = IndexRange(OldNumElements, -1);
5553 for (llvm::Use &Use :
I.uses()) {
5555 User *Shuffle =
Use.getUser();
5560 return std::nullopt;
5567 for (
int Index : Mask) {
5568 if (Index >= 0 && Index <
static_cast<int>(OldNumElements)) {
5569 OutputRange.first = std::min(Index, OutputRange.first);
5570 OutputRange.second = std::max(Index, OutputRange.second);
5575 if (OutputRange.second < OutputRange.first)
5576 return std::nullopt;
5582 if (std::optional<IndexRange> Indices = GetIndexRangeInShuffles()) {
5583 unsigned const NewNumElements = Indices->second + 1u;
5587 if (NewNumElements < OldNumElements) {
5592 Type *ElemTy = OldLoadTy->getElementType();
5594 Value *PtrOp = OldLoad->getPointerOperand();
5597 Instruction::Load, OldLoad->getType(), OldLoad->getAlign(),
5598 OldLoad->getPointerAddressSpace(),
CostKind);
5601 OldLoad->getPointerAddressSpace(),
CostKind);
5603 using UseEntry = std::pair<ShuffleVectorInst *, std::vector<int>>;
5605 unsigned const MaxIndex = NewNumElements * 2u;
5607 for (llvm::Use &Use :
I.uses()) {
5614 ArrayRef<int> OldMask = Shuffle->getShuffleMask();
5620 for (
int Index : OldMask) {
5621 if (Index >=
static_cast<int>(MaxIndex))
5635 dbgs() <<
"Found a load used only by shufflevector instructions: "
5636 <<
I <<
"\n OldCost: " << OldCost
5637 <<
" vs NewCost: " << NewCost <<
"\n");
5639 if (OldCost < NewCost || !NewCost.
isValid())
5645 NewLoad->copyMetadata(
I);
5648 for (UseEntry &Use : NewUses) {
5649 ShuffleVectorInst *Shuffle =
Use.first;
5650 std::vector<int> &NewMask =
Use.second;
5657 replaceValue(*Shuffle, *NewShuffle,
false);
5670bool VectorCombine::shrinkPhiOfShuffles(Instruction &
I) {
5672 if (!Phi ||
Phi->getNumIncomingValues() != 2u)
5676 ArrayRef<int> Mask0;
5677 ArrayRef<int> Mask1;
5690 auto const InputNumElements = InputVT->getNumElements();
5692 if (InputNumElements >= ResultVT->getNumElements())
5697 SmallVector<int, 16> NewMask;
5700 for (
auto [
M0,
M1] :
zip(Mask0, Mask1)) {
5701 if (
M0 >= 0 &&
M1 >= 0)
5703 else if (
M0 == -1 &&
M1 == -1)
5716 int MaskOffset = NewMask[0
u];
5717 unsigned Index = (InputNumElements + MaskOffset) % InputNumElements;
5720 for (
unsigned I = 0u;
I < InputNumElements; ++
I) {
5734 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
5737 if (NewCost > OldCost)
5749 auto *NewPhi = Builder.
CreatePHI(NewShuf0->getType(), 2u);
5751 NewPhi->addIncoming(
Op,
Phi->getIncomingBlock(1u));
5757 replaceValue(*Phi, *NewShuf1);
5763bool VectorCombine::run() {
5777 auto Opcode =
I.getOpcode();
5785 if (IsFixedVectorType) {
5787 case Instruction::InsertElement:
5788 if (vectorizeLoadInsert(
I))
5791 case Instruction::ShuffleVector:
5792 if (widenSubvectorLoad(
I))
5803 if (scalarizeOpOrCmp(
I))
5805 if (scalarizeLoad(
I))
5807 if (scalarizeExtExtract(
I))
5809 if (scalarizeVPIntrinsic(
I))
5811 if (foldInterleaveIntrinsics(
I))
5815 if (Opcode == Instruction::Store)
5816 if (foldSingleElementStore(
I))
5820 if (TryEarlyFoldsOnly)
5827 if (IsFixedVectorType) {
5829 case Instruction::InsertElement:
5830 if (foldInsExtFNeg(
I))
5832 if (foldInsExtBinop(
I))
5834 if (foldInsExtVectorToShuffle(
I))
5837 case Instruction::ShuffleVector:
5838 if (foldPermuteOfBinops(
I))
5840 if (foldShuffleOfBinops(
I))
5842 if (foldShuffleOfSelects(
I))
5844 if (foldShuffleOfCastops(
I))
5846 if (foldShuffleOfShuffles(
I))
5848 if (foldPermuteOfIntrinsic(
I))
5850 if (foldShufflesOfLengthChangingShuffles(
I))
5852 if (foldShuffleOfIntrinsics(
I))
5854 if (foldSelectShuffle(
I))
5856 if (foldShuffleToIdentity(
I))
5859 case Instruction::Load:
5860 if (shrinkLoadForShuffles(
I))
5863 case Instruction::BitCast:
5864 if (foldBitcastShuffle(
I))
5866 if (foldSelectsFromBitcast(
I))
5869 case Instruction::And:
5870 case Instruction::Or:
5871 case Instruction::Xor:
5872 if (foldBitOpOfCastops(
I))
5874 if (foldBitOpOfCastConstant(
I))
5877 case Instruction::PHI:
5878 if (shrinkPhiOfShuffles(
I))
5888 case Instruction::Call:
5889 if (foldShuffleFromReductions(
I))
5891 if (foldCastFromReductions(
I))
5894 case Instruction::ExtractElement:
5895 if (foldShuffleChainsToReduce(
I))
5898 case Instruction::ICmp:
5899 if (foldSignBitReductionCmp(
I))
5901 if (foldICmpEqZeroVectorReduce(
I))
5903 if (foldEquivalentReductionCmp(
I))
5906 case Instruction::FCmp:
5907 if (foldExtractExtract(
I))
5910 case Instruction::Or:
5911 if (foldConcatOfBoolMasks(
I))
5916 if (foldExtractExtract(
I))
5918 if (foldExtractedCmps(
I))
5920 if (foldBinopOfReductions(
I))
5929 bool MadeChange =
false;
5930 for (BasicBlock &BB :
F) {
5942 if (!
I->isDebugOrPseudoInst())
5943 MadeChange |= FoldInst(*
I);
5950 while (!Worklist.isEmpty()) {
5960 MadeChange |= FoldInst(*
I);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< unsigned > MaxInstrsToScan("aggressive-instcombine-max-scan-instrs", cl::init(64), cl::Hidden, cl::desc("Max number of instructions to scan for aggressive instcombine."))
This is the interface for LLVM's primary stateless and local alias analysis.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
static cl::opt< IntrinsicCostStrategy > IntrinsicCost("intrinsic-cost-strategy", cl::desc("Costing strategy for intrinsic instructions"), cl::init(IntrinsicCostStrategy::InstructionCost), cl::values(clEnumValN(IntrinsicCostStrategy::InstructionCost, "instruction-cost", "Use TargetTransformInfo::getInstructionCost"), clEnumValN(IntrinsicCostStrategy::IntrinsicCost, "intrinsic-cost", "Use TargetTransformInfo::getIntrinsicInstrCost"), clEnumValN(IntrinsicCostStrategy::TypeBasedIntrinsicCost, "type-based-intrinsic-cost", "Calculate the intrinsic cost based only on argument types")))
This file defines the DenseMap class.
This is the interface for a simple mod/ref and alias analysis over globals.
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
FunctionAnalysisManager FAM
const SmallVectorImpl< MachineOperand > & Cond
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static SymbolRef::Type getType(const Symbol *Sym)
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
static bool isFreeConcat(ArrayRef< InstLane > Item, TTI::TargetCostKind CostKind, const TargetTransformInfo &TTI)
Detect concat of multiple values into a vector.
static void analyzeCostOfVecReduction(const IntrinsicInst &II, TTI::TargetCostKind CostKind, const TargetTransformInfo &TTI, InstructionCost &CostBeforeReduction, InstructionCost &CostAfterReduction)
static SmallVector< InstLane > generateInstLaneVectorFromOperand(ArrayRef< InstLane > Item, int Op)
static Value * createShiftShuffle(Value *Vec, unsigned OldIndex, unsigned NewIndex, IRBuilderBase &Builder)
Create a shuffle that translates (shifts) 1 element from the input vector to a new element location.
static Value * generateNewInstTree(ArrayRef< InstLane > Item, Use *From, FixedVectorType *Ty, const DenseSet< std::pair< Value *, Use * > > &IdentityLeafs, const DenseSet< std::pair< Value *, Use * > > &SplatLeafs, const DenseSet< std::pair< Value *, Use * > > &ConcatLeafs, IRBuilderBase &Builder, const TargetTransformInfo *TTI)
std::pair< Value *, int > InstLane
static Align computeAlignmentAfterScalarization(Align VectorAlignment, Type *ScalarType, Value *Idx, const DataLayout &DL)
The memory operation on a vector of ScalarType had alignment of VectorAlignment.
static bool feedsIntoVectorReduction(ShuffleVectorInst *SVI)
Returns true if this ShuffleVectorInst eventually feeds into a vector reduction intrinsic (e....
static ScalarizationResult canScalarizeAccess(VectorType *VecTy, Value *Idx, Instruction *CtxI, AssumptionCache &AC, const DominatorTree &DT)
Check if it is legal to scalarize a memory access to VecTy at index Idx.
static cl::opt< bool > DisableVectorCombine("disable-vector-combine", cl::init(false), cl::Hidden, cl::desc("Disable all vector combine transforms"))
static bool canWidenLoad(LoadInst *Load, const TargetTransformInfo &TTI)
static const unsigned InvalidIndex
static Value * translateExtract(ExtractElementInst *ExtElt, unsigned NewIndex, IRBuilderBase &Builder)
Given an extract element instruction with constant index operand, shuffle the source vector (shift th...
static cl::opt< unsigned > MaxInstrsToScan("vector-combine-max-scan-instrs", cl::init(30), cl::Hidden, cl::desc("Max number of instructions to scan for vector combining."))
static cl::opt< bool > DisableBinopExtractShuffle("disable-binop-extract-shuffle", cl::init(false), cl::Hidden, cl::desc("Disable binop extract to shuffle transforms"))
static InstLane lookThroughShuffles(Value *V, int Lane)
static bool isMemModifiedBetween(BasicBlock::iterator Begin, BasicBlock::iterator End, const MemoryLocation &Loc, AAResults &AA)
static constexpr int Concat[]
A manager for alias analyses.
Class for arbitrary precision integers.
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
unsigned getBitWidth() const
Return the number of bits in the APInt.
bool isNegative() const
Determine sign of this APInt.
unsigned countl_one() const
Count the number of leading one bits.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
bool isOne() const
Determine if this is a value of 1.
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & front() const
front - Get the first element.
size_t size() const
size - Get the array size.
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM_ABI bool hasAttribute(Attribute::AttrKind Kind) const
Return true if the attribute exists in this set.
InstListType::iterator iterator
Instruction iterators...
BinaryOps getOpcode() const
Represents analyses that only rely on functions' control flow.
Value * getArgOperand(unsigned i) const
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
bool isFPPredicate() const
static LLVM_ABI std::optional< CmpPredicate > getMatching(CmpPredicate A, CmpPredicate B)
Compares two CmpPredicates taking samesign into account and returns the canonicalized CmpPredicate if...
static LLVM_ABI Constant * getExtractElement(Constant *Vec, Constant *Idx, Type *OnlyIfReducedTy=nullptr)
This is the shared class of boolean and integer constants.
const APInt & getValue() const
Return the constant as an APInt value reference.
This class represents a range of values.
LLVM_ABI ConstantRange urem(const ConstantRange &Other) const
Return a new range representing the possible values resulting from an unsigned remainder operation of...
LLVM_ABI ConstantRange binaryAnd(const ConstantRange &Other) const
Return a new range representing the possible values resulting from a binary-and of a value in this ra...
LLVM_ABI bool contains(const APInt &Val) const
Return true if the specified value is in the set.
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
iterator find(const_arg_type_t< KeyT > Val)
Implements a dense probed hash-table based set.
Analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
LLVM_ABI bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Convenience struct for specifying and reasoning about fast-math flags.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static FixedVectorType * getDoubleElementsVectorType(FixedVectorType *VTy)
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Predicate getSignedPredicate() const
For example, EQ->EQ, SLE->SLE, UGT->SGT, etc.
bool isEquality() const
Return true if this predicate is either EQ or NE.
Common base class shared among various IRBuilders.
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
LLVM_ABI Value * CreateSelectFMF(Value *C, Value *True, Value *False, FMFSource FMFSource, const Twine &Name="", Instruction *MDFrom=nullptr)
LLVM_ABI Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Value * CreateFreeze(Value *V, const Twine &Name="")
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="", MDNode *FPMathTag=nullptr, FMFSource FMFSource={})
Value * CreateIsNotNeg(Value *Arg, const Twine &Name="")
Return a boolean value testing if Arg > -1.
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Value * CreatePointerBitCastOrAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
InstTy * Insert(InstTy *I, const Twine &Name="") const
Insert and return the specified instruction.
Value * CreateIsNeg(Value *Arg, const Twine &Name="")
Return a boolean value testing if Arg < 0.
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
LLVM_ABI Value * CreateNAryOp(unsigned Opc, ArrayRef< Value * > Ops, const Twine &Name="", MDNode *FPMathTag=nullptr)
Create either a UnaryOperator or BinaryOperator depending on Opc.
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Value * CreateFNegFMF(Value *V, FMFSource FMFSource, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
CostType getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
void push(Instruction *I)
Push the instruction onto the worklist stack.
LLVM_ABI void setHasNoUnsignedWrap(bool b=true)
Set or clear the nuw flag on this instruction, which must be an operator which supports this flag.
LLVM_ABI void copyIRFlags(const Value *V, bool IncludeWrapFlags=true)
Convenience method to copy supported exact, fast-math, and (optionally) wrapping flags from V to this...
LLVM_ABI void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI void andIRFlags(const Value *V)
Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction.
LLVM_ABI void setNonNeg(bool b=true)
Set or clear the nneg flag on this instruction, which must be a zext instruction.
LLVM_ABI bool comesBefore(const Instruction *Other) const
Given an instruction Other in the same basic block as this instruction, return true if this instructi...
LLVM_ABI AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
An instruction for reading from memory.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
void setAlignment(Align Align)
Type * getPointerOperandType() const
Align getAlign() const
Return the alignment of the access that is being performed.
Representation for a specific memory location.
static LLVM_ABI MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
const SDValue & getOperand(unsigned Num) const
This instruction constructs a fixed permutation of two input vectors.
int getMaskValue(unsigned Elt) const
Return the shuffle mask value of this instruction for the given element index.
VectorType * getType() const
Overload to return most specific vector type.
static LLVM_ABI void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static LLVM_ABI bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
static void commuteShuffleMask(MutableArrayRef< int > Mask, unsigned InVecNumElts)
Change values in a shuffle permute mask assuming the two vector operands of length InVecNumElts have ...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
void assign(size_type NumElts, ValueParamT Elt)
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
void setAlignment(Align Align)
Analysis pass providing the TargetTransformInfo.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isPointerTy() const
True if this is an instance of PointerType.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
A Use represents the edge between a Value definition and its users.
Value * getOperand(unsigned i) const
static LLVM_ABI bool isVPBinOp(Intrinsic::ID ID)
std::optional< unsigned > getFunctionalIntrinsicID() const
std::optional< unsigned > getFunctionalOpcode() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
This is a wrapper around stripAndAccumulateConstantOffsets with the in-bounds requirement set to fals...
LLVM_ABI bool hasOneUser() const
Return true if there is exactly one user of this value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
unsigned getValueID() const
Return an ID for the concrete type of this object.
LLVM_ABI bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &)
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Type * getElementType() const
std::pair< iterator, bool > insert(const ValueT &V)
const ParentTy * getParent() const
self_iterator getIterator()
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Abstract Attribute helper functions.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
const APInt & smin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be signed.
const APInt & smax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be signed.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
LLVM_ABI AttributeSet getFnAttributes(LLVMContext &C, ID id)
Return the function attributes for an intrinsic.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
class_match< PoisonValue > m_Poison()
Match an arbitrary poison constant.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
BinaryOp_match< LHS, RHS, Instruction::URem > m_URem(const LHS &L, const RHS &R)
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
class_match< IntrinsicInst > m_AnyIntrinsic()
Matches any intrinsic call and ignore it.
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
DisjointOr_match< LHS, RHS > m_DisjointOr(const LHS &L, const RHS &R)
BinOpPred_match< LHS, RHS, is_right_shift_op > m_Shr(const LHS &L, const RHS &R)
Matches logical shift operations.
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
match_combine_and< LTy, RTy > m_CombineAnd(const LTy &L, const RTy &R)
Combine two pattern matchers matching L && R.
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
cst_pred_ty< is_non_zero_int > m_NonZeroInt()
Match a non-zero integer or a vector with all non-zero elements.
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Shl, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWShl(const LHS &L, const RHS &R)
OverflowingBinaryOp_match< LHS, RHS, Instruction::Mul, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWMul(const LHS &L, const RHS &R)
BinOpPred_match< LHS, RHS, is_bitwiselogic_op, true > m_c_BitwiseLogic(const LHS &L, const RHS &R)
Matches bitwise logic operations in either order.
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
match_combine_or< CastInst_match< OpTy, SExtInst >, NNegZExt_match< OpTy > > m_SExtLike(const OpTy &Op)
Match either "sext" or "zext nneg".
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
@ Valid
The data is already valid.
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
@ User
could "use" a pointer
NodeAddr< PhiNode * > Phi
NodeAddr< UseNode * > Use
friend class Instruction
Iterator for Instructions in a `BasicBlock.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
FunctionAddr VTableAddr Value
void stable_sort(R &&Range)
UnaryFunction for_each(R &&Range, UnaryFunction F)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI Intrinsic::ID getMinMaxReductionIntrinsicOp(Intrinsic::ID RdxID)
Returns the min/max intrinsic used when expanding a min/max reduction.
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
unsigned Log2_64_Ceil(uint64_t Value)
Return the ceil log base 2 of the specified value, 64 if the value is zero.
LLVM_ABI Value * simplifyUnOp(unsigned Opcode, Value *Op, const SimplifyQuery &Q)
Given operand for a UnaryOperator, fold the result or return null.
scope_exit(Callable) -> scope_exit< Callable >
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI unsigned getArithmeticReductionInstruction(Intrinsic::ID RdxID)
Returns the arithmetic instruction opcode used when expanding a reduction.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
LLVM_ABI Value * simplifyCall(CallBase *Call, Value *Callee, ArrayRef< Value * > Args, const SimplifyQuery &Q)
Given a callsite, callee, and arguments, fold the result or return null.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
LLVM_ABI bool mustSuppressSpeculation(const LoadInst &LI)
Return true if speculation of the given load must be suppressed to avoid ordering or interfering with...
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like a...
LLVM_ABI ConstantRange computeConstantRange(const Value *V, bool ForSigned, bool UseInstrInfo=true, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Determine the possible constant range of an integer or vector of integer value.
unsigned M1(unsigned Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
bool isModSet(const ModRefInfo MRI)
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI bool programUndefinedIfPoison(const Instruction *Inst)
LLVM_ABI bool isSafeToLoadUnconditionally(Value *V, Align Alignment, const APInt &Size, const DataLayout &DL, Instruction *ScanFrom, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if we know that executing a load from this value cannot trap.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
FunctionAddr VTableAddr Count
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ABI void propagateIRFlags(Value *I, ArrayRef< Value * > VL, Value *OpValue=nullptr, bool IncludeWrapFlags=true)
Get the intersection (logical and) of all of the potential IR flags of each scalar operation (VL) tha...
LLVM_ABI bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
constexpr int PoisonMaskElem
LLVM_ABI bool isSafeToSpeculativelyExecuteWithOpcode(unsigned Opcode, const Instruction *Inst, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
This returns the same result as isSafeToSpeculativelyExecute if Opcode is the actual opcode of Inst.
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
LLVM_ABI Value * simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a BinaryOperator, fold the result or return null.
LLVM_ABI void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
LLVM_ABI Intrinsic::ID getReductionForBinop(Instruction::BinaryOps Opc)
Returns the reduction intrinsic id corresponding to the binary operation.
@ And
Bitwise or logical AND of integers.
LLVM_ABI bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
constexpr unsigned BitWidth
LLVM_ABI bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
LLVM_ABI Constant * getLosslessInvCast(Constant *C, Type *InvCastTo, unsigned CastOp, const DataLayout &DL, PreservedCastFlags *Flags=nullptr)
Try to cast C to InvC losslessly, satisfying CastOp(InvC) equals C, or CastOp(InvC) is a refined valu...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
constexpr bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer list are equal or the list has fewer than two elements.
LLVM_ABI Value * simplifyCmpInst(CmpPredicate Predicate, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a CmpInst, fold the result or return null.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI bool isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Returns true if V cannot be poison, but may be undef.
LLVM_ABI bool isKnownNonNegative(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Returns true if the given value is known to be non-negative.
LLVM_ABI bool isTriviallyVectorizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially vectorizable.
LLVM_ABI Intrinsic::ID getMinMaxReductionIntrinsicID(Intrinsic::ID IID)
Returns the llvm.vector.reduce min/max intrinsic that corresponds to the intrinsic op.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
LLVM_ABI AAMDNodes adjustForAccess(unsigned AccessSize)
Create a new AAMDNode for accessing AccessSize bytes of this AAMDNode.
This struct is a compact representation of a valid (non-zero power of two) alignment.
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
SimplifyQuery getWithInstruction(const Instruction *I) const