44#define DEBUG_TYPE "vector-combine"
50STATISTIC(NumVecLoad,
"Number of vector loads formed");
51STATISTIC(NumVecCmp,
"Number of vector compares formed");
52STATISTIC(NumVecBO,
"Number of vector binops formed");
53STATISTIC(NumVecCmpBO,
"Number of vector compare + binop formed");
54STATISTIC(NumShufOfBitcast,
"Number of shuffles moved after bitcast");
55STATISTIC(NumScalarOps,
"Number of scalar unary + binary ops formed");
56STATISTIC(NumScalarCmp,
"Number of scalar compares formed");
57STATISTIC(NumScalarIntrinsic,
"Number of scalar intrinsic calls formed");
61 cl::desc(
"Disable all vector combine transforms"));
65 cl::desc(
"Disable binop extract to shuffle transforms"));
69 cl::desc(
"Max number of instructions to scan for vector combining."));
71static const unsigned InvalidIndex = std::numeric_limits<unsigned>::max();
79 bool TryEarlyFoldsOnly)
82 TryEarlyFoldsOnly(TryEarlyFoldsOnly) {}
89 const TargetTransformInfo &TTI;
90 const DominatorTree &DT;
95 const SimplifyQuery SQ;
99 bool TryEarlyFoldsOnly;
101 InstructionWorklist Worklist;
110 bool vectorizeLoadInsert(Instruction &
I);
111 bool widenSubvectorLoad(Instruction &
I);
112 ExtractElementInst *getShuffleExtract(ExtractElementInst *Ext0,
113 ExtractElementInst *Ext1,
114 unsigned PreferredExtractIndex)
const;
115 bool isExtractExtractCheap(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
116 const Instruction &
I,
117 ExtractElementInst *&ConvertToShuffle,
118 unsigned PreferredExtractIndex);
121 bool foldExtractExtract(Instruction &
I);
122 bool foldInsExtFNeg(Instruction &
I);
123 bool foldInsExtBinop(Instruction &
I);
124 bool foldInsExtVectorToShuffle(Instruction &
I);
125 bool foldBitOpOfCastops(Instruction &
I);
126 bool foldBitOpOfCastConstant(Instruction &
I);
127 bool foldBitcastShuffle(Instruction &
I);
128 bool scalarizeOpOrCmp(Instruction &
I);
129 bool scalarizeVPIntrinsic(Instruction &
I);
130 bool foldExtractedCmps(Instruction &
I);
131 bool foldSelectsFromBitcast(Instruction &
I);
132 bool foldBinopOfReductions(Instruction &
I);
133 bool foldSingleElementStore(Instruction &
I);
134 bool scalarizeLoad(Instruction &
I);
135 bool scalarizeLoadExtract(LoadInst *LI, VectorType *VecTy,
Value *Ptr);
136 bool scalarizeLoadBitcast(LoadInst *LI, VectorType *VecTy,
Value *Ptr);
137 bool scalarizeExtExtract(Instruction &
I);
138 bool foldConcatOfBoolMasks(Instruction &
I);
139 bool foldPermuteOfBinops(Instruction &
I);
140 bool foldShuffleOfBinops(Instruction &
I);
141 bool foldShuffleOfSelects(Instruction &
I);
142 bool foldShuffleOfCastops(Instruction &
I);
143 bool foldShuffleOfShuffles(Instruction &
I);
144 bool foldPermuteOfIntrinsic(Instruction &
I);
145 bool foldShufflesOfLengthChangingShuffles(Instruction &
I);
146 bool foldShuffleOfIntrinsics(Instruction &
I);
147 bool foldShuffleToIdentity(Instruction &
I);
148 bool foldShuffleFromReductions(Instruction &
I);
149 bool foldShuffleChainsToReduce(Instruction &
I);
150 bool foldCastFromReductions(Instruction &
I);
151 bool foldSignBitReductionCmp(Instruction &
I);
152 bool foldICmpEqZeroVectorReduce(Instruction &
I);
153 bool foldEquivalentReductionCmp(Instruction &
I);
154 bool foldSelectShuffle(Instruction &
I,
bool FromReduction =
false);
155 bool foldInterleaveIntrinsics(Instruction &
I);
156 bool shrinkType(Instruction &
I);
157 bool shrinkLoadForShuffles(Instruction &
I);
158 bool shrinkPhiOfShuffles(Instruction &
I);
160 void replaceValue(Instruction &Old,
Value &New,
bool Erase =
true) {
166 Worklist.pushUsersToWorkList(*NewI);
167 Worklist.pushValue(NewI);
184 SmallPtrSet<Value *, 4> Visited;
189 OpI,
nullptr,
nullptr, [&](
Value *V) {
194 NextInst = NextInst->getNextNode();
199 Worklist.pushUsersToWorkList(*OpI);
200 Worklist.pushValue(OpI);
220 if (!Load || !Load->isSimple() || !Load->hasOneUse() ||
221 Load->getFunction()->hasFnAttribute(Attribute::SanitizeMemTag) ||
227 Type *ScalarTy = Load->getType()->getScalarType();
229 unsigned MinVectorSize =
TTI.getMinVectorRegisterBitWidth();
230 if (!ScalarSize || !MinVectorSize || MinVectorSize % ScalarSize != 0 ||
237bool VectorCombine::vectorizeLoadInsert(
Instruction &
I) {
263 Value *SrcPtr =
Load->getPointerOperand()->stripPointerCasts();
266 unsigned MinVecNumElts = MinVectorSize / ScalarSize;
267 auto *MinVecTy = VectorType::get(ScalarTy, MinVecNumElts,
false);
268 unsigned OffsetEltIndex = 0;
276 unsigned OffsetBitWidth =
DL->getIndexTypeSizeInBits(SrcPtr->
getType());
277 APInt
Offset(OffsetBitWidth, 0);
287 uint64_t ScalarSizeInBytes = ScalarSize / 8;
288 if (
Offset.urem(ScalarSizeInBytes) != 0)
292 OffsetEltIndex =
Offset.udiv(ScalarSizeInBytes).getZExtValue();
293 if (OffsetEltIndex >= MinVecNumElts)
310 unsigned AS =
Load->getPointerAddressSpace();
329 unsigned OutputNumElts = Ty->getNumElements();
331 assert(OffsetEltIndex < MinVecNumElts &&
"Address offset too big");
332 Mask[0] = OffsetEltIndex;
339 if (OldCost < NewCost || !NewCost.
isValid())
350 replaceValue(
I, *VecLd);
358bool VectorCombine::widenSubvectorLoad(Instruction &
I) {
361 if (!Shuf->isIdentityWithPadding())
367 unsigned OpIndex =
any_of(Shuf->getShuffleMask(), [&NumOpElts](
int M) {
368 return M >= (int)(NumOpElts);
379 Value *SrcPtr =
Load->getPointerOperand()->stripPointerCasts();
387 unsigned AS =
Load->getPointerAddressSpace();
402 if (OldCost < NewCost || !NewCost.
isValid())
409 replaceValue(
I, *VecLd);
416ExtractElementInst *VectorCombine::getShuffleExtract(
417 ExtractElementInst *Ext0, ExtractElementInst *Ext1,
421 assert(Index0C && Index1C &&
"Expected constant extract indexes");
423 unsigned Index0 = Index0C->getZExtValue();
424 unsigned Index1 = Index1C->getZExtValue();
427 if (Index0 == Index1)
451 if (PreferredExtractIndex == Index0)
453 if (PreferredExtractIndex == Index1)
457 return Index0 > Index1 ? Ext0 : Ext1;
465bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
466 ExtractElementInst *Ext1,
467 const Instruction &
I,
468 ExtractElementInst *&ConvertToShuffle,
469 unsigned PreferredExtractIndex) {
472 assert(Ext0IndexC && Ext1IndexC &&
"Expected constant extract indexes");
474 unsigned Opcode =
I.getOpcode();
487 assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
488 "Expected a compare");
498 unsigned Ext0Index = Ext0IndexC->getZExtValue();
499 unsigned Ext1Index = Ext1IndexC->getZExtValue();
513 unsigned BestExtIndex = Extract0Cost > Extract1Cost ? Ext0Index : Ext1Index;
514 unsigned BestInsIndex = Extract0Cost > Extract1Cost ? Ext1Index : Ext0Index;
515 InstructionCost CheapExtractCost = std::min(Extract0Cost, Extract1Cost);
520 if (Ext0Src == Ext1Src && Ext0Index == Ext1Index) {
525 bool HasUseTax = Ext0 == Ext1 ? !Ext0->
hasNUses(2)
527 OldCost = CheapExtractCost + ScalarOpCost;
528 NewCost = VectorOpCost + CheapExtractCost + HasUseTax * CheapExtractCost;
532 OldCost = Extract0Cost + Extract1Cost + ScalarOpCost;
533 NewCost = VectorOpCost + CheapExtractCost +
538 ConvertToShuffle = getShuffleExtract(Ext0, Ext1, PreferredExtractIndex);
539 if (ConvertToShuffle) {
551 SmallVector<int> ShuffleMask(FixedVecTy->getNumElements(),
553 ShuffleMask[BestInsIndex] = BestExtIndex;
555 VecTy, VecTy, ShuffleMask,
CostKind, 0,
556 nullptr, {ConvertToShuffle});
559 VecTy, VecTy, {},
CostKind, 0,
nullptr,
567 return OldCost < NewCost;
579 ShufMask[NewIndex] = OldIndex;
580 return Builder.CreateShuffleVector(Vec, ShufMask,
"shift");
632 V1,
"foldExtExtBinop");
637 VecBOInst->copyIRFlags(&
I);
643bool VectorCombine::foldExtractExtract(Instruction &
I) {
674 ExtractElementInst *ExtractToChange;
675 if (isExtractExtractCheap(Ext0, Ext1,
I, ExtractToChange, InsertIndex))
681 if (ExtractToChange) {
682 unsigned CheapExtractIdx = ExtractToChange == Ext0 ? C1 : C0;
687 if (ExtractToChange == Ext0)
696 ? foldExtExtCmp(ExtOp0, ExtOp1, ExtIndex,
I)
697 : foldExtExtBinop(ExtOp0, ExtOp1, ExtIndex,
I);
700 replaceValue(
I, *NewExt);
706bool VectorCombine::foldInsExtFNeg(Instruction &
I) {
709 uint64_t ExtIdx, InsIdx;
724 auto *DstVecScalarTy = DstVecTy->getScalarType();
726 if (!SrcVecTy || DstVecScalarTy != SrcVecTy->getScalarType())
731 unsigned NumDstElts = DstVecTy->getNumElements();
732 unsigned NumSrcElts = SrcVecTy->getNumElements();
733 if (ExtIdx > NumSrcElts || InsIdx >= NumDstElts || NumDstElts == 1)
739 SmallVector<int>
Mask(NumDstElts);
740 std::iota(
Mask.begin(),
Mask.end(), 0);
741 Mask[InsIdx] = (ExtIdx % NumDstElts) + NumDstElts;
757 bool NeedLenChg = SrcVecTy->getNumElements() != NumDstElts;
760 SmallVector<int> SrcMask;
763 SrcMask[ExtIdx % NumDstElts] = ExtIdx;
765 DstVecTy, SrcVecTy, SrcMask,
CostKind);
769 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
771 if (NewCost > OldCost)
774 Value *NewShuf, *LenChgShuf =
nullptr;
788 replaceValue(
I, *NewShuf);
794bool VectorCombine::foldInsExtBinop(Instruction &
I) {
795 BinaryOperator *VecBinOp, *SclBinOp;
827 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
829 if (NewCost > OldCost)
840 NewInst->copyIRFlags(VecBinOp);
841 NewInst->andIRFlags(SclBinOp);
846 replaceValue(
I, *NewBO);
852bool VectorCombine::foldBitOpOfCastops(Instruction &
I) {
855 if (!BinOp || !BinOp->isBitwiseLogicOp())
861 if (!LHSCast || !RHSCast) {
862 LLVM_DEBUG(
dbgs() <<
" One or both operands are not cast instructions\n");
868 if (CastOpcode != RHSCast->getOpcode())
872 switch (CastOpcode) {
873 case Instruction::BitCast:
874 case Instruction::Trunc:
875 case Instruction::SExt:
876 case Instruction::ZExt:
882 Value *LHSSrc = LHSCast->getOperand(0);
883 Value *RHSSrc = RHSCast->getOperand(0);
889 auto *SrcTy = LHSSrc->
getType();
890 auto *DstTy =
I.getType();
893 if (CastOpcode != Instruction::BitCast &&
898 if (!SrcTy->getScalarType()->isIntegerTy() ||
899 !DstTy->getScalarType()->isIntegerTy())
914 LHSCastCost + RHSCastCost;
925 if (!LHSCast->hasOneUse())
926 NewCost += LHSCastCost;
927 if (!RHSCast->hasOneUse())
928 NewCost += RHSCastCost;
931 <<
" NewCost=" << NewCost <<
"\n");
933 if (NewCost > OldCost)
938 BinOp->getName() +
".inner");
940 NewBinOp->copyIRFlags(BinOp);
954 replaceValue(
I, *Result);
963bool VectorCombine::foldBitOpOfCastConstant(Instruction &
I) {
979 switch (CastOpcode) {
980 case Instruction::BitCast:
981 case Instruction::ZExt:
982 case Instruction::SExt:
983 case Instruction::Trunc:
989 Value *LHSSrc = LHSCast->getOperand(0);
991 auto *SrcTy = LHSSrc->
getType();
992 auto *DstTy =
I.getType();
995 if (CastOpcode != Instruction::BitCast &&
1000 if (!SrcTy->getScalarType()->isIntegerTy() ||
1001 !DstTy->getScalarType()->isIntegerTy())
1005 PreservedCastFlags RHSFlags;
1030 if (!LHSCast->hasOneUse())
1031 NewCost += LHSCastCost;
1033 LLVM_DEBUG(
dbgs() <<
"foldBitOpOfCastConstant: OldCost=" << OldCost
1034 <<
" NewCost=" << NewCost <<
"\n");
1036 if (NewCost > OldCost)
1041 LHSSrc, InvC,
I.getName() +
".inner");
1043 NewBinOp->copyIRFlags(&
I);
1063 replaceValue(
I, *Result);
1070bool VectorCombine::foldBitcastShuffle(Instruction &
I) {
1084 if (!DestTy || !SrcTy)
1087 unsigned DestEltSize = DestTy->getScalarSizeInBits();
1088 unsigned SrcEltSize = SrcTy->getScalarSizeInBits();
1089 if (SrcTy->getPrimitiveSizeInBits() % DestEltSize != 0)
1099 if (!(BCTy0 && BCTy0->getElementType() == DestTy->getElementType()) &&
1100 !(BCTy1 && BCTy1->getElementType() == DestTy->getElementType()))
1104 SmallVector<int, 16> NewMask;
1105 if (DestEltSize <= SrcEltSize) {
1108 assert(SrcEltSize % DestEltSize == 0 &&
"Unexpected shuffle mask");
1109 unsigned ScaleFactor = SrcEltSize / DestEltSize;
1114 assert(DestEltSize % SrcEltSize == 0 &&
"Unexpected shuffle mask");
1115 unsigned ScaleFactor = DestEltSize / SrcEltSize;
1122 unsigned NumSrcElts = SrcTy->getPrimitiveSizeInBits() / DestEltSize;
1123 auto *NewShuffleTy =
1125 auto *OldShuffleTy =
1127 unsigned NumOps = IsUnary ? 1 : 2;
1137 TargetTransformInfo::CastContextHint::None,
1142 TargetTransformInfo::CastContextHint::None,
1145 LLVM_DEBUG(
dbgs() <<
"Found a bitcasted shuffle: " <<
I <<
"\n OldCost: "
1146 << OldCost <<
" vs NewCost: " << NewCost <<
"\n");
1148 if (NewCost > OldCost || !NewCost.
isValid())
1156 replaceValue(
I, *Shuf);
1163bool VectorCombine::scalarizeVPIntrinsic(Instruction &
I) {
1177 if (!ScalarOp0 || !ScalarOp1)
1185 auto IsAllTrueMask = [](
Value *MaskVal) {
1188 return ConstValue->isAllOnesValue();
1202 SmallVector<int>
Mask;
1204 Mask.resize(FVTy->getNumElements(), 0);
1213 Args.push_back(
V->getType());
1214 IntrinsicCostAttributes
Attrs(IntrID, VecTy, Args);
1219 std::optional<unsigned> FunctionalOpcode =
1221 std::optional<Intrinsic::ID> ScalarIntrID = std::nullopt;
1222 if (!FunctionalOpcode) {
1231 IntrinsicCostAttributes
Attrs(*ScalarIntrID, VecTy->getScalarType(), Args);
1241 InstructionCost NewCost = ScalarOpCost + SplatCost + CostToKeepSplats;
1243 LLVM_DEBUG(
dbgs() <<
"Found a VP Intrinsic to scalarize: " << VPI
1246 <<
", Cost of scalarizing:" << NewCost <<
"\n");
1249 if (OldCost < NewCost || !NewCost.
isValid())
1260 bool SafeToSpeculate;
1266 *FunctionalOpcode, &VPI,
nullptr, &AC, &DT);
1267 if (!SafeToSpeculate &&
1274 {ScalarOp0, ScalarOp1})
1276 ScalarOp0, ScalarOp1);
1285bool VectorCombine::scalarizeOpOrCmp(Instruction &
I) {
1290 if (!UO && !BO && !CI && !
II)
1298 if (Arg->getType() !=
II->getType() &&
1308 for (User *U :
I.users())
1315 std::optional<uint64_t>
Index;
1317 auto Ops =
II ?
II->args() :
I.operands();
1321 uint64_t InsIdx = 0;
1326 if (OpTy->getElementCount().getKnownMinValue() <= InsIdx)
1332 else if (InsIdx != *Index)
1349 if (!
Index.has_value())
1353 Type *ScalarTy = VecTy->getScalarType();
1354 assert(VecTy->isVectorTy() &&
1357 "Unexpected types for insert element into binop or cmp");
1359 unsigned Opcode =
I.getOpcode();
1367 }
else if (UO || BO) {
1371 IntrinsicCostAttributes ScalarICA(
1372 II->getIntrinsicID(), ScalarTy,
1375 IntrinsicCostAttributes VectorICA(
1376 II->getIntrinsicID(), VecTy,
1383 Value *NewVecC =
nullptr;
1385 NewVecC =
simplifyCmpInst(CI->getPredicate(), VecCs[0], VecCs[1], SQ);
1388 simplifyUnOp(UO->getOpcode(), VecCs[0], UO->getFastMathFlags(), SQ);
1390 NewVecC =
simplifyBinOp(BO->getOpcode(), VecCs[0], VecCs[1], SQ);
1404 for (
auto [Idx,
Op, VecC, Scalar] :
enumerate(
Ops, VecCs, ScalarOps)) {
1406 II->getIntrinsicID(), Idx, &
TTI)))
1409 Instruction::InsertElement, VecTy,
CostKind, *Index, VecC, Scalar);
1410 OldCost += InsertCost;
1411 NewCost += !
Op->hasOneUse() * InsertCost;
1415 if (OldCost < NewCost || !NewCost.
isValid())
1425 ++NumScalarIntrinsic;
1435 Scalar = Builder.
CreateCmp(CI->getPredicate(), ScalarOps[0], ScalarOps[1]);
1441 Scalar->setName(
I.getName() +
".scalar");
1446 ScalarInst->copyIRFlags(&
I);
1449 replaceValue(
I, *Insert);
1456bool VectorCombine::foldExtractedCmps(Instruction &
I) {
1461 if (!BI || !
I.getType()->isIntegerTy(1))
1466 Value *B0 =
I.getOperand(0), *B1 =
I.getOperand(1);
1469 CmpPredicate
P0,
P1;
1481 uint64_t Index0, Index1;
1488 ExtractElementInst *ConvertToShuf = getShuffleExtract(Ext0, Ext1,
CostKind);
1491 assert((ConvertToShuf == Ext0 || ConvertToShuf == Ext1) &&
1492 "Unknown ExtractElementInst");
1497 unsigned CmpOpcode =
1512 Ext0Cost + Ext1Cost + CmpCost * 2 +
1518 int CheapIndex = ConvertToShuf == Ext0 ? Index1 : Index0;
1519 int ExpensiveIndex = ConvertToShuf == Ext0 ? Index0 : Index1;
1524 ShufMask[CheapIndex] = ExpensiveIndex;
1529 NewCost += Ext0->
hasOneUse() ? 0 : Ext0Cost;
1530 NewCost += Ext1->
hasOneUse() ? 0 : Ext1Cost;
1535 if (OldCost < NewCost || !NewCost.
isValid())
1545 Value *
LHS = ConvertToShuf == Ext0 ? Shuf : VCmp;
1546 Value *
RHS = ConvertToShuf == Ext0 ? VCmp : Shuf;
1549 replaceValue(
I, *NewExt);
1576bool VectorCombine::foldSelectsFromBitcast(Instruction &
I) {
1583 if (!SrcVecTy || !DstVecTy)
1593 if (SrcEltBits != 32 && SrcEltBits != 64)
1596 if (!DstEltTy->
isIntegerTy() || DstEltBits >= SrcEltBits)
1613 if (!ScalarSelCost.
isValid() || ScalarSelCost == 0)
1616 unsigned MinSelects = (VecSelCost.
getValue() / ScalarSelCost.
getValue()) + 1;
1619 if (!BC->hasNUsesOrMore(MinSelects))
1624 DenseMap<Value *, SmallVector<SelectInst *, 8>> CondToSelects;
1626 for (User *U : BC->users()) {
1631 for (User *ExtUser : Ext->users()) {
1635 Cond->getType()->isIntegerTy(1))
1640 if (CondToSelects.
empty())
1643 bool MadeChange =
false;
1644 Value *SrcVec = BC->getOperand(0);
1647 for (
auto [
Cond, Selects] : CondToSelects) {
1649 if (Selects.size() < MinSelects) {
1650 LLVM_DEBUG(
dbgs() <<
"VectorCombine: foldSelectsFromBitcast not "
1651 <<
"profitable (VecCost=" << VecSelCost
1652 <<
", ScalarCost=" << ScalarSelCost
1653 <<
", NumSelects=" << Selects.size() <<
")\n");
1658 auto InsertPt = std::next(BC->getIterator());
1662 InsertPt = std::next(CondInst->getIterator());
1670 for (SelectInst *Sel : Selects) {
1672 Value *Idx = Ext->getIndexOperand();
1676 replaceValue(*Sel, *NewExt);
1681 <<
" selects into vector select\n");
1695 unsigned ReductionOpc =
1701 CostBeforeReduction =
1702 TTI.getCastInstrCost(RedOp->getOpcode(), VecRedTy, ExtType,
1704 CostAfterReduction =
1705 TTI.getExtendedReductionCost(ReductionOpc, IsUnsigned,
II.getType(),
1709 if (RedOp &&
II.getIntrinsicID() == Intrinsic::vector_reduce_add &&
1715 (Op0->
getOpcode() == RedOp->getOpcode() || Op0 == Op1)) {
1722 TTI.getCastInstrCost(Op0->
getOpcode(), MulType, ExtType,
1725 TTI.getArithmeticInstrCost(Instruction::Mul, MulType,
CostKind);
1727 TTI.getCastInstrCost(RedOp->getOpcode(), VecRedTy, MulType,
1730 CostBeforeReduction = ExtCost * 2 + MulCost + Ext2Cost;
1731 CostAfterReduction =
TTI.getMulAccReductionCost(
1732 IsUnsigned, ReductionOpc,
II.getType(), ExtType,
CostKind);
1735 CostAfterReduction =
TTI.getArithmeticReductionCost(ReductionOpc, VecRedTy,
1739bool VectorCombine::foldBinopOfReductions(Instruction &
I) {
1742 if (BinOpOpc == Instruction::Sub)
1743 ReductionIID = Intrinsic::vector_reduce_add;
1747 auto checkIntrinsicAndGetItsArgument = [](
Value *
V,
1752 if (
II->getIntrinsicID() == IID &&
II->hasOneUse())
1753 return II->getArgOperand(0);
1757 Value *V0 = checkIntrinsicAndGetItsArgument(
I.getOperand(0), ReductionIID);
1760 Value *V1 = checkIntrinsicAndGetItsArgument(
I.getOperand(1), ReductionIID);
1769 unsigned ReductionOpc =
1782 CostOfRedOperand0 + CostOfRedOperand1 +
1785 if (NewCost >= OldCost || !NewCost.
isValid())
1789 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
1792 if (BinOpOpc == Instruction::Or)
1793 VectorBO = Builder.
CreateOr(V0, V1,
"",
1799 replaceValue(
I, *Rdx);
1807 unsigned NumScanned = 0;
1808 return std::any_of(Begin, End, [&](
const Instruction &Instr) {
1817class ScalarizationResult {
1818 enum class StatusTy { Unsafe, Safe, SafeWithFreeze };
1823 ScalarizationResult(StatusTy Status,
Value *ToFreeze =
nullptr)
1824 : Status(Status), ToFreeze(ToFreeze) {}
1827 ScalarizationResult(
const ScalarizationResult &
Other) =
default;
1828 ~ScalarizationResult() {
1829 assert(!ToFreeze &&
"freeze() not called with ToFreeze being set");
1832 static ScalarizationResult unsafe() {
return {StatusTy::Unsafe}; }
1833 static ScalarizationResult safe() {
return {StatusTy::Safe}; }
1834 static ScalarizationResult safeWithFreeze(
Value *ToFreeze) {
1835 return {StatusTy::SafeWithFreeze, ToFreeze};
1839 bool isSafe()
const {
return Status == StatusTy::Safe; }
1841 bool isUnsafe()
const {
return Status == StatusTy::Unsafe; }
1844 bool isSafeWithFreeze()
const {
return Status == StatusTy::SafeWithFreeze; }
1849 Status = StatusTy::Unsafe;
1853 void freeze(IRBuilderBase &Builder, Instruction &UserI) {
1854 assert(isSafeWithFreeze() &&
1855 "should only be used when freezing is required");
1857 "UserI must be a user of ToFreeze");
1858 IRBuilder<>::InsertPointGuard Guard(Builder);
1863 if (
U.get() == ToFreeze)
1880 uint64_t NumElements = VecTy->getElementCount().getKnownMinValue();
1884 if (
C->getValue().ult(NumElements))
1885 return ScalarizationResult::safe();
1886 return ScalarizationResult::unsafe();
1891 return ScalarizationResult::unsafe();
1893 APInt Zero(IntWidth, 0);
1894 APInt MaxElts(IntWidth, NumElements);
1900 true, &AC, CtxI, &DT)))
1901 return ScalarizationResult::safe();
1902 return ScalarizationResult::unsafe();
1915 if (ValidIndices.
contains(IdxRange))
1916 return ScalarizationResult::safeWithFreeze(IdxBase);
1917 return ScalarizationResult::unsafe();
1929 C->getZExtValue() *
DL.getTypeStoreSize(ScalarType));
1941bool VectorCombine::foldSingleElementStore(Instruction &
I) {
1953 if (!
match(
SI->getValueOperand(),
1960 Value *SrcAddr =
Load->getPointerOperand()->stripPointerCasts();
1963 if (!
Load->isSimple() ||
Load->getParent() !=
SI->getParent() ||
1964 !
DL->typeSizeEqualsStoreSize(
Load->getType()->getScalarType()) ||
1965 SrcAddr !=
SI->getPointerOperand()->stripPointerCasts())
1969 if (ScalarizableIdx.isUnsafe() ||
1976 Worklist.
push(Load);
1978 if (ScalarizableIdx.isSafeWithFreeze())
1981 SI->getValueOperand()->getType(),
SI->getPointerOperand(),
1982 {ConstantInt::get(Idx->getType(), 0), Idx});
1986 std::max(
SI->getAlign(),
Load->getAlign()), NewElement->
getType(), Idx,
1989 replaceValue(
I, *NSI);
1999bool VectorCombine::scalarizeLoad(Instruction &
I) {
2006 if (LI->isVolatile() || !
DL->typeSizeEqualsStoreSize(VecTy->getScalarType()))
2009 bool AllExtracts =
true;
2010 bool AllBitcasts =
true;
2012 unsigned NumInstChecked = 0;
2017 for (User *U : LI->users()) {
2019 if (!UI || UI->getParent() != LI->getParent())
2024 if (UI->use_empty())
2028 AllExtracts =
false;
2030 AllBitcasts =
false;
2034 for (Instruction &
I :
2035 make_range(std::next(LI->getIterator()), UI->getIterator())) {
2042 LastCheckedInst = UI;
2047 return scalarizeLoadExtract(LI, VecTy, Ptr);
2049 return scalarizeLoadBitcast(LI, VecTy, Ptr);
2054bool VectorCombine::scalarizeLoadExtract(LoadInst *LI, VectorType *VecTy,
2059 DenseMap<ExtractElementInst *, ScalarizationResult> NeedFreeze;
2062 for (
auto &Pair : NeedFreeze)
2063 Pair.second.discard();
2071 for (User *U : LI->
users()) {
2076 if (ScalarIdx.isUnsafe())
2078 if (ScalarIdx.isSafeWithFreeze()) {
2079 NeedFreeze.try_emplace(UI, ScalarIdx);
2080 ScalarIdx.discard();
2086 Index ?
Index->getZExtValue() : -1);
2094 LLVM_DEBUG(
dbgs() <<
"Found all extractions of a vector load: " << *LI
2095 <<
"\n LoadExtractCost: " << OriginalCost
2096 <<
" vs ScalarizedCost: " << ScalarizedCost <<
"\n");
2098 if (ScalarizedCost >= OriginalCost)
2105 Type *ElemType = VecTy->getElementType();
2108 for (User *U : LI->
users()) {
2110 Value *Idx = EI->getIndexOperand();
2113 auto It = NeedFreeze.find(EI);
2114 if (It != NeedFreeze.end())
2121 Builder.
CreateLoad(ElemType,
GEP, EI->getName() +
".scalar"));
2123 Align ScalarOpAlignment =
2125 NewLoad->setAlignment(ScalarOpAlignment);
2128 size_t Offset = ConstIdx->getZExtValue() *
DL->getTypeStoreSize(ElemType);
2133 replaceValue(*EI, *NewLoad,
false);
2136 FailureGuard.release();
2141bool VectorCombine::scalarizeLoadBitcast(LoadInst *LI, VectorType *VecTy,
2147 Type *TargetScalarType =
nullptr;
2148 unsigned VecBitWidth =
DL->getTypeSizeInBits(VecTy);
2150 for (User *U : LI->
users()) {
2153 Type *DestTy = BC->getDestTy();
2157 unsigned DestBitWidth =
DL->getTypeSizeInBits(DestTy);
2158 if (DestBitWidth != VecBitWidth)
2162 if (!TargetScalarType)
2163 TargetScalarType = DestTy;
2164 else if (TargetScalarType != DestTy)
2172 if (!TargetScalarType)
2180 LLVM_DEBUG(
dbgs() <<
"Found vector load feeding only bitcasts: " << *LI
2181 <<
"\n OriginalCost: " << OriginalCost
2182 <<
" vs ScalarizedCost: " << ScalarizedCost <<
"\n");
2184 if (ScalarizedCost >= OriginalCost)
2195 ScalarLoad->copyMetadata(*LI);
2198 for (User *U : LI->
users()) {
2200 replaceValue(*BC, *ScalarLoad,
false);
2206bool VectorCombine::scalarizeExtExtract(Instruction &
I) {
2221 Type *ScalarDstTy = DstTy->getElementType();
2222 if (
DL->getTypeSizeInBits(SrcTy) !=
DL->getTypeSizeInBits(ScalarDstTy))
2228 unsigned ExtCnt = 0;
2229 bool ExtLane0 =
false;
2230 for (User *U : Ext->users()) {
2244 Instruction::And, ScalarDstTy,
CostKind,
2247 (ExtCnt - ExtLane0) *
2249 Instruction::LShr, ScalarDstTy,
CostKind,
2252 if (ScalarCost > VectorCost)
2255 Value *ScalarV = Ext->getOperand(0);
2262 SmallDenseSet<ConstantInt *, 8> ExtractedLanes;
2263 bool AllExtractsTriggerUB =
true;
2264 ExtractElementInst *LastExtract =
nullptr;
2266 for (User *U : Ext->users()) {
2269 AllExtractsTriggerUB =
false;
2273 if (!LastExtract || LastExtract->
comesBefore(Extract))
2274 LastExtract = Extract;
2276 if (ExtractedLanes.
size() != DstTy->getNumElements() ||
2277 !AllExtractsTriggerUB ||
2285 uint64_t SrcEltSizeInBits =
DL->getTypeSizeInBits(SrcTy->getElementType());
2286 uint64_t TotalBits =
DL->getTypeSizeInBits(SrcTy);
2289 Value *
Mask = ConstantInt::get(PackedTy, EltBitMask);
2290 for (User *U : Ext->users()) {
2296 ? (TotalBits - SrcEltSizeInBits - Idx * SrcEltSizeInBits)
2297 : (Idx * SrcEltSizeInBits);
2300 U->replaceAllUsesWith(
And);
2308bool VectorCombine::foldConcatOfBoolMasks(Instruction &
I) {
2309 Type *Ty =
I.getType();
2314 if (
DL->isBigEndian())
2325 uint64_t ShAmtX = 0;
2333 uint64_t ShAmtY = 0;
2341 if (ShAmtX > ShAmtY) {
2349 uint64_t ShAmtDiff = ShAmtY - ShAmtX;
2350 unsigned NumSHL = (ShAmtX > 0) + (ShAmtY > 0);
2355 MaskTy->getNumElements() != ShAmtDiff ||
2356 MaskTy->getNumElements() > (
BitWidth / 2))
2361 Type::getIntNTy(Ty->
getContext(), ConcatTy->getNumElements());
2362 auto *MaskIntTy = Type::getIntNTy(Ty->
getContext(), ShAmtDiff);
2365 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
2382 if (Ty != ConcatIntTy)
2388 LLVM_DEBUG(
dbgs() <<
"Found a concatenation of bitcasted bool masks: " <<
I
2389 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2392 if (NewCost > OldCost)
2402 if (Ty != ConcatIntTy) {
2412 replaceValue(
I, *Result);
2418bool VectorCombine::foldPermuteOfBinops(Instruction &
I) {
2419 BinaryOperator *BinOp;
2420 ArrayRef<int> OuterMask;
2428 Value *Op00, *Op01, *Op10, *Op11;
2429 ArrayRef<int> Mask0, Mask1;
2434 if (!Match0 && !Match1)
2447 if (!ShuffleDstTy || !BinOpTy || !Op0Ty || !Op1Ty)
2450 unsigned NumSrcElts = BinOpTy->getNumElements();
2455 any_of(OuterMask, [NumSrcElts](
int M) {
return M >= (int)NumSrcElts; }))
2459 SmallVector<int> NewMask0, NewMask1;
2460 for (
int M : OuterMask) {
2461 if (M < 0 || M >= (
int)NumSrcElts) {
2465 NewMask0.
push_back(Match0 ? Mask0[M] : M);
2466 NewMask1.
push_back(Match1 ? Mask1[M] : M);
2470 unsigned NumOpElts = Op0Ty->getNumElements();
2471 bool IsIdentity0 = ShuffleDstTy == Op0Ty &&
2472 all_of(NewMask0, [NumOpElts](
int M) {
return M < (int)NumOpElts; }) &&
2474 bool IsIdentity1 = ShuffleDstTy == Op1Ty &&
2475 all_of(NewMask1, [NumOpElts](
int M) {
return M < (int)NumOpElts; }) &&
2484 ShuffleDstTy, BinOpTy, OuterMask,
CostKind,
2485 0,
nullptr, {BinOp}, &
I);
2487 NewCost += BinOpCost;
2493 OldCost += Shuf0Cost;
2495 NewCost += Shuf0Cost;
2501 OldCost += Shuf1Cost;
2503 NewCost += Shuf1Cost;
2511 Op0Ty, NewMask0,
CostKind, 0,
nullptr, {Op00, Op01});
2515 Op1Ty, NewMask1,
CostKind, 0,
nullptr, {Op10, Op11});
2517 LLVM_DEBUG(
dbgs() <<
"Found a shuffle feeding a shuffled binop: " <<
I
2518 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2522 if (NewCost > OldCost)
2533 NewInst->copyIRFlags(BinOp);
2537 replaceValue(
I, *NewBO);
2543bool VectorCombine::foldShuffleOfBinops(Instruction &
I) {
2544 ArrayRef<int> OldMask;
2551 if (
LHS->getOpcode() !=
RHS->getOpcode())
2555 bool IsCommutative =
false;
2564 IsCommutative = BinaryOperator::isCommutative(BO->getOpcode());
2575 if (!ShuffleDstTy || !BinResTy || !BinOpTy ||
X->getType() !=
Z->getType())
2578 bool SameBinOp =
LHS ==
RHS;
2579 unsigned NumSrcElts = BinOpTy->getNumElements();
2582 if (IsCommutative &&
X != Z &&
Y != W && (
X == W ||
Y == Z))
2585 auto ConvertToUnary = [NumSrcElts](
int &
M) {
2586 if (M >= (
int)NumSrcElts)
2590 SmallVector<int> NewMask0(OldMask);
2599 SmallVector<int> NewMask1(OldMask);
2618 ShuffleDstTy, BinResTy, OldMask,
CostKind, 0,
2628 ArrayRef<int> InnerMask;
2630 m_Mask(InnerMask)))) &&
2633 [NumSrcElts](
int M) {
return M < (int)NumSrcElts; })) {
2645 bool ReducedInstCount =
false;
2646 ReducedInstCount |= MergeInner(
X, 0, NewMask0,
CostKind);
2647 ReducedInstCount |= MergeInner(
Y, 0, NewMask1,
CostKind);
2648 ReducedInstCount |= MergeInner(Z, NumSrcElts, NewMask0,
CostKind);
2649 ReducedInstCount |= MergeInner(W, NumSrcElts, NewMask1,
CostKind);
2650 bool SingleSrcBinOp = (
X ==
Y) && (Z == W) && (NewMask0 == NewMask1);
2655 auto *ShuffleCmpTy =
2658 SK0, ShuffleCmpTy, BinOpTy, NewMask0,
CostKind, 0,
nullptr, {
X,
Z});
2659 if (!SingleSrcBinOp)
2669 PredLHS,
CostKind, Op0Info, Op1Info);
2679 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2686 if (ReducedInstCount ? (NewCost > OldCost) : (NewCost >= OldCost))
2695 : Builder.
CreateCmp(PredLHS, Shuf0, Shuf1);
2699 NewInst->copyIRFlags(
LHS);
2700 NewInst->andIRFlags(
RHS);
2705 replaceValue(
I, *NewBO);
2712bool VectorCombine::foldShuffleOfSelects(Instruction &
I) {
2714 Value *C1, *
T1, *F1, *C2, *T2, *F2;
2725 if (!C1VecTy || !C2VecTy || C1VecTy != C2VecTy)
2731 if (((SI0FOp ==
nullptr) != (SI1FOp ==
nullptr)) ||
2732 ((SI0FOp !=
nullptr) &&
2733 (SI0FOp->getFastMathFlags() != SI1FOp->getFastMathFlags())))
2739 auto SelOp = Instruction::Select;
2747 CostSel1 + CostSel2 +
2749 {
I.getOperand(0),
I.getOperand(1)}, &
I);
2753 Mask,
CostKind, 0,
nullptr, {C1, C2});
2763 if (!Sel1->hasOneUse())
2764 NewCost += CostSel1;
2765 if (!Sel2->hasOneUse())
2766 NewCost += CostSel2;
2769 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2771 if (NewCost > OldCost)
2780 NewSel = Builder.
CreateSelectFMF(ShuffleCmp, ShuffleTrue, ShuffleFalse,
2781 SI0FOp->getFastMathFlags());
2783 NewSel = Builder.
CreateSelect(ShuffleCmp, ShuffleTrue, ShuffleFalse);
2788 replaceValue(
I, *NewSel);
2794bool VectorCombine::foldShuffleOfCastops(Instruction &
I) {
2796 ArrayRef<int> OldMask;
2805 if (!C0 || (IsBinaryShuffle && !C1))
2812 if (!IsBinaryShuffle && Opcode == Instruction::BitCast)
2815 if (IsBinaryShuffle) {
2816 if (C0->getSrcTy() != C1->getSrcTy())
2819 if (Opcode != C1->getOpcode()) {
2821 Opcode = Instruction::SExt;
2830 if (!ShuffleDstTy || !CastDstTy || !CastSrcTy)
2833 unsigned NumSrcElts = CastSrcTy->getNumElements();
2834 unsigned NumDstElts = CastDstTy->getNumElements();
2835 assert((NumDstElts == NumSrcElts || Opcode == Instruction::BitCast) &&
2836 "Only bitcasts expected to alter src/dst element counts");
2840 if (NumDstElts != NumSrcElts && (NumSrcElts % NumDstElts) != 0 &&
2841 (NumDstElts % NumSrcElts) != 0)
2844 SmallVector<int, 16> NewMask;
2845 if (NumSrcElts >= NumDstElts) {
2848 assert(NumSrcElts % NumDstElts == 0 &&
"Unexpected shuffle mask");
2849 unsigned ScaleFactor = NumSrcElts / NumDstElts;
2854 assert(NumDstElts % NumSrcElts == 0 &&
"Unexpected shuffle mask");
2855 unsigned ScaleFactor = NumDstElts / NumSrcElts;
2860 auto *NewShuffleDstTy =
2869 if (IsBinaryShuffle)
2884 if (IsBinaryShuffle) {
2894 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2896 if (NewCost > OldCost)
2900 if (IsBinaryShuffle)
2910 NewInst->copyIRFlags(C0);
2911 if (IsBinaryShuffle)
2912 NewInst->andIRFlags(C1);
2916 replaceValue(
I, *Cast);
2926bool VectorCombine::foldShuffleOfShuffles(Instruction &
I) {
2927 ArrayRef<int> OuterMask;
2928 Value *OuterV0, *OuterV1;
2933 ArrayRef<int> InnerMask0, InnerMask1;
2934 Value *X0, *X1, *Y0, *Y1;
2939 if (!Match0 && !Match1)
2944 SmallVector<int, 16> PoisonMask1;
2949 InnerMask1 = PoisonMask1;
2953 X0 = Match0 ? X0 : OuterV0;
2954 Y0 = Match0 ? Y0 : OuterV0;
2955 X1 = Match1 ? X1 : OuterV1;
2956 Y1 = Match1 ? Y1 : OuterV1;
2960 if (!ShuffleDstTy || !ShuffleSrcTy || !ShuffleImmTy ||
2964 unsigned NumSrcElts = ShuffleSrcTy->getNumElements();
2965 unsigned NumImmElts = ShuffleImmTy->getNumElements();
2970 SmallVector<int, 16> NewMask(OuterMask);
2971 Value *NewX =
nullptr, *NewY =
nullptr;
2972 for (
int &M : NewMask) {
2973 Value *Src =
nullptr;
2974 if (0 <= M && M < (
int)NumImmElts) {
2978 Src =
M >= (int)NumSrcElts ? Y0 : X0;
2979 M =
M >= (int)NumSrcElts ? (M - NumSrcElts) :
M;
2981 }
else if (M >= (
int)NumImmElts) {
2986 Src =
M >= (int)NumSrcElts ? Y1 : X1;
2987 M =
M >= (int)NumSrcElts ? (M - NumSrcElts) :
M;
2991 assert(0 <= M && M < (
int)NumSrcElts &&
"Unexpected shuffle mask index");
3000 if (!NewX || NewX == Src) {
3004 if (!NewY || NewY == Src) {
3020 replaceValue(
I, *NewX);
3037 bool IsUnary =
all_of(NewMask, [&](
int M) {
return M < (int)NumSrcElts; });
3043 nullptr, {NewX, NewY});
3045 NewCost += InnerCost0;
3047 NewCost += InnerCost1;
3050 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
3052 if (NewCost > OldCost)
3056 replaceValue(
I, *Shuf);
3072bool VectorCombine::foldShufflesOfLengthChangingShuffles(Instruction &
I) {
3077 unsigned ChainLength = 0;
3078 SmallVector<int>
Mask;
3079 SmallVector<int> YMask;
3089 ArrayRef<int> OuterMask;
3090 Value *OuterV0, *OuterV1;
3091 if (ChainLength != 0 && !Trunk->
hasOneUse())
3094 m_Mask(OuterMask))))
3096 if (OuterV0->
getType() != TrunkType) {
3102 ArrayRef<int> InnerMask0, InnerMask1;
3103 Value *A0, *A1, *B0, *B1;
3108 bool Match0Leaf = Match0 && A0->
getType() !=
I.getType();
3109 bool Match1Leaf = Match1 && A1->
getType() !=
I.getType();
3110 if (Match0Leaf == Match1Leaf) {
3116 SmallVector<int> CommutedOuterMask;
3123 for (
int &M : CommutedOuterMask) {
3126 if (M < (
int)NumTrunkElts)
3131 OuterMask = CommutedOuterMask;
3150 int NumLeafElts = YType->getNumElements();
3151 SmallVector<int> LocalYMask(InnerMask1);
3152 for (
int &M : LocalYMask) {
3153 if (M >= NumLeafElts)
3163 Mask.assign(OuterMask);
3164 YMask.
assign(LocalYMask);
3165 OldCost = NewCost = LocalOldCost;
3172 SmallVector<int> NewYMask(YMask);
3174 for (
auto [CombinedM, LeafM] :
llvm::zip(NewYMask, LocalYMask)) {
3175 if (LeafM == -1 || CombinedM == LeafM)
3177 if (CombinedM == -1) {
3187 SmallVector<int> NewMask;
3188 NewMask.
reserve(NumTrunkElts);
3189 for (
int M : Mask) {
3190 if (M < 0 || M >=
static_cast<int>(NumTrunkElts))
3205 if (LocalNewCost >= NewCost && LocalOldCost < LocalNewCost - NewCost)
3209 if (ChainLength == 1) {
3210 dbgs() <<
"Found chain of shuffles fed by length-changing shuffles: "
3213 dbgs() <<
" next chain link: " << *Trunk <<
'\n'
3214 <<
" old cost: " << (OldCost + LocalOldCost)
3215 <<
" new cost: " << LocalNewCost <<
'\n';
3220 OldCost += LocalOldCost;
3221 NewCost = LocalNewCost;
3225 if (ChainLength <= 1)
3229 return M < 0 || M >=
static_cast<int>(NumTrunkElts);
3232 for (
int &M : Mask) {
3233 if (M >=
static_cast<int>(NumTrunkElts))
3234 M = YMask[
M - NumTrunkElts];
3238 replaceValue(
I, *Root);
3245 replaceValue(
I, *Root);
3251bool VectorCombine::foldShuffleOfIntrinsics(Instruction &
I) {
3253 ArrayRef<int> OldMask;
3263 if (IID != II1->getIntrinsicID())
3272 if (!ShuffleDstTy || !II0Ty)
3278 for (
unsigned I = 0,
E = II0->arg_size();
I !=
E; ++
I)
3280 II0->getArgOperand(
I) != II1->getArgOperand(
I))
3286 II0Ty, OldMask,
CostKind, 0,
nullptr, {II0, II1}, &
I);
3290 SmallDenseSet<std::pair<Value *, Value *>> SeenOperandPairs;
3291 for (
unsigned I = 0,
E = II0->arg_size();
I !=
E; ++
I) {
3293 NewArgsTy.
push_back(II0->getArgOperand(
I)->getType());
3297 ShuffleDstTy->getNumElements());
3299 std::pair<Value *, Value *> OperandPair =
3300 std::make_pair(II0->getArgOperand(
I), II1->getArgOperand(
I));
3301 if (!SeenOperandPairs.
insert(OperandPair).second) {
3307 CostKind, 0,
nullptr, {II0->getArgOperand(
I), II1->getArgOperand(
I)});
3310 IntrinsicCostAttributes NewAttr(IID, ShuffleDstTy, NewArgsTy);
3313 if (!II0->hasOneUse())
3315 if (II1 != II0 && !II1->hasOneUse())
3319 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
3322 if (NewCost > OldCost)
3326 SmallDenseMap<std::pair<Value *, Value *>,
Value *> ShuffleCache;
3327 for (
unsigned I = 0,
E = II0->arg_size();
I !=
E; ++
I)
3331 std::pair<Value *, Value *> OperandPair =
3332 std::make_pair(II0->getArgOperand(
I), II1->getArgOperand(
I));
3333 auto It = ShuffleCache.
find(OperandPair);
3334 if (It != ShuffleCache.
end()) {
3340 II1->getArgOperand(
I), OldMask);
3341 ShuffleCache[OperandPair] = Shuf;
3349 NewInst->copyIRFlags(II0);
3350 NewInst->andIRFlags(II1);
3353 replaceValue(
I, *NewIntrinsic);
3359bool VectorCombine::foldPermuteOfIntrinsic(Instruction &
I) {
3371 if (!ShuffleDstTy || !IntrinsicSrcTy)
3375 unsigned NumSrcElts = IntrinsicSrcTy->getNumElements();
3376 if (
any_of(Mask, [NumSrcElts](
int M) {
return M >= (int)NumSrcElts; }))
3389 IntrinsicSrcTy, Mask,
CostKind, 0,
nullptr, {V0}, &
I);
3393 for (
unsigned I = 0,
E = II0->arg_size();
I !=
E; ++
I) {
3395 NewArgsTy.
push_back(II0->getArgOperand(
I)->getType());
3399 ShuffleDstTy->getNumElements());
3402 ArgTy, VecTy, Mask,
CostKind, 0,
nullptr,
3403 {II0->getArgOperand(
I)});
3406 IntrinsicCostAttributes NewAttr(IID, ShuffleDstTy, NewArgsTy);
3411 if (!II0->hasOneUse())
3414 LLVM_DEBUG(
dbgs() <<
"Found a permute of intrinsic: " <<
I <<
"\n OldCost: "
3415 << OldCost <<
" vs NewCost: " << NewCost <<
"\n");
3417 if (NewCost > OldCost)
3422 for (
unsigned I = 0,
E = II0->arg_size();
I !=
E; ++
I) {
3437 replaceValue(
I, *NewIntrinsic);
3447 int M = SV->getMaskValue(Lane);
3450 if (
static_cast<unsigned>(M) < NumElts) {
3451 U = &SV->getOperandUse(0);
3454 U = &SV->getOperandUse(1);
3465 auto [U, Lane] = IL;
3479 unsigned NumElts = Ty->getNumElements();
3480 if (Item.
size() == NumElts || NumElts == 1 || Item.
size() % NumElts != 0)
3486 std::iota(ConcatMask.
begin(), ConcatMask.
end(), 0);
3492 unsigned NumSlices = Item.
size() / NumElts;
3497 for (
unsigned Slice = 0; Slice < NumSlices; ++Slice) {
3498 Use *SliceV = Item[Slice * NumElts].first;
3499 if (!SliceV || SliceV->get()->
getType() != Ty)
3501 for (
unsigned Elt = 0; Elt < NumElts; ++Elt) {
3502 auto [V, Lane] = Item[Slice * NumElts + Elt];
3503 if (Lane !=
static_cast<int>(Elt) || SliceV->get() != V->get())
3516 auto [FrontU, FrontLane] = Item.
front();
3518 if (IdentityLeafs.
contains(FrontU)) {
3519 return FrontU->get();
3523 return Builder.CreateShuffleVector(FrontU->get(), Mask);
3525 if (ConcatLeafs.
contains(FrontU)) {
3529 for (
unsigned S = 0; S < Values.
size(); ++S)
3530 Values[S] = Item[S * NumElts].first->get();
3532 while (Values.
size() > 1) {
3535 std::iota(Mask.begin(), Mask.end(), 0);
3537 for (
unsigned S = 0; S < NewValues.
size(); ++S)
3539 Builder.CreateShuffleVector(Values[S * 2], Values[S * 2 + 1], Mask);
3547 unsigned NumOps =
I->getNumOperands() - (
II ? 1 : 0);
3549 for (
unsigned Idx = 0; Idx <
NumOps; Idx++) {
3552 Ops[Idx] =
II->getOperand(Idx);
3556 Ty, IdentityLeafs, SplatLeafs, ConcatLeafs,
3561 for (
const auto &Lane : Item)
3574 auto *
Value = Builder.CreateCmp(CI->getPredicate(),
Ops[0],
Ops[1]);
3584 auto *
Value = Builder.CreateCast(CI->getOpcode(),
Ops[0], DstTy);
3589 auto *
Value = Builder.CreateIntrinsic(DstTy,
II->getIntrinsicID(),
Ops);
3603bool VectorCombine::foldShuffleToIdentity(Instruction &
I) {
3605 if (!Ty ||
I.use_empty())
3609 for (
unsigned M = 0,
E = Ty->getNumElements(); M <
E; ++M)
3614 SmallPtrSet<Use *, 4> IdentityLeafs, SplatLeafs, ConcatLeafs;
3615 unsigned NumVisited = 0;
3617 while (!Worklist.
empty()) {
3622 auto [FrontU, FrontLane] = Item.
front();
3630 return X->getType() ==
Y->getType() &&
3635 if (FrontLane == 0 &&
3637 Ty->getNumElements() &&
3640 return !
E.value().first || (IsEquiv(
E.value().first->get(), FrontV) &&
3641 E.value().second == (int)
E.index());
3643 IdentityLeafs.
insert(FrontU);
3648 C &&
C->getSplatValue() &&
3656 SplatLeafs.
insert(FrontU);
3661 auto [FrontU, FrontLane] = Item.
front();
3662 auto [
U, Lane] = IL;
3663 return !
U || (
U->get() == FrontU->get() && Lane == FrontLane);
3665 SplatLeafs.
insert(FrontU);
3671 auto CheckLaneIsEquivalentToFirst = [Item](
InstLane IL) {
3675 Value *
V = IL.first->get();
3681 if (CI->getPredicate() !=
cast<CmpInst>(FrontV)->getPredicate())
3684 if (CI->getSrcTy()->getScalarType() !=
3689 SI->getOperand(0)->getType() !=
3696 II->getIntrinsicID() ==
3698 !
II->hasOperandBundles());
3705 BO && BO->isIntDivRem())
3710 }
else if (
isa<UnaryOperator, TruncInst, ZExtInst, SExtInst, FPToSIInst,
3711 FPToUIInst, SIToFPInst, UIToFPInst>(FrontU)) {
3718 if (DstTy && SrcTy &&
3719 SrcTy->getNumElements() == DstTy->getNumElements()) {
3730 !
II->hasOperandBundles()) {
3731 for (
unsigned Op = 0,
E =
II->getNumOperands() - 1;
Op <
E;
Op++) {
3750 ConcatLeafs.
insert(FrontU);
3757 if (NumVisited <= 1)
3760 LLVM_DEBUG(
dbgs() <<
"Found a superfluous identity shuffle: " <<
I <<
"\n");
3766 ConcatLeafs, Builder, &
TTI);
3767 replaceValue(
I, *V);
3774bool VectorCombine::foldShuffleFromReductions(Instruction &
I) {
3778 switch (
II->getIntrinsicID()) {
3779 case Intrinsic::vector_reduce_add:
3780 case Intrinsic::vector_reduce_mul:
3781 case Intrinsic::vector_reduce_and:
3782 case Intrinsic::vector_reduce_or:
3783 case Intrinsic::vector_reduce_xor:
3784 case Intrinsic::vector_reduce_smin:
3785 case Intrinsic::vector_reduce_smax:
3786 case Intrinsic::vector_reduce_umin:
3787 case Intrinsic::vector_reduce_umax:
3796 std::queue<Value *> Worklist;
3797 SmallPtrSet<Value *, 4> Visited;
3798 ShuffleVectorInst *Shuffle =
nullptr;
3802 while (!Worklist.empty()) {
3803 Value *CV = Worklist.front();
3815 if (CI->isBinaryOp()) {
3816 for (
auto *
Op : CI->operand_values())
3820 if (Shuffle && Shuffle != SV)
3837 for (
auto *V : Visited)
3838 for (
auto *U :
V->users())
3839 if (!Visited.contains(U) && U != &
I)
3842 FixedVectorType *VecType =
3846 FixedVectorType *ShuffleInputType =
3848 if (!ShuffleInputType)
3854 SmallVector<int> ConcatMask;
3856 sort(ConcatMask, [](
int X,
int Y) {
return (
unsigned)
X < (unsigned)
Y; });
3857 bool UsesSecondVec =
3858 any_of(ConcatMask, [&](
int M) {
return M >= (int)NumInputElts; });
3865 ShuffleInputType, ConcatMask,
CostKind);
3867 LLVM_DEBUG(
dbgs() <<
"Found a reduction feeding from a shuffle: " << *Shuffle
3869 LLVM_DEBUG(
dbgs() <<
" OldCost: " << OldCost <<
" vs NewCost: " << NewCost
3871 bool MadeChanges =
false;
3872 if (NewCost < OldCost) {
3876 LLVM_DEBUG(
dbgs() <<
"Created new shuffle: " << *NewShuffle <<
"\n");
3877 replaceValue(*Shuffle, *NewShuffle);
3883 MadeChanges |= foldSelectShuffle(*Shuffle,
true);
3929bool VectorCombine::foldShuffleChainsToReduce(Instruction &
I) {
3931 std::queue<Value *> InstWorklist;
3935 std::optional<unsigned int> CommonCallOp = std::nullopt;
3936 std::optional<Instruction::BinaryOps> CommonBinOp = std::nullopt;
3938 bool IsFirstCallOrBinInst =
true;
3939 bool ShouldBeCallOrBinInst =
true;
3945 SmallVector<Value *, 2> PrevVecV(2,
nullptr);
3955 int64_t
VecSize = FVT->getNumElements();
3961 unsigned int NumLevels =
Log2_64_Ceil(VecSize), VisitedCnt = 0;
3962 int64_t ShuffleMaskHalf = 1, ExpectedParityMask = 0;
3972 for (
int Cur = VecSize, Mask = NumLevels - 1; Cur > 1;
3973 Cur = (Cur + 1) / 2, --
Mask) {
3975 ExpectedParityMask |= (1ll <<
Mask);
3978 InstWorklist.push(VecOpEE);
3980 while (!InstWorklist.empty()) {
3981 Value *CI = InstWorklist.front();
3985 if (!ShouldBeCallOrBinInst)
3988 if (!IsFirstCallOrBinInst &&
any_of(PrevVecV,
equal_to(
nullptr)))
3993 if (
II != (IsFirstCallOrBinInst ? VecOpEE : PrevVecV[0]))
3995 IsFirstCallOrBinInst =
false;
3998 CommonCallOp =
II->getIntrinsicID();
3999 if (
II->getIntrinsicID() != *CommonCallOp)
4002 switch (
II->getIntrinsicID()) {
4003 case Intrinsic::umin:
4004 case Intrinsic::umax:
4005 case Intrinsic::smin:
4006 case Intrinsic::smax: {
4007 auto *Op0 =
II->getOperand(0);
4008 auto *Op1 =
II->getOperand(1);
4016 ShouldBeCallOrBinInst ^= 1;
4018 IntrinsicCostAttributes ICA(
4019 *CommonCallOp,
II->getType(),
4020 {PrevVecV[0]->getType(), PrevVecV[1]->getType()});
4027 InstWorklist.push(PrevVecV[1]);
4028 InstWorklist.push(PrevVecV[0]);
4032 if (!ShouldBeCallOrBinInst)
4035 if (!IsFirstCallOrBinInst &&
any_of(PrevVecV,
equal_to(
nullptr)))
4038 if (BinOp != (IsFirstCallOrBinInst ? VecOpEE : PrevVecV[0]))
4040 IsFirstCallOrBinInst =
false;
4048 switch (*CommonBinOp) {
4049 case BinaryOperator::Add:
4050 case BinaryOperator::Mul:
4051 case BinaryOperator::Or:
4052 case BinaryOperator::And:
4053 case BinaryOperator::Xor: {
4063 ShouldBeCallOrBinInst ^= 1;
4070 InstWorklist.push(PrevVecV[1]);
4071 InstWorklist.push(PrevVecV[0]);
4075 if (ShouldBeCallOrBinInst ||
any_of(PrevVecV,
equal_to(
nullptr)))
4078 if (SVInst != PrevVecV[1])
4081 ArrayRef<int> CurMask;
4087 for (
int Mask = 0, MaskSize = CurMask.
size(); Mask != MaskSize; ++Mask) {
4088 if (Mask < ShuffleMaskHalf &&
4089 CurMask[Mask] != ShuffleMaskHalf + Mask - (ExpectedParityMask & 1))
4091 if (Mask >= ShuffleMaskHalf && CurMask[Mask] != -1)
4096 ShuffleMaskHalf *= 2;
4097 ShuffleMaskHalf -= (ExpectedParityMask & 1);
4098 ExpectedParityMask >>= 1;
4101 SVInst->getType(), SVInst->getType(),
4105 if (!ExpectedParityMask && VisitedCnt == NumLevels)
4108 ShouldBeCallOrBinInst ^= 1;
4115 if (ShouldBeCallOrBinInst)
4118 assert(VecSize != -1 &&
"Expected Match for Vector Size");
4120 Value *FinalVecV = PrevVecV[0];
4132 IntrinsicCostAttributes ICA(ReducedOp, FinalVecVTy, {FinalVecV});
4135 if (NewCost >= OrigCost)
4138 auto *ReducedResult =
4140 replaceValue(
I, *ReducedResult);
4149bool VectorCombine::foldCastFromReductions(Instruction &
I) {
4154 bool TruncOnly =
false;
4157 case Intrinsic::vector_reduce_add:
4158 case Intrinsic::vector_reduce_mul:
4161 case Intrinsic::vector_reduce_and:
4162 case Intrinsic::vector_reduce_or:
4163 case Intrinsic::vector_reduce_xor:
4170 Value *ReductionSrc =
I.getOperand(0);
4182 Type *ResultTy =
I.getType();
4185 ReductionOpc, ReductionSrcTy, std::nullopt,
CostKind);
4195 if (OldCost <= NewCost || !NewCost.
isValid())
4199 II->getIntrinsicID(), {Src});
4201 replaceValue(
I, *NewCast);
4223bool VectorCombine::foldSignBitReductionCmp(Instruction &
I) {
4226 const APInt *CmpVal;
4231 if (!
II || !
II->hasOneUse())
4236 case Intrinsic::vector_reduce_or:
4237 case Intrinsic::vector_reduce_umax:
4238 case Intrinsic::vector_reduce_and:
4239 case Intrinsic::vector_reduce_umin:
4240 case Intrinsic::vector_reduce_add:
4246 Value *ReductionSrc =
II->getArgOperand(0);
4254 unsigned BitWidth = VecTy->getScalarSizeInBits();
4258 unsigned NumElts = VecTy->getNumElements();
4263 if (OrigIID == Intrinsic::vector_reduce_add && !
isIntN(
BitWidth, NumElts))
4275 unsigned Count = (OrigIID == Intrinsic::vector_reduce_add) ? NumElts : 1;
4278 NegativeVal.negate();
4310 TestsNegative =
false;
4311 }
else if (*CmpVal == NegativeVal) {
4312 TestsNegative =
true;
4316 IsEq = Pred == ICmpInst::ICMP_EQ;
4317 }
else if (Pred == ICmpInst::ICMP_SLT && *CmpVal == RangeHigh) {
4319 TestsNegative = (RangeHigh == NegativeVal);
4320 }
else if (Pred == ICmpInst::ICMP_SGT && *CmpVal == RangeHigh - 1) {
4322 TestsNegative = (RangeHigh == NegativeVal);
4323 }
else if (Pred == ICmpInst::ICMP_SGT && *CmpVal == RangeLow) {
4325 TestsNegative = (RangeLow == NegativeVal);
4326 }
else if (Pred == ICmpInst::ICMP_SLT && *CmpVal == RangeLow + 1) {
4328 TestsNegative = (RangeLow == NegativeVal);
4371 enum CheckKind :
unsigned {
4378 auto RequiresOr = [](CheckKind
C) ->
bool {
return C & 0b100; };
4380 auto IsNegativeCheck = [](CheckKind
C) ->
bool {
return C & 0b010; };
4382 auto Invert = [](CheckKind
C) {
return CheckKind(
C ^ 0b011); };
4386 case Intrinsic::vector_reduce_or:
4387 case Intrinsic::vector_reduce_umax:
4388 Base = TestsNegative ? AnyNeg : AllNonNeg;
4390 case Intrinsic::vector_reduce_and:
4391 case Intrinsic::vector_reduce_umin:
4392 Base = TestsNegative ? AllNeg : AnyNonNeg;
4394 case Intrinsic::vector_reduce_add:
4395 Base = TestsNegative ? AllNeg : AllNonNeg;
4415 return ArithCost <= MinMaxCost ? std::make_pair(Arith, ArithCost)
4416 : std::make_pair(MinMax, MinMaxCost);
4420 auto [NewIID, NewCost] = RequiresOr(
Check)
4421 ? PickCheaper(Intrinsic::vector_reduce_or,
4422 Intrinsic::vector_reduce_umax)
4423 : PickCheaper(
Intrinsic::vector_reduce_and,
4426 LLVM_DEBUG(
dbgs() <<
"Found sign-bit reduction cmp: " <<
I <<
"\n OldCost: "
4427 << OldCost <<
" vs NewCost: " << NewCost <<
"\n");
4429 if (NewCost > OldCost)
4435 Type *ScalarTy = VecTy->getScalarType();
4439 replaceValue(
I, *NewCmp);
4464bool VectorCombine::foldICmpEqZeroVectorReduce(Instruction &
I) {
4475 switch (
II->getIntrinsicID()) {
4476 case Intrinsic::vector_reduce_add:
4477 case Intrinsic::vector_reduce_or:
4478 case Intrinsic::vector_reduce_umin:
4479 case Intrinsic::vector_reduce_umax:
4480 case Intrinsic::vector_reduce_smin:
4481 case Intrinsic::vector_reduce_smax:
4487 Value *InnerOp =
II->getArgOperand(0);
4530 switch (
II->getIntrinsicID()) {
4531 case Intrinsic::vector_reduce_add: {
4536 unsigned NumElems = XTy->getNumElements();
4542 if (LeadingZerosX <= LostBits || LeadingZerosFX <= LostBits)
4550 case Intrinsic::vector_reduce_smin:
4551 case Intrinsic::vector_reduce_smax:
4561 LLVM_DEBUG(
dbgs() <<
"Found a reduction to 0 comparison with removable op: "
4577 case Intrinsic::vector_reduce_add:
4578 case Intrinsic::vector_reduce_or:
4584 case Intrinsic::vector_reduce_umin:
4585 case Intrinsic::vector_reduce_umax:
4586 case Intrinsic::vector_reduce_smin:
4587 case Intrinsic::vector_reduce_smax:
4599 NewReduceCost + (InnerOp->
hasOneUse() ? 0 : ExtCost);
4601 LLVM_DEBUG(
dbgs() <<
"Found a removable extension before reduction: "
4602 << *InnerOp <<
"\n OldCost: " << OldCost
4603 <<
" vs NewCost: " << NewCost <<
"\n");
4609 if (NewCost > OldCost)
4618 Builder.
CreateICmp(Pred, NewReduce, ConstantInt::getNullValue(Ty));
4619 replaceValue(
I, *NewCmp);
4650bool VectorCombine::foldEquivalentReductionCmp(Instruction &
I) {
4653 const APInt *CmpVal;
4658 if (!
II || !
II->hasOneUse())
4661 const auto IsValidOrUmaxCmp = [&]() {
4670 bool IsPositive = CmpVal->
isAllOnes() && Pred == ICmpInst::ICMP_SGT;
4672 bool IsNegative = (CmpVal->
isZero() || CmpVal->
isOne() || *CmpVal == 2) &&
4673 Pred == ICmpInst::ICMP_SLT;
4674 return IsEquality || IsPositive || IsNegative;
4677 const auto IsValidAndUminCmp = [&]() {
4682 const auto LeadingOnes = CmpVal->
countl_one();
4689 bool IsNegative = CmpVal->
isZero() && Pred == ICmpInst::ICMP_SLT;
4698 ((*CmpVal)[0] || (*CmpVal)[1]) && Pred == ICmpInst::ICMP_SGT;
4699 return IsEquality || IsNegative || IsPositive;
4707 switch (OriginalIID) {
4708 case Intrinsic::vector_reduce_or:
4709 if (!IsValidOrUmaxCmp())
4711 AlternativeIID = Intrinsic::vector_reduce_umax;
4713 case Intrinsic::vector_reduce_umax:
4714 if (!IsValidOrUmaxCmp())
4716 AlternativeIID = Intrinsic::vector_reduce_or;
4718 case Intrinsic::vector_reduce_and:
4719 if (!IsValidAndUminCmp())
4721 AlternativeIID = Intrinsic::vector_reduce_umin;
4723 case Intrinsic::vector_reduce_umin:
4724 if (!IsValidAndUminCmp())
4726 AlternativeIID = Intrinsic::vector_reduce_and;
4739 if (ReductionOpc != Instruction::ICmp)
4750 <<
"\n OrigCost: " << OrigCost
4751 <<
" vs AltCost: " << AltCost <<
"\n");
4753 if (AltCost >= OrigCost)
4757 Type *ScalarTy = VecTy->getScalarType();
4760 Builder.
CreateICmp(Pred, NewReduce, ConstantInt::get(ScalarTy, *CmpVal));
4762 replaceValue(
I, *NewCmp);
4771 constexpr unsigned MaxVisited = 32;
4774 bool FoundReduction =
false;
4777 while (!WorkList.
empty()) {
4779 for (
User *U :
I->users()) {
4781 if (!UI || !Visited.
insert(UI).second)
4783 if (Visited.
size() > MaxVisited)
4789 switch (
II->getIntrinsicID()) {
4790 case Intrinsic::vector_reduce_add:
4791 case Intrinsic::vector_reduce_mul:
4792 case Intrinsic::vector_reduce_and:
4793 case Intrinsic::vector_reduce_or:
4794 case Intrinsic::vector_reduce_xor:
4795 case Intrinsic::vector_reduce_smin:
4796 case Intrinsic::vector_reduce_smax:
4797 case Intrinsic::vector_reduce_umin:
4798 case Intrinsic::vector_reduce_umax:
4799 FoundReduction =
true;
4812 return FoundReduction;
4825bool VectorCombine::foldSelectShuffle(Instruction &
I,
bool FromReduction) {
4830 if (!Op0 || !Op1 || Op0 == Op1 || !Op0->isBinaryOp() || !Op1->isBinaryOp() ||
4838 SmallPtrSet<Instruction *, 4> InputShuffles({SVI0A, SVI0B, SVI1A, SVI1B});
4840 if (!
I ||
I->getOperand(0)->getType() != VT)
4842 return any_of(
I->users(), [&](User *U) {
4843 return U != Op0 && U != Op1 &&
4844 !(isa<ShuffleVectorInst>(U) &&
4845 (InputShuffles.contains(cast<Instruction>(U)) ||
4846 isInstructionTriviallyDead(cast<Instruction>(U))));
4849 if (checkSVNonOpUses(SVI0A) || checkSVNonOpUses(SVI0B) ||
4850 checkSVNonOpUses(SVI1A) || checkSVNonOpUses(SVI1B))
4858 for (
auto *U :
I->users()) {
4860 if (!SV || SV->getType() != VT)
4862 if ((SV->getOperand(0) != Op0 && SV->getOperand(0) != Op1) ||
4863 (SV->getOperand(1) != Op0 && SV->getOperand(1) != Op1))
4870 if (!collectShuffles(Op0) || !collectShuffles(Op1))
4874 if (FromReduction && Shuffles.
size() > 1)
4879 if (!FromReduction) {
4880 for (ShuffleVectorInst *SV : Shuffles) {
4881 for (
auto *U : SV->users()) {
4884 Shuffles.push_back(SSV);
4896 int MaxV1Elt = 0, MaxV2Elt = 0;
4897 unsigned NumElts = VT->getNumElements();
4898 for (ShuffleVectorInst *SVN : Shuffles) {
4899 SmallVector<int>
Mask;
4900 SVN->getShuffleMask(Mask);
4904 Value *SVOp0 = SVN->getOperand(0);
4905 Value *SVOp1 = SVN->getOperand(1);
4910 for (
int &Elem : Mask) {
4916 if (SVOp0 == Op1 && SVOp1 == Op0) {
4920 if (SVOp0 != Op0 || SVOp1 != Op1)
4926 SmallVector<int> ReconstructMask;
4927 for (
unsigned I = 0;
I <
Mask.size();
I++) {
4930 }
else if (Mask[
I] <
static_cast<int>(NumElts)) {
4931 MaxV1Elt = std::max(MaxV1Elt, Mask[
I]);
4932 auto It =
find_if(V1, [&](
const std::pair<int, int> &
A) {
4933 return Mask[
I] ==
A.first;
4942 MaxV2Elt = std::max<int>(MaxV2Elt, Mask[
I] - NumElts);
4943 auto It =
find_if(V2, [&](
const std::pair<int, int> &
A) {
4944 return Mask[
I] -
static_cast<int>(NumElts) ==
A.first;
4958 sort(ReconstructMask);
4959 OrigReconstructMasks.
push_back(std::move(ReconstructMask));
4967 (MaxV1Elt ==
static_cast<int>(V1.
size()) - 1 &&
4968 MaxV2Elt ==
static_cast<int>(V2.
size()) - 1))
4980 if (InputShuffles.contains(SSV))
4982 return SV->getMaskValue(M);
4990 std::pair<int, int>
Y) {
4991 int MXA = GetBaseMaskValue(
A,
X.first);
4992 int MYA = GetBaseMaskValue(
A,
Y.first);
4995 stable_sort(V1, [&](std::pair<int, int>
A, std::pair<int, int>
B) {
4996 return SortBase(SVI0A,
A,
B);
4998 stable_sort(V2, [&](std::pair<int, int>
A, std::pair<int, int>
B) {
4999 return SortBase(SVI1A,
A,
B);
5004 for (
const auto &Mask : OrigReconstructMasks) {
5005 SmallVector<int> ReconstructMask;
5006 for (
int M : Mask) {
5008 auto It =
find_if(V, [M](
auto A) {
return A.second ==
M; });
5009 assert(It !=
V.end() &&
"Expected all entries in Mask");
5010 return std::distance(
V.begin(), It);
5014 else if (M <
static_cast<int>(NumElts)) {
5015 ReconstructMask.
push_back(FindIndex(V1, M));
5017 ReconstructMask.
push_back(NumElts + FindIndex(V2, M));
5020 ReconstructMasks.
push_back(std::move(ReconstructMask));
5025 SmallVector<int> V1A, V1B, V2A, V2B;
5026 for (
unsigned I = 0;
I < V1.
size();
I++) {
5027 V1A.
push_back(GetBaseMaskValue(SVI0A, V1[
I].first));
5028 V1B.
push_back(GetBaseMaskValue(SVI0B, V1[
I].first));
5030 for (
unsigned I = 0;
I < V2.
size();
I++) {
5031 V2A.
push_back(GetBaseMaskValue(SVI1A, V2[
I].first));
5032 V2B.
push_back(GetBaseMaskValue(SVI1B, V2[
I].first));
5034 while (V1A.
size() < NumElts) {
5038 while (V2A.
size() < NumElts) {
5050 VT, VT, SV->getShuffleMask(),
CostKind);
5057 unsigned ElementSize = VT->getElementType()->getPrimitiveSizeInBits();
5058 unsigned MaxVectorSize =
5060 unsigned MaxElementsInVector = MaxVectorSize / ElementSize;
5061 if (MaxElementsInVector == 0)
5070 std::set<SmallVector<int, 4>> UniqueShuffles;
5075 unsigned NumFullVectors =
Mask.size() / MaxElementsInVector;
5076 if (NumFullVectors < 2)
5077 return C + ShuffleCost;
5078 SmallVector<int, 4> SubShuffle(MaxElementsInVector);
5079 unsigned NumUniqueGroups = 0;
5080 unsigned NumGroups =
Mask.size() / MaxElementsInVector;
5083 for (
unsigned I = 0;
I < NumFullVectors; ++
I) {
5084 for (
unsigned J = 0; J < MaxElementsInVector; ++J)
5085 SubShuffle[J] = Mask[MaxElementsInVector *
I + J];
5086 if (UniqueShuffles.insert(SubShuffle).second)
5087 NumUniqueGroups += 1;
5089 return C + ShuffleCost * NumUniqueGroups / NumGroups;
5095 SmallVector<int, 16>
Mask;
5096 SV->getShuffleMask(Mask);
5097 return AddShuffleMaskAdjustedCost(
C, Mask);
5100 auto AllShufflesHaveSameOperands =
5101 [](SmallPtrSetImpl<Instruction *> &InputShuffles) {
5102 if (InputShuffles.size() < 2)
5104 ShuffleVectorInst *FirstSV =
5111 std::next(InputShuffles.begin()), InputShuffles.end(),
5112 [&](Instruction *
I) {
5113 ShuffleVectorInst *SV = dyn_cast<ShuffleVectorInst>(I);
5114 return SV && SV->getOperand(0) == In0 && SV->getOperand(1) == In1;
5123 CostBefore += std::accumulate(Shuffles.begin(), Shuffles.end(),
5125 if (AllShufflesHaveSameOperands(InputShuffles)) {
5126 UniqueShuffles.clear();
5127 CostBefore += std::accumulate(InputShuffles.begin(), InputShuffles.end(),
5130 CostBefore += std::accumulate(InputShuffles.begin(), InputShuffles.end(),
5136 FixedVectorType *Op0SmallVT =
5138 FixedVectorType *Op1SmallVT =
5143 UniqueShuffles.clear();
5144 CostAfter += std::accumulate(ReconstructMasks.begin(), ReconstructMasks.end(),
5146 std::set<SmallVector<int>> OutputShuffleMasks({V1A, V1B, V2A, V2B});
5148 std::accumulate(OutputShuffleMasks.begin(), OutputShuffleMasks.end(),
5151 LLVM_DEBUG(
dbgs() <<
"Found a binop select shuffle pattern: " <<
I <<
"\n");
5153 <<
" vs CostAfter: " << CostAfter <<
"\n");
5154 if (CostBefore < CostAfter ||
5165 if (InputShuffles.contains(SSV))
5167 return SV->getOperand(
Op);
5171 GetShuffleOperand(SVI0A, 1), V1A);
5174 GetShuffleOperand(SVI0B, 1), V1B);
5177 GetShuffleOperand(SVI1A, 1), V2A);
5180 GetShuffleOperand(SVI1B, 1), V2B);
5185 I->copyIRFlags(Op0,
true);
5190 I->copyIRFlags(Op1,
true);
5192 for (
int S = 0,
E = ReconstructMasks.size(); S !=
E; S++) {
5195 replaceValue(*Shuffles[S], *NSV,
false);
5198 Worklist.pushValue(NSV0A);
5199 Worklist.pushValue(NSV0B);
5200 Worklist.pushValue(NSV1A);
5201 Worklist.pushValue(NSV1B);
5211bool VectorCombine::shrinkType(Instruction &
I) {
5212 Value *ZExted, *OtherOperand;
5218 Value *ZExtOperand =
I.getOperand(
I.getOperand(0) == OtherOperand ? 1 : 0);
5222 unsigned BW = SmallTy->getElementType()->getPrimitiveSizeInBits();
5224 if (
I.getOpcode() == Instruction::LShr) {
5241 Instruction::ZExt, BigTy, SmallTy,
5242 TargetTransformInfo::CastContextHint::None,
CostKind);
5247 for (User *U : ZExtOperand->
users()) {
5254 ShrinkCost += ZExtCost;
5269 ShrinkCost += ZExtCost;
5276 Instruction::Trunc, SmallTy, BigTy,
5277 TargetTransformInfo::CastContextHint::None,
CostKind);
5282 if (ShrinkCost > CurrentCost)
5286 Value *Op0 = ZExted;
5289 if (
I.getOperand(0) == OtherOperand)
5296 replaceValue(
I, *NewZExtr);
5302bool VectorCombine::foldInsExtVectorToShuffle(Instruction &
I) {
5303 Value *DstVec, *SrcVec;
5304 uint64_t ExtIdx, InsIdx;
5314 if (!DstVecTy || !SrcVecTy ||
5320 if (InsIdx >= NumDstElts || ExtIdx >= NumSrcElts || NumDstElts == 1)
5327 bool NeedExpOrNarrow = NumSrcElts != NumDstElts;
5329 if (NeedDstSrcSwap) {
5331 Mask[InsIdx] = ExtIdx % NumDstElts;
5335 std::iota(
Mask.begin(),
Mask.end(), 0);
5336 Mask[InsIdx] = (ExtIdx % NumDstElts) + NumDstElts;
5349 SmallVector<int> ExtToVecMask;
5350 if (!NeedExpOrNarrow) {
5355 nullptr, {DstVec, SrcVec});
5361 ExtToVecMask[ExtIdx % NumDstElts] = ExtIdx;
5364 DstVecTy, SrcVecTy, ExtToVecMask,
CostKind);
5368 if (!Ext->hasOneUse())
5371 LLVM_DEBUG(
dbgs() <<
"Found a insert/extract shuffle-like pair: " <<
I
5372 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
5375 if (OldCost < NewCost)
5378 if (NeedExpOrNarrow) {
5379 if (!NeedDstSrcSwap)
5392 replaceValue(
I, *Shuf);
5401bool VectorCombine::foldInterleaveIntrinsics(Instruction &
I) {
5402 const APInt *SplatVal0, *SplatVal1;
5412 auto *ExtVTy = VectorType::getExtendedElementVectorType(VTy);
5413 unsigned Width = VTy->getElementType()->getIntegerBitWidth();
5422 LLVM_DEBUG(
dbgs() <<
"VC: The cost to cast from " << *ExtVTy <<
" to "
5423 << *
I.getType() <<
" is too high.\n");
5427 APInt NewSplatVal = SplatVal1->
zext(Width * 2);
5428 NewSplatVal <<= Width;
5429 NewSplatVal |= SplatVal0->
zext(Width * 2);
5431 ExtVTy->getElementCount(), ConstantInt::get(
F.getContext(), NewSplatVal));
5439bool VectorCombine::shrinkLoadForShuffles(Instruction &
I) {
5441 if (!OldLoad || !OldLoad->isSimple())
5448 unsigned const OldNumElements = OldLoadTy->getNumElements();
5454 using IndexRange = std::pair<int, int>;
5455 auto GetIndexRangeInShuffles = [&]() -> std::optional<IndexRange> {
5456 IndexRange OutputRange = IndexRange(OldNumElements, -1);
5457 for (llvm::Use &Use :
I.uses()) {
5459 User *Shuffle =
Use.getUser();
5464 return std::nullopt;
5471 for (
int Index : Mask) {
5472 if (Index >= 0 && Index <
static_cast<int>(OldNumElements)) {
5473 OutputRange.first = std::min(Index, OutputRange.first);
5474 OutputRange.second = std::max(Index, OutputRange.second);
5479 if (OutputRange.second < OutputRange.first)
5480 return std::nullopt;
5486 if (std::optional<IndexRange> Indices = GetIndexRangeInShuffles()) {
5487 unsigned const NewNumElements = Indices->second + 1u;
5491 if (NewNumElements < OldNumElements) {
5496 Type *ElemTy = OldLoadTy->getElementType();
5498 Value *PtrOp = OldLoad->getPointerOperand();
5501 Instruction::Load, OldLoad->getType(), OldLoad->getAlign(),
5502 OldLoad->getPointerAddressSpace(),
CostKind);
5505 OldLoad->getPointerAddressSpace(),
CostKind);
5507 using UseEntry = std::pair<ShuffleVectorInst *, std::vector<int>>;
5509 unsigned const MaxIndex = NewNumElements * 2u;
5511 for (llvm::Use &Use :
I.uses()) {
5518 ArrayRef<int> OldMask = Shuffle->getShuffleMask();
5524 for (
int Index : OldMask) {
5525 if (Index >=
static_cast<int>(MaxIndex))
5539 dbgs() <<
"Found a load used only by shufflevector instructions: "
5540 <<
I <<
"\n OldCost: " << OldCost
5541 <<
" vs NewCost: " << NewCost <<
"\n");
5543 if (OldCost < NewCost || !NewCost.
isValid())
5549 NewLoad->copyMetadata(
I);
5552 for (UseEntry &Use : NewUses) {
5553 ShuffleVectorInst *Shuffle =
Use.first;
5554 std::vector<int> &NewMask =
Use.second;
5561 replaceValue(*Shuffle, *NewShuffle,
false);
5574bool VectorCombine::shrinkPhiOfShuffles(Instruction &
I) {
5576 if (!Phi ||
Phi->getNumIncomingValues() != 2u)
5580 ArrayRef<int> Mask0;
5581 ArrayRef<int> Mask1;
5594 auto const InputNumElements = InputVT->getNumElements();
5596 if (InputNumElements >= ResultVT->getNumElements())
5601 SmallVector<int, 16> NewMask;
5604 for (
auto [
M0,
M1] :
zip(Mask0, Mask1)) {
5605 if (
M0 >= 0 &&
M1 >= 0)
5607 else if (
M0 == -1 &&
M1 == -1)
5620 int MaskOffset = NewMask[0
u];
5621 unsigned Index = (InputNumElements + MaskOffset) % InputNumElements;
5624 for (
unsigned I = 0u;
I < InputNumElements; ++
I) {
5638 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
5641 if (NewCost > OldCost)
5653 auto *NewPhi = Builder.
CreatePHI(NewShuf0->getType(), 2u);
5655 NewPhi->addIncoming(
Op,
Phi->getIncomingBlock(1u));
5661 replaceValue(*Phi, *NewShuf1);
5667bool VectorCombine::run() {
5681 auto Opcode =
I.getOpcode();
5689 if (IsFixedVectorType) {
5691 case Instruction::InsertElement:
5692 if (vectorizeLoadInsert(
I))
5695 case Instruction::ShuffleVector:
5696 if (widenSubvectorLoad(
I))
5707 if (scalarizeOpOrCmp(
I))
5709 if (scalarizeLoad(
I))
5711 if (scalarizeExtExtract(
I))
5713 if (scalarizeVPIntrinsic(
I))
5715 if (foldInterleaveIntrinsics(
I))
5719 if (Opcode == Instruction::Store)
5720 if (foldSingleElementStore(
I))
5724 if (TryEarlyFoldsOnly)
5731 if (IsFixedVectorType) {
5733 case Instruction::InsertElement:
5734 if (foldInsExtFNeg(
I))
5736 if (foldInsExtBinop(
I))
5738 if (foldInsExtVectorToShuffle(
I))
5741 case Instruction::ShuffleVector:
5742 if (foldPermuteOfBinops(
I))
5744 if (foldShuffleOfBinops(
I))
5746 if (foldShuffleOfSelects(
I))
5748 if (foldShuffleOfCastops(
I))
5750 if (foldShuffleOfShuffles(
I))
5752 if (foldPermuteOfIntrinsic(
I))
5754 if (foldShufflesOfLengthChangingShuffles(
I))
5756 if (foldShuffleOfIntrinsics(
I))
5758 if (foldSelectShuffle(
I))
5760 if (foldShuffleToIdentity(
I))
5763 case Instruction::Load:
5764 if (shrinkLoadForShuffles(
I))
5767 case Instruction::BitCast:
5768 if (foldBitcastShuffle(
I))
5770 if (foldSelectsFromBitcast(
I))
5773 case Instruction::And:
5774 case Instruction::Or:
5775 case Instruction::Xor:
5776 if (foldBitOpOfCastops(
I))
5778 if (foldBitOpOfCastConstant(
I))
5781 case Instruction::PHI:
5782 if (shrinkPhiOfShuffles(
I))
5792 case Instruction::Call:
5793 if (foldShuffleFromReductions(
I))
5795 if (foldCastFromReductions(
I))
5798 case Instruction::ExtractElement:
5799 if (foldShuffleChainsToReduce(
I))
5802 case Instruction::ICmp:
5803 if (foldSignBitReductionCmp(
I))
5805 if (foldICmpEqZeroVectorReduce(
I))
5807 if (foldEquivalentReductionCmp(
I))
5810 case Instruction::FCmp:
5811 if (foldExtractExtract(
I))
5814 case Instruction::Or:
5815 if (foldConcatOfBoolMasks(
I))
5820 if (foldExtractExtract(
I))
5822 if (foldExtractedCmps(
I))
5824 if (foldBinopOfReductions(
I))
5833 bool MadeChange =
false;
5834 for (BasicBlock &BB :
F) {
5846 if (!
I->isDebugOrPseudoInst())
5847 MadeChange |= FoldInst(*
I);
5854 while (!Worklist.isEmpty()) {
5864 MadeChange |= FoldInst(*
I);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< unsigned > MaxInstrsToScan("aggressive-instcombine-max-scan-instrs", cl::init(64), cl::Hidden, cl::desc("Max number of instructions to scan for aggressive instcombine."))
This is the interface for LLVM's primary stateless and local alias analysis.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
static cl::opt< IntrinsicCostStrategy > IntrinsicCost("intrinsic-cost-strategy", cl::desc("Costing strategy for intrinsic instructions"), cl::init(IntrinsicCostStrategy::InstructionCost), cl::values(clEnumValN(IntrinsicCostStrategy::InstructionCost, "instruction-cost", "Use TargetTransformInfo::getInstructionCost"), clEnumValN(IntrinsicCostStrategy::IntrinsicCost, "intrinsic-cost", "Use TargetTransformInfo::getIntrinsicInstrCost"), clEnumValN(IntrinsicCostStrategy::TypeBasedIntrinsicCost, "type-based-intrinsic-cost", "Calculate the intrinsic cost based only on argument types")))
This file defines the DenseMap class.
This is the interface for a simple mod/ref and alias analysis over globals.
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
FunctionAnalysisManager FAM
const SmallVectorImpl< MachineOperand > & Cond
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static SymbolRef::Type getType(const Symbol *Sym)
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
static Value * generateNewInstTree(ArrayRef< InstLane > Item, FixedVectorType *Ty, const SmallPtrSet< Use *, 4 > &IdentityLeafs, const SmallPtrSet< Use *, 4 > &SplatLeafs, const SmallPtrSet< Use *, 4 > &ConcatLeafs, IRBuilderBase &Builder, const TargetTransformInfo *TTI)
static bool isFreeConcat(ArrayRef< InstLane > Item, TTI::TargetCostKind CostKind, const TargetTransformInfo &TTI)
Detect concat of multiple values into a vector.
static void analyzeCostOfVecReduction(const IntrinsicInst &II, TTI::TargetCostKind CostKind, const TargetTransformInfo &TTI, InstructionCost &CostBeforeReduction, InstructionCost &CostAfterReduction)
static SmallVector< InstLane > generateInstLaneVectorFromOperand(ArrayRef< InstLane > Item, int Op)
static Value * createShiftShuffle(Value *Vec, unsigned OldIndex, unsigned NewIndex, IRBuilderBase &Builder)
Create a shuffle that translates (shifts) 1 element from the input vector to a new element location.
static Align computeAlignmentAfterScalarization(Align VectorAlignment, Type *ScalarType, Value *Idx, const DataLayout &DL)
The memory operation on a vector of ScalarType had alignment of VectorAlignment.
static bool feedsIntoVectorReduction(ShuffleVectorInst *SVI)
Returns true if this ShuffleVectorInst eventually feeds into a vector reduction intrinsic (e....
static ScalarizationResult canScalarizeAccess(VectorType *VecTy, Value *Idx, Instruction *CtxI, AssumptionCache &AC, const DominatorTree &DT)
Check if it is legal to scalarize a memory access to VecTy at index Idx.
static cl::opt< bool > DisableVectorCombine("disable-vector-combine", cl::init(false), cl::Hidden, cl::desc("Disable all vector combine transforms"))
static InstLane lookThroughShuffles(Use *U, int Lane)
static bool canWidenLoad(LoadInst *Load, const TargetTransformInfo &TTI)
static const unsigned InvalidIndex
std::pair< Use *, int > InstLane
static Value * translateExtract(ExtractElementInst *ExtElt, unsigned NewIndex, IRBuilderBase &Builder)
Given an extract element instruction with constant index operand, shuffle the source vector (shift th...
static cl::opt< unsigned > MaxInstrsToScan("vector-combine-max-scan-instrs", cl::init(30), cl::Hidden, cl::desc("Max number of instructions to scan for vector combining."))
static cl::opt< bool > DisableBinopExtractShuffle("disable-binop-extract-shuffle", cl::init(false), cl::Hidden, cl::desc("Disable binop extract to shuffle transforms"))
static bool isMemModifiedBetween(BasicBlock::iterator Begin, BasicBlock::iterator End, const MemoryLocation &Loc, AAResults &AA)
static constexpr int Concat[]
A manager for alias analyses.
Class for arbitrary precision integers.
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
unsigned getBitWidth() const
Return the number of bits in the APInt.
bool isNegative() const
Determine sign of this APInt.
unsigned countl_one() const
Count the number of leading one bits.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
bool isOne() const
Determine if this is a value of 1.
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & front() const
front - Get the first element.
size_t size() const
size - Get the array size.
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM_ABI bool hasAttribute(Attribute::AttrKind Kind) const
Return true if the attribute exists in this set.
InstListType::iterator iterator
Instruction iterators...
BinaryOps getOpcode() const
Represents analyses that only rely on functions' control flow.
Value * getArgOperand(unsigned i) const
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
bool isFPPredicate() const
static LLVM_ABI std::optional< CmpPredicate > getMatching(CmpPredicate A, CmpPredicate B)
Compares two CmpPredicates taking samesign into account and returns the canonicalized CmpPredicate if...
static LLVM_ABI Constant * getExtractElement(Constant *Vec, Constant *Idx, Type *OnlyIfReducedTy=nullptr)
This is the shared class of boolean and integer constants.
const APInt & getValue() const
Return the constant as an APInt value reference.
This class represents a range of values.
LLVM_ABI ConstantRange urem(const ConstantRange &Other) const
Return a new range representing the possible values resulting from an unsigned remainder operation of...
LLVM_ABI ConstantRange binaryAnd(const ConstantRange &Other) const
Return a new range representing the possible values resulting from a binary-and of a value in this ra...
LLVM_ABI bool contains(const APInt &Val) const
Return true if the specified value is in the set.
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
iterator find(const_arg_type_t< KeyT > Val)
Analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
LLVM_ABI bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Convenience struct for specifying and reasoning about fast-math flags.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static FixedVectorType * getDoubleElementsVectorType(FixedVectorType *VTy)
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Predicate getSignedPredicate() const
For example, EQ->EQ, SLE->SLE, UGT->SGT, etc.
bool isEquality() const
Return true if this predicate is either EQ or NE.
Common base class shared among various IRBuilders.
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
LLVM_ABI Value * CreateSelectFMF(Value *C, Value *True, Value *False, FMFSource FMFSource, const Twine &Name="", Instruction *MDFrom=nullptr)
LLVM_ABI Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Value * CreateFreeze(Value *V, const Twine &Name="")
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="", MDNode *FPMathTag=nullptr, FMFSource FMFSource={})
Value * CreateIsNotNeg(Value *Arg, const Twine &Name="")
Return a boolean value testing if Arg > -1.
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Value * CreatePointerBitCastOrAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
InstTy * Insert(InstTy *I, const Twine &Name="") const
Insert and return the specified instruction.
Value * CreateIsNeg(Value *Arg, const Twine &Name="")
Return a boolean value testing if Arg < 0.
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
LLVM_ABI Value * CreateNAryOp(unsigned Opc, ArrayRef< Value * > Ops, const Twine &Name="", MDNode *FPMathTag=nullptr)
Create either a UnaryOperator or BinaryOperator depending on Opc.
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Value * CreateFNegFMF(Value *V, FMFSource FMFSource, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
CostType getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
void push(Instruction *I)
Push the instruction onto the worklist stack.
LLVM_ABI void setHasNoUnsignedWrap(bool b=true)
Set or clear the nuw flag on this instruction, which must be an operator which supports this flag.
LLVM_ABI void copyIRFlags(const Value *V, bool IncludeWrapFlags=true)
Convenience method to copy supported exact, fast-math, and (optionally) wrapping flags from V to this...
LLVM_ABI void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI void andIRFlags(const Value *V)
Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction.
LLVM_ABI void setNonNeg(bool b=true)
Set or clear the nneg flag on this instruction, which must be a zext instruction.
LLVM_ABI bool comesBefore(const Instruction *Other) const
Given an instruction Other in the same basic block as this instruction, return true if this instructi...
LLVM_ABI AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
An instruction for reading from memory.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
void setAlignment(Align Align)
Type * getPointerOperandType() const
Align getAlign() const
Return the alignment of the access that is being performed.
Representation for a specific memory location.
static LLVM_ABI MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
const SDValue & getOperand(unsigned Num) const
This instruction constructs a fixed permutation of two input vectors.
int getMaskValue(unsigned Elt) const
Return the shuffle mask value of this instruction for the given element index.
VectorType * getType() const
Overload to return most specific vector type.
static LLVM_ABI void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static LLVM_ABI bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
static void commuteShuffleMask(MutableArrayRef< int > Mask, unsigned InVecNumElts)
Change values in a shuffle permute mask assuming the two vector operands of length InVecNumElts have ...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
void assign(size_type NumElts, ValueParamT Elt)
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
void setAlignment(Align Align)
Analysis pass providing the TargetTransformInfo.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isPointerTy() const
True if this is an instance of PointerType.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
A Use represents the edge between a Value definition and its users.
Value * getOperand(unsigned i) const
static LLVM_ABI bool isVPBinOp(Intrinsic::ID ID)
std::optional< unsigned > getFunctionalIntrinsicID() const
std::optional< unsigned > getFunctionalOpcode() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
This is a wrapper around stripAndAccumulateConstantOffsets with the in-bounds requirement set to fals...
LLVM_ABI bool hasOneUser() const
Return true if there is exactly one user of this value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
unsigned getValueID() const
Return an ID for the concrete type of this object.
LLVM_ABI bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &)
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Type * getElementType() const
std::pair< iterator, bool > insert(const ValueT &V)
const ParentTy * getParent() const
self_iterator getIterator()
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Abstract Attribute helper functions.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
const APInt & smin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be signed.
const APInt & smax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be signed.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
LLVM_ABI AttributeSet getFnAttributes(LLVMContext &C, ID id)
Return the function attributes for an intrinsic.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
class_match< PoisonValue > m_Poison()
Match an arbitrary poison constant.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
BinaryOp_match< LHS, RHS, Instruction::URem > m_URem(const LHS &L, const RHS &R)
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
DisjointOr_match< LHS, RHS > m_DisjointOr(const LHS &L, const RHS &R)
BinOpPred_match< LHS, RHS, is_right_shift_op > m_Shr(const LHS &L, const RHS &R)
Matches logical shift operations.
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
match_combine_and< LTy, RTy > m_CombineAnd(const LTy &L, const RTy &R)
Combine two pattern matchers matching L && R.
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
cst_pred_ty< is_non_zero_int > m_NonZeroInt()
Match a non-zero integer or a vector with all non-zero elements.
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Shl, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWShl(const LHS &L, const RHS &R)
OverflowingBinaryOp_match< LHS, RHS, Instruction::Mul, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWMul(const LHS &L, const RHS &R)
BinOpPred_match< LHS, RHS, is_bitwiselogic_op, true > m_c_BitwiseLogic(const LHS &L, const RHS &R)
Matches bitwise logic operations in either order.
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
match_combine_or< CastInst_match< OpTy, SExtInst >, NNegZExt_match< OpTy > > m_SExtLike(const OpTy &Op)
Match either "sext" or "zext nneg".
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
@ Valid
The data is already valid.
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
@ User
could "use" a pointer
NodeAddr< PhiNode * > Phi
NodeAddr< UseNode * > Use
friend class Instruction
Iterator for Instructions in a `BasicBlock.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
FunctionAddr VTableAddr Value
void stable_sort(R &&Range)
UnaryFunction for_each(R &&Range, UnaryFunction F)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI Intrinsic::ID getMinMaxReductionIntrinsicOp(Intrinsic::ID RdxID)
Returns the min/max intrinsic used when expanding a min/max reduction.
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
unsigned Log2_64_Ceil(uint64_t Value)
Return the ceil log base 2 of the specified value, 64 if the value is zero.
LLVM_ABI Value * simplifyUnOp(unsigned Opcode, Value *Op, const SimplifyQuery &Q)
Given operand for a UnaryOperator, fold the result or return null.
scope_exit(Callable) -> scope_exit< Callable >
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI unsigned getArithmeticReductionInstruction(Intrinsic::ID RdxID)
Returns the arithmetic instruction opcode used when expanding a reduction.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
LLVM_ABI Value * simplifyCall(CallBase *Call, Value *Callee, ArrayRef< Value * > Args, const SimplifyQuery &Q)
Given a callsite, callee, and arguments, fold the result or return null.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
LLVM_ABI bool mustSuppressSpeculation(const LoadInst &LI)
Return true if speculation of the given load must be suppressed to avoid ordering or interfering with...
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like a...
LLVM_ABI ConstantRange computeConstantRange(const Value *V, bool ForSigned, bool UseInstrInfo=true, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Determine the possible constant range of an integer or vector of integer value.
unsigned M1(unsigned Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned element.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
bool isModSet(const ModRefInfo MRI)
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOne bit sets.
LLVM_ABI bool programUndefinedIfPoison(const Instruction *Inst)
LLVM_ABI bool isSafeToLoadUnconditionally(Value *V, Align Alignment, const APInt &Size, const DataLayout &DL, Instruction *ScanFrom, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if we know that executing a load from this value cannot trap.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
FunctionAddr VTableAddr Count
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference sizeof(SmallVector&lt;T, 0&gt;).
bool isa(const From &Val)
isa&lt;X&gt; - Return true if the parameter to the template is an instance of one of the template type arguments.
LLVM_ABI void propagateIRFlags(Value *I, ArrayRef< Value * > VL, Value *OpValue=nullptr, bool IncludeWrapFlags=true)
Get the intersection (logical and) of all of the potential IR flags of each scalar operation (VL) tha...
LLVM_ABI bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
constexpr int PoisonMaskElem
LLVM_ABI bool isSafeToSpeculativelyExecuteWithOpcode(unsigned Opcode, const Instruction *Inst, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
This returns the same result as isSafeToSpeculativelyExecute if Opcode is the actual opcode of Inst.
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
LLVM_ABI Value * simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a BinaryOperator, fold the result or return null.
LLVM_ABI void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed elements.
LLVM_ABI Intrinsic::ID getReductionForBinop(Instruction::BinaryOps Opc)
Returns the reduction intrinsic id corresponding to the binary operation.
@ And
Bitwise or logical AND of integers.
LLVM_ABI bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
constexpr unsigned BitWidth
LLVM_ABI bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one of its successors (including the next instruction that follows within a basic block).
LLVM_ABI Constant * getLosslessInvCast(Constant *C, Type *InvCastTo, unsigned CastOp, const DataLayout &DL, PreservedCastFlags *Flags=nullptr)
Try to cast C to InvC losslessly, satisfying CastOp(InvC) equals C, or CastOp(InvC) is a refined valu...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
constexpr bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list is empty.
LLVM_ABI Value * simplifyCmpInst(CmpPredicate Predicate, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a CmpInst, fold the result or return null.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI bool isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Returns true if V cannot be poison, but may be undef.
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
LLVM_ABI bool isKnownNonNegative(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Returns true if the give value is known to be non-negative.
LLVM_ABI bool isTriviallyVectorizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially vectorizable.
LLVM_ABI Intrinsic::ID getMinMaxReductionIntrinsicID(Intrinsic::ID IID)
Returns the llvm.vector.reduce min/max intrinsic that corresponds to the intrinsic op.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
LLVM_ABI AAMDNodes adjustForAccess(unsigned AccessSize)
Create a new AAMDNode for accessing AccessSize bytes of this AAMDNode.
This struct is a compact representation of a valid (non-zero power of two) alignment.
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known bits.
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
SimplifyQuery getWithInstruction(const Instruction *I) const