#define DEBUG_TYPE "vector-combine"

STATISTIC(NumVecLoad, "Number of vector loads formed");
STATISTIC(NumVecCmp, "Number of vector compares formed");
STATISTIC(NumVecBO, "Number of vector binops formed");
STATISTIC(NumVecCmpBO, "Number of vector compare + binop formed");
STATISTIC(NumShufOfBitcast, "Number of shuffles moved after bitcast");
STATISTIC(NumScalarOps, "Number of scalar unary + binary ops formed");
STATISTIC(NumScalarCmp, "Number of scalar compares formed");
STATISTIC(NumScalarIntrinsic, "Number of scalar intrinsic calls formed");
static cl::opt<bool> DisableVectorCombine(
    "disable-vector-combine", cl::init(false), cl::Hidden,
    cl::desc("Disable all vector combine transforms"));

static cl::opt<bool> DisableBinopExtractShuffle(
    "disable-binop-extract-shuffle", cl::init(false), cl::Hidden,
    cl::desc("Disable binop extract to shuffle transforms"));

static cl::opt<unsigned> MaxInstrsToScan(
    "vector-combine-max-scan-instrs", cl::init(30), cl::Hidden,
    cl::desc("Max number of instructions to scan for vector combining."));

static const unsigned InvalidIndex = std::numeric_limits<unsigned>::max();
                bool TryEarlyFoldsOnly)
      : DT(DT), AA(AA), AC(AC), DL(DL), CostKind(CostKind), SQ(*DL),
        TryEarlyFoldsOnly(TryEarlyFoldsOnly) {}

  bool TryEarlyFoldsOnly;

  ExtractElementInst *getShuffleExtract(ExtractElementInst *Ext0,
                                        ExtractElementInst *Ext1,
                                        unsigned PreferredExtractIndex) const;
  bool isExtractExtractCheap(ExtractElementInst *Ext0,
                             ExtractElementInst *Ext1, const Instruction &I,
                             ExtractElementInst *&ConvertToShuffle,
                             unsigned PreferredExtractIndex);

  bool foldSelectShuffle(Instruction &I, bool FromReduction = false);
    if (auto *NewI = dyn_cast<Instruction>(&New)) {

      if (auto *OpI = dyn_cast<Instruction>(Op)) {
            OpI, nullptr, nullptr, [&](Value *V) {
              if (auto *I = dyn_cast<Instruction>(V)) {

  while (auto *BitCast = dyn_cast<BitCastInst>(V))
    V = BitCast->getOperand(0);

  if (!Load || !Load->isSimple() || !Load->hasOneUse() ||
      Load->getFunction()->hasFnAttribute(Attribute::SanitizeMemTag) ||

  Type *ScalarTy = Load->getType()->getScalarType();
  if (!ScalarSize || !MinVectorSize || MinVectorSize % ScalarSize != 0 ||
bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
  auto *Load = dyn_cast<LoadInst>(X);
  Type *ScalarTy = Scalar->getType();
  Value *SrcPtr = Load->getPointerOperand()->stripPointerCasts();
  assert(isa<PointerType>(SrcPtr->getType()) && "Expected a pointer type");
  unsigned MinVecNumElts = MinVectorSize / ScalarSize;
  auto *MinVecTy = VectorType::get(ScalarTy, MinVecNumElts, false);
  unsigned OffsetEltIndex = 0;
  unsigned OffsetBitWidth = DL->getIndexTypeSizeInBits(SrcPtr->getType());
  uint64_t ScalarSizeInBytes = ScalarSize / 8;
  if (Offset.urem(ScalarSizeInBytes) != 0)
  OffsetEltIndex = Offset.udiv(ScalarSizeInBytes).getZExtValue();
  if (OffsetEltIndex >= MinVecNumElts)
  unsigned AS = Load->getPointerAddressSpace();
  auto *Ty = cast<FixedVectorType>(I.getType());
  unsigned OutputNumElts = Ty->getNumElements();
  assert(OffsetEltIndex < MinVecNumElts && "Address offset too big");
  Mask[0] = OffsetEltIndex;
  if (OldCost < NewCost || !NewCost.isValid())
  replaceValue(I, *VecLd);
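// Editor's note: an illustrative sketch of the fold above, not from the
// original source. Given a scalar load feeding an insertelement, e.g.
//   %s = load float, ptr %p, align 4
//   %r = insertelement <4 x float> poison, float %s, i64 0
// vectorizeLoadInsert tries to emit a widened vector load plus a shuffle,
// roughly
//   %v = load <4 x float>, ptr %p, align 4
//   %r = shufflevector <4 x float> %v, <4 x float> poison,
//        <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
// provided the wider access is known dereferenceable and the cost model
// does not report a regression.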
  auto *Shuf = cast<ShuffleVectorInst>(&I);
  if (!Shuf->isIdentityWithPadding())
      cast<FixedVectorType>(Shuf->getOperand(0)->getType())->getNumElements();
  unsigned OpIndex = any_of(Shuf->getShuffleMask(), [&NumOpElts](int M) {
    return M >= (int)(NumOpElts);
  auto *Load = dyn_cast<LoadInst>(Shuf->getOperand(OpIndex));
  auto *Ty = cast<FixedVectorType>(I.getType());
  Value *SrcPtr = Load->getPointerOperand()->stripPointerCasts();
  assert(isa<PointerType>(SrcPtr->getType()) && "Expected a pointer type");
  unsigned AS = Load->getPointerAddressSpace();
  if (OldCost < NewCost || !NewCost.isValid())
  replaceValue(I, *VecLd);
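// Editor's note: illustrative sketch, not from the original source. An
// identity-with-padding shuffle of a small load, e.g.
//   %l = load <2 x float>, ptr %p, align 8
//   %r = shufflevector <2 x float> %l, <2 x float> poison,
//        <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
// can become a single wider load when the extra bytes are dereferenceable:
//   %r = load <4 x float>, ptr %p, align 8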
  assert(Index0C && Index1C && "Expected constant extract indexes");
  unsigned Index0 = Index0C->getZExtValue();
  unsigned Index1 = Index1C->getZExtValue();
  if (Index0 == Index1)
  if (PreferredExtractIndex == Index0)
  if (PreferredExtractIndex == Index1)
  return Index0 > Index1 ? Ext0 : Ext1;
                                           unsigned PreferredExtractIndex) {
  assert(Ext0IndexC && Ext1IndexC && "Expected constant extract indexes");
  unsigned Opcode = I.getOpcode();
  auto *VecTy = cast<VectorType>(Ext0Src->getType());
  assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
         "Expected a compare");
  unsigned Ext0Index = Ext0IndexC->getZExtValue();
  unsigned Ext1Index = Ext1IndexC->getZExtValue();
  unsigned BestExtIndex = Extract0Cost > Extract1Cost ? Ext0Index : Ext1Index;
  unsigned BestInsIndex = Extract0Cost > Extract1Cost ? Ext1Index : Ext0Index;
  InstructionCost CheapExtractCost = std::min(Extract0Cost, Extract1Cost);
  if (Ext0Src == Ext1Src && Ext0Index == Ext1Index) {
    bool HasUseTax = Ext0 == Ext1 ? !Ext0->hasNUses(2)
    OldCost = CheapExtractCost + ScalarOpCost;
    NewCost = VectorOpCost + CheapExtractCost + HasUseTax * CheapExtractCost;
    OldCost = Extract0Cost + Extract1Cost + ScalarOpCost;
    NewCost = VectorOpCost + CheapExtractCost +
  ConvertToShuffle = getShuffleExtract(Ext0, Ext1, PreferredExtractIndex);
  if (ConvertToShuffle) {
    if (auto *FixedVecTy = dyn_cast<FixedVectorType>(VecTy)) {
      ShuffleMask[BestInsIndex] = BestExtIndex;
          VecTy, VecTy, ShuffleMask, CostKind, 0, nullptr, {ConvertToShuffle});
          VecTy, VecTy, {}, CostKind, 0, nullptr,
  return OldCost < NewCost;
  auto *VecTy = cast<FixedVectorType>(Vec->getType());
  ShufMask[NewIndex] = OldIndex;

  if (!isa<FixedVectorType>(X->getType()))
  assert(isa<ConstantInt>(C) && "Expected a constant index operand");
  if (isa<Constant>(X))

  assert(isa<CmpInst>(&I) && "Expected a compare");

  assert(isa<BinaryOperator>(&I) && "Expected a binary operator");
                                     V1, "foldExtExtBinop");
  if (auto *VecBOInst = dyn_cast<Instruction>(VecBO))
    VecBOInst->copyIRFlags(&I);

  auto *Ext0 = cast<ExtractElementInst>(I0);
  auto *Ext1 = cast<ExtractElementInst>(I1);
  if (isExtractExtractCheap(Ext0, Ext1, I, ExtractToChange, InsertIndex))
  if (ExtractToChange) {
    unsigned CheapExtractIdx = ExtractToChange == Ext0 ? C1 : C0;
    if (ExtractToChange == Ext0)
                     ? foldExtExtCmp(ExtOp0, ExtOp1, ExtIndex, I)
                     : foldExtExtBinop(ExtOp0, ExtOp1, ExtIndex, I);
  replaceValue(I, *NewExt);
  auto *VecTy = cast<FixedVectorType>(I.getType());
  auto *SrcVecTy = dyn_cast<FixedVectorType>(SrcVec->getType());
  if (!SrcVecTy || ScalarTy != SrcVecTy->getScalarType())
  unsigned NumElts = VecTy->getNumElements();
  if (Index >= NumElts)
  std::iota(Mask.begin(), Mask.end(), 0);
  bool NeedLenChg = SrcVecTy->getNumElements() != NumElts;
                                  VecTy, SrcVecTy, SrcMask, CostKind);
  if (NewCost > OldCost)
  replaceValue(I, *NewShuf);
  auto *ResultTy = dyn_cast<FixedVectorType>(I.getType());
                    << "\n  OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (NewCost > OldCost)
  if (auto *NewInst = dyn_cast<Instruction>(NewBO)) {
    NewInst->copyIRFlags(VecBinOp);
    NewInst->andIRFlags(SclBinOp);
  replaceValue(I, *NewBO);
  auto *BinOp = dyn_cast<BinaryOperator>(&I);
  if (!BinOp || !BinOp->isBitwiseLogicOp())
  auto *LHSCast = dyn_cast<CastInst>(BinOp->getOperand(0));
  auto *RHSCast = dyn_cast<CastInst>(BinOp->getOperand(1));
  if (!LHSCast || !RHSCast) {
    LLVM_DEBUG(dbgs() << "  One or both operands are not cast instructions\n");
  if (CastOpcode != RHSCast->getOpcode())
  switch (CastOpcode) {
  case Instruction::BitCast:
  case Instruction::Trunc:
  case Instruction::SExt:
  case Instruction::ZExt:
  Value *LHSSrc = LHSCast->getOperand(0);
  Value *RHSSrc = RHSCast->getOperand(0);
  auto *SrcVecTy = dyn_cast<FixedVectorType>(LHSSrc->getType());
  auto *DstVecTy = dyn_cast<FixedVectorType>(I.getType());
  if (!SrcVecTy || !DstVecTy)
  if (!SrcVecTy->getScalarType()->isIntegerTy() ||
      !DstVecTy->getScalarType()->isIntegerTy())
             LHSCastCost + RHSCastCost;
  if (!LHSCast->hasOneUse())
    NewCost += LHSCastCost;
  if (!RHSCast->hasOneUse())
    NewCost += RHSCastCost;
                    << " NewCost=" << NewCost << "\n");
  if (NewCost > OldCost)
                                         BinOp->getName() + ".inner");
  if (auto *NewBinOp = dyn_cast<BinaryOperator>(NewOp))
    NewBinOp->copyIRFlags(BinOp);
  replaceValue(I, *Result);
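// Editor's note: illustrative sketch, not from the original source. The fold
// moves a bitwise op ahead of matching casts, e.g.
//   %x = zext <4 x i8> %a to <4 x i32>
//   %y = zext <4 x i8> %b to <4 x i32>
//   %r = and <4 x i32> %x, %y
// becomes
//   %t = and <4 x i8> %a, %b
//   %r = zext <4 x i8> %t to <4 x i32>
// subject to the integer-vector type checks and cost comparison above.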
  case Instruction::BitCast:
  case Instruction::Trunc: {
    Flags.NSW = ZExtC == SExtC;
  case Instruction::SExt:
  case Instruction::ZExt: {
    if (!CastInvC || CastInvC != C)
    if (CastOp == Instruction::ZExt) {
      Flags.NNeg = CastInvC == SExtInvC;

bool VectorCombine::foldBitOpOfCastConstant(Instruction &I) {
  auto *LHSCast = dyn_cast<CastInst>(LHS);
  switch (CastOpcode) {
  case Instruction::BitCast:
  Value *LHSSrc = LHSCast->getOperand(0);
  auto *SrcVecTy = dyn_cast<FixedVectorType>(LHSSrc->getType());
  auto *DstVecTy = dyn_cast<FixedVectorType>(I.getType());
  if (!SrcVecTy || !DstVecTy)
  if (!SrcVecTy->getScalarType()->isIntegerTy() ||
      !DstVecTy->getScalarType()->isIntegerTy())
  if (!LHSCast->hasOneUse())
    NewCost += LHSCastCost;
  LLVM_DEBUG(dbgs() << "foldBitOpOfCastConstant: OldCost=" << OldCost
                    << " NewCost=" << NewCost << "\n");
  if (NewCost > OldCost)
                                         LHSSrc, InvC, I.getName() + ".inner");
  if (auto *NewBinOp = dyn_cast<BinaryOperator>(NewOp))
    NewBinOp->copyIRFlags(&I);
  replaceValue(I, *Result);
bool VectorCombine::foldBitcastShuffle(Instruction &I) {
  auto *DestTy = dyn_cast<FixedVectorType>(I.getType());
  auto *SrcTy = dyn_cast<FixedVectorType>(V0->getType());
  if (!DestTy || !SrcTy)
  unsigned DestEltSize = DestTy->getScalarSizeInBits();
  unsigned SrcEltSize = SrcTy->getScalarSizeInBits();
  if (SrcTy->getPrimitiveSizeInBits() % DestEltSize != 0)
  bool IsUnary = isa<UndefValue>(V1);
  if (!(BCTy0 && BCTy0->getElementType() == DestTy->getElementType()) &&
      !(BCTy1 && BCTy1->getElementType() == DestTy->getElementType()))
  if (DestEltSize <= SrcEltSize) {
    assert(SrcEltSize % DestEltSize == 0 && "Unexpected shuffle mask");
    unsigned ScaleFactor = SrcEltSize / DestEltSize;
    assert(DestEltSize % SrcEltSize == 0 && "Unexpected shuffle mask");
    unsigned ScaleFactor = DestEltSize / SrcEltSize;
  unsigned NumSrcElts = SrcTy->getPrimitiveSizeInBits() / DestEltSize;
  auto *NewShuffleTy =
  auto *OldShuffleTy =
  unsigned NumOps = IsUnary ? 1 : 2;
                                  TargetTransformInfo::CastContextHint::None,
                                  TargetTransformInfo::CastContextHint::None,
  LLVM_DEBUG(dbgs() << "Found a bitcasted shuffle: " << I << "\n  OldCost: "
                    << OldCost << " vs NewCost: " << NewCost << "\n");
  if (NewCost > OldCost || !NewCost.isValid())
  replaceValue(I, *Shuf);
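// Editor's note: illustrative sketch, not from the original source. The fold
// moves the shuffle to operate after the bitcast by rescaling the mask, e.g.
//   %s = shufflevector <4 x i32> %v, <4 x i32> poison,
//        <4 x i32> <i32 3, i32 2, i32 1, i32 0>
//   %r = bitcast <4 x i32> %s to <8 x i16>
// becomes, roughly,
//   %b = bitcast <4 x i32> %v to <8 x i16>
//   %r = shufflevector <8 x i16> %b, <8 x i16> poison,
//        <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 1>
// with each old mask element expanded by the element-size ratio
// (ScaleFactor above).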
bool VectorCombine::scalarizeVPIntrinsic(Instruction &I) {
  if (!isa<VPIntrinsic>(I))
  if (!ScalarOp0 || !ScalarOp1)
  auto IsAllTrueMask = [](Value *MaskVal) {
      if (auto *ConstValue = dyn_cast<Constant>(SplattedVal))
        return ConstValue->isAllOnesValue();
  if (auto *FVTy = dyn_cast<FixedVectorType>(VecTy))
    Mask.resize(FVTy->getNumElements(), 0);
    Args.push_back(V->getType());
  std::optional<unsigned> FunctionalOpcode =
  std::optional<Intrinsic::ID> ScalarIntrID = std::nullopt;
  if (!FunctionalOpcode) {
  InstructionCost NewCost = ScalarOpCost + SplatCost + CostToKeepSplats;
  LLVM_DEBUG(dbgs() << "Found a VP Intrinsic to scalarize: " << VPI
                    << ", Cost of scalarizing:" << NewCost << "\n");
  if (OldCost < NewCost || !NewCost.isValid())
  bool SafeToSpeculate;
        *FunctionalOpcode, &VPI, nullptr, &AC, &DT);
  if (!SafeToSpeculate &&
                                    {ScalarOp0, ScalarOp1})
                                    ScalarOp0, ScalarOp1);
  auto *UO = dyn_cast<UnaryOperator>(&I);
  auto *BO = dyn_cast<BinaryOperator>(&I);
  auto *CI = dyn_cast<CmpInst>(&I);
  auto *II = dyn_cast<IntrinsicInst>(&I);
  if (!UO && !BO && !CI && !II)
      if (Arg->getType() != II->getType() &&
  for (User *U : I.users())
  std::optional<uint64_t> Index;
  auto Ops = II ? II->args() : I.operands();
      if (OpTy->getElementCount().getKnownMinValue() <= InsIdx)
      else if (InsIdx != *Index)
  if (!Index.has_value())
  VectorType *VecTy = cast<VectorType>(I.getType());
  Type *ScalarTy = VecTy->getScalarType();
  assert(VecTy->isVectorTy() &&
         "Unexpected types for insert element into binop or cmp");
  unsigned Opcode = I.getOpcode();
  } else if (UO || BO) {
        II->getIntrinsicID(), ScalarTy,
        II->getIntrinsicID(), VecTy,
  Value *NewVecC = nullptr;
    NewVecC = simplifyCmpInst(CI->getPredicate(), VecCs[0], VecCs[1], SQ);
        simplifyUnOp(UO->getOpcode(), VecCs[0], UO->getFastMathFlags(), SQ);
    NewVecC = simplifyBinOp(BO->getOpcode(), VecCs[0], VecCs[1], SQ);
  for (auto [Idx, Op, VecC, Scalar] : enumerate(Ops, VecCs, ScalarOps)) {
        Instruction::InsertElement, VecTy, CostKind, *Index, VecC, Scalar);
    OldCost += InsertCost;
    NewCost += !Op->hasOneUse() * InsertCost;
  if (OldCost < NewCost || !NewCost.isValid())
    ++NumScalarIntrinsic;
          cast<Constant>(VecC), Builder.getInt64(*Index));
    Scalar = Builder.CreateCmp(CI->getPredicate(), ScalarOps[0], ScalarOps[1]);
  Scalar->setName(I.getName() + ".scalar");
  if (auto *ScalarInst = dyn_cast<Instruction>(Scalar))
    ScalarInst->copyIRFlags(&I);
  replaceValue(I, *Insert);
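// Editor's note: illustrative sketch, not from the original source. For an
// op whose vector operands are insertelements of scalars into constant
// vectors at one common index, e.g.
//   %a = insertelement <4 x i32> <i32 1, i32 2, i32 3, i32 4>, i32 %x, i64 0
//   %b = insertelement <4 x i32> zeroinitializer, i32 %y, i64 0
//   %r = add <4 x i32> %a, %b
// the fold performs the operation on the scalars and inserts the result:
//   %s = add i32 %x, %y
//   %r = insertelement <4 x i32> %vc, i32 %s, i64 0
// where %vc is the constant-folded vector op of the two bases (NewVecC
// above).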
  auto *BI = dyn_cast<BinaryOperator>(&I);
  if (!BI || !I.getType()->isIntegerTy(1))
  Value *B0 = I.getOperand(0), *B1 = I.getOperand(1);
  auto *Ext0 = cast<ExtractElementInst>(I0);
  auto *Ext1 = cast<ExtractElementInst>(I1);
  assert((ConvertToShuf == Ext0 || ConvertToShuf == Ext1) &&
         "Unknown ExtractElementInst");
  unsigned CmpOpcode =
  auto *VecTy = dyn_cast<FixedVectorType>(X->getType());
      Ext0Cost + Ext1Cost + CmpCost * 2 +
  int CheapIndex = ConvertToShuf == Ext0 ? Index1 : Index0;
  int ExpensiveIndex = ConvertToShuf == Ext0 ? Index0 : Index1;
  ShufMask[CheapIndex] = ExpensiveIndex;
  NewCost += Ext0->hasOneUse() ? 0 : Ext0Cost;
  NewCost += Ext1->hasOneUse() ? 0 : Ext1Cost;
  if (OldCost < NewCost || !NewCost.isValid())
  Value *LHS = ConvertToShuf == Ext0 ? Shuf : VCmp;
  Value *RHS = ConvertToShuf == Ext0 ? VCmp : Shuf;
  replaceValue(I, *NewExt);
  auto *RedOp = dyn_cast<Instruction>(II.getOperand(0));
  auto *VecRedTy = cast<VectorType>(II.getOperand(0)->getType());
  unsigned ReductionOpc =
    bool IsUnsigned = isa<ZExtInst>(RedOp);
    auto *ExtType = cast<VectorType>(RedOp->getOperand(0)->getType());
    CostBeforeReduction =
    CostAfterReduction =
  if (RedOp && II.getIntrinsicID() == Intrinsic::vector_reduce_add &&
      (Op0->getOpcode() == RedOp->getOpcode() || Op0 == Op1)) {
    bool IsUnsigned = isa<ZExtInst>(Op0);
    CostBeforeReduction = ExtCost * 2 + MulCost + Ext2Cost;
        IsUnsigned, ReductionOpc, II.getType(), ExtType, CostKind);
bool VectorCombine::foldBinopOfReductions(Instruction &I) {
  if (BinOpOpc == Instruction::Sub)
    ReductionIID = Intrinsic::vector_reduce_add;
  auto checkIntrinsicAndGetItsArgument = [](Value *V,
    auto *II = dyn_cast<IntrinsicInst>(V);
    if (II->getIntrinsicID() == IID && II->hasOneUse())
      return II->getArgOperand(0);
  Value *V0 = checkIntrinsicAndGetItsArgument(I.getOperand(0), ReductionIID);
  Value *V1 = checkIntrinsicAndGetItsArgument(I.getOperand(1), ReductionIID);
  auto *VTy = cast<VectorType>(V0->getType());
  const auto &II0 = *cast<IntrinsicInst>(I.getOperand(0));
  const auto &II1 = *cast<IntrinsicInst>(I.getOperand(1));
  unsigned ReductionOpc =
      CostOfRedOperand0 + CostOfRedOperand1 +
  if (NewCost >= OldCost || !NewCost.isValid())
             << "\n  OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (BinOpOpc == Instruction::Or)
    VectorBO = Builder.CreateOr(V0, V1, "",
                                cast<PossiblyDisjointInst>(I).isDisjoint());
  replaceValue(I, *Rdx);
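// Editor's note: illustrative sketch, not from the original source. The fold
// merges two like reductions joined by a binop into one reduction, e.g.
//   %r0 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a)
//   %r1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %b)
//   %r  = add i32 %r0, %r1
// becomes
//   %v = add <4 x i32> %a, %b
//   %r = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v)
// when the single reduction plus a vector binop is the cheaper form.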
  unsigned NumScanned = 0;

class ScalarizationResult {
  enum class StatusTy { Unsafe, Safe, SafeWithFreeze };

  ScalarizationResult(StatusTy Status, Value *ToFreeze = nullptr)

  ScalarizationResult(const ScalarizationResult &Other) = default;
  ~ScalarizationResult() {
    assert(!ToFreeze && "freeze() not called with ToFreeze being set");

  static ScalarizationResult unsafe() { return {StatusTy::Unsafe}; }
  static ScalarizationResult safe() { return {StatusTy::Safe}; }
  static ScalarizationResult safeWithFreeze(Value *ToFreeze) {
    return {StatusTy::SafeWithFreeze, ToFreeze};

  bool isSafe() const { return Status == StatusTy::Safe; }
  bool isUnsafe() const { return Status == StatusTy::Unsafe; }
  bool isSafeWithFreeze() const { return Status == StatusTy::SafeWithFreeze; }

    Status = StatusTy::Unsafe;
    assert(isSafeWithFreeze() &&
           "should only be used when freezing is required");
           "UserI must be a user of ToFreeze");
      if (U.get() == ToFreeze)
  uint64_t NumElements = VecTy->getElementCount().getKnownMinValue();
  unsigned IntWidth = Idx->getType()->getScalarSizeInBits();
  if (auto *C = dyn_cast<ConstantInt>(Idx)) {
    if (C->getValue().ult(NumElements))
      return ScalarizationResult::safe();
    return ScalarizationResult::unsafe();
    return ScalarizationResult::unsafe();
  APInt Zero(IntWidth, 0);
  APInt MaxElts(IntWidth, NumElements);
                           true, &AC, CtxI, &DT)))
      return ScalarizationResult::safe();
    return ScalarizationResult::unsafe();
  if (ValidIndices.contains(IdxRange))
    return ScalarizationResult::safeWithFreeze(IdxBase);
  return ScalarizationResult::unsafe();

  if (auto *C = dyn_cast<ConstantInt>(Idx))
                          C->getZExtValue() * DL.getTypeStoreSize(ScalarType));
bool VectorCombine::foldSingleElementStore(Instruction &I) {
  auto *SI = cast<StoreInst>(&I);
  if (!SI->isSimple() || !isa<VectorType>(SI->getValueOperand()->getType()))
  if (!match(SI->getValueOperand(),
  if (auto *Load = dyn_cast<LoadInst>(Source)) {
    auto VecTy = cast<VectorType>(SI->getValueOperand()->getType());
    Value *SrcAddr = Load->getPointerOperand()->stripPointerCasts();
    if (!Load->isSimple() || Load->getParent() != SI->getParent() ||
        !DL->typeSizeEqualsStoreSize(Load->getType()->getScalarType()) ||
        SrcAddr != SI->getPointerOperand()->stripPointerCasts())
    if (ScalarizableIdx.isUnsafe() ||
    Worklist.push(Load);
    if (ScalarizableIdx.isSafeWithFreeze())
      ScalarizableIdx.freeze(Builder, *cast<Instruction>(Idx));
        SI->getValueOperand()->getType(), SI->getPointerOperand(),
        {ConstantInt::get(Idx->getType(), 0), Idx});
    replaceValue(I, *NSI);
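// Editor's note: illustrative sketch, not from the original source. A store
// of a vector that only replaces one lane of a load from the same address,
//   %v = load <4 x i32>, ptr %p
//   %w = insertelement <4 x i32> %v, i32 %s, i64 %idx
//   store <4 x i32> %w, ptr %p
// shrinks to a single scalar store once %idx is known to be in bounds:
//   %gep = getelementptr inbounds <4 x i32>, ptr %p, i32 0, i64 %idx
//   store i32 %s, ptr %gep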
bool VectorCombine::scalarizeLoadExtract(Instruction &I) {
  auto *LI = cast<LoadInst>(&I);
  auto *VecTy = cast<VectorType>(LI->getType());
  if (LI->isVolatile() || !DL->typeSizeEqualsStoreSize(VecTy->getScalarType()))
                          LI->getPointerAddressSpace(), CostKind);
  unsigned NumInstChecked = 0;
    for (auto &Pair : NeedFreeze)
      Pair.second.discard();
    auto *UI = dyn_cast<ExtractElementInst>(U);
    if (!UI || UI->getParent() != LI->getParent())
    if (UI->use_empty())
         make_range(std::next(LI->getIterator()), UI->getIterator())) {
    LastCheckedInst = UI;
    if (ScalarIdx.isUnsafe())
    if (ScalarIdx.isSafeWithFreeze()) {
      ScalarIdx.discard();
    auto *Index = dyn_cast<ConstantInt>(UI->getIndexOperand());
                                      Index ? Index->getZExtValue() : -1);
             << "\n  LoadExtractCost: " << OriginalCost
             << " vs ScalarizedCost: " << ScalarizedCost << "\n");
  if (ScalarizedCost >= OriginalCost)
  Type *ElemType = VecTy->getElementType();
    auto *EI = cast<ExtractElementInst>(U);
    Value *Idx = EI->getIndexOperand();
    auto It = NeedFreeze.find(EI);
    if (It != NeedFreeze.end())
      It->second.freeze(Builder, *cast<Instruction>(Idx));
    auto *NewLoad = cast<LoadInst>(
        Builder.CreateLoad(ElemType, GEP, EI->getName() + ".scalar"));
    Align ScalarOpAlignment =
    NewLoad->setAlignment(ScalarOpAlignment);
    if (auto *ConstIdx = dyn_cast<ConstantInt>(Idx)) {
      size_t Offset = ConstIdx->getZExtValue() * DL->getTypeStoreSize(ElemType);
      AAMDNodes OldAAMD = LI->getAAMetadata();
    replaceValue(*EI, *NewLoad, false);
  FailureGuard.release();
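// Editor's note: illustrative sketch, not from the original source. When a
// vector load only feeds extractelements, e.g.
//   %v = load <4 x i32>, ptr %p
//   %e = extractelement <4 x i32> %v, i64 %idx
// each use is rewritten to a scalar load through a GEP:
//   %gep = getelementptr inbounds <4 x i32>, ptr %p, i32 0, i64 %idx
//   %e = load i32, ptr %gep
// with the index frozen first when canScalarizeAccess only proved it safe
// with a freeze.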
bool VectorCombine::scalarizeExtExtract(Instruction &I) {
  auto *Ext = dyn_cast<ZExtInst>(&I);
  auto *SrcTy = dyn_cast<FixedVectorType>(Ext->getOperand(0)->getType());
  auto *DstTy = cast<FixedVectorType>(Ext->getType());
  Type *ScalarDstTy = DstTy->getElementType();
  if (DL->getTypeSizeInBits(SrcTy) != DL->getTypeSizeInBits(ScalarDstTy))
  unsigned ExtCnt = 0;
  bool ExtLane0 = false;
  for (User *U : Ext->users()) {
    if (cast<Instruction>(U)->use_empty())
          Instruction::And, ScalarDstTy, CostKind,
      (ExtCnt - ExtLane0) *
              Instruction::LShr, ScalarDstTy, CostKind,
  if (ScalarCost > VectorCost)
  Value *ScalarV = Ext->getOperand(0);
  uint64_t SrcEltSizeInBits = DL->getTypeSizeInBits(SrcTy->getElementType());
  uint64_t EltBitMask = (1ull << SrcEltSizeInBits) - 1;
  for (User *U : Ext->users()) {
    auto *Extract = cast<ExtractElementInst>(U);
        cast<ConstantInt>(Extract->getIndexOperand())->getZExtValue();
    U->replaceAllUsesWith(And);
bool VectorCombine::foldConcatOfBoolMasks(Instruction &I) {
  Type *Ty = I.getType();
  if (DL->isBigEndian())
  if (ShAmtX > ShAmtY) {
  uint64_t ShAmtDiff = ShAmtY - ShAmtX;
  unsigned NumSHL = (ShAmtX > 0) + (ShAmtY > 0);
  auto *MaskTy = dyn_cast<FixedVectorType>(SrcX->getType());
      MaskTy->getNumElements() != ShAmtDiff ||
      MaskTy->getNumElements() > (BitWidth / 2))
  std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
  if (Ty != ConcatIntTy)
  LLVM_DEBUG(dbgs() << "Found a concatenation of bitcasted bool masks: " << I
                    << "\n  OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (NewCost > OldCost)
  if (Ty != ConcatIntTy) {
  replaceValue(I, *Result);
bool VectorCombine::foldPermuteOfBinops(Instruction &I) {
  Value *Op00, *Op01, *Op10, *Op11;
  if (!Match0 && !Match1)
  auto *ShuffleDstTy = dyn_cast<FixedVectorType>(I.getType());
  auto *BinOpTy = dyn_cast<FixedVectorType>(BinOp->getType());
  auto *Op0Ty = dyn_cast<FixedVectorType>(Op00->getType());
  auto *Op1Ty = dyn_cast<FixedVectorType>(Op10->getType());
  if (!ShuffleDstTy || !BinOpTy || !Op0Ty || !Op1Ty)
  unsigned NumSrcElts = BinOpTy->getNumElements();
  if ((BinOp->isIntDivRem() || !isa<PoisonValue>(I.getOperand(1))) &&
      any_of(OuterMask, [NumSrcElts](int M) { return M >= (int)NumSrcElts; }))
  for (int M : OuterMask) {
    if (M < 0 || M >= (int)NumSrcElts) {
      NewMask0.push_back(Match0 ? Mask0[M] : M);
      NewMask1.push_back(Match1 ? Mask1[M] : M);
  unsigned NumOpElts = Op0Ty->getNumElements();
  bool IsIdentity0 =
      ShuffleDstTy == Op0Ty &&
      all_of(NewMask0, [NumOpElts](int M) { return M < (int)NumOpElts; }) &&
  bool IsIdentity1 =
      ShuffleDstTy == Op1Ty &&
      all_of(NewMask1, [NumOpElts](int M) { return M < (int)NumOpElts; }) &&
                         BinOpTy, OuterMask, CostKind, 0, nullptr, {BinOp}, &I);
        0, nullptr, {Op00, Op01}, cast<Instruction>(BinOp->getOperand(0)));
        0, nullptr, {Op10, Op11}, cast<Instruction>(BinOp->getOperand(1)));
                           Op0Ty, NewMask0, CostKind, 0, nullptr, {Op00, Op01});
                           Op1Ty, NewMask1, CostKind, 0, nullptr, {Op10, Op11});
  LLVM_DEBUG(dbgs() << "Found a shuffle feeding a shuffled binop: " << I
                    << "\n  OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (NewCost > OldCost)
  if (auto *NewInst = dyn_cast<Instruction>(NewBO))
    NewInst->copyIRFlags(BinOp);
  replaceValue(I, *NewBO);
bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
  if (LHS->getOpcode() != RHS->getOpcode())
  bool IsCommutative = false;
    auto *BO = cast<BinaryOperator>(LHS);
    IsCommutative = BinaryOperator::isCommutative(BO->getOpcode());
    IsCommutative = cast<CmpInst>(LHS)->isCommutative();
  auto *ShuffleDstTy = dyn_cast<FixedVectorType>(I.getType());
  auto *BinResTy = dyn_cast<FixedVectorType>(LHS->getType());
  auto *BinOpTy = dyn_cast<FixedVectorType>(X->getType());
  if (!ShuffleDstTy || !BinResTy || !BinOpTy || X->getType() != Z->getType())
  unsigned NumSrcElts = BinOpTy->getNumElements();
  if (IsCommutative && X != Z && Y != W && (X == W || Y == Z))
  auto ConvertToUnary = [NumSrcElts](int &M) {
    if (M >= (int)NumSrcElts)
             [NumSrcElts](int M) { return M < (int)NumSrcElts; })) {
  bool ReducedInstCount = false;
  ReducedInstCount |= MergeInner(X, 0, NewMask0, CostKind);
  ReducedInstCount |= MergeInner(Y, 0, NewMask1, CostKind);
  ReducedInstCount |= MergeInner(Z, NumSrcElts, NewMask0, CostKind);
  ReducedInstCount |= MergeInner(W, NumSrcElts, NewMask1, CostKind);
  auto *ShuffleCmpTy =
             << "\n  OldCost: " << OldCost << " vs NewCost: " << NewCost
  ReducedInstCount |= (isa<Constant>(X) && isa<Constant>(Z)) ||
                      (isa<Constant>(Y) && isa<Constant>(W));
  if (ReducedInstCount ? (NewCost > OldCost) : (NewCost >= OldCost))
                           cast<BinaryOperator>(LHS)->getOpcode(), Shuf0, Shuf1)
                     : Builder.CreateCmp(PredLHS, Shuf0, Shuf1);
  if (auto *NewInst = dyn_cast<Instruction>(NewBO)) {
    NewInst->copyIRFlags(LHS);
    NewInst->andIRFlags(RHS);
  replaceValue(I, *NewBO);
bool VectorCombine::foldShuffleOfSelects(Instruction &I) {
  Value *C1, *T1, *F1, *C2, *T2, *F2;
  auto *C1VecTy = dyn_cast<FixedVectorType>(C1->getType());
  auto *C2VecTy = dyn_cast<FixedVectorType>(C2->getType());
  if (!C1VecTy || !C2VecTy || C1VecTy != C2VecTy)
  auto *SI0FOp = dyn_cast<FPMathOperator>(I.getOperand(0));
  auto *SI1FOp = dyn_cast<FPMathOperator>(I.getOperand(1));
  if (((SI0FOp == nullptr) != (SI1FOp == nullptr)) ||
      ((SI0FOp != nullptr) &&
       (SI0FOp->getFastMathFlags() != SI1FOp->getFastMathFlags())))
  auto *SrcVecTy = cast<FixedVectorType>(T1->getType());
  auto *DstVecTy = cast<FixedVectorType>(I.getType());
  auto SelOp = Instruction::Select;
                     {I.getOperand(0), I.getOperand(1)}, &I);
                     Mask, CostKind, 0, nullptr, {C1, C2});
  auto *C1C2ShuffledVecTy = cast<FixedVectorType>(
             << "\n  OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (NewCost > OldCost)
    NewSel = Builder.CreateSelectFMF(ShuffleCmp, ShuffleTrue, ShuffleFalse,
                                     SI0FOp->getFastMathFlags());
    NewSel = Builder.CreateSelect(ShuffleCmp, ShuffleTrue, ShuffleFalse);
  replaceValue(I, *NewSel);
bool VectorCombine::foldShuffleOfCastops(Instruction &I) {
  auto *C0 = dyn_cast<CastInst>(V0);
  auto *C1 = dyn_cast<CastInst>(V1);
  if (C0->getSrcTy() != C1->getSrcTy())
  if (Opcode != C1->getOpcode()) {
    Opcode = Instruction::SExt;
  auto *ShuffleDstTy = dyn_cast<FixedVectorType>(I.getType());
  auto *CastDstTy = dyn_cast<FixedVectorType>(C0->getDestTy());
  auto *CastSrcTy = dyn_cast<FixedVectorType>(C0->getSrcTy());
  if (!ShuffleDstTy || !CastDstTy || !CastSrcTy)
  unsigned NumSrcElts = CastSrcTy->getNumElements();
  unsigned NumDstElts = CastDstTy->getNumElements();
  assert((NumDstElts == NumSrcElts || Opcode == Instruction::BitCast) &&
         "Only bitcasts expected to alter src/dst element counts");
  if (NumDstElts != NumSrcElts && (NumSrcElts % NumDstElts) != 0 &&
      (NumDstElts % NumSrcElts) != 0)
  if (NumSrcElts >= NumDstElts) {
    assert(NumSrcElts % NumDstElts == 0 && "Unexpected shuffle mask");
    unsigned ScaleFactor = NumSrcElts / NumDstElts;
    assert(NumDstElts % NumSrcElts == 0 && "Unexpected shuffle mask");
    unsigned ScaleFactor = NumDstElts / NumSrcElts;
  auto *NewShuffleDstTy =
                         CastDstTy, OldMask, CostKind, 0, nullptr, {}, &I);
             << "\n  OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (NewCost > OldCost)
                                      C1->getOperand(0), NewMask);
  if (auto *NewInst = dyn_cast<Instruction>(Cast)) {
    NewInst->copyIRFlags(C0);
    NewInst->andIRFlags(C1);
  replaceValue(I, *Cast);
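// Editor's note: illustrative sketch, not from the original source. The fold
// hoists a shuffle above two matching casts, e.g.
//   %x = sext <4 x i16> %a to <4 x i32>
//   %y = sext <4 x i16> %b to <4 x i32>
//   %r = shufflevector <4 x i32> %x, <4 x i32> %y,
//        <4 x i32> <i32 0, i32 4, i32 1, i32 5>
// becomes
//   %s = shufflevector <4 x i16> %a, <4 x i16> %b,
//        <4 x i32> <i32 0, i32 4, i32 1, i32 5>
//   %r = sext <4 x i16> %s to <4 x i32>
// so a single (often narrower) cast is applied after one shuffle.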
bool VectorCombine::foldShuffleOfShuffles(Instruction &I) {
  Value *OuterV0, *OuterV1;
  Value *X0, *X1, *Y0, *Y1;
  if (!Match0 && !Match1)
  if (!Match1 && isa<PoisonValue>(OuterV1)) {
    InnerMask1 = PoisonMask1;
  X0 = Match0 ? X0 : OuterV0;
  Y0 = Match0 ? Y0 : OuterV0;
  X1 = Match1 ? X1 : OuterV1;
  Y1 = Match1 ? Y1 : OuterV1;
  auto *ShuffleDstTy = dyn_cast<FixedVectorType>(I.getType());
  auto *ShuffleSrcTy = dyn_cast<FixedVectorType>(X0->getType());
  auto *ShuffleImmTy = dyn_cast<FixedVectorType>(OuterV0->getType());
  if (!ShuffleDstTy || !ShuffleSrcTy || !ShuffleImmTy ||
  unsigned NumSrcElts = ShuffleSrcTy->getNumElements();
  unsigned NumImmElts = ShuffleImmTy->getNumElements();
  Value *NewX = nullptr, *NewY = nullptr;
  for (int &M : NewMask) {
    Value *Src = nullptr;
    if (0 <= M && M < (int)NumImmElts) {
      Src = M >= (int)NumSrcElts ? Y0 : X0;
      M = M >= (int)NumSrcElts ? (M - NumSrcElts) : M;
    } else if (M >= (int)NumImmElts) {
      Src = M >= (int)NumSrcElts ? Y1 : X1;
      M = M >= (int)NumSrcElts ? (M - NumSrcElts) : M;
    assert(0 <= M && M < (int)NumSrcElts && "Unexpected shuffle mask index");
    if (isa<UndefValue>(Src)) {
      if (!isa<PoisonValue>(Src))
    if (!NewX || NewX == Src) {
    if (!NewY || NewY == Src) {
    replaceValue(I, *NewX);
  bool IsUnary = all_of(NewMask, [&](int M) { return M < (int)NumSrcElts; });
                     nullptr, {NewX, NewY});
    NewCost += InnerCost0;
    NewCost += InnerCost1;
             << "\n  OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (NewCost > OldCost)
  replaceValue(I, *Shuf);
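// Editor's note: illustrative sketch, not from the original source. A shuffle
// of two inner shuffles collapses into one shuffle of the inner sources, e.g.
//   %s0 = shufflevector <4 x i32> %x, <4 x i32> %y,
//         <4 x i32> <i32 0, i32 1, i32 4, i32 5>
//   %s1 = shufflevector <4 x i32> %x, <4 x i32> %y,
//         <4 x i32> <i32 2, i32 3, i32 6, i32 7>
//   %r  = shufflevector <4 x i32> %s0, <4 x i32> %s1,
//         <4 x i32> <i32 0, i32 2, i32 4, i32 6>
// becomes
//   %r = shufflevector <4 x i32> %x, <4 x i32> %y,
//        <4 x i32> <i32 0, i32 4, i32 2, i32 6>
// by composing the outer mask through the inner masks (the loop above).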
bool VectorCombine::foldShuffleOfIntrinsics(Instruction &I) {
  auto *II0 = dyn_cast<IntrinsicInst>(V0);
  auto *II1 = dyn_cast<IntrinsicInst>(V1);
  if (IID != II1->getIntrinsicID())
  auto *ShuffleDstTy = dyn_cast<FixedVectorType>(I.getType());
  auto *II0Ty = dyn_cast<FixedVectorType>(II0->getType());
  if (!ShuffleDstTy || !II0Ty)
  for (unsigned I = 0, E = II0->arg_size(); I != E; ++I)
        II0->getArgOperand(I) != II1->getArgOperand(I))
                         II0Ty, OldMask, CostKind, 0, nullptr, {II0, II1}, &I);
  for (unsigned I = 0, E = II0->arg_size(); I != E; ++I) {
      NewArgsTy.push_back(II0->getArgOperand(I)->getType());
      auto *VecTy = cast<FixedVectorType>(II0->getArgOperand(I)->getType());
                                           ShuffleDstTy->getNumElements());
             << "\n  OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (NewCost > OldCost)
  for (unsigned I = 0, E = II0->arg_size(); I != E; ++I)
                                      II1->getArgOperand(I), OldMask);
  if (auto *NewInst = dyn_cast<Instruction>(NewIntrinsic)) {
    NewInst->andIRFlags(II1);
  replaceValue(I, *NewIntrinsic);
  while (auto *SV = dyn_cast<ShuffleVectorInst>(U->get())) {
        cast<FixedVectorType>(SV->getOperand(0)->getType())->getNumElements();
    int M = SV->getMaskValue(Lane);
    if (static_cast<unsigned>(M) < NumElts) {
      U = &SV->getOperandUse(0);
      U = &SV->getOperandUse(1);

    auto [U, Lane] = IL;

  auto *Ty = cast<FixedVectorType>(Item.front().first->get()->getType());
  unsigned NumElts = Ty->getNumElements();
  if (Item.size() == NumElts || NumElts == 1 || Item.size() % NumElts != 0)
  std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
  unsigned NumSlices = Item.size() / NumElts;
  for (unsigned Slice = 0; Slice < NumSlices; ++Slice) {
    Use *SliceV = Item[Slice * NumElts].first;
    if (!SliceV || SliceV->get()->getType() != Ty)
    for (unsigned Elt = 0; Elt < NumElts; ++Elt) {
      auto [V, Lane] = Item[Slice * NumElts + Elt];
      if (Lane != static_cast<int>(Elt) || SliceV->get() != V->get())
  auto [FrontU, FrontLane] = Item.front();
  if (IdentityLeafs.contains(FrontU)) {
    return FrontU->get();
  if (ConcatLeafs.contains(FrontU)) {
        cast<FixedVectorType>(FrontU->get()->getType())->getNumElements();
    for (unsigned S = 0; S < Values.size(); ++S)
      Values[S] = Item[S * NumElts].first->get();
    while (Values.size() > 1) {
      std::iota(Mask.begin(), Mask.end(), 0);
      for (unsigned S = 0; S < NewValues.size(); ++S)
  auto *I = cast<Instruction>(FrontU->get());
  auto *II = dyn_cast<IntrinsicInst>(I);
  unsigned NumOps = I->getNumOperands() - (II ? 1 : 0);
  for (unsigned Idx = 0; Idx < NumOps; Idx++) {
        Ty, IdentityLeafs, SplatLeafs, ConcatLeafs,
  for (const auto &Lane : Item)
  if (auto *BI = dyn_cast<BinaryOperator>(I)) {
  if (auto *CI = dyn_cast<CmpInst>(I)) {
    auto *Value = Builder.CreateCmp(CI->getPredicate(), Ops[0], Ops[1]);
  if (auto *SI = dyn_cast<SelectInst>(I)) {
  if (auto *CI = dyn_cast<CastInst>(I)) {
  assert(isa<UnaryInstruction>(I) &&
         "Unexpected instruction type in Generate");
bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
  auto *Ty = dyn_cast<FixedVectorType>(I.getType());
  if (!Ty || I.use_empty())
  for (unsigned M = 0, E = Ty->getNumElements(); M < E; ++M)
  unsigned NumVisited = 0;
  while (!Worklist.empty()) {
    auto [FrontU, FrontLane] = Item.front();
      return X->getType() == Y->getType() &&
    if (FrontLane == 0 &&
        cast<FixedVectorType>(FrontU->get()->getType())->getNumElements() ==
            Ty->getNumElements() &&
          return !E.value().first || (IsEquiv(E.value().first->get(), FrontV) &&
                                      E.value().second == (int)E.index());
      IdentityLeafs.insert(FrontU);
    if (auto *C = dyn_cast<Constant>(FrontU);
        C && C->getSplatValue() &&
          return !U || (isa<Constant>(U->get()) &&
                        cast<Constant>(U->get())->getSplatValue() ==
                            cast<Constant>(FrontV)->getSplatValue());
      SplatLeafs.insert(FrontU);
      auto [FrontU, FrontLane] = Item.front();
      auto [U, Lane] = IL;
      return !U || (U->get() == FrontU->get() && Lane == FrontLane);
      SplatLeafs.insert(FrontU);
    auto CheckLaneIsEquivalentToFirst = [Item](InstLane IL) {
      Value *V = IL.first->get();
      if (auto *I = dyn_cast<Instruction>(V); I && !I->hasOneUser())
      if (auto *CI = dyn_cast<CmpInst>(V))
        if (CI->getPredicate() != cast<CmpInst>(FrontV)->getPredicate())
      if (auto *CI = dyn_cast<CastInst>(V))
        if (CI->getSrcTy()->getScalarType() !=
            cast<CastInst>(FrontV)->getSrcTy()->getScalarType())
      if (auto *SI = dyn_cast<SelectInst>(V))
        if (!isa<VectorType>(SI->getOperand(0)->getType()) ||
            SI->getOperand(0)->getType() !=
                cast<SelectInst>(FrontV)->getOperand(0)->getType())
      if (isa<CallInst>(V) && !isa<IntrinsicInst>(V))
      auto *II = dyn_cast<IntrinsicInst>(V);
      return !II || (isa<IntrinsicInst>(FrontV) &&
                     II->getIntrinsicID() ==
                         cast<IntrinsicInst>(FrontV)->getIntrinsicID() &&
                     !II->hasOperandBundles());
    if (isa<BinaryOperator, CmpInst>(FrontU)) {
      if (auto *BO = dyn_cast<BinaryOperator>(FrontU);
          BO && BO->isIntDivRem())
    } else if (auto *BitCast = dyn_cast<BitCastInst>(FrontU)) {
      auto *DstTy = dyn_cast<FixedVectorType>(BitCast->getDestTy());
      auto *SrcTy = dyn_cast<FixedVectorType>(BitCast->getSrcTy());
      if (DstTy && SrcTy &&
          SrcTy->getNumElements() == DstTy->getNumElements()) {
    } else if (isa<SelectInst>(FrontU)) {
    } else if (auto *II = dyn_cast<IntrinsicInst>(FrontU);
               !II->hasOperandBundles()) {
      for (unsigned Op = 0, E = II->getNumOperands() - 1; Op < E; Op++) {
            return !U || (cast<Instruction>(U->get())->getOperand(Op) ==
                          cast<Instruction>(FrontV)->getOperand(Op));
      ConcatLeafs.insert(FrontU);
  if (NumVisited <= 1)
  LLVM_DEBUG(dbgs() << "Found a superfluous identity shuffle: " << I << "\n");
                               ConcatLeafs, Builder, &TTI);
  replaceValue(I, *V);
bool VectorCombine::foldShuffleFromReductions(Instruction &I) {
  auto *II = dyn_cast<IntrinsicInst>(&I);
  switch (II->getIntrinsicID()) {
  case Intrinsic::vector_reduce_add:
  case Intrinsic::vector_reduce_mul:
  case Intrinsic::vector_reduce_and:
  case Intrinsic::vector_reduce_or:
  case Intrinsic::vector_reduce_xor:
  case Intrinsic::vector_reduce_smin:
  case Intrinsic::vector_reduce_smax:
  case Intrinsic::vector_reduce_umin:
  case Intrinsic::vector_reduce_umax:
  std::queue<Value *> Worklist;
  if (auto *Op = dyn_cast<Instruction>(I.getOperand(0)))
  while (!Worklist.empty()) {
    Value *CV = Worklist.front();
    if (auto *CI = dyn_cast<Instruction>(CV)) {
      if (CI->isBinaryOp()) {
        for (auto *Op : CI->operand_values())
      } else if (auto *SV = dyn_cast<ShuffleVectorInst>(CI)) {
        if (Shuffle && Shuffle != SV)
  for (auto *V : Visited)
    for (auto *U : V->users())
      if (!Visited.contains(U) && U != &I)
      dyn_cast<FixedVectorType>(II->getOperand(0)->getType());
  if (!ShuffleInputType)
  sort(ConcatMask, [](int X, int Y) { return (unsigned)X < (unsigned)Y; });
  bool UsesSecondVec =
      any_of(ConcatMask, [&](int M) { return M >= (int)NumInputElts; });
                     ShuffleInputType, ConcatMask, CostKind);
  LLVM_DEBUG(dbgs() << "Found a reduction feeding from a shuffle: " << *Shuffle
  LLVM_DEBUG(dbgs() << "  OldCost: " << OldCost << " vs NewCost: " << NewCost
  bool MadeChanges = false;
  if (NewCost < OldCost) {
    LLVM_DEBUG(dbgs() << "Created new shuffle: " << *NewShuffle << "\n");
    replaceValue(*Shuffle, *NewShuffle);
  MadeChanges |= foldSelectShuffle(*Shuffle, true);
bool VectorCombine::foldShuffleChainsToReduce(Instruction &I) {
  std::queue<Value *> InstWorklist;
  std::optional<unsigned int> CommonCallOp = std::nullopt;
  std::optional<Instruction::BinaryOps> CommonBinOp = std::nullopt;
  bool IsFirstCallOrBinInst = true;
  bool ShouldBeCallOrBinInst = true;
  auto *FVT = dyn_cast<FixedVectorType>(VecOpEE->getType());
  int64_t VecSize = FVT->getNumElements();
  unsigned int NumLevels = Log2_64_Ceil(VecSize), VisitedCnt = 0;
  int64_t ShuffleMaskHalf = 1, ExpectedParityMask = 0;
  for (int Cur = VecSize, Mask = NumLevels - 1; Cur > 1;
       Cur = (Cur + 1) / 2, --Mask) {
      ExpectedParityMask |= (1ll << Mask);
  InstWorklist.push(VecOpEE);
  while (!InstWorklist.empty()) {
    Value *CI = InstWorklist.front();
    if (auto *II = dyn_cast<IntrinsicInst>(CI)) {
      if (!ShouldBeCallOrBinInst)
      if (!IsFirstCallOrBinInst &&
          any_of(PrevVecV, [](Value *VecV) { return VecV == nullptr; }))
      if (II != (IsFirstCallOrBinInst ? VecOpEE : PrevVecV[0]))
      IsFirstCallOrBinInst = false;
        CommonCallOp = II->getIntrinsicID();
      if (II->getIntrinsicID() != *CommonCallOp)
      switch (II->getIntrinsicID()) {
      case Intrinsic::umin:
      case Intrinsic::umax:
      case Intrinsic::smin:
      case Intrinsic::smax: {
        auto *Op0 = II->getOperand(0);
        auto *Op1 = II->getOperand(1);
      ShouldBeCallOrBinInst ^= 1;
          *CommonCallOp, II->getType(),
          {PrevVecV[0]->getType(), PrevVecV[1]->getType()});
      if (!isa<ShuffleVectorInst>(PrevVecV[1]))
      InstWorklist.push(PrevVecV[1]);
      InstWorklist.push(PrevVecV[0]);
    } else if (auto *BinOp = dyn_cast<BinaryOperator>(CI)) {
      if (!ShouldBeCallOrBinInst)
      if (!IsFirstCallOrBinInst &&
          any_of(PrevVecV, [](Value *VecV) { return VecV == nullptr; }))
      if (BinOp != (IsFirstCallOrBinInst ? VecOpEE : PrevVecV[0]))
      IsFirstCallOrBinInst = false;
      switch (*CommonBinOp) {
      case BinaryOperator::Add:
      case BinaryOperator::Mul:
      case BinaryOperator::Or:
      case BinaryOperator::And:
      case BinaryOperator::Xor: {
      ShouldBeCallOrBinInst ^= 1;
      if (!isa<ShuffleVectorInst>(PrevVecV[1]))
      InstWorklist.push(PrevVecV[1]);
      InstWorklist.push(PrevVecV[0]);
    } else if (auto *SVInst = dyn_cast<ShuffleVectorInst>(CI)) {
      if (ShouldBeCallOrBinInst ||
          any_of(PrevVecV, [](Value *VecV) { return VecV == nullptr; }))
      if (SVInst != PrevVecV[1])
      for (int Mask = 0, MaskSize = CurMask.size(); Mask != MaskSize; ++Mask) {
        if (Mask < ShuffleMaskHalf &&
            CurMask[Mask] != ShuffleMaskHalf + Mask - (ExpectedParityMask & 1))
        if (Mask >= ShuffleMaskHalf && CurMask[Mask] != -1)
      ShuffleMaskHalf *= 2;
      ShuffleMaskHalf -= (ExpectedParityMask & 1);
      ExpectedParityMask >>= 1;
          SVInst->getType(), SVInst->getType(),
      if (!ExpectedParityMask && VisitedCnt == NumLevels)
      ShouldBeCallOrBinInst ^= 1;
  if (ShouldBeCallOrBinInst)
  assert(VecSize != -1 && "Expected Match for Vector Size");
  Value *FinalVecV = PrevVecV[0];
  auto *FinalVecVTy = cast<FixedVectorType>(FinalVecV->getType());
  if (NewCost >= OrigCost)
  auto *ReducedResult =
  replaceValue(I, *ReducedResult);
bool VectorCombine::foldCastFromReductions(Instruction &I) {
  auto *II = dyn_cast<IntrinsicInst>(&I);
  bool TruncOnly = false;
  case Intrinsic::vector_reduce_add:
  case Intrinsic::vector_reduce_mul:
  case Intrinsic::vector_reduce_and:
  case Intrinsic::vector_reduce_or:
  case Intrinsic::vector_reduce_xor:
  Value *ReductionSrc = I.getOperand(0);
  auto *SrcTy = cast<VectorType>(Src->getType());
  auto *ReductionSrcTy = cast<VectorType>(ReductionSrc->getType());
  Type *ResultTy = I.getType();
      ReductionOpc, ReductionSrcTy, std::nullopt, CostKind);
      cast<CastInst>(ReductionSrc));
  if (OldCost <= NewCost || !NewCost.isValid())
                                           II->getIntrinsicID(), {Src});
  replaceValue(I, *NewCast);

  constexpr unsigned MaxVisited = 32;
  bool FoundReduction = false;
  while (!WorkList.empty()) {
    for (User *U : I->users()) {
      auto *UI = cast<Instruction>(U);
      if (!UI || !Visited.insert(UI).second)
      if (Visited.size() > MaxVisited)
      if (auto *II = dyn_cast<IntrinsicInst>(UI)) {
        switch (II->getIntrinsicID()) {
        case Intrinsic::vector_reduce_add:
        case Intrinsic::vector_reduce_mul:
        case Intrinsic::vector_reduce_and:
        case Intrinsic::vector_reduce_or:
        case Intrinsic::vector_reduce_xor:
        case Intrinsic::vector_reduce_smin:
        case Intrinsic::vector_reduce_smax:
        case Intrinsic::vector_reduce_umin:
        case Intrinsic::vector_reduce_umax:
          FoundReduction = true;
      if (!isa<BinaryOperator>(UI) && !isa<ShuffleVectorInst>(UI))
  return FoundReduction;
bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) {
  auto *SVI = cast<ShuffleVectorInst>(&I);
  auto *VT = cast<FixedVectorType>(I.getType());
  auto *Op0 = dyn_cast<Instruction>(SVI->getOperand(0));
  auto *Op1 = dyn_cast<Instruction>(SVI->getOperand(1));
  if (!Op0 || !Op1 || Op0 == Op1 || !Op0->isBinaryOp() || !Op1->isBinaryOp() ||
  auto *SVI0A = dyn_cast<Instruction>(Op0->getOperand(0));
  auto *SVI0B = dyn_cast<Instruction>(Op0->getOperand(1));
  auto *SVI1A = dyn_cast<Instruction>(Op1->getOperand(0));
  auto *SVI1B = dyn_cast<Instruction>(Op1->getOperand(1));
    if (!I || I->getOperand(0)->getType() != VT)
      return U != Op0 && U != Op1 &&
             !(isa<ShuffleVectorInst>(U) &&
               (InputShuffles.contains(cast<Instruction>(U)) ||
                isInstructionTriviallyDead(cast<Instruction>(U))));
  if (checkSVNonOpUses(SVI0A) || checkSVNonOpUses(SVI0B) ||
      checkSVNonOpUses(SVI1A) || checkSVNonOpUses(SVI1B))
    for (auto *U : I->users()) {
      auto *SV = dyn_cast<ShuffleVectorInst>(U);
      if (!SV || SV->getType() != VT)
      if ((SV->getOperand(0) != Op0 && SV->getOperand(0) != Op1) ||
          (SV->getOperand(1) != Op0 && SV->getOperand(1) != Op1))
  if (!collectShuffles(Op0) || !collectShuffles(Op1))
  if (FromReduction && Shuffles.size() > 1)
  if (!FromReduction) {
      for (auto *U : SV->users()) {
          Shuffles.push_back(SSV);
  int MaxV1Elt = 0, MaxV2Elt = 0;
  unsigned NumElts = VT->getNumElements();
    SVN->getShuffleMask(Mask);
    Value *SVOp0 = SVN->getOperand(0);
    Value *SVOp1 = SVN->getOperand(1);
    if (isa<UndefValue>(SVOp1)) {
      auto *SSV = cast<ShuffleVectorInst>(SVOp0);
      for (int &Elem : Mask) {
    if (SVOp0 == Op1 && SVOp1 == Op0) {
    if (SVOp0 != Op0 || SVOp1 != Op1)
    for (unsigned I = 0; I < Mask.size(); I++) {
      } else if (Mask[I] < static_cast<int>(NumElts)) {
        MaxV1Elt = std::max(MaxV1Elt, Mask[I]);
        auto It = find_if(V1, [&](const std::pair<int, int> &A) {
          return Mask[I] == A.first;
        MaxV2Elt = std::max<int>(MaxV2Elt, Mask[I] - NumElts);
        auto It = find_if(V2, [&](const std::pair<int, int> &A) {
          return Mask[I] - static_cast<int>(NumElts) == A.first;
    sort(ReconstructMask);
    OrigReconstructMasks.push_back(std::move(ReconstructMask));
      (MaxV1Elt == static_cast<int>(V1.size()) - 1 &&
       MaxV2Elt == static_cast<int>(V2.size()) - 1))
    auto *SV = dyn_cast<ShuffleVectorInst>(I);
    if (isa<UndefValue>(SV->getOperand(1)))
      if (auto *SSV = dyn_cast<ShuffleVectorInst>(SV->getOperand(0)))
        if (InputShuffles.contains(SSV))
    return SV->getMaskValue(M);
                                std::pair<int, int> Y) {
    int MXA = GetBaseMaskValue(A, X.first);
    int MYA = GetBaseMaskValue(A, Y.first);
  stable_sort(V1, [&](std::pair<int, int> A, std::pair<int, int> B) {
    return SortBase(SVI0A, A, B);
  stable_sort(V2, [&](std::pair<int, int> A, std::pair<int, int> B) {
    return SortBase(SVI1A, A, B);
  for (const auto &Mask : OrigReconstructMasks) {
    for (int M : Mask) {
        auto It = find_if(V, [M](auto A) { return A.second == M; });
        assert(It != V.end() && "Expected all entries in Mask");
        return std::distance(V.begin(), It);
      } else if (M < static_cast<int>(NumElts)) {
        ReconstructMask.push_back(FindIndex(V1, M));
        ReconstructMask.push_back(NumElts + FindIndex(V2, M));
    ReconstructMasks.push_back(std::move(ReconstructMask));
  for (unsigned I = 0; I < V1.size(); I++) {
    V1A.push_back(GetBaseMaskValue(SVI0A, V1[I].first));
    V1B.push_back(GetBaseMaskValue(SVI0B, V1[I].first));
  for (unsigned I = 0; I < V2.size(); I++) {
    V2A.push_back(GetBaseMaskValue(SVI1A, V2[I].first));
    V2B.push_back(GetBaseMaskValue(SVI1B, V2[I].first));
  while (V1A.size() < NumElts) {
  while (V2A.size() < NumElts) {
    auto *SV = dyn_cast<ShuffleVectorInst>(I);
                              VT, VT, SV->getShuffleMask(), CostKind);
  unsigned ElementSize = VT->getElementType()->getPrimitiveSizeInBits();
  unsigned MaxVectorSize =
  unsigned MaxElementsInVector = MaxVectorSize / ElementSize;
  std::set<SmallVector<int, 4>> UniqueShuffles;
    unsigned NumFullVectors = Mask.size() / MaxElementsInVector;
    if (NumFullVectors < 2)
      return C + ShuffleCost;
    unsigned NumUniqueGroups = 0;
    unsigned NumGroups = Mask.size() / MaxElementsInVector;
    for (unsigned I = 0; I < NumFullVectors; ++I) {
      for (unsigned J = 0; J < MaxElementsInVector; ++J)
        SubShuffle[J] = Mask[MaxElementsInVector * I + J];
      if (UniqueShuffles.insert(SubShuffle).second)
        NumUniqueGroups += 1;
    return C + ShuffleCost * NumUniqueGroups / NumGroups;
    auto *SV = dyn_cast<ShuffleVectorInst>(I);
    SV->getShuffleMask(Mask);
    return AddShuffleMaskAdjustedCost(C, Mask);
  auto AllShufflesHaveSameOperands =
        if (InputShuffles.size() < 2)
            dyn_cast<ShuffleVectorInst>(*InputShuffles.begin());
            std::next(InputShuffles.begin()), InputShuffles.end(),
              ShuffleVectorInst *SV = dyn_cast<ShuffleVectorInst>(I);
              return SV && SV->getOperand(0) == In0 && SV->getOperand(1) == In1;
  CostBefore += std::accumulate(Shuffles.begin(), Shuffles.end(),
  if (AllShufflesHaveSameOperands(InputShuffles)) {
    UniqueShuffles.clear();
    CostBefore += std::accumulate(InputShuffles.begin(), InputShuffles.end(),
    CostBefore += std::accumulate(InputShuffles.begin(), InputShuffles.end(),
  UniqueShuffles.clear();
  CostAfter += std::accumulate(ReconstructMasks.begin(), ReconstructMasks.end(),
  std::set<SmallVector<int>> OutputShuffleMasks({V1A, V1B, V2A, V2B});
      std::accumulate(OutputShuffleMasks.begin(), OutputShuffleMasks.end(),
  LLVM_DEBUG(dbgs() << "Found a binop select shuffle pattern: " << I << "\n");
             << " vs CostAfter: " << CostAfter << "\n");
  if (CostBefore < CostAfter ||
    auto *SV = dyn_cast<ShuffleVectorInst>(I);
    if (isa<UndefValue>(SV->getOperand(1)))
      if (auto *SSV = dyn_cast<ShuffleVectorInst>(SV->getOperand(0)))
        if (InputShuffles.contains(SSV))
    return SV->getOperand(Op);
                                 GetShuffleOperand(SVI0A, 1), V1A);
                                 GetShuffleOperand(SVI0B, 1), V1B);
                                 GetShuffleOperand(SVI1A, 1), V2A);
                                 GetShuffleOperand(SVI1B, 1), V2B);
  if (auto *I = dyn_cast<Instruction>(NOp0))
    I->copyIRFlags(Op0, true);
  if (auto *I = dyn_cast<Instruction>(NOp1))
    I->copyIRFlags(Op1, true);
  for (int S = 0, E = ReconstructMasks.size(); S != E; S++) {
    replaceValue(*Shuffles[S], *NSV, false);
  Worklist.pushValue(NSV0A);
  Worklist.pushValue(NSV0B);
  Worklist.pushValue(NSV1A);
  Worklist.pushValue(NSV1B);
  Value *ZExted, *OtherOperand;
  Value *ZExtOperand = I.getOperand(I.getOperand(0) == OtherOperand ? 1 : 0);
  auto *BigTy = cast<FixedVectorType>(I.getType());
  auto *SmallTy = cast<FixedVectorType>(ZExted->getType());
  unsigned BW = SmallTy->getElementType()->getPrimitiveSizeInBits();
  if (I.getOpcode() == Instruction::LShr) {
      Instruction::ZExt, BigTy, SmallTy,
      TargetTransformInfo::CastContextHint::None, CostKind);
    auto *UI = cast<Instruction>(U);
      ShrinkCost += ZExtCost;
      ShrinkCost += ZExtCost;
  if (!isa<Constant>(OtherOperand))
        Instruction::Trunc, SmallTy, BigTy,
        TargetTransformInfo::CastContextHint::None, CostKind);
  if (ShrinkCost > CurrentCost)
  Value *Op0 = ZExted;
  if (I.getOperand(0) == OtherOperand)
  cast<Instruction>(NewBinOp)->copyIRFlags(&I);
  cast<Instruction>(NewBinOp)->copyMetadata(I);
  replaceValue(I, *NewZExtr);
bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) {
  Value *DstVec, *SrcVec;
  auto *DstVecTy = dyn_cast<FixedVectorType>(I.getType());
  auto *SrcVecTy = dyn_cast<FixedVectorType>(SrcVec->getType());
  if (!DstVecTy || !SrcVecTy ||
      SrcVecTy->getElementType() != DstVecTy->getElementType())
  unsigned NumDstElts = DstVecTy->getNumElements();
  unsigned NumSrcElts = SrcVecTy->getNumElements();
  if (InsIdx >= NumDstElts || ExtIdx >= NumSrcElts || NumDstElts == 1)
  bool NeedExpOrNarrow = NumSrcElts != NumDstElts;
  bool IsExtIdxInBounds = ExtIdx < NumDstElts;
  bool NeedDstSrcSwap = isa<PoisonValue>(DstVec) && !isa<UndefValue>(SrcVec);
  if (NeedDstSrcSwap) {
    if (!IsExtIdxInBounds && NeedExpOrNarrow)
      Mask[InsIdx] = ExtIdx;
    std::iota(Mask.begin(), Mask.end(), 0);
    if (!IsExtIdxInBounds && NeedExpOrNarrow)
      Mask[InsIdx] = NumDstElts;
      Mask[InsIdx] = ExtIdx + NumDstElts;
  auto *Ins = cast<InsertElementInst>(&I);
  auto *Ext = cast<ExtractElementInst>(I.getOperand(1));
  if (!NeedExpOrNarrow) {
                     nullptr, {DstVec, SrcVec});
    if (IsExtIdxInBounds)
      ExtToVecMask[ExtIdx] = ExtIdx;
      ExtToVecMask[0] = ExtIdx;
                                  DstVecTy, SrcVecTy, ExtToVecMask, CostKind);
  if (!Ext->hasOneUse())
  LLVM_DEBUG(dbgs() << "Found an insert/extract shuffle-like pair: " << I
                    << "\n  OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (OldCost < NewCost)
  if (NeedExpOrNarrow) {
    if (!NeedDstSrcSwap)
  if (isa<UndefValue>(DstVec) && !isa<UndefValue>(SrcVec)) {
  replaceValue(I, *Shuf);
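// Editor's note: illustrative sketch, not from the original source. An
// insertelement of a value extracted from another vector, e.g.
//   %e = extractelement <4 x float> %src, i64 2
//   %r = insertelement <4 x float> %dst, float %e, i64 0
// is the two-source shuffle
//   %r = shufflevector <4 x float> %dst, <4 x float> %src,
//        <4 x i32> <i32 6, i32 1, i32 2, i32 3>
// where lane InsIdx takes mask value ExtIdx + NumDstElts, matching the
// Mask[InsIdx] assignments above.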
bool VectorCombine::foldInterleaveIntrinsics(Instruction &I) {
  const APInt *SplatVal0, *SplatVal1;
  if (!match(&I, m_Intrinsic<Intrinsic::vector_interleave2>(
      cast<VectorType>(cast<IntrinsicInst>(I).getArgOperand(0)->getType());
  auto *ExtVTy = VectorType::getExtendedElementVectorType(VTy);
  unsigned Width = VTy->getElementType()->getIntegerBitWidth();
    LLVM_DEBUG(dbgs() << "VC: The cost to cast from " << *ExtVTy << " to "
                      << *I.getType() << " is too high.\n");
  APInt NewSplatVal = SplatVal1->zext(Width * 2);
  NewSplatVal <<= Width;
  NewSplatVal |= SplatVal0->zext(Width * 2);
      ExtVTy->getElementCount(), ConstantInt::get(F.getContext(), NewSplatVal));
bool VectorCombine::shrinkLoadForShuffles(Instruction &I) {
  auto *OldLoad = dyn_cast<LoadInst>(&I);
  if (!OldLoad || !OldLoad->isSimple())
  auto *OldLoadTy = dyn_cast<FixedVectorType>(OldLoad->getType());
  unsigned const OldNumElements = OldLoadTy->getNumElements();
  using IndexRange = std::pair<int, int>;
  auto GetIndexRangeInShuffles = [&]() -> std::optional<IndexRange> {
    IndexRange OutputRange = IndexRange(OldNumElements, -1);
        return std::nullopt;
      for (int Index : Mask) {
        if (Index >= 0 && Index < static_cast<int>(OldNumElements)) {
          OutputRange.first = std::min(Index, OutputRange.first);
          OutputRange.second = std::max(Index, OutputRange.second);
    if (OutputRange.second < OutputRange.first)
      return std::nullopt;
  if (std::optional<IndexRange> Indices = GetIndexRangeInShuffles()) {
    unsigned const NewNumElements = Indices->second + 1u;
    if (NewNumElements < OldNumElements) {
      Type *ElemTy = OldLoadTy->getElementType();
      Value *PtrOp = OldLoad->getPointerOperand();
          Instruction::Load, OldLoad->getType(), OldLoad->getAlign(),
          OldLoad->getPointerAddressSpace(), CostKind);
          OldLoad->getPointerAddressSpace(), CostKind);
      using UseEntry = std::pair<ShuffleVectorInst *, std::vector<int>>;
      unsigned const MaxIndex = NewNumElements * 2u;
        auto *Shuffle = cast<ShuffleVectorInst>(Use.getUser());
        for (int Index : OldMask) {
          if (Index >= static_cast<int>(MaxIndex))
          dbgs() << "Found a load used only by shufflevector instructions: "
                 << I << "\n  OldCost: " << OldCost
                 << " vs NewCost: " << NewCost << "\n");
      if (OldCost < NewCost || !NewCost.isValid())
      auto *NewLoad = cast<LoadInst>(
      NewLoad->copyMetadata(I);
      for (UseEntry &Use : NewUses) {
        std::vector<int> &NewMask = Use.second;
        replaceValue(*Shuffle, *NewShuffle, false);
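// Editor's note: illustrative sketch, not from the original source. If a wide
// load is consumed only by shuffles that read its low lanes, e.g.
//   %v = load <8 x float>, ptr %p
//   %s = shufflevector <8 x float> %v, <8 x float> poison,
//        <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// the load is shrunk to cover only the highest lane actually used:
//   %v = load <4 x float>, ptr %p
//   %s = shufflevector <4 x float> %v, <4 x float> poison,
//        <4 x i32> <i32 0, i32 1, i32 2, i32 3>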
bool VectorCombine::shrinkPhiOfShuffles(Instruction &I) {
  auto *Phi = dyn_cast<PHINode>(&I);
  if (!Phi || Phi->getNumIncomingValues() != 2u)
  auto *Shuf = cast<ShuffleVectorInst>(Phi->getOperand(0u));
  auto *InputVT = cast<FixedVectorType>(Op->getType());
  auto *ResultVT = cast<FixedVectorType>(Shuf->getType());
  auto const InputNumElements = InputVT->getNumElements();
  if (InputNumElements >= ResultVT->getNumElements())
  for (auto [M0, M1] : zip(Mask0, Mask1)) {
    if (M0 >= 0 && M1 >= 0)
    else if (M0 == -1 && M1 == -1)
  int MaskOffset = NewMask[0u];
  unsigned Index = (InputNumElements - MaskOffset) % InputNumElements;
  for (unsigned I = 0u; I < InputNumElements; ++I) {
             << "\n  OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (NewCost > OldCost)
  Worklist.push(cast<Instruction>(NewShuf0));
  auto *NewPhi = Builder.CreatePHI(NewShuf0->getType(), 2u);
  NewPhi->addIncoming(Op, Phi->getIncomingBlock(1u));
  replaceValue(*Phi, *NewShuf1);
/// This is the entry point for all transforms. Pass manager differences are
/// handled in the callers of this function.
bool VectorCombine::run() {
  // ... (bail out when vector combining is disabled or unsupported)

  auto FoldInst = [&](Instruction &I) {
    Builder.SetInsertPoint(&I);
    bool IsVectorType = isa<VectorType>(I.getType());
    bool IsFixedVectorType = isa<FixedVectorType>(I.getType());
    auto Opcode = I.getOpcode();

    // These folds should be beneficial regardless of when this pass is run
    // in the optimization pipeline. The type checking is for run-time
    // efficiency: don't dispatch to folds that cannot possibly match.
    if (IsFixedVectorType) {
      switch (Opcode) {
      case Instruction::InsertElement:
        if (vectorizeLoadInsert(I))
          return true;
        break;
      case Instruction::ShuffleVector:
        if (widenSubvectorLoad(I))
          return true;
        break;
      default:
        break;
      }
    }

    // These transforms also work with scalable vector types.
    if (IsVectorType) {
      if (scalarizeOpOrCmp(I))
        return true;
      if (scalarizeLoadExtract(I))
        return true;
      if (scalarizeExtExtract(I))
        return true;
      if (scalarizeVPIntrinsic(I))
        return true;
      if (foldInterleaveIntrinsics(I))
        return true;
    }

    if (Opcode == Instruction::Store)
      if (foldSingleElementStore(I))
        return true;

    // If this is an early pipeline invocation of this pass, only run the
    // cheap folds above.
    if (TryEarlyFoldsOnly)
      return false;

    // Otherwise, try folds that may break canonical form but improve codegen.
    if (IsFixedVectorType) {
      switch (Opcode) {
      case Instruction::InsertElement:
        if (foldInsExtFNeg(I))
          return true;
        if (foldInsExtBinop(I))
          return true;
        if (foldInsExtVectorToShuffle(I))
          return true;
        break;
      case Instruction::ShuffleVector:
        if (foldPermuteOfBinops(I))
          return true;
        if (foldShuffleOfBinops(I))
          return true;
        if (foldShuffleOfSelects(I))
          return true;
        if (foldShuffleOfCastops(I))
          return true;
        if (foldShuffleOfShuffles(I))
          return true;
        if (foldShuffleOfIntrinsics(I))
          return true;
        if (foldSelectShuffle(I))
          return true;
        if (foldShuffleToIdentity(I))
          return true;
        break;
      case Instruction::Load:
        if (shrinkLoadForShuffles(I))
          return true;
        break;
      case Instruction::BitCast:
        if (foldBitcastShuffle(I))
          return true;
        break;
      case Instruction::And:
      case Instruction::Or:
      case Instruction::Xor:
        if (foldBitOpOfCastops(I))
          return true;
        if (foldBitOpOfCastConstant(I))
          return true;
        break;
      case Instruction::PHI:
        if (shrinkPhiOfShuffles(I))
          return true;
        break;
      default:
        break;
      }
    }

    // Folds that apply to scalar-typed instructions as well.
    switch (Opcode) {
    case Instruction::Call:
      if (foldShuffleFromReductions(I))
        return true;
      if (foldCastFromReductions(I))
        return true;
      break;
    case Instruction::ExtractElement:
      if (foldShuffleChainsToReduce(I))
        return true;
      break;
    case Instruction::ICmp:
    case Instruction::FCmp:
      if (foldExtractExtract(I))
        return true;
      break;
    case Instruction::Or:
      if (foldConcatOfBoolMasks(I))
        return true;
      [[fallthrough]];
    default:
      if (Instruction::isBinaryOp(Opcode)) {
        if (foldExtractExtract(I))
          return true;
        if (foldExtractedCmps(I))
          return true;
        if (foldBinopOfReductions(I))
          return true;
      }
      break;
    }
    return false;
  };

  bool MadeChange = false;
  // First pass: visit every reachable instruction once, caching the next
  // node so instructions can be erased during iteration.
  for (BasicBlock &BB : F) {
    // ... (skip unreachable blocks)
    for (Instruction *I = &BB.front(), *NextInst = nullptr; I; I = NextInst) {
      NextInst = I->getNextNode();
      if (!I->isDebugOrPseudoInst())
        MadeChange |= FoldInst(*I);
    }
  }

  // Then drain the worklist of instructions queued by earlier folds.
  while (!Worklist.isEmpty()) {
    // ... (pop the next instruction, skipping null and trivially dead ones)
    MadeChange |= FoldInst(*I);
  }

  return MadeChange;
}
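// A minimal sketch of a new-pass-manager entry point driving the combiner
// above. This is illustrative only: the listing elides the file's actual
// VectorCombinePass definition, and the analysis set, cost kind, and the
// TryEarlyFoldsOnly value chosen here are assumptions matching the
// constructor shown at the top of the file.
PreservedAnalyses VectorCombinePass::run(Function &F,
                                         FunctionAnalysisManager &FAM) {
  auto &AC = FAM.getResult<AssumptionAnalysis>(F);
  TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
  DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F);
  AAResults &AA = FAM.getResult<AAManager>(F);
  const DataLayout &DL = F.getDataLayout();

  VectorCombine Combiner(F, TTI, DT, AA, AC, &DL,
                         TargetTransformInfo::TCK_RecipThroughput,
                         /*TryEarlyFoldsOnly=*/false);
  if (!Combiner.run())
    return PreservedAnalyses::all();

  // Only the CFG is guaranteed to survive these rewrites.
  PreservedAnalyses PA;
  PA.preserveSet<CFGAnalyses>();
  return PA;
}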