// Debug type string used by LLVM_DEBUG / -debug-only filtering for this pass.
37#define DEBUG_TYPE "vector-combine"
// Pass-wide statistics counters (reported with -stats). Each one counts how
// many times the corresponding VectorCombine transform fired.
43STATISTIC(NumVecLoad,
"Number of vector loads formed");
44STATISTIC(NumVecCmp,
"Number of vector compares formed");
45STATISTIC(NumVecBO,
"Number of vector binops formed");
46STATISTIC(NumVecCmpBO,
"Number of vector compare + binop formed");
47STATISTIC(NumShufOfBitcast,
"Number of shuffles moved after bitcast");
48STATISTIC(NumScalarBO,
"Number of scalar binops formed");
49STATISTIC(NumScalarCmp,
"Number of scalar compares formed");
53 cl::desc(
"Disable all vector combine transforms"));
57 cl::desc(
"Disable binop extract to shuffle transforms"));
61 cl::desc(
"Max number of instructions to scan for vector combining."));
// Sentinel used to mark an extract/insert element index as unknown or invalid
// (UINT_MAX can never be a real in-bounds vector lane index here).
63static const unsigned InvalidIndex = std::numeric_limits<unsigned>::max();
71 bool TryEarlyFoldsOnly)
72 :
F(
F), Builder(
F.getContext()),
TTI(
TTI), DT(DT), AA(AA), AC(AC),
DL(
DL),
73 CostKind(CostKind), TryEarlyFoldsOnly(TryEarlyFoldsOnly) {}
89 bool TryEarlyFoldsOnly;
100 unsigned PreferredExtractIndex)
const;
104 unsigned PreferredExtractIndex);
127 bool foldSelectShuffle(
Instruction &
I,
bool FromReduction =
false);
134 if (
auto *NewI = dyn_cast<Instruction>(&New)) {
151 if (
auto *OpI = dyn_cast<Instruction>(
Op)) {
162 while (
auto *BitCast = dyn_cast<BitCastInst>(V))
163 V = BitCast->getOperand(0);
171 if (!Load || !Load->isSimple() || !Load->hasOneUse() ||
172 Load->getFunction()->hasFnAttribute(Attribute::SanitizeMemTag) ||
178 Type *ScalarTy = Load->getType()->getScalarType();
181 if (!ScalarSize || !MinVectorSize || MinVectorSize % ScalarSize != 0 ||
188bool VectorCombine::vectorizeLoadInsert(
Instruction &
I) {
202 auto *
Load = dyn_cast<LoadInst>(
X);
214 Value *SrcPtr =
Load->getPointerOperand()->stripPointerCasts();
215 assert(isa<PointerType>(SrcPtr->
getType()) &&
"Expected a pointer type");
217 unsigned MinVecNumElts = MinVectorSize / ScalarSize;
218 auto *MinVecTy = VectorType::get(ScalarTy, MinVecNumElts,
false);
219 unsigned OffsetEltIndex = 0;
227 unsigned OffsetBitWidth =
DL->getIndexTypeSizeInBits(SrcPtr->
getType());
238 uint64_t ScalarSizeInBytes = ScalarSize / 8;
239 if (
Offset.urem(ScalarSizeInBytes) != 0)
243 OffsetEltIndex =
Offset.udiv(ScalarSizeInBytes).getZExtValue();
244 if (OffsetEltIndex >= MinVecNumElts)
261 unsigned AS =
Load->getPointerAddressSpace();
279 auto *Ty = cast<FixedVectorType>(
I.getType());
280 unsigned OutputNumElts = Ty->getNumElements();
282 assert(OffsetEltIndex < MinVecNumElts &&
"Address offset too big");
283 Mask[0] = OffsetEltIndex;
290 if (OldCost < NewCost || !NewCost.
isValid())
301 replaceValue(
I, *VecLd);
311 auto *Shuf = cast<ShuffleVectorInst>(&
I);
312 if (!Shuf->isIdentityWithPadding())
317 cast<FixedVectorType>(Shuf->getOperand(0)->getType())->getNumElements();
318 unsigned OpIndex =
any_of(Shuf->getShuffleMask(), [&NumOpElts](
int M) {
319 return M >= (int)(NumOpElts);
322 auto *
Load = dyn_cast<LoadInst>(Shuf->getOperand(
OpIndex));
329 auto *Ty = cast<FixedVectorType>(
I.getType());
330 Value *SrcPtr =
Load->getPointerOperand()->stripPointerCasts();
331 assert(isa<PointerType>(SrcPtr->
getType()) &&
"Expected a pointer type");
338 unsigned AS =
Load->getPointerAddressSpace();
353 if (OldCost < NewCost || !NewCost.
isValid())
360 replaceValue(
I, *VecLd);
372 assert(Index0C && Index1C &&
"Expected constant extract indexes");
374 unsigned Index0 = Index0C->getZExtValue();
375 unsigned Index1 = Index1C->getZExtValue();
378 if (Index0 == Index1)
402 if (PreferredExtractIndex == Index0)
404 if (PreferredExtractIndex == Index1)
408 return Index0 > Index1 ? Ext0 : Ext1;
420 unsigned PreferredExtractIndex) {
423 assert(Ext0IndexC && Ext1IndexC &&
"Expected constant extract indexes");
425 unsigned Opcode =
I.getOpcode();
429 auto *VecTy = cast<VectorType>(Ext0Src->
getType());
438 assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
439 "Expected a compare");
449 unsigned Ext0Index = Ext0IndexC->getZExtValue();
450 unsigned Ext1Index = Ext1IndexC->getZExtValue();
464 unsigned BestExtIndex = Extract0Cost > Extract1Cost ? Ext0Index : Ext1Index;
465 unsigned BestInsIndex = Extract0Cost > Extract1Cost ? Ext1Index : Ext0Index;
466 InstructionCost CheapExtractCost = std::min(Extract0Cost, Extract1Cost);
471 if (Ext0Src == Ext1Src && Ext0Index == Ext1Index) {
476 bool HasUseTax = Ext0 == Ext1 ? !Ext0->
hasNUses(2)
478 OldCost = CheapExtractCost + ScalarOpCost;
479 NewCost = VectorOpCost + CheapExtractCost + HasUseTax * CheapExtractCost;
483 OldCost = Extract0Cost + Extract1Cost + ScalarOpCost;
484 NewCost = VectorOpCost + CheapExtractCost +
489 ConvertToShuffle = getShuffleExtract(Ext0, Ext1, PreferredExtractIndex);
490 if (ConvertToShuffle) {
501 if (
auto *FixedVecTy = dyn_cast<FixedVectorType>(VecTy)) {
504 ShuffleMask[BestInsIndex] = BestExtIndex;
506 VecTy, ShuffleMask,
CostKind, 0,
nullptr,
511 {},
CostKind, 0,
nullptr, {ConvertToShuffle});
518 return OldCost < NewCost;
528 auto *VecTy = cast<FixedVectorType>(Vec->
getType());
530 ShufMask[NewIndex] = OldIndex;
543 if (!isa<FixedVectorType>(
X->getType()))
549 assert(isa<ConstantInt>(
C) &&
"Expected a constant index operand");
550 if (isa<Constant>(
X))
563 assert(isa<CmpInst>(&
I) &&
"Expected a compare");
566 "Expected matching constant extract indexes");
574 replaceValue(
I, *NewExt);
582 assert(isa<BinaryOperator>(&
I) &&
"Expected a binary operator");
585 "Expected matching constant extract indexes");
595 if (
auto *VecBOInst = dyn_cast<Instruction>(VecBO))
596 VecBOInst->copyIRFlags(&
I);
599 replaceValue(
I, *NewExt);
627 auto *Ext0 = cast<ExtractElementInst>(I0);
628 auto *Ext1 = cast<ExtractElementInst>(I1);
635 if (isExtractExtractCheap(Ext0, Ext1,
I, ExtractToChange, InsertIndex))
638 if (ExtractToChange) {
639 unsigned CheapExtractIdx = ExtractToChange == Ext0 ? C1 : C0;
644 if (ExtractToChange == Ext0)
651 foldExtExtCmp(Ext0, Ext1,
I);
653 foldExtExtBinop(Ext0, Ext1,
I);
679 auto *VecTy = cast<FixedVectorType>(
I.getType());
681 auto *SrcVecTy = dyn_cast<FixedVectorType>(SrcVec->
getType());
682 if (!SrcVecTy || ScalarTy != SrcVecTy->getScalarType())
686 unsigned NumElts = VecTy->getNumElements();
687 if (Index >= NumElts)
694 std::iota(
Mask.begin(),
Mask.end(), 0);
711 bool NeedLenChg = SrcVecTy->getNumElements() != NumElts;
722 if (NewCost > OldCost)
737 replaceValue(
I, *NewShuf);
756 auto *DestTy = dyn_cast<FixedVectorType>(
I.getType());
757 auto *SrcTy = dyn_cast<FixedVectorType>(V0->
getType());
758 if (!DestTy || !SrcTy)
761 unsigned DestEltSize = DestTy->getScalarSizeInBits();
762 unsigned SrcEltSize = SrcTy->getScalarSizeInBits();
763 if (SrcTy->getPrimitiveSizeInBits() % DestEltSize != 0)
766 bool IsUnary = isa<UndefValue>(V1);
773 if (!(BCTy0 && BCTy0->getElementType() == DestTy->getElementType()) &&
774 !(BCTy1 && BCTy1->getElementType() == DestTy->getElementType()))
779 if (DestEltSize <= SrcEltSize) {
782 assert(SrcEltSize % DestEltSize == 0 &&
"Unexpected shuffle mask");
783 unsigned ScaleFactor = SrcEltSize / DestEltSize;
788 assert(DestEltSize % SrcEltSize == 0 &&
"Unexpected shuffle mask");
789 unsigned ScaleFactor = DestEltSize / SrcEltSize;
796 unsigned NumSrcElts = SrcTy->getPrimitiveSizeInBits() / DestEltSize;
801 unsigned NumOps = IsUnary ? 1 : 2;
811 TargetTransformInfo::CastContextHint::None,
816 TargetTransformInfo::CastContextHint::None,
819 LLVM_DEBUG(
dbgs() <<
"Found a bitcasted shuffle: " <<
I <<
"\n OldCost: "
820 << OldCost <<
" vs NewCost: " << NewCost <<
"\n");
822 if (NewCost > OldCost || !NewCost.
isValid())
830 replaceValue(
I, *Shuf);
837bool VectorCombine::scalarizeVPIntrinsic(
Instruction &
I) {
838 if (!isa<VPIntrinsic>(
I))
851 if (!ScalarOp0 || !ScalarOp1)
859 auto IsAllTrueMask = [](
Value *MaskVal) {
861 if (
auto *ConstValue = dyn_cast<Constant>(SplattedVal))
862 return ConstValue->isAllOnesValue();
877 if (
auto *FVTy = dyn_cast<FixedVectorType>(VecTy))
878 Mask.resize(FVTy->getNumElements(), 0);
887 Args.push_back(
V->getType());
893 std::optional<unsigned> FunctionalOpcode =
895 std::optional<Intrinsic::ID> ScalarIntrID = std::nullopt;
896 if (!FunctionalOpcode) {
920 <<
", Cost of scalarizing:" << NewCost <<
"\n");
923 if (OldCost < NewCost || !NewCost.
isValid())
934 bool SafeToSpeculate;
937 .
hasFnAttr(Attribute::AttrKind::Speculatable);
940 *FunctionalOpcode, &VPI,
nullptr, &AC, &DT);
941 if (!SafeToSpeculate &&
948 {ScalarOp0, ScalarOp1})
950 ScalarOp0, ScalarOp1);
958bool VectorCombine::scalarizeBinopOrCmp(
Instruction &
I) {
969 bool IsCmp = Pred != CmpInst::Predicate::BAD_ICMP_PREDICATE;
971 for (
User *U :
I.users())
981 Constant *VecC0 =
nullptr, *VecC1 =
nullptr;
982 Value *V0 =
nullptr, *V1 =
nullptr;
995 if (IsConst0 && IsConst1)
997 if (!IsConst0 && !IsConst1 && Index0 != Index1)
1000 auto *VecTy0 = cast<VectorType>(Ins0->
getType());
1001 auto *VecTy1 = cast<VectorType>(Ins1->
getType());
1002 if (VecTy0->getElementCount().getKnownMinValue() <= Index0 ||
1003 VecTy1->getElementCount().getKnownMinValue() <= Index1)
1008 auto *I0 = dyn_cast_or_null<Instruction>(V0);
1009 auto *
I1 = dyn_cast_or_null<Instruction>(V1);
1010 if ((IsConst0 && I1 &&
I1->mayReadFromMemory()) ||
1016 Type *VecTy =
I.getType();
1021 "Unexpected types for insert element into binop or cmp");
1023 unsigned Opcode =
I.getOpcode();
1039 Instruction::InsertElement, VecTy,
CostKind, Index);
1041 (IsConst0 ? 0 : InsertCost) + (IsConst1 ? 0 : InsertCost) + VectorOpCost;
1043 (IsConst0 ? 0 : !Ins0->
hasOneUse() * InsertCost) +
1044 (IsConst1 ? 0 : !Ins1->
hasOneUse() * InsertCost);
1047 if (OldCost < NewCost || !NewCost.
isValid())
1067 Scalar->setName(
I.getName() +
".scalar");
1071 if (
auto *ScalarInst = dyn_cast<Instruction>(Scalar))
1072 ScalarInst->copyIRFlags(&
I);
1076 IsCmp ? Builder.
CreateCmp(Pred, VecC0, VecC1)
1079 replaceValue(
I, *Insert);
1087 auto *BI = dyn_cast<BinaryOperator>(&
I);
1091 if (!BI || !
I.getType()->isIntegerTy(1))
1096 Value *B0 =
I.getOperand(0), *B1 =
I.getOperand(1);
1116 auto *Ext0 = cast<ExtractElementInst>(I0);
1117 auto *Ext1 = cast<ExtractElementInst>(I1);
1121 assert((ConvertToShuf == Ext0 || ConvertToShuf == Ext1) &&
1122 "Unknown ExtractElementInst");
1127 unsigned CmpOpcode =
1129 auto *VecTy = dyn_cast<FixedVectorType>(
X->getType());
1142 Ext0Cost + Ext1Cost + CmpCost * 2 +
1148 int CheapIndex = ConvertToShuf == Ext0 ? Index1 : Index0;
1149 int ExpensiveIndex = ConvertToShuf == Ext0 ? Index0 : Index1;
1155 ShufMask[CheapIndex] = ExpensiveIndex;
1160 NewCost += Ext0->
hasOneUse() ? 0 : Ext0Cost;
1161 NewCost += Ext1->
hasOneUse() ? 0 : Ext1Cost;
1166 if (OldCost < NewCost || !NewCost.
isValid())
1176 Value *
LHS = ConvertToShuf == Ext0 ? Shuf : VCmp;
1177 Value *
RHS = ConvertToShuf == Ext0 ? VCmp : Shuf;
1180 replaceValue(
I, *NewExt);
1189 unsigned NumScanned = 0;
1199class ScalarizationResult {
1200 enum class StatusTy { Unsafe, Safe, SafeWithFreeze };
1205 ScalarizationResult(StatusTy
Status,
Value *ToFreeze =
nullptr)
1209 ScalarizationResult(
const ScalarizationResult &
Other) =
default;
1210 ~ScalarizationResult() {
1211 assert(!ToFreeze &&
"freeze() not called with ToFreeze being set");
1214 static ScalarizationResult unsafe() {
return {StatusTy::Unsafe}; }
1215 static ScalarizationResult safe() {
return {StatusTy::Safe}; }
1216 static ScalarizationResult safeWithFreeze(
Value *ToFreeze) {
1217 return {StatusTy::SafeWithFreeze, ToFreeze};
1221 bool isSafe()
const {
return Status == StatusTy::Safe; }
1223 bool isUnsafe()
const {
return Status == StatusTy::Unsafe; }
1226 bool isSafeWithFreeze()
const {
return Status == StatusTy::SafeWithFreeze; }
1231 Status = StatusTy::Unsafe;
1236 assert(isSafeWithFreeze() &&
1237 "should only be used when freezing is required");
1239 "UserI must be a user of ToFreeze");
1245 if (
U.get() == ToFreeze)
1262 uint64_t NumElements = VecTy->getElementCount().getKnownMinValue();
1264 if (
auto *
C = dyn_cast<ConstantInt>(
Idx)) {
1265 if (
C->getValue().ult(NumElements))
1266 return ScalarizationResult::safe();
1267 return ScalarizationResult::unsafe();
1270 unsigned IntWidth =
Idx->getType()->getScalarSizeInBits();
1271 APInt Zero(IntWidth, 0);
1272 APInt MaxElts(IntWidth, NumElements);
1278 true, &AC, CtxI, &DT)))
1279 return ScalarizationResult::safe();
1280 return ScalarizationResult::unsafe();
1293 if (ValidIndices.
contains(IdxRange))
1294 return ScalarizationResult::safeWithFreeze(IdxBase);
1295 return ScalarizationResult::unsafe();
1305 if (
auto *
C = dyn_cast<ConstantInt>(
Idx))
1307 C->getZExtValue() *
DL.getTypeStoreSize(ScalarType));
1319bool VectorCombine::foldSingleElementStore(
Instruction &
I) {
1320 auto *
SI = cast<StoreInst>(&
I);
1321 if (!
SI->isSimple() || !isa<VectorType>(
SI->getValueOperand()->getType()))
1329 if (!
match(
SI->getValueOperand(),
1334 if (
auto *Load = dyn_cast<LoadInst>(Source)) {
1335 auto VecTy = cast<VectorType>(
SI->getValueOperand()->getType());
1336 Value *SrcAddr =
Load->getPointerOperand()->stripPointerCasts();
1339 if (!
Load->isSimple() ||
Load->getParent() !=
SI->getParent() ||
1340 !
DL->typeSizeEqualsStoreSize(
Load->getType()->getScalarType()) ||
1341 SrcAddr !=
SI->getPointerOperand()->stripPointerCasts())
1345 if (ScalarizableIdx.isUnsafe() ||
1352 Worklist.
push(Load);
1354 if (ScalarizableIdx.isSafeWithFreeze())
1355 ScalarizableIdx.freeze(Builder, *cast<Instruction>(
Idx));
1357 SI->getValueOperand()->getType(),
SI->getPointerOperand(),
1358 {ConstantInt::get(Idx->getType(), 0), Idx});
1365 replaceValue(
I, *NSI);
1374bool VectorCombine::scalarizeLoadExtract(
Instruction &
I) {
1379 auto *LI = cast<LoadInst>(&
I);
1380 auto *VecTy = cast<VectorType>(LI->getType());
1381 if (LI->isVolatile() || !
DL->typeSizeEqualsStoreSize(VecTy->
getScalarType()))
1386 LI->getPointerAddressSpace(),
CostKind);
1390 unsigned NumInstChecked = 0;
1394 for (
auto &Pair : NeedFreeze)
1395 Pair.second.discard();
1402 auto *UI = dyn_cast<ExtractElementInst>(U);
1403 if (!UI || UI->getParent() != LI->getParent())
1410 make_range(std::next(LI->getIterator()), UI->getIterator())) {
1417 LastCheckedInst = UI;
1422 if (ScalarIdx.isUnsafe())
1424 if (ScalarIdx.isSafeWithFreeze()) {
1426 ScalarIdx.discard();
1429 auto *
Index = dyn_cast<ConstantInt>(UI->getIndexOperand());
1432 Index ?
Index->getZExtValue() : -1);
1439 if (ScalarizedCost >= OriginalCost)
1448 auto *EI = cast<ExtractElementInst>(U);
1449 Value *
Idx = EI->getIndexOperand();
1452 auto It = NeedFreeze.
find(EI);
1453 if (It != NeedFreeze.
end())
1454 It->second.freeze(Builder, *cast<Instruction>(
Idx));
1459 auto *NewLoad = cast<LoadInst>(Builder.
CreateLoad(
1460 VecTy->getElementType(),
GEP, EI->getName() +
".scalar"));
1463 LI->getAlign(), VecTy->getElementType(),
Idx, *
DL);
1464 NewLoad->setAlignment(ScalarOpAlignment);
1466 replaceValue(*EI, *NewLoad);
1469 FailureGuard.release();
1476bool VectorCombine::foldConcatOfBoolMasks(
Instruction &
I) {
1477 Type *Ty =
I.getType();
1482 if (
DL->isBigEndian())
1509 if (ShAmtX > ShAmtY) {
1517 uint64_t ShAmtDiff = ShAmtY - ShAmtX;
1518 unsigned NumSHL = (ShAmtX > 0) + (ShAmtY > 0);
1520 auto *MaskTy = dyn_cast<FixedVectorType>(SrcX->
getType());
1523 MaskTy->getNumElements() != ShAmtDiff ||
1524 MaskTy->getNumElements() > (
BitWidth / 2))
1533 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
1550 if (Ty != ConcatIntTy)
1556 LLVM_DEBUG(
dbgs() <<
"Found a concatenation of bitcasted bool masks: " <<
I
1557 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
1560 if (NewCost > OldCost)
1570 if (Ty != ConcatIntTy) {
1580 replaceValue(
I, *Result);
1586bool VectorCombine::foldPermuteOfBinops(
Instruction &
I) {
1597 Value *Op00, *Op01, *Op10, *Op11;
1605 if (!Match0 && !Match1)
1614 auto *ShuffleDstTy = dyn_cast<FixedVectorType>(
I.getType());
1615 auto *BinOpTy = dyn_cast<FixedVectorType>(BinOp->
getType());
1616 auto *Op0Ty = dyn_cast<FixedVectorType>(Op00->
getType());
1617 auto *Op1Ty = dyn_cast<FixedVectorType>(Op10->
getType());
1618 if (!ShuffleDstTy || !BinOpTy || !Op0Ty || !Op1Ty)
1621 unsigned NumSrcElts = BinOpTy->getNumElements();
1625 if ((BinOp->
isIntDivRem() || !isa<PoisonValue>(
I.getOperand(1))) &&
1626 any_of(OuterMask, [NumSrcElts](
int M) {
return M >= (int)NumSrcElts; }))
1631 for (
int M : OuterMask) {
1632 if (M < 0 || M >= (
int)NumSrcElts) {
1636 NewMask0.
push_back(Match0 ? Mask0[M] : M);
1637 NewMask1.
push_back(Match1 ? Mask1[M] : M);
1641 unsigned NumOpElts = Op0Ty->getNumElements();
1642 bool IsIdentity0 = ShuffleDstTy == Op0Ty &&
1643 all_of(NewMask0, [NumOpElts](
int M) {
return M < (int)NumOpElts; }) &&
1645 bool IsIdentity1 = ShuffleDstTy == Op1Ty &&
1646 all_of(NewMask1, [NumOpElts](
int M) {
return M < (int)NumOpElts; }) &&
1653 OuterMask,
CostKind, 0,
nullptr, {BinOp}, &
I);
1656 Mask0,
CostKind, 0,
nullptr, {Op00, Op01},
1660 Mask1,
CostKind, 0,
nullptr, {Op10, Op11},
1668 NewMask0,
CostKind, 0,
nullptr, {Op00, Op01});
1671 NewMask1,
CostKind, 0,
nullptr, {Op10, Op11});
1673 LLVM_DEBUG(
dbgs() <<
"Found a shuffle feeding a shuffled binop: " <<
I
1674 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
1678 if (NewCost > OldCost)
1688 if (
auto *NewInst = dyn_cast<Instruction>(NewBO))
1689 NewInst->copyIRFlags(BinOp);
1693 replaceValue(
I, *NewBO);
1699bool VectorCombine::foldShuffleOfBinops(
Instruction &
I) {
1707 if (
LHS->getOpcode() !=
RHS->getOpcode())
1711 bool IsCommutative =
false;
1716 auto *BO = cast<BinaryOperator>(LHS);
1720 IsCommutative = BinaryOperator::isCommutative(BO->getOpcode());
1724 IsCommutative = cast<CmpInst>(LHS)->isCommutative();
1728 auto *ShuffleDstTy = dyn_cast<FixedVectorType>(
I.getType());
1729 auto *BinResTy = dyn_cast<FixedVectorType>(
LHS->
getType());
1730 auto *BinOpTy = dyn_cast<FixedVectorType>(
X->getType());
1731 if (!ShuffleDstTy || !BinResTy || !BinOpTy ||
X->getType() !=
Z->getType())
1734 unsigned NumSrcElts = BinOpTy->getNumElements();
1737 if (IsCommutative &&
X != Z &&
Y != W && (
X == W ||
Y == Z))
1740 auto ConvertToUnary = [NumSrcElts](
int &
M) {
1741 if (M >= (
int)NumSrcElts)
1780 [NumSrcElts](
int M) {
return M < (int)NumSrcElts; })) {
1792 bool ReducedInstCount =
false;
1793 ReducedInstCount |= MergeInner(
X, 0, NewMask0,
CostKind);
1794 ReducedInstCount |= MergeInner(
Y, 0, NewMask1,
CostKind);
1795 ReducedInstCount |= MergeInner(Z, NumSrcElts, NewMask0,
CostKind);
1796 ReducedInstCount |= MergeInner(W, NumSrcElts, NewMask1,
CostKind);
1806 auto *ShuffleCmpTy =
1813 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
1818 ReducedInstCount |= (isa<Constant>(
X) && isa<Constant>(Z)) ||
1819 (isa<Constant>(
Y) && isa<Constant>(W));
1820 if (ReducedInstCount ? (NewCost > OldCost) : (NewCost >= OldCost))
1827 cast<BinaryOperator>(LHS)->getOpcode(), Shuf0, Shuf1)
1828 : Builder.
CreateCmp(PredLHS, Shuf0, Shuf1);
1831 if (
auto *NewInst = dyn_cast<Instruction>(NewBO)) {
1832 NewInst->copyIRFlags(LHS);
1833 NewInst->andIRFlags(RHS);
1838 replaceValue(
I, *NewBO);
1844bool VectorCombine::foldShuffleOfCastops(
Instruction &
I) {
1850 auto *C0 = dyn_cast<CastInst>(V0);
1851 auto *C1 = dyn_cast<CastInst>(V1);
1856 if (C0->getSrcTy() != C1->getSrcTy())
1860 if (Opcode != C1->getOpcode()) {
1862 Opcode = Instruction::SExt;
1867 auto *ShuffleDstTy = dyn_cast<FixedVectorType>(
I.getType());
1868 auto *CastDstTy = dyn_cast<FixedVectorType>(C0->getDestTy());
1869 auto *CastSrcTy = dyn_cast<FixedVectorType>(C0->getSrcTy());
1870 if (!ShuffleDstTy || !CastDstTy || !CastSrcTy)
1873 unsigned NumSrcElts = CastSrcTy->getNumElements();
1874 unsigned NumDstElts = CastDstTy->getNumElements();
1875 assert((NumDstElts == NumSrcElts || Opcode == Instruction::BitCast) &&
1876 "Only bitcasts expected to alter src/dst element counts");
1880 if (NumDstElts != NumSrcElts && (NumSrcElts % NumDstElts) != 0 &&
1881 (NumDstElts % NumSrcElts) != 0)
1885 if (NumSrcElts >= NumDstElts) {
1888 assert(NumSrcElts % NumDstElts == 0 &&
"Unexpected shuffle mask");
1889 unsigned ScaleFactor = NumSrcElts / NumDstElts;
1894 assert(NumDstElts % NumSrcElts == 0 &&
"Unexpected shuffle mask");
1895 unsigned ScaleFactor = NumDstElts / NumSrcElts;
1900 auto *NewShuffleDstTy =
1925 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
1927 if (NewCost > OldCost)
1935 if (
auto *NewInst = dyn_cast<Instruction>(Cast)) {
1936 NewInst->copyIRFlags(C0);
1937 NewInst->andIRFlags(C1);
1941 replaceValue(
I, *Cast);
1951bool VectorCombine::foldShuffleOfShuffles(
Instruction &
I) {
1953 Value *OuterV0, *OuterV1;
1959 Value *X0, *X1, *Y0, *Y1;
1964 if (!Match0 && !Match1)
1967 X0 = Match0 ? X0 : OuterV0;
1968 Y0 = Match0 ? Y0 : OuterV0;
1969 X1 = Match1 ? X1 : OuterV1;
1970 Y1 = Match1 ? Y1 : OuterV1;
1971 auto *ShuffleDstTy = dyn_cast<FixedVectorType>(
I.getType());
1972 auto *ShuffleSrcTy = dyn_cast<FixedVectorType>(X0->
getType());
1973 auto *ShuffleImmTy = dyn_cast<FixedVectorType>(OuterV0->
getType());
1974 if (!ShuffleDstTy || !ShuffleSrcTy || !ShuffleImmTy ||
1978 unsigned NumSrcElts = ShuffleSrcTy->getNumElements();
1979 unsigned NumImmElts = ShuffleImmTy->getNumElements();
1985 Value *NewX =
nullptr, *NewY =
nullptr;
1986 for (
int &M : NewMask) {
1987 Value *Src =
nullptr;
1988 if (0 <= M && M < (
int)NumImmElts) {
1992 Src =
M >= (int)NumSrcElts ? Y0 : X0;
1993 M =
M >= (int)NumSrcElts ? (M - NumSrcElts) :
M;
1995 }
else if (M >= (
int)NumImmElts) {
2000 Src =
M >= (int)NumSrcElts ? Y1 : X1;
2001 M =
M >= (int)NumSrcElts ? (M - NumSrcElts) :
M;
2005 assert(0 <= M && M < (
int)NumSrcElts &&
"Unexpected shuffle mask index");
2006 if (isa<UndefValue>(Src)) {
2009 if (!isa<PoisonValue>(Src))
2014 if (!NewX || NewX == Src) {
2018 if (!NewY || NewY == Src) {
2034 replaceValue(
I, *NewX);
2051 bool IsUnary =
all_of(NewMask, [&](
int M) {
return M < (int)NumSrcElts; });
2056 SK, ShuffleSrcTy, NewMask,
CostKind, 0,
nullptr, {NewX, NewY});
2058 NewCost += InnerCost0;
2060 NewCost += InnerCost1;
2063 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2065 if (NewCost > OldCost)
2069 replaceValue(
I, *Shuf);
2075bool VectorCombine::foldShuffleOfIntrinsics(
Instruction &
I) {
2082 auto *II0 = dyn_cast<IntrinsicInst>(V0);
2083 auto *II1 = dyn_cast<IntrinsicInst>(V1);
2088 if (IID != II1->getIntrinsicID())
2091 auto *ShuffleDstTy = dyn_cast<FixedVectorType>(
I.getType());
2092 auto *II0Ty = dyn_cast<FixedVectorType>(II0->getType());
2093 if (!ShuffleDstTy || !II0Ty)
2099 for (
unsigned I = 0, E = II0->arg_size();
I != E; ++
I)
2101 II0->getArgOperand(
I) != II1->getArgOperand(
I))
2112 for (
unsigned I = 0, E = II0->arg_size();
I != E; ++
I)
2114 NewArgsTy.
push_back(II0->getArgOperand(
I)->getType());
2116 auto *VecTy = cast<FixedVectorType>(II0->getArgOperand(
I)->getType());
2118 VecTy->getNumElements() * 2));
2126 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2129 if (NewCost > OldCost)
2133 for (
unsigned I = 0, E = II0->arg_size();
I != E; ++
I)
2138 II1->getArgOperand(
I), OldMask);
2145 if (
auto *NewInst = dyn_cast<Instruction>(NewIntrinsic)) {
2147 NewInst->andIRFlags(II1);
2150 replaceValue(
I, *NewIntrinsic);
2157 while (
auto *SV = dyn_cast<ShuffleVectorInst>(U->get())) {
2159 cast<FixedVectorType>(SV->getOperand(0)->getType())->getNumElements();
2160 int M = SV->getMaskValue(Lane);
2163 if (
static_cast<unsigned>(M) < NumElts) {
2164 U = &SV->getOperandUse(0);
2167 U = &SV->getOperandUse(1);
2178 auto [U, Lane] = IL;
2191 auto *Ty = cast<FixedVectorType>(Item.
front().first->get()->getType());
2192 unsigned NumElts = Ty->getNumElements();
2193 if (Item.
size() == NumElts || NumElts == 1 || Item.
size() % NumElts != 0)
2199 std::iota(ConcatMask.
begin(), ConcatMask.
end(), 0);
2203 unsigned NumSlices = Item.
size() / NumElts;
2208 for (
unsigned Slice = 0; Slice < NumSlices; ++Slice) {
2209 Use *SliceV = Item[Slice * NumElts].first;
2210 if (!SliceV || SliceV->get()->
getType() != Ty)
2212 for (
unsigned Elt = 0; Elt < NumElts; ++Elt) {
2213 auto [V, Lane] = Item[Slice * NumElts + Elt];
2214 if (Lane !=
static_cast<int>(Elt) || SliceV->get() != V->get())
2227 auto [FrontU, FrontLane] = Item.
front();
2229 if (IdentityLeafs.
contains(FrontU)) {
2230 return FrontU->get();
2236 if (ConcatLeafs.
contains(FrontU)) {
2238 cast<FixedVectorType>(FrontU->get()->getType())->getNumElements();
2240 for (
unsigned S = 0; S < Values.
size(); ++S)
2241 Values[S] = Item[S * NumElts].first->get();
2243 while (Values.
size() > 1) {
2246 std::iota(Mask.begin(), Mask.end(), 0);
2248 for (
unsigned S = 0; S < NewValues.
size(); ++S)
2256 auto *
I = cast<Instruction>(FrontU->get());
2257 auto *
II = dyn_cast<IntrinsicInst>(
I);
2258 unsigned NumOps =
I->getNumOperands() - (
II ? 1 : 0);
2260 for (
unsigned Idx = 0;
Idx < NumOps;
Idx++) {
2267 Ty, IdentityLeafs, SplatLeafs, ConcatLeafs,
2272 for (
const auto &Lane : Item)
2278 if (
auto *BI = dyn_cast<BinaryOperator>(
I)) {
2284 if (
auto *CI = dyn_cast<CmpInst>(
I)) {
2285 auto *
Value = Builder.
CreateCmp(CI->getPredicate(), Ops[0], Ops[1]);
2289 if (
auto *SI = dyn_cast<SelectInst>(
I)) {
2294 if (
auto *CI = dyn_cast<CastInst>(
I)) {
2305 assert(isa<UnaryInstruction>(
I) &&
"Unexpected instruction type in Generate");
2315bool VectorCombine::foldShuffleToIdentity(
Instruction &
I) {
2316 auto *Ty = dyn_cast<FixedVectorType>(
I.getType());
2317 if (!Ty ||
I.use_empty())
2321 for (
unsigned M = 0, E = Ty->getNumElements(); M < E; ++M)
2327 unsigned NumVisited = 0;
2329 while (!Worklist.
empty()) {
2334 auto [FrontU, FrontLane] = Item.
front();
2342 return X->getType() ==
Y->getType() &&
2347 if (FrontLane == 0 &&
2348 cast<FixedVectorType>(FrontU->get()->getType())->getNumElements() ==
2349 Ty->getNumElements() &&
2352 return !E.value().first || (IsEquiv(E.value().first->get(), FrontV) &&
2353 E.value().second == (
int)E.index());
2355 IdentityLeafs.
insert(FrontU);
2359 if (
auto *
C = dyn_cast<Constant>(FrontU);
2360 C &&
C->getSplatValue() &&
2364 return !U || (isa<Constant>(
U->get()) &&
2365 cast<Constant>(
U->get())->getSplatValue() ==
2366 cast<Constant>(FrontV)->getSplatValue());
2368 SplatLeafs.
insert(FrontU);
2373 auto [FrontU, FrontLane] = Item.
front();
2374 auto [
U, Lane] = IL;
2375 return !
U || (
U->get() == FrontU->get() && Lane == FrontLane);
2377 SplatLeafs.
insert(FrontU);
2383 auto CheckLaneIsEquivalentToFirst = [Item](
InstLane IL) {
2387 Value *
V = IL.first->get();
2388 if (
auto *
I = dyn_cast<Instruction>(V);
I && !
I->hasOneUse())
2392 if (
auto *CI = dyn_cast<CmpInst>(V))
2393 if (CI->getPredicate() != cast<CmpInst>(FrontV)->getPredicate())
2395 if (
auto *CI = dyn_cast<CastInst>(V))
2396 if (CI->getSrcTy()->getScalarType() !=
2397 cast<CastInst>(FrontV)->getSrcTy()->getScalarType())
2399 if (
auto *SI = dyn_cast<SelectInst>(V))
2400 if (!isa<VectorType>(
SI->getOperand(0)->getType()) ||
2401 SI->getOperand(0)->getType() !=
2402 cast<SelectInst>(FrontV)->getOperand(0)->getType())
2404 if (isa<CallInst>(V) && !isa<IntrinsicInst>(V))
2406 auto *
II = dyn_cast<IntrinsicInst>(V);
2407 return !
II || (isa<IntrinsicInst>(FrontV) &&
2408 II->getIntrinsicID() ==
2409 cast<IntrinsicInst>(FrontV)->getIntrinsicID() &&
2410 !
II->hasOperandBundles());
2414 if (isa<BinaryOperator, CmpInst>(FrontU)) {
2416 if (
auto *BO = dyn_cast<BinaryOperator>(FrontU);
2417 BO && BO->isIntDivRem())
2426 }
else if (
auto *BitCast = dyn_cast<BitCastInst>(FrontU)) {
2428 auto *DstTy = dyn_cast<FixedVectorType>(BitCast->getDestTy());
2429 auto *SrcTy = dyn_cast<FixedVectorType>(BitCast->getSrcTy());
2430 if (DstTy && SrcTy &&
2431 SrcTy->getNumElements() == DstTy->getNumElements()) {
2435 }
else if (isa<SelectInst>(FrontU)) {
2440 }
else if (
auto *
II = dyn_cast<IntrinsicInst>(FrontU);
2442 !
II->hasOperandBundles()) {
2443 for (
unsigned Op = 0, E =
II->getNumOperands() - 1;
Op < E;
Op++) {
2449 return !U || (cast<Instruction>(
U->get())->getOperand(
Op) ==
2450 cast<Instruction>(FrontV)->getOperand(
Op));
2462 ConcatLeafs.
insert(FrontU);
2469 if (NumVisited <= 1)
2472 LLVM_DEBUG(
dbgs() <<
"Found a superfluous identity shuffle: " <<
I <<
"\n");
2478 ConcatLeafs, Builder, &
TTI);
2479 replaceValue(
I, *V);
2486bool VectorCombine::foldShuffleFromReductions(
Instruction &
I) {
2487 auto *
II = dyn_cast<IntrinsicInst>(&
I);
2490 switch (
II->getIntrinsicID()) {
2491 case Intrinsic::vector_reduce_add:
2492 case Intrinsic::vector_reduce_mul:
2493 case Intrinsic::vector_reduce_and:
2494 case Intrinsic::vector_reduce_or:
2495 case Intrinsic::vector_reduce_xor:
2496 case Intrinsic::vector_reduce_smin:
2497 case Intrinsic::vector_reduce_smax:
2498 case Intrinsic::vector_reduce_umin:
2499 case Intrinsic::vector_reduce_umax:
2508 std::queue<Value *> Worklist;
2511 if (
auto *
Op = dyn_cast<Instruction>(
I.getOperand(0)))
2514 while (!Worklist.empty()) {
2515 Value *CV = Worklist.front();
2526 if (
auto *CI = dyn_cast<Instruction>(CV)) {
2527 if (CI->isBinaryOp()) {
2528 for (
auto *
Op : CI->operand_values())
2531 }
else if (
auto *SV = dyn_cast<ShuffleVectorInst>(CI)) {
2532 if (Shuffle && Shuffle != SV)
2549 for (
auto *V : Visited)
2550 for (
auto *U :
V->users())
2551 if (!Visited.contains(U) && U != &
I)
2555 dyn_cast<FixedVectorType>(
II->getOperand(0)->getType());
2560 if (!ShuffleInputType)
2568 sort(ConcatMask, [](
int X,
int Y) {
return (
unsigned)
X < (
unsigned)
Y; });
2572 bool IsTruncatingShuffle =
VecType->getNumElements() < NumInputElts;
2573 bool UsesSecondVec =
2574 any_of(ConcatMask, [&](
int M) {
return M >= (int)NumInputElts; });
2577 (UsesSecondVec && !IsTruncatingShuffle) ? VecType : ShuffleInputType;
2583 VecTyForCost, ConcatMask,
CostKind);
2585 LLVM_DEBUG(
dbgs() <<
"Found a reduction feeding from a shuffle: " << *Shuffle
2587 LLVM_DEBUG(
dbgs() <<
" OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2589 if (NewCost < OldCost) {
2593 LLVM_DEBUG(
dbgs() <<
"Created new shuffle: " << *NewShuffle <<
"\n");
2594 replaceValue(*Shuffle, *NewShuffle);
2599 return foldSelectShuffle(*Shuffle,
true);
2606bool VectorCombine::foldCastFromReductions(
Instruction &
I) {
2607 auto *
II = dyn_cast<IntrinsicInst>(&
I);
2611 bool TruncOnly =
false;
2614 case Intrinsic::vector_reduce_add:
2615 case Intrinsic::vector_reduce_mul:
2618 case Intrinsic::vector_reduce_and:
2619 case Intrinsic::vector_reduce_or:
2620 case Intrinsic::vector_reduce_xor:
2627 Value *ReductionSrc =
I.getOperand(0);
2637 auto *SrcTy = cast<VectorType>(Src->getType());
2638 auto *ReductionSrcTy = cast<VectorType>(ReductionSrc->
getType());
2639 Type *ResultTy =
I.getType();
2642 ReductionOpc, ReductionSrcTy, std::nullopt,
CostKind);
2645 cast<CastInst>(ReductionSrc));
2652 if (OldCost <= NewCost || !NewCost.
isValid())
2656 II->getIntrinsicID(), {Src});
2658 replaceValue(
I, *NewCast);
2672bool VectorCombine::foldSelectShuffle(
Instruction &
I,
bool FromReduction) {
2673 auto *SVI = cast<ShuffleVectorInst>(&
I);
2674 auto *VT = cast<FixedVectorType>(
I.getType());
2675 auto *Op0 = dyn_cast<Instruction>(SVI->getOperand(0));
2676 auto *Op1 = dyn_cast<Instruction>(SVI->getOperand(1));
2677 if (!Op0 || !Op1 || Op0 == Op1 || !Op0->isBinaryOp() || !Op1->isBinaryOp() ||
2681 auto *SVI0A = dyn_cast<Instruction>(Op0->getOperand(0));
2682 auto *SVI0B = dyn_cast<Instruction>(Op0->getOperand(1));
2683 auto *SVI1A = dyn_cast<Instruction>(Op1->getOperand(0));
2684 auto *SVI1B = dyn_cast<Instruction>(Op1->getOperand(1));
2687 if (!
I ||
I->getOperand(0)->getType() != VT)
2690 return U != Op0 && U != Op1 &&
2691 !(isa<ShuffleVectorInst>(U) &&
2692 (InputShuffles.contains(cast<Instruction>(U)) ||
2693 isInstructionTriviallyDead(cast<Instruction>(U))));
2696 if (checkSVNonOpUses(SVI0A) || checkSVNonOpUses(SVI0B) ||
2697 checkSVNonOpUses(SVI1A) || checkSVNonOpUses(SVI1B))
2705 for (
auto *U :
I->users()) {
2706 auto *SV = dyn_cast<ShuffleVectorInst>(U);
2707 if (!SV || SV->getType() != VT)
2709 if ((SV->getOperand(0) != Op0 && SV->getOperand(0) != Op1) ||
2710 (SV->getOperand(1) != Op0 && SV->getOperand(1) != Op1))
2717 if (!collectShuffles(Op0) || !collectShuffles(Op1))
2721 if (FromReduction && Shuffles.
size() > 1)
2726 if (!FromReduction) {
2728 for (
auto *U : SV->users()) {
2731 Shuffles.push_back(SSV);
2743 int MaxV1Elt = 0, MaxV2Elt = 0;
2744 unsigned NumElts = VT->getNumElements();
2747 SVN->getShuffleMask(Mask);
2751 Value *SVOp0 = SVN->getOperand(0);
2752 Value *SVOp1 = SVN->getOperand(1);
2753 if (isa<UndefValue>(SVOp1)) {
2754 auto *SSV = cast<ShuffleVectorInst>(SVOp0);
2757 for (
unsigned I = 0, E =
Mask.size();
I != E;
I++) {
2763 if (SVOp0 == Op1 && SVOp1 == Op0) {
2767 if (SVOp0 != Op0 || SVOp1 != Op1)
2774 for (
unsigned I = 0;
I <
Mask.size();
I++) {
2777 }
else if (Mask[
I] <
static_cast<int>(NumElts)) {
2778 MaxV1Elt = std::max(MaxV1Elt, Mask[
I]);
2779 auto It =
find_if(V1, [&](
const std::pair<int, int> &
A) {
2780 return Mask[
I] ==
A.first;
2789 MaxV2Elt = std::max<int>(MaxV2Elt, Mask[
I] - NumElts);
2790 auto It =
find_if(V2, [&](
const std::pair<int, int> &
A) {
2791 return Mask[
I] -
static_cast<int>(NumElts) ==
A.first;
2794 ReconstructMask.
push_back(NumElts + It -
V2.begin());
2797 V2.emplace_back(Mask[
I] - NumElts, NumElts +
V2.size());
2805 sort(ReconstructMask);
2806 OrigReconstructMasks.
push_back(std::move(ReconstructMask));
2813 if (V1.
empty() ||
V2.empty() ||
2814 (MaxV1Elt ==
static_cast<int>(V1.
size()) - 1 &&
2815 MaxV2Elt ==
static_cast<int>(
V2.size()) - 1))
2822 auto *SV = dyn_cast<ShuffleVectorInst>(
I);
2825 if (isa<UndefValue>(SV->getOperand(1)))
2826 if (
auto *SSV = dyn_cast<ShuffleVectorInst>(SV->getOperand(0)))
2827 if (InputShuffles.contains(SSV))
2829 return SV->getMaskValue(M);
2837 std::pair<int, int>
Y) {
2838 int MXA = GetBaseMaskValue(
A,
X.first);
2839 int MYA = GetBaseMaskValue(
A,
Y.first);
2842 stable_sort(V1, [&](std::pair<int, int>
A, std::pair<int, int>
B) {
2843 return SortBase(SVI0A,
A,
B);
2845 stable_sort(V2, [&](std::pair<int, int>
A, std::pair<int, int>
B) {
2846 return SortBase(SVI1A,
A,
B);
2851 for (
const auto &Mask : OrigReconstructMasks) {
2853 for (
int M : Mask) {
2855 auto It =
find_if(V, [M](
auto A) {
return A.second ==
M; });
2856 assert(It !=
V.end() &&
"Expected all entries in Mask");
2857 return std::distance(
V.begin(), It);
2861 else if (M <
static_cast<int>(NumElts)) {
2862 ReconstructMask.
push_back(FindIndex(V1, M));
2864 ReconstructMask.
push_back(NumElts + FindIndex(V2, M));
2867 ReconstructMasks.push_back(std::move(ReconstructMask));
2873 for (
unsigned I = 0;
I < V1.
size();
I++) {
2874 V1A.
push_back(GetBaseMaskValue(SVI0A, V1[
I].first));
2875 V1B.
push_back(GetBaseMaskValue(SVI0B, V1[
I].first));
2877 for (
unsigned I = 0;
I <
V2.size();
I++) {
2878 V2A.
push_back(GetBaseMaskValue(SVI1A, V2[
I].first));
2879 V2B.
push_back(GetBaseMaskValue(SVI1B, V2[
I].first));
2881 while (V1A.
size() < NumElts) {
2885 while (V2A.
size() < NumElts) {
2891 auto *SV = dyn_cast<ShuffleVectorInst>(
I);
2897 VT, SV->getShuffleMask(),
CostKind);
2908 CostBefore += std::accumulate(Shuffles.begin(), Shuffles.end(),
2910 CostBefore += std::accumulate(InputShuffles.begin(), InputShuffles.end(),
2922 CostAfter += std::accumulate(ReconstructMasks.begin(), ReconstructMasks.end(),
2924 std::set<SmallVector<int>> OutputShuffleMasks({V1A, V1B, V2A, V2B});
2926 std::accumulate(OutputShuffleMasks.begin(), OutputShuffleMasks.end(),
2929 LLVM_DEBUG(
dbgs() <<
"Found a binop select shuffle pattern: " <<
I <<
"\n");
2931 <<
" vs CostAfter: " << CostAfter <<
"\n");
2932 if (CostBefore <= CostAfter)
2937 auto *SV = dyn_cast<ShuffleVectorInst>(
I);
2940 if (isa<UndefValue>(SV->getOperand(1)))
2941 if (
auto *SSV = dyn_cast<ShuffleVectorInst>(SV->getOperand(0)))
2942 if (InputShuffles.contains(SSV))
2944 return SV->getOperand(
Op);
2948 GetShuffleOperand(SVI0A, 1), V1A);
2951 GetShuffleOperand(SVI0B, 1), V1B);
2954 GetShuffleOperand(SVI1A, 1), V2A);
2957 GetShuffleOperand(SVI1B, 1), V2B);
2961 if (
auto *
I = dyn_cast<Instruction>(NOp0))
2962 I->copyIRFlags(Op0,
true);
2966 if (
auto *
I = dyn_cast<Instruction>(NOp1))
2967 I->copyIRFlags(Op1,
true);
2969 for (
int S = 0, E = ReconstructMasks.size(); S != E; S++) {
2972 replaceValue(*Shuffles[S], *NSV);
2975 Worklist.pushValue(NSV0A);
2976 Worklist.pushValue(NSV0B);
2977 Worklist.pushValue(NSV1A);
2978 Worklist.pushValue(NSV1B);
2979 for (
auto *S : Shuffles)
2991 Value *ZExted, *OtherOperand;
2997 Value *ZExtOperand =
I.getOperand(
I.getOperand(0) == OtherOperand ? 1 : 0);
2999 auto *BigTy = cast<FixedVectorType>(
I.getType());
3000 auto *SmallTy = cast<FixedVectorType>(ZExted->
getType());
3001 unsigned BW = SmallTy->getElementType()->getPrimitiveSizeInBits();
3003 if (
I.getOpcode() == Instruction::LShr) {
3020 Instruction::ZExt, BigTy, SmallTy,
3021 TargetTransformInfo::CastContextHint::None,
CostKind);
3027 auto *UI = cast<Instruction>(U);
3033 ShrinkCost += ZExtCost;
3048 ShrinkCost += ZExtCost;
3053 if (!isa<Constant>(OtherOperand))
3055 Instruction::Trunc, SmallTy, BigTy,
3056 TargetTransformInfo::CastContextHint::None,
CostKind);
3061 if (ShrinkCost > CurrentCost)
3065 Value *Op0 = ZExted;
3068 if (
I.getOperand(0) == OtherOperand)
3072 cast<Instruction>(NewBinOp)->copyIRFlags(&
I);
3073 cast<Instruction>(NewBinOp)->copyMetadata(
I);
3075 replaceValue(
I, *NewZExtr);
3081bool VectorCombine::foldInsExtVectorToShuffle(
Instruction &
I) {
3082 Value *DstVec, *SrcVec;
3090 auto *VecTy = dyn_cast<FixedVectorType>(
I.getType());
3091 if (!VecTy || SrcVec->
getType() != VecTy)
3094 unsigned NumElts = VecTy->getNumElements();
3095 if (ExtIdx >= NumElts || InsIdx >= NumElts)
3101 if (isa<PoisonValue>(DstVec) && !isa<UndefValue>(SrcVec)) {
3103 Mask[InsIdx] = ExtIdx;
3107 std::iota(
Mask.begin(),
Mask.end(), 0);
3108 Mask[InsIdx] = ExtIdx + NumElts;
3112 auto *
Ins = cast<InsertElementInst>(&
I);
3113 auto *
Ext = cast<ExtractElementInst>(
I.getOperand(1));
3126 if (!
Ext->hasOneUse())
3129 LLVM_DEBUG(
dbgs() <<
"Found a insert/extract shuffle-like pair: " <<
I
3130 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
3133 if (OldCost < NewCost)
3137 if (isa<UndefValue>(DstVec) && !isa<UndefValue>(SrcVec)) {
3143 replaceValue(
I, *Shuf);
3150bool VectorCombine::run() {
3160 bool MadeChange =
false;
3163 bool IsVectorType = isa<VectorType>(
I.getType());
3164 bool IsFixedVectorType = isa<FixedVectorType>(
I.getType());
3165 auto Opcode =
I.getOpcode();
3173 if (IsFixedVectorType) {
3175 case Instruction::InsertElement:
3176 MadeChange |= vectorizeLoadInsert(
I);
3178 case Instruction::ShuffleVector:
3179 MadeChange |= widenSubvectorLoad(
I);
3189 MadeChange |= scalarizeBinopOrCmp(
I);
3190 MadeChange |= scalarizeLoadExtract(
I);
3191 MadeChange |= scalarizeVPIntrinsic(
I);
3194 if (Opcode == Instruction::Store)
3195 MadeChange |= foldSingleElementStore(
I);
3198 if (TryEarlyFoldsOnly)
3205 if (IsFixedVectorType) {
3207 case Instruction::InsertElement:
3208 MadeChange |= foldInsExtFNeg(
I);
3209 MadeChange |= foldInsExtVectorToShuffle(
I);
3211 case Instruction::ShuffleVector:
3212 MadeChange |= foldPermuteOfBinops(
I);
3213 MadeChange |= foldShuffleOfBinops(
I);
3214 MadeChange |= foldShuffleOfCastops(
I);
3215 MadeChange |= foldShuffleOfShuffles(
I);
3216 MadeChange |= foldShuffleOfIntrinsics(
I);
3217 MadeChange |= foldSelectShuffle(
I);
3218 MadeChange |= foldShuffleToIdentity(
I);
3220 case Instruction::BitCast:
3221 MadeChange |= foldBitcastShuffle(
I);
3224 MadeChange |= shrinkType(
I);
3229 case Instruction::Call:
3230 MadeChange |= foldShuffleFromReductions(
I);
3231 MadeChange |= foldCastFromReductions(
I);
3233 case Instruction::ICmp:
3234 case Instruction::FCmp:
3235 MadeChange |= foldExtractExtract(
I);
3237 case Instruction::Or:
3238 MadeChange |= foldConcatOfBoolMasks(
I);
3242 MadeChange |= foldExtractExtract(
I);
3243 MadeChange |= foldExtractedCmps(
I);
3256 if (
I.isDebugOrPseudoInst())
3262 while (!Worklist.isEmpty()) {
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This is the interface for LLVM's primary stateless and local alias analysis.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
std::optional< std::vector< StOtherPiece > > Other
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
This is the interface for a simple mod/ref and alias analysis over globals.
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU)
uint64_t IntrinsicInst * II
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
FunctionAnalysisManager FAM
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static SymbolRef::Type getType(const Symbol *Sym)
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
static bool isFreeConcat(ArrayRef< InstLane > Item, TTI::TargetCostKind CostKind, const TargetTransformInfo &TTI)
Detect concat of multiple values into a vector.
static SmallVector< InstLane > generateInstLaneVectorFromOperand(ArrayRef< InstLane > Item, int Op)
static Value * createShiftShuffle(Value *Vec, unsigned OldIndex, unsigned NewIndex, IRBuilder<> &Builder)
Create a shuffle that translates (shifts) 1 element from the input vector to a new element location.
static Value * peekThroughBitcasts(Value *V)
Return the source operand of a potentially bitcasted value.
static Align computeAlignmentAfterScalarization(Align VectorAlignment, Type *ScalarType, Value *Idx, const DataLayout &DL)
The memory operation on a vector of ScalarType had alignment of VectorAlignment.
static ScalarizationResult canScalarizeAccess(VectorType *VecTy, Value *Idx, Instruction *CtxI, AssumptionCache &AC, const DominatorTree &DT)
Check if it is legal to scalarize a memory access to VecTy at index Idx.
static cl::opt< bool > DisableVectorCombine("disable-vector-combine", cl::init(false), cl::Hidden, cl::desc("Disable all vector combine transforms"))
static InstLane lookThroughShuffles(Use *U, int Lane)
static bool canWidenLoad(LoadInst *Load, const TargetTransformInfo &TTI)
static const unsigned InvalidIndex
static Value * generateNewInstTree(ArrayRef< InstLane > Item, FixedVectorType *Ty, const SmallPtrSet< Use *, 4 > &IdentityLeafs, const SmallPtrSet< Use *, 4 > &SplatLeafs, const SmallPtrSet< Use *, 4 > &ConcatLeafs, IRBuilder<> &Builder, const TargetTransformInfo *TTI)
std::pair< Use *, int > InstLane
static cl::opt< unsigned > MaxInstrsToScan("vector-combine-max-scan-instrs", cl::init(30), cl::Hidden, cl::desc("Max number of instructions to scan for vector combining."))
static cl::opt< bool > DisableBinopExtractShuffle("disable-binop-extract-shuffle", cl::init(false), cl::Hidden, cl::desc("Disable binop extract to shuffle transforms"))
static bool isMemModifiedBetween(BasicBlock::iterator Begin, BasicBlock::iterator End, const MemoryLocation &Loc, AAResults &AA)
static ExtractElementInst * translateExtract(ExtractElementInst *ExtElt, unsigned NewIndex, IRBuilder<> &Builder)
Given an extract element instruction with constant index operand, shuffle the source vector (shift th...
static constexpr int Concat[]
A manager for alias analyses.
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
Check whether or not an instruction may read or write the optionally specified memory location.
Class for arbitrary precision integers.
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & front() const
front - Get the first element.
size_t size() const
size - Get the array size.
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
bool hasFnAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the function.
LLVM Basic Block Representation.
InstListType::iterator iterator
Instruction iterators...
BinaryOps getOpcode() const
Represents analyses that only rely on functions' control flow.
Value * getArgOperand(unsigned i) const
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
bool isFPPredicate() const
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
static std::optional< CmpPredicate > getMatching(CmpPredicate A, CmpPredicate B)
Compares two CmpPredicates taking samesign into account and returns the canonicalized CmpPredicate if...
static Constant * getExtractElement(Constant *Vec, Constant *Idx, Type *OnlyIfReducedTy=nullptr)
This is the shared class of boolean and integer constants.
const APInt & getValue() const
Return the constant as an APInt value reference.
This class represents a range of values.
ConstantRange urem(const ConstantRange &Other) const
Return a new range representing the possible values resulting from an unsigned remainder operation of...
ConstantRange binaryAnd(const ConstantRange &Other) const
Return a new range representing the possible values resulting from a binary-and of a value in this ra...
bool contains(const APInt &Val) const
Return true if the specified value is in the set.
static Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
This class represents a cast from floating point to signed integer.
This class represents a cast from floating point to unsigned integer.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static FixedVectorType * getDoubleElementsVectorType(FixedVectorType *VTy)
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Value * CreateFreeze(Value *V, const Twine &Name="")
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="", MDNode *FPMathTag=nullptr, FMFSource FMFSource={})
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Value * CreatePointerBitCastOrAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
Value * CreateUnOp(Instruction::UnaryOps Opc, Value *V, const Twine &Name="", MDNode *FPMathTag=nullptr)
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Value * CreateFNegFMF(Value *V, FMFSource FMFSource, const Twine &Name="", MDNode *FPMathTag=nullptr)
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
InstructionWorklist - This is the worklist management logic for InstCombine and other simplification ...
void pushUsersToWorkList(Instruction &I)
When an instruction is simplified, add all users of the instruction to the work lists because they mi...
void push(Instruction *I)
Push the instruction onto the worklist stack.
void remove(Instruction *I)
Remove I from the worklist if it exists.
void copyIRFlags(const Value *V, bool IncludeWrapFlags=true)
Convenience method to copy supported exact, fast-math, and (optionally) wrapping flags from V to this...
bool comesBefore(const Instruction *Other) const
Given an instruction Other in the same basic block as this instruction, return true if this instructi...
bool mayReadFromMemory() const LLVM_READONLY
Return true if this instruction may read memory.
void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
An instruction for reading from memory.
Representation for a specific memory location.
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
void preserveSet()
Mark an analysis set as preserved.
This class represents a sign extension of integer types.
This class represents a cast from signed integer to floating point.
This instruction constructs a fixed permutation of two input vectors.
int getMaskValue(unsigned Elt) const
Return the shuffle mask value of this instruction for the given element index.
VectorType * getType() const
Overload to return most specific vector type.
static void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
static void commuteShuffleMask(MutableArrayRef< int > Mask, unsigned InVecNumElts)
Change values in a shuffle permute mask assuming the two vector operands of length InVecNumElts have ...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
void assign(size_type NumElts, ValueParamT Elt)
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setAlignment(Align Align)
Analysis pass providing the TargetTransformInfo.
This class represents a truncation of integer types.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isVectorTy() const
True if this is an instance of VectorType.
bool isPointerTy() const
True if this is an instance of PointerType.
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
This class represents a cast unsigned integer to floating point.
A Use represents the edge between a Value definition and its users.
Value * getOperand(unsigned i) const
static bool isVPBinOp(Intrinsic::ID ID)
This is the common base class for vector predication intrinsics.
std::optional< unsigned > getFunctionalIntrinsicID() const
std::optional< unsigned > getFunctionalOpcode() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
This is a wrapper around stripAndAccumulateConstantOffsets with the in-bounds requirement set to fals...
bool hasOneUse() const
Return true if there is exactly one use of this value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
unsigned getValueID() const
Return an ID for the concrete type of this object.
bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
StringRef getName() const
Return a constant reference to the value's name.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &)
This class represents zero extension of integer types.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
AttributeList getAttributes(LLVMContext &C, ID id)
Return the attributes for an intrinsic.
class_match< PoisonValue > m_Poison()
Match an arbitrary poison constant.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
BinaryOp_match< LHS, RHS, Instruction::URem > m_URem(const LHS &L, const RHS &R)
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
DisjointOr_match< LHS, RHS > m_DisjointOr(const LHS &L, const RHS &R)
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
match_combine_and< LTy, RTy > m_CombineAnd(const LTy &L, const RTy &R)
Combine two pattern matchers matching L && R.
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
OneUse_match< T > m_OneUse(const T &SubPattern)
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
BinOpPred_match< LHS, RHS, is_bitwiselogic_op, true > m_c_BitwiseLogic(const LHS &L, const RHS &R)
Matches bitwise logic operations in either order.
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
match_combine_or< CastInst_match< OpTy, SExtInst >, NNegZExt_match< OpTy > > m_SExtLike(const OpTy &Op)
Match either "sext" or "zext nneg".
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void stable_sort(R &&Range)
UnaryFunction for_each(R &&Range, UnaryFunction F)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
detail::scope_exit< std::decay_t< Callable > > make_scope_exit(Callable &&F)
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
unsigned getArithmeticReductionInstruction(Intrinsic::ID RdxID)
Returns the arithmetic instruction opcode used when expanding a reduction.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
bool mustSuppressSpeculation(const LoadInst &LI)
Return true if speculation of the given load must be suppressed to avoid ordering or interfering with...
bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
ConstantRange computeConstantRange(const Value *V, bool ForSigned, bool UseInstrInfo=true, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Determine the possible constant range of an integer or vector of integer value.
bool isSafeToSpeculativelyExecuteWithOpcode(unsigned Opcode, const Instruction *Inst, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true)
This returns the same result as isSafeToSpeculativelyExecute if Opcode is the actual opcode of Inst.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
bool isModSet(const ModRefInfo MRI)
void sort(IteratorTy Start, IteratorTy End)
bool isSafeToLoadUnconditionally(Value *V, Align Alignment, const APInt &Size, const DataLayout &DL, Instruction *ScanFrom, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if we know that executing a load from this value cannot trap.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
void propagateIRFlags(Value *I, ArrayRef< Value * > VL, Value *OpValue=nullptr, bool IncludeWrapFlags=true)
Get the intersection (logical and) of all of the potential IR flags of each scalar operation (VL) tha...
bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
constexpr int PoisonMaskElem
void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
DWARFExpression::Operation Op
constexpr unsigned BitWidth
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
bool isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Returns true if V cannot be poison, but may be undef.
bool isTriviallyVectorizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially vectorizable.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.