// NOTE(review): extraction fragment — the leading digits fused into each line
// are original-file line numbers from the extraction, not code; interior source
// lines are elided in this view. Code kept byte-identical.
// Pass statistics counters and command-line option descriptions for the
// vector-combine transforms; InvalidIndex is the "no index" sentinel.
37#define DEBUG_TYPE "vector-combine"
43STATISTIC(NumVecLoad,
"Number of vector loads formed");
44STATISTIC(NumVecCmp,
"Number of vector compares formed");
45STATISTIC(NumVecBO,
"Number of vector binops formed");
46STATISTIC(NumVecCmpBO,
"Number of vector compare + binop formed");
47STATISTIC(NumShufOfBitcast,
"Number of shuffles moved after bitcast");
48STATISTIC(NumScalarBO,
"Number of scalar binops formed");
49STATISTIC(NumScalarCmp,
"Number of scalar compares formed");
53 cl::desc(
"Disable all vector combine transforms"));
57 cl::desc(
"Disable binop extract to shuffle transforms"));
61 cl::desc(
"Max number of instructions to scan for vector combining."));
63static const unsigned InvalidIndex = std::numeric_limits<unsigned>::max();
// NOTE(review): fragment of the VectorCombine class — the constructor's
// parameter list and several member declarations are elided; what remains is
// the member-initializer list wiring F/Builder/TTI/DT/AA/AC/DL/CostKind and
// the TryEarlyFoldsOnly flag, plus fold-entry-point declarations.
71 bool TryEarlyFoldsOnly)
72 :
F(
F), Builder(
F.getContext()),
TTI(
TTI), DT(DT), AA(AA), AC(AC),
DL(
DL),
73 CostKind(CostKind), TryEarlyFoldsOnly(TryEarlyFoldsOnly) {}
89 bool TryEarlyFoldsOnly;
100 unsigned PreferredExtractIndex)
const;
104 unsigned PreferredExtractIndex);
127 bool foldSelectShuffle(
Instruction &
I,
bool FromReduction =
false);
// NOTE(review): fragments of small helpers (interior lines elided). Visible:
// dyn_cast guards on a replacement value and its operands, a loop that peels
// bitcasts off V, and load-widening preconditions (simple single-use load,
// no memtag sanitizer, and MinVectorSize divisible by the scalar size).
134 if (
auto *NewI = dyn_cast<Instruction>(&New)) {
151 if (
auto *OpI = dyn_cast<Instruction>(
Op)) {
162 while (
auto *BitCast = dyn_cast<BitCastInst>(V))
163 V = BitCast->getOperand(0);
171 if (!Load || !Load->isSimple() || !Load->hasOneUse() ||
172 Load->getFunction()->hasFnAttribute(Attribute::SanitizeMemTag) ||
178 Type *ScalarTy = Load->getType()->getScalarType();
181 if (!ScalarSize || !MinVectorSize || MinVectorSize % ScalarSize != 0 ||
// NOTE(review): fragment of VectorCombine::vectorizeLoadInsert — interior
// lines are elided. Visible logic: strip pointer casts from the load's
// pointer, build a MinVecTy of MinVectorSize/ScalarSize elements, derive an
// element offset from the byte offset (must be a multiple of the scalar
// size and fit inside the min vector), compare Old/New costs, and replace I
// with the widened vector load on success.
188bool VectorCombine::vectorizeLoadInsert(
Instruction &
I) {
202 auto *
Load = dyn_cast<LoadInst>(
X);
214 Value *SrcPtr =
Load->getPointerOperand()->stripPointerCasts();
215 assert(isa<PointerType>(SrcPtr->
getType()) &&
"Expected a pointer type");
217 unsigned MinVecNumElts = MinVectorSize / ScalarSize;
218 auto *MinVecTy = VectorType::get(ScalarTy, MinVecNumElts,
false);
219 unsigned OffsetEltIndex = 0;
227 unsigned OffsetBitWidth =
DL->getIndexTypeSizeInBits(SrcPtr->
getType());
238 uint64_t ScalarSizeInBytes = ScalarSize / 8;
239 if (
Offset.urem(ScalarSizeInBytes) != 0)
243 OffsetEltIndex =
Offset.udiv(ScalarSizeInBytes).getZExtValue();
244 if (OffsetEltIndex >= MinVecNumElts)
261 unsigned AS =
Load->getPointerAddressSpace();
279 auto *Ty = cast<FixedVectorType>(
I.getType());
280 unsigned OutputNumElts = Ty->getNumElements();
282 assert(OffsetEltIndex < MinVecNumElts &&
"Address offset too big");
283 Mask[0] = OffsetEltIndex;
290 if (OldCost < NewCost || !NewCost.
isValid())
301 replaceValue(
I, *VecLd);
// NOTE(review): fragment of a subvector-load-widening fold (function header
// elided). Visible logic: requires an identity-with-padding shuffle, picks
// the shuffle operand whose mask references it, requires it to be a load,
// cost-compares, and replaces I with the widened vector load.
311 auto *Shuf = cast<ShuffleVectorInst>(&
I);
312 if (!Shuf->isIdentityWithPadding())
317 cast<FixedVectorType>(Shuf->getOperand(0)->getType())->getNumElements();
318 unsigned OpIndex =
any_of(Shuf->getShuffleMask(), [&NumOpElts](
int M) {
319 return M >= (int)(NumOpElts);
322 auto *
Load = dyn_cast<LoadInst>(Shuf->getOperand(
OpIndex));
329 auto *Ty = cast<FixedVectorType>(
I.getType());
330 Value *SrcPtr =
Load->getPointerOperand()->stripPointerCasts();
331 assert(isa<PointerType>(SrcPtr->
getType()) &&
"Expected a pointer type");
338 unsigned AS =
Load->getPointerAddressSpace();
353 if (OldCost < NewCost || !NewCost.
isValid())
360 replaceValue(
I, *VecLd);
// NOTE(review): fragment of getShuffleExtract — interior lines elided.
// Visible logic: both extract indexes must be constants; equal indexes are
// handled earlier; the PreferredExtractIndex breaks ties, otherwise the
// extract with the larger index is returned as the one to convert.
372 assert(Index0C && Index1C &&
"Expected constant extract indexes");
374 unsigned Index0 = Index0C->getZExtValue();
375 unsigned Index1 = Index1C->getZExtValue();
378 if (Index0 == Index1)
402 if (PreferredExtractIndex == Index0)
404 if (PreferredExtractIndex == Index1)
408 return Index0 > Index1 ? Ext0 : Ext1;
// NOTE(review): fragment of isExtractExtractCheap — interior lines elided.
// Visible logic: compares the cost of scalarizing two extractelements feeding
// a binop/cmp against keeping the op in vector form; when the sources/indexes
// match, a use-count "tax" models the extra extract; otherwise the cheaper
// extract is kept and the other may be converted to a shuffle
// (getShuffleExtract), whose mask/permute cost is added. Returns true when
// the old (scalar) form is cheaper.
420 unsigned PreferredExtractIndex) {
423 assert(Ext0IndexC && Ext1IndexC &&
"Expected constant extract indexes");
425 unsigned Opcode =
I.getOpcode();
429 auto *VecTy = cast<VectorType>(Ext0Src->
getType());
438 assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
439 "Expected a compare");
449 unsigned Ext0Index = Ext0IndexC->getZExtValue();
450 unsigned Ext1Index = Ext1IndexC->getZExtValue();
464 unsigned BestExtIndex = Extract0Cost > Extract1Cost ? Ext0Index : Ext1Index;
465 unsigned BestInsIndex = Extract0Cost > Extract1Cost ? Ext1Index : Ext0Index;
466 InstructionCost CheapExtractCost = std::min(Extract0Cost, Extract1Cost);
471 if (Ext0Src == Ext1Src && Ext0Index == Ext1Index) {
476 bool HasUseTax = Ext0 == Ext1 ? !Ext0->
hasNUses(2)
478 OldCost = CheapExtractCost + ScalarOpCost;
479 NewCost = VectorOpCost + CheapExtractCost + HasUseTax * CheapExtractCost;
483 OldCost = Extract0Cost + Extract1Cost + ScalarOpCost;
484 NewCost = VectorOpCost + CheapExtractCost +
489 ConvertToShuffle = getShuffleExtract(Ext0, Ext1, PreferredExtractIndex);
490 if (ConvertToShuffle) {
501 if (
auto *FixedVecTy = dyn_cast<FixedVectorType>(VecTy)) {
504 ShuffleMask[BestInsIndex] = BestExtIndex;
506 VecTy, ShuffleMask,
CostKind, 0,
nullptr,
511 {},
CostKind, 0,
nullptr, {ConvertToShuffle});
518 return OldCost < NewCost;
// NOTE(review): fragments of the shift-shuffle / translate-extract helpers
// (interior lines elided). Visible: a shuffle mask that moves OldIndex into
// NewIndex, plus guards requiring a fixed vector source, a constant index
// operand, and a non-constant source value.
528 auto *VecTy = cast<FixedVectorType>(Vec->
getType());
530 ShufMask[NewIndex] = OldIndex;
543 if (!isa<FixedVectorType>(
X->getType()))
549 assert(isa<ConstantInt>(
C) &&
"Expected a constant index operand");
550 if (isa<Constant>(
X))
// NOTE(review): fragment of foldExtExtCmp — interior lines elided. Visible:
// asserts I is a compare with matching constant extract indexes, then
// replaces I with a new extract of the vectorized compare.
563 assert(isa<CmpInst>(&
I) &&
"Expected a compare");
566 "Expected matching constant extract indexes");
574 replaceValue(
I, *NewExt);
// NOTE(review): fragment of foldExtExtBinop — interior lines elided. Visible:
// asserts I is a binary operator with matching constant extract indexes,
// copies I's IR flags (e.g. nsw/nuw/fast-math) onto the new vector binop,
// and replaces I with an extract of that vector op.
582 assert(isa<BinaryOperator>(&
I) &&
"Expected a binary operator");
585 "Expected matching constant extract indexes");
595 if (
auto *VecBOInst = dyn_cast<Instruction>(VecBO))
596 VecBOInst->copyIRFlags(&
I);
599 replaceValue(
I, *NewExt);
// NOTE(review): fragment of foldExtractExtract — interior lines elided.
// Visible: bails when isExtractExtractCheap says scalar is cheaper; if one
// extract must be changed, its index is remapped to the cheap index; then
// dispatches to foldExtExtCmp or foldExtExtBinop.
627 auto *Ext0 = cast<ExtractElementInst>(I0);
628 auto *Ext1 = cast<ExtractElementInst>(I1);
635 if (isExtractExtractCheap(Ext0, Ext1,
I, ExtractToChange, InsertIndex))
638 if (ExtractToChange) {
639 unsigned CheapExtractIdx = ExtractToChange == Ext0 ? C1 : C0;
644 if (ExtractToChange == Ext0)
651 foldExtExtCmp(Ext0, Ext1,
I);
653 foldExtExtBinop(Ext0, Ext1,
I);
// NOTE(review): fragment of an insert/extract fold (header elided). Visible:
// requires matching scalar types and an in-bounds Index, builds an identity
// iota mask, tracks whether the source vector length differs, cost-compares,
// and replaces I with the new shuffle.
679 auto *VecTy = cast<FixedVectorType>(
I.getType());
681 auto *SrcVecTy = dyn_cast<FixedVectorType>(SrcVec->
getType());
682 if (!SrcVecTy || ScalarTy != SrcVecTy->getScalarType())
686 unsigned NumElts = VecTy->getNumElements();
687 if (Index >= NumElts)
694 std::iota(
Mask.begin(),
Mask.end(), 0);
710 bool NeedLenChg = SrcVecTy->getNumElements() != NumElts;
721 if (NewCost > OldCost)
736 replaceValue(
I, *NewShuf);
// NOTE(review): fragment of the shuffle-of-bitcast fold — interior lines
// elided. Visible logic: both src/dst must be fixed vectors; the shuffle
// mask is scaled by SrcEltSize/DestEltSize (narrowing) or
// DestEltSize/SrcEltSize (widening); unary shuffles (undef second operand)
// cost one cast instead of two; the fold is applied only when the new cost
// is valid and not worse.
755 auto *DestTy = dyn_cast<FixedVectorType>(
I.getType());
756 auto *SrcTy = dyn_cast<FixedVectorType>(V0->
getType());
757 if (!DestTy || !SrcTy)
760 unsigned DestEltSize = DestTy->getScalarSizeInBits();
761 unsigned SrcEltSize = SrcTy->getScalarSizeInBits();
762 if (SrcTy->getPrimitiveSizeInBits() % DestEltSize != 0)
765 bool IsUnary = isa<UndefValue>(V1);
772 if (!(BCTy0 && BCTy0->getElementType() == DestTy->getElementType()) &&
773 !(BCTy1 && BCTy1->getElementType() == DestTy->getElementType()))
778 if (DestEltSize <= SrcEltSize) {
781 assert(SrcEltSize % DestEltSize == 0 &&
"Unexpected shuffle mask");
782 unsigned ScaleFactor = SrcEltSize / DestEltSize;
787 assert(DestEltSize % SrcEltSize == 0 &&
"Unexpected shuffle mask");
788 unsigned ScaleFactor = DestEltSize / SrcEltSize;
795 unsigned NumSrcElts = SrcTy->getPrimitiveSizeInBits() / DestEltSize;
800 unsigned NumOps = IsUnary ? 1 : 2;
810 TargetTransformInfo::CastContextHint::None,
815 TargetTransformInfo::CastContextHint::None,
818 LLVM_DEBUG(
dbgs() <<
"Found a bitcasted shuffle: " <<
I <<
"\n OldCost: "
819 << OldCost <<
" vs NewCost: " << NewCost <<
"\n");
821 if (NewCost > OldCost || !NewCost.
isValid())
829 replaceValue(
I, *Shuf);
// NOTE(review): fragment of VectorCombine::scalarizeVPIntrinsic — interior
// lines elided. Visible logic: requires a VP intrinsic with two splatted
// scalar operands and an all-true mask; maps the VP id to a functional
// opcode or scalar intrinsic; cost-compares; speculation safety comes from
// the Speculatable attribute or a safe-to-speculate query before emitting
// the scalar op and re-splatting.
836bool VectorCombine::scalarizeVPIntrinsic(
Instruction &
I) {
837 if (!isa<VPIntrinsic>(
I))
850 if (!ScalarOp0 || !ScalarOp1)
858 auto IsAllTrueMask = [](
Value *MaskVal) {
860 if (
auto *ConstValue = dyn_cast<Constant>(SplattedVal))
861 return ConstValue->isAllOnesValue();
876 if (
auto *FVTy = dyn_cast<FixedVectorType>(VecTy))
877 Mask.resize(FVTy->getNumElements(), 0);
886 Args.push_back(
V->getType());
892 std::optional<unsigned> FunctionalOpcode =
894 std::optional<Intrinsic::ID> ScalarIntrID = std::nullopt;
895 if (!FunctionalOpcode) {
919 <<
", Cost of scalarizing:" << NewCost <<
"\n");
922 if (OldCost < NewCost || !NewCost.
isValid())
933 bool SafeToSpeculate;
936 .
hasFnAttr(Attribute::AttrKind::Speculatable);
939 *FunctionalOpcode, &VPI,
nullptr, &AC, &DT);
940 if (!SafeToSpeculate &&
947 {ScalarOp0, ScalarOp1})
949 ScalarOp0, ScalarOp1);
// NOTE(review): fragment of VectorCombine::scalarizeBinopOrCmp — interior
// lines elided. Visible logic: a binop/cmp of two insertelements is turned
// into a scalar op plus one insert. At most one side may be a constant;
// non-constant sides must share the insert index and be in-bounds for both
// vector types; memory-reading scalars are rejected when paired with a
// constant. New cost charges an insert per non-constant side and discounts
// multi-use inserts. IR flags are copied to the scalar op before the final
// insert (or constant-folded vector cmp) replaces I.
957bool VectorCombine::scalarizeBinopOrCmp(
Instruction &
I) {
968 bool IsCmp = Pred != CmpInst::Predicate::BAD_ICMP_PREDICATE;
970 for (
User *U :
I.users())
980 Constant *VecC0 =
nullptr, *VecC1 =
nullptr;
981 Value *V0 =
nullptr, *V1 =
nullptr;
994 if (IsConst0 && IsConst1)
996 if (!IsConst0 && !IsConst1 && Index0 != Index1)
999 auto *VecTy0 = cast<VectorType>(Ins0->
getType());
1000 auto *VecTy1 = cast<VectorType>(Ins1->
getType());
1001 if (VecTy0->getElementCount().getKnownMinValue() <= Index0 ||
1002 VecTy1->getElementCount().getKnownMinValue() <= Index1)
1007 auto *I0 = dyn_cast_or_null<Instruction>(V0);
1008 auto *
I1 = dyn_cast_or_null<Instruction>(V1);
1009 if ((IsConst0 && I1 &&
I1->mayReadFromMemory()) ||
1015 Type *VecTy =
I.getType();
1020 "Unexpected types for insert element into binop or cmp");
1022 unsigned Opcode =
I.getOpcode();
1038 Instruction::InsertElement, VecTy,
CostKind, Index);
1040 (IsConst0 ? 0 : InsertCost) + (IsConst1 ? 0 : InsertCost) + VectorOpCost;
1042 (IsConst0 ? 0 : !Ins0->
hasOneUse() * InsertCost) +
1043 (IsConst1 ? 0 : !Ins1->
hasOneUse() * InsertCost);
1046 if (OldCost < NewCost || !NewCost.
isValid())
1066 Scalar->setName(
I.getName() +
".scalar");
1070 if (
auto *ScalarInst = dyn_cast<Instruction>(Scalar))
1071 ScalarInst->copyIRFlags(&
I);
1075 IsCmp ? Builder.
CreateCmp(Pred, VecC0, VecC1)
1078 replaceValue(
I, *Insert);
// NOTE(review): fragment of the extracted-compares fold — interior lines
// elided. Visible logic: an i1 binop of two extracted compares is rewritten
// as a vector compare plus a shuffle that lines up the "expensive" index
// with the cheap one; multi-use extracts keep their cost in NewCost; the
// final scalar result is re-extracted at CheapIndex.
1086 auto *BI = dyn_cast<BinaryOperator>(&
I);
1090 if (!BI || !
I.getType()->isIntegerTy(1))
1095 Value *B0 =
I.getOperand(0), *B1 =
I.getOperand(1);
1113 auto *Ext0 = cast<ExtractElementInst>(I0);
1114 auto *Ext1 = cast<ExtractElementInst>(I1);
1118 assert((ConvertToShuf == Ext0 || ConvertToShuf == Ext1) &&
1119 "Unknown ExtractElementInst");
1124 unsigned CmpOpcode =
1126 auto *VecTy = dyn_cast<FixedVectorType>(
X->getType());
1139 Ext0Cost + Ext1Cost + CmpCost * 2 +
1145 int CheapIndex = ConvertToShuf == Ext0 ? Index1 : Index0;
1146 int ExpensiveIndex = ConvertToShuf == Ext0 ? Index0 : Index1;
1152 ShufMask[CheapIndex] = ExpensiveIndex;
1157 NewCost += Ext0->
hasOneUse() ? 0 : Ext0Cost;
1158 NewCost += Ext1->
hasOneUse() ? 0 : Ext1Cost;
1163 if (OldCost < NewCost || !NewCost.
isValid())
1173 Value *
LHS = ConvertToShuf == Ext0 ? Shuf : VCmp;
1174 Value *
RHS = ConvertToShuf == Ext0 ? VCmp : Shuf;
1177 replaceValue(
I, *NewExt);
1186 unsigned NumScanned = 0;
// NOTE(review): fragment of class ScalarizationResult — interior lines
// elided. A tri-state result (Unsafe / Safe / SafeWithFreeze) for index
// scalarization; SafeWithFreeze carries the value that must be frozen, and
// the destructor asserts freeze() (or discard) was called when required.
1196class ScalarizationResult {
1197 enum class StatusTy { Unsafe, Safe, SafeWithFreeze };
1202 ScalarizationResult(StatusTy
Status,
Value *ToFreeze =
nullptr)
1206 ScalarizationResult(
const ScalarizationResult &
Other) =
default;
1207 ~ScalarizationResult() {
1208 assert(!ToFreeze &&
"freeze() not called with ToFreeze being set");
1211 static ScalarizationResult unsafe() {
return {StatusTy::Unsafe}; }
1212 static ScalarizationResult safe() {
return {StatusTy::Safe}; }
1213 static ScalarizationResult safeWithFreeze(
Value *ToFreeze) {
1214 return {StatusTy::SafeWithFreeze, ToFreeze};
1218 bool isSafe()
const {
return Status == StatusTy::Safe; }
1220 bool isUnsafe()
const {
return Status == StatusTy::Unsafe; }
1223 bool isSafeWithFreeze()
const {
return Status == StatusTy::SafeWithFreeze; }
1228 Status = StatusTy::Unsafe;
1233 assert(isSafeWithFreeze() &&
1234 "should only be used when freezing is required");
1236 "UserI must be a user of ToFreeze");
1242 if (
U.get() == ToFreeze)
// NOTE(review): fragments of canScalarizeAccess and the scalarized-alignment
// helper — interior lines elided. Visible: constant indexes are safe iff
// ult(NumElements); variable indexes are proven in [0, NumElements) via a
// known-range query (safe), via a containing ConstantRange (safe-with-freeze
// of the index base), or rejected; alignment uses index * store-size.
1259 uint64_t NumElements = VecTy->getElementCount().getKnownMinValue();
1261 if (
auto *
C = dyn_cast<ConstantInt>(
Idx)) {
1262 if (
C->getValue().ult(NumElements))
1263 return ScalarizationResult::safe();
1264 return ScalarizationResult::unsafe();
1267 unsigned IntWidth =
Idx->getType()->getScalarSizeInBits();
1268 APInt Zero(IntWidth, 0);
1269 APInt MaxElts(IntWidth, NumElements);
1275 true, &AC, CtxI, &DT)))
1276 return ScalarizationResult::safe();
1277 return ScalarizationResult::unsafe();
1290 if (ValidIndices.
contains(IdxRange))
1291 return ScalarizationResult::safeWithFreeze(IdxBase);
1292 return ScalarizationResult::unsafe();
1302 if (
auto *
C = dyn_cast<ConstantInt>(
Idx))
1304 C->getZExtValue() *
DL.getTypeStoreSize(ScalarType));
// NOTE(review): fragment of VectorCombine::foldSingleElementStore — interior
// lines elided. Visible logic: a simple store of an insertelement whose
// source is a simple same-block load of the same (cast-stripped) address is
// narrowed to a scalar store through a GEP at the insert index; the index
// must be scalarizable (freezing it if required) and the dead load is pushed
// onto the worklist.
1316bool VectorCombine::foldSingleElementStore(
Instruction &
I) {
1317 auto *
SI = cast<StoreInst>(&
I);
1318 if (!
SI->isSimple() || !isa<VectorType>(
SI->getValueOperand()->getType()))
1326 if (!
match(
SI->getValueOperand(),
1331 if (
auto *Load = dyn_cast<LoadInst>(Source)) {
1332 auto VecTy = cast<VectorType>(
SI->getValueOperand()->getType());
1333 Value *SrcAddr =
Load->getPointerOperand()->stripPointerCasts();
1336 if (!
Load->isSimple() ||
Load->getParent() !=
SI->getParent() ||
1337 !
DL->typeSizeEqualsStoreSize(
Load->getType()->getScalarType()) ||
1338 SrcAddr !=
SI->getPointerOperand()->stripPointerCasts())
1342 if (ScalarizableIdx.isUnsafe() ||
1349 Worklist.
push(Load);
1351 if (ScalarizableIdx.isSafeWithFreeze())
1352 ScalarizableIdx.freeze(Builder, *cast<Instruction>(
Idx));
1354 SI->getValueOperand()->getType(),
SI->getPointerOperand(),
1355 {ConstantInt::get(Idx->getType(), 0), Idx});
1362 replaceValue(
I, *NSI);
// NOTE(review): fragment of VectorCombine::scalarizeLoadExtract — interior
// lines elided. Visible logic: a non-volatile vector load whose users are
// same-block extractelements is scalarized; each index is checked for
// safety (with freeze support and a NeedFreeze map discarded on failure via
// a guard), memory between load and use is scanned up to a limit, costs are
// compared, and each extract becomes a scalar load with a recomputed
// alignment.
1371bool VectorCombine::scalarizeLoadExtract(
Instruction &
I) {
1376 auto *LI = cast<LoadInst>(&
I);
1377 auto *VecTy = cast<VectorType>(LI->getType());
1378 if (LI->isVolatile() || !
DL->typeSizeEqualsStoreSize(VecTy->
getScalarType()))
1383 LI->getPointerAddressSpace(),
CostKind);
1387 unsigned NumInstChecked = 0;
1391 for (
auto &Pair : NeedFreeze)
1392 Pair.second.discard();
1399 auto *UI = dyn_cast<ExtractElementInst>(U);
1400 if (!UI || UI->getParent() != LI->getParent())
1407 make_range(std::next(LI->getIterator()), UI->getIterator())) {
1414 LastCheckedInst = UI;
1419 if (ScalarIdx.isUnsafe())
1421 if (ScalarIdx.isSafeWithFreeze()) {
1423 ScalarIdx.discard();
1426 auto *
Index = dyn_cast<ConstantInt>(UI->getIndexOperand());
1429 Index ?
Index->getZExtValue() : -1);
1436 if (ScalarizedCost >= OriginalCost)
1445 auto *EI = cast<ExtractElementInst>(U);
1446 Value *
Idx = EI->getIndexOperand();
1449 auto It = NeedFreeze.
find(EI);
1450 if (It != NeedFreeze.
end())
1451 It->second.freeze(Builder, *cast<Instruction>(
Idx));
1456 auto *NewLoad = cast<LoadInst>(Builder.
CreateLoad(
1457 VecTy->getElementType(),
GEP, EI->getName() +
".scalar"));
1460 LI->getAlign(), VecTy->getElementType(),
Idx, *
DL);
1461 NewLoad->setAlignment(ScalarOpAlignment);
1463 replaceValue(*EI, *NewLoad);
1466 FailureGuard.release();
// NOTE(review): fragment of VectorCombine::foldConcatOfBoolMasks — interior
// lines elided. Visible logic: little-endian only; two shifted bitcast bool
// masks are merged when the shift difference equals the mask element count
// and the mask fits in half the bit width; the masks are concatenated with
// an iota shuffle, re-bitcast, and zero-extended back to Ty if needed.
1473bool VectorCombine::foldConcatOfBoolMasks(
Instruction &
I) {
1474 Type *Ty =
I.getType();
1479 if (
DL->isBigEndian())
1506 if (ShAmtX > ShAmtY) {
1514 uint64_t ShAmtDiff = ShAmtY - ShAmtX;
1515 unsigned NumSHL = (ShAmtX > 0) + (ShAmtY > 0);
1517 auto *MaskTy = dyn_cast<FixedVectorType>(SrcX->
getType());
1520 MaskTy->getNumElements() != ShAmtDiff ||
1521 MaskTy->getNumElements() > (
BitWidth / 2))
1530 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
1547 if (Ty != ConcatIntTy)
1553 LLVM_DEBUG(
dbgs() <<
"Found a concatenation of bitcasted bool masks: " <<
I
1554 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
1557 if (NewCost > OldCost)
1567 if (Ty != ConcatIntTy) {
1577 replaceValue(
I, *Result);
// NOTE(review): fragment of VectorCombine::foldPermuteOfBinops — interior
// lines elided. Visible logic: a shuffle of a binop's result is pushed onto
// the binop's operands when all types are fixed vectors; poison-referencing
// mask lanes are disallowed for int div/rem (or a non-poison second shuffle
// operand); costs compare the outer-shuffle form against per-operand
// shuffles; IR flags are copied to the rebuilt binop.
1583bool VectorCombine::foldPermuteOfBinops(
Instruction &
I) {
1607 auto *ShuffleDstTy = dyn_cast<FixedVectorType>(
I.getType());
1608 auto *BinOpTy = dyn_cast<FixedVectorType>(BinOp->
getType());
1609 auto *Op0Ty = dyn_cast<FixedVectorType>(Op00->
getType());
1610 auto *Op1Ty = dyn_cast<FixedVectorType>(Op10->
getType());
1611 if (!ShuffleDstTy || !BinOpTy || !Op0Ty || !Op1Ty)
1614 unsigned NumSrcElts = BinOpTy->getNumElements();
1618 if ((BinOp->
isIntDivRem() || !isa<PoisonValue>(
I.getOperand(1))) &&
1619 any_of(OuterMask, [NumSrcElts](
int M) {
return M >= (int)NumSrcElts; }))
1624 for (
int M : OuterMask) {
1625 if (M < 0 || M >= (
int)NumSrcElts) {
1638 OuterMask,
CostKind, 0,
nullptr, {BinOp}, &
I) +
1640 CostKind, 0,
nullptr, {Op00, Op01},
1643 CostKind, 0,
nullptr, {Op10, Op11},
1648 CostKind, 0,
nullptr, {Op00, Op01}) +
1650 CostKind, 0,
nullptr, {Op10, Op11}) +
1653 LLVM_DEBUG(
dbgs() <<
"Found a shuffle feeding a shuffled binop: " <<
I
1654 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
1658 if (NewCost > OldCost)
1666 if (
auto *NewInst = dyn_cast<Instruction>(NewBO))
1667 NewInst->copyIRFlags(BinOp);
1671 replaceValue(
I, *NewBO);
// NOTE(review): fragment of VectorCombine::foldShuffleOfBinops — interior
// lines elided. Visible logic: a shuffle of two same-opcode binops/cmps is
// rewritten as shuffles of their operands followed by one binop/cmp;
// commutative ops may swap operands to enable a unary mask; MergeInner
// folds inner shuffles and tracks whether instruction count drops
// (constant operand pairs also count); IR flags are intersected via
// copyIRFlags + andIRFlags on the new op.
1677bool VectorCombine::foldShuffleOfBinops(
Instruction &
I) {
1685 if (
LHS->getOpcode() !=
RHS->getOpcode())
1689 bool IsCommutative =
false;
1694 auto *BO = cast<BinaryOperator>(LHS);
1698 IsCommutative = BinaryOperator::isCommutative(BO->getOpcode());
1702 IsCommutative = cast<CmpInst>(LHS)->isCommutative();
1706 auto *ShuffleDstTy = dyn_cast<FixedVectorType>(
I.getType());
1707 auto *BinResTy = dyn_cast<FixedVectorType>(
LHS->
getType());
1708 auto *BinOpTy = dyn_cast<FixedVectorType>(
X->getType());
1709 if (!ShuffleDstTy || !BinResTy || !BinOpTy ||
X->getType() !=
Z->getType())
1712 unsigned NumSrcElts = BinOpTy->getNumElements();
1715 if (IsCommutative &&
X != Z &&
Y != W && (
X == W ||
Y == Z))
1718 auto ConvertToUnary = [NumSrcElts](
int &
M) {
1719 if (M >= (
int)NumSrcElts)
1758 [NumSrcElts](
int M) {
return M < (int)NumSrcElts; })) {
1770 bool ReducedInstCount =
false;
1771 ReducedInstCount |= MergeInner(
X, 0, NewMask0,
CostKind);
1772 ReducedInstCount |= MergeInner(
Y, 0, NewMask1,
CostKind);
1773 ReducedInstCount |= MergeInner(Z, NumSrcElts, NewMask0,
CostKind);
1774 ReducedInstCount |= MergeInner(W, NumSrcElts, NewMask1,
CostKind);
1784 auto *ShuffleCmpTy =
1791 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
1796 ReducedInstCount |= (isa<Constant>(
X) && isa<Constant>(Z)) ||
1797 (isa<Constant>(
Y) && isa<Constant>(W));
1798 if (ReducedInstCount ? (NewCost > OldCost) : (NewCost >= OldCost))
1805 cast<BinaryOperator>(LHS)->getOpcode(), Shuf0, Shuf1)
1806 : Builder.
CreateCmp(PredLHS, Shuf0, Shuf1);
1809 if (
auto *NewInst = dyn_cast<Instruction>(NewBO)) {
1810 NewInst->copyIRFlags(LHS);
1811 NewInst->andIRFlags(RHS);
1816 replaceValue(
I, *NewBO);
// NOTE(review): fragment of VectorCombine::foldShuffleOfCastops — interior
// lines elided. Visible logic: a shuffle of two casts with matching source
// types becomes one cast of a shuffled source; mismatched zext/sext pairs
// unify to SExt; bitcasts may change element counts, so the mask is scaled
// by the src/dst element-count ratio; IR flags are intersected on the new
// cast.
1822bool VectorCombine::foldShuffleOfCastops(
Instruction &
I) {
1828 auto *C0 = dyn_cast<CastInst>(V0);
1829 auto *C1 = dyn_cast<CastInst>(V1);
1834 if (C0->getSrcTy() != C1->getSrcTy())
1838 if (Opcode != C1->getOpcode()) {
1840 Opcode = Instruction::SExt;
1845 auto *ShuffleDstTy = dyn_cast<FixedVectorType>(
I.getType());
1846 auto *CastDstTy = dyn_cast<FixedVectorType>(C0->getDestTy());
1847 auto *CastSrcTy = dyn_cast<FixedVectorType>(C0->getSrcTy());
1848 if (!ShuffleDstTy || !CastDstTy || !CastSrcTy)
1851 unsigned NumSrcElts = CastSrcTy->getNumElements();
1852 unsigned NumDstElts = CastDstTy->getNumElements();
1853 assert((NumDstElts == NumSrcElts || Opcode == Instruction::BitCast) &&
1854 "Only bitcasts expected to alter src/dst element counts");
1858 if (NumDstElts != NumSrcElts && (NumSrcElts % NumDstElts) != 0 &&
1859 (NumDstElts % NumSrcElts) != 0)
1863 if (NumSrcElts >= NumDstElts) {
1866 assert(NumSrcElts % NumDstElts == 0 &&
"Unexpected shuffle mask");
1867 unsigned ScaleFactor = NumSrcElts / NumDstElts;
1872 assert(NumDstElts % NumSrcElts == 0 &&
"Unexpected shuffle mask");
1873 unsigned ScaleFactor = NumDstElts / NumSrcElts;
1878 auto *NewShuffleDstTy =
1903 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
1905 if (NewCost > OldCost)
1913 if (
auto *NewInst = dyn_cast<Instruction>(Cast)) {
1914 NewInst->copyIRFlags(C0);
1915 NewInst->andIRFlags(C1);
1919 replaceValue(
I, *Cast);
// NOTE(review): fragment of VectorCombine::foldShuffleOfShuffles — interior
// lines elided. Visible logic: an outer shuffle of (up to) two inner
// shuffles is merged into one shuffle of the inner sources; each outer mask
// lane is remapped through the matching inner operand pair (X0/Y0 or X1/Y1),
// poison/undef lanes are normalized, and at most two distinct sources
// (NewX/NewY) may survive; a direct replaceValue happens when the merge
// collapses to a single source, otherwise costs gate the rewrite.
1929bool VectorCombine::foldShuffleOfShuffles(
Instruction &
I) {
1931 Value *OuterV0, *OuterV1;
1937 Value *X0, *X1, *Y0, *Y1;
1942 if (!Match0 && !Match1)
1945 X0 = Match0 ? X0 : OuterV0;
1946 Y0 = Match0 ? Y0 : OuterV0;
1947 X1 = Match1 ? X1 : OuterV1;
1948 Y1 = Match1 ? Y1 : OuterV1;
1949 auto *ShuffleDstTy = dyn_cast<FixedVectorType>(
I.getType());
1950 auto *ShuffleSrcTy = dyn_cast<FixedVectorType>(X0->
getType());
1951 auto *ShuffleImmTy = dyn_cast<FixedVectorType>(OuterV0->
getType());
1952 if (!ShuffleDstTy || !ShuffleSrcTy || !ShuffleImmTy ||
1956 unsigned NumSrcElts = ShuffleSrcTy->getNumElements();
1957 unsigned NumImmElts = ShuffleImmTy->getNumElements();
1963 Value *NewX =
nullptr, *NewY =
nullptr;
1964 for (
int &M : NewMask) {
1965 Value *Src =
nullptr;
1966 if (0 <= M && M < (
int)NumImmElts) {
1970 Src =
M >= (int)NumSrcElts ? Y0 : X0;
1971 M =
M >= (int)NumSrcElts ? (M - NumSrcElts) :
M;
1973 }
else if (M >= (
int)NumImmElts) {
1978 Src =
M >= (int)NumSrcElts ? Y1 : X1;
1979 M =
M >= (int)NumSrcElts ? (M - NumSrcElts) :
M;
1983 assert(0 <= M && M < (
int)NumSrcElts &&
"Unexpected shuffle mask index");
1984 if (isa<UndefValue>(Src)) {
1987 if (!isa<PoisonValue>(Src))
1992 if (!NewX || NewX == Src) {
1996 if (!NewY || NewY == Src) {
2012 replaceValue(
I, *NewX);
2027 0,
nullptr, {OuterV0, OuterV1}, &
I);
2031 bool IsUnary =
all_of(NewMask, [&](
int M) {
return M < (int)NumSrcElts; });
2036 SK, ShuffleSrcTy, NewMask,
CostKind, 0,
nullptr, {NewX, NewY});
2038 NewCost += InnerCost0;
2040 NewCost += InnerCost1;
2043 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2045 if (NewCost > OldCost)
2049 replaceValue(
I, *Shuf);
// NOTE(review): fragment of VectorCombine::foldShuffleOfIntrinsics —
// interior lines elided. Visible logic: a shuffle of two calls to the same
// intrinsic is rewritten as one wider intrinsic call: scalar args must match
// between the two calls, vector args get their type doubled and are shuffled
// pairwise with the old mask; IR flags are intersected on the new call.
// Note the inner loop variable `I` (arg index) shadows the parameter `I`
// (the instruction) — preserved as-is here.
2055bool VectorCombine::foldShuffleOfIntrinsics(
Instruction &
I) {
2062 auto *II0 = dyn_cast<IntrinsicInst>(V0);
2063 auto *II1 = dyn_cast<IntrinsicInst>(V1);
2068 if (IID != II1->getIntrinsicID())
2071 auto *ShuffleDstTy = dyn_cast<FixedVectorType>(
I.getType());
2072 auto *II0Ty = dyn_cast<FixedVectorType>(II0->getType());
2073 if (!ShuffleDstTy || !II0Ty)
2079 for (
unsigned I = 0, E = II0->arg_size();
I != E; ++
I)
2081 II0->getArgOperand(
I) != II1->getArgOperand(
I))
2092 for (
unsigned I = 0, E = II0->arg_size();
I != E; ++
I)
2094 NewArgsTy.
push_back(II0->getArgOperand(
I)->getType());
2096 auto *VecTy = cast<FixedVectorType>(II0->getArgOperand(
I)->getType());
2098 VecTy->getNumElements() * 2));
2106 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2109 if (NewCost > OldCost)
2113 for (
unsigned I = 0, E = II0->arg_size();
I != E; ++
I)
2118 II1->getArgOperand(
I), OldMask);
2125 if (
auto *NewInst = dyn_cast<Instruction>(NewIntrinsic)) {
2127 NewInst->andIRFlags(II1);
2130 replaceValue(
I, *NewIntrinsic);
// NOTE(review): fragment of the shuffle-lane look-through helper — interior
// lines elided. Visible: walks a chain of shufflevectors, following the lane
// through operand 0 when the mask value indexes the first input, otherwise
// through operand 1.
2137 while (
auto *SV = dyn_cast<ShuffleVectorInst>(U->get())) {
2139 cast<FixedVectorType>(SV->getOperand(0)->getType())->getNumElements();
2140 int M = SV->getMaskValue(Lane);
2143 if (
static_cast<unsigned>(M) < NumElts) {
2144 U = &SV->getOperandUse(0);
2147 U = &SV->getOperandUse(1);
// NOTE(review): fragments of the shuffle-to-identity helper machinery —
// interior lines elided. Visible pieces: lane extraction from an InstLane;
// a concat-slice check (Item size must be a multiple of the element count,
// each slice's lanes must be the identity over one value); and the value
// regeneration step, which returns identity leaves directly, rebuilds
// concat leaves by pairwise shuffles with an iota mask, and otherwise
// recreates the front instruction (binop/cmp/select/cast/unary, with an
// intrinsic's trailing callee operand excluded from NumOps) over
// recursively generated operands.
2158 auto [U, Lane] = IL;
2171 auto *Ty = cast<FixedVectorType>(Item.
front().first->get()->getType());
2172 unsigned NumElts = Ty->getNumElements();
2173 if (Item.
size() == NumElts || NumElts == 1 || Item.
size() % NumElts != 0)
2179 std::iota(ConcatMask.
begin(), ConcatMask.
end(), 0);
2183 unsigned NumSlices = Item.
size() / NumElts;
2188 for (
unsigned Slice = 0; Slice < NumSlices; ++Slice) {
2189 Use *SliceV = Item[Slice * NumElts].first;
2190 if (!SliceV || SliceV->get()->
getType() != Ty)
2192 for (
unsigned Elt = 0; Elt < NumElts; ++Elt) {
2193 auto [V, Lane] = Item[Slice * NumElts + Elt];
2194 if (Lane !=
static_cast<int>(Elt) || SliceV->get() != V->get())
2207 auto [FrontU, FrontLane] = Item.
front();
2209 if (IdentityLeafs.
contains(FrontU)) {
2210 return FrontU->get();
2216 if (ConcatLeafs.
contains(FrontU)) {
2218 cast<FixedVectorType>(FrontU->get()->getType())->getNumElements();
2220 for (
unsigned S = 0; S < Values.
size(); ++S)
2221 Values[S] = Item[S * NumElts].first->get();
2223 while (Values.
size() > 1) {
2226 std::iota(Mask.begin(), Mask.end(), 0);
2228 for (
unsigned S = 0; S < NewValues.
size(); ++S)
2236 auto *
I = cast<Instruction>(FrontU->get());
2237 auto *
II = dyn_cast<IntrinsicInst>(
I);
2238 unsigned NumOps =
I->getNumOperands() - (
II ? 1 : 0);
2240 for (
unsigned Idx = 0;
Idx < NumOps;
Idx++) {
2247 Ty, IdentityLeafs, SplatLeafs, ConcatLeafs,
2252 for (
const auto &Lane : Item)
2258 if (
auto *BI = dyn_cast<BinaryOperator>(
I)) {
2264 if (
auto *CI = dyn_cast<CmpInst>(
I)) {
2265 auto *
Value = Builder.
CreateCmp(CI->getPredicate(), Ops[0], Ops[1]);
2269 if (
auto *SI = dyn_cast<SelectInst>(
I)) {
2274 if (
auto *CI = dyn_cast<CastInst>(
I)) {
2285 assert(isa<UnaryInstruction>(
I) &&
"Unexpected instruction type in Generate");
// NOTE(review): fragment of VectorCombine::foldShuffleToIdentity — interior
// lines elided. Visible logic: a worklist of lane-sets (Items) is classified
// into identity leaves (lanes already in order over one value), splat leaves
// (constant splats or one value/lane repeated), or operations whose lanes
// are equivalent instructions (same opcode/predicate/cast-src/select-cond/
// intrinsic-id, single-use, no operand bundles); binop/cmp, same-width
// bitcasts, selects, and non-bundled intrinsics recurse into operands;
// int div/rem is rejected. When more than one node was visited, the tree is
// regenerated without the superfluous shuffles and replaces I.
2295bool VectorCombine::foldShuffleToIdentity(
Instruction &
I) {
2296 auto *Ty = dyn_cast<FixedVectorType>(
I.getType());
2297 if (!Ty ||
I.use_empty())
2301 for (
unsigned M = 0, E = Ty->getNumElements(); M < E; ++M)
2307 unsigned NumVisited = 0;
2309 while (!Worklist.
empty()) {
2314 auto [FrontU, FrontLane] = Item.
front();
2322 return X->getType() ==
Y->getType() &&
2327 if (FrontLane == 0 &&
2328 cast<FixedVectorType>(FrontU->get()->getType())->getNumElements() ==
2329 Ty->getNumElements() &&
2332 return !E.value().first || (IsEquiv(E.value().first->get(), FrontV) &&
2333 E.value().second == (
int)E.index());
2335 IdentityLeafs.
insert(FrontU);
2339 if (
auto *
C = dyn_cast<Constant>(FrontU);
2340 C &&
C->getSplatValue() &&
2344 return !U || (isa<Constant>(
U->get()) &&
2345 cast<Constant>(
U->get())->getSplatValue() ==
2346 cast<Constant>(FrontV)->getSplatValue());
2348 SplatLeafs.
insert(FrontU);
2353 auto [FrontU, FrontLane] = Item.
front();
2354 auto [
U, Lane] = IL;
2355 return !
U || (
U->get() == FrontU->get() && Lane == FrontLane);
2357 SplatLeafs.
insert(FrontU);
2363 auto CheckLaneIsEquivalentToFirst = [Item](
InstLane IL) {
2367 Value *
V = IL.first->get();
2368 if (
auto *
I = dyn_cast<Instruction>(V);
I && !
I->hasOneUse())
2372 if (
auto *CI = dyn_cast<CmpInst>(V))
2373 if (CI->getPredicate() != cast<CmpInst>(FrontV)->getPredicate())
2375 if (
auto *CI = dyn_cast<CastInst>(V))
2376 if (CI->getSrcTy()->getScalarType() !=
2377 cast<CastInst>(FrontV)->getSrcTy()->getScalarType())
2379 if (
auto *SI = dyn_cast<SelectInst>(V))
2380 if (!isa<VectorType>(
SI->getOperand(0)->getType()) ||
2381 SI->getOperand(0)->getType() !=
2382 cast<SelectInst>(FrontV)->getOperand(0)->getType())
2384 if (isa<CallInst>(V) && !isa<IntrinsicInst>(V))
2386 auto *
II = dyn_cast<IntrinsicInst>(V);
2387 return !
II || (isa<IntrinsicInst>(FrontV) &&
2388 II->getIntrinsicID() ==
2389 cast<IntrinsicInst>(FrontV)->getIntrinsicID() &&
2390 !
II->hasOperandBundles());
2394 if (isa<BinaryOperator, CmpInst>(FrontU)) {
2396 if (
auto *BO = dyn_cast<BinaryOperator>(FrontU);
2397 BO && BO->isIntDivRem())
2406 }
else if (
auto *BitCast = dyn_cast<BitCastInst>(FrontU)) {
2408 auto *DstTy = dyn_cast<FixedVectorType>(BitCast->getDestTy());
2409 auto *SrcTy = dyn_cast<FixedVectorType>(BitCast->getSrcTy());
2410 if (DstTy && SrcTy &&
2411 SrcTy->getNumElements() == DstTy->getNumElements()) {
2415 }
else if (isa<SelectInst>(FrontU)) {
2420 }
else if (
auto *
II = dyn_cast<IntrinsicInst>(FrontU);
2422 !
II->hasOperandBundles()) {
2423 for (
unsigned Op = 0, E =
II->getNumOperands() - 1;
Op < E;
Op++) {
2429 return !U || (cast<Instruction>(
U->get())->getOperand(
Op) ==
2430 cast<Instruction>(FrontV)->getOperand(
Op));
2442 ConcatLeafs.
insert(FrontU);
2449 if (NumVisited <= 1)
2452 LLVM_DEBUG(
dbgs() <<
"Found a superfluous identity shuffle: " <<
I <<
"\n");
2458 ConcatLeafs, Builder, &
TTI);
2459 replaceValue(
I, *V);
// NOTE(review): fragment of VectorCombine::foldShuffleFromReductions —
// interior lines elided. Visible logic: for the listed vector_reduce_*
// intrinsics, a BFS over the reduction's binop operand tree finds a single
// shuffle feeding it; all other users must stay inside the visited set; the
// concat mask is sorted (order is irrelevant under a reduction), truncating
// and two-input shuffles adjust the costed type, and a cheaper sorted
// shuffle replaces the original; finally foldSelectShuffle is attempted on
// the (possibly new) shuffle.
2466bool VectorCombine::foldShuffleFromReductions(
Instruction &
I) {
2467 auto *
II = dyn_cast<IntrinsicInst>(&
I);
2470 switch (
II->getIntrinsicID()) {
2471 case Intrinsic::vector_reduce_add:
2472 case Intrinsic::vector_reduce_mul:
2473 case Intrinsic::vector_reduce_and:
2474 case Intrinsic::vector_reduce_or:
2475 case Intrinsic::vector_reduce_xor:
2476 case Intrinsic::vector_reduce_smin:
2477 case Intrinsic::vector_reduce_smax:
2478 case Intrinsic::vector_reduce_umin:
2479 case Intrinsic::vector_reduce_umax:
2488 std::queue<Value *> Worklist;
2491 if (
auto *
Op = dyn_cast<Instruction>(
I.getOperand(0)))
2494 while (!Worklist.empty()) {
2495 Value *CV = Worklist.front();
2506 if (
auto *CI = dyn_cast<Instruction>(CV)) {
2507 if (CI->isBinaryOp()) {
2508 for (
auto *
Op : CI->operand_values())
2511 }
else if (
auto *SV = dyn_cast<ShuffleVectorInst>(CI)) {
2512 if (Shuffle && Shuffle != SV)
2529 for (
auto *V : Visited)
2530 for (
auto *U :
V->users())
2531 if (!Visited.contains(U) && U != &
I)
2535 dyn_cast<FixedVectorType>(
II->getOperand(0)->getType());
2540 if (!ShuffleInputType)
2548 sort(ConcatMask, [](
int X,
int Y) {
return (
unsigned)
X < (
unsigned)
Y; });
2552 bool IsTruncatingShuffle =
VecType->getNumElements() < NumInputElts;
2553 bool UsesSecondVec =
2554 any_of(ConcatMask, [&](
int M) {
return M >= (int)NumInputElts; });
2557 (UsesSecondVec && !IsTruncatingShuffle) ? VecType : ShuffleInputType;
2563 VecTyForCost, ConcatMask,
CostKind);
2565 LLVM_DEBUG(
dbgs() <<
"Found a reduction feeding from a shuffle: " << *Shuffle
2567 LLVM_DEBUG(
dbgs() <<
" OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2569 if (NewCost < OldCost) {
2573 LLVM_DEBUG(
dbgs() <<
"Created new shuffle: " << *NewShuffle <<
"\n");
2574 replaceValue(*Shuffle, *NewShuffle);
2579 return foldSelectShuffle(*Shuffle,
true);
// NOTE(review): fragment of VectorCombine::foldCastFromReductions — interior
// lines elided. Visible logic: for add/mul (any cast per TruncOnly flag) and
// and/or/xor reductions, a cast feeding the reduction is hoisted past it:
// reduce(cast(x)) becomes cast(reduce(x)) when strictly cheaper and the new
// cost is valid.
2586bool VectorCombine::foldCastFromReductions(
Instruction &
I) {
2587 auto *
II = dyn_cast<IntrinsicInst>(&
I);
2591 bool TruncOnly =
false;
2594 case Intrinsic::vector_reduce_add:
2595 case Intrinsic::vector_reduce_mul:
2598 case Intrinsic::vector_reduce_and:
2599 case Intrinsic::vector_reduce_or:
2600 case Intrinsic::vector_reduce_xor:
2607 Value *ReductionSrc =
I.getOperand(0);
2617 auto *SrcTy = cast<VectorType>(Src->getType());
2618 auto *ReductionSrcTy = cast<VectorType>(ReductionSrc->
getType());
2619 Type *ResultTy =
I.getType();
2622 ReductionOpc, ReductionSrcTy, std::nullopt,
CostKind);
2625 cast<CastInst>(ReductionSrc));
2632 if (OldCost <= NewCost || !NewCost.
isValid())
2636 II->getIntrinsicID(), {Src});
2638 replaceValue(
I, *NewCast);
// NOTE(review): fragment of VectorCombine::foldSelectShuffle — interior
// lines elided AND the function continues past the end of this view.
// Visible logic: a select shuffle of two binops (Op0/Op1) whose operands are
// themselves shuffles (SVI0A/B, SVI1A/B) is re-laned. Non-operand uses of
// the input shuffles must be dead or in-set; all users of the binops must be
// select-style shuffles over {Op0, Op1} (collected via collectShuffles, with
// FromReduction limited to a single shuffle). Each output shuffle's mask is
// decomposed into per-source (V1/V2) element lists with duplicates shared,
// bailing when both inputs are already fully used in order. Masks are then
// re-expressed via GetBaseMaskValue (which looks through single-level
// shuffles of the inputs), the V1/V2 orders are stably sorted by the base
// mask of SVI0A/SVI1A, and ReconstructMasks plus the new V1A/V1B (and,
// beyond this view, V2A/V2B) masks are built to regenerate the tree.
2652bool VectorCombine::foldSelectShuffle(
Instruction &
I,
bool FromReduction) {
2653 auto *SVI = cast<ShuffleVectorInst>(&
I);
2654 auto *VT = cast<FixedVectorType>(
I.getType());
2655 auto *Op0 = dyn_cast<Instruction>(SVI->getOperand(0));
2656 auto *Op1 = dyn_cast<Instruction>(SVI->getOperand(1));
2657 if (!Op0 || !Op1 || Op0 == Op1 || !Op0->isBinaryOp() || !Op1->isBinaryOp() ||
2661 auto *SVI0A = dyn_cast<Instruction>(Op0->getOperand(0));
2662 auto *SVI0B = dyn_cast<Instruction>(Op0->getOperand(1));
2663 auto *SVI1A = dyn_cast<Instruction>(Op1->getOperand(0));
2664 auto *SVI1B = dyn_cast<Instruction>(Op1->getOperand(1));
2667 if (!
I ||
I->getOperand(0)->getType() != VT)
2670 return U != Op0 && U != Op1 &&
2671 !(isa<ShuffleVectorInst>(U) &&
2672 (InputShuffles.contains(cast<Instruction>(U)) ||
2673 isInstructionTriviallyDead(cast<Instruction>(U))));
2676 if (checkSVNonOpUses(SVI0A) || checkSVNonOpUses(SVI0B) ||
2677 checkSVNonOpUses(SVI1A) || checkSVNonOpUses(SVI1B))
2685 for (
auto *U :
I->users()) {
2686 auto *SV = dyn_cast<ShuffleVectorInst>(U);
2687 if (!SV || SV->getType() != VT)
2689 if ((SV->getOperand(0) != Op0 && SV->getOperand(0) != Op1) ||
2690 (SV->getOperand(1) != Op0 && SV->getOperand(1) != Op1))
2697 if (!collectShuffles(Op0) || !collectShuffles(Op1))
2701 if (FromReduction && Shuffles.
size() > 1)
2706 if (!FromReduction) {
2708 for (
auto *U : SV->users()) {
2711 Shuffles.push_back(SSV);
2723 int MaxV1Elt = 0, MaxV2Elt = 0;
2724 unsigned NumElts = VT->getNumElements();
2727 SVN->getShuffleMask(Mask);
2731 Value *SVOp0 = SVN->getOperand(0);
2732 Value *SVOp1 = SVN->getOperand(1);
2733 if (isa<UndefValue>(SVOp1)) {
2734 auto *SSV = cast<ShuffleVectorInst>(SVOp0);
2737 for (
unsigned I = 0, E =
Mask.size();
I != E;
I++) {
2743 if (SVOp0 == Op1 && SVOp1 == Op0) {
2747 if (SVOp0 != Op0 || SVOp1 != Op1)
2754 for (
unsigned I = 0;
I <
Mask.size();
I++) {
2757 }
else if (Mask[
I] <
static_cast<int>(NumElts)) {
2758 MaxV1Elt = std::max(MaxV1Elt, Mask[
I]);
2759 auto It =
find_if(V1, [&](
const std::pair<int, int> &
A) {
2760 return Mask[
I] ==
A.first;
2769 MaxV2Elt = std::max<int>(MaxV2Elt, Mask[
I] - NumElts);
2770 auto It =
find_if(V2, [&](
const std::pair<int, int> &
A) {
2771 return Mask[
I] -
static_cast<int>(NumElts) ==
A.first;
2774 ReconstructMask.
push_back(NumElts + It -
V2.begin());
2777 V2.emplace_back(Mask[
I] - NumElts, NumElts +
V2.size());
2785 sort(ReconstructMask);
2786 OrigReconstructMasks.
push_back(std::move(ReconstructMask));
2793 if (V1.
empty() ||
V2.empty() ||
2794 (MaxV1Elt ==
static_cast<int>(V1.
size()) - 1 &&
2795 MaxV2Elt ==
static_cast<int>(
V2.size()) - 1))
2802 auto *SV = dyn_cast<ShuffleVectorInst>(
I);
2805 if (isa<UndefValue>(SV->getOperand(1)))
2806 if (
auto *SSV = dyn_cast<ShuffleVectorInst>(SV->getOperand(0)))
2807 if (InputShuffles.contains(SSV))
2809 return SV->getMaskValue(M);
2817 std::pair<int, int>
Y) {
2818 int MXA = GetBaseMaskValue(
A,
X.first);
2819 int MYA = GetBaseMaskValue(
A,
Y.first);
2822 stable_sort(V1, [&](std::pair<int, int>
A, std::pair<int, int>
B) {
2823 return SortBase(SVI0A,
A,
B);
2825 stable_sort(V2, [&](std::pair<int, int>
A, std::pair<int, int>
B) {
2826 return SortBase(SVI1A,
A,
B);
2831 for (
const auto &Mask : OrigReconstructMasks) {
2833 for (
int M : Mask) {
2835 auto It =
find_if(V, [M](
auto A) {
return A.second ==
M; });
2836 assert(It !=
V.end() &&
"Expected all entries in Mask");
2837 return std::distance(
V.begin(), It);
2841 else if (M <
static_cast<int>(NumElts)) {
2842 ReconstructMask.
push_back(FindIndex(V1, M));
2844 ReconstructMask.
push_back(NumElts + FindIndex(V2, M));
2847 ReconstructMasks.push_back(std::move(ReconstructMask));
2853 for (
unsigned I = 0;
I < V1.
size();
I++) {
2854 V1A.
push_back(GetBaseMaskValue(SVI0A, V1[
I].first));
2855 V1B.
push_back(GetBaseMaskValue(SVI0B, V1[
I].first));
2857 for (
unsigned I = 0;
I <
V2.size();
I++) {
2858 V2A.
push_back(GetBaseMaskValue(SVI1A, V2[
I].first));
2859 V2B.
push_back(GetBaseMaskValue(SVI1B, V2[
I].first));
2861 while (V1A.
size() < NumElts) {
2865 while (V2A.
size() < NumElts) {
2871 auto *SV = dyn_cast<ShuffleVectorInst>(
I);
2877 VT, SV->getShuffleMask(),
CostKind);
2888 CostBefore += std::accumulate(Shuffles.begin(), Shuffles.end(),
2890 CostBefore += std::accumulate(InputShuffles.begin(), InputShuffles.end(),
2902 CostAfter += std::accumulate(ReconstructMasks.begin(), ReconstructMasks.end(),
2904 std::set<SmallVector<int>> OutputShuffleMasks({V1A, V1B, V2A, V2B});
2906 std::accumulate(OutputShuffleMasks.begin(), OutputShuffleMasks.end(),
2909 LLVM_DEBUG(
dbgs() <<
"Found a binop select shuffle pattern: " <<
I <<
"\n");
2911 <<
" vs CostAfter: " << CostAfter <<
"\n");
2912 if (CostBefore <= CostAfter)
2917 auto *SV = dyn_cast<ShuffleVectorInst>(
I);
2920 if (isa<UndefValue>(SV->getOperand(1)))
2921 if (
auto *SSV = dyn_cast<ShuffleVectorInst>(SV->getOperand(0)))
2922 if (InputShuffles.contains(SSV))
2924 return SV->getOperand(
Op);
2928 GetShuffleOperand(SVI0A, 1), V1A);
2931 GetShuffleOperand(SVI0B, 1), V1B);
2934 GetShuffleOperand(SVI1A, 1), V2A);
2937 GetShuffleOperand(SVI1B, 1), V2B);
2941 if (
auto *
I = dyn_cast<Instruction>(NOp0))
2942 I->copyIRFlags(Op0,
true);
2946 if (
auto *
I = dyn_cast<Instruction>(NOp1))
2947 I->copyIRFlags(Op1,
true);
2949 for (
int S = 0, E = ReconstructMasks.size(); S != E; S++) {
2952 replaceValue(*Shuffles[S], *NSV);
2955 Worklist.pushValue(NSV0A);
2956 Worklist.pushValue(NSV0B);
2957 Worklist.pushValue(NSV1A);
2958 Worklist.pushValue(NSV1B);
2959 for (
auto *S : Shuffles)
2971 Value *ZExted, *OtherOperand;
2977 Value *ZExtOperand =
I.getOperand(
I.getOperand(0) == OtherOperand ? 1 : 0);
2979 auto *BigTy = cast<FixedVectorType>(
I.getType());
2980 auto *SmallTy = cast<FixedVectorType>(ZExted->
getType());
2981 unsigned BW = SmallTy->getElementType()->getPrimitiveSizeInBits();
2983 if (
I.getOpcode() == Instruction::LShr) {
3000 Instruction::ZExt, BigTy, SmallTy,
3001 TargetTransformInfo::CastContextHint::None,
CostKind);
3007 auto *UI = cast<Instruction>(U);
3013 ShrinkCost += ZExtCost;
3028 ShrinkCost += ZExtCost;
3033 if (!isa<Constant>(OtherOperand))
3035 Instruction::Trunc, SmallTy, BigTy,
3036 TargetTransformInfo::CastContextHint::None,
CostKind);
3041 if (ShrinkCost > CurrentCost)
3045 Value *Op0 = ZExted;
3048 if (
I.getOperand(0) == OtherOperand)
3052 cast<Instruction>(NewBinOp)->copyIRFlags(&
I);
3053 cast<Instruction>(NewBinOp)->copyMetadata(
I);
3055 replaceValue(
I, *NewZExtr);
3061bool VectorCombine::foldInsExtVectorToShuffle(
Instruction &
I) {
3062 Value *DstVec, *SrcVec;
3070 auto *VecTy = dyn_cast<FixedVectorType>(
I.getType());
3071 if (!VecTy || SrcVec->
getType() != VecTy)
3074 unsigned NumElts = VecTy->getNumElements();
3075 if (ExtIdx >= NumElts || InsIdx >= NumElts)
3081 if (isa<PoisonValue>(DstVec) && !isa<UndefValue>(SrcVec)) {
3083 Mask[InsIdx] = ExtIdx;
3087 std::iota(
Mask.begin(),
Mask.end(), 0);
3088 Mask[InsIdx] = ExtIdx + NumElts;
3092 auto *
Ins = cast<InsertElementInst>(&
I);
3093 auto *
Ext = cast<ExtractElementInst>(
I.getOperand(1));
3106 if (!
Ext->hasOneUse())
3109 LLVM_DEBUG(
dbgs() <<
"Found a insert/extract shuffle-like pair: " <<
I
3110 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
3113 if (OldCost < NewCost)
3117 if (isa<UndefValue>(DstVec) && !isa<UndefValue>(SrcVec)) {
3123 replaceValue(
I, *Shuf);
3130bool VectorCombine::run() {
3140 bool MadeChange =
false;
3143 bool IsVectorType = isa<VectorType>(
I.getType());
3144 bool IsFixedVectorType = isa<FixedVectorType>(
I.getType());
3145 auto Opcode =
I.getOpcode();
3153 if (IsFixedVectorType) {
3155 case Instruction::InsertElement:
3156 MadeChange |= vectorizeLoadInsert(
I);
3158 case Instruction::ShuffleVector:
3159 MadeChange |= widenSubvectorLoad(
I);
3169 MadeChange |= scalarizeBinopOrCmp(
I);
3170 MadeChange |= scalarizeLoadExtract(
I);
3171 MadeChange |= scalarizeVPIntrinsic(
I);
3174 if (Opcode == Instruction::Store)
3175 MadeChange |= foldSingleElementStore(
I);
3178 if (TryEarlyFoldsOnly)
3185 if (IsFixedVectorType) {
3187 case Instruction::InsertElement:
3188 MadeChange |= foldInsExtFNeg(
I);
3189 MadeChange |= foldInsExtVectorToShuffle(
I);
3191 case Instruction::ShuffleVector:
3192 MadeChange |= foldPermuteOfBinops(
I);
3193 MadeChange |= foldShuffleOfBinops(
I);
3194 MadeChange |= foldShuffleOfCastops(
I);
3195 MadeChange |= foldShuffleOfShuffles(
I);
3196 MadeChange |= foldShuffleOfIntrinsics(
I);
3197 MadeChange |= foldSelectShuffle(
I);
3198 MadeChange |= foldShuffleToIdentity(
I);
3200 case Instruction::BitCast:
3201 MadeChange |= foldBitcastShuffle(
I);
3204 MadeChange |= shrinkType(
I);
3209 case Instruction::Call:
3210 MadeChange |= foldShuffleFromReductions(
I);
3211 MadeChange |= foldCastFromReductions(
I);
3213 case Instruction::ICmp:
3214 case Instruction::FCmp:
3215 MadeChange |= foldExtractExtract(
I);
3217 case Instruction::Or:
3218 MadeChange |= foldConcatOfBoolMasks(
I);
3222 MadeChange |= foldExtractExtract(
I);
3223 MadeChange |= foldExtractedCmps(
I);
3236 if (
I.isDebugOrPseudoInst())
3242 while (!Worklist.isEmpty()) {
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This is the interface for LLVM's primary stateless and local alias analysis.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
std::optional< std::vector< StOtherPiece > > Other
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
This is the interface for a simple mod/ref and alias analysis over globals.
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU)
uint64_t IntrinsicInst * II
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
FunctionAnalysisManager FAM
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static SymbolRef::Type getType(const Symbol *Sym)
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
static bool isFreeConcat(ArrayRef< InstLane > Item, TTI::TargetCostKind CostKind, const TargetTransformInfo &TTI)
Detect concat of multiple values into a vector.
static SmallVector< InstLane > generateInstLaneVectorFromOperand(ArrayRef< InstLane > Item, int Op)
static Value * createShiftShuffle(Value *Vec, unsigned OldIndex, unsigned NewIndex, IRBuilder<> &Builder)
Create a shuffle that translates (shifts) 1 element from the input vector to a new element location.
static Value * peekThroughBitcasts(Value *V)
Return the source operand of a potentially bitcasted value.
static Align computeAlignmentAfterScalarization(Align VectorAlignment, Type *ScalarType, Value *Idx, const DataLayout &DL)
The memory operation on a vector of ScalarType had alignment of VectorAlignment.
static ScalarizationResult canScalarizeAccess(VectorType *VecTy, Value *Idx, Instruction *CtxI, AssumptionCache &AC, const DominatorTree &DT)
Check if it is legal to scalarize a memory access to VecTy at index Idx.
static cl::opt< bool > DisableVectorCombine("disable-vector-combine", cl::init(false), cl::Hidden, cl::desc("Disable all vector combine transforms"))
static InstLane lookThroughShuffles(Use *U, int Lane)
static bool canWidenLoad(LoadInst *Load, const TargetTransformInfo &TTI)
static const unsigned InvalidIndex
static Value * generateNewInstTree(ArrayRef< InstLane > Item, FixedVectorType *Ty, const SmallPtrSet< Use *, 4 > &IdentityLeafs, const SmallPtrSet< Use *, 4 > &SplatLeafs, const SmallPtrSet< Use *, 4 > &ConcatLeafs, IRBuilder<> &Builder, const TargetTransformInfo *TTI)
std::pair< Use *, int > InstLane
static cl::opt< unsigned > MaxInstrsToScan("vector-combine-max-scan-instrs", cl::init(30), cl::Hidden, cl::desc("Max number of instructions to scan for vector combining."))
static cl::opt< bool > DisableBinopExtractShuffle("disable-binop-extract-shuffle", cl::init(false), cl::Hidden, cl::desc("Disable binop extract to shuffle transforms"))
static bool isMemModifiedBetween(BasicBlock::iterator Begin, BasicBlock::iterator End, const MemoryLocation &Loc, AAResults &AA)
static ExtractElementInst * translateExtract(ExtractElementInst *ExtElt, unsigned NewIndex, IRBuilder<> &Builder)
Given an extract element instruction with constant index operand, shuffle the source vector (shift th...
static constexpr int Concat[]
A manager for alias analyses.
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
Check whether or not an instruction may read or write the optionally specified memory location.
Class for arbitrary precision integers.
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & front() const
front - Get the first element.
size_t size() const
size - Get the array size.
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
bool hasFnAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the function.
LLVM Basic Block Representation.
InstListType::iterator iterator
Instruction iterators...
BinaryOps getOpcode() const
Represents analyses that only rely on functions' control flow.
Value * getArgOperand(unsigned i) const
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
bool isFPPredicate() const
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
static Constant * getExtractElement(Constant *Vec, Constant *Idx, Type *OnlyIfReducedTy=nullptr)
This is the shared class of boolean and integer constants.
const APInt & getValue() const
Return the constant as an APInt value reference.
This class represents a range of values.
ConstantRange urem(const ConstantRange &Other) const
Return a new range representing the possible values resulting from an unsigned remainder operation of...
ConstantRange binaryAnd(const ConstantRange &Other) const
Return a new range representing the possible values resulting from a binary-and of a value in this ra...
bool contains(const APInt &Val) const
Return true if the specified value is in the set.
static Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
This class represents a cast from floating point to signed integer.
This class represents a cast from floating point to unsigned integer.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static FixedVectorType * getDoubleElementsVectorType(FixedVectorType *VTy)
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Value * CreateFreeze(Value *V, const Twine &Name="")
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="", MDNode *FPMathTag=nullptr, FMFSource FMFSource={})
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Value * CreatePointerBitCastOrAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
Value * CreateUnOp(Instruction::UnaryOps Opc, Value *V, const Twine &Name="", MDNode *FPMathTag=nullptr)
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Value * CreateFNegFMF(Value *V, FMFSource FMFSource, const Twine &Name="", MDNode *FPMathTag=nullptr)
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
InstructionWorklist - This is the worklist management logic for InstCombine and other simplification ...
void pushUsersToWorkList(Instruction &I)
When an instruction is simplified, add all users of the instruction to the work lists because they mi...
void push(Instruction *I)
Push the instruction onto the worklist stack.
void remove(Instruction *I)
Remove I from the worklist if it exists.
void copyIRFlags(const Value *V, bool IncludeWrapFlags=true)
Convenience method to copy supported exact, fast-math, and (optionally) wrapping flags from V to this...
bool comesBefore(const Instruction *Other) const
Given an instruction Other in the same basic block as this instruction, return true if this instructi...
bool mayReadFromMemory() const LLVM_READONLY
Return true if this instruction may read memory.
void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
An instruction for reading from memory.
Representation for a specific memory location.
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
void preserveSet()
Mark an analysis set as preserved.
This class represents a sign extension of integer types.
This class represents a cast from signed integer to floating point.
This instruction constructs a fixed permutation of two input vectors.
int getMaskValue(unsigned Elt) const
Return the shuffle mask value of this instruction for the given element index.
VectorType * getType() const
Overload to return most specific vector type.
static void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
static void commuteShuffleMask(MutableArrayRef< int > Mask, unsigned InVecNumElts)
Change values in a shuffle permute mask assuming the two vector operands of length InVecNumElts have ...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
void assign(size_type NumElts, ValueParamT Elt)
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setAlignment(Align Align)
Analysis pass providing the TargetTransformInfo.
This class represents a truncation of integer types.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isVectorTy() const
True if this is an instance of VectorType.
bool isPointerTy() const
True if this is an instance of PointerType.
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
This class represents a cast unsigned integer to floating point.
A Use represents the edge between a Value definition and its users.
Value * getOperand(unsigned i) const
static bool isVPBinOp(Intrinsic::ID ID)
This is the common base class for vector predication intrinsics.
std::optional< unsigned > getFunctionalIntrinsicID() const
std::optional< unsigned > getFunctionalOpcode() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
This is a wrapper around stripAndAccumulateConstantOffsets with the in-bounds requirement set to fals...
bool hasOneUse() const
Return true if there is exactly one use of this value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
unsigned getValueID() const
Return an ID for the concrete type of this object.
bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
StringRef getName() const
Return a constant reference to the value's name.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &)
This class represents zero extension of integer types.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
AttributeList getAttributes(LLVMContext &C, ID id)
Return the attributes for an intrinsic.
class_match< PoisonValue > m_Poison()
Match an arbitrary poison constant.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
BinaryOp_match< LHS, RHS, Instruction::URem > m_URem(const LHS &L, const RHS &R)
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
DisjointOr_match< LHS, RHS > m_DisjointOr(const LHS &L, const RHS &R)
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
match_combine_and< LTy, RTy > m_CombineAnd(const LTy &L, const RTy &R)
Combine two pattern matchers matching L && R.
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
OneUse_match< T > m_OneUse(const T &SubPattern)
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
BinOpPred_match< LHS, RHS, is_bitwiselogic_op, true > m_c_BitwiseLogic(const LHS &L, const RHS &R)
Matches bitwise logic operations in either order.
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
match_combine_or< CastInst_match< OpTy, SExtInst >, NNegZExt_match< OpTy > > m_SExtLike(const OpTy &Op)
Match either "sext" or "zext nneg".
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void stable_sort(R &&Range)
UnaryFunction for_each(R &&Range, UnaryFunction F)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
detail::scope_exit< std::decay_t< Callable > > make_scope_exit(Callable &&F)
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
unsigned getArithmeticReductionInstruction(Intrinsic::ID RdxID)
Returns the arithmetic instruction opcode used when expanding a reduction.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
bool mustSuppressSpeculation(const LoadInst &LI)
Return true if speculation of the given load must be suppressed to avoid ordering or interfering with...
bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
ConstantRange computeConstantRange(const Value *V, bool ForSigned, bool UseInstrInfo=true, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Determine the possible constant range of an integer or vector of integer value.
bool isSafeToSpeculativelyExecuteWithOpcode(unsigned Opcode, const Instruction *Inst, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true)
This returns the same result as isSafeToSpeculativelyExecute if Opcode is the actual opcode of Inst.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
bool isModSet(const ModRefInfo MRI)
void sort(IteratorTy Start, IteratorTy End)
bool isSafeToLoadUnconditionally(Value *V, Align Alignment, const APInt &Size, const DataLayout &DL, Instruction *ScanFrom, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if we know that executing a load from this value cannot trap.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
void propagateIRFlags(Value *I, ArrayRef< Value * > VL, Value *OpValue=nullptr, bool IncludeWrapFlags=true)
Get the intersection (logical and) of all of the potential IR flags of each scalar operation (VL) tha...
bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
constexpr int PoisonMaskElem
void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
DWARFExpression::Operation Op
constexpr unsigned BitWidth
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
bool isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Returns true if V cannot be poison, but may be undef.
bool isTriviallyVectorizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially vectorizable.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.