#define DEBUG_TYPE "vector-combine"
STATISTIC(NumVecLoad, "Number of vector loads formed");
STATISTIC(NumVecCmp, "Number of vector compares formed");
STATISTIC(NumVecBO, "Number of vector binops formed");
STATISTIC(NumVecCmpBO, "Number of vector compare + binop formed");
STATISTIC(NumShufOfBitcast, "Number of shuffles moved after bitcast");
STATISTIC(NumScalarBO, "Number of scalar binops formed");
STATISTIC(NumScalarCmp, "Number of scalar compares formed");
static cl::opt<bool> DisableVectorCombine(
    "disable-vector-combine", cl::init(false), cl::Hidden,
    cl::desc("Disable all vector combine transforms"));

static cl::opt<bool> DisableBinopExtractShuffle(
    "disable-binop-extract-shuffle", cl::init(false), cl::Hidden,
    cl::desc("Disable binop extract to shuffle transforms"));

static cl::opt<unsigned> MaxInstrsToScan(
    "vector-combine-max-scan-instrs", cl::init(30), cl::Hidden,
    cl::desc("Max number of instructions to scan for vector combining."));

static const unsigned InvalidIndex = std::numeric_limits<unsigned>::max();
  VectorCombine(Function &F, const TargetTransformInfo &TTI,
                const DominatorTree &DT, AAResults &AA, AssumptionCache &AC,
                const DataLayout *DL, TTI::TargetCostKind CostKind,
                bool TryEarlyFoldsOnly)
      : F(F), Builder(F.getContext()), TTI(TTI), DT(DT), AA(AA), AC(AC),
        DL(DL), CostKind(CostKind), TryEarlyFoldsOnly(TryEarlyFoldsOnly) {}

  /// If true, only perform beneficial early IR transforms. Do not introduce
  /// new vector operations.
  bool TryEarlyFoldsOnly;
  ExtractElementInst *getShuffleExtract(ExtractElementInst *Ext0,
                                        ExtractElementInst *Ext1,
                                        unsigned PreferredExtractIndex) const;
  bool isExtractExtractCheap(ExtractElementInst *Ext0,
                             ExtractElementInst *Ext1, const Instruction &I,
                             ExtractElementInst *&ConvertToShuffle,
                             unsigned PreferredExtractIndex);
  bool foldSelectShuffle(Instruction &I, bool FromReduction = false);
  // Replace all uses of Old with New and queue the replacement for further
  // combining (from VectorCombine::replaceValue).
  void replaceValue(Value &Old, Value &New) {
    Old.replaceAllUsesWith(&New);
    if (auto *NewI = dyn_cast<Instruction>(&New)) {
      Worklist.pushUsersToWorkList(*NewI);
      Worklist.pushValue(NewI);
    }
    // ...
  }
/// Return the source operand of a potentially bitcasted value.
static Value *peekThroughBitcasts(Value *V) {
  while (auto *BitCast = dyn_cast<BitCastInst>(V))
    V = BitCast->getOperand(0);
  return V;
}
static bool canWidenLoad(LoadInst *Load, const TargetTransformInfo &TTI) {
  // Do not widen the load if it must execute exactly as written: it must be
  // simple, have one use, and not be subject to sanitizer/speculation limits.
  if (!Load || !Load->isSimple() || !Load->hasOneUse() ||
      Load->getFunction()->hasFnAttribute(Attribute::SanitizeMemTag) ||
      mustSuppressSpeculation(*Load))
    return false;

  // We are potentially transforming byte-sized (8-bit) memory accesses, so
  // make sure the target's minimum vector register width divides evenly.
  Type *ScalarTy = Load->getType()->getScalarType();
  uint64_t ScalarSize = ScalarTy->getPrimitiveSizeInBits();
  unsigned MinVectorSize = TTI.getMinVectorRegisterBitWidth();
  if (!ScalarSize || !MinVectorSize || MinVectorSize % ScalarSize != 0 ||
      ScalarSize % 8 != 0)
    return false;

  return true;
}
bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
  // Match insert of a loaded scalar into element 0 of a fixed vector.
  // ...
  auto *Load = dyn_cast<LoadInst>(X);
  if (!canWidenLoad(Load, TTI))
    return false;
  // ...
  Value *SrcPtr = Load->getPointerOperand()->stripPointerCasts();
  assert(isa<PointerType>(SrcPtr->getType()) && "Expected a pointer type");

  unsigned MinVecNumElts = MinVectorSize / ScalarSize;
  auto *MinVecTy = VectorType::get(ScalarTy, MinVecNumElts, false);
  unsigned OffsetEltIndex = 0;
  // ... (strip a constant pointer offset, if any)
  unsigned OffsetBitWidth = DL->getIndexTypeSizeInBits(SrcPtr->getType());
  // ...
  // The offset must land exactly on a vector element boundary.
  uint64_t ScalarSizeInBytes = ScalarSize / 8;
  if (Offset.urem(ScalarSizeInBytes) != 0)
    return false;
  OffsetEltIndex = Offset.udiv(ScalarSizeInBytes).getZExtValue();
  if (OffsetEltIndex >= MinVecNumElts)
    return false;
  // ... (verify the widened load cannot trap)
  unsigned AS = Load->getPointerAddressSpace();
  // ... (cost the original scalar load + insert against the vector load)
  auto *Ty = cast<FixedVectorType>(I.getType());
  unsigned OutputNumElts = Ty->getNumElements();
  SmallVector<int, 16> Mask(OutputNumElts, PoisonMaskElem);
  assert(OffsetEltIndex < MinVecNumElts && "Address offset too big");
  Mask[0] = OffsetEltIndex;
  // ...
  // We can aggressively convert to the vector form because the backend can
  // invert this transform if it does not result in a performance win.
  if (OldCost < NewCost || !NewCost.isValid())
    return false;

  // It is safe and potentially profitable to load a vector directly:
  // inselt undef, load Scalar, 0 --> shuffle (load VecPtr), Poison, Mask
  // ...
  replaceValue(I, *VecLd);
  ++NumVecLoad;
  return true;
}
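// A sketch of the rewrite above, assuming the cost model accepts (values such
// as %p and the element types are illustrative; the final mask depends on
// OffsetEltIndex and OutputNumElts):
//   %s = load float, ptr %p, align 16
//   %r = insertelement <4 x float> poison, float %s, i64 0
// -->
//   %w = load <4 x float>, ptr %p, align 16
//   %r = shufflevector <4 x float> %w, <4 x float> poison,
//        <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>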
// If we are loading a vector and then inserting it into a larger vector with
// poison padding, try to load the larger vector directly and let the shuffle
// become a no-op.
bool VectorCombine::widenSubvectorLoad(Instruction &I) {
  // Match subvector insert of a fixed vector.
  auto *Shuf = cast<ShuffleVectorInst>(&I);
  if (!Shuf->isIdentityWithPadding())
    return false;

  // Allow a non-canonical shuffle mask that is choosing elements from op1.
  unsigned NumOpElts =
      cast<FixedVectorType>(Shuf->getOperand(0)->getType())->getNumElements();
  unsigned OpIndex = any_of(Shuf->getShuffleMask(), [&NumOpElts](int M) {
    return M >= (int)(NumOpElts);
  });

  auto *Load = dyn_cast<LoadInst>(Shuf->getOperand(OpIndex));
  if (!canWidenLoad(Load, TTI))
    return false;
  // ... (verify the widened load cannot trap)
  auto *Ty = cast<FixedVectorType>(I.getType());
  Value *SrcPtr = Load->getPointerOperand()->stripPointerCasts();
  assert(isa<PointerType>(SrcPtr->getType()) && "Expected a pointer type");
  // ...
  unsigned AS = Load->getPointerAddressSpace();
  // ... (cost the load + shuffle against a single wide load)
  if (OldCost < NewCost || !NewCost.isValid())
    return false;
  // ...
  replaceValue(I, *VecLd);
  ++NumVecLoad;
  return true;
}
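// Illustrative widening, assuming the padded lanes are never meaningfully
// read (types and names are hypothetical):
//   %ld = load <2 x float>, ptr %p
//   %r  = shufflevector <2 x float> %ld, <2 x float> poison,
//         <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
// -->
//   %r = load <4 x float>, ptr %p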
ExtractElementInst *VectorCombine::getShuffleExtract(
    ExtractElementInst *Ext0, ExtractElementInst *Ext1,
    unsigned PreferredExtractIndex) const {
  auto *Index0C = dyn_cast<ConstantInt>(Ext0->getIndexOperand());
  auto *Index1C = dyn_cast<ConstantInt>(Ext1->getIndexOperand());
  assert(Index0C && Index1C && "Expected constant extract indexes");

  unsigned Index0 = Index0C->getZExtValue();
  unsigned Index1 = Index1C->getZExtValue();

  // If the extract indexes are identical, no shuffle is needed.
  if (Index0 == Index1)
    return nullptr;
  // ... (if the two extracts have different target costs, convert the more
  // expensive one)

  // If the costs are equivalent, use the preferred index if one extract
  // already matches it.
  if (PreferredExtractIndex == Index0)
    return Ext1;
  if (PreferredExtractIndex == Index1)
    return Ext0;

  // Otherwise, prefer to keep the extract with the lower index.
  return Index0 > Index1 ? Ext0 : Ext1;
}
bool VectorCombine::isExtractExtractCheap(
    ExtractElementInst *Ext0, ExtractElementInst *Ext1, const Instruction &I,
    ExtractElementInst *&ConvertToShuffle,
    unsigned PreferredExtractIndex) {
  auto *Ext0IndexC = dyn_cast<ConstantInt>(Ext0->getIndexOperand());
  auto *Ext1IndexC = dyn_cast<ConstantInt>(Ext1->getIndexOperand());
  assert(Ext0IndexC && Ext1IndexC && "Expected constant extract indexes");

  unsigned Opcode = I.getOpcode();
  Value *Ext0Src = Ext0->getVectorOperand();
  Value *Ext1Src = Ext1->getVectorOperand();
  auto *VecTy = cast<VectorType>(Ext0Src->getType());
  // ... (compute ScalarOpCost and VectorOpCost for binops; for compares:)
  assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
         "Expected a compare");
  // ...

  // Get cost estimates for the extract elements. These costs will factor into
  // both sequences.
  unsigned Ext0Index = Ext0IndexC->getZExtValue();
  unsigned Ext1Index = Ext1IndexC->getZExtValue();
  // ... (Extract0Cost / Extract1Cost from TTI.getVectorInstrCost)
  unsigned BestExtIndex = Extract0Cost > Extract1Cost ? Ext0Index : Ext1Index;
  unsigned BestInsIndex = Extract0Cost > Extract1Cost ? Ext1Index : Ext0Index;
  InstructionCost CheapExtractCost = std::min(Extract0Cost, Extract1Cost);

  InstructionCost OldCost, NewCost;
  if (Ext0Src == Ext1Src && Ext0Index == Ext1Index) {
    // Both operands are the same extract: the vector op only needs the
    // cheaper extract, but an extra use must still be paid for.
    bool HasUseTax = Ext0 == Ext1 ? !Ext0->hasNUses(2)
                                  : !Ext0->hasOneUse() || !Ext1->hasOneUse();
    OldCost = CheapExtractCost + ScalarOpCost;
    NewCost = VectorOpCost + CheapExtractCost + HasUseTax * CheapExtractCost;
  } else {
    OldCost = Extract0Cost + Extract1Cost + ScalarOpCost;
    NewCost = VectorOpCost + CheapExtractCost +
              !Ext0->hasOneUse() * Extract0Cost +
              !Ext1->hasOneUse() * Extract1Cost;
  }

  ConvertToShuffle = getShuffleExtract(Ext0, Ext1, PreferredExtractIndex);
  if (ConvertToShuffle) {
    // Account for the shuffle that moves the cheap extract's element.
    if (auto *FixedVecTy = dyn_cast<FixedVectorType>(VecTy)) {
      SmallVector<int> ShuffleMask(FixedVecTy->getNumElements(),
                                   PoisonMaskElem);
      ShuffleMask[BestInsIndex] = BestExtIndex;
      NewCost += TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
                                    VecTy, ShuffleMask, CostKind, 0, nullptr,
                                    {ConvertToShuffle});
    } else {
      NewCost += TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
                                    VecTy, {}, CostKind, 0, nullptr,
                                    {ConvertToShuffle});
    }
  }

  // Aggressively form a vector op if the cost is equal because the transform
  // may enable further optimization; returns true when scalar stays cheaper.
  return OldCost < NewCost;
}
/// Create a shuffle that translates (shifts) 1 element from the input vector
/// to a new element location.
static Value *createShiftShuffle(Value *Vec, unsigned OldIndex,
                                 unsigned NewIndex, IRBuilder<> &Builder) {
  // The shuffle mask is poison except for one element copied from OldIndex.
  auto *VecTy = cast<FixedVectorType>(Vec->getType());
  SmallVector<int, 32> ShufMask(VecTy->getNumElements(), PoisonMaskElem);
  ShufMask[NewIndex] = OldIndex;
  return Builder.CreateShuffleVector(Vec, ShufMask, "shift");
}
/// Given an extract element instruction with constant index operand, shuffle
/// the source vector (shift the scalar element) to a NewIndex for extraction.
/// Return null if it is not possible.
static ExtractElementInst *translateExtract(ExtractElementInst *ExtElt,
                                            unsigned NewIndex,
                                            IRBuilder<> &Builder) {
  // Shufflevectors can only be created for fixed-width vectors.
  Value *X = ExtElt->getVectorOperand();
  if (!isa<FixedVectorType>(X->getType()))
    return nullptr;

  // If the extract can be constant-folded, this code is unsimplified. Defer
  // to other passes to handle that.
  Value *C = ExtElt->getIndexOperand();
  assert(isa<ConstantInt>(C) && "Expected a constant index operand");
  if (isa<Constant>(X))
    return nullptr;

  Value *Shuf = createShiftShuffle(X, cast<ConstantInt>(C)->getZExtValue(),
                                   NewIndex, Builder);
  return dyn_cast<ExtractElementInst>(
      Builder.CreateExtractElement(Shuf, NewIndex));
}
void VectorCombine::foldExtExtCmp(ExtractElementInst *Ext0,
                                  ExtractElementInst *Ext1, Instruction &I) {
  assert(isa<CmpInst>(&I) && "Expected a compare");
  assert(cast<ConstantInt>(Ext0->getIndexOperand())->getZExtValue() ==
             cast<ConstantInt>(Ext1->getIndexOperand())->getZExtValue() &&
         "Expected matching constant extract indexes");

  // cmp Pred (extelt V0, C), (extelt V1, C) --> extelt (cmp Pred V0, V1), C
  ++NumVecCmp;
  // ... (create the vector compare and extract the result element)
  replaceValue(I, *NewExt);
}
void VectorCombine::foldExtExtBinop(ExtractElementInst *Ext0,
                                    ExtractElementInst *Ext1, Instruction &I) {
  assert(isa<BinaryOperator>(&I) && "Expected a binary operator");
  assert(cast<ConstantInt>(Ext0->getIndexOperand())->getZExtValue() ==
             cast<ConstantInt>(Ext1->getIndexOperand())->getZExtValue() &&
         "Expected matching constant extract indexes");

  // bo (extelt V0, C), (extelt V1, C) --> extelt (bo V0, V1), C
  ++NumVecBO;
  // ...
  // All IR flags are safe to back-propagate because any potential poison
  // created in unused vector elements is discarded by the extract.
  if (auto *VecBOInst = dyn_cast<Instruction>(VecBO))
    VecBOInst->copyIRFlags(&I);
  // ...
  replaceValue(I, *NewExt);
}
  // ... (match two extractelement operands I0/I1 with constant indexes C0/C1
  // and pick InsertIndex from an extract user of I, if any)
  auto *Ext0 = cast<ExtractElementInst>(I0);
  auto *Ext1 = cast<ExtractElementInst>(I1);
  ExtractElementInst *ExtractToChange;
  if (isExtractExtractCheap(Ext0, Ext1, I, ExtractToChange, InsertIndex))
    return false;

  if (ExtractToChange) {
    unsigned CheapExtractIdx = ExtractToChange == Ext0 ? C1 : C0;
    ExtractElementInst *NewExtract =
        translateExtract(ExtractToChange, CheapExtractIdx, Builder);
    if (!NewExtract)
      return false;
    if (ExtractToChange == Ext0)
      Ext0 = NewExtract;
    else
      Ext1 = NewExtract;
  }

  if (Pred != CmpInst::BAD_ICMP_PREDICATE)
    foldExtExtCmp(Ext0, Ext1, I);
  else
    foldExtExtBinop(Ext0, Ext1, I);
  return true;
}
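// Sketch of the extract/extract fold when both indexes match (names are
// illustrative):
//   %e0 = extractelement <4 x i32> %x, i32 2
//   %e1 = extractelement <4 x i32> %y, i32 2
//   %r  = add i32 %e0, %e1
// -->
//   %v = add <4 x i32> %x, %y
//   %r = extractelement <4 x i32> %v, i32 2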
bool VectorCombine::foldInsExtFNeg(Instruction &I) {
  // Match an insert (fneg (extract SrcVec, Index)) pattern at the same Index.
  // ...
  auto *VecTy = cast<FixedVectorType>(I.getType());
  Type *ScalarTy = VecTy->getScalarType();
  auto *SrcVecTy = dyn_cast<FixedVectorType>(SrcVec->getType());
  if (!SrcVecTy || ScalarTy != SrcVecTy->getScalarType())
    return false;

  // Ignore bogus insert/extract index.
  unsigned NumElts = VecTy->getNumElements();
  if (Index >= NumElts)
    return false;

  // We are inserting the negated element into the same lane that we extracted
  // from. This is equivalent to a select-shuffle that chooses all but the
  // negated element from the destination vector.
  SmallVector<int> Mask(NumElts);
  std::iota(Mask.begin(), Mask.end(), 0);
  Mask[Index] = Index + NumElts;
  // ... (cost the extract+fneg+insert against fneg(SrcVec)+shuffle)
  bool NeedLenChg = SrcVecTy->getNumElements() != NumElts;
  // ...
  if (NewCost > OldCost)
    return false;
  // ...
  replaceValue(I, *NewShuf);
  return true;
}
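// Illustrative only (assuming matching vector lengths): the negated lane is
// re-selected from a full vector fneg:
//   %e = extractelement <4 x float> %x, i32 1
//   %n = fneg float %e
//   %r = insertelement <4 x float> %y, float %n, i32 1
// -->
//   %v = fneg <4 x float> %x
//   %r = shufflevector <4 x float> %y, <4 x float> %v,
//        <4 x i32> <i32 0, i32 5, i32 2, i32 3>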
// If this is a bitcast of a shuffle, try to bitcast the source vector(s) and
// do the shuffle afterward: bitcast (shuf V0, V1, M) --> shuf (bitcast V0),
// (bitcast V1), M'.
bool VectorCombine::foldBitcastShuffle(Instruction &I) {
  Value *V0, *V1;
  ArrayRef<int> Mask;
  if (!match(&I, m_BitCast(m_OneUse(
                     m_Shuffle(m_Value(V0), m_Value(V1), m_Mask(Mask))))))
    return false;

  // Disallow non-vector casts and scalable vectors.
  auto *DestTy = dyn_cast<FixedVectorType>(I.getType());
  auto *SrcTy = dyn_cast<FixedVectorType>(V0->getType());
  if (!DestTy || !SrcTy)
    return false;

  unsigned DestEltSize = DestTy->getScalarSizeInBits();
  unsigned SrcEltSize = SrcTy->getScalarSizeInBits();
  if (SrcTy->getPrimitiveSizeInBits() % DestEltSize != 0)
    return false;

  bool IsUnary = isa<UndefValue>(V1);

  // For binary shuffles, only fold bitcast(shuffle(X,Y)) if at least one
  // operand is a bitcast from the destination element type.
  if (!(BCTy0 && BCTy0->getElementType() == DestTy->getElementType()) &&
      !(BCTy1 && BCTy1->getElementType() == DestTy->getElementType()))
    return false;

  SmallVector<int, 16> NewMask;
  if (DestEltSize <= SrcEltSize) {
    // The bitcast is from wide to narrow/equal elements. The shuffle mask can
    // always be expanded to the equivalent form.
    assert(SrcEltSize % DestEltSize == 0 && "Unexpected shuffle mask");
    unsigned ScaleFactor = SrcEltSize / DestEltSize;
    narrowShuffleMaskElts(ScaleFactor, Mask, NewMask);
  } else {
    // The bitcast is from narrow elements to wide elements. The shuffle mask
    // must choose consecutive elements to allow casting first.
    assert(DestEltSize % SrcEltSize == 0 && "Unexpected shuffle mask");
    unsigned ScaleFactor = DestEltSize / SrcEltSize;
    if (!widenShuffleMaskElts(ScaleFactor, Mask, NewMask))
      return false;
  }

  // Bitcast the shuffle src - keep its original width but using the
  // destination scalar type.
  unsigned NumSrcElts = SrcTy->getPrimitiveSizeInBits() / DestEltSize;
  // ...
  unsigned NumOps = IsUnary ? 1 : 2;

  // The new shuffle must not cost more than the old shuffle; both sides cost
  // the bitcasts with TargetTransformInfo::CastContextHint::None.
  // ...
  LLVM_DEBUG(dbgs() << "Found a bitcasted shuffle: " << I << "\n  OldCost: "
                    << OldCost << " vs NewCost: " << NewCost << "\n");

  if (NewCost > OldCost || !NewCost.isValid())
    return false;
  // ...
  ++NumShufOfBitcast;
  replaceValue(I, *Shuf);
  return true;
}
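// Illustrative only (the scaled mask is elided): the shuffle migrates past
// the bitcast so the cast applies to the sources:
//   %s = shufflevector <4 x i32> %x, <4 x i32> poison,
//        <4 x i32> <i32 3, i32 2, i32 1, i32 0>
//   %r = bitcast <4 x i32> %s to <16 x i8>
// -->
//   %b = bitcast <4 x i32> %x to <16 x i8>
//   %r = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> <...>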
bool VectorCombine::scalarizeVPIntrinsic(Instruction &I) {
  if (!isa<VPIntrinsic>(I))
    return false;
  VPIntrinsic &VPI = cast<VPIntrinsic>(I);

  // The transform applies only to binary VP intrinsics whose two vector
  // operands are splats.
  Value *Op0 = VPI.getArgOperand(0);
  Value *Op1 = VPI.getArgOperand(1);
  Value *ScalarOp0 = getSplatValue(Op0);
  Value *ScalarOp1 = getSplatValue(Op1);
  if (!ScalarOp0 || !ScalarOp1)
    return false;

  // For the binary VP intrinsics supported here, the result on disabled lanes
  // is a poison value. For now, only do this transform if the mask is all
  // true.
  auto IsAllTrueMask = [](Value *MaskVal) {
    if (Value *SplattedVal = getSplatValue(MaskVal))
      if (auto *ConstValue = dyn_cast<Constant>(SplattedVal))
        return ConstValue->isAllOnesValue();
    return false;
  };
  if (!IsAllTrueMask(VPI.getMaskParam()))
    return false;

  // Check to make sure we support scalarization of the intrinsic.
  Intrinsic::ID IntrID = VPI.getIntrinsicID();
  if (!VPBinOpIntrinsic::isVPBinOp(IntrID))
    return false;

  // Calculate the cost of the VP intrinsic with splatted operands.
  VectorType *VecTy = cast<VectorType>(VPI.getType());
  SmallVector<int> Mask;
  if (auto *FVTy = dyn_cast<FixedVectorType>(VecTy))
    Mask.resize(FVTy->getNumElements(), 0);
  SmallVector<Type *, 4> Args;
  for (Value *V : VPI.args())
    Args.push_back(V->getType());
  // ... (OldCost: two splats + the vector VP op)

  // Determine the scalar opcode (or a scalar intrinsic fallback).
  std::optional<unsigned> FunctionalOpcode = VPI.getFunctionalOpcode();
  std::optional<Intrinsic::ID> ScalarIntrID = std::nullopt;
  if (!FunctionalOpcode) {
    ScalarIntrID = VPI.getFunctionalIntrinsicID();
    if (!ScalarIntrID)
      return false;
  }
  // ... (NewCost: the scalar op plus one splat of the result)
  LLVM_DEBUG(dbgs() << "Cost of the VP intrinsic: " << OldCost
                    << ", Cost of scalarizing:" << NewCost << "\n");

  if (OldCost < NewCost || !NewCost.isValid())
    return false;

  // The scalar operation may only be speculated if it cannot trap, or if the
  // explicit vector length is known non-zero.
  bool SafeToSpeculate;
  if (ScalarIntrID)
    SafeToSpeculate = Intrinsic::getAttributes(I.getContext(), *ScalarIntrID)
                          .hasFnAttr(Attribute::AttrKind::Speculatable);
  else
    SafeToSpeculate = isSafeToSpeculativelyExecuteWithOpcode(
        *FunctionalOpcode, &VPI, nullptr, &AC, &DT);
  if (!SafeToSpeculate &&
      !isKnownNonZero(VPI.getVectorLengthParam(),
                      SimplifyQuery(*DL, &DT, &AC, &VPI)))
    return false;

  Value *ScalarVal =
      ScalarIntrID
          ? Builder.CreateIntrinsic(VecTy->getScalarType(), *ScalarIntrID,
                                    {ScalarOp0, ScalarOp1})
          : Builder.CreateBinOp((Instruction::BinaryOps)(*FunctionalOpcode),
                                ScalarOp0, ScalarOp1);
  replaceValue(VPI, *Builder.CreateVectorSplat(VecTy->getElementCount(),
                                               ScalarVal));
  return true;
}
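// Illustrative only: a vp.add of two splats with an all-true mask becomes a
// scalar add that is re-splatted (the splat lowering is elided):
//   %r = call <4 x i32> @llvm.vp.add.v4i32(<4 x i32> %xs, <4 x i32> %ys,
//                                          <4 x i1> splat (i1 true), i32 %evl)
// -->
//   %s = add i32 %x, %y
//   %r = <4 x i32> splat of %s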
bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
  // Match a vector binop or compare whose operands are inserts of scalars
  // into constant vectors.
  // ...
  bool IsCmp = Pred != CmpInst::Predicate::BAD_ICMP_PREDICATE;

  // Do not convert the vector condition of a vector select into a scalar
  // condition. That may cause problems for codegen because of differences in
  // boolean formats and register-file transfers.
  if (IsCmp)
    for (User *U : I.users())
      if (match(U, m_Select(m_Specific(&I), m_Value(), m_Value())))
        return false;

  // Match against one or both scalar values being inserted into constant
  // vectors:
  //   vec_op VecC0, (inselt VecC1, V1, Index)
  //   vec_op (inselt VecC0, V0, Index), VecC1
  //   vec_op (inselt VecC0, V0, Index), (inselt VecC1, V1, Index)
  Constant *VecC0 = nullptr, *VecC1 = nullptr;
  Value *V0 = nullptr, *V1 = nullptr;
  // ...
  if (IsConst0 && IsConst1)
    return false;
  if (!IsConst0 && !IsConst1 && Index0 != Index1)
    return false;

  auto *VecTy0 = cast<VectorType>(Ins0->getType());
  auto *VecTy1 = cast<VectorType>(Ins1->getType());
  if (VecTy0->getElementCount().getKnownMinValue() <= Index0 ||
      VecTy1->getElementCount().getKnownMinValue() <= Index1)
    return false;

  // Bail for single insertion if it is a load.
  // TODO: Handle this once getVectorInstrCost can cost for load/stores.
  auto *I0 = dyn_cast_or_null<Instruction>(V0);
  auto *I1 = dyn_cast_or_null<Instruction>(V1);
  if ((IsConst0 && I1 && I1->mayReadFromMemory()) ||
      (IsConst1 && I0 && I0->mayReadFromMemory()))
    return false;

  Type *VecTy = I.getType();
  assert(VecTy->isVectorTy() &&
         "Unexpected types for insert element into binop or cmp");

  unsigned Opcode = I.getOpcode();
  // ... (ScalarOpCost / VectorOpCost from TTI)

  // Get cost estimate for the insert element. This cost will factor into
  // both sequences.
  InstructionCost InsertCost = TTI.getVectorInstrCost(
      Instruction::InsertElement, VecTy, CostKind, Index);
  InstructionCost OldCost =
      (IsConst0 ? 0 : InsertCost) + (IsConst1 ? 0 : InsertCost) + VectorOpCost;
  InstructionCost NewCost = ScalarOpCost + InsertCost +
      (IsConst0 ? 0 : !Ins0->hasOneUse() * InsertCost) +
      (IsConst1 ? 0 : !Ins1->hasOneUse() * InsertCost);

  // We want to scalarize unless the vector variant actually has lower cost.
  if (OldCost < NewCost || !NewCost.isValid())
    return false;

  // vec_op (inselt VecC0, V0, Index), (inselt VecC1, V1, Index) -->
  // inselt NewVecC, (scalar_op V0, V1), Index
  // ...
  Scalar->setName(I.getName() + ".scalar");

  // All IR flags are safe to back-propagate. There is no potential for extra
  // poison to be created by the scalar instruction.
  if (auto *ScalarInst = dyn_cast<Instruction>(Scalar))
    ScalarInst->copyIRFlags(&I);

  // Fold the vector constants in the original vectors into a new base vector.
  Value *NewVecC =
      IsCmp ? Builder.CreateCmp(Pred, VecC0, VecC1)
            : Builder.CreateBinOp((Instruction::BinaryOps)Opcode, VecC0, VecC1);
  Value *Insert = Builder.CreateInsertElement(NewVecC, Scalar, Index);
  replaceValue(I, *Insert);
  return true;
}
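// Worked example (illustrative values): the constant lanes fold away and only
// the inserted lane is computed in scalar form:
//   %i = insertelement <4 x i32> <i32 1, i32 2, i32 3, i32 4>, i32 %x, i32 0
//   %r = add <4 x i32> %i, <i32 10, i32 20, i32 30, i32 40>
// -->
//   %s = add i32 %x, 10
//   %r = insertelement <4 x i32> <i32 11, i32 22, i32 33, i32 44>, i32 %s,
//        i32 0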
bool VectorCombine::foldExtractedCmps(Instruction &I) {
  auto *BI = dyn_cast<BinaryOperator>(&I);

  // We are looking for a scalar binop of booleans:
  //   binop i1 (cmp Pred I0, C0), (cmp Pred I1, C1)
  if (!BI || !I.getType()->isIntegerTy(1))
    return false;

  // The compare predicates should match, and each compare should have a
  // constant operand.
  Value *B0 = I.getOperand(0), *B1 = I.getOperand(1);
  // ... (match both compares against extracts of the same vector X)
  auto *Ext0 = cast<ExtractElementInst>(I0);
  auto *Ext1 = cast<ExtractElementInst>(I1);
  ExtractElementInst *ConvertToShuf = getShuffleExtract(Ext0, Ext1, CostKind);
  if (!ConvertToShuf)
    return false;
  assert((ConvertToShuf == Ext0 || ConvertToShuf == Ext1) &&
         "Unknown ExtractElementInst");

  // The original scalar pattern is:
  //   binop i1 (cmp Pred (ext X, Index0), C0), (cmp Pred (ext X, Index1), C1)
  unsigned CmpOpcode =
      CmpInst::isFPPredicate(P0) ? Instruction::FCmp : Instruction::ICmp;
  auto *VecTy = dyn_cast<FixedVectorType>(X->getType());
  if (!VecTy)
    return false;

  InstructionCost Ext0Cost =
      TTI.getVectorInstrCost(*Ext0, VecTy, CostKind, Index0);
  InstructionCost Ext1Cost =
      TTI.getVectorInstrCost(*Ext1, VecTy, CostKind, Index1);
  InstructionCost OldCost =
      Ext0Cost + Ext1Cost + CmpCost * 2 +
      TTI.getArithmeticInstrCost(I.getOpcode(), I.getType(), CostKind);

  // The proposed vector pattern is:
  //   vcmp = cmp Pred X, VecC
  //   ext (binop vNi1 vcmp, (shuffle vcmp, Index1)), Index0
  int CheapIndex = ConvertToShuf == Ext0 ? Index1 : Index0;
  int ExpensiveIndex = ConvertToShuf == Ext0 ? Index0 : Index1;
  // ...
  ShufMask[CheapIndex] = ExpensiveIndex;
  // ... (NewCost: vector cmp + shuffle + vector binop + one extract)
  NewCost += Ext0->hasOneUse() ? 0 : Ext0Cost;
  NewCost += Ext1->hasOneUse() ? 0 : Ext1Cost;

  // Aggressively form vector ops if the cost is equal because the transform
  // may enable further optimization.
  if (OldCost < NewCost || !NewCost.isValid())
    return false;

  // Create a vector constant from the 2 scalar constants.
  // ...
  Value *LHS = ConvertToShuf == Ext0 ? Shuf : VCmp;
  Value *RHS = ConvertToShuf == Ext0 ? VCmp : Shuf;
  // ...
  ++NumVecCmpBO;
  replaceValue(I, *NewExt);
  return true;
}
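// A rough sketch of the intent (constants, indexes, and the exact shuffle are
// illustrative):
//   %x0 = extractelement <4 x i32> %x, i32 0
//   %x2 = extractelement <4 x i32> %x, i32 2
//   %c0 = icmp sgt i32 %x0, 10
//   %c2 = icmp sgt i32 %x2, 20
//   %r  = and i1 %c0, %c2
// -->
//   %vc = icmp sgt <4 x i32> %x, <i32 10, i32 poison, i32 20, i32 poison>
//   %sh = shufflevector <4 x i1> %vc, <4 x i1> poison,
//         <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; lane 2 moved to lane 0
//   %v  = and <4 x i1> %vc, %sh
//   %r  = extractelement <4 x i1> %v, i32 0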
// Check if memory is modified between Begin and End; gives up (returns true)
// after scanning MaxInstrsToScan instructions.
static bool isMemModifiedBetween(BasicBlock::iterator Begin,
                                 BasicBlock::iterator End,
                                 const MemoryLocation &Loc, AAResults &AA) {
  unsigned NumScanned = 0;
  return std::any_of(Begin, End, [&](const Instruction &Instr) {
    return isModSet(AA.getModRefInfo(&Instr, Loc)) ||
           ++NumScanned > MaxInstrsToScan;
  });
}
/// Helper class to indicate whether a vector index can be safely scalarized
/// and if a freeze needs to be inserted.
class ScalarizationResult {
  enum class StatusTy { Unsafe, Safe, SafeWithFreeze };

  StatusTy Status;
  Value *ToFreeze;

  ScalarizationResult(StatusTy Status, Value *ToFreeze = nullptr)
      : Status(Status), ToFreeze(ToFreeze) {}

public:
  ScalarizationResult(const ScalarizationResult &Other) = default;
  ~ScalarizationResult() {
    assert(!ToFreeze && "freeze() not called with ToFreeze being set");
  }

  static ScalarizationResult unsafe() { return {StatusTy::Unsafe}; }
  static ScalarizationResult safe() { return {StatusTy::Safe}; }
  static ScalarizationResult safeWithFreeze(Value *ToFreeze) {
    return {StatusTy::SafeWithFreeze, ToFreeze};
  }

  /// Returns true if the index can be scalarized without requiring a freeze.
  bool isSafe() const { return Status == StatusTy::Safe; }
  /// Returns true if the index cannot be scalarized.
  bool isUnsafe() const { return Status == StatusTy::Unsafe; }
  /// Returns true if the index can be scalarized, but requires inserting a
  /// freeze.
  bool isSafeWithFreeze() const { return Status == StatusTy::SafeWithFreeze; }

  /// Reset the state of Unsafe and clear ToFreeze if set.
  void discard() {
    ToFreeze = nullptr;
    Status = StatusTy::Unsafe;
  }

  /// Freeze the ToFreeze and update the use in \p User to use it.
  void freeze(IRBuilder<> &Builder, Instruction &UserI) {
    assert(isSafeWithFreeze() &&
           "should only be used when freezing is required");
    assert(is_contained(ToFreeze->users(), &UserI) &&
           "UserI must be a user of ToFreeze");
    Value *Frozen = Builder.CreateFreeze(ToFreeze, ToFreeze->getName() +
                                                       ".frozen");
    for (Use &U : make_early_inc_range((UserI.operands())))
      if (U.get() == ToFreeze)
        U.set(Frozen);
    ToFreeze = nullptr;
  }
};

/// Check if it is legal to scalarize a memory access to \p VecTy at index \p
/// Idx. \p Idx must access a valid vector element.
static ScalarizationResult canScalarizeAccess(VectorType *VecTy, Value *Idx,
                                              Instruction *CtxI,
                                              AssumptionCache &AC,
                                              const DominatorTree &DT) {
  // We do checks for both fixed vector types and scalable vector types.
  // This is the number of elements of fixed vector types,
  // or the minimum number of elements of scalable vector types.
  uint64_t NumElements = VecTy->getElementCount().getKnownMinValue();

  if (auto *C = dyn_cast<ConstantInt>(Idx)) {
    if (C->getValue().ult(NumElements))
      return ScalarizationResult::safe();
    return ScalarizationResult::unsafe();
  }

  unsigned IntWidth = Idx->getType()->getScalarSizeInBits();
  APInt Zero(IntWidth, 0);
  APInt MaxElts(IntWidth, NumElements);
  ConstantRange ValidIndices(Zero, MaxElts);
  ConstantRange IdxRange(IntWidth, true);

  if (isGuaranteedNotToBePoison(Idx, &AC)) {
    if (ValidIndices.contains(computeConstantRange(Idx, /*ForSigned=*/false,
                                                   true, &AC, CtxI, &DT)))
      return ScalarizationResult::safe();
    return ScalarizationResult::unsafe();
  }

  // If the index may be poison, check if we can insert a freeze before the
  // range of the index is restricted.
  Value *IdxBase;
  ConstantInt *CI;
  if (match(Idx, m_And(m_Value(IdxBase), m_ConstantInt(CI)))) {
    IdxRange = IdxRange.binaryAnd(CI->getValue());
  } else if (match(Idx, m_URem(m_Value(IdxBase), m_ConstantInt(CI)))) {
    IdxRange = IdxRange.urem(CI->getValue());
  }

  if (ValidIndices.contains(IdxRange))
    return ScalarizationResult::safeWithFreeze(IdxBase);
  return ScalarizationResult::unsafe();
}
/// The memory operation on a vector of \p ScalarType had alignment of
/// \p VectorAlignment. Compute the maximal, but conservatively correct,
/// alignment of the scalarized access.
static Align computeAlignmentAfterScalarization(Align VectorAlignment,
                                                Type *ScalarType, Value *Idx,
                                                const DataLayout &DL) {
  // For a constant index, the element's byte offset is known exactly.
  if (auto *C = dyn_cast<ConstantInt>(Idx))
    return commonAlignment(VectorAlignment,
                           C->getZExtValue() * DL.getTypeStoreSize(ScalarType));
  // Otherwise only the element-size stride is guaranteed.
  return commonAlignment(VectorAlignment, DL.getTypeStoreSize(ScalarType));
}
bool VectorCombine::foldSingleElementStore(Instruction &I) {
  auto *SI = cast<StoreInst>(&I);
  if (!SI->isSimple() || !isa<VectorType>(SI->getValueOperand()->getType()))
    return false;

  // TODO: Combine more complicated patterns (multiple insert) by referencing
  // TargetTransformInfo.
  Instruction *Source;
  Value *NewElement;
  Value *Idx;
  if (!match(SI->getValueOperand(),
             m_InsertElt(m_Instruction(Source), m_Value(NewElement),
                         m_Value(Idx))))
    return false;

  if (auto *Load = dyn_cast<LoadInst>(Source)) {
    auto VecTy = cast<VectorType>(SI->getValueOperand()->getType());
    Value *SrcAddr = Load->getPointerOperand()->stripPointerCasts();
    // Don't optimize for atomic/volatile load or store. Ensure memory is not
    // modified between, vector type matches store size, and index is inbounds.
    if (!Load->isSimple() || Load->getParent() != SI->getParent() ||
        !DL->typeSizeEqualsStoreSize(Load->getType()->getScalarType()) ||
        SrcAddr != SI->getPointerOperand()->stripPointerCasts())
      return false;

    auto ScalarizableIdx = canScalarizeAccess(VecTy, Idx, Load, AC, DT);
    if (ScalarizableIdx.isUnsafe() ||
        isMemModifiedBetween(Load->getIterator(), SI->getIterator(),
                             MemoryLocation::get(SI), AA))
      return false;

    if (ScalarizableIdx.isSafeWithFreeze())
      ScalarizableIdx.freeze(Builder, *cast<Instruction>(Idx));
    Value *GEP = Builder.CreateInBoundsGEP(
        SI->getValueOperand()->getType(), SI->getPointerOperand(),
        {ConstantInt::get(Idx->getType(), 0), Idx});
    StoreInst *NSI = Builder.CreateStore(NewElement, GEP);
    NSI->copyMetadata(*SI);
    Align ScalarOpAlignment = computeAlignmentAfterScalarization(
        SI->getAlign(), NewElement->getType(), Idx, *DL);
    NSI->setAlignment(ScalarOpAlignment);
    replaceValue(I, *NSI);
    // ... (erase the now-dead original store)
    return true;
  }

  return false;
}
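// Illustrative single-element store scalarization (names are hypothetical):
//   %v  = load <4 x i32>, ptr %p
//   %v1 = insertelement <4 x i32> %v, i32 %x, i32 %idx
//   store <4 x i32> %v1, ptr %p
// -->
//   %gep = getelementptr inbounds <4 x i32>, ptr %p, i32 0, i32 %idx
//   store i32 %x, ptr %gep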
bool VectorCombine::scalarizeLoadExtract(Instruction &I) {
  Value *Ptr;
  if (!match(&I, m_Load(m_Value(Ptr))))
    return false;

  auto *VecTy = cast<VectorType>(I.getType());
  auto *LI = cast<LoadInst>(&I);
  if (LI->isVolatile() || !DL->typeSizeEqualsStoreSize(VecTy->getScalarType()))
    return false;

  InstructionCost OriginalCost =
      TTI.getMemoryOpCost(Instruction::Load, VecTy, LI->getAlign(),
                          LI->getPointerAddressSpace(), CostKind);
  InstructionCost ScalarizedCost = 0;

  Instruction *LastCheckedInst = LI;
  unsigned NumInstChecked = 0;
  DenseMap<ExtractElementInst *, ScalarizationResult> NeedFreeze;
  auto FailureGuard = make_scope_exit([&]() {
    // If the transform is aborted, discard the ScalarizationResults.
    for (auto &Pair : NeedFreeze)
      Pair.second.discard();
  });

  // Check if all users of the load are extracts with no memory modifications
  // between the load and the extract. Compute the cost of both the original
  // code and the scalarized version.
  for (User *U : LI->users()) {
    auto *UI = dyn_cast<ExtractElementInst>(U);
    if (!UI || UI->getParent() != LI->getParent())
      return false;

    // Check if any instruction between the load and the extract may modify
    // memory.
    if (LastCheckedInst->comesBefore(UI)) {
      for (Instruction &I :
           make_range(std::next(LI->getIterator()), UI->getIterator())) {
        // Bail out if we reached the check limit or the instruction may write
        // to memory.
        if (NumInstChecked == MaxInstrsToScan || I.mayWriteToMemory())
          return false;
        NumInstChecked++;
      }
      LastCheckedInst = UI;
    }

    auto ScalarIdx = canScalarizeAccess(VecTy, UI->getOperand(1), &I, AC, DT);
    if (ScalarIdx.isUnsafe())
      return false;
    if (ScalarIdx.isSafeWithFreeze()) {
      NeedFreeze.try_emplace(UI, ScalarIdx);
      ScalarIdx.discard();
    }

    auto *Index = dyn_cast<ConstantInt>(UI->getOperand(1));
    OriginalCost +=
        TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy, CostKind,
                               Index ? Index->getZExtValue() : -1);
    // ... (add the narrow scalar load's cost to ScalarizedCost)
  }

  if (ScalarizedCost >= OriginalCost)
    return false;

  // Replace extracts with narrow scalar loads.
  for (User *U : make_early_inc_range(LI->users())) {
    auto *EI = cast<ExtractElementInst>(U);
    Value *Idx = EI->getOperand(1);

    // Insert 'freeze' for poison indexes.
    auto It = NeedFreeze.find(EI);
    if (It != NeedFreeze.end())
      It->second.freeze(Builder, *cast<Instruction>(Idx));

    Builder.SetInsertPoint(EI);
    Value *GEP =
        Builder.CreateInBoundsGEP(VecTy, Ptr, {Builder.getInt32(0), Idx});
    auto *NewLoad = cast<LoadInst>(Builder.CreateLoad(
        VecTy->getElementType(), GEP, EI->getName() + ".scalar"));

    Align ScalarOpAlignment = computeAlignmentAfterScalarization(
        LI->getAlign(), VecTy->getElementType(), Idx, *DL);
    NewLoad->setAlignment(ScalarOpAlignment);

    replaceValue(*EI, *NewLoad);
  }

  FailureGuard.release();
  return true;
}
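// Illustrative only: a vector load used solely by extracts becomes per-lane
// scalar loads:
//   %v = load <4 x i32>, ptr %p
//   %e = extractelement <4 x i32> %v, i32 2
// -->
//   %gep = getelementptr inbounds <4 x i32>, ptr %p, i32 0, i32 2
//   %e   = load i32, ptr %gep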
bool VectorCombine::foldConcatOfBoolMasks(Instruction &I) {
  Type *Ty = I.getType();
  if (!Ty->isIntegerTy())
    return false;

  // TODO: Add big endian test coverage.
  if (DL->isBigEndian())
    return false;

  // Restrict to disjoint cases of a bool-mask concatenation built from
  // zext(bitcast(<N x i1>)) values combined with shl + or.
  // ...
  // Canonicalize the larger shift amount to the Y side.
  if (ShAmtX > ShAmtY) {
    std::swap(SrcX, SrcY);
    std::swap(ShAmtX, ShAmtY);
  }

  // One bool-mask vector must fill exactly the gap between the two shifts,
  // and both masks must fit in the integer type.
  uint64_t ShAmtDiff = ShAmtY - ShAmtX;
  unsigned NumSHL = (ShAmtX > 0) + (ShAmtY > 0);
  unsigned BitWidth = Ty->getScalarSizeInBits();
  auto *MaskTy = dyn_cast<FixedVectorType>(SrcX->getType());
  if (!MaskTy || SrcX->getType() != SrcY->getType() ||
      MaskTy->getNumElements() != ShAmtDiff ||
      MaskTy->getNumElements() > (BitWidth / 2))
    return false;

  auto *ConcatTy = FixedVectorType::getDoubleElementsVectorType(MaskTy);
  Type *ConcatIntTy =
      Type::getIntNTy(Ty->getContext(), ConcatTy->getNumElements());
  SmallVector<int, 32> ConcatMask(ConcatTy->getNumElements());
  std::iota(ConcatMask.begin(), ConcatMask.end(), 0);

  // ... (cost the shuffle + bitcast against the zext/shl/or sequence)
  if (Ty != ConcatIntTy)
    NewCost += TTI.getCastInstrCost(Instruction::ZExt, Ty, ConcatIntTy,
                                    TargetTransformInfo::CastContextHint::None,
                                    CostKind);
  // ...
  if (NewCost > OldCost)
    return false;

  // Build the concatenated mask and bitcast it back to a scalar integer.
  Value *Concat = Builder.CreateShuffleVector(SrcX, SrcY, ConcatMask);
  Value *Result = Builder.CreateBitCast(Concat, ConcatIntTy);
  if (Ty != ConcatIntTy) {
    // ...
    Result = Builder.CreateZExt(Result, Ty);
  }
  // ...
  replaceValue(I, *Result);
  return true;
}
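// Illustrative concat-of-bool-masks fold (little-endian only; names are
// hypothetical):
//   %mx = bitcast <8 x i1> %x to i8
//   %my = bitcast <8 x i1> %y to i8
//   %zx = zext i8 %mx to i16
//   %zy = zext i8 %my to i16
//   %sh = shl i16 %zy, 8
//   %r  = or disjoint i16 %zx, %sh
// -->
//   %c = shufflevector <8 x i1> %x, <8 x i1> %y,
//        <16 x i32> <i32 0, ..., i32 15>
//   %r = bitcast <16 x i1> %c to i16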
// Try to convert: "shuffle (binop (shuffle, shuffle)), undef"
//            -->  "binop (shuffle), (shuffle)".
bool VectorCombine::foldPermuteOfBinops(Instruction &I) {
  BinaryOperator *BinOp;
  ArrayRef<int> OuterMask;
  if (!match(&I,
             m_Shuffle(m_OneUse(m_BinOp(BinOp)), m_Value(), m_Mask(OuterMask))))
    return false;

  // ... (match both binop operands as one-use shuffles:
  //      Op00/Op01 with Mask0 and Op10/Op11 with Mask1)
  auto *ShuffleDstTy = dyn_cast<FixedVectorType>(I.getType());
  auto *BinOpTy = dyn_cast<FixedVectorType>(BinOp->getType());
  auto *Op0Ty = dyn_cast<FixedVectorType>(Op00->getType());
  auto *Op1Ty = dyn_cast<FixedVectorType>(Op10->getType());
  if (!ShuffleDstTy || !BinOpTy || !Op0Ty || !Op1Ty)
    return false;

  unsigned NumSrcElts = BinOpTy->getNumElements();

  // Don't accept shuffles that reference the second operand in
  // div/rem or if it's an undef arg.
  if ((BinOp->isIntDivRem() || !isa<PoisonValue>(I.getOperand(1))) &&
      any_of(OuterMask, [NumSrcElts](int M) { return M >= (int)NumSrcElts; }))
    return false;

  // Merge the outer permute into both inner shuffle masks.
  SmallVector<int> NewMask0, NewMask1;
  for (int M : OuterMask) {
    if (M < 0 || M >= (int)NumSrcElts) {
      NewMask0.push_back(PoisonMaskElem);
      NewMask1.push_back(PoisonMaskElem);
    } else {
      NewMask0.push_back(Mask0[M]);
      NewMask1.push_back(Mask1[M]);
    }
  }

  // Try to merge shuffles across the binop if the new shuffles are not costly.
  InstructionCost OldCost =
      TTI.getArithmeticInstrCost(BinOp->getOpcode(), BinOpTy, CostKind) +
      TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, BinOpTy,
                         OuterMask, CostKind, 0, nullptr, {BinOp}, &I) +
      TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, Op0Ty, Mask0,
                         CostKind, 0, nullptr, {Op00, Op01},
                         cast<Instruction>(BinOp->getOperand(0))) +
      TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, Op1Ty, Mask1,
                         CostKind, 0, nullptr, {Op10, Op11},
                         cast<Instruction>(BinOp->getOperand(1)));

  InstructionCost NewCost =
      TTI.getArithmeticInstrCost(BinOp->getOpcode(), ShuffleDstTy, CostKind) +
      TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, Op0Ty,
                         NewMask0, CostKind, 0, nullptr, {Op00, Op01}) +
      TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, Op1Ty,
                         NewMask1, CostKind, 0, nullptr, {Op10, Op11});

  LLVM_DEBUG(dbgs() << "Found a shuffle feeding a shuffled binop: " << I
                    << "\n  OldCost: " << OldCost << " vs NewCost: " << NewCost
                    << "\n");

  if (NewCost > OldCost)
    return false;

  Value *Shuf0 = Builder.CreateShuffleVector(Op00, Op01, NewMask0);
  Value *Shuf1 = Builder.CreateShuffleVector(Op10, Op11, NewMask1);
  Value *NewBO = Builder.CreateBinOp(BinOp->getOpcode(), Shuf0, Shuf1);

  // Flags from the original binop remain valid on the new one.
  if (auto *NewInst = dyn_cast<Instruction>(NewBO))
    NewInst->copyIRFlags(BinOp);

  Worklist.pushValue(Shuf0);
  Worklist.pushValue(Shuf1);
  replaceValue(I, *NewBO);
  return true;
}
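// Illustrative intent of the fold (masks elided):
//   %s0 = shufflevector %a0, %a1, <inner mask 0>
//   %s1 = shufflevector %b0, %b1, <inner mask 1>
//   %bo = add %s0, %s1
//   %r  = shufflevector %bo, poison, <outer mask>
// -->
//   %s0' = shufflevector %a0, %a1, <merged mask 0>
//   %s1' = shufflevector %b0, %b1, <merged mask 1>
//   %r   = add %s0', %s1'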
// Try to convert: "shuffle (binop X, Y), (binop Z, W)"
//            -->  "binop (shuffle X, Z), (shuffle Y, W)" (and similarly for
// two compares with the same predicate).
bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
  ArrayRef<int> OldMask;
  Instruction *LHS, *RHS;
  if (!match(&I, m_Shuffle(m_OneUse(m_Instruction(LHS)),
                           m_OneUse(m_Instruction(RHS)), m_Mask(OldMask))))
    return false;
  if (LHS->getOpcode() != RHS->getOpcode())
    return false;

  Value *X, *Y, *Z, *W;
  bool IsCommutative = false;
  CmpPredicate Pred = CmpInst::BAD_ICMP_PREDICATE;
  if (match(LHS, m_BinOp(m_Value(X), m_Value(Y))) &&
      match(RHS, m_BinOp(m_Value(Z), m_Value(W)))) {
    auto *BO = cast<BinaryOperator>(LHS);
    // Don't introduce poison into div/rem.
    if (BO->isIntDivRem() && llvm::is_contained(OldMask, PoisonMaskElem))
      return false;
    IsCommutative = BinaryOperator::isCommutative(BO->getOpcode());
  } else if (match(LHS, m_Cmp(Pred, m_Value(X), m_Value(Y))) &&
             match(RHS, m_SpecificCmp(Pred, m_Value(Z), m_Value(W)))) {
    IsCommutative = cast<CmpInst>(LHS)->isCommutative();
  } else
    return false;

  auto *ShuffleDstTy = dyn_cast<FixedVectorType>(I.getType());
  auto *BinResTy = dyn_cast<FixedVectorType>(LHS->getType());
  auto *BinOpTy = dyn_cast<FixedVectorType>(X->getType());
  if (!ShuffleDstTy || !BinResTy || !BinOpTy || X->getType() != Z->getType())
    return false;

  unsigned NumSrcElts = BinOpTy->getNumElements();

  // If we have something like "add X, Y" and "add Z, X", swap ops to match.
  if (IsCommutative && X != Z && Y != W && (X == W || Y == Z))
    std::swap(X, Y);

  auto ConvertToUnary = [NumSrcElts](int &M) {
    if (M >= (int)NumSrcElts)
      M -= NumSrcElts;
  };
  // ... (split OldMask into NewMask0 for X/Z and NewMask1 for Y/W, converting
  // to unary masks when both sides read the same value)
  auto *ShuffleCmpTy =
      FixedVectorType::get(BinOpTy->getElementType(),
                           ShuffleDstTy->getNumElements());
  // ... (cost the two binops + shuffle against two shuffles + one binop)
  LLVM_DEBUG(dbgs() << "Found a shuffle feeding two binops: " << I
                    << "\n  OldCost: " << OldCost << " vs NewCost: " << NewCost
                    << "\n");

  // If either shuffle will constant fold away, then fold for the same cost as
  // we will reduce the instruction count.
  bool ReducedInstCount = (isa<Constant>(X) && isa<Constant>(Z)) ||
                          (isa<Constant>(Y) && isa<Constant>(W));
  if (ReducedInstCount ? (NewCost > OldCost) : (NewCost >= OldCost))
    return false;

  Value *Shuf0 = Builder.CreateShuffleVector(X, Z, NewMask0);
  Value *Shuf1 = Builder.CreateShuffleVector(Y, W, NewMask1);
  Value *NewBO = Pred == CmpInst::BAD_ICMP_PREDICATE
                     ? Builder.CreateBinOp(
                           cast<BinaryOperator>(LHS)->getOpcode(), Shuf0, Shuf1)
                     : Builder.CreateCmp(Pred, Shuf0, Shuf1);

  // Intersect flags from the old binops.
  if (auto *NewInst = dyn_cast<Instruction>(NewBO)) {
    NewInst->copyIRFlags(LHS);
    NewInst->andIRFlags(RHS);
  }

  Worklist.pushValue(Shuf0);
  Worklist.pushValue(Shuf1);
  replaceValue(I, *NewBO);
  return true;
}
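// Illustrative: one binop on shuffled operands replaces two binops plus a
// shuffle:
//   %b0 = add <4 x i32> %x, %y
//   %b1 = add <4 x i32> %z, %w
//   %r  = shufflevector <4 x i32> %b0, <4 x i32> %b1, <4 x i32> <mask>
// -->
//   %s0 = shufflevector <4 x i32> %x, <4 x i32> %z, <4 x i32> <mask>
//   %s1 = shufflevector <4 x i32> %y, <4 x i32> %w, <4 x i32> <mask>
//   %r  = add <4 x i32> %s0, %s1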
// Try to convert "shuffle (castop), (castop)" with a shared castop operand
// into "castop (shuffle)".
bool VectorCombine::foldShuffleOfCastops(Instruction &I) {
  Value *V0, *V1;
  ArrayRef<int> OldMask;
  if (!match(&I, m_Shuffle(m_Value(V0), m_Value(V1), m_Mask(OldMask))))
    return false;

  auto *C0 = dyn_cast<CastInst>(V0);
  auto *C1 = dyn_cast<CastInst>(V1);
  if (!C0 || !C1)
    return false;

  Instruction::CastOps Opcode = C0->getOpcode();
  if (C0->getSrcTy() != C1->getSrcTy())
    return false;

  // Handle shuffle(zext_nneg(x), sext(y)) -> sext(shuffle(x,y)) folds.
  if (Opcode != C1->getOpcode()) {
    if (match(C0, m_SExtLike(m_Value())) && match(C1, m_SExtLike(m_Value())))
      Opcode = Instruction::SExt;
    else
      return false;
  }

  auto *ShuffleDstTy = dyn_cast<FixedVectorType>(I.getType());
  auto *CastDstTy = dyn_cast<FixedVectorType>(C0->getDestTy());
  auto *CastSrcTy = dyn_cast<FixedVectorType>(C0->getSrcTy());
  if (!ShuffleDstTy || !CastDstTy || !CastSrcTy)
    return false;

  unsigned NumSrcElts = CastSrcTy->getNumElements();
  unsigned NumDstElts = CastDstTy->getNumElements();
  assert((NumDstElts == NumSrcElts || Opcode == Instruction::BitCast) &&
         "Only bitcasts expected to alter src/dst element counts");

  // Check for bitcasting of unscalable vector types.
  // e.g. <16 x i16> -> <32 x i8>
  if (NumDstElts != NumSrcElts && (NumSrcElts % NumDstElts) != 0 &&
      (NumDstElts % NumSrcElts) != 0)
    return false;

  SmallVector<int, 16> NewMask;
  if (NumSrcElts >= NumDstElts) {
    // The bitcast is from wide to narrow/equal elements. The shuffle mask can
    // always be expanded to the equivalent form.
    assert(NumSrcElts % NumDstElts == 0 && "Unexpected shuffle mask");
    unsigned ScaleFactor = NumSrcElts / NumDstElts;
    narrowShuffleMaskElts(ScaleFactor, OldMask, NewMask);
  } else {
    // The bitcast is from narrow elements to wide elements. The shuffle mask
    // must choose consecutive elements to allow casting first.
    assert(NumDstElts % NumSrcElts == 0 && "Unexpected shuffle mask");
    unsigned ScaleFactor = NumDstElts / NumSrcElts;
    if (!widenShuffleMaskElts(ScaleFactor, OldMask, NewMask))
      return false;
  }

  auto *NewShuffleDstTy =
      FixedVectorType::get(CastSrcTy->getScalarType(), NewMask.size());
  // ... (cost the two casts + shuffle against one shuffle + one cast)
  LLVM_DEBUG(dbgs() << "Found a shuffle feeding two casts: " << I
                    << "\n  OldCost: " << OldCost << " vs NewCost: " << NewCost
                    << "\n");
  if (NewCost > OldCost)
    return false;

  Value *Shuf = Builder.CreateShuffleVector(C0->getOperand(0),
                                            C1->getOperand(0), NewMask);
  Value *Cast = Builder.CreateCast(Opcode, Shuf, ShuffleDstTy);

  // Intersect flags from the old casts.
  if (auto *NewInst = dyn_cast<Instruction>(Cast)) {
    NewInst->copyIRFlags(C0);
    NewInst->andIRFlags(C1);
  }

  Worklist.pushValue(Shuf);
  replaceValue(I, *Cast);
  return true;
}
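// Illustrative: the shuffle moves before the (shared-source-type) casts:
//   %c0 = sext <4 x i16> %x to <4 x i32>
//   %c1 = sext <4 x i16> %y to <4 x i32>
//   %r  = shufflevector <4 x i32> %c0, <4 x i32> %c1, <8 x i32> <mask>
// -->
//   %s = shufflevector <4 x i16> %x, <4 x i16> %y, <8 x i32> <mask>
//   %r = sext <8 x i16> %s to <8 x i32>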
// Try to convert "shuffle (shuffle x, undef), (shuffle y, undef)"
//           -->  "shuffle x, y".
bool VectorCombine::foldShuffleOfShuffles(Instruction &I) {
  ArrayRef<int> OuterMask;
  Value *OuterV0, *OuterV1;
  if (!match(&I,
             m_Shuffle(m_Value(OuterV0), m_Value(OuterV1), m_Mask(OuterMask))))
    return false;

  ArrayRef<int> InnerMask0, InnerMask1;
  Value *V0 = nullptr, *V1 = nullptr, *U0 = nullptr, *U1 = nullptr;
  bool Match0 = match(OuterV0, m_Shuffle(m_Value(V0), m_Value(U0),
                                         m_Mask(InnerMask0)));
  bool Match1 = match(OuterV1, m_Shuffle(m_Value(V1), m_Value(U1),
                                         m_Mask(InnerMask1)));
  if (!Match0 && !Match1)
    return false;

  V0 = Match0 ? V0 : OuterV0;
  V1 = Match1 ? V1 : OuterV1;
  auto *ShuffleDstTy = dyn_cast<FixedVectorType>(I.getType());
  auto *ShuffleSrcTy = dyn_cast<FixedVectorType>(V0->getType());
  auto *ShuffleImmTy = dyn_cast<FixedVectorType>(I.getOperand(0)->getType());
  if (!ShuffleDstTy || !ShuffleSrcTy || !ShuffleImmTy ||
      V0->getType() != V1->getType())
    return false;

  unsigned NumSrcElts = ShuffleSrcTy->getNumElements();
  unsigned NumImmElts = ShuffleImmTy->getNumElements();

  // Bail if we shuffle using the second operand of the inner shuffles.
  if ((Match0 && !isa<PoisonValue>(U0) &&
       any_of(InnerMask0, [&](int M) { return M >= (int)NumSrcElts; })) ||
      (Match1 && !isa<PoisonValue>(U1) &&
       any_of(InnerMask1, [&](int M) { return M >= (int)NumSrcElts; })))
    return false;

  // Merge the shuffle masks: replace each index into an inner shuffle with
  // the index of the underlying source operand.
  SmallVector<int> NewMask(OuterMask);
  for (int &M : NewMask) {
    if (0 <= M && M < (int)NumImmElts) {
      M = (InnerMask0[M] >= (int)NumSrcElts) ? PoisonMaskElem : InnerMask0[M];
    } else if (M >= (int)NumImmElts) {
      if (InnerMask1[M - NumImmElts] >= (int)NumSrcElts)
        M = PoisonMaskElem;
      else
        M = InnerMask1[M - NumImmElts] + (V0 == V1 ? 0 : NumSrcElts);
    }
  }

  // If the merged mask is an identity read of V0, the shuffles fold away.
  if (ShuffleVectorInst::isIdentityMask(NewMask, NumSrcElts) &&
      V0->getType() == ShuffleDstTy) {
    replaceValue(I, *V0);
    return true;
  }

  InstructionCost InnerCost0 = 0;
  if (Match0)
    InnerCost0 = TTI.getShuffleCost(
        TargetTransformInfo::SK_PermuteTwoSrc, ShuffleSrcTy, InnerMask0,
        CostKind, 0, nullptr, {V0, U0}, cast<ShuffleVectorInst>(OuterV0));

  InstructionCost InnerCost1 = 0;
  if (Match1)
    InnerCost1 = TTI.getShuffleCost(
        TargetTransformInfo::SK_PermuteTwoSrc, ShuffleSrcTy, InnerMask1,
        CostKind, 0, nullptr, {V1, U1}, cast<ShuffleVectorInst>(OuterV1));

  InstructionCost OuterCost = TTI.getShuffleCost(
      TargetTransformInfo::SK_PermuteTwoSrc, ShuffleImmTy, OuterMask, CostKind,
      0, nullptr, {OuterV0, OuterV1}, &I);

  InstructionCost OldCost = InnerCost0 + InnerCost1 + OuterCost;

  InstructionCost NewCost =
      TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, ShuffleSrcTy,
                         NewMask, CostKind, 0, nullptr, {V0, V1});
  // Inner shuffles with other uses must still be paid for.
  if (!OuterV0->hasOneUse())
    NewCost += InnerCost0;
  if (!OuterV1->hasOneUse())
    NewCost += InnerCost1;

  LLVM_DEBUG(dbgs() << "Found a shuffle feeding shuffles: " << I
                    << "\n  OldCost: " << OldCost << " vs NewCost: " << NewCost
                    << "\n");
  if (NewCost > OldCost)
    return false;

  // Clear unused sources to poison.
  if (none_of(NewMask, [&](int M) { return 0 <= M && M < (int)NumSrcElts; }))
    V0 = PoisonValue::get(ShuffleSrcTy);
  if (none_of(NewMask, [&](int M) { return (int)NumSrcElts <= M; }))
    V1 = PoisonValue::get(ShuffleSrcTy);

  Value *Shuf = Builder.CreateShuffleVector(V0, V1, NewMask);
  replaceValue(I, *Shuf);
  return true;
}
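// Worked example of merging nested shuffles into one:
//   %s = shufflevector <4 x i32> %a, <4 x i32> poison,
//        <4 x i32> <i32 3, i32 2, i32 1, i32 0>
//   %r = shufflevector <4 x i32> %s, <4 x i32> poison,
//        <4 x i32> <i32 0, i32 0, i32 3, i32 3>
// -->
//   %r = shufflevector <4 x i32> %a, <4 x i32> poison,
//        <4 x i32> <i32 3, i32 3, i32 0, i32 0>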
// Try to convert "shuffle (intrinsic), (intrinsic)" with the same intrinsic
// into "intrinsic (shuffle), (shuffle)".
bool VectorCombine::foldShuffleOfIntrinsics(Instruction &I) {
  Value *V0, *V1;
  ArrayRef<int> OldMask;
  if (!match(&I, m_Shuffle(m_OneUse(m_Value(V0)), m_OneUse(m_Value(V1)),
                           m_Mask(OldMask))))
    return false;

  auto *II0 = dyn_cast<IntrinsicInst>(V0);
  auto *II1 = dyn_cast<IntrinsicInst>(V1);
  if (!II0 || !II1)
    return false;

  Intrinsic::ID IID = II0->getIntrinsicID();
  if (IID != II1->getIntrinsicID())
    return false;

  auto *ShuffleDstTy = dyn_cast<FixedVectorType>(I.getType());
  auto *II0Ty = dyn_cast<FixedVectorType>(II0->getType());
  if (!ShuffleDstTy || !II0Ty)
    return false;

  if (!isTriviallyVectorizable(IID))
    return false;

  // Scalar operands must match on both intrinsics.
  for (unsigned I = 0, E = II0->arg_size(); I != E; ++I)
    if (isVectorIntrinsicWithScalarOpAtArg(IID, I, &TTI) &&
        II0->getArgOperand(I) != II1->getArgOperand(I))
      return false;

  // Build the widened argument types: scalar operands are kept, vector
  // operands are shuffled together.
  SmallVector<Type *> NewArgsTy;
  for (unsigned I = 0, E = II0->arg_size(); I != E; ++I)
    if (isVectorIntrinsicWithScalarOpAtArg(IID, I, &TTI)) {
      NewArgsTy.push_back(II0->getArgOperand(I)->getType());
    } else {
      auto *VecTy = cast<FixedVectorType>(II0->getArgOperand(I)->getType());
      NewArgsTy.push_back(FixedVectorType::get(VecTy->getElementType(),
                                               VecTy->getNumElements() * 2));
    }
  // ... (cost the two intrinsics + shuffle against the shuffled-argument
  // intrinsic)
  LLVM_DEBUG(dbgs() << "Found a shuffle feeding two intrinsics: " << I
                    << "\n  OldCost: " << OldCost << " vs NewCost: " << NewCost
                    << "\n");
  if (NewCost > OldCost)
    return false;

  SmallVector<Value *> NewArgs;
  for (unsigned I = 0, E = II0->arg_size(); I != E; ++I)
    if (isVectorIntrinsicWithScalarOpAtArg(IID, I, &TTI)) {
      NewArgs.push_back(II0->getArgOperand(I));
    } else {
      Value *Shuf = Builder.CreateShuffleVector(II0->getArgOperand(I),
                                                II1->getArgOperand(I), OldMask);
      NewArgs.push_back(Shuf);
      Worklist.pushValue(Shuf);
    }
  Value *NewIntrinsic = Builder.CreateIntrinsic(ShuffleDstTy, IID, NewArgs);

  // Intersect flags from the old intrinsics.
  if (auto *NewInst = dyn_cast<Instruction>(NewIntrinsic)) {
    NewInst->copyIRFlags(II0);
    NewInst->andIRFlags(II1);
  }

  replaceValue(I, *NewIntrinsic);
  return true;
}
// Starting from a use U at lane Lane, walk through any chain of shuffles to
// find the underlying source operand and lane that supply the value.
static InstLane lookThroughShuffles(Use *U, int Lane) {
  while (auto *SV = dyn_cast<ShuffleVectorInst>(U->get())) {
    unsigned NumElts =
        cast<FixedVectorType>(SV->getOperand(0)->getType())->getNumElements();
    int M = SV->getMaskValue(Lane);
    if (M < 0)
      return {nullptr, PoisonMaskElem};
    if (static_cast<unsigned>(M) < NumElts) {
      U = &SV->getOperandUse(0);
      Lane = M;
    } else {
      U = &SV->getOperandUse(1);
      Lane = M - NumElts;
    }
  }
  return InstLane{U, Lane};
}
static SmallVector<InstLane>
generateInstLaneVectorFromOperand(ArrayRef<InstLane> Item, int Op) {
  SmallVector<InstLane> NItem;
  for (InstLane IL : Item) {
    auto [U, Lane] = IL;
    InstLane OpLane =
        U ? lookThroughShuffles(&cast<Instruction>(U->get())->getOperandUse(Op),
                                Lane)
          : InstLane{nullptr, PoisonMaskElem};
    NItem.emplace_back(OpLane);
  }
  return NItem;
}
/// Detect concat of multiple values into a vector.
static bool isFreeConcat(ArrayRef<InstLane> Item, TTI::TargetCostKind CostKind,
                         const TargetTransformInfo &TTI) {
  auto *Ty = cast<FixedVectorType>(Item.front().first->get()->getType());
  unsigned NumElts = Ty->getNumElements();
  if (Item.size() == NumElts || NumElts == 1 || Item.size() % NumElts != 0)
    return false;

  // Check that the concat is free, usually meaning that the type will be
  // split during legalization.
  SmallVector<int, 16> ConcatMask(NumElts * 2);
  std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
  if (TTI.getShuffleCost(TTI::SK_PermuteTwoSrc,
                         FixedVectorType::get(Ty->getScalarType(), NumElts * 2),
                         ConcatMask, CostKind) != 0)
    return false;

  unsigned NumSlices = Item.size() / NumElts;
  // Currently we generate a tree of shuffles for the concats, which limits us
  // to a power-of-2.
  if (!isPowerOf2_32(NumSlices))
    return false;
  for (unsigned Slice = 0; Slice < NumSlices; ++Slice) {
    Use *SliceV = Item[Slice * NumElts].first;
    if (!SliceV || SliceV->get()->getType() != Ty)
      return false;
    for (unsigned Elt = 0; Elt < NumElts; ++Elt) {
      auto [V, Lane] = Item[Slice * NumElts + Elt];
      if (Lane != static_cast<int>(Elt) || SliceV->get() != V->get())
        return false;
    }
  }
  return true;
}
static Value *generateNewInstTree(ArrayRef<InstLane> Item, FixedVectorType *Ty,
                                  const SmallPtrSet<Use *, 4> &IdentityLeafs,
                                  const SmallPtrSet<Use *, 4> &SplatLeafs,
                                  const SmallPtrSet<Use *, 4> &ConcatLeafs,
                                  IRBuilder<> &Builder,
                                  const TargetTransformInfo *TTI) {
  auto [FrontU, FrontLane] = Item.front();

  if (IdentityLeafs.contains(FrontU)) {
    return FrontU->get();
  }
  if (SplatLeafs.contains(FrontU)) {
    SmallVector<int, 16> Mask(Ty->getNumElements(), FrontLane);
    return Builder.CreateShuffleVector(FrontU->get(), Mask);
  }
  if (ConcatLeafs.contains(FrontU)) {
    unsigned NumElts =
        cast<FixedVectorType>(FrontU->get()->getType())->getNumElements();
    SmallVector<Value *> Values(Item.size() / NumElts, nullptr);
    for (unsigned S = 0; S < Values.size(); ++S)
      Values[S] = Item[S * NumElts].first->get();

    // Build a binary tree of concat shuffles until one value remains.
    while (Values.size() > 1) {
      NumElts *= 2;
      SmallVector<int, 16> Mask(NumElts, 0);
      std::iota(Mask.begin(), Mask.end(), 0);
      SmallVector<Value *> NewValues(Values.size() / 2, nullptr);
      for (unsigned S = 0; S < NewValues.size(); ++S)
        NewValues[S] =
            Builder.CreateShuffleVector(Values[S * 2], Values[S * 2 + 1], Mask);
      Values = NewValues;
    }
    return Values[0];
  }

  auto *I = cast<Instruction>(FrontU->get());
  auto *II = dyn_cast<IntrinsicInst>(I);
  unsigned NumOps = I->getNumOperands() - (II ? 1 : 0);
  SmallVector<Value *> Ops(NumOps);
  for (unsigned Idx = 0; Idx < NumOps; Idx++) {
    if (II &&
        isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(), Idx, TTI)) {
      Ops[Idx] = II->getArgOperand(Idx);
      continue;
    }
    Ops[Idx] = generateNewInstTree(generateInstLaneVectorFromOperand(Item, Idx),
                                   Ty, IdentityLeafs, SplatLeafs, ConcatLeafs,
                                   Builder, TTI);
  }

  SmallVector<Value *, 8> ValueList;
  for (const auto &Lane : Item)
    if (Lane.first)
      ValueList.push_back(Lane.first->get());

  Type *DstTy =
      FixedVectorType::get(I->getType()->getScalarType(), Ty->getNumElements());
  if (auto *BI = dyn_cast<BinaryOperator>(I)) {
    auto *Value = Builder.CreateBinOp((Instruction::BinaryOps)BI->getOpcode(),
                                      Ops[0], Ops[1]);
    propagateIRFlags(Value, ValueList);
    return Value;
  }
  if (auto *CI = dyn_cast<CmpInst>(I)) {
    auto *Value = Builder.CreateCmp(CI->getPredicate(), Ops[0], Ops[1]);
    propagateIRFlags(Value, ValueList);
    return Value;
  }
  if (auto *SI = dyn_cast<SelectInst>(I)) {
    auto *Value = Builder.CreateSelect(Ops[0], Ops[1], Ops[2], "", SI);
    propagateIRFlags(Value, ValueList);
    return Value;
  }
  if (auto *CI = dyn_cast<CastInst>(I)) {
    auto *Value = Builder.CreateCast((Instruction::CastOps)CI->getOpcode(),
                                     Ops[0], DstTy);
    propagateIRFlags(Value, ValueList);
    return Value;
  }
  if (II) {
    auto *Value = Builder.CreateIntrinsic(DstTy, II->getIntrinsicID(), Ops);
    propagateIRFlags(Value, ValueList);
    return Value;
  }
  assert(isa<UnaryInstruction>(I) && "Unexpected instruction type in Generate");
  auto *Value =
      Builder.CreateUnOp((Instruction::UnaryOps)I->getOpcode(), Ops[0]);
  propagateIRFlags(Value, ValueList);
  return Value;
}
// Starting from a shuffle, look up through operands tracking the shuffled
// index of each lane. If we can simplify away the shuffles to identities then
// the shuffle is superfluous and can be removed.
bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
  auto *Ty = dyn_cast<FixedVectorType>(I.getType());
  if (!Ty || I.use_empty())
    return false;

  SmallVector<InstLane> Start(Ty->getNumElements());
  for (unsigned M = 0, E = Ty->getNumElements(); M < E; ++M)
    Start[M] = lookThroughShuffles(&*I.use_begin(), M);

  SmallVector<SmallVector<InstLane>> Worklist;
  Worklist.push_back(Start);
  SmallPtrSet<Use *, 4> IdentityLeafs, SplatLeafs, ConcatLeafs;
  unsigned NumVisited = 0;

  while (!Worklist.empty()) {
    if (++NumVisited > MaxInstrsToScan)
      return false;

    SmallVector<InstLane> Item = Worklist.pop_back_val();
    auto [FrontU, FrontLane] = Item.front();

    // If we found an undef first lane then bail out to keep things simple.
    if (!FrontU)
      return false;

    // Helper to peek through bitcasts to the same value.
    auto IsEquiv = [&](Value *X, Value *Y) {
      return X->getType() == Y->getType() &&
             peekThroughBitcasts(X) == peekThroughBitcasts(Y);
    };

    // Look for an identity value.
    if (FrontLane == 0 &&
        cast<FixedVectorType>(FrontU->get()->getType())->getNumElements() ==
            Ty->getNumElements() &&
        all_of(drop_begin(enumerate(Item)), [IsEquiv, Item](const auto &E) {
          Value *FrontV = Item.front().first->get();
          return !E.value().first || (IsEquiv(E.value().first->get(), FrontV) &&
                                      E.value().second == (int)E.index());
        })) {
      IdentityLeafs.insert(FrontU);
      continue;
    }

    // Look for constants, for the moment only supporting constant splats.
    if (auto *C = dyn_cast<Constant>(FrontU);
        C && C->getSplatValue() &&
        all_of(drop_begin(Item), [Item](InstLane &IL) {
          Value *FrontV = Item.front().first->get();
          Use *U = IL.first;
          return !U || (isa<Constant>(U->get()) &&
                        cast<Constant>(U->get())->getSplatValue() ==
                            cast<Constant>(FrontV)->getSplatValue());
        })) {
      SplatLeafs.insert(FrontU);
      continue;
    }

    // Look for a splat value.
    if (all_of(drop_begin(Item), [Item](InstLane &IL) {
          auto [FrontU, FrontLane] = Item.front();
          auto [U, Lane] = IL;
          return !U || (U->get() == FrontU->get() && Lane == FrontLane);
        })) {
      SplatLeafs.insert(FrontU);
      continue;
    }

    // We need each element to be the same type of value, and check that each
    // element has a single use.
    auto CheckLaneIsEquivalentToFirst = [Item](InstLane IL) {
      Value *FrontV = Item.front().first->get();
      if (!IL.first)
        return true;
      Value *V = IL.first->get();
      if (auto *I = dyn_cast<Instruction>(V); I && !I->hasOneUse())
        return false;
      if (V->getValueID() != FrontV->getValueID())
        return false;
      if (auto *CI = dyn_cast<CmpInst>(V))
        if (CI->getPredicate() != cast<CmpInst>(FrontV)->getPredicate())
          return false;
      if (auto *CI = dyn_cast<CastInst>(V))
        if (CI->getSrcTy()->getScalarType() !=
            cast<CastInst>(FrontV)->getSrcTy()->getScalarType())
          return false;
      if (auto *SI = dyn_cast<SelectInst>(V))
        if (!isa<VectorType>(SI->getOperand(0)->getType()) ||
            SI->getOperand(0)->getType() !=
                cast<SelectInst>(FrontV)->getOperand(0)->getType())
          return false;
      if (isa<CallInst>(V) && !isa<IntrinsicInst>(V))
        return false;
      auto *II = dyn_cast<IntrinsicInst>(V);
      return !II || (isa<IntrinsicInst>(FrontV) &&
                     II->getIntrinsicID() ==
                         cast<IntrinsicInst>(FrontV)->getIntrinsicID() &&
                     !II->hasOperandBundles());
    };

    if (all_of(drop_begin(Item), CheckLaneIsEquivalentToFirst)) {
      // Check the operator is one that we support.
      if (isa<BinaryOperator, CmpInst>(FrontU)) {
        // We exclude div/rem in case they hit UB from poison lanes.
        if (auto *BO = dyn_cast<BinaryOperator>(FrontU);
            BO && BO->isIntDivRem())
          return false;
        Worklist.push_back(generateInstLaneVectorFromOperand(Item, 0));
        Worklist.push_back(generateInstLaneVectorFromOperand(Item, 1));
        continue;
      // ... (unary operators and int<->fp casts recurse into operand 0)
      } else if (auto *BitCast = dyn_cast<BitCastInst>(FrontU)) {
        // TODO: Handle vector widening/narrowing bitcasts.
        auto *DstTy = dyn_cast<FixedVectorType>(BitCast->getDestTy());
        auto *SrcTy = dyn_cast<FixedVectorType>(BitCast->getSrcTy());
        if (DstTy && SrcTy &&
            SrcTy->getNumElements() == DstTy->getNumElements()) {
          Worklist.push_back(generateInstLaneVectorFromOperand(Item, 0));
          continue;
        }
      } else if (isa<SelectInst>(FrontU)) {
        Worklist.push_back(generateInstLaneVectorFromOperand(Item, 0));
        Worklist.push_back(generateInstLaneVectorFromOperand(Item, 1));
        Worklist.push_back(generateInstLaneVectorFromOperand(Item, 2));
        continue;
      } else if (auto *II = dyn_cast<IntrinsicInst>(FrontU);
                 II && isTriviallyVectorizable(II->getIntrinsicID()) &&
                 !II->hasOperandBundles()) {
        for (unsigned Op = 0, E = II->getNumOperands() - 1; Op < E; Op++) {
          if (isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(), Op,
                                                 &TTI)) {
            if (!all_of(drop_begin(Item), [Item, Op](InstLane &IL) {
                  Value *FrontV = Item.front().first->get();
                  Use *U = IL.first;
                  return !U || (cast<Instruction>(U->get())->getOperand(Op) ==
                                cast<Instruction>(FrontV)->getOperand(Op));
                }))
              return false;
            continue;
          }
          Worklist.push_back(generateInstLaneVectorFromOperand(Item, Op));
        }
        continue;
      }
    }

    if (isFreeConcat(Item, CostKind, TTI)) {
      ConcatLeafs.insert(FrontU);
      continue;
    }

    return false;
  }

  if (NumVisited <= 1)
    return false;

  LLVM_DEBUG(dbgs() << "Found a superfluous identity shuffle: " << I << "\n");

  // If we got this far, we know the shuffles are superfluous and can be
  // removed. Scan through again and generate the new tree of instructions.
  Builder.SetInsertPoint(&I);
  Value *V = generateNewInstTree(Start, Ty, IdentityLeafs, SplatLeafs,
                                 ConcatLeafs, Builder, &TTI);
  replaceValue(I, *V);
  return true;
}
bool VectorCombine::foldShuffleFromReductions(Instruction &I) {
  auto *II = dyn_cast<IntrinsicInst>(&I);
  if (!II)
    return false;
  switch (II->getIntrinsicID()) {
  case Intrinsic::vector_reduce_add:
  case Intrinsic::vector_reduce_mul:
  case Intrinsic::vector_reduce_and:
  case Intrinsic::vector_reduce_or:
  case Intrinsic::vector_reduce_xor:
  case Intrinsic::vector_reduce_smin:
  case Intrinsic::vector_reduce_smax:
  case Intrinsic::vector_reduce_umin:
  case Intrinsic::vector_reduce_umax:
    break;
  default:
    return false;
  }

  // Find all the inputs when looking through operations that do not alter the
  // lane order (binops, for example). Currently we look for a single shuffle,
  // and can ignore splat values.
  std::queue<Value *> Worklist;
  SmallPtrSet<Value *, 4> Visited;
  ShuffleVectorInst *Shuffle = nullptr;
  if (auto *Op = dyn_cast<Instruction>(I.getOperand(0)))
    Worklist.push(Op);

  while (!Worklist.empty()) {
    Value *CV = Worklist.front();
    Worklist.pop();
    if (Visited.contains(CV))
      continue;

    // Splats don't change the order, so can be ignored.
    if (isSplatValue(CV))
      continue;

    Visited.insert(CV);

    if (auto *CI = dyn_cast<Instruction>(CV)) {
      if (CI->isBinaryOp()) {
        for (auto *Op : CI->operand_values())
          Worklist.push(Op);
        continue;
      } else if (auto *SV = dyn_cast<ShuffleVectorInst>(CI)) {
        if (Shuffle && Shuffle != SV)
          return false;
        Shuffle = SV;
        continue;
      }
    }

    // Anything else is currently unsupported.
    return false;
  }

  if (!Shuffle)
    return false;

  // Check all uses of the binary ops and shuffles are also included in the
  // lane-invariant operations (Visited should be the list of lanewise
  // instructions, including the shuffle that we found).
  for (auto *V : Visited)
    for (auto *U : V->users())
      if (!Visited.contains(U) && U != &I)
        return false;

  FixedVectorType *VecType =
      dyn_cast<FixedVectorType>(II->getOperand(0)->getType());
  if (!VecType)
    return false;
  FixedVectorType *ShuffleInputType =
      dyn_cast<FixedVectorType>(Shuffle->getOperand(0)->getType());
  if (!ShuffleInputType)
    return false;
  unsigned NumInputElts = ShuffleInputType->getNumElements();

  // Find the mask from sorting the lanes into order. This is most likely to
  // become an identity or concat mask. Undef elements are pushed to the end.
  SmallVector<int> ConcatMask;
  Shuffle->getShuffleMask(ConcatMask);
  sort(ConcatMask, [](int X, int Y) { return (unsigned)X < (unsigned)Y; });

  // In the case of a truncating shuffle it's possible for the mask
  // to have an index greater than the size of the resulting vector.
  // This requires special handling.
  bool IsTruncatingShuffle = VecType->getNumElements() < NumInputElts;
  bool UsesSecondVec =
      any_of(ConcatMask, [&](int M) { return M >= (int)NumInputElts; });

  FixedVectorType *VecTyForCost =
      (UsesSecondVec && !IsTruncatingShuffle) ? VecType : ShuffleInputType;
  InstructionCost OldCost = TTI.getShuffleCost(
      UsesSecondVec ? TTI::SK_PermuteTwoSrc : TTI::SK_PermuteSingleSrc,
      VecTyForCost, Shuffle->getShuffleMask(), CostKind);
  InstructionCost NewCost = TTI.getShuffleCost(
      UsesSecondVec ? TTI::SK_PermuteTwoSrc : TTI::SK_PermuteSingleSrc,
      VecTyForCost, ConcatMask, CostKind);

  LLVM_DEBUG(dbgs() << "Found a reduction feeding from a shuffle: " << *Shuffle
                    << "\n");
  LLVM_DEBUG(dbgs() << "  OldCost: " << OldCost << " vs NewCost: " << NewCost
                    << "\n");
  if (NewCost < OldCost) {
    Builder.SetInsertPoint(Shuffle);
    Value *NewShuffle = Builder.CreateShuffleVector(
        Shuffle->getOperand(0), Shuffle->getOperand(1), ConcatMask);
    LLVM_DEBUG(dbgs() << "Created new shuffle: " << *NewShuffle << "\n");
    replaceValue(*Shuffle, *NewShuffle);
  }

  // See if we can re-use foldSelectShuffle, getting it to reduce the size of
  // the shuffle into a nicer order, as it can ignore the order of the lanes.
  return foldSelectShuffle(*Shuffle, true);
}
bool VectorCombine::foldCastFromReductions(Instruction &I) {
  auto *II = dyn_cast<IntrinsicInst>(&I);
  if (!II)
    return false;

  bool TruncOnly = false;
  Intrinsic::ID IID = II->getIntrinsicID();
  switch (IID) {
  case Intrinsic::vector_reduce_add:
  case Intrinsic::vector_reduce_mul:
    TruncOnly = true;
    break;
  case Intrinsic::vector_reduce_and:
  case Intrinsic::vector_reduce_or:
  case Intrinsic::vector_reduce_xor:
    break;
  default:
    return false;
  }

  unsigned ReductionOpc = getArithmeticReductionInstruction(IID);
  Value *ReductionSrc = I.getOperand(0);

  Value *Src;
  if (!match(ReductionSrc, m_OneUse(m_Trunc(m_Value(Src)))) &&
      (TruncOnly || !match(ReductionSrc, m_OneUse(m_ZExtOrSExt(m_Value(Src))))))
    return false;

  auto CastOpc =
      (Instruction::CastOps)cast<Instruction>(ReductionSrc)->getOpcode();

  auto *SrcTy = cast<VectorType>(Src->getType());
  auto *ReductionSrcTy = cast<VectorType>(ReductionSrc->getType());
  Type *ResultTy = I.getType();

  InstructionCost OldCost = TTI.getArithmeticReductionCost(
      ReductionOpc, ReductionSrcTy, std::nullopt, CostKind);
  OldCost += TTI.getCastInstrCost(CastOpc, ReductionSrcTy, SrcTy,
                                  TTI::CastContextHint::None, CostKind,
                                  cast<CastInst>(ReductionSrc));
  InstructionCost NewCost =
      TTI.getArithmeticReductionCost(ReductionOpc, SrcTy, std::nullopt,
                                     CostKind) +
      TTI.getCastInstrCost(CastOpc, ResultTy, ReductionSrcTy->getScalarType(),
                           TTI::CastContextHint::None, CostKind);

  if (OldCost <= NewCost || !NewCost.isValid())
    return false;

  Value *NewReduction = Builder.CreateIntrinsic(SrcTy->getScalarType(),
                                                II->getIntrinsicID(), {Src});
  Value *NewCast = Builder.CreateCast(CastOpc, NewReduction, ResultTy);
  replaceValue(I, *NewCast);
  return true;
}
bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) {
  auto *SVI = cast<ShuffleVectorInst>(&I);
  auto *VT = cast<FixedVectorType>(I.getType());
  auto *Op0 = dyn_cast<Instruction>(SVI->getOperand(0));
  auto *Op1 = dyn_cast<Instruction>(SVI->getOperand(1));
  if (!Op0 || !Op1 || Op0 == Op1 || !Op0->isBinaryOp() || !Op1->isBinaryOp() ||
      VT != Op0->getType())
    return false;

  auto *SVI0A = dyn_cast<Instruction>(Op0->getOperand(0));
  auto *SVI0B = dyn_cast<Instruction>(Op0->getOperand(1));
  auto *SVI1A = dyn_cast<Instruction>(Op1->getOperand(0));
  auto *SVI1B = dyn_cast<Instruction>(Op1->getOperand(1));
  SmallPtrSet<Instruction *, 4> InputShuffles({SVI0A, SVI0B, SVI1A, SVI1B});
  auto checkSVNonOpUses = [&](Instruction *I) {
    if (!I || I->getOperand(0)->getType() != VT)
      return true;
    return any_of(I->users(), [&](User *U) {
      return U != Op0 && U != Op1 &&
             !(isa<ShuffleVectorInst>(U) &&
               (InputShuffles.contains(cast<Instruction>(U)) ||
                isInstructionTriviallyDead(cast<Instruction>(U))));
    });
  };
  if (checkSVNonOpUses(SVI0A) || checkSVNonOpUses(SVI0B) ||
      checkSVNonOpUses(SVI1A) || checkSVNonOpUses(SVI1B))
    return false;

  // Collect all the uses that are shuffles that we can transform together. We
  // may not have a single shuffle, but a group that can all be transformed
  // together profitably.
  SmallVector<ShuffleVectorInst *> Shuffles;
  auto collectShuffles = [&](Instruction *I) {
    for (auto *U : I->users()) {
      auto *SV = dyn_cast<ShuffleVectorInst>(U);
      if (!SV || SV->getType() != VT)
        return false;
      if ((SV->getOperand(0) != Op0 && SV->getOperand(0) != Op1) ||
          (SV->getOperand(1) != Op0 && SV->getOperand(1) != Op1))
        return false;
      if (!llvm::is_contained(Shuffles, SV))
        Shuffles.push_back(SV);
    }
    return true;
  };
  if (!collectShuffles(Op0) || !collectShuffles(Op1))
    return false;
  // From a reduction, we need to be processing a single shuffle, otherwise the
  // other uses will not be lane-invariant.
  if (FromReduction && Shuffles.size() > 1)
    return false;

  // Add any shuffle uses for the shuffles we have found, to include them in
  // our cost calculations.
  if (!FromReduction) {
    for (ShuffleVectorInst *SV : Shuffles) {
      for (auto *U : SV->users()) {
        ShuffleVectorInst *SSV = dyn_cast<ShuffleVectorInst>(U);
        if (SSV && isa<UndefValue>(SSV->getOperand(1)) && SSV->getType() == VT)
          Shuffles.push_back(SSV);
      }
    }
  }

  // For each of the output shuffles, we try to sort all the first vector
  // elements to the beginning, followed by the second array elements at the
  // end. If the binops are legalized to smaller vectors, this may reduce the
  // total number of binops. We compute the ReconstructMask mask needed to
  // convert back to the original lane order.
  SmallVector<std::pair<int, int>> V1, V2;
  SmallVector<SmallVector<int>> OrigReconstructMasks;
  int MaxV1Elt = 0, MaxV2Elt = 0;
  unsigned NumElts = VT->getNumElements();
  for (ShuffleVectorInst *SVN : Shuffles) {
    SmallVector<int> Mask;
    SVN->getShuffleMask(Mask);

    // Check the operands are the same as the original, or reversed (in which
    // case we need to commute the mask).
    Value *SVOp0 = SVN->getOperand(0);
    Value *SVOp1 = SVN->getOperand(1);
    if (isa<UndefValue>(SVOp1)) {
      auto *SSV = cast<ShuffleVectorInst>(SVOp0);
      SVOp0 = SSV->getOperand(0);
      SVOp1 = SSV->getOperand(1);
      for (unsigned I = 0, E = Mask.size(); I != E; I++) {
        if (Mask[I] >= static_cast<int>(SSV->getShuffleMask().size()))
          return false;
        Mask[I] = Mask[I] < 0 ? Mask[I] : SSV->getMaskValue(Mask[I]);
      }
    }
    if (SVOp0 == Op1 && SVOp1 == Op0) {
      std::swap(SVOp0, SVOp1);
      ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
    }
    if (SVOp0 != Op0 || SVOp1 != Op1)
      return false;

    // Calculate the reconstruction mask for this shuffle, as the mask needed
    // to take the packed values from Op0/Op1 and reconstruct the original
    // order.
    SmallVector<int> ReconstructMask;
    for (unsigned I = 0; I < Mask.size(); I++) {
      if (Mask[I] < 0) {
        ReconstructMask.push_back(-1);
      } else if (Mask[I] < static_cast<int>(NumElts)) {
        MaxV1Elt = std::max(MaxV1Elt, Mask[I]);
        auto It = find_if(V1, [&](const std::pair<int, int> &A) {
          return Mask[I] == A.first;
        });
        if (It != V1.end())
          ReconstructMask.push_back(It - V1.begin());
        else {
          ReconstructMask.push_back(V1.size());
          V1.emplace_back(Mask[I], V1.size());
        }
      } else {
        MaxV2Elt = std::max<int>(MaxV2Elt, Mask[I] - NumElts);
        auto It = find_if(V2, [&](const std::pair<int, int> &A) {
          return Mask[I] - static_cast<int>(NumElts) == A.first;
        });
        if (It != V2.end())
          ReconstructMask.push_back(NumElts + It - V2.begin());
        else {
          ReconstructMask.push_back(NumElts + V2.size());
          V2.emplace_back(Mask[I] - NumElts, NumElts + V2.size());
        }
      }
    }

    // For reductions, we know that the lane ordering out doesn't alter the
    // result. In-order can help simplify the shuffle away.
    if (FromReduction)
      sort(ReconstructMask);
    OrigReconstructMasks.push_back(std::move(ReconstructMask));
  }

  // If the maximum element used from V1 and V2 is not larger than the new
  // vectors, the vectors are already packs and we can skip the full fold.
  if (V1.empty() || V2.empty() ||
      (MaxV1Elt == static_cast<int>(V1.size()) - 1 &&
       MaxV2Elt == static_cast<int>(V2.size()) - 1))
    return false;

  // Given a map from a lane in Mask to the lane in the original shuffle,
  // compute the base mask value, looking through a possible extra shuffle.
  auto GetBaseMaskValue = [&](Instruction *I, int M) {
    auto *SV = dyn_cast<ShuffleVectorInst>(I);
    if (!SV)
      return M;
    if (isa<UndefValue>(SV->getOperand(1)))
      if (auto *SSV = dyn_cast<ShuffleVectorInst>(SV->getOperand(0)))
        if (InputShuffles.contains(SSV))
          return SSV->getMaskValue(SV->getMaskValue(M));
    return SV->getMaskValue(M);
  };

  // Attempt to sort the inputs so the new shuffles become identity shuffles
  // where possible.
  auto SortBase = [&](Instruction *A, std::pair<int, int> X,
                      std::pair<int, int> Y) {
    int MXA = GetBaseMaskValue(A, X.first);
    int MYA = GetBaseMaskValue(A, Y.first);
    return MXA < MYA;
  };
  stable_sort(V1, [&](std::pair<int, int> A, std::pair<int, int> B) {
    return SortBase(SVI0A, A, B);
  });
  stable_sort(V2, [&](std::pair<int, int> A, std::pair<int, int> B) {
    return SortBase(SVI1A, A, B);
  });

  // Calculate our ReconstructMasks from the OrigReconstructMasks and the
  // modified order of the input shuffles.
  SmallVector<SmallVector<int>> ReconstructMasks;
  for (const auto &Mask : OrigReconstructMasks) {
    SmallVector<int> ReconstructMask;
    auto FindIndex = [](const SmallVector<std::pair<int, int>> &V, int M) {
      auto It = find_if(V, [M](auto A) { return A.second == M; });
      assert(It != V.end() && "Expected all entries in Mask");
      return std::distance(V.begin(), It);
    };
    for (int M : Mask) {
      if (M < 0)
        ReconstructMask.push_back(-1);
      else if (M < static_cast<int>(NumElts)) {
        ReconstructMask.push_back(FindIndex(V1, M));
      } else {
        ReconstructMask.push_back(NumElts + FindIndex(V2, M));
      }
    }
    ReconstructMasks.push_back(std::move(ReconstructMask));
  }

  // Calculate the masks needed for the new input shuffles, padded with poison.
  SmallVector<int> V1A, V1B, V2A, V2B;
  for (unsigned I = 0; I < V1.size(); I++) {
    V1A.push_back(GetBaseMaskValue(SVI0A, V1[I].first));
    V1B.push_back(GetBaseMaskValue(SVI0B, V1[I].first));
  }
  for (unsigned I = 0; I < V2.size(); I++) {
    V2A.push_back(GetBaseMaskValue(SVI1A, V2[I].first));
    V2B.push_back(GetBaseMaskValue(SVI1B, V2[I].first));
  }
  while (V1A.size() < NumElts) {
    V1A.push_back(PoisonMaskElem);
    V1B.push_back(PoisonMaskElem);
  }
  while (V2A.size() < NumElts) {
    V2A.push_back(PoisonMaskElem);
    V2B.push_back(PoisonMaskElem);
  }

  auto AddShuffleCost = [&](InstructionCost C, Instruction *I) {
    auto *SV = dyn_cast<ShuffleVectorInst>(I);
    if (!SV)
      return C;
    return C + TTI.getShuffleCost(isa<UndefValue>(SV->getOperand(1))
                                      ? TTI::SK_PermuteSingleSrc
                                      : TTI::SK_PermuteTwoSrc,
                                  VT, SV->getShuffleMask(), CostKind);
  };
  auto AddShuffleMaskCost = [&](InstructionCost C, ArrayRef<int> Mask) {
    return C + TTI.getShuffleCost(TTI::SK_PermuteTwoSrc, VT, Mask, CostKind);
  };

  // Get the costs of the shuffles + binops before and after with the new
  // shuffle masks.
  InstructionCost CostBefore =
      TTI.getArithmeticInstrCost(Op0->getOpcode(), VT, CostKind) +
      TTI.getArithmeticInstrCost(Op1->getOpcode(), VT, CostKind);
  CostBefore += std::accumulate(Shuffles.begin(), Shuffles.end(),
                                InstructionCost(0), AddShuffleCost);
  CostBefore += std::accumulate(InputShuffles.begin(), InputShuffles.end(),
                                InstructionCost(0), AddShuffleCost);

  // ... (CostAfter: the two binops on the packed element counts)
  CostAfter += std::accumulate(ReconstructMasks.begin(), ReconstructMasks.end(),
                               InstructionCost(0), AddShuffleMaskCost);
  std::set<SmallVector<int>> OutputShuffleMasks({V1A, V1B, V2A, V2B});
  CostAfter +=
      std::accumulate(OutputShuffleMasks.begin(), OutputShuffleMasks.end(),
                      InstructionCost(0), AddShuffleMaskCost);

  LLVM_DEBUG(dbgs() << "Found a binop select shuffle pattern: " << I << "\n");
  LLVM_DEBUG(dbgs() << "  CostBefore: " << CostBefore
                    << " vs CostAfter: " << CostAfter << "\n");
  if (CostBefore <= CostAfter)
    return false;

  // The cost model has passed, create the new instructions.
  auto GetShuffleOperand = [&](Instruction *I, unsigned Op) -> Value * {
    auto *SV = dyn_cast<ShuffleVectorInst>(I);
    if (!SV)
      return I;
    if (isa<UndefValue>(SV->getOperand(1)))
      if (auto *SSV = dyn_cast<ShuffleVectorInst>(SV->getOperand(0)))
        if (InputShuffles.contains(SSV))
          return SSV->getOperand(Op);
    return SV->getOperand(Op);
  };
  // ... (insertion points are placed after each input's definition)
  Value *NSV0A = Builder.CreateShuffleVector(GetShuffleOperand(SVI0A, 0),
                                             GetShuffleOperand(SVI0A, 1), V1A);
  Value *NSV0B = Builder.CreateShuffleVector(GetShuffleOperand(SVI0B, 0),
                                             GetShuffleOperand(SVI0B, 1), V1B);
  Value *NSV1A = Builder.CreateShuffleVector(GetShuffleOperand(SVI1A, 0),
                                             GetShuffleOperand(SVI1A, 1), V2A);
  Value *NSV1B = Builder.CreateShuffleVector(GetShuffleOperand(SVI1B, 0),
                                             GetShuffleOperand(SVI1B, 1), V2B);
  Value *NOp0 = Builder.CreateBinOp((Instruction::BinaryOps)Op0->getOpcode(),
                                    NSV0A, NSV0B);
  if (auto *I = dyn_cast<Instruction>(NOp0))
    I->copyIRFlags(Op0, true);
  Value *NOp1 = Builder.CreateBinOp((Instruction::BinaryOps)Op1->getOpcode(),
                                    NSV1A, NSV1B);
  if (auto *I = dyn_cast<Instruction>(NOp1))
    I->copyIRFlags(Op1, true);

  for (int S = 0, E = ReconstructMasks.size(); S != E; S++) {
    Builder.SetInsertPoint(Shuffles[S]);
    Value *NSV = Builder.CreateShuffleVector(NOp0, NOp1, ReconstructMasks[S]);
    replaceValue(*Shuffles[S], *NSV);
  }

  Worklist.pushValue(NSV0A);
  Worklist.pushValue(NSV0B);
  Worklist.pushValue(NSV1A);
  Worklist.pushValue(NSV1B);
  for (auto *S : Shuffles)
    Worklist.push(S);
  return true;
}
// Try to shrink a zext-fed bitwise logic op or lshr to the narrower source
// type, moving the zext after the operation.
bool VectorCombine::shrinkType(Instruction &I) {
  Value *ZExted, *OtherOperand;
  if (!match(&I, m_c_BitwiseLogic(m_ZExt(m_Value(ZExted)),
                                  m_Value(OtherOperand))) &&
      !match(&I, m_LShr(m_ZExt(m_Value(ZExted)), m_Value(OtherOperand))))
    return false;

  Value *ZExtOperand = I.getOperand(I.getOperand(0) == OtherOperand ? 1 : 0);

  auto *BigTy = cast<FixedVectorType>(I.getType());
  auto *SmallTy = cast<FixedVectorType>(ZExted->getType());
  unsigned BW = SmallTy->getElementType()->getPrimitiveSizeInBits();

  if (I.getOpcode() == Instruction::LShr) {
    // Check that the shift amount is less than the number of bits in the
    // smaller type. Otherwise, the smaller lshr will return a poison value.
    KnownBits ShAmtKB = computeKnownBits(I.getOperand(1), *DL);
    if (ShAmtKB.getMaxValue().uge(BW))
      return false;
  } else {
    // Check that the expression overall uses at most the same number of bits
    // as ZExted.
    KnownBits KB = computeKnownBits(&I, *DL);
    if (KB.countMaxActiveBits() > BW)
      return false;
  }

  // Calculate the cost of leaving the IR as it is versus moving the zext
  // after the operation, adding truncates where needed.
  InstructionCost ZExtCost = TTI.getCastInstrCost(
      Instruction::ZExt, BigTy, SmallTy,
      TargetTransformInfo::CastContextHint::None, CostKind);
  InstructionCost CurrentCost = ZExtCost;
  InstructionCost ShrinkCost = 0;
  for (User *U : ZExtOperand->users()) {
    auto *UI = cast<Instruction>(U);
    if (UI == &I) {
      // ... (arithmetic cost on BigTy vs SmallTy)
      ShrinkCost += ZExtCost;
      continue;
    }
    // ... (other users must be shrinkable binops whose known bits fit in BW)
    ShrinkCost += ZExtCost;
  }

  // If the other operand is not a constant, a trunc of it is needed.
  if (!isa<Constant>(OtherOperand))
    ShrinkCost += TTI.getCastInstrCost(
        Instruction::Trunc, SmallTy, BigTy,
        TargetTransformInfo::CastContextHint::None, CostKind);

  // If the cost of shrinking is not less than keeping the IR as is, bail.
  if (ShrinkCost > CurrentCost)
    return false;

  Builder.SetInsertPoint(&I);
  Value *Op0 = ZExted;
  Value *Op1 = Builder.CreateTrunc(OtherOperand, SmallTy);
  // Keep the order of operands the same.
  if (I.getOperand(0) == OtherOperand)
    std::swap(Op0, Op1);
  Value *NewBinOp =
      Builder.CreateBinOp((Instruction::BinaryOps)I.getOpcode(), Op0, Op1);
  cast<Instruction>(NewBinOp)->copyIRFlags(&I);
  cast<Instruction>(NewBinOp)->copyMetadata(I);
  Value *NewZExtr = Builder.CreateZExt(NewBinOp, BigTy);
  replaceValue(I, *NewZExtr);
  return true;
}
// insert (DstVec, (extract SrcVec, ExtIdx), InsIdx)
//   --> shuffle DstVec, SrcVec, Mask
bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) {
  Value *DstVec, *SrcVec;
  uint64_t ExtIdx, InsIdx;
  if (!match(&I,
             m_InsertElt(m_Value(DstVec),
                         m_ExtractElt(m_Value(SrcVec), m_ConstantInt(ExtIdx)),
                         m_ConstantInt(InsIdx))))
    return false;

  auto *VecTy = dyn_cast<FixedVectorType>(I.getType());
  if (!VecTy || SrcVec->getType() != VecTy)
    return false;

  unsigned NumElts = VecTy->getNumElements();
  if (ExtIdx >= NumElts || InsIdx >= NumElts)
    return false;

  SmallVector<int> Mask(NumElts);
  std::iota(Mask.begin(), Mask.end(), 0);
  Mask[InsIdx] = ExtIdx + NumElts;

  // Cost the extract/insert pair against the two-source shuffle.
  auto *Ins = cast<InsertElementInst>(&I);
  auto *Ext = cast<ExtractElementInst>(I.getOperand(1));
  InstructionCost OldCost =
      TTI.getVectorInstrCost(*Ext, VecTy, CostKind, ExtIdx) +
      TTI.getVectorInstrCost(*Ins, VecTy, CostKind, InsIdx);
  InstructionCost NewCost = TTI.getShuffleCost(
      TargetTransformInfo::SK_PermuteTwoSrc, VecTy, Mask, CostKind);
  if (!Ext->hasOneUse())
    NewCost += TTI.getVectorInstrCost(*Ext, VecTy, CostKind, ExtIdx);

  LLVM_DEBUG(dbgs() << "Found a insert/extract shuffle-like pair : " << I
                    << "\n  OldCost: " << OldCost << " vs NewCost: " << NewCost
                    << "\n");

  if (OldCost < NewCost)
    return false;

  // Canonicalize an undef vector to the RHS to help further folds.
  if (isa<UndefValue>(DstVec) && !isa<UndefValue>(SrcVec)) {
    ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
    std::swap(DstVec, SrcVec);
  }

  Value *Shuf = Builder.CreateShuffleVector(DstVec, SrcVec, Mask);
  replaceValue(I, *Shuf);
  return true;
}
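// Worked example (ExtIdx = 3, InsIdx = 1, NumElts = 4, so Mask[1] = 3 + 4):
//   %e = extractelement <4 x i32> %src, i32 3
//   %r = insertelement <4 x i32> %dst, i32 %e, i32 1
// -->
//   %r = shufflevector <4 x i32> %dst, <4 x i32> %src,
//        <4 x i32> <i32 0, i32 7, i32 2, i32 3>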
bool VectorCombine::run() {
  if (DisableVectorCombine)
    return false;

  // Don't attempt vectorization if the target does not support vectors.
  if (!TTI.getNumberOfRegisters(TTI.getRegisterClassForType(/*Vector*/ true)))
    return false;

  LLVM_DEBUG(dbgs() << "\n\nVECTORCOMBINE on " << F.getName() << "\n");

  bool MadeChange = false;
  auto FoldInst = [this, &MadeChange](Instruction &I) {
    Builder.SetInsertPoint(&I);
    bool IsVectorType = isa<VectorType>(I.getType());
    bool IsFixedVectorType = isa<FixedVectorType>(I.getType());
    auto Opcode = I.getOpcode();

    LLVM_DEBUG(dbgs() << "VC: Visiting: " << I << '\n');

    // These folds should be beneficial regardless of when this pass is run
    // in the optimization pipeline. The type checking is for run-time
    // efficiency: we can avoid wasting time dispatching to folding functions
    // if there's no chance of matching.
    if (IsFixedVectorType) {
      switch (Opcode) {
      case Instruction::InsertElement:
        MadeChange |= vectorizeLoadInsert(I);
        break;
      case Instruction::ShuffleVector:
        MadeChange |= widenSubvectorLoad(I);
        break;
      default:
        break;
      }
    }

    // This transform works with scalable and fixed vectors.
    // TODO: Identify and allow other scalable transforms.
    if (IsVectorType) {
      MadeChange |= scalarizeBinopOrCmp(I);
      MadeChange |= scalarizeLoadExtract(I);
      MadeChange |= scalarizeVPIntrinsic(I);
    }

    if (Opcode == Instruction::Store)
      MadeChange |= foldSingleElementStore(I);

    // If this is an early pipeline invocation of this pass, we are done.
    if (TryEarlyFoldsOnly)
      return;

    // Otherwise, try folds that improve codegen but may interfere with
    // early IR canonicalizations.
    if (IsFixedVectorType) {
      switch (Opcode) {
      case Instruction::InsertElement:
        MadeChange |= foldInsExtFNeg(I);
        MadeChange |= foldInsExtVectorToShuffle(I);
        break;
      case Instruction::ShuffleVector:
        MadeChange |= foldPermuteOfBinops(I);
        MadeChange |= foldShuffleOfBinops(I);
        MadeChange |= foldShuffleOfCastops(I);
        MadeChange |= foldShuffleOfShuffles(I);
        MadeChange |= foldShuffleOfIntrinsics(I);
        MadeChange |= foldSelectShuffle(I);
        MadeChange |= foldShuffleToIdentity(I);
        break;
      case Instruction::BitCast:
        MadeChange |= foldBitcastShuffle(I);
        break;
      default:
        MadeChange |= shrinkType(I);
        break;
      }
    } else {
      switch (Opcode) {
      case Instruction::Call:
        MadeChange |= foldShuffleFromReductions(I);
        MadeChange |= foldCastFromReductions(I);
        break;
      case Instruction::ICmp:
      case Instruction::FCmp:
        MadeChange |= foldExtractExtract(I);
        break;
      case Instruction::Or:
        MadeChange |= foldConcatOfBoolMasks(I);
        [[fallthrough]];
      default:
        if (Instruction::isBinaryOp(Opcode)) {
          MadeChange |= foldExtractExtract(I);
          MadeChange |= foldExtractedCmps(I);
        }
        break;
      }
    }
  };

  for (BasicBlock &BB : F) {
    // Ignore unreachable basic blocks.
    if (!DT.isReachableFromEntry(&BB))
      continue;
    // Use an early-increment range so that instructions can be erased while
    // iterating.
    for (Instruction &I : make_early_inc_range(BB)) {
      if (I.isDebugOrPseudoInst())
        continue;
      FoldInst(I);
    }
  }

  while (!Worklist.isEmpty()) {
    Instruction *I = Worklist.removeOne();
    if (!I)
      continue;
    // ... (erase trivially dead instructions and re-run FoldInst on the rest)
  }

  return MadeChange;
}