// Name under which this pass's LLVM_DEBUG output and statistics are grouped.
37#define DEBUG_TYPE "vector-combine"
// Pass statistics: one counter per kind of transform this pass performs,
// incremented when the corresponding fold fires. Reported when statistics
// collection is enabled (see LLVM's STATISTIC macro).
43STATISTIC(NumVecLoad,
"Number of vector loads formed");
44STATISTIC(NumVecCmp,
"Number of vector compares formed");
45STATISTIC(NumVecBO,
"Number of vector binops formed");
46STATISTIC(NumVecCmpBO,
"Number of vector compare + binop formed");
47STATISTIC(NumShufOfBitcast,
"Number of shuffles moved after bitcast");
48STATISTIC(NumScalarBO,
"Number of scalar binops formed");
49STATISTIC(NumScalarCmp,
"Number of scalar compares formed");
53 cl::desc(
"Disable all vector combine transforms"));
57 cl::desc(
"Disable binop extract to shuffle transforms"));
61 cl::desc(
"Max number of instructions to scan for vector combining."));
/// Sentinel extract/insert index meaning "no valid index"; distinct from any
/// real vector element index. Marked constexpr so it is a true compile-time
/// constant (usable in constant expressions) rather than a runtime const.
static constexpr unsigned InvalidIndex = std::numeric_limits<unsigned>::max();
71 bool TryEarlyFoldsOnly)
72 :
F(
F), Builder(
F.getContext()),
TTI(
TTI), DT(DT), AA(AA), AC(AC),
DL(
DL),
73 CostKind(CostKind), TryEarlyFoldsOnly(TryEarlyFoldsOnly) {}
89 bool TryEarlyFoldsOnly;
100 unsigned PreferredExtractIndex)
const;
104 unsigned PreferredExtractIndex);
127 bool foldSelectShuffle(
Instruction &
I,
bool FromReduction =
false);
132 if (
auto *NewI = dyn_cast<Instruction>(&New)) {
153 while (
auto *BitCast = dyn_cast<BitCastInst>(V))
154 V = BitCast->getOperand(0);
162 if (!Load || !Load->isSimple() || !Load->hasOneUse() ||
163 Load->getFunction()->hasFnAttribute(Attribute::SanitizeMemTag) ||
169 Type *ScalarTy = Load->getType()->getScalarType();
172 if (!ScalarSize || !MinVectorSize || MinVectorSize % ScalarSize != 0 ||
179bool VectorCombine::vectorizeLoadInsert(
Instruction &
I) {
193 auto *
Load = dyn_cast<LoadInst>(
X);
205 Value *SrcPtr =
Load->getPointerOperand()->stripPointerCasts();
206 assert(isa<PointerType>(SrcPtr->
getType()) &&
"Expected a pointer type");
208 unsigned MinVecNumElts = MinVectorSize / ScalarSize;
209 auto *MinVecTy = VectorType::get(ScalarTy, MinVecNumElts,
false);
210 unsigned OffsetEltIndex = 0;
218 unsigned OffsetBitWidth =
DL->getIndexTypeSizeInBits(SrcPtr->
getType());
229 uint64_t ScalarSizeInBytes = ScalarSize / 8;
230 if (
Offset.urem(ScalarSizeInBytes) != 0)
234 OffsetEltIndex =
Offset.udiv(ScalarSizeInBytes).getZExtValue();
235 if (OffsetEltIndex >= MinVecNumElts)
252 unsigned AS =
Load->getPointerAddressSpace();
270 auto *Ty = cast<FixedVectorType>(
I.getType());
271 unsigned OutputNumElts = Ty->getNumElements();
273 assert(OffsetEltIndex < MinVecNumElts &&
"Address offset too big");
274 Mask[0] = OffsetEltIndex;
281 if (OldCost < NewCost || !NewCost.
isValid())
292 replaceValue(
I, *VecLd);
302 auto *Shuf = cast<ShuffleVectorInst>(&
I);
303 if (!Shuf->isIdentityWithPadding())
308 cast<FixedVectorType>(Shuf->getOperand(0)->getType())->getNumElements();
309 unsigned OpIndex =
any_of(Shuf->getShuffleMask(), [&NumOpElts](
int M) {
310 return M >= (int)(NumOpElts);
313 auto *
Load = dyn_cast<LoadInst>(Shuf->getOperand(
OpIndex));
320 auto *Ty = cast<FixedVectorType>(
I.getType());
321 Value *SrcPtr =
Load->getPointerOperand()->stripPointerCasts();
322 assert(isa<PointerType>(SrcPtr->
getType()) &&
"Expected a pointer type");
329 unsigned AS =
Load->getPointerAddressSpace();
344 if (OldCost < NewCost || !NewCost.
isValid())
351 replaceValue(
I, *VecLd);
363 assert(Index0C && Index1C &&
"Expected constant extract indexes");
365 unsigned Index0 = Index0C->getZExtValue();
366 unsigned Index1 = Index1C->getZExtValue();
369 if (Index0 == Index1)
393 if (PreferredExtractIndex == Index0)
395 if (PreferredExtractIndex == Index1)
399 return Index0 > Index1 ? Ext0 : Ext1;
411 unsigned PreferredExtractIndex) {
414 assert(Ext0IndexC && Ext1IndexC &&
"Expected constant extract indexes");
416 unsigned Opcode =
I.getOpcode();
420 auto *VecTy = cast<VectorType>(Ext0Src->
getType());
429 assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
430 "Expected a compare");
440 unsigned Ext0Index = Ext0IndexC->getZExtValue();
441 unsigned Ext1Index = Ext1IndexC->getZExtValue();
455 unsigned BestExtIndex = Extract0Cost > Extract1Cost ? Ext0Index : Ext1Index;
456 unsigned BestInsIndex = Extract0Cost > Extract1Cost ? Ext1Index : Ext0Index;
457 InstructionCost CheapExtractCost = std::min(Extract0Cost, Extract1Cost);
462 if (Ext0Src == Ext1Src && Ext0Index == Ext1Index) {
467 bool HasUseTax = Ext0 == Ext1 ? !Ext0->
hasNUses(2)
469 OldCost = CheapExtractCost + ScalarOpCost;
470 NewCost = VectorOpCost + CheapExtractCost + HasUseTax * CheapExtractCost;
474 OldCost = Extract0Cost + Extract1Cost + ScalarOpCost;
475 NewCost = VectorOpCost + CheapExtractCost +
480 ConvertToShuffle = getShuffleExtract(Ext0, Ext1, PreferredExtractIndex);
481 if (ConvertToShuffle) {
492 if (
auto *FixedVecTy = dyn_cast<FixedVectorType>(VecTy)) {
495 ShuffleMask[BestInsIndex] = BestExtIndex;
497 VecTy, ShuffleMask,
CostKind, 0,
nullptr,
502 {},
CostKind, 0,
nullptr, {ConvertToShuffle});
509 return OldCost < NewCost;
519 auto *VecTy = cast<FixedVectorType>(Vec->
getType());
521 ShufMask[NewIndex] = OldIndex;
534 if (!isa<FixedVectorType>(
X->getType()))
540 assert(isa<ConstantInt>(
C) &&
"Expected a constant index operand");
541 if (isa<Constant>(
X))
554 assert(isa<CmpInst>(&
I) &&
"Expected a compare");
557 "Expected matching constant extract indexes");
565 replaceValue(
I, *NewExt);
573 assert(isa<BinaryOperator>(&
I) &&
"Expected a binary operator");
576 "Expected matching constant extract indexes");
586 if (
auto *VecBOInst = dyn_cast<Instruction>(VecBO))
587 VecBOInst->copyIRFlags(&
I);
590 replaceValue(
I, *NewExt);
618 auto *Ext0 = cast<ExtractElementInst>(I0);
619 auto *Ext1 = cast<ExtractElementInst>(I1);
626 if (isExtractExtractCheap(Ext0, Ext1,
I, ExtractToChange, InsertIndex))
629 if (ExtractToChange) {
630 unsigned CheapExtractIdx = ExtractToChange == Ext0 ? C1 : C0;
635 if (ExtractToChange == Ext0)
642 foldExtExtCmp(Ext0, Ext1,
I);
644 foldExtExtBinop(Ext0, Ext1,
I);
670 auto *VecTy = cast<FixedVectorType>(
I.getType());
672 auto *SrcVecTy = dyn_cast<FixedVectorType>(SrcVec->
getType());
673 if (!SrcVecTy || ScalarTy != SrcVecTy->getScalarType())
677 unsigned NumElts = VecTy->getNumElements();
678 if (Index >= NumElts)
685 std::iota(
Mask.begin(),
Mask.end(), 0);
701 bool NeedLenChg = SrcVecTy->getNumElements() != NumElts;
712 if (NewCost > OldCost)
727 replaceValue(
I, *NewShuf);
746 auto *DestTy = dyn_cast<FixedVectorType>(
I.getType());
747 auto *SrcTy = dyn_cast<FixedVectorType>(V0->
getType());
748 if (!DestTy || !SrcTy)
751 unsigned DestEltSize = DestTy->getScalarSizeInBits();
752 unsigned SrcEltSize = SrcTy->getScalarSizeInBits();
753 if (SrcTy->getPrimitiveSizeInBits() % DestEltSize != 0)
756 bool IsUnary = isa<UndefValue>(V1);
763 if (!(BCTy0 && BCTy0->getElementType() == DestTy->getElementType()) &&
764 !(BCTy1 && BCTy1->getElementType() == DestTy->getElementType()))
769 if (DestEltSize <= SrcEltSize) {
772 assert(SrcEltSize % DestEltSize == 0 &&
"Unexpected shuffle mask");
773 unsigned ScaleFactor = SrcEltSize / DestEltSize;
778 assert(DestEltSize % SrcEltSize == 0 &&
"Unexpected shuffle mask");
779 unsigned ScaleFactor = DestEltSize / SrcEltSize;
786 unsigned NumSrcElts = SrcTy->getPrimitiveSizeInBits() / DestEltSize;
791 unsigned NumOps = IsUnary ? 1 : 2;
801 TargetTransformInfo::CastContextHint::None,
806 TargetTransformInfo::CastContextHint::None,
809 LLVM_DEBUG(
dbgs() <<
"Found a bitcasted shuffle: " <<
I <<
"\n OldCost: "
810 << OldCost <<
" vs NewCost: " << NewCost <<
"\n");
812 if (NewCost > OldCost || !NewCost.
isValid())
820 replaceValue(
I, *Shuf);
827bool VectorCombine::scalarizeVPIntrinsic(
Instruction &
I) {
828 if (!isa<VPIntrinsic>(
I))
841 if (!ScalarOp0 || !ScalarOp1)
849 auto IsAllTrueMask = [](
Value *MaskVal) {
851 if (
auto *ConstValue = dyn_cast<Constant>(SplattedVal))
852 return ConstValue->isAllOnesValue();
867 if (
auto *FVTy = dyn_cast<FixedVectorType>(VecTy))
868 Mask.resize(FVTy->getNumElements(), 0);
877 Args.push_back(
V->getType());
883 std::optional<unsigned> FunctionalOpcode =
885 std::optional<Intrinsic::ID> ScalarIntrID = std::nullopt;
886 if (!FunctionalOpcode) {
910 <<
", Cost of scalarizing:" << NewCost <<
"\n");
913 if (OldCost < NewCost || !NewCost.
isValid())
924 bool SafeToSpeculate;
927 .
hasFnAttr(Attribute::AttrKind::Speculatable);
930 *FunctionalOpcode, &VPI,
nullptr, &AC, &DT);
931 if (!SafeToSpeculate &&
938 {ScalarOp0, ScalarOp1})
940 ScalarOp0, ScalarOp1);
948bool VectorCombine::scalarizeBinopOrCmp(
Instruction &
I) {
959 bool IsCmp = Pred != CmpInst::Predicate::BAD_ICMP_PREDICATE;
961 for (
User *U :
I.users())
971 Constant *VecC0 =
nullptr, *VecC1 =
nullptr;
972 Value *V0 =
nullptr, *V1 =
nullptr;
985 if (IsConst0 && IsConst1)
987 if (!IsConst0 && !IsConst1 && Index0 != Index1)
990 auto *VecTy0 = cast<VectorType>(Ins0->
getType());
991 auto *VecTy1 = cast<VectorType>(Ins1->
getType());
992 if (VecTy0->getElementCount().getKnownMinValue() <= Index0 ||
993 VecTy1->getElementCount().getKnownMinValue() <= Index1)
998 auto *I0 = dyn_cast_or_null<Instruction>(V0);
999 auto *
I1 = dyn_cast_or_null<Instruction>(V1);
1000 if ((IsConst0 && I1 &&
I1->mayReadFromMemory()) ||
1006 Type *VecTy =
I.getType();
1011 "Unexpected types for insert element into binop or cmp");
1013 unsigned Opcode =
I.getOpcode();
1029 Instruction::InsertElement, VecTy,
CostKind, Index);
1031 (IsConst0 ? 0 : InsertCost) + (IsConst1 ? 0 : InsertCost) + VectorOpCost;
1033 (IsConst0 ? 0 : !Ins0->
hasOneUse() * InsertCost) +
1034 (IsConst1 ? 0 : !Ins1->
hasOneUse() * InsertCost);
1037 if (OldCost < NewCost || !NewCost.
isValid())
1057 Scalar->setName(
I.getName() +
".scalar");
1061 if (
auto *ScalarInst = dyn_cast<Instruction>(Scalar))
1062 ScalarInst->copyIRFlags(&
I);
1066 IsCmp ? Builder.
CreateCmp(Pred, VecC0, VecC1)
1069 replaceValue(
I, *Insert);
1077 auto *BI = dyn_cast<BinaryOperator>(&
I);
1081 if (!BI || !
I.getType()->isIntegerTy(1))
1086 Value *B0 =
I.getOperand(0), *B1 =
I.getOperand(1);
1104 auto *Ext0 = cast<ExtractElementInst>(I0);
1105 auto *Ext1 = cast<ExtractElementInst>(I1);
1109 assert((ConvertToShuf == Ext0 || ConvertToShuf == Ext1) &&
1110 "Unknown ExtractElementInst");
1115 unsigned CmpOpcode =
1117 auto *VecTy = dyn_cast<FixedVectorType>(
X->getType());
1130 Ext0Cost + Ext1Cost + CmpCost * 2 +
1136 int CheapIndex = ConvertToShuf == Ext0 ? Index1 : Index0;
1137 int ExpensiveIndex = ConvertToShuf == Ext0 ? Index0 : Index1;
1143 ShufMask[CheapIndex] = ExpensiveIndex;
1148 NewCost += Ext0->
hasOneUse() ? 0 : Ext0Cost;
1149 NewCost += Ext1->
hasOneUse() ? 0 : Ext1Cost;
1154 if (OldCost < NewCost || !NewCost.
isValid())
1164 Value *
LHS = ConvertToShuf == Ext0 ? Shuf : VCmp;
1165 Value *
RHS = ConvertToShuf == Ext0 ? VCmp : Shuf;
1168 replaceValue(
I, *NewExt);
1177 unsigned NumScanned = 0;
1187class ScalarizationResult {
1188 enum class StatusTy { Unsafe, Safe, SafeWithFreeze };
1193 ScalarizationResult(StatusTy
Status,
Value *ToFreeze =
nullptr)
1197 ScalarizationResult(
const ScalarizationResult &
Other) =
default;
1198 ~ScalarizationResult() {
1199 assert(!ToFreeze &&
"freeze() not called with ToFreeze being set");
1202 static ScalarizationResult unsafe() {
return {StatusTy::Unsafe}; }
1203 static ScalarizationResult safe() {
return {StatusTy::Safe}; }
1204 static ScalarizationResult safeWithFreeze(
Value *ToFreeze) {
1205 return {StatusTy::SafeWithFreeze, ToFreeze};
1209 bool isSafe()
const {
return Status == StatusTy::Safe; }
1211 bool isUnsafe()
const {
return Status == StatusTy::Unsafe; }
1214 bool isSafeWithFreeze()
const {
return Status == StatusTy::SafeWithFreeze; }
1219 Status = StatusTy::Unsafe;
1224 assert(isSafeWithFreeze() &&
1225 "should only be used when freezing is required");
1227 "UserI must be a user of ToFreeze");
1233 if (
U.get() == ToFreeze)
1250 uint64_t NumElements = VecTy->getElementCount().getKnownMinValue();
1252 if (
auto *
C = dyn_cast<ConstantInt>(
Idx)) {
1253 if (
C->getValue().ult(NumElements))
1254 return ScalarizationResult::safe();
1255 return ScalarizationResult::unsafe();
1258 unsigned IntWidth =
Idx->getType()->getScalarSizeInBits();
1259 APInt Zero(IntWidth, 0);
1260 APInt MaxElts(IntWidth, NumElements);
1266 true, &AC, CtxI, &DT)))
1267 return ScalarizationResult::safe();
1268 return ScalarizationResult::unsafe();
1281 if (ValidIndices.
contains(IdxRange))
1282 return ScalarizationResult::safeWithFreeze(IdxBase);
1283 return ScalarizationResult::unsafe();
1293 if (
auto *
C = dyn_cast<ConstantInt>(
Idx))
1295 C->getZExtValue() *
DL.getTypeStoreSize(ScalarType));
1307bool VectorCombine::foldSingleElementStore(
Instruction &
I) {
1308 auto *
SI = cast<StoreInst>(&
I);
1309 if (!
SI->isSimple() || !isa<VectorType>(
SI->getValueOperand()->getType()))
1317 if (!
match(
SI->getValueOperand(),
1322 if (
auto *Load = dyn_cast<LoadInst>(Source)) {
1323 auto VecTy = cast<VectorType>(
SI->getValueOperand()->getType());
1324 Value *SrcAddr =
Load->getPointerOperand()->stripPointerCasts();
1327 if (!
Load->isSimple() ||
Load->getParent() !=
SI->getParent() ||
1328 !
DL->typeSizeEqualsStoreSize(
Load->getType()->getScalarType()) ||
1329 SrcAddr !=
SI->getPointerOperand()->stripPointerCasts())
1333 if (ScalarizableIdx.isUnsafe() ||
1338 if (ScalarizableIdx.isSafeWithFreeze())
1339 ScalarizableIdx.freeze(Builder, *cast<Instruction>(
Idx));
1341 SI->getValueOperand()->getType(),
SI->getPointerOperand(),
1342 {ConstantInt::get(Idx->getType(), 0), Idx});
1349 replaceValue(
I, *NSI);
1358bool VectorCombine::scalarizeLoadExtract(
Instruction &
I) {
1363 auto *VecTy = cast<VectorType>(
I.getType());
1364 auto *LI = cast<LoadInst>(&
I);
1365 if (LI->isVolatile() || !
DL->typeSizeEqualsStoreSize(VecTy->
getScalarType()))
1370 LI->getPointerAddressSpace(),
CostKind);
1374 unsigned NumInstChecked = 0;
1378 for (
auto &Pair : NeedFreeze)
1379 Pair.second.discard();
1386 auto *UI = dyn_cast<ExtractElementInst>(U);
1387 if (!UI || UI->getParent() != LI->getParent())
1394 make_range(std::next(LI->getIterator()), UI->getIterator())) {
1401 LastCheckedInst = UI;
1405 if (ScalarIdx.isUnsafe())
1407 if (ScalarIdx.isSafeWithFreeze()) {
1409 ScalarIdx.discard();
1412 auto *
Index = dyn_cast<ConstantInt>(UI->getOperand(1));
1415 Index ?
Index->getZExtValue() : -1);
1422 if (ScalarizedCost >= OriginalCost)
1427 auto *EI = cast<ExtractElementInst>(U);
1431 auto It = NeedFreeze.
find(EI);
1432 if (It != NeedFreeze.
end())
1433 It->second.freeze(Builder, *cast<Instruction>(
Idx));
1438 auto *NewLoad = cast<LoadInst>(Builder.
CreateLoad(
1439 VecTy->getElementType(),
GEP, EI->getName() +
".scalar"));
1442 LI->getAlign(), VecTy->getElementType(),
Idx, *
DL);
1443 NewLoad->setAlignment(ScalarOpAlignment);
1445 replaceValue(*EI, *NewLoad);
1448 FailureGuard.release();
1455bool VectorCombine::foldConcatOfBoolMasks(
Instruction &
I) {
1456 Type *Ty =
I.getType();
1461 if (
DL->isBigEndian())
1488 if (ShAmtX > ShAmtY) {
1496 uint64_t ShAmtDiff = ShAmtY - ShAmtX;
1497 unsigned NumSHL = (ShAmtX > 0) + (ShAmtY > 0);
1499 auto *MaskTy = dyn_cast<FixedVectorType>(SrcX->
getType());
1502 MaskTy->getNumElements() != ShAmtDiff ||
1503 MaskTy->getNumElements() > (
BitWidth / 2))
1512 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
1529 if (Ty != ConcatIntTy)
1535 LLVM_DEBUG(
dbgs() <<
"Found a concatenation of bitcasted bool masks: " <<
I
1536 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
1539 if (NewCost > OldCost)
1549 if (Ty != ConcatIntTy) {
1559 replaceValue(
I, *Result);
1565bool VectorCombine::foldPermuteOfBinops(
Instruction &
I) {
1589 auto *ShuffleDstTy = dyn_cast<FixedVectorType>(
I.getType());
1590 auto *BinOpTy = dyn_cast<FixedVectorType>(BinOp->
getType());
1591 auto *Op0Ty = dyn_cast<FixedVectorType>(Op00->
getType());
1592 auto *Op1Ty = dyn_cast<FixedVectorType>(Op10->
getType());
1593 if (!ShuffleDstTy || !BinOpTy || !Op0Ty || !Op1Ty)
1596 unsigned NumSrcElts = BinOpTy->getNumElements();
1600 if ((BinOp->
isIntDivRem() || !isa<PoisonValue>(
I.getOperand(1))) &&
1601 any_of(OuterMask, [NumSrcElts](
int M) {
return M >= (int)NumSrcElts; }))
1606 for (
int M : OuterMask) {
1607 if (M < 0 || M >= (
int)NumSrcElts) {
1620 OuterMask,
CostKind, 0,
nullptr, {BinOp}, &
I) +
1622 CostKind, 0,
nullptr, {Op00, Op01},
1625 CostKind, 0,
nullptr, {Op10, Op11},
1630 CostKind, 0,
nullptr, {Op00, Op01}) +
1632 CostKind, 0,
nullptr, {Op10, Op11}) +
1635 LLVM_DEBUG(
dbgs() <<
"Found a shuffle feeding a shuffled binop: " <<
I
1636 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
1640 if (NewCost > OldCost)
1648 if (
auto *NewInst = dyn_cast<Instruction>(NewBO))
1649 NewInst->copyIRFlags(BinOp);
1653 replaceValue(
I, *NewBO);
1659bool VectorCombine::foldShuffleOfBinops(
Instruction &
I) {
1667 if (
LHS->getOpcode() !=
RHS->getOpcode())
1671 bool IsCommutative =
false;
1676 auto *BO = cast<BinaryOperator>(LHS);
1680 IsCommutative = BinaryOperator::isCommutative(BO->getOpcode());
1684 IsCommutative = cast<CmpInst>(LHS)->isCommutative();
1688 auto *ShuffleDstTy = dyn_cast<FixedVectorType>(
I.getType());
1689 auto *BinResTy = dyn_cast<FixedVectorType>(
LHS->
getType());
1690 auto *BinOpTy = dyn_cast<FixedVectorType>(
X->getType());
1691 if (!ShuffleDstTy || !BinResTy || !BinOpTy ||
X->getType() !=
Z->getType())
1694 unsigned NumSrcElts = BinOpTy->getNumElements();
1697 if (IsCommutative &&
X != Z &&
Y != W && (
X == W ||
Y == Z))
1700 auto ConvertToUnary = [NumSrcElts](
int &
M) {
1701 if (M >= (
int)NumSrcElts)
1736 auto *ShuffleCmpTy =
1743 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
1748 bool ReducedInstCount = (isa<Constant>(
X) && isa<Constant>(Z)) ||
1749 (isa<Constant>(
Y) && isa<Constant>(W));
1750 if (ReducedInstCount ? (NewCost > OldCost) : (NewCost >= OldCost))
1757 cast<BinaryOperator>(LHS)->getOpcode(), Shuf0, Shuf1)
1758 : Builder.
CreateCmp(PredLHS, Shuf0, Shuf1);
1761 if (
auto *NewInst = dyn_cast<Instruction>(NewBO)) {
1762 NewInst->copyIRFlags(LHS);
1763 NewInst->andIRFlags(RHS);
1768 replaceValue(
I, *NewBO);
1774bool VectorCombine::foldShuffleOfCastops(
Instruction &
I) {
1780 auto *C0 = dyn_cast<CastInst>(V0);
1781 auto *C1 = dyn_cast<CastInst>(V1);
1786 if (C0->getSrcTy() != C1->getSrcTy())
1790 if (Opcode != C1->getOpcode()) {
1792 Opcode = Instruction::SExt;
1797 auto *ShuffleDstTy = dyn_cast<FixedVectorType>(
I.getType());
1798 auto *CastDstTy = dyn_cast<FixedVectorType>(C0->getDestTy());
1799 auto *CastSrcTy = dyn_cast<FixedVectorType>(C0->getSrcTy());
1800 if (!ShuffleDstTy || !CastDstTy || !CastSrcTy)
1803 unsigned NumSrcElts = CastSrcTy->getNumElements();
1804 unsigned NumDstElts = CastDstTy->getNumElements();
1805 assert((NumDstElts == NumSrcElts || Opcode == Instruction::BitCast) &&
1806 "Only bitcasts expected to alter src/dst element counts");
1810 if (NumDstElts != NumSrcElts && (NumSrcElts % NumDstElts) != 0 &&
1811 (NumDstElts % NumSrcElts) != 0)
1815 if (NumSrcElts >= NumDstElts) {
1818 assert(NumSrcElts % NumDstElts == 0 &&
"Unexpected shuffle mask");
1819 unsigned ScaleFactor = NumSrcElts / NumDstElts;
1824 assert(NumDstElts % NumSrcElts == 0 &&
"Unexpected shuffle mask");
1825 unsigned ScaleFactor = NumDstElts / NumSrcElts;
1830 auto *NewShuffleDstTy =
1855 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
1857 if (NewCost > OldCost)
1865 if (
auto *NewInst = dyn_cast<Instruction>(Cast)) {
1866 NewInst->copyIRFlags(C0);
1867 NewInst->andIRFlags(C1);
1871 replaceValue(
I, *Cast);
1881bool VectorCombine::foldShuffleOfShuffles(
Instruction &
I) {
1883 Value *OuterV0, *OuterV1;
1889 Value *X0, *X1, *Y0, *Y1;
1894 if (!Match0 && !Match1)
1897 X0 = Match0 ? X0 : OuterV0;
1898 Y0 = Match0 ? Y0 : OuterV0;
1899 X1 = Match1 ? X1 : OuterV1;
1900 Y1 = Match1 ? Y1 : OuterV1;
1901 auto *ShuffleDstTy = dyn_cast<FixedVectorType>(
I.getType());
1902 auto *ShuffleSrcTy = dyn_cast<FixedVectorType>(X0->
getType());
1903 auto *ShuffleImmTy = dyn_cast<FixedVectorType>(OuterV0->
getType());
1904 if (!ShuffleDstTy || !ShuffleSrcTy || !ShuffleImmTy ||
1908 unsigned NumSrcElts = ShuffleSrcTy->getNumElements();
1909 unsigned NumImmElts = ShuffleImmTy->getNumElements();
1915 Value *NewX =
nullptr, *NewY =
nullptr;
1916 for (
int &M : NewMask) {
1917 Value *Src =
nullptr;
1918 if (0 <= M && M < (
int)NumImmElts) {
1922 Src =
M >= (int)NumSrcElts ? Y0 : X0;
1923 M =
M >= (int)NumSrcElts ? (M - NumSrcElts) :
M;
1925 }
else if (M >= (
int)NumImmElts) {
1930 Src =
M >= (int)NumSrcElts ? Y1 : X1;
1931 M =
M >= (int)NumSrcElts ? (M - NumSrcElts) :
M;
1935 assert(0 <= M && M < (
int)NumSrcElts &&
"Unexpected shuffle mask index");
1936 if (isa<UndefValue>(Src)) {
1939 if (!isa<PoisonValue>(Src))
1944 if (!NewX || NewX == Src) {
1948 if (!NewY || NewY == Src) {
1964 replaceValue(
I, *NewX);
1979 0,
nullptr, {OuterV0, OuterV1}, &
I);
1983 bool IsUnary =
all_of(NewMask, [&](
int M) {
return M < (int)NumSrcElts; });
1988 SK, ShuffleSrcTy, NewMask,
CostKind, 0,
nullptr, {NewX, NewY});
1990 NewCost += InnerCost0;
1992 NewCost += InnerCost1;
1995 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
1997 if (NewCost > OldCost)
2001 replaceValue(
I, *Shuf);
2007bool VectorCombine::foldShuffleOfIntrinsics(
Instruction &
I) {
2014 auto *II0 = dyn_cast<IntrinsicInst>(V0);
2015 auto *II1 = dyn_cast<IntrinsicInst>(V1);
2020 if (IID != II1->getIntrinsicID())
2023 auto *ShuffleDstTy = dyn_cast<FixedVectorType>(
I.getType());
2024 auto *II0Ty = dyn_cast<FixedVectorType>(II0->getType());
2025 if (!ShuffleDstTy || !II0Ty)
2031 for (
unsigned I = 0, E = II0->arg_size();
I != E; ++
I)
2033 II0->getArgOperand(
I) != II1->getArgOperand(
I))
2044 for (
unsigned I = 0, E = II0->arg_size();
I != E; ++
I)
2046 NewArgsTy.
push_back(II0->getArgOperand(
I)->getType());
2048 auto *VecTy = cast<FixedVectorType>(II0->getArgOperand(
I)->getType());
2050 VecTy->getNumElements() * 2));
2058 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2061 if (NewCost > OldCost)
2065 for (
unsigned I = 0, E = II0->arg_size();
I != E; ++
I)
2070 II1->getArgOperand(
I), OldMask);
2077 if (
auto *NewInst = dyn_cast<Instruction>(NewIntrinsic)) {
2079 NewInst->andIRFlags(II1);
2082 replaceValue(
I, *NewIntrinsic);
2089 while (
auto *SV = dyn_cast<ShuffleVectorInst>(U->get())) {
2091 cast<FixedVectorType>(SV->getOperand(0)->getType())->getNumElements();
2092 int M = SV->getMaskValue(Lane);
2095 if (
static_cast<unsigned>(M) < NumElts) {
2096 U = &SV->getOperandUse(0);
2099 U = &SV->getOperandUse(1);
2110 auto [U, Lane] = IL;
2123 auto *Ty = cast<FixedVectorType>(Item.
front().first->get()->getType());
2124 unsigned NumElts = Ty->getNumElements();
2125 if (Item.
size() == NumElts || NumElts == 1 || Item.
size() % NumElts != 0)
2131 std::iota(ConcatMask.
begin(), ConcatMask.
end(), 0);
2135 unsigned NumSlices = Item.
size() / NumElts;
2140 for (
unsigned Slice = 0; Slice < NumSlices; ++Slice) {
2141 Use *SliceV = Item[Slice * NumElts].first;
2142 if (!SliceV || SliceV->get()->
getType() != Ty)
2144 for (
unsigned Elt = 0; Elt < NumElts; ++Elt) {
2145 auto [V, Lane] = Item[Slice * NumElts + Elt];
2146 if (Lane !=
static_cast<int>(Elt) || SliceV->get() != V->get())
2159 auto [FrontU, FrontLane] = Item.
front();
2161 if (IdentityLeafs.
contains(FrontU)) {
2162 return FrontU->get();
2168 if (ConcatLeafs.
contains(FrontU)) {
2170 cast<FixedVectorType>(FrontU->get()->getType())->getNumElements();
2172 for (
unsigned S = 0; S < Values.
size(); ++S)
2173 Values[S] = Item[S * NumElts].first->get();
2175 while (Values.
size() > 1) {
2178 std::iota(Mask.begin(), Mask.end(), 0);
2180 for (
unsigned S = 0; S < NewValues.
size(); ++S)
2188 auto *
I = cast<Instruction>(FrontU->get());
2189 auto *
II = dyn_cast<IntrinsicInst>(
I);
2190 unsigned NumOps =
I->getNumOperands() - (
II ? 1 : 0);
2192 for (
unsigned Idx = 0;
Idx < NumOps;
Idx++) {
2199 Ty, IdentityLeafs, SplatLeafs, ConcatLeafs,
2204 for (
const auto &Lane : Item)
2210 if (
auto *BI = dyn_cast<BinaryOperator>(
I)) {
2216 if (
auto *CI = dyn_cast<CmpInst>(
I)) {
2217 auto *
Value = Builder.
CreateCmp(CI->getPredicate(), Ops[0], Ops[1]);
2221 if (
auto *SI = dyn_cast<SelectInst>(
I)) {
2226 if (
auto *CI = dyn_cast<CastInst>(
I)) {
2237 assert(isa<UnaryInstruction>(
I) &&
"Unexpected instruction type in Generate");
2247bool VectorCombine::foldShuffleToIdentity(
Instruction &
I) {
2248 auto *Ty = dyn_cast<FixedVectorType>(
I.getType());
2249 if (!Ty ||
I.use_empty())
2253 for (
unsigned M = 0, E = Ty->getNumElements(); M < E; ++M)
2259 unsigned NumVisited = 0;
2261 while (!Worklist.
empty()) {
2266 auto [FrontU, FrontLane] = Item.
front();
2274 return X->getType() ==
Y->getType() &&
2279 if (FrontLane == 0 &&
2280 cast<FixedVectorType>(FrontU->get()->getType())->getNumElements() ==
2281 Ty->getNumElements() &&
2284 return !E.value().first || (IsEquiv(E.value().first->get(), FrontV) &&
2285 E.value().second == (
int)E.index());
2287 IdentityLeafs.
insert(FrontU);
2291 if (
auto *
C = dyn_cast<Constant>(FrontU);
2292 C &&
C->getSplatValue() &&
2296 return !U || (isa<Constant>(
U->get()) &&
2297 cast<Constant>(
U->get())->getSplatValue() ==
2298 cast<Constant>(FrontV)->getSplatValue());
2300 SplatLeafs.
insert(FrontU);
2305 auto [FrontU, FrontLane] = Item.
front();
2306 auto [
U, Lane] = IL;
2307 return !
U || (
U->get() == FrontU->get() && Lane == FrontLane);
2309 SplatLeafs.
insert(FrontU);
2315 auto CheckLaneIsEquivalentToFirst = [Item](
InstLane IL) {
2319 Value *
V = IL.first->get();
2320 if (
auto *
I = dyn_cast<Instruction>(V);
I && !
I->hasOneUse())
2324 if (
auto *CI = dyn_cast<CmpInst>(V))
2325 if (CI->getPredicate() != cast<CmpInst>(FrontV)->getPredicate())
2327 if (
auto *CI = dyn_cast<CastInst>(V))
2328 if (CI->getSrcTy()->getScalarType() !=
2329 cast<CastInst>(FrontV)->getSrcTy()->getScalarType())
2331 if (
auto *SI = dyn_cast<SelectInst>(V))
2332 if (!isa<VectorType>(
SI->getOperand(0)->getType()) ||
2333 SI->getOperand(0)->getType() !=
2334 cast<SelectInst>(FrontV)->getOperand(0)->getType())
2336 if (isa<CallInst>(V) && !isa<IntrinsicInst>(V))
2338 auto *
II = dyn_cast<IntrinsicInst>(V);
2339 return !
II || (isa<IntrinsicInst>(FrontV) &&
2340 II->getIntrinsicID() ==
2341 cast<IntrinsicInst>(FrontV)->getIntrinsicID() &&
2342 !
II->hasOperandBundles());
2346 if (isa<BinaryOperator, CmpInst>(FrontU)) {
2348 if (
auto *BO = dyn_cast<BinaryOperator>(FrontU);
2349 BO && BO->isIntDivRem())
2358 }
else if (
auto *BitCast = dyn_cast<BitCastInst>(FrontU)) {
2360 auto *DstTy = dyn_cast<FixedVectorType>(BitCast->getDestTy());
2361 auto *SrcTy = dyn_cast<FixedVectorType>(BitCast->getSrcTy());
2362 if (DstTy && SrcTy &&
2363 SrcTy->getNumElements() == DstTy->getNumElements()) {
2367 }
else if (isa<SelectInst>(FrontU)) {
2372 }
else if (
auto *
II = dyn_cast<IntrinsicInst>(FrontU);
2374 !
II->hasOperandBundles()) {
2375 for (
unsigned Op = 0, E =
II->getNumOperands() - 1;
Op < E;
Op++) {
2381 return !U || (cast<Instruction>(
U->get())->getOperand(
Op) ==
2382 cast<Instruction>(FrontV)->getOperand(
Op));
2394 ConcatLeafs.
insert(FrontU);
2401 if (NumVisited <= 1)
2404 LLVM_DEBUG(
dbgs() <<
"Found a superfluous identity shuffle: " <<
I <<
"\n");
2410 ConcatLeafs, Builder, &
TTI);
2411 replaceValue(
I, *V);
2418bool VectorCombine::foldShuffleFromReductions(
Instruction &
I) {
2419 auto *
II = dyn_cast<IntrinsicInst>(&
I);
2422 switch (
II->getIntrinsicID()) {
2423 case Intrinsic::vector_reduce_add:
2424 case Intrinsic::vector_reduce_mul:
2425 case Intrinsic::vector_reduce_and:
2426 case Intrinsic::vector_reduce_or:
2427 case Intrinsic::vector_reduce_xor:
2428 case Intrinsic::vector_reduce_smin:
2429 case Intrinsic::vector_reduce_smax:
2430 case Intrinsic::vector_reduce_umin:
2431 case Intrinsic::vector_reduce_umax:
2440 std::queue<Value *> Worklist;
2443 if (
auto *
Op = dyn_cast<Instruction>(
I.getOperand(0)))
2446 while (!Worklist.empty()) {
2447 Value *CV = Worklist.front();
2458 if (
auto *CI = dyn_cast<Instruction>(CV)) {
2459 if (CI->isBinaryOp()) {
2460 for (
auto *
Op : CI->operand_values())
2463 }
else if (
auto *SV = dyn_cast<ShuffleVectorInst>(CI)) {
2464 if (Shuffle && Shuffle != SV)
2481 for (
auto *V : Visited)
2482 for (
auto *U :
V->users())
2483 if (!Visited.contains(U) && U != &
I)
2487 dyn_cast<FixedVectorType>(
II->getOperand(0)->getType());
2492 if (!ShuffleInputType)
2500 sort(ConcatMask, [](
int X,
int Y) {
return (
unsigned)
X < (
unsigned)
Y; });
2504 bool IsTruncatingShuffle =
VecType->getNumElements() < NumInputElts;
2505 bool UsesSecondVec =
2506 any_of(ConcatMask, [&](
int M) {
return M >= (int)NumInputElts; });
2509 (UsesSecondVec && !IsTruncatingShuffle) ? VecType : ShuffleInputType;
2515 VecTyForCost, ConcatMask,
CostKind);
2517 LLVM_DEBUG(
dbgs() <<
"Found a reduction feeding from a shuffle: " << *Shuffle
2519 LLVM_DEBUG(
dbgs() <<
" OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2521 if (NewCost < OldCost) {
2525 LLVM_DEBUG(
dbgs() <<
"Created new shuffle: " << *NewShuffle <<
"\n");
2526 replaceValue(*Shuffle, *NewShuffle);
2531 return foldSelectShuffle(*Shuffle,
true);
2538bool VectorCombine::foldCastFromReductions(
Instruction &
I) {
2539 auto *
II = dyn_cast<IntrinsicInst>(&
I);
2543 bool TruncOnly =
false;
2546 case Intrinsic::vector_reduce_add:
2547 case Intrinsic::vector_reduce_mul:
2550 case Intrinsic::vector_reduce_and:
2551 case Intrinsic::vector_reduce_or:
2552 case Intrinsic::vector_reduce_xor:
2559 Value *ReductionSrc =
I.getOperand(0);
2569 auto *SrcTy = cast<VectorType>(Src->getType());
2570 auto *ReductionSrcTy = cast<VectorType>(ReductionSrc->
getType());
2571 Type *ResultTy =
I.getType();
2574 ReductionOpc, ReductionSrcTy, std::nullopt,
CostKind);
2577 cast<CastInst>(ReductionSrc));
2584 if (OldCost <= NewCost || !NewCost.
isValid())
2588 II->getIntrinsicID(), {Src});
2590 replaceValue(
I, *NewCast);
2604bool VectorCombine::foldSelectShuffle(
Instruction &
I,
bool FromReduction) {
2605 auto *SVI = cast<ShuffleVectorInst>(&
I);
2606 auto *VT = cast<FixedVectorType>(
I.getType());
2607 auto *Op0 = dyn_cast<Instruction>(SVI->getOperand(0));
2608 auto *Op1 = dyn_cast<Instruction>(SVI->getOperand(1));
2609 if (!Op0 || !Op1 || Op0 == Op1 || !Op0->isBinaryOp() || !Op1->isBinaryOp() ||
2613 auto *SVI0A = dyn_cast<Instruction>(Op0->getOperand(0));
2614 auto *SVI0B = dyn_cast<Instruction>(Op0->getOperand(1));
2615 auto *SVI1A = dyn_cast<Instruction>(Op1->getOperand(0));
2616 auto *SVI1B = dyn_cast<Instruction>(Op1->getOperand(1));
2619 if (!
I ||
I->getOperand(0)->getType() != VT)
2622 return U != Op0 && U != Op1 &&
2623 !(isa<ShuffleVectorInst>(U) &&
2624 (InputShuffles.contains(cast<Instruction>(U)) ||
2625 isInstructionTriviallyDead(cast<Instruction>(U))));
2628 if (checkSVNonOpUses(SVI0A) || checkSVNonOpUses(SVI0B) ||
2629 checkSVNonOpUses(SVI1A) || checkSVNonOpUses(SVI1B))
2637 for (
auto *U :
I->users()) {
2638 auto *SV = dyn_cast<ShuffleVectorInst>(U);
2639 if (!SV || SV->getType() != VT)
2641 if ((SV->getOperand(0) != Op0 && SV->getOperand(0) != Op1) ||
2642 (SV->getOperand(1) != Op0 && SV->getOperand(1) != Op1))
2649 if (!collectShuffles(Op0) || !collectShuffles(Op1))
2653 if (FromReduction && Shuffles.
size() > 1)
2658 if (!FromReduction) {
2660 for (
auto *U : SV->users()) {
2663 Shuffles.push_back(SSV);
2675 int MaxV1Elt = 0, MaxV2Elt = 0;
2676 unsigned NumElts = VT->getNumElements();
2679 SVN->getShuffleMask(Mask);
2683 Value *SVOp0 = SVN->getOperand(0);
2684 Value *SVOp1 = SVN->getOperand(1);
2685 if (isa<UndefValue>(SVOp1)) {
2686 auto *SSV = cast<ShuffleVectorInst>(SVOp0);
2689 for (
unsigned I = 0, E =
Mask.size();
I != E;
I++) {
2695 if (SVOp0 == Op1 && SVOp1 == Op0) {
2699 if (SVOp0 != Op0 || SVOp1 != Op1)
2706 for (
unsigned I = 0;
I <
Mask.size();
I++) {
2709 }
else if (Mask[
I] <
static_cast<int>(NumElts)) {
2710 MaxV1Elt = std::max(MaxV1Elt, Mask[
I]);
2711 auto It =
find_if(V1, [&](
const std::pair<int, int> &
A) {
2712 return Mask[
I] ==
A.first;
2721 MaxV2Elt = std::max<int>(MaxV2Elt, Mask[
I] - NumElts);
2722 auto It =
find_if(V2, [&](
const std::pair<int, int> &
A) {
2723 return Mask[
I] -
static_cast<int>(NumElts) ==
A.first;
2726 ReconstructMask.
push_back(NumElts + It -
V2.begin());
2729 V2.emplace_back(Mask[
I] - NumElts, NumElts +
V2.size());
2737 sort(ReconstructMask);
2738 OrigReconstructMasks.
push_back(std::move(ReconstructMask));
2745 if (V1.
empty() ||
V2.empty() ||
2746 (MaxV1Elt ==
static_cast<int>(V1.
size()) - 1 &&
2747 MaxV2Elt ==
static_cast<int>(
V2.size()) - 1))
2754 auto *SV = dyn_cast<ShuffleVectorInst>(
I);
2757 if (isa<UndefValue>(SV->getOperand(1)))
2758 if (
auto *SSV = dyn_cast<ShuffleVectorInst>(SV->getOperand(0)))
2759 if (InputShuffles.contains(SSV))
2761 return SV->getMaskValue(M);
2769 std::pair<int, int>
Y) {
2770 int MXA = GetBaseMaskValue(
A,
X.first);
2771 int MYA = GetBaseMaskValue(
A,
Y.first);
2774 stable_sort(V1, [&](std::pair<int, int>
A, std::pair<int, int>
B) {
2775 return SortBase(SVI0A,
A,
B);
2777 stable_sort(V2, [&](std::pair<int, int>
A, std::pair<int, int>
B) {
2778 return SortBase(SVI1A,
A,
B);
2783 for (
const auto &Mask : OrigReconstructMasks) {
2785 for (
int M : Mask) {
2787 auto It =
find_if(V, [M](
auto A) {
return A.second ==
M; });
2788 assert(It !=
V.end() &&
"Expected all entries in Mask");
2789 return std::distance(
V.begin(), It);
2793 else if (M <
static_cast<int>(NumElts)) {
2794 ReconstructMask.
push_back(FindIndex(V1, M));
2796 ReconstructMask.
push_back(NumElts + FindIndex(V2, M));
2799 ReconstructMasks.push_back(std::move(ReconstructMask));
2805 for (
unsigned I = 0;
I < V1.
size();
I++) {
2806 V1A.
push_back(GetBaseMaskValue(SVI0A, V1[
I].first));
2807 V1B.
push_back(GetBaseMaskValue(SVI0B, V1[
I].first));
2809 for (
unsigned I = 0;
I <
V2.size();
I++) {
2810 V2A.
push_back(GetBaseMaskValue(SVI1A, V2[
I].first));
2811 V2B.
push_back(GetBaseMaskValue(SVI1B, V2[
I].first));
2813 while (V1A.
size() < NumElts) {
2817 while (V2A.
size() < NumElts) {
2823 auto *SV = dyn_cast<ShuffleVectorInst>(
I);
2829 VT, SV->getShuffleMask(),
CostKind);
2840 CostBefore += std::accumulate(Shuffles.begin(), Shuffles.end(),
2842 CostBefore += std::accumulate(InputShuffles.begin(), InputShuffles.end(),
2854 CostAfter += std::accumulate(ReconstructMasks.begin(), ReconstructMasks.end(),
2856 std::set<SmallVector<int>> OutputShuffleMasks({V1A, V1B, V2A, V2B});
2858 std::accumulate(OutputShuffleMasks.begin(), OutputShuffleMasks.end(),
2861 LLVM_DEBUG(
dbgs() <<
"Found a binop select shuffle pattern: " <<
I <<
"\n");
2863 <<
" vs CostAfter: " << CostAfter <<
"\n");
2864 if (CostBefore <= CostAfter)
2869 auto *SV = dyn_cast<ShuffleVectorInst>(
I);
2872 if (isa<UndefValue>(SV->getOperand(1)))
2873 if (
auto *SSV = dyn_cast<ShuffleVectorInst>(SV->getOperand(0)))
2874 if (InputShuffles.contains(SSV))
2876 return SV->getOperand(
Op);
2880 GetShuffleOperand(SVI0A, 1), V1A);
2883 GetShuffleOperand(SVI0B, 1), V1B);
2886 GetShuffleOperand(SVI1A, 1), V2A);
2889 GetShuffleOperand(SVI1B, 1), V2B);
2893 if (
auto *
I = dyn_cast<Instruction>(NOp0))
2894 I->copyIRFlags(Op0,
true);
2898 if (
auto *
I = dyn_cast<Instruction>(NOp1))
2899 I->copyIRFlags(Op1,
true);
2901 for (
int S = 0, E = ReconstructMasks.size(); S != E; S++) {
2904 replaceValue(*Shuffles[S], *NSV);
2907 Worklist.pushValue(NSV0A);
2908 Worklist.pushValue(NSV0B);
2909 Worklist.pushValue(NSV1A);
2910 Worklist.pushValue(NSV1B);
2911 for (
auto *S : Shuffles)
2923 Value *ZExted, *OtherOperand;
2929 Value *ZExtOperand =
I.getOperand(
I.getOperand(0) == OtherOperand ? 1 : 0);
2931 auto *BigTy = cast<FixedVectorType>(
I.getType());
2932 auto *SmallTy = cast<FixedVectorType>(ZExted->
getType());
2933 unsigned BW = SmallTy->getElementType()->getPrimitiveSizeInBits();
2935 if (
I.getOpcode() == Instruction::LShr) {
2952 Instruction::ZExt, BigTy, SmallTy,
2953 TargetTransformInfo::CastContextHint::None,
CostKind);
2959 auto *UI = cast<Instruction>(U);
2965 ShrinkCost += ZExtCost;
2980 ShrinkCost += ZExtCost;
2985 if (!isa<Constant>(OtherOperand))
2987 Instruction::Trunc, SmallTy, BigTy,
2988 TargetTransformInfo::CastContextHint::None,
CostKind);
2993 if (ShrinkCost > CurrentCost)
2997 Value *Op0 = ZExted;
3000 if (
I.getOperand(0) == OtherOperand)
3004 cast<Instruction>(NewBinOp)->copyIRFlags(&
I);
3005 cast<Instruction>(NewBinOp)->copyMetadata(
I);
3007 replaceValue(
I, *NewZExtr);
3013bool VectorCombine::foldInsExtVectorToShuffle(
Instruction &
I) {
3014 Value *DstVec, *SrcVec;
3022 auto *VecTy = dyn_cast<FixedVectorType>(
I.getType());
3023 if (!VecTy || SrcVec->
getType() != VecTy)
3026 unsigned NumElts = VecTy->getNumElements();
3027 if (ExtIdx >= NumElts || InsIdx >= NumElts)
3033 if (isa<PoisonValue>(DstVec) && !isa<UndefValue>(SrcVec)) {
3035 Mask[InsIdx] = ExtIdx;
3039 std::iota(
Mask.begin(),
Mask.end(), 0);
3040 Mask[InsIdx] = ExtIdx + NumElts;
3044 auto *
Ins = cast<InsertElementInst>(&
I);
3045 auto *
Ext = cast<ExtractElementInst>(
I.getOperand(1));
3053 nullptr, {DstVec, SrcVec});
3054 if (!
Ext->hasOneUse())
3057 LLVM_DEBUG(
dbgs() <<
"Found a insert/extract shuffle-like pair : " <<
I
3058 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
3061 if (OldCost < NewCost)
3065 if (isa<UndefValue>(DstVec) && !isa<UndefValue>(SrcVec)) {
3071 replaceValue(
I, *Shuf);
3078bool VectorCombine::run() {
3088 bool MadeChange =
false;
3091 bool IsVectorType = isa<VectorType>(
I.getType());
3092 bool IsFixedVectorType = isa<FixedVectorType>(
I.getType());
3093 auto Opcode =
I.getOpcode();
3101 if (IsFixedVectorType) {
3103 case Instruction::InsertElement:
3104 MadeChange |= vectorizeLoadInsert(
I);
3106 case Instruction::ShuffleVector:
3107 MadeChange |= widenSubvectorLoad(
I);
3117 MadeChange |= scalarizeBinopOrCmp(
I);
3118 MadeChange |= scalarizeLoadExtract(
I);
3119 MadeChange |= scalarizeVPIntrinsic(
I);
3122 if (Opcode == Instruction::Store)
3123 MadeChange |= foldSingleElementStore(
I);
3126 if (TryEarlyFoldsOnly)
3133 if (IsFixedVectorType) {
3135 case Instruction::InsertElement:
3136 MadeChange |= foldInsExtFNeg(
I);
3137 MadeChange |= foldInsExtVectorToShuffle(
I);
3139 case Instruction::ShuffleVector:
3140 MadeChange |= foldPermuteOfBinops(
I);
3141 MadeChange |= foldShuffleOfBinops(
I);
3142 MadeChange |= foldShuffleOfCastops(
I);
3143 MadeChange |= foldShuffleOfShuffles(
I);
3144 MadeChange |= foldShuffleOfIntrinsics(
I);
3145 MadeChange |= foldSelectShuffle(
I);
3146 MadeChange |= foldShuffleToIdentity(
I);
3148 case Instruction::BitCast:
3149 MadeChange |= foldBitcastShuffle(
I);
3152 MadeChange |= shrinkType(
I);
3157 case Instruction::Call:
3158 MadeChange |= foldShuffleFromReductions(
I);
3159 MadeChange |= foldCastFromReductions(
I);
3161 case Instruction::ICmp:
3162 case Instruction::FCmp:
3163 MadeChange |= foldExtractExtract(
I);
3165 case Instruction::Or:
3166 MadeChange |= foldConcatOfBoolMasks(
I);
3170 MadeChange |= foldExtractExtract(
I);
3171 MadeChange |= foldExtractedCmps(
I);
3184 if (
I.isDebugOrPseudoInst())
3190 while (!Worklist.isEmpty()) {
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This is the interface for LLVM's primary stateless and local alias analysis.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
std::optional< std::vector< StOtherPiece > > Other
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
This is the interface for a simple mod/ref and alias analysis over globals.
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU)
uint64_t IntrinsicInst * II
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
FunctionAnalysisManager FAM
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static SymbolRef::Type getType(const Symbol *Sym)
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
static bool isFreeConcat(ArrayRef< InstLane > Item, TTI::TargetCostKind CostKind, const TargetTransformInfo &TTI)
Detect concat of multiple values into a vector.
static SmallVector< InstLane > generateInstLaneVectorFromOperand(ArrayRef< InstLane > Item, int Op)
static Value * createShiftShuffle(Value *Vec, unsigned OldIndex, unsigned NewIndex, IRBuilder<> &Builder)
Create a shuffle that translates (shifts) 1 element from the input vector to a new element location.
static Value * peekThroughBitcasts(Value *V)
Return the source operand of a potentially bitcasted value.
static Align computeAlignmentAfterScalarization(Align VectorAlignment, Type *ScalarType, Value *Idx, const DataLayout &DL)
The memory operation on a vector of ScalarType had alignment of VectorAlignment.
static ScalarizationResult canScalarizeAccess(VectorType *VecTy, Value *Idx, Instruction *CtxI, AssumptionCache &AC, const DominatorTree &DT)
Check if it is legal to scalarize a memory access to VecTy at index Idx.
static cl::opt< bool > DisableVectorCombine("disable-vector-combine", cl::init(false), cl::Hidden, cl::desc("Disable all vector combine transforms"))
static InstLane lookThroughShuffles(Use *U, int Lane)
static bool canWidenLoad(LoadInst *Load, const TargetTransformInfo &TTI)
static const unsigned InvalidIndex
static Value * generateNewInstTree(ArrayRef< InstLane > Item, FixedVectorType *Ty, const SmallPtrSet< Use *, 4 > &IdentityLeafs, const SmallPtrSet< Use *, 4 > &SplatLeafs, const SmallPtrSet< Use *, 4 > &ConcatLeafs, IRBuilder<> &Builder, const TargetTransformInfo *TTI)
std::pair< Use *, int > InstLane
static cl::opt< unsigned > MaxInstrsToScan("vector-combine-max-scan-instrs", cl::init(30), cl::Hidden, cl::desc("Max number of instructions to scan for vector combining."))
static cl::opt< bool > DisableBinopExtractShuffle("disable-binop-extract-shuffle", cl::init(false), cl::Hidden, cl::desc("Disable binop extract to shuffle transforms"))
static bool isMemModifiedBetween(BasicBlock::iterator Begin, BasicBlock::iterator End, const MemoryLocation &Loc, AAResults &AA)
static ExtractElementInst * translateExtract(ExtractElementInst *ExtElt, unsigned NewIndex, IRBuilder<> &Builder)
Given an extract element instruction with constant index operand, shuffle the source vector (shift th...
static constexpr int Concat[]
A manager for alias analyses.
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
Check whether or not an instruction may read or write the optionally specified memory location.
Class for arbitrary precision integers.
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & front() const
front - Get the first element.
size_t size() const
size - Get the array size.
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
bool hasFnAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the function.
LLVM Basic Block Representation.
InstListType::iterator iterator
Instruction iterators...
BinaryOps getOpcode() const
Represents analyses that only rely on functions' control flow.
Value * getArgOperand(unsigned i) const
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
bool isFPPredicate() const
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
static Constant * getExtractElement(Constant *Vec, Constant *Idx, Type *OnlyIfReducedTy=nullptr)
This is the shared class of boolean and integer constants.
const APInt & getValue() const
Return the constant as an APInt value reference.
This class represents a range of values.
ConstantRange urem(const ConstantRange &Other) const
Return a new range representing the possible values resulting from an unsigned remainder operation of...
ConstantRange binaryAnd(const ConstantRange &Other) const
Return a new range representing the possible values resulting from a binary-and of a value in this ra...
bool contains(const APInt &Val) const
Return true if the specified value is in the set.
static Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
This class represents a cast from floating point to signed integer.
This class represents a cast from floating point to unsigned integer.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static FixedVectorType * getDoubleElementsVectorType(FixedVectorType *VTy)
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Value * CreateFNegFMF(Value *V, Instruction *FMFSource, const Twine &Name="")
Copy fast-math-flags from an instruction rather than using the builder's default FMF.
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Value * CreateFreeze(Value *V, const Twine &Name="")
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Value * CreatePointerBitCastOrAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
Value * CreateUnOp(Instruction::UnaryOps Opc, Value *V, const Twine &Name="", MDNode *FPMathTag=nullptr)
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="", MDNode *FPMathTag=nullptr)
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
InstructionWorklist - This is the worklist management logic for InstCombine and other simplification ...
void pushUsersToWorkList(Instruction &I)
When an instruction is simplified, add all users of the instruction to the work lists because they mi...
void push(Instruction *I)
Push the instruction onto the worklist stack.
void remove(Instruction *I)
Remove I from the worklist if it exists.
void copyIRFlags(const Value *V, bool IncludeWrapFlags=true)
Convenience method to copy supported exact, fast-math, and (optionally) wrapping flags from V to this...
bool comesBefore(const Instruction *Other) const
Given an instruction Other in the same basic block as this instruction, return true if this instructi...
bool mayReadFromMemory() const LLVM_READONLY
Return true if this instruction may read memory.
void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
An instruction for reading from memory.
Representation for a specific memory location.
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
void preserveSet()
Mark an analysis set as preserved.
This class represents a sign extension of integer types.
This class represents a cast from signed integer to floating point.
This instruction constructs a fixed permutation of two input vectors.
int getMaskValue(unsigned Elt) const
Return the shuffle mask value of this instruction for the given element index.
VectorType * getType() const
Overload to return most specific vector type.
static void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
static void commuteShuffleMask(MutableArrayRef< int > Mask, unsigned InVecNumElts)
Change values in a shuffle permute mask assuming the two vector operands of length InVecNumElts have ...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
void assign(size_type NumElts, ValueParamT Elt)
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setAlignment(Align Align)
Analysis pass providing the TargetTransformInfo.
This class represents a truncation of integer types.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isVectorTy() const
True if this is an instance of VectorType.
bool isPointerTy() const
True if this is an instance of PointerType.
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
This class represents a cast unsigned integer to floating point.
A Use represents the edge between a Value definition and its users.
Value * getOperand(unsigned i) const
static bool isVPBinOp(Intrinsic::ID ID)
This is the common base class for vector predication intrinsics.
std::optional< unsigned > getFunctionalIntrinsicID() const
std::optional< unsigned > getFunctionalOpcode() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
This is a wrapper around stripAndAccumulateConstantOffsets with the in-bounds requirement set to fals...
bool hasOneUse() const
Return true if there is exactly one use of this value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
unsigned getValueID() const
Return an ID for the concrete type of this object.
bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
StringRef getName() const
Return a constant reference to the value's name.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &)
This class represents zero extension of integer types.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
AttributeList getAttributes(LLVMContext &C, ID id)
Return the attributes for an intrinsic.
class_match< PoisonValue > m_Poison()
Match an arbitrary poison constant.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
BinaryOp_match< LHS, RHS, Instruction::URem > m_URem(const LHS &L, const RHS &R)
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
DisjointOr_match< LHS, RHS > m_DisjointOr(const LHS &L, const RHS &R)
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
match_combine_and< LTy, RTy > m_CombineAnd(const LTy &L, const RTy &R)
Combine two pattern matchers matching L && R.
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
OneUse_match< T > m_OneUse(const T &SubPattern)
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
BinOpPred_match< LHS, RHS, is_bitwiselogic_op, true > m_c_BitwiseLogic(const LHS &L, const RHS &R)
Matches bitwise logic operations in either order.
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
match_combine_or< CastInst_match< OpTy, SExtInst >, NNegZExt_match< OpTy > > m_SExtLike(const OpTy &Op)
Match either "sext" or "zext nneg".
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void stable_sort(R &&Range)
UnaryFunction for_each(R &&Range, UnaryFunction F)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
detail::scope_exit< std::decay_t< Callable > > make_scope_exit(Callable &&F)
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
unsigned getArithmeticReductionInstruction(Intrinsic::ID RdxID)
Returns the arithmetic instruction opcode used when expanding a reduction.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
bool mustSuppressSpeculation(const LoadInst &LI)
Return true if speculation of the given load must be suppressed to avoid ordering or interfering with...
bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
ConstantRange computeConstantRange(const Value *V, bool ForSigned, bool UseInstrInfo=true, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Determine the possible constant range of an integer or vector of integer value.
bool isSafeToSpeculativelyExecuteWithOpcode(unsigned Opcode, const Instruction *Inst, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true)
This returns the same result as isSafeToSpeculativelyExecute if Opcode is the actual opcode of Inst.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
bool isModSet(const ModRefInfo MRI)
void sort(IteratorTy Start, IteratorTy End)
bool isSafeToLoadUnconditionally(Value *V, Align Alignment, const APInt &Size, const DataLayout &DL, Instruction *ScanFrom, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if we know that executing a load from this value cannot trap.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
void propagateIRFlags(Value *I, ArrayRef< Value * > VL, Value *OpValue=nullptr, bool IncludeWrapFlags=true)
Get the intersection (logical and) of all of the potential IR flags of each scalar operation (VL) tha...
bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
constexpr int PoisonMaskElem
void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
DWARFExpression::Operation Op
constexpr unsigned BitWidth
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
bool isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Returns true if V cannot be poison, but may be undef.
bool isTriviallyVectorizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially vectorizable.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.