36#define DEBUG_TYPE "vector-combine"
42STATISTIC(NumVecLoad,
"Number of vector loads formed");
43STATISTIC(NumVecCmp,
"Number of vector compares formed");
44STATISTIC(NumVecBO,
"Number of vector binops formed");
45STATISTIC(NumVecCmpBO,
"Number of vector compare + binop formed");
46STATISTIC(NumShufOfBitcast,
"Number of shuffles moved after bitcast");
47STATISTIC(NumScalarBO,
"Number of scalar binops formed");
48STATISTIC(NumScalarCmp,
"Number of scalar compares formed");
52 cl::desc(
"Disable all vector combine transforms"));
56 cl::desc(
"Disable binop extract to shuffle transforms"));
60 cl::desc(
"Max number of instructions to scan for vector combining."));
62static const unsigned InvalidIndex = std::numeric_limits<unsigned>::max();
70 :
F(
F), Builder(
F.getContext()),
TTI(
TTI), DT(DT), AA(AA), AC(AC),
DL(
DL),
71 TryEarlyFoldsOnly(TryEarlyFoldsOnly) {}
86 bool TryEarlyFoldsOnly;
97 unsigned PreferredExtractIndex)
const;
101 unsigned PreferredExtractIndex);
120 bool foldSelectShuffle(
Instruction &
I,
bool FromReduction =
false);
124 if (
auto *NewI = dyn_cast<Instruction>(&New)) {
144 while (
auto *BitCast = dyn_cast<BitCastInst>(V))
145 V = BitCast->getOperand(0);
153 if (!Load || !Load->isSimple() || !Load->hasOneUse() ||
154 Load->getFunction()->hasFnAttribute(Attribute::SanitizeMemTag) ||
160 Type *ScalarTy = Load->getType()->getScalarType();
163 if (!ScalarSize || !MinVectorSize || MinVectorSize % ScalarSize != 0 ||
170bool VectorCombine::vectorizeLoadInsert(
Instruction &
I) {
184 auto *
Load = dyn_cast<LoadInst>(
X);
196 Value *SrcPtr =
Load->getPointerOperand()->stripPointerCasts();
197 assert(isa<PointerType>(SrcPtr->
getType()) &&
"Expected a pointer type");
199 unsigned MinVecNumElts = MinVectorSize / ScalarSize;
200 auto *MinVecTy = VectorType::get(ScalarTy, MinVecNumElts,
false);
201 unsigned OffsetEltIndex = 0;
209 unsigned OffsetBitWidth =
DL->getIndexTypeSizeInBits(SrcPtr->
getType());
220 uint64_t ScalarSizeInBytes = ScalarSize / 8;
221 if (
Offset.urem(ScalarSizeInBytes) != 0)
225 OffsetEltIndex =
Offset.udiv(ScalarSizeInBytes).getZExtValue();
226 if (OffsetEltIndex >= MinVecNumElts)
243 unsigned AS =
Load->getPointerAddressSpace();
262 auto *Ty = cast<FixedVectorType>(
I.getType());
263 unsigned OutputNumElts = Ty->getNumElements();
265 assert(OffsetEltIndex < MinVecNumElts &&
"Address offset too big");
266 Mask[0] = OffsetEltIndex;
272 if (OldCost < NewCost || !NewCost.
isValid())
283 replaceValue(
I, *VecLd);
293 auto *Shuf = cast<ShuffleVectorInst>(&
I);
294 if (!Shuf->isIdentityWithPadding())
299 cast<FixedVectorType>(Shuf->getOperand(0)->getType())->getNumElements();
300 unsigned OpIndex =
any_of(Shuf->getShuffleMask(), [&NumOpElts](
int M) {
301 return M >= (int)(NumOpElts);
304 auto *
Load = dyn_cast<LoadInst>(Shuf->getOperand(
OpIndex));
311 auto *Ty = cast<FixedVectorType>(
I.getType());
312 Value *SrcPtr =
Load->getPointerOperand()->stripPointerCasts();
313 assert(isa<PointerType>(SrcPtr->
getType()) &&
"Expected a pointer type");
320 unsigned AS =
Load->getPointerAddressSpace();
335 if (OldCost < NewCost || !NewCost.
isValid())
342 replaceValue(
I, *VecLd);
354 assert(Index0C && Index1C &&
"Expected constant extract indexes");
356 unsigned Index0 = Index0C->getZExtValue();
357 unsigned Index1 = Index1C->getZExtValue();
360 if (Index0 == Index1)
385 if (PreferredExtractIndex == Index0)
387 if (PreferredExtractIndex == Index1)
391 return Index0 > Index1 ? Ext0 : Ext1;
403 unsigned PreferredExtractIndex) {
404 auto *Ext0IndexC = dyn_cast<ConstantInt>(Ext0->
getOperand(1));
405 auto *Ext1IndexC = dyn_cast<ConstantInt>(Ext1->
getOperand(1));
406 assert(Ext0IndexC && Ext1IndexC &&
"Expected constant extract indexes");
408 unsigned Opcode =
I.getOpcode();
419 assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
420 "Expected a compare");
430 unsigned Ext0Index = Ext0IndexC->getZExtValue();
431 unsigned Ext1Index = Ext1IndexC->getZExtValue();
446 InstructionCost CheapExtractCost = std::min(Extract0Cost, Extract1Cost);
456 bool HasUseTax = Ext0 == Ext1 ? !Ext0->
hasNUses(2)
458 OldCost = CheapExtractCost + ScalarOpCost;
459 NewCost = VectorOpCost + CheapExtractCost + HasUseTax * CheapExtractCost;
463 OldCost = Extract0Cost + Extract1Cost + ScalarOpCost;
464 NewCost = VectorOpCost + CheapExtractCost +
469 ConvertToShuffle = getShuffleExtract(Ext0, Ext1, PreferredExtractIndex);
470 if (ConvertToShuffle) {
488 return OldCost < NewCost;
498 auto *VecTy = cast<FixedVectorType>(Vec->
getType());
500 ShufMask[NewIndex] = OldIndex;
519 assert(isa<ConstantInt>(
C) &&
"Expected a constant index operand");
520 if (isa<Constant>(
X))
533 assert(isa<CmpInst>(&
I) &&
"Expected a compare");
536 "Expected matching constant extract indexes");
544 replaceValue(
I, *NewExt);
552 assert(isa<BinaryOperator>(&
I) &&
"Expected a binary operator");
555 "Expected matching constant extract indexes");
565 if (
auto *VecBOInst = dyn_cast<Instruction>(VecBO))
566 VecBOInst->copyIRFlags(&
I);
569 replaceValue(
I, *NewExt);
597 auto *Ext0 = cast<ExtractElementInst>(I0);
598 auto *Ext1 = cast<ExtractElementInst>(I1);
605 if (isExtractExtractCheap(Ext0, Ext1,
I, ExtractToChange, InsertIndex))
608 if (ExtractToChange) {
609 unsigned CheapExtractIdx = ExtractToChange == Ext0 ? C1 : C0;
614 if (ExtractToChange == Ext0)
621 foldExtExtCmp(Ext0, Ext1,
I);
623 foldExtExtBinop(Ext0, Ext1,
I);
650 auto *VecTy = cast<FixedVectorType>(
I.getType());
651 if (SrcVec->
getType() != VecTy)
655 unsigned NumElts = VecTy->getNumElements();
656 if (
Index >= NumElts)
663 std::iota(
Mask.begin(),
Mask.end(), 0);
682 if (NewCost > OldCost)
689 replaceValue(
I, *Shuf);
708 auto *DestTy = dyn_cast<FixedVectorType>(
I.getType());
709 auto *SrcTy = dyn_cast<FixedVectorType>(V0->
getType());
710 if (!DestTy || !SrcTy)
713 unsigned DestEltSize = DestTy->getScalarSizeInBits();
714 unsigned SrcEltSize = SrcTy->getScalarSizeInBits();
715 if (SrcTy->getPrimitiveSizeInBits() % DestEltSize != 0)
718 bool IsUnary = isa<UndefValue>(V1);
725 if (!(BCTy0 && BCTy0->getElementType() == DestTy->getElementType()) &&
726 !(BCTy1 && BCTy1->getElementType() == DestTy->getElementType()))
731 if (DestEltSize <= SrcEltSize) {
734 assert(SrcEltSize % DestEltSize == 0 &&
"Unexpected shuffle mask");
735 unsigned ScaleFactor = SrcEltSize / DestEltSize;
740 assert(DestEltSize % SrcEltSize == 0 &&
"Unexpected shuffle mask");
741 unsigned ScaleFactor = DestEltSize / SrcEltSize;
748 unsigned NumSrcElts = SrcTy->getPrimitiveSizeInBits() / DestEltSize;
753 unsigned NumOps = IsUnary ? 1 : 2;
765 TargetTransformInfo::CastContextHint::None,
770 TargetTransformInfo::CastContextHint::None, CK);
771 if (DestCost > SrcCost || !DestCost.
isValid())
779 replaceValue(
I, *Shuf);
786bool VectorCombine::scalarizeVPIntrinsic(
Instruction &
I) {
787 if (!isa<VPIntrinsic>(
I))
800 if (!ScalarOp0 || !ScalarOp1)
808 auto IsAllTrueMask = [](
Value *MaskVal) {
810 if (
auto *ConstValue = dyn_cast<Constant>(SplattedVal))
811 return ConstValue->isAllOnesValue();
827 if (
auto *FVTy = dyn_cast<FixedVectorType>(VecTy))
828 Mask.resize(FVTy->getNumElements(), 0);
836 Args.push_back(
V->getType());
842 std::optional<unsigned> FunctionalOpcode =
844 std::optional<Intrinsic::ID> ScalarIntrID = std::nullopt;
845 if (!FunctionalOpcode) {
869 <<
", Cost of scalarizing:" << NewCost <<
"\n");
872 if (OldCost < NewCost || !NewCost.
isValid())
883 bool SafeToSpeculate;
886 .
hasFnAttr(Attribute::AttrKind::Speculatable);
889 *FunctionalOpcode, &VPI,
nullptr, &AC, &DT);
890 if (!SafeToSpeculate &&
897 {ScalarOp0, ScalarOp1})
899 ScalarOp0, ScalarOp1);
907bool VectorCombine::scalarizeBinopOrCmp(
Instruction &
I) {
918 bool IsCmp = Pred != CmpInst::Predicate::BAD_ICMP_PREDICATE;
920 for (
User *U :
I.users())
930 Constant *VecC0 =
nullptr, *VecC1 =
nullptr;
931 Value *V0 =
nullptr, *V1 =
nullptr;
944 if (IsConst0 && IsConst1)
946 if (!IsConst0 && !IsConst1 && Index0 != Index1)
951 auto *I0 = dyn_cast_or_null<Instruction>(V0);
952 auto *
I1 = dyn_cast_or_null<Instruction>(V1);
953 if ((IsConst0 && I1 &&
I1->mayReadFromMemory()) ||
959 Type *VecTy =
I.getType();
964 "Unexpected types for insert element into binop or cmp");
966 unsigned Opcode =
I.getOpcode();
985 (IsConst0 ? 0 : InsertCost) + (IsConst1 ? 0 : InsertCost) + VectorOpCost;
987 (IsConst0 ? 0 : !Ins0->
hasOneUse() * InsertCost) +
988 (IsConst1 ? 0 : !Ins1->
hasOneUse() * InsertCost);
991 if (OldCost < NewCost || !NewCost.
isValid())
1011 Scalar->setName(
I.getName() +
".scalar");
1015 if (
auto *ScalarInst = dyn_cast<Instruction>(Scalar))
1016 ScalarInst->copyIRFlags(&
I);
1020 IsCmp ? Builder.
CreateCmp(Pred, VecC0, VecC1)
1023 replaceValue(
I, *Insert);
1033 if (!
I.isBinaryOp() || !
I.getType()->isIntegerTy(1))
1039 Value *B0 =
I.getOperand(0), *B1 =
I.getOperand(1);
1057 auto *Ext0 = cast<ExtractElementInst>(I0);
1058 auto *Ext1 = cast<ExtractElementInst>(I1);
1067 : Instruction::ICmp;
1068 auto *VecTy = dyn_cast<FixedVectorType>(
X->getType());
1085 int CheapIndex = ConvertToShuf == Ext0 ? Index1 : Index0;
1086 int ExpensiveIndex = ConvertToShuf == Ext0 ? Index0 : Index1;
1091 ShufMask[CheapIndex] = ExpensiveIndex;
1100 if (OldCost < NewCost || !NewCost.
isValid())
1114 replaceValue(
I, *NewExt);
1123 unsigned NumScanned = 0;
1133class ScalarizationResult {
1134 enum class StatusTy { Unsafe, Safe, SafeWithFreeze };
1139 ScalarizationResult(StatusTy
Status,
Value *ToFreeze =
nullptr)
1143 ScalarizationResult(
const ScalarizationResult &
Other) =
default;
1144 ~ScalarizationResult() {
1145 assert(!ToFreeze &&
"freeze() not called with ToFreeze being set");
1148 static ScalarizationResult unsafe() {
return {StatusTy::Unsafe}; }
1149 static ScalarizationResult safe() {
return {StatusTy::Safe}; }
1150 static ScalarizationResult safeWithFreeze(
Value *ToFreeze) {
1151 return {StatusTy::SafeWithFreeze, ToFreeze};
1155 bool isSafe()
const {
return Status == StatusTy::Safe; }
1157 bool isUnsafe()
const {
return Status == StatusTy::Unsafe; }
1160 bool isSafeWithFreeze()
const {
return Status == StatusTy::SafeWithFreeze; }
1165 Status = StatusTy::Unsafe;
1170 assert(isSafeWithFreeze() &&
1171 "should only be used when freezing is required");
1173 "UserI must be a user of ToFreeze");
1179 if (
U.get() == ToFreeze)
1196 uint64_t NumElements = VecTy->getElementCount().getKnownMinValue();
1198 if (
auto *
C = dyn_cast<ConstantInt>(
Idx)) {
1199 if (
C->getValue().ult(NumElements))
1200 return ScalarizationResult::safe();
1201 return ScalarizationResult::unsafe();
1204 unsigned IntWidth =
Idx->getType()->getScalarSizeInBits();
1205 APInt Zero(IntWidth, 0);
1206 APInt MaxElts(IntWidth, NumElements);
1212 true, &AC, CtxI, &DT)))
1213 return ScalarizationResult::safe();
1214 return ScalarizationResult::unsafe();
1227 if (ValidIndices.
contains(IdxRange))
1228 return ScalarizationResult::safeWithFreeze(IdxBase);
1229 return ScalarizationResult::unsafe();
1239 if (
auto *
C = dyn_cast<ConstantInt>(
Idx))
1241 C->getZExtValue() *
DL.getTypeStoreSize(ScalarType));
1253bool VectorCombine::foldSingleElementStore(
Instruction &
I) {
1254 auto *
SI = cast<StoreInst>(&
I);
1255 if (!
SI->isSimple() || !isa<VectorType>(
SI->getValueOperand()->getType()))
1263 if (!
match(
SI->getValueOperand(),
1268 if (
auto *Load = dyn_cast<LoadInst>(Source)) {
1269 auto VecTy = cast<VectorType>(
SI->getValueOperand()->getType());
1270 Value *SrcAddr =
Load->getPointerOperand()->stripPointerCasts();
1273 if (!
Load->isSimple() ||
Load->getParent() !=
SI->getParent() ||
1274 !
DL->typeSizeEqualsStoreSize(
Load->getType()->getScalarType()) ||
1275 SrcAddr !=
SI->getPointerOperand()->stripPointerCasts())
1279 if (ScalarizableIdx.isUnsafe() ||
1284 if (ScalarizableIdx.isSafeWithFreeze())
1285 ScalarizableIdx.freeze(Builder, *cast<Instruction>(
Idx));
1287 SI->getValueOperand()->getType(),
SI->getPointerOperand(),
1288 {ConstantInt::get(Idx->getType(), 0), Idx});
1295 replaceValue(
I, *NSI);
1304bool VectorCombine::scalarizeLoadExtract(
Instruction &
I) {
1309 auto *VecTy = cast<VectorType>(
I.getType());
1310 auto *LI = cast<LoadInst>(&
I);
1311 if (LI->isVolatile() || !
DL->typeSizeEqualsStoreSize(VecTy->
getScalarType()))
1316 LI->getPointerAddressSpace());
1320 unsigned NumInstChecked = 0;
1324 for (
auto &Pair : NeedFreeze)
1325 Pair.second.discard();
1332 auto *UI = dyn_cast<ExtractElementInst>(U);
1333 if (!UI || UI->getParent() != LI->getParent())
1340 make_range(std::next(LI->getIterator()), UI->getIterator())) {
1347 LastCheckedInst = UI;
1351 if (ScalarIdx.isUnsafe())
1353 if (ScalarIdx.isSafeWithFreeze()) {
1355 ScalarIdx.discard();
1358 auto *
Index = dyn_cast<ConstantInt>(UI->getOperand(1));
1365 Align(1), LI->getPointerAddressSpace());
1369 if (ScalarizedCost >= OriginalCost)
1374 auto *EI = cast<ExtractElementInst>(U);
1378 auto It = NeedFreeze.
find(EI);
1379 if (It != NeedFreeze.
end())
1380 It->second.freeze(Builder, *cast<Instruction>(
Idx));
1385 auto *NewLoad = cast<LoadInst>(Builder.
CreateLoad(
1386 VecTy->getElementType(),
GEP, EI->getName() +
".scalar"));
1389 LI->getAlign(), VecTy->getElementType(),
Idx, *
DL);
1390 NewLoad->setAlignment(ScalarOpAlignment);
1392 replaceValue(*EI, *NewLoad);
1395 FailureGuard.release();
1400bool VectorCombine::foldShuffleOfBinops(
Instruction &
I) {
1417 auto *ShuffleDstTy = dyn_cast<FixedVectorType>(
I.getType());
1418 auto *BinOpTy = dyn_cast<FixedVectorType>(B0->
getType());
1419 if (!ShuffleDstTy || !BinOpTy)
1422 unsigned NumSrcElts = BinOpTy->getNumElements();
1427 if (BinaryOperator::isCommutative(Opcode) &&
X != Z &&
Y != W &&
1431 auto ConvertToUnary = [NumSrcElts](
int &
M) {
1432 if (M >= (
int)NumSrcElts)
1459 OldMask,
CostKind, 0,
nullptr, {B0, B1}, &
I);
1467 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
1469 if (NewCost >= OldCost)
1477 if (
auto *NewInst = dyn_cast<Instruction>(NewBO)) {
1478 NewInst->copyIRFlags(B0);
1479 NewInst->andIRFlags(B1);
1484 replaceValue(
I, *NewBO);
1490bool VectorCombine::foldShuffleOfCastops(
Instruction &
I) {
1497 auto *C0 = dyn_cast<CastInst>(V0);
1498 auto *C1 = dyn_cast<CastInst>(V1);
1503 if (C0->getSrcTy() != C1->getSrcTy())
1507 if (Opcode != C1->getOpcode()) {
1509 Opcode = Instruction::SExt;
1514 auto *ShuffleDstTy = dyn_cast<FixedVectorType>(
I.getType());
1515 auto *CastDstTy = dyn_cast<FixedVectorType>(C0->getDestTy());
1516 auto *CastSrcTy = dyn_cast<FixedVectorType>(C0->getSrcTy());
1517 if (!ShuffleDstTy || !CastDstTy || !CastSrcTy)
1520 unsigned NumSrcElts = CastSrcTy->getNumElements();
1521 unsigned NumDstElts = CastDstTy->getNumElements();
1522 assert((NumDstElts == NumSrcElts || Opcode == Instruction::BitCast) &&
1523 "Only bitcasts expected to alter src/dst element counts");
1527 if (NumDstElts != NumSrcElts && (NumSrcElts % NumDstElts) != 0 &&
1528 (NumDstElts % NumSrcElts) != 0)
1532 if (NumSrcElts >= NumDstElts) {
1535 assert(NumSrcElts % NumDstElts == 0 &&
"Unexpected shuffle mask");
1536 unsigned ScaleFactor = NumSrcElts / NumDstElts;
1541 assert(NumDstElts % NumSrcElts == 0 &&
"Unexpected shuffle mask");
1542 unsigned ScaleFactor = NumDstElts / NumSrcElts;
1547 auto *NewShuffleDstTy =
1560 OldMask,
CostKind, 0,
nullptr, std::nullopt, &
I);
1568 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
1570 if (NewCost > OldCost)
1578 if (
auto *NewInst = dyn_cast<Instruction>(Cast)) {
1579 NewInst->copyIRFlags(C0);
1580 NewInst->andIRFlags(C1);
1584 replaceValue(
I, *Cast);
1590bool VectorCombine::foldShuffleOfShuffles(
Instruction &
I) {
1601 auto *ShufI0 = dyn_cast<Instruction>(
I.getOperand(0));
1602 auto *ShufI1 = dyn_cast<Instruction>(
I.getOperand(1));
1603 auto *ShuffleDstTy = dyn_cast<FixedVectorType>(
I.getType());
1604 auto *ShuffleSrcTy = dyn_cast<FixedVectorType>(V0->
getType());
1605 auto *ShuffleImmTy = dyn_cast<FixedVectorType>(
I.getOperand(0)->getType());
1606 if (!ShuffleDstTy || !ShuffleSrcTy || !ShuffleImmTy ||
1610 unsigned NumSrcElts = ShuffleSrcTy->getNumElements();
1611 unsigned NumImmElts = ShuffleImmTy->getNumElements();
1614 if ((!isa<PoisonValue>(U0) &&
1615 any_of(InnerMask0, [&](
int M) {
return M >= (int)NumSrcElts; })) ||
1616 (!isa<PoisonValue>(U1) &&
1617 any_of(InnerMask1, [&](
int M) {
return M >= (int)NumSrcElts; })))
1622 for (
int &M : NewMask) {
1623 if (0 <= M && M < (
int)NumImmElts) {
1625 }
else if (M >= (
int)NumImmElts) {
1626 if (InnerMask1[M - NumImmElts] >= (
int)NumSrcElts)
1629 M = InnerMask1[
M - NumImmElts] + (V0 == V1 ? 0 : NumSrcElts);
1635 replaceValue(
I, *V0);
1644 InnerMask0,
CostKind, 0,
nullptr, {V0, U0}, ShufI0) +
1646 InnerMask1,
CostKind, 0,
nullptr, {V1, U1}, ShufI1) +
1648 OuterMask,
CostKind, 0,
nullptr, {ShufI0, ShufI1}, &
I);
1652 NewMask,
CostKind, 0,
nullptr, {V0, V1});
1655 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
1657 if (NewCost > OldCost)
1661 if (
none_of(NewMask, [&](
int M) {
return 0 <=
M &&
M < (int)NumSrcElts; }))
1663 if (
none_of(NewMask, [&](
int M) {
return (
int)NumSrcElts <= M; }))
1667 replaceValue(
I, *Shuf);
1674bool VectorCombine::foldShuffleToIdentity(
Instruction &
I) {
1675 auto *Ty = dyn_cast<FixedVectorType>(
I.getType());
1676 if (!Ty || !isa<Instruction>(
I.getOperand(0)) ||
1677 !isa<Instruction>(
I.getOperand(1)))
1680 using InstLane = std::pair<Value *, int>;
1682 auto LookThroughShuffles = [](
Value *
V,
int Lane) -> InstLane {
1683 while (
auto *SV = dyn_cast<ShuffleVectorInst>(V)) {
1685 cast<FixedVectorType>(SV->getOperand(0)->getType())->getNumElements();
1686 int M = SV->getMaskValue(Lane);
1689 else if (M < (
int)NumElts) {
1690 V = SV->getOperand(0);
1693 V = SV->getOperand(1);
1697 return InstLane{
V, Lane};
1700 auto GenerateInstLaneVectorFromOperand =
1703 for (InstLane V : Item) {
1707 : LookThroughShuffles(
1708 cast<Instruction>(
V.first)->getOperand(
Op),
V.second));
1714 for (
unsigned M = 0, E = Ty->getNumElements(); M < E; ++M)
1715 Start[
M] = LookThroughShuffles(&
I, M);
1720 unsigned NumVisited = 0;
1722 while (!Worklist.
empty()) {
1732 if (Item[0].second == 0 &&
1733 cast<FixedVectorType>(Item[0].first->getType())->getNumElements() ==
1734 Ty->getNumElements() &&
1736 return !E.value().first || (E.value().first == Item[0].first &&
1737 E.value().second == (
int)E.index());
1739 IdentityLeafs.
insert(Item[0].first);
1745 (IL.first == Item[0].first && IL.second == Item[0].second);
1747 SplatLeafs.
insert(Item[0].first);
1756 if (
auto *
I = dyn_cast<Instruction>(IL.first);
I && !
I->hasOneUse())
1758 if (IL.first->getValueID() != Item[0].first->getValueID())
1760 if (isa<CallInst>(IL.first) && !isa<IntrinsicInst>(IL.first))
1762 auto *II = dyn_cast<IntrinsicInst>(IL.first);
1764 (isa<IntrinsicInst>(Item[0].first) &&
1765 II->getIntrinsicID() ==
1766 cast<IntrinsicInst>(Item[0].first)->getIntrinsicID());
1772 if (isa<BinaryOperator>(Item[0].first) &&
1773 !cast<BinaryOperator>(Item[0].first)->isIntDivRem()) {
1774 Worklist.
push_back(GenerateInstLaneVectorFromOperand(Item, 0));
1775 Worklist.
push_back(GenerateInstLaneVectorFromOperand(Item, 1));
1776 }
else if (isa<UnaryOperator>(Item[0].first)) {
1777 Worklist.
push_back(GenerateInstLaneVectorFromOperand(Item, 0));
1778 }
else if (
auto *II = dyn_cast<IntrinsicInst>(Item[0].first);
1784 (cast<Instruction>(IL.first)->getOperand(
Op) ==
1785 cast<Instruction>(Item[0].first)->getOperand(
Op));
1790 Worklist.
push_back(GenerateInstLaneVectorFromOperand(Item,
Op));
1801 if (IdentityLeafs.
contains(Item[0].first) &&
1803 return !E.value().first || (E.value().first == Item[0].first &&
1804 E.value().second == (
int)E.index());
1806 return Item[0].first;
1808 if (SplatLeafs.
contains(Item[0].first)) {
1809 if (
auto ILI = dyn_cast<Instruction>(Item[0].first))
1811 else if (isa<Argument>(Item[0].first))
1817 auto *
I = cast<Instruction>(Item[0].first);
1818 auto *II = dyn_cast<IntrinsicInst>(
I);
1819 unsigned NumOps =
I->getNumOperands() - (II ? 1 : 0);
1821 for (
unsigned Idx = 0;
Idx < NumOps;
Idx++) {
1823 Ops[
Idx] = II->getOperand(
Idx);
1826 Ops[
Idx] = Generate(GenerateInstLaneVectorFromOperand(Item,
Idx));
1830 Ty->getNumElements());
1831 if (
auto BI = dyn_cast<BinaryOperator>(
I))
1836 assert(isa<UnaryInstruction>(
I) &&
1837 "Unexpected instruction type in Generate");
1841 Value *
V = Generate(Start);
1842 replaceValue(
I, *V);
1849bool VectorCombine::foldShuffleFromReductions(
Instruction &
I) {
1850 auto *II = dyn_cast<IntrinsicInst>(&
I);
1853 switch (II->getIntrinsicID()) {
1854 case Intrinsic::vector_reduce_add:
1855 case Intrinsic::vector_reduce_mul:
1856 case Intrinsic::vector_reduce_and:
1857 case Intrinsic::vector_reduce_or:
1858 case Intrinsic::vector_reduce_xor:
1859 case Intrinsic::vector_reduce_smin:
1860 case Intrinsic::vector_reduce_smax:
1861 case Intrinsic::vector_reduce_umin:
1862 case Intrinsic::vector_reduce_umax:
1871 std::queue<Value *> Worklist;
1874 if (
auto *
Op = dyn_cast<Instruction>(
I.getOperand(0)))
1877 while (!Worklist.empty()) {
1878 Value *CV = Worklist.front();
1889 if (
auto *CI = dyn_cast<Instruction>(CV)) {
1890 if (CI->isBinaryOp()) {
1891 for (
auto *
Op : CI->operand_values())
1894 }
else if (
auto *SV = dyn_cast<ShuffleVectorInst>(CI)) {
1895 if (Shuffle && Shuffle != SV)
1912 for (
auto *V : Visited)
1913 for (
auto *U :
V->users())
1914 if (!Visited.contains(U) && U != &
I)
1918 dyn_cast<FixedVectorType>(II->getOperand(0)->getType());
1923 if (!ShuffleInputType)
1931 sort(ConcatMask, [](
int X,
int Y) {
return (
unsigned)
X < (
unsigned)
Y; });
1935 bool IsTruncatingShuffle =
VecType->getNumElements() < NumInputElts;
1936 bool UsesSecondVec =
1937 any_of(ConcatMask, [&](
int M) {
return M >= (int)NumInputElts; });
1940 (UsesSecondVec && !IsTruncatingShuffle) ? VecType : ShuffleInputType;
1946 VecTyForCost, ConcatMask);
1948 LLVM_DEBUG(
dbgs() <<
"Found a reduction feeding from a shuffle: " << *Shuffle
1950 LLVM_DEBUG(
dbgs() <<
" OldCost: " << OldCost <<
" vs NewCost: " << NewCost
1952 if (NewCost < OldCost) {
1956 LLVM_DEBUG(
dbgs() <<
"Created new shuffle: " << *NewShuffle <<
"\n");
1957 replaceValue(*Shuffle, *NewShuffle);
1962 return foldSelectShuffle(*Shuffle,
true);
1967bool VectorCombine::foldTruncFromReductions(
Instruction &
I) {
1968 auto *II = dyn_cast<IntrinsicInst>(&
I);
1974 case Intrinsic::vector_reduce_add:
1975 case Intrinsic::vector_reduce_mul:
1976 case Intrinsic::vector_reduce_and:
1977 case Intrinsic::vector_reduce_or:
1978 case Intrinsic::vector_reduce_xor:
1985 Value *ReductionSrc =
I.getOperand(0);
1991 auto *TruncSrcTy = cast<VectorType>(TruncSrc->
getType());
1992 auto *ReductionSrcTy = cast<VectorType>(ReductionSrc->
getType());
1993 Type *ResultTy =
I.getType();
1997 ReductionOpc, ReductionSrcTy, std::nullopt,
CostKind);
1998 if (
auto *Trunc = dyn_cast<CastInst>(ReductionSrc))
2006 ReductionSrcTy->getScalarType(),
2009 if (OldCost <= NewCost || !NewCost.
isValid())
2013 TruncSrcTy->getScalarType(), II->getIntrinsicID(), {TruncSrc});
2015 replaceValue(
I, *NewTruncation);
2029bool VectorCombine::foldSelectShuffle(
Instruction &
I,
bool FromReduction) {
2030 auto *SVI = cast<ShuffleVectorInst>(&
I);
2031 auto *VT = cast<FixedVectorType>(
I.getType());
2032 auto *Op0 = dyn_cast<Instruction>(SVI->getOperand(0));
2033 auto *Op1 = dyn_cast<Instruction>(SVI->getOperand(1));
2034 if (!Op0 || !Op1 || Op0 == Op1 || !Op0->isBinaryOp() || !Op1->isBinaryOp() ||
2038 auto *SVI0A = dyn_cast<Instruction>(Op0->getOperand(0));
2039 auto *SVI0B = dyn_cast<Instruction>(Op0->getOperand(1));
2040 auto *SVI1A = dyn_cast<Instruction>(Op1->getOperand(0));
2041 auto *SVI1B = dyn_cast<Instruction>(Op1->getOperand(1));
2044 if (!
I ||
I->getOperand(0)->getType() != VT)
2047 return U != Op0 && U != Op1 &&
2048 !(isa<ShuffleVectorInst>(U) &&
2049 (InputShuffles.contains(cast<Instruction>(U)) ||
2050 isInstructionTriviallyDead(cast<Instruction>(U))));
2053 if (checkSVNonOpUses(SVI0A) || checkSVNonOpUses(SVI0B) ||
2054 checkSVNonOpUses(SVI1A) || checkSVNonOpUses(SVI1B))
2062 for (
auto *U :
I->users()) {
2063 auto *SV = dyn_cast<ShuffleVectorInst>(U);
2064 if (!SV || SV->getType() != VT)
2066 if ((SV->getOperand(0) != Op0 && SV->getOperand(0) != Op1) ||
2067 (SV->getOperand(1) != Op0 && SV->getOperand(1) != Op1))
2074 if (!collectShuffles(Op0) || !collectShuffles(Op1))
2078 if (FromReduction && Shuffles.
size() > 1)
2083 if (!FromReduction) {
2085 for (
auto *U : SV->users()) {
2088 Shuffles.push_back(SSV);
2100 int MaxV1Elt = 0, MaxV2Elt = 0;
2101 unsigned NumElts = VT->getNumElements();
2104 SVN->getShuffleMask(Mask);
2108 Value *SVOp0 = SVN->getOperand(0);
2109 Value *SVOp1 = SVN->getOperand(1);
2110 if (isa<UndefValue>(SVOp1)) {
2111 auto *SSV = cast<ShuffleVectorInst>(SVOp0);
2114 for (
unsigned I = 0, E =
Mask.size();
I != E;
I++) {
2120 if (SVOp0 == Op1 && SVOp1 == Op0) {
2124 if (SVOp0 != Op0 || SVOp1 != Op1)
2131 for (
unsigned I = 0;
I <
Mask.size();
I++) {
2134 }
else if (Mask[
I] <
static_cast<int>(NumElts)) {
2135 MaxV1Elt = std::max(MaxV1Elt, Mask[
I]);
2136 auto It =
find_if(V1, [&](
const std::pair<int, int> &
A) {
2137 return Mask[
I] ==
A.first;
2146 MaxV2Elt = std::max<int>(MaxV2Elt, Mask[
I] - NumElts);
2147 auto It =
find_if(V2, [&](
const std::pair<int, int> &
A) {
2148 return Mask[
I] -
static_cast<int>(NumElts) ==
A.first;
2151 ReconstructMask.
push_back(NumElts + It -
V2.begin());
2154 V2.emplace_back(Mask[
I] - NumElts, NumElts +
V2.size());
2162 sort(ReconstructMask);
2163 OrigReconstructMasks.
push_back(std::move(ReconstructMask));
2170 if (V1.
empty() ||
V2.empty() ||
2171 (MaxV1Elt ==
static_cast<int>(V1.
size()) - 1 &&
2172 MaxV2Elt ==
static_cast<int>(
V2.size()) - 1))
2179 auto *SV = dyn_cast<ShuffleVectorInst>(
I);
2182 if (isa<UndefValue>(SV->getOperand(1)))
2183 if (
auto *SSV = dyn_cast<ShuffleVectorInst>(SV->getOperand(0)))
2184 if (InputShuffles.contains(SSV))
2186 return SV->getMaskValue(M);
2194 std::pair<int, int>
Y) {
2195 int MXA = GetBaseMaskValue(
A,
X.first);
2196 int MYA = GetBaseMaskValue(
A,
Y.first);
2199 stable_sort(V1, [&](std::pair<int, int>
A, std::pair<int, int>
B) {
2200 return SortBase(SVI0A,
A,
B);
2202 stable_sort(V2, [&](std::pair<int, int>
A, std::pair<int, int>
B) {
2203 return SortBase(SVI1A,
A,
B);
2208 for (
const auto &Mask : OrigReconstructMasks) {
2210 for (
int M : Mask) {
2212 auto It =
find_if(V, [M](
auto A) {
return A.second ==
M; });
2213 assert(It !=
V.end() &&
"Expected all entries in Mask");
2214 return std::distance(
V.begin(), It);
2218 else if (M <
static_cast<int>(NumElts)) {
2219 ReconstructMask.
push_back(FindIndex(V1, M));
2221 ReconstructMask.
push_back(NumElts + FindIndex(V2, M));
2224 ReconstructMasks.push_back(std::move(ReconstructMask));
2230 for (
unsigned I = 0;
I < V1.
size();
I++) {
2231 V1A.
push_back(GetBaseMaskValue(SVI0A, V1[
I].first));
2232 V1B.
push_back(GetBaseMaskValue(SVI0B, V1[
I].first));
2234 for (
unsigned I = 0;
I <
V2.size();
I++) {
2235 V2A.
push_back(GetBaseMaskValue(SVI1A, V2[
I].first));
2236 V2B.
push_back(GetBaseMaskValue(SVI1B, V2[
I].first));
2238 while (V1A.
size() < NumElts) {
2242 while (V2A.
size() < NumElts) {
2248 auto *SV = dyn_cast<ShuffleVectorInst>(
I);
2254 VT, SV->getShuffleMask());
2265 CostBefore += std::accumulate(Shuffles.begin(), Shuffles.end(),
2267 CostBefore += std::accumulate(InputShuffles.begin(), InputShuffles.end(),
2279 CostAfter += std::accumulate(ReconstructMasks.begin(), ReconstructMasks.end(),
2281 std::set<SmallVector<int>> OutputShuffleMasks({V1A, V1B, V2A, V2B});
2283 std::accumulate(OutputShuffleMasks.begin(), OutputShuffleMasks.end(),
2286 LLVM_DEBUG(
dbgs() <<
"Found a binop select shuffle pattern: " <<
I <<
"\n");
2288 <<
" vs CostAfter: " << CostAfter <<
"\n");
2289 if (CostBefore <= CostAfter)
2294 auto *SV = dyn_cast<ShuffleVectorInst>(
I);
2297 if (isa<UndefValue>(SV->getOperand(1)))
2298 if (
auto *SSV = dyn_cast<ShuffleVectorInst>(SV->getOperand(0)))
2299 if (InputShuffles.contains(SSV))
2301 return SV->getOperand(
Op);
2305 GetShuffleOperand(SVI0A, 1), V1A);
2308 GetShuffleOperand(SVI0B, 1), V1B);
2311 GetShuffleOperand(SVI1A, 1), V2A);
2314 GetShuffleOperand(SVI1B, 1), V2B);
2318 if (
auto *
I = dyn_cast<Instruction>(NOp0))
2319 I->copyIRFlags(Op0,
true);
2323 if (
auto *
I = dyn_cast<Instruction>(NOp1))
2324 I->copyIRFlags(Op1,
true);
2326 for (
int S = 0, E = ReconstructMasks.size(); S != E; S++) {
2329 replaceValue(*Shuffles[S], *NSV);
2332 Worklist.pushValue(NSV0A);
2333 Worklist.pushValue(NSV0B);
2334 Worklist.pushValue(NSV1A);
2335 Worklist.pushValue(NSV1B);
2336 for (
auto *S : Shuffles)
2343bool VectorCombine::run() {
2351 bool MadeChange =
false;
2354 bool IsFixedVectorType = isa<FixedVectorType>(
I.getType());
2355 auto Opcode =
I.getOpcode();
2361 if (IsFixedVectorType) {
2363 case Instruction::InsertElement:
2364 MadeChange |= vectorizeLoadInsert(
I);
2366 case Instruction::ShuffleVector:
2367 MadeChange |= widenSubvectorLoad(
I);
2376 if (isa<VectorType>(
I.getType())) {
2377 MadeChange |= scalarizeBinopOrCmp(
I);
2378 MadeChange |= scalarizeLoadExtract(
I);
2379 MadeChange |= scalarizeVPIntrinsic(
I);
2382 if (Opcode == Instruction::Store)
2383 MadeChange |= foldSingleElementStore(
I);
2386 if (TryEarlyFoldsOnly)
2393 if (IsFixedVectorType) {
2395 case Instruction::InsertElement:
2396 MadeChange |= foldInsExtFNeg(
I);
2398 case Instruction::ShuffleVector:
2399 MadeChange |= foldShuffleOfBinops(
I);
2400 MadeChange |= foldShuffleOfCastops(
I);
2401 MadeChange |= foldShuffleOfShuffles(
I);
2402 MadeChange |= foldSelectShuffle(
I);
2403 MadeChange |= foldShuffleToIdentity(
I);
2405 case Instruction::BitCast:
2406 MadeChange |= foldBitcastShuffle(
I);
2411 case Instruction::Call:
2412 MadeChange |= foldShuffleFromReductions(
I);
2413 MadeChange |= foldTruncFromReductions(
I);
2415 case Instruction::ICmp:
2416 case Instruction::FCmp:
2417 MadeChange |= foldExtractExtract(
I);
2421 MadeChange |= foldExtractExtract(
I);
2422 MadeChange |= foldExtractedCmps(
I);
2435 if (
I.isDebugOrPseudoInst())
2441 while (!Worklist.isEmpty()) {
2464 VectorCombine
Combiner(
F,
TTI, DT, AA, AC,
DL, TryEarlyFoldsOnly);
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This is the interface for LLVM's primary stateless and local alias analysis.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
std::optional< std::vector< StOtherPiece > > Other
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
This is the interface for a simple mod/ref and alias analysis over globals.
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU)
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
FunctionAnalysisManager FAM
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static SymbolRef::Type getType(const Symbol *Sym)
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
static Value * createShiftShuffle(Value *Vec, unsigned OldIndex, unsigned NewIndex, IRBuilder<> &Builder)
Create a shuffle that translates (shifts) 1 element from the input vector to a new element location.
static Value * peekThroughBitcasts(Value *V)
Return the source operand of a potentially bitcasted value.
static Align computeAlignmentAfterScalarization(Align VectorAlignment, Type *ScalarType, Value *Idx, const DataLayout &DL)
The memory operation on a vector of ScalarType had alignment of VectorAlignment.
static ScalarizationResult canScalarizeAccess(VectorType *VecTy, Value *Idx, Instruction *CtxI, AssumptionCache &AC, const DominatorTree &DT)
Check if it is legal to scalarize a memory access to VecTy at index Idx.
static cl::opt< bool > DisableVectorCombine("disable-vector-combine", cl::init(false), cl::Hidden, cl::desc("Disable all vector combine transforms"))
static bool canWidenLoad(LoadInst *Load, const TargetTransformInfo &TTI)
static const unsigned InvalidIndex
static cl::opt< unsigned > MaxInstrsToScan("vector-combine-max-scan-instrs", cl::init(30), cl::Hidden, cl::desc("Max number of instructions to scan for vector combining."))
static cl::opt< bool > DisableBinopExtractShuffle("disable-binop-extract-shuffle", cl::init(false), cl::Hidden, cl::desc("Disable binop extract to shuffle transforms"))
static bool isMemModifiedBetween(BasicBlock::iterator Begin, BasicBlock::iterator End, const MemoryLocation &Loc, AAResults &AA)
static ExtractElementInst * translateExtract(ExtractElementInst *ExtElt, unsigned NewIndex, IRBuilder<> &Builder)
Given an extract element instruction with constant index operand, shuffle the source vector (shift th...
A manager for alias analyses.
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
Check whether or not an instruction may read or write the optionally specified memory location.
Class for arbitrary precision integers.
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
bool hasFnAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the function.
LLVM Basic Block Representation.
InstListType::iterator iterator
Instruction iterators...
BinaryOps getOpcode() const
Represents analyses that only rely on functions' control flow.
Value * getArgOperand(unsigned i) const
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
bool isFPPredicate() const
static Constant * getExtractElement(Constant *Vec, Constant *Idx, Type *OnlyIfReducedTy=nullptr)
This is the shared class of boolean and integer constants.
const APInt & getValue() const
Return the constant as an APInt value reference.
This class represents a range of values.
ConstantRange urem(const ConstantRange &Other) const
Return a new range representing the possible values resulting from an unsigned remainder operation of...
ConstantRange binaryAnd(const ConstantRange &Other) const
Return a new range representing the possible values resulting from a binary-and of a value in this ra...
bool contains(const APInt &Val) const
Return true if the specified value is in the set.
static Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
This class represents an Operation in the Expression.
uint64_t getNumOperands() const
A parsed version of the target data layout string in and methods for querying it.
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&... Args)
Analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Value * CreateFNegFMF(Value *V, Instruction *FMFSource, const Twine &Name="")
Copy fast-math-flags from an instruction rather than using the builder's default FMF.
Value * CreateFreeze(Value *V, const Twine &Name="")
void SetInsertPointPastAllocas(Function *F)
This specifies that created instructions should inserted at the beginning end of the specified functi...
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Value * CreatePointerBitCastOrAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
Value * CreateUnOp(Instruction::UnaryOps Opc, Value *V, const Twine &Name="", MDNode *FPMathTag=nullptr)
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="")
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
InstructionWorklist - This is the worklist management logic for InstCombine and other simplification ...
void pushUsersToWorkList(Instruction &I)
When an instruction is simplified, add all users of the instruction to the work lists because they mi...
void push(Instruction *I)
Push the instruction onto the worklist stack.
void remove(Instruction *I)
Remove I from the worklist if it exists.
bool comesBefore(const Instruction *Other) const
Given an instruction Other in the same basic block as this instruction, return true if this instructi...
bool mayReadFromMemory() const LLVM_READONLY
Return true if this instruction may read memory.
void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
An instruction for reading from memory.
Representation for a specific memory location.
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
void preserveSet()
Mark an analysis set as preserved.
This instruction constructs a fixed permutation of two input vectors.
int getMaskValue(unsigned Elt) const
Return the shuffle mask value of this instruction for the given element index.
VectorType * getType() const
Overload to return most specific vector type.
static void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
static void commuteShuffleMask(MutableArrayRef< int > Mask, unsigned InVecNumElts)
Change values in a shuffle permute mask assuming the two vector operands of length InVecNumElts have ...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setAlignment(Align Align)
Analysis pass providing the TargetTransformInfo.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isVectorTy() const
True if this is an instance of VectorType.
bool isPointerTy() const
True if this is an instance of PointerType.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
'undef' values are things that do not have specified contents.
A Use represents the edge between a Value definition and its users.
Value * getOperand(unsigned i) const
static bool isVPBinOp(Intrinsic::ID ID)
This is the common base class for vector predication intrinsics.
std::optional< unsigned > getFunctionalIntrinsicID() const
std::optional< unsigned > getFunctionalOpcode() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
This is a wrapper around stripAndAccumulateConstantOffsets with the in-bounds requirement set to fals...
bool hasOneUse() const
Return true if there is exactly one use of this value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
StringRef getName() const
Return a constant reference to the value's name.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &)
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
AttributeList getAttributes(LLVMContext &C, ID id)
Return the attributes for an intrinsic.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
BinaryOp_match< LHS, RHS, Instruction::URem > m_URem(const LHS &L, const RHS &R)
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
match_combine_and< LTy, RTy > m_CombineAnd(const LTy &L, const RTy &R)
Combine two pattern matchers matching L && R.
CastOperator_match< OpTy, Instruction::Trunc > m_Trunc(const OpTy &Op)
Matches Trunc.
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
OneUse_match< T > m_OneUse(const T &SubPattern)
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
class_match< UndefValue > m_UndefValue()
Match an arbitrary UndefValue constant.
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
match_combine_or< CastInst_match< OpTy, SExtInst >, NNegZExt_match< OpTy > > m_SExtLike(const OpTy &Op)
Match either "sext" or "zext nneg".
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
auto m_Undef()
Match an arbitrary undef constant.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void stable_sort(R &&Range)
UnaryFunction for_each(R &&Range, UnaryFunction F)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
detail::scope_exit< std::decay_t< Callable > > make_scope_exit(Callable &&F)
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are are tuples (A,...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
unsigned getArithmeticReductionInstruction(Intrinsic::ID RdxID)
Returns the arithmetic instruction opcode used when expanding a reduction.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
bool mustSuppressSpeculation(const LoadInst &LI)
Return true if speculation of the given load must be suppressed to avoid ordering or interfering with...
bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
ConstantRange computeConstantRange(const Value *V, bool ForSigned, bool UseInstrInfo=true, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Determine the possible constant range of an integer or vector of integer value.
bool isSafeToSpeculativelyExecuteWithOpcode(unsigned Opcode, const Instruction *Inst, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
This returns the same result as isSafeToSpeculativelyExecute if Opcode is the actual opcode of Inst.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
bool isModSet(const ModRefInfo MRI)
void sort(IteratorTy Start, IteratorTy End)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
constexpr int PoisonMaskElem
void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
DWARFExpression::Operation Op
bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if the instruction does not have any effects besides calculating the result and does not ...
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
bool isSafeToLoadUnconditionally(Value *V, Align Alignment, APInt &Size, const DataLayout &DL, Instruction *ScanFrom=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if we know that executing a load from this value cannot trap.
bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx)
Identifies if the vector form of the intrinsic has a scalar operand.
bool isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Returns true if V cannot be poison, but may be undef.
bool isTriviallyVectorizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially vectorizable.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.