#define DEBUG_TYPE "vector-combine"
STATISTIC(NumVecLoad, "Number of vector loads formed");
STATISTIC(NumVecCmp, "Number of vector compares formed");
STATISTIC(NumVecBO, "Number of vector binops formed");
STATISTIC(NumVecCmpBO, "Number of vector compare + binop formed");
STATISTIC(NumShufOfBitcast, "Number of shuffles moved after bitcast");
STATISTIC(NumScalarBO, "Number of scalar binops formed");
STATISTIC(NumScalarCmp, "Number of scalar compares formed");

static cl::opt<bool> DisableVectorCombine(
    "disable-vector-combine", cl::init(false), cl::Hidden,
    cl::desc("Disable all vector combine transforms"));

static cl::opt<bool> DisableBinopExtractShuffle(
    "disable-binop-extract-shuffle", cl::init(false), cl::Hidden,
    cl::desc("Disable binop extract to shuffle transforms"));

static cl::opt<unsigned> MaxInstrsToScan(
    "vector-combine-max-scan-instrs", cl::init(30), cl::Hidden,
    cl::desc("Max number of instructions to scan for vector combining."));
static const unsigned InvalidIndex = std::numeric_limits<unsigned>::max();

  VectorCombine(Function &F, const TargetTransformInfo &TTI,
                const DominatorTree &DT, AAResults &AA, AssumptionCache &AC,
                const DataLayout *DL, bool TryEarlyFoldsOnly)
      : F(F), Builder(F.getContext()), TTI(TTI), DT(DT), AA(AA), AC(AC),
        DL(DL), TryEarlyFoldsOnly(TryEarlyFoldsOnly) {}

  /// If true, only perform beneficial early IR transforms. Do not introduce
  /// new vector operations.
  bool TryEarlyFoldsOnly;

  ExtractElementInst *getShuffleExtract(ExtractElementInst *Ext0,
                                        ExtractElementInst *Ext1,
                                        unsigned PreferredExtractIndex) const;
  bool isExtractExtractCheap(ExtractElementInst *Ext0,
                             ExtractElementInst *Ext1, const Instruction &I,
                             ExtractElementInst *&ConvertToShuffle,
                             unsigned PreferredExtractIndex);
  bool foldSelectShuffle(Instruction &I, bool FromReduction = false);

  void replaceValue(Value &Old, Value &New) {
    Old.replaceAllUsesWith(&New);
    if (auto *NewI = dyn_cast<Instruction>(&New)) {
      New.takeName(&Old);
      Worklist.pushUsersToWorkList(*NewI);
      Worklist.pushValue(NewI);
    }
    Worklist.pushValue(&Old);
  }
/// Return the source operand of a potentially bitcasted value.
static Value *peekThroughBitcasts(Value *V) {
  while (auto *BitCast = dyn_cast<BitCastInst>(V))
    V = BitCast->getOperand(0);
  return V;
}
static bool canWidenLoad(LoadInst *Load, const TargetTransformInfo &TTI) {
  // Do not widen an atomic/volatile load, a multi-use load, or a load in a
  // memtag-sanitized function: the wider load could touch bytes the original
  // never accessed.
  if (!Load || !Load->isSimple() || !Load->hasOneUse() ||
      Load->getFunction()->hasFnAttribute(Attribute::SanitizeMemTag) ||
      mustSuppressSpeculation(*Load))
    return false;

  // Make sure the type constraints for this target are satisfied.
  Type *ScalarTy = Load->getType()->getScalarType();
  uint64_t ScalarSize = ScalarTy->getPrimitiveSizeInBits();
  unsigned MinVectorSize = TTI.getMinVectorRegisterBitWidth();
  if (!ScalarSize || !MinVectorSize || MinVectorSize % ScalarSize != 0 ||
      ScalarSize % 8 != 0)
    return false;

  return true;
}
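// Illustrative IR (not from this file), assuming a 128-bit minimum vector
// register width, of the rewrite vectorizeLoadInsert performs:
//   %s = load float, ptr %p
//   %r = insertelement <4 x float> poison, float %s, i32 0
// becomes, when the wider load is legal and profitable:
//   %v = load <4 x float>, ptr %p
//   %r = shufflevector <4 x float> %v, <4 x float> poison,
//                      <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>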
bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
  // Match insert of a loaded scalar into a fixed vector.
  // ...
  auto *Load = dyn_cast<LoadInst>(X);
  if (!canWidenLoad(Load, TTI))
    return false;
  // ...
  Value *SrcPtr = Load->getPointerOperand()->stripPointerCasts();
  assert(isa<PointerType>(SrcPtr->getType()) && "Expected a pointer type");

  unsigned MinVecNumElts = MinVectorSize / ScalarSize;
  auto *MinVecTy = VectorType::get(ScalarTy, MinVecNumElts, false);
  unsigned OffsetEltIndex = 0;
  // ...
  unsigned OffsetBitWidth = DL->getIndexTypeSizeInBits(SrcPtr->getType());
  // ...
  uint64_t ScalarSizeInBytes = ScalarSize / 8;
  if (Offset.urem(ScalarSizeInBytes) != 0)
    return false;

  // If we load MinVecNumElts, will our target element still be loaded?
  OffsetEltIndex = Offset.udiv(ScalarSizeInBytes).getZExtValue();
  if (OffsetEltIndex >= MinVecNumElts)
    return false;
  // ...
  unsigned AS = Load->getPointerAddressSpace();
  // ...
  auto *Ty = cast<FixedVectorType>(I.getType());
  unsigned OutputNumElts = Ty->getNumElements();
  SmallVector<int, 16> Mask(OutputNumElts, PoisonMaskElem);
  assert(OffsetEltIndex < MinVecNumElts && "Address offset too big");
  Mask[0] = OffsetEltIndex;
  // ...
  if (OldCost < NewCost || !NewCost.isValid())
    return false;
  // ...
  replaceValue(I, *VecLd);
  return true;
}
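// Illustrative IR (not from this file): widenSubvectorLoad rewrites an
// identity-with-padding shuffle of a one-use load
//   %l = load <2 x float>, ptr %p
//   %r = shufflevector <2 x float> %l, <2 x float> poison,
//                      <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
// into a single wider load, when the extra bytes are known dereferenceable:
//   %r = load <4 x float>, ptr %p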
bool VectorCombine::widenSubvectorLoad(Instruction &I) {
  // Match subvector insert of fixed vector.
  auto *Shuf = cast<ShuffleVectorInst>(&I);
  if (!Shuf->isIdentityWithPadding())
    return false;

  // Allow a non-canonical shuffle mask that is choosing elements from op1.
  unsigned NumOpElts =
      cast<FixedVectorType>(Shuf->getOperand(0)->getType())->getNumElements();
  unsigned OpIndex = any_of(Shuf->getShuffleMask(), [&NumOpElts](int M) {
    return M >= (int)(NumOpElts);
  });

  auto *Load = dyn_cast<LoadInst>(Shuf->getOperand(OpIndex));
  if (!canWidenLoad(Load, TTI))
    return false;
  // ...
  auto *Ty = cast<FixedVectorType>(I.getType());
  Value *SrcPtr = Load->getPointerOperand()->stripPointerCasts();
  assert(isa<PointerType>(SrcPtr->getType()) && "Expected a pointer type");
  // ...
  unsigned AS = Load->getPointerAddressSpace();
  // ...
  if (OldCost < NewCost || !NewCost.isValid())
    return false;
  // ...
  replaceValue(I, *VecLd);
  return true;
}
ExtractElementInst *VectorCombine::getShuffleExtract(
    ExtractElementInst *Ext0, ExtractElementInst *Ext1,
    unsigned PreferredExtractIndex) const {
  auto *Index0C = dyn_cast<ConstantInt>(Ext0->getIndexOperand());
  auto *Index1C = dyn_cast<ConstantInt>(Ext1->getIndexOperand());
  assert(Index0C && Index1C && "Expected constant extract indexes");

  unsigned Index0 = Index0C->getZExtValue();
  unsigned Index1 = Index1C->getZExtValue();

  // If the extract indexes are identical, no shuffle is needed.
  if (Index0 == Index1)
    return nullptr;
  // ... (compare the cost of extracting at each index; prefer the cheap one)
  // If the costs are equivalent, use the preferred extract index.
  if (PreferredExtractIndex == Index0)
    return Ext1;
  if (PreferredExtractIndex == Index1)
    return Ext0;

  // Otherwise, shuffle the higher index over to the lower index.
  return Index0 > Index1 ? Ext0 : Ext1;
}
bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
                                          ExtractElementInst *Ext1,
                                          const Instruction &I,
                                          ExtractElementInst *&ConvertToShuffle,
                                          unsigned PreferredExtractIndex) {
  auto *Ext0IndexC = dyn_cast<ConstantInt>(Ext0->getOperand(1));
  auto *Ext1IndexC = dyn_cast<ConstantInt>(Ext1->getOperand(1));
  assert(Ext0IndexC && Ext1IndexC && "Expected constant extract indexes");

  unsigned Opcode = I.getOpcode();
  // ...
  assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
         "Expected a compare");
  // ...
  unsigned Ext0Index = Ext0IndexC->getZExtValue();
  unsigned Ext1Index = Ext1IndexC->getZExtValue();
  // ...
  InstructionCost CheapExtractCost = std::min(Extract0Cost, Extract1Cost);

  // Extra uses of the extracts mean that we include those costs in the
  // vector total because those instructions will not be eliminated.
  if (Ext0->getOperand(0) == Ext1->getOperand(0) && Ext0Index == Ext1Index) {
    bool HasUseTax = Ext0 == Ext1 ? !Ext0->hasNUses(2)
                                  : !Ext0->hasOneUse() || !Ext1->hasOneUse();
    OldCost = CheapExtractCost + ScalarOpCost;
    NewCost = VectorOpCost + CheapExtractCost + HasUseTax * CheapExtractCost;
  } else {
    OldCost = Extract0Cost + Extract1Cost + ScalarOpCost;
    NewCost = VectorOpCost + CheapExtractCost +
              !Ext0->hasOneUse() * Extract0Cost +
              !Ext1->hasOneUse() * Extract1Cost;
  }

  ConvertToShuffle = getShuffleExtract(Ext0, Ext1, PreferredExtractIndex);
  if (ConvertToShuffle) {
    // ... (account for the cost of the extra shuffle)
  }

  // Aggressively form a vector op if the cost is equal because the transform
  // may enable further optimization; codegen can invert this if it is costly.
  return OldCost < NewCost;
}
/// Create a shuffle that translates (shifts) 1 element from the input vector
/// to a new element location.
static Value *createShiftShuffle(Value *Vec, unsigned OldIndex,
                                 unsigned NewIndex, IRBuilder<> &Builder) {
  // The shuffle mask is poison except for 1 lane that is being translated to
  // the new element index.
  auto *VecTy = cast<FixedVectorType>(Vec->getType());
  SmallVector<int, 32> ShufMask(VecTy->getNumElements(), PoisonMaskElem);
  ShufMask[NewIndex] = OldIndex;
  return Builder.CreateShuffleVector(Vec, ShufMask, "shift");
}
/// Given an extract element instruction with constant index operand, shuffle
/// the source vector (shift the scalar element) to a NewIndex for extraction.
/// Return null if this is not worthwhile.
static ExtractElementInst *translateExtract(ExtractElementInst *ExtElt,
                                            unsigned NewIndex,
                                            IRBuilder<> &Builder) {
  Value *X = ExtElt->getVectorOperand();
  Value *C = ExtElt->getIndexOperand();
  assert(isa<ConstantInt>(C) && "Expected a constant index operand");
  if (isa<Constant>(X))
    return nullptr;

  Value *Shuf = createShiftShuffle(X, cast<ConstantInt>(C)->getZExtValue(),
                                   NewIndex, Builder);
  return cast<ExtractElementInst>(Builder.CreateExtractElement(Shuf, NewIndex));
}
void VectorCombine::foldExtExtCmp(ExtractElementInst *Ext0,
                                  ExtractElementInst *Ext1, Instruction &I) {
  assert(isa<CmpInst>(&I) && "Expected a compare");
  assert(cast<ConstantInt>(Ext0->getIndexOperand())->getZExtValue() ==
             cast<ConstantInt>(Ext1->getIndexOperand())->getZExtValue() &&
         "Expected matching constant extract indexes");

  // cmp Pred (extelt V0, C), (extelt V1, C) --> extelt (cmp Pred V0, V1), C
  ++NumVecCmp;
  // ...
  replaceValue(I, *NewExt);
}
void VectorCombine::foldExtExtBinop(ExtractElementInst *Ext0,
                                    ExtractElementInst *Ext1, Instruction &I) {
  assert(isa<BinaryOperator>(&I) && "Expected a binary operator");
  assert(cast<ConstantInt>(Ext0->getIndexOperand())->getZExtValue() ==
             cast<ConstantInt>(Ext1->getIndexOperand())->getZExtValue() &&
         "Expected matching constant extract indexes");

  // bo (extelt V0, C), (extelt V1, C) --> extelt (bo V0, V1), C
  ++NumVecBO;
  // ...
  if (auto *VecBOInst = dyn_cast<Instruction>(VecBO))
    VecBOInst->copyIRFlags(&I);
  // ...
  replaceValue(I, *NewExt);
}
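// Illustrative example (not from this file) of the extract-extract fold the
// driver below performs when the vector op is cheap enough:
//   %e0 = extractelement <4 x i32> %x, i32 1
//   %e1 = extractelement <4 x i32> %y, i32 1
//   %r  = add i32 %e0, %e1
// becomes:
//   %v = add <4 x i32> %x, %y
//   %r = extractelement <4 x i32> %v, i32 1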
bool VectorCombine::foldExtractExtract(Instruction &I) {
  // ... (match a binop or cmp of two extractelements with constant indexes;
  //      unsafe ops such as div/rem are rejected earlier)
  auto *Ext0 = cast<ExtractElementInst>(I0);
  auto *Ext1 = cast<ExtractElementInst>(I1);
  // ...
  ExtractElementInst *ExtractToChange;
  if (isExtractExtractCheap(Ext0, Ext1, I, ExtractToChange, InsertIndex))
    return false;

  if (ExtractToChange) {
    unsigned CheapExtractIdx = ExtractToChange == Ext0 ? C1 : C0;
    ExtractElementInst *NewExtract =
        translateExtract(ExtractToChange, CheapExtractIdx, Builder);
    if (!NewExtract)
      return false;
    if (ExtractToChange == Ext0)
      Ext0 = NewExtract;
    else
      Ext1 = NewExtract;
  }

  if (Pred != CmpInst::BAD_ICMP_PREDICATE)
    foldExtExtCmp(Ext0, Ext1, I);
  else
    foldExtExtBinop(Ext0, Ext1, I);

  Worklist.push(&I);
  return true;
}
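// Illustrative example (not from this file): an extract/fneg/insert chain at a
// matching lane can become a vector fneg feeding a select-like shuffle:
//   %e = extractelement <4 x float> %src, i32 1
//   %n = fneg float %e
//   %r = insertelement <4 x float> %dst, float %n, i32 1
// becomes:
//   %nv = fneg <4 x float> %src
//   %r  = shufflevector <4 x float> %dst, <4 x float> %nv,
//                       <4 x i32> <i32 0, i32 5, i32 2, i32 3>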
bool VectorCombine::foldInsExtFNeg(Instruction &I) {
  // Match: insertelt DestVec, (fneg (extractelt SrcVec, Index)), Index
  // ...
  auto *VecTy = cast<FixedVectorType>(I.getType());
  if (SrcVec->getType() != VecTy)
    return false;

  unsigned NumElts = VecTy->getNumElements();
  if (Index >= NumElts)
    return false;

  // We are inserting the negated element into the same lane that we extracted
  // from. This is equivalent to a select-shuffle that chooses all but the
  // negated element from the destination vector.
  SmallVector<int> Mask(NumElts);
  std::iota(Mask.begin(), Mask.end(), 0);
  Mask[Index] = Index + NumElts;
  // ...
  if (NewCost > OldCost)
    return false;
  // ...
  replaceValue(I, *Shuf);
  return true;
}
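// Illustrative example (not from this file): moving a shuffle after a bitcast
// by scaling its mask to the new element size, here a 2x narrowing:
//   %s = shufflevector <2 x i64> %v, <2 x i64> poison, <2 x i32> <i32 1, i32 0>
//   %b = bitcast <2 x i64> %s to <4 x i32>
// becomes:
//   %b = bitcast <2 x i64> %v to <4 x i32>
//   %s = shufflevector <4 x i32> %b, <4 x i32> poison,
//                      <4 x i32> <i32 2, i32 3, i32 0, i32 1>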
bool VectorCombine::foldBitcastShuffle(Instruction &I) {
  // ... (match bitcast (shuffle V0, V1, Mask))
  auto *DestTy = dyn_cast<FixedVectorType>(I.getType());
  auto *SrcTy = dyn_cast<FixedVectorType>(V0->getType());
  if (!DestTy || !SrcTy)
    return false;

  unsigned DestEltSize = DestTy->getScalarSizeInBits();
  unsigned SrcEltSize = SrcTy->getScalarSizeInBits();
  if (SrcTy->getPrimitiveSizeInBits() % DestEltSize != 0)
    return false;

  bool IsUnary = isa<UndefValue>(V1);
  // ...
  if (!(BCTy0 && BCTy0->getElementType() == DestTy->getElementType()) &&
      !(BCTy1 && BCTy1->getElementType() == DestTy->getElementType()))
    return false;

  SmallVector<int, 16> NewMask;
  if (DestEltSize <= SrcEltSize) {
    // The bitcast is from wide to narrow/equal elements. The shuffle mask can
    // always be expanded to the equivalent form with narrow elements.
    assert(SrcEltSize % DestEltSize == 0 && "Unexpected shuffle mask");
    unsigned ScaleFactor = SrcEltSize / DestEltSize;
    narrowShuffleMaskElts(ScaleFactor, Mask, NewMask);
  } else {
    // The bitcast is from narrow elements to wide elements. The shuffle mask
    // must choose consecutive elements to allow casting first.
    assert(DestEltSize % SrcEltSize == 0 && "Unexpected shuffle mask");
    unsigned ScaleFactor = DestEltSize / SrcEltSize;
    if (!widenShuffleMaskElts(ScaleFactor, Mask, NewMask))
      return false;
  }
  // ...
  unsigned NumSrcElts = SrcTy->getPrimitiveSizeInBits() / DestEltSize;
  // ...
  unsigned NumOps = IsUnary ? 1 : 2;

  // Compare the costs of casting before vs. after the shuffle (both cast
  // costs are queried with TargetTransformInfo::CastContextHint::None, CK).
  // ...
  if (DestCost > SrcCost || !DestCost.isValid())
    return false;
  // ...
  replaceValue(I, *Shuf);
  return true;
}
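// Illustrative example (not from this file): a VP intrinsic whose vector
// operands are both splats can be performed on the scalar values and then
// splatted, assuming the mask is all-true (or the EVL is known non-zero when
// speculation is unsafe):
//   ; %va is a splat of %a, %vb is a splat of %b
//   %r = call <4 x i32> @llvm.vp.add.v4i32(<4 x i32> %va, <4 x i32> %vb,
//                                          <4 x i1> %mask, i32 %evl)
// becomes a scalar add of %a and %b followed by a vector splat of the result.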
bool VectorCombine::scalarizeVPIntrinsic(Instruction &I) {
  if (!isa<VPIntrinsic>(I))
    return false;
  VPIntrinsic &VPI = cast<VPIntrinsic>(I);

  // Scalarize only if both vector operands are splats.
  Value *ScalarOp0 = getSplatValue(VPI.getArgOperand(0));
  Value *ScalarOp1 = getSplatValue(VPI.getArgOperand(1));
  if (!ScalarOp0 || !ScalarOp1)
    return false;
  // ...
  auto IsAllTrueMask = [](Value *MaskVal) {
    if (Value *SplattedVal = getSplatValue(MaskVal))
      if (auto *ConstValue = dyn_cast<Constant>(SplattedVal))
        return ConstValue->isAllOnesValue();
    return false;
  };
  // ...
  SmallVector<int> Mask;
  if (auto *FVTy = dyn_cast<FixedVectorType>(VecTy))
    Mask.resize(FVTy->getNumElements(), 0);
  // ...
  for (Value *V : VPI.args())
    Args.push_back(V->getType());
  // ...
  std::optional<unsigned> FunctionalOpcode = VPI.getFunctionalOpcode();
  std::optional<Intrinsic::ID> ScalarIntrID = std::nullopt;
  if (!FunctionalOpcode) {
    ScalarIntrID = VPI.getFunctionalIntrinsicID();
    if (!ScalarIntrID)
      return false;
  }
  // ...
  LLVM_DEBUG(dbgs() << "Cost of Intrinsic: " << OldCost
                    << ", Cost of scalarizing:" << NewCost << "\n");
  if (OldCost < NewCost || !NewCost.isValid())
    return false;

  // If the VP op might introduce UB or poison, we can scalarize it provided
  // that we know the EVL > 0: if the EVL is zero, then the original VP op
  // becomes a no-op and thus won't be UB.
  bool SafeToSpeculate;
  if (ScalarIntrID)
    SafeToSpeculate = Intrinsic::getAttributes(I.getContext(), *ScalarIntrID)
                          .hasFnAttr(Attribute::AttrKind::Speculatable);
  else
    SafeToSpeculate = isSafeToSpeculativelyExecuteWithOpcode(
        *FunctionalOpcode, &VPI, nullptr, &AC, &DT);
  if (!SafeToSpeculate &&
      !isKnownNonZero(EVL, SimplifyQuery(*DL, &DT, &AC, &VPI)))
    return false;

  Value *ScalarVal =
      ScalarIntrID
          ? Builder.CreateIntrinsic(VecTy->getScalarType(), *ScalarIntrID,
                                    {ScalarOp0, ScalarOp1})
          : Builder.CreateBinOp((Instruction::BinaryOps)(*FunctionalOpcode),
                                ScalarOp0, ScalarOp1);
  // ... (splat ScalarVal back to a vector and replace the intrinsic)
  return true;
}
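// Illustrative example (not from this file): a binop of values inserted into
// constant vectors at the same index scalarizes to one scalar op plus one
// insert, with the constant bases folded:
//   %i0 = insertelement <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i32 %x, i32 1
//   %i1 = insertelement <4 x i32> <i32 7, i32 7, i32 7, i32 7>, i32 %y, i32 1
//   %r  = add <4 x i32> %i0, %i1
// becomes:
//   %s = add i32 %x, %y
//   %r = insertelement <4 x i32> <i32 7, i32 7, i32 7, i32 7>, i32 %s, i32 1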
bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
  // ... (match a binop or cmp whose operands are insertelements)
  bool IsCmp = Pred != CmpInst::Predicate::BAD_ICMP_PREDICATE;

  // Do not convert the vector condition of a vector select into a scalar
  // condition.
  if (IsCmp)
    for (User *U : I.users())
      if (match(U, m_Select(m_Specific(&I), m_Value(), m_Value())))
        return false;

  // Match against one or both scalar values being inserted into constant
  // vectors:
  //   vec_op VecC0, (inselt VecC1, V1, Index)
  //   vec_op (inselt VecC0, V0, Index), VecC1
  //   vec_op (inselt VecC0, V0, Index), (inselt VecC1, V1, Index)
  Constant *VecC0 = nullptr, *VecC1 = nullptr;
  Value *V0 = nullptr, *V1 = nullptr;
  // ...
  if (IsConst0 && IsConst1)
    return false;
  if (!IsConst0 && !IsConst1 && Index0 != Index1)
    return false;

  // Bail for single insertion if it is a load.
  auto *I0 = dyn_cast_or_null<Instruction>(V0);
  auto *I1 = dyn_cast_or_null<Instruction>(V1);
  if ((IsConst0 && I1 && I1->mayReadFromMemory()) ||
      (IsConst1 && I0 && I0->mayReadFromMemory()))
    return false;

  Type *VecTy = I.getType();
  // ...
  assert(VecTy->isVectorTy() &&
         (IsConst0 || IsConst1 || V0->getType() == V1->getType()) &&
         (ScalarTy->isIntegerTy() || ScalarTy->isFloatingPointTy() ||
          ScalarTy->isPointerTy()) &&
         "Unexpected types for insert element into binop or cmp");

  unsigned Opcode = I.getOpcode();
  // ...
  InstructionCost OldCost =
      (IsConst0 ? 0 : InsertCost) + (IsConst1 ? 0 : InsertCost) + VectorOpCost;
  InstructionCost NewCost = ScalarOpCost + InsertCost +
      (IsConst0 ? 0 : !Ins0->hasOneUse() * InsertCost) +
      (IsConst1 ? 0 : !Ins1->hasOneUse() * InsertCost);

  // We want to scalarize unless the vector variant actually has lower cost.
  if (OldCost < NewCost || !NewCost.isValid())
    return false;
  // ... (create the scalar op)
  Scalar->setName(I.getName() + ".scalar");

  // All IR flags are safe to back-propagate. There is no potential for extra
  // poison to be created by the scalar instruction.
  if (auto *ScalarInst = dyn_cast<Instruction>(Scalar))
    ScalarInst->copyIRFlags(&I);

  // Fold the vector constants in the original vectors into a new base vector.
  Value *NewVecC =
      IsCmp ? Builder.CreateCmp(Pred, VecC0, VecC1)
            : Builder.CreateBinOp((Instruction::BinaryOps)Opcode, VecC0, VecC1);
  Value *Insert = Builder.CreateInsertElement(NewVecC, Scalar, Index);
  replaceValue(I, *Insert);
  return true;
}
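// Illustrative example (not from this file): two compares of extracts from the
// same vector, combined by a logic op, can become one vector compare, a
// shuffle lining up the two lanes, a vector logic op, and a single extract:
//   %e0 = extractelement <4 x i32> %x, i32 0
//   %e1 = extractelement <4 x i32> %x, i32 1
//   %c0 = icmp sgt i32 %e0, 42
//   %c1 = icmp sgt i32 %e1, 99
//   %r  = and i1 %c0, %c1
// is rewritten to compare %x against a constant vector holding 42 and 99 in
// the corresponding lanes first.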
bool VectorCombine::foldExtractedCmps(Instruction &I) {
  // We are looking for a scalar binop of booleans:
  //   binop i1 (cmp Pred I0, C0), (cmp Pred I1, C1)
  if (!I.isBinaryOp() || !I.getType()->isIntegerTy(1))
    return false;

  // The compare predicates should match, and each compare should have a
  // constant operand.
  Value *B0 = I.getOperand(0), *B1 = I.getOperand(1);
  // ...
  auto *Ext0 = cast<ExtractElementInst>(I0);
  auto *Ext1 = cast<ExtractElementInst>(I1);
  // ...
  auto CmpOpcode =
      CmpInst::isFPPredicate(P0) ? Instruction::FCmp : Instruction::ICmp;
  auto *VecTy = dyn_cast<FixedVectorType>(X->getType());
  if (!VecTy)
    return false;
  // ...
  int CheapIndex = ConvertToShuf == Ext0 ? Index1 : Index0;
  int ExpensiveIndex = ConvertToShuf == Ext0 ? Index0 : Index1;
  // ...
  ShufMask[CheapIndex] = ExpensiveIndex;
  // ...
  if (OldCost < NewCost || !NewCost.isValid())
    return false;
  // ... (create the vector cmp, shuffle, vector binop, and final extract)
  replaceValue(I, *NewExt);
  return true;
}
/// Check if memory is modified between Begin and End.
static bool isMemModifiedBetween(BasicBlock::iterator Begin,
                                 BasicBlock::iterator End,
                                 const MemoryLocation &Loc, AAResults &AA) {
  unsigned NumScanned = 0;
  return std::any_of(Begin, End, [&](const Instruction &Instr) {
    return isModSet(AA.getModRefInfo(&Instr, Loc)) ||
           ++NumScanned > MaxInstrsToScan;
  });
}
/// Helper class to indicate whether a vector index can be safely scalarized
/// and, if a freeze is needed, which value it should be applied to.
class ScalarizationResult {
  enum class StatusTy { Unsafe, Safe, SafeWithFreeze };

  StatusTy Status;
  Value *ToFreeze;

  ScalarizationResult(StatusTy Status, Value *ToFreeze = nullptr)
      : Status(Status), ToFreeze(ToFreeze) {}

public:
  ScalarizationResult(const ScalarizationResult &Other) = default;
  ~ScalarizationResult() {
    assert(!ToFreeze && "freeze() not called with ToFreeze being set");
  }

  static ScalarizationResult unsafe() { return {StatusTy::Unsafe}; }
  static ScalarizationResult safe() { return {StatusTy::Safe}; }
  static ScalarizationResult safeWithFreeze(Value *ToFreeze) {
    return {StatusTy::SafeWithFreeze, ToFreeze};
  }

  /// Returns true if the index can be scalarized without requiring a freeze.
  bool isSafe() const { return Status == StatusTy::Safe; }
  /// Returns true if the index cannot be scalarized.
  bool isUnsafe() const { return Status == StatusTy::Unsafe; }
  /// Returns true if the index can be scalarized, but requires inserting a
  /// freeze.
  bool isSafeWithFreeze() const { return Status == StatusTy::SafeWithFreeze; }

  /// Reset the state of Unsafe and clear ToFreeze if set.
  void discard() {
    ToFreeze = nullptr;
    Status = StatusTy::Unsafe;
  }

  /// Freeze the ToFreeze and update the use in \p UserI to use it.
  void freeze(IRBuilder<> &Builder, Instruction &UserI) {
    assert(isSafeWithFreeze() &&
           "should only be used when freezing is required");
    assert(is_contained(ToFreeze->users(), &UserI) &&
           "UserI must be a user of ToFreeze");
    IRBuilder<>::InsertPointGuard Guard(Builder);
    Builder.SetInsertPoint(cast<Instruction>(&UserI));
    Value *Frozen =
        Builder.CreateFreeze(ToFreeze, ToFreeze->getName() + ".frozen");
    for (Use &U : make_early_inc_range(UserI.operands()))
      if (U.get() == ToFreeze)
        U.set(Frozen);

    ToFreeze = nullptr;
  }
};
/// Check if it is legal to scalarize a memory access to \p VecTy at index \p
/// Idx. \p Idx must access a valid vector element.
static ScalarizationResult canScalarizeAccess(VectorType *VecTy, Value *Idx,
                                              Instruction *CtxI,
                                              AssumptionCache &AC,
                                              const DominatorTree &DT) {
  // We do checks for both fixed vector types and scalable vector types.
  // This is the number of elements of fixed vector types,
  // or the minimum number of elements of scalable vector types.
  uint64_t NumElements = VecTy->getElementCount().getKnownMinValue();

  if (auto *C = dyn_cast<ConstantInt>(Idx)) {
    if (C->getValue().ult(NumElements))
      return ScalarizationResult::safe();
    return ScalarizationResult::unsafe();
  }

  unsigned IntWidth = Idx->getType()->getScalarSizeInBits();
  APInt Zero(IntWidth, 0);
  APInt MaxElts(IntWidth, NumElements);
  ConstantRange ValidIndices(Zero, MaxElts);

  if (isGuaranteedNotToBePoison(Idx, &AC, CtxI, &DT)) {
    if (ValidIndices.contains(computeConstantRange(Idx, /*ForSigned=*/false,
                                                   true, &AC, CtxI, &DT)))
      return ScalarizationResult::safe();
    return ScalarizationResult::unsafe();
  }

  // If the index may be poison, check if we can insert a freeze before the
  // range of the index is restricted.
  // ... (IdxBase/IdxRange are derived from `and`/`urem` patterns on Idx)
  if (ValidIndices.contains(IdxRange))
    return ScalarizationResult::safeWithFreeze(IdxBase);
  return ScalarizationResult::unsafe();
}
/// The memory operation on a vector of \p ScalarType had alignment of
/// \p VectorAlignment. Compute the maximal, but conservatively correct,
/// alignment for the scalarized access at index \p Idx.
static Align computeAlignmentAfterScalarization(Align VectorAlignment,
                                                Type *ScalarType, Value *Idx,
                                                const DataLayout &DL) {
  if (auto *C = dyn_cast<ConstantInt>(Idx))
    return commonAlignment(VectorAlignment,
                           C->getZExtValue() * DL.getTypeStoreSize(ScalarType));
  return commonAlignment(VectorAlignment, DL.getTypeStoreSize(ScalarType));
}
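// Illustrative example (not from this file): a load/insertelement/store round
// trip through the same address collapses to a single scalar store:
//   %v = load <4 x i32>, ptr %p
//   %i = insertelement <4 x i32> %v, i32 %b, i32 1
//   store <4 x i32> %i, ptr %p
// becomes:
//   %gep = getelementptr inbounds <4 x i32>, ptr %p, i32 0, i32 1
//   store i32 %b, ptr %gep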
bool VectorCombine::foldSingleElementStore(Instruction &I) {
  auto *SI = cast<StoreInst>(&I);
  if (!SI->isSimple() || !isa<VectorType>(SI->getValueOperand()->getType()))
    return false;

  // TODO: Combine more complicated patterns (multiple inserts) by referencing
  // TargetTransformInfo.
  Instruction *Source;
  Value *NewElement;
  Value *Idx;
  if (!match(SI->getValueOperand(),
             m_InsertElt(m_Instruction(Source), m_Value(NewElement),
                         m_Value(Idx))))
    return false;

  if (auto *Load = dyn_cast<LoadInst>(Source)) {
    auto VecTy = cast<VectorType>(SI->getValueOperand()->getType());
    Value *SrcAddr = Load->getPointerOperand()->stripPointerCasts();
    // Don't optimize for atomic/volatile load or store. Ensure memory is not
    // modified between, vector type matches store size, and index is inbounds.
    if (!Load->isSimple() || Load->getParent() != SI->getParent() ||
        !DL->typeSizeEqualsStoreSize(Load->getType()->getScalarType()) ||
        SrcAddr != SI->getPointerOperand()->stripPointerCasts())
      return false;

    auto ScalarizableIdx = canScalarizeAccess(VecTy, Idx, Load, AC, DT);
    if (ScalarizableIdx.isUnsafe() ||
        isMemModifiedBetween(Load->getIterator(), SI->getIterator(),
                             MemoryLocation::get(SI), AA))
      return false;

    if (ScalarizableIdx.isSafeWithFreeze())
      ScalarizableIdx.freeze(Builder, *cast<Instruction>(Idx));
    Value *GEP = Builder.CreateInBoundsGEP(
        SI->getValueOperand()->getType(), SI->getPointerOperand(),
        {ConstantInt::get(Idx->getType(), 0), Idx});
    // ... (build the scalar store NSI and carry over alignment/metadata)
    replaceValue(I, *NSI);
    eraseInstruction(I);
    return true;
  }

  return false;
}
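// Illustrative example (not from this file): when every user of a vector load
// is an extractelement, each extract can become a narrow scalar load:
//   %v = load <4 x i32>, ptr %p
//   %e = extractelement <4 x i32> %v, i32 2
// becomes:
//   %gep = getelementptr inbounds <4 x i32>, ptr %p, i32 0, i32 2
//   %e   = load i32, ptr %gep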
bool VectorCombine::scalarizeLoadExtract(Instruction &I) {
  // ...
  auto *VecTy = cast<VectorType>(I.getType());
  auto *LI = cast<LoadInst>(&I);
  if (LI->isVolatile() || !DL->typeSizeEqualsStoreSize(VecTy->getScalarType()))
    return false;

  InstructionCost OriginalCost =
      TTI.getMemoryOpCost(Instruction::Load, VecTy, LI->getAlign(),
                          LI->getPointerAddressSpace());
  InstructionCost ScalarizedCost = 0;

  Instruction *LastCheckedInst = LI;
  unsigned NumInstChecked = 0;
  DenseMap<ExtractElementInst *, ScalarizationResult> NeedFreeze;
  auto FailureGuard = make_scope_exit([&]() {
    // If the transform is aborted, discard the ScalarizationResults.
    for (auto &Pair : NeedFreeze)
      Pair.second.discard();
  });

  // Check if all users of the load are extracts with no memory modifications
  // between the load and the extract. Compute the cost of both the original
  // code and the scalarized version.
  for (User *U : LI->users()) {
    auto *UI = dyn_cast<ExtractElementInst>(U);
    if (!UI || UI->getParent() != LI->getParent())
      return false;
    // ...
    for (Instruction &I :
         make_range(std::next(LI->getIterator()), UI->getIterator())) {
      // ... (bail out if memory may be modified, bounded by MaxInstrsToScan)
    }
    LastCheckedInst = UI;

    auto ScalarIdx = canScalarizeAccess(VecTy, UI->getOperand(1), &I, AC, DT);
    if (ScalarIdx.isUnsafe())
      return false;
    if (ScalarIdx.isSafeWithFreeze()) {
      NeedFreeze.try_emplace(UI, ScalarIdx);
      ScalarIdx.discard();
    }

    auto *Index = dyn_cast<ConstantInt>(UI->getOperand(1));
    // ...
    ScalarizedCost +=
        TTI.getMemoryOpCost(Instruction::Load, VecTy->getElementType(),
                            Align(1), LI->getPointerAddressSpace());
    // ...
  }

  if (ScalarizedCost >= OriginalCost)
    return false;

  // Replace extracts with narrow scalar loads.
  for (User *U : LI->users()) {
    auto *EI = cast<ExtractElementInst>(U);
    Value *Idx = EI->getOperand(1);

    // Insert 'freeze' for poison indexes.
    auto It = NeedFreeze.find(EI);
    if (It != NeedFreeze.end())
      It->second.freeze(Builder, *cast<Instruction>(Idx));

    Builder.SetInsertPoint(EI);
    Value *GEP = Builder.CreateInBoundsGEP(VecTy, LI->getPointerOperand(),
                                           {Builder.getInt32(0), Idx});
    auto *NewLoad = cast<LoadInst>(Builder.CreateLoad(
        VecTy->getElementType(), GEP, EI->getName() + ".scalar"));

    Align ScalarOpAlignment = computeAlignmentAfterScalarization(
        LI->getAlign(), VecTy->getElementType(), Idx, *DL);
    NewLoad->setAlignment(ScalarOpAlignment);

    replaceValue(*EI, *NewLoad);
  }

  FailureGuard.release();
  return true;
}
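// Illustrative example (not from this file): a shuffle of two binops with the
// same opcode sinks the shuffle to the operands, trading two binops for one:
//   %b0 = add <4 x i32> %x, %y
//   %b1 = add <4 x i32> %z, %w
//   %r  = shufflevector <4 x i32> %b0, <4 x i32> %b1,
//                       <4 x i32> <i32 0, i32 5, i32 2, i32 7>
// becomes:
//   %s0 = shufflevector <4 x i32> %x, <4 x i32> %z,
//                       <4 x i32> <i32 0, i32 5, i32 2, i32 7>
//   %s1 = shufflevector <4 x i32> %y, <4 x i32> %w,
//                       <4 x i32> <i32 0, i32 5, i32 2, i32 7>
//   %r  = add <4 x i32> %s0, %s1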
bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
  // ... (match a shuffle of two binops B0 = X op Y and B1 = Z op W)
  auto *ShuffleDstTy = dyn_cast<FixedVectorType>(I.getType());
  auto *BinOpTy = dyn_cast<FixedVectorType>(B0->getType());
  if (!ShuffleDstTy || !BinOpTy)
    return false;

  unsigned NumSrcElts = BinOpTy->getNumElements();

  // If we have something like "add X, Y" and "add Z, X", swap operands so the
  // matching below only has to handle one order.
  if (BinaryOperator::isCommutative(Opcode) && X != Z && Y != W &&
      (X == W || Y == Z))
    std::swap(X, Y);

  auto ConvertToUnary = [NumSrcElts](int &M) {
    if (M >= (int)NumSrcElts)
      M -= NumSrcElts;
  };
  // ...
  // Old cost includes the outer shuffle of the two binop results.
  InstructionCost OldCost = TTI.getShuffleCost(
      TargetTransformInfo::SK_PermuteTwoSrc, BinOpTy, OldMask, CostKind, 0,
      nullptr, {B0, B1}, &I);
  // ... (plus the two binops; NewCost covers the operand shuffles + one binop)
  LLVM_DEBUG(dbgs() << "Found a shuffle feeding two binops: " << I
                    << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
                    << "\n");
  if (NewCost >= OldCost)
    return false;
  // ... (create the operand shuffles and the single new binop NewBO)
  // Intersect flags from the old binops.
  if (auto *NewInst = dyn_cast<Instruction>(NewBO)) {
    NewInst->copyIRFlags(B0);
    NewInst->andIRFlags(B1);
  }
  // ...
  replaceValue(I, *NewBO);
  return true;
}
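// Illustrative example (not from this file): a shuffle of two identical casts
// becomes one cast of a wider shuffle:
//   %c0 = sext <4 x i16> %x to <4 x i32>
//   %c1 = sext <4 x i16> %y to <4 x i32>
//   %r  = shufflevector <4 x i32> %c0, <4 x i32> %c1,
//             <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
// becomes:
//   %s = shufflevector <4 x i16> %x, <4 x i16> %y,
//             <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
//   %r = sext <8 x i16> %s to <8 x i32>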
bool VectorCombine::foldShuffleOfCastops(Instruction &I) {
  // ... (match a shuffle of two castops with mask OldMask)
  auto *C0 = dyn_cast<CastInst>(V0);
  auto *C1 = dyn_cast<CastInst>(V1);
  if (!C0 || !C1)
    return false;

  Instruction::CastOps Opcode = C0->getOpcode();
  if (C0->getSrcTy() != C1->getSrcTy())
    return false;

  // Handle shuffle(zext_nneg(x), sext(y)) -> sext(shuffle(x, y)) folds.
  if (Opcode != C1->getOpcode()) {
    if (match(C0, m_SExtLike(m_Value())) && match(C1, m_SExtLike(m_Value())))
      Opcode = Instruction::SExt;
    else
      return false;
  }

  auto *ShuffleDstTy = dyn_cast<FixedVectorType>(I.getType());
  auto *CastDstTy = dyn_cast<FixedVectorType>(C0->getDestTy());
  auto *CastSrcTy = dyn_cast<FixedVectorType>(C0->getSrcTy());
  if (!ShuffleDstTy || !CastDstTy || !CastSrcTy)
    return false;

  unsigned NumSrcElts = CastSrcTy->getNumElements();
  unsigned NumDstElts = CastDstTy->getNumElements();
  assert((NumDstElts == NumSrcElts || Opcode == Instruction::BitCast) &&
         "Only bitcasts expected to alter src/dst element counts");

  // Check for bitcasts that alter the number of elements.
  if (NumDstElts != NumSrcElts && (NumSrcElts % NumDstElts) != 0 &&
      (NumDstElts % NumSrcElts) != 0)
    return false;

  SmallVector<int, 16> NewMask;
  if (NumSrcElts >= NumDstElts) {
    // The bitcast is from wide to narrow/equal elements. The shuffle mask can
    // always be expanded to the equivalent form with narrow elements.
    assert(NumSrcElts % NumDstElts == 0 && "Unexpected shuffle mask");
    unsigned ScaleFactor = NumSrcElts / NumDstElts;
    narrowShuffleMaskElts(ScaleFactor, OldMask, NewMask);
  } else {
    // The bitcast is from narrow elements to wide elements. The shuffle mask
    // must choose consecutive elements to allow casting first.
    assert(NumDstElts % NumSrcElts == 0 && "Unexpected shuffle mask");
    unsigned ScaleFactor = NumDstElts / NumSrcElts;
    if (!widenShuffleMaskElts(ScaleFactor, OldMask, NewMask))
      return false;
  }

  auto *NewShuffleDstTy =
      FixedVectorType::get(CastSrcTy->getScalarType(), NewMask.size());
  // ...
  OldCost += TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc,
                                CastDstTy, OldMask, CostKind, 0, nullptr,
                                std::nullopt, &I);
  // ...
  LLVM_DEBUG(dbgs() << "Found a shuffle feeding two casts: " << I
                    << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
                    << "\n");
  if (NewCost > OldCost)
    return false;
  // ... (create the new shuffle of the cast sources, then the new cast)
  // Intersect flags from the old casts.
  if (auto *NewInst = dyn_cast<Instruction>(Cast)) {
    NewInst->copyIRFlags(C0);
    NewInst->andIRFlags(C1);
  }
  // ...
  replaceValue(I, *Cast);
  return true;
}
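// Illustrative example (not from this file): an outer shuffle of two unary
// shuffles of the same-typed sources folds to one shuffle with a composed mask:
//   %s0 = shufflevector <4 x i32> %x, <4 x i32> poison,
//                       <4 x i32> <i32 3, i32 2, i32 1, i32 0>
//   %s1 = shufflevector <4 x i32> %y, <4 x i32> poison,
//                       <4 x i32> <i32 1, i32 0, i32 3, i32 2>
//   %r  = shufflevector <4 x i32> %s0, <4 x i32> %s1,
//                       <4 x i32> <i32 0, i32 4, i32 1, i32 5>
// becomes:
//   %r = shufflevector <4 x i32> %x, <4 x i32> %y,
//                      <4 x i32> <i32 3, i32 5, i32 2, i32 4>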
bool VectorCombine::foldShuffleOfShuffles(Instruction &I) {
  // ... (match the outer shuffle and the two inner shuffles of V0/U0, V1/U1)
  auto *ShufI0 = dyn_cast<Instruction>(I.getOperand(0));
  auto *ShufI1 = dyn_cast<Instruction>(I.getOperand(1));
  auto *ShuffleDstTy = dyn_cast<FixedVectorType>(I.getType());
  auto *ShuffleSrcTy = dyn_cast<FixedVectorType>(V0->getType());
  auto *ShuffleImmTy = dyn_cast<FixedVectorType>(I.getOperand(0)->getType());
  if (!ShuffleDstTy || !ShuffleSrcTy || !ShuffleImmTy ||
      V0->getType() != V1->getType())
    return false;

  unsigned NumSrcElts = ShuffleSrcTy->getNumElements();
  unsigned NumImmElts = ShuffleImmTy->getNumElements();

  // Bail if either inner mask references its (non-poison) RHS operand.
  if ((!isa<PoisonValue>(U0) &&
       any_of(InnerMask0, [&](int M) { return M >= (int)NumSrcElts; })) ||
      (!isa<PoisonValue>(U1) &&
       any_of(InnerMask1, [&](int M) { return M >= (int)NumSrcElts; })))
    return false;

  // Merge the shuffle masks.
  SmallVector<int, 16> NewMask(OuterMask);
  for (int &M : NewMask) {
    if (0 <= M && M < (int)NumImmElts) {
      M = InnerMask0[M];
    } else if (M >= (int)NumImmElts) {
      if (InnerMask1[M - NumImmElts] >= (int)NumSrcElts)
        return false;
      M = InnerMask1[M - NumImmElts] + (V0 == V1 ? 0 : NumSrcElts);
    }
  }

  // Have we folded to an identity shuffle?
  if (ShuffleVectorInst::isIdentityMask(NewMask, NumSrcElts)) {
    replaceValue(I, *V0);
    return true;
  }

  // Try to merge the shuffles if the new shuffle is not costly.
  InstructionCost OldCost =
      TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
                         ShuffleSrcTy, InnerMask0, CostKind, 0, nullptr,
                         {V0, U0}, ShufI0) +
      TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
                         ShuffleSrcTy, InnerMask1, CostKind, 0, nullptr,
                         {V1, U1}, ShufI1) +
      TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, ShuffleImmTy,
                         OuterMask, CostKind, 0, nullptr, {ShufI0, ShufI1}, &I);
  InstructionCost NewCost =
      TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, ShuffleSrcTy,
                         NewMask, CostKind, 0, nullptr, {V0, V1});

  LLVM_DEBUG(dbgs() << "Found a shuffle of shuffles: " << I
                    << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
                    << "\n");
  if (NewCost > OldCost)
    return false;

  // Clear unused sources to poison.
  if (none_of(NewMask, [&](int M) { return 0 <= M && M < (int)NumSrcElts; }))
    V0 = PoisonValue::get(ShuffleSrcTy);
  if (none_of(NewMask, [&](int M) { return (int)NumSrcElts <= M; }))
    V1 = PoisonValue::get(ShuffleSrcTy);

  Value *Shuf = Builder.CreateShuffleVector(V0, V1, NewMask);
  replaceValue(I, *Shuf);
  return true;
}
static InstLane lookThroughShuffles(Use *U, int Lane) {
  while (auto *SV = dyn_cast<ShuffleVectorInst>(U->get())) {
    unsigned NumElts =
        cast<FixedVectorType>(SV->getOperand(0)->getType())->getNumElements();
    int M = SV->getMaskValue(Lane);
    if (M < 0)
      return {nullptr, PoisonMaskElem};
    if (static_cast<unsigned>(M) < NumElts) {
      U = &SV->getOperandUse(0);
      Lane = M;
    } else {
      U = &SV->getOperandUse(1);
      Lane = M - NumElts;
    }
  }
  return InstLane{U, Lane};
}
static SmallVector<InstLane>
generateInstLaneVectorFromOperand(ArrayRef<InstLane> Item, int Op) {
  // For each lane in Item, trace operand Op of its instruction back through
  // any shuffles.
  // ...
  auto [U, Lane] = IL;
  // ...
}
/// Detect concat of multiple values into a vector.
static bool isFreeConcat(ArrayRef<InstLane> Item,
                         const TargetTransformInfo &TTI) {
  auto *Ty = cast<FixedVectorType>(Item.front().first->get()->getType());
  unsigned NumElts = Ty->getNumElements();
  if (Item.size() == NumElts || NumElts == 1 || Item.size() % NumElts != 0)
    return false;

  // Check that the concat is free, usually meaning that the type will be
  // split during legalization.
  SmallVector<int, 16> ConcatMask(NumElts * 2);
  std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
  // ... (bail out if the target reports a non-zero cost for this shuffle)

  unsigned NumSlices = Item.size() / NumElts;
  // Check that the slices are all identity slices of distinct source values.
  for (unsigned Slice = 0; Slice < NumSlices; ++Slice) {
    Use *SliceV = Item[Slice * NumElts].first;
    if (!SliceV || SliceV->get()->getType() != Ty)
      return false;
    for (unsigned Elt = 0; Elt < NumElts; ++Elt) {
      auto [V, Lane] = Item[Slice * NumElts + Elt];
      if (Lane != static_cast<int>(Elt) || SliceV->get() != V->get())
        return false;
    }
  }
  return true;
}
static Value *generateNewInstTree(ArrayRef<InstLane> Item, FixedVectorType *Ty,
                                  const SmallPtrSet<Use *, 4> &IdentityLeafs,
                                  const SmallPtrSet<Use *, 4> &SplatLeafs,
                                  const SmallPtrSet<Use *, 4> &ConcatLeafs,
                                  IRBuilder<> &Builder) {
  auto [FrontU, FrontLane] = Item.front();

  if (IdentityLeafs.contains(FrontU)) {
    return FrontU->get();
  }
  // ... (splat leafs are re-created with a single splat shuffle)
  if (ConcatLeafs.contains(FrontU)) {
    unsigned NumElts =
        cast<FixedVectorType>(FrontU->get()->getType())->getNumElements();
    SmallVector<Value *> Values(Item.size() / NumElts, nullptr);
    for (unsigned S = 0; S < Values.size(); ++S)
      Values[S] = Item[S * NumElts].first->get();

    // Concatenate neighbouring values until a single value remains.
    while (Values.size() > 1) {
      NumElts *= 2;
      SmallVector<int, 16> Mask(NumElts, 0);
      std::iota(Mask.begin(), Mask.end(), 0);
      SmallVector<Value *> NewValues(Values.size() / 2, nullptr);
      for (unsigned S = 0; S < NewValues.size(); ++S)
        NewValues[S] =
            Builder.CreateShuffleVector(Values[S * 2], Values[S * 2 + 1], Mask);
      Values = NewValues;
    }
    return Values[0];
  }

  auto *I = cast<Instruction>(FrontU->get());
  auto *II = dyn_cast<IntrinsicInst>(I);
  unsigned NumOps = I->getNumOperands() - (II ? 1 : 0);
  SmallVector<Value *> Ops(NumOps);
  for (unsigned Idx = 0; Idx < NumOps; Idx++) {
    // ...
    Ops[Idx] = generateNewInstTree(generateInstLaneVectorFromOperand(Item, Idx),
                                   Ty, IdentityLeafs, SplatLeafs, ConcatLeafs,
                                   Builder);
  }

  SmallVector<Value *, 8> ValueList;
  for (const auto &Lane : Item)
    if (Lane.first)
      ValueList.push_back(Lane.first->get());

  Builder.SetInsertPoint(I);
  if (auto *BI = dyn_cast<BinaryOperator>(I)) {
    auto *Value = Builder.CreateBinOp((Instruction::BinaryOps)BI->getOpcode(),
                                      Ops[0], Ops[1]);
    propagateIRFlags(Value, ValueList);
    return Value;
  }
  if (auto *CI = dyn_cast<CmpInst>(I)) {
    auto *Value = Builder.CreateCmp(CI->getPredicate(), Ops[0], Ops[1]);
    propagateIRFlags(Value, ValueList);
    return Value;
  }
  if (auto *SI = dyn_cast<SelectInst>(I)) {
    auto *Value = Builder.CreateSelect(Ops[0], Ops[1], Ops[2], "", SI);
    propagateIRFlags(Value, ValueList);
    return Value;
  }
  if (auto *CI = dyn_cast<CastInst>(I)) {
    auto *Value = Builder.CreateCast((Instruction::CastOps)CI->getOpcode(),
                                     Ops[0], Ty);
    propagateIRFlags(Value, ValueList);
    return Value;
  }
  // ... (intrinsics are re-created with CreateIntrinsic)
  assert(isa<UnaryInstruction>(I) &&
         "Unexpected instruction type in Generate");
  auto *Value =
      Builder.CreateUnOp((Instruction::UnaryOps)I->getOpcode(), Ops[0]);
  propagateIRFlags(Value, ValueList);
  return Value;
}
bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
  auto *Ty = dyn_cast<FixedVectorType>(I.getType());
  if (!Ty || I.use_empty())
    return false;

  SmallVector<InstLane> Start(Ty->getNumElements());
  for (unsigned M = 0, E = Ty->getNumElements(); M < E; ++M)
    Start[M] = lookThroughShuffles(&*I.use_begin(), M);

  SmallVector<SmallVector<InstLane>> Worklist;
  Worklist.push_back(Start);
  SmallPtrSet<Use *, 4> IdentityLeafs, SplatLeafs, ConcatLeafs;
  unsigned NumVisited = 0;

  while (!Worklist.empty()) {
    if (++NumVisited > MaxInstrsToScan)
      return false;

    SmallVector<InstLane> Item = Worklist.pop_back_val();
    auto [FrontU, FrontLane] = Item.front();

    // If we found an undef first lane then bail out to keep things simple.
    if (!FrontU)
      return false;

    // Helper to peek through bitcasts to the same type.
    auto IsEquiv = [&](Value *X, Value *Y) {
      return X->getType() == Y->getType() &&
             peekThroughBitcasts(X) == peekThroughBitcasts(Y);
    };

    // Look for an identity value.
    if (FrontLane == 0 &&
        cast<FixedVectorType>(FrontU->get()->getType())->getNumElements() ==
            Ty->getNumElements() &&
        all_of(drop_begin(enumerate(Item)), [IsEquiv, Item](const auto &E) {
          Value *FrontV = Item.front().first->get();
          return !E.value().first || (IsEquiv(E.value().first->get(), FrontV) &&
                                      E.value().second == (int)E.index());
        })) {
      IdentityLeafs.insert(FrontU);
      continue;
    }
    // Look for constants, for the moment only supporting constant splats.
    if (auto *C = dyn_cast<Constant>(FrontU);
        C && C->getSplatValue() &&
        all_of(drop_begin(Item), [Item](InstLane &IL) {
          Value *FrontV = Item.front().first->get();
          Use *U = IL.first;
          return !U || U->get() == FrontV;
        })) {
      SplatLeafs.insert(FrontU);
      continue;
    }
    // Look for a splat value.
    if (all_of(drop_begin(Item), [Item](InstLane &IL) {
          auto [FrontU, FrontLane] = Item.front();
          auto [U, Lane] = IL;
          return !U || (U->get() == FrontU->get() && Lane == FrontLane);
        })) {
      SplatLeafs.insert(FrontU);
      continue;
    }

    // We need each element to be the same type of value, and check that each
    // element has a single use.
    auto CheckLaneIsEquivalentToFirst = [Item](InstLane IL) {
      Value *FrontV = Item.front().first->get();
      if (!IL.first)
        return true;
      Value *V = IL.first->get();
      if (auto *I = dyn_cast<Instruction>(V); I && !I->hasOneUse())
        return false;
      if (V->getValueID() != FrontV->getValueID())
        return false;
      if (auto *CI = dyn_cast<CmpInst>(V))
        if (CI->getPredicate() != cast<CmpInst>(FrontV)->getPredicate())
          return false;
      if (auto *CI = dyn_cast<CastInst>(V))
        if (CI->getSrcTy() != cast<CastInst>(FrontV)->getSrcTy())
          return false;
      if (auto *SI = dyn_cast<SelectInst>(V))
        if (!isa<VectorType>(SI->getOperand(0)->getType()) ||
            SI->getOperand(0)->getType() !=
                cast<SelectInst>(FrontV)->getOperand(0)->getType())
          return false;
      if (isa<CallInst>(V) && !isa<IntrinsicInst>(V))
        return false;
      auto *II = dyn_cast<IntrinsicInst>(V);
      return !II || (isa<IntrinsicInst>(FrontV) &&
                     II->getIntrinsicID() ==
                         cast<IntrinsicInst>(FrontV)->getIntrinsicID());
    };
    if (!all_of(drop_begin(Item), CheckLaneIsEquivalentToFirst))
      return false;

    // Check the operator is one that we support.
    if (isa<BinaryOperator, CmpInst>(FrontU)) {
      // We exclude div/rem in case they hit UB from poison lanes.
      if (auto *BO = dyn_cast<BinaryOperator>(FrontU);
          BO && BO->isIntDivRem())
        return false;
      Worklist.push_back(generateInstLaneVectorFromOperand(Item, 0));
      Worklist.push_back(generateInstLaneVectorFromOperand(Item, 1));
    } else if (isa<UnaryOperator, TruncInst, ZExtInst, SExtInst>(FrontU)) {
      Worklist.push_back(generateInstLaneVectorFromOperand(Item, 0));
    } else if (auto *BitCast = dyn_cast<BitCastInst>(FrontU)) {
      // TODO: Handle vector widening/narrowing bitcasts.
      auto *DstTy = dyn_cast<FixedVectorType>(BitCast->getDestTy());
      auto *SrcTy = dyn_cast<FixedVectorType>(BitCast->getSrcTy());
      if (DstTy && SrcTy &&
          SrcTy->getNumElements() == DstTy->getNumElements()) {
        Worklist.push_back(generateInstLaneVectorFromOperand(Item, 0));
      } else
        return false;
    } else if (isa<SelectInst>(FrontU)) {
      Worklist.push_back(generateInstLaneVectorFromOperand(Item, 0));
      Worklist.push_back(generateInstLaneVectorFromOperand(Item, 1));
      Worklist.push_back(generateInstLaneVectorFromOperand(Item, 2));
    } else if (auto *II = dyn_cast<IntrinsicInst>(FrontU);
               II && isTriviallyVectorizable(II->getIntrinsicID())) {
      for (unsigned Op = 0, E = II->getNumOperands() - 1; Op < E; Op++) {
        if (isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(), Op)) {
          if (!all_of(drop_begin(Item), [Item, Op](InstLane &IL) {
                Value *FrontV = Item.front().first->get();
                Use *U = IL.first;
                return !U || (cast<Instruction>(U->get())->getOperand(Op) ==
                              cast<Instruction>(FrontV)->getOperand(Op));
              }))
            return false;
          continue;
        }
        Worklist.push_back(generateInstLaneVectorFromOperand(Item, Op));
      }
    } else if (isFreeConcat(Item, TTI)) {
      ConcatLeafs.insert(FrontU);
    } else {
      return false;
    }
  }

  if (NumVisited <= 1)
    return false;

  // If we got this far, we know the shuffles are superfluous and can be
  // removed. Scan through again and generate the new tree of instructions.
  Value *V = generateNewInstTree(Start, Ty, IdentityLeafs, SplatLeafs,
                                 ConcatLeafs, Builder);
  replaceValue(I, *V);
  return true;
}
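// Illustrative example (not from this file): because integer reductions are
// lane-order invariant, a shuffle that only feeds the reduction can have its
// mask sorted toward an identity, after which it is trivially removable:
//   %s = shufflevector <4 x i32> %x, <4 x i32> poison,
//                      <4 x i32> <i32 3, i32 2, i32 1, i32 0>
//   %r = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s)
// effectively becomes:
//   %r = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %x)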
bool VectorCombine::foldShuffleFromReductions(Instruction &I) {
  auto *II = dyn_cast<IntrinsicInst>(&I);
  if (!II)
    return false;
  switch (II->getIntrinsicID()) {
  case Intrinsic::vector_reduce_add:
  case Intrinsic::vector_reduce_mul:
  case Intrinsic::vector_reduce_and:
  case Intrinsic::vector_reduce_or:
  case Intrinsic::vector_reduce_xor:
  case Intrinsic::vector_reduce_smin:
  case Intrinsic::vector_reduce_smax:
  case Intrinsic::vector_reduce_umin:
  case Intrinsic::vector_reduce_umax:
    break;
  default:
    return false;
  }

  // Find all the inputs when looking through operations that do not alter the
  // lane order (binops, for example). Currently we look for a single shuffle,
  // and can ignore splat values.
  std::queue<Value *> Worklist;
  SmallPtrSet<Value *, 4> Visited;
  ShuffleVectorInst *Shuffle = nullptr;
  if (auto *Op = dyn_cast<Instruction>(I.getOperand(0)))
    Worklist.push(Op);

  while (!Worklist.empty()) {
    Value *CV = Worklist.front();
    Worklist.pop();
    // ...
    if (auto *CI = dyn_cast<Instruction>(CV)) {
      if (CI->isBinaryOp()) {
        for (auto *Op : CI->operand_values())
          Worklist.push(Op);
        continue;
      } else if (auto *SV = dyn_cast<ShuffleVectorInst>(CI)) {
        if (Shuffle && Shuffle != SV)
          return false;
        Shuffle = SV;
        continue;
      }
    }
    // Anything else is currently unsupported.
    return false;
  }

  if (!Shuffle)
    return false;

  // Check all uses of the binary ops and shuffles are also included in the
  // lane-invariant operations (Visited should be the list of lanewise
  // instructions, including the shuffle that we found).
  for (auto *V : Visited)
    for (auto *U : V->users())
      if (!Visited.contains(U) && U != &I)
        return false;
  // ...
  FixedVectorType *ShuffleInputType =
      dyn_cast<FixedVectorType>(II->getOperand(0)->getType());
  if (!ShuffleInputType)
    return false;
  unsigned NumInputElts = ShuffleInputType->getNumElements();

  // Find the mask from sorting the lanes into order. This is most likely to
  // become an identity or concat mask. Undef elements are pushed to the end.
  SmallVector<int> ConcatMask;
  Shuffle->getShuffleMask(ConcatMask);
  sort(ConcatMask, [](int X, int Y) { return (unsigned)X < (unsigned)Y; });
  // ...
  bool IsTruncatingShuffle = VecType->getNumElements() < NumInputElts;
  bool UsesSecondVec =
      any_of(ConcatMask, [&](int M) { return M >= (int)NumInputElts; });

  FixedVectorType *VecTyForCost =
      (UsesSecondVec && !IsTruncatingShuffle) ? VecType : ShuffleInputType;
  // ... (compare OldCost of the original mask vs. NewCost of ConcatMask over
  //      VecTyForCost, ConcatMask)
  LLVM_DEBUG(dbgs() << "Found a reduction feeding from a shuffle: " << *Shuffle
                    << "\n");
  LLVM_DEBUG(dbgs() << "  OldCost: " << OldCost << " vs NewCost: " << NewCost
                    << "\n");
  if (NewCost < OldCost) {
    Builder.SetInsertPoint(Shuffle);
    Value *NewShuffle = Builder.CreateShuffleVector(
        Shuffle->getOperand(0), Shuffle->getOperand(1), ConcatMask);
    LLVM_DEBUG(dbgs() << "Created new shuffle: " << *NewShuffle << "\n");
    replaceValue(*Shuffle, *NewShuffle);
  }

  // See if we can re-use foldSelectShuffle, getting a select shuffle plus a
  // full reduction.
  return foldSelectShuffle(*Shuffle, true);
}
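// Illustrative example (not from this file): reassociating a cast out of a
// bitwise reduction so the reduction runs on the narrower type:
//   %z = zext <8 x i8> %x to <8 x i16>
//   %r = call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> %z)
// becomes:
//   %n = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> %x)
//   %r = zext i8 %n to i16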
bool VectorCombine::foldCastFromReductions(Instruction &I) {
  auto *II = dyn_cast<IntrinsicInst>(&I);
  if (!II)
    return false;

  bool TruncOnly = false;
  Intrinsic::ID IID = II->getIntrinsicID();
  switch (IID) {
  case Intrinsic::vector_reduce_add:
  case Intrinsic::vector_reduce_mul:
    TruncOnly = true;
    break;
  case Intrinsic::vector_reduce_and:
  case Intrinsic::vector_reduce_or:
  case Intrinsic::vector_reduce_xor:
    break;
  default:
    return false;
  }

  unsigned ReductionOpc = getArithmeticReductionInstruction(IID);
  Value *ReductionSrc = I.getOperand(0);
  // ... (match a one-use trunc, or zext/sext when allowed, feeding the
  //      reduction; Src is the cast's operand)
  auto *SrcTy = cast<VectorType>(Src->getType());
  auto *ReductionSrcTy = cast<VectorType>(ReductionSrc->getType());
  Type *ResultTy = I.getType();

  // The new costs should be cheaper than the old ones.
  InstructionCost OldCost = TTI.getArithmeticReductionCost(
      ReductionOpc, ReductionSrcTy, std::nullopt, CostKind);
  OldCost += TTI.getCastInstrCost(
      cast<CastInst>(ReductionSrc)->getOpcode(), ReductionSrcTy, SrcTy,
      TargetTransformInfo::CastContextHint::None, CostKind,
      cast<CastInst>(ReductionSrc));
  // ...
  if (OldCost <= NewCost || !NewCost.isValid())
    return false;

  Value *NewReduction = Builder.CreateIntrinsic(SrcTy->getScalarType(),
                                                II->getIntrinsicID(), {Src});
  Value *NewCast = Builder.CreateCast(
      cast<CastInst>(ReductionSrc)->getOpcode(), NewReduction, ResultTy);
  replaceValue(I, *NewCast);
  return true;
}
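// The "select shuffle" pattern handled below (illustrative, not from this
// file): two binops whose results are blended lane-wise,
//   %op0 = fadd <4 x float> %a, %b
//   %op1 = fsub <4 x float> %a, %b
//   %r   = shufflevector <4 x float> %op0, <4 x float> %op1,
//                        <4 x i32> <i32 0, i32 5, i32 2, i32 7>
// Each binop only contributes half its lanes, so the inputs can be repacked
// into smaller groups and the binops performed on fewer meaningful lanes when
// the target's shuffle costs make that profitable.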
bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) {
  auto *SVI = cast<ShuffleVectorInst>(&I);
  auto *VT = cast<FixedVectorType>(I.getType());
  auto *Op0 = dyn_cast<Instruction>(SVI->getOperand(0));
  auto *Op1 = dyn_cast<Instruction>(SVI->getOperand(1));
  if (!Op0 || !Op1 || Op0 == Op1 || !Op0->isBinaryOp() || !Op1->isBinaryOp() ||
      VT != Op0->getType())
    return false;

  auto *SVI0A = dyn_cast<Instruction>(Op0->getOperand(0));
  auto *SVI0B = dyn_cast<Instruction>(Op0->getOperand(1));
  auto *SVI1A = dyn_cast<Instruction>(Op1->getOperand(0));
  auto *SVI1B = dyn_cast<Instruction>(Op1->getOperand(1));
  SmallPtrSet<Instruction *, 4> InputShuffles({SVI0A, SVI0B, SVI1A, SVI1B});
  auto checkSVNonOpUses = [&](Instruction *I) {
    if (!I || I->getOperand(0)->getType() != VT)
      return true;
    return any_of(I->users(), [&](User *U) {
      return U != Op0 && U != Op1 &&
             !(isa<ShuffleVectorInst>(U) &&
               (InputShuffles.contains(cast<Instruction>(U)) ||
                isInstructionTriviallyDead(cast<Instruction>(U))));
    });
  };
  if (checkSVNonOpUses(SVI0A) || checkSVNonOpUses(SVI0B) ||
      checkSVNonOpUses(SVI1A) || checkSVNonOpUses(SVI1B))
    return false;

  // Collect all the uses that are shuffles that we can transform.
  SmallVector<ShuffleVectorInst *> Shuffles;
  auto collectShuffles = [&](Instruction *I) {
    for (auto *U : I->users()) {
      auto *SV = dyn_cast<ShuffleVectorInst>(U);
      if (!SV || SV->getType() != VT)
        return false;
      if ((SV->getOperand(0) != Op0 && SV->getOperand(0) != Op1) ||
          (SV->getOperand(1) != Op0 && SV->getOperand(1) != Op1))
        return false;
      if (!llvm::is_contained(Shuffles, SV))
        Shuffles.push_back(SV);
    }
    return true;
  };
  if (!collectShuffles(Op0) || !collectShuffles(Op1))
    return false;
  // From a reduction, we need to be processing a single shuffle, otherwise the
  // other uses will not be lane-invariant.
  if (FromReduction && Shuffles.size() > 1)
    return false;

  // Add any shuffle uses for the shuffles we have found, to include them in
  // our cost calculations.
  if (!FromReduction) {
    for (ShuffleVectorInst *SV : Shuffles) {
      for (auto *U : SV->users()) {
        ShuffleVectorInst *SSV = dyn_cast<ShuffleVectorInst>(U);
        if (SSV && isa<UndefValue>(SSV->getOperand(1)) && SSV->getType() == VT)
          Shuffles.push_back(SSV);
      }
    }
  }

  // For each of the output shuffles, we try to sort all the first vector
  // elements to the beginning, followed by the second array elements at the
  // end. If the binops are legalized to smaller vectors, this may reduce the
  // total number of registers used.
  int MaxV1Elt = 0, MaxV2Elt = 0;
  unsigned NumElts = VT->getNumElements();
  SmallVector<std::pair<int, int>> V1, V2;
  SmallVector<SmallVector<int>> OrigReconstructMasks;
  for (ShuffleVectorInst *SVN : Shuffles) {
    SmallVector<int> Mask;
    SVN->getShuffleMask(Mask);

    // Check the operands are the same as the original, or reversed (in which
    // case we need to commute the mask).
    Value *SVOp0 = SVN->getOperand(0);
    Value *SVOp1 = SVN->getOperand(1);
    if (isa<UndefValue>(SVOp1)) {
      auto *SSV = cast<ShuffleVectorInst>(SVOp0);
      SVOp0 = SSV->getOperand(0);
      SVOp1 = SSV->getOperand(1);
      for (unsigned I = 0, E = Mask.size(); I != E; I++) {
        if (Mask[I] >= 0)
          Mask[I] = SSV->getMaskValue(Mask[I]);
      }
    }
    if (SVOp0 == Op1 && SVOp1 == Op0) {
      std::swap(SVOp0, SVOp1);
      ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
    }
    if (SVOp0 != Op0 || SVOp1 != Op1)
      return false;

    // Calculate the reconstruction mask for this shuffle, as the mask needed
    // to take the same elements from the reordered vectors.
    SmallVector<int> ReconstructMask;
    for (unsigned I = 0; I < Mask.size(); I++) {
      if (Mask[I] < 0) {
        ReconstructMask.push_back(-1);
      } else if (Mask[I] < static_cast<int>(NumElts)) {
        MaxV1Elt = std::max(MaxV1Elt, Mask[I]);
        auto It = find_if(V1, [&](const std::pair<int, int> &A) {
          return Mask[I] == A.first;
        });
        if (It != V1.end())
          ReconstructMask.push_back(It - V1.begin());
        else {
          ReconstructMask.push_back(V1.size());
          V1.emplace_back(Mask[I], V1.size());
        }
      } else {
        MaxV2Elt = std::max<int>(MaxV2Elt, Mask[I] - NumElts);
        auto It = find_if(V2, [&](const std::pair<int, int> &A) {
          return Mask[I] - static_cast<int>(NumElts) == A.first;
        });
        if (It != V2.end())
          ReconstructMask.push_back(NumElts + It - V2.begin());
        else {
          ReconstructMask.push_back(NumElts + V2.size());
          V2.emplace_back(Mask[I] - NumElts, NumElts + V2.size());
        }
      }
    }

    // For reductions, we know that the lane ordering out doesn't alter the
    // result. In-order can help simplify the shuffle away.
    if (FromReduction)
      sort(ReconstructMask);
    OrigReconstructMasks.push_back(std::move(ReconstructMask));
  }

  // If the Maximum element used from V1 and V2 are not larger than the new
  // vectors, the vectors are already packed and performing the optimization
  // again will likely not help any further. This also prevents us from getting
  // stuck in a cycle in case the costs do not also rule it out.
  if (V1.empty() || V2.empty() ||
      (MaxV1Elt == static_cast<int>(V1.size()) - 1 &&
       MaxV2Elt == static_cast<int>(V2.size()) - 1))
    return false;

  // Given a shuffle of a binop of shuffles, get the mask value of the lane
  // looked through the inner shuffle if possible.
  auto GetBaseMaskValue = [&](Instruction *I, int M) {
    auto *SV = dyn_cast<ShuffleVectorInst>(I);
    if (!SV)
      return M;
    if (isa<UndefValue>(SV->getOperand(1)))
      if (auto *SSV = dyn_cast<ShuffleVectorInst>(SV->getOperand(0)))
        if (InputShuffles.contains(SSV))
          return SSV->getMaskValue(SV->getMaskValue(M));
    return SV->getMaskValue(M);
  };

  // Attempt to sort the inputs, so that lower mask values come first.
  auto SortBase = [&](Instruction *A, std::pair<int, int> X,
                      std::pair<int, int> Y) {
    int MXA = GetBaseMaskValue(A, X.first);
    int MYA = GetBaseMaskValue(A, Y.first);
    return MXA < MYA;
  };
  stable_sort(V1, [&](std::pair<int, int> A, std::pair<int, int> B) {
    return SortBase(SVI0A, A, B);
  });
  stable_sort(V2, [&](std::pair<int, int> A, std::pair<int, int> B) {
    return SortBase(SVI1A, A, B);
  });

  // Calculate our ReconstructMasks from the OrigReconstructMasks and the
  // modified order of the input shuffles.
  SmallVector<SmallVector<int>> ReconstructMasks;
  for (const auto &Mask : OrigReconstructMasks) {
    SmallVector<int> ReconstructMask;
    for (int M : Mask) {
      auto FindIndex = [](const SmallVector<std::pair<int, int>> &V, int M) {
        auto It = find_if(V, [M](auto A) { return A.second == M; });
        assert(It != V.end() && "Expected all entries in Mask");
        return std::distance(V.begin(), It);
      };
      if (M < 0)
        ReconstructMask.push_back(-1);
      else if (M < static_cast<int>(NumElts)) {
        ReconstructMask.push_back(FindIndex(V1, M));
      } else {
        ReconstructMask.push_back(NumElts + FindIndex(V2, M));
      }
    }
    ReconstructMasks.push_back(std::move(ReconstructMask));
  }

  // Calculate the masks needed for the new input shuffles, which get padded
  // with undef.
  SmallVector<int> V1A, V1B, V2A, V2B;
  for (unsigned I = 0; I < V1.size(); I++) {
    V1A.push_back(GetBaseMaskValue(SVI0A, V1[I].first));
    V1B.push_back(GetBaseMaskValue(SVI0B, V1[I].first));
  }
  for (unsigned I = 0; I < V2.size(); I++) {
    V2A.push_back(GetBaseMaskValue(SVI1A, V2[I].first));
    V2B.push_back(GetBaseMaskValue(SVI1B, V2[I].first));
  }
  while (V1A.size() < NumElts) {
    V1A.push_back(PoisonMaskElem);
    V1B.push_back(PoisonMaskElem);
  }
  while (V2A.size() < NumElts) {
    V2A.push_back(PoisonMaskElem);
    V2B.push_back(PoisonMaskElem);
  }

  auto AddShuffleCost = [&](InstructionCost C, Instruction *I) {
    auto *SV = dyn_cast<ShuffleVectorInst>(I);
    if (!SV)
      return C;
    return C + TTI.getShuffleCost(isa<UndefValue>(SV->getOperand(1))
                                      ? TTI::SK_PermuteSingleSrc
                                      : TTI::SK_PermuteTwoSrc,
                                  VT, SV->getShuffleMask());
  };

  // Get the costs of the shuffles + binops before and after with the new
  // shuffle masks.
  InstructionCost CostBefore =
      TTI.getArithmeticInstrCost(Op0->getOpcode(), VT) +
      TTI.getArithmeticInstrCost(Op1->getOpcode(), VT);
  CostBefore += std::accumulate(Shuffles.begin(), Shuffles.end(),
                                InstructionCost(0), AddShuffleCost);
  CostBefore += std::accumulate(InputShuffles.begin(), InputShuffles.end(),
                                InstructionCost(0), AddShuffleCost);
  // ...
  CostAfter += std::accumulate(ReconstructMasks.begin(), ReconstructMasks.end(),
                               InstructionCost(0), AddShuffleMaskCost);
  std::set<SmallVector<int>> OutputShuffleMasks({V1A, V1B, V2A, V2B});
  CostAfter +=
      std::accumulate(OutputShuffleMasks.begin(), OutputShuffleMasks.end(),
                      InstructionCost(0), AddShuffleMaskCost);

  LLVM_DEBUG(dbgs() << "Found a binop select shuffle pattern: " << I << "\n");
  LLVM_DEBUG(dbgs() << "  CostBefore: " << CostBefore
                    << " vs CostAfter: " << CostAfter << "\n");
  if (CostBefore <= CostAfter)
    return false;

  // The cost model has passed, create the new instructions.
  auto GetShuffleOperand = [&](Instruction *I, unsigned Op) -> Value * {
    auto *SV = dyn_cast<ShuffleVectorInst>(I);
    if (!SV)
      return I;
    if (isa<UndefValue>(SV->getOperand(1)))
      if (auto *SSV = dyn_cast<ShuffleVectorInst>(SV->getOperand(0)))
        if (InputShuffles.contains(SSV))
          return SSV->getOperand(Op);
    return SV->getOperand(Op);
  };
  Value *NSV0A = Builder.CreateShuffleVector(GetShuffleOperand(SVI0A, 0),
                                             GetShuffleOperand(SVI0A, 1), V1A);
  Value *NSV0B = Builder.CreateShuffleVector(GetShuffleOperand(SVI0B, 0),
                                             GetShuffleOperand(SVI0B, 1), V1B);
  Value *NSV1A = Builder.CreateShuffleVector(GetShuffleOperand(SVI1A, 0),
                                             GetShuffleOperand(SVI1A, 1), V2A);
  Value *NSV1B = Builder.CreateShuffleVector(GetShuffleOperand(SVI1B, 0),
                                             GetShuffleOperand(SVI1B, 1), V2B);
  Value *NOp0 = Builder.CreateBinOp((Instruction::BinaryOps)Op0->getOpcode(),
                                    NSV0A, NSV0B);
  if (auto *I = dyn_cast<Instruction>(NOp0))
    I->copyIRFlags(Op0, true);
  Value *NOp1 = Builder.CreateBinOp((Instruction::BinaryOps)Op1->getOpcode(),
                                    NSV1A, NSV1B);
  if (auto *I = dyn_cast<Instruction>(NOp1))
    I->copyIRFlags(Op1, true);

  for (int S = 0, E = ReconstructMasks.size(); S != E; S++) {
    Builder.SetInsertPoint(Shuffles[S]);
    Value *NSV = Builder.CreateShuffleVector(NOp0, NOp1, ReconstructMasks[S]);
    replaceValue(*Shuffles[S], *NSV);
  }

  Worklist.pushValue(NSV0A);
  Worklist.pushValue(NSV0B);
  Worklist.pushValue(NSV1A);
  Worklist.pushValue(NSV1B);
  for (auto *S : Shuffles)
    Worklist.add(S);
  return true;
}
bool VectorCombine::run() {
  if (DisableVectorCombine)
    return false;

  // Don't attempt vectorization if the target does not support vectors.
  if (!TTI.getNumberOfRegisters(TTI.getRegisterClassForType(/*Vector*/ true)))
    return false;

  bool MadeChange = false;
  auto FoldInst = [this, &MadeChange](Instruction &I) {
    Builder.SetInsertPoint(&I);
    bool IsFixedVectorType = isa<FixedVectorType>(I.getType());
    auto Opcode = I.getOpcode();

    // These folds should be beneficial regardless of when this pass is run
    // in the optimization pipeline.
    // The type checking is for run-time efficiency. We can avoid wasting time
    // dispatching to folding functions if there's no chance of matching.
    if (IsFixedVectorType) {
      switch (Opcode) {
      case Instruction::InsertElement:
        MadeChange |= vectorizeLoadInsert(I);
        break;
      case Instruction::ShuffleVector:
        MadeChange |= widenSubvectorLoad(I);
        break;
      default:
        break;
      }
    }

    // This transform works with scalable and fixed vectors.
    // TODO: Identify and allow other scalable transforms
    if (isa<VectorType>(I.getType())) {
      MadeChange |= scalarizeBinopOrCmp(I);
      MadeChange |= scalarizeLoadExtract(I);
      MadeChange |= scalarizeVPIntrinsic(I);
    }

    if (Opcode == Instruction::Store)
      MadeChange |= foldSingleElementStore(I);

    // If this is an early pipeline invocation of this pass, we are done.
    if (TryEarlyFoldsOnly)
      return;

    // Otherwise, try folds that improve codegen but may interfere with
    // early IR canonicalizations.
    if (IsFixedVectorType) {
      switch (Opcode) {
      case Instruction::InsertElement:
        MadeChange |= foldInsExtFNeg(I);
        break;
      case Instruction::ShuffleVector:
        MadeChange |= foldShuffleOfBinops(I);
        MadeChange |= foldShuffleOfCastops(I);
        MadeChange |= foldShuffleOfShuffles(I);
        MadeChange |= foldSelectShuffle(I);
        MadeChange |= foldShuffleToIdentity(I);
        break;
      case Instruction::BitCast:
        MadeChange |= foldBitcastShuffle(I);
        break;
      }
    } else {
      switch (Opcode) {
      case Instruction::Call:
        MadeChange |= foldShuffleFromReductions(I);
        MadeChange |= foldCastFromReductions(I);
        break;
      case Instruction::ICmp:
      case Instruction::FCmp:
        MadeChange |= foldExtractExtract(I);
        break;
      default:
        if (Instruction::isBinaryOp(Opcode)) {
          MadeChange |= foldExtractExtract(I);
          MadeChange |= foldExtractedCmps(I);
        }
        break;
      }
    }
  };

  for (BasicBlock &BB : F) {
    // Ignore unreachable basic blocks.
    if (!DT.isReachableFromEntry(&BB))
      continue;
    // Use early increment range so that we can erase instructions in loop.
    for (Instruction &I : make_early_inc_range(BB)) {
      if (I.isDebugOrPseudoInst())
        continue;
      FoldInst(I);
    }
  }

  while (!Worklist.isEmpty()) {
    Instruction *I = Worklist.removeOne();
    if (!I)
      continue;
    // ...
    FoldInst(*I);
  }

  return MadeChange;
}
PreservedAnalyses VectorCombinePass::run(Function &F,
                                         FunctionAnalysisManager &FAM) {
  auto &AC = FAM.getResult<AssumptionAnalysis>(F);
  TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
  DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F);
  AAResults &AA = FAM.getResult<AAManager>(F);
  const DataLayout *DL = &F.getParent()->getDataLayout();
  VectorCombine Combiner(F, TTI, DT, AA, AC, DL, TryEarlyFoldsOnly);
  if (!Combiner.run())
    return PreservedAnalyses::all();
  PreservedAnalyses PA;
  PA.preserveSet<CFGAnalyses>();
  return PA;
}