37#define DEBUG_TYPE "vector-combine"
43STATISTIC(NumVecLoad,
"Number of vector loads formed");
44STATISTIC(NumVecCmp,
"Number of vector compares formed");
45STATISTIC(NumVecBO,
"Number of vector binops formed");
46STATISTIC(NumVecCmpBO,
"Number of vector compare + binop formed");
47STATISTIC(NumShufOfBitcast,
"Number of shuffles moved after bitcast");
48STATISTIC(NumScalarBO,
"Number of scalar binops formed");
49STATISTIC(NumScalarCmp,
"Number of scalar compares formed");
53 cl::desc(
"Disable all vector combine transforms"));
57 cl::desc(
"Disable binop extract to shuffle transforms"));
61 cl::desc(
"Max number of instructions to scan for vector combining."));
63static const unsigned InvalidIndex = std::numeric_limits<unsigned>::max();
71 :
F(
F), Builder(
F.getContext()),
TTI(
TTI), DT(DT), AA(AA), AC(AC),
DL(
DL),
72 TryEarlyFoldsOnly(TryEarlyFoldsOnly) {}
87 bool TryEarlyFoldsOnly;
98 unsigned PreferredExtractIndex)
const;
102 unsigned PreferredExtractIndex);
121 bool foldSelectShuffle(
Instruction &
I,
bool FromReduction =
false);
125 if (
auto *NewI = dyn_cast<Instruction>(&New)) {
145 while (
auto *BitCast = dyn_cast<BitCastInst>(V))
146 V = BitCast->getOperand(0);
154 if (!Load || !Load->isSimple() || !Load->hasOneUse() ||
155 Load->getFunction()->hasFnAttribute(Attribute::SanitizeMemTag) ||
161 Type *ScalarTy = Load->getType()->getScalarType();
164 if (!ScalarSize || !MinVectorSize || MinVectorSize % ScalarSize != 0 ||
171bool VectorCombine::vectorizeLoadInsert(
Instruction &
I) {
185 auto *
Load = dyn_cast<LoadInst>(
X);
197 Value *SrcPtr =
Load->getPointerOperand()->stripPointerCasts();
198 assert(isa<PointerType>(SrcPtr->
getType()) &&
"Expected a pointer type");
200 unsigned MinVecNumElts = MinVectorSize / ScalarSize;
201 auto *MinVecTy = VectorType::get(ScalarTy, MinVecNumElts,
false);
202 unsigned OffsetEltIndex = 0;
210 unsigned OffsetBitWidth =
DL->getIndexTypeSizeInBits(SrcPtr->
getType());
221 uint64_t ScalarSizeInBytes = ScalarSize / 8;
222 if (
Offset.urem(ScalarSizeInBytes) != 0)
226 OffsetEltIndex =
Offset.udiv(ScalarSizeInBytes).getZExtValue();
227 if (OffsetEltIndex >= MinVecNumElts)
244 unsigned AS =
Load->getPointerAddressSpace();
263 auto *Ty = cast<FixedVectorType>(
I.getType());
264 unsigned OutputNumElts = Ty->getNumElements();
266 assert(OffsetEltIndex < MinVecNumElts &&
"Address offset too big");
267 Mask[0] = OffsetEltIndex;
273 if (OldCost < NewCost || !NewCost.
isValid())
284 replaceValue(
I, *VecLd);
294 auto *Shuf = cast<ShuffleVectorInst>(&
I);
295 if (!Shuf->isIdentityWithPadding())
300 cast<FixedVectorType>(Shuf->getOperand(0)->getType())->getNumElements();
301 unsigned OpIndex =
any_of(Shuf->getShuffleMask(), [&NumOpElts](
int M) {
302 return M >= (int)(NumOpElts);
305 auto *
Load = dyn_cast<LoadInst>(Shuf->getOperand(
OpIndex));
312 auto *Ty = cast<FixedVectorType>(
I.getType());
313 Value *SrcPtr =
Load->getPointerOperand()->stripPointerCasts();
314 assert(isa<PointerType>(SrcPtr->
getType()) &&
"Expected a pointer type");
321 unsigned AS =
Load->getPointerAddressSpace();
336 if (OldCost < NewCost || !NewCost.
isValid())
343 replaceValue(
I, *VecLd);
355 assert(Index0C && Index1C &&
"Expected constant extract indexes");
357 unsigned Index0 = Index0C->getZExtValue();
358 unsigned Index1 = Index1C->getZExtValue();
361 if (Index0 == Index1)
386 if (PreferredExtractIndex == Index0)
388 if (PreferredExtractIndex == Index1)
392 return Index0 > Index1 ? Ext0 : Ext1;
404 unsigned PreferredExtractIndex) {
405 auto *Ext0IndexC = dyn_cast<ConstantInt>(Ext0->
getOperand(1));
406 auto *Ext1IndexC = dyn_cast<ConstantInt>(Ext1->
getOperand(1));
407 assert(Ext0IndexC && Ext1IndexC &&
"Expected constant extract indexes");
409 unsigned Opcode =
I.getOpcode();
420 assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
421 "Expected a compare");
431 unsigned Ext0Index = Ext0IndexC->getZExtValue();
432 unsigned Ext1Index = Ext1IndexC->getZExtValue();
447 InstructionCost CheapExtractCost = std::min(Extract0Cost, Extract1Cost);
457 bool HasUseTax = Ext0 == Ext1 ? !Ext0->
hasNUses(2)
459 OldCost = CheapExtractCost + ScalarOpCost;
460 NewCost = VectorOpCost + CheapExtractCost + HasUseTax * CheapExtractCost;
464 OldCost = Extract0Cost + Extract1Cost + ScalarOpCost;
465 NewCost = VectorOpCost + CheapExtractCost +
470 ConvertToShuffle = getShuffleExtract(Ext0, Ext1, PreferredExtractIndex);
471 if (ConvertToShuffle) {
489 return OldCost < NewCost;
499 auto *VecTy = cast<FixedVectorType>(Vec->
getType());
501 ShufMask[NewIndex] = OldIndex;
520 assert(isa<ConstantInt>(
C) &&
"Expected a constant index operand");
521 if (isa<Constant>(
X))
534 assert(isa<CmpInst>(&
I) &&
"Expected a compare");
537 "Expected matching constant extract indexes");
545 replaceValue(
I, *NewExt);
553 assert(isa<BinaryOperator>(&
I) &&
"Expected a binary operator");
556 "Expected matching constant extract indexes");
566 if (
auto *VecBOInst = dyn_cast<Instruction>(VecBO))
567 VecBOInst->copyIRFlags(&
I);
570 replaceValue(
I, *NewExt);
598 auto *Ext0 = cast<ExtractElementInst>(I0);
599 auto *Ext1 = cast<ExtractElementInst>(I1);
606 if (isExtractExtractCheap(Ext0, Ext1,
I, ExtractToChange, InsertIndex))
609 if (ExtractToChange) {
610 unsigned CheapExtractIdx = ExtractToChange == Ext0 ? C1 : C0;
615 if (ExtractToChange == Ext0)
622 foldExtExtCmp(Ext0, Ext1,
I);
624 foldExtExtBinop(Ext0, Ext1,
I);
651 auto *VecTy = cast<FixedVectorType>(
I.getType());
652 if (SrcVec->
getType() != VecTy)
656 unsigned NumElts = VecTy->getNumElements();
657 if (
Index >= NumElts)
664 std::iota(
Mask.begin(),
Mask.end(), 0);
683 if (NewCost > OldCost)
690 replaceValue(
I, *Shuf);
709 auto *DestTy = dyn_cast<FixedVectorType>(
I.getType());
710 auto *SrcTy = dyn_cast<FixedVectorType>(V0->
getType());
711 if (!DestTy || !SrcTy)
714 unsigned DestEltSize = DestTy->getScalarSizeInBits();
715 unsigned SrcEltSize = SrcTy->getScalarSizeInBits();
716 if (SrcTy->getPrimitiveSizeInBits() % DestEltSize != 0)
719 bool IsUnary = isa<UndefValue>(V1);
726 if (!(BCTy0 && BCTy0->getElementType() == DestTy->getElementType()) &&
727 !(BCTy1 && BCTy1->getElementType() == DestTy->getElementType()))
732 if (DestEltSize <= SrcEltSize) {
735 assert(SrcEltSize % DestEltSize == 0 &&
"Unexpected shuffle mask");
736 unsigned ScaleFactor = SrcEltSize / DestEltSize;
741 assert(DestEltSize % SrcEltSize == 0 &&
"Unexpected shuffle mask");
742 unsigned ScaleFactor = DestEltSize / SrcEltSize;
749 unsigned NumSrcElts = SrcTy->getPrimitiveSizeInBits() / DestEltSize;
754 unsigned NumOps = IsUnary ? 1 : 2;
766 TargetTransformInfo::CastContextHint::None,
771 TargetTransformInfo::CastContextHint::None, CK);
772 if (DestCost > SrcCost || !DestCost.
isValid())
780 replaceValue(
I, *Shuf);
787bool VectorCombine::scalarizeVPIntrinsic(
Instruction &
I) {
788 if (!isa<VPIntrinsic>(
I))
801 if (!ScalarOp0 || !ScalarOp1)
809 auto IsAllTrueMask = [](
Value *MaskVal) {
811 if (
auto *ConstValue = dyn_cast<Constant>(SplattedVal))
812 return ConstValue->isAllOnesValue();
828 if (
auto *FVTy = dyn_cast<FixedVectorType>(VecTy))
829 Mask.resize(FVTy->getNumElements(), 0);
837 Args.push_back(
V->getType());
843 std::optional<unsigned> FunctionalOpcode =
845 std::optional<Intrinsic::ID> ScalarIntrID = std::nullopt;
846 if (!FunctionalOpcode) {
870 <<
", Cost of scalarizing:" << NewCost <<
"\n");
873 if (OldCost < NewCost || !NewCost.
isValid())
884 bool SafeToSpeculate;
887 .
hasFnAttr(Attribute::AttrKind::Speculatable);
890 *FunctionalOpcode, &VPI,
nullptr, &AC, &DT);
891 if (!SafeToSpeculate &&
898 {ScalarOp0, ScalarOp1})
900 ScalarOp0, ScalarOp1);
908bool VectorCombine::scalarizeBinopOrCmp(
Instruction &
I) {
919 bool IsCmp = Pred != CmpInst::Predicate::BAD_ICMP_PREDICATE;
921 for (
User *U :
I.users())
931 Constant *VecC0 =
nullptr, *VecC1 =
nullptr;
932 Value *V0 =
nullptr, *V1 =
nullptr;
945 if (IsConst0 && IsConst1)
947 if (!IsConst0 && !IsConst1 && Index0 != Index1)
952 auto *I0 = dyn_cast_or_null<Instruction>(V0);
953 auto *
I1 = dyn_cast_or_null<Instruction>(V1);
954 if ((IsConst0 && I1 &&
I1->mayReadFromMemory()) ||
960 Type *VecTy =
I.getType();
965 "Unexpected types for insert element into binop or cmp");
967 unsigned Opcode =
I.getOpcode();
986 (IsConst0 ? 0 : InsertCost) + (IsConst1 ? 0 : InsertCost) + VectorOpCost;
988 (IsConst0 ? 0 : !Ins0->
hasOneUse() * InsertCost) +
989 (IsConst1 ? 0 : !Ins1->
hasOneUse() * InsertCost);
992 if (OldCost < NewCost || !NewCost.
isValid())
1012 Scalar->setName(
I.getName() +
".scalar");
1016 if (
auto *ScalarInst = dyn_cast<Instruction>(Scalar))
1017 ScalarInst->copyIRFlags(&
I);
1021 IsCmp ? Builder.
CreateCmp(Pred, VecC0, VecC1)
1024 replaceValue(
I, *Insert);
1034 if (!
I.isBinaryOp() || !
I.getType()->isIntegerTy(1))
1040 Value *B0 =
I.getOperand(0), *B1 =
I.getOperand(1);
1058 auto *Ext0 = cast<ExtractElementInst>(I0);
1059 auto *Ext1 = cast<ExtractElementInst>(I1);
1068 : Instruction::ICmp;
1069 auto *VecTy = dyn_cast<FixedVectorType>(
X->getType());
1086 int CheapIndex = ConvertToShuf == Ext0 ? Index1 : Index0;
1087 int ExpensiveIndex = ConvertToShuf == Ext0 ? Index0 : Index1;
1092 ShufMask[CheapIndex] = ExpensiveIndex;
1101 if (OldCost < NewCost || !NewCost.
isValid())
1115 replaceValue(
I, *NewExt);
1124 unsigned NumScanned = 0;
1134class ScalarizationResult {
1135 enum class StatusTy { Unsafe, Safe, SafeWithFreeze };
1140 ScalarizationResult(StatusTy
Status,
Value *ToFreeze =
nullptr)
1144 ScalarizationResult(
const ScalarizationResult &
Other) =
default;
1145 ~ScalarizationResult() {
1146 assert(!ToFreeze &&
"freeze() not called with ToFreeze being set");
1149 static ScalarizationResult unsafe() {
return {StatusTy::Unsafe}; }
1150 static ScalarizationResult safe() {
return {StatusTy::Safe}; }
1151 static ScalarizationResult safeWithFreeze(
Value *ToFreeze) {
1152 return {StatusTy::SafeWithFreeze, ToFreeze};
1156 bool isSafe()
const {
return Status == StatusTy::Safe; }
1158 bool isUnsafe()
const {
return Status == StatusTy::Unsafe; }
1161 bool isSafeWithFreeze()
const {
return Status == StatusTy::SafeWithFreeze; }
1166 Status = StatusTy::Unsafe;
1171 assert(isSafeWithFreeze() &&
1172 "should only be used when freezing is required");
1174 "UserI must be a user of ToFreeze");
1180 if (
U.get() == ToFreeze)
1197 uint64_t NumElements = VecTy->getElementCount().getKnownMinValue();
1199 if (
auto *
C = dyn_cast<ConstantInt>(
Idx)) {
1200 if (
C->getValue().ult(NumElements))
1201 return ScalarizationResult::safe();
1202 return ScalarizationResult::unsafe();
1205 unsigned IntWidth =
Idx->getType()->getScalarSizeInBits();
1206 APInt Zero(IntWidth, 0);
1207 APInt MaxElts(IntWidth, NumElements);
1213 true, &AC, CtxI, &DT)))
1214 return ScalarizationResult::safe();
1215 return ScalarizationResult::unsafe();
1228 if (ValidIndices.
contains(IdxRange))
1229 return ScalarizationResult::safeWithFreeze(IdxBase);
1230 return ScalarizationResult::unsafe();
1240 if (
auto *
C = dyn_cast<ConstantInt>(
Idx))
1242 C->getZExtValue() *
DL.getTypeStoreSize(ScalarType));
1254bool VectorCombine::foldSingleElementStore(
Instruction &
I) {
1255 auto *
SI = cast<StoreInst>(&
I);
1256 if (!
SI->isSimple() || !isa<VectorType>(
SI->getValueOperand()->getType()))
1264 if (!
match(
SI->getValueOperand(),
1269 if (
auto *Load = dyn_cast<LoadInst>(Source)) {
1270 auto VecTy = cast<VectorType>(
SI->getValueOperand()->getType());
1271 Value *SrcAddr =
Load->getPointerOperand()->stripPointerCasts();
1274 if (!
Load->isSimple() ||
Load->getParent() !=
SI->getParent() ||
1275 !
DL->typeSizeEqualsStoreSize(
Load->getType()->getScalarType()) ||
1276 SrcAddr !=
SI->getPointerOperand()->stripPointerCasts())
1280 if (ScalarizableIdx.isUnsafe() ||
1285 if (ScalarizableIdx.isSafeWithFreeze())
1286 ScalarizableIdx.freeze(Builder, *cast<Instruction>(
Idx));
1288 SI->getValueOperand()->getType(),
SI->getPointerOperand(),
1289 {ConstantInt::get(Idx->getType(), 0), Idx});
1296 replaceValue(
I, *NSI);
1305bool VectorCombine::scalarizeLoadExtract(
Instruction &
I) {
1310 auto *VecTy = cast<VectorType>(
I.getType());
1311 auto *LI = cast<LoadInst>(&
I);
1312 if (LI->isVolatile() || !
DL->typeSizeEqualsStoreSize(VecTy->
getScalarType()))
1317 LI->getPointerAddressSpace());
1321 unsigned NumInstChecked = 0;
1325 for (
auto &Pair : NeedFreeze)
1326 Pair.second.discard();
1333 auto *UI = dyn_cast<ExtractElementInst>(U);
1334 if (!UI || UI->getParent() != LI->getParent())
1341 make_range(std::next(LI->getIterator()), UI->getIterator())) {
1348 LastCheckedInst = UI;
1352 if (ScalarIdx.isUnsafe())
1354 if (ScalarIdx.isSafeWithFreeze()) {
1356 ScalarIdx.discard();
1359 auto *
Index = dyn_cast<ConstantInt>(UI->getOperand(1));
1366 Align(1), LI->getPointerAddressSpace());
1370 if (ScalarizedCost >= OriginalCost)
1375 auto *EI = cast<ExtractElementInst>(U);
1379 auto It = NeedFreeze.
find(EI);
1380 if (It != NeedFreeze.
end())
1381 It->second.freeze(Builder, *cast<Instruction>(
Idx));
1386 auto *NewLoad = cast<LoadInst>(Builder.
CreateLoad(
1387 VecTy->getElementType(),
GEP, EI->getName() +
".scalar"));
1390 LI->getAlign(), VecTy->getElementType(),
Idx, *
DL);
1391 NewLoad->setAlignment(ScalarOpAlignment);
1393 replaceValue(*EI, *NewLoad);
1396 FailureGuard.release();
1401bool VectorCombine::foldShuffleOfBinops(
Instruction &
I) {
1418 auto *ShuffleDstTy = dyn_cast<FixedVectorType>(
I.getType());
1419 auto *BinOpTy = dyn_cast<FixedVectorType>(B0->
getType());
1420 if (!ShuffleDstTy || !BinOpTy)
1423 unsigned NumSrcElts = BinOpTy->getNumElements();
1428 if (BinaryOperator::isCommutative(Opcode) &&
X != Z &&
Y != W &&
1432 auto ConvertToUnary = [NumSrcElts](
int &
M) {
1433 if (M >= (
int)NumSrcElts)
1460 OldMask,
CostKind, 0,
nullptr, {B0, B1}, &
I);
1468 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
1470 if (NewCost >= OldCost)
1478 if (
auto *NewInst = dyn_cast<Instruction>(NewBO)) {
1479 NewInst->copyIRFlags(B0);
1480 NewInst->andIRFlags(B1);
1485 replaceValue(
I, *NewBO);
1491bool VectorCombine::foldShuffleOfCastops(
Instruction &
I) {
1497 auto *C0 = dyn_cast<CastInst>(V0);
1498 auto *C1 = dyn_cast<CastInst>(V1);
1503 if (C0->getSrcTy() != C1->getSrcTy())
1507 if (Opcode != C1->getOpcode()) {
1509 Opcode = Instruction::SExt;
1514 auto *ShuffleDstTy = dyn_cast<FixedVectorType>(
I.getType());
1515 auto *CastDstTy = dyn_cast<FixedVectorType>(C0->getDestTy());
1516 auto *CastSrcTy = dyn_cast<FixedVectorType>(C0->getSrcTy());
1517 if (!ShuffleDstTy || !CastDstTy || !CastSrcTy)
1520 unsigned NumSrcElts = CastSrcTy->getNumElements();
1521 unsigned NumDstElts = CastDstTy->getNumElements();
1522 assert((NumDstElts == NumSrcElts || Opcode == Instruction::BitCast) &&
1523 "Only bitcasts expected to alter src/dst element counts");
1527 if (NumDstElts != NumSrcElts && (NumSrcElts % NumDstElts) != 0 &&
1528 (NumDstElts % NumSrcElts) != 0)
1532 if (NumSrcElts >= NumDstElts) {
1535 assert(NumSrcElts % NumDstElts == 0 &&
"Unexpected shuffle mask");
1536 unsigned ScaleFactor = NumSrcElts / NumDstElts;
1541 assert(NumDstElts % NumSrcElts == 0 &&
"Unexpected shuffle mask");
1542 unsigned ScaleFactor = NumDstElts / NumSrcElts;
1547 auto *NewShuffleDstTy =
1562 OldMask,
CostKind, 0,
nullptr, std::nullopt, &
I);
1574 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
1576 if (NewCost > OldCost)
1584 if (
auto *NewInst = dyn_cast<Instruction>(Cast)) {
1585 NewInst->copyIRFlags(C0);
1586 NewInst->andIRFlags(C1);
1590 replaceValue(
I, *Cast);
1596bool VectorCombine::foldShuffleOfShuffles(
Instruction &
I) {
1607 auto *ShufI0 = dyn_cast<Instruction>(
I.getOperand(0));
1608 auto *ShufI1 = dyn_cast<Instruction>(
I.getOperand(1));
1609 auto *ShuffleDstTy = dyn_cast<FixedVectorType>(
I.getType());
1610 auto *ShuffleSrcTy = dyn_cast<FixedVectorType>(V0->
getType());
1611 auto *ShuffleImmTy = dyn_cast<FixedVectorType>(
I.getOperand(0)->getType());
1612 if (!ShuffleDstTy || !ShuffleSrcTy || !ShuffleImmTy ||
1616 unsigned NumSrcElts = ShuffleSrcTy->getNumElements();
1617 unsigned NumImmElts = ShuffleImmTy->getNumElements();
1620 if ((!isa<PoisonValue>(U0) &&
1621 any_of(InnerMask0, [&](
int M) {
return M >= (int)NumSrcElts; })) ||
1622 (!isa<PoisonValue>(U1) &&
1623 any_of(InnerMask1, [&](
int M) {
return M >= (int)NumSrcElts; })))
1628 for (
int &M : NewMask) {
1629 if (0 <= M && M < (
int)NumImmElts) {
1631 }
else if (M >= (
int)NumImmElts) {
1632 if (InnerMask1[M - NumImmElts] >= (
int)NumSrcElts)
1635 M = InnerMask1[
M - NumImmElts] + (V0 == V1 ? 0 : NumSrcElts);
1641 replaceValue(
I, *V0);
1650 InnerMask0,
CostKind, 0,
nullptr, {V0, U0}, ShufI0) +
1652 InnerMask1,
CostKind, 0,
nullptr, {V1, U1}, ShufI1) +
1654 OuterMask,
CostKind, 0,
nullptr, {ShufI0, ShufI1}, &
I);
1658 NewMask,
CostKind, 0,
nullptr, {V0, V1});
1661 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
1663 if (NewCost > OldCost)
1667 if (
none_of(NewMask, [&](
int M) {
return 0 <=
M &&
M < (int)NumSrcElts; }))
1669 if (
none_of(NewMask, [&](
int M) {
return (
int)NumSrcElts <= M; }))
1673 replaceValue(
I, *Shuf);
1680 while (
auto *SV = dyn_cast<ShuffleVectorInst>(U->get())) {
1682 cast<FixedVectorType>(SV->getOperand(0)->getType())->getNumElements();
1683 int M = SV->getMaskValue(Lane);
1686 if (
static_cast<unsigned>(M) < NumElts) {
1687 U = &SV->getOperandUse(0);
1690 U = &SV->getOperandUse(1);
1701 auto [U, Lane] = IL;
1714 auto *Ty = cast<FixedVectorType>(Item.
front().first->get()->getType());
1715 unsigned NumElts = Ty->getNumElements();
1716 if (Item.
size() == NumElts || NumElts == 1 || Item.
size() % NumElts != 0)
1722 std::iota(ConcatMask.
begin(), ConcatMask.
end(), 0);
1727 unsigned NumSlices = Item.
size() / NumElts;
1732 for (
unsigned Slice = 0; Slice < NumSlices; ++Slice) {
1733 Use *SliceV = Item[Slice * NumElts].first;
1734 if (!SliceV || SliceV->get()->
getType() != Ty)
1736 for (
unsigned Elt = 0; Elt < NumElts; ++Elt) {
1737 auto [V, Lane] = Item[Slice * NumElts + Elt];
1738 if (Lane !=
static_cast<int>(Elt) || SliceV->get() != V->get())
1750 auto [FrontU, FrontLane] = Item.
front();
1752 if (IdentityLeafs.
contains(FrontU)) {
1753 return FrontU->get();
1759 if (ConcatLeafs.
contains(FrontU)) {
1761 cast<FixedVectorType>(FrontU->get()->getType())->getNumElements();
1763 for (
unsigned S = 0; S < Values.
size(); ++S)
1764 Values[S] = Item[S * NumElts].first->get();
1766 while (Values.
size() > 1) {
1769 std::iota(Mask.begin(), Mask.end(), 0);
1771 for (
unsigned S = 0; S < NewValues.
size(); ++S)
1779 auto *
I = cast<Instruction>(FrontU->get());
1780 auto *
II = dyn_cast<IntrinsicInst>(
I);
1781 unsigned NumOps =
I->getNumOperands() - (
II ? 1 : 0);
1783 for (
unsigned Idx = 0;
Idx < NumOps;
Idx++) {
1790 IdentityLeafs, SplatLeafs, ConcatLeafs, Builder);
1794 for (
const auto &Lane : Item)
1800 if (
auto *BI = dyn_cast<BinaryOperator>(
I)) {
1806 if (
auto *CI = dyn_cast<CmpInst>(
I)) {
1807 auto *
Value = Builder.
CreateCmp(CI->getPredicate(), Ops[0], Ops[1]);
1811 if (
auto *SI = dyn_cast<SelectInst>(
I)) {
1816 if (
auto *CI = dyn_cast<CastInst>(
I)) {
1827 assert(isa<UnaryInstruction>(
I) &&
"Unexpected instruction type in Generate");
1837bool VectorCombine::foldShuffleToIdentity(
Instruction &
I) {
1838 auto *Ty = dyn_cast<FixedVectorType>(
I.getType());
1839 if (!Ty ||
I.use_empty())
1843 for (
unsigned M = 0, E = Ty->getNumElements(); M < E; ++M)
1849 unsigned NumVisited = 0;
1851 while (!Worklist.
empty()) {
1856 auto [FrontU, FrontLane] = Item.
front();
1864 return X->getType() ==
Y->getType() &&
1869 if (FrontLane == 0 &&
1870 cast<FixedVectorType>(FrontU->get()->getType())->getNumElements() ==
1871 Ty->getNumElements() &&
1874 return !E.value().first || (IsEquiv(E.value().first->get(), FrontV) &&
1875 E.value().second == (
int)E.index());
1877 IdentityLeafs.
insert(FrontU);
1881 if (
auto *
C = dyn_cast<Constant>(FrontU);
1882 C &&
C->getSplatValue() &&
1886 return !U ||
U->get() == FrontV;
1888 SplatLeafs.
insert(FrontU);
1893 auto [FrontU, FrontLane] = Item.
front();
1894 auto [
U, Lane] = IL;
1895 return !
U || (
U->get() == FrontU->get() && Lane == FrontLane);
1897 SplatLeafs.
insert(FrontU);
1907 Value *V = IL.first->get();
1908 if (
auto *
I = dyn_cast<Instruction>(V);
I && !
I->hasOneUse())
1912 if (
auto *CI = dyn_cast<CmpInst>(V))
1913 if (CI->getPredicate() != cast<CmpInst>(FrontV)->getPredicate())
1915 if (
auto *CI = dyn_cast<CastInst>(V))
1916 if (CI->getSrcTy() != cast<CastInst>(FrontV)->getSrcTy())
1918 if (
auto *SI = dyn_cast<SelectInst>(V))
1919 if (!isa<VectorType>(
SI->getOperand(0)->getType()) ||
1920 SI->getOperand(0)->getType() !=
1921 cast<SelectInst>(FrontV)->getOperand(0)->getType())
1923 if (isa<CallInst>(V) && !isa<IntrinsicInst>(V))
1925 auto *
II = dyn_cast<IntrinsicInst>(V);
1926 return !
II || (isa<IntrinsicInst>(FrontV) &&
1927 II->getIntrinsicID() ==
1928 cast<IntrinsicInst>(FrontV)->getIntrinsicID());
1931 if (isa<BinaryOperator, CmpInst>(FrontU)) {
1933 if (
auto *BO = dyn_cast<BinaryOperator>(FrontU);
1934 BO && BO->isIntDivRem())
1939 }
else if (isa<UnaryOperator, TruncInst, ZExtInst, SExtInst>(FrontU)) {
1942 }
else if (
auto *BitCast = dyn_cast<BitCastInst>(FrontU)) {
1944 auto *DstTy = dyn_cast<FixedVectorType>(BitCast->getDestTy());
1945 auto *SrcTy = dyn_cast<FixedVectorType>(BitCast->getSrcTy());
1946 if (DstTy && SrcTy &&
1947 SrcTy->getNumElements() == DstTy->getNumElements()) {
1951 }
else if (isa<SelectInst>(FrontU)) {
1956 }
else if (
auto *
II = dyn_cast<IntrinsicInst>(FrontU);
1958 for (
unsigned Op = 0, E =
II->getNumOperands() - 1;
Op < E;
Op++) {
1963 return !U || (cast<Instruction>(
U->get())->getOperand(
Op) ==
1964 cast<Instruction>(FrontV)->getOperand(
Op));
1976 ConcatLeafs.
insert(FrontU);
1983 if (NumVisited <= 1)
1990 ConcatLeafs, Builder);
1991 replaceValue(
I, *V);
1998bool VectorCombine::foldShuffleFromReductions(
Instruction &
I) {
1999 auto *
II = dyn_cast<IntrinsicInst>(&
I);
2002 switch (
II->getIntrinsicID()) {
2003 case Intrinsic::vector_reduce_add:
2004 case Intrinsic::vector_reduce_mul:
2005 case Intrinsic::vector_reduce_and:
2006 case Intrinsic::vector_reduce_or:
2007 case Intrinsic::vector_reduce_xor:
2008 case Intrinsic::vector_reduce_smin:
2009 case Intrinsic::vector_reduce_smax:
2010 case Intrinsic::vector_reduce_umin:
2011 case Intrinsic::vector_reduce_umax:
2020 std::queue<Value *> Worklist;
2023 if (
auto *
Op = dyn_cast<Instruction>(
I.getOperand(0)))
2026 while (!Worklist.empty()) {
2027 Value *CV = Worklist.front();
2038 if (
auto *CI = dyn_cast<Instruction>(CV)) {
2039 if (CI->isBinaryOp()) {
2040 for (
auto *
Op : CI->operand_values())
2043 }
else if (
auto *SV = dyn_cast<ShuffleVectorInst>(CI)) {
2044 if (Shuffle && Shuffle != SV)
2061 for (
auto *V : Visited)
2062 for (
auto *U :
V->users())
2063 if (!Visited.contains(U) && U != &
I)
2067 dyn_cast<FixedVectorType>(
II->getOperand(0)->getType());
2072 if (!ShuffleInputType)
2080 sort(ConcatMask, [](
int X,
int Y) {
return (
unsigned)
X < (
unsigned)
Y; });
2084 bool IsTruncatingShuffle =
VecType->getNumElements() < NumInputElts;
2085 bool UsesSecondVec =
2086 any_of(ConcatMask, [&](
int M) {
return M >= (int)NumInputElts; });
2089 (UsesSecondVec && !IsTruncatingShuffle) ? VecType : ShuffleInputType;
2095 VecTyForCost, ConcatMask);
2097 LLVM_DEBUG(
dbgs() <<
"Found a reduction feeding from a shuffle: " << *Shuffle
2099 LLVM_DEBUG(
dbgs() <<
" OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2101 if (NewCost < OldCost) {
2105 LLVM_DEBUG(
dbgs() <<
"Created new shuffle: " << *NewShuffle <<
"\n");
2106 replaceValue(*Shuffle, *NewShuffle);
2111 return foldSelectShuffle(*Shuffle,
true);
2118bool VectorCombine::foldCastFromReductions(
Instruction &
I) {
2119 auto *
II = dyn_cast<IntrinsicInst>(&
I);
2123 bool TruncOnly =
false;
2126 case Intrinsic::vector_reduce_add:
2127 case Intrinsic::vector_reduce_mul:
2130 case Intrinsic::vector_reduce_and:
2131 case Intrinsic::vector_reduce_or:
2132 case Intrinsic::vector_reduce_xor:
2139 Value *ReductionSrc =
I.getOperand(0);
2149 auto *SrcTy = cast<VectorType>(Src->getType());
2150 auto *ReductionSrcTy = cast<VectorType>(ReductionSrc->
getType());
2151 Type *ResultTy =
I.getType();
2155 ReductionOpc, ReductionSrcTy, std::nullopt,
CostKind);
2158 cast<CastInst>(ReductionSrc));
2165 if (OldCost <= NewCost || !NewCost.
isValid())
2169 II->getIntrinsicID(), {Src});
2171 replaceValue(
I, *NewCast);
2185bool VectorCombine::foldSelectShuffle(
Instruction &
I,
bool FromReduction) {
2186 auto *SVI = cast<ShuffleVectorInst>(&
I);
2187 auto *VT = cast<FixedVectorType>(
I.getType());
2188 auto *Op0 = dyn_cast<Instruction>(SVI->getOperand(0));
2189 auto *Op1 = dyn_cast<Instruction>(SVI->getOperand(1));
2190 if (!Op0 || !Op1 || Op0 == Op1 || !Op0->isBinaryOp() || !Op1->isBinaryOp() ||
2194 auto *SVI0A = dyn_cast<Instruction>(Op0->getOperand(0));
2195 auto *SVI0B = dyn_cast<Instruction>(Op0->getOperand(1));
2196 auto *SVI1A = dyn_cast<Instruction>(Op1->getOperand(0));
2197 auto *SVI1B = dyn_cast<Instruction>(Op1->getOperand(1));
2200 if (!
I ||
I->getOperand(0)->getType() != VT)
2203 return U != Op0 && U != Op1 &&
2204 !(isa<ShuffleVectorInst>(U) &&
2205 (InputShuffles.contains(cast<Instruction>(U)) ||
2206 isInstructionTriviallyDead(cast<Instruction>(U))));
2209 if (checkSVNonOpUses(SVI0A) || checkSVNonOpUses(SVI0B) ||
2210 checkSVNonOpUses(SVI1A) || checkSVNonOpUses(SVI1B))
2218 for (
auto *U :
I->users()) {
2219 auto *SV = dyn_cast<ShuffleVectorInst>(U);
2220 if (!SV || SV->getType() != VT)
2222 if ((SV->getOperand(0) != Op0 && SV->getOperand(0) != Op1) ||
2223 (SV->getOperand(1) != Op0 && SV->getOperand(1) != Op1))
2230 if (!collectShuffles(Op0) || !collectShuffles(Op1))
2234 if (FromReduction && Shuffles.
size() > 1)
2239 if (!FromReduction) {
2241 for (
auto *U : SV->users()) {
2244 Shuffles.push_back(SSV);
2256 int MaxV1Elt = 0, MaxV2Elt = 0;
2257 unsigned NumElts = VT->getNumElements();
2260 SVN->getShuffleMask(Mask);
2264 Value *SVOp0 = SVN->getOperand(0);
2265 Value *SVOp1 = SVN->getOperand(1);
2266 if (isa<UndefValue>(SVOp1)) {
2267 auto *SSV = cast<ShuffleVectorInst>(SVOp0);
2270 for (
unsigned I = 0, E =
Mask.size();
I != E;
I++) {
2276 if (SVOp0 == Op1 && SVOp1 == Op0) {
2280 if (SVOp0 != Op0 || SVOp1 != Op1)
2287 for (
unsigned I = 0;
I <
Mask.size();
I++) {
2290 }
else if (Mask[
I] <
static_cast<int>(NumElts)) {
2291 MaxV1Elt = std::max(MaxV1Elt, Mask[
I]);
2292 auto It =
find_if(V1, [&](
const std::pair<int, int> &
A) {
2293 return Mask[
I] ==
A.first;
2302 MaxV2Elt = std::max<int>(MaxV2Elt, Mask[
I] - NumElts);
2303 auto It =
find_if(V2, [&](
const std::pair<int, int> &
A) {
2304 return Mask[
I] -
static_cast<int>(NumElts) ==
A.first;
2307 ReconstructMask.
push_back(NumElts + It -
V2.begin());
2310 V2.emplace_back(Mask[
I] - NumElts, NumElts +
V2.size());
2318 sort(ReconstructMask);
2319 OrigReconstructMasks.
push_back(std::move(ReconstructMask));
2326 if (V1.
empty() ||
V2.empty() ||
2327 (MaxV1Elt ==
static_cast<int>(V1.
size()) - 1 &&
2328 MaxV2Elt ==
static_cast<int>(
V2.size()) - 1))
2335 auto *SV = dyn_cast<ShuffleVectorInst>(
I);
2338 if (isa<UndefValue>(SV->getOperand(1)))
2339 if (
auto *SSV = dyn_cast<ShuffleVectorInst>(SV->getOperand(0)))
2340 if (InputShuffles.contains(SSV))
2342 return SV->getMaskValue(M);
2350 std::pair<int, int>
Y) {
2351 int MXA = GetBaseMaskValue(
A,
X.first);
2352 int MYA = GetBaseMaskValue(
A,
Y.first);
2355 stable_sort(V1, [&](std::pair<int, int>
A, std::pair<int, int>
B) {
2356 return SortBase(SVI0A,
A,
B);
2358 stable_sort(V2, [&](std::pair<int, int>
A, std::pair<int, int>
B) {
2359 return SortBase(SVI1A,
A,
B);
2364 for (
const auto &Mask : OrigReconstructMasks) {
2366 for (
int M : Mask) {
2368 auto It =
find_if(V, [M](
auto A) {
return A.second ==
M; });
2369 assert(It !=
V.end() &&
"Expected all entries in Mask");
2370 return std::distance(
V.begin(), It);
2374 else if (M <
static_cast<int>(NumElts)) {
2375 ReconstructMask.
push_back(FindIndex(V1, M));
2377 ReconstructMask.
push_back(NumElts + FindIndex(V2, M));
2380 ReconstructMasks.push_back(std::move(ReconstructMask));
2386 for (
unsigned I = 0;
I < V1.
size();
I++) {
2387 V1A.
push_back(GetBaseMaskValue(SVI0A, V1[
I].first));
2388 V1B.
push_back(GetBaseMaskValue(SVI0B, V1[
I].first));
2390 for (
unsigned I = 0;
I <
V2.size();
I++) {
2391 V2A.
push_back(GetBaseMaskValue(SVI1A, V2[
I].first));
2392 V2B.
push_back(GetBaseMaskValue(SVI1B, V2[
I].first));
2394 while (V1A.
size() < NumElts) {
2398 while (V2A.
size() < NumElts) {
2404 auto *SV = dyn_cast<ShuffleVectorInst>(
I);
2410 VT, SV->getShuffleMask());
2421 CostBefore += std::accumulate(Shuffles.begin(), Shuffles.end(),
2423 CostBefore += std::accumulate(InputShuffles.begin(), InputShuffles.end(),
2435 CostAfter += std::accumulate(ReconstructMasks.begin(), ReconstructMasks.end(),
2437 std::set<SmallVector<int>> OutputShuffleMasks({V1A, V1B, V2A, V2B});
2439 std::accumulate(OutputShuffleMasks.begin(), OutputShuffleMasks.end(),
2442 LLVM_DEBUG(
dbgs() <<
"Found a binop select shuffle pattern: " <<
I <<
"\n");
2444 <<
" vs CostAfter: " << CostAfter <<
"\n");
2445 if (CostBefore <= CostAfter)
2450 auto *SV = dyn_cast<ShuffleVectorInst>(
I);
2453 if (isa<UndefValue>(SV->getOperand(1)))
2454 if (
auto *SSV = dyn_cast<ShuffleVectorInst>(SV->getOperand(0)))
2455 if (InputShuffles.contains(SSV))
2457 return SV->getOperand(
Op);
2461 GetShuffleOperand(SVI0A, 1), V1A);
2464 GetShuffleOperand(SVI0B, 1), V1B);
2467 GetShuffleOperand(SVI1A, 1), V2A);
2470 GetShuffleOperand(SVI1B, 1), V2B);
2474 if (
auto *
I = dyn_cast<Instruction>(NOp0))
2475 I->copyIRFlags(Op0,
true);
2479 if (
auto *
I = dyn_cast<Instruction>(NOp1))
2480 I->copyIRFlags(Op1,
true);
2482 for (
int S = 0, E = ReconstructMasks.size(); S != E; S++) {
2485 replaceValue(*Shuffles[S], *NSV);
2488 Worklist.pushValue(NSV0A);
2489 Worklist.pushValue(NSV0B);
2490 Worklist.pushValue(NSV1A);
2491 Worklist.pushValue(NSV1B);
2492 for (
auto *S : Shuffles)
2499bool VectorCombine::run() {
2507 bool MadeChange =
false;
2510 bool IsFixedVectorType = isa<FixedVectorType>(
I.getType());
2511 auto Opcode =
I.getOpcode();
2517 if (IsFixedVectorType) {
2519 case Instruction::InsertElement:
2520 MadeChange |= vectorizeLoadInsert(
I);
2522 case Instruction::ShuffleVector:
2523 MadeChange |= widenSubvectorLoad(
I);
2532 if (isa<VectorType>(
I.getType())) {
2533 MadeChange |= scalarizeBinopOrCmp(
I);
2534 MadeChange |= scalarizeLoadExtract(
I);
2535 MadeChange |= scalarizeVPIntrinsic(
I);
2538 if (Opcode == Instruction::Store)
2539 MadeChange |= foldSingleElementStore(
I);
2542 if (TryEarlyFoldsOnly)
2549 if (IsFixedVectorType) {
2551 case Instruction::InsertElement:
2552 MadeChange |= foldInsExtFNeg(
I);
2554 case Instruction::ShuffleVector:
2555 MadeChange |= foldShuffleOfBinops(
I);
2556 MadeChange |= foldShuffleOfCastops(
I);
2557 MadeChange |= foldShuffleOfShuffles(
I);
2558 MadeChange |= foldSelectShuffle(
I);
2559 MadeChange |= foldShuffleToIdentity(
I);
2561 case Instruction::BitCast:
2562 MadeChange |= foldBitcastShuffle(
I);
2567 case Instruction::Call:
2568 MadeChange |= foldShuffleFromReductions(
I);
2569 MadeChange |= foldCastFromReductions(
I);
2571 case Instruction::ICmp:
2572 case Instruction::FCmp:
2573 MadeChange |= foldExtractExtract(
I);
2577 MadeChange |= foldExtractExtract(
I);
2578 MadeChange |= foldExtractedCmps(
I);
2591 if (
I.isDebugOrPseudoInst())
2597 while (!Worklist.isEmpty()) {
2620 VectorCombine
Combiner(
F,
TTI, DT, AA, AC,
DL, TryEarlyFoldsOnly);
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This is the interface for LLVM's primary stateless and local alias analysis.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Returns the sub-type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
This file defines the DenseMap class.
std::optional< std::vector< StOtherPiece > > Other
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
This is the interface for a simple mod/ref and alias analysis over globals.
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU)
uint64_t IntrinsicInst * II
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
FunctionAnalysisManager FAM
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope exit.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metrics from passes.
#define STATISTIC(VARNAME, DESC)
static SymbolRef::Type getType(const Symbol *Sym)
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
static SmallVector< InstLane > generateInstLaneVectorFromOperand(ArrayRef< InstLane > Item, int Op)
static Value * createShiftShuffle(Value *Vec, unsigned OldIndex, unsigned NewIndex, IRBuilder<> &Builder)
Create a shuffle that translates (shifts) 1 element from the input vector to a new element location.
static Value * peekThroughBitcasts(Value *V)
Return the source operand of a potentially bitcasted value.
static Align computeAlignmentAfterScalarization(Align VectorAlignment, Type *ScalarType, Value *Idx, const DataLayout &DL)
The memory operation on a vector of ScalarType had alignment of VectorAlignment.
static ScalarizationResult canScalarizeAccess(VectorType *VecTy, Value *Idx, Instruction *CtxI, AssumptionCache &AC, const DominatorTree &DT)
Check if it is legal to scalarize a memory access to VecTy at index Idx.
static cl::opt< bool > DisableVectorCombine("disable-vector-combine", cl::init(false), cl::Hidden, cl::desc("Disable all vector combine transforms"))
static InstLane lookThroughShuffles(Use *U, int Lane)
static bool canWidenLoad(LoadInst *Load, const TargetTransformInfo &TTI)
static bool isFreeConcat(ArrayRef< InstLane > Item, const TargetTransformInfo &TTI)
Detect concat of multiple values into a vector.
static Value * generateNewInstTree(ArrayRef< InstLane > Item, FixedVectorType *Ty, const SmallPtrSet< Use *, 4 > &IdentityLeafs, const SmallPtrSet< Use *, 4 > &SplatLeafs, const SmallPtrSet< Use *, 4 > &ConcatLeafs, IRBuilder<> &Builder)
static const unsigned InvalidIndex
std::pair< Use *, int > InstLane
static cl::opt< unsigned > MaxInstrsToScan("vector-combine-max-scan-instrs", cl::init(30), cl::Hidden, cl::desc("Max number of instructions to scan for vector combining."))
static cl::opt< bool > DisableBinopExtractShuffle("disable-binop-extract-shuffle", cl::init(false), cl::Hidden, cl::desc("Disable binop extract to shuffle transforms"))
static bool isMemModifiedBetween(BasicBlock::iterator Begin, BasicBlock::iterator End, const MemoryLocation &Loc, AAResults &AA)
static ExtractElementInst * translateExtract(ExtractElementInst *ExtElt, unsigned NewIndex, IRBuilder<> &Builder)
Given an extract element instruction with constant index operand, shuffle the source vector (shift the scalar element) to a NewIndex for extraction. Return null if the input can be constant folded, so that we are not creating unnecessary instructions.
A manager for alias analyses.
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
Check whether or not an instruction may read or write the optionally specified memory location.
Class for arbitrary precision integers.
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & front() const
front - Get the first element.
size_t size() const
size - Get the array size.
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
bool hasFnAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the function.
LLVM Basic Block Representation.
InstListType::iterator iterator
Instruction iterators...
BinaryOps getOpcode() const
Represents analyses that only rely on functions' control flow.
Value * getArgOperand(unsigned i) const
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
bool isFPPredicate() const
static Constant * getExtractElement(Constant *Vec, Constant *Idx, Type *OnlyIfReducedTy=nullptr)
This is the shared class of boolean and integer constants.
const APInt & getValue() const
Return the constant as an APInt value reference.
This class represents a range of values.
ConstantRange urem(const ConstantRange &Other) const
Return a new range representing the possible values resulting from an unsigned remainder operation of a value in this range and a value in Other.
ConstantRange binaryAnd(const ConstantRange &Other) const
Return a new range representing the possible values resulting from a binary-and of a value in this range and a value in Other.
bool contains(const APInt &Val) const
Return true if the specified value is in the set.
static Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&... Args)
Analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Value * CreateFNegFMF(Value *V, Instruction *FMFSource, const Twine &Name="")
Copy fast-math-flags from an instruction rather than using the builder's default FMF.
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Value * CreateFreeze(Value *V, const Twine &Name="")
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Value * CreatePointerBitCastOrAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
Value * CreateUnOp(Instruction::UnaryOps Opc, Value *V, const Twine &Name="", MDNode *FPMathTag=nullptr)
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool' for the isVolatile parameter.
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="")
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
InstructionWorklist - This is the worklist management logic for InstCombine and other simplification ...
void pushUsersToWorkList(Instruction &I)
When an instruction is simplified, add all users of the instruction to the work lists because they mi...
void push(Instruction *I)
Push the instruction onto the worklist stack.
void remove(Instruction *I)
Remove I from the worklist if it exists.
bool comesBefore(const Instruction *Other) const
Given an instruction Other in the same basic block as this instruction, return true if this instruction comes before Other.
bool mayReadFromMemory() const LLVM_READONLY
Return true if this instruction may read memory.
void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
An instruction for reading from memory.
Representation for a specific memory location.
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
void preserveSet()
Mark an analysis set as preserved.
This instruction constructs a fixed permutation of two input vectors.
int getMaskValue(unsigned Elt) const
Return the shuffle mask value of this instruction for the given element index.
VectorType * getType() const
Overload to return most specific vector type.
static void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossings.
static void commuteShuffleMask(MutableArrayRef< int > Mask, unsigned InVecNumElts)
Change values in a shuffle permute mask assuming the two vector operands of length InVecNumElts have swapped position.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setAlignment(Align Align)
Analysis pass providing the TargetTransformInfo.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isVectorTy() const
True if this is an instance of VectorType.
bool isPointerTy() const
True if this is an instance of PointerType.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
'undef' values are things that do not have specified contents.
A Use represents the edge between a Value definition and its users.
Value * getOperand(unsigned i) const
static bool isVPBinOp(Intrinsic::ID ID)
This is the common base class for vector predication intrinsics.
std::optional< unsigned > getFunctionalIntrinsicID() const
std::optional< unsigned > getFunctionalOpcode() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
This is a wrapper around stripAndAccumulateConstantOffsets with the in-bounds requirement set to false.
bool hasOneUse() const
Return true if there is exactly one use of this value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
unsigned getValueID() const
Return an ID for the concrete type of this object.
bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
StringRef getName() const
Return a constant reference to the value's name.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &)
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
AttributeList getAttributes(LLVMContext &C, ID id)
Return the attributes for an intrinsic.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
BinaryOp_match< LHS, RHS, Instruction::URem > m_URem(const LHS &L, const RHS &R)
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
match_combine_and< LTy, RTy > m_CombineAnd(const LTy &L, const RTy &R)
Combine two pattern matchers matching L && R.
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
OneUse_match< T > m_OneUse(const T &SubPattern)
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
class_match< UndefValue > m_UndefValue()
Match an arbitrary UndefValue constant.
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
match_combine_or< CastInst_match< OpTy, SExtInst >, NNegZExt_match< OpTy > > m_SExtLike(const OpTy &Op)
Match either "sext" or "zext nneg".
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
auto m_Undef()
Match an arbitrary undef constant.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void stable_sort(R &&Range)
UnaryFunction for_each(R &&Range, UnaryFunction F)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
bool isSafeToLoadUnconditionally(Value *V, Align Alignment, const APInt &Size, const DataLayout &DL, Instruction *ScanFrom=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if we know that executing a load from this value cannot trap.
detail::scope_exit< std::decay_t< Callable > > make_scope_exit(Callable &&F)
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B, C, ...), where each element is taken from the corresponding input range.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
unsigned getArithmeticReductionInstruction(Intrinsic::ID RdxID)
Returns the arithmetic instruction opcode used when expanding a reduction.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting iterators into the range.
bool mustSuppressSpeculation(const LoadInst &LI)
Return true if speculation of the given load must be suppressed to avoid ordering or interfering with...
bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
ConstantRange computeConstantRange(const Value *V, bool ForSigned, bool UseInstrInfo=true, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Determine the possible constant range of an integer or vector of integer value.
bool isSafeToSpeculativelyExecuteWithOpcode(unsigned Opcode, const Instruction *Inst, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
This returns the same result as isSafeToSpeculativelyExecute if Opcode is the actual opcode of Inst.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned element.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
bool isModSet(const ModRefInfo MRI)
void sort(IteratorTy Start, IteratorTy End)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
void propagateIRFlags(Value *I, ArrayRef< Value * > VL, Value *OpValue=nullptr, bool IncludeWrapFlags=true)
Get the intersection (logical and) of all of the potential IR flags of each scalar operation (VL) tha...
bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
constexpr int PoisonMaskElem
void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
DWARFExpression::Operation Op
bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if the instruction does not have any effects besides calculating the result and does not ...
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx)
Identifies if the vector form of the intrinsic has a scalar operand.
bool isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Returns true if V cannot be poison, but may be undef.
bool isTriviallyVectorizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially vectorizable.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.