#define DEBUG_TYPE "vector-combine"

STATISTIC(NumVecLoad, "Number of vector loads formed");
STATISTIC(NumVecCmp, "Number of vector compares formed");
STATISTIC(NumVecBO, "Number of vector binops formed");
STATISTIC(NumVecCmpBO, "Number of vector compare + binop formed");
STATISTIC(NumShufOfBitcast, "Number of shuffles moved after bitcast");
STATISTIC(NumScalarBO, "Number of scalar binops formed");
STATISTIC(NumScalarCmp, "Number of scalar compares formed");
static cl::opt<bool> DisableVectorCombine(
    "disable-vector-combine", cl::init(false), cl::Hidden,
    cl::desc("Disable all vector combine transforms"));

static cl::opt<bool> DisableBinopExtractShuffle(
    "disable-binop-extract-shuffle", cl::init(false), cl::Hidden,
    cl::desc("Disable binop extract to shuffle transforms"));

static cl::opt<unsigned> MaxInstrsToScan(
    "vector-combine-max-scan-instrs", cl::init(30), cl::Hidden,
    cl::desc("Max number of instructions to scan for vector combining."));

static const unsigned InvalidIndex = std::numeric_limits<unsigned>::max();
71 :
F(
F), Builder(
F.getContext()),
TTI(
TTI), DT(DT), AA(AA), AC(AC),
DL(
DL),
72 TryEarlyFoldsOnly(TryEarlyFoldsOnly) {}
87 bool TryEarlyFoldsOnly;
                           unsigned PreferredExtractIndex) const;
                             unsigned PreferredExtractIndex);
  bool foldSelectShuffle(Instruction &I, bool FromReduction = false);
    if (auto *NewI = dyn_cast<Instruction>(&New)) {

  while (auto *BitCast = dyn_cast<BitCastInst>(V))
    V = BitCast->getOperand(0);
  if (!Load || !Load->isSimple() || !Load->hasOneUse() ||
      Load->getFunction()->hasFnAttribute(Attribute::SanitizeMemTag) ||

  Type *ScalarTy = Load->getType()->getScalarType();
  if (!ScalarSize || !MinVectorSize || MinVectorSize % ScalarSize != 0 ||
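// Try to widen an insertion of a scalar load into a vector: when the pointer
// is safely dereferenceable out to the minimum legal vector width, load a
// whole vector and shuffle the loaded element into place instead, provided
// the cost model says the vector load plus shuffle is no more expensive.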
bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
  auto *Load = dyn_cast<LoadInst>(X);
  Value *SrcPtr = Load->getPointerOperand()->stripPointerCasts();
  assert(isa<PointerType>(SrcPtr->getType()) && "Expected a pointer type");
  unsigned MinVecNumElts = MinVectorSize / ScalarSize;
  auto *MinVecTy = VectorType::get(ScalarTy, MinVecNumElts, false);
  unsigned OffsetEltIndex = 0;
  unsigned OffsetBitWidth = DL->getIndexTypeSizeInBits(SrcPtr->getType());
  uint64_t ScalarSizeInBytes = ScalarSize / 8;
  if (Offset.urem(ScalarSizeInBytes) != 0)
  OffsetEltIndex = Offset.udiv(ScalarSizeInBytes).getZExtValue();
  if (OffsetEltIndex >= MinVecNumElts)
  unsigned AS = Load->getPointerAddressSpace();
  auto *Ty = cast<FixedVectorType>(I.getType());
  unsigned OutputNumElts = Ty->getNumElements();
  assert(OffsetEltIndex < MinVecNumElts && "Address offset too big");
  Mask[0] = OffsetEltIndex;
  if (OldCost < NewCost || !NewCost.isValid())
  replaceValue(I, *VecLd);
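// widenSubvectorLoad: an identity-with-padding shuffle of a loaded vector can
// often be replaced by a single wider load of the destination type, when the
// wider access is known to be safe and the cost model agrees.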
  auto *Shuf = cast<ShuffleVectorInst>(&I);
  if (!Shuf->isIdentityWithPadding())
      cast<FixedVectorType>(Shuf->getOperand(0)->getType())->getNumElements();
  unsigned OpIndex = any_of(Shuf->getShuffleMask(), [&NumOpElts](int M) {
    return M >= (int)(NumOpElts);
  auto *Load = dyn_cast<LoadInst>(Shuf->getOperand(OpIndex));
  auto *Ty = cast<FixedVectorType>(I.getType());
  Value *SrcPtr = Load->getPointerOperand()->stripPointerCasts();
  assert(isa<PointerType>(SrcPtr->getType()) && "Expected a pointer type");
  unsigned AS = Load->getPointerAddressSpace();
  if (OldCost < NewCost || !NewCost.isValid())
  replaceValue(I, *VecLd);
  assert(Index0C && Index1C && "Expected constant extract indexes");
  unsigned Index0 = Index0C->getZExtValue();
  unsigned Index1 = Index1C->getZExtValue();
  if (Index0 == Index1)
  if (PreferredExtractIndex == Index0)
  if (PreferredExtractIndex == Index1)
  return Index0 > Index1 ? Ext0 : Ext1;
                                          unsigned PreferredExtractIndex) {
  auto *Ext0IndexC = dyn_cast<ConstantInt>(Ext0->getOperand(1));
  auto *Ext1IndexC = dyn_cast<ConstantInt>(Ext1->getOperand(1));
  assert(Ext0IndexC && Ext1IndexC && "Expected constant extract indexes");
  unsigned Opcode = I.getOpcode();
  assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
         "Expected a compare");
  unsigned Ext0Index = Ext0IndexC->getZExtValue();
  unsigned Ext1Index = Ext1IndexC->getZExtValue();
  InstructionCost CheapExtractCost = std::min(Extract0Cost, Extract1Cost);
    bool HasUseTax = Ext0 == Ext1 ? !Ext0->hasNUses(2)
    OldCost = CheapExtractCost + ScalarOpCost;
    NewCost = VectorOpCost + CheapExtractCost + HasUseTax * CheapExtractCost;
    OldCost = Extract0Cost + Extract1Cost + ScalarOpCost;
    NewCost = VectorOpCost + CheapExtractCost +
  ConvertToShuffle = getShuffleExtract(Ext0, Ext1, PreferredExtractIndex);
  if (ConvertToShuffle) {
  return OldCost < NewCost;
  auto *VecTy = cast<FixedVectorType>(Vec->getType());
  ShufMask[NewIndex] = OldIndex;
  assert(isa<ConstantInt>(C) && "Expected a constant index operand");
  if (isa<Constant>(X))
  assert(isa<CmpInst>(&I) && "Expected a compare");
         "Expected matching constant extract indexes");
  replaceValue(I, *NewExt);

  assert(isa<BinaryOperator>(&I) && "Expected a binary operator");
         "Expected matching constant extract indexes");
  if (auto *VecBOInst = dyn_cast<Instruction>(VecBO))
    VecBOInst->copyIRFlags(&I);
  replaceValue(I, *NewExt);
  auto *Ext0 = cast<ExtractElementInst>(I0);
  auto *Ext1 = cast<ExtractElementInst>(I1);
  if (isExtractExtractCheap(Ext0, Ext1, I, ExtractToChange, InsertIndex))
  if (ExtractToChange) {
    unsigned CheapExtractIdx = ExtractToChange == Ext0 ? C1 : C0;
    if (ExtractToChange == Ext0)
    foldExtExtCmp(Ext0, Ext1, I);
    foldExtExtBinop(Ext0, Ext1, I);
  auto *VecTy = cast<FixedVectorType>(I.getType());
  if (SrcVec->getType() != VecTy)
  unsigned NumElts = VecTy->getNumElements();
  if (Index >= NumElts)
  std::iota(Mask.begin(), Mask.end(), 0);
  if (NewCost > OldCost)
  replaceValue(I, *Shuf);
  auto *DestTy = dyn_cast<FixedVectorType>(I.getType());
  auto *SrcTy = dyn_cast<FixedVectorType>(V0->getType());
  if (!DestTy || !SrcTy)
  unsigned DestEltSize = DestTy->getScalarSizeInBits();
  unsigned SrcEltSize = SrcTy->getScalarSizeInBits();
  if (SrcTy->getPrimitiveSizeInBits() % DestEltSize != 0)
  bool IsUnary = isa<UndefValue>(V1);
  if (!(BCTy0 && BCTy0->getElementType() == DestTy->getElementType()) &&
      !(BCTy1 && BCTy1->getElementType() == DestTy->getElementType()))
  if (DestEltSize <= SrcEltSize) {
    assert(SrcEltSize % DestEltSize == 0 && "Unexpected shuffle mask");
    unsigned ScaleFactor = SrcEltSize / DestEltSize;
    assert(DestEltSize % SrcEltSize == 0 && "Unexpected shuffle mask");
    unsigned ScaleFactor = DestEltSize / SrcEltSize;
  unsigned NumSrcElts = SrcTy->getPrimitiveSizeInBits() / DestEltSize;
  unsigned NumOps = IsUnary ? 1 : 2;
                              TargetTransformInfo::CastContextHint::None,
                              TargetTransformInfo::CastContextHint::None, CK);
  if (DestCost > SrcCost || !DestCost.isValid())
  replaceValue(I, *Shuf);
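// Scalarize a VP intrinsic whose vector operands are splats of scalars and
// whose mask is all-true: perform the operation on the two scalars and splat
// the result back into a vector, when that is cheaper and safe to speculate.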
bool VectorCombine::scalarizeVPIntrinsic(Instruction &I) {
  if (!isa<VPIntrinsic>(I))
  if (!ScalarOp0 || !ScalarOp1)
  auto IsAllTrueMask = [](Value *MaskVal) {
      if (auto *ConstValue = dyn_cast<Constant>(SplattedVal))
        return ConstValue->isAllOnesValue();
  if (auto *FVTy = dyn_cast<FixedVectorType>(VecTy))
    Mask.resize(FVTy->getNumElements(), 0);
    Args.push_back(V->getType());
  std::optional<unsigned> FunctionalOpcode =
  std::optional<Intrinsic::ID> ScalarIntrID = std::nullopt;
  if (!FunctionalOpcode) {
                    << ", Cost of scalarizing:" << NewCost << "\n");
  if (OldCost < NewCost || !NewCost.isValid())
  bool SafeToSpeculate;
                           .hasFnAttr(Attribute::AttrKind::Speculatable);
        *FunctionalOpcode, &VPI, nullptr, &AC, &DT);
  if (!SafeToSpeculate &&
                                      {ScalarOp0, ScalarOp1})
                                      ScalarOp0, ScalarOp1);
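// Match a vector binop or compare whose operands are insertions of scalars
// into constant vectors, perform the operation on the scalars instead, and
// insert the scalar result, when the cost model favors the scalar form.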
bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
  bool IsCmp = Pred != CmpInst::Predicate::BAD_ICMP_PREDICATE;
  for (User *U : I.users())
  Constant *VecC0 = nullptr, *VecC1 = nullptr;
  Value *V0 = nullptr, *V1 = nullptr;
  if (IsConst0 && IsConst1)
  if (!IsConst0 && !IsConst1 && Index0 != Index1)
  auto *I0 = dyn_cast_or_null<Instruction>(V0);
  auto *I1 = dyn_cast_or_null<Instruction>(V1);
  if ((IsConst0 && I1 && I1->mayReadFromMemory()) ||
  Type *VecTy = I.getType();
         "Unexpected types for insert element into binop or cmp");
  unsigned Opcode = I.getOpcode();
      (IsConst0 ? 0 : InsertCost) + (IsConst1 ? 0 : InsertCost) + VectorOpCost;
      (IsConst0 ? 0 : !Ins0->hasOneUse() * InsertCost) +
      (IsConst1 ? 0 : !Ins1->hasOneUse() * InsertCost);
  if (OldCost < NewCost || !NewCost.isValid())
  Scalar->setName(I.getName() + ".scalar");
  if (auto *ScalarInst = dyn_cast<Instruction>(Scalar))
    ScalarInst->copyIRFlags(&I);
      IsCmp ? Builder.CreateCmp(Pred, VecC0, VecC1)
  replaceValue(I, *Insert);
  if (!I.isBinaryOp() || !I.getType()->isIntegerTy(1))
  Value *B0 = I.getOperand(0), *B1 = I.getOperand(1);
  auto *Ext0 = cast<ExtractElementInst>(I0);
  auto *Ext1 = cast<ExtractElementInst>(I1);
                                  : Instruction::ICmp;
  auto *VecTy = dyn_cast<FixedVectorType>(X->getType());
  int CheapIndex = ConvertToShuf == Ext0 ? Index1 : Index0;
  int ExpensiveIndex = ConvertToShuf == Ext0 ? Index0 : Index1;
  ShufMask[CheapIndex] = ExpensiveIndex;
  if (OldCost < NewCost || !NewCost.isValid())
  replaceValue(I, *NewExt);
  unsigned NumScanned = 0;

class ScalarizationResult {
  enum class StatusTy { Unsafe, Safe, SafeWithFreeze };

  ScalarizationResult(StatusTy Status, Value *ToFreeze = nullptr)
  ScalarizationResult(const ScalarizationResult &Other) = default;
  ~ScalarizationResult() {
    assert(!ToFreeze && "freeze() not called with ToFreeze being set");

  static ScalarizationResult unsafe() { return {StatusTy::Unsafe}; }
  static ScalarizationResult safe() { return {StatusTy::Safe}; }
  static ScalarizationResult safeWithFreeze(Value *ToFreeze) {
    return {StatusTy::SafeWithFreeze, ToFreeze};

  bool isSafe() const { return Status == StatusTy::Safe; }
  bool isUnsafe() const { return Status == StatusTy::Unsafe; }
  bool isSafeWithFreeze() const { return Status == StatusTy::SafeWithFreeze; }

    Status = StatusTy::Unsafe;
    assert(isSafeWithFreeze() &&
           "should only be used when freezing is required");
           "UserI must be a user of ToFreeze");
      if (U.get() == ToFreeze)
  uint64_t NumElements = VecTy->getElementCount().getKnownMinValue();
  if (auto *C = dyn_cast<ConstantInt>(Idx)) {
    if (C->getValue().ult(NumElements))
      return ScalarizationResult::safe();
    return ScalarizationResult::unsafe();

  unsigned IntWidth = Idx->getType()->getScalarSizeInBits();
  APInt Zero(IntWidth, 0);
  APInt MaxElts(IntWidth, NumElements);
                             true, &AC, CtxI, &DT)))
    return ScalarizationResult::safe();
  return ScalarizationResult::unsafe();
  if (ValidIndices.contains(IdxRange))
    return ScalarizationResult::safeWithFreeze(IdxBase);
  return ScalarizationResult::unsafe();

  if (auto *C = dyn_cast<ConstantInt>(Idx))
        C->getZExtValue() * DL.getTypeStoreSize(ScalarType));
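// Fold a store of a vector built by inserting one element into a vector
// loaded from the same address into a scalar store of just that element,
// provided the insertion index can be shown to be in bounds (freezing it
// first when necessary).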
bool VectorCombine::foldSingleElementStore(Instruction &I) {
  auto *SI = cast<StoreInst>(&I);
  if (!SI->isSimple() || !isa<VectorType>(SI->getValueOperand()->getType()))
  if (!match(SI->getValueOperand(),
  if (auto *Load = dyn_cast<LoadInst>(Source)) {
    auto VecTy = cast<VectorType>(SI->getValueOperand()->getType());
    Value *SrcAddr = Load->getPointerOperand()->stripPointerCasts();
    if (!Load->isSimple() || Load->getParent() != SI->getParent() ||
        !DL->typeSizeEqualsStoreSize(Load->getType()->getScalarType()) ||
        SrcAddr != SI->getPointerOperand()->stripPointerCasts())
    if (ScalarizableIdx.isUnsafe() ||
    if (ScalarizableIdx.isSafeWithFreeze())
      ScalarizableIdx.freeze(Builder, *cast<Instruction>(Idx));
        SI->getValueOperand()->getType(), SI->getPointerOperand(),
        {ConstantInt::get(Idx->getType(), 0), Idx});
    replaceValue(I, *NSI);
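// Scalarize a vector load whose users are extractelement instructions with
// provably in-bounds indices: replace each extract with a scalar load through
// a GEP to that element, when the total cost of the scalar loads is lower.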
bool VectorCombine::scalarizeLoadExtract(Instruction &I) {
  auto *VecTy = cast<VectorType>(I.getType());
  auto *LI = cast<LoadInst>(&I);
  if (LI->isVolatile() || !DL->typeSizeEqualsStoreSize(VecTy->getScalarType()))
                                            LI->getPointerAddressSpace());
  unsigned NumInstChecked = 0;
    for (auto &Pair : NeedFreeze)
      Pair.second.discard();
    auto *UI = dyn_cast<ExtractElementInst>(U);
    if (!UI || UI->getParent() != LI->getParent())
         make_range(std::next(LI->getIterator()), UI->getIterator())) {
    LastCheckedInst = UI;
    if (ScalarIdx.isUnsafe())
    if (ScalarIdx.isSafeWithFreeze()) {
      ScalarIdx.discard();
    auto *Index = dyn_cast<ConstantInt>(UI->getOperand(1));
        Align(1), LI->getPointerAddressSpace());
  if (ScalarizedCost >= OriginalCost)
    auto *EI = cast<ExtractElementInst>(U);
    auto It = NeedFreeze.find(EI);
    if (It != NeedFreeze.end())
      It->second.freeze(Builder, *cast<Instruction>(Idx));
    auto *NewLoad = cast<LoadInst>(Builder.CreateLoad(
        VecTy->getElementType(), GEP, EI->getName() + ".scalar"));
        LI->getAlign(), VecTy->getElementType(), Idx, *DL);
    NewLoad->setAlignment(ScalarOpAlignment);
    replaceValue(*EI, *NewLoad);
  FailureGuard.release();
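// Try to move a shuffle of two binary operators before the binops: turn
// "shuffle (binop X, Y), (binop Z, W)" into "binop (shuffle X, Z), (shuffle
// Y, W)" when the new shuffles are no more expensive than the original one.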
bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
  auto *ShuffleDstTy = dyn_cast<FixedVectorType>(I.getType());
  auto *BinOpTy = dyn_cast<FixedVectorType>(B0->getType());
  if (!ShuffleDstTy || !BinOpTy)
  unsigned NumSrcElts = BinOpTy->getNumElements();
  if (BinaryOperator::isCommutative(Opcode) && X != Z && Y != W &&
    auto ConvertToUnary = [NumSrcElts](int &M) {
      if (M >= (int)NumSrcElts)
                     OldMask, CostKind, 0, nullptr, {B0, B1}, &I);
             << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (NewCost >= OldCost)
  if (auto *NewInst = dyn_cast<Instruction>(NewBO)) {
    NewInst->copyIRFlags(B0);
    NewInst->andIRFlags(B1);
  replaceValue(I, *NewBO);
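// Try to replace "shuffle (cast X), (cast Y)" with "cast (shuffle X, Y)" when
// both casts share the same opcode and source type and the cost model says a
// single cast of the shuffled sources is cheaper.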
bool VectorCombine::foldShuffleOfCastops(Instruction &I) {
  auto *C0 = dyn_cast<CastInst>(V0);
  auto *C1 = dyn_cast<CastInst>(V1);
  if (C0->getSrcTy() != C1->getSrcTy())
  if (Opcode != C1->getOpcode()) {
      Opcode = Instruction::SExt;
  auto *ShuffleDstTy = dyn_cast<FixedVectorType>(I.getType());
  auto *CastDstTy = dyn_cast<FixedVectorType>(C0->getDestTy());
  auto *CastSrcTy = dyn_cast<FixedVectorType>(C0->getSrcTy());
  if (!ShuffleDstTy || !CastDstTy || !CastSrcTy)
  unsigned NumSrcElts = CastSrcTy->getNumElements();
  unsigned NumDstElts = CastDstTy->getNumElements();
  assert((NumDstElts == NumSrcElts || Opcode == Instruction::BitCast) &&
         "Only bitcasts expected to alter src/dst element counts");
  if (NumDstElts != NumSrcElts && (NumSrcElts % NumDstElts) != 0 &&
      (NumDstElts % NumSrcElts) != 0)
  if (NumSrcElts >= NumDstElts) {
    assert(NumSrcElts % NumDstElts == 0 && "Unexpected shuffle mask");
    unsigned ScaleFactor = NumSrcElts / NumDstElts;
    assert(NumDstElts % NumSrcElts == 0 && "Unexpected shuffle mask");
    unsigned ScaleFactor = NumDstElts / NumSrcElts;
  auto *NewShuffleDstTy =
                     OldMask, CostKind, 0, nullptr, std::nullopt, &I);
             << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (NewCost > OldCost)
  if (auto *NewInst = dyn_cast<Instruction>(Cast)) {
    NewInst->copyIRFlags(C0);
    NewInst->andIRFlags(C1);
  replaceValue(I, *Cast);
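// Merge a shuffle whose operands are themselves shuffles into a single
// shuffle of the innermost source vectors by composing the masks, when the
// combined shuffle is no more expensive than the chain it replaces.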
bool VectorCombine::foldShuffleOfShuffles(Instruction &I) {
  auto *ShufI0 = dyn_cast<Instruction>(I.getOperand(0));
  auto *ShufI1 = dyn_cast<Instruction>(I.getOperand(1));
  auto *ShuffleDstTy = dyn_cast<FixedVectorType>(I.getType());
  auto *ShuffleSrcTy = dyn_cast<FixedVectorType>(V0->getType());
  auto *ShuffleImmTy = dyn_cast<FixedVectorType>(I.getOperand(0)->getType());
  if (!ShuffleDstTy || !ShuffleSrcTy || !ShuffleImmTy ||
  unsigned NumSrcElts = ShuffleSrcTy->getNumElements();
  unsigned NumImmElts = ShuffleImmTy->getNumElements();
  if ((!isa<PoisonValue>(U0) &&
       any_of(InnerMask0, [&](int M) { return M >= (int)NumSrcElts; })) ||
      (!isa<PoisonValue>(U1) &&
       any_of(InnerMask1, [&](int M) { return M >= (int)NumSrcElts; })))
  for (int &M : NewMask) {
    if (0 <= M && M < (int)NumImmElts) {
    } else if (M >= (int)NumImmElts) {
      if (InnerMask1[M - NumImmElts] >= (int)NumSrcElts)
      M = InnerMask1[M - NumImmElts] + (V0 == V1 ? 0 : NumSrcElts);
    replaceValue(I, *V0);
                         InnerMask0, CostKind, 0, nullptr, {V0, U0}, ShufI0) +
                         InnerMask1, CostKind, 0, nullptr, {V1, U1}, ShufI1) +
                         OuterMask, CostKind, 0, nullptr, {ShufI0, ShufI1}, &I);
                         NewMask, CostKind, 0, nullptr, {V0, V1});
             << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (NewCost > OldCost)
  if (none_of(NewMask, [&](int M) { return 0 <= M && M < (int)NumSrcElts; }))
  if (none_of(NewMask, [&](int M) { return (int)NumSrcElts <= M; }))
  replaceValue(I, *Shuf);
  while (auto *SV = dyn_cast<ShuffleVectorInst>(U->get())) {
        cast<FixedVectorType>(SV->getOperand(0)->getType())->getNumElements();
    int M = SV->getMaskValue(Lane);
    if (static_cast<unsigned>(M) < NumElts) {
      U = &SV->getOperandUse(0);
      U = &SV->getOperandUse(1);

    auto [U, Lane] = IL;

  auto *Ty = cast<FixedVectorType>(Item.front().first->get()->getType());
  unsigned NumElts = Ty->getNumElements();
  if (Item.size() == NumElts || NumElts == 1 || Item.size() % NumElts != 0)
  std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
  unsigned NumSlices = Item.size() / NumElts;
  for (unsigned Slice = 0; Slice < NumSlices; ++Slice) {
    Use *SliceV = Item[Slice * NumElts].first;
    if (!SliceV || SliceV->get()->getType() != Ty)
    for (unsigned Elt = 0; Elt < NumElts; ++Elt) {
      auto [V, Lane] = Item[Slice * NumElts + Elt];
      if (Lane != static_cast<int>(Elt) || SliceV->get() != V->get())

  auto [FrontU, FrontLane] = Item.front();
  if (IdentityLeafs.contains(FrontU)) {
    return FrontU->get();
  if (ConcatLeafs.contains(FrontU)) {
        cast<FixedVectorType>(FrontU->get()->getType())->getNumElements();
    for (unsigned S = 0; S < Values.size(); ++S)
      Values[S] = Item[S * NumElts].first->get();
    while (Values.size() > 1) {
      std::iota(Mask.begin(), Mask.end(), 0);
      for (unsigned S = 0; S < NewValues.size(); ++S)

  auto *I = cast<Instruction>(FrontU->get());
  auto *II = dyn_cast<IntrinsicInst>(I);
  unsigned NumOps = I->getNumOperands() - (II ? 1 : 0);
  for (unsigned Idx = 0; Idx < NumOps; Idx++) {
                            IdentityLeafs, SplatLeafs, ConcatLeafs, Builder);
    for (const auto &Lane : Item)
  if (auto *BI = dyn_cast<BinaryOperator>(I)) {
  if (auto *CI = dyn_cast<CmpInst>(I)) {
    auto *Value = Builder.CreateCmp(CI->getPredicate(), Ops[0], Ops[1]);
  if (auto *SI = dyn_cast<SelectInst>(I)) {
  if (auto *CI = dyn_cast<CastInst>(I)) {
  assert(isa<UnaryInstruction>(I) && "Unexpected instruction type in Generate");
bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
  auto *Ty = dyn_cast<FixedVectorType>(I.getType());
  if (!Ty || I.use_empty())
  for (unsigned M = 0, E = Ty->getNumElements(); M < E; ++M)
  unsigned NumVisited = 0;
  while (!Worklist.empty()) {
    auto [FrontU, FrontLane] = Item.front();
    if (FrontLane == 0 &&
        cast<FixedVectorType>(FrontU->get()->getType())->getNumElements() ==
            Ty->getNumElements() &&
          return !E.value().first || (E.value().first->get() == FrontV &&
                                      E.value().second == (int)E.index());
      IdentityLeafs.insert(FrontU);
    if (auto *C = dyn_cast<Constant>(FrontU);
        C && C->getSplatValue() &&
          return !U || U->get() == FrontV;
      SplatLeafs.insert(FrontU);
          auto [FrontU, FrontLane] = Item.front();
          auto [U, Lane] = IL;
          return !U || (U->get() == FrontU->get() && Lane == FrontLane);
      SplatLeafs.insert(FrontU);
      Value *V = IL.first->get();
      if (auto *I = dyn_cast<Instruction>(V); I && !I->hasOneUse())
      if (auto *CI = dyn_cast<CmpInst>(V))
        if (CI->getPredicate() != cast<CmpInst>(FrontV)->getPredicate())
      if (auto *SI = dyn_cast<SelectInst>(V))
        if (!isa<VectorType>(SI->getOperand(0)->getType()) ||
            SI->getOperand(0)->getType() !=
                cast<SelectInst>(FrontV)->getOperand(0)->getType())
      if (isa<CallInst>(V) && !isa<IntrinsicInst>(V))
      auto *II = dyn_cast<IntrinsicInst>(V);
      return !II || (isa<IntrinsicInst>(FrontV) &&
                     II->getIntrinsicID() ==
                         cast<IntrinsicInst>(FrontV)->getIntrinsicID());
    if (isa<BinaryOperator, CmpInst>(FrontU)) {
      if (auto *BO = dyn_cast<BinaryOperator>(FrontU);
          BO && BO->isIntDivRem())
    } else if (isa<UnaryOperator, TruncInst, ZExtInst, SExtInst>(FrontU)) {
    } else if (isa<SelectInst>(FrontU)) {
    } else if (auto *II = dyn_cast<IntrinsicInst>(FrontU);
      for (unsigned Op = 0, E = II->getNumOperands() - 1; Op < E; Op++) {
              return !U || (cast<Instruction>(U->get())->getOperand(Op) ==
                            cast<Instruction>(FrontV)->getOperand(Op));
      ConcatLeafs.insert(FrontU);
  if (NumVisited <= 1)
                             ConcatLeafs, Builder);
  replaceValue(I, *V);
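// A reduction is insensitive to the order of its lanes, so if its input is
// fed through binary operators by a single shuffle, try to replace that
// shuffle with a cheaper (possibly identity) one.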
bool VectorCombine::foldShuffleFromReductions(Instruction &I) {
  auto *II = dyn_cast<IntrinsicInst>(&I);
  switch (II->getIntrinsicID()) {
  case Intrinsic::vector_reduce_add:
  case Intrinsic::vector_reduce_mul:
  case Intrinsic::vector_reduce_and:
  case Intrinsic::vector_reduce_or:
  case Intrinsic::vector_reduce_xor:
  case Intrinsic::vector_reduce_smin:
  case Intrinsic::vector_reduce_smax:
  case Intrinsic::vector_reduce_umin:
  case Intrinsic::vector_reduce_umax:

  std::queue<Value *> Worklist;
  if (auto *Op = dyn_cast<Instruction>(I.getOperand(0)))
  while (!Worklist.empty()) {
    Value *CV = Worklist.front();
    if (auto *CI = dyn_cast<Instruction>(CV)) {
      if (CI->isBinaryOp()) {
        for (auto *Op : CI->operand_values())
      } else if (auto *SV = dyn_cast<ShuffleVectorInst>(CI)) {
        if (Shuffle && Shuffle != SV)
  for (auto *V : Visited)
    for (auto *U : V->users())
      if (!Visited.contains(U) && U != &I)
      dyn_cast<FixedVectorType>(II->getOperand(0)->getType());
  if (!ShuffleInputType)
  sort(ConcatMask, [](int X, int Y) { return (unsigned)X < (unsigned)Y; });
  bool IsTruncatingShuffle = VecType->getNumElements() < NumInputElts;
  bool UsesSecondVec =
      any_of(ConcatMask, [&](int M) { return M >= (int)NumInputElts; });
      (UsesSecondVec && !IsTruncatingShuffle) ? VecType : ShuffleInputType;
      VecTyForCost, ConcatMask);
  LLVM_DEBUG(dbgs() << "Found a reduction feeding from a shuffle: " << *Shuffle
  LLVM_DEBUG(dbgs() << " OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (NewCost < OldCost) {
    LLVM_DEBUG(dbgs() << "Created new shuffle: " << *NewShuffle << "\n");
    replaceValue(*Shuffle, *NewShuffle);
  return foldSelectShuffle(*Shuffle, true);
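// Fold a reduction of a truncated vector into a truncation of the reduction:
// reduce the untruncated source and truncate the scalar result, for the
// reduction kinds where this is valid and no more expensive.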
bool VectorCombine::foldTruncFromReductions(Instruction &I) {
  auto *II = dyn_cast<IntrinsicInst>(&I);
  case Intrinsic::vector_reduce_add:
  case Intrinsic::vector_reduce_mul:
  case Intrinsic::vector_reduce_and:
  case Intrinsic::vector_reduce_or:
  case Intrinsic::vector_reduce_xor:

  Value *ReductionSrc = I.getOperand(0);
  auto *TruncSrcTy = cast<VectorType>(TruncSrc->getType());
  auto *ReductionSrcTy = cast<VectorType>(ReductionSrc->getType());
  Type *ResultTy = I.getType();
      ReductionOpc, ReductionSrcTy, std::nullopt, CostKind);
  if (auto *Trunc = dyn_cast<CastInst>(ReductionSrc))
          ReductionSrcTy->getScalarType(),
  if (OldCost <= NewCost || !NewCost.isValid())
      TruncSrcTy->getScalarType(), II->getIntrinsicID(), {TruncSrc});
  replaceValue(I, *NewTruncation);
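// Given two binary operators whose operands and users are connected by
// "select"-style shuffles (a pattern commonly left behind by interleaved
// reductions), try to re-sort the lanes so that the surrounding shuffles
// become cheaper or disappear entirely.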
bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) {
  auto *SVI = cast<ShuffleVectorInst>(&I);
  auto *VT = cast<FixedVectorType>(I.getType());
  auto *Op0 = dyn_cast<Instruction>(SVI->getOperand(0));
  auto *Op1 = dyn_cast<Instruction>(SVI->getOperand(1));
  if (!Op0 || !Op1 || Op0 == Op1 || !Op0->isBinaryOp() || !Op1->isBinaryOp() ||

  auto *SVI0A = dyn_cast<Instruction>(Op0->getOperand(0));
  auto *SVI0B = dyn_cast<Instruction>(Op0->getOperand(1));
  auto *SVI1A = dyn_cast<Instruction>(Op1->getOperand(0));
  auto *SVI1B = dyn_cast<Instruction>(Op1->getOperand(1));
    if (!I || I->getOperand(0)->getType() != VT)
      return U != Op0 && U != Op1 &&
             !(isa<ShuffleVectorInst>(U) &&
               (InputShuffles.contains(cast<Instruction>(U)) ||
                isInstructionTriviallyDead(cast<Instruction>(U))));
  if (checkSVNonOpUses(SVI0A) || checkSVNonOpUses(SVI0B) ||
      checkSVNonOpUses(SVI1A) || checkSVNonOpUses(SVI1B))

    for (auto *U : I->users()) {
      auto *SV = dyn_cast<ShuffleVectorInst>(U);
      if (!SV || SV->getType() != VT)
      if ((SV->getOperand(0) != Op0 && SV->getOperand(0) != Op1) ||
          (SV->getOperand(1) != Op0 && SV->getOperand(1) != Op1))
  if (!collectShuffles(Op0) || !collectShuffles(Op1))
  if (FromReduction && Shuffles.size() > 1)
  if (!FromReduction) {
      for (auto *U : SV->users()) {
          Shuffles.push_back(SSV);

  int MaxV1Elt = 0, MaxV2Elt = 0;
  unsigned NumElts = VT->getNumElements();
    SVN->getShuffleMask(Mask);
    Value *SVOp0 = SVN->getOperand(0);
    Value *SVOp1 = SVN->getOperand(1);
    if (isa<UndefValue>(SVOp1)) {
      auto *SSV = cast<ShuffleVectorInst>(SVOp0);
      for (unsigned I = 0, E = Mask.size(); I != E; I++) {
    if (SVOp0 == Op1 && SVOp1 == Op0) {
    if (SVOp0 != Op0 || SVOp1 != Op1)
    for (unsigned I = 0; I < Mask.size(); I++) {
      } else if (Mask[I] < static_cast<int>(NumElts)) {
        MaxV1Elt = std::max(MaxV1Elt, Mask[I]);
        auto It = find_if(V1, [&](const std::pair<int, int> &A) {
          return Mask[I] == A.first;
        MaxV2Elt = std::max<int>(MaxV2Elt, Mask[I] - NumElts);
        auto It = find_if(V2, [&](const std::pair<int, int> &A) {
          return Mask[I] - static_cast<int>(NumElts) == A.first;
          ReconstructMask.push_back(NumElts + It - V2.begin());
          V2.emplace_back(Mask[I] - NumElts, NumElts + V2.size());
    sort(ReconstructMask);
    OrigReconstructMasks.push_back(std::move(ReconstructMask));

  if (V1.empty() || V2.empty() ||
      (MaxV1Elt == static_cast<int>(V1.size()) - 1 &&
       MaxV2Elt == static_cast<int>(V2.size()) - 1))

    auto *SV = dyn_cast<ShuffleVectorInst>(I);
    if (isa<UndefValue>(SV->getOperand(1)))
      if (auto *SSV = dyn_cast<ShuffleVectorInst>(SV->getOperand(0)))
        if (InputShuffles.contains(SSV))
    return SV->getMaskValue(M);

                                  std::pair<int, int> Y) {
    int MXA = GetBaseMaskValue(A, X.first);
    int MYA = GetBaseMaskValue(A, Y.first);
  stable_sort(V1, [&](std::pair<int, int> A, std::pair<int, int> B) {
    return SortBase(SVI0A, A, B);
  stable_sort(V2, [&](std::pair<int, int> A, std::pair<int, int> B) {
    return SortBase(SVI1A, A, B);

  for (const auto &Mask : OrigReconstructMasks) {
    for (int M : Mask) {
        auto It = find_if(V, [M](auto A) { return A.second == M; });
        assert(It != V.end() && "Expected all entries in Mask");
        return std::distance(V.begin(), It);
      } else if (M < static_cast<int>(NumElts)) {
        ReconstructMask.push_back(FindIndex(V1, M));
        ReconstructMask.push_back(NumElts + FindIndex(V2, M));
    ReconstructMasks.push_back(std::move(ReconstructMask));

  for (unsigned I = 0; I < V1.size(); I++) {
    V1A.push_back(GetBaseMaskValue(SVI0A, V1[I].first));
    V1B.push_back(GetBaseMaskValue(SVI0B, V1[I].first));
  for (unsigned I = 0; I < V2.size(); I++) {
    V2A.push_back(GetBaseMaskValue(SVI1A, V2[I].first));
    V2B.push_back(GetBaseMaskValue(SVI1B, V2[I].first));
  while (V1A.size() < NumElts) {
  while (V2A.size() < NumElts) {

    auto *SV = dyn_cast<ShuffleVectorInst>(I);
                                  VT, SV->getShuffleMask());
  CostBefore += std::accumulate(Shuffles.begin(), Shuffles.end(),
  CostBefore += std::accumulate(InputShuffles.begin(), InputShuffles.end(),
  CostAfter += std::accumulate(ReconstructMasks.begin(), ReconstructMasks.end(),
  std::set<SmallVector<int>> OutputShuffleMasks({V1A, V1B, V2A, V2B});
      std::accumulate(OutputShuffleMasks.begin(), OutputShuffleMasks.end(),

  LLVM_DEBUG(dbgs() << "Found a binop select shuffle pattern: " << I << "\n");
                    << " vs CostAfter: " << CostAfter << "\n");
  if (CostBefore <= CostAfter)

    auto *SV = dyn_cast<ShuffleVectorInst>(I);
    if (isa<UndefValue>(SV->getOperand(1)))
      if (auto *SSV = dyn_cast<ShuffleVectorInst>(SV->getOperand(0)))
        if (InputShuffles.contains(SSV))
    return SV->getOperand(Op);
                                     GetShuffleOperand(SVI0A, 1), V1A);
                                     GetShuffleOperand(SVI0B, 1), V1B);
                                     GetShuffleOperand(SVI1A, 1), V2A);
                                     GetShuffleOperand(SVI1B, 1), V2B);
  if (auto *I = dyn_cast<Instruction>(NOp0))
    I->copyIRFlags(Op0, true);
  if (auto *I = dyn_cast<Instruction>(NOp1))
    I->copyIRFlags(Op1, true);

  for (int S = 0, E = ReconstructMasks.size(); S != E; S++) {
    replaceValue(*Shuffles[S], *NSV);

  Worklist.pushValue(NSV0A);
  Worklist.pushValue(NSV0B);
  Worklist.pushValue(NSV1A);
  Worklist.pushValue(NSV1B);
  for (auto *S : Shuffles)
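// Top-level driver: visit the instructions of the function, apply the cheap
// "early" folds first and, unless TryEarlyFoldsOnly is set, the full set of
// pattern folds, then re-process the worklist of instructions affected by the
// changes.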
bool VectorCombine::run() {
  bool MadeChange = false;
    bool IsFixedVectorType = isa<FixedVectorType>(I.getType());
    auto Opcode = I.getOpcode();
    if (IsFixedVectorType) {
      case Instruction::InsertElement:
        MadeChange |= vectorizeLoadInsert(I);
      case Instruction::ShuffleVector:
        MadeChange |= widenSubvectorLoad(I);
    if (isa<VectorType>(I.getType())) {
      MadeChange |= scalarizeBinopOrCmp(I);
      MadeChange |= scalarizeLoadExtract(I);
      MadeChange |= scalarizeVPIntrinsic(I);
    if (Opcode == Instruction::Store)
      MadeChange |= foldSingleElementStore(I);
    if (TryEarlyFoldsOnly)
    if (IsFixedVectorType) {
      case Instruction::InsertElement:
        MadeChange |= foldInsExtFNeg(I);
      case Instruction::ShuffleVector:
        MadeChange |= foldShuffleOfBinops(I);
        MadeChange |= foldShuffleOfCastops(I);
        MadeChange |= foldShuffleOfShuffles(I);
        MadeChange |= foldSelectShuffle(I);
        MadeChange |= foldShuffleToIdentity(I);
      case Instruction::BitCast:
        MadeChange |= foldBitcastShuffle(I);
    case Instruction::Call:
      MadeChange |= foldShuffleFromReductions(I);
      MadeChange |= foldTruncFromReductions(I);
    case Instruction::ICmp:
    case Instruction::FCmp:
      MadeChange |= foldExtractExtract(I);
      MadeChange |= foldExtractExtract(I);
      MadeChange |= foldExtractedCmps(I);
    if (I.isDebugOrPseudoInst())
  while (!Worklist.isEmpty()) {
  VectorCombine Combiner(F, TTI, DT, AA, AC, DL, TryEarlyFoldsOnly);