#define DEBUG_TYPE "vector-combine"

STATISTIC(NumVecLoad, "Number of vector loads formed");
STATISTIC(NumVecCmp, "Number of vector compares formed");
STATISTIC(NumVecBO, "Number of vector binops formed");
STATISTIC(NumVecCmpBO, "Number of vector compare + binop formed");
STATISTIC(NumShufOfBitcast, "Number of shuffles moved after bitcast");
STATISTIC(NumScalarBO, "Number of scalar binops formed");
STATISTIC(NumScalarCmp, "Number of scalar compares formed");

static cl::opt<bool> DisableVectorCombine(
    "disable-vector-combine", cl::init(false), cl::Hidden,
    cl::desc("Disable all vector combine transforms"));

static cl::opt<bool> DisableBinopExtractShuffle(
    "disable-binop-extract-shuffle", cl::init(false), cl::Hidden,
    cl::desc("Disable binop extract to shuffle transforms"));

static cl::opt<unsigned> MaxInstrsToScan(
    "vector-combine-max-scan-instrs", cl::init(30), cl::Hidden,
    cl::desc("Max number of instructions to scan for vector combining."));

static const unsigned InvalidIndex = std::numeric_limits<unsigned>::max();
class VectorCombine {
public:
  VectorCombine(Function &F, const TargetTransformInfo &TTI,
                const DominatorTree &DT, AAResults &AA, AssumptionCache &AC,
                bool TryEarlyFoldsOnly)
      : /* ... */ TryEarlyFoldsOnly(TryEarlyFoldsOnly) {}
  // ...
  // If true, only perform cheap early folds; do not introduce new vector ops.
  bool TryEarlyFoldsOnly;
  // ...
  ExtractElementInst *getShuffleExtract(ExtractElementInst *Ext0,
                                        ExtractElementInst *Ext1,
                                        unsigned PreferredExtractIndex) const;
  bool isExtractExtractCheap(ExtractElementInst *Ext0,
                             ExtractElementInst *Ext1, const Instruction &I,
                             ExtractElementInst *&ConvertToShuffle,
                             unsigned PreferredExtractIndex);
  // ...
  bool foldSelectShuffle(Instruction &I, bool FromReduction = false);

  void replaceValue(Value &Old, Value &New) {
    // ...
    if (auto *NewI = dyn_cast<Instruction>(&New)) {
      // ...
    }
  }
};
static bool canWidenLoad(LoadInst *Load, const TargetTransformInfo &TTI) {
  // Do not widen the load if it would be illegal or unsafe to speculate.
  if (!Load || !Load->isSimple() || !Load->hasOneUse() ||
      Load->getFunction()->hasFnAttribute(Attribute::SanitizeMemTag) ||
      mustSuppressSpeculation(*Load))
    return false;

  // We are potentially widening this load to a legal-sized vector, so the
  // scalar type must divide the minimum vector register size evenly.
  Type *ScalarTy = Load->getType()->getScalarType();
  // ...
  if (!ScalarSize || !MinVectorSize || MinVectorSize % ScalarSize != 0 ||
      ScalarSize % 8 != 0)
    return false;

  return true;
}
bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
  // Match insert of a loaded scalar into a fixed vector.
  // ...
  auto *Load = dyn_cast<LoadInst>(X);
  if (!canWidenLoad(Load, TTI))
    return false;
  // ...
  Value *SrcPtr = Load->getPointerOperand()->stripPointerCasts();
  assert(isa<PointerType>(SrcPtr->getType()) && "Expected a pointer type");

  unsigned MinVecNumElts = MinVectorSize / ScalarSize;
  auto *MinVecTy = VectorType::get(ScalarTy, MinVecNumElts, false);
  unsigned OffsetEltIndex = 0;
  // ...
  // If the pointer has a constant offset from the base, the loaded element can
  // still be produced by shuffling the wide load, as long as the offset is a
  // multiple of the scalar size and lands inside the wide vector.
  unsigned OffsetBitWidth = DL.getIndexTypeSizeInBits(SrcPtr->getType());
  // ...
  uint64_t ScalarSizeInBytes = ScalarSize / 8;
  if (Offset.urem(ScalarSizeInBytes) != 0)
    return false;
  OffsetEltIndex = Offset.udiv(ScalarSizeInBytes).getZExtValue();
  if (OffsetEltIndex >= MinVecNumElts)
    return false;
  // ...
  unsigned AS = Load->getPointerAddressSpace();
  // ...
  auto *Ty = cast<FixedVectorType>(I.getType());
  unsigned OutputNumElts = Ty->getNumElements();
  SmallVector<int, 16> Mask(OutputNumElts, PoisonMaskElem);
  assert(OffsetEltIndex < MinVecNumElts && "Address offset too big");
  Mask[0] = OffsetEltIndex;
  // ...
  if (OldCost < NewCost || !NewCost.isValid())
    return false;

  // It is safe and potentially profitable to load a vector directly:
  // inselt undef, load Scalar, 0 --> load VecPtr
  Value *CastedPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
      SrcPtr, MinVecTy->getPointerTo(AS));
  Value *VecLd = Builder.CreateAlignedLoad(MinVecTy, CastedPtr, Alignment);
  VecLd = Builder.CreateShuffleVector(VecLd, Mask);

  replaceValue(I, *VecLd);
  ++NumVecLoad;
  return true;
}
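// Illustrative IR for the fold above (hand-written sketch, not from a test):
//   %s = load float, ptr %p, align 16
//   %r = insertelement <4 x float> poison, float %s, i32 0
// becomes, when the safety and cost checks allow it:
//   %v = load <4 x float>, ptr %p, align 16
//   %r = shufflevector <4 x float> %v, <4 x float> poison,
//        <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>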
bool VectorCombine::widenSubvectorLoad(Instruction &I) {
  // Match subvector insert of a fixed vector (identity shuffle with padding).
  auto *Shuf = cast<ShuffleVectorInst>(&I);
  if (!Shuf->isIdentityWithPadding())
    return false;

  // Allow a non-canonical shuffle mask that is choosing elements from op1.
  unsigned NumOpElts =
      cast<FixedVectorType>(Shuf->getOperand(0)->getType())->getNumElements();
  unsigned OpIndex = any_of(Shuf->getShuffleMask(), [&NumOpElts](int M) {
    return M >= (int)(NumOpElts);
  });

  auto *Load = dyn_cast<LoadInst>(Shuf->getOperand(OpIndex));
  if (!canWidenLoad(Load, TTI))
    return false;

  auto *Ty = cast<FixedVectorType>(I.getType());
  Value *SrcPtr = Load->getPointerOperand()->stripPointerCasts();
  assert(isa<PointerType>(SrcPtr->getType()) && "Expected a pointer type");
  // ...
  unsigned AS = Load->getPointerAddressSpace();
  // ...
  if (OldCost < NewCost || !NewCost.isValid())
    return false;

  Value *CastedPtr =
      Builder.CreatePointerBitCastOrAddrSpaceCast(SrcPtr, Ty->getPointerTo(AS));
  Value *VecLd = Builder.CreateAlignedLoad(Ty, CastedPtr, Alignment);
  replaceValue(I, *VecLd);
  ++NumVecLoad;
  return true;
}
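// Sketch of the pattern handled above (illustrative only):
//   %v2 = load <2 x i32>, ptr %p
//   %r  = shufflevector <2 x i32> %v2, <2 x i32> poison,
//         <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
// If a 4-element load at %p is known safe, this becomes one wide load:
//   %r = load <4 x i32>, ptr %p
// The padding lanes were poison, so whatever the wide load reads there is a
// valid refinement.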
ExtractElementInst *VectorCombine::getShuffleExtract(
    ExtractElementInst *Ext0, ExtractElementInst *Ext1,
    unsigned PreferredExtractIndex) const {
  auto *Index0C = dyn_cast<ConstantInt>(Ext0->getIndexOperand());
  auto *Index1C = dyn_cast<ConstantInt>(Ext1->getIndexOperand());
  assert(Index0C && Index1C && "Expected constant extract indexes");

  unsigned Index0 = Index0C->getZExtValue();
  unsigned Index1 = Index1C->getZExtValue();
  // If the extract indexes are identical, no shuffle is needed.
  if (Index0 == Index1)
    return nullptr;
  // ...
  // If the caller has a preferred lane, shuffle the other extract to it.
  if (PreferredExtractIndex == Index0)
    return Ext1;
  if (PreferredExtractIndex == Index1)
    return Ext0;
  return Index0 > Index1 ? Ext0 : Ext1;
}

bool VectorCombine::isExtractExtractCheap(
    ExtractElementInst *Ext0, ExtractElementInst *Ext1, const Instruction &I,
    ExtractElementInst *&ConvertToShuffle, unsigned PreferredExtractIndex) {
  auto *Ext0IndexC = dyn_cast<ConstantInt>(Ext0->getOperand(1));
  auto *Ext1IndexC = dyn_cast<ConstantInt>(Ext1->getOperand(1));
  assert(Ext0IndexC && Ext1IndexC && "Expected constant extract indexes");

  unsigned Opcode = I.getOpcode();
  // ...
  assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
         "Expected a compare");
  // ...
  unsigned Ext0Index = Ext0IndexC->getZExtValue();
  unsigned Ext1Index = Ext1IndexC->getZExtValue();
  // ...
  InstructionCost CheapExtractCost = std::min(Extract0Cost, Extract1Cost);

  InstructionCost OldCost, NewCost;
  if (Ext0->getVectorOperand() == Ext1->getVectorOperand() &&
      Ext0Index == Ext1Index) {
    // Extra uses of the extract mean we keep its cost in the vector total.
    bool HasUseTax = Ext0 == Ext1 ? !Ext0->hasNUses(2)
                                  : !Ext0->hasOneUse() || !Ext1->hasOneUse();
    OldCost = CheapExtractCost + ScalarOpCost;
    NewCost = VectorOpCost + CheapExtractCost + HasUseTax * CheapExtractCost;
  } else {
    OldCost = Extract0Cost + Extract1Cost + ScalarOpCost;
    NewCost = VectorOpCost + CheapExtractCost +
              !Ext0->hasOneUse() * Extract0Cost +
              !Ext1->hasOneUse() * Extract1Cost;
  }

  ConvertToShuffle = getShuffleExtract(Ext0, Ext1, PreferredExtractIndex);
  if (ConvertToShuffle) {
    // ...
  }

  // Aggressively form a vector op if the cost is equal because the transform
  // may enable further optimization.
  return OldCost < NewCost;
}
static Value *createShiftShuffle(Value *Vec, unsigned OldIndex,
                                 unsigned NewIndex, IRBuilder<> &Builder) {
  auto *VecTy = cast<FixedVectorType>(Vec->getType());
  SmallVector<int, 32> ShufMask(VecTy->getNumElements(), PoisonMaskElem);
  ShufMask[NewIndex] = OldIndex;
  return Builder.CreateShuffleVector(Vec, ShufMask, "shift");
}

static ExtractElementInst *translateExtract(ExtractElementInst *ExtElt,
                                            unsigned NewIndex,
                                            IRBuilder<> &Builder) {
  // ...
  assert(isa<ConstantInt>(C) && "Expected a constant index operand");
  if (isa<Constant>(X))
    return nullptr;
  // ...
  return cast<ExtractElementInst>(
      Builder.CreateExtractElement(Shuf, NewIndex));
}
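// Example of the shift shuffle created above (illustrative): moving lane 2 of
// a 4-element vector to lane 0 uses the mask <2, poison, poison, poison>, so
// "extractelement %shift, 0" reads the same scalar as "extractelement %v, 2".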
void VectorCombine::foldExtExtCmp(ExtractElementInst *Ext0,
                                  ExtractElementInst *Ext1, Instruction &I) {
  assert(isa<CmpInst>(&I) && "Expected a compare");
  assert(cast<ConstantInt>(Ext0->getIndexOperand())->getZExtValue() ==
             cast<ConstantInt>(Ext1->getIndexOperand())->getZExtValue() &&
         "Expected matching constant extract indexes");

  // cmp Pred (extelt V0, C), (extelt V1, C) --> extelt (cmp Pred V0, V1), C
  ++NumVecCmp;
  // ...
  replaceValue(I, *NewExt);
}

void VectorCombine::foldExtExtBinop(ExtractElementInst *Ext0,
                                    ExtractElementInst *Ext1, Instruction &I) {
  assert(isa<BinaryOperator>(&I) && "Expected a binary operator");
  assert(cast<ConstantInt>(Ext0->getIndexOperand())->getZExtValue() ==
             cast<ConstantInt>(Ext1->getIndexOperand())->getZExtValue() &&
         "Expected matching constant extract indexes");

  // bo (extelt V0, C), (extelt V1, C) --> extelt (bo V0, V1), C
  ++NumVecBO;
  // ...
  if (auto *VecBOInst = dyn_cast<Instruction>(VecBO))
    VecBOInst->copyIRFlags(&I);
  // ...
  replaceValue(I, *NewExt);
}
bool VectorCombine::foldExtractExtract(Instruction &I) {
  // ...
  auto *Ext0 = cast<ExtractElementInst>(I0);
  auto *Ext1 = cast<ExtractElementInst>(I1);
  // ...
  if (isExtractExtractCheap(Ext0, Ext1, I, ExtractToChange, InsertIndex))
    return false;

  if (ExtractToChange) {
    unsigned CheapExtractIdx = ExtractToChange == Ext0 ? C1 : C0;
    // ...
    if (ExtractToChange == Ext0)
      Ext0 = NewExtract;
    else
      Ext1 = NewExtract;
  }

  if (Pred != CmpInst::BAD_ICMP_PREDICATE)
    foldExtExtCmp(Ext0, Ext1, I);
  else
    foldExtExtBinop(Ext0, Ext1, I);
  // ...
  return true;
}
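// Illustrative example of the extract-extract fold (hand-written sketch):
//   %e0 = extractelement <4 x i32> %a, i32 1
//   %e1 = extractelement <4 x i32> %b, i32 1
//   %r  = add i32 %e0, %e1
// becomes, when the cost model says the vector op is no more expensive:
//   %v = add <4 x i32> %a, %b
//   %r = extractelement <4 x i32> %v, i32 1
// Mismatched indexes are first reconciled with a shift shuffle (above).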
bool VectorCombine::foldInsExtFNeg(Instruction &I) {
  // Match an insert of an extracted fneg into the same lane.
  // ...
  auto *VecTy = cast<FixedVectorType>(I.getType());
  if (SrcVec->getType() != VecTy)
    return false;

  unsigned NumElts = VecTy->getNumElements();
  if (Index >= NumElts)
    return false;

  // This is equivalent to a select-shuffle that chooses all but the negated
  // element from the destination vector.
  SmallVector<int> Mask(NumElts);
  std::iota(Mask.begin(), Mask.end(), 0);
  Mask[Index] = Index + NumElts;
  // ...
  if (NewCost > OldCost)
    return false;

  Value *VecFNeg = Builder.CreateFNegFMF(SrcVec, FNeg);
  Value *Shuf = Builder.CreateShuffleVector(DestVec, VecFNeg, Mask);
  replaceValue(I, *Shuf);
  return true;
}
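// Illustrative example of the insert-of-extracted-fneg fold:
//   %e = extractelement <4 x float> %src, i32 2
//   %n = fneg float %e
//   %r = insertelement <4 x float> %dst, float %n, i32 2
// becomes a vector fneg plus a select-like shuffle (mask <0, 1, 6, 3>):
//   %v = fneg <4 x float> %src
//   %r = shufflevector <4 x float> %dst, <4 x float> %v,
//        <4 x i32> <i32 0, i32 1, i32 6, i32 3>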
bool VectorCombine::foldBitcastShuf(Instruction &I) {
  // ...
  auto *SrcTy = dyn_cast<FixedVectorType>(V->getType());
  if (!SrcTy || I.getOperand(0)->getType() != SrcTy)
    return false;

  auto *DestTy = cast<FixedVectorType>(I.getType());
  unsigned DestNumElts = DestTy->getNumElements();
  unsigned SrcNumElts = SrcTy->getNumElements();
  SmallVector<int, 16> NewMask;
  if (SrcNumElts <= DestNumElts) {
    // The bitcast is from wide to narrow/equal elements. The shuffle mask can
    // always be converted.
    assert(DestNumElts % SrcNumElts == 0 && "Unexpected shuffle mask");
    unsigned ScaleFactor = DestNumElts / SrcNumElts;
    narrowShuffleMaskElts(ScaleFactor, Mask, NewMask);
  } else {
    // The bitcast is from narrow elements to wide elements. The shuffle mask
    // must choose consecutive elements to allow casting first.
    assert(SrcNumElts % DestNumElts == 0 && "Unexpected shuffle mask");
    unsigned ScaleFactor = SrcNumElts / DestNumElts;
    if (!widenShuffleMaskElts(ScaleFactor, Mask, NewMask))
      return false;
  }
  // ...
  if (DestCost > SrcCost || !DestCost.isValid())
    return false;

  // bitcast (shuf V, MaskC) --> shuf (bitcast V), MaskC'
  ++NumShufOfBitcast;
  Value *CastV = Builder.CreateBitCast(V, DestTy);
  Value *Shuf = Builder.CreateShuffleVector(CastV, NewMask);
  replaceValue(I, *Shuf);
  return true;
}
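// Illustrative example of moving a shuffle after a bitcast (scale factor 2):
//   %s = shufflevector <4 x i32> %v, <4 x i32> poison,
//        <4 x i32> <i32 3, i32 2, i32 1, i32 0>
//   %r = bitcast <4 x i32> %s to <8 x i16>
// becomes, with each mask index M expanded to {2M, 2M+1}:
//   %c = bitcast <4 x i32> %v to <8 x i16>
//   %r = shufflevector <8 x i16> %c, <8 x i16> poison,
//        <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 1>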
bool VectorCombine::scalarizeVPIntrinsic(Instruction &I) {
  if (!isa<VPIntrinsic>(I))
    return false;
  VPIntrinsic &VPI = cast<VPIntrinsic>(I);
  // ...
  // The mask must be all-true, i.e. a splat of i1 true.
  auto IsAllTrueMask = [](Value *MaskVal) {
    if (Value *SplattedVal = getSplatValue(MaskVal))
      if (auto *ConstValue = dyn_cast<Constant>(SplattedVal))
        return ConstValue->isAllOnesValue();
    return false;
  };
  // ...
  Args.push_back(V->getType());
  // ...
  // Find the functional (non-VP) opcode or intrinsic for this VP op.
  std::optional<unsigned> FunctionalOpcode = VPI.getFunctionalOpcode();
  std::optional<Intrinsic::ID> ScalarIntrID = std::nullopt;
  if (!FunctionalOpcode) {
    ScalarIntrID = VPI.getFunctionalIntrinsicID();
    if (!ScalarIntrID)
      return false;
  }
  // ...
  LLVM_DEBUG(dbgs() << "Cost of Intrinsic: " << OldCost
                    << ", Cost of scalarizing:" << NewCost << "\n");
  if (OldCost < NewCost || !NewCost.isValid())
    return false;

  // Division-like ops trap on zero, so the scalar op is only speculatable
  // when the explicit vector length is known to be non-zero.
  bool MustHaveNonZeroVL =
      IntrID == Intrinsic::vp_sdiv || IntrID == Intrinsic::vp_udiv ||
      IntrID == Intrinsic::vp_srem || IntrID == Intrinsic::vp_urem;
  // ...
  Value *ScalarVal =
      ScalarIntrID
          ? Builder.CreateIntrinsic(VecTy->getScalarType(), *ScalarIntrID,
                                    {ScalarOp0, ScalarOp1})
          : Builder.CreateBinOp((Instruction::BinaryOps)(*FunctionalOpcode),
                                ScalarOp0, ScalarOp1);
  replaceValue(VPI, *Builder.CreateVectorSplat(EC, ScalarVal));
  return true;
}
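// Illustrative example: with an all-true mask and splatted operands,
//   %r = call <4 x i32> @llvm.vp.add.v4i32(<4 x i32> %xs, <4 x i32> %ys,
//                                          <4 x i1> %alltrue, i32 %evl)
// can be rewritten as one scalar add of the two splatted values followed by a
// vector splat of the result.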
bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
  // Match a vector binop or compare with at least one inserted scalar operand:
  // vec_op (inselt VecC0, V0, Index), (inselt VecC1, V1, Index)
  // ...
  bool IsCmp = Pred != CmpInst::Predicate::BAD_ICMP_PREDICATE;
  // ...
  for (User *U : I.users()) {
    // ...
  }
  // ...
  Constant *VecC0 = nullptr, *VecC1 = nullptr;
  Value *V0 = nullptr, *V1 = nullptr;
  // ...
  if (IsConst0 && IsConst1)
    return false;
  if (!IsConst0 && !IsConst1 && Index0 != Index1)
    return false;

  // Bail for single insertion if it is a load.
  auto *I0 = dyn_cast_or_null<Instruction>(V0);
  auto *I1 = dyn_cast_or_null<Instruction>(V1);
  if ((IsConst0 && I1 && I1->mayReadFromMemory()) ||
      (IsConst1 && I0 && I0->mayReadFromMemory()))
    return false;

  Type *VecTy = I.getType();
  assert(VecTy->isVectorTy() &&
         (IsConst0 || IsConst1 || V0->getType() == V1->getType()) &&
         "Unexpected types for insert element into binop or cmp");

  unsigned Opcode = I.getOpcode();
  // ...
  InstructionCost OldCost =
      (IsConst0 ? 0 : InsertCost) + (IsConst1 ? 0 : InsertCost) + VectorOpCost;
  InstructionCost NewCost = ScalarOpCost + InsertCost +
                            (IsConst0 ? 0 : !Ins0->hasOneUse() * InsertCost) +
                            (IsConst1 ? 0 : !Ins1->hasOneUse() * InsertCost);

  // We want to scalarize unless the vector variant actually has lower cost.
  if (OldCost < NewCost || !NewCost.isValid())
    return false;

  // vec_op (inselt VecC0, V0, Index), (inselt VecC1, V1, Index) -->
  // inselt NewVecC, (scalar_op V0, V1), Index
  if (IsCmp)
    ++NumScalarCmp;
  else
    ++NumScalarBO;
  // ...
  Value *Scalar =
      IsCmp ? Builder.CreateCmp(Pred, V0, V1)
            : Builder.CreateBinOp((Instruction::BinaryOps)Opcode, V0, V1);

  Scalar->setName(I.getName() + ".scalar");

  // All IR flags are safe to back-propagate: there is no potential for extra
  // poison to be created by the scalar instruction.
  if (auto *ScalarInst = dyn_cast<Instruction>(Scalar))
    ScalarInst->copyIRFlags(&I);

  // Fold the vector constants in the original vectors into a new base vector.
  Value *NewVecC =
      IsCmp ? Builder.CreateCmp(Pred, VecC0, VecC1)
            : Builder.CreateBinOp((Instruction::BinaryOps)Opcode, VecC0, VecC1);
  Value *Insert = Builder.CreateInsertElement(NewVecC, Scalar, Index);
  replaceValue(I, *Insert);
  return true;
}
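// Worked example of the pattern comment above (illustrative):
//   %i0 = insertelement <4 x i32> <i32 1, i32 2, i32 3, i32 4>, i32 %x, i32 0
//   %i1 = insertelement <4 x i32> <i32 5, i32 6, i32 7, i32 8>, i32 %y, i32 0
//   %r  = add <4 x i32> %i0, %i1
// becomes one scalar add inserted into the constant-folded base vector:
//   %s = add i32 %x, %y
//   %r = insertelement <4 x i32> <i32 6, i32 8, i32 10, i32 12>, i32 %s, i32 0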
bool VectorCombine::foldExtractedCmps(Instruction &I) {
  // We are looking for a scalar binop of booleans:
  //   binop i1 (cmp Pred (ext X, Index0), C0), (cmp Pred (ext X, Index1), C1)
  if (!I.isBinaryOp() || !I.getType()->isIntegerTy(1))
    return false;

  // The compare predicates should match, and each compare should have a
  // constant operand.
  Value *B0 = I.getOperand(0), *B1 = I.getOperand(1);
  // ...
  auto *Ext0 = cast<ExtractElementInst>(I0);
  auto *Ext1 = cast<ExtractElementInst>(I1);
  // ...
  unsigned CmpOpcode =
      CmpInst::isFPPredicate(P0) ? Instruction::FCmp : Instruction::ICmp;
  auto *VecTy = dyn_cast<FixedVectorType>(X->getType());
  if (!VecTy)
    return false;
  // ...
  int CheapIndex = ConvertToShuf == Ext0 ? Index1 : Index0;
  int ExpensiveIndex = ConvertToShuf == Ext0 ? Index0 : Index1;
  // ...
  ShufMask[CheapIndex] = ExpensiveIndex;
  // ...
  if (OldCost < NewCost || !NewCost.isValid())
    return false;

  // Create a vector constant from the 2 scalar constants, then a vector
  // compare, a shuffle, a vector logic op, and one final extract.
  // ...
  Value *NewExt = Builder.CreateExtractElement(VecLogic, CheapIndex);
  replaceValue(I, *NewExt);
  ++NumVecCmpBO;
  return true;
}
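// Illustrative example of the extract-cmp-binop fold (hand-written):
//   %e0 = extractelement <4 x i32> %x, i32 0
//   %e1 = extractelement <4 x i32> %x, i32 1
//   %c0 = icmp sgt i32 %e0, 10
//   %c1 = icmp sgt i32 %e1, 20
//   %r  = and i1 %c0, %c1
// can become a vector compare against the constant vector built from 10 and
// 20, a shuffle lining up the two lanes, a vector 'and', and one extract.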
// Check if memory is modified between Begin and End within the same block.
static bool isMemModifiedBetween(BasicBlock::iterator Begin,
                                 BasicBlock::iterator End,
                                 const MemoryLocation &Loc, AAResults &AA) {
  unsigned NumScanned = 0;
  return std::any_of(Begin, End, [&](const Instruction &Instr) {
    return isModSet(AA.getModRefInfo(&Instr, Loc)) ||
           ++NumScanned > MaxInstrsToScan;
  });
}
/// Helper class to indicate whether a vector index can be safely scalarized
/// and whether a freeze needs to be inserted.
class ScalarizationResult {
  enum class StatusTy { Unsafe, Safe, SafeWithFreeze };

  StatusTy Status;
  Value *ToFreeze;

  ScalarizationResult(StatusTy Status, Value *ToFreeze = nullptr)
      : Status(Status), ToFreeze(ToFreeze) {}

public:
  ScalarizationResult(const ScalarizationResult &Other) = default;
  ~ScalarizationResult() {
    assert(!ToFreeze && "freeze() not called with ToFreeze being set");
  }

  static ScalarizationResult unsafe() { return {StatusTy::Unsafe}; }
  static ScalarizationResult safe() { return {StatusTy::Safe}; }
  static ScalarizationResult safeWithFreeze(Value *ToFreeze) {
    return {StatusTy::SafeWithFreeze, ToFreeze};
  }

  /// Returns true if the index can be scalarized without a freeze.
  bool isSafe() const { return Status == StatusTy::Safe; }
  /// Returns true if the index cannot be scalarized.
  bool isUnsafe() const { return Status == StatusTy::Unsafe; }
  /// Returns true if the index can be scalarized, but requires a freeze.
  bool isSafeWithFreeze() const { return Status == StatusTy::SafeWithFreeze; }

  /// Reset the state to Unsafe and clear ToFreeze if set.
  void discard() {
    ToFreeze = nullptr;
    Status = StatusTy::Unsafe;
  }

  /// Freeze ToFreeze and update the use in \p UserI to use it.
  void freeze(IRBuilder<> &Builder, Instruction &UserI) {
    assert(isSafeWithFreeze() &&
           "should only be used when freezing is required");
    assert(is_contained(ToFreeze->users(), &UserI) &&
           "UserI must be a user of ToFreeze");
    Builder.SetInsertPoint(cast<Instruction>(&UserI));
    Value *Frozen =
        Builder.CreateFreeze(ToFreeze, ToFreeze->getName() + ".frozen");
    for (Use &U : make_early_inc_range(UserI.operands()))
      if (U.get() == ToFreeze)
        U.set(Frozen);

    ToFreeze = nullptr;
  }
};
/// Check if it is legal to scalarize a memory access to \p VecTy at index \p
/// Idx. The index must be either a constant in bounds or a value that is
/// guaranteed (possibly after freezing) to be less than the element count.
static ScalarizationResult canScalarizeAccess(VectorType *VecTy, Value *Idx,
                                              Instruction *CtxI,
                                              AssumptionCache &AC,
                                              const DominatorTree &DT) {
  // For scalable vectors this is the minimum number of elements.
  uint64_t NumElements = VecTy->getElementCount().getKnownMinValue();
  if (auto *C = dyn_cast<ConstantInt>(Idx)) {
    if (C->getValue().ult(NumElements))
      return ScalarizationResult::safe();
    return ScalarizationResult::unsafe();
  }

  unsigned IntWidth = Idx->getType()->getScalarSizeInBits();
  APInt Zero(IntWidth, 0);
  APInt MaxElts(IntWidth, NumElements);
  ConstantRange ValidIndices(Zero, MaxElts);
  ConstantRange IdxRange(IntWidth, true);

  if (isGuaranteedNotToBePoison(Idx, &AC)) {
    if (ValidIndices.contains(computeConstantRange(Idx, /* ForSigned */ false,
                                                   true, &AC, CtxI, &DT)))
      return ScalarizationResult::safe();
    return ScalarizationResult::unsafe();
  }

  // If the index may be poison, check if we can insert a freeze before the
  // range of the index is restricted.
  Value *IdxBase;
  ConstantInt *CI;
  if (match(Idx, m_And(m_Value(IdxBase), m_ConstantInt(CI))))
    IdxRange = IdxRange.binaryAnd(CI->getValue());
  else if (match(Idx, m_URem(m_Value(IdxBase), m_ConstantInt(CI))))
    IdxRange = IdxRange.urem(CI->getValue());

  if (ValidIndices.contains(IdxRange))
    return ScalarizationResult::safeWithFreeze(IdxBase);
  return ScalarizationResult::unsafe();
}
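// Hypothetical examples of the three results above: a constant index 2 into a
// <4 x i32> vector is safe(); a variable index is safe() only if its computed
// range provably fits in [0, 4); an index like "and %i, 3" that may be poison
// is safeWithFreeze(%i), because freezing %i first makes the masked value a
// valid in-range index.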
static Align computeAlignmentAfterScalarization(Align VectorAlignment,
                                                Type *ScalarType, Value *Idx,
                                                const DataLayout &DL) {
  if (auto *C = dyn_cast<ConstantInt>(Idx))
    return commonAlignment(VectorAlignment,
                           C->getZExtValue() * DL.getTypeStoreSize(ScalarType));
  return commonAlignment(VectorAlignment, DL.getTypeStoreSize(ScalarType));
}
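// Worked example (illustrative): scalarizing lane 3 of a 16-byte-aligned
// <16 x i8> access touches byte offset 3 * 1, so the scalar op can only be
// given commonAlignment(Align(16), 3) == Align(1); lane 8 would get Align(8).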
bool VectorCombine::foldSingleElementStore(Instruction &I) {
  auto *SI = cast<StoreInst>(&I);
  if (!SI->isSimple() || !isa<VectorType>(SI->getValueOperand()->getType()))
    return false;

  // TODO: Combine more complicated patterns (multiple inserts) by referencing
  // TargetTransformInfo.
  Instruction *Source;
  Value *NewElement;
  Value *Idx;
  if (!match(SI->getValueOperand(),
             m_InsertElt(m_Instruction(Source), m_Value(NewElement),
                         m_Value(Idx))))
    return false;

  if (auto *Load = dyn_cast<LoadInst>(Source)) {
    auto VecTy = cast<VectorType>(SI->getValueOperand()->getType());
    Value *SrcAddr = Load->getPointerOperand()->stripPointerCasts();
    // Don't optimize for atomic/volatile load or store. Ensure memory is not
    // modified between, vector type matches store size, and index is inbounds.
    if (!Load->isSimple() || Load->getParent() != SI->getParent() ||
        !DL.typeSizeEqualsStoreSize(Load->getType()->getScalarType()) ||
        SrcAddr != SI->getPointerOperand()->stripPointerCasts())
      return false;

    auto ScalarizableIdx = canScalarizeAccess(VecTy, Idx, Load, AC, DT);
    if (ScalarizableIdx.isUnsafe() ||
        isMemModifiedBetween(Load->getIterator(), SI->getIterator(),
                             MemoryLocation::get(SI), AA))
      return false;

    if (ScalarizableIdx.isSafeWithFreeze())
      ScalarizableIdx.freeze(Builder, *cast<Instruction>(Idx));
    Value *GEP = Builder.CreateInBoundsGEP(
        SI->getValueOperand()->getType(), SI->getPointerOperand(),
        {ConstantInt::get(Idx->getType(), 0), Idx});
    StoreInst *NSI = Builder.CreateStore(NewElement, GEP);
    // ...
    replaceValue(I, *NSI);
    return true;
  }

  return false;
}
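// Illustrative example of the single-element store fold:
//   %v  = load <4 x i32>, ptr %p
//   %v1 = insertelement <4 x i32> %v, i32 %x, i32 %idx
//   store <4 x i32> %v1, ptr %p
// becomes, when %idx is provably in bounds and memory is unmodified between
// the load and the store:
//   %gep = getelementptr inbounds <4 x i32>, ptr %p, i32 0, i32 %idx
//   store i32 %x, ptr %gep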
bool VectorCombine::scalarizeLoadExtract(Instruction &I) {
  // ...
  auto *VecTy = cast<VectorType>(I.getType());
  auto *LI = cast<LoadInst>(&I);
  if (LI->isVolatile() || !DL.typeSizeEqualsStoreSize(VecTy->getScalarType()))
    return false;

  InstructionCost OriginalCost =
      TTI.getMemoryOpCost(Instruction::Load, VecTy, LI->getAlign(),
                          LI->getPointerAddressSpace());
  InstructionCost ScalarizedCost = 0;

  Instruction *LastCheckedInst = LI;
  unsigned NumInstChecked = 0;
  DenseMap<ExtractElementInst *, ScalarizationResult> NeedFreeze;
  // Check that all users of the load are extracts with no memory
  // modifications between the load and the extract. Compute the cost of both
  // the original code and the scalarized version.
  for (User *U : LI->users()) {
    auto *UI = dyn_cast<ExtractElementInst>(U);
    if (!UI || UI->getParent() != LI->getParent())
      return false;
    // ...
    for (Instruction &I :
         make_range(std::next(LI->getIterator()), UI->getIterator())) {
      // ...
    }
    LastCheckedInst = UI;

    auto ScalarIdx = canScalarizeAccess(VecTy, UI->getOperand(1), &I, AC, DT);
    if (ScalarIdx.isUnsafe())
      return false;
    if (ScalarIdx.isSafeWithFreeze()) {
      NeedFreeze.try_emplace(UI, ScalarIdx);
      ScalarIdx.discard();
    }

    auto *Index = dyn_cast<ConstantInt>(UI->getOperand(1));
    // ...
    ScalarizedCost +=
        TTI.getMemoryOpCost(Instruction::Load, VecTy->getElementType(),
                            Align(1), LI->getPointerAddressSpace());
  }

  if (ScalarizedCost >= OriginalCost)
    return false;

  // Replace extracts with narrow scalar loads.
  for (User *U : LI->users()) {
    auto *EI = cast<ExtractElementInst>(U);
    Value *Idx = EI->getOperand(1);

    // Insert 'freeze' for poison indexes.
    auto It = NeedFreeze.find(EI);
    if (It != NeedFreeze.end())
      It->second.freeze(Builder, *cast<Instruction>(Idx));

    Builder.SetInsertPoint(EI);
    // ...
    auto *NewLoad = cast<LoadInst>(Builder.CreateLoad(
        VecTy->getElementType(), GEP, EI->getName() + ".scalar"));

    Align ScalarOpAlignment = computeAlignmentAfterScalarization(
        LI->getAlign(), VecTy->getElementType(), Idx, DL);
    NewLoad->setAlignment(ScalarOpAlignment);

    replaceValue(*EI, *NewLoad);
  }

  return true;
}
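// Illustrative example: if every user of a vector load is an extract,
//   %v  = load <4 x i32>, ptr %p
//   %e0 = extractelement <4 x i32> %v, i32 0
//   %e1 = extractelement <4 x i32> %v, i32 2
// each extract can become its own scalar load through a GEP:
//   %p0 = getelementptr inbounds <4 x i32>, ptr %p, i32 0, i32 0
//   %e0 = load i32, ptr %p0
// and likewise for lane 2, with alignment recomputed per lane as above.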
bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
  auto *VecTy = cast<FixedVectorType>(I.getType());
  // ...
  if (ShufCost > BinopCost)
    return false;

  // If we have something like "add X, Y" and "add Z, X", swap ops to match.
  if (BinaryOperator::isCommutative(Opcode) && X != Z && Y != W)
    std::swap(X, Y);

  Value *Shuf0, *Shuf1;
  if (X == Z) {
    // shuf (bo X, Y), (bo X, W) --> bo (shuf X), (shuf Y, W)
    Shuf0 = Builder.CreateShuffleVector(X, UnaryMask);
    Shuf1 = Builder.CreateShuffleVector(Y, W, Mask);
  } else if (Y == W) {
    // shuf (bo X, Y), (bo Z, Y) --> bo (shuf X, Z), (shuf Y)
    Shuf0 = Builder.CreateShuffleVector(X, Z, Mask);
    Shuf1 = Builder.CreateShuffleVector(Y, UnaryMask);
  }
  // ...
  Value *NewBO = Builder.CreateBinOp(Opcode, Shuf0, Shuf1);
  // Intersect the IR flags from the two original binops.
  if (auto *NewInst = dyn_cast<Instruction>(NewBO)) {
    NewInst->copyIRFlags(B0);
    NewInst->andIRFlags(B1);
  }
  // ...
  replaceValue(I, *NewBO);
  return true;
}
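// Illustrative example of the shuffle-of-binops fold with a shared operand
// (the X == Z case, mask values are placeholders):
//   %b0 = add <4 x i32> %x, %y
//   %b1 = add <4 x i32> %x, %w
//   %r  = shufflevector <4 x i32> %b0, <4 x i32> %b1, <4 x i32> %mask
// becomes one binop fed by two shuffles:
//   %s0 = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> %unarymask
//   %s1 = shufflevector <4 x i32> %y, <4 x i32> %w, <4 x i32> %mask
//   %r  = add <4 x i32> %s0, %s1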
bool VectorCombine::foldShuffleFromReductions(Instruction &I) {
  auto *II = dyn_cast<IntrinsicInst>(&I);
  if (!II)
    return false;
  switch (II->getIntrinsicID()) {
  case Intrinsic::vector_reduce_add:
  case Intrinsic::vector_reduce_mul:
  case Intrinsic::vector_reduce_and:
  case Intrinsic::vector_reduce_or:
  case Intrinsic::vector_reduce_xor:
  case Intrinsic::vector_reduce_smin:
  case Intrinsic::vector_reduce_smax:
  case Intrinsic::vector_reduce_umin:
  case Intrinsic::vector_reduce_umax:
    break;
  default:
    return false;
  }

  // Walk the chain of binops feeding the reduction, looking for a single
  // shuffle; the reduction is lane-order independent, so the shuffle may be
  // simplifiable.
  std::queue<Value *> Worklist;
  SmallPtrSet<Value *, 4> Visited;
  ShuffleVectorInst *Shuffle = nullptr;
  if (auto *Op = dyn_cast<Instruction>(I.getOperand(0)))
    Worklist.push(Op);

  while (!Worklist.empty()) {
    Value *CV = Worklist.front();
    Worklist.pop();
    // ...
    if (auto *CI = dyn_cast<Instruction>(CV)) {
      if (CI->isBinaryOp()) {
        for (auto *Op : CI->operand_values())
          Worklist.push(Op);
        continue;
      } else if (auto *SV = dyn_cast<ShuffleVectorInst>(CI)) {
        if (Shuffle && Shuffle != SV)
          return false;
        Shuffle = SV;
        continue;
      }
    }
    // Anything else is currently unsupported.
    return false;
  }

  if (!Shuffle)
    return false;

  // Check that all other users of the binops and shuffles are inside the
  // tree rooted at the reduction.
  for (auto *V : Visited)
    for (auto *U : V->users())
      if (!Visited.contains(U) && U != &I)
        return false;

  auto *ShuffleInputType =
      dyn_cast<FixedVectorType>(II->getOperand(0)->getType());
  if (!ShuffleInputType)
    return false;
  unsigned NumInputElts = ShuffleInputType->getNumElements();

  // Sort the used lanes into order: the reduction does not care about order.
  SmallVector<int> ConcatMask;
  Shuffle->getShuffleMask(ConcatMask);
  sort(ConcatMask, [](int X, int Y) { return (unsigned)X < (unsigned)Y; });
  bool UsesSecondVec =
      any_of(ConcatMask, [&](int M) { return M >= NumInputElts; });
  // ...
  LLVM_DEBUG(dbgs() << "Found a reduction feeding from a shuffle: " << *Shuffle
                    << "\n");
  LLVM_DEBUG(dbgs() << "  OldCost: " << OldCost << " vs NewCost: " << NewCost
                    << "\n");
  if (NewCost < OldCost) {
    Builder.SetInsertPoint(Shuffle);
    Value *NewShuffle = Builder.CreateShuffleVector(
        Shuffle->getOperand(0), Shuffle->getOperand(1), ConcatMask);
    LLVM_DEBUG(dbgs() << "Created new shuffle: " << *NewShuffle << "\n");
    replaceValue(*Shuffle, *NewShuffle);
  }

  // See if we can re-use foldSelectShuffle, getting a select shuffle into a
  // supported form.
  return foldSelectShuffle(*Shuffle, true);
}
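// Illustrative example: for an order-independent reduction,
//   %s = shufflevector <4 x i32> %a, <4 x i32> poison,
//        <4 x i32> <i32 3, i32 2, i32 1, i32 0>
//   %r = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s)
// the reversal cannot change the reduced value, so the shuffle can be
// canonicalized to the sorted (identity) mask and then removed entirely.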
bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) {
  auto *SVI = cast<ShuffleVectorInst>(&I);
  auto *VT = cast<FixedVectorType>(I.getType());
  auto *Op0 = dyn_cast<Instruction>(SVI->getOperand(0));
  auto *Op1 = dyn_cast<Instruction>(SVI->getOperand(1));
  if (!Op0 || !Op1 || Op0 == Op1 || !Op0->isBinaryOp() || !Op1->isBinaryOp() ||
      VT != Op0->getType())
    return false;

  auto *SVI0A = dyn_cast<Instruction>(Op0->getOperand(0));
  auto *SVI0B = dyn_cast<Instruction>(Op0->getOperand(1));
  auto *SVI1A = dyn_cast<Instruction>(Op1->getOperand(0));
  auto *SVI1B = dyn_cast<Instruction>(Op1->getOperand(1));
  SmallPtrSet<Instruction *, 4> InputShuffles({SVI0A, SVI0B, SVI1A, SVI1B});
  auto checkSVNonOpUses = [&](Instruction *I) {
    if (!I || I->getOperand(0)->getType() != VT)
      return true;
    return any_of(I->users(), [&](User *U) {
      return U != Op0 && U != Op1 &&
             !(isa<ShuffleVectorInst>(U) &&
               (InputShuffles.contains(cast<Instruction>(U)) ||
                isInstructionTriviallyDead(cast<Instruction>(U))));
    });
  };
  if (checkSVNonOpUses(SVI0A) || checkSVNonOpUses(SVI0B) ||
      checkSVNonOpUses(SVI1A) || checkSVNonOpUses(SVI1B))
    return false;

  // Collect all the uses that are shuffles that we can transform together. We
  // may not have a single shuffle, but a group that can be folded together.
  SmallVector<ShuffleVectorInst *> Shuffles;
  auto collectShuffles = [&](Instruction *I) {
    for (auto *U : I->users()) {
      auto *SV = dyn_cast<ShuffleVectorInst>(U);
      if (!SV || SV->getType() != VT)
        return false;
      if ((SV->getOperand(0) != Op0 && SV->getOperand(0) != Op1) ||
          (SV->getOperand(1) != Op0 && SV->getOperand(1) != Op1))
        return false;
      if (!llvm::is_contained(Shuffles, SV))
        Shuffles.push_back(SV);
    }
    return true;
  };
  if (!collectShuffles(Op0) || !collectShuffles(Op1))
    return false;
  // From a reduction, we need to be processing a single shuffle.
  if (FromReduction && Shuffles.size() > 1)
    return false;

  // Add any shuffle uses of the shuffles we have found, to include them in our
  // cost calculations.
  if (!FromReduction) {
    for (ShuffleVectorInst *SV : Shuffles) {
      for (auto *U : SV->users()) {
        auto *SSV = dyn_cast<ShuffleVectorInst>(U);
        if (SSV && isa<UndefValue>(SSV->getOperand(1)) && SSV->getType() == VT)
          Shuffles.push_back(SSV);
      }
    }
  }
  // For each of the output shuffles, we try to sort all the first vector
  // elements to the beginning, followed by the second array elements at the
  // end. The ReconstructMask records how to convert back to the original
  // lane order.
  SmallVector<std::pair<int, int>> V1, V2;
  SmallVector<SmallVector<int>> OrigReconstructMasks;
  int MaxV1Elt = 0, MaxV2Elt = 0;
  unsigned NumElts = VT->getNumElements();
  for (ShuffleVectorInst *SVN : Shuffles) {
    SmallVector<int> Mask;
    SVN->getShuffleMask(Mask);

    // Check the operands are the same as the original, or reversed (in which
    // case we need to commute the mask).
    Value *SVOp0 = SVN->getOperand(0);
    Value *SVOp1 = SVN->getOperand(1);
    if (isa<UndefValue>(SVOp1)) {
      auto *SSV = cast<ShuffleVectorInst>(SVOp0);
      SVOp0 = SSV->getOperand(0);
      SVOp1 = SSV->getOperand(1);
      for (unsigned I = 0, E = Mask.size(); I != E; I++) {
        Mask[I] = Mask[I] < 0 ? Mask[I] : SSV->getMaskValue(Mask[I]);
      }
    }
    if (SVOp0 == Op1 && SVOp1 == Op0) {
      std::swap(SVOp0, SVOp1);
      ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
    }
    if (SVOp0 != Op0 || SVOp1 != Op1)
      return false;

    // Calculate the reconstruction mask for this shuffle, as the mask needed
    // to take the packed values from Op0/Op1 and reconstruct the original
    // order.
    SmallVector<int> ReconstructMask;
    for (unsigned I = 0; I < Mask.size(); I++) {
      if (Mask[I] < 0) {
        ReconstructMask.push_back(-1);
      } else if (Mask[I] < static_cast<int>(NumElts)) {
        MaxV1Elt = std::max(MaxV1Elt, Mask[I]);
        auto It = find_if(V1, [&](const std::pair<int, int> &A) {
          return Mask[I] == A.first;
        });
        if (It != V1.end())
          ReconstructMask.push_back(It - V1.begin());
        else {
          ReconstructMask.push_back(V1.size());
          V1.emplace_back(Mask[I], V1.size());
        }
      } else {
        MaxV2Elt = std::max<int>(MaxV2Elt, Mask[I] - NumElts);
        auto It = find_if(V2, [&](const std::pair<int, int> &A) {
          return Mask[I] - static_cast<int>(NumElts) == A.first;
        });
        if (It != V2.end())
          ReconstructMask.push_back(NumElts + It - V2.begin());
        else {
          ReconstructMask.push_back(NumElts + V2.size());
          V2.emplace_back(Mask[I] - NumElts, NumElts + V2.size());
        }
      }
    }

    // For reductions, the lane ordering of the output does not alter the
    // result, so in-order masks can help simplify the shuffle away.
    if (FromReduction)
      sort(ReconstructMask);
    OrigReconstructMasks.push_back(std::move(ReconstructMask));
  }

  // If the values are already packed (the maximum element used from V1 and V2
  // is the last element of each), running the transform again will not help
  // and could cycle; bail out.
  if (V1.empty() || V2.empty() ||
      (MaxV1Elt == static_cast<int>(V1.size()) - 1 &&
       MaxV2Elt == static_cast<int>(V2.size()) - 1))
    return false;
  // Given a shuffle, look through an input shuffle (if it is part of the
  // pattern) to find the underlying mask value.
  auto GetBaseMaskValue = [&](Instruction *I, int M) {
    auto *SV = dyn_cast<ShuffleVectorInst>(I);
    if (!SV)
      return M;
    if (isa<UndefValue>(SV->getOperand(1)))
      if (auto *SSV = dyn_cast<ShuffleVectorInst>(SV->getOperand(0)))
        if (InputShuffles.contains(SSV))
          return SSV->getMaskValue(SV->getMaskValue(M));
    return SV->getMaskValue(M);
  };

  // Sort the packed elements by their original lane so the new input shuffles
  // stay as close to identity order as possible.
  auto SortBase = [&](Instruction *A, std::pair<int, int> X,
                      std::pair<int, int> Y) {
    int MXA = GetBaseMaskValue(A, X.first);
    int MYA = GetBaseMaskValue(A, Y.first);
    return MXA < MYA;
  };
  stable_sort(V1, [&](std::pair<int, int> A, std::pair<int, int> B) {
    return SortBase(SVI0A, A, B);
  });
  stable_sort(V2, [&](std::pair<int, int> A, std::pair<int, int> B) {
    return SortBase(SVI1A, A, B);
  });

  // Calculate our ReconstructMasks from the OrigReconstructMasks and the
  // modified order of the input shuffles.
  SmallVector<SmallVector<int>> ReconstructMasks;
  for (const auto &Mask : OrigReconstructMasks) {
    SmallVector<int> ReconstructMask;
    for (int M : Mask) {
      auto FindIndex = [](const SmallVector<std::pair<int, int>> &V, int M) {
        auto It = find_if(V, [M](auto A) { return A.second == M; });
        assert(It != V.end() && "Expected all entries in Mask");
        return std::distance(V.begin(), It);
      };
      if (M < 0)
        ReconstructMask.push_back(-1);
      else if (M < static_cast<int>(NumElts)) {
        ReconstructMask.push_back(FindIndex(V1, M));
      } else {
        ReconstructMask.push_back(NumElts + FindIndex(V2, M));
      }
    }
    ReconstructMasks.push_back(std::move(ReconstructMask));
  }
  // Calculate the masks needed for the new input shuffles, which get padded
  // with undef.
  SmallVector<int> V1A, V1B, V2A, V2B;
  for (unsigned I = 0; I < V1.size(); I++) {
    V1A.push_back(GetBaseMaskValue(SVI0A, V1[I].first));
    V1B.push_back(GetBaseMaskValue(SVI0B, V1[I].first));
  }
  for (unsigned I = 0; I < V2.size(); I++) {
    V2A.push_back(GetBaseMaskValue(SVI1A, V2[I].first));
    V2B.push_back(GetBaseMaskValue(SVI1B, V2[I].first));
  }
  while (V1A.size() < NumElts) {
    V1A.push_back(PoisonMaskElem);
    V1B.push_back(PoisonMaskElem);
  }
  while (V2A.size() < NumElts) {
    V2A.push_back(PoisonMaskElem);
    V2B.push_back(PoisonMaskElem);
  }

  auto AddShuffleCost = [&](InstructionCost C, Instruction *I) {
    auto *SV = dyn_cast<ShuffleVectorInst>(I);
    if (!SV)
      return C;
    return C + TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc,
                                  VT, SV->getShuffleMask());
  };
  // ...
  InstructionCost CostBefore =
      TTI.getArithmeticInstrCost(Op0->getOpcode(), VT) +
      TTI.getArithmeticInstrCost(Op1->getOpcode(), VT);
  CostBefore += std::accumulate(Shuffles.begin(), Shuffles.end(),
                                InstructionCost(0), AddShuffleCost);
  CostBefore += std::accumulate(InputShuffles.begin(), InputShuffles.end(),
                                InstructionCost(0), AddShuffleCost);

  // The new binops will be unused for lanes past the used shuffle lengths.
  // ...
  CostAfter += std::accumulate(ReconstructMasks.begin(), ReconstructMasks.end(),
                               InstructionCost(0), AddShuffleMaskCost);
  std::set<SmallVector<int>> OutputShuffleMasks({V1A, V1B, V2A, V2B});
  CostAfter +=
      std::accumulate(OutputShuffleMasks.begin(), OutputShuffleMasks.end(),
                      InstructionCost(0), AddShuffleMaskCost);

  LLVM_DEBUG(dbgs() << "Found a binop select shuffle pattern: " << I << "\n");
  LLVM_DEBUG(dbgs() << "  CostBefore: " << CostBefore
                    << " vs CostAfter: " << CostAfter << "\n");
  if (CostBefore <= CostAfter)
    return false;
  // The cost model has approved; create the new instructions, looking through
  // any input shuffles that are part of the matched pattern.
  auto GetShuffleOperand = [&](Instruction *I, unsigned Op) -> Value * {
    auto *SV = dyn_cast<ShuffleVectorInst>(I);
    if (!SV)
      return I;
    if (isa<UndefValue>(SV->getOperand(1)))
      if (auto *SSV = dyn_cast<ShuffleVectorInst>(SV->getOperand(0)))
        if (InputShuffles.contains(SSV))
          return SSV->getOperand(Op);
    return SV->getOperand(Op);
  };
  Builder.SetInsertPoint(SVI0A->getInsertionPointAfterDef());
  Value *NSV0A = Builder.CreateShuffleVector(GetShuffleOperand(SVI0A, 0),
                                             GetShuffleOperand(SVI0A, 1), V1A);
  Builder.SetInsertPoint(SVI0B->getInsertionPointAfterDef());
  Value *NSV0B = Builder.CreateShuffleVector(GetShuffleOperand(SVI0B, 0),
                                             GetShuffleOperand(SVI0B, 1), V1B);
  Builder.SetInsertPoint(SVI1A->getInsertionPointAfterDef());
  Value *NSV1A = Builder.CreateShuffleVector(GetShuffleOperand(SVI1A, 0),
                                             GetShuffleOperand(SVI1A, 1), V2A);
  Builder.SetInsertPoint(SVI1B->getInsertionPointAfterDef());
  Value *NSV1B = Builder.CreateShuffleVector(GetShuffleOperand(SVI1B, 0),
                                             GetShuffleOperand(SVI1B, 1), V2B);
  // ...
  if (auto *I = dyn_cast<Instruction>(NOp0))
    I->copyIRFlags(Op0, true);
  // ...
  if (auto *I = dyn_cast<Instruction>(NOp1))
    I->copyIRFlags(Op1, true);

  for (int S = 0, E = ReconstructMasks.size(); S != E; S++) {
    Builder.SetInsertPoint(Shuffles[S]);
    Value *NSV = Builder.CreateShuffleVector(NOp0, NOp1, ReconstructMasks[S]);
    replaceValue(*Shuffles[S], *NSV);
  }

  Worklist.pushValue(NSV0A);
  Worklist.pushValue(NSV0B);
  Worklist.pushValue(NSV1A);
  Worklist.pushValue(NSV1B);
  for (auto *S : Shuffles)
    Worklist.push(S);
  return true;
}
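// The pattern handled above, sketched (illustrative, masks abbreviated): two
// binops whose operands are shuffles, feeding "select" shuffles that
// interleave their results,
//   %op0 = fadd <4 x float> %sa, %sb    ; %sa..%sd are input shuffles
//   %op1 = fsub <4 x float> %sc, %sd
//   %r0  = shufflevector %op0, %op1, <i32 0, i32 5, i32 2, i32 7>
// are re-packed so each binop computes only the lanes that are actually used,
// with cheaper (more identity-like) shuffles on the inputs and a
// reconstruction shuffle producing the original lane order on the outputs.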
bool VectorCombine::run() {
  if (DisableVectorCombine)
    return false;
  // ...
  bool MadeChange = false;
  auto FoldInst = [this, &MadeChange](Instruction &I) {
    Builder.SetInsertPoint(&I);
    bool IsFixedVectorType = isa<FixedVectorType>(I.getType());
    auto Opcode = I.getOpcode();

    // These folds should be beneficial regardless of when this pass is run
    // in the optimization pipeline.
    if (IsFixedVectorType) {
      switch (Opcode) {
      case Instruction::InsertElement:
        MadeChange |= vectorizeLoadInsert(I);
        break;
      case Instruction::ShuffleVector:
        MadeChange |= widenSubvectorLoad(I);
        break;
      default:
        break;
      }
    }

    // These transforms work with scalable and fixed vectors.
    if (isa<VectorType>(I.getType())) {
      MadeChange |= scalarizeBinopOrCmp(I);
      MadeChange |= scalarizeLoadExtract(I);
      MadeChange |= scalarizeVPIntrinsic(I);
    }

    if (Opcode == Instruction::Store)
      MadeChange |= foldSingleElementStore(I);

    // If this is an early pipeline invocation of this pass, we are done.
    if (TryEarlyFoldsOnly)
      return;

    // Otherwise, try folds that improve codegen but may interfere with early
    // IR canonicalizations.
    if (IsFixedVectorType) {
      switch (Opcode) {
      case Instruction::InsertElement:
        MadeChange |= foldInsExtFNeg(I);
        break;
      case Instruction::ShuffleVector:
        MadeChange |= foldShuffleOfBinops(I);
        MadeChange |= foldSelectShuffle(I);
        break;
      case Instruction::BitCast:
        MadeChange |= foldBitcastShuf(I);
        break;
      }
    } else {
      switch (Opcode) {
      case Instruction::Call:
        MadeChange |= foldShuffleFromReductions(I);
        break;
      case Instruction::ICmp:
      case Instruction::FCmp:
        MadeChange |= foldExtractExtract(I);
        break;
      default:
        if (Instruction::isBinaryOp(Opcode)) {
          MadeChange |= foldExtractExtract(I);
          MadeChange |= foldExtractedCmps(I);
        }
        break;
      }
    }
  };

  for (BasicBlock &BB : F) {
    // ...
    for (Instruction &I : make_early_inc_range(BB)) {
      if (I.isDebugOrPseudoInst())
        continue;
      FoldInst(I);
    }
  }

  while (!Worklist.isEmpty()) {
    // ...
  }

  return MadeChange;
}

PreservedAnalyses VectorCombinePass::run(Function &F,
                                         FunctionAnalysisManager &FAM) {
  auto &AC = FAM.getResult<AssumptionAnalysis>(F);
  TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
  DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F);
  AAResults &AA = FAM.getResult<AAManager>(F);
  VectorCombine Combiner(F, TTI, DT, AA, AC, TryEarlyFoldsOnly);
  if (!Combiner.run())
    return PreservedAnalyses::all();
  PreservedAnalyses PA;
  PA.preserveSet<CFGAnalyses>();
  return PA;
}