#define DEBUG_TYPE "vector-combine"

STATISTIC(NumVecLoad, "Number of vector loads formed");
STATISTIC(NumVecCmp, "Number of vector compares formed");
STATISTIC(NumVecBO, "Number of vector binops formed");
STATISTIC(NumVecCmpBO, "Number of vector compare + binop formed");
STATISTIC(NumShufOfBitcast, "Number of shuffles moved after bitcast");
STATISTIC(NumScalarBO, "Number of scalar binops formed");
STATISTIC(NumScalarCmp, "Number of scalar compares formed");
static cl::opt<bool> DisableVectorCombine(
    "disable-vector-combine", cl::init(false), cl::Hidden,
    cl::desc("Disable all vector combine transforms"));

static cl::opt<bool> DisableBinopExtractShuffle(
    "disable-binop-extract-shuffle", cl::init(false), cl::Hidden,
    cl::desc("Disable binop extract to shuffle transforms"));

static cl::opt<unsigned> MaxInstrsToScan(
    "vector-combine-max-scan-instrs", cl::init(30), cl::Hidden,
    cl::desc("Max number of instructions to scan for vector combining."));
static const unsigned InvalidIndex = std::numeric_limits<unsigned>::max();

  VectorCombine(Function &F, const TargetTransformInfo &TTI,
                const DominatorTree &DT, AAResults &AA, AssumptionCache &AC,
                bool TryEarlyFoldsOnly)
      : F(F), Builder(F.getContext()), TTI(TTI), DT(DT), AA(AA), AC(AC),
        TryEarlyFoldsOnly(TryEarlyFoldsOnly) {}

  /// If true, only perform beneficial early IR transforms. Do not introduce
  /// new vector operations.
  bool TryEarlyFoldsOnly;

  ExtractElementInst *getShuffleExtract(
      ExtractElementInst *Ext0, ExtractElementInst *Ext1,
      unsigned PreferredExtractIndex = InvalidIndex) const;
  bool isExtractExtractCheap(ExtractElementInst *Ext0,
                             ExtractElementInst *Ext1, const Instruction &I,
                             ExtractElementInst *&ConvertToShuffle,
                             unsigned PreferredExtractIndex);
  bool foldSelectShuffle(Instruction &I, bool FromReduction = false);
    if (auto *NewI = dyn_cast<Instruction>(&New)) {
      Worklist.pushUsersToWorkList(*NewI);
      Worklist.pushValue(NewI);
    }

    for (Value *Op : I.operands())
      Worklist.pushValue(Op);
  if (!Load || !Load->isSimple() || !Load->hasOneUse() ||
      Load->getFunction()->hasFnAttribute(Attribute::SanitizeMemTag) ||
      mustSuppressSpeculation(*Load))
    return false;

  Type *ScalarTy = Load->getType()->getScalarType();
  uint64_t ScalarSize = ScalarTy->getPrimitiveSizeInBits();
  if (!ScalarSize || !MinVectorSize || MinVectorSize % ScalarSize != 0 ||
      ScalarSize % 8 != 0)
    return false;
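
// Illustrative sketch of the fold implemented below (assumed IR, not taken
// from an actual test):
//   %s = load float, ptr %p
//   %i = insertelement <4 x float> poison, float %s, i32 0
// becomes, when the wider load is safe and the cost model agrees:
//   %v = load <4 x float>, ptr %p
//   %i = shufflevector <4 x float> %v, <4 x float> poison,
//                      <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>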
bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
  // Match insert of a scalar load (possibly through an extract) into lane 0.
  Value *Scalar;
  if (!match(&I, m_InsertElt(m_Undef(), m_Value(Scalar), m_ZeroInt())) ||
      !Scalar->hasOneUse())
    return false;
  Value *X;
  if (!match(Scalar, m_ExtractElt(m_Value(X), m_ZeroInt())))
    X = Scalar;

  auto *Load = dyn_cast<LoadInst>(X);
  if (!canWidenLoad(Load, TTI))
    return false;

  const DataLayout &DL = I.getModule()->getDataLayout();
  Value *SrcPtr = Load->getPointerOperand()->stripPointerCasts();
  assert(isa<PointerType>(SrcPtr->getType()) && "Expected a pointer type");

  unsigned MinVecNumElts = MinVectorSize / ScalarSize;
  auto *MinVecTy = VectorType::get(ScalarTy, MinVecNumElts, /*Scalable=*/false);
  unsigned OffsetEltIndex = 0;

  unsigned OffsetBitWidth = DL.getIndexTypeSizeInBits(SrcPtr->getType());

  // The pointer offset must shuffle cleanly in whole scalar elements.
  uint64_t ScalarSizeInBytes = ScalarSize / 8;
  if (Offset.urem(ScalarSizeInBytes) != 0)
    return false;

  OffsetEltIndex = Offset.udiv(ScalarSizeInBytes).getZExtValue();
  if (OffsetEltIndex >= MinVecNumElts)
    return false;

  unsigned AS = Load->getPointerAddressSpace();

  // Set the first shuffle lane to the loaded element at the computed offset;
  // all other lanes are undefined.
  auto *Ty = cast<FixedVectorType>(I.getType());
  unsigned OutputNumElts = Ty->getNumElements();
  SmallVector<int, 16> Mask(OutputNumElts, UndefMaskElem);
  assert(OffsetEltIndex < MinVecNumElts && "Address offset too big");
  Mask[0] = OffsetEltIndex;

  if (OldCost < NewCost || !NewCost.isValid())
    return false;

  // It is safe and potentially profitable to load a vector directly.
  Value *CastedPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
      SrcPtr, MinVecTy->getPointerTo(AS));
  Value *VecLd = Builder.CreateAlignedLoad(MinVecTy, CastedPtr, Alignment);
  VecLd = Builder.CreateShuffleVector(VecLd, Mask);

  replaceValue(I, *VecLd);
  ++NumVecLoad;
  return true;
}
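
// The next fold widens a load feeding an identity-with-padding shuffle.
// A hypothetical example of the pattern:
//   %v = load <2 x i32>, ptr %p
//   %w = shufflevector <2 x i32> %v, <2 x i32> poison,
//                      <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
// may be replaced by a single "load <4 x i32>, ptr %p" when that is safe.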
bool VectorCombine::widenSubvectorLoad(Instruction &I) {
  // Match subvector insert of a fixed vector load.
  auto *Shuf = cast<ShuffleVectorInst>(&I);
  if (!Shuf->isIdentityWithPadding())
    return false;

  unsigned NumOpElts =
      cast<FixedVectorType>(Shuf->getOperand(0)->getType())->getNumElements();
  unsigned OpIndex = any_of(Shuf->getShuffleMask(), [&NumOpElts](int M) {
    return M >= (int)(NumOpElts);
  });

  auto *Load = dyn_cast<LoadInst>(Shuf->getOperand(OpIndex));
  if (!canWidenLoad(Load, TTI))
    return false;

  auto *Ty = cast<FixedVectorType>(I.getType());
  Value *SrcPtr = Load->getPointerOperand()->stripPointerCasts();
  assert(isa<PointerType>(SrcPtr->getType()) && "Expected a pointer type");

  unsigned AS = Load->getPointerAddressSpace();

  if (OldCost < NewCost || !NewCost.isValid())
    return false;

  Value *CastedPtr =
      Builder.CreatePointerBitCastOrAddrSpaceCast(SrcPtr, Ty->getPointerTo(AS));
  Value *VecLd = Builder.CreateAlignedLoad(Ty, CastedPtr, Alignment);
  replaceValue(I, *VecLd);
  ++NumVecLoad;
  return true;
}
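
// getShuffleExtract, below, picks which of two extracts should be rewritten
// as shuffle + extract so that both scalar operands come from the same lane.
// For example (an assumed scenario), extracts at lanes 1 and 3 can be
// normalized by shuffling lane 3 down to lane 1 so that a single extract
// index survives after the vector operation is formed.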
ExtractElementInst *VectorCombine::getShuffleExtract(
    ExtractElementInst *Ext0, ExtractElementInst *Ext1,
    unsigned PreferredExtractIndex) const {
  auto *Index0C = dyn_cast<ConstantInt>(Ext0->getIndexOperand());
  auto *Index1C = dyn_cast<ConstantInt>(Ext1->getIndexOperand());
  assert(Index0C && Index1C && "Expected constant extract indexes");

  unsigned Index0 = Index0C->getZExtValue();
  unsigned Index1 = Index1C->getZExtValue();

  // If the extract indexes are identical, no shuffle is needed.
  if (Index0 == Index1)
    return nullptr;

  // If a preferred index matches one extract, shuffle the other one.
  if (PreferredExtractIndex == Index0)
    return Ext1;
  if (PreferredExtractIndex == Index1)
    return Ext0;
  // Otherwise, shuffle the extract with the higher index.
  return Index0 > Index1 ? Ext0 : Ext1;
}

bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
                                          ExtractElementInst *Ext1,
                                          const Instruction &I,
                                          ExtractElementInst *&ConvertToShuffle,
                                          unsigned PreferredExtractIndex) {
  auto *Ext0IndexC = dyn_cast<ConstantInt>(Ext0->getOperand(1));
  auto *Ext1IndexC = dyn_cast<ConstantInt>(Ext1->getOperand(1));
  assert(Ext0IndexC && Ext1IndexC && "Expected constant extract indexes");

  unsigned Opcode = I.getOpcode();
  assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
         "Expected a compare");

  unsigned Ext0Index = Ext0IndexC->getZExtValue();
  unsigned Ext1Index = Ext1IndexC->getZExtValue();

  InstructionCost CheapExtractCost = std::min(Extract0Cost, Extract1Cost);

  InstructionCost OldCost, NewCost;
  if (Ext0->getVectorOperand() == Ext1->getVectorOperand() &&
      Ext0Index == Ext1Index) {
    bool HasUseTax = Ext0 == Ext1 ? !Ext0->hasNUses(2)
                                  : !Ext0->hasOneUse() || !Ext1->hasOneUse();
    OldCost = CheapExtractCost + ScalarOpCost;
    NewCost = VectorOpCost + CheapExtractCost + HasUseTax * CheapExtractCost;
  } else {
    OldCost = Extract0Cost + Extract1Cost + ScalarOpCost;
    NewCost = VectorOpCost + CheapExtractCost +
              !Ext0->hasOneUse() * Extract0Cost +
              !Ext1->hasOneUse() * Extract1Cost;
  }

  ConvertToShuffle = getShuffleExtract(Ext0, Ext1, PreferredExtractIndex);
  if (ConvertToShuffle) {
    // Account for the extra shuffle that normalizes the extract index.
    NewCost +=
        TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, VecTy);
  }

  // Aggressively form a vector op if the cost is equal because the transform
  // may enable further optimization.
  return OldCost < NewCost;
}
static Value *createShiftShuffle(Value *Vec, unsigned OldIndex,
                                 unsigned NewIndex, IRBuilder<> &Builder) {
  // Shuffle the source element into the NewIndex lane; all other lanes are
  // undefined.
  auto *VecTy = cast<FixedVectorType>(Vec->getType());
  SmallVector<int, 32> ShufMask(VecTy->getNumElements(), UndefMaskElem);
  ShufMask[NewIndex] = OldIndex;
  return Builder.CreateShuffleVector(Vec, ShufMask, "shift");
}

static ExtractElementInst *translateExtract(ExtractElementInst *ExtElt,
                                            unsigned NewIndex,
                                            IRBuilder<> &Builder) {
  Value *X = ExtElt->getVectorOperand();
  Value *C = ExtElt->getIndexOperand();
  assert(isa<ConstantInt>(C) && "Expected a constant index operand");
  if (isa<Constant>(X))
    return nullptr;

  Value *Shuf = createShiftShuffle(X, cast<ConstantInt>(C)->getZExtValue(),
                                   NewIndex, Builder);
  return cast<ExtractElementInst>(Builder.CreateExtractElement(Shuf, NewIndex));
}
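
// The two helpers below perform the actual rewrites (sketch, for a common
// constant lane C):
//   cmp Pred (extelt V0, C), (extelt V1, C) --> extelt (cmp Pred V0, V1), C
//   binop (extelt V0, C), (extelt V1, C)    --> extelt (binop V0, V1), C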
void VectorCombine::foldExtExtCmp(ExtractElementInst *Ext0,
                                  ExtractElementInst *Ext1, Instruction &I) {
  assert(isa<CmpInst>(&I) && "Expected a compare");
  assert(cast<ConstantInt>(Ext0->getIndexOperand())->getZExtValue() ==
             cast<ConstantInt>(Ext1->getIndexOperand())->getZExtValue() &&
         "Expected matching constant extract indexes");
  ++NumVecCmp;
  replaceValue(I, *NewExt);
}
void VectorCombine::foldExtExtBinop(ExtractElementInst *Ext0,
                                    ExtractElementInst *Ext1, Instruction &I) {
  assert(isa<BinaryOperator>(&I) && "Expected a binary operator");
  assert(cast<ConstantInt>(Ext0->getIndexOperand())->getZExtValue() ==
             cast<ConstantInt>(Ext1->getIndexOperand())->getZExtValue() &&
         "Expected matching constant extract indexes");
  ++NumVecBO;
  if (auto *VecBOInst = dyn_cast<Instruction>(VecBO))
    VecBOInst->copyIRFlags(&I);

  replaceValue(I, *NewExt);
}
  auto *Ext0 = cast<ExtractElementInst>(I0);
  auto *Ext1 = cast<ExtractElementInst>(I1);

  if (isExtractExtractCheap(Ext0, Ext1, I, ExtractToChange, InsertIndex))
    return false;

  if (ExtractToChange) {
    unsigned CheapExtractIdx = ExtractToChange == Ext0 ? C1 : C0;
    // (The elided code rewrites the chosen extract via translateExtract.)
    if (ExtractToChange == Ext0)
      Ext0 = NewExtract;
    else
      Ext1 = NewExtract;
  }

  if (Pred != CmpInst::BAD_ICMP_PREDICATE)
    foldExtExtCmp(Ext0, Ext1, I);
  else
    foldExtExtBinop(Ext0, Ext1, I);
  return true;
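
// Sketch of the next fold on assumed IR (lane 1 of <4 x float>):
//   %e = extractelement <4 x float> %src, i32 1
//   %n = fneg float %e
//   %r = insertelement <4 x float> %dst, float %n, i32 1
// becomes:
//   %vn = fneg <4 x float> %src
//   %r  = shufflevector <4 x float> %dst, <4 x float> %vn,
//                       <4 x i32> <i32 0, i32 5, i32 2, i32 3>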
bool VectorCombine::foldInsExtFNeg(Instruction &I) {
  auto *VecTy = cast<FixedVectorType>(I.getType());
  if (SrcVec->getType() != VecTy)
    return false;

  unsigned NumElts = VecTy->getNumElements();
  if (Index >= NumElts)
    return false;

  // The mask selects all destination lanes except the negated source lane.
  SmallVector<int> Mask(NumElts);
  std::iota(Mask.begin(), Mask.end(), 0);
  Mask[Index] = Index + NumElts;

  if (NewCost > OldCost)
    return false;

  Value *VecFNeg = Builder.CreateFNegFMF(SrcVec, FNeg);
  Value *Shuf = Builder.CreateShuffleVector(DestVec, VecFNeg, Mask);
  replaceValue(I, *Shuf);
  return true;
}
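
// The next fold moves a bitcast ahead of a shuffle:
//   bitcast (shuf V, MaskC) --> shuf (bitcast V), MaskC'
// where MaskC' is MaskC rescaled to the new element count. For instance
// (hypothetical), a <2 x i64> shuffle mask <1, 0> bitcast to <4 x i32>
// becomes the narrowed mask <2, 3, 0, 1>.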
bool VectorCombine::foldBitcastShuf(Instruction &I) {
  Value *V;
  ArrayRef<int> Mask;
  if (!match(&I, m_BitCast(
                     m_OneUse(m_Shuffle(m_Value(V), m_Undef(), m_Mask(Mask))))))
    return false;

  auto *SrcTy = dyn_cast<FixedVectorType>(V->getType());
  if (!SrcTy || I.getOperand(0)->getType() != SrcTy)
    return false;

  auto *DestTy = cast<FixedVectorType>(I.getType());
  unsigned DestNumElts = DestTy->getNumElements();
  unsigned SrcNumElts = SrcTy->getNumElements();
  SmallVector<int, 16> NewMask;
  if (SrcNumElts <= DestNumElts) {
    // The bitcast is from wide to narrow/equal elements. The shuffle mask can
    // always be expanded to the equivalent form with narrow elements.
    assert(DestNumElts % SrcNumElts == 0 && "Unexpected shuffle mask");
    unsigned ScaleFactor = DestNumElts / SrcNumElts;
    narrowShuffleMaskElts(ScaleFactor, Mask, NewMask);
  } else {
    // The bitcast is from narrow to wide elements. The shuffle mask must
    // map cleanly to the wider elements.
    assert(SrcNumElts % DestNumElts == 0 && "Unexpected shuffle mask");
    unsigned ScaleFactor = SrcNumElts / DestNumElts;
    if (!widenShuffleMaskElts(ScaleFactor, Mask, NewMask))
      return false;
  }

  if (DestCost > SrcCost || !DestCost.isValid())
    return false;

  ++NumShufOfBitcast;
  Value *CastV = Builder.CreateBitCast(V, DestTy);
  Value *Shuf = Builder.CreateShuffleVector(CastV, NewMask);
  replaceValue(I, *Shuf);
  return true;
}
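
// Sketch of the scalarization performed below (assumed integer example):
//   %i = insertelement <4 x i32> <i32 1, i32 2, i32 3, i32 4>, i32 %x, i32 2
//   %r = add <4 x i32> %i, <i32 10, i32 20, i32 30, i32 40>
// becomes:
//   %s = add i32 %x, 30
//   %r = insertelement <4 x i32> <i32 11, i32 22, i32 33, i32 44>, i32 %s, i32 2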
bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
  CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
  Value *Ins0, *Ins1;
  if (!match(&I, m_BinOp(m_Value(Ins0), m_Value(Ins1))) &&
      !match(&I, m_Cmp(Pred, m_Value(Ins0), m_Value(Ins1))))
    return false;

  // Do not convert the vector condition of a vector select into a scalar
  // condition.
  bool IsCmp = Pred != CmpInst::Predicate::BAD_ICMP_PREDICATE;
  if (IsCmp)
    for (User *U : I.users())
      if (match(U, m_Select(m_Specific(&I), m_Value(), m_Value())))
        return false;

  // Match against one or both scalar values being inserted into constant
  // vectors.
  Constant *VecC0 = nullptr, *VecC1 = nullptr;
  Value *V0 = nullptr, *V1 = nullptr;

  if (IsConst0 && IsConst1)
    return false;
  if (!IsConst0 && !IsConst1 && Index0 != Index1)
    return false;

  // Bail for single insertion if it is a load.
  auto *I0 = dyn_cast_or_null<Instruction>(V0);
  auto *I1 = dyn_cast_or_null<Instruction>(V1);
  if ((IsConst0 && I1 && I1->mayReadFromMemory()) ||
      (IsConst1 && I0 && I0->mayReadFromMemory()))
    return false;

  Type *VecTy = I.getType();
  assert(VecTy->isVectorTy() &&
         (IsConst0 || IsConst1 || V0->getType() == V1->getType()) &&
         "Unexpected types for insert element into binop or cmp");

  unsigned Opcode = I.getOpcode();

  InstructionCost OldCost =
      (IsConst0 ? 0 : InsertCost) + (IsConst1 ? 0 : InsertCost) + VectorOpCost;
  InstructionCost NewCost = ScalarOpCost + InsertCost +
                            (IsConst0 ? 0 : !Ins0->hasOneUse() * InsertCost) +
                            (IsConst1 ? 0 : !Ins1->hasOneUse() * InsertCost);

  if (OldCost < NewCost || !NewCost.isValid())
    return false;

  // vec_op (inselt VecC0, V0, Index), (inselt VecC1, V1, Index) -->
  // inselt NewVecC, (scalar_op V0, V1), Index
  Value *Scalar = IsCmp ? Builder.CreateCmp(Pred, V0, V1)
                        : Builder.CreateBinOp((Instruction::BinaryOps)Opcode,
                                              V0, V1);
  Scalar->setName(I.getName() + ".scalar");

  // All IR flags are safe to back-propagate; there is no potential for extra
  // poison from the scalar instruction.
  if (auto *ScalarInst = dyn_cast<Instruction>(Scalar))
    ScalarInst->copyIRFlags(&I);

  // Fold the vector constants in the original vectors into a new base vector.
  Value *NewVecC = IsCmp ? Builder.CreateCmp(Pred, VecC0, VecC1)
                         : Builder.CreateBinOp((Instruction::BinaryOps)Opcode,
                                               VecC0, VecC1);
  Value *Insert = Builder.CreateInsertElement(NewVecC, Scalar, Index);
  replaceValue(I, *Insert);
  return true;
}
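
// The next fold targets a scalar binop of two extracted compares of the same
// vector (sketch):
//   %c0 = icmp sgt <4 x i32> %x, %C0
//   %c1 = icmp sgt <4 x i32> %x, %C1
//   %e0 = extractelement <4 x i1> %c0, i32 0
//   %e1 = extractelement <4 x i1> %c1, i32 1
//   %r  = and i1 %e0, %e1
// It can become vector cmps + a vector binop + a single extract, shuffling
// one compare operand when the extract lanes differ.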
bool VectorCombine::foldExtractedCmps(Instruction &I) {
  // We are looking for a scalar binop of booleans:
  //   binop i1 (cmp Pred I0, C0), (cmp Pred I1, C1)
  if (!I.isBinaryOp() || !I.getType()->isIntegerTy(1))
    return false;

  // The compare predicates should match, and each compare should have a
  // constant operand.
  Value *B0 = I.getOperand(0), *B1 = I.getOperand(1);

  auto *Ext0 = cast<ExtractElementInst>(I0);
  auto *Ext1 = cast<ExtractElementInst>(I1);

  auto *VecTy = dyn_cast<FixedVectorType>(X->getType());
  if (!VecTy)
    return false;

  int CheapIndex = ConvertToShuf == Ext0 ? Index1 : Index0;
  int ExpensiveIndex = ConvertToShuf == Ext0 ? Index0 : Index1;
  SmallVector<int, 32> ShufMask(VecTy->getNumElements(), UndefMaskElem);
  ShufMask[CheapIndex] = ExpensiveIndex;

  if (OldCost < NewCost || !NewCost.isValid())
    return false;

  ++NumVecCmpBO;
  Value *NewExt = Builder.CreateExtractElement(VecLogic, CheapIndex);
  replaceValue(I, *NewExt);
  return true;
}
static bool isMemModifiedBetween(BasicBlock::iterator Begin,
                                 BasicBlock::iterator End,
                                 const MemoryLocation &Loc, AAResults &AA) {
  unsigned NumScanned = 0;
  return std::any_of(Begin, End, [&](const Instruction &Instr) {
    return isModSet(AA.getModRefInfo(&Instr, Loc)) ||
           ++NumScanned > MaxInstrsToScan;
  });
}
/// Helper class to indicate whether a vector index can be safely scalarized
/// and, if a freeze is needed, which value to freeze.
class ScalarizationResult {
  enum class StatusTy { Unsafe, Safe, SafeWithFreeze };

  StatusTy Status;
  Value *ToFreeze;

  ScalarizationResult(StatusTy Status, Value *ToFreeze = nullptr)
      : Status(Status), ToFreeze(ToFreeze) {}

public:
  ScalarizationResult(const ScalarizationResult &Other) = default;
  ~ScalarizationResult() {
    assert(!ToFreeze && "freeze() not called with ToFreeze being set");
  }

  static ScalarizationResult unsafe() { return {StatusTy::Unsafe}; }
  static ScalarizationResult safe() { return {StatusTy::Safe}; }
  static ScalarizationResult safeWithFreeze(Value *ToFreeze) {
    return {StatusTy::SafeWithFreeze, ToFreeze};
  }

  /// Returns true if the index can be scalarized without requiring a freeze.
  bool isSafe() const { return Status == StatusTy::Safe; }
  /// Returns true if the index cannot be scalarized.
  bool isUnsafe() const { return Status == StatusTy::Unsafe; }
  /// Returns true if the index can be scalarized, but requires inserting a
  /// freeze.
  bool isSafeWithFreeze() const { return Status == StatusTy::SafeWithFreeze; }

  /// Reset the state to Unsafe and clear ToFreeze if set.
  void discard() {
    ToFreeze = nullptr;
    Status = StatusTy::Unsafe;
  }

  /// Freeze ToFreeze and update the use in UserI to use it.
  void freeze(IRBuilder<> &Builder, Instruction &UserI) {
    assert(isSafeWithFreeze() &&
           "should only be used when freezing is required");
    assert(is_contained(ToFreeze->users(), &UserI) &&
           "UserI must be a user of ToFreeze");
    Builder.SetInsertPoint(cast<Instruction>(&UserI));
    Value *Frozen =
        Builder.CreateFreeze(ToFreeze, ToFreeze->getName() + ".frozen");
    for (Use &U : make_early_inc_range(UserI.operands()))
      if (U.get() == ToFreeze)
        U.set(Frozen);

    ToFreeze = nullptr;
  }
};
static ScalarizationResult canScalarizeAccess(FixedVectorType *VecTy,
                                              Value *Idx, Instruction *CtxI,
                                              AssumptionCache &AC,
                                              const DominatorTree &DT) {
  // A constant index is safe exactly when it is in bounds.
  if (auto *C = dyn_cast<ConstantInt>(Idx)) {
    if (C->getValue().ult(VecTy->getNumElements()))
      return ScalarizationResult::safe();
    return ScalarizationResult::unsafe();
  }

  unsigned IntWidth = Idx->getType()->getScalarSizeInBits();
  APInt Zero(IntWidth, 0);
  APInt MaxElts(IntWidth, VecTy->getNumElements());
  ConstantRange ValidIndices(Zero, MaxElts);

  if (isGuaranteedNotToBePoison(Idx, &AC)) {
    if (ValidIndices.contains(computeConstantRange(Idx, /*ForSigned=*/false,
                                                   true, &AC, CtxI, &DT)))
      return ScalarizationResult::safe();
    return ScalarizationResult::unsafe();
  }

  if (ValidIndices.contains(IdxRange))
    return ScalarizationResult::safeWithFreeze(IdxBase);
  return ScalarizationResult::unsafe();
}
static Align computeAlignmentAfterScalarization(Align VectorAlignment,
                                                Type *ScalarType, Value *Idx,
                                                const DataLayout &DL) {
  if (auto *C = dyn_cast<ConstantInt>(Idx))
    return commonAlignment(VectorAlignment,
                           C->getZExtValue() * DL.getTypeStoreSize(ScalarType));
  return commonAlignment(VectorAlignment, DL.getTypeStoreSize(ScalarType));
}
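
// Sketch of the store scalarization attempted below (assumed pattern):
//   %v = load <4 x i32>, ptr %p
//   %w = insertelement <4 x i32> %v, i32 %b, i32 %idx
//   store <4 x i32> %w, ptr %p
// becomes, when %idx is provably in range and memory is unmodified between
// the load and store:
//   %gep = getelementptr inbounds <4 x i32>, ptr %p, i32 0, i32 %idx
//   store i32 %b, ptr %gep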
bool VectorCombine::foldSingleElementStore(Instruction &I) {
  auto *SI = cast<StoreInst>(&I);
  if (!SI->isSimple() ||
      !isa<FixedVectorType>(SI->getValueOperand()->getType()))
    return false;

  // TODO: Combine more complicated patterns (multiple inserts) by referencing
  // TargetTransformInfo.
  Instruction *Source;
  Value *NewElement;
  Value *Idx;
  if (!match(SI->getValueOperand(),
             m_InsertElt(m_Instruction(Source), m_Value(NewElement),
                         m_Value(Idx))))
    return false;

  if (auto *Load = dyn_cast<LoadInst>(Source)) {
    auto VecTy = cast<FixedVectorType>(SI->getValueOperand()->getType());
    const DataLayout &DL = I.getModule()->getDataLayout();
    Value *SrcAddr = Load->getPointerOperand()->stripPointerCasts();
    // Don't optimize for atomic/volatile load or store. Ensure memory is not
    // modified between the load and the store.
    if (!Load->isSimple() || Load->getParent() != SI->getParent() ||
        !DL.typeSizeEqualsStoreSize(Load->getType()) ||
        SrcAddr != SI->getPointerOperand()->stripPointerCasts())
      return false;

    auto ScalarizableIdx = canScalarizeAccess(VecTy, Idx, Load, AC, DT);
    if (ScalarizableIdx.isUnsafe() ||
        isMemModifiedBetween(Load->getIterator(), SI->getIterator(),
                             MemoryLocation::get(SI), AA))
      return false;

    if (ScalarizableIdx.isSafeWithFreeze())
      ScalarizableIdx.freeze(Builder, *cast<Instruction>(Idx));
    Value *GEP = Builder.CreateInBoundsGEP(
        SI->getValueOperand()->getType(), SI->getPointerOperand(),
        {ConstantInt::get(Idx->getType(), 0), Idx});
    StoreInst *NSI = Builder.CreateStore(NewElement, GEP);
    NSI->copyMetadata(*SI);
    Align ScalarOpAlignment = computeAlignmentAfterScalarization(
        SI->getAlign(), NewElement->getType(), Idx, DL);
    NSI->setAlignment(ScalarOpAlignment);
    replaceValue(I, *NSI);
    eraseInstruction(I);
    return true;
  }

  return false;
}
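
// Sketch of the load scalarization attempted below (assumed pattern):
//   %v = load <4 x i32>, ptr %p
//   %e = extractelement <4 x i32> %v, i32 %idx
// becomes, when every user of %v is such an extract and scalarizing is
// cheaper:
//   %gep = getelementptr inbounds <4 x i32>, ptr %p, i32 0, i32 %idx
//   %e   = load i32, ptr %gep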
bool VectorCombine::scalarizeLoadExtract(Instruction &I) {
  Value *Ptr;
  if (!match(&I, m_Load(m_Value(Ptr))))
    return false;

  auto *FixedVT = cast<FixedVectorType>(I.getType());
  auto *LI = cast<LoadInst>(&I);
  const DataLayout &DL = I.getModule()->getDataLayout();
  if (LI->isVolatile() || !DL.typeSizeEqualsStoreSize(FixedVT))
    return false;

  InstructionCost OriginalCost =
      TTI.getMemoryOpCost(Instruction::Load, FixedVT, LI->getAlign(),
                          LI->getPointerAddressSpace());
  InstructionCost ScalarizedCost = 0;

  Instruction *LastCheckedInst = LI;
  unsigned NumInstChecked = 0;
  // Check if all users of the load are extracts with no memory modifications
  // between the load and the extract. Compute the cost of both the original
  // code and the scalarized version.
  for (User *U : LI->users()) {
    auto *UI = dyn_cast<ExtractElementInst>(U);
    if (!UI || UI->getParent() != LI->getParent())
      return false;

    if (LastCheckedInst->comesBefore(UI)) {
      for (Instruction &I :
           make_range(std::next(LI->getIterator()), UI->getIterator())) {
        if (NumInstChecked == MaxInstrsToScan || I.mayWriteToMemory())
          return false;
        NumInstChecked++;
      }
      LastCheckedInst = UI;
    }

    auto ScalarIdx = canScalarizeAccess(FixedVT, UI->getOperand(1), &I, AC, DT);
    if (!ScalarIdx.isSafe()) {
      // TODO: Freeze index if it is safe to do so.
      ScalarIdx.discard();
      return false;
    }

    auto *Index = dyn_cast<ConstantInt>(UI->getOperand(1));
    ScalarizedCost +=
        TTI.getMemoryOpCost(Instruction::Load, FixedVT->getElementType(),
                            Align(1), LI->getPointerAddressSpace());
  }

  if (ScalarizedCost >= OriginalCost)
    return false;

  // Replace extracts with narrow scalar loads.
  for (User *U : LI->users()) {
    auto *EI = cast<ExtractElementInst>(U);
    Value *Idx = EI->getOperand(1);
    Value *GEP = Builder.CreateInBoundsGEP(
        FixedVT, Ptr, {ConstantInt::get(Idx->getType(), 0), Idx});
    auto *NewLoad = cast<LoadInst>(Builder.CreateLoad(
        FixedVT->getElementType(), GEP, EI->getName() + ".scalar"));

    Align ScalarOpAlignment = computeAlignmentAfterScalarization(
        LI->getAlign(), FixedVT->getElementType(), Idx, DL);
    NewLoad->setAlignment(ScalarOpAlignment);

    replaceValue(*EI, *NewLoad);
  }

  return true;
}
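
// The next fold swaps a shuffle below binops that share an operand so only
// one two-source shuffle remains, e.g. (sketch):
//   shuf (add X, Y), (add X, W) --> add (shuf X), (shuf Y, W)
//   shuf (add X, Y), (add Z, Y) --> add (shuf X, Z), (shuf Y)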
bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
  auto *VecTy = cast<FixedVectorType>(I.getType());
  BinaryOperator *B0, *B1;
  ArrayRef<int> Mask;
  if (!match(&I, m_Shuffle(m_OneUse(m_BinOp(B0)), m_OneUse(m_BinOp(B1)),
                           m_Mask(Mask))) ||
      B0->getOpcode() != B1->getOpcode() || B0->getType() != VecTy)
    return false;

  // Only proceed if replacing a binop with a shuffle is not more costly.
  if (ShufCost > BinopCost)
    return false;

  // If we have something like "add X, Y" and "add Z, X", swap ops to match.
  Instruction::BinaryOps Opcode = B0->getOpcode();
  Value *X = B0->getOperand(0), *Y = B0->getOperand(1);
  Value *Z = B1->getOperand(0), *W = B1->getOperand(1);
  if (BinaryOperator::isCommutative(Opcode) && X != Z && Y != W)
    std::swap(X, Y);

  Value *Shuf0, *Shuf1;
  if (X == Z) {
    // shuf (bo X, Y), (bo X, W) --> bo (shuf X), (shuf Y, W)
    Shuf0 = Builder.CreateShuffleVector(X, UnaryMask);
    Shuf1 = Builder.CreateShuffleVector(Y, W, Mask);
  } else if (Y == W) {
    // shuf (bo X, Y), (bo Z, Y) --> bo (shuf X, Z), (shuf Y)
    Shuf0 = Builder.CreateShuffleVector(X, Z, Mask);
    Shuf1 = Builder.CreateShuffleVector(Y, UnaryMask);
  } else {
    return false;
  }

  Value *NewBO = Builder.CreateBinOp(Opcode, Shuf0, Shuf1);
  // Intersect flags from the old binops.
  if (auto *NewInst = dyn_cast<Instruction>(NewBO)) {
    NewInst->copyIRFlags(B0);
    NewInst->andIRFlags(B1);
  }
  replaceValue(I, *NewBO);
  return true;
}
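
// The reductions handled below are lane-order agnostic, so a shuffle that
// only feeds the reduction can often be simplified away. Assumed example:
//   %s = shufflevector <4 x i32> %a, <4 x i32> poison,
//                      <4 x i32> <i32 3, i32 2, i32 1, i32 0>
//   %r = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s)
// computes the same value without the reverse shuffle.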
bool VectorCombine::foldShuffleFromReductions(Instruction &I) {
  auto *II = dyn_cast<IntrinsicInst>(&I);
  if (!II)
    return false;
  switch (II->getIntrinsicID()) {
  case Intrinsic::vector_reduce_add:
  case Intrinsic::vector_reduce_mul:
  case Intrinsic::vector_reduce_and:
  case Intrinsic::vector_reduce_or:
  case Intrinsic::vector_reduce_xor:
  case Intrinsic::vector_reduce_smin:
  case Intrinsic::vector_reduce_smax:
  case Intrinsic::vector_reduce_umin:
  case Intrinsic::vector_reduce_umax:
    break;
  default:
    return false;
  }

  // Find all the inputs when looking through operations that do not alter the
  // lane order (binops, for example). Currently we look for a single shuffle,
  // and can ignore splat values.
  std::queue<Value *> Worklist;
  SmallPtrSet<Value *, 4> Visited;
  ShuffleVectorInst *Shuffle = nullptr;
  if (auto *Op = dyn_cast<Instruction>(I.getOperand(0)))
    Worklist.push(Op);

  while (!Worklist.empty()) {
    Value *CV = Worklist.front();
    Worklist.pop();
    if (Visited.contains(CV))
      continue;
    Visited.insert(CV);

    if (auto *CI = dyn_cast<Instruction>(CV)) {
      if (CI->isBinaryOp()) {
        for (auto *Op : CI->operand_values())
          Worklist.push(Op);
        continue;
      } else if (auto *SV = dyn_cast<ShuffleVectorInst>(CI)) {
        if (Shuffle && Shuffle != SV)
          return false;
        Shuffle = SV;
        continue;
      }
    }
    // Anything else is currently an unknown node.
    return false;
  }

  if (!Shuffle)
    return false;

  // Check all uses of the binops and shuffle are also within the same tree.
  for (auto *V : Visited)
    for (auto *U : V->users())
      if (!Visited.contains(U) && U != &I)
        return false;

  FixedVectorType *VecType =
      dyn_cast<FixedVectorType>(II->getOperand(0)->getType());
  if (!VecType)
    return false;
  FixedVectorType *ShuffleInputType =
      dyn_cast<FixedVectorType>(Shuffle->getOperand(0)->getType());
  if (!ShuffleInputType)
    return false;
  int NumInputElts = ShuffleInputType->getNumElements();

  // Find the mask from sorting the lanes into order, which is expected to
  // become an identity or concat mask.
  SmallVector<int> ConcatMask;
  Shuffle->getShuffleMask(ConcatMask);
  sort(ConcatMask, [](int X, int Y) { return (unsigned)X < (unsigned)Y; });
  bool UsesSecondVec =
      any_of(ConcatMask, [&](int M) { return M >= NumInputElts; });

  InstructionCost OldCost = TTI.getShuffleCost(
      UsesSecondVec ? TargetTransformInfo::SK_PermuteTwoSrc
                    : TargetTransformInfo::SK_PermuteSingleSrc,
      VecType, Shuffle->getShuffleMask());
  InstructionCost NewCost = TTI.getShuffleCost(
      UsesSecondVec ? TargetTransformInfo::SK_PermuteTwoSrc
                    : TargetTransformInfo::SK_PermuteSingleSrc,
      VecType, ConcatMask);

  LLVM_DEBUG(dbgs() << "  OldCost: " << OldCost << " vs NewCost: " << NewCost
                    << "\n");
  if (NewCost < OldCost) {
    Builder.SetInsertPoint(Shuffle);
    Value *NewShuffle = Builder.CreateShuffleVector(
        Shuffle->getOperand(0), Shuffle->getOperand(1), ConcatMask);
    LLVM_DEBUG(dbgs() << "Created new shuffle: " << *NewShuffle << "\n");
    replaceValue(*Shuffle, *NewShuffle);
  }

  // See if we can re-use foldSelectShuffle, getting a select shuffle pattern
  // to simplify further.
  return foldSelectShuffle(*Shuffle, true);
}
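
// foldSelectShuffle looks for groups of "select shuffles" acting on binops:
//   %x = shuffle A, B
//   %y = shuffle A, B
//   %a = binop %x, %y
//   %b = binop %x, %y
//   shuffle %a, %b, selectmask
// and repacks the inputs into fewer or cheaper shuffles when the cost model
// agrees.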
bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) {
  auto *SVI = cast<ShuffleVectorInst>(&I);
  auto *VT = cast<FixedVectorType>(I.getType());
  auto *Op0 = dyn_cast<Instruction>(SVI->getOperand(0));
  auto *Op1 = dyn_cast<Instruction>(SVI->getOperand(1));
  if (!Op0 || !Op1 || Op0 == Op1 || !Op0->isBinaryOp() || !Op1->isBinaryOp() ||
      VT != Op0->getType())
    return false;

  auto *SVI0A = dyn_cast<Instruction>(Op0->getOperand(0));
  auto *SVI0B = dyn_cast<Instruction>(Op0->getOperand(1));
  auto *SVI1A = dyn_cast<Instruction>(Op1->getOperand(0));
  auto *SVI1B = dyn_cast<Instruction>(Op1->getOperand(1));
  SmallPtrSet<Instruction *, 4> InputShuffles({SVI0A, SVI0B, SVI1A, SVI1B});
  auto checkSVNonOpUses = [&](Instruction *I) {
    if (!I || I->getOperand(0)->getType() != VT)
      return true;
    return any_of(I->users(), [&](User *U) {
      return U != Op0 && U != Op1 &&
             !(isa<ShuffleVectorInst>(U) &&
               (InputShuffles.contains(cast<Instruction>(U)) ||
                isInstructionTriviallyDead(cast<Instruction>(U))));
    });
  };
  if (checkSVNonOpUses(SVI0A) || checkSVNonOpUses(SVI0B) ||
      checkSVNonOpUses(SVI1A) || checkSVNonOpUses(SVI1B))
    return false;

  // Collect all the uses that are shuffles that we can transform together. We
  // may not have a single shuffle, but a group that can all be transformed
  // together profitably.
  SmallVector<ShuffleVectorInst *> Shuffles;
  auto collectShuffles = [&](Instruction *I) {
    for (auto *U : I->users()) {
      auto *SV = dyn_cast<ShuffleVectorInst>(U);
      if (!SV || SV->getType() != VT)
        return false;
      if ((SV->getOperand(0) != Op0 && SV->getOperand(0) != Op1) ||
          (SV->getOperand(1) != Op0 && SV->getOperand(1) != Op1))
        return false;
      if (!llvm::is_contained(Shuffles, SV))
        Shuffles.push_back(SV);
    }
    return true;
  };
  if (!collectShuffles(Op0) || !collectShuffles(Op1))
    return false;
  // From a reduction, we need to be processing a single shuffle, otherwise the
  // other uses will not be lane-invariant.
  if (FromReduction && Shuffles.size() > 1)
    return false;

  // Add any shuffle uses of the shuffles we have found, to include them in our
  // cost calculations.
  if (!FromReduction) {
    for (ShuffleVectorInst *SV : Shuffles) {
      for (auto *U : SV->users()) {
        ShuffleVectorInst *SSV = dyn_cast<ShuffleVectorInst>(U);
        if (SSV && isa<UndefValue>(SSV->getOperand(1)) && SSV->getType() == VT)
          Shuffles.push_back(SSV);
      }
    }
  }
  // For each of the output shuffles, we try to sort all the first vector
  // elements to the beginning, followed by the second array elements at the
  // end. We compute the ReconstructMask needed to convert back to the
  // original lane order.
  SmallVector<std::pair<int, int>> V1, V2;
  SmallVector<SmallVector<int>> OrigReconstructMasks;
  int MaxV1Elt = 0, MaxV2Elt = 0;
  unsigned NumElts = VT->getNumElements();
  for (ShuffleVectorInst *SVN : Shuffles) {
    SmallVector<int> Mask;
    SVN->getShuffleMask(Mask);

    // Check the operands, looking through any single-source shuffle that only
    // feeds the select shuffle.
    Value *SVOp0 = SVN->getOperand(0);
    Value *SVOp1 = SVN->getOperand(1);
    if (isa<UndefValue>(SVOp1)) {
      auto *SSV = cast<ShuffleVectorInst>(SVOp0);
      SVOp0 = SSV->getOperand(0);
      SVOp1 = SSV->getOperand(1);
      for (unsigned I = 0, E = Mask.size(); I != E; I++) {
        Mask[I] = Mask[I] < 0 ? Mask[I] : SSV->getMaskValue(Mask[I]);
      }
    }
    if (SVOp0 == Op1 && SVOp1 == Op0) {
      std::swap(SVOp0, SVOp1);
      ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
    }
    if (SVOp0 != Op0 || SVOp1 != Op1)
      return false;

    // Calculate the reconstruction mask for this shuffle, as the mask needed
    // to take the elements from the right place considering where the two
    // vectors now start.
    SmallVector<int> ReconstructMask;
    for (unsigned I = 0; I < Mask.size(); I++) {
      if (Mask[I] < 0) {
        ReconstructMask.push_back(-1);
      } else if (Mask[I] < static_cast<int>(NumElts)) {
        MaxV1Elt = std::max(MaxV1Elt, Mask[I]);
        auto It = find_if(V1, [&](const std::pair<int, int> &A) {
          return Mask[I] == A.first;
        });
        if (It != V1.end())
          ReconstructMask.push_back(It - V1.begin());
        else {
          ReconstructMask.push_back(V1.size());
          V1.emplace_back(Mask[I], V1.size());
        }
      } else {
        MaxV2Elt = std::max<int>(MaxV2Elt, Mask[I] - NumElts);
        auto It = find_if(V2, [&](const std::pair<int, int> &A) {
          return Mask[I] - static_cast<int>(NumElts) == A.first;
        });
        if (It != V2.end())
          ReconstructMask.push_back(NumElts + It - V2.begin());
        else {
          ReconstructMask.push_back(NumElts + V2.size());
          V2.emplace_back(Mask[I] - NumElts, NumElts + V2.size());
        }
      }
    }

    // For reductions, we know that the lane ordering out doesn't alter the
    // result. In-order can help simplify the shuffle away.
    if (FromReduction)
      sort(ReconstructMask);
    OrigReconstructMasks.push_back(std::move(ReconstructMask));
  }

  // If the maximum element used from V1 and V2 are not larger than the new
  // vectors, the vectors are already packed and performing the optimization
  // again will likely not help any further. This also prevents us from getting
  // stuck in a cycle in case the costs do not also rule it out.
  if (V1.empty() || V2.empty() ||
      (MaxV1Elt == static_cast<int>(V1.size()) - 1 &&
       MaxV2Elt == static_cast<int>(V2.size()) - 1))
    return false;
  // Given a shuffle of the binops, look through any input shuffle to find the
  // lane in the original source.
  auto GetBaseMaskValue = [&](Instruction *I, int M) {
    auto *SV = dyn_cast<ShuffleVectorInst>(I);
    if (!SV)
      return M;
    if (isa<UndefValue>(SV->getOperand(1)))
      if (auto *SSV = dyn_cast<ShuffleVectorInst>(SV->getOperand(0)))
        if (InputShuffles.contains(SSV))
          return SSV->getMaskValue(SV->getMaskValue(M));
    return SV->getMaskValue(M);
  };

  // Attempt to sort the inputs, helping provide cheaper shuffles.
  auto SortBase = [&](Instruction *A, std::pair<int, int> X,
                      std::pair<int, int> Y) {
    int MXA = GetBaseMaskValue(A, X.first);
    int MYA = GetBaseMaskValue(A, Y.first);
    return MXA < MYA;
  };
  stable_sort(V1, [&](std::pair<int, int> A, std::pair<int, int> B) {
    return SortBase(SVI0A, A, B);
  });
  stable_sort(V2, [&](std::pair<int, int> A, std::pair<int, int> B) {
    return SortBase(SVI1A, A, B);
  });

  // Calculate our ReconstructMasks from the OrigReconstructMasks and the
  // modified order of the input shuffles.
  SmallVector<SmallVector<int>> ReconstructMasks;
  for (auto Mask : OrigReconstructMasks) {
    SmallVector<int> ReconstructMask;
    for (int M : Mask) {
      auto FindIndex = [](const SmallVector<std::pair<int, int>> &V, int M) {
        auto It = find_if(V, [M](auto A) { return A.second == M; });
        assert(It != V.end() && "Expected all entries in Mask");
        return std::distance(V.begin(), It);
      };
      if (M < 0)
        ReconstructMask.push_back(-1);
      else if (M < static_cast<int>(NumElts)) {
        ReconstructMask.push_back(FindIndex(V1, M));
      } else {
        ReconstructMask.push_back(NumElts + FindIndex(V2, M));
      }
    }
    ReconstructMasks.push_back(std::move(ReconstructMask));
  }
  // Calculate the masks needed for the new input shuffles, which get padded
  // with undef.
  SmallVector<int> V1A, V1B, V2A, V2B;
  for (unsigned I = 0; I < V1.size(); I++) {
    V1A.push_back(GetBaseMaskValue(SVI0A, V1[I].first));
    V1B.push_back(GetBaseMaskValue(SVI0B, V1[I].first));
  }
  for (unsigned I = 0; I < V2.size(); I++) {
    V2A.push_back(GetBaseMaskValue(SVI1A, V2[I].first));
    V2B.push_back(GetBaseMaskValue(SVI1B, V2[I].first));
  }
  while (V1A.size() < NumElts) {
    V1A.push_back(UndefMaskElem);
    V1B.push_back(UndefMaskElem);
  }
  while (V2A.size() < NumElts) {
    V2A.push_back(UndefMaskElem);
    V2B.push_back(UndefMaskElem);
  }

  auto AddShuffleCost = [&](InstructionCost C, Instruction *I) {
    auto *SV = dyn_cast<ShuffleVectorInst>(I);
    if (!SV)
      return C;
    return C + TTI.getShuffleCost(isa<UndefValue>(SV->getOperand(1))
                                      ? TargetTransformInfo::SK_PermuteSingleSrc
                                      : TargetTransformInfo::SK_PermuteTwoSrc,
                                  VT, SV->getShuffleMask());
  };
  auto AddShuffleMaskCost = [&](InstructionCost C, ArrayRef<int> Mask) {
    return C + TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, VT,
                                  Mask);
  };

  // Get the costs of the shuffles + binops before and after with the new
  // shuffle masks.
  CostBefore += std::accumulate(Shuffles.begin(), Shuffles.end(),
                                InstructionCost(0), AddShuffleCost);
  CostBefore += std::accumulate(InputShuffles.begin(), InputShuffles.end(),
                                InstructionCost(0), AddShuffleCost);

  CostAfter += std::accumulate(ReconstructMasks.begin(), ReconstructMasks.end(),
                               InstructionCost(0), AddShuffleMaskCost);
  std::set<SmallVector<int>> OutputShuffleMasks({V1A, V1B, V2A, V2B});
  CostAfter +=
      std::accumulate(OutputShuffleMasks.begin(), OutputShuffleMasks.end(),
                      InstructionCost(0), AddShuffleMaskCost);
  LLVM_DEBUG(dbgs() << "Found a binop select shuffle pattern: " << I << "\n");
  LLVM_DEBUG(dbgs() << "  CostBefore: " << CostBefore
                    << " vs CostAfter: " << CostAfter << "\n");
  if (CostBefore <= CostAfter)
    return false;

  // The cost model has passed; create the new instructions.
  auto GetShuffleOperand = [&](Instruction *I, unsigned Op) -> Value * {
    auto *SV = dyn_cast<ShuffleVectorInst>(I);
    if (!SV)
      return I;
    if (isa<UndefValue>(SV->getOperand(1)))
      if (auto *SSV = dyn_cast<ShuffleVectorInst>(SV->getOperand(0)))
        if (InputShuffles.contains(SSV))
          return SSV->getOperand(Op);
    return SV->getOperand(Op);
  };
  Builder.SetInsertPoint(SVI0A->getInsertionPointAfterDef());
  Value *NSV0A = Builder.CreateShuffleVector(GetShuffleOperand(SVI0A, 0),
                                             GetShuffleOperand(SVI0A, 1), V1A);
  Builder.SetInsertPoint(SVI0B->getInsertionPointAfterDef());
  Value *NSV0B = Builder.CreateShuffleVector(GetShuffleOperand(SVI0B, 0),
                                             GetShuffleOperand(SVI0B, 1), V1B);
  Builder.SetInsertPoint(SVI1A->getInsertionPointAfterDef());
  Value *NSV1A = Builder.CreateShuffleVector(GetShuffleOperand(SVI1A, 0),
                                             GetShuffleOperand(SVI1A, 1), V2A);
  Builder.SetInsertPoint(SVI1B->getInsertionPointAfterDef());
  Value *NSV1B = Builder.CreateShuffleVector(GetShuffleOperand(SVI1B, 0),
                                             GetShuffleOperand(SVI1B, 1), V2B);
  Builder.SetInsertPoint(Op0);
  Value *NOp0 = Builder.CreateBinOp((Instruction::BinaryOps)Op0->getOpcode(),
                                    NSV0A, NSV0B);
  if (auto *I = dyn_cast<Instruction>(NOp0))
    I->copyIRFlags(Op0, true);
  Builder.SetInsertPoint(Op1);
  Value *NOp1 = Builder.CreateBinOp((Instruction::BinaryOps)Op1->getOpcode(),
                                    NSV1A, NSV1B);
  if (auto *I = dyn_cast<Instruction>(NOp1))
    I->copyIRFlags(Op1, true);

  for (int S = 0, E = ReconstructMasks.size(); S != E; S++) {
    Builder.SetInsertPoint(Shuffles[S]);
    Value *NSV = Builder.CreateShuffleVector(NOp0, NOp1, ReconstructMasks[S]);
    replaceValue(*Shuffles[S], *NSV);
  }

  Worklist.pushValue(NSV0A);
  Worklist.pushValue(NSV0B);
  Worklist.pushValue(NSV1A);
  Worklist.pushValue(NSV1B);
  for (auto *S : Shuffles)
    Worklist.add(S);
  return true;
}
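
// Driver: run the cheap, always-beneficial "early" folds on every reachable
// instruction; unless this is an early-pipeline invocation, also run the
// more aggressive folds, then keep reprocessing the worklist to a fixpoint.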
bool VectorCombine::run() {
  if (DisableVectorCombine)
    return false;

  // Don't attempt vectorization if the target does not support vectors.
  if (!TTI.getNumberOfRegisters(TTI.getRegisterClassForType(/*Vector*/ true)))
    return false;

  bool MadeChange = false;
  auto FoldInst = [this, &MadeChange](Instruction &I) {
    Builder.SetInsertPoint(&I);
    bool IsFixedVectorType = isa<FixedVectorType>(I.getType());
    auto Opcode = I.getOpcode();

    // These folds should be beneficial regardless of when this pass is run
    // in the optimization pipeline.
    if (IsFixedVectorType) {
      switch (Opcode) {
      case Instruction::InsertElement:
        MadeChange |= vectorizeLoadInsert(I);
        break;
      case Instruction::ShuffleVector:
        MadeChange |= widenSubvectorLoad(I);
        break;
      case Instruction::Load:
        MadeChange |= scalarizeLoadExtract(I);
        break;
      }
    }

    // This transform works with scalable and fixed vectors.
    if (isa<VectorType>(I.getType()))
      MadeChange |= scalarizeBinopOrCmp(I);
    if (Opcode == Instruction::Store)
      MadeChange |= foldSingleElementStore(I);

    // If this is an early pipeline invocation of this pass, we are done.
    if (TryEarlyFoldsOnly)
      return;

    // Otherwise, try folds that improve codegen but may interfere with early
    // IR canonicalizations.
    if (IsFixedVectorType) {
      switch (Opcode) {
      case Instruction::InsertElement:
        MadeChange |= foldInsExtFNeg(I);
        break;
      case Instruction::ShuffleVector:
        MadeChange |= foldShuffleOfBinops(I);
        MadeChange |= foldSelectShuffle(I);
        break;
      case Instruction::BitCast:
        MadeChange |= foldBitcastShuf(I);
        break;
      }
    } else {
      switch (Opcode) {
      case Instruction::Call:
        MadeChange |= foldShuffleFromReductions(I);
        break;
      case Instruction::ICmp:
      case Instruction::FCmp:
        MadeChange |= foldExtractExtract(I);
        break;
      default:
        if (Instruction::isBinaryOp(Opcode)) {
          MadeChange |= foldExtractExtract(I);
          MadeChange |= foldExtractedCmps(I);
        }
        break;
      }
    }
  };

  for (BasicBlock &BB : F) {
    // Ignore unreachable basic blocks.
    if (!DT.isReachableFromEntry(&BB))
      continue;
    // Use early increment range so that we can erase instructions in the loop.
    for (Instruction &I : make_early_inc_range(BB)) {
      if (I.isDebugOrPseudoInst())
        continue;
      FoldInst(I);
    }
  }

  while (!Worklist.isEmpty()) {
    Instruction *I = Worklist.removeOne();
    if (!I)
      continue;
    FoldInst(*I);
  }

  return MadeChange;
}

PreservedAnalyses VectorCombinePass::run(Function &F,
                                         FunctionAnalysisManager &FAM) {
  auto &AC = FAM.getResult<AssumptionAnalysis>(F);
  TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
  DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F);
  AAResults &AA = FAM.getResult<AAManager>(F);
  VectorCombine Combiner(F, TTI, DT, AA, AC, TryEarlyFoldsOnly);
  if (!Combiner.run())
    return PreservedAnalyses::all();
  PreservedAnalyses PA;
  PA.preserveSet<CFGAnalyses>();
  return PA;
}