#define DEBUG_TYPE "vectorutils"
static cl::opt<unsigned> MaxInterleaveGroupFactor(
    "max-interleave-group-factor", cl::Hidden,
    cl::desc("Maximum factor for an interleaved access group (default = 8)"),
    cl::init(8));
  case Intrinsic::bswap:
  case Intrinsic::bitreverse:
  case Intrinsic::ctpop:
  case Intrinsic::sadd_sat:
  case Intrinsic::ssub_sat:
  case Intrinsic::uadd_sat:
  case Intrinsic::usub_sat:
  case Intrinsic::smul_fix:
  case Intrinsic::smul_fix_sat:
  case Intrinsic::umul_fix:
  case Intrinsic::umul_fix_sat:
  case Intrinsic::atan2:
  case Intrinsic::exp10:
  case Intrinsic::log10:
  case Intrinsic::minnum:
  case Intrinsic::maxnum:
  case Intrinsic::minimum:
  case Intrinsic::maximum:
  case Intrinsic::copysign:
  case Intrinsic::floor:
  case Intrinsic::trunc:
  case Intrinsic::nearbyint:
  case Intrinsic::round:
  case Intrinsic::roundeven:
  case Intrinsic::fmuladd:
  case Intrinsic::is_fpclass:
  case Intrinsic::powi:
  case Intrinsic::canonicalize:
  case Intrinsic::fptosi_sat:
  case Intrinsic::fptoui_sat:
  case Intrinsic::lrint:
  case Intrinsic::llrint:
  case Intrinsic::ucmp:
  case Intrinsic::scmp:
  case Intrinsic::frexp:
                                              unsigned ScalarOpdIdx,
  case Intrinsic::vp_abs:
  case Intrinsic::ctlz:
  case Intrinsic::vp_ctlz:
  case Intrinsic::cttz:
  case Intrinsic::vp_cttz:
  case Intrinsic::is_fpclass:
  case Intrinsic::vp_is_fpclass:
  case Intrinsic::powi:
    return (ScalarOpdIdx == 1);
  case Intrinsic::smul_fix:
  case Intrinsic::smul_fix_sat:
  case Intrinsic::umul_fix:
  case Intrinsic::umul_fix_sat:
    return (ScalarOpdIdx == 2);
    return OpdIdx == -1 || OpdIdx == 0;
  case Intrinsic::fptosi_sat:
  case Intrinsic::fptoui_sat:
  case Intrinsic::lrint:
  case Intrinsic::llrint:
  case Intrinsic::vp_lrint:
  case Intrinsic::vp_llrint:
  case Intrinsic::ucmp:
  case Intrinsic::scmp:
    return OpdIdx == -1 || OpdIdx == 0;
  case Intrinsic::is_fpclass:
  case Intrinsic::vp_is_fpclass:
  case Intrinsic::powi:
    return OpdIdx == -1 || OpdIdx == 1;
  case Intrinsic::frexp:
    return RetIdx == 0 || RetIdx == 1;
      ID == Intrinsic::lifetime_end || ID == Intrinsic::assume ||
      ID == Intrinsic::experimental_noalias_scope_decl ||
      ID == Intrinsic::sideeffect || ID == Intrinsic::pseudoprobe)
  assert(V->getType()->isVectorTy() && "Not looking at a vector?");
  VectorType *VTy = cast<VectorType>(V->getType());
  if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
    unsigned Width = FVTy->getNumElements();
      return C->getAggregateElement(EltNo);
    if (!isa<ConstantInt>(III->getOperand(2)))
    unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue();
      return III->getOperand(1);
    if (III == III->getOperand(0))
  if (SVI && isa<FixedVectorType>(SVI->getType())) {
    if (InEl < (int)LHSWidth)
  if (Constant *Elt = C->getAggregateElement(EltNo))
    if (Elt->isNullValue())
  if (isa<ScalableVectorType>(VTy))
  if (EltNo < VTy->getElementCount().getKnownMinValue())
    if (SplatIndex != -1 && SplatIndex != M)
  assert((SplatIndex == -1 || SplatIndex >= 0) && "Negative index?");
  if (isa<VectorType>(V->getType()))
    if (auto *C = dyn_cast<Constant>(V))
      return C->getSplatValue();
  if (isa<VectorType>(V->getType())) {
    if (isa<UndefValue>(V))
    if (auto *C = dyn_cast<Constant>(V))
      return C->getSplatValue() != nullptr;
  if (auto *Shuf = dyn_cast<ShuffleVectorInst>(V)) {
      return Shuf->getMaskValue(Index) == Index;
                                  const APInt &DemandedElts, APInt &DemandedLHS,
                                  APInt &DemandedRHS, bool AllowUndefElts) {
  if (DemandedElts.isZero())
  if (all_of(Mask, [](int Elt) { return Elt == 0; })) {
  for (unsigned I = 0, E = Mask.size(); I != E; ++I) {
    assert((-1 <= M) && (M < (SrcWidth * 2)) &&
           "Invalid shuffle mask constant");
    if (!DemandedElts[I] || (AllowUndefElts && (M < 0)))
      DemandedRHS.setBit(M - SrcWidth);
  assert(Scale > 0 && "Unexpected scaling factor");
    ScaledMask.assign(Mask.begin(), Mask.end());
  for (int MaskElt : Mask) {
           "Overflowed 32-bits");
    for (int SliceElt = 0; SliceElt != Scale; ++SliceElt)
      ScaledMask.push_back(MaskElt < 0 ? MaskElt : Scale * MaskElt + SliceElt);
  assert(Scale > 0 && "Unexpected scaling factor");
    ScaledMask.assign(Mask.begin(), Mask.end());
  int NumElts = Mask.size();
  if (NumElts % Scale != 0)
  ScaledMask.reserve(NumElts / Scale);
    assert((int)MaskSlice.size() == Scale && "Expected Scale-sized slice.");
    int SliceFront = MaskSlice.front();
    if (SliceFront < 0) {
      if (SliceFront % Scale != 0)
      for (int i = 1; i < Scale; ++i)
        if (MaskSlice[i] != SliceFront + i)
      ScaledMask.push_back(SliceFront / Scale);
    Mask = Mask.drop_front(Scale);
  } while (!Mask.empty());
  assert((int)ScaledMask.size() * Scale == NumElts && "Unexpected scaled mask");
  unsigned NumSrcElts = Mask.size();
  assert(NumSrcElts > 0 && NumDstElts > 0 && "Unexpected scaling factor");
  if (NumSrcElts == NumDstElts) {
    ScaledMask.assign(Mask.begin(), Mask.end());
  assert(((NumSrcElts % NumDstElts) == 0 || (NumDstElts % NumSrcElts) == 0) &&
         "Unexpected scaling factor");
  if (NumSrcElts > NumDstElts) {
    int Scale = NumSrcElts / NumDstElts;
  int Scale = NumDstElts / NumSrcElts;
  std::array<SmallVector<int, 16>, 2> TmpMasks;
  for (unsigned Scale = 2; Scale <= InputMask.size(); ++Scale) {
    ArrayRef<int> Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs,
    unsigned NumOfUsedRegs, function_ref<void()> NoInputAction,
  int Sz = Mask.size();
  unsigned SzDest = Sz / NumOfDestRegs;
  unsigned SzSrc = Sz / NumOfSrcRegs;
  for (unsigned I = 0; I < NumOfDestRegs; ++I) {
    auto &RegMasks = Res[I];
    RegMasks.assign(2 * NumOfSrcRegs, {});
    for (unsigned K = 0; K < SzDest; ++K) {
      int Idx = I * SzDest + K;
      int MaskIdx = Mask[Idx] % Sz;
      int SrcRegIdx = MaskIdx / SzSrc + (Mask[Idx] >= Sz ? NumOfSrcRegs : 0);
      if (RegMasks[SrcRegIdx].empty())
      RegMasks[SrcRegIdx][K] = MaskIdx % SzSrc;
  for (unsigned I : seq<unsigned>(NumOfUsedRegs)) {
    switch (NumSrcRegs) {
      unsigned SrcReg = std::distance(Dest.begin(), It);
      SingleInputAction(*It, SrcReg, I);
             "Expected undefined mask element.");
          FirstMask[Idx] = SecondMask[Idx] + VF;
        for (int Idx = 0, VF = Mask.size(); Idx < VF; ++Idx) {
      for (unsigned I : seq<unsigned>(2 * NumOfSrcRegs)) {
          if (FirstIdx == SecondIdx) {
            SecondMask = RegMask;
            CombineMasks(FirstMask, SecondMask);
            ManyInputsAction(FirstMask, FirstIdx, SecondIdx);
            NormalizeMask(FirstMask);
            SecondMask = FirstMask;
            SecondIdx = FirstIdx;
      if (FirstIdx != SecondIdx && SecondIdx >= 0) {
        CombineMasks(SecondMask, FirstMask);
        ManyInputsAction(SecondMask, SecondIdx, FirstIdx);
        Dest[FirstIdx].clear();
        NormalizeMask(SecondMask);
    } while (SecondIdx >= 0);
                                                 const APInt &DemandedElts,
                                                 APInt &DemandedRHS) {
  assert(VectorBitWidth >= 128 && "Vectors smaller than 128 bit not supported");
  int NumLanes = VectorBitWidth / 128;
  int NumEltsPerLane = NumElts / NumLanes;
  int HalfEltsPerLane = NumEltsPerLane / 2;
  for (int Idx = 0; Idx != NumElts; ++Idx) {
    if (!DemandedElts[Idx])
    int LaneIdx = (Idx / NumEltsPerLane) * NumEltsPerLane;
    int LocalIdx = Idx % NumEltsPerLane;
    if (LocalIdx < HalfEltsPerLane) {
      DemandedLHS.setBit(LaneIdx + 2 * LocalIdx);
      LocalIdx -= HalfEltsPerLane;
      DemandedRHS.setBit(LaneIdx + 2 * LocalIdx);
  bool SeenExtFromIllegalType = false;
  for (auto &I : *BB) {
    if (TTI && (isa<ZExtInst>(&I) || isa<SExtInst>(&I)) &&
      SeenExtFromIllegalType = true;
    if ((isa<TruncInst>(&I) || isa<ICmpInst>(&I)) &&
        !I.getType()->isVectorTy() &&
        I.getOperand(0)->getType()->getScalarSizeInBits() <= 64) {
  if (Worklist.empty() || (TTI && !SeenExtFromIllegalType))
  while (!Worklist.empty()) {
    if (!Visited.insert(Val).second)
    if (!isa<Instruction>(Val))
    if (DB.getDemandedBits(I).getBitWidth() > 64)
    uint64_t V = DB.getDemandedBits(I).getZExtValue();
    if (isa<SExtInst>(I) || isa<ZExtInst>(I) || isa<LoadInst>(I) ||
    if (isa<BitCastInst>(I) || isa<PtrToIntInst>(I) || isa<IntToPtrInst>(I) ||
        !I->getType()->isIntegerTy()) {
      DBits[Leader] |= ~0ULL;
    if (DBits[Leader] == ~0ULL)
    for (Value *O : cast<User>(I)->operands()) {
  for (auto &I : DBits)
    for (auto *U : I.first->users())
      if (U->getType()->isIntegerTy() && DBits.count(U) == 0)
  for (auto I = ECs.begin(), E = ECs.end(); I != E; ++I) {
      LeaderDemandedBits |= DBits[M];
      auto *MI = dyn_cast<Instruction>(M);
      Type *Ty = M->getType();
        Ty = MI->getOperand(0)->getType();
        auto *CI = dyn_cast<ConstantInt>(U);
            isa<ShlOperator, LShrOperator, AShrOperator>(U.getUser()) &&
            U.getOperandNo() == 1)
          return CI->uge(MinBW);
        uint64_t BW = bit_width(DB.getDemandedBits(&U).getZExtValue());
        return bit_ceil(BW) > MinBW;
template <typename ListT>
    List.insert(AccGroups);
  for (const auto &AccGroupListOp : AccGroups->operands()) {
    auto *Item = cast<MDNode>(AccGroupListOp.get());
  if (AccGroups1 == AccGroups2)
  if (Union.size() == 0)
  if (Union.size() == 1)
    return cast<MDNode>(Union.front());
  if (!MayAccessMem1 && !MayAccessMem2)
    return Inst2->getMetadata(LLVMContext::MD_access_group);
    return Inst1->getMetadata(LLVMContext::MD_access_group);
    if (AccGroupSet2.count(MD1))
      auto *Item = cast<MDNode>(Node.get());
      if (AccGroupSet2.count(Item))
  if (Intersection.size() == 0)
  if (Intersection.size() == 1)
    return cast<MDNode>(Intersection.front());
  for (auto Kind : {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
                    LLVMContext::MD_noalias, LLVMContext::MD_fpmath,
                    LLVMContext::MD_nontemporal, LLVMContext::MD_invariant_load,
                    LLVMContext::MD_access_group, LLVMContext::MD_mmra}) {
    for (int J = 1, E = VL.size(); MD && J != E; ++J) {
      case LLVMContext::MD_mmra: {
      case LLVMContext::MD_tbaa:
      case LLVMContext::MD_alias_scope:
      case LLVMContext::MD_fpmath:
      case LLVMContext::MD_noalias:
      case LLVMContext::MD_nontemporal:
      case LLVMContext::MD_invariant_load:
      case LLVMContext::MD_access_group:
  for (unsigned i = 0; i < VF; i++)
    for (unsigned j = 0; j < Group.getFactor(); ++j) {
      unsigned HasMember = Group.getMember(j) ? 1 : 0;
      Mask.push_back(Builder.getInt1(HasMember));
  for (unsigned i = 0; i < VF; i++)
    for (unsigned j = 0; j < ReplicationFactor; j++)
  for (unsigned i = 0; i < VF; i++)
    for (unsigned j = 0; j < NumVecs; j++)
      Mask.push_back(j * VF + i);
  for (unsigned i = 0; i < VF; i++)
    Mask.push_back(Start + i * Stride);
                                                       unsigned NumUndefs) {
  for (unsigned i = 0; i < NumInts; i++)
    Mask.push_back(Start + i);
  for (unsigned i = 0; i < NumUndefs; i++)
  int NumEltsSigned = NumElts;
  assert(NumEltsSigned > 0 && "Expected smaller or non-zero element count");
  for (int MaskElt : Mask) {
    assert((MaskElt < NumEltsSigned * 2) && "Expected valid shuffle mask");
    int UnaryElt = MaskElt >= NumEltsSigned ? MaskElt - NumEltsSigned : MaskElt;
  VectorType *VecTy2 = dyn_cast<VectorType>(V2->getType());
  assert(VecTy1 && VecTy2 &&
         VecTy1->getScalarType() == VecTy2->getScalarType() &&
         "Expect two vectors with the same element type");
  unsigned NumElts1 = cast<FixedVectorType>(VecTy1)->getNumElements();
  unsigned NumElts2 = cast<FixedVectorType>(VecTy2)->getNumElements();
  assert(NumElts1 >= NumElts2 && "Unexpect the first vector has less elements");
  if (NumElts1 > NumElts2) {
  unsigned NumVecs = Vecs.size();
  assert(NumVecs > 1 && "Should be at least two vectors");
    for (unsigned i = 0; i < NumVecs - 1; i += 2) {
      Value *V0 = ResList[i], *V1 = ResList[i + 1];
      assert((V0->getType() == V1->getType() || i == NumVecs - 2) &&
             "Only the last vector may have a different type");
    if (NumVecs % 2 != 0)
      TmpList.push_back(ResList[NumVecs - 1]);
    NumVecs = ResList.size();
  } while (NumVecs > 1);
  assert(isa<VectorType>(Mask->getType()) &&
         isa<IntegerType>(Mask->getType()->getScalarType()) &&
         cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
             1 &&
         "Mask must be a vector of i1");
  auto *ConstMask = dyn_cast<Constant>(Mask);
  if (ConstMask->isNullValue() || isa<UndefValue>(ConstMask))
  if (isa<ScalableVectorType>(ConstMask->getType()))
       E = cast<FixedVectorType>(ConstMask->getType())->getNumElements();
    if (auto *MaskElt = ConstMask->getAggregateElement(I))
      if (MaskElt->isNullValue() || isa<UndefValue>(MaskElt))
  assert(isa<VectorType>(Mask->getType()) &&
         isa<IntegerType>(Mask->getType()->getScalarType()) &&
         cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
             1 &&
         "Mask must be a vector of i1");
  auto *ConstMask = dyn_cast<Constant>(Mask);
  if (ConstMask->isAllOnesValue() || isa<UndefValue>(ConstMask))
  if (isa<ScalableVectorType>(ConstMask->getType()))
       E = cast<FixedVectorType>(ConstMask->getType())->getNumElements();
    if (auto *MaskElt = ConstMask->getAggregateElement(I))
      if (MaskElt->isAllOnesValue() || isa<UndefValue>(MaskElt))
  assert(isa<VectorType>(Mask->getType()) &&
         isa<IntegerType>(Mask->getType()->getScalarType()) &&
         cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
             1 &&
         "Mask must be a vector of i1");
  auto *ConstMask = dyn_cast<Constant>(Mask);
  if (ConstMask->isAllOnesValue() || isa<UndefValue>(ConstMask))
  if (isa<ScalableVectorType>(ConstMask->getType()))
       E = cast<FixedVectorType>(ConstMask->getType())->getNumElements();
    if (auto *MaskElt = ConstMask->getAggregateElement(I))
      if (MaskElt->isAllOnesValue() || isa<UndefValue>(MaskElt))
  assert(isa<FixedVectorType>(Mask->getType()) &&
         isa<IntegerType>(Mask->getType()->getScalarType()) &&
         cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
             1 &&
         "Mask must be a fixed width vector of i1");
  const unsigned VWidth =
      cast<FixedVectorType>(Mask->getType())->getNumElements();
  if (auto *CV = dyn_cast<ConstantVector>(Mask))
    for (unsigned i = 0; i < VWidth; i++)
      if (CV->getAggregateElement(i)->isNullValue())
  return DemandedElts;
bool InterleavedAccessInfo::isStrided(int Stride) {
  unsigned Factor = std::abs(Stride);
void InterleavedAccessInfo::collectConstStrideAccesses(
    for (auto &I : *BB) {
      if (Size * 8 != DL.getTypeSizeInBits(ElementTy))
                       /*Assume=*/true, /*ShouldCheckWrap=*/false).value_or(0);
      AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size,
    bool EnablePredicatedInterleavedMemAccesses) {
  collectConstStrideAccesses(AccessStrideInfo, Strides);
  if (AccessStrideInfo.empty())
  collectDependences();
  for (auto BI = AccessStrideInfo.rbegin(), E = AccessStrideInfo.rend();
    StrideDescriptor DesB = BI->second;
    if (isStrided(DesB.Stride) &&
        (!isPredicated(B->getParent()) ||
         EnablePredicatedInterleavedMemAccesses)) {
        GroupB = createInterleaveGroup(B, DesB.Stride, DesB.Alignment);
        if (B->mayWriteToMemory())
          StoreGroups.insert(GroupB);
          LoadGroups.insert(GroupB);
    for (auto AI = std::next(BI); AI != E; ++AI) {
      StrideDescriptor DesA = AI->second;
        if (MemberOfGroupB && !canReorderMemAccessesForInterleavedGroups(
                                  A, &*AccessStrideInfo.find(MemberOfGroupB)))
          return MemberOfGroupB;
      if (A->mayWriteToMemory() && GroupA != GroupB) {
        if (GroupB && LoadGroups.contains(GroupB))
          DependentInst = DependentMember(GroupB, &*AI);
        else if (!canReorderMemAccessesForInterleavedGroups(&*AI, &*BI))
        if (DependentInst) {
          if (GroupA && StoreGroups.contains(GroupA)) {
                                 "dependence between " << *A << " and "
                              << *DependentInst << '\n');
            StoreGroups.remove(GroupA);
            releaseGroup(GroupA);
          if (GroupB && LoadGroups.contains(GroupB)) {
                              << " as complete.\n");
            CompletedLoadGroups.insert(GroupB);
      if (CompletedLoadGroups.contains(GroupB)) {
      if (!isStrided(DesA.Stride) || !isStrided(DesB.Stride))
          (A->mayReadFromMemory() != B->mayReadFromMemory()) ||
          (A->mayWriteToMemory() != B->mayWriteToMemory()))
      if (DesA.Stride != DesB.Stride || DesA.Size != DesB.Size)
      if (DistanceToB % static_cast<int64_t>(DesB.Size))
      if ((isPredicated(BlockA) || isPredicated(BlockB)) &&
          (!EnablePredicatedInterleavedMemAccesses || BlockA != BlockB))
          GroupB->getIndex(B) + DistanceToB / static_cast<int64_t>(DesB.Size);
                        << " into the interleave group with" << *B
      InterleaveGroupMap[A] = GroupB;
      if (A->mayReadFromMemory())
                                              const char *FirstOrLast) -> bool {
    assert(Member && "Group member does not exist");
    if (getPtrStride(PSE, AccessTy, MemberPtr, TheLoop, Strides,
                     /*Assume=*/false, /*ShouldCheckWrap=*/true).value_or(0))
    LLVM_DEBUG(dbgs() << "LV: Invalidate candidate interleaved group due to "
                      << FirstOrLast
                      << " group member potentially pointer-wrapping.\n");
    releaseGroup(Group);
  for (auto *Group : LoadGroups) {
    if (InvalidateGroupIfMemberMayWrap(Group, 0, "first"))
    InvalidateGroupIfMemberMayWrap(Group, Group->getFactor() - 1, "last");
        dbgs() << "LV: Invalidate candidate interleaved group due to "
                  "a reverse access with gaps.\n");
      releaseGroup(Group);
        dbgs() << "LV: Interleaved group requires epilogue iteration.\n");
      RequiresScalarEpilogue = true;
  for (auto *Group : StoreGroups) {
    if (!EnablePredicatedInterleavedMemAccesses) {
        dbgs() << "LV: Invalidate candidate interleaved store group due "
      releaseGroup(Group);
    if (InvalidateGroupIfMemberMayWrap(Group, 0, "first"))
    for (int Index = Group->getFactor() - 1; Index > 0; Index--)
      InvalidateGroupIfMemberMayWrap(Group, Index, "last");
  bool ReleasedGroup = InterleaveGroups.remove_if([&](auto *Group) {
    if (!Group->requiresScalarEpilogue())
        << "LV: Invalidate candidate interleaved group due to gaps that "
           "require a scalar epilogue (not allowed under optsize) and cannot "
           "be masked (not enabled). \n");
    releaseGroupWithoutRemovingFromSet(Group);
  assert(ReleasedGroup && "At least one group must be invalidated, as a "
                          "scalar epilogue was required");
  (void)ReleasedGroup;
  RequiresScalarEpilogue = false;
template <typename InstT>
  std::transform(Members.begin(), Members.end(), std::back_inserter(VL),
                 [](std::pair<int, Instruction *> p) { return p.second; });
static Value * concatenateTwoVectors(IRBuilderBase &Builder, Value *V1, Value *V2)
A helper function for concatenating vectors.
static cl::opt< unsigned > MaxInterleaveGroupFactor("max-interleave-group-factor", cl::Hidden, cl::desc("Maximum factor for an interleaved access group (default = 8)"), cl::init(8))
Maximum factor for an interleaved memory access.
static void addToAccessGroupList(ListT &List, MDNode *AccGroups)
Add all access groups in AccGroups to List.
Class for arbitrary precision integers.
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
void clearBit(unsigned BitPosition)
Set a given bit to 0.
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
unsigned getBitWidth() const
Return the number of bits in the APInt.
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
int64_t getSExtValue() const
Get sign extended value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & front() const
front - Get the first element.
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
LLVM Basic Block Representation.
const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
This class represents a function call, abstracting a target machine's calling convention.
static Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
EquivalenceClasses - This represents a collection of equivalence classes and supports three efficient...
const ElemTy & getOrInsertLeaderValue(const ElemTy &V)
getOrInsertLeaderValue - Return the leader for the specified value that is in the set.
member_iterator member_end() const
member_iterator member_begin(iterator I) const
member_iterator unionSets(const ElemTy &V1, const ElemTy &V2)
union - Merge the two equivalence sets for the specified values, inserting them if they do not alread...
Common base class shared among various IRBuilders.
ConstantInt * getInt1(bool V)
Get a constant value representing either true or false.
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
This instruction inserts a single (scalar) element into a VectorType value.
bool mayReadOrWriteMemory() const
Return true if this instruction may read or write memory.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
void getAllMetadataOtherThanDebugLoc(SmallVectorImpl< std::pair< unsigned, MDNode * > > &MDs) const
This does the same thing as getAllMetadata, except that it filters out the debug location.
The group of interleaved loads/stores sharing the same stride and close to each other.
uint32_t getFactor() const
InstTy * getMember(uint32_t Index) const
Get the member with the given index Index.
uint32_t getIndex(const InstTy *Instr) const
Get the index for the given member.
void setInsertPos(InstTy *Inst)
void addMetadata(InstTy *NewInst) const
Add metadata (e.g.
bool insertMember(InstTy *Instr, int32_t Index, Align NewAlign)
Try to insert a new member Instr with index Index and alignment NewAlign.
uint32_t getNumMembers() const
InterleaveGroup< Instruction > * getInterleaveGroup(const Instruction *Instr) const
Get the interleave group that Instr belongs to.
bool requiresScalarEpilogue() const
Returns true if an interleaved group that may access memory out-of-bounds requires a scalar epilogue ...
bool isInterleaved(Instruction *Instr) const
Check if Instr belongs to any interleave group.
void analyzeInterleaving(bool EnableMaskedInterleavedGroup)
Analyze the interleaved accesses and collect them in interleave groups.
void invalidateGroupsRequiringScalarEpilogue()
Invalidate groups that require a scalar epilogue (due to gaps).
This is an important class for using LLVM in a threaded context.
const DenseMap< Value *, const SCEV * > & getSymbolicStrides() const
If an access has a symbolic strides, this maps the pointer value to the stride symbol.
BlockT * getHeader() const
Store the result of a depth first search within basic blocks contained by a single loop.
static MDNode * getMostGenericAliasScope(MDNode *A, MDNode *B)
static MDNode * getMostGenericTBAA(MDNode *A, MDNode *B)
ArrayRef< MDOperand > operands() const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
static MDNode * getMostGenericFPMath(MDNode *A, MDNode *B)
unsigned getNumOperands() const
Return number of MDNode operands.
static MDNode * intersect(MDNode *A, MDNode *B)
LLVMContext & getContext() const
Tracking metadata reference owned by Metadata.
This class implements a map that also provides access to all stored values in a deterministic order.
iterator find(const KeyT &Key)
reverse_iterator rbegin()
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
This class represents a constant integer value.
const APInt & getAPInt() const
This class represents an analyzed expression in the program.
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
bool remove(const value_type &X)
Remove an item from the set vector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
bool contains(const key_type &key) const
Check if the SetVector contains the given key.
This instruction constructs a fixed permutation of two input vectors.
int getMaskValue(unsigned Elt) const
Return the shuffle mask value of this instruction for the given element index.
VectorType * getType() const
Overload to return most specific vector type.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Provides information about what library functions are available for the current target.
The instances of the Type class are immutable: once they are created, they are never changed.
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
A Use represents the edge between a Value definition and its users.
Value * getOperand(unsigned i) const
static bool isVPCast(Intrinsic::ID ID)
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVMContext & getContext() const
All values hold a context through their type.
Base class of all SIMD vector types.
Type * getElementType() const
An efficient, type-erasing, non-owning reference to a callable.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
bool isTargetIntrinsic(ID IID)
isTargetIntrinsic - Returns true if IID is an intrinsic specific to a certain target.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
bool match(Val *V, const Pattern &P)
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
bool isTriviallyScalarizable(Intrinsic::ID ID, const TargetTransformInfo *TTI)
Identify if the intrinsic is trivially scalarizable.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
unsigned getLoadStoreAddressSpace(const Value *I)
A helper function that returns the address space of the pointer operand of load or store instruction.
Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
APInt possiblyDemandedEltsInMask(Value *Mask)
Given a mask vector of the form <Y x i1>, return an APInt (of bitwidth Y) for each lane which may be ...
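A minimal usage sketch (illustrative, not from the original source; Ctx stands for an existing llvm::LLVMContext) showing how a constant <4 x i1> mask is summarized per lane:
  llvm::Constant *Lanes[] = {llvm::ConstantInt::getTrue(Ctx), llvm::ConstantInt::getFalse(Ctx),
                             llvm::ConstantInt::getTrue(Ctx), llvm::ConstantInt::getTrue(Ctx)};
  llvm::APInt Demanded = llvm::possiblyDemandedEltsInMask(llvm::ConstantVector::get(Lanes));
  // Demanded == 0b1101: lane 1 is constant false, so it can never be demanded.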
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
llvm::SmallVector< int, 16 > createUnaryMask(ArrayRef< int > Mask, unsigned NumElts)
Given a shuffle mask for a binary shuffle, create the equivalent shuffle mask assuming both operands ...
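An illustrative sketch (mask values invented for this example) of turning a two-operand shuffle mask into its unary form:
  llvm::SmallVector<int, 16> Unary = llvm::createUnaryMask({0, 5, 2, 7}, /*NumElts=*/4);
  // Unary == {0, 1, 2, 3}: indices that selected from the second operand
  // (>= NumElts) are remapped into the first, assuming both operands are identical.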
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Value * concatenateVectors(IRBuilderBase &Builder, ArrayRef< Value * > Vecs)
Concatenate a list of vectors.
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
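A small sketch (example values chosen here, not taken from the source) of widening a mask by a factor of 2:
  llvm::SmallVector<int, 16> Wide;
  bool Ok = llvm::widenShuffleMaskElts(/*Scale=*/2, {0, 1, 6, 7}, Wide);
  // Ok == true and Wide == {0, 3}: each run of Scale consecutive narrow
  // indices collapses into one index of a mask with elements twice as wide.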
Instruction * propagateMetadata(Instruction *I, ArrayRef< Value * > VL)
Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath, MD_nontemporal,...
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
MDNode * intersectAccessGroups(const Instruction *Inst1, const Instruction *Inst2)
Compute the access-group list of access groups that Inst1 and Inst2 are both in.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
bool getShuffleDemandedElts(int SrcWidth, ArrayRef< int > Mask, const APInt &DemandedElts, APInt &DemandedLHS, APInt &DemandedRHS, bool AllowUndefElts=false)
Transform a shuffle mask's output demanded element mask into demanded element masks for the 2 operand...
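A hedged sketch (mask and widths invented for illustration) of splitting a demanded-elements mask across the two shuffle operands:
  llvm::APInt DemandedLHS, DemandedRHS;
  llvm::getShuffleDemandedElts(/*SrcWidth=*/4, {0, 5, 2, 7},
                               llvm::APInt::getAllOnes(4), DemandedLHS, DemandedRHS);
  // DemandedLHS == 0b0101 (elements 0 and 2 of the first operand),
  // DemandedRHS == 0b1010 (elements 1 and 3 of the second operand).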
bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
Constant * createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF, const InterleaveGroup< Instruction > &Group)
Create a mask that filters the members of an interleave group where there are gaps.
constexpr unsigned MaxAnalysisRecursionDepth
llvm::SmallVector< int, 16 > createStrideMask(unsigned Start, unsigned Stride, unsigned VF)
Create a stride shuffle mask.
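For instance (illustrative values), selecting every second element starting at index 0:
  llvm::SmallVector<int, 16> M = llvm::createStrideMask(/*Start=*/0, /*Stride=*/2, /*VF=*/4);
  // M == {0, 2, 4, 6}, i.e. Start + i * Stride for i = 0..VF-1.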
void getHorizDemandedEltsForFirstOperand(unsigned VectorBitWidth, const APInt &DemandedElts, APInt &DemandedLHS, APInt &DemandedRHS)
Compute the demanded elements mask of horizontal binary operations.
llvm::SmallVector< int, 16 > createReplicatedMask(unsigned ReplicationFactor, unsigned VF)
Create a mask with replicated elements.
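For example (values chosen for illustration):
  llvm::SmallVector<int, 16> M = llvm::createReplicatedMask(/*ReplicationFactor=*/3, /*VF=*/2);
  // M == {0, 0, 0, 1, 1, 1}: each of the VF source elements is repeated
  // ReplicationFactor times.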
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const DenseMap< Value *, const SCEV * > &StridesMap=DenseMap< Value *, const SCEV * >(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of the access type size.
bool maskIsAllOneOrUndef(Value *Mask)
Given a mask vector of i1, return true if all of the elements of this predicate mask are known to be ...
constexpr int PoisonMaskElem
bool isValidAsAccessGroup(MDNode *AccGroup)
Return whether an MDNode might represent an access group.
Intrinsic::ID getIntrinsicForCallSite(const CallBase &CB, const TargetLibraryInfo *TLI)
Map a call instruction to an intrinsic ID.
bool isVectorIntrinsicWithStructReturnOverloadAtField(Intrinsic::ID ID, int RetIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic that returns a struct is overloaded at the struct elem...
void processShuffleMasks(ArrayRef< int > Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs, unsigned NumOfUsedRegs, function_ref< void()> NoInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> SingleInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> ManyInputsAction)
Splits and processes shuffle mask depending on the number of input and output registers.
void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
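A short sketch (example values assumed) of the inverse of the widening transform above:
  llvm::SmallVector<int, 16> Narrow;
  llvm::narrowShuffleMaskElts(/*Scale=*/2, {0, 3}, Narrow);
  // Narrow == {0, 1, 6, 7}: every wide index expands into Scale sequential
  // indices into the mask of narrower elements.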
llvm::SmallVector< int, 16 > createInterleaveMask(unsigned VF, unsigned NumVecs)
Create an interleave shuffle mask.
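For example (illustrative), interleaving two vectors of four elements each:
  llvm::SmallVector<int, 16> M = llvm::createInterleaveMask(/*VF=*/4, /*NumVecs=*/2);
  // M == {0, 4, 1, 5, 2, 6, 3, 7}: lane i alternates between element i of the
  // first and element i of the second concatenated vector.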
bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
const SCEV * replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE, const DenseMap< Value *, const SCEV * > &PtrToStride, Value *Ptr)
Return the SCEV corresponding to a pointer with the symbolic stride replaced with constant one,...
Value * findScalarElement(Value *V, unsigned EltNo)
Given a vector and an element number, see if the scalar value is already around as a register,...
MDNode * uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2)
Compute the union of two access-group lists.
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
bool maskIsAllZeroOrUndef(Value *Mask)
Given a mask vector of i1, return true if all of the elements of this predicate mask are known to be ...
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
void getShuffleMaskWithWidestElts(ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Repetitively apply widenShuffleMaskElts() for as long as it succeeds, to get the shuffle mask with wi...
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list is empty.
bool maskContainsAllOneOrUndef(Value *Mask)
Given a mask vector of i1, return true if any of the elements of this predicate mask are known to be ...
bool isTriviallyVectorizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially vectorizable.
llvm::SmallVector< int, 16 > createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs)
Create a sequential shuffle mask.
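For example (values picked for illustration):
  llvm::SmallVector<int, 16> M = llvm::createSequentialMask(/*Start=*/0, /*NumInts=*/4, /*NumUndefs=*/2);
  // M == {0, 1, 2, 3, -1, -1}: NumInts consecutive indices followed by
  // NumUndefs undefined (-1) mask elements.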
bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic is overloaded on the type of the operand at index OpdI...
MapVector< Instruction *, uint64_t > computeMinimumValueSizes(ArrayRef< BasicBlock * > Blocks, DemandedBits &DB, const TargetTransformInfo *TTI=nullptr)
Compute a map of integer instructions to their minimum legal type size.
bool scaleShuffleMaskElts(unsigned NumDstElts, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Attempt to narrow/widen the Mask shuffle mask to the NumDstElts target width.
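A sketch (example values assumed) showing the narrowing direction, where the target width is larger than the source mask:
  llvm::SmallVector<int, 16> Scaled;
  bool Ok = llvm::scaleShuffleMaskElts(/*NumDstElts=*/8, {0, 3}, Scaled);
  // Ok == true and Scaled == {0, 1, 2, 3, 12, 13, 14, 15}: each wide index is
  // expanded into Scale = NumDstElts / NumSrcElts = 4 sequential narrow indices.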
int getSplatIndex(ArrayRef< int > Mask)
If all non-negative Mask elements are the same value, return that value.
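For instance (illustrative masks):
  int SplatIdx = llvm::getSplatIndex({2, -1, 2, 2}); // SplatIdx == 2 (all defined lanes use index 2)
  int NoSplat = llvm::getSplatIndex({0, 1, 2, 3});   // NoSplat == -1 (not a splat)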
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.