#define DEBUG_TYPE "vectorutils"
static cl::opt<unsigned> MaxInterleaveGroupFactor(
    "max-interleave-group-factor", cl::Hidden,
    cl::desc("Maximum factor for an interleaved access group (default = 8)"),
    cl::init(8));
  case Intrinsic::bswap:
  case Intrinsic::bitreverse:
  case Intrinsic::ctpop:
  case Intrinsic::sadd_sat:
  case Intrinsic::ssub_sat:
  case Intrinsic::uadd_sat:
  case Intrinsic::usub_sat:
  case Intrinsic::smul_fix:
  case Intrinsic::smul_fix_sat:
  case Intrinsic::umul_fix:
  case Intrinsic::umul_fix_sat:
  case Intrinsic::log10:
  case Intrinsic::minnum:
  case Intrinsic::maxnum:
  case Intrinsic::minimum:
  case Intrinsic::maximum:
  case Intrinsic::copysign:
  case Intrinsic::floor:
  case Intrinsic::trunc:
  case Intrinsic::nearbyint:
  case Intrinsic::round:
  case Intrinsic::roundeven:
  case Intrinsic::fmuladd:
  case Intrinsic::is_fpclass:
  case Intrinsic::canonicalize:
  case Intrinsic::fptosi_sat:
  case Intrinsic::fptoui_sat:
  case Intrinsic::lrint:
  case Intrinsic::llrint:
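// Illustrative use (editor's sketch, not upstream code): a vectorizer can ask
// whether a call may simply be widened lane-for-lane, e.g.
//   if (isTriviallyVectorizable(Intrinsic::fmuladd))
//     ...; // safe to replace the scalar call with a <VF x ...> call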
bool llvm::isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID,
                                              unsigned ScalarOpdIdx) {
  case Intrinsic::ctlz:
  case Intrinsic::cttz:
  case Intrinsic::is_fpclass:
  case Intrinsic::powi:
    return (ScalarOpdIdx == 1);
  case Intrinsic::smul_fix:
  case Intrinsic::smul_fix_sat:
  case Intrinsic::umul_fix:
  case Intrinsic::umul_fix_sat:
    return (ScalarOpdIdx == 2);
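// Example (sketch): llvm.powi keeps its exponent scalar even in vector form,
// so only operand 1 is a scalar operand:
//   isVectorIntrinsicWithScalarOpAtArg(Intrinsic::powi, 0); // false
//   isVectorIntrinsicWithScalarOpAtArg(Intrinsic::powi, 1); // true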
  case Intrinsic::fptosi_sat:
  case Intrinsic::fptoui_sat:
  case Intrinsic::lrint:
  case Intrinsic::llrint:
    return OpdIdx == -1 || OpdIdx == 0;
  case Intrinsic::is_fpclass:
  case Intrinsic::powi:
    return OpdIdx == -1 || OpdIdx == 1;
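// Example (sketch): llvm.fptosi.sat is overloaded on both its return type
// (denoted by OpdIdx == -1) and its source operand:
//   isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::fptosi_sat, -1); // true
//   isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::fptosi_sat, 0);  // true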
      ID == Intrinsic::lifetime_end || ID == Intrinsic::assume ||
      ID == Intrinsic::experimental_noalias_scope_decl ||
      ID == Intrinsic::sideeffect || ID == Intrinsic::pseudoprobe)
  assert(V->getType()->isVectorTy() && "Not looking at a vector?");
  VectorType *VTy = cast<VectorType>(V->getType());
  if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
    unsigned Width = FVTy->getNumElements();
      return C->getAggregateElement(EltNo);
    if (!isa<ConstantInt>(III->getOperand(2)))
    unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue();
      return III->getOperand(1);
    if (III == III->getOperand(0))
  if (SVI && isa<FixedVectorType>(SVI->getType())) {
    if (InEl < (int)LHSWidth)
    if (Constant *Elt = C->getAggregateElement(EltNo))
      if (Elt->isNullValue())
  if (isa<ScalableVectorType>(VTy))
  if (EltNo < VTy->getElementCount().getKnownMinValue())
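// Example (sketch): given IR like
//   %ins = insertelement <4 x i32> %v, i32 %s, i32 1
// findScalarElement(%ins, 1) returns %s without materializing an
// extractelement; for any other lane it recurses into %v.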
    if (SplatIndex != -1 && SplatIndex != M)
  assert((SplatIndex == -1 || SplatIndex >= 0) && "Negative index?");
  if (isa<VectorType>(V->getType()))
    if (auto *C = dyn_cast<Constant>(V))
      return C->getSplatValue();
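// Example (sketch): for the constant splat
//   <4 x i32> <i32 7, i32 7, i32 7, i32 7>
// getSplatValue returns the i32 7 constant, and getSplatIndex({3, 3, -1, 3})
// returns 3 because all non-negative mask elements agree.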
  if (isa<VectorType>(V->getType())) {
    if (isa<UndefValue>(V))
    if (auto *C = dyn_cast<Constant>(V))
      return C->getSplatValue() != nullptr;
  if (auto *Shuf = dyn_cast<ShuffleVectorInst>(V)) {
                                const APInt &DemandedElts, APInt &DemandedLHS,
                                APInt &DemandedRHS, bool AllowUndefElts) {
  if (DemandedElts.isZero())
  if (all_of(Mask, [](int Elt) { return Elt == 0; })) {
  for (unsigned I = 0, E = Mask.size(); I != E; ++I) {
    assert((-1 <= M) && (M < (SrcWidth * 2)) &&
           "Invalid shuffle mask constant");
    if (!DemandedElts[I] || (AllowUndefElts && (M < 0)))
      DemandedRHS.setBit(M - SrcWidth);
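// Example (sketch): with SrcWidth = 4, Mask = {0, 5, 2, 7} and all output
// lanes demanded, lanes {0, 2} of the LHS and lanes {1, 3} of the RHS
// (mask values 5 - 4 and 7 - 4) become demanded:
//   APInt LHS, RHS;
//   getShuffleDemandedElts(4, {0, 5, 2, 7}, APInt::getAllOnes(4), LHS, RHS);
//   // LHS == 0b0101, RHS == 0b1010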
  assert(Scale > 0 && "Unexpected scaling factor");
    ScaledMask.assign(Mask.begin(), Mask.end());
  for (int MaskElt : Mask) {
           "Overflowed 32-bits");
    for (int SliceElt = 0; SliceElt != Scale; ++SliceElt)
      ScaledMask.push_back(MaskElt < 0 ? MaskElt : Scale * MaskElt + SliceElt);
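// Example (sketch): narrowing by Scale = 2 expands every index into two
// consecutive narrow-element indices and keeps negative (poison) entries:
//   SmallVector<int, 16> Scaled;
//   narrowShuffleMaskElts(2, {0, 2}, Scaled); // Scaled == {0, 1, 4, 5}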
  assert(Scale > 0 && "Unexpected scaling factor");
    ScaledMask.assign(Mask.begin(), Mask.end());
  int NumElts = Mask.size();
  if (NumElts % Scale != 0)
  ScaledMask.reserve(NumElts / Scale);
    assert((int)MaskSlice.size() == Scale && "Expected Scale-sized slice.");
    int SliceFront = MaskSlice.front();
    if (SliceFront < 0) {
      if (SliceFront % Scale != 0)
      for (int i = 1; i < Scale; ++i)
        if (MaskSlice[i] != SliceFront + i)
      ScaledMask.push_back(SliceFront / Scale);
    Mask = Mask.drop_front(Scale);
  } while (!Mask.empty());
  assert((int)ScaledMask.size() * Scale == NumElts && "Unexpected scaled mask");
  std::array<SmallVector<int, 16>, 2> TmpMasks;
  for (unsigned Scale = 2; Scale <= InputMask.size(); ++Scale) {
    ArrayRef<int> Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs,
    unsigned NumOfUsedRegs, function_ref<void()> NoInputAction,
  int Sz = Mask.size();
  unsigned SzDest = Sz / NumOfDestRegs;
  unsigned SzSrc = Sz / NumOfSrcRegs;
  for (unsigned I = 0; I < NumOfDestRegs; ++I) {
    auto &RegMasks = Res[I];
    RegMasks.assign(NumOfSrcRegs, {});
    for (unsigned K = 0; K < SzDest; ++K) {
      int Idx = I * SzDest + K;
      int SrcRegIdx = Mask[Idx] / SzSrc;
      if (RegMasks[SrcRegIdx].empty())
      RegMasks[SrcRegIdx][K] = Mask[Idx] % SzSrc;
  for (unsigned I = 0; I < NumOfUsedRegs; ++I) {
    switch (NumSrcRegs) {
      unsigned SrcReg = std::distance(Dest.begin(), It);
      SingleInputAction(*It, SrcReg, I);
               "Expected undefined mask element.");
          FirstMask[Idx] = SecondMask[Idx] + VF;
        for (int Idx = 0, VF = Mask.size(); Idx < VF; ++Idx) {
      for (unsigned I = 0; I < NumOfDestRegs; ++I) {
          if (FirstIdx == SecondIdx) {
            CombineMasks(FirstMask, SecondMask);
            ManyInputsAction(FirstMask, FirstIdx, SecondIdx);
            NormalizeMask(FirstMask);
            SecondMask = FirstMask;
            SecondIdx = FirstIdx;
          if (FirstIdx != SecondIdx && SecondIdx >= 0) {
            CombineMasks(SecondMask, FirstMask);
            ManyInputsAction(SecondMask, SecondIdx, FirstIdx);
            Dest[FirstIdx].clear();
            NormalizeMask(SecondMask);
        } while (SecondIdx >= 0);
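// Worked example (editor's sketch): for the interleaving mask
// {0, 4, 1, 5, 2, 6, 3, 7} with NumOfSrcRegs = NumOfDestRegs = 2
// (SzSrc = SzDest = 4), every destination register pulls lanes from both
// source registers, so a per-source sub-mask is built for each (unused slots
// left as -1) and ManyInputsAction is invoked to combine the pair; a
// destination fed by a single register takes the SingleInputAction path.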
  bool SeenExtFromIllegalType = false;
  for (auto &I : *BB) {
    if (TTI && (isa<ZExtInst>(&I) || isa<SExtInst>(&I)) &&
      SeenExtFromIllegalType = true;
    if ((isa<TruncInst>(&I) || isa<ICmpInst>(&I)) &&
        !I.getType()->isVectorTy() &&
        I.getOperand(0)->getType()->getScalarSizeInBits() <= 64) {
  if (Worklist.empty() || (TTI && !SeenExtFromIllegalType))
  while (!Worklist.empty()) {
    if (!Visited.insert(Val).second)
    if (!isa<Instruction>(Val))
    if (DB.getDemandedBits(I).getBitWidth() > 64)
    uint64_t V = DB.getDemandedBits(I).getZExtValue();
    if (isa<SExtInst>(I) || isa<ZExtInst>(I) || isa<LoadInst>(I) ||
    if (isa<BitCastInst>(I) || isa<PtrToIntInst>(I) || isa<IntToPtrInst>(I) ||
        !I->getType()->isIntegerTy()) {
      DBits[Leader] |= ~0ULL;
    if (DBits[Leader] == ~0ULL)
    for (Value *O : cast<User>(I)->operands()) {
  for (auto &I : DBits)
    for (auto *U : I.first->users())
      if (U->getType()->isIntegerTy() && DBits.count(U) == 0)
      LeaderDemandedBits |= DBits[M];
      auto *MI = dyn_cast<Instruction>(M);
      Type *Ty = M->getType();
        Ty = MI->getOperand(0)->getType();
          auto *CI = dyn_cast<ConstantInt>(U);
              isa<ShlOperator, LShrOperator, AShrOperator>(U.getUser()) &&
              U.getOperandNo() == 1)
            return CI->uge(MinBW);
          uint64_t BW = bit_width(DB.getDemandedBits(&U).getZExtValue());
          return bit_ceil(BW) > MinBW;
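// Typical use (editor's sketch): the loop vectorizer runs this over a loop's
// blocks to find chains that can be evaluated in a narrower type, e.g.
//   MapVector<Instruction *, uint64_t> MinBWs =
//       computeMinimumValueSizes(L->getBlocks(), DB, &TTI);
//   // MinBWs[I] == 8 means I's value only ever needs 8 bits end-to-end.
// Here L, DB and TTI stand for a Loop*, DemandedBits and TargetTransformInfo
// already available in the caller.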
template <typename ListT>
    List.insert(AccGroups);
  for (const auto &AccGroupListOp : AccGroups->operands()) {
    auto *Item = cast<MDNode>(AccGroupListOp.get());
  if (AccGroups1 == AccGroups2)
  if (Union.size() == 0)
  if (Union.size() == 1)
    return cast<MDNode>(Union.front());
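// Semantics note (editor's sketch): an access-group operand is either one
// distinct MDNode or a list of them, so uniting !{!1} with !{!2} yields the
// list node !{!1, !2}, while uniting a list with itself returns it unchanged.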
  if (!MayAccessMem1 && !MayAccessMem2)
    return Inst2->getMetadata(LLVMContext::MD_access_group);
    return Inst1->getMetadata(LLVMContext::MD_access_group);
    if (AccGroupSet2.count(MD1))
    auto *Item = cast<MDNode>(Node.get());
      if (AccGroupSet2.count(Item))
  if (Intersection.size() == 0)
  if (Intersection.size() == 1)
    return cast<MDNode>(Intersection.front());
  for (auto Kind : {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
                    LLVMContext::MD_noalias, LLVMContext::MD_fpmath,
                    LLVMContext::MD_nontemporal, LLVMContext::MD_invariant_load,
                    LLVMContext::MD_access_group}) {
    for (int J = 1, E = VL.size(); MD && J != E; ++J) {
      case LLVMContext::MD_tbaa:
      case LLVMContext::MD_alias_scope:
      case LLVMContext::MD_fpmath:
      case LLVMContext::MD_noalias:
      case LLVMContext::MD_nontemporal:
      case LLVMContext::MD_invariant_load:
      case LLVMContext::MD_access_group:
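// Typical use (editor's sketch): after widening a group of scalar loads VL
// into one vector load, fold their metadata onto the new instruction:
//   Value *NewLoad = Builder.CreateAlignedLoad(VecTy, VecPtr, Alignment);
//   propagateMetadata(cast<Instruction>(NewLoad), VL);
// !tbaa, !alias.scope and !noalias are merged via their most-generic forms;
// !nontemporal and !invariant.load survive only if present on all of VL.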
  for (unsigned i = 0; i < VF; i++)
    for (unsigned j = 0; j < Group.getFactor(); ++j) {
      unsigned HasMember = Group.getMember(j) ? 1 : 0;
      Mask.push_back(Builder.getInt1(HasMember));
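// Example (sketch): for an interleave group with factor 2 whose member 1 is
// missing (a gap), VF = 4 yields the i1 mask <1, 0, 1, 0, 1, 0, 1, 0>,
// disabling the lanes that fall into the gaps.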
  for (unsigned i = 0; i < VF; i++)
    for (unsigned j = 0; j < ReplicationFactor; j++)
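// Example (sketch): createReplicatedMask(3, 2) yields {0, 0, 0, 1, 1, 1},
// replicating each of the two source lanes three times.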
  for (unsigned i = 0; i < VF; i++)
    for (unsigned j = 0; j < NumVecs; j++)
      Mask.push_back(j * VF + i);
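// Example (sketch): createInterleaveMask(4, 2) yields {0, 4, 1, 5, 2, 6, 3, 7},
// interleaving the lanes of two concatenated 4-element vectors.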
  for (unsigned i = 0; i < VF; i++)
    Mask.push_back(Start + i * Stride);
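// Example (sketch): createStrideMask(0, 2, 4) yields {0, 2, 4, 6}, selecting
// every second lane starting at lane 0 (the de-interleaving counterpart of
// createInterleaveMask above).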
                                              unsigned NumUndefs) {
  for (unsigned i = 0; i < NumInts; i++)
    Mask.push_back(Start + i);
  for (unsigned i = 0; i < NumUndefs; i++)
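// Example (sketch): createSequentialMask(0, 4, 2) yields {0, 1, 2, 3, -1, -1};
// the trailing NumUndefs elements are undefined lanes used for padding.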
  int NumEltsSigned = NumElts;
  assert(NumEltsSigned > 0 && "Expected smaller or non-zero element count");
  for (int MaskElt : Mask) {
    assert((MaskElt < NumEltsSigned * 2) && "Expected valid shuffle mask");
    int UnaryElt = MaskElt >= NumEltsSigned ? MaskElt - NumEltsSigned : MaskElt;
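// Example (sketch): with NumElts = 4 the binary mask {0, 5, 2, 7} becomes the
// unary mask {0, 1, 2, 3}; indices into the second operand are remapped by
// subtracting NumElts, which is valid when both operands are identical.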
  VectorType *VecTy2 = dyn_cast<VectorType>(V2->getType());
  assert(VecTy1 && VecTy2 &&
         VecTy1->getScalarType() == VecTy2->getScalarType() &&
         "Expect two vectors with the same element type");
  unsigned NumElts1 = cast<FixedVectorType>(VecTy1)->getNumElements();
  unsigned NumElts2 = cast<FixedVectorType>(VecTy2)->getNumElements();
  assert(NumElts1 >= NumElts2 && "Unexpect the first vector has less elements");
  if (NumElts1 > NumElts2) {
  unsigned NumVecs = Vecs.size();
  assert(NumVecs > 1 && "Should be at least two vectors");
    for (unsigned i = 0; i < NumVecs - 1; i += 2) {
      Value *V0 = ResList[i], *V1 = ResList[i + 1];
      assert((V0->getType() == V1->getType() || i == NumVecs - 2) &&
             "Only the last vector may have a different type");
    if (NumVecs % 2 != 0)
    NumVecs = ResList.size();
  } while (NumVecs > 1);
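// Typical use (editor's sketch): joining per-member vectors of an interleave
// group into one wide vector ahead of a single store:
//   Value *Wide = concatenateVectors(Builder, {Vec0, Vec1, Vec2, Vec3});
// The list is concatenated pairwise and halved each round; only the last
// input may have a different element count.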
  assert(isa<VectorType>(Mask->getType()) &&
         isa<IntegerType>(Mask->getType()->getScalarType()) &&
         cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
             1 &&
         "Mask must be a vector of i1");
  auto *ConstMask = dyn_cast<Constant>(Mask);
  if (ConstMask->isNullValue() || isa<UndefValue>(ConstMask))
  if (isa<ScalableVectorType>(ConstMask->getType()))
       E = cast<FixedVectorType>(ConstMask->getType())->getNumElements();
    if (auto *MaskElt = ConstMask->getAggregateElement(I))
      if (MaskElt->isNullValue() || isa<UndefValue>(MaskElt))
  assert(isa<VectorType>(Mask->getType()) &&
         isa<IntegerType>(Mask->getType()->getScalarType()) &&
         cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
             1 &&
         "Mask must be a vector of i1");
  auto *ConstMask = dyn_cast<Constant>(Mask);
  if (ConstMask->isAllOnesValue() || isa<UndefValue>(ConstMask))
  if (isa<ScalableVectorType>(ConstMask->getType()))
       E = cast<FixedVectorType>(ConstMask->getType())->getNumElements();
    if (auto *MaskElt = ConstMask->getAggregateElement(I))
      if (MaskElt->isAllOnesValue() || isa<UndefValue>(MaskElt))
  assert(isa<FixedVectorType>(Mask->getType()) &&
         isa<IntegerType>(Mask->getType()->getScalarType()) &&
         cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
             1 &&
         "Mask must be a fixed width vector of i1");
  const unsigned VWidth =
      cast<FixedVectorType>(Mask->getType())->getNumElements();
  if (auto *CV = dyn_cast<ConstantVector>(Mask))
    for (unsigned i = 0; i < VWidth; i++)
      if (CV->getAggregateElement(i)->isNullValue())
  return DemandedElts;
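// Example (sketch): for a constant mask <4 x i1> <1, 0, undef, 1>,
// maskIsAllZeroOrUndef and maskIsAllOneOrUndef both return false, while
// possiblyDemandedEltsInMask returns 0b1101: lane 1 is provably dead, and the
// undef lane is conservatively treated as possibly demanded.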
bool InterleavedAccessInfo::isStrided(int Stride) {
  unsigned Factor = std::abs(Stride);
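// Example (sketch): a stride of -2 has factor 2, so reverse accesses also
// qualify as strided, provided the factor does not exceed
// MaxInterleaveGroupFactor (8 by default).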
void InterleavedAccessInfo::collectConstStrideAccesses(
  for (auto &I : *BB) {
    if (Size * 8 != DL.getTypeSizeInBits(ElementTy))
                     /*Assume=*/true, /*ShouldCheckWrap=*/false).value_or(0);
    AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size,
    bool EnablePredicatedInterleavedMemAccesses) {
  collectConstStrideAccesses(AccessStrideInfo, Strides);
  if (AccessStrideInfo.empty())
  collectDependences();
  for (auto BI = AccessStrideInfo.rbegin(), E = AccessStrideInfo.rend();
    StrideDescriptor DesB = BI->second;
    if (isStrided(DesB.Stride) &&
        (!isPredicated(B->getParent()) ||
         EnablePredicatedInterleavedMemAccesses)) {
        GroupB = createInterleaveGroup(B, DesB.Stride, DesB.Alignment);
      if (B->mayWriteToMemory())
        StoreGroups.insert(GroupB);
        LoadGroups.insert(GroupB);
    for (auto AI = std::next(BI); AI != E; ++AI) {
      StrideDescriptor DesA = AI->second;
        if (MemberOfGroupB && !canReorderMemAccessesForInterleavedGroups(
                                  A, &*AccessStrideInfo.find(MemberOfGroupB)))
          return MemberOfGroupB;
      if (A->mayWriteToMemory() && GroupA != GroupB) {
        if (GroupB && LoadGroups.contains(GroupB))
          DependentInst = DependentMember(GroupB, &*AI);
        else if (!canReorderMemAccessesForInterleavedGroups(&*AI, &*BI))
        if (DependentInst) {
          if (GroupA && StoreGroups.contains(GroupA)) {
                              "dependence between " << *A << " and "
                              << *DependentInst << '\n');
            StoreGroups.remove(GroupA);
            releaseGroup(GroupA);
          if (GroupB && LoadGroups.contains(GroupB)) {
                              << " as complete.\n");
            CompletedLoadGroups.insert(GroupB);
      if (CompletedLoadGroups.contains(GroupB)) {
      if (!isStrided(DesA.Stride) || !isStrided(DesB.Stride))
          (A->mayReadFromMemory() != B->mayReadFromMemory()) ||
          (A->mayWriteToMemory() != B->mayWriteToMemory()))
      if (DesA.Stride != DesB.Stride || DesA.Size != DesB.Size)
      if (DistanceToB % static_cast<int64_t>(DesB.Size))
      if ((isPredicated(BlockA) || isPredicated(BlockB)) &&
          (!EnablePredicatedInterleavedMemAccesses || BlockA != BlockB))
          GroupB->getIndex(B) + DistanceToB / static_cast<int64_t>(DesB.Size);
                          << " into the interleave group with" << *B
        InterleaveGroupMap[A] = GroupB;
      if (A->mayReadFromMemory())
          std::string FirstOrLast) -> bool {
    assert(Member && "Group member does not exist");
    if (getPtrStride(PSE, AccessTy, MemberPtr, TheLoop, Strides,
                     /*Assume=*/false, /*ShouldCheckWrap=*/true).value_or(0))
    LLVM_DEBUG(dbgs() << "LV: Invalidate candidate interleaved group due to "
                      << FirstOrLast
                      << " group member potentially pointer-wrapping.\n");
    releaseGroup(Group);
  for (auto *Group : LoadGroups) {
    if (InvalidateGroupIfMemberMayWrap(Group, 0, std::string("first")))
    InvalidateGroupIfMemberMayWrap(Group, Group->getFactor() - 1,
                                   std::string("last"));
        dbgs() << "LV: Invalidate candidate interleaved group due to "
                  "a reverse access with gaps.\n");
      releaseGroup(Group);
        dbgs() << "LV: Interleaved group requires epilogue iteration.\n");
      RequiresScalarEpilogue = true;
  for (auto *Group : StoreGroups) {
    if (!EnablePredicatedInterleavedMemAccesses) {
        dbgs() << "LV: Invalidate candidate interleaved store group due "
      releaseGroup(Group);
    if (InvalidateGroupIfMemberMayWrap(Group, 0, std::string("first")))
    InvalidateGroupIfMemberMayWrap(Group, Index, std::string("last"));
  bool ReleasedGroup = false;
    if (!Group->requiresScalarEpilogue())
        << "LV: Invalidate candidate interleaved group due to gaps that "
           "require a scalar epilogue (not allowed under optsize) and cannot "
           "be masked (not enabled). \n");
    releaseGroup(Group);
    ReleasedGroup = true;
  assert(ReleasedGroup && "At least one group must be invalidated, as a "
                          "scalar epilogue was required");
  (void)ReleasedGroup;
  RequiresScalarEpilogue = false;
template <typename InstT>
  std::transform(Members.begin(), Members.end(), std::back_inserter(VL),
                 [](std::pair<int, Instruction *> p) { return p.second; });
  S.split(ListAttr, ",");
    LLVM_DEBUG(dbgs() << "VFABI: Adding mapping '" << S << "' for " << CI
    VariantMappings.push_back(std::string(S));
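// Example (editor's sketch): the attribute value is a comma-separated list of
// VFABI mangled names, e.g. the hypothetical declaration
//   attributes #0 = { "vector-function-abi-variant"="_ZGVnN2v_foo(vfoo)" }
// is split here into one VariantMappings entry per mangled name.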
  for (unsigned Pos = 0, NumParams = Parameters.size(); Pos < NumParams;
    if (Parameters[Pos].LinearStepOrPos >= int(NumParams))
    if (Parameters[Pos].LinearStepOrPos == int(Pos))
    for (unsigned NextPos = Pos + 1; NextPos < NumParams; ++NextPos)