#define DEBUG_TYPE "vectorutils"
/// Maximum factor for an interleaved memory access.
static cl::opt<unsigned> MaxInterleaveGroupFactor(
    "max-interleave-group-factor", cl::Hidden,
    cl::desc("Maximum factor for an interleaved access group (default = 8)"),
    cl::init(8));
/// Identify if the intrinsic is trivially vectorizable.
bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
  switch (ID) {
  case Intrinsic::bswap:
  case Intrinsic::bitreverse:
  case Intrinsic::ctpop:
  case Intrinsic::sadd_sat:
  case Intrinsic::ssub_sat:
  case Intrinsic::uadd_sat:
  case Intrinsic::usub_sat:
  case Intrinsic::smul_fix:
  case Intrinsic::smul_fix_sat:
  case Intrinsic::umul_fix:
  case Intrinsic::umul_fix_sat:
  case Intrinsic::log10:
  case Intrinsic::minnum:
  case Intrinsic::maxnum:
  case Intrinsic::minimum:
  case Intrinsic::maximum:
  case Intrinsic::copysign:
  case Intrinsic::floor:
  case Intrinsic::trunc:
  case Intrinsic::nearbyint:
  case Intrinsic::round:
  case Intrinsic::roundeven:
  case Intrinsic::fmuladd:
  case Intrinsic::is_fpclass:
  case Intrinsic::canonicalize:
  case Intrinsic::fptosi_sat:
  case Intrinsic::fptoui_sat:
    return true;
  default:
    return false;
  }
}
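// Usage sketch (the helper below is hypothetical and not part of the LLVM
// API): a vectorizer would typically combine this predicate with
// getVectorIntrinsicIDForCall to decide whether a scalar call in a loop body
// can be widened into one call on vector operands.
static bool canWidenCallToIntrinsic(const CallInst *CI,
                                    const TargetLibraryInfo *TLI) {
  Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
  return ID != Intrinsic::not_intrinsic && isTriviallyVectorizable(ID);
}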
/// Identifies if the vector form of the intrinsic has a scalar operand.
bool llvm::isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID,
                                              unsigned ScalarOpdIdx) {
  switch (ID) {
  case Intrinsic::ctlz:
  case Intrinsic::cttz:
  case Intrinsic::is_fpclass:
  case Intrinsic::powi:
    return (ScalarOpdIdx == 1);
  case Intrinsic::smul_fix:
  case Intrinsic::smul_fix_sat:
  case Intrinsic::umul_fix:
  case Intrinsic::umul_fix_sat:
    return (ScalarOpdIdx == 2);
  default:
    return false;
  }
}
/// Identifies if the vector form of the intrinsic is overloaded on the type of
/// the operand at index OpdIdx, or on the result type (OpdIdx == -1).
bool llvm::isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx) {
  switch (ID) {
  case Intrinsic::fptosi_sat:
  case Intrinsic::fptoui_sat:
    return OpdIdx == -1 || OpdIdx == 0;
  case Intrinsic::is_fpclass:
    return OpdIdx == 0;
  case Intrinsic::powi:
    return OpdIdx == -1 || OpdIdx == 1;
  default:
    return OpdIdx == -1;
  }
}
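// Usage sketch: when widening a call, operands flagged by
// isVectorIntrinsicWithScalarOpAtArg stay scalar (e.g. the i32 exponent of
// llvm.powi), while the remaining operands become vectors. `widenOperand` is a
// hypothetical helper assumed to be provided by the surrounding pass.
static Value *widenOperand(Value *V); // Hypothetical widening helper.
static void collectWidenedArgs(CallInst *CI, Intrinsic::ID ID,
                               SmallVectorImpl<Value *> &Args) {
  for (unsigned Idx = 0, E = CI->arg_size(); Idx != E; ++Idx) {
    Value *Arg = CI->getArgOperand(Idx);
    Args.push_back(isVectorIntrinsicWithScalarOpAtArg(ID, Idx)
                       ? Arg                // Keep the scalar operand as-is.
                       : widenOperand(Arg)); // Replace with a vector operand.
  }
}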
/// Returns the intrinsic ID for the call, if it is vectorizable or otherwise
/// safe to leave in place while vectorizing.
Intrinsic::ID llvm::getVectorIntrinsicIDForCall(const CallInst *CI,
                                                const TargetLibraryInfo *TLI) {
  Intrinsic::ID ID = getIntrinsicForCallSite(*CI, TLI);

  if (isTriviallyVectorizable(ID) || ID == Intrinsic::lifetime_start ||
      ID == Intrinsic::lifetime_end || ID == Intrinsic::assume ||
      ID == Intrinsic::experimental_noalias_scope_decl ||
      ID == Intrinsic::sideeffect || ID == Intrinsic::pseudoprobe)
    return ID;
  return Intrinsic::not_intrinsic;
}
Value *llvm::findScalarElement(Value *V, unsigned EltNo) {
  assert(V->getType()->isVectorTy() && "Not looking at a vector?");
  VectorType *VTy = cast<VectorType>(V->getType());
  // For fixed-length vectors, an out-of-range access yields an undefined value.
  if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
    unsigned Width = FVTy->getNumElements();
    if (EltNo >= Width)
      return UndefValue::get(FVTy->getElementType());
  }

  if (Constant *C = dyn_cast<Constant>(V))
    return C->getAggregateElement(EltNo);

  if (auto *III = dyn_cast<InsertElementInst>(V)) {
    // An insert to a variable element could be anything.
    if (!isa<ConstantInt>(III->getOperand(2)))
      return nullptr;
    unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue();
    // An insert to the element we are looking for yields the inserted value.
    if (EltNo == IIElt)
      return III->getOperand(1);
    // Guard against infinite loops on malformed, unreachable IR.
    if (III == III->getOperand(0))
      return nullptr;
    // Otherwise the insert does not modify this element; recurse.
    return findScalarElement(III->getOperand(0), EltNo);
  }

  auto *SVI = dyn_cast<ShuffleVectorInst>(V);
  // Restrict the shuffle transformation to fixed-length vectors.
  if (SVI && isa<FixedVectorType>(SVI->getType())) {
    unsigned LHSWidth =
        cast<FixedVectorType>(SVI->getOperand(0)->getType())->getNumElements();
    int InEl = SVI->getMaskValue(EltNo);
    if (InEl < 0)
      return UndefValue::get(VTy->getElementType());
    if (InEl < (int)LHSWidth)
      return findScalarElement(SVI->getOperand(0), InEl);
    return findScalarElement(SVI->getOperand(1), InEl - LHSWidth);
  }

  // Extract a value from a vector add operation with a constant-zero operand.
  Value *Val; Constant *C;
  if (match(V, m_Add(m_Value(Val), m_Constant(C))))
    if (Constant *Elt = C->getAggregateElement(EltNo))
      if (Elt->isNullValue())
        return findScalarElement(Val, EltNo);

  // A splat of a scalable vector trivially provides the scalar.
  if (isa<ScalableVectorType>(VTy))
    if (Value *Splat = getSplatValue(V))
      if (EltNo < VTy->getElementCount().getKnownMinValue())
        return Splat;

  return nullptr;
}
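// Worked example: given
//   %v = insertelement <4 x float> %w, float %x, i32 2
// findScalarElement(%v, 2) returns %x directly, while findScalarElement(%v, 1)
// recurses into %w; no extractelement instruction is ever created.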
/// If all non-negative Mask elements are the same value, return that value.
int llvm::getSplatIndex(ArrayRef<int> Mask) {
  int SplatIndex = -1;
  for (int M : Mask) {
    // Ignore invalid (undefined) mask elements.
    if (M < 0)
      continue;
    // There can be only one distinct non-negative value if this is a splat.
    if (SplatIndex != -1 && SplatIndex != M)
      return -1;
    SplatIndex = M;
  }
  assert((SplatIndex == -1 || SplatIndex >= 0) && "Negative index?");
  return SplatIndex;
}
/// Get the splat value if the input is a splat vector, or return nullptr.
Value *llvm::getSplatValue(const Value *V) {
  if (isa<VectorType>(V->getType()))
    if (auto *C = dyn_cast<Constant>(V))
      return C->getSplatValue();

  // Match: shuf (inselt ?, Splat, 0), ?, <0, undef, 0, ...>
  Value *Splat;
  if (match(V,
            m_Shuffle(m_InsertElt(m_Value(), m_Value(Splat), m_ZeroInt()),
                      m_Value(), m_ZeroMask())))
    return Splat;

  return nullptr;
}
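// Worked example: for the constant <4 x i32> <i32 7, i32 7, i32 7, i32 7>,
// getSplatValue returns the scalar i32 7; for a non-splat or unanalyzable
// vector it returns nullptr.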
bool llvm::isSplatValue(const Value *V, int Index, unsigned Depth) {
  if (isa<VectorType>(V->getType())) {
    if (isa<UndefValue>(V))
      return true;
    if (auto *C = dyn_cast<Constant>(V))
      return C->getSplatValue() != nullptr;
  }

  if (auto *Shuf = dyn_cast<ShuffleVectorInst>(V)) {
    // All mask elements must be equal for the shuffle to be a splat.
    if (!all_equal(Shuf->getShuffleMask()))
      return false;
    // Match any index.
    if (Index == -1)
      return true;
    // Match a specific element: the mask must be defined at, and match, the
    // specified index.
    return Shuf->getMaskValue(Index) == Index;
  }
bool llvm::getShuffleDemandedElts(int SrcWidth, ArrayRef<int> Mask,
                                  const APInt &DemandedElts, APInt &DemandedLHS,
                                  APInt &DemandedRHS, bool AllowUndefElts) {
  DemandedLHS = DemandedRHS = APInt::getZero(SrcWidth);

  // Early out if we don't demand any elements.
  if (DemandedElts.isZero())
    return true;

  // Simple case of a shuffle with zeroinitializer.
  if (all_of(Mask, [](int Elt) { return Elt == 0; })) {
    DemandedLHS.setBit(0);
    return true;
  }

  for (unsigned I = 0, E = Mask.size(); I != E; ++I) {
    int M = Mask[I];
    assert((-1 <= M) && (M < (SrcWidth * 2)) &&
           "Invalid shuffle mask constant");

    if (!DemandedElts[I] || (AllowUndefElts && (M < 0)))
      continue;

    // For undef elements, we don't know anything about the common state of
    // the shuffle result.
    if (M < 0)
      return false;

    if (M < SrcWidth)
      DemandedLHS.setBit(M);
    else
      DemandedRHS.setBit(M - SrcWidth);
  }

  return true;
}
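// Worked example: with SrcWidth = 4, Mask = <0, 5, 2, 7> and all four output
// lanes demanded, DemandedLHS ends up as 0b0101 (source lanes 0 and 2) and
// DemandedRHS as 0b1010 (lanes 1 and 3 of the second source operand).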
void llvm::narrowShuffleMaskElts(int Scale, ArrayRef<int> Mask,
                                 SmallVectorImpl<int> &ScaledMask) {
  assert(Scale > 0 && "Unexpected scaling factor");

  // Fast-path: if no scaling, then it is just a copy.
  if (Scale == 1) {
    ScaledMask.assign(Mask.begin(), Mask.end());
    return;
  }

  ScaledMask.clear();
  for (int MaskElt : Mask) {
    if (MaskElt >= 0)
      assert(((uint64_t)Scale * MaskElt + (Scale - 1)) <= INT32_MAX &&
             "Overflowed 32-bits");
    for (int SliceElt = 0; SliceElt != Scale; ++SliceElt)
      ScaledMask.push_back(MaskElt < 0 ? MaskElt : Scale * MaskElt + SliceElt);
  }
}
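// Worked example: narrowShuffleMaskElts(2, <1, 0>, Scaled) produces
// <2, 3, 0, 1>: each wide element expands to Scale consecutive narrow
// elements, while negative (undef/poison) elements are replicated unchanged.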
bool llvm::widenShuffleMaskElts(int Scale, ArrayRef<int> Mask,
                                SmallVectorImpl<int> &ScaledMask) {
  assert(Scale > 0 && "Unexpected scaling factor");

  // Fast-path: if no scaling, then it is just a copy.
  if (Scale == 1) {
    ScaledMask.assign(Mask.begin(), Mask.end());
    return true;
  }

  // We must map the original elements down evenly to a type with fewer
  // elements.
  int NumElts = Mask.size();
  if (NumElts % Scale != 0)
    return false;

  ScaledMask.clear();
  ScaledMask.reserve(NumElts / Scale);

  // Step through the input mask by splitting into Scale-sized slices.
  do {
    ArrayRef<int> MaskSlice = Mask.take_front(Scale);
    assert((int)MaskSlice.size() == Scale && "Expected Scale-sized slice.");

    // The first element of the slice determines how we evaluate this slice.
    int SliceFront = MaskSlice.front();
    if (SliceFront < 0) {
      // Negative values (undef or other "sentinel" values) must be equal
      // across the entire slice.
      if (!all_equal(MaskSlice))
        return false;
      ScaledMask.push_back(SliceFront);
    } else {
      // A positive mask element must be cleanly divisible.
      if (SliceFront % Scale != 0)
        return false;
      // Elements of the slice must be consecutive.
      for (int i = 1; i < Scale; ++i)
        if (MaskSlice[i] != SliceFront + i)
          return false;
      ScaledMask.push_back(SliceFront / Scale);
    }
    Mask = Mask.drop_front(Scale);
  } while (!Mask.empty());

  assert((int)ScaledMask.size() * Scale == NumElts && "Unexpected scaled mask");

  // All elements of the original mask can be scaled down to map to the
  // elements of a mask with wider elements.
  return true;
}
void llvm::getShuffleMaskWithWidestElts(ArrayRef<int> Mask,
                                        SmallVectorImpl<int> &ScaledMask) {
  std::array<SmallVector<int, 16>, 2> TmpMasks;
  SmallVectorImpl<int> *Output = &TmpMasks[0], *Tmp = &TmpMasks[1];
  ArrayRef<int> InputMask = Mask;
  // Repetitively apply widenShuffleMaskElts for as long as it succeeds.
  for (unsigned Scale = 2; Scale <= InputMask.size(); ++Scale) {
    while (widenShuffleMaskElts(Scale, InputMask, *Output)) {
      InputMask = *Output;
      std::swap(Output, Tmp);
    }
  }
  ScaledMask.assign(InputMask.begin(), InputMask.end());
}
/// Splits and processes a shuffle mask depending on the number of input and
/// output registers.
void llvm::processShuffleMasks(
    ArrayRef<int> Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs,
    unsigned NumOfUsedRegs, function_ref<void()> NoInputAction,
    function_ref<void(ArrayRef<int>, unsigned, unsigned)> SingleInputAction,
    function_ref<void(ArrayRef<int>, unsigned, unsigned)> ManyInputsAction) {
  SmallVector<SmallVector<SmallVector<int>>> Res(NumOfDestRegs);
  // 1. Split the source/destination vectors into real registers.
  // 2. Analyze the mask to identify which real registers are permuted.
  int Sz = Mask.size();
  unsigned SzDest = Sz / NumOfDestRegs;
  unsigned SzSrc = Sz / NumOfSrcRegs;
  for (unsigned I = 0; I < NumOfDestRegs; ++I) {
    auto &RegMasks = Res[I];
    RegMasks.assign(NumOfSrcRegs, {});
    // Record, per destination register, which source registers feed it.
    for (unsigned K = 0; K < SzDest; ++K) {
      int Idx = I * SzDest + K;
      if (Idx == Sz)
        break;
      if (Mask[Idx] >= Sz || Mask[Idx] == PoisonMaskElem)
        continue;
      int SrcRegIdx = Mask[Idx] / SzSrc;
      // Lazily initialize the mask for each used source register.
      if (RegMasks[SrcRegIdx].empty())
        RegMasks[SrcRegIdx].assign(SzDest, PoisonMaskElem);
      RegMasks[SrcRegIdx][K] = Mask[Idx] % SzSrc;
    }
  }
  // Process the split masks.
  for (unsigned I = 0; I < NumOfUsedRegs; ++I) {
    auto &Dest = Res[I];
    int NumSrcRegs =
        count_if(Dest, [](ArrayRef<int> Mask) { return !Mask.empty(); });
    switch (NumSrcRegs) {
    case 0:
      // No input vectors were used.
      NoInputAction();
      break;
    case 1: {
      // Find the only non-empty source mask.
      auto *It =
          find_if(Dest, [](ArrayRef<int> Mask) { return !Mask.empty(); });
      unsigned SrcReg = std::distance(Dest.begin(), It);
      SingleInputAction(*It, SrcReg, I);
      break;
    }
    default: {
      // The destination is fed by several source registers: combine the
      // per-source masks pairwise until a single mask remains.
      auto &&CombineMasks = [](MutableArrayRef<int> FirstMask,
                               ArrayRef<int> SecondMask) {
        for (int Idx = 0, VF = FirstMask.size(); Idx < VF; ++Idx) {
          if (SecondMask[Idx] == PoisonMaskElem)
            continue;
          assert(FirstMask[Idx] == PoisonMaskElem &&
                 "Expected undefined mask element.");
          FirstMask[Idx] = SecondMask[Idx] + VF;
        }
      };
      auto &&NormalizeMask = [](MutableArrayRef<int> Mask) {
        for (int Idx = 0, VF = Mask.size(); Idx < VF; ++Idx) {
          if (Mask[Idx] != PoisonMaskElem)
            Mask[Idx] = Idx;
        }
      };
      int SecondIdx;
      do {
        int FirstIdx = -1;
        SecondIdx = -1;
        MutableArrayRef<int> FirstMask, SecondMask;
        for (unsigned I = 0; I < NumOfDestRegs; ++I) {
          auto &RegMask = Dest[I];
          if (RegMask.empty())
            continue;
          if (FirstIdx == SecondIdx) {
            FirstIdx = I;
            FirstMask = RegMask;
            continue;
          }
          SecondIdx = I;
          SecondMask = RegMask;
          CombineMasks(FirstMask, SecondMask);
          ManyInputsAction(FirstMask, FirstIdx, SecondIdx);
          NormalizeMask(FirstMask);
          RegMask.clear();
          SecondMask = FirstMask;
          SecondIdx = FirstIdx;
        }
        if (FirstIdx != SecondIdx && SecondIdx >= 0) {
          CombineMasks(SecondMask, FirstMask);
          ManyInputsAction(SecondMask, SecondIdx, FirstIdx);
          Dest[FirstIdx].clear();
          NormalizeMask(SecondMask);
        }
      } while (SecondIdx >= 0);
      break;
    }
    }
  }
}
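// Usage sketch (hypothetical cost query, not part of this file): the three
// callbacks make it easy to count how many per-register shuffles a wide
// shuffle decomposes into.
static unsigned countPerRegisterShuffles(ArrayRef<int> Mask, unsigned NumSrc,
                                         unsigned NumDest) {
  unsigned NumShuffles = 0;
  processShuffleMasks(
      Mask, NumSrc, NumDest, NumDest,
      []() { /* This destination register needs no input at all. */ },
      [&](ArrayRef<int>, unsigned, unsigned) { ++NumShuffles; },
      [&](ArrayRef<int>, unsigned, unsigned) { ++NumShuffles; });
  return NumShuffles;
}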
MapVector<Instruction *, uint64_t>
llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
                               const TargetTransformInfo *TTI) {
  // DemandedBits gives every value's live-out bits, but to avoid inserting
  // extra casts, every DAG of connected values must share one minimum width.
  EquivalenceClasses<Value *> ECs;
  SmallVector<Value *, 16> Worklist;
  SmallPtrSet<Value *, 4> Roots;
  SmallPtrSet<Value *, 16> Visited;
  DenseMap<Value *, uint64_t> DBits;
  SmallPtrSet<Instruction *, 4> InstructionSet;
  MapVector<Instruction *, uint64_t> MinBWs;

  // Determine the roots. We work bottom-up, from truncs or icmps.
  bool SeenExtFromIllegalType = false;
  for (auto *BB : Blocks)
    for (auto &I : *BB) {
      InstructionSet.insert(&I);

      if (TTI && (isa<ZExtInst>(&I) || isa<SExtInst>(&I)) &&
          !TTI->isTypeLegal(I.getOperand(0)->getType()))
        SeenExtFromIllegalType = true;

      // Only deal with non-vector integers up to 64 bits wide.
      if ((isa<TruncInst>(&I) || isa<ICmpInst>(&I)) &&
          !I.getType()->isVectorTy() &&
          I.getOperand(0)->getType()->getScalarSizeInBits() <= 64) {
        Worklist.push_back(&I);
        Roots.insert(&I);
      }
    }
  // Early exit.
  if (Worklist.empty() || (TTI && !SeenExtFromIllegalType))
    return MinBWs;

  // Now proceed breadth-first, unioning values together.
  while (!Worklist.empty()) {
    Value *Val = Worklist.pop_back_val();
    Value *Leader = ECs.getOrInsertLeaderValue(Val);

    if (!Visited.insert(Val).second)
      continue;

    // Non-instructions terminate a chain successfully.
    if (!isa<Instruction>(Val))
      continue;
    Instruction *I = cast<Instruction>(Val);

    // If we encounter a type that is larger than 64 bits, we can't represent
    // it, so bail out.
    if (DB.getDemandedBits(I).getBitWidth() > 64)
      return MapVector<Instruction *, uint64_t>();

    uint64_t V = DB.getDemandedBits(I).getZExtValue();
    DBits[Leader] |= V;
    DBits[I] = V;

    // Casts, loads and instructions outside our range terminate a chain
    // successfully.
    if (isa<SExtInst>(I) || isa<ZExtInst>(I) || isa<LoadInst>(I) ||
        !InstructionSet.count(I))
      continue;

    // Unsafe casts terminate a chain unsuccessfully: nothing useful can be
    // done with bitcasts, ptrtoints or inttoptrs.
    if (isa<BitCastInst>(I) || isa<PtrToIntInst>(I) || isa<IntToPtrInst>(I) ||
        !I->getType()->isIntegerTy()) {
      DBits[Leader] |= ~0ULL;
      continue;
    }

    if (DBits[Leader] == ~0ULL)
      // All bits demanded, no point continuing.
      continue;

    for (Value *O : cast<User>(I)->operands()) {
      ECs.unionSets(Leader, O);
      Worklist.push_back(O);
    }
  }

  // Now that all values are discovered, check for users we didn't see; a
  // chain with unknown users can't be optimized.
  for (auto &I : DBits)
    for (auto *U : I.first->users())
      if (U->getType()->isIntegerTy() && DBits.count(U) == 0)
        DBits[ECs.getOrInsertLeaderValue(I.first)] |= ~0ULL;

  for (auto I = ECs.begin(), E = ECs.end(); I != E; ++I) {
    uint64_t LeaderDemandedBits = 0;
    for (Value *M : make_range(ECs.member_begin(I), ECs.member_end()))
      LeaderDemandedBits |= DBits[M];

    uint64_t MinBW = llvm::bit_width(LeaderDemandedBits);
    // Round up to the next power of two.
    MinBW = llvm::bit_ceil(MinBW);

    for (Value *M : make_range(ECs.member_begin(I), ECs.member_end())) {
      if (!isa<Instruction>(M))
        continue;
      Type *Ty = M->getType();
      if (Roots.count(M))
        Ty = cast<Instruction>(M)->getOperand(0)->getType();
      if (MinBW < Ty->getScalarSizeInBits())
        MinBWs[cast<Instruction>(M)] = MinBW;
    }
  }

  return MinBWs;
}
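// Worked example: in a chain such as
//   %e = zext i8 %a to i32
//   %s = add i32 %e, 255
//   %t = trunc i32 %s to i8
// only the low 8 bits of the i32 values are ever demanded, so the returned
// map records a minimum legal width of 8 bits for the chain, letting a
// vectorizer pick a narrower vector element type.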
/// Add all access groups in AccGroups to List.
template <typename ListT>
static void addToAccessGroupList(ListT &List, MDNode *AccGroups) {
  // Interpret an access group as a list containing itself.
  if (AccGroups->getNumOperands() == 0) {
    assert(isValidAsAccessGroup(AccGroups) && "Node must be an access group");
    List.insert(AccGroups);
    return;
  }

  for (const auto &AccGroupListOp : AccGroups->operands()) {
    auto *Item = cast<MDNode>(AccGroupListOp.get());
    assert(isValidAsAccessGroup(Item) && "List item must be an access group");
    List.insert(Item);
  }
}
MDNode *llvm::uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2) {
  if (!AccGroups1)
    return AccGroups2;
  if (!AccGroups2)
    return AccGroups1;
  if (AccGroups1 == AccGroups2)
    return AccGroups1;

  SmallSetVector<Metadata *, 4> Union;
  addToAccessGroupList(Union, AccGroups1);
  addToAccessGroupList(Union, AccGroups2);

  if (Union.size() == 0)
    return nullptr;
  if (Union.size() == 1)
    return cast<MDNode>(Union.front());

  LLVMContext &Ctx = AccGroups1->getContext();
  return MDNode::get(Ctx, Union.getArrayRef());
}
MDNode *llvm::intersectAccessGroups(const Instruction *Inst1,
                                    const Instruction *Inst2) {
  bool MayAccessMem1 = Inst1->mayReadOrWriteMemory();
  bool MayAccessMem2 = Inst2->mayReadOrWriteMemory();
  if (!MayAccessMem1 && !MayAccessMem2)
    return nullptr;
  if (!MayAccessMem1)
    return Inst2->getMetadata(LLVMContext::MD_access_group);
  if (!MayAccessMem2)
    return Inst1->getMetadata(LLVMContext::MD_access_group);

  MDNode *MD1 = Inst1->getMetadata(LLVMContext::MD_access_group);
  MDNode *MD2 = Inst2->getMetadata(LLVMContext::MD_access_group);
  if (!MD1 || !MD2)
    return nullptr;
  if (MD1 == MD2)
    return MD1;

  // Use a set for a scalable 'contains' check.
  SmallPtrSet<Metadata *, 4> AccGroupSet2;
  addToAccessGroupList(AccGroupSet2, MD2);

  SmallVector<Metadata *, 4> Intersection;
  if (MD1->getNumOperands() == 0) {
    assert(isValidAsAccessGroup(MD1) && "Node must be an access group");
    if (AccGroupSet2.count(MD1))
      Intersection.push_back(MD1);
  } else {
    for (const MDOperand &Node : MD1->operands()) {
      auto *Item = cast<MDNode>(Node.get());
      assert(isValidAsAccessGroup(Item) && "List item must be an access group");
      if (AccGroupSet2.count(Item))
        Intersection.push_back(Item);
    }
  }

  if (Intersection.size() == 0)
    return nullptr;
  if (Intersection.size() == 1)
    return cast<MDNode>(Intersection.front());

  LLVMContext &Ctx = Inst1->getContext();
  return MDNode::get(Ctx, Intersection);
}
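// Worked example: if Inst1 carries !llvm.access.group !{!0, !1} and Inst2
// carries !{!1, !2}, the intersection is the single access group !1, so a
// combined instruction may only claim membership in !1.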
Instruction *llvm::propagateMetadata(Instruction *Inst, ArrayRef<Value *> VL) {
  Instruction *I0 = cast<Instruction>(VL[0]);
  SmallVector<std::pair<unsigned, MDNode *>, 4> Metadata;
  I0->getAllMetadataOtherThanDebugLoc(Metadata);

  for (auto Kind : {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
                    LLVMContext::MD_noalias, LLVMContext::MD_fpmath,
                    LLVMContext::MD_nontemporal, LLVMContext::MD_invariant_load,
                    LLVMContext::MD_access_group}) {
    MDNode *MD = I0->getMetadata(Kind);

    for (int J = 1, E = VL.size(); MD && J != E; ++J) {
      const Instruction *IJ = cast<Instruction>(VL[J]);
      MDNode *IMD = IJ->getMetadata(Kind);
      switch (Kind) {
      case LLVMContext::MD_tbaa:
        MD = MDNode::getMostGenericTBAA(MD, IMD);
        break;
      case LLVMContext::MD_alias_scope:
        MD = MDNode::getMostGenericAliasScope(MD, IMD);
        break;
      case LLVMContext::MD_fpmath:
        MD = MDNode::getMostGenericFPMath(MD, IMD);
        break;
      case LLVMContext::MD_noalias:
      case LLVMContext::MD_nontemporal:
      case LLVMContext::MD_invariant_load:
        MD = MDNode::intersect(MD, IMD);
        break;
      case LLVMContext::MD_access_group:
        MD = intersectAccessGroups(Inst, IJ);
        break;
      default:
        llvm_unreachable("unhandled metadata");
      }
    }
    Inst->setMetadata(Kind, MD);
  }
  return Inst;
}
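// Usage sketch: after combining several scalar loads into one wide load, a
// vectorizer calls propagateMetadata(WideLoad, ScalarLoads) so that !tbaa,
// !noalias and friends are kept only in their most generic, still-correct
// form. The names WideLoad/ScalarLoads are illustrative, not from this file.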
Constant *llvm::createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF,
                                     const InterleaveGroup<Instruction> &Group) {
  // All-ones means the mask is not needed.
  if (Group.getNumMembers() == Group.getFactor())
    return nullptr;

  SmallVector<Constant *, 16> Mask;
  for (unsigned i = 0; i < VF; i++)
    for (unsigned j = 0; j < Group.getFactor(); ++j) {
      unsigned HasMember = Group.getMember(j) ? 1 : 0;
      Mask.push_back(Builder.getInt1(HasMember));
    }

  return ConstantVector::get(Mask);
}

llvm::SmallVector<int, 16>
llvm::createReplicatedMask(unsigned ReplicationFactor, unsigned VF) {
  SmallVector<int, 16> MaskVec;
  for (unsigned i = 0; i < VF; i++)
    for (unsigned j = 0; j < ReplicationFactor; j++)
      MaskVec.push_back(i);
  return MaskVec;
}

llvm::SmallVector<int, 16> llvm::createInterleaveMask(unsigned VF,
                                                      unsigned NumVecs) {
  SmallVector<int, 16> Mask;
  for (unsigned i = 0; i < VF; i++)
    for (unsigned j = 0; j < NumVecs; j++)
      Mask.push_back(j * VF + i);
  return Mask;
}

llvm::SmallVector<int, 16>
llvm::createStrideMask(unsigned Start, unsigned Stride, unsigned VF) {
  SmallVector<int, 16> Mask;
  for (unsigned i = 0; i < VF; i++)
    Mask.push_back(Start + i * Stride);
  return Mask;
}

llvm::SmallVector<int, 16> llvm::createSequentialMask(unsigned Start,
                                                      unsigned NumInts,
                                                      unsigned NumUndefs) {
  SmallVector<int, 16> Mask;
  for (unsigned i = 0; i < NumInts; i++)
    Mask.push_back(Start + i);
  for (unsigned i = 0; i < NumUndefs; i++)
    Mask.push_back(-1);
  return Mask;
}
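// Worked examples for the mask builders above:
//   createReplicatedMask(3, 2)    -> <0, 0, 0, 1, 1, 1>
//   createInterleaveMask(4, 2)    -> <0, 4, 1, 5, 2, 6, 3, 7>
//   createStrideMask(0, 2, 4)     -> <0, 2, 4, 6>
//   createSequentialMask(0, 4, 2) -> <0, 1, 2, 3, -1, -1>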
llvm::SmallVector<int, 16> llvm::createUnaryMask(ArrayRef<int> Mask,
                                                 unsigned NumElts) {
  // Avoid casts in the loop and make sure we have a reasonable number.
  int NumEltsSigned = NumElts;
  assert(NumEltsSigned > 0 && "Expected smaller or non-zero element count");

  // If the mask chooses an element from operand 1, reduce it to choose the
  // corresponding element of operand 0 instead.
  SmallVector<int, 16> UnaryMask;
  for (int MaskElt : Mask) {
    assert((MaskElt < NumEltsSigned * 2) && "Expected valid shuffle mask");
    int UnaryElt = MaskElt >= NumEltsSigned ? MaskElt - NumEltsSigned : MaskElt;
    UnaryMask.push_back(UnaryElt);
  }
  return UnaryMask;
}
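// Worked example: createUnaryMask(<0, 5, 2, 7>, 4) yields <0, 1, 2, 3>; each
// reference to the second operand (5 and 7) is folded back onto the matching
// lane of the first operand.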
/// A helper function for concatenating two vectors.
static Value *concatenateTwoVectors(IRBuilderBase &Builder, Value *V1,
                                    Value *V2) {
  VectorType *VecTy1 = dyn_cast<VectorType>(V1->getType());
  VectorType *VecTy2 = dyn_cast<VectorType>(V2->getType());
  assert(VecTy1 && VecTy2 &&
         VecTy1->getScalarType() == VecTy2->getScalarType() &&
         "Expect two vectors with the same element type");

  unsigned NumElts1 = cast<FixedVectorType>(VecTy1)->getNumElements();
  unsigned NumElts2 = cast<FixedVectorType>(VecTy2)->getNumElements();
  assert(NumElts1 >= NumElts2 &&
         "Unexpected: first vector has fewer elements");

  if (NumElts1 > NumElts2) {
    // Extend the shorter vector with undefs.
    V2 = Builder.CreateShuffleVector(
        V2, createSequentialMask(0, NumElts2, NumElts1 - NumElts2));
  }

  return Builder.CreateShuffleVector(
      V1, V2, createSequentialMask(0, NumElts1 + NumElts2, 0));
}
Value *llvm::concatenateVectors(IRBuilderBase &Builder,
                                ArrayRef<Value *> Vecs) {
  unsigned NumVecs = Vecs.size();
  assert(NumVecs > 1 && "Should be at least two vectors");

  SmallVector<Value *, 8> ResList;
  ResList.append(Vecs.begin(), Vecs.end());
  do {
    SmallVector<Value *, 8> TmpList;
    for (unsigned i = 0; i < NumVecs - 1; i += 2) {
      Value *V0 = ResList[i], *V1 = ResList[i + 1];
      assert((V0->getType() == V1->getType() || i == NumVecs - 2) &&
             "Only the last vector may have a different type");
      TmpList.push_back(concatenateTwoVectors(Builder, V0, V1));
    }

    // Push the last vector if the total number of vectors is odd.
    if (NumVecs % 2 != 0)
      TmpList.push_back(ResList[NumVecs - 1]);

    ResList = TmpList;
    NumVecs = ResList.size();
  } while (NumVecs > 1);

  return ResList[0];
}
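// Usage sketch: concatenating four <4 x i32> values performs two pairwise
// shuffles and then one final shuffle, i.e. a balanced reduction tree rather
// than a linear chain (V0..V3 are illustrative names):
//   Value *Wide = concatenateVectors(Builder, {V0, V1, V2, V3});
//   // Wide has type <16 x i32>.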
bool llvm::maskIsAllZeroOrUndef(Value *Mask) {
  assert(isa<VectorType>(Mask->getType()) &&
         isa<IntegerType>(Mask->getType()->getScalarType()) &&
         cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
             1 &&
         "Mask must be a vector of i1");

  auto *ConstMask = dyn_cast<Constant>(Mask);
  if (!ConstMask)
    return false;
  if (ConstMask->isNullValue() || isa<UndefValue>(ConstMask))
    return true;
  if (isa<ScalableVectorType>(ConstMask->getType()))
    return false;
  for (unsigned I = 0,
                E = cast<FixedVectorType>(ConstMask->getType())->getNumElements();
       I != E; ++I) {
    if (auto *MaskElt = ConstMask->getAggregateElement(I))
      if (MaskElt->isNullValue() || isa<UndefValue>(MaskElt))
        continue;
    return false;
  }
  return true;
}
bool llvm::maskIsAllOneOrUndef(Value *Mask) {
  assert(isa<VectorType>(Mask->getType()) &&
         isa<IntegerType>(Mask->getType()->getScalarType()) &&
         cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
             1 &&
         "Mask must be a vector of i1");

  auto *ConstMask = dyn_cast<Constant>(Mask);
  if (!ConstMask)
    return false;
  if (ConstMask->isAllOnesValue() || isa<UndefValue>(ConstMask))
    return true;
  if (isa<ScalableVectorType>(ConstMask->getType()))
    return false;
  for (unsigned I = 0,
                E = cast<FixedVectorType>(ConstMask->getType())->getNumElements();
       I != E; ++I) {
    if (auto *MaskElt = ConstMask->getAggregateElement(I))
      if (MaskElt->isAllOnesValue() || isa<UndefValue>(MaskElt))
        continue;
    return false;
  }
  return true;
}
APInt llvm::possiblyDemandedEltsInMask(Value *Mask) {
  assert(isa<FixedVectorType>(Mask->getType()) &&
         isa<IntegerType>(Mask->getType()->getScalarType()) &&
         cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
             1 &&
         "Mask must be a fixed width vector of i1");

  const unsigned VWidth =
      cast<FixedVectorType>(Mask->getType())->getNumElements();
  APInt DemandedElts = APInt::getAllOnes(VWidth);
  if (auto *CV = dyn_cast<ConstantVector>(Mask))
    for (unsigned i = 0; i < VWidth; i++)
      if (CV->getAggregateElement(i)->isNullValue())
        DemandedElts.clearBit(i);
  return DemandedElts;
}
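// Worked example: for the constant mask <i1 1, i1 0, i1 1, i1 0>,
// maskIsAllZeroOrUndef and maskIsAllOneOrUndef both return false, while
// possiblyDemandedEltsInMask returns the APInt 0b0101 (lanes 0 and 2).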
bool InterleavedAccessInfo::isStrided(int Stride) {
  unsigned Factor = std::abs(Stride);
  return Factor >= 2 && Factor <= MaxInterleaveGroupFactor;
}
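// Worked example: a loop body accessing A[2*i] and A[2*i+1] produces two
// accesses with stride 2, which the analysis below can combine into a single
// factor-2 interleave group served by one wide load plus shuffles.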
void InterleavedAccessInfo::collectConstStrideAccesses(
    MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
    const DenseMap<Value *, const SCEV *> &Strides) {
  auto &DL = TheLoop->getHeader()->getModule()->getDataLayout();

  // Visit the loop blocks in reverse postorder so the resulting map keeps the
  // loads/stores in program order.
  LoopBlocksDFS DFS(TheLoop);
  DFS.perform(LI);
  for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO()))
    for (auto &I : *BB) {
      Value *Ptr = getLoadStorePointerOperand(&I);
      if (!Ptr)
        continue;
      Type *ElementTy = getLoadStoreType(&I);

      // Codegen doesn't support cases where the type size doesn't match the
      // alloc size.
      uint64_t Size = DL.getTypeAllocSize(ElementTy);
      if (Size * 8 != DL.getTypeSizeInBits(ElementTy))
        continue;

      // Wrapping is not checked here because Ptr may end up in a full group
      // or a group with gaps; that is decided later.
      int64_t Stride =
          getPtrStride(PSE, ElementTy, Ptr, TheLoop, Strides,
                       /*Assume=*/true, /*ShouldCheckWrap=*/false).value_or(0);

      const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
      AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size,
                                              getLoadStoreAlignment(&I));
    }
}
void InterleavedAccessInfo::analyzeInterleaving(
    bool EnablePredicatedInterleavedMemAccesses) {
  LLVM_DEBUG(dbgs() << "LV: Analyzing interleaved accesses...\n");
  const auto &Strides = LAI->getSymbolicStrides();

  // Holds all accesses with a constant stride.
  MapVector<Instruction *, StrideDescriptor> AccessStrideInfo;
  collectConstStrideAccesses(AccessStrideInfo, Strides);

  if (AccessStrideInfo.empty())
    return;

  // Collect the dependences in the loop.
  collectDependences();
  // Holds all interleaved load/store groups temporarily.
  SmallSetVector<InterleaveGroup<Instruction> *, 4> StoreGroups;
  SmallSetVector<InterleaveGroup<Instruction> *, 4> LoadGroups;

  // Search in bottom-up program order for pairs of accesses (A and B) that
  // can form interleaved groups.
  for (auto BI = AccessStrideInfo.rbegin(), E = AccessStrideInfo.rend();
       BI != E; ++BI) {
    Instruction *B = BI->first;
    StrideDescriptor DesB = BI->second;

    // Initialize a group for B if it has an allowable stride. Even if we
    // don't create a group for B, we continue with the bottom-up algorithm to
    // ensure we don't break any of B's dependences.
    InterleaveGroup<Instruction> *Group = nullptr;
    if (isStrided(DesB.Stride) &&
        (!isPredicated(B->getParent()) ||
         EnablePredicatedInterleavedMemAccesses)) {
      Group = getInterleaveGroup(B);
      if (!Group)
        Group = createInterleaveGroup(B, DesB.Stride, DesB.Alignment);
      if (B->mayWriteToMemory())
        StoreGroups.insert(Group);
      else
        LoadGroups.insert(Group);
    }

    for (auto AI = std::next(BI); AI != E; ++AI) {
      Instruction *A = AI->first;
      StrideDescriptor DesA = AI->second;
      // Our code-motion strategy moves all accesses of a group together, so
      // if A and B cannot be reordered, any store group containing A must be
      // dissolved to prevent illegal code motion.
      if (!canReorderMemAccessesForInterleavedGroups(&*AI, &*BI)) {
        if (isInterleaved(A)) {
          InterleaveGroup<Instruction> *StoreGroup = getInterleaveGroup(A);
          LLVM_DEBUG(dbgs() << "LV: Invalidated store group due to "
                               "dependence between " << *A << " and " << *B
                            << '\n');
          StoreGroups.remove(StoreGroup);
          releaseGroup(StoreGroup);
        }
        // B can only form a group with instructions it precedes, so stop
        // extending B's group here.
        break;
      }
      // At this point, we've checked for illegal code motion. If either A or
      // B isn't strided, there's nothing left to do.
      if (!isStrided(DesA.Stride) || !isStrided(DesB.Stride))
        continue;

      // Ignore A if it's already in a group or isn't the same kind of memory
      // operation as B.
      if (isInterleaved(A) ||
          (A->mayReadFromMemory() != B->mayReadFromMemory()) ||
          (A->mayWriteToMemory() != B->mayWriteToMemory()))
        continue;

      // Ignore A if its stride or size differs from that of B.
      if (DesA.Stride != DesB.Stride || DesA.Size != DesB.Size)
        continue;

      // Ignore A if A and B don't belong to the same address space.
      if (getLoadStoreAddressSpace(A) != getLoadStoreAddressSpace(B))
        continue;

      // Calculate the distance from A to B.
      const SCEVConstant *DistToB = dyn_cast<SCEVConstant>(
          PSE.getSE()->getMinusSCEV(DesA.Scev, DesB.Scev));
      if (!DistToB)
        continue;
      int64_t DistanceToB = DistToB->getAPInt().getSExtValue();

      // Ignore A if its distance to B is not a multiple of the size.
      if (DistanceToB % static_cast<int64_t>(DesB.Size))
        continue;

      // All members of a predicated interleave group must have the same
      // predicate, and currently must reside in the same BB.
      BasicBlock *BlockA = A->getParent();
      BasicBlock *BlockB = B->getParent();
      if ((isPredicated(BlockA) || isPredicated(BlockB)) &&
          (!EnablePredicatedInterleavedMemAccesses || BlockA != BlockB))
        continue;

      // The index of A is the index of B plus A's distance to B in multiples
      // of the size.
      int IndexA =
          Group->getIndex(B) + DistanceToB / static_cast<int64_t>(DesB.Size);

      // Try to insert A into B's group.
      if (Group->insertMember(A, IndexA, DesA.Alignment)) {
        LLVM_DEBUG(dbgs() << "LV: Inserted:" << *A << '\n'
                          << "    into the interleave group with" << *B
                          << '\n');
        InterleaveGroupMap[A] = Group;

        // Set the first load in program order as the insert position.
        if (A->mayReadFromMemory())
          Group->setInsertPos(A);
      }
    } // Iteration over A accesses.
  } // Iteration over B accesses.
  auto InvalidateGroupIfMemberMayWrap = [&](InterleaveGroup<Instruction> *Group,
                                            unsigned Index,
                                            std::string FirstOrLast) -> bool {
    Instruction *Member = Group->getMember(Index);
    assert(Member && "Group member does not exist");
    Value *MemberPtr = getLoadStorePointerOperand(Member);
    Type *AccessTy = getLoadStoreType(Member);
    if (getPtrStride(PSE, AccessTy, MemberPtr, TheLoop, Strides,
                     /*Assume=*/false, /*ShouldCheckWrap=*/true).value_or(0))
      return false;
    LLVM_DEBUG(dbgs() << "LV: Invalidate candidate interleaved group due to "
                      << FirstOrLast
                      << " group member potentially pointer-wrapping.\n");
    releaseGroup(Group);
    return true;
  };
  for (auto *Group : LoadGroups) {
    // Case 1: A full group. The checks can be skipped: if the wide load would
    // wrap around the address space, it would access nullptr even without the
    // transformation.
    if (Group->getNumMembers() == Group->getFactor())
      continue;

    // Case 2: If the first and last members of the group don't wrap, neither
    // do the pointers in between.
    if (InvalidateGroupIfMemberMayWrap(Group, 0, std::string("first")))
      continue;
    Instruction *LastMember = Group->getMember(Group->getFactor() - 1);
    if (LastMember) {
      InvalidateGroupIfMemberMayWrap(Group, Group->getFactor() - 1,
                                     std::string("last"));
    } else {
      // Case 3: An interleaved load group with gaps needs at least one scalar
      // epilogue iteration to avoid speculative out-of-bounds accesses.
      if (Group->isReverse()) {
        LLVM_DEBUG(
            dbgs() << "LV: Invalidate candidate interleaved group due to "
                      "a reverse access with gaps.\n");
        releaseGroup(Group);
        continue;
      }
      LLVM_DEBUG(
          dbgs() << "LV: Interleaved group requires epilogue iteration.\n");
      RequiresScalarEpilogue = true;
    }
  }
  for (auto *Group : StoreGroups) {
    // Case 1: A full group. The checks can be skipped.
    if (Group->getNumMembers() == Group->getFactor())
      continue;

    // An interleaved store group with gaps requires a masked wide store, so
    // drop such groups if masked interleaved accesses are not enabled.
    if (!EnablePredicatedInterleavedMemAccesses) {
      LLVM_DEBUG(
          dbgs() << "LV: Invalidate candidate interleaved store group due "
                    "to gaps.\n");
      releaseGroup(Group);
      continue;
    }

    // Case 2: If the first and last members of the group don't wrap, neither
    // do the pointers in between.
    if (InvalidateGroupIfMemberMayWrap(Group, 0, std::string("first")))
      continue;
    for (int Index = Group->getFactor() - 1; Index > 0; Index--)
      if (Group->getMember(Index)) {
        InvalidateGroupIfMemberMayWrap(Group, Index, std::string("last"));
        break;
      }
  }
}
void InterleavedAccessInfo::invalidateGroupsRequiringScalarEpilogue() {
  // If no group had triggered the requirement, we can reset it.
  if (!RequiresScalarEpilogue)
    return;
  bool ReleasedGroup = false;
  // Release groups requiring scalar epilogues. Note that this also removes
  // them from InterleaveGroups.
  for (auto *Group : make_early_inc_range(InterleaveGroups)) {
    if (!Group->requiresScalarEpilogue())
      continue;
    LLVM_DEBUG(
        dbgs()
        << "LV: Invalidate candidate interleaved group due to gaps that "
           "require a scalar epilogue (not allowed under optsize) and cannot "
           "be masked (not enabled).\n");
    releaseGroup(Group);
    ReleasedGroup = true;
  }
  assert(ReleasedGroup && "At least one group must be invalidated, as a "
                          "scalar epilogue was required");
  (void)ReleasedGroup;
  RequiresScalarEpilogue = false;
}
template <typename InstT>
void InterleaveGroup<InstT>::addMetadata(InstT *NewInst) const {
  SmallVector<Value *, 4> VL;
  std::transform(Members.begin(), Members.end(), std::back_inserter(VL),
                 [](std::pair<int, Instruction *> p) { return p.second; });
  propagateMetadata(NewInst, VL);
}
std::string VFABI::mangleTLIVectorName(StringRef VectorName,
                                       StringRef ScalarName, unsigned numArgs,
                                       ElementCount VF, bool Masked) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << "_ZGV" << VFABI::_LLVM_ << (Masked ? "M" : "N");
  if (VF.isScalable())
    Out << 'x';
  else
    Out << VF.getFixedValue();
  for (unsigned I = 0; I < numArgs; ++I)
    Out << "v";
  Out << "_" << ScalarName << "(" << VectorName << ")";
  return std::string(Out.str());
}
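// Worked example (assuming the current LLVM-internal VFABI encoding):
// mangleTLIVectorName("vsinf", "sinf", 1, ElementCount::getFixed(4)) returns
// "_ZGV_LLVM_N4v_sinf(vsinf)", i.e. a 4-lane, unmasked variant of sinf with
// one vector ('v') parameter.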
void VFABI::getVectorVariantNames(const CallInst &CI,
                                  SmallVectorImpl<std::string> &VariantMappings) {
  const StringRef S = CI.getFnAttr(VFABI::MappingsAttrName).getValueAsString();
  if (S.empty())
    return;

  SmallVector<StringRef, 8> ListAttr;
  S.split(ListAttr, ",");

  for (const auto &S : SetVector<StringRef>(ListAttr.begin(), ListAttr.end())) {
#ifndef NDEBUG
    LLVM_DEBUG(dbgs() << "VFABI: adding mapping '" << S << "'\n");
    std::optional<VFInfo> Info =
        VFABI::tryDemangleForVFABI(S, *(CI.getModule()));
    assert(Info && "Invalid name for a VFABI variant.");
    assert(CI.getModule()->getFunction(Info->VectorName) &&
           "Vector function is missing.");
#endif
    VariantMappings.push_back(std::string(S));
  }
}
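// Worked example: a call carrying the attribute
//   "vector-function-abi-variant"="_ZGV_LLVM_N4v_sinf(vsinf)"
// yields one entry, "_ZGV_LLVM_N4v_sinf(vsinf)", in VariantMappings; each
// comma-separated token must demangle to a valid VFInfo.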
bool VFShape::hasValidParameterList() const {
  for (unsigned Pos = 0, NumParams = Parameters.size(); Pos < NumParams;
       ++Pos) {
    assert(Parameters[Pos].ParamPos == Pos && "Broken parameter list.");

    switch (Parameters[Pos].ParamKind) {
    default: // Nothing to check.
      break;
    case VFParamKind::OMP_Linear:
    case VFParamKind::OMP_LinearRef:
    case VFParamKind::OMP_LinearVal:
    case VFParamKind::OMP_LinearUVal:
      // Compile-time linear steps must be non-zero.
      if (Parameters[Pos].LinearStepOrPos == 0)
        return false;
      break;
    case VFParamKind::OMP_LinearPos:
    case VFParamKind::OMP_LinearRefPos:
    case VFParamKind::OMP_LinearValPos:
    case VFParamKind::OMP_LinearUValPos:
      // The runtime linear step must refer to some other parameter in the
      // signature, not to the parameter itself.
      if (Parameters[Pos].LinearStepOrPos >= int(NumParams))
        return false;
      if (Parameters[Pos].LinearStepOrPos == int(Pos))
        return false;
      for (unsigned NextPos = Pos + 1; NextPos < NumParams; ++NextPos)