//===----------- VectorUtils.cpp - Vectorizer utility functions ----------===//

#define DEBUG_TYPE "vectorutils"

/// Maximum factor for an interleaved memory access.
static cl::opt<unsigned> MaxInterleaveGroupFactor(
    "max-interleave-group-factor", cl::Hidden,
    cl::desc("Maximum factor for an interleaved access group (default = 8)"),
    cl::init(8));
/// Identify if the intrinsic is trivially vectorizable.
bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
  switch (ID) {
  case Intrinsic::bswap:
  case Intrinsic::bitreverse:
  case Intrinsic::ctpop:
  // ...
  case Intrinsic::sadd_sat:
  case Intrinsic::ssub_sat:
  case Intrinsic::uadd_sat:
  case Intrinsic::usub_sat:
  case Intrinsic::smul_fix:
  case Intrinsic::smul_fix_sat:
  case Intrinsic::umul_fix:
  case Intrinsic::umul_fix_sat:
  // ...
  case Intrinsic::log10:
  // ...
  case Intrinsic::minnum:
  case Intrinsic::maxnum:
  case Intrinsic::minimum:
  case Intrinsic::maximum:
  case Intrinsic::copysign:
  case Intrinsic::floor:
  // ...
  case Intrinsic::trunc:
  // ...
  case Intrinsic::nearbyint:
  case Intrinsic::round:
  case Intrinsic::roundeven:
  // ...
  case Intrinsic::fmuladd:
  // ...
  case Intrinsic::canonicalize:
  case Intrinsic::fptosi_sat:
  case Intrinsic::fptoui_sat:
    return true;
  default:
    return false;
  }
}
/// Identifies if the vector form of the intrinsic has a scalar operand.
bool llvm::isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID,
                                              unsigned ScalarOpdIdx) {
  switch (ID) {
  // ...
  case Intrinsic::ctlz:
  case Intrinsic::cttz:
  case Intrinsic::powi:
    return (ScalarOpdIdx == 1);
  case Intrinsic::smul_fix:
  case Intrinsic::smul_fix_sat:
  case Intrinsic::umul_fix:
  case Intrinsic::umul_fix_sat:
    return (ScalarOpdIdx == 2);
  default:
    return false;
  }
}
/// Identifies if the vector form of the intrinsic has an operand with an
/// overloaded type.
bool llvm::isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID,
                                                  unsigned OpdIdx) {
  switch (ID) {
  case Intrinsic::fptosi_sat:
  case Intrinsic::fptoui_sat:
    return OpdIdx == 0;
  case Intrinsic::powi:
    return OpdIdx == 1;
  default:
    return false;
  }
}
/// Returns the intrinsic ID for the call, if the call is to a vectorizable
/// intrinsic; Intrinsic::not_intrinsic otherwise.
Intrinsic::ID llvm::getVectorIntrinsicIDForCall(const CallInst *CI,
                                                const TargetLibraryInfo *TLI) {
  Intrinsic::ID ID = getIntrinsicForCallSite(*CI, TLI);
  // ...
  if (isTriviallyVectorizable(ID) || ID == Intrinsic::lifetime_start ||
      ID == Intrinsic::lifetime_end || ID == Intrinsic::assume ||
      ID == Intrinsic::experimental_noalias_scope_decl ||
      ID == Intrinsic::sideeffect || ID == Intrinsic::pseudoprobe)
    return ID;
  return Intrinsic::not_intrinsic;
}
/// Find the operand of the GEP that should be checked for consecutive stores.
unsigned llvm::getGEPInductionOperand(const GetElementPtrInst *Gep) {
  const DataLayout &DL = Gep->getModule()->getDataLayout();
  unsigned LastOperand = Gep->getNumOperands() - 1;
  TypeSize GEPAllocSize = DL.getTypeAllocSize(Gep->getResultElementType());

  // Walk backwards and try to peel off zeros.
  while (LastOperand > 1 && match(Gep->getOperand(LastOperand), m_Zero())) {
    // Find the type we're currently indexing into.
    gep_type_iterator GEPTI = gep_type_begin(Gep);
    std::advance(GEPTI, LastOperand - 2);

    // If it's a type with the same allocation size as the result of the GEP
    // we can peel off the zero index.
    if (DL.getTypeAllocSize(GEPTI.getIndexedType()) != GEPAllocSize)
      break;
    --LastOperand;
  }

  return LastOperand;
}
/// If the argument is a GEP, return the operand identified by
/// getGEPInductionOperand; otherwise return the original pointer.
Value *llvm::stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
  // ...
  // Check that all of the gep indices are uniform except for our induction
  // operand.
  for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i)
    if (i != InductionOperand &&
        !SE->isLoopInvariant(SE->getSCEV(GEP->getOperand(i)), Lp))
      return Ptr;
  return GEP->getOperand(InductionOperand);
}
/// If a value has only one user that is a CastInst, return it.
Value *llvm::getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty) {
  Value *UniqueCast = nullptr;
  for (User *U : Ptr->users()) {
    CastInst *CI = dyn_cast<CastInst>(U);
    if (CI && CI->getType() == Ty) {
      if (!UniqueCast)
        UniqueCast = CI;
      else
        return nullptr;
    }
  }
  return UniqueCast;
}
/// Get the stride of a pointer access in a loop. Looks for symbolic strides
/// "a[i*stride]" and returns the stride as a loop-invariant Value, or nullptr.
Value *llvm::getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
  auto *PtrTy = dyn_cast<PointerType>(Ptr->getType());
  if (!PtrTy || PtrTy->isAggregateType())
    return nullptr;

  int64_t PtrAccessSize = 1;

  Value *OrigPtr = Ptr;
  Ptr = stripGetElementPtr(Ptr, SE, Lp);
  const SCEV *V = SE->getSCEV(Ptr);
  // ...

  // Strip off the size-of-access multiplication if we are still analyzing the
  // original pointer.
  if (OrigPtr == Ptr) {
    if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(V)) {
      if (M->getOperand(0)->getSCEVType() != scConstant)
        return nullptr;

      const APInt &APStepVal = cast<SCEVConstant>(M->getOperand(0))->getAPInt();

      // Huge step value: give up.
      if (APStepVal.getBitWidth() > 64)
        return nullptr;

      int64_t StepVal = APStepVal.getSExtValue();
      if (PtrAccessSize != StepVal)
        return nullptr;
      V = M->getOperand(1);
    }
  }

  // Look for the loop-invariant symbolic value, stripping off a recurrence
  // cast if there is one.
  Type *StripedOffRecurrenceCast = nullptr;
  if (const auto *C = dyn_cast<SCEVIntegralCastExpr>(V)) {
    StripedOffRecurrenceCast = C->getType();
    V = C->getOperand();
  }

  const SCEVUnknown *U = dyn_cast<SCEVUnknown>(V);
  if (!U)
    return nullptr;

  Value *Stride = U->getValue();
  if (!Lp->isLoopInvariant(Stride))
    return nullptr;

  // If we stripped off the recurrence cast, return the value that is used in
  // this loop so it can be replaced later.
  if (StripedOffRecurrenceCast)
    Stride = getUniqueCastUse(Stride, Lp, StripedOffRecurrenceCast);

  return Stride;
}
/// Given a vector and an element number, see if the scalar value is already
/// around as a register, e.g. if it was inserted directly into the vector.
Value *llvm::findScalarElement(Value *V, unsigned EltNo) {
  assert(V->getType()->isVectorTy() && "Not looking at a vector?");
  VectorType *VTy = cast<VectorType>(V->getType());
  // For fixed-length vectors, return undef for out-of-range access.
  if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
    unsigned Width = FVTy->getNumElements();
    if (EltNo >= Width)
      return UndefValue::get(FVTy->getElementType());
  }

  if (Constant *C = dyn_cast<Constant>(V))
    return C->getAggregateElement(EltNo);

  if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) {
    // An insert to a variable element is unknown.
    if (!isa<ConstantInt>(III->getOperand(2)))
      return nullptr;
    unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue();
    // An insert to the element we are looking for yields the inserted value.
    if (EltNo == IIElt)
      return III->getOperand(1);
    // Guard against an infinite loop on malformed, unreachable IR.
    if (III == III->getOperand(0))
      return nullptr;
    // Otherwise, recurse into the insert's vector operand.
    return findScalarElement(III->getOperand(0), EltNo);
  }

  ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V);
  // Restrict the following transformation to fixed-length vectors.
  if (SVI && isa<FixedVectorType>(SVI->getType())) {
    unsigned LHSWidth =
        cast<FixedVectorType>(SVI->getOperand(0)->getType())->getNumElements();
    int InEl = SVI->getMaskValue(EltNo);
    if (InEl < 0)
      return UndefValue::get(VTy->getElementType());
    if (InEl < (int)LHSWidth)
      return findScalarElement(SVI->getOperand(0), InEl);
    return findScalarElement(SVI->getOperand(1), InEl - LHSWidth);
  }

  // Extract a value from a vector add with a constant zero.
  Value *Val; Constant *C;
  if (match(V, m_Add(m_Value(Val), m_Constant(C))))
    if (Constant *Elt = C->getAggregateElement(EltNo))
      if (Elt->isNullValue())
        return findScalarElement(Val, EltNo);

  // For a scalable vector, an in-range element of a splat is the splat value.
  if (isa<ScalableVectorType>(VTy))
    if (Value *Splat = getSplatValue(V))
      if (EltNo < VTy->getElementCount().getKnownMinValue())
        return Splat;

  return nullptr;
}
/// If all non-negative Mask elements are the same value, return that value.
int llvm::getSplatIndex(ArrayRef<int> Mask) {
  int SplatIndex = -1;
  for (int M : Mask) {
    // Ignore invalid (undefined) mask elements.
    if (M < 0)
      continue;

    // There can be only one distinct non-negative element value for a splat.
    if (SplatIndex != -1 && SplatIndex != M)
      return -1;

    // Initialize the splat index to the first non-negative mask element.
    SplatIndex = M;
  }
  assert((SplatIndex == -1 || SplatIndex >= 0) && "Negative index?");
  return SplatIndex;
}
/// Get splat value if the input is a splat vector or return nullptr.
Value *llvm::getSplatValue(const Value *V) {
  if (isa<VectorType>(V->getType()))
    if (auto *C = dyn_cast<Constant>(V))
      return C->getSplatValue();
  // ...
  return nullptr;
}
/// Return true if each element of the vector value V is poisoned or equal to
/// every other non-poisoned element.
bool llvm::isSplatValue(const Value *V, int Index, unsigned Depth) {
  if (isa<VectorType>(V->getType())) {
    if (isa<UndefValue>(V))
      return true;
    if (auto *C = dyn_cast<Constant>(V))
      return C->getSplatValue() != nullptr;
  }

  if (auto *Shuf = dyn_cast<ShuffleVectorInst>(V)) {
    // ...
  }

  // ...
  return false;
}
/// Transform a shuffle mask's output demanded element mask into demanded
/// element masks for the two source operands.
bool llvm::getShuffleDemandedElts(int SrcWidth, ArrayRef<int> Mask,
                                  const APInt &DemandedElts, APInt &DemandedLHS,
                                  APInt &DemandedRHS, bool AllowUndefElts) {
  DemandedLHS = DemandedRHS = APInt::getZero(SrcWidth);

  // Early out if we don't demand any elements.
  if (DemandedElts.isZero())
    return true;

  // Simple case of a shuffle with zeroinitializer.
  if (all_of(Mask, [](int Elt) { return Elt == 0; })) {
    DemandedLHS.setBit(0);
    return true;
  }

  for (unsigned I = 0, E = Mask.size(); I != E; ++I) {
    int M = Mask[I];
    assert((-1 <= M) && (M < (SrcWidth * 2)) &&
           "Invalid shuffle mask constant");

    if (!DemandedElts[I] || (AllowUndefElts && (M < 0)))
      continue;

    // For undef elements, we don't know anything about the common state of
    // the shuffle result.
    if (M < 0)
      return false;

    if (M < SrcWidth)
      DemandedLHS.setBit(M);
    else
      DemandedRHS.setBit(M - SrcWidth);
  }

  return true;
}
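// Worked example (illustrative, not from the source): for a shuffle of two
// 4-element sources with Mask = <0, 5, -1, 3> and all output lanes demanded,
// the loop sets DemandedLHS = 0b1001 (src0 lanes 0 and 3) and DemandedRHS =
// 0b0010 (src1 lane: 5 - SrcWidth = 1); the undef element (-1) makes the
// function return false unless AllowUndefElts is set.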
/// Replace each shuffle mask index with the scaled sequential indices for an
/// equivalent mask of narrowed elements.
void llvm::narrowShuffleMaskElts(int Scale, ArrayRef<int> Mask,
                                 SmallVectorImpl<int> &ScaledMask) {
  assert(Scale > 0 && "Unexpected scaling factor");

  // Fast-path: if no scaling, assign the input mask directly.
  if (Scale == 1) {
    ScaledMask.assign(Mask.begin(), Mask.end());
    return;
  }

  ScaledMask.clear();
  for (int MaskElt : Mask) {
    if (MaskElt >= 0)
      assert(((uint64_t)Scale * MaskElt + (Scale - 1)) <= INT32_MAX &&
             "Overflowed 32-bits");
    for (int SliceElt = 0; SliceElt != Scale; ++SliceElt)
      ScaledMask.push_back(MaskElt < 0 ? MaskElt : Scale * MaskElt + SliceElt);
  }
}
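// Illustrative example: narrowing by Scale = 2 turns the wide-element mask
// <1, -1> into the equivalent narrow-element mask <2, 3, -1, -1>; each
// defined index expands to Scale consecutive narrow indices, and undefs are
// replicated.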
/// Try to transform a shuffle mask by replacing elements with the scaled index
/// for an equivalent mask of widened elements.
bool llvm::widenShuffleMaskElts(int Scale, ArrayRef<int> Mask,
                                SmallVectorImpl<int> &ScaledMask) {
  assert(Scale > 0 && "Unexpected scaling factor");

  // Fast-path: if no scaling, assign the input mask directly.
  if (Scale == 1) {
    ScaledMask.assign(Mask.begin(), Mask.end());
    return true;
  }

  // The mask length must map down evenly to a mask with fewer elements.
  int NumElts = Mask.size();
  if (NumElts % Scale != 0)
    return false;

  ScaledMask.clear();
  ScaledMask.reserve(NumElts / Scale);

  // Step through the input mask by splitting into Scale-sized slices.
  do {
    ArrayRef<int> MaskSlice = Mask.take_front(Scale);
    assert((int)MaskSlice.size() == Scale && "Expected Scale-sized slice.");

    // The first element of the slice determines how we evaluate this slice.
    int SliceFront = MaskSlice.front();
    if (SliceFront < 0) {
      // Negative values (undef or other "sentinel" values) must be equal
      // across the entire slice.
      if (!all_equal(MaskSlice))
        return false;
      ScaledMask.push_back(SliceFront);
    } else {
      // A positive mask element must be cleanly divisible.
      if (SliceFront % Scale != 0)
        return false;
      // Elements of the slice must be consecutive.
      for (int i = 1; i < Scale; ++i)
        if (MaskSlice[i] != SliceFront + i)
          return false;
      ScaledMask.push_back(SliceFront / Scale);
    }
    Mask = Mask.drop_front(Scale);
  } while (!Mask.empty());

  assert((int)ScaledMask.size() * Scale == NumElts &&
         "Unexpected scaled mask");

  return true;
}
/// Repetitively apply widenShuffleMaskElts() for as long as it succeeds, to
/// get the shuffle mask with the widest possible elements.
void llvm::getShuffleMaskWithWidestElts(ArrayRef<int> Mask,
                                        SmallVectorImpl<int> &ScaledMask) {
  std::array<SmallVector<int, 16>, 2> TmpMasks;
  SmallVectorImpl<int> *Output = &TmpMasks[0], *Tmp = &TmpMasks[1];
  ArrayRef<int> InputMask = Mask;
  for (unsigned Scale = 2; Scale <= InputMask.size(); ++Scale) {
    while (widenShuffleMaskElts(Scale, InputMask, *Output)) {
      InputMask = *Output;
      std::swap(Output, Tmp);
    }
  }
  ScaledMask.assign(InputMask.begin(), InputMask.end());
}
/// Splits and processes a shuffle mask depending on the number of input and
/// output registers.
void llvm::processShuffleMasks(
    ArrayRef<int> Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs,
    unsigned NumOfUsedRegs, function_ref<void()> NoInputAction,
    function_ref<void(ArrayRef<int>, unsigned, unsigned)> SingleInputAction,
    function_ref<void(ArrayRef<int>, unsigned, unsigned)> ManyInputsAction) {
  SmallVector<SmallVector<SmallVector<int>>> Res(NumOfDestRegs);
  // 1. Split the source/destination vectors into real registers.
  // 2. Analyze the mask to identify which real registers are permuted.
  int Sz = Mask.size();
  unsigned SzDest = Sz / NumOfDestRegs;
  unsigned SzSrc = Sz / NumOfSrcRegs;
  for (unsigned I = 0; I < NumOfDestRegs; ++I) {
    auto &RegMasks = Res[I];
    RegMasks.assign(NumOfSrcRegs, {});
    // Check which source register each dest-register lane reads from.
    for (unsigned K = 0; K < SzDest; ++K) {
      int Idx = I * SzDest + K;
      if (Idx == Sz)
        break;
      if (Mask[Idx] >= Sz || Mask[Idx] == UndefMaskElem)
        continue;
      int SrcRegIdx = Mask[Idx] / SzSrc;
      if (RegMasks[SrcRegIdx].empty())
        RegMasks[SrcRegIdx].assign(SzDest, UndefMaskElem);
      RegMasks[SrcRegIdx][K] = Mask[Idx] % SzSrc;
    }
  }
  // Process the split mask.
  for (unsigned I = 0; I < NumOfUsedRegs; ++I) {
    auto &Dest = Res[I];
    int NumSrcRegs =
        count_if(Dest, [](ArrayRef<int> Mask) { return !Mask.empty(); });
    switch (NumSrcRegs) {
    case 0:
      // No input vectors were used!
      NoInputAction();
      break;
    case 1: {
      // Find the only non-empty source mask.
      auto *It =
          find_if(Dest, [](ArrayRef<int> Mask) { return !Mask.empty(); });
      unsigned SrcReg = std::distance(Dest.begin(), It);
      SingleInputAction(*It, SrcReg, I);
      break;
    }
    default: {
      // More than one source register: permute registers pairwise.
      auto &&CombineMasks = [](MutableArrayRef<int> FirstMask,
                               ArrayRef<int> SecondMask) {
        for (int Idx = 0, VF = FirstMask.size(); Idx < VF; ++Idx) {
          if (SecondMask[Idx] != UndefMaskElem) {
            assert(FirstMask[Idx] == UndefMaskElem &&
                   "Expected undefined mask element.");
            FirstMask[Idx] = SecondMask[Idx] + VF;
          }
        }
      };
      auto &&NormalizeMask = [](MutableArrayRef<int> Mask) {
        for (int Idx = 0, VF = Mask.size(); Idx < VF; ++Idx) {
          if (Mask[Idx] != UndefMaskElem)
            Mask[Idx] %= VF;
        }
      };
      int SecondIdx;
      do {
        int FirstIdx = -1;
        SecondIdx = -1;
        MutableArrayRef<int> FirstMask, SecondMask;
        for (unsigned I = 0; I < NumOfDestRegs; ++I) {
          SmallVectorImpl<int> &RegMask = Dest[I];
          if (RegMask.empty())
            continue;
          if (FirstIdx == SecondIdx) {
            FirstIdx = I;
            FirstMask = RegMask;
            continue;
          }
          SecondIdx = I;
          SecondMask = RegMask;
          CombineMasks(FirstMask, SecondMask);
          ManyInputsAction(FirstMask, FirstIdx, SecondIdx);
          NormalizeMask(FirstMask);
          RegMask.clear();
          SecondMask = FirstMask;
          SecondIdx = FirstIdx;
        }
        if (FirstIdx != SecondIdx && SecondIdx >= 0) {
          CombineMasks(SecondMask, FirstMask);
          ManyInputsAction(SecondMask, SecondIdx, FirstIdx);
          Dest[FirstIdx].clear();
          NormalizeMask(SecondMask);
        }
      } while (SecondIdx >= 0);
      break;
    }
    }
  }
}
/// Compute a map of integer instructions to their minimum legal type size.
MapVector<Instruction *, uint64_t>
llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
                               const TargetTransformInfo *TTI) {
  // DemandedBits gives us every value's live-out bits, but to ensure no extra
  // casts would need to be inserted, every DAG of connected values must have
  // the same minimum bitwidth.
  EquivalenceClasses<Value *> ECs;
  SmallVector<Value *, 16> Worklist;
  SmallPtrSet<Value *, 4> Roots;
  SmallPtrSet<Value *, 16> Visited;
  DenseMap<Value *, uint64_t> DBits;
  SmallPtrSet<Instruction *, 4> InstructionSet;
  MapVector<Instruction *, uint64_t> MinBWs;

  // Determine the roots. We work bottom-up, from truncs or icmps.
  bool SeenExtFromIllegalType = false;
  for (auto *BB : Blocks)
    for (auto &I : *BB) {
      InstructionSet.insert(&I);

      if (TTI && (isa<ZExtInst>(&I) || isa<SExtInst>(&I)) &&
          !TTI->isTypeLegal(I.getOperand(0)->getType()))
        SeenExtFromIllegalType = true;

      // Only deal with non-vector integers up to 64 bits wide.
      if ((isa<TruncInst>(&I) || isa<ICmpInst>(&I)) &&
          !I.getType()->isVectorTy() &&
          I.getOperand(0)->getType()->getScalarSizeInBits() <= 64) {
        // If we know the loaded type is legal, don't add it to the worklist.
        if (TTI && isa<TruncInst>(&I) && TTI->isTypeLegal(I.getType()))
          continue;
        Worklist.push_back(&I);
        Roots.insert(&I);
      }
    }
  // Early exit.
  if (Worklist.empty() || (TTI && !SeenExtFromIllegalType))
    return MinBWs;

  // Now proceed breadth-first, unioning values together.
  while (!Worklist.empty()) {
    Value *Val = Worklist.pop_back_val();
    Value *Leader = ECs.getOrInsertLeaderValue(Val);

    if (!Visited.insert(Val).second)
      continue;

    // Non-instructions terminate a chain successfully.
    if (!isa<Instruction>(Val))
      continue;
    Instruction *I = cast<Instruction>(Val);

    // A type larger than 64 bits cannot be represented; bail out.
    if (DB.getDemandedBits(I).getBitWidth() > 64)
      return MapVector<Instruction *, uint64_t>();

    uint64_t V = DB.getDemandedBits(I).getZExtValue();
    DBits[Leader] |= V;
    DBits[I] = V;

    // Casts, loads and instructions outside of our range terminate a chain
    // successfully.
    if (isa<SExtInst>(I) || isa<ZExtInst>(I) || isa<LoadInst>(I) ||
        !InstructionSet.count(I))
      continue;

    // Unsafe casts terminate a chain unsuccessfully; nothing useful can be
    // done with bitcasts, ptrtoints or inttoptrs.
    if (isa<BitCastInst>(I) || isa<PtrToIntInst>(I) || isa<IntToPtrInst>(I) ||
        !I->getType()->isIntegerTy()) {
      DBits[Leader] |= ~0ULL;
      continue;
    }

    // ...

    if (DBits[Leader] == ~0ULL)
      // All bits demanded, no point continuing.
      continue;

    for (Value *O : cast<User>(I)->operands()) {
      ECs.unionSets(Leader, O);
      Worklist.push_back(O);
    }
  }

  // Now that all values are discovered, check for users we didn't see; if
  // there are any, we can't optimize that chain.
  for (auto &I : DBits)
    for (auto *U : I.first->users())
      if (U->getType()->isIntegerTy() && DBits.count(U) == 0)
        DBits[ECs.getOrInsertLeaderValue(I.first)] |= ~0ULL;

  for (auto I = ECs.begin(), E = ECs.end(); I != E; ++I) {
    uint64_t LeaderDemandedBits = 0;
    for (Value *M : make_range(ECs.member_begin(I), ECs.member_end()))
      LeaderDemandedBits |= DBits[M];
    // ...
    for (Value *M : make_range(ECs.member_begin(I), ECs.member_end())) {
      if (!isa<Instruction>(M))
        continue;
      Type *Ty = M->getType();
      if (Roots.count(M))
        Ty = cast<Instruction>(M)->getOperand(0)->getType();
      if (MinBW < Ty->getScalarSizeInBits())
        MinBWs[cast<Instruction>(M)] = MinBW;
    }
  }

  return MinBWs;
}
/// Add all access groups in AccGroups to List.
template <typename ListT>
static void addToAccessGroupList(ListT &List, MDNode *AccGroups) {
  // Interpret an access group as a list containing itself.
  if (AccGroups->getNumOperands() == 0) {
    assert(isValidAsAccessGroup(AccGroups) && "Node must be an access group");
    List.insert(AccGroups);
    return;
  }

  for (const auto &AccGroupListOp : AccGroups->operands()) {
    auto *Item = cast<MDNode>(AccGroupListOp.get());
    assert(isValidAsAccessGroup(Item) && "List item must be an access group");
    List.insert(Item);
  }
}
/// Compute the union of two access-group lists.
MDNode *llvm::uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2) {
  if (!AccGroups1)
    return AccGroups2;
  if (!AccGroups2)
    return AccGroups1;
  if (AccGroups1 == AccGroups2)
    return AccGroups1;

  SmallSetVector<Metadata *, 4> Union;
  addToAccessGroupList(Union, AccGroups1);
  addToAccessGroupList(Union, AccGroups2);

  if (Union.size() == 0)
    return nullptr;
  if (Union.size() == 1)
    return cast<MDNode>(Union.front());

  LLVMContext &Ctx = AccGroups1->getContext();
  return MDNode::get(Ctx, Union.getArrayRef());
}
/// Compute the access-group list of access groups that Inst1 and Inst2 are
/// both in.
MDNode *llvm::intersectAccessGroups(const Instruction *Inst1,
                                    const Instruction *Inst2) {
  bool MayAccessMem1 = Inst1->mayReadOrWriteMemory();
  bool MayAccessMem2 = Inst2->mayReadOrWriteMemory();

  if (!MayAccessMem1 && !MayAccessMem2)
    return nullptr;
  if (!MayAccessMem1)
    return Inst2->getMetadata(LLVMContext::MD_access_group);
  if (!MayAccessMem2)
    return Inst1->getMetadata(LLVMContext::MD_access_group);

  MDNode *MD1 = Inst1->getMetadata(LLVMContext::MD_access_group);
  MDNode *MD2 = Inst2->getMetadata(LLVMContext::MD_access_group);
  if (!MD1 || !MD2)
    return nullptr;
  if (MD1 == MD2)
    return MD1;

  // Use a set for the scalable 'contains' check.
  SmallPtrSet<Metadata *, 4> AccGroupSet2;
  addToAccessGroupList(AccGroupSet2, MD2);

  SmallVector<Metadata *, 4> Intersection;
  if (MD1->getNumOperands() == 0) {
    assert(isValidAsAccessGroup(MD1) && "Node must be an access group");
    if (AccGroupSet2.count(MD1))
      Intersection.push_back(MD1);
  } else {
    for (const MDOperand &Node : MD1->operands()) {
      auto *Item = cast<MDNode>(Node.get());
      assert(isValidAsAccessGroup(Item) && "List item must be an access group");
      if (AccGroupSet2.count(Item))
        Intersection.push_back(Item);
    }
  }

  if (Intersection.size() == 0)
    return nullptr;
  if (Intersection.size() == 1)
    return cast<MDNode>(Intersection.front());

  LLVMContext &Ctx = Inst1->getContext();
  return MDNode::get(Ctx, Intersection);
}
/// Propagate metadata from the instructions in VL to Inst, taking the most
/// generic or intersected value for each metadata kind.
Instruction *llvm::propagateMetadata(Instruction *Inst, ArrayRef<Value *> VL) {
  Instruction *I0 = cast<Instruction>(VL[0]);
  SmallVector<std::pair<unsigned, MDNode *>, 4> Metadata;
  I0->getAllMetadataOtherThanDebugLoc(Metadata);

  for (auto Kind : {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
                    LLVMContext::MD_noalias, LLVMContext::MD_fpmath,
                    LLVMContext::MD_nontemporal, LLVMContext::MD_invariant_load,
                    LLVMContext::MD_access_group}) {
    MDNode *MD = I0->getMetadata(Kind);

    for (int J = 1, E = VL.size(); MD && J != E; ++J) {
      const Instruction *IJ = cast<Instruction>(VL[J]);
      MDNode *IMD = IJ->getMetadata(Kind);
      switch (Kind) {
      case LLVMContext::MD_tbaa:
        MD = MDNode::getMostGenericTBAA(MD, IMD);
        break;
      case LLVMContext::MD_alias_scope:
        MD = MDNode::getMostGenericAliasScope(MD, IMD);
        break;
      case LLVMContext::MD_fpmath:
        MD = MDNode::getMostGenericFPMath(MD, IMD);
        break;
      case LLVMContext::MD_noalias:
      case LLVMContext::MD_nontemporal:
      case LLVMContext::MD_invariant_load:
        MD = MDNode::intersect(MD, IMD);
        break;
      case LLVMContext::MD_access_group:
        MD = intersectAccessGroups(Inst, IJ);
        break;
      default:
        llvm_unreachable("unhandled metadata");
      }
    }

    Inst->setMetadata(Kind, MD);
  }

  return Inst;
}
/// Create a mask that filters the members of an interleave group where there
/// are gaps.
Constant *
llvm::createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF,
                           const InterleaveGroup<Instruction> &Group) {
  // All 1's means the mask is not needed.
  if (Group.getNumMembers() == Group.getFactor())
    return nullptr;

  // TODO: support reversed access.
  assert(!Group.isReverse() && "Reversed group not supported.");

  SmallVector<Constant *, 16> Mask;
  for (unsigned i = 0; i < VF; i++)
    for (unsigned j = 0; j < Group.getFactor(); ++j) {
      unsigned HasMember = Group.getMember(j) ? 1 : 0;
      Mask.push_back(Builder.getInt1(HasMember));
    }

  return ConstantVector::get(Mask);
}
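// Illustrative example (not from the source): for a group with Factor = 3
// whose member at index 1 is a gap, VF = 2 yields the repeating lane mask
// <1, 0, 1, 1, 0, 1>, so lanes belonging to the gap are masked off.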
/// Create a mask with replicated elements.
llvm::SmallVector<int, 16>
llvm::createReplicatedMask(unsigned ReplicationFactor, unsigned VF) {
  SmallVector<int, 16> MaskVec;
  for (unsigned i = 0; i < VF; i++)
    for (unsigned j = 0; j < ReplicationFactor; j++)
      MaskVec.push_back(i);
  return MaskVec;
}
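// Illustrative example: createReplicatedMask(3, 2) returns
// <0, 0, 0, 1, 1, 1>, replicating each of the 2 source lanes 3 times.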
/// Create an interleave shuffle mask.
llvm::SmallVector<int, 16> llvm::createInterleaveMask(unsigned VF,
                                                      unsigned NumVecs) {
  SmallVector<int, 16> Mask;
  for (unsigned i = 0; i < VF; i++)
    for (unsigned j = 0; j < NumVecs; j++)
      Mask.push_back(j * VF + i);
  return Mask;
}
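// Illustrative example: createInterleaveMask(4, 2) returns
// <0, 4, 1, 5, 2, 6, 3, 7>, interleaving the lanes of two concatenated
// 4-element vectors.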
/// Create a stride shuffle mask.
llvm::SmallVector<int, 16>
llvm::createStrideMask(unsigned Start, unsigned Stride, unsigned VF) {
  SmallVector<int, 16> Mask;
  for (unsigned i = 0; i < VF; i++)
    Mask.push_back(Start + i * Stride);
  return Mask;
}
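// Illustrative example: createStrideMask(0, 3, 2) returns <0, 3>, selecting
// every third lane starting at lane 0; this is the inverse of the interleave
// mask above.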
/// Create a sequential shuffle mask.
llvm::SmallVector<int, 16>
llvm::createSequentialMask(unsigned Start, unsigned NumInts,
                           unsigned NumUndefs) {
  SmallVector<int, 16> Mask;
  for (unsigned i = 0; i < NumInts; i++)
    Mask.push_back(Start + i);
  for (unsigned i = 0; i < NumUndefs; i++)
    Mask.push_back(-1);
  return Mask;
}
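// Illustrative example: createSequentialMask(2, 3, 1) returns <2, 3, 4, -1>,
// three sequential indices starting at 2, padded with one undef sentinel.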
/// Given a shuffle mask for a binary shuffle, create the equivalent shuffle
/// mask assuming both operands are the same.
llvm::SmallVector<int, 16> llvm::createUnaryMask(ArrayRef<int> Mask,
                                                 unsigned NumElts) {
  // Avoid casts in the loop and make sure we have a reasonable number.
  int NumEltsSigned = NumElts;
  assert(NumEltsSigned > 0 && "Expected smaller or non-zero element count");

  // If the mask chooses an element from operand 1, reduce it to choose from
  // the corresponding element of operand 0. Undef mask elements are unchanged.
  SmallVector<int, 16> UnaryMask;
  for (int MaskElt : Mask) {
    assert((MaskElt < NumEltsSigned * 2) && "Expected valid shuffle mask");
    int UnaryElt = MaskElt >= NumEltsSigned ? MaskElt - NumEltsSigned : MaskElt;
    UnaryMask.push_back(UnaryElt);
  }
  return UnaryMask;
}
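// Illustrative example: for NumElts = 4, the binary mask <0, 5, 2, 7> becomes
// the unary mask <0, 1, 2, 3>, folding second-operand indices (>= 4) back
// onto the first operand.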
/// A helper function for concatenating vectors.
static Value *concatenateTwoVectors(IRBuilderBase &Builder, Value *V1,
                                    Value *V2) {
  VectorType *VecTy1 = dyn_cast<VectorType>(V1->getType());
  VectorType *VecTy2 = dyn_cast<VectorType>(V2->getType());
  assert(VecTy1 && VecTy2 &&
         VecTy1->getScalarType() == VecTy2->getScalarType() &&
         "Expect two vectors with the same element type");

  unsigned NumElts1 = cast<FixedVectorType>(VecTy1)->getNumElements();
  unsigned NumElts2 = cast<FixedVectorType>(VecTy2)->getNumElements();
  assert(NumElts1 >= NumElts2 && "Unexpect the first vector has less elements");

  if (NumElts1 > NumElts2) {
    // Extend with UNDEFs.
    V2 = Builder.CreateShuffleVector(
        V2, createSequentialMask(0, NumElts2, NumElts1 - NumElts2));
  }

  return Builder.CreateShuffleVector(
      V1, V2, createSequentialMask(0, NumElts1 + NumElts2, 0));
}
/// Concatenate a list of vectors, pairwise, until a single vector remains.
Value *llvm::concatenateVectors(IRBuilderBase &Builder,
                                ArrayRef<Value *> Vecs) {
  unsigned NumVecs = Vecs.size();
  assert(NumVecs > 1 && "Should be at least two vectors");

  SmallVector<Value *, 8> ResList;
  ResList.append(Vecs.begin(), Vecs.end());
  do {
    SmallVector<Value *, 8> TmpList;
    for (unsigned i = 0; i < NumVecs - 1; i += 2) {
      Value *V0 = ResList[i], *V1 = ResList[i + 1];
      assert((V0->getType() == V1->getType() || i == NumVecs - 2) &&
             "Only the last vector may have a different type");

      TmpList.push_back(concatenateTwoVectors(Builder, V0, V1));
    }

    // Push the last vector if the total number of vectors is odd.
    if (NumVecs % 2 != 0)
      TmpList.push_back(ResList[NumVecs - 1]);

    ResList = TmpList;
    NumVecs = ResList.size();
  } while (NumVecs > 1);

  return ResList[0];
}
/// Given a mask vector of i1, return true if all of the elements of this
/// predicate mask are known to be false or undef.
bool llvm::maskIsAllZeroOrUndef(Value *Mask) {
  assert(isa<VectorType>(Mask->getType()) &&
         isa<IntegerType>(Mask->getType()->getScalarType()) &&
         cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
             1 &&
         "Mask must be a vector of i1");

  auto *ConstMask = dyn_cast<Constant>(Mask);
  if (!ConstMask)
    return false;
  if (ConstMask->isNullValue() || isa<UndefValue>(ConstMask))
    return true;
  if (isa<ScalableVectorType>(ConstMask->getType()))
    return false;
  for (unsigned
           I = 0,
           E = cast<FixedVectorType>(ConstMask->getType())->getNumElements();
       I != E; ++I) {
    if (auto *MaskElt = ConstMask->getAggregateElement(I))
      if (MaskElt->isNullValue() || isa<UndefValue>(MaskElt))
        continue;
    return false;
  }
  return true;
}
/// Given a mask vector of i1, return true if all of the elements of this
/// predicate mask are known to be true or undef.
bool llvm::maskIsAllOneOrUndef(Value *Mask) {
  assert(isa<VectorType>(Mask->getType()) &&
         isa<IntegerType>(Mask->getType()->getScalarType()) &&
         cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
             1 &&
         "Mask must be a vector of i1");

  auto *ConstMask = dyn_cast<Constant>(Mask);
  if (!ConstMask)
    return false;
  if (ConstMask->isAllOnesValue() || isa<UndefValue>(ConstMask))
    return true;
  if (isa<ScalableVectorType>(ConstMask->getType()))
    return false;
  for (unsigned
           I = 0,
           E = cast<FixedVectorType>(ConstMask->getType())->getNumElements();
       I != E; ++I) {
    if (auto *MaskElt = ConstMask->getAggregateElement(I))
      if (MaskElt->isAllOnesValue() || isa<UndefValue>(MaskElt))
        continue;
    return false;
  }
  return true;
}
/// Given a mask vector of the form <Y x i1>, return an APInt (of bitwidth Y)
/// with a bit set for each lane that may be demanded.
APInt llvm::possiblyDemandedEltsInMask(Value *Mask) {
  assert(isa<FixedVectorType>(Mask->getType()) &&
         isa<IntegerType>(Mask->getType()->getScalarType()) &&
         cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
             1 &&
         "Mask must be a fixed width vector of i1");

  const unsigned VWidth =
      cast<FixedVectorType>(Mask->getType())->getNumElements();
  APInt DemandedElts = APInt::getAllOnes(VWidth);
  if (auto *CV = dyn_cast<ConstantVector>(Mask))
    for (unsigned i = 0; i < VWidth; i++)
      if (CV->getAggregateElement(i)->isNullValue())
        DemandedElts.clearBit(i);
  return DemandedElts;
}
bool InterleavedAccessInfo::isStrided(int Stride) {
  unsigned Factor = std::abs(Stride);
  return Factor >= 2 && Factor <= MaxInterleaveGroupFactor;
}
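// Example: with the default max-interleave-group-factor of 8, strides -3 and
// 5 are interleaving candidates, while strides 1, -1, and 9 are not.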
void InterleavedAccessInfo::collectConstStrideAccesses(
    MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
    const ValueToValueMap &Strides) {
  auto &DL = TheLoop->getHeader()->getModule()->getDataLayout();

  // Visit the blocks in the loop in reverse postorder so the load/store
  // instructions are maintained in program order for the analysis.
  LoopBlocksDFS DFS(TheLoop);
  DFS.perform(LI);
  for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO()))
    for (auto &I : *BB) {
      Value *Ptr = getLoadStorePointerOperand(&I);
      if (!Ptr)
        continue;
      Type *ElementTy = getLoadStoreType(&I);

      // Currently, codegen doesn't support cases where the type size doesn't
      // match the alloc size.
      uint64_t Size = DL.getTypeAllocSize(ElementTy);
      if (Size * 8 != DL.getTypeSizeInBits(ElementTy))
        continue;

      // We don't check wrapping here because we don't know yet if Ptr will be
      // part of a full group or a group with gaps.
      int64_t Stride = getPtrStride(PSE, ElementTy, Ptr, TheLoop, Strides,
                                    /*Assume=*/true, /*ShouldCheckWrap=*/false)
                           .value_or(0);

      const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
      AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size,
                                              getLoadStoreAlignment(&I));
    }
}
/// Analyze the interleaved accesses and collect them in interleave groups.
void InterleavedAccessInfo::analyzeInterleaving(
    bool EnablePredicatedInterleavedMemAccesses) {
  LLVM_DEBUG(dbgs() << "LV: Analyzing interleaved accesses...\n");
  const ValueToValueMap &Strides = LAI->getSymbolicStrides();

  // Holds all accesses with a constant stride.
  MapVector<Instruction *, StrideDescriptor> AccessStrideInfo;
  collectConstStrideAccesses(AccessStrideInfo, Strides);

  if (AccessStrideInfo.empty())
    return;

  // Collect the dependences in the loop.
  collectDependences();

  // Holds all interleaved store and load groups temporarily.
  SmallSetVector<InterleaveGroup<Instruction> *, 4> StoreGroups;
  SmallSetVector<InterleaveGroup<Instruction> *, 4> LoadGroups;

  // Search in bottom-up program order for pairs of accesses (A and B) that
  // can form interleaved groups.
  for (auto BI = AccessStrideInfo.rbegin(), E = AccessStrideInfo.rend();
       BI != E; ++BI) {
    Instruction *B = BI->first;
    StrideDescriptor DesB = BI->second;

    // Initialize a group for B if it has an allowable stride. Even if we
    // don't create a group for B, we continue with the bottom-up algorithm to
    // ensure we don't break any of B's dependences.
    InterleaveGroup<Instruction> *Group = nullptr;
    if (isStrided(DesB.Stride) &&
        (!isPredicated(B->getParent()) ||
         EnablePredicatedInterleavedMemAccesses)) {
      Group = getInterleaveGroup(B);
      if (!Group)
        Group = createInterleaveGroup(B, DesB.Stride, DesB.Alignment);
      if (B->mayWriteToMemory())
        StoreGroups.insert(Group);
      else
        LoadGroups.insert(Group);
    }

    for (auto AI = std::next(BI); AI != E; ++AI) {
      Instruction *A = AI->first;
      StrideDescriptor DesA = AI->second;

      // We can't have dependences between accesses in an interleaved group
      // and other accesses located between the first and last member of the
      // group.
      if (!canReorderMemAccessesForInterleavedGroups(&*AI, &*BI)) {
        // If a dependence exists and A is already in a group, B must be a
        // store; release A's group.
        if (isInterleaved(A)) {
          InterleaveGroup<Instruction> *StoreGroup = getInterleaveGroup(A);
          LLVM_DEBUG(dbgs() << "LV: Invalidated store group due to "
                               "dependence between " << *A << " and "<< *B
                            << '\n');
          StoreGroups.remove(StoreGroup);
          releaseGroup(StoreGroup);
        }
        break;
      }

      // If either A or B isn't strided, there's nothing left to do.
      if (!isStrided(DesA.Stride) || !isStrided(DesB.Stride))
        continue;

      // Ignore A if it's already in a group or isn't the same kind of memory
      // operation as B.
      if (isInterleaved(A) ||
          (A->mayReadFromMemory() != B->mayReadFromMemory()) ||
          (A->mayWriteToMemory() != B->mayWriteToMemory()))
        continue;

      // A and B must have the same stride and size.
      if (DesA.Stride != DesB.Stride || DesA.Size != DesB.Size)
        continue;

      // Calculate the distance from A to B.
      // ...
      // The distance must be a multiple of the size.
      if (DistanceToB % static_cast<int64_t>(DesB.Size))
        continue;

      // All members of a predicated interleave-group must have the same
      // predicate, and currently must reside in the same BB.
      BasicBlock *BlockA = A->getParent();
      BasicBlock *BlockB = B->getParent();
      if ((isPredicated(BlockA) || isPredicated(BlockB)) &&
          (!EnablePredicatedInterleavedMemAccesses || BlockA != BlockB))
        continue;

      // The index of A is the index of B plus A's distance to B in multiples
      // of the size.
      int IndexA =
          Group->getIndex(B) + DistanceToB / static_cast<int64_t>(DesB.Size);

      // Try to insert A into B's group.
      if (Group->insertMember(A, IndexA, DesA.Alignment)) {
        LLVM_DEBUG(dbgs() << "LV: Inserted:" << *A << '\n'
                          << "    into the interleave group with" << *B
                          << '\n');
        InterleaveGroupMap[A] = Group;

        // Set the first load in program order as the insert position.
        if (A->mayReadFromMemory())
          Group->setInsertPos(A);
      }
    } // Iteration over A accesses.
  } // Iteration over B accesses.

  auto InvalidateGroupIfMemberMayWrap =
      [&](InterleaveGroup<Instruction> *Group, unsigned Index,
          std::string FirstOrLast) -> bool {
    Instruction *Member = Group->getMember(Index);
    assert(Member && "Group member does not exist");
    Value *MemberPtr = getLoadStorePointerOperand(Member);
    Type *AccessTy = getLoadStoreType(Member);
    if (getPtrStride(PSE, AccessTy, MemberPtr, TheLoop, Strides,
                     /*Assume=*/false, /*ShouldCheckWrap=*/true).value_or(0))
      return false;
    LLVM_DEBUG(dbgs() << "LV: Invalidate candidate interleaved group due to "
                      << FirstOrLast
                      << " group member potentially pointer-wrapping.\n");
    releaseGroup(Group);
    return true;
  };

  // Remove interleaved groups with gaps whose memory accesses may wrap.
  for (auto *Group : LoadGroups) {
    // A full group needs no checks.
    if (Group->getNumMembers() == Group->getFactor())
      continue;

    // If the first and last members of the group don't wrap, this implies
    // that all the pointers in the group don't wrap.
    if (InvalidateGroupIfMemberMayWrap(Group, 0, std::string("first")))
      continue;
    if (Group->getMember(Group->getFactor() - 1))
      InvalidateGroupIfMemberMayWrap(Group, Group->getFactor() - 1,
                                     std::string("last"));
    else {
      // A non-reversed interleaved load group with gaps needs at least one
      // scalar epilogue iteration.
      if (Group->isReverse()) {
        LLVM_DEBUG(
            dbgs() << "LV: Invalidate candidate interleaved group due to "
                      "a reverse access with gaps.\n");
        releaseGroup(Group);
        continue;
      }
      LLVM_DEBUG(
          dbgs() << "LV: Interleaved group requires epilogue iteration.\n");
      RequiresScalarEpilogue = true;
    }
  }

  for (auto *Group : StoreGroups) {
    // A full group needs no checks.
    if (Group->getNumMembers() == Group->getFactor())
      continue;

    // An interleave-store-group with gaps is implemented using a masked wide
    // store; remove it if masked interleaving is not enabled.
    if (!EnablePredicatedInterleavedMemAccesses) {
      LLVM_DEBUG(
          dbgs() << "LV: Invalidate candidate interleaved store group due "
                    "to gaps.\n");
      releaseGroup(Group);
      continue;
    }

    // If the first and last members of the group don't wrap, this implies
    // that all the pointers in the group don't wrap.
    if (InvalidateGroupIfMemberMayWrap(Group, 0, std::string("first")))
      continue;
    for (int Index = Group->getFactor() - 1; Index > 0; Index--)
      if (Group->getMember(Index)) {
        InvalidateGroupIfMemberMayWrap(Group, Index, std::string("last"));
        break;
      }
  }
}

/// Invalidate groups that require a scalar epilogue (due to gaps).
void InterleavedAccessInfo::invalidateGroupsRequiringScalarEpilogue() {
  // If no group had triggered the requirement for an epilogue loop, there is
  // nothing to do.
  if (!requiresScalarEpilogue())
    return;

  bool ReleasedGroup = false;
  // Release groups requiring scalar epilogues. This also removes them from
  // InterleaveGroups.
  for (auto *Group : make_early_inc_range(InterleaveGroups)) {
    if (!Group->requiresScalarEpilogue())
      continue;
    LLVM_DEBUG(
        dbgs()
        << "LV: Invalidate candidate interleaved group due to gaps that "
           "require a scalar epilogue (not allowed under optsize) and cannot "
           "be masked (not enabled). \n");
    releaseGroup(Group);
    ReleasedGroup = true;
  }
  assert(ReleasedGroup && "At least one group must be invalidated, as a "
                          "scalar epilogue was required");
  (void)ReleasedGroup;
  RequiresScalarEpilogue = false;
}
template <typename InstT>
void InterleaveGroup<InstT>::addMetadata(InstT *NewInst) const {
  llvm_unreachable("addMetadata can only be used for Instruction");
}

namespace llvm {
template <>
void InterleaveGroup<Instruction>::addMetadata(Instruction *NewInst) const {
  SmallVector<Value *, 4> VL;
  std::transform(Members.begin(), Members.end(), std::back_inserter(VL),
                 [](std::pair<int, Instruction *> p) { return p.second; });
  propagateMetadata(NewInst, VL);
}
} // namespace llvm
/// Mangle VectorName according to the LangRef specification for
/// vector-function-ABI-variant attributes.
std::string VFABI::mangleTLIVectorName(StringRef VectorName,
                                       StringRef ScalarName, unsigned numArgs,
                                       ElementCount VF) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << "_ZGV" << VFABI::_LLVM_ << "N";
  if (VF.isScalable())
    Out << 'x';
  else
    Out << VF.getFixedValue();
  for (unsigned I = 0; I < numArgs; ++I)
    Out << "v";
  Out << "_" << ScalarName << "(" << VectorName << ")";
  return std::string(Out.str());
}
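// Illustrative example (assuming the "_LLVM_" internal ISA token): a
// 1-argument mapping from "sinf" to "vsinf" at fixed VF = 4 mangles to
// "_ZGV_LLVM_N4v_sinf(vsinf)".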
/// Populates VariantMappings with the Vector Function ABI variants associated
/// with CI.
void VFABI::getVectorVariantNames(
    const CallInst &CI, SmallVectorImpl<std::string> &VariantMappings) {
  const StringRef S = CI.getFnAttr(VFABI::MappingsAttrName).getValueAsString();
  if (S.empty())
    return;

  SmallVector<StringRef, 8> ListAttr;
  S.split(ListAttr, ",");

  for (const auto &S : SetVector<StringRef>(ListAttr.begin(), ListAttr.end())) {
#ifndef NDEBUG
    LLVM_DEBUG(dbgs() << "VFABI: adding mapping '" << S << "'\n");
    std::optional<VFInfo> Info =
        VFABI::tryDemangleForVFABI(S, *(CI.getModule()));
    assert(Info && "Invalid name for a VFABI variant.");
    assert(CI.getModule()->getFunction(Info->VectorName) &&
           "Vector function is missing.");
#endif
    VariantMappings.push_back(std::string(S));
  }
}
/// Validation check on the Parameters in the VFShape.
bool VFShape::hasValidParameterList() const {
  for (unsigned Pos = 0, NumParams = Parameters.size(); Pos < NumParams;
       ++Pos) {
    assert(Parameters[Pos].ParamPos == Pos && "Broken parameter list.");

    switch (Parameters[Pos].ParamKind) {
    default: // Nothing to check.
      break;
    case VFParamKind::OMP_Linear:
    case VFParamKind::OMP_LinearRef:
    case VFParamKind::OMP_LinearVal:
    case VFParamKind::OMP_LinearUVal:
      // Compile-time linear steps must be non-zero.
      if (Parameters[Pos].LinearStepOrPos == 0)
        return false;
      break;
    case VFParamKind::OMP_LinearPos:
    case VFParamKind::OMP_LinearRefPos:
    case VFParamKind::OMP_LinearValPos:
    case VFParamKind::OMP_LinearUValPos:
      // The runtime linear step must refer to some other parameter in the
      // signature.
      if (Parameters[Pos].LinearStepOrPos >= int(NumParams))
        return false;
      // ...
      // A linear parameter can't refer to itself.
      if (Parameters[Pos].LinearStepOrPos == int(Pos))
        return false;
      break;
    case VFParamKind::GlobalPredicate:
      // The global predicate must be unique; it can be placed anywhere in the
      // signature.
      for (unsigned NextPos = Pos + 1; NextPos < NumParams; ++NextPos)
        if (Parameters[NextPos].ParamKind == VFParamKind::GlobalPredicate)
          return false;
      break;
    }
  }
  return true;
}