#define DEBUG_TYPE "vectorutils"

/// Maximum factor for an interleaved memory access.
static cl::opt<unsigned> MaxInterleaveGroupFactor(
    "max-interleave-group-factor", cl::Hidden,
    cl::desc("Maximum factor for an interleaved access group (default = 8)"),
    cl::init(8));
// From llvm::isTriviallyVectorizable(Intrinsic::ID ID):
  case Intrinsic::bswap:
  case Intrinsic::bitreverse:
  case Intrinsic::ctpop:
  case Intrinsic::sadd_sat:
  case Intrinsic::ssub_sat:
  case Intrinsic::uadd_sat:
  case Intrinsic::usub_sat:
  case Intrinsic::smul_fix:
  case Intrinsic::smul_fix_sat:
  case Intrinsic::umul_fix:
  case Intrinsic::umul_fix_sat:
  case Intrinsic::log10:
  case Intrinsic::minnum:
  case Intrinsic::maxnum:
  case Intrinsic::minimum:
  case Intrinsic::maximum:
  case Intrinsic::copysign:
  case Intrinsic::floor:
  case Intrinsic::trunc:
  case Intrinsic::nearbyint:
  case Intrinsic::round:
  case Intrinsic::roundeven:
  case Intrinsic::fmuladd:
  case Intrinsic::is_fpclass:
  case Intrinsic::canonicalize:
  case Intrinsic::fptosi_sat:
  case Intrinsic::fptoui_sat:
  case Intrinsic::lrint:
  case Intrinsic::llrint:
bool llvm::isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID,
                                              unsigned ScalarOpdIdx) {
  switch (ID) {
  case Intrinsic::ctlz:
  case Intrinsic::cttz:
  case Intrinsic::is_fpclass:
  case Intrinsic::powi:
    return (ScalarOpdIdx == 1);
  case Intrinsic::smul_fix:
  case Intrinsic::smul_fix_sat:
  case Intrinsic::umul_fix:
  case Intrinsic::umul_fix_sat:
    return (ScalarOpdIdx == 2);
// From llvm::isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx):
  case Intrinsic::fptosi_sat:
  case Intrinsic::fptoui_sat:
  case Intrinsic::lrint:
  case Intrinsic::llrint:
    return OpdIdx == -1 || OpdIdx == 0;
  case Intrinsic::is_fpclass:
  case Intrinsic::powi:
    return OpdIdx == -1 || OpdIdx == 1;
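A brief, hedged illustration of how a vectorizer typically consumes these two queries when widening a call; the loop and the variables CI and TLI are hypothetical, only the helper calls come from this file:

// Sketch: decide, per operand, whether to splat it or keep it scalar.
// For llvm.powi.f32.i32(float %x, i32 %p), operand 1 (the power) is the one
// operand that must stay scalar and that also participates in overloading.
Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
for (unsigned OpIdx = 0; OpIdx != CI->arg_size(); ++OpIdx) {
  bool KeepScalar = isVectorIntrinsicWithScalarOpAtArg(ID, OpIdx);
  bool Overloaded = isVectorIntrinsicWithOverloadTypeAtArg(ID, OpIdx);
  // For powi both are true only when OpIdx == 1.
}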
// From llvm::getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI):
      ID == Intrinsic::lifetime_end || ID == Intrinsic::assume ||
      ID == Intrinsic::experimental_noalias_scope_decl ||
      ID == Intrinsic::sideeffect || ID == Intrinsic::pseudoprobe)
// From llvm::findScalarElement(Value *V, unsigned EltNo):
  assert(V->getType()->isVectorTy() && "Not looking at a vector?");
  VectorType *VTy = cast<VectorType>(V->getType());
  if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
    unsigned Width = FVTy->getNumElements();
    return C->getAggregateElement(EltNo);
    if (!isa<ConstantInt>(III->getOperand(2)))
    unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue();
      return III->getOperand(1);
    if (III == III->getOperand(0))
  if (SVI && isa<FixedVectorType>(SVI->getType())) {
    if (InEl < (int)LHSWidth)
  if (Constant *Elt = C->getAggregateElement(EltNo))
    if (Elt->isNullValue())
  if (isa<ScalableVectorType>(VTy))
    if (EltNo < VTy->getElementCount().getKnownMinValue())
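A small hedged example of the kind of query findScalarElement answers; Inserted and Extracts are hypothetical values from a surrounding pass:

// Given IR such as:  %v1 = insertelement <4 x i32> %v0, i32 %x, i32 2
// asking for element 2 of %v1 yields %x without emitting an extractelement:
if (Value *Scalar = findScalarElement(Inserted, /*EltNo=*/2))
  Extracts.push_back(Scalar); // reuse the already-available scalar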
// From llvm::getSplatIndex(ArrayRef<int> Mask):
    if (SplatIndex != -1 && SplatIndex != M)
  assert((SplatIndex == -1 || SplatIndex >= 0) && "Negative index?");
// From llvm::getSplatValue(const Value *V):
  if (isa<VectorType>(V->getType()))
    if (auto *C = dyn_cast<Constant>(V))
      return C->getSplatValue();
// From llvm::isSplatValue(const Value *V, int Index, unsigned Depth):
  if (isa<VectorType>(V->getType())) {
    if (isa<UndefValue>(V))
    if (auto *C = dyn_cast<Constant>(V))
      return C->getSplatValue() != nullptr;

  if (auto *Shuf = dyn_cast<ShuffleVectorInst>(V)) {
bool llvm::getShuffleDemandedElts(int SrcWidth, ArrayRef<int> Mask,
                                  const APInt &DemandedElts, APInt &DemandedLHS,
                                  APInt &DemandedRHS, bool AllowUndefElts) {
  if (DemandedElts.isZero())
  if (all_of(Mask, [](int Elt) { return Elt == 0; })) {
  for (unsigned I = 0, E = Mask.size(); I != E; ++I) {
    assert((-1 <= M) && (M < (SrcWidth * 2)) &&
           "Invalid shuffle mask constant");
    if (!DemandedElts[I] || (AllowUndefElts && (M < 0)))
      DemandedRHS.setBit(M - SrcWidth);
// From llvm::narrowShuffleMaskElts(int Scale, ArrayRef<int> Mask, SmallVectorImpl<int> &ScaledMask):
  assert(Scale > 0 && "Unexpected scaling factor");
    ScaledMask.assign(Mask.begin(), Mask.end());
  for (int MaskElt : Mask) {
           "Overflowed 32-bits");
    for (int SliceElt = 0; SliceElt != Scale; ++SliceElt)
      ScaledMask.push_back(MaskElt < 0 ? MaskElt : Scale * MaskElt + SliceElt);
// From llvm::widenShuffleMaskElts(int Scale, ArrayRef<int> Mask, SmallVectorImpl<int> &ScaledMask):
  assert(Scale > 0 && "Unexpected scaling factor");
    ScaledMask.assign(Mask.begin(), Mask.end());
  int NumElts = Mask.size();
  if (NumElts % Scale != 0)
  ScaledMask.reserve(NumElts / Scale);
    assert((int)MaskSlice.size() == Scale && "Expected Scale-sized slice.");
    int SliceFront = MaskSlice.front();
    if (SliceFront < 0) {
      if (SliceFront % Scale != 0)
      for (int i = 1; i < Scale; ++i)
        if (MaskSlice[i] != SliceFront + i)
      ScaledMask.push_back(SliceFront / Scale);
    Mask = Mask.drop_front(Scale);
  } while (!Mask.empty());
  assert((int)ScaledMask.size() * Scale == NumElts && "Unexpected scaled mask");
// From llvm::scaleShuffleMaskElts(unsigned NumDstElts, ArrayRef<int> Mask, SmallVectorImpl<int> &ScaledMask):
  unsigned NumSrcElts = Mask.size();
  assert(NumSrcElts > 0 && NumDstElts > 0 && "Unexpected scaling factor");
  if (NumSrcElts == NumDstElts) {
    ScaledMask.assign(Mask.begin(), Mask.end());
  assert(((NumSrcElts % NumDstElts) == 0 || (NumDstElts % NumSrcElts) == 0) &&
         "Unexpected scaling factor");
  if (NumSrcElts > NumDstElts) {
    int Scale = NumSrcElts / NumDstElts;
  int Scale = NumDstElts / NumSrcElts;
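A hedged example of the three scaling helpers; the masks are made up, and the expected results follow from the loops above:

SmallVector<int, 16> Narrowed, Widened, Scaled;
narrowShuffleMaskElts(/*Scale=*/2, {1, 0}, Narrowed);                 // Narrowed == {2, 3, 0, 1}
bool CanWiden = widenShuffleMaskElts(/*Scale=*/2, Narrowed, Widened); // true, Widened == {1, 0}
scaleShuffleMaskElts(/*NumDstElts=*/4, {1, 0}, Scaled);               // NumDstElts > Mask.size(),
                                                                      // so it narrows: {2, 3, 0, 1}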
// From llvm::getShuffleMaskWithWidestElts(ArrayRef<int> Mask, SmallVectorImpl<int> &ScaledMask):
  std::array<SmallVector<int, 16>, 2> TmpMasks;
  for (unsigned Scale = 2; Scale <= InputMask.size(); ++Scale) {
void llvm::processShuffleMasks(
    ArrayRef<int> Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs,
    unsigned NumOfUsedRegs, function_ref<void()> NoInputAction,
  int Sz = Mask.size();
  unsigned SzDest = Sz / NumOfDestRegs;
  unsigned SzSrc = Sz / NumOfSrcRegs;
  for (unsigned I = 0; I < NumOfDestRegs; ++I) {
    auto &RegMasks = Res[I];
    RegMasks.assign(NumOfSrcRegs, {});
    for (unsigned K = 0; K < SzDest; ++K) {
      int Idx = I * SzDest + K;
      int SrcRegIdx = Mask[Idx] / SzSrc;
      if (RegMasks[SrcRegIdx].empty())
      RegMasks[SrcRegIdx][K] = Mask[Idx] % SzSrc;
  for (unsigned I = 0; I < NumOfUsedRegs; ++I) {
    switch (NumSrcRegs) {
      unsigned SrcReg = std::distance(Dest.begin(), It);
      SingleInputAction(*It, SrcReg, I);
                   "Expected undefined mask element.");
            FirstMask[Idx] = SecondMask[Idx] + VF;
        for (int Idx = 0, VF = Mask.size(); Idx < VF; ++Idx) {
        for (unsigned I = 0; I < NumOfDestRegs; ++I) {
          if (FirstIdx == SecondIdx) {
          SecondMask = RegMask;
          CombineMasks(FirstMask, SecondMask);
          ManyInputsAction(FirstMask, FirstIdx, SecondIdx);
          NormalizeMask(FirstMask);
          SecondMask = FirstMask;
          SecondIdx = FirstIdx;
        if (FirstIdx != SecondIdx && SecondIdx >= 0) {
          CombineMasks(SecondMask, FirstMask);
          ManyInputsAction(SecondMask, SecondIdx, FirstIdx);
          Dest[FirstIdx].clear();
          NormalizeMask(SecondMask);
      } while (SecondIdx >= 0);
void llvm::getHorizDemandedEltsForFirstOperand(unsigned VectorBitWidth,
                                               const APInt &DemandedElts,
                                               APInt &DemandedLHS,
                                               APInt &DemandedRHS) {
  assert(VectorBitWidth >= 128 && "Vectors smaller than 128 bit not supported");
  int NumLanes = VectorBitWidth / 128;
  int NumEltsPerLane = NumElts / NumLanes;
  int HalfEltsPerLane = NumEltsPerLane / 2;
  for (int Idx = 0; Idx != NumElts; ++Idx) {
    if (!DemandedElts[Idx])
    int LaneIdx = (Idx / NumEltsPerLane) * NumEltsPerLane;
    int LocalIdx = Idx % NumEltsPerLane;
    if (LocalIdx < HalfEltsPerLane) {
      DemandedLHS.setBit(LaneIdx + 2 * LocalIdx);
      LocalIdx -= HalfEltsPerLane;
      DemandedRHS.setBit(LaneIdx + 2 * LocalIdx);
// From llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB, const TargetTransformInfo *TTI):
  bool SeenExtFromIllegalType = false;
    for (auto &I : *BB) {
      if (TTI && (isa<ZExtInst>(&I) || isa<SExtInst>(&I)) &&
        SeenExtFromIllegalType = true;
      if ((isa<TruncInst>(&I) || isa<ICmpInst>(&I)) &&
          !I.getType()->isVectorTy() &&
          I.getOperand(0)->getType()->getScalarSizeInBits() <= 64) {
  if (Worklist.empty() || (TTI && !SeenExtFromIllegalType))
  while (!Worklist.empty()) {
    if (!Visited.insert(Val).second)
    if (!isa<Instruction>(Val))
    if (DB.getDemandedBits(I).getBitWidth() > 64)
    uint64_t V = DB.getDemandedBits(I).getZExtValue();
    if (isa<SExtInst>(I) || isa<ZExtInst>(I) || isa<LoadInst>(I) ||
    if (isa<BitCastInst>(I) || isa<PtrToIntInst>(I) || isa<IntToPtrInst>(I) ||
        !I->getType()->isIntegerTy()) {
      DBits[Leader] |= ~0ULL;
    if (DBits[Leader] == ~0ULL)
    for (Value *O : cast<User>(I)->operands()) {
  for (auto &I : DBits)
    for (auto *U : I.first->users())
      if (U->getType()->isIntegerTy() && DBits.count(U) == 0)
  for (auto I = ECs.begin(), E = ECs.end(); I != E; ++I) {
      LeaderDemandedBits |= DBits[M];
      auto *MI = dyn_cast<Instruction>(M);
      Type *Ty = M->getType();
        Ty = MI->getOperand(0)->getType();
            auto *CI = dyn_cast<ConstantInt>(U);
            if (CI &&
                isa<ShlOperator, LShrOperator, AShrOperator>(U.getUser()) &&
                U.getOperandNo() == 1)
              return CI->uge(MinBW);
            uint64_t BW = bit_width(DB.getDemandedBits(&U).getZExtValue());
            return bit_ceil(BW) > MinBW;
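To make the width computation concrete, the arithmetic for one equivalence class looks like this; the demanded-bits value is illustrative only:

// If the union of demanded bits over a class is 0x1F, only 5 bits are live:
uint64_t LeaderDemandedBits = 0x1F;
uint64_t MinBW = llvm::bit_width(LeaderDemandedBits); // 5
MinBW = llvm::bit_ceil(MinBW);                        // rounded up to 8 -> the class can shrink to i8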
/// Add all access groups in AccGroups to List.
template <typename ListT>
static void addToAccessGroupList(ListT &List, MDNode *AccGroups) {
    List.insert(AccGroups);
  for (const auto &AccGroupListOp : AccGroups->operands()) {
    auto *Item = cast<MDNode>(AccGroupListOp.get());
// From llvm::uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2):
  if (AccGroups1 == AccGroups2)
  if (Union.size() == 0)
  if (Union.size() == 1)
    return cast<MDNode>(Union.front());

// From llvm::intersectAccessGroups(const Instruction *Inst1, const Instruction *Inst2):
  if (!MayAccessMem1 && !MayAccessMem2)
    return Inst2->getMetadata(LLVMContext::MD_access_group);
    return Inst1->getMetadata(LLVMContext::MD_access_group);
    if (AccGroupSet2.count(MD1))
      auto *Item = cast<MDNode>(Node.get());
      if (AccGroupSet2.count(Item))
  if (Intersection.size() == 0)
  if (Intersection.size() == 1)
    return cast<MDNode>(Intersection.front());
// From llvm::propagateMetadata(Instruction *Inst, ArrayRef<Value *> VL):
  for (auto Kind : {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
                    LLVMContext::MD_noalias, LLVMContext::MD_fpmath,
                    LLVMContext::MD_nontemporal, LLVMContext::MD_invariant_load,
                    LLVMContext::MD_access_group, LLVMContext::MD_mmra}) {
    for (int J = 1, E = VL.size(); MD && J != E; ++J) {
      case LLVMContext::MD_mmra: {
      case LLVMContext::MD_tbaa:
      case LLVMContext::MD_alias_scope:
      case LLVMContext::MD_fpmath:
      case LLVMContext::MD_noalias:
      case LLVMContext::MD_nontemporal:
      case LLVMContext::MD_invariant_load:
      case LLVMContext::MD_access_group:
// From llvm::createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF, const InterleaveGroup<Instruction> &Group):
  for (unsigned i = 0; i < VF; i++)
    for (unsigned j = 0; j < Group.getFactor(); ++j) {
      unsigned HasMember = Group.getMember(j) ? 1 : 0;
      Mask.push_back(Builder.getInt1(HasMember));
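A hedged example of the resulting gap mask; Builder is an IRBuilderBase and Group is assumed to be a factor-3 group whose member at index 1 is missing:

Constant *GapMask = createBitMaskForGaps(Builder, /*VF=*/2, Group);
// One i1 per interleaved lane, i.e. VF copies of the per-member pattern {1, 0, 1}:
// GapMask == <i1 1, i1 0, i1 1, i1 1, i1 0, i1 1>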
// From llvm::createReplicatedMask(unsigned ReplicationFactor, unsigned VF):
  for (unsigned i = 0; i < VF; i++)
    for (unsigned j = 0; j < ReplicationFactor; j++)

// From llvm::createInterleaveMask(unsigned VF, unsigned NumVecs):
  for (unsigned i = 0; i < VF; i++)
    for (unsigned j = 0; j < NumVecs; j++)
      Mask.push_back(j * VF + i);

// From llvm::createStrideMask(unsigned Start, unsigned Stride, unsigned VF):
  for (unsigned i = 0; i < VF; i++)
    Mask.push_back(Start + i * Stride);

llvm::SmallVector<int, 16> llvm::createSequentialMask(unsigned Start,
                                                      unsigned NumInts,
                                                      unsigned NumUndefs) {
  for (unsigned i = 0; i < NumInts; i++)
    Mask.push_back(Start + i);
  for (unsigned i = 0; i < NumUndefs; i++)
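For orientation, a sketch of the index vectors these helpers return; the values follow directly from the loops above:

createReplicatedMask(/*ReplicationFactor=*/2, /*VF=*/3);            // {0, 0, 1, 1, 2, 2}
createInterleaveMask(/*VF=*/4, /*NumVecs=*/2);                      // {0, 4, 1, 5, 2, 6, 3, 7}
createStrideMask(/*Start=*/0, /*Stride=*/2, /*VF=*/4);              // {0, 2, 4, 6}
createSequentialMask(/*Start=*/0, /*NumInts=*/4, /*NumUndefs=*/2);  // {0, 1, 2, 3, -1, -1}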
// From llvm::createUnaryMask(ArrayRef<int> Mask, unsigned NumElts):
  int NumEltsSigned = NumElts;
  assert(NumEltsSigned > 0 && "Expected smaller or non-zero element count");
  for (int MaskElt : Mask) {
    assert((MaskElt < NumEltsSigned * 2) && "Expected valid shuffle mask");
    int UnaryElt = MaskElt >= NumEltsSigned ? MaskElt - NumEltsSigned : MaskElt;
// From concatenateTwoVectors(IRBuilderBase &Builder, Value *V1, Value *V2):
  VectorType *VecTy2 = dyn_cast<VectorType>(V2->getType());
  assert(VecTy1 && VecTy2 &&
         VecTy1->getScalarType() == VecTy2->getScalarType() &&
         "Expect two vectors with the same element type");
  unsigned NumElts1 = cast<FixedVectorType>(VecTy1)->getNumElements();
  unsigned NumElts2 = cast<FixedVectorType>(VecTy2)->getNumElements();
  assert(NumElts1 >= NumElts2 && "Unexpect the first vector has less elements");
  if (NumElts1 > NumElts2) {

// From llvm::concatenateVectors(IRBuilderBase &Builder, ArrayRef<Value *> Vecs):
  unsigned NumVecs = Vecs.size();
  assert(NumVecs > 1 && "Should be at least two vectors");
    for (unsigned i = 0; i < NumVecs - 1; i += 2) {
      Value *V0 = ResList[i], *V1 = ResList[i + 1];
      assert((V0->getType() == V1->getType() || i == NumVecs - 2) &&
             "Only the last vector may have a different type");
    if (NumVecs % 2 != 0)
      TmpList.push_back(ResList[NumVecs - 1]);
    NumVecs = ResList.size();
  } while (NumVecs > 1);
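A hedged usage sketch for the concatenation helpers; Builder, A (a <4 x float>) and B (a <2 x float>) are assumed to exist in a surrounding pass:

Value *Wide = concatenateVectors(Builder, {A, B});
// B is first padded with createSequentialMask(0, 2, 2) = {0, 1, -1, -1}, then the
// pair is joined with createSequentialMask(0, 6, 0) = {0, 1, 2, 3, 4, 5},
// so Wide has type <6 x float>.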
// From llvm::maskIsAllZeroOrUndef(Value *Mask):
  assert(isa<VectorType>(Mask->getType()) &&
         isa<IntegerType>(Mask->getType()->getScalarType()) &&
         cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() == 1 &&
         "Mask must be a vector of i1");
  auto *ConstMask = dyn_cast<Constant>(Mask);
  if (ConstMask->isNullValue() || isa<UndefValue>(ConstMask))
  if (isa<ScalableVectorType>(ConstMask->getType()))
                E = cast<FixedVectorType>(ConstMask->getType())->getNumElements();
    if (auto *MaskElt = ConstMask->getAggregateElement(I))
      if (MaskElt->isNullValue() || isa<UndefValue>(MaskElt))

// From llvm::maskIsAllOneOrUndef(Value *Mask):
  assert(isa<VectorType>(Mask->getType()) &&
         isa<IntegerType>(Mask->getType()->getScalarType()) &&
         cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() == 1 &&
         "Mask must be a vector of i1");
  auto *ConstMask = dyn_cast<Constant>(Mask);
  if (ConstMask->isAllOnesValue() || isa<UndefValue>(ConstMask))
  if (isa<ScalableVectorType>(ConstMask->getType()))
                E = cast<FixedVectorType>(ConstMask->getType())->getNumElements();
    if (auto *MaskElt = ConstMask->getAggregateElement(I))
      if (MaskElt->isAllOnesValue() || isa<UndefValue>(MaskElt))

// From llvm::maskContainsAllOneOrUndef(Value *Mask):
  assert(isa<VectorType>(Mask->getType()) &&
         isa<IntegerType>(Mask->getType()->getScalarType()) &&
         cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() == 1 &&
         "Mask must be a vector of i1");
  auto *ConstMask = dyn_cast<Constant>(Mask);
  if (ConstMask->isAllOnesValue() || isa<UndefValue>(ConstMask))
  if (isa<ScalableVectorType>(ConstMask->getType()))
                E = cast<FixedVectorType>(ConstMask->getType())->getNumElements();
    if (auto *MaskElt = ConstMask->getAggregateElement(I))
      if (MaskElt->isAllOnesValue() || isa<UndefValue>(MaskElt))

// From llvm::possiblyDemandedEltsInMask(Value *Mask):
  assert(isa<FixedVectorType>(Mask->getType()) &&
         isa<IntegerType>(Mask->getType()->getScalarType()) &&
         cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() == 1 &&
         "Mask must be a fixed width vector of i1");
  const unsigned VWidth =
      cast<FixedVectorType>(Mask->getType())->getNumElements();
  if (auto *CV = dyn_cast<ConstantVector>(Mask))
    for (unsigned i = 0; i < VWidth; i++)
      if (CV->getAggregateElement(i)->isNullValue())
  return DemandedElts;
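An illustrative query against the helper above; MaskVal is a hypothetical constant mask:

// MaskVal = <4 x i1> <i1 1, i1 0, i1 undef, i1 1>
APInt Demanded = possiblyDemandedEltsInMask(MaskVal);
// Only element 1 is provably zero; the undef lane must be treated as live, so
// Demanded == 0b1101.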
bool InterleavedAccessInfo::isStrided(int Stride) {
  unsigned Factor = std::abs(Stride);

void InterleavedAccessInfo::collectConstStrideAccesses(
    for (auto &I : *BB) {
      if (Size * 8 != DL.getTypeSizeInBits(ElementTy))
                       true, false).value_or(0);
      AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size,
// From InterleavedAccessInfo::analyzeInterleaving():
    bool EnablePredicatedInterleavedMemAccesses) {
  collectConstStrideAccesses(AccessStrideInfo, Strides);
  if (AccessStrideInfo.empty())
  collectDependences();
  for (auto BI = AccessStrideInfo.rbegin(), E = AccessStrideInfo.rend();
    StrideDescriptor DesB = BI->second;
    if (isStrided(DesB.Stride) &&
        (!isPredicated(B->getParent()) || EnablePredicatedInterleavedMemAccesses)) {
        GroupB = createInterleaveGroup(B, DesB.Stride, DesB.Alignment);
        if (B->mayWriteToMemory())
          StoreGroups.insert(GroupB);
          LoadGroups.insert(GroupB);
    for (auto AI = std::next(BI); AI != E; ++AI) {
      StrideDescriptor DesA = AI->second;
        if (MemberOfGroupB && !canReorderMemAccessesForInterleavedGroups(
                                  A, &*AccessStrideInfo.find(MemberOfGroupB)))
          return MemberOfGroupB;
      if (A->mayWriteToMemory() && GroupA != GroupB) {
        if (GroupB && LoadGroups.contains(GroupB))
          DependentInst = DependentMember(GroupB, &*AI);
        else if (!canReorderMemAccessesForInterleavedGroups(&*AI, &*BI))
        if (DependentInst) {
          if (GroupA && StoreGroups.contains(GroupA)) {
                                 "dependence between "
                              << *A << " and " << *DependentInst << '\n');
            StoreGroups.remove(GroupA);
            releaseGroup(GroupA);
          if (GroupB && LoadGroups.contains(GroupB)) {
                              << " as complete.\n");
            CompletedLoadGroups.insert(GroupB);
      if (CompletedLoadGroups.contains(GroupB)) {
      if (!isStrided(DesA.Stride) || !isStrided(DesB.Stride))
          (A->mayReadFromMemory() != B->mayReadFromMemory()) ||
          (A->mayWriteToMemory() != B->mayWriteToMemory()))
      if (DesA.Stride != DesB.Stride || DesA.Size != DesB.Size)
      if (DistanceToB % static_cast<int64_t>(DesB.Size))
      if ((isPredicated(BlockA) || isPredicated(BlockB)) &&
          (!EnablePredicatedInterleavedMemAccesses || BlockA != BlockB))
          GroupB->getIndex(B) + DistanceToB / static_cast<int64_t>(DesB.Size);
                        << " into the interleave group with" << *B
        InterleaveGroupMap[A] = GroupB;
        if (A->mayReadFromMemory())
  auto InvalidateGroupIfMemberMayWrap = [&](InterleaveGroup<Instruction> *Group,
                                            unsigned Index,
                                            std::string FirstOrLast) -> bool {
    assert(Member && "Group member does not exist");
    if (getPtrStride(PSE, AccessTy, MemberPtr, TheLoop, Strides,
                     false, true).value_or(0))
    LLVM_DEBUG(dbgs() << "LV: Invalidate candidate interleaved group due to "
                      << FirstOrLast
                      << " group member potentially pointer-wrapping.\n");
    releaseGroup(Group);
  for (auto *Group : LoadGroups) {
    if (InvalidateGroupIfMemberMayWrap(Group, 0, std::string("first")))
      InvalidateGroupIfMemberMayWrap(Group, Group->getFactor() - 1,
                                     std::string("last"));
      LLVM_DEBUG(
          dbgs() << "LV: Invalidate candidate interleaved group due to "
                    "a reverse access with gaps.\n");
      releaseGroup(Group);
    LLVM_DEBUG(dbgs() << "LV: Interleaved group requires epilogue iteration.\n");
    RequiresScalarEpilogue = true;
  for (auto *Group : StoreGroups) {
    if (!EnablePredicatedInterleavedMemAccesses) {
      LLVM_DEBUG(
          dbgs() << "LV: Invalidate candidate interleaved store group due "
      releaseGroup(Group);
    if (InvalidateGroupIfMemberMayWrap(Group, 0, std::string("first")))
    InvalidateGroupIfMemberMayWrap(Group, Index, std::string("last"));
// From InterleavedAccessInfo::invalidateGroupsRequiringScalarEpilogue():
  bool ReleasedGroup = InterleaveGroups.remove_if([&](auto *Group) {
    if (!Group->requiresScalarEpilogue())
        << "LV: Invalidate candidate interleaved group due to gaps that "
           "require a scalar epilogue (not allowed under optsize) and cannot "
           "be masked (not enabled). \n");
    releaseGroupWithoutRemovingFromSet(Group);
  assert(ReleasedGroup && "At least one group must be invalidated, as a "
                          "scalar epilogue was required");
  (void)ReleasedGroup;
  RequiresScalarEpilogue = false;
// From InterleaveGroup<InstT>::addMetadata and its InterleaveGroup<Instruction> specialization:
template <typename InstT>
  std::transform(Members.begin(), Members.end(), std::back_inserter(VL),
                 [](std::pair<int, Instruction *> p) { return p.second; });