114#include <type_traits>
120#define DEBUG_TYPE "load-store-vectorizer"
122STATISTIC(NumVectorInstructions,
"Number of vector accesses generated");
123STATISTIC(NumScalarsVectorized,
"Number of scalar accesses vectorized");
133 std::tuple<
const Value * ,
139 const EqClassKey &K) {
142 <<
" of element size " << ElementSize <<
" bits in addrspace "
159 APInt OffsetFromLeader;
160 ChainElem(Instruction *Inst, APInt OffsetFromLeader)
161 : Inst(std::
move(Inst)), OffsetFromLeader(std::
move(OffsetFromLeader)) {}
// Sorts chain C in place by instruction position: element A is ordered before
// element B when A.Inst->comesBefore(B.Inst).
// (The closing brace of this function is not part of this excerpt.)
165void sortChainInBBOrder(Chain &
C) {
166 sort(
C, [](
auto &
A,
auto &
B) {
return A.Inst->comesBefore(
B.Inst); });
// Sorts chain C in place by OffsetFromLeader, compared as signed values (slt).
// Elements with the same offset are ordered by instruction position.
// (The closers of the lambda and of the function are not part of this excerpt.)
169void sortChainInOffsetOrder(Chain &
C) {
170 sort(
C, [](
const auto &
A,
const auto &
B) {
// Primary key: signed offset from the chain leader.
171 if (
A.OffsetFromLeader !=
B.OffsetFromLeader)
172 return A.OffsetFromLeader.slt(
B.OffsetFromLeader);
// Tie-break on equal offsets: whichever instruction comes first.
173 return A.Inst->comesBefore(
B.Inst);
178 for (
const auto &
E :
C) {
179 dbgs() <<
" " << *
E.Inst <<
" (offset " <<
E.OffsetFromLeader <<
")\n";
183using EquivalenceClassMap =
// Alignment value that splitChainByAlignment wraps in Align(...) as PrefAlign
// when it tries to raise the alignment of an alloca access.
// NOTE(review): presumably expressed in bytes -- confirm.
187constexpr unsigned StackAdjustedAlignment = 4;
191 for (
const ChainElem &
E :
C)
198 return LI !=
nullptr && LI->
hasMetadata(LLVMContext::MD_invariant_load);
208 while (!Worklist.
empty()) {
211 for (
int Idx = 0; Idx < NumOperands; Idx++) {
213 if (!IM || IM->
getOpcode() == Instruction::PHI)
221 assert(IM !=
I &&
"Unexpected cycle while re-ordering instructions");
224 InstructionsToMove.
insert(IM);
231 for (
auto BBI =
I->getIterator(),
E =
I->getParent()->end(); BBI !=
E;) {
233 if (!InstructionsToMove.
contains(IM))
245 TargetTransformInfo &TTI;
246 const DataLayout &DL;
257 DenseSet<Instruction *> ExtraElements;
// Binds the function and the analyses passed in to the members of the same
// name. Two members are derived rather than passed: DL comes from
// F.getDataLayout() and Builder is constructed from SE.getContext().
260 Vectorizer(Function &F,
AliasAnalysis &AA, AssumptionCache &AC,
261 DominatorTree &DT, ScalarEvolution &SE, TargetTransformInfo &TTI)
262 : F(F), AA(AA), AC(AC), DT(DT), SE(SE), TTI(TTI),
263 DL(F.getDataLayout()), Builder(SE.
getContext()) {}
268 static const unsigned MaxDepth = 3;
277 bool runOnEquivalenceClass(
const EqClassKey &EqClassKey,
// Processes one chain. The definition feeds C through
// splitChainByMayAliasInstrs, then splitChainByContiguity, then
// splitChainByAlignment, as three nested loops.
// NOTE(review): the innermost statement is not visible in this excerpt.
283 bool runOnChain(Chain &
C);
// Splits C into sub-chains by offset. The definition first sorts C in offset
// order and finally erases every resulting chain with at most one element.
289 std::vector<Chain> splitChainByContiguity(Chain &
C);
// Splits C wherever an intervening may-alias instruction prevents merging.
// The definition first sorts C in basic-block order and only records
// sub-chains that contain more than one element.
295 std::vector<Chain> splitChainByMayAliasInstrs(Chain &
C);
// Splits C into candidate sub-chains and discards candidates whose alignment
// fails accessIsAllowedAndFast. The definition first sorts C in offset order.
299 std::vector<Chain> splitChainByAlignment(Chain &
C);
// Vectorizes chain C. The definition sorts C in offset order, and it
// increments NumVectorInstructions and adds C.size() to NumScalarsVectorized.
303 bool vectorizeChain(Chain &
C);
306 std::optional<APInt> getConstantOffset(
Value *PtrA,
Value *PtrB,
307 Instruction *ContextInst,
309 std::optional<APInt> getConstantOffsetComplexAddrs(
Value *PtrA,
Value *PtrB,
310 Instruction *ContextInst,
312 std::optional<APInt> getConstantOffsetSelects(
Value *PtrA,
Value *PtrB,
313 Instruction *ContextInst,
319 Type *getChainElemTy(
const Chain &
C);
328 template <
bool IsLoadChain>
330 Instruction *ChainElem, Instruction *ChainBegin,
331 const DenseMap<Instruction *, APInt /*OffsetFromLeader*/> &ChainOffsets,
332 BatchAAResults &BatchAA);
337 void mergeEquivalenceClasses(EquivalenceClassMap &EQClasses)
const;
358 bool accessIsAllowedAndFast(
unsigned SizeBytes,
unsigned AS, Align Alignment,
359 unsigned VecElemBits)
const;
365 ChainElem createExtraElementAfter(
const ChainElem &PrevElem,
Type *Ty,
366 APInt
Offset, StringRef Prefix,
367 Align Alignment =
Align());
372 FixedVectorType *VecTy);
376 void deleteExtraElements();
379class LoadStoreVectorizerLegacyPass :
public FunctionPass {
383 LoadStoreVectorizerLegacyPass() : FunctionPass(ID) {}
387 StringRef getPassName()
const override {
388 return "GPU Load and Store Vectorizer";
391 void getAnalysisUsage(AnalysisUsage &AU)
const override {
403char LoadStoreVectorizerLegacyPass::ID = 0;
406 "Vectorize load and Store instructions",
false,
false)
414 "Vectorize load and store instructions",
false,
false)
417 return new LoadStoreVectorizerLegacyPass();
// Legacy pass-manager entry point for function F. Fetches the analyses the
// vectorizer needs, builds a Vectorizer over them and returns the result of
// its run().
420bool LoadStoreVectorizerLegacyPass::runOnFunction(
Function &
F) {
// Functions the pass manager asks to skip, and functions carrying the
// NoImplicitFloat attribute, are filtered here.
// NOTE(review): the guarded statement is not part of this excerpt --
// presumably an early return; confirm.
422 if (skipFunction(
F) ||
F.hasFnAttribute(Attribute::NoImplicitFloat))
// Analyses obtained from the wrapper passes of the legacy pass manager.
425 AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
426 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
427 ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
428 TargetTransformInfo &
TTI =
429 getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
F);
431 AssumptionCache &AC =
432 getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
F);
// The Vectorizer is a temporary; only the value returned by run() survives.
434 return Vectorizer(
F, AA, AC, DT, SE,
TTI).run();
440 if (
F.hasFnAttribute(Attribute::NoImplicitFloat))
455bool Vectorizer::run() {
482 for (
auto It = Barriers.
begin(), End = std::prev(Barriers.
end()); It != End;
484 Changed |= runOnPseudoBB(*It, *std::next(It));
497 I->eraseFromParent();
501 deleteExtraElements();
510 dbgs() <<
"LSV: Running on pseudo-BB [" << *Begin <<
" ... ";
511 if (End != Begin->getParent()->end())
514 dbgs() <<
"<BB end>";
519 for (
const auto &[EqClassKey, EqClass] :
520 collectEquivalenceClasses(Begin, End))
521 Changed |= runOnEquivalenceClass(EqClassKey, EqClass);
526bool Vectorizer::runOnEquivalenceClass(
const EqClassKey &EqClassKey,
531 dbgs() <<
"LSV: Running on equivalence class of size " << EqClass.
size()
532 <<
" keyed on " << EqClassKey <<
":\n";
533 for (Instruction *
I : EqClass)
534 dbgs() <<
" " << *
I <<
"\n";
537 std::vector<Chain> Chains = gatherChains(EqClass);
539 <<
" nontrivial chains.\n";);
540 for (Chain &
C : Chains)
545bool Vectorizer::runOnChain(Chain &
C) {
547 dbgs() <<
"LSV: Running on chain with " <<
C.size() <<
" instructions:\n";
558 for (
auto &
C : splitChainByMayAliasInstrs(
C))
559 for (
auto &
C : splitChainByContiguity(
C))
560 for (
auto &
C : splitChainByAlignment(
C))
565std::vector<Chain> Vectorizer::splitChainByMayAliasInstrs(Chain &
C) {
569 sortChainInBBOrder(
C);
572 dbgs() <<
"LSV: splitChainByMayAliasInstrs considering chain:\n";
580 for (
const auto &
E :
C)
581 ChainOffsets.insert({&*
E.Inst,
E.OffsetFromLeader});
585 BatchAAResults BatchAA(AA);
598 auto Impl = [&](
auto IsLoad) {
600 auto [ChainBegin, ChainEnd] = [&](
auto IsLoad) {
601 if constexpr (IsLoad())
602 return std::make_pair(
C.begin(),
C.end());
604 return std::make_pair(
C.rbegin(),
C.rend());
606 assert(ChainBegin != ChainEnd);
608 std::vector<Chain> Chains;
611 for (
auto ChainIt = std::next(ChainBegin); ChainIt != ChainEnd; ++ChainIt) {
613 ChainOffsets, BatchAA)) {
614 LLVM_DEBUG(
dbgs() <<
"LSV: No intervening may-alias instrs; can merge "
615 << *ChainIt->Inst <<
" into " << *ChainBegin->Inst
620 dbgs() <<
"LSV: Found intervening may-alias instrs; cannot merge "
621 << *ChainIt->Inst <<
" into " << *ChainBegin->Inst <<
"\n");
622 if (NewChain.
size() > 1) {
624 dbgs() <<
"LSV: got nontrivial chain without aliasing instrs:\n";
627 Chains.emplace_back(std::move(NewChain));
634 if (NewChain.
size() > 1) {
636 dbgs() <<
"LSV: got nontrivial chain without aliasing instrs:\n";
639 Chains.emplace_back(std::move(NewChain));
645 return Impl(std::bool_constant<true>());
648 return Impl(std::bool_constant<false>());
651std::vector<Chain> Vectorizer::splitChainByContiguity(Chain &
C) {
655 sortChainInOffsetOrder(
C);
658 dbgs() <<
"LSV: splitChainByContiguity considering chain:\n";
672 Align OptimisticAlign =
Align(MaxVecRegBits / 8);
673 unsigned int MaxVectorNumElems =
674 MaxVecRegBits /
DL.getTypeSizeInBits(ElementType);
681 FixedVectorType *OptimisticVectorType =
693 APInt OffsetOfBestAlignedElemFromLeader =
C[0].OffsetFromLeader;
694 for (
const auto &
E :
C) {
696 if (ElementAlignment > BestAlignedElemAlign) {
697 BestAlignedElemAlign = ElementAlignment;
698 OffsetOfBestAlignedElemFromLeader =
E.OffsetFromLeader;
702 auto DeriveAlignFromBestAlignedElem = [&](APInt NewElemOffsetFromLeader) {
704 BestAlignedElemAlign,
705 (NewElemOffsetFromLeader - OffsetOfBestAlignedElemFromLeader)
710 unsigned ASPtrBits =
DL.getIndexSizeInBits(AS);
712 std::vector<Chain> Ret;
713 Ret.push_back({
C.front()});
715 unsigned ChainElemTyBits =
DL.getTypeSizeInBits(getChainElemTy(
C));
716 ChainElem &Prev =
C[0];
717 for (
auto It = std::next(
C.begin()), End =
C.end(); It != End; ++It) {
718 auto &CurChain = Ret.back();
722 APInt PrevReadEnd = Prev.OffsetFromLeader + PrevSzBytes;
727 8 * SzBytes % ChainElemTyBits == 0 &&
728 "Every chain-element size must be a multiple of the element size after "
730 APInt ReadEnd = It->OffsetFromLeader + SzBytes;
732 bool AreContiguous =
false;
733 if (It->OffsetFromLeader.sle(PrevReadEnd)) {
735 uint64_t Overlap = (PrevReadEnd - It->OffsetFromLeader).getZExtValue();
736 if (8 * Overlap % ChainElemTyBits == 0)
737 AreContiguous =
true;
741 << (AreContiguous ?
"contiguous" :
"chain-breaker")
742 << *It->Inst <<
" (starts at offset "
743 << It->OffsetFromLeader <<
")\n");
751 bool GapFilled =
false;
752 if (!AreContiguous && TryFillGaps && PrevSzBytes == SzBytes) {
753 APInt GapSzBytes = It->OffsetFromLeader - PrevReadEnd;
754 if (GapSzBytes == PrevSzBytes) {
756 ChainElem NewElem = createExtraElementAfter(
758 DeriveAlignFromBestAlignedElem(PrevReadEnd));
759 CurChain.push_back(NewElem);
765 if ((GapSzBytes == 2 * PrevSzBytes) && (CurChain.size() % 4 == 1)) {
766 ChainElem NewElem1 = createExtraElementAfter(
768 DeriveAlignFromBestAlignedElem(PrevReadEnd));
769 ChainElem NewElem2 = createExtraElementAfter(
771 DeriveAlignFromBestAlignedElem(PrevReadEnd + PrevSzBytes));
772 CurChain.push_back(NewElem1);
773 CurChain.push_back(NewElem2);
778 if (AreContiguous || GapFilled)
779 CurChain.push_back(*It);
781 Ret.push_back({*It});
785 if (ReadEnd.
sge(PrevReadEnd))
790 llvm::erase_if(Ret, [](
const auto &Chain) {
return Chain.size() <= 1; });
794Type *Vectorizer::getChainElemTy(
const Chain &
C) {
807 if (
any_of(
C, [](
const ChainElem &
E) {
810 return Type::getIntNTy(
815 for (
const ChainElem &
E :
C)
821std::vector<Chain> Vectorizer::splitChainByAlignment(Chain &
C) {
834 sortChainInOffsetOrder(
C);
837 dbgs() <<
"LSV: splitChainByAlignment considering chain:\n";
842 auto GetVectorFactor = [&](
unsigned VF,
unsigned LoadStoreSize,
845 ChainSizeBytes, VecTy)
847 ChainSizeBytes, VecTy);
851 for (
const auto &
E :
C) {
854 "Should have filtered out non-power-of-two elements in "
855 "collectEquivalenceClasses.");
865 bool CandidateChainsMayContainExtraLoadsStores =
any_of(
866 C, [
this](
const ChainElem &
E) {
return ExtraElements.
contains(
E.Inst); });
868 std::vector<Chain> Ret;
869 for (
unsigned CBegin = 0; CBegin <
C.size(); ++CBegin) {
877 APInt PrevReadEnd =
C[CBegin].OffsetFromLeader + Sz;
878 for (
unsigned CEnd = CBegin + 1,
Size =
C.size(); CEnd <
Size; ++CEnd) {
879 APInt ReadEnd =
C[CEnd].OffsetFromLeader +
881 unsigned BytesAdded =
882 PrevReadEnd.
sle(ReadEnd) ? (ReadEnd - PrevReadEnd).getSExtValue() : 0;
884 if (Sz > VecRegBytes)
886 CandidateChains.emplace_back(CEnd, Sz);
891 for (
auto It = CandidateChains.rbegin(), End = CandidateChains.rend();
893 auto [CEnd, SizeBytes] = *It;
895 dbgs() <<
"LSV: splitChainByAlignment considering candidate chain ["
896 << *
C[CBegin].Inst <<
" ... " << *
C[CEnd].Inst <<
"]\n");
898 Type *VecElemTy = getChainElemTy(
C);
902 unsigned VecElemBits =
DL.getTypeSizeInBits(VecElemTy);
905 assert((8 * SizeBytes) % VecElemBits == 0);
906 unsigned NumVecElems = 8 * SizeBytes / VecElemBits;
908 unsigned VF = 8 * VecRegBytes / VecElemBits;
911 unsigned TargetVF = GetVectorFactor(VF, VecElemBits,
912 VecElemBits * NumVecElems / 8, VecTy);
913 if (TargetVF != VF && TargetVF < NumVecElems) {
915 dbgs() <<
"LSV: splitChainByAlignment discarding candidate chain "
917 << TargetVF <<
" != VF=" << VF
918 <<
" and TargetVF < NumVecElems=" << NumVecElems <<
"\n");
932 bool IsAllocaAccess = AS ==
DL.getAllocaAddrSpace() &&
935 Align PrefAlign =
Align(StackAdjustedAlignment);
936 if (IsAllocaAccess && Alignment.
value() % SizeBytes != 0 &&
937 accessIsAllowedAndFast(SizeBytes, AS, PrefAlign, VecElemBits)) {
939 PtrOperand, PrefAlign,
DL,
C[CBegin].Inst,
nullptr, &DT);
940 if (NewAlign >= Alignment) {
942 <<
"LSV: splitByChain upgrading alloca alignment from "
943 << Alignment.
value() <<
" to " << NewAlign.
value()
945 Alignment = NewAlign;
949 Chain ExtendingLoadsStores;
950 if (!accessIsAllowedAndFast(SizeBytes, AS, Alignment, VecElemBits)) {
954 bool AllowedAndFast =
false;
959 assert(VecElemBits % 8 == 0);
960 unsigned VecElemBytes = VecElemBits / 8;
962 unsigned NewSizeBytes = VecElemBytes * NewNumVecElems;
965 "TargetVF expected to be a power of 2");
966 assert(NewNumVecElems <= TargetVF &&
967 "Should not extend past TargetVF");
970 <<
"LSV: attempting to extend chain of " << NumVecElems
971 <<
" " << (IsLoadChain ?
"loads" :
"stores") <<
" to "
972 << NewNumVecElems <<
" elements\n");
973 bool IsLegalToExtend =
983 if (IsLegalToExtend &&
984 accessIsAllowedAndFast(NewSizeBytes, AS, Alignment,
987 <<
"LSV: extending " << (IsLoadChain ?
"load" :
"store")
988 <<
" chain of " << NumVecElems <<
" "
989 << (IsLoadChain ?
"loads" :
"stores")
990 <<
" with total byte size of " << SizeBytes <<
" to "
991 << NewNumVecElems <<
" "
992 << (IsLoadChain ?
"loads" :
"stores")
993 <<
" with total byte size of " << NewSizeBytes
994 <<
", TargetVF=" << TargetVF <<
" \n");
1000 unsigned ASPtrBits =
DL.getIndexSizeInBits(AS);
1001 for (
unsigned I = 0;
I < (NewNumVecElems - NumVecElems);
I++) {
1002 ChainElem NewElem = createExtraElementAfter(
1003 C[CBegin], VecElemTy,
1004 APInt(ASPtrBits, SizeBytes +
I * VecElemBytes),
"Extend");
1005 ExtendingLoadsStores.push_back(NewElem);
1009 SizeBytes = NewSizeBytes;
1010 NumVecElems = NewNumVecElems;
1011 AllowedAndFast =
true;
1014 if (!AllowedAndFast) {
1017 <<
"LSV: splitChainByAlignment discarding candidate chain "
1018 "because its alignment is not AllowedAndFast: "
1019 << Alignment.
value() <<
"\n");
1029 dbgs() <<
"LSV: splitChainByAlignment discarding candidate chain "
1030 "because !isLegalToVectorizeLoad/StoreChain.");
1034 if (CandidateChainsMayContainExtraLoadsStores) {
1046 [
this](
const ChainElem &
E) {
1050 if (CurrCandContainsExtraLoadsStores &&
1058 <<
"LSV: splitChainByAlignment discarding candidate chain "
1059 "because it contains extra loads/stores that we cannot "
1060 "legally vectorize into a masked load/store \n");
1067 for (
unsigned I = CBegin;
I <= CEnd; ++
I)
1068 NewChain.emplace_back(
C[
I]);
1069 for (ChainElem
E : ExtendingLoadsStores)
1070 NewChain.emplace_back(
E);
1078bool Vectorizer::vectorizeChain(Chain &
C) {
1083 C, [
this](
const ChainElem &
E) {
return ExtraElements.
contains(
E.Inst); });
1087 if (
C.size() == 2 && ChainContainsExtraLoadsStores)
1090 sortChainInOffsetOrder(
C);
1093 dbgs() <<
"LSV: Vectorizing chain of " <<
C.size() <<
" instructions:\n";
1097 Type *VecElemTy = getChainElemTy(
C);
1101 APInt PrevReadEnd =
C[0].OffsetFromLeader + BytesAdded;
1102 unsigned ChainBytes = BytesAdded;
1103 for (
auto It = std::next(
C.begin()), End =
C.end(); It != End; ++It) {
1105 APInt ReadEnd = It->OffsetFromLeader + SzBytes;
1108 PrevReadEnd.
sle(ReadEnd) ? (ReadEnd - PrevReadEnd).getSExtValue() : 0;
1109 ChainBytes += BytesAdded;
1113 assert(8 * ChainBytes %
DL.getTypeSizeInBits(VecElemTy) == 0);
1116 unsigned NumElem = 8 * ChainBytes /
DL.getTypeSizeInBits(VecElemTy);
1122 if (AS ==
DL.getAllocaAddrSpace()) {
1123 Alignment = std::max(
1126 MaybeAlign(),
DL,
C[0].Inst,
nullptr, &DT));
1131 for (
const ChainElem &
E :
C)
1133 DL.getTypeStoreSize(VecElemTy));
1142 return A.Inst->comesBefore(
B.Inst);
1147 if (ChainContainsExtraLoadsStores) {
1164 for (
const ChainElem &
E :
C) {
1169 (
E.OffsetFromLeader -
C[0].OffsetFromLeader).getZExtValue();
1170 unsigned VecIdx = 8 * EOffset /
DL.getTypeSizeInBits(VecElemTy);
1181 if (
V->getType() !=
I->getType())
1209 return A.Inst->comesBefore(
B.Inst);
1214 auto InsertElem = [&](
Value *
V,
unsigned VecIdx) {
1215 if (
V->getType() != VecElemTy)
1219 for (
const ChainElem &
E :
C) {
1222 (
E.OffsetFromLeader -
C[0].OffsetFromLeader).getZExtValue();
1223 unsigned VecIdx = 8 * EOffset /
DL.getTypeSizeInBits(VecElemTy);
1224 if (FixedVectorType *VT =
1226 for (
int J = 0, JE = VT->getNumElements(); J < JE; ++J) {
1232 InsertElem(
I->getValueOperand(), VecIdx);
1238 if (ChainContainsExtraLoadsStores) {
1255 for (
const ChainElem &
E :
C)
1256 ToErase.emplace_back(
E.Inst);
1258 ++NumVectorInstructions;
1259 NumScalarsVectorized +=
C.size();
1263template <
bool IsLoadChain>
1264bool Vectorizer::isSafeToMove(
1265 Instruction *ChainElem, Instruction *ChainBegin,
1266 const DenseMap<Instruction *, APInt /*OffsetFromLeader*/> &ChainOffsets,
1267 BatchAAResults &BatchAA) {
1268 LLVM_DEBUG(
dbgs() <<
"LSV: isSafeToMove(" << *ChainElem <<
" -> "
1269 << *ChainBegin <<
")\n");
1272 if (ChainElem == ChainBegin)
1280 auto BBIt = std::next([&] {
1281 if constexpr (IsLoadChain)
1286 auto BBItEnd = std::next([&] {
1287 if constexpr (IsLoadChain)
1293 const APInt &ChainElemOffset = ChainOffsets.
at(ChainElem);
1294 const unsigned ChainElemSize =
1297 for (; BBIt != BBItEnd; ++BBIt) {
1300 if (!
I->mayReadOrWriteMemory())
1317 if (
auto OffsetIt = ChainOffsets.
find(
I); OffsetIt != ChainOffsets.
end()) {
1324 const APInt &IOffset = OffsetIt->second;
1326 if (IOffset == ChainElemOffset ||
1327 (IOffset.
sle(ChainElemOffset) &&
1328 (IOffset + IElemSize).sgt(ChainElemOffset)) ||
1329 (ChainElemOffset.sle(IOffset) &&
1330 (ChainElemOffset + ChainElemSize).sgt(OffsetIt->second))) {
1337 dbgs() <<
"LSV: Found alias in chain: " << *
I <<
"\n";
1349 <<
" Aliasing instruction:\n"
1350 <<
" " << *
I <<
'\n'
1351 <<
" Aliased instruction and pointer:\n"
1352 <<
" " << *ChainElem <<
'\n'
1370 unsigned MatchingOpIdxB,
bool Signed) {
1371 LLVM_DEBUG(
dbgs() <<
"LSV: checkIfSafeAddSequence IdxDiff=" << IdxDiff
1372 <<
", AddOpA=" << *AddOpA <<
", MatchingOpIdxA="
1373 << MatchingOpIdxA <<
", AddOpB=" << *AddOpB
1374 <<
", MatchingOpIdxB=" << MatchingOpIdxB
1375 <<
", Signed=" <<
Signed <<
"\n");
1391 AddOpB->
getOpcode() == Instruction::Add &&
1395 Value *OtherOperandA = AddOpA->
getOperand(MatchingOpIdxA == 1 ? 0 : 1);
1396 Value *OtherOperandB = AddOpB->
getOperand(MatchingOpIdxB == 1 ? 0 : 1);
1400 if (OtherInstrB && OtherInstrB->
getOpcode() == Instruction::Add &&
1405 if (OtherInstrB->
getOperand(0) == OtherOperandA &&
1410 if (OtherInstrA && OtherInstrA->
getOpcode() == Instruction::Add &&
1415 if (OtherInstrA->
getOperand(0) == OtherOperandB &&
1421 if (OtherInstrA && OtherInstrB &&
1422 OtherInstrA->
getOpcode() == Instruction::Add &&
1423 OtherInstrB->
getOpcode() == Instruction::Add &&
1440std::optional<APInt> Vectorizer::getConstantOffsetComplexAddrs(
1442 LLVM_DEBUG(
dbgs() <<
"LSV: getConstantOffsetComplexAddrs PtrA=" << *PtrA
1443 <<
" PtrB=" << *PtrB <<
" ContextInst=" << *ContextInst
1444 <<
" Depth=" <<
Depth <<
"\n");
1448 return getConstantOffsetSelects(PtrA, PtrB, ContextInst,
Depth);
1452 if (GEPA->getNumOperands() != GEPB->getNumOperands() ||
1453 GEPA->getPointerOperand() != GEPB->getPointerOperand())
1454 return std::nullopt;
1457 for (
unsigned I = 0,
E = GEPA->getNumIndices() - 1;
I <
E; ++
I) {
1459 return std::nullopt;
1468 return std::nullopt;
1474 return std::nullopt;
1482 return std::nullopt;
1484 const SCEV *OffsetSCEVA = SE.
getSCEV(ValA);
1485 const SCEV *OffsetSCEVB = SE.
getSCEV(OpB);
1486 const SCEV *IdxDiffSCEV = SE.
getMinusSCEV(OffsetSCEVB, OffsetSCEVA);
1488 return std::nullopt;
1492 return std::nullopt;
1495 LLVM_DEBUG(
dbgs() <<
"LSV: getConstantOffsetComplexAddrs IdxDiff=" << IdxDiff
1503 if (OpB->
getOpcode() == Instruction::Add &&
1512 if (!Safe && OpA && OpA->
getOpcode() == Instruction::Add &&
1518 for (
unsigned MatchingOpIdxA : {0, 1})
1519 for (
unsigned MatchingOpIdxB : {0, 1})
1540 APInt BitsAllowedToBeSet = Known.Zero.zext(IdxDiff.
getBitWidth());
1543 Safe = BitsAllowedToBeSet.
uge(IdxDiff.
abs());
1547 return IdxDiff * Stride;
1548 return std::nullopt;
// Tries to compute the offset between two pointers that are both selects, by
// comparing their true arms and their false arms separately.
// (The parameter list and several statements are not part of this excerpt;
// SelectA / SelectB are defined on lines that are not shown.)
1551std::optional<APInt> Vectorizer::getConstantOffsetSelects(
// Depth is post-incremented on every call. Once it has reached MaxDepth the
// search is abandoned.
1553 if (
Depth++ == MaxDepth)
1554 return std::nullopt;
// The two selects must be steered by the very same condition value.
1558 if (SelectA->getCondition() != SelectB->getCondition())
1559 return std::nullopt;
1560 LLVM_DEBUG(
dbgs() <<
"LSV: getConstantOffsetSelects, PtrA=" << *PtrA
1561 <<
", PtrB=" << *PtrB <<
", ContextInst="
1562 << *ContextInst <<
", Depth=" <<
Depth <<
"\n");
// Offset between the two true arms, computed with the incremented Depth.
1563 std::optional<APInt> TrueDiff = getConstantOffset(
1564 SelectA->getTrueValue(), SelectB->getTrueValue(), ContextInst,
Depth);
// NOTE(review): the condition guarding this return is not visible here.
1566 return std::nullopt;
// Offset between the two false arms.
1567 std::optional<APInt> FalseDiff =
1568 getConstantOffset(SelectA->getFalseValue(), SelectB->getFalseValue(),
1569 ContextInst,
Depth);
// Both arms are compared for equality.
// NOTE(review): the statement executed when they agree is not visible here.
1570 if (TrueDiff == FalseDiff)
1574 return std::nullopt;
1577void Vectorizer::mergeEquivalenceClasses(EquivalenceClassMap &EQClasses)
const {
1578 if (EQClasses.size() < 2)
1583 static_assert(std::tuple_size_v<EqClassKey> == 4,
1584 "EqClassKey has changed - EqClassReducedKey needs changes too");
1585 using EqClassReducedKey =
1586 std::tuple<std::tuple_element_t<1, EqClassKey> ,
1587 std::tuple_element_t<2, EqClassKey> ,
1588 std::tuple_element_t<3, EqClassKey> >;
1589 using ECReducedKeyToUnderlyingObjectMap =
1590 MapVector<EqClassReducedKey,
1591 SmallPtrSet<std::tuple_element_t<0, EqClassKey>, 4>>;
1596 ECReducedKeyToUnderlyingObjectMap RedKeyToUOMap;
1597 bool FoundPotentiallyOptimizableEC =
false;
1598 for (
const auto &EC : EQClasses) {
1599 const auto &
Key =
EC.first;
1600 EqClassReducedKey RedKey{std::get<1>(
Key), std::get<2>(
Key),
1602 auto &UOMap = RedKeyToUOMap[RedKey];
1604 if (UOMap.size() > 1)
1605 FoundPotentiallyOptimizableEC =
true;
1607 if (!FoundPotentiallyOptimizableEC)
1611 dbgs() <<
"LSV: mergeEquivalenceClasses: before merging:\n";
1612 for (
const auto &EC : EQClasses) {
1613 dbgs() <<
" Key: {" <<
EC.first <<
"}\n";
1614 for (
const auto &Inst :
EC.second)
1615 dbgs() <<
" Inst: " << *Inst <<
'\n';
1619 dbgs() <<
"LSV: mergeEquivalenceClasses: RedKeyToUOMap:\n";
1620 for (
const auto &RedKeyToUO : RedKeyToUOMap) {
1621 dbgs() <<
" Reduced key: {" << std::get<0>(RedKeyToUO.first) <<
", "
1622 << std::get<1>(RedKeyToUO.first) <<
", "
1623 <<
static_cast<int>(std::get<2>(RedKeyToUO.first)) <<
"} --> "
1624 << RedKeyToUO.second.size() <<
" underlying objects:\n";
1625 for (
auto UObject : RedKeyToUO.second)
1626 dbgs() <<
" " << *UObject <<
'\n';
1630 using UObjectToUObjectMap = DenseMap<const Value *, const Value *>;
1633 auto GetUltimateTargets =
1634 [](SmallPtrSetImpl<const Value *> &UObjects) -> UObjectToUObjectMap {
1635 UObjectToUObjectMap IndirectionMap;
1636 for (
const auto *UObject : UObjects) {
1637 const unsigned MaxLookupDepth = 1;
1639 if (UltimateTarget != UObject)
1640 IndirectionMap[UObject] = UltimateTarget;
1642 UObjectToUObjectMap UltimateTargetsMap;
1643 for (
const auto *UObject : UObjects) {
1645 auto It = IndirectionMap.find(Target);
1646 for (; It != IndirectionMap.end(); It = IndirectionMap.find(Target))
1648 UltimateTargetsMap[UObject] =
Target;
1650 return UltimateTargetsMap;
1655 for (
auto &[RedKey, UObjects] : RedKeyToUOMap) {
1656 if (UObjects.size() < 2)
1658 auto UTMap = GetUltimateTargets(UObjects);
1659 for (
const auto &[UObject, UltimateTarget] : UTMap) {
1660 if (UObject == UltimateTarget)
1663 EqClassKey KeyFrom{UObject, std::get<0>(RedKey), std::get<1>(RedKey),
1664 std::get<2>(RedKey)};
1665 EqClassKey KeyTo{UltimateTarget, std::get<0>(RedKey), std::get<1>(RedKey),
1666 std::get<2>(RedKey)};
1669 const auto &VecTo = EQClasses[KeyTo];
1670 const auto &VecFrom = EQClasses[KeyFrom];
1671 SmallVector<Instruction *, 8> MergedVec;
1672 std::merge(VecFrom.begin(), VecFrom.end(), VecTo.begin(), VecTo.end(),
1673 std::back_inserter(MergedVec),
1674 [](Instruction *
A, Instruction *
B) {
1675 return A && B && A->comesBefore(B);
1677 EQClasses[KeyTo] = std::move(MergedVec);
1678 EQClasses.erase(KeyFrom);
1682 dbgs() <<
"LSV: mergeEquivalenceClasses: after merging:\n";
1683 for (
const auto &EC : EQClasses) {
1684 dbgs() <<
" Key: {" <<
EC.first <<
"}\n";
1685 for (
const auto &Inst :
EC.second)
1686 dbgs() <<
" Inst: " << *Inst <<
'\n';
1694 EquivalenceClassMap Ret;
1696 auto GetUnderlyingObject = [](
const Value *Ptr) ->
const Value * {
1705 return Sel->getCondition();
1716 if ((LI && !LI->
isSimple()) || (SI && !
SI->isSimple()))
1729 unsigned TySize =
DL.getTypeSizeInBits(Ty);
1730 if ((TySize % 8) != 0)
1744 unsigned VF = VecRegSize / TySize;
1749 (VecTy && !
isPowerOf2_32(
DL.getTypeSizeInBits(VecTy->getScalarType()))))
1753 if (TySize > VecRegSize / 2 ||
1757 Ret[{GetUnderlyingObject(Ptr), AS,
1763 mergeEquivalenceClasses(Ret);
1772 unsigned ASPtrBits =
DL.getIndexSizeInBits(AS);
1776 for (
size_t I = 1;
I < Instrs.
size(); ++
I) {
1777 assert(Instrs[
I - 1]->comesBefore(Instrs[
I]));
1786 struct InstrListElem : ilist_node<InstrListElem>,
1787 std::pair<Instruction *, Chain> {
1788 explicit InstrListElem(Instruction *
I)
// Key-info type supplied to the DenseSet<InstrListElem *, ...> named Chains.
// Entries are hashed and compared through the `first` member of the pointed-to
// InstrListElem (the Instruction * of its std::pair base), not through the
// pointer value. The empty and tombstone sentinels are borrowed from
// DenseMapInfo<InstrListElem *>.
// (The closing braces of several members are not part of this excerpt.)
1791 struct InstrListElemDenseMapInfo {
1792 using PtrInfo = DenseMapInfo<InstrListElem *>;
1793 using IInfo = DenseMapInfo<Instruction *>;
// Sentinel keys: forwarded unchanged from the pointer key-info.
1794 static InstrListElem *getEmptyKey() {
return PtrInfo::getEmptyKey(); }
1795 static InstrListElem *getTombstoneKey() {
1796 return PtrInfo::getTombstoneKey();
// Hash of an element is the hash of the instruction stored in `first`.
1798 static unsigned getHashValue(
const InstrListElem *
E) {
1799 return IInfo::getHashValue(
E->first);
// Equality. The sentinel pointers are checked first, before any dereference:
// a sentinel is equal only to the same sentinel.
1801 static bool isEqual(
const InstrListElem *
A,
const InstrListElem *
B) {
1802 if (
A == getEmptyKey() ||
B == getEmptyKey())
1803 return A == getEmptyKey() &&
B == getEmptyKey();
1804 if (
A == getTombstoneKey() ||
B == getTombstoneKey())
1805 return A == getTombstoneKey() &&
B == getTombstoneKey();
// Two real elements are equal when their `first` instructions are equal.
1806 return IInfo::isEqual(
A->first,
B->first);
1809 SpecificBumpPtrAllocator<InstrListElem>
Allocator;
1810 simple_ilist<InstrListElem> MRU;
1811 DenseSet<InstrListElem *, InstrListElemDenseMapInfo> Chains;
1816 for (Instruction *
I : Instrs) {
1817 constexpr int MaxChainsToTry = 64;
1819 bool MatchFound =
false;
1820 auto ChainIter = MRU.
begin();
1821 for (
size_t J = 0; J < MaxChainsToTry && ChainIter != MRU.
end();
1823 if (std::optional<APInt>
Offset = getConstantOffset(
1827 (ChainIter->first->comesBefore(
I) ?
I : ChainIter->first))) {
1830 ChainIter->second.emplace_back(
I,
Offset.value());
1840 APInt ZeroOffset(ASPtrBits, 0);
1841 InstrListElem *
E =
new (
Allocator.Allocate()) InstrListElem(
I);
1842 E->second.emplace_back(
I, ZeroOffset);
1848 std::vector<Chain> Ret;
1849 Ret.reserve(Chains.
size());
1852 if (
E.second.size() > 1)
1853 Ret.emplace_back(std::move(
E.second));
1857std::optional<APInt> Vectorizer::getConstantOffset(
Value *PtrA,
Value *PtrB,
1858 Instruction *ContextInst,
1861 <<
", PtrB=" << *PtrB <<
", ContextInst= " << *ContextInst
1862 <<
", Depth=" <<
Depth <<
"\n");
1865 unsigned OrigBitWidth =
DL.getIndexTypeSizeInBits(PtrA->
getType());
1866 APInt OffsetA(OrigBitWidth, 0);
1867 APInt OffsetB(OrigBitWidth, 0);
1870 unsigned NewPtrBitWidth =
DL.getTypeStoreSizeInBits(PtrA->
getType());
1871 if (NewPtrBitWidth !=
DL.getTypeStoreSizeInBits(PtrB->
getType()))
1872 return std::nullopt;
1877 assert(OffsetA.getSignificantBits() <= NewPtrBitWidth &&
1878 OffsetB.getSignificantBits() <= NewPtrBitWidth);
1880 OffsetA = OffsetA.sextOrTrunc(NewPtrBitWidth);
1881 OffsetB = OffsetB.sextOrTrunc(NewPtrBitWidth);
1883 return (OffsetB - OffsetA).sextOrTrunc(OrigBitWidth);
1888 LLVM_DEBUG(
dbgs() <<
"LSV: SCEV PtrB - PtrA =" << *DistScev <<
"\n");
1894 return (OffsetB - OffsetA + Dist).
sextOrTrunc(OrigBitWidth);
1897 if (std::optional<APInt> Diff =
1898 getConstantOffsetComplexAddrs(PtrA, PtrB, ContextInst,
Depth))
1899 return (OffsetB - OffsetA + Diff->sext(OffsetB.getBitWidth()))
1900 .sextOrTrunc(OrigBitWidth);
1901 return std::nullopt;
// Decides whether an access of SizeBytes bytes in address space AS, with the
// given Alignment, may be emitted as a single vector access whose elements are
// VecElemBits bits wide.
// (The line declaring the Alignment parameter and several statements,
// including the return statements, are not part of this excerpt.)
1904bool Vectorizer::accessIsAllowedAndFast(
unsigned SizeBytes,
unsigned AS,
1906 unsigned VecElemBits)
const {
// Case: the alignment is a multiple of the access size.
// NOTE(review): the guarded statement is not visible -- presumably an early
// `return true`; confirm.
1908 if (Alignment.
value() % SizeBytes == 0)
// Out-parameter filled in by the query below for the whole access of
// SizeBytes * 8 bits. The beginning of that call, and the definition of
// AllowsMisaligned, sit on a line that is not shown.
1912 unsigned VectorizedSpeed = 0;
1914 F.getContext(), SizeBytes * 8, AS, Alignment, &VectorizedSpeed);
// The misaligned access is not allowed at all.
1915 if (!AllowsMisaligned) {
1917 dbgs() <<
"LSV: Access of " << SizeBytes <<
"B in addrspace " << AS
1918 <<
" with alignment " << Alignment.
value()
1919 <<
" is misaligned, and therefore can't be vectorized.\n");
// Same query for one element of VecElemBits bits; only the speed written to
// ElementwiseSpeed is used, the call's return value is discarded.
1923 unsigned ElementwiseSpeed = 0;
1924 (
TTI).allowsMisalignedMemoryAccesses((
F).
getContext(), VecElemBits, AS,
1925 Alignment, &ElementwiseSpeed);
// The vector access is reported as slower than element-by-element access.
1926 if (VectorizedSpeed < ElementwiseSpeed) {
1927 LLVM_DEBUG(
dbgs() <<
"LSV: Access of " << SizeBytes <<
"B in addrspace "
1928 << AS <<
" with alignment " << Alignment.
value()
1929 <<
" has relative speed " << VectorizedSpeed
1930 <<
", which is lower than the elementwise speed of "
1932 <<
". Therefore this access won't be vectorized.\n");
1938ChainElem Vectorizer::createExtraElementAfter(
const ChainElem &Prev,
Type *Ty,
1939 APInt
Offset, StringRef Prefix,
1945 PrevLoad->getPointerOperand(), Builder.
getInt(
Offset), Prefix +
"GEP");
1946 LLVM_DEBUG(
dbgs() <<
"LSV: Extra GEP Created: \n" << *NewGep <<
"\n");
1953 LLVM_DEBUG(
dbgs() <<
"LSV: Extra GEP Created: \n" << *NewGep <<
"\n");
1963 ExtraElements.
insert(NewElement);
1965 APInt NewOffsetFromLeader = Prev.OffsetFromLeader +
Offset;
1968 <<
" OffsetFromLeader: " << NewOffsetFromLeader <<
"\n");
1969 return ChainElem{NewElement, NewOffsetFromLeader};
1973 FixedVectorType *VecTy) {
1979 for (
const ChainElem &
E :
C) {
1983 (
E.OffsetFromLeader -
C[0].OffsetFromLeader).getZExtValue();
1986 if (FixedVectorType *VT =
1988 for (
unsigned J = 0; J < VT->getNumElements(); ++J)
1989 MaskElts[VecIdx + J] = Builder.
getInt1(
true);
1991 MaskElts[VecIdx] = Builder.
getInt1(
true);
1996void Vectorizer::deleteExtraElements() {
1997 for (
auto *ExtraElement : ExtraElements) {
1999 [[maybe_unused]]
bool Deleted =
2001 assert(
Deleted &&
"Extra Load should always be trivially dead");
2007 ExtraElement->eraseFromParent();
2012 ExtraElements.clear();
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static bool isEqual(const Function &Caller, const Function &Callee)
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file defines the DenseMap class.
static bool runOnFunction(Function &F, bool PostInlining)
Module.h This file contains the declarations for the Module class.
static bool checkNoWrapFlags(Instruction *I, bool Signed)
static bool checkIfSafeAddSequence(const APInt &IdxDiff, Instruction *AddOpA, unsigned MatchingOpIdxA, Instruction *AddOpB, unsigned MatchingOpIdxB, bool Signed)
This file implements a map that provides insertion order iteration.
This file provides utility analysis objects describing memory locations.
static bool isInvariantLoad(const Instruction *I, const Value *Ptr, const bool IsKernelFn)
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
static bool isSafeToMove(const MachineInstr &From, const MachineInstr &To)
Check if it's safe to move From down to To, checking that no physical registers are clobbered.
Provides some synthesis utilities to produce sequences of values.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
A manager for alias analyses.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Class for arbitrary precision integers.
void clearBit(unsigned BitPosition)
Set a given bit to 0.
APInt abs() const
Get the absolute value.
unsigned getBitWidth() const
Return the number of bits in the APInt.
bool sle(const APInt &RHS) const
Signed less or equal comparison.
LLVM_ABI APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
bool sge(const APInt &RHS) const
Signed greater or equal comparison.
int64_t getSExtValue() const
Get sign extended value.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
AnalysisUsage & addRequired()
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
A function analysis which provides an AssumptionCache.
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
InstListType::reverse_iterator reverse_iterator
InstListType::iterator iterator
Instruction iterators...
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
Represents analyses that only rely on functions' control flow.
const APInt * getSingleElement() const
If this set contains a single element, return it, otherwise return null.
bool isSingleElement() const
Return true if this set contains exactly one member.
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
ValueT & at(const_arg_type_t< KeyT > Val)
at - Return the entry for the specified key, or abort if no such entry exists.
iterator find(const_arg_type_t< KeyT > Val)
Analysis pass which computes a DominatorTree.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
unsigned getNumElements() const
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
FunctionPass class - This class is used to implement most global optimizations.
Legacy wrapper pass to provide the GlobalsAAResult object.
ConstantInt * getInt1(bool V)
Get a constant value representing either true or false.
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
LLVM_ABI CallInst * CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment, Value *Mask, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Load intrinsic.
Value * CreatePtrAdd(Value *Ptr, Value *Offset, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Value * CreateBitOrPointerCast(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
LLVM_ABI CallInst * CreateMaskedStore(Value *Val, Value *Ptr, Align Alignment, Value *Mask)
Create a call to Masked Store intrinsic.
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile=false)
ConstantInt * getInt(const APInt &AI)
Get a constant integer value.
LLVM_ABI bool hasNoUnsignedWrap() const LLVM_READONLY
Determine whether the no unsigned wrap flag is set.
LLVM_ABI bool hasNoSignedWrap() const LLVM_READONLY
Determine whether the no signed wrap flag is set.
bool hasMetadata() const
Return true if this instruction has any metadata attached to it.
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that InsertPos lives in, right before InsertPos.
LLVM_ABI bool comesBefore(const Instruction *Other) const
Given an instruction Other in the same basic block as this instruction, return true if this instructi...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
An instruction for reading from memory.
LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
This class implements a map that also provides access to all stored values in a deterministic order.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
static LLVM_ABI MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
Pass interface - Implemented by all 'passes'.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Legacy wrapper pass to provide the SCEVAAResult object.
Analysis pass that exposes the ScalarEvolution for a function.
The main scalar evolution driver.
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
ConstantRange getSignedRange(const SCEV *S)
Determine the signed range for a particular SCEV.
LLVM_ABI const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
LLVM_ABI const SCEV * getCouldNotCompute()
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
reference emplace_back(ArgTypes &&... Args)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Value * getPointerOperand()
Analysis pass providing the TargetTransformInfo.
bool isVectorTy() const
True if this is an instance of VectorType.
bool isPointerTy() const
True if this is an instance of PointerType.
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
Value * getOperand(unsigned i) const
unsigned getNumOperands() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
This is a wrapper around stripAndAccumulateConstantOffsets with the in-bounds requirement set to fals...
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
std::pair< iterator, bool > insert(const ValueT &V)
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
TypeSize getSequentialElementStride(const DataLayout &DL) const
Value * getOperand() const
const ParentTy * getParent() const
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
This class implements an extremely fast bulk output stream that can only output to a stream.
void push_front(reference Node)
Insert a node at the front; never copies.
void remove(reference N)
Remove a node by reference; never deletes.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
Abstract Attribute helper functions.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
const APInt & smax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be signed.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
ElementType
The element type of an SRV or UAV resource.
Context & getContext() const
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
auto min_element(R &&Range)
Provide wrappers to std::min_element which take ranges instead of having to pass begin/end explicitly...
unsigned getLoadStoreAddressSpace(const Value *I)
A helper function that returns the address space of the pointer operand of load or store instruction.
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
APFloat abs(APFloat X)
Returns the absolute value of the argument.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI Pass * createLoadStoreVectorizerPass()
Create a legacy pass manager instance of the LoadStoreVectorizer pass.
iterator_range< po_iterator< T > > post_order(const T &G)
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
LLVM_ABI Instruction * propagateMetadata(Instruction *I, ArrayRef< Value * > VL)
Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath, MD_nontemporal,...
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
LLVM_ABI Align getOrEnforceKnownAlignment(Value *V, MaybeAlign PrefAlign, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to ensure that the alignment of V is at least PrefAlign bytes.
bool isModSet(const ModRefInfo MRI)
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
generic_gep_type_iterator<> gep_type_iterator
bool isModOrRefSet(const ModRefInfo MRI)
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
ModRefInfo
Flags indicating whether a memory access modifies or references memory.
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
gep_type_iterator gep_type_begin(const User *GEP)
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
AAResults AliasAnalysis
Temporary typedef for legacy code that uses a generic AliasAnalysis pointer or reference.
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.