114#include <type_traits>
120#define DEBUG_TYPE "load-store-vectorizer"
122STATISTIC(NumVectorInstructions,
"Number of vector accesses generated");
123STATISTIC(NumScalarsVectorized,
"Number of scalar accesses vectorized");
133 std::tuple<
const Value * ,
139 const EqClassKey &K) {
142 <<
" of element size " << ElementSize <<
" bits in addrspace "
159 APInt OffsetFromLeader;
160 ChainElem(Instruction *Inst, APInt OffsetFromLeader)
161 : Inst(std::
move(Inst)), OffsetFromLeader(std::
move(OffsetFromLeader)) {}
165void sortChainInBBOrder(Chain &
C) {
166 sort(
C, [](
auto &
A,
auto &
B) {
return A.Inst->comesBefore(
B.Inst); });
169void sortChainInOffsetOrder(Chain &
C) {
170 sort(
C, [](
const auto &
A,
const auto &
B) {
171 if (
A.OffsetFromLeader !=
B.OffsetFromLeader)
172 return A.OffsetFromLeader.slt(
B.OffsetFromLeader);
173 return A.Inst->comesBefore(
B.Inst);
178 for (
const auto &
E :
C) {
179 dbgs() <<
" " << *
E.Inst <<
" (offset " <<
E.OffsetFromLeader <<
")\n";
183using EquivalenceClassMap =
187constexpr unsigned StackAdjustedAlignment = 4;
191 for (
const ChainElem &
E :
C)
198 return LI !=
nullptr && LI->
hasMetadata(LLVMContext::MD_invariant_load);
208 while (!Worklist.
empty()) {
211 for (
int Idx = 0; Idx < NumOperands; Idx++) {
213 if (!IM || IM->
getOpcode() == Instruction::PHI)
221 assert(IM !=
I &&
"Unexpected cycle while re-ordering instructions");
224 InstructionsToMove.
insert(IM);
231 for (
auto BBI =
I->getIterator(),
E =
I->getParent()->end(); BBI !=
E;) {
233 if (!InstructionsToMove.
contains(IM))
245 TargetTransformInfo &TTI;
246 const DataLayout &DL;
257 DenseSet<Instruction *> ExtraElements;
260 Vectorizer(Function &F,
AliasAnalysis &AA, AssumptionCache &AC,
261 DominatorTree &DT, ScalarEvolution &SE, TargetTransformInfo &TTI)
262 : F(F), AA(AA), AC(AC), DT(DT), SE(SE), TTI(TTI),
263 DL(F.getDataLayout()), Builder(SE.
getContext()) {}
268 static const unsigned MaxDepth = 3;
277 bool runOnEquivalenceClass(
const EqClassKey &EqClassKey,
283 bool runOnChain(Chain &
C);
289 std::vector<Chain> splitChainByContiguity(Chain &
C);
295 std::vector<Chain> splitChainByMayAliasInstrs(Chain &
C);
299 std::vector<Chain> splitChainByAlignment(Chain &
C);
303 bool vectorizeChain(Chain &
C);
306 std::optional<APInt> getConstantOffset(
Value *PtrA,
Value *PtrB,
307 Instruction *ContextInst,
309 std::optional<APInt> getConstantOffsetComplexAddrs(
Value *PtrA,
Value *PtrB,
310 Instruction *ContextInst,
312 std::optional<APInt> getConstantOffsetSelects(
Value *PtrA,
Value *PtrB,
313 Instruction *ContextInst,
319 Type *getChainElemTy(
const Chain &
C);
328 template <
bool IsLoadChain>
330 Instruction *ChainElem, Instruction *ChainBegin,
331 const DenseMap<Instruction *, APInt /*OffsetFromLeader*/> &ChainOffsets,
332 BatchAAResults &BatchAA);
337 void mergeEquivalenceClasses(EquivalenceClassMap &EQClasses)
const;
358 bool accessIsAllowedAndFast(
unsigned SizeBytes,
unsigned AS, Align Alignment,
359 unsigned VecElemBits)
const;
365 ChainElem createExtraElementAfter(
const ChainElem &PrevElem,
Type *Ty,
366 APInt
Offset, StringRef Prefix,
367 Align Alignment =
Align());
372 FixedVectorType *VecTy);
376 void deleteExtraElements();
379class LoadStoreVectorizerLegacyPass :
public FunctionPass {
383 LoadStoreVectorizerLegacyPass() : FunctionPass(ID) {}
387 StringRef getPassName()
const override {
388 return "GPU Load and Store Vectorizer";
391 void getAnalysisUsage(AnalysisUsage &AU)
const override {
403char LoadStoreVectorizerLegacyPass::ID = 0;
406 "Vectorize load and Store instructions",
false,
false)
414 "Vectorize load and store instructions",
false,
false)
417 return new LoadStoreVectorizerLegacyPass();
420bool LoadStoreVectorizerLegacyPass::runOnFunction(
Function &
F) {
422 if (skipFunction(
F) ||
F.hasFnAttribute(Attribute::NoImplicitFloat))
425 AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
426 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
427 ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
428 TargetTransformInfo &
TTI =
429 getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
F);
431 AssumptionCache &AC =
432 getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
F);
434 return Vectorizer(
F, AA, AC, DT, SE,
TTI).run();
440 if (
F.hasFnAttribute(Attribute::NoImplicitFloat))
455bool Vectorizer::run() {
482 for (
auto It = Barriers.
begin(), End = std::prev(Barriers.
end()); It != End;
484 Changed |= runOnPseudoBB(*It, *std::next(It));
497 I->eraseFromParent();
501 deleteExtraElements();
510 dbgs() <<
"LSV: Running on pseudo-BB [" << *Begin <<
" ... ";
511 if (End != Begin->getParent()->end())
514 dbgs() <<
"<BB end>";
519 for (
const auto &[EqClassKey, EqClass] :
520 collectEquivalenceClasses(Begin, End))
521 Changed |= runOnEquivalenceClass(EqClassKey, EqClass);
526bool Vectorizer::runOnEquivalenceClass(
const EqClassKey &EqClassKey,
531 dbgs() <<
"LSV: Running on equivalence class of size " << EqClass.
size()
532 <<
" keyed on " << EqClassKey <<
":\n";
533 for (Instruction *
I : EqClass)
534 dbgs() <<
" " << *
I <<
"\n";
537 std::vector<Chain> Chains = gatherChains(EqClass);
539 <<
" nontrivial chains.\n";);
540 for (Chain &
C : Chains)
545bool Vectorizer::runOnChain(Chain &
C) {
547 dbgs() <<
"LSV: Running on chain with " <<
C.size() <<
" instructions:\n";
558 for (
auto &
C : splitChainByMayAliasInstrs(
C))
559 for (
auto &
C : splitChainByContiguity(
C))
560 for (
auto &
C : splitChainByAlignment(
C))
565std::vector<Chain> Vectorizer::splitChainByMayAliasInstrs(Chain &
C) {
569 sortChainInBBOrder(
C);
572 dbgs() <<
"LSV: splitChainByMayAliasInstrs considering chain:\n";
580 for (
const auto &
E :
C)
581 ChainOffsets.insert({&*
E.Inst,
E.OffsetFromLeader});
585 BatchAAResults BatchAA(AA);
598 auto Impl = [&](
auto IsLoad) {
600 auto [ChainBegin, ChainEnd] = [&](
auto IsLoad) {
601 if constexpr (IsLoad())
602 return std::make_pair(
C.begin(),
C.end());
604 return std::make_pair(
C.rbegin(),
C.rend());
606 assert(ChainBegin != ChainEnd);
608 std::vector<Chain> Chains;
611 for (
auto ChainIt = std::next(ChainBegin); ChainIt != ChainEnd; ++ChainIt) {
613 ChainOffsets, BatchAA)) {
614 LLVM_DEBUG(
dbgs() <<
"LSV: No intervening may-alias instrs; can merge "
615 << *ChainIt->Inst <<
" into " << *ChainBegin->Inst
620 dbgs() <<
"LSV: Found intervening may-alias instrs; cannot merge "
621 << *ChainIt->Inst <<
" into " << *ChainBegin->Inst <<
"\n");
622 if (NewChain.
size() > 1) {
624 dbgs() <<
"LSV: got nontrivial chain without aliasing instrs:\n";
627 Chains.emplace_back(std::move(NewChain));
634 if (NewChain.
size() > 1) {
636 dbgs() <<
"LSV: got nontrivial chain without aliasing instrs:\n";
639 Chains.emplace_back(std::move(NewChain));
645 return Impl(std::bool_constant<true>());
648 return Impl(std::bool_constant<false>());
651std::vector<Chain> Vectorizer::splitChainByContiguity(Chain &
C) {
655 sortChainInOffsetOrder(
C);
658 dbgs() <<
"LSV: splitChainByContiguity considering chain:\n";
672 Align OptimisticAlign =
Align(MaxVecRegBits / 8);
673 unsigned int MaxVectorNumElems =
674 MaxVecRegBits /
DL.getTypeSizeInBits(ElementType);
681 FixedVectorType *OptimisticVectorType =
693 APInt OffsetOfBestAlignedElemFromLeader =
C[0].OffsetFromLeader;
694 for (
const auto &
E :
C) {
696 if (ElementAlignment > BestAlignedElemAlign) {
697 BestAlignedElemAlign = ElementAlignment;
698 OffsetOfBestAlignedElemFromLeader =
E.OffsetFromLeader;
702 auto DeriveAlignFromBestAlignedElem = [&](APInt NewElemOffsetFromLeader) {
704 BestAlignedElemAlign,
705 (NewElemOffsetFromLeader - OffsetOfBestAlignedElemFromLeader)
710 unsigned ASPtrBits =
DL.getIndexSizeInBits(AS);
712 std::vector<Chain> Ret;
713 Ret.push_back({
C.front()});
715 unsigned ChainElemTyBits =
DL.getTypeSizeInBits(getChainElemTy(
C));
716 ChainElem &Prev =
C[0];
717 for (
auto It = std::next(
C.begin()), End =
C.end(); It != End; ++It) {
718 auto &CurChain = Ret.back();
722 APInt PrevReadEnd = Prev.OffsetFromLeader + PrevSzBytes;
727 8 * SzBytes % ChainElemTyBits == 0 &&
728 "Every chain-element size must be a multiple of the element size after "
730 APInt ReadEnd = It->OffsetFromLeader + SzBytes;
732 bool AreContiguous =
false;
733 if (It->OffsetFromLeader.sle(PrevReadEnd)) {
735 uint64_t Overlap = (PrevReadEnd - It->OffsetFromLeader).getZExtValue();
736 if (8 * Overlap % ChainElemTyBits == 0)
737 AreContiguous =
true;
741 << (AreContiguous ?
"contiguous" :
"chain-breaker")
742 << *It->Inst <<
" (starts at offset "
743 << It->OffsetFromLeader <<
")\n");
751 bool GapFilled =
false;
752 if (!AreContiguous && TryFillGaps && PrevSzBytes == SzBytes) {
753 APInt GapSzBytes = It->OffsetFromLeader - PrevReadEnd;
754 if (GapSzBytes == PrevSzBytes) {
756 ChainElem NewElem = createExtraElementAfter(
758 DeriveAlignFromBestAlignedElem(PrevReadEnd));
759 CurChain.push_back(NewElem);
765 if ((GapSzBytes == 2 * PrevSzBytes) && (CurChain.size() % 4 == 1)) {
766 ChainElem NewElem1 = createExtraElementAfter(
768 DeriveAlignFromBestAlignedElem(PrevReadEnd));
769 ChainElem NewElem2 = createExtraElementAfter(
771 DeriveAlignFromBestAlignedElem(PrevReadEnd + PrevSzBytes));
772 CurChain.push_back(NewElem1);
773 CurChain.push_back(NewElem2);
778 if (AreContiguous || GapFilled)
779 CurChain.push_back(*It);
781 Ret.push_back({*It});
785 if (ReadEnd.
sge(PrevReadEnd))
790 llvm::erase_if(Ret, [](
const auto &Chain) {
return Chain.size() <= 1; });
794Type *Vectorizer::getChainElemTy(
const Chain &
C) {
807 if (
any_of(
C, [](
const ChainElem &
E) {
810 return Type::getIntNTy(
815 for (
const ChainElem &
E :
C)
821std::vector<Chain> Vectorizer::splitChainByAlignment(Chain &
C) {
834 sortChainInOffsetOrder(
C);
837 dbgs() <<
"LSV: splitChainByAlignment considering chain:\n";
842 auto GetVectorFactor = [&](
unsigned VF,
unsigned LoadStoreSize,
845 ChainSizeBytes, VecTy)
847 ChainSizeBytes, VecTy);
851 for (
const auto &
E :
C) {
854 "Should have filtered out non-power-of-two elements in "
855 "collectEquivalenceClasses.");
865 bool CandidateChainsMayContainExtraLoadsStores =
any_of(
866 C, [
this](
const ChainElem &
E) {
return ExtraElements.
contains(
E.Inst); });
868 std::vector<Chain> Ret;
869 for (
unsigned CBegin = 0; CBegin <
C.size(); ++CBegin) {
877 APInt PrevReadEnd =
C[CBegin].OffsetFromLeader + Sz;
878 for (
unsigned CEnd = CBegin + 1,
Size =
C.size(); CEnd <
Size; ++CEnd) {
879 APInt ReadEnd =
C[CEnd].OffsetFromLeader +
881 unsigned BytesAdded =
882 PrevReadEnd.
sle(ReadEnd) ? (ReadEnd - PrevReadEnd).getSExtValue() : 0;
884 if (Sz > VecRegBytes)
886 CandidateChains.emplace_back(CEnd, Sz);
891 for (
auto It = CandidateChains.rbegin(), End = CandidateChains.rend();
893 auto [CEnd, SizeBytes] = *It;
895 dbgs() <<
"LSV: splitChainByAlignment considering candidate chain ["
896 << *
C[CBegin].Inst <<
" ... " << *
C[CEnd].Inst <<
"]\n");
898 Type *VecElemTy = getChainElemTy(
C);
902 unsigned VecElemBits =
DL.getTypeSizeInBits(VecElemTy);
905 assert((8 * SizeBytes) % VecElemBits == 0);
906 unsigned NumVecElems = 8 * SizeBytes / VecElemBits;
908 unsigned VF = 8 * VecRegBytes / VecElemBits;
911 unsigned TargetVF = GetVectorFactor(VF, VecElemBits,
912 VecElemBits * NumVecElems / 8, VecTy);
913 if (TargetVF != VF && TargetVF < NumVecElems) {
915 dbgs() <<
"LSV: splitChainByAlignment discarding candidate chain "
917 << TargetVF <<
" != VF=" << VF
918 <<
" and TargetVF < NumVecElems=" << NumVecElems <<
"\n");
932 bool IsAllocaAccess = AS ==
DL.getAllocaAddrSpace() &&
935 Align PrefAlign =
Align(StackAdjustedAlignment);
936 if (IsAllocaAccess && Alignment.
value() % SizeBytes != 0 &&
937 accessIsAllowedAndFast(SizeBytes, AS, PrefAlign, VecElemBits)) {
939 PtrOperand, PrefAlign,
DL,
C[CBegin].Inst,
nullptr, &DT);
940 if (NewAlign >= Alignment) {
942 <<
"LSV: splitByChain upgrading alloca alignment from "
943 << Alignment.
value() <<
" to " << NewAlign.
value()
945 Alignment = NewAlign;
949 Chain ExtendingLoadsStores;
950 if (!accessIsAllowedAndFast(SizeBytes, AS, Alignment, VecElemBits)) {
954 bool AllowedAndFast =
false;
959 assert(VecElemBits % 8 == 0);
960 unsigned VecElemBytes = VecElemBits / 8;
962 unsigned NewSizeBytes = VecElemBytes * NewNumVecElems;
965 "TargetVF expected to be a power of 2");
966 assert(NewNumVecElems <= TargetVF &&
967 "Should not extend past TargetVF");
970 <<
"LSV: attempting to extend chain of " << NumVecElems
971 <<
" " << (IsLoadChain ?
"loads" :
"stores") <<
" to "
972 << NewNumVecElems <<
" elements\n");
973 bool IsLegalToExtend =
983 if (IsLegalToExtend &&
984 accessIsAllowedAndFast(NewSizeBytes, AS, Alignment,
987 <<
"LSV: extending " << (IsLoadChain ?
"load" :
"store")
988 <<
" chain of " << NumVecElems <<
" "
989 << (IsLoadChain ?
"loads" :
"stores")
990 <<
" with total byte size of " << SizeBytes <<
" to "
991 << NewNumVecElems <<
" "
992 << (IsLoadChain ?
"loads" :
"stores")
993 <<
" with total byte size of " << NewSizeBytes
994 <<
", TargetVF=" << TargetVF <<
" \n");
1000 unsigned ASPtrBits =
DL.getIndexSizeInBits(AS);
1001 for (
unsigned I = 0;
I < (NewNumVecElems - NumVecElems);
I++) {
1002 ChainElem NewElem = createExtraElementAfter(
1003 C[CBegin], VecElemTy,
1004 APInt(ASPtrBits, SizeBytes +
I * VecElemBytes),
"Extend");
1005 ExtendingLoadsStores.push_back(NewElem);
1009 SizeBytes = NewSizeBytes;
1010 NumVecElems = NewNumVecElems;
1011 AllowedAndFast =
true;
1014 if (!AllowedAndFast) {
1017 <<
"LSV: splitChainByAlignment discarding candidate chain "
1018 "because its alignment is not AllowedAndFast: "
1019 << Alignment.
value() <<
"\n");
1029 dbgs() <<
"LSV: splitChainByAlignment discarding candidate chain "
1030 "because !isLegalToVectorizeLoad/StoreChain.");
1034 if (CandidateChainsMayContainExtraLoadsStores) {
1046 [
this](
const ChainElem &
E) {
1050 if (CurrCandContainsExtraLoadsStores &&
1058 <<
"LSV: splitChainByAlignment discarding candidate chain "
1059 "because it contains extra loads/stores that we cannot "
1060 "legally vectorize into a masked load/store \n");
1067 for (
unsigned I = CBegin;
I <= CEnd; ++
I)
1068 NewChain.emplace_back(
C[
I]);
1069 for (ChainElem
E : ExtendingLoadsStores)
1070 NewChain.emplace_back(
E);
1078bool Vectorizer::vectorizeChain(Chain &
C) {
1083 C, [
this](
const ChainElem &
E) {
return ExtraElements.
contains(
E.Inst); });
1087 if (
C.size() == 2 && ChainContainsExtraLoadsStores)
1090 sortChainInOffsetOrder(
C);
1093 dbgs() <<
"LSV: Vectorizing chain of " <<
C.size() <<
" instructions:\n";
1097 Type *VecElemTy = getChainElemTy(
C);
1101 APInt PrevReadEnd =
C[0].OffsetFromLeader + BytesAdded;
1102 unsigned ChainBytes = BytesAdded;
1103 for (
auto It = std::next(
C.begin()), End =
C.end(); It != End; ++It) {
1105 APInt ReadEnd = It->OffsetFromLeader + SzBytes;
1108 PrevReadEnd.
sle(ReadEnd) ? (ReadEnd - PrevReadEnd).getSExtValue() : 0;
1109 ChainBytes += BytesAdded;
1113 assert(8 * ChainBytes %
DL.getTypeSizeInBits(VecElemTy) == 0);
1116 unsigned NumElem = 8 * ChainBytes /
DL.getTypeSizeInBits(VecElemTy);
1122 if (AS ==
DL.getAllocaAddrSpace()) {
1123 Alignment = std::max(
1126 MaybeAlign(),
DL,
C[0].Inst,
nullptr, &DT));
1131 for (
const ChainElem &
E :
C)
1133 DL.getTypeStoreSize(VecElemTy));
1142 return A.Inst->comesBefore(
B.Inst);
1147 if (ChainContainsExtraLoadsStores) {
1164 for (
const ChainElem &
E :
C) {
1169 (
E.OffsetFromLeader -
C[0].OffsetFromLeader).getZExtValue();
1170 unsigned VecIdx = 8 * EOffset /
DL.getTypeSizeInBits(VecElemTy);
1181 if (
V->getType() !=
I->getType())
1209 return A.Inst->comesBefore(
B.Inst);
1214 auto InsertElem = [&](
Value *
V,
unsigned VecIdx) {
1215 if (
V->getType() != VecElemTy)
1219 for (
const ChainElem &
E :
C) {
1222 (
E.OffsetFromLeader -
C[0].OffsetFromLeader).getZExtValue();
1223 unsigned VecIdx = 8 * EOffset /
DL.getTypeSizeInBits(VecElemTy);
1224 if (FixedVectorType *VT =
1226 for (
int J = 0, JE = VT->getNumElements(); J < JE; ++J) {
1232 InsertElem(
I->getValueOperand(), VecIdx);
1238 if (ChainContainsExtraLoadsStores) {
1255 for (
const ChainElem &
E :
C)
1256 ToErase.emplace_back(
E.Inst);
1258 ++NumVectorInstructions;
1259 NumScalarsVectorized +=
C.size();
1263template <
bool IsLoadChain>
1264bool Vectorizer::isSafeToMove(
1265 Instruction *ChainElem, Instruction *ChainBegin,
1266 const DenseMap<Instruction *, APInt /*OffsetFromLeader*/> &ChainOffsets,
1267 BatchAAResults &BatchAA) {
1268 LLVM_DEBUG(
dbgs() <<
"LSV: isSafeToMove(" << *ChainElem <<
" -> "
1269 << *ChainBegin <<
")\n");
1272 if (ChainElem == ChainBegin)
1280 auto BBIt = std::next([&] {
1281 if constexpr (IsLoadChain)
1286 auto BBItEnd = std::next([&] {
1287 if constexpr (IsLoadChain)
1293 const APInt &ChainElemOffset = ChainOffsets.
at(ChainElem);
1294 const unsigned ChainElemSize =
1297 for (; BBIt != BBItEnd; ++BBIt) {
1300 if (!
I->mayReadOrWriteMemory())
1317 if (
auto OffsetIt = ChainOffsets.
find(
I); OffsetIt != ChainOffsets.
end()) {
1324 const APInt &IOffset = OffsetIt->second;
1326 if (IOffset == ChainElemOffset ||
1327 (IOffset.
sle(ChainElemOffset) &&
1328 (IOffset + IElemSize).sgt(ChainElemOffset)) ||
1329 (ChainElemOffset.sle(IOffset) &&
1330 (ChainElemOffset + ChainElemSize).sgt(OffsetIt->second))) {
1337 dbgs() <<
"LSV: Found alias in chain: " << *
I <<
"\n";
1349 <<
" Aliasing instruction:\n"
1350 <<
" " << *
I <<
'\n'
1351 <<
" Aliased instruction and pointer:\n"
1352 <<
" " << *ChainElem <<
'\n'
1370 unsigned MatchingOpIdxB,
bool Signed) {
1371 LLVM_DEBUG(
dbgs() <<
"LSV: checkIfSafeAddSequence IdxDiff=" << IdxDiff
1372 <<
", AddOpA=" << *AddOpA <<
", MatchingOpIdxA="
1373 << MatchingOpIdxA <<
", AddOpB=" << *AddOpB
1374 <<
", MatchingOpIdxB=" << MatchingOpIdxB
1375 <<
", Signed=" <<
Signed <<
"\n");
1391 AddOpB->
getOpcode() == Instruction::Add &&
1395 Value *OtherOperandA = AddOpA->
getOperand(MatchingOpIdxA == 1 ? 0 : 1);
1396 Value *OtherOperandB = AddOpB->
getOperand(MatchingOpIdxB == 1 ? 0 : 1);
1400 if (OtherInstrB && OtherInstrB->
getOpcode() == Instruction::Add &&
1405 if (OtherInstrB->
getOperand(0) == OtherOperandA &&
1410 if (OtherInstrA && OtherInstrA->
getOpcode() == Instruction::Add &&
1415 if (OtherInstrA->
getOperand(0) == OtherOperandB &&
1421 if (OtherInstrA && OtherInstrB &&
1422 OtherInstrA->
getOpcode() == Instruction::Add &&
1423 OtherInstrB->
getOpcode() == Instruction::Add &&
1440std::optional<APInt> Vectorizer::getConstantOffsetComplexAddrs(
1442 LLVM_DEBUG(
dbgs() <<
"LSV: getConstantOffsetComplexAddrs PtrA=" << *PtrA
1443 <<
" PtrB=" << *PtrB <<
" ContextInst=" << *ContextInst
1444 <<
" Depth=" <<
Depth <<
"\n");
1448 return getConstantOffsetSelects(PtrA, PtrB, ContextInst,
Depth);
1452 if (GEPA->getNumOperands() != GEPB->getNumOperands() ||
1453 GEPA->getPointerOperand() != GEPB->getPointerOperand() ||
1454 GEPA->getSourceElementType() != GEPB->getSourceElementType())
1455 return std::nullopt;
1458 for (
unsigned I = 0,
E = GEPA->getNumIndices() - 1;
I <
E; ++
I) {
1460 return std::nullopt;
1469 return std::nullopt;
1475 return std::nullopt;
1483 return std::nullopt;
1485 const SCEV *OffsetSCEVA = SE.
getSCEV(ValA);
1486 const SCEV *OffsetSCEVB = SE.
getSCEV(OpB);
1487 const SCEV *IdxDiffSCEV = SE.
getMinusSCEV(OffsetSCEVB, OffsetSCEVA);
1489 return std::nullopt;
1493 return std::nullopt;
1496 LLVM_DEBUG(
dbgs() <<
"LSV: getConstantOffsetComplexAddrs IdxDiff=" << IdxDiff
1504 if (OpB->
getOpcode() == Instruction::Add &&
1513 if (!Safe && OpA && OpA->
getOpcode() == Instruction::Add &&
1519 for (
unsigned MatchingOpIdxA : {0, 1})
1520 for (
unsigned MatchingOpIdxB : {0, 1})
1541 APInt BitsAllowedToBeSet = Known.Zero.zext(IdxDiff.
getBitWidth());
1544 Safe = BitsAllowedToBeSet.
uge(IdxDiff.
abs());
1548 return IdxDiff * Stride;
1549 return std::nullopt;
1552std::optional<APInt> Vectorizer::getConstantOffsetSelects(
1554 if (
Depth++ == MaxDepth)
1555 return std::nullopt;
1559 if (SelectA->getCondition() != SelectB->getCondition())
1560 return std::nullopt;
1561 LLVM_DEBUG(
dbgs() <<
"LSV: getConstantOffsetSelects, PtrA=" << *PtrA
1562 <<
", PtrB=" << *PtrB <<
", ContextInst="
1563 << *ContextInst <<
", Depth=" <<
Depth <<
"\n");
1564 std::optional<APInt> TrueDiff = getConstantOffset(
1565 SelectA->getTrueValue(), SelectB->getTrueValue(), ContextInst,
Depth);
1567 return std::nullopt;
1568 std::optional<APInt> FalseDiff =
1569 getConstantOffset(SelectA->getFalseValue(), SelectB->getFalseValue(),
1570 ContextInst,
Depth);
1571 if (TrueDiff == FalseDiff)
1575 return std::nullopt;
1578void Vectorizer::mergeEquivalenceClasses(EquivalenceClassMap &EQClasses)
const {
1579 if (EQClasses.size() < 2)
1584 static_assert(std::tuple_size_v<EqClassKey> == 4,
1585 "EqClassKey has changed - EqClassReducedKey needs changes too");
1586 using EqClassReducedKey =
1587 std::tuple<std::tuple_element_t<1, EqClassKey> ,
1588 std::tuple_element_t<2, EqClassKey> ,
1589 std::tuple_element_t<3, EqClassKey> >;
1590 using ECReducedKeyToUnderlyingObjectMap =
1591 MapVector<EqClassReducedKey,
1592 SmallPtrSet<std::tuple_element_t<0, EqClassKey>, 4>>;
1597 ECReducedKeyToUnderlyingObjectMap RedKeyToUOMap;
1598 bool FoundPotentiallyOptimizableEC =
false;
1599 for (
const auto &EC : EQClasses) {
1600 const auto &
Key =
EC.first;
1601 EqClassReducedKey RedKey{std::get<1>(
Key), std::get<2>(
Key),
1603 auto &UOMap = RedKeyToUOMap[RedKey];
1605 if (UOMap.size() > 1)
1606 FoundPotentiallyOptimizableEC =
true;
1608 if (!FoundPotentiallyOptimizableEC)
1612 dbgs() <<
"LSV: mergeEquivalenceClasses: before merging:\n";
1613 for (
const auto &EC : EQClasses) {
1614 dbgs() <<
" Key: {" <<
EC.first <<
"}\n";
1615 for (
const auto &Inst :
EC.second)
1616 dbgs() <<
" Inst: " << *Inst <<
'\n';
1620 dbgs() <<
"LSV: mergeEquivalenceClasses: RedKeyToUOMap:\n";
1621 for (
const auto &RedKeyToUO : RedKeyToUOMap) {
1622 dbgs() <<
" Reduced key: {" << std::get<0>(RedKeyToUO.first) <<
", "
1623 << std::get<1>(RedKeyToUO.first) <<
", "
1624 <<
static_cast<int>(std::get<2>(RedKeyToUO.first)) <<
"} --> "
1625 << RedKeyToUO.second.size() <<
" underlying objects:\n";
1626 for (
auto UObject : RedKeyToUO.second)
1627 dbgs() <<
" " << *UObject <<
'\n';
1631 using UObjectToUObjectMap = DenseMap<const Value *, const Value *>;
1634 auto GetUltimateTargets =
1635 [](SmallPtrSetImpl<const Value *> &UObjects) -> UObjectToUObjectMap {
1636 UObjectToUObjectMap IndirectionMap;
1637 for (
const auto *UObject : UObjects) {
1638 const unsigned MaxLookupDepth = 1;
1640 if (UltimateTarget != UObject)
1641 IndirectionMap[UObject] = UltimateTarget;
1643 UObjectToUObjectMap UltimateTargetsMap;
1644 for (
const auto *UObject : UObjects) {
1646 auto It = IndirectionMap.find(Target);
1647 for (; It != IndirectionMap.end(); It = IndirectionMap.find(Target))
1649 UltimateTargetsMap[UObject] =
Target;
1651 return UltimateTargetsMap;
1656 for (
auto &[RedKey, UObjects] : RedKeyToUOMap) {
1657 if (UObjects.size() < 2)
1659 auto UTMap = GetUltimateTargets(UObjects);
1660 for (
const auto &[UObject, UltimateTarget] : UTMap) {
1661 if (UObject == UltimateTarget)
1664 EqClassKey KeyFrom{UObject, std::get<0>(RedKey), std::get<1>(RedKey),
1665 std::get<2>(RedKey)};
1666 EqClassKey KeyTo{UltimateTarget, std::get<0>(RedKey), std::get<1>(RedKey),
1667 std::get<2>(RedKey)};
1670 const auto &VecTo = EQClasses[KeyTo];
1671 const auto &VecFrom = EQClasses[KeyFrom];
1672 SmallVector<Instruction *, 8> MergedVec;
1673 std::merge(VecFrom.begin(), VecFrom.end(), VecTo.begin(), VecTo.end(),
1674 std::back_inserter(MergedVec),
1675 [](Instruction *
A, Instruction *
B) {
1676 return A && B && A->comesBefore(B);
1678 EQClasses[KeyTo] = std::move(MergedVec);
1679 EQClasses.erase(KeyFrom);
1683 dbgs() <<
"LSV: mergeEquivalenceClasses: after merging:\n";
1684 for (
const auto &EC : EQClasses) {
1685 dbgs() <<
" Key: {" <<
EC.first <<
"}\n";
1686 for (
const auto &Inst :
EC.second)
1687 dbgs() <<
" Inst: " << *Inst <<
'\n';
1695 EquivalenceClassMap Ret;
1697 auto GetUnderlyingObject = [](
const Value *Ptr) ->
const Value * {
1706 return Sel->getCondition();
1717 if ((LI && !LI->
isSimple()) || (SI && !
SI->isSimple()))
1730 unsigned TySize =
DL.getTypeSizeInBits(Ty);
1731 if ((TySize % 8) != 0)
1745 unsigned VF = VecRegSize / TySize;
1750 (VecTy && !
isPowerOf2_32(
DL.getTypeSizeInBits(VecTy->getScalarType()))))
1754 if (TySize > VecRegSize / 2 ||
1758 Ret[{GetUnderlyingObject(Ptr), AS,
1764 mergeEquivalenceClasses(Ret);
1773 unsigned ASPtrBits =
DL.getIndexSizeInBits(AS);
1777 for (
size_t I = 1;
I < Instrs.
size(); ++
I) {
1778 assert(Instrs[
I - 1]->comesBefore(Instrs[
I]));
1787 struct InstrListElem : ilist_node<InstrListElem>,
1788 std::pair<Instruction *, Chain> {
1789 explicit InstrListElem(Instruction *
I)
1792 struct InstrListElemDenseMapInfo {
1793 using PtrInfo = DenseMapInfo<InstrListElem *>;
1794 using IInfo = DenseMapInfo<Instruction *>;
1795 static InstrListElem *getEmptyKey() {
return PtrInfo::getEmptyKey(); }
1796 static InstrListElem *getTombstoneKey() {
1797 return PtrInfo::getTombstoneKey();
1799 static unsigned getHashValue(
const InstrListElem *
E) {
1800 return IInfo::getHashValue(
E->first);
1802 static bool isEqual(
const InstrListElem *
A,
const InstrListElem *
B) {
1803 if (
A == getEmptyKey() ||
B == getEmptyKey())
1804 return A == getEmptyKey() &&
B == getEmptyKey();
1805 if (
A == getTombstoneKey() ||
B == getTombstoneKey())
1806 return A == getTombstoneKey() &&
B == getTombstoneKey();
1807 return IInfo::isEqual(
A->first,
B->first);
1810 SpecificBumpPtrAllocator<InstrListElem>
Allocator;
1811 simple_ilist<InstrListElem> MRU;
1812 DenseSet<InstrListElem *, InstrListElemDenseMapInfo> Chains;
1817 for (Instruction *
I : Instrs) {
1818 constexpr int MaxChainsToTry = 64;
1820 bool MatchFound =
false;
1821 auto ChainIter = MRU.
begin();
1822 for (
size_t J = 0; J < MaxChainsToTry && ChainIter != MRU.
end();
1824 if (std::optional<APInt>
Offset = getConstantOffset(
1828 (ChainIter->first->comesBefore(
I) ?
I : ChainIter->first))) {
1831 ChainIter->second.emplace_back(
I,
Offset.value());
1841 APInt ZeroOffset(ASPtrBits, 0);
1842 InstrListElem *
E =
new (
Allocator.Allocate()) InstrListElem(
I);
1843 E->second.emplace_back(
I, ZeroOffset);
1849 std::vector<Chain> Ret;
1850 Ret.reserve(Chains.
size());
1853 if (
E.second.size() > 1)
1854 Ret.emplace_back(std::move(
E.second));
1858std::optional<APInt> Vectorizer::getConstantOffset(
Value *PtrA,
Value *PtrB,
1859 Instruction *ContextInst,
1862 <<
", PtrB=" << *PtrB <<
", ContextInst= " << *ContextInst
1863 <<
", Depth=" <<
Depth <<
"\n");
1866 unsigned OrigBitWidth =
DL.getIndexTypeSizeInBits(PtrA->
getType());
1867 APInt OffsetA(OrigBitWidth, 0);
1868 APInt OffsetB(OrigBitWidth, 0);
1871 unsigned NewPtrBitWidth =
DL.getTypeStoreSizeInBits(PtrA->
getType());
1872 if (NewPtrBitWidth !=
DL.getTypeStoreSizeInBits(PtrB->
getType()))
1873 return std::nullopt;
1878 assert(OffsetA.getSignificantBits() <= NewPtrBitWidth &&
1879 OffsetB.getSignificantBits() <= NewPtrBitWidth);
1881 OffsetA = OffsetA.sextOrTrunc(NewPtrBitWidth);
1882 OffsetB = OffsetB.sextOrTrunc(NewPtrBitWidth);
1884 return (OffsetB - OffsetA).sextOrTrunc(OrigBitWidth);
1889 LLVM_DEBUG(
dbgs() <<
"LSV: SCEV PtrB - PtrA =" << *DistScev <<
"\n");
1895 return (OffsetB - OffsetA + Dist).
sextOrTrunc(OrigBitWidth);
1898 if (std::optional<APInt> Diff =
1899 getConstantOffsetComplexAddrs(PtrA, PtrB, ContextInst,
Depth))
1900 return (OffsetB - OffsetA + Diff->sext(OffsetB.getBitWidth()))
1901 .sextOrTrunc(OrigBitWidth);
1902 return std::nullopt;
1905bool Vectorizer::accessIsAllowedAndFast(
unsigned SizeBytes,
unsigned AS,
1907 unsigned VecElemBits)
const {
1909 if (Alignment.
value() % SizeBytes == 0)
1913 unsigned VectorizedSpeed = 0;
1915 F.getContext(), SizeBytes * 8, AS, Alignment, &VectorizedSpeed);
1916 if (!AllowsMisaligned) {
1918 dbgs() <<
"LSV: Access of " << SizeBytes <<
"B in addrspace " << AS
1919 <<
" with alignment " << Alignment.
value()
1920 <<
" is misaligned, and therefore can't be vectorized.\n");
1924 unsigned ElementwiseSpeed = 0;
1925 (
TTI).allowsMisalignedMemoryAccesses((
F).
getContext(), VecElemBits, AS,
1926 Alignment, &ElementwiseSpeed);
1927 if (VectorizedSpeed < ElementwiseSpeed) {
1928 LLVM_DEBUG(
dbgs() <<
"LSV: Access of " << SizeBytes <<
"B in addrspace "
1929 << AS <<
" with alignment " << Alignment.
value()
1930 <<
" has relative speed " << VectorizedSpeed
1931 <<
", which is lower than the elementwise speed of "
1933 <<
". Therefore this access won't be vectorized.\n");
1939ChainElem Vectorizer::createExtraElementAfter(
const ChainElem &Prev,
Type *Ty,
1940 APInt
Offset, StringRef Prefix,
1946 PrevLoad->getPointerOperand(), Builder.
getInt(
Offset), Prefix +
"GEP");
1947 LLVM_DEBUG(
dbgs() <<
"LSV: Extra GEP Created: \n" << *NewGep <<
"\n");
1954 LLVM_DEBUG(
dbgs() <<
"LSV: Extra GEP Created: \n" << *NewGep <<
"\n");
1964 ExtraElements.
insert(NewElement);
1966 APInt NewOffsetFromLeader = Prev.OffsetFromLeader +
Offset;
1969 <<
" OffsetFromLeader: " << NewOffsetFromLeader <<
"\n");
1970 return ChainElem{NewElement, NewOffsetFromLeader};
1974 FixedVectorType *VecTy) {
1980 for (
const ChainElem &
E :
C) {
1984 (
E.OffsetFromLeader -
C[0].OffsetFromLeader).getZExtValue();
1987 if (FixedVectorType *VT =
1989 for (
unsigned J = 0; J < VT->getNumElements(); ++J)
1990 MaskElts[VecIdx + J] = Builder.
getInt1(
true);
1992 MaskElts[VecIdx] = Builder.
getInt1(
true);
1997void Vectorizer::deleteExtraElements() {
1998 for (
auto *ExtraElement : ExtraElements) {
2000 [[maybe_unused]]
bool Deleted =
2002 assert(
Deleted &&
"Extra Load should always be trivially dead");
2008 ExtraElement->eraseFromParent();
2013 ExtraElements.clear();
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static bool isEqual(const Function &Caller, const Function &Callee)
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file defines the DenseMap class.
static bool runOnFunction(Function &F, bool PostInlining)
Module.h This file contains the declarations for the Module class.
static bool checkNoWrapFlags(Instruction *I, bool Signed)
static bool checkIfSafeAddSequence(const APInt &IdxDiff, Instruction *AddOpA, unsigned MatchingOpIdxA, Instruction *AddOpB, unsigned MatchingOpIdxB, bool Signed)
This file implements a map that provides insertion order iteration.
This file provides utility analysis objects describing memory locations.
static bool isInvariantLoad(const Instruction *I, const Value *Ptr, const bool IsKernelFn)
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
Provides some synthesis utilities to produce sequences of values.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static bool isSafeToMove(const MachineOperand *Def, const MachineOperand *Use, const MachineInstr *Insert, const WebAssemblyFunctionInfo &MFI, const MachineRegisterInfo &MRI, bool Optimize)
A manager for alias analyses.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Class for arbitrary precision integers.
void clearBit(unsigned BitPosition)
Set a given bit to 0.
APInt abs() const
Get the absolute value.
unsigned getBitWidth() const
Return the number of bits in the APInt.
bool sle(const APInt &RHS) const
Signed less or equal comparison.
LLVM_ABI APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
bool sge(const APInt &RHS) const
Signed greater or equal comparison.
int64_t getSExtValue() const
Get sign extended value.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
AnalysisUsage & addRequired()
LLVM_ABI void setPreservesCFG()
This function should be called by the pass iff it does not: (1) add or remove basic blocks from the function; (2) modify terminator instructions in any way.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
A function analysis which provides an AssumptionCache.
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
InstListType::reverse_iterator reverse_iterator
InstListType::iterator iterator
Instruction iterators...
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
Represents analyses that only rely on functions' control flow.
const APInt * getSingleElement() const
If this set contains a single element, return it, otherwise return null.
bool isSingleElement() const
Return true if this set contains exactly one member.
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
ValueT & at(const_arg_type_t< KeyT > Val)
at - Return the entry for the specified key, or abort if no such entry exists.
iterator find(const_arg_type_t< KeyT > Val)
Analysis pass which computes a DominatorTree.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
unsigned getNumElements() const
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
FunctionPass class - This class is used to implement most global optimizations.
Legacy wrapper pass to provide the GlobalsAAResult object.
ConstantInt * getInt1(bool V)
Get a constant value representing either true or false.
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
LLVM_ABI CallInst * CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment, Value *Mask, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Load intrinsic.
Value * CreatePtrAdd(Value *Ptr, Value *Offset, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Value * CreateBitOrPointerCast(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
LLVM_ABI CallInst * CreateMaskedStore(Value *Val, Value *Ptr, Align Alignment, Value *Mask)
Create a call to Masked Store intrinsic.
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile=false)
ConstantInt * getInt(const APInt &AI)
Get a constant integer value.
LLVM_ABI bool hasNoUnsignedWrap() const LLVM_READONLY
Determine whether the no unsigned wrap flag is set.
LLVM_ABI bool hasNoSignedWrap() const LLVM_READONLY
Determine whether the no signed wrap flag is set.
bool hasMetadata() const
Return true if this instruction has any metadata attached to it.
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that InsertPos lives in, right before InsertPos.
LLVM_ABI bool comesBefore(const Instruction *Other) const
Given an instruction Other in the same basic block as this instruction, return true if this instructi...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
An instruction for reading from memory.
LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
This class implements a map that also provides access to all stored values in a deterministic order.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
static LLVM_ABI MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
Pass interface - Implemented by all 'passes'.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Legacy wrapper pass to provide the SCEVAAResult object.
Analysis pass that exposes the ScalarEvolution for a function.
The main scalar evolution driver.
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI const SCEV * getMinusSCEV(SCEVUse LHS, SCEVUse RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
ConstantRange getSignedRange(const SCEV *S)
Determine the signed range for a particular SCEV.
LLVM_ABI const SCEV * getCouldNotCompute()
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
reference emplace_back(ArgTypes &&... Args)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Value * getPointerOperand()
Analysis pass providing the TargetTransformInfo.
bool isVectorTy() const
True if this is an instance of VectorType.
bool isPointerTy() const
True if this is an instance of PointerType.
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
Value * getOperand(unsigned i) const
unsigned getNumOperands() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
This is a wrapper around stripAndAccumulateConstantOffsets with the in-bounds requirement set to fals...
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
std::pair< iterator, bool > insert(const ValueT &V)
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
TypeSize getSequentialElementStride(const DataLayout &DL) const
Value * getOperand() const
const ParentTy * getParent() const
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
This class implements an extremely fast bulk output stream that can only output to a stream.
void push_front(reference Node)
Insert a node at the front; never copies.
void remove(reference N)
Remove a node by reference; never deletes.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
Abstract Attribute helper functions.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
const APInt & smax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be signed.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
ElementType
The element type of an SRV or UAV resource.
Context & getContext() const
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
auto min_element(R &&Range)
Provide wrappers to std::min_element which take ranges instead of having to pass begin/end explicitly...
unsigned getLoadStoreAddressSpace(const Value *I)
A helper function that returns the address space of the pointer operand of load or store instruction.
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
APFloat abs(APFloat X)
Returns the absolute value of the argument.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI Pass * createLoadStoreVectorizerPass()
Create a legacy pass manager instance of the LoadStoreVectorizer pass.
iterator_range< po_iterator< T > > post_order(const T &G)
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
LLVM_ABI Instruction * propagateMetadata(Instruction *I, ArrayRef< Value * > VL)
Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath, MD_nontemporal,...
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
LLVM_ABI Align getOrEnforceKnownAlignment(Value *V, MaybeAlign PrefAlign, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to ensure that the alignment of V is at least PrefAlign bytes.
bool isModSet(const ModRefInfo MRI)
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
generic_gep_type_iterator<> gep_type_iterator
bool isModOrRefSet(const ModRefInfo MRI)
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
ModRefInfo
Flags indicating whether a memory access modifies or references memory.
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
gep_type_iterator gep_type_begin(const User *GEP)
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
AAResults AliasAnalysis
Temporary typedef for legacy code that uses a generic AliasAnalysis pointer or reference.
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.