#include <type_traits>

#define DEBUG_TYPE "load-store-vectorizer"

STATISTIC(NumVectorInstructions, "Number of vector accesses generated");
STATISTIC(NumScalarsVectorized, "Number of scalar accesses vectorized");
using EqClassKey =
    std::tuple<const Value * /* result of getUnderlyingObject() */,
               unsigned /* AddrSpace */,
               unsigned /* Load/Store element size bits */,
               char /* IsLoad; char b/c bool can't be a DenseMap key */>;

[[maybe_unused]] llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
                                               const EqClassKey &K) {
  const auto &[UnderlyingObject, AddrSpace, ElementSize, IsLoad] = K;
  OS << (IsLoad ? "load" : "store") << " of " << *UnderlyingObject
     << " of element size " << ElementSize << " bits in addrspace "
     << AddrSpace;
  return OS;
}
struct ChainElem {
  Instruction *Inst;
  APInt OffsetFromLeader;
  ChainElem(Instruction *Inst, APInt OffsetFromLeader)
      : Inst(std::move(Inst)), OffsetFromLeader(std::move(OffsetFromLeader)) {}
};
using Chain = SmallVector<ChainElem, 1>;
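// Illustrative example: a chain of four consecutive i32 loads, with the first
// load as leader, carries OffsetFromLeader values of 0, 4, 8, and 12 bytes.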
void sortChainInBBOrder(Chain &C) {
  sort(C, [](auto &A, auto &B) { return A.Inst->comesBefore(B.Inst); });
}
void sortChainInOffsetOrder(Chain &C) {
  sort(C, [](const auto &A, const auto &B) {
    if (A.OffsetFromLeader != B.OffsetFromLeader)
      return A.OffsetFromLeader.slt(B.OffsetFromLeader);
    return A.Inst->comesBefore(B.Inst); // stable tiebreak
  });
}
void dumpChain(ArrayRef<ChainElem> C) {
  for (const auto &E : C)
    dbgs() << "  " << *E.Inst << " (offset " << E.OffsetFromLeader << ")\n";
}
using EquivalenceClassMap =
    MapVector<EqClassKey, SmallVector<Instruction *, 8>>;

constexpr unsigned StackAdjustedAlignment = 4;
Instruction *propagateMetadata(Instruction *I, const Chain &C) {
  SmallVector<Value *, 8> Values;
  for (const ChainElem &E : C)
    Values.emplace_back(E.Inst);
  return propagateMetadata(I, Values);
}

bool isInvariantLoad(const Instruction *I) {
  const LoadInst *LI = dyn_cast<LoadInst>(I);
  return LI != nullptr && LI->hasMetadata(LLVMContext::MD_invariant_load);
}
// Reorders the instructions that I depends on (the instructions defining its
// operands) to ensure they dominate I.
void reorder(Instruction *I) {
  SmallPtrSet<Instruction *, 16> InstructionsToMove;
  SmallVector<Instruction *, 16> Worklist;

  Worklist.emplace_back(I);
  while (!Worklist.empty()) {
    Instruction *IW = Worklist.pop_back_val();
    int NumOperands = IW->getNumOperands();
    for (int Idx = 0; Idx < NumOperands; Idx++) {
      Instruction *IM = dyn_cast<Instruction>(IW->getOperand(Idx));
      if (!IM || IM->getOpcode() == Instruction::PHI)
        continue;

      // If IM is in another BB, no need to move it, because this pass only
      // vectorizes instructions within one BB.
      if (IM->getParent() != I->getParent())
        continue;

      assert(IM != I && "Unexpected cycle while re-ordering instructions");

      if (!IM->comesBefore(I)) {
        InstructionsToMove.insert(IM);
        Worklist.emplace_back(IM);
      }
    }
  }

  // All instructions to move should follow I; start from I, not from begin().
  for (auto BBI = I->getIterator(), E = I->getParent()->end(); BBI != E;) {
    Instruction *IM = &*(BBI++);
    if (!InstructionsToMove.contains(IM))
      continue;
    IM->moveBefore(I->getIterator());
  }
}
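// Why this is needed (illustrative): vectorizeChain() hoists a whole load
// chain to the position of its first load, so address computations that
// originally sat between the loads can end up below the new vector load;
// reorder() moves exactly that def-use cone back above it.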
class Vectorizer {
  Function &F;
  AliasAnalysis &AA;
  AssumptionCache &AC;
  DominatorTree &DT;
  ScalarEvolution &SE;
  TargetTransformInfo &TTI;
  const DataLayout &DL;
  IRBuilder<> Builder;

  // Instructions made dead by vectorization, erased after the pass runs.
  SmallVector<Instruction *, 128> ToErase;

public:
  Vectorizer(Function &F, AliasAnalysis &AA, AssumptionCache &AC,
             DominatorTree &DT, ScalarEvolution &SE, TargetTransformInfo &TTI)
      : F(F), AA(AA), AC(AC), DT(DT), SE(SE), TTI(TTI),
        DL(F.getDataLayout()), Builder(SE.getContext()) {}

  bool run();
private:
  static const unsigned MaxDepth = 3;

  bool runOnPseudoBB(BasicBlock::iterator Begin, BasicBlock::iterator End);
  bool runOnEquivalenceClass(const EqClassKey &EqClassKey,
                             ArrayRef<Instruction *> EqClass);
  bool runOnChain(Chain &C);

  // Splits the chain into subchains of instructions which read/write a
  // contiguous block of memory.
  std::vector<Chain> splitChainByContiguity(Chain &C);

  // Splits the chain into subchains where it's safe to hoist loads up to the
  // beginning of the sub-chain and to sink stores down to its end.
  std::vector<Chain> splitChainByMayAliasInstrs(Chain &C);

  // Splits the chain into subchains that make legal, aligned accesses.
  std::vector<Chain> splitChainByAlignment(Chain &C);

  bool vectorizeChain(Chain &C);

  // Tries to compute the offset in bytes PtrB - PtrA.
  std::optional<APInt> getConstantOffset(Value *PtrA, Value *PtrB,
                                         Instruction *ContextInst,
                                         unsigned Depth = 0);
  std::optional<APInt> getConstantOffsetComplexAddrs(Value *PtrA, Value *PtrB,
                                                     Instruction *ContextInst,
                                                     unsigned Depth);
  std::optional<APInt> getConstantOffsetSelects(Value *PtrA, Value *PtrB,
                                                Instruction *ContextInst,
                                                unsigned Depth);

  // Gets the element type of the vector that the chain will load or store.
  Type *getChainElemTy(const Chain &C);

  // Determines whether ChainElem can be moved up (if IsLoadChain) or down
  // (if !IsLoadChain) to ChainBegin, i.e. whether there are no intervening
  // may-alias instructions.
  template <bool IsLoadChain>
  bool isSafeToMove(
      Instruction *ChainElem, Instruction *ChainBegin,
      const DenseMap<Instruction *, APInt /*OffsetFromLeader*/> &ChainOffsets,
      BatchAAResults &BatchAA);

  EquivalenceClassMap collectEquivalenceClasses(BasicBlock::iterator Begin,
                                                BasicBlock::iterator End);
  std::vector<Chain> gatherChains(ArrayRef<Instruction *> Instrs);

  // Merges equivalence classes whose underlying objects differ by one level
  // of indirection.
  void mergeEquivalenceClasses(EquivalenceClassMap &EQClasses) const;
};
class LoadStoreVectorizerLegacyPass : public FunctionPass {
public:
  static char ID;

  LoadStoreVectorizerLegacyPass() : FunctionPass(ID) {
    initializeLoadStoreVectorizerLegacyPassPass(
        *PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override;

  StringRef getPassName() const override {
    return "GPU Load and Store Vectorizer";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AAResultsWrapperPass>();
    AU.addRequired<AssumptionCacheTracker>();
    AU.addRequired<ScalarEvolutionWrapperPass>();
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
    AU.setPreservesCFG();
  }
};

char LoadStoreVectorizerLegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(LoadStoreVectorizerLegacyPass, DEBUG_TYPE,
                      "Vectorize load and store instructions", false, false)
// ...
INITIALIZE_PASS_END(LoadStoreVectorizerLegacyPass, DEBUG_TYPE,
                    "Vectorize load and store instructions", false, false)

Pass *llvm::createLoadStoreVectorizerPass() {
  return new LoadStoreVectorizerLegacyPass();
}
bool LoadStoreVectorizerLegacyPass::runOnFunction(Function &F) {
  // Don't vectorize when the attribute NoImplicitFloat is used.
  if (skipFunction(F) || F.hasFnAttribute(Attribute::NoImplicitFloat))
    return false;

  AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  TargetTransformInfo &TTI =
      getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
  AssumptionCache &AC =
      getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);

  return Vectorizer(F, AA, AC, DT, SE, TTI).run();
}
PreservedAnalyses LoadStoreVectorizerPass::run(Function &F,
                                               FunctionAnalysisManager &AM) {
  // Don't vectorize when the attribute NoImplicitFloat is used.
  if (F.hasFnAttribute(Attribute::NoImplicitFloat))
    return PreservedAnalyses::all();
  // ...
}
bool Vectorizer::run() {
  bool Changed = false;
  // Break up each basic block at "barrier" instructions (those not guaranteed
  // to transfer execution to their successor) and vectorize the pieces.
  // ...
  for (auto It = Barriers.begin(), End = std::prev(Barriers.end()); It != End;
       ++It)
    Changed |= runOnPseudoBB(*It, *std::next(It));

  // Erase the scalar accesses that were replaced by vector ones.
  for (Instruction *I : ToErase) {
    // ...
    if (I->use_empty())
      I->eraseFromParent();
  }
  // ...
  return Changed;
}
bool Vectorizer::runOnPseudoBB(BasicBlock::iterator Begin,
                               BasicBlock::iterator End) {
  LLVM_DEBUG({
    dbgs() << "LSV: Running on pseudo-BB [" << *Begin << " ... ";
    if (End != Begin->getParent()->end())
      dbgs() << *End;
    else
      dbgs() << "<BB end>";
    dbgs() << ")\n";
  });

  bool Changed = false;
  for (const auto &[EqClassKey, EqClass] :
       collectEquivalenceClasses(Begin, End))
    Changed |= runOnEquivalenceClass(EqClassKey, EqClass);
  return Changed;
}
bool Vectorizer::runOnEquivalenceClass(const EqClassKey &EqClassKey,
                                       ArrayRef<Instruction *> EqClass) {
  bool Changed = false;

  LLVM_DEBUG({
    dbgs() << "LSV: Running on equivalence class of size " << EqClass.size()
           << " keyed on " << EqClassKey << ":\n";
    for (Instruction *I : EqClass)
      dbgs() << "  " << *I << "\n";
  });

  std::vector<Chain> Chains = gatherChains(EqClass);
  LLVM_DEBUG(dbgs() << "LSV: Got " << Chains.size()
                    << " nontrivial chains.\n";);
  for (Chain &C : Chains)
    Changed |= runOnChain(C);
  return Changed;
}
bool Vectorizer::runOnChain(Chain &C) {
  LLVM_DEBUG({
    dbgs() << "LSV: Running on chain with " << C.size() << " instructions:\n";
    dumpChain(C);
  });

  // Split up the chain into increasingly smaller chains, until we can finally
  // vectorize them.
  bool Changed = false;
  for (auto &C : splitChainByMayAliasInstrs(C))
    for (auto &C : splitChainByContiguity(C))
      for (auto &C : splitChainByAlignment(C))
        Changed |= vectorizeChain(C);
  return Changed;
}
std::vector<Chain> Vectorizer::splitChainByMayAliasInstrs(Chain &C) {
  if (C.empty())
    return {};

  sortChainInBBOrder(C);

  LLVM_DEBUG({
    dbgs() << "LSV: splitChainByMayAliasInstrs considering chain:\n";
    dumpChain(C);
  });

  // Elements of the chain with non-overlapping offsets can't alias, but AA
  // may not be smart enough to figure this out; cache the offsets.
  DenseMap<Instruction *, APInt /*OffsetFromLeader*/> ChainOffsets;
  for (const auto &E : C)
    ChainOffsets.insert({&*E.Inst, E.OffsetFromLeader});

  // The IR is not modified while this function runs, so a batched alias
  // analysis is safe and reduces compile time.
  BatchAAResults BatchAA(AA);

  // Loads get hoisted up to the first load in the chain; stores get sunk
  // down to the last store.
  auto Impl = [&](auto IsLoad) {
    // MSVC is unhappy if IsLoad is a capture, so pass it as an arg.
    auto [ChainBegin, ChainEnd] = [&](auto IsLoad) {
      if constexpr (IsLoad())
        return std::make_pair(C.begin(), C.end());
      else
        return std::make_pair(C.rbegin(), C.rend());
    }(IsLoad);
    assert(ChainBegin != ChainEnd);

    std::vector<Chain> Chains;
    SmallVector<ChainElem, 1> NewChain;
    NewChain.emplace_back(*ChainBegin);
    for (auto ChainIt = std::next(ChainBegin); ChainIt != ChainEnd; ++ChainIt) {
      if (isSafeToMove<IsLoad>(ChainIt->Inst, NewChain.front().Inst,
                               ChainOffsets, BatchAA)) {
        LLVM_DEBUG(dbgs() << "LSV: No intervening may-alias instrs; can merge "
                          << *ChainIt->Inst << " into " << *ChainBegin->Inst
                          << "\n");
        NewChain.emplace_back(*ChainIt);
      } else {
        LLVM_DEBUG(
            dbgs() << "LSV: Found intervening may-alias instrs; cannot merge "
                   << *ChainIt->Inst << " into " << *ChainBegin->Inst << "\n");
        if (NewChain.size() > 1) {
          LLVM_DEBUG({
            dbgs() << "LSV: got nontrivial chain without aliasing instrs:\n";
            dumpChain(NewChain);
          });
          Chains.emplace_back(std::move(NewChain));
        }
        // Start a new chain.
        NewChain = SmallVector<ChainElem, 1>({*ChainIt});
      }
    }
    if (NewChain.size() > 1) {
      LLVM_DEBUG({
        dbgs() << "LSV: got nontrivial chain without aliasing instrs:\n";
        dumpChain(NewChain);
      });
      Chains.emplace_back(std::move(NewChain));
    }
    return Chains;
  };

  if (isa<LoadInst>(C[0].Inst))
    return Impl(/*IsLoad=*/std::bool_constant<true>());

  assert(isa<StoreInst>(C[0].Inst));
  return Impl(/*IsLoad=*/std::bool_constant<false>());
}
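// Note on the two directions (illustrative): a load chain merges later loads
// *up* into the first load, so the scan walks forward and each load must be
// hoistable past everything between it and the chain head. A store chain
// merges earlier stores *down* into the last store, hence the reverse
// iterators in Impl above.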
std::vector<Chain> Vectorizer::splitChainByContiguity(Chain &C) {
  if (C.empty())
    return {};

  sortChainInOffsetOrder(C);

  LLVM_DEBUG({
    dbgs() << "LSV: splitChainByContiguity considering chain:\n";
    dumpChain(C);
  });

  std::vector<Chain> Ret;
  Ret.push_back({C.front()});

  unsigned ChainElemTyBits = DL.getTypeSizeInBits(getChainElemTy(C));
  APInt PrevReadEnd = C[0].OffsetFromLeader +
                      DL.getTypeStoreSize(getLoadStoreType(C[0].Inst));
  for (auto It = std::next(C.begin()), End = C.end(); It != End; ++It) {
    auto &CurChain = Ret.back();
    unsigned SzBytes = DL.getTypeStoreSize(getLoadStoreType(It->Inst));
    assert(8 * SzBytes % ChainElemTyBits == 0 &&
           "Every chain-element size must be a multiple of the element size "
           "after flattening.");
    APInt ReadEnd = It->OffsetFromLeader + SzBytes;

    // Add this instruction to the end of the current chain, or start a new
    // one. Accesses are treated as contiguous if the next access starts no
    // later than the previous read-end and any overlap is a whole number of
    // elements.
    bool AreContiguous = false;
    if (It->OffsetFromLeader.sle(PrevReadEnd)) {
      uint64_t Overlap = (PrevReadEnd - It->OffsetFromLeader).getZExtValue();
      if (8 * Overlap % ChainElemTyBits == 0)
        AreContiguous = true;
    }
    LLVM_DEBUG(dbgs() << "LSV: Instruction is "
                      << (AreContiguous ? "contiguous" : "chain-breaker") << " "
                      << *It->Inst << " (starts at offset "
                      << It->OffsetFromLeader << ")\n");

    if (AreContiguous)
      CurChain.push_back(*It);
    else
      Ret.push_back({*It});
    PrevReadEnd = APIntOps::smax(PrevReadEnd, ReadEnd);
  }

  // Filter out length-1 chains, these are uninteresting.
  llvm::erase_if(Ret, [](const auto &Chain) { return Chain.size() <= 1; });
  return Ret;
}
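// ---------------------------------------------------------------------------
// Illustrative, standalone sketch (not part of the pass): the contiguity rule
// above restated on plain integers. Names and types here are hypothetical;
// the pass itself works on APInt offsets and DataLayout-derived sizes.
// ---------------------------------------------------------------------------
static bool extendsChain(int64_t Offset, int64_t SzBytes, int64_t &PrevReadEnd,
                         unsigned ElemBits) {
  bool Contiguous = false;
  if (Offset <= PrevReadEnd) {
    // The overlap (possibly zero) must be a whole number of elements.
    uint64_t OverlapBits = 8 * static_cast<uint64_t>(PrevReadEnd - Offset);
    Contiguous = OverlapBits % ElemBits == 0;
  }
  // Either way the read-end advances; a chain-breaker starts a fresh chain
  // whose read-end is its own.
  if (Offset + SzBytes > PrevReadEnd)
    PrevReadEnd = Offset + SzBytes;
  return Contiguous;
}
// E.g. with ElemBits = 32: 4-byte accesses at offsets 0, 4, 8 are contiguous;
// an access at offset 6 overlaps by 2 bytes = 16 bits, which is not a
// multiple of 32, so it breaks the chain.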
Type *Vectorizer::getChainElemTy(const Chain &C) {
  assert(!C.empty());
  // The rules: if any element accesses a pointer, flatten everything to
  // integers of the leader's scalar width; otherwise prefer an integer type
  // that already appears in the chain.
  if (any_of(C, [](const ChainElem &E) {
        return getLoadStoreType(E.Inst)->getScalarType()->isPointerTy();
      })) {
    return Type::getIntNTy(
        F.getContext(),
        DL.getTypeSizeInBits(getLoadStoreType(C[0].Inst)->getScalarType()));
  }

  for (const ChainElem &E : C)
    if (Type *T = getLoadStoreType(E.Inst)->getScalarType(); T->isIntegerTy())
      return T;
  return getLoadStoreType(C[0].Inst)->getScalarType();
}
std::vector<Chain> Vectorizer::splitChainByAlignment(Chain &C) {
  // We use a simple greedy algorithm: starting from each element, find the
  // longest legal, aligned candidate subchain and emit it.
  if (C.empty())
    return {};

  sortChainInOffsetOrder(C);

  LLVM_DEBUG({
    dbgs() << "LSV: splitChainByAlignment considering chain:\n";
    dumpChain(C);
  });

  bool IsLoadChain = isa<LoadInst>(C[0].Inst);
  auto GetVectorFactor = [&](unsigned VF, unsigned LoadStoreSize,
                             unsigned ChainSizeBytes, VectorType *VecTy) {
    return IsLoadChain ? TTI.getLoadVectorFactor(VF, LoadStoreSize,
                                                 ChainSizeBytes, VecTy)
                       : TTI.getStoreVectorFactor(VF, LoadStoreSize,
                                                  ChainSizeBytes, VecTy);
  };

#ifndef NDEBUG
  for (const auto &E : C) {
    Type *Ty = getLoadStoreType(E.Inst)->getScalarType();
    assert(isPowerOf2_32(DL.getTypeSizeInBits(Ty)) &&
           "Should have filtered out non-power-of-two elements in "
           "collectEquivalenceClasses.");
  }
#endif

  unsigned AS = getLoadStoreAddressSpace(C[0].Inst);
  unsigned VecRegBytes = TTI.getLoadStoreVecRegBitWidth(AS) / 8;

  std::vector<Chain> Ret;
  for (unsigned CBegin = 0; CBegin < C.size(); ++CBegin) {
    // Find candidate chains of size not greater than the largest vector reg.
    // These chains are over the closed interval [CBegin, CEnd].
    SmallVector<std::pair<unsigned /*CEnd*/, unsigned /*SizeBytes*/>, 8>
        CandidateChains;
    unsigned Sz = DL.getTypeStoreSize(getLoadStoreType(C[CBegin].Inst));
    APInt PrevReadEnd = C[CBegin].OffsetFromLeader + Sz;
    for (unsigned CEnd = CBegin + 1, Size = C.size(); CEnd < Size; ++CEnd) {
      APInt ReadEnd = C[CEnd].OffsetFromLeader +
                      DL.getTypeStoreSize(getLoadStoreType(C[CEnd].Inst));
      // Overlapping accesses only contribute the bytes past the previous
      // read-end.
      unsigned BytesAdded =
          PrevReadEnd.sle(ReadEnd) ? (ReadEnd - PrevReadEnd).getSExtValue() : 0;
      Sz += BytesAdded;
      if (Sz > VecRegBytes)
        break;
      CandidateChains.emplace_back(CEnd, Sz);
      PrevReadEnd = APIntOps::smax(PrevReadEnd, ReadEnd);
    }

    // Consider the longest chain first.
    for (auto It = CandidateChains.rbegin(), End = CandidateChains.rend();
         It != End; ++It) {
      auto [CEnd, SizeBytes] = *It;
      LLVM_DEBUG(
          dbgs() << "LSV: splitChainByAlignment considering candidate chain ["
                 << *C[CBegin].Inst << " ... " << *C[CEnd].Inst << "]\n");

      Type *VecElemTy = getChainElemTy(C);
      // Note, VecElemTy is a power of 2, but might be less than one byte. For
      // example, we can vectorize 2 x <2 x i4> to <4 x i4>, and in this case
      // VecElemTy would be i4.
      unsigned VecElemBits = DL.getTypeSizeInBits(VecElemTy);

      // SizeBytes and VecElemBits are powers of 2, so they divide evenly.
      assert((8 * SizeBytes) % VecElemBits == 0);
      unsigned NumVecElems = 8 * SizeBytes / VecElemBits;
      FixedVectorType *VecTy = FixedVectorType::get(VecElemTy, NumVecElems);
      unsigned VF = 8 * VecRegBytes / VecElemBits;

      // Check that TTI is happy with this vectorization factor.
      unsigned TargetVF = GetVectorFactor(VF, VecElemBits,
                                          VecElemBits * NumVecElems / 8, VecTy);
      if (TargetVF != VF && TargetVF < NumVecElems) {
        LLVM_DEBUG(
            dbgs() << "LSV: splitChainByAlignment discarding candidate chain "
                      "because TargetVF="
                   << TargetVF << " != VF=" << VF
                   << " and TargetVF < NumVecElems=" << NumVecElems << "\n");
        continue;
      }

      // Is a load/store with this alignment allowed by TTI and at least as
      // fast as an unvectorized load/store?
      auto IsAllowedAndFast = [&, SizeBytes = SizeBytes, &TTI = TTI,
                               &F = F](Align Alignment) {
        if (Alignment.value() % SizeBytes == 0)
          return true;
        unsigned VectorizedSpeed = 0;
        bool AllowsMisaligned = TTI.allowsMisalignedMemoryAccesses(
            F.getContext(), SizeBytes * 8, AS, Alignment, &VectorizedSpeed);
        if (!AllowsMisaligned) {
          LLVM_DEBUG(
              dbgs() << "LSV: Access of " << SizeBytes << "B in addrspace "
                     << AS << " with alignment " << Alignment.value()
                     << " is misaligned, and therefore can't be vectorized.\n");
          return false;
        }

        unsigned ElementwiseSpeed = 0;
        (TTI).allowsMisalignedMemoryAccesses((F).getContext(), VecElemBits, AS,
                                             Alignment, &ElementwiseSpeed);
        if (VectorizedSpeed < ElementwiseSpeed) {
          LLVM_DEBUG(
              dbgs() << "LSV: Access of " << SizeBytes << "B in addrspace "
                     << AS << " with alignment " << Alignment.value()
                     << " has relative speed " << VectorizedSpeed
                     << ", which is lower than the elementwise speed of "
                     << ElementwiseSpeed
                     << ". Therefore this access won't be vectorized.\n");
          return false;
        }
        return true;
      };

      // If this is an access to an alloca, try to upgrade the alloca's
      // alignment.
      Align Alignment = getLoadStoreAlignment(C[CBegin].Inst);
      Value *PtrOperand = getLoadStorePointerOperand(C[CBegin].Inst);
      bool IsAllocaAccess = AS == DL.getAllocaAddrSpace() &&
                            isa<AllocaInst>(PtrOperand->stripPointerCasts());
      Align PrefAlign = Align(StackAdjustedAlignment);
      if (IsAllocaAccess && Alignment.value() % SizeBytes != 0 &&
          IsAllowedAndFast(PrefAlign)) {
        Align NewAlign = getOrEnforceKnownAlignment(
            PtrOperand, PrefAlign, DL, C[CBegin].Inst, nullptr, &DT);
        if (NewAlign >= Alignment) {
          LLVM_DEBUG(dbgs()
                     << "LSV: splitChainByAlignment upgrading alloca alignment "
                        "from "
                     << Alignment.value() << " to " << NewAlign.value()
                     << "\n");
          Alignment = NewAlign;
        }
      }

      if (!IsAllowedAndFast(Alignment)) {
        LLVM_DEBUG(
            dbgs() << "LSV: splitChainByAlignment discarding candidate chain "
                      "because its alignment is not AllowedAndFast: "
                   << Alignment.value() << "\n");
        continue;
      }

      if ((IsLoadChain &&
           !TTI.isLegalToVectorizeLoadChain(SizeBytes, Alignment, AS)) ||
          (!IsLoadChain &&
           !TTI.isLegalToVectorizeStoreChain(SizeBytes, Alignment, AS))) {
        LLVM_DEBUG(
            dbgs() << "LSV: splitChainByAlignment discarding candidate chain "
                      "because !isLegalToVectorizeLoad/StoreChain.\n");
        continue;
      }

      // Hooray, we can vectorize this chain!
      Chain &NewChain = Ret.emplace_back();
      for (unsigned I = CBegin; I <= CEnd; ++I)
        NewChain.emplace_back(C[I]);
      CBegin = CEnd; // Skip over the instructions we just added to a chain.
      break;
    }
  }
  return Ret;
}
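// Worked example of the sizing arithmetic above (hypothetical target): with
// 128-bit vector registers, VecRegBytes = 16. For i32 elements,
// VecElemBits = 32, so VF = 8 * 16 / 32 = 4. A candidate of SizeBytes = 16
// then has NumVecElems = 8 * 16 / 32 = 4, i.e. it becomes one <4 x i32>
// access if TTI accepts the factor and the alignment checks pass.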
bool Vectorizer::vectorizeChain(Chain &C) {
  if (C.size() < 2)
    return false;

  sortChainInOffsetOrder(C);

  LLVM_DEBUG({
    dbgs() << "LSV: Vectorizing chain of " << C.size() << " instructions:\n";
    dumpChain(C);
  });

  Type *VecElemTy = getChainElemTy(C);
  bool IsLoadChain = isa<LoadInst>(C[0].Inst);
  unsigned AS = getLoadStoreAddressSpace(C[0].Inst);
  unsigned BytesAdded = DL.getTypeStoreSize(getLoadStoreType(C[0].Inst));
  APInt PrevReadEnd = C[0].OffsetFromLeader + BytesAdded;
  unsigned ChainBytes = BytesAdded;
  for (auto It = std::next(C.begin()), End = C.end(); It != End; ++It) {
    unsigned SzBytes = DL.getTypeStoreSize(getLoadStoreType(It->Inst));
    APInt ReadEnd = It->OffsetFromLeader + SzBytes;
    // Update ChainBytes considering possible overlap.
    BytesAdded =
        PrevReadEnd.sle(ReadEnd) ? (ReadEnd - PrevReadEnd).getSExtValue() : 0;
    ChainBytes += BytesAdded;
    PrevReadEnd = APIntOps::smax(PrevReadEnd, ReadEnd);
  }

  assert(8 * ChainBytes % DL.getTypeSizeInBits(VecElemTy) == 0);
  // VecTy is a power of 2 and 1 byte at smallest, but VecElemTy may be
  // smaller than 1 byte (e.g. VecTy == <32 x i1>). A fully-overlapping chain
  // may even collapse to a single element.
  unsigned NumElem = 8 * ChainBytes / DL.getTypeSizeInBits(VecElemTy);
  Type *VecTy = NumElem == 1 ? VecElemTy
                             : FixedVectorType::get(VecElemTy, NumElem);

  Align Alignment = getLoadStoreAlignment(C[0].Inst);
  // If this is a load/store of an alloca, we might have upgraded the alloca's
  // alignment earlier. Get the new alignment.
  if (AS == DL.getAllocaAddrSpace()) {
    Alignment = std::max(
        Alignment,
        getOrEnforceKnownAlignment(getLoadStorePointerOperand(C[0].Inst),
                                   MaybeAlign(), DL, C[0].Inst, nullptr, &DT));
  }

  // All elements of the chain must have the same scalar-type size.
#ifndef NDEBUG
  for (const ChainElem &E : C)
    assert(DL.getTypeStoreSize(getLoadStoreType(E.Inst)->getScalarType()) ==
           DL.getTypeStoreSize(VecElemTy));
#endif

  Instruction *VecInst;
  if (IsLoadChain) {
    // Loads get hoisted to the location of the first load in the chain. We
    // may also need to hoist the (transitive) operands of the loads.
    Builder.SetInsertPoint(
        llvm::min_element(C, [](const auto &A, const auto &B) {
          return A.Inst->comesBefore(B.Inst);
        })->Inst);

    // Chain is in offset order, so C[0] is the instr with the lowest offset,
    // i.e. the root of the vector.
    VecInst = Builder.CreateAlignedLoad(
        VecTy, getLoadStorePointerOperand(C[0].Inst), Alignment);

    for (const ChainElem &E : C) {
      Instruction *I = E.Inst;
      Value *V;
      Type *T = getLoadStoreType(I);
      unsigned EOffset =
          (E.OffsetFromLeader - C[0].OffsetFromLeader).getZExtValue();
      unsigned VecIdx = 8 * EOffset / DL.getTypeSizeInBits(VecElemTy);
      if (auto *VT = dyn_cast<FixedVectorType>(T)) {
        auto Mask = llvm::to_vector<8>(
            llvm::seq<int>(VecIdx, VecIdx + VT->getNumElements()));
        V = Builder.CreateShuffleVector(VecInst, Mask, I->getName());
      } else if (VecTy != VecElemTy) {
        V = Builder.CreateExtractElement(VecInst, Builder.getInt32(VecIdx),
                                         I->getName());
      } else {
        V = VecInst;
      }
      if (V->getType() != I->getType())
        V = Builder.CreateBitOrPointerCast(V, I->getType());
      I->replaceAllUsesWith(V);
    }

    // Finally, reorder the instrs in the BB so that the (transitive) operands
    // of VecInst appear before it.
    reorder(VecInst);
  } else {
    // Stores get sunk to the location of the last store in the chain.
    Builder.SetInsertPoint(
        llvm::max_element(C, [](const auto &A, const auto &B) {
          return A.Inst->comesBefore(B.Inst);
        })->Inst);

    // Build the vector to store.
    Value *Vec = PoisonValue::get(VecTy);
    auto InsertElem = [&](Value *V, unsigned VecIdx) {
      if (V->getType() != VecElemTy)
        V = Builder.CreateBitOrPointerCast(V, VecElemTy);
      Vec = Builder.CreateInsertElement(Vec, V, Builder.getInt32(VecIdx));
    };
    for (const ChainElem &E : C) {
      auto *I = cast<StoreInst>(E.Inst);
      unsigned EOffset =
          (E.OffsetFromLeader - C[0].OffsetFromLeader).getZExtValue();
      unsigned VecIdx = 8 * EOffset / DL.getTypeSizeInBits(VecElemTy);
      if (FixedVectorType *VT =
              dyn_cast<FixedVectorType>(getLoadStoreType(I))) {
        for (int J = 0, JE = VT->getNumElements(); J < JE; ++J) {
          InsertElem(Builder.CreateExtractElement(I->getValueOperand(),
                                                  Builder.getInt32(J)),
                     VecIdx++);
        }
      } else {
        InsertElem(I->getValueOperand(), VecIdx);
      }
    }

    // Chain is in offset order, so C[0] is the instr with the lowest offset,
    // i.e. the root of the vector.
    VecInst = Builder.CreateAlignedStore(
        Vec, getLoadStorePointerOperand(C[0].Inst), Alignment);
  }

  propagateMetadata(VecInst, C);

  for (const ChainElem &E : C)
    ToErase.emplace_back(E.Inst);

  ++NumVectorInstructions;
  NumScalarsVectorized += C.size();
  return true;
}
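// Worked example of the lane arithmetic above (hypothetical chain): with i32
// elements, a member at byte offset 8 from the leader lands at
// VecIdx = 8 * 8 / 32 = 2, i.e. lane 2 of the new vector.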
template <bool IsLoadChain>
bool Vectorizer::isSafeToMove(
    Instruction *ChainElem, Instruction *ChainBegin,
    const DenseMap<Instruction *, APInt /*OffsetFromLeader*/> &ChainOffsets,
    BatchAAResults &BatchAA) {
  LLVM_DEBUG(dbgs() << "LSV: isSafeToMove(" << *ChainElem << " -> "
                    << *ChainBegin << ")\n");

  if (ChainElem == ChainBegin)
    return true;

  // Invariant loads can always be reordered; by definition they are not
  // clobbered by stores.
  if (isInvariantLoad(ChainElem))
    return true;

  // Loads are scanned upwards (reverse iterators), stores downwards.
  auto BBIt = std::next([&] {
    if constexpr (IsLoadChain)
      return BasicBlock::reverse_iterator(ChainElem->getIterator());
    else
      return BasicBlock::iterator(ChainElem->getIterator());
  }());
  auto BBItEnd = std::next([&] {
    if constexpr (IsLoadChain)
      return BasicBlock::reverse_iterator(ChainBegin->getIterator());
    else
      return BasicBlock::iterator(ChainBegin->getIterator());
  }());

  const APInt &ChainElemOffset = ChainOffsets.at(ChainElem);
  const unsigned ChainElemSize =
      DL.getTypeStoreSize(getLoadStoreType(ChainElem));

  for (; BBIt != BBItEnd; ++BBIt) {
    Instruction *I = &*BBIt;

    if (!I->mayReadOrWriteMemory())
      continue;

    // If I is in the chain, we can tell whether it aliases ChainElem by
    // checking which offsets it accesses.
    if (auto OffsetIt = ChainOffsets.find(I); OffsetIt != ChainOffsets.end()) {
      // I and ChainElem overlap if:
      //   - they have the same offset, OR
      //   - I's offset is less than ChainElem's, but I touches past the
      //     beginning of ChainElem, OR
      //   - ChainElem's offset is less than I's, but ChainElem touches past
      //     the beginning of I.
      const APInt &IOffset = OffsetIt->second;
      unsigned IElemSize = DL.getTypeStoreSize(getLoadStoreType(I));
      if (IOffset == ChainElemOffset ||
          (IOffset.sle(ChainElemOffset) &&
           (IOffset + IElemSize).sgt(ChainElemOffset)) ||
          (ChainElemOffset.sle(IOffset) &&
           (ChainElemOffset + ChainElemSize).sgt(OffsetIt->second))) {
        LLVM_DEBUG({
          dbgs() << "LSV: Found alias in chain: " << *I << "\n";
        });
        return false;
      }
      continue; // The accesses are provably disjoint.
    }

    // Fall back to the alias analysis.
    ModRefInfo MR = BatchAA.getModRefInfo(I, MemoryLocation::get(ChainElem));
    if (IsLoadChain ? isModSet(MR) : isModOrRefSet(MR)) {
      LLVM_DEBUG(dbgs() << "LSV: Found alias in chain:\n"
                        << "  Aliasing instruction:\n"
                        << "    " << *I << '\n'
                        << "  Aliased instruction and pointer:\n"
                        << "    " << *ChainElem << '\n');
      return false;
    }
  }
  return true;
}
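// Worked example of the in-chain overlap test above (hypothetical offsets):
// for ChainElemOffset = 8 and ChainElemSize = 4, an in-chain access at
// IOffset = 6 with IElemSize = 4 aliases, since 6 <= 8 and 6 + 4 > 8.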
static bool checkIfSafeAddSequence(const APInt &IdxDiff, Instruction *AddOpA,
                                   unsigned MatchingOpIdxA, Instruction *AddOpB,
                                   unsigned MatchingOpIdxB, bool Signed) {
  LLVM_DEBUG(dbgs() << "LSV: checkIfSafeAddSequence IdxDiff=" << IdxDiff
                    << ", AddOpA=" << *AddOpA << ", MatchingOpIdxA="
                    << MatchingOpIdxA << ", AddOpB=" << *AddOpB
                    << ", MatchingOpIdxB=" << MatchingOpIdxB
                    << ", Signed=" << Signed << "\n");
  // If both adds have NSW/NUW and share one operand, the transformation is
  // safe if we can prove that adding IdxDiff to the other operand of AddOpA
  // won't overflow.
  assert(AddOpA->getOpcode() == Instruction::Add &&
         AddOpB->getOpcode() == Instruction::Add &&
         checkNoWrapFlags(AddOpA, Signed) && checkNoWrapFlags(AddOpB, Signed));
  if (AddOpA->getOperand(MatchingOpIdxA) ==
      AddOpB->getOperand(MatchingOpIdxB)) {
    Value *OtherOperandA = AddOpA->getOperand(MatchingOpIdxA == 1 ? 0 : 1);
    Value *OtherOperandB = AddOpB->getOperand(MatchingOpIdxB == 1 ? 0 : 1);
    Instruction *OtherInstrA = dyn_cast<Instruction>(OtherOperandA);
    Instruction *OtherInstrB = dyn_cast<Instruction>(OtherOperandB);
    // Match `x +nsw/nuw y` and `x +nsw/nuw (y +nsw/nuw IdxDiff)`.
    if (OtherInstrB && OtherInstrB->getOpcode() == Instruction::Add &&
        checkNoWrapFlags(OtherInstrB, Signed) &&
        isa<ConstantInt>(OtherInstrB->getOperand(1))) {
      int64_t CstVal =
          cast<ConstantInt>(OtherInstrB->getOperand(1))->getSExtValue();
      if (OtherInstrB->getOperand(0) == OtherOperandA &&
          IdxDiff.getSExtValue() == CstVal)
        return true;
    }
    // Match `x +nsw/nuw (y +nsw/nuw -IdxDiff)` and `x +nsw/nuw y`.
    if (OtherInstrA && OtherInstrA->getOpcode() == Instruction::Add &&
        checkNoWrapFlags(OtherInstrA, Signed) &&
        isa<ConstantInt>(OtherInstrA->getOperand(1))) {
      int64_t CstVal =
          cast<ConstantInt>(OtherInstrA->getOperand(1))->getSExtValue();
      if (OtherInstrA->getOperand(0) == OtherOperandB &&
          IdxDiff.getSExtValue() == -CstVal)
        return true;
    }
    // Match `x +nsw/nuw (y +nsw/nuw c)` and
    // `x +nsw/nuw (y +nsw/nuw (c + IdxDiff))`.
    if (OtherInstrA && OtherInstrB &&
        OtherInstrA->getOpcode() == Instruction::Add &&
        OtherInstrB->getOpcode() == Instruction::Add &&
        checkNoWrapFlags(OtherInstrA, Signed) &&
        checkNoWrapFlags(OtherInstrB, Signed) &&
        isa<ConstantInt>(OtherInstrA->getOperand(1)) &&
        isa<ConstantInt>(OtherInstrB->getOperand(1))) {
      int64_t CstValA =
          cast<ConstantInt>(OtherInstrA->getOperand(1))->getSExtValue();
      int64_t CstValB =
          cast<ConstantInt>(OtherInstrB->getOperand(1))->getSExtValue();
      if (OtherInstrA->getOperand(0) == OtherInstrB->getOperand(0) &&
          IdxDiff.getSExtValue() == (CstValB - CstValA))
        return true;
    }
  }
  return false;
}
std::optional<APInt> Vectorizer::getConstantOffsetComplexAddrs(
    Value *PtrA, Value *PtrB, Instruction *ContextInst, unsigned Depth) {
  LLVM_DEBUG(dbgs() << "LSV: getConstantOffsetComplexAddrs PtrA=" << *PtrA
                    << " PtrB=" << *PtrB << " ContextInst=" << *ContextInst
                    << " Depth=" << Depth << "\n");

  auto *GEPA = dyn_cast<GetElementPtrInst>(PtrA);
  auto *GEPB = dyn_cast<GetElementPtrInst>(PtrB);
  if (!GEPA || !GEPB)
    return getConstantOffsetSelects(PtrA, PtrB, ContextInst, Depth);

  // Look through GEPs after checking they're the same except for the last
  // index.
  if (GEPA->getNumOperands() != GEPB->getNumOperands() ||
      GEPA->getPointerOperand() != GEPB->getPointerOperand())
    return std::nullopt;
  gep_type_iterator GTIA = gep_type_begin(GEPA);
  gep_type_iterator GTIB = gep_type_begin(GEPB);
  for (unsigned I = 0, E = GEPA->getNumIndices() - 1; I < E; ++I) {
    if (GTIA.getOperand() != GTIB.getOperand())
      return std::nullopt;
    ++GTIA;
    ++GTIB;
  }

  Instruction *OpA = dyn_cast<Instruction>(GTIA.getOperand());
  Instruction *OpB = dyn_cast<Instruction>(GTIB.getOperand());
  if (!OpA || !OpB || OpA->getOpcode() != OpB->getOpcode() ||
      OpA->getType() != OpB->getType())
    return std::nullopt;

  uint64_t Stride = GTIA.getSequentialElementStride(DL);

  // Only look through a ZExt/SExt.
  if (!isa<SExtInst>(OpA) && !isa<ZExtInst>(OpA))
    return std::nullopt;
  bool Signed = isa<SExtInst>(OpA);

  // At this point A could be a function parameter, i.e. not an instruction.
  Value *ValA = OpA->getOperand(0);
  OpB = dyn_cast<Instruction>(OpB->getOperand(0));
  if (!OpB || ValA->getType() != OpB->getType())
    return std::nullopt;

  const SCEV *OffsetSCEVA = SE.getSCEV(ValA);
  const SCEV *OffsetSCEVB = SE.getSCEV(OpB);
  const SCEV *IdxDiffSCEV = SE.getMinusSCEV(OffsetSCEVB, OffsetSCEVA);
  if (IdxDiffSCEV == SE.getCouldNotCompute())
    return std::nullopt;

  ConstantRange IdxDiffRange = SE.getSignedRange(IdxDiffSCEV);
  if (!IdxDiffRange.isSingleElement())
    return std::nullopt;
  APInt IdxDiff = *IdxDiffRange.getSingleElement();

  LLVM_DEBUG(dbgs() << "LSV: getConstantOffsetComplexAddrs IdxDiff=" << IdxDiff
                    << "\n");

  // Now we need to prove that adding IdxDiff to ValA won't overflow.
  bool Safe = false;

  // First attempt: if OpB is an add with NSW/NUW, and OpB is IdxDiff added to
  // ValA, we're okay.
  if (OpB->getOpcode() == Instruction::Add &&
      isa<ConstantInt>(OpB->getOperand(1)) &&
      IdxDiff.sle(cast<ConstantInt>(OpB->getOperand(1))->getSExtValue()) &&
      checkNoWrapFlags(OpB, Signed))
    Safe = true;

  // Second attempt: check for eligible add NSW/NUW instruction sequences.
  OpA = dyn_cast<Instruction>(ValA);
  if (!Safe && OpA && OpA->getOpcode() == Instruction::Add &&
      OpB->getOpcode() == Instruction::Add && checkNoWrapFlags(OpA, Signed) &&
      checkNoWrapFlags(OpB, Signed)) {
    // A matching operand is an operand which is the same in both adds.
    for (unsigned MatchingOpIdxA : {0, 1})
      for (unsigned MatchingOpIdxB : {0, 1})
        if (!Safe)
          Safe = checkIfSafeAddSequence(IdxDiff, OpA, MatchingOpIdxA, OpB,
                                        MatchingOpIdxB, Signed);
  }

  unsigned BitWidth = ValA->getType()->getScalarSizeInBits();

  // Third attempt: prove via known bits that the addition cannot overflow.
  if (!Safe) {
    KnownBits Known(BitWidth);
    computeKnownBits((IdxDiff.sge(0) ? ValA : OpB), Known, DL, &AC, ContextInst,
                     &DT);
    APInt BitsAllowedToBeSet = Known.Zero.zext(IdxDiff.getBitWidth());
    if (Signed)
      BitsAllowedToBeSet.clearBit(BitWidth - 1);
    Safe = BitsAllowedToBeSet.uge(IdxDiff.abs());
  }

  if (Safe)
    return IdxDiff * Stride;
  return std::nullopt;
}
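// Illustrative example: for PtrA = gep inbounds float, ptr %p, i64 (sext %i)
// and PtrB = gep inbounds float, ptr %p, i64 (sext (%i add nsw 1)), the SCEV
// difference of the last indices gives IdxDiff = 1, the nsw flag rules out
// overflow, and Stride = 4 bytes, so the computed constant offset is 4.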
std::optional<APInt> Vectorizer::getConstantOffsetSelects(
    Value *PtrA, Value *PtrB, Instruction *ContextInst, unsigned Depth) {
  if (Depth++ == MaxDepth)
    return std::nullopt;

  if (auto *SelectA = dyn_cast<SelectInst>(PtrA)) {
    if (auto *SelectB = dyn_cast<SelectInst>(PtrB)) {
      if (SelectA->getCondition() != SelectB->getCondition())
        return std::nullopt;
      LLVM_DEBUG(dbgs() << "LSV: getConstantOffsetSelects, PtrA=" << *PtrA
                        << ", PtrB=" << *PtrB << ", ContextInst="
                        << *ContextInst << ", Depth=" << Depth << "\n");
      std::optional<APInt> TrueDiff = getConstantOffset(
          SelectA->getTrueValue(), SelectB->getTrueValue(), ContextInst, Depth);
      if (!TrueDiff)
        return std::nullopt;
      std::optional<APInt> FalseDiff =
          getConstantOffset(SelectA->getFalseValue(), SelectB->getFalseValue(),
                            ContextInst, Depth);
      if (TrueDiff == FalseDiff)
        return TrueDiff;
    }
  }
  return std::nullopt;
}
void Vectorizer::mergeEquivalenceClasses(EquivalenceClassMap &EQClasses) const {
  if (EQClasses.size() < 2) // There is nothing to merge.
    return;

  // The reduced key has all elements of the EqClassKey except the underlying
  // object. Check that EqClassKey has 4 elements and define the reduced key.
  static_assert(std::tuple_size_v<EqClassKey> == 4,
                "EqClassKey has changed - EqClassReducedKey needs changes too");
  using EqClassReducedKey =
      std::tuple<std::tuple_element_t<1, EqClassKey> /* AddrSpace */,
                 std::tuple_element_t<2, EqClassKey> /* Element size */,
                 std::tuple_element_t<3, EqClassKey> /* IsLoad */>;
  using ECReducedKeyToUnderlyingObjectMap =
      MapVector<EqClassReducedKey,
                SmallPtrSet<std::tuple_element_t<0, EqClassKey>, 4>>;

  // Form a map from the reduced key (without the underlying object) to the
  // underlying objects: one reduced key to many underlying objects, to form
  // groups of potentially mergeable equivalence classes.
  ECReducedKeyToUnderlyingObjectMap RedKeyToUOMap;
  bool FoundPotentiallyOptimizableEC = false;
  for (const auto &EC : EQClasses) {
    const auto &Key = EC.first;
    EqClassReducedKey RedKey{std::get<1>(Key), std::get<2>(Key),
                             std::get<3>(Key)};
    auto &UOMap = RedKeyToUOMap[RedKey];
    UOMap.insert(std::get<0>(Key));
    if (UOMap.size() > 1)
      FoundPotentiallyOptimizableEC = true;
  }
  if (!FoundPotentiallyOptimizableEC)
    return;

  LLVM_DEBUG({
    dbgs() << "LSV: mergeEquivalenceClasses: before merging:\n";
    for (const auto &EC : EQClasses) {
      dbgs() << "  Key: {" << EC.first << "}\n";
      for (const auto &Inst : EC.second)
        dbgs() << "    Inst: " << *Inst << '\n';
    }
  });
  LLVM_DEBUG({
    dbgs() << "LSV: mergeEquivalenceClasses: RedKeyToUOMap:\n";
    for (const auto &RedKeyToUO : RedKeyToUOMap) {
      dbgs() << "  Reduced key: {" << std::get<0>(RedKeyToUO.first) << ", "
             << std::get<1>(RedKeyToUO.first) << ", "
             << static_cast<int>(std::get<2>(RedKeyToUO.first)) << "} --> "
             << RedKeyToUO.second.size() << " underlying objects:\n";
      for (auto UObject : RedKeyToUO.second)
        dbgs() << "    " << *UObject << '\n';
    }
  });

  using UObjectToUObjectMap = DenseMap<const Value *, const Value *>;

  // Compute the ultimate targets for a set of underlying objects.
  auto GetUltimateTargets =
      [](SmallPtrSetImpl<const Value *> &UObjects) -> UObjectToUObjectMap {
    UObjectToUObjectMap IndirectionMap;
    for (const auto *UObject : UObjects) {
      const unsigned MaxLookupDepth = 1; // Look for 1-level indirections only.
      const auto *UltimateTarget = getUnderlyingObject(UObject, MaxLookupDepth);
      if (UltimateTarget != UObject)
        IndirectionMap[UObject] = UltimateTarget;
    }
    UObjectToUObjectMap UltimateTargetsMap;
    for (const auto *UObject : UObjects) {
      const auto *Target = UObject;
      auto It = IndirectionMap.find(Target);
      for (; It != IndirectionMap.end(); It = IndirectionMap.find(Target))
        Target = It->second;
      UltimateTargetsMap[UObject] = Target;
    }
    return UltimateTargetsMap;
  };

  // For each item in RedKeyToUOMap with more than one underlying object, try
  // to merge the equivalence classes.
  for (auto &[RedKey, UObjects] : RedKeyToUOMap) {
    if (UObjects.size() < 2)
      continue;
    auto UTMap = GetUltimateTargets(UObjects);
    for (const auto &[UObject, UltimateTarget] : UTMap) {
      if (UObject == UltimateTarget)
        continue;

      EqClassKey KeyFrom{UObject, std::get<0>(RedKey), std::get<1>(RedKey),
                         std::get<2>(RedKey)};
      EqClassKey KeyTo{UltimateTarget, std::get<0>(RedKey), std::get<1>(RedKey),
                       std::get<2>(RedKey)};
      // The entry for KeyFrom is guaranteed to exist, unlike KeyTo. Thus,
      // request the reference to the instructions vector for KeyTo first.
      const auto &VecTo = EQClasses[KeyTo];
      const auto &VecFrom = EQClasses[KeyFrom];
      SmallVector<Instruction *, 8> MergedVec;
      std::merge(VecFrom.begin(), VecFrom.end(), VecTo.begin(), VecTo.end(),
                 std::back_inserter(MergedVec),
                 [](Instruction *A, Instruction *B) {
                   return A && B && A->comesBefore(B);
                 });
      EQClasses[KeyTo] = std::move(MergedVec);
      EQClasses.erase(KeyFrom);
    }
  }
  LLVM_DEBUG({
    dbgs() << "LSV: mergeEquivalenceClasses: after merging:\n";
    for (const auto &EC : EQClasses) {
      dbgs() << "  Key: {" << EC.first << "}\n";
      for (const auto &Inst : EC.second)
        dbgs() << "    Inst: " << *Inst << '\n';
    }
  });
}
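// Illustrative example: accesses keyed on %base and accesses keyed on
// %gep = getelementptr i8, ptr %base, ... share a reduced key; the one-level
// getUnderlyingObject() walk above maps %gep back to %base, so the two
// classes merge under %base and can then be chained together.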
EquivalenceClassMap
Vectorizer::collectEquivalenceClasses(BasicBlock::iterator Begin,
                                      BasicBlock::iterator End) {
  EquivalenceClassMap Ret;

  auto GetUnderlyingObject = [](const Value *Ptr) -> const Value * {
    const Value *ObjPtr = llvm::getUnderlyingObject(Ptr);
    if (const auto *Sel = dyn_cast<SelectInst>(ObjPtr)) {
      // Selects sharing a condition can evaluate to consecutive pointers for
      // the true and false values; group them by the condition so such
      // accesses land in one class and get checked for contiguity.
      return Sel->getCondition();
    }
    return ObjPtr;
  };

  for (Instruction &I : make_range(Begin, End)) {
    auto *LI = dyn_cast<LoadInst>(&I);
    auto *SI = dyn_cast<StoreInst>(&I);
    if (!LI && !SI)
      continue;

    if ((LI && !LI->isSimple()) || (SI && !SI->isSimple()))
      continue;
    // ...

    Type *Ty = getLoadStoreType(&I);
    // Skip weird non-byte sizes. They probably aren't worth the effort of
    // handling correctly.
    unsigned TySize = DL.getTypeSizeInBits(Ty);
    if ((TySize % 8) != 0)
      continue;

    Value *Ptr = getLoadStorePointerOperand(&I);
    unsigned AS = Ptr->getType()->getPointerAddressSpace();
    unsigned VecRegSize = TTI.getLoadStoreVecRegBitWidth(AS);
    unsigned VF = VecRegSize / TySize;
    VectorType *VecTy = dyn_cast<VectorType>(Ty);

    // Only handle power-of-two sizes; splitChainByAlignment asserts that this
    // filtering happened here.
    if (!isPowerOf2_32(TySize) ||
        (VecTy &&
         !isPowerOf2_32(DL.getTypeSizeInBits(VecTy->getScalarType()))))
      continue;

    // No point in looking at these if they're too big to vectorize.
    if (TySize > VecRegSize / 2 ||
        (VecTy && TTI.getLoadVectorFactor(VF, TySize, TySize / 8, VecTy) == 0))
      continue;

    Ret[{GetUnderlyingObject(Ptr), AS,
         DL.getTypeSizeInBits(getLoadStoreType(&I)->getScalarType()),
         /*IsLoad=*/LI != nullptr}]
        .emplace_back(&I);
  }

  mergeEquivalenceClasses(Ret);
  return Ret;
}
std::vector<Chain> Vectorizer::gatherChains(ArrayRef<Instruction *> Instrs) {
  if (Instrs.empty())
    return {};

  unsigned AS = getLoadStoreAddressSpace(Instrs[0]);
  unsigned ASPtrBits = DL.getIndexSizeInBits(AS);

#ifndef NDEBUG
  // Check that Instrs is in BB order and all have the same addr space.
  for (size_t I = 1; I < Instrs.size(); ++I) {
    assert(Instrs[I - 1]->comesBefore(Instrs[I]));
    assert(getLoadStoreAddressSpace(Instrs[I]) == AS);
  }
#endif

  // Machinery to build an MRU-hashtable of Chains.
  struct InstrListElem : ilist_node<InstrListElem>,
                         std::pair<Instruction *, Chain> {
    explicit InstrListElem(Instruction *I)
        : std::pair<Instruction *, Chain>(I, {}) {}
  };
  struct InstrListElemDenseMapInfo {
    using PtrInfo = DenseMapInfo<InstrListElem *>;
    using IInfo = DenseMapInfo<Instruction *>;
    static InstrListElem *getEmptyKey() { return PtrInfo::getEmptyKey(); }
    static InstrListElem *getTombstoneKey() {
      return PtrInfo::getTombstoneKey();
    }
    static unsigned getHashValue(const InstrListElem *E) {
      return IInfo::getHashValue(E->first);
    }
    static bool isEqual(const InstrListElem *A, const InstrListElem *B) {
      if (A == getEmptyKey() || B == getEmptyKey())
        return A == getEmptyKey() && B == getEmptyKey();
      if (A == getTombstoneKey() || B == getTombstoneKey())
        return A == getTombstoneKey() && B == getTombstoneKey();
      return IInfo::isEqual(A->first, B->first);
    }
  };
  SpecificBumpPtrAllocator<InstrListElem> Allocator;
  simple_ilist<InstrListElem> MRU;
  DenseSet<InstrListElem *, InstrListElemDenseMapInfo> Chains;

  // Compare each instruction in `Instrs` to the leaders of the N most
  // recently-used chains.
  for (Instruction *I : Instrs) {
    constexpr int MaxChainsToTry = 64;

    bool MatchFound = false;
    auto ChainIter = MRU.begin();
    for (size_t J = 0; J < MaxChainsToTry && ChainIter != MRU.end();
         ++J, ++ChainIter) {
      if (std::optional<APInt> Offset = getConstantOffset(
              getLoadStorePointerOperand(ChainIter->first),
              getLoadStorePointerOperand(I),
              /*ContextInst=*/
              (ChainIter->first->comesBefore(I) ? I : ChainIter->first))) {
        ChainIter->second.emplace_back(I, Offset.value());
        // Move the matched chain to the front of the MRU list.
        MRU.remove(*ChainIter);
        MRU.push_front(*ChainIter);
        MatchFound = true;
        break;
      }
    }

    if (!MatchFound) {
      APInt ZeroOffset(ASPtrBits, 0);
      InstrListElem *E = new (Allocator.Allocate()) InstrListElem(I);
      E->second.emplace_back(I, ZeroOffset);
      MRU.push_front(*E);
      Chains.insert(E);
    }
  }

  std::vector<Chain> Ret;
  Ret.reserve(Chains.size());
  // Iterate over MRU rather than Chains so the order is deterministic.
  for (auto &E : MRU)
    if (E.second.size() > 1)
      Ret.emplace_back(std::move(E.second));
  return Ret;
}
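// Design note: comparing each access only against the leaders of the
// MaxChainsToTry most-recently-used chains bounds what would otherwise be
// O(n^2) offset queries, at the cost of occasionally splitting a chain that
// an exhaustive search would have joined.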
std::optional<APInt> Vectorizer::getConstantOffset(Value *PtrA, Value *PtrB,
                                                   Instruction *ContextInst,
                                                   unsigned Depth) {
  LLVM_DEBUG(dbgs() << "LSV: getConstantOffset, PtrA=" << *PtrA
                    << ", PtrB=" << *PtrB << ", ContextInst=" << *ContextInst
                    << ", Depth=" << Depth << "\n");
  // We'll ultimately return a value of this bit width, even if computations
  // happen in a different width.
  unsigned OrigBitWidth = DL.getIndexTypeSizeInBits(PtrA->getType());
  APInt OffsetA(OrigBitWidth, 0);
  APInt OffsetB(OrigBitWidth, 0);
  PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
  PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);
  unsigned NewPtrBitWidth = DL.getTypeStoreSizeInBits(PtrA->getType());
  if (NewPtrBitWidth != DL.getTypeStoreSizeInBits(PtrB->getType()))
    return std::nullopt;

  // If we had to shrink the pointer, stripAndAccumulateInBoundsConstantOffsets
  // should have returned the original pointers.
  assert(OffsetA.getSignificantBits() <= NewPtrBitWidth &&
         OffsetB.getSignificantBits() <= NewPtrBitWidth);

  OffsetA = OffsetA.sextOrTrunc(NewPtrBitWidth);
  OffsetB = OffsetB.sextOrTrunc(NewPtrBitWidth);
  if (PtrA == PtrB)
    return (OffsetB - OffsetA).sextOrTrunc(OrigBitWidth);

  // Try to compute B - A.
  const SCEV *DistScev = SE.getMinusSCEV(SE.getSCEV(PtrB), SE.getSCEV(PtrA));
  if (DistScev != SE.getCouldNotCompute()) {
    LLVM_DEBUG(dbgs() << "LSV: SCEV PtrB - PtrA =" << *DistScev << "\n");
    ConstantRange DistRange = SE.getSignedRange(DistScev);
    if (DistRange.isSingleElement()) {
      // Handle index width != pointer width.
      APInt Dist = DistRange.getSingleElement()->sextOrTrunc(NewPtrBitWidth);
      return (OffsetB - OffsetA + Dist).sextOrTrunc(OrigBitWidth);
    }
  }

  if (std::optional<APInt> Diff =
          getConstantOffsetComplexAddrs(PtrA, PtrB, ContextInst, Depth))
    return (OffsetB - OffsetA + Diff->sext(OffsetB.getBitWidth()))
        .sextOrTrunc(OrigBitWidth);
  return std::nullopt;
}
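// Worked example (hypothetical IR): for PtrA = gep inbounds i8, ptr %base, 4
// and PtrB = gep inbounds i8, ptr %base, 12, stripping constant offsets
// leaves both pointers at %base with OffsetA = 4 and OffsetB = 12, so the
// PtrA == PtrB fast path returns 12 - 4 = 8 bytes.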