#include <type_traits>

#define DEBUG_TYPE "load-store-vectorizer"

STATISTIC(NumVectorInstructions, "Number of vector accesses generated");
STATISTIC(NumScalarsVectorized, "Number of scalar accesses vectorized");
using EqClassKey =
    std::tuple<const Value * /* result of getUnderlyingObject() */,
               unsigned /* AddrSpace */,
               unsigned /* Load/Store element size bits */,
               char /* IsLoad; char b/c bool can't be a DenseMap key */
               >;

[[maybe_unused]] llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
                                               const EqClassKey &K) {
  const auto &[UnderlyingObject, AddrSpace, ElementSize, IsLoad] = K;
  return OS << (IsLoad ? "load" : "store") << " of " << *UnderlyingObject
            << " of element size " << ElementSize << " bits in addrspace "
            << AddrSpace;
}
struct ChainElem {
  Instruction *Inst;
  APInt OffsetFromLeader;
  ChainElem(Instruction *Inst, APInt OffsetFromLeader)
      : Inst(std::move(Inst)), OffsetFromLeader(std::move(OffsetFromLeader)) {}
};
using Chain = SmallVector<ChainElem, 1>;
void sortChainInBBOrder(Chain &C) {
  sort(C, [](auto &A, auto &B) { return A.Inst->comesBefore(B.Inst); });
}
void sortChainInOffsetOrder(Chain &C) {
  sort(C, [](const auto &A, const auto &B) {
    if (A.OffsetFromLeader != B.OffsetFromLeader)
      return A.OffsetFromLeader.slt(B.OffsetFromLeader);
    return A.Inst->comesBefore(B.Inst); // stable tiebreaker
  });
}
[[maybe_unused]] void dumpChain(ArrayRef<ChainElem> C) {
  for (const auto &E : C) {
    dbgs() << "  " << *E.Inst << " (offset " << E.OffsetFromLeader << ")\n";
  }
}
using EquivalenceClassMap =
    MapVector<EqClassKey, SmallVector<Instruction *, 8>>;

// FIXME: Assuming stack alignment of 4 is always good enough
constexpr unsigned StackAdjustedAlignment = 4;
Instruction *propagateMetadata(Instruction *I, const Chain &C) {
  SmallVector<Value *, 8> Values;
  for (const ChainElem &E : C)
    Values.emplace_back(E.Inst);
  return propagateMetadata(I, Values);
}

bool isInvariantLoad(const Instruction *I) {
  const LoadInst *LI = dyn_cast<LoadInst>(I);
  return LI != nullptr && LI->hasMetadata(LLVMContext::MD_invariant_load);
}
/// Reorders the instructions that I depends on (must be before I in the BB)
/// before I.
void reorder(Instruction *I) {
  SmallPtrSet<Instruction *, 16> InstructionsToMove;
  SmallVector<Instruction *, 16> Worklist;

  Worklist.emplace_back(I);
  while (!Worklist.empty()) {
    Instruction *IW = Worklist.pop_back_val();
    int NumOperands = IW->getNumOperands();
    for (int Idx = 0; Idx < NumOperands; Idx++) {
      Instruction *IM = dyn_cast<Instruction>(IW->getOperand(Idx));
      if (!IM || IM->getOpcode() == Instruction::PHI)
        continue;

      // If IM is in another BB, no need to move it, because this pass only
      // vectorizes instructions within one BB.
      if (IM->getParent() != I->getParent())
        continue;

      assert(IM != I && "Unexpected cycle while re-ordering instructions");

      if (!IM->comesBefore(I)) {
        InstructionsToMove.insert(IM);
        Worklist.emplace_back(IM);
      }
    }
  }

  // All instructions to move should follow I.  Start from I, not from begin().
  for (auto BBI = I->getIterator(), E = I->getParent()->end(); BBI != E;) {
    Instruction *IM = &*(BBI++);
    if (!InstructionsToMove.contains(IM))
      continue;
    IM->moveBefore(I);
  }
}
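// Worked example (hypothetical IR, for illustration only): if the vectorized
// load is emitted at the position of the chain's first load,
//
//   %gep = getelementptr i8, ptr %p, i64 4
//   %v   = load <4 x i8>, ptr %gep      ; new vector load inserted here
//
// then reorder(%v) walks %v's operands (%gep, and transitively %p if it's an
// instruction in the same BB) and moves any that currently sit after the
// insertion point to just before it, restoring def-before-use order.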
class Vectorizer {
  // ... (references to F, AA, AC, DT, SE, TTI; the DataLayout; an IRBuilder)
public:
  Vectorizer(Function &F, AliasAnalysis &AA, AssumptionCache &AC,
             DominatorTree &DT, ScalarEvolution &SE, TargetTransformInfo &TTI)
      : F(F), AA(AA), AC(AC), DT(DT), SE(SE), TTI(TTI),
        DL(F.getDataLayout()), Builder(SE.getContext()) {}

  bool run();

private:
  static const unsigned MaxDepth = 3;
  // ...
  bool runOnEquivalenceClass(const EqClassKey &EqClassKey,
                             ArrayRef<Instruction *> EqClass);
  bool runOnChain(Chain &C);

  std::vector<Chain> splitChainByContiguity(Chain &C);
  std::vector<Chain> splitChainByMayAliasInstrs(Chain &C);
  std::vector<Chain> splitChainByAlignment(Chain &C);
  bool vectorizeChain(Chain &C);

  std::optional<APInt> getConstantOffset(Value *PtrA, Value *PtrB,
                                         Instruction *ContextInst,
                                         unsigned Depth = 0);
  std::optional<APInt> getConstantOffsetComplexAddrs(Value *PtrA, Value *PtrB,
                                                     Instruction *ContextInst,
                                                     unsigned Depth);
  std::optional<APInt> getConstantOffsetSelects(Value *PtrA, Value *PtrB,
                                                Instruction *ContextInst,
                                                unsigned Depth);

  Type *getChainElemTy(const Chain &C);

  template <bool IsLoadChain>
  bool isSafeToMove(
      Instruction *ChainElem, Instruction *ChainBegin,
      const DenseMap<Instruction *, APInt /*OffsetFromLeader*/> &ChainOffsets);
  // ...
};
class LoadStoreVectorizerLegacyPass : public FunctionPass {
public:
  static char ID;
  // ...
  StringRef getPassName() const override {
    return "GPU Load and Store Vectorizer";
  }
  // ...
};

char LoadStoreVectorizerLegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(LoadStoreVectorizerLegacyPass, DEBUG_TYPE,
                      "Vectorize load and Store instructions", false, false)
// ... (analysis dependencies)
INITIALIZE_PASS_END(LoadStoreVectorizerLegacyPass, DEBUG_TYPE,
                    "Vectorize load and Store instructions", false, false)

Pass *llvm::createLoadStoreVectorizerPass() {
  return new LoadStoreVectorizerLegacyPass();
}
bool LoadStoreVectorizerLegacyPass::runOnFunction(Function &F) {
  // Don't vectorize when the attribute NoImplicitFloat is used.
  if (skipFunction(F) || F.hasFnAttribute(Attribute::NoImplicitFloat))
    return false;

  AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  TargetTransformInfo &TTI =
      getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
  AssumptionCache &AC =
      getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);

  return Vectorizer(F, AA, AC, DT, SE, TTI).run();
}
PreservedAnalyses LoadStoreVectorizerPass::run(Function &F,
                                               FunctionAnalysisManager &AM) {
  if (F.hasFnAttribute(Attribute::NoImplicitFloat))
    return PreservedAnalyses::all();
  // ... (fetch AA, AC, DT, SE, and TTI from the analysis manager)
  bool Changed = Vectorizer(F, AA, AC, DT, SE, TTI).run();
  PreservedAnalyses PA;
  PA.preserveSet<CFGAnalyses>();
  return Changed ? PA : PreservedAnalyses::all();
}
bool Vectorizer::run() {
  bool Changed = false;

  // Process each BB, breaking it into "pseudo-BBs" at barrier instructions
  // (instructions not guaranteed to transfer execution to their successor,
  // across which loads/stores must not be reordered).
  for (BasicBlock *BB : post_order(&F)) {
    SmallVector<BasicBlock::iterator, 8> Barriers;
    // ... (Barriers = BB->begin(), each barrier instruction, BB->end())
    for (auto It = Barriers.begin(), End = std::prev(Barriers.end());
         It != End; ++It)
      Changed |= runOnPseudoBB(*It, *std::next(It));

    for (Instruction *I : ToErase) {
      auto *PtrOperand = getLoadStorePointerOperand(I);
      if (I->use_empty())
        I->eraseFromParent();
      RecursivelyDeleteTriviallyDeadInstructions(PtrOperand);
    }
    ToErase.clear();
  }

  return Changed;
}
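// Illustration (hypothetical IR): a BB of the form
//   load; load; call @may_throw(); store
// is processed as two pseudo-BBs, [load, load] and [store], because the call
// is not guaranteed to transfer execution to its successor and memory
// accesses must not be reordered across it.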
bool Vectorizer::runOnPseudoBB(BasicBlock::iterator Begin,
                               BasicBlock::iterator End) {
  LLVM_DEBUG({
    dbgs() << "LSV: Running on pseudo-BB [" << *Begin << " ... ";
    if (End != Begin->getParent()->end())
      dbgs() << *End;
    else
      dbgs() << "<BB end>";
    dbgs() << ")\n";
  });

  bool Changed = false;
  for (const auto &[EqClassKey, EqClass] :
       collectEquivalenceClasses(Begin, End))
    Changed |= runOnEquivalenceClass(EqClassKey, EqClass);
  return Changed;
}
bool Vectorizer::runOnEquivalenceClass(const EqClassKey &EqClassKey,
                                       ArrayRef<Instruction *> EqClass) {
  bool Changed = false;

  LLVM_DEBUG({
    dbgs() << "LSV: Running on equivalence class of size " << EqClass.size()
           << " keyed on " << EqClassKey << ":\n";
    for (Instruction *I : EqClass)
      dbgs() << "  " << *I << "\n";
  });

  std::vector<Chain> Chains = gatherChains(EqClass);
  LLVM_DEBUG(dbgs() << "LSV: Got " << Chains.size()
                    << " nontrivial chains.\n";);
  for (Chain &C : Chains)
    Changed |= runOnChain(C);
  return Changed;
}
bool Vectorizer::runOnChain(Chain &C) {
  LLVM_DEBUG({
    dbgs() << "LSV: Running on chain with " << C.size() << " instructions:\n";
    dumpChain(C);
  });

  // Split up the chain into increasingly smaller chains, until we can finally
  // vectorize the chains.
  //
  // (Don't be scared by the depth of the loop nest here.  These operations are
  // all at worst O(n lg n) in the number of instructions, and splitting chains
  // doesn't change the number of instrs.  So the whole loop nest is O(n lg n).)
  bool Changed = false;
  for (auto &C : splitChainByMayAliasInstrs(C))
    for (auto &C : splitChainByContiguity(C))
      for (auto &C : splitChainByAlignment(C))
        Changed |= vectorizeChain(C);
  return Changed;
}
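// Illustrative walk-through (hypothetical chain, not from the source): given
// loads at offsets {0, 4, 8, 100} from the same base,
//   - splitChainByMayAliasInstrs keeps them together only if no intervening
//     may-alias store separates them;
//   - splitChainByContiguity then splits off {100}, leaving {0, 4, 8};
//   - splitChainByAlignment keeps the largest legal, sufficiently-aligned
//     subchain, e.g. {0, 4} for an 8-byte vector register;
//   - vectorizeChain finally rewrites {0, 4} as one <2 x i32> load.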
std::vector<Chain> Vectorizer::splitChainByMayAliasInstrs(Chain &C) {
  if (C.empty())
    return {};

  sortChainInBBOrder(C);

  LLVM_DEBUG({
    dbgs() << "LSV: splitChainByMayAliasInstrs considering chain:\n";
    dumpChain(C);
  });

  // Elements in the chain with non-overlapping offsets can't alias each
  // other, even if AA can't prove it.  Record the offsets in a hashtable.
  DenseMap<Instruction *, APInt /*OffsetFromLeader*/> ChainOffsets;
  for (const auto &E : C)
    ChainOffsets.insert({&*E.Inst, E.OffsetFromLeader});

  // Loads are scanned in forward order; stores in reverse.
  auto Impl = [&](auto IsLoad) {
    // MSVC is unhappy if IsLoad is a capture, so pass it as an arg.
    auto [ChainBegin, ChainEnd] = [&](auto IsLoad) {
      if constexpr (IsLoad())
        return std::make_pair(C.begin(), C.end());
      else
        return std::make_pair(C.rbegin(), C.rend());
    }(IsLoad);
    assert(ChainBegin != ChainEnd);

    std::vector<Chain> Chains;
    SmallVector<ChainElem, 1> NewChain;
    NewChain.emplace_back(*ChainBegin);
    for (auto ChainIt = std::next(ChainBegin); ChainIt != ChainEnd; ++ChainIt) {
      if (isSafeToMove<IsLoad>(ChainIt->Inst, NewChain.front().Inst,
                               ChainOffsets)) {
        LLVM_DEBUG(dbgs() << "LSV: No intervening may-alias instrs; can merge "
                          << *ChainIt->Inst << " into " << *ChainBegin->Inst
                          << "\n");
        NewChain.emplace_back(*ChainIt);
      } else {
        LLVM_DEBUG(
            dbgs() << "LSV: Found intervening may-alias instrs; cannot merge "
                   << *ChainIt->Inst << " into " << *ChainBegin->Inst << "\n");
        if (NewChain.size() > 1) {
          LLVM_DEBUG({
            dbgs() << "LSV: got nontrivial chain without aliasing instrs:\n";
            dumpChain(NewChain);
          });
          Chains.emplace_back(std::move(NewChain));
        }
        // Start a new chain.
        NewChain = SmallVector<ChainElem, 1>({*ChainIt});
      }
    }
    if (NewChain.size() > 1) {
      LLVM_DEBUG({
        dbgs() << "LSV: got nontrivial chain without aliasing instrs:\n";
        dumpChain(NewChain);
      });
      Chains.emplace_back(std::move(NewChain));
    }
    return Chains;
  };

  if (isa<LoadInst>(C[0].Inst))
    return Impl(std::bool_constant<true>());

  assert(isa<StoreInst>(C[0].Inst));
  return Impl(std::bool_constant<false>());
}
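// Example of the split (hypothetical IR, for illustration only):
//
//   %a = load i32, ptr %p        ; chain elem, offset 0
//   store i32 0, ptr %q          ; may-alias %p => barrier
//   %b = load i32, ptr %p.4      ; chain elem, offset 4
//
// Unless AA proves %q doesn't alias the chain, the store blocks hoisting %b
// up to %a, so {%a, %b} is split into {%a} and {%b}, both of which are then
// dropped as trivial length-1 chains.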
std::vector<Chain> Vectorizer::splitChainByContiguity(Chain &C) {
  if (C.empty())
    return {};

  sortChainInOffsetOrder(C);

  LLVM_DEBUG({
    dbgs() << "LSV: splitChainByContiguity considering chain:\n";
    dumpChain(C);
  });

  std::vector<Chain> Ret;
  Ret.push_back({C.front()});

  for (auto It = std::next(C.begin()), End = C.end(); It != End; ++It) {
    // `Prev` accesses offsets [PrevOffset, PrevReadEnd).
    auto &CurChain = Ret.back();
    const ChainElem &Prev = CurChain.back();
    unsigned SzBits = DL.getTypeSizeInBits(getLoadStoreType(&*Prev.Inst));
    assert(SzBits % 8 == 0 && "Non-byte sizes should have been filtered out by "
                              "collectEquivalenceClass");
    APInt PrevReadEnd = Prev.OffsetFromLeader + SzBits / 8;

    // Add this instruction to the end of the current chain, or start a new one.
    bool AreContiguous = It->OffsetFromLeader == PrevReadEnd;
    LLVM_DEBUG(dbgs() << "LSV: Instructions are "
                      << (AreContiguous ? "" : "not ") << "contiguous: "
                      << *Prev.Inst << " (ends at offset " << PrevReadEnd
                      << ") -> " << *It->Inst << " (starts at offset "
                      << It->OffsetFromLeader << ")\n");
    if (AreContiguous)
      CurChain.push_back(*It);
    else
      Ret.push_back({*It});
  }

  // Filter out length-1 chains, these are uninteresting.
  llvm::erase_if(Ret, [](const auto &Chain) { return Chain.size() <= 1; });
  return Ret;
}
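// Worked example of the contiguity check (hypothetical offsets): for i32
// elements (SzBits / 8 == 4 bytes), a chain at offsets {0, 4, 8, 16} splits
// after 8, because the access at 8 ends at byte 12 but the next one starts at
// byte 16.  The result is {0, 4, 8} and {16}; the singleton {16} is erased.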
Type *Vectorizer::getChainElemTy(const Chain &C) {
  assert(!C.empty());
  // If any element accesses a pointer type, vectorize using an equivalently
  // sized integer element type instead.
  if (any_of(C, [](const ChainElem &E) {
        return getLoadStoreType(E.Inst)->getScalarType()->isPointerTy();
      })) {
    return Type::getIntNTy(
        F.getContext(),
        DL.getTypeSizeInBits(getLoadStoreType(C[0].Inst)->getScalarType()));
  }

  for (const ChainElem &E : C)
    // ... (otherwise pick a suitable scalar type from the chain's elements)
}
std::vector<Chain> Vectorizer::splitChainByAlignment(Chain &C) {
  // Greedily find, starting from each element, the longest subchain that is
  // legal, sufficiently aligned, and no larger than a vector register.
  sortChainInOffsetOrder(C);

  LLVM_DEBUG({
    dbgs() << "LSV: splitChainByAlignment considering chain:\n";
    dumpChain(C);
  });

  bool IsLoadChain = isa<LoadInst>(C[0].Inst);
  auto GetVectorFactor = [&](unsigned VF, unsigned LoadStoreSize,
                             unsigned ChainSizeBytes, VectorType *VecTy) {
    return IsLoadChain ? TTI.getLoadVectorFactor(VF, LoadStoreSize,
                                                 ChainSizeBytes, VecTy)
                       : TTI.getStoreVectorFactor(VF, LoadStoreSize,
                                                  ChainSizeBytes, VecTy);
  };

#ifndef NDEBUG
  for (const auto &E : C) {
    Type *Ty = getLoadStoreType(E.Inst)->getScalarType();
    assert(isPowerOf2_32(DL.getTypeSizeInBits(Ty)) &&
           "Should have filtered out non-power-of-two elements in "
           "collectEquivalenceClasses.");
  }
#endif

  unsigned AS = getLoadStoreAddressSpace(C[0].Inst);
  unsigned VecRegBytes = TTI.getLoadStoreVecRegBitWidth(AS) / 8;

  std::vector<Chain> Ret;
  for (unsigned CBegin = 0; CBegin < C.size(); ++CBegin) {
    // Find candidate chains of size not greater than the largest vector reg.
    // These chains are over the closed interval [CBegin, CEnd].
    SmallVector<std::pair<unsigned /*CEnd*/, unsigned /*SizeBytes*/>, 8>
        CandidateChains;
    for (unsigned CEnd = CBegin + 1, Size = C.size(); CEnd < Size; ++CEnd) {
      APInt Sz = C[CEnd].OffsetFromLeader +
                 DL.getTypeStoreSize(getLoadStoreType(C[CEnd].Inst)) -
                 C[CBegin].OffsetFromLeader;
      if (Sz.sgt(VecRegBytes))
        break;
      CandidateChains.emplace_back(CEnd,
                                   static_cast<unsigned>(Sz.getLimitedValue()));
    }

    // Consider the longest candidate first.
    for (auto It = CandidateChains.rbegin(), End = CandidateChains.rend();
         It != End; ++It) {
      auto [CEnd, SizeBytes] = *It;
      LLVM_DEBUG(
          dbgs() << "LSV: splitChainByAlignment considering candidate chain ["
                 << *C[CBegin].Inst << " ... " << *C[CEnd].Inst << "]\n");

      Type *VecElemTy = getChainElemTy(C);
      // Note, VecElemTy is a power of 2, but might be less than one byte.  For
      // example, we can vectorize 2 x <2 x i4> to <4 x i4>, and in this case
      // VecElemTy is i4.
      unsigned VecElemBits = DL.getTypeSizeInBits(VecElemTy);

      // SizeBytes and VecElemBits are powers of 2, so they divide evenly.
      assert((8 * SizeBytes) % VecElemBits == 0);
      unsigned NumVecElems = 8 * SizeBytes / VecElemBits;
      FixedVectorType *VecTy = FixedVectorType::get(VecElemTy, NumVecElems);
      unsigned VF = 8 * VecRegBytes / VecElemBits;

      // Check that TTI is happy with this vectorization factor.
      unsigned TargetVF = GetVectorFactor(VF, VecElemBits,
                                          VecElemBits * NumVecElems / 8, VecTy);
      if (TargetVF != VF && TargetVF < NumVecElems) {
        LLVM_DEBUG(
            dbgs() << "LSV: splitChainByAlignment discarding candidate chain "
                      "because TargetVF="
                   << TargetVF << " != VF=" << VF
                   << " and TargetVF < NumVecElems=" << NumVecElems << "\n");
        continue;
      }

      // Is a load/store with this alignment allowed by TTI and at least as
      // fast as an unvectorized load/store?
      auto IsAllowedAndFast = [&, SizeBytes = SizeBytes, &TTI = TTI,
                               &F = F](Align Alignment) {
        if (Alignment.value() % SizeBytes == 0)
          return true;
        unsigned VectorizedSpeed = 0;
        bool AllowsMisaligned = TTI.allowsMisalignedMemoryAccesses(
            F.getContext(), SizeBytes * 8, AS, Alignment, &VectorizedSpeed);
        if (!AllowsMisaligned) {
          LLVM_DEBUG(dbgs()
                     << "LSV: Access of " << SizeBytes << "B in addrspace "
                     << AS << " with alignment " << Alignment.value()
                     << " is misaligned, and therefore can't be vectorized.\n");
          return false;
        }

        unsigned ElementwiseSpeed = 0;
        (TTI).allowsMisalignedMemoryAccesses((F).getContext(), VecElemBits, AS,
                                             Alignment, &ElementwiseSpeed);
        if (VectorizedSpeed < ElementwiseSpeed) {
          LLVM_DEBUG(dbgs()
                     << "LSV: Access of " << SizeBytes << "B in addrspace "
                     << AS << " with alignment " << Alignment.value()
                     << " has relative speed " << VectorizedSpeed
                     << ", which is lower than the elementwise speed of "
                     << ElementwiseSpeed
                     << ".  Therefore this access won't be vectorized.\n");
          return false;
        }
        return true;
      };

      // If we're accessing an alloca, try to upgrade its alignment to the
      // stack-adjusted alignment so the chain becomes vectorizable.
      Align Alignment = getLoadStoreAlignment(C[CBegin].Inst);
      Value *PtrOperand = getLoadStorePointerOperand(C[CBegin].Inst);
      bool IsAllocaAccess = AS == DL.getAllocaAddrSpace() &&
                            isa<AllocaInst>(PtrOperand->stripPointerCasts());
      Align PrefAlign = Align(StackAdjustedAlignment);
      if (IsAllocaAccess && Alignment.value() % SizeBytes != 0 &&
          IsAllowedAndFast(PrefAlign)) {
        Align NewAlign = getOrEnforceKnownAlignment(
            PtrOperand, PrefAlign, DL, C[CBegin].Inst, nullptr, &DT);
        if (NewAlign >= Alignment) {
          LLVM_DEBUG(dbgs()
                     << "LSV: splitByChain upgrading alloca alignment from "
                     << Alignment.value() << " to " << NewAlign.value()
                     << "\n");
          Alignment = NewAlign;
        }
      }

      if (!IsAllowedAndFast(Alignment)) {
        LLVM_DEBUG(
            dbgs() << "LSV: splitChainByAlignment discarding candidate chain "
                      "because its alignment is not AllowedAndFast: "
                   << Alignment.value() << "\n");
        continue;
      }

      if ((IsLoadChain &&
           !TTI.isLegalToVectorizeLoadChain(SizeBytes, Alignment, AS)) ||
          (!IsLoadChain &&
           !TTI.isLegalToVectorizeStoreChain(SizeBytes, Alignment, AS))) {
        LLVM_DEBUG(
            dbgs() << "LSV: splitChainByAlignment discarding candidate chain "
                      "because !isLegalToVectorizeLoad/StoreChain.");
        continue;
      }

      // Hooray, we can vectorize this chain!
      Chain &NewChain = Ret.emplace_back();
      for (unsigned I = CBegin; I <= CEnd; ++I)
        NewChain.emplace_back(C[I]);
      CBegin = CEnd; // Skip over the instructions we just added to a chain.
      break;
    }
  }
  return Ret;
}
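// Worked example of the candidate-chain scan (hypothetical numbers): with
// 16-byte vector registers (VecRegBytes == 16) and an i32 chain at offsets
// {0, 4, 8, 12, 16}, CBegin == 0 yields candidates [0..1] (8B), [0..2] (12B),
// and [0..3] (16B); [0..4] would be 20B > 16B, so the inner loop breaks.
// Candidates are tried longest-first, so a legal, aligned [0..3] wins and
// CBegin skips ahead to index 3.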
bool Vectorizer::vectorizeChain(Chain &C) {
  if (C.size() < 2)
    return false;

  sortChainInOffsetOrder(C);

  LLVM_DEBUG({
    dbgs() << "LSV: Vectorizing chain of " << C.size() << " instructions:\n";
    dumpChain(C);
  });

  Type *VecElemTy = getChainElemTy(C);
  bool IsLoadChain = isa<LoadInst>(C[0].Inst);
  unsigned AS = getLoadStoreAddressSpace(C[0].Inst);
  unsigned ChainBytes = std::accumulate(
      C.begin(), C.end(), 0u, [&](unsigned Bytes, const ChainElem &E) {
        return Bytes + DL.getTypeStoreSize(getLoadStoreType(E.Inst));
      });
  assert(ChainBytes % DL.getTypeStoreSize(VecElemTy) == 0);
  Type *VecTy = FixedVectorType::get(
      VecElemTy, 8 * ChainBytes / DL.getTypeSizeInBits(VecElemTy));

  Align Alignment = getLoadStoreAlignment(C[0].Inst);
  // If this is a load/store of an alloca, we might have upgraded the alloca's
  // alignment earlier.  Get the new alignment.
  if (AS == DL.getAllocaAddrSpace()) {
    Alignment = std::max(
        Alignment,
        getOrEnforceKnownAlignment(getLoadStorePointerOperand(C[0].Inst),
                                   MaybeAlign(), DL, C[0].Inst, nullptr, &DT));
  }

#ifndef NDEBUG
  // All elements of the chain must have the same scalar-type size.
  for (const ChainElem &E : C)
    assert(DL.getTypeStoreSize(getLoadStoreType(E.Inst)->getScalarType()) ==
           DL.getTypeStoreSize(VecElemTy));
#endif

  Instruction *VecInst;
  if (IsLoadChain) {
    // Loads get hoisted to the location of the first load in the chain.
    Builder.SetInsertPoint(
        llvm::min_element(C, [](const auto &A, const auto &B) {
          return A.Inst->comesBefore(B.Inst);
        })->Inst);

    // Chain is in offset order, so C[0] is the instr with the lowest offset,
    // i.e. the root of the vector.
    VecInst = Builder.CreateAlignedLoad(VecTy,
                                        getLoadStorePointerOperand(C[0].Inst),
                                        Alignment);

    unsigned VecIdx = 0;
    for (const ChainElem &E : C) {
      Instruction *I = E.Inst;
      Value *V;
      Type *T = getLoadStoreType(I);
      if (auto *VT = dyn_cast<FixedVectorType>(T)) {
        auto Mask = llvm::to_vector<8>(
            llvm::seq<int>(VecIdx, VecIdx + VT->getNumElements()));
        V = Builder.CreateShuffleVector(VecInst, Mask, I->getName());
        VecIdx += VT->getNumElements();
      } else {
        V = Builder.CreateExtractElement(VecInst, Builder.getInt32(VecIdx),
                                         I->getName());
        ++VecIdx;
      }
      if (V->getType() != I->getType())
        V = Builder.CreateBitOrPointerCast(V, I->getType());
      I->replaceAllUsesWith(V);
    }

    // Reorder the BB so that the (transitive) operands of VecInst appear
    // before it.
    reorder(VecInst);
  } else {
    // Stores get sunk to the location of the last store in the chain.
    Builder.SetInsertPoint(
        llvm::max_element(C, [](const auto &A, const auto &B) {
          return A.Inst->comesBefore(B.Inst);
        })->Inst);

    // Build the vector to store.
    Value *Vec = PoisonValue::get(VecTy);
    unsigned VecIdx = 0;
    auto InsertElem = [&](Value *V) {
      if (V->getType() != VecElemTy)
        V = Builder.CreateBitOrPointerCast(V, VecElemTy);
      Vec = Builder.CreateInsertElement(Vec, V, Builder.getInt32(VecIdx++));
    };
    for (const ChainElem &E : C) {
      auto *I = cast<StoreInst>(E.Inst);
      if (auto *VT = dyn_cast<FixedVectorType>(getLoadStoreType(I))) {
        for (int J = 0, JE = VT->getNumElements(); J < JE; ++J) {
          InsertElem(Builder.CreateExtractElement(I->getValueOperand(),
                                                  Builder.getInt32(J)));
        }
      } else {
        InsertElem(I->getValueOperand());
      }
    }

    // Chain is in offset order, so C[0] is the instr with the lowest offset,
    // i.e. the root of the vector.
    VecInst = Builder.CreateAlignedStore(
        Vec, getLoadStorePointerOperand(C[0].Inst), Alignment);
  }

  propagateMetadata(VecInst, C);

  for (const ChainElem &E : C)
    ToErase.emplace_back(E.Inst);

  ++NumVectorInstructions;
  NumScalarsVectorized += C.size();
  return true;
}
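// Before/after sketch (hypothetical IR): a contiguous, aligned load chain
//
//   %x = load i32, ptr %p
//   %y = load i32, ptr %p.4
//
// becomes
//
//   %vec = load <2 x i32>, ptr %p, align 8
//   %x   = extractelement <2 x i32> %vec, i32 0
//   %y   = extractelement <2 x i32> %vec, i32 1
//
// and the scalar loads are queued on ToErase.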
template <bool IsLoadChain>
bool Vectorizer::isSafeToMove(
    Instruction *ChainElem, Instruction *ChainBegin,
    const DenseMap<Instruction *, APInt /*OffsetFromLeader*/> &ChainOffsets) {
  LLVM_DEBUG(dbgs() << "LSV: isSafeToMove(" << *ChainElem << " -> "
                    << *ChainBegin << ")\n");

  assert(isa<LoadInst>(ChainElem) == IsLoadChain);
  if (ChainElem == ChainBegin)
    return true;

  // Invariant loads can always be reordered; by definition they are not
  // clobbered by stores.
  if (isInvariantLoad(ChainElem))
    return true;

  auto BBIt = std::next([&] {
    if constexpr (IsLoadChain)
      return BasicBlock::reverse_iterator(ChainElem->getIterator());
    else
      return BasicBlock::iterator(ChainElem->getIterator());
  }());
  auto BBItEnd = std::next([&] {
    if constexpr (IsLoadChain)
      return BasicBlock::reverse_iterator(ChainBegin->getIterator());
    else
      return BasicBlock::iterator(ChainBegin->getIterator());
  }());

  const APInt &ChainElemOffset = ChainOffsets.at(ChainElem);
  const unsigned ChainElemSize =
      DL.getTypeStoreSize(getLoadStoreType(ChainElem));

  for (; BBIt != BBItEnd; ++BBIt) {
    Instruction *I = &*BBIt;

    if (!I->mayReadOrWriteMemory())
      continue;

    // Loads can be reordered with other loads.
    if (IsLoadChain && isa<LoadInst>(I))
      continue;

    // Stores can be sunk below invariant loads.
    if (!IsLoadChain && isInvariantLoad(I))
      continue;

    // If I is in the chain, we can tell whether it aliases ChainElem by
    // comparing offsets; this may be better than AA is able to do.
    if (auto OffsetIt = ChainOffsets.find(I); OffsetIt != ChainOffsets.end()) {
      const APInt &IOffset = OffsetIt->second;
      unsigned IElemSize = DL.getTypeStoreSize(getLoadStoreType(I));
      if (IOffset == ChainElemOffset ||
          (IOffset.sle(ChainElemOffset) &&
           (IOffset + IElemSize).sgt(ChainElemOffset)) ||
          (ChainElemOffset.sle(IOffset) &&
           (ChainElemOffset + ChainElemSize).sgt(OffsetIt->second))) {
        LLVM_DEBUG({
          dbgs() << "LSV: Found alias in chain: " << *I << "\n";
        });
        return false; // We found an aliasing instruction; bail.
      }
      continue; // We're confident there's no alias.
    }

    // Otherwise, fall back to an AA query.
    ModRefInfo MR = AA.getModRefInfo(I, MemoryLocation::get(ChainElem));
    if (IsLoadChain ? isModSet(MR) : isModOrRefSet(MR)) {
      LLVM_DEBUG(dbgs() << "LSV: Found alias in chain:\n"
                        << "  Aliasing instruction:\n"
                        << "  " << *I << '\n'
                        << "  Aliased instruction and pointer:\n"
                        << "  " << *ChainElem << '\n');
      return false;
    }
  }
  return true;
}
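// Overlap-check arithmetic (illustration): with ChainElemOffset == 8 and
// ChainElemSize == 4 (bytes [8, 12)), an in-chain instruction I at
// IOffset == 6 with IElemSize == 4 (bytes [6, 10)) overlaps, because
// IOffset.sle(8) and (6 + 4).sgt(8).  An I at IOffset == 12 does not, so it
// is skipped without ever querying AA.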
static bool checkNoWrapFlags(Instruction *I, bool Signed) {
  BinaryOperator *BinOpI = cast<BinaryOperator>(I);
  return (Signed && BinOpI->hasNoSignedWrap()) ||
         (!Signed && BinOpI->hasNoUnsignedWrap());
}

static bool checkIfSafeAddSequence(const APInt &IdxDiff, Instruction *AddOpA,
                                   unsigned MatchingOpIdxA, Instruction *AddOpB,
                                   unsigned MatchingOpIdxB, bool Signed) {
  LLVM_DEBUG(dbgs() << "LSV: checkIfSafeAddSequence IdxDiff=" << IdxDiff
                    << ", AddOpA=" << *AddOpA << ", MatchingOpIdxA="
                    << MatchingOpIdxA << ", AddOpB=" << *AddOpB
                    << ", MatchingOpIdxB=" << MatchingOpIdxB
                    << ", Signed=" << Signed << "\n");
  // If both adds have NSW/NUW and share an operand, prove that adding IdxDiff
  // to AddOpA's other operand cannot overflow.
  assert(AddOpA->getOpcode() == Instruction::Add &&
         AddOpB->getOpcode() == Instruction::Add &&
         checkNoWrapFlags(AddOpA, Signed) && checkNoWrapFlags(AddOpB, Signed));
  if (AddOpA->getOperand(MatchingOpIdxA) ==
      AddOpB->getOperand(MatchingOpIdxB)) {
    Value *OtherOperandA = AddOpA->getOperand(MatchingOpIdxA == 1 ? 0 : 1);
    Value *OtherOperandB = AddOpB->getOperand(MatchingOpIdxB == 1 ? 0 : 1);
    Instruction *OtherInstrA = dyn_cast<Instruction>(OtherOperandA);
    Instruction *OtherInstrB = dyn_cast<Instruction>(OtherOperandB);
    // Match `x +nsw/nuw y` and `x +nsw/nuw (y +nsw/nuw IdxDiff)`.
    if (OtherInstrB && OtherInstrB->getOpcode() == Instruction::Add &&
        checkNoWrapFlags(OtherInstrB, Signed) &&
        isa<ConstantInt>(OtherInstrB->getOperand(1))) {
      int64_t CstVal =
          cast<ConstantInt>(OtherInstrB->getOperand(1))->getSExtValue();
      if (OtherInstrB->getOperand(0) == OtherOperandA &&
          IdxDiff.getSExtValue() == CstVal)
        return true;
    }
    // Match `x +nsw/nuw (y +nsw/nuw -IdxDiff)` and `x +nsw/nuw y`.
    if (OtherInstrA && OtherInstrA->getOpcode() == Instruction::Add &&
        checkNoWrapFlags(OtherInstrA, Signed) &&
        isa<ConstantInt>(OtherInstrA->getOperand(1))) {
      int64_t CstVal =
          cast<ConstantInt>(OtherInstrA->getOperand(1))->getSExtValue();
      if (OtherInstrA->getOperand(0) == OtherOperandB &&
          IdxDiff.getSExtValue() == -CstVal)
        return true;
    }
    // Match `x +nsw/nuw (y +nsw/nuw c)` and
    // `x +nsw/nuw (y +nsw/nuw (c + IdxDiff))`.
    if (OtherInstrA && OtherInstrB &&
        OtherInstrA->getOpcode() == Instruction::Add &&
        OtherInstrB->getOpcode() == Instruction::Add &&
        checkNoWrapFlags(OtherInstrA, Signed) &&
        checkNoWrapFlags(OtherInstrB, Signed) &&
        isa<ConstantInt>(OtherInstrA->getOperand(1)) &&
        isa<ConstantInt>(OtherInstrB->getOperand(1))) {
      int64_t CstValA =
          cast<ConstantInt>(OtherInstrA->getOperand(1))->getSExtValue();
      int64_t CstValB =
          cast<ConstantInt>(OtherInstrB->getOperand(1))->getSExtValue();
      if (OtherInstrA->getOperand(0) == OtherInstrB->getOperand(0) &&
          IdxDiff.getSExtValue() == (CstValB - CstValA))
        return true;
    }
  }
  return false;
}
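// Example of the first matched pattern (hypothetical IR), with IdxDiff == 1:
//
//   %a  = add nsw i32 %base, %v     ; AddOpA, matching operand %base
//   %v1 = add nsw i32 %v, 1         ; OtherInstrB
//   %b  = add nsw i32 %base, %v1    ; AddOpB
//
// AddOpB's other operand is `%v +nsw 1` where %v is AddOpA's other operand
// and 1 == IdxDiff, so adding IdxDiff to %a cannot overflow and the indices
// provably differ by exactly 1.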
std::optional<APInt> Vectorizer::getConstantOffsetComplexAddrs(
    Value *PtrA, Value *PtrB, Instruction *ContextInst, unsigned Depth) {
  LLVM_DEBUG(dbgs() << "LSV: getConstantOffsetComplexAddrs PtrA=" << *PtrA
                    << " PtrB=" << *PtrB << " ContextInst=" << *ContextInst
                    << " Depth=" << Depth << "\n");
  auto *GEPA = dyn_cast<GetElementPtrInst>(PtrA);
  auto *GEPB = dyn_cast<GetElementPtrInst>(PtrB);
  if (!GEPA || !GEPB)
    return getConstantOffsetSelects(PtrA, PtrB, ContextInst, Depth);

  // Look through GEPs after checking they're the same except for the last
  // index.
  if (GEPA->getNumOperands() != GEPB->getNumOperands() ||
      GEPA->getPointerOperand() != GEPB->getPointerOperand())
    return std::nullopt;
  gep_type_iterator GTIA = gep_type_begin(GEPA);
  gep_type_iterator GTIB = gep_type_begin(GEPB);
  for (unsigned I = 0, E = GEPA->getNumIndices() - 1; I < E; ++I) {
    if (GTIA.getOperand() != GTIB.getOperand())
      return std::nullopt;
    ++GTIA;
    ++GTIB;
  }

  Instruction *OpA = dyn_cast<Instruction>(GTIA.getOperand());
  Instruction *OpB = dyn_cast<Instruction>(GTIB.getOperand());
  if (!OpA || !OpB || OpA->getOpcode() != OpB->getOpcode() ||
      OpA->getType() != OpB->getType())
    return std::nullopt;

  uint64_t Stride = GTIA.getSequentialElementStride(DL);

  // Only look through a ZExt/SExt.
  if (!isa<SExtInst>(OpA) && !isa<ZExtInst>(OpA))
    return std::nullopt;

  bool Signed = isa<SExtInst>(OpA);

  // At this point A could be a function parameter, i.e. not an instruction.
  Value *ValA = OpA->getOperand(0);
  OpB = dyn_cast<Instruction>(OpB->getOperand(0));
  if (!OpB || ValA->getType() != OpB->getType())
    return std::nullopt;

  // Try to prove via SCEV that the last indices differ by a constant.
  const SCEV *IdxDiffSCEV = SE.getMinusSCEV(SE.getSCEV(OpB), SE.getSCEV(ValA));
  if (IdxDiffSCEV == SE.getCouldNotCompute())
    return std::nullopt;

  ConstantRange IdxDiffRange = SE.getSignedRange(IdxDiffSCEV);
  if (!IdxDiffRange.isSingleElement())
    return std::nullopt;
  APInt IdxDiff = *IdxDiffRange.getSingleElement();

  LLVM_DEBUG(dbgs() << "LSV: getConstantOffsetComplexAddrs IdxDiff=" << IdxDiff
                    << "\n");

  // Now we need to prove that adding IdxDiff to ValA won't overflow.
  bool Safe = false;

  // First attempt: if OpB is an add with NSW/NUW, and OpB is IdxDiff added to
  // ValA, we're okay.
  if (OpB->getOpcode() == Instruction::Add &&
      isa<ConstantInt>(OpB->getOperand(1)) &&
      IdxDiff.sle(cast<ConstantInt>(OpB->getOperand(1))->getSExtValue()) &&
      checkNoWrapFlags(OpB, Signed))
    Safe = true;

  // Second attempt: check for eligible add NSW/NUW instruction sequences.
  OpA = dyn_cast<Instruction>(ValA);
  if (!Safe && OpA && OpA->getOpcode() == Instruction::Add &&
      OpB->getOpcode() == Instruction::Add && checkNoWrapFlags(OpA, Signed) &&
      checkNoWrapFlags(OpB, Signed)) {
    for (unsigned MatchingOpIdxA : {0, 1})
      for (unsigned MatchingOpIdxB : {0, 1})
        if (!Safe)
          Safe = checkIfSafeAddSequence(IdxDiff, OpA, MatchingOpIdxA, OpB,
                                        MatchingOpIdxB, Signed);
  }

  unsigned BitWidth = ValA->getType()->getScalarSizeInBits();

  // Third attempt: prove via known bits that the add cannot overflow.
  if (!Safe) {
    KnownBits Known(BitWidth);
    computeKnownBits((IdxDiff.sge(0) ? ValA : OpB), Known, DL, 0, &AC,
                     ContextInst, &DT);
    APInt BitsAllowedToBeSet = Known.Zero.zext(IdxDiff.getBitWidth());
    if (Signed)
      BitsAllowedToBeSet.clearBit(BitWidth - 1);
    if (BitsAllowedToBeSet.ult(IdxDiff.abs()))
      return std::nullopt;
    Safe = true;
  }

  if (Safe)
    return IdxDiff * Stride;
  return std::nullopt;
}
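// Illustration (hypothetical IR): for two GEPs that differ only in the last
// index,
//
//   %i1 = sext i32 %i to i64
//   %g1 = getelementptr i32, ptr %p, i64 %i1
//   %j  = add nsw i32 %i, 1
//   %j1 = sext i32 %j to i64
//   %g2 = getelementptr i32, ptr %p, i64 %j1
//
// SCEV proves IdxDiff == 1, the nsw flag rules out overflow, and the function
// returns IdxDiff * Stride == 4 bytes.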
std::optional<APInt> Vectorizer::getConstantOffsetSelects(
    Value *PtrA, Value *PtrB, Instruction *ContextInst, unsigned Depth) {
  if (Depth++ == MaxDepth)
    return std::nullopt;

  if (auto *SelectA = dyn_cast<SelectInst>(PtrA)) {
    if (auto *SelectB = dyn_cast<SelectInst>(PtrB)) {
      if (SelectA->getCondition() != SelectB->getCondition())
        return std::nullopt;
      LLVM_DEBUG(dbgs() << "LSV: getConstantOffsetSelects, PtrA=" << *PtrA
                        << ", PtrB=" << *PtrB << ", ContextInst="
                        << *ContextInst << ", Depth=" << Depth << "\n");
      std::optional<APInt> TrueDiff = getConstantOffset(
          SelectA->getTrueValue(), SelectB->getTrueValue(), ContextInst, Depth);
      if (!TrueDiff)
        return std::nullopt;
      std::optional<APInt> FalseDiff =
          getConstantOffset(SelectA->getFalseValue(), SelectB->getFalseValue(),
                            ContextInst, Depth);
      if (TrueDiff == FalseDiff)
        return TrueDiff;
    }
  }
  return std::nullopt;
}
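// Illustration (hypothetical IR): for
//   %pa = select i1 %c, ptr %t1, ptr %f1
//   %pb = select i1 %c, ptr %t2, ptr %f2
// the offset is constant only if (t2 - t1) == (f2 - f1); both arms are
// compared recursively, with the recursion bounded by MaxDepth.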
EquivalenceClassMap
Vectorizer::collectEquivalenceClasses(BasicBlock::iterator Begin,
                                      BasicBlock::iterator End) {
  EquivalenceClassMap Ret;

  auto GetUnderlyingObject = [](const Value *Ptr) -> const Value * {
    const Value *ObjPtr = llvm::getUnderlyingObject(Ptr);
    if (const auto *Sel = dyn_cast<SelectInst>(ObjPtr)) {
      // Selects are distinct instructions even when they share a condition
      // and evaluate to consecutive pointers; group by the condition instead
      // so such accesses can still land in one class.
      return Sel->getCondition();
    }
    return ObjPtr;
  };

  for (Instruction &I : make_range(Begin, End)) {
    auto *LI = dyn_cast<LoadInst>(&I);
    auto *SI = dyn_cast<StoreInst>(&I);
    if (!LI && !SI)
      continue;

    if ((LI && !LI->isSimple()) || (SI && !SI->isSimple()))
      continue;

    // ... (skip accesses TTI says aren't legal to vectorize)

    Type *Ty = getLoadStoreType(&I);
    // Skip weird non-byte sizes.  They probably aren't worth the effort of
    // handling correctly.
    unsigned TySize = DL.getTypeSizeInBits(Ty);
    if ((TySize % 8) != 0)
      continue;

    Value *Ptr = getLoadStorePointerOperand(&I);
    unsigned AS = Ptr->getType()->getPointerAddressSpace();
    unsigned VecRegSize = TTI.getLoadStoreVecRegBitWidth(AS);

    unsigned VF = VecRegSize / TySize;
    VectorType *VecTy = dyn_cast<VectorType>(Ty);

    // Only handle power-of-two scalar sizes.
    if (!isPowerOf2_32(DL.getTypeSizeInBits(Ty->getScalarType())) ||
        (VecTy &&
         !isPowerOf2_32(DL.getTypeSizeInBits(VecTy->getScalarType()))))
      continue;

    // No point in looking at these if they're too big to vectorize.
    if (TySize > VecRegSize / 2 ||
        (VecTy && TTI.getLoadVectorFactor(VF, TySize, TySize / 8, VecTy) == 0))
      continue;

    Ret[{GetUnderlyingObject(Ptr), AS,
         DL.getTypeSizeInBits(getLoadStoreType(&I)->getScalarType()),
         /*IsLoad=*/LI != nullptr}]
        .emplace_back(&I);
  }

  return Ret;
}
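// Illustration: `load i32, ptr %p` and `store i32 %v, ptr %p` land in
// different classes (IsLoad differs), as do an i32 and an i64 access to the
// same object (element size differs); only instructions whose key tuple
// matches exactly are later considered for the same chain.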
std::vector<Chain> Vectorizer::gatherChains(ArrayRef<Instruction *> Instrs) {
  if (Instrs.empty())
    return {};

  unsigned AS = getLoadStoreAddressSpace(Instrs[0]);
  unsigned ASPtrBits = DL.getIndexSizeInBits(AS);

#ifndef NDEBUG
  // Check that Instrs is in BB order.
  for (size_t I = 1; I < Instrs.size(); ++I)
    assert(Instrs[I - 1]->comesBefore(Instrs[I]));
#endif

  // Machinery to build an MRU-hashtable of Chains.
  //
  // (Ideally this could be done with MapVector, but as currently implemented,
  // moving an element to the front of a MapVector is O(n).)
  struct InstrListElem : ilist_node<InstrListElem>,
                         std::pair<Instruction *, Chain> {
    explicit InstrListElem(Instruction *I)
        : std::pair<Instruction *, Chain>(I, {}) {}
  };
  struct InstrListElemDenseMapInfo {
    using PtrInfo = DenseMapInfo<InstrListElem *>;
    using IInfo = DenseMapInfo<Instruction *>;
    static InstrListElem *getEmptyKey() { return PtrInfo::getEmptyKey(); }
    static InstrListElem *getTombstoneKey() {
      return PtrInfo::getTombstoneKey();
    }
    static unsigned getHashValue(const InstrListElem *E) {
      return IInfo::getHashValue(E->first);
    }
    static bool isEqual(const InstrListElem *A, const InstrListElem *B) {
      if (A == getEmptyKey() || B == getEmptyKey())
        return A == getEmptyKey() && B == getEmptyKey();
      if (A == getTombstoneKey() || B == getTombstoneKey())
        return A == getTombstoneKey() && B == getTombstoneKey();
      return IInfo::isEqual(A->first, B->first);
    }
  };
  SpecificBumpPtrAllocator<InstrListElem> Allocator;
  simple_ilist<InstrListElem> MRU;
  DenseSet<InstrListElem *, InstrListElemDenseMapInfo> Chains;

  // Compare each instruction to the leaders of the N most recently-used
  // chains.  This bounds the O(n^2) behavior of this pass while still allowing
  // arbitrarily long chains.
  for (Instruction *I : Instrs) {
    constexpr int MaxChainsToTry = 64;

    bool MatchFound = false;
    auto ChainIter = MRU.begin();
    for (size_t J = 0; J < MaxChainsToTry && ChainIter != MRU.end();
         ++J, ++ChainIter) {
      if (std::optional<APInt> Offset = getConstantOffset(
              getLoadStorePointerOperand(ChainIter->first),
              getLoadStorePointerOperand(I),
              /*ContextInst=*/
              (ChainIter->first->comesBefore(I) ? I : ChainIter->first))) {
        ChainIter->second.emplace_back(I, Offset.value());
        // Move ChainIter to the front of the MRU list.
        MRU.remove(*ChainIter);
        MRU.push_front(*ChainIter);
        MatchFound = true;
        break;
      }
    }

    if (!MatchFound) {
      APInt ZeroOffset(ASPtrBits, 0);
      InstrListElem *E = new (Allocator.Allocate()) InstrListElem(I);
      E->second.emplace_back(I, ZeroOffset);
      MRU.push_front(*E);
      Chains.insert(E);
    }
  }

  std::vector<Chain> Ret;
  Ret.reserve(Chains.size());
  // Iterate over MRU rather than Chains so the order is deterministic.
  for (auto &E : MRU)
    if (E.second.size() > 1)
      Ret.emplace_back(std::move(E.second));
  return Ret;
}
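// Illustration (hypothetical access stream): with MaxChainsToTry == 64, the
// interleaved stream a0, b0, a1, b1, ... over two bases %a and %b keeps both
// chains at the front of the MRU list, so each element is appended after
// probing only a couple of chain leaders rather than all of them.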
std::optional<APInt> Vectorizer::getConstantOffset(Value *PtrA, Value *PtrB,
                                                   Instruction *ContextInst,
                                                   unsigned Depth) {
  LLVM_DEBUG(dbgs() << "LSV: getConstantOffset, PtrA=" << *PtrA
                    << ", PtrB=" << *PtrB << ", ContextInst= " << *ContextInst
                    << ", Depth=" << Depth << "\n");
  // We'll ultimately return a value of this bit width, even if computations
  // happen in a different width.
  unsigned OrigBitWidth = DL.getIndexTypeSizeInBits(PtrA->getType());
  APInt OffsetA(OrigBitWidth, 0);
  APInt OffsetB(OrigBitWidth, 0);
  PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
  PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);
  unsigned NewPtrBitWidth = DL.getTypeStoreSizeInBits(PtrA->getType());
  if (NewPtrBitWidth != DL.getTypeStoreSizeInBits(PtrB->getType()))
    return std::nullopt;

  assert(OffsetA.getSignificantBits() <= NewPtrBitWidth &&
         OffsetB.getSignificantBits() <= NewPtrBitWidth);

  OffsetA = OffsetA.sextOrTrunc(NewPtrBitWidth);
  OffsetB = OffsetB.sextOrTrunc(NewPtrBitWidth);
  if (PtrA == PtrB)
    return (OffsetB - OffsetA).sextOrTrunc(OrigBitWidth);

  // Try to compute B - A via SCEV.
  const SCEV *DistScev = SE.getMinusSCEV(SE.getSCEV(PtrB), SE.getSCEV(PtrA));
  if (DistScev != SE.getCouldNotCompute()) {
    LLVM_DEBUG(dbgs() << "LSV: SCEV PtrB - PtrA =" << *DistScev << "\n");
    ConstantRange DistRange = SE.getSignedRange(DistScev);
    if (DistRange.isSingleElement()) {
      APInt Dist = DistRange.getSingleElement()->sextOrTrunc(NewPtrBitWidth);
      return (OffsetB - OffsetA + Dist).sextOrTrunc(OrigBitWidth);
    }
  }
  if (std::optional<APInt> Diff =
          getConstantOffsetComplexAddrs(PtrA, PtrB, ContextInst, Depth))
    return (OffsetB - OffsetA + Diff->sext(OffsetB.getBitWidth()))
        .sextOrTrunc(OrigBitWidth);
  return std::nullopt;
}
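// Bit-width bookkeeping example (illustration): in an address space with
// 32-bit indices (OrigBitWidth == 32) but 64-bit stored pointers
// (NewPtrBitWidth == 64), OffsetA/OffsetB are sign-extended to 64 bits for
// the subtraction and the result is truncated back to 32 bits, so callers
// always see an offset in index-type width.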