#define DEBUG_TYPE "scalarizer"

  if (isa<PHINode>(Itr))

using ScatterMap = std::map<std::pair<Value *, Type *>, ValueVector>;

  unsigned NumPacked = 0;

  unsigned NumFragments = 0;

  Type *SplitTy = nullptr;

  Type *RemainderTy = nullptr;

  Type *getFragmentType(unsigned I) const {
    return RemainderTy && I == NumFragments - 1 ? RemainderTy : SplitTy;
  }
  Scatterer() = default;

  Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
            const VectorSplit &VS, ValueVector *cachePtr = nullptr);

  Value *operator[](unsigned I);

  unsigned size() const { return VS.NumFragments; }

  ValueVector *CachePtr;
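  // Note (added commentary, not part of the original source): Scatterer hands
  // out the I-th fragment of a vector value on demand. operator[] materializes
  // each fragment lazily -- an extractelement/shufflevector for vector values,
  // or a constant GEP for pointers to vectors -- and stores it in *CachePtr
  // when a cache is supplied, so repeated queries reuse one instruction.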
  FCmpSplitter(FCmpInst &fci) : FCI(fci) {}

  ICmpSplitter(ICmpInst &ici) : ICI(ici) {}

struct UnarySplitter {

struct BinarySplitter {

  VectorLayout() = default;

  Align getFragmentAlign(unsigned Frag) {
static bool isStructOfMatchingFixedVectors(Type *Ty) {
  if (!isa<StructType>(Ty))

  for (unsigned I = 1; I < StructSize; I++) {
  unsigned NumElements = VS.VecTy->getNumElements();

  if (VS.NumPacked > 1) {
    ExtendMask.resize(NumElements, -1);
    for (unsigned I = 0; I < VS.NumPacked; ++I)

    InsertMask.resize(NumElements);
    for (unsigned I = 0; I < NumElements; ++I)

  for (unsigned I = 0; I < VS.NumFragments; ++I) {
    Value *Fragment = Fragments[I];

    unsigned NumPacked = VS.NumPacked;
    if (I == VS.NumFragments - 1 && VS.RemainderTy) {
      if (auto *RemVecTy = dyn_cast<FixedVectorType>(VS.RemainderTy))
        NumPacked = RemVecTy->getNumElements();

    if (NumPacked == 1) {

        for (unsigned J = 0; J < NumPacked; ++J)
          InsertMask[I * VS.NumPacked + J] = NumElements + J;

        for (unsigned J = 0; J < NumPacked; ++J)
          InsertMask[I * VS.NumPacked + J] = I * VS.NumPacked + J;
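// Note (added commentary, not part of the original source): concatenate() is
// the inverse of scattering. It starts from a poison vector and folds each
// fragment back in: single-element fragments via insertelement, multi-element
// fragments by first widening them with a shufflevector (ExtendMask) and then
// merging them into the running result with InsertMask, which selects
// already-placed lanes from the accumulator and new lanes from the fragment.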
class ScalarizerVisitor : public InstVisitor<ScalarizerVisitor, bool> {

        ScalarizeVariableInsertExtract(Options.ScalarizeVariableInsertExtract),
        ScalarizeLoadStore(Options.ScalarizeLoadStore),
        ScalarizeMinBits(Options.ScalarizeMinBits) {}

  void gather(Instruction *Op, const ValueVector &CV, const VectorSplit &VS);
  bool canTransferMetadata(unsigned Kind);
  void transferMetadataAndIRFlags(Instruction *Op, const ValueVector &CV);
  std::optional<VectorSplit> getVectorSplit(Type *Ty);
  std::optional<VectorLayout> getVectorLayout(Type *Ty, Align Alignment,
                                              const DataLayout &DL);

  template <typename T> bool splitUnary(Instruction &, const T &);
  template <typename T> bool splitBinary(Instruction &, const T &);

  ScatterMap Scattered;

  const bool ScalarizeVariableInsertExtract;
  const bool ScalarizeLoadStore;
  const unsigned ScalarizeMinBits;
void ScalarizerLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {

char ScalarizerLegacyPass::ID = 0;

                    "Scalarize vector operations", false, false)
Scatterer::Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
                     const VectorSplit &VS, ValueVector *cachePtr)
    : BB(bb), BBI(bbi), V(v), VS(VS), CachePtr(cachePtr) {
  IsPointer = V->getType()->isPointerTy();
    Tmp.resize(VS.NumFragments, nullptr);
    assert((CachePtr->empty() || VS.NumFragments == CachePtr->size() ||
           "Inconsistent vector sizes");
    if (VS.NumFragments > CachePtr->size())
      CachePtr->resize(VS.NumFragments, nullptr);
Value *Scatterer::operator[](unsigned Frag) {
  ValueVector &CV = CachePtr ? *CachePtr : Tmp;

      CV[Frag] = Builder.CreateConstGEP1_32(VS.SplitTy, V, Frag,
                                            V->getName() + ".i" + Twine(Frag));

  Type *FragmentTy = VS.getFragmentType(Frag);

  if (auto *VecTy = dyn_cast<FixedVectorType>(FragmentTy)) {
    for (unsigned J = 0; J < VecTy->getNumElements(); ++J)
      Mask.push_back(Frag * VS.NumPacked + J);
                                           V->getName() + ".i" + Twine(Frag));

      unsigned J = Idx->getZExtValue();
      if (Frag * VS.NumPacked == J) {
        CV[Frag] = Insert->getOperand(1);

      if (VS.NumPacked == 1 && !CV[J]) {
        CV[J] = Insert->getOperand(1);

  CV[Frag] = Builder.CreateExtractElement(V, Frag * VS.NumPacked,
                                          V->getName() + ".i" + Twine(Frag));
bool ScalarizerLegacyPass::runOnFunction(Function &F) {
  DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
      &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
  return Impl.visit(F);

  return new ScalarizerLegacyPass(Options);

bool ScalarizerVisitor::visit(Function &F) {
  assert(Gathered.empty() && Scattered.empty());

    if (Done && I->getType()->isVoidTy())
      I->eraseFromParent();
Scatterer ScalarizerVisitor::scatter(Instruction *Point, Value *V,
                                     const VectorSplit &VS) {
  if (Argument *VArg = dyn_cast<Argument>(V)) {
    return Scatterer(BB, BB->begin(), V, VS, &Scattered[{V, VS.SplitTy}]);

                   &Scattered[{V, VS.SplitTy}]);
void ScalarizerVisitor::gather(Instruction *Op, const ValueVector &CV,
                               const VectorSplit &VS) {
  transferMetadataAndIRFlags(Op, CV);

  ValueVector &SV = Scattered[{Op, VS.SplitTy}];

    for (unsigned I = 0, E = SV.size(); I != E; ++I) {
      if (V == nullptr || SV[I] == CV[I])

      if (isa<Instruction>(CV[I]))
        CV[I]->takeName(Old);
      PotentiallyDeadInstrs.emplace_back(Old);

  Gathered.push_back(GatherList::value_type(Op, &SV));

  Op->replaceAllUsesWith(CV);
  PotentiallyDeadInstrs.emplace_back(Op);
bool ScalarizerVisitor::canTransferMetadata(unsigned Tag) {
  return (Tag == LLVMContext::MD_tbaa
          || Tag == LLVMContext::MD_fpmath
          || Tag == LLVMContext::MD_tbaa_struct
          || Tag == LLVMContext::MD_invariant_load
          || Tag == LLVMContext::MD_alias_scope
          || Tag == LLVMContext::MD_noalias
          || Tag == LLVMContext::MD_mem_parallel_loop_access
          || Tag == LLVMContext::MD_access_group);
}
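// Note (added commentary, not part of the original source): only metadata
// kinds that remain valid when one vector operation is replaced by several
// per-fragment operations are forwarded; any other metadata is dropped rather
// than risk making a wrong claim on the scalarized instructions.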
void ScalarizerVisitor::transferMetadataAndIRFlags(Instruction *Op,
                                                   const ValueVector &CV) {
  Op->getAllMetadataOtherThanDebugLoc(MDs);
  for (Value *V : CV) {
      for (const auto &MD : MDs)
        if (canTransferMetadata(MD.first))
          New->setMetadata(MD.first, MD.second);
      New->copyIRFlags(Op);
      if (Op->getDebugLoc() && !New->getDebugLoc())
        New->setDebugLoc(Op->getDebugLoc());
std::optional<VectorSplit> ScalarizerVisitor::getVectorSplit(Type *Ty) {
  Split.VecTy = dyn_cast<FixedVectorType>(Ty);

  unsigned NumElems = Split.VecTy->getNumElements();
  Type *ElemTy = Split.VecTy->getElementType();

    Split.NumFragments = NumElems;
    Split.SplitTy = ElemTy;

    if (Split.NumPacked >= NumElems)

    unsigned RemainderElems = NumElems % Split.NumPacked;
    if (RemainderElems > 1)
      Split.RemainderTy = FixedVectorType::get(ElemTy, RemainderElems);
    else if (RemainderElems == 1)
      Split.RemainderTy = ElemTy;
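// Worked example (added commentary; assumes -scalarize-min-bits=32 and the
// packing rule NumPacked = min-bits / element-bits used earlier in this
// function): for a <7 x i8> value, NumPacked = 32 / 8 = 4, so the split has
// two fragments -- SplitTy = <4 x i8> plus RemainderTy = <3 x i8> for the
// 7 % 4 = 3 leftover elements. With a single leftover element the remainder
// would instead be the scalar element type, as in the branch above.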
std::optional<VectorLayout>
ScalarizerVisitor::getVectorLayout(Type *Ty, Align Alignment,
                                   const DataLayout &DL) {
  std::optional<VectorSplit> VS = getVectorSplit(Ty);

  if (!DL.typeSizeEqualsStoreSize(VS->SplitTy) ||
      (VS->RemainderTy && !DL.typeSizeEqualsStoreSize(VS->RemainderTy)))

  Layout.VecAlign = Alignment;
  Layout.SplitSize = DL.getTypeStoreSize(VS->SplitTy);
template <typename Splitter>
bool ScalarizerVisitor::splitUnary(Instruction &I, const Splitter &Split) {
  std::optional<VectorSplit> VS = getVectorSplit(I.getType());

  std::optional<VectorSplit> OpVS;
  if (I.getOperand(0)->getType() == I.getType()) {
    OpVS = getVectorSplit(I.getOperand(0)->getType());
    if (!OpVS || VS->NumPacked != OpVS->NumPacked)

  Scatterer Op = scatter(&I, I.getOperand(0), *OpVS);
  assert(Op.size() == VS->NumFragments && "Mismatched unary operation");
  Res.resize(VS->NumFragments);
  for (unsigned Frag = 0; Frag < VS->NumFragments; ++Frag)
    Res[Frag] = Split(Builder, Op[Frag], I.getName() + ".i" + Twine(Frag));
  gather(&I, Res, *VS);
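// Example (added commentary; IR is approximate, value names are made up):
// with the default settings (NumPacked == 1), splitUnary turns
//   %r = fneg <2 x float> %x
// into roughly
//   %x.i0 = extractelement <2 x float> %x, i64 0
//   %r.i0 = fneg float %x.i0
//   %x.i1 = extractelement <2 x float> %x, i64 1
//   %r.i1 = fneg float %x.i1
// The fragments are re-assembled by finish()/concatenate() only if %r still
// has users that need the full vector.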
template <typename Splitter>
bool ScalarizerVisitor::splitBinary(Instruction &I, const Splitter &Split) {
  std::optional<VectorSplit> VS = getVectorSplit(I.getType());

  std::optional<VectorSplit> OpVS;
  if (I.getOperand(0)->getType() == I.getType()) {
    OpVS = getVectorSplit(I.getOperand(0)->getType());
    if (!OpVS || VS->NumPacked != OpVS->NumPacked)

  Scatterer VOp0 = scatter(&I, I.getOperand(0), *OpVS);
  Scatterer VOp1 = scatter(&I, I.getOperand(1), *OpVS);
  assert(VOp0.size() == VS->NumFragments && "Mismatched binary operation");
  assert(VOp1.size() == VS->NumFragments && "Mismatched binary operation");
  Res.resize(VS->NumFragments);
  for (unsigned Frag = 0; Frag < VS->NumFragments; ++Frag) {
    Value *Op0 = VOp0[Frag];
    Value *Op1 = VOp1[Frag];
    Res[Frag] = Split(Builder, Op0, Op1, I.getName() + ".i" + Twine(Frag));
  gather(&I, Res, *VS);
bool ScalarizerVisitor::splitCall(CallInst &CI) {
  bool AreAllVectorsOfMatchingSize = isStructOfMatchingFixedVectors(CallType);
  std::optional<VectorSplit> VS;
  if (AreAllVectorsOfMatchingSize)
    VS = getVectorSplit(CallType);

  ValueVector ScalarOperands(NumArgs);

  Tys.push_back(VS->SplitTy);

  if (AreAllVectorsOfMatchingSize) {
      std::optional<VectorSplit> CurrVS =
      if (!CurrVS || CurrVS->NumPacked != VS->NumPacked)
        Tys.push_back(CurrVS->SplitTy);

  for (unsigned I = 0; I != NumArgs; ++I) {
    if ([[maybe_unused]] auto *OpVecTy =
            dyn_cast<FixedVectorType>(OpI->getType())) {
      assert(OpVecTy->getNumElements() == VS->VecTy->getNumElements());
      std::optional<VectorSplit> OpVS = getVectorSplit(OpI->getType());
      if (!OpVS || OpVS->NumPacked != VS->NumPacked) {
      Scattered[I] = scatter(&CI, OpI, *OpVS);
        OverloadIdx[I] = Tys.size();
        Tys.push_back(OpVS->SplitTy);
      ScalarOperands[I] = OpI;

  ValueVector Res(VS->NumFragments);
  ValueVector ScalarCallOps(NumArgs);

  for (unsigned I = 0; I < VS->NumFragments; ++I) {
    bool IsRemainder = I == VS->NumFragments - 1 && VS->RemainderTy;
    ScalarCallOps.clear();

      Tys[0] = VS->RemainderTy;

    for (unsigned J = 0; J != NumArgs; ++J) {
        ScalarCallOps.push_back(ScalarOperands[J]);
        ScalarCallOps.push_back(Scattered[J][I]);
        if (IsRemainder && OverloadIdx[J] >= 0)
          Tys[OverloadIdx[J]] = Scattered[J][I]->getType();

    Res[I] = Builder.CreateCall(NewIntrin, ScalarCallOps,

  gather(&CI, Res, *VS);
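// Note (added commentary, not part of the original source): splitCall only
// fires for intrinsics reported as trivially scalarizable by the TTI-aware
// helpers. Operands the intrinsic treats as scalars
// (isVectorIntrinsicWithScalarOpAtArg) are passed through unchanged, while
// vector operands are scattered; the overload type list Tys is patched per
// fragment so the remainder fragment gets its narrower type.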
bool ScalarizerVisitor::visitSelectInst(SelectInst &SI) {
  std::optional<VectorSplit> VS = getVectorSplit(SI.getType());

  std::optional<VectorSplit> CondVS;
  if (isa<FixedVectorType>(SI.getCondition()->getType())) {
    CondVS = getVectorSplit(SI.getCondition()->getType());
    if (!CondVS || CondVS->NumPacked != VS->NumPacked) {

  Scatterer VOp1 = scatter(&SI, SI.getOperand(1), *VS);
  Scatterer VOp2 = scatter(&SI, SI.getOperand(2), *VS);
  assert(VOp1.size() == VS->NumFragments && "Mismatched select");
  assert(VOp2.size() == VS->NumFragments && "Mismatched select");
  Res.resize(VS->NumFragments);

    Scatterer VOp0 = scatter(&SI, SI.getOperand(0), *CondVS);
    assert(VOp0.size() == CondVS->NumFragments && "Mismatched select");
    for (unsigned I = 0; I < VS->NumFragments; ++I) {
      Res[I] = Builder.CreateSelect(Op0, Op1, Op2,

    for (unsigned I = 0; I < VS->NumFragments; ++I) {
      Res[I] = Builder.CreateSelect(Op0, Op1, Op2,

  gather(&SI, Res, *VS);
bool ScalarizerVisitor::visitICmpInst(ICmpInst &ICI) {
  return splitBinary(ICI, ICmpSplitter(ICI));

bool ScalarizerVisitor::visitFCmpInst(FCmpInst &FCI) {
  return splitBinary(FCI, FCmpSplitter(FCI));

bool ScalarizerVisitor::visitUnaryOperator(UnaryOperator &UO) {
  return splitUnary(UO, UnarySplitter(UO));

  return splitBinary(BO, BinarySplitter(BO));
  std::optional<VectorSplit> VS = getVectorSplit(GEPI.getType());

  for (unsigned I = 0; I < 1 + NumIndices; ++I) {
      std::optional<VectorSplit> OpVS = getVectorSplit(VecTy);
      if (!OpVS || OpVS->NumPacked != VS->NumPacked) {
      ScatterOps[I] = scatter(&GEPI, GEPI.getOperand(I), *OpVS);

  Res.resize(VS->NumFragments);
  for (unsigned I = 0; I < VS->NumFragments; ++I) {
    SplitOps.resize(1 + NumIndices);
    for (unsigned J = 0; J < 1 + NumIndices; ++J) {
        SplitOps[J] = ScalarOps[J];
        SplitOps[J] = ScatterOps[J][I];

      NewGEPI->setIsInBounds();

  gather(&GEPI, Res, *VS);
bool ScalarizerVisitor::visitCastInst(CastInst &CI) {
  std::optional<VectorSplit> DestVS = getVectorSplit(CI.getDestTy());

  std::optional<VectorSplit> SrcVS = getVectorSplit(CI.getSrcTy());
  if (!SrcVS || SrcVS->NumPacked != DestVS->NumPacked)

  Scatterer Op0 = scatter(&CI, CI.getOperand(0), *SrcVS);
  assert(Op0.size() == SrcVS->NumFragments && "Mismatched cast");
  Res.resize(DestVS->NumFragments);
  for (unsigned I = 0; I < DestVS->NumFragments; ++I)
        Builder.CreateCast(CI.getOpcode(), Op0[I], DestVS->getFragmentType(I),
  gather(&CI, Res, *DestVS);
bool ScalarizerVisitor::visitBitCastInst(BitCastInst &BCI) {
  std::optional<VectorSplit> DstVS = getVectorSplit(BCI.getDestTy());
  std::optional<VectorSplit> SrcVS = getVectorSplit(BCI.getSrcTy());
  if (!DstVS || !SrcVS || DstVS->RemainderTy || SrcVS->RemainderTy)

  const bool isPointerTy = DstVS->VecTy->getElementType()->isPointerTy();

  Scatterer Op0 = scatter(&BCI, BCI.getOperand(0), *SrcVS);
  Res.resize(DstVS->NumFragments);

  unsigned DstSplitBits = DstVS->SplitTy->getPrimitiveSizeInBits();
  unsigned SrcSplitBits = SrcVS->SplitTy->getPrimitiveSizeInBits();

    assert(DstVS->NumFragments == SrcVS->NumFragments);
    for (unsigned I = 0; I < DstVS->NumFragments; ++I) {
      Res[I] = Builder.CreateBitCast(Op0[I], DstVS->getFragmentType(I),
  } else if (SrcSplitBits % DstSplitBits == 0) {

    MidVS.NumPacked = DstVS->NumPacked;
    MidVS.NumFragments = SrcSplitBits / DstSplitBits;
                                      MidVS.NumPacked * MidVS.NumFragments);
    MidVS.SplitTy = DstVS->SplitTy;

    for (unsigned I = 0; I < SrcVS->NumFragments; ++I) {
      while ((VI = dyn_cast<Instruction>(V)) &&
             VI->getOpcode() == Instruction::BitCast)
        V = VI->getOperand(0);

      V = Builder.CreateBitCast(V, MidVS.VecTy, V->getName() + ".cast");

      Scatterer Mid = scatter(&BCI, V, MidVS);
      for (unsigned J = 0; J < MidVS.NumFragments; ++J)
        Res[ResI++] = Mid[J];
  } else if (DstSplitBits % SrcSplitBits == 0) {

    MidVS.NumFragments = DstSplitBits / SrcSplitBits;
    MidVS.NumPacked = SrcVS->NumPacked;
                                      MidVS.NumPacked * MidVS.NumFragments);
    MidVS.SplitTy = SrcVS->SplitTy;

    ConcatOps.resize(MidVS.NumFragments);
    for (unsigned I = 0; I < DstVS->NumFragments; ++I) {
      for (unsigned J = 0; J < MidVS.NumFragments; ++J)
        ConcatOps[J] = Op0[SrcI++];
      Value *V = concatenate(Builder, ConcatOps, MidVS,
      Res[I] = Builder.CreateBitCast(V, DstVS->getFragmentType(I),

  gather(&BCI, Res, *DstVS);
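// Note (added commentary, not part of the original source): the three
// branches above cover bitcasts whose fragment widths already match (a
// per-fragment bitcast suffices), bitcasts that narrow (each source fragment
// is re-scattered through an intermediate split into several destination
// fragments), and bitcasts that widen (several source fragments are
// concatenated before the per-fragment bitcast).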
  std::optional<VectorSplit> VS = getVectorSplit(IEI.getType());

  Scatterer Op0 = scatter(&IEI, IEI.getOperand(0), *VS);

  Res.resize(VS->NumFragments);

  if (auto *CI = dyn_cast<ConstantInt>(InsIdx)) {
    unsigned Idx = CI->getZExtValue();
    unsigned Fragment = Idx / VS->NumPacked;
    for (unsigned I = 0; I < VS->NumFragments; ++I) {
      if (I == Fragment) {
        if (Fragment == VS->NumFragments - 1 && VS->RemainderTy &&
            !VS->RemainderTy->isVectorTy())
              Builder.CreateInsertElement(Op0[I], NewElt, Idx % VS->NumPacked);

    if (!ScalarizeVariableInsertExtract || VS->NumPacked > 1)

    for (unsigned I = 0; I < VS->NumFragments; ++I) {
      Value *ShouldReplace =
          Builder.CreateICmpEQ(InsIdx, ConstantInt::get(InsIdx->getType(), I),
      Res[I] = Builder.CreateSelect(ShouldReplace, NewElt, OldElt,

  gather(&IEI, Res, *VS);
  Type *OpTy = Op->getType();

  if (!isStructOfMatchingFixedVectors(OpTy))

  std::optional<VectorSplit> VS = getVectorSplit(VecType);

  Scatterer Op0 = scatter(&EVI, Op, *VS);

  for (unsigned OpIdx = 0; OpIdx < Op0.size(); ++OpIdx) {
    Value *ResElem = Builder.CreateExtractValue(
        Op0[OpIdx], Index, EVI.getName() + ".elem" + Twine(Index));
    Res.push_back(ResElem);

  gather(&EVI, Res, *VS);
  Scatterer Op0 = scatter(&EEI, EEI.getOperand(0), *VS);

  if (auto *CI = dyn_cast<ConstantInt>(ExtIdx)) {
    unsigned Idx = CI->getZExtValue();
    unsigned Fragment = Idx / VS->NumPacked;
    Value *Res = Op0[Fragment];

    if (Fragment == VS->NumFragments - 1 && VS->RemainderTy &&
        !VS->RemainderTy->isVectorTy())

    Res = Builder.CreateExtractElement(Res, Idx % VS->NumPacked);
    replaceUses(&EEI, Res);

  if (!ScalarizeVariableInsertExtract || VS->NumPacked > 1)

  for (unsigned I = 0; I < VS->NumFragments; ++I) {
    Value *ShouldExtract =
        Builder.CreateICmpEQ(ExtIdx, ConstantInt::get(ExtIdx->getType(), I),
    Res = Builder.CreateSelect(ShouldExtract, Elt, Res,
  replaceUses(&EEI, Res);
  std::optional<VectorSplit> VS = getVectorSplit(SVI.getType());
  std::optional<VectorSplit> VSOp =
  if (!VS || !VSOp || VS->NumPacked > 1 || VSOp->NumPacked > 1)

  Scatterer Op0 = scatter(&SVI, SVI.getOperand(0), *VSOp);
  Scatterer Op1 = scatter(&SVI, SVI.getOperand(1), *VSOp);
  Res.resize(VS->NumFragments);

  for (unsigned I = 0; I < VS->NumFragments; ++I) {
    else if (unsigned(Selector) < Op0.size())
      Res[I] = Op0[Selector];
      Res[I] = Op1[Selector - Op0.size()];
  gather(&SVI, Res, *VS);
bool ScalarizerVisitor::visitPHINode(PHINode &PHI) {
  std::optional<VectorSplit> VS = getVectorSplit(PHI.getType());

  Res.resize(VS->NumFragments);

  unsigned NumOps = PHI.getNumOperands();
  for (unsigned I = 0; I < VS->NumFragments; ++I) {
    Res[I] = Builder.CreatePHI(VS->getFragmentType(I), NumOps,

  for (unsigned I = 0; I < NumOps; ++I) {
    Scatterer Op = scatter(&PHI, PHI.getIncomingValue(I), *VS);
    for (unsigned J = 0; J < VS->NumFragments; ++J)
      cast<PHINode>(Res[J])->addIncoming(Op[J], IncomingBlock);

  gather(&PHI, Res, *VS);
bool ScalarizerVisitor::visitLoadInst(LoadInst &LI) {
  if (!ScalarizeLoadStore)

  std::optional<VectorLayout> Layout = getVectorLayout(

  Res.resize(Layout->VS.NumFragments);

  for (unsigned I = 0; I < Layout->VS.NumFragments; ++I) {
    Res[I] = Builder.CreateAlignedLoad(Layout->VS.getFragmentType(I), Ptr[I],
                                       Align(Layout->getFragmentAlign(I)),
  gather(&LI, Res, Layout->VS);
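// Example (added commentary; IR is approximate, value names are made up):
// with scalarize-load-store enabled, a load such as
//   %v = load <2 x i32>, ptr %p, align 8
// becomes per-fragment loads through constant GEPs,
//   %v.i0 = load i32, ptr %p, align 8
//   %p.i1 = getelementptr i32, ptr %p, i32 1
//   %v.i1 = load i32, ptr %p.i1, align 4
// with each fragment's alignment derived from getFragmentAlign().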
bool ScalarizerVisitor::visitStoreInst(StoreInst &SI) {
  if (!ScalarizeLoadStore)

  Value *FullValue = SI.getValueOperand();
  std::optional<VectorLayout> Layout = getVectorLayout(
      FullValue->getType(), SI.getAlign(), SI.getDataLayout());

  Scatterer VPtr = scatter(&SI, SI.getPointerOperand(), Layout->VS);
  Scatterer VVal = scatter(&SI, FullValue, Layout->VS);

  Stores.resize(Layout->VS.NumFragments);
  for (unsigned I = 0; I < Layout->VS.NumFragments; ++I) {
        Builder.CreateAlignedStore(Val, Ptr, Layout->getFragmentAlign(I));
  transferMetadataAndIRFlags(&SI, Stores);
bool ScalarizerVisitor::visitCallInst(CallInst &CI) {
  return splitCall(CI);

bool ScalarizerVisitor::visitFreezeInst(FreezeInst &FI) {
bool ScalarizerVisitor::finish() {
  if (Gathered.empty() && Scattered.empty() && !Scalarized)

  for (const auto &GMI : Gathered) {
    ValueVector &CV = *GMI.second;
    if (!Op->use_empty()) {
      if (auto *Ty = dyn_cast<FixedVectorType>(Op->getType())) {
        if (isa<PHINode>(Op))

        VectorSplit VS = *getVectorSplit(Ty);
        assert(VS.NumFragments == CV.size());
        Res = concatenate(Builder, CV, VS, Op->getName());
      } else if (auto *Ty = dyn_cast<StructType>(Op->getType())) {
        if (isa<PHINode>(Op))

        unsigned NumOfStructElements = Ty->getNumElements();
        for (unsigned I = 0; I < NumOfStructElements; ++I) {
          for (auto *CVelem : CV) {
                CVelem, I, Op->getName() + ".elem" + Twine(I));
            ElemCV[I].push_back(Elem);

        for (unsigned I = 0; I < NumOfStructElements; ++I) {
          Type *ElemTy = Ty->getElementType(I);
          assert(isa<FixedVectorType>(ElemTy) &&
                 "Only Structs of all FixedVectorType supported");
          VectorSplit VS = *getVectorSplit(ElemTy);
          assert(VS.NumFragments == CV.size());
          Value *ConcatenatedVector =
              concatenate(Builder, ElemCV[I], VS, Op->getName());
                                        Op->getName() + ".insert");

        assert(CV.size() == 1 && Op->getType() == CV[0]->getType());

      Op->replaceAllUsesWith(Res);
      PotentiallyDeadInstrs.emplace_back(Op);
  ScalarizerVisitor Impl(DT, TTI, Options);
  bool Changed = Impl.visit(F);
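// Usage sketch (added commentary, not part of the original source; flag names
// assumed from the pass options and the cl::opt definitions near the top of
// the full file, which are not shown in this excerpt): the pass can be run
// standalone with
//   opt -passes=scalarizer -S input.ll
// optionally adding -scalarize-load-store, -scalarize-variable-insert-extract,
// or -scalarize-min-bits=<N> to control the options wired into
// ScalarizerVisitor above.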