51#define DEBUG_TYPE "scalarizer"
84 unsigned NumPacked = 0;
88 unsigned NumFragments = 0;
91 Type *SplitTy =
nullptr;
95 Type *RemainderTy =
nullptr;
97 Type *getFragmentType(
unsigned I)
const {
98 return RemainderTy &&
I == NumFragments - 1 ? RemainderTy : SplitTy;
106 Scatterer() =
default;
112 const VectorSplit &VS,
ValueVector *cachePtr =
nullptr);
115 Value *operator[](
unsigned I);
118 unsigned size()
const {
return VS.NumFragments; }
133 FCmpSplitter(
FCmpInst &fci) : FCI(fci) {}
136 const Twine &Name)
const {
137 return Builder.CreateFCmp(FCI.getPredicate(), Op0, Op1, Name);
146 ICmpSplitter(
ICmpInst &ici) : ICI(ici) {}
149 const Twine &Name)
const {
150 return Builder.CreateICmp(ICI.getPredicate(), Op0, Op1, Name);
158struct UnarySplitter {
162 return Builder.CreateUnOp(UO.getOpcode(),
Op, Name);
170struct BinarySplitter {
174 const Twine &Name)
const {
175 return Builder.CreateBinOp(BO.getOpcode(), Op0, Op1, Name);
183 VectorLayout() =
default;
186 Align getFragmentAlign(
unsigned Frag) {
204 unsigned StructSize = Ty->getNumContainedTypes();
211 for (
unsigned I = 1;
I < StructSize;
I++) {
222 const VectorSplit &VS,
Twine Name) {
223 unsigned NumElements = VS.VecTy->getNumElements();
227 if (VS.NumPacked > 1) {
230 ExtendMask.
resize(NumElements, -1);
231 for (
unsigned I = 0;
I < VS.NumPacked; ++
I)
234 InsertMask.
resize(NumElements);
235 for (
unsigned I = 0;
I < NumElements; ++
I)
240 for (
unsigned I = 0;
I < VS.NumFragments; ++
I) {
241 Value *Fragment = Fragments[
I];
243 unsigned NumPacked = VS.NumPacked;
244 if (
I == VS.NumFragments - 1 && VS.RemainderTy) {
246 NumPacked = RemVecTy->getNumElements();
251 if (NumPacked == 1) {
252 Res = Builder.CreateInsertElement(Res, Fragment,
I * VS.NumPacked,
253 Name +
".upto" +
Twine(
I));
255 Fragment = Builder.CreateShuffleVector(Fragment, Fragment, ExtendMask);
259 for (
unsigned J = 0; J < NumPacked; ++J)
260 InsertMask[
I * VS.NumPacked + J] = NumElements + J;
261 Res = Builder.CreateShuffleVector(Res, Fragment, InsertMask,
262 Name +
".upto" +
Twine(
I));
263 for (
unsigned J = 0; J < NumPacked; ++J)
264 InsertMask[
I * VS.NumPacked + J] =
I * VS.NumPacked + J;
273class ScalarizerVisitor :
public InstVisitor<ScalarizerVisitor, bool> {
275 ScalarizerVisitor(DominatorTree *DT,
const TargetTransformInfo *TTI,
278 ScalarizeVariableInsertExtract(
Options.ScalarizeVariableInsertExtract),
279 ScalarizeLoadStore(
Options.ScalarizeLoadStore),
280 ScalarizeMinBits(
Options.ScalarizeMinBits) {}
286 bool visitInstruction(Instruction &
I) {
return false; }
287 bool visitSelectInst(SelectInst &SI);
288 bool visitICmpInst(ICmpInst &ICI);
289 bool visitFCmpInst(FCmpInst &FCI);
290 bool visitUnaryOperator(UnaryOperator &UO);
291 bool visitBinaryOperator(BinaryOperator &BO);
292 bool visitGetElementPtrInst(GetElementPtrInst &GEPI);
293 bool visitCastInst(CastInst &CI);
294 bool visitBitCastInst(BitCastInst &BCI);
295 bool visitInsertElementInst(InsertElementInst &IEI);
296 bool visitExtractElementInst(ExtractElementInst &EEI);
297 bool visitExtractValueInst(ExtractValueInst &EVI);
298 bool visitShuffleVectorInst(ShuffleVectorInst &SVI);
299 bool visitPHINode(PHINode &
PHI);
300 bool visitLoadInst(LoadInst &LI);
301 bool visitStoreInst(StoreInst &SI);
302 bool visitCallInst(CallInst &ICI);
303 bool visitFreezeInst(FreezeInst &FI);
306 Scatterer scatter(Instruction *Point,
Value *V,
const VectorSplit &VS);
307 void gather(Instruction *
Op,
const ValueVector &CV,
const VectorSplit &VS);
308 void replaceUses(Instruction *
Op,
Value *CV);
309 bool canTransferMetadata(
unsigned Kind);
310 void transferMetadataAndIRFlags(Instruction *
Op,
const ValueVector &CV);
311 std::optional<VectorSplit> getVectorSplit(
Type *Ty);
312 std::optional<VectorLayout> getVectorLayout(
Type *Ty, Align Alignment,
313 const DataLayout &
DL);
316 template<
typename T>
bool splitUnary(Instruction &,
const T &);
317 template<
typename T>
bool splitBinary(Instruction &,
const T &);
319 bool splitCall(CallInst &CI);
328 const TargetTransformInfo *TTI;
330 const bool ScalarizeVariableInsertExtract;
331 const bool ScalarizeLoadStore;
332 const unsigned ScalarizeMinBits;
338 ScalarizerPassOptions Options;
339 ScalarizerLegacyPass() : FunctionPass(ID), Options() {}
340 ScalarizerLegacyPass(
const ScalarizerPassOptions &Options);
342 void getAnalysisUsage(AnalysisUsage &AU)
const override;
350void ScalarizerLegacyPass::getAnalysisUsage(
AnalysisUsage &AU)
const {
356char ScalarizerLegacyPass::ID = 0;
358 "Scalarize vector operations",
false,
false)
365 : BB(bb), BBI(bbi), V(v), VS(VS), CachePtr(cachePtr) {
368 Tmp.resize(VS.NumFragments,
nullptr);
370 assert((CachePtr->empty() || VS.NumFragments == CachePtr->size() ||
372 "Inconsistent vector sizes");
373 if (VS.NumFragments > CachePtr->size())
374 CachePtr->resize(VS.NumFragments,
nullptr);
379Value *Scatterer::operator[](
unsigned Frag) {
389 CV[Frag] = Builder.CreateConstGEP1_32(VS.SplitTy, V, Frag,
390 V->
getName() +
".i" + Twine(Frag));
394 Type *FragmentTy = VS.getFragmentType(Frag);
397 SmallVector<int>
Mask;
398 for (
unsigned J = 0; J < VecTy->getNumElements(); ++J)
399 Mask.push_back(Frag * VS.NumPacked + J);
402 V->
getName() +
".i" + Twine(Frag));
415 V =
Insert->getOperand(0);
416 if (Frag * VS.NumPacked == J) {
417 CV[Frag] =
Insert->getOperand(1);
421 if (VS.NumPacked == 1 && !CV[J]) {
425 CV[J] =
Insert->getOperand(1);
428 CV[Frag] = Builder.CreateExtractElement(V, Frag * VS.NumPacked,
429 V->
getName() +
".i" + Twine(Frag));
435bool ScalarizerLegacyPass::runOnFunction(Function &
F) {
439 DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
440 const TargetTransformInfo *
TTI =
441 &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
F);
443 return Impl.visit(
F);
447 return new ScalarizerLegacyPass(
Options);
450bool ScalarizerVisitor::visit(
Function &
F) {
463 if (
Done &&
I->getType()->isVoidTy()) {
464 I->eraseFromParent();
474Scatterer ScalarizerVisitor::scatter(Instruction *Point,
Value *V,
475 const VectorSplit &VS) {
481 return Scatterer(BB, BB->
begin(), V, VS, &Scattered[{V, VS.SplitTy}]);
498 &Scattered[{
V,
VS.SplitTy}]);
509void ScalarizerVisitor::gather(Instruction *
Op,
const ValueVector &CV,
510 const VectorSplit &VS) {
511 transferMetadataAndIRFlags(
Op, CV);
517 for (
unsigned I = 0,
E = SV.
size();
I !=
E; ++
I) {
519 if (V ==
nullptr || SV[
I] == CV[
I])
524 CV[
I]->takeName(Old);
526 PotentiallyDeadInstrs.emplace_back(Old);
534void ScalarizerVisitor::replaceUses(Instruction *
Op,
Value *CV) {
536 Op->replaceAllUsesWith(CV);
537 PotentiallyDeadInstrs.emplace_back(
Op);
544bool ScalarizerVisitor::canTransferMetadata(
unsigned Tag) {
545 return (
Tag == LLVMContext::MD_tbaa
546 ||
Tag == LLVMContext::MD_fpmath
547 ||
Tag == LLVMContext::MD_tbaa_struct
548 ||
Tag == LLVMContext::MD_invariant_load
549 ||
Tag == LLVMContext::MD_alias_scope
550 ||
Tag == LLVMContext::MD_noalias
551 ||
Tag == LLVMContext::MD_mem_parallel_loop_access
552 ||
Tag == LLVMContext::MD_access_group);
557void ScalarizerVisitor::transferMetadataAndIRFlags(Instruction *
Op,
560 Op->getAllMetadataOtherThanDebugLoc(MDs);
561 for (
Value *V : CV) {
563 for (
const auto &MD : MDs)
564 if (canTransferMetadata(MD.first))
565 New->setMetadata(MD.first, MD.second);
566 New->copyIRFlags(
Op);
567 if (
Op->getDebugLoc() && !
New->getDebugLoc())
568 New->setDebugLoc(
Op->getDebugLoc());
574std::optional<VectorSplit> ScalarizerVisitor::getVectorSplit(
Type *Ty) {
580 unsigned NumElems =
Split.VecTy->getNumElements();
581 Type *ElemTy =
Split.VecTy->getElementType();
586 Split.NumFragments = NumElems;
587 Split.SplitTy = ElemTy;
590 if (
Split.NumPacked >= NumElems)
596 unsigned RemainderElems = NumElems %
Split.NumPacked;
597 if (RemainderElems > 1)
599 else if (RemainderElems == 1)
600 Split.RemainderTy = ElemTy;
609std::optional<VectorLayout>
610ScalarizerVisitor::getVectorLayout(
Type *Ty, Align Alignment,
611 const DataLayout &
DL) {
612 std::optional<VectorSplit>
VS = getVectorSplit(Ty);
619 if (!
DL.typeSizeEqualsStoreSize(
VS->SplitTy) ||
620 (
VS->RemainderTy && !
DL.typeSizeEqualsStoreSize(
VS->RemainderTy)))
622 Layout.VecAlign = Alignment;
623 Layout.SplitSize =
DL.getTypeStoreSize(
VS->SplitTy);
629template<
typename Splitter>
630bool ScalarizerVisitor::splitUnary(Instruction &
I,
const Splitter &Split) {
631 std::optional<VectorSplit>
VS = getVectorSplit(
I.getType());
635 std::optional<VectorSplit> OpVS;
636 if (
I.getOperand(0)->getType() ==
I.getType()) {
639 OpVS = getVectorSplit(
I.getOperand(0)->getType());
640 if (!OpVS ||
VS->NumPacked != OpVS->NumPacked)
645 Scatterer
Op = scatter(&
I,
I.getOperand(0), *OpVS);
646 assert(
Op.size() ==
VS->NumFragments &&
"Mismatched unary operation");
649 for (
unsigned Frag = 0; Frag <
VS->NumFragments; ++Frag)
650 Res[Frag] =
Split(Builder,
Op[Frag],
I.getName() +
".i" + Twine(Frag));
651 gather(&
I, Res, *VS);
657template<
typename Splitter>
658bool ScalarizerVisitor::splitBinary(Instruction &
I,
const Splitter &Split) {
659 std::optional<VectorSplit>
VS = getVectorSplit(
I.getType());
663 std::optional<VectorSplit> OpVS;
664 if (
I.getOperand(0)->getType() ==
I.getType()) {
667 OpVS = getVectorSplit(
I.getOperand(0)->getType());
668 if (!OpVS ||
VS->NumPacked != OpVS->NumPacked)
673 Scatterer VOp0 = scatter(&
I,
I.getOperand(0), *OpVS);
674 Scatterer VOp1 = scatter(&
I,
I.getOperand(1), *OpVS);
675 assert(VOp0.size() ==
VS->NumFragments &&
"Mismatched binary operation");
676 assert(VOp1.size() ==
VS->NumFragments &&
"Mismatched binary operation");
679 for (
unsigned Frag = 0; Frag <
VS->NumFragments; ++Frag) {
680 Value *Op0 = VOp0[Frag];
681 Value *Op1 = VOp1[Frag];
682 Res[Frag] =
Split(Builder, Op0, Op1,
I.getName() +
".i" + Twine(Frag));
684 gather(&
I, Res, *VS);
690bool ScalarizerVisitor::splitCall(CallInst &CI) {
693 std::optional<VectorSplit>
VS;
694 if (AreAllVectorsOfMatchingSize)
697 VS = getVectorSplit(CallType);
715 SmallVector<int> OverloadIdx(NumArgs, -1);
722 if (AreAllVectorsOfMatchingSize) {
724 std::optional<VectorSplit> CurrVS =
732 if (!CurrVS || CurrVS->NumPacked !=
VS->NumPacked)
740 for (
unsigned I = 0;
I != NumArgs; ++
I) {
742 if ([[maybe_unused]]
auto *OpVecTy =
744 assert(OpVecTy->getNumElements() ==
VS->VecTy->getNumElements());
745 std::optional<VectorSplit> OpVS = getVectorSplit(OpI->
getType());
746 if (!OpVS || OpVS->NumPacked !=
VS->NumPacked) {
757 Scattered[
I] = scatter(&CI, OpI, *OpVS);
759 OverloadIdx[
I] = Tys.
size();
763 ScalarOperands[
I] = OpI;
777 for (
unsigned I = 0;
I <
VS->NumFragments; ++
I) {
778 bool IsRemainder =
I ==
VS->NumFragments - 1 &&
VS->RemainderTy;
779 ScalarCallOps.clear();
782 Tys[0] =
VS->RemainderTy;
784 for (
unsigned J = 0; J != NumArgs; ++J) {
786 ScalarCallOps.push_back(ScalarOperands[J]);
788 ScalarCallOps.push_back(Scattered[J][
I]);
789 if (IsRemainder && OverloadIdx[J] >= 0)
790 Tys[OverloadIdx[J]] = Scattered[J][
I]->getType();
797 Res[
I] = Builder.CreateCall(NewIntrin, ScalarCallOps,
801 gather(&CI, Res, *VS);
805bool ScalarizerVisitor::visitSelectInst(SelectInst &SI) {
806 std::optional<VectorSplit>
VS = getVectorSplit(
SI.getType());
810 std::optional<VectorSplit> CondVS;
812 CondVS = getVectorSplit(
SI.getCondition()->getType());
813 if (!CondVS || CondVS->NumPacked !=
VS->NumPacked) {
820 Scatterer VOp1 = scatter(&SI,
SI.getOperand(1), *VS);
821 Scatterer VOp2 = scatter(&SI,
SI.getOperand(2), *VS);
822 assert(VOp1.size() ==
VS->NumFragments &&
"Mismatched select");
823 assert(VOp2.size() ==
VS->NumFragments &&
"Mismatched select");
828 Scatterer VOp0 = scatter(&SI,
SI.getOperand(0), *CondVS);
829 assert(VOp0.size() == CondVS->NumFragments &&
"Mismatched select");
830 for (
unsigned I = 0;
I <
VS->NumFragments; ++
I) {
834 Res[
I] = Builder.CreateSelect(Op0, Op1, Op2,
835 SI.getName() +
".i" + Twine(
I));
839 for (
unsigned I = 0;
I <
VS->NumFragments; ++
I) {
842 Res[
I] = Builder.CreateSelect(Op0, Op1, Op2,
843 SI.getName() +
".i" + Twine(
I));
846 gather(&SI, Res, *VS);
850bool ScalarizerVisitor::visitICmpInst(ICmpInst &ICI) {
851 return splitBinary(ICI, ICmpSplitter(ICI));
854bool ScalarizerVisitor::visitFCmpInst(FCmpInst &FCI) {
855 return splitBinary(FCI, FCmpSplitter(FCI));
858bool ScalarizerVisitor::visitUnaryOperator(UnaryOperator &UO) {
859 return splitUnary(UO, UnarySplitter(UO));
862bool ScalarizerVisitor::visitBinaryOperator(BinaryOperator &BO) {
863 return splitBinary(BO, BinarySplitter(BO));
866bool ScalarizerVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
867 std::optional<VectorSplit>
VS = getVectorSplit(GEPI.
getType());
875 SmallVector<Value *, 8> ScalarOps{1 + NumIndices};
878 for (
unsigned I = 0;
I < 1 + NumIndices; ++
I) {
881 std::optional<VectorSplit> OpVS = getVectorSplit(VecTy);
882 if (!OpVS || OpVS->NumPacked !=
VS->NumPacked) {
886 ScatterOps[
I] = scatter(&GEPI, GEPI.
getOperand(
I), *OpVS);
894 for (
unsigned I = 0;
I <
VS->NumFragments; ++
I) {
895 SmallVector<Value *, 8> SplitOps;
896 SplitOps.
resize(1 + NumIndices);
897 for (
unsigned J = 0; J < 1 + NumIndices; ++J) {
899 SplitOps[J] = ScalarOps[J];
901 SplitOps[J] = ScatterOps[J][
I];
908 NewGEPI->setIsInBounds();
910 gather(&GEPI, Res, *VS);
914bool ScalarizerVisitor::visitCastInst(CastInst &CI) {
915 std::optional<VectorSplit> DestVS = getVectorSplit(CI.
getDestTy());
919 std::optional<VectorSplit> SrcVS = getVectorSplit(CI.
getSrcTy());
920 if (!SrcVS || SrcVS->NumPacked != DestVS->NumPacked)
924 Scatterer Op0 = scatter(&CI, CI.
getOperand(0), *SrcVS);
925 assert(Op0.size() == SrcVS->NumFragments &&
"Mismatched cast");
927 Res.
resize(DestVS->NumFragments);
928 for (
unsigned I = 0;
I < DestVS->NumFragments; ++
I)
930 Builder.CreateCast(CI.
getOpcode(), Op0[
I], DestVS->getFragmentType(
I),
932 gather(&CI, Res, *DestVS);
936bool ScalarizerVisitor::visitBitCastInst(BitCastInst &BCI) {
937 std::optional<VectorSplit> DstVS = getVectorSplit(BCI.
getDestTy());
938 std::optional<VectorSplit> SrcVS = getVectorSplit(BCI.
getSrcTy());
939 if (!DstVS || !SrcVS || DstVS->RemainderTy || SrcVS->RemainderTy)
942 const bool isPointerTy = DstVS->VecTy->getElementType()->isPointerTy();
948 Scatterer Op0 = scatter(&BCI, BCI.
getOperand(0), *SrcVS);
950 Res.
resize(DstVS->NumFragments);
952 unsigned DstSplitBits = DstVS->SplitTy->getPrimitiveSizeInBits();
953 unsigned SrcSplitBits = SrcVS->SplitTy->getPrimitiveSizeInBits();
956 assert(DstVS->NumFragments == SrcVS->NumFragments);
957 for (
unsigned I = 0;
I < DstVS->NumFragments; ++
I) {
958 Res[
I] = Builder.CreateBitCast(Op0[
I], DstVS->getFragmentType(
I),
961 }
else if (SrcSplitBits % DstSplitBits == 0) {
965 MidVS.NumPacked = DstVS->NumPacked;
966 MidVS.NumFragments = SrcSplitBits / DstSplitBits;
968 MidVS.NumPacked * MidVS.NumFragments);
969 MidVS.SplitTy = DstVS->SplitTy;
972 for (
unsigned I = 0;
I < SrcVS->NumFragments; ++
I) {
979 VI->getOpcode() == Instruction::BitCast)
980 V =
VI->getOperand(0);
982 V = Builder.CreateBitCast(V, MidVS.VecTy,
V->getName() +
".cast");
984 Scatterer Mid = scatter(&BCI, V, MidVS);
985 for (
unsigned J = 0; J < MidVS.NumFragments; ++J)
986 Res[ResI++] = Mid[J];
988 }
else if (DstSplitBits % SrcSplitBits == 0) {
992 MidVS.NumFragments = DstSplitBits / SrcSplitBits;
993 MidVS.NumPacked = SrcVS->NumPacked;
995 MidVS.NumPacked * MidVS.NumFragments);
996 MidVS.SplitTy = SrcVS->SplitTy;
999 SmallVector<Value *, 8> ConcatOps;
1000 ConcatOps.
resize(MidVS.NumFragments);
1001 for (
unsigned I = 0;
I < DstVS->NumFragments; ++
I) {
1002 for (
unsigned J = 0; J < MidVS.NumFragments; ++J)
1003 ConcatOps[J] = Op0[SrcI++];
1006 Res[
I] = Builder.CreateBitCast(V, DstVS->getFragmentType(
I),
1013 gather(&BCI, Res, *DstVS);
1017bool ScalarizerVisitor::visitInsertElementInst(InsertElementInst &IEI) {
1018 std::optional<VectorSplit>
VS = getVectorSplit(IEI.
getType());
1023 Scatterer Op0 = scatter(&IEI, IEI.
getOperand(0), *VS);
1031 unsigned Idx = CI->getZExtValue();
1032 unsigned Fragment = Idx /
VS->NumPacked;
1033 for (
unsigned I = 0;
I <
VS->NumFragments; ++
I) {
1034 if (
I == Fragment) {
1036 if (Fragment ==
VS->NumFragments - 1 &&
VS->RemainderTy &&
1037 !
VS->RemainderTy->isVectorTy())
1041 Builder.CreateInsertElement(Op0[
I], NewElt, Idx %
VS->NumPacked);
1051 if (!ScalarizeVariableInsertExtract ||
VS->NumPacked > 1)
1054 for (
unsigned I = 0;
I <
VS->NumFragments; ++
I) {
1055 Value *ShouldReplace =
1056 Builder.CreateICmpEQ(InsIdx, ConstantInt::get(InsIdx->
getType(),
I),
1057 InsIdx->
getName() +
".is." + Twine(
I));
1059 Res[
I] = Builder.CreateSelect(ShouldReplace, NewElt, OldElt,
1064 gather(&IEI, Res, *VS);
1068bool ScalarizerVisitor::visitExtractValueInst(ExtractValueInst &EVI) {
1070 Type *OpTy =
Op->getType();
1086 std::optional<VectorSplit>
VS = getVectorSplit(VecType);
1090 std::optional<VectorSplit> CurrVS =
1098 if (!CurrVS || CurrVS->NumPacked !=
VS->NumPacked)
1102 Scatterer Op0 = scatter(&EVI,
Op, *VS);
1107 Value *ResElem = Builder.CreateExtractValue(
1108 Op0[
OpIdx], Index, EVI.
getName() +
".elem" + Twine(Index));
1113 std::optional<VectorSplit> AVS = getVectorSplit(ActualVecType);
1114 gather(&EVI, Res, *AVS);
1118bool ScalarizerVisitor::visitExtractElementInst(ExtractElementInst &EEI) {
1124 Scatterer Op0 = scatter(&EEI, EEI.
getOperand(0), *VS);
1128 unsigned Idx = CI->getZExtValue();
1129 unsigned Fragment = Idx /
VS->NumPacked;
1130 Value *Res = Op0[Fragment];
1132 if (Fragment ==
VS->NumFragments - 1 &&
VS->RemainderTy &&
1133 !
VS->RemainderTy->isVectorTy())
1136 Res = Builder.CreateExtractElement(Res, Idx %
VS->NumPacked);
1137 replaceUses(&EEI, Res);
1142 if (!ScalarizeVariableInsertExtract ||
VS->NumPacked > 1)
1146 for (
unsigned I = 0;
I <
VS->NumFragments; ++
I) {
1147 Value *ShouldExtract =
1148 Builder.CreateICmpEQ(ExtIdx, ConstantInt::get(ExtIdx->
getType(),
I),
1149 ExtIdx->
getName() +
".is." + Twine(
I));
1151 Res = Builder.CreateSelect(ShouldExtract, Elt, Res,
1152 EEI.
getName() +
".upto" + Twine(
I));
1154 replaceUses(&EEI, Res);
1158bool ScalarizerVisitor::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
1159 std::optional<VectorSplit>
VS = getVectorSplit(SVI.
getType());
1160 std::optional<VectorSplit> VSOp =
1162 if (!VS || !VSOp ||
VS->NumPacked > 1 || VSOp->NumPacked > 1)
1165 Scatterer Op0 = scatter(&SVI, SVI.
getOperand(0), *VSOp);
1166 Scatterer Op1 = scatter(&SVI, SVI.
getOperand(1), *VSOp);
1170 for (
unsigned I = 0;
I <
VS->NumFragments; ++
I) {
1174 else if (
unsigned(Selector) < Op0.size())
1175 Res[
I] = Op0[Selector];
1177 Res[
I] = Op1[Selector - Op0.size()];
1179 gather(&SVI, Res, *VS);
1183bool ScalarizerVisitor::visitPHINode(PHINode &
PHI) {
1184 std::optional<VectorSplit>
VS = getVectorSplit(
PHI.getType());
1193 for (
unsigned I = 0;
I <
VS->NumFragments; ++
I) {
1194 Res[
I] = Builder.CreatePHI(
VS->getFragmentType(
I),
NumOps,
1195 PHI.getName() +
".i" + Twine(
I));
1199 Scatterer
Op = scatter(&
PHI,
PHI.getIncomingValue(
I), *VS);
1201 for (
unsigned J = 0; J <
VS->NumFragments; ++J)
1204 gather(&
PHI, Res, *VS);
1208bool ScalarizerVisitor::visitLoadInst(LoadInst &LI) {
1209 if (!ScalarizeLoadStore)
1214 std::optional<VectorLayout> Layout = getVectorLayout(
1222 Res.
resize(Layout->VS.NumFragments);
1224 for (
unsigned I = 0;
I < Layout->VS.NumFragments; ++
I) {
1225 Res[
I] = Builder.CreateAlignedLoad(Layout->VS.getFragmentType(
I),
Ptr[
I],
1226 Align(Layout->getFragmentAlign(
I)),
1229 gather(&LI, Res, Layout->VS);
1233bool ScalarizerVisitor::visitStoreInst(StoreInst &SI) {
1234 if (!ScalarizeLoadStore)
1239 Value *FullValue =
SI.getValueOperand();
1240 std::optional<VectorLayout> Layout = getVectorLayout(
1241 FullValue->
getType(),
SI.getAlign(),
SI.getDataLayout());
1246 Scatterer VPtr = scatter(&SI,
SI.getPointerOperand(), Layout->VS);
1247 Scatterer VVal = scatter(&SI, FullValue, Layout->VS);
1250 Stores.
resize(Layout->VS.NumFragments);
1251 for (
unsigned I = 0;
I < Layout->VS.NumFragments; ++
I) {
1255 Builder.CreateAlignedStore(Val,
Ptr, Layout->getFragmentAlign(
I));
1257 transferMetadataAndIRFlags(&SI, Stores);
1261bool ScalarizerVisitor::visitCallInst(CallInst &CI) {
1262 return splitCall(CI);
1265bool ScalarizerVisitor::visitFreezeInst(FreezeInst &FI) {
1273bool ScalarizerVisitor::finish() {
1276 if (Gathered.
empty() && Scattered.empty() && !Scalarized)
1278 for (
const auto &GMI : Gathered) {
1281 if (!
Op->use_empty()) {
1291 VectorSplit
VS = *getVectorSplit(Ty);
1304 unsigned NumOfStructElements = Ty->getNumElements();
1306 for (
unsigned I = 0;
I < NumOfStructElements; ++
I) {
1307 for (
auto *CVelem : CV) {
1309 CVelem,
I,
Op->getName() +
".elem" + Twine(
I));
1310 ElemCV[
I].push_back(Elem);
1314 for (
unsigned I = 0;
I < NumOfStructElements; ++
I) {
1315 Type *ElemTy = Ty->getElementType(
I);
1317 "Only Structs of all FixedVectorType supported");
1318 VectorSplit
VS = *getVectorSplit(ElemTy);
1319 assert(
VS.NumFragments == CV.size());
1321 Value *ConcatenatedVector =
1324 Op->getName() +
".insert");
1327 assert(CV.size() == 1 &&
Op->getType() == CV[0]->getType());
1332 Op->replaceAllUsesWith(Res);
1334 PotentiallyDeadInstrs.emplace_back(
Op);
1348 ScalarizerVisitor Impl(DT,
TTI, Options);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static bool runOnFunction(Function &F, bool PostInlining)
Module.h This file contains the declarations for the Module class.
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
void visit(MachineFunction &MF, MachineBasicBlock &Start, std::function< void(MachineBasicBlock *)> op)
SmallVector< std::pair< Instruction *, ValueVector * >, 16 > GatherList
static BasicBlock::iterator skipPastPhiNodesAndDbg(BasicBlock::iterator Itr)
static bool isStructOfMatchingFixedVectors(Type *Ty)
std::map< std::pair< Value *, Type * >, ValueVector > ScatterMap
SmallVector< Value *, 8 > ValueVector
static Value * concatenate(IRBuilder<> &Builder, ArrayRef< Value * > Fragments, const VectorSplit &VS, Twine Name)
Concatenate the given fragments to a single vector value of the type described in VS.
This pass converts vector operations into scalar operations (or, optionally, operations on smaller ve...
This file defines the SmallVector class.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
bool empty() const
empty - Check if the array is empty.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
const Function * getParent() const
Return the enclosing method, or null if none.
InstListType::iterator iterator
Instruction iterators...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
unsigned arg_size() const
Type * getSrcTy() const
Return the source type, as a convenience.
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Type * getDestTy() const
Return the destination type, as a convenience.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Analysis pass which computes a DominatorTree.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
LLVM_ABI bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
This instruction compares its operands according to the predicate given to the constructor.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
FunctionPass class - This class is used to implement most global optimizations.
LLVM_ABI bool isInBounds() const
Determine whether the GEP has the inbounds flag.
Type * getSourceElementType() const
unsigned getNumIndices() const
This instruction compares its operands according to the predicate given to the constructor.
Value * CreateInsertValue(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &Name="")
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Value * CreateFreeze(Value *V, const Twine &Name="")
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
VectorType * getType() const
Overload to return most specific vector type.
Base class for instruction visitors.
void visit(Iterator Start, Iterator End)
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Value * getPointerOperand()
Align getAlign() const
Return the alignment of the access that is being performed.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserve()
Mark an analysis as preserved.
LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
int getMaskValue(unsigned Elt) const
Return the shuffle mask value of this instruction for the given element index.
VectorType * getType() const
Overload to return most specific vector type.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Analysis pass providing the TargetTransformInfo.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
bool isPointerTy() const
True if this is an instance of PointerType.
unsigned getNumContainedTypes() const
Return the number of types in the derived type.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Type * getContainedType(unsigned i) const
This method is used to implement the type iterator (defined at the end of the file).
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
const ParentTy * getParent() const
self_iterator getIterator()
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ BasicBlock
Various leaf nodes.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
friend class Instruction
Iterator for Instructions in a `BasicBlock.
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI bool isTriviallyScalarizable(Intrinsic::ID ID, const TargetTransformInfo *TTI)
Identify if the intrinsic is trivially scalarizable.
FunctionAddr VTableAddr Value
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI BasicBlock::iterator skipDebugIntrinsics(BasicBlock::iterator It)
Advance It while it points to a debug instruction and return the result.
bool isPointerTy(const Type *T)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
LLVM_ABI bool isVectorIntrinsicWithStructReturnOverloadAtField(Intrinsic::ID ID, int RetIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic that returns a struct is overloaded at the struct elem...
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
LLVM_ABI bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
DWARFExpression::Operation Op
LLVM_ABI FunctionPass * createScalarizerPass(const ScalarizerPassOptions &Options=ScalarizerPassOptions())
Create a legacy pass manager instance of the Scalarizer pass.
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructionsPermissive(SmallVectorImpl< WeakTrackingVH > &DeadInsts, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
Same functionality as RecursivelyDeleteTriviallyDeadInstructions, but allow instructions that are not...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic is overloaded on the type of the operand at index OpdI...
This struct is a compact representation of a valid (non-zero power of two) alignment.