#define DEBUG_TYPE "scalarizer"
  // Number of elements packed into each full fragment.
  unsigned NumPacked = 0;
  // Total number of fragments the vector is split into.
  unsigned NumFragments = 0;
  // Type of a full fragment.
  Type *SplitTy = nullptr;
  // Type of the final, narrower fragment, or null if every fragment is full.
  Type *RemainderTy = nullptr;

  Type *getFragmentType(unsigned I) const {
    return RemainderTy && I == NumFragments - 1 ? RemainderTy : SplitTy;
  }
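// Illustrative example (not from the original source): with ScalarizeMinBits
// set to 32, a <5 x i8> vector splits into NumPacked = 4, NumFragments = 2,
// SplitTy = <4 x i8> and RemainderTy = i8, so getFragmentType(0) is <4 x i8>
// and getFragmentType(1) is i8.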
  Scatterer() = default;

  Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
            const VectorSplit &VS, ValueVector *cachePtr = nullptr);

  // Return component I, creating a new Value for it if necessary.
  Value *operator[](unsigned I);

  // Return the number of fragments.
  unsigned size() const { return VS.NumFragments; }
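// A Scatterer hands out the fragments of a vector value lazily: operator[]
// materializes fragment I on first use (via extractelement, a shufflevector
// of NumPacked lanes, or a GEP when scattering through a pointer) and, when
// CachePtr is set, caches it so later queries reuse the same value.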
  FCmpSplitter(FCmpInst &fci) : FCI(fci) {}

  Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
                    const Twine &Name) const {
    return Builder.CreateFCmp(FCI.getPredicate(), Op0, Op1, Name);
  }

  ICmpSplitter(ICmpInst &ici) : ICI(ici) {}

  Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
                    const Twine &Name) const {
    return Builder.CreateICmp(ICI.getPredicate(), Op0, Op1, Name);
  }
struct UnarySplitter {
  Value *operator()(IRBuilder<> &Builder, Value *Op, const Twine &Name) const {
    return Builder.CreateUnOp(UO.getOpcode(), Op, Name);
  }
};

struct BinarySplitter {
  Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
                    const Twine &Name) const {
    return Builder.CreateBinOp(BO.getOpcode(), Op0, Op1, Name);
  }
};
  VectorLayout() = default;

  // Return the alignment of fragment Frag.
  Align getFragmentAlign(unsigned Frag) {
    return commonAlignment(VecAlign, Frag * SplitSize);
  }
  unsigned StructSize = Ty->getNumContainedTypes();

  for (unsigned I = 1; I < StructSize; I++) {
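// concatenate() rebuilds a single vector value of the type described in VS
// from the given scalarized fragments (see the per-fragment shuffle logic
// below).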
static Value *concatenate(IRBuilder<> &Builder, ArrayRef<Value *> Fragments,
                          const VectorSplit &VS, Twine Name) {
  unsigned NumElements = VS.VecTy->getNumElements();

  if (VS.NumPacked > 1) {
    ExtendMask.resize(NumElements, -1);
    for (unsigned I = 0; I < VS.NumPacked; ++I)
      ExtendMask[I] = I;

    InsertMask.resize(NumElements);
    for (unsigned I = 0; I < NumElements; ++I)
      InsertMask[I] = I;
  }

  for (unsigned I = 0; I < VS.NumFragments; ++I) {
    Value *Fragment = Fragments[I];

    unsigned NumPacked = VS.NumPacked;
    if (I == VS.NumFragments - 1 && VS.RemainderTy) {
      if (auto *RemVecTy = dyn_cast<FixedVectorType>(VS.RemainderTy))
        NumPacked = RemVecTy->getNumElements();
    }

    if (NumPacked == 1) {
      Res = Builder.CreateInsertElement(Res, Fragment, I * VS.NumPacked,
                                        Name + ".upto" + Twine(I));

    if (NumPacked < VS.NumPacked) {
      ExtendMask.resize(NumElements, -1);

      Fragment = Builder.CreateShuffleVector(

    for (unsigned J = 0; J < NumPacked; ++J)
      InsertMask[I * VS.NumPacked + J] = NumElements + J;
    Res = Builder.CreateShuffleVector(Res, Fragment, InsertMask,
                                      Name + ".upto" + Twine(I));
    for (unsigned J = 0; J < NumPacked; ++J)
      InsertMask[I * VS.NumPacked + J] = I * VS.NumPacked + J;
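// Example (illustrative): with NumElements == 8 and NumPacked == 4, blending
// fragment I == 1 uses InsertMask {0, 1, 2, 3, 8, 9, 10, 11}, i.e. lanes 4-7
// are taken from the widened fragment (the second shuffle operand); the mask
// is then restored to the identity before the next iteration.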
class ScalarizerVisitor : public InstVisitor<ScalarizerVisitor, bool> {
public:
  ScalarizerVisitor(DominatorTree *DT, const TargetTransformInfo *TTI,
                    ScalarizerPassOptions Options)
      : DT(DT), TTI(TTI),
        ScalarizeVariableInsertExtract(Options.ScalarizeVariableInsertExtract),
        ScalarizeLoadStore(Options.ScalarizeLoadStore),
        ScalarizeMinBits(Options.ScalarizeMinBits) {}
  bool visitInstruction(Instruction &I) { return false; }
  bool visitSelectInst(SelectInst &SI);
  bool visitICmpInst(ICmpInst &ICI);
  bool visitFCmpInst(FCmpInst &FCI);
  bool visitUnaryOperator(UnaryOperator &UO);
  bool visitBinaryOperator(BinaryOperator &BO);
  bool visitGetElementPtrInst(GetElementPtrInst &GEPI);
  bool visitCastInst(CastInst &CI);
  bool visitBitCastInst(BitCastInst &BCI);
  bool visitInsertElementInst(InsertElementInst &IEI);
  bool visitExtractElementInst(ExtractElementInst &EEI);
  bool visitExtractValueInst(ExtractValueInst &EVI);
  bool visitShuffleVectorInst(ShuffleVectorInst &SVI);
  bool visitPHINode(PHINode &PHI);
  bool visitLoadInst(LoadInst &LI);
  bool visitStoreInst(StoreInst &SI);
  bool visitCallInst(CallInst &ICI);
  bool visitFreezeInst(FreezeInst &FI);
  Scatterer scatter(Instruction *Point, Value *V, const VectorSplit &VS);
  void gather(Instruction *Op, const ValueVector &CV, const VectorSplit &VS);
  void replaceUses(Instruction *Op, Value *CV);
  bool canTransferMetadata(unsigned Kind);
  void transferMetadataAndIRFlags(Instruction *Op, const ValueVector &CV);
  std::optional<VectorSplit> getVectorSplit(Type *Ty);
  std::optional<VectorLayout> getVectorLayout(Type *Ty, Align Alignment,
                                              const DataLayout &DL);

  template <typename T> bool splitUnary(Instruction &, const T &);
  template <typename T> bool splitBinary(Instruction &, const T &);

  bool splitCall(CallInst &CI);

  const TargetTransformInfo *TTI;

  const bool ScalarizeVariableInsertExtract;
  const bool ScalarizeLoadStore;
  const unsigned ScalarizeMinBits;
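  // These mirror ScalarizerPassOptions: whether insert/extract with a
  // variable index may be lowered to compare-and-select chains, whether loads
  // and stores are split, and the minimum bit width each fragment should
  // roughly keep (a fragment packs several elements when possible).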
  ScalarizerPassOptions Options;

  ScalarizerLegacyPass() : FunctionPass(ID), Options() {}
  ScalarizerLegacyPass(const ScalarizerPassOptions &Options);

  void getAnalysisUsage(AnalysisUsage &AU) const override;

void ScalarizerLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {

char ScalarizerLegacyPass::ID = 0;

                      "Scalarize vector operations", false, false)
    : BB(bb), BBI(bbi), V(v), VS(VS), CachePtr(cachePtr) {
    Tmp.resize(VS.NumFragments, nullptr);

    assert((CachePtr->empty() || VS.NumFragments == CachePtr->size() ||
           "Inconsistent vector sizes");
    if (VS.NumFragments > CachePtr->size())
      CachePtr->resize(VS.NumFragments, nullptr);
Value *Scatterer::operator[](unsigned Frag) {

    CV[Frag] = Builder.CreateConstGEP1_32(VS.SplitTy, V, Frag,
                                          V->getName() + ".i" + Twine(Frag));

    Type *FragmentTy = VS.getFragmentType(Frag);

      SmallVector<int> Mask;
      for (unsigned J = 0; J < VecTy->getNumElements(); ++J)
        Mask.push_back(Frag * VS.NumPacked + J);

                                         V->getName() + ".i" + Twine(Frag));

        V = Insert->getOperand(0);
        if (Frag * VS.NumPacked == J) {
          CV[Frag] = Insert->getOperand(1);

        if (VS.NumPacked == 1 && !CV[J]) {
          CV[J] = Insert->getOperand(1);

  CV[Frag] = Builder.CreateExtractElement(V, Frag * VS.NumPacked,
                                          V->getName() + ".i" + Twine(Frag));
bool ScalarizerLegacyPass::runOnFunction(Function &F) {

  DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  const TargetTransformInfo *TTI =
      &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);

  return Impl.visit(F);
}

FunctionPass *llvm::createScalarizerPass(const ScalarizerPassOptions &Options) {
  return new ScalarizerLegacyPass(Options);
}
bool ScalarizerVisitor::visit(Function &F) {

      if (Done && I->getType()->isVoidTy()) {
        I->eraseFromParent();
Scatterer ScalarizerVisitor::scatter(Instruction *Point, Value *V,
                                     const VectorSplit &VS) {

    return Scatterer(BB, BB->begin(), V, VS, &Scattered[{V, VS.SplitTy}]);

                   &Scattered[{V, VS.SplitTy}]);
void ScalarizerVisitor::gather(Instruction *Op, const ValueVector &CV,
                               const VectorSplit &VS) {
  transferMetadataAndIRFlags(Op, CV);

    for (unsigned I = 0, E = SV.size(); I != E; ++I) {

      if (V == nullptr || SV[I] == CV[I])

      CV[I]->takeName(Old);

      PotentiallyDeadInstrs.emplace_back(Old);
void ScalarizerVisitor::replaceUses(Instruction *Op, Value *CV) {

    Op->replaceAllUsesWith(CV);
    PotentiallyDeadInstrs.emplace_back(Op);
bool ScalarizerVisitor::canTransferMetadata(unsigned Tag) {
  return (Tag == LLVMContext::MD_tbaa
          || Tag == LLVMContext::MD_fpmath
          || Tag == LLVMContext::MD_tbaa_struct
          || Tag == LLVMContext::MD_invariant_load
          || Tag == LLVMContext::MD_alias_scope
          || Tag == LLVMContext::MD_noalias
          || Tag == LLVMContext::MD_mem_parallel_loop_access
          || Tag == LLVMContext::MD_access_group);
}
void ScalarizerVisitor::transferMetadataAndIRFlags(Instruction *Op,
                                                   const ValueVector &CV) {
  Op->getAllMetadataOtherThanDebugLoc(MDs);
  for (Value *V : CV) {

      for (const auto &MD : MDs)
        if (canTransferMetadata(MD.first))
          New->setMetadata(MD.first, MD.second);
      New->copyIRFlags(Op);
      if (Op->getDebugLoc() && !New->getDebugLoc())
        New->setDebugLoc(Op->getDebugLoc());
std::optional<VectorSplit> ScalarizerVisitor::getVectorSplit(Type *Ty) {

  unsigned NumElems = Split.VecTy->getNumElements();
  Type *ElemTy = Split.VecTy->getElementType();

    Split.NumFragments = NumElems;
    Split.SplitTy = ElemTy;

    if (Split.NumPacked >= NumElems)

    unsigned RemainderElems = NumElems % Split.NumPacked;
    if (RemainderElems > 1)
      Split.RemainderTy = FixedVectorType::get(ElemTy, RemainderElems);
    else if (RemainderElems == 1)
      Split.RemainderTy = ElemTy;
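// getVectorSplit() either scalarizes one element per fragment or, when the
// element type is small enough relative to ScalarizeMinBits, packs several
// elements per fragment so each fragment is roughly ScalarizeMinBits wide,
// recording a narrower remainder fragment when the element count does not
// divide evenly.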
std::optional<VectorLayout>
ScalarizerVisitor::getVectorLayout(Type *Ty, Align Alignment,
                                   const DataLayout &DL) {
  std::optional<VectorSplit> VS = getVectorSplit(Ty);

  if (!DL.typeSizeEqualsStoreSize(VS->SplitTy) ||
      (VS->RemainderTy && !DL.typeSizeEqualsStoreSize(VS->RemainderTy)))

  Layout.VecAlign = Alignment;
  Layout.SplitSize = DL.getTypeStoreSize(VS->SplitTy);
template<typename Splitter>
bool ScalarizerVisitor::splitUnary(Instruction &I, const Splitter &Split) {
  std::optional<VectorSplit> VS = getVectorSplit(I.getType());

  std::optional<VectorSplit> OpVS;
  if (I.getOperand(0)->getType() == I.getType()) {

    OpVS = getVectorSplit(I.getOperand(0)->getType());
    if (!OpVS || VS->NumPacked != OpVS->NumPacked)

  Scatterer Op = scatter(&I, I.getOperand(0), *OpVS);
  assert(Op.size() == VS->NumFragments && "Mismatched unary operation");

  for (unsigned Frag = 0; Frag < VS->NumFragments; ++Frag)
    Res[Frag] = Split(Builder, Op[Frag], I.getName() + ".i" + Twine(Frag));
  gather(&I, Res, *VS);
template<typename Splitter>
bool ScalarizerVisitor::splitBinary(Instruction &I, const Splitter &Split) {
  std::optional<VectorSplit> VS = getVectorSplit(I.getType());

  std::optional<VectorSplit> OpVS;
  if (I.getOperand(0)->getType() == I.getType()) {

    OpVS = getVectorSplit(I.getOperand(0)->getType());
    if (!OpVS || VS->NumPacked != OpVS->NumPacked)

  Scatterer VOp0 = scatter(&I, I.getOperand(0), *OpVS);
  Scatterer VOp1 = scatter(&I, I.getOperand(1), *OpVS);
  assert(VOp0.size() == VS->NumFragments && "Mismatched binary operation");
  assert(VOp1.size() == VS->NumFragments && "Mismatched binary operation");

  for (unsigned Frag = 0; Frag < VS->NumFragments; ++Frag) {
    Value *Op0 = VOp0[Frag];
    Value *Op1 = VOp1[Frag];
    Res[Frag] = Split(Builder, Op0, Op1, I.getName() + ".i" + Twine(Frag));
  }
  gather(&I, Res, *VS);
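// splitBinary() applies the splitter functor once per fragment; for example
// (illustrative, with ScalarizeMinBits == 32) an add on <8 x i16> becomes
// four adds on <2 x i16> fragments, recorded by gather() and, where a full
// vector value is still needed, reassembled in finish().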
bool ScalarizerVisitor::splitCall(CallInst &CI) {

  std::optional<VectorSplit> VS;
  if (AreAllVectorsOfMatchingSize)

    VS = getVectorSplit(CallType);

  SmallVector<int> OverloadIdx(NumArgs, -1);

  if (AreAllVectorsOfMatchingSize) {

      std::optional<VectorSplit> CurrVS =

      if (!CurrVS || CurrVS->NumPacked != VS->NumPacked)

  for (unsigned I = 0; I != NumArgs; ++I) {

    if ([[maybe_unused]] auto *OpVecTy =

      assert(OpVecTy->getNumElements() == VS->VecTy->getNumElements());
      std::optional<VectorSplit> OpVS = getVectorSplit(OpI->getType());
      if (!OpVS || OpVS->NumPacked != VS->NumPacked) {

      Scattered[I] = scatter(&CI, OpI, *OpVS);

        OverloadIdx[I] = Tys.size();

      ScalarOperands[I] = OpI;

  for (unsigned I = 0; I < VS->NumFragments; ++I) {
    bool IsRemainder = I == VS->NumFragments - 1 && VS->RemainderTy;
    ScalarCallOps.clear();

      Tys[0] = VS->RemainderTy;

    for (unsigned J = 0; J != NumArgs; ++J) {

        ScalarCallOps.push_back(ScalarOperands[J]);

        ScalarCallOps.push_back(Scattered[J][I]);
      if (IsRemainder && OverloadIdx[J] >= 0)
        Tys[OverloadIdx[J]] = Scattered[J][I]->getType();

    Res[I] = Builder.CreateCall(NewIntrin, ScalarCallOps,

  gather(&CI, Res, *VS);
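// splitCall() only fires for intrinsics that are trivially scalarizable:
// operands flagged as inherently scalar (isVectorIntrinsicWithScalarOpAtArg)
// are passed through unchanged, vector operands are scattered, and the
// intrinsic's overload types (Tys) are re-derived per fragment, including the
// narrower remainder fragment.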
bool ScalarizerVisitor::visitSelectInst(SelectInst &SI) {
  std::optional<VectorSplit> VS = getVectorSplit(SI.getType());

  std::optional<VectorSplit> CondVS;

    CondVS = getVectorSplit(SI.getCondition()->getType());
    if (!CondVS || CondVS->NumPacked != VS->NumPacked) {

  Scatterer VOp1 = scatter(&SI, SI.getOperand(1), *VS);
  Scatterer VOp2 = scatter(&SI, SI.getOperand(2), *VS);
  assert(VOp1.size() == VS->NumFragments && "Mismatched select");
  assert(VOp2.size() == VS->NumFragments && "Mismatched select");

    Scatterer VOp0 = scatter(&SI, SI.getOperand(0), *CondVS);
    assert(VOp0.size() == CondVS->NumFragments && "Mismatched select");
    for (unsigned I = 0; I < VS->NumFragments; ++I) {

      Res[I] = Builder.CreateSelect(Op0, Op1, Op2,
                                    SI.getName() + ".i" + Twine(I));

    for (unsigned I = 0; I < VS->NumFragments; ++I) {

      Res[I] = Builder.CreateSelect(Op0, Op1, Op2,
                                    SI.getName() + ".i" + Twine(I));

  gather(&SI, Res, *VS);
bool ScalarizerVisitor::visitICmpInst(ICmpInst &ICI) {
  return splitBinary(ICI, ICmpSplitter(ICI));
}

bool ScalarizerVisitor::visitFCmpInst(FCmpInst &FCI) {
  return splitBinary(FCI, FCmpSplitter(FCI));
}

bool ScalarizerVisitor::visitUnaryOperator(UnaryOperator &UO) {
  return splitUnary(UO, UnarySplitter(UO));
}

bool ScalarizerVisitor::visitBinaryOperator(BinaryOperator &BO) {
  return splitBinary(BO, BinarySplitter(BO));
}
bool ScalarizerVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
  std::optional<VectorSplit> VS = getVectorSplit(GEPI.getType());

  SmallVector<Value *, 8> ScalarOps{1 + NumIndices};

  for (unsigned I = 0; I < 1 + NumIndices; ++I) {

    std::optional<VectorSplit> OpVS = getVectorSplit(VecTy);
    if (!OpVS || OpVS->NumPacked != VS->NumPacked) {

    ScatterOps[I] = scatter(&GEPI, GEPI.getOperand(I), *OpVS);

  for (unsigned I = 0; I < VS->NumFragments; ++I) {
    SmallVector<Value *, 8> SplitOps;
    SplitOps.resize(1 + NumIndices);
    for (unsigned J = 0; J < 1 + NumIndices; ++J) {

        SplitOps[J] = ScalarOps[J];

        SplitOps[J] = ScatterOps[J][I];

      NewGEPI->setIsInBounds();

  gather(&GEPI, Res, *VS);
bool ScalarizerVisitor::visitCastInst(CastInst &CI) {
  std::optional<VectorSplit> DestVS = getVectorSplit(CI.getDestTy());

  std::optional<VectorSplit> SrcVS = getVectorSplit(CI.getSrcTy());
  if (!SrcVS || SrcVS->NumPacked != DestVS->NumPacked)

  Scatterer Op0 = scatter(&CI, CI.getOperand(0), *SrcVS);
  assert(Op0.size() == SrcVS->NumFragments && "Mismatched cast");

  Res.resize(DestVS->NumFragments);
  for (unsigned I = 0; I < DestVS->NumFragments; ++I)

        Builder.CreateCast(CI.getOpcode(), Op0[I], DestVS->getFragmentType(I),

  gather(&CI, Res, *DestVS);
bool ScalarizerVisitor::visitBitCastInst(BitCastInst &BCI) {
  std::optional<VectorSplit> DstVS = getVectorSplit(BCI.getDestTy());
  std::optional<VectorSplit> SrcVS = getVectorSplit(BCI.getSrcTy());
  if (!DstVS || !SrcVS || DstVS->RemainderTy || SrcVS->RemainderTy)

  const bool isPointerTy = DstVS->VecTy->getElementType()->isPointerTy();

  Scatterer Op0 = scatter(&BCI, BCI.getOperand(0), *SrcVS);

  Res.resize(DstVS->NumFragments);

  unsigned DstSplitBits = DstVS->SplitTy->getPrimitiveSizeInBits();
  unsigned SrcSplitBits = SrcVS->SplitTy->getPrimitiveSizeInBits();

    assert(DstVS->NumFragments == SrcVS->NumFragments);
    for (unsigned I = 0; I < DstVS->NumFragments; ++I) {
      Res[I] = Builder.CreateBitCast(Op0[I], DstVS->getFragmentType(I),

  } else if (SrcSplitBits % DstSplitBits == 0) {

    MidVS.NumPacked = DstVS->NumPacked;
    MidVS.NumFragments = SrcSplitBits / DstSplitBits;

                                     MidVS.NumPacked * MidVS.NumFragments);
    MidVS.SplitTy = DstVS->SplitTy;

    for (unsigned I = 0; I < SrcVS->NumFragments; ++I) {

          VI->getOpcode() == Instruction::BitCast)
        V = VI->getOperand(0);

      V = Builder.CreateBitCast(V, MidVS.VecTy, V->getName() + ".cast");

      Scatterer Mid = scatter(&BCI, V, MidVS);
      for (unsigned J = 0; J < MidVS.NumFragments; ++J)
        Res[ResI++] = Mid[J];

  } else if (DstSplitBits % SrcSplitBits == 0) {

    MidVS.NumFragments = DstSplitBits / SrcSplitBits;
    MidVS.NumPacked = SrcVS->NumPacked;

                                     MidVS.NumPacked * MidVS.NumFragments);
    MidVS.SplitTy = SrcVS->SplitTy;

    SmallVector<Value *, 8> ConcatOps;
    ConcatOps.resize(MidVS.NumFragments);
    for (unsigned I = 0; I < DstVS->NumFragments; ++I) {
      for (unsigned J = 0; J < MidVS.NumFragments; ++J)
        ConcatOps[J] = Op0[SrcI++];

      Res[I] = Builder.CreateBitCast(V, DstVS->getFragmentType(I),

  gather(&BCI, Res, *DstVS);
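// visitBitCastInst() handles three shapes: fragments of equal width map
// one-to-one, a wider source fragment is bitcast to a mid-level vector and
// re-scattered into several narrower destination fragments, and a narrower
// source is concatenated into a mid-level vector before the final bitcast.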
bool ScalarizerVisitor::visitInsertElementInst(InsertElementInst &IEI) {
  std::optional<VectorSplit> VS = getVectorSplit(IEI.getType());

  Scatterer Op0 = scatter(&IEI, IEI.getOperand(0), *VS);

    unsigned Idx = CI->getZExtValue();
    unsigned Fragment = Idx / VS->NumPacked;
    for (unsigned I = 0; I < VS->NumFragments; ++I) {
      if (I == Fragment) {

        if (Fragment == VS->NumFragments - 1 && VS->RemainderTy &&
            !VS->RemainderTy->isVectorTy())

              Builder.CreateInsertElement(Op0[I], NewElt, Idx % VS->NumPacked);

  if (!ScalarizeVariableInsertExtract || VS->NumPacked > 1)

  for (unsigned I = 0; I < VS->NumFragments; ++I) {
    Value *ShouldReplace =
        Builder.CreateICmpEQ(InsIdx, ConstantInt::get(InsIdx->getType(), I),
                             InsIdx->getName() + ".is." + Twine(I));

    Res[I] = Builder.CreateSelect(ShouldReplace, NewElt, OldElt,

  gather(&IEI, Res, *VS);
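// Inserts and extracts with a constant index touch only the fragment that
// contains the element; with a variable index they are scalarized (only when
// ScalarizeVariableInsertExtract is set and NumPacked == 1) into a chain of
// icmp eq + select, one per element.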
bool ScalarizerVisitor::visitExtractValueInst(ExtractValueInst &EVI) {

  Type *OpTy = Op->getType();

  std::optional<VectorSplit> VS = getVectorSplit(VecType);

    std::optional<VectorSplit> CurrVS =

    if (!CurrVS || CurrVS->NumPacked != VS->NumPacked)

  Scatterer Op0 = scatter(&EVI, Op, *VS);

      Value *ResElem = Builder.CreateExtractValue(
          Op0[OpIdx], Index, EVI.getName() + ".elem" + Twine(Index));

  std::optional<VectorSplit> AVS = getVectorSplit(ActualVecType);
  gather(&EVI, Res, *AVS);
bool ScalarizerVisitor::visitExtractElementInst(ExtractElementInst &EEI) {

  Scatterer Op0 = scatter(&EEI, EEI.getOperand(0), *VS);

    unsigned Idx = CI->getZExtValue();
    unsigned Fragment = Idx / VS->NumPacked;
    Value *Res = Op0[Fragment];

    if (Fragment == VS->NumFragments - 1 && VS->RemainderTy &&
        !VS->RemainderTy->isVectorTy())

      Res = Builder.CreateExtractElement(Res, Idx % VS->NumPacked);
    replaceUses(&EEI, Res);

  if (!ScalarizeVariableInsertExtract || VS->NumPacked > 1)

  for (unsigned I = 0; I < VS->NumFragments; ++I) {
    Value *ShouldExtract =
        Builder.CreateICmpEQ(ExtIdx, ConstantInt::get(ExtIdx->getType(), I),
                             ExtIdx->getName() + ".is." + Twine(I));

    Res = Builder.CreateSelect(ShouldExtract, Elt, Res,
                               EEI.getName() + ".upto" + Twine(I));

  replaceUses(&EEI, Res);
bool ScalarizerVisitor::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
  std::optional<VectorSplit> VS = getVectorSplit(SVI.getType());
  std::optional<VectorSplit> VSOp =

  if (!VS || !VSOp || VS->NumPacked > 1 || VSOp->NumPacked > 1)

  Scatterer Op0 = scatter(&SVI, SVI.getOperand(0), *VSOp);
  Scatterer Op1 = scatter(&SVI, SVI.getOperand(1), *VSOp);

  for (unsigned I = 0; I < VS->NumFragments; ++I) {

    else if (unsigned(Selector) < Op0.size())
      Res[I] = Op0[Selector];

      Res[I] = Op1[Selector - Op0.size()];

  gather(&SVI, Res, *VS);
bool ScalarizerVisitor::visitPHINode(PHINode &PHI) {
  std::optional<VectorSplit> VS = getVectorSplit(PHI.getType());

  for (unsigned I = 0; I < VS->NumFragments; ++I) {
    Res[I] = Builder.CreatePHI(VS->getFragmentType(I), NumOps,
                               PHI.getName() + ".i" + Twine(I));

    Scatterer Op = scatter(&PHI, PHI.getIncomingValue(I), *VS);

    for (unsigned J = 0; J < VS->NumFragments; ++J)

  gather(&PHI, Res, *VS);
bool ScalarizerVisitor::visitLoadInst(LoadInst &LI) {
  if (!ScalarizeLoadStore)

  std::optional<VectorLayout> Layout = getVectorLayout(

  Res.resize(Layout->VS.NumFragments);

  for (unsigned I = 0; I < Layout->VS.NumFragments; ++I) {
    Res[I] = Builder.CreateAlignedLoad(Layout->VS.getFragmentType(I), Ptr[I],
                                       Align(Layout->getFragmentAlign(I)),

  gather(&LI, Res, Layout->VS);
bool ScalarizerVisitor::visitStoreInst(StoreInst &SI) {
  if (!ScalarizeLoadStore)

  Value *FullValue = SI.getValueOperand();
  std::optional<VectorLayout> Layout = getVectorLayout(
      FullValue->getType(), SI.getAlign(), SI.getDataLayout());

  Scatterer VPtr = scatter(&SI, SI.getPointerOperand(), Layout->VS);
  Scatterer VVal = scatter(&SI, FullValue, Layout->VS);

  Stores.resize(Layout->VS.NumFragments);
  for (unsigned I = 0; I < Layout->VS.NumFragments; ++I) {

        Builder.CreateAlignedStore(Val, Ptr, Layout->getFragmentAlign(I));

  transferMetadataAndIRFlags(&SI, Stores);
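// Loads and stores are split only when ScalarizeLoadStore is enabled and
// getVectorLayout() confirms that the in-memory layout is a plain
// concatenation of the fragments (typeSizeEqualsStoreSize); each fragment
// then gets its own aligned access via getFragmentAlign().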
bool ScalarizerVisitor::visitCallInst(CallInst &CI) {
  return splitCall(CI);
}

bool ScalarizerVisitor::visitFreezeInst(FreezeInst &FI) {
bool ScalarizerVisitor::finish() {

  if (Gathered.empty() && Scattered.empty() && !Scalarized)

  for (const auto &GMI : Gathered) {

    if (!Op->use_empty()) {

        VectorSplit VS = *getVectorSplit(Ty);

        unsigned NumOfStructElements = Ty->getNumElements();

        for (unsigned I = 0; I < NumOfStructElements; ++I) {
          for (auto *CVelem : CV) {

                CVelem, I, Op->getName() + ".elem" + Twine(I));
            ElemCV[I].push_back(Elem);

        for (unsigned I = 0; I < NumOfStructElements; ++I) {
          Type *ElemTy = Ty->getElementType(I);

                 "Only Structs of all FixedVectorType supported");
          VectorSplit VS = *getVectorSplit(ElemTy);
          assert(VS.NumFragments == CV.size());

          Value *ConcatenatedVector =

                          Op->getName() + ".insert");

        assert(CV.size() == 1 && Op->getType() == CV[0]->getType());

      Op->replaceAllUsesWith(Res);

    PotentiallyDeadInstrs.emplace_back(Op);
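// finish() runs once per function: for every gathered instruction that still
// has users it rebuilds a full vector (or, for struct-of-vector results, a
// per-element concatenation re-inserted into the struct), replaces the
// remaining uses, and then queues the now-dead originals for deletion via
// PotentiallyDeadInstrs.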
  ScalarizerVisitor Impl(DT, TTI, Options);