41 #define DEBUG_TYPE "scalarize-masked-mem-intrin"
45 class ScalarizeMaskedMemIntrinLegacyPass :
public FunctionPass {
57 return "Scalarize Masked Memory Intrinsics";
78 "Scalarize unsupported masked memory intrinsics",
false,
87 return new ScalarizeMaskedMemIntrinLegacyPass();
95 unsigned NumElts = cast<FixedVectorType>(
Mask->getType())->getNumElements();
96 for (
unsigned i = 0;
i != NumElts; ++
i) {
98 if (!CElt || !isa<ConstantInt>(CElt))
107 return DL.isBigEndian() ? VectorWidth - 1 - Idx : Idx;
149 const Align AlignVal = cast<ConstantInt>(Alignment)->getAlignValue();
158 Builder.SetInsertPoint(InsertPt);
162 if (isa<Constant>(
Mask) && cast<Constant>(
Mask)->isAllOnesValue()) {
170 const Align AdjustedAlignVal =
175 Value *FirstEltPtr =
Builder.CreateBitCast(Ptr, NewPtrType);
176 unsigned VectorWidth = cast<FixedVectorType>(
VecType)->getNumElements();
179 Value *VResult = Src0;
182 for (
unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
183 if (cast<Constant>(
Mask)->getAggregateElement(Idx)->isNullValue())
185 Value *Gep =
Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
187 VResult =
Builder.CreateInsertElement(VResult,
Load, Idx);
197 if (VectorWidth != 1) {
199 SclrMask =
Builder.CreateBitCast(
Mask, SclrMaskTy,
"scalar_mask");
202 for (
unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
211 if (VectorWidth != 1) {
215 Builder.getIntN(VectorWidth, 0));
231 CondBlock->
setName(
"cond.load");
234 Value *Gep =
Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
242 IfBlock = NewIfBlock;
245 Builder.SetInsertPoint(NewIfBlock, NewIfBlock->
begin());
291 const Align AlignVal = cast<ConstantInt>(Alignment)->getAlignValue();
292 auto *
VecType = cast<VectorType>(Src->getType());
298 Builder.SetInsertPoint(InsertPt);
302 if (isa<Constant>(
Mask) && cast<Constant>(
Mask)->isAllOnesValue()) {
303 Builder.CreateAlignedStore(Src, Ptr, AlignVal);
309 const Align AdjustedAlignVal =
314 Value *FirstEltPtr =
Builder.CreateBitCast(Ptr, NewPtrType);
315 unsigned VectorWidth = cast<FixedVectorType>(
VecType)->getNumElements();
318 for (
unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
319 if (cast<Constant>(
Mask)->getAggregateElement(Idx)->isNullValue())
322 Value *Gep =
Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
323 Builder.CreateAlignedStore(OneElt, Gep, AdjustedAlignVal);
332 if (VectorWidth != 1) {
334 SclrMask =
Builder.CreateBitCast(
Mask, SclrMaskTy,
"scalar_mask");
337 for (
unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
345 if (VectorWidth != 1) {
349 Builder.getIntN(VectorWidth, 0));
365 CondBlock->
setName(
"cond.store");
369 Value *Gep =
Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
370 Builder.CreateAlignedStore(OneElt, Gep, AdjustedAlignVal);
376 Builder.SetInsertPoint(NewIfBlock, NewIfBlock->
begin());
425 Builder.SetInsertPoint(InsertPt);
426 MaybeAlign AlignVal = cast<ConstantInt>(Alignment)->getMaybeAlignValue();
431 Value *VResult = Src0;
432 unsigned VectorWidth =
VecType->getNumElements();
436 for (
unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
437 if (cast<Constant>(
Mask)->getAggregateElement(Idx)->isNullValue())
441 Builder.CreateAlignedLoad(EltTy, Ptr, AlignVal,
"Load" +
Twine(Idx));
453 if (VectorWidth != 1) {
455 SclrMask =
Builder.CreateBitCast(
Mask, SclrMaskTy,
"scalar_mask");
458 for (
unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
467 if (VectorWidth != 1) {
471 Builder.getIntN(VectorWidth, 0));
487 CondBlock->
setName(
"cond.load");
492 Builder.CreateAlignedLoad(EltTy, Ptr, AlignVal,
"Load" +
Twine(Idx));
500 IfBlock = NewIfBlock;
503 Builder.SetInsertPoint(NewIfBlock, NewIfBlock->
begin());
549 auto *SrcFVTy = cast<FixedVectorType>(Src->getType());
552 isa<VectorType>(Ptrs->
getType()) &&
553 isa<PointerType>(cast<VectorType>(Ptrs->
getType())->getElementType()) &&
554 "Vector of pointers is expected in masked scatter intrinsic");
558 Builder.SetInsertPoint(InsertPt);
561 MaybeAlign AlignVal = cast<ConstantInt>(Alignment)->getMaybeAlignValue();
562 unsigned VectorWidth = SrcFVTy->getNumElements();
566 for (
unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
567 if (cast<Constant>(
Mask)->getAggregateElement(Idx)->isNullValue())
570 Builder.CreateExtractElement(Src, Idx,
"Elt" +
Twine(Idx));
572 Builder.CreateAlignedStore(OneElt, Ptr, AlignVal);
581 if (VectorWidth != 1) {
583 SclrMask =
Builder.CreateBitCast(
Mask, SclrMaskTy,
"scalar_mask");
586 for (
unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
594 if (VectorWidth != 1) {
598 Builder.getIntN(VectorWidth, 0));
614 CondBlock->
setName(
"cond.store");
619 Builder.CreateAlignedStore(OneElt, Ptr, AlignVal);
625 Builder.SetInsertPoint(NewIfBlock, NewIfBlock->
begin());
646 Builder.SetInsertPoint(InsertPt);
649 unsigned VectorWidth =
VecType->getNumElements();
652 Value *VResult = PassThru;
661 for (
unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
663 if (cast<Constant>(
Mask)->getAggregateElement(Idx)->isNullValue()) {
665 ShuffleMask[Idx] = Idx + VectorWidth;
669 InsertElt =
Builder.CreateAlignedLoad(EltTy, NewPtr,
Align(1),
670 "Load" +
Twine(Idx));
671 ShuffleMask[Idx] = Idx;
674 VResult =
Builder.CreateInsertElement(VResult, InsertElt, Idx,
677 VResult =
Builder.CreateShuffleVector(VResult, PassThru, ShuffleMask);
686 if (VectorWidth != 1) {
688 SclrMask =
Builder.CreateBitCast(
Mask, SclrMaskTy,
"scalar_mask");
691 for (
unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
700 if (VectorWidth != 1) {
704 Builder.getIntN(VectorWidth, 0));
720 CondBlock->
setName(
"cond.load");
728 if ((Idx + 1) != VectorWidth)
729 NewPtr =
Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, 1);
735 IfBlock = NewIfBlock;
738 Builder.SetInsertPoint(NewIfBlock, NewIfBlock->
begin());
745 if ((Idx + 1) != VectorWidth) {
766 auto *
VecType = cast<FixedVectorType>(Src->getType());
772 Builder.SetInsertPoint(InsertPt);
777 unsigned VectorWidth =
VecType->getNumElements();
782 for (
unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
783 if (cast<Constant>(
Mask)->getAggregateElement(Idx)->isNullValue())
786 Builder.CreateExtractElement(Src, Idx,
"Elt" +
Twine(Idx));
798 if (VectorWidth != 1) {
800 SclrMask =
Builder.CreateBitCast(
Mask, SclrMaskTy,
"scalar_mask");
803 for (
unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
810 if (VectorWidth != 1) {
814 Builder.getIntN(VectorWidth, 0));
830 CondBlock->
setName(
"cond.store");
838 if ((Idx + 1) != VectorWidth)
839 NewPtr =
Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, 1);
845 IfBlock = NewIfBlock;
847 Builder.SetInsertPoint(NewIfBlock, NewIfBlock->
begin());
850 if ((Idx + 1) != VectorWidth) {
868 bool EverMadeChange =
false;
869 bool MadeChange =
true;
870 auto &
DL =
F.getParent()->getDataLayout();
874 bool ModifiedDTOnIteration =
false;
879 if (ModifiedDTOnIteration)
883 EverMadeChange |= MadeChange;
885 return EverMadeChange;
889 auto &
TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
F);
891 if (
auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
892 DT = &DTWP->getDomTree();
911 bool MadeChange =
false;
914 while (CurInstIterator !=
BB.end()) {
915 if (
CallInst *CI = dyn_cast<CallInst>(&*CurInstIterator++))
930 if (isa<ScalableVectorType>(II->
getType()) ||
932 [](
Value *V) { return isa<ScalableVectorType>(V->getType()); }))
938 case Intrinsic::masked_load:
946 case Intrinsic::masked_store:
953 case Intrinsic::masked_gather: {
955 cast<ConstantInt>(CI->
getArgOperand(1))->getMaybeAlignValue();
957 Align Alignment =
DL.getValueOrABITypeAlignment(MA,
965 case Intrinsic::masked_scatter: {
967 cast<ConstantInt>(CI->
getArgOperand(2))->getMaybeAlignValue();
969 Align Alignment =
DL.getValueOrABITypeAlignment(MA,
978 case Intrinsic::masked_expandload:
983 case Intrinsic::masked_compressstore: