57 if (!VPBB->getParent())
60 auto EndIter = Term ? Term->getIterator() : VPBB->end();
65 VPValue *VPV = Ingredient.getVPSingleValue();
81 *Load, Ingredient.getOperand(0),
nullptr ,
82 false , *VPI, Ingredient.getDebugLoc());
85 *Store, Ingredient.getOperand(1), Ingredient.getOperand(0),
86 nullptr ,
false , *VPI,
87 Ingredient.getDebugLoc());
90 Ingredient.getDebugLoc());
102 if (VectorID == Intrinsic::experimental_noalias_scope_decl)
107 if (VectorID == Intrinsic::assume ||
108 VectorID == Intrinsic::lifetime_end ||
109 VectorID == Intrinsic::lifetime_start ||
110 VectorID == Intrinsic::sideeffect ||
111 VectorID == Intrinsic::pseudoprobe) {
116 const bool IsSingleScalar = VectorID != Intrinsic::assume &&
117 VectorID != Intrinsic::pseudoprobe;
121 Ingredient.getDebugLoc());
124 *CI, VectorID,
drop_end(Ingredient.operands()), CI->getType(),
125 VPIRFlags(*CI), *VPI, CI->getDebugLoc());
129 CI->getOpcode(), Ingredient.getOperand(0), CI->getType(), CI,
133 *VPI, Ingredient.getDebugLoc());
137 "inductions must be created earlier");
146 "Only recpies with zero or one defined values expected");
147 Ingredient.eraseFromParent();
164 if (
A->getOpcode() != Instruction::Store ||
165 B->getOpcode() != Instruction::Store)
175 const APInt *Distance;
181 Type *TyA = TypeInfo.inferScalarType(
A->getOperand(0));
183 Type *TyB = TypeInfo.inferScalarType(
B->getOperand(0));
189 uint64_t MaxStoreSize = std::max(SizeA, SizeB);
191 auto VFs =
B->getParent()->getPlan()->vectorFactors();
195 return Distance->
abs().
uge(
203 : ExcludeRecipes(ExcludeRecipes), GroupLeader(GroupLeader), PSE(PSE),
204 L(L), TypeInfo(TypeInfo) {}
211 return ExcludeRecipes.contains(&R) ||
212 (Store && isNoAliasViaDistance(Store, &GroupLeader));
225 std::optional<SinkStoreInfo> SinkInfo = {}) {
226 bool CheckReads = SinkInfo.has_value();
233 if (SinkInfo && SinkInfo->shouldSkip(R))
237 if (!
R.mayWriteToMemory() && !(CheckReads &&
R.mayReadFromMemory()))
255template <
unsigned Opcode>
260 static_assert(Opcode == Instruction::Load || Opcode == Instruction::Store,
261 "Only Load and Store opcodes supported");
262 constexpr bool IsLoad = (Opcode == Instruction::Load);
269 if (!RepR || RepR->getOpcode() != Opcode || !FilterFn(RepR))
273 VPValue *Addr = RepR->getOperand(IsLoad ? 0 : 1);
276 RecipesByAddress[AddrSCEV].push_back(RepR);
281 for (
auto &Group :
Groups) {
296 auto InsertIfValidSinkCandidate = [ScalarVFOnly, &WorkList](
308 if (Candidate->getParent() == SinkTo ||
313 if (!ScalarVFOnly && RepR->isSingleScalar())
316 WorkList.
insert({SinkTo, Candidate});
328 for (
auto &Recipe : *VPBB)
330 InsertIfValidSinkCandidate(VPBB,
Op);
334 for (
unsigned I = 0;
I != WorkList.
size(); ++
I) {
337 std::tie(SinkTo, SinkCandidate) = WorkList[
I];
342 auto UsersOutsideSinkTo =
344 return cast<VPRecipeBase>(U)->getParent() != SinkTo;
346 if (
any_of(UsersOutsideSinkTo, [SinkCandidate](
VPUser *U) {
347 return !U->usesFirstLaneOnly(SinkCandidate);
350 bool NeedsDuplicating = !UsersOutsideSinkTo.empty();
352 if (NeedsDuplicating) {
356 if (
auto *SinkCandidateRepR =
362 nullptr , *SinkCandidateRepR,
366 Clone = SinkCandidate->
clone();
376 InsertIfValidSinkCandidate(SinkTo,
Op);
386 if (!EntryBB || EntryBB->size() != 1 ||
396 if (EntryBB->getNumSuccessors() != 2)
401 if (!Succ0 || !Succ1)
404 if (Succ0->getNumSuccessors() + Succ1->getNumSuccessors() != 1)
406 if (Succ0->getSingleSuccessor() == Succ1)
408 if (Succ1->getSingleSuccessor() == Succ0)
425 if (!Region1->isReplicator())
427 auto *MiddleBasicBlock =
429 if (!MiddleBasicBlock || !MiddleBasicBlock->empty())
434 if (!Region2 || !Region2->isReplicator())
439 if (!Mask1 || Mask1 != Mask2)
442 assert(Mask1 && Mask2 &&
"both region must have conditions");
448 if (TransformedRegions.
contains(Region1))
455 if (!Then1 || !Then2)
475 VPValue *Phi1ToMoveV = Phi1ToMove.getVPSingleValue();
481 if (Phi1ToMove.getVPSingleValue()->getNumUsers() == 0) {
482 Phi1ToMove.eraseFromParent();
485 Phi1ToMove.moveBefore(*Merge2, Merge2->begin());
499 TransformedRegions.
insert(Region1);
502 return !TransformedRegions.
empty();
510 std::string RegionName = (
Twine(
"pred.") + Instr->getOpcodeName()).str();
511 assert(Instr->getParent() &&
"Predicated instruction not in any basic block");
512 auto *BlockInMask = PredRecipe->
getMask();
533 Region->setParent(ParentRegion);
539 RecipeWithoutMask->getDebugLoc());
540 Exiting->appendRecipe(PHIRecipe);
553 if (RepR->isPredicated())
572 if (ParentRegion && ParentRegion->
getExiting() == CurrentBlock)
584 if (!VPBB->getParent())
588 if (!PredVPBB || PredVPBB->getNumSuccessors() != 1 ||
597 R.moveBefore(*PredVPBB, PredVPBB->
end());
599 auto *ParentRegion = VPBB->getParent();
600 if (ParentRegion && ParentRegion->getExiting() == VPBB)
601 ParentRegion->setExiting(PredVPBB);
605 return !WorkList.
empty();
612 bool ShouldSimplify =
true;
613 while (ShouldSimplify) {
629 if (!
IV ||
IV->getTruncInst())
644 for (
auto *U : FindMyCast->
users()) {
646 if (UserCast && UserCast->getUnderlyingValue() == IRCast) {
647 FoundUserCast = UserCast;
654 FindMyCast = FoundUserCast;
656 if (FindMyCast !=
IV)
671 Builder.createDerivedIV(Kind, FPBinOp, StartV, CanonicalIV, Step);
681 BaseIV = Builder.createScalarCast(Instruction::Trunc, BaseIV, TruncTy,
DL);
687 if (ResultTy != StepTy) {
694 Builder.setInsertPoint(VecPreheader);
695 Step = Builder.createScalarCast(Instruction::Trunc, Step, ResultTy,
DL);
697 return Builder.createScalarIVSteps(InductionOpcode, FPBinOp, BaseIV, Step,
723 WideCanIV->getDebugLoc(), Builder));
724 WideCanIV->eraseFromParent();
736 if (!WidenIV || !WidenIV->isCanonical())
740 WidenIV->dropPoisonGeneratingFlags();
741 WideCanIV->replaceAllUsesWith(WidenIV);
742 WideCanIV->eraseFromParent();
751 if (PHICost > BroadcastCost)
760 unsigned RegClass =
TTI.getRegisterClassForType(
true, VecTy);
772 WideCanIV->getNoWrapFlags(), WideCanIV->getDebugLoc());
773 NewWideIV->insertBefore(&*Header->getFirstNonPhi());
774 WideCanIV->replaceAllUsesWith(NewWideIV);
775 WideCanIV->eraseFromParent();
783 bool IsConditionalAssume = RepR && RepR->isPredicated() &&
785 if (IsConditionalAssume)
788 if (R.mayHaveSideEffects())
792 return all_of(R.definedValues(),
793 [](
VPValue *V) { return V->getNumUsers() == 0; });
813 VPUser *PhiUser = PhiR->getSingleUser();
819 PhiR->replaceAllUsesWith(Start);
820 PhiR->eraseFromParent();
828 for (
unsigned I = 0;
I !=
Users.size(); ++
I) {
831 Users.insert_range(V->users());
833 return Users.takeVector();
847 nullptr, StartV, StepV, PtrIV->
getDebugLoc(), Builder);
884 Def->getNumUsers() == 0 || !Def->getUnderlyingValue() ||
885 (RepR && (RepR->isSingleScalar() || RepR->isPredicated())))
898 Def->operands(),
true,
900 Clone->insertAfter(Def);
901 Def->replaceAllUsesWith(Clone);
912 PtrIV->replaceAllUsesWith(PtrAdd);
919 if (HasOnlyVectorVFs &&
none_of(WideIV->users(), [WideIV](
VPUser *U) {
920 return U->usesScalars(WideIV);
926 Plan,
ID.getKind(),
ID.getInductionOpcode(),
928 WideIV->getTruncInst(), WideIV->getStartValue(), WideIV->getStepValue(),
929 WideIV->getDebugLoc(), Builder);
932 if (!HasOnlyVectorVFs) {
934 "plans containing a scalar VF cannot also include scalable VFs");
935 WideIV->replaceAllUsesWith(Steps);
938 WideIV->replaceUsesWithIf(Steps,
939 [WideIV, HasScalableVF](
VPUser &U,
unsigned) {
941 return U.usesFirstLaneOnly(WideIV);
942 return U.usesScalars(WideIV);
958 return (IntOrFpIV && IntOrFpIV->getTruncInst()) ? nullptr : WideIV;
963 if (!Def || Def->getNumOperands() != 2)
971 auto IsWideIVInc = [&]() {
972 auto &
ID = WideIV->getInductionDescriptor();
975 VPValue *IVStep = WideIV->getStepValue();
976 switch (
ID.getInductionOpcode()) {
977 case Instruction::Add:
979 case Instruction::FAdd:
981 case Instruction::FSub:
984 case Instruction::Sub: {
1004 return IsWideIVInc() ? WideIV :
nullptr;
1023 if (WideIntOrFp && WideIntOrFp->getTruncInst())
1034 VPValue *FirstActiveLane =
B.createFirstActiveLane(Mask,
DL);
1036 FirstActiveLane =
B.createScalarZExtOrTrunc(FirstActiveLane, CanonicalIVType,
1037 FirstActiveLaneType,
DL);
1038 VPValue *EndValue =
B.createAdd(CanonicalIV, FirstActiveLane,
DL);
1043 if (Incoming != WideIV) {
1045 EndValue =
B.createAdd(EndValue, One,
DL);
1048 if (!WideIntOrFp || !WideIntOrFp->isCanonical()) {
1050 VPIRValue *Start = WideIV->getStartValue();
1051 VPValue *Step = WideIV->getStepValue();
1052 EndValue =
B.createDerivedIV(
1054 Start, EndValue, Step);
1069 if (WideIntOrFp && WideIntOrFp->getTruncInst())
1076 if (!WideIntOrFp || !WideIntOrFp->isCanonical()) {
1079 Start, VectorTC, Step);
1108 assert(EndValue &&
"Must have computed the end value up front");
1113 if (Incoming != WideIV)
1125 auto *Zero = Plan.
getZero(StepTy);
1126 return B.createPtrAdd(EndValue,
B.createSub(Zero, Step),
1131 return B.createNaryOp(
1132 ID.getInductionBinOp()->getOpcode() == Instruction::FAdd
1134 : Instruction::FAdd,
1135 {EndValue, Step}, {ID.getInductionBinOp()->getFastMathFlags()});
1147 VPBuilder VectorPHBuilder(VectorPH, VectorPH->begin());
1156 WideIV, VectorPHBuilder, TypeInfo, ResumeTC))
1157 EndValues[WideIV] = EndValue;
1167 R.getVPSingleValue()->replaceAllUsesWith(EndValue);
1168 R.eraseFromParent();
1177 for (
auto [Idx, PredVPBB] :
enumerate(ExitVPBB->getPredecessors())) {
1179 if (PredVPBB == MiddleVPBB)
1181 Plan, TypeInfo, ExitIRI->getOperand(Idx), EndValues, PSE);
1184 Plan, TypeInfo, ExitIRI->getOperand(Idx), PSE);
1186 ExitIRI->setOperand(Idx, Escape);
1203 const auto &[V, Inserted] = SCEV2VPV.
try_emplace(ExpR->getSCEV(), ExpR);
1206 ExpR->replaceAllUsesWith(V->second);
1207 ExpR->eraseFromParent();
1216 while (!WorkList.
empty()) {
1218 if (!Seen.
insert(Cur).second)
1226 R->eraseFromParent();
1233static std::optional<std::pair<bool, unsigned>>
1236 std::optional<std::pair<bool, unsigned>>>(R)
1239 [](
auto *
I) {
return std::make_pair(
false,
I->getOpcode()); })
1241 return std::make_pair(
true,
I->getVectorIntrinsicID());
1243 .Case<VPVectorPointerRecipe, VPPredInstPHIRecipe, VPScalarIVStepsRecipe>(
1249 I->getVPRecipeID());
1251 .
Default([](
auto *) {
return std::nullopt; });
1269 Value *V =
Op->getUnderlyingValue();
1275 auto FoldToIRValue = [&]() ->
Value * {
1277 if (OpcodeOrIID->first) {
1278 if (R.getNumOperands() != 2)
1280 unsigned ID = OpcodeOrIID->second;
1281 return Folder.FoldBinaryIntrinsic(
ID,
Ops[0],
Ops[1],
1284 unsigned Opcode = OpcodeOrIID->second;
1293 return Folder.FoldSelect(
Ops[0],
Ops[1],
1296 return Folder.FoldBinOp(Instruction::BinaryOps::Xor,
Ops[0],
1298 case Instruction::Select:
1299 return Folder.FoldSelect(
Ops[0],
Ops[1],
Ops[2]);
1300 case Instruction::ICmp:
1301 case Instruction::FCmp:
1304 case Instruction::GetElementPtr: {
1307 return Folder.FoldGEP(
GEP->getSourceElementType(),
Ops[0],
1317 case Instruction::ExtractElement:
1324 if (
Value *V = FoldToIRValue())
1325 return R.getParent()->getPlan()->getOrAddLiveIn(V);
1331 VPlan *Plan = Def->getParent()->getPlan();
1337 return Def->replaceAllUsesWith(V);
1343 PredPHI->replaceAllUsesWith(
Op);
1356 bool CanCreateNewRecipe =
1363 if (TruncTy == ATy) {
1364 Def->replaceAllUsesWith(
A);
1373 : Instruction::ZExt;
1376 if (
auto *UnderlyingExt = Def->getOperand(0)->getUnderlyingValue()) {
1378 Ext->setUnderlyingValue(UnderlyingExt);
1380 Def->replaceAllUsesWith(Ext);
1382 auto *Trunc = Builder.createWidenCast(Instruction::Trunc,
A, TruncTy);
1383 Def->replaceAllUsesWith(Trunc);
1391 for (
VPUser *U :
A->users()) {
1393 for (
VPValue *VPV : R->definedValues())
1407 Def->replaceAllUsesWith(
X);
1408 Def->eraseFromParent();
1414 return Def->replaceAllUsesWith(
1419 return Def->replaceAllUsesWith(
X);
1423 return Def->replaceAllUsesWith(
1428 return Def->replaceAllUsesWith(
1433 return Def->replaceAllUsesWith(
X);
1437 return Def->replaceAllUsesWith(Plan->
getFalse());
1441 return Def->replaceAllUsesWith(
X);
1444 if (CanCreateNewRecipe &&
1449 (!Def->getOperand(0)->hasMoreThanOneUniqueUser() ||
1450 !Def->getOperand(1)->hasMoreThanOneUniqueUser()))
1451 return Def->replaceAllUsesWith(
1452 Builder.createLogicalAnd(
X, Builder.createOr(
Y, Z)));
1457 return Def->replaceAllUsesWith(Def->getOperand(1));
1462 return Def->replaceAllUsesWith(Builder.createLogicalAnd(
X,
Y));
1466 return Def->replaceAllUsesWith(Plan->
getFalse());
1469 return Def->replaceAllUsesWith(
X);
1473 if (CanCreateNewRecipe &&
1475 return Def->replaceAllUsesWith(Builder.createNot(
C));
1479 Def->setOperand(0,
C);
1480 Def->setOperand(1,
Y);
1481 Def->setOperand(2,
X);
1486 if (CanCreateNewRecipe &&
1491 return Def->replaceAllUsesWith(
1492 Builder.createOr(
Y, Builder.createLogicalAnd(
X, Z)));
1495 return Def->replaceAllUsesWith(
A);
1498 return Def->replaceAllUsesWith(
A);
1501 return Def->replaceAllUsesWith(
1508 return Def->replaceAllUsesWith(
1510 Def->getDebugLoc(),
"", NW));
1513 if (CanCreateNewRecipe &&
1521 ->hasNoSignedWrap()};
1522 return Def->replaceAllUsesWith(
1523 Builder.createSub(
X,
Y, Def->getDebugLoc(),
"", NW));
1529 return Def->replaceAllUsesWith(Builder.createNaryOp(
1531 {A, Plan->getConstantInt(APC->getBitWidth(), APC->exactLogBase2())},
1536 return Def->replaceAllUsesWith(Builder.createNaryOp(
1538 {A, Plan->getConstantInt(APC->getBitWidth(), APC->exactLogBase2())},
1543 return Def->replaceAllUsesWith(
A);
1558 R->setOperand(1,
Y);
1559 R->setOperand(2,
X);
1563 R->replaceAllUsesWith(Cmp);
1568 if (!Cmp->getDebugLoc() && Def->getDebugLoc())
1569 Cmp->setDebugLoc(Def->getDebugLoc());
1581 if (
Op->getNumUsers() > 1 ||
1585 }
else if (!UnpairedCmp) {
1586 UnpairedCmp =
Op->getDefiningRecipe();
1590 UnpairedCmp =
nullptr;
1597 if (NewOps.
size() < Def->getNumOperands()) {
1599 return Def->replaceAllUsesWith(NewAnyOf);
1606 if (CanCreateNewRecipe &&
1612 return Def->replaceAllUsesWith(NewCmp);
1620 return Def->replaceAllUsesWith(Def->getOperand(1));
1626 X = Builder.createWidenCast(Instruction::Trunc,
X, WideStepTy);
1627 Def->replaceAllUsesWith(
X);
1637 Def->setOperand(1, Def->getOperand(0));
1638 Def->setOperand(0,
Y);
1645 return Def->replaceAllUsesWith(Def->getOperand(0));
1651 Def->replaceAllUsesWith(
1652 BuildVector->getOperand(BuildVector->getNumOperands() - 1));
1657 return Def->replaceAllUsesWith(
X);
1660 return Def->replaceAllUsesWith(
A);
1663 return Def->replaceAllUsesWith(
A);
1669 Def->replaceAllUsesWith(
1670 BuildVector->getOperand(BuildVector->getNumOperands() - 2));
1677 Def->replaceAllUsesWith(BuildVector->getOperand(Idx));
1682 Def->replaceAllUsesWith(
1692 "broadcast operand must be single-scalar");
1693 Def->setOperand(0,
C);
1698 return Def->replaceUsesWithIf(
1699 X, [Def](
const VPUser &U,
unsigned) {
return U.usesScalars(Def); });
1702 if (Def->getNumOperands() == 1) {
1703 Def->replaceAllUsesWith(Def->getOperand(0));
1708 Phi->replaceAllUsesWith(Phi->getOperand(0));
1714 if (Def->getNumOperands() == 1 &&
1716 return Def->replaceAllUsesWith(IRV);
1729 return Def->replaceAllUsesWith(
A);
1732 Def->replaceAllUsesWith(Builder.createNaryOp(
1733 Instruction::ExtractElement, {A, LaneToExtract}, Def->getDebugLoc()));
1747 auto *IVInc = Def->getOperand(0);
1748 if (IVInc->getNumUsers() == 2) {
1753 if (Phi->getNumUsers() == 1 || (Phi->getNumUsers() == 2 && Inc)) {
1754 Def->replaceAllUsesWith(IVInc);
1756 Inc->replaceAllUsesWith(Phi);
1757 Phi->setOperand(0,
Y);
1773 Steps->replaceAllUsesWith(Steps->getOperand(0));
1781 Def->replaceUsesWithIf(StartV, [](
const VPUser &U,
unsigned Idx) {
1783 return PhiR && PhiR->isInLoop();
1789 return Def->replaceAllUsesWith(
A);
1816 while (!Worklist.
empty()) {
1825 R->replaceAllUsesWith(
1826 Builder.createLogicalAnd(HeaderMask, Builder.createLogicalAnd(
X,
Y)));
1830static std::optional<Instruction::BinaryOps>
1833 case Intrinsic::masked_udiv:
1834 return Instruction::UDiv;
1835 case Intrinsic::masked_sdiv:
1836 return Instruction::SDiv;
1837 case Intrinsic::masked_urem:
1838 return Instruction::URem;
1839 case Intrinsic::masked_srem:
1840 return Instruction::SRem;
1857 if (RepR && (RepR->isSingleScalar() || RepR->isPredicated()))
1861 if (RepR && RepR->getOpcode() == Instruction::Store &&
1864 RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(),
1865 true ,
nullptr , *RepR ,
1866 *RepR , RepR->getDebugLoc());
1867 Clone->insertBefore(RepOrWidenR);
1869 VPValue *ExtractOp = Clone->getOperand(0);
1875 Clone->setOperand(0, ExtractOp);
1876 RepR->eraseFromParent();
1888 VPValue *SafeDivisor = Builder.createSelect(
1889 IntrR->getOperand(2), IntrR->getOperand(1),
1891 VPValue *Clone = Builder.createNaryOp(
1892 *
Opc, {IntrR->getOperand(0), SafeDivisor},
1895 IntrR->eraseFromParent();
1904 auto IntroducesBCastOf = [](
const VPValue *
Op) {
1913 return !U->usesScalars(
Op);
1917 if (
any_of(RepOrWidenR->users(), IntroducesBCastOf(RepOrWidenR)) &&
1920 make_filter_range(Op->users(), not_equal_to(RepOrWidenR)),
1921 IntroducesBCastOf(Op)))
1925 auto *IRV = dyn_cast<VPIRValue>(Op);
1926 bool LiveInNeedsBroadcast = IRV && !isa<Constant>(IRV->getValue());
1927 auto *OpR = dyn_cast<VPReplicateRecipe>(Op);
1928 return LiveInNeedsBroadcast || (OpR && OpR->isSingleScalar());
1933 RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(),
1934 true ,
nullptr, *RepOrWidenR);
1935 Clone->insertBefore(RepOrWidenR);
1936 RepOrWidenR->replaceAllUsesWith(Clone);
1938 RepOrWidenR->eraseFromParent();
1974 if (Blend->isNormalized() || !
match(Blend->getMask(0),
m_False()))
1975 UniqueValues.
insert(Blend->getIncomingValue(0));
1976 for (
unsigned I = 1;
I != Blend->getNumIncomingValues(); ++
I)
1978 UniqueValues.
insert(Blend->getIncomingValue(
I));
1980 if (UniqueValues.
size() == 1) {
1981 Blend->replaceAllUsesWith(*UniqueValues.
begin());
1982 Blend->eraseFromParent();
1986 if (Blend->isNormalized())
1992 unsigned StartIndex = 0;
1993 for (
unsigned I = 0;
I != Blend->getNumIncomingValues(); ++
I) {
1998 if (Mask->getNumUsers() == 1 && !
match(Mask,
m_False())) {
2005 OperandsWithMask.
push_back(Blend->getIncomingValue(StartIndex));
2007 for (
unsigned I = 0;
I != Blend->getNumIncomingValues(); ++
I) {
2008 if (
I == StartIndex)
2010 OperandsWithMask.
push_back(Blend->getIncomingValue(
I));
2011 OperandsWithMask.
push_back(Blend->getMask(
I));
2016 OperandsWithMask, *Blend, Blend->getDebugLoc());
2017 NewBlend->insertBefore(&R);
2019 VPValue *DeadMask = Blend->getMask(StartIndex);
2021 Blend->eraseFromParent();
2026 if (NewBlend->getNumOperands() == 3 &&
2028 VPValue *Inc0 = NewBlend->getOperand(0);
2029 VPValue *Inc1 = NewBlend->getOperand(1);
2030 VPValue *OldMask = NewBlend->getOperand(2);
2031 NewBlend->setOperand(0, Inc1);
2032 NewBlend->setOperand(1, Inc0);
2033 NewBlend->setOperand(2, NewMask);
2060 APInt MaxVal = AlignedTC - 1;
2063 unsigned NewBitWidth =
2069 bool MadeChange =
false;
2078 if (!WideIV || !WideIV->isCanonical() ||
2079 WideIV->hasMoreThanOneUniqueUser() ||
2080 NewIVTy == WideIV->getScalarType())
2085 VPUser *SingleUser = WideIV->getSingleUser();
2093 auto *NewStart = Plan.
getZero(NewIVTy);
2094 WideIV->setStartValue(NewStart);
2096 WideIV->setStepValue(NewStep);
2103 Cmp->setOperand(1, NewBTC);
2117 return any_of(
Cond->getDefiningRecipe()->operands(), [&Plan, BestVF, BestUF,
2119 return isConditionTrueViaVFAndUF(C, Plan, BestVF, BestUF, PSE);
2133 const SCEV *VectorTripCount =
2138 "Trip count SCEV must be computable");
2159 auto *Term = &ExitingVPBB->
back();
2172 for (
unsigned Part = 0; Part < UF; ++Part) {
2178 Extracts[Part] = Ext;
2190 match(Phi->getBackedgeValue(),
2192 assert(Index &&
"Expected index from ActiveLaneMask instruction");
2209 "Expected one VPActiveLaneMaskPHIRecipe for each unroll part");
2216 "Expected incoming values of Phi to be ActiveLaneMasks");
2221 EntryALM->setOperand(2, ALMMultiplier);
2222 LoopALM->setOperand(2, ALMMultiplier);
2226 ExtractFromALM(EntryALM, EntryExtracts);
2231 ExtractFromALM(LoopALM, LoopExtracts);
2233 Not->setOperand(0, LoopExtracts[0]);
2236 for (
unsigned Part = 0; Part < UF; ++Part) {
2237 Phis[Part]->setStartValue(EntryExtracts[Part]);
2238 Phis[Part]->setBackedgeValue(LoopExtracts[Part]);
2251 auto *Term = &ExitingVPBB->
back();
2263 const SCEV *VectorTripCount =
2269 "Trip count SCEV must be computable");
2288 Term->setOperand(1, Plan.
getTrue());
2293 {}, Term->getDebugLoc());
2295 Term->eraseFromParent();
2330 R.getVPSingleValue()->replaceAllUsesWith(Trunc);
2340 assert(Plan.
hasVF(BestVF) &&
"BestVF is not available in Plan");
2341 assert(Plan.
hasUF(BestUF) &&
"BestUF is not available in Plan");
2359 RecurKind RK = PhiR->getRecurrenceKind();
2366 RecWithFlags->dropPoisonGeneratingFlags();
2372struct VPCSEDenseMapInfo :
public DenseMapInfo<VPSingleDefRecipe *> {
2374 return Def == getEmptyKey() || Def == getTombstoneKey();
2385 return GEP->getSourceElementType();
2388 .Case<VPVectorPointerRecipe, VPWidenGEPRecipe>(
2389 [](
auto *
I) {
return I->getSourceElementType(); })
2390 .
Default([](
auto *) {
return nullptr; });
2394 static bool canHandle(
const VPSingleDefRecipe *Def) {
2403 if (!
C || (!
C->first && (
C->second == Instruction::InsertValue ||
2404 C->second == Instruction::ExtractValue)))
2410 return !
Def->mayReadFromMemory();
2414 static unsigned getHashValue(
const VPSingleDefRecipe *Def) {
2415 const VPlan *Plan =
Def->getParent()->getPlan();
2416 VPTypeAnalysis TypeInfo(*Plan);
2419 getGEPSourceElementType(Def), TypeInfo.inferScalarType(Def),
2422 if (RFlags->hasPredicate())
2425 return hash_combine(Result, SIVSteps->getInductionOpcode());
2430 static bool isEqual(
const VPSingleDefRecipe *L,
const VPSingleDefRecipe *R) {
2433 if (
L->getVPRecipeID() !=
R->getVPRecipeID() ||
2435 getGEPSourceElementType(L) != getGEPSourceElementType(R) ||
2437 !
equal(
L->operands(),
R->operands()))
2440 "must have valid opcode info for both recipes");
2442 if (LFlags->hasPredicate() &&
2443 LFlags->getPredicate() !=
2447 if (LSIV->getInductionOpcode() !=
2453 const VPRegionBlock *RegionL =
L->getRegion();
2454 const VPRegionBlock *RegionR =
R->getRegion();
2457 L->getParent() !=
R->getParent())
2459 const VPlan *Plan =
L->getParent()->getPlan();
2460 VPTypeAnalysis TypeInfo(*Plan);
2461 return TypeInfo.inferScalarType(L) == TypeInfo.inferScalarType(R);
2477 if (!Def || !VPCSEDenseMapInfo::canHandle(Def))
2481 if (!VPDT.
dominates(V->getParent(), VPBB))
2486 Def->replaceAllUsesWith(V);
2517 "Expected vector prehader's successor to be the vector loop region");
2525 return !Op->isDefinedOutsideLoopRegions();
2528 R.moveBefore(*Preheader, Preheader->
end());
2546 assert(!RepR->isPredicated() &&
2547 "Expected prior transformation of predicated replicates to "
2548 "replicate regions");
2553 if (!RepR->isSingleScalar())
2565 if (
any_of(Def->users(), [&SinkBB, &LoopRegion](
VPUser *U) {
2566 auto *UserR = cast<VPRecipeBase>(U);
2567 VPBasicBlock *Parent = UserR->getParent();
2569 if (SinkBB && SinkBB != Parent)
2574 return UserR->isPhi() || Parent->getEnclosingLoopRegion() ||
2575 Parent->getSinglePredecessor() != LoopRegion;
2585 "Defining block must dominate sink block");
2611 VPValue *ResultVPV = R.getVPSingleValue();
2613 unsigned NewResSizeInBits = MinBWs.
lookup(UI);
2614 if (!NewResSizeInBits)
2627 (void)OldResSizeInBits;
2635 VPW->dropPoisonGeneratingFlags();
2637 if (OldResSizeInBits != NewResSizeInBits &&
2641 Instruction::ZExt, ResultVPV, OldResTy,
nullptr,
2643 Ext->insertAfter(&R);
2645 Ext->setOperand(0, ResultVPV);
2646 assert(OldResSizeInBits > NewResSizeInBits &&
"Nothing to shrink?");
2649 "Only ICmps should not need extending the result.");
2659 for (
unsigned Idx = StartIdx; Idx != R.getNumOperands(); ++Idx) {
2660 auto *
Op = R.getOperand(Idx);
2661 unsigned OpSizeInBits =
2663 if (OpSizeInBits == NewResSizeInBits)
2665 assert(OpSizeInBits > NewResSizeInBits &&
"nothing to truncate");
2666 auto [ProcessedIter, IterIsEmpty] = ProcessedTruncs.
try_emplace(
Op);
2668 R.setOperand(Idx, ProcessedIter->second);
2676 Builder.setInsertPoint(&R);
2678 Builder.createWidenCast(Instruction::Trunc,
Op, NewResTy);
2679 ProcessedIter->second = NewOp;
2680 R.setOperand(Idx, NewOp);
2688 std::optional<VPDominatorTree> VPDT;
2705 assert(VPBB->getNumSuccessors() == 2 &&
2706 "Two successors expected for BranchOnCond");
2707 unsigned RemovedIdx;
2718 "There must be a single edge between VPBB and its successor");
2726 VPBB->back().eraseFromParent();
2738 if (Reachable.contains(
B))
2749 for (
VPValue *Def : R.definedValues())
2750 Def->replaceAllUsesWith(&Tmp);
2751 R.eraseFromParent();
2808 DebugLoc DL = CanonicalIVIncrement->getDebugLoc();
2819 auto *EntryIncrement = Builder.createOverflowingOp(
2821 DL,
"index.part.next");
2827 {EntryIncrement, TC, ALMMultiplier},
DL,
2828 "active.lane.mask.entry");
2835 LaneMaskPhi->insertBefore(*HeaderVPBB, HeaderVPBB->begin());
2840 Builder.setInsertPoint(OriginalTerminator);
2841 auto *InLoopIncrement = Builder.createOverflowingOp(
2843 {CanonicalIVIncrement, &Plan.
getVF()}, {
false,
false},
DL);
2845 {InLoopIncrement, TC, ALMMultiplier},
DL,
2846 "active.lane.mask.next");
2851 auto *NotMask = Builder.createNot(ALM,
DL);
2858 bool UseActiveLaneMaskForControlFlow) {
2862 assert(WideCanonicalIV &&
2863 "Must have widened canonical IV when tail folding!");
2866 if (UseActiveLaneMaskForControlFlow) {
2875 nullptr,
"active.lane.mask");
2891 template <
typename OpTy>
bool match(OpTy *V)
const {
2902template <
typename Op0_t,
typename Op1_t>
2910 case Intrinsic::masked_udiv:
2911 return Intrinsic::vp_udiv;
2912 case Intrinsic::masked_sdiv:
2913 return Intrinsic::vp_sdiv;
2914 case Intrinsic::masked_urem:
2915 return Intrinsic::vp_urem;
2916 case Intrinsic::masked_srem:
2917 return Intrinsic::vp_srem;
2919 return std::nullopt;
2936 VPValue *Addr, *Mask, *EndPtr;
2939 auto AdjustEndPtr = [&CurRecipe, &EVL](
VPValue *EndPtr) {
2941 EVLEndPtr->insertBefore(&CurRecipe);
2942 EVLEndPtr->setOperand(1, &EVL);
2946 auto GetVPReverse = [&CurRecipe, &EVL, &TypeInfo, Plan,
2951 Intrinsic::experimental_vp_reverse, {V, Plan->
getTrue(), &EVL},
2953 Reverse->insertBefore(&CurRecipe);
2957 if (
match(&CurRecipe,
2968 Mask = GetVPReverse(Mask);
2969 Addr = AdjustEndPtr(EndPtr);
2972 LoadR->insertBefore(&CurRecipe);
2974 Intrinsic::experimental_vp_reverse, {LoadR, Plan->
getTrue(), &EVL},
2986 NewLoad->setOperand(2, Mask);
2987 NewLoad->setOperand(3, &EVL);
2995 StoredVal, EVL, Mask);
2997 if (
match(&CurRecipe,
3001 Mask = GetVPReverse(Mask);
3002 Addr = AdjustEndPtr(EndPtr);
3003 StoredVal = GetVPReverse(ReversedVal);
3005 StoredVal, EVL, Mask);
3009 if (Rdx->isConditional() &&
3014 if (Interleave->getMask() &&
3019 if (
match(&CurRecipe,
3028 Intrinsic::vp_merge, {Mask,
LHS,
RHS, &EVL},
3042 if (
match(&CurRecipe,
3053 {IntrR->getOperand(0),
3054 IntrR->getOperand(1),
3055 Mask ? Mask : Plan->
getTrue(), &EVL},
3056 IntrR->getScalarType(), {}, {},
DL);
3065 VPValue *HeaderMask =
nullptr, *EVL =
nullptr;
3070 HeaderMask = R.getVPSingleValue();
3082 NewR->insertBefore(R);
3083 for (
auto [Old, New] :
3084 zip_equal(R->definedValues(), NewR->definedValues()))
3085 Old->replaceAllUsesWith(New);
3099 Merge->insertBefore(LogicalAnd);
3100 LogicalAnd->replaceAllUsesWith(
Merge);
3108 R->eraseFromParent();
3124 auto IsAllowedUser =
3125 IsaPred<VPVectorEndPointerRecipe, VPScalarIVStepsRecipe,
3126 VPWidenIntOrFpInductionRecipe,
3127 VPWidenMemIntrinsicRecipe>;
3128 if (match(U, m_Trunc(m_Specific(&Plan.getVF()))))
3129 return all_of(cast<VPSingleDefRecipe>(U)->users(),
3131 return IsAllowedUser(U);
3133 "User of VF that we can't transform to EVL.");
3143 "Only users of VFxUF should be VPWidenPointerInductionRecipe and the "
3144 "increment of the canonical induction.");
3160 MaxEVL = Builder.createScalarZExtOrTrunc(
3164 Builder.setInsertPoint(Header, Header->getFirstNonPhi());
3165 VPValue *PrevEVL = Builder.createScalarPhi(
3179 Intrinsic::experimental_vp_splice,
3180 {V1, V2, Imm, Plan.
getTrue(), PrevEVL, &EVL},
3184 R.getVPSingleValue()->replaceAllUsesWith(VPSplice);
3197 if (match(&R, m_ComputeReductionResult(m_Select(m_Specific(HeaderMask),
3198 m_VPValue(), m_VPValue()))))
3199 return R.getOperand(0)->getDefiningRecipe()->getRegion() ==
3200 Plan.getVectorLoopRegion();
3212 VPValue *EVLMask = Builder.createICmp(
3272 VPlan &Plan,
const std::optional<unsigned> &MaxSafeElements) {
3284 auto *CurrentIteration =
3286 CurrentIteration->insertBefore(*Header, Header->begin());
3287 VPBuilder Builder(Header, Header->getFirstNonPhi());
3290 VPPhi *AVLPhi = Builder.createScalarPhi(
3294 if (MaxSafeElements) {
3304 Builder.setInsertPoint(CanonicalIVIncrement);
3308 OpVPEVL = Builder.createScalarZExtOrTrunc(
3309 OpVPEVL, CanIVTy, I32Ty, CanonicalIVIncrement->getDebugLoc());
3311 auto *NextIter = Builder.createAdd(
3312 OpVPEVL, CurrentIteration, CanonicalIVIncrement->getDebugLoc(),
3313 "current.iteration.next", CanonicalIVIncrement->getNoWrapFlags());
3314 CurrentIteration->addOperand(NextIter);
3318 "avl.next", {
true,
false});
3326 CanonicalIV->replaceAllUsesWith(CurrentIteration);
3327 CanonicalIVIncrement->setOperand(0, CanonicalIV);
3341 assert(!CurrentIteration &&
3342 "Found multiple CurrentIteration. Only one expected");
3343 CurrentIteration = PhiR;
3347 if (!CurrentIteration)
3358 CurrentIteration->
getDebugLoc(),
"current.iteration.iv");
3367 CanIVInc->eraseFromParent();
3376 if (Header->empty())
3385 if (!
match(EVLPhi->getBackedgeValue(),
3398 [[maybe_unused]]
bool FoundAVLNext =
3401 assert(FoundAVLNext &&
"Didn't find AVL backedge?");
3409 [[maybe_unused]]
bool FoundIncrement =
match(
3416 "Expected BranchOnCond with ICmp comparing CanIV + VFxUF with vector "
3421 LatchBr->setOperand(
3432 "expected to run before loop regions are created");
3435 auto CanUseVersionedStride = [&VPDT, Preheader](
VPUser &U,
unsigned) {
3438 return VPDT.
dominates(Preheader, Parent);
3441 for (
const SCEV *Stride : StridesMap.
values()) {
3444 const APInt *StrideConst;
3467 RewriteMap[StrideV] = PSE.
getSCEV(StrideV);
3474 const SCEV *ScevExpr = ExpSCEV->getSCEV();
3477 if (NewSCEV != ScevExpr) {
3479 ExpSCEV->replaceAllUsesWith(NewExp);
3490 auto CollectPoisonGeneratingInstrsInBackwardSlice([&](
VPRecipeBase *Root) {
3495 while (!Worklist.
empty()) {
3498 if (!Visited.
insert(CurRec).second)
3520 RecWithFlags->isDisjoint()) {
3523 Builder.createAdd(
A,
B, RecWithFlags->getDebugLoc());
3524 New->setUnderlyingValue(RecWithFlags->getUnderlyingValue());
3525 RecWithFlags->replaceAllUsesWith(New);
3526 RecWithFlags->eraseFromParent();
3529 RecWithFlags->dropPoisonGeneratingFlags();
3534 assert((!Instr || !Instr->hasPoisonGeneratingFlags()) &&
3535 "found instruction with poison generating flags not covered by "
3536 "VPRecipeWithIRFlags");
3541 if (
VPRecipeBase *OpDef = Operand->getDefiningRecipe())
3549 auto IsNotHeaderMask = [&Plan](
VPValue *Mask) {
3561 VPRecipeBase *AddrDef = WidenRec->getAddr()->getDefiningRecipe();
3562 if (AddrDef && WidenRec->isConsecutive() &&
3563 IsNotHeaderMask(WidenRec->getMask()))
3564 CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
3566 VPRecipeBase *AddrDef = InterleaveRec->getAddr()->getDefiningRecipe();
3567 if (AddrDef && IsNotHeaderMask(InterleaveRec->getMask()))
3568 CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
3578 const bool &EpilogueAllowed) {
3579 if (InterleaveGroups.empty())
3590 IRMemberToRecipe[&MemR->getIngredient()] = MemR;
3597 for (
const auto *IG : InterleaveGroups) {
3602 return !IRMemberToRecipe.contains(Member);
3606 auto *Start = IRMemberToRecipe.
lookup(IG->getMember(0));
3610 StoredValues.
push_back(StoreR->getStoredValue());
3611 for (
unsigned I = 1;
I < IG->getFactor(); ++
I) {
3617 StoredValues.
push_back(StoreR->getStoredValue());
3621 bool NeedsMaskForGaps =
3622 (IG->requiresScalarEpilogue() && !EpilogueAllowed) ||
3623 (!StoredValues.
empty() && !IG->isFull());
3626 auto *InsertPos = IRMemberToRecipe.
lookup(IRInsertPos);
3635 VPValue *Addr = Start->getAddr();
3644 assert(IG->getIndex(IRInsertPos) != 0 &&
3645 "index of insert position shouldn't be zero");
3649 IG->getIndex(IRInsertPos),
3653 Addr =
B.createNoWrapPtrAdd(InsertPos->getAddr(), OffsetVPV, NW);
3659 if (IG->isReverse()) {
3662 -(int64_t)IG->getFactor(), NW, InsertPosR->
getDebugLoc());
3663 ReversePtr->insertBefore(InsertPosR);
3667 IG, Addr, StoredValues, InsertPos->getMask(), NeedsMaskForGaps,
3669 VPIG->insertBefore(InsertPosR);
3672 for (
unsigned i = 0; i < IG->getFactor(); ++i)
3675 if (!Member->getType()->isVoidTy()) {
3734 AddOp = Instruction::Add;
3735 MulOp = Instruction::Mul;
3737 AddOp =
ID.getInductionOpcode();
3738 MulOp = Instruction::FMul;
3746 Step = Builder.createScalarCast(Instruction::Trunc, Step, Ty,
DL);
3747 Start = Builder.createScalarCast(Instruction::Trunc, Start, Ty,
DL);
3756 Init = Builder.createWidenCast(Instruction::UIToFP,
Init, StepTy);
3761 Init = Builder.createNaryOp(MulOp, {
Init, SplatStep}, Flags);
3762 Init = Builder.createNaryOp(AddOp, {SplatStart,
Init}, Flags,
3778 Builder.setInsertPoint(R->getParent(), std::next(R->getIterator()));
3782 VF = Builder.createScalarCast(Instruction::CastOps::UIToFP, VF, StepTy,
3785 VF = Builder.createScalarZExtOrTrunc(VF, StepTy,
3788 Inc = Builder.createNaryOp(MulOp, {Step, VF}, Flags);
3795 auto *
Next = Builder.createNaryOp(AddOp, {Prev, Inc}, Flags,
3798 WidePHI->addOperand(
Next);
3826 VPlan *Plan = R->getParent()->getPlan();
3827 VPValue *Start = R->getStartValue();
3828 VPValue *Step = R->getStepValue();
3829 VPValue *VF = R->getVFValue();
3831 assert(R->getInductionDescriptor().getKind() ==
3833 "Not a pointer induction according to InductionDescriptor!");
3836 "Recipe should have been replaced");
3842 VPPhi *ScalarPtrPhi = Builder.createScalarPhi(Start,
DL,
"pointer.phi");
3846 Builder.setInsertPoint(R->getParent(), R->getParent()->getFirstNonPhi());
3849 Offset = Builder.createOverflowingOp(Instruction::Mul, {
Offset, Step});
3851 Builder.createWidePtrAdd(ScalarPtrPhi,
Offset,
DL,
"vector.gep");
3852 R->replaceAllUsesWith(PtrAdd);
3857 VF = Builder.createScalarZExtOrTrunc(VF, StepTy, TypeInfo.
inferScalarType(VF),
3859 VPValue *Inc = Builder.createOverflowingOp(Instruction::Mul, {Step, VF});
3862 Builder.createPtrAdd(ScalarPtrPhi, Inc,
DL,
"ptr.ind");
3870 VPValue *Step = R->getStepValue();
3871 VPValue *Index = R->getIndex();
3875 ? Builder.createScalarSExtOrTrunc(
3877 : Builder.createScalarCast(Instruction::SIToFP, Index, StepTy,
3879 switch (R->getInductionKind()) {
3882 "Index type does not match StartValue type");
3883 return R->replaceAllUsesWith(Builder.createAdd(
3884 Start, Builder.createOverflowingOp(Instruction::Mul, {Index, Step})));
3887 return R->replaceAllUsesWith(Builder.createPtrAdd(
3888 Start, Builder.createOverflowingOp(Instruction::Mul, {Index, Step})));
3893 (FPBinOp->
getOpcode() == Instruction::FAdd ||
3894 FPBinOp->
getOpcode() == Instruction::FSub) &&
3895 "Original BinOp should be defined for FP induction");
3897 VPValue *
FMul = Builder.createNaryOp(Instruction::FMul, {Step, Index}, FMF);
3898 return R->replaceAllUsesWith(
3899 Builder.createNaryOp(FPBinOp->
getOpcode(), {Start, FMul}, FMF));
3912 if (!R->isReplicator())
3916 R->dissolveToCFGLoop();
3937 assert(Br->getNumOperands() == 2 &&
3938 "BranchOnTwoConds must have exactly 2 conditions");
3942 assert(Successors.size() == 3 &&
3943 "BranchOnTwoConds must have exactly 3 successors");
3948 VPValue *Cond0 = Br->getOperand(0);
3949 VPValue *Cond1 = Br->getOperand(1);
3954 !BrOnTwoCondsBB->
getParent() &&
"regions must already be dissolved");
3967 Br->eraseFromParent();
3990 WidenIVR->replaceAllUsesWith(PtrAdd);
4006 VPValue *CanIV = WideCanIV->getCanonicalIV();
4008 VPValue *Step = WideCanIV->getStepValue();
4011 "Expected unroller to have materialized step for UF != 1");
4016 Step = Builder.createAdd(
4019 Builder.createAdd(CanIV, Step, WideCanIV->getDebugLoc(),
"vec.iv",
4020 WideCanIV->getNoWrapFlags());
4029 for (
unsigned I = 1;
I != Blend->getNumIncomingValues(); ++
I)
4030 Select = Builder.createSelect(Blend->getMask(
I),
4031 Blend->getIncomingValue(
I),
Select,
4032 R.getDebugLoc(),
"predphi", *Blend);
4033 Blend->replaceAllUsesWith(
Select);
4038 if (!VEPR->getOffset()) {
4040 "Expected unroller to have materialized offset for UF != 1");
4041 VEPR->materializeOffset();
4056 for (
VPValue *
Op : LastActiveL->operands()) {
4057 VPValue *NotMask = Builder.createNot(
Op, LastActiveL->getDebugLoc());
4062 VPValue *FirstInactiveLane = Builder.createFirstActiveLane(
4063 NotMasks, LastActiveL->getDebugLoc(),
"first.inactive.lane");
4069 Builder.createSub(FirstInactiveLane, One,
4070 LastActiveL->getDebugLoc(),
"last.active.lane");
4080 assert(VPI->isMasked() &&
4081 "Unmasked MaskedCond should be simplified earlier");
4082 VPI->replaceAllUsesWith(Builder.createNaryOp(
4094 Instruction::Add, VPI->operands(), VPI->getNoWrapFlags(),
4095 VPI->getDebugLoc());
4096 VPI->replaceAllUsesWith(
Add);
4105 DebugLoc DL = BranchOnCountInst->getDebugLoc();
4108 ToRemove.push_back(BranchOnCountInst);
4123 ? Instruction::UIToFP
4124 : Instruction::Trunc;
4125 VectorStep = Builder.createWidenCast(CastOp, VectorStep, IVTy);
4131 Builder.createWidenCast(Instruction::Trunc, ScalarStep, IVTy);
4137 MulOpc = Instruction::FMul;
4138 Flags = VPI->getFastMathFlags();
4140 MulOpc = Instruction::Mul;
4145 MulOpc, {VectorStep, ScalarStep}, Flags, R.getDebugLoc());
4147 VPI->replaceAllUsesWith(VectorStep);
4153 R->eraseFromParent();
4161 struct EarlyExitInfo {
4172 if (Pred == MiddleVPBB)
4177 VPValue *CondOfEarlyExitingVPBB;
4178 [[maybe_unused]]
bool Matched =
4179 match(EarlyExitingVPBB->getTerminator(),
4181 assert(Matched &&
"Terminator must be BranchOnCond");
4185 VPBuilder EarlyExitingBuilder(EarlyExitingVPBB->getTerminator());
4186 auto *CondToEarlyExit = EarlyExitingBuilder.
createNaryOp(
4188 TrueSucc == ExitBlock
4189 ? CondOfEarlyExitingVPBB
4190 : EarlyExitingBuilder.
createNot(CondOfEarlyExitingVPBB));
4196 "exit condition must dominate the latch");
4205 assert(!Exits.
empty() &&
"must have at least one early exit");
4212 for (
const auto &[Num, VPB] :
enumerate(RPOT))
4214 llvm::sort(Exits, [&RPOIdx](
const EarlyExitInfo &
A,
const EarlyExitInfo &
B) {
4215 return RPOIdx[
A.EarlyExitingVPBB] < RPOIdx[
B.EarlyExitingVPBB];
4221 for (
unsigned I = 0;
I + 1 < Exits.
size(); ++
I)
4222 for (
unsigned J =
I + 1; J < Exits.
size(); ++J)
4224 Exits[
I].EarlyExitingVPBB) &&
4225 "RPO sort must place dominating exits before dominated ones");
4231 VPValue *Combined = Exits[0].CondToExit;
4232 for (
const EarlyExitInfo &Info :
drop_begin(Exits))
4233 Combined = Builder.createLogicalOr(Combined, Info.CondToExit);
4239 "Early exit store masking not implemented");
4243 for (
unsigned Idx = 0; Idx != Exits.
size(); ++Idx) {
4247 VectorEarlyExitVPBBs[Idx] = VectorEarlyExitVPBB;
4255 Exits.
size() == 1 ? VectorEarlyExitVPBBs[0]
4289 for (
auto [Exit, VectorEarlyExitVPBB] :
4290 zip_equal(Exits, VectorEarlyExitVPBBs)) {
4291 auto &[EarlyExitingVPBB, EarlyExitVPBB,
_] = Exit;
4303 ExitIRI->getIncomingValueForBlock(EarlyExitingVPBB);
4304 VPValue *NewIncoming = IncomingVal;
4306 VPBuilder EarlyExitBuilder(VectorEarlyExitVPBB);
4311 ExitIRI->removeIncomingValueFor(EarlyExitingVPBB);
4312 ExitIRI->addOperand(NewIncoming);
4315 EarlyExitingVPBB->getTerminator()->eraseFromParent();
4349 bool IsLastDispatch = (
I + 2 == Exits.
size());
4351 IsLastDispatch ? VectorEarlyExitVPBBs.
back()
4357 VectorEarlyExitVPBBs[
I]->setPredecessors({CurrentBB});
4360 CurrentBB = FalseBB;
4367 "Unexpected terminator");
4368 auto *IsLatchExitTaken =
4370 LatchExitingBranch->getOperand(1));
4372 DebugLoc LatchDL = LatchExitingBranch->getDebugLoc();
4373 LatchExitingBranch->eraseFromParent();
4374 Builder.setInsertPoint(LatchVPBB);
4376 {IsAnyExitTaken, IsLatchExitTaken}, LatchDL);
4378 LatchVPBB->
setSuccessors({DispatchVPBB, MiddleVPBB, HeaderVPBB});
4388 Type *RedTy = Ctx.Types.inferScalarType(Red);
4389 VPValue *VecOp = Red->getVecOp();
4391 assert(!Red->isPartialReduction() &&
4392 "This path does not support partial reductions");
4395 auto IsExtendedRedValidAndClampRange =
4408 "getExtendedReductionCost only supports integer types");
4409 ExtRedCost = Ctx.TTI.getExtendedReductionCost(
4410 Opcode, ExtOpc == Instruction::CastOps::ZExt, RedTy, SrcVecTy,
4411 Red->getFastMathFlags(),
CostKind);
4412 return ExtRedCost.
isValid() && ExtRedCost < ExtCost + RedCost;
4420 IsExtendedRedValidAndClampRange(
4423 Ctx.Types.inferScalarType(
A)))
4442 if (Opcode != Instruction::Add && Opcode != Instruction::Sub &&
4443 Opcode != Instruction::FAdd)
4446 assert(!Red->isPartialReduction() &&
4447 "This path does not support partial reductions");
4448 Type *RedTy = Ctx.Types.inferScalarType(Red);
4451 auto IsMulAccValidAndClampRange =
4458 Ext0 ? Ctx.Types.inferScalarType(Ext0->getOperand(0)) : RedTy;
4464 (Ext0->getOpcode() != Ext1->getOpcode() ||
4465 Ext0->getOpcode() == Instruction::CastOps::FPExt))
4469 !Ext0 || Ext0->getOpcode() == Instruction::CastOps::ZExt;
4471 MulAccCost = Ctx.TTI.getMulAccReductionCost(IsZExt, Opcode, RedTy,
4478 ExtCost += Ext0->computeCost(VF, Ctx);
4480 ExtCost += Ext1->computeCost(VF, Ctx);
4482 ExtCost += OuterExt->computeCost(VF, Ctx);
4484 return MulAccCost.
isValid() &&
4485 MulAccCost < ExtCost + MulCost + RedCost;
4490 VPValue *VecOp = Red->getVecOp();
4528 Builder.createWidenCast(Instruction::CastOps::Trunc, ValB, NarrowTy);
4529 Type *WideTy = Ctx.Types.inferScalarType(ExtA);
4530 ValB = ExtB = Builder.createWidenCast(ExtOpc, Trunc, WideTy);
4531 Mul->setOperand(1, ExtB);
4541 ExtendAndReplaceConstantOp(RecipeA, RecipeB,
B,
Mul);
4546 IsMulAccValidAndClampRange(
Mul, RecipeA, RecipeB,
nullptr)) {
4553 if (!
Sub && IsMulAccValidAndClampRange(
Mul,
nullptr,
nullptr,
nullptr))
4570 ExtendAndReplaceConstantOp(Ext0, Ext1,
B,
Mul);
4579 (Ext->getOpcode() == Ext0->getOpcode() || Ext0 == Ext1) &&
4580 Ext0->getOpcode() == Ext1->getOpcode() &&
4581 IsMulAccValidAndClampRange(
Mul, Ext0, Ext1, Ext) &&
Mul->hasOneUse()) {
4583 Ext0->getOpcode(), Ext0->getOperand(0), Ext->getScalarType(),
nullptr,
4584 *Ext0, *Ext0, Ext0->getDebugLoc());
4585 NewExt0->insertBefore(Ext0);
4590 Ext->getScalarType(),
nullptr, *Ext1,
4591 *Ext1, Ext1->getDebugLoc());
4594 Mul->setOperand(0, NewExt0);
4595 Mul->setOperand(1, NewExt1);
4596 Red->setOperand(1,
Mul);
4610 assert(!Red->isPartialReduction() &&
4611 "This path does not support partial reductions");
4614 auto IP = std::next(Red->getIterator());
4615 auto *VPBB = Red->getParent();
4625 Red->replaceAllUsesWith(AbstractR);
4655 for (
VPValue *VPV : VPValues) {
4664 if (
User->usesScalars(VPV))
4667 HoistPoint = HoistBlock->
begin();
4671 "All users must be in the vector preheader or dominated by it");
4676 VPV->replaceUsesWithIf(Broadcast,
4677 [VPV, Broadcast](
VPUser &U,
unsigned Idx) {
4678 return Broadcast != &U && !U.usesScalars(VPV);
4689 return CommonMetadata;
4692template <
unsigned Opcode>
4697 static_assert(Opcode == Instruction::Load || Opcode == Instruction::Store,
4698 "Only Load and Store opcodes supported");
4699 constexpr bool IsLoad = (Opcode == Instruction::Load);
4705 return TypeInfo.
inferScalarType(IsLoad ? Recipe : Recipe->getOperand(0));
4710 for (
auto Recipes :
Groups) {
4711 if (Recipes.size() < 2)
4719 VPValue *MaskI = RecipeI->getMask();
4720 Type *TypeI = GetLoadStoreValueType(RecipeI);
4726 bool HasComplementaryMask =
false;
4731 VPValue *MaskJ = RecipeJ->getMask();
4732 Type *TypeJ = GetLoadStoreValueType(RecipeJ);
4733 if (TypeI == TypeJ) {
4743 if (HasComplementaryMask) {
4744 assert(Group.
size() >= 2 &&
"must have at least 2 entries");
4754template <
typename InstType>
4772 for (
auto &Group :
Groups) {
4792 return R->isSingleScalar() == IsSingleScalar;
4794 "all members in group must agree on IsSingleScalar");
4799 LoadWithMinAlign->getUnderlyingInstr(), {EarliestLoad->getOperand(0)},
4800 IsSingleScalar,
nullptr, *EarliestLoad, CommonMetadata);
4802 UnpredicatedLoad->insertBefore(EarliestLoad);
4806 Load->replaceAllUsesWith(UnpredicatedLoad);
4807 Load->eraseFromParent();
4817 if (!StoreLoc || !StoreLoc->AATags.Scope)
4823 StoresToSink.
end());
4827 SinkStoreInfo SinkInfo(StoresToSinkSet, *StoresToSink[0], PSE, L, TypeInfo);
4841 for (
auto &Group :
Groups) {
4854 VPValue *SelectedValue = Group[0]->getOperand(0);
4857 bool IsSingleScalar = Group[0]->isSingleScalar();
4858 for (
unsigned I = 1;
I < Group.size(); ++
I) {
4859 assert(IsSingleScalar == Group[
I]->isSingleScalar() &&
4860 "all members in group must agree on IsSingleScalar");
4861 VPValue *Mask = Group[
I]->getMask();
4863 SelectedValue = Builder.createSelect(Mask,
Value, SelectedValue,
4872 StoreWithMinAlign->getUnderlyingInstr(),
4873 {SelectedValue, LastStore->getOperand(1)}, IsSingleScalar,
4874 nullptr, *LastStore, CommonMetadata);
4875 UnpredicatedStore->insertBefore(*InsertBB, LastStore->
getIterator());
4879 Store->eraseFromParent();
4886 assert(Plan.
hasVF(BestVF) &&
"BestVF is not available in Plan");
4887 assert(Plan.
hasUF(BestUF) &&
"BestUF is not available in Plan");
4951 auto UsesVectorOrInsideReplicateRegion = [DefR, LoopRegion](
VPUser *U) {
4953 return !U->usesScalars(DefR) || ParentRegion != LoopRegion;
4960 none_of(DefR->users(), UsesVectorOrInsideReplicateRegion))
4970 DefR->replaceUsesWithIf(
4971 BuildVector, [BuildVector, &UsesVectorOrInsideReplicateRegion](
4973 return &U != BuildVector && UsesVectorOrInsideReplicateRegion(&U);
4987 for (
VPValue *Def : R.definedValues()) {
5000 auto IsCandidateUnpackUser = [Def](
VPUser *U) {
5002 return U->usesScalars(Def) &&
5005 if (
none_of(Def->users(), IsCandidateUnpackUser))
5012 Unpack->insertAfter(&R);
5013 Def->replaceUsesWithIf(Unpack,
5014 [&IsCandidateUnpackUser](
VPUser &U,
unsigned) {
5015 return IsCandidateUnpackUser(&U);
5024 bool RequiresScalarEpilogue,
VPValue *Step,
5025 std::optional<uint64_t> MaxRuntimeStep) {
5036 assert(StepR->getParent() == VectorPHVPBB &&
5037 "Step must be defined in VectorPHVPBB");
5039 InsertPt = std::next(StepR->getIterator());
5041 VPBuilder Builder(VectorPHVPBB, InsertPt);
5047 if (!RequiresScalarEpilogue &&
match(TC,
m_APInt(TCVal)) && MaxRuntimeStep &&
5059 if (TailByMasking) {
5060 TC = Builder.createAdd(
5071 Builder.createNaryOp(Instruction::URem, {TC, Step},
5080 if (RequiresScalarEpilogue) {
5082 "requiring scalar epilogue is not supported with fail folding");
5085 R = Builder.createSelect(IsZero, Step, R);
5099 "VF and VFxUF must be materialized together");
5111 Builder.createElementCount(TCTy, VFEC * Plan.
getConcreteUF());
5118 VPValue *RuntimeVF = Builder.createElementCount(TCTy, VFEC);
5122 BC, [&VF](
VPUser &U,
unsigned) {
return !U.usesScalars(&VF); });
5126 VPValue *MulByUF = Builder.createOverflowingOp(
5138 BasicBlock *EntryBB = Entry->getIRBasicBlock();
5146 const SCEV *Expr = ExpSCEV->getSCEV();
5149 ExpandedSCEVs[ExpSCEV->getSCEV()] = Res;
5154 ExpSCEV->eraseFromParent();
5157 "VPExpandSCEVRecipes must be at the beginning of the entry block, "
5158 "before any VPIRInstructions");
5161 auto EI = Entry->begin();
5171 return ExpandedSCEVs;
5183 VPValue *OpV,
unsigned Idx,
bool IsScalable) {
5187 return Member0Op == OpV;
5191 return !IsScalable && !W->getMask() && W->isConsecutive() &&
5194 return IR->getInterleaveGroup()->isFull() &&
IR->getVPValue(Idx) == OpV;
5211 for (
unsigned Idx = 0; Idx != WideMember0->getNumOperands(); ++Idx) {
5214 OpsI.
push_back(
Op->getDefiningRecipe()->getOperand(Idx));
5219 if (
any_of(
enumerate(OpsI), [WideMember0, Idx, IsScalable](
const auto &
P) {
5220 const auto &[
OpIdx, OpV] =
P;
5235 if (!InterleaveR || InterleaveR->
getMask())
5236 return std::nullopt;
5238 Type *GroupElementTy =
nullptr;
5242 [&TypeInfo, GroupElementTy](
VPValue *
Op) {
5243 return TypeInfo.inferScalarType(Op) == GroupElementTy;
5245 return std::nullopt;
5250 [&TypeInfo, GroupElementTy](
VPValue *
Op) {
5251 return TypeInfo.inferScalarType(Op) == GroupElementTy;
5253 return std::nullopt;
5257 if (IG->getFactor() != IG->getNumMembers())
5258 return std::nullopt;
5264 assert(
Size.isScalable() == VF.isScalable() &&
5265 "if Size is scalable, VF must be scalable and vice versa");
5266 return Size.getKnownMinValue();
5270 unsigned MinVal = VF.getKnownMinValue();
5272 if (IG->getFactor() == MinVal && GroupSize == GetVectorBitWidthForVF(VF))
5275 return std::nullopt;
5283 return RepR && RepR->isSingleScalar();
5290 auto *R = V->getDefiningRecipe();
5299 for (
unsigned Idx = 0,
E = WideMember0->getNumOperands(); Idx !=
E; ++Idx)
5300 WideMember0->setOperand(
5309 auto *LI =
cast<LoadInst>(LoadGroup->getInterleaveGroup()->getInsertPos());
5311 LoadGroup->getMask(),
true,
5312 {}, LoadGroup->getDebugLoc());
5313 L->insertBefore(LoadGroup);
5319 assert(RepR->isSingleScalar() && RepR->getOpcode() == Instruction::Load &&
5320 "must be a single scalar load");
5321 NarrowedOps.
insert(RepR);
5326 VPValue *PtrOp = WideLoad->getAddr();
5328 PtrOp = VecPtr->getOperand(0);
5333 nullptr, {}, *WideLoad);
5334 N->insertBefore(WideLoad);
5339std::unique_ptr<VPlan>
5359 "unexpected branch-on-count");
5363 std::optional<ElementCount> VFToOptimize;
5377 if (R.mayWriteToMemory() && !InterleaveR)
5383 return any_of(V->users(), [&](VPUser *U) {
5384 auto *UR = cast<VPRecipeBase>(U);
5385 return UR->getParent()->getParent() != VectorLoop;
5402 std::optional<ElementCount> NarrowedVF =
5404 if (!NarrowedVF || (VFToOptimize && NarrowedVF != VFToOptimize))
5406 VFToOptimize = NarrowedVF;
5409 if (InterleaveR->getStoredValues().empty())
5414 auto *Member0 = InterleaveR->getStoredValues()[0];
5424 VPRecipeBase *DefR = Op.value()->getDefiningRecipe();
5427 auto *IR = dyn_cast<VPInterleaveRecipe>(DefR);
5428 return IR && IR->getInterleaveGroup()->isFull() &&
5429 IR->getVPValue(Op.index()) == Op.value();
5438 VFToOptimize->isScalable()))
5443 if (StoreGroups.empty())
5447 bool RequiresScalarEpilogue =
5458 std::unique_ptr<VPlan> NewPlan;
5460 NewPlan = std::unique_ptr<VPlan>(Plan.
duplicate());
5461 Plan.
setVF(*VFToOptimize);
5462 NewPlan->removeVF(*VFToOptimize);
5468 for (
auto *StoreGroup : StoreGroups) {
5475 StoreGroup->getDebugLoc());
5476 S->insertBefore(StoreGroup);
5477 StoreGroup->eraseFromParent();
5483 Type *CanIVTy = VectorLoop->getCanonicalIVType();
5489 if (VFToOptimize->isScalable()) {
5492 Step = PHBuilder.createOverflowingOp(Instruction::Mul, {VScale,
UF},
5500 materializeVectorTripCount(Plan, VectorPH,
false,
5501 RequiresScalarEpilogue, Step);
5506 removeDeadRecipes(Plan);
5509 "All VPVectorPointerRecipes should have been removed");
5525 "must have a BranchOnCond");
5528 if (VF.
isScalable() && VScaleForTuning.has_value())
5529 VectorStep *= *VScaleForTuning;
5530 assert(VectorStep > 0 &&
"trip count should not be zero");
5534 MiddleTerm->setMetadata(LLVMContext::MD_prof, BranchWeights);
5554 "Cannot handle loops with uncountable early exits");
5561 assert(RecurSplice &&
"expected FirstOrderRecurrenceSplice");
5568 if (
any_of(RecurSplice->users(),
5569 [](
VPUser *U) { return !cast<VPRecipeBase>(U)->getRegion(); }) &&
5650 {},
"vector.recur.extract.for.phi");
5653 ExitPhi->replaceUsesOfWith(ExtractR, PenultimateElement);
5667 VPValue *WidenIVCandidate = BinOp->getOperand(0);
5668 VPValue *InvariantCandidate = BinOp->getOperand(1);
5670 std::swap(WidenIVCandidate, InvariantCandidate);
5684 auto *ClonedOp = BinOp->
clone();
5685 if (ClonedOp->getOperand(0) == WidenIV) {
5686 ClonedOp->setOperand(0, ScalarIV);
5688 assert(ClonedOp->getOperand(1) == WidenIV &&
"one operand must be WideIV");
5689 ClonedOp->setOperand(1, ScalarIV);
5704 auto CheckSentinel = [&SE](
const SCEV *IVSCEV,
5705 bool UseMax) -> std::optional<APSInt> {
5707 for (
bool Signed : {
true,
false}) {
5716 return std::nullopt;
5724 PhiR->getRecurrenceKind()))
5733 VPValue *BackedgeVal = PhiR->getBackedgeValue();
5747 !
match(FindLastSelect,
5756 IVOfExpressionToSink ? IVOfExpressionToSink : FindLastExpression, PSE,
5762 "IVOfExpressionToSink not being an AddRec must imply "
5763 "FindLastExpression not being an AddRec.");
5774 std::optional<APSInt> SentinelVal = CheckSentinel(IVSCEV, UseMax);
5775 bool UseSigned = SentinelVal && SentinelVal->isSigned();
5782 if (IVOfExpressionToSink) {
5783 const SCEV *FindLastExpressionSCEV =
5785 if (
match(FindLastExpressionSCEV,
5788 if (
auto NewSentinel =
5789 CheckSentinel(FindLastExpressionSCEV, NewUseMax)) {
5792 SentinelVal = *NewSentinel;
5793 UseSigned = NewSentinel->isSigned();
5795 IVSCEV = FindLastExpressionSCEV;
5796 IVOfExpressionToSink =
nullptr;
5806 if (AR->hasNoSignedWrap())
5808 else if (AR->hasNoUnsignedWrap())
5818 VPValue *NewFindLastSelect = BackedgeVal;
5820 if (!SentinelVal || IVOfExpressionToSink) {
5823 DebugLoc DL = FindLastSelect->getDefiningRecipe()->getDebugLoc();
5824 VPBuilder LoopBuilder(FindLastSelect->getDefiningRecipe());
5825 if (FindLastSelect->getDefiningRecipe()->getOperand(1) == PhiR)
5826 SelectCond = LoopBuilder.
createNot(SelectCond);
5833 if (SelectCond !=
Cond || IVOfExpressionToSink) {
5836 IVOfExpressionToSink ? IVOfExpressionToSink : FindLastExpression,
5845 VPIRFlags Flags(MinMaxKind,
false,
false,
5851 NewFindLastSelect, Flags, ExitDL);
5854 VPValue *VectorRegionExitingVal = ReducedIV;
5855 if (IVOfExpressionToSink)
5856 VectorRegionExitingVal =
5858 ReducedIV, IVOfExpressionToSink);
5861 VPValue *StartVPV = PhiR->getStartValue();
5868 NewRdxResult = MiddleBuilder.
createSelect(Cmp, VectorRegionExitingVal,
5878 AnyOfPhi->insertAfter(PhiR);
5885 OrVal, VectorRegionExitingVal, StartVPV, ExitDL);
5898 PhiR->hasUsesOutsideReductionChain());
5899 NewPhiR->insertBefore(PhiR);
5900 PhiR->replaceAllUsesWith(NewPhiR);
5901 PhiR->eraseFromParent();
5908struct ReductionExtend {
5909 Type *SrcType =
nullptr;
5910 ExtendKind Kind = ExtendKind::PR_None;
5916struct ExtendedReductionOperand {
5920 ReductionExtend ExtendA, ExtendB;
5928struct VPPartialReductionChain {
5931 VPWidenRecipe *ReductionBinOp =
nullptr;
5933 ExtendedReductionOperand ExtendedOp;
5940 unsigned AccumulatorOpIdx;
5941 unsigned ScaleFactor;
5954 if (!
Op->hasOneUse() ||
5960 auto *Trunc = Builder.createWidenCast(Instruction::CastOps::Trunc,
5961 Op->getOperand(1), NarrowTy);
5963 Op->setOperand(1, Builder.createWidenCast(ExtOpc, Trunc, WideTy));
5972 auto *
Sub =
Op->getOperand(0)->getDefiningRecipe();
5974 assert(Ext->getOpcode() ==
5976 "Expected both the LHS and RHS extends to be the same");
5977 bool IsSigned = Ext->getOpcode() == Instruction::SExt;
5980 auto *FreezeX = Builder.insert(
new VPWidenRecipe(Instruction::Freeze, {
X}));
5981 auto *FreezeY = Builder.insert(
new VPWidenRecipe(Instruction::Freeze, {
Y}));
5982 auto *
Max = Builder.insert(
5984 {FreezeX, FreezeY}, SrcTy));
5985 auto *Min = Builder.insert(
5987 {FreezeX, FreezeY}, SrcTy));
5990 return Builder.createWidenCast(Instruction::CastOps::ZExt, AbsDiff,
6003 if (!
Mul->hasOneUse() ||
6004 (Ext->getOpcode() != MulLHS->getOpcode() && MulLHS != MulRHS) ||
6005 MulLHS->getOpcode() != MulRHS->getOpcode())
6008 Mul->setOperand(0, Builder.createWidenCast(MulLHS->getOpcode(),
6009 MulLHS->getOperand(0),
6010 Ext->getScalarType()));
6011 Mul->setOperand(1, MulLHS == MulRHS
6012 ?
Mul->getOperand(0)
6013 : Builder.createWidenCast(MulRHS->getOpcode(),
6014 MulRHS->getOperand(0),
6015 Ext->getScalarType()));
6024 VPValue *VecOp = Red->getVecOp();
6058static void transformToPartialReduction(
const VPPartialReductionChain &Chain,
6066 WidenRecipe->
getOperand(1 - Chain.AccumulatorOpIdx));
6082 if (WidenRecipe->
getOpcode() == Instruction::Sub &&
6090 Builder.insert(NegRecipe);
6091 ExtendedOp = NegRecipe;
6095 "FSub chain reduction isn't supported");
6098 ExtendedOp = optimizeExtendsForPartialReduction(ExtendedOp, TypeInfo);
6108 assert((!ExitValue || IsLastInChain) &&
6109 "if we found ExitValue, it must match RdxPhi's backedge value");
6120 PartialRed->insertBefore(WidenRecipe);
6128 E->insertBefore(WidenRecipe);
6129 PartialRed->replaceAllUsesWith(
E);
6142 auto *NewScaleFactor = Plan.
getConstantInt(32, Chain.ScaleFactor);
6143 StartInst->setOperand(2, NewScaleFactor);
6151 VPValue *OldStartValue = StartInst->getOperand(0);
6152 StartInst->setOperand(0, StartInst->getOperand(1));
6156 assert(RdxResult &&
"Could not find reduction result");
6159 unsigned SubOpc = Chain.RK ==
RecurKind::FSub ? Instruction::BinaryOps::FSub
6160 : Instruction::BinaryOps::Sub;
6166 [&NewResult](
VPUser &U,
unsigned Idx) {
return &
U != NewResult; });
6172 const VPPartialReductionChain &Link,
6175 const ExtendedReductionOperand &ExtendedOp = Link.ExtendedOp;
6176 std::optional<unsigned> BinOpc = std::nullopt;
6178 if (ExtendedOp.ExtendB.Kind != ExtendKind::PR_None)
6179 BinOpc = ExtendedOp.ExtendsUser->
getOpcode();
6181 std::optional<llvm::FastMathFlags>
Flags;
6185 auto GetLinkOpcode = [&Link]() ->
unsigned {
6188 return Instruction::Add;
6190 return Instruction::FAdd;
6192 return Link.ReductionBinOp->
getOpcode();
6197 GetLinkOpcode(), ExtendedOp.ExtendA.SrcType, ExtendedOp.ExtendB.SrcType,
6198 RdxType, VF, ExtendedOp.ExtendA.Kind, ExtendedOp.ExtendB.Kind, BinOpc,
6219static std::optional<ExtendedReductionOperand>
6223 "Op should be operand of UpdateR");
6231 if (
Op->hasOneUse() &&
6241 if (LHSInputType != RHSInputType ||
6242 LHSExt->getOpcode() != RHSExt->getOpcode())
6243 return std::nullopt;
6246 return ExtendedReductionOperand{
6248 {LHSInputType, getPartialReductionExtendKind(LHSExt)},
6252 std::optional<TTI::PartialReductionExtendKind> OuterExtKind;
6255 VPValue *CastSource = CastRecipe->getOperand(0);
6256 OuterExtKind = getPartialReductionExtendKind(CastRecipe);
6266 if (UpdateR->
getOpcode() == Instruction::Sub)
6267 return std::nullopt;
6268 }
else if (UpdateR->
getOpcode() == Instruction::Add ||
6269 UpdateR->
getOpcode() == Instruction::FAdd) {
6273 return ExtendedReductionOperand{
6280 if (!
Op->hasOneUse())
6281 return std::nullopt;
6286 return std::nullopt;
6296 return std::nullopt;
6300 ExtendKind LHSExtendKind = getPartialReductionExtendKind(LHSCast);
6303 const APInt *RHSConst =
nullptr;
6309 return std::nullopt;
6313 if (Cast && OuterExtKind &&
6314 getPartialReductionExtendKind(Cast) != OuterExtKind)
6315 return std::nullopt;
6317 Type *RHSInputType = LHSInputType;
6318 ExtendKind RHSExtendKind = LHSExtendKind;
6321 RHSExtendKind = getPartialReductionExtendKind(RHSCast);
6324 return ExtendedReductionOperand{
6325 MulOp, {LHSInputType, LHSExtendKind}, {RHSInputType, RHSExtendKind}};
6332static std::optional<SmallVector<VPPartialReductionChain>>
6340 return std::nullopt;
6351 VPValue *CurrentValue = ExitValue;
6352 while (CurrentValue != RedPhiR) {
6355 return std::nullopt;
6362 std::optional<ExtendedReductionOperand> ExtendedOp =
6363 matchExtendedReductionOperand(UpdateR,
Op, TypeInfo);
6365 ExtendedOp = matchExtendedReductionOperand(UpdateR, PrevValue, TypeInfo);
6367 return std::nullopt;
6371 Type *ExtSrcType = ExtendedOp->ExtendA.SrcType;
6374 return std::nullopt;
6379 VPPartialReductionChain Link(
6380 {UpdateR, *ExtendedOp, RK,
6384 CurrentValue = PrevValue;
6389 std::reverse(Chain.
begin(), Chain.
end());
6408 if (
auto Chains = getScaledReductions(RedPhiR, CostCtx,
Range))
6409 ChainsByPhi.
try_emplace(RedPhiR, std::move(*Chains));
6412 if (ChainsByPhi.
empty())
6419 for (
const auto &[
_, Chains] : ChainsByPhi)
6420 for (
const VPPartialReductionChain &Chain : Chains) {
6421 PartialReductionOps.
insert(Chain.ExtendedOp.ExtendsUser);
6422 ScaledReductionMap[Chain.ReductionBinOp] = Chain.ScaleFactor;
6428 auto ExtendUsersValid = [&](
VPValue *Ext) {
6430 return PartialReductionOps.contains(cast<VPRecipeBase>(U));
6434 auto IsProfitablePartialReductionChainForVF =
6441 for (
const VPPartialReductionChain &Link : Chain) {
6442 const ExtendedReductionOperand &ExtendedOp = Link.ExtendedOp;
6443 InstructionCost LinkCost = getPartialReductionLinkCost(CostCtx, Link, VF);
6447 PartialCost += LinkCost;
6448 RegularCost += Link.ReductionBinOp->
computeCost(VF, CostCtx);
6450 if (ExtendedOp.ExtendB.Kind != ExtendKind::PR_None)
6451 RegularCost += ExtendedOp.ExtendsUser->
computeCost(VF, CostCtx);
6454 RegularCost += Extend->computeCost(VF, CostCtx);
6456 return PartialCost.
isValid() && PartialCost < RegularCost;
6464 for (
auto &[RedPhiR, Chains] : ChainsByPhi) {
6465 for (
const VPPartialReductionChain &Chain : Chains) {
6466 if (!
all_of(Chain.ExtendedOp.ExtendsUser->operands(), ExtendUsersValid)) {
6470 auto UseIsValid = [&, RedPhiR = RedPhiR](
VPUser *U) {
6472 return PhiR == RedPhiR;
6474 return Chain.ScaleFactor == ScaledReductionMap.
lookup_or(R, 0) ||
6480 if (!
all_of(Chain.ReductionBinOp->users(), UseIsValid)) {
6489 auto *RepR = dyn_cast<VPReplicateRecipe>(U);
6490 return RepR && RepR->getOpcode() == Instruction::Store;
6501 return IsProfitablePartialReductionChainForVF(Chains, VF);
6507 for (
auto &[Phi, Chains] : ChainsByPhi)
6508 for (
const VPPartialReductionChain &Chain : Chains)
6509 transformToPartialReduction(Chain, CostCtx.
Types, Plan, Phi);
6523 if (VPI && VPI->getUnderlyingValue() &&
6535 New->insertBefore(VPI);
6536 if (VPI->getOpcode() == Instruction::Load)
6537 VPI->replaceAllUsesWith(New->getVPSingleValue());
6538 VPI->eraseFromParent();
6543 FinalRedStoresBuilder))
6552 ReplaceWith(Histogram);
6560 ReplaceWith(Recipe);
6583 if (VPI->mayHaveSideEffects())
6587 if (VPI->isMasked() && !VPI->isSafeToSpeculativelyExecute())
6592 if (VPI->getOpcode() == Instruction::Add &&
6601 I, VPI->operandsWithoutMask(),
true,
6602 nullptr, *VPI, *VPI, VPI->getDebugLoc());
6603 Recipe->insertBefore(VPI);
6604 VPI->replaceAllUsesWith(Recipe);
6605 VPI->eraseFromParent();
6616 switch (Param.ParamKind) {
6617 case VFParamKind::Vector:
6618 case VFParamKind::GlobalPredicate:
6620 case VFParamKind::OMP_Uniform:
6621 return SE->isSCEVable(Types.inferScalarType(Args[Param.ParamPos])) &&
6622 SE->isLoopInvariant(
6623 vputils::getSCEVExprForVPValue(Args[Param.ParamPos], PSE, L),
6625 case VFParamKind::OMP_Linear:
6626 return match(vputils::getSCEVExprForVPValue(Args[Param.ParamPos], PSE, L),
6627 m_scev_AffineAddRec(
6628 m_SCEV(), m_scev_SpecificSInt(Param.LinearStepOrPos),
6629 m_SpecificLoop(L)));
6646 const auto *It =
find_if(Mappings, [&](
const VFInfo &Info) {
6647 return Info.Shape.VF == VF && (!MaskRequired || Info.isMasked()) &&
6650 if (It == Mappings.end())
6657struct CallWideningDecision {
6659 CallWideningDecision(KindTy Kind, Function *Variant =
nullptr)
6682 return CallWideningDecision::KindTy::Scalarize;
6692 return CallWideningDecision::KindTy::Scalarize;
6696 false, VF, CostCtx);
6699 CostCtx.
L, CostCtx.
Types);
6711 return CallWideningDecision::KindTy::Intrinsic;
6715 if (VecFunc && ScalarCost >= VecCallCost)
6716 return {CallWideningDecision::KindTy::VectorVariant, VecFunc};
6718 return CallWideningDecision::KindTy::Scalarize;
6729 if (!VPI || !VPI->getUnderlyingValue() ||
6730 VPI->getOpcode() != Instruction::Call)
6735 VPI->op_begin() + CI->arg_size());
6737 CallWideningDecision Decision =
6746 switch (Decision.Kind) {
6747 case CallWideningDecision::KindTy::Intrinsic: {
6751 *VPI, VPI->getDebugLoc());
6754 case CallWideningDecision::KindTy::VectorVariant: {
6758 VPValue *Mask = VPI->isMasked() ? VPI->getMask() : Plan.
getTrue();
6759 Ops.push_back(Mask);
6761 Ops.push_back(VPI->getOperand(VPI->getNumOperandsWithoutMask() - 1));
6763 *VPI, VPI->getDebugLoc());
6766 case CallWideningDecision::KindTy::Scalarize:
6778 return !Legacy || *Legacy == Decision.Kind;
6780 "VPlan call widening decision must match legacy decision");
6783 VPI->replaceAllUsesWith(Replacement);
6788 VPI->eraseFromParent();
6810 if (!LoadR || LoadR->isConsecutive())
6828 Align Alignment = LoadR->getAlign();
6831 if (!Ctx.TTI.isLegalStridedLoadStore(DataTy, Alignment))
6836 Intrinsic::experimental_vp_strided_load, DataTy,
6837 LoadR->isMasked(), Alignment, Ctx);
6838 return StridedLoadStoreCost < CurrentCost;
6849 Ctx.invalidateWideningDecision(&LoadR->getIngredient(), VF);
6854 I32VF = Builder.createScalarZExtOrTrunc(
6866 auto *
Offset = Builder.createOverflowingOp(
6868 {AddRecPtr->hasNoUnsignedWrap(), AddRecPtr->hasNoSignedWrap()});
6869 auto *BasePtr = Builder.createNoWrapPtrAdd(
6875 VPValue *NewPtr = Builder.createVectorPointer(
6877 Ptr->getGEPNoWrapFlags(), Ptr->getDebugLoc());
6879 VPValue *Mask = LoadR->getMask();
6882 auto *StridedLoad = Builder.createWidenMemIntrinsic(
6883 Intrinsic::experimental_vp_strided_load,
6884 {NewPtr, StrideInBytes, Mask, I32VF}, LoadTy, Alignment, *LoadR,
6885 LoadR->getDebugLoc());
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
This file implements a class to represent arbitrary precision integral constant values and operations...
ReachingDefInfo InstSet & ToRemove
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static bool isEqual(const Function &Caller, const Function &Callee)
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
static cl::opt< IntrinsicCostStrategy > IntrinsicCost("intrinsic-cost-strategy", cl::desc("Costing strategy for intrinsic instructions"), cl::init(IntrinsicCostStrategy::InstructionCost), cl::values(clEnumValN(IntrinsicCostStrategy::InstructionCost, "instruction-cost", "Use TargetTransformInfo::getInstructionCost"), clEnumValN(IntrinsicCostStrategy::IntrinsicCost, "intrinsic-cost", "Use TargetTransformInfo::getIntrinsicInstrCost"), clEnumValN(IntrinsicCostStrategy::TypeBasedIntrinsicCost, "type-based-intrinsic-cost", "Calculate the intrinsic cost based only on argument types")))
static bool isSentinel(const DWARFDebugNames::AttributeEncoding &AE)
iv Induction Variable Users
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
Legalize the Machine IR a function s Machine IR
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first DebugLoc that has line number information, given a range of instructions.
This file provides utility analysis objects describing memory locations.
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
const SmallVectorImpl< MachineOperand > & Cond
This is the interface for a metadata-based scoped no-alias analysis.
This file defines generic set operations that may be used on sets of different types,...
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static SymbolRef::Type getType(const Symbol *Sym)
This file implements the TypeSwitch template, which mimics a switch() statement whose cases are type ...
This file implements dominator tree analysis for a single level of a VPlan's H-CFG.
This file contains the declarations of different VPlan-related auxiliary helpers.
This file declares the class VPlanVerifier, which contains utility functions to check the consistency...
This file contains the declarations of the Vectorization Plan base classes:
static const X86InstrFMA3Group Groups[]
static const uint32_t IV[8]
Helper for extra no-alias checks via known-safe recipe and SCEV.
SinkStoreInfo(const SmallPtrSetImpl< VPRecipeBase * > &ExcludeRecipes, VPReplicateRecipe &GroupLeader, PredicatedScalarEvolution &PSE, const Loop &L, VPTypeAnalysis &TypeInfo)
bool shouldSkip(VPRecipeBase &R) const
Return true if R should be skipped during alias checking, either because it's in the exclude set or b...
Class for arbitrary precision integers.
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
uint64_t getZExtValue() const
Get zero extended value.
unsigned getActiveBits() const
Compute the number of active bits in the value.
APInt abs() const
Get the absolute value.
unsigned getBitWidth() const
Return the number of bits in the APInt.
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
int64_t getSExtValue() const
Get sign extended value.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
An arbitrary precision integer that knows its signedness.
static APSInt getMinValue(uint32_t numBits, bool Unsigned)
Return the APSInt representing the minimum integer value with the given bit width and signedness.
static APSInt getMaxValue(uint32_t numBits, bool Unsigned)
Return the APSInt representing the maximum integer value with the given bit width and signedness.
@ NoAlias
The two locations do not alias at all.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & back() const
Get the last element.
const T & front() const
Get the first element.
LLVM Basic Block Representation.
const Function * getParent() const
Return the enclosing method, or null if none.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
bool isNoBuiltin() const
Return true if the call should not be treated as a call to a builtin.
This class represents a function call, abstracting a target machine's calling convention.
@ ICMP_ULT
unsigned less than
@ ICMP_ULE
unsigned less or equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
This class represents a range of values.
LLVM_ABI bool contains(const APInt &Val) const
Return true if the specified value is in the set.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
static DebugLoc getCompilerGenerated()
static DebugLoc getUnknown()
ValueT lookup(const_arg_type_t< KeyT > Val) const
Return the entry for the specified key, or a default constructed value if no such entry exists.
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
ValueT lookup_or(const_arg_type_t< KeyT > Val, U &&Default) const
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
constexpr bool isVector() const
One or more elements.
static constexpr ElementCount getScalable(ScalarTy MinVal)
constexpr bool isScalar() const
Exactly one element.
Utility class for floating point operations which can have information about relaxed accuracy require...
FastMathFlags getFastMathFlags() const
Convenience function for getting all the fast-math flags.
Convenience struct for specifying and reasoning about fast-math flags.
Represents flags for the getelementptr instruction/expression.
static GEPNoWrapFlags noUnsignedWrap()
GEPNoWrapFlags withoutNoUnsignedWrap() const
static GEPNoWrapFlags none()
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
A struct for saving information about induction variables.
static LLVM_ABI InductionDescriptor getCanonicalIntInduction(Type *Ty, ScalarEvolution &SE)
Returns the canonical integer induction for type Ty with start = 0 and step = 1.
InductionKind
This enum represents the kinds of inductions that we support.
@ IK_NoInduction
Not an induction variable.
@ IK_FpInduction
Floating point induction variable.
@ IK_PtrInduction
Pointer induction var. Step = C.
@ IK_IntInduction
Integer induction variable. Step = C.
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
static InstructionCost getInvalid(CostType Val=0)
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
The group of interleaved loads/stores sharing the same stride and close to each other.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
static bool getDecisionAndClampRange(const std::function< bool(ElementCount)> &Predicate, VFRange &Range)
Test a Predicate on a Range of VF's.
Represents a single loop in the control flow graph.
LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
This class implements a map that also provides access to all stored values in a deterministic order.
ValueT lookup(const KeyT &Key) const
std::pair< iterator, bool > try_emplace(const KeyT &Key, Ts &&...Args)
Representation for a specific memory location.
AAMDNodes AATags
The metadata nodes which describe the aliasing of the location (each member is null if that kind of ...
Function * getFunction(StringRef Name) const
Look up the specified function in the module symbol table.
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
Post-order traversal of a graph.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
LLVM_ABI const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
static LLVM_ABI unsigned getOpcode(RecurKind Kind)
Returns the opcode corresponding to the RecurrenceKind.
unsigned getOpcode() const
static bool isFindLastRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
RegionT * getParent() const
Get the parent of the Region.
This class uses information about analyzed scalars to rewrite expressions in canonical form.
LLVM_ABI Value * expandCodeFor(SCEVUse SH, Type *Ty, BasicBlock::iterator I)
Insert code to directly compute the specified SCEV expression into the program.
static const SCEV * rewrite(const SCEV *Scev, ScalarEvolution &SE, ValueToSCEVMapTy &Map)
This class represents an analyzed expression in the program.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
LLVM_ABI const SCEV * getUDivExpr(SCEVUse LHS, SCEVUse RHS)
Get a canonical unsigned division expression, or something simpler if possible.
const DataLayout & getDataLayout() const
Return the DataLayout associated with the module this SCEV instance is operating on.
LLVM_ABI const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
LLVM_ABI bool isKnownNonZero(const SCEV *S)
Test if the given expression is known to be non-zero.
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI const SCEV * getMinusSCEV(SCEVUse LHS, SCEVUse RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
ConstantRange getSignedRange(const SCEV *S)
Determine the signed range for a particular SCEV.
LLVM_ABI bool isKnownPositive(const SCEV *S)
Test if the given expression is known to be positive.
LLVM_ABI const SCEV * getElementCount(Type *Ty, ElementCount EC, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
ConstantRange getUnsignedRange(const SCEV *S)
Determine the unsigned range for a particular SCEV.
LLVM_ABI const SCEV * getMulExpr(SmallVectorImpl< SCEVUse > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
LLVM_ABI bool isKnownPredicate(CmpPredicate Pred, SCEVUse LHS, SCEVUse RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
static LLVM_ABI AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB)
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Provides information about what library functions are available for the current target.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
This class implements a switch-like dispatch statement for a value of 'T' using dyn_cast functionalit...
TypeSwitch< T, ResultT > & Case(CallableT &&caseFn)
Add a case on the given type.
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
bool isStructTy() const
True if this is an instance of StructType.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static SmallVector< VFInfo, 8 > getMappings(const CallInst &CI)
Retrieve all the VFInfo instances associated to the CallInst CI.
A recipe for generating the active lane mask for the vector loop that is used to predicate the vector...
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
RecipeListTy::iterator iterator
Instruction iterators...
iterator begin()
Recipe iterator methods.
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
VPBasicBlock * splitAt(iterator SplitAt)
Split current block at SplitAt by inserting a new block between the current block and its successors ...
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
const VPRecipeBase & back() const
A recipe for vectorizing a phi-node as a sequence of mask-based select instructions.
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
void setMask(unsigned Idx, VPValue *V)
Set mask number Idx to V.
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have...
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccs as successor of this VPBlockBase.
VPRegionBlock * getParent()
const VPBasicBlock * getExitingBasicBlock() const
size_t getNumSuccessors() const
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
const VPBlocksTy & getPredecessors() const
const std::string & getName() const
void clearSuccessors()
Remove all the successors of this block.
VPBlockBase * getSinglePredecessor() const
const VPBasicBlock * getEntryBasicBlock() const
VPBlockBase * getSingleHierarchicalPredecessor()
VPBlockBase * getSingleSuccessor() const
const VPBlocksTy & getSuccessors() const
static auto blocksAs(T &&Range)
Return an iterator range over Range with each block cast to BlockTy.
static void insertOnEdge(VPBlockBase *From, VPBlockBase *To, VPBlockBase *BlockPtr)
Inserts BlockPtr on the edge between From and To.
static bool isLatch(const VPBlockBase *VPB, const VPDominatorTree &VPDT)
Returns true if VPB is a loop latch, using isHeader().
static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBases IfTrue and IfFalse after BlockPtr.
static void connectBlocks(VPBlockBase *From, VPBlockBase *To, unsigned PredIdx=-1u, unsigned SuccIdx=-1u)
Connect VPBlockBases From and To bi-directionally.
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
static auto blocksOnly(T &&Range)
Return an iterator range over Range which only includes BlockTy blocks.
static void transferSuccessors(VPBlockBase *Old, VPBlockBase *New)
Transfer successors from Old to New. New must have no successors.
static SmallVector< VPBasicBlock * > blocksInSingleSuccessorChainBetween(VPBasicBlock *FirstBB, VPBasicBlock *LastBB)
Returns the blocks between FirstBB and LastBB, where FirstBB to LastBB forms a single-successor chain.
A recipe for generating conditional branches on the bits of a mask.
RAII object that stores the current insertion point and restores it when the object is destroyed.
VPlan-based builder utility analogous to IRBuilder.
VPInstruction * createFirstActiveLane(ArrayRef< VPValue * > Masks, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPValue * createScalarZExtOrTrunc(VPValue *Op, Type *ResultTy, Type *SrcTy, DebugLoc DL)
VPInstruction * createNot(VPValue *Operand, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createAnyOfReduction(VPValue *ChainOp, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL=DebugLoc::getUnknown())
Create an AnyOf reduction pattern: or-reduce ChainOp, freeze the result, then select between TrueVal ...
VPInstruction * createLogicalAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPDerivedIVRecipe * createDerivedIV(InductionDescriptor::InductionKind Kind, FPMathOperator *FPBinOp, VPIRValue *Start, VPValue *Current, VPValue *Step)
Convert the input value Current to the corresponding value of an induction with Start and Step values...
VPInstruction * createScalarCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, DebugLoc DL, const VPIRMetadata &Metadata={})
VPWidenPHIRecipe * createWidenPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
static VPBuilder getToInsertAfter(VPRecipeBase *R)
Create a VPBuilder to insert after R.
VPPhi * createScalarPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={})
VPInstruction * createICmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new ICmp VPInstruction with predicate Pred and operands A and B.
VPInstruction * createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={})
void setInsertPoint(VPBasicBlock *TheBB)
This specifies that created VPInstructions should be appended to the end of the specified block.
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.
A recipe for generating the phi node tracking the current scalar iteration index.
unsigned getNumDefinedValues() const
Returns the number of values defined by the VPDef.
VPValue * getVPSingleValue()
Returns the only VPValue defined by the VPDef.
VPValue * getVPValue(unsigned I)
Returns the VPValue with index I defined by the VPDef.
ArrayRef< VPRecipeValue * > definedValues()
Returns an ArrayRef of the values defined by the VPDef.
A recipe for converting the input value IV value to the corresponding value of an IV with different s...
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
bool properlyDominates(const VPRecipeBase *A, const VPRecipeBase *B)
A recipe to combine multiple recipes into a single 'expression' recipe, which should be considered a ...
A recipe representing a sequence of load -> update -> store as part of a histogram operation.
A special type of VPBasicBlock that wraps an existing IR basic block.
Class to record and manage LLVM IR flags.
static VPIRFlags getDefaultFlags(unsigned Opcode)
Returns default flags for Opcode for opcodes that support it, asserts otherwise.
LLVM_ABI_FOR_TEST FastMathFlags getFastMathFlags() const
static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)
Create a new VPIRPhi for I, if it is a PHINode, otherwise create a VPIRInstruction.
This is a concrete Recipe that models a single VPlan-level instruction.
unsigned getNumOperandsWithoutMask() const
Returns the number of operands, excluding the mask if the VPInstruction is masked.
@ ExtractLane
Extracts a single lane (first operand) from a set of vector operands.
@ ExtractPenultimateElement
@ Unpack
Extracts all lanes from its (non-scalable) vector operand.
@ ReductionStartVector
Start vector for reductions with 3 operands: the original start value, the identity value for the red...
@ BuildVector
Creates a fixed-width vector containing all operands.
@ BuildStructVector
Given operands of (the same) struct type, creates a struct of fixed- width vectors each containing a ...
@ CanonicalIVIncrementForPart
@ ComputeReductionResult
Reduce the operands to the final reduction result using the operation specified via the operation's V...
const InterleaveGroup< Instruction > * getInterleaveGroup() const
VPValue * getMask() const
Return the mask used by this recipe.
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
A recipe for interleaved memory operations with vector-predication intrinsics.
VPInterleaveRecipe is a recipe for transforming an interleave group of load or stores into one wide l...
VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when control converges back from ...
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
VPBasicBlock * getParent()
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Helper class to create VPRecipes from IR instructions.
VPHistogramRecipe * widenIfHistogram(VPInstruction *VPI)
If VPI represents a histogram operation (as determined by LoopVectorizationLegality) make that safe f...
VPRecipeBase * tryToWidenMemory(VPInstruction *VPI, VFRange &Range)
Check if the load or store instruction VPI should be widened for Range.Start and potentially masked.
bool replaceWithFinalIfReductionStore(VPInstruction *VPI, VPBuilder &FinalRedStoresBuilder)
If VPI is a store of a reduction into an invariant address, delete it.
VPReplicateRecipe * handleReplication(VPInstruction *VPI, VFRange &Range)
Build a VPReplicateRecipe for VPI.
A recipe to represent inloop reduction operations with vector-predication intrinsics,...
A recipe for handling reduction phis.
void setVFScaleFactor(unsigned ScaleFactor)
Set the VFScaleFactor for this reduction phi.
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by, or 1 if it isn't scaled.
RecurKind getRecurrenceKind() const
Returns the recurrence kind of the reduction.
A recipe to represent inloop, ordered or partial reduction operations.
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
const VPBlockBase * getEntry() const
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
VPInstruction * getOrCreateCanonicalIVIncrement()
Get the canonical IV increment instruction if it exists.
void setExiting(VPBlockBase *ExitingBlock)
Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.
Type * getCanonicalIVType() const
Return the type of the canonical IV for loop regions.
void clearCanonicalIVNUW(VPInstruction *Increment)
Unsets NUW for the canonical IV increment Increment, for loop regions.
VPRegionValue * getCanonicalIV()
Return the canonical induction variable of the region, null for replicating regions.
const VPBlockBase * getExiting() const
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
bool isSingleScalar() const
static InstructionCost computeCallCost(Function *CalledFn, Type *ResultTy, ArrayRef< const VPValue * > ArgOps, bool IsSingleScalar, ElementCount VF, VPCostContext &Ctx)
Return the cost of scalarizing a call to CalledFn with argument operands ArgOps for a given VF.
bool isPredicated() const
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...
VPSingleDefRecipe is a base class for recipes that model a sequence of one or more output IR that def...
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
VPSingleDefRecipe * clone() override=0
Clone the current recipe.
An analysis for type-inference for VPValues.
LLVMContext & getContext()
Return the LLVMContext used by the analysis.
Type * inferScalarType(const VPValue *V)
Infer the type of V. Returns the scalar type of V.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
void setOperand(unsigned I, VPValue *New)
unsigned getNumOperands() const
VPValue * getOperand(unsigned N) const
void addOperand(VPValue *Operand)
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Value * getLiveInIRValue() const
Return the underlying IR value for a VPIRValue.
bool isDefinedOutsideLoopRegions() const
Returns true if the VPValue is defined outside any loop.
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
void setUnderlyingValue(Value *Val)
void replaceAllUsesWith(VPValue *New)
unsigned getNumUsers() const
void replaceUsesWithIf(VPValue *New, llvm::function_ref< bool(VPUser &U, unsigned Idx)> ShouldReplace)
Go through the uses list for this VPValue and make each use point to New if the callback ShouldReplac...
A recipe to compute a pointer to the last element of each part of a widened memory access for widened...
A recipe for widening Call instructions using library calls.
static InstructionCost computeCallCost(Function *Variant, VPCostContext &Ctx)
Return the cost of widening a call using the vector function Variant.
VPWidenCastRecipe is a recipe to create vector cast instructions.
Instruction::CastOps getOpcode() const
A recipe for handling GEP instructions.
Base class for widened induction (VPWidenIntOrFpInductionRecipe and VPWidenPointerInductionRecipe),...
VPIRValue * getStartValue() const
Returns the start value of the induction.
VPValue * getStepValue()
Returns the step value of the induction.
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
A recipe for handling phi nodes of integer and floating-point inductions, producing their vector valu...
VPIRValue * getStartValue() const
Returns the start value of the induction.
VPValue * getSplatVFValue() const
If the recipe has been unrolled, return the VPValue for the induction increment, otherwise return nul...
VPValue * getLastUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the last unrolled part,...
A recipe for widening vector intrinsics.
static InstructionCost computeCallCost(Intrinsic::ID ID, ArrayRef< const VPValue * > Operands, const VPRecipeWithIRFlags &R, ElementCount VF, VPCostContext &Ctx)
Compute the cost of a vector intrinsic with ID and Operands.
static InstructionCost computeMemIntrinsicCost(Intrinsic::ID IID, Type *Ty, bool IsMasked, Align Alignment, VPCostContext &Ctx)
Helper function for computing the cost of vector memory intrinsic.
A common mixin class for widening memory operations.
virtual VPRecipeBase * getAsRecipe()=0
Return a VPRecipeBase* to the current object.
A recipe for widened phis.
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenRecipe.
VPWidenRecipe * clone() override
Clone the current recipe.
unsigned getOpcode() const
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
VPIRValue * getLiveIn(Value *V) const
Return the live-in VPIRValue for V, if there is one or nullptr otherwise.
bool hasVF(ElementCount VF) const
const DataLayout & getDataLayout() const
LLVMContext & getContext() const
VPBasicBlock * getEntry()
bool hasScalableVF() const
VPValue * getTripCount() const
The trip count of the original loop.
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
iterator_range< SmallSetVector< ElementCount, 2 >::iterator > vectorFactors() const
Returns an iterator range over all VFs of the plan.
VPIRValue * getFalse()
Return a VPIRValue wrapping i1 false.
VPSymbolicValue & getVFxUF()
Returns VF * UF of the vector loop region.
VPIRValue * getAllOnesValue(Type *Ty)
Return a VPIRValue wrapping the AllOnes value of type Ty.
VPRegionBlock * createReplicateRegion(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="")
Create a new replicate region with Entry, Exiting and Name.
auto getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
bool hasUF(unsigned UF) const
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
VPSymbolicValue & getVectorTripCount()
The vector trip count.
VPValue * getBackedgeTakenCount() const
VPIRValue * getOrAddLiveIn(Value *V)
Gets the live-in VPIRValue for V or adds a new live-in (if none exists yet) for V.
VPIRValue * getZero(Type *Ty)
Return a VPIRValue wrapping the null value of type Ty.
void setVF(ElementCount VF)
bool isUnrolled() const
Returns true if the VPlan already has been unrolled, i.e.
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
unsigned getConcreteUF() const
Returns the concrete UF of the plan, after unrolling.
void resetTripCount(VPValue *NewTripCount)
Resets the trip count for the VPlan.
VPBasicBlock * getMiddleBlock()
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
VPIRValue * getTrue()
Return a VPIRValue wrapping i1 true.
VPBasicBlock * getVectorPreheader() const
Returns the preheader of the vector loop region, if one exists, or null otherwise.
VPSymbolicValue & getUF()
Returns the UF of the vector loop region.
bool hasScalarVFOnly() const
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
VPSymbolicValue & getVF()
Returns the VF of the vector loop region.
bool hasScalarTail() const
Returns true if the scalar tail may execute after the vector loop.
LLVM_ABI_FOR_TEST VPlan * duplicate()
Clone the current VPlan, update all VPValues of the new VPlan and cloned recipes to refer to the clon...
VPIRValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPIRValue wrapping a ConstantInt with the given type and value.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
iterator_range< user_iterator > users()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
constexpr bool hasKnownScalarFactor(const FixedOrScalableQuantity &RHS) const
Returns true if there exists a value X where RHS.multiplyCoefficientBy(X) will result in a value whos...
constexpr ScalarTy getFixedValue() const
constexpr ScalarTy getKnownScalarFactor(const FixedOrScalableQuantity &RHS) const
Returns a value X where RHS.multiplyCoefficientBy(X) will result in a value whose quantity matches ou...
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
An efficient, type-erasing, non-owning reference to a callable.
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_ABI APInt RoundingUDiv(const APInt &A, const APInt &B, APInt::Rounding RM)
Return A unsign-divided by B, rounded by the given rounding mode.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
std::variant< std::monostate, Loc::Single, Loc::Multi, Loc::MMI, Loc::EntryValue > Variant
Alias for the std::variant specialization base class of DbgVariable.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
match_isa< To... > m_Isa()
match_combine_or< Ty... > m_CombineOr(const Ty &...Ps)
Combine pattern matchers matching any of Ps patterns.
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
auto m_Cmp()
Matches any compare instruction and ignores it.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_MaskedStore(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
Matches MaskedStore Intrinsic.
match_combine_or< CastInst_match< OpTy, TruncInst >, OpTy > m_TruncOrSelf(const OpTy &Op)
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
LogicalOp_match< LHS, RHS, Instruction::And > m_LogicalAnd(const LHS &L, const RHS &R)
Matches L && R either in the form of L & R or L ? R : false.
BinaryOp_match< LHS, RHS, Instruction::FMul > m_FMul(const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, OpTy > m_ZExtOrSelf(const OpTy &Op)
bool match(Val *V, const Pattern &P)
match_deferred< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
auto match_fn(const Pattern &P)
A match functor that can be used as a UnaryPredicate in functional algorithms like all_of.
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_MaskedLoad(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
Matches MaskedLoad Intrinsic.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
SpecificCmpClass_match< LHS, RHS, CmpInst > m_SpecificCmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
CastInst_match< OpTy, FPExtInst > m_FPExt(const OpTy &Op)
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::UDiv > m_UDiv(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
BinaryOp_match< LHS, RHS, Instruction::FAdd, true > m_c_FAdd(const LHS &L, const RHS &R)
Matches FAdd with LHS and RHS in either order.
LogicalOp_match< LHS, RHS, Instruction::And, true > m_c_LogicalAnd(const LHS &L, const RHS &R)
Matches L && R with LHS and RHS in either order.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
BinaryOp_match< LHS, RHS, Instruction::Mul, true > m_c_Mul(const LHS &L, const RHS &R)
Matches a Mul with LHS and RHS in either order.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
auto m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
bind_cst_ty m_scev_APInt(const APInt *&C)
Match an SCEV constant and bind it to an APInt.
specificloop_ty m_SpecificLoop(const Loop *L)
bool match(const SCEV *S, const Pattern &P)
SCEVAffineAddRec_match< Op0_t, Op1_t, match_isa< const Loop > > m_scev_AffineAddRec(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::ExtractLastLane, VPInstruction_match< VPInstruction::ExtractLastPart, Op0_t > > m_ExtractLastLaneOfLastPart(const Op0_t &Op0)
AllRecipe_commutative_match< Instruction::And, Op0_t, Op1_t > m_c_BinaryAnd(const Op0_t &Op0, const Op1_t &Op1)
Match a binary AND operation.
AllRecipe_match< Instruction::Or, Op0_t, Op1_t > m_BinaryOr(const Op0_t &Op0, const Op1_t &Op1)
Match a binary OR operation.
VPInstruction_match< VPInstruction::AnyOf > m_AnyOf()
AllRecipe_commutative_match< Instruction::Or, Op0_t, Op1_t > m_c_BinaryOr(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::ComputeReductionResult, Op0_t > m_ComputeReductionResult(const Op0_t &Op0)
auto m_WidenAnyExtend(const Op0_t &Op0)
match_bind< VPIRValue > m_VPIRValue(VPIRValue *&V)
Match a VPIRValue.
VPInstruction_match< VPInstruction::StepVector > m_StepVector()
auto m_VPPhi(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::BranchOnTwoConds > m_BranchOnTwoConds()
AllRecipe_match< Opcode, Op0_t, Op1_t > m_Binary(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::LastActiveLane, Op0_t > m_LastActiveLane(const Op0_t &Op0)
auto m_WidenIntrinsic(const T &...Ops)
VPInstruction_match< VPInstruction::ExitingIVValue, Op0_t > m_ExitingIVValue(const Op0_t &Op0)
VPInstruction_match< Instruction::ExtractElement, Op0_t, Op1_t > m_ExtractElement(const Op0_t &Op0, const Op1_t &Op1)
specific_intval< 1 > m_False()
VPInstruction_match< VPInstruction::ExtractLastLane, Op0_t > m_ExtractLastLane(const Op0_t &Op0)
VPInstruction_match< VPInstruction::ActiveLaneMask, Op0_t, Op1_t, Op2_t > m_ActiveLaneMask(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
match_bind< VPSingleDefRecipe > m_VPSingleDefRecipe(VPSingleDefRecipe *&V)
Match a VPSingleDefRecipe, capturing if we match.
VPInstruction_match< VPInstruction::BranchOnCount > m_BranchOnCount()
auto m_GetElementPtr(const Op0_t &Op0, const Op1_t &Op1)
specific_intval< 1 > m_True()
auto m_VPValue()
Match an arbitrary VPValue and ignore it.
VectorEndPointerRecipe_match< Op0_t, Op1_t > m_VecEndPtr(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::ExtractLastPart, Op0_t > m_ExtractLastPart(const Op0_t &Op0)
VPInstruction_match< VPInstruction::Broadcast, Op0_t > m_Broadcast(const Op0_t &Op0)
VPInstruction_match< VPInstruction::ExplicitVectorLength, Op0_t > m_EVL(const Op0_t &Op0)
VPInstruction_match< VPInstruction::BuildVector > m_BuildVector()
BuildVector matches only its opcode, w/o matching its operands as the number of operands is not fi...
VPInstruction_match< VPInstruction::ExtractPenultimateElement, Op0_t > m_ExtractPenultimateElement(const Op0_t &Op0)
match_bind< VPInstruction > m_VPInstruction(VPInstruction *&V)
Match a VPInstruction, capturing if we match.
VPInstruction_match< VPInstruction::FirstActiveLane, Op0_t > m_FirstActiveLane(const Op0_t &Op0)
auto m_DerivedIV(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
VPInstruction_match< VPInstruction::BranchOnCond > m_BranchOnCond()
VPInstruction_match< VPInstruction::ExtractLane, Op0_t, Op1_t > m_ExtractLane(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::Reverse, Op0_t > m_Reverse(const Op0_t &Op0)
NodeAddr< DefNode * > Def
bool isSingleScalar(const VPValue *VPV)
Returns true if VPV is a single scalar, either because it produces the same value for all lanes or on...
VPValue * getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr)
Get or create a VPValue that corresponds to the expansion of Expr.
bool cannotHoistOrSinkRecipe(const VPRecipeBase &R, bool Sinking=false)
Return true if we do not know how to (mechanically) hoist or sink R.
VPInstruction * findComputeReductionResult(VPReductionPHIRecipe *PhiR)
Find the ComputeReductionResult recipe for PhiR, looking through selects inserted for predicated redu...
VPInstruction * findCanonicalIVIncrement(VPlan &Plan)
Find the canonical IV increment of Plan's vector loop region.
std::optional< MemoryLocation > getMemoryLocation(const VPRecipeBase &R)
Return a MemoryLocation for R with noalias metadata populated from R, if the recipe is supported and ...
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
VPRecipeBase * findRecipe(VPValue *Start, PredT Pred)
Search Start's users for a recipe satisfying Pred, looking through recipes with definitions.
VPSingleDefRecipe * findHeaderMask(VPlan &Plan)
Collect the header mask with the pattern: (ICMP_ULE, WideCanonicalIV, backedge-taken-count) TODO: Int...
bool onlyScalarValuesUsed(const VPValue *Def)
Returns true if only scalar values of Def are used by all users.
static VPRecipeBase * findUserOf(VPValue *V, const MatchT &P)
If V is used by a recipe matching pattern P, return it.
bool isUniformAcrossVFsAndUFs(const VPValue *V)
Checks if V is uniform across all VF lanes and UF parts.
const SCEV * getSCEVExprForVPValue(const VPValue *V, PredicatedScalarEvolution &PSE, const Loop *L=nullptr)
Return the SCEV expression for V.
bool isHeaderMask(const VPValue *V, const VPlan &Plan)
Return true if V is a header mask in Plan.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
constexpr auto not_equal_to(T &&Arg)
Functor variant of std::not_equal_to that can be used as a UnaryPredicate in functional algorithms li...
void stable_sort(R &&Range)
auto min_element(R &&Range)
Provide wrappers to std::min_element which take ranges instead of having to pass begin/end explicitly...
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
DenseMap< const Value *, const SCEV * > ValueToSCEVMapTy
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
constexpr from_range_t from_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
auto cast_or_null(const Y &Val)
iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order.
iterator_range< df_iterator< VPBlockDeepTraversalWrapper< VPBlockBase * > > > vp_depth_first_deep(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order while traversing t...
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like a...
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
SmallVector< VPRegisterUsage, 8 > calculateRegisterUsageForPlan(VPlan &Plan, ArrayRef< ElementCount > VFs, const TargetTransformInfo &TTI, const SmallPtrSetImpl< const Value * > &ValuesToIgnore)
Estimate the register usage for Plan and vectorization factors in VFs by calculating the highest numb...
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI_FOR_TEST cl::opt< bool > EnableWideActiveLaneMask
UncountableExitStyle
Different methods of handling early exits.
@ ReadOnly
No side effects to worry about, so we can process any uncountable exits in the loop and branch either...
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
iterator_range< filter_iterator< detail::IterOfRange< RangeT >, PredicateT > > make_filter_range(RangeT &&Range, PredicateT Pred)
Convenience function that takes a range of elements and a predicate, and return a new filter_iterator...
bool canConstantBeExtended(const APInt *C, Type *NarrowType, TTI::PartialReductionExtendKind ExtKind)
Check if a constant CI can be safely treated as having been extended from a narrower type with the gi...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
RecurKind
These are the kinds of recurrences that we support.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ FindIV
FindIV reduction with select(icmp(),x,y) where one of (x,y) is a loop induction variable (increasing ...
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
@ FSub
Subtraction of floats.
@ FAddChainWithSubs
A chain of fadds and fsubs.
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ Sub
Subtraction of integers.
@ AddChainWithSubs
A chain of adds and subs.
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
LLVM_ABI Value * getRecurrenceIdentity(RecurKind K, Type *Tp, FastMathFlags FMF)
Given information about a recurrence kind, return the identity for the @llvm.vector....
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the specified block at the specified instruction.
FunctionAddr VTableAddr Next
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
DWARFExpression::Operation Op
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...
ArrayRef(const T &OneElt) -> ArrayRef< T >
auto make_second_range(ContainerTy &&c)
Given a container of pairs, return a range over the second elements.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer list are equal or the list is empty.
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
@ Default
The result value is uniform if and only if all operands are uniform.
constexpr detail::IsaCheckPredicate< Types... > IsaPred
Function object wrapper for the llvm::isa type check.
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
RemoveMask_match(const Op0_t &In, Op1_t &Out)
bool match(OpTy *V) const
MDNode * Scope
The tag for alias scope specification (used with noalias).
This struct is a compact representation of a valid (non-zero power of two) alignment.
An information struct used to provide DenseMap with the various necessary components for a given valu...
This reduction is unordered with the partial result scaled down by some factor.
Holds the VFShape for a specific scalar to vector function mapping.
Encapsulates information needed to describe a parameter.
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
Struct to hold various analysis needed for cost computations.
static bool isFreeScalarIntrinsic(Intrinsic::ID ID)
Returns true if ID is a pseudo intrinsic that is dropped via scalarization rather than widened.
CallWideningKind
Choice for how to widen a call at a given VF.
bool isMaskRequired(Instruction *I) const
Forwards to LoopVectorizationCostModel::isMaskRequired.
PredicatedScalarEvolution & PSE
bool willBeScalarized(Instruction *I, ElementCount VF) const
Returns true if I is known to be scalarized at VF.
TargetTransformInfo::TargetCostKind CostKind
const TargetLibraryInfo & TLI
std::optional< CallWideningKind > getLegacyCallKind(CallInst *CI, ElementCount VF) const
Returns the legacy call widening decision for CI at VF, or std::nullopt if none was recorded.
const TargetTransformInfo & TTI
A VPValue representing a live-in from the input IR or a constant.
Type * getType() const
Returns the type of the underlying IR value.
A struct that represents some properties of the register usage of a loop.
SmallMapVector< unsigned, unsigned, 4 > MaxLocalUsers
Holds the maximum number of concurrent live intervals in the loop.
InstructionCost spillCost(const TargetTransformInfo &TTI, TargetTransformInfo::TargetCostKind CostKind, unsigned OverrideMaxNumRegs=0) const
Calculate the estimated cost of any spills due to using more registers than the number available for ...
A symbolic live-in VPValue, used for values like vector trip count, VF, and VFxUF.
bool isMaterialized() const
Returns true if this symbolic value has been materialized.
A recipe for widening load operations with vector-predication intrinsics, using the address to load f...
A recipe for widening load operations, using the address to load from and an optional mask.
A recipe for widening store operations with vector-predication intrinsics, using the value to store,...
A recipe for widening store operations, using the stored value, the address to store to and an option...