57 if (!VPBB->getParent())
60 auto EndIter = Term ? Term->getIterator() : VPBB->end();
65 VPValue *VPV = Ingredient.getVPSingleValue();
81 *Load, Ingredient.getOperand(0),
nullptr ,
82 false , *VPI, Ingredient.getDebugLoc());
85 *Store, Ingredient.getOperand(1), Ingredient.getOperand(0),
86 nullptr ,
false , *VPI,
87 Ingredient.getDebugLoc());
90 Ingredient.getDebugLoc());
102 if (VectorID == Intrinsic::experimental_noalias_scope_decl)
107 if (VectorID == Intrinsic::assume ||
108 VectorID == Intrinsic::lifetime_end ||
109 VectorID == Intrinsic::lifetime_start ||
110 VectorID == Intrinsic::sideeffect ||
111 VectorID == Intrinsic::pseudoprobe) {
116 const bool IsSingleScalar = VectorID != Intrinsic::assume &&
117 VectorID != Intrinsic::pseudoprobe;
121 Ingredient.getDebugLoc());
124 *CI, VectorID,
drop_end(Ingredient.operands()), CI->getType(),
125 VPIRFlags(*CI), *VPI, CI->getDebugLoc());
129 CI->getOpcode(), Ingredient.getOperand(0), CI->getType(), CI,
133 *VPI, Ingredient.getDebugLoc());
137 "inductions must be created earlier");
146 "Only recpies with zero or one defined values expected");
147 Ingredient.eraseFromParent();
164 if (
A->getOpcode() != Instruction::Store ||
165 B->getOpcode() != Instruction::Store)
175 const APInt *Distance;
181 Type *TyA = TypeInfo.inferScalarType(
A->getOperand(0));
183 Type *TyB = TypeInfo.inferScalarType(
B->getOperand(0));
189 uint64_t MaxStoreSize = std::max(SizeA, SizeB);
191 auto VFs =
B->getParent()->getPlan()->vectorFactors();
195 return Distance->
abs().
uge(
203 : ExcludeRecipes(ExcludeRecipes), GroupLeader(GroupLeader), PSE(PSE),
204 L(L), TypeInfo(TypeInfo) {}
211 return ExcludeRecipes.contains(&R) ||
212 (Store && isNoAliasViaDistance(Store, &GroupLeader));
225 std::optional<SinkStoreInfo> SinkInfo = {}) {
226 bool CheckReads = SinkInfo.has_value();
233 if (SinkInfo && SinkInfo->shouldSkip(R))
237 if (!
R.mayWriteToMemory() && !(CheckReads &&
R.mayReadFromMemory()))
255template <
unsigned Opcode>
260 static_assert(Opcode == Instruction::Load || Opcode == Instruction::Store,
261 "Only Load and Store opcodes supported");
262 constexpr bool IsLoad = (Opcode == Instruction::Load);
269 if (!RepR || RepR->getOpcode() != Opcode || !FilterFn(RepR))
273 VPValue *Addr = RepR->getOperand(IsLoad ? 0 : 1);
276 RecipesByAddress[AddrSCEV].push_back(RepR);
281 for (
auto &Group :
Groups) {
296 auto InsertIfValidSinkCandidate = [ScalarVFOnly, &WorkList](
308 if (Candidate->getParent() == SinkTo ||
313 if (!ScalarVFOnly && RepR->isSingleScalar())
316 WorkList.
insert({SinkTo, Candidate});
328 for (
auto &Recipe : *VPBB)
330 InsertIfValidSinkCandidate(VPBB,
Op);
334 for (
unsigned I = 0;
I != WorkList.
size(); ++
I) {
337 std::tie(SinkTo, SinkCandidate) = WorkList[
I];
342 auto UsersOutsideSinkTo =
344 return cast<VPRecipeBase>(U)->getParent() != SinkTo;
346 if (
any_of(UsersOutsideSinkTo, [SinkCandidate](
VPUser *U) {
347 return !U->usesFirstLaneOnly(SinkCandidate);
350 bool NeedsDuplicating = !UsersOutsideSinkTo.empty();
352 if (NeedsDuplicating) {
356 if (
auto *SinkCandidateRepR =
362 nullptr , *SinkCandidateRepR,
366 Clone = SinkCandidate->
clone();
376 InsertIfValidSinkCandidate(SinkTo,
Op);
386 if (!EntryBB || EntryBB->size() != 1 ||
396 if (EntryBB->getNumSuccessors() != 2)
401 if (!Succ0 || !Succ1)
404 if (Succ0->getNumSuccessors() + Succ1->getNumSuccessors() != 1)
406 if (Succ0->getSingleSuccessor() == Succ1)
408 if (Succ1->getSingleSuccessor() == Succ0)
425 if (!Region1->isReplicator())
427 auto *MiddleBasicBlock =
429 if (!MiddleBasicBlock || !MiddleBasicBlock->empty())
434 if (!Region2 || !Region2->isReplicator())
439 if (!Mask1 || Mask1 != Mask2)
442 assert(Mask1 && Mask2 &&
"both region must have conditions");
448 if (TransformedRegions.
contains(Region1))
455 if (!Then1 || !Then2)
475 VPValue *Phi1ToMoveV = Phi1ToMove.getVPSingleValue();
481 if (Phi1ToMove.getVPSingleValue()->getNumUsers() == 0) {
482 Phi1ToMove.eraseFromParent();
485 Phi1ToMove.moveBefore(*Merge2, Merge2->begin());
499 TransformedRegions.
insert(Region1);
502 return !TransformedRegions.
empty();
510 std::string RegionName = (
Twine(
"pred.") + Instr->getOpcodeName()).str();
511 assert(Instr->getParent() &&
"Predicated instruction not in any basic block");
512 auto *BlockInMask = PredRecipe->
getMask();
533 Region->setParent(ParentRegion);
539 RecipeWithoutMask->getDebugLoc());
540 Exiting->appendRecipe(PHIRecipe);
553 if (RepR->isPredicated())
572 if (ParentRegion && ParentRegion->
getExiting() == CurrentBlock)
584 if (!VPBB->getParent())
588 if (!PredVPBB || PredVPBB->getNumSuccessors() != 1 ||
597 R.moveBefore(*PredVPBB, PredVPBB->
end());
599 auto *ParentRegion = VPBB->getParent();
600 if (ParentRegion && ParentRegion->getExiting() == VPBB)
601 ParentRegion->setExiting(PredVPBB);
605 return !WorkList.
empty();
612 bool ShouldSimplify =
true;
613 while (ShouldSimplify) {
629 if (!
IV ||
IV->getTruncInst())
644 for (
auto *U : FindMyCast->
users()) {
646 if (UserCast && UserCast->getUnderlyingValue() == IRCast) {
647 FoundUserCast = UserCast;
654 FindMyCast = FoundUserCast;
656 if (FindMyCast !=
IV)
671 Builder.createDerivedIV(Kind, FPBinOp, StartV, CanonicalIV, Step);
681 BaseIV = Builder.createScalarCast(Instruction::Trunc, BaseIV, TruncTy,
DL);
687 if (ResultTy != StepTy) {
694 Builder.setInsertPoint(VecPreheader);
695 Step = Builder.createScalarCast(Instruction::Trunc, Step, ResultTy,
DL);
697 return Builder.createScalarIVSteps(InductionOpcode, FPBinOp, BaseIV, Step,
723 WideCanIV->getDebugLoc(), Builder));
724 WideCanIV->eraseFromParent();
736 if (!WidenIV || !WidenIV->isCanonical())
740 WidenIV->dropPoisonGeneratingFlags();
741 WideCanIV->replaceAllUsesWith(WidenIV);
742 WideCanIV->eraseFromParent();
751 if (PHICost > BroadcastCost)
760 unsigned RegClass =
TTI.getRegisterClassForType(
true, VecTy);
774 WideCanIV->getDebugLoc());
775 NewWideIV->insertBefore(&*Header->getFirstNonPhi());
776 WideCanIV->replaceAllUsesWith(NewWideIV);
777 WideCanIV->eraseFromParent();
785 bool IsConditionalAssume = RepR && RepR->isPredicated() &&
787 if (IsConditionalAssume)
790 if (R.mayHaveSideEffects())
794 return all_of(R.definedValues(),
795 [](
VPValue *V) { return V->getNumUsers() == 0; });
815 VPUser *PhiUser = PhiR->getSingleUser();
821 PhiR->replaceAllUsesWith(Start);
822 PhiR->eraseFromParent();
830 for (
unsigned I = 0;
I !=
Users.size(); ++
I) {
833 Users.insert_range(V->users());
835 return Users.takeVector();
849 nullptr, StartV, StepV, PtrIV->
getDebugLoc(), Builder);
886 Def->getNumUsers() == 0 || !Def->getUnderlyingValue() ||
887 (RepR && (RepR->isSingleScalar() || RepR->isPredicated())))
900 Def->operands(),
true,
902 Clone->insertAfter(Def);
903 Def->replaceAllUsesWith(Clone);
914 PtrIV->replaceAllUsesWith(PtrAdd);
921 if (HasOnlyVectorVFs &&
none_of(WideIV->users(), [WideIV](
VPUser *U) {
922 return U->usesScalars(WideIV);
928 Plan,
ID.getKind(),
ID.getInductionOpcode(),
930 WideIV->getTruncInst(), WideIV->getStartValue(), WideIV->getStepValue(),
931 WideIV->getDebugLoc(), Builder);
934 if (!HasOnlyVectorVFs) {
936 "plans containing a scalar VF cannot also include scalable VFs");
937 WideIV->replaceAllUsesWith(Steps);
940 WideIV->replaceUsesWithIf(Steps,
941 [WideIV, HasScalableVF](
VPUser &U,
unsigned) {
943 return U.usesFirstLaneOnly(WideIV);
944 return U.usesScalars(WideIV);
960 return (IntOrFpIV && IntOrFpIV->getTruncInst()) ? nullptr : WideIV;
965 if (!Def || Def->getNumOperands() != 2)
973 auto IsWideIVInc = [&]() {
974 auto &
ID = WideIV->getInductionDescriptor();
977 VPValue *IVStep = WideIV->getStepValue();
978 switch (
ID.getInductionOpcode()) {
979 case Instruction::Add:
981 case Instruction::FAdd:
983 case Instruction::FSub:
986 case Instruction::Sub: {
1006 return IsWideIVInc() ? WideIV :
nullptr;
1025 if (WideIntOrFp && WideIntOrFp->getTruncInst())
1036 VPValue *FirstActiveLane =
B.createFirstActiveLane(Mask,
DL);
1038 FirstActiveLane =
B.createScalarZExtOrTrunc(FirstActiveLane, CanonicalIVType,
1039 FirstActiveLaneType,
DL);
1040 VPValue *EndValue =
B.createAdd(CanonicalIV, FirstActiveLane,
DL);
1045 if (Incoming != WideIV) {
1047 EndValue =
B.createAdd(EndValue, One,
DL);
1050 if (!WideIntOrFp || !WideIntOrFp->isCanonical()) {
1052 VPIRValue *Start = WideIV->getStartValue();
1053 VPValue *Step = WideIV->getStepValue();
1054 EndValue =
B.createDerivedIV(
1056 Start, EndValue, Step);
1071 if (WideIntOrFp && WideIntOrFp->getTruncInst())
1078 if (!WideIntOrFp || !WideIntOrFp->isCanonical()) {
1081 Start, VectorTC, Step);
1110 assert(EndValue &&
"Must have computed the end value up front");
1115 if (Incoming != WideIV)
1127 auto *Zero = Plan.
getZero(StepTy);
1128 return B.createPtrAdd(EndValue,
B.createSub(Zero, Step),
1133 return B.createNaryOp(
1134 ID.getInductionBinOp()->getOpcode() == Instruction::FAdd
1136 : Instruction::FAdd,
1137 {EndValue, Step}, {ID.getInductionBinOp()->getFastMathFlags()});
1149 VPBuilder VectorPHBuilder(VectorPH, VectorPH->begin());
1158 WideIV, VectorPHBuilder, TypeInfo, ResumeTC))
1159 EndValues[WideIV] = EndValue;
1169 R.getVPSingleValue()->replaceAllUsesWith(EndValue);
1170 R.eraseFromParent();
1179 for (
auto [Idx, PredVPBB] :
enumerate(ExitVPBB->getPredecessors())) {
1181 if (PredVPBB == MiddleVPBB)
1183 Plan, TypeInfo, ExitIRI->getOperand(Idx), EndValues, PSE);
1186 Plan, TypeInfo, ExitIRI->getOperand(Idx), PSE);
1188 ExitIRI->setOperand(Idx, Escape);
1205 const auto &[V, Inserted] = SCEV2VPV.
try_emplace(ExpR->getSCEV(), ExpR);
1208 ExpR->replaceAllUsesWith(V->second);
1209 ExpR->eraseFromParent();
1218 while (!WorkList.
empty()) {
1220 if (!Seen.
insert(Cur).second)
1228 R->eraseFromParent();
1235static std::optional<std::pair<bool, unsigned>>
1238 std::optional<std::pair<bool, unsigned>>>(R)
1241 [](
auto *
I) {
return std::make_pair(
false,
I->getOpcode()); })
1243 return std::make_pair(
true,
I->getVectorIntrinsicID());
1245 .Case<VPVectorPointerRecipe, VPPredInstPHIRecipe, VPScalarIVStepsRecipe>(
1251 I->getVPRecipeID());
1253 .
Default([](
auto *) {
return std::nullopt; });
1271 Value *V =
Op->getUnderlyingValue();
1277 auto FoldToIRValue = [&]() ->
Value * {
1279 if (OpcodeOrIID->first) {
1280 if (R.getNumOperands() != 2)
1282 unsigned ID = OpcodeOrIID->second;
1283 return Folder.FoldBinaryIntrinsic(
ID,
Ops[0],
Ops[1],
1286 unsigned Opcode = OpcodeOrIID->second;
1295 return Folder.FoldSelect(
Ops[0],
Ops[1],
1298 return Folder.FoldBinOp(Instruction::BinaryOps::Xor,
Ops[0],
1300 case Instruction::Select:
1301 return Folder.FoldSelect(
Ops[0],
Ops[1],
Ops[2]);
1302 case Instruction::ICmp:
1303 case Instruction::FCmp:
1306 case Instruction::GetElementPtr: {
1309 return Folder.FoldGEP(
GEP->getSourceElementType(),
Ops[0],
1319 case Instruction::ExtractElement:
1326 if (
Value *V = FoldToIRValue())
1327 return R.getParent()->getPlan()->getOrAddLiveIn(V);
1333 VPlan *Plan = Def->getParent()->getPlan();
1339 return Def->replaceAllUsesWith(V);
1345 PredPHI->replaceAllUsesWith(
Op);
1358 bool CanCreateNewRecipe =
1365 if (TruncTy == ATy) {
1366 Def->replaceAllUsesWith(
A);
1375 : Instruction::ZExt;
1378 if (
auto *UnderlyingExt = Def->getOperand(0)->getUnderlyingValue()) {
1380 Ext->setUnderlyingValue(UnderlyingExt);
1382 Def->replaceAllUsesWith(Ext);
1384 auto *Trunc = Builder.createWidenCast(Instruction::Trunc,
A, TruncTy);
1385 Def->replaceAllUsesWith(Trunc);
1393 for (
VPUser *U :
A->users()) {
1395 for (
VPValue *VPV : R->definedValues())
1409 Def->replaceAllUsesWith(
X);
1410 Def->eraseFromParent();
1416 return Def->replaceAllUsesWith(
1421 return Def->replaceAllUsesWith(
X);
1425 return Def->replaceAllUsesWith(
1430 return Def->replaceAllUsesWith(
1435 return Def->replaceAllUsesWith(
X);
1439 return Def->replaceAllUsesWith(Plan->
getFalse());
1443 return Def->replaceAllUsesWith(
X);
1446 if (CanCreateNewRecipe &&
1451 (!Def->getOperand(0)->hasMoreThanOneUniqueUser() ||
1452 !Def->getOperand(1)->hasMoreThanOneUniqueUser()))
1453 return Def->replaceAllUsesWith(
1454 Builder.createLogicalAnd(
X, Builder.createOr(
Y, Z)));
1459 return Def->replaceAllUsesWith(Def->getOperand(1));
1464 return Def->replaceAllUsesWith(Builder.createLogicalAnd(
X,
Y));
1468 return Def->replaceAllUsesWith(Plan->
getFalse());
1471 return Def->replaceAllUsesWith(
X);
1475 if (CanCreateNewRecipe &&
1477 return Def->replaceAllUsesWith(Builder.createNot(
C));
1481 Def->setOperand(0,
C);
1482 Def->setOperand(1,
Y);
1483 Def->setOperand(2,
X);
1488 return Def->replaceAllUsesWith(
A);
1491 return Def->replaceAllUsesWith(
A);
1494 return Def->replaceAllUsesWith(
1501 return Def->replaceAllUsesWith(
1503 Def->getDebugLoc(),
"", NW));
1506 if (CanCreateNewRecipe &&
1514 ->hasNoSignedWrap()};
1515 return Def->replaceAllUsesWith(
1516 Builder.createSub(
X,
Y, Def->getDebugLoc(),
"", NW));
1522 return Def->replaceAllUsesWith(Builder.createNaryOp(
1524 {A, Plan->getConstantInt(APC->getBitWidth(), APC->exactLogBase2())},
1529 return Def->replaceAllUsesWith(Builder.createNaryOp(
1531 {A, Plan->getConstantInt(APC->getBitWidth(), APC->exactLogBase2())},
1536 return Def->replaceAllUsesWith(
A);
1551 R->setOperand(1,
Y);
1552 R->setOperand(2,
X);
1556 R->replaceAllUsesWith(Cmp);
1561 if (!Cmp->getDebugLoc() && Def->getDebugLoc())
1562 Cmp->setDebugLoc(Def->getDebugLoc());
1574 if (
Op->getNumUsers() > 1 ||
1578 }
else if (!UnpairedCmp) {
1579 UnpairedCmp =
Op->getDefiningRecipe();
1583 UnpairedCmp =
nullptr;
1590 if (NewOps.
size() < Def->getNumOperands()) {
1592 return Def->replaceAllUsesWith(NewAnyOf);
1599 if (CanCreateNewRecipe &&
1605 return Def->replaceAllUsesWith(NewCmp);
1613 return Def->replaceAllUsesWith(Def->getOperand(1));
1619 X = Builder.createWidenCast(Instruction::Trunc,
X, WideStepTy);
1620 Def->replaceAllUsesWith(
X);
1630 Def->setOperand(1, Def->getOperand(0));
1631 Def->setOperand(0,
Y);
1638 return Def->replaceAllUsesWith(Def->getOperand(0));
1644 Def->replaceAllUsesWith(
1645 BuildVector->getOperand(BuildVector->getNumOperands() - 1));
1649 return Def->replaceAllUsesWith(
A);
1655 Def->replaceAllUsesWith(
1656 BuildVector->getOperand(BuildVector->getNumOperands() - 2));
1663 Def->replaceAllUsesWith(BuildVector->getOperand(Idx));
1668 Def->replaceAllUsesWith(
1678 "broadcast operand must be single-scalar");
1679 Def->setOperand(0,
C);
1684 return Def->replaceUsesWithIf(
1685 X, [Def](
const VPUser &U,
unsigned) {
return U.usesScalars(Def); });
1688 if (Def->getNumOperands() == 1) {
1689 Def->replaceAllUsesWith(Def->getOperand(0));
1694 Phi->replaceAllUsesWith(Phi->getOperand(0));
1700 if (Def->getNumOperands() == 1 &&
1702 return Def->replaceAllUsesWith(IRV);
1715 return Def->replaceAllUsesWith(
A);
1718 Def->replaceAllUsesWith(Builder.createNaryOp(
1719 Instruction::ExtractElement, {A, LaneToExtract}, Def->getDebugLoc()));
1733 auto *IVInc = Def->getOperand(0);
1734 if (IVInc->getNumUsers() == 2) {
1739 if (Phi->getNumUsers() == 1 || (Phi->getNumUsers() == 2 && Inc)) {
1740 Def->replaceAllUsesWith(IVInc);
1742 Inc->replaceAllUsesWith(Phi);
1743 Phi->setOperand(0,
Y);
1759 Steps->replaceAllUsesWith(Steps->getOperand(0));
1767 Def->replaceUsesWithIf(StartV, [](
const VPUser &U,
unsigned Idx) {
1769 return PhiR && PhiR->isInLoop();
1775 Def->replaceAllUsesWith(
A);
1781 return U->usesScalars(A) || Def == U;
1783 return Def->replaceAllUsesWith(
A);
1787 return Def->replaceAllUsesWith(
A);
1814 while (!Worklist.
empty()) {
1823 R->replaceAllUsesWith(
1824 Builder.createLogicalAnd(HeaderMask, Builder.createLogicalAnd(
X,
Y)));
1828static std::optional<Instruction::BinaryOps>
1831 case Intrinsic::masked_udiv:
1832 return Instruction::UDiv;
1833 case Intrinsic::masked_sdiv:
1834 return Instruction::SDiv;
1835 case Intrinsic::masked_urem:
1836 return Instruction::URem;
1837 case Intrinsic::masked_srem:
1838 return Instruction::SRem;
1855 if (RepR && (RepR->isSingleScalar() || RepR->isPredicated()))
1859 if (RepR && RepR->getOpcode() == Instruction::Store &&
1862 RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(),
1863 true ,
nullptr , *RepR ,
1864 *RepR , RepR->getDebugLoc());
1865 Clone->insertBefore(RepOrWidenR);
1867 VPValue *ExtractOp = Clone->getOperand(0);
1873 Clone->setOperand(0, ExtractOp);
1874 RepR->eraseFromParent();
1886 VPValue *SafeDivisor = Builder.createSelect(
1887 IntrR->getOperand(2), IntrR->getOperand(1),
1889 VPValue *Clone = Builder.createNaryOp(
1890 *
Opc, {IntrR->getOperand(0), SafeDivisor},
1893 IntrR->eraseFromParent();
1902 auto IntroducesBCastOf = [](
const VPValue *
Op) {
1911 return !U->usesScalars(
Op);
1915 if (
any_of(RepOrWidenR->users(), IntroducesBCastOf(RepOrWidenR)) &&
1918 make_filter_range(Op->users(), not_equal_to(RepOrWidenR)),
1919 IntroducesBCastOf(Op)))
1923 auto *IRV = dyn_cast<VPIRValue>(Op);
1924 bool LiveInNeedsBroadcast = IRV && !isa<Constant>(IRV->getValue());
1925 auto *OpR = dyn_cast<VPReplicateRecipe>(Op);
1926 return LiveInNeedsBroadcast || (OpR && OpR->isSingleScalar());
1931 RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(),
1932 true ,
nullptr, *RepOrWidenR);
1933 Clone->insertBefore(RepOrWidenR);
1934 RepOrWidenR->replaceAllUsesWith(Clone);
1936 RepOrWidenR->eraseFromParent();
1972 if (Blend->isNormalized() || !
match(Blend->getMask(0),
m_False()))
1973 UniqueValues.
insert(Blend->getIncomingValue(0));
1974 for (
unsigned I = 1;
I != Blend->getNumIncomingValues(); ++
I)
1976 UniqueValues.
insert(Blend->getIncomingValue(
I));
1978 if (UniqueValues.
size() == 1) {
1979 Blend->replaceAllUsesWith(*UniqueValues.
begin());
1980 Blend->eraseFromParent();
1984 if (Blend->isNormalized())
1990 unsigned StartIndex = 0;
1991 for (
unsigned I = 0;
I != Blend->getNumIncomingValues(); ++
I) {
1996 if (Mask->getNumUsers() == 1 && !
match(Mask,
m_False())) {
2003 OperandsWithMask.
push_back(Blend->getIncomingValue(StartIndex));
2005 for (
unsigned I = 0;
I != Blend->getNumIncomingValues(); ++
I) {
2006 if (
I == StartIndex)
2008 OperandsWithMask.
push_back(Blend->getIncomingValue(
I));
2009 OperandsWithMask.
push_back(Blend->getMask(
I));
2014 OperandsWithMask, *Blend, Blend->getDebugLoc());
2015 NewBlend->insertBefore(&R);
2017 VPValue *DeadMask = Blend->getMask(StartIndex);
2019 Blend->eraseFromParent();
2024 if (NewBlend->getNumOperands() == 3 &&
2026 VPValue *Inc0 = NewBlend->getOperand(0);
2027 VPValue *Inc1 = NewBlend->getOperand(1);
2028 VPValue *OldMask = NewBlend->getOperand(2);
2029 NewBlend->setOperand(0, Inc1);
2030 NewBlend->setOperand(1, Inc0);
2031 NewBlend->setOperand(2, NewMask);
2058 APInt MaxVal = AlignedTC - 1;
2061 unsigned NewBitWidth =
2067 bool MadeChange =
false;
2076 if (!WideIV || !WideIV->isCanonical() ||
2077 WideIV->hasMoreThanOneUniqueUser() ||
2078 NewIVTy == WideIV->getScalarType())
2083 VPUser *SingleUser = WideIV->getSingleUser();
2091 auto *NewStart = Plan.
getZero(NewIVTy);
2092 WideIV->setStartValue(NewStart);
2094 WideIV->setStepValue(NewStep);
2101 Cmp->setOperand(1, NewBTC);
2115 return any_of(
Cond->getDefiningRecipe()->operands(), [&Plan, BestVF, BestUF,
2117 return isConditionTrueViaVFAndUF(C, Plan, BestVF, BestUF, PSE);
2131 const SCEV *VectorTripCount =
2136 "Trip count SCEV must be computable");
2157 auto *Term = &ExitingVPBB->
back();
2170 for (
unsigned Part = 0; Part < UF; ++Part) {
2176 Extracts[Part] = Ext;
2188 match(Phi->getBackedgeValue(),
2190 assert(Index &&
"Expected index from ActiveLaneMask instruction");
2207 "Expected one VPActiveLaneMaskPHIRecipe for each unroll part");
2214 "Expected incoming values of Phi to be ActiveLaneMasks");
2219 EntryALM->setOperand(2, ALMMultiplier);
2220 LoopALM->setOperand(2, ALMMultiplier);
2224 ExtractFromALM(EntryALM, EntryExtracts);
2229 ExtractFromALM(LoopALM, LoopExtracts);
2231 Not->setOperand(0, LoopExtracts[0]);
2234 for (
unsigned Part = 0; Part < UF; ++Part) {
2235 Phis[Part]->setStartValue(EntryExtracts[Part]);
2236 Phis[Part]->setBackedgeValue(LoopExtracts[Part]);
2249 auto *Term = &ExitingVPBB->
back();
2261 const SCEV *VectorTripCount =
2267 "Trip count SCEV must be computable");
2286 Term->setOperand(1, Plan.
getTrue());
2291 {}, Term->getDebugLoc());
2293 Term->eraseFromParent();
2328 R.getVPSingleValue()->replaceAllUsesWith(Trunc);
2338 assert(Plan.
hasVF(BestVF) &&
"BestVF is not available in Plan");
2339 assert(Plan.
hasUF(BestUF) &&
"BestUF is not available in Plan");
2357 RecurKind RK = PhiR->getRecurrenceKind();
2364 RecWithFlags->dropPoisonGeneratingFlags();
2370struct VPCSEDenseMapInfo :
public DenseMapInfo<VPSingleDefRecipe *> {
2372 return Def == getEmptyKey() || Def == getTombstoneKey();
2383 return GEP->getSourceElementType();
2386 .Case<VPVectorPointerRecipe, VPWidenGEPRecipe>(
2387 [](
auto *
I) {
return I->getSourceElementType(); })
2388 .
Default([](
auto *) {
return nullptr; });
2392 static bool canHandle(
const VPSingleDefRecipe *Def) {
2401 if (!
C || (!
C->first && (
C->second == Instruction::InsertValue ||
2402 C->second == Instruction::ExtractValue)))
2408 return !
Def->mayReadFromMemory();
2412 static unsigned getHashValue(
const VPSingleDefRecipe *Def) {
2413 const VPlan *Plan =
Def->getParent()->getPlan();
2414 VPTypeAnalysis TypeInfo(*Plan);
2417 getGEPSourceElementType(Def), TypeInfo.inferScalarType(Def),
2420 if (RFlags->hasPredicate())
2423 return hash_combine(Result, SIVSteps->getInductionOpcode());
2428 static bool isEqual(
const VPSingleDefRecipe *L,
const VPSingleDefRecipe *R) {
2431 if (
L->getVPRecipeID() !=
R->getVPRecipeID() ||
2433 getGEPSourceElementType(L) != getGEPSourceElementType(R) ||
2435 !
equal(
L->operands(),
R->operands()))
2438 "must have valid opcode info for both recipes");
2440 if (LFlags->hasPredicate() &&
2441 LFlags->getPredicate() !=
2445 if (LSIV->getInductionOpcode() !=
2451 const VPRegionBlock *RegionL =
L->getRegion();
2452 const VPRegionBlock *RegionR =
R->getRegion();
2455 L->getParent() !=
R->getParent())
2457 const VPlan *Plan =
L->getParent()->getPlan();
2458 VPTypeAnalysis TypeInfo(*Plan);
2459 return TypeInfo.inferScalarType(L) == TypeInfo.inferScalarType(R);
2475 if (!Def || !VPCSEDenseMapInfo::canHandle(Def))
2479 if (!VPDT.
dominates(V->getParent(), VPBB))
2484 Def->replaceAllUsesWith(V);
2515 "Expected vector prehader's successor to be the vector loop region");
2523 return !Op->isDefinedOutsideLoopRegions();
2526 R.moveBefore(*Preheader, Preheader->
end());
2544 assert(!RepR->isPredicated() &&
2545 "Expected prior transformation of predicated replicates to "
2546 "replicate regions");
2551 if (!RepR->isSingleScalar())
2563 if (
any_of(Def->users(), [&SinkBB, &LoopRegion](
VPUser *U) {
2564 auto *UserR = cast<VPRecipeBase>(U);
2565 VPBasicBlock *Parent = UserR->getParent();
2567 if (SinkBB && SinkBB != Parent)
2572 return UserR->isPhi() || Parent->getEnclosingLoopRegion() ||
2573 Parent->getSinglePredecessor() != LoopRegion;
2583 "Defining block must dominate sink block");
2609 VPValue *ResultVPV = R.getVPSingleValue();
2611 unsigned NewResSizeInBits = MinBWs.
lookup(UI);
2612 if (!NewResSizeInBits)
2625 (void)OldResSizeInBits;
2633 VPW->dropPoisonGeneratingFlags();
2635 if (OldResSizeInBits != NewResSizeInBits &&
2639 Instruction::ZExt, ResultVPV, OldResTy,
nullptr,
2641 Ext->insertAfter(&R);
2643 Ext->setOperand(0, ResultVPV);
2644 assert(OldResSizeInBits > NewResSizeInBits &&
"Nothing to shrink?");
2647 "Only ICmps should not need extending the result.");
2657 for (
unsigned Idx = StartIdx; Idx != R.getNumOperands(); ++Idx) {
2658 auto *
Op = R.getOperand(Idx);
2659 unsigned OpSizeInBits =
2661 if (OpSizeInBits == NewResSizeInBits)
2663 assert(OpSizeInBits > NewResSizeInBits &&
"nothing to truncate");
2664 auto [ProcessedIter, IterIsEmpty] = ProcessedTruncs.
try_emplace(
Op);
2666 R.setOperand(Idx, ProcessedIter->second);
2674 Builder.setInsertPoint(&R);
2676 Builder.createWidenCast(Instruction::Trunc,
Op, NewResTy);
2677 ProcessedIter->second = NewOp;
2678 R.setOperand(Idx, NewOp);
2686 std::optional<VPDominatorTree> VPDT;
2703 assert(VPBB->getNumSuccessors() == 2 &&
2704 "Two successors expected for BranchOnCond");
2705 unsigned RemovedIdx;
2716 "There must be a single edge between VPBB and its successor");
2724 VPBB->back().eraseFromParent();
2736 if (Reachable.contains(
B))
2747 for (
VPValue *Def : R.definedValues())
2748 Def->replaceAllUsesWith(&Tmp);
2749 R.eraseFromParent();
2806 DebugLoc DL = CanonicalIVIncrement->getDebugLoc();
2817 auto *EntryIncrement = Builder.createOverflowingOp(
2819 DL,
"index.part.next");
2825 {EntryIncrement, TC, ALMMultiplier},
DL,
2826 "active.lane.mask.entry");
2833 LaneMaskPhi->insertBefore(*HeaderVPBB, HeaderVPBB->begin());
2838 Builder.setInsertPoint(OriginalTerminator);
2839 auto *InLoopIncrement = Builder.createOverflowingOp(
2841 {CanonicalIVIncrement, &Plan.
getVF()}, {
false,
false},
DL);
2843 {InLoopIncrement, TC, ALMMultiplier},
DL,
2844 "active.lane.mask.next");
2849 auto *NotMask = Builder.createNot(ALM,
DL);
2856 bool UseActiveLaneMaskForControlFlow) {
2860 assert(WideCanonicalIV &&
2861 "Must have widened canonical IV when tail folding!");
2864 if (UseActiveLaneMaskForControlFlow) {
2873 nullptr,
"active.lane.mask");
2889 template <
typename OpTy>
bool match(OpTy *V)
const {
2900template <
typename Op0_t,
typename Op1_t>
2908 case Intrinsic::masked_udiv:
2909 return Intrinsic::vp_udiv;
2910 case Intrinsic::masked_sdiv:
2911 return Intrinsic::vp_sdiv;
2912 case Intrinsic::masked_urem:
2913 return Intrinsic::vp_urem;
2914 case Intrinsic::masked_srem:
2915 return Intrinsic::vp_srem;
2917 return std::nullopt;
2934 VPValue *Addr, *Mask, *EndPtr;
2937 auto AdjustEndPtr = [&CurRecipe, &EVL](
VPValue *EndPtr) {
2939 EVLEndPtr->insertBefore(&CurRecipe);
2940 EVLEndPtr->setOperand(1, &EVL);
2944 auto GetVPReverse = [&CurRecipe, &EVL, &TypeInfo, Plan,
2949 Intrinsic::experimental_vp_reverse, {V, Plan->
getTrue(), &EVL},
2951 Reverse->insertBefore(&CurRecipe);
2955 if (
match(&CurRecipe,
2966 Mask = GetVPReverse(Mask);
2967 Addr = AdjustEndPtr(EndPtr);
2970 LoadR->insertBefore(&CurRecipe);
2972 Intrinsic::experimental_vp_reverse, {LoadR, Plan->
getTrue(), &EVL},
2980 StoredVal, EVL, Mask);
2982 if (
match(&CurRecipe,
2986 Mask = GetVPReverse(Mask);
2987 Addr = AdjustEndPtr(EndPtr);
2988 StoredVal = GetVPReverse(ReversedVal);
2990 StoredVal, EVL, Mask);
2994 if (Rdx->isConditional() &&
2999 if (Interleave->getMask() &&
3004 if (
match(&CurRecipe,
3013 Intrinsic::vp_merge, {Mask,
LHS,
RHS, &EVL},
3027 if (
match(&CurRecipe,
3038 {IntrR->getOperand(0),
3039 IntrR->getOperand(1),
3040 Mask ? Mask : Plan->
getTrue(), &EVL},
3041 IntrR->getResultType(), {}, {},
DL);
3050 VPValue *HeaderMask =
nullptr, *EVL =
nullptr;
3055 HeaderMask = R.getVPSingleValue();
3067 NewR->insertBefore(R);
3068 for (
auto [Old, New] :
3069 zip_equal(R->definedValues(), NewR->definedValues()))
3070 Old->replaceAllUsesWith(New);
3084 Merge->insertBefore(LogicalAnd);
3085 LogicalAnd->replaceAllUsesWith(
Merge);
3093 R->eraseFromParent();
3110 "User of VF that we can't transform to EVL.");
3120 "Only users of VFxUF should be VPWidenPointerInductionRecipe and the "
3121 "increment of the canonical induction.");
3137 MaxEVL = Builder.createScalarZExtOrTrunc(
3141 Builder.setInsertPoint(Header, Header->getFirstNonPhi());
3142 VPValue *PrevEVL = Builder.createScalarPhi(
3156 Intrinsic::experimental_vp_splice,
3157 {V1, V2, Imm, Plan.
getTrue(), PrevEVL, &EVL},
3161 R.getVPSingleValue()->replaceAllUsesWith(VPSplice);
3174 if (match(&R, m_ComputeReductionResult(m_Select(m_Specific(HeaderMask),
3175 m_VPValue(), m_VPValue()))))
3176 return R.getOperand(0)->getDefiningRecipe()->getRegion() ==
3177 Plan.getVectorLoopRegion();
3189 VPValue *EVLMask = Builder.createICmp(
3249 VPlan &Plan,
const std::optional<unsigned> &MaxSafeElements) {
3261 auto *CurrentIteration =
3263 CurrentIteration->insertBefore(*Header, Header->begin());
3264 VPBuilder Builder(Header, Header->getFirstNonPhi());
3267 VPPhi *AVLPhi = Builder.createScalarPhi(
3271 if (MaxSafeElements) {
3281 Builder.setInsertPoint(CanonicalIVIncrement);
3285 OpVPEVL = Builder.createScalarZExtOrTrunc(
3286 OpVPEVL, CanIVTy, I32Ty, CanonicalIVIncrement->getDebugLoc());
3288 auto *NextIter = Builder.createAdd(
3289 OpVPEVL, CurrentIteration, CanonicalIVIncrement->getDebugLoc(),
3290 "current.iteration.next", CanonicalIVIncrement->getNoWrapFlags());
3291 CurrentIteration->addOperand(NextIter);
3295 "avl.next", {
true,
false});
3303 CanonicalIV->replaceAllUsesWith(CurrentIteration);
3304 CanonicalIVIncrement->setOperand(0, CanonicalIV);
3318 assert(!CurrentIteration &&
3319 "Found multiple CurrentIteration. Only one expected");
3320 CurrentIteration = PhiR;
3324 if (!CurrentIteration)
3335 CurrentIteration->
getDebugLoc(),
"current.iteration.iv");
3344 CanIVInc->eraseFromParent();
3353 if (Header->empty())
3362 if (!
match(EVLPhi->getBackedgeValue(),
3375 [[maybe_unused]]
bool FoundAVLNext =
3378 assert(FoundAVLNext &&
"Didn't find AVL backedge?");
3386 [[maybe_unused]]
bool FoundIncrement =
match(
3393 "Expected BranchOnCond with ICmp comparing CanIV + VFxUF with vector "
3398 LatchBr->setOperand(
3409 return R->getRegion() ||
3413 for (
const SCEV *Stride : StridesMap.
values()) {
3416 const APInt *StrideConst;
3439 RewriteMap[StrideV] = PSE.
getSCEV(StrideV);
3446 const SCEV *ScevExpr = ExpSCEV->getSCEV();
3449 if (NewSCEV != ScevExpr) {
3451 ExpSCEV->replaceAllUsesWith(NewExp);
3462 auto CollectPoisonGeneratingInstrsInBackwardSlice([&](
VPRecipeBase *Root) {
3467 while (!Worklist.
empty()) {
3470 if (!Visited.
insert(CurRec).second)
3492 RecWithFlags->isDisjoint()) {
3495 Builder.createAdd(
A,
B, RecWithFlags->getDebugLoc());
3496 New->setUnderlyingValue(RecWithFlags->getUnderlyingValue());
3497 RecWithFlags->replaceAllUsesWith(New);
3498 RecWithFlags->eraseFromParent();
3501 RecWithFlags->dropPoisonGeneratingFlags();
3506 assert((!Instr || !Instr->hasPoisonGeneratingFlags()) &&
3507 "found instruction with poison generating flags not covered by "
3508 "VPRecipeWithIRFlags");
3513 if (
VPRecipeBase *OpDef = Operand->getDefiningRecipe())
3521 auto IsNotHeaderMask = [&Plan](
VPValue *Mask) {
3533 VPRecipeBase *AddrDef = WidenRec->getAddr()->getDefiningRecipe();
3534 if (AddrDef && WidenRec->isConsecutive() &&
3535 IsNotHeaderMask(WidenRec->getMask()))
3536 CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
3538 VPRecipeBase *AddrDef = InterleaveRec->getAddr()->getDefiningRecipe();
3539 if (AddrDef && IsNotHeaderMask(InterleaveRec->getMask()))
3540 CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
3550 const bool &EpilogueAllowed) {
3551 if (InterleaveGroups.empty())
3562 IRMemberToRecipe[&MemR->getIngredient()] = MemR;
3569 for (
const auto *IG : InterleaveGroups) {
3574 return !IRMemberToRecipe.contains(Member);
3578 auto *Start = IRMemberToRecipe.
lookup(IG->getMember(0));
3582 StoredValues.
push_back(StoreR->getStoredValue());
3583 for (
unsigned I = 1;
I < IG->getFactor(); ++
I) {
3589 StoredValues.
push_back(StoreR->getStoredValue());
3593 bool NeedsMaskForGaps =
3594 (IG->requiresScalarEpilogue() && !EpilogueAllowed) ||
3595 (!StoredValues.
empty() && !IG->isFull());
3598 auto *InsertPos = IRMemberToRecipe.
lookup(IRInsertPos);
3607 VPValue *Addr = Start->getAddr();
3616 assert(IG->getIndex(IRInsertPos) != 0 &&
3617 "index of insert position shouldn't be zero");
3621 IG->getIndex(IRInsertPos),
3625 Addr =
B.createNoWrapPtrAdd(InsertPos->getAddr(), OffsetVPV, NW);
3631 if (IG->isReverse()) {
3634 -(int64_t)IG->getFactor(), NW, InsertPosR->
getDebugLoc());
3635 ReversePtr->insertBefore(InsertPosR);
3639 IG, Addr, StoredValues, InsertPos->getMask(), NeedsMaskForGaps,
3641 VPIG->insertBefore(InsertPosR);
3644 for (
unsigned i = 0; i < IG->getFactor(); ++i)
3647 if (!Member->getType()->isVoidTy()) {
3706 AddOp = Instruction::Add;
3707 MulOp = Instruction::Mul;
3709 AddOp =
ID.getInductionOpcode();
3710 MulOp = Instruction::FMul;
3718 Step = Builder.createScalarCast(Instruction::Trunc, Step, Ty,
DL);
3719 Start = Builder.createScalarCast(Instruction::Trunc, Start, Ty,
DL);
3728 Init = Builder.createWidenCast(Instruction::UIToFP,
Init, StepTy);
3733 Init = Builder.createNaryOp(MulOp, {
Init, SplatStep}, Flags);
3734 Init = Builder.createNaryOp(AddOp, {SplatStart,
Init}, Flags,
3750 Builder.setInsertPoint(R->getParent(), std::next(R->getIterator()));
3754 VF = Builder.createScalarCast(Instruction::CastOps::UIToFP, VF, StepTy,
3757 VF = Builder.createScalarZExtOrTrunc(VF, StepTy,
3760 Inc = Builder.createNaryOp(MulOp, {Step, VF}, Flags);
3767 auto *
Next = Builder.createNaryOp(AddOp, {Prev, Inc}, Flags,
3770 WidePHI->addOperand(
Next);
3798 VPlan *Plan = R->getParent()->getPlan();
3799 VPValue *Start = R->getStartValue();
3800 VPValue *Step = R->getStepValue();
3801 VPValue *VF = R->getVFValue();
3803 assert(R->getInductionDescriptor().getKind() ==
3805 "Not a pointer induction according to InductionDescriptor!");
3808 "Recipe should have been replaced");
3814 VPPhi *ScalarPtrPhi = Builder.createScalarPhi(Start,
DL,
"pointer.phi");
3818 Builder.setInsertPoint(R->getParent(), R->getParent()->getFirstNonPhi());
3821 Offset = Builder.createOverflowingOp(Instruction::Mul, {
Offset, Step});
3823 Builder.createWidePtrAdd(ScalarPtrPhi,
Offset,
DL,
"vector.gep");
3824 R->replaceAllUsesWith(PtrAdd);
3829 VF = Builder.createScalarZExtOrTrunc(VF, StepTy, TypeInfo.
inferScalarType(VF),
3831 VPValue *Inc = Builder.createOverflowingOp(Instruction::Mul, {Step, VF});
3834 Builder.createPtrAdd(ScalarPtrPhi, Inc,
DL,
"ptr.ind");
3842 VPValue *Step = R->getStepValue();
3843 VPValue *Index = R->getIndex();
3847 ? Builder.createScalarSExtOrTrunc(
3849 : Builder.createScalarCast(Instruction::SIToFP, Index, StepTy,
3851 switch (R->getInductionKind()) {
3854 "Index type does not match StartValue type");
3855 return R->replaceAllUsesWith(Builder.createAdd(
3856 Start, Builder.createOverflowingOp(Instruction::Mul, {Index, Step})));
3859 return R->replaceAllUsesWith(Builder.createPtrAdd(
3860 Start, Builder.createOverflowingOp(Instruction::Mul, {Index, Step})));
3865 (FPBinOp->
getOpcode() == Instruction::FAdd ||
3866 FPBinOp->
getOpcode() == Instruction::FSub) &&
3867 "Original BinOp should be defined for FP induction");
3869 VPValue *
FMul = Builder.createNaryOp(Instruction::FMul, {Step, Index}, FMF);
3870 return R->replaceAllUsesWith(
3871 Builder.createNaryOp(FPBinOp->
getOpcode(), {Start, FMul}, FMF));
3884 if (!R->isReplicator())
3888 R->dissolveToCFGLoop();
3909 assert(Br->getNumOperands() == 2 &&
3910 "BranchOnTwoConds must have exactly 2 conditions");
3914 assert(Successors.size() == 3 &&
3915 "BranchOnTwoConds must have exactly 3 successors");
3920 VPValue *Cond0 = Br->getOperand(0);
3921 VPValue *Cond1 = Br->getOperand(1);
3926 !BrOnTwoCondsBB->
getParent() &&
"regions must already be dissolved");
3939 Br->eraseFromParent();
3962 WidenIVR->replaceAllUsesWith(PtrAdd);
3978 VPValue *CanIV = WideCanIV->getCanonicalIV();
3980 VPValue *Step = WideCanIV->getStepValue();
3983 "Expected unroller to have materialized step for UF != 1");
3988 Step = Builder.createAdd(
3991 Builder.createAdd(CanIV, Step, WideCanIV->getDebugLoc(),
"vec.iv");
4000 for (
unsigned I = 1;
I != Blend->getNumIncomingValues(); ++
I)
4001 Select = Builder.createSelect(Blend->getMask(
I),
4002 Blend->getIncomingValue(
I),
Select,
4003 R.getDebugLoc(),
"predphi", *Blend);
4004 Blend->replaceAllUsesWith(
Select);
4009 if (!VEPR->getOffset()) {
4011 "Expected unroller to have materialized offset for UF != 1");
4012 VEPR->materializeOffset();
4027 for (
VPValue *
Op : LastActiveL->operands()) {
4028 VPValue *NotMask = Builder.createNot(
Op, LastActiveL->getDebugLoc());
4033 VPValue *FirstInactiveLane = Builder.createFirstActiveLane(
4034 NotMasks, LastActiveL->getDebugLoc(),
"first.inactive.lane");
4040 Builder.createSub(FirstInactiveLane, One,
4041 LastActiveL->getDebugLoc(),
"last.active.lane");
4051 assert(VPI->isMasked() &&
4052 "Unmasked MaskedCond should be simplified earlier");
4053 VPI->replaceAllUsesWith(Builder.createNaryOp(
4065 Instruction::Add, VPI->operands(), VPI->getNoWrapFlags(),
4066 VPI->getDebugLoc());
4067 VPI->replaceAllUsesWith(
Add);
4076 DebugLoc DL = BranchOnCountInst->getDebugLoc();
4079 ToRemove.push_back(BranchOnCountInst);
4094 ? Instruction::UIToFP
4095 : Instruction::Trunc;
4096 VectorStep = Builder.createWidenCast(CastOp, VectorStep, IVTy);
4102 Builder.createWidenCast(Instruction::Trunc, ScalarStep, IVTy);
4108 MulOpc = Instruction::FMul;
4109 Flags = VPI->getFastMathFlags();
4111 MulOpc = Instruction::Mul;
4116 MulOpc, {VectorStep, ScalarStep}, Flags, R.getDebugLoc());
4118 VPI->replaceAllUsesWith(VectorStep);
4124 R->eraseFromParent();
4132 struct EarlyExitInfo {
4143 if (Pred == MiddleVPBB)
4148 VPValue *CondOfEarlyExitingVPBB;
4149 [[maybe_unused]]
bool Matched =
4150 match(EarlyExitingVPBB->getTerminator(),
4152 assert(Matched &&
"Terminator must be BranchOnCond");
4156 VPBuilder EarlyExitingBuilder(EarlyExitingVPBB->getTerminator());
4157 auto *CondToEarlyExit = EarlyExitingBuilder.
createNaryOp(
4159 TrueSucc == ExitBlock
4160 ? CondOfEarlyExitingVPBB
4161 : EarlyExitingBuilder.
createNot(CondOfEarlyExitingVPBB));
4167 "exit condition must dominate the latch");
4176 assert(!Exits.
empty() &&
"must have at least one early exit");
4183 for (
const auto &[Num, VPB] :
enumerate(RPOT))
4185 llvm::sort(Exits, [&RPOIdx](
const EarlyExitInfo &
A,
const EarlyExitInfo &
B) {
4186 return RPOIdx[
A.EarlyExitingVPBB] < RPOIdx[
B.EarlyExitingVPBB];
4192 for (
unsigned I = 0;
I + 1 < Exits.
size(); ++
I)
4193 for (
unsigned J =
I + 1; J < Exits.
size(); ++J)
4195 Exits[
I].EarlyExitingVPBB) &&
4196 "RPO sort must place dominating exits before dominated ones");
4202 VPValue *Combined = Exits[0].CondToExit;
4203 for (
const EarlyExitInfo &Info :
drop_begin(Exits))
4204 Combined = Builder.createLogicalOr(Combined, Info.CondToExit);
4210 "Early exit store masking not implemented");
4214 for (
unsigned Idx = 0; Idx != Exits.
size(); ++Idx) {
4218 VectorEarlyExitVPBBs[Idx] = VectorEarlyExitVPBB;
4226 Exits.
size() == 1 ? VectorEarlyExitVPBBs[0]
4260 for (
auto [Exit, VectorEarlyExitVPBB] :
4261 zip_equal(Exits, VectorEarlyExitVPBBs)) {
4262 auto &[EarlyExitingVPBB, EarlyExitVPBB,
_] = Exit;
4274 ExitIRI->getIncomingValueForBlock(EarlyExitingVPBB);
4275 VPValue *NewIncoming = IncomingVal;
4277 VPBuilder EarlyExitBuilder(VectorEarlyExitVPBB);
4282 ExitIRI->removeIncomingValueFor(EarlyExitingVPBB);
4283 ExitIRI->addOperand(NewIncoming);
4286 EarlyExitingVPBB->getTerminator()->eraseFromParent();
4320 bool IsLastDispatch = (
I + 2 == Exits.
size());
4322 IsLastDispatch ? VectorEarlyExitVPBBs.
back()
4328 VectorEarlyExitVPBBs[
I]->setPredecessors({CurrentBB});
4331 CurrentBB = FalseBB;
4338 "Unexpected terminator");
4339 auto *IsLatchExitTaken =
4341 LatchExitingBranch->getOperand(1));
4343 DebugLoc LatchDL = LatchExitingBranch->getDebugLoc();
4344 LatchExitingBranch->eraseFromParent();
4345 Builder.setInsertPoint(LatchVPBB);
4347 {IsAnyExitTaken, IsLatchExitTaken}, LatchDL);
4349 LatchVPBB->
setSuccessors({DispatchVPBB, MiddleVPBB, HeaderVPBB});
4359 Type *RedTy = Ctx.Types.inferScalarType(Red);
4360 VPValue *VecOp = Red->getVecOp();
4362 assert(!Red->isPartialReduction() &&
4363 "This path does not support partial reductions");
4366 auto IsExtendedRedValidAndClampRange =
4379 "getExtendedReductionCost only supports integer types");
4380 ExtRedCost = Ctx.TTI.getExtendedReductionCost(
4381 Opcode, ExtOpc == Instruction::CastOps::ZExt, RedTy, SrcVecTy,
4382 Red->getFastMathFlags(),
CostKind);
4383 return ExtRedCost.
isValid() && ExtRedCost < ExtCost + RedCost;
4391 IsExtendedRedValidAndClampRange(
4394 Ctx.Types.inferScalarType(
A)))
4413 if (Opcode != Instruction::Add && Opcode != Instruction::Sub &&
4414 Opcode != Instruction::FAdd)
4417 assert(!Red->isPartialReduction() &&
4418 "This path does not support partial reductions");
4419 Type *RedTy = Ctx.Types.inferScalarType(Red);
4422 auto IsMulAccValidAndClampRange =
4429 Ext0 ? Ctx.Types.inferScalarType(Ext0->getOperand(0)) : RedTy;
4435 (Ext0->getOpcode() != Ext1->getOpcode() ||
4436 Ext0->getOpcode() == Instruction::CastOps::FPExt))
4440 !Ext0 || Ext0->getOpcode() == Instruction::CastOps::ZExt;
4442 MulAccCost = Ctx.TTI.getMulAccReductionCost(IsZExt, Opcode, RedTy,
4449 ExtCost += Ext0->computeCost(VF, Ctx);
4451 ExtCost += Ext1->computeCost(VF, Ctx);
4453 ExtCost += OuterExt->computeCost(VF, Ctx);
4455 return MulAccCost.
isValid() &&
4456 MulAccCost < ExtCost + MulCost + RedCost;
4461 VPValue *VecOp = Red->getVecOp();
4499 Builder.createWidenCast(Instruction::CastOps::Trunc, ValB, NarrowTy);
4500 Type *WideTy = Ctx.Types.inferScalarType(ExtA);
4501 ValB = ExtB = Builder.createWidenCast(ExtOpc, Trunc, WideTy);
4502 Mul->setOperand(1, ExtB);
4512 ExtendAndReplaceConstantOp(RecipeA, RecipeB,
B,
Mul);
4517 IsMulAccValidAndClampRange(
Mul, RecipeA, RecipeB,
nullptr)) {
4524 if (!
Sub && IsMulAccValidAndClampRange(
Mul,
nullptr,
nullptr,
nullptr))
4541 ExtendAndReplaceConstantOp(Ext0, Ext1,
B,
Mul);
4550 (Ext->getOpcode() == Ext0->getOpcode() || Ext0 == Ext1) &&
4551 Ext0->getOpcode() == Ext1->getOpcode() &&
4552 IsMulAccValidAndClampRange(
Mul, Ext0, Ext1, Ext) &&
Mul->hasOneUse()) {
4554 Ext0->getOpcode(), Ext0->getOperand(0), Ext->getResultType(),
nullptr,
4555 *Ext0, *Ext0, Ext0->getDebugLoc());
4556 NewExt0->insertBefore(Ext0);
4561 Ext->getResultType(),
nullptr, *Ext1,
4562 *Ext1, Ext1->getDebugLoc());
4565 Mul->setOperand(0, NewExt0);
4566 Mul->setOperand(1, NewExt1);
4567 Red->setOperand(1,
Mul);
4581 assert(!Red->isPartialReduction() &&
4582 "This path does not support partial reductions");
4585 auto IP = std::next(Red->getIterator());
4586 auto *VPBB = Red->getParent();
4596 Red->replaceAllUsesWith(AbstractR);
4626 for (
VPValue *VPV : VPValues) {
4635 if (
User->usesScalars(VPV))
4638 HoistPoint = HoistBlock->
begin();
4642 "All users must be in the vector preheader or dominated by it");
4647 VPV->replaceUsesWithIf(Broadcast,
4648 [VPV, Broadcast](
VPUser &U,
unsigned Idx) {
4649 return Broadcast != &U && !U.usesScalars(VPV);
4660 return CommonMetadata;
4663template <
unsigned Opcode>
4668 static_assert(Opcode == Instruction::Load || Opcode == Instruction::Store,
4669 "Only Load and Store opcodes supported");
4670 constexpr bool IsLoad = (Opcode == Instruction::Load);
4676 return TypeInfo.
inferScalarType(IsLoad ? Recipe : Recipe->getOperand(0));
4681 for (
auto Recipes :
Groups) {
4682 if (Recipes.size() < 2)
4690 VPValue *MaskI = RecipeI->getMask();
4691 Type *TypeI = GetLoadStoreValueType(RecipeI);
4697 bool HasComplementaryMask =
false;
4702 VPValue *MaskJ = RecipeJ->getMask();
4703 Type *TypeJ = GetLoadStoreValueType(RecipeJ);
4704 if (TypeI == TypeJ) {
4714 if (HasComplementaryMask) {
4715 assert(Group.
size() >= 2 &&
"must have at least 2 entries");
4725template <
typename InstType>
4743 for (
auto &Group :
Groups) {
4763 return R->isSingleScalar() == IsSingleScalar;
4765 "all members in group must agree on IsSingleScalar");
4770 LoadWithMinAlign->getUnderlyingInstr(), {EarliestLoad->getOperand(0)},
4771 IsSingleScalar,
nullptr, *EarliestLoad, CommonMetadata);
4773 UnpredicatedLoad->insertBefore(EarliestLoad);
4777 Load->replaceAllUsesWith(UnpredicatedLoad);
4778 Load->eraseFromParent();
4788 if (!StoreLoc || !StoreLoc->AATags.Scope)
4794 StoresToSink.
end());
4798 SinkStoreInfo SinkInfo(StoresToSinkSet, *StoresToSink[0], PSE, L, TypeInfo);
4812 for (
auto &Group :
Groups) {
4825 VPValue *SelectedValue = Group[0]->getOperand(0);
4828 bool IsSingleScalar = Group[0]->isSingleScalar();
4829 for (
unsigned I = 1;
I < Group.size(); ++
I) {
4830 assert(IsSingleScalar == Group[
I]->isSingleScalar() &&
4831 "all members in group must agree on IsSingleScalar");
4832 VPValue *Mask = Group[
I]->getMask();
4834 SelectedValue = Builder.createSelect(Mask,
Value, SelectedValue,
4843 StoreWithMinAlign->getUnderlyingInstr(),
4844 {SelectedValue, LastStore->getOperand(1)}, IsSingleScalar,
4845 nullptr, *LastStore, CommonMetadata);
4846 UnpredicatedStore->insertBefore(*InsertBB, LastStore->
getIterator());
4850 Store->eraseFromParent();
4857 assert(Plan.
hasVF(BestVF) &&
"BestVF is not available in Plan");
4858 assert(Plan.
hasUF(BestUF) &&
"BestUF is not available in Plan");
4922 auto UsesVectorOrInsideReplicateRegion = [DefR, LoopRegion](
VPUser *U) {
4924 return !U->usesScalars(DefR) || ParentRegion != LoopRegion;
4931 none_of(DefR->users(), UsesVectorOrInsideReplicateRegion))
4941 DefR->replaceUsesWithIf(
4942 BuildVector, [BuildVector, &UsesVectorOrInsideReplicateRegion](
4944 return &U != BuildVector && UsesVectorOrInsideReplicateRegion(&U);
4958 for (
VPValue *Def : R.definedValues()) {
4971 auto IsCandidateUnpackUser = [Def](
VPUser *U) {
4973 return U->usesScalars(Def) &&
4976 if (
none_of(Def->users(), IsCandidateUnpackUser))
4983 Unpack->insertAfter(&R);
4984 Def->replaceUsesWithIf(Unpack,
4985 [&IsCandidateUnpackUser](
VPUser &U,
unsigned) {
4986 return IsCandidateUnpackUser(&U);
4995 bool RequiresScalarEpilogue,
VPValue *Step,
4996 std::optional<uint64_t> MaxRuntimeStep) {
5007 assert(StepR->getParent() == VectorPHVPBB &&
5008 "Step must be defined in VectorPHVPBB");
5010 InsertPt = std::next(StepR->getIterator());
5012 VPBuilder Builder(VectorPHVPBB, InsertPt);
5018 if (!RequiresScalarEpilogue &&
match(TC,
m_APInt(TCVal)) && MaxRuntimeStep &&
5030 if (TailByMasking) {
5031 TC = Builder.createAdd(
5042 Builder.createNaryOp(Instruction::URem, {TC, Step},
5051 if (RequiresScalarEpilogue) {
5053 "requiring scalar epilogue is not supported with fail folding");
5056 R = Builder.createSelect(IsZero, Step, R);
5070 "VF and VFxUF must be materialized together");
5082 Builder.createElementCount(TCTy, VFEC * Plan.
getConcreteUF());
5089 VPValue *RuntimeVF = Builder.createElementCount(TCTy, VFEC);
5093 BC, [&VF](
VPUser &U,
unsigned) {
return !U.usesScalars(&VF); });
5097 VPValue *MulByUF = Builder.createOverflowingOp(
5109 BasicBlock *EntryBB = Entry->getIRBasicBlock();
5117 const SCEV *Expr = ExpSCEV->getSCEV();
5120 ExpandedSCEVs[ExpSCEV->getSCEV()] = Res;
5125 ExpSCEV->eraseFromParent();
5128 "VPExpandSCEVRecipes must be at the beginning of the entry block, "
5129 "before any VPIRInstructions");
5132 auto EI = Entry->begin();
5142 return ExpandedSCEVs;
5154 VPValue *OpV,
unsigned Idx,
bool IsScalable) {
5158 return Member0Op == OpV;
5162 return !IsScalable && !W->getMask() && W->isConsecutive() &&
5165 return IR->getInterleaveGroup()->isFull() &&
IR->getVPValue(Idx) == OpV;
5182 for (
unsigned Idx = 0; Idx != WideMember0->getNumOperands(); ++Idx) {
5185 OpsI.
push_back(
Op->getDefiningRecipe()->getOperand(Idx));
5190 if (
any_of(
enumerate(OpsI), [WideMember0, Idx, IsScalable](
const auto &
P) {
5191 const auto &[
OpIdx, OpV] =
P;
5206 if (!InterleaveR || InterleaveR->
getMask())
5207 return std::nullopt;
5209 Type *GroupElementTy =
nullptr;
5213 [&TypeInfo, GroupElementTy](
VPValue *
Op) {
5214 return TypeInfo.inferScalarType(Op) == GroupElementTy;
5216 return std::nullopt;
5221 [&TypeInfo, GroupElementTy](
VPValue *
Op) {
5222 return TypeInfo.inferScalarType(Op) == GroupElementTy;
5224 return std::nullopt;
5228 if (IG->getFactor() != IG->getNumMembers())
5229 return std::nullopt;
5235 assert(
Size.isScalable() == VF.isScalable() &&
5236 "if Size is scalable, VF must be scalable and vice versa");
5237 return Size.getKnownMinValue();
5241 unsigned MinVal = VF.getKnownMinValue();
5243 if (IG->getFactor() == MinVal && GroupSize == GetVectorBitWidthForVF(VF))
5246 return std::nullopt;
5254 return RepR && RepR->isSingleScalar();
5261 auto *R = V->getDefiningRecipe();
5270 for (
unsigned Idx = 0,
E = WideMember0->getNumOperands(); Idx !=
E; ++Idx)
5271 WideMember0->setOperand(
5280 auto *LI =
cast<LoadInst>(LoadGroup->getInterleaveGroup()->getInsertPos());
5282 LoadGroup->getMask(),
true,
5283 {}, LoadGroup->getDebugLoc());
5284 L->insertBefore(LoadGroup);
5290 assert(RepR->isSingleScalar() && RepR->getOpcode() == Instruction::Load &&
5291 "must be a single scalar load");
5292 NarrowedOps.
insert(RepR);
5297 VPValue *PtrOp = WideLoad->getAddr();
5299 PtrOp = VecPtr->getOperand(0);
5304 nullptr, {}, *WideLoad);
5305 N->insertBefore(WideLoad);
5310std::unique_ptr<VPlan>
5330 "unexpected branch-on-count");
5334 std::optional<ElementCount> VFToOptimize;
5348 if (R.mayWriteToMemory() && !InterleaveR)
5354 return any_of(V->users(), [&](VPUser *U) {
5355 auto *UR = cast<VPRecipeBase>(U);
5356 return UR->getParent()->getParent() != VectorLoop;
5373 std::optional<ElementCount> NarrowedVF =
5375 if (!NarrowedVF || (VFToOptimize && NarrowedVF != VFToOptimize))
5377 VFToOptimize = NarrowedVF;
5380 if (InterleaveR->getStoredValues().empty())
5385 auto *Member0 = InterleaveR->getStoredValues()[0];
5395 VPRecipeBase *DefR = Op.value()->getDefiningRecipe();
5398 auto *IR = dyn_cast<VPInterleaveRecipe>(DefR);
5399 return IR && IR->getInterleaveGroup()->isFull() &&
5400 IR->getVPValue(Op.index()) == Op.value();
5409 VFToOptimize->isScalable()))
5414 if (StoreGroups.empty())
5418 bool RequiresScalarEpilogue =
5429 std::unique_ptr<VPlan> NewPlan;
5431 NewPlan = std::unique_ptr<VPlan>(Plan.
duplicate());
5432 Plan.
setVF(*VFToOptimize);
5433 NewPlan->removeVF(*VFToOptimize);
5439 for (
auto *StoreGroup : StoreGroups) {
5446 StoreGroup->getDebugLoc());
5447 S->insertBefore(StoreGroup);
5448 StoreGroup->eraseFromParent();
5454 Type *CanIVTy = VectorLoop->getCanonicalIVType();
5460 if (VFToOptimize->isScalable()) {
5463 Step = PHBuilder.createOverflowingOp(Instruction::Mul, {VScale,
UF},
5471 materializeVectorTripCount(Plan, VectorPH,
false,
5472 RequiresScalarEpilogue, Step);
5477 removeDeadRecipes(Plan);
5480 "All VPVectorPointerRecipes should have been removed");
5496 "must have a BranchOnCond");
5499 if (VF.
isScalable() && VScaleForTuning.has_value())
5500 VectorStep *= *VScaleForTuning;
5501 assert(VectorStep > 0 &&
"trip count should not be zero");
5505 MiddleTerm->setMetadata(LLVMContext::MD_prof, BranchWeights);
5525 "Cannot handle loops with uncountable early exits");
5532 assert(RecurSplice &&
"expected FirstOrderRecurrenceSplice");
5539 if (
any_of(RecurSplice->users(),
5540 [](
VPUser *U) { return !cast<VPRecipeBase>(U)->getRegion(); }) &&
5621 {},
"vector.recur.extract.for.phi");
5624 ExitPhi->replaceUsesOfWith(ExtractR, PenultimateElement);
5638 VPValue *WidenIVCandidate = BinOp->getOperand(0);
5639 VPValue *InvariantCandidate = BinOp->getOperand(1);
5641 std::swap(WidenIVCandidate, InvariantCandidate);
5655 auto *ClonedOp = BinOp->
clone();
5656 if (ClonedOp->getOperand(0) == WidenIV) {
5657 ClonedOp->setOperand(0, ScalarIV);
5659 assert(ClonedOp->getOperand(1) == WidenIV &&
"one operand must be WideIV");
5660 ClonedOp->setOperand(1, ScalarIV);
5675 auto CheckSentinel = [&SE](
const SCEV *IVSCEV,
5676 bool UseMax) -> std::optional<APSInt> {
5678 for (
bool Signed : {
true,
false}) {
5687 return std::nullopt;
5695 PhiR->getRecurrenceKind()))
5704 VPValue *BackedgeVal = PhiR->getBackedgeValue();
5718 !
match(FindLastSelect,
5727 IVOfExpressionToSink ? IVOfExpressionToSink : FindLastExpression, PSE,
5733 "IVOfExpressionToSink not being an AddRec must imply "
5734 "FindLastExpression not being an AddRec.");
5745 std::optional<APSInt> SentinelVal = CheckSentinel(IVSCEV, UseMax);
5746 bool UseSigned = SentinelVal && SentinelVal->isSigned();
5753 if (IVOfExpressionToSink) {
5754 const SCEV *FindLastExpressionSCEV =
5756 if (
match(FindLastExpressionSCEV,
5759 if (
auto NewSentinel =
5760 CheckSentinel(FindLastExpressionSCEV, NewUseMax)) {
5763 SentinelVal = *NewSentinel;
5764 UseSigned = NewSentinel->isSigned();
5766 IVSCEV = FindLastExpressionSCEV;
5767 IVOfExpressionToSink =
nullptr;
5777 if (AR->hasNoSignedWrap())
5779 else if (AR->hasNoUnsignedWrap())
5789 VPValue *NewFindLastSelect = BackedgeVal;
5791 if (!SentinelVal || IVOfExpressionToSink) {
5794 DebugLoc DL = FindLastSelect->getDefiningRecipe()->getDebugLoc();
5795 VPBuilder LoopBuilder(FindLastSelect->getDefiningRecipe());
5796 if (FindLastSelect->getDefiningRecipe()->getOperand(1) == PhiR)
5797 SelectCond = LoopBuilder.
createNot(SelectCond);
5804 if (SelectCond !=
Cond || IVOfExpressionToSink) {
5807 IVOfExpressionToSink ? IVOfExpressionToSink : FindLastExpression,
5816 VPIRFlags Flags(MinMaxKind,
false,
false,
5822 NewFindLastSelect, Flags, ExitDL);
5825 VPValue *VectorRegionExitingVal = ReducedIV;
5826 if (IVOfExpressionToSink)
5827 VectorRegionExitingVal =
5829 ReducedIV, IVOfExpressionToSink);
5832 VPValue *StartVPV = PhiR->getStartValue();
5839 NewRdxResult = MiddleBuilder.
createSelect(Cmp, VectorRegionExitingVal,
5849 AnyOfPhi->insertAfter(PhiR);
5856 OrVal, VectorRegionExitingVal, StartVPV, ExitDL);
5869 PhiR->hasUsesOutsideReductionChain());
5870 NewPhiR->insertBefore(PhiR);
5871 PhiR->replaceAllUsesWith(NewPhiR);
5872 PhiR->eraseFromParent();
5879struct ReductionExtend {
5880 Type *SrcType =
nullptr;
5881 ExtendKind Kind = ExtendKind::PR_None;
5887struct ExtendedReductionOperand {
5891 ReductionExtend ExtendA, ExtendB;
5899struct VPPartialReductionChain {
5902 VPWidenRecipe *ReductionBinOp =
nullptr;
5904 ExtendedReductionOperand ExtendedOp;
5911 unsigned AccumulatorOpIdx;
5912 unsigned ScaleFactor;
5925 if (!
Op->hasOneUse() ||
5931 auto *Trunc = Builder.createWidenCast(Instruction::CastOps::Trunc,
5932 Op->getOperand(1), NarrowTy);
5934 Op->setOperand(1, Builder.createWidenCast(ExtOpc, Trunc, WideTy));
5943 auto *
Sub =
Op->getOperand(0)->getDefiningRecipe();
5945 assert(Ext->getOpcode() ==
5947 "Expected both the LHS and RHS extends to be the same");
5948 bool IsSigned = Ext->getOpcode() == Instruction::SExt;
5951 auto *FreezeX = Builder.insert(
new VPWidenRecipe(Instruction::Freeze, {
X}));
5952 auto *FreezeY = Builder.insert(
new VPWidenRecipe(Instruction::Freeze, {
Y}));
5953 auto *
Max = Builder.insert(
5955 {FreezeX, FreezeY}, SrcTy));
5956 auto *Min = Builder.insert(
5958 {FreezeX, FreezeY}, SrcTy));
5961 return Builder.createWidenCast(Instruction::CastOps::ZExt, AbsDiff,
5974 if (!
Mul->hasOneUse() ||
5975 (Ext->getOpcode() != MulLHS->getOpcode() && MulLHS != MulRHS) ||
5976 MulLHS->getOpcode() != MulRHS->getOpcode())
5979 Mul->setOperand(0, Builder.createWidenCast(MulLHS->getOpcode(),
5980 MulLHS->getOperand(0),
5981 Ext->getResultType()));
5982 Mul->setOperand(1, MulLHS == MulRHS
5983 ?
Mul->getOperand(0)
5984 : Builder.createWidenCast(MulRHS->getOpcode(),
5985 MulRHS->getOperand(0),
5986 Ext->getResultType()));
5995 VPValue *VecOp = Red->getVecOp();
6029static void transformToPartialReduction(
const VPPartialReductionChain &Chain,
6037 WidenRecipe->
getOperand(1 - Chain.AccumulatorOpIdx));
6053 if (WidenRecipe->
getOpcode() == Instruction::Sub &&
6061 Builder.insert(NegRecipe);
6062 ExtendedOp = NegRecipe;
6066 "FSub chain reduction isn't supported");
6069 ExtendedOp = optimizeExtendsForPartialReduction(ExtendedOp, TypeInfo);
6079 assert((!ExitValue || IsLastInChain) &&
6080 "if we found ExitValue, it must match RdxPhi's backedge value");
6091 PartialRed->insertBefore(WidenRecipe);
6099 E->insertBefore(WidenRecipe);
6100 PartialRed->replaceAllUsesWith(
E);
6113 auto *NewScaleFactor = Plan.
getConstantInt(32, Chain.ScaleFactor);
6114 StartInst->setOperand(2, NewScaleFactor);
6122 VPValue *OldStartValue = StartInst->getOperand(0);
6123 StartInst->setOperand(0, StartInst->getOperand(1));
6127 assert(RdxResult &&
"Could not find reduction result");
6130 unsigned SubOpc = Chain.RK ==
RecurKind::FSub ? Instruction::BinaryOps::FSub
6131 : Instruction::BinaryOps::Sub;
6137 [&NewResult](
VPUser &U,
unsigned Idx) {
return &
U != NewResult; });
6143 const VPPartialReductionChain &Link,
6146 const ExtendedReductionOperand &ExtendedOp = Link.ExtendedOp;
6147 std::optional<unsigned> BinOpc = std::nullopt;
6149 if (ExtendedOp.ExtendB.Kind != ExtendKind::PR_None)
6150 BinOpc = ExtendedOp.ExtendsUser->
getOpcode();
6152 std::optional<llvm::FastMathFlags>
Flags;
6156 auto GetLinkOpcode = [&Link]() ->
unsigned {
6159 return Instruction::Add;
6161 return Instruction::FAdd;
6163 return Link.ReductionBinOp->
getOpcode();
6168 GetLinkOpcode(), ExtendedOp.ExtendA.SrcType, ExtendedOp.ExtendB.SrcType,
6169 RdxType, VF, ExtendedOp.ExtendA.Kind, ExtendedOp.ExtendB.Kind, BinOpc,
6190static std::optional<ExtendedReductionOperand>
6194 "Op should be operand of UpdateR");
6202 if (
Op->hasOneUse() &&
6212 if (LHSInputType != RHSInputType ||
6213 LHSExt->getOpcode() != RHSExt->getOpcode())
6214 return std::nullopt;
6217 return ExtendedReductionOperand{
6219 {LHSInputType, getPartialReductionExtendKind(LHSExt)},
6223 std::optional<TTI::PartialReductionExtendKind> OuterExtKind;
6226 VPValue *CastSource = CastRecipe->getOperand(0);
6227 OuterExtKind = getPartialReductionExtendKind(CastRecipe);
6237 if (UpdateR->
getOpcode() == Instruction::Sub)
6238 return std::nullopt;
6239 }
else if (UpdateR->
getOpcode() == Instruction::Add ||
6240 UpdateR->
getOpcode() == Instruction::FAdd) {
6244 return ExtendedReductionOperand{
6251 if (!
Op->hasOneUse())
6252 return std::nullopt;
6257 return std::nullopt;
6267 return std::nullopt;
6271 ExtendKind LHSExtendKind = getPartialReductionExtendKind(LHSCast);
6274 const APInt *RHSConst =
nullptr;
6280 return std::nullopt;
6284 if (Cast && OuterExtKind &&
6285 getPartialReductionExtendKind(Cast) != OuterExtKind)
6286 return std::nullopt;
6288 Type *RHSInputType = LHSInputType;
6289 ExtendKind RHSExtendKind = LHSExtendKind;
6292 RHSExtendKind = getPartialReductionExtendKind(RHSCast);
6295 return ExtendedReductionOperand{
6296 MulOp, {LHSInputType, LHSExtendKind}, {RHSInputType, RHSExtendKind}};
6303static std::optional<SmallVector<VPPartialReductionChain>>
6311 return std::nullopt;
6322 VPValue *CurrentValue = ExitValue;
6323 while (CurrentValue != RedPhiR) {
6326 return std::nullopt;
6333 std::optional<ExtendedReductionOperand> ExtendedOp =
6334 matchExtendedReductionOperand(UpdateR,
Op, TypeInfo);
6336 ExtendedOp = matchExtendedReductionOperand(UpdateR, PrevValue, TypeInfo);
6338 return std::nullopt;
6342 Type *ExtSrcType = ExtendedOp->ExtendA.SrcType;
6345 return std::nullopt;
6350 VPPartialReductionChain Link(
6351 {UpdateR, *ExtendedOp, RK,
6355 CurrentValue = PrevValue;
6360 std::reverse(Chain.
begin(), Chain.
end());
6379 if (
auto Chains = getScaledReductions(RedPhiR, CostCtx,
Range))
6380 ChainsByPhi.
try_emplace(RedPhiR, std::move(*Chains));
6383 if (ChainsByPhi.
empty())
6390 for (
const auto &[
_, Chains] : ChainsByPhi)
6391 for (
const VPPartialReductionChain &Chain : Chains) {
6392 PartialReductionOps.
insert(Chain.ExtendedOp.ExtendsUser);
6393 ScaledReductionMap[Chain.ReductionBinOp] = Chain.ScaleFactor;
6399 auto ExtendUsersValid = [&](
VPValue *Ext) {
6401 return PartialReductionOps.contains(cast<VPRecipeBase>(U));
6405 auto IsProfitablePartialReductionChainForVF =
6412 for (
const VPPartialReductionChain &Link : Chain) {
6413 const ExtendedReductionOperand &ExtendedOp = Link.ExtendedOp;
6414 InstructionCost LinkCost = getPartialReductionLinkCost(CostCtx, Link, VF);
6418 PartialCost += LinkCost;
6419 RegularCost += Link.ReductionBinOp->
computeCost(VF, CostCtx);
6421 if (ExtendedOp.ExtendB.Kind != ExtendKind::PR_None)
6422 RegularCost += ExtendedOp.ExtendsUser->
computeCost(VF, CostCtx);
6425 RegularCost += Extend->computeCost(VF, CostCtx);
6427 return PartialCost.
isValid() && PartialCost < RegularCost;
6435 for (
auto &[RedPhiR, Chains] : ChainsByPhi) {
6436 for (
const VPPartialReductionChain &Chain : Chains) {
6437 if (!
all_of(Chain.ExtendedOp.ExtendsUser->operands(), ExtendUsersValid)) {
6441 auto UseIsValid = [&, RedPhiR = RedPhiR](
VPUser *U) {
6443 return PhiR == RedPhiR;
6445 return Chain.ScaleFactor == ScaledReductionMap.
lookup_or(R, 0) ||
6451 if (!
all_of(Chain.ReductionBinOp->users(), UseIsValid)) {
6460 auto *RepR = dyn_cast<VPReplicateRecipe>(U);
6461 return RepR && RepR->getOpcode() == Instruction::Store;
6472 return IsProfitablePartialReductionChainForVF(Chains, VF);
6478 for (
auto &[Phi, Chains] : ChainsByPhi)
6479 for (
const VPPartialReductionChain &Chain : Chains)
6480 transformToPartialReduction(Chain, CostCtx.
Types, Plan, Phi);
6494 if (VPI && VPI->getUnderlyingValue() &&
6506 New->insertBefore(VPI);
6507 if (VPI->getOpcode() == Instruction::Load)
6508 VPI->replaceAllUsesWith(New->getVPSingleValue());
6509 VPI->eraseFromParent();
6514 FinalRedStoresBuilder))
6523 ReplaceWith(Histogram);
6531 ReplaceWith(Recipe);
6554 if (VPI->mayHaveSideEffects())
6558 if (VPI->isMasked() && !VPI->isSafeToSpeculativelyExecute())
6563 if (VPI->getOpcode() == Instruction::Add &&
6572 I, VPI->operandsWithoutMask(),
true,
6573 nullptr, *VPI, *VPI, VPI->getDebugLoc());
6574 Recipe->insertBefore(VPI);
6575 VPI->replaceAllUsesWith(Recipe);
6576 VPI->eraseFromParent();
6587 switch (Param.ParamKind) {
6588 case VFParamKind::Vector:
6589 case VFParamKind::GlobalPredicate:
6591 case VFParamKind::OMP_Uniform:
6592 return SE->isSCEVable(Types.inferScalarType(Args[Param.ParamPos])) &&
6593 SE->isLoopInvariant(
6594 vputils::getSCEVExprForVPValue(Args[Param.ParamPos], PSE, L),
6596 case VFParamKind::OMP_Linear:
6597 return match(vputils::getSCEVExprForVPValue(Args[Param.ParamPos], PSE, L),
6598 m_scev_AffineAddRec(
6599 m_SCEV(), m_scev_SpecificSInt(Param.LinearStepOrPos),
6600 m_SpecificLoop(L)));
6617 const auto *It =
find_if(Mappings, [&](
const VFInfo &Info) {
6618 return Info.Shape.VF == VF && (!MaskRequired || Info.isMasked()) &&
6621 if (It == Mappings.end())
6628struct CallWideningDecision {
6630 CallWideningDecision(KindTy Kind, Function *Variant =
nullptr)
6653 return CallWideningDecision::KindTy::Scalarize;
6663 return CallWideningDecision::KindTy::Scalarize;
6667 false, VF, CostCtx);
6670 CostCtx.
L, CostCtx.
Types);
6682 return CallWideningDecision::KindTy::Intrinsic;
6686 if (VecFunc && ScalarCost >= VecCallCost)
6687 return {CallWideningDecision::KindTy::VectorVariant, VecFunc};
6689 return CallWideningDecision::KindTy::Scalarize;
6700 if (!VPI || !VPI->getUnderlyingValue() ||
6701 VPI->getOpcode() != Instruction::Call)
6706 VPI->op_begin() + CI->arg_size());
6708 CallWideningDecision Decision =
6717 switch (Decision.Kind) {
6718 case CallWideningDecision::KindTy::Intrinsic: {
6722 *VPI, VPI->getDebugLoc());
6725 case CallWideningDecision::KindTy::VectorVariant: {
6729 VPValue *Mask = VPI->isMasked() ? VPI->getMask() : Plan.
getTrue();
6730 Ops.push_back(Mask);
6732 Ops.push_back(VPI->getOperand(VPI->getNumOperandsWithoutMask() - 1));
6734 *VPI, VPI->getDebugLoc());
6737 case CallWideningDecision::KindTy::Scalarize:
6749 return !Legacy || *Legacy == Decision.Kind;
6751 "VPlan call widening decision must match legacy decision");
6754 VPI->replaceAllUsesWith(Replacement);
6759 VPI->eraseFromParent();
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
This file implements a class to represent arbitrary precision integral constant values and operations...
ReachingDefInfo InstSet & ToRemove
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static bool isEqual(const Function &Caller, const Function &Callee)
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
static cl::opt< IntrinsicCostStrategy > IntrinsicCost("intrinsic-cost-strategy", cl::desc("Costing strategy for intrinsic instructions"), cl::init(IntrinsicCostStrategy::InstructionCost), cl::values(clEnumValN(IntrinsicCostStrategy::InstructionCost, "instruction-cost", "Use TargetTransformInfo::getInstructionCost"), clEnumValN(IntrinsicCostStrategy::IntrinsicCost, "intrinsic-cost", "Use TargetTransformInfo::getIntrinsicInstrCost"), clEnumValN(IntrinsicCostStrategy::TypeBasedIntrinsicCost, "type-based-intrinsic-cost", "Calculate the intrinsic cost based only on argument types")))
static bool isSentinel(const DWARFDebugNames::AttributeEncoding &AE)
iv Induction Variable Users
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
Legalize the Machine IR a function s Machine IR
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first DebugLoc that has line number information, given a range of instructions.
This file provides utility analysis objects describing memory locations.
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
const SmallVectorImpl< MachineOperand > & Cond
This is the interface for a metadata-based scoped no-alias analysis.
This file defines generic set operations that may be used on sets of different types,...
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static SymbolRef::Type getType(const Symbol *Sym)
This file implements the TypeSwitch template, which mimics a switch() statement whose cases are type ...
This file implements dominator tree analysis for a single level of a VPlan's H-CFG.
This file contains the declarations of different VPlan-related auxiliary helpers.
This file declares the class VPlanVerifier, which contains utility functions to check the consistency...
This file contains the declarations of the Vectorization Plan base classes:
static const X86InstrFMA3Group Groups[]
static const uint32_t IV[8]
Helper for extra no-alias checks via known-safe recipe and SCEV.
SinkStoreInfo(const SmallPtrSetImpl< VPRecipeBase * > &ExcludeRecipes, VPReplicateRecipe &GroupLeader, PredicatedScalarEvolution &PSE, const Loop &L, VPTypeAnalysis &TypeInfo)
bool shouldSkip(VPRecipeBase &R) const
Return true if R should be skipped during alias checking, either because it's in the exclude set or b...
Class for arbitrary precision integers.
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
uint64_t getZExtValue() const
Get zero extended value.
unsigned getActiveBits() const
Compute the number of active bits in the value.
APInt abs() const
Get the absolute value.
unsigned getBitWidth() const
Return the number of bits in the APInt.
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
An arbitrary precision integer that knows its signedness.
static APSInt getMinValue(uint32_t numBits, bool Unsigned)
Return the APSInt representing the minimum integer value with the given bit width and signedness.
static APSInt getMaxValue(uint32_t numBits, bool Unsigned)
Return the APSInt representing the maximum integer value with the given bit width and signedness.
@ NoAlias
The two locations do not alias at all.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & back() const
Get the last element.
const T & front() const
Get the first element.
LLVM Basic Block Representation.
const Function * getParent() const
Return the enclosing method, or null if none.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
bool isNoBuiltin() const
Return true if the call should not be treated as a call to a builtin.
This class represents a function call, abstracting a target machine's calling convention.
@ ICMP_ULT
unsigned less than
@ ICMP_ULE
unsigned less or equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
This class represents a range of values.
LLVM_ABI bool contains(const APInt &Val) const
Return true if the specified value is in the set.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
static DebugLoc getCompilerGenerated()
static DebugLoc getUnknown()
ValueT lookup(const_arg_type_t< KeyT > Val) const
Return the entry for the specified key, or a default constructed value if no such entry exists.
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
ValueT lookup_or(const_arg_type_t< KeyT > Val, U &&Default) const
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
constexpr bool isVector() const
One or more elements.
static constexpr ElementCount getScalable(ScalarTy MinVal)
constexpr bool isScalar() const
Exactly one element.
Utility class for floating point operations which can have information about relaxed accuracy require...
FastMathFlags getFastMathFlags() const
Convenience function for getting all the fast-math flags.
Convenience struct for specifying and reasoning about fast-math flags.
Represents flags for the getelementptr instruction/expression.
GEPNoWrapFlags withoutNoUnsignedWrap() const
static GEPNoWrapFlags none()
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
A struct for saving information about induction variables.
static LLVM_ABI InductionDescriptor getCanonicalIntInduction(Type *Ty, ScalarEvolution &SE)
Returns the canonical integer induction for type Ty with start = 0 and step = 1.
InductionKind
This enum represents the kinds of inductions that we support.
@ IK_NoInduction
Not an induction variable.
@ IK_FpInduction
Floating point induction variable.
@ IK_PtrInduction
Pointer induction var. Step = C.
@ IK_IntInduction
Integer induction variable. Step = C.
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
static InstructionCost getInvalid(CostType Val=0)
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
The group of interleaved loads/stores sharing the same stride and close to each other.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
static bool getDecisionAndClampRange(const std::function< bool(ElementCount)> &Predicate, VFRange &Range)
Test a Predicate on a Range of VF's.
Represents a single loop in the control flow graph.
LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
This class implements a map that also provides access to all stored values in a deterministic order.
ValueT lookup(const KeyT &Key) const
std::pair< iterator, bool > try_emplace(const KeyT &Key, Ts &&...Args)
Representation for a specific memory location.
AAMDNodes AATags
The metadata nodes which describes the aliasing of the location (each member is null if that kind of ...
Function * getFunction(StringRef Name) const
Look up the specified function in the module symbol table.
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
Post-order traversal of a graph.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
LLVM_ABI const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
static LLVM_ABI unsigned getOpcode(RecurKind Kind)
Returns the opcode corresponding to the RecurrenceKind.
unsigned getOpcode() const
static bool isFindLastRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
RegionT * getParent() const
Get the parent of the Region.
This class uses information about analyzed scalars to rewrite expressions in canonical form.
LLVM_ABI Value * expandCodeFor(SCEVUse SH, Type *Ty, BasicBlock::iterator I)
Insert code to directly compute the specified SCEV expression into the program.
static const SCEV * rewrite(const SCEV *Scev, ScalarEvolution &SE, ValueToSCEVMapTy &Map)
This class represents an analyzed expression in the program.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
LLVM_ABI const SCEV * getUDivExpr(SCEVUse LHS, SCEVUse RHS)
Get a canonical unsigned division expression, or something simpler if possible.
const DataLayout & getDataLayout() const
Return the DataLayout associated with the module this SCEV instance is operating on.
LLVM_ABI const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
LLVM_ABI bool isKnownNonZero(const SCEV *S)
Test if the given expression is known to be non-zero.
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI const SCEV * getMinusSCEV(SCEVUse LHS, SCEVUse RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
ConstantRange getSignedRange(const SCEV *S)
Determine the signed range for a particular SCEV.
LLVM_ABI bool isKnownPositive(const SCEV *S)
Test if the given expression is known to be positive.
LLVM_ABI const SCEV * getElementCount(Type *Ty, ElementCount EC, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
ConstantRange getUnsignedRange(const SCEV *S)
Determine the unsigned range for a particular SCEV.
LLVM_ABI const SCEV * getMulExpr(SmallVectorImpl< SCEVUse > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
LLVM_ABI bool isKnownPredicate(CmpPredicate Pred, SCEVUse LHS, SCEVUse RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
static LLVM_ABI AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB)
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Provides information about what library functions are available for the current target.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
This class implements a switch-like dispatch statement for a value of 'T' using dyn_cast functionalit...
TypeSwitch< T, ResultT > & Case(CallableT &&caseFn)
Add a case on the given type.
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
bool isStructTy() const
True if this is an instance of StructType.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static SmallVector< VFInfo, 8 > getMappings(const CallInst &CI)
Retrieve all the VFInfo instances associated to the CallInst CI.
A recipe for generating the active lane mask for the vector loop that is used to predicate the vector...
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
RecipeListTy::iterator iterator
Instruction iterators...
iterator begin()
Recipe iterator methods.
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
VPBasicBlock * splitAt(iterator SplitAt)
Split current block at SplitAt by inserting a new block between the current block and its successors ...
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
const VPRecipeBase & back() const
A recipe for vectorizing a phi-node as a sequence of mask-based select instructions.
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
void setMask(unsigned Idx, VPValue *V)
Set mask number Idx to V.
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have...
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccs as successor of this VPBlockBase.
VPRegionBlock * getParent()
const VPBasicBlock * getExitingBasicBlock() const
size_t getNumSuccessors() const
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
const VPBlocksTy & getPredecessors() const
const std::string & getName() const
void clearSuccessors()
Remove all the successors of this block.
VPBlockBase * getSinglePredecessor() const
const VPBasicBlock * getEntryBasicBlock() const
VPBlockBase * getSingleHierarchicalPredecessor()
VPBlockBase * getSingleSuccessor() const
const VPBlocksTy & getSuccessors() const
static auto blocksAs(T &&Range)
Return an iterator range over Range with each block cast to BlockTy.
static void insertOnEdge(VPBlockBase *From, VPBlockBase *To, VPBlockBase *BlockPtr)
Inserts BlockPtr on the edge between From and To.
static bool isLatch(const VPBlockBase *VPB, const VPDominatorTree &VPDT)
Returns true if VPB is a loop latch, using isHeader().
static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBases IfTrue and IfFalse after BlockPtr.
static void connectBlocks(VPBlockBase *From, VPBlockBase *To, unsigned PredIdx=-1u, unsigned SuccIdx=-1u)
Connect VPBlockBases From and To bi-directionally.
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
static auto blocksOnly(T &&Range)
Return an iterator range over Range which only includes BlockTy blocks.
static void transferSuccessors(VPBlockBase *Old, VPBlockBase *New)
Transfer successors from Old to New. New must have no successors.
static SmallVector< VPBasicBlock * > blocksInSingleSuccessorChainBetween(VPBasicBlock *FirstBB, VPBasicBlock *LastBB)
Returns the blocks between FirstBB and LastBB, where FirstBB to LastBB forms a single-successor chain.
A recipe for generating conditional branches on the bits of a mask.
RAII object that stores the current insertion point and restores it when the object is destroyed.
VPlan-based builder utility analogous to IRBuilder.
VPInstruction * createFirstActiveLane(ArrayRef< VPValue * > Masks, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPValue * createScalarZExtOrTrunc(VPValue *Op, Type *ResultTy, Type *SrcTy, DebugLoc DL)
VPInstruction * createNot(VPValue *Operand, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createAnyOfReduction(VPValue *ChainOp, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL=DebugLoc::getUnknown())
Create an AnyOf reduction pattern: or-reduce ChainOp, freeze the result, then select between TrueVal ...
VPInstruction * createLogicalAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPDerivedIVRecipe * createDerivedIV(InductionDescriptor::InductionKind Kind, FPMathOperator *FPBinOp, VPIRValue *Start, VPValue *Current, VPValue *Step)
Convert the input value Current to the corresponding value of an induction with Start and Step values...
VPInstruction * createScalarCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, DebugLoc DL, const VPIRMetadata &Metadata={})
VPWidenPHIRecipe * createWidenPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
static VPBuilder getToInsertAfter(VPRecipeBase *R)
Create a VPBuilder to insert after R.
VPPhi * createScalarPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={})
VPInstruction * createICmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new ICmp VPInstruction with predicate Pred and operands A and B.
VPInstruction * createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={})
void setInsertPoint(VPBasicBlock *TheBB)
This specifies that created VPInstructions should be appended to the end of the specified block.
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.
A recipe for generating the phi node tracking the current scalar iteration index.
unsigned getNumDefinedValues() const
Returns the number of values defined by the VPDef.
VPValue * getVPSingleValue()
Returns the only VPValue defined by the VPDef.
VPValue * getVPValue(unsigned I)
Returns the VPValue with index I defined by the VPDef.
ArrayRef< VPRecipeValue * > definedValues()
Returns an ArrayRef of the values defined by the VPDef.
A recipe for converting the input value IV value to the corresponding value of an IV with different s...
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
bool properlyDominates(const VPRecipeBase *A, const VPRecipeBase *B)
A recipe to combine multiple recipes into a single 'expression' recipe, which should be considered a ...
A recipe representing a sequence of load -> update -> store as part of a histogram operation.
A special type of VPBasicBlock that wraps an existing IR basic block.
Class to record and manage LLVM IR flags.
static VPIRFlags getDefaultFlags(unsigned Opcode)
Returns default flags for Opcode for opcodes that support it, asserts otherwise.
LLVM_ABI_FOR_TEST FastMathFlags getFastMathFlags() const
static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)
Create a new VPIRPhi for I, if it is a PHINode, otherwise create a VPIRInstruction.
This is a concrete Recipe that models a single VPlan-level instruction.
unsigned getNumOperandsWithoutMask() const
Returns the number of operands, excluding the mask if the VPInstruction is masked.
@ ExtractLane
Extracts a single lane (first operand) from a set of vector operands.
@ ExtractPenultimateElement
@ Unpack
Extracts all lanes from its (non-scalable) vector operand.
@ ReductionStartVector
Start vector for reductions with 3 operands: the original start value, the identity value for the red...
@ BuildVector
Creates a fixed-width vector containing all operands.
@ BuildStructVector
Given operands of (the same) struct type, creates a struct of fixed- width vectors each containing a ...
@ CanonicalIVIncrementForPart
@ ComputeReductionResult
Reduce the operands to the final reduction result using the operation specified via the operation's V...
const InterleaveGroup< Instruction > * getInterleaveGroup() const
VPValue * getMask() const
Return the mask used by this recipe.
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
A recipe for interleaved memory operations with vector-predication intrinsics.
VPInterleaveRecipe is a recipe for transforming an interleave group of load or stores into one wide l...
VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when control converges back from ...
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
VPBasicBlock * getParent()
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Helper class to create VPRecipes from IR instructions.
VPHistogramRecipe * widenIfHistogram(VPInstruction *VPI)
If VPI represents a histogram operation (as determined by LoopVectorizationLegality) make that safe f...
VPRecipeBase * tryToWidenMemory(VPInstruction *VPI, VFRange &Range)
Check if the load or store instruction VPI should widened for Range.Start and potentially masked.
bool replaceWithFinalIfReductionStore(VPInstruction *VPI, VPBuilder &FinalRedStoresBuilder)
If VPI is a store of a reduction into an invariant address, delete it.
VPReplicateRecipe * handleReplication(VPInstruction *VPI, VFRange &Range)
Build a VPReplicationRecipe for VPI.
A recipe to represent inloop reduction operations with vector-predication intrinsics,...
A recipe for handling reduction phis.
void setVFScaleFactor(unsigned ScaleFactor)
Set the VFScaleFactor for this reduction phi.
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by, or 1 if it isn't scaled.
RecurKind getRecurrenceKind() const
Returns the recurrence kind of the reduction.
A recipe to represent inloop, ordered or partial reduction operations.
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
const VPBlockBase * getEntry() const
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
VPInstruction * getOrCreateCanonicalIVIncrement()
Get the canonical IV increment instruction if it exists.
void setExiting(VPBlockBase *ExitingBlock)
Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.
Type * getCanonicalIVType() const
Return the type of the canonical IV for loop regions.
bool hasCanonicalIVNUW() const
Indicates if NUW is set for the canonical IV increment, for loop regions.
void clearCanonicalIVNUW(VPInstruction *Increment)
Unsets NUW for the canonical IV increment Increment, for loop regions.
VPRegionValue * getCanonicalIV()
Return the canonical induction variable of the region, null for replicating regions.
const VPBlockBase * getExiting() const
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
bool isSingleScalar() const
static InstructionCost computeCallCost(Function *CalledFn, Type *ResultTy, ArrayRef< const VPValue * > ArgOps, bool IsSingleScalar, ElementCount VF, VPCostContext &Ctx)
Return the cost of scalarizing a call to CalledFn with argument operands ArgOps for a given VF.
bool isPredicated() const
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...
VPSingleDefRecipe is a base class for recipes that model a sequence of one or more output IR that def...
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
VPSingleDefRecipe * clone() override=0
Clone the current recipe.
An analysis for type-inference for VPValues.
LLVMContext & getContext()
Return the LLVMContext used by the analysis.
Type * inferScalarType(const VPValue *V)
Infer the type of V. Returns the scalar type of V.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
void setOperand(unsigned I, VPValue *New)
unsigned getNumOperands() const
VPValue * getOperand(unsigned N) const
void addOperand(VPValue *Operand)
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Value * getLiveInIRValue() const
Return the underlying IR value for a VPIRValue.
bool isDefinedOutsideLoopRegions() const
Returns true if the VPValue is defined outside any loop.
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
void setUnderlyingValue(Value *Val)
void replaceAllUsesWith(VPValue *New)
unsigned getNumUsers() const
void replaceUsesWithIf(VPValue *New, llvm::function_ref< bool(VPUser &U, unsigned Idx)> ShouldReplace)
Go through the uses list for this VPValue and make each use point to New if the callback ShouldReplac...
A recipe to compute a pointer to the last element of each part of a widened memory access for widened...
A recipe for widening Call instructions using library calls.
static InstructionCost computeCallCost(Function *Variant, VPCostContext &Ctx)
Return the cost of widening a call using the vector function Variant.
VPWidenCastRecipe is a recipe to create vector cast instructions.
Instruction::CastOps getOpcode() const
A recipe for handling GEP instructions.
Base class for widened induction (VPWidenIntOrFpInductionRecipe and VPWidenPointerInductionRecipe),...
VPIRValue * getStartValue() const
Returns the start value of the induction.
VPValue * getStepValue()
Returns the step value of the induction.
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
A recipe for handling phi nodes of integer and floating-point inductions, producing their vector valu...
VPIRValue * getStartValue() const
Returns the start value of the induction.
VPValue * getSplatVFValue() const
If the recipe has been unrolled, return the VPValue for the induction increment, otherwise return nul...
VPValue * getLastUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the last unrolled part,...
A recipe for widening vector intrinsics.
static InstructionCost computeCallCost(Intrinsic::ID ID, ArrayRef< const VPValue * > Operands, const VPRecipeWithIRFlags &R, ElementCount VF, VPCostContext &Ctx)
Compute the cost of a vector intrinsic with ID and Operands.
A common mixin class for widening memory operations.
virtual VPRecipeBase * getAsRecipe()=0
Return a VPRecipeBase* to the current object.
A recipe for widened phis.
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenRecipe.
VPWidenRecipe * clone() override
Clone the current recipe.
unsigned getOpcode() const
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
VPIRValue * getLiveIn(Value *V) const
Return the live-in VPIRValue for V, if there is one or nullptr otherwise.
bool hasVF(ElementCount VF) const
const DataLayout & getDataLayout() const
LLVMContext & getContext() const
VPBasicBlock * getEntry()
bool hasScalableVF() const
VPValue * getTripCount() const
The trip count of the original loop.
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
iterator_range< SmallSetVector< ElementCount, 2 >::iterator > vectorFactors() const
Returns an iterator range over all VFs of the plan.
VPIRValue * getFalse()
Return a VPIRValue wrapping i1 false.
VPSymbolicValue & getVFxUF()
Returns VF * UF of the vector loop region.
VPIRValue * getAllOnesValue(Type *Ty)
Return a VPIRValue wrapping the AllOnes value of type Ty.
VPRegionBlock * createReplicateRegion(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="")
Create a new replicate region with Entry, Exiting and Name.
auto getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
bool hasUF(unsigned UF) const
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
VPSymbolicValue & getVectorTripCount()
The vector trip count.
VPValue * getBackedgeTakenCount() const
VPIRValue * getOrAddLiveIn(Value *V)
Gets the live-in VPIRValue for V or adds a new live-in (if none exists yet) for V.
VPIRValue * getZero(Type *Ty)
Return a VPIRValue wrapping the null value of type Ty.
void setVF(ElementCount VF)
bool isUnrolled() const
Returns true if the VPlan already has been unrolled, i.e.
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
unsigned getConcreteUF() const
Returns the concrete UF of the plan, after unrolling.
void resetTripCount(VPValue *NewTripCount)
Resets the trip count for the VPlan.
VPBasicBlock * getMiddleBlock()
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
VPIRValue * getTrue()
Return a VPIRValue wrapping i1 true.
VPBasicBlock * getVectorPreheader() const
Returns the preheader of the vector loop region, if one exists, or null otherwise.
VPSymbolicValue & getUF()
Returns the UF of the vector loop region.
bool hasScalarVFOnly() const
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
VPSymbolicValue & getVF()
Returns the VF of the vector loop region.
bool hasScalarTail() const
Returns true if the scalar tail may execute after the vector loop, i.e. if the middle block is a predecessor of the scalar preheader.
LLVM_ABI_FOR_TEST VPlan * duplicate()
Clone the current VPlan, update all VPValues of the new VPlan and cloned recipes to refer to the clon...
VPIRValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPIRValue wrapping a ConstantInt with the given type and value.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
iterator_range< user_iterator > users()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
constexpr bool hasKnownScalarFactor(const FixedOrScalableQuantity &RHS) const
Returns true if there exists a value X where RHS.multiplyCoefficientBy(X) will result in a value whos...
constexpr ScalarTy getFixedValue() const
constexpr ScalarTy getKnownScalarFactor(const FixedOrScalableQuantity &RHS) const
Returns a value X where RHS.multiplyCoefficientBy(X) will result in a value whose quantity matches ou...
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
An efficient, type-erasing, non-owning reference to a callable.
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_ABI APInt RoundingUDiv(const APInt &A, const APInt &B, APInt::Rounding RM)
Return A unsigned-divided by B, rounded by the given rounding mode.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
std::variant< std::monostate, Loc::Single, Loc::Multi, Loc::MMI, Loc::EntryValue > Variant
Alias for the std::variant specialization base class of DbgVariable.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
match_isa< To... > m_Isa()
match_combine_or< Ty... > m_CombineOr(const Ty &...Ps)
Combine pattern matchers matching any of Ps patterns.
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
auto m_Cmp()
Matches any compare instruction and ignores it.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_MaskedStore(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
Matches MaskedStore Intrinsic.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
LogicalOp_match< LHS, RHS, Instruction::And > m_LogicalAnd(const LHS &L, const RHS &R)
Matches L && R either in the form of L & R or L ? R : false.
BinaryOp_match< LHS, RHS, Instruction::FMul > m_FMul(const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, OpTy > m_ZExtOrSelf(const OpTy &Op)
bool match(Val *V, const Pattern &P)
match_deferred< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
auto match_fn(const Pattern &P)
A match functor that can be used as a UnaryPredicate in functional algorithms like all_of.
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_MaskedLoad(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
Matches MaskedLoad Intrinsic.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
SpecificCmpClass_match< LHS, RHS, CmpInst > m_SpecificCmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
CastInst_match< OpTy, FPExtInst > m_FPExt(const OpTy &Op)
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::UDiv > m_UDiv(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
BinaryOp_match< LHS, RHS, Instruction::FAdd, true > m_c_FAdd(const LHS &L, const RHS &R)
Matches FAdd with LHS and RHS in either order.
LogicalOp_match< LHS, RHS, Instruction::And, true > m_c_LogicalAnd(const LHS &L, const RHS &R)
Matches L && R with LHS and RHS in either order.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
BinaryOp_match< LHS, RHS, Instruction::Mul, true > m_c_Mul(const LHS &L, const RHS &R)
Matches a Mul with LHS and RHS in either order.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
auto m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
bind_cst_ty m_scev_APInt(const APInt *&C)
Match an SCEV constant and bind it to an APInt.
bool match(const SCEV *S, const Pattern &P)
SCEVAffineAddRec_match< Op0_t, Op1_t, match_isa< const Loop > > m_scev_AffineAddRec(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::ExtractLastLane, VPInstruction_match< VPInstruction::ExtractLastPart, Op0_t > > m_ExtractLastLaneOfLastPart(const Op0_t &Op0)
AllRecipe_commutative_match< Instruction::And, Op0_t, Op1_t > m_c_BinaryAnd(const Op0_t &Op0, const Op1_t &Op1)
Match a binary AND operation.
AllRecipe_match< Instruction::Or, Op0_t, Op1_t > m_BinaryOr(const Op0_t &Op0, const Op1_t &Op1)
Match a binary OR operation.
VPInstruction_match< VPInstruction::AnyOf > m_AnyOf()
AllRecipe_commutative_match< Instruction::Or, Op0_t, Op1_t > m_c_BinaryOr(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::ComputeReductionResult, Op0_t > m_ComputeReductionResult(const Op0_t &Op0)
auto m_WidenAnyExtend(const Op0_t &Op0)
match_bind< VPIRValue > m_VPIRValue(VPIRValue *&V)
Match a VPIRValue.
VPInstruction_match< VPInstruction::StepVector > m_StepVector()
auto m_VPPhi(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::BranchOnTwoConds > m_BranchOnTwoConds()
AllRecipe_match< Opcode, Op0_t, Op1_t > m_Binary(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::LastActiveLane, Op0_t > m_LastActiveLane(const Op0_t &Op0)
auto m_WidenIntrinsic(const T &...Ops)
VPInstruction_match< VPInstruction::ExitingIVValue, Op0_t > m_ExitingIVValue(const Op0_t &Op0)
VPInstruction_match< Instruction::ExtractElement, Op0_t, Op1_t > m_ExtractElement(const Op0_t &Op0, const Op1_t &Op1)
specific_intval< 1 > m_False()
VPInstruction_match< VPInstruction::ExtractLastLane, Op0_t > m_ExtractLastLane(const Op0_t &Op0)
VPInstruction_match< VPInstruction::ActiveLaneMask, Op0_t, Op1_t, Op2_t > m_ActiveLaneMask(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
match_bind< VPSingleDefRecipe > m_VPSingleDefRecipe(VPSingleDefRecipe *&V)
Match a VPSingleDefRecipe, capturing if we match.
VPInstruction_match< VPInstruction::BranchOnCount > m_BranchOnCount()
auto m_GetElementPtr(const Op0_t &Op0, const Op1_t &Op1)
specific_intval< 1 > m_True()
auto m_VPValue()
Match an arbitrary VPValue and ignore it.
VectorEndPointerRecipe_match< Op0_t, Op1_t > m_VecEndPtr(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::ExtractLastPart, Op0_t > m_ExtractLastPart(const Op0_t &Op0)
VPInstruction_match< VPInstruction::Broadcast, Op0_t > m_Broadcast(const Op0_t &Op0)
VPInstruction_match< VPInstruction::ExplicitVectorLength, Op0_t > m_EVL(const Op0_t &Op0)
VPInstruction_match< VPInstruction::BuildVector > m_BuildVector()
BuildVector matches only its opcode, w/o matching its operands as the number of operands is not fixed.
VPInstruction_match< VPInstruction::ExtractPenultimateElement, Op0_t > m_ExtractPenultimateElement(const Op0_t &Op0)
match_bind< VPInstruction > m_VPInstruction(VPInstruction *&V)
Match a VPInstruction, capturing if we match.
VPInstruction_match< VPInstruction::FirstActiveLane, Op0_t > m_FirstActiveLane(const Op0_t &Op0)
auto m_DerivedIV(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
VPInstruction_match< VPInstruction::BranchOnCond > m_BranchOnCond()
VPInstruction_match< VPInstruction::ExtractLane, Op0_t, Op1_t > m_ExtractLane(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::Reverse, Op0_t > m_Reverse(const Op0_t &Op0)
NodeAddr< DefNode * > Def
bool isSingleScalar(const VPValue *VPV)
Returns true if VPV is a single scalar, either because it produces the same value for all lanes or on...
VPValue * getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr)
Get or create a VPValue that corresponds to the expansion of Expr.
bool cannotHoistOrSinkRecipe(const VPRecipeBase &R, bool Sinking=false)
Return true if we do not know how to (mechanically) hoist or sink R.
VPInstruction * findComputeReductionResult(VPReductionPHIRecipe *PhiR)
Find the ComputeReductionResult recipe for PhiR, looking through selects inserted for predicated redu...
VPInstruction * findCanonicalIVIncrement(VPlan &Plan)
Find the canonical IV increment of Plan's vector loop region.
std::optional< MemoryLocation > getMemoryLocation(const VPRecipeBase &R)
Return a MemoryLocation for R with noalias metadata populated from R, if the recipe is supported and ...
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
VPRecipeBase * findRecipe(VPValue *Start, PredT Pred)
Search Start's users for a recipe satisfying Pred, looking through recipes with definitions.
VPSingleDefRecipe * findHeaderMask(VPlan &Plan)
Collect the header mask with the pattern: (ICMP_ULE, WideCanonicalIV, backedge-taken-count) TODO: Int...
bool onlyScalarValuesUsed(const VPValue *Def)
Returns true if only scalar values of Def are used by all users.
static VPRecipeBase * findUserOf(VPValue *V, const MatchT &P)
If V is used by a recipe matching pattern P, return it.
bool isUniformAcrossVFsAndUFs(const VPValue *V)
Checks if V is uniform across all VF lanes and UF parts.
const SCEV * getSCEVExprForVPValue(const VPValue *V, PredicatedScalarEvolution &PSE, const Loop *L=nullptr)
Return the SCEV expression for V.
bool isHeaderMask(const VPValue *V, const VPlan &Plan)
Return true if V is a header mask in Plan.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
constexpr auto not_equal_to(T &&Arg)
Functor variant of std::not_equal_to that can be used as a UnaryPredicate in functional algorithms li...
void stable_sort(R &&Range)
auto min_element(R &&Range)
Provide wrappers to std::min_element which take ranges instead of having to pass begin/end explicitly...
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
DenseMap< const Value *, const SCEV * > ValueToSCEVMapTy
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
constexpr from_range_t from_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
auto cast_or_null(const Y &Val)
iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order.
iterator_range< df_iterator< VPBlockDeepTraversalWrapper< VPBlockBase * > > > vp_depth_first_deep(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order while traversing t...
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like a...
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
SmallVector< VPRegisterUsage, 8 > calculateRegisterUsageForPlan(VPlan &Plan, ArrayRef< ElementCount > VFs, const TargetTransformInfo &TTI, const SmallPtrSetImpl< const Value * > &ValuesToIgnore)
Estimate the register usage for Plan and vectorization factors in VFs by calculating the highest numb...
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI_FOR_TEST cl::opt< bool > EnableWideActiveLaneMask
UncountableExitStyle
Different methods of handling early exits.
@ ReadOnly
No side effects to worry about, so we can process any uncountable exits in the loop and branch either...
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
iterator_range< filter_iterator< detail::IterOfRange< RangeT >, PredicateT > > make_filter_range(RangeT &&Range, PredicateT Pred)
Convenience function that takes a range of elements and a predicate, and return a new filter_iterator...
bool canConstantBeExtended(const APInt *C, Type *NarrowType, TTI::PartialReductionExtendKind ExtKind)
Check if a constant CI can be safely treated as having been extended from a narrower type with the gi...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
RecurKind
These are the kinds of recurrences that we support.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ FindIV
FindIV reduction with select(icmp(),x,y) where one of (x,y) is a loop induction variable (increasing ...
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
@ FSub
Subtraction of floats.
@ FAddChainWithSubs
A chain of fadds and fsubs.
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ Sub
Subtraction of integers.
@ AddChainWithSubs
A chain of adds and subs.
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
LLVM_ABI Value * getRecurrenceIdentity(RecurKind K, Type *Tp, FastMathFlags FMF)
Given information about a recurrence kind, return the identity for the @llvm.vector....
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the specified block at the specified instruction.
FunctionAddr VTableAddr Next
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
DWARFExpression::Operation Op
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...
ArrayRef(const T &OneElt) -> ArrayRef< T >
auto make_second_range(ContainerTy &&c)
Given a container of pairs, return a range over the second elements.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list is empty.
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
@ Default
The result value is uniform if and only if all operands are uniform.
constexpr detail::IsaCheckPredicate< Types... > IsaPred
Function object wrapper for the llvm::isa type check.
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
RemoveMask_match(const Op0_t &In, Op1_t &Out)
bool match(OpTy *V) const
MDNode * Scope
The tag for alias scope specification (used with noalias).
This struct is a compact representation of a valid (non-zero power of two) alignment.
An information struct used to provide DenseMap with the various necessary components for a given valu...
This reduction is unordered with the partial result scaled down by some factor.
Holds the VFShape for a specific scalar to vector function mapping.
Encapsulates information needed to describe a parameter.
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
Struct to hold various analysis needed for cost computations.
static bool isFreeScalarIntrinsic(Intrinsic::ID ID)
Returns true if ID is a pseudo intrinsic that is dropped via scalarization rather than widened.
CallWideningKind
Choice for how to widen a call at a given VF.
bool isMaskRequired(Instruction *I) const
Forwards to LoopVectorizationCostModel::isMaskRequired.
PredicatedScalarEvolution & PSE
bool willBeScalarized(Instruction *I, ElementCount VF) const
Returns true if I is known to be scalarized at VF.
TargetTransformInfo::TargetCostKind CostKind
const TargetLibraryInfo & TLI
std::optional< CallWideningKind > getLegacyCallKind(CallInst *CI, ElementCount VF) const
Returns the legacy call widening decision for CI at VF, or std::nullopt if none was recorded.
const TargetTransformInfo & TTI
A VPValue representing a live-in from the input IR or a constant.
Type * getType() const
Returns the type of the underlying IR value.
A struct that represents some properties of the register usage of a loop.
SmallMapVector< unsigned, unsigned, 4 > MaxLocalUsers
Holds the maximum number of concurrent live intervals in the loop.
InstructionCost spillCost(const TargetTransformInfo &TTI, TargetTransformInfo::TargetCostKind CostKind, unsigned OverrideMaxNumRegs=0) const
Calculate the estimated cost of any spills due to using more registers than the number available for ...
A symbolic live-in VPValue, used for values like vector trip count, VF, and VFxUF.
bool isMaterialized() const
Returns true if this symbolic value has been materialized.
A recipe for widening load operations with vector-predication intrinsics, using the address to load f...
A recipe for widening load operations, using the address to load from and an optional mask.
A recipe for widening store operations with vector-predication intrinsics, using the value to store,...
A recipe for widening store operations, using the stored value, the address to store to and an option...