57 if (!VPBB->getParent())
60 auto EndIter = Term ? Term->getIterator() : VPBB->end();
65 VPValue *VPV = Ingredient.getVPSingleValue();
81 *Load, Ingredient.getOperand(0),
nullptr ,
82 false , *VPI, Ingredient.getDebugLoc());
85 *Store, Ingredient.getOperand(1), Ingredient.getOperand(0),
86 nullptr ,
false , *VPI,
87 Ingredient.getDebugLoc());
90 Ingredient.getDebugLoc());
102 if (VectorID == Intrinsic::experimental_noalias_scope_decl)
107 if (VectorID == Intrinsic::assume ||
108 VectorID == Intrinsic::lifetime_end ||
109 VectorID == Intrinsic::lifetime_start ||
110 VectorID == Intrinsic::sideeffect ||
111 VectorID == Intrinsic::pseudoprobe) {
116 const bool IsSingleScalar = VectorID != Intrinsic::assume &&
117 VectorID != Intrinsic::pseudoprobe;
121 Ingredient.getDebugLoc());
124 *CI, VectorID,
drop_end(Ingredient.operands()), CI->getType(),
125 VPIRFlags(*CI), *VPI, CI->getDebugLoc());
129 CI->getOpcode(), Ingredient.getOperand(0), CI->getType(), CI,
133 *VPI, Ingredient.getDebugLoc());
137 "inductions must be created earlier");
146 "Only recpies with zero or one defined values expected");
147 Ingredient.eraseFromParent();
164 if (
A->getOpcode() != Instruction::Store ||
165 B->getOpcode() != Instruction::Store)
175 const APInt *Distance;
181 Type *TyA = TypeInfo.inferScalarType(
A->getOperand(0));
183 Type *TyB = TypeInfo.inferScalarType(
B->getOperand(0));
189 uint64_t MaxStoreSize = std::max(SizeA, SizeB);
191 auto VFs =
B->getParent()->getPlan()->vectorFactors();
195 return Distance->
abs().
uge(
203 : ExcludeRecipes(ExcludeRecipes), GroupLeader(GroupLeader), PSE(PSE),
204 L(L), TypeInfo(TypeInfo) {}
211 return ExcludeRecipes.contains(&R) ||
212 (Store && isNoAliasViaDistance(Store, &GroupLeader));
225 std::optional<SinkStoreInfo> SinkInfo = {}) {
226 bool CheckReads = SinkInfo.has_value();
233 if (SinkInfo && SinkInfo->shouldSkip(R))
237 if (!
R.mayWriteToMemory() && !(CheckReads &&
R.mayReadFromMemory()))
255template <
unsigned Opcode>
260 static_assert(Opcode == Instruction::Load || Opcode == Instruction::Store,
261 "Only Load and Store opcodes supported");
262 constexpr bool IsLoad = (Opcode == Instruction::Load);
269 if (!RepR || RepR->getOpcode() != Opcode || !FilterFn(RepR))
273 VPValue *Addr = RepR->getOperand(IsLoad ? 0 : 1);
276 RecipesByAddress[AddrSCEV].push_back(RepR);
281 for (
auto &Group :
Groups) {
296 auto InsertIfValidSinkCandidate = [ScalarVFOnly, &WorkList](
308 if (Candidate->getParent() == SinkTo ||
313 if (!ScalarVFOnly && RepR->isSingleScalar())
316 WorkList.
insert({SinkTo, Candidate});
328 for (
auto &Recipe : *VPBB)
330 InsertIfValidSinkCandidate(VPBB,
Op);
334 for (
unsigned I = 0;
I != WorkList.
size(); ++
I) {
337 std::tie(SinkTo, SinkCandidate) = WorkList[
I];
342 auto UsersOutsideSinkTo =
344 return cast<VPRecipeBase>(U)->getParent() != SinkTo;
346 if (
any_of(UsersOutsideSinkTo, [SinkCandidate](
VPUser *U) {
347 return !U->usesFirstLaneOnly(SinkCandidate);
350 bool NeedsDuplicating = !UsersOutsideSinkTo.empty();
352 if (NeedsDuplicating) {
356 if (
auto *SinkCandidateRepR =
362 nullptr , *SinkCandidateRepR,
366 Clone = SinkCandidate->
clone();
376 InsertIfValidSinkCandidate(SinkTo,
Op);
386 if (!EntryBB || EntryBB->size() != 1 ||
396 if (EntryBB->getNumSuccessors() != 2)
401 if (!Succ0 || !Succ1)
404 if (Succ0->getNumSuccessors() + Succ1->getNumSuccessors() != 1)
406 if (Succ0->getSingleSuccessor() == Succ1)
408 if (Succ1->getSingleSuccessor() == Succ0)
425 if (!Region1->isReplicator())
427 auto *MiddleBasicBlock =
429 if (!MiddleBasicBlock || !MiddleBasicBlock->empty())
434 if (!Region2 || !Region2->isReplicator())
439 if (!Mask1 || Mask1 != Mask2)
442 assert(Mask1 && Mask2 &&
"both region must have conditions");
448 if (TransformedRegions.
contains(Region1))
455 if (!Then1 || !Then2)
475 VPValue *Phi1ToMoveV = Phi1ToMove.getVPSingleValue();
481 if (Phi1ToMove.getVPSingleValue()->getNumUsers() == 0) {
482 Phi1ToMove.eraseFromParent();
485 Phi1ToMove.moveBefore(*Merge2, Merge2->begin());
499 TransformedRegions.
insert(Region1);
502 return !TransformedRegions.
empty();
510 std::string RegionName = (
Twine(
"pred.") + Instr->getOpcodeName()).str();
511 assert(Instr->getParent() &&
"Predicated instruction not in any basic block");
512 auto *BlockInMask = PredRecipe->
getMask();
533 Region->setParent(ParentRegion);
539 RecipeWithoutMask->getDebugLoc());
540 Exiting->appendRecipe(PHIRecipe);
553 if (RepR->isPredicated())
572 if (ParentRegion && ParentRegion->
getExiting() == CurrentBlock)
584 if (!VPBB->getParent())
588 if (!PredVPBB || PredVPBB->getNumSuccessors() != 1 ||
597 R.moveBefore(*PredVPBB, PredVPBB->
end());
599 auto *ParentRegion = VPBB->getParent();
600 if (ParentRegion && ParentRegion->getExiting() == VPBB)
601 ParentRegion->setExiting(PredVPBB);
605 return !WorkList.
empty();
612 bool ShouldSimplify =
true;
613 while (ShouldSimplify) {
629 if (!
IV ||
IV->getTruncInst())
644 for (
auto *U : FindMyCast->
users()) {
646 if (UserCast && UserCast->getUnderlyingValue() == IRCast) {
647 FoundUserCast = UserCast;
651 FindMyCast = FoundUserCast;
667 Builder.createDerivedIV(Kind, FPBinOp, StartV, CanonicalIV, Step);
677 BaseIV = Builder.createScalarCast(Instruction::Trunc, BaseIV, TruncTy,
DL);
683 if (ResultTy != StepTy) {
690 Builder.setInsertPoint(VecPreheader);
691 Step = Builder.createScalarCast(Instruction::Trunc, Step, ResultTy,
DL);
693 return Builder.createScalarIVSteps(InductionOpcode, FPBinOp, BaseIV, Step,
711 if (!WidenOriginalIV || !WidenOriginalIV->isCanonical())
725 WidenOriginalIV->dropPoisonGeneratingFlags();
726 WidenNewIV->replaceAllUsesWith(WidenOriginalIV);
727 WidenNewIV->eraseFromParent();
734 "Lanes other than first lane being used should imply that not just "
745 nullptr, Plan.
getZero(CanonicalIVTy),
748 WidenNewIV->eraseFromParent();
770 if (PHICost > BroadcastCost)
779 unsigned RegClass =
TTI.getRegisterClassForType(
true, VecTy);
793 WideCanIV->getDebugLoc());
795 NewWideIV->insertBefore(&*Header->getFirstNonPhi());
796 WideCanIV->replaceAllUsesWith(NewWideIV);
797 WideCanIV->eraseFromParent();
805 bool IsConditionalAssume = RepR && RepR->isPredicated() &&
807 if (IsConditionalAssume)
810 if (R.mayHaveSideEffects())
814 return all_of(R.definedValues(),
815 [](
VPValue *V) { return V->getNumUsers() == 0; });
835 VPUser *PhiUser = PhiR->getSingleUser();
841 PhiR->replaceAllUsesWith(Start);
842 PhiR->eraseFromParent();
850 for (
unsigned I = 0;
I !=
Users.size(); ++
I) {
853 Users.insert_range(V->users());
855 return Users.takeVector();
869 nullptr, StartV, StepV, PtrIV->
getDebugLoc(), Builder);
906 Def->getNumUsers() == 0 || !Def->getUnderlyingValue() ||
907 (RepR && (RepR->isSingleScalar() || RepR->isPredicated())))
920 Def->operands(),
true,
922 Clone->insertAfter(Def);
923 Def->replaceAllUsesWith(Clone);
934 PtrIV->replaceAllUsesWith(PtrAdd);
941 if (HasOnlyVectorVFs &&
none_of(WideIV->users(), [WideIV](
VPUser *U) {
942 return U->usesScalars(WideIV);
948 Plan,
ID.getKind(),
ID.getInductionOpcode(),
950 WideIV->getTruncInst(), WideIV->getStartValue(), WideIV->getStepValue(),
951 WideIV->getDebugLoc(), Builder);
954 if (!HasOnlyVectorVFs) {
956 "plans containing a scalar VF cannot also include scalable VFs");
957 WideIV->replaceAllUsesWith(Steps);
960 WideIV->replaceUsesWithIf(Steps,
961 [WideIV, HasScalableVF](
VPUser &U,
unsigned) {
963 return U.usesFirstLaneOnly(WideIV);
964 return U.usesScalars(WideIV);
980 return (IntOrFpIV && IntOrFpIV->getTruncInst()) ? nullptr : WideIV;
985 if (!Def || Def->getNumOperands() != 2)
993 auto IsWideIVInc = [&]() {
994 auto &
ID = WideIV->getInductionDescriptor();
997 VPValue *IVStep = WideIV->getStepValue();
998 switch (
ID.getInductionOpcode()) {
999 case Instruction::Add:
1001 case Instruction::FAdd:
1003 case Instruction::FSub:
1006 case Instruction::Sub: {
1026 return IsWideIVInc() ? WideIV :
nullptr;
1046 if (WideIntOrFp && WideIntOrFp->getTruncInst())
1059 FirstActiveLane =
B.createScalarZExtOrTrunc(FirstActiveLane, CanonicalIVType,
1060 FirstActiveLaneType,
DL);
1061 VPValue *EndValue =
B.createAdd(CanonicalIV, FirstActiveLane,
DL);
1066 if (Incoming != WideIV) {
1068 EndValue =
B.createAdd(EndValue, One,
DL);
1071 if (!WideIntOrFp || !WideIntOrFp->isCanonical()) {
1073 VPIRValue *Start = WideIV->getStartValue();
1074 VPValue *Step = WideIV->getStepValue();
1075 EndValue =
B.createDerivedIV(
1077 Start, EndValue, Step);
1092 if (WideIntOrFp && WideIntOrFp->getTruncInst())
1099 if (!WideIntOrFp || !WideIntOrFp->isCanonical()) {
1102 Start, VectorTC, Step);
1131 assert(EndValue &&
"Must have computed the end value up front");
1136 if (Incoming != WideIV)
1147 auto *Zero = Plan.
getZero(StepTy);
1148 return B.createPtrAdd(EndValue,
B.createSub(Zero, Step),
1153 return B.createNaryOp(
1154 ID.getInductionBinOp()->getOpcode() == Instruction::FAdd
1156 : Instruction::FAdd,
1157 {EndValue, Step}, {ID.getInductionBinOp()->getFastMathFlags()});
1169 VPBuilder VectorPHBuilder(VectorPH, VectorPH->begin());
1178 WideIV, VectorPHBuilder, TypeInfo, ResumeTC))
1179 EndValues[WideIV] = EndValue;
1189 R.getVPSingleValue()->replaceAllUsesWith(EndValue);
1190 R.eraseFromParent();
1199 for (
auto [Idx, PredVPBB] :
enumerate(ExitVPBB->getPredecessors())) {
1201 if (PredVPBB == MiddleVPBB)
1203 ExitIRI->getOperand(Idx),
1207 Plan, TypeInfo, PredVPBB, ExitIRI->getOperand(Idx), PSE);
1209 ExitIRI->setOperand(Idx, Escape);
1226 const auto &[V, Inserted] = SCEV2VPV.
try_emplace(ExpR->getSCEV(), ExpR);
1229 ExpR->replaceAllUsesWith(V->second);
1230 ExpR->eraseFromParent();
1239 while (!WorkList.
empty()) {
1241 if (!Seen.
insert(Cur).second)
1249 R->eraseFromParent();
1256static std::optional<std::pair<bool, unsigned>>
1259 std::optional<std::pair<bool, unsigned>>>(R)
1262 [](
auto *
I) {
return std::make_pair(
false,
I->getOpcode()); })
1264 return std::make_pair(
true,
I->getVectorIntrinsicID());
1266 .Case<VPVectorPointerRecipe, VPPredInstPHIRecipe, VPScalarIVStepsRecipe>(
1272 I->getVPRecipeID());
1274 .
Default([](
auto *) {
return std::nullopt; });
1292 Value *V =
Op->getUnderlyingValue();
1298 auto FoldToIRValue = [&]() ->
Value * {
1300 if (OpcodeOrIID->first) {
1301 if (R.getNumOperands() != 2)
1303 unsigned ID = OpcodeOrIID->second;
1304 return Folder.FoldBinaryIntrinsic(
ID,
Ops[0],
Ops[1],
1307 unsigned Opcode = OpcodeOrIID->second;
1316 return Folder.FoldSelect(
Ops[0],
Ops[1],
1319 return Folder.FoldBinOp(Instruction::BinaryOps::Xor,
Ops[0],
1321 case Instruction::Select:
1322 return Folder.FoldSelect(
Ops[0],
Ops[1],
Ops[2]);
1323 case Instruction::ICmp:
1324 case Instruction::FCmp:
1327 case Instruction::GetElementPtr: {
1330 return Folder.FoldGEP(
GEP->getSourceElementType(),
Ops[0],
1340 case Instruction::ExtractElement:
1347 if (
Value *V = FoldToIRValue())
1348 return R.getParent()->getPlan()->getOrAddLiveIn(V);
1354 VPlan *Plan = Def->getParent()->getPlan();
1360 return Def->replaceAllUsesWith(V);
1366 PredPHI->replaceAllUsesWith(
Op);
1379 bool CanCreateNewRecipe =
1386 if (TruncTy == ATy) {
1387 Def->replaceAllUsesWith(
A);
1396 : Instruction::ZExt;
1399 if (
auto *UnderlyingExt = Def->getOperand(0)->getUnderlyingValue()) {
1401 Ext->setUnderlyingValue(UnderlyingExt);
1403 Def->replaceAllUsesWith(Ext);
1405 auto *Trunc = Builder.createWidenCast(Instruction::Trunc,
A, TruncTy);
1406 Def->replaceAllUsesWith(Trunc);
1414 for (
VPUser *U :
A->users()) {
1416 for (
VPValue *VPV : R->definedValues())
1430 Def->replaceAllUsesWith(
X);
1431 Def->eraseFromParent();
1437 return Def->replaceAllUsesWith(
1442 return Def->replaceAllUsesWith(
X);
1446 return Def->replaceAllUsesWith(
1451 return Def->replaceAllUsesWith(
1456 return Def->replaceAllUsesWith(
X);
1460 return Def->replaceAllUsesWith(Plan->
getFalse());
1464 return Def->replaceAllUsesWith(
X);
1467 if (CanCreateNewRecipe &&
1472 (!Def->getOperand(0)->hasMoreThanOneUniqueUser() ||
1473 !Def->getOperand(1)->hasMoreThanOneUniqueUser()))
1474 return Def->replaceAllUsesWith(
1475 Builder.createLogicalAnd(
X, Builder.createOr(
Y, Z)));
1480 return Def->replaceAllUsesWith(Def->getOperand(1));
1485 return Def->replaceAllUsesWith(Builder.createLogicalAnd(
X,
Y));
1489 return Def->replaceAllUsesWith(Plan->
getFalse());
1492 return Def->replaceAllUsesWith(
X);
1496 if (CanCreateNewRecipe &&
1498 return Def->replaceAllUsesWith(Builder.createNot(
C));
1502 Def->setOperand(0,
C);
1503 Def->setOperand(1,
Y);
1504 Def->setOperand(2,
X);
1509 return Def->replaceAllUsesWith(
A);
1512 return Def->replaceAllUsesWith(
A);
1515 return Def->replaceAllUsesWith(
1522 return Def->replaceAllUsesWith(
1524 Def->getDebugLoc(),
"", NW));
1527 if (CanCreateNewRecipe &&
1535 ->hasNoSignedWrap()};
1536 return Def->replaceAllUsesWith(
1537 Builder.createSub(
X,
Y, Def->getDebugLoc(),
"", NW));
1543 return Def->replaceAllUsesWith(Builder.createNaryOp(
1545 {A, Plan->getConstantInt(APC->getBitWidth(), APC->exactLogBase2())},
1550 return Def->replaceAllUsesWith(Builder.createNaryOp(
1552 {A, Plan->getConstantInt(APC->getBitWidth(), APC->exactLogBase2())},
1557 return Def->replaceAllUsesWith(
A);
1572 R->setOperand(1,
Y);
1573 R->setOperand(2,
X);
1577 R->replaceAllUsesWith(Cmp);
1582 if (!Cmp->getDebugLoc() && Def->getDebugLoc())
1583 Cmp->setDebugLoc(Def->getDebugLoc());
1595 if (
Op->getNumUsers() > 1 ||
1599 }
else if (!UnpairedCmp) {
1600 UnpairedCmp =
Op->getDefiningRecipe();
1604 UnpairedCmp =
nullptr;
1611 if (NewOps.
size() < Def->getNumOperands()) {
1613 return Def->replaceAllUsesWith(NewAnyOf);
1620 if (CanCreateNewRecipe &&
1626 return Def->replaceAllUsesWith(NewCmp);
1634 return Def->replaceAllUsesWith(Def->getOperand(1));
1640 X = Builder.createWidenCast(Instruction::Trunc,
X, WideStepTy);
1641 Def->replaceAllUsesWith(
X);
1651 Def->setOperand(1, Def->getOperand(0));
1652 Def->setOperand(0,
Y);
1659 return Def->replaceAllUsesWith(Def->getOperand(0));
1665 Def->replaceAllUsesWith(
1666 BuildVector->getOperand(BuildVector->getNumOperands() - 1));
1670 return Def->replaceAllUsesWith(
A);
1676 Def->replaceAllUsesWith(
1677 BuildVector->getOperand(BuildVector->getNumOperands() - 2));
1684 Def->replaceAllUsesWith(BuildVector->getOperand(Idx));
1689 Def->replaceAllUsesWith(
1699 "broadcast operand must be single-scalar");
1700 Def->setOperand(0,
C);
1705 if (Def->getNumOperands() == 1) {
1706 Def->replaceAllUsesWith(Def->getOperand(0));
1711 Phi->replaceAllUsesWith(Phi->getOperand(0));
1717 if (Def->getNumOperands() == 1 &&
1719 return Def->replaceAllUsesWith(IRV);
1732 return Def->replaceAllUsesWith(
A);
1735 Def->replaceAllUsesWith(Builder.createNaryOp(
1736 Instruction::ExtractElement, {A, LaneToExtract}, Def->getDebugLoc()));
1750 auto *IVInc = Def->getOperand(0);
1751 if (IVInc->getNumUsers() == 2) {
1756 if (Phi->getNumUsers() == 1 || (Phi->getNumUsers() == 2 && Inc)) {
1757 Def->replaceAllUsesWith(IVInc);
1759 Inc->replaceAllUsesWith(Phi);
1760 Phi->setOperand(0,
Y);
1776 Steps->replaceAllUsesWith(Steps->getOperand(0));
1784 Def->replaceUsesWithIf(StartV, [](
const VPUser &U,
unsigned Idx) {
1786 return PhiR && PhiR->isInLoop();
1792 Def->replaceAllUsesWith(
A);
1801 [Def,
A](
VPUser *U) { return U->usesScalars(A) || Def == U; })) {
1802 return Def->replaceAllUsesWith(
A);
1806 return Def->replaceAllUsesWith(
A);
1833 while (!Worklist.
empty()) {
1842 R->replaceAllUsesWith(
1843 Builder.createLogicalAnd(HeaderMask, Builder.createLogicalAnd(
X,
Y)));
1861 if (RepR && (RepR->isSingleScalar() || RepR->isPredicated()))
1865 if (RepR && RepR->getOpcode() == Instruction::Store &&
1868 RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(),
1869 true ,
nullptr , *RepR ,
1870 *RepR , RepR->getDebugLoc());
1871 Clone->insertBefore(RepOrWidenR);
1873 VPValue *ExtractOp = Clone->getOperand(0);
1879 Clone->setOperand(0, ExtractOp);
1880 RepR->eraseFromParent();
1889 auto IntroducesBCastOf = [](
const VPValue *
Op) {
1898 return !U->usesScalars(
Op);
1902 if (
any_of(RepOrWidenR->users(), IntroducesBCastOf(RepOrWidenR)) &&
1905 make_filter_range(Op->users(), not_equal_to(RepOrWidenR)),
1906 IntroducesBCastOf(Op)))
1910 auto *IRV = dyn_cast<VPIRValue>(Op);
1911 bool LiveInNeedsBroadcast = IRV && !isa<Constant>(IRV->getValue());
1912 auto *OpR = dyn_cast<VPReplicateRecipe>(Op);
1913 return LiveInNeedsBroadcast || (OpR && OpR->isSingleScalar());
1918 RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(),
1919 true ,
nullptr, *RepOrWidenR);
1920 Clone->insertBefore(RepOrWidenR);
1921 RepOrWidenR->replaceAllUsesWith(Clone);
1923 RepOrWidenR->eraseFromParent();
1959 if (Blend->isNormalized() || !
match(Blend->getMask(0),
m_False()))
1960 UniqueValues.
insert(Blend->getIncomingValue(0));
1961 for (
unsigned I = 1;
I != Blend->getNumIncomingValues(); ++
I)
1963 UniqueValues.
insert(Blend->getIncomingValue(
I));
1965 if (UniqueValues.
size() == 1) {
1966 Blend->replaceAllUsesWith(*UniqueValues.
begin());
1967 Blend->eraseFromParent();
1971 if (Blend->isNormalized())
1977 unsigned StartIndex = 0;
1978 for (
unsigned I = 0;
I != Blend->getNumIncomingValues(); ++
I) {
1983 if (Mask->getNumUsers() == 1 && !
match(Mask,
m_False())) {
1990 OperandsWithMask.
push_back(Blend->getIncomingValue(StartIndex));
1992 for (
unsigned I = 0;
I != Blend->getNumIncomingValues(); ++
I) {
1993 if (
I == StartIndex)
1995 OperandsWithMask.
push_back(Blend->getIncomingValue(
I));
1996 OperandsWithMask.
push_back(Blend->getMask(
I));
2001 OperandsWithMask, *Blend, Blend->getDebugLoc());
2002 NewBlend->insertBefore(&R);
2004 VPValue *DeadMask = Blend->getMask(StartIndex);
2006 Blend->eraseFromParent();
2011 if (NewBlend->getNumOperands() == 3 &&
2013 VPValue *Inc0 = NewBlend->getOperand(0);
2014 VPValue *Inc1 = NewBlend->getOperand(1);
2015 VPValue *OldMask = NewBlend->getOperand(2);
2016 NewBlend->setOperand(0, Inc1);
2017 NewBlend->setOperand(1, Inc0);
2018 NewBlend->setOperand(2, NewMask);
2045 APInt MaxVal = AlignedTC - 1;
2048 unsigned NewBitWidth =
2054 bool MadeChange =
false;
2063 if (!WideIV || !WideIV->isCanonical() ||
2064 WideIV->hasMoreThanOneUniqueUser() ||
2065 NewIVTy == WideIV->getScalarType())
2070 VPUser *SingleUser = WideIV->getSingleUser();
2078 auto *NewStart = Plan.
getZero(NewIVTy);
2079 WideIV->setStartValue(NewStart);
2081 WideIV->setStepValue(NewStep);
2088 Cmp->setOperand(1, NewBTC);
2102 return any_of(
Cond->getDefiningRecipe()->operands(), [&Plan, BestVF, BestUF,
2104 return isConditionTrueViaVFAndUF(C, Plan, BestVF, BestUF, PSE);
2118 const SCEV *VectorTripCount =
2123 "Trip count SCEV must be computable");
2144 auto *Term = &ExitingVPBB->
back();
2157 for (
unsigned Part = 0; Part < UF; ++Part) {
2163 Extracts[Part] = Ext;
2175 match(Phi->getBackedgeValue(),
2177 assert(Index &&
"Expected index from ActiveLaneMask instruction");
2194 "Expected one VPActiveLaneMaskPHIRecipe for each unroll part");
2201 "Expected incoming values of Phi to be ActiveLaneMasks");
2206 EntryALM->setOperand(2, ALMMultiplier);
2207 LoopALM->setOperand(2, ALMMultiplier);
2211 ExtractFromALM(EntryALM, EntryExtracts);
2216 ExtractFromALM(LoopALM, LoopExtracts);
2218 Not->setOperand(0, LoopExtracts[0]);
2221 for (
unsigned Part = 0; Part < UF; ++Part) {
2222 Phis[Part]->setStartValue(EntryExtracts[Part]);
2223 Phis[Part]->setBackedgeValue(LoopExtracts[Part]);
2236 auto *Term = &ExitingVPBB->
back();
2248 const SCEV *VectorTripCount =
2254 "Trip count SCEV must be computable");
2273 Term->setOperand(1, Plan.
getTrue());
2278 {}, Term->getDebugLoc());
2280 Term->eraseFromParent();
2315 R.getVPSingleValue()->replaceAllUsesWith(Trunc);
2325 assert(Plan.
hasVF(BestVF) &&
"BestVF is not available in Plan");
2326 assert(Plan.
hasUF(BestUF) &&
"BestUF is not available in Plan");
2344 RecurKind RK = PhiR->getRecurrenceKind();
2351 RecWithFlags->dropPoisonGeneratingFlags();
2357struct VPCSEDenseMapInfo :
public DenseMapInfo<VPSingleDefRecipe *> {
2359 return Def == getEmptyKey() || Def == getTombstoneKey();
2370 return GEP->getSourceElementType();
2373 .Case<VPVectorPointerRecipe, VPWidenGEPRecipe>(
2374 [](
auto *
I) {
return I->getSourceElementType(); })
2375 .
Default([](
auto *) {
return nullptr; });
2379 static bool canHandle(
const VPSingleDefRecipe *Def) {
2388 if (!
C || (!
C->first && (
C->second == Instruction::InsertValue ||
2389 C->second == Instruction::ExtractValue)))
2395 return !
Def->mayReadFromMemory();
2399 static unsigned getHashValue(
const VPSingleDefRecipe *Def) {
2400 const VPlan *Plan =
Def->getParent()->getPlan();
2401 VPTypeAnalysis TypeInfo(*Plan);
2404 getGEPSourceElementType(Def), TypeInfo.inferScalarType(Def),
2407 if (RFlags->hasPredicate())
2410 return hash_combine(Result, SIVSteps->getInductionOpcode());
2415 static bool isEqual(
const VPSingleDefRecipe *L,
const VPSingleDefRecipe *R) {
2418 if (
L->getVPRecipeID() !=
R->getVPRecipeID() ||
2420 getGEPSourceElementType(L) != getGEPSourceElementType(R) ||
2422 !
equal(
L->operands(),
R->operands()))
2425 "must have valid opcode info for both recipes");
2427 if (LFlags->hasPredicate() &&
2428 LFlags->getPredicate() !=
2432 if (LSIV->getInductionOpcode() !=
2438 const VPRegionBlock *RegionL =
L->getRegion();
2439 const VPRegionBlock *RegionR =
R->getRegion();
2442 L->getParent() !=
R->getParent())
2444 const VPlan *Plan =
L->getParent()->getPlan();
2445 VPTypeAnalysis TypeInfo(*Plan);
2446 return TypeInfo.inferScalarType(L) == TypeInfo.inferScalarType(R);
2462 if (!Def || !VPCSEDenseMapInfo::canHandle(Def))
2466 if (!VPDT.
dominates(V->getParent(), VPBB))
2471 Def->replaceAllUsesWith(V);
2490 "Expected vector prehader's successor to be the vector loop region");
2497 return !Op->isDefinedOutsideLoopRegions();
2500 R.moveBefore(*Preheader, Preheader->
end());
2518 assert(!RepR->isPredicated() &&
2519 "Expected prior transformation of predicated replicates to "
2520 "replicate regions");
2525 if (!RepR->isSingleScalar())
2537 if (
any_of(Def->users(), [&SinkBB, &LoopRegion](
VPUser *U) {
2538 auto *UserR = cast<VPRecipeBase>(U);
2539 VPBasicBlock *Parent = UserR->getParent();
2541 if (SinkBB && SinkBB != Parent)
2546 return UserR->isPhi() || Parent->getEnclosingLoopRegion() ||
2547 Parent->getSinglePredecessor() != LoopRegion;
2557 "Defining block must dominate sink block");
2583 VPValue *ResultVPV = R.getVPSingleValue();
2585 unsigned NewResSizeInBits = MinBWs.
lookup(UI);
2586 if (!NewResSizeInBits)
2599 (void)OldResSizeInBits;
2607 VPW->dropPoisonGeneratingFlags();
2609 if (OldResSizeInBits != NewResSizeInBits &&
2613 Instruction::ZExt, ResultVPV, OldResTy,
nullptr,
2615 Ext->insertAfter(&R);
2617 Ext->setOperand(0, ResultVPV);
2618 assert(OldResSizeInBits > NewResSizeInBits &&
"Nothing to shrink?");
2621 "Only ICmps should not need extending the result.");
2631 for (
unsigned Idx = StartIdx; Idx != R.getNumOperands(); ++Idx) {
2632 auto *
Op = R.getOperand(Idx);
2633 unsigned OpSizeInBits =
2635 if (OpSizeInBits == NewResSizeInBits)
2637 assert(OpSizeInBits > NewResSizeInBits &&
"nothing to truncate");
2638 auto [ProcessedIter, IterIsEmpty] = ProcessedTruncs.
try_emplace(
Op);
2640 R.setOperand(Idx, ProcessedIter->second);
2648 Builder.setInsertPoint(&R);
2650 Builder.createWidenCast(Instruction::Trunc,
Op, NewResTy);
2651 ProcessedIter->second = NewOp;
2652 R.setOperand(Idx, NewOp);
2660 std::optional<VPDominatorTree> VPDT;
2677 assert(VPBB->getNumSuccessors() == 2 &&
2678 "Two successors expected for BranchOnCond");
2679 unsigned RemovedIdx;
2690 "There must be a single edge between VPBB and its successor");
2698 VPBB->back().eraseFromParent();
2710 if (Reachable.contains(
B))
2721 for (
VPValue *Def : R.definedValues())
2722 Def->replaceAllUsesWith(&Tmp);
2723 R.eraseFromParent();
2782 DebugLoc DL = CanonicalIVIncrement->getDebugLoc();
2793 auto *EntryIncrement = Builder.createOverflowingOp(
2795 DL,
"index.part.next");
2801 {EntryIncrement, TC, ALMMultiplier},
DL,
2802 "active.lane.mask.entry");
2809 LaneMaskPhi->insertBefore(*HeaderVPBB, HeaderVPBB->begin());
2814 Builder.setInsertPoint(OriginalTerminator);
2815 auto *InLoopIncrement = Builder.createOverflowingOp(
2817 {CanonicalIVIncrement, &Plan.
getVF()}, {
false,
false},
DL);
2819 {InLoopIncrement, TC, ALMMultiplier},
DL,
2820 "active.lane.mask.next");
2825 auto *NotMask = Builder.createNot(ALM,
DL);
2832 bool UseActiveLaneMaskForControlFlow) {
2836 assert(WideCanonicalIV &&
2837 "Must have widened canonical IV when tail folding!");
2840 if (UseActiveLaneMaskForControlFlow) {
2849 nullptr,
"active.lane.mask");
2865 template <
typename OpTy>
bool match(OpTy *V)
const {
2876template <
typename Op0_t,
typename Op1_t>
2895 VPValue *Addr, *Mask, *EndPtr;
2898 auto AdjustEndPtr = [&CurRecipe, &EVL](
VPValue *EndPtr) {
2900 EVLEndPtr->insertBefore(&CurRecipe);
2901 EVLEndPtr->setOperand(1, &EVL);
2905 auto GetVPReverse = [&CurRecipe, &EVL, &TypeInfo, Plan,
2910 Intrinsic::experimental_vp_reverse, {V, Plan->
getTrue(), &EVL},
2912 Reverse->insertBefore(&CurRecipe);
2916 if (
match(&CurRecipe,
2927 Mask = GetVPReverse(Mask);
2928 Addr = AdjustEndPtr(EndPtr);
2931 LoadR->insertBefore(&CurRecipe);
2933 Intrinsic::experimental_vp_reverse, {LoadR, Plan->
getTrue(), &EVL},
2941 StoredVal, EVL, Mask);
2943 if (
match(&CurRecipe,
2947 Mask = GetVPReverse(Mask);
2948 Addr = AdjustEndPtr(EndPtr);
2949 StoredVal = GetVPReverse(ReversedVal);
2951 StoredVal, EVL, Mask);
2955 if (Rdx->isConditional() &&
2960 if (Interleave->getMask() &&
2965 if (
match(&CurRecipe,
2974 Intrinsic::vp_merge, {Mask,
LHS,
RHS, &EVL},
2988 if (
match(&CurRecipe,
3002 VPValue *HeaderMask =
nullptr, *EVL =
nullptr;
3007 HeaderMask = R.getVPSingleValue();
3019 NewR->insertBefore(R);
3020 for (
auto [Old, New] :
3021 zip_equal(R->definedValues(), NewR->definedValues()))
3022 Old->replaceAllUsesWith(New);
3036 Merge->insertBefore(LogicalAnd);
3037 LogicalAnd->replaceAllUsesWith(
Merge);
3045 R->eraseFromParent();
3062 "User of VF that we can't transform to EVL.");
3072 "Only users of VFxUF should be VPWidenPointerInductionRecipe and the "
3073 "increment of the canonical induction.");
3089 MaxEVL = Builder.createScalarZExtOrTrunc(
3093 Builder.setInsertPoint(Header, Header->getFirstNonPhi());
3094 VPValue *PrevEVL = Builder.createScalarPhi(
3108 Intrinsic::experimental_vp_splice,
3109 {V1, V2, Imm, Plan.
getTrue(), PrevEVL, &EVL},
3113 R.getVPSingleValue()->replaceAllUsesWith(VPSplice);
3126 if (match(&R, m_ComputeReductionResult(m_Select(m_Specific(HeaderMask),
3127 m_VPValue(), m_VPValue()))))
3128 return R.getOperand(0)->getDefiningRecipe()->getRegion() ==
3129 Plan.getVectorLoopRegion();
3141 VPValue *EVLMask = Builder.createICmp(
3201 VPlan &Plan,
const std::optional<unsigned> &MaxSafeElements) {
3213 auto *CurrentIteration =
3215 CurrentIteration->insertBefore(*Header, Header->begin());
3216 VPBuilder Builder(Header, Header->getFirstNonPhi());
3219 VPPhi *AVLPhi = Builder.createScalarPhi(
3223 if (MaxSafeElements) {
3233 Builder.setInsertPoint(CanonicalIVIncrement);
3237 OpVPEVL = Builder.createScalarZExtOrTrunc(
3238 OpVPEVL, CanIVTy, I32Ty, CanonicalIVIncrement->getDebugLoc());
3240 auto *NextIter = Builder.createAdd(
3241 OpVPEVL, CurrentIteration, CanonicalIVIncrement->getDebugLoc(),
3242 "current.iteration.next", CanonicalIVIncrement->getNoWrapFlags());
3243 CurrentIteration->addOperand(NextIter);
3247 "avl.next", {
true,
false});
3255 CanonicalIV->replaceAllUsesWith(CurrentIteration);
3256 CanonicalIVIncrement->setOperand(0, CanonicalIV);
3270 assert(!CurrentIteration &&
3271 "Found multiple CurrentIteration. Only one expected");
3272 CurrentIteration = PhiR;
3276 if (!CurrentIteration)
3287 CurrentIteration->
getDebugLoc(),
"current.iteration.iv");
3296 CanIVInc->eraseFromParent();
3305 if (Header->empty())
3314 if (!
match(EVLPhi->getBackedgeValue(),
3327 [[maybe_unused]]
bool FoundAVLNext =
3330 assert(FoundAVLNext &&
"Didn't find AVL backedge?");
3338 [[maybe_unused]]
bool FoundIncrement =
match(
3345 "Expected BranchOnCond with ICmp comparing CanIV + VFxUF with vector "
3350 LatchBr->setOperand(
3361 return R->getRegion() ||
3365 for (
const SCEV *Stride : StridesMap.
values()) {
3368 const APInt *StrideConst;
3391 RewriteMap[StrideV] = PSE.
getSCEV(StrideV);
3398 const SCEV *ScevExpr = ExpSCEV->getSCEV();
3401 if (NewSCEV != ScevExpr) {
3403 ExpSCEV->replaceAllUsesWith(NewExp);
3412 const std::function<
bool(
BasicBlock *)> &BlockNeedsPredication) {
3416 auto CollectPoisonGeneratingInstrsInBackwardSlice([&](
VPRecipeBase *Root) {
3421 while (!Worklist.
empty()) {
3424 if (!Visited.
insert(CurRec).second)
3446 RecWithFlags->isDisjoint()) {
3449 Builder.createAdd(
A,
B, RecWithFlags->getDebugLoc());
3450 New->setUnderlyingValue(RecWithFlags->getUnderlyingValue());
3451 RecWithFlags->replaceAllUsesWith(New);
3452 RecWithFlags->eraseFromParent();
3455 RecWithFlags->dropPoisonGeneratingFlags();
3460 assert((!Instr || !Instr->hasPoisonGeneratingFlags()) &&
3461 "found instruction with poison generating flags not covered by "
3462 "VPRecipeWithIRFlags");
3467 if (
VPRecipeBase *OpDef = Operand->getDefiningRecipe())
3480 Instruction &UnderlyingInstr = WidenRec->getIngredient();
3481 VPRecipeBase *AddrDef = WidenRec->getAddr()->getDefiningRecipe();
3482 if (AddrDef && WidenRec->isConsecutive() &&
3483 BlockNeedsPredication(UnderlyingInstr.
getParent()))
3484 CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
3486 VPRecipeBase *AddrDef = InterleaveRec->getAddr()->getDefiningRecipe();
3490 InterleaveRec->getInterleaveGroup();
3491 bool NeedPredication =
false;
3493 NeedPredication |= BlockNeedsPredication(Member->getParent());
3495 if (NeedPredication)
3496 CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
3507 const bool &EpilogueAllowed) {
3508 if (InterleaveGroups.empty())
3518 IRMemberToRecipe[&MemR.getIngredient()] = &MemR;
3525 for (
const auto *IG : InterleaveGroups) {
3526 auto *Start = IRMemberToRecipe.
lookup(IG->getMember(0));
3530 StoredValues.
push_back(StoreR->getStoredValue());
3531 for (
unsigned I = 1;
I < IG->getFactor(); ++
I) {
3537 StoredValues.
push_back(StoreR->getStoredValue());
3541 bool NeedsMaskForGaps =
3542 (IG->requiresScalarEpilogue() && !EpilogueAllowed) ||
3543 (!StoredValues.
empty() && !IG->isFull());
3546 auto *InsertPos = IRMemberToRecipe.
lookup(IRInsertPos);
3554 VPValue *Addr = Start->getAddr();
3563 assert(IG->getIndex(IRInsertPos) != 0 &&
3564 "index of insert position shouldn't be zero");
3568 IG->getIndex(IRInsertPos),
3572 Addr =
B.createNoWrapPtrAdd(InsertPos->getAddr(), OffsetVPV, NW);
3578 if (IG->isReverse()) {
3581 -(int64_t)IG->getFactor(), NW, InsertPos->getDebugLoc());
3582 ReversePtr->insertBefore(InsertPos);
3586 InsertPos->getMask(), NeedsMaskForGaps,
3587 InterleaveMD, InsertPos->getDebugLoc());
3588 VPIG->insertBefore(InsertPos);
3591 for (
unsigned i = 0; i < IG->getFactor(); ++i)
3594 if (!Member->getType()->isVoidTy()) {
3653 AddOp = Instruction::Add;
3654 MulOp = Instruction::Mul;
3656 AddOp =
ID.getInductionOpcode();
3657 MulOp = Instruction::FMul;
3665 Step = Builder.createScalarCast(Instruction::Trunc, Step, Ty,
DL);
3666 Start = Builder.createScalarCast(Instruction::Trunc, Start, Ty,
DL);
3675 Init = Builder.createWidenCast(Instruction::UIToFP,
Init, StepTy);
3680 Init = Builder.createNaryOp(MulOp, {
Init, SplatStep}, Flags);
3681 Init = Builder.createNaryOp(AddOp, {SplatStart,
Init}, Flags,
3697 Builder.setInsertPoint(R->getParent(), std::next(R->getIterator()));
3701 VF = Builder.createScalarCast(Instruction::CastOps::UIToFP, VF, StepTy,
3704 VF = Builder.createScalarZExtOrTrunc(VF, StepTy,
3707 Inc = Builder.createNaryOp(MulOp, {Step, VF}, Flags);
3714 auto *
Next = Builder.createNaryOp(AddOp, {Prev, Inc}, Flags,
3717 WidePHI->addOperand(
Next);
3745 VPlan *Plan = R->getParent()->getPlan();
3746 VPValue *Start = R->getStartValue();
3747 VPValue *Step = R->getStepValue();
3748 VPValue *VF = R->getVFValue();
3750 assert(R->getInductionDescriptor().getKind() ==
3752 "Not a pointer induction according to InductionDescriptor!");
3755 "Recipe should have been replaced");
3761 VPPhi *ScalarPtrPhi = Builder.createScalarPhi(Start,
DL,
"pointer.phi");
3765 Builder.setInsertPoint(R->getParent(), R->getParent()->getFirstNonPhi());
3768 Offset = Builder.createOverflowingOp(Instruction::Mul, {
Offset, Step});
3770 Builder.createWidePtrAdd(ScalarPtrPhi,
Offset,
DL,
"vector.gep");
3771 R->replaceAllUsesWith(PtrAdd);
3776 VF = Builder.createScalarZExtOrTrunc(VF, StepTy, TypeInfo.
inferScalarType(VF),
3778 VPValue *Inc = Builder.createOverflowingOp(Instruction::Mul, {Step, VF});
3781 Builder.createPtrAdd(ScalarPtrPhi, Inc,
DL,
"ptr.ind");
3789 VPValue *Step = R->getStepValue();
3790 VPValue *Index = R->getIndex();
3794 ? Builder.createScalarSExtOrTrunc(
3796 : Builder.createScalarCast(Instruction::SIToFP, Index, StepTy,
3798 switch (R->getInductionKind()) {
3801 "Index type does not match StartValue type");
3802 return R->replaceAllUsesWith(Builder.createAdd(
3803 Start, Builder.createOverflowingOp(Instruction::Mul, {Index, Step})));
3806 return R->replaceAllUsesWith(Builder.createPtrAdd(
3807 Start, Builder.createOverflowingOp(Instruction::Mul, {Index, Step})));
3812 (FPBinOp->
getOpcode() == Instruction::FAdd ||
3813 FPBinOp->
getOpcode() == Instruction::FSub) &&
3814 "Original BinOp should be defined for FP induction");
3816 VPValue *
FMul = Builder.createNaryOp(Instruction::FMul, {Step, Index}, FMF);
3817 return R->replaceAllUsesWith(
3818 Builder.createNaryOp(FPBinOp->
getOpcode(), {Start, FMul}, FMF));
3831 if (!R->isReplicator())
3835 R->dissolveToCFGLoop();
3856 assert(Br->getNumOperands() == 2 &&
3857 "BranchOnTwoConds must have exactly 2 conditions");
3861 assert(Successors.size() == 3 &&
3862 "BranchOnTwoConds must have exactly 3 successors");
3867 VPValue *Cond0 = Br->getOperand(0);
3868 VPValue *Cond1 = Br->getOperand(1);
3873 !BrOnTwoCondsBB->
getParent() &&
"regions must already be dissolved");
3886 Br->eraseFromParent();
3909 WidenIVR->replaceAllUsesWith(PtrAdd);
3928 for (
unsigned I = 1;
I != Blend->getNumIncomingValues(); ++
I)
3929 Select = Builder.createSelect(Blend->getMask(
I),
3930 Blend->getIncomingValue(
I),
Select,
3931 R.getDebugLoc(),
"predphi", *Blend);
3932 Blend->replaceAllUsesWith(
Select);
3937 if (!VEPR->getOffset()) {
3939 "Expected unroller to have materialized offset for UF != 1");
3940 VEPR->materializeOffset();
3955 for (
VPValue *
Op : LastActiveL->operands()) {
3956 VPValue *NotMask = Builder.createNot(
Op, LastActiveL->getDebugLoc());
3961 VPValue *FirstInactiveLane = Builder.createNaryOp(
3963 LastActiveL->getDebugLoc(),
"first.inactive.lane");
3969 Builder.createSub(FirstInactiveLane, One,
3970 LastActiveL->getDebugLoc(),
"last.active.lane");
3980 assert(VPI->isMasked() &&
3981 "Unmasked MaskedCond should be simplified earlier");
3982 VPI->replaceAllUsesWith(Builder.createNaryOp(
3994 Instruction::Add, VPI->operands(), VPI->getNoWrapFlags(),
3995 VPI->getDebugLoc());
3996 VPI->replaceAllUsesWith(
Add);
4005 DebugLoc DL = BranchOnCountInst->getDebugLoc();
4008 ToRemove.push_back(BranchOnCountInst);
4023 ? Instruction::UIToFP
4024 : Instruction::Trunc;
4025 VectorStep = Builder.createWidenCast(CastOp, VectorStep, IVTy);
4031 Builder.createWidenCast(Instruction::Trunc, ScalarStep, IVTy);
4037 MulOpc = Instruction::FMul;
4038 Flags = VPI->getFastMathFlags();
4040 MulOpc = Instruction::Mul;
4045 MulOpc, {VectorStep, ScalarStep}, Flags, R.getDebugLoc());
4047 VPI->replaceAllUsesWith(VectorStep);
4053 R->eraseFromParent();
4061 struct EarlyExitInfo {
4072 if (Pred == MiddleVPBB)
4077 VPValue *CondOfEarlyExitingVPBB;
4078 [[maybe_unused]]
bool Matched =
4079 match(EarlyExitingVPBB->getTerminator(),
4081 assert(Matched &&
"Terminator must be BranchOnCond");
4085 VPBuilder EarlyExitingBuilder(EarlyExitingVPBB->getTerminator());
4086 auto *CondToEarlyExit = EarlyExitingBuilder.
createNaryOp(
4088 TrueSucc == ExitBlock
4089 ? CondOfEarlyExitingVPBB
4090 : EarlyExitingBuilder.
createNot(CondOfEarlyExitingVPBB));
4096 "exit condition must dominate the latch");
4105 assert(!Exits.
empty() &&
"must have at least one early exit");
4112 for (
const auto &[Num, VPB] :
enumerate(RPOT))
4114 llvm::sort(Exits, [&RPOIdx](
const EarlyExitInfo &
A,
const EarlyExitInfo &
B) {
4115 return RPOIdx[
A.EarlyExitingVPBB] < RPOIdx[
B.EarlyExitingVPBB];
4121 for (
unsigned I = 0;
I + 1 < Exits.
size(); ++
I)
4122 for (
unsigned J =
I + 1; J < Exits.
size(); ++J)
4124 Exits[
I].EarlyExitingVPBB) &&
4125 "RPO sort must place dominating exits before dominated ones");
4131 VPValue *Combined = Exits[0].CondToExit;
4132 for (
const EarlyExitInfo &Info :
drop_begin(Exits))
4133 Combined = Builder.createLogicalOr(Combined, Info.CondToExit);
4139 "Early exit store masking not implemented");
4143 for (
unsigned Idx = 0; Idx != Exits.
size(); ++Idx) {
4147 VectorEarlyExitVPBBs[Idx] = VectorEarlyExitVPBB;
4155 Exits.
size() == 1 ? VectorEarlyExitVPBBs[0]
4189 for (
auto [Exit, VectorEarlyExitVPBB] :
4190 zip_equal(Exits, VectorEarlyExitVPBBs)) {
4191 auto &[EarlyExitingVPBB, EarlyExitVPBB,
_] = Exit;
4203 ExitIRI->getIncomingValueForBlock(EarlyExitingVPBB);
4204 VPValue *NewIncoming = IncomingVal;
4206 VPBuilder EarlyExitBuilder(VectorEarlyExitVPBB);
4211 ExitIRI->removeIncomingValueFor(EarlyExitingVPBB);
4212 ExitIRI->addOperand(NewIncoming);
4215 EarlyExitingVPBB->getTerminator()->eraseFromParent();
4249 bool IsLastDispatch = (
I + 2 == Exits.
size());
4251 IsLastDispatch ? VectorEarlyExitVPBBs.
back()
4257 VectorEarlyExitVPBBs[
I]->setPredecessors({CurrentBB});
4260 CurrentBB = FalseBB;
4267 "Unexpected terminator");
4268 auto *IsLatchExitTaken =
4270 LatchExitingBranch->getOperand(1));
4272 DebugLoc LatchDL = LatchExitingBranch->getDebugLoc();
4273 LatchExitingBranch->eraseFromParent();
4274 Builder.setInsertPoint(LatchVPBB);
4276 {IsAnyExitTaken, IsLatchExitTaken}, LatchDL);
4278 LatchVPBB->
setSuccessors({DispatchVPBB, MiddleVPBB, HeaderVPBB});
4289 Type *RedTy = Ctx.Types.inferScalarType(Red);
4290 VPValue *VecOp = Red->getVecOp();
4292 assert(!Red->isPartialReduction() &&
4293 "This path does not support partial reductions");
4296 auto IsExtendedRedValidAndClampRange =
4309 "getExtendedReductionCost only supports integer types");
4310 ExtRedCost = Ctx.TTI.getExtendedReductionCost(
4311 Opcode, ExtOpc == Instruction::CastOps::ZExt, RedTy, SrcVecTy,
4312 Red->getFastMathFlags(),
CostKind);
4313 return ExtRedCost.
isValid() && ExtRedCost < ExtCost + RedCost;
4321 IsExtendedRedValidAndClampRange(
4324 Ctx.Types.inferScalarType(
A)))
4343 if (Opcode != Instruction::Add && Opcode != Instruction::Sub &&
4344 Opcode != Instruction::FAdd)
4347 assert(!Red->isPartialReduction() &&
4348 "This path does not support partial reductions");
4349 Type *RedTy = Ctx.Types.inferScalarType(Red);
4352 auto IsMulAccValidAndClampRange =
4359 Ext0 ? Ctx.Types.inferScalarType(Ext0->getOperand(0)) : RedTy;
4365 (Ext0->getOpcode() != Ext1->getOpcode() ||
4366 Ext0->getOpcode() == Instruction::CastOps::FPExt))
4370 !Ext0 || Ext0->getOpcode() == Instruction::CastOps::ZExt;
4372 MulAccCost = Ctx.TTI.getMulAccReductionCost(IsZExt, Opcode, RedTy,
4379 ExtCost += Ext0->computeCost(VF, Ctx);
4381 ExtCost += Ext1->computeCost(VF, Ctx);
4383 ExtCost += OuterExt->computeCost(VF, Ctx);
4385 return MulAccCost.
isValid() &&
4386 MulAccCost < ExtCost + MulCost + RedCost;
4391 VPValue *VecOp = Red->getVecOp();
4429 Builder.createWidenCast(Instruction::CastOps::Trunc, ValB, NarrowTy);
4430 Type *WideTy = Ctx.Types.inferScalarType(ExtA);
4431 ValB = ExtB = Builder.createWidenCast(ExtOpc, Trunc, WideTy);
4432 Mul->setOperand(1, ExtB);
4442 ExtendAndReplaceConstantOp(RecipeA, RecipeB,
B,
Mul);
4447 IsMulAccValidAndClampRange(
Mul, RecipeA, RecipeB,
nullptr)) {
4454 if (!
Sub && IsMulAccValidAndClampRange(
Mul,
nullptr,
nullptr,
nullptr))
4471 ExtendAndReplaceConstantOp(Ext0, Ext1,
B,
Mul);
4480 (Ext->getOpcode() == Ext0->getOpcode() || Ext0 == Ext1) &&
4481 Ext0->getOpcode() == Ext1->getOpcode() &&
4482 IsMulAccValidAndClampRange(
Mul, Ext0, Ext1, Ext) &&
Mul->hasOneUse()) {
4484 Ext0->getOpcode(), Ext0->getOperand(0), Ext->getResultType(),
nullptr,
4485 *Ext0, *Ext0, Ext0->getDebugLoc());
4486 NewExt0->insertBefore(Ext0);
4491 Ext->getResultType(),
nullptr, *Ext1,
4492 *Ext1, Ext1->getDebugLoc());
4495 Mul->setOperand(0, NewExt0);
4496 Mul->setOperand(1, NewExt1);
4497 Red->setOperand(1,
Mul);
4511 assert(!Red->isPartialReduction() &&
4512 "This path does not support partial reductions");
4515 auto IP = std::next(Red->getIterator());
4516 auto *VPBB = Red->getParent();
4526 Red->replaceAllUsesWith(AbstractR);
4556 for (
VPValue *VPV : VPValues) {
4565 if (
User->usesScalars(VPV))
4568 HoistPoint = HoistBlock->
begin();
4572 "All users must be in the vector preheader or dominated by it");
4577 VPV->replaceUsesWithIf(Broadcast,
4578 [VPV, Broadcast](
VPUser &U,
unsigned Idx) {
4579 return Broadcast != &U && !U.usesScalars(VPV);
4596 if (RepR->isPredicated() || !RepR->isSingleScalar() ||
4597 RepR->getOpcode() != Instruction::Load)
4600 VPValue *Addr = RepR->getOperand(0);
4603 if (!
Loc.AATags.Scope)
4608 if (R.mayWriteToMemory()) {
4610 if (!
Loc || !
Loc->AATags.Scope || !
Loc->AATags.NoAlias)
4618 for (
auto &[LoadRecipe, LoadLoc] : CandidateLoads) {
4622 const AAMDNodes &LoadAA = LoadLoc.AATags;
4638 return CommonMetadata;
4641template <
unsigned Opcode>
4646 static_assert(Opcode == Instruction::Load || Opcode == Instruction::Store,
4647 "Only Load and Store opcodes supported");
4648 constexpr bool IsLoad = (Opcode == Instruction::Load);
4654 return TypeInfo.
inferScalarType(IsLoad ? Recipe : Recipe->getOperand(0));
4659 for (
auto Recipes :
Groups) {
4660 if (Recipes.size() < 2)
4668 VPValue *MaskI = RecipeI->getMask();
4669 Type *TypeI = GetLoadStoreValueType(RecipeI);
4675 bool HasComplementaryMask =
false;
4680 VPValue *MaskJ = RecipeJ->getMask();
4681 Type *TypeJ = GetLoadStoreValueType(RecipeJ);
4682 if (TypeI == TypeJ) {
4692 if (HasComplementaryMask) {
4693 assert(Group.
size() >= 2 &&
"must have at least 2 entries");
4703template <
typename InstType>
4721 for (
auto &Group :
Groups) {
4741 return R->isSingleScalar() == IsSingleScalar;
4743 "all members in group must agree on IsSingleScalar");
4748 LoadWithMinAlign->getUnderlyingInstr(), {EarliestLoad->getOperand(0)},
4749 IsSingleScalar,
nullptr, *EarliestLoad, CommonMetadata);
4751 UnpredicatedLoad->insertBefore(EarliestLoad);
4755 Load->replaceAllUsesWith(UnpredicatedLoad);
4756 Load->eraseFromParent();
4766 if (!StoreLoc || !StoreLoc->AATags.Scope)
4772 StoresToSink.
end());
4776 SinkStoreInfo SinkInfo(StoresToSinkSet, *StoresToSink[0], PSE, L, TypeInfo);
4790 for (
auto &Group :
Groups) {
4803 VPValue *SelectedValue = Group[0]->getOperand(0);
4806 bool IsSingleScalar = Group[0]->isSingleScalar();
4807 for (
unsigned I = 1;
I < Group.size(); ++
I) {
4808 assert(IsSingleScalar == Group[
I]->isSingleScalar() &&
4809 "all members in group must agree on IsSingleScalar");
4810 VPValue *Mask = Group[
I]->getMask();
4812 SelectedValue = Builder.createSelect(Mask,
Value, SelectedValue,
4821 StoreWithMinAlign->getUnderlyingInstr(),
4822 {SelectedValue, LastStore->getOperand(1)}, IsSingleScalar,
4823 nullptr, *LastStore, CommonMetadata);
4824 UnpredicatedStore->insertBefore(*InsertBB, LastStore->
getIterator());
4828 Store->eraseFromParent();
4835 assert(Plan.
hasVF(BestVF) &&
"BestVF is not available in Plan");
4836 assert(Plan.
hasUF(BestUF) &&
"BestUF is not available in Plan");
4901 auto UsesVectorOrInsideReplicateRegion = [DefR, LoopRegion](
VPUser *U) {
4903 return !U->usesScalars(DefR) || ParentRegion != LoopRegion;
4910 none_of(DefR->users(), UsesVectorOrInsideReplicateRegion))
4920 DefR->replaceUsesWithIf(
4921 BuildVector, [BuildVector, &UsesVectorOrInsideReplicateRegion](
4923 return &U != BuildVector && UsesVectorOrInsideReplicateRegion(&U);
4937 for (
VPValue *Def : R.definedValues()) {
4950 auto IsCandidateUnpackUser = [Def](
VPUser *U) {
4952 return U->usesScalars(Def) &&
4955 if (
none_of(Def->users(), IsCandidateUnpackUser))
4962 Unpack->insertAfter(&R);
4963 Def->replaceUsesWithIf(Unpack,
4964 [&IsCandidateUnpackUser](
VPUser &U,
unsigned) {
4965 return IsCandidateUnpackUser(&U);
4974 bool RequiresScalarEpilogue,
VPValue *Step,
4975 std::optional<uint64_t> MaxRuntimeStep) {
4986 assert(StepR->getParent() == VectorPHVPBB &&
4987 "Step must be defined in VectorPHVPBB");
4989 InsertPt = std::next(StepR->getIterator());
4991 VPBuilder Builder(VectorPHVPBB, InsertPt);
4997 if (!RequiresScalarEpilogue &&
match(TC,
m_APInt(TCVal)) && MaxRuntimeStep &&
5009 if (TailByMasking) {
5010 TC = Builder.createAdd(
5021 Builder.createNaryOp(Instruction::URem, {TC, Step},
5030 if (RequiresScalarEpilogue) {
5032 "requiring scalar epilogue is not supported with fail folding");
5035 R = Builder.createSelect(IsZero, Step, R);
5049 "VF and VFxUF must be materialized together");
5061 Builder.createElementCount(TCTy, VFEC * Plan.
getConcreteUF());
5068 VPValue *RuntimeVF = Builder.createElementCount(TCTy, VFEC);
5072 BC, [&VF](
VPUser &U,
unsigned) {
return !U.usesScalars(&VF); });
5076 VPValue *MulByUF = Builder.createOverflowingOp(
5088 BasicBlock *EntryBB = Entry->getIRBasicBlock();
5096 const SCEV *Expr = ExpSCEV->getSCEV();
5099 ExpandedSCEVs[ExpSCEV->getSCEV()] = Res;
5104 ExpSCEV->eraseFromParent();
5107 "VPExpandSCEVRecipes must be at the beginning of the entry block, "
5108 "before any VPIRInstructions");
5111 auto EI = Entry->begin();
5121 return ExpandedSCEVs;
5133 VPValue *OpV,
unsigned Idx,
bool IsScalable) {
5137 return Member0Op == OpV;
5141 return !IsScalable && !W->getMask() && W->isConsecutive() &&
5144 return IR->getInterleaveGroup()->isFull() &&
IR->getVPValue(Idx) == OpV;
5161 for (
unsigned Idx = 0; Idx != WideMember0->getNumOperands(); ++Idx) {
5164 OpsI.
push_back(
Op->getDefiningRecipe()->getOperand(Idx));
5169 if (
any_of(
enumerate(OpsI), [WideMember0, Idx, IsScalable](
const auto &
P) {
5170 const auto &[
OpIdx, OpV] =
P;
5185 if (!InterleaveR || InterleaveR->
getMask())
5186 return std::nullopt;
5188 Type *GroupElementTy =
nullptr;
5192 [&TypeInfo, GroupElementTy](
VPValue *
Op) {
5193 return TypeInfo.inferScalarType(Op) == GroupElementTy;
5195 return std::nullopt;
5200 [&TypeInfo, GroupElementTy](
VPValue *
Op) {
5201 return TypeInfo.inferScalarType(Op) == GroupElementTy;
5203 return std::nullopt;
5207 if (IG->getFactor() != IG->getNumMembers())
5208 return std::nullopt;
5214 assert(
Size.isScalable() == VF.isScalable() &&
5215 "if Size is scalable, VF must be scalable and vice versa");
5216 return Size.getKnownMinValue();
5220 unsigned MinVal = VF.getKnownMinValue();
5222 if (IG->getFactor() == MinVal && GroupSize == GetVectorBitWidthForVF(VF))
5225 return std::nullopt;
5233 return RepR && RepR->isSingleScalar();
5240 auto *R = V->getDefiningRecipe();
5249 for (
unsigned Idx = 0,
E = WideMember0->getNumOperands(); Idx !=
E; ++Idx)
5250 WideMember0->setOperand(
5259 auto *LI =
cast<LoadInst>(LoadGroup->getInterleaveGroup()->getInsertPos());
5261 LoadGroup->getMask(),
true,
5262 {}, LoadGroup->getDebugLoc());
5263 L->insertBefore(LoadGroup);
5269 assert(RepR->isSingleScalar() && RepR->getOpcode() == Instruction::Load &&
5270 "must be a single scalar load");
5271 NarrowedOps.
insert(RepR);
5276 VPValue *PtrOp = WideLoad->getAddr();
5278 PtrOp = VecPtr->getOperand(0);
5283 nullptr, {}, *WideLoad);
5284 N->insertBefore(WideLoad);
5289std::unique_ptr<VPlan>
5309 "unexpected branch-on-count");
5313 std::optional<ElementCount> VFToOptimize;
5327 if (R.mayWriteToMemory() && !InterleaveR)
5342 std::optional<ElementCount> NarrowedVF =
5344 if (!NarrowedVF || (VFToOptimize && NarrowedVF != VFToOptimize))
5346 VFToOptimize = NarrowedVF;
5349 if (InterleaveR->getStoredValues().empty())
5354 auto *Member0 = InterleaveR->getStoredValues()[0];
5364 VPRecipeBase *DefR = Op.value()->getDefiningRecipe();
5367 auto *IR = dyn_cast<VPInterleaveRecipe>(DefR);
5368 return IR && IR->getInterleaveGroup()->isFull() &&
5369 IR->getVPValue(Op.index()) == Op.value();
5378 VFToOptimize->isScalable()))
5383 if (StoreGroups.
empty())
5387 bool RequiresScalarEpilogue =
5398 std::unique_ptr<VPlan> NewPlan;
5400 NewPlan = std::unique_ptr<VPlan>(Plan.
duplicate());
5401 Plan.
setVF(*VFToOptimize);
5402 NewPlan->removeVF(*VFToOptimize);
5408 for (
auto *StoreGroup : StoreGroups) {
5415 StoreGroup->getDebugLoc());
5416 S->insertBefore(StoreGroup);
5417 StoreGroup->eraseFromParent();
5429 if (VFToOptimize->isScalable()) {
5441 RequiresScalarEpilogue, Step);
5449 "All VPVectorPointerRecipes should have been removed");
5465 "must have a BranchOnCond");
5468 if (VF.
isScalable() && VScaleForTuning.has_value())
5469 VectorStep *= *VScaleForTuning;
5470 assert(VectorStep > 0 &&
"trip count should not be zero");
5474 MiddleTerm->setMetadata(LLVMContext::MD_prof, BranchWeights);
5481 VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
5494 "Cannot handle loops with uncountable early exits");
5501 assert(RecurSplice &&
"expected FirstOrderRecurrenceSplice");
5508 if (
any_of(RecurSplice->users(),
5509 [](
VPUser *U) { return !cast<VPRecipeBase>(U)->getRegion(); }) &&
5583 make_range(MiddleVPBB->getFirstNonPhi(), MiddleVPBB->end()))) {
5590 {},
"vector.recur.extract.for.phi");
5593 ExitPhi->replaceUsesOfWith(ExtractR, PenultimateElement);
5607 VPValue *WidenIVCandidate = BinOp->getOperand(0);
5608 VPValue *InvariantCandidate = BinOp->getOperand(1);
5610 std::swap(WidenIVCandidate, InvariantCandidate);
5624 auto *ClonedOp = BinOp->
clone();
5625 if (ClonedOp->getOperand(0) == WidenIV) {
5626 ClonedOp->setOperand(0, ScalarIV);
5628 assert(ClonedOp->getOperand(1) == WidenIV &&
"one operand must be WideIV");
5629 ClonedOp->setOperand(1, ScalarIV);
5644 auto CheckSentinel = [&SE](
const SCEV *IVSCEV,
5645 bool UseMax) -> std::optional<APSInt> {
5647 for (
bool Signed : {
true,
false}) {
5656 return std::nullopt;
5664 PhiR->getRecurrenceKind()))
5673 VPValue *BackedgeVal = PhiR->getBackedgeValue();
5687 !
match(FindLastSelect,
5696 IVOfExpressionToSink ? IVOfExpressionToSink : FindLastExpression, PSE,
5702 "IVOfExpressionToSink not being an AddRec must imply "
5703 "FindLastExpression not being an AddRec.");
5714 std::optional<APSInt> SentinelVal = CheckSentinel(IVSCEV, UseMax);
5715 bool UseSigned = SentinelVal && SentinelVal->isSigned();
5722 if (IVOfExpressionToSink) {
5723 const SCEV *FindLastExpressionSCEV =
5725 if (
match(FindLastExpressionSCEV,
5728 if (
auto NewSentinel =
5729 CheckSentinel(FindLastExpressionSCEV, NewUseMax)) {
5732 SentinelVal = *NewSentinel;
5733 UseSigned = NewSentinel->isSigned();
5735 IVSCEV = FindLastExpressionSCEV;
5736 IVOfExpressionToSink =
nullptr;
5746 if (AR->hasNoSignedWrap())
5748 else if (AR->hasNoUnsignedWrap())
5758 VPValue *NewFindLastSelect = BackedgeVal;
5760 if (!SentinelVal || IVOfExpressionToSink) {
5763 DebugLoc DL = FindLastSelect->getDefiningRecipe()->getDebugLoc();
5764 VPBuilder LoopBuilder(FindLastSelect->getDefiningRecipe());
5765 if (FindLastSelect->getDefiningRecipe()->getOperand(1) == PhiR)
5766 SelectCond = LoopBuilder.
createNot(SelectCond);
5773 if (SelectCond !=
Cond || IVOfExpressionToSink) {
5776 IVOfExpressionToSink ? IVOfExpressionToSink : FindLastExpression,
5785 VPIRFlags Flags(MinMaxKind,
false,
false,
5791 NewFindLastSelect, Flags, ExitDL);
5794 VPValue *VectorRegionExitingVal = ReducedIV;
5795 if (IVOfExpressionToSink)
5796 VectorRegionExitingVal =
5798 ReducedIV, IVOfExpressionToSink);
5801 VPValue *StartVPV = PhiR->getStartValue();
5808 NewRdxResult = MiddleBuilder.
createSelect(Cmp, VectorRegionExitingVal,
5818 AnyOfPhi->insertAfter(PhiR);
5825 OrVal, VectorRegionExitingVal, StartVPV, ExitDL);
5838 PhiR->hasUsesOutsideReductionChain());
5839 NewPhiR->insertBefore(PhiR);
5840 PhiR->replaceAllUsesWith(NewPhiR);
5841 PhiR->eraseFromParent();
5848struct ReductionExtend {
5849 Type *SrcType =
nullptr;
5850 ExtendKind Kind = ExtendKind::PR_None;
5856struct ExtendedReductionOperand {
5860 ReductionExtend ExtendA, ExtendB;
5868struct VPPartialReductionChain {
5871 VPWidenRecipe *ReductionBinOp =
nullptr;
5873 ExtendedReductionOperand ExtendedOp;
5880 unsigned AccumulatorOpIdx;
5881 unsigned ScaleFactor;
5894 if (!
Op->hasOneUse() ||
5900 auto *Trunc = Builder.createWidenCast(Instruction::CastOps::Trunc,
5901 Op->getOperand(1), NarrowTy);
5903 Op->setOperand(1, Builder.createWidenCast(ExtOpc, Trunc, WideTy));
5912 auto *
Sub =
Op->getOperand(0)->getDefiningRecipe();
5914 assert(Ext->getOpcode() ==
5916 "Expected both the LHS and RHS extends to be the same");
5917 bool IsSigned = Ext->getOpcode() == Instruction::SExt;
5920 auto *FreezeX = Builder.insert(
new VPWidenRecipe(Instruction::Freeze, {
X}));
5921 auto *FreezeY = Builder.insert(
new VPWidenRecipe(Instruction::Freeze, {
Y}));
5922 auto *
Max = Builder.insert(
5924 {FreezeX, FreezeY}, SrcTy));
5925 auto *Min = Builder.insert(
5927 {FreezeX, FreezeY}, SrcTy));
5930 return Builder.createWidenCast(Instruction::CastOps::ZExt, AbsDiff,
5943 if (!
Mul->hasOneUse() ||
5944 (Ext->getOpcode() != MulLHS->getOpcode() && MulLHS != MulRHS) ||
5945 MulLHS->getOpcode() != MulRHS->getOpcode())
5948 Mul->setOperand(0, Builder.createWidenCast(MulLHS->getOpcode(),
5949 MulLHS->getOperand(0),
5950 Ext->getResultType()));
5951 Mul->setOperand(1, MulLHS == MulRHS
5952 ?
Mul->getOperand(0)
5953 : Builder.createWidenCast(MulRHS->getOpcode(),
5954 MulRHS->getOperand(0),
5955 Ext->getResultType()));
5964 VPValue *VecOp = Red->getVecOp();
5998static void transformToPartialReduction(
const VPPartialReductionChain &Chain,
6006 WidenRecipe->
getOperand(1 - Chain.AccumulatorOpIdx));
6022 if (WidenRecipe->
getOpcode() == Instruction::Sub &&
6030 Builder.insert(NegRecipe);
6031 ExtendedOp = NegRecipe;
6035 ExtendedOp = optimizeExtendsForPartialReduction(ExtendedOp, TypeInfo);
6045 assert((!ExitValue || IsLastInChain) &&
6046 "if we found ExitValue, it must match RdxPhi's backedge value");
6057 PartialRed->insertBefore(WidenRecipe);
6065 E->insertBefore(WidenRecipe);
6066 PartialRed->replaceAllUsesWith(
E);
6079 auto *NewScaleFactor = Plan.
getConstantInt(32, Chain.ScaleFactor);
6080 StartInst->setOperand(2, NewScaleFactor);
6088 VPValue *OldStartValue = StartInst->getOperand(0);
6089 StartInst->setOperand(0, StartInst->getOperand(1));
6093 assert(RdxResult &&
"Could not find reduction result");
6096 constexpr unsigned SubOpc = Instruction::BinaryOps::Sub;
6102 [&NewResult](
VPUser &U,
unsigned Idx) {
return &
U != NewResult; });
6108 const VPPartialReductionChain &Link,
6111 const ExtendedReductionOperand &ExtendedOp = Link.ExtendedOp;
6112 std::optional<unsigned> BinOpc = std::nullopt;
6114 if (ExtendedOp.ExtendB.Kind != ExtendKind::PR_None)
6115 BinOpc = ExtendedOp.ExtendsUser->
getOpcode();
6117 std::optional<llvm::FastMathFlags>
Flags;
6122 ? (unsigned)Instruction::Add
6123 : Link.ReductionBinOp->getOpcode();
6125 Opcode, ExtendedOp.ExtendA.SrcType, ExtendedOp.ExtendB.SrcType, RdxType,
6126 VF, ExtendedOp.ExtendA.Kind, ExtendedOp.ExtendB.Kind, BinOpc,
6149static std::optional<ExtendedReductionOperand>
6153 "Op should be operand of UpdateR");
6161 if (
Op->hasOneUse() &&
6171 if (LHSInputType != RHSInputType ||
6172 LHSExt->getOpcode() != RHSExt->getOpcode())
6173 return std::nullopt;
6176 return ExtendedReductionOperand{
6178 {LHSInputType, getPartialReductionExtendKind(LHSExt)},
6182 std::optional<TTI::PartialReductionExtendKind> OuterExtKind;
6185 VPValue *CastSource = CastRecipe->getOperand(0);
6186 OuterExtKind = getPartialReductionExtendKind(CastRecipe);
6196 if (UpdateR->
getOpcode() == Instruction::Sub)
6197 return std::nullopt;
6198 }
else if (UpdateR->
getOpcode() == Instruction::Add ||
6199 UpdateR->
getOpcode() == Instruction::FAdd) {
6203 return ExtendedReductionOperand{
6210 if (!
Op->hasOneUse())
6211 return std::nullopt;
6216 return std::nullopt;
6226 return std::nullopt;
6230 ExtendKind LHSExtendKind = getPartialReductionExtendKind(LHSCast);
6233 const APInt *RHSConst =
nullptr;
6239 return std::nullopt;
6243 if (Cast && OuterExtKind &&
6244 getPartialReductionExtendKind(Cast) != OuterExtKind)
6245 return std::nullopt;
6247 Type *RHSInputType = LHSInputType;
6248 ExtendKind RHSExtendKind = LHSExtendKind;
6251 RHSExtendKind = getPartialReductionExtendKind(RHSCast);
6254 return ExtendedReductionOperand{
6255 MulOp, {LHSInputType, LHSExtendKind}, {RHSInputType, RHSExtendKind}};
6262static std::optional<SmallVector<VPPartialReductionChain>>
6270 return std::nullopt;
6281 VPValue *CurrentValue = ExitValue;
6282 while (CurrentValue != RedPhiR) {
6285 return std::nullopt;
6292 std::optional<ExtendedReductionOperand> ExtendedOp =
6293 matchExtendedReductionOperand(UpdateR,
Op, TypeInfo);
6295 ExtendedOp = matchExtendedReductionOperand(UpdateR, PrevValue, TypeInfo);
6297 return std::nullopt;
6301 Type *ExtSrcType = ExtendedOp->ExtendA.SrcType;
6304 return std::nullopt;
6309 VPPartialReductionChain Link(
6310 {UpdateR, *ExtendedOp, RK,
6314 CurrentValue = PrevValue;
6319 std::reverse(Chain.
begin(), Chain.
end());
6338 if (
auto Chains = getScaledReductions(RedPhiR, CostCtx,
Range))
6339 ChainsByPhi.
try_emplace(RedPhiR, std::move(*Chains));
6342 if (ChainsByPhi.
empty())
6349 for (
const auto &[
_, Chains] : ChainsByPhi)
6350 for (
const VPPartialReductionChain &Chain : Chains) {
6351 PartialReductionOps.
insert(Chain.ExtendedOp.ExtendsUser);
6352 ScaledReductionMap[Chain.ReductionBinOp] = Chain.ScaleFactor;
6358 auto ExtendUsersValid = [&](
VPValue *Ext) {
6360 return PartialReductionOps.contains(cast<VPRecipeBase>(U));
6364 auto IsProfitablePartialReductionChainForVF =
6371 for (
const VPPartialReductionChain &Link : Chain) {
6372 const ExtendedReductionOperand &ExtendedOp = Link.ExtendedOp;
6373 InstructionCost LinkCost = getPartialReductionLinkCost(CostCtx, Link, VF);
6377 PartialCost += LinkCost;
6378 RegularCost += Link.ReductionBinOp->
computeCost(VF, CostCtx);
6380 if (ExtendedOp.ExtendB.Kind != ExtendKind::PR_None)
6381 RegularCost += ExtendedOp.ExtendsUser->
computeCost(VF, CostCtx);
6384 RegularCost += Extend->computeCost(VF, CostCtx);
6386 return PartialCost.
isValid() && PartialCost < RegularCost;
6394 for (
auto &[RedPhiR, Chains] : ChainsByPhi) {
6395 for (
const VPPartialReductionChain &Chain : Chains) {
6396 if (!
all_of(Chain.ExtendedOp.ExtendsUser->operands(), ExtendUsersValid)) {
6400 auto UseIsValid = [&, RedPhiR = RedPhiR](
VPUser *U) {
6402 return PhiR == RedPhiR;
6404 return Chain.ScaleFactor == ScaledReductionMap.
lookup_or(R, 0) ||
6410 if (!
all_of(Chain.ReductionBinOp->users(), UseIsValid)) {
6419 auto *RepR = dyn_cast<VPReplicateRecipe>(U);
6420 return RepR && RepR->getOpcode() == Instruction::Store;
6431 return IsProfitablePartialReductionChainForVF(Chains, VF);
6437 for (
auto &[Phi, Chains] : ChainsByPhi)
6438 for (
const VPPartialReductionChain &Chain : Chains)
6439 transformToPartialReduction(Chain, CostCtx.
Types, Plan, Phi);
6453 if (VPI && VPI->getUnderlyingValue() &&
6465 New->insertBefore(VPI);
6466 if (VPI->getOpcode() == Instruction::Load)
6467 VPI->replaceAllUsesWith(New->getVPSingleValue());
6468 VPI->eraseFromParent();
6473 FinalRedStoresBuilder))
6482 ReplaceWith(Histogram);
6490 ReplaceWith(Recipe);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
This file implements a class to represent arbitrary precision integral constant values and operations...
ReachingDefInfo InstSet & ToRemove
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static bool isEqual(const Function &Caller, const Function &Callee)
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
static bool isSentinel(const DWARFDebugNames::AttributeEncoding &AE)
iv Induction Variable Users
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
Legalize the Machine IR a function s Machine IR
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first DebugLoc that has line number information, given a range of instructions.
This file provides utility analysis objects describing memory locations.
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
const SmallVectorImpl< MachineOperand > & Cond
This is the interface for a metadata-based scoped no-alias analysis.
This file defines generic set operations that may be used on set's of different types,...
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static SymbolRef::Type getType(const Symbol *Sym)
This file implements the TypeSwitch template, which mimics a switch() statement whose cases are type ...
This file implements dominator tree analysis for a single level of a VPlan's H-CFG.
This file contains the declarations of different VPlan-related auxiliary helpers.
This file declares the class VPlanVerifier, which contains utility functions to check the consistency...
This file contains the declarations of the Vectorization Plan base classes:
static const X86InstrFMA3Group Groups[]
static const uint32_t IV[8]
Helper for extra no-alias checks via known-safe recipe and SCEV.
SinkStoreInfo(const SmallPtrSetImpl< VPRecipeBase * > &ExcludeRecipes, VPReplicateRecipe &GroupLeader, PredicatedScalarEvolution &PSE, const Loop &L, VPTypeAnalysis &TypeInfo)
bool shouldSkip(VPRecipeBase &R) const
Return true if R should be skipped during alias checking, either because it's in the exclude set or b...
Class for arbitrary precision integers.
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
uint64_t getZExtValue() const
Get zero extended value.
unsigned getActiveBits() const
Compute the number of active bits in the value.
APInt abs() const
Get the absolute value.
unsigned getBitWidth() const
Return the number of bits in the APInt.
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
An arbitrary precision integer that knows its signedness.
static APSInt getMinValue(uint32_t numBits, bool Unsigned)
Return the APSInt representing the minimum integer value with the given bit width and signedness.
static APSInt getMaxValue(uint32_t numBits, bool Unsigned)
Return the APSInt representing the maximum integer value with the given bit width and signedness.
@ NoAlias
The two locations do not alias at all.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & back() const
back - Get the last element.
const T & front() const
front - Get the first element.
LLVM Basic Block Representation.
const Function * getParent() const
Return the enclosing method, or null if none.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
This class represents a function call, abstracting a target machine's calling convention.
@ ICMP_ULT
unsigned less than
@ ICMP_ULE
unsigned less or equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
This class represents a range of values.
LLVM_ABI bool contains(const APInt &Val) const
Return true if the specified value is in the set.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
static DebugLoc getCompilerGenerated()
static DebugLoc getUnknown()
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
ValueT lookup_or(const_arg_type_t< KeyT > Val, U &&Default) const
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
constexpr bool isVector() const
One or more elements.
static constexpr ElementCount getScalable(ScalarTy MinVal)
constexpr bool isScalar() const
Exactly one element.
Utility class for floating point operations which can have information about relaxed accuracy require...
FastMathFlags getFastMathFlags() const
Convenience function for getting all the fast-math flags.
Convenience struct for specifying and reasoning about fast-math flags.
Represents flags for the getelementptr instruction/expression.
GEPNoWrapFlags withoutNoUnsignedWrap() const
static GEPNoWrapFlags none()
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
A struct for saving information about induction variables.
static LLVM_ABI InductionDescriptor getCanonicalIntInduction(Type *Ty, ScalarEvolution &SE)
Returns the canonical integer induction for type Ty with start = 0 and step = 1.
InductionKind
This enum represents the kinds of inductions that we support.
@ IK_NoInduction
Not an induction variable.
@ IK_FpInduction
Floating point induction variable.
@ IK_PtrInduction
Pointer induction var. Step = C.
@ IK_IntInduction
Integer induction variable. Step = C.
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
static InstructionCost getInvalid(CostType Val=0)
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
The group of interleaved loads/stores sharing the same stride and close to each other.
auto members() const
Return an iterator range over the non-null members of this group, in index order.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
static bool getDecisionAndClampRange(const std::function< bool(ElementCount)> &Predicate, VFRange &Range)
Test a Predicate on a Range of VF's.
Represents a single loop in the control flow graph.
LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
This class implements a map that also provides access to all stored values in a deterministic order.
std::pair< iterator, bool > try_emplace(const KeyT &Key, Ts &&...Args)
ValueT lookup(const KeyT &Key) const
Representation for a specific memory location.
AAMDNodes AATags
The metadata nodes which describes the aliasing of the location (each member is null if that kind of ...
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
Post-order traversal of a graph.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
LLVM_ABI const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
static LLVM_ABI unsigned getOpcode(RecurKind Kind)
Returns the opcode corresponding to the RecurrenceKind.
unsigned getOpcode() const
static bool isFindLastRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
RegionT * getParent() const
Get the parent of the Region.
This class uses information about analyze scalars to rewrite expressions in canonical form.
LLVM_ABI Value * expandCodeFor(SCEVUse SH, Type *Ty, BasicBlock::iterator I)
Insert code to directly compute the specified SCEV expression into the program.
static const SCEV * rewrite(const SCEV *Scev, ScalarEvolution &SE, ValueToSCEVMapTy &Map)
This class represents an analyzed expression in the program.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
LLVM_ABI const SCEV * getUDivExpr(SCEVUse LHS, SCEVUse RHS)
Get a canonical unsigned division expression, or something simpler if possible.
const DataLayout & getDataLayout() const
Return the DataLayout associated with the module this SCEV instance is operating on.
LLVM_ABI const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
LLVM_ABI bool isKnownNonZero(const SCEV *S)
Test if the given expression is known to be non-zero.
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI const SCEV * getMinusSCEV(SCEVUse LHS, SCEVUse RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
ConstantRange getSignedRange(const SCEV *S)
Determine the signed range for a particular SCEV.
LLVM_ABI bool isKnownPositive(const SCEV *S)
Test if the given expression is known to be positive.
LLVM_ABI const SCEV * getElementCount(Type *Ty, ElementCount EC, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
ConstantRange getUnsignedRange(const SCEV *S)
Determine the unsigned range for a particular SCEV.
LLVM_ABI const SCEV * getMulExpr(SmallVectorImpl< SCEVUse > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
LLVM_ABI bool isKnownPredicate(CmpPredicate Pred, SCEVUse LHS, SCEVUse RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
static LLVM_ABI bool mayAliasInScopes(const MDNode *Scopes, const MDNode *NoAlias)
static LLVM_ABI AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB)
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Provides information about what library functions are available for the current target.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
This class implements a switch-like dispatch statement for a value of 'T' using dyn_cast functionalit...
TypeSwitch< T, ResultT > & Case(CallableT &&caseFn)
Add a case on the given type.
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
bool isStructTy() const
True if this is an instance of StructType.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
A recipe for generating the active lane mask for the vector loop that is used to predicate the vector...
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
RecipeListTy::iterator iterator
Instruction iterators...
iterator begin()
Recipe iterator methods.
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
VPBasicBlock * splitAt(iterator SplitAt)
Split current block at SplitAt by inserting a new block between the current block and its successors ...
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
const VPRecipeBase & back() const
A recipe for vectorizing a phi-node as a sequence of mask-based select instructions.
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
void setMask(unsigned Idx, VPValue *V)
Set mask number Idx to V.
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have...
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccss as successor of this VPBlockBase.
VPRegionBlock * getParent()
const VPBasicBlock * getExitingBasicBlock() const
size_t getNumSuccessors() const
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
const VPBlocksTy & getPredecessors() const
const std::string & getName() const
void clearSuccessors()
Remove all the successors of this block.
VPBlockBase * getSinglePredecessor() const
const VPBasicBlock * getEntryBasicBlock() const
VPBlockBase * getSingleHierarchicalPredecessor()
VPBlockBase * getSingleSuccessor() const
const VPBlocksTy & getSuccessors() const
static void insertOnEdge(VPBlockBase *From, VPBlockBase *To, VPBlockBase *BlockPtr)
Inserts BlockPtr on the edge between From and To.
static bool isLatch(const VPBlockBase *VPB, const VPDominatorTree &VPDT)
Returns true if VPB is a loop latch, using isHeader().
static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBases IfTrue and IfFalse after BlockPtr.
static void connectBlocks(VPBlockBase *From, VPBlockBase *To, unsigned PredIdx=-1u, unsigned SuccIdx=-1u)
Connect VPBlockBases From and To bi-directionally.
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
static auto blocksOnly(T &&Range)
Return an iterator range over Range which only includes BlockTy blocks.
static void transferSuccessors(VPBlockBase *Old, VPBlockBase *New)
Transfer successors from Old to New. New must have no successors.
static SmallVector< VPBasicBlock * > blocksInSingleSuccessorChainBetween(VPBasicBlock *FirstBB, VPBasicBlock *LastBB)
Returns the blocks between FirstBB and LastBB, where FirstBB to LastBB forms a single-sucessor chain.
A recipe for generating conditional branches on the bits of a mask.
RAII object that stores the current insertion point and restores it when the object is destroyed.
VPlan-based builder utility analogous to IRBuilder.
VPInstruction * createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPValue * createScalarZExtOrTrunc(VPValue *Op, Type *ResultTy, Type *SrcTy, DebugLoc DL)
VPValue * createElementCount(Type *Ty, ElementCount EC)
VPInstruction * createNot(VPValue *Operand, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createAnyOfReduction(VPValue *ChainOp, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL=DebugLoc::getUnknown())
Create an AnyOf reduction pattern: or-reduce ChainOp, freeze the result, then select between TrueVal ...
VPInstruction * createLogicalAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPDerivedIVRecipe * createDerivedIV(InductionDescriptor::InductionKind Kind, FPMathOperator *FPBinOp, VPIRValue *Start, VPValue *Current, VPValue *Step)
Convert the input value Current to the corresponding value of an induction with Start and Step values...
VPInstruction * createScalarCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, DebugLoc DL, const VPIRMetadata &Metadata={})
VPWidenPHIRecipe * createWidenPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
static VPBuilder getToInsertAfter(VPRecipeBase *R)
Create a VPBuilder to insert after R.
VPInstruction * createOverflowingOp(unsigned Opcode, ArrayRef< VPValue * > Operands, VPRecipeWithIRFlags::WrapFlagsTy WrapFlags={false, false}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPPhi * createScalarPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={})
VPInstruction * createICmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new ICmp VPInstruction with predicate Pred and operands A and B.
VPInstruction * createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={})
void setInsertPoint(VPBasicBlock *TheBB)
This specifies that created VPInstructions should be appended to the end of the specified block.
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.
A recipe for generating the phi node tracking the current scalar iteration index.
unsigned getNumDefinedValues() const
Returns the number of values defined by the VPDef.
VPValue * getVPSingleValue()
Returns the only VPValue defined by the VPDef.
VPValue * getVPValue(unsigned I)
Returns the VPValue with index I defined by the VPDef.
ArrayRef< VPRecipeValue * > definedValues()
Returns an ArrayRef of the values defined by the VPDef.
A recipe for converting the input value IV value to the corresponding value of an IV with different s...
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
bool properlyDominates(const VPRecipeBase *A, const VPRecipeBase *B)
A recipe to combine multiple recipes into a single 'expression' recipe, which should be considered a ...
A recipe representing a sequence of load -> update -> store as part of a histogram operation.
A special type of VPBasicBlock that wraps an existing IR basic block.
Class to record and manage LLVM IR flags.
static VPIRFlags getDefaultFlags(unsigned Opcode)
Returns default flags for Opcode for opcodes that support it, asserts otherwise.
LLVM_ABI_FOR_TEST FastMathFlags getFastMathFlags() const
static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)
Create a new VPIRPhi for \I , if it is a PHINode, otherwise create a VPIRInstruction.
This is a concrete Recipe that models a single VPlan-level instruction.
@ ExtractLane
Extracts a single lane (first operand) from a set of vector operands.
@ ExtractPenultimateElement
@ Unpack
Extracts all lanes from its (non-scalable) vector operand.
@ ReductionStartVector
Start vector for reductions with 3 operands: the original start value, the identity value for the red...
@ BuildVector
Creates a fixed-width vector containing all operands.
@ BuildStructVector
Given operands of (the same) struct type, creates a struct of fixed- width vectors each containing a ...
@ CanonicalIVIncrementForPart
@ ComputeReductionResult
Reduce the operands to the final reduction result using the operation specified via the operation's V...
const InterleaveGroup< Instruction > * getInterleaveGroup() const
VPValue * getMask() const
Return the mask used by this recipe.
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
A recipe for interleaved memory operations with vector-predication intrinsics.
VPInterleaveRecipe is a recipe for transforming an interleave group of load or stores into one wide l...
VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when control converges back from ...
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
VPBasicBlock * getParent()
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Helper class to create VPRecipies from IR instructions.
VPHistogramRecipe * widenIfHistogram(VPInstruction *VPI)
If VPI represents a histogram operation (as determined by LoopVectorizationLegality) make that safe f...
VPRecipeBase * tryToWidenMemory(VPInstruction *VPI, VFRange &Range)
Check if the load or store instruction VPI should widened for Range.Start and potentially masked.
bool replaceWithFinalIfReductionStore(VPInstruction *VPI, VPBuilder &FinalRedStoresBuilder)
If VPI is a store of a reduction into an invariant address, delete it.
VPReplicateRecipe * handleReplication(VPInstruction *VPI, VFRange &Range)
Build a VPReplicationRecipe for VPI.
A recipe to represent inloop reduction operations with vector-predication intrinsics,...
A recipe for handling reduction phis.
void setVFScaleFactor(unsigned ScaleFactor)
Set the VFScaleFactor for this reduction phi.
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by, or 1 if it isn't scaled.
RecurKind getRecurrenceKind() const
Returns the recurrence kind of the reduction.
A recipe to represent inloop, ordered or partial reduction operations.
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
const VPBlockBase * getEntry() const
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
VPInstruction * getOrCreateCanonicalIVIncrement()
Get the canonical IV increment instruction if it exists.
void setExiting(VPBlockBase *ExitingBlock)
Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.
Type * getCanonicalIVType() const
Return the type of the canonical IV for loop regions.
bool hasCanonicalIVNUW() const
Indicates if NUW is set for the canonical IV increment, for loop regions.
void clearCanonicalIVNUW(VPInstruction *Increment)
Unsets NUW for the canonical IV increment Increment, for loop regions.
VPRegionValue * getCanonicalIV()
Return the canonical induction variable of the region, null for replicating regions.
const VPBlockBase * getExiting() const
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
VPValues defined by a VPRegionBlock, like the canonical IV.
DebugLoc getDebugLoc() const
Returns the debug location of the VPRegionValue.
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
bool isSingleScalar() const
bool isPredicated() const
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...
VPSingleDef is a base class for recipes for modeling a sequence of one or more output IR that define ...
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
VPSingleDefRecipe * clone() override=0
Clone the current recipe.
An analysis for type-inference for VPValues.
LLVMContext & getContext()
Return the LLVMContext used by the analysis.
Type * inferScalarType(const VPValue *V)
Infer the type of V. Returns the scalar type of V.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
void setOperand(unsigned I, VPValue *New)
unsigned getNumOperands() const
VPValue * getOperand(unsigned N) const
void addOperand(VPValue *Operand)
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Value * getLiveInIRValue() const
Return the underlying IR value for a VPIRValue.
bool isDefinedOutsideLoopRegions() const
Returns true if the VPValue is defined outside any loop.
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
void setUnderlyingValue(Value *Val)
void replaceAllUsesWith(VPValue *New)
unsigned getNumUsers() const
void replaceUsesWithIf(VPValue *New, llvm::function_ref< bool(VPUser &U, unsigned Idx)> ShouldReplace)
Go through the uses list for this VPValue and make each use point to New if the callback ShouldReplac...
A recipe to compute a pointer to the last element of each part of a widened memory access for widened...
VPWidenCastRecipe is a recipe to create vector cast instructions.
Instruction::CastOps getOpcode() const
A recipe for handling GEP instructions.
Base class for widened induction (VPWidenIntOrFpInductionRecipe and VPWidenPointerInductionRecipe),...
VPIRValue * getStartValue() const
Returns the start value of the induction.
VPValue * getStepValue()
Returns the step value of the induction.
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
A recipe for handling phi nodes of integer and floating-point inductions, producing their vector valu...
VPIRValue * getStartValue() const
Returns the start value of the induction.
VPValue * getSplatVFValue() const
If the recipe has been unrolled, return the VPValue for the induction increment, otherwise return nul...
VPValue * getLastUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the last unrolled part,...
A recipe for widening vector intrinsics.
A common base class for widening memory operations.
A recipe for widened phis.
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenRecipe.
VPWidenRecipe * clone() override
Clone the current recipe.
unsigned getOpcode() const
VPlan models a candidate for vectorization, encoding various decisions take to produce efficient outp...
VPIRValue * getLiveIn(Value *V) const
Return the live-in VPIRValue for V, if there is one or nullptr otherwise.
bool hasVF(ElementCount VF) const
const DataLayout & getDataLayout() const
LLVMContext & getContext() const
VPBasicBlock * getEntry()
bool hasScalableVF() const
VPValue * getTripCount() const
The trip count of the original loop.
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
iterator_range< SmallSetVector< ElementCount, 2 >::iterator > vectorFactors() const
Returns an iterator range over all VFs of the plan.
VPIRValue * getFalse()
Return a VPIRValue wrapping i1 false.
VPSymbolicValue & getVFxUF()
Returns VF * UF of the vector loop region.
VPIRValue * getAllOnesValue(Type *Ty)
Return a VPIRValue wrapping the AllOnes value of type Ty.
VPRegionBlock * createReplicateRegion(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="")
Create a new replicate region with Entry, Exiting and Name.
auto getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
bool hasUF(unsigned UF) const
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
VPSymbolicValue & getVectorTripCount()
The vector trip count.
VPValue * getBackedgeTakenCount() const
VPIRValue * getOrAddLiveIn(Value *V)
Gets the live-in VPIRValue for V or adds a new live-in (if none exists yet) for V.
VPIRValue * getZero(Type *Ty)
Return a VPIRValue wrapping the null value of type Ty.
void setVF(ElementCount VF)
bool isUnrolled() const
Returns true if the VPlan already has been unrolled, i.e.
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
unsigned getConcreteUF() const
Returns the concrete UF of the plan, after unrolling.
void resetTripCount(VPValue *NewTripCount)
Resets the trip count for the VPlan.
VPBasicBlock * getMiddleBlock()
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
VPIRValue * getTrue()
Return a VPIRValue wrapping i1 true.
VPBasicBlock * getVectorPreheader() const
Returns the preheader of the vector loop region, if one exists, or null otherwise.
VPSymbolicValue & getUF()
Returns the UF of the vector loop region.
bool hasScalarVFOnly() const
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
VPSymbolicValue & getVF()
Returns the VF of the vector loop region.
bool hasScalarTail() const
Returns true if the scalar tail may execute after the vector loop, i.e.
LLVM_ABI_FOR_TEST VPlan * duplicate()
Clone the current VPlan, update all VPValues of the new VPlan and cloned recipes to refer to the clon...
VPIRValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPIRValue wrapping a ConstantInt with the given type and value.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
iterator_range< user_iterator > users()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
constexpr bool hasKnownScalarFactor(const FixedOrScalableQuantity &RHS) const
Returns true if there exists a value X where RHS.multiplyCoefficientBy(X) will result in a value whos...
constexpr ScalarTy getFixedValue() const
constexpr ScalarTy getKnownScalarFactor(const FixedOrScalableQuantity &RHS) const
Returns a value X where RHS.multiplyCoefficientBy(X) will result in a value whose quantity matches ou...
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_ABI APInt RoundingUDiv(const APInt &A, const APInt &B, APInt::Rounding RM)
Return A unsign-divided by B, rounded by the given rounding mode.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
match_isa< To... > m_Isa()
match_combine_or< Ty... > m_CombineOr(const Ty &...Ps)
Combine pattern matchers matching any of Ps patterns.
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
auto m_Cmp()
Matches any compare instruction and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_MaskedStore(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
Matches MaskedStore Intrinsic.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
LogicalOp_match< LHS, RHS, Instruction::And > m_LogicalAnd(const LHS &L, const RHS &R)
Matches L && R either in the form of L & R or L ?
BinaryOp_match< LHS, RHS, Instruction::FMul > m_FMul(const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, OpTy > m_ZExtOrSelf(const OpTy &Op)
bool match(Val *V, const Pattern &P)
match_deferred< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
auto match_fn(const Pattern &P)
A match functor that can be used as a UnaryPredicate in functional algorithms like all_of.
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_MaskedLoad(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
Matches MaskedLoad Intrinsic.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
SpecificCmpClass_match< LHS, RHS, CmpInst > m_SpecificCmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
CastInst_match< OpTy, FPExtInst > m_FPExt(const OpTy &Op)
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::UDiv > m_UDiv(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
BinaryOp_match< LHS, RHS, Instruction::FAdd, true > m_c_FAdd(const LHS &L, const RHS &R)
Matches FAdd with LHS and RHS in either order.
LogicalOp_match< LHS, RHS, Instruction::And, true > m_c_LogicalAnd(const LHS &L, const RHS &R)
Matches L && R with LHS and RHS in either order.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
BinaryOp_match< LHS, RHS, Instruction::Mul, true > m_c_Mul(const LHS &L, const RHS &R)
Matches a Mul with LHS and RHS in either order.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
auto m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
bind_cst_ty m_scev_APInt(const APInt *&C)
Match an SCEV constant and bind it to an APInt.
bool match(const SCEV *S, const Pattern &P)
SCEVAffineAddRec_match< Op0_t, Op1_t, match_isa< const Loop > > m_scev_AffineAddRec(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::ExtractLastLane, VPInstruction_match< VPInstruction::ExtractLastPart, Op0_t > > m_ExtractLastLaneOfLastPart(const Op0_t &Op0)
AllRecipe_commutative_match< Instruction::And, Op0_t, Op1_t > m_c_BinaryAnd(const Op0_t &Op0, const Op1_t &Op1)
Match a binary AND operation.
AllRecipe_match< Instruction::Or, Op0_t, Op1_t > m_BinaryOr(const Op0_t &Op0, const Op1_t &Op1)
Match a binary OR operation.
VPInstruction_match< VPInstruction::AnyOf > m_AnyOf()
AllRecipe_commutative_match< Instruction::Or, Op0_t, Op1_t > m_c_BinaryOr(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::ComputeReductionResult, Op0_t > m_ComputeReductionResult(const Op0_t &Op0)
auto m_WidenAnyExtend(const Op0_t &Op0)
match_bind< VPIRValue > m_VPIRValue(VPIRValue *&V)
Match a VPIRValue.
VPInstruction_match< VPInstruction::StepVector > m_StepVector()
auto m_VPPhi(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::BranchOnTwoConds > m_BranchOnTwoConds()
AllRecipe_match< Opcode, Op0_t, Op1_t > m_Binary(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::LastActiveLane, Op0_t > m_LastActiveLane(const Op0_t &Op0)
auto m_WidenIntrinsic(const T &...Ops)
VPInstruction_match< VPInstruction::ExitingIVValue, Op0_t > m_ExitingIVValue(const Op0_t &Op0)
VPInstruction_match< Instruction::ExtractElement, Op0_t, Op1_t > m_ExtractElement(const Op0_t &Op0, const Op1_t &Op1)
specific_intval< 1 > m_False()
VPInstruction_match< VPInstruction::ExtractLastLane, Op0_t > m_ExtractLastLane(const Op0_t &Op0)
VPInstruction_match< VPInstruction::ActiveLaneMask, Op0_t, Op1_t, Op2_t > m_ActiveLaneMask(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
match_bind< VPSingleDefRecipe > m_VPSingleDefRecipe(VPSingleDefRecipe *&V)
Match a VPSingleDefRecipe, capturing if we match.
VPInstruction_match< VPInstruction::BranchOnCount > m_BranchOnCount()
auto m_GetElementPtr(const Op0_t &Op0, const Op1_t &Op1)
specific_intval< 1 > m_True()
auto m_VPValue()
Match an arbitrary VPValue and ignore it.
VectorEndPointerRecipe_match< Op0_t, Op1_t > m_VecEndPtr(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::ExtractLastPart, Op0_t > m_ExtractLastPart(const Op0_t &Op0)
VPInstruction_match< VPInstruction::Broadcast, Op0_t > m_Broadcast(const Op0_t &Op0)
VPInstruction_match< VPInstruction::ExplicitVectorLength, Op0_t > m_EVL(const Op0_t &Op0)
VPInstruction_match< VPInstruction::BuildVector > m_BuildVector()
BuildVector matches only its opcode, without matching its operands, as the number of operands is not fixed.
VPInstruction_match< VPInstruction::ExtractPenultimateElement, Op0_t > m_ExtractPenultimateElement(const Op0_t &Op0)
match_bind< VPInstruction > m_VPInstruction(VPInstruction *&V)
Match a VPInstruction, capturing if we match.
VPInstruction_match< VPInstruction::FirstActiveLane, Op0_t > m_FirstActiveLane(const Op0_t &Op0)
auto m_DerivedIV(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
VPInstruction_match< VPInstruction::BranchOnCond > m_BranchOnCond()
VPInstruction_match< VPInstruction::ExtractLane, Op0_t, Op1_t > m_ExtractLane(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::Reverse, Op0_t > m_Reverse(const Op0_t &Op0)
NodeAddr< DefNode * > Def
bool isSingleScalar(const VPValue *VPV)
Returns true if VPV is a single scalar, either because it produces the same value for all lanes or on...
VPValue * getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr)
Get or create a VPValue that corresponds to the expansion of Expr.
bool cannotHoistOrSinkRecipe(const VPRecipeBase &R, bool Sinking=false)
Return true if we do not know how to (mechanically) hoist or sink R.
VPInstruction * findComputeReductionResult(VPReductionPHIRecipe *PhiR)
Find the ComputeReductionResult recipe for PhiR, looking through selects inserted for predicated redu...
VPInstruction * findCanonicalIVIncrement(VPlan &Plan)
Find the canonical IV increment of Plan's vector loop region.
std::optional< MemoryLocation > getMemoryLocation(const VPRecipeBase &R)
Return a MemoryLocation for R with noalias metadata populated from R, if the recipe is supported and ...
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
VPRecipeBase * findRecipe(VPValue *Start, PredT Pred)
Search Start's users for a recipe satisfying Pred, looking through recipes with definitions.
VPSingleDefRecipe * findHeaderMask(VPlan &Plan)
Collect the header mask with the pattern: (ICMP_ULE, WideCanonicalIV, backedge-taken-count) TODO: Int...
bool onlyScalarValuesUsed(const VPValue *Def)
Returns true if only scalar values of Def are used by all users.
static VPRecipeBase * findUserOf(VPValue *V, const MatchT &P)
If V is used by a recipe matching pattern P, return it.
bool isUniformAcrossVFsAndUFs(const VPValue *V)
Checks if V is uniform across all VF lanes and UF parts.
const SCEV * getSCEVExprForVPValue(const VPValue *V, PredicatedScalarEvolution &PSE, const Loop *L=nullptr)
Return the SCEV expression for V.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
constexpr auto not_equal_to(T &&Arg)
Functor variant of std::not_equal_to that can be used as a UnaryPredicate in functional algorithms li...
void stable_sort(R &&Range)
auto min_element(R &&Range)
Provide wrappers to std::min_element which take ranges instead of having to pass begin/end explicitly...
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
DenseMap< const Value *, const SCEV * > ValueToSCEVMapTy
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
constexpr from_range_t from_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
auto cast_or_null(const Y &Val)
iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order.
iterator_range< df_iterator< VPBlockDeepTraversalWrapper< VPBlockBase * > > > vp_depth_first_deep(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order while traversing t...
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like a...
SmallVector< VPRegisterUsage, 8 > calculateRegisterUsageForPlan(VPlan &Plan, ArrayRef< ElementCount > VFs, const TargetTransformInfo &TTI, const SmallPtrSetImpl< const Value * > &ValuesToIgnore)
Estimate the register usage for Plan and vectorization factors in VFs by calculating the highest numb...
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI_FOR_TEST cl::opt< bool > EnableWideActiveLaneMask
UncountableExitStyle
Different methods of handling early exits.
@ ReadOnly
No side effects to worry about, so we can process any uncountable exits in the loop and branch either...
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
iterator_range< filter_iterator< detail::IterOfRange< RangeT >, PredicateT > > make_filter_range(RangeT &&Range, PredicateT Pred)
Convenience function that takes a range of elements and a predicate, and return a new filter_iterator...
bool canConstantBeExtended(const APInt *C, Type *NarrowType, TTI::PartialReductionExtendKind ExtKind)
Check if a constant CI can be safely treated as having been extended from a narrower type with the gi...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
RecurKind
These are the kinds of recurrences that we support.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ FindIV
FindIV reduction with select(icmp(),x,y) where one of (x,y) is a loop induction variable (increasing ...
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ Sub
Subtraction of integers.
@ AddChainWithSubs
A chain of adds and subs.
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
LLVM_ABI Value * getRecurrenceIdentity(RecurKind K, Type *Tp, FastMathFlags FMF)
Given information about a recurrence kind, return the identity for the @llvm.vector....
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the specified block at the specified instruction.
FunctionAddr VTableAddr Next
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
DWARFExpression::Operation Op
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...
ArrayRef(const T &OneElt) -> ArrayRef< T >
auto make_second_range(ContainerTy &&c)
Given a container of pairs, return a range over the second elements.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer list are equal, or the list is empty or has a single element.
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
@ Default
The result value is uniform if and only if all operands are uniform.
constexpr detail::IsaCheckPredicate< Types... > IsaPred
Function object wrapper for the llvm::isa type check.
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
RemoveMask_match(const Op0_t &In, Op1_t &Out)
bool match(OpTy *V) const
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
MDNode * Scope
The tag for alias scope specification (used with noalias).
MDNode * NoAlias
The tag specifying the noalias scope.
This struct is a compact representation of a valid (non-zero power of two) alignment.
An information struct used to provide DenseMap with the various necessary components for a given valu...
This reduction is unordered with the partial result scaled down by some factor.
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
Struct to hold various analysis needed for cost computations.
TargetTransformInfo::TargetCostKind CostKind
const TargetTransformInfo & TTI
A VPValue representing a live-in from the input IR or a constant.
Type * getType() const
Returns the type of the underlying IR value.
A struct that represents some properties of the register usage of a loop.
SmallMapVector< unsigned, unsigned, 4 > MaxLocalUsers
Holds the maximum number of concurrent live intervals in the loop.
InstructionCost spillCost(const TargetTransformInfo &TTI, TargetTransformInfo::TargetCostKind CostKind, unsigned OverrideMaxNumRegs=0) const
Calculate the estimated cost of any spills due to using more registers than the number available for ...
A symbolic live-in VPValue, used for values like vector trip count, VF, and VFxUF.
bool isMaterialized() const
Returns true if this symbolic value has been materialized.
A recipe for widening load operations with vector-predication intrinsics, using the address to load f...
A recipe for widening load operations, using the address to load from and an optional mask.
A recipe for widening store operations with vector-predication intrinsics, using the value to store,...
A recipe for widening store operations, using the stored value, the address to store to and an option...