57 if (!VPBB->getParent())
60 auto EndIter = Term ? Term->getIterator() : VPBB->end();
65 VPValue *VPV = Ingredient.getVPSingleValue();
82 *Load, Ingredient.getOperand(0),
nullptr ,
84 Ingredient.getDebugLoc());
87 *Store, Ingredient.getOperand(1), Ingredient.getOperand(0),
88 nullptr ,
false ,
false , *VPI,
89 Ingredient.getDebugLoc());
92 Ingredient.getDebugLoc());
100 *VPI, CI->getDebugLoc());
103 CI->getOpcode(), Ingredient.getOperand(0), CI->getType(), CI,
107 *VPI, Ingredient.getDebugLoc());
111 "inductions must be created earlier");
120 "Only recpies with zero or one defined values expected");
121 Ingredient.eraseFromParent();
138 if (
A->getOpcode() != Instruction::Store ||
139 B->getOpcode() != Instruction::Store)
149 const APInt *Distance;
155 Type *TyA = TypeInfo.inferScalarType(
A->getOperand(0));
157 Type *TyB = TypeInfo.inferScalarType(
B->getOperand(0));
163 uint64_t MaxStoreSize = std::max(SizeA, SizeB);
165 auto VFs =
B->getParent()->getPlan()->vectorFactors();
169 return Distance->
abs().
uge(
177 : ExcludeRecipes(ExcludeRecipes), GroupLeader(GroupLeader), PSE(PSE),
178 L(L), TypeInfo(TypeInfo) {}
185 return ExcludeRecipes.contains(&R) ||
186 (Store && isNoAliasViaDistance(Store, &GroupLeader));
199 std::optional<SinkStoreInfo> SinkInfo = {}) {
200 bool CheckReads = SinkInfo.has_value();
207 "Expected at most one successor in block chain");
210 if (SinkInfo && SinkInfo->shouldSkip(R))
214 if (!
R.mayWriteToMemory() && !(CheckReads &&
R.mayReadFromMemory()))
234template <
unsigned Opcode>
239 static_assert(Opcode == Instruction::Load || Opcode == Instruction::Store,
240 "Only Load and Store opcodes supported");
241 constexpr bool IsLoad = (Opcode == Instruction::Load);
249 if (!RepR || RepR->getOpcode() != Opcode || !FilterFn(RepR))
253 VPValue *Addr = RepR->getOperand(IsLoad ? 0 : 1);
256 RecipesByAddress[AddrSCEV].push_back(RepR);
261 for (
auto &Group :
Groups) {
280 if (R.mayHaveSideEffects() || R.mayReadFromMemory() || R.isPhi())
285 return RepR && RepR->getOpcode() == Instruction::Alloca;
294 auto InsertIfValidSinkCandidate = [ScalarVFOnly, &WorkList](
310 if (!ScalarVFOnly && RepR->isSingleScalar())
313 WorkList.
insert({SinkTo, Candidate});
325 for (
auto &Recipe : *VPBB)
327 InsertIfValidSinkCandidate(VPBB,
Op);
331 for (
unsigned I = 0;
I != WorkList.
size(); ++
I) {
334 std::tie(SinkTo, SinkCandidate) = WorkList[
I];
339 auto UsersOutsideSinkTo =
341 return cast<VPRecipeBase>(U)->getParent() != SinkTo;
343 if (
any_of(UsersOutsideSinkTo, [SinkCandidate](
VPUser *U) {
344 return !U->usesFirstLaneOnly(SinkCandidate);
347 bool NeedsDuplicating = !UsersOutsideSinkTo.empty();
349 if (NeedsDuplicating) {
353 if (
auto *SinkCandidateRepR =
359 nullptr , *SinkCandidateRepR,
363 Clone = SinkCandidate->
clone();
373 InsertIfValidSinkCandidate(SinkTo,
Op);
383 if (!EntryBB || EntryBB->size() != 1 ||
393 if (EntryBB->getNumSuccessors() != 2)
398 if (!Succ0 || !Succ1)
401 if (Succ0->getNumSuccessors() + Succ1->getNumSuccessors() != 1)
403 if (Succ0->getSingleSuccessor() == Succ1)
405 if (Succ1->getSingleSuccessor() == Succ0)
422 if (!Region1->isReplicator())
424 auto *MiddleBasicBlock =
426 if (!MiddleBasicBlock || !MiddleBasicBlock->empty())
431 if (!Region2 || !Region2->isReplicator())
436 if (!Mask1 || Mask1 != Mask2)
439 assert(Mask1 && Mask2 &&
"both region must have conditions");
445 if (TransformedRegions.
contains(Region1))
452 if (!Then1 || !Then2)
472 VPValue *Phi1ToMoveV = Phi1ToMove.getVPSingleValue();
478 if (Phi1ToMove.getVPSingleValue()->getNumUsers() == 0) {
479 Phi1ToMove.eraseFromParent();
482 Phi1ToMove.moveBefore(*Merge2, Merge2->begin());
496 TransformedRegions.
insert(Region1);
499 return !TransformedRegions.
empty();
506 std::string RegionName = (
Twine(
"pred.") + Instr->getOpcodeName()).str();
507 assert(Instr->getParent() &&
"Predicated instruction not in any basic block");
508 auto *BlockInMask = PredRecipe->
getMask();
527 RecipeWithoutMask->getDebugLoc());
551 if (RepR->isPredicated())
570 if (ParentRegion && ParentRegion->
getExiting() == CurrentBlock)
584 if (!VPBB->getParent())
588 if (!PredVPBB || PredVPBB->getNumSuccessors() != 1 ||
597 R.moveBefore(*PredVPBB, PredVPBB->
end());
599 auto *ParentRegion = VPBB->getParent();
600 if (ParentRegion && ParentRegion->getExiting() == VPBB)
601 ParentRegion->setExiting(PredVPBB);
602 for (
auto *Succ :
to_vector(VPBB->successors())) {
608 return !WorkList.
empty();
615 bool ShouldSimplify =
true;
616 while (ShouldSimplify) {
632 if (!
IV ||
IV->getTruncInst())
647 for (
auto *U : FindMyCast->
users()) {
649 if (UserCast && UserCast->getUnderlyingValue() == IRCast) {
650 FoundUserCast = UserCast;
654 FindMyCast = FoundUserCast;
679 if (!WidenOriginalIV || !WidenOriginalIV->isCanonical())
693 WidenOriginalIV->dropPoisonGeneratingFlags();
706 bool IsConditionalAssume = RepR && RepR->isPredicated() &&
708 if (IsConditionalAssume)
711 if (R.mayHaveSideEffects())
715 return all_of(R.definedValues(),
716 [](
VPValue *V) { return V->getNumUsers() == 0; });
732 if (!PhiR || PhiR->getNumOperands() != 2)
734 VPUser *PhiUser = PhiR->getSingleUser();
738 if (PhiUser !=
Incoming->getDefiningRecipe() ||
741 PhiR->replaceAllUsesWith(PhiR->getOperand(0));
742 PhiR->eraseFromParent();
743 Incoming->getDefiningRecipe()->eraseFromParent();
758 Kind, FPBinOp, StartV, CanonicalIV, Step,
"offset.idx");
768 BaseIV = Builder.createScalarCast(Instruction::Trunc, BaseIV, TruncTy,
DL);
774 if (ResultTy != StepTy) {
781 Builder.setInsertPoint(VecPreheader);
782 Step = Builder.createScalarCast(Instruction::Trunc, Step, ResultTy,
DL);
784 return Builder.createScalarIVSteps(InductionOpcode, FPBinOp, BaseIV, Step,
790 for (
unsigned I = 0;
I !=
Users.size(); ++
I) {
795 Users.insert_range(V->users());
797 return Users.takeVector();
811 nullptr, StartV, StepV, PtrIV->
getDebugLoc(), Builder);
848 Def->getNumUsers() == 0 || !Def->getUnderlyingValue() ||
849 (RepR && (RepR->isSingleScalar() || RepR->isPredicated())))
857 Def->operands(),
true,
859 Clone->insertAfter(Def);
860 Def->replaceAllUsesWith(Clone);
871 PtrIV->replaceAllUsesWith(PtrAdd);
878 if (HasOnlyVectorVFs &&
none_of(WideIV->users(), [WideIV](
VPUser *U) {
879 return U->usesScalars(WideIV);
885 Plan,
ID.getKind(),
ID.getInductionOpcode(),
887 WideIV->getTruncInst(), WideIV->getStartValue(), WideIV->getStepValue(),
888 WideIV->getDebugLoc(), Builder);
891 if (!HasOnlyVectorVFs) {
893 "plans containing a scalar VF cannot also include scalable VFs");
894 WideIV->replaceAllUsesWith(Steps);
897 WideIV->replaceUsesWithIf(Steps,
898 [WideIV, HasScalableVF](
VPUser &U,
unsigned) {
900 return U.usesFirstLaneOnly(WideIV);
901 return U.usesScalars(WideIV);
917 return (IntOrFpIV && IntOrFpIV->getTruncInst()) ? nullptr : WideIV;
922 if (!Def || Def->getNumOperands() != 2)
930 auto IsWideIVInc = [&]() {
931 auto &
ID = WideIV->getInductionDescriptor();
934 VPValue *IVStep = WideIV->getStepValue();
935 switch (
ID.getInductionOpcode()) {
936 case Instruction::Add:
938 case Instruction::FAdd:
940 case Instruction::FSub:
943 case Instruction::Sub: {
963 return IsWideIVInc() ? WideIV :
nullptr;
983 if (WideIntOrFp && WideIntOrFp->getTruncInst())
996 FirstActiveLane =
B.createScalarZExtOrTrunc(FirstActiveLane, CanonicalIVType,
997 FirstActiveLaneType,
DL);
998 VPValue *EndValue =
B.createAdd(CanonicalIV, FirstActiveLane,
DL);
1005 EndValue =
B.createAdd(EndValue, One,
DL);
1008 if (!WideIntOrFp || !WideIntOrFp->isCanonical()) {
1010 VPIRValue *Start = WideIV->getStartValue();
1011 VPValue *Step = WideIV->getStepValue();
1012 EndValue =
B.createDerivedIV(
1014 Start, EndValue, Step);
1029 if (WideIntOrFp && WideIntOrFp->getTruncInst())
1036 if (!WideIntOrFp || !WideIntOrFp->isCanonical()) {
1039 Start, VectorTC, Step);
1068 assert(EndValue &&
"Must have computed the end value up front");
1084 auto *Zero = Plan.
getZero(StepTy);
1085 return B.createPtrAdd(EndValue,
B.createSub(Zero, Step),
1090 return B.createNaryOp(
1091 ID.getInductionBinOp()->getOpcode() == Instruction::FAdd
1093 : Instruction::FAdd,
1094 {EndValue, Step}, {ID.getInductionBinOp()->getFastMathFlags()});
1106 VPBuilder VectorPHBuilder(VectorPH, VectorPH->begin());
1115 WideIV, VectorPHBuilder, TypeInfo, ResumeTC))
1116 EndValues[WideIV] = EndValue;
1126 R.getVPSingleValue()->replaceAllUsesWith(EndValue);
1127 R.eraseFromParent();
1136 for (
auto [Idx, PredVPBB] :
enumerate(ExitVPBB->getPredecessors())) {
1138 if (PredVPBB == MiddleVPBB)
1140 ExitIRI->getOperand(Idx),
1144 Plan, TypeInfo, PredVPBB, ExitIRI->getOperand(Idx), PSE);
1146 ExitIRI->setOperand(Idx, Escape);
1163 const auto &[V, Inserted] = SCEV2VPV.
try_emplace(ExpR->getSCEV(), ExpR);
1166 ExpR->replaceAllUsesWith(V->second);
1167 ExpR->eraseFromParent();
1176 while (!WorkList.
empty()) {
1178 if (!Seen.
insert(Cur).second)
1186 R->eraseFromParent();
1193static std::optional<std::pair<bool, unsigned>>
1196 std::optional<std::pair<bool, unsigned>>>(R)
1199 [](
auto *
I) {
return std::make_pair(
false,
I->getOpcode()); })
1201 return std::make_pair(
true,
I->getVectorIntrinsicID());
1203 .Case<VPVectorPointerRecipe, VPPredInstPHIRecipe>([](
auto *
I) {
1207 return std::make_pair(
false,
1210 .
Default([](
auto *) {
return std::nullopt; });
1228 Value *V =
Op->getUnderlyingValue();
1234 auto FoldToIRValue = [&]() ->
Value * {
1236 if (OpcodeOrIID->first) {
1237 if (R.getNumOperands() != 2)
1239 unsigned ID = OpcodeOrIID->second;
1240 return Folder.FoldBinaryIntrinsic(
ID,
Ops[0],
Ops[1],
1243 unsigned Opcode = OpcodeOrIID->second;
1252 return Folder.FoldSelect(
Ops[0],
Ops[1],
1255 return Folder.FoldBinOp(Instruction::BinaryOps::Xor,
Ops[0],
1257 case Instruction::Select:
1258 return Folder.FoldSelect(
Ops[0],
Ops[1],
Ops[2]);
1259 case Instruction::ICmp:
1260 case Instruction::FCmp:
1263 case Instruction::GetElementPtr: {
1266 return Folder.FoldGEP(
GEP->getSourceElementType(),
Ops[0],
1276 case Instruction::ExtractElement:
1283 if (
Value *V = FoldToIRValue())
1284 return R.getParent()->getPlan()->getOrAddLiveIn(V);
1290 VPlan *Plan = Def->getParent()->getPlan();
1296 return Def->replaceAllUsesWith(V);
1302 PredPHI->replaceAllUsesWith(
Op);
1315 bool CanCreateNewRecipe =
1322 if (TruncTy == ATy) {
1323 Def->replaceAllUsesWith(
A);
1332 : Instruction::ZExt;
1335 if (
auto *UnderlyingExt = Def->getOperand(0)->getUnderlyingValue()) {
1337 Ext->setUnderlyingValue(UnderlyingExt);
1339 Def->replaceAllUsesWith(Ext);
1341 auto *Trunc = Builder.createWidenCast(Instruction::Trunc,
A, TruncTy);
1342 Def->replaceAllUsesWith(Trunc);
1350 for (
VPUser *U :
A->users()) {
1352 for (
VPValue *VPV : R->definedValues())
1366 Def->replaceAllUsesWith(
X);
1367 Def->eraseFromParent();
1373 return Def->replaceAllUsesWith(
1378 return Def->replaceAllUsesWith(
X);
1382 return Def->replaceAllUsesWith(
1387 return Def->replaceAllUsesWith(
1392 return Def->replaceAllUsesWith(
X);
1396 return Def->replaceAllUsesWith(Plan->
getFalse());
1400 return Def->replaceAllUsesWith(
X);
1403 if (CanCreateNewRecipe &&
1408 (!Def->getOperand(0)->hasMoreThanOneUniqueUser() ||
1409 !Def->getOperand(1)->hasMoreThanOneUniqueUser()))
1410 return Def->replaceAllUsesWith(
1411 Builder.createLogicalAnd(
X, Builder.createOr(
Y, Z)));
1416 return Def->replaceAllUsesWith(Def->getOperand(1));
1421 return Def->replaceAllUsesWith(Builder.createLogicalAnd(
X,
Y));
1425 return Def->replaceAllUsesWith(Plan->
getFalse());
1428 return Def->replaceAllUsesWith(
X);
1432 if (CanCreateNewRecipe &&
1434 return Def->replaceAllUsesWith(Builder.createNot(
C));
1438 Def->setOperand(0,
C);
1439 Def->setOperand(1,
Y);
1440 Def->setOperand(2,
X);
1445 return Def->replaceAllUsesWith(
A);
1448 return Def->replaceAllUsesWith(
A);
1451 return Def->replaceAllUsesWith(
1458 return Def->replaceAllUsesWith(
1460 Def->getDebugLoc(),
"", NW));
1466 return Def->replaceAllUsesWith(Builder.createNaryOp(
1468 {A, Plan->getConstantInt(APC->getBitWidth(), APC->exactLogBase2())},
1473 const VPRegionBlock *ParentRegion = Def->getParent()->getParent();
1474 bool IsInReplicateRegion = ParentRegion && ParentRegion->
isReplicator();
1475 if (CanCreateNewRecipe && !IsInReplicateRegion &&
1477 return Def->replaceAllUsesWith(Builder.createNaryOp(
1479 {A, Plan->getConstantInt(APC->getBitWidth(), APC->exactLogBase2())},
1484 return Def->replaceAllUsesWith(
A);
1499 R->setOperand(1,
Y);
1500 R->setOperand(2,
X);
1504 R->replaceAllUsesWith(Cmp);
1509 if (!Cmp->getDebugLoc() && Def->getDebugLoc())
1510 Cmp->setDebugLoc(Def->getDebugLoc());
1522 if (
Op->getNumUsers() > 1 ||
1526 }
else if (!UnpairedCmp) {
1527 UnpairedCmp =
Op->getDefiningRecipe();
1531 UnpairedCmp =
nullptr;
1538 if (NewOps.
size() < Def->getNumOperands()) {
1540 return Def->replaceAllUsesWith(NewAnyOf);
1547 if (CanCreateNewRecipe &&
1553 return Def->replaceAllUsesWith(NewCmp);
1561 return Def->replaceAllUsesWith(Def->getOperand(1));
1567 X = Builder.createWidenCast(Instruction::Trunc,
X, WideStepTy);
1568 Def->replaceAllUsesWith(
X);
1578 Def->setOperand(1, Def->getOperand(0));
1579 Def->setOperand(0,
Y);
1584 if (Phi->getOperand(0) == Phi->getOperand(1))
1585 Phi->replaceAllUsesWith(Phi->getOperand(0));
1592 return Def->replaceAllUsesWith(Def->getOperand(0));
1598 Def->replaceAllUsesWith(
1599 BuildVector->getOperand(BuildVector->getNumOperands() - 1));
1603 return Def->replaceAllUsesWith(
A);
1609 Def->replaceAllUsesWith(
1610 BuildVector->getOperand(BuildVector->getNumOperands() - 2));
1617 Def->replaceAllUsesWith(BuildVector->getOperand(Idx));
1622 Def->replaceAllUsesWith(
1632 "broadcast operand must be single-scalar");
1633 Def->setOperand(0,
C);
1638 if (Def->getNumOperands() == 1)
1639 Def->replaceAllUsesWith(Def->getOperand(0));
1644 if (Def->getNumOperands() == 1 &&
1646 return Def->replaceAllUsesWith(IRV);
1659 return Def->replaceAllUsesWith(
A);
1662 Def->replaceAllUsesWith(Builder.createNaryOp(
1663 Instruction::ExtractElement, {A, LaneToExtract}, Def->getDebugLoc()));
1671 if (Phi->getOperand(1) != Def &&
match(Phi->getOperand(0),
m_ZeroInt()) &&
1672 Phi->getSingleUser() == Def) {
1673 Phi->setOperand(0,
Y);
1674 Def->replaceAllUsesWith(Phi);
1689 Steps->replaceAllUsesWith(Steps->getOperand(0));
1697 Def->replaceUsesWithIf(StartV, [](
const VPUser &U,
unsigned Idx) {
1699 return PhiR && PhiR->isInLoop();
1705 Def->replaceAllUsesWith(
A);
1714 [Def,
A](
VPUser *U) { return U->usesScalars(A) || Def == U; })) {
1715 return Def->replaceAllUsesWith(
A);
1719 return Def->replaceAllUsesWith(
A);
1746 while (!Worklist.
empty()) {
1755 R->replaceAllUsesWith(
1756 Builder.createLogicalAnd(HeaderMask, Builder.createLogicalAnd(
X,
Y)));
1775 if (RepR && (RepR->isSingleScalar() || RepR->isPredicated()))
1784 !WidenStoreR->isConsecutive()) {
1785 assert(!WidenStoreR->isReverse() &&
1786 "Not consecutive memory recipes shouldn't be reversed");
1787 VPValue *Mask = WidenStoreR->getMask();
1796 {WidenStoreR->getOperand(1)});
1801 &WidenStoreR->getIngredient(), {Extract, WidenStoreR->getAddr()},
1802 true ,
nullptr , {},
1804 ScalarStore->insertBefore(WidenStoreR);
1805 WidenStoreR->eraseFromParent();
1813 RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(),
1814 true ,
nullptr , *RepR ,
1815 *RepR , RepR->getDebugLoc());
1816 Clone->insertBefore(RepOrWidenR);
1818 VPValue *ExtractOp = Clone->getOperand(0);
1824 Clone->setOperand(0, ExtractOp);
1825 RepR->eraseFromParent();
1834 auto IntroducesBCastOf = [](
const VPValue *
Op) {
1843 return !U->usesScalars(
Op);
1847 if (
any_of(RepOrWidenR->users(), IntroducesBCastOf(RepOrWidenR)) &&
1850 make_filter_range(Op->users(), not_equal_to(RepOrWidenR)),
1851 IntroducesBCastOf(Op)))
1855 auto *IRV = dyn_cast<VPIRValue>(Op);
1856 bool LiveInNeedsBroadcast = IRV && !isa<Constant>(IRV->getValue());
1857 auto *OpR = dyn_cast<VPReplicateRecipe>(Op);
1858 return LiveInNeedsBroadcast || (OpR && OpR->isSingleScalar());
1863 RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(),
1864 true ,
nullptr, *RepOrWidenR);
1865 Clone->insertBefore(RepOrWidenR);
1866 RepOrWidenR->replaceAllUsesWith(Clone);
1868 RepOrWidenR->eraseFromParent();
1904 if (Blend->isNormalized() || !
match(Blend->getMask(0),
m_False()))
1905 UniqueValues.
insert(Blend->getIncomingValue(0));
1906 for (
unsigned I = 1;
I != Blend->getNumIncomingValues(); ++
I)
1908 UniqueValues.
insert(Blend->getIncomingValue(
I));
1910 if (UniqueValues.
size() == 1) {
1911 Blend->replaceAllUsesWith(*UniqueValues.
begin());
1912 Blend->eraseFromParent();
1916 if (Blend->isNormalized())
1922 unsigned StartIndex = 0;
1923 for (
unsigned I = 0;
I != Blend->getNumIncomingValues(); ++
I) {
1928 if (Mask->getNumUsers() == 1 && !
match(Mask,
m_False())) {
1935 OperandsWithMask.
push_back(Blend->getIncomingValue(StartIndex));
1937 for (
unsigned I = 0;
I != Blend->getNumIncomingValues(); ++
I) {
1938 if (
I == StartIndex)
1940 OperandsWithMask.
push_back(Blend->getIncomingValue(
I));
1941 OperandsWithMask.
push_back(Blend->getMask(
I));
1946 OperandsWithMask, *Blend, Blend->getDebugLoc());
1947 NewBlend->insertBefore(&R);
1949 VPValue *DeadMask = Blend->getMask(StartIndex);
1951 Blend->eraseFromParent();
1956 if (NewBlend->getNumOperands() == 3 &&
1958 VPValue *Inc0 = NewBlend->getOperand(0);
1959 VPValue *Inc1 = NewBlend->getOperand(1);
1960 VPValue *OldMask = NewBlend->getOperand(2);
1961 NewBlend->setOperand(0, Inc1);
1962 NewBlend->setOperand(1, Inc0);
1963 NewBlend->setOperand(2, NewMask);
1990 APInt MaxVal = AlignedTC - 1;
1993 unsigned NewBitWidth =
1999 bool MadeChange =
false;
2008 if (!WideIV || !WideIV->isCanonical() ||
2009 WideIV->hasMoreThanOneUniqueUser() ||
2010 NewIVTy == WideIV->getScalarType())
2015 VPUser *SingleUser = WideIV->getSingleUser();
2023 auto *NewStart = Plan.
getZero(NewIVTy);
2024 WideIV->setStartValue(NewStart);
2026 WideIV->setStepValue(NewStep);
2033 Cmp->setOperand(1, NewBTC);
2047 return any_of(
Cond->getDefiningRecipe()->operands(), [&Plan, BestVF, BestUF,
2049 return isConditionTrueViaVFAndUF(C, Plan, BestVF, BestUF, PSE);
2062 const SCEV *VectorTripCount =
2067 "Trip count SCEV must be computable");
2088 auto *Term = &ExitingVPBB->
back();
2101 for (
unsigned Part = 0; Part < UF; ++Part) {
2107 Extracts[Part] = Ext;
2119 match(Phi->getBackedgeValue(),
2121 assert(Index &&
"Expected index from ActiveLaneMask instruction");
2138 "Expected one VPActiveLaneMaskPHIRecipe for each unroll part");
2145 "Expected incoming values of Phi to be ActiveLaneMasks");
2150 EntryALM->setOperand(2, ALMMultiplier);
2151 LoopALM->setOperand(2, ALMMultiplier);
2155 ExtractFromALM(EntryALM, EntryExtracts);
2160 ExtractFromALM(LoopALM, LoopExtracts);
2162 Not->setOperand(0, LoopExtracts[0]);
2165 for (
unsigned Part = 0; Part < UF; ++Part) {
2166 Phis[Part]->setStartValue(EntryExtracts[Part]);
2167 Phis[Part]->setBackedgeValue(LoopExtracts[Part]);
2180 auto *Term = &ExitingVPBB->
back();
2189 const SCEV *VectorTripCount =
2195 "Trip count SCEV must be computable");
2220 if (auto *R = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi))
2221 return R->isCanonical();
2222 return isa<VPCanonicalIVPHIRecipe, VPCurrentIterationPHIRecipe,
2223 VPFirstOrderRecurrencePHIRecipe, VPPhi>(&Phi);
2229 R->getScalarType());
2231 HeaderR.eraseFromParent();
2235 HeaderR.getVPSingleValue()->replaceAllUsesWith(Phi->getIncomingValue(0));
2236 HeaderR.eraseFromParent();
2246 B->setParent(
nullptr);
2255 if (Exits.
size() != 1) {
2257 "BranchOnTwoConds needs 2 remaining exits");
2259 Term->getOperand(0));
2268 Term->setOperand(1, Plan.
getTrue());
2273 {}, {}, Term->getDebugLoc());
2277 Term->eraseFromParent();
2312 R.getVPSingleValue()->replaceAllUsesWith(Trunc);
2322 assert(Plan.
hasVF(BestVF) &&
"BestVF is not available in Plan");
2323 assert(Plan.
hasUF(BestUF) &&
"BestUF is not available in Plan");
2352 auto TryToPushSinkCandidate = [&](
VPRecipeBase *SinkCandidate) {
2355 if (SinkCandidate == Previous)
2359 !Seen.
insert(SinkCandidate).second ||
2372 for (
unsigned I = 0;
I != WorkList.
size(); ++
I) {
2375 "only recipes with a single defined value expected");
2390 if (SinkCandidate == FOR)
2393 SinkCandidate->moveAfter(Previous);
2394 Previous = SinkCandidate;
2418 [&VPDT, HoistPoint](
VPUser *U) {
2419 auto *R = cast<VPRecipeBase>(U);
2420 return HoistPoint == R ||
2421 VPDT.properlyDominates(HoistPoint, R);
2423 "HoistPoint must dominate all users of FOR");
2425 auto NeedsHoisting = [HoistPoint, &VPDT,
2427 VPRecipeBase *HoistCandidate = HoistCandidateV->getDefiningRecipe();
2428 if (!HoistCandidate)
2433 HoistCandidate->
getRegion() == EnclosingLoopRegion) &&
2434 "CFG in VPlan should still be flat, without replicate regions");
2436 if (!Visited.
insert(HoistCandidate).second)
2448 return HoistCandidate;
2457 for (
unsigned I = 0;
I != HoistCandidates.
size(); ++
I) {
2460 "only recipes with a single defined value expected");
2472 if (
auto *R = NeedsHoisting(
Op)) {
2475 if (R->getNumDefinedValues() != 1)
2489 HoistCandidate->moveBefore(*HoistPoint->
getParent(),
2509 VPRecipeBase *Previous = FOR->getBackedgeValue()->getDefiningRecipe();
2512 while (
auto *PrevPhi =
2514 assert(PrevPhi->getParent() == FOR->getParent());
2516 Previous = PrevPhi->getBackedgeValue()->getDefiningRecipe();
2535 {FOR, FOR->getBackedgeValue()});
2540 RecurSplice->setOperand(0, FOR);
2546 for (
VPUser *U : RecurSplice->users()) {
2556 VPValue *PenultimateIndex =
B.createSub(LastActiveLane, One);
2557 VPValue *PenultimateLastIter =
2559 {PenultimateIndex, FOR->getBackedgeValue()});
2564 VPValue *Sel =
B.createSelect(Cmp, LastPrevIter, PenultimateLastIter);
2577 RecurKind RK = PhiR->getRecurrenceKind();
2584 RecWithFlags->dropPoisonGeneratingFlags();
2590struct VPCSEDenseMapInfo :
public DenseMapInfo<VPSingleDefRecipe *> {
2592 return Def == getEmptyKey() || Def == getTombstoneKey();
2603 return GEP->getSourceElementType();
2606 .Case<VPVectorPointerRecipe, VPWidenGEPRecipe>(
2607 [](
auto *
I) {
return I->getSourceElementType(); })
2608 .
Default([](
auto *) {
return nullptr; });
2612 static bool canHandle(
const VPSingleDefRecipe *Def) {
2621 if (!
C || (!
C->first && (
C->second == Instruction::InsertValue ||
2622 C->second == Instruction::ExtractValue)))
2628 return !
Def->mayReadFromMemory();
2632 static unsigned getHashValue(
const VPSingleDefRecipe *Def) {
2633 const VPlan *Plan =
Def->getParent()->getPlan();
2634 VPTypeAnalysis TypeInfo(*Plan);
2637 getGEPSourceElementType(Def), TypeInfo.inferScalarType(Def),
2640 if (RFlags->hasPredicate())
2646 static bool isEqual(
const VPSingleDefRecipe *L,
const VPSingleDefRecipe *R) {
2649 if (
L->getVPRecipeID() !=
R->getVPRecipeID() ||
2651 getGEPSourceElementType(L) != getGEPSourceElementType(R) ||
2653 !
equal(
L->operands(),
R->operands()))
2656 "must have valid opcode info for both recipes");
2658 if (LFlags->hasPredicate() &&
2659 LFlags->getPredicate() !=
2665 const VPRegionBlock *RegionL =
L->getRegion();
2666 const VPRegionBlock *RegionR =
R->getRegion();
2669 L->getParent() !=
R->getParent())
2671 const VPlan *Plan =
L->getParent()->getPlan();
2672 VPTypeAnalysis TypeInfo(*Plan);
2673 return TypeInfo.inferScalarType(L) == TypeInfo.inferScalarType(R);
2688 if (!Def || !VPCSEDenseMapInfo::canHandle(Def))
2692 if (!VPDT.
dominates(V->getParent(), VPBB))
2697 Def->replaceAllUsesWith(V);
2716 "Expected vector prehader's successor to be the vector loop region");
2723 return !Op->isDefinedOutsideLoopRegions();
2726 R.moveBefore(*Preheader, Preheader->
end());
2753 if (Def->getNumUsers() == 0)
2762 auto *UserR = cast<VPRecipeBase>(U);
2763 VPBasicBlock *Parent = UserR->getParent();
2766 if (UserR->isPhi() || Parent->getEnclosingLoopRegion())
2769 if (SinkBB && SinkBB != Parent)
2783 "Defining block must dominate sink block");
2809 VPValue *ResultVPV = R.getVPSingleValue();
2811 unsigned NewResSizeInBits = MinBWs.
lookup(UI);
2812 if (!NewResSizeInBits)
2825 (void)OldResSizeInBits;
2833 VPW->dropPoisonGeneratingFlags();
2835 if (OldResSizeInBits != NewResSizeInBits &&
2839 Instruction::ZExt, ResultVPV, OldResTy,
nullptr,
2841 Ext->insertAfter(&R);
2843 Ext->setOperand(0, ResultVPV);
2844 assert(OldResSizeInBits > NewResSizeInBits &&
"Nothing to shrink?");
2847 "Only ICmps should not need extending the result.");
2857 for (
unsigned Idx = StartIdx; Idx != R.getNumOperands(); ++Idx) {
2858 auto *
Op = R.getOperand(Idx);
2859 unsigned OpSizeInBits =
2861 if (OpSizeInBits == NewResSizeInBits)
2863 assert(OpSizeInBits > NewResSizeInBits &&
"nothing to truncate");
2864 auto [ProcessedIter, IterIsEmpty] = ProcessedTruncs.
try_emplace(
Op);
2866 R.setOperand(Idx, ProcessedIter->second);
2874 Builder.setInsertPoint(&R);
2876 Builder.createWidenCast(Instruction::Trunc,
Op, NewResTy);
2877 ProcessedIter->second = NewOp;
2878 R.setOperand(Idx, NewOp);
2893 assert(VPBB->getNumSuccessors() == 2 &&
2894 "Two successors expected for BranchOnCond");
2895 unsigned RemovedIdx;
2906 "There must be a single edge between VPBB and its successor");
2915 VPBB->back().eraseFromParent();
2969 VPValue *StartV = CanonicalIVPHI->getStartValue();
2971 auto *CanonicalIVIncrement =
2974 CanonicalIVIncrement->dropPoisonGeneratingFlags();
2975 DebugLoc DL = CanonicalIVIncrement->getDebugLoc();
2986 auto *EntryIncrement = Builder.createOverflowingOp(
2988 DL,
"index.part.next");
2994 {EntryIncrement, TC, ALMMultiplier},
DL,
2995 "active.lane.mask.entry");
3001 LaneMaskPhi->insertAfter(CanonicalIVPHI);
3006 Builder.setInsertPoint(OriginalTerminator);
3007 auto *InLoopIncrement = Builder.createOverflowingOp(
3009 {CanonicalIVIncrement, &Plan.
getVF()}, {
false,
false},
DL);
3011 {InLoopIncrement, TC, ALMMultiplier},
DL,
3012 "active.lane.mask.next");
3017 auto *NotMask = Builder.createNot(ALM,
DL);
3024 bool UseActiveLaneMaskForControlFlow) {
3026 auto *FoundWidenCanonicalIVUser =
find_if(
3028 assert(FoundWidenCanonicalIVUser &&
3029 "Must have widened canonical IV when tail folding!");
3031 auto *WideCanonicalIV =
3034 if (UseActiveLaneMaskForControlFlow) {
3043 nullptr,
"active.lane.mask");
3059 template <
typename OpTy>
bool match(OpTy *V)
const {
3070template <
typename Op0_t,
typename Op1_t>
3089 VPValue *Addr, *Mask, *EndPtr;
3092 auto AdjustEndPtr = [&CurRecipe, &EVL](
VPValue *EndPtr) {
3094 EVLEndPtr->insertBefore(&CurRecipe);
3095 EVLEndPtr->setOperand(1, &EVL);
3099 if (
match(&CurRecipe,
3113 LoadR->insertBefore(&CurRecipe);
3115 Intrinsic::experimental_vp_reverse, {LoadR, Plan->
getTrue(), &EVL},
3124 StoredVal, EVL, Mask);
3126 if (
match(&CurRecipe,
3132 Intrinsic::experimental_vp_reverse,
3133 {ReversedVal, Plan->
getTrue(), &EVL},
3137 AdjustEndPtr(EndPtr), NewReverse, EVL,
3142 if (Rdx->isConditional() &&
3147 if (Interleave->getMask() &&
3152 if (
match(&CurRecipe,
3161 Intrinsic::vp_merge, {Mask,
LHS,
RHS, &EVL},
3181 VPValue *HeaderMask =
nullptr, *EVL =
nullptr;
3186 HeaderMask = R.getVPSingleValue();
3198 NewR->insertBefore(R);
3199 for (
auto [Old, New] :
3200 zip_equal(R->definedValues(), NewR->definedValues()))
3201 Old->replaceAllUsesWith(New);
3215 Merge->insertBefore(LogicalAnd);
3216 LogicalAnd->replaceAllUsesWith(
Merge);
3224 R->eraseFromParent();
3241 "User of VF that we can't transform to EVL.");
3247 [&LoopRegion, &Plan](
VPUser *U) {
3249 m_c_Add(m_Specific(LoopRegion->getCanonicalIV()),
3250 m_Specific(&Plan.getVFxUF()))) ||
3251 isa<VPWidenPointerInductionRecipe>(U);
3253 "Only users of VFxUF should be VPWidenPointerInductionRecipe and the "
3254 "increment of the canonical induction.");
3270 MaxEVL = Builder.createScalarZExtOrTrunc(
3274 Builder.setInsertPoint(Header, Header->getFirstNonPhi());
3275 VPValue *PrevEVL = Builder.createScalarPhi(
3289 Intrinsic::experimental_vp_splice,
3290 {V1, V2, Imm, Plan.
getTrue(), PrevEVL, &EVL},
3294 R.getVPSingleValue()->replaceAllUsesWith(VPSplice);
3311 VPValue *EVLMask = Builder.createICmp(
3372 VPlan &Plan,
const std::optional<unsigned> &MaxSafeElements) {
3380 VPValue *StartV = CanonicalIVPHI->getStartValue();
3383 auto *CurrentIteration =
3385 CurrentIteration->insertAfter(CanonicalIVPHI);
3386 VPBuilder Builder(Header, Header->getFirstNonPhi());
3389 VPPhi *AVLPhi = Builder.createScalarPhi(
3393 if (MaxSafeElements) {
3403 auto *CanonicalIVIncrement =
3405 Builder.setInsertPoint(CanonicalIVIncrement);
3409 OpVPEVL = Builder.createScalarZExtOrTrunc(
3410 OpVPEVL, CanIVTy, I32Ty, CanonicalIVIncrement->getDebugLoc());
3412 auto *NextIter = Builder.createAdd(OpVPEVL, CurrentIteration,
3413 CanonicalIVIncrement->getDebugLoc(),
3414 "current.iteration.next",
3415 {CanonicalIVIncrement->hasNoUnsignedWrap(),
3416 CanonicalIVIncrement->hasNoSignedWrap()});
3417 CurrentIteration->addOperand(NextIter);
3421 "avl.next", {
true,
false});
3429 CanonicalIVPHI->replaceAllUsesWith(CurrentIteration);
3430 CanonicalIVIncrement->setOperand(0, CanonicalIVPHI);
3444 assert(!CurrentIteration &&
3445 "Found multiple CurrentIteration. Only one expected");
3446 CurrentIteration = PhiR;
3450 if (!CurrentIteration)
3461 CurrentIteration->
getDebugLoc(),
"current.iteration.iv");
3467 VPValue *Backedge = CanonicalIV->getIncomingValue(1);
3470 "Unexpected canonical iv");
3476 CanonicalIV->eraseFromParent();
3496 if (!
match(EVLPhi->getBackedgeValue(),
3506 [[maybe_unused]]
bool FoundAVLNext =
3509 assert(FoundAVLNext &&
"Didn't find AVL backedge?");
3521 "Expected BranchOnCond with ICmp comparing CanIV increment with vector "
3526 LatchBr->setOperand(
3537 return R->getRegion() ||
3541 for (
const SCEV *Stride : StridesMap.
values()) {
3544 const APInt *StrideConst;
3567 RewriteMap[StrideV] = PSE.
getSCEV(StrideV);
3574 const SCEV *ScevExpr = ExpSCEV->getSCEV();
3577 if (NewSCEV != ScevExpr) {
3579 ExpSCEV->replaceAllUsesWith(NewExp);
3588 const std::function<
bool(
BasicBlock *)> &BlockNeedsPredication) {
3592 auto CollectPoisonGeneratingInstrsInBackwardSlice([&](
VPRecipeBase *Root) {
3597 while (!Worklist.
empty()) {
3600 if (!Visited.
insert(CurRec).second)
3622 RecWithFlags->isDisjoint()) {
3625 Builder.createAdd(
A,
B, RecWithFlags->getDebugLoc());
3626 New->setUnderlyingValue(RecWithFlags->getUnderlyingValue());
3627 RecWithFlags->replaceAllUsesWith(New);
3628 RecWithFlags->eraseFromParent();
3631 RecWithFlags->dropPoisonGeneratingFlags();
3636 assert((!Instr || !Instr->hasPoisonGeneratingFlags()) &&
3637 "found instruction with poison generating flags not covered by "
3638 "VPRecipeWithIRFlags");
3643 if (
VPRecipeBase *OpDef = Operand->getDefiningRecipe())
3655 Instruction &UnderlyingInstr = WidenRec->getIngredient();
3656 VPRecipeBase *AddrDef = WidenRec->getAddr()->getDefiningRecipe();
3657 if (AddrDef && WidenRec->isConsecutive() &&
3658 BlockNeedsPredication(UnderlyingInstr.
getParent()))
3659 CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
3661 VPRecipeBase *AddrDef = InterleaveRec->getAddr()->getDefiningRecipe();
3665 InterleaveRec->getInterleaveGroup();
3666 bool NeedPredication =
false;
3668 I < NumMembers; ++
I) {
3671 NeedPredication |= BlockNeedsPredication(Member->getParent());
3674 if (NeedPredication)
3675 CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
3687 if (InterleaveGroups.empty())
3694 for (
const auto *IG : InterleaveGroups) {
3700 StoredValues.
push_back(StoreR->getStoredValue());
3701 for (
unsigned I = 1;
I < IG->getFactor(); ++
I) {
3708 StoredValues.
push_back(StoreR->getStoredValue());
3712 bool NeedsMaskForGaps =
3713 (IG->requiresScalarEpilogue() && !ScalarEpilogueAllowed) ||
3714 (!StoredValues.
empty() && !IG->isFull());
3726 VPValue *Addr = Start->getAddr();
3735 assert(IG->getIndex(IRInsertPos) != 0 &&
3736 "index of insert position shouldn't be zero");
3740 IG->getIndex(IRInsertPos),
3744 Addr =
B.createNoWrapPtrAdd(InsertPos->getAddr(), OffsetVPV, NW);
3750 if (IG->isReverse()) {
3753 -(int64_t)IG->getFactor(), NW, InsertPos->getDebugLoc());
3754 ReversePtr->insertBefore(InsertPos);
3758 InsertPos->getMask(), NeedsMaskForGaps,
3759 InterleaveMD, InsertPos->getDebugLoc());
3760 VPIG->insertBefore(InsertPos);
3763 for (
unsigned i = 0; i < IG->getFactor(); ++i)
3766 if (!Member->getType()->isVoidTy()) {
3825 AddOp = Instruction::Add;
3826 MulOp = Instruction::Mul;
3828 AddOp =
ID.getInductionOpcode();
3829 MulOp = Instruction::FMul;
3837 Step = Builder.createScalarCast(Instruction::Trunc, Step, Ty,
DL);
3838 Start = Builder.createScalarCast(Instruction::Trunc, Start, Ty,
DL);
3840 Flags.dropPoisonGeneratingFlags();
3849 Init = Builder.createWidenCast(Instruction::UIToFP,
Init, StepTy);
3854 Init = Builder.createNaryOp(MulOp, {
Init, SplatStep}, Flags);
3855 Init = Builder.createNaryOp(AddOp, {SplatStart,
Init}, Flags,
3861 WidePHI->insertBefore(WidenIVR);
3872 Builder.setInsertPoint(R->getParent(), std::next(R->getIterator()));
3876 VF = Builder.createScalarCast(Instruction::CastOps::UIToFP, VF, StepTy,
3879 VF = Builder.createScalarZExtOrTrunc(VF, StepTy,
3882 Inc = Builder.createNaryOp(MulOp, {Step, VF}, Flags);
3889 auto *
Next = Builder.createNaryOp(AddOp, {Prev, Inc}, Flags,
3892 WidePHI->addOperand(
Next);
3920 VPlan *Plan = R->getParent()->getPlan();
3921 VPValue *Start = R->getStartValue();
3922 VPValue *Step = R->getStepValue();
3923 VPValue *VF = R->getVFValue();
3925 assert(R->getInductionDescriptor().getKind() ==
3927 "Not a pointer induction according to InductionDescriptor!");
3930 "Recipe should have been replaced");
3936 VPPhi *ScalarPtrPhi = Builder.createScalarPhi(Start,
DL,
"pointer.phi");
3940 Builder.setInsertPoint(R->getParent(), R->getParent()->getFirstNonPhi());
3943 Offset = Builder.createOverflowingOp(Instruction::Mul, {
Offset, Step});
3945 Builder.createWidePtrAdd(ScalarPtrPhi,
Offset,
DL,
"vector.gep");
3946 R->replaceAllUsesWith(PtrAdd);
3951 VF = Builder.createScalarZExtOrTrunc(VF, StepTy, TypeInfo.
inferScalarType(VF),
3953 VPValue *Inc = Builder.createOverflowingOp(Instruction::Mul, {Step, VF});
3956 Builder.createPtrAdd(ScalarPtrPhi, Inc,
DL,
"ptr.ind");
3965 if (!R->isReplicator())
3969 R->dissolveToCFGLoop();
3990 assert(Br->getNumOperands() == 2 &&
3991 "BranchOnTwoConds must have exactly 2 conditions");
3995 assert(Successors.size() == 3 &&
3996 "BranchOnTwoConds must have exactly 3 successors");
4001 VPValue *Cond0 = Br->getOperand(0);
4002 VPValue *Cond1 = Br->getOperand(1);
4007 !BrOnTwoCondsBB->
getParent() &&
"regions must already be dissolved");
4020 Br->eraseFromParent();
4043 WidenIVR->replaceAllUsesWith(PtrAdd);
4056 for (
unsigned I = 1;
I != Blend->getNumIncomingValues(); ++
I)
4057 Select = Builder.createSelect(Blend->getMask(
I),
4058 Blend->getIncomingValue(
I),
Select,
4059 R.getDebugLoc(),
"predphi", *Blend);
4060 Blend->replaceAllUsesWith(
Select);
4065 if (!VEPR->getOffset()) {
4067 "Expected unroller to have materialized offset for UF != 1");
4068 VEPR->materializeOffset();
4083 for (
VPValue *
Op : LastActiveL->operands()) {
4084 VPValue *NotMask = Builder.createNot(
Op, LastActiveL->getDebugLoc());
4089 VPValue *FirstInactiveLane = Builder.createNaryOp(
4091 LastActiveL->getDebugLoc(),
"first.inactive.lane");
4097 Builder.createSub(FirstInactiveLane, One,
4098 LastActiveL->getDebugLoc(),
"last.active.lane");
4108 assert(VPI->isMasked() &&
4109 "Unmasked MaskedCond should be simplified earlier");
4110 VPI->replaceAllUsesWith(Builder.createNaryOp(
4120 DebugLoc DL = BranchOnCountInst->getDebugLoc();
4123 ToRemove.push_back(BranchOnCountInst);
4138 ? Instruction::UIToFP
4139 : Instruction::Trunc;
4140 VectorStep = Builder.createWidenCast(CastOp, VectorStep, IVTy);
4146 Builder.createWidenCast(Instruction::Trunc, ScalarStep, IVTy);
4152 MulOpc = Instruction::FMul;
4153 Flags = VPI->getFastMathFlags();
4155 MulOpc = Instruction::Mul;
4160 MulOpc, {VectorStep, ScalarStep}, Flags, R.getDebugLoc());
4162 VPI->replaceAllUsesWith(VectorStep);
4168 R->eraseFromParent();
4176 struct EarlyExitInfo {
4187 if (Pred == MiddleVPBB)
4192 VPValue *CondOfEarlyExitingVPBB;
4193 [[maybe_unused]]
bool Matched =
4194 match(EarlyExitingVPBB->getTerminator(),
4196 assert(Matched &&
"Terminator must be BranchOnCond");
4200 VPBuilder EarlyExitingBuilder(EarlyExitingVPBB->getTerminator());
4201 auto *CondToEarlyExit = EarlyExitingBuilder.
createNaryOp(
4203 TrueSucc == ExitBlock
4204 ? CondOfEarlyExitingVPBB
4205 : EarlyExitingBuilder.
createNot(CondOfEarlyExitingVPBB));
4211 "exit condition must dominate the latch");
4220 assert(!Exits.
empty() &&
"must have at least one early exit");
4227 for (
const auto &[Num, VPB] :
enumerate(RPOT))
4229 llvm::sort(Exits, [&RPOIdx](
const EarlyExitInfo &
A,
const EarlyExitInfo &
B) {
4230 return RPOIdx[
A.EarlyExitingVPBB] < RPOIdx[
B.EarlyExitingVPBB];
4236 for (
unsigned I = 0;
I + 1 < Exits.
size(); ++
I)
4237 for (
unsigned J =
I + 1; J < Exits.
size(); ++J)
4239 Exits[
I].EarlyExitingVPBB) &&
4240 "RPO sort must place dominating exits before dominated ones");
4246 VPValue *Combined = Exits[0].CondToExit;
4247 for (
const EarlyExitInfo &Info :
drop_begin(Exits))
4248 Combined = Builder.createLogicalOr(Combined, Info.CondToExit);
4254 "Early exit store masking not implemented");
4258 for (
unsigned Idx = 0; Idx != Exits.
size(); ++Idx) {
4262 VectorEarlyExitVPBBs[Idx] = VectorEarlyExitVPBB;
4270 Exits.
size() == 1 ? VectorEarlyExitVPBBs[0]
4304 for (
auto [Exit, VectorEarlyExitVPBB] :
4305 zip_equal(Exits, VectorEarlyExitVPBBs)) {
4306 auto &[EarlyExitingVPBB, EarlyExitVPBB,
_] = Exit;
4318 ExitIRI->getIncomingValueForBlock(EarlyExitingVPBB);
4319 VPValue *NewIncoming = IncomingVal;
4321 VPBuilder EarlyExitBuilder(VectorEarlyExitVPBB);
4326 ExitIRI->removeIncomingValueFor(EarlyExitingVPBB);
4327 ExitIRI->addOperand(NewIncoming);
4330 EarlyExitingVPBB->getTerminator()->eraseFromParent();
4364 bool IsLastDispatch = (
I + 2 == Exits.
size());
4366 IsLastDispatch ? VectorEarlyExitVPBBs.
back()
4372 VectorEarlyExitVPBBs[
I]->setPredecessors({CurrentBB});
4375 CurrentBB = FalseBB;
4382 "Unexpected terminator");
4383 auto *IsLatchExitTaken =
4385 LatchExitingBranch->getOperand(1));
4387 DebugLoc LatchDL = LatchExitingBranch->getDebugLoc();
4388 LatchExitingBranch->eraseFromParent();
4389 Builder.setInsertPoint(LatchVPBB);
4391 {IsAnyExitTaken, IsLatchExitTaken}, LatchDL);
4393 LatchVPBB->
setSuccessors({DispatchVPBB, MiddleVPBB, HeaderVPBB});
4404 Type *RedTy = Ctx.Types.inferScalarType(Red);
4405 VPValue *VecOp = Red->getVecOp();
4408 auto IsExtendedRedValidAndClampRange =
4420 if (Red->isPartialReduction()) {
4425 ExtRedCost = Ctx.TTI.getPartialReductionCost(
4426 Opcode, SrcTy,
nullptr, RedTy, VF, ExtKind,
4429 ? std::optional{Red->getFastMathFlags()}
4433 ExtRedCost = Ctx.TTI.getExtendedReductionCost(
4434 Opcode, ExtOpc == Instruction::CastOps::ZExt, RedTy, SrcVecTy,
4435 Red->getFastMathFlags(),
CostKind);
4437 return ExtRedCost.
isValid() && ExtRedCost < ExtCost + RedCost;
4447 IsExtendedRedValidAndClampRange(
4450 Ctx.Types.inferScalarType(
A)))
4469 if (Opcode != Instruction::Add && Opcode != Instruction::Sub &&
4470 Opcode != Instruction::FAdd)
4473 Type *RedTy = Ctx.Types.inferScalarType(Red);
4476 auto IsMulAccValidAndClampRange =
4483 Ext0 ? Ctx.Types.inferScalarType(Ext0->getOperand(0)) : RedTy;
4486 if (Red->isPartialReduction()) {
4488 Ext1 ? Ctx.Types.inferScalarType(Ext1->getOperand(0)) :
nullptr;
4491 MulAccCost = Ctx.TTI.getPartialReductionCost(
4492 Opcode, SrcTy, SrcTy2, RedTy, VF,
4501 ? std::optional{Red->getFastMathFlags()}
4507 (Ext0->getOpcode() != Ext1->getOpcode() ||
4508 Ext0->getOpcode() == Instruction::CastOps::FPExt))
4512 !Ext0 || Ext0->getOpcode() == Instruction::CastOps::ZExt;
4514 MulAccCost = Ctx.TTI.getMulAccReductionCost(IsZExt, Opcode, RedTy,
4522 ExtCost += Ext0->computeCost(VF, Ctx);
4524 ExtCost += Ext1->computeCost(VF, Ctx);
4526 ExtCost += OuterExt->computeCost(VF, Ctx);
4528 return MulAccCost.
isValid() &&
4529 MulAccCost < ExtCost + MulCost + RedCost;
4534 VPValue *VecOp = Red->getVecOp();
4541 assert(Opcode == Instruction::FAdd &&
4542 "MulAccumulateReduction from an FMul must accumulate into an FAdd "
4551 if (RecipeA && RecipeB &&
4552 IsMulAccValidAndClampRange(
FMul, RecipeA, RecipeB,
nullptr)) {
4574 if (!ExtA || ExtB || !
isa<VPIRValue>(ValB) || Red->isPartialReduction())
4576 Type *NarrowTy = Ctx.Types.inferScalarType(ExtA->getOperand(0));
4590 Builder.createWidenCast(Instruction::CastOps::Trunc, ValB, NarrowTy);
4591 Type *WideTy = Ctx.Types.inferScalarType(ExtA);
4592 ValB = ExtB = Builder.createWidenCast(ExtOpc, Trunc, WideTy);
4593 Mul->setOperand(1, ExtB);
4603 ExtendAndReplaceConstantOp(RecipeA, RecipeB,
B,
Mul);
4608 IsMulAccValidAndClampRange(
Mul, RecipeA, RecipeB,
nullptr)) {
4615 if (!
Sub && IsMulAccValidAndClampRange(
Mul,
nullptr,
nullptr,
nullptr))
4624 if (!Red->isPartialReduction() &&
4633 ExtendAndReplaceConstantOp(Ext0, Ext1,
B,
Mul);
4642 (Ext->getOpcode() == Ext0->getOpcode() || Ext0 == Ext1) &&
4643 Ext0->getOpcode() == Ext1->getOpcode() &&
4644 IsMulAccValidAndClampRange(
Mul, Ext0, Ext1, Ext) &&
Mul->hasOneUse()) {
4646 Ext0->getOpcode(), Ext0->getOperand(0), Ext->getResultType(),
nullptr,
4647 *Ext0, *Ext0, Ext0->getDebugLoc());
4648 NewExt0->insertBefore(Ext0);
4653 Ext->getResultType(),
nullptr, *Ext1,
4654 *Ext1, Ext1->getDebugLoc());
4657 Mul->setOperand(0, NewExt0);
4658 Mul->setOperand(1, NewExt1);
4659 Red->setOperand(1,
Mul);
4672 auto IP = std::next(Red->getIterator());
4673 auto *VPBB = Red->getParent();
4683 Red->replaceAllUsesWith(AbstractR);
4713 for (
VPValue *VPV : VPValues) {
4722 if (
User->usesScalars(VPV))
4725 HoistPoint = HoistBlock->
begin();
4729 "All users must be in the vector preheader or dominated by it");
4734 VPV->replaceUsesWithIf(Broadcast,
4735 [VPV, Broadcast](
VPUser &U,
unsigned Idx) {
4736 return Broadcast != &U && !U.usesScalars(VPV);
4753 if (RepR->isPredicated() || !RepR->isSingleScalar() ||
4754 RepR->getOpcode() != Instruction::Load)
4757 VPValue *Addr = RepR->getOperand(0);
4760 if (!
Loc.AATags.Scope)
4765 if (R.mayWriteToMemory()) {
4767 if (!
Loc || !
Loc->AATags.Scope || !
Loc->AATags.NoAlias)
4775 for (
auto &[LoadRecipe, LoadLoc] : CandidateLoads) {
4779 const AAMDNodes &LoadAA = LoadLoc.AATags;
4795 return CommonMetadata;
4798template <
unsigned Opcode>
4803 static_assert(Opcode == Instruction::Load || Opcode == Instruction::Store,
4804 "Only Load and Store opcodes supported");
4805 constexpr bool IsLoad = (Opcode == Instruction::Load);
4811 return TypeInfo.
inferScalarType(IsLoad ? Recipe : Recipe->getOperand(0));
4816 for (
auto Recipes :
Groups) {
4817 if (Recipes.size() < 2)
4825 VPValue *MaskI = RecipeI->getMask();
4826 Type *TypeI = GetLoadStoreValueType(RecipeI);
4832 bool HasComplementaryMask =
false;
4837 VPValue *MaskJ = RecipeJ->getMask();
4838 Type *TypeJ = GetLoadStoreValueType(RecipeJ);
4839 if (TypeI == TypeJ) {
4849 if (HasComplementaryMask) {
4850 assert(Group.
size() >= 2 &&
"must have at least 2 entries");
4860template <
typename InstType>
4878 for (
auto &Group :
Groups) {
4898 return R->isSingleScalar() == IsSingleScalar;
4900 "all members in group must agree on IsSingleScalar");
4905 LoadWithMinAlign->getUnderlyingInstr(), {EarliestLoad->getOperand(0)},
4906 IsSingleScalar,
nullptr, *EarliestLoad, CommonMetadata);
4908 UnpredicatedLoad->insertBefore(EarliestLoad);
4912 Load->replaceAllUsesWith(UnpredicatedLoad);
4913 Load->eraseFromParent();
4923 if (!StoreLoc || !StoreLoc->AATags.Scope)
4929 StoresToSink.
end());
4933 SinkStoreInfo SinkInfo(StoresToSinkSet, *StoresToSink[0], PSE, L, TypeInfo);
4947 for (
auto &Group :
Groups) {
4960 VPValue *SelectedValue = Group[0]->getOperand(0);
4963 bool IsSingleScalar = Group[0]->isSingleScalar();
4964 for (
unsigned I = 1;
I < Group.size(); ++
I) {
4965 assert(IsSingleScalar == Group[
I]->isSingleScalar() &&
4966 "all members in group must agree on IsSingleScalar");
4967 VPValue *Mask = Group[
I]->getMask();
4969 SelectedValue = Builder.createSelect(Mask,
Value, SelectedValue,
4978 StoreWithMinAlign->getUnderlyingInstr(),
4979 {SelectedValue, LastStore->getOperand(1)}, IsSingleScalar,
4980 nullptr, *LastStore, CommonMetadata);
4981 UnpredicatedStore->insertBefore(*InsertBB, LastStore->
getIterator());
4985 Store->eraseFromParent();
4992 assert(Plan.
hasVF(BestVF) &&
"BestVF is not available in Plan");
4993 assert(Plan.
hasUF(BestUF) &&
"BestUF is not available in Plan");
5058 auto UsesVectorOrInsideReplicateRegion = [DefR, LoopRegion](
VPUser *U) {
5060 return !U->usesScalars(DefR) || ParentRegion != LoopRegion;
5067 none_of(DefR->users(), UsesVectorOrInsideReplicateRegion))
5077 DefR->replaceUsesWithIf(
5078 BuildVector, [BuildVector, &UsesVectorOrInsideReplicateRegion](
5080 return &U != BuildVector && UsesVectorOrInsideReplicateRegion(&U);
5094 for (
VPValue *Def : R.definedValues()) {
5107 auto IsCandidateUnpackUser = [Def](
VPUser *U) {
5109 return U->usesScalars(Def) &&
5112 if (
none_of(Def->users(), IsCandidateUnpackUser))
5119 Unpack->insertAfter(&R);
5120 Def->replaceUsesWithIf(Unpack,
5121 [&IsCandidateUnpackUser](
VPUser &U,
unsigned) {
5122 return IsCandidateUnpackUser(&U);
5132 bool RequiresScalarEpilogue,
5144 assert(StepR->getParent() == VectorPHVPBB &&
5145 "Step must be defined in VectorPHVPBB");
5147 InsertPt = std::next(StepR->getIterator());
5149 VPBuilder Builder(VectorPHVPBB, InsertPt);
5157 if (TailByMasking) {
5158 TC = Builder.createAdd(
5169 Builder.createNaryOp(Instruction::URem, {TC, Step},
5178 if (RequiresScalarEpilogue) {
5180 "requiring scalar epilogue is not supported with fail folding");
5183 R = Builder.createSelect(IsZero, Step, R);
5197 "VF and VFxUF must be materialized together");
5209 Builder.createElementCount(TCTy, VFEC * Plan.
getConcreteUF());
5216 VPValue *RuntimeVF = Builder.createElementCount(TCTy, VFEC);
5220 BC, [&VF](
VPUser &U,
unsigned) {
return !U.usesScalars(&VF); });
5224 VPValue *MulByUF = Builder.createOverflowingOp(
5236 BasicBlock *EntryBB = Entry->getIRBasicBlock();
5244 const SCEV *Expr = ExpSCEV->getSCEV();
5247 ExpandedSCEVs[ExpSCEV->getSCEV()] = Res;
5252 ExpSCEV->eraseFromParent();
5255 "VPExpandSCEVRecipes must be at the beginning of the entry block, "
5256 "before any VPIRInstructions");
5259 auto EI = Entry->begin();
5269 return ExpandedSCEVs;
5281 VPValue *OpV,
unsigned Idx,
bool IsScalable) {
5285 return Member0Op == OpV;
5289 return !IsScalable && !W->getMask() && W->isConsecutive() &&
5292 return IR->getInterleaveGroup()->isFull() &&
IR->getVPValue(Idx) == OpV;
5309 for (
unsigned Idx = 0; Idx != WideMember0->getNumOperands(); ++Idx) {
5312 OpsI.
push_back(
Op->getDefiningRecipe()->getOperand(Idx));
5317 if (
any_of(
enumerate(OpsI), [WideMember0, Idx, IsScalable](
const auto &
P) {
5318 const auto &[
OpIdx, OpV] =
P;
5333 if (!InterleaveR || InterleaveR->
getMask())
5334 return std::nullopt;
5336 Type *GroupElementTy =
nullptr;
5340 [&TypeInfo, GroupElementTy](
VPValue *
Op) {
5341 return TypeInfo.inferScalarType(Op) == GroupElementTy;
5343 return std::nullopt;
5348 [&TypeInfo, GroupElementTy](
VPValue *
Op) {
5349 return TypeInfo.inferScalarType(Op) == GroupElementTy;
5351 return std::nullopt;
5355 if (IG->getFactor() != IG->getNumMembers())
5356 return std::nullopt;
5362 assert(
Size.isScalable() == VF.isScalable() &&
5363 "if Size is scalable, VF must be scalable and vice versa");
5364 return Size.getKnownMinValue();
5368 unsigned MinVal = VF.getKnownMinValue();
5370 if (IG->getFactor() == MinVal && GroupSize == GetVectorBitWidthForVF(VF))
5373 return std::nullopt;
5381 return RepR && RepR->isSingleScalar();
5388 auto *R = V->getDefiningRecipe();
5397 for (
unsigned Idx = 0,
E = WideMember0->getNumOperands(); Idx !=
E; ++Idx)
5398 WideMember0->setOperand(
5407 auto *LI =
cast<LoadInst>(LoadGroup->getInterleaveGroup()->getInsertPos());
5409 *LI, LoadGroup->getAddr(), LoadGroup->getMask(),
true,
5410 false, {}, LoadGroup->getDebugLoc());
5411 L->insertBefore(LoadGroup);
5417 assert(RepR->isSingleScalar() &&
5419 "must be a single scalar load");
5420 NarrowedOps.
insert(RepR);
5425 VPValue *PtrOp = WideLoad->getAddr();
5427 PtrOp = VecPtr->getOperand(0);
5432 nullptr, {}, *WideLoad);
5433 N->insertBefore(WideLoad);
5438std::unique_ptr<VPlan>
5458 "unexpected branch-on-count");
5462 std::optional<ElementCount> VFToOptimize;
5479 if (R.mayWriteToMemory() && !InterleaveR)
5494 std::optional<ElementCount> NarrowedVF =
5496 if (!NarrowedVF || (VFToOptimize && NarrowedVF != VFToOptimize))
5498 VFToOptimize = NarrowedVF;
5501 if (InterleaveR->getStoredValues().empty())
5506 auto *Member0 = InterleaveR->getStoredValues()[0];
5516 VPRecipeBase *DefR = Op.value()->getDefiningRecipe();
5519 auto *IR = dyn_cast<VPInterleaveRecipe>(DefR);
5520 return IR && IR->getInterleaveGroup()->isFull() &&
5521 IR->getVPValue(Op.index()) == Op.value();
5530 VFToOptimize->isScalable()))
5535 if (StoreGroups.
empty())
5539 bool RequiresScalarEpilogue =
5550 std::unique_ptr<VPlan> NewPlan;
5552 NewPlan = std::unique_ptr<VPlan>(Plan.
duplicate());
5553 Plan.
setVF(*VFToOptimize);
5554 NewPlan->removeVF(*VFToOptimize);
5560 for (
auto *StoreGroup : StoreGroups) {
5566 *
SI, StoreGroup->getAddr(), Res,
nullptr,
true,
5567 false, {}, StoreGroup->getDebugLoc());
5568 S->insertBefore(StoreGroup);
5569 StoreGroup->eraseFromParent();
5581 if (VFToOptimize->isScalable()) {
5594 RequiresScalarEpilogue, Step);
5596 Inc->setOperand(1, Step);
5602 "All VPVectorPointerRecipes should have been removed");
5618 "must have a BranchOnCond");
5621 if (VF.
isScalable() && VScaleForTuning.has_value())
5622 VectorStep *= *VScaleForTuning;
5623 assert(VectorStep > 0 &&
"trip count should not be zero");
5627 MiddleTerm->setMetadata(LLVMContext::MD_prof, BranchWeights);
5634 VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
5646 "Cannot handle loops with uncountable early exits");
5719 make_range(MiddleVPBB->getFirstNonPhi(), MiddleVPBB->end()))) {
5733 "vector.recur.extract.for.phi");
5751 auto CheckSentinel = [&SE](
const SCEV *IVSCEV,
bool UseMax,
5752 bool Signed) -> std::optional<APInt> {
5763 return std::nullopt;
5771 PhiR->getRecurrenceKind()))
5780 VPValue *BackedgeVal = PhiR->getBackedgeValue();
5781 VPValue *CondSelect = BackedgeVal;
5796 VPValue *
IV = TrueVal == PhiR ? FalseVal : TrueVal;
5810 bool UseSigned =
true;
5811 std::optional<APInt> SentinelVal =
5812 CheckSentinel(IVSCEV, UseMax,
true);
5814 SentinelVal = CheckSentinel(IVSCEV, UseMax,
false);
5823 if (AR->hasNoSignedWrap())
5825 else if (AR->hasNoUnsignedWrap())
5838 VPIRFlags Flags(MinMaxKind,
false,
false,
5847 VPValue *StartVPV = PhiR->getStartValue();
5855 MiddleBuilder.
createSelect(Cmp, ReducedIV, StartVPV, ExitDL);
5864 AnyOfPhi->insertAfter(PhiR);
5868 if (TrueVal == PhiR)
5875 {StartVPV, ReducedIV, OrVal}, {}, ExitDL);
5887 *CondSelect,
RdxUnordered{1}, {}, PhiR->hasUsesOutsideReductionChain());
5888 NewPhiR->insertBefore(PhiR);
5889 PhiR->replaceAllUsesWith(NewPhiR);
5890 PhiR->eraseFromParent();
5898struct ExtendedReductionOperand {
5901 std::array<VPWidenCastRecipe *, 2> CastRecipes = {};
5907struct VPPartialReductionChain {
5910 VPWidenRecipe *ReductionBinOp;
5912 ExtendedReductionOperand ExtendedOp;
5913 unsigned ScaleFactor;
5936 auto *Trunc = Builder.createWidenCast(Instruction::CastOps::Trunc,
5939 BinOp->
setOperand(1, Builder.createWidenCast(ExtOpc, Trunc, WideTy));
5952 if (!
Mul->hasOneUse() ||
5953 (Ext->getOpcode() != MulLHS->getOpcode() && MulLHS != MulRHS) ||
5954 MulLHS->getOpcode() != MulRHS->getOpcode())
5957 Mul->setOperand(0, Builder.createWidenCast(MulLHS->getOpcode(),
5958 MulLHS->getOperand(0),
5959 Ext->getResultType()));
5960 Mul->setOperand(1, MulLHS == MulRHS
5961 ?
Mul->getOperand(0)
5962 : Builder.createWidenCast(MulRHS->getOpcode(),
5963 MulRHS->getOperand(0),
5964 Ext->getResultType()));
5973static void transformToPartialReduction(
const VPPartialReductionChain &Chain,
6002 if (WidenRecipe->
getOpcode() == Instruction::Sub &&
6012 Builder.insert(NegRecipe);
6017 BinOp = optimizeExtendsForPartialReduction(BinOp, TypeInfo);
6027 assert((!ExitValue || IsLastInChain) &&
6028 "if we found ExitValue, it must match RdxPhi's backedge value");
6039 PartialRed->insertBefore(WidenRecipe);
6056 auto *NewScaleFactor = Plan.
getConstantInt(32, Chain.ScaleFactor);
6057 StartInst->setOperand(2, NewScaleFactor);
6065 VPValue *OldStartValue = StartInst->getOperand(0);
6066 StartInst->setOperand(0, StartInst->getOperand(1));
6070 assert(RdxResult &&
"Could not find reduction result");
6073 constexpr unsigned SubOpc = Instruction::BinaryOps::Sub;
6079 [&NewResult](
VPUser &U,
unsigned Idx) {
return &
U != NewResult; });
6085static bool isValidPartialReduction(
const VPPartialReductionChain &Chain,
6089 -> std::pair<Type *, TargetTransformInfo::PartialReductionExtendKind> {
6095 return {ExtOpType, ExtKind};
6097 ExtendedReductionOperand ExtendedOp = Chain.ExtendedOp;
6101 Type *ExtOpTypeA, *ExtOpTypeB;
6103 std::tie(ExtOpTypeA, ExtKindA) = GetExtInfo(ExtendA);
6104 std::tie(ExtOpTypeB, ExtKindB) = GetExtInfo(ExtendB);
6108 if (!ExtendB && ExtendedOp.BinOp &&
6109 ExtendedOp.BinOp != Chain.ReductionBinOp) {
6117 ExtOpTypeB = ExtOpTypeA;
6118 ExtKindB = ExtKindA;
6121 std::optional<unsigned> BinOpc;
6122 if (ExtendedOp.BinOp && ExtendedOp.BinOp != Chain.ReductionBinOp)
6130 WidenRecipe->
getOpcode(), ExtOpTypeA, ExtOpTypeB, PhiType, VF,
6131 ExtKindA, ExtKindB, BinOpc, CostCtx.
CostKind,
6133 ? std::optional{WidenRecipe->getFastMathFlags()}
6159static std::optional<ExtendedReductionOperand>
6162 "Op should be operand of UpdateR");
6164 std::optional<TTI::PartialReductionExtendKind> OuterExtKind;
6167 VPValue *CastSource = CastRecipe->getOperand(0);
6176 OuterExtKind = getPartialReductionExtendKind(CastRecipe);
6177 }
else if (UpdateR->
getOpcode() == Instruction::Add ||
6178 UpdateR->
getOpcode() == Instruction::FAdd) {
6182 return ExtendedReductionOperand{UpdateR, {CastRecipe,
nullptr}};
6186 if (!
Op->hasOneUse())
6187 return std::nullopt;
6196 return std::nullopt;
6206 return std::nullopt;
6216 return std::nullopt;
6220 if (Cast && OuterExtKind &&
6221 getPartialReductionExtendKind(Cast) != OuterExtKind)
6222 return std::nullopt;
6224 return ExtendedReductionOperand{BinOp, {LHSCast, RHSCast}};
6231static std::optional<SmallVector<VPPartialReductionChain>>
6239 return std::nullopt;
6249 VPValue *CurrentValue = ExitValue;
6250 while (CurrentValue != RedPhiR) {
6253 return std::nullopt;
6260 std::optional<ExtendedReductionOperand> ExtendedOp =
6261 matchExtendedReductionOperand(UpdateR,
Op);
6263 ExtendedOp = matchExtendedReductionOperand(UpdateR, PrevValue);
6265 return std::nullopt;
6270 ExtendedOp->CastRecipes[0]->getOperand(0));
6273 return std::nullopt;
6275 VPPartialReductionChain Chain(
6276 {UpdateR, *ExtendedOp,
6278 if (!isValidPartialReduction(Chain, PhiType, CostCtx,
Range))
6279 return std::nullopt;
6282 CurrentValue = PrevValue;
6287 std::reverse(Chains.
begin(), Chains.
end());
6306 if (
auto Chains = getScaledReductions(RedPhiR, CostCtx,
Range))
6307 ChainsByPhi.
try_emplace(RedPhiR, std::move(*Chains));
6310 if (ChainsByPhi.
empty())
6317 for (
const auto &[
_, Chains] : ChainsByPhi)
6318 for (
const VPPartialReductionChain &Chain : Chains) {
6319 PartialReductionOps.
insert(Chain.ExtendedOp.BinOp);
6320 ScaledReductionMap[Chain.ReductionBinOp] = Chain.ScaleFactor;
6328 return PartialReductionOps.contains(cast<VPRecipeBase>(U));
6337 for (
auto &[RedPhiR, Chains] : ChainsByPhi) {
6338 for (
const VPPartialReductionChain &Chain : Chains) {
6339 if (!
all_of(Chain.ExtendedOp.CastRecipes, ExtendUsersValid)) {
6343 auto UseIsValid = [&, RedPhiR = RedPhiR](
VPUser *U) {
6345 return PhiR == RedPhiR;
6347 return Chain.ScaleFactor == ScaledReductionMap.
lookup_or(R, 0) ||
6353 if (!
all_of(Chain.ReductionBinOp->
users(), UseIsValid)) {
6362 auto *RepR = dyn_cast<VPReplicateRecipe>(U);
6363 return RepR && isa<StoreInst>(RepR->getUnderlyingInstr());
6372 for (
auto &[Phi, Chains] : ChainsByPhi)
6373 for (
const VPPartialReductionChain &Chain : Chains)
6374 transformToPartialReduction(Chain, CostCtx.
Types, Plan, Phi);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
This file implements a class to represent arbitrary precision integral constant values and operations...
ReachingDefInfo InstSet & ToRemove
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static bool isEqual(const Function &Caller, const Function &Callee)
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
static bool isSentinel(const DWARFDebugNames::AttributeEncoding &AE)
iv Induction Variable Users
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
Legalize the Machine IR a function s Machine IR
static bool mergeBlocksIntoPredecessors(Loop &L, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution &SE)
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first DebugLoc that has line number information, given a range of instructions.
This file provides utility analysis objects describing memory locations.
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
const SmallVectorImpl< MachineOperand > & Cond
This is the interface for a metadata-based scoped no-alias analysis.
This file defines generic set operations that may be used on set's of different types,...
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static SymbolRef::Type getType(const Symbol *Sym)
This file implements the TypeSwitch template, which mimics a switch() statement whose cases are type ...
This file implements dominator tree analysis for a single level of a VPlan's H-CFG.
This file contains the declarations of different VPlan-related auxiliary helpers.
This file declares the class VPlanVerifier, which contains utility functions to check the consistency...
This file contains the declarations of the Vectorization Plan base classes:
static const X86InstrFMA3Group Groups[]
static const uint32_t IV[8]
Helper for extra no-alias checks via known-safe recipe and SCEV.
SinkStoreInfo(const SmallPtrSetImpl< VPRecipeBase * > &ExcludeRecipes, VPReplicateRecipe &GroupLeader, PredicatedScalarEvolution &PSE, const Loop &L, VPTypeAnalysis &TypeInfo)
bool shouldSkip(VPRecipeBase &R) const
Return true if R should be skipped during alias checking, either because it's in the exclude set or b...
Class for arbitrary precision integers.
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
unsigned getActiveBits() const
Compute the number of active bits in the value.
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
APInt abs() const
Get the absolute value.
unsigned getBitWidth() const
Return the number of bits in the APInt.
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
@ NoAlias
The two locations do not alias at all.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & back() const
back - Get the last element.
const T & front() const
front - Get the first element.
LLVM Basic Block Representation.
const Function * getParent() const
Return the enclosing method, or null if none.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
This class represents a function call, abstracting a target machine's calling convention.
@ ICMP_ULT
unsigned less than
@ ICMP_ULE
unsigned less or equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
This class represents a range of values.
LLVM_ABI bool contains(const APInt &Val) const
Return true if the specified value is in the set.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
static DebugLoc getCompilerGenerated()
static DebugLoc getUnknown()
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
ValueT lookup_or(const_arg_type_t< KeyT > Val, U &&Default) const
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
constexpr bool isVector() const
One or more elements.
static constexpr ElementCount getScalable(ScalarTy MinVal)
Utility class for floating point operations which can have information about relaxed accuracy require...
Convenience struct for specifying and reasoning about fast-math flags.
Represents flags for the getelementptr instruction/expression.
GEPNoWrapFlags withoutNoUnsignedWrap() const
static GEPNoWrapFlags none()
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
A struct for saving information about induction variables.
InductionKind
This enum represents the kinds of inductions that we support.
@ IK_PtrInduction
Pointer induction var. Step = C.
@ IK_IntInduction
Integer induction variable. Step = C.
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
static InstructionCost getInvalid(CostType Val=0)
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
The group of interleaved loads/stores sharing the same stride and close to each other.
InstTy * getMember(uint32_t Index) const
Get the member with the given index Index.
uint32_t getNumMembers() const
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
static bool getDecisionAndClampRange(const std::function< bool(ElementCount)> &Predicate, VFRange &Range)
Test a Predicate on a Range of VF's.
Represents a single loop in the control flow graph.
LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
This class implements a map that also provides access to all stored values in a deterministic order.
std::pair< iterator, bool > try_emplace(const KeyT &Key, Ts &&...Args)
ValueT lookup(const KeyT &Key) const
Representation for a specific memory location.
AAMDNodes AATags
The metadata nodes which describes the aliasing of the location (each member is null if that kind of ...
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
LLVM_ABI const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
static LLVM_ABI unsigned getOpcode(RecurKind Kind)
Returns the opcode corresponding to the RecurrenceKind.
unsigned getOpcode() const
static bool isFindLastRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
RegionT * getParent() const
Get the parent of the Region.
This class uses information about analyze scalars to rewrite expressions in canonical form.
LLVM_ABI Value * expandCodeFor(const SCEV *SH, Type *Ty, BasicBlock::iterator I)
Insert code to directly compute the specified SCEV expression into the program.
static const SCEV * rewrite(const SCEV *Scev, ScalarEvolution &SE, ValueToSCEVMapTy &Map)
This class represents an analyzed expression in the program.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
LLVM_ABI const SCEV * getUDivExpr(SCEVUse LHS, SCEVUse RHS)
Get a canonical unsigned division expression, or something simpler if possible.
const DataLayout & getDataLayout() const
Return the DataLayout associated with the module this SCEV instance is operating on.
LLVM_ABI const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
LLVM_ABI bool isKnownNonZero(const SCEV *S)
Test if the given expression is known to be non-zero.
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI const SCEV * getMinusSCEV(SCEVUse LHS, SCEVUse RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
ConstantRange getSignedRange(const SCEV *S)
Determine the signed range for a particular SCEV.
LLVM_ABI bool isKnownPositive(const SCEV *S)
Test if the given expression is known to be positive.
LLVM_ABI const SCEV * getElementCount(Type *Ty, ElementCount EC, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
ConstantRange getUnsignedRange(const SCEV *S)
Determine the unsigned range for a particular SCEV.
LLVM_ABI const SCEV * getMulExpr(SmallVectorImpl< SCEVUse > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
LLVM_ABI bool isKnownPredicate(CmpPredicate Pred, SCEVUse LHS, SCEVUse RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
static LLVM_ABI bool mayAliasInScopes(const MDNode *Scopes, const MDNode *NoAlias)
static LLVM_ABI AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB)
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Provides information about what library functions are available for the current target.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
This class implements a switch-like dispatch statement for a value of 'T' using dyn_cast functionalit...
TypeSwitch< T, ResultT > & Case(CallableT &&caseFn)
Add a case on the given type.
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
bool isStructTy() const
True if this is an instance of StructType.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
A recipe for generating the active lane mask for the vector loop that is used to predicate the vector...
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
RecipeListTy::iterator iterator
Instruction iterators...
iterator begin()
Recipe iterator methods.
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
VPRegionBlock * getEnclosingLoopRegion()
VPBasicBlock * splitAt(iterator SplitAt)
Split current block at SplitAt by inserting a new block between the current block and its successors ...
const VPRecipeBase & front() const
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
const VPRecipeBase & back() const
A recipe for vectorizing a phi-node as a sequence of mask-based select instructions.
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
void setMask(unsigned Idx, VPValue *V)
Set mask number Idx to V.
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have...
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccss as successor of this VPBlockBase.
VPRegionBlock * getParent()
const VPBasicBlock * getExitingBasicBlock() const
size_t getNumSuccessors() const
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
const VPBlocksTy & getPredecessors() const
const std::string & getName() const
void clearSuccessors()
Remove all the successors of this block.
VPBlockBase * getSinglePredecessor() const
const VPBasicBlock * getEntryBasicBlock() const
VPBlockBase * getSingleHierarchicalPredecessor()
VPBlockBase * getSingleSuccessor() const
const VPBlocksTy & getSuccessors() const
static auto blocksOnly(const T &Range)
Return an iterator range over Range which only includes BlockTy blocks.
static void insertOnEdge(VPBlockBase *From, VPBlockBase *To, VPBlockBase *BlockPtr)
Inserts BlockPtr on the edge between From and To.
static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBases IfTrue and IfFalse after BlockPtr.
static void connectBlocks(VPBlockBase *From, VPBlockBase *To, unsigned PredIdx=-1u, unsigned SuccIdx=-1u)
Connect VPBlockBases From and To bi-directionally.
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
A recipe for generating conditional branches on the bits of a mask.
RAII object that stores the current insertion point and restores it when the object is destroyed.
VPlan-based builder utility analogous to IRBuilder.
VPInstruction * createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPValue * createScalarZExtOrTrunc(VPValue *Op, Type *ResultTy, Type *SrcTy, DebugLoc DL)
VPValue * createElementCount(Type *Ty, ElementCount EC)
VPInstruction * createNot(VPValue *Operand, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createScalarCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, DebugLoc DL, const VPIRMetadata &Metadata={})
static VPBuilder getToInsertAfter(VPRecipeBase *R)
Create a VPBuilder to insert after R.
VPInstruction * createOverflowingOp(unsigned Opcode, ArrayRef< VPValue * > Operands, VPRecipeWithIRFlags::WrapFlagsTy WrapFlags={false, false}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPPhi * createScalarPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={})
VPDerivedIVRecipe * createDerivedIV(InductionDescriptor::InductionKind Kind, FPMathOperator *FPBinOp, VPIRValue *Start, VPValue *Current, VPValue *Step, const Twine &Name="")
Convert the input value Current to the corresponding value of an induction with Start and Step values...
VPInstruction * createICmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new ICmp VPInstruction with predicate Pred and operands A and B.
VPInstruction * createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={})
void setInsertPoint(VPBasicBlock *TheBB)
This specifies that created VPInstructions should be appended to the end of the specified block.
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.
Canonical scalar induction phi of the vector loop.
A recipe for generating the phi node tracking the current scalar iteration index.
unsigned getNumDefinedValues() const
Returns the number of values defined by the VPDef.
VPValue * getVPSingleValue()
Returns the only VPValue defined by the VPDef.
VPValue * getVPValue(unsigned I)
Returns the VPValue with index I defined by the VPDef.
ArrayRef< VPRecipeValue * > definedValues()
Returns an ArrayRef of the values defined by the VPDef.
A recipe for converting the input value IV value to the corresponding value of an IV with different s...
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
bool properlyDominates(const VPRecipeBase *A, const VPRecipeBase *B)
A recipe to combine multiple recipes into a single 'expression' recipe, which should be considered a ...
A special type of VPBasicBlock that wraps an existing IR basic block.
Class to record and manage LLVM IR flags.
static VPIRFlags getDefaultFlags(unsigned Opcode)
Returns default flags for Opcode for opcodes that support it, asserts otherwise.
LLVM_ABI_FOR_TEST FastMathFlags getFastMathFlags() const
static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)
Create a new VPIRPhi for \I , if it is a PHINode, otherwise create a VPIRInstruction.
This is a concrete Recipe that models a single VPlan-level instruction.
@ ExtractLane
Extracts a single lane (first operand) from a set of vector operands.
@ ComputeAnyOfResult
Compute the final result of a AnyOf reduction with select(cmp(),x,y), where one of (x,...
@ ExtractPenultimateElement
@ Unpack
Extracts all lanes from its (non-scalable) vector operand.
@ FirstOrderRecurrenceSplice
@ ReductionStartVector
Start vector for reductions with 3 operands: the original start value, the identity value for the red...
@ BuildVector
Creates a fixed-width vector containing all operands.
@ BuildStructVector
Given operands of (the same) struct type, creates a struct of fixed- width vectors each containing a ...
@ CanonicalIVIncrementForPart
const InterleaveGroup< Instruction > * getInterleaveGroup() const
VPValue * getMask() const
Return the mask used by this recipe.
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
A recipe for interleaved memory operations with vector-predication intrinsics.
VPInterleaveRecipe is a recipe for transforming an interleave group of load or stores into one wide l...
VPValue * getIncomingValue(unsigned Idx) const
Returns the incoming VPValue with index Idx.
VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when control converges back from ...
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
VPRegionBlock * getRegion()
VPBasicBlock * getParent()
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Helper class to create VPRecipies from IR instructions.
VPRecipeBase * getRecipe(Instruction *I)
Return the recipe created for given ingredient.
A recipe to represent inloop reduction operations with vector-predication intrinsics,...
A recipe for handling reduction phis.
void setVFScaleFactor(unsigned ScaleFactor)
Set the VFScaleFactor for this reduction phi.
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by, or 1 if it isn't scaled.
RecurKind getRecurrenceKind() const
Returns the recurrence kind of the reduction.
A recipe to represent inloop, ordered or partial reduction operations.
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
const VPBlockBase * getEntry() const
Type * getCanonicalIVType()
Return the type of the canonical IV for loop regions.
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
void setExiting(VPBlockBase *ExitingBlock)
Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.
VPCanonicalIVPHIRecipe * getCanonicalIV()
Returns the canonical induction recipe of the region.
const VPBlockBase * getExiting() const
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
bool isSingleScalar() const
bool isPredicated() const
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...
VPSingleDef is a base class for recipes for modeling a sequence of one or more output IR that define ...
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
VPSingleDefRecipe * clone() override=0
Clone the current recipe.
An analysis for type-inference for VPValues.
LLVMContext & getContext()
Return the LLVMContext used by the analysis.
Type * inferScalarType(const VPValue *V)
Infer the type of V. Returns the scalar type of V.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
void setOperand(unsigned I, VPValue *New)
unsigned getNumOperands() const
VPValue * getOperand(unsigned N) const
void addOperand(VPValue *Operand)
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Value * getLiveInIRValue() const
Return the underlying IR value for a VPIRValue.
bool isDefinedOutsideLoopRegions() const
Returns true if the VPValue is defined outside any loop.
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
void setUnderlyingValue(Value *Val)
void replaceAllUsesWith(VPValue *New)
unsigned getNumUsers() const
void replaceUsesWithIf(VPValue *New, llvm::function_ref< bool(VPUser &U, unsigned Idx)> ShouldReplace)
Go through the uses list for this VPValue and make each use point to New if the callback ShouldReplac...
A recipe to compute a pointer to the last element of each part of a widened memory access for widened...
A Recipe for widening the canonical induction variable of the vector loop.
VPWidenCastRecipe is a recipe to create vector cast instructions.
Instruction::CastOps getOpcode() const
A recipe for handling GEP instructions.
Base class for widened induction (VPWidenIntOrFpInductionRecipe and VPWidenPointerInductionRecipe),...
VPIRValue * getStartValue() const
Returns the start value of the induction.
PHINode * getPHINode() const
Returns the underlying PHINode if one exists, or null otherwise.
VPValue * getStepValue()
Returns the step value of the induction.
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
A recipe for handling phi nodes of integer and floating-point inductions, producing their vector valu...
VPIRValue * getStartValue() const
Returns the start value of the induction.
VPValue * getSplatVFValue() const
If the recipe has been unrolled, return the VPValue for the induction increment, otherwise return nul...
VPValue * getLastUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the last unrolled part,...
A recipe for widening vector intrinsics.
A common base class for widening memory operations.
A recipe for widened phis.
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
unsigned getOpcode() const
VPlan models a candidate for vectorization, encoding various decisions take to produce efficient outp...
VPIRValue * getLiveIn(Value *V) const
Return the live-in VPIRValue for V, if there is one or nullptr otherwise.
bool hasVF(ElementCount VF) const
const DataLayout & getDataLayout() const
LLVMContext & getContext() const
VPBasicBlock * getEntry()
bool hasScalableVF() const
VPValue * getTripCount() const
The trip count of the original loop.
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
iterator_range< SmallSetVector< ElementCount, 2 >::iterator > vectorFactors() const
Returns an iterator range over all VFs of the plan.
VPIRValue * getFalse()
Return a VPIRValue wrapping i1 false.
VPSymbolicValue & getVFxUF()
Returns VF * UF of the vector loop region.
VPIRValue * getAllOnesValue(Type *Ty)
Return a VPIRValue wrapping the AllOnes value of type Ty.
VPRegionBlock * createReplicateRegion(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="")
Create a new replicate region with Entry, Exiting and Name.
auto getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
bool hasUF(unsigned UF) const
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
VPSymbolicValue & getVectorTripCount()
The vector trip count.
VPIRValue * getOrAddLiveIn(Value *V)
Gets the live-in VPIRValue for V or adds a new live-in (if none exists yet) for V.
VPIRValue * getZero(Type *Ty)
Return a VPIRValue wrapping the null value of type Ty.
void setVF(ElementCount VF)
bool isUnrolled() const
Returns true if the VPlan already has been unrolled, i.e.
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
unsigned getConcreteUF() const
Returns the concrete UF of the plan, after unrolling.
void resetTripCount(VPValue *NewTripCount)
Resets the trip count for the VPlan.
VPBasicBlock * getMiddleBlock()
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
VPIRValue * getTrue()
Return a VPIRValue wrapping i1 true.
VPSymbolicValue & getUF()
Returns the UF of the vector loop region.
bool hasScalarVFOnly() const
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
VPBasicBlock * getVectorPreheader()
Returns the preheader of the vector loop region, if one exists, or null otherwise.
VPSymbolicValue & getVF()
Returns the VF of the vector loop region.
bool hasScalarTail() const
Returns true if the scalar tail may execute after the vector loop.
LLVM_ABI_FOR_TEST VPlan * duplicate()
Clone the current VPlan, update all VPValues of the new VPlan and cloned recipes to refer to the clon...
VPIRValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPIRValue wrapping a ConstantInt with the given type and value.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
iterator_range< user_iterator > users()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
constexpr bool hasKnownScalarFactor(const FixedOrScalableQuantity &RHS) const
Returns true if there exists a value X where RHS.multiplyCoefficientBy(X) will result in a value whos...
constexpr ScalarTy getFixedValue() const
constexpr ScalarTy getKnownScalarFactor(const FixedOrScalableQuantity &RHS) const
Returns a value X where RHS.multiplyCoefficientBy(X) will result in a value whose quantity matches ou...
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_ABI APInt RoundingUDiv(const APInt &A, const APInt &B, APInt::Rounding RM)
Return A unsign-divided by B, rounded by the given rounding mode.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_MaskedStore(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
Matches MaskedStore Intrinsic.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
LogicalOp_match< LHS, RHS, Instruction::And > m_LogicalAnd(const LHS &L, const RHS &R)
Matches L && R either in the form of L & R or L ?
BinaryOp_match< LHS, RHS, Instruction::FMul > m_FMul(const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, OpTy > m_ZExtOrSelf(const OpTy &Op)
bool match(Val *V, const Pattern &P)
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
auto match_fn(const Pattern &P)
A match functor that can be used as a UnaryPredicate in functional algorithms like all_of.
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_MaskedLoad(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
Matches MaskedLoad Intrinsic.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
SpecificCmpClass_match< LHS, RHS, CmpInst > m_SpecificCmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
CastInst_match< OpTy, FPExtInst > m_FPExt(const OpTy &Op)
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::UDiv > m_UDiv(const LHS &L, const RHS &R)
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
BinaryOp_match< LHS, RHS, Instruction::FAdd, true > m_c_FAdd(const LHS &L, const RHS &R)
Matches FAdd with LHS and RHS in either order.
LogicalOp_match< LHS, RHS, Instruction::And, true > m_c_LogicalAnd(const LHS &L, const RHS &R)
Matches L && R with LHS and RHS in either order.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
BinaryOp_match< LHS, RHS, Instruction::Mul, true > m_c_Mul(const LHS &L, const RHS &R)
Matches a Mul with LHS and RHS in either order.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
bind_cst_ty m_scev_APInt(const APInt *&C)
Match an SCEV constant and bind it to an APInt.
SCEVAffineAddRec_match< Op0_t, Op1_t, class_match< const Loop > > m_scev_AffineAddRec(const Op0_t &Op0, const Op1_t &Op1)
bool match(const SCEV *S, const Pattern &P)
class_match< const SCEV > m_SCEV()
VPInstruction_match< VPInstruction::ExtractLastLane, VPInstruction_match< VPInstruction::ExtractLastPart, Op0_t > > m_ExtractLastLaneOfLastPart(const Op0_t &Op0)
AllRecipe_commutative_match< Instruction::And, Op0_t, Op1_t > m_c_BinaryAnd(const Op0_t &Op0, const Op1_t &Op1)
Match a binary AND operation.
AllRecipe_match< Instruction::Or, Op0_t, Op1_t > m_BinaryOr(const Op0_t &Op0, const Op1_t &Op1)
Match a binary OR operation.
VPInstruction_match< VPInstruction::AnyOf > m_AnyOf()
AllRecipe_commutative_match< Instruction::Or, Op0_t, Op1_t > m_c_BinaryOr(const Op0_t &Op0, const Op1_t &Op1)
auto m_AnyExtend(const Op0_t &Op0)
VPInstruction_match< VPInstruction::ComputeReductionResult, Op0_t > m_ComputeReductionResult(const Op0_t &Op0)
VPInstruction_match< VPInstruction::StepVector > m_StepVector()
VPInstruction_match< VPInstruction::BranchOnTwoConds > m_BranchOnTwoConds()
AllRecipe_match< Opcode, Op0_t, Op1_t > m_Binary(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::LastActiveLane, Op0_t > m_LastActiveLane(const Op0_t &Op0)
VPInstruction_match< VPInstruction::ExitingIVValue, Op0_t > m_ExitingIVValue(const Op0_t &Op0)
VPInstruction_match< Instruction::ExtractElement, Op0_t, Op1_t > m_ExtractElement(const Op0_t &Op0, const Op1_t &Op1)
specific_intval< 1 > m_False()
VPInstruction_match< VPInstruction::ExtractLastLane, Op0_t > m_ExtractLastLane(const Op0_t &Op0)
VPInstruction_match< VPInstruction::ActiveLaneMask, Op0_t, Op1_t, Op2_t > m_ActiveLaneMask(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
VPInstruction_match< VPInstruction::BranchOnCount > m_BranchOnCount()
bind_ty< VPIRValue > m_VPIRValue(VPIRValue *&V)
Match a VPIRValue.
auto m_GetElementPtr(const Op0_t &Op0, const Op1_t &Op1)
specific_intval< 1 > m_True()
VectorEndPointerRecipe_match< Op0_t, Op1_t > m_VecEndPtr(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::ExtractLastPart, Op0_t > m_ExtractLastPart(const Op0_t &Op0)
VPInstruction_match< VPInstruction::Broadcast, Op0_t > m_Broadcast(const Op0_t &Op0)
class_match< VPValue > m_VPValue()
Match an arbitrary VPValue and ignore it.
VPInstruction_match< VPInstruction::ExplicitVectorLength, Op0_t > m_EVL(const Op0_t &Op0)
VPInstruction_match< VPInstruction::BuildVector > m_BuildVector()
BuildVector is matches only its opcode, w/o matching its operands as the number of operands is not fi...
VPInstruction_match< VPInstruction::ExtractPenultimateElement, Op0_t > m_ExtractPenultimateElement(const Op0_t &Op0)
VPInstruction_match< VPInstruction::FirstActiveLane, Op0_t > m_FirstActiveLane(const Op0_t &Op0)
bind_ty< VPInstruction > m_VPInstruction(VPInstruction *&V)
Match a VPInstruction, capturing if we match.
auto m_DerivedIV(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
VPInstruction_match< VPInstruction::BranchOnCond > m_BranchOnCond()
VPInstruction_match< VPInstruction::ExtractLane, Op0_t, Op1_t > m_ExtractLane(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::Reverse, Op0_t > m_Reverse(const Op0_t &Op0)
NodeAddr< DefNode * > Def
bool isSingleScalar(const VPValue *VPV)
Returns true if VPV is a single scalar, either because it produces the same value for all lanes or on...
bool isUniformAcrossVFsAndUFs(VPValue *V)
Checks if V is uniform across all VF lanes and UF parts.
VPValue * getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr)
Get or create a VPValue that corresponds to the expansion of Expr.
VPInstruction * findComputeReductionResult(VPReductionPHIRecipe *PhiR)
Find the ComputeReductionResult recipe for PhiR, looking through selects inserted for predicated redu...
std::optional< MemoryLocation > getMemoryLocation(const VPRecipeBase &R)
Return a MemoryLocation for R with noalias metadata populated from R, if the recipe is supported and ...
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
VPRecipeBase * findRecipe(VPValue *Start, PredT Pred)
Search Start's users for a recipe satisfying Pred, looking through recipes with definitions.
VPSingleDefRecipe * findHeaderMask(VPlan &Plan)
Collect the header mask with the pattern: (ICMP_ULE, WideCanonicalIV, backedge-taken-count) TODO: Int...
bool onlyScalarValuesUsed(const VPValue *Def)
Returns true if only scalar values of Def are used by all users.
static VPRecipeBase * findUserOf(VPValue *V, const MatchT &P)
If V is used by a recipe matching pattern P, return it.
const SCEV * getSCEVExprForVPValue(const VPValue *V, PredicatedScalarEvolution &PSE, const Loop *L=nullptr)
Return the SCEV expression for V.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void stable_sort(R &&Range)
auto min_element(R &&Range)
Provide wrappers to std::min_element which take ranges instead of having to pass begin/end explicitly...
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
DenseMap< const Value *, const SCEV * > ValueToSCEVMapTy
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
constexpr from_range_t from_range
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting iteration.
auto cast_or_null(const Y &Val)
iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order.
iterator_range< df_iterator< VPBlockDeepTraversalWrapper< VPBlockBase * > > > vp_depth_first_deep(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order while traversing t...
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like a...
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
iterator_range< po_iterator< VPBlockDeepTraversalWrapper< VPBlockBase * > > > vp_post_order_deep(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in post order while traversing through ...
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI_FOR_TEST cl::opt< bool > EnableWideActiveLaneMask
UncountableExitStyle
Different methods of handling early exits.
@ ReadOnly
No side effects to worry about, so we can process any uncountable exits in the loop and branch either...
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
iterator_range< filter_iterator< detail::IterOfRange< RangeT >, PredicateT > > make_filter_range(RangeT &&Range, PredicateT Pred)
Convenience function that takes a range of elements and a predicate, and return a new filter_iterator...
bool canConstantBeExtended(const APInt *C, Type *NarrowType, TTI::PartialReductionExtendKind ExtKind)
Check if a constant CI can be safely treated as having been extended from a narrower type with the gi...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
iterator_range< po_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_post_order_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in post order.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
RecurKind
These are the kinds of recurrences that we support.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ FindIV
FindIV reduction with select(icmp(),x,y) where one of (x,y) is a loop induction variable (increasing ...
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ Sub
Subtraction of integers.
@ AddChainWithSubs
A chain of adds and subs.
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
LLVM_ABI Value * getRecurrenceIdentity(RecurKind K, Type *Tp, FastMathFlags FMF)
Given information about a recurrence kind, return the identity for the @llvm.vector....
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the specified block at the specified instruction.
FunctionAddr VTableAddr Next
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
DWARFExpression::Operation Op
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...
ArrayRef(const T &OneElt) -> ArrayRef< T >
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list is empty.
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
@ Default
The result values are uniform if and only if all operands are uniform.
constexpr detail::IsaCheckPredicate< Types... > IsaPred
Function object wrapper for the llvm::isa type check.
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
RemoveMask_match(const Op0_t &In, Op1_t &Out)
bool match(OpTy *V) const
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
MDNode * Scope
The tag for alias scope specification (used with noalias).
MDNode * NoAlias
The tag specifying the noalias scope.
This struct is a compact representation of a valid (non-zero power of two) alignment.
An information struct used to provide DenseMap with the various necessary components for a given valu...
Incoming for lane mask phi as machine instruction, incoming register Reg and incoming block Block are...
This reduction is unordered with the partial result scaled down by some factor.
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
Struct to hold various analysis needed for cost computations.
TargetTransformInfo::TargetCostKind CostKind
const TargetTransformInfo & TTI
A recipe for handling first-order recurrence phis.
A VPValue representing a live-in from the input IR or a constant.
Type * getType() const
Returns the type of the underlying IR value.
A symbolic live-in VPValue, used for values like vector trip count, VF, and VFxUF.
bool isMaterialized() const
Returns true if this symbolic value has been materialized.
A recipe for widening load operations with vector-predication intrinsics, using the address to load f...
A recipe for widening load operations, using the address to load from and an optional mask.
A recipe for widening store operations with vector-predication intrinsics, using the value to store,...
A recipe for widening store operations, using the stored value, the address to store to and an option...