57 if (!VPBB->getParent())
60 auto EndIter = Term ? Term->getIterator() : VPBB->end();
65 VPValue *VPV = Ingredient.getVPSingleValue();
82 *Load, Ingredient.getOperand(0),
nullptr ,
84 Ingredient.getDebugLoc());
87 *Store, Ingredient.getOperand(1), Ingredient.getOperand(0),
88 nullptr ,
false ,
false , *VPI,
89 Ingredient.getDebugLoc());
92 Ingredient.getDebugLoc());
104 if (VectorID == Intrinsic::experimental_noalias_scope_decl)
109 if (VectorID == Intrinsic::assume ||
110 VectorID == Intrinsic::lifetime_end ||
111 VectorID == Intrinsic::lifetime_start ||
112 VectorID == Intrinsic::sideeffect ||
113 VectorID == Intrinsic::pseudoprobe) {
118 const bool IsSingleScalar = VectorID != Intrinsic::assume &&
119 VectorID != Intrinsic::pseudoprobe;
123 Ingredient.getDebugLoc());
126 *CI, VectorID,
drop_end(Ingredient.operands()), CI->getType(),
127 VPIRFlags(*CI), *VPI, CI->getDebugLoc());
131 CI->getOpcode(), Ingredient.getOperand(0), CI->getType(), CI,
135 *VPI, Ingredient.getDebugLoc());
139 "inductions must be created earlier");
148 "Only recpies with zero or one defined values expected");
149 Ingredient.eraseFromParent();
166 if (
A->getOpcode() != Instruction::Store ||
167 B->getOpcode() != Instruction::Store)
177 const APInt *Distance;
183 Type *TyA = TypeInfo.inferScalarType(
A->getOperand(0));
185 Type *TyB = TypeInfo.inferScalarType(
B->getOperand(0));
191 uint64_t MaxStoreSize = std::max(SizeA, SizeB);
193 auto VFs =
B->getParent()->getPlan()->vectorFactors();
197 return Distance->
abs().
uge(
205 : ExcludeRecipes(ExcludeRecipes), GroupLeader(GroupLeader), PSE(PSE),
206 L(L), TypeInfo(TypeInfo) {}
213 return ExcludeRecipes.contains(&R) ||
214 (Store && isNoAliasViaDistance(Store, &GroupLeader));
227 std::optional<SinkStoreInfo> SinkInfo = {}) {
228 bool CheckReads = SinkInfo.has_value();
235 if (SinkInfo && SinkInfo->shouldSkip(R))
239 if (!
R.mayWriteToMemory() && !(CheckReads &&
R.mayReadFromMemory()))
257template <
unsigned Opcode>
262 static_assert(Opcode == Instruction::Load || Opcode == Instruction::Store,
263 "Only Load and Store opcodes supported");
264 constexpr bool IsLoad = (Opcode == Instruction::Load);
271 if (!RepR || RepR->getOpcode() != Opcode || !FilterFn(RepR))
275 VPValue *Addr = RepR->getOperand(IsLoad ? 0 : 1);
278 RecipesByAddress[AddrSCEV].push_back(RepR);
283 for (
auto &Group :
Groups) {
302 if (R.mayHaveSideEffects() || R.mayReadFromMemory() || R.isPhi())
307 return RepR && RepR->getOpcode() == Instruction::Alloca;
316 auto InsertIfValidSinkCandidate = [ScalarVFOnly, &WorkList](
332 if (!ScalarVFOnly && RepR->isSingleScalar())
335 WorkList.
insert({SinkTo, Candidate});
347 for (
auto &Recipe : *VPBB)
349 InsertIfValidSinkCandidate(VPBB,
Op);
353 for (
unsigned I = 0;
I != WorkList.
size(); ++
I) {
356 std::tie(SinkTo, SinkCandidate) = WorkList[
I];
361 auto UsersOutsideSinkTo =
363 return cast<VPRecipeBase>(U)->getParent() != SinkTo;
365 if (
any_of(UsersOutsideSinkTo, [SinkCandidate](
VPUser *U) {
366 return !U->usesFirstLaneOnly(SinkCandidate);
369 bool NeedsDuplicating = !UsersOutsideSinkTo.empty();
371 if (NeedsDuplicating) {
375 if (
auto *SinkCandidateRepR =
381 nullptr , *SinkCandidateRepR,
385 Clone = SinkCandidate->
clone();
395 InsertIfValidSinkCandidate(SinkTo,
Op);
405 if (!EntryBB || EntryBB->size() != 1 ||
415 if (EntryBB->getNumSuccessors() != 2)
420 if (!Succ0 || !Succ1)
423 if (Succ0->getNumSuccessors() + Succ1->getNumSuccessors() != 1)
425 if (Succ0->getSingleSuccessor() == Succ1)
427 if (Succ1->getSingleSuccessor() == Succ0)
444 if (!Region1->isReplicator())
446 auto *MiddleBasicBlock =
448 if (!MiddleBasicBlock || !MiddleBasicBlock->empty())
453 if (!Region2 || !Region2->isReplicator())
458 if (!Mask1 || Mask1 != Mask2)
461 assert(Mask1 && Mask2 &&
"both region must have conditions");
467 if (TransformedRegions.
contains(Region1))
474 if (!Then1 || !Then2)
494 VPValue *Phi1ToMoveV = Phi1ToMove.getVPSingleValue();
500 if (Phi1ToMove.getVPSingleValue()->getNumUsers() == 0) {
501 Phi1ToMove.eraseFromParent();
504 Phi1ToMove.moveBefore(*Merge2, Merge2->begin());
518 TransformedRegions.
insert(Region1);
521 return !TransformedRegions.
empty();
528 std::string RegionName = (
Twine(
"pred.") + Instr->getOpcodeName()).str();
529 assert(Instr->getParent() &&
"Predicated instruction not in any basic block");
530 auto *BlockInMask = PredRecipe->
getMask();
549 RecipeWithoutMask->getDebugLoc());
573 if (RepR->isPredicated())
592 if (ParentRegion && ParentRegion->
getExiting() == CurrentBlock)
604 if (!VPBB->getParent())
608 if (!PredVPBB || PredVPBB->getNumSuccessors() != 1 ||
617 R.moveBefore(*PredVPBB, PredVPBB->
end());
619 auto *ParentRegion = VPBB->getParent();
620 if (ParentRegion && ParentRegion->getExiting() == VPBB)
621 ParentRegion->setExiting(PredVPBB);
625 return !WorkList.
empty();
632 bool ShouldSimplify =
true;
633 while (ShouldSimplify) {
649 if (!
IV ||
IV->getTruncInst())
664 for (
auto *U : FindMyCast->
users()) {
666 if (UserCast && UserCast->getUnderlyingValue() == IRCast) {
667 FoundUserCast = UserCast;
671 FindMyCast = FoundUserCast;
696 if (!WidenOriginalIV || !WidenOriginalIV->isCanonical())
710 WidenOriginalIV->dropPoisonGeneratingFlags();
723 bool IsConditionalAssume = RepR && RepR->isPredicated() &&
725 if (IsConditionalAssume)
728 if (R.mayHaveSideEffects())
732 return all_of(R.definedValues(),
733 [](
VPValue *V) { return V->getNumUsers() == 0; });
753 VPUser *PhiUser = PhiR->getSingleUser();
756 if (PhiUser !=
Incoming->getDefiningRecipe() ||
759 PhiR->replaceAllUsesWith(Start);
760 PhiR->eraseFromParent();
761 Incoming->getDefiningRecipe()->eraseFromParent();
776 Kind, FPBinOp, StartV, CanonicalIV, Step,
"offset.idx");
786 BaseIV = Builder.createScalarCast(Instruction::Trunc, BaseIV, TruncTy,
DL);
792 if (ResultTy != StepTy) {
799 Builder.setInsertPoint(VecPreheader);
800 Step = Builder.createScalarCast(Instruction::Trunc, Step, ResultTy,
DL);
802 return Builder.createScalarIVSteps(InductionOpcode, FPBinOp, BaseIV, Step,
808 for (
unsigned I = 0;
I !=
Users.size(); ++
I) {
813 Users.insert_range(V->users());
815 return Users.takeVector();
829 nullptr, StartV, StepV, PtrIV->
getDebugLoc(), Builder);
866 Def->getNumUsers() == 0 || !Def->getUnderlyingValue() ||
867 (RepR && (RepR->isSingleScalar() || RepR->isPredicated())))
875 Def->operands(),
true,
877 Clone->insertAfter(Def);
878 Def->replaceAllUsesWith(Clone);
889 PtrIV->replaceAllUsesWith(PtrAdd);
896 if (HasOnlyVectorVFs &&
none_of(WideIV->users(), [WideIV](
VPUser *U) {
897 return U->usesScalars(WideIV);
903 Plan,
ID.getKind(),
ID.getInductionOpcode(),
905 WideIV->getTruncInst(), WideIV->getStartValue(), WideIV->getStepValue(),
906 WideIV->getDebugLoc(), Builder);
909 if (!HasOnlyVectorVFs) {
911 "plans containing a scalar VF cannot also include scalable VFs");
912 WideIV->replaceAllUsesWith(Steps);
915 WideIV->replaceUsesWithIf(Steps,
916 [WideIV, HasScalableVF](
VPUser &U,
unsigned) {
918 return U.usesFirstLaneOnly(WideIV);
919 return U.usesScalars(WideIV);
935 return (IntOrFpIV && IntOrFpIV->getTruncInst()) ? nullptr : WideIV;
940 if (!Def || Def->getNumOperands() != 2)
948 auto IsWideIVInc = [&]() {
949 auto &
ID = WideIV->getInductionDescriptor();
952 VPValue *IVStep = WideIV->getStepValue();
953 switch (
ID.getInductionOpcode()) {
954 case Instruction::Add:
956 case Instruction::FAdd:
958 case Instruction::FSub:
961 case Instruction::Sub: {
981 return IsWideIVInc() ? WideIV :
nullptr;
1001 if (WideIntOrFp && WideIntOrFp->getTruncInst())
1014 FirstActiveLane =
B.createScalarZExtOrTrunc(FirstActiveLane, CanonicalIVType,
1015 FirstActiveLaneType,
DL);
1016 VPValue *EndValue =
B.createAdd(CanonicalIV, FirstActiveLane,
DL);
1023 EndValue =
B.createAdd(EndValue, One,
DL);
1026 if (!WideIntOrFp || !WideIntOrFp->isCanonical()) {
1028 VPIRValue *Start = WideIV->getStartValue();
1029 VPValue *Step = WideIV->getStepValue();
1030 EndValue =
B.createDerivedIV(
1032 Start, EndValue, Step);
1047 if (WideIntOrFp && WideIntOrFp->getTruncInst())
1054 if (!WideIntOrFp || !WideIntOrFp->isCanonical()) {
1057 Start, VectorTC, Step);
1086 assert(EndValue &&
"Must have computed the end value up front");
1102 auto *Zero = Plan.
getZero(StepTy);
1103 return B.createPtrAdd(EndValue,
B.createSub(Zero, Step),
1108 return B.createNaryOp(
1109 ID.getInductionBinOp()->getOpcode() == Instruction::FAdd
1111 : Instruction::FAdd,
1112 {EndValue, Step}, {ID.getInductionBinOp()->getFastMathFlags()});
1124 VPBuilder VectorPHBuilder(VectorPH, VectorPH->begin());
1133 WideIV, VectorPHBuilder, TypeInfo, ResumeTC))
1134 EndValues[WideIV] = EndValue;
1144 R.getVPSingleValue()->replaceAllUsesWith(EndValue);
1145 R.eraseFromParent();
1154 for (
auto [Idx, PredVPBB] :
enumerate(ExitVPBB->getPredecessors())) {
1156 if (PredVPBB == MiddleVPBB)
1158 ExitIRI->getOperand(Idx),
1162 Plan, TypeInfo, PredVPBB, ExitIRI->getOperand(Idx), PSE);
1164 ExitIRI->setOperand(Idx, Escape);
1181 const auto &[V, Inserted] = SCEV2VPV.
try_emplace(ExpR->getSCEV(), ExpR);
1184 ExpR->replaceAllUsesWith(V->second);
1185 ExpR->eraseFromParent();
1194 while (!WorkList.
empty()) {
1196 if (!Seen.
insert(Cur).second)
1204 R->eraseFromParent();
1211static std::optional<std::pair<bool, unsigned>>
1214 std::optional<std::pair<bool, unsigned>>>(R)
1217 [](
auto *
I) {
return std::make_pair(
false,
I->getOpcode()); })
1219 return std::make_pair(
true,
I->getVectorIntrinsicID());
1221 .Case<VPVectorPointerRecipe, VPPredInstPHIRecipe>([](
auto *
I) {
1225 return std::make_pair(
false,
1228 .
Default([](
auto *) {
return std::nullopt; });
1246 Value *V =
Op->getUnderlyingValue();
1252 auto FoldToIRValue = [&]() ->
Value * {
1254 if (OpcodeOrIID->first) {
1255 if (R.getNumOperands() != 2)
1257 unsigned ID = OpcodeOrIID->second;
1258 return Folder.FoldBinaryIntrinsic(
ID,
Ops[0],
Ops[1],
1261 unsigned Opcode = OpcodeOrIID->second;
1270 return Folder.FoldSelect(
Ops[0],
Ops[1],
1273 return Folder.FoldBinOp(Instruction::BinaryOps::Xor,
Ops[0],
1275 case Instruction::Select:
1276 return Folder.FoldSelect(
Ops[0],
Ops[1],
Ops[2]);
1277 case Instruction::ICmp:
1278 case Instruction::FCmp:
1281 case Instruction::GetElementPtr: {
1284 return Folder.FoldGEP(
GEP->getSourceElementType(),
Ops[0],
1294 case Instruction::ExtractElement:
1301 if (
Value *V = FoldToIRValue())
1302 return R.getParent()->getPlan()->getOrAddLiveIn(V);
1308 VPlan *Plan = Def->getParent()->getPlan();
1314 return Def->replaceAllUsesWith(V);
1320 PredPHI->replaceAllUsesWith(
Op);
1333 bool CanCreateNewRecipe =
1340 if (TruncTy == ATy) {
1341 Def->replaceAllUsesWith(
A);
1350 : Instruction::ZExt;
1353 if (
auto *UnderlyingExt = Def->getOperand(0)->getUnderlyingValue()) {
1355 Ext->setUnderlyingValue(UnderlyingExt);
1357 Def->replaceAllUsesWith(Ext);
1359 auto *Trunc = Builder.createWidenCast(Instruction::Trunc,
A, TruncTy);
1360 Def->replaceAllUsesWith(Trunc);
1368 for (
VPUser *U :
A->users()) {
1370 for (
VPValue *VPV : R->definedValues())
1384 Def->replaceAllUsesWith(
X);
1385 Def->eraseFromParent();
1391 return Def->replaceAllUsesWith(
1396 return Def->replaceAllUsesWith(
X);
1400 return Def->replaceAllUsesWith(
1405 return Def->replaceAllUsesWith(
1410 return Def->replaceAllUsesWith(
X);
1414 return Def->replaceAllUsesWith(Plan->
getFalse());
1418 return Def->replaceAllUsesWith(
X);
1421 if (CanCreateNewRecipe &&
1426 (!Def->getOperand(0)->hasMoreThanOneUniqueUser() ||
1427 !Def->getOperand(1)->hasMoreThanOneUniqueUser()))
1428 return Def->replaceAllUsesWith(
1429 Builder.createLogicalAnd(
X, Builder.createOr(
Y, Z)));
1434 return Def->replaceAllUsesWith(Def->getOperand(1));
1439 return Def->replaceAllUsesWith(Builder.createLogicalAnd(
X,
Y));
1443 return Def->replaceAllUsesWith(Plan->
getFalse());
1446 return Def->replaceAllUsesWith(
X);
1450 if (CanCreateNewRecipe &&
1452 return Def->replaceAllUsesWith(Builder.createNot(
C));
1456 Def->setOperand(0,
C);
1457 Def->setOperand(1,
Y);
1458 Def->setOperand(2,
X);
1463 return Def->replaceAllUsesWith(
A);
1466 return Def->replaceAllUsesWith(
A);
1469 return Def->replaceAllUsesWith(
1476 return Def->replaceAllUsesWith(
1478 Def->getDebugLoc(),
"", NW));
1484 return Def->replaceAllUsesWith(Builder.createNaryOp(
1486 {A, Plan->getConstantInt(APC->getBitWidth(), APC->exactLogBase2())},
1491 return Def->replaceAllUsesWith(Builder.createNaryOp(
1493 {A, Plan->getConstantInt(APC->getBitWidth(), APC->exactLogBase2())},
1498 return Def->replaceAllUsesWith(
A);
1513 R->setOperand(1,
Y);
1514 R->setOperand(2,
X);
1518 R->replaceAllUsesWith(Cmp);
1523 if (!Cmp->getDebugLoc() && Def->getDebugLoc())
1524 Cmp->setDebugLoc(Def->getDebugLoc());
1536 if (
Op->getNumUsers() > 1 ||
1540 }
else if (!UnpairedCmp) {
1541 UnpairedCmp =
Op->getDefiningRecipe();
1545 UnpairedCmp =
nullptr;
1552 if (NewOps.
size() < Def->getNumOperands()) {
1554 return Def->replaceAllUsesWith(NewAnyOf);
1561 if (CanCreateNewRecipe &&
1567 return Def->replaceAllUsesWith(NewCmp);
1575 return Def->replaceAllUsesWith(Def->getOperand(1));
1581 X = Builder.createWidenCast(Instruction::Trunc,
X, WideStepTy);
1582 Def->replaceAllUsesWith(
X);
1592 Def->setOperand(1, Def->getOperand(0));
1593 Def->setOperand(0,
Y);
1600 return Def->replaceAllUsesWith(Def->getOperand(0));
1606 Def->replaceAllUsesWith(
1607 BuildVector->getOperand(BuildVector->getNumOperands() - 1));
1611 return Def->replaceAllUsesWith(
A);
1617 Def->replaceAllUsesWith(
1618 BuildVector->getOperand(BuildVector->getNumOperands() - 2));
1625 Def->replaceAllUsesWith(BuildVector->getOperand(Idx));
1630 Def->replaceAllUsesWith(
1640 "broadcast operand must be single-scalar");
1641 Def->setOperand(0,
C);
1646 if (Def->getNumOperands() == 1) {
1647 Def->replaceAllUsesWith(Def->getOperand(0));
1652 Phi->replaceAllUsesWith(Phi->getOperand(0));
1658 if (Def->getNumOperands() == 1 &&
1660 return Def->replaceAllUsesWith(IRV);
1673 return Def->replaceAllUsesWith(
A);
1676 Def->replaceAllUsesWith(Builder.createNaryOp(
1677 Instruction::ExtractElement, {A, LaneToExtract}, Def->getDebugLoc()));
1691 auto *IVInc = Def->getOperand(0);
1692 if (IVInc->getNumUsers() == 2) {
1697 if (Phi->getNumUsers() == 1 || (Phi->getNumUsers() == 2 && Inc)) {
1698 Def->replaceAllUsesWith(IVInc);
1700 Inc->replaceAllUsesWith(Phi);
1701 Phi->setOperand(0,
Y);
1717 Steps->replaceAllUsesWith(Steps->getOperand(0));
1725 Def->replaceUsesWithIf(StartV, [](
const VPUser &U,
unsigned Idx) {
1727 return PhiR && PhiR->isInLoop();
1733 Def->replaceAllUsesWith(
A);
1742 [Def,
A](
VPUser *U) { return U->usesScalars(A) || Def == U; })) {
1743 return Def->replaceAllUsesWith(
A);
1747 return Def->replaceAllUsesWith(
A);
1774 while (!Worklist.
empty()) {
1783 R->replaceAllUsesWith(
1784 Builder.createLogicalAnd(HeaderMask, Builder.createLogicalAnd(
X,
Y)));
1803 if (RepR && (RepR->isSingleScalar() || RepR->isPredicated()))
1812 !WidenStoreR->isConsecutive()) {
1813 assert(!WidenStoreR->isReverse() &&
1814 "Not consecutive memory recipes shouldn't be reversed");
1815 VPValue *Mask = WidenStoreR->getMask();
1824 {WidenStoreR->getOperand(1)});
1829 &WidenStoreR->getIngredient(), {Extract, WidenStoreR->getAddr()},
1830 true ,
nullptr , {},
1832 ScalarStore->insertBefore(WidenStoreR);
1833 WidenStoreR->eraseFromParent();
1841 RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(),
1842 true ,
nullptr , *RepR ,
1843 *RepR , RepR->getDebugLoc());
1844 Clone->insertBefore(RepOrWidenR);
1846 VPValue *ExtractOp = Clone->getOperand(0);
1852 Clone->setOperand(0, ExtractOp);
1853 RepR->eraseFromParent();
1862 auto IntroducesBCastOf = [](
const VPValue *
Op) {
1871 return !U->usesScalars(
Op);
1875 if (
any_of(RepOrWidenR->users(), IntroducesBCastOf(RepOrWidenR)) &&
1878 make_filter_range(Op->users(), not_equal_to(RepOrWidenR)),
1879 IntroducesBCastOf(Op)))
1883 auto *IRV = dyn_cast<VPIRValue>(Op);
1884 bool LiveInNeedsBroadcast = IRV && !isa<Constant>(IRV->getValue());
1885 auto *OpR = dyn_cast<VPReplicateRecipe>(Op);
1886 return LiveInNeedsBroadcast || (OpR && OpR->isSingleScalar());
1891 RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(),
1892 true ,
nullptr, *RepOrWidenR);
1893 Clone->insertBefore(RepOrWidenR);
1894 RepOrWidenR->replaceAllUsesWith(Clone);
1896 RepOrWidenR->eraseFromParent();
1932 if (Blend->isNormalized() || !
match(Blend->getMask(0),
m_False()))
1933 UniqueValues.
insert(Blend->getIncomingValue(0));
1934 for (
unsigned I = 1;
I != Blend->getNumIncomingValues(); ++
I)
1936 UniqueValues.
insert(Blend->getIncomingValue(
I));
1938 if (UniqueValues.
size() == 1) {
1939 Blend->replaceAllUsesWith(*UniqueValues.
begin());
1940 Blend->eraseFromParent();
1944 if (Blend->isNormalized())
1950 unsigned StartIndex = 0;
1951 for (
unsigned I = 0;
I != Blend->getNumIncomingValues(); ++
I) {
1956 if (Mask->getNumUsers() == 1 && !
match(Mask,
m_False())) {
1963 OperandsWithMask.
push_back(Blend->getIncomingValue(StartIndex));
1965 for (
unsigned I = 0;
I != Blend->getNumIncomingValues(); ++
I) {
1966 if (
I == StartIndex)
1968 OperandsWithMask.
push_back(Blend->getIncomingValue(
I));
1969 OperandsWithMask.
push_back(Blend->getMask(
I));
1974 OperandsWithMask, *Blend, Blend->getDebugLoc());
1975 NewBlend->insertBefore(&R);
1977 VPValue *DeadMask = Blend->getMask(StartIndex);
1979 Blend->eraseFromParent();
1984 if (NewBlend->getNumOperands() == 3 &&
1986 VPValue *Inc0 = NewBlend->getOperand(0);
1987 VPValue *Inc1 = NewBlend->getOperand(1);
1988 VPValue *OldMask = NewBlend->getOperand(2);
1989 NewBlend->setOperand(0, Inc1);
1990 NewBlend->setOperand(1, Inc0);
1991 NewBlend->setOperand(2, NewMask);
2018 APInt MaxVal = AlignedTC - 1;
2021 unsigned NewBitWidth =
2027 bool MadeChange =
false;
2036 if (!WideIV || !WideIV->isCanonical() ||
2037 WideIV->hasMoreThanOneUniqueUser() ||
2038 NewIVTy == WideIV->getScalarType())
2043 VPUser *SingleUser = WideIV->getSingleUser();
2051 auto *NewStart = Plan.
getZero(NewIVTy);
2052 WideIV->setStartValue(NewStart);
2054 WideIV->setStepValue(NewStep);
2061 Cmp->setOperand(1, NewBTC);
2075 return any_of(
Cond->getDefiningRecipe()->operands(), [&Plan, BestVF, BestUF,
2077 return isConditionTrueViaVFAndUF(C, Plan, BestVF, BestUF, PSE);
2090 const SCEV *VectorTripCount =
2095 "Trip count SCEV must be computable");
2116 auto *Term = &ExitingVPBB->
back();
2129 for (
unsigned Part = 0; Part < UF; ++Part) {
2135 Extracts[Part] = Ext;
2147 match(Phi->getBackedgeValue(),
2149 assert(Index &&
"Expected index from ActiveLaneMask instruction");
2166 "Expected one VPActiveLaneMaskPHIRecipe for each unroll part");
2173 "Expected incoming values of Phi to be ActiveLaneMasks");
2178 EntryALM->setOperand(2, ALMMultiplier);
2179 LoopALM->setOperand(2, ALMMultiplier);
2183 ExtractFromALM(EntryALM, EntryExtracts);
2188 ExtractFromALM(LoopALM, LoopExtracts);
2190 Not->setOperand(0, LoopExtracts[0]);
2193 for (
unsigned Part = 0; Part < UF; ++Part) {
2194 Phis[Part]->setStartValue(EntryExtracts[Part]);
2195 Phis[Part]->setBackedgeValue(LoopExtracts[Part]);
2208 auto *Term = &ExitingVPBB->
back();
2220 const SCEV *VectorTripCount =
2226 "Trip count SCEV must be computable");
2245 Term->setOperand(1, Plan.
getTrue());
2250 {}, Term->getDebugLoc());
2252 Term->eraseFromParent();
2287 R.getVPSingleValue()->replaceAllUsesWith(Trunc);
2297 assert(Plan.
hasVF(BestVF) &&
"BestVF is not available in Plan");
2298 assert(Plan.
hasUF(BestUF) &&
"BestUF is not available in Plan");
2326 auto TryToPushSinkCandidate = [&](
VPRecipeBase *SinkCandidate) {
2329 if (SinkCandidate == Previous)
2333 !Seen.
insert(SinkCandidate).second ||
2346 for (
unsigned I = 0;
I != WorkList.
size(); ++
I) {
2349 "only recipes with a single defined value expected");
2364 if (SinkCandidate == FOR)
2367 SinkCandidate->moveAfter(Previous);
2368 Previous = SinkCandidate;
2392 [&VPDT, HoistPoint](
VPUser *U) {
2393 auto *R = cast<VPRecipeBase>(U);
2394 return HoistPoint == R ||
2395 VPDT.properlyDominates(HoistPoint, R);
2397 "HoistPoint must dominate all users of FOR");
2399 auto NeedsHoisting = [HoistPoint, &VPDT,
2401 VPRecipeBase *HoistCandidate = HoistCandidateV->getDefiningRecipe();
2402 if (!HoistCandidate)
2407 HoistCandidate->
getRegion() == EnclosingLoopRegion) &&
2408 "CFG in VPlan should still be flat, without replicate regions");
2410 if (!Visited.
insert(HoistCandidate).second)
2422 return HoistCandidate;
2431 for (
unsigned I = 0;
I != HoistCandidates.
size(); ++
I) {
2434 "only recipes with a single defined value expected");
2446 if (
auto *R = NeedsHoisting(
Op)) {
2449 if (R->getNumDefinedValues() != 1)
2463 HoistCandidate->moveBefore(*HoistPoint->
getParent(),
2483 VPRecipeBase *Previous = FOR->getBackedgeValue()->getDefiningRecipe();
2486 while (
auto *PrevPhi =
2488 assert(PrevPhi->getParent() == FOR->getParent());
2490 Previous = PrevPhi->getBackedgeValue()->getDefiningRecipe();
2509 {FOR, FOR->getBackedgeValue()});
2514 RecurSplice->setOperand(0, FOR);
2520 for (
VPUser *U : RecurSplice->users()) {
2530 VPValue *PenultimateIndex =
B.createSub(LastActiveLane, One);
2531 VPValue *PenultimateLastIter =
2533 {PenultimateIndex, FOR->getBackedgeValue()});
2538 VPValue *Sel =
B.createSelect(Cmp, LastPrevIter, PenultimateLastIter);
2551 RecurKind RK = PhiR->getRecurrenceKind();
2558 RecWithFlags->dropPoisonGeneratingFlags();
2564struct VPCSEDenseMapInfo :
public DenseMapInfo<VPSingleDefRecipe *> {
2566 return Def == getEmptyKey() || Def == getTombstoneKey();
2577 return GEP->getSourceElementType();
2580 .Case<VPVectorPointerRecipe, VPWidenGEPRecipe>(
2581 [](
auto *
I) {
return I->getSourceElementType(); })
2582 .
Default([](
auto *) {
return nullptr; });
2586 static bool canHandle(
const VPSingleDefRecipe *Def) {
2595 if (!
C || (!
C->first && (
C->second == Instruction::InsertValue ||
2596 C->second == Instruction::ExtractValue)))
2602 return !
Def->mayReadFromMemory();
2606 static unsigned getHashValue(
const VPSingleDefRecipe *Def) {
2607 const VPlan *Plan =
Def->getParent()->getPlan();
2608 VPTypeAnalysis TypeInfo(*Plan);
2611 getGEPSourceElementType(Def), TypeInfo.inferScalarType(Def),
2614 if (RFlags->hasPredicate())
2620 static bool isEqual(
const VPSingleDefRecipe *L,
const VPSingleDefRecipe *R) {
2623 if (
L->getVPRecipeID() !=
R->getVPRecipeID() ||
2625 getGEPSourceElementType(L) != getGEPSourceElementType(R) ||
2627 !
equal(
L->operands(),
R->operands()))
2630 "must have valid opcode info for both recipes");
2632 if (LFlags->hasPredicate() &&
2633 LFlags->getPredicate() !=
2639 const VPRegionBlock *RegionL =
L->getRegion();
2640 const VPRegionBlock *RegionR =
R->getRegion();
2643 L->getParent() !=
R->getParent())
2645 const VPlan *Plan =
L->getParent()->getPlan();
2646 VPTypeAnalysis TypeInfo(*Plan);
2647 return TypeInfo.inferScalarType(L) == TypeInfo.inferScalarType(R);
2663 if (!Def || !VPCSEDenseMapInfo::canHandle(Def))
2667 if (!VPDT.
dominates(V->getParent(), VPBB))
2672 Def->replaceAllUsesWith(V);
2691 "Expected vector prehader's successor to be the vector loop region");
2698 return !Op->isDefinedOutsideLoopRegions();
2701 R.moveBefore(*Preheader, Preheader->
end());
2719 assert(!RepR->isPredicated() &&
2720 "Expected prior transformation of predicated replicates to "
2721 "replicate regions");
2726 if (!RepR->isSingleScalar())
2736 if (Def->getNumUsers() == 0)
2745 auto *UserR = cast<VPRecipeBase>(U);
2746 VPBasicBlock *Parent = UserR->getParent();
2749 if (UserR->isPhi() || Parent->getEnclosingLoopRegion())
2752 if (SinkBB && SinkBB != Parent)
2766 "Defining block must dominate sink block");
2792 VPValue *ResultVPV = R.getVPSingleValue();
2794 unsigned NewResSizeInBits = MinBWs.
lookup(UI);
2795 if (!NewResSizeInBits)
2808 (void)OldResSizeInBits;
2816 VPW->dropPoisonGeneratingFlags();
2818 if (OldResSizeInBits != NewResSizeInBits &&
2822 Instruction::ZExt, ResultVPV, OldResTy,
nullptr,
2824 Ext->insertAfter(&R);
2826 Ext->setOperand(0, ResultVPV);
2827 assert(OldResSizeInBits > NewResSizeInBits &&
"Nothing to shrink?");
2830 "Only ICmps should not need extending the result.");
2840 for (
unsigned Idx = StartIdx; Idx != R.getNumOperands(); ++Idx) {
2841 auto *
Op = R.getOperand(Idx);
2842 unsigned OpSizeInBits =
2844 if (OpSizeInBits == NewResSizeInBits)
2846 assert(OpSizeInBits > NewResSizeInBits &&
"nothing to truncate");
2847 auto [ProcessedIter, IterIsEmpty] = ProcessedTruncs.
try_emplace(
Op);
2849 R.setOperand(Idx, ProcessedIter->second);
2857 Builder.setInsertPoint(&R);
2859 Builder.createWidenCast(Instruction::Trunc,
Op, NewResTy);
2860 ProcessedIter->second = NewOp;
2861 R.setOperand(Idx, NewOp);
2869 std::optional<VPDominatorTree> VPDT;
2883 assert(VPBB->getNumSuccessors() == 2 &&
2884 "Two successors expected for BranchOnCond");
2885 unsigned RemovedIdx;
2896 "There must be a single edge between VPBB and its successor");
2905 VPBB->back().eraseFromParent();
2959 VPValue *StartV = CanonicalIVPHI->getStartValue();
2961 auto *CanonicalIVIncrement =
2964 CanonicalIVIncrement->dropPoisonGeneratingFlags();
2965 DebugLoc DL = CanonicalIVIncrement->getDebugLoc();
2976 auto *EntryIncrement = Builder.createOverflowingOp(
2978 DL,
"index.part.next");
2984 {EntryIncrement, TC, ALMMultiplier},
DL,
2985 "active.lane.mask.entry");
2991 LaneMaskPhi->insertAfter(CanonicalIVPHI);
2996 Builder.setInsertPoint(OriginalTerminator);
2997 auto *InLoopIncrement = Builder.createOverflowingOp(
2999 {CanonicalIVIncrement, &Plan.
getVF()}, {
false,
false},
DL);
3001 {InLoopIncrement, TC, ALMMultiplier},
DL,
3002 "active.lane.mask.next");
3007 auto *NotMask = Builder.createNot(ALM,
DL);
3014 bool UseActiveLaneMaskForControlFlow) {
3016 auto *FoundWidenCanonicalIVUser =
find_if(
3018 assert(FoundWidenCanonicalIVUser &&
3019 "Must have widened canonical IV when tail folding!");
3021 auto *WideCanonicalIV =
3024 if (UseActiveLaneMaskForControlFlow) {
3033 nullptr,
"active.lane.mask");
3049 template <
typename OpTy>
bool match(OpTy *V)
const {
3060template <
typename Op0_t,
typename Op1_t>
3079 VPValue *Addr, *Mask, *EndPtr;
3082 auto AdjustEndPtr = [&CurRecipe, &EVL](
VPValue *EndPtr) {
3084 EVLEndPtr->insertBefore(&CurRecipe);
3085 EVLEndPtr->setOperand(1, &EVL);
3089 if (
match(&CurRecipe,
3103 LoadR->insertBefore(&CurRecipe);
3105 Intrinsic::experimental_vp_reverse, {LoadR, Plan->
getTrue(), &EVL},
3114 StoredVal, EVL, Mask);
3116 if (
match(&CurRecipe,
3122 Intrinsic::experimental_vp_reverse,
3123 {ReversedVal, Plan->
getTrue(), &EVL},
3127 AdjustEndPtr(EndPtr), NewReverse, EVL,
3132 if (Rdx->isConditional() &&
3137 if (Interleave->getMask() &&
3142 if (
match(&CurRecipe,
3151 Intrinsic::vp_merge, {Mask,
LHS,
RHS, &EVL},
3171 VPValue *HeaderMask =
nullptr, *EVL =
nullptr;
3176 HeaderMask = R.getVPSingleValue();
3188 NewR->insertBefore(R);
3189 for (
auto [Old, New] :
3190 zip_equal(R->definedValues(), NewR->definedValues()))
3191 Old->replaceAllUsesWith(New);
3205 Merge->insertBefore(LogicalAnd);
3206 LogicalAnd->replaceAllUsesWith(
Merge);
3214 R->eraseFromParent();
3231 "User of VF that we can't transform to EVL.");
3241 "Only users of VFxUF should be VPWidenPointerInductionRecipe and the "
3242 "increment of the canonical induction.");
3258 MaxEVL = Builder.createScalarZExtOrTrunc(
3262 Builder.setInsertPoint(Header, Header->getFirstNonPhi());
3263 VPValue *PrevEVL = Builder.createScalarPhi(
3277 Intrinsic::experimental_vp_splice,
3278 {V1, V2, Imm, Plan.
getTrue(), PrevEVL, &EVL},
3282 R.getVPSingleValue()->replaceAllUsesWith(VPSplice);
3299 VPValue *EVLMask = Builder.createICmp(
3360 VPlan &Plan,
const std::optional<unsigned> &MaxSafeElements) {
3368 VPValue *StartV = CanonicalIVPHI->getStartValue();
3371 auto *CurrentIteration =
3373 CurrentIteration->insertAfter(CanonicalIVPHI);
3374 VPBuilder Builder(Header, Header->getFirstNonPhi());
3377 VPPhi *AVLPhi = Builder.createScalarPhi(
3381 if (MaxSafeElements) {
3391 auto *CanonicalIVIncrement =
3393 Builder.setInsertPoint(CanonicalIVIncrement);
3397 OpVPEVL = Builder.createScalarZExtOrTrunc(
3398 OpVPEVL, CanIVTy, I32Ty, CanonicalIVIncrement->getDebugLoc());
3400 auto *NextIter = Builder.createAdd(
3401 OpVPEVL, CurrentIteration, CanonicalIVIncrement->getDebugLoc(),
3402 "current.iteration.next", CanonicalIVIncrement->getNoWrapFlags());
3403 CurrentIteration->addOperand(NextIter);
3407 "avl.next", {
true,
false});
3415 CanonicalIVPHI->replaceAllUsesWith(CurrentIteration);
3416 CanonicalIVIncrement->setOperand(0, CanonicalIVPHI);
3430 assert(!CurrentIteration &&
3431 "Found multiple CurrentIteration. Only one expected");
3432 CurrentIteration = PhiR;
3436 if (!CurrentIteration)
3447 CurrentIteration->
getDebugLoc(),
"current.iteration.iv");
3453 VPValue *Backedge = CanonicalIV->getIncomingValue(1);
3456 "Unexpected canonical iv");
3462 CanonicalIV->eraseFromParent();
3482 if (!
match(EVLPhi->getBackedgeValue(),
3495 [[maybe_unused]]
bool FoundAVLNext =
3498 assert(FoundAVLNext &&
"Didn't find AVL backedge?");
3510 "Expected BranchOnCond with ICmp comparing CanIV increment with vector "
3515 LatchBr->setOperand(
3526 return R->getRegion() ||
3530 for (
const SCEV *Stride : StridesMap.
values()) {
3533 const APInt *StrideConst;
3556 RewriteMap[StrideV] = PSE.
getSCEV(StrideV);
3563 const SCEV *ScevExpr = ExpSCEV->getSCEV();
3566 if (NewSCEV != ScevExpr) {
3568 ExpSCEV->replaceAllUsesWith(NewExp);
3577 const std::function<
bool(
BasicBlock *)> &BlockNeedsPredication) {
3581 auto CollectPoisonGeneratingInstrsInBackwardSlice([&](
VPRecipeBase *Root) {
3586 while (!Worklist.
empty()) {
3589 if (!Visited.
insert(CurRec).second)
3611 RecWithFlags->isDisjoint()) {
3614 Builder.createAdd(
A,
B, RecWithFlags->getDebugLoc());
3615 New->setUnderlyingValue(RecWithFlags->getUnderlyingValue());
3616 RecWithFlags->replaceAllUsesWith(New);
3617 RecWithFlags->eraseFromParent();
3620 RecWithFlags->dropPoisonGeneratingFlags();
3625 assert((!Instr || !Instr->hasPoisonGeneratingFlags()) &&
3626 "found instruction with poison generating flags not covered by "
3627 "VPRecipeWithIRFlags");
3632 if (
VPRecipeBase *OpDef = Operand->getDefiningRecipe())
3644 Instruction &UnderlyingInstr = WidenRec->getIngredient();
3645 VPRecipeBase *AddrDef = WidenRec->getAddr()->getDefiningRecipe();
3646 if (AddrDef && WidenRec->isConsecutive() &&
3647 BlockNeedsPredication(UnderlyingInstr.
getParent()))
3648 CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
3650 VPRecipeBase *AddrDef = InterleaveRec->getAddr()->getDefiningRecipe();
3654 InterleaveRec->getInterleaveGroup();
3655 bool NeedPredication =
false;
3657 I < NumMembers; ++
I) {
3660 NeedPredication |= BlockNeedsPredication(Member->getParent());
3663 if (NeedPredication)
3664 CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
3676 if (InterleaveGroups.empty())
3683 for (
const auto *IG : InterleaveGroups) {
3689 StoredValues.
push_back(StoreR->getStoredValue());
3690 for (
unsigned I = 1;
I < IG->getFactor(); ++
I) {
3697 StoredValues.
push_back(StoreR->getStoredValue());
3701 bool NeedsMaskForGaps =
3702 (IG->requiresScalarEpilogue() && !ScalarEpilogueAllowed) ||
3703 (!StoredValues.
empty() && !IG->isFull());
3715 VPValue *Addr = Start->getAddr();
3724 assert(IG->getIndex(IRInsertPos) != 0 &&
3725 "index of insert position shouldn't be zero");
3729 IG->getIndex(IRInsertPos),
3733 Addr =
B.createNoWrapPtrAdd(InsertPos->getAddr(), OffsetVPV, NW);
3739 if (IG->isReverse()) {
3742 -(int64_t)IG->getFactor(), NW, InsertPos->getDebugLoc());
3743 ReversePtr->insertBefore(InsertPos);
3747 InsertPos->getMask(), NeedsMaskForGaps,
3748 InterleaveMD, InsertPos->getDebugLoc());
3749 VPIG->insertBefore(InsertPos);
3752 for (
unsigned i = 0; i < IG->getFactor(); ++i)
3755 if (!Member->getType()->isVoidTy()) {
3814 AddOp = Instruction::Add;
3815 MulOp = Instruction::Mul;
3817 AddOp =
ID.getInductionOpcode();
3818 MulOp = Instruction::FMul;
3826 Step = Builder.createScalarCast(Instruction::Trunc, Step, Ty,
DL);
3827 Start = Builder.createScalarCast(Instruction::Trunc, Start, Ty,
DL);
3836 Init = Builder.createWidenCast(Instruction::UIToFP,
Init, StepTy);
3841 Init = Builder.createNaryOp(MulOp, {
Init, SplatStep}, Flags);
3842 Init = Builder.createNaryOp(AddOp, {SplatStart,
Init}, Flags,
3848 WidePHI->insertBefore(WidenIVR);
3859 Builder.setInsertPoint(R->getParent(), std::next(R->getIterator()));
3863 VF = Builder.createScalarCast(Instruction::CastOps::UIToFP, VF, StepTy,
3866 VF = Builder.createScalarZExtOrTrunc(VF, StepTy,
3869 Inc = Builder.createNaryOp(MulOp, {Step, VF}, Flags);
3876 auto *
Next = Builder.createNaryOp(AddOp, {Prev, Inc}, Flags,
3879 WidePHI->addOperand(
Next);
3907 VPlan *Plan = R->getParent()->getPlan();
3908 VPValue *Start = R->getStartValue();
3909 VPValue *Step = R->getStepValue();
3910 VPValue *VF = R->getVFValue();
3912 assert(R->getInductionDescriptor().getKind() ==
3914 "Not a pointer induction according to InductionDescriptor!");
3917 "Recipe should have been replaced");
3923 VPPhi *ScalarPtrPhi = Builder.createScalarPhi(Start,
DL,
"pointer.phi");
3927 Builder.setInsertPoint(R->getParent(), R->getParent()->getFirstNonPhi());
3930 Offset = Builder.createOverflowingOp(Instruction::Mul, {
Offset, Step});
3932 Builder.createWidePtrAdd(ScalarPtrPhi,
Offset,
DL,
"vector.gep");
3933 R->replaceAllUsesWith(PtrAdd);
3938 VF = Builder.createScalarZExtOrTrunc(VF, StepTy, TypeInfo.
inferScalarType(VF),
3940 VPValue *Inc = Builder.createOverflowingOp(Instruction::Mul, {Step, VF});
3943 Builder.createPtrAdd(ScalarPtrPhi, Inc,
DL,
"ptr.ind");
3952 if (!R->isReplicator())
3956 R->dissolveToCFGLoop();
3977 assert(Br->getNumOperands() == 2 &&
3978 "BranchOnTwoConds must have exactly 2 conditions");
3982 assert(Successors.size() == 3 &&
3983 "BranchOnTwoConds must have exactly 3 successors");
3988 VPValue *Cond0 = Br->getOperand(0);
3989 VPValue *Cond1 = Br->getOperand(1);
3994 !BrOnTwoCondsBB->
getParent() &&
"regions must already be dissolved");
4007 Br->eraseFromParent();
4030 WidenIVR->replaceAllUsesWith(PtrAdd);
4043 for (
unsigned I = 1;
I != Blend->getNumIncomingValues(); ++
I)
4044 Select = Builder.createSelect(Blend->getMask(
I),
4045 Blend->getIncomingValue(
I),
Select,
4046 R.getDebugLoc(),
"predphi", *Blend);
4047 Blend->replaceAllUsesWith(
Select);
4052 if (!VEPR->getOffset()) {
4054 "Expected unroller to have materialized offset for UF != 1");
4055 VEPR->materializeOffset();
4070 for (
VPValue *
Op : LastActiveL->operands()) {
4071 VPValue *NotMask = Builder.createNot(
Op, LastActiveL->getDebugLoc());
4076 VPValue *FirstInactiveLane = Builder.createNaryOp(
4078 LastActiveL->getDebugLoc(),
"first.inactive.lane");
4084 Builder.createSub(FirstInactiveLane, One,
4085 LastActiveL->getDebugLoc(),
"last.active.lane");
4095 assert(VPI->isMasked() &&
4096 "Unmasked MaskedCond should be simplified earlier");
4097 VPI->replaceAllUsesWith(Builder.createNaryOp(
4109 Instruction::Add, VPI->operands(), VPI->getNoWrapFlags(),
4110 VPI->getDebugLoc());
4111 VPI->replaceAllUsesWith(
Add);
4120 DebugLoc DL = BranchOnCountInst->getDebugLoc();
4123 ToRemove.push_back(BranchOnCountInst);
4138 ? Instruction::UIToFP
4139 : Instruction::Trunc;
4140 VectorStep = Builder.createWidenCast(CastOp, VectorStep, IVTy);
4146 Builder.createWidenCast(Instruction::Trunc, ScalarStep, IVTy);
4152 MulOpc = Instruction::FMul;
4153 Flags = VPI->getFastMathFlags();
4155 MulOpc = Instruction::Mul;
4160 MulOpc, {VectorStep, ScalarStep}, Flags, R.getDebugLoc());
4162 VPI->replaceAllUsesWith(VectorStep);
4168 R->eraseFromParent();
4176 struct EarlyExitInfo {
4187 if (Pred == MiddleVPBB)
4192 VPValue *CondOfEarlyExitingVPBB;
4193 [[maybe_unused]]
bool Matched =
4194 match(EarlyExitingVPBB->getTerminator(),
4196 assert(Matched &&
"Terminator must be BranchOnCond");
4200 VPBuilder EarlyExitingBuilder(EarlyExitingVPBB->getTerminator());
4201 auto *CondToEarlyExit = EarlyExitingBuilder.
createNaryOp(
4203 TrueSucc == ExitBlock
4204 ? CondOfEarlyExitingVPBB
4205 : EarlyExitingBuilder.
createNot(CondOfEarlyExitingVPBB));
4211 "exit condition must dominate the latch");
4220 assert(!Exits.
empty() &&
"must have at least one early exit");
4227 for (
const auto &[Num, VPB] :
enumerate(RPOT))
4229 llvm::sort(Exits, [&RPOIdx](
const EarlyExitInfo &
A,
const EarlyExitInfo &
B) {
4230 return RPOIdx[
A.EarlyExitingVPBB] < RPOIdx[
B.EarlyExitingVPBB];
4236 for (
unsigned I = 0;
I + 1 < Exits.
size(); ++
I)
4237 for (
unsigned J =
I + 1; J < Exits.
size(); ++J)
4239 Exits[
I].EarlyExitingVPBB) &&
4240 "RPO sort must place dominating exits before dominated ones");
4246 VPValue *Combined = Exits[0].CondToExit;
4247 for (
const EarlyExitInfo &Info :
drop_begin(Exits))
4248 Combined = Builder.createLogicalOr(Combined, Info.CondToExit);
4254 "Early exit store masking not implemented");
4258 for (
unsigned Idx = 0; Idx != Exits.
size(); ++Idx) {
4262 VectorEarlyExitVPBBs[Idx] = VectorEarlyExitVPBB;
4270 Exits.
size() == 1 ? VectorEarlyExitVPBBs[0]
4304 for (
auto [Exit, VectorEarlyExitVPBB] :
4305 zip_equal(Exits, VectorEarlyExitVPBBs)) {
4306 auto &[EarlyExitingVPBB, EarlyExitVPBB,
_] = Exit;
4318 ExitIRI->getIncomingValueForBlock(EarlyExitingVPBB);
4319 VPValue *NewIncoming = IncomingVal;
4321 VPBuilder EarlyExitBuilder(VectorEarlyExitVPBB);
4326 ExitIRI->removeIncomingValueFor(EarlyExitingVPBB);
4327 ExitIRI->addOperand(NewIncoming);
4330 EarlyExitingVPBB->getTerminator()->eraseFromParent();
4364 bool IsLastDispatch = (
I + 2 == Exits.
size());
4366 IsLastDispatch ? VectorEarlyExitVPBBs.
back()
4372 VectorEarlyExitVPBBs[
I]->setPredecessors({CurrentBB});
4375 CurrentBB = FalseBB;
4382 "Unexpected terminator");
4383 auto *IsLatchExitTaken =
4385 LatchExitingBranch->getOperand(1));
4387 DebugLoc LatchDL = LatchExitingBranch->getDebugLoc();
4388 LatchExitingBranch->eraseFromParent();
4389 Builder.setInsertPoint(LatchVPBB);
4391 {IsAnyExitTaken, IsLatchExitTaken}, LatchDL);
4393 LatchVPBB->
setSuccessors({DispatchVPBB, MiddleVPBB, HeaderVPBB});
4404 Type *RedTy = Ctx.Types.inferScalarType(Red);
4405 VPValue *VecOp = Red->getVecOp();
4410 if (Red->isPartialReduction())
4414 auto IsExtendedRedValidAndClampRange =
4430 ExtRedCost = Ctx.TTI.getExtendedReductionCost(
4431 Opcode, ExtOpc == Instruction::CastOps::ZExt, RedTy, SrcVecTy,
4432 Red->getFastMathFlags(),
CostKind);
4433 return ExtRedCost.
isValid() && ExtRedCost < ExtCost + RedCost;
4442 IsExtendedRedValidAndClampRange(
4445 Ctx.Types.inferScalarType(
A)))
4464 if (Opcode != Instruction::Add && Opcode != Instruction::Sub &&
4465 Opcode != Instruction::FAdd)
4468 Type *RedTy = Ctx.Types.inferScalarType(Red);
4471 auto IsMulAccValidAndClampRange =
4479 if (Red->isPartialReduction())
4484 Ext0 ? Ctx.Types.inferScalarType(Ext0->getOperand(0)) : RedTy;
4490 (Ext0->getOpcode() != Ext1->getOpcode() ||
4491 Ext0->getOpcode() == Instruction::CastOps::FPExt))
4495 !Ext0 || Ext0->getOpcode() == Instruction::CastOps::ZExt;
4497 MulAccCost = Ctx.TTI.getMulAccReductionCost(IsZExt, Opcode, RedTy,
4504 ExtCost += Ext0->computeCost(VF, Ctx);
4506 ExtCost += Ext1->computeCost(VF, Ctx);
4508 ExtCost += OuterExt->computeCost(VF, Ctx);
4510 return MulAccCost.
isValid() &&
4511 MulAccCost < ExtCost + MulCost + RedCost;
4516 VPValue *VecOp = Red->getVecOp();
4523 assert(Opcode == Instruction::FAdd &&
4524 "MulAccumulateReduction from an FMul must accumulate into an FAdd "
4533 if (RecipeA && RecipeB &&
4534 IsMulAccValidAndClampRange(
FMul, RecipeA, RecipeB,
nullptr)) {
4556 if (!ExtA || ExtB || !
isa<VPIRValue>(ValB) || Red->isPartialReduction())
4558 Type *NarrowTy = Ctx.Types.inferScalarType(ExtA->getOperand(0));
4572 Builder.createWidenCast(Instruction::CastOps::Trunc, ValB, NarrowTy);
4573 Type *WideTy = Ctx.Types.inferScalarType(ExtA);
4574 ValB = ExtB = Builder.createWidenCast(ExtOpc, Trunc, WideTy);
4575 Mul->setOperand(1, ExtB);
4585 ExtendAndReplaceConstantOp(RecipeA, RecipeB,
B,
Mul);
4590 IsMulAccValidAndClampRange(
Mul, RecipeA, RecipeB,
nullptr)) {
4597 if (!
Sub && IsMulAccValidAndClampRange(
Mul,
nullptr,
nullptr,
nullptr))
4606 if (!Red->isPartialReduction() &&
4615 ExtendAndReplaceConstantOp(Ext0, Ext1,
B,
Mul);
4624 (Ext->getOpcode() == Ext0->getOpcode() || Ext0 == Ext1) &&
4625 Ext0->getOpcode() == Ext1->getOpcode() &&
4626 IsMulAccValidAndClampRange(
Mul, Ext0, Ext1, Ext) &&
Mul->hasOneUse()) {
4628 Ext0->getOpcode(), Ext0->getOperand(0), Ext->getResultType(),
nullptr,
4629 *Ext0, *Ext0, Ext0->getDebugLoc());
4630 NewExt0->insertBefore(Ext0);
4635 Ext->getResultType(),
nullptr, *Ext1,
4636 *Ext1, Ext1->getDebugLoc());
4639 Mul->setOperand(0, NewExt0);
4640 Mul->setOperand(1, NewExt1);
4641 Red->setOperand(1,
Mul);
4654 auto IP = std::next(Red->getIterator());
4655 auto *VPBB = Red->getParent();
4665 Red->replaceAllUsesWith(AbstractR);
4695 for (
VPValue *VPV : VPValues) {
4704 if (
User->usesScalars(VPV))
4707 HoistPoint = HoistBlock->
begin();
4711 "All users must be in the vector preheader or dominated by it");
4716 VPV->replaceUsesWithIf(Broadcast,
4717 [VPV, Broadcast](
VPUser &U,
unsigned Idx) {
4718 return Broadcast != &U && !U.usesScalars(VPV);
4735 if (RepR->isPredicated() || !RepR->isSingleScalar() ||
4736 RepR->getOpcode() != Instruction::Load)
4739 VPValue *Addr = RepR->getOperand(0);
4742 if (!
Loc.AATags.Scope)
4747 if (R.mayWriteToMemory()) {
4749 if (!
Loc || !
Loc->AATags.Scope || !
Loc->AATags.NoAlias)
4757 for (
auto &[LoadRecipe, LoadLoc] : CandidateLoads) {
4761 const AAMDNodes &LoadAA = LoadLoc.AATags;
4777 return CommonMetadata;
4780template <
unsigned Opcode>
4785 static_assert(Opcode == Instruction::Load || Opcode == Instruction::Store,
4786 "Only Load and Store opcodes supported");
4787 constexpr bool IsLoad = (Opcode == Instruction::Load);
4793 return TypeInfo.
inferScalarType(IsLoad ? Recipe : Recipe->getOperand(0));
4798 for (
auto Recipes :
Groups) {
4799 if (Recipes.size() < 2)
4807 VPValue *MaskI = RecipeI->getMask();
4808 Type *TypeI = GetLoadStoreValueType(RecipeI);
4814 bool HasComplementaryMask =
false;
4819 VPValue *MaskJ = RecipeJ->getMask();
4820 Type *TypeJ = GetLoadStoreValueType(RecipeJ);
4821 if (TypeI == TypeJ) {
4831 if (HasComplementaryMask) {
4832 assert(Group.
size() >= 2 &&
"must have at least 2 entries");
4842template <
typename InstType>
4860 for (
auto &Group :
Groups) {
4880 return R->isSingleScalar() == IsSingleScalar;
4882 "all members in group must agree on IsSingleScalar");
4887 LoadWithMinAlign->getUnderlyingInstr(), {EarliestLoad->getOperand(0)},
4888 IsSingleScalar,
nullptr, *EarliestLoad, CommonMetadata);
4890 UnpredicatedLoad->insertBefore(EarliestLoad);
4894 Load->replaceAllUsesWith(UnpredicatedLoad);
4895 Load->eraseFromParent();
4905 if (!StoreLoc || !StoreLoc->AATags.Scope)
4911 StoresToSink.
end());
4915 SinkStoreInfo SinkInfo(StoresToSinkSet, *StoresToSink[0], PSE, L, TypeInfo);
4929 for (
auto &Group :
Groups) {
4942 VPValue *SelectedValue = Group[0]->getOperand(0);
4945 bool IsSingleScalar = Group[0]->isSingleScalar();
4946 for (
unsigned I = 1;
I < Group.size(); ++
I) {
4947 assert(IsSingleScalar == Group[
I]->isSingleScalar() &&
4948 "all members in group must agree on IsSingleScalar");
4949 VPValue *Mask = Group[
I]->getMask();
4951 SelectedValue = Builder.createSelect(Mask,
Value, SelectedValue,
4960 StoreWithMinAlign->getUnderlyingInstr(),
4961 {SelectedValue, LastStore->getOperand(1)}, IsSingleScalar,
4962 nullptr, *LastStore, CommonMetadata);
4963 UnpredicatedStore->insertBefore(*InsertBB, LastStore->
getIterator());
4967 Store->eraseFromParent();
4974 assert(Plan.
hasVF(BestVF) &&
"BestVF is not available in Plan");
4975 assert(Plan.
hasUF(BestUF) &&
"BestUF is not available in Plan");
5040 auto UsesVectorOrInsideReplicateRegion = [DefR, LoopRegion](
VPUser *U) {
5042 return !U->usesScalars(DefR) || ParentRegion != LoopRegion;
5049 none_of(DefR->users(), UsesVectorOrInsideReplicateRegion))
5059 DefR->replaceUsesWithIf(
5060 BuildVector, [BuildVector, &UsesVectorOrInsideReplicateRegion](
5062 return &U != BuildVector && UsesVectorOrInsideReplicateRegion(&U);
5076 for (
VPValue *Def : R.definedValues()) {
5089 auto IsCandidateUnpackUser = [Def](
VPUser *U) {
5091 return U->usesScalars(Def) &&
5094 if (
none_of(Def->users(), IsCandidateUnpackUser))
5101 Unpack->insertAfter(&R);
5102 Def->replaceUsesWithIf(Unpack,
5103 [&IsCandidateUnpackUser](
VPUser &U,
unsigned) {
5104 return IsCandidateUnpackUser(&U);
5114 bool RequiresScalarEpilogue,
5126 assert(StepR->getParent() == VectorPHVPBB &&
5127 "Step must be defined in VectorPHVPBB");
5129 InsertPt = std::next(StepR->getIterator());
5131 VPBuilder Builder(VectorPHVPBB, InsertPt);
5139 if (TailByMasking) {
5140 TC = Builder.createAdd(
5151 Builder.createNaryOp(Instruction::URem, {TC, Step},
5160 if (RequiresScalarEpilogue) {
5162 "requiring scalar epilogue is not supported with fail folding");
5165 R = Builder.createSelect(IsZero, Step, R);
5179 "VF and VFxUF must be materialized together");
5191 Builder.createElementCount(TCTy, VFEC * Plan.
getConcreteUF());
5198 VPValue *RuntimeVF = Builder.createElementCount(TCTy, VFEC);
5202 BC, [&VF](
VPUser &U,
unsigned) {
return !U.usesScalars(&VF); });
5206 VPValue *MulByUF = Builder.createOverflowingOp(
5218 BasicBlock *EntryBB = Entry->getIRBasicBlock();
5226 const SCEV *Expr = ExpSCEV->getSCEV();
5229 ExpandedSCEVs[ExpSCEV->getSCEV()] = Res;
5234 ExpSCEV->eraseFromParent();
5237 "VPExpandSCEVRecipes must be at the beginning of the entry block, "
5238 "before any VPIRInstructions");
5241 auto EI = Entry->begin();
5251 return ExpandedSCEVs;
5263 VPValue *OpV,
unsigned Idx,
bool IsScalable) {
5267 return Member0Op == OpV;
5271 return !IsScalable && !W->getMask() && W->isConsecutive() &&
5274 return IR->getInterleaveGroup()->isFull() &&
IR->getVPValue(Idx) == OpV;
5291 for (
unsigned Idx = 0; Idx != WideMember0->getNumOperands(); ++Idx) {
5294 OpsI.
push_back(
Op->getDefiningRecipe()->getOperand(Idx));
5299 if (
any_of(
enumerate(OpsI), [WideMember0, Idx, IsScalable](
const auto &
P) {
5300 const auto &[
OpIdx, OpV] =
P;
5315 if (!InterleaveR || InterleaveR->
getMask())
5316 return std::nullopt;
5318 Type *GroupElementTy =
nullptr;
5322 [&TypeInfo, GroupElementTy](
VPValue *
Op) {
5323 return TypeInfo.inferScalarType(Op) == GroupElementTy;
5325 return std::nullopt;
5330 [&TypeInfo, GroupElementTy](
VPValue *
Op) {
5331 return TypeInfo.inferScalarType(Op) == GroupElementTy;
5333 return std::nullopt;
5337 if (IG->getFactor() != IG->getNumMembers())
5338 return std::nullopt;
5344 assert(
Size.isScalable() == VF.isScalable() &&
5345 "if Size is scalable, VF must be scalable and vice versa");
5346 return Size.getKnownMinValue();
5350 unsigned MinVal = VF.getKnownMinValue();
5352 if (IG->getFactor() == MinVal && GroupSize == GetVectorBitWidthForVF(VF))
5355 return std::nullopt;
5363 return RepR && RepR->isSingleScalar();
5370 auto *R = V->getDefiningRecipe();
5379 for (
unsigned Idx = 0,
E = WideMember0->getNumOperands(); Idx !=
E; ++Idx)
5380 WideMember0->setOperand(
5389 auto *LI =
cast<LoadInst>(LoadGroup->getInterleaveGroup()->getInsertPos());
5391 *LI, LoadGroup->getAddr(), LoadGroup->getMask(),
true,
5392 false, {}, LoadGroup->getDebugLoc());
5393 L->insertBefore(LoadGroup);
5399 assert(RepR->isSingleScalar() &&
5401 "must be a single scalar load");
5402 NarrowedOps.
insert(RepR);
5407 VPValue *PtrOp = WideLoad->getAddr();
5409 PtrOp = VecPtr->getOperand(0);
5414 nullptr, {}, *WideLoad);
5415 N->insertBefore(WideLoad);
5420std::unique_ptr<VPlan>
5440 "unexpected branch-on-count");
5444 std::optional<ElementCount> VFToOptimize;
5461 if (R.mayWriteToMemory() && !InterleaveR)
5476 std::optional<ElementCount> NarrowedVF =
5478 if (!NarrowedVF || (VFToOptimize && NarrowedVF != VFToOptimize))
5480 VFToOptimize = NarrowedVF;
5483 if (InterleaveR->getStoredValues().empty())
5488 auto *Member0 = InterleaveR->getStoredValues()[0];
5498 VPRecipeBase *DefR = Op.value()->getDefiningRecipe();
5501 auto *IR = dyn_cast<VPInterleaveRecipe>(DefR);
5502 return IR && IR->getInterleaveGroup()->isFull() &&
5503 IR->getVPValue(Op.index()) == Op.value();
5512 VFToOptimize->isScalable()))
5517 if (StoreGroups.
empty())
5521 bool RequiresScalarEpilogue =
5532 std::unique_ptr<VPlan> NewPlan;
5534 NewPlan = std::unique_ptr<VPlan>(Plan.
duplicate());
5535 Plan.
setVF(*VFToOptimize);
5536 NewPlan->removeVF(*VFToOptimize);
5542 for (
auto *StoreGroup : StoreGroups) {
5548 *
SI, StoreGroup->getAddr(), Res,
nullptr,
true,
5549 false, {}, StoreGroup->getDebugLoc());
5550 S->insertBefore(StoreGroup);
5551 StoreGroup->eraseFromParent();
5563 if (VFToOptimize->isScalable()) {
5576 RequiresScalarEpilogue, Step);
5578 Inc->setOperand(1, Step);
5584 "All VPVectorPointerRecipes should have been removed");
5600 "must have a BranchOnCond");
5603 if (VF.
isScalable() && VScaleForTuning.has_value())
5604 VectorStep *= *VScaleForTuning;
5605 assert(VectorStep > 0 &&
"trip count should not be zero");
5609 MiddleTerm->setMetadata(LLVMContext::MD_prof, BranchWeights);
5616 VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
5628 "Cannot handle loops with uncountable early exits");
5701 make_range(MiddleVPBB->getFirstNonPhi(), MiddleVPBB->end()))) {
5715 "vector.recur.extract.for.phi");
5734 VPValue *WidenIVCandidate = BinOp->getOperand(0);
5735 VPValue *InvariantCandidate = BinOp->getOperand(1);
5737 std::swap(WidenIVCandidate, InvariantCandidate);
5751 auto *ClonedOp = BinOp->
clone();
5752 if (ClonedOp->getOperand(0) == WidenIV) {
5753 ClonedOp->setOperand(0, ScalarIV);
5755 assert(ClonedOp->getOperand(1) == WidenIV &&
"one operand must be WideIV");
5756 ClonedOp->setOperand(1, ScalarIV);
5771 auto CheckSentinel = [&SE](
const SCEV *IVSCEV,
5772 bool UseMax) -> std::optional<APSInt> {
5774 for (
bool Signed : {
true,
false}) {
5783 return std::nullopt;
5791 PhiR->getRecurrenceKind()))
5800 VPValue *BackedgeVal = PhiR->getBackedgeValue();
5814 !
match(FindLastSelect,
5823 IVOfExpressionToSink ? IVOfExpressionToSink : FindLastExpression, PSE,
5829 "IVOfExpressionToSink not being an AddRec must imply "
5830 "FindLastExpression not being an AddRec.");
5841 std::optional<APSInt> SentinelVal = CheckSentinel(IVSCEV, UseMax);
5842 bool UseSigned = SentinelVal && SentinelVal->isSigned();
5849 if (IVOfExpressionToSink) {
5850 const SCEV *FindLastExpressionSCEV =
5852 if (
match(FindLastExpressionSCEV,
5855 if (
auto NewSentinel =
5856 CheckSentinel(FindLastExpressionSCEV, NewUseMax)) {
5859 SentinelVal = *NewSentinel;
5860 UseSigned = NewSentinel->isSigned();
5862 IVSCEV = FindLastExpressionSCEV;
5863 IVOfExpressionToSink =
nullptr;
5873 if (AR->hasNoSignedWrap())
5875 else if (AR->hasNoUnsignedWrap())
5885 VPValue *NewFindLastSelect = BackedgeVal;
5887 if (!SentinelVal || IVOfExpressionToSink) {
5890 DebugLoc DL = FindLastSelect->getDefiningRecipe()->getDebugLoc();
5891 VPBuilder LoopBuilder(FindLastSelect->getDefiningRecipe());
5892 if (FindLastSelect->getDefiningRecipe()->getOperand(1) == PhiR)
5893 SelectCond = LoopBuilder.
createNot(SelectCond);
5900 if (SelectCond !=
Cond || IVOfExpressionToSink) {
5903 IVOfExpressionToSink ? IVOfExpressionToSink : FindLastExpression,
5912 VPIRFlags Flags(MinMaxKind,
false,
false,
5918 NewFindLastSelect, Flags, ExitDL);
5921 VPValue *VectorRegionExitingVal = ReducedIV;
5922 if (IVOfExpressionToSink)
5923 VectorRegionExitingVal =
5925 ReducedIV, IVOfExpressionToSink);
5928 VPValue *StartVPV = PhiR->getStartValue();
5935 NewRdxResult = MiddleBuilder.
createSelect(Cmp, VectorRegionExitingVal,
5945 AnyOfPhi->insertAfter(PhiR);
5957 {StartVPV, VectorRegionExitingVal, OrReduce}, {}, ExitDL);
5970 PhiR->hasUsesOutsideReductionChain());
5971 NewPhiR->insertBefore(PhiR);
5972 PhiR->replaceAllUsesWith(NewPhiR);
5973 PhiR->eraseFromParent();
5980struct ReductionExtend {
5981 Type *SrcType =
nullptr;
5982 ExtendKind Kind = ExtendKind::PR_None;
5988struct ExtendedReductionOperand {
5992 ReductionExtend ExtendA, ExtendB;
5998struct VPPartialReductionChain {
6001 VPWidenRecipe *ReductionBinOp =
nullptr;
6003 ExtendedReductionOperand ExtendedOp;
6004 unsigned ScaleFactor;
6027 auto *Trunc = Builder.createWidenCast(Instruction::CastOps::Trunc,
6030 BinOp->
setOperand(1, Builder.createWidenCast(ExtOpc, Trunc, WideTy));
6043 if (!
Mul->hasOneUse() ||
6044 (Ext->getOpcode() != MulLHS->getOpcode() && MulLHS != MulRHS) ||
6045 MulLHS->getOpcode() != MulRHS->getOpcode())
6048 Mul->setOperand(0, Builder.createWidenCast(MulLHS->getOpcode(),
6049 MulLHS->getOperand(0),
6050 Ext->getResultType()));
6051 Mul->setOperand(1, MulLHS == MulRHS
6052 ?
Mul->getOperand(0)
6053 : Builder.createWidenCast(MulRHS->getOpcode(),
6054 MulRHS->getOperand(0),
6055 Ext->getResultType()));
6064static void transformToPartialReduction(
const VPPartialReductionChain &Chain,
6093 if (WidenRecipe->
getOpcode() == Instruction::Sub &&
6101 Builder.insert(NegRecipe);
6106 BinOp = optimizeExtendsForPartialReduction(BinOp, TypeInfo);
6116 assert((!ExitValue || IsLastInChain) &&
6117 "if we found ExitValue, it must match RdxPhi's backedge value");
6128 PartialRed->insertBefore(WidenRecipe);
6145 auto *NewScaleFactor = Plan.
getConstantInt(32, Chain.ScaleFactor);
6146 StartInst->setOperand(2, NewScaleFactor);
6154 VPValue *OldStartValue = StartInst->getOperand(0);
6155 StartInst->setOperand(0, StartInst->getOperand(1));
6159 assert(RdxResult &&
"Could not find reduction result");
6162 constexpr unsigned SubOpc = Instruction::BinaryOps::Sub;
6168 [&NewResult](
VPUser &U,
unsigned Idx) {
return &
U != NewResult; });
6174 const VPPartialReductionChain &Link,
6177 const ExtendedReductionOperand &ExtendedOp = Link.ExtendedOp;
6178 std::optional<unsigned> BinOpc = std::nullopt;
6180 if (ExtendedOp.ExtendB.Kind != ExtendKind::PR_None)
6181 BinOpc = ExtendedOp.ExtendsUser->
getOpcode();
6183 std::optional<llvm::FastMathFlags>
Flags;
6188 ? (unsigned)Instruction::Add
6191 Opcode, ExtendedOp.ExtendA.SrcType, ExtendedOp.ExtendB.SrcType, RdxType,
6192 VF, ExtendedOp.ExtendA.Kind, ExtendedOp.ExtendB.Kind, BinOpc,
6214static std::optional<ExtendedReductionOperand>
6218 "Op should be operand of UpdateR");
6220 std::optional<TTI::PartialReductionExtendKind> OuterExtKind;
6223 VPValue *CastSource = CastRecipe->getOperand(0);
6224 OuterExtKind = getPartialReductionExtendKind(CastRecipe);
6233 }
else if (UpdateR->
getOpcode() == Instruction::Add ||
6234 UpdateR->
getOpcode() == Instruction::FAdd) {
6238 return ExtendedReductionOperand{
6245 if (!
Op->hasOneUse())
6246 return std::nullopt;
6255 return std::nullopt;
6265 return std::nullopt;
6269 ExtendKind LHSExtendKind = getPartialReductionExtendKind(LHSCast);
6272 const APInt *RHSConst =
nullptr;
6278 return std::nullopt;
6282 if (Cast && OuterExtKind &&
6283 getPartialReductionExtendKind(Cast) != OuterExtKind)
6284 return std::nullopt;
6286 Type *RHSInputType = LHSInputType;
6287 ExtendKind RHSExtendKind = LHSExtendKind;
6290 RHSExtendKind = getPartialReductionExtendKind(RHSCast);
6293 return ExtendedReductionOperand{
6294 BinOp, {LHSInputType, LHSExtendKind}, {RHSInputType, RHSExtendKind}};
6301static std::optional<SmallVector<VPPartialReductionChain>>
6309 return std::nullopt;
6320 VPValue *CurrentValue = ExitValue;
6321 while (CurrentValue != RedPhiR) {
6324 return std::nullopt;
6331 std::optional<ExtendedReductionOperand> ExtendedOp =
6332 matchExtendedReductionOperand(UpdateR,
Op, TypeInfo);
6334 ExtendedOp = matchExtendedReductionOperand(UpdateR, PrevValue, TypeInfo);
6336 return std::nullopt;
6340 Type *ExtSrcType = ExtendedOp->ExtendA.SrcType;
6343 return std::nullopt;
6348 VPPartialReductionChain Link(
6349 {UpdateR, *ExtendedOp,
6352 CurrentValue = PrevValue;
6357 std::reverse(Chain.
begin(), Chain.
end());
6376 if (
auto Chains = getScaledReductions(RedPhiR, CostCtx,
Range))
6377 ChainsByPhi.
try_emplace(RedPhiR, std::move(*Chains));
6380 if (ChainsByPhi.
empty())
6387 for (
const auto &[
_, Chains] : ChainsByPhi)
6388 for (
const VPPartialReductionChain &Chain : Chains) {
6389 PartialReductionOps.
insert(Chain.ExtendedOp.ExtendsUser);
6390 ScaledReductionMap[Chain.ReductionBinOp] = Chain.ScaleFactor;
6396 auto ExtendUsersValid = [&](
VPValue *Ext) {
6398 return PartialReductionOps.contains(cast<VPRecipeBase>(U));
6402 auto IsProfitablePartialReductionChainForVF =
6409 for (
const VPPartialReductionChain &Link : Chain) {
6410 const ExtendedReductionOperand &ExtendedOp = Link.ExtendedOp;
6411 InstructionCost LinkCost = getPartialReductionLinkCost(CostCtx, Link, VF);
6415 PartialCost += LinkCost;
6416 RegularCost += Link.ReductionBinOp->
computeCost(VF, CostCtx);
6418 if (ExtendedOp.ExtendB.Kind != ExtendKind::PR_None)
6419 RegularCost += ExtendedOp.ExtendsUser->
computeCost(VF, CostCtx);
6422 RegularCost += Extend->computeCost(VF, CostCtx);
6424 return PartialCost.
isValid() && PartialCost <= RegularCost;
6432 for (
auto &[RedPhiR, Chains] : ChainsByPhi) {
6433 for (
const VPPartialReductionChain &Chain : Chains) {
6434 if (!
all_of(Chain.ExtendedOp.ExtendsUser->operands(), ExtendUsersValid)) {
6438 auto UseIsValid = [&, RedPhiR = RedPhiR](
VPUser *U) {
6440 return PhiR == RedPhiR;
6442 return Chain.ScaleFactor == ScaledReductionMap.
lookup_or(R, 0) ||
6448 if (!
all_of(Chain.ReductionBinOp->users(), UseIsValid)) {
6457 auto *RepR = dyn_cast<VPReplicateRecipe>(U);
6458 return RepR && isa<StoreInst>(RepR->getUnderlyingInstr());
6469 return IsProfitablePartialReductionChainForVF(Chains, VF);
6475 for (
auto &[Phi, Chains] : ChainsByPhi)
6476 for (
const VPPartialReductionChain &Chain : Chains)
6477 transformToPartialReduction(Chain, CostCtx.
Types, Plan, Phi);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
This file implements a class to represent arbitrary precision integral constant values and operations...
ReachingDefInfo InstSet & ToRemove
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static bool isEqual(const Function &Caller, const Function &Callee)
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
static bool isSentinel(const DWARFDebugNames::AttributeEncoding &AE)
iv Induction Variable Users
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
Legalize the Machine IR a function s Machine IR
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first DebugLoc that has line number information, given a range of instructions.
This file provides utility analysis objects describing memory locations.
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
const SmallVectorImpl< MachineOperand > & Cond
This is the interface for a metadata-based scoped no-alias analysis.
This file defines generic set operations that may be used on set's of different types,...
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static SymbolRef::Type getType(const Symbol *Sym)
This file implements the TypeSwitch template, which mimics a switch() statement whose cases are type ...
This file implements dominator tree analysis for a single level of a VPlan's H-CFG.
This file contains the declarations of different VPlan-related auxiliary helpers.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
This file declares the class VPlanVerifier, which contains utility functions to check the consistency...
This file contains the declarations of the Vectorization Plan base classes:
static const X86InstrFMA3Group Groups[]
static const uint32_t IV[8]
Helper for extra no-alias checks via known-safe recipe and SCEV.
SinkStoreInfo(const SmallPtrSetImpl< VPRecipeBase * > &ExcludeRecipes, VPReplicateRecipe &GroupLeader, PredicatedScalarEvolution &PSE, const Loop &L, VPTypeAnalysis &TypeInfo)
bool shouldSkip(VPRecipeBase &R) const
Return true if R should be skipped during alias checking, either because it's in the exclude set or b...
Class for arbitrary precision integers.
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
unsigned getActiveBits() const
Compute the number of active bits in the value.
APInt abs() const
Get the absolute value.
unsigned getBitWidth() const
Return the number of bits in the APInt.
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
An arbitrary precision integer that knows its signedness.
static APSInt getMinValue(uint32_t numBits, bool Unsigned)
Return the APSInt representing the minimum integer value with the given bit width and signedness.
static APSInt getMaxValue(uint32_t numBits, bool Unsigned)
Return the APSInt representing the maximum integer value with the given bit width and signedness.
@ NoAlias
The two locations do not alias at all.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & back() const
back - Get the last element.
const T & front() const
front - Get the first element.
LLVM Basic Block Representation.
const Function * getParent() const
Return the enclosing method, or null if none.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
This class represents a function call, abstracting a target machine's calling convention.
@ ICMP_ULT
unsigned less than
@ ICMP_ULE
unsigned less or equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
This class represents a range of values.
LLVM_ABI bool contains(const APInt &Val) const
Return true if the specified value is in the set.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
static DebugLoc getCompilerGenerated()
static DebugLoc getUnknown()
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
ValueT lookup_or(const_arg_type_t< KeyT > Val, U &&Default) const
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
constexpr bool isVector() const
One or more elements.
static constexpr ElementCount getScalable(ScalarTy MinVal)
Utility class for floating point operations which can have information about relaxed accuracy require...
Convenience struct for specifying and reasoning about fast-math flags.
Represents flags for the getelementptr instruction/expression.
GEPNoWrapFlags withoutNoUnsignedWrap() const
static GEPNoWrapFlags none()
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
A struct for saving information about induction variables.
InductionKind
This enum represents the kinds of inductions that we support.
@ IK_PtrInduction
Pointer induction var. Step = C.
@ IK_IntInduction
Integer induction variable. Step = C.
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
static InstructionCost getInvalid(CostType Val=0)
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
The group of interleaved loads/stores sharing the same stride and close to each other.
InstTy * getMember(uint32_t Index) const
Get the member with the given index Index.
uint32_t getNumMembers() const
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
static bool getDecisionAndClampRange(const std::function< bool(ElementCount)> &Predicate, VFRange &Range)
Test a Predicate on a Range of VF's.
Represents a single loop in the control flow graph.
LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
This class implements a map that also provides access to all stored values in a deterministic order.
std::pair< iterator, bool > try_emplace(const KeyT &Key, Ts &&...Args)
ValueT lookup(const KeyT &Key) const
Representation for a specific memory location.
AAMDNodes AATags
The metadata nodes which describes the aliasing of the location (each member is null if that kind of ...
Post-order traversal of a graph.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
LLVM_ABI const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
static LLVM_ABI unsigned getOpcode(RecurKind Kind)
Returns the opcode corresponding to the RecurrenceKind.
unsigned getOpcode() const
static bool isFindLastRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
RegionT * getParent() const
Get the parent of the Region.
This class uses information about analyze scalars to rewrite expressions in canonical form.
LLVM_ABI Value * expandCodeFor(SCEVUse SH, Type *Ty, BasicBlock::iterator I)
Insert code to directly compute the specified SCEV expression into the program.
static const SCEV * rewrite(const SCEV *Scev, ScalarEvolution &SE, ValueToSCEVMapTy &Map)
This class represents an analyzed expression in the program.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
LLVM_ABI const SCEV * getUDivExpr(SCEVUse LHS, SCEVUse RHS)
Get a canonical unsigned division expression, or something simpler if possible.
const DataLayout & getDataLayout() const
Return the DataLayout associated with the module this SCEV instance is operating on.
LLVM_ABI const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
LLVM_ABI bool isKnownNonZero(const SCEV *S)
Test if the given expression is known to be non-zero.
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI const SCEV * getMinusSCEV(SCEVUse LHS, SCEVUse RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
ConstantRange getSignedRange(const SCEV *S)
Determine the signed range for a particular SCEV.
LLVM_ABI bool isKnownPositive(const SCEV *S)
Test if the given expression is known to be positive.
LLVM_ABI const SCEV * getElementCount(Type *Ty, ElementCount EC, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
ConstantRange getUnsignedRange(const SCEV *S)
Determine the unsigned range for a particular SCEV.
LLVM_ABI const SCEV * getMulExpr(SmallVectorImpl< SCEVUse > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
LLVM_ABI bool isKnownPredicate(CmpPredicate Pred, SCEVUse LHS, SCEVUse RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
static LLVM_ABI bool mayAliasInScopes(const MDNode *Scopes, const MDNode *NoAlias)
static LLVM_ABI AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB)
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Provides information about what library functions are available for the current target.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
This class implements a switch-like dispatch statement for a value of 'T' using dyn_cast functionalit...
TypeSwitch< T, ResultT > & Case(CallableT &&caseFn)
Add a case on the given type.
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
bool isStructTy() const
True if this is an instance of StructType.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
A recipe for generating the active lane mask for the vector loop that is used to predicate the vector...
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
RecipeListTy::iterator iterator
Instruction iterators...
iterator begin()
Recipe iterator methods.
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
VPRegionBlock * getEnclosingLoopRegion()
VPBasicBlock * splitAt(iterator SplitAt)
Split current block at SplitAt by inserting a new block between the current block and its successors ...
const VPRecipeBase & front() const
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
const VPRecipeBase & back() const
A recipe for vectorizing a phi-node as a sequence of mask-based select instructions.
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
void setMask(unsigned Idx, VPValue *V)
Set mask number Idx to V.
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have...
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccss as successor of this VPBlockBase.
VPRegionBlock * getParent()
const VPBasicBlock * getExitingBasicBlock() const
size_t getNumSuccessors() const
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
const VPBlocksTy & getPredecessors() const
const std::string & getName() const
void clearSuccessors()
Remove all the successors of this block.
VPBlockBase * getSinglePredecessor() const
const VPBasicBlock * getEntryBasicBlock() const
VPBlockBase * getSingleHierarchicalPredecessor()
VPBlockBase * getSingleSuccessor() const
const VPBlocksTy & getSuccessors() const
static void insertOnEdge(VPBlockBase *From, VPBlockBase *To, VPBlockBase *BlockPtr)
Inserts BlockPtr on the edge between From and To.
static bool isLatch(const VPBlockBase *VPB, const VPDominatorTree &VPDT)
Returns true if VPB is a loop latch, using isHeader().
static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBases IfTrue and IfFalse after BlockPtr.
static void connectBlocks(VPBlockBase *From, VPBlockBase *To, unsigned PredIdx=-1u, unsigned SuccIdx=-1u)
Connect VPBlockBases From and To bi-directionally.
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
static auto blocksOnly(T &&Range)
Return an iterator range over Range which only includes BlockTy blocks.
static void transferSuccessors(VPBlockBase *Old, VPBlockBase *New)
Transfer successors from Old to New. New must have no successors.
static SmallVector< VPBasicBlock * > blocksInSingleSuccessorChainBetween(VPBasicBlock *FirstBB, VPBasicBlock *LastBB)
Returns the blocks between FirstBB and LastBB, where FirstBB to LastBB forms a single-successor chain.
A recipe for generating conditional branches on the bits of a mask.
RAII object that stores the current insertion point and restores it when the object is destroyed.
VPlan-based builder utility analogous to IRBuilder.
VPInstruction * createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPValue * createScalarZExtOrTrunc(VPValue *Op, Type *ResultTy, Type *SrcTy, DebugLoc DL)
VPValue * createElementCount(Type *Ty, ElementCount EC)
VPInstruction * createNot(VPValue *Operand, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createLogicalAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createScalarCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, DebugLoc DL, const VPIRMetadata &Metadata={})
static VPBuilder getToInsertAfter(VPRecipeBase *R)
Create a VPBuilder to insert after R.
VPInstruction * createOverflowingOp(unsigned Opcode, ArrayRef< VPValue * > Operands, VPRecipeWithIRFlags::WrapFlagsTy WrapFlags={false, false}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPPhi * createScalarPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={})
VPDerivedIVRecipe * createDerivedIV(InductionDescriptor::InductionKind Kind, FPMathOperator *FPBinOp, VPIRValue *Start, VPValue *Current, VPValue *Step, const Twine &Name="")
Convert the input value Current to the corresponding value of an induction with Start and Step values...
VPInstruction * createICmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new ICmp VPInstruction with predicate Pred and operands A and B.
VPInstruction * createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={})
void setInsertPoint(VPBasicBlock *TheBB)
This specifies that created VPInstructions should be appended to the end of the specified block.
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.
Canonical scalar induction phi of the vector loop.
A recipe for generating the phi node tracking the current scalar iteration index.
unsigned getNumDefinedValues() const
Returns the number of values defined by the VPDef.
VPValue * getVPSingleValue()
Returns the only VPValue defined by the VPDef.
VPValue * getVPValue(unsigned I)
Returns the VPValue with index I defined by the VPDef.
ArrayRef< VPRecipeValue * > definedValues()
Returns an ArrayRef of the values defined by the VPDef.
A recipe for converting the input value IV value to the corresponding value of an IV with different s...
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
bool properlyDominates(const VPRecipeBase *A, const VPRecipeBase *B)
A recipe to combine multiple recipes into a single 'expression' recipe, which should be considered a ...
A special type of VPBasicBlock that wraps an existing IR basic block.
Class to record and manage LLVM IR flags.
static VPIRFlags getDefaultFlags(unsigned Opcode)
Returns default flags for Opcode for opcodes that support it, asserts otherwise.
LLVM_ABI_FOR_TEST FastMathFlags getFastMathFlags() const
static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)
Create a new VPIRPhi for I, if it is a PHINode, otherwise create a VPIRInstruction.
This is a concrete Recipe that models a single VPlan-level instruction.
@ ExtractLane
Extracts a single lane (first operand) from a set of vector operands.
@ ComputeAnyOfResult
Compute the final result of an AnyOf reduction with select(cmp(),x,y), where one of (x,...
@ ExtractPenultimateElement
@ Unpack
Extracts all lanes from its (non-scalable) vector operand.
@ FirstOrderRecurrenceSplice
@ ReductionStartVector
Start vector for reductions with 3 operands: the original start value, the identity value for the red...
@ BuildVector
Creates a fixed-width vector containing all operands.
@ BuildStructVector
Given operands of (the same) struct type, creates a struct of fixed- width vectors each containing a ...
@ CanonicalIVIncrementForPart
const InterleaveGroup< Instruction > * getInterleaveGroup() const
VPValue * getMask() const
Return the mask used by this recipe.
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
A recipe for interleaved memory operations with vector-predication intrinsics.
VPInterleaveRecipe is a recipe for transforming an interleave group of load or stores into one wide l...
VPValue * getIncomingValue(unsigned Idx) const
Returns the incoming VPValue with index Idx.
VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when control converges back from ...
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
VPRegionBlock * getRegion()
VPBasicBlock * getParent()
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Helper class to create VPRecipes from IR instructions.
VPRecipeBase * getRecipe(Instruction *I)
Return the recipe created for given ingredient.
A recipe to represent inloop reduction operations with vector-predication intrinsics,...
A recipe for handling reduction phis.
void setVFScaleFactor(unsigned ScaleFactor)
Set the VFScaleFactor for this reduction phi.
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by, or 1 if it isn't scaled.
RecurKind getRecurrenceKind() const
Returns the recurrence kind of the reduction.
A recipe to represent inloop, ordered or partial reduction operations.
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
const VPBlockBase * getEntry() const
Type * getCanonicalIVType()
Return the type of the canonical IV for loop regions.
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
void setExiting(VPBlockBase *ExitingBlock)
Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.
VPCanonicalIVPHIRecipe * getCanonicalIV()
Returns the canonical induction recipe of the region.
const VPBlockBase * getExiting() const
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
bool isSingleScalar() const
bool isPredicated() const
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...
VPSingleDef is a base class for recipes for modeling a sequence of one or more output IR that define ...
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
VPSingleDefRecipe * clone() override=0
Clone the current recipe.
An analysis for type-inference for VPValues.
LLVMContext & getContext()
Return the LLVMContext used by the analysis.
Type * inferScalarType(const VPValue *V)
Infer the type of V. Returns the scalar type of V.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
void setOperand(unsigned I, VPValue *New)
unsigned getNumOperands() const
VPValue * getOperand(unsigned N) const
void addOperand(VPValue *Operand)
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Value * getLiveInIRValue() const
Return the underlying IR value for a VPIRValue.
bool isDefinedOutsideLoopRegions() const
Returns true if the VPValue is defined outside any loop.
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
void setUnderlyingValue(Value *Val)
void replaceAllUsesWith(VPValue *New)
unsigned getNumUsers() const
void replaceUsesWithIf(VPValue *New, llvm::function_ref< bool(VPUser &U, unsigned Idx)> ShouldReplace)
Go through the uses list for this VPValue and make each use point to New if the callback ShouldReplac...
A recipe to compute a pointer to the last element of each part of a widened memory access for widened...
A Recipe for widening the canonical induction variable of the vector loop.
VPWidenCastRecipe is a recipe to create vector cast instructions.
Instruction::CastOps getOpcode() const
A recipe for handling GEP instructions.
Base class for widened induction (VPWidenIntOrFpInductionRecipe and VPWidenPointerInductionRecipe),...
VPIRValue * getStartValue() const
Returns the start value of the induction.
PHINode * getPHINode() const
Returns the underlying PHINode if one exists, or null otherwise.
VPValue * getStepValue()
Returns the step value of the induction.
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
A recipe for handling phi nodes of integer and floating-point inductions, producing their vector valu...
VPIRValue * getStartValue() const
Returns the start value of the induction.
VPValue * getSplatVFValue() const
If the recipe has been unrolled, return the VPValue for the induction increment, otherwise return nul...
VPValue * getLastUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the last unrolled part,...
A recipe for widening vector intrinsics.
A common base class for widening memory operations.
A recipe for widened phis.
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenRecipe.
VPWidenRecipe * clone() override
Clone the current recipe.
unsigned getOpcode() const
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
VPIRValue * getLiveIn(Value *V) const
Return the live-in VPIRValue for V, if there is one or nullptr otherwise.
bool hasVF(ElementCount VF) const
const DataLayout & getDataLayout() const
LLVMContext & getContext() const
VPBasicBlock * getEntry()
bool hasScalableVF() const
VPValue * getTripCount() const
The trip count of the original loop.
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
iterator_range< SmallSetVector< ElementCount, 2 >::iterator > vectorFactors() const
Returns an iterator range over all VFs of the plan.
VPIRValue * getFalse()
Return a VPIRValue wrapping i1 false.
VPSymbolicValue & getVFxUF()
Returns VF * UF of the vector loop region.
VPIRValue * getAllOnesValue(Type *Ty)
Return a VPIRValue wrapping the AllOnes value of type Ty.
VPRegionBlock * createReplicateRegion(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="")
Create a new replicate region with Entry, Exiting and Name.
auto getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
bool hasUF(unsigned UF) const
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
VPSymbolicValue & getVectorTripCount()
The vector trip count.
VPValue * getBackedgeTakenCount() const
VPIRValue * getOrAddLiveIn(Value *V)
Gets the live-in VPIRValue for V or adds a new live-in (if none exists yet) for V.
VPIRValue * getZero(Type *Ty)
Return a VPIRValue wrapping the null value of type Ty.
void setVF(ElementCount VF)
bool isUnrolled() const
Returns true if the VPlan already has been unrolled, i.e.
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
unsigned getConcreteUF() const
Returns the concrete UF of the plan, after unrolling.
void resetTripCount(VPValue *NewTripCount)
Resets the trip count for the VPlan.
VPBasicBlock * getMiddleBlock()
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
VPIRValue * getTrue()
Return a VPIRValue wrapping i1 true.
VPSymbolicValue & getUF()
Returns the UF of the vector loop region.
bool hasScalarVFOnly() const
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
VPBasicBlock * getVectorPreheader()
Returns the preheader of the vector loop region, if one exists, or null otherwise.
VPSymbolicValue & getVF()
Returns the VF of the vector loop region.
bool hasScalarTail() const
Returns true if the scalar tail may execute after the vector loop.
LLVM_ABI_FOR_TEST VPlan * duplicate()
Clone the current VPlan, update all VPValues of the new VPlan and cloned recipes to refer to the clon...
VPIRValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPIRValue wrapping a ConstantInt with the given type and value.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
iterator_range< user_iterator > users()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
constexpr bool hasKnownScalarFactor(const FixedOrScalableQuantity &RHS) const
Returns true if there exists a value X where RHS.multiplyCoefficientBy(X) will result in a value whos...
constexpr ScalarTy getFixedValue() const
constexpr ScalarTy getKnownScalarFactor(const FixedOrScalableQuantity &RHS) const
Returns a value X where RHS.multiplyCoefficientBy(X) will result in a value whose quantity matches ou...
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_ABI APInt RoundingUDiv(const APInt &A, const APInt &B, APInt::Rounding RM)
Return A unsign-divided by B, rounded by the given rounding mode.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
match_isa< To... > m_Isa()
match_combine_or< Ty... > m_CombineOr(const Ty &...Ps)
Combine pattern matchers matching any of Ps patterns.
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
auto m_Cmp()
Matches any compare instruction and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_MaskedStore(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
Matches MaskedStore Intrinsic.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
LogicalOp_match< LHS, RHS, Instruction::And > m_LogicalAnd(const LHS &L, const RHS &R)
Matches L && R either in the form of L & R or L ?
BinaryOp_match< LHS, RHS, Instruction::FMul > m_FMul(const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, OpTy > m_ZExtOrSelf(const OpTy &Op)
bool match(Val *V, const Pattern &P)
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
auto match_fn(const Pattern &P)
A match functor that can be used as a UnaryPredicate in functional algorithms like all_of.
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_MaskedLoad(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
Matches MaskedLoad Intrinsic.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
SpecificCmpClass_match< LHS, RHS, CmpInst > m_SpecificCmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
CastInst_match< OpTy, FPExtInst > m_FPExt(const OpTy &Op)
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::UDiv > m_UDiv(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
BinaryOp_match< LHS, RHS, Instruction::FAdd, true > m_c_FAdd(const LHS &L, const RHS &R)
Matches FAdd with LHS and RHS in either order.
LogicalOp_match< LHS, RHS, Instruction::And, true > m_c_LogicalAnd(const LHS &L, const RHS &R)
Matches L && R with LHS and RHS in either order.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
BinaryOp_match< LHS, RHS, Instruction::Mul, true > m_c_Mul(const LHS &L, const RHS &R)
Matches a Mul with LHS and RHS in either order.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
auto m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
bind_cst_ty m_scev_APInt(const APInt *&C)
Match an SCEV constant and bind it to an APInt.
bool match(const SCEV *S, const Pattern &P)
SCEVAffineAddRec_match< Op0_t, Op1_t, match_isa< const Loop > > m_scev_AffineAddRec(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::ExtractLastLane, VPInstruction_match< VPInstruction::ExtractLastPart, Op0_t > > m_ExtractLastLaneOfLastPart(const Op0_t &Op0)
AllRecipe_commutative_match< Instruction::And, Op0_t, Op1_t > m_c_BinaryAnd(const Op0_t &Op0, const Op1_t &Op1)
Match a binary AND operation.
AllRecipe_match< Instruction::Or, Op0_t, Op1_t > m_BinaryOr(const Op0_t &Op0, const Op1_t &Op1)
Match a binary OR operation.
VPInstruction_match< VPInstruction::AnyOf > m_AnyOf()
AllRecipe_commutative_match< Instruction::Or, Op0_t, Op1_t > m_c_BinaryOr(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::ComputeReductionResult, Op0_t > m_ComputeReductionResult(const Op0_t &Op0)
auto m_WidenAnyExtend(const Op0_t &Op0)
VPInstruction_match< VPInstruction::StepVector > m_StepVector()
auto m_VPPhi(const Op0_t &Op0, const Op1_t &Op1)
bind_ty< VPSingleDefRecipe > m_VPSingleDefRecipe(VPSingleDefRecipe *&V)
Match a VPSingleDefRecipe, capturing if we match.
VPInstruction_match< VPInstruction::BranchOnTwoConds > m_BranchOnTwoConds()
AllRecipe_match< Opcode, Op0_t, Op1_t > m_Binary(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::LastActiveLane, Op0_t > m_LastActiveLane(const Op0_t &Op0)
VPInstruction_match< VPInstruction::ExitingIVValue, Op0_t > m_ExitingIVValue(const Op0_t &Op0)
VPInstruction_match< Instruction::ExtractElement, Op0_t, Op1_t > m_ExtractElement(const Op0_t &Op0, const Op1_t &Op1)
specific_intval< 1 > m_False()
VPInstruction_match< VPInstruction::ExtractLastLane, Op0_t > m_ExtractLastLane(const Op0_t &Op0)
VPInstruction_match< VPInstruction::ActiveLaneMask, Op0_t, Op1_t, Op2_t > m_ActiveLaneMask(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
VPInstruction_match< VPInstruction::BranchOnCount > m_BranchOnCount()
bind_ty< VPIRValue > m_VPIRValue(VPIRValue *&V)
Match a VPIRValue.
auto m_GetElementPtr(const Op0_t &Op0, const Op1_t &Op1)
specific_intval< 1 > m_True()
auto m_VPValue()
Match an arbitrary VPValue and ignore it.
VectorEndPointerRecipe_match< Op0_t, Op1_t > m_VecEndPtr(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::ExtractLastPart, Op0_t > m_ExtractLastPart(const Op0_t &Op0)
VPInstruction_match< VPInstruction::Broadcast, Op0_t > m_Broadcast(const Op0_t &Op0)
VPInstruction_match< VPInstruction::ExplicitVectorLength, Op0_t > m_EVL(const Op0_t &Op0)
VPInstruction_match< VPInstruction::BuildVector > m_BuildVector()
BuildVector is matches only its opcode, w/o matching its operands as the number of operands is not fi...
VPInstruction_match< VPInstruction::ExtractPenultimateElement, Op0_t > m_ExtractPenultimateElement(const Op0_t &Op0)
VPInstruction_match< VPInstruction::FirstActiveLane, Op0_t > m_FirstActiveLane(const Op0_t &Op0)
bind_ty< VPInstruction > m_VPInstruction(VPInstruction *&V)
Match a VPInstruction, capturing if we match.
auto m_DerivedIV(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
VPInstruction_match< VPInstruction::BranchOnCond > m_BranchOnCond()
VPInstruction_match< VPInstruction::ExtractLane, Op0_t, Op1_t > m_ExtractLane(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::Reverse, Op0_t > m_Reverse(const Op0_t &Op0)
NodeAddr< DefNode * > Def
bool isSingleScalar(const VPValue *VPV)
Returns true if VPV is a single scalar, either because it produces the same value for all lanes or on...
bool isUniformAcrossVFsAndUFs(VPValue *V)
Checks if V is uniform across all VF lanes and UF parts.
VPValue * getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr)
Get or create a VPValue that corresponds to the expansion of Expr.
VPInstruction * findComputeReductionResult(VPReductionPHIRecipe *PhiR)
Find the ComputeReductionResult recipe for PhiR, looking through selects inserted for predicated redu...
std::optional< MemoryLocation > getMemoryLocation(const VPRecipeBase &R)
Return a MemoryLocation for R with noalias metadata populated from R, if the recipe is supported and ...
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
VPRecipeBase * findRecipe(VPValue *Start, PredT Pred)
Search Start's users for a recipe satisfying Pred, looking through recipes with definitions.
VPSingleDefRecipe * findHeaderMask(VPlan &Plan)
Collect the header mask with the pattern: (ICMP_ULE, WideCanonicalIV, backedge-taken-count) TODO: Int...
bool onlyScalarValuesUsed(const VPValue *Def)
Returns true if only scalar values of Def are used by all users.
static VPRecipeBase * findUserOf(VPValue *V, const MatchT &P)
If V is used by a recipe matching pattern P, return it.
const SCEV * getSCEVExprForVPValue(const VPValue *V, PredicatedScalarEvolution &PSE, const Loop *L=nullptr)
Return the SCEV expression for V.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
constexpr auto not_equal_to(T &&Arg)
Functor variant of std::not_equal_to that can be used as a UnaryPredicate in functional algorithms li...
void stable_sort(R &&Range)
auto min_element(R &&Range)
Provide wrappers to std::min_element which take ranges instead of having to pass begin/end explicitly...
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
DenseMap< const Value *, const SCEV * > ValueToSCEVMapTy
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
constexpr from_range_t from_range
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
auto cast_or_null(const Y &Val)
iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order.
iterator_range< df_iterator< VPBlockDeepTraversalWrapper< VPBlockBase * > > > vp_depth_first_deep(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order while traversing t...
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like a...
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI_FOR_TEST cl::opt< bool > EnableWideActiveLaneMask
UncountableExitStyle
Different methods of handling early exits.
@ ReadOnly
No side effects to worry about, so we can process any uncountable exits in the loop and branch either...
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
iterator_range< filter_iterator< detail::IterOfRange< RangeT >, PredicateT > > make_filter_range(RangeT &&Range, PredicateT Pred)
Convenience function that takes a range of elements and a predicate, and return a new filter_iterator...
bool canConstantBeExtended(const APInt *C, Type *NarrowType, TTI::PartialReductionExtendKind ExtKind)
Check if a constant CI can be safely treated as having been extended from a narrower type with the gi...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
RecurKind
These are the kinds of recurrences that we support.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ FindIV
FindIV reduction with select(icmp(),x,y) where one of (x,y) is a loop induction variable (increasing ...
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ Sub
Subtraction of integers.
@ AddChainWithSubs
A chain of adds and subs.
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
LLVM_ABI Value * getRecurrenceIdentity(RecurKind K, Type *Tp, FastMathFlags FMF)
Given information about an recurrence kind, return the identity for the @llvm.vector....
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the specified block at the specified instruction.
FunctionAddr VTableAddr Next
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
DWARFExpression::Operation Op
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...
ArrayRef(const T &OneElt) -> ArrayRef< T >
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list.
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
@ Default
The result value is uniform if and only if all operands are uniform.
constexpr detail::IsaCheckPredicate< Types... > IsaPred
Function object wrapper for the llvm::isa type check.
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
RemoveMask_match(const Op0_t &In, Op1_t &Out)
bool match(OpTy *V) const
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
MDNode * Scope
The tag for alias scope specification (used with noalias).
MDNode * NoAlias
The tag specifying the noalias scope.
This struct is a compact representation of a valid (non-zero power of two) alignment.
An information struct used to provide DenseMap with the various necessary components for a given valu...
Incoming for lane mask phi as machine instruction, incoming register Reg and incoming block Block are...
This reduction is unordered with the partial result scaled down by some factor.
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
Struct to hold various analysis needed for cost computations.
TargetTransformInfo::TargetCostKind CostKind
const TargetTransformInfo & TTI
A recipe for handling first-order recurrence phis.
A VPValue representing a live-in from the input IR or a constant.
Type * getType() const
Returns the type of the underlying IR value.
A symbolic live-in VPValue, used for values like vector trip count, VF, and VFxUF.
bool isMaterialized() const
Returns true if this symbolic value has been materialized.
A recipe for widening load operations with vector-predication intrinsics, using the address to load f...
A recipe for widening load operations, using the address to load from and an optional mask.
A recipe for widening store operations with vector-predication intrinsics, using the value to store,...
A recipe for widening store operations, using the stored value, the address to store to and an option...