GetIntOrFpInductionDescriptor,
if (!VPBB->getParent())
auto EndIter = Term ? Term->getIterator() : VPBB->end();
VPValue *VPV = Ingredient.getVPSingleValue();
const auto *II = GetIntOrFpInductionDescriptor(Phi);
Phi, Start, Step, &Plan.getVF(), *II, Flags,
Ingredient.getDebugLoc());
*Load, Ingredient.getOperand(0), nullptr, false, false, *VPI,
Ingredient.getDebugLoc());
*Store, Ingredient.getOperand(1), Ingredient.getOperand(0), nullptr, false,
false, *VPI, Ingredient.getDebugLoc());
Ingredient.getDebugLoc());
*VPI, CI->getDebugLoc());
CI->getOpcode(), Ingredient.getOperand(0), CI->getType(), CI,
*VPI, Ingredient.getDebugLoc());
"Only recipes with zero or one defined values expected");
Ingredient.eraseFromParent();
if (A->getOpcode() != Instruction::Store ||
    B->getOpcode() != Instruction::Store)
const APInt *Distance;
Type *TyA = TypeInfo.inferScalarType(A->getOperand(0));
Type *TyB = TypeInfo.inferScalarType(B->getOperand(0));
uint64_t MaxStoreSize = std::max(SizeA, SizeB);
auto VFs = B->getParent()->getPlan()->vectorFactors();
return Distance->abs().uge(
: ExcludeRecipes(ExcludeRecipes), GroupLeader(GroupLeader), PSE(PSE),
  L(L), TypeInfo(TypeInfo) {}
return ExcludeRecipes.contains(&R) ||
       (Store && isNoAliasViaDistance(Store, &GroupLeader));
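The distance check above reduces to a simple inequality: two stores whose addresses differ by a constant Distance cannot overlap within a vector iteration if |Distance| >= VF * max(SizeA, SizeB) for every vector factor VF in the plan. A minimal self-contained sketch of that predicate, with plain integers standing in for SCEV and APInt (all names here are hypothetical):

#include <algorithm>
#include <cstdint>
#include <cstdlib>
#include <vector>

// Distance is the constant byte distance between the two store addresses.
static bool noAliasViaDistance(int64_t Distance, uint64_t SizeA, uint64_t SizeB,
                               const std::vector<uint64_t> &VFs) {
  uint64_t MaxStoreSize = std::max(SizeA, SizeB);
  for (uint64_t VF : VFs)
    // Lanes 0..VF-1 of each store span at most VF * MaxStoreSize bytes from
    // the base address; any smaller distance may alias for this VF.
    if (static_cast<uint64_t>(std::llabs(Distance)) < VF * MaxStoreSize)
      return false;
  return true;
}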
std::optional<SinkStoreInfo> SinkInfo = {}) {
bool CheckReads = SinkInfo.has_value();
"Expected at most one successor in block chain");
if (SinkInfo && SinkInfo->shouldSkip(R))
if (!R.mayWriteToMemory() && !(CheckReads && R.mayReadFromMemory()))
if (CheckReads && R.mayReadFromMemory() &&
    Loc->AATags.NoAlias))
if (R.mayHaveSideEffects() || R.mayReadFromMemory() || R.isPhi())
return RepR && RepR->getOpcode() == Instruction::Alloca;
auto InsertIfValidSinkCandidate = [ScalarVFOnly, &WorkList](
if (!ScalarVFOnly && RepR->isSingleScalar())
WorkList.insert({SinkTo, Candidate});
for (auto &Recipe : *VPBB)
  InsertIfValidSinkCandidate(VPBB, Op);
for (unsigned I = 0; I != WorkList.size(); ++I) {
std::tie(SinkTo, SinkCandidate) = WorkList[I];
auto UsersOutsideSinkTo =
return cast<VPRecipeBase>(U)->getParent() != SinkTo;
if (any_of(UsersOutsideSinkTo, [SinkCandidate](VPUser *U) {
      return !U->usesFirstLaneOnly(SinkCandidate);
bool NeedsDuplicating = !UsersOutsideSinkTo.empty();
if (NeedsDuplicating) {
if (auto *SinkCandidateRepR =
nullptr, *SinkCandidateRepR,
Clone = SinkCandidate->clone();
InsertIfValidSinkCandidate(SinkTo, Op);
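The sinking logic above is a growing-worklist scan: the vector is indexed rather than iterated so that operands discovered while sinking are appended and processed in the same pass. A simplified sketch of that control structure, using hypothetical Node/Block types in place of VPlan recipes and blocks:

#include <set>
#include <utility>
#include <vector>

struct Block;
struct Node { Block *Parent; std::vector<Node *> Operands; };
struct Block { std::vector<Node *> Nodes; };

// Hypothetical legality check standing in for the recipe-specific tests.
static bool isValidSinkCandidate(Block *SinkTo, Node *Candidate) {
  return Candidate && Candidate->Parent != SinkTo;
}

static bool sinkOperands(std::vector<Block *> &Blocks) {
  std::vector<std::pair<Block *, Node *>> WorkList;
  std::set<std::pair<Block *, Node *>> Seen;
  auto Push = [&](Block *SinkTo, Node *Candidate) {
    if (isValidSinkCandidate(SinkTo, Candidate) &&
        Seen.insert({SinkTo, Candidate}).second)
      WorkList.push_back({SinkTo, Candidate});
  };
  for (Block *B : Blocks)
    for (Node *N : B->Nodes)
      for (Node *Op : N->Operands)
        Push(B, Op);
  bool Changed = false;
  // Indexed loop: WorkList grows while we scan it.
  for (unsigned I = 0; I != WorkList.size(); ++I) {
    auto [SinkTo, Candidate] = WorkList[I];
    Candidate->Parent = SinkTo; // stand-in for moving the recipe
    Changed = true;
    for (Node *Op : Candidate->Operands)
      Push(SinkTo, Op); // the moved recipe's operands may now be sinkable
  }
  return Changed;
}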
if (!EntryBB || EntryBB->size() != 1 ||
if (EntryBB->getNumSuccessors() != 2)
if (!Succ0 || !Succ1)
if (Succ0->getNumSuccessors() + Succ1->getNumSuccessors() != 1)
if (Succ0->getSingleSuccessor() == Succ1)
if (Succ1->getSingleSuccessor() == Succ0)
if (!Region1->isReplicator())
auto *MiddleBasicBlock =
if (!MiddleBasicBlock || !MiddleBasicBlock->empty())
if (!Region2 || !Region2->isReplicator())
if (!Mask1 || Mask1 != Mask2)
assert(Mask1 && Mask2 && "both regions must have conditions");
if (TransformedRegions.contains(Region1))
if (!Then1 || !Then2)
VPValue *Phi1ToMoveV = Phi1ToMove.getVPSingleValue();
if (Phi1ToMove.getVPSingleValue()->getNumUsers() == 0) {
  Phi1ToMove.eraseFromParent();
Phi1ToMove.moveBefore(*Merge2, Merge2->begin());
TransformedRegions.insert(Region1);
return !TransformedRegions.empty();
std::string RegionName = (Twine("pred.") + Instr->getOpcodeName()).str();
assert(Instr->getParent() && "Predicated instruction not in any basic block");
auto *BlockInMask = PredRecipe->getMask();
RecipeWithoutMask->getDebugLoc());
if (RepR->isPredicated())
if (ParentRegion && ParentRegion->getExiting() == CurrentBlock)
if (!VPBB->getParent())
if (!PredVPBB || PredVPBB->getNumSuccessors() != 1 ||
R.moveBefore(*PredVPBB, PredVPBB->end());
auto *ParentRegion = VPBB->getParent();
if (ParentRegion && ParentRegion->getExiting() == VPBB)
  ParentRegion->setExiting(PredVPBB);
for (auto *Succ : to_vector(VPBB->successors())) {
return !WorkList.empty();
bool ShouldSimplify = true;
while (ShouldSimplify) {
if (!IV || IV->getTruncInst())
for (auto *U : FindMyCast->users()) {
if (UserCast && UserCast->getUnderlyingValue() == IRCast) {
  FoundUserCast = UserCast;
FindMyCast = FoundUserCast;
if (!WidenOriginalIV || !WidenOriginalIV->isCanonical())
WidenOriginalIV->dropPoisonGeneratingFlags();
bool IsConditionalAssume = RepR && RepR->isPredicated() &&
if (IsConditionalAssume)
if (R.mayHaveSideEffects())
return all_of(R.definedValues(),
              [](VPValue *V) { return V->getNumUsers() == 0; });
if (!PhiR || PhiR->getNumOperands() != 2)
VPUser *PhiUser = PhiR->getSingleUser();
if (PhiUser != Incoming->getDefiningRecipe() ||
PhiR->replaceAllUsesWith(PhiR->getOperand(0));
PhiR->eraseFromParent();
Incoming->getDefiningRecipe()->eraseFromParent();
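The removal test above encodes the usual liveness rule: a recipe survives if it may have side effects, and is otherwise deletable once every value it defines is unused. A minimal sketch of that rule over a hypothetical recipe type:

#include <algorithm>
#include <vector>

struct Value { unsigned NumUsers = 0; };
struct Recipe {
  bool MayHaveSideEffects = false;
  std::vector<Value *> Defined;
};

// Dead iff side-effect free and none of the defined values has users.
static bool isDeadRecipe(const Recipe &R) {
  if (R.MayHaveSideEffects)
    return false;
  return std::all_of(R.Defined.begin(), R.Defined.end(),
                     [](const Value *V) { return V->NumUsers == 0; });
}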
Kind, FPBinOp, StartV, CanonicalIV, Step, "offset.idx");
BaseIV = Builder.createScalarCast(Instruction::Trunc, BaseIV, TruncTy, DL);
if (ResultTy != StepTy) {
Builder.setInsertPoint(VecPreheader);
Step = Builder.createScalarCast(Instruction::Trunc, Step, ResultTy, DL);
return Builder.createScalarIVSteps(InductionOpcode, FPBinOp, BaseIV, Step,
for (unsigned I = 0; I != Users.size(); ++I) {
Users.insert_range(V->users());
return Users.takeVector();
nullptr, StartV, StepV, PtrIV->getDebugLoc(), Builder);
Def->getNumUsers() == 0 || !Def->getUnderlyingValue() ||
(RepR && (RepR->isSingleScalar() || RepR->isPredicated())))
Def->operands(), true,
Clone->insertAfter(Def);
Def->replaceAllUsesWith(Clone);
PtrIV->replaceAllUsesWith(PtrAdd);
if (HasOnlyVectorVFs && none_of(WideIV->users(), [WideIV](VPUser *U) {
      return U->usesScalars(WideIV);
Plan, ID.getKind(), ID.getInductionOpcode(),
WideIV->getTruncInst(), WideIV->getStartValue(), WideIV->getStepValue(),
WideIV->getDebugLoc(), Builder);
if (!HasOnlyVectorVFs) {
"plans containing a scalar VF cannot also include scalable VFs");
WideIV->replaceAllUsesWith(Steps);
WideIV->replaceUsesWithIf(Steps, [WideIV, HasScalableVF](VPUser &U, unsigned) {
  return U.usesFirstLaneOnly(WideIV);
  return U.usesScalars(WideIV);
return (IntOrFpIV && IntOrFpIV->getTruncInst()) ? nullptr : WideIV;
if (!Def || Def->getNumOperands() != 2)
auto IsWideIVInc = [&]() {
auto &ID = WideIV->getInductionDescriptor();
VPValue *IVStep = WideIV->getStepValue();
switch (ID.getInductionOpcode()) {
case Instruction::Add:
case Instruction::FAdd:
case Instruction::FSub:
case Instruction::Sub: {
return IsWideIVInc() ? WideIV : nullptr;
if (WideIntOrFp && WideIntOrFp->getTruncInst())
FirstActiveLane = B.createScalarZExtOrTrunc(FirstActiveLane, CanonicalIVType,
                                            FirstActiveLaneType, DL);
B.createNaryOp(Instruction::Add, {CanonicalIV, FirstActiveLane}, DL);
EndValue = B.createNaryOp(Instruction::Add, {EndValue, One}, DL);
if (!WideIntOrFp || !WideIntOrFp->isCanonical()) {
VPIRValue *Start = WideIV->getStartValue();
VPValue *Step = WideIV->getStepValue();
EndValue = B.createDerivedIV(
    Start, EndValue, Step);
assert(EndValue && "end value must have been pre-computed");
VPValue *Step = WideIV->getStepValue();
return B.createNaryOp(Instruction::Sub, {EndValue, Step},
return B.createPtrAdd(EndValue,
                      B.createNaryOp(Instruction::Sub, {Zero, Step}),
const auto &ID = WideIV->getInductionDescriptor();
return B.createNaryOp(
    ID.getInductionBinOp()->getOpcode() == Instruction::FAdd
        : Instruction::FAdd,
    {EndValue, Step}, {ID.getInductionBinOp()->getFastMathFlags()});
for (auto [Idx, PredVPBB] : enumerate(ExitVPBB->getPredecessors())) {
if (PredVPBB == MiddleVPBB)
ExitIRI->getOperand(Idx),
Plan, TypeInfo, PredVPBB, ExitIRI->getOperand(Idx), PSE);
ExitIRI->setOperand(Idx, Escape);
const auto &[V, Inserted] = SCEV2VPV.try_emplace(ExpR->getSCEV(), ExpR);
ExpR->replaceAllUsesWith(V->second);
ExpR->eraseFromParent();
while (!WorkList.empty()) {
if (!Seen.insert(Cur).second)
R->eraseFromParent();
static std::optional<std::pair<bool, unsigned>>
    std::optional<std::pair<bool, unsigned>>>(R)
[](auto *I) { return std::make_pair(false, I->getOpcode()); })
.Case<VPWidenIntrinsicRecipe>([](auto *I) {
  return std::make_pair(true, I->getVectorIntrinsicID());
.Case<VPVectorPointerRecipe, VPPredInstPHIRecipe>([](auto *I) {
  return std::make_pair(false,
.Default([](auto *) { return std::nullopt; });
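The dispatch above uses llvm::TypeSwitch, which chains dyn_cast-based cases and invokes the first one that matches. A small usage sketch against plain IR instruction classes (assumes the LLVM headers; the classification itself is only illustrative):

#include "llvm/ADT/TypeSwitch.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include <optional>
#include <utility>

using namespace llvm;

// Mirror the recipe dispatch: a flag for "intrinsic" plus an identifying id.
static std::optional<std::pair<bool, unsigned>> classify(Instruction *I) {
  return TypeSwitch<Instruction *, std::optional<std::pair<bool, unsigned>>>(I)
      .Case<LoadInst, StoreInst>(
          [](auto *MemI) { return std::make_pair(false, MemI->getOpcode()); })
      .Case<IntrinsicInst>([](auto *Intr) {
        return std::make_pair(true, unsigned(Intr->getIntrinsicID()));
      })
      .Default([](auto *) { return std::nullopt; });
}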
Value *V = Op->getUnderlyingValue();
auto FoldToIRValue = [&]() -> Value * {
if (OpcodeOrIID->first) {
  if (R.getNumOperands() != 2)
  unsigned ID = OpcodeOrIID->second;
  return Folder.FoldBinaryIntrinsic(ID, Ops[0], Ops[1],
unsigned Opcode = OpcodeOrIID->second;
return Folder.FoldSelect(Ops[0], Ops[1],
return Folder.FoldBinOp(Instruction::BinaryOps::Xor, Ops[0],
case Instruction::Select:
  return Folder.FoldSelect(Ops[0], Ops[1], Ops[2]);
case Instruction::ICmp:
case Instruction::FCmp:
case Instruction::GetElementPtr: {
  return Folder.FoldGEP(GEP->getSourceElementType(), Ops[0],
case Instruction::ExtractElement:
if (Value *V = FoldToIRValue())
  return R.getParent()->getPlan()->getOrAddLiveIn(V);
VPlan *Plan = Def->getParent()->getPlan();
return Def->replaceAllUsesWith(V);
PredPHI->replaceAllUsesWith(Op);
if (TruncTy == ATy) {
  Def->replaceAllUsesWith(A);
: Instruction::ZExt;
if (auto *UnderlyingExt = Def->getOperand(0)->getUnderlyingValue()) {
  Ext->setUnderlyingValue(UnderlyingExt);
Def->replaceAllUsesWith(Ext);
auto *Trunc = Builder.createWidenCast(Instruction::Trunc, A, TruncTy);
Def->replaceAllUsesWith(Trunc);
for (VPUser *U : A->users()) {
for (VPValue *VPV : R->definedValues())
Def->replaceAllUsesWith(X);
Def->eraseFromParent();
return Def->replaceAllUsesWith(Def->getOperand(Def->getOperand(0) == X));
return Def->replaceAllUsesWith(X);
return Def->replaceAllUsesWith(Def->getOperand(Def->getOperand(0) == X));
return Def->replaceAllUsesWith(Def->getOperand(1));
(!Def->getOperand(0)->hasMoreThanOneUniqueUser() ||
 !Def->getOperand(1)->hasMoreThanOneUniqueUser()))
return Def->replaceAllUsesWith(
    Builder.createLogicalAnd(X, Builder.createOr(Y, Z)));
return Def->replaceAllUsesWith(Plan->getFalse());
return Def->replaceAllUsesWith(X);
Def->setOperand(0, C);
Def->setOperand(1, Y);
Def->setOperand(2, X);
X->hasMoreThanOneUniqueUser())
return Def->replaceAllUsesWith(
    Builder.createLogicalAnd(X, Builder.createLogicalAnd(Y, Z)));
return Def->replaceAllUsesWith(A);
return Def->replaceAllUsesWith(A);
return Def->replaceAllUsesWith(
    Def->getOperand(0) == A ? Def->getOperand(1) : Def->getOperand(0));
return Def->replaceAllUsesWith(Builder.createNaryOp(
    {A, Plan->getConstantInt(APC->getBitWidth(), APC->exactLogBase2())},
const VPRegionBlock *ParentRegion = Def->getParent()->getParent();
bool IsInReplicateRegion = ParentRegion && ParentRegion->isReplicator();
return Def->replaceAllUsesWith(Builder.createNaryOp(
    {A, Plan->getConstantInt(APC->getBitWidth(), APC->exactLogBase2())}, {},
    Def->getDebugLoc()));
return Def->replaceAllUsesWith(A);
R->setOperand(1, Y);
R->setOperand(2, X);
R->replaceAllUsesWith(Cmp);
if (!Cmp->getDebugLoc() && Def->getDebugLoc())
  Cmp->setDebugLoc(Def->getDebugLoc());
if (Op->getNumUsers() > 1 ||
} else if (!UnpairedCmp) {
  UnpairedCmp = Op->getDefiningRecipe();
UnpairedCmp = nullptr;
if (NewOps.size() < Def->getNumOperands()) {
  return Def->replaceAllUsesWith(NewAnyOf);
return Def->replaceAllUsesWith(NewCmp);
return Def->replaceAllUsesWith(Def->getOperand(1));
X = Builder.createWidenCast(Instruction::Trunc, X, WideStepTy);
Def->replaceAllUsesWith(X);
Def->setOperand(1, Def->getOperand(0));
Def->setOperand(0, Y);
if (Phi->getOperand(0) == Phi->getOperand(1))
  Phi->replaceAllUsesWith(Phi->getOperand(0));
Def->replaceAllUsesWith(
    BuildVector->getOperand(BuildVector->getNumOperands() - 1));
return Def->replaceAllUsesWith(A);
Def->replaceAllUsesWith(
    BuildVector->getOperand(BuildVector->getNumOperands() - 2));
Def->replaceAllUsesWith(BuildVector->getOperand(Idx));
Def->replaceAllUsesWith(
"broadcast operand must be single-scalar");
Def->setOperand(0, C);
if (Phi->getNumOperands() == 1)
  Phi->replaceAllUsesWith(Phi->getOperand(0));
return Def->replaceAllUsesWith(A);
if (Phi->getOperand(1) != Def && match(Phi->getOperand(0), m_ZeroInt()) &&
    Phi->getSingleUser() == Def) {
  Phi->setOperand(0, Y);
  Def->replaceAllUsesWith(Phi);
return VPR->replaceAllUsesWith(VPR->getOperand(0));
Steps->replaceAllUsesWith(Steps->getOperand(0));
Def->replaceUsesWithIf(StartV, [](const VPUser &U, unsigned Idx) {
return PhiR && PhiR->isInLoop();
Def->replaceAllUsesWith(A);
[Def, A](VPUser *U) { return U->usesScalars(A) || Def == U; })) {
  return Def->replaceAllUsesWith(A);
return Def->replaceAllUsesWith(A);
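Nearly every rewrite above is a local peephole: match a small operand pattern, then replaceAllUsesWith a simpler value. A compact sketch of two such folds over a toy expression node (hypothetical types; the real code drives this through VPlan's pattern-match helpers such as m_ZeroInt):

#include <vector>

enum class Opc { Select, Const, Other };
struct Expr {
  Opc Op = Opc::Other;
  std::vector<Expr *> Operands;
  bool ConstVal = false; // only meaningful for Opc::Const
};

// Returns the simplified value, or nullptr if no fold applies.
static Expr *simplify(Expr *E) {
  // select C, X, X -> X
  if (E->Op == Opc::Select && E->Operands[1] == E->Operands[2])
    return E->Operands[1];
  // select true, X, Y -> X ; select false, X, Y -> Y
  if (E->Op == Opc::Select && E->Operands[0]->Op == Opc::Const)
    return E->Operands[0]->ConstVal ? E->Operands[1] : E->Operands[2];
  return nullptr;
}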
if (RepR && (RepR->isSingleScalar() || RepR->isPredicated()))
!WidenStoreR->isConsecutive()) {
assert(!WidenStoreR->isReverse() &&
       "Non-consecutive memory recipes shouldn't be reversed");
VPValue *Mask = WidenStoreR->getMask();
{WidenStoreR->getOperand(1)});
&WidenStoreR->getIngredient(), {Extract, WidenStoreR->getAddr()},
true, nullptr, {},
ScalarStore->insertBefore(WidenStoreR);
WidenStoreR->eraseFromParent();
RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(),
true, nullptr, *RepR,
*RepR, RepR->getDebugLoc());
Clone->insertBefore(RepOrWidenR);
VPValue *ExtractOp = Clone->getOperand(0);
Clone->setOperand(0, ExtractOp);
RepR->eraseFromParent();
if (!all_of(RepOrWidenR->users(),
            [RepOrWidenR](const VPUser *U) {
              if (auto *VPI = dyn_cast<VPInstruction>(U)) {
                unsigned Opcode = VPI->getOpcode();
                if (Opcode == VPInstruction::ExtractLastLane ||
                    Opcode == VPInstruction::ExtractLastPart ||
                    Opcode == VPInstruction::ExtractPenultimateElement)
              return U->usesScalars(RepOrWidenR);
if (Op->getSingleUser() != RepOrWidenR)
auto *IRV = dyn_cast<VPIRValue>(Op);
bool LiveInNeedsBroadcast = IRV && !isa<Constant>(IRV->getValue());
auto *OpR = dyn_cast<VPReplicateRecipe>(Op);
return LiveInNeedsBroadcast || (OpR && OpR->isSingleScalar());
RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(),
true, nullptr, *RepOrWidenR);
Clone->insertBefore(RepOrWidenR);
RepOrWidenR->replaceAllUsesWith(Clone);
RepOrWidenR->eraseFromParent();
if (Blend->isNormalized() || !match(Blend->getMask(0), m_False()))
  UniqueValues.insert(Blend->getIncomingValue(0));
for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)
  UniqueValues.insert(Blend->getIncomingValue(I));
if (UniqueValues.size() == 1) {
  Blend->replaceAllUsesWith(*UniqueValues.begin());
  Blend->eraseFromParent();
if (Blend->isNormalized())
unsigned StartIndex = 0;
for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
if (Mask->getNumUsers() == 1 && !match(Mask, m_False())) {
OperandsWithMask.push_back(Blend->getIncomingValue(StartIndex));
for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
  if (I == StartIndex)
  OperandsWithMask.push_back(Blend->getIncomingValue(I));
  OperandsWithMask.push_back(Blend->getMask(I));
OperandsWithMask, Blend->getDebugLoc());
NewBlend->insertBefore(&R);
VPValue *DeadMask = Blend->getMask(StartIndex);
Blend->eraseFromParent();
if (NewBlend->getNumOperands() == 3 &&
VPValue *Inc0 = NewBlend->getOperand(0);
VPValue *Inc1 = NewBlend->getOperand(1);
VPValue *OldMask = NewBlend->getOperand(2);
NewBlend->setOperand(0, Inc1);
NewBlend->setOperand(1, Inc0);
NewBlend->setOperand(2, NewMask);
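A normalized blend keeps a mask-less default incoming value plus (value, mask) pairs, and its semantics are exactly a chain of selects in which the last pair whose mask is set wins. A scalar model of that semantics (illustrative stand-in types):

#include <utility>
#include <vector>

// Start from the default value; each (value, mask) pair overrides it, so the
// last pair with a true mask wins - matching the nested-select expansion.
static int evalBlend(int Default,
                     const std::vector<std::pair<int, bool>> &Incoming) {
  int Result = Default;
  for (const auto &[Value, Mask] : Incoming)
    if (Mask)
      Result = Value;
  return Result;
}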
APInt MaxVal = AlignedTC - 1;
unsigned NewBitWidth =
bool MadeChange = false;
if (!WideIV || !WideIV->isCanonical() ||
    WideIV->hasMoreThanOneUniqueUser() ||
    NewIVTy == WideIV->getScalarType())
VPUser *SingleUser = WideIV->getSingleUser();
WideIV->setStartValue(NewStart);
WideIV->setStepValue(NewStep);
Cmp->setOperand(1, NewBTC);
return any_of(Cond->getDefiningRecipe()->operands(), [&Plan, BestVF, BestUF,
return isConditionTrueViaVFAndUF(C, Plan, BestVF, BestUF, PSE);
const SCEV *VectorTripCount =
"Trip count SCEV must be computable");
auto *Term = &ExitingVPBB->back();
for (unsigned Part = 0; Part < UF; ++Part) {
  Extracts[Part] = Ext;
match(Phi->getBackedgeValue(),
assert(Index && "Expected index from ActiveLaneMask instruction");
"Expected one VPActiveLaneMaskPHIRecipe for each unroll part");
"Expected incoming values of Phi to be ActiveLaneMasks");
EntryALM->setOperand(2, ALMMultiplier);
LoopALM->setOperand(2, ALMMultiplier);
ExtractFromALM(EntryALM, EntryExtracts);
ExtractFromALM(LoopALM, LoopExtracts);
Not->setOperand(0, LoopExtracts[0]);
for (unsigned Part = 0; Part < UF; ++Part) {
  Phis[Part]->setStartValue(EntryExtracts[Part]);
  Phis[Part]->setBackedgeValue(LoopExtracts[Part]);
auto *Term = &ExitingVPBB->back();
const SCEV *VectorTripCount =
"Trip count SCEV must be computable");
if (auto *R = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi))
  return R->isCanonical();
return isa<VPCanonicalIVPHIRecipe, VPEVLBasedIVPHIRecipe,
           VPFirstOrderRecurrencePHIRecipe, VPPhi>(&Phi);
R->getScalarType());
HeaderR.eraseFromParent();
HeaderR.getVPSingleValue()->replaceAllUsesWith(Phi->getIncomingValue(0));
HeaderR.eraseFromParent();
B->setParent(nullptr);
if (Exits.size() != 1) {
"BranchOnTwoConds needs 2 remaining exits");
Term->getOperand(0));
Term->setOperand(1, Plan.getTrue());
{}, {}, Term->getDebugLoc());
Term->eraseFromParent();
R.getVPSingleValue()->replaceAllUsesWith(Trunc);
assert(Plan.hasVF(BestVF) && "BestVF is not available in Plan");
assert(Plan.hasUF(BestUF) && "BestUF is not available in Plan");
assert(Plan.getUF() == BestUF && "BestUF must match the Plan's UF");
auto TryToPushSinkCandidate = [&](VPRecipeBase *SinkCandidate) {
if (SinkCandidate == Previous)
!Seen.insert(SinkCandidate).second ||
for (unsigned I = 0; I != WorkList.size(); ++I) {
"only recipes with a single defined value expected");
if (SinkCandidate == FOR)
SinkCandidate->moveAfter(Previous);
Previous = SinkCandidate;
for (VPUser *U : FOR->users()) {
[&VPDT, HoistPoint](VPUser *U) {
  auto *R = cast<VPRecipeBase>(U);
  return HoistPoint == R ||
         VPDT.properlyDominates(HoistPoint, R);
"HoistPoint must dominate all users of FOR");
auto NeedsHoisting = [HoistPoint, &VPDT,
VPRecipeBase *HoistCandidate = HoistCandidateV->getDefiningRecipe();
if (!HoistCandidate)
HoistCandidate->getRegion() == EnclosingLoopRegion) &&
"CFG in VPlan should still be flat, without replicate regions");
if (!Visited.insert(HoistCandidate).second)
return HoistCandidate;
for (unsigned I = 0; I != HoistCandidates.size(); ++I) {
"only recipes with a single defined value expected");
if (auto *R = NeedsHoisting(Op)) {
if (R->getNumDefinedValues() != 1)
HoistCandidate->moveBefore(*HoistPoint->getParent(),
VPRecipeBase *Previous = FOR->getBackedgeValue()->getDefiningRecipe();
while (auto *PrevPhi =
assert(PrevPhi->getParent() == FOR->getParent());
Previous = PrevPhi->getBackedgeValue()->getDefiningRecipe();
{FOR, FOR->getBackedgeValue()});
FOR->replaceAllUsesWith(RecurSplice);
RecurSplice->setOperand(0, FOR);
for (VPUser *U : RecurSplice->users()) {
B.createNaryOp(Instruction::Sub, {LastActiveLane, One});
VPValue *PenultimateLastIter =
{PenultimateIndex, FOR->getBackedgeValue()});
VPValue *Sel = B.createSelect(Cmp, LastPrevIter, PenultimateLastIter);
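The splice built here implements the first-order recurrence: each vector lane must observe the value produced by the previous scalar iteration, so lane 0 takes the last lane of the previous vector iteration and lane I takes lane I-1 of the current one. A scalar model with a fixed illustrative VF:

#include <array>
#include <cstddef>

constexpr std::size_t VF = 4; // illustrative vector factor

static std::array<int, VF> recurrenceSplice(const std::array<int, VF> &Prev,
                                            const std::array<int, VF> &Cur) {
  std::array<int, VF> Out{};
  Out[0] = Prev[VF - 1];      // value from the previous vector iteration
  for (std::size_t I = 1; I != VF; ++I)
    Out[I] = Cur[I - 1];      // each lane sees its scalar predecessor
  return Out;
}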
RecurKind RK = PhiR->getRecurrenceKind();
RecWithFlags->dropPoisonGeneratingFlags();
struct VPCSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> {
return Def == getEmptyKey() || Def == getTombstoneKey();
return GEP->getSourceElementType();
.Case<VPVectorPointerRecipe, VPWidenGEPRecipe>(
    [](auto *I) { return I->getSourceElementType(); })
.Default([](auto *) { return nullptr; });
static bool canHandle(const VPSingleDefRecipe *Def) {
if (!C || (!C->first && (C->second == Instruction::InsertValue ||
                         C->second == Instruction::ExtractValue)))
return !Def->mayReadFromMemory();
static unsigned getHashValue(const VPSingleDefRecipe *Def) {
const VPlan *Plan = Def->getParent()->getPlan();
VPTypeAnalysis TypeInfo(*Plan);
getGEPSourceElementType(Def), TypeInfo.inferScalarType(Def),
if (RFlags->hasPredicate())
static bool isEqual(const VPSingleDefRecipe *L, const VPSingleDefRecipe *R) {
if (L->getVPDefID() != R->getVPDefID() ||
    getGEPSourceElementType(L) != getGEPSourceElementType(R) ||
    !equal(L->operands(), R->operands()))
"must have valid opcode info for both recipes");
if (LFlags->hasPredicate() &&
    LFlags->getPredicate() !=
const VPRegionBlock *RegionL = L->getRegion();
const VPRegionBlock *RegionR = R->getRegion();
L->getParent() != R->getParent())
const VPlan *Plan = L->getParent()->getPlan();
VPTypeAnalysis TypeInfo(*Plan);
return TypeInfo.inferScalarType(L) == TypeInfo.inferScalarType(R);
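VPCSEDenseMapInfo follows the standard LLVM idiom for value numbering: specialize DenseMapInfo so a DenseMap can key on pointers while hashing and comparing the pointees' contents, taking care that the empty/tombstone sentinels are never dereferenced. A reduced sketch over a toy instruction type (assumes LLVM's ADT headers; names are illustrative):

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/SmallVector.h"

using namespace llvm;

struct ToyInst {
  unsigned Opcode;
  SmallVector<ToyInst *, 2> Operands;
};

struct ToyCSEInfo : DenseMapInfo<ToyInst *> {
  static bool isSentinel(const ToyInst *I) {
    return I == getEmptyKey() || I == getTombstoneKey();
  }
  static unsigned getHashValue(const ToyInst *I) {
    // Sentinels are never hashed by DenseMap, so dereferencing is safe here.
    return hash_combine(
        I->Opcode, hash_combine_range(I->Operands.begin(), I->Operands.end()));
  }
  static bool isEqual(const ToyInst *L, const ToyInst *R) {
    if (isSentinel(L) || isSentinel(R))
      return L == R; // sentinels only compare equal to themselves
    return L->Opcode == R->Opcode && L->Operands == R->Operands;
  }
};

// Usage: DenseMap<ToyInst *, ToyInst *, ToyCSEInfo> maps each equivalence
// class to its first (dominating) occurrence; later duplicates get replaced.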
if (!Def || !VPCSEDenseMapInfo::canHandle(Def))
if (!VPDT.dominates(V->getParent(), VPBB))
Def->replaceAllUsesWith(V);
"Expected vector preheader's successor to be the vector loop region");
return !Op->isDefinedOutsideLoopRegions();
R.moveBefore(*Preheader, Preheader->end());
VPValue *ResultVPV = R.getVPSingleValue();
unsigned NewResSizeInBits = MinBWs.lookup(UI);
if (!NewResSizeInBits)
(void)OldResSizeInBits;
VPW->dropPoisonGeneratingFlags();
if (OldResSizeInBits != NewResSizeInBits &&
Ext->insertAfter(&R);
Ext->setOperand(0, ResultVPV);
assert(OldResSizeInBits > NewResSizeInBits && "Nothing to shrink?");
"Only ICmps should not need extending the result.");
for (unsigned Idx = StartIdx; Idx != R.getNumOperands(); ++Idx) {
auto *Op = R.getOperand(Idx);
unsigned OpSizeInBits =
if (OpSizeInBits == NewResSizeInBits)
assert(OpSizeInBits > NewResSizeInBits && "nothing to truncate");
auto [ProcessedIter, IterIsEmpty] = ProcessedTruncs.try_emplace(Op);
R.setOperand(Idx, ProcessedIter->second);
Builder.setInsertPoint(&R);
Builder.createWidenCast(Instruction::Trunc, Op, NewResTy);
ProcessedIter->second = NewOp;
R.setOperand(Idx, NewOp);
assert(VPBB->getNumSuccessors() == 2 &&
       "Two successors expected for BranchOnCond");
unsigned RemovedIdx;
"There must be a single edge between VPBB and its successor");
VPBB->back().eraseFromParent();
VPValue *StartV = CanonicalIVPHI->getStartValue();
auto *CanonicalIVIncrement =
CanonicalIVIncrement->dropPoisonGeneratingFlags();
DebugLoc DL = CanonicalIVIncrement->getDebugLoc();
VPValue *TripCount, *IncrementValue;
IncrementValue = CanonicalIVIncrement;
IncrementValue = CanonicalIVPHI;
auto *EntryIncrement = Builder.createOverflowingOp(
{EntryIncrement, TC, ALMMultiplier}, DL,
"active.lane.mask.entry");
LaneMaskPhi->insertAfter(CanonicalIVPHI);
Builder.setInsertPoint(OriginalTerminator);
auto *InLoopIncrement =
{IncrementValue}, {false, false}, DL);
{InLoopIncrement, TripCount, ALMMultiplier},
DL, "active.lane.mask.next");
auto *NotMask = Builder.createNot(ALM, DL);
auto *FoundWidenCanonicalIVUser = find_if(
"Must have at most one VPWideCanonicalIVRecipe");
if (FoundWidenCanonicalIVUser !=
auto *WideCanonicalIV =
WideCanonicalIVs.push_back(WideCanonicalIV);
if (WidenOriginalIV && WidenOriginalIV->isCanonical())
  WideCanonicalIVs.push_back(WidenOriginalIV);
for (auto *Wide : WideCanonicalIVs) {
for (VPUser *U : Wide->users()) {
assert(VPI->getOperand(0) == Wide &&
       "WidenCanonicalIV must be the first operand of the compare");
assert(!HeaderMask && "Multiple header masks found?");
VPlan &Plan, bool UseActiveLaneMaskForControlFlow,
UseActiveLaneMaskForControlFlow) &&
"DataAndControlFlowWithoutRuntimeCheck implies "
"UseActiveLaneMaskForControlFlow");
auto *FoundWidenCanonicalIVUser = find_if(
assert(FoundWidenCanonicalIVUser &&
       "Must have widened canonical IV when tail folding!");
auto *WideCanonicalIV =
if (UseActiveLaneMaskForControlFlow) {
nullptr, "active.lane.mask");
template <typename OpTy> bool match(OpTy *V) const {
template <typename Op0_t, typename Op1_t>
VPValue *Addr, *Mask, *EndPtr;
auto AdjustEndPtr = [&CurRecipe, &EVL](VPValue *EndPtr) {
  EVLEndPtr->insertBefore(&CurRecipe);
  EVLEndPtr->setOperand(1, &EVL);
if (match(&CurRecipe,
LoadR->insertBefore(&CurRecipe);
Intrinsic::experimental_vp_reverse, {LoadR, Plan->getTrue(), &EVL},
StoredVal, EVL, Mask);
if (match(&CurRecipe,
Intrinsic::experimental_vp_reverse,
{ReversedVal, Plan->getTrue(), &EVL},
AdjustEndPtr(EndPtr), NewReverse, EVL,
if (Rdx->isConditional() &&
if (Interleave->getMask() &&
if (match(&CurRecipe,
Intrinsic::vp_merge, {Mask, LHS, RHS, &EVL},
3020 "User of VF that we can't transform to EVL.");
3026 [&LoopRegion, &Plan](
VPUser *U) {
3028 m_c_Add(m_Specific(LoopRegion->getCanonicalIV()),
3029 m_Specific(&Plan.getVFxUF()))) ||
3030 isa<VPWidenPointerInductionRecipe>(U);
3032 "Only users of VFxUF should be VPWidenPointerInductionRecipe and the "
3033 "increment of the canonical induction.");
3053 MaxEVL = Builder.createScalarZExtOrTrunc(
3057 Builder.setInsertPoint(Header, Header->getFirstNonPhi());
3058 VPValue *PrevEVL = Builder.createScalarPhi(
3072 Intrinsic::experimental_vp_splice,
3073 {V1, V2, Imm, Plan.
getTrue(), PrevEVL, &EVL},
3077 R.getVPSingleValue()->replaceAllUsesWith(VPSplice);
3095 VPValue *EVLMask = Builder.createICmp(
3113 assert(NumDefVal == CurRecipe->getNumDefinedValues() &&
3114 "New recipe must define the same number of values as the "
3119 for (
unsigned I = 0;
I < NumDefVal; ++
I) {
3120 VPValue *CurVPV = CurRecipe->getVPValue(
I);
3132 R->eraseFromParent();
VPlan &Plan, const std::optional<unsigned> &MaxSafeElements) {
VPValue *StartV = CanonicalIVPHI->getStartValue();
EVLPhi->insertAfter(CanonicalIVPHI);
VPBuilder Builder(Header, Header->getFirstNonPhi());
VPPhi *AVLPhi = Builder.createScalarPhi(
if (MaxSafeElements) {
auto *CanonicalIVIncrement =
Builder.setInsertPoint(CanonicalIVIncrement);
OpVPEVL = Builder.createScalarZExtOrTrunc(
    OpVPEVL, CanIVTy, I32Ty, CanonicalIVIncrement->getDebugLoc());
auto *NextEVLIV = Builder.createOverflowingOp(
    Instruction::Add, {OpVPEVL, EVLPhi},
    {CanonicalIVIncrement->hasNoUnsignedWrap(),
     CanonicalIVIncrement->hasNoSignedWrap()},
    CanonicalIVIncrement->getDebugLoc(), "index.evl.next");
EVLPhi->addOperand(NextEVLIV);
VPValue *NextAVL = Builder.createOverflowingOp(
    Instruction::Sub, {AVLPhi, OpVPEVL}, {true, false},
CanonicalIVPHI->replaceAllUsesWith(EVLPhi);
CanonicalIVIncrement->setOperand(0, CanonicalIVPHI);
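The recipes built here produce the canonical EVL loop shape: a phi for the consumed element count ("index.evl"), a phi for the remaining elements (AVL), and per-iteration updates by the explicit vector length. A scalar sketch of that structure (std::min stands in for the target's EVL computation, e.g. RISC-V vsetvli):

#include <algorithm>
#include <cstddef>

void evlLoop(std::size_t TripCount, std::size_t VF) {
  std::size_t IndexEVL = 0;                // EVL-based IV phi
  for (std::size_t AVL = TripCount; AVL != 0;) {
    std::size_t EVL = std::min(AVL, VF);   // explicit vector length
    // ... process elements [IndexEVL, IndexEVL + EVL) with VP intrinsics ...
    IndexEVL += EVL;                       // "index.evl.next"
    AVL -= EVL;                            // AVL backedge value
  }
}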
assert(!EVLPhi && "Found multiple EVL PHIs. Only one expected");
[[maybe_unused]] bool FoundAVL =
assert(FoundAVL && "Didn't find AVL?");
[[maybe_unused]] bool FoundAVLNext =
assert(FoundAVLNext && "Didn't find AVL backedge?");
VPValue *Backedge = CanonicalIV->getIncomingValue(1);
"Unexpected canonical iv");
CanonicalIV->eraseFromParent();
"Expected BranchOnCond with ICmp comparing EVL increment with vector "
LatchExitingBr->setOperand(0,
return R->getRegion() ||
for (const SCEV *Stride : StridesMap.values()) {
const APInt *StrideConst;
RewriteMap[StrideV] = PSE.getSCEV(StrideV);
const SCEV *ScevExpr = ExpSCEV->getSCEV();
if (NewSCEV != ScevExpr) {
  ExpSCEV->replaceAllUsesWith(NewExp);
const std::function<bool(BasicBlock *)> &BlockNeedsPredication) {
auto CollectPoisonGeneratingInstrsInBackwardSlice([&](VPRecipeBase *Root) {
while (!Worklist.empty()) {
if (!Visited.insert(CurRec).second)
RecWithFlags->isDisjoint()) {
Instruction::Add, {A, B}, {false, false},
RecWithFlags->getDebugLoc());
New->setUnderlyingValue(RecWithFlags->getUnderlyingValue());
RecWithFlags->replaceAllUsesWith(New);
RecWithFlags->eraseFromParent();
RecWithFlags->dropPoisonGeneratingFlags();
assert((!Instr || !Instr->hasPoisonGeneratingFlags()) &&
       "found instruction with poison generating flags not covered by "
       "VPRecipeWithIRFlags");
if (VPRecipeBase *OpDef = Operand->getDefiningRecipe())
Instruction &UnderlyingInstr = WidenRec->getIngredient();
VPRecipeBase *AddrDef = WidenRec->getAddr()->getDefiningRecipe();
if (AddrDef && WidenRec->isConsecutive() &&
    BlockNeedsPredication(UnderlyingInstr.getParent()))
  CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
VPRecipeBase *AddrDef = InterleaveRec->getAddr()->getDefiningRecipe();
InterleaveRec->getInterleaveGroup();
bool NeedPredication = false;
I < NumMembers; ++I) {
NeedPredication |= BlockNeedsPredication(Member->getParent());
if (NeedPredication)
  CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
if (InterleaveGroups.empty())
for (const auto *IG : InterleaveGroups) {
StoredValues.push_back(StoreR->getStoredValue());
for (unsigned I = 1; I < IG->getFactor(); ++I) {
StoredValues.push_back(StoreR->getStoredValue());
bool NeedsMaskForGaps =
    (IG->requiresScalarEpilogue() && !ScalarEpilogueAllowed) ||
    (!StoredValues.empty() && !IG->isFull());
VPValue *Addr = Start->getAddr();
assert(IG->getIndex(IRInsertPos) != 0 &&
       "index of insert position shouldn't be zero");
IG->getIndex(IRInsertPos),
Addr = B.createNoWrapPtrAdd(InsertPos->getAddr(), OffsetVPV, NW);
if (IG->isReverse()) {
-(int64_t)IG->getFactor(), NW, InsertPos->getDebugLoc());
ReversePtr->insertBefore(InsertPos);
InsertPos->getMask(), NeedsMaskForGaps,
InterleaveMD, InsertPos->getDebugLoc());
VPIG->insertBefore(InsertPos);
for (unsigned i = 0; i < IG->getFactor(); ++i)
if (!Member->getType()->isVoidTy()) {
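An interleave group of factor F bundles F accesses with the same stride into one wide access plus shuffles. The scalar shape being grouped looks like the following; after the transform a single wide load of F*VF elements feeds de-interleaving shuffles (illustrative example):

#include <cstddef>

// Factor-2 interleave group: both loads stride by 2 floats and are adjacent,
// so one wide load can cover both members per vector iteration.
void splitComplex(const float *In, float *Re, float *Im, std::size_t N) {
  for (std::size_t I = 0; I != N; ++I) {
    Re[I] = In[2 * I];     // member 0 (insert position)
    Im[I] = In[2 * I + 1]; // member 1
  }
}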
AddOp = Instruction::Add;
MulOp = Instruction::Mul;
AddOp = ID.getInductionOpcode();
MulOp = Instruction::FMul;
Step = Builder.createScalarCast(Instruction::Trunc, Step, Ty, DL);
Start = Builder.createScalarCast(Instruction::Trunc, Start, Ty, DL);
Flags.dropPoisonGeneratingFlags();
Init = Builder.createWidenCast(Instruction::UIToFP, Init, StepTy);
Init = Builder.createNaryOp(MulOp, {Init, SplatStep}, Flags);
Init = Builder.createNaryOp(AddOp, {SplatStart, Init}, Flags,
WidePHI->insertBefore(WidenIVR);
Builder.setInsertPoint(R->getParent(), std::next(R->getIterator()));
VF = Builder.createScalarCast(Instruction::CastOps::UIToFP, VF, StepTy,
VF = Builder.createScalarZExtOrTrunc(VF, StepTy,
Inc = Builder.createNaryOp(MulOp, {Step, VF}, Flags);
auto *Next = Builder.createNaryOp(AddOp, {Prev, Inc}, Flags,
WidePHI->addOperand(Next);
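The expansion implements two formulas: the initial vector IV is splat(Start) + <0,1,...,VF-1> * splat(Step), and each vector iteration advances every lane by VF * Step. A scalar model of both (fixed illustrative VF):

#include <array>
#include <cstddef>

constexpr std::size_t VF = 4; // illustrative vector factor

// Init: lane I holds Start + I * Step.
static std::array<long, VF> widenIVInit(long Start, long Step) {
  std::array<long, VF> Lanes{};
  for (std::size_t I = 0; I != VF; ++I)
    Lanes[I] = Start + static_cast<long>(I) * Step;
  return Lanes;
}

// Backedge: the whole vector advances by VF * Step per vector iteration.
static void widenIVNext(std::array<long, VF> &Lanes, long Step) {
  for (long &L : Lanes)
    L += static_cast<long>(VF) * Step;
}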
VPlan *Plan = R->getParent()->getPlan();
VPValue *Start = R->getStartValue();
VPValue *Step = R->getStepValue();
VPValue *VF = R->getVFValue();
assert(R->getInductionDescriptor().getKind() ==
       "Not a pointer induction according to InductionDescriptor!");
"Recipe should have been replaced");
VPPhi *ScalarPtrPhi = Builder.createScalarPhi(Start, DL, "pointer.phi");
Builder.setInsertPoint(R->getParent(), R->getParent()->getFirstNonPhi());
Offset = Builder.createOverflowingOp(Instruction::Mul, {Offset, Step});
VPValue *PtrAdd = Builder.createNaryOp(
R->replaceAllUsesWith(PtrAdd);
VF = Builder.createScalarZExtOrTrunc(VF, StepTy, TypeInfo.inferScalarType(VF),
VPValue *Inc = Builder.createOverflowingOp(Instruction::Mul, {Step, VF});
Builder.createPtrAdd(ScalarPtrPhi, Inc, DL, "ptr.ind");
if (!R->isReplicator())
R->dissolveToCFGLoop();
assert(Br->getNumOperands() == 2 &&
       "BranchOnTwoConds must have exactly 2 conditions");
assert(Successors.size() == 3 &&
       "BranchOnTwoConds must have exactly 3 successors");
VPValue *EarlyExitingCond = Br->getOperand(0);
VPValue *LateExitingCond = Br->getOperand(1);
VPValue *AnyExitTaken = Builder.createNaryOp(
    Instruction::Or, {EarlyExitingCond, LateExitingCond}, DL);
Br->eraseFromParent();
WidenIVR->replaceAllUsesWith(PtrAdd);
for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)
  Select = Builder.createSelect(Blend->getMask(I),
                                Blend->getIncomingValue(I), Select,
                                R.getDebugLoc(), "predphi");
Blend->replaceAllUsesWith(Select);
for (VPValue *Op : LastActiveL->operands()) {
  VPValue *NotMask = Builder.createNot(Op, LastActiveL->getDebugLoc());
VPValue *FirstInactiveLane = Builder.createNaryOp(
    LastActiveL->getDebugLoc(), "first.inactive.lane");
VPValue *LastLane = Builder.createNaryOp(
    Instruction::Sub, {FirstInactiveLane, One},
    LastActiveL->getDebugLoc(), "last.active.lane");
DebugLoc DL = BranchOnCountInst->getDebugLoc();
ToRemove.push_back(BranchOnCountInst);
? Instruction::UIToFP
: Instruction::Trunc;
VectorStep = Builder.createWidenCast(CastOp, VectorStep, IVTy);
Builder.createWidenCast(Instruction::Trunc, ScalarStep, IVTy);
Flags = {VPI->getFastMathFlags()};
MulOpc, {VectorStep, ScalarStep}, Flags, R.getDebugLoc());
VPI->replaceAllUsesWith(VectorStep);
R->eraseFromParent();
3950 "unsupported early exit VPBB");
3961 "Terminator must be be BranchOnCond");
3962 VPValue *CondOfEarlyExitingVPBB =
3964 auto *CondToEarlyExit = TrueSucc == EarlyExitVPBB
3965 ? CondOfEarlyExitingVPBB
3966 : Builder.createNot(CondOfEarlyExitingVPBB);
3980 VPBuilder EarlyExitB(VectorEarlyExitVPBB);
3985 unsigned EarlyExitIdx = ExitIRI->getNumOperands() - 1;
3986 if (ExitIRI->getNumOperands() != 1) {
3989 ExitIRI->extractLastLaneOfLastPartOfFirstOperand(MiddleBuilder);
3992 VPValue *IncomingFromEarlyExit = ExitIRI->getOperand(EarlyExitIdx);
4001 ExitIRI->
setOperand(EarlyExitIdx, IncomingFromEarlyExit);
4011 "Unexpected terminator");
4012 auto *IsLatchExitTaken =
4014 LatchExitingBranch->getOperand(1));
4016 DebugLoc LatchDL = LatchExitingBranch->getDebugLoc();
4017 LatchExitingBranch->eraseFromParent();
4019 Builder.setInsertPoint(LatchVPBB);
4021 {IsEarlyExitTaken, IsLatchExitTaken}, LatchDL);
4023 LatchVPBB->
setSuccessors({VectorEarlyExitVPBB, MiddleVPBB, HeaderVPBB});
Type *RedTy = Ctx.Types.inferScalarType(Red);
VPValue *VecOp = Red->getVecOp();
auto IsExtendedRedValidAndClampRange =
if (Red->isPartialReduction()) {
ExtRedCost = Ctx.TTI.getPartialReductionCost(
    Opcode, SrcTy, nullptr, RedTy, VF, ExtKind,
ExtRedCost = Ctx.TTI.getExtendedReductionCost(
    Opcode, ExtOpc == Instruction::CastOps::ZExt, RedTy, SrcVecTy,
    Red->getFastMathFlags(), CostKind);
return ExtRedCost.isValid() && ExtRedCost < ExtCost + RedCost;
IsExtendedRedValidAndClampRange(
    Ctx.Types.inferScalarType(A)))
if (Opcode != Instruction::Add && Opcode != Instruction::Sub)
Type *RedTy = Ctx.Types.inferScalarType(Red);
auto IsMulAccValidAndClampRange =
Ext0 ? Ctx.Types.inferScalarType(Ext0->getOperand(0)) : RedTy;
if (Red->isPartialReduction()) {
Ext1 ? Ctx.Types.inferScalarType(Ext1->getOperand(0)) : nullptr;
MulAccCost = Ctx.TTI.getPartialReductionCost(
    Opcode, SrcTy, SrcTy2, RedTy, VF,
if (Ext0 && Ext1 && Ext0->getOpcode() != Ext1->getOpcode())
!Ext0 || Ext0->getOpcode() == Instruction::CastOps::ZExt;
MulAccCost = Ctx.TTI.getMulAccReductionCost(IsZExt, Opcode, RedTy,
ExtCost += Ext0->computeCost(VF, Ctx);
ExtCost += Ext1->computeCost(VF, Ctx);
ExtCost += OuterExt->computeCost(VF, Ctx);
return MulAccCost.isValid() &&
       MulAccCost < ExtCost + MulCost + RedCost;
VPValue *VecOp = Red->getVecOp();
Builder.createWidenCast(Instruction::CastOps::Trunc, ValB, NarrowTy);
Type *WideTy = Ctx.Types.inferScalarType(ExtA);
ValB = ExtB = Builder.createWidenCast(ExtOpc, Trunc, WideTy);
Mul->setOperand(1, ExtB);
ExtendAndReplaceConstantOp(RecipeA, RecipeB, B, Mul);
IsMulAccValidAndClampRange(Mul, RecipeA, RecipeB, nullptr)) {
if (!Sub && IsMulAccValidAndClampRange(Mul, nullptr, nullptr, nullptr))
ExtendAndReplaceConstantOp(Ext0, Ext1, B, Mul);
(Ext->getOpcode() == Ext0->getOpcode() || Ext0 == Ext1) &&
Ext0->getOpcode() == Ext1->getOpcode() &&
IsMulAccValidAndClampRange(Mul, Ext0, Ext1, Ext) && Mul->hasOneUse()) {
Ext0->getOpcode(), Ext0->getOperand(0), Ext->getResultType(), nullptr,
*Ext0, *Ext0, Ext0->getDebugLoc());
NewExt0->insertBefore(Ext0);
Ext->getResultType(), nullptr, *Ext1,
*Ext1, Ext1->getDebugLoc());
Mul->setOperand(0, NewExt0);
Mul->setOperand(1, NewExt1);
Red->setOperand(1, Mul);
auto IP = std::next(Red->getIterator());
auto *VPBB = Red->getParent();
Red->replaceAllUsesWith(AbstractR);
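The bundling above targets the extend-multiply-accumulate shape: both multiplicands are extended from a narrow source type with the same extend opcode, and the products feed a single reduction. Folding the extends into the reduction is what makes dot-product style instructions (e.g. AArch64 udot/sdot) applicable. The scalar source pattern, for reference:

#include <cstddef>
#include <cstdint>

// zext(i8) * zext(i8) accumulated into i32: the MulAccumulateReduction shape.
int32_t dot(const uint8_t *A, const uint8_t *B, std::size_t N) {
  int32_t Sum = 0;
  for (std::size_t I = 0; I != N; ++I)
    Sum += int32_t(A[I]) * int32_t(B[I]);
  return Sum;
}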
for (VPValue *VPV : VPValues) {
if (User->usesScalars(VPV))
HoistPoint = HoistBlock->begin();
"All users must be in the vector preheader or dominated by it");
VPV->replaceUsesWithIf(Broadcast,
                       [VPV, Broadcast](VPUser &U, unsigned Idx) {
                         return Broadcast != &U && !U.usesScalars(VPV);
if (RepR->isPredicated() || !RepR->isSingleScalar() ||
    RepR->getOpcode() != Instruction::Load)
VPValue *Addr = RepR->getOperand(0);
if (!Loc.AATags.Scope)
if (R.mayWriteToMemory()) {
if (!Loc || !Loc->AATags.Scope || !Loc->AATags.NoAlias)
for (auto &[LoadRecipe, LoadLoc] : CandidateLoads) {
const AAMDNodes &LoadAA = LoadLoc.AATags;
return CommonMetadata;
template <unsigned Opcode>
static_assert(Opcode == Instruction::Load || Opcode == Instruction::Store,
              "Only Load and Store opcodes supported");
constexpr bool IsLoad = (Opcode == Instruction::Load);
if (!RepR || RepR->getOpcode() != Opcode || !RepR->isPredicated())
VPValue *Addr = RepR->getOperand(IsLoad ? 0 : 1);
RecipesByAddress[AddrSCEV].push_back(RepR);
return TypeInfo.inferScalarType(IsLoad ? Recipe : Recipe->getOperand(0));
for (auto &[Addr, Recipes] : RecipesByAddress) {
if (Recipes.size() < 2)
VPValue *MaskI = RecipeI->getMask();
Type *TypeI = GetLoadStoreValueType(RecipeI);
bool HasComplementaryMask = false;
VPValue *MaskJ = RecipeJ->getMask();
Type *TypeJ = GetLoadStoreValueType(RecipeJ);
if (TypeI == TypeJ) {
if (HasComplementaryMask) {
  assert(Group.size() >= 2 && "must have at least 2 entries");
template <typename InstType>
for (auto &Group : Groups) {
LoadWithMinAlign->getUnderlyingInstr(), {EarliestLoad->getOperand(0)},
false, nullptr, *EarliestLoad,
UnpredicatedLoad->insertBefore(EarliestLoad);
Load->replaceAllUsesWith(UnpredicatedLoad);
Load->eraseFromParent();
if (!StoreLoc || !StoreLoc->AATags.Scope)
StoresToSink.end());
SinkStoreInfo SinkInfo(StoresToSinkSet, *StoresToSink[0], PSE, L, TypeInfo);
for (auto &Group : Groups) {
VPValue *SelectedValue = Group[0]->getOperand(0);
for (unsigned I = 1; I < Group.size(); ++I) {
  VPValue *Mask = Group[I]->getMask();
  SelectedValue = Builder.createSelect(Mask, Value, SelectedValue,
auto *UnpredicatedStore =
    {SelectedValue, LastStore->getOperand(1)},
    nullptr, *LastStore, CommonMetadata);
UnpredicatedStore->insertBefore(*InsertBB, LastStore->getIterator());
Store->eraseFromParent();
assert(Plan.hasVF(BestVF) && "BestVF is not available in Plan");
assert(Plan.hasUF(BestUF) && "BestUF is not available in Plan");
auto *TCMO = Builder.createNaryOp(
auto UsesVectorOrInsideReplicateRegion = [DefR, LoopRegion](VPUser *U) {
  return !U->usesScalars(DefR) || ParentRegion != LoopRegion;
none_of(DefR->users(), UsesVectorOrInsideReplicateRegion))
DefR->replaceUsesWithIf(
    BuildVector, [BuildVector, &UsesVectorOrInsideReplicateRegion](
      return &U != BuildVector && UsesVectorOrInsideReplicateRegion(&U);
for (VPValue *Def : R.definedValues()) {
auto IsCandidateUnpackUser = [Def](VPUser *U) {
  return U->usesScalars(Def) &&
if (none_of(Def->users(), IsCandidateUnpackUser))
Unpack->insertAfter(&R);
Def->replaceUsesWithIf(Unpack,
                       [&IsCandidateUnpackUser](VPUser &U, unsigned) {
                         return IsCandidateUnpackUser(&U);
bool RequiresScalarEpilogue) {
if (TailByMasking) {
  TC = Builder.createNaryOp(
      {TC, Builder.createNaryOp(Instruction::Sub,
Builder.createNaryOp(Instruction::URem, {TC, Step},
if (RequiresScalarEpilogue) {
  "requiring scalar epilogue is not supported with tail folding");
  R = Builder.createSelect(IsZero, Step, R);
VPValue *Res = Builder.createNaryOp(
Builder.createElementCount(TCTy, VFEC * Plan.getUF());
VPValue *RuntimeVF = Builder.createElementCount(TCTy, VFEC);
BC, [&VF](VPUser &U, unsigned) { return !U.usesScalars(&VF); });
VPValue *MulByUF = Builder.createOverflowingOp(
    Instruction::Mul, {RuntimeVF, UF}, {true, false});
BasicBlock *EntryBB = Entry->getIRBasicBlock();
const SCEV *Expr = ExpSCEV->getSCEV();
ExpandedSCEVs[ExpSCEV->getSCEV()] = Res;
ExpSCEV->eraseFromParent();
"VPExpandSCEVRecipes must be at the beginning of the entry block, "
"after any VPIRInstructions");
auto EI = Entry->begin();
return ExpandedSCEVs;
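The materialized main-loop trip count is the scalar trip count rounded down to a multiple of the step VF * UF, or rounded up when the tail is folded by masking. A minimal sketch of both cases:

#include <cstdint>

uint64_t vectorTripCount(uint64_t TC, uint64_t VF, uint64_t UF, bool FoldTail) {
  uint64_t Step = VF * UF;
  if (FoldTail) // masked lanes absorb the remainder, round up
    return ((TC + Step - 1) / Step) * Step;
  return TC - TC % Step; // remainder runs in the scalar epilogue
}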
return Member0Op == OpV;
return !W->getMask() && Member0Op == OpV;
return IR->getInterleaveGroup()->isFull() && IR->getVPValue(Idx) == OpV;
if (!InterleaveR || InterleaveR->getMask())
Type *GroupElementTy = nullptr;
[&TypeInfo, GroupElementTy](VPValue *Op) {
  return TypeInfo.inferScalarType(Op) == GroupElementTy;
[&TypeInfo, GroupElementTy](VPValue *Op) {
  return TypeInfo.inferScalarType(Op) == GroupElementTy;
return IG->getFactor() == VFMin && IG->getNumMembers() == VFMin &&
       GroupSize == VectorRegWidth;
return RepR && RepR->isSingleScalar();
auto *R = V->getDefiningRecipe();
for (unsigned Idx = 0, E = WideMember0->getNumOperands(); Idx != E; ++Idx)
  WideMember0->setOperand(
auto *LI = cast<LoadInst>(LoadGroup->getInterleaveGroup()->getInsertPos());
*LI, LoadGroup->getAddr(), LoadGroup->getMask(), true,
false, {}, LoadGroup->getDebugLoc());
L->insertBefore(LoadGroup);
assert(RepR->isSingleScalar() &&
       "must be a single scalar load");
NarrowedOps.insert(RepR);
VPValue *PtrOp = WideLoad->getAddr();
PtrOp = VecPtr->getOperand(0);
nullptr, {}, *WideLoad);
N->insertBefore(WideLoad);
if (R.mayWriteToMemory() && !InterleaveR)
if (InterleaveR->getStoredValues().empty())
auto *Member0 = InterleaveR->getStoredValues()[0];
all_of(InterleaveR->getStoredValues(),
       [Member0](VPValue *VPV) { return Member0 == VPV; })) {
VPRecipeBase *DefR = Op.value()->getDefiningRecipe();
auto *IR = dyn_cast<VPInterleaveRecipe>(DefR);
return IR && IR->getInterleaveGroup()->isFull() &&
       IR->getVPValue(Op.index()) == Op.value();
for (const auto &[I, V] : enumerate(InterleaveR->getStoredValues())) {
if (!R || R->getOpcode() != WideMember0->getOpcode() ||
    R->getNumOperands() > 2)
[WideMember0, Idx = I](const auto &P) {
  const auto &[OpIdx, OpV] = P;
  return !canNarrowLoad(WideMember0, OpIdx, OpV, Idx);
if (StoreGroups.empty())
for (auto *StoreGroup : StoreGroups) {
*SI, StoreGroup->getAddr(), Res, nullptr, true,
false, {}, StoreGroup->getDebugLoc());
S->insertBefore(StoreGroup);
StoreGroup->eraseFromParent();
Instruction::Mul, {VScale, UF}, {true, false});
Inc->setOperand(1, UF);
"must have a BranchOnCond");
if (VF.isScalable() && VScaleForTuning.has_value())
  VectorStep *= *VScaleForTuning;
assert(VectorStep > 0 && "trip count should not be zero");
MiddleTerm->setMetadata(LLVMContext::MD_prof, BranchWeights);
if (WideIntOrFp && WideIntOrFp->getTruncInst())
if (!WideIntOrFp || !WideIntOrFp->isCanonical()) {
Start, VectorTC, Step);
VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
IVEndValues[WideIVR] = EndValue;
ResumePhiR->setOperand(0, EndValue);
ResumePhiR->setName("bc.resume.val");
"should only skip truncated wide inductions");
auto *ResumeFromVectorLoop = VectorPhiR->getBackedgeValue();
"Cannot handle loops with uncountable early exits");
"vector.recur.extract");
ResumePhiR->setName(IsFOR ? "scalar.recur.init" : "bc.merge.rdx");
ResumePhiR->setOperand(0, ResumeFromVectorLoop);
VPBuilder ScalarPHBuilder(ScalarPHVPBB);
VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
"Cannot handle loops with uncountable early exits");
make_range(MiddleVPBB->getFirstNonPhi(), MiddleVPBB->end()))) {
"vector.recur.extract.for.phi");
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
This file implements a class to represent arbitrary precision integral constant values and operations...
ReachingDefInfo InstSet & ToRemove
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static bool isEqual(const Function &Caller, const Function &Callee)
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
static bool isSentinel(const DWARFDebugNames::AttributeEncoding &AE)
iv Induction Variable Users
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
Legalize the Machine IR a function s Machine IR
static bool mergeBlocksIntoPredecessors(Loop &L, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution &SE)
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first DebugLoc that has line number information, given a range of instructions.
This file provides utility analysis objects describing memory locations.
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
const SmallVectorImpl< MachineOperand > & Cond
This is the interface for a metadata-based scoped no-alias analysis.
This file defines generic set operations that may be used on set's of different types,...
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static SymbolRef::Type getType(const Symbol *Sym)
This file implements the TypeSwitch template, which mimics a switch() statement whose cases are type ...
This file implements dominator tree analysis for a single level of a VPlan's H-CFG.
This file contains the declarations of different VPlan-related auxiliary helpers.
This file declares the class VPlanVerifier, which contains utility functions to check the consistency...
This file contains the declarations of the Vectorization Plan base classes:
static const X86InstrFMA3Group Groups[]
static const uint32_t IV[8]
Helper for extra no-alias checks via known-safe recipe and SCEV.
SinkStoreInfo(const SmallPtrSetImpl< VPRecipeBase * > &ExcludeRecipes, VPReplicateRecipe &GroupLeader, PredicatedScalarEvolution &PSE, const Loop &L, VPTypeAnalysis &TypeInfo)
bool shouldSkip(VPRecipeBase &R) const
Return true if R should be skipped during alias checking, either because it's in the exclude set or b...
Class for arbitrary precision integers.
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
unsigned getActiveBits() const
Compute the number of active bits in the value.
APInt abs() const
Get the absolute value.
unsigned getBitWidth() const
Return the number of bits in the APInt.
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & back() const
back - Get the last element.
const T & front() const
front - Get the first element.
LLVM Basic Block Representation.
const Function * getParent() const
Return the enclosing method, or null if none.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
This class represents a function call, abstracting a target machine's calling convention.
@ ICMP_ULT
unsigned less than
@ ICMP_ULE
unsigned less or equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
static DebugLoc getCompilerGenerated()
static DebugLoc getUnknown()
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
constexpr bool isVector() const
One or more elements.
static constexpr ElementCount getScalable(ScalarTy MinVal)
Utility class for floating point operations which can have information about relaxed accuracy require...
Represents flags for the getelementptr instruction/expression.
GEPNoWrapFlags withoutNoUnsignedWrap() const
static GEPNoWrapFlags none()
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
A struct for saving information about induction variables.
InductionKind
This enum represents the kinds of inductions that we support.
@ IK_PtrInduction
Pointer induction var. Step = C.
@ IK_IntInduction
Integer induction variable. Step = C.
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
The group of interleaved loads/stores sharing the same stride and close to each other.
InstTy * getMember(uint32_t Index) const
Get the member with the given index Index.
uint32_t getNumMembers() const
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
static bool getDecisionAndClampRange(const std::function< bool(ElementCount)> &Predicate, VFRange &Range)
Test a Predicate on a Range of VF's.
Represents a single loop in the control flow graph.
LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
This class implements a map that also provides access to all stored values in a deterministic order.
ValueT lookup(const KeyT &Key) const
Representation for a specific memory location.
AAMDNodes AATags
The metadata nodes which describes the aliasing of the location (each member is null if that kind of ...
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
LLVM_ABI const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
static LLVM_ABI unsigned getOpcode(RecurKind Kind)
Returns the opcode corresponding to the RecurrenceKind.
unsigned getOpcode() const
RegionT * getParent() const
Get the parent of the Region.
This class uses information about analyze scalars to rewrite expressions in canonical form.
LLVM_ABI Value * expandCodeFor(const SCEV *SH, Type *Ty, BasicBlock::iterator I)
Insert code to directly compute the specified SCEV expression into the program.
static const SCEV * rewrite(const SCEV *Scev, ScalarEvolution &SE, ValueToSCEVMapTy &Map)
This class represents an analyzed expression in the program.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
const DataLayout & getDataLayout() const
Return the DataLayout associated with the module this SCEV instance is operating on.
LLVM_ABI const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI const SCEV * getUDivExpr(const SCEV *LHS, const SCEV *RHS)
Get a canonical unsigned division expression, or something simpler if possible.
LLVM_ABI const SCEV * getElementCount(Type *Ty, ElementCount EC, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
LLVM_ABI const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
LLVM_ABI const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
LLVM_ABI bool isKnownPredicate(CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
static LLVM_ABI bool mayAliasInScopes(const MDNode *Scopes, const MDNode *NoAlias)
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
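These set types back the worklist idiom used by the sinking transform above: a SetVector deduplicates while preserving insertion order and tolerates growth during indexed iteration, a sketch of which:

#include "llvm/ADT/SetVector.h"

// Sketch: index-based drain so elements inserted during processing are
// visited too.
void drainWorkList(llvm::SetVector<int *> &WorkList) {
  for (unsigned I = 0; I != WorkList.size(); ++I) {
    int *Item = WorkList[I];
    (void)Item; // processing may call WorkList.insert(...) safely
  }
}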
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Provides information about what library functions are available for the current target.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
static constexpr TypeSize get(ScalarTy Quantity, bool Scalable)
This class implements a switch-like dispatch statement for a value of 'T' using dyn_cast functionalit...
TypeSwitch< T, ResultT > & Case(CallableT &&caseFn)
Add a case on the given type.
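A sketch of dyn_cast-based dispatch with TypeSwitch, replacing an if/else chain:

#include "llvm/ADT/TypeSwitch.h"
#include "llvm/IR/Instructions.h"

// Sketch: classify an instruction by its dynamic type.
llvm::StringRef classify(llvm::Instruction *I) {
  return llvm::TypeSwitch<llvm::Instruction *, llvm::StringRef>(I)
      .Case<llvm::LoadInst>([](llvm::LoadInst *) { return "load"; })
      .Case<llvm::StoreInst>([](llvm::StoreInst *) { return "store"; })
      .Default([](llvm::Instruction *) { return "other"; });
}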
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
bool isStructTy() const
True if this is an instance of StructType.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
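A sketch of the kind of type guards used before asking a type for its scalar bit width:

#include "llvm/IR/Type.h"

// Sketch: pointers and structs have no plain scalar size to query.
bool hasPlainScalarSize(llvm::Type *Ty) {
  if (Ty->isPointerTy() || Ty->isStructTy())
    return false;
  return Ty->isIntegerTy() || Ty->isFloatingPointTy();
}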
A recipe for generating the active lane mask for the vector loop that is used to predicate the vector...
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
RecipeListTy::iterator iterator
Instruction iterators...
iterator begin()
Recipe iterator methods.
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
VPRegionBlock * getEnclosingLoopRegion()
VPBasicBlock * splitAt(iterator SplitAt)
Split current block at SplitAt by inserting a new block between the current block and its successors ...
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
const VPRecipeBase & back() const
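A sketch against these vectorizer-internal block APIs (see "VPlan.h"): skim the header phis, then append a new recipe at the end of the block:

// Sketch: PHI-like recipes always lead a VPBasicBlock.
void appendAfterPhis(VPBasicBlock *VPBB, VPRecipeBase *NewR) {
  for (VPRecipeBase &Phi : VPBB->phis())
    (void)Phi; // inspect header phis
  VPBB->appendRecipe(NewR); // NewR becomes the block's last recipe
}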
A recipe for vectorizing a phi-node as a sequence of mask-based select instructions.
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
void setMask(unsigned Idx, VPValue *V)
Set mask number Idx to V.
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have...
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccs as successor of this VPBlockBase.
VPRegionBlock * getParent()
const VPBasicBlock * getExitingBasicBlock() const
size_t getNumSuccessors() const
size_t getNumPredecessors() const
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
const VPBlocksTy & getPredecessors() const
void clearSuccessors()
Remove all the successors of this block.
VPBlockBase * getSinglePredecessor() const
const VPBasicBlock * getEntryBasicBlock() const
void setParent(VPRegionBlock *P)
VPBlockBase * getSingleHierarchicalPredecessor()
VPBlockBase * getSingleSuccessor() const
const VPBlocksTy & getSuccessors() const
static auto blocksOnly(const T &Range)
Return an iterator range over Range which only includes BlockTy blocks.
static void insertOnEdge(VPBlockBase *From, VPBlockBase *To, VPBlockBase *BlockPtr)
Inserts BlockPtr on the edge between From and To.
static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBases IfTrue and IfFalse after BlockPtr.
static void connectBlocks(VPBlockBase *From, VPBlockBase *To, unsigned PredIdx=-1u, unsigned SuccIdx=-1u)
Connect VPBlockBases From and To bi-directionally.
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
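A sketch of wiring a diamond out of disconnected blocks with the static helpers above (vectorizer-internal APIs):

// Sketch: Entry branches to Then/Else, both rejoin at Merge.
void makeDiamond(VPBlockBase *Entry, VPBlockBase *Then, VPBlockBase *Else,
                 VPBlockBase *Merge) {
  VPBlockUtils::insertTwoBlocksAfter(Then, Else, Entry);
  VPBlockUtils::connectBlocks(Then, Merge);
  VPBlockUtils::connectBlocks(Else, Merge);
}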
A recipe for generating conditional branches on the bits of a mask.
RAII object that stores the current insertion point and restores it when the object is destroyed.
VPlan-based builder utility analogous to IRBuilder.
VPValue * createScalarZExtOrTrunc(VPValue *Op, Type *ResultTy, Type *SrcTy, DebugLoc DL)
VPValue * createElementCount(Type *Ty, ElementCount EC)
VPInstruction * createScalarCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, DebugLoc DL, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={})
static VPBuilder getToInsertAfter(VPRecipeBase *R)
Create a VPBuilder to insert after R.
VPInstruction * createOverflowingOp(unsigned Opcode, ArrayRef< VPValue * > Operands, VPRecipeWithIRFlags::WrapFlagsTy WrapFlags={false, false}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPDerivedIVRecipe * createDerivedIV(InductionDescriptor::InductionKind Kind, FPMathOperator *FPBinOp, VPIRValue *Start, VPValue *Current, VPValue *Step, const Twine &Name="")
Convert the input value Current to the corresponding value of an induction with Start and Step values...
VPPhi * createScalarPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL, const Twine &Name="")
void setInsertPoint(VPBasicBlock *TheBB)
This specifies that created VPInstructions should be appended to the end of the specified block.
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.
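A sketch of inserting a new VPInstruction right after an existing recipe R, mirroring how the transforms above materialize replacement recipes:

// Sketch: build an add after R (names here are illustrative).
VPValue *emitAddAfter(VPRecipeBase *R, VPValue *A, VPValue *B, DebugLoc DL) {
  VPBuilder Builder = VPBuilder::getToInsertAfter(R);
  return Builder.createNaryOp(llvm::Instruction::Add, {A, B},
                              /*Inst=*/nullptr, /*Flags=*/{}, /*MD=*/{},
                              DL, "new.add");
}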
Canonical scalar induction phi of the vector loop.
unsigned getNumDefinedValues() const
Returns the number of values defined by the VPDef.
VPValue * getVPSingleValue()
Returns the only VPValue defined by the VPDef.
VPValue * getVPValue(unsigned I)
Returns the VPValue with index I defined by the VPDef.
ArrayRef< VPRecipeValue * > definedValues()
Returns an ArrayRef of the values defined by the VPDef.
A recipe for converting the input value IV value to the corresponding value of an IV with different s...
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
bool properlyDominates(const VPRecipeBase *A, const VPRecipeBase *B)
A recipe for generating the phi node for the current index of elements, adjusted in accordance with E...
A recipe to combine multiple recipes into a single 'expression' recipe, which should be considered a ...
A special type of VPBasicBlock that wraps an existing IR basic block.
BasicBlock * getIRBasicBlock() const
Class to record and manage LLVM IR flags.
static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)
Create a new VPIRPhi for I, if it is a PHINode; otherwise create a VPIRInstruction.
This is a concrete Recipe that models a single VPlan-level instruction.
@ ExtractLane
Extracts a single lane (first operand) from a set of vector operands.
@ ExtractPenultimateElement
@ Unpack
Extracts all lanes from its (non-scalable) vector operand.
@ FirstOrderRecurrenceSplice
@ BuildVector
Creates a fixed-width vector containing all operands.
@ BuildStructVector
Given operands of (the same) struct type, creates a struct of fixed- width vectors each containing a ...
@ CanonicalIVIncrementForPart
@ CalculateTripCountMinusVF
const InterleaveGroup< Instruction > * getInterleaveGroup() const
VPValue * getMask() const
Return the mask used by this recipe.
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
A recipe for interleaved memory operations with vector-predication intrinsics.
VPInterleaveRecipe is a recipe for transforming an interleave group of loads or stores into one wide l...
VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when control converges back from ...
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
VPRegionBlock * getRegion()
VPBasicBlock * getParent()
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
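A sketch of the replace-then-erase idiom used when a new recipe supersedes an old single-def recipe:

// Sketch: link New in, retarget users, then delete Old.
void replaceRecipe(VPSingleDefRecipe *Old, VPSingleDefRecipe *New) {
  New->insertBefore(Old);       // insert New into Old's block
  Old->replaceAllUsesWith(New); // retarget all users of Old
  Old->eraseFromParent();       // unlink and delete Old
}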
Helper class to create VPRecipes from IR instructions.
VPRecipeBase * getRecipe(Instruction *I)
Return the recipe created for given ingredient.
A recipe to represent inloop reduction operations with vector-predication intrinsics,...
A recipe to represent inloop, ordered or partial reduction operations.
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
const VPBlockBase * getEntry() const
Type * getCanonicalIVType()
Return the type of the canonical IV for loop regions.
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
void setExiting(VPBlockBase *ExitingBlock)
Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.
VPCanonicalIVPHIRecipe * getCanonicalIV()
Returns the canonical induction recipe of the region.
const VPBlockBase * getExiting() const
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
bool isSingleScalar() const
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...
VPSingleDef is a base class for recipes for modeling a sequence of one or more output IR that define ...
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
VPSingleDefRecipe * clone() override=0
Clone the current recipe.
An analysis for type-inference for VPValues.
LLVMContext & getContext()
Return the LLVMContext used by the analysis.
Type * inferScalarType(const VPValue *V)
Infer the type of V. Returns the scalar type of V.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
void setOperand(unsigned I, VPValue *New)
VPValue * getOperand(unsigned N) const
void addOperand(VPValue *Operand)
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Value * getLiveInIRValue() const
Return the underlying IR value for a VPIRValue.
bool isDefinedOutsideLoopRegions() const
Returns true if the VPValue is defined outside any loop.
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
void setUnderlyingValue(Value *Val)
void replaceAllUsesWith(VPValue *New)
unsigned getNumUsers() const
void replaceUsesWithIf(VPValue *New, llvm::function_ref< bool(VPUser &U, unsigned Idx)> ShouldReplace)
Go through the uses list for this VPValue and make each use point to New if the callback ShouldReplac...
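A sketch of retargeting only the users living in a particular block, via the predicate form of use replacement listed above:

// Sketch: replace uses of Def with New only inside VPBB.
void replaceUsesInBlock(VPValue *Def, VPValue *New, VPBasicBlock *VPBB) {
  Def->replaceUsesWithIf(New, [VPBB](VPUser &U, unsigned) {
    return llvm::cast<VPRecipeBase>(&U)->getParent() == VPBB;
  });
}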
A recipe to compute a pointer to the last element of each part of a widened memory access for widened...
A Recipe for widening the canonical induction variable of the vector loop.
VPWidenCastRecipe is a recipe to create vector cast instructions.
Instruction::CastOps getOpcode() const
A recipe for handling GEP instructions.
Base class for widened induction (VPWidenIntOrFpInductionRecipe and VPWidenPointerInductionRecipe),...
VPIRValue * getStartValue() const
Returns the start value of the induction.
PHINode * getPHINode() const
VPValue * getStepValue()
Returns the step value of the induction.
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
A recipe for handling phi nodes of integer and floating-point inductions, producing their vector valu...
VPIRValue * getStartValue() const
Returns the start value of the induction.
VPValue * getSplatVFValue() const
If the recipe has been unrolled, return the VPValue for the induction increment, otherwise return nul...
VPValue * getLastUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the last unrolled part,...
A recipe for widening vector intrinsics.
A common base class for widening memory operations.
A recipe for widened phis.
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
VPIRValue * getLiveIn(Value *V) const
Return the live-in VPIRValue for V, if there is one or nullptr otherwise.
bool hasVF(ElementCount VF) const
LLVMContext & getContext() const
VPBasicBlock * getEntry()
bool hasScalableVF() const
VPValue & getVFxUF()
Returns VF * UF of the vector loop region.
VPValue & getVF()
Returns the VF of the vector loop region.
VPValue * getTripCount() const
The trip count of the original loop.
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
VPIRValue * getFalse()
Return a VPIRValue wrapping i1 false.
VPRegionBlock * createReplicateRegion(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="")
Create a new replicate region with Entry, Exiting and Name.
auto getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
bool hasUF(unsigned UF) const
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
VPSymbolicValue & getVectorTripCount()
The vector trip count.
VPIRValue * getOrAddLiveIn(Value *V)
Gets the live-in VPIRValue for V or adds a new live-in (if none exists yet) for V.
void setVF(ElementCount VF)
bool isUnrolled() const
Returns true if the VPlan already has been unrolled, i.e.
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
void resetTripCount(VPValue *NewTripCount)
Resets the trip count for the VPlan.
VPBasicBlock * getMiddleBlock()
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
VPIRValue * getTrue()
Return a VPIRValue wrapping i1 true.
bool hasScalarVFOnly() const
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
VPBasicBlock * getVectorPreheader()
Returns the preheader of the vector loop region, if one exists, or null otherwise.
bool hasScalarTail() const
Returns true if the scalar tail may execute after the vector loop.
VPIRValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPIRValue wrapping a ConstantInt with the given type and value.
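A sketch of the plan-level queries a transform typically makes before committing to a rewrite (vectorizer-internal APIs):

// Sketch: bail on scalar-only plans; otherwise confirm the pieces exist.
bool worthTransforming(VPlan &Plan) {
  if (Plan.hasScalarVFOnly())
    return false; // nothing vector-shaped to optimize
  VPValue *TC = Plan.getTripCount();
  return TC && Plan.getVectorLoopRegion() != nullptr;
}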
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
iterator_range< user_iterator > users()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
constexpr ScalarTy getFixedValue() const
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_ABI APInt RoundingUDiv(const APInt &A, const APInt &B, APInt::Rounding RM)
Return A unsign-divided by B, rounded by the given rounding mode.
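A sketch of ceiling division with APInt, e.g. rounding a byte distance up to whole vector-register multiples:

#include "llvm/ADT/APInt.h"

// Sketch: unsigned division rounded up.
llvm::APInt ceilUDiv(const llvm::APInt &A, const llvm::APInt &B) {
  return llvm::APIntOps::RoundingUDiv(A, B, llvm::APInt::Rounding::UP);
}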
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_MaskedStore(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
Matches MaskedStore Intrinsic.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
LogicalOp_match< LHS, RHS, Instruction::And > m_LogicalAnd(const LHS &L, const RHS &R)
Matches L && R either in the form of L & R or L ? R : false.
match_combine_or< CastInst_match< OpTy, ZExtInst >, OpTy > m_ZExtOrSelf(const OpTy &Op)
bool match(Val *V, const Pattern &P)
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_MaskedLoad(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
Matches MaskedLoad Intrinsic.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
SpecificCmpClass_match< LHS, RHS, CmpInst > m_SpecificCmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::UDiv > m_UDiv(const LHS &L, const RHS &R)
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
BinaryOp_match< LHS, RHS, Instruction::Mul, true > m_c_Mul(const LHS &L, const RHS &R)
Matches a Mul with LHS and RHS in either order.
MatchFunctor< Val, Pattern > match_fn(const Pattern &P)
A match functor that can be used as a UnaryPredicate in functional algorithms like all_of.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
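A sketch of the IR matcher style used throughout this file, here recognizing a commutative multiply by one and capturing the other operand:

#include "llvm/IR/PatternMatch.h"

// Sketch: match (X * 1) or (1 * X), binding X.
bool isMulByOne(llvm::Value *V, llvm::Value *&X) {
  using namespace llvm::PatternMatch;
  return match(V, m_c_Mul(m_Value(X), m_One()));
}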
bind_cst_ty m_scev_APInt(const APInt *&C)
Match an SCEV constant and bind it to an APInt.
bool match(const SCEV *S, const Pattern &P)
VPInstruction_match< VPInstruction::ExtractLastLane, VPInstruction_match< VPInstruction::ExtractLastPart, Op0_t > > m_ExtractLastLaneOfLastPart(const Op0_t &Op0)
AllRecipe_commutative_match< Instruction::And, Op0_t, Op1_t > m_c_BinaryAnd(const Op0_t &Op0, const Op1_t &Op1)
Match a binary AND operation.
AllRecipe_match< Instruction::Or, Op0_t, Op1_t > m_BinaryOr(const Op0_t &Op0, const Op1_t &Op1)
Match a binary OR operation.
VPInstruction_match< VPInstruction::AnyOf > m_AnyOf()
AllRecipe_commutative_match< Opcode, Op0_t, Op1_t > m_c_Binary(const Op0_t &Op0, const Op1_t &Op1)
AllRecipe_commutative_match< Instruction::Or, Op0_t, Op1_t > m_c_BinaryOr(const Op0_t &Op0, const Op1_t &Op1)
GEPLikeRecipe_match< Op0_t, Op1_t > m_GetElementPtr(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::BranchOnTwoConds > m_BranchOnTwoConds()
AllRecipe_match< Opcode, Op0_t, Op1_t > m_Binary(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::LastActiveLane, Op0_t > m_LastActiveLane(const Op0_t &Op0)
VPInstruction_match< Instruction::ExtractElement, Op0_t, Op1_t > m_ExtractElement(const Op0_t &Op0, const Op1_t &Op1)
specific_intval< 1 > m_False()
VPDerivedIV_match< Op0_t, Op1_t, Op2_t > m_DerivedIV(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
VPInstruction_match< VPInstruction::ExtractLastLane, Op0_t > m_ExtractLastLane(const Op0_t &Op0)
VPInstruction_match< VPInstruction::ActiveLaneMask, Op0_t, Op1_t, Op2_t > m_ActiveLaneMask(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
VPInstruction_match< VPInstruction::BranchOnCount > m_BranchOnCount()
specific_intval< 1 > m_True()
VectorEndPointerRecipe_match< Op0_t, Op1_t > m_VecEndPtr(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::ExtractLastPart, Op0_t > m_ExtractLastPart(const Op0_t &Op0)
VPInstruction_match< VPInstruction::Broadcast, Op0_t > m_Broadcast(const Op0_t &Op0)
class_match< VPValue > m_VPValue()
Match an arbitrary VPValue and ignore it.
VPInstruction_match< VPInstruction::ExplicitVectorLength, Op0_t > m_EVL(const Op0_t &Op0)
VPInstruction_match< VPInstruction::BuildVector > m_BuildVector()
BuildVector matches only its opcode, without matching its operands, as the number of operands is not fixed.
VPInstruction_match< VPInstruction::ExtractPenultimateElement, Op0_t > m_ExtractPenultimateElement(const Op0_t &Op0)
VPInstruction_match< VPInstruction::FirstActiveLane, Op0_t > m_FirstActiveLane(const Op0_t &Op0)
bind_ty< VPInstruction > m_VPInstruction(VPInstruction *&V)
Match a VPInstruction, capturing if we match.
VPInstruction_match< VPInstruction::BranchOnCond > m_BranchOnCond()
VPInstruction_match< VPInstruction::ExtractLane, Op0_t, Op1_t > m_ExtractLane(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::Reverse, Op0_t > m_Reverse(const Op0_t &Op0)
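The VPlan-side matchers above follow the same shape as the IR ones; a sketch that tests for a broadcast wrapper (vectorizer-internal APIs):

// Sketch: does V come from a Broadcast of some VPValue?
bool isBroadcast(VPValue *V) {
  using namespace llvm::VPlanPatternMatch;
  return match(V, m_Broadcast(m_VPValue()));
}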
NodeAddr< DefNode * > Def
bool isSingleScalar(const VPValue *VPV)
Returns true if VPV is a single scalar, either because it produces the same value for all lanes or on...
bool isUniformAcrossVFsAndUFs(VPValue *V)
Checks if V is uniform across all VF lanes and UF parts.
VPValue * getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr)
Get or create a VPValue that corresponds to the expansion of Expr.
std::optional< MemoryLocation > getMemoryLocation(const VPRecipeBase &R)
Return a MemoryLocation for R with noalias metadata populated from R, if the recipe is supported and ...
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
VPIRFlags getFlagsFromIndDesc(const InductionDescriptor &ID)
Extracts and returns NoWrap and FastMath flags from the induction binop in ID.
bool onlyScalarValuesUsed(const VPValue *Def)
Returns true if only scalar values of Def are used by all users.
const SCEV * getSCEVExprForVPValue(const VPValue *V, PredicatedScalarEvolution &PSE, const Loop *L=nullptr)
Return the SCEV expression for V.
bool isHeaderMask(const VPValue *V, const VPlan &Plan)
Return true if V is a header mask in Plan.
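A sketch of the guards these vputils helpers provide before a transform commits to a single-scalar rewrite of Def:

// Sketch: Def can stay scalar if it is single-scalar or only its first
// lane is ever consumed.
bool canKeepScalar(const VPValue *Def) {
  return vputils::isSingleScalar(Def) || vputils::onlyFirstLaneUsed(Def);
}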
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
auto min_element(R &&Range)
Provide wrappers to std::min_element which take ranges instead of having to pass begin/end explicitly...
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
DenseMap< const Value *, const SCEV * > ValueToSCEVMapTy
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
constexpr from_range_t from_range
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
auto cast_or_null(const Y &Val)
iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order.
iterator_range< df_iterator< VPBlockDeepTraversalWrapper< VPBlockBase * > > > vp_depth_first_deep(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order while traversing t...
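A sketch of a deep depth-first walk over the plan's CFG, narrowed to the leaf VPBasicBlocks via blocksOnly (vectorizer-internal APIs):

// Sketch: deep traversal descends into region blocks.
void visitAllBlocks(VPlan &Plan) {
  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
           vp_depth_first_deep(Plan.getEntry())))
    (void)VPBB; // process each leaf block
}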
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
iterator_range< po_iterator< VPBlockDeepTraversalWrapper< VPBlockBase * > > > vp_post_order_deep(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in post order while traversing through ...
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI_FOR_TEST cl::opt< bool > EnableWideActiveLaneMask
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
iterator_range< filter_iterator< detail::IterOfRange< RangeT >, PredicateT > > make_filter_range(RangeT &&Range, PredicateT Pred)
Convenience function that takes a range of elements and a predicate, and return a new filter_iterator...
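A sketch of the functional range wrappers used throughout this file, applied to an ordinary container:

#include "llvm/ADT/STLExtras.h"
#include <vector>

// Sketch: range-based predicates without explicit begin/end.
bool allStrictlyPositive(const std::vector<int> &V) {
  return llvm::all_of(V, [](int X) { return X > 0; }) &&
         llvm::none_of(V, [](int X) { return X == 0; });
}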
bool canConstantBeExtended(const APInt *C, Type *NarrowType, TTI::PartialReductionExtendKind ExtKind)
Check if a constant CI can be safely treated as having been extended from a narrower type with the gi...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
RecurKind
These are the kinds of recurrences that we support.
@ Mul
Product of integers.
@ Sub
Subtraction of integers.
@ AddChainWithSubs
A chain of adds and subs.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
DWARFExpression::Operation Op
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer list are equal or the list is empty.
@ DataAndControlFlowWithoutRuntimeCheck
Use predicate to control both data and control flow, but modify the trip count so that a runtime over...
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
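A sketch of folding several fields into one hash_code:

#include "llvm/ADT/Hashing.h"

// Sketch: combine two fields into a single hash value.
llvm::hash_code hashKey(unsigned Opcode, unsigned NumOps) {
  return llvm::hash_combine(Opcode, NumOps);
}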
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
@ Default
The result values are uniform if and only if all operands are uniform.
constexpr detail::IsaCheckPredicate< Types... > IsaPred
Function object wrapper for the llvm::isa type check.
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
RemoveMask_match(const Op0_t &In, Op1_t &Out)
bool match(OpTy *V) const
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
MDNode * Scope
The tag for alias scope specification (used with noalias).
MDNode * NoAlias
The tag specifying the noalias scope.
This struct is a compact representation of a valid (non-zero power of two) alignment.
An information struct used to provide DenseMap with the various necessary components for a given valu...
Incoming for lane mask phi as machine instruction, incoming register Reg and incoming block Block are...
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
Struct to hold various analysis needed for cost computations.
A recipe for handling first-order recurrence phis.
A VPValue representing a live-in from the input IR or a constant.
Type * getType() const
Returns the type of the underlying IR value.
A symbolic live-in VPValue, used for values like vector trip count, VF, and VFxUF.
A recipe for widening load operations with vector-predication intrinsics, using the address to load f...
A recipe for widening load operations, using the address to load from and an optional mask.
A recipe for widening store operations with vector-predication intrinsics, using the value to store,...
A recipe for widening store operations, using the stored value, the address to store to and an option...