46 GetIntOrFpInductionDescriptor,
53 if (!VPBB->getParent())
56 auto EndIter = Term ? Term->getIterator() : VPBB->end();
61 VPValue *VPV = Ingredient.getVPSingleValue();
70 const auto *II = GetIntOrFpInductionDescriptor(Phi);
80 Phi, Start, Step, &Plan.getVF(), *II, Ingredient.getDebugLoc());
84 "only VPInstructions expected here");
89 *Load, Ingredient.getOperand(0), nullptr,
91 Ingredient.getDebugLoc());
94 *Store, Ingredient.getOperand(1), Ingredient.getOperand(0),
95 nullptr, false, false,
105 drop_end(Ingredient.operands()), CI->getType(),
111 CI->getOpcode(), Ingredient.getOperand(0), CI->getType(), *CI);
122 "Only recpies with zero or one defined values expected");
123 Ingredient.eraseFromParent();
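// The fragment below sinks recipes closer to their users: candidates are
// collected in a worklist of (sink-to block, candidate) pairs and are
// duplicated when they still have users outside the sink-to block.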
139 if (R.mayHaveSideEffects() || R.mayReadFromMemory() || R.isPhi())
144 return RepR && RepR->getOpcode() == Instruction::Alloca;
153 auto InsertIfValidSinkCandidate = [ScalarVFOnly, &WorkList](
169 if (!ScalarVFOnly && RepR->isSingleScalar())
172 WorkList.insert({SinkTo, Candidate});
184 for (auto &Recipe : *VPBB)
186 InsertIfValidSinkCandidate(VPBB, Op);
190 for (unsigned I = 0; I != WorkList.size(); ++I) {
193 std::tie(SinkTo, SinkCandidate) = WorkList[I];
198 auto UsersOutsideSinkTo =
200 return cast<VPRecipeBase>(U)->getParent() != SinkTo;
202 if (any_of(UsersOutsideSinkTo, [SinkCandidate](VPUser *U) {
203 return !U->usesFirstLaneOnly(SinkCandidate);
206 bool NeedsDuplicating = !UsersOutsideSinkTo.empty();
208 if (NeedsDuplicating) {
212 if (auto *SinkCandidateRepR =
218 nullptr, *SinkCandidateRepR);
221 Clone = SinkCandidate->clone();
231 InsertIfValidSinkCandidate(SinkTo, Op);
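// Merge adjacent replicate regions guarded by the same mask: the regions must
// be separated only by an empty middle block; phis from the first region's
// merge block are moved into the second region's merge block, and the first
// region is recorded as transformed.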
241 if (!EntryBB || EntryBB->size() != 1 ||
251 if (EntryBB->getNumSuccessors() != 2)
256 if (!Succ0 || !Succ1)
259 if (Succ0->getNumSuccessors() + Succ1->getNumSuccessors() != 1)
261 if (Succ0->getSingleSuccessor() == Succ1)
263 if (Succ1->getSingleSuccessor() == Succ0)
280 if (!Region1->isReplicator())
282 auto *MiddleBasicBlock =
284 if (!MiddleBasicBlock || !MiddleBasicBlock->empty())
289 if (!Region2 || !Region2->isReplicator())
294 if (!Mask1 || Mask1 != Mask2)
297 assert(Mask1 && Mask2 && "both regions must have conditions");
303 if (TransformedRegions.contains(Region1))
310 if (!Then1 || !Then2)
330 VPValue *Phi1ToMoveV = Phi1ToMove.getVPSingleValue();
336 if (Phi1ToMove.getVPSingleValue()->getNumUsers() == 0) {
337 Phi1ToMove.eraseFromParent();
340 Phi1ToMove.moveBefore(*Merge2, Merge2->begin());
354 TransformedRegions.insert(Region1);
357 return !TransformedRegions.empty();
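// Wrap a predicated instruction in its own replicate region named
// "pred.<opcode>", using the predicate recipe's mask for the entry branch.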
364 std::string RegionName = (Twine("pred.") + Instr->getOpcodeName()).str();
365 assert(Instr->getParent() && "Predicated instruction not in any basic block");
366 auto *BlockInMask = PredRecipe->getMask();
384 RecipeWithoutMask->getDebugLoc());
408 if (RepR->isPredicated())
427 if (ParentRegion && ParentRegion->getExiting() == CurrentBlock)
441 if (!VPBB->getParent())
445 if (!PredVPBB || PredVPBB->getNumSuccessors() != 1 ||
454 R.moveBefore(*PredVPBB, PredVPBB->end());
456 auto *ParentRegion = VPBB->getParent();
457 if (ParentRegion && ParentRegion->getExiting() == VPBB)
458 ParentRegion->setExiting(PredVPBB);
459 for (auto *Succ : to_vector(VPBB->successors())) {
465 return !WorkList.empty();
472 bool ShouldSimplify = true;
473 while (ShouldSimplify) {
489 if (!IV || IV->getTruncInst())
500 auto &Casts = IV->getInductionDescriptor().getCastInsts();
504 for (auto *U : FindMyCast->users()) {
506 if (UserCast && UserCast->getUnderlyingValue() == IRCast) {
507 FoundUserCast = UserCast;
511 FindMyCast = FoundUserCast;
536 if (!WidenOriginalIV || !WidenOriginalIV->isCanonical())
557 bool IsConditionalAssume = RepR && RepR->isPredicated() &&
559 if (IsConditionalAssume)
562 if (R.mayHaveSideEffects())
566 return all_of(R.definedValues(),
567 [](VPValue *V) { return V->getNumUsers() == 0; });
583 if (!PhiR || PhiR->getNumOperands() != 2)
585 VPUser *PhiUser = PhiR->getSingleUser();
589 if (PhiUser != Incoming->getDefiningRecipe() ||
592 PhiR->replaceAllUsesWith(PhiR->getOperand(0));
593 PhiR->eraseFromParent();
594 Incoming->getDefiningRecipe()->eraseFromParent();
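// Build scalar IV steps: derive an "offset.idx" induction from the canonical
// IV, truncating the base IV and step to the requested result type when they
// differ, before emitting the scalar-steps recipe.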
609 Kind, FPBinOp, StartV, CanonicalIV, Step, "offset.idx");
619 BaseIV = Builder.createScalarCast(Instruction::Trunc, BaseIV, TruncTy, DL);
625 if (ResultTy != StepTy) {
632 Builder.setInsertPoint(VecPreheader);
633 Step = Builder.createScalarCast(Instruction::Trunc, Step, ResultTy, DL);
635 return Builder.createScalarIVSteps(InductionOpcode, FPBinOp, BaseIV, Step,
641 for (unsigned I = 0; I != Users.size(); ++I) {
646 Users.insert_range(V->users());
648 return Users.takeVector();
682 Def->getNumUsers() == 0 || !Def->getUnderlyingValue() ||
683 (RepR && (RepR->isSingleScalar() || RepR->isPredicated())))
691 Def->operands(), true);
692 Clone->insertAfter(Def);
693 Def->replaceAllUsesWith(Clone);
704 VPValue *StepV = PtrIV->getOperand(1);
707 nullptr, StartV, StepV, PtrIV->getDebugLoc(), Builder);
709 VPValue *PtrAdd = Builder.createPtrAdd(PtrIV->getStartValue(), Steps,
719 if (HasOnlyVectorVFs && none_of(WideIV->users(), [WideIV](VPUser *U) {
720 return U->usesScalars(WideIV);
726 Plan, ID.getKind(), ID.getInductionOpcode(),
728 WideIV->getTruncInst(), WideIV->getStartValue(), WideIV->getStepValue(),
729 WideIV->getDebugLoc(), Builder);
732 if (!HasOnlyVectorVFs)
733 WideIV->replaceAllUsesWith(Steps);
735 WideIV->replaceUsesWithIf(Steps, [WideIV](VPUser &U, unsigned) {
736 return U.usesScalars(WideIV);
751 return (IntOrFpIV && IntOrFpIV->getTruncInst()) ? nullptr : WideIV;
756 if (!Def || Def->getNumOperands() != 2)
764 auto IsWideIVInc = [&]() {
765 auto &ID = WideIV->getInductionDescriptor();
768 VPValue *IVStep = WideIV->getStepValue();
769 switch (ID.getInductionOpcode()) {
770 case Instruction::Add:
772 case Instruction::FAdd:
775 case Instruction::FSub:
778 case Instruction::Sub: {
797 return IsWideIVInc() ? WideIV : nullptr;
817 if (WideIntOrFp && WideIntOrFp->getTruncInst())
830 FirstActiveLane = B.createScalarZExtOrTrunc(FirstActiveLane, CanonicalIVType,
831 FirstActiveLaneType, DL);
833 B.createNaryOp(Instruction::Add, {CanonicalIV, FirstActiveLane}, DL);
840 EndValue = B.createNaryOp(Instruction::Add, {EndValue, One}, DL);
843 if (!WideIntOrFp || !WideIntOrFp->isCanonical()) {
845 VPValue *Start = WideIV->getStartValue();
846 VPValue *Step = WideIV->getStepValue();
847 EndValue = B.createDerivedIV(
849 Start, EndValue, Step);
869 assert(EndValue && "end value must have been pre-computed");
879 VPValue *Step = WideIV->getStepValue();
882 return B.createNaryOp(Instruction::Sub, {EndValue, Step}, {}, "ind.escape");
886 return B.createPtrAdd(EndValue,
887 B.createNaryOp(Instruction::Sub, {Zero, Step}),
891 const auto &ID = WideIV->getInductionDescriptor();
892 return B.createNaryOp(
893 ID.getInductionBinOp()->getOpcode() == Instruction::FAdd
896 {EndValue, Step}, {ID.getInductionBinOp()->getFastMathFlags()});
911 for (auto [Idx, PredVPBB] : enumerate(ExitVPBB->getPredecessors())) {
913 if (PredVPBB == MiddleVPBB)
915 ExitIRI->getOperand(Idx),
919 ExitIRI->getOperand(Idx), SE);
921 ExitIRI->setOperand(Idx, Escape);
938 const auto &[V, Inserted] = SCEV2VPV.try_emplace(ExpR->getSCEV(), ExpR);
941 ExpR->replaceAllUsesWith(V->second);
942 ExpR->eraseFromParent();
951 while (!WorkList.empty()) {
953 if (!Seen.insert(Cur).second)
961 R->eraseFromParent();
968 static std::optional<std::pair<bool, unsigned>>
971 std::optional<std::pair<bool, unsigned>>>(R)
974 [](auto *I) { return std::make_pair(false, I->getOpcode()); })
975 .Case<VPWidenIntrinsicRecipe>([](auto *I) {
976 return std::make_pair(true, I->getVectorIntrinsicID());
978 .Case<VPVectorPointerRecipe, VPPredInstPHIRecipe>([](auto *I) {
982 return std::make_pair(false,
985 .Default([](auto *) { return std::nullopt; });
1001 if (!Op->isLiveIn() || !Op->getLiveInIRValue())
1003 Ops.push_back(Op->getLiveInIRValue());
1006 auto FoldToIRValue = [&]() -> Value * {
1008 if (OpcodeOrIID->first) {
1009 if (R.getNumOperands() != 2)
1011 unsigned ID = OpcodeOrIID->second;
1012 return Folder.FoldBinaryIntrinsic(ID, Ops[0], Ops[1],
1015 unsigned Opcode = OpcodeOrIID->second;
1024 return Folder.FoldSelect(Ops[0], Ops[1],
1027 return Folder.FoldBinOp(Instruction::BinaryOps::Xor, Ops[0],
1029 case Instruction::Select:
1030 return Folder.FoldSelect(Ops[0], Ops[1], Ops[2]);
1031 case Instruction::ICmp:
1032 case Instruction::FCmp:
1035 case Instruction::GetElementPtr: {
1038 return Folder.FoldGEP(GEP->getSourceElementType(), Ops[0],
1048 case Instruction::ExtractElement:
1055 if (Value *V = FoldToIRValue())
1056 return R.getParent()->getPlan()->getOrAddLiveIn(V);
1062 VPlan *Plan = Def->getParent()->getPlan();
1069 return Def->replaceAllUsesWith(V);
1075 PredPHI->replaceAllUsesWith(Op);
1083 if (TruncTy == ATy) {
1084 Def->replaceAllUsesWith(A);
1093 : Instruction::ZExt;
1096 if (auto *UnderlyingExt = Def->getOperand(0)->getUnderlyingValue()) {
1098 Ext->setUnderlyingValue(UnderlyingExt);
1100 Def->replaceAllUsesWith(Ext);
1102 auto *Trunc = Builder.createWidenCast(Instruction::Trunc, A, TruncTy);
1103 Def->replaceAllUsesWith(Trunc);
1111 for (VPUser *U : A->users()) {
1113 for (VPValue *VPV : R->definedValues())
1127 Def->replaceAllUsesWith(X);
1128 Def->eraseFromParent();
1134 return Def->replaceAllUsesWith(Def->getOperand(Def->getOperand(0) == X));
1138 return Def->replaceAllUsesWith(X);
1142 return Def->replaceAllUsesWith(Def->getOperand(Def->getOperand(0) == X));
1146 return Def->replaceAllUsesWith(Def->getOperand(1));
1153 (!Def->getOperand(0)->hasMoreThanOneUniqueUser() ||
1154 !Def->getOperand(1)->hasMoreThanOneUniqueUser()))
1155 return Def->replaceAllUsesWith(
1156 Builder.createLogicalAnd(X, Builder.createOr(Y, Z)));
1160 return Def->replaceAllUsesWith(Plan->getFalse());
1163 return Def->replaceAllUsesWith(X);
1168 Def->setOperand(0, C);
1169 Def->setOperand(1, Y);
1170 Def->setOperand(2, X);
1179 X->hasMoreThanOneUniqueUser())
1180 return Def->replaceAllUsesWith(
1181 Builder.createLogicalAnd(X, Builder.createLogicalAnd(Y, Z)));
1184 return Def->replaceAllUsesWith(A);
1187 return Def->replaceAllUsesWith(
1188 Def->getOperand(0) == A ? Def->getOperand(1) : Def->getOperand(0));
1192 return Def->replaceAllUsesWith(A);
1207 R->setOperand(1, Y);
1208 R->setOperand(2, X);
1212 R->replaceAllUsesWith(Cmp);
1217 if (!Cmp->getDebugLoc() && Def->getDebugLoc())
1218 Cmp->setDebugLoc(Def->getDebugLoc());
1231 return Def->replaceAllUsesWith(NewCmp);
1239 return Def->replaceAllUsesWith(Def->getOperand(1));
1245 X = Builder.createWidenCast(Instruction::Trunc, X, WideStepTy);
1246 Def->replaceAllUsesWith(X);
1256 Def->setOperand(1, Def->getOperand(0));
1257 Def->setOperand(0, Y);
1262 if (Phi->getOperand(0) == Phi->getOperand(1))
1263 Phi->replaceAllUsesWith(Phi->getOperand(0));
1271 Def->replaceAllUsesWith(
1272 BuildVector->getOperand(BuildVector->getNumOperands() - 1));
1280 Def->replaceAllUsesWith(
1281 BuildVector->getOperand(BuildVector->getNumOperands() - 2));
1288 Def->replaceAllUsesWith(BuildVector->getOperand(Idx));
1293 Def->replaceAllUsesWith(
1303 "broadcast operand must be single-scalar");
1304 Def->setOperand(0, C);
1309 if (Phi->getNumOperands() == 1)
1310 Phi->replaceAllUsesWith(Phi->getOperand(0));
1323 if (Phi->getOperand(1) != Def && match(Phi->getOperand(0), m_ZeroInt()) &&
1324 Phi->getSingleUser() == Def) {
1325 Phi->setOperand(0, Y);
1326 Def->replaceAllUsesWith(Phi);
1333 if (VecPtr->isFirstPart()) {
1334 VecPtr->replaceAllUsesWith(VecPtr->getOperand(0));
1343 Steps->replaceAllUsesWith(Steps->getOperand(0));
1351 Def->replaceUsesWithIf(StartV, [](const VPUser &U, unsigned Idx) {
1353 return PhiR && PhiR->isInLoop();
1361 Def->replaceAllUsesWith(A);
1371 [Def, A](VPUser *U) { return U->usesScalars(A) || Def == U; })) {
1372 return Def->replaceAllUsesWith(A);
1375 if (Plan->getUF() == 1 &&
1377 return Def->replaceAllUsesWith(
1407 if (RepR && (RepR->isSingleScalar() || RepR->isPredicated()))
1414 RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(),
1415 true, nullptr, *RepR);
1416 Clone->insertBefore(RepOrWidenR);
1417 unsigned ExtractOpc =
1421 auto *Ext = new VPInstruction(ExtractOpc, {Clone->getOperand(0)});
1422 Ext->insertBefore(Clone);
1423 Clone->setOperand(0, Ext);
1424 RepR->eraseFromParent();
1432 !all_of(RepOrWidenR->users(), [RepOrWidenR](const VPUser *U) {
1433 if (auto *Store = dyn_cast<VPWidenStoreRecipe>(U)) {
1439 assert(RepOrWidenR != Store->getStoredValue() ||
1440 vputils::isSingleScalar(Store->getStoredValue()));
1445 unsigned Opcode = VPI->getOpcode();
1452 return U->usesScalars(RepOrWidenR);
1457 RepOrWidenR->operands(),
1459 Clone->insertBefore(RepOrWidenR);
1460 RepOrWidenR->replaceAllUsesWith(Clone);
1462 RepOrWidenR->eraseFromParent();
1498 if (Blend->isNormalized() || !match(Blend->getMask(0), m_False()))
1499 UniqueValues.insert(Blend->getIncomingValue(0));
1500 for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)
1502 UniqueValues.insert(Blend->getIncomingValue(I));
1504 if (UniqueValues.size() == 1) {
1505 Blend->replaceAllUsesWith(*UniqueValues.begin());
1506 Blend->eraseFromParent();
1510 if (Blend->isNormalized())
1516 unsigned StartIndex = 0;
1517 for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
1522 if (Mask->getNumUsers() == 1 && !match(Mask, m_False())) {
1529 OperandsWithMask.push_back(Blend->getIncomingValue(StartIndex));
1531 for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
1532 if (I == StartIndex)
1534 OperandsWithMask.push_back(Blend->getIncomingValue(I));
1535 OperandsWithMask.push_back(Blend->getMask(I));
1540 OperandsWithMask, Blend->getDebugLoc());
1541 NewBlend->insertBefore(&R);
1543 VPValue *DeadMask = Blend->getMask(StartIndex);
1545 Blend->eraseFromParent();
1550 if (NewBlend->getNumOperands() == 3 &&
1552 VPValue *Inc0 = NewBlend->getOperand(0);
1553 VPValue *Inc1 = NewBlend->getOperand(1);
1554 VPValue *OldMask = NewBlend->getOperand(2);
1555 NewBlend->setOperand(0, Inc1);
1556 NewBlend->setOperand(1, Inc0);
1557 NewBlend->setOperand(2, NewMask);
1584 APInt MaxVal = AlignedTC - 1;
1587 unsigned NewBitWidth =
1593 bool MadeChange = false;
1602 if (!WideIV || !WideIV->isCanonical() ||
1603 WideIV->hasMoreThanOneUniqueUser() ||
1604 NewIVTy == WideIV->getScalarType())
1609 VPUser *SingleUser = WideIV->getSingleUser();
1618 WideIV->setStartValue(NewStart);
1620 WideIV->setStepValue(NewStep);
1626 Cmp->setOperand(1, NewBTC);
1640 return any_of(Cond->getDefiningRecipe()->operands(), [&Plan, BestVF, BestUF,
1642 return isConditionTrueViaVFAndUF(C, Plan, BestVF, BestUF, SE);
1655 const SCEV *VectorTripCount =
1660 "Trip count SCEV must be computable");
1680 auto *Term = &ExitingVPBB->back();
1693 for (unsigned Part = 0; Part < UF; ++Part) {
1700 Extracts[Part] = Ext;
1701 Ext->insertAfter(ALM);
1712 match(Phi->getBackedgeValue(),
1714 assert(Index && "Expected index from ActiveLaneMask instruction");
1727 "Expected one VPActiveLaneMaskPHIRecipe for each unroll part");
1734 "Expected incoming values of Phi to be ActiveLaneMasks");
1739 EntryALM->setOperand(2, ALMMultiplier);
1740 LoopALM->setOperand(2, ALMMultiplier);
1744 ExtractFromALM(EntryALM, EntryExtracts);
1749 ExtractFromALM(LoopALM, LoopExtracts);
1751 Not->setOperand(0, LoopExtracts[0]);
1754 for (unsigned Part = 0; Part < UF; ++Part) {
1755 Phis[Part]->setStartValue(EntryExtracts[Part]);
1756 Phis[Part]->setBackedgeValue(LoopExtracts[Part]);
1769 auto *Term = &ExitingVPBB->back();
1777 const SCEV *VectorTripCount =
1782 "Trip count SCEV must be computable");
1804 if (auto *R = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi))
1805 return R->isCanonical();
1806 return isa<VPCanonicalIVPHIRecipe, VPEVLBasedIVPHIRecipe,
1807 VPFirstOrderRecurrencePHIRecipe, VPPhi>(&Phi);
1813 R->getScalarType());
1815 HeaderR.eraseFromParent();
1819 HeaderR.getVPSingleValue()->replaceAllUsesWith(Phi->getIncomingValue(0));
1820 HeaderR.eraseFromParent();
1829 B->setParent(nullptr);
1838 Term->getDebugLoc());
1842 Term->eraseFromParent();
1869 R.getVPSingleValue()->replaceAllUsesWith(Trunc);
1879 assert(Plan.hasVF(BestVF) && "BestVF is not available in Plan");
1880 assert(Plan.hasUF(BestUF) && "BestUF is not available in Plan");
1889 assert(Plan.getUF() == BestUF && "BestUF must match the Plan's UF");
1904 auto TryToPushSinkCandidate = [&](VPRecipeBase *SinkCandidate) {
1907 if (SinkCandidate == Previous)
1911 !Seen.insert(SinkCandidate).second ||
1924 for (unsigned I = 0; I != WorkList.size(); ++I) {
1927 "only recipes with a single defined value expected");
1942 if (SinkCandidate == FOR)
1945 SinkCandidate->moveAfter(Previous);
1946 Previous = SinkCandidate;
1964 for (VPUser *U : FOR->users()) {
1970 [&VPDT, HoistPoint](VPUser *U) {
1971 auto *R = cast<VPRecipeBase>(U);
1972 return HoistPoint == R ||
1973 VPDT.properlyDominates(HoistPoint, R);
1975 "HoistPoint must dominate all users of FOR");
1977 auto NeedsHoisting = [HoistPoint, &VPDT,
1979 VPRecipeBase *HoistCandidate = HoistCandidateV->getDefiningRecipe();
1980 if (!HoistCandidate)
1985 HoistCandidate->getRegion() == EnclosingLoopRegion) &&
1986 "CFG in VPlan should still be flat, without replicate regions");
1988 if (!Visited.insert(HoistCandidate).second)
2000 return HoistCandidate;
2009 for (unsigned I = 0; I != HoistCandidates.size(); ++I) {
2012 "only recipes with a single defined value expected");
2024 if (auto *R = NeedsHoisting(Op))
2036 HoistCandidate->moveBefore(*HoistPoint->getParent(),
2055 VPRecipeBase *Previous = FOR->getBackedgeValue()->getDefiningRecipe();
2058 while (auto *PrevPhi =
2060 assert(PrevPhi->getParent() == FOR->getParent());
2062 Previous = PrevPhi->getBackedgeValue()->getDefiningRecipe();
2080 {FOR, FOR->getBackedgeValue()});
2082 FOR->replaceAllUsesWith(RecurSplice);
2085 RecurSplice->setOperand(0, FOR);
2096 RecurKind RK = PhiR->getRecurrenceKind();
2103 RecWithFlags->dropPoisonGeneratingFlags();
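// VPCSEDenseMapInfo provides DenseMap hashing and equality for
// VPSingleDefRecipes so that structurally identical recipes (same opcode,
// operands, flags and inferred scalar type) can be deduplicated by a
// VPlan-level CSE pass.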
2109 struct VPCSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> {
2111 return Def == getEmptyKey() || Def == getTombstoneKey();
2122 return GEP->getSourceElementType();
2125 .Case<VPVectorPointerRecipe, VPWidenGEPRecipe>(
2126 [](auto *I) { return I->getSourceElementType(); })
2127 .Default([](auto *) { return nullptr; });
2131 static bool canHandle(const VPSingleDefRecipe *Def) {
2140 if (!C || (!C->first && (C->second == Instruction::InsertValue ||
2141 C->second == Instruction::ExtractValue)))
2147 return !Def->mayReadFromMemory();
2151 static unsigned getHashValue(const VPSingleDefRecipe *Def) {
2152 const VPlan *Plan = Def->getParent()->getPlan();
2153 VPTypeAnalysis TypeInfo(*Plan);
2156 getGEPSourceElementType(Def), TypeInfo.inferScalarType(Def),
2159 if (RFlags->hasPredicate())
2165 static bool isEqual(const VPSingleDefRecipe *L, const VPSingleDefRecipe *R) {
2168 if (L->getVPDefID() != R->getVPDefID() ||
2170 getGEPSourceElementType(L) != getGEPSourceElementType(R) ||
2172 !equal(L->operands(), R->operands()))
2175 "must have valid opcode info for both recipes");
2177 if (LFlags->hasPredicate() &&
2178 LFlags->getPredicate() !=
2184 const VPRegionBlock *RegionL = L->getRegion();
2185 const VPRegionBlock *RegionR = R->getRegion();
2188 L->getParent() != R->getParent())
2190 const VPlan *Plan = L->getParent()->getPlan();
2191 VPTypeAnalysis TypeInfo(*Plan);
2192 return TypeInfo.inferScalarType(L) == TypeInfo.inferScalarType(R);
2207 if (!Def || !VPCSEDenseMapInfo::canHandle(Def))
2211 if (!VPDT.dominates(V->getParent(), VPBB))
2216 Def->replaceAllUsesWith(V);
2235 "Expected vector prehader's successor to be the vector loop region");
2242 return !Op->isDefinedOutsideLoopRegions();
2245 R.moveBefore(*Preheader, Preheader->end());
2269 VPValue *ResultVPV = R.getVPSingleValue();
2271 unsigned NewResSizeInBits = MinBWs.lookup(UI);
2272 if (!NewResSizeInBits)
2285 (void)OldResSizeInBits;
2293 VPW->dropPoisonGeneratingFlags();
2295 if (OldResSizeInBits != NewResSizeInBits &&
2300 Ext->insertAfter(&R);
2302 Ext->setOperand(0, ResultVPV);
2303 assert(OldResSizeInBits > NewResSizeInBits && "Nothing to shrink?");
2306 "Only ICmps should not need extending the result.");
2315 for (unsigned Idx = StartIdx; Idx != R.getNumOperands(); ++Idx) {
2316 auto *Op = R.getOperand(Idx);
2317 unsigned OpSizeInBits =
2319 if (OpSizeInBits == NewResSizeInBits)
2321 assert(OpSizeInBits > NewResSizeInBits && "nothing to truncate");
2322 auto [ProcessedIter, IterIsEmpty] = ProcessedTruncs.try_emplace(Op);
2324 R.setOperand(Idx, ProcessedIter->second);
2332 Builder.setInsertPoint(&R);
2334 Builder.createWidenCast(Instruction::Trunc, Op, NewResTy);
2335 ProcessedIter->second = NewOp;
2336 R.setOperand(Idx, NewOp);
2351 assert(VPBB->getNumSuccessors() == 2 &&
2352 "Two successors expected for BranchOnCond");
2353 unsigned RemovedIdx;
2364 "There must be a single edge between VPBB and its successor");
2373 VPBB->back().eraseFromParent();
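// Create an active-lane-mask phi for tail folding: an entry mask
// ("active.lane.mask.entry") is computed in the preheader, the in-loop mask
// ("active.lane.mask.next") is recomputed from the incremented IV and the
// trip count, and the loop branch tests the negated mask.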
2434 VPValue *StartV = CanonicalIVPHI->getStartValue();
2436 auto *CanonicalIVIncrement =
2440 CanonicalIVIncrement->dropPoisonGeneratingFlags();
2441 DebugLoc DL = CanonicalIVIncrement->getDebugLoc();
2451 VPValue *TripCount, *IncrementValue;
2456 IncrementValue = CanonicalIVIncrement;
2462 IncrementValue = CanonicalIVPHI;
2466 auto *EntryIncrement = Builder.createOverflowingOp(
2474 {EntryIncrement, TC, ALMMultiplier}, DL,
2475 "active.lane.mask.entry");
2481 LaneMaskPhi->insertAfter(CanonicalIVPHI);
2486 Builder.setInsertPoint(OriginalTerminator);
2487 auto *InLoopIncrement =
2489 {IncrementValue}, {false, false}, DL);
2491 {InLoopIncrement, TripCount, ALMMultiplier},
2492 DL, "active.lane.mask.next");
2497 auto *NotMask = Builder.createNot(ALM, DL);
2510 auto *FoundWidenCanonicalIVUser = find_if(
2514 "Must have at most one VPWideCanonicalIVRecipe");
2515 if (FoundWidenCanonicalIVUser !=
2517 auto *WideCanonicalIV =
2519 WideCanonicalIVs.push_back(WideCanonicalIV);
2527 if (WidenOriginalIV && WidenOriginalIV->isCanonical())
2528 WideCanonicalIVs.push_back(WidenOriginalIV);
2534 for (auto *Wide : WideCanonicalIVs) {
2540 assert(VPI->getOperand(0) == Wide &&
2541 "WidenCanonicalIV must be the first operand of the compare");
2542 assert(!HeaderMask && "Multiple header masks found?");
2550 VPlan &Plan, bool UseActiveLaneMaskForControlFlow,
2553 UseActiveLaneMaskForControlFlow) &&
2554 "DataAndControlFlowWithoutRuntimeCheck implies "
2555 "UseActiveLaneMaskForControlFlow");
2558 auto *FoundWidenCanonicalIVUser = find_if(
2560 assert(FoundWidenCanonicalIVUser &&
2561 "Must have widened canonical IV when tail folding!");
2563 auto *WideCanonicalIV =
2566 if (UseActiveLaneMaskForControlFlow) {
2576 nullptr, "active.lane.mask");
2592 template <typename OpTy> bool match(OpTy *V) const {
2605 template <typename Op0_t, typename Op1_t>
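// Convert masked memory and reduction recipes to their vector-predicated (VP)
// forms: end pointers are adjusted by EVL, loads/stores/reductions take the
// EVL operand, and selects become vp.merge intrinsic calls.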
2623 VPValue *Addr, *Mask, *EndPtr;
2626 auto AdjustEndPtr = [&CurRecipe, &EVL](VPValue *EndPtr) {
2628 EVLEndPtr->insertBefore(&CurRecipe);
2629 EVLEndPtr->setOperand(1, &EVL);
2633 if (match(&CurRecipe,
2639 if (match(&CurRecipe,
2644 AdjustEndPtr(EndPtr), EVL, Mask);
2657 AdjustEndPtr(EndPtr), EVL, Mask);
2660 if (Rdx->isConditional() &&
2665 if (Interleave->getMask() &&
2670 if (match(&CurRecipe,
2679 Intrinsic::vp_merge, {Mask, LHS, RHS, &EVL},
2694 "User of VF that we can't transform to EVL.");
2700 [&LoopRegion, &Plan](VPUser *U) {
2702 m_c_Add(m_Specific(LoopRegion->getCanonicalIV()),
2703 m_Specific(&Plan.getVFxUF()))) ||
2704 isa<VPWidenPointerInductionRecipe>(U);
2706 "Only users of VFxUF should be VPWidenPointerInductionRecipe and the "
2707 "increment of the canonical induction.");
2727 MaxEVL = Builder.createScalarZExtOrTrunc(
2731 Builder.setInsertPoint(Header, Header->getFirstNonPhi());
2732 VPValue *PrevEVL = Builder.createScalarPhi(
2746 Intrinsic::experimental_vp_splice,
2747 {V1, V2, Imm, Plan.getTrue(), PrevEVL, &EVL},
2750 R.getVPSingleValue()->replaceAllUsesWith(VPSplice);
2768 VPValue *EVLMask = Builder.createICmp(
2786 assert(NumDefVal == CurRecipe->getNumDefinedValues() &&
2787 "New recipe must define the same number of values as the "
2792 for (unsigned I = 0; I < NumDefVal; ++I) {
2793 VPValue *CurVPV = CurRecipe->getVPValue(I);
2805 R->eraseFromParent();
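// Introduce an explicit-vector-length (EVL) based IV: an EVL phi tracks the
// elements consumed so far, an AVL phi tracks the remaining trip count
// (optionally clamped to MaxSafeElements), and the canonical IV increment is
// rewired to step by the EVL each iteration ("index.evl.next").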
2855 VPlan &Plan, const std::optional<unsigned> &MaxSafeElements) {
2863 VPValue *StartV = CanonicalIVPHI->getStartValue();
2867 EVLPhi->insertAfter(CanonicalIVPHI);
2868 VPBuilder Builder(Header, Header->getFirstNonPhi());
2871 VPPhi *AVLPhi = Builder.createScalarPhi(
2875 if (MaxSafeElements) {
2885 auto *CanonicalIVIncrement =
2887 Builder.setInsertPoint(CanonicalIVIncrement);
2891 OpVPEVL = Builder.createScalarZExtOrTrunc(
2892 OpVPEVL, CanIVTy, I32Ty, CanonicalIVIncrement->getDebugLoc());
2894 auto *NextEVLIV = Builder.createOverflowingOp(
2895 Instruction::Add, {OpVPEVL, EVLPhi},
2896 {CanonicalIVIncrement->hasNoUnsignedWrap(),
2897 CanonicalIVIncrement->hasNoSignedWrap()},
2898 CanonicalIVIncrement->getDebugLoc(), "index.evl.next");
2899 EVLPhi->addOperand(NextEVLIV);
2901 VPValue *NextAVL = Builder.createOverflowingOp(
2902 Instruction::Sub, {AVLPhi, OpVPEVL}, {true, false},
2910 CanonicalIVPHI->replaceAllUsesWith(EVLPhi);
2911 CanonicalIVIncrement->setOperand(0, CanonicalIVPHI);
2925 assert(!EVLPhi && "Found multiple EVL PHIs. Only one expected");
2936 [[maybe_unused]] bool FoundAVL =
2939 assert(FoundAVL && "Didn't find AVL?");
2947 [[maybe_unused]] bool FoundAVLNext =
2950 assert(FoundAVLNext && "Didn't find AVL backedge?");
2961 VPValue *Backedge = CanonicalIV->getIncomingValue(1);
2964 "Unexpected canonical iv");
2970 CanonicalIV->eraseFromParent();
2983 match(LatchExitingBr,
2986 "Unexpected terminator in EVL loop");
2993 LatchExitingBr->eraseFromParent();
3003 return R->getRegion() ||
3007 for (const SCEV *Stride : StridesMap.values()) {
3010 const APInt *StrideConst;
3011 if (!match(PSE.getSCEV(StrideV), m_scev_APInt(StrideConst)))
3027 unsigned BW = U->getType()->getScalarSizeInBits();
3033 RewriteMap[StrideV] = PSE.getSCEV(StrideV);
3040 const SCEV *ScevExpr = ExpSCEV->getSCEV();
3043 if (NewSCEV != ScevExpr) {
3045 ExpSCEV->replaceAllUsesWith(NewExp);
3054 const std::function<bool(BasicBlock *)> &BlockNeedsPredication) {
3058 auto CollectPoisonGeneratingInstrsInBackwardSlice([&](VPRecipeBase *Root) {
3063 while (!Worklist.empty()) {
3066 if (!Visited.insert(CurRec).second)
3088 RecWithFlags->isDisjoint()) {
3091 Instruction::Add, {A, B}, {false, false},
3092 RecWithFlags->getDebugLoc());
3093 New->setUnderlyingValue(RecWithFlags->getUnderlyingValue());
3094 RecWithFlags->replaceAllUsesWith(New);
3095 RecWithFlags->eraseFromParent();
3098 RecWithFlags->dropPoisonGeneratingFlags();
3103 assert((!Instr || !Instr->hasPoisonGeneratingFlags()) &&
3104 "found instruction with poison generating flags not covered by "
3105 "VPRecipeWithIRFlags");
3110 if (VPRecipeBase *OpDef = Operand->getDefiningRecipe())
3122 Instruction &UnderlyingInstr = WidenRec->getIngredient();
3123 VPRecipeBase *AddrDef = WidenRec->getAddr()->getDefiningRecipe();
3124 if (AddrDef && WidenRec->isConsecutive() &&
3125 BlockNeedsPredication(UnderlyingInstr.getParent()))
3126 CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
3128 VPRecipeBase *AddrDef = InterleaveRec->getAddr()->getDefiningRecipe();
3132 InterleaveRec->getInterleaveGroup();
3133 bool NeedPredication = false;
3135 I < NumMembers; ++I) {
3138 NeedPredication |= BlockNeedsPredication(Member->getParent());
3141 if (NeedPredication)
3142 CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
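// Lower interleave groups to VPInterleaveRecipes: collect the stored values
// of all members, compute a mask for gaps where needed, adjust the address to
// the group's insert position (reversing it for reversed groups), and insert
// a single wide recipe in place of the member recipes.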
3154 if (InterleaveGroups.empty())
3161 for (const auto *IG : InterleaveGroups) {
3167 StoredValues.push_back(StoreR->getStoredValue());
3168 for (unsigned I = 1; I < IG->getFactor(); ++I) {
3175 StoredValues.push_back(StoreR->getStoredValue());
3179 bool NeedsMaskForGaps =
3180 (IG->requiresScalarEpilogue() && !ScalarEpilogueAllowed) ||
3181 (!StoredValues.empty() && !IG->isFull());
3193 VPValue *Addr = Start->getAddr();
3202 assert(IG->getIndex(IRInsertPos) != 0 &&
3203 "index of insert position shouldn't be zero");
3207 IG->getIndex(IRInsertPos),
3211 Addr = B.createNoWrapPtrAdd(InsertPos->getAddr(), OffsetVPV, NW);
3217 if (IG->isReverse()) {
3220 -(int64_t)IG->getFactor(), NW, InsertPos->getDebugLoc());
3221 ReversePtr->insertBefore(InsertPos);
3225 InsertPos->getMask(), NeedsMaskForGaps,
3226 InterleaveMD, InsertPos->getDebugLoc());
3227 VPIG->insertBefore(InsertPos);
3230 for (unsigned i = 0; i < IG->getFactor(); ++i)
3233 if (!Member->getType()->isVoidTy()) {
3294 AddOp = Instruction::Add;
3295 MulOp = Instruction::Mul;
3297 AddOp = ID.getInductionOpcode();
3298 MulOp = Instruction::FMul;
3299 Flags = ID.getInductionBinOp()->getFastMathFlags();
3307 Step = Builder.createScalarCast(Instruction::Trunc, Step, Ty, DL);
3308 Start = Builder.createScalarCast(Instruction::Trunc, Start, Ty, DL);
3317 Init = Builder.createWidenCast(Instruction::UIToFP, Init, StepTy);
3322 Init = Builder.createNaryOp(MulOp, {Init, SplatStep}, Flags);
3323 Init = Builder.createNaryOp(AddOp, {SplatStart, Init}, Flags,
3329 WidePHI->insertBefore(WidenIVR);
3340 Builder.setInsertPoint(R->getParent(), std::next(R->getIterator()));
3344 VF = Builder.createScalarCast(Instruction::CastOps::UIToFP, VF, StepTy,
3347 VF = Builder.createScalarZExtOrTrunc(VF, StepTy,
3350 Inc = Builder.createNaryOp(MulOp, {Step, VF}, Flags);
3357 auto *Next = Builder.createNaryOp(AddOp, {Prev, Inc}, Flags,
3360 WidePHI->addOperand(Next);
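// Expand a widened pointer induction into a scalar "pointer.phi" plus
// per-lane pointer adds: the vector offsets are scaled by the step, and the
// phi is advanced by Step * VF ("ptr.ind") each iteration.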
3388 VPlan *Plan = R->getParent()->getPlan();
3389 VPValue *Start = R->getStartValue();
3390 VPValue *Step = R->getStepValue();
3391 VPValue *VF = R->getVFValue();
3393 assert(R->getInductionDescriptor().getKind() ==
3395 "Not a pointer induction according to InductionDescriptor!");
3398 "Recipe should have been replaced");
3404 VPPhi *ScalarPtrPhi = Builder.createScalarPhi(Start, DL, "pointer.phi");
3408 Builder.setInsertPoint(R->getParent(), R->getParent()->getFirstNonPhi());
3411 Offset = Builder.createNaryOp(Instruction::Mul, {Offset, Step});
3412 VPValue *PtrAdd = Builder.createNaryOp(
3414 R->replaceAllUsesWith(PtrAdd);
3419 VF = Builder.createScalarZExtOrTrunc(VF, StepTy, TypeInfo.inferScalarType(VF),
3421 VPValue *Inc = Builder.createNaryOp(Instruction::Mul, {Step, VF});
3424 Builder.createPtrAdd(ScalarPtrPhi, Inc, DL, "ptr.ind");
3433 if (!R->isReplicator())
3437 R->dissolveToCFGLoop();
3462 for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)
3463 Select = Builder.createSelect(Blend->getMask(I),
3464 Blend->getIncomingValue(I), Select,
3465 R.getDebugLoc(), "predphi");
3466 Blend->replaceAllUsesWith(Select);
3486 ? Instruction::UIToFP
3487 : Instruction::Trunc;
3488 VectorStep = Builder.createWidenCast(CastOp, VectorStep, IVTy);
3494 Builder.createWidenCast(Instruction::Trunc, ScalarStep, IVTy);
3499 Flags = {VPI->getFastMathFlags()};
3504 MulOpc, {VectorStep, ScalarStep}, Flags, R.getDebugLoc());
3506 VPI->replaceAllUsesWith(VectorStep);
3512 R->eraseFromParent();
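// Handle an uncountable early exit: compute the early-exit condition from the
// exiting block's terminator, extract the first active lane for exit values
// ("first.active.lane", "early.exit.value"), and branch on the disjunction of
// the early-exit and latch-exit conditions.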
3525 "unsupported early exit VPBB");
3536 "Terminator must be be BranchOnCond");
3537 VPValue *CondOfEarlyExitingVPBB =
3539 auto *CondToEarlyExit = TrueSucc == EarlyExitVPBB
3540 ? CondOfEarlyExitingVPBB
3541 : Builder.createNot(CondOfEarlyExitingVPBB);
3558 VPBuilder EarlyExitB(VectorEarlyExitVPBB);
3563 unsigned EarlyExitIdx = ExitIRI->getNumOperands() - 1;
3564 if (ExitIRI->getNumOperands() != 1) {
3567 ExitIRI->extractLastLaneOfFirstOperand(MiddleBuilder);
3570 VPValue *IncomingFromEarlyExit = ExitIRI->getOperand(EarlyExitIdx);
3571 if (!IncomingFromEarlyExit->isLiveIn()) {
3575 "first.active.lane");
3578 nullptr, "early.exit.value");
3579 ExitIRI->setOperand(EarlyExitIdx, IncomingFromEarlyExit);
3589 "Unexpected terminator");
3590 auto *IsLatchExitTaken =
3592 LatchExitingBranch->getOperand(1));
3593 auto *AnyExitTaken = Builder.createNaryOp(
3594 Instruction::Or, {IsEarlyExitTaken, IsLatchExitTaken});
3596 LatchExitingBranch->eraseFromParent();
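// Form abstract reduction bundles when profitable: an extend feeding a
// reduction becomes an extended reduction, and a (possibly extended) multiply
// feeding a reduction becomes a multiply-accumulate or partial reduction,
// guarded by TTI cost comparisons against the unbundled recipes.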
3606 Type *RedTy = Ctx.Types.inferScalarType(Red);
3607 VPValue *VecOp = Red->getVecOp();
3610 auto IsExtendedRedValidAndClampRange =
3627 ExtRedCost = Ctx.TTI.getPartialReductionCost(
3628 Opcode, SrcTy, nullptr, RedTy, VF, ExtKind,
3631 ExtRedCost = Ctx.TTI.getExtendedReductionCost(
3632 Opcode, ExtOpc == Instruction::CastOps::ZExt, RedTy, SrcVecTy,
3633 Red->getFastMathFlags(), CostKind);
3635 return ExtRedCost.isValid() && ExtRedCost < ExtCost + RedCost;
3643 IsExtendedRedValidAndClampRange(
3646 Ctx.Types.inferScalarType(A)))
3666 if (Opcode != Instruction::Add && Opcode != Instruction::Sub)
3669 Type *RedTy = Ctx.Types.inferScalarType(Red);
3672 auto IsMulAccValidAndClampRange =
3679 Ext0 ? Ctx.Types.inferScalarType(Ext0->getOperand(0)) : RedTy;
3682 if (IsPartialReduction) {
3684 Ext1 ? Ctx.Types.inferScalarType(Ext1->getOperand(0)) : nullptr;
3687 MulAccCost = Ctx.TTI.getPartialReductionCost(
3688 Opcode, SrcTy, SrcTy2, RedTy, VF,
3698 if (Ext0 && Ext1 && Ext0->getOpcode() != Ext1->getOpcode())
3702 !Ext0 || Ext0->getOpcode() == Instruction::CastOps::ZExt;
3704 MulAccCost = Ctx.TTI.getMulAccReductionCost(IsZExt, Opcode, RedTy,
3712 ExtCost += Ext0->computeCost(VF, Ctx);
3714 ExtCost += Ext1->computeCost(VF, Ctx);
3716 ExtCost += OuterExt->computeCost(VF, Ctx);
3718 return MulAccCost.isValid() &&
3719 MulAccCost < ExtCost + MulCost + RedCost;
3724 VPValue *VecOp = Red->getVecOp();
3742 if (!ExtA || ExtB || !ValB->isLiveIn())
3758 Builder.createWidenCast(Instruction::CastOps::Trunc, ValB, NarrowTy);
3759 Type *WideTy = Ctx.Types.inferScalarType(ExtA);
3760 ValB = ExtB = Builder.createWidenCast(ExtOpc, Trunc, WideTy);
3761 Mul->setOperand(1, ExtB);
3771 ExtendAndReplaceConstantOp(RecipeA, RecipeB, B, Mul);
3776 IsMulAccValidAndClampRange(Mul, RecipeA, RecipeB, nullptr)) {
3783 if (!Sub && IsMulAccValidAndClampRange(Mul, nullptr, nullptr, nullptr))
3800 ExtendAndReplaceConstantOp(Ext0, Ext1, B, Mul);
3809 (Ext->getOpcode() == Ext0->getOpcode() || Ext0 == Ext1) &&
3810 Ext0->getOpcode() == Ext1->getOpcode() &&
3811 IsMulAccValidAndClampRange(Mul, Ext0, Ext1, Ext) && Mul->hasOneUse()) {
3813 Ext0->getOpcode(), Ext0->getOperand(0), Ext->getResultType(), *Ext0,
3814 *Ext0, Ext0->getDebugLoc());
3815 NewExt0->insertBefore(Ext0);
3820 Ext->getResultType(), *Ext1, *Ext1,
3821 Ext1->getDebugLoc());
3824 Mul->setOperand(0, NewExt0);
3825 Mul->setOperand(1, NewExt1);
3826 Red->setOperand(1, Mul);
3839 auto IP = std::next(Red->getIterator());
3840 auto *VPBB = Red->getParent();
3850 Red->replaceAllUsesWith(AbstractR);
3880 for (VPValue *VPV : VPValues) {
3882 (VPV->isLiveIn() && VPV->getLiveInIRValue() &&
3890 if (User->usesScalars(VPV))
3893 HoistPoint = HoistBlock->begin();
3897 "All users must be in the vector preheader or dominated by it");
3902 VPV->replaceUsesWithIf(Broadcast,
3903 [VPV, Broadcast](VPUser &U, unsigned Idx) {
3904 return Broadcast != &U && !U.usesScalars(VPV);
3912 assert(Plan.hasVF(BestVF) && "BestVF is not available in Plan");
3913 assert(Plan.hasUF(BestUF) && "BestUF is not available in Plan");
3947 auto *TCMO = Builder.createNaryOp(
3975 auto UsesVectorOrInsideReplicateRegion = [DefR, LoopRegion](VPUser *U) {
3977 return !U->usesScalars(DefR) || ParentRegion != LoopRegion;
3984 none_of(DefR->users(), UsesVectorOrInsideReplicateRegion))
3994 DefR->replaceUsesWithIf(
3995 BuildVector, [BuildVector, &UsesVectorOrInsideReplicateRegion](
3997 return &U != BuildVector && UsesVectorOrInsideReplicateRegion(&U);
4011 for (VPValue *Def : R.definedValues()) {
4024 auto IsCandidateUnpackUser = [Def](VPUser *U) {
4026 return U->usesScalars(Def) &&
4029 if (none_of(Def->users(), IsCandidateUnpackUser))
4036 Unpack->insertAfter(&R);
4037 Def->replaceUsesWithIf(Unpack,
4038 [&IsCandidateUnpackUser](VPUser &U, unsigned) {
4039 return IsCandidateUnpackUser(&U);
4049 bool RequiresScalarEpilogue) {
4051 assert(VectorTC.isLiveIn() && "vector-trip-count must be a live-in");
4070 if (TailByMasking) {
4071 TC = Builder.createNaryOp(
4073 {TC, Builder.createNaryOp(Instruction::Sub,
4084 Builder.createNaryOp(Instruction::URem, {TC, Step},
4093 if (RequiresScalarEpilogue) {
4095 "requiring scalar epilogue is not supported with fail folding");
4098 R = Builder.createSelect(IsZero, Step, R);
4101 VPValue *Res = Builder.createNaryOp(
4120 Builder.createElementCount(TCTy, VFEC * Plan.getUF());
4127 VPValue *RuntimeVF = Builder.createElementCount(TCTy, VFEC);
4131 BC, [&VF](VPUser &U, unsigned) { return !U.usesScalars(&VF); });
4136 VPValue *MulByUF = Builder.createNaryOp(Instruction::Mul, {RuntimeVF, UF});
4146 BasicBlock *EntryBB = Entry->getIRBasicBlock();
4154 const SCEV *Expr = ExpSCEV->getSCEV();
4157 ExpandedSCEVs[ExpSCEV->getSCEV()] = Res;
4162 ExpSCEV->eraseFromParent();
4165 "VPExpandSCEVRecipes must be at the beginning of the entry block, "
4166 "after any VPIRInstructions");
4169 auto EI = Entry->begin();
4179 return ExpandedSCEVs;
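// Narrow interleave groups when the group is full and unmasked and its
// members are interchangeable lane-for-lane (e.g. all store the same value):
// the wide interleaved access is replaced by a single narrower load or store.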
4195 return Member0Op == OpV;
4197 return !W->getMask() && Member0Op == OpV;
4199 return IR->getInterleaveGroup()->isFull() && IR->getVPValue(Idx) == OpV;
4210 if (!InterleaveR || InterleaveR->getMask())
4213 Type *GroupElementTy = nullptr;
4217 [&TypeInfo, GroupElementTy](VPValue *Op) {
4218 return TypeInfo.inferScalarType(Op) == GroupElementTy;
4225 [&TypeInfo, GroupElementTy](VPValue *Op) {
4226 return TypeInfo.inferScalarType(Op) == GroupElementTy;
4235 return IG->getFactor() == VFMin && IG->getNumMembers() == VFMin &&
4236 GroupSize == VectorRegWidth;
4244 return RepR && RepR->isSingleScalar();
4251 auto *R = V->getDefiningRecipe();
4259 for (unsigned Idx = 0, E = WideMember0->getNumOperands(); Idx != E; ++Idx)
4260 WideMember0->setOperand(
4269 auto *LI = cast<LoadInst>(LoadGroup->getInterleaveGroup()->getInsertPos());
4271 *LI, LoadGroup->getAddr(), LoadGroup->getMask(), true,
4272 false, {}, LoadGroup->getDebugLoc());
4273 L->insertBefore(LoadGroup);
4279 assert(RepR->isSingleScalar() &&
4281 "must be a single scalar load");
4282 NarrowedOps.insert(RepR);
4287 VPValue *PtrOp = WideLoad->getAddr();
4289 PtrOp = VecPtr->getOperand(0);
4294 nullptr, *WideLoad);
4295 N->insertBefore(WideLoad);
4325 if (R.mayWriteToMemory() && !InterleaveR)
4347 if (InterleaveR->getStoredValues().empty())
4352 auto *Member0 = InterleaveR->getStoredValues()[0];
4354 all_of(InterleaveR->getStoredValues(),
4355 [Member0](VPValue *VPV) { return Member0 == VPV; })) {
4363 VPRecipeBase *DefR = Op.value()->getDefiningRecipe();
4366 auto *IR = dyn_cast<VPInterleaveRecipe>(DefR);
4367 return IR && IR->getInterleaveGroup()->isFull() &&
4368 IR->getVPValue(Op.index()) == Op.value();
4380 for (const auto &[I, V] : enumerate(InterleaveR->getStoredValues())) {
4382 if (!R || R->getOpcode() != WideMember0->getOpcode() ||
4383 R->getNumOperands() > 2)
4386 [WideMember0, Idx = I](const auto &P) {
4387 const auto &[OpIdx, OpV] = P;
4388 return !canNarrowLoad(WideMember0, OpIdx, OpV, Idx);
4395 if (StoreGroups.empty())
4401 for (auto *StoreGroup : StoreGroups) {
4407 *SI, StoreGroup->getAddr(), Res, nullptr, true,
4408 false, {}, StoreGroup->getDebugLoc());
4409 S->insertBefore(StoreGroup);
4410 StoreGroup->eraseFromParent();
4428 Inc->setOperand(1, UF);
4447 "must have a BranchOnCond");
4450 if (VF.isScalable() && VScaleForTuning.has_value())
4451 VectorStep *= *VScaleForTuning;
4452 assert(VectorStep > 0 && "trip count should not be zero");
4456 MiddleTerm->addMetadata(LLVMContext::MD_prof, BranchWeights);
4468 if (WideIntOrFp && WideIntOrFp->getTruncInst())
4475 if (!WideIntOrFp || !WideIntOrFp->isCanonical()) {
4478 Start, VectorTC, Step);
4491 {EndValue, Start}, WideIV->getDebugLoc(), "bc.resume.val");
4492 return ResumePhiRecipe;
4504 VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
4515 WideIVR, VectorPHBuilder, ScalarPHBuilder, TypeInfo,
4518 IVEndValues[WideIVR] = ResumePhi->getOperand(0);
4519 ScalarPhiIRI->addOperand(ResumePhi);
4526 "should only skip truncated wide inductions");
4534 auto *ResumeFromVectorLoop = VectorPhiR->getBackedgeValue();
4536 "Cannot handle loops with uncountable early exits");
4540 "vector.recur.extract");
4541 StringRef Name = IsFOR ? "scalar.recur.init" : "bc.merge.rdx";
4543 {ResumeFromVectorLoop, VectorPhiR->getStartValue()}, {}, Name);
4553 VPBuilder ScalarPHBuilder(ScalarPHVPBB);
4554 VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
4566 "Cannot handle loops with uncountable early exits");
4638 for (VPUser *U : FOR->users()) {
4652 {}, "vector.recur.extract.for.phi");
Class for arbitrary precision integers.
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
unsigned getActiveBits() const
Compute the number of active bits in the value.
unsigned getBitWidth() const
Return the number of bits in the APInt.
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
LLVM Basic Block Representation.
const Function * getParent() const
Return the enclosing method, or null if none.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
This class represents a function call, abstracting a target machine's calling convention.
@ ICMP_ULT
unsigned less than
@ ICMP_ULE
unsigned less or equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
static DebugLoc getCompilerGenerated()
static DebugLoc getUnknown()
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
constexpr bool isVector() const
One or more elements.
static constexpr ElementCount getScalable(ScalarTy MinVal)
Utility class for floating point operations which can have information about relaxed accuracy require...
Represents flags for the getelementptr instruction/expression.
GEPNoWrapFlags withoutNoUnsignedWrap() const
static GEPNoWrapFlags none()
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
A struct for saving information about induction variables.
InductionKind
This enum represents the kinds of inductions that we support.
@ IK_PtrInduction
Pointer induction var. Step = C.
@ IK_IntInduction
Integer induction variable. Step = C.
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
The group of interleaved loads/stores sharing the same stride and close to each other.
InstTy * getMember(uint32_t Index) const
Get the member with the given index Index.
uint32_t getNumMembers() const
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
static bool getDecisionAndClampRange(const std::function< bool(ElementCount)> &Predicate, VFRange &Range)
Test a Predicate on a Range of VF's.
LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
This class implements a map that also provides access to all stored values in a deterministic order.
ValueT lookup(const KeyT &Key) const
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
LLVM_ABI const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
static LLVM_ABI unsigned getOpcode(RecurKind Kind)
Returns the opcode corresponding to the RecurrenceKind.
unsigned getOpcode() const
RegionT * getParent() const
Get the parent of the Region.
This class uses information about analyze scalars to rewrite expressions in canonical form.
LLVM_ABI Value * expandCodeFor(const SCEV *SH, Type *Ty, BasicBlock::iterator I)
Insert code to directly compute the specified SCEV expression into the program.
static const SCEV * rewrite(const SCEV *Scev, ScalarEvolution &SE, ValueToSCEVMapTy &Map)
This class represents an analyzed expression in the program.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
const DataLayout & getDataLayout() const
Return the DataLayout associated with the module this SCEV instance is operating on.
LLVM_ABI const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI const SCEV * getUDivExpr(const SCEV *LHS, const SCEV *RHS)
Get a canonical unsigned division expression, or something simpler if possible.
LLVM_ABI const SCEV * getElementCount(Type *Ty, ElementCount EC, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
LLVM_ABI const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
LLVM_ABI bool isKnownPredicate(CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
This class represents the LLVM 'select' instruction.
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
Provides information about what library functions are available for the current target.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
static constexpr TypeSize get(ScalarTy Quantity, bool Scalable)
This class implements a switch-like dispatch statement for a value of 'T' using dyn_cast functionalit...
TypeSwitch< T, ResultT > & Case(CallableT &&caseFn)
Add a case on the given type.
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
bool isStructTy() const
True if this is an instance of StructType.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
A recipe for generating the active lane mask for the vector loop that is used to predicate the vector...
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
RecipeListTy::iterator iterator
Instruction iterators...
iterator begin()
Recipe iterator methods.
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
VPRegionBlock * getEnclosingLoopRegion()
VPBasicBlock * splitAt(iterator SplitAt)
Split current block at SplitAt by inserting a new block between the current block and its successors ...
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
const VPRecipeBase & back() const
A recipe for vectorizing a phi-node as a sequence of mask-based select instructions.
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
void setMask(unsigned Idx, VPValue *V)
Set mask number Idx to V.
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have...
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
VPRegionBlock * getParent()
const VPBasicBlock * getExitingBasicBlock() const
size_t getNumSuccessors() const
void swapSuccessors()
Swap successors of the block. The block must have exactly 2 successors.
size_t getNumPredecessors() const
const VPBlocksTy & getPredecessors() const
VPBlockBase * getSinglePredecessor() const
const VPBasicBlock * getEntryBasicBlock() const
VPBlockBase * getSingleHierarchicalPredecessor()
VPBlockBase * getSingleSuccessor() const
const VPBlocksTy & getSuccessors() const
static auto blocksOnly(const T &Range)
Return an iterator range over Range which only includes BlockTy blocks.
static void insertOnEdge(VPBlockBase *From, VPBlockBase *To, VPBlockBase *BlockPtr)
Inserts BlockPtr on the edge between From and To.
static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBases IfTrue and IfFalse after BlockPtr.
static void connectBlocks(VPBlockBase *From, VPBlockBase *To, unsigned PredIdx=-1u, unsigned SuccIdx=-1u)
Connect VPBlockBases From and To bi-directionally.
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
A recipe for generating conditional branches on the bits of a mask.
RAII object that stores the current insertion point and restores it when the object is destroyed.
VPlan-based builder utility analogous to IRBuilder.
VPValue * createScalarZExtOrTrunc(VPValue *Op, Type *ResultTy, Type *SrcTy, DebugLoc DL)
VPValue * createElementCount(Type *Ty, ElementCount EC)
VPInstruction * createScalarCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, DebugLoc DL, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={})
VPDerivedIVRecipe * createDerivedIV(InductionDescriptor::InductionKind Kind, FPMathOperator *FPBinOp, VPValue *Start, VPValue *Current, VPValue *Step, const Twine &Name="")
Convert the input value Current to the corresponding value of an induction with Start and Step values...
static VPBuilder getToInsertAfter(VPRecipeBase *R)
Create a VPBuilder to insert after R.
VPPhi * createScalarPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL, const Twine &Name="")
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const Twine &Name="")
Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.
void setInsertPoint(VPBasicBlock *TheBB)
This specifies that created VPInstructions should be appended to the end of the specified block.
Canonical scalar induction phi of the vector loop.
unsigned getNumDefinedValues() const
Returns the number of values defined by the VPDef.
ArrayRef< VPValue * > definedValues()
Returns an ArrayRef of the values defined by the VPDef.
VPValue * getVPSingleValue()
Returns the only VPValue defined by the VPDef.
VPValue * getVPValue(unsigned I)
Returns the VPValue with index I defined by the VPDef.
A recipe for converting the input value IV value to the corresponding value of an IV with different s...
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
bool properlyDominates(const VPRecipeBase *A, const VPRecipeBase *B)
A recipe for generating the phi node for the current index of elements, adjusted in accordance with E...
A recipe to combine multiple recipes into a single 'expression' recipe, which should be considered a ...
A special type of VPBasicBlock that wraps an existing IR basic block.
BasicBlock * getIRBasicBlock() const
Class to record and manage LLVM IR flags.
static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)
Create a new VPIRPhi for \I , if it is a PHINode, otherwise create a VPIRInstruction.
This is a concrete Recipe that models a single VPlan-level instruction.
@ ExtractLane
Extracts a single lane (first operand) from a set of vector operands.
@ ExtractPenultimateElement
@ Unpack
Extracts all lanes from its (non-scalable) vector operand.
@ FirstOrderRecurrenceSplice
@ BuildVector
Creates a fixed-width vector containing all operands.
@ BuildStructVector
Given operands of (the same) struct type, creates a struct of fixed- width vectors each containing a ...
@ CanonicalIVIncrementForPart
@ CalculateTripCountMinusVF
const InterleaveGroup< Instruction > * getInterleaveGroup() const
VPValue * getMask() const
Return the mask used by this recipe.
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
A recipe for interleaved memory operations with vector-predication intrinsics.
VPInterleaveRecipe is a recipe for transforming an interleave group of loads or stores into one wide l...
VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when control converges back from ...
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
VPRegionBlock * getRegion()
VPBasicBlock * getParent()
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
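A minimal sketch combining these manipulators; it assumes the caller has already established that R has no side effects and defines exactly one value, so both branches are safe, and the helper name is hypothetical.

// Sketch only: erase R if its single result is unused, otherwise move it to
// the start of DestBB.
static void moveOrErase(VPRecipeBase *R, VPBasicBlock *DestBB) {
  if (R->getVPSingleValue()->getNumUsers() == 0) {
    // Unlink R from its containing block and delete it.
    R->eraseFromParent();
    return;
  }
  // Unlink R and re-insert it at the beginning of DestBB.
  R->moveBefore(*DestBB, DestBB->begin());
}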
Helper class to create VPRecipes from IR instructions.
VPRecipeBase * getRecipe(Instruction *I)
Return the recipe created for given ingredient.
A recipe to represent inloop reduction operations with vector-predication intrinsics,...
A recipe to represent inloop reduction operations, performing a reduction on a vector operand into a ...
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
const VPBlockBase * getEntry() const
Type * getCanonicalIVType()
Return the type of the canonical IV for loop regions.
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
void setExiting(VPBlockBase *ExitingBlock)
Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.
VPCanonicalIVPHIRecipe * getCanonicalIV()
Returns the canonical induction recipe of the region.
const VPBlockBase * getExiting() const
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
bool isSingleScalar() const
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...
VPSingleDefRecipe is a base class for recipes for modeling a sequence of one or more output IR that define ...
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
VPSingleDefRecipe * clone() override=0
Clone the current recipe.
An analysis for type-inference for VPValues.
LLVMContext & getContext()
Return the LLVMContext used by the analysis.
Type * inferScalarType(const VPValue *V)
Infer the type of V. Returns the scalar type of V.
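A one-line sketch of the analysis in use; TypeInfo is assumed to have been constructed elsewhere for the plan under transformation, and the helper name is hypothetical.

// Sketch only: scalar types are uniqued, so pointer equality suffices.
static bool haveSameScalarType(VPTypeAnalysis &TypeInfo, const VPValue *A,
                               const VPValue *B) {
  return TypeInfo.inferScalarType(A) == TypeInfo.inferScalarType(B);
}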
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
void setOperand(unsigned I, VPValue *New)
VPValue * getOperand(unsigned N) const
void addOperand(VPValue *Operand)
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Value * getLiveInIRValue() const
Returns the underlying IR value, if this VPValue is defined outside the scope of VPlan.
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
void setUnderlyingValue(Value *Val)
void replaceAllUsesWith(VPValue *New)
unsigned getNumUsers() const
bool isLiveIn() const
Returns true if this VPValue is a live-in, i.e. defined outside the VPlan.
void replaceUsesWithIf(VPValue *New, llvm::function_ref< bool(VPUser &U, unsigned Idx)> ShouldReplace)
Go through the uses list for this VPValue and make each use point to New if the callback ShouldReplac...
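A minimal sketch of the predicated replacement API, restricting the rewrite to users placed in a particular block; the helper name is hypothetical.

// Sketch only: make users of Old inside VPBB use New instead.
static void replaceUsesInBlock(VPValue *Old, VPValue *New, VPBasicBlock *VPBB) {
  Old->replaceUsesWithIf(New, [VPBB](VPUser &U, unsigned /*OpIdx*/) {
    // Only rewrite users that are recipes placed in VPBB.
    auto *R = dyn_cast<VPRecipeBase>(&U);
    return R && R->getParent() == VPBB;
  });
}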
A recipe to compute a pointer to the last element of each part of a widened memory access for widened...
A Recipe for widening the canonical induction variable of the vector loop.
VPWidenCastRecipe is a recipe to create vector cast instructions.
Instruction::CastOps getOpcode() const
A recipe for handling GEP instructions.
Base class for widened induction (VPWidenIntOrFpInductionRecipe and VPWidenPointerInductionRecipe),...
PHINode * getPHINode() const
VPValue * getStepValue()
Returns the step value of the induction.
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
A recipe for handling phi nodes of integer and floating-point inductions, producing their vector valu...
VPValue * getLastUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the last unrolled part,...
VPValue * getSplatVFValue()
A recipe for widening vector intrinsics.
A common base class for widening memory operations.
A recipe for widened phis.
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
bool hasVF(ElementCount VF) const
LLVMContext & getContext() const
VPBasicBlock * getEntry()
VPValue & getVectorTripCount()
The vector trip count.
bool hasScalableVF() const
VPValue & getVFxUF()
Returns VF * UF of the vector loop region.
VPValue & getVF()
Returns the VF of the vector loop region.
VPValue * getTripCount() const
The trip count of the original loop.
VPValue * getTrue()
Return a VPValue wrapping i1 true.
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
VPRegionBlock * createReplicateRegion(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="")
Create a new replicate region with Entry, Exiting and Name.
bool hasUF(unsigned UF) const
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
VPValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPValue wrapping a ConstantInt with the given type and value.
void setVF(ElementCount VF)
bool isUnrolled() const
Returns true if the VPlan already has been unrolled, i.e.
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
void resetTripCount(VPValue *NewTripCount)
Resets the trip count for the VPlan.
VPBasicBlock * getMiddleBlock()
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
VPValue * getFalse()
Return a VPValue wrapping i1 false.
VPValue * getOrAddLiveIn(Value *V)
Gets the live-in VPValue for V or adds a new live-in (if none exists yet) for V.
bool hasScalarVFOnly() const
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
ArrayRef< VPValue * > getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
VPValue * getLiveIn(Value *V) const
Return the live-in VPValue for V, if there is one or nullptr otherwise.
VPBasicBlock * getVectorPreheader()
Returns the preheader of the vector loop region, if one exists, or null otherwise.
bool hasScalarTail() const
Returns true if the scalar tail may execute after the vector loop.
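A short sketch stringing a few of these read-only queries together; the particular combination of checks is illustrative and not taken from any in-tree transform.

// Sketch only: bail out on plans that have nothing to widen.
static bool planLooksWidenable(VPlan &Plan) {
  // Plans built only for scalar VFs offer nothing to vectorize further.
  if (Plan.hasScalarVFOnly())
    return false;
  // A plan may have no vector loop region at all.
  VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
  if (!LoopRegion)
    return false;
  // The canonical IV type is kept on the loop region.
  return LoopRegion->getCanonicalIVType()->isIntegerTy();
}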
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
iterator_range< user_iterator > users()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
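A tiny sketch of ElementCount arithmetic, e.g. for sizing an interleave group; Factor and the helper name are illustrative.

// Sketch only: minimum number of elements spanned by Factor members at VF.
static unsigned minElementsForGroup(ElementCount VF, unsigned Factor) {
  ElementCount Total = VF.multiplyCoefficientBy(Factor);
  // For scalable VFs this is only the known minimum; the runtime value is a
  // multiple of it (vscale * min).
  return Total.getKnownMinValue();
}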
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_ABI APInt RoundingUDiv(const APInt &A, const APInt &B, APInt::Rounding RM)
Return A unsign-divided by B, rounded by the given rounding mode.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_MaskedStore(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
Matches MaskedStore Intrinsic.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
LogicalOp_match< LHS, RHS, Instruction::And > m_LogicalAnd(const LHS &L, const RHS &R)
Matches L && R either in the form of L & R or L ?
match_combine_or< CastInst_match< OpTy, ZExtInst >, OpTy > m_ZExtOrSelf(const OpTy &Op)
bool match(Val *V, const Pattern &P)
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_MaskedLoad(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
Matches MaskedLoad Intrinsic.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
SpecificCmpClass_match< LHS, RHS, CmpInst > m_SpecificCmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
BinaryOp_match< LHS, RHS, Instruction::Mul, true > m_c_Mul(const LHS &L, const RHS &R)
Matches a Mul with LHS and RHS in either order.
MatchFunctor< Val, Pattern > match_fn(const Pattern &P)
A match functor that can be used as a UnaryPredicate in functional algorithms like all_of.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
AllRecipe_commutative_match< Instruction::And, Op0_t, Op1_t > m_c_BinaryAnd(const Op0_t &Op0, const Op1_t &Op1)
Match a binary AND operation.
AllRecipe_match< Instruction::Or, Op0_t, Op1_t > m_BinaryOr(const Op0_t &Op0, const Op1_t &Op1)
Match a binary OR operation.
AllRecipe_commutative_match< Opcode, Op0_t, Op1_t > m_c_Binary(const Op0_t &Op0, const Op1_t &Op1)
AllRecipe_commutative_match< Instruction::Or, Op0_t, Op1_t > m_c_BinaryOr(const Op0_t &Op0, const Op1_t &Op1)
GEPLikeRecipe_match< Op0_t, Op1_t > m_GetElementPtr(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::ExtractLastLanePerPart, Op0_t > m_ExtractLastLanePerPart(const Op0_t &Op0)
VPInstruction_match< VPInstruction::ExtractLastElement, Op0_t > m_ExtractLastElement(const Op0_t &Op0)
AllRecipe_match< Opcode, Op0_t, Op1_t > m_Binary(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< Instruction::ExtractElement, Op0_t, Op1_t > m_ExtractElement(const Op0_t &Op0, const Op1_t &Op1)
specific_intval< 1 > m_False()
VPDerivedIV_match< Op0_t, Op1_t, Op2_t > m_DerivedIV(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
VPInstruction_match< VPInstruction::ActiveLaneMask, Op0_t, Op1_t, Op2_t > m_ActiveLaneMask(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
VPInstruction_match< VPInstruction::BranchOnCount > m_BranchOnCount()
specific_intval< 1 > m_True()
VectorEndPointerRecipe_match< Op0_t, Op1_t > m_VecEndPtr(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::Broadcast, Op0_t > m_Broadcast(const Op0_t &Op0)
class_match< VPValue > m_VPValue()
Match an arbitrary VPValue and ignore it.
VPInstruction_match< VPInstruction::ExplicitVectorLength, Op0_t > m_EVL(const Op0_t &Op0)
VPInstruction_match< VPInstruction::BuildVector > m_BuildVector()
BuildVector matches only its opcode, without matching its operands, as the number of operands is not fi...
VPInstruction_match< VPInstruction::FirstActiveLane, Op0_t > m_FirstActiveLane(const Op0_t &Op0)
bind_ty< VPInstruction > m_VPInstruction(VPInstruction *&V)
Match a VPInstruction, capturing if we match.
VPInstruction_match< VPInstruction::BranchOnCond > m_BranchOnCond()
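A minimal sketch of these VPlan pattern matchers in use, assuming they live in the llvm::VPlanPatternMatch namespace as in the in-tree headers and that a VPValue overload of m_Specific is available; the predicate itself is illustrative.

// Sketch only: does Cond compute (A | B) or (B | A)?
static bool isOrOf(VPValue *Cond, VPValue *A, VPValue *B) {
  using namespace llvm::VPlanPatternMatch;
  return match(Cond, m_c_BinaryOr(m_Specific(A), m_Specific(B)));
}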
NodeAddr< DefNode * > Def
bool isSingleScalar(const VPValue *VPV)
Returns true if VPV is a single scalar, either because it produces the same value for all lanes or on...
bool isUniformAcrossVFsAndUFs(VPValue *V)
Checks if V is uniform across all VF lanes and UF parts.
VPValue * getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr)
Get or create a VPValue that corresponds to the expansion of Expr.
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
bool onlyScalarValuesUsed(const VPValue *Def)
Returns true if only scalar values of Def are used by all users.
bool isHeaderMask(const VPValue *V, const VPlan &Plan)
Return true if V is a header mask in Plan.
const SCEV * getSCEVExprForVPValue(const VPValue *V, ScalarEvolution &SE, const Loop *L=nullptr)
Return the SCEV expression for V.
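A small sketch combining two of the vputils queries; whether this combination is the right criterion for any particular transform is not implied, and the helper name is hypothetical.

// Sketch only: a single scalar value is enough if the definition is uniform
// per lane or every user only reads lane 0.
static bool canKeepScalar(const VPValue *Def) {
  return vputils::isSingleScalar(Def) || vputils::onlyFirstLaneUsed(Def);
}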
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
DenseMap< const Value *, const SCEV * > ValueToSCEVMapTy
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
constexpr from_range_t from_range
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
auto cast_or_null(const Y &Val)
iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order.
iterator_range< df_iterator< VPBlockDeepTraversalWrapper< VPBlockBase * > > > vp_depth_first_deep(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order while traversing t...
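A minimal sketch tying the deep traversal to the early-increment range for safe in-place erasure; it assumes VPBlockUtils::blocksOnly<VPBasicBlock> is available to filter the traversal, as in the in-tree headers, and the helper name is hypothetical.

// Sketch only: erase recipes whose single result has no users.
static void eraseUnusedRecipes(VPlan &Plan) {
  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
           vp_depth_first_deep(Plan.getEntry()))) {
    // make_early_inc_range allows erasing the current recipe while iterating.
    for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
      if (R.mayHaveSideEffects() || R.getNumDefinedValues() != 1)
        continue;
      if (R.getVPSingleValue()->getNumUsers() == 0)
        R.eraseFromParent();
    }
  }
}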
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
iterator_range< po_iterator< VPBlockDeepTraversalWrapper< VPBlockBase * > > > vp_post_order_deep(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in post order while traversing through ...
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI_FOR_TEST cl::opt< bool > EnableWideActiveLaneMask
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
iterator_range< filter_iterator< detail::IterOfRange< RangeT >, PredicateT > > make_filter_range(RangeT &&Range, PredicateT Pred)
Convenience function that takes a range of elements and a predicate, and return a new filter_iterator...
bool canConstantBeExtended(const APInt *C, Type *NarrowType, TTI::PartialReductionExtendKind ExtKind)
Check if a constant CI can be safely treated as having been extended from a narrower type with the gi...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
RecurKind
These are the kinds of recurrences that we support.
@ Mul
Product of integers.
@ Sub
Subtraction of integers.
@ AddChainWithSubs
A chain of adds and subs.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
DWARFExpression::Operation Op
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list is empty.
@ DataAndControlFlowWithoutRuntimeCheck
Use predicate to control both data and control flow, but modify the trip count so that a runtime over...
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
@ Default
The result values are uniform if and only if all operands are uniform.
constexpr detail::IsaCheckPredicate< Types... > IsaPred
Function object wrapper for the llvm::isa type check.
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
RemoveMask_match(const Op0_t &In, Op1_t &Out)
bool match(OpTy *V) const
This struct is a compact representation of a valid (non-zero power of two) alignment.
An information struct used to provide DenseMap with the various necessary components for a given valu...
Incoming for lane mask phi as machine instruction, incoming register Reg and incoming block Block are...
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
Struct to hold various analysis needed for cost computations.
A recipe for handling first-order recurrence phis.
A recipe for widening load operations with vector-predication intrinsics, using the address to load f...
A recipe for widening load operations, using the address to load from and an optional mask.
A recipe for widening select instructions.
A recipe for widening store operations with vector-predication intrinsics, using the value to store,...
A recipe for widening store operations, using the stored value, the address to store to and an option...