51 GetIntOrFpInductionDescriptor,
58 if (!VPBB->getParent())
61 auto EndIter = Term ? Term->getIterator() : VPBB->end();
66 VPValue *VPV = Ingredient.getVPSingleValue();
75 const auto *II = GetIntOrFpInductionDescriptor(Phi);
89 Phi, Start, Step, &Plan.getVF(), *II, Flags,
90 Ingredient.getDebugLoc());
98 *Load, Ingredient.getOperand(0), nullptr,
100 Ingredient.getDebugLoc());
103 *Store, Ingredient.getOperand(1), Ingredient.getOperand(0),
104 nullptr, false, false, *VPI,
105 Ingredient.getDebugLoc());
108 Ingredient.getDebugLoc());
116 *VPI, CI->getDebugLoc());
119 *VPI, Ingredient.getDebugLoc());
122 CI->getOpcode(), Ingredient.getOperand(0), CI->getType(), CI,
126 *VPI, Ingredient.getDebugLoc());
135 "Only recipes with zero or one defined values expected");
136 Ingredient.eraseFromParent();
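// The dispatch above rebuilds each cloned ingredient as a dedicated recipe
// (widened load/store/call/cast, induction steps) and erases the original VPInstruction.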
152 if (R.mayHaveSideEffects() || R.mayReadFromMemory() || R.isPhi())
157 return RepR && RepR->getOpcode() == Instruction::Alloca;
166 auto InsertIfValidSinkCandidate = [ScalarVFOnly, &WorkList](
182 if (!ScalarVFOnly && RepR->isSingleScalar())
185 WorkList.insert({SinkTo, Candidate});
197 for (auto &Recipe : *VPBB)
199 InsertIfValidSinkCandidate(VPBB, Op);
203 for (unsigned I = 0; I != WorkList.size(); ++I) {
206 std::tie(SinkTo, SinkCandidate) = WorkList[I];
211 auto UsersOutsideSinkTo =
213 return cast<VPRecipeBase>(U)->getParent() != SinkTo;
215 if (any_of(UsersOutsideSinkTo, [SinkCandidate](VPUser *U) {
216 return !U->usesFirstLaneOnly(SinkCandidate);
219 bool NeedsDuplicating = !UsersOutsideSinkTo.empty();
221 if (NeedsDuplicating) {
225 if (auto *SinkCandidateRepR =
231 nullptr, *SinkCandidateRepR,
235 Clone = SinkCandidate->clone();
245 InsertIfValidSinkCandidate(SinkTo, Op);
255 if (!EntryBB || EntryBB->size() != 1 ||
265 if (EntryBB->getNumSuccessors() != 2)
270 if (!Succ0 || !Succ1)
273 if (Succ0->getNumSuccessors() + Succ1->getNumSuccessors() != 1)
275 if (Succ0->getSingleSuccessor() == Succ1)
277 if (Succ1->getSingleSuccessor() == Succ0)
294 if (!Region1->isReplicator())
296 auto *MiddleBasicBlock =
298 if (!MiddleBasicBlock || !MiddleBasicBlock->empty())
303 if (!Region2 || !Region2->isReplicator())
308 if (!Mask1 || Mask1 != Mask2)
311 assert(Mask1 && Mask2 && "both regions must have conditions");
317 if (TransformedRegions.contains(Region1))
324 if (!Then1 || !Then2)
344 VPValue *Phi1ToMoveV = Phi1ToMove.getVPSingleValue();
350 if (Phi1ToMove.getVPSingleValue()->getNumUsers() == 0) {
351 Phi1ToMove.eraseFromParent();
354 Phi1ToMove.moveBefore(*Merge2, Merge2->begin());
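// Region merging: once Mask1 == Mask2 is established, dead phis in the first merge
// block are erased and live ones are moved into the second merge block, fusing the regions.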
368 TransformedRegions.insert(Region1);
371 return !TransformedRegions.empty();
378 std::string RegionName = (Twine("pred.") + Instr->getOpcodeName()).str();
379 assert(Instr->getParent() && "Predicated instruction not in any basic block");
380 auto *BlockInMask = PredRecipe->getMask();
399 RecipeWithoutMask->getDebugLoc());
423 if (RepR->isPredicated())
442 if (ParentRegion && ParentRegion->getExiting() == CurrentBlock)
456 if (!VPBB->getParent())
460 if (!PredVPBB || PredVPBB->getNumSuccessors() != 1 ||
469 R.moveBefore(*PredVPBB, PredVPBB->end());
471 auto *ParentRegion = VPBB->getParent();
472 if (ParentRegion && ParentRegion->getExiting() == VPBB)
473 ParentRegion->setExiting(PredVPBB);
474 for (auto *Succ : to_vector(VPBB->successors())) {
480 return !WorkList.empty();
487 bool ShouldSimplify = true;
488 while (ShouldSimplify) {
504 if (!IV || IV->getTruncInst())
515 auto &Casts = IV->getInductionDescriptor().getCastInsts();
519 for (auto *U : FindMyCast->users()) {
521 if (UserCast && UserCast->getUnderlyingValue() == IRCast) {
522 FoundUserCast = UserCast;
526 FindMyCast = FoundUserCast;
551 if (!WidenOriginalIV || !WidenOriginalIV->isCanonical())
564 WidenOriginalIV->dropPoisonGeneratingFlags();
577 bool IsConditionalAssume = RepR && RepR->isPredicated() &&
579 if (IsConditionalAssume)
582 if (R.mayHaveSideEffects())
586 return all_of(R.definedValues(),
587 [](VPValue *V) { return V->getNumUsers() == 0; });
603 if (!PhiR || PhiR->getNumOperands() != 2)
605 VPUser *PhiUser = PhiR->getSingleUser();
609 if (PhiUser != Incoming->getDefiningRecipe() ||
612 PhiR->replaceAllUsesWith(PhiR->getOperand(0));
613 PhiR->eraseFromParent();
614 Incoming->getDefiningRecipe()->eraseFromParent();
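// The phi's single user defines the phi's own incoming (backedge) value, so the cycle
// is dead: uses are rewired to operand 0 and both recipes are erased.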
629 Kind, FPBinOp, StartV, CanonicalIV, Step, "offset.idx");
639 BaseIV = Builder.createScalarCast(Instruction::Trunc, BaseIV, TruncTy, DL);
645 if (ResultTy != StepTy) {
652 Builder.setInsertPoint(VecPreheader);
653 Step = Builder.createScalarCast(Instruction::Trunc, Step, ResultTy, DL);
655 return Builder.createScalarIVSteps(InductionOpcode, FPBinOp, BaseIV, Step,
661 for (unsigned I = 0; I != Users.size(); ++I) {
666 Users.insert_range(V->users());
668 return Users.takeVector();
702 Def->getNumUsers() == 0 || !Def->getUnderlyingValue() ||
703 (RepR && (RepR->isSingleScalar() || RepR->isPredicated())))
711 Def->operands(), true,
713 Clone->insertAfter(Def);
714 Def->replaceAllUsesWith(Clone);
725 VPValue *StepV = PtrIV->getOperand(1);
728 nullptr, StartV, StepV, PtrIV->getDebugLoc(), Builder);
730 VPValue *PtrAdd = Builder.createPtrAdd(PtrIV->getStartValue(), Steps,
740 if (HasOnlyVectorVFs && none_of(WideIV->users(), [WideIV](VPUser *U) {
741 return U->usesScalars(WideIV);
747 Plan, ID.getKind(), ID.getInductionOpcode(),
749 WideIV->getTruncInst(), WideIV->getStartValue(), WideIV->getStepValue(),
750 WideIV->getDebugLoc(), Builder);
753 if (!HasOnlyVectorVFs)
754 WideIV->replaceAllUsesWith(Steps);
756 WideIV->replaceUsesWithIf(Steps, [WideIV](VPUser &U, unsigned) {
757 return U.usesScalars(WideIV);
772 return (IntOrFpIV && IntOrFpIV->getTruncInst()) ? nullptr : WideIV;
777 if (!Def || Def->getNumOperands() != 2)
785 auto IsWideIVInc = [&]() {
786 auto &ID = WideIV->getInductionDescriptor();
789 VPValue *IVStep = WideIV->getStepValue();
790 switch (ID.getInductionOpcode()) {
791 case Instruction::Add:
793 case Instruction::FAdd:
796 case Instruction::FSub:
799 case Instruction::Sub: {
818 return IsWideIVInc() ? WideIV : nullptr;
838 if (WideIntOrFp && WideIntOrFp->getTruncInst())
851 FirstActiveLane = B.createScalarZExtOrTrunc(FirstActiveLane, CanonicalIVType,
852 FirstActiveLaneType, DL);
854 B.createNaryOp(Instruction::Add, {CanonicalIV, FirstActiveLane}, DL);
861 EndValue = B.createNaryOp(Instruction::Add, {EndValue, One}, DL);
864 if (!WideIntOrFp || !WideIntOrFp->isCanonical()) {
866 VPValue *Start = WideIV->getStartValue();
867 VPValue *Step = WideIV->getStepValue();
868 EndValue = B.createDerivedIV(
870 Start, EndValue, Step);
890 assert(EndValue && "end value must have been pre-computed");
900 VPValue *Step = WideIV->getStepValue();
903 return B.createNaryOp(Instruction::Sub, {EndValue, Step}, {}, "ind.escape");
907 return B.createPtrAdd(EndValue,
908 B.createNaryOp(Instruction::Sub, {Zero, Step}),
912 const auto &ID = WideIV->getInductionDescriptor();
913 return B.createNaryOp(
914 ID.getInductionBinOp()->getOpcode() == Instruction::FAdd
917 {EndValue, Step}, {ID.getInductionBinOp()->getFastMathFlags()});
932 for (auto [Idx, PredVPBB] : enumerate(ExitVPBB->getPredecessors())) {
934 if (PredVPBB == MiddleVPBB)
936 ExitIRI->getOperand(Idx),
940 ExitIRI->getOperand(Idx), SE);
942 ExitIRI->setOperand(Idx, Escape);
959 const auto &[V, Inserted] = SCEV2VPV.try_emplace(ExpR->getSCEV(), ExpR);
962 ExpR->replaceAllUsesWith(V->second);
963 ExpR->eraseFromParent();
972 while (!WorkList.empty()) {
974 if (!Seen.insert(Cur).second)
982 R->eraseFromParent();
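// Worklist-based dead-code elimination: side-effect-free recipes whose defined values
// have no remaining users are erased.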
989 static std::optional<std::pair<bool, unsigned>>
992 std::optional<std::pair<bool, unsigned>>>(R)
995 [](auto *I) { return std::make_pair(false, I->getOpcode()); })
996 .Case<VPWidenIntrinsicRecipe>([](auto *I) {
997 return std::make_pair(true, I->getVectorIntrinsicID());
999 .Case<VPVectorPointerRecipe, VPPredInstPHIRecipe>([](auto *I) {
1003 return std::make_pair(false,
1006 .Default([](auto *) { return std::nullopt; });
1022 if (!Op->isLiveIn() || !Op->getLiveInIRValue())
1024 Ops.push_back(Op->getLiveInIRValue());
1027 auto FoldToIRValue = [&]() -> Value * {
1029 if (OpcodeOrIID->first) {
1030 if (R.getNumOperands() != 2)
1032 unsigned ID = OpcodeOrIID->second;
1033 return Folder.FoldBinaryIntrinsic(ID, Ops[0], Ops[1],
1036 unsigned Opcode = OpcodeOrIID->second;
1045 return Folder.FoldSelect(Ops[0], Ops[1],
1048 return Folder.FoldBinOp(Instruction::BinaryOps::Xor, Ops[0],
1050 case Instruction::Select:
1051 return Folder.FoldSelect(Ops[0], Ops[1], Ops[2]);
1052 case Instruction::ICmp:
1053 case Instruction::FCmp:
1056 case Instruction::GetElementPtr: {
1059 return Folder.FoldGEP(GEP->getSourceElementType(), Ops[0],
1069 case Instruction::ExtractElement:
1076 if (Value *V = FoldToIRValue())
1077 return R.getParent()->getPlan()->getOrAddLiveIn(V);
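// FoldToIRValue runs InstSimplifyFolder over the live-in IR operands collected above;
// a successful fold is re-imported into the VPlan via getOrAddLiveIn.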
1083 VPlan *Plan = Def->getParent()->getPlan();
1090 return Def->replaceAllUsesWith(V);
1096 PredPHI->replaceAllUsesWith(Op);
1104 if (TruncTy == ATy) {
1105 Def->replaceAllUsesWith(A);
1114 : Instruction::ZExt;
1117 if (auto *UnderlyingExt = Def->getOperand(0)->getUnderlyingValue()) {
1119 Ext->setUnderlyingValue(UnderlyingExt);
1121 Def->replaceAllUsesWith(Ext);
1123 auto *Trunc = Builder.createWidenCast(Instruction::Trunc, A, TruncTy);
1124 Def->replaceAllUsesWith(Trunc);
1132 for (VPUser *U : A->users()) {
1134 for (VPValue *VPV : R->definedValues())
1148 Def->replaceAllUsesWith(X);
1149 Def->eraseFromParent();
1155 return Def->replaceAllUsesWith(Def->getOperand(Def->getOperand(0) == X));
1159 return Def->replaceAllUsesWith(X);
1163 return Def->replaceAllUsesWith(Def->getOperand(Def->getOperand(0) == X));
1167 return Def->replaceAllUsesWith(Def->getOperand(1));
1174 (!Def->getOperand(0)->hasMoreThanOneUniqueUser() ||
1175 !Def->getOperand(1)->hasMoreThanOneUniqueUser()))
1176 return Def->replaceAllUsesWith(
1177 Builder.createLogicalAnd(X, Builder.createOr(Y, Z)));
1181 return Def->replaceAllUsesWith(Plan->getFalse());
1184 return Def->replaceAllUsesWith(X);
1189 Def->setOperand(0, C);
1190 Def->setOperand(1, Y);
1191 Def->setOperand(2, X);
1200 X->hasMoreThanOneUniqueUser())
1201 return Def->replaceAllUsesWith(
1202 Builder.createLogicalAnd(X, Builder.createLogicalAnd(Y, Z)));
1205 return Def->replaceAllUsesWith(A);
1208 return Def->replaceAllUsesWith(
1209 Def->getOperand(0) == A ? Def->getOperand(1) : Def->getOperand(0));
1213 return Def->replaceAllUsesWith(A);
1228 R->setOperand(1, Y);
1229 R->setOperand(2, X);
1233 R->replaceAllUsesWith(Cmp);
1238 if (!Cmp->getDebugLoc() && Def->getDebugLoc())
1239 Cmp->setDebugLoc(Def->getDebugLoc());
1252 return Def->replaceAllUsesWith(NewCmp);
1260 return Def->replaceAllUsesWith(Def->getOperand(1));
1266 X = Builder.createWidenCast(Instruction::Trunc, X, WideStepTy);
1267 Def->replaceAllUsesWith(X);
1277 Def->setOperand(1, Def->getOperand(0));
1278 Def->setOperand(0, Y);
1283 if (Phi->getOperand(0) == Phi->getOperand(1))
1284 Phi->replaceAllUsesWith(Phi->getOperand(0));
1292 Def->replaceAllUsesWith(
1293 BuildVector->getOperand(BuildVector->getNumOperands() - 1));
1301 Def->replaceAllUsesWith(
1302 BuildVector->getOperand(BuildVector->getNumOperands() - 2));
1309 Def->replaceAllUsesWith(BuildVector->getOperand(Idx));
1314 Def->replaceAllUsesWith(
1324 "broadcast operand must be single-scalar");
1325 Def->setOperand(0, C);
1330 if (Phi->getNumOperands() == 1)
1331 Phi->replaceAllUsesWith(Phi->getOperand(0));
1344 if (Phi->getOperand(1) != Def && match(Phi->getOperand(0), m_ZeroInt()) &&
1345 Phi->getSingleUser() == Def) {
1346 Phi->setOperand(0, Y);
1347 Def->replaceAllUsesWith(Phi);
1354 if (VecPtr->isFirstPart()) {
1355 VecPtr->replaceAllUsesWith(VecPtr->getOperand(0));
1364 Steps->replaceAllUsesWith(Steps->getOperand(0));
1372 Def->replaceUsesWithIf(StartV, [](const VPUser &U, unsigned Idx) {
1374 return PhiR && PhiR->isInLoop();
1382 Def->replaceAllUsesWith(A);
1392 [Def, A](VPUser *U) { return U->usesScalars(A) || Def == U; })) {
1393 return Def->replaceAllUsesWith(A);
1396 if (Plan->getUF() == 1 &&
1398 return Def->replaceAllUsesWith(
1428 if (RepR && (RepR->isSingleScalar() || RepR->isPredicated()))
1435 RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(),
1436 true, nullptr, *RepR,
1437 *RepR, RepR->getDebugLoc());
1438 Clone->insertBefore(RepOrWidenR);
1439 unsigned ExtractOpc =
1443 auto *Ext = new VPInstruction(ExtractOpc, {Clone->getOperand(0)});
1444 Ext->insertBefore(Clone);
1445 Clone->setOperand(0, Ext);
1446 RepR->eraseFromParent();
1454 !all_of(RepOrWidenR->users(), [RepOrWidenR](const VPUser *U) {
1455 if (auto *Store = dyn_cast<VPWidenStoreRecipe>(U)) {
1461 assert(RepOrWidenR != Store->getStoredValue() ||
1462 vputils::isSingleScalar(Store->getStoredValue()));
1467 unsigned Opcode = VPI->getOpcode();
1474 return U->usesScalars(RepOrWidenR);
1479 RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(),
1480 true, nullptr, *RepOrWidenR);
1481 Clone->insertBefore(RepOrWidenR);
1482 RepOrWidenR->replaceAllUsesWith(Clone);
1484 RepOrWidenR->eraseFromParent();
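// When every user only needs scalar values (checked above), the wide recipe is cloned
// as a single-scalar replicate recipe, uses are moved over, and the wide form is erased.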
1520 if (Blend->isNormalized() || !match(Blend->getMask(0), m_False()))
1521 UniqueValues.insert(Blend->getIncomingValue(0));
1522 for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)
1524 UniqueValues.insert(Blend->getIncomingValue(I));
1526 if (UniqueValues.size() == 1) {
1527 Blend->replaceAllUsesWith(*UniqueValues.begin());
1528 Blend->eraseFromParent();
1532 if (Blend->isNormalized())
1538 unsigned StartIndex = 0;
1539 for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
1544 if (Mask->getNumUsers() == 1 && !match(Mask, m_False())) {
1551 OperandsWithMask.push_back(Blend->getIncomingValue(StartIndex));
1553 for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
1554 if (I == StartIndex)
1556 OperandsWithMask.push_back(Blend->getIncomingValue(I));
1557 OperandsWithMask.push_back(Blend->getMask(I));
1562 OperandsWithMask, Blend->getDebugLoc());
1563 NewBlend->insertBefore(&R);
1565 VPValue *DeadMask = Blend->getMask(StartIndex);
1567 Blend->eraseFromParent();
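// Blend simplification: a blend with one unique incoming value collapses to it; otherwise
// it is rebuilt in normalized form around StartIndex and the now-dead mask is cleaned up.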
1572 if (NewBlend->getNumOperands() == 3 &&
1574 VPValue *Inc0 = NewBlend->getOperand(0);
1575 VPValue *Inc1 = NewBlend->getOperand(1);
1576 VPValue *OldMask = NewBlend->getOperand(2);
1577 NewBlend->setOperand(0, Inc1);
1578 NewBlend->setOperand(1, Inc0);
1579 NewBlend->setOperand(2, NewMask);
1606 APInt MaxVal = AlignedTC - 1;
1609 unsigned NewBitWidth =
1615 bool MadeChange = false;
1624 if (!WideIV || !WideIV->isCanonical() ||
1625 WideIV->hasMoreThanOneUniqueUser() ||
1626 NewIVTy == WideIV->getScalarType())
1631 VPUser *SingleUser = WideIV->getSingleUser();
1640 WideIV->setStartValue(NewStart);
1642 WideIV->setStepValue(NewStep);
1648 Cmp->setOperand(1, NewBTC);
1662 return any_of(Cond->getDefiningRecipe()->operands(), [&Plan, BestVF, BestUF,
1664 return isConditionTrueViaVFAndUF(C, Plan, BestVF, BestUF, SE);
1677 const SCEV *VectorTripCount =
1682 "Trip count SCEV must be computable");
1702 auto *Term = &ExitingVPBB->back();
1715 for (unsigned Part = 0; Part < UF; ++Part) {
1723 Extracts[Part] = Ext;
1724 Ext->insertAfter(ALM);
1735 match(Phi->getBackedgeValue(),
1737 assert(Index && "Expected index from ActiveLaneMask instruction");
1750 "Expected one VPActiveLaneMaskPHIRecipe for each unroll part");
1757 "Expected incoming values of Phi to be ActiveLaneMasks");
1762 EntryALM->setOperand(2, ALMMultiplier);
1763 LoopALM->setOperand(2, ALMMultiplier);
1767 ExtractFromALM(EntryALM, EntryExtracts);
1772 ExtractFromALM(LoopALM, LoopExtracts);
1774 Not->setOperand(0, LoopExtracts[0]);
1777 for (unsigned Part = 0; Part < UF; ++Part) {
1778 Phis[Part]->setStartValue(EntryExtracts[Part]);
1779 Phis[Part]->setBackedgeValue(LoopExtracts[Part]);
1792 auto *Term = &ExitingVPBB->back();
1800 const SCEV *VectorTripCount =
1805 "Trip count SCEV must be computable");
1827 if (auto *R = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi))
1828 return R->isCanonical();
1829 return isa<VPCanonicalIVPHIRecipe, VPEVLBasedIVPHIRecipe,
1830 VPFirstOrderRecurrencePHIRecipe, VPPhi>(&Phi);
1836 R->getScalarType());
1838 HeaderR.eraseFromParent();
1842 HeaderR.getVPSingleValue()->replaceAllUsesWith(Phi->getIncomingValue(0));
1843 HeaderR.eraseFromParent();
1852 B->setParent(nullptr);
1861 {}, {}, Term->getDebugLoc());
1865 Term->eraseFromParent();
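// With the branch condition known at this point, the conditional terminator is replaced
// by an unconditional successor edge and the old terminator erased.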
1892 R.getVPSingleValue()->replaceAllUsesWith(Trunc);
1902 assert(Plan.hasVF(BestVF) && "BestVF is not available in Plan");
1903 assert(Plan.hasUF(BestUF) && "BestUF is not available in Plan");
1912 assert(Plan.getUF() == BestUF && "BestUF must match the Plan's UF");
1927 auto TryToPushSinkCandidate = [&](VPRecipeBase *SinkCandidate) {
1930 if (SinkCandidate == Previous)
1934 !Seen.insert(SinkCandidate).second ||
1947 for (unsigned I = 0; I != WorkList.size(); ++I) {
1950 "only recipes with a single defined value expected");
1965 if (SinkCandidate == FOR)
1968 SinkCandidate->moveAfter(Previous);
1969 Previous = SinkCandidate;
1987 for (VPUser *U : FOR->users()) {
1993 [&VPDT, HoistPoint](VPUser *U) {
1994 auto *R = cast<VPRecipeBase>(U);
1995 return HoistPoint == R ||
1996 VPDT.properlyDominates(HoistPoint, R);
1998 "HoistPoint must dominate all users of FOR");
2000 auto NeedsHoisting = [HoistPoint, &VPDT,
2002 VPRecipeBase *HoistCandidate = HoistCandidateV->getDefiningRecipe();
2003 if (!HoistCandidate)
2008 HoistCandidate->getRegion() == EnclosingLoopRegion) &&
2009 "CFG in VPlan should still be flat, without replicate regions");
2011 if (!Visited.insert(HoistCandidate).second)
2023 return HoistCandidate;
2032 for (unsigned I = 0; I != HoistCandidates.size(); ++I) {
2035 "only recipes with a single defined value expected");
2047 if (auto *R = NeedsHoisting(Op))
2059 HoistCandidate->moveBefore(*HoistPoint->getParent(),
2078 VPRecipeBase *Previous = FOR->getBackedgeValue()->getDefiningRecipe();
2081 while (auto *PrevPhi =
2083 assert(PrevPhi->getParent() == FOR->getParent());
2085 Previous = PrevPhi->getBackedgeValue()->getDefiningRecipe();
2103 {FOR, FOR->getBackedgeValue()});
2105 FOR->replaceAllUsesWith(RecurSplice);
2108 RecurSplice->setOperand(0, FOR);
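// replaceAllUsesWith above also rewrote the splice's own use of FOR, so operand 0 is
// reset to the recurrence phi here.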
2119 RecurKind RK = PhiR->getRecurrenceKind();
2126 RecWithFlags->dropPoisonGeneratingFlags();
2132 struct VPCSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> {
2134 return Def == getEmptyKey() || Def == getTombstoneKey();
2145 return GEP->getSourceElementType();
2148 .Case<VPVectorPointerRecipe, VPWidenGEPRecipe>(
2149 [](auto *I) { return I->getSourceElementType(); })
2150 .Default([](auto *) { return nullptr; });
2154 static bool canHandle(const VPSingleDefRecipe *Def) {
2163 if (!C || (!C->first && (C->second == Instruction::InsertValue ||
2164 C->second == Instruction::ExtractValue)))
2170 return !Def->mayReadFromMemory();
2174 static unsigned getHashValue(const VPSingleDefRecipe *Def) {
2175 const VPlan *Plan = Def->getParent()->getPlan();
2176 VPTypeAnalysis TypeInfo(*Plan);
2179 getGEPSourceElementType(Def), TypeInfo.inferScalarType(Def),
2182 if (RFlags->hasPredicate())
2188 static bool isEqual(const VPSingleDefRecipe *L, const VPSingleDefRecipe *R) {
2191 if (L->getVPDefID() != R->getVPDefID() ||
2193 getGEPSourceElementType(L) != getGEPSourceElementType(R) ||
2195 !equal(L->operands(), R->operands()))
2198 "must have valid opcode info for both recipes");
2200 if (LFlags->hasPredicate() &&
2201 LFlags->getPredicate() !=
2207 const VPRegionBlock *RegionL = L->getRegion();
2208 const VPRegionBlock *RegionR = R->getRegion();
2211 L->getParent() != R->getParent())
2213 const VPlan *Plan = L->getParent()->getPlan();
2214 VPTypeAnalysis TypeInfo(*Plan);
2215 return TypeInfo.inferScalarType(L) == TypeInfo.inferScalarType(R);
2230 if (!Def || !VPCSEDenseMapInfo::canHandle(Def))
2234 if (!VPDT.dominates(V->getParent(), VPBB))
2239 Def->replaceAllUsesWith(V);
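// VPlan-level CSE: a recipe hashing and comparing equal (per VPCSEDenseMapInfo) to a
// previously seen one is replaced by it, provided the earlier definition dominates this block.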
2258 "Expected vector prehader's successor to be the vector loop region");
2265 return !Op->isDefinedOutsideLoopRegions();
2268 R.moveBefore(*Preheader, Preheader->end());
2292 VPValue *ResultVPV = R.getVPSingleValue();
2294 unsigned NewResSizeInBits = MinBWs.lookup(UI);
2295 if (!NewResSizeInBits)
2308 (void)OldResSizeInBits;
2316 VPW->dropPoisonGeneratingFlags();
2318 if (OldResSizeInBits != NewResSizeInBits &&
2323 Ext->insertAfter(&R);
2325 Ext->setOperand(0, ResultVPV);
2326 assert(OldResSizeInBits > NewResSizeInBits && "Nothing to shrink?");
2329 "Only ICmps should not need extending the result.");
2338 for (unsigned Idx = StartIdx; Idx != R.getNumOperands(); ++Idx) {
2339 auto *Op = R.getOperand(Idx);
2340 unsigned OpSizeInBits =
2342 if (OpSizeInBits == NewResSizeInBits)
2344 assert(OpSizeInBits > NewResSizeInBits && "nothing to truncate");
2345 auto [ProcessedIter, IterIsEmpty] = ProcessedTruncs.try_emplace(Op);
2347 R.setOperand(Idx, ProcessedIter->second);
2355 Builder.setInsertPoint(&R);
2357 Builder.createWidenCast(Instruction::Trunc, Op, NewResTy);
2358 ProcessedIter->second = NewOp;
2359 R.setOperand(Idx, NewOp);
2374 assert(VPBB->getNumSuccessors() == 2 &&
2375 "Two successors expected for BranchOnCond");
2376 unsigned RemovedIdx;
2387 "There must be a single edge between VPBB and its successor");
2396 VPBB->back().eraseFromParent();
2458 VPValue *StartV = CanonicalIVPHI->getStartValue();
2460 auto *CanonicalIVIncrement =
2464 CanonicalIVIncrement->dropPoisonGeneratingFlags();
2465 DebugLoc DL = CanonicalIVIncrement->getDebugLoc();
2475 VPValue *TripCount, *IncrementValue;
2480 IncrementValue = CanonicalIVIncrement;
2486 IncrementValue = CanonicalIVPHI;
2490 auto *EntryIncrement = Builder.createOverflowingOp(
2498 {EntryIncrement, TC, ALMMultiplier}, DL,
2499 "active.lane.mask.entry");
2505 LaneMaskPhi->insertAfter(CanonicalIVPHI);
2510 Builder.setInsertPoint(OriginalTerminator);
2511 auto *InLoopIncrement =
2513 {IncrementValue}, {false, false}, DL);
2515 {InLoopIncrement, TripCount, ALMMultiplier},
2516 DL, "active.lane.mask.next");
2521 auto *NotMask = Builder.createNot(ALM, DL);
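// Active-lane-mask control flow: an entry mask is created in the preheader, advanced
// in-loop via CanonicalIVIncrementForPart plus "active.lane.mask.next", and its negation
// drives the exit branch.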
2534 auto *FoundWidenCanonicalIVUser = find_if(
2538 "Must have at most one VPWidenCanonicalIVRecipe");
2539 if (FoundWidenCanonicalIVUser !=
2541 auto *WideCanonicalIV =
2543 WideCanonicalIVs.push_back(WideCanonicalIV);
2551 if (WidenOriginalIV && WidenOriginalIV->isCanonical())
2552 WideCanonicalIVs.push_back(WidenOriginalIV);
2558 for (auto *Wide : WideCanonicalIVs) {
2564 assert(VPI->getOperand(0) == Wide &&
2565 "WidenCanonicalIV must be the first operand of the compare");
2566 assert(!HeaderMask && "Multiple header masks found?");
2574 VPlan &Plan, bool UseActiveLaneMaskForControlFlow,
2577 UseActiveLaneMaskForControlFlow) &&
2578 "DataAndControlFlowWithoutRuntimeCheck implies "
2579 "UseActiveLaneMaskForControlFlow");
2582 auto *FoundWidenCanonicalIVUser = find_if(
2584 assert(FoundWidenCanonicalIVUser &&
2585 "Must have widened canonical IV when tail folding!");
2587 auto *WideCanonicalIV =
2590 if (UseActiveLaneMaskForControlFlow) {
2600 nullptr, "active.lane.mask");
2616 template <typename OpTy> bool match(OpTy *V) const {
2627 template <typename Op0_t, typename Op1_t>
2645 VPValue *Addr, *Mask, *EndPtr;
2648 auto AdjustEndPtr = [&CurRecipe, &EVL](VPValue *EndPtr) {
2650 EVLEndPtr->insertBefore(&CurRecipe);
2651 EVLEndPtr->setOperand(1, &EVL);
2655 if (match(&CurRecipe,
2661 if (match(&CurRecipe,
2666 AdjustEndPtr(EndPtr), EVL, Mask);
2679 AdjustEndPtr(EndPtr), EVL, Mask);
2682 if (Rdx->isConditional() &&
2687 if (Interleave->getMask() &&
2692 if (match(&CurRecipe,
2701 Intrinsic::vp_merge, {Mask, LHS, RHS, &EVL},
2716 "User of VF that we can't transform to EVL.");
2722 [&LoopRegion, &Plan](VPUser *U) {
2724 m_c_Add(m_Specific(LoopRegion->getCanonicalIV()),
2725 m_Specific(&Plan.getVFxUF()))) ||
2726 isa<VPWidenPointerInductionRecipe>(U);
2728 "Only users of VFxUF should be VPWidenPointerInductionRecipe and the "
2729 "increment of the canonical induction.");
2749 MaxEVL = Builder.createScalarZExtOrTrunc(
2753 Builder.setInsertPoint(Header, Header->getFirstNonPhi());
2754 VPValue *PrevEVL = Builder.createScalarPhi(
2768 Intrinsic::experimental_vp_splice,
2769 {V1, V2, Imm, Plan.getTrue(), PrevEVL, &EVL},
2773 R.getVPSingleValue()->replaceAllUsesWith(VPSplice);
2791 VPValue *EVLMask = Builder.createICmp(
2809 assert(NumDefVal == CurRecipe->getNumDefinedValues() &&
2810 "New recipe must define the same number of values as the "
2815 for (unsigned I = 0; I < NumDefVal; ++I) {
2816 VPValue *CurVPV = CurRecipe->getVPValue(I);
2828 R->eraseFromParent();
2878 VPlan &Plan, const std::optional<unsigned> &MaxSafeElements) {
2886 VPValue *StartV = CanonicalIVPHI->getStartValue();
2890 EVLPhi->insertAfter(CanonicalIVPHI);
2891 VPBuilder Builder(Header, Header->getFirstNonPhi());
2894 VPPhi *AVLPhi = Builder.createScalarPhi(
2898 if (MaxSafeElements) {
2908 auto *CanonicalIVIncrement =
2910 Builder.setInsertPoint(CanonicalIVIncrement);
2914 OpVPEVL = Builder.createScalarZExtOrTrunc(
2915 OpVPEVL, CanIVTy, I32Ty, CanonicalIVIncrement->getDebugLoc());
2917 auto *NextEVLIV = Builder.createOverflowingOp(
2918 Instruction::Add, {OpVPEVL, EVLPhi},
2919 {CanonicalIVIncrement->hasNoUnsignedWrap(),
2920 CanonicalIVIncrement->hasNoSignedWrap()},
2921 CanonicalIVIncrement->getDebugLoc(), "index.evl.next");
2922 EVLPhi->addOperand(NextEVLIV);
2924 VPValue *NextAVL = Builder.createOverflowingOp(
2925 Instruction::Sub, {AVLPhi, OpVPEVL}, {true, false},
2933 CanonicalIVPHI->replaceAllUsesWith(EVLPhi);
2934 CanonicalIVIncrement->setOperand(0, CanonicalIVPHI);
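// replaceAllUsesWith rewired even the canonical IV increment to the EVL phi, so its
// first operand is restored to the canonical phi here.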
2948 assert(!EVLPhi && "Found multiple EVL PHIs. Only one expected");
2959 [[maybe_unused]] bool FoundAVL =
2962 assert(FoundAVL && "Didn't find AVL?");
2970 [[maybe_unused]] bool FoundAVLNext =
2973 assert(FoundAVLNext && "Didn't find AVL backedge?");
2984 VPValue *Backedge = CanonicalIV->getIncomingValue(1);
2987 "Unexpected canonical iv");
2993 CanonicalIV->eraseFromParent();
3006 match(LatchExitingBr,
3009 "Unexpected terminator in EVL loop");
3016 LatchExitingBr->eraseFromParent();
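// With the EVL-based IV in place, the canonical IV phi and the old latch terminator
// are dead and removed.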
3026 return R->getRegion() ||
3030 for (const SCEV *Stride : StridesMap.values()) {
3033 const APInt *StrideConst;
3034 if (!match(PSE.getSCEV(StrideV), m_scev_APInt(StrideConst)))
3050 unsigned BW = U->getType()->getScalarSizeInBits();
3056 RewriteMap[StrideV] = PSE.getSCEV(StrideV);
3063 const SCEV *ScevExpr = ExpSCEV->getSCEV();
3066 if (NewSCEV != ScevExpr) {
3068 ExpSCEV->replaceAllUsesWith(NewExp);
3077 const std::function<bool(BasicBlock *)> &BlockNeedsPredication) {
3081 auto CollectPoisonGeneratingInstrsInBackwardSlice([&](VPRecipeBase *Root) {
3086 while (!Worklist.empty()) {
3089 if (!Visited.insert(CurRec).second)
3111 RecWithFlags->isDisjoint()) {
3114 Instruction::Add, {A, B}, {false, false},
3115 RecWithFlags->getDebugLoc());
3116 New->setUnderlyingValue(RecWithFlags->getUnderlyingValue());
3117 RecWithFlags->replaceAllUsesWith(New);
3118 RecWithFlags->eraseFromParent();
3121 RecWithFlags->dropPoisonGeneratingFlags();
3126 assert((!Instr || !Instr->hasPoisonGeneratingFlags()) &&
3127 "found instruction with poison generating flags not covered by "
3128 "VPRecipeWithIRFlags");
3133 if (VPRecipeBase *OpDef = Operand->getDefiningRecipe())
3145 Instruction &UnderlyingInstr = WidenRec->getIngredient();
3146 VPRecipeBase *AddrDef = WidenRec->getAddr()->getDefiningRecipe();
3147 if (AddrDef && WidenRec->isConsecutive() &&
3148 BlockNeedsPredication(UnderlyingInstr.getParent()))
3149 CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
3151 VPRecipeBase *AddrDef = InterleaveRec->getAddr()->getDefiningRecipe();
3155 InterleaveRec->getInterleaveGroup();
3156 bool NeedPredication = false;
3158 I < NumMembers; ++I) {
3161 NeedPredication |= BlockNeedsPredication(Member->getParent());
3164 if (NeedPredication)
3165 CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
3177 if (InterleaveGroups.empty())
3184 for (const auto *IG : InterleaveGroups) {
3190 StoredValues.push_back(StoreR->getStoredValue());
3191 for (unsigned I = 1; I < IG->getFactor(); ++I) {
3198 StoredValues.push_back(StoreR->getStoredValue());
3202 bool NeedsMaskForGaps =
3203 (IG->requiresScalarEpilogue() && !ScalarEpilogueAllowed) ||
3204 (!StoredValues.empty() && !IG->isFull());
3216 VPValue *Addr = Start->getAddr();
3225 assert(IG->getIndex(IRInsertPos) != 0 &&
3226 "index of insert position shouldn't be zero");
3230 IG->getIndex(IRInsertPos),
3234 Addr = B.createNoWrapPtrAdd(InsertPos->getAddr(), OffsetVPV, NW);
3240 if (IG->isReverse()) {
3243 -(int64_t)IG->getFactor(), NW, InsertPos->getDebugLoc());
3244 ReversePtr->insertBefore(InsertPos);
3248 InsertPos->getMask(), NeedsMaskForGaps,
3249 InterleaveMD, InsertPos->getDebugLoc());
3250 VPIG->insertBefore(InsertPos);
3253 for (unsigned i = 0; i < IG->getFactor(); ++i)
3256 if (!Member->getType()->isVoidTy()) {
3315 AddOp = Instruction::Add;
3316 MulOp = Instruction::Mul;
3318 AddOp = ID.getInductionOpcode();
3319 MulOp = Instruction::FMul;
3327 Step = Builder.createScalarCast(Instruction::Trunc, Step, Ty, DL);
3328 Start = Builder.createScalarCast(Instruction::Trunc, Start, Ty, DL);
3330 Flags.dropPoisonGeneratingFlags();
3339 Init = Builder.createWidenCast(Instruction::UIToFP, Init, StepTy);
3344 Init = Builder.createNaryOp(MulOp, {Init, SplatStep}, Flags);
3345 Init = Builder.createNaryOp(AddOp, {SplatStart, Init}, Flags,
3351 WidePHI->insertBefore(WidenIVR);
3362 Builder.setInsertPoint(R->getParent(), std::next(R->getIterator()));
3366 VF = Builder.createScalarCast(Instruction::CastOps::UIToFP, VF, StepTy,
3369 VF = Builder.createScalarZExtOrTrunc(VF, StepTy,
3372 Inc = Builder.createNaryOp(MulOp, {Step, VF}, Flags);
3379 auto *Next = Builder.createNaryOp(AddOp, {Prev, Inc}, Flags,
3382 WidePHI->addOperand(Next);
3410 VPlan *Plan = R->getParent()->getPlan();
3411 VPValue *Start = R->getStartValue();
3412 VPValue *Step = R->getStepValue();
3413 VPValue *VF = R->getVFValue();
3415 assert(R->getInductionDescriptor().getKind() ==
3417 "Not a pointer induction according to InductionDescriptor!");
3420 "Recipe should have been replaced");
3426 VPPhi *ScalarPtrPhi = Builder.createScalarPhi(Start, DL, "pointer.phi");
3430 Builder.setInsertPoint(R->getParent(), R->getParent()->getFirstNonPhi());
3433 Offset = Builder.createOverflowingOp(Instruction::Mul, {Offset, Step});
3434 VPValue *PtrAdd = Builder.createNaryOp(
3436 R->replaceAllUsesWith(PtrAdd);
3441 VF = Builder.createScalarZExtOrTrunc(VF, StepTy, TypeInfo.inferScalarType(VF),
3443 VPValue *Inc = Builder.createOverflowingOp(Instruction::Mul, {Step, VF});
3446 Builder.createPtrAdd(ScalarPtrPhi, Inc, DL, "ptr.ind");
3455 if (!R->isReplicator())
3459 R->dissolveToCFGLoop();
3484 for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)
3485 Select = Builder.createSelect(Blend->getMask(I),
3486 Blend->getIncomingValue(I), Select,
3487 R.getDebugLoc(), "predphi");
3488 Blend->replaceAllUsesWith(Select);
3508 ? Instruction::UIToFP
3509 : Instruction::Trunc;
3510 VectorStep = Builder.createWidenCast(CastOp, VectorStep, IVTy);
3516 Builder.createWidenCast(Instruction::Trunc, ScalarStep, IVTy);
3521 Flags = {VPI->getFastMathFlags()};
3526 MulOpc, {VectorStep, ScalarStep}, Flags, R.getDebugLoc());
3528 VPI->replaceAllUsesWith(VectorStep);
3534 R->eraseFromParent();
3547 "unsupported early exit VPBB");
3558 "Terminator must be be BranchOnCond");
3559 VPValue *CondOfEarlyExitingVPBB =
3561 auto *CondToEarlyExit = TrueSucc == EarlyExitVPBB
3562 ? CondOfEarlyExitingVPBB
3563 : Builder.createNot(CondOfEarlyExitingVPBB);
3580 VPBuilder EarlyExitB(VectorEarlyExitVPBB);
3585 unsigned EarlyExitIdx = ExitIRI->getNumOperands() - 1;
3586 if (ExitIRI->getNumOperands() != 1) {
3589 ExitIRI->extractLastLaneOfFirstOperand(MiddleBuilder);
3592 VPValue *IncomingFromEarlyExit = ExitIRI->getOperand(EarlyExitIdx);
3593 if (!IncomingFromEarlyExit->isLiveIn()) {
3597 "first.active.lane");
3600 nullptr,
"early.exit.value");
3601 ExitIRI->setOperand(EarlyExitIdx, IncomingFromEarlyExit);
3611 "Unexpected terminator");
3612 auto *IsLatchExitTaken =
3614 LatchExitingBranch->getOperand(1));
3615 auto *AnyExitTaken = Builder.createNaryOp(
3616 Instruction::Or, {IsEarlyExitTaken, IsLatchExitTaken});
3618 LatchExitingBranch->eraseFromParent();
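// For early exits, the latch terminator becomes a single BranchOnCond over
// (early-exit-taken | latch-exit-taken).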
3628 Type *RedTy = Ctx.Types.inferScalarType(Red);
3629 VPValue *VecOp = Red->getVecOp();
3632 auto IsExtendedRedValidAndClampRange =
3649 ExtRedCost = Ctx.TTI.getPartialReductionCost(
3650 Opcode, SrcTy, nullptr, RedTy, VF, ExtKind,
3653 ExtRedCost = Ctx.TTI.getExtendedReductionCost(
3654 Opcode, ExtOpc == Instruction::CastOps::ZExt, RedTy, SrcVecTy,
3655 Red->getFastMathFlags(), CostKind);
3657 return ExtRedCost.isValid() && ExtRedCost < ExtCost + RedCost;
3665 IsExtendedRedValidAndClampRange(
3668 Ctx.Types.inferScalarType(A)))
3688 if (Opcode != Instruction::Add && Opcode != Instruction::Sub)
3691 Type *RedTy = Ctx.Types.inferScalarType(Red);
3694 auto IsMulAccValidAndClampRange =
3701 Ext0 ? Ctx.Types.inferScalarType(Ext0->getOperand(0)) : RedTy;
3704 if (IsPartialReduction) {
3706 Ext1 ? Ctx.Types.inferScalarType(Ext1->getOperand(0)) : nullptr;
3709 MulAccCost = Ctx.TTI.getPartialReductionCost(
3710 Opcode, SrcTy, SrcTy2, RedTy, VF,
3720 if (Ext0 && Ext1 && Ext0->getOpcode() != Ext1->getOpcode())
3724 !Ext0 || Ext0->getOpcode() == Instruction::CastOps::ZExt;
3726 MulAccCost = Ctx.TTI.getMulAccReductionCost(IsZExt, Opcode, RedTy,
3734 ExtCost += Ext0->computeCost(VF, Ctx);
3736 ExtCost += Ext1->computeCost(VF, Ctx);
3738 ExtCost += OuterExt->computeCost(VF, Ctx);
3740 return MulAccCost.isValid() &&
3741 MulAccCost < ExtCost + MulCost + RedCost;
3746 VPValue *VecOp = Red->getVecOp();
3764 if (!ExtA || ExtB || !ValB->isLiveIn())
3780 Builder.createWidenCast(Instruction::CastOps::Trunc, ValB, NarrowTy);
3781 Type *WideTy = Ctx.Types.inferScalarType(ExtA);
3782 ValB = ExtB = Builder.createWidenCast(ExtOpc, Trunc, WideTy);
3783 Mul->setOperand(1, ExtB);
3793 ExtendAndReplaceConstantOp(RecipeA, RecipeB, B, Mul);
3798 IsMulAccValidAndClampRange(Mul, RecipeA, RecipeB, nullptr)) {
3805 if (!Sub && IsMulAccValidAndClampRange(Mul, nullptr, nullptr, nullptr))
3822 ExtendAndReplaceConstantOp(Ext0, Ext1, B, Mul);
3831 (Ext->getOpcode() == Ext0->getOpcode() || Ext0 == Ext1) &&
3832 Ext0->getOpcode() == Ext1->getOpcode() &&
3833 IsMulAccValidAndClampRange(Mul, Ext0, Ext1, Ext) && Mul->hasOneUse()) {
3835 Ext0->getOpcode(), Ext0->getOperand(0), Ext->getResultType(), nullptr,
3836 *Ext0, *Ext0, Ext0->getDebugLoc());
3837 NewExt0->insertBefore(Ext0);
3842 Ext->getResultType(), nullptr, *Ext1,
3843 *Ext1, Ext1->getDebugLoc());
3846 Mul->setOperand(0, NewExt0);
3847 Mul->setOperand(1, NewExt1);
3848 Red->setOperand(1, Mul);
3861 auto IP = std::next(Red->getIterator());
3862 auto *VPBB = Red->getParent();
3872 Red->replaceAllUsesWith(AbstractR);
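// Once the clamped cost check accepts the extended-/mul-accumulate form, the extends,
// multiply and reduction are bundled into one abstract recipe that takes over the
// reduction's uses.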
3902 for (VPValue *VPV : VPValues) {
3904 (VPV->isLiveIn() && VPV->getLiveInIRValue() &&
3912 if (User->usesScalars(VPV))
3915 HoistPoint = HoistBlock->begin();
3919 "All users must be in the vector preheader or dominated by it");
3924 VPV->replaceUsesWithIf(Broadcast,
3925 [VPV, Broadcast](VPUser &U, unsigned Idx) {
3926 return Broadcast != &U && !U.usesScalars(VPV);
3943 if (RepR->isPredicated() || !RepR->isSingleScalar() ||
3944 RepR->getOpcode() != Instruction::Load)
3947 VPValue *Addr = RepR->getOperand(0);
3950 if (!Loc.AATags.Scope)
3955 if (R.mayWriteToMemory()) {
3957 if (!Loc || !Loc->AATags.Scope || !Loc->AATags.NoAlias)
3965 for (auto &[LoadRecipe, LoadLoc] : CandidateLoads) {
3969 const AAMDNodes &LoadAA = LoadLoc.AATags;
3982 assert(Plan.hasVF(BestVF) && "BestVF is not available in Plan");
3983 assert(Plan.hasUF(BestUF) && "BestUF is not available in Plan");
4017 auto *TCMO = Builder.createNaryOp(
4045 auto UsesVectorOrInsideReplicateRegion = [DefR, LoopRegion](VPUser *U) {
4047 return !U->usesScalars(DefR) || ParentRegion != LoopRegion;
4054 none_of(DefR->users(), UsesVectorOrInsideReplicateRegion))
4064 DefR->replaceUsesWithIf(
4065 BuildVector, [BuildVector, &UsesVectorOrInsideReplicateRegion](
4067 return &U != BuildVector && UsesVectorOrInsideReplicateRegion(&U);
4081 for (VPValue *Def : R.definedValues()) {
4094 auto IsCandidateUnpackUser = [Def](VPUser *U) {
4096 return U->usesScalars(Def) &&
4099 if (none_of(Def->users(), IsCandidateUnpackUser))
4106 Unpack->insertAfter(&R);
4107 Def->replaceUsesWithIf(Unpack,
4108 [&IsCandidateUnpackUser](VPUser &U, unsigned) {
4109 return IsCandidateUnpackUser(&U);
4119 bool RequiresScalarEpilogue) {
4121 assert(VectorTC.isLiveIn() && "vector-trip-count must be a live-in");
4140 if (TailByMasking) {
4141 TC = Builder.createNaryOp(
4143 {TC, Builder.createNaryOp(Instruction::Sub,
4154 Builder.createNaryOp(Instruction::URem, {TC, Step},
4163 if (RequiresScalarEpilogue) {
4165 "requiring scalar epilogue is not supported with fail folding");
4168 R = Builder.createSelect(IsZero, Step, R);
4171 VPValue *Res = Builder.createNaryOp(
4190 Builder.createElementCount(TCTy, VFEC * Plan.getUF());
4197 VPValue *RuntimeVF = Builder.createElementCount(TCTy, VFEC);
4201 BC, [&VF](VPUser &U, unsigned) { return !U.usesScalars(&VF); });
4206 VPValue *MulByUF = Builder.createNaryOp(Instruction::Mul, {RuntimeVF, UF});
4216 BasicBlock *EntryBB = Entry->getIRBasicBlock();
4224 const SCEV *Expr = ExpSCEV->getSCEV();
4227 ExpandedSCEVs[ExpSCEV->getSCEV()] = Res;
4232 ExpSCEV->eraseFromParent();
4235 "VPExpandSCEVRecipes must be at the beginning of the entry block, "
4236 "after any VPIRInstructions");
4239 auto EI = Entry->begin();
4249 return ExpandedSCEVs;
4265 return Member0Op == OpV;
4267 return !W->getMask() && Member0Op == OpV;
4269 return IR->getInterleaveGroup()->isFull() && IR->getVPValue(Idx) == OpV;
4280 if (!InterleaveR || InterleaveR->getMask())
4283 Type *GroupElementTy = nullptr;
4287 [&TypeInfo, GroupElementTy](VPValue *Op) {
4288 return TypeInfo.inferScalarType(Op) == GroupElementTy;
4295 [&TypeInfo, GroupElementTy](VPValue *Op) {
4296 return TypeInfo.inferScalarType(Op) == GroupElementTy;
4296 return TypeInfo.inferScalarType(Op) == GroupElementTy;
4305 return IG->getFactor() == VFMin && IG->getNumMembers() == VFMin &&
4306 GroupSize == VectorRegWidth;
4314 return RepR && RepR->isSingleScalar();
4321 auto *R = V->getDefiningRecipe();
4329 for (unsigned Idx = 0, E = WideMember0->getNumOperands(); Idx != E; ++Idx)
4330 WideMember0->setOperand(
4339 auto *LI = cast<LoadInst>(LoadGroup->getInterleaveGroup()->getInsertPos());
4341 *LI, LoadGroup->getAddr(), LoadGroup->getMask(), true,
4342 false, {}, LoadGroup->getDebugLoc());
4343 L->insertBefore(LoadGroup);
4349 assert(RepR->isSingleScalar() &&
4351 "must be a single scalar load");
4352 NarrowedOps.insert(RepR);
4357 VPValue *PtrOp = WideLoad->getAddr();
4359 PtrOp = VecPtr->getOperand(0);
4364 nullptr, {}, *WideLoad);
4365 N->insertBefore(WideLoad);
4395 if (R.mayWriteToMemory() && !InterleaveR)
4417 if (InterleaveR->getStoredValues().empty())
4422 auto *Member0 = InterleaveR->getStoredValues()[0];
4424 all_of(InterleaveR->getStoredValues(),
4425 [Member0](VPValue *VPV) { return Member0 == VPV; })) {
4433 VPRecipeBase *DefR = Op.value()->getDefiningRecipe();
4436 auto *IR = dyn_cast<VPInterleaveRecipe>(DefR);
4437 return IR && IR->getInterleaveGroup()->isFull() &&
4438 IR->getVPValue(Op.index()) == Op.value();
4450 for (const auto &[I, V] : enumerate(InterleaveR->getStoredValues())) {
4452 if (!R || R->getOpcode() != WideMember0->getOpcode() ||
4453 R->getNumOperands() > 2)
4456 [WideMember0, Idx = I](const auto &P) {
4457 const auto &[OpIdx, OpV] = P;
4458 return !canNarrowLoad(WideMember0, OpIdx, OpV, Idx);
4465 if (StoreGroups.empty())
4471 for (auto *StoreGroup : StoreGroups) {
4477 *SI, StoreGroup->getAddr(), Res, nullptr, true,
4478 false, {}, StoreGroup->getDebugLoc());
4479 S->insertBefore(StoreGroup);
4480 StoreGroup->eraseFromParent();
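// Each narrowed store group is replaced by a single consecutive VPWidenStoreRecipe of
// Res, and the original interleave recipe is erased.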
4498 Inc->setOperand(1, UF);
4517 "must have a BranchOnCond");
4520 if (VF.isScalable() && VScaleForTuning.has_value())
4521 VectorStep *= *VScaleForTuning;
4522 assert(VectorStep > 0 && "trip count should not be zero");
4526 MiddleTerm->setMetadata(LLVMContext::MD_prof, BranchWeights);
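// Middle-block branch weights are derived from VectorStep (VF, scaled by
// vscale-for-tuning for scalable VFs), approximating the vector-vs-remainder trip split.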
4538 if (WideIntOrFp && WideIntOrFp->getTruncInst())
4545 if (!WideIntOrFp || !WideIntOrFp->isCanonical()) {
4548 Start, VectorTC, Step);
4561 {EndValue, Start}, WideIV->getDebugLoc(), "bc.resume.val");
4562 return ResumePhiRecipe;
4574 VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
4585 WideIVR, VectorPHBuilder, ScalarPHBuilder, TypeInfo,
4588 IVEndValues[WideIVR] = ResumePhi->getOperand(0);
4589 ScalarPhiIRI->addOperand(ResumePhi);
4596 "should only skip truncated wide inductions");
4604 auto *ResumeFromVectorLoop = VectorPhiR->getBackedgeValue();
4606 "Cannot handle loops with uncountable early exits");
4610 "vector.recur.extract");
4611 StringRef Name = IsFOR ? "scalar.recur.init" : "bc.merge.rdx";
4613 {ResumeFromVectorLoop, VectorPhiR->getStartValue()}, {}, Name);
4623 VPBuilder ScalarPHBuilder(ScalarPHVPBB);
4624 VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
4636 "Cannot handle loops with uncountable early exits");
4708 for (VPUser *U : FOR->users()) {
4722 {}, "vector.recur.extract.for.phi");
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
This file implements a class to represent arbitrary precision integral constant values and operations...
ReachingDefInfo InstSet & ToRemove
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static bool isEqual(const Function &Caller, const Function &Callee)
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
static bool isSentinel(const DWARFDebugNames::AttributeEncoding &AE)
iv Induction Variable Users
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
Legalize the Machine IR a function s Machine IR
static bool mergeBlocksIntoPredecessors(Loop &L, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution &SE)
This file provides utility analysis objects describing memory locations.
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
const SmallVectorImpl< MachineOperand > & Cond
This is the interface for a metadata-based scoped no-alias analysis.
This file defines generic set operations that may be used on set's of different types,...
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static SymbolRef::Type getType(const Symbol *Sym)
This file implements the TypeSwitch template, which mimics a switch() statement whose cases are type ...
This file implements dominator tree analysis for a single level of a VPlan's H-CFG.
This file contains the declarations of different VPlan-related auxiliary helpers.
This file declares the class VPlanVerifier, which contains utility functions to check the consistency...
This file contains the declarations of the Vectorization Plan base classes:
static const uint32_t IV[8]
Class for arbitrary precision integers.
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
unsigned getActiveBits() const
Compute the number of active bits in the value.
unsigned getBitWidth() const
Return the number of bits in the APInt.
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
LLVM Basic Block Representation.
const Function * getParent() const
Return the enclosing method, or null if none.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
This class represents a function call, abstracting a target machine's calling convention.
@ ICMP_ULT
unsigned less than
@ ICMP_ULE
unsigned less or equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
static DebugLoc getCompilerGenerated()
static DebugLoc getUnknown()
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
constexpr bool isVector() const
One or more elements.
static constexpr ElementCount getScalable(ScalarTy MinVal)
Utility class for floating point operations which can have information about relaxed accuracy require...
Represents flags for the getelementptr instruction/expression.
GEPNoWrapFlags withoutNoUnsignedWrap() const
static GEPNoWrapFlags none()
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
A struct for saving information about induction variables.
InductionKind
This enum represents the kinds of inductions that we support.
@ IK_PtrInduction
Pointer induction var. Step = C.
@ IK_IntInduction
Integer induction variable. Step = C.
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
The group of interleaved loads/stores sharing the same stride and close to each other.
InstTy * getMember(uint32_t Index) const
Get the member with the given index Index.
uint32_t getNumMembers() const
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
static bool getDecisionAndClampRange(const std::function< bool(ElementCount)> &Predicate, VFRange &Range)
Test a Predicate on a Range of VF's.
LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
This class implements a map that also provides access to all stored values in a deterministic order.
ValueT lookup(const KeyT &Key) const
Representation for a specific memory location.
AAMDNodes AATags
The metadata nodes which describes the aliasing of the location (each member is null if that kind of ...
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
LLVM_ABI const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
static LLVM_ABI unsigned getOpcode(RecurKind Kind)
Returns the opcode corresponding to the RecurrenceKind.
unsigned getOpcode() const
RegionT * getParent() const
Get the parent of the Region.
This class uses information about analyze scalars to rewrite expressions in canonical form.
LLVM_ABI Value * expandCodeFor(const SCEV *SH, Type *Ty, BasicBlock::iterator I)
Insert code to directly compute the specified SCEV expression into the program.
static const SCEV * rewrite(const SCEV *Scev, ScalarEvolution &SE, ValueToSCEVMapTy &Map)
This class represents an analyzed expression in the program.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
const DataLayout & getDataLayout() const
Return the DataLayout associated with the module this SCEV instance is operating on.
LLVM_ABI const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI const SCEV * getUDivExpr(const SCEV *LHS, const SCEV *RHS)
Get a canonical unsigned division expression, or something simpler if possible.
LLVM_ABI const SCEV * getElementCount(Type *Ty, ElementCount EC, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
LLVM_ABI const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
LLVM_ABI bool isKnownPredicate(CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
static LLVM_ABI bool mayAliasInScopes(const MDNode *Scopes, const MDNode *NoAlias)
This class represents the LLVM 'select' instruction.
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
Provides information about what library functions are available for the current target.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
static constexpr TypeSize get(ScalarTy Quantity, bool Scalable)
This class implements a switch-like dispatch statement for a value of 'T' using dyn_cast functionalit...
TypeSwitch< T, ResultT > & Case(CallableT &&caseFn)
Add a case on the given type.
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
bool isStructTy() const
True if this is an instance of StructType.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
A recipe for generating the active lane mask for the vector loop that is used to predicate the vector...
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
RecipeListTy::iterator iterator
Instruction iterators...
iterator begin()
Recipe iterator methods.
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
VPRegionBlock * getEnclosingLoopRegion()
VPBasicBlock * splitAt(iterator SplitAt)
Split current block at SplitAt by inserting a new block between the current block and its successors ...
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
const VPRecipeBase & back() const
A recipe for vectorizing a phi-node as a sequence of mask-based select instructions.
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
void setMask(unsigned Idx, VPValue *V)
Set mask number Idx to V.
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have...
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
VPRegionBlock * getParent()
const VPBasicBlock * getExitingBasicBlock() const
size_t getNumSuccessors() const
void swapSuccessors()
Swap successors of the block. The block must have exactly 2 successors.
size_t getNumPredecessors() const
const VPBlocksTy & getPredecessors() const
VPBlockBase * getSinglePredecessor() const
const VPBasicBlock * getEntryBasicBlock() const
VPBlockBase * getSingleHierarchicalPredecessor()
VPBlockBase * getSingleSuccessor() const
const VPBlocksTy & getSuccessors() const
static auto blocksOnly(const T &Range)
Return an iterator range over Range which only includes BlockTy blocks.
static void insertOnEdge(VPBlockBase *From, VPBlockBase *To, VPBlockBase *BlockPtr)
Inserts BlockPtr on the edge between From and To.
static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBases IfTrue and IfFalse after BlockPtr.
static void connectBlocks(VPBlockBase *From, VPBlockBase *To, unsigned PredIdx=-1u, unsigned SuccIdx=-1u)
Connect VPBlockBases From and To bi-directionally.
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
A recipe for generating conditional branches on the bits of a mask.
RAII object that stores the current insertion point and restores it when the object is destroyed.
VPlan-based builder utility analogous to IRBuilder.
VPValue * createScalarZExtOrTrunc(VPValue *Op, Type *ResultTy, Type *SrcTy, DebugLoc DL)
VPValue * createElementCount(Type *Ty, ElementCount EC)
VPInstruction * createScalarCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, DebugLoc DL, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={})
VPDerivedIVRecipe * createDerivedIV(InductionDescriptor::InductionKind Kind, FPMathOperator *FPBinOp, VPValue *Start, VPValue *Current, VPValue *Step, const Twine &Name="")
Convert the input value Current to the corresponding value of an induction with Start and Step values...
static VPBuilder getToInsertAfter(VPRecipeBase *R)
Create a VPBuilder to insert after R.
VPPhi * createScalarPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL, const Twine &Name="")
void setInsertPoint(VPBasicBlock *TheBB)
This specifies that created VPInstructions should be appended to the end of the specified block.
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.
Canonical scalar induction phi of the vector loop.
unsigned getNumDefinedValues() const
Returns the number of values defined by the VPDef.
ArrayRef< VPValue * > definedValues()
Returns an ArrayRef of the values defined by the VPDef.
VPValue * getVPSingleValue()
Returns the only VPValue defined by the VPDef.
VPValue * getVPValue(unsigned I)
Returns the VPValue with index I defined by the VPDef.
A recipe for converting the input value IV value to the corresponding value of an IV with different s...
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
bool properlyDominates(const VPRecipeBase *A, const VPRecipeBase *B)
A recipe for generating the phi node for the current index of elements, adjusted in accordance with E...
A recipe to combine multiple recipes into a single 'expression' recipe, which should be considered a ...
A special type of VPBasicBlock that wraps an existing IR basic block.
BasicBlock * getIRBasicBlock() const
Class to record and manage LLVM IR flags.
static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)
Create a new VPIRPhi for I, if it is a PHINode; otherwise create a VPIRInstruction.
This is a concrete Recipe that models a single VPlan-level instruction.
@ ExtractLane
Extracts a single lane (first operand) from a set of vector operands.
@ ExtractPenultimateElement
@ Unpack
Extracts all lanes from its (non-scalable) vector operand.
@ FirstOrderRecurrenceSplice
@ BuildVector
Creates a fixed-width vector containing all operands.
@ BuildStructVector
Given operands of (the same) struct type, creates a struct of fixed-width vectors each containing a ...
@ CanonicalIVIncrementForPart
@ CalculateTripCountMinusVF
const InterleaveGroup< Instruction > * getInterleaveGroup() const
VPValue * getMask() const
Return the mask used by this recipe.
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
A recipe for interleaved memory operations with vector-predication intrinsics.
VPInterleaveRecipe is a recipe for transforming an interleave group of loads or stores into one wide l...
VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when control converges back from ...
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
VPRegionBlock * getRegion()
VPBasicBlock * getParent()
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
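These are the building blocks of the sinking and block-merging transforms earlier in this file: unlink, reposition, and finally erase dead recipes. Sketch, with R a recipe defining a single value and Pred a hypothetical predecessor block:

  R.moveBefore(*Pred, Pred->end());              // hoist R to the end of Pred
  if (R.getVPSingleValue()->getNumUsers() == 0)
    R.eraseFromParent();                         // unlink and delete once dead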
Helper class to create VPRecipes from IR instructions.
VPRecipeBase * getRecipe(Instruction *I)
Return the recipe created for given ingredient.
A recipe to represent inloop reduction operations with vector-predication intrinsics,...
A recipe to represent inloop reduction operations, performing a reduction on a vector operand into a ...
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
const VPBlockBase * getEntry() const
Type * getCanonicalIVType()
Return the type of the canonical IV for loop regions.
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
void setExiting(VPBlockBase *ExitingBlock)
Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.
VPCanonicalIVPHIRecipe * getCanonicalIV()
Returns the canonical induction recipe of the region.
const VPBlockBase * getExiting() const
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
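Replicator regions produce per-lane copies and have no canonical IV; loop regions do. Sketch, with Region a hypothetical VPRegionBlock *:

  if (!Region->isReplicator()) {
    VPCanonicalIVPHIRecipe *CanIV = Region->getCanonicalIV();
    Type *IVTy = Region->getCanonicalIVType();  // type of the canonical IV
  }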
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
bool isSingleScalar() const
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...
VPSingleDef is a base class for recipes for modeling a sequence of one or more output IR that define ...
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
VPSingleDefRecipe * clone() override=0
Clone the current recipe.
An analysis for type-inference for VPValues.
LLVMContext & getContext()
Return the LLVMContext used by the analysis.
Type * inferScalarType(const VPValue *V)
Infer the type of V. Returns the scalar type of V.
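A sketch of how transforms query types, assuming the analysis can be constructed from the plan (Plan, V and VF are hypothetical; toVectorTy is listed further below):

  VPTypeAnalysis TypeInfo(Plan);
  Type *ScalarTy = TypeInfo.inferScalarType(V);
  Type *WideTy = toVectorTy(ScalarTy, VF);  // widen to a vector of VF lanes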
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
void setOperand(unsigned I, VPValue *New)
VPValue * getOperand(unsigned N) const
void addOperand(VPValue *Operand)
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
bool isDefinedOutsideLoopRegions() const
Returns true if the VPValue is defined outside any loop.
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Value * getLiveInIRValue() const
Returns the underlying IR value, if this VPValue is defined outside the scope of VPlan.
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
void setUnderlyingValue(Value *Val)
void replaceAllUsesWith(VPValue *New)
unsigned getNumUsers() const
bool isLiveIn() const
Returns true if this VPValue is a live-in, i.e. defined outside the VPlan.
void replaceUsesWithIf(VPValue *New, llvm::function_ref< bool(VPUser &U, unsigned Idx)> ShouldReplace)
Go through the uses list for this VPValue and make each use point to New if the callback ShouldReplac...
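replaceUsesWithIf is what makes partial rewrites like sinking possible: uses inside the destination block keep the original value while all others are redirected. Sketch, with Old, New and SinkTo hypothetical:

  Old->replaceUsesWithIf(New, [SinkTo](VPUser &U, unsigned) {
    return cast<VPRecipeBase>(&U)->getParent() != SinkTo;
  });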
A recipe to compute a pointer to the last element of each part of a widened memory access for widened...
A Recipe for widening the canonical induction variable of the vector loop.
VPWidenCastRecipe is a recipe to create vector cast instructions.
Instruction::CastOps getOpcode() const
A recipe for handling GEP instructions.
Base class for widened induction (VPWidenIntOrFpInductionRecipe and VPWidenPointerInductionRecipe),...
PHINode * getPHINode() const
VPValue * getStepValue()
Returns the step value of the induction.
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
A recipe for handling phi nodes of integer and floating-point inductions, producing their vector valu...
VPValue * getLastUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the last unrolled part,...
VPValue * getSplatVFValue()
A recipe for widening vector intrinsics.
A common base class for widening memory operations.
A recipe for widened phis.
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient output...
bool hasVF(ElementCount VF) const
LLVMContext & getContext() const
VPBasicBlock * getEntry()
VPValue & getVectorTripCount()
The vector trip count.
bool hasScalableVF() const
VPValue & getVFxUF()
Returns VF * UF of the vector loop region.
VPValue & getVF()
Returns the VF of the vector loop region.
VPValue * getTripCount() const
The trip count of the original loop.
VPValue * getTrue()
Return a VPValue wrapping i1 true.
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
VPRegionBlock * createReplicateRegion(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="")
Create a new replicate region with Entry, Exiting and Name.
bool hasUF(unsigned UF) const
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
VPValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPValue wrapping a ConstantInt with the given type and value.
void setVF(ElementCount VF)
bool isUnrolled() const
Returns true if the VPlan has already been unrolled, i.e.
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
void resetTripCount(VPValue *NewTripCount)
Resets the trip count for the VPlan.
VPBasicBlock * getMiddleBlock()
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
VPValue * getFalse()
Return a VPValue wrapping i1 false.
VPValue * getOrAddLiveIn(Value *V)
Gets the live-in VPValue for V or adds a new live-in (if none exists yet) for V.
bool hasScalarVFOnly() const
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
ArrayRef< VPValue * > getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
VPValue * getLiveIn(Value *V) const
Return the live-in VPValue for V, if there is one or nullptr otherwise.
VPBasicBlock * getVectorPreheader()
Returns the preheader of the vector loop region, if one exists, or null otherwise.
bool hasScalarTail() const
Returns true if the scalar tail may execute after the vector loop.
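A typical preamble for a VPlan-to-VPlan transform, built only from the accessors above (Plan is a hypothetical VPlan &):

  if (Plan.hasScalarVFOnly())
    return;                                        // nothing to widen
  VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
  VPBasicBlock *Preheader = Plan.getVectorPreheader();
  VPValue *TC = Plan.getTripCount();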
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
iterator_range< user_iterator > users()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
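ElementCount is a (known-minimum, scalable) pair: <vscale x 4 x i32> has a known minimum of 4 lanes, multiplied at runtime by vscale. A small sketch of the queries listed above:

  ElementCount EC = ElementCount::getScalable(4);     // vscale x 4
  assert(EC.isScalable() && !EC.isFixed());
  uint64_t MinLanes = EC.getKnownMinValue();          // 4
  ElementCount Doubled = EC.multiplyCoefficientBy(2); // vscale x 8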
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_ABI APInt RoundingUDiv(const APInt &A, const APInt &B, APInt::Rounding RM)
Return A unsign-divided by B, rounded by the given rounding mode.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_MaskedStore(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
Matches MaskedStore Intrinsic.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
LogicalOp_match< LHS, RHS, Instruction::And > m_LogicalAnd(const LHS &L, const RHS &R)
Matches L && R either in the form of L & R or L ? R : false.
match_combine_or< CastInst_match< OpTy, ZExtInst >, OpTy > m_ZExtOrSelf(const OpTy &Op)
bool match(Val *V, const Pattern &P)
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_MaskedLoad(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
Matches MaskedLoad Intrinsic.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
SpecificCmpClass_match< LHS, RHS, CmpInst > m_SpecificCmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
BinaryOp_match< LHS, RHS, Instruction::Mul, true > m_c_Mul(const LHS &L, const RHS &R)
Matches a Mul with LHS and RHS in either order.
MatchFunctor< Val, Pattern > match_fn(const Pattern &P)
A match functor that can be used as a UnaryPredicate in functional algorithms like all_of.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
AllRecipe_commutative_match< Instruction::And, Op0_t, Op1_t > m_c_BinaryAnd(const Op0_t &Op0, const Op1_t &Op1)
Match a binary AND operation.
AllRecipe_match< Instruction::Or, Op0_t, Op1_t > m_BinaryOr(const Op0_t &Op0, const Op1_t &Op1)
Match a binary OR operation.
AllRecipe_commutative_match< Opcode, Op0_t, Op1_t > m_c_Binary(const Op0_t &Op0, const Op1_t &Op1)
AllRecipe_commutative_match< Instruction::Or, Op0_t, Op1_t > m_c_BinaryOr(const Op0_t &Op0, const Op1_t &Op1)
GEPLikeRecipe_match< Op0_t, Op1_t > m_GetElementPtr(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::ExtractLastLanePerPart, Op0_t > m_ExtractLastLanePerPart(const Op0_t &Op0)
VPInstruction_match< VPInstruction::ExtractLastElement, Op0_t > m_ExtractLastElement(const Op0_t &Op0)
AllRecipe_match< Opcode, Op0_t, Op1_t > m_Binary(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< Instruction::ExtractElement, Op0_t, Op1_t > m_ExtractElement(const Op0_t &Op0, const Op1_t &Op1)
specific_intval< 1 > m_False()
VPDerivedIV_match< Op0_t, Op1_t, Op2_t > m_DerivedIV(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
VPInstruction_match< VPInstruction::ActiveLaneMask, Op0_t, Op1_t, Op2_t > m_ActiveLaneMask(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
VPInstruction_match< VPInstruction::BranchOnCount > m_BranchOnCount()
specific_intval< 1 > m_True()
VectorEndPointerRecipe_match< Op0_t, Op1_t > m_VecEndPtr(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::Broadcast, Op0_t > m_Broadcast(const Op0_t &Op0)
class_match< VPValue > m_VPValue()
Match an arbitrary VPValue and ignore it.
VPInstruction_match< VPInstruction::ExplicitVectorLength, Op0_t > m_EVL(const Op0_t &Op0)
VPInstruction_match< VPInstruction::BuildVector > m_BuildVector()
BuildVector matches only its opcode, without matching its operands, as the number of operands is not fixed.
VPInstruction_match< VPInstruction::FirstActiveLane, Op0_t > m_FirstActiveLane(const Op0_t &Op0)
bind_ty< VPInstruction > m_VPInstruction(VPInstruction *&V)
Match a VPInstruction, capturing if we match.
VPInstruction_match< VPInstruction::BranchOnCond > m_BranchOnCond()
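These matchers compose the same way as IR-level PatternMatch. A sketch that only checks opcode shape, with V a hypothetical VPValue *:

  using namespace llvm::VPlanPatternMatch;
  if (match(V, m_BinaryOr(m_VPValue(), m_VPValue()))) {
    // V is defined by a binary OR; both operands are matched and ignored.
  }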
NodeAddr< DefNode * > Def
bool isSingleScalar(const VPValue *VPV)
Returns true if VPV is a single scalar, either because it produces the same value for all lanes or on...
bool isUniformAcrossVFsAndUFs(VPValue *V)
Checks if V is uniform across all VF lanes and UF parts.
VPValue * getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr)
Get or create a VPValue that corresponds to the expansion of Expr.
std::optional< MemoryLocation > getMemoryLocation(const VPRecipeBase &R)
Return a MemoryLocation for R with noalias metadata populated from R, if the recipe is supported and ...
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
VPIRFlags getFlagsFromIndDesc(const InductionDescriptor &ID)
Extracts and returns NoWrap and FastMath flags from the induction binop in ID.
bool onlyScalarValuesUsed(const VPValue *Def)
Returns true if only scalar values of Def are used by all users.
bool isHeaderMask(const VPValue *V, const VPlan &Plan)
Return true if V is a header mask in Plan.
const SCEV * getSCEVExprForVPValue(const VPValue *V, ScalarEvolution &SE, const Loop *L=nullptr)
Return the SCEV expression for V.
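A common legality guard assembled from these helpers before scalarizing a value (V is a hypothetical VPValue *):

  if (vputils::onlyFirstLaneUsed(V) && vputils::isUniformAcrossVFsAndUFs(V)) {
    // V can be computed once as a scalar and broadcast if needed.
  }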
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
DenseMap< const Value *, const SCEV * > ValueToSCEVMapTy
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
constexpr from_range_t from_range
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
auto cast_or_null(const Y &Val)
iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order.
iterator_range< df_iterator< VPBlockDeepTraversalWrapper< VPBlockBase * > > > vp_depth_first_deep(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order while traversing t...
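Combining the traversal and range helpers gives the canonical cleanup-loop shape: walk the shallow CFG, and use an early-increment range so recipes can be erased mid-iteration. A hedged sketch of dead-recipe removal (real transforms check further conditions such as memory reads):

  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
           vp_depth_first_shallow(Plan.getEntry())))
    for (VPRecipeBase &R : make_early_inc_range(reverse(*VPBB)))
      if (!R.mayHaveSideEffects() && !R.isPhi() &&
          all_of(R.definedValues(),
                 [](VPValue *V) { return V->getNumUsers() == 0; }))
        R.eraseFromParent();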
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
iterator_range< po_iterator< VPBlockDeepTraversalWrapper< VPBlockBase * > > > vp_post_order_deep(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in post order while traversing through ...
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI_FOR_TEST cl::opt< bool > EnableWideActiveLaneMask
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
iterator_range< filter_iterator< detail::IterOfRange< RangeT >, PredicateT > > make_filter_range(RangeT &&Range, PredicateT Pred)
Convenience function that takes a range of elements and a predicate, and return a new filter_iterator...
bool canConstantBeExtended(const APInt *C, Type *NarrowType, TTI::PartialReductionExtendKind ExtKind)
Check if a constant CI can be safely treated as having been extended from a narrower type with the gi...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
RecurKind
These are the kinds of recurrences that we support.
@ Mul
Product of integers.
@ Sub
Subtraction of integers.
@ AddChainWithSubs
A chain of adds and subs.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
DWARFExpression::Operation Op
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list is empty.
@ DataAndControlFlowWithoutRuntimeCheck
Use predicate to control both data and control flow, but modify the trip count so that a runtime over...
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
@ Default
The result values are uniform if and only if all operands are uniform.
constexpr detail::IsaCheckPredicate< Types... > IsaPred
Function object wrapper for the llvm::isa type check.
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
RemoveMask_match(const Op0_t &In, Op1_t &Out)
bool match(OpTy *V) const
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
MDNode * Scope
The tag for alias scope specification (used with noalias).
MDNode * NoAlias
The tag specifying the noalias scope.
This struct is a compact representation of a valid (non-zero power of two) alignment.
An information struct used to provide DenseMap with the various necessary components for a given valu...
Incoming for lane mask phi as machine instruction, incoming register Reg and incoming block Block are...
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
Struct to hold various analysis needed for cost computations.
A recipe for handling first-order recurrence phis.
A recipe for widening load operations with vector-predication intrinsics, using the address to load f...
A recipe for widening load operations, using the address to load from and an optional mask.
A recipe for widening select instructions.
A recipe for widening store operations with vector-predication intrinsics, using the value to store,...
A recipe for widening store operations, using the stored value, the address to store to and an option...