    cl::desc("Enable use of wide get active lane mask instructions"));
      GetIntOrFpInductionDescriptor,
    if (!VPBB->getParent())
    auto EndIter = Term ? Term->getIterator() : VPBB->end();
      VPValue *VPV = Ingredient.getVPSingleValue();
        const auto *II = GetIntOrFpInductionDescriptor(Phi);
            Phi, Start, Step, &Plan.getVF(), *II, Ingredient.getDebugLoc());
               "only VPInstructions expected here");
              *Load, Ingredient.getOperand(0), nullptr /*Mask*/,
              Ingredient.getDebugLoc());
              *Store, Ingredient.getOperand(1), Ingredient.getOperand(0),
              nullptr /*Mask*/, false /*Consecutive*/, false /*Reverse*/,
              drop_end(Ingredient.operands()), CI->getType(),
              CI->getOpcode(), Ingredient.getOperand(0), CI->getType(), *CI);
             "Only recipes with zero or one defined values expected");
      Ingredient.eraseFromParent();
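// Note: judging by these fragments, this is the initial lowering of
// VPInstruction "ingredients" into dedicated recipes: widened int/FP
// inductions for header phis, widened loads/stores, calls, and casts; the
// original ingredient is erased once its replacement recipe is in place.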
  for (auto &Recipe : *VPBB) {
          WorkList.insert({VPBB, Def});
  for (unsigned I = 0; I != WorkList.size(); ++I) {
    std::tie(SinkTo, SinkCandidate) = WorkList[I];
    if (SinkCandidate->getParent() == SinkTo ||
      if (!ScalarVFOnly && RepR->isSingleScalar())
    bool NeedsDuplicating = false;
    auto CanSinkWithUser = [SinkTo, &NeedsDuplicating,
                            SinkCandidate](VPUser *U) {
      if (UI->getParent() == SinkTo)
      NeedsDuplicating = UI->onlyFirstLaneUsed(SinkCandidate);
      return NeedsDuplicating &&
    if (!all_of(SinkCandidate->users(), CanSinkWithUser))
    if (NeedsDuplicating) {
      if (auto *SinkCandidateRepR =
                                      nullptr /*Mask*/, *SinkCandidateRepR);
        Clone = SinkCandidate->clone();
          WorkList.insert({SinkTo, Def});
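// Note: the indexed loop above is the usual LLVM fixed-point worklist idiom:
// iterating a SetVector by index (re-reading size() each trip) tolerates
// insertions during iteration, so newly discovered (block, candidate) pairs
// are processed in the same pass. A candidate is sunk to its use block and is
// duplicated first when some user only demands its first lane.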
  if (!EntryBB || EntryBB->size() != 1 ||
  if (EntryBB->getNumSuccessors() != 2)
  if (!Succ0 || !Succ1)
  if (Succ0->getNumSuccessors() + Succ1->getNumSuccessors() != 1)
  if (Succ0->getSingleSuccessor() == Succ1)
  if (Succ1->getSingleSuccessor() == Succ0)
    if (!Region1->isReplicator())
    auto *MiddleBasicBlock =
    if (!MiddleBasicBlock || !MiddleBasicBlock->empty())
    if (!Region2 || !Region2->isReplicator())
    if (!Mask1 || Mask1 != Mask2)
    assert(Mask1 && Mask2 && "both regions must have conditions");
    if (TransformedRegions.contains(Region1))
    if (!Then1 || !Then2)
      VPValue *Phi1ToMoveV = Phi1ToMove.getVPSingleValue();
      if (Phi1ToMove.getVPSingleValue()->getNumUsers() == 0) {
        Phi1ToMove.eraseFromParent();
      Phi1ToMove.moveBefore(*Merge2, Merge2->begin());
    TransformedRegions.insert(Region1);
  return !TransformedRegions.empty();
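// Note: the chain of early-outs above recognizes two replicate regions
// guarded by the same mask, separated only by an empty VPBasicBlock, and
// merges them; predicated phis of the first region move to the merge block
// of the second, and the function reports whether any regions were merged.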
  std::string RegionName = (Twine("pred.") + Instr->getOpcodeName()).str();
  assert(Instr->getParent() && "Predicated instruction not in any basic block");
  auto *BlockInMask = PredRecipe->getMask();
      RecipeWithoutMask->getDebugLoc());
    if (RepR->isPredicated())
    if (ParentRegion && ParentRegion->getExiting() == CurrentBlock)
    if (!VPBB->getParent())
    if (!PredVPBB || PredVPBB->getNumSuccessors() != 1 ||
      R.moveBefore(*PredVPBB, PredVPBB->end());
    auto *ParentRegion = VPBB->getParent();
    if (ParentRegion && ParentRegion->getExiting() == VPBB)
      ParentRegion->setExiting(PredVPBB);
    for (auto *Succ : to_vector(VPBB->successors())) {
  return !WorkList.empty();
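// Note: this is the VPlan analogue of merging a block into its predecessor:
// a VPBasicBlock whose single predecessor has a single successor is folded
// into that predecessor, updating the enclosing region's exiting block and
// rewiring the successor edges.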
  bool ShouldSimplify = true;
  while (ShouldSimplify) {
    if (!IV || IV->getTruncInst())
    auto &Casts = IV->getInductionDescriptor().getCastInsts();
      for (auto *U : FindMyCast->users()) {
        if (UserCast && UserCast->getUnderlyingValue() == IRCast) {
          FoundUserCast = UserCast;
      FindMyCast = FoundUserCast;
  if (!WidenOriginalIV || !WidenOriginalIV->isCanonical())
    bool IsConditionalAssume = RepR && RepR->isPredicated() &&
    if (IsConditionalAssume)
    if (R.mayHaveSideEffects())
    return all_of(R.definedValues(),
                  [](VPValue *V) { return V->getNumUsers() == 0; });
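// Note: a recipe is dead when it has no side effects and every value it
// defines has zero users (the all_of above); predicated assumes are treated
// as removable even though they are nominally side-effecting.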
    if (!PhiR || PhiR->getNumOperands() != 2 || PhiR->getNumUsers() != 1)
    if (*PhiR->user_begin() != Incoming->getDefiningRecipe() ||
    PhiR->replaceAllUsesWith(PhiR->getOperand(0));
    PhiR->eraseFromParent();
    Incoming->getDefiningRecipe()->eraseFromParent();
        Kind, FPBinOp, StartV, CanonicalIV, Step, "offset.idx");
    BaseIV = Builder.createScalarCast(Instruction::Trunc, BaseIV, TruncTy, DL);
  if (ResultTy != StepTy) {
    Builder.setInsertPoint(VecPreheader);
    Step = Builder.createScalarCast(Instruction::Trunc, Step, ResultTy, DL);
  return Builder.createScalarIVSteps(InductionOpcode, FPBinOp, BaseIV, Step,
  for (unsigned I = 0; I != Users.size(); ++I) {
      Users.insert_range(V->users());
  return Users.takeVector();
        Def->getNumUsers() == 0 || !Def->getUnderlyingValue() ||
        (RepR && (RepR->isSingleScalar() || RepR->isPredicated())))
        Def->operands(), /*IsSingleScalar*/ true);
    Clone->insertAfter(Def);
    Def->replaceAllUsesWith(Clone);
      VPValue *StepV = PtrIV->getOperand(1);
          nullptr, StartV, StepV, PtrIV->getDebugLoc(), Builder);
      VPValue *PtrAdd = Builder.createPtrAdd(PtrIV->getStartValue(), Steps,
    if (HasOnlyVectorVFs && none_of(WideIV->users(), [WideIV](VPUser *U) {
          return U->usesScalars(WideIV);
        Plan, ID.getKind(), ID.getInductionOpcode(),
        WideIV->getTruncInst(), WideIV->getStartValue(), WideIV->getStepValue(),
        WideIV->getDebugLoc(), Builder);
    if (!HasOnlyVectorVFs)
      WideIV->replaceAllUsesWith(Steps);
      WideIV->replaceUsesWithIf(Steps, [WideIV](VPUser &U, unsigned) {
        return U.usesScalars(WideIV);
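// Note: wide inductions with demanded scalar lanes are rewritten in terms of
// scalar IV steps; when the plan also has scalar VFs all uses are replaced,
// otherwise only the users that actually demand scalars are redirected via
// the replaceUsesWithIf predicate above.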
  return (IntOrFpIV && IntOrFpIV->getTruncInst()) ? nullptr : WideIV;
  if (!Def || Def->getNumOperands() != 2)
  auto IsWideIVInc = [&]() {
    auto &ID = WideIV->getInductionDescriptor();
    VPValue *IVStep = WideIV->getStepValue();
    switch (ID.getInductionOpcode()) {
    case Instruction::Add:
    case Instruction::FAdd:
    case Instruction::FSub:
    case Instruction::Sub: {
  return IsWideIVInc() ? WideIV : nullptr;
  if (WideIntOrFp && WideIntOrFp->getTruncInst())
    FirstActiveLane = B.createScalarZExtOrTrunc(
        FirstActiveLane, CanonicalIVType, FirstActiveLaneType, DL);
        B.createNaryOp(Instruction::Add, {CanonicalIV, FirstActiveLane}, DL);
      EndValue = B.createNaryOp(Instruction::Add, {EndValue, One}, DL);
  if (!WideIntOrFp || !WideIntOrFp->isCanonical()) {
    VPValue *Start = WideIV->getStartValue();
    VPValue *Step = WideIV->getStepValue();
    EndValue = B.createDerivedIV(
        Start, EndValue, Step);
  assert(EndValue && "end value must have been pre-computed");
  VPValue *Step = WideIV->getStepValue();
    return B.createNaryOp(Instruction::Sub, {EndValue, Step}, {}, "ind.escape");
    return B.createPtrAdd(EndValue,
                          B.createNaryOp(Instruction::Sub, {Zero, Step}),
  const auto &ID = WideIV->getInductionDescriptor();
  return B.createNaryOp(
      ID.getInductionBinOp()->getOpcode() == Instruction::FAdd
      {EndValue, Step}, {ID.getInductionBinOp()->getFastMathFlags()});
  for (auto [Idx, PredVPBB] : enumerate(ExitVPBB->getPredecessors())) {
    if (PredVPBB == MiddleVPBB)
          ExitIRI->getOperand(Idx),
          ExitIRI->getOperand(Idx), SE);
      ExitIRI->setOperand(Idx, Escape);
    const auto &[V, Inserted] = SCEV2VPV.try_emplace(ExpR->getSCEV(), ExpR);
    ExpR->replaceAllUsesWith(V->second);
    ExpR->eraseFromParent();
  while (!WorkList.empty()) {
    if (!Seen.insert(Cur).second)
    R->eraseFromParent();
static std::optional<std::pair<bool, unsigned>>
                    std::optional<std::pair<bool, unsigned>>>(R)
          [](auto *I) { return std::make_pair(false, I->getOpcode()); })
      .Case<VPWidenIntrinsicRecipe>([](auto *I) {
        return std::make_pair(true, I->getVectorIntrinsicID());
      .Case<VPVectorPointerRecipe, VPPredInstPHIRecipe>([](auto *I) {
        return std::make_pair(false,
      .Default([](auto *) { return std::nullopt; });
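// Note: the TypeSwitch above maps a recipe to a (bool, unsigned) pair where
// the bool says whether the unsigned is a vector intrinsic ID (true) or an
// instruction opcode (false); recipe kinds with no usable opcode fall
// through to std::nullopt.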
    if (!Op->isLiveIn() || !Op->getLiveInIRValue())
    Ops.push_back(Op->getLiveInIRValue());
  auto FoldToIRValue = [&]() -> Value * {
    if (OpcodeOrIID->first) {
      if (R.getNumOperands() != 2)
      unsigned ID = OpcodeOrIID->second;
      return Folder.FoldBinaryIntrinsic(ID, Ops[0], Ops[1],
    unsigned Opcode = OpcodeOrIID->second;
      return Folder.FoldSelect(Ops[0], Ops[1],
      return Folder.FoldBinOp(Instruction::BinaryOps::Xor, Ops[0],
    case Instruction::Select:
      return Folder.FoldSelect(Ops[0], Ops[1], Ops[2]);
    case Instruction::ICmp:
    case Instruction::FCmp:
    case Instruction::GetElementPtr: {
      return Folder.FoldGEP(GEP->getSourceElementType(), Ops[0],
    case Instruction::ExtractElement:
  if (Value *V = FoldToIRValue())
    return R.getParent()->getPlan()->getOrAddLiveIn(V);
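// Note: folding only fires when every operand is a live-in with an
// underlying IR value; the InstSimplifyFolder folds the operation on those
// IR constants and the result is re-imported into the plan as a live-in via
// getOrAddLiveIn.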
  VPlan *Plan = R.getParent()->getPlan();
    return Def->replaceAllUsesWith(V);
    PredPHI->replaceAllUsesWith(Op);
    if (TruncTy == ATy) {
      Def->replaceAllUsesWith(A);
                         : Instruction::ZExt;
        if (auto *UnderlyingExt = R.getOperand(0)->getUnderlyingValue()) {
          Ext->setUnderlyingValue(UnderlyingExt);
        Def->replaceAllUsesWith(Ext);
        auto *Trunc = Builder.createWidenCast(Instruction::Trunc, A, TruncTy);
        Def->replaceAllUsesWith(Trunc);
    for (VPUser *U : A->users()) {
      for (VPValue *VPV : R->definedValues())
    Def->replaceAllUsesWith(X);
    Def->eraseFromParent();
    return Def->replaceAllUsesWith(Def->getOperand(Def->getOperand(0) == X));
    return Def->replaceAllUsesWith(X);
    return Def->replaceAllUsesWith(Def->getOperand(Def->getOperand(0) == X));
    return Def->replaceAllUsesWith(Def->getOperand(1));
      (!Def->getOperand(0)->hasMoreThanOneUniqueUser() ||
       !Def->getOperand(1)->hasMoreThanOneUniqueUser()))
    return Def->replaceAllUsesWith(
        Builder.createLogicalAnd(X, Builder.createOr(Y, Z)));
    return Def->replaceAllUsesWith(Plan->getFalse());
    return Def->replaceAllUsesWith(X);
    Def->setOperand(0, C);
    Def->setOperand(1, Y);
    Def->setOperand(2, X);
      X->hasMoreThanOneUniqueUser())
    return Def->replaceAllUsesWith(
        Builder.createLogicalAnd(X, Builder.createLogicalAnd(Y, Z)));
    return Def->replaceAllUsesWith(A);
    return Def->replaceAllUsesWith(R.getOperand(0) == A ? R.getOperand(1)
    return Def->replaceAllUsesWith(A);
    R->setOperand(1, Y);
    R->setOperand(2, X);
    R->replaceAllUsesWith(Cmp);
    if (!Cmp->getDebugLoc() && R.getDebugLoc())
      Cmp->setDebugLoc(R.getDebugLoc());
    return Def->replaceAllUsesWith(Def->getOperand(1));
    X = Builder.createWidenCast(Instruction::Trunc, X, WideStepTy);
    Def->replaceAllUsesWith(X);
    Def->setOperand(1, Def->getOperand(0));
    Def->setOperand(0, Y);
    if (Phi->getOperand(0) == Phi->getOperand(1))
      Def->replaceAllUsesWith(Phi->getOperand(0));
    Def->replaceAllUsesWith(
        BuildVector->getOperand(BuildVector->getNumOperands() - 1));
    Def->replaceAllUsesWith(
        BuildVector->getOperand(BuildVector->getNumOperands() - 2));
    Def->replaceAllUsesWith(BuildVector->getOperand(Idx));
    if (Phi->getNumOperands() == 1)
      Phi->replaceAllUsesWith(Phi->getOperand(0));
    if (Phi->getOperand(1) != Def && match(Phi->getOperand(0), m_ZeroInt()) &&
        Phi->getNumUsers() == 1 && (*Phi->user_begin() == &R)) {
      Phi->setOperand(0, Y);
      Def->replaceAllUsesWith(Phi);
    if (VecPtr->isFirstPart()) {
      VecPtr->replaceAllUsesWith(VecPtr->getOperand(0));
      Steps->replaceAllUsesWith(Steps->getOperand(0));
    Def->replaceUsesWithIf(StartV, [](const VPUser &U, unsigned Idx) {
      return PhiR && PhiR->isInLoop();
    Def->replaceAllUsesWith(A);
                [Def, A](VPUser *U) { return U->usesScalars(A) || Def == U; })) {
      return Def->replaceAllUsesWith(A);
  if (Plan->getUF() == 1 &&
    return Def->replaceAllUsesWith(
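// Note: this long run of guarded rewrites is the VPlan peephole simplifier:
// redundant trunc/zext/sext pairs, logical and/or/xor and select identities,
// extracts from BuildVector operands, degenerate blends and phis, and
// single-part vector pointers are each replaced via replaceAllUsesWith, and
// the new uses are re-simplified later.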
    if (RepR && (RepR->isSingleScalar() || RepR->isPredicated()))
        RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(),
        true /*IsSingleScalar*/, nullptr /*Mask*/, *RepR /*Metadata*/);
    Clone->insertBefore(RepOrWidenR);
    unsigned ExtractOpc =
      auto *Ext = new VPInstruction(ExtractOpc, {Clone->getOperand(0)});
      Ext->insertBefore(Clone);
      Clone->setOperand(0, Ext);
      RepR->eraseFromParent();
        !all_of(RepOrWidenR->users(), [RepOrWidenR](const VPUser *U) {
          return U->usesScalars(RepOrWidenR) ||
                 match(cast<VPRecipeBase>(U),
                       m_CombineOr(m_ExtractLastElement(m_VPValue()),
                                   m_ExtractLastLanePerPart(m_VPValue())));
        RepOrWidenR->operands(),
    Clone->insertBefore(RepOrWidenR);
    RepOrWidenR->replaceAllUsesWith(Clone);
  if (Blend->isNormalized() || !match(Blend->getMask(0), m_False()))
    UniqueValues.insert(Blend->getIncomingValue(0));
  for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)
      UniqueValues.insert(Blend->getIncomingValue(I));
  if (UniqueValues.size() == 1) {
    Blend->replaceAllUsesWith(*UniqueValues.begin());
    Blend->eraseFromParent();
  if (Blend->isNormalized())
  unsigned StartIndex = 0;
  for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
    if (Mask->getNumUsers() == 1 && !match(Mask, m_False())) {
  OperandsWithMask.push_back(Blend->getIncomingValue(StartIndex));
  for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
    if (I == StartIndex)
    OperandsWithMask.push_back(Blend->getIncomingValue(I));
    OperandsWithMask.push_back(Blend->getMask(I));
      OperandsWithMask, Blend->getDebugLoc());
  NewBlend->insertBefore(&R);
  VPValue *DeadMask = Blend->getMask(StartIndex);
  Blend->eraseFromParent();
  if (NewBlend->getNumOperands() == 3 &&
    VPValue *Inc0 = NewBlend->getOperand(0);
    VPValue *Inc1 = NewBlend->getOperand(1);
    VPValue *OldMask = NewBlend->getOperand(2);
    NewBlend->setOperand(0, Inc1);
    NewBlend->setOperand(1, Inc0);
    NewBlend->setOperand(2, NewMask);
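// Note: normalization picks an incoming value whose mask has no other users
// (and is not all-false) as the default, makes it operand 0 without a mask,
// and keeps (value, mask) pairs for the rest; a resulting two-way blend with
// a negated mask is canonicalized by swapping its incoming values and using
// the un-negated mask.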
  APInt MaxVal = AlignedTC - 1;
  unsigned NewBitWidth =
  bool MadeChange = false;
    if (!WideIV || !WideIV->isCanonical() ||
        WideIV->hasMoreThanOneUniqueUser() ||
        NewIVTy == WideIV->getScalarType())
    if (!match(*WideIV->user_begin(),
    auto *NewStart = Plan.getOrAddLiveIn(ConstantInt::get(NewIVTy, 0));
    WideIV->setStartValue(NewStart);
    auto *NewStep = Plan.getOrAddLiveIn(ConstantInt::get(NewIVTy, 1));
    WideIV->setStepValue(NewStep);
    Cmp->setOperand(1, NewBTC);
    return any_of(Cond->getDefiningRecipe()->operands(),
                  [&Plan, BestVF, BestUF, &SE](VPValue *C) {
      return isConditionTrueViaVFAndUF(C, Plan, BestVF, BestUF, SE);
  const SCEV *VectorTripCount =
         "Trip count SCEV must be computable");
  auto *Term = &ExitingVPBB->back();
    for (unsigned Part = 0; Part < UF; ++Part) {
      Extracts[Part] = Ext;
      Ext->insertAfter(ALM);
        match(Phi->getBackedgeValue(),
    assert(Index && "Expected index from ActiveLaneMask instruction");
         "Expected one VPActiveLaneMaskPHIRecipe for each unroll part");
           "Expected incoming values of Phi to be ActiveLaneMasks");
  EntryALM->setOperand(2, ALMMultiplier);
  LoopALM->setOperand(2, ALMMultiplier);
  ExtractFromALM(EntryALM, EntryExtracts);
  ExtractFromALM(LoopALM, LoopExtracts);
  Not->setOperand(0, LoopExtracts[0]);
  for (unsigned Part = 0; Part < UF; ++Part) {
    Phis[Part]->setStartValue(EntryExtracts[Part]);
    Phis[Part]->setBackedgeValue(LoopExtracts[Part]);
  auto *Term = &ExitingVPBB->back();
  const SCEV *TripCount =
         "Trip count SCEV must be computable");
  if (TripCount->isZero() ||
        if (auto *R = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi))
          return R->isCanonical();
        return isa<VPCanonicalIVPHIRecipe, VPEVLBasedIVPHIRecipe,
                   VPFirstOrderRecurrencePHIRecipe, VPPhi>(&Phi);
                                      R->getScalarType());
      HeaderR.eraseFromParent();
      HeaderR.getVPSingleValue()->replaceAllUsesWith(Phi->getIncomingValue(0));
      HeaderR.eraseFromParent();
    B->setParent(nullptr);
                                 Term->getDebugLoc());
  Term->eraseFromParent();
  assert(Plan.hasVF(BestVF) && "BestVF is not available in Plan");
  assert(Plan.hasUF(BestUF) && "BestUF is not available in Plan");
  assert(Plan.getUF() == BestUF && "BestUF must match the Plan's UF");
  auto TryToPushSinkCandidate = [&](VPRecipeBase *SinkCandidate) {
    if (SinkCandidate == Previous)
        !Seen.insert(SinkCandidate).second ||
    if (SinkCandidate->mayHaveSideEffects())
  for (unsigned I = 0; I != WorkList.size(); ++I) {
           "only recipes with a single defined value expected");
    if (SinkCandidate == FOR)
    SinkCandidate->moveAfter(Previous);
    Previous = SinkCandidate;
  for (VPUser *U : FOR->users()) {
                [&VPDT, HoistPoint](VPUser *U) {
                  auto *R = cast<VPRecipeBase>(U);
                  return HoistPoint == R ||
                         VPDT.properlyDominates(HoistPoint, R);
         "HoistPoint must dominate all users of FOR");
  auto NeedsHoisting = [HoistPoint, &VPDT,
    VPRecipeBase *HoistCandidate = HoistCandidateV->getDefiningRecipe();
    if (!HoistCandidate)
            HoistCandidate->getRegion() == EnclosingLoopRegion) &&
           "CFG in VPlan should still be flat, without replicate regions");
    if (!Visited.insert(HoistCandidate).second)
    return HoistCandidate;
    return !HoistCandidate->mayHaveSideEffects();
  for (unsigned I = 0; I != HoistCandidates.size(); ++I) {
           "only recipes with a single defined value expected");
    if (!CanHoist(Current))
      if (auto *R = NeedsHoisting(Op))
    HoistCandidate->moveBefore(*HoistPoint->getParent(),
  VPRecipeBase *Previous = FOR->getBackedgeValue()->getDefiningRecipe();
  while (auto *PrevPhi =
    assert(PrevPhi->getParent() == FOR->getParent());
    Previous = PrevPhi->getBackedgeValue()->getDefiningRecipe();
      {FOR, FOR->getBackedgeValue()});
  FOR->replaceAllUsesWith(RecurSplice);
  RecurSplice->setOperand(0, FOR);
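// Note: to vectorize a first-order recurrence, every user of the recurrence
// phi must execute after the recipe computing the backedge value, so users
// are sunk (or the backedge value's dependencies hoisted) first. The
// recurrence is then expressed as a FirstOrderRecurrenceSplice of phi and
// backedge value; since replaceAllUsesWith also rewrote the splice's own
// first operand, it is reset to FOR explicitly afterwards.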
  RecurKind RK = PhiR->getRecurrenceKind();
      RecWithFlags->dropPoisonGeneratingFlags();
struct VPCSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> {
    return Def == getEmptyKey() || Def == getTombstoneKey();
      return GEP->getSourceElementType();
        .Case<VPVectorPointerRecipe, VPWidenGEPRecipe>(
            [](auto *I) { return I->getSourceElementType(); })
        .Default([](auto *) { return nullptr; });
  static bool canHandle(const VPSingleDefRecipe *Def) {
    if (!C || (!C->first && (C->second == Instruction::InsertValue ||
                             C->second == Instruction::ExtractValue)))
    return !Def->mayReadFromMemory();
  static unsigned getHashValue(const VPSingleDefRecipe *Def) {
    const VPlan *Plan = Def->getParent()->getPlan();
    VPTypeAnalysis TypeInfo(*Plan);
        getGEPSourceElementType(Def), TypeInfo.inferScalarType(Def),
      if (RFlags->hasPredicate())
  static bool isEqual(const VPSingleDefRecipe *L,
                      const VPSingleDefRecipe *R) {
    if (L->getVPDefID() != R->getVPDefID() ||
        getGEPSourceElementType(L) != getGEPSourceElementType(R) ||
        !equal(L->operands(), R->operands()))
           "must have valid opcode info for both recipes");
      if (LFlags->hasPredicate() &&
          LFlags->getPredicate() !=
    const VPRegionBlock *RegionL = L->getRegion();
    const VPRegionBlock *RegionR = R->getRegion();
        L->getParent() != R->getParent())
    const VPlan *Plan = L->getParent()->getPlan();
    VPTypeAnalysis TypeInfo(*Plan);
    return TypeInfo.inferScalarType(L) == TypeInfo.inferScalarType(R);
    if (!Def || !VPCSEDenseMapInfo::canHandle(Def))
    if (!VPDT.dominates(V->getParent(), VPBB))
    Def->replaceAllUsesWith(V);
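// Note: this is classic hash-based CSE keyed on (recipe kind, opcode or
// intrinsic ID, GEP source element type, operands, inferred scalar type),
// replacing a recipe with an equivalent dominating one. A standalone sketch
// of the same value-numbering idea in plain C++ (illustrative names only,
// not the LLVM API):
//
//   #include <cstdio>
//   #include <map>
//   #include <utility>
//   #include <vector>
//
//   struct Expr {
//     int Opcode;           // stand-in for the recipe kind/opcode
//     std::vector<int> Ops; // stand-in for operand value numbers
//   };
//
//   int main() {
//     std::vector<Expr> Block = {{1, {10, 11}}, {2, {12}}, {1, {10, 11}}};
//     std::map<std::pair<int, std::vector<int>>, size_t> Seen;
//     for (size_t I = 0; I < Block.size(); ++I) {
//       auto [It, New] = Seen.try_emplace({Block[I].Opcode, Block[I].Ops}, I);
//       if (!New) // an equivalent earlier expression exists: reuse it
//         std::printf("expr %zu duplicates %zu\n", I, It->second);
//     }
//   }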
    if (R.mayHaveSideEffects() || R.mayReadFromMemory() || R.isPhi())
      return RepR && RepR->getOpcode() == Instruction::Alloca;
         "Expected vector preheader's successor to be the vector loop region");
      if (CannotHoistRecipe(R))
            return !Op->isDefinedOutsideLoopRegions();
      R.moveBefore(*Preheader, Preheader->end());
  VPValue *ResultVPV = R.getVPSingleValue();
  unsigned NewResSizeInBits = MinBWs.lookup(UI);
  if (!NewResSizeInBits)
  (void)OldResSizeInBits;
    VPW->dropPoisonGeneratingFlags();
  if (OldResSizeInBits != NewResSizeInBits &&
    Ext->insertAfter(&R);
    Ext->setOperand(0, ResultVPV);
    assert(OldResSizeInBits > NewResSizeInBits && "Nothing to shrink?");
           "Only ICmps should not need extending the result.");
  for (unsigned Idx = StartIdx; Idx != R.getNumOperands(); ++Idx) {
    auto *Op = R.getOperand(Idx);
    unsigned OpSizeInBits =
    if (OpSizeInBits == NewResSizeInBits)
    assert(OpSizeInBits > NewResSizeInBits && "nothing to truncate");
    auto [ProcessedIter, IterIsEmpty] = ProcessedTruncs.try_emplace(Op);
      R.setOperand(Idx, ProcessedIter->second);
    Builder.setInsertPoint(&R);
        Builder.createWidenCast(Instruction::Trunc, Op, NewResTy);
    ProcessedIter->second = NewOp;
    R.setOperand(Idx, NewOp);
  assert(VPBB->getNumSuccessors() == 2 &&
         "Two successors expected for BranchOnCond");
  unsigned RemovedIdx;
         "There must be a single edge between VPBB and its successor");
  VPBB->back().eraseFromParent();
  VPValue *StartV = CanonicalIVPHI->getStartValue();
  auto *CanonicalIVIncrement =
  CanonicalIVIncrement->dropPoisonGeneratingFlags();
  DebugLoc DL = CanonicalIVIncrement->getDebugLoc();
  VPValue *TripCount, *IncrementValue;
    IncrementValue = CanonicalIVIncrement;
    IncrementValue = CanonicalIVPHI;
  auto *EntryIncrement = Builder.createOverflowingOp(
      {EntryIncrement, TC, ALMMultiplier}, DL, "active.lane.mask.entry");
  LaneMaskPhi->insertAfter(CanonicalIVPHI);
  Builder.setInsertPoint(OriginalTerminator);
  auto *InLoopIncrement =
      {IncrementValue}, {false, false}, DL);
      {InLoopIncrement, TripCount, ALMMultiplier}, DL, "active.lane.mask.next");
  auto *NotMask = Builder.createNot(ALM, DL);
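// Note: under data-and-control-flow tail folding the loop latch tests an
// active-lane-mask phi: the entry mask is built in the preheader from the
// start IV, the next mask is built in the loop from the incremented IV, and
// the terminator branches on the negation of the next mask.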
  auto *FoundWidenCanonicalIVUser = find_if(
         "Must have at most one VPWidenCanonicalIVRecipe");
  if (FoundWidenCanonicalIVUser !=
    auto *WideCanonicalIV =
    WideCanonicalIVs.push_back(WideCanonicalIV);
    if (WidenOriginalIV && WidenOriginalIV->isCanonical())
      WideCanonicalIVs.push_back(WidenOriginalIV);
  for (auto *Wide : WideCanonicalIVs) {
      assert(VPI->getOperand(0) == Wide &&
             "WidenCanonicalIV must be the first operand of the compare");
      assert(!HeaderMask && "Multiple header masks found?");
    VPlan &Plan, bool UseActiveLaneMaskForControlFlow,
           UseActiveLaneMaskForControlFlow) &&
         "DataAndControlFlowWithoutRuntimeCheck implies "
         "UseActiveLaneMaskForControlFlow");
  auto *FoundWidenCanonicalIVUser = find_if(
  assert(FoundWidenCanonicalIVUser &&
         "Must have widened canonical IV when tail folding!");
  auto *WideCanonicalIV =
  if (UseActiveLaneMaskForControlFlow) {
        nullptr, "active.lane.mask");
    assert(OrigMask && "Unmasked recipe when folding tail");
    return HeaderMask == OrigMask ? nullptr : OrigMask;
  auto GetNewAddr = [&CurRecipe, &EVL](VPValue *Addr) -> VPValue * {
    assert(EndPtr->getOperand(1) == &EndPtr->getParent()->getPlan()->getVF() &&
           "VPVectorEndPointerRecipe with non-VF VF operand?");
             return cast<VPWidenMemoryRecipe>(U)->isReverse();
           "VPVectorEndPointerRecipe not used by reversed widened memory recipe?");
    VPValue *NewMask = GetNewMask(L->getMask());
    VPValue *NewAddr = GetNewAddr(L->getAddr());
    VPValue *NewMask = GetNewMask(IR->getMask());
    VPValue *NewMask = GetNewMask(Red->getCondOp());
        Intrinsic::vp_merge, {&AllOneMask, LHS, RHS, &EVL},
         "User of VF that we can't transform to EVL.");
      [&LoopRegion, &Plan](VPUser *U) {
                 m_c_Add(m_Specific(LoopRegion->getCanonicalIV()),
                         m_Specific(&Plan.getVFxUF()))) ||
               isa<VPWidenPointerInductionRecipe>(U);
      "Only users of VFxUF should be VPWidenPointerInductionRecipe and the "
      "increment of the canonical induction.");
    MaxEVL = Builder.createScalarZExtOrTrunc(
  Builder.setInsertPoint(Header, Header->getFirstNonPhi());
  VPValue *PrevEVL = Builder.createScalarPhi(
        Intrinsic::experimental_vp_splice,
        {V1, V2, Imm, AllOneMask, PrevEVL, &EVL},
    R.getVPSingleValue()->replaceAllUsesWith(VPSplice);
  VPValue *EVLMask = Builder.createICmp(
  assert(NumDefVal == CurRecipe->getNumDefinedValues() &&
         "New recipe must define the same number of values as the "
  for (unsigned I = 0; I < NumDefVal; ++I) {
    VPValue *CurVPV = CurRecipe->getVPValue(I);
  R->eraseFromParent();
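// Note: EVL lowering rewrites masked recipes onto vector-predication
// intrinsics: operands masked only by the header mask drop the mask, selects
// become vp_merge with an all-ones mask plus the EVL, and first-order
// recurrence splices become experimental_vp_splice fed by the previous
// iteration's EVL (PrevEVL above).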
    VPlan &Plan, const std::optional<unsigned> &MaxSafeElements) {
  VPValue *StartV = CanonicalIVPHI->getStartValue();
  EVLPhi->insertAfter(CanonicalIVPHI);
  VPBuilder Builder(Header, Header->getFirstNonPhi());
  VPPhi *AVLPhi = Builder.createScalarPhi(
  if (MaxSafeElements) {
        Plan.getOrAddLiveIn(ConstantInt::get(CanIVTy, *MaxSafeElements));
  auto *CanonicalIVIncrement =
  Builder.setInsertPoint(CanonicalIVIncrement);
    OpVPEVL = Builder.createScalarZExtOrTrunc(
        OpVPEVL, CanIVTy, I32Ty, CanonicalIVIncrement->getDebugLoc());
  auto *NextEVLIV = Builder.createOverflowingOp(
      Instruction::Add, {OpVPEVL, EVLPhi},
      {CanonicalIVIncrement->hasNoUnsignedWrap(),
       CanonicalIVIncrement->hasNoSignedWrap()},
      CanonicalIVIncrement->getDebugLoc(), "index.evl.next");
  EVLPhi->addOperand(NextEVLIV);
  VPValue *NextAVL = Builder.createOverflowingOp(
      Instruction::Sub, {AVLPhi, OpVPEVL}, {true, false},
  CanonicalIVPHI->replaceAllUsesWith(EVLPhi);
  CanonicalIVIncrement->setOperand(0, CanonicalIVPHI);
    assert(!EVLPhi && "Found multiple EVL PHIs. Only one expected");
  [[maybe_unused]] bool FoundAVL =
  assert(FoundAVL && "Didn't find AVL?");
  [[maybe_unused]] bool FoundAVLNext =
  assert(FoundAVLNext && "Didn't find AVL backedge?");
  VPValue *Backedge = CanonicalIV->getIncomingValue(1);
         "Unexpected canonical iv");
  CanonicalIV->eraseFromParent();
  match(LatchExitingBr,
         "Unexpected terminator in EVL loop");
  LatchExitingBr->eraseFromParent();
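// Note: the EVL-based IV advances by the number of lanes actually processed
// (index.evl.next = evl + index.evl) while the remaining-element count
// decreases (avl.next = avl - evl, NUW per the {true, false} wrap flags);
// afterwards the canonical IV phi and the old latch branch become redundant
// and are erased.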
    return R->getRegion() ||
  for (const SCEV *Stride : StridesMap.values()) {
    const APInt *StrideConst;
    if (!match(PSE.getSCEV(StrideV), m_scev_APInt(StrideConst)))
        Plan.getOrAddLiveIn(ConstantInt::get(Stride->getType(), *StrideConst));
      unsigned BW = U->getType()->getScalarSizeInBits();
    RewriteMap[StrideV] = PSE.getSCEV(StrideV);
    const SCEV *ScevExpr = ExpSCEV->getSCEV();
    if (NewSCEV != ScevExpr) {
      ExpSCEV->replaceAllUsesWith(NewExp);
    const std::function<bool(BasicBlock *)> &BlockNeedsPredication) {
  auto CollectPoisonGeneratingInstrsInBackwardSlice([&](VPRecipeBase *Root) {
    while (!Worklist.empty()) {
      if (!Visited.insert(CurRec).second)
            RecWithFlags->isDisjoint()) {
              Instruction::Add, {A, B}, {false, false},
              RecWithFlags->getDebugLoc());
          New->setUnderlyingValue(RecWithFlags->getUnderlyingValue());
          RecWithFlags->replaceAllUsesWith(New);
          RecWithFlags->eraseFromParent();
          RecWithFlags->dropPoisonGeneratingFlags();
        assert((!Instr || !Instr->hasPoisonGeneratingFlags()) &&
               "found instruction with poison generating flags not covered by "
               "VPRecipeWithIRFlags");
        if (VPRecipeBase *OpDef = Operand->getDefiningRecipe())
      Instruction &UnderlyingInstr = WidenRec->getIngredient();
      VPRecipeBase *AddrDef = WidenRec->getAddr()->getDefiningRecipe();
      if (AddrDef && WidenRec->isConsecutive() &&
          BlockNeedsPredication(UnderlyingInstr.getParent()))
        CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
      VPRecipeBase *AddrDef = InterleaveRec->getAddr()->getDefiningRecipe();
          InterleaveRec->getInterleaveGroup();
      bool NeedPredication = false;
      for (unsigned I = 0; I < NumMembers; ++I) {
        NeedPredication |= BlockNeedsPredication(Member->getParent());
      if (NeedPredication)
        CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
  if (InterleaveGroups.empty())
  for (const auto *IG : InterleaveGroups) {
      StoredValues.push_back(StoreR->getStoredValue());
    for (unsigned I = 1; I < IG->getFactor(); ++I) {
        StoredValues.push_back(StoreR->getStoredValue());
    bool NeedsMaskForGaps =
        (IG->requiresScalarEpilogue() && !ScalarEpilogueAllowed) ||
        (!StoredValues.empty() && !IG->isFull());
    VPValue *Addr = Start->getAddr();
      assert(IG->getIndex(IRInsertPos) != 0 &&
             "index of insert position shouldn't be zero");
          IG->getIndex(IRInsertPos),
      Addr = B.createNoWrapPtrAdd(InsertPos->getAddr(), OffsetVPV, NW);
    if (IG->isReverse()) {
          -(int64_t)IG->getFactor(), NW, InsertPos->getDebugLoc());
      ReversePtr->insertBefore(InsertPos);
        InsertPos->getMask(), NeedsMaskForGaps,
        InterleaveMD, InsertPos->getDebugLoc());
    VPIG->insertBefore(InsertPos);
    for (unsigned i = 0; i < IG->getFactor(); ++i)
        if (!Member->getType()->isVoidTy()) {
    AddOp = Instruction::Add;
    MulOp = Instruction::Mul;
    AddOp = ID.getInductionOpcode();
    MulOp = Instruction::FMul;
    Flags = ID.getInductionBinOp()->getFastMathFlags();
    Step = Builder.createScalarCast(Instruction::Trunc, Step, Ty, DL);
    Start = Builder.createScalarCast(Instruction::Trunc, Start, Ty, DL);
    Init = Builder.createWidenCast(Instruction::UIToFP, Init, StepTy);
  Init = Builder.createNaryOp(MulOp, {Init, SplatStep}, Flags);
  Init = Builder.createNaryOp(AddOp, {SplatStart, Init}, Flags,
  WidePHI->insertBefore(WidenIVR);
  Builder.setInsertPoint(R->getParent(), std::next(R->getIterator()));
    VF = Builder.createScalarCast(Instruction::CastOps::UIToFP, VF, StepTy,
    VF = Builder.createScalarZExtOrTrunc(VF, StepTy,
  Inc = Builder.createNaryOp(MulOp, {Step, VF}, Flags);
  auto *Next = Builder.createNaryOp(AddOp, {Prev, Inc}, Flags,
  WidePHI->addOperand(Next);
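// Note: conceptually, for an integer IV and VF = 4 this expansion builds
//   init    = <0,1,2,3> * splat(step) + splat(start)
//   wide.iv = phi [init, preheader], [next, latch]
//   next    = wide.iv + splat(step * VF)
// using FMul/FAdd plus the induction's fast-math flags for FP inductions.
// The snippet is illustrative, not taken from the source.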
  VPlan *Plan = R->getParent()->getPlan();
  VPValue *Start = R->getStartValue();
  VPValue *Step = R->getStepValue();
  VPValue *VF = R->getVFValue();
  assert(R->getInductionDescriptor().getKind() ==
         "Not a pointer induction according to InductionDescriptor!");
         "Recipe should have been replaced");
  VPPhi *ScalarPtrPhi = Builder.createScalarPhi(Start, DL, "pointer.phi");
  Builder.setInsertPoint(R->getParent(), R->getParent()->getFirstNonPhi());
  Offset = Builder.createNaryOp(Instruction::Mul, {Offset, Step});
  VPValue *PtrAdd = Builder.createNaryOp(
  R->replaceAllUsesWith(PtrAdd);
  VF = Builder.createScalarZExtOrTrunc(VF, StepTy, TypeInfo.inferScalarType(VF),
  VPValue *Inc = Builder.createNaryOp(Instruction::Mul, {Step, VF});
      Builder.createPtrAdd(ScalarPtrPhi, Inc, DL, "ptr.ind");
  if (!R->isReplicator())
    R->dissolveToCFGLoop();
  for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)
    Select = Builder.createSelect(Blend->getMask(I),
                                  Blend->getIncomingValue(I), Select,
                                  R.getDebugLoc(), "predphi");
  Blend->replaceAllUsesWith(Select);
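// Note: a normalized blend of N incoming values lowers to a chain of N - 1
// selects built first-to-last, so later (value, mask) pairs take priority:
// blend(v0, (v1,m1), (v2,m2)) becomes select(m2, v2, select(m1, v1, v0)).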
                          ? Instruction::UIToFP
                          : Instruction::Trunc;
    VectorStep = Builder.createWidenCast(CastOp, VectorStep, IVTy);
        Builder.createWidenCast(Instruction::Trunc, ScalarStep, IVTy);
    Flags = {VPI->getFastMathFlags()};
        MulOpc, {VectorStep, ScalarStep}, Flags, R.getDebugLoc());
    VPI->replaceAllUsesWith(VectorStep);
  R->eraseFromParent();
         "unsupported early exit VPBB");
         "Terminator must be BranchOnCond");
  VPValue *CondOfEarlyExitingVPBB =
  auto *CondToEarlyExit = TrueSucc == EarlyExitVPBB
                              ? CondOfEarlyExitingVPBB
                              : Builder.createNot(CondOfEarlyExitingVPBB);
  VPBuilder EarlyExitB(VectorEarlyExitVPBB);
    unsigned EarlyExitIdx = ExitIRI->getNumOperands() - 1;
    if (ExitIRI->getNumOperands() != 1) {
      ExitIRI->extractLastLaneOfFirstOperand(MiddleBuilder);
    VPValue *IncomingFromEarlyExit = ExitIRI->getOperand(EarlyExitIdx);
    if (!IncomingFromEarlyExit->isLiveIn()) {
          "first.active.lane");
          nullptr, "early.exit.value");
      ExitIRI->setOperand(EarlyExitIdx, IncomingFromEarlyExit);
         "Unexpected terminator");
  auto *IsLatchExitTaken =
      LatchExitingBranch->getOperand(1));
  auto *AnyExitTaken = Builder.createNaryOp(
      Instruction::Or, {IsEarlyExitTaken, IsLatchExitTaken});
  LatchExitingBranch->eraseFromParent();
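// Note: for loops with an uncountable early exit the vector latch branches
// on the disjunction of both exit conditions (AnyExitTaken above), and a
// value escaping through the early exit is extracted at the lane given by
// the first active bit of the early-exit condition.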
  Type *RedTy = Ctx.Types.inferScalarType(Red);
  VPValue *VecOp = Red->getVecOp();
  auto IsExtendedRedValidAndClampRange =
          ExtRedCost = Ctx.TTI.getPartialReductionCost(
              Opcode, SrcTy, nullptr, RedTy, VF, ExtKind,
          ExtRedCost = Ctx.TTI.getExtendedReductionCost(
              Opcode, ExtOpc == Instruction::CastOps::ZExt, RedTy, SrcVecTy,
              Red->getFastMathFlags(), CostKind);
        return ExtRedCost.isValid() && ExtRedCost < ExtCost + RedCost;
      IsExtendedRedValidAndClampRange(
          Ctx.Types.inferScalarType(A)))
  if (Opcode != Instruction::Add && Opcode != Instruction::Sub)
  Type *RedTy = Ctx.Types.inferScalarType(Red);
  auto IsMulAccValidAndClampRange =
            Ext0 ? Ctx.Types.inferScalarType(Ext0->getOperand(0)) : RedTy;
        if (IsPartialReduction) {
              Ext1 ? Ctx.Types.inferScalarType(Ext1->getOperand(0)) : nullptr;
          MulAccCost = Ctx.TTI.getPartialReductionCost(
              Opcode, SrcTy, SrcTy2, RedTy, VF,
          if (Ext0 && Ext1 && Ext0->getOpcode() != Ext1->getOpcode())
              !Ext0 || Ext0->getOpcode() == Instruction::CastOps::ZExt;
          MulAccCost = Ctx.TTI.getMulAccReductionCost(IsZExt, Opcode, RedTy,
          ExtCost += Ext0->computeCost(VF, Ctx);
          ExtCost += Ext1->computeCost(VF, Ctx);
          ExtCost += OuterExt->computeCost(VF, Ctx);
        return MulAccCost.isValid() &&
               MulAccCost < ExtCost + MulCost + RedCost;
  VPValue *VecOp = Red->getVecOp();
      IsMulAccValidAndClampRange(Mul, RecipeA, RecipeB, nullptr)) {
  if (!Sub && IsMulAccValidAndClampRange(Mul, nullptr, nullptr, nullptr))
    if ((Ext->getOpcode() == Ext0->getOpcode() || Ext0 == Ext1) &&
        Ext0->getOpcode() == Ext1->getOpcode() &&
        IsMulAccValidAndClampRange(Mul, Ext0, Ext1, Ext) && Mul->hasOneUse()) {
          Ext0->getOpcode(), Ext0->getOperand(0), Ext->getResultType(), *Ext0,
          *Ext0, Ext0->getDebugLoc());
      NewExt0->insertBefore(Ext0);
                                       Ext->getResultType(), *Ext1, *Ext1,
                                       Ext1->getDebugLoc());
      Mul->setOperand(0, NewExt0);
      Mul->setOperand(1, NewExt1);
      Red->setOperand(1, Mul);
  auto IP = std::next(Red->getIterator());
  auto *VPBB = Red->getParent();
  Red->replaceAllUsesWith(AbstractR);
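// Note: these matchers bundle reduce(ext(mul(ext, ext))) style chains into
// abstract extended-reduction / multiply-accumulate recipes (the dot-product
// shape) when TTI reports the fused cost below the summed costs of the
// separate extends, multiply and reduction; the bundle is materialized as an
// expression recipe that replaces the original reduction.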
  for (VPValue *VPV : VPValues) {
        (VPV->isLiveIn() && VPV->getLiveInIRValue() &&
      if (User->usesScalars(VPV))
      HoistPoint = HoistBlock->begin();
           "All users must be in the vector preheader or dominated by it");
    VPV->replaceUsesWithIf(Broadcast,
                           [VPV, Broadcast](VPUser &U, unsigned Idx) {
                             return Broadcast != &U && !U.usesScalars(VPV);
  assert(Plan.hasVF(BestVF) && "BestVF is not available in Plan");
  assert(Plan.hasUF(BestUF) && "BestUF is not available in Plan");
  auto *TCMO = Builder.createNaryOp(
  auto UsesVectorOrInsideReplicateRegion = [DefR, LoopRegion](VPUser *U) {
    return !U->usesScalars(DefR) || ParentRegion != LoopRegion;
      none_of(DefR->users(), UsesVectorOrInsideReplicateRegion))
  DefR->replaceUsesWithIf(
      BuildVector, [BuildVector, &UsesVectorOrInsideReplicateRegion](
        return &U != BuildVector && UsesVectorOrInsideReplicateRegion(&U);
  for (VPValue *Def : R.definedValues()) {
    auto IsCandidateUnpackUser = [Def](VPUser *U) {
      return U->usesScalars(Def) &&
    if (none_of(Def->users(), IsCandidateUnpackUser))
    Unpack->insertAfter(&R);
    Def->replaceUsesWithIf(Unpack,
                           [&IsCandidateUnpackUser](VPUser &U, unsigned) {
                             return IsCandidateUnpackUser(&U);
    bool RequiresScalarEpilogue) {
  assert(VectorTC.isLiveIn() && "vector-trip-count must be a live-in");
  if (TailByMasking) {
    TC = Builder.createNaryOp(
        {TC, Builder.createNaryOp(
      Builder.createNaryOp(Instruction::URem, {TC, Step},
  if (RequiresScalarEpilogue) {
           "requiring scalar epilogue is not supported with tail folding");
    VPValue *IsZero = Builder.createICmp(
    R = Builder.createSelect(IsZero, Step, R);
  VPValue *Res = Builder.createNaryOp(
      Builder.createElementCount(TCTy, VFEC * Plan.getUF());
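// Note: the computation above is VecTC = TC - (TC urem Step) with
// Step = VF * UF; under tail folding TC is first rounded up to a multiple of
// Step, and when a scalar epilogue is required a zero remainder is replaced
// by a full Step so at least one scalar iteration remains.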
  VPValue *RuntimeVF = Builder.createElementCount(TCTy, VFEC);
      BC, [&VF](VPUser &U, unsigned) { return !U.usesScalars(&VF); });
  VPValue *MulByUF = Builder.createNaryOp(Instruction::Mul, {RuntimeVF, UF});
  BasicBlock *EntryBB = Entry->getIRBasicBlock();
    const SCEV *Expr = ExpSCEV->getSCEV();
    ExpandedSCEVs[ExpSCEV->getSCEV()] = Res;
    ExpSCEV->eraseFromParent();
         "VPExpandSCEVRecipes must be at the beginning of the entry block, "
         "after any VPIRInstructions");
  auto EI = Entry->begin();
  return ExpandedSCEVs;
    return IR->getInterleaveGroup()->isFull() && IR->getVPValue(Idx) == OpV;
    unsigned VectorRegWidth) {
  if (!InterleaveR || InterleaveR->getMask())
  Type *GroupElementTy = nullptr;
        [&TypeInfo, GroupElementTy](VPValue *Op) {
          return TypeInfo.inferScalarType(Op) == GroupElementTy;
        [&TypeInfo, GroupElementTy](VPValue *Op) {
          return TypeInfo.inferScalarType(Op) == GroupElementTy;
  return IG->getFactor() == VF && IG->getNumMembers() == VF &&
         GroupSize == VectorRegWidth;
  return RepR && RepR->isSingleScalar();
    unsigned VectorRegWidth) {
    if (R.mayWriteToMemory() && !InterleaveR)
    if (InterleaveR->getStoredValues().empty())
    auto *Member0 = InterleaveR->getStoredValues()[0];
        all_of(InterleaveR->getStoredValues(),
               [Member0](VPValue *VPV) { return Member0 == VPV; })) {
          VPRecipeBase *DefR = Op.value()->getDefiningRecipe();
          auto *IR = dyn_cast<VPInterleaveRecipe>(DefR);
          return IR && IR->getInterleaveGroup()->isFull() &&
                 IR->getVPValue(Op.index()) == Op.value();
        InterleaveR->getStoredValues()[0]->getDefiningRecipe());
    for (const auto &[I, V] : enumerate(InterleaveR->getStoredValues())) {
      if (!R || R->getOpcode() != WideMember0->getOpcode() ||
          R->getNumOperands() > 2)
                  [WideMember0, Idx = I](const auto &P) {
                    const auto &[OpIdx, OpV] = P;
                    return !canNarrowLoad(WideMember0, OpIdx, OpV, Idx);
  if (StoreGroups.empty())
      auto *R = V->getDefiningRecipe();
          *cast<LoadInst>(LoadGroup->getInterleaveGroup()->getInsertPos()),
          LoadGroup->getAddr(), LoadGroup->getMask(), /*Consecutive=*/true,
          /*Reverse=*/false, {}, LoadGroup->getDebugLoc());
      L->insertBefore(LoadGroup);
      assert(RepR->isSingleScalar() && "must be a single scalar load");
      NarrowedOps.insert(RepR);
    VPValue *PtrOp = WideLoad->getAddr();
      PtrOp = VecPtr->getOperand(0);
        nullptr, *WideLoad);
    N->insertBefore(WideLoad);
  for (auto *StoreGroup : StoreGroups) {
    VPValue *Member0 = StoreGroup->getStoredValues()[0];
    } else if (auto *WideMember0 =
      for (unsigned Idx = 0, E = WideMember0->getNumOperands(); Idx != E; ++Idx)
        WideMember0->setOperand(Idx, NarrowOp(WideMember0->getOperand(Idx)));
      Res = NarrowOp(Member0);
        StoreGroup->getAddr(), Res, nullptr, /*Consecutive=*/true,
        /*Reverse=*/false, {}, StoreGroup->getDebugLoc());
    S->insertBefore(StoreGroup);
    StoreGroup->eraseFromParent();
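// Note: when a load/store interleave group has factor and member count equal
// to VF and spans exactly one vector register, the group is narrowed to an
// ordinary consecutive wide load/store of a single member; the canonical
// IV increment is then rescaled (below) to match the narrowed step.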
      ConstantInt::get(CanIV->getScalarType(), 1 * Plan.getUF()));
  Inc->setOperand(1, UF);
      Plan.getOrAddLiveIn(ConstantInt::get(CanIV->getScalarType(), 1)));
         "must have a BranchOnCond");
  if (VF.isScalable() && VScaleForTuning.has_value())
    VectorStep *= *VScaleForTuning;
  assert(VectorStep > 0 && "trip count should not be zero");
  MiddleTerm->addMetadata(LLVMContext::MD_prof, BranchWeights);
  if (WideIntOrFp && WideIntOrFp->getTruncInst())
  if (!WideIntOrFp || !WideIntOrFp->isCanonical()) {
        Start, VectorTC, Step);
      {EndValue, Start}, WideIV->getDebugLoc(), "bc.resume.val");
  return ResumePhiRecipe;
  VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
          WideIVR, VectorPHBuilder, ScalarPHBuilder, TypeInfo,
      IVEndValues[WideIVR] = ResumePhi->getOperand(0);
      ScalarPhiIRI->addOperand(ResumePhi);
           "should only skip truncated wide inductions");
  auto *ResumeFromVectorLoop = VectorPhiR->getBackedgeValue();
         "Cannot handle loops with uncountable early exits");
      "vector.recur.extract");
  StringRef Name = IsFOR ? "scalar.recur.init" : "bc.merge.rdx";
      {ResumeFromVectorLoop, VectorPhiR->getStartValue()}, {}, Name);
  VPBuilder ScalarPHBuilder(ScalarPHVPBB);
  VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
         "Cannot handle loops with uncountable early exits");
  for (VPUser *U : FOR->users()) {
      {}, "vector.recur.extract.for.phi");
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
This file implements a class to represent arbitrary precision integral constant values and operations...
ReachingDefInfo InstSet & ToRemove
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static bool isEqual(const Function &Caller, const Function &Callee)
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
static bool isSentinel(const DWARFDebugNames::AttributeEncoding &AE)
iv Induction Variable Users
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
Legalize the Machine IR a function s Machine IR
static bool mergeBlocksIntoPredecessors(Loop &L, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution &SE)
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
const SmallVectorImpl< MachineOperand > & Cond
This file implements a set that has insertion order iteration characteristics.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static SymbolRef::Type getType(const Symbol *Sym)
This file implements the TypeSwitch template, which mimics a switch() statement whose cases are type ...
This file implements dominator tree analysis for a single level of a VPlan's H-CFG.
This file contains the declarations of different VPlan-related auxiliary helpers.
This file declares the class VPlanVerifier, which contains utility functions to check the consistency...
This file contains the declarations of the Vectorization Plan base classes:
static const uint32_t IV[8]
Class for arbitrary precision integers.
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
unsigned getActiveBits() const
Compute the number of active bits in the value.
unsigned getBitWidth() const
Return the number of bits in the APInt.
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
LLVM Basic Block Representation.
const Function * getParent() const
Return the enclosing method, or null if none.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
This class represents a function call, abstracting a target machine's calling convention.
@ ICMP_ULT
unsigned less than
@ ICMP_ULE
unsigned less or equal
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
static DebugLoc getCompilerGenerated()
static DebugLoc getUnknown()
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
constexpr bool isVector() const
One or more elements.
static constexpr ElementCount getScalable(ScalarTy MinVal)
Utility class for floating point operations which can have information about relaxed accuracy require...
Represents flags for the getelementptr instruction/expression.
GEPNoWrapFlags withoutNoUnsignedWrap() const
static GEPNoWrapFlags none()
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
A struct for saving information about induction variables.
InductionKind
This enum represents the kinds of inductions that we support.
@ IK_PtrInduction
Pointer induction var. Step = C.
@ IK_IntInduction
Integer induction variable. Step = C.
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
The group of interleaved loads/stores sharing the same stride and close to each other.
InstTy * getMember(uint32_t Index) const
Get the member with the given index Index.
uint32_t getNumMembers() const
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
static bool getDecisionAndClampRange(const std::function< bool(ElementCount)> &Predicate, VFRange &Range)
Test a Predicate on a Range of VF's.
LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
This class implements a map that also provides access to all stored values in a deterministic order.
ValueT lookup(const KeyT &Key) const
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
LLVM_ABI const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
static LLVM_ABI unsigned getOpcode(RecurKind Kind)
Returns the opcode corresponding to the RecurrenceKind.
unsigned getOpcode() const
RegionT * getParent() const
Get the parent of the Region.
This class uses information about analyze scalars to rewrite expressions in canonical form.
LLVM_ABI Value * expandCodeFor(const SCEV *SH, Type *Ty, BasicBlock::iterator I)
Insert code to directly compute the specified SCEV expression into the program.
static const SCEV * rewrite(const SCEV *Scev, ScalarEvolution &SE, ValueToSCEVMapTy &Map)
This class represents an analyzed expression in the program.
LLVM_ABI bool isZero() const
Return true if the expression is a constant zero.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
const DataLayout & getDataLayout() const
Return the DataLayout associated with the module this SCEV instance is operating on.
LLVM_ABI const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI const SCEV * getUDivExpr(const SCEV *LHS, const SCEV *RHS)
Get a canonical unsigned division expression, or something simpler if possible.
LLVM_ABI const SCEV * getElementCount(Type *Ty, ElementCount EC, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
LLVM_ABI const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
LLVM_ABI bool isKnownPredicate(CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
This class represents the LLVM 'select' instruction.
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
Provides information about what library functions are available for the current target.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
This class implements a switch-like dispatch statement for a value of 'T' using dyn_cast functionalit...
TypeSwitch< T, ResultT > & Case(CallableT &&caseFn)
Add a case on the given type.
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
bool isStructTy() const
True if this is an instance of StructType.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
A recipe for generating the active lane mask for the vector loop that is used to predicate the vector...
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
RecipeListTy::iterator iterator
Instruction iterators...
iterator begin()
Recipe iterator methods.
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
VPRegionBlock * getEnclosingLoopRegion()
VPBasicBlock * splitAt(iterator SplitAt)
Split current block at SplitAt by inserting a new block between the current block and its successors ...
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
const VPRecipeBase & back() const
A recipe for vectorizing a phi-node as a sequence of mask-based select instructions.
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
void setMask(unsigned Idx, VPValue *V)
Set mask number Idx to V.
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have...
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
VPRegionBlock * getParent()
const VPBasicBlock * getExitingBasicBlock() const
size_t getNumSuccessors() const
void swapSuccessors()
Swap successors of the block. The block must have exactly 2 successors.
size_t getNumPredecessors() const
const VPBlocksTy & getPredecessors() const
VPBlockBase * getSinglePredecessor() const
const VPBasicBlock * getEntryBasicBlock() const
VPBlockBase * getSingleHierarchicalPredecessor()
VPBlockBase * getSingleSuccessor() const
const VPBlocksTy & getSuccessors() const
static auto blocksOnly(const T &Range)
Return an iterator range over Range which only includes BlockTy blocks.
static void insertOnEdge(VPBlockBase *From, VPBlockBase *To, VPBlockBase *BlockPtr)
Inserts BlockPtr on the edge between From and To.
static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBases IfTrue and IfFalse after BlockPtr.
static void connectBlocks(VPBlockBase *From, VPBlockBase *To, unsigned PredIdx=-1u, unsigned SuccIdx=-1u)
Connect VPBlockBases From and To bi-directionally.
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
A recipe for generating conditional branches on the bits of a mask.
RAII object that stores the current insertion point and restores it when the object is destroyed.
VPlan-based builder utility analogous to IRBuilder.
VPValue * createElementCount(Type *Ty, ElementCount EC)
VPDerivedIVRecipe * createDerivedIV(InductionDescriptor::InductionKind Kind, FPMathOperator *FPBinOp, VPValue *Start, VPValue *Current, VPValue *Step, const Twine &Name="")
Convert the input value Current to the corresponding value of an induction with Start and Step values...
static VPBuilder getToInsertAfter(VPRecipeBase *R)
Create a VPBuilder to insert after R.
VPPhi * createScalarPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL, const Twine &Name="")
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const Twine &Name="")
Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.
VPInstruction * createScalarCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, DebugLoc DL)
void setInsertPoint(VPBasicBlock *TheBB)
This specifies that created VPInstructions should be appended to the end of the specified block.
Canonical scalar induction phi of the vector loop.
Type * getScalarType() const
Returns the scalar type of the induction.
unsigned getNumDefinedValues() const
Returns the number of values defined by the VPDef.
ArrayRef< VPValue * > definedValues()
Returns an ArrayRef of the values defined by the VPDef.
VPValue * getVPSingleValue()
Returns the only VPValue defined by the VPDef.
VPValue * getVPValue(unsigned I)
Returns the VPValue with index I defined by the VPDef.
A recipe for converting the input value IV value to the corresponding value of an IV with different s...
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
bool properlyDominates(const VPRecipeBase *A, const VPRecipeBase *B)
A recipe for generating the phi node for the current index of elements, adjusted in accordance with E...
A recipe to combine multiple recipes into a single 'expression' recipe, which should be considered a ...
A special type of VPBasicBlock that wraps an existing IR basic block.
BasicBlock * getIRBasicBlock() const
Class to record and manage LLVM IR flags.
static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)
Create a new VPIRPhi for \I , if it is a PHINode, otherwise create a VPIRInstruction.
This is a concrete Recipe that models a single VPlan-level instruction.
@ ExtractLane
Extracts a single lane (first operand) from a set of vector operands.
@ ExtractPenultimateElement
@ Unpack
Extracts all lanes from its (non-scalable) vector operand.
@ FirstOrderRecurrenceSplice
@ BuildVector
Creates a fixed-width vector containing all operands.
@ BuildStructVector
Given operands of (the same) struct type, creates a struct of fixed-width vectors each containing a ...
@ CanonicalIVIncrementForPart
@ CalculateTripCountMinusVF
const InterleaveGroup< Instruction > * getInterleaveGroup() const
VPValue * getMask() const
Return the mask used by this recipe.
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
A recipe for interleaved memory operations with vector-predication intrinsics.
VPInterleaveRecipe is a recipe for transforming an interleave group of loads or stores into one wide l...
VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when control converges back from ...
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
bool mayReadFromMemory() const
Returns true if the recipe may read from memory.
bool mayReadOrWriteMemory() const
Returns true if the recipe may read from or write to memory.
bool mayHaveSideEffects() const
Returns true if the recipe may have side-effects.
VPRegionBlock * getRegion()
VPBasicBlock * getParent()
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
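These unlink/insert primitives are what block-merging transforms are built from; for example, hoisting every recipe of a block into its single predecessor. A sketch with hypothetical blocks VPBB and PredVPBB:

// Move each recipe to the end of the predecessor, preserving order; the
// early-increment range keeps iteration valid while recipes are unlinked.
for (VPRecipeBase &R : make_early_inc_range(*VPBB))
  R.moveBefore(*PredVPBB, PredVPBB->end());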
Helper class to create VPRecipes from IR instructions.
VPRecipeBase * getRecipe(Instruction *I)
Return the recipe created for given ingredient.
A recipe to represent inloop reduction operations with vector-predication intrinsics,...
A recipe to represent inloop reduction operations, performing a reduction on a vector operand into a ...
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
const VPBlockBase * getEntry() const
bool isReplicator() const
An indicator of whether this region is to generate multiple replicated instances of output IR correspond...
void setExiting(VPBlockBase *ExitingBlock)
Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.
VPCanonicalIVPHIRecipe * getCanonicalIV()
Returns the canonical induction recipe of the region.
const VPBlockBase * getExiting() const
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
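A sketch of a common region maintenance pattern; Block and NewExiting are hypothetical, and getParent here is the VPBlockBase accessor returning the enclosing region:

// If Block terminates its enclosing non-replicator region, retarget the
// region's exiting block before Block is removed or merged away.
if (VPRegionBlock *Region = Block->getParent())
  if (!Region->isReplicator() && Region->getExiting() == Block)
    Region->setExiting(NewExiting);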
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
bool isSingleScalar() const
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...
VPSingleDef is a base class for recipes for modeling a sequence of one or more output IR that define ...
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
virtual VPSingleDefRecipe * clone() override=0
Clone the current recipe.
An analysis for type-inference for VPValues.
LLVMContext & getContext()
Return the LLVMContext used by the analysis.
Type * inferScalarType(const VPValue *V)
Infer the type of V. Returns the scalar type of V.
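Given an already-constructed analysis instance (here a hypothetical TypeInfo) and a hypothetical ElementCount VF, type queries look like:

Type *ScalarTy = TypeInfo.inferScalarType(V); // scalar type of VPValue V
LLVMContext &Ctx = TypeInfo.getContext();     // context for creating new IR types
Type *VecTy = toVectorTy(ScalarTy, VF);       // widen to a vector type with VF lanes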
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
void setOperand(unsigned I, VPValue *New)
VPValue * getOperand(unsigned N) const
void addOperand(VPValue *Operand)
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Value * getLiveInIRValue() const
Returns the underlying IR value, if this VPValue is defined outside the scope of VPlan.
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
void setUnderlyingValue(Value *Val)
void replaceAllUsesWith(VPValue *New)
unsigned getNumUsers() const
bool isLiveIn() const
Returns true if this VPValue is a live-in, i.e. defined outside the VPlan.
void replaceUsesWithIf(VPValue *New, llvm::function_ref< bool(VPUser &U, unsigned Idx)> ShouldReplace)
Go through the uses list for this VPValue and make each use point to New if the callback ShouldReplac...
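replaceUsesWithIf enables partial rewrites; a sketch that redirects only the uses outside a given block. Old, New, and TargetVPBB are hypothetical, and the cast assumes every user is a recipe:

Old->replaceUsesWithIf(New, [TargetVPBB](VPUser &U, unsigned) {
  // Keep uses inside TargetVPBB pointing at Old; rewrite all others.
  return cast<VPRecipeBase>(&U)->getParent() != TargetVPBB;
});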
A recipe to compute a pointer to the last element of each part of a widened memory access for widened...
VPVectorEndPointerRecipe * clone() override
Clone the current recipe.
A Recipe for widening the canonical induction variable of the vector loop.
VPWidenCastRecipe is a recipe to create vector cast instructions.
A recipe for handling GEP instructions.
Base class for widened induction (VPWidenIntOrFpInductionRecipe and VPWidenPointerInductionRecipe),...
PHINode * getPHINode() const
VPValue * getStepValue()
Returns the step value of the induction.
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
A recipe for handling phi nodes of integer and floating-point inductions, producing their vector valu...
VPValue * getLastUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the last unrolled part,...
VPValue * getSplatVFValue()
A recipe for widening vector intrinsics.
A common base class for widening memory operations.
VPValue * getMask() const
Return the mask used by this recipe.
VPValue * getAddr() const
Return the address accessed by this recipe.
A recipe for widened phis.
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
bool hasVF(ElementCount VF) const
LLVMContext & getContext() const
VPBasicBlock * getEntry()
VPValue & getVectorTripCount()
The vector trip count.
bool hasScalableVF() const
VPValue & getVFxUF()
Returns VF * UF of the vector loop region.
VPValue & getVF()
Returns the VF of the vector loop region.
VPValue * getTripCount() const
The trip count of the original loop.
VPValue * getTrue()
Return a VPValue wrapping i1 true.
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
VPRegionBlock * createReplicateRegion(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="")
Create a new replicate region with Entry, Exiting and Name.
bool hasUF(unsigned UF) const
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
void setVF(ElementCount VF)
bool isUnrolled() const
Returns true if the VPlan has already been unrolled, i.e.
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
void resetTripCount(VPValue *NewTripCount)
Resets the trip count for the VPlan.
VPBasicBlock * getMiddleBlock()
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
VPValue * getFalse()
Return a VPValue wrapping i1 false.
VPValue * getOrAddLiveIn(Value *V)
Gets the live-in VPValue for V or adds a new live-in (if none exists yet) for V.
bool hasScalarVFOnly() const
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
ArrayRef< VPValue * > getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
VPValue * getLiveIn(Value *V) const
Return the live-in VPValue for V, if there is one or nullptr otherwise.
VPBasicBlock * getVectorPreheader()
Returns the preheader of the vector loop region, if one exists, or null otherwise.
bool hasScalarTail() const
Returns true if the scalar tail may execute after the vector loop.
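A sketch of typical plan-level queries at the start of a transform; Plan is the VPlan being transformed, and the constant creation assumes the canonical IV's scalar type:

VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
VPCanonicalIVPHIRecipe *CanIV = LoopRegion->getCanonicalIV();
VPValue *TripCount = Plan.getTripCount();
// Live-ins are created on demand and deduplicated per underlying IR value.
VPValue *One =
    Plan.getOrAddLiveIn(ConstantInt::get(CanIV->getScalarType(), 1));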
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
iterator_range< user_iterator > users()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_ABI APInt RoundingUDiv(const APInt &A, const APInt &B, APInt::Rounding RM)
Return A unsign-divided by B, rounded by the given rounding mode.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
LogicalOp_match< LHS, RHS, Instruction::And > m_LogicalAnd(const LHS &L, const RHS &R)
Matches L && R either in the form of L & R or L ? R : false.
match_combine_or< CastInst_match< OpTy, ZExtInst >, OpTy > m_ZExtOrSelf(const OpTy &Op)
bool match(Val *V, const Pattern &P)
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
BinaryOp_match< LHS, RHS, Instruction::Mul, true > m_c_Mul(const LHS &L, const RHS &R)
Matches a Mul with LHS and RHS in either order.
MatchFunctor< Val, Pattern > match_fn(const Pattern &P)
A match functor that can be used as a UnaryPredicate in functional algorithms like all_of.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
AllRecipe_commutative_match< Instruction::And, Op0_t, Op1_t > m_c_BinaryAnd(const Op0_t &Op0, const Op1_t &Op1)
Match a binary AND operation.
AllRecipe_match< Instruction::Or, Op0_t, Op1_t > m_BinaryOr(const Op0_t &Op0, const Op1_t &Op1)
Match a binary OR operation.
AllRecipe_commutative_match< Opcode, Op0_t, Op1_t > m_c_Binary(const Op0_t &Op0, const Op1_t &Op1)
AllRecipe_commutative_match< Instruction::Or, Op0_t, Op1_t > m_c_BinaryOr(const Op0_t &Op0, const Op1_t &Op1)
GEPLikeRecipe_match< Op0_t, Op1_t > m_GetElementPtr(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::ExtractLastLanePerPart, Op0_t > m_ExtractLastLanePerPart(const Op0_t &Op0)
VPInstruction_match< VPInstruction::ExtractLastElement, Op0_t > m_ExtractLastElement(const Op0_t &Op0)
AllRecipe_match< Opcode, Op0_t, Op1_t > m_Binary(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< Instruction::ExtractElement, Op0_t, Op1_t > m_ExtractElement(const Op0_t &Op0, const Op1_t &Op1)
specific_intval< 1 > m_False()
VPDerivedIV_match< Op0_t, Op1_t, Op2_t > m_DerivedIV(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
VPInstruction_match< VPInstruction::ActiveLaneMask, Op0_t, Op1_t, Op2_t > m_ActiveLaneMask(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
VPInstruction_match< VPInstruction::BranchOnCount > m_BranchOnCount()
specific_intval< 1 > m_True()
VPInstruction_match< VPInstruction::Broadcast, Op0_t > m_Broadcast(const Op0_t &Op0)
class_match< VPValue > m_VPValue()
Match an arbitrary VPValue and ignore it.
VPInstruction_match< VPInstruction::ExplicitVectorLength, Op0_t > m_EVL(const Op0_t &Op0)
VPInstruction_match< VPInstruction::BuildVector > m_BuildVector()
BuildVector matches only its opcode, without matching its operands, as the number of operands is not fixed.
VPInstruction_match< VPInstruction::FirstActiveLane, Op0_t > m_FirstActiveLane(const Op0_t &Op0)
bind_ty< VPInstruction > m_VPInstruction(VPInstruction *&V)
Match a VPInstruction, capturing if we match.
VPInstruction_match< VPInstruction::BranchOnCond > m_BranchOnCond()
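The VPlan matchers compose like their IR counterparts; a sketch folding a redundant OR, where Def is a hypothetical single-def VPValue and m_VPValue is used in its binding overload (an assumption beyond the no-argument form listed above):

VPValue *X;
// Match (X || X) and forward X to all users of the redundant OR.
if (match(Def, m_BinaryOr(m_VPValue(X), m_Deferred(X))))
  Def->replaceAllUsesWith(X);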
initializer< Ty > init(const Ty &Val)
NodeAddr< DefNode * > Def
bool isSingleScalar(const VPValue *VPV)
Returns true if VPV is a single scalar, either because it produces the same value for all lanes or on...
bool isUniformAcrossVFsAndUFs(VPValue *V)
Checks if V is uniform across all VF lanes and UF parts.
VPValue * getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr)
Get or create a VPValue that corresponds to the expansion of Expr.
const SCEV * getSCEVExprForVPValue(VPValue *V, ScalarEvolution &SE)
Return the SCEV expression for V.
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
bool onlyScalarValuesUsed(const VPValue *Def)
Returns true if only scalar values of Def are used by all users.
bool isHeaderMask(const VPValue *V, const VPlan &Plan)
Return true if V is a header mask in Plan.
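These predicates gate most scalarization decisions; a sketch of the kind of guard a sinking transform might use (canKeepScalar is a hypothetical helper name):

// A value may stay scalar if it is uniform per lane or if no user needs
// lanes beyond the first.
static bool canKeepScalar(const VPValue *Def) {
  return vputils::isSingleScalar(Def) || vputils::onlyFirstLaneUsed(Def);
}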
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
DenseMap< const Value *, const SCEV * > ValueToSCEVMapTy
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
constexpr from_range_t from_range
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
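The early-increment range is the standard idiom for erasing recipes during iteration; a sketch over a hypothetical block VPBB:

// Delete trivially dead single-def recipes; the iterator is advanced
// before each body runs, so eraseFromParent() is safe here.
for (VPRecipeBase &R : make_early_inc_range(*VPBB))
  if (R.getNumDefinedValues() == 1 &&
      R.getVPSingleValue()->getNumUsers() == 0 && !R.mayHaveSideEffects())
    R.eraseFromParent();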
auto cast_or_null(const Y &Val)
iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order.
iterator_range< df_iterator< VPBlockDeepTraversalWrapper< VPBlockBase * > > > vp_depth_first_deep(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order while traversing t...
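The deep traversal also descends into regions; combined with blocksOnly it yields every VPBasicBlock in the plan. A sketch over a hypothetical Plan, with processBlock standing in for a per-block visitor:

// Visit every VPBasicBlock, including those nested inside regions.
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
         vp_depth_first_deep(Plan.getEntry())))
  processBlock(VPBB);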
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
iterator_range< po_iterator< VPBlockDeepTraversalWrapper< VPBlockBase * > > > vp_post_order_deep(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in post order while traversing through ...
void sort(IteratorTy Start, IteratorTy End)
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
RecurKind
These are the kinds of recurrences that we support.
@ Mul
Product of integers.
@ Sub
Subtraction of integers.
@ AddChainWithSubs
A chain of adds and subs.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
DWARFExpression::Operation Op
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
@ DataAndControlFlowWithoutRuntimeCheck
Use predicate to control both data and control flow, but modify the trip count so that a runtime over...
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
@ Default
The result values are uniform if and only if all operands are uniform.
constexpr detail::IsaCheckPredicate< Types... > IsaPred
Function object wrapper for the llvm::isa type check.
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
This struct is a compact representation of a valid (non-zero power of two) alignment.
An information struct used to provide DenseMap with the various necessary components for a given valu...
Incoming for lane mask phi as machine instruction; incoming register Reg and incoming block Block are...
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
Struct to hold various analysis needed for cost computations.
A recipe for handling first-order recurrence phis.
A recipe for widening load operations with vector-predication intrinsics, using the address to load f...
A recipe for widening load operations, using the address to load from and an optional mask.
A recipe for widening select instructions.
A recipe for widening store operations with vector-predication intrinsics, using the value to store,...
A recipe for widening store operations, using the stored value, the address to store to and an option...