64 "llvm.loop.vectorize.followup_vectorized";
66 "llvm.loop.vectorize.followup_epilogue";
75 cl::desc(
"Use dot format instead of plain text when dumping VPlans"));
77#define DEBUG_TYPE "loop-vectorize"
79#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
93 return Builder.CreateSub(
getRuntimeVF(Builder, Builder.getInt32Ty(), VF),
96 return Builder.getInt64(Lane);
101#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
112 (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() :
nullptr);
125bool VPRecipeValue::isDefinedBy(
const VPDef *
D)
const {
135 return MultiDef->getDef();
166 "trying to delete a VPRecipeValue with remaining users");
171 assert(Def &&
"VPSingleDefValue requires a defining recipe");
172 Def->addDefinedValue(
this);
181 assert(Def &&
"VPMultiDefValue requires a defining recipe");
182 Def->addDefinedValue(
this);
200 for (
unsigned i = 0; i < WorkList.
size(); i++) {
201 T *Current = WorkList[i];
202 if (!Current->hasPredecessors())
204 auto &Predecessors = Current->getPredecessors();
231 assert(ParentPlan->
getEntry() ==
this &&
"Can only set plan on its entry.");
251 if (!Successors.empty() || !Parent)
253 assert(Parent->getExiting() ==
this &&
254 "Block w/o successors not the exiting block of its parent.");
255 return Parent->getEnclosingBlockWithSuccessors();
259 if (!Predecessors.empty() || !Parent)
261 assert(Parent->getEntry() ==
this &&
262 "Block w/o predecessors not the entry of its parent.");
263 return Parent->getEnclosingBlockWithPredecessors();
268 while (It !=
end() && It->isPhi())
283 return Def->getUnderlyingValue();
290 return Data.VPV2Scalars[Def][0];
301 auto *VecPart =
Data.VPV2Vector[Def];
302 if (!VecPart->getType()->isVectorTy()) {
308 auto *Extract =
Builder.CreateExtractElement(VecPart, LaneV);
318 Data.VPV2Scalars[Def].size() == 1)) &&
319 "Trying to access a single scalar per part but has multiple scalars "
326 return Data.VPV2Vector[Def];
328 auto GetBroadcastInstrs = [
this](
Value *V) {
337 Value *IRV = Def->getLiveInIRValue();
338 Value *
B = GetBroadcastInstrs(IRV);
347 set(Def, ScalarValue);
352 VPLane LastLane(IsSingleScalar ? 0 :
VF.getFixedValue() - 1);
357 assert(IsSingleScalar &&
"must be a single-scalar at this point");
364 ? LastInst->getParent()->getFirstNonPHIIt()
366 Builder.SetInsertPoint(&*NewIP);
367 Value *VectorValue = GetBroadcastInstrs(ScalarValue);
368 set(Def, VectorValue);
378 Builder.GetInsertBlock()
380 ->shouldEmitDebugInfoForProfiling() &&
383 unsigned UF = Plan->getConcreteUF();
387 Builder.SetCurrentDebugLocation(*NewDIL);
390 << DIL->getFilename() <<
" Line: " << DIL->getLine());
392 Builder.SetCurrentDebugLocation(
DL);
402 for (
unsigned I = 0, E = StructTy->getNumElements();
I != E;
I++) {
403 Value *ScalarValue =
Builder.CreateExtractValue(ScalarInst,
I);
404 Value *VectorValue =
Builder.CreateExtractValue(WideValue,
I);
406 Builder.CreateInsertElement(VectorValue, ScalarValue, LaneExpr);
407 WideValue =
Builder.CreateInsertValue(WideValue, VectorValue,
I);
410 WideValue =
Builder.CreateInsertElement(WideValue, ScalarInst, LaneExpr);
416 auto &
CFG = State.CFG;
428 auto &
CFG = State.CFG;
433 Loop *ParentLoop = State.CurrentParentLoop;
438 SuccOrExitVPB = SuccOrExitVPB ? SuccOrExitVPB :
this;
439 if (State.Plan->isExitBlock(SuccOrExitVPB)) {
440 ParentLoop = State.LI->getLoopFor(
444 if (ParentLoop && !State.LI->getLoopFor(NewBB))
457 VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock();
459 assert(
CFG.VPBB2IRBB.contains(PredVPBB) &&
460 "Predecessor basic-block not found building successor.");
466 assert(PredVPSuccessors.size() == 1 &&
467 "Predecessor ending w/o branch must have single successor.");
468 DebugLoc DL = PredBBTerminator->getDebugLoc();
469 PredBBTerminator->eraseFromParent();
473 UBI->setSuccessor(NewBB);
482 unsigned idx = PredVPSuccessors.front() ==
this ? 0 : 1;
484 assert((!TermBr->getSuccessor(idx) ||
486 (TermBr->getSuccessor(idx) == NewBB ||
487 PredVPBlock ==
getPlan()->getEntry()))) &&
488 "Trying to reset an existing successor block.");
489 TermBr->setSuccessor(idx, NewBB);
497 "VPIRBasicBlock can have at most two successors at the moment!");
500 IRBB->moveAfter(State->CFG.PrevBB);
501 State->Builder.SetInsertPoint(IRBB->getTerminator());
502 State->CFG.PrevBB = IRBB;
503 State->CFG.VPBB2IRBB[
this] = IRBB;
508 auto *Br = State->Builder.CreateBr(IRBB);
509 Br->setOperand(0,
nullptr);
510 IRBB->getTerminator()->eraseFromParent();
514 "other blocks must be terminated by a branch");
530 Loop *PrevParentLoop = State->CurrentParentLoop;
531 State->CurrentParentLoop = State->LI->AllocateLoop();
538 State->LI->addTopLevelLoop(State->CurrentParentLoop);
542 BasicBlock *NewBB = createEmptyBasicBlock(*State);
544 State->Builder.SetInsertPoint(NewBB);
547 State->Builder.SetInsertPoint(Terminator);
549 State->CFG.PrevBB = NewBB;
550 State->CFG.VPBB2IRBB[
this] = NewBB;
558 State->CurrentParentLoop = State->CurrentParentLoop->getParentLoop();
570 <<
" in BB: " << BB->
getName() <<
'\n');
572 State->CFG.PrevVPBB =
this;
575 State->setDebugLocFrom(Recipe.getDebugLoc());
576 Recipe.execute(*State);
583 assert((SplitAt ==
end() || SplitAt->getParent() ==
this) &&
584 "can only split at a position in the same block");
592 if (ParentRegion && ParentRegion->getExiting() ==
this)
606 if (
P &&
P->isReplicator()) {
610 assert((!
P || !
P->isReplicator()) &&
"unexpected nested replicate regions");
627 "block with multiple successors doesn't have a recipe as terminator");
632 [[maybe_unused]]
bool IsSwitch =
642 "block with multiple successors not terminated by "
643 "conditional branch nor switch recipe");
649 assert((IsSwitch || IsBranchOnTwoConds) &&
650 "block with more than 2 successors not terminated by a switch or "
651 "branch-on-two-conds recipe");
657 "block with 0 or 1 successors terminated by conditional branch recipe");
677#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
685 O << Indent <<
"No successors\n";
687 O << Indent <<
"Successor(s): ";
690 O << LS << Succ->getName();
697 O << Indent <<
getName() <<
":\n";
699 auto RecipeIndent = Indent +
" ";
709std::pair<VPBlockBase *, VPBlockBase *>
717 Old2NewVPBlocks[BB] = NewBB;
718 if (InRegion && BB->getNumSuccessors() == 0) {
719 assert(!Exiting &&
"Multiple exiting blocks?");
723 assert((!InRegion || Exiting) &&
"regions must have a single exiting block");
730 NewPreds.
push_back(Old2NewVPBlocks[Pred]);
735 NewSuccs.
push_back(Old2NewVPBlocks[Succ]);
743 for (
const auto &[OldBB, NewBB] :
746 for (
const auto &[OldPred, NewPred] :
747 zip(OldBB->getPredecessors(), NewBB->getPredecessors()))
748 assert(NewPred == Old2NewVPBlocks[OldPred] &&
"Different predecessors");
750 for (
const auto &[OldSucc, NewSucc] :
751 zip(OldBB->successors(), NewBB->successors()))
752 assert(NewSucc == Old2NewVPBlocks[OldSucc] &&
"Different successors");
756 return std::make_pair(Old2NewVPBlocks[Entry],
757 Exiting ? Old2NewVPBlocks[Exiting] :
nullptr);
764 VPRegionBlock *NewRegion =
766 getName(), NewEntry, NewExiting)
767 : Plan.createReplicateRegion(NewEntry, NewExiting,
getName());
770 Block->setParent(NewRegion);
781 Cost += R.cost(VF, Ctx);
792 "must be in the entry block of a non-replicate region");
794 "loop region has a single predecessor (preheader), its entry block "
795 "has 2 incoming blocks");
799 Pred = Idx == 0 ?
Region->getSinglePredecessor() :
Region;
801 return Pred->getExitingBasicBlock();
813 : Ctx.TTI.getCFInstrCost(Instruction::UncondBr, Ctx.CostKind);
814 LLVM_DEBUG(
dbgs() <<
"Cost of " << BackedgeCost <<
" for VF " << VF
815 <<
": vector loop backedge\n");
816 Cost += BackedgeCost;
828 assert(VF.
isVector() &&
"Can only compute vector cost at the moment.");
830 return Then->cost(VF, Ctx);
833#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
837 auto NewIndent = Indent +
" ";
841 O <<
" = CANONICAL-IV\n";
847 O << Indent <<
"}\n";
857 if (CanIV->getNumUsers() > 0) {
859 auto *Zero = Plan.getZero(CanIV->getType());
862 VPBuilder HeaderBuilder(Header, Header->begin());
882 assert(CanIV &&
"Expected a canonical IV");
888 "VFxUF can be used only before it is materialized.");
890 return VPBuilder(ExitingLatch->getTerminator())
897 : VectorTripCount(IdxTy), VF(IdxTy), UF(IdxTy), VFxUF(IdxTy) {
902 L->getUniqueExitBlocks(IRExitBlocks);
910 for (
auto *VPB : CreatedBlocks) {
915 for (
auto *Def : R.definedValues())
916 Def->replaceAllUsesWith(&DummyValue);
918 for (
unsigned I = 0, E = R.getNumOperands();
I != E;
I++)
919 R.setOperand(
I, &DummyValue);
922 CanIV->replaceAllUsesWith(&DummyValue);
929 delete BackedgeTakenCount;
952 "all region blocks must be dissolved before ::execute");
955 State->CFG.PrevVPBB =
nullptr;
956 State->CFG.ExitBB = State->CFG.PrevBB->getSingleSuccessor();
960 State->VPDT.recalculate(*
this);
963 BasicBlock *VectorPreHeader = State->CFG.PrevBB;
965 State->CFG.DTU.applyUpdates(
983 State->CFG.DTU.applyUpdates(
991 Block->execute(State);
1004 Loop *L = State->LI->getLoopFor(BB);
1006 [L](
BasicBlock *Succ) {
return L->contains(Succ); }))
1012 Loop *SuccLoop = State->LI->getLoopFor(Succ);
1018 Target = State->LI->getSmallestCommonLoop(
Target, SuccLoop);
1020 State->LI->removeBlock(BB);
1022 Target->addBasicBlockToLoop(BB, *State->LI);
1027 if (!ScalarPhVPBB) {
1033 if (R.getNumOperands() == 1)
1034 R.eraseFromParent();
1041 Blocks.push_back(ScalarPh);
1043 State->LI->erase(*OrigLoop->
begin());
1044 State->LI->erase(OrigLoop);
1045 for (
auto *BB : Blocks)
1046 State->LI->removeBlock(BB);
1050 State->CFG.DTU.flush();
1057 BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB];
1072 Value *Phi = State->get(PhiR, NeedsScalar);
1075 Value *Val = State->get(PhiR->getOperand(1), NeedsScalar);
1102 return R->isReplicator() ? nullptr : R;
1109 return R->isReplicator() ? nullptr : R;
1115 assert(LoopRegion &&
"expected a vector loop region");
1121#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1125 if (VF.getNumUsers() > 0) {
1131 if (UF.getNumUsers() > 0) {
1137 if (VFxUF.getNumUsers() > 0) {
1143 if (VectorTripCount.getNumUsers() > 0) {
1146 O <<
" = vector-trip-count";
1149 if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
1151 BackedgeTakenCount->printAsOperand(O,
SlotTracker);
1152 O <<
" = backedge-taken count";
1160 O <<
" = original trip-count";
1169 O <<
"VPlan '" <<
getName() <<
"' {";
1186 RSO << Name <<
" for ";
1188 RSO <<
"VF={" << VFs[0];
1197 RSO <<
"UF={" << UFs[0];
1224 NewDeepRPOT(NewEntry);
1227 for (
const auto &[OldBB, NewBB] :
1230 assert(OldBB->getRecipeList().size() == NewBB->getRecipeList().
size() &&
1231 "blocks must have the same number of recipes");
1232 for (
const auto &[OldR, NewR] :
zip(*OldBB, *NewBB)) {
1233 assert(OldR.getNumOperands() == NewR.getNumOperands() &&
1234 "recipes must have the same number of operands");
1235 assert(OldR.getNumDefinedValues() == NewR.getNumDefinedValues() &&
1236 "recipes must define the same number of operands");
1237 for (
const auto &[OldV, NewV] :
1238 zip(OldR.definedValues(), NewR.definedValues()))
1239 Old2NewVPValues[OldV] = NewV;
1247 for (
unsigned I = 0,
E = NewR.getNumOperands();
I !=
E; ++
I) {
1249 NewR.setOperand(
I, NewOp);
1255 unsigned NumBlocksBeforeCloning = CreatedBlocks.size();
1265 return VPIRBB && VPIRBB->getIRBasicBlock() == ScalarHeaderIRBB;
1275 Old2NewVPValues[OldLiveIn] = NewPlan->getOrAddLiveIn(OldLiveIn);
1278 Old2NewVPValues[TripCountIRV] = NewPlan->getOrAddLiveIn(TripCountIRV);
1283 auto *OldCanIV = LoopRegion->getCanonicalIV();
1284 auto *NewCanIV = NewPlan->getVectorLoopRegion()->getCanonicalIV();
1285 assert(OldCanIV && NewCanIV &&
1286 "Loop regions of both plans must have canonical IVs.");
1287 Old2NewVPValues[OldCanIV] = NewCanIV;
1291 "All VPSymbolicValues must be handled below");
1293 if (BackedgeTakenCount)
1294 NewPlan->BackedgeTakenCount =
1298 for (
auto [OldSV, NewSV] :
1299 {std::pair{&VectorTripCount, &NewPlan->VectorTripCount},
1300 {&VF, &NewPlan->VF},
1301 {&UF, &NewPlan->UF},
1302 {&VFxUF, &NewPlan->VFxUF},
1303 {BackedgeTakenCount, NewPlan->BackedgeTakenCount}}) {
1306 Old2NewVPValues[OldSV] = NewSV;
1307 if (OldSV->isMaterialized())
1308 NewSV->markMaterialized();
1317 NewPlan->Name = Name;
1320 "TripCount must have been added to Old2NewVPValues");
1321 NewPlan->TripCount = Old2NewVPValues[TripCount];
1326 unsigned NumBlocksAfterCloning = CreatedBlocks.size();
1328 seq<unsigned>(NumBlocksBeforeCloning, NumBlocksAfterCloning))
1329 NewPlan->CreatedBlocks.push_back(this->CreatedBlocks[
I]);
1330 CreatedBlocks.truncate(NumBlocksBeforeCloning);
1335 VPB != NewScalarHeader)
1344 CreatedBlocks.push_back(VPIRBB);
1356#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1364 const std::string &Name =
Block->getName();
1373 OS <<
"digraph VPlan {\n";
1374 OS <<
"graph [labelloc=t, fontsize=30; label=\"Vectorization Plan";
1375 if (!Plan.getName().empty())
1382 Plan.printLiveIns(SS);
1385 for (
auto Line : Lines)
1390 OS <<
"node [shape=rect, fontname=Courier, fontsize=30]\n";
1391 OS <<
"edge [fontname=Courier, fontsize=30]\n";
1392 OS <<
"compound=true\n";
1410 bool Hidden,
const Twine &Label) {
1415 OS << Indent << getUID(
Tail) <<
" -> " << getUID(Head);
1416 OS <<
" [ label=\"" << Label <<
'\"';
1418 OS <<
" ltail=" << getUID(From);
1420 OS <<
" lhead=" << getUID(To);
1422 OS <<
"; splines=none";
1427 auto &Successors =
Block->getSuccessors();
1428 if (Successors.size() == 1)
1429 drawEdge(
Block, Successors.front(),
false,
"");
1430 else if (Successors.size() == 2) {
1431 drawEdge(
Block, Successors.front(),
false,
"T");
1432 drawEdge(
Block, Successors.back(),
false,
"F");
1434 unsigned SuccessorNumber = 0;
1443 OS << Indent << getUID(BasicBlock) <<
" [label =\n";
1446 raw_string_ostream
SS(Str);
1453 StringRef(Str).rtrim(
'\n').split(Lines,
"\n");
1455 auto EmitLine = [&](StringRef
Line, StringRef Suffix) {
1461 EmitLine(Line,
" +\n");
1462 EmitLine(
Lines.back(),
"\n");
1465 OS << Indent <<
"]\n";
1467 dumpEdges(BasicBlock);
1471 OS << Indent <<
"subgraph " << getUID(Region) <<
" {\n";
1473 OS << Indent <<
"fontname=Courier\n"
1474 << Indent <<
"label=\""
1478 if (
auto *CanIV =
Region->getCanonicalIV()) {
1479 OS << Indent <<
"\"";
1481 raw_string_ostream S(
Op);
1482 CanIV->printAsOperand(S, SlotTracker);
1484 OS <<
" = CANONICAL-IV\"\n";
1488 assert(
Region->getEntry() &&
"Region contains no inner blocks.");
1492 OS << Indent <<
"}\n";
1504 return DefR && (!DefR->
getParent()->getPlan()->getVectorLoopRegion() ||
1514 SV->markMaterialized();
1529 bool RemovedUser =
false;
1552#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1564void VPSlotTracker::assignName(
const VPValue *V) {
1565 assert(!VPValue2Name.contains(V) &&
"VPValue already has a name!");
1566 auto *UV = V->getUnderlyingValue();
1568 if (!UV && !(VPI && !VPI->getName().empty())) {
1569 VPValue2Name[V] = (
Twine(
"vp<%") +
Twine(NextSlot) +
">").str();
1580 Name = VPI->getName();
1582 assert(!Name.empty() &&
"Name cannot be empty.");
1584 std::string BaseName = (
Twine(Prefix) + Name +
Twine(
">")).str();
1587 const auto &[
A,
_] = VPValue2Name.try_emplace(V, BaseName);
1595 const auto &[
C, UseInserted] = BaseName2Version.
try_emplace(BaseName, 0);
1598 A->second = (BaseName +
Twine(
".") +
Twine(
C->second)).str();
1602void VPSlotTracker::assignNames(
const VPlan &Plan) {
1604 assignName(&Plan.VF);
1606 assignName(&Plan.UF);
1608 assignName(&Plan.VFxUF);
1609 assignName(&Plan.VectorTripCount);
1610 if (Plan.BackedgeTakenCount)
1611 assignName(Plan.BackedgeTakenCount);
1615 ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<const VPBlockBase *>>
1616 RPOT(VPBlockDeepTraversalWrapper<const VPBlockBase *>(Plan.
getEntry()));
1617 for (
const VPBlockBase *VPB : RPOT) {
1625void VPSlotTracker::assignNames(
const VPBasicBlock *VPBB) {
1626 for (
const VPRecipeBase &Recipe : *VPBB)
1627 for (VPValue *Def : Recipe.definedValues())
1631std::string VPSlotTracker::getName(
const Value *V) {
1633 raw_string_ostream S(Name);
1635 V->printAsOperand(S,
false);
1644 if (
I->getParent()) {
1645 MST = std::make_unique<ModuleSlotTracker>(
I->getModule());
1646 MST->incorporateFunction(*
I->getFunction());
1648 MST = std::make_unique<ModuleSlotTracker>(
nullptr);
1651 V->printAsOperand(S,
false, *MST);
1656 std::string Name = VPValue2Name.lookup(V);
1670 "VPValue defined by a recipe in a VPlan?");
1673 if (
auto *UV = V->getUnderlyingValue()) {
1676 UV->printAsOperand(S,
false);
1677 return (
Twine(
"ir<") + Name +
">").str();
1687 .inferScalarType(ChainOp)
1689 "ChainOp must be i1 for AnyOf reduction");
1694 auto *Freeze =
createNaryOp(Instruction::Freeze, {OrReduce},
DL);
1695 return createSelect(Freeze, TrueVal, FalseVal,
DL,
"rdx.select");
1700 assert(!
Range.isEmpty() &&
"Trying to test an empty VF range.");
1701 bool PredicateAtRangeStart = Predicate(
Range.Start);
1704 if (Predicate(TmpVF) != PredicateAtRangeStart) {
1709 return PredicateAtRangeStart;
1714 [VF](
const VPlanPtr &Plan) {
return Plan->hasVF(VF); }) ==
1716 "Multiple VPlans for VF.");
1718 for (
const VPlanPtr &Plan : VPlans) {
1719 if (Plan->hasVF(VF))
1729 bool IsUnrollMetadata =
false;
1730 MDNode *LoopID = L->getLoopID();
1739 if (S->getString().starts_with(
"llvm.loop.unroll.runtime.disable"))
1742 S->getString().starts_with(
"llvm.loop.unroll.disable");
1748 if (!IsUnrollMetadata) {
1750 LLVMContext &Context = L->getHeader()->getContext();
1753 MDString::get(Context,
"llvm.loop.unroll.runtime.disable"));
1759 L->setLoopID(NewLoopID);
1765 bool VectorizingEpilogue,
MDNode *OrigLoopID,
1766 std::optional<unsigned> OrigAverageTripCount,
1767 unsigned OrigLoopInvocationWeight,
unsigned EstimatedVFxUF,
1768 bool DisableRuntimeUnroll) {
1773 if (ScalarPH && !VectorizingEpilogue) {
1774 std::optional<MDNode *> RemainderLoopID =
1777 if (RemainderLoopID) {
1778 OrigLoop->setLoopID(*RemainderLoopID);
1780 if (DisableRuntimeUnroll)
1785 Hints.setAlreadyVectorized();
1791 if (ORE->enabled() && ScalarPH && ScalarPH->hasPredecessors())
1792 OrigLoop->addIntLoopAttribute(
"llvm.loop.vectorize.epilogue", 1);
1800 VectorLoop->
setLoopID(*VectorizedLoopID);
1807 if (!VectorizingEpilogue) {
1810 Hints.setAlreadyVectorized();
1818 TTI.getUnrollingPreferences(VectorLoop, *PSE.getSE(), UP, ORE);
1835 unsigned AverageVectorTripCount = 0;
1836 unsigned RemainderAverageTripCount = 0;
1838 auto IsProfiled = EC && EC->getCount();
1839 if (!OrigAverageTripCount) {
1842 auto &SE = *PSE.getSE();
1843 AverageVectorTripCount = SE.getSmallConstantTripCount(VectorLoop);
1847 RemainderAverageTripCount =
1848 SE.getSmallConstantTripCount(OrigLoop) % EstimatedVFxUF;
1850 OrigLoopInvocationWeight = 1;
1853 AverageVectorTripCount = *OrigAverageTripCount / EstimatedVFxUF;
1855 RemainderAverageTripCount = *OrigAverageTripCount % EstimatedVFxUF;
1859 OrigLoopInvocationWeight);
1864 OrigLoopInvocationWeight);
1868#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1870 if (VPlans.empty()) {
1871 O <<
"LV: No VPlans built.\n";
1874 for (
const auto &Plan : VPlans)
1885 unsigned WideSize =
C->getBitWidth();
1887 ? TruncatedVal.
sext(WideSize)
1888 : TruncatedVal.
zext(WideSize);
1889 return ExtendedVal == *
C;
1907 "Scalarization overhead not supported for scalable vectors");
1912 for (
Type *VectorTy :
1914 ScalarizationCost +=
TTI.getScalarizationOverhead(
1924 for (
auto *
Op : Operands) {
1926 (!AlwaysIncludeReplicatingR &&
1934 return ScalarizationCost +
1935 TTI.getOperandsScalarizationOverhead(Tys,
CostKind, VIC);
1949 const VPlan &Plan = *R->getParent()->getPlan();
1954 assert(VPRB->isReplicator() &&
"must only contain replicate regions");
1965 Type *Ty =
Types.inferScalarType(RepR->getOperand(0));
1967 const Align Alignment =
1969 if (!
TTI.isLegalMaskedScatter(VTy, Alignment))
1979 return is_contained({Intrinsic::assume, Intrinsic::lifetime_end,
1980 Intrinsic::lifetime_start, Intrinsic::sideeffect,
1981 Intrinsic::pseudoprobe,
1982 Intrinsic::experimental_noalias_scope_decl},
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
amdgpu next use AMDGPU Next Use Analysis Printer
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This file defines the LoopVectorizationLegality class.
This file provides a LoopVectorizationPlanner class.
cl::opt< unsigned > NumberOfStoresToPredicate("vectorize-num-stores-pred", cl::init(1), cl::Hidden, cl::desc("Max number of stores to be predicated behind an if."))
The number of stores in a loop that are allowed to need predication.
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
static StringRef getName(Value *V)
This file defines the SmallVector class.
This file implements dominator tree analysis for a single level of a VPlan's H-CFG.
This file contains the declarations of different VPlan-related auxiliary helpers.
static void addRuntimeUnrollDisableMetaData(Loop *L)
static T * getPlanEntry(T *Start)
static void printFinalVPlan(VPlan &)
To make RUN_VPLAN_PASS print final VPlan.
static T * getEnclosingLoopRegionForRegion(T *P)
Return the enclosing loop region for region P.
const char LLVMLoopVectorizeFollowupAll[]
static bool isDefinedInsideLoopRegions(const VPValue *VPV)
Returns true if there is a vector loop region and VPV is defined in a loop region.
static bool hasConditionalTerminator(const VPBasicBlock *VPBB)
const char LLVMLoopVectorizeFollowupVectorized[]
static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry, DenseMap< VPValue *, VPValue * > &Old2NewVPValues)
const char LLVMLoopVectorizeFollowupEpilogue[]
static cl::opt< bool > PrintVPlansInDotFormat("vplan-print-in-dot-format", cl::Hidden, cl::desc("Use dot format instead of plain text when dumping VPlans"))
This file contains the declarations of the Vectorization Plan base classes:
static bool IsCondBranch(unsigned BrOpc)
Class for arbitrary precision integers.
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
const Function * getParent() const
Return the enclosing method, or null if none.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
InstListType::iterator iterator
Instruction iterators...
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
std::optional< const DILocation * > cloneByMultiplyingDuplicationFactor(unsigned DF) const
Returns a new DILocation with duplication factor DF * current duplication factor encoded in the discr...
ValueT lookup(const_arg_type_t< KeyT > Val) const
Return the entry for the specified key, or a default constructed value if no such entry exists.
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
static constexpr UpdateKind Delete
static constexpr UpdateKind Insert
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
constexpr bool isVector() const
One or more elements.
constexpr bool isScalar() const
Exactly one element.
Convenience struct for specifying and reasoning about fast-math flags.
std::optional< ProfileCount > getEntryCount(bool AllowSynthetic=false) const
Get the entry count for this function.
Common base class shared among various IRBuilders.
static InstructionCost getInvalid(CostType Val=0)
This is an important class for using LLVM in a threaded context.
A helper class to return the specified delimiter string after the first invocation of operator String...
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
std::vector< BlockT * > & getBlocksVector()
Return a direct, mutable handle to the blocks vector so that we can mutate it efficiently with techni...
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
VPlan & getPlanFor(ElementCount VF) const
Return the VPlan for VF.
void updateLoopMetadataAndProfileInfo(Loop *VectorLoop, VPBasicBlock *HeaderVPBB, const VPlan &Plan, bool VectorizingEpilogue, MDNode *OrigLoopID, std::optional< unsigned > OrigAverageTripCount, unsigned OrigLoopInvocationWeight, unsigned EstimatedVFxUF, bool DisableRuntimeUnroll)
Update loop metadata and profile info for both the scalar remainder loop and VectorLoop,...
static bool getDecisionAndClampRange(const std::function< bool(ElementCount)> &Predicate, VFRange &Range)
Test a Predicate on a Range of VF's.
void printPlans(raw_ostream &O)
Utility class for getting and setting loop vectorizer hints in the form of loop metadata.
Represents a single loop in the control flow graph.
void addIntLoopAttribute(StringRef Name, unsigned Value, ArrayRef< StringRef > RemovePrefixes={}) const
Add an integer metadata attribute to this loop's loop-ID node.
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
LLVM_ABI void replaceOperandWith(unsigned I, Metadata *New)
Replace a specific operand.
const MDOperand & getOperand(unsigned I) const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
unsigned getNumOperands() const
Return number of MDNode operands.
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
BlockT * getEntry() const
Get the entry BasicBlock of the Region.
size_type size() const
Determine the number of elements in the SetVector.
void insert_range(Range &&R)
bool insert(const value_type &X)
Insert a new element into the SetVector.
This class provides computation of slot numbers for LLVM Assembly writing.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
std::pair< iterator, bool > try_emplace(StringRef Key, ArgsTy &&...Args)
Emplace a new element for the specified key into the map if the key isn't already in the map.
Represent a constant reference to a string, i.e.
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
StringRef rtrim(char Char) const
Return string with consecutive Char characters starting from the right removed.
Target - Wrapper for Target specific information.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isVoidTy() const
Return true if this is 'void'.
static UncondBrInst * Create(BasicBlock *Target, InsertPosition InsertBefore=nullptr)
This function has undefined behavior.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
unsigned getNumOperands() const
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
RecipeListTy::iterator iterator
Instruction iterators...
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
iterator begin()
Recipe iterator methods.
VPBasicBlock * clone() override
Clone the current block and its recipes, without updating the operands of the cloned recipes.
InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override
Return the cost of this VPBasicBlock.
const VPBasicBlock * getCFGPredecessor(unsigned Idx) const
Returns the predecessor block at index Idx with the predecessors as per the corresponding plain CFG.
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
void connectToPredecessors(VPTransformState &State)
Connect the VPBB's predecessors in the VPlan CFG to the IR basic block generated for this VPBB.
VPRegionBlock * getEnclosingLoopRegion()
VPBasicBlock * splitAt(iterator SplitAt)
Split current block at SplitAt by inserting a new block between the current block and its successors ...
RecipeListTy Recipes
The VPRecipes held in the order of output instructions to generate.
void executeRecipes(VPTransformState *State, BasicBlock *BB)
Execute the recipes in the IR basic block BB.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print this VPBasicBlock to O, prefixing all lines with Indent.
bool isExiting() const
Returns true if the block is exiting its parent region.
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
const VPRecipeBase & back() const
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccs as successor of this VPBlockBase.
VPRegionBlock * getParent()
const VPBasicBlock * getExitingBasicBlock() const
size_t getNumSuccessors() const
iterator_range< VPBlockBase ** > successors()
virtual void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Print plain-text dump of this VPBlockBase to O, prefixing all lines with Indent.
bool hasPredecessors() const
Returns true if this block has any predecessors.
void printSuccessors(raw_ostream &O, const Twine &Indent) const
Print the successors of this block to O, prefixing all lines with Indent.
size_t getNumPredecessors() const
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
VPBlockBase * getEnclosingBlockWithPredecessors()
bool hasSuccessors() const
Returns true if this block has any successors.
const VPBlocksTy & getPredecessors() const
void setPlan(VPlan *ParentPlan)
Sets the pointer of the plan containing the block.
const std::string & getName() const
VPBlockBase * getSinglePredecessor() const
const VPBlocksTy & getHierarchicalSuccessors()
VPBlockBase(const unsigned char SC, const std::string &N)
VPBlockBase * getEnclosingBlockWithSuccessors()
An Enclosing Block of a block B is any block containing B, including B itself.
const VPBasicBlock * getEntryBasicBlock() const
VPBlockBase * getSingleSuccessor() const
const VPBlocksTy & getSuccessors() const
static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBase NewBlock after BlockPtr.
static bool isLatch(const VPBlockBase *VPB, const VPDominatorTree &VPDT)
Returns true if VPB is a loop latch, using isHeader().
static bool isHeader(const VPBlockBase *VPB, const VPDominatorTree &VPDT)
Returns true if VPB is a loop header, based on regions or VPDT in their absence.
static void connectBlocks(VPBlockBase *From, VPBlockBase *To, unsigned PredIdx=-1u, unsigned SuccIdx=-1u)
Connect VPBlockBases From and To bi-directionally.
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
static auto blocksOnly(T &&Range)
Return an iterator range over Range which only includes BlockTy blocks.
static void transferSuccessors(VPBlockBase *Old, VPBlockBase *New)
Transfer successors from Old to New. New must have no successors.
static std::pair< VPBlockBase *, VPBlockBase * > cloneFrom(VPBlockBase *Entry)
Clone the CFG for all nodes reachable from Entry, including cloning the blocks and their recipes.
VPlan-based builder utility analogous to IRBuilder.
VPInstruction * createAnyOfReduction(VPValue *ChainOp, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL=DebugLoc::getUnknown())
Create an AnyOf reduction pattern: or-reduce ChainOp, freeze the result, then select between TrueVal ...
VPBasicBlock * getInsertBlock() const
VPInstruction * createOverflowingOp(unsigned Opcode, ArrayRef< VPValue * > Operands, VPRecipeWithIRFlags::WrapFlagsTy WrapFlags={false, false}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPPhi * createScalarPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={})
VPInstruction * createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={})
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.
This class augments a recipe with a set of VPValues defined by the recipe.
A special type of VPBasicBlock that wraps an existing IR basic block.
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
BasicBlock * getIRBasicBlock() const
VPIRBasicBlock * clone() override
Clone the current block and its recipes, without updating the operands of the cloned recipes.
Class to record and manage LLVM IR flags.
static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)
Create a new VPIRPhi for I, if it is a PHINode, otherwise create a VPIRInstruction.
This is a concrete Recipe that models a single VPlan-level instruction.
@ ComputeReductionResult
Reduce the operands to the final reduction result using the operation specified via the operation's V...
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
Value * getAsRuntimeExpr(IRBuilderBase &Builder, const ElementCount &VF) const
Returns an expression describing the lane index that can be used at runtime.
Kind getKind() const
Returns the Kind of lane offset.
bool isFirstLane() const
Returns true if this is the first lane of the whole vector.
unsigned getKnownLane() const
Returns a compile-time known value for the lane index and asserts if the lane can only be calculated ...
static VPLane getFirstLane()
@ ScalableLast
For ScalableLast, Lane is the offset from the start of the last N-element subvector in a scalable vec...
@ First
For First, Lane is the index into the first N elements of a fixed-vector <N x <ElTy>> or a scalable v...
unsigned mapToCacheIndex(const ElementCount &VF) const
Maps the lane to a cache index based on VF.
LLVM_ABI_FOR_TEST VPMultiDefValue(VPRecipeBase *Def, Value *UV, Type *Ty)
~VPMultiDefValue() override
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
LLVM_ABI_FOR_TEST void dump() const
Dump the recipe to stderr (for debugging).
VPBasicBlock * getParent()
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const
Print the recipe, delegating to printRecipe().
virtual LLVM_ABI_FOR_TEST ~VPRecipeValue()=0
VPRecipeValue(unsigned char SC, Value *UV, Type *Ty=nullptr)
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
VPRegionBlock * clone() override
Clone all blocks in the single-entry single-exit region of the block and their recipes without updati...
const VPBlockBase * getEntry() const
void dissolveToCFGLoop()
Remove the current region from its VPlan, connecting its predecessor to its entry,...
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
VPInstruction * getOrCreateCanonicalIVIncrement()
Get the canonical IV increment instruction if it exists.
InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override
Return the cost of the block.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print this VPRegionBlock to O (recursively), prefixing all lines with Indent.
bool hasCanonicalIVNUW() const
Indicates if NUW is set for the canonical IV increment, for loop regions.
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPRegionBlock,...
VPRegionValue * getCanonicalIV()
Return the canonical induction variable of the region, null for replicating regions.
const VPBlockBase * getExiting() const
VPValues defined by a VPRegionBlock, like the canonical IV.
Type * getType() const
Returns the type of the VPRegionValue.
DebugLoc getDebugLoc() const
Returns the debug location of the VPRegionValue.
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
VPSingleDefRecipe is a base class for recipes that model a sequence of one or more output IR that def...
LLVM_ABI_FOR_TEST VPSingleDefValue(VPSingleDefRecipe *Def, Value *UV=nullptr, Type *Ty=nullptr)
Construct a VPSingleDefValue. Must only be used by VPSingleDefRecipe.
~VPSingleDefValue() override
friend class VPSingleDefRecipe
This class can be used to assign names to VPValues.
std::string getOrCreateName(const VPValue *V) const
Returns the name assigned to V, if there is one, otherwise try to construct one from the underlying v...
An analysis for type-inference for VPValues.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
void replaceUsesOfWith(VPValue *From, VPValue *To)
Replaces all uses of From in the VPUser with To.
void printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the operands to O.
void setOperand(unsigned I, VPValue *New)
unsigned getNumOperands() const
VPValue * getOperand(unsigned N) const
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Type * getScalarType() const
Returns the scalar type of this VPValue, dispatching based on the concrete subclass.
Value * getLiveInIRValue() const
Return the underlying IR value for a VPIRValue.
bool isDefinedOutsideLoopRegions() const
Returns true if the VPValue is defined outside any loop.
unsigned getVPValueID() const
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
void printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const
void assertNotMaterialized() const
Assert that this VPValue has not been materialized, if it is a VPSymbolicValue.
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
@ VPVSingleDefValueSC
A symbolic live-in VPValue without IR backing.
@ VPVSymbolicSC
A live-in VPValue wrapping an IR Value.
@ VPRegionValueSC
A VPValue defined by a multi-def recipe.
@ VPVMultiDefValueSC
A VPValue defined by a VPSingleDefRecipe.
void dump() const
Dump the value to stderr (for debugging).
void print(raw_ostream &OS, VPSlotTracker &Tracker) const
void replaceAllUsesWith(VPValue *New)
unsigned getNumUsers() const
void replaceUsesWithIf(VPValue *New, llvm::function_ref< bool(VPUser &U, unsigned Idx)> ShouldReplace)
Go through the uses list for this VPValue and make each use point to New if the callback ShouldReplac...
LLVM_DUMP_METHOD void dump()
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
LLVM_ABI_FOR_TEST void printDOT(raw_ostream &O) const
Print this VPlan in DOT format to O.
friend class VPSlotTracker
std::string getName() const
Return a string with the name of the plan and the applicable VFs and UFs.
VPBasicBlock * getEntry()
Type * getIndexType() const
The type of the canonical induction variable of the vector loop.
void setName(const Twine &newName)
VPIRBasicBlock * getExitBlock(BasicBlock *IRBB) const
Return the VPIRBasicBlock corresponding to IRBB.
LLVM_ABI_FOR_TEST ~VPlan()
bool isExitBlock(VPBlockBase *VPBB)
Returns true if VPBB is an exit block.
friend class VPlanPrinter
VPSymbolicValue & getVFxUF()
Returns VF * UF of the vector loop region.
VPIRBasicBlock * createEmptyVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock wrapping IRBB, but do not create VPIRInstructions wrapping the instructions i...
auto getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
bool hasEarlyExit() const
Returns true if the VPlan is based on a loop with an early exit.
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this plan.
LLVM_ABI_FOR_TEST bool isOuterLoop() const
Returns true if this VPlan is for an outer loop, i.e., its vector loop region contains a nested loop ...
unsigned getConcreteUF() const
Returns the concrete UF of the plan, after unrolling.
void setEntry(VPBasicBlock *VPBB)
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
LLVM_ABI_FOR_TEST VPIRBasicBlock * createVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock from IRBB containing VPIRInstructions for all instructions in IRBB,...
LLVM_DUMP_METHOD void dump() const
Dump the plan to stderr (for debugging).
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
void execute(VPTransformState *State)
Generate the IR code for this VPlan.
LLVM_ABI_FOR_TEST void print(raw_ostream &O) const
Print this VPlan to O.
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
void printLiveIns(raw_ostream &O) const
Print the live-ins of this VPlan to O.
LLVM_ABI_FOR_TEST VPlan * duplicate()
Clone the current VPlan, update all VPValues of the new VPlan and cloned recipes to refer to the clon...
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
constexpr ScalarTy getFixedValue() const
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
An efficient, type-erasing, non-owning reference to a callable.
self_iterator getIterator()
This class implements an extremely fast bulk output stream that can only output to a stream.
A raw_ostream that writes to an std::string.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
@ C
The default llvm calling convention, compatible with C.
LLVM_ABI std::string EscapeString(const std::string &Label)
@ BasicBlock
Various leaf nodes.
match_combine_or< Ty... > m_CombineOr(const Ty &...Ps)
Combine pattern matchers matching any of Ps patterns.
bool match(Val *V, const Pattern &P)
VPInstruction_match< VPInstruction::BranchOnTwoConds > m_BranchOnTwoConds()
VPInstruction_match< VPInstruction::BranchOnCount > m_BranchOnCount()
VPInstruction_match< VPInstruction::BuildVector > m_BuildVector()
BuildVector matches only its opcode, w/o matching its operands as the number of operands is not fi...
VPInstruction_match< VPInstruction::BranchOnCond > m_BranchOnCond()
bool isSingleScalar(const VPValue *VPV)
Returns true if VPV is a single scalar, either because it produces the same value for all lanes or on...
VPBasicBlock * getFirstLoopHeader(VPlan &Plan, VPDominatorTree &VPDT)
Returns the header block of the first, top-level loop, or null if none exist.
VPInstruction * findCanonicalIVIncrement(VPlan &Plan)
Find the canonical IV increment of Plan's vector loop region.
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
cl::opt< bool > ProfcheckDisableMetadataFixes
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
auto successors(const MachineBasicBlock *BB)
LLVM_ABI cl::opt< bool > EnableFSDiscriminator
Value * getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF)
Return the runtime value for VF.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI std::optional< MDNode * > makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef< StringRef > FollowupAttrs, const char *InheritOptionsAttrsPrefix="", bool AlwaysNew=false)
Create a new loop identifier for a loop created from a loop transformation.
void interleaveComma(const Container &c, StreamT &os, UnaryFunctor each_fn)
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order.
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
Type * toVectorizedTy(Type *Ty, ElementCount EC)
A helper for converting to vectorized types.
bool canConstantBeExtended(const APInt *C, Type *NarrowType, TTI::PartialReductionExtendKind ExtKind)
Check if a constant C can be safely treated as having been extended from a narrower type with the gi...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
cl::opt< unsigned > ForceTargetInstructionCost
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
@ Or
Bitwise or logical OR of integers.
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the specified block at the specified instruction.
FunctionAddr VTableAddr Next
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
LLVM_ABI bool setLoopEstimatedTripCount(Loop *L, unsigned EstimatedTripCount, std::optional< unsigned > EstimatedLoopInvocationWeight=std::nullopt)
Set llvm.loop.estimated_trip_count with the value EstimatedTripCount in the loop metadata of L.
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
ArrayRef< Type * > getContainedTypes(Type *const &Ty)
Returns the types contained in Ty.
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
LLVM_ABI void DeleteDeadBlocks(ArrayRef< BasicBlock * > BBs, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified blocks from BB.
std::unique_ptr< VPlan > VPlanPtr
constexpr detail::IsaCheckPredicate< Types... > IsaPred
Function object wrapper for the llvm::isa type check.
This struct is a compact representation of a valid (non-zero power of two) alignment.
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
Struct to hold various analysis needed for cost computations.
TargetTransformInfo::OperandValueInfo getOperandInfo(VPValue *V) const
Returns the OperandInfo for V, if it is a live-in.
static bool isFreeScalarIntrinsic(Intrinsic::ID ID)
Returns true if ID is a pseudo intrinsic that is dropped via scalarization rather than widened.
std::optional< unsigned > NumPredStores
Number of predicated stores in the VPlan, computed on demand.
InstructionCost getScalarizationOverhead(Type *ResultTy, ArrayRef< const VPValue * > Operands, ElementCount VF, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None, bool AlwaysIncludeReplicatingR=false)
Estimate the overhead of scalarizing a recipe with result type ResultTy and Operands with VF.
TargetTransformInfo::TargetCostKind CostKind
const TargetTransformInfo & TTI
bool useEmulatedMaskMemRefHack(const VPReplicateRecipe *R, ElementCount VF)
Returns true if an artificially high cost for emulated masked memrefs should be used.
A VPValue representing a live-in from the input IR or a constant.
Type * getType() const
Returns the type of the underlying IR value.
A symbolic live-in VPValue, used for values like vector trip count, VF, and VFxUF.