38#define DEBUG_TYPE "vplan"
46class PlainCFGBuilder {
57 std::unique_ptr<VPlan> Plan;
78 bool isExternalDef(
Value *Val);
85 : TheLoop(Lp), LI(LI), LVer(LVer),
86 Plan(std::make_unique<VPlan>(Lp, IdxTy)) {}
89 std::unique_ptr<VPlan> buildPlainCFG();
99 VPBBPreds.
push_back(getOrCreateVPBB(Pred));
104 return L && BB == L->getHeader();
108void PlainCFGBuilder::fixHeaderPhis() {
109 for (
auto *Phi : PhisToFix) {
110 assert(IRDef2VPValue.count(Phi) &&
"Missing VPInstruction for PHINode.");
111 VPValue *VPVal = IRDef2VPValue[
Phi];
114 assert(PhiR->getNumOperands() == 0 &&
"Expected VPPhi with no operands.");
116 "Expected Phi in header block.");
118 "header phi must have exactly 2 operands");
121 getOrCreateVPOperand(
Phi->getIncomingValueForBlock(Pred)));
127VPBasicBlock *PlainCFGBuilder::getOrCreateVPBB(BasicBlock *BB) {
128 if (
auto *VPBB = BB2VPBB.lookup(BB)) {
135 LLVM_DEBUG(
dbgs() <<
"Creating VPBasicBlock for " << Name <<
"\n");
136 VPBasicBlock *VPBB = Plan->createVPBasicBlock(Name);
147bool PlainCFGBuilder::isExternalDef(
Value *Val) {
163VPValue *PlainCFGBuilder::getOrCreateVPOperand(
Value *IRVal) {
164 auto VPValIt = IRDef2VPValue.find(IRVal);
165 if (VPValIt != IRDef2VPValue.end())
168 return VPValIt->second;
177 assert(isExternalDef(IRVal) &&
"Expected external definition as operand.");
181 VPValue *NewVPVal = Plan->getOrAddLiveIn(IRVal);
182 IRDef2VPValue[IRVal] = NewVPVal;
189void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB,
193 for (Instruction &InstRef : *BB) {
198 assert(!IRDef2VPValue.count(Inst) &&
199 "Instruction shouldn't have been visited.");
208 VPValue *
Cond = getOrCreateVPOperand(Br->getCondition());
216 if (
SI->getNumCases() == 0)
219 for (
auto Case :
SI->cases())
220 Ops.push_back(getOrCreateVPOperand(Case.getCaseValue()));
226 VPSingleDefRecipe *NewR;
232 *Phi,
Phi->getType());
237 PhisToFix.push_back(Phi);
241 DenseMap<const VPBasicBlock *, VPValue *> VPPredToIncomingValue;
242 for (
unsigned I = 0;
I !=
Phi->getNumOperands(); ++
I) {
243 VPPredToIncomingValue[BB2VPBB[
Phi->getIncomingBlock(
I)]] =
244 getOrCreateVPOperand(
Phi->getIncomingValue(
I));
248 VPPredToIncomingValue.
lookup(Pred->getExitingBasicBlock()));
253 VPIRMetadata MD(*Inst);
255 const auto &[AliasScopeMD, NoAliasMD] =
258 MD.setMetadata(LLVMContext::MD_alias_scope, AliasScopeMD);
260 MD.setMetadata(LLVMContext::MD_noalias, NoAliasMD);
271 CI->getType(), CI->getDebugLoc(),
276 LI->getDebugLoc(), MD);
282 Inst->
getOpcode(), VPOperands, Inst, VPIRFlags(*Inst), MD,
287 IRDef2VPValue[Inst] = NewR;
292std::unique_ptr<VPlan> PlainCFGBuilder::buildPlainCFG() {
295 for (VPIRBasicBlock *ExitVPBB : Plan->getExitBlocks())
296 BB2VPBB[ExitVPBB->getIRBasicBlock()] = ExitVPBB;
309 "Unexpected loop preheader");
310 for (
auto &
I : *ThePreheaderBB) {
311 if (
I.getType()->isVoidTy())
313 IRDef2VPValue[&
I] = Plan->getOrAddLiveIn(&
I);
316 LoopBlocksRPO RPO(TheLoop);
319 for (BasicBlock *BB : RPO) {
321 VPBasicBlock *VPBB = getOrCreateVPBB(BB);
323 setVPBBPredsFromBB(VPBB, BB);
326 createVPInstructionsForVPBB(VPBB, BB);
333 getOrCreateVPBB(
SI->getDefaultDest())};
334 for (
auto Case :
SI->cases())
335 Succs.
push_back(getOrCreateVPBB(Case.getCaseSuccessor()));
346 VPBasicBlock *Successor0 = getOrCreateVPBB(IRSucc0);
347 VPBasicBlock *Successor1 = getOrCreateVPBB(IRSucc1);
351 for (
auto *EB : Plan->getExitBlocks())
352 setVPBBPredsFromBB(EB, EB->getIRBasicBlock());
359 Plan->getEntry()->setOneSuccessor(getOrCreateVPBB(TheLoop->
getHeader()));
360 Plan->getEntry()->setPlan(&*Plan);
367 for (
auto *EB : Plan->getExitBlocks()) {
368 for (VPRecipeBase &R : EB->phis()) {
370 PHINode &
Phi = PhiR->getIRPhi();
371 assert(PhiR->getNumOperands() == 0 &&
372 "no phi operands should be added yet");
373 for (BasicBlock *Pred :
predecessors(EB->getIRBasicBlock()))
375 getOrCreateVPOperand(
Phi.getIncomingValueForBlock(Pred)));
380 return std::move(Plan);
393 if (Preds.
size() != 2)
396 auto *PreheaderVPBB = Preds[0];
397 auto *LatchVPBB = Preds[1];
398 if (!VPDT.
dominates(PreheaderVPBB, HeaderVPB) ||
402 if (!VPDT.
dominates(PreheaderVPBB, HeaderVPB) ||
419 if (LatchVPBB->getSingleSuccessor() ||
420 LatchVPBB->getSuccessors()[0] != HeaderVPB)
423 assert(LatchVPBB->getNumSuccessors() == 2 &&
"Must have 2 successors");
427 "terminator must be a BranchOnCond");
429 Not->insertBefore(Term);
430 Term->setOperand(0, Not);
431 LatchVPBB->swapSuccessors();
442 auto *OutermostHeaderVPBB =
454 bool IsOutermost = HeaderVPB == OutermostHeaderVPBB;
455 Type *CanIVTy =
nullptr;
466 R->setEntry(HeaderVPB);
467 R->setExiting(LatchVPBB);
476 auto *LatchTerm = LatchVPBB->getTerminator();
481 auto *CanonicalIVIncrement = Builder.createAdd(
482 R->getCanonicalIV(), &Plan.
getVFxUF(),
DL,
"index.next", {true, false});
485 auto *IsLatchExitTaken = Builder.createICmp(
487 LatchTerm->setOperand(1, IsLatchExitTaken);
492 DebugLoc LatchDL = LatchTerm->getDebugLoc();
496 LatchTerm->eraseFromParent();
510 VPValue *Exiting = ExitIRI->getIncomingValueForBlock(MiddleVPBB);
515 ExitIRI->setIncomingValueForBlock(MiddleVPBB, Exiting);
538 if (LatchVPBB->getNumSuccessors() == 2) {
543 LatchVPBB->swapSuccessors();
553 "Invalid backedge-taken count");
556 InductionTy, TheLoop);
580 "unexpected predecessor order of scalar ph");
581 for (
const auto &[PhiR, ScalarPhiR] :
584 VPValue *BackedgeVal = VectorPhiR->getOperand(1);
585 VPValue *ResumeFromVectorLoop =
592 {ResumeFromVectorLoop, VectorPhiR->getOperand(0)},
593 VectorPhiR->getDebugLoc());
602 auto GetSimplifiedLiveInViaSCEV = [&](
VPValue *VPV) ->
VPValue * {
610 if (
VPValue *SimplifiedLiveIn = GetSimplifiedLiveInViaSCEV(LiveIn))
611 LiveIn->replaceAllUsesWith(SimplifiedLiveIn);
618std::unique_ptr<VPlan>
622 PlainCFGBuilder Builder(TheLoop, &LI, LVer, InductionTy);
623 std::unique_ptr<VPlan> VPlan0 = Builder.buildPlainCFG();
640 "step must be loop invariant");
645 "Start VPValue must match IndDesc's start value");
659 VPUser *ExtractLastPartUser = ExtractLastPart->getSingleUser();
660 assert(ExtractLastPartUser &&
"must have a single user");
666 "last lane must be extracted in the middle block");
668 ExtractLastLane->replaceAllUsesWith(
670 ExtractLastLane->eraseFromParent();
671 ExtractLastPart->eraseFromParent();
677 Phi, Start, Step, &Plan.
getVFxUF(), IndDesc,
DL);
678 ReplaceExtractsWithExitingIVValue(WideIV);
684 "must have an integer or float induction at this point");
698 Phi, Start, Step, &Plan.
getVF(), IndDesc, Flags,
DL);
700 ReplaceExtractsWithExitingIVValue(WideIV);
714 auto TryToPushSinkCandidate = [&](
VPRecipeBase *SinkCandidate) {
717 if (SinkCandidate == Previous)
721 !Seen.
insert(SinkCandidate).second ||
734 for (
unsigned I = 0;
I != WorkList.
size(); ++
I) {
737 "only recipes with a single defined value expected");
752 if (SinkCandidate == FOR)
755 SinkCandidate->moveAfter(Previous);
756 Previous = SinkCandidate;
782 [&VPDT, HoistPoint](
VPUser *U) {
783 auto *R = cast<VPRecipeBase>(U);
784 return HoistPoint == R ||
785 VPDT.properlyDominates(HoistPoint, R);
787 "HoistPoint must dominate all users of FOR");
789 auto NeedsHoisting = [HoistPoint, &VPDT,
791 VPRecipeBase *HoistCandidate = HoistCandidateV->getDefiningRecipe();
795 if (!Visited.
insert(HoistCandidate).second)
801 return HoistCandidate;
811 for (
unsigned I = 0;
I != HoistCandidates.
size(); ++
I) {
814 "only recipes with a single defined value expected");
826 if (
auto *R = NeedsHoisting(
Op)) {
829 if (R->getNumDefinedValues() != 1)
843 HoistCandidate->moveBefore(*HoistPoint->
getParent(),
860 return cast<VPFirstOrderRecurrencePHIRecipe>(&R);
867 VPRecipeBase *Previous = FOR->getBackedgeValue()->getDefiningRecipe();
868 while (
auto *PrevPhi =
870 assert(PrevPhi->getParent() == FOR->getParent() &&
871 "PrevPhi must be in same block as FOR");
873 "PrevPhi must not be visited multiple times");
874 Previous = PrevPhi->getBackedgeValue()->getDefiningRecipe();
877 assert(Previous &&
"Previous must be a recipe");
888 VPBuilder LoopBuilder(InsertBlock, InsertPt);
891 {FOR, FOR->getBackedgeValue()});
893 return &U != RecurSplice;
910 "header must dominate its latch");
916 assert(PhiR->getNumOperands() == 2 &&
917 "Must have 2 operands for header phis");
921 VPValue *BackedgeValue = PhiR->getOperand(1);
923 if (FixedOrderRecurrences.
contains(Phi)) {
931 auto InductionIt = Inductions.
find(Phi);
932 if (InductionIt != Inductions.
end())
935 PhiR->getDebugLoc());
941 "incoming value must match start value");
943 unsigned ScaleFactor = 1;
944 bool UseOrderedReductions = !AllowReordering && RdxDesc.
isOrdered();
958 PhiR->replaceAllUsesWith(HeaderPhiR);
959 PhiR->eraseFromParent();
965 for (
const auto &[HeaderPhiR, ScalarPhiR] :
969 ResumePhiR->setName(
"scalar.recur.init");
971 ExtractLastLane->setName(
"vector.recur.extract");
984 unsigned SCEVCheckThreshold,
990 for (
auto &R : HeaderVPBB->phis()) {
992 if (!WideIV || WideIV->getNoWrapPredicates().empty())
994 PredicatedIVs.
insert(WideIV);
995 for (
const auto *
P : WideIV->getNoWrapPredicates())
1006 if (!PredicatedIVs.
empty())
1014 if (!WideIV || !PredicatedIVs.
contains(WideIV))
1017 "outside-loop use via ExitingIVValue\n");
1022 if (TotalComplexity && OptForSize) {
1024 dbgs() <<
"LV: Not vectorizing: SCEV predicates needed for induction "
1025 "but optimizing for size\n");
1027 "Runtime SCEV check is required with -Os/-Oz",
1028 "runtime SCEV checks needed but optimizing for size",
1029 "CantVersionLoopWithOptForSize", ORE, TheLoop);
1033 if (TotalComplexity > SCEVCheckThreshold) {
1034 LLVM_DEBUG(
dbgs() <<
"LV: Not vectorizing: Too many SCEV checks needed ("
1035 << TotalComplexity <<
" > " << SCEVCheckThreshold
1038 "Too many SCEV checks needed",
1039 "Too many SCEV assumptions need to be made and checked at runtime",
1040 "TooManySCEVRunTimeChecks", ORE, TheLoop);
1055 if (!PhiR || !PhiR->isInLoop() || (MinVF.
isScalar() && !PhiR->isOrdered()))
1058 RecurKind Kind = PhiR->getRecurrenceKind();
1062 "AnyOf and Find reductions are not allowed for in-loop reductions");
1064 bool IsFPRecurrence =
1072 for (
unsigned I = 0;
I != Worklist.
size(); ++
I) {
1076 if (!UserRecipe->getParent()->getEnclosingLoopRegion()) {
1079 "U must be either in the loop region, the middle block or the "
1080 "scalar preheader.");
1087 Worklist.
insert(UserRecipe);
1101 assert(Blend->getNumIncomingValues() == 2 &&
1102 "Blend must have 2 incoming values");
1103 unsigned PhiRIdx = Blend->getIncomingValue(0) == PhiR ? 0 : 1;
1104 assert(Blend->getIncomingValue(PhiRIdx) == PhiR &&
1105 "PhiR must be an operand of the blend");
1106 Blend->replaceAllUsesWith(Blend->getIncomingValue(1 - PhiRIdx));
1110 if (IsFPRecurrence) {
1115 ->getFastMathFlags();
1119 Instruction *CurrentLinkI = CurrentLink->getUnderlyingInstr();
1127 "Expected current VPInstruction to be a call to the "
1128 "llvm.fmuladd intrinsic");
1129 assert(CurrentLink->getOperand(2) == PreviousLink &&
1130 "expected a call where the previous link is the added operand");
1138 {CurrentLink->getOperand(0), CurrentLink->getOperand(1)},
1140 LinkVPBB->
insert(FMulRecipe, CurrentLink->getIterator());
1146 VPBuilder Builder(LinkVPBB, CurrentLink->getIterator());
1147 auto *
Sub = Builder.createSub(Zero, CurrentLink->getOperand(1),
1149 Sub->setUnderlyingValue(CurrentLinkI);
1153 unsigned IndexOfFirstOperand = 0;
1159 "must be a select recipe");
1160 IndexOfFirstOperand = 1;
1165 CurrentLink->getOperand(IndexOfFirstOperand) == PreviousLink
1166 ? IndexOfFirstOperand + 1
1167 : IndexOfFirstOperand;
1168 VecOp = CurrentLink->getOperand(VecOpId);
1170 VecOp != PreviousLink &&
1173 1 - (VecOpId - IndexOfFirstOperand)) == PreviousLink &&
1174 "PreviousLink must be the operand other than VecOp");
1177 assert(PhiR->getVFScaleFactor() == 1 &&
1178 "inloop reductions must be unscaled");
1181 Kind, FMFs, CurrentLinkI, PreviousLink, VecOp, CondOp,
1189 RedRecipe->insertBefore(&*std::prev(std::prev(LinkVPBB->
end())));
1193 CurrentLink->replaceAllUsesWith(RedRecipe);
1198 if (!UserR || UserR->getParent() != LinkVPBB)
1202 UserR->moveAfter(RedRecipe);
1205 PreviousLink = RedRecipe;
1210 R->eraseFromParent();
1224 if (!VPI || VPI->getOpcode() != Instruction::Load) {
1225 assert(!R.mayReadFromMemory() &&
"unexpected recipe reading memory");
1230 VPValue *Ptr = VPI->getOperand(0);
1233 LLVM_DEBUG(
dbgs() <<
"LV: Not vectorizing: Found non-dereferenceable "
1234 "load with SCEVCouldNotCompute pointer\n");
1239 Type *LoadTy = VPI->getResultType();
1240 const SCEV *SizeSCEV =
1245 TheLoop, SE, DT, AC, &Preds))
1249 dbgs() <<
"LV: Not vectorizing: Auto-vectorization of loops with "
1250 "potentially faulting load is not supported.\n");
1280 if (Pred == MiddleVPBB)
1287 EarlyExitingVPBB->getTerminator()->eraseFromParent();
1299 if (MiddleVPBB->getNumSuccessors() == 1) {
1301 "must have ScalarPH as single successor");
1305 assert(MiddleVPBB->getNumSuccessors() == 2 &&
"must have 2 successors");
1323 DebugLoc LatchDL = LatchVPBB->getTerminator()->getDebugLoc();
1343 TopRegion->
setName(
"vector loop");
1349 "only a single-exit block is supported currently");
1352 "the exit block must have middle block as single predecessor");
1356 "The vector loop region must have the middle block as its single "
1357 "successor for now");
1360 Header->
splitAt(Header->getFirstNonPhi());
1366 VPBuilder Builder(Header, Header->getFirstNonPhi());
1374 [[maybe_unused]]
bool TermBranchOnCount =
1378 assert(TermBranchOnCount &&
1381 std::next(IVInc->getDefiningRecipe()->getIterator()) ==
1383 "Unexpected canonical iv increment");
1387 OrigLatch->
splitAt(IVInc->getDefiningRecipe()->getIterator());
1388 Latch->
setName(
"vector.latch");
1402 NeedsPhi[V].push_back(&R);
1405 Builder.setInsertPoint(Latch, Latch->
begin());
1407 for (
const auto &[V,
Users] : NeedsPhi) {
1414 "Value used by more than two reduction phis?");
1418 if (RdxPhi && !RdxPhi->isInLoop()) {
1423 VPInstruction *Phi = Builder.createScalarPhi({V, TailVal}, {},
"", Flags);
1425 U->replaceUsesOfWith(V, Phi);
1438 VPValue *LastActiveLane = Builder.createLastActiveLane(HeaderMask);
1441 R.getVPSingleValue()->replaceAllUsesWith(Ext);
1456 unsigned NumPreds = ScalarPH->getNumPredecessors();
1459 assert(Phi->getNumIncoming() == NumPreds - 1 &&
1460 "must have incoming values for all predecessors");
1461 Phi->addOperand(Phi->getOperand(NumPreds - 2));
1476 if (AddBranchWeights) {
1480 Term->setMetadata(LLVMContext::MD_prof, BranchWeights);
1486 bool AddBranchWeights) {
1493 bool AddBranchWeights) {
1501 ElementCount MinProfitableTripCount,
bool RequiresScalarEpilogue,
1516 auto GetMinTripCount = [&]() ->
const SCEV * {
1525 const SCEV *MinProfitableTripCountSCEV =
1527 return SE.
getUMaxExpr(MinProfitableTripCountSCEV, VFxUF);
1532 const SCEV *Step = GetMinTripCount();
1543 TripCountCheck = Plan.
getTrue();
1552 if (!MinTripCountVPV)
1554 TripCountCheck = Builder.createICmp(
1555 CmpPred, TripCountVPV, MinTripCountVPV,
DL,
"min.iters.check");
1564 Term->setMetadata(LLVMContext::MD_prof, BranchWeights);
1575 RequiresScalarEpilogue,
false,
1581 VPlan &Plan,
Value *VectorTripCount,
bool RequiresScalarEpilogue,
1582 ElementCount EpilogueVF,
unsigned EpilogueUF,
unsigned MainLoopStep,
1596 auto *CheckMinIters = Builder.createICmp(
1607 unsigned EstimatedSkipCount = std::min(MainLoopStep, EpilogueLoopStep);
1608 const uint32_t Weights[] = {EstimatedSkipCount,
1609 MainLoopStep - EstimatedSkipCount};
1613 Branch->setMetadata(LLVMContext::MD_prof, BranchWeights);
1630 auto GetMinOrMaxCompareValue =
1644 if (MinOrMaxR->getOperand(0) == RedPhiR)
1645 return MinOrMaxR->getOperand(1);
1647 assert(MinOrMaxR->getOperand(1) == RedPhiR &&
1648 "Reduction phi operand expected");
1649 return MinOrMaxR->getOperand(0);
1654 MinOrMaxNumReductionsToHandle;
1655 bool HasUnsupportedPhi =
false;
1662 HasUnsupportedPhi =
true;
1666 Cur->getRecurrenceKind())) {
1667 HasUnsupportedPhi =
true;
1671 VPValue *MinOrMaxOp = GetMinOrMaxCompareValue(Cur);
1675 MinOrMaxNumReductionsToHandle.
emplace_back(Cur, MinOrMaxOp);
1678 if (MinOrMaxNumReductionsToHandle.
empty())
1696 for (
auto &R : *VPBB) {
1704 VPValue *AllNaNLanes =
nullptr;
1706 for (
const auto &[
_, MinOrMaxOp] : MinOrMaxNumReductionsToHandle) {
1709 AllNaNLanes = AllNaNLanes ? LatchBuilder.
createOr(AllNaNLanes, RedNaNLanes)
1717 for (
const auto &[RedPhiR,
_] : MinOrMaxNumReductionsToHandle) {
1719 RedPhiR->getRecurrenceKind()) &&
1720 "unsupported reduction");
1725 assert(RdxResult &&
"must find a ComputeReductionResult");
1727 auto *NewSel = MiddleBuilder.
createSelect(AnyNaNLane, RedPhiR,
1728 RdxResult->getOperand(0));
1730 assert(!RdxResults.
contains(RdxResult) &&
"RdxResult already used");
1731 RdxResults.
insert(RdxResult);
1736 "Unexpected terminator");
1737 auto *IsLatchExitTaken = LatchBuilder.
createICmp(
1739 LatchExitingBranch->getOperand(1));
1740 auto *AnyExitTaken = LatchBuilder.
createOr(AnyNaNLane, IsLatchExitTaken);
1747 auto IsTC = [&Plan](
VPValue *V) {
1752 VPValue *VecV = ResumeR->getOperand(0);
1756 VPValue *DIVTC = DerivedIV->getOperand(1);
1757 if (DerivedIV->getNumUsers() == 1 && IsTC(DIVTC)) {
1761 DerivedIV->setOperand(1, NewSel);
1768 LLVM_DEBUG(
dbgs() <<
"Found resume phi we cannot update for VPlan with "
1769 "FMaxNum/FMinNum reduction.\n");
1779 VPValue *MiddleCond = MiddleTerm->getOperand(0);
1816 PhiR->getRecurrenceKind()))
1826 VPValue *BackedgeSelect = PhiR->getBackedgeValue();
1827 VPValue *CondSelect = BackedgeSelect;
1831 if (HeaderMask && !
match(BackedgeSelect,
1836 VPValue *
Cond =
nullptr, *Op1 =
nullptr, *Op2 =
nullptr;
1845 assert(!Blend->isNormalized() &&
"must run before blend normalizaion");
1846 unsigned NumIncomingDataValues = 0;
1847 for (
unsigned I = 0;
I < Blend->getNumIncomingValues(); ++
I) {
1848 VPValue *Incoming = Blend->getIncomingValue(
I);
1849 if (Incoming != PhiR) {
1850 ++NumIncomingDataValues;
1851 Cond = Blend->getMask(
I);
1856 return NumIncomingDataValues == 1;
1863 !MatchBlend(SelectR))
1866 assert(
Cond != HeaderMask &&
"Cond must not be HeaderMask");
1872 assert(RdxResult &&
"Could not find reduction result");
1876 auto *MaskPHI = Builder.createWidenPhi(Plan.
getFalse());
1879 Builder.setInsertPoint(SelectR);
1887 assert(Op2 == PhiR &&
"data value must be selected if Cond is true");
1890 Cond = Builder.createLogicalAnd(HeaderMask,
Cond);
1894 MaskPHI->addOperand(MaskSelect);
1900 PhiR->setBackedgeValue(DataSelect);
1903 Builder.setInsertPoint(RdxResult);
1904 auto *ExtractLastActive =
1906 {PhiR->getStartValue(), DataSelect, MaskSelect},
1907 RdxResult->getDebugLoc());
1908 RdxResult->replaceAllUsesWith(ExtractLastActive);
1909 RdxResult->eraseFromParent();
1934 "inloop and ordered reductions not supported");
1936 "FindIV reduction must not be scaled");
1948 "backedge value must be a select");
1949 if (FindIVSelectR->getOperand(1) != WideIV &&
1964 WidenCanIV->insertBefore(WideIV);
1967 FindIVSelectR->setOperand(FindIVSelectR->getOperand(1) == WideIV ? 1 : 2,
2024 auto *FinalMinOrMaxCmp =
2029 auto *FinalIVSelect =
2030 Builder.createSelect(FinalMinOrMaxCmp, LastIVExiting, MaxIV);
2043 DerivedIVRecipe->insertBefore(&*Builder.getInsertPoint());
2044 FinalCanIV = DerivedIVRecipe;
2052 VPValue *FinalIV = Builder.createSelect(
2053 AlwaysFalse, FindIVSelect->
getOperand(2), FinalCanIV);
2070 if (!MinOrMaxPhiR || !MinOrMaxPhiR->hasUsesOutsideReductionChain())
2081 RecurKind RdxKind = MinOrMaxPhiR->getRecurrenceKind();
2084 "only min/max recurrences support users outside the reduction chain");
2099 assert(MinOrMaxOp->getNumUsers() == 2 &&
2100 "MinOrMaxOp must have exactly 2 users");
2101 VPValue *MinOrMaxOpValue = MinOrMaxOp->getOperand(0);
2102 if (MinOrMaxOpValue == MinOrMaxPhiR)
2103 MinOrMaxOpValue = MinOrMaxOp->getOperand(1);
2110 if (!Cmp || Cmp->getNumUsers() != 1 ||
2111 (CmpOpA != MinOrMaxOpValue && CmpOpB != MinOrMaxOpValue))
2114 if (MinOrMaxOpValue != CmpOpB)
2120 if (MinOrMaxPhiR->getNumUsers() != 2)
2126 "one user must be MinOrMaxOp");
2127 assert(MinOrMaxResult &&
"MinOrMaxResult must be a user of MinOrMaxOp");
2144 FindIVPhiR->getRecurrenceKind()))
2147 assert(!FindIVPhiR->isInLoop() && !FindIVPhiR->isOrdered() &&
2148 "cannot handle inloop/ordered reductions yet");
2154 FindIVPhiR->getBackedgeValue());
2156 "must be able to retrieve the FindIVResult VPInstruction");
2168 bool IsValidKindPred = [RdxKind, Pred]() {
2182 if (!IsValidKindPred) {
2185 DEBUG_TYPE,
"VectorizationMultiUseReductionPredicate",
2187 <<
"Multi-use reduction with predicate "
2189 <<
" incompatible with reduction kind";
2195 auto *FindIVCmp = FindIVSelect->getOperand(0)->getDefiningRecipe();
2198 "both results must be computed in the same block");
2202 MinOrMaxResult->
moveBefore(*FindIVRdxResult->getParent(),
2203 FindIVRdxResult->getIterator());
2206 if (IsStrictPredicate) {
2209 MinOrMaxResult, FindIVSelect, FindIVCmp,
2240 auto *FinalMinOrMaxCmp =
2243 VPValue *LastIVExiting = FindIVRdxResult->getOperand(0);
2244 auto *FinalIVSelect =
2245 B.createSelect(FinalMinOrMaxCmp, LastIVExiting,
Sentinel);
2246 FindIVRdxResult->setOperand(0, FinalIVSelect);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
iv Induction Variable Users
static Value * getOpcode(Value &V, Type &Ty, InstrumentationConfig &IConf, InstrumentorIRBuilderTy &IIRB)
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
This file provides a LoopVectorizationPlanner class.
static constexpr uint32_t MinItersBypassWeights[]
const SmallVectorImpl< MachineOperand > & Cond
static bool isHeaderBB(BasicBlock *BB, Loop *L)
static bool handleFirstArgMinOrMax(VPlan &Plan, VPReductionPHIRecipe *MinOrMaxPhiR, VPReductionPHIRecipe *FindLastIVPhiR, VPWidenIntOrFpInductionRecipe *WideIV, VPInstruction *MinOrMaxResult, VPInstruction *FindIVSelect, VPRecipeBase *FindIVCmp, VPInstruction *FindIVRdxResult)
Given a first argmin/argmax pattern with strict predicate consisting of 1) a MinOrMax reduction MinOr...
static VPHeaderPHIRecipe * createWidenInductionRecipe(PHINode *Phi, VPPhi *PhiR, VPIRValue *Start, const InductionDescriptor &IndDesc, VPlan &Plan, PredicatedScalarEvolution &PSE, Loop &OrigLoop, DebugLoc DL)
Creates a VPWidenIntOrFpInductionRecipe or VPWidenPointerInductionRecipe for Phi based on IndDesc.
static void insertCheckBlockBeforeVectorLoop(VPlan &Plan, VPBasicBlock *CheckBlockVPBB)
Insert CheckBlockVPBB on the edge leading to the vector preheader, connecting it to both vector and s...
static void simplifyLiveInsWithSCEV(VPlan &Plan, PredicatedScalarEvolution &PSE)
Check Plan's live-in and replace them with constants, if they can be simplified via SCEV.
static bool sinkRecurrenceUsersAfterPrevious(VPFirstOrderRecurrencePHIRecipe *FOR, VPRecipeBase *Previous, VPDominatorTree &VPDT)
Try to sink users of FOR after Previous.
static bool tryToSinkOrHoistRecurrenceUsers(VPBasicBlock *HeaderVPBB, VPDominatorTree &VPDT)
Sink users of fixed-order recurrences past or hoist before the recipe defining the previous value,...
static void addBypassBranch(VPlan &Plan, VPBasicBlock *CheckBlockVPBB, VPValue *Cond, bool AddBranchWeights)
Create a BranchOnCond terminator in CheckBlockVPBB.
static bool canonicalHeaderAndLatch(VPBlockBase *HeaderVPB, const VPDominatorTree &VPDT)
Checks if HeaderVPB is a loop header block in the plain CFG; that is, it has exactly 2 predecessors (...
static bool hoistPreviousBeforeFORUsers(VPFirstOrderRecurrencePHIRecipe *FOR, VPRecipeBase *Previous, VPDominatorTree &VPDT)
Try to hoist Previous and its operands before all users of FOR.
static void addInitialSkeleton(VPlan &Plan, Type *InductionTy, PredicatedScalarEvolution &PSE, Loop *TheLoop)
static bool areAllLoadsDereferenceable(VPBasicBlock *HeaderVPBB, Loop *TheLoop, PredicatedScalarEvolution &PSE, DominatorTree &DT, AssumptionCache *AC)
Check if all loads in the loop are dereferenceable.
static void createLoopRegion(VPlan &Plan, VPBlockBase *HeaderVPB, DebugLoc DL)
Create a new VPRegionBlock for the loop starting at HeaderVPB.
static VPInstruction * findFindIVSelect(VPValue *BackedgeVal)
Find and return the final select instruction of the FindIV result pattern for the given BackedgeVal: ...
static constexpr uint32_t CheckBypassWeights[]
static void printAfterInitialConstruction(VPlan &)
To make RUN_VPLAN_PASS print initial VPlan.
static void createExtractsForLiveOuts(VPlan &Plan, VPBasicBlock *MiddleVPBB)
Creates extracts for values in Plan defined in a loop region and used outside a loop region.
This file implements dominator tree analysis for a single level of a VPlan's H-CFG.
This file contains the declarations of different VPlan-related auxiliary helpers.
This file contains the declarations of the Vectorization Plan base classes:
static const uint32_t IV[8]
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
Get the array size.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ ICMP_UGE
unsigned greater or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ ICMP_ULT
unsigned less than
@ ICMP_SGE
signed greater or equal
@ ICMP_ULE
unsigned less or equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
static LLVM_ABI StringRef getPredicateName(Predicate P)
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
A parsed version of the target data layout string in and methods for querying it.
static DebugLoc getUnknown()
ValueT lookup(const_arg_type_t< KeyT > Val) const
Return the entry for the specified key, or a default constructed value if no such entry exists.
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
static constexpr ElementCount getFixed(ScalarTy MinVal)
constexpr bool isScalar() const
Exactly one element.
Convenience struct for specifying and reasoning about fast-math flags.
static FastMathFlags getFast()
static bool isLT(Predicate P)
Return true if the predicate is SLT or ULT.
static bool isGT(Predicate P)
Return true if the predicate is SGT or UGT.
A struct for saving information about induction variables.
InductionKind getKind() const
const SCEV * getStep() const
@ IK_FpInduction
Floating point induction variable.
@ IK_PtrInduction
Pointer induction var. Step = C.
@ IK_IntInduction
Integer induction variable. Step = C.
Value * getStartValue() const
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
BlockT * getHeader() const
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
This class emits a version of the loop where run-time checks ensure that may-alias pointers can't ove...
LLVM_ABI std::pair< MDNode *, MDNode * > getNoAliasMetadataFor(const Instruction *OrigInst) const
Returns a pair containing the alias_scope and noalias metadata nodes for OrigInst,...
Represents a single loop in the control flow graph.
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
This class implements a map that also provides access to all stored values in a deterministic order.
iterator find(const KeyT &Key)
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Post-order traversal of a graph.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
LLVM_ABI void addPredicate(const SCEVPredicate &Pred)
Adds a new predicate.
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
LLVM_ABI const SCEVPredicate & getPredicate() const
LLVM_ABI const SCEV * getSymbolicMaxBackedgeTakenCount()
Get the (predicated) symbolic max backedge count for the analyzed loop.
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
static bool isFMulAddIntrinsic(Instruction *I)
Returns true if the instruction is a call to the llvm.fmuladd intrinsic.
FastMathFlags getFastMathFlags() const
static bool isFPMinMaxNumRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is a floating-point minnum/maxnum kind.
bool hasUsesOutsideReductionChain() const
Returns true if the reduction PHI has any uses outside the reduction chain.
static bool isFindLastRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
TrackingVH< Value > getRecurrenceStartValue() const
static bool isAnyOfRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
RecurKind getRecurrenceKind() const
bool isOrdered() const
Expose an ordered FP reduction to the instance users.
static LLVM_ABI bool isFloatingPointRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is a floating point kind.
static bool isFindIVRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
static bool isIntMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is an integer min/max kind.
static bool isMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is any min/max kind.
virtual unsigned getComplexity() const
Returns the estimated complexity of this predicate.
This class represents an analyzed expression in the program.
static constexpr auto FlagNUW
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI const SCEV * getTripCountFromExitCount(const SCEV *ExitCount)
A version of getTripCountFromExitCount below which always picks an evaluation type which can not resu...
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LLVM_ABI bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
LLVM_ABI const SCEV * getElementCount(Type *Ty, ElementCount EC, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
LLVM_ABI const SCEV * getUMaxExpr(SCEVUse LHS, SCEVUse RHS)
LLVM_ABI const SCEV * getStoreSizeOfExpr(Type *IntTy, Type *StoreTy)
Return an expression for the store size of StoreTy that is type IntTy.
LLVM_ABI bool isKnownPredicate(CmpPredicate Pred, SCEVUse LHS, SCEVUse RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
LLVM_ABI const SCEV * applyLoopGuards(const SCEV *Expr, const Loop *L)
Try to apply information from loop guards for L to Expr.
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
The instances of the Type class are immutable: once they are created, they are never changed.
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
iterator begin()
Recipe iterator methods.
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
VPBasicBlock * splitAt(iterator SplitAt)
Split current block at SplitAt by inserting a new block between the current block and its successors ...
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
void insert(VPRecipeBase *Recipe, iterator InsertPt)
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccss as successor of this VPBlockBase.
VPRegionBlock * getParent()
const VPBasicBlock * getExitingBasicBlock() const
void setName(const Twine &newName)
size_t getNumSuccessors() const
void swapSuccessors()
Swap successors of the block. The block must have exactly 2 successors.
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
const VPBlocksTy & getPredecessors() const
void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse)
Set two given VPBlockBases IfTrue and IfFalse to be the two successors of this VPBlockBase.
VPBlockBase * getSinglePredecessor() const
void swapPredecessors()
Swap predecessors of the block.
const VPBasicBlock * getEntryBasicBlock() const
void setOneSuccessor(VPBlockBase *Successor)
Set a given VPBlockBase Successor as the single successor of this VPBlockBase.
void setParent(VPRegionBlock *P)
VPBlockBase * getSingleSuccessor() const
const VPBlocksTy & getSuccessors() const
static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBase NewBlock after BlockPtr.
static void insertOnEdge(VPBlockBase *From, VPBlockBase *To, VPBlockBase *BlockPtr)
Inserts BlockPtr on the edge between From and To.
static VPBasicBlock * getPlainCFGMiddleBlock(const VPlan &Plan)
Returns the middle block of Plan in plain CFG form (before regions are formed).
static void connectBlocks(VPBlockBase *From, VPBlockBase *To, unsigned PredIdx=-1u, unsigned SuccIdx=-1u)
Connect VPBlockBases From and To bi-directionally.
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
static std::pair< VPBasicBlock *, VPBasicBlock * > getPlainCFGHeaderAndLatch(const VPlan &Plan)
Returns the header and latch of the outermost loop of Plan in plain CFG form (before regions are form...
static void transferSuccessors(VPBlockBase *Old, VPBlockBase *New)
Transfer successors from Old to New. New must have no successors.
VPlan-based builder utility analogous to IRBuilder.
VPInstruction * createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createNot(VPValue *Operand, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPBasicBlock::iterator getInsertPoint() const
VPInstruction * createScalarCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, DebugLoc DL, const VPIRMetadata &Metadata={})
VPInstruction * createFCmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new FCmp VPInstruction with predicate Pred and operands A and B.
VPInstructionWithType * createScalarLoad(Type *ResultTy, VPValue *Addr, DebugLoc DL, const VPIRMetadata &Metadata={})
static VPBuilder getToInsertAfter(VPRecipeBase *R)
Create a VPBuilder to insert after R.
VPInstruction * createICmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new ICmp VPInstruction with predicate Pred and operands A and B.
VPInstruction * createAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPPhi * createScalarPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={}, Type *ResultTy=nullptr)
VPInstruction * createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={})
VPExpandSCEVRecipe * createExpandSCEV(const SCEV *Expr)
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", Type *ResultTy=nullptr)
Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.
void setInsertPoint(VPBasicBlock *TheBB)
This specifies that created VPInstructions should be appended to the end of the specified block.
unsigned getNumDefinedValues() const
Returns the number of values defined by the VPDef.
VPValue * getVPSingleValue()
Returns the only VPValue defined by the VPDef.
A recipe for converting the input value IV value to the corresponding value of an IV with different s...
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
bool properlyDominates(const VPRecipeBase *A, const VPRecipeBase *B)
A special type of VPBasicBlock that wraps an existing IR basic block.
Class to record and manage LLVM IR flags.
RecurKind getRecurKind() const
This is a concrete Recipe that models a single VPlan-level instruction.
@ ExtractLastActive
Extracts the last active lane from a set of vectors.
@ ExtractLane
Extracts a single lane (first operand) from a set of vector operands.
@ ExitingIVValue
Compute the exiting value of a wide induction after vectorization, that is the value of the last lane...
@ FirstOrderRecurrenceSplice
@ ComputeReductionResult
Reduce the operands to the final reduction result using the operation specified via the operation's V...
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
VPBasicBlock * getParent()
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
void moveAfter(VPRecipeBase *MovePos)
Unlink this recipe from its current VPBasicBlock and insert it into the VPBasicBlock that MovePos liv...
A recipe for handling reduction phis.
bool isOrdered() const
Returns true, if the phi is part of an ordered reduction.
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by, or 1 if it isn't scaled.
bool isInLoop() const
Returns true if the phi is part of an in-loop reduction.
A recipe to represent inloop, ordered or partial reduction operations.
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Type * getCanonicalIVType() const
Return the type of the canonical IV for loop regions.
bool hasCanonicalIVNUW() const
Indicates if NUW is set for the canonical IV increment, for loop regions.
VPRegionValue * getCanonicalIV()
Return the canonical induction variable of the region, null for replicating regions.
DebugLoc getDebugLoc() const
Returns the debug location of the VPRegionValue.
Lightweight SCEV-to-VPlan expander.
VPValue * tryToExpand(const SCEV *S)
Try to expand S into recipes and live-ins using the builder.
VPSingleDefRecipe is a base class for recipes that model a sequence of one or more output IR that def...
An analysis for type-inference for VPValues.
Type * inferScalarType(const VPValue *V)
Infer the type of V. Returns the scalar type of V.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
void setOperand(unsigned I, VPValue *New)
VPValue * getOperand(unsigned N) const
void addOperand(VPValue *Operand)
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Value * getLiveInIRValue() const
Return the underlying IR value for a VPIRValue.
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
void setUnderlyingValue(Value *Val)
void replaceAllUsesWith(VPValue *New)
unsigned getNumUsers() const
void replaceUsesWithIf(VPValue *New, llvm::function_ref< bool(VPUser &U, unsigned Idx)> ShouldReplace)
Go through the uses list for this VPValue and make each use point to New if the callback ShouldReplac...
A Recipe for widening the canonical induction variable of the vector loop.
VPValue * getStepValue()
Returns the step value of the induction.
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
A recipe for handling phi nodes of integer and floating-point inductions, producing their vector valu...
VPIRValue * getStartValue() const
Returns the start value of the induction.
bool isCanonical() const
Returns true if the induction is canonical, i.e.
VPlan models a candidate for vectorization, encoding various decisions take to produce efficient outp...
VPIRValue * getLiveIn(Value *V) const
Return the live-in VPIRValue for V, if there is one or nullptr otherwise.
LLVMContext & getContext() const
VPBasicBlock * getEntry()
VPValue * getTripCount() const
The trip count of the original loop.
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
VPIRValue * getFalse()
Return a VPIRValue wrapping i1 false.
VPSymbolicValue & getVFxUF()
Returns VF * UF of the vector loop region.
auto getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
VPSymbolicValue & getVectorTripCount()
The vector trip count.
VPIRValue * getOrAddLiveIn(Value *V)
Gets the live-in VPIRValue for V or adds a new live-in (if none exists yet) for V.
VPRegionBlock * createLoopRegion(Type *CanIVTy, DebugLoc DL, const std::string &Name="", VPBlockBase *Entry=nullptr, VPBlockBase *Exiting=nullptr)
Create a new loop region with a canonical IV using CanIVTy and DL.
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
void setTripCount(VPValue *NewTripCount)
Set the trip count assuming it is currently null; if it is not - use resetTripCount().
VPBasicBlock * getMiddleBlock()
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
LLVM_ABI_FOR_TEST VPIRBasicBlock * createVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock from IRBB containing VPIRInstructions for all instructions in IRBB,...
VPIRValue * getTrue()
Return a VPIRValue wrapping i1 true.
VPBasicBlock * getVectorPreheader() const
Returns the preheader of the vector loop region, if one exists, or null otherwise.
bool hasScalarVFOnly() const
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
VPSymbolicValue & getVF()
Returns the VF of the vector loop region.
bool hasScalarTail() const
Returns true if the scalar tail may execute after the vector loop, i.e.
VPIRValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPIRValue wrapping a ConstantInt with the given type and value.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
void reportVectorizationFailure(const StringRef DebugMsg, const StringRef OREMsg, const StringRef ORETag, OptimizationRemarkEmitter *ORE, const Loop *TheLoop, Instruction *I=nullptr)
Reports a vectorization failure: print DebugMsg for debugging purposes along with the corresponding o...
auto m_Cmp()
Matches any compare instruction and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, TruncInst >, OpTy > m_TruncOrSelf(const OpTy &Op)
bool match(Val *V, const Pattern &P)
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
VPInstruction_match< VPInstruction::ExtractLastLane, VPInstruction_match< VPInstruction::ExtractLastPart, Op0_t > > m_ExtractLastLaneOfLastPart(const Op0_t &Op0)
bool matchFindIVResult(VPInstruction *VPI, Op0_t ReducedIV, Op1_t Start)
Match FindIV result pattern: select(icmp ne ComputeReductionResult(ReducedIV), Sentinel),...
VPInstruction_match< VPInstruction::BranchOnTwoConds > m_BranchOnTwoConds()
VPInstruction_match< VPInstruction::ExitingIVValue, Op0_t > m_ExitingIVValue(const Op0_t &Op0)
VPInstruction_match< VPInstruction::ExtractLastLane, Op0_t > m_ExtractLastLane(const Op0_t &Op0)
VPInstruction_match< VPInstruction::BranchOnCount > m_BranchOnCount()
auto m_VPValue()
Match an arbitrary VPValue and ignore it.
VPInstruction_match< VPInstruction::ExtractLastPart, Op0_t > m_ExtractLastPart(const Op0_t &Op0)
match_bind< VPInstruction > m_VPInstruction(VPInstruction *&V)
Match a VPInstruction, capturing if we match.
VPInstruction_match< VPInstruction::BranchOnCond > m_BranchOnCond()
static VPRecipeBase * findUserOf(VPValue *V, const MatchT &P)
If V is used by a recipe matching pattern P, return it.
NodeAddr< PhiNode * > Phi
friend class Instruction
Iterator for Instructions in a `BasicBlock.
VPValue * getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr)
Get or create a VPValue that corresponds to the expansion of Expr.
bool cannotHoistOrSinkRecipe(const VPRecipeBase &R, bool Sinking=false)
Return true if we do not know how to (mechanically) hoist or sink R.
VPInstruction * findComputeReductionResult(VPReductionPHIRecipe *PhiR)
Find the ComputeReductionResult recipe for PhiR, looking through selects inserted for predicated redu...
VPIRFlags getFlagsFromIndDesc(const InductionDescriptor &ID)
Extracts and returns NoWrap and FastMath flags from the induction binop in ID.
VPRecipeBase * findRecipe(VPValue *Start, PredT Pred)
Search Start's users for a recipe satisfying Pred, looking through recipes with definitions.
VPSingleDefRecipe * findHeaderMask(VPlan &Plan)
Collect the header mask with the pattern: (ICMP_ULE, WideCanonicalIV, backedge-taken-count) Note: If ...
const SCEV * getSCEVExprForVPValue(const VPValue *V, PredicatedScalarEvolution &PSE, const Loop *L=nullptr)
Return the SCEV expression for V.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
SmallVector< VPBasicBlock * > vp_rpo_plain_cfg_loop_body(VPBasicBlock *Header)
Returns the VPBasicBlocks forming the loop body of a plain (pre-region) VPlan in reverse post-order s...
FunctionAddr VTableAddr Value
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI Intrinsic::ID getMinMaxReductionIntrinsicOp(Intrinsic::ID RdxID)
Returns the min/max intrinsic used when expanding a min/max reduction.
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
ReductionStyle getReductionStyle(bool InLoop, bool Ordered, unsigned ScaleFactor)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
auto map_to_vector(ContainerTy &&C, FuncTy &&F)
Map a range to a SmallVector with element types deduced from the mapping.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order.
auto dyn_cast_or_null(const Y &Val)
void sort(IteratorTy Start, IteratorTy End)
UncountableExitStyle
Different methods of handling early exits.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
FunctionAddr VTableAddr Count
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
iterator_range< filter_iterator< detail::IterOfRange< RangeT >, PredicateT > > make_filter_range(RangeT &&Range, PredicateT Pred)
Convenience function that takes a range of elements and a predicate, and return a new filter_iterator...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
RecurKind
These are the kinds of recurrences that we support.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ FindIV
FindIV reduction with select(icmp(),x,y) where one of (x,y) is a loop induction variable (increasing ...
@ AnyOf
AnyOf reduction with select(cmp(),x,y) where one of (x,y) is loop invariant, and both x and y are int...
@ FMulAdd
Sum of float products with llvm.fmuladd(a * b + sum).
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ Sub
Subtraction of integers.
@ AddChainWithSubs
A chain of adds and subs.
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
LLVM_ABI bool isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L, ScalarEvolution &SE, DominatorTree &DT, AssumptionCache *AC=nullptr, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
Return true if we can prove that the given load (which is assumed to be within the specified loop) wo...
constexpr detail::IsaCheckPredicate< Types... > IsaPred
Function object wrapper for the llvm::isa type check.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
A recipe for handling first-order recurrence phis.
A VPValue representing a live-in from the input IR or a constant.
Type * getType() const
Returns the scalar type of this symbolic value.