58 if (!VPBB->getParent())
61 auto EndIter = Term ? Term->getIterator() : VPBB->end();
66 VPValue *VPV = Ingredient.getVPSingleValue();
82 *Load, Ingredient.getOperand(0),
nullptr ,
83 false , *VPI, Ingredient.getDebugLoc());
86 *Store, Ingredient.getOperand(1), Ingredient.getOperand(0),
87 nullptr ,
false , *VPI,
88 Ingredient.getDebugLoc());
91 Ingredient.operands(), *VPI,
92 Ingredient.getDebugLoc(),
GEP);
104 if (VectorID == Intrinsic::experimental_noalias_scope_decl)
109 if (VectorID == Intrinsic::assume ||
110 VectorID == Intrinsic::lifetime_end ||
111 VectorID == Intrinsic::lifetime_start ||
112 VectorID == Intrinsic::sideeffect ||
113 VectorID == Intrinsic::pseudoprobe) {
118 const bool IsSingleScalar = VectorID != Intrinsic::assume &&
119 VectorID != Intrinsic::pseudoprobe;
123 Ingredient.getDebugLoc());
126 *CI, VectorID,
drop_end(Ingredient.operands()), CI->getType(),
127 VPIRFlags(*CI), *VPI, CI->getDebugLoc());
131 CI->getOpcode(), Ingredient.getOperand(0), CI->getType(), CI,
135 *VPI, Ingredient.getDebugLoc());
139 "inductions must be created earlier");
148 "Only recpies with zero or one defined values expected");
149 Ingredient.eraseFromParent();
160 const Loop *L =
nullptr;
165 if (
A->getOpcode() != Instruction::Store ||
166 B->getOpcode() != Instruction::Store)
179 const APInt *Distance;
185 Type *TyA =
A->getOperand(0)->getScalarType();
187 Type *TyB =
B->getOperand(0)->getScalarType();
193 uint64_t MaxStoreSize = std::max(SizeA, SizeB);
195 auto VFs =
B->getParent()->getPlan()->vectorFactors();
199 return Distance->
abs().
uge(
207 : ExcludeRecipes(ExcludeRecipes.begin(), ExcludeRecipes.end()),
208 GroupLeader(GroupLeader), PSE(&PSE), L(&L) {}
217 return ExcludeRecipes.contains(Store) ||
218 (Store && isNoAliasViaDistance(Store, &GroupLeader));
231 std::optional<SinkStoreInfo> SinkInfo = {}) {
232 bool CheckReads = SinkInfo.has_value();
236 if (SinkInfo && SinkInfo->shouldSkip(R))
240 if (!
R.mayWriteToMemory() && !(CheckReads &&
R.mayReadFromMemory()))
265template <
unsigned Opcode>
270 static_assert(Opcode == Instruction::Load || Opcode == Instruction::Store,
271 "Only Load and Store opcodes supported");
272 constexpr bool IsLoad = (Opcode == Instruction::Load);
275 RecipesByAddressAndType;
280 if (!RepR || RepR->getOpcode() != Opcode || !FilterFn(RepR))
284 VPValue *Addr = RepR->getOperand(IsLoad ? 0 : 1);
288 RecipesByAddressAndType[{AddrSCEV, LoadStoreTy}].push_back(RepR);
293 for (
auto &Group :
Groups) {
308 auto InsertIfValidSinkCandidate = [ScalarVFOnly, &WorkList](
320 if (Candidate->getParent() == SinkTo ||
325 if (!ScalarVFOnly && RepR->isSingleScalar())
328 WorkList.
insert({SinkTo, Candidate});
340 for (
auto &Recipe : *VPBB)
342 InsertIfValidSinkCandidate(VPBB,
Op);
346 for (
unsigned I = 0;
I != WorkList.
size(); ++
I) {
349 std::tie(SinkTo, SinkCandidate) = WorkList[
I];
354 auto UsersOutsideSinkTo =
356 return cast<VPRecipeBase>(U)->getParent() != SinkTo;
358 if (
any_of(UsersOutsideSinkTo, [SinkCandidate](
VPUser *U) {
359 return !U->usesFirstLaneOnly(SinkCandidate);
362 bool NeedsDuplicating = !UsersOutsideSinkTo.empty();
364 if (NeedsDuplicating) {
368 if (
auto *SinkCandidateRepR =
373 SinkCandidateRepR->getOpcode(), SinkCandidate->
operands(),
374 nullptr, *SinkCandidateRepR, *SinkCandidateRepR,
378 Clone = SinkCandidate->
clone();
388 InsertIfValidSinkCandidate(SinkTo,
Op);
398 if (!EntryBB || EntryBB->size() != 1 ||
408 if (EntryBB->getNumSuccessors() != 2)
413 if (!Succ0 || !Succ1)
416 if (Succ0->getNumSuccessors() + Succ1->getNumSuccessors() != 1)
418 if (Succ0->getSingleSuccessor() == Succ1)
420 if (Succ1->getSingleSuccessor() == Succ0)
437 if (!Region1->isReplicator())
439 auto *MiddleBasicBlock =
441 if (!MiddleBasicBlock || !MiddleBasicBlock->empty())
446 if (!Region2 || !Region2->isReplicator())
451 if (!Mask1 || Mask1 != Mask2)
454 assert(Mask1 && Mask2 &&
"both region must have conditions");
460 if (TransformedRegions.
contains(Region1))
467 if (!Then1 || !Then2)
487 VPValue *Phi1ToMoveV = Phi1ToMove.getVPSingleValue();
493 if (Phi1ToMove.getVPSingleValue()->user_empty()) {
494 Phi1ToMove.eraseFromParent();
497 Phi1ToMove.moveBefore(*Merge2, Merge2->begin());
511 TransformedRegions.
insert(Region1);
514 return !TransformedRegions.
empty();
522 std::string RegionName = (
Twine(
"pred.") + Instr->getOpcodeName()).str();
523 assert(Instr->getParent() &&
"Predicated instruction not in any basic block");
524 auto *BlockInMask = PredRecipe->
getMask();
545 Region->setParent(ParentRegion);
551 RecipeWithoutMask->getDebugLoc());
552 Exiting->appendRecipe(PHIRecipe);
565 if (RepR->isPredicated())
584 if (ParentRegion && ParentRegion->
getExiting() == CurrentBlock)
596 if (!VPBB->getParent())
600 if (!PredVPBB || PredVPBB->getNumSuccessors() != 1 ||
609 R.moveBefore(*PredVPBB, PredVPBB->
end());
611 auto *ParentRegion = VPBB->getParent();
612 if (ParentRegion && ParentRegion->getExiting() == VPBB)
613 ParentRegion->setExiting(PredVPBB);
617 return !WorkList.
empty();
624 bool ShouldSimplify =
true;
625 while (ShouldSimplify) {
641 if (!
IV ||
IV->getTruncInst())
656 for (
auto *U : FindMyCast->
users()) {
658 if (UserCast && UserCast->getUnderlyingValue() == IRCast) {
659 FoundUserCast = UserCast;
666 FindMyCast = FoundUserCast;
668 if (FindMyCast !=
IV)
683 Builder.createDerivedIV(Kind, FPBinOp, StartV, CanonicalIV, Step);
692 BaseIV = Builder.createScalarCast(Instruction::Trunc, BaseIV, TruncTy,
DL);
698 if (ResultTy != StepTy) {
705 Builder.setInsertPoint(VecPreheader);
706 Step = Builder.createScalarCast(Instruction::Trunc, Step, ResultTy,
DL);
708 return Builder.createScalarIVSteps(InductionOpcode, FPBinOp, BaseIV, Step,
734 WideCanIV->getDebugLoc(), Builder));
735 WideCanIV->eraseFromParent();
752 WideCanIV->replaceAllUsesWith(WidenIV);
753 WideCanIV->eraseFromParent();
762 if (PHICost > BroadcastCost)
771 unsigned RegClass =
TTI.getRegisterClassForType(
true, VecTy);
783 WideCanIV->getNoWrapFlags(), WideCanIV->getDebugLoc());
784 NewWideIV->insertBefore(&*Header->getFirstNonPhi());
785 WideCanIV->replaceAllUsesWith(NewWideIV);
786 WideCanIV->eraseFromParent();
794 bool IsConditionalAssume = RepR && RepR->isPredicated() &&
796 if (IsConditionalAssume)
799 if (R.mayHaveSideEffects())
803 return all_of(R.definedValues(), [](
VPValue *V) { return V->user_empty(); });
823 VPUser *PhiUser = PhiR->getSingleUser();
829 PhiR->replaceAllUsesWith(Start);
830 PhiR->eraseFromParent();
838 for (
unsigned I = 0;
I !=
Users.size(); ++
I) {
841 Users.insert_range(V->users());
843 return Users.takeVector();
857 nullptr, StartV, StepV, PtrIV->
getDebugLoc(), Builder);
894 Def->user_empty() || !Def->getUnderlyingValue() ||
895 (RepR && (RepR->isSingleScalar() || RepR->isPredicated())))
908 Def->getUnderlyingInstr()->getOpcode(), Def->operands(),
910 Def->getUnderlyingInstr());
911 Clone->insertAfter(Def);
912 Def->replaceAllUsesWith(Clone);
923 PtrIV->replaceAllUsesWith(PtrAdd);
930 if (HasOnlyVectorVFs &&
none_of(WideIV->users(), [WideIV](
VPUser *U) {
931 return U->usesScalars(WideIV);
937 Plan,
ID.getKind(),
ID.getInductionOpcode(),
939 WideIV->getTruncInst(), WideIV->getStartValue(), WideIV->getStepValue(),
940 WideIV->getDebugLoc(), Builder);
943 if (!HasOnlyVectorVFs) {
945 "plans containing a scalar VF cannot also include scalable VFs");
946 WideIV->replaceAllUsesWith(Steps);
949 WideIV->replaceUsesWithIf(Steps,
950 [WideIV, HasScalableVF](
VPUser &U,
unsigned) {
952 return U.usesFirstLaneOnly(WideIV);
953 return U.usesScalars(WideIV);
969 return (IntOrFpIV && IntOrFpIV->getTruncInst()) ? nullptr : WideIV;
974 if (!Def || Def->getNumOperands() != 2)
982 auto IsWideIVInc = [&]() {
983 auto &
ID = WideIV->getInductionDescriptor();
986 VPValue *IVStep = WideIV->getStepValue();
987 switch (
ID.getInductionOpcode()) {
988 case Instruction::Add:
990 case Instruction::FAdd:
992 case Instruction::FSub:
995 case Instruction::Sub: {
1015 return IsWideIVInc() ? WideIV :
nullptr;
1032 if (WideIntOrFp && WideIntOrFp->getTruncInst())
1043 VPValue *FirstActiveLane =
B.createFirstActiveLane(Mask,
DL);
1044 FirstActiveLane =
B.createScalarZExtOrTrunc(
1045 FirstActiveLane, CanonicalIVType, FirstActiveLane->
getScalarType(),
DL);
1046 VPValue *EndValue =
B.createAdd(CanonicalIV, FirstActiveLane,
DL);
1051 if (Incoming != WideIV) {
1053 EndValue =
B.createAdd(EndValue, One,
DL);
1058 VPIRValue *Start = WideIV->getStartValue();
1059 VPValue *Step = WideIV->getStepValue();
1060 EndValue =
B.createDerivedIV(
1062 Start, EndValue, Step);
1076 if (WideIntOrFp && WideIntOrFp->getTruncInst())
1086 Start, VectorTC, Step);
1122 assert(EndValue &&
"Must have computed the end value up front");
1127 if (Incoming != WideIV)
1139 auto *Zero = Plan.
getZero(StepTy);
1140 return B.createPtrAdd(EndValue,
B.createSub(Zero, Step),
1145 return B.createNaryOp(
1146 ID.getInductionBinOp()->getOpcode() == Instruction::FAdd
1148 : Instruction::FAdd,
1149 {EndValue, Step}, {ID.getInductionBinOp()->getFastMathFlags()});
1160 VPBuilder VectorPHBuilder(VectorPH, VectorPH->begin());
1170 EndValues[WideIV] = EndValue;
1180 R.getVPSingleValue()->replaceAllUsesWith(EndValue);
1181 R.eraseFromParent();
1190 for (
auto [Idx, PredVPBB] :
enumerate(ExitVPBB->getPredecessors())) {
1192 if (PredVPBB == MiddleVPBB)
1194 Plan, ExitIRI->getOperand(Idx), EndValues, PSE);
1197 Plan, ExitIRI->getOperand(Idx), PSE);
1199 ExitIRI->setOperand(Idx, Escape);
1216 const auto &[V, Inserted] = SCEV2VPV.
try_emplace(ExpR->getSCEV(), ExpR);
1220 ExpR->replaceAllUsesWith(V->second);
1224 ExpR->eraseFromParent();
1233 while (!WorkList.
empty()) {
1235 if (!Seen.
insert(Cur).second)
1243 R->eraseFromParent();
1250static std::optional<std::pair<bool, unsigned>>
1253 std::optional<std::pair<bool, unsigned>>>(R)
1256 [](
auto *
I) {
return std::make_pair(
false,
I->getOpcode()); })
1258 return std::make_pair(
true,
I->getVectorIntrinsicID());
1260 .Case<VPVectorPointerRecipe, VPPredInstPHIRecipe, VPScalarIVStepsRecipe>(
1266 I->getVPRecipeID());
1268 .
Default([](
auto *) {
return std::nullopt; });
1293 VPlan &Plan = *R.getParent()->getPlan();
1294 auto FoldToIRValue = [&]() ->
Value * {
1296 if (OpcodeOrIID->first) {
1298 return Folder.FoldIntrinsic(OpcodeOrIID->second,
Ops, R.getScalarType(),
1299 RFlags ? RFlags->getFastMathFlagsOrNone()
1302 unsigned Opcode = OpcodeOrIID->second;
1308 R.getVPSingleValue()->getScalarType());
1311 return Folder.FoldBinOp(Instruction::BinaryOps::Xor,
Ops[0],
1313 case Instruction::Select:
1314 return Folder.FoldSelect(
Ops[0],
Ops[1],
Ops[2]);
1315 case Instruction::ICmp:
1316 case Instruction::FCmp:
1319 case Instruction::GetElementPtr: {
1322 return Folder.FoldGEP(
GEP->getSourceElementType(),
Ops[0],
1332 case Instruction::ExtractElement:
1339 if (
Value *V = FoldToIRValue())
1346 bool CanCreateNewRecipe) {
1347 VPlan *Plan = Def->getParent()->getPlan();
1357 Def->replaceAllUsesWith(
X);
1358 Def->eraseFromParent();
1370 Def->replaceAllUsesWith(
X);
1382 Def->replaceAllUsesWith(Plan->
getZero(Def->getScalarType()));
1388 Def->replaceAllUsesWith(
X);
1394 Def->replaceAllUsesWith(Plan->
getFalse());
1400 Def->replaceAllUsesWith(
X);
1405 if (CanCreateNewRecipe &&
1410 (!Def->getOperand(0)->hasMoreThanOneUniqueUser() ||
1411 !Def->getOperand(1)->hasMoreThanOneUniqueUser())) {
1412 Def->replaceAllUsesWith(
1413 Builder.createLogicalAnd(
X, Builder.createOr(
Y, Z)));
1420 Def->replaceAllUsesWith(Def->getOperand(1));
1427 Def->replaceAllUsesWith(Builder.createLogicalAnd(
X,
Y));
1433 Def->replaceAllUsesWith(Plan->
getFalse());
1438 Def->replaceAllUsesWith(
X);
1444 if (CanCreateNewRecipe &&
1446 Def->replaceAllUsesWith(Builder.createNot(
C));
1452 Def->setOperand(0,
C);
1453 Def->setOperand(1,
Y);
1454 Def->setOperand(2,
X);
1459 if (CanCreateNewRecipe &&
1463 Y->getScalarType()->isIntegerTy(1)) {
1464 Def->replaceAllUsesWith(
1465 Builder.createOr(
Y, Builder.createLogicalAnd(
X, Z)));
1474 VPlan *Plan = Def->getParent()->getPlan();
1480 return Def->replaceAllUsesWith(V);
1486 PredPHI->replaceAllUsesWith(
Op);
1493 RepR && RepR->isPredicated() && RepR->getOpcode() == Instruction::Store &&
1497 RepR->getUnderlyingInstr(), RepR->operandsWithoutMask(),
1498 RepR->isSingleScalar(),
nullptr, *RepR, *RepR,
1499 RepR->getDebugLoc());
1500 Unmasked->insertBefore(RepR);
1501 RepR->replaceAllUsesWith(Unmasked);
1502 RepR->eraseFromParent();
1516 bool CanCreateNewRecipe =
1521 Type *TruncTy = Def->getScalarType();
1522 Type *ATy =
A->getScalarType();
1523 if (TruncTy == ATy) {
1524 Def->replaceAllUsesWith(
A);
1533 : Instruction::ZExt;
1536 if (
auto *UnderlyingExt = Def->getOperand(0)->getUnderlyingValue()) {
1538 Ext->setUnderlyingValue(UnderlyingExt);
1540 Def->replaceAllUsesWith(Ext);
1542 auto *Trunc = Builder.createWidenCast(Instruction::Trunc,
A, TruncTy);
1543 Def->replaceAllUsesWith(Trunc);
1553 return Def->replaceAllUsesWith(
A);
1556 return Def->replaceAllUsesWith(
A);
1559 return Def->replaceAllUsesWith(Plan->
getZero(Def->getScalarType()));
1565 return Def->replaceAllUsesWith(Builder.createSub(
1566 Plan->
getZero(
A->getScalarType()),
A, Def->getDebugLoc(),
"", NW));
1569 if (CanCreateNewRecipe &&
1577 ->hasNoSignedWrap()};
1578 return Def->replaceAllUsesWith(
1579 Builder.createSub(
X,
Y, Def->getDebugLoc(),
"", NW));
1588 MulR->hasNoSignedWrap() &&
1590 return Def->replaceAllUsesWith(Builder.createNaryOp(
1592 {A, Plan->getConstantInt(APC->getBitWidth(), ShiftAmt)}, NW,
1593 Def->getDebugLoc()));
1598 return Def->replaceAllUsesWith(Builder.createNaryOp(
1600 {A, Plan->getConstantInt(APC->getBitWidth(), APC->exactLogBase2())},
1605 return Def->replaceAllUsesWith(
A);
1620 R->setOperand(1,
Y);
1621 R->setOperand(2,
X);
1625 R->replaceAllUsesWith(Cmp);
1630 if (!Cmp->getDebugLoc() && Def->getDebugLoc())
1631 Cmp->setDebugLoc(Def->getDebugLoc());
1643 if (
Op->getNumUsers() > 1 ||
1647 }
else if (!UnpairedCmp) {
1648 UnpairedCmp =
Op->getDefiningRecipe();
1652 UnpairedCmp =
nullptr;
1659 if (NewOps.
size() < Def->getNumOperands()) {
1661 return Def->replaceAllUsesWith(NewAnyOf);
1668 if (CanCreateNewRecipe &&
1674 return Def->replaceAllUsesWith(NewCmp);
1680 Def->getOperand(1)->getScalarType() == Def->getScalarType())
1681 return Def->replaceAllUsesWith(Def->getOperand(1));
1685 Type *WideStepTy = Def->getScalarType();
1686 if (
X->getScalarType() != WideStepTy)
1687 X = Builder.createWidenCast(Instruction::Trunc,
X, WideStepTy);
1688 Def->replaceAllUsesWith(
X);
1697 Def->getScalarType()->isIntegerTy(1)) {
1698 Def->setOperand(1, Def->getOperand(0));
1699 Def->setOperand(0,
Y);
1706 return Def->replaceAllUsesWith(Def->getOperand(0));
1712 Def->replaceAllUsesWith(
1713 BuildVector->getOperand(BuildVector->getNumOperands() - 1));
1718 return Def->replaceAllUsesWith(
X);
1721 return Def->replaceAllUsesWith(
A);
1724 return Def->replaceAllUsesWith(
A);
1730 Def->replaceAllUsesWith(
1731 BuildVector->getOperand(BuildVector->getNumOperands() - 2));
1738 Def->replaceAllUsesWith(BuildVector->getOperand(Idx));
1743 Def->replaceAllUsesWith(
1751 Def->replaceUsesWithIf(Def->getOperand(0), [Def](
VPUser &U,
unsigned) {
1752 return U.usesFirstLaneOnly(Def);
1761 "broadcast operand must be single-scalar");
1762 Def->setOperand(0,
C);
1767 return Def->replaceUsesWithIf(
1768 X, [Def](
const VPUser &U,
unsigned) {
return U.usesScalars(Def); });
1771 if (Def->getNumOperands() == 1) {
1772 Def->replaceAllUsesWith(Def->getOperand(0));
1777 Phi->replaceAllUsesWith(Phi->getOperand(0));
1783 if (Def->getNumOperands() == 1 &&
1785 return Def->replaceAllUsesWith(IRV);
1798 return Def->replaceAllUsesWith(
A);
1805 return Def->replaceAllUsesWith(WidenIV->getRegion()->getCanonicalIV());
1808 Def->replaceAllUsesWith(Builder.createNaryOp(
1809 Instruction::ExtractElement, {A, LaneToExtract}, Def->getDebugLoc()));
1823 auto *IVInc = Def->getOperand(0);
1824 if (IVInc->getNumUsers() == 2) {
1829 if (Phi->getNumUsers() == 1 || (Phi->getNumUsers() == 2 && Inc)) {
1830 Def->replaceAllUsesWith(IVInc);
1832 Inc->replaceAllUsesWith(Phi);
1833 Phi->setOperand(0,
Y);
1849 Steps->replaceAllUsesWith(Steps->getOperand(0));
1857 Def->replaceUsesWithIf(StartV, [](
const VPUser &U,
unsigned Idx) {
1859 return PhiR && PhiR->isInLoop();
1865 return Def->replaceAllUsesWith(
A);
1884 R.getVPSingleValue()->replaceAllUsesWith(
X);
1900 while (!Worklist.
empty()) {
1909 R->replaceAllUsesWith(
1910 Builder.createLogicalAnd(HeaderMask, Builder.createLogicalAnd(
X,
Y)));
1914static std::optional<Instruction::BinaryOps>
1917 case Intrinsic::masked_udiv:
1918 return Instruction::UDiv;
1919 case Intrinsic::masked_sdiv:
1920 return Instruction::SDiv;
1921 case Intrinsic::masked_urem:
1922 return Instruction::URem;
1923 case Intrinsic::masked_srem:
1924 return Instruction::SRem;
1941 if (RepR && (RepR->isSingleScalar() || RepR->isPredicated()))
1945 if (RepR && RepR->getOpcode() == Instruction::Store &&
1948 RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(),
1949 true ,
nullptr , *RepR ,
1950 *RepR , RepR->getDebugLoc());
1951 Clone->insertBefore(RepOrWidenR);
1953 VPValue *ExtractOp = Clone->getOperand(0);
1959 Clone->setOperand(0, ExtractOp);
1960 RepR->eraseFromParent();
1972 VPValue *SafeDivisor = Builder.createSelect(
1973 IntrR->getOperand(2), IntrR->getOperand(1),
1975 VPValue *Clone = Builder.createNaryOp(
1976 *
Opc, {IntrR->getOperand(0), SafeDivisor},
1979 IntrR->eraseFromParent();
1988 auto IntroducesBCastOf = [](
const VPValue *
Op) {
1997 return !U->usesScalars(
Op);
2001 if (
any_of(RepOrWidenR->users(), IntroducesBCastOf(RepOrWidenR)) &&
2004 make_filter_range(Op->users(), not_equal_to(RepOrWidenR)),
2005 IntroducesBCastOf(Op)))
2009 auto *IRV = dyn_cast<VPIRValue>(Op);
2010 bool LiveInNeedsBroadcast = IRV && !isa<Constant>(IRV->getValue());
2011 auto *OpR = dyn_cast<VPReplicateRecipe>(Op);
2012 return LiveInNeedsBroadcast || (OpR && OpR->isSingleScalar());
2019 RepOrWidenR->getUnderlyingInstr());
2020 Clone->insertBefore(RepOrWidenR);
2021 RepOrWidenR->replaceAllUsesWith(Clone);
2023 RepOrWidenR->eraseFromParent();
2059 if (Blend->isNormalized() || !
match(Blend->getMask(0),
m_False()))
2060 UniqueValues.
insert(Blend->getIncomingValue(0));
2061 for (
unsigned I = 1;
I != Blend->getNumIncomingValues(); ++
I)
2063 UniqueValues.
insert(Blend->getIncomingValue(
I));
2065 if (UniqueValues.
size() == 1) {
2066 Blend->replaceAllUsesWith(*UniqueValues.
begin());
2067 Blend->eraseFromParent();
2071 if (Blend->isNormalized())
2077 unsigned StartIndex = 0;
2078 for (
unsigned I = 0;
I != Blend->getNumIncomingValues(); ++
I) {
2090 OperandsWithMask.
push_back(Blend->getIncomingValue(StartIndex));
2092 for (
unsigned I = 0;
I != Blend->getNumIncomingValues(); ++
I) {
2093 if (
I == StartIndex)
2095 OperandsWithMask.
push_back(Blend->getIncomingValue(
I));
2096 OperandsWithMask.
push_back(Blend->getMask(
I));
2101 OperandsWithMask, *Blend, Blend->getDebugLoc());
2102 NewBlend->insertBefore(&R);
2104 VPValue *DeadMask = Blend->getMask(StartIndex);
2106 Blend->eraseFromParent();
2111 if (NewBlend->getNumOperands() == 3 &&
2113 VPValue *Inc0 = NewBlend->getOperand(0);
2114 VPValue *Inc1 = NewBlend->getOperand(1);
2115 VPValue *OldMask = NewBlend->getOperand(2);
2116 NewBlend->setOperand(0, Inc1);
2117 NewBlend->setOperand(1, Inc0);
2118 NewBlend->setOperand(2, NewMask);
2145 APInt MaxVal = AlignedTC - 1;
2148 unsigned NewBitWidth =
2154 bool MadeChange =
false;
2179 "canonical IV is not expected to have a truncation");
2184 NewWideIV->insertBefore(WideIV);
2191 Cmp->replaceAllUsesWith(
2192 VPBuilder(Cmp).createICmp(Cmp->getPredicate(), NewWideIV, NewBTC));
2206 return any_of(
Cond->getDefiningRecipe()->operands(), [&Plan, BestVF, BestUF,
2208 return isConditionTrueViaVFAndUF(C, Plan, BestVF, BestUF, PSE);
2222 const SCEV *VectorTripCount =
2227 "Trip count SCEV must be computable");
2248 auto *Term = &ExitingVPBB->
back();
2261 for (
unsigned Part = 0; Part < UF; ++Part) {
2267 Extracts[Part] = Ext;
2279 match(Phi->getBackedgeValue(),
2281 assert(Index &&
"Expected index from ActiveLaneMask instruction");
2298 "Expected one VPActiveLaneMaskPHIRecipe for each unroll part");
2305 "Expected incoming values of Phi to be ActiveLaneMasks");
2310 EntryALM->setOperand(2, ALMMultiplier);
2311 LoopALM->setOperand(2, ALMMultiplier);
2315 ExtractFromALM(EntryALM, EntryExtracts);
2320 ExtractFromALM(LoopALM, LoopExtracts);
2322 Not->setOperand(0, LoopExtracts[0]);
2325 for (
unsigned Part = 0; Part < UF; ++Part) {
2326 Phis[Part]->setStartValue(EntryExtracts[Part]);
2327 Phis[Part]->setBackedgeValue(LoopExtracts[Part]);
2340 auto *Term = &ExitingVPBB->
back();
2352 const SCEV *VectorTripCount =
2358 "Trip count SCEV must be computable");
2377 Term->setOperand(1, Plan.
getTrue());
2382 {}, Term->getDebugLoc());
2384 Term->eraseFromParent();
2417 R.getVPSingleValue()->replaceAllUsesWith(Trunc);
2427 assert(Plan.
hasVF(BestVF) &&
"BestVF is not available in Plan");
2428 assert(Plan.
hasUF(BestUF) &&
"BestUF is not available in Plan");
2446 RecurKind RK = PhiR->getRecurrenceKind();
2453 RecWithFlags->dropPoisonGeneratingFlags();
2459struct VPCSEDenseMapInfo :
public DenseMapInfo<VPSingleDefRecipe *> {
2468 return GEP->getSourceElementType();
2471 .Case<VPVectorPointerRecipe, VPWidenGEPRecipe>(
2472 [](
auto *
I) {
return I->getSourceElementType(); })
2473 .
Default([](
auto *) {
return nullptr; });
2477 static bool canHandle(
const VPSingleDefRecipe *Def) {
2486 if (!
C || (!
C->first && (
C->second == Instruction::InsertValue ||
2487 C->second == Instruction::ExtractValue)))
2493 return !
Def->mayReadFromMemory();
2497 static unsigned getHashValue(
const VPSingleDefRecipe *Def) {
2500 getGEPSourceElementType(Def),
Def->getScalarType(),
2503 if (RFlags->hasPredicate())
2506 return hash_combine(Result, SIVSteps->getInductionOpcode());
2511 static bool isEqual(
const VPSingleDefRecipe *L,
const VPSingleDefRecipe *R) {
2512 if (
L->getVPRecipeID() !=
R->getVPRecipeID() ||
2514 getGEPSourceElementType(L) != getGEPSourceElementType(R) ||
2516 !
equal(
L->operands(),
R->operands()))
2519 "must have valid opcode info for both recipes");
2521 if (LFlags->hasPredicate() &&
2522 LFlags->getPredicate() !=
2526 if (LSIV->getInductionOpcode() !=
2532 const VPRegionBlock *RegionL =
L->getRegion();
2533 const VPRegionBlock *RegionR =
R->getRegion();
2536 L->getParent() !=
R->getParent())
2538 return L->getScalarType() ==
R->getScalarType();
2554 if (!Def || !VPCSEDenseMapInfo::canHandle(Def))
2558 if (!VPDT.
dominates(V->getParent(), VPBB))
2563 Def->replaceAllUsesWith(V);
2576 bool Sinking =
false) {
2605 "Expected vector prehader's successor to be the vector loop region");
2613 return !Op->isDefinedOutsideLoopRegions();
2616 R.moveBefore(*Preheader, Preheader->
end());
2636 assert(!RepR->isPredicated() &&
2637 "Expected prior transformation of predicated replicates to "
2638 "replicate regions");
2643 if (!RepR->isSingleScalar())
2647 if (RepR->getOpcode() == Instruction::Store &&
2648 !RepR->getOperand(1)->isDefinedOutsideLoopRegions())
2653 assert((!R.mayWriteToMemory() ||
2654 (RepR && RepR->getOpcode() == Instruction::Store &&
2655 RepR->getOperand(1)->isDefinedOutsideLoopRegions())) &&
2656 "The only recipes that may write to memory are expected to be "
2657 "stores with invariant pointer-operand");
2667 if (
any_of(Def->users(), [&SinkBB, &LoopRegion](
VPUser *U) {
2668 auto *UserR = cast<VPRecipeBase>(U);
2669 VPBasicBlock *Parent = UserR->getParent();
2671 if (SinkBB && SinkBB != Parent)
2676 return UserR->isPhi() || Parent->getEnclosingLoopRegion() ||
2677 Parent->getSinglePredecessor() != LoopRegion;
2687 "Defining block must dominate sink block");
2712 VPValue *ResultVPV = R.getVPSingleValue();
2714 unsigned NewResSizeInBits = MinBWs.
lookup(UI);
2715 if (!NewResSizeInBits)
2728 (void)OldResSizeInBits;
2736 VPW->dropPoisonGeneratingFlags();
2738 assert((OldResSizeInBits != NewResSizeInBits ||
2740 "Only ICmps should not need extending the result.");
2746 if (OldResSizeInBits != NewResSizeInBits) {
2748 Instruction::ZExt, ResultVPV, OldResTy);
2750 Ext->setOperand(0, ResultVPV);
2760 unsigned OpSizeInBits =
Op->getScalarType()->getScalarSizeInBits();
2761 if (OpSizeInBits == NewResSizeInBits)
2763 assert(OpSizeInBits > NewResSizeInBits &&
"nothing to truncate");
2764 auto [ProcessedIter, Inserted] = ProcessedTruncs.
try_emplace(
Op);
2770 Builder.setInsertPoint(&R);
2771 ProcessedIter->second =
2772 Builder.createWidenCast(Instruction::Trunc,
Op, NewResTy);
2774 Op = ProcessedIter->second;
2778 NWR->insertBefore(&R);
2782 VPValue *Replacement = NWR->getVPSingleValue();
2783 if (OldResSizeInBits != NewResSizeInBits)
2789 R.eraseFromParent();
2795 std::optional<VPDominatorTree> VPDT;
2803 bool SimplifiedPhi =
false;
2813 assert(VPBB->getNumSuccessors() == 2 &&
2814 "Two successors expected for BranchOnCond");
2815 unsigned RemovedIdx;
2826 "There must be a single edge between VPBB and its successor");
2829 auto Phis = RemovedSucc->
phis();
2832 SimplifiedPhi |= !std::empty(Phis);
2836 VPBB->back().eraseFromParent();
2848 if (Reachable.contains(
B))
2859 for (
VPValue *Def : R.definedValues())
2860 Def->replaceAllUsesWith(&Tmp);
2861 R.eraseFromParent();
2865 return SimplifiedPhi;
2920 DebugLoc DL = CanonicalIVIncrement->getDebugLoc();
2931 auto *EntryIncrement = Builder.createOverflowingOp(
2933 DL,
"index.part.next");
2939 {EntryIncrement, TC, ALMMultiplier},
DL,
2940 "active.lane.mask.entry");
2947 LaneMaskPhi->insertBefore(*HeaderVPBB, HeaderVPBB->begin());
2952 Builder.setInsertPoint(OriginalTerminator);
2953 auto *InLoopIncrement = Builder.createOverflowingOp(
2955 {CanonicalIVIncrement, &Plan.
getVF()}, {
false,
false},
DL);
2957 {InLoopIncrement, TC, ALMMultiplier},
DL,
2958 "active.lane.mask.next");
2959 LaneMaskPhi->addBackedgeValue(ALM);
2963 auto *NotMask = Builder.createNot(ALM,
DL);
2970 bool UseActiveLaneMaskForControlFlow) {
2972 auto *WideCanonicalIV =
2974 assert(WideCanonicalIV &&
2975 "Must have widened canonical IV when tail folding!");
2978 if (UseActiveLaneMaskForControlFlow) {
2987 nullptr,
"active.lane.mask");
3003 template <
typename OpTy>
bool match(OpTy *V)
const {
3014template <
typename Op0_t,
typename Op1_t>
3022 case Intrinsic::masked_udiv:
3023 return Intrinsic::vp_udiv;
3024 case Intrinsic::masked_sdiv:
3025 return Intrinsic::vp_sdiv;
3026 case Intrinsic::masked_urem:
3027 return Intrinsic::vp_urem;
3028 case Intrinsic::masked_srem:
3029 return Intrinsic::vp_srem;
3031 return std::nullopt;
3046 VPValue *Addr, *Mask, *EndPtr;
3049 auto AdjustEndPtr = [&CurRecipe, &EVL](
VPValue *EndPtr) {
3051 EVLEndPtr->insertBefore(&CurRecipe);
3056 EVLEndPtr->setOperand(1, EVLAsVF);
3060 auto GetVPReverse = [&CurRecipe, &EVL, Plan,
3065 Intrinsic::experimental_vp_reverse, {V, Plan->
getTrue(), &EVL},
3066 V->getScalarType(), {}, {},
DL);
3067 Reverse->insertBefore(&CurRecipe);
3071 if (
match(&CurRecipe,
3076 if (
match(&CurRecipe,
3080 Mask = GetVPReverse(Mask);
3081 Addr = AdjustEndPtr(EndPtr);
3084 LoadR->insertBefore(&CurRecipe);
3088 LoadR->getScalarType(), {}, {},
DL);
3099 NewLoad->setOperand(2, Mask);
3100 NewLoad->setOperand(3, &EVL);
3108 StoredVal, EVL, Mask);
3110 if (
match(&CurRecipe,
3114 Mask = GetVPReverse(Mask);
3115 Addr = AdjustEndPtr(EndPtr);
3118 Intrinsic::vector_splice_right, {StoredVal,
Poison, &EVL},
3122 SpliceR, EVL, Mask);
3126 if (Rdx->isConditional() &&
3131 if (Interleave->getMask() &&
3139 Intrinsic::vp_merge, {Mask ? Mask : Plan->
getTrue(),
LHS,
RHS, &EVL},
3140 LHS->getScalarType(), {}, {},
DL);
3153 if (
match(&CurRecipe,
3158 LHS->getScalarType(), {}, {},
DL);
3164 {IntrR->getOperand(0),
3165 IntrR->getOperand(1),
3166 Mask ? Mask : Plan->
getTrue(), &EVL},
3167 IntrR->getScalarType(), {}, {},
DL);
3176 VPValue *HeaderMask =
nullptr, *EVL =
nullptr;
3181 HeaderMask = R.getVPSingleValue();
3192 NewR->insertBefore(R);
3193 for (
auto [Old, New] :
3194 zip_equal(R->definedValues(), NewR->definedValues()))
3195 Old->replaceAllUsesWith(New);
3208 Mask->getScalarType(), {}, {}, LogicalAnd->getDebugLoc());
3209 Merge->insertBefore(LogicalAnd);
3210 LogicalAnd->replaceAllUsesWith(
Merge);
3226 R->getVPSingleValue()->replaceAllUsesWith(
X);
3240 Intrinsic::experimental_vp_reverse, {
X, Plan.
getTrue(), EVL},
3241 X->getScalarType(), {}, {}, R->getDebugLoc());
3242 VPReverse->insertBefore(R);
3243 R->getVPSingleValue()->replaceAllUsesWith(VPReverse);
3249 R->eraseFromParent();
3270 auto IsAllowedUser =
3271 IsaPred<VPVectorEndPointerRecipe, VPScalarIVStepsRecipe,
3272 VPWidenIntOrFpInductionRecipe,
3273 VPWidenMemIntrinsicRecipe>;
3274 if (match(U, m_Trunc(m_Specific(&Plan.getVF()))))
3275 return all_of(cast<VPSingleDefRecipe>(U)->users(),
3277 return IsAllowedUser(U);
3279 "User of VF that we can't transform to EVL.");
3289 "Only users of VFxUF should be VPWidenPointerInductionRecipe and the "
3290 "increment of the canonical induction.");
3306 MaxEVL = Builder.createScalarZExtOrTrunc(
3310 Builder.setInsertPoint(Header, Header->getFirstNonPhi());
3311 VPValue *PrevEVL = Builder.createScalarPhi(
3325 Intrinsic::experimental_vp_splice,
3326 {
V1, V2, Imm, Plan.
getTrue(), PrevEVL, &EVL},
3327 R.getVPSingleValue()->getScalarType(), {}, {}, R.getDebugLoc());
3329 R.getVPSingleValue()->replaceAllUsesWith(VPSplice);
3342 if (match(&R, m_ComputeReductionResult(m_Select(m_Specific(HeaderMask),
3343 m_VPValue(), m_VPValue()))))
3344 return R.getOperand(0)->getDefiningRecipe()->getRegion() ==
3345 Plan.getVectorLoopRegion();
3357 VPValue *EVLMask = Builder.createICmp(
3417 VPlan &Plan,
const std::optional<unsigned> &MaxSafeElements) {
3429 auto *CurrentIteration =
3431 CurrentIteration->insertBefore(*Header, Header->begin());
3432 VPBuilder Builder(Header, Header->getFirstNonPhi());
3435 VPPhi *AVLPhi = Builder.createScalarPhi(
3439 if (MaxSafeElements) {
3449 Builder.setInsertPoint(CanonicalIVIncrement);
3453 OpVPEVL = Builder.createScalarZExtOrTrunc(
3454 OpVPEVL, CanIVTy, I32Ty, CanonicalIVIncrement->getDebugLoc());
3456 auto *NextIter = Builder.createAdd(
3457 OpVPEVL, CurrentIteration, CanonicalIVIncrement->getDebugLoc(),
3458 "current.iteration.next", CanonicalIVIncrement->getNoWrapFlags());
3459 CurrentIteration->addBackedgeValue(NextIter);
3463 "avl.next", {
true,
false});
3471 CanonicalIV->replaceUsesWithIf(CurrentIteration,
3472 [CanonicalIVIncrement](
VPUser &U,
unsigned) {
3473 return &U != CanonicalIVIncrement;
3488 assert(!CurrentIteration &&
3489 "Found multiple CurrentIteration. Only one expected");
3490 CurrentIteration = PhiR;
3494 if (!CurrentIteration)
3505 CurrentIteration->
getDebugLoc(),
"current.iteration.iv");
3514 CanIVInc->eraseFromParent();
3523 if (Header->empty())
3532 if (!
match(EVLPhi->getBackedgeValue(),
3545 [[maybe_unused]]
bool FoundAVLNext =
3548 assert(FoundAVLNext &&
"Didn't find AVL backedge?");
3556 [[maybe_unused]]
bool FoundIncrement =
match(
3563 "Expected BranchOnCond with ICmp comparing CanIV + VFxUF with vector "
3568 LatchBr->setOperand(
3580 "expected to run before loop regions are created");
3582 auto CanUseVersionedStride = [&VPDT, Preheader](
VPUser &U,
unsigned) {
3585 return VPDT.
dominates(Preheader, Parent);
3588 for (
const SCEV *Stride : StridesMap.
values()) {
3591 const APInt *StrideConst;
3614 RewriteMap[StrideV] = PSE.
getSCEV(StrideV);
3621 const SCEV *ScevExpr = ExpSCEV->getSCEV();
3624 if (NewSCEV != ScevExpr) {
3626 ExpSCEV->replaceAllUsesWith(NewExp);
3637 auto CollectPoisonGeneratingInstrsInBackwardSlice([&](
VPRecipeBase *Root) {
3642 while (!Worklist.
empty()) {
3645 if (!Visited.
insert(CurRec).second)
3667 RecWithFlags->isDisjoint()) {
3670 Builder.createAdd(
A,
B, RecWithFlags->getDebugLoc());
3671 New->setUnderlyingValue(RecWithFlags->getUnderlyingValue());
3672 RecWithFlags->replaceAllUsesWith(New);
3673 RecWithFlags->eraseFromParent();
3676 RecWithFlags->dropPoisonGeneratingFlags();
3681 assert((!Instr || !Instr->hasPoisonGeneratingFlags()) &&
3682 "found instruction with poison generating flags not covered by "
3683 "VPRecipeWithIRFlags");
3688 if (
VPRecipeBase *OpDef = Operand->getDefiningRecipe())
3696 auto IsNotHeaderMask = [&Plan](
VPValue *Mask) {
3708 VPRecipeBase *AddrDef = WidenRec->getAddr()->getDefiningRecipe();
3709 if (AddrDef && WidenRec->isConsecutive() &&
3710 IsNotHeaderMask(WidenRec->getMask()))
3711 CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
3713 VPRecipeBase *AddrDef = InterleaveRec->getAddr()->getDefiningRecipe();
3714 if (AddrDef && IsNotHeaderMask(InterleaveRec->getMask()))
3715 CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
3725 const bool &EpilogueAllowed) {
3726 if (InterleaveGroups.empty())
3737 IRMemberToRecipe[&MemR->getIngredient()] = MemR;
3744 for (
const auto *IG : InterleaveGroups) {
3749 return !IRMemberToRecipe.contains(Member);
3753 auto *Start = IRMemberToRecipe.
lookup(IG->getMember(0));
3757 StoredValues.
push_back(StoreR->getStoredValue());
3758 for (
unsigned I = 1;
I < IG->getFactor(); ++
I) {
3764 StoredValues.
push_back(StoreR->getStoredValue());
3768 bool NeedsMaskForGaps =
3769 (IG->requiresScalarEpilogue() && !EpilogueAllowed) ||
3770 (!StoredValues.
empty() && !IG->isFull());
3773 auto *InsertPos = IRMemberToRecipe.
lookup(IRInsertPos);
3782 VPValue *Addr = Start->getAddr();
3791 assert(IG->getIndex(IRInsertPos) != 0 &&
3792 "index of insert position shouldn't be zero");
3796 IG->getIndex(IRInsertPos),
3800 Addr =
B.createNoWrapPtrAdd(InsertPos->getAddr(), OffsetVPV, NW);
3806 if (IG->isReverse()) {
3809 -(int64_t)IG->getFactor(), NW, InsertPosR->
getDebugLoc());
3810 ReversePtr->insertBefore(InsertPosR);
3814 IG, Addr, StoredValues, InsertPos->getMask(), NeedsMaskForGaps,
3816 VPIG->insertBefore(InsertPosR);
3819 for (
unsigned i = 0; i < IG->getFactor(); ++i)
3822 if (!Member->getType()->isVoidTy()) {
3880 AddOp = Instruction::Add;
3881 MulOp = Instruction::Mul;
3883 AddOp =
ID.getInductionOpcode();
3884 MulOp = Instruction::FMul;
3892 Step = Builder.createScalarCast(Instruction::Trunc, Step, Ty,
DL);
3893 Start = Builder.createScalarCast(Instruction::Trunc, Start, Ty,
DL);
3902 Init = Builder.createWidenCast(Instruction::UIToFP,
Init, StepTy);
3907 Init = Builder.createNaryOp(MulOp, {
Init, SplatStep}, Flags);
3908 Init = Builder.createNaryOp(AddOp, {SplatStart,
Init}, Flags,
3926 if (R->getParent()->getEnclosingLoopRegion())
3927 Builder.setInsertPoint(R->getParent(), std::next(R->getIterator()));
3932 VF = Builder.createScalarCast(Instruction::CastOps::UIToFP, VF, StepTy,
3935 VF = Builder.createScalarZExtOrTrunc(VF, StepTy, VF->
getScalarType(),
DL);
3937 Inc = Builder.createNaryOp(MulOp, {Step, VF}, Flags);
3944 auto *
Next = Builder.createNaryOp(AddOp, {Prev, Inc}, Flags,
3947 WidePHI->addIncoming(
Next);
3974 VPlan *Plan = R->getParent()->getPlan();
3975 VPValue *Start = R->getStartValue();
3976 VPValue *Step = R->getStepValue();
3977 VPValue *VF = R->getVFValue();
3979 assert(R->getInductionDescriptor().getKind() ==
3981 "Not a pointer induction according to InductionDescriptor!");
3982 assert(R->getScalarType()->isPointerTy() &&
"Unexpected type.");
3984 "Recipe should have been replaced");
3990 VPPhi *ScalarPtrPhi = Builder.createScalarPhi(Start,
DL,
"pointer.phi");
3994 Builder.setInsertPoint(R->getParent(), R->getParent()->getFirstNonPhi());
3997 Offset = Builder.createOverflowingOp(Instruction::Mul, {
Offset, Step});
3999 Builder.createWidePtrAdd(ScalarPtrPhi,
Offset,
DL,
"vector.gep");
4000 R->replaceAllUsesWith(PtrAdd);
4005 VF = Builder.createScalarZExtOrTrunc(VF, StepTy, VF->
getScalarType(),
DL);
4006 VPValue *Inc = Builder.createOverflowingOp(Instruction::Mul, {Step, VF});
4009 Builder.createPtrAdd(ScalarPtrPhi, Inc,
DL,
"ptr.ind");
4017 VPValue *Step = R->getStepValue();
4018 VPValue *Index = R->getIndex();
4022 ? Builder.createScalarSExtOrTrunc(
4024 : Builder.createScalarCast(Instruction::SIToFP, Index, StepTy,
4026 switch (R->getInductionKind()) {
4028 assert(Index->getScalarType() == Start->getScalarType() &&
4029 "Index type does not match StartValue type");
4030 return R->replaceAllUsesWith(Builder.createAdd(
4031 Start, Builder.createOverflowingOp(Instruction::Mul, {Index, Step})));
4034 return R->replaceAllUsesWith(Builder.createPtrAdd(
4035 Start, Builder.createOverflowingOp(Instruction::Mul, {Index, Step})));
4040 (FPBinOp->
getOpcode() == Instruction::FAdd ||
4041 FPBinOp->
getOpcode() == Instruction::FSub) &&
4042 "Original BinOp should be defined for FP induction");
4044 VPValue *
FMul = Builder.createNaryOp(Instruction::FMul, {Step, Index}, FMF);
4045 return R->replaceAllUsesWith(
4046 Builder.createNaryOp(FPBinOp->
getOpcode(), {Start, FMul}, FMF));
4059 if (!R->isReplicator())
4063 R->dissolveToCFGLoop();
4084 assert(Br->getNumOperands() == 2 &&
4085 "BranchOnTwoConds must have exactly 2 conditions");
4089 assert(Successors.size() == 3 &&
4090 "BranchOnTwoConds must have exactly 3 successors");
4095 VPValue *Cond0 = Br->getOperand(0);
4096 VPValue *Cond1 = Br->getOperand(1);
4103 if (Succ0 == Succ1) {
4105 VPValue *Combined = Builder.createOr(Cond0, Cond1,
DL);
4109 Br->eraseFromParent();
4114 !BrOnTwoCondsBB->
getParent() &&
"regions must already be dissolved");
4127 Br->eraseFromParent();
4138 WidenIVR->eraseFromParent();
4148 WidenIVR->replaceAllUsesWith(PtrAdd);
4149 WidenIVR->eraseFromParent();
4153 WidenIVR->eraseFromParent();
4159 DerivedIVR->eraseFromParent();
4164 VPValue *CanIV = WideCanIV->getCanonicalIV();
4166 VPValue *Step = WideCanIV->getStepValue();
4169 "Expected unroller to have materialized step for UF != 1");
4174 Step = Builder.createAdd(
4177 Builder.createAdd(CanIV, Step, WideCanIV->getDebugLoc(),
"vec.iv",
4178 WideCanIV->getNoWrapFlags());
4180 WideCanIV->eraseFromParent();
4187 for (
unsigned I = 1;
I != Blend->getNumIncomingValues(); ++
I)
4188 Select = Builder.createSelect(Blend->getMask(
I),
4189 Blend->getIncomingValue(
I),
Select,
4190 R.getDebugLoc(),
"predphi", *Blend);
4191 Blend->replaceAllUsesWith(
Select);
4192 Blend->eraseFromParent();
4197 if (!VEPR->getOffset()) {
4199 "Expected unroller to have materialized offset for UF != 1");
4200 VEPR->materializeOffset();
4207 Expr->eraseFromParent();
4217 for (
VPValue *
Op : LastActiveL->operands()) {
4218 VPValue *NotMask = Builder.createNot(
Op, LastActiveL->getDebugLoc());
4223 VPValue *FirstInactiveLane = Builder.createFirstActiveLane(
4224 NotMasks, LastActiveL->getDebugLoc(),
"first.inactive.lane");
4230 Builder.createSub(FirstInactiveLane, One,
4231 LastActiveL->getDebugLoc(),
"last.active.lane");
4234 LastActiveL->eraseFromParent();
4241 assert(VPI->isMasked() &&
4242 "Unmasked MaskedCond should be simplified earlier");
4243 VPI->replaceAllUsesWith(Builder.createNaryOp(
4245 VPI->eraseFromParent();
4255 Instruction::Add, VPI->operands(), VPI->getNoWrapFlags(),
4256 VPI->getDebugLoc());
4257 VPI->replaceAllUsesWith(
Add);
4258 VPI->eraseFromParent();
4266 DebugLoc DL = BranchOnCountInst->getDebugLoc();
4269 BranchOnCountInst->eraseFromParent();
4284 ? Instruction::UIToFP
4285 : Instruction::Trunc;
4286 VectorStep = Builder.createWidenCast(CastOp, VectorStep, IVTy);
4292 Builder.createWidenCast(Instruction::Trunc, ScalarStep, IVTy);
4298 MulOpc = Instruction::FMul;
4299 Flags = VPI->getFastMathFlagsOrNone();
4301 MulOpc = Instruction::Mul;
4306 MulOpc, {VectorStep, ScalarStep}, Flags, R.getDebugLoc());
4308 VPI->replaceAllUsesWith(VectorStep);
4309 VPI->eraseFromParent();
4319static std::optional<VPValue *>
4372 VPValue *UncountableCondition =
nullptr;
4376 return std::nullopt;
4379 Worklist.
push_back(UncountableCondition);
4380 while (!Worklist.
empty()) {
4384 if (V->isDefinedOutsideLoopRegions())
4390 if (V->getNumUsers() > 1)
4391 return std::nullopt;
4403 return std::nullopt;
4407 return std::nullopt;
4415 return std::nullopt;
4423 return std::nullopt;
4425 return UncountableCondition;
4481 for (
auto &Exit : Exits) {
4482 if (Exit.EarlyExitingVPBB == LatchVPBB)
4486 cast<VPIRPhi>(&R)->removeIncomingValueFor(Exit.EarlyExitingVPBB);
4487 Exit.EarlyExitingVPBB->getTerminator()->eraseFromParent();
4498 std::optional<VPValue *>
Cond =
4514 assert(Load &&
"Couldn't find exactly one load");
4517 "Uncountable exit condition load is conditional.");
4531 DL.getTypeStoreSize(Load->getScalarType()).getFixedValue());
4555 while (InsertIt != HeaderVPBB->
end() &&
4557 erase(ConditionRecipes, &*InsertIt);
4560 for (
auto *Recipe :
reverse(ConditionRecipes))
4561 Recipe->moveBefore(*HeaderVPBB, InsertIt);
4565 VPBuilder MaskBuilder(HeaderVPBB, InsertIt);
4567 Type *IVScalarTy =
IV->getScalarType();
4574 {Zero, FirstActive, ALMMultiplier},
4575 DebugLoc(),
"uncountable.exit.mask");
4580 if (R.mayReadOrWriteMemory() && &R != Load) {
4582 if (!VPDT.
dominates(R.getParent(), LatchVPBB))
4592 "Expected BranchOnCond terminator for MiddleVPBB");
4603 auto Phis = ScalarPH->
phis();
4613 "Continuing from different IV");
4629 if (Pred == MiddleVPBB)
4634 VPValue *CondOfEarlyExitingVPBB;
4635 [[maybe_unused]]
bool Matched =
4636 match(EarlyExitingVPBB->getTerminator(),
4638 assert(Matched &&
"Terminator must be BranchOnCond");
4642 VPBuilder EarlyExitingBuilder(EarlyExitingVPBB->getTerminator());
4643 auto *CondToEarlyExit = EarlyExitingBuilder.
createNaryOp(
4645 TrueSucc == ExitBlock
4646 ? CondOfEarlyExitingVPBB
4647 : EarlyExitingBuilder.
createNot(CondOfEarlyExitingVPBB));
4653 "exit condition must dominate the latch");
4662 assert(!Exits.
empty() &&
"must have at least one early exit");
4669 for (
const auto &[Num, VPB] :
enumerate(RPOT))
4672 return RPOIdx[
A.EarlyExitingVPBB] < RPOIdx[
B.EarlyExitingVPBB];
4678 for (
unsigned I = 0;
I + 1 < Exits.
size(); ++
I)
4679 for (
unsigned J =
I + 1; J < Exits.
size(); ++J)
4681 Exits[
I].EarlyExitingVPBB) &&
4682 "RPO sort must place dominating exits before dominated ones");
4688 VPValue *Combined = Exits[0].CondToExit;
4701 "Unexpected terminator");
4702 VPValue *IsLatchExitTaken = LatchExitingBranch->getOperand(0);
4703 DebugLoc LatchDL = LatchExitingBranch->getDebugLoc();
4704 LatchExitingBranch->eraseFromParent();
4707 {IsAnyExitTaken, IsLatchExitTaken}, LatchDL);
4713 LatchVPBB->
setSuccessors({MiddleVPBB, MiddleVPBB, HeaderVPBB});
4717 Plan, Exits, HeaderVPBB, LatchVPBB, MiddleVPBB, TheLoop, PSE, DT, AC);
4722 for (
unsigned Idx = 0; Idx != Exits.
size(); ++Idx) {
4726 VectorEarlyExitVPBBs[Idx] = VectorEarlyExitVPBB;
4734 Exits.
size() == 1 ? VectorEarlyExitVPBBs[0]
4737 LatchVPBB->
setSuccessors({DispatchVPBB, MiddleVPBB, HeaderVPBB});
4769 for (
auto [Exit, VectorEarlyExitVPBB] :
4770 zip_equal(Exits, VectorEarlyExitVPBBs)) {
4771 auto &[EarlyExitingVPBB, EarlyExitVPBB,
_] = Exit;
4783 ExitIRI->getIncomingValueForBlock(EarlyExitingVPBB);
4784 VPValue *NewIncoming = IncomingVal;
4786 VPBuilder EarlyExitBuilder(VectorEarlyExitVPBB);
4791 ExitIRI->removeIncomingValueFor(EarlyExitingVPBB);
4792 ExitIRI->addIncoming(NewIncoming);
4795 EarlyExitingVPBB->getTerminator()->eraseFromParent();
4829 bool IsLastDispatch = (
I + 2 == Exits.
size());
4831 IsLastDispatch ? VectorEarlyExitVPBBs.
back()
4837 VectorEarlyExitVPBBs[
I]->setPredecessors({CurrentBB});
4840 CurrentBB = FalseBB;
4855 VPValue *VecOp = Red->getVecOp();
4857 assert(!Red->isPartialReduction() &&
4858 "This path does not support partial reductions");
4861 auto IsExtendedRedValidAndClampRange =
4874 "getExtendedReductionCost only supports integer types");
4875 ExtRedCost = Ctx.TTI.getExtendedReductionCost(
4876 Opcode, ExtOpc == Instruction::CastOps::ZExt, RedTy, SrcVecTy,
4877 Red->getFastMathFlagsOrNone(),
CostKind);
4878 return ExtRedCost.
isValid() && ExtRedCost < ExtCost + RedCost;
4886 IsExtendedRedValidAndClampRange(
4907 if (Opcode != Instruction::Add && Opcode != Instruction::Sub &&
4908 Opcode != Instruction::FAdd)
4911 assert(!Red->isPartialReduction() &&
4912 "This path does not support partial reductions");
4916 auto IsMulAccValidAndClampRange =
4928 (Ext0->getOpcode() != Ext1->getOpcode() ||
4929 Ext0->getOpcode() == Instruction::CastOps::FPExt))
4933 !Ext0 || Ext0->getOpcode() == Instruction::CastOps::ZExt;
4935 MulAccCost = Ctx.TTI.getMulAccReductionCost(IsZExt, Opcode, RedTy,
4942 ExtCost += Ext0->computeCost(VF, Ctx);
4944 ExtCost += Ext1->computeCost(VF, Ctx);
4946 ExtCost += OuterExt->computeCost(VF, Ctx);
4948 return MulAccCost.
isValid() &&
4949 MulAccCost < ExtCost + MulCost + RedCost;
4954 VPValue *VecOp = Red->getVecOp();
4992 Builder.createWidenCast(Instruction::CastOps::Trunc, ValB, NarrowTy);
4994 ValB = ExtB = Builder.createWidenCast(ExtOpc, Trunc, WideTy);
4995 Mul->setOperand(1, ExtB);
5005 ExtendAndReplaceConstantOp(RecipeA, RecipeB,
B,
Mul);
5010 IsMulAccValidAndClampRange(
Mul, RecipeA, RecipeB,
nullptr)) {
5017 if (!
Sub && IsMulAccValidAndClampRange(
Mul,
nullptr,
nullptr,
nullptr))
5034 ExtendAndReplaceConstantOp(Ext0, Ext1,
B,
Mul);
5043 (Ext->getOpcode() == Ext0->getOpcode() || Ext0 == Ext1) &&
5044 Ext0->getOpcode() == Ext1->getOpcode() &&
5045 IsMulAccValidAndClampRange(
Mul, Ext0, Ext1, Ext) &&
Mul->hasOneUse()) {
5047 Ext0->getOpcode(), Ext0->getOperand(0), Ext->getScalarType(),
nullptr,
5048 *Ext0, *Ext0, Ext0->getDebugLoc());
5049 NewExt0->insertBefore(Ext0);
5054 Ext->getScalarType(),
nullptr, *Ext1,
5055 *Ext1, Ext1->getDebugLoc());
5058 auto *NewMul =
Mul->cloneWithOperands({NewExt0, NewExt1});
5059 NewMul->insertBefore(
Mul);
5060 Ext->replaceAllUsesWith(NewMul);
5061 Ext->eraseFromParent();
5062 Mul->eraseFromParent();
5076 assert(!Red->isPartialReduction() &&
5077 "This path does not support partial reductions");
5080 auto IP = std::next(Red->getIterator());
5081 auto *VPBB = Red->getParent();
5091 Red->replaceAllUsesWith(AbstractR);
5121 for (
VPValue *VPV : VPValues) {
5130 if (
User->usesScalars(VPV))
5133 HoistPoint = HoistBlock->
begin();
5137 "All users must be in the vector preheader or dominated by it");
5142 VPV->replaceUsesWithIf(Broadcast,
5143 [VPV, Broadcast](
VPUser &U,
unsigned Idx) {
5144 return Broadcast != &U && !U.usesScalars(VPV);
5155 return CommonMetadata;
5158template <
unsigned Opcode>
5163 static_assert(Opcode == Instruction::Load || Opcode == Instruction::Store,
5164 "Only Load and Store opcodes supported");
5165 [[maybe_unused]]
constexpr bool IsLoad = (Opcode == Instruction::Load);
5172 for (
auto Recipes :
Groups) {
5173 if (Recipes.size() < 2)
5178 "Expected all recipes in group to have the same load-store type");
5185 VPValue *MaskI = RecipeI->getMask();
5191 bool HasComplementaryMask =
false;
5196 VPValue *MaskJ = RecipeJ->getMask();
5205 if (HasComplementaryMask) {
5206 assert(Group.
size() >= 2 &&
"must have at least 2 entries");
5216template <
typename InstType>
5234 for (
auto &Group :
Groups) {
5254 return R->isSingleScalar() == IsSingleScalar;
5256 "all members in group must agree on IsSingleScalar");
5261 LoadWithMinAlign->getUnderlyingInstr(), {EarliestLoad->getOperand(0)},
5262 IsSingleScalar,
nullptr, *EarliestLoad, CommonMetadata);
5264 UnpredicatedLoad->insertBefore(EarliestLoad);
5268 Load->replaceAllUsesWith(UnpredicatedLoad);
5269 Load->eraseFromParent();
5278 if (!StoreLoc || !StoreLoc->AATags.Scope)
5285 SinkStoreInfo SinkInfo(StoresToSink, *StoresToSink[0], PSE, L);
5297 for (
auto &Group :
Groups) {
5310 VPValue *SelectedValue = Group[0]->getOperand(0);
5313 bool IsSingleScalar = Group[0]->isSingleScalar();
5314 for (
unsigned I = 1;
I < Group.size(); ++
I) {
5315 assert(IsSingleScalar == Group[
I]->isSingleScalar() &&
5316 "all members in group must agree on IsSingleScalar");
5317 VPValue *Mask = Group[
I]->getMask();
5319 SelectedValue = Builder.createSelect(Mask,
Value, SelectedValue,
5328 StoreWithMinAlign->getUnderlyingInstr(),
5329 {SelectedValue, LastStore->getOperand(1)}, IsSingleScalar,
5330 nullptr, *LastStore, CommonMetadata);
5331 UnpredicatedStore->insertBefore(*InsertBB, LastStore->
getIterator());
5335 Store->eraseFromParent();
5342 assert(Plan.
hasVF(BestVF) &&
"BestVF is not available in Plan");
5343 assert(Plan.
hasUF(BestUF) &&
"BestUF is not available in Plan");
5406 auto UsesVectorOrInsideReplicateRegion = [DefR, LoopRegion](
VPUser *U) {
5408 return !U->usesScalars(DefR) || ParentRegion != LoopRegion;
5415 none_of(DefR->users(), UsesVectorOrInsideReplicateRegion))
5425 DefR->replaceUsesWithIf(
5426 BuildVector, [BuildVector, &UsesVectorOrInsideReplicateRegion](
5428 return &U != BuildVector && UsesVectorOrInsideReplicateRegion(&U);
5442 for (
VPValue *Def : R.definedValues()) {
5452 unsigned NumFirstLaneUsers =
count_if(Def->users(), [&Def](
VPUser *U) {
5453 return U->usesFirstLaneOnly(Def);
5455 if (!NumFirstLaneUsers || NumFirstLaneUsers == Def->getNumUsers())
5462 Unpack->insertAfter(&R);
5463 Def->replaceUsesWithIf(Unpack, [&Def](
VPUser &U,
unsigned) {
5464 return U.usesFirstLaneOnly(Def);
5473 bool RequiresScalarEpilogue,
VPValue *Step,
5474 std::optional<uint64_t> MaxRuntimeStep) {
5486 "Step VPBB must dominate VectorPHVPBB");
5488 InsertPt = std::next(StepR->getIterator());
5490 VPBuilder Builder(VectorPHVPBB, InsertPt);
5496 if (!RequiresScalarEpilogue &&
match(TC,
m_APInt(TCVal)) && MaxRuntimeStep &&
5497 TCVal->
urem(*MaxRuntimeStep) == 0) {
5508 if (TailByMasking) {
5509 TC = Builder.createAdd(
5520 Builder.createNaryOp(Instruction::URem, {TC, Step},
5529 if (RequiresScalarEpilogue) {
5531 "requiring scalar epilogue is not supported with fail folding");
5534 R = Builder.createSelect(IsZero, Step, R);
5548 "VF and VFxUF must be materialized together");
5560 Builder.createElementCount(TCTy, VFEC * Plan.
getConcreteUF());
5567 VPValue *RuntimeVF = Builder.createElementCount(TCTy, VFEC);
5571 BC, [&VF](
VPUser &U,
unsigned) {
return !U.usesScalars(&VF); });
5575 VPValue *MulByUF = Builder.createOverflowingOp(
5588 auto *AliasMask = Builder.createNaryOp(
5592 if (HeaderMaskDef->isPhi())
5593 Builder =
VPBuilder(&*HeaderMaskDef->getParent()->getFirstNonPhi());
5598 auto *ClampedHeaderMask = Builder.createAnd(HeaderMask, AliasMask);
5600 return &U != ClampedHeaderMask;
5611 assert(IncomingAliasMask &&
"Expected an alias mask!");
5621 if (
Check.NeedsFreeze) {
5631 Intrinsic::loop_dependence_war_mask,
5635 AliasMask = Builder.createAnd(AliasMask, WARMask);
5637 AliasMask = WARMask;
5642 VPValue *NumActive = Builder.createNaryOp(
5645 VPValue *ClampedVF = Builder.createScalarZExtOrTrunc(
5671 VPValue *DistanceToMax = Builder.createSub(MaxUIntTripCount, TripCount);
5679 VPValue *TripCountCheck = Builder.createICmp(
5682 VPValue *
Cond = Builder.createOr(IsScalar, TripCountCheck,
DL);
5693 "Clamped VF not supported with interleaving");
5701 VPBuilder Builder(Entry, Entry->begin());
5713 if (!ExpSCEV || ExpSCEV->user_empty())
5715 Builder.setInsertPoint(ExpSCEV);
5724 ExpSCEV->eraseFromParent();
5733 BasicBlock *EntryBB = Entry->getIRBasicBlock();
5740 const SCEV *Expr = ExpSCEV->getSCEV();
5743 ExpandedSCEVs[Expr] = Res;
5748 ExpSCEV->eraseFromParent();
5751 "all VPExpandSCEVRecipes must have been expanded");
5754 auto EI = Entry->begin();
5764 return ExpandedSCEVs;
5778 VPValue *OpV,
unsigned Idx,
bool IsScalable) {
5783 if (Member0Op == OpV)
5793 return !IsScalable && !W->getMask() && W->isConsecutive() &&
5796 return IR->getInterleaveGroup()->isFull() &&
IR->getVPValue(Idx) == OpV;
5811 if (R->getScalarType() != WideMember0->getScalarType())
5813 if (R->hasPredicate() && R->getPredicate() != WideMember0->getPredicate())
5817 for (
unsigned Idx = 0; Idx != WideMember0->getNumOperands(); ++Idx) {
5820 OpsI.
push_back(
Op->getDefiningRecipe()->getOperand(Idx));
5825 if (
any_of(
enumerate(OpsI), [WideMember0, Idx, IsScalable](
const auto &
P) {
5826 const auto &[
OpIdx, OpV] =
P;
5838static std::optional<ElementCount>
5842 if (!InterleaveR || InterleaveR->
getMask())
5843 return std::nullopt;
5845 Type *GroupElementTy =
nullptr;
5849 return Op->getScalarType() == GroupElementTy;
5851 return std::nullopt;
5855 return Op->getScalarType() == GroupElementTy;
5857 return std::nullopt;
5861 if (IG->getFactor() != IG->getNumMembers())
5862 return std::nullopt;
5868 assert(
Size.isScalable() == VF.isScalable() &&
5869 "if Size is scalable, VF must be scalable and vice versa");
5870 return Size.getKnownMinValue();
5874 unsigned MinVal = VF.getKnownMinValue();
5876 if (IG->getFactor() == MinVal && GroupSize == GetVectorBitWidthForVF(VF))
5879 return std::nullopt;
5887 return RepR && RepR->isSingleScalar();
5901 if (V->isDefinedOutsideLoopRegions()) {
5904 return M->isDefinedOutsideLoopRegions() &&
5905 M->getScalarType() == V->getScalarType();
5907 "expected distinct loop-invariant values of matching scalar type");
5922 for (
unsigned Idx = 0,
E = WideMember0->getNumOperands(); Idx !=
E; ++Idx) {
5924 for (
VPValue *Member : Members)
5925 OpsI.
push_back(Member->getDefiningRecipe()->getOperand(Idx));
5926 WideMember0->setOperand(
5935 auto *LI =
cast<LoadInst>(LoadGroup->getInterleaveGroup()->getInsertPos());
5937 LoadGroup->getMask(),
true,
5938 *LoadGroup, LoadGroup->getDebugLoc());
5939 L->insertBefore(LoadGroup);
5945 assert(RepR->isSingleScalar() && RepR->getOpcode() == Instruction::Load &&
5946 "must be a single scalar load");
5947 NarrowedOps.
insert(RepR);
5952 VPValue *PtrOp = WideLoad->getAddr();
5954 PtrOp = VecPtr->getOperand(0);
5959 nullptr, {}, *WideLoad);
5960 N->insertBefore(WideLoad);
5965std::unique_ptr<VPlan>
5985 "unexpected branch-on-count");
5988 std::optional<ElementCount> VFToOptimize;
6002 if (R.mayWriteToMemory() && !InterleaveR)
6008 return any_of(V->users(), [&](VPUser *U) {
6009 auto *UR = cast<VPRecipeBase>(U);
6010 return UR->getParent()->getParent() != VectorLoop;
6027 std::optional<ElementCount> NarrowedVF =
6029 if (!NarrowedVF || (VFToOptimize && NarrowedVF != VFToOptimize))
6031 VFToOptimize = NarrowedVF;
6034 if (InterleaveR->getStoredValues().empty())
6039 auto *Member0 = InterleaveR->getStoredValues()[0];
6049 VPRecipeBase *DefR = Op.value()->getDefiningRecipe();
6052 auto *IR = dyn_cast<VPInterleaveRecipe>(DefR);
6053 return IR && IR->getInterleaveGroup()->isFull() &&
6054 IR->getVPValue(Op.index()) == Op.value();
6063 VFToOptimize->isScalable()))
6068 if (StoreGroups.empty())
6072 bool RequiresScalarEpilogue =
6083 std::unique_ptr<VPlan> NewPlan;
6085 NewPlan = std::unique_ptr<VPlan>(Plan.
duplicate());
6086 Plan.
setVF(*VFToOptimize);
6087 NewPlan->removeVF(*VFToOptimize);
6094 for (
auto *StoreGroup : StoreGroups) {
6096 NarrowedOps, Preheader);
6101 StoreGroup->getDebugLoc());
6102 S->insertBefore(StoreGroup);
6103 StoreGroup->eraseFromParent();
6109 Type *CanIVTy = VectorLoop->getCanonicalIVType();
6115 if (VFToOptimize->isScalable()) {
6118 Step = PHBuilder.createOverflowingOp(Instruction::Mul, {VScale,
UF},
6126 materializeVectorTripCount(Plan, VectorPH,
false,
6127 RequiresScalarEpilogue, Step);
6132 removeDeadRecipes(Plan);
6135 "All VPVectorPointerRecipes should have been removed");
6151 "must have a BranchOnCond");
6154 if (VF.
isScalable() && VScaleForTuning.has_value())
6155 VectorStep *= *VScaleForTuning;
6156 assert(VectorStep > 0 &&
"trip count should not be zero");
6160 MiddleTerm->setMetadata(LLVMContext::MD_prof, BranchWeights);
6179 "Cannot handle loops with uncountable early exits");
6186 assert(RecurSplice &&
"expected FirstOrderRecurrenceSplice");
6193 if (
any_of(RecurSplice->users(),
6194 [](
VPUser *U) { return !cast<VPRecipeBase>(U)->getRegion(); }) &&
6275 {},
"vector.recur.extract.for.phi");
6278 ExitPhi->replaceUsesOfWith(ExtractR, PenultimateElement);
6292 VPValue *WidenIVCandidate = BinOp->getOperand(0);
6293 VPValue *InvariantCandidate = BinOp->getOperand(1);
6295 std::swap(WidenIVCandidate, InvariantCandidate);
6309 auto *ClonedOp = BinOp->
clone();
6310 if (ClonedOp->getOperand(0) == WidenIV) {
6311 ClonedOp->setOperand(0, ScalarIV);
6313 assert(ClonedOp->getOperand(1) == WidenIV &&
"one operand must be WideIV");
6314 ClonedOp->setOperand(1, ScalarIV);
6329 auto CheckSentinel = [&SE](
const SCEV *IVSCEV,
6330 bool UseMax) -> std::optional<APSInt> {
6332 for (
bool Signed : {
true,
false}) {
6341 return std::nullopt;
6349 PhiR->getRecurrenceKind()))
6358 VPValue *BackedgeVal = PhiR->getBackedgeValue();
6372 !
match(FindLastSelect,
6381 IVOfExpressionToSink ? IVOfExpressionToSink : FindLastExpression, PSE,
6387 "IVOfExpressionToSink not being an AddRec must imply "
6388 "FindLastExpression not being an AddRec.");
6399 std::optional<APSInt> SentinelVal = CheckSentinel(IVSCEV, UseMax);
6400 bool UseSigned = SentinelVal && SentinelVal->isSigned();
6407 if (IVOfExpressionToSink) {
6408 const SCEV *FindLastExpressionSCEV =
6410 if (
match(FindLastExpressionSCEV,
6413 if (
auto NewSentinel =
6414 CheckSentinel(FindLastExpressionSCEV, NewUseMax)) {
6417 SentinelVal = *NewSentinel;
6418 UseSigned = NewSentinel->isSigned();
6420 IVSCEV = FindLastExpressionSCEV;
6421 IVOfExpressionToSink =
nullptr;
6431 if (AR->hasNoSignedWrap())
6433 else if (AR->hasNoUnsignedWrap())
6443 VPValue *NewFindLastSelect = BackedgeVal;
6445 if (!SentinelVal || IVOfExpressionToSink) {
6448 DebugLoc DL = FindLastSelect->getDefiningRecipe()->getDebugLoc();
6449 VPBuilder LoopBuilder(FindLastSelect->getDefiningRecipe());
6450 if (FindLastSelect->getDefiningRecipe()->getOperand(1) == PhiR)
6451 SelectCond = LoopBuilder.
createNot(SelectCond);
6458 if (SelectCond !=
Cond || IVOfExpressionToSink) {
6461 IVOfExpressionToSink ? IVOfExpressionToSink : FindLastExpression,
6470 VPIRFlags Flags(MinMaxKind,
false,
false,
6476 NewFindLastSelect, Flags, ExitDL);
6479 VPValue *VectorRegionExitingVal = ReducedIV;
6480 if (IVOfExpressionToSink)
6481 VectorRegionExitingVal =
6483 ReducedIV, IVOfExpressionToSink);
6486 VPValue *StartVPV = PhiR->getStartValue();
6493 NewRdxResult = MiddleBuilder.
createSelect(Cmp, VectorRegionExitingVal,
6503 AnyOfPhi->insertAfter(PhiR);
6510 OrVal, VectorRegionExitingVal, StartVPV, ExitDL);
6523 PhiR->hasUsesOutsideReductionChain());
6524 NewPhiR->insertBefore(PhiR);
6525 PhiR->replaceAllUsesWith(NewPhiR);
6526 PhiR->eraseFromParent();
6533struct ReductionExtend {
6534 Type *SrcType =
nullptr;
6535 ExtendKind Kind = ExtendKind::PR_None;
6541struct ExtendedReductionOperand {
6545 ReductionExtend ExtendA, ExtendB;
6553struct VPPartialReductionChain {
6556 VPWidenRecipe *ReductionBinOp =
nullptr;
6558 ExtendedReductionOperand ExtendedOp;
6565 unsigned AccumulatorOpIdx;
6566 unsigned ScaleFactor;
6569 VPBlendRecipe *Blend =
nullptr;
6581 if (!
Op->hasOneUse() ||
6587 auto *Trunc = Builder.createWidenCast(Instruction::CastOps::Trunc,
6588 Op->getOperand(1), NarrowTy);
6590 Op->setOperand(1, Builder.createWidenCast(ExtOpc, Trunc, WideTy));
6599 auto *
Sub =
Op->getOperand(0)->getDefiningRecipe();
6601 assert(Ext->getOpcode() ==
6603 "Expected both the LHS and RHS extends to be the same");
6604 bool IsSigned = Ext->getOpcode() == Instruction::SExt;
6607 auto *FreezeX = Builder.insert(
new VPWidenRecipe(Instruction::Freeze, {
X}));
6608 auto *FreezeY = Builder.insert(
new VPWidenRecipe(Instruction::Freeze, {
Y}));
6609 auto *
Max = Builder.insert(
6611 {FreezeX, FreezeY}, SrcTy));
6612 auto *Min = Builder.insert(
6614 {FreezeX, FreezeY}, SrcTy));
6617 return Builder.createWidenCast(Instruction::CastOps::ZExt, AbsDiff,
6618 Op->getScalarType());
6630 if (!
Mul->hasOneUse() ||
6631 (Ext->getOpcode() != MulLHS->getOpcode() && MulLHS != MulRHS) ||
6632 MulLHS->getOpcode() != MulRHS->getOpcode())
6635 auto *NewLHS = Builder.createWidenCast(
6636 MulLHS->getOpcode(), MulLHS->getOperand(0), Ext->getScalarType());
6637 auto *NewRHS = MulLHS == MulRHS
6639 : Builder.createWidenCast(MulRHS->getOpcode(),
6640 MulRHS->getOperand(0),
6641 Ext->getScalarType());
6642 auto *NewMul =
Mul->cloneWithOperands({NewLHS, NewRHS});
6643 Builder.insert(NewMul);
6644 Op->replaceAllUsesWith(NewMul);
6645 Op->eraseFromParent();
6646 Mul->eraseFromParent();
6655 VPValue *VecOp = Red->getVecOp();
6709static void transformToPartialReduction(
const VPPartialReductionChain &Chain,
6717 WidenRecipe->
getOperand(1 - Chain.AccumulatorOpIdx));
6720 ExtendedOp = optimizeExtendsForPartialReduction(ExtendedOp);
6736 if ((WidenRecipe->
getOpcode() == Instruction::Sub &&
6738 (WidenRecipe->
getOpcode() == Instruction::FSub &&
6743 if (WidenRecipe->
getOpcode() == Instruction::FSub) {
6753 Builder.insert(NegRecipe);
6754 ExtendedOp = NegRecipe;
6779 assert((!ExitValue || IsLastInChain) &&
6780 "if we found ExitValue, it must match RdxPhi's backedge value");
6791 PartialRed->insertBefore(WidenRecipe);
6801 E->insertBefore(WidenRecipe);
6802 PartialRed->replaceAllUsesWith(
E);
6815 auto *NewScaleFactor = Plan.
getConstantInt(32, Chain.ScaleFactor);
6816 StartInst->setOperand(2, NewScaleFactor);
6824 VPValue *OldStartValue = StartInst->getOperand(0);
6825 StartInst->setOperand(0, StartInst->getOperand(1));
6829 assert(RdxResult &&
"Could not find reduction result");
6832 unsigned SubOpc = Chain.RK ==
RecurKind::FSub ? Instruction::BinaryOps::FSub
6833 : Instruction::BinaryOps::Sub;
6839 [&NewResult](
VPUser &U,
unsigned Idx) {
return &
U != NewResult; });
6845 const VPPartialReductionChain &Link,
6848 const ExtendedReductionOperand &ExtendedOp = Link.ExtendedOp;
6849 std::optional<unsigned> BinOpc = std::nullopt;
6851 if (ExtendedOp.ExtendB.Kind != ExtendKind::PR_None)
6852 BinOpc = ExtendedOp.ExtendsUser->
getOpcode();
6854 std::optional<llvm::FastMathFlags>
Flags;
6858 auto GetLinkOpcode = [&Link]() ->
unsigned {
6861 return Instruction::Add;
6863 return Instruction::FAdd;
6865 return Link.ReductionBinOp->
getOpcode();
6870 GetLinkOpcode(), ExtendedOp.ExtendA.SrcType, ExtendedOp.ExtendB.SrcType,
6871 RdxType, VF, ExtendedOp.ExtendA.Kind, ExtendedOp.ExtendB.Kind, BinOpc,
6892static std::optional<ExtendedReductionOperand>
6895 "Op should be operand of UpdateR");
6903 if (
Op->hasOneUse() &&
6912 Type *RHSInputType =
Y->getScalarType();
6913 if (LHSInputType != RHSInputType ||
6914 LHSExt->getOpcode() != RHSExt->getOpcode())
6915 return std::nullopt;
6918 return ExtendedReductionOperand{
6920 {LHSInputType, getPartialReductionExtendKind(LHSExt)},
6924 std::optional<TTI::PartialReductionExtendKind> OuterExtKind;
6927 VPValue *CastSource = CastRecipe->getOperand(0);
6928 OuterExtKind = getPartialReductionExtendKind(CastRecipe);
6938 return ExtendedReductionOperand{
6945 if (!
Op->hasOneUse())
6946 return std::nullopt;
6951 return std::nullopt;
6961 return std::nullopt;
6965 ExtendKind LHSExtendKind = getPartialReductionExtendKind(LHSCast);
6968 const APInt *RHSConst =
nullptr;
6974 return std::nullopt;
6978 if (Cast && OuterExtKind &&
6979 getPartialReductionExtendKind(Cast) != OuterExtKind)
6980 return std::nullopt;
6982 Type *RHSInputType = LHSInputType;
6983 ExtendKind RHSExtendKind = LHSExtendKind;
6986 RHSExtendKind = getPartialReductionExtendKind(RHSCast);
6989 return ExtendedReductionOperand{
6990 MulOp, {LHSInputType, LHSExtendKind}, {RHSInputType, RHSExtendKind}};
6997static std::optional<SmallVector<VPPartialReductionChain>>
7004 return std::nullopt;
7014 VPValue *CurrentValue = ExitValue;
7015 while (CurrentValue != RedPhiR) {
7021 return std::nullopt;
7026 return std::nullopt;
7033 std::optional<ExtendedReductionOperand> ExtendedOp =
7034 matchExtendedReductionOperand(UpdateR,
Op);
7036 ExtendedOp = matchExtendedReductionOperand(UpdateR, PrevValue);
7038 return std::nullopt;
7046 return std::nullopt;
7048 Type *ExtSrcType = ExtendedOp->ExtendA.SrcType;
7051 return std::nullopt;
7053 VPPartialReductionChain Link(
7054 {UpdateR, *ExtendedOp, RK,
7059 CurrentValue = PrevValue;
7064 std::reverse(Chain.
begin(), Chain.
end());
7083 if (
auto Chains = getScaledReductions(RedPhiR))
7084 ChainsByPhi.
try_emplace(RedPhiR, std::move(*Chains));
7087 if (ChainsByPhi.
empty())
7095 for (
const auto &[
_, Chains] : ChainsByPhi)
7096 for (
const VPPartialReductionChain &Chain : Chains) {
7097 PartialReductionOps.
insert(Chain.ExtendedOp.ExtendsUser);
7099 PartialReductionBlends.
insert(Chain.Blend);
7100 ScaledReductionMap[Chain.ReductionBinOp] = Chain.ScaleFactor;
7106 auto ExtendUsersValid = [&](
VPValue *Ext) {
7108 return PartialReductionOps.contains(cast<VPRecipeBase>(U));
7112 auto IsProfitablePartialReductionChainForVF =
7119 for (
const VPPartialReductionChain &Link : Chain) {
7120 const ExtendedReductionOperand &ExtendedOp = Link.ExtendedOp;
7121 InstructionCost LinkCost = getPartialReductionLinkCost(CostCtx, Link, VF);
7125 PartialCost += LinkCost;
7126 RegularCost += Link.ReductionBinOp->
computeCost(VF, CostCtx);
7128 if (ExtendedOp.ExtendB.Kind != ExtendKind::PR_None)
7129 RegularCost += ExtendedOp.ExtendsUser->
computeCost(VF, CostCtx);
7132 RegularCost += Extend->computeCost(VF, CostCtx);
7134 return PartialCost.
isValid() && PartialCost < RegularCost;
7142 for (
auto &[RedPhiR, Chains] : ChainsByPhi) {
7143 for (
const VPPartialReductionChain &Chain : Chains) {
7144 if (!
all_of(Chain.ExtendedOp.ExtendsUser->operands(), ExtendUsersValid)) {
7148 auto UseIsValid = [&, RedPhiR = RedPhiR](
VPUser *U) {
7150 return PhiR == RedPhiR;
7154 return Blend == Chain.Blend || PartialReductionBlends.
contains(Blend);
7156 return Chain.ScaleFactor == ScaledReductionMap.
lookup_or(R, 0) ||
7162 if (!
all_of(Chain.ReductionBinOp->users(), UseIsValid)) {
7171 auto *RepR = dyn_cast<VPReplicateRecipe>(U);
7172 return RepR && RepR->getOpcode() == Instruction::Store;
7183 return IsProfitablePartialReductionChainForVF(Chains, VF);
7189 for (
auto &[Phi, Chains] : ChainsByPhi)
7190 for (
const VPPartialReductionChain &Chain : Chains)
7191 transformToPartialReduction(Chain, Plan, Phi);
7220 if (VPI && VPI->getUnderlyingValue() &&
7231 auto ProcessSubset = [&](
VPlan &,
auto ProcessVPInst) {
7234 if (!ProcessVPInst(VPI))
7243 New->insertBefore(VPI);
7244 if (VPI->
getOpcode() == Instruction::Load)
7259 "lowerMemoryIdioms", ProcessSubset, Plan, [&](
VPInstruction *VPI) {
7261 VPI, FinalRedStoresBuilder))
7270 return ReplaceWith(VPI, Histogram);
7283 "scalarizeMemOpsWithIrregularTypes", ProcessSubset, Plan,
7287 return Scalarize(VPI);
7294 "makeVPlanMemOpDecision", ProcessSubset, Plan, [&](
VPInstruction *VPI) {
7296 bool IsLoad = VPI->
getOpcode() == Instruction::Load;
7306 const SCEV *PtrSCEV =
7308 bool IsSingleScalarLoad =
7314 I, Ptr, IsSingleScalarLoad,
7322 "widenConsecutiveMemOps", ProcessSubset, Plan, [&](
VPInstruction *VPI) {
7327 bool IsLoad = VPI->
getOpcode() == Instruction::Load;
7340 VectorPtr->insertBefore(VPI);
7351 return ReplaceWith(VPI, WidenedR);
7358 return ReplaceWith(VPI, Recipe);
7360 return Scalarize(VPI);
7383 if (VPI->mayHaveSideEffects())
7387 if (VPI->isMasked() && !VPI->isSafeToSpeculativelyExecute())
7392 if (VPI->getOpcode() == Instruction::Add &&
7401 VPI->getOpcode(), VPI->operandsWithoutMask(),
nullptr, *VPI,
7402 *VPI, VPI->getDebugLoc(),
I);
7403 Recipe->insertBefore(VPI);
7404 VPI->replaceAllUsesWith(Recipe);
7405 VPI->eraseFromParent();
7415 switch (Param.ParamKind) {
7416 case VFParamKind::Vector:
7417 case VFParamKind::GlobalPredicate:
7419 case VFParamKind::OMP_Uniform:
7420 return SE->isSCEVable(Args[Param.ParamPos]->getScalarType()) &&
7421 SE->isLoopInvariant(
7422 vputils::getSCEVExprForVPValue(Args[Param.ParamPos], PSE, L),
7424 case VFParamKind::OMP_Linear:
7425 return match(vputils::getSCEVExprForVPValue(Args[Param.ParamPos], PSE, L),
7426 m_scev_AffineAddRec(
7427 m_SCEV(), m_scev_SpecificSInt(Param.LinearStepOrPos),
7428 m_SpecificLoop(L)));
7445 const auto *It =
find_if(Mappings, [&](
const VFInfo &Info) {
7446 return Info.Shape.VF == VF && (!MaskRequired || Info.isMasked()) &&
7449 if (It == Mappings.end())
7456struct CallWideningDecision {
7457 enum class KindTy { Scalarize,
Intrinsic, VectorVariant };
7458 CallWideningDecision(KindTy Kind, Function *Variant =
nullptr)
7481 return CallWideningDecision::KindTy::Scalarize;
7491 return CallWideningDecision::KindTy::Scalarize;
7495 false, VF, CostCtx);
7510 return CallWideningDecision::KindTy::Intrinsic;
7514 if (VecFunc && ScalarCost >= VecCallCost)
7515 return {CallWideningDecision::KindTy::VectorVariant, VecFunc};
7517 return CallWideningDecision::KindTy::Scalarize;
7527 if (!VPI || !VPI->getUnderlyingValue() ||
7528 VPI->getOpcode() != Instruction::Call)
7533 VPI->op_begin() + CI->arg_size());
7535 CallWideningDecision Decision =
7544 switch (Decision.Kind) {
7545 case CallWideningDecision::KindTy::Intrinsic: {
7549 *VPI, VPI->getDebugLoc());
7552 case CallWideningDecision::KindTy::VectorVariant: {
7556 VPValue *Mask = VPI->isMasked() ? VPI->getMask() : Plan.
getTrue();
7557 Ops.push_back(Mask);
7559 Ops.push_back(VPI->getOperand(VPI->getNumOperandsWithoutMask() - 1));
7561 *VPI, VPI->getDebugLoc());
7564 case CallWideningDecision::KindTy::Scalarize:
7570 VPI->replaceAllUsesWith(Replacement);
7571 VPI->eraseFromParent();
7594 if (!LoadR || LoadR->isConsecutive())
7613 Align Alignment = LoadR->getAlign();
7616 if (!Ctx.TTI.isLegalStridedLoadStore(DataTy, Alignment))
7621 Intrinsic::experimental_vp_strided_load, DataTy,
7622 LoadR->isMasked(), Alignment, Ctx);
7623 return StridedLoadStoreCost < CurrentCost;
7634 Ctx.invalidateWideningDecision(&LoadR->getIngredient(), VF);
7639 I32VF = Builder.createScalarZExtOrTrunc(
7652 "Stride type from SCEV must match the index type");
7653 VPValue *CanIV = Builder.createScalarSExtOrTrunc(
7657 auto *
Offset = Builder.createOverflowingOp(
7658 Instruction::Mul, {CanIV, StrideInBytes},
7659 {AddRecPtr->hasNoUnsignedWrap(), AddRecPtr->hasNoSignedWrap()});
7660 auto *BasePtr = Builder.createNoWrapPtrAdd(
7666 VPValue *NewPtr = Builder.createVectorPointer(
7668 Ptr->getGEPNoWrapFlags(), Ptr->getDebugLoc());
7670 VPValue *Mask = LoadR->getMask();
7673 auto *StridedLoad = Builder.createWidenMemIntrinsic(
7674 Intrinsic::experimental_vp_strided_load,
7675 {NewPtr, StrideInBytes, Mask, I32VF}, LoadTy, Alignment, *LoadR,
7676 LoadR->getDebugLoc());
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static bool isEqual(const Function &Caller, const Function &Callee)
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
static cl::opt< IntrinsicCostStrategy > IntrinsicCost("intrinsic-cost-strategy", cl::desc("Costing strategy for intrinsic instructions"), cl::init(IntrinsicCostStrategy::InstructionCost), cl::values(clEnumValN(IntrinsicCostStrategy::InstructionCost, "instruction-cost", "Use TargetTransformInfo::getInstructionCost"), clEnumValN(IntrinsicCostStrategy::IntrinsicCost, "intrinsic-cost", "Use TargetTransformInfo::getIntrinsicInstrCost"), clEnumValN(IntrinsicCostStrategy::TypeBasedIntrinsicCost, "type-based-intrinsic-cost", "Calculate the intrinsic cost based only on argument types")))
iv Induction Variable Users
static std::pair< Value *, APInt > getMask(Value *WideMask, unsigned Factor, ElementCount LeafValueEC)
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
Legalize the Machine IR a function s Machine IR
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first DebugLoc that has line number information, given a range of instructions.
This file provides utility analysis objects describing memory locations.
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
const SmallVectorImpl< MachineOperand > & Cond
static bool dominates(InstrPosIndexes &PosIndexes, const MachineInstr &A, const MachineInstr &B)
This is the interface for a metadata-based scoped no-alias analysis.
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static SymbolRef::Type getType(const Symbol *Sym)
This file implements the TypeSwitch template, which mimics a switch() statement whose cases are type ...
This file implements dominator tree analysis for a single level of a VPlan's H-CFG.
This file contains the declarations of different VPlan-related auxiliary helpers.
This file declares the class VPlanVerifier, which contains utility functions to check the consistency...
This file contains the declarations of the Vectorization Plan base classes:
static const X86InstrFMA3Group Groups[]
static const uint32_t IV[8]
Helper for extra no-alias checks via known-safe recipe and SCEV.
SinkStoreInfo(ArrayRef< VPReplicateRecipe * > ExcludeRecipes, VPReplicateRecipe &GroupLeader, PredicatedScalarEvolution &PSE, const Loop &L)
SinkStoreInfo(VPReplicateRecipe &GroupLeader)
bool shouldSkip(VPRecipeBase &R) const
Return true if R should be skipped during alias checking, either because it's in the exclude set or b...
Class for arbitrary precision integers.
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
unsigned getActiveBits() const
Compute the number of active bits in the value.
APInt abs() const
Get the absolute value.
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
unsigned getBitWidth() const
Return the number of bits in the APInt.
int32_t exactLogBase2() const
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
An arbitrary precision integer that knows its signedness.
static APSInt getMinValue(uint32_t numBits, bool Unsigned)
Return the APSInt representing the minimum integer value with the given bit width and signedness.
static APSInt getMaxValue(uint32_t numBits, bool Unsigned)
Return the APSInt representing the maximum integer value with the given bit width and signedness.
@ NoAlias
The two locations do not alias at all.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & back() const
Get the last element.
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
const T & front() const
Get the first element.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
const Function * getParent() const
Return the enclosing method, or null if none.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
bool isNoBuiltin() const
Return true if the call should not be treated as a call to a builtin.
This class represents a function call, abstracting a target machine's calling convention.
@ ICMP_ULT
unsigned less than
@ ICMP_ULE
unsigned less or equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
This class represents a range of values.
LLVM_ABI bool contains(const APInt &Val) const
Return true if the specified value is in the set.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
A parsed version of the target data layout string and methods for querying it.
LLVM_ABI IntegerType * getIndexType(LLVMContext &C, unsigned AddressSpace) const
Returns the type of a GEP index in AddressSpace.
static DebugLoc getCompilerGenerated()
static DebugLoc getUnknown()
ValueT lookup(const_arg_type_t< KeyT > Val) const
Return the entry for the specified key, or a default constructed value if no such entry exists.
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
ValueT lookup_or(const_arg_type_t< KeyT > Val, U &&Default) const
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
constexpr bool isVector() const
One or more elements.
static constexpr ElementCount getScalable(ScalarTy MinVal)
constexpr bool isScalar() const
Exactly one element.
Utility class for floating point operations which can have information about relaxed accuracy require...
FastMathFlags getFastMathFlags() const
Convenience function for getting all the fast-math flags.
Convenience struct for specifying and reasoning about fast-math flags.
Represents flags for the getelementptr instruction/expression.
static GEPNoWrapFlags noUnsignedWrap()
GEPNoWrapFlags withoutNoUnsignedWrap() const
static GEPNoWrapFlags none()
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
A struct for saving information about induction variables.
static LLVM_ABI InductionDescriptor getCanonicalIntInduction(Type *Ty, ScalarEvolution &SE)
Returns the canonical integer induction for type Ty with start = 0 and step = 1.
InductionKind
This enum represents the kinds of inductions that we support.
@ IK_NoInduction
Not an induction variable.
@ IK_FpInduction
Floating point induction variable.
@ IK_PtrInduction
Pointer induction var. Step = C.
@ IK_IntInduction
Integer induction variable. Step = C.
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
static InstructionCost getInvalid(CostType Val=0)
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
The group of interleaved loads/stores sharing the same stride and close to each other.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
static bool getDecisionAndClampRange(const std::function< bool(ElementCount)> &Predicate, VFRange &Range)
Test a Predicate on a Range of VF's.
Represents a single loop in the control flow graph.
LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
This class implements a map that also provides access to all stored values in a deterministic order.
ValueT lookup(const KeyT &Key) const
std::pair< iterator, bool > try_emplace(const KeyT &Key, Ts &&...Args)
Representation for a specific memory location.
Function * getFunction(StringRef Name) const
Look up the specified function in the module symbol table.
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
Post-order traversal of a graph.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
LLVM_ABI const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
static LLVM_ABI unsigned getOpcode(RecurKind Kind)
Returns the opcode corresponding to the RecurrenceKind.
unsigned getOpcode() const
static bool isFindLastRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
RegionT * getParent() const
Get the parent of the Region.
This class represents a constant integer value.
ConstantInt * getValue() const
This class uses information about analyzed scalars to rewrite expressions in canonical form.
LLVM_ABI Value * expandCodeFor(SCEVUse SH, Type *Ty, BasicBlock::iterator I)
Insert code to directly compute the specified SCEV expression into the program.
static const SCEV * rewrite(const SCEV *Scev, ScalarEvolution &SE, ValueToSCEVMapTy &Map)
This class represents an analyzed expression in the program.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
LLVM_ABI const SCEV * getUDivExpr(SCEVUse LHS, SCEVUse RHS)
Get a canonical unsigned division expression, or something simpler if possible.
const DataLayout & getDataLayout() const
Return the DataLayout associated with the module this SCEV instance is operating on.
LLVM_ABI const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
LLVM_ABI bool isKnownNonZero(const SCEV *S)
Test if the given expression is known to be non-zero.
LLVM_ABI const SCEV * getConstant(ConstantInt *V)
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI const SCEV * getMinusSCEV(SCEVUse LHS, SCEVUse RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
ConstantRange getSignedRange(const SCEV *S)
Determine the signed range for a particular SCEV.
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LLVM_ABI bool isKnownPositive(const SCEV *S)
Test if the given expression is known to be positive.
LLVM_ABI const SCEV * getElementCount(Type *Ty, ElementCount EC, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
ConstantRange getUnsignedRange(const SCEV *S)
Determine the unsigned range for a particular SCEV.
LLVM_ABI const SCEV * getMulExpr(SmallVectorImpl< SCEVUse > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
LLVM_ABI bool isKnownPredicate(CmpPredicate Pred, SCEVUse LHS, SCEVUse RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
static LLVM_ABI AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB)
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Provides information about what library functions are available for the current target.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
This class implements a switch-like dispatch statement for a value of 'T' using dyn_cast functionalit...
TypeSwitch< T, ResultT > & Case(CallableT &&caseFn)
Add a case on the given type.
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
bool isStructTy() const
True if this is an instance of StructType.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static SmallVector< VFInfo, 8 > getMappings(const CallInst &CI)
Retrieve all the VFInfo instances associated to the CallInst CI.
A recipe for generating the active lane mask for the vector loop that is used to predicate the vector...
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
RecipeListTy::iterator iterator
Instruction iterators...
iterator begin()
Recipe iterator methods.
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
VPBasicBlock * splitAt(iterator SplitAt)
Split current block at SplitAt by inserting a new block between the current block and its successors ...
const VPRecipeBase & front() const
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
const VPRecipeBase & back() const
A recipe for vectorizing a phi-node as a sequence of mask-based select instructions.
VPValue * getIncomingValue(unsigned Idx) const
Return incoming value number Idx.
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
void setMask(unsigned Idx, VPValue *V)
Set mask number Idx to V.
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have...
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccs as successor of this VPBlockBase.
VPRegionBlock * getParent()
const VPBasicBlock * getExitingBasicBlock() const
size_t getNumSuccessors() const
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
const VPBlocksTy & getPredecessors() const
const std::string & getName() const
void clearSuccessors()
Remove all the successors of this block.
VPBlockBase * getSinglePredecessor() const
void clearPredecessors()
Remove all the predecessors of this block.
const VPBasicBlock * getEntryBasicBlock() const
VPBlockBase * getSingleHierarchicalPredecessor()
VPBlockBase * getSingleSuccessor() const
const VPBlocksTy & getSuccessors() const
static auto blocksAs(T &&Range)
Return an iterator range over Range with each block cast to BlockTy.
static void insertOnEdge(VPBlockBase *From, VPBlockBase *To, VPBlockBase *BlockPtr)
Inserts BlockPtr on the edge between From and To.
static bool isLatch(const VPBlockBase *VPB, const VPDominatorTree &VPDT)
Returns true if VPB is a loop latch, using isHeader().
static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBases IfTrue and IfFalse after BlockPtr.
static void connectBlocks(VPBlockBase *From, VPBlockBase *To, unsigned PredIdx=-1u, unsigned SuccIdx=-1u)
Connect VPBlockBases From and To bi-directionally.
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
static auto blocksOnly(T &&Range)
Return an iterator range over Range which only includes BlockTy blocks.
static void transferSuccessors(VPBlockBase *Old, VPBlockBase *New)
Transfer successors from Old to New. New must have no successors.
static SmallVector< VPBasicBlock * > blocksInSingleSuccessorChainBetween(VPBasicBlock *FirstBB, VPBasicBlock *LastBB)
Returns the blocks between FirstBB and LastBB, where FirstBB to LastBB forms a single-successor chain.
A recipe for generating conditional branches on the bits of a mask.
RAII object that stores the current insertion point and restores it when the object is destroyed.
VPlan-based builder utility analogous to IRBuilder.
VPInstruction * createFirstActiveLane(ArrayRef< VPValue * > Masks, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createAdd(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", VPRecipeWithIRFlags::WrapFlagsTy WrapFlags={false, false})
VPInstruction * createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPValue * createScalarZExtOrTrunc(VPValue *Op, Type *ResultTy, Type *SrcTy, DebugLoc DL)
VPInstruction * createLogicalOr(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createNot(VPValue *Operand, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createAnyOfReduction(VPValue *ChainOp, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL=DebugLoc::getUnknown())
Create an AnyOf reduction pattern: or-reduce ChainOp, freeze the result, then select between TrueVal ...
VPInstruction * createLogicalAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPDerivedIVRecipe * createDerivedIV(InductionDescriptor::InductionKind Kind, FPMathOperator *FPBinOp, VPIRValue *Start, VPValue *Current, VPValue *Step)
Convert the input value Current to the corresponding value of an induction with Start and Step values...
VPInstruction * createScalarCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, DebugLoc DL, const VPIRMetadata &Metadata={})
VPWidenPHIRecipe * createWidenPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
static VPBuilder getToInsertAfter(VPRecipeBase *R)
Create a VPBuilder to insert after R.
VPWidenCastRecipe * createWidenCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
VPInstruction * createICmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new ICmp VPInstruction with predicate Pred and operands A and B.
VPPhi * createScalarPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={}, Type *ResultTy=nullptr)
VPInstruction * createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={})
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", Type *ResultTy=nullptr)
Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.
static VPSingleDefRecipe * createSingleScalarOp(unsigned Opcode, ArrayRef< VPValue * > Operands, VPValue *Mask, const VPIRFlags &Flags, const VPIRMetadata &Metadata, DebugLoc DL, Instruction *UV)
Create a single-scalar recipe with Opcode and Operands without inserting it.
void setInsertPoint(VPBasicBlock *TheBB)
This specifies that created VPInstructions should be appended to the end of the specified block.
A recipe for generating the phi node tracking the current scalar iteration index.
unsigned getNumDefinedValues() const
Returns the number of values defined by the VPDef.
VPValue * getVPSingleValue()
Returns the only VPValue defined by the VPDef.
VPValue * getVPValue(unsigned I)
Returns the VPValue with index I defined by the VPDef.
ArrayRef< VPRecipeValue * > definedValues()
Returns an ArrayRef of the values defined by the VPDef.
A recipe for converting the input value IV value to the corresponding value of an IV with different s...
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
bool properlyDominates(const VPRecipeBase *A, const VPRecipeBase *B) const
A recipe to combine multiple recipes into a single 'expression' recipe, which should be considered a ...
A recipe representing a sequence of load -> update -> store as part of a histogram operation.
A special type of VPBasicBlock that wraps an existing IR basic block.
Class to record and manage LLVM IR flags.
static VPIRFlags getDefaultFlags(unsigned Opcode)
Returns default flags for Opcode for opcodes that support it, asserts otherwise.
LLVM_ABI_FOR_TEST FastMathFlags getFastMathFlagsOrNone() const
void dropPoisonGeneratingFlags()
Drop all poison-generating flags.
static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)
Create a new VPIRPhi for I, if it is a PHINode, otherwise create a VPIRInstruction.
This is a concrete Recipe that models a single VPlan-level instruction.
unsigned getNumOperandsWithoutMask() const
Returns the number of operands, excluding the mask if the VPInstruction is masked.
@ ExtractLane
Extracts a single lane (first operand) from a set of vector operands.
@ ExtractPenultimateElement
@ Unpack
Extracts all lanes from its (non-scalable) vector operand.
@ ReductionStartVector
Start vector for reductions with 3 operands: the original start value, the identity value for the red...
@ BuildVector
Creates a fixed-width vector containing all operands.
@ BuildStructVector
Given operands of (the same) struct type, creates a struct of fixed- width vectors each containing a ...
@ CanonicalIVIncrementForPart
@ ComputeReductionResult
Reduce the operands to the final reduction result using the operation specified via the operation's V...
unsigned getOpcode() const
const InterleaveGroup< Instruction > * getInterleaveGroup() const
VPValue * getMask() const
Return the mask used by this recipe.
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
A recipe for interleaved memory operations with vector-predication intrinsics.
VPInterleaveRecipe is a recipe for transforming an interleave group of load or stores into one wide l...
void addIncoming(VPValue *IncomingV)
Append IncomingV as an incoming value to the phi-like recipe.
VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when control converges back from ...
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
VPBasicBlock * getParent()
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Helper class to create VPRecipes from IR instructions.
VPHistogramRecipe * widenIfHistogram(VPInstruction *VPI)
If VPI represents a histogram operation (as determined by LoopVectorizationLegality) make that safe f...
bool prefersVectorizedAddressing() const
Returns true if the target prefers vectorized addressing.
VPRecipeBase * tryToWidenMemory(VPInstruction *VPI, VFRange &Range)
Check if the load or store instruction VPI should widened for Range.Start and potentially masked.
bool replaceWithFinalIfReductionStore(VPInstruction *VPI, VPBuilder &FinalRedStoresBuilder)
If VPI is a store of a reduction into an invariant address, delete it.
VPSingleDefRecipe * handleReplication(VPInstruction *VPI, VFRange &Range)
Build a replicating or single-scalar recipe for VPI.
bool isPredicatedInst(Instruction *I) const
Returns true if I needs to be predicated (i.e.
Type * getScalarType() const
Returns the scalar type of this VPRecipeValue.
A recipe to represent inloop reduction operations with vector-predication intrinsics,...
A recipe for handling reduction phis.
void setVFScaleFactor(unsigned ScaleFactor)
Set the VFScaleFactor for this reduction phi.
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by, or 1 if it isn't scaled.
RecurKind getRecurrenceKind() const
Returns the recurrence kind of the reduction.
A recipe to represent inloop, ordered or partial reduction operations.
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
const VPBlockBase * getEntry() const
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
VPInstruction * getOrCreateCanonicalIVIncrement()
Get the canonical IV increment instruction if it exists.
void setExiting(VPBlockBase *ExitingBlock)
Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.
Type * getCanonicalIVType() const
Return the type of the canonical IV for loop regions.
void clearCanonicalIVNUW(VPInstruction *Increment)
Unsets NUW for the canonical IV increment Increment, for loop regions.
VPRegionValue * getCanonicalIV()
Return the canonical induction variable of the region, null for replicating regions.
const VPBlockBase * getExiting() const
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
bool isSingleScalar() const
static InstructionCost computeCallCost(Function *CalledFn, Type *ResultTy, ArrayRef< const VPValue * > ArgOps, bool IsSingleScalar, ElementCount VF, VPCostContext &Ctx)
Return the cost of scalarizing a call to CalledFn with argument operands ArgOps for a given VF.
operand_range operandsWithoutMask()
Return the recipe's operands, excluding the mask of a predicated recipe.
bool isPredicated() const
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
Lightweight SCEV-to-VPlan expander.
VPValue * tryToExpand(const SCEV *S)
Try to expand S into recipes and live-ins using the builder.
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...
VPSingleDefRecipe is a base class for recipes that model a sequence of one or more output IR that def...
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
VPSingleDefRecipe * clone() override=0
Clone the current recipe.
A symbolic live-in VPValue, used for values like vector trip count, VF, and VFxUF.
bool isMaterialized() const
Returns true if this value has been materialized.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
void setOperand(unsigned I, VPValue *New)
unsigned getNumOperands() const
VPValue * getOperand(unsigned N) const
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Type * getScalarType() const
Returns the scalar type of this VPValue, dispatching based on the concrete subclass.
Value * getLiveInIRValue() const
Return the underlying IR value for a VPIRValue.
bool isDefinedOutsideLoopRegions() const
Returns true if the VPValue is defined outside any loop.
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
bool hasMoreThanOneUniqueUser() const
Returns true if the value has more than one unique user.
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
void setUnderlyingValue(Value *Val)
VPUser * getSingleUser()
Return the single user of this value, or nullptr if there is not exactly one user.
void replaceAllUsesWith(VPValue *New)
unsigned getNumUsers() const
void replaceUsesWithIf(VPValue *New, llvm::function_ref< bool(VPUser &U, unsigned Idx)> ShouldReplace)
Go through the uses list for this VPValue and make each use point to New if the callback ShouldReplac...
A recipe to compute a pointer to the last element of each part of a widened memory access for widened...
A recipe to compute the pointers for widened memory accesses of SourceElementTy, with the Stride expr...
A recipe for widening Call instructions using library calls.
static InstructionCost computeCallCost(Function *Variant, VPCostContext &Ctx)
Return the cost of widening a call using the vector function Variant.
VPWidenCastRecipe is a recipe to create vector cast instructions.
Instruction::CastOps getOpcode() const
A recipe for handling GEP instructions.
Base class for widened induction (VPWidenIntOrFpInductionRecipe and VPWidenPointerInductionRecipe),...
VPIRValue * getStartValue() const
Returns the start value of the induction.
PHINode * getPHINode() const
Returns the underlying PHINode if one exists, or null otherwise.
VPValue * getStepValue()
Returns the step value of the induction.
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
A recipe for handling phi nodes of integer and floating-point inductions, producing their vector valu...
VPIRValue * getStartValue() const
Returns the start value of the induction.
VPValue * getSplatVFValue() const
If the recipe has been unrolled, return the VPValue for the induction increment, otherwise return nul...
TruncInst * getTruncInst()
Returns the first defined value as TruncInst, if it is one or nullptr otherwise.
VPValue * getLastUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the last unrolled part,...
A recipe for widening vector intrinsics.
static InstructionCost computeCallCost(Intrinsic::ID ID, ArrayRef< const VPValue * > Operands, const VPRecipeWithIRFlags &R, ElementCount VF, VPCostContext &Ctx)
Compute the cost of a vector intrinsic with ID and Operands.
static InstructionCost computeMemIntrinsicCost(Intrinsic::ID IID, Type *Ty, bool IsMasked, Align Alignment, VPCostContext &Ctx)
Helper function for computing the cost of vector memory intrinsic.
A common mixin class for widening memory operations.
virtual VPRecipeBase * getAsRecipe()=0
Return a VPRecipeBase* to the current object.
A recipe for widened phis.
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenRecipe.
VPWidenRecipe * clone() override
Clone the current recipe.
unsigned getOpcode() const
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
VPIRValue * getLiveIn(Value *V) const
Return the live-in VPIRValue for V, if there is one or nullptr otherwise.
bool hasVF(ElementCount VF) const
const DataLayout & getDataLayout() const
LLVMContext & getContext() const
VPBasicBlock * getEntry()
bool hasScalableVF() const
VPValue * getTripCount() const
The trip count of the original loop.
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
iterator_range< SmallSetVector< ElementCount, 2 >::iterator > vectorFactors() const
Returns an iterator range over all VFs of the plan.
VPIRValue * getFalse()
Return a VPIRValue wrapping i1 false.
VPSymbolicValue & getVFxUF()
Returns VF * UF of the vector loop region.
VPIRValue * getAllOnesValue(Type *Ty)
Return a VPIRValue wrapping the AllOnes value of type Ty.
VPRegionBlock * createReplicateRegion(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="")
Create a new replicate region with Entry, Exiting and Name.
auto getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
bool hasUF(unsigned UF) const
VPIRValue * getPoison(Type *Ty)
Return a VPIRValue wrapping a poison value of type Ty.
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
VPSymbolicValue & getVectorTripCount()
The vector trip count.
VPValue * getBackedgeTakenCount() const
VPIRValue * getOrAddLiveIn(Value *V)
Gets the live-in VPIRValue for V or adds a new live-in (if none exists yet) for V.
VPIRValue * getZero(Type *Ty)
Return a VPIRValue wrapping the null value of type Ty.
void setVF(ElementCount VF)
bool isUnrolled() const
Returns true if the VPlan already has been unrolled, i.e.
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
unsigned getConcreteUF() const
Returns the concrete UF of the plan, after unrolling.
void resetTripCount(VPValue *NewTripCount)
Resets the trip count for the VPlan.
VPBasicBlock * getMiddleBlock()
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
VPIRValue * getTrue()
Return a VPIRValue wrapping i1 true.
VPBasicBlock * getVectorPreheader() const
Returns the preheader of the vector loop region, if one exists, or null otherwise.
VPSymbolicValue & getUF()
Returns the UF of the vector loop region.
bool hasScalarVFOnly() const
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
VPSymbolicValue & getVF()
Returns the VF of the vector loop region.
bool hasScalarTail() const
Returns true if the scalar tail may execute after the vector loop, i.e.
LLVM_ABI_FOR_TEST VPlan * duplicate()
Clone the current VPlan, update all VPValues of the new VPlan and cloned recipes to refer to the clon...
VPIRValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPIRValue wrapping a ConstantInt with the given type and value.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
iterator_range< user_iterator > users()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
constexpr bool hasKnownScalarFactor(const FixedOrScalableQuantity &RHS) const
Returns true if there exists a value X where RHS.multiplyCoefficientBy(X) will result in a value whos...
constexpr ScalarTy getFixedValue() const
constexpr ScalarTy getKnownScalarFactor(const FixedOrScalableQuantity &RHS) const
Returns a value X where RHS.multiplyCoefficientBy(X) will result in a value whose quantity matches ou...
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
An efficient, type-erasing, non-owning reference to a callable.
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_ABI APInt RoundingUDiv(const APInt &A, const APInt &B, APInt::Rounding RM)
Return A unsign-divided by B, rounded by the given rounding mode.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
std::variant< std::monostate, Loc::Single, Loc::Multi, Loc::MMI, Loc::EntryValue > Variant
Alias for the std::variant specialization base class of DbgVariable.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
match_isa< To... > m_Isa()
match_combine_or< Ty... > m_CombineOr(const Ty &...Ps)
Combine pattern matchers matching any of Ps patterns.
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
auto m_Cmp()
Matches any compare instruction and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_MaskedStore(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
Matches MaskedStore Intrinsic.
match_combine_or< CastInst_match< OpTy, TruncInst >, OpTy > m_TruncOrSelf(const OpTy &Op)
auto m_Poison()
Match an arbitrary poison constant.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
LogicalOp_match< LHS, RHS, Instruction::And > m_LogicalAnd(const LHS &L, const RHS &R)
Matches L && R either in the form of L & R or L ?
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
BinaryOp_match< LHS, RHS, Instruction::FMul > m_FMul(const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, OpTy > m_ZExtOrSelf(const OpTy &Op)
bool match(Val *V, const Pattern &P)
match_deferred< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
auto match_fn(const Pattern &P)
A match functor that can be used as a UnaryPredicate in functional algorithms like all_of.
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_MaskedLoad(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
Matches MaskedLoad Intrinsic.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
SpecificCmpClass_match< LHS, RHS, CmpInst > m_SpecificCmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
CastInst_match< OpTy, FPExtInst > m_FPExt(const OpTy &Op)
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::UDiv > m_UDiv(const LHS &L, const RHS &R)
SelectLike_match< CondTy, LTy, RTy > m_SelectLike(const CondTy &C, const LTy &TrueC, const RTy &FalseC)
Matches a value that behaves like a boolean-controlled select, i.e.
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinaryOp_match< LHS, RHS, Instruction::FAdd, true > m_c_FAdd(const LHS &L, const RHS &R)
Matches FAdd with LHS and RHS in either order.
LogicalOp_match< LHS, RHS, Instruction::And, true > m_c_LogicalAnd(const LHS &L, const RHS &R)
Matches L && R with LHS and RHS in either order.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
BinaryOp_match< LHS, RHS, Instruction::Mul, true > m_c_Mul(const LHS &L, const RHS &R)
Matches a Mul with LHS and RHS in either order.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
auto m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
bind_cst_ty m_scev_APInt(const APInt *&C)
Match an SCEV constant and bind it to an APInt.
specificloop_ty m_SpecificLoop(const Loop *L)
bool match(const SCEV *S, const Pattern &P)
SCEVAffineAddRec_match< Op0_t, Op1_t, match_isa< const Loop > > m_scev_AffineAddRec(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::ExtractLastLane, VPInstruction_match< VPInstruction::ExtractLastPart, Op0_t > > m_ExtractLastLaneOfLastPart(const Op0_t &Op0)
AllRecipe_commutative_match< Instruction::And, Op0_t, Op1_t > m_c_BinaryAnd(const Op0_t &Op0, const Op1_t &Op1)
Match a binary AND operation.
AllRecipe_match< Instruction::Or, Op0_t, Op1_t > m_BinaryOr(const Op0_t &Op0, const Op1_t &Op1)
Match a binary OR operation.
VPInstruction_match< VPInstruction::AnyOf > m_AnyOf()
AllRecipe_commutative_match< Instruction::Or, Op0_t, Op1_t > m_c_BinaryOr(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::ComputeReductionResult, Op0_t > m_ComputeReductionResult(const Op0_t &Op0)
auto m_WidenAnyExtend(const Op0_t &Op0)
match_bind< VPIRValue > m_VPIRValue(VPIRValue *&V)
Match a VPIRValue.
VPInstruction_match< VPInstruction::StepVector > m_StepVector()
auto m_VPPhi(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::BranchOnTwoConds > m_BranchOnTwoConds()
AllRecipe_match< Opcode, Op0_t, Op1_t > m_Binary(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::LastActiveLane, Op0_t > m_LastActiveLane(const Op0_t &Op0)
auto m_WidenIntrinsic(const T &...Ops)
canonical_widen_iv_match m_CanonicalWidenIV()
VPInstruction_match< VPInstruction::ExitingIVValue, Op0_t > m_ExitingIVValue(const Op0_t &Op0)
VPInstruction_match< Instruction::ExtractElement, Op0_t, Op1_t > m_ExtractElement(const Op0_t &Op0, const Op1_t &Op1)
specific_intval< 1 > m_False()
VPInstruction_match< VPInstruction::ExtractLastLane, Op0_t > m_ExtractLastLane(const Op0_t &Op0)
VPInstruction_match< VPInstruction::ActiveLaneMask, Op0_t, Op1_t, Op2_t > m_ActiveLaneMask(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
match_bind< VPSingleDefRecipe > m_VPSingleDefRecipe(VPSingleDefRecipe *&V)
Match a VPSingleDefRecipe, capturing if we match.
VPInstruction_match< VPInstruction::BranchOnCount > m_BranchOnCount()
auto m_GetElementPtr(const Op0_t &Op0, const Op1_t &Op1)
specific_intval< 1 > m_True()
auto m_VPValue()
Match an arbitrary VPValue and ignore it.
VectorEndPointerRecipe_match< Op0_t, Op1_t > m_VecEndPtr(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::ExtractLastPart, Op0_t > m_ExtractLastPart(const Op0_t &Op0)
VPRecipeBase * findUserOf(VPValue *V, const MatchT &P)
If V is used by a recipe matching pattern P, return it.
VPInstruction_match< VPInstruction::Broadcast, Op0_t > m_Broadcast(const Op0_t &Op0)
VPInstruction_match< VPInstruction::ExplicitVectorLength, Op0_t > m_EVL(const Op0_t &Op0)
VPInstruction_match< VPInstruction::BuildVector > m_BuildVector()
BuildVector matches only its opcode, w/o matching its operands as the number of operands is not fi...
VPInstruction_match< VPInstruction::ExtractPenultimateElement, Op0_t > m_ExtractPenultimateElement(const Op0_t &Op0)
match_bind< VPInstruction > m_VPInstruction(VPInstruction *&V)
Match a VPInstruction, capturing if we match.
VPInstruction_match< VPInstruction::FirstActiveLane, Op0_t > m_FirstActiveLane(const Op0_t &Op0)
auto m_DerivedIV(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
VPInstruction_match< VPInstruction::BranchOnCond > m_BranchOnCond()
VPInstruction_match< VPInstruction::ExtractLane, Op0_t, Op1_t > m_ExtractLane(const Op0_t &Op0, const Op1_t &Op1)
auto m_AnyNeg(const Op0_t &Op0)
VPInstruction_match< VPInstruction::Reverse, Op0_t > m_Reverse(const Op0_t &Op0)
NodeAddr< DefNode * > Def
bool isSingleScalar(const VPValue *VPV)
Returns true if VPV is a single scalar, either because it produces the same value for all lanes or on...
VPValue * getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr)
Get or create a VPValue that corresponds to the expansion of Expr.
bool cannotHoistOrSinkRecipe(const VPRecipeBase &R, bool Sinking=false)
Return true if we do not know how to (mechanically) hoist or sink R.
VPInstruction * findComputeReductionResult(VPReductionPHIRecipe *PhiR)
Find the ComputeReductionResult recipe for PhiR, looking through selects inserted for predicated redu...
VPInstruction * findCanonicalIVIncrement(VPlan &Plan)
Find the canonical IV increment of Plan's vector loop region.
std::optional< MemoryLocation > getMemoryLocation(const VPRecipeBase &R)
Return a MemoryLocation for R with noalias metadata populated from R, if the recipe is supported and ...
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
VPValue * findIncomingAliasMask(const VPlan &Plan)
Finds the incoming alias-mask within the vector preheader.
VPRecipeBase * findRecipe(VPValue *Start, PredT Pred)
Search Start's users for a recipe satisfying Pred, looking through recipes with definitions.
VPSingleDefRecipe * findHeaderMask(VPlan &Plan)
Collect the header mask with the pattern: (ICMP_ULE, WideCanonicalIV, backedge-taken-count) Note: If ...
bool onlyScalarValuesUsed(const VPValue *Def)
Returns true if only scalar values of Def are used by all users.
bool isUniformAcrossVFsAndUFs(const VPValue *V)
Checks if V is uniform across all VF lanes and UF parts.
bool isUsedByLoadStoreAddress(const VPValue *V)
Returns true if V is used as part of the address of another load or store.
GEPNoWrapFlags getGEPFlagsForPtr(VPValue *Ptr)
Returns the GEP nowrap flags for Ptr, looking through pointer casts mirroring Value::stripPointerCast...
const SCEV * getSCEVExprForVPValue(const VPValue *V, PredicatedScalarEvolution &PSE, const Loop *L=nullptr)
Return the SCEV expression for V.
bool isHeaderMask(const VPValue *V, const VPlan &Plan)
Return true if V is a header mask in Plan.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
SmallVector< VPBasicBlock * > vp_rpo_plain_cfg_loop_body(VPBasicBlock *Header)
Returns the VPBasicBlocks forming the loop body of a plain (pre-region) VPlan in reverse post-order s...
constexpr auto not_equal_to(T &&Arg)
Functor variant of std::not_equal_to that can be used as a UnaryPredicate in functional algorithms li...
void stable_sort(R &&Range)
auto min_element(R &&Range)
Provide wrappers to std::min_element which take ranges instead of having to pass begin/end explicitly...
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
DenseMap< const Value *, const SCEV * > ValueToSCEVMapTy
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
constexpr from_range_t from_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
auto cast_or_null(const Y &Val)
iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order.
constexpr auto bind_back(FnT &&Fn, BindArgsT &&...BindArgs)
C++23 bind_back.
iterator_range< df_iterator< VPBlockDeepTraversalWrapper< VPBlockBase * > > > vp_depth_first_deep(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order while traversing t...
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like a...
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
SmallVector< VPRegisterUsage, 8 > calculateRegisterUsageForPlan(VPlan &Plan, ArrayRef< ElementCount > VFs, const TargetTransformInfo &TTI, const SmallPtrSetImpl< const Value * > &ValuesToIgnore)
Estimate the register usage for Plan and vectorization factors in VFs by calculating the highest numb...
auto map_range(ContainerTy &&C, FuncTy F)
Return a range that applies F to the elements of C.
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
auto dyn_cast_or_null(const Y &Val)
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
constexpr size_t range_size(R &&Range)
Returns the size of the Range, i.e., the number of elements.
void sort(IteratorTy Start, IteratorTy End)
bool hasIrregularType(Type *Ty, const DataLayout &DL)
A helper function that returns true if the given type is irregular.
LLVM_ABI_FOR_TEST cl::opt< bool > EnableWideActiveLaneMask
UncountableExitStyle
Different methods of handling early exits.
@ MaskedHandleExitInScalarLoop
All memory operations other than the load(s) required to determine whether an uncountable exit occurr...
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
iterator_range< filter_iterator< detail::IterOfRange< RangeT >, PredicateT > > make_filter_range(RangeT &&Range, PredicateT Pred)
Convenience function that takes a range of elements and a predicate, and return a new filter_iterator...
bool canConstantBeExtended(const APInt *C, Type *NarrowType, TTI::PartialReductionExtendKind ExtKind)
Check if a constant CI can be safely treated as having been extended from a narrower type with the gi...
T * find_singleton(R &&Range, Predicate P, bool AllowRepeats=false)
Return the single value in Range that satisfies P(<member of Range> *, AllowRepeats)->T * returning n...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
RecurKind
These are the kinds of recurrences that we support.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ FindIV
FindIV reduction with select(icmp(),x,y) where one of (x,y) is a loop induction variable (increasing ...
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
@ FSub
Subtraction of floats.
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ Sub
Subtraction of integers.
@ AddChainWithSubs
A chain of adds and subs.
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
LLVM_ABI Value * getRecurrenceIdentity(RecurKind K, Type *Tp, FastMathFlags FMF)
Given information about a recurrence kind, return the identity for the @llvm.vector....
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the specified block at the specified instruction.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
DWARFExpression::Operation Op
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...
ArrayRef(const T &OneElt) -> ArrayRef< T >
auto make_second_range(ContainerTy &&c)
Given a container of pairs, return a range over the second elements.
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
RelativeUniformCounterPtr ValuesPtrExpr VTableAddr Next
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list is empty.
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
LLVM_ABI std::optional< int64_t > getStrideFromAddRec(const SCEVAddRecExpr *AR, const Loop *Lp, Type *AccessTy, Value *Ptr, PredicatedScalarEvolution &PSE)
If AR is an affine AddRec for Lp with a constant step, return the step in units of AccessTy's allocat...
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
LLVM_ABI bool isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L, ScalarEvolution &SE, DominatorTree &DT, AssumptionCache *AC=nullptr, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
Return true if we can prove that the given load (which is assumed to be within the specified loop) wo...
@ Default
The result value is uniform if and only if all operands are uniform.
constexpr detail::IsaCheckPredicate< Types... > IsaPred
Function object wrapper for the llvm::isa type check.
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
VPBasicBlock * EarlyExitingVPBB
VPIRBasicBlock * EarlyExitVPBB
RemoveMask_match(const Op0_t &In, Op1_t &Out)
bool match(OpTy *V) const
This struct is a compact representation of a valid (non-zero power of two) alignment.
An information struct used to provide DenseMap with the various necessary components for a given valu...
This reduction is unordered with the partial result scaled down by some factor.
Holds the VFShape for a specific scalar to vector function mapping.
Encapsulates information needed to describe a parameter.
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
Struct to hold various analysis needed for cost computations.
static bool isFreeScalarIntrinsic(Intrinsic::ID ID)
Returns true if ID is a pseudo intrinsic that is dropped via scalarization rather than widened.
bool isMaskRequired(Instruction *I) const
Forwards to LoopVectorizationCostModel::isMaskRequired.
PredicatedScalarEvolution & PSE
bool willBeScalarized(Instruction *I, ElementCount VF) const
Returns true if I is known to be scalarized at VF.
TargetTransformInfo::TargetCostKind CostKind
const TargetLibraryInfo & TLI
const TargetTransformInfo & TTI
A VPValue representing a live-in from the input IR or a constant.
Type * getType() const
Returns the type of the underlying IR value.
A struct that represents some properties of the register usage of a loop.
SmallMapVector< unsigned, unsigned, 4 > MaxLocalUsers
Holds the maximum number of concurrent live intervals in the loop.
InstructionCost spillCost(const TargetTransformInfo &TTI, TargetTransformInfo::TargetCostKind CostKind, unsigned OverrideMaxNumRegs=0) const
Calculate the estimated cost of any spills due to using more registers than the number available for ...
A recipe for widening load operations with vector-predication intrinsics, using the address to load f...
A recipe for widening load operations, using the address to load from and an optional mask.
A recipe for widening store operations with vector-predication intrinsics, using the value to store,...
A recipe for widening store operations, using the stored value, the address to store to and an option...