58 if (!VPBB->getParent())
61 auto EndIter = Term ? Term->getIterator() : VPBB->end();
66 VPValue *VPV = Ingredient.getVPSingleValue();
82 *Load, Ingredient.getOperand(0),
nullptr ,
83 false , *VPI, Ingredient.getDebugLoc());
86 *Store, Ingredient.getOperand(1), Ingredient.getOperand(0),
87 nullptr ,
false , *VPI,
88 Ingredient.getDebugLoc());
91 Ingredient.operands(), *VPI,
92 Ingredient.getDebugLoc(),
GEP);
104 if (VectorID == Intrinsic::experimental_noalias_scope_decl)
109 if (VectorID == Intrinsic::assume ||
110 VectorID == Intrinsic::lifetime_end ||
111 VectorID == Intrinsic::lifetime_start ||
112 VectorID == Intrinsic::sideeffect ||
113 VectorID == Intrinsic::pseudoprobe) {
118 const bool IsSingleScalar = VectorID != Intrinsic::assume &&
119 VectorID != Intrinsic::pseudoprobe;
123 Ingredient.getDebugLoc());
126 *CI, VectorID,
drop_end(Ingredient.operands()), CI->getType(),
127 VPIRFlags(*CI), *VPI, CI->getDebugLoc());
131 CI->getOpcode(), Ingredient.getOperand(0), CI->getType(), CI,
135 *VPI, Ingredient.getDebugLoc());
139 "inductions must be created earlier");
148 "Only recpies with zero or one defined values expected");
149 Ingredient.eraseFromParent();
160 const Loop *L =
nullptr;
165 if (
A->getOpcode() != Instruction::Store ||
166 B->getOpcode() != Instruction::Store)
179 const APInt *Distance;
185 Type *TyA =
A->getOperand(0)->getScalarType();
187 Type *TyB =
B->getOperand(0)->getScalarType();
193 uint64_t MaxStoreSize = std::max(SizeA, SizeB);
195 auto VFs =
B->getParent()->getPlan()->vectorFactors();
199 return Distance->
abs().
uge(
207 : ExcludeRecipes(ExcludeRecipes.begin(), ExcludeRecipes.end()),
208 GroupLeader(GroupLeader), PSE(&PSE), L(&L) {}
217 return ExcludeRecipes.contains(Store) ||
218 (Store && isNoAliasViaDistance(Store, &GroupLeader));
231 std::optional<SinkStoreInfo> SinkInfo = {}) {
232 bool CheckReads = SinkInfo.has_value();
239 if (SinkInfo && SinkInfo->shouldSkip(R))
243 if (!
R.mayWriteToMemory() && !(CheckReads &&
R.mayReadFromMemory()))
268template <
unsigned Opcode>
273 static_assert(Opcode == Instruction::Load || Opcode == Instruction::Store,
274 "Only Load and Store opcodes supported");
275 constexpr bool IsLoad = (Opcode == Instruction::Load);
278 RecipesByAddressAndType;
283 if (!RepR || RepR->getOpcode() != Opcode || !FilterFn(RepR))
287 VPValue *Addr = RepR->getOperand(IsLoad ? 0 : 1);
291 RecipesByAddressAndType[{AddrSCEV, LoadStoreTy}].push_back(RepR);
296 for (
auto &Group :
Groups) {
311 auto InsertIfValidSinkCandidate = [ScalarVFOnly, &WorkList](
323 if (Candidate->getParent() == SinkTo ||
328 if (!ScalarVFOnly && RepR->isSingleScalar())
331 WorkList.
insert({SinkTo, Candidate});
343 for (
auto &Recipe : *VPBB)
345 InsertIfValidSinkCandidate(VPBB,
Op);
349 for (
unsigned I = 0;
I != WorkList.
size(); ++
I) {
352 std::tie(SinkTo, SinkCandidate) = WorkList[
I];
357 auto UsersOutsideSinkTo =
359 return cast<VPRecipeBase>(U)->getParent() != SinkTo;
361 if (
any_of(UsersOutsideSinkTo, [SinkCandidate](
VPUser *U) {
362 return !U->usesFirstLaneOnly(SinkCandidate);
365 bool NeedsDuplicating = !UsersOutsideSinkTo.empty();
367 if (NeedsDuplicating) {
371 if (
auto *SinkCandidateRepR =
376 SinkCandidateRepR->getOpcode(), SinkCandidate->
operands(),
377 nullptr, *SinkCandidateRepR, *SinkCandidateRepR,
381 Clone = SinkCandidate->
clone();
391 InsertIfValidSinkCandidate(SinkTo,
Op);
401 if (!EntryBB || EntryBB->size() != 1 ||
411 if (EntryBB->getNumSuccessors() != 2)
416 if (!Succ0 || !Succ1)
419 if (Succ0->getNumSuccessors() + Succ1->getNumSuccessors() != 1)
421 if (Succ0->getSingleSuccessor() == Succ1)
423 if (Succ1->getSingleSuccessor() == Succ0)
440 if (!Region1->isReplicator())
442 auto *MiddleBasicBlock =
444 if (!MiddleBasicBlock || !MiddleBasicBlock->empty())
449 if (!Region2 || !Region2->isReplicator())
454 if (!Mask1 || Mask1 != Mask2)
457 assert(Mask1 && Mask2 &&
"both region must have conditions");
463 if (TransformedRegions.
contains(Region1))
470 if (!Then1 || !Then2)
490 VPValue *Phi1ToMoveV = Phi1ToMove.getVPSingleValue();
496 if (Phi1ToMove.getVPSingleValue()->user_empty()) {
497 Phi1ToMove.eraseFromParent();
500 Phi1ToMove.moveBefore(*Merge2, Merge2->begin());
514 TransformedRegions.
insert(Region1);
517 return !TransformedRegions.
empty();
525 std::string RegionName = (
Twine(
"pred.") + Instr->getOpcodeName()).str();
526 assert(Instr->getParent() &&
"Predicated instruction not in any basic block");
527 auto *BlockInMask = PredRecipe->
getMask();
548 Region->setParent(ParentRegion);
554 RecipeWithoutMask->getDebugLoc());
555 Exiting->appendRecipe(PHIRecipe);
568 if (RepR->isPredicated())
587 if (ParentRegion && ParentRegion->
getExiting() == CurrentBlock)
599 if (!VPBB->getParent())
603 if (!PredVPBB || PredVPBB->getNumSuccessors() != 1 ||
612 R.moveBefore(*PredVPBB, PredVPBB->
end());
614 auto *ParentRegion = VPBB->getParent();
615 if (ParentRegion && ParentRegion->getExiting() == VPBB)
616 ParentRegion->setExiting(PredVPBB);
620 return !WorkList.
empty();
627 bool ShouldSimplify =
true;
628 while (ShouldSimplify) {
644 if (!
IV ||
IV->getTruncInst())
659 for (
auto *U : FindMyCast->
users()) {
661 if (UserCast && UserCast->getUnderlyingValue() == IRCast) {
662 FoundUserCast = UserCast;
669 FindMyCast = FoundUserCast;
671 if (FindMyCast !=
IV)
686 Builder.createDerivedIV(Kind, FPBinOp, StartV, CanonicalIV, Step);
695 BaseIV = Builder.createScalarCast(Instruction::Trunc, BaseIV, TruncTy,
DL);
701 if (ResultTy != StepTy) {
708 Builder.setInsertPoint(VecPreheader);
709 Step = Builder.createScalarCast(Instruction::Trunc, Step, ResultTy,
DL);
711 return Builder.createScalarIVSteps(InductionOpcode, FPBinOp, BaseIV, Step,
737 WideCanIV->getDebugLoc(), Builder));
738 WideCanIV->eraseFromParent();
755 WideCanIV->replaceAllUsesWith(WidenIV);
756 WideCanIV->eraseFromParent();
765 if (PHICost > BroadcastCost)
774 unsigned RegClass =
TTI.getRegisterClassForType(
true, VecTy);
786 WideCanIV->getNoWrapFlags(), WideCanIV->getDebugLoc());
787 NewWideIV->insertBefore(&*Header->getFirstNonPhi());
788 WideCanIV->replaceAllUsesWith(NewWideIV);
789 WideCanIV->eraseFromParent();
797 bool IsConditionalAssume = RepR && RepR->isPredicated() &&
799 if (IsConditionalAssume)
802 if (R.mayHaveSideEffects())
806 return all_of(R.definedValues(), [](
VPValue *V) { return V->user_empty(); });
826 VPUser *PhiUser = PhiR->getSingleUser();
832 PhiR->replaceAllUsesWith(Start);
833 PhiR->eraseFromParent();
841 for (
unsigned I = 0;
I !=
Users.size(); ++
I) {
844 Users.insert_range(V->users());
846 return Users.takeVector();
860 nullptr, StartV, StepV, PtrIV->
getDebugLoc(), Builder);
897 Def->user_empty() || !Def->getUnderlyingValue() ||
898 (RepR && (RepR->isSingleScalar() || RepR->isPredicated())))
911 Def->getUnderlyingInstr()->getOpcode(), Def->operands(),
913 Def->getUnderlyingInstr());
914 Clone->insertAfter(Def);
915 Def->replaceAllUsesWith(Clone);
926 PtrIV->replaceAllUsesWith(PtrAdd);
933 if (HasOnlyVectorVFs &&
none_of(WideIV->users(), [WideIV](
VPUser *U) {
934 return U->usesScalars(WideIV);
940 Plan,
ID.getKind(),
ID.getInductionOpcode(),
942 WideIV->getTruncInst(), WideIV->getStartValue(), WideIV->getStepValue(),
943 WideIV->getDebugLoc(), Builder);
946 if (!HasOnlyVectorVFs) {
948 "plans containing a scalar VF cannot also include scalable VFs");
949 WideIV->replaceAllUsesWith(Steps);
952 WideIV->replaceUsesWithIf(Steps,
953 [WideIV, HasScalableVF](
VPUser &U,
unsigned) {
955 return U.usesFirstLaneOnly(WideIV);
956 return U.usesScalars(WideIV);
972 return (IntOrFpIV && IntOrFpIV->getTruncInst()) ? nullptr : WideIV;
977 if (!Def || Def->getNumOperands() != 2)
985 auto IsWideIVInc = [&]() {
986 auto &
ID = WideIV->getInductionDescriptor();
989 VPValue *IVStep = WideIV->getStepValue();
990 switch (
ID.getInductionOpcode()) {
991 case Instruction::Add:
993 case Instruction::FAdd:
995 case Instruction::FSub:
998 case Instruction::Sub: {
1018 return IsWideIVInc() ? WideIV :
nullptr;
1035 if (WideIntOrFp && WideIntOrFp->getTruncInst())
1046 VPValue *FirstActiveLane =
B.createFirstActiveLane(Mask,
DL);
1047 FirstActiveLane =
B.createScalarZExtOrTrunc(
1048 FirstActiveLane, CanonicalIVType, FirstActiveLane->
getScalarType(),
DL);
1049 VPValue *EndValue =
B.createAdd(CanonicalIV, FirstActiveLane,
DL);
1054 if (Incoming != WideIV) {
1056 EndValue =
B.createAdd(EndValue, One,
DL);
1061 VPIRValue *Start = WideIV->getStartValue();
1062 VPValue *Step = WideIV->getStepValue();
1063 EndValue =
B.createDerivedIV(
1065 Start, EndValue, Step);
1079 if (WideIntOrFp && WideIntOrFp->getTruncInst())
1089 Start, VectorTC, Step);
1119 assert(EndValue &&
"Must have computed the end value up front");
1124 if (Incoming != WideIV)
1136 auto *Zero = Plan.
getZero(StepTy);
1137 return B.createPtrAdd(EndValue,
B.createSub(Zero, Step),
1142 return B.createNaryOp(
1143 ID.getInductionBinOp()->getOpcode() == Instruction::FAdd
1145 : Instruction::FAdd,
1146 {EndValue, Step}, {ID.getInductionBinOp()->getFastMathFlags()});
1157 VPBuilder VectorPHBuilder(VectorPH, VectorPH->begin());
1167 EndValues[WideIV] = EndValue;
1177 R.getVPSingleValue()->replaceAllUsesWith(EndValue);
1178 R.eraseFromParent();
1187 for (
auto [Idx, PredVPBB] :
enumerate(ExitVPBB->getPredecessors())) {
1189 if (PredVPBB == MiddleVPBB)
1191 Plan, ExitIRI->getOperand(Idx), EndValues, PSE);
1194 Plan, ExitIRI->getOperand(Idx), PSE);
1196 ExitIRI->setOperand(Idx, Escape);
1213 const auto &[V, Inserted] = SCEV2VPV.
try_emplace(ExpR->getSCEV(), ExpR);
1217 ExpR->replaceAllUsesWith(V->second);
1221 ExpR->eraseFromParent();
1230 while (!WorkList.
empty()) {
1232 if (!Seen.
insert(Cur).second)
1240 R->eraseFromParent();
1247static std::optional<std::pair<bool, unsigned>>
1250 std::optional<std::pair<bool, unsigned>>>(R)
1253 [](
auto *
I) {
return std::make_pair(
false,
I->getOpcode()); })
1255 return std::make_pair(
true,
I->getVectorIntrinsicID());
1257 .Case<VPVectorPointerRecipe, VPPredInstPHIRecipe, VPScalarIVStepsRecipe>(
1263 I->getVPRecipeID());
1265 .
Default([](
auto *) {
return std::nullopt; });
1290 VPlan &Plan = *R.getParent()->getPlan();
1291 auto FoldToIRValue = [&]() ->
Value * {
1293 if (OpcodeOrIID->first) {
1295 return Folder.FoldIntrinsic(OpcodeOrIID->second,
Ops, R.getScalarType(),
1296 RFlags ? RFlags->getFastMathFlagsOrNone()
1299 unsigned Opcode = OpcodeOrIID->second;
1305 R.getVPSingleValue()->getScalarType());
1308 return Folder.FoldBinOp(Instruction::BinaryOps::Xor,
Ops[0],
1310 case Instruction::Select:
1311 return Folder.FoldSelect(
Ops[0],
Ops[1],
Ops[2]);
1312 case Instruction::ICmp:
1313 case Instruction::FCmp:
1316 case Instruction::GetElementPtr: {
1319 return Folder.FoldGEP(
GEP->getSourceElementType(),
Ops[0],
1329 case Instruction::ExtractElement:
1336 if (
Value *V = FoldToIRValue())
1343 bool CanCreateNewRecipe) {
1344 VPlan *Plan = Def->getParent()->getPlan();
1354 Def->replaceAllUsesWith(
X);
1355 Def->eraseFromParent();
1367 Def->replaceAllUsesWith(
X);
1379 Def->replaceAllUsesWith(Plan->
getZero(Def->getScalarType()));
1385 Def->replaceAllUsesWith(
X);
1391 Def->replaceAllUsesWith(Plan->
getFalse());
1397 Def->replaceAllUsesWith(
X);
1402 if (CanCreateNewRecipe &&
1407 (!Def->getOperand(0)->hasMoreThanOneUniqueUser() ||
1408 !Def->getOperand(1)->hasMoreThanOneUniqueUser())) {
1409 Def->replaceAllUsesWith(
1410 Builder.createLogicalAnd(
X, Builder.createOr(
Y, Z)));
1417 Def->replaceAllUsesWith(Def->getOperand(1));
1424 Def->replaceAllUsesWith(Builder.createLogicalAnd(
X,
Y));
1430 Def->replaceAllUsesWith(Plan->
getFalse());
1435 Def->replaceAllUsesWith(
X);
1441 if (CanCreateNewRecipe &&
1443 Def->replaceAllUsesWith(Builder.createNot(
C));
1449 Def->setOperand(0,
C);
1450 Def->setOperand(1,
Y);
1451 Def->setOperand(2,
X);
1456 if (CanCreateNewRecipe &&
1460 Y->getScalarType()->isIntegerTy(1)) {
1461 Def->replaceAllUsesWith(
1462 Builder.createOr(
Y, Builder.createLogicalAnd(
X, Z)));
1471 VPlan *Plan = Def->getParent()->getPlan();
1477 return Def->replaceAllUsesWith(V);
1483 PredPHI->replaceAllUsesWith(
Op);
1490 RepR && RepR->isPredicated() && RepR->getOpcode() == Instruction::Store &&
1494 RepR->getUnderlyingInstr(), RepR->operandsWithoutMask(),
1495 RepR->isSingleScalar(),
nullptr, *RepR, *RepR,
1496 RepR->getDebugLoc());
1497 Unmasked->insertBefore(RepR);
1498 RepR->replaceAllUsesWith(Unmasked);
1499 RepR->eraseFromParent();
1513 bool CanCreateNewRecipe =
1518 Type *TruncTy = Def->getScalarType();
1519 Type *ATy =
A->getScalarType();
1520 if (TruncTy == ATy) {
1521 Def->replaceAllUsesWith(
A);
1530 : Instruction::ZExt;
1533 if (
auto *UnderlyingExt = Def->getOperand(0)->getUnderlyingValue()) {
1535 Ext->setUnderlyingValue(UnderlyingExt);
1537 Def->replaceAllUsesWith(Ext);
1539 auto *Trunc = Builder.createWidenCast(Instruction::Trunc,
A, TruncTy);
1540 Def->replaceAllUsesWith(Trunc);
1550 return Def->replaceAllUsesWith(
A);
1553 return Def->replaceAllUsesWith(
A);
1556 return Def->replaceAllUsesWith(Plan->
getZero(Def->getScalarType()));
1562 return Def->replaceAllUsesWith(Builder.createSub(
1563 Plan->
getZero(
A->getScalarType()),
A, Def->getDebugLoc(),
"", NW));
1566 if (CanCreateNewRecipe &&
1574 ->hasNoSignedWrap()};
1575 return Def->replaceAllUsesWith(
1576 Builder.createSub(
X,
Y, Def->getDebugLoc(),
"", NW));
1582 return Def->replaceAllUsesWith(Builder.createNaryOp(
1584 {A, Plan->getConstantInt(APC->getBitWidth(), APC->exactLogBase2())},
1589 return Def->replaceAllUsesWith(Builder.createNaryOp(
1591 {A, Plan->getConstantInt(APC->getBitWidth(), APC->exactLogBase2())},
1596 return Def->replaceAllUsesWith(
A);
1611 R->setOperand(1,
Y);
1612 R->setOperand(2,
X);
1616 R->replaceAllUsesWith(Cmp);
1621 if (!Cmp->getDebugLoc() && Def->getDebugLoc())
1622 Cmp->setDebugLoc(Def->getDebugLoc());
1634 if (
Op->getNumUsers() > 1 ||
1638 }
else if (!UnpairedCmp) {
1639 UnpairedCmp =
Op->getDefiningRecipe();
1643 UnpairedCmp =
nullptr;
1650 if (NewOps.
size() < Def->getNumOperands()) {
1652 return Def->replaceAllUsesWith(NewAnyOf);
1659 if (CanCreateNewRecipe &&
1665 return Def->replaceAllUsesWith(NewCmp);
1671 Def->getOperand(1)->getScalarType() == Def->getScalarType())
1672 return Def->replaceAllUsesWith(Def->getOperand(1));
1676 Type *WideStepTy = Def->getScalarType();
1677 if (
X->getScalarType() != WideStepTy)
1678 X = Builder.createWidenCast(Instruction::Trunc,
X, WideStepTy);
1679 Def->replaceAllUsesWith(
X);
1688 Def->getScalarType()->isIntegerTy(1)) {
1689 Def->setOperand(1, Def->getOperand(0));
1690 Def->setOperand(0,
Y);
1697 return Def->replaceAllUsesWith(Def->getOperand(0));
1703 Def->replaceAllUsesWith(
1704 BuildVector->getOperand(BuildVector->getNumOperands() - 1));
1709 return Def->replaceAllUsesWith(
X);
1712 return Def->replaceAllUsesWith(
A);
1715 return Def->replaceAllUsesWith(
A);
1721 Def->replaceAllUsesWith(
1722 BuildVector->getOperand(BuildVector->getNumOperands() - 2));
1729 Def->replaceAllUsesWith(BuildVector->getOperand(Idx));
1734 Def->replaceAllUsesWith(
1744 "broadcast operand must be single-scalar");
1745 Def->setOperand(0,
C);
1750 return Def->replaceUsesWithIf(
1751 X, [Def](
const VPUser &U,
unsigned) {
return U.usesScalars(Def); });
1754 if (Def->getNumOperands() == 1) {
1755 Def->replaceAllUsesWith(Def->getOperand(0));
1760 Phi->replaceAllUsesWith(Phi->getOperand(0));
1766 if (Def->getNumOperands() == 1 &&
1768 return Def->replaceAllUsesWith(IRV);
1781 return Def->replaceAllUsesWith(
A);
1788 return Def->replaceAllUsesWith(WidenIV->getRegion()->getCanonicalIV());
1791 Def->replaceAllUsesWith(Builder.createNaryOp(
1792 Instruction::ExtractElement, {A, LaneToExtract}, Def->getDebugLoc()));
1806 auto *IVInc = Def->getOperand(0);
1807 if (IVInc->getNumUsers() == 2) {
1812 if (Phi->getNumUsers() == 1 || (Phi->getNumUsers() == 2 && Inc)) {
1813 Def->replaceAllUsesWith(IVInc);
1815 Inc->replaceAllUsesWith(Phi);
1816 Phi->setOperand(0,
Y);
1832 Steps->replaceAllUsesWith(Steps->getOperand(0));
1840 Def->replaceUsesWithIf(StartV, [](
const VPUser &U,
unsigned Idx) {
1842 return PhiR && PhiR->isInLoop();
1848 return Def->replaceAllUsesWith(
A);
1867 R.getVPSingleValue()->replaceAllUsesWith(
X);
1883 while (!Worklist.
empty()) {
1892 R->replaceAllUsesWith(
1893 Builder.createLogicalAnd(HeaderMask, Builder.createLogicalAnd(
X,
Y)));
1897static std::optional<Instruction::BinaryOps>
1900 case Intrinsic::masked_udiv:
1901 return Instruction::UDiv;
1902 case Intrinsic::masked_sdiv:
1903 return Instruction::SDiv;
1904 case Intrinsic::masked_urem:
1905 return Instruction::URem;
1906 case Intrinsic::masked_srem:
1907 return Instruction::SRem;
1924 if (RepR && (RepR->isSingleScalar() || RepR->isPredicated()))
1928 if (RepR && RepR->getOpcode() == Instruction::Store &&
1931 RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(),
1932 true ,
nullptr , *RepR ,
1933 *RepR , RepR->getDebugLoc());
1934 Clone->insertBefore(RepOrWidenR);
1936 VPValue *ExtractOp = Clone->getOperand(0);
1942 Clone->setOperand(0, ExtractOp);
1943 RepR->eraseFromParent();
1955 VPValue *SafeDivisor = Builder.createSelect(
1956 IntrR->getOperand(2), IntrR->getOperand(1),
1958 VPValue *Clone = Builder.createNaryOp(
1959 *
Opc, {IntrR->getOperand(0), SafeDivisor},
1962 IntrR->eraseFromParent();
1971 auto IntroducesBCastOf = [](
const VPValue *
Op) {
1980 return !U->usesScalars(
Op);
1984 if (
any_of(RepOrWidenR->users(), IntroducesBCastOf(RepOrWidenR)) &&
1987 make_filter_range(Op->users(), not_equal_to(RepOrWidenR)),
1988 IntroducesBCastOf(Op)))
1992 auto *IRV = dyn_cast<VPIRValue>(Op);
1993 bool LiveInNeedsBroadcast = IRV && !isa<Constant>(IRV->getValue());
1994 auto *OpR = dyn_cast<VPReplicateRecipe>(Op);
1995 return LiveInNeedsBroadcast || (OpR && OpR->isSingleScalar());
2002 RepOrWidenR->getUnderlyingInstr());
2003 Clone->insertBefore(RepOrWidenR);
2004 RepOrWidenR->replaceAllUsesWith(Clone);
2006 RepOrWidenR->eraseFromParent();
2042 if (Blend->isNormalized() || !
match(Blend->getMask(0),
m_False()))
2043 UniqueValues.
insert(Blend->getIncomingValue(0));
2044 for (
unsigned I = 1;
I != Blend->getNumIncomingValues(); ++
I)
2046 UniqueValues.
insert(Blend->getIncomingValue(
I));
2048 if (UniqueValues.
size() == 1) {
2049 Blend->replaceAllUsesWith(*UniqueValues.
begin());
2050 Blend->eraseFromParent();
2054 if (Blend->isNormalized())
2060 unsigned StartIndex = 0;
2061 for (
unsigned I = 0;
I != Blend->getNumIncomingValues(); ++
I) {
2073 OperandsWithMask.
push_back(Blend->getIncomingValue(StartIndex));
2075 for (
unsigned I = 0;
I != Blend->getNumIncomingValues(); ++
I) {
2076 if (
I == StartIndex)
2078 OperandsWithMask.
push_back(Blend->getIncomingValue(
I));
2079 OperandsWithMask.
push_back(Blend->getMask(
I));
2084 OperandsWithMask, *Blend, Blend->getDebugLoc());
2085 NewBlend->insertBefore(&R);
2087 VPValue *DeadMask = Blend->getMask(StartIndex);
2089 Blend->eraseFromParent();
2094 if (NewBlend->getNumOperands() == 3 &&
2096 VPValue *Inc0 = NewBlend->getOperand(0);
2097 VPValue *Inc1 = NewBlend->getOperand(1);
2098 VPValue *OldMask = NewBlend->getOperand(2);
2099 NewBlend->setOperand(0, Inc1);
2100 NewBlend->setOperand(1, Inc0);
2101 NewBlend->setOperand(2, NewMask);
2128 APInt MaxVal = AlignedTC - 1;
2131 unsigned NewBitWidth =
2137 bool MadeChange =
false;
2162 "canonical IV is not expected to have a truncation");
2167 NewWideIV->insertBefore(WideIV);
2174 Cmp->replaceAllUsesWith(
2175 VPBuilder(Cmp).createICmp(Cmp->getPredicate(), NewWideIV, NewBTC));
2189 return any_of(
Cond->getDefiningRecipe()->operands(), [&Plan, BestVF, BestUF,
2191 return isConditionTrueViaVFAndUF(C, Plan, BestVF, BestUF, PSE);
2205 const SCEV *VectorTripCount =
2210 "Trip count SCEV must be computable");
2231 auto *Term = &ExitingVPBB->
back();
2244 for (
unsigned Part = 0; Part < UF; ++Part) {
2250 Extracts[Part] = Ext;
2262 match(Phi->getBackedgeValue(),
2264 assert(Index &&
"Expected index from ActiveLaneMask instruction");
2281 "Expected one VPActiveLaneMaskPHIRecipe for each unroll part");
2288 "Expected incoming values of Phi to be ActiveLaneMasks");
2293 EntryALM->setOperand(2, ALMMultiplier);
2294 LoopALM->setOperand(2, ALMMultiplier);
2298 ExtractFromALM(EntryALM, EntryExtracts);
2303 ExtractFromALM(LoopALM, LoopExtracts);
2305 Not->setOperand(0, LoopExtracts[0]);
2308 for (
unsigned Part = 0; Part < UF; ++Part) {
2309 Phis[Part]->setStartValue(EntryExtracts[Part]);
2310 Phis[Part]->setBackedgeValue(LoopExtracts[Part]);
2323 auto *Term = &ExitingVPBB->
back();
2335 const SCEV *VectorTripCount =
2341 "Trip count SCEV must be computable");
2360 Term->setOperand(1, Plan.
getTrue());
2365 {}, Term->getDebugLoc());
2367 Term->eraseFromParent();
2400 R.getVPSingleValue()->replaceAllUsesWith(Trunc);
2410 assert(Plan.
hasVF(BestVF) &&
"BestVF is not available in Plan");
2411 assert(Plan.
hasUF(BestUF) &&
"BestUF is not available in Plan");
2429 RecurKind RK = PhiR->getRecurrenceKind();
2436 RecWithFlags->dropPoisonGeneratingFlags();
2442struct VPCSEDenseMapInfo :
public DenseMapInfo<VPSingleDefRecipe *> {
2451 return GEP->getSourceElementType();
2454 .Case<VPVectorPointerRecipe, VPWidenGEPRecipe>(
2455 [](
auto *
I) {
return I->getSourceElementType(); })
2456 .
Default([](
auto *) {
return nullptr; });
2460 static bool canHandle(
const VPSingleDefRecipe *Def) {
2469 if (!
C || (!
C->first && (
C->second == Instruction::InsertValue ||
2470 C->second == Instruction::ExtractValue)))
2476 return !
Def->mayReadFromMemory();
2480 static unsigned getHashValue(
const VPSingleDefRecipe *Def) {
2483 getGEPSourceElementType(Def),
Def->getScalarType(),
2486 if (RFlags->hasPredicate())
2489 return hash_combine(Result, SIVSteps->getInductionOpcode());
2494 static bool isEqual(
const VPSingleDefRecipe *L,
const VPSingleDefRecipe *R) {
2495 if (
L->getVPRecipeID() !=
R->getVPRecipeID() ||
2497 getGEPSourceElementType(L) != getGEPSourceElementType(R) ||
2499 !
equal(
L->operands(),
R->operands()))
2502 "must have valid opcode info for both recipes");
2504 if (LFlags->hasPredicate() &&
2505 LFlags->getPredicate() !=
2509 if (LSIV->getInductionOpcode() !=
2515 const VPRegionBlock *RegionL =
L->getRegion();
2516 const VPRegionBlock *RegionR =
R->getRegion();
2519 L->getParent() !=
R->getParent())
2521 return L->getScalarType() ==
R->getScalarType();
2537 if (!Def || !VPCSEDenseMapInfo::canHandle(Def))
2541 if (!VPDT.
dominates(V->getParent(), VPBB))
2546 Def->replaceAllUsesWith(V);
2559 bool Sinking =
false) {
2588 "Expected vector prehader's successor to be the vector loop region");
2596 return !Op->isDefinedOutsideLoopRegions();
2599 R.moveBefore(*Preheader, Preheader->
end());
2619 assert(!RepR->isPredicated() &&
2620 "Expected prior transformation of predicated replicates to "
2621 "replicate regions");
2626 if (!RepR->isSingleScalar())
2630 if (RepR->getOpcode() == Instruction::Store &&
2631 !RepR->getOperand(1)->isDefinedOutsideLoopRegions())
2636 assert((!R.mayWriteToMemory() ||
2637 (RepR && RepR->getOpcode() == Instruction::Store &&
2638 RepR->getOperand(1)->isDefinedOutsideLoopRegions())) &&
2639 "The only recipes that may write to memory are expected to be "
2640 "stores with invariant pointer-operand");
2650 if (
any_of(Def->users(), [&SinkBB, &LoopRegion](
VPUser *U) {
2651 auto *UserR = cast<VPRecipeBase>(U);
2652 VPBasicBlock *Parent = UserR->getParent();
2654 if (SinkBB && SinkBB != Parent)
2659 return UserR->isPhi() || Parent->getEnclosingLoopRegion() ||
2660 Parent->getSinglePredecessor() != LoopRegion;
2670 "Defining block must dominate sink block");
2695 VPValue *ResultVPV = R.getVPSingleValue();
2697 unsigned NewResSizeInBits = MinBWs.
lookup(UI);
2698 if (!NewResSizeInBits)
2711 (void)OldResSizeInBits;
2719 VPW->dropPoisonGeneratingFlags();
2721 assert((OldResSizeInBits != NewResSizeInBits ||
2723 "Only ICmps should not need extending the result.");
2729 if (OldResSizeInBits != NewResSizeInBits) {
2731 Instruction::ZExt, ResultVPV, OldResTy);
2733 Ext->setOperand(0, ResultVPV);
2743 unsigned OpSizeInBits =
Op->getScalarType()->getScalarSizeInBits();
2744 if (OpSizeInBits == NewResSizeInBits)
2746 assert(OpSizeInBits > NewResSizeInBits &&
"nothing to truncate");
2747 auto [ProcessedIter, Inserted] = ProcessedTruncs.
try_emplace(
Op);
2753 Builder.setInsertPoint(&R);
2754 ProcessedIter->second =
2755 Builder.createWidenCast(Instruction::Trunc,
Op, NewResTy);
2757 Op = ProcessedIter->second;
2761 NWR->insertBefore(&R);
2765 VPValue *Replacement = NWR->getVPSingleValue();
2766 if (OldResSizeInBits != NewResSizeInBits)
2772 R.eraseFromParent();
2778 std::optional<VPDominatorTree> VPDT;
2786 bool SimplifiedPhi =
false;
2796 assert(VPBB->getNumSuccessors() == 2 &&
2797 "Two successors expected for BranchOnCond");
2798 unsigned RemovedIdx;
2809 "There must be a single edge between VPBB and its successor");
2812 auto Phis = RemovedSucc->
phis();
2815 SimplifiedPhi |= !std::empty(Phis);
2819 VPBB->back().eraseFromParent();
2831 if (Reachable.contains(
B))
2842 for (
VPValue *Def : R.definedValues())
2843 Def->replaceAllUsesWith(&Tmp);
2844 R.eraseFromParent();
2848 return SimplifiedPhi;
2903 DebugLoc DL = CanonicalIVIncrement->getDebugLoc();
2914 auto *EntryIncrement = Builder.createOverflowingOp(
2916 DL,
"index.part.next");
2922 {EntryIncrement, TC, ALMMultiplier},
DL,
2923 "active.lane.mask.entry");
2930 LaneMaskPhi->insertBefore(*HeaderVPBB, HeaderVPBB->begin());
2935 Builder.setInsertPoint(OriginalTerminator);
2936 auto *InLoopIncrement = Builder.createOverflowingOp(
2938 {CanonicalIVIncrement, &Plan.
getVF()}, {
false,
false},
DL);
2940 {InLoopIncrement, TC, ALMMultiplier},
DL,
2941 "active.lane.mask.next");
2942 LaneMaskPhi->addBackedgeValue(ALM);
2946 auto *NotMask = Builder.createNot(ALM,
DL);
2953 bool UseActiveLaneMaskForControlFlow) {
2955 auto *WideCanonicalIV =
2957 assert(WideCanonicalIV &&
2958 "Must have widened canonical IV when tail folding!");
2961 if (UseActiveLaneMaskForControlFlow) {
2970 nullptr,
"active.lane.mask");
2986 template <
typename OpTy>
bool match(OpTy *V)
const {
2997template <
typename Op0_t,
typename Op1_t>
3005 case Intrinsic::masked_udiv:
3006 return Intrinsic::vp_udiv;
3007 case Intrinsic::masked_sdiv:
3008 return Intrinsic::vp_sdiv;
3009 case Intrinsic::masked_urem:
3010 return Intrinsic::vp_urem;
3011 case Intrinsic::masked_srem:
3012 return Intrinsic::vp_srem;
3014 return std::nullopt;
3029 VPValue *Addr, *Mask, *EndPtr;
3032 auto AdjustEndPtr = [&CurRecipe, &EVL](
VPValue *EndPtr) {
3034 EVLEndPtr->insertBefore(&CurRecipe);
3039 EVLEndPtr->setOperand(1, EVLAsVF);
3043 auto GetVPReverse = [&CurRecipe, &EVL, Plan,
3048 Intrinsic::experimental_vp_reverse, {V, Plan->
getTrue(), &EVL},
3049 V->getScalarType(), {}, {},
DL);
3050 Reverse->insertBefore(&CurRecipe);
3054 if (
match(&CurRecipe,
3059 if (
match(&CurRecipe,
3063 Mask = GetVPReverse(Mask);
3064 Addr = AdjustEndPtr(EndPtr);
3067 LoadR->insertBefore(&CurRecipe);
3071 LoadR->getScalarType(), {}, {},
DL);
3082 NewLoad->setOperand(2, Mask);
3083 NewLoad->setOperand(3, &EVL);
3091 StoredVal, EVL, Mask);
3093 if (
match(&CurRecipe,
3097 Mask = GetVPReverse(Mask);
3098 Addr = AdjustEndPtr(EndPtr);
3101 Intrinsic::vector_splice_right, {StoredVal,
Poison, &EVL},
3105 SpliceR, EVL, Mask);
3109 if (Rdx->isConditional() &&
3114 if (Interleave->getMask() &&
3122 Intrinsic::vp_merge, {Mask ? Mask : Plan->
getTrue(),
LHS,
RHS, &EVL},
3123 LHS->getScalarType(), {}, {},
DL);
3136 if (
match(&CurRecipe,
3141 LHS->getScalarType(), {}, {},
DL);
3147 {IntrR->getOperand(0),
3148 IntrR->getOperand(1),
3149 Mask ? Mask : Plan->
getTrue(), &EVL},
3150 IntrR->getScalarType(), {}, {},
DL);
3159 VPValue *HeaderMask =
nullptr, *EVL =
nullptr;
3164 HeaderMask = R.getVPSingleValue();
3175 NewR->insertBefore(R);
3176 for (
auto [Old, New] :
3177 zip_equal(R->definedValues(), NewR->definedValues()))
3178 Old->replaceAllUsesWith(New);
3191 Mask->getScalarType(), {}, {}, LogicalAnd->getDebugLoc());
3192 Merge->insertBefore(LogicalAnd);
3193 LogicalAnd->replaceAllUsesWith(
Merge);
3209 R->getVPSingleValue()->replaceAllUsesWith(
X);
3223 Intrinsic::experimental_vp_reverse, {
X, Plan.
getTrue(), EVL},
3224 X->getScalarType(), {}, {}, R->getDebugLoc());
3225 VPReverse->insertBefore(R);
3226 R->getVPSingleValue()->replaceAllUsesWith(VPReverse);
3232 R->eraseFromParent();
3253 auto IsAllowedUser =
3254 IsaPred<VPVectorEndPointerRecipe, VPScalarIVStepsRecipe,
3255 VPWidenIntOrFpInductionRecipe,
3256 VPWidenMemIntrinsicRecipe>;
3257 if (match(U, m_Trunc(m_Specific(&Plan.getVF()))))
3258 return all_of(cast<VPSingleDefRecipe>(U)->users(),
3260 return IsAllowedUser(U);
3262 "User of VF that we can't transform to EVL.");
3272 "Only users of VFxUF should be VPWidenPointerInductionRecipe and the "
3273 "increment of the canonical induction.");
3289 MaxEVL = Builder.createScalarZExtOrTrunc(
3293 Builder.setInsertPoint(Header, Header->getFirstNonPhi());
3294 VPValue *PrevEVL = Builder.createScalarPhi(
3308 Intrinsic::experimental_vp_splice,
3309 {
V1, V2, Imm, Plan.
getTrue(), PrevEVL, &EVL},
3310 R.getVPSingleValue()->getScalarType(), {}, {}, R.getDebugLoc());
3312 R.getVPSingleValue()->replaceAllUsesWith(VPSplice);
3325 if (match(&R, m_ComputeReductionResult(m_Select(m_Specific(HeaderMask),
3326 m_VPValue(), m_VPValue()))))
3327 return R.getOperand(0)->getDefiningRecipe()->getRegion() ==
3328 Plan.getVectorLoopRegion();
3340 VPValue *EVLMask = Builder.createICmp(
3400 VPlan &Plan,
const std::optional<unsigned> &MaxSafeElements) {
3412 auto *CurrentIteration =
3414 CurrentIteration->insertBefore(*Header, Header->begin());
3415 VPBuilder Builder(Header, Header->getFirstNonPhi());
3418 VPPhi *AVLPhi = Builder.createScalarPhi(
3422 if (MaxSafeElements) {
3432 Builder.setInsertPoint(CanonicalIVIncrement);
3436 OpVPEVL = Builder.createScalarZExtOrTrunc(
3437 OpVPEVL, CanIVTy, I32Ty, CanonicalIVIncrement->getDebugLoc());
3439 auto *NextIter = Builder.createAdd(
3440 OpVPEVL, CurrentIteration, CanonicalIVIncrement->getDebugLoc(),
3441 "current.iteration.next", CanonicalIVIncrement->getNoWrapFlags());
3442 CurrentIteration->addBackedgeValue(NextIter);
3446 "avl.next", {
true,
false});
3454 CanonicalIV->replaceAllUsesWith(CurrentIteration);
3455 CanonicalIVIncrement->setOperand(0, CanonicalIV);
3469 assert(!CurrentIteration &&
3470 "Found multiple CurrentIteration. Only one expected");
3471 CurrentIteration = PhiR;
3475 if (!CurrentIteration)
3486 CurrentIteration->
getDebugLoc(),
"current.iteration.iv");
3495 CanIVInc->eraseFromParent();
3504 if (Header->empty())
3513 if (!
match(EVLPhi->getBackedgeValue(),
3526 [[maybe_unused]]
bool FoundAVLNext =
3529 assert(FoundAVLNext &&
"Didn't find AVL backedge?");
3537 [[maybe_unused]]
bool FoundIncrement =
match(
3544 "Expected BranchOnCond with ICmp comparing CanIV + VFxUF with vector "
3549 LatchBr->setOperand(
3561 "expected to run before loop regions are created");
3563 auto CanUseVersionedStride = [&VPDT, Preheader](
VPUser &U,
unsigned) {
3566 return VPDT.
dominates(Preheader, Parent);
3569 for (
const SCEV *Stride : StridesMap.
values()) {
3572 const APInt *StrideConst;
3595 RewriteMap[StrideV] = PSE.
getSCEV(StrideV);
3602 const SCEV *ScevExpr = ExpSCEV->getSCEV();
3605 if (NewSCEV != ScevExpr) {
3607 ExpSCEV->replaceAllUsesWith(NewExp);
3618 auto CollectPoisonGeneratingInstrsInBackwardSlice([&](
VPRecipeBase *Root) {
3623 while (!Worklist.
empty()) {
3626 if (!Visited.
insert(CurRec).second)
3648 RecWithFlags->isDisjoint()) {
3651 Builder.createAdd(
A,
B, RecWithFlags->getDebugLoc());
3652 New->setUnderlyingValue(RecWithFlags->getUnderlyingValue());
3653 RecWithFlags->replaceAllUsesWith(New);
3654 RecWithFlags->eraseFromParent();
3657 RecWithFlags->dropPoisonGeneratingFlags();
3662 assert((!Instr || !Instr->hasPoisonGeneratingFlags()) &&
3663 "found instruction with poison generating flags not covered by "
3664 "VPRecipeWithIRFlags");
3669 if (
VPRecipeBase *OpDef = Operand->getDefiningRecipe())
3677 auto IsNotHeaderMask = [&Plan](
VPValue *Mask) {
3689 VPRecipeBase *AddrDef = WidenRec->getAddr()->getDefiningRecipe();
3690 if (AddrDef && WidenRec->isConsecutive() &&
3691 IsNotHeaderMask(WidenRec->getMask()))
3692 CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
3694 VPRecipeBase *AddrDef = InterleaveRec->getAddr()->getDefiningRecipe();
3695 if (AddrDef && IsNotHeaderMask(InterleaveRec->getMask()))
3696 CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
3706 const bool &EpilogueAllowed) {
3707 if (InterleaveGroups.empty())
3718 IRMemberToRecipe[&MemR->getIngredient()] = MemR;
3725 for (
const auto *IG : InterleaveGroups) {
3730 return !IRMemberToRecipe.contains(Member);
3734 auto *Start = IRMemberToRecipe.
lookup(IG->getMember(0));
3738 StoredValues.
push_back(StoreR->getStoredValue());
3739 for (
unsigned I = 1;
I < IG->getFactor(); ++
I) {
3745 StoredValues.
push_back(StoreR->getStoredValue());
3749 bool NeedsMaskForGaps =
3750 (IG->requiresScalarEpilogue() && !EpilogueAllowed) ||
3751 (!StoredValues.
empty() && !IG->isFull());
3754 auto *InsertPos = IRMemberToRecipe.
lookup(IRInsertPos);
3763 VPValue *Addr = Start->getAddr();
3772 assert(IG->getIndex(IRInsertPos) != 0 &&
3773 "index of insert position shouldn't be zero");
3777 IG->getIndex(IRInsertPos),
3781 Addr =
B.createNoWrapPtrAdd(InsertPos->getAddr(), OffsetVPV, NW);
3787 if (IG->isReverse()) {
3790 -(int64_t)IG->getFactor(), NW, InsertPosR->
getDebugLoc());
3791 ReversePtr->insertBefore(InsertPosR);
3795 IG, Addr, StoredValues, InsertPos->getMask(), NeedsMaskForGaps,
3797 VPIG->insertBefore(InsertPosR);
3800 for (
unsigned i = 0; i < IG->getFactor(); ++i)
3803 if (!Member->getType()->isVoidTy()) {
3861 AddOp = Instruction::Add;
3862 MulOp = Instruction::Mul;
3864 AddOp =
ID.getInductionOpcode();
3865 MulOp = Instruction::FMul;
3873 Step = Builder.createScalarCast(Instruction::Trunc, Step, Ty,
DL);
3874 Start = Builder.createScalarCast(Instruction::Trunc, Start, Ty,
DL);
3883 Init = Builder.createWidenCast(Instruction::UIToFP,
Init, StepTy);
3888 Init = Builder.createNaryOp(MulOp, {
Init, SplatStep}, Flags);
3889 Init = Builder.createNaryOp(AddOp, {SplatStart,
Init}, Flags,
3907 if (R->getParent()->getEnclosingLoopRegion())
3908 Builder.setInsertPoint(R->getParent(), std::next(R->getIterator()));
3913 VF = Builder.createScalarCast(Instruction::CastOps::UIToFP, VF, StepTy,
3916 VF = Builder.createScalarZExtOrTrunc(VF, StepTy, VF->
getScalarType(),
DL);
3918 Inc = Builder.createNaryOp(MulOp, {Step, VF}, Flags);
3925 auto *
Next = Builder.createNaryOp(AddOp, {Prev, Inc}, Flags,
3928 WidePHI->addIncoming(
Next);
3955 VPlan *Plan = R->getParent()->getPlan();
3956 VPValue *Start = R->getStartValue();
3957 VPValue *Step = R->getStepValue();
3958 VPValue *VF = R->getVFValue();
3960 assert(R->getInductionDescriptor().getKind() ==
3962 "Not a pointer induction according to InductionDescriptor!");
3963 assert(R->getScalarType()->isPointerTy() &&
"Unexpected type.");
3965 "Recipe should have been replaced");
3971 VPPhi *ScalarPtrPhi = Builder.createScalarPhi(Start,
DL,
"pointer.phi");
3975 Builder.setInsertPoint(R->getParent(), R->getParent()->getFirstNonPhi());
3978 Offset = Builder.createOverflowingOp(Instruction::Mul, {
Offset, Step});
3980 Builder.createWidePtrAdd(ScalarPtrPhi,
Offset,
DL,
"vector.gep");
3981 R->replaceAllUsesWith(PtrAdd);
3986 VF = Builder.createScalarZExtOrTrunc(VF, StepTy, VF->
getScalarType(),
DL);
3987 VPValue *Inc = Builder.createOverflowingOp(Instruction::Mul, {Step, VF});
3990 Builder.createPtrAdd(ScalarPtrPhi, Inc,
DL,
"ptr.ind");
3998 VPValue *Step = R->getStepValue();
3999 VPValue *Index = R->getIndex();
4003 ? Builder.createScalarSExtOrTrunc(
4005 : Builder.createScalarCast(Instruction::SIToFP, Index, StepTy,
4007 switch (R->getInductionKind()) {
4009 assert(Index->getScalarType() == Start->getScalarType() &&
4010 "Index type does not match StartValue type");
4011 return R->replaceAllUsesWith(Builder.createAdd(
4012 Start, Builder.createOverflowingOp(Instruction::Mul, {Index, Step})));
4015 return R->replaceAllUsesWith(Builder.createPtrAdd(
4016 Start, Builder.createOverflowingOp(Instruction::Mul, {Index, Step})));
4021 (FPBinOp->
getOpcode() == Instruction::FAdd ||
4022 FPBinOp->
getOpcode() == Instruction::FSub) &&
4023 "Original BinOp should be defined for FP induction");
4025 VPValue *
FMul = Builder.createNaryOp(Instruction::FMul, {Step, Index}, FMF);
4026 return R->replaceAllUsesWith(
4027 Builder.createNaryOp(FPBinOp->
getOpcode(), {Start, FMul}, FMF));
4040 if (!R->isReplicator())
4044 R->dissolveToCFGLoop();
4065 assert(Br->getNumOperands() == 2 &&
4066 "BranchOnTwoConds must have exactly 2 conditions");
4070 assert(Successors.size() == 3 &&
4071 "BranchOnTwoConds must have exactly 3 successors");
4076 VPValue *Cond0 = Br->getOperand(0);
4077 VPValue *Cond1 = Br->getOperand(1);
4084 if (Succ0 == Succ1) {
4086 VPValue *Combined = Builder.createOr(Cond0, Cond1,
DL);
4090 Br->eraseFromParent();
4095 !BrOnTwoCondsBB->
getParent() &&
"regions must already be dissolved");
4108 Br->eraseFromParent();
4119 WidenIVR->eraseFromParent();
4129 WidenIVR->replaceAllUsesWith(PtrAdd);
4130 WidenIVR->eraseFromParent();
4134 WidenIVR->eraseFromParent();
4140 DerivedIVR->eraseFromParent();
4145 VPValue *CanIV = WideCanIV->getCanonicalIV();
4147 VPValue *Step = WideCanIV->getStepValue();
4150 "Expected unroller to have materialized step for UF != 1");
4155 Step = Builder.createAdd(
4158 Builder.createAdd(CanIV, Step, WideCanIV->getDebugLoc(),
"vec.iv",
4159 WideCanIV->getNoWrapFlags());
4161 WideCanIV->eraseFromParent();
4168 for (
unsigned I = 1;
I != Blend->getNumIncomingValues(); ++
I)
4169 Select = Builder.createSelect(Blend->getMask(
I),
4170 Blend->getIncomingValue(
I),
Select,
4171 R.getDebugLoc(),
"predphi", *Blend);
4172 Blend->replaceAllUsesWith(
Select);
4173 Blend->eraseFromParent();
4178 if (!VEPR->getOffset()) {
4180 "Expected unroller to have materialized offset for UF != 1");
4181 VEPR->materializeOffset();
4188 Expr->eraseFromParent();
4198 for (
VPValue *
Op : LastActiveL->operands()) {
4199 VPValue *NotMask = Builder.createNot(
Op, LastActiveL->getDebugLoc());
4204 VPValue *FirstInactiveLane = Builder.createFirstActiveLane(
4205 NotMasks, LastActiveL->getDebugLoc(),
"first.inactive.lane");
4211 Builder.createSub(FirstInactiveLane, One,
4212 LastActiveL->getDebugLoc(),
"last.active.lane");
4215 LastActiveL->eraseFromParent();
4222 assert(VPI->isMasked() &&
4223 "Unmasked MaskedCond should be simplified earlier");
4224 VPI->replaceAllUsesWith(Builder.createNaryOp(
4226 VPI->eraseFromParent();
4236 Instruction::Add, VPI->operands(), VPI->getNoWrapFlags(),
4237 VPI->getDebugLoc());
4238 VPI->replaceAllUsesWith(
Add);
4239 VPI->eraseFromParent();
4247 DebugLoc DL = BranchOnCountInst->getDebugLoc();
4250 BranchOnCountInst->eraseFromParent();
4265 ? Instruction::UIToFP
4266 : Instruction::Trunc;
4267 VectorStep = Builder.createWidenCast(CastOp, VectorStep, IVTy);
4273 Builder.createWidenCast(Instruction::Trunc, ScalarStep, IVTy);
4279 MulOpc = Instruction::FMul;
4280 Flags = VPI->getFastMathFlagsOrNone();
4282 MulOpc = Instruction::Mul;
4287 MulOpc, {VectorStep, ScalarStep}, Flags, R.getDebugLoc());
4289 VPI->replaceAllUsesWith(VectorStep);
4290 VPI->eraseFromParent();
4300static std::optional<VPValue *>
4353 VPValue *UncountableCondition =
nullptr;
4357 return std::nullopt;
4360 Worklist.
push_back(UncountableCondition);
4361 while (!Worklist.
empty()) {
4365 if (V->isDefinedOutsideLoopRegions())
4371 if (V->getNumUsers() > 1)
4372 return std::nullopt;
4384 return std::nullopt;
4388 return std::nullopt;
4396 return std::nullopt;
4404 return std::nullopt;
4406 return UncountableCondition;
4462 for (
auto &Exit : Exits) {
4463 if (Exit.EarlyExitingVPBB == LatchVPBB)
4467 cast<VPIRPhi>(&R)->removeIncomingValueFor(Exit.EarlyExitingVPBB);
4468 Exit.EarlyExitingVPBB->getTerminator()->eraseFromParent();
4479 std::optional<VPValue *>
Cond =
4495 assert(Load &&
"Couldn't find exactly one load");
4498 "Uncountable exit condition load is conditional.");
4512 DL.getTypeStoreSize(Load->getScalarType()).getFixedValue());
4536 while (InsertIt != HeaderVPBB->
end() &&
4538 erase(ConditionRecipes, &*InsertIt);
4541 for (
auto *Recipe :
reverse(ConditionRecipes))
4542 Recipe->moveBefore(*HeaderVPBB, InsertIt);
4546 VPBuilder MaskBuilder(HeaderVPBB, InsertIt);
4548 Type *IVScalarTy =
IV->getScalarType();
4555 {Zero, FirstActive, ALMMultiplier},
4556 DebugLoc(),
"uncountable.exit.mask");
4561 if (R.mayReadOrWriteMemory() && &R != Load) {
4563 if (!VPDT.
dominates(R.getParent(), LatchVPBB))
4573 "Expected BranchOnCond terminator for MiddleVPBB");
4584 auto Phis = ScalarPH->
phis();
4594 "Continuing from different IV");
4610 if (Pred == MiddleVPBB)
4615 VPValue *CondOfEarlyExitingVPBB;
4616 [[maybe_unused]]
bool Matched =
4617 match(EarlyExitingVPBB->getTerminator(),
4619 assert(Matched &&
"Terminator must be BranchOnCond");
4623 VPBuilder EarlyExitingBuilder(EarlyExitingVPBB->getTerminator());
4624 auto *CondToEarlyExit = EarlyExitingBuilder.
createNaryOp(
4626 TrueSucc == ExitBlock
4627 ? CondOfEarlyExitingVPBB
4628 : EarlyExitingBuilder.
createNot(CondOfEarlyExitingVPBB));
4634 "exit condition must dominate the latch");
4643 assert(!Exits.
empty() &&
"must have at least one early exit");
4650 for (
const auto &[Num, VPB] :
enumerate(RPOT))
4653 return RPOIdx[
A.EarlyExitingVPBB] < RPOIdx[
B.EarlyExitingVPBB];
4659 for (
unsigned I = 0;
I + 1 < Exits.
size(); ++
I)
4660 for (
unsigned J =
I + 1; J < Exits.
size(); ++J)
4662 Exits[
I].EarlyExitingVPBB) &&
4663 "RPO sort must place dominating exits before dominated ones");
4669 VPValue *Combined = Exits[0].CondToExit;
4682 "Unexpected terminator");
4683 VPValue *IsLatchExitTaken = LatchExitingBranch->getOperand(0);
4684 DebugLoc LatchDL = LatchExitingBranch->getDebugLoc();
4685 LatchExitingBranch->eraseFromParent();
4688 {IsAnyExitTaken, IsLatchExitTaken}, LatchDL);
4694 LatchVPBB->
setSuccessors({MiddleVPBB, MiddleVPBB, HeaderVPBB});
4698 Plan, Exits, HeaderVPBB, LatchVPBB, MiddleVPBB, TheLoop, PSE, DT, AC);
4703 for (
unsigned Idx = 0; Idx != Exits.
size(); ++Idx) {
4707 VectorEarlyExitVPBBs[Idx] = VectorEarlyExitVPBB;
4715 Exits.
size() == 1 ? VectorEarlyExitVPBBs[0]
4718 LatchVPBB->
setSuccessors({DispatchVPBB, MiddleVPBB, HeaderVPBB});
4750 for (
auto [Exit, VectorEarlyExitVPBB] :
4751 zip_equal(Exits, VectorEarlyExitVPBBs)) {
4752 auto &[EarlyExitingVPBB, EarlyExitVPBB,
_] = Exit;
4764 ExitIRI->getIncomingValueForBlock(EarlyExitingVPBB);
4765 VPValue *NewIncoming = IncomingVal;
4767 VPBuilder EarlyExitBuilder(VectorEarlyExitVPBB);
4772 ExitIRI->removeIncomingValueFor(EarlyExitingVPBB);
4773 ExitIRI->addIncoming(NewIncoming);
4776 EarlyExitingVPBB->getTerminator()->eraseFromParent();
4810 bool IsLastDispatch = (
I + 2 == Exits.
size());
4812 IsLastDispatch ? VectorEarlyExitVPBBs.
back()
4818 VectorEarlyExitVPBBs[
I]->setPredecessors({CurrentBB});
4821 CurrentBB = FalseBB;
4836 VPValue *VecOp = Red->getVecOp();
4838 assert(!Red->isPartialReduction() &&
4839 "This path does not support partial reductions");
4842 auto IsExtendedRedValidAndClampRange =
4855 "getExtendedReductionCost only supports integer types");
4856 ExtRedCost = Ctx.TTI.getExtendedReductionCost(
4857 Opcode, ExtOpc == Instruction::CastOps::ZExt, RedTy, SrcVecTy,
4858 Red->getFastMathFlagsOrNone(),
CostKind);
4859 return ExtRedCost.
isValid() && ExtRedCost < ExtCost + RedCost;
4867 IsExtendedRedValidAndClampRange(
4888 if (Opcode != Instruction::Add && Opcode != Instruction::Sub &&
4889 Opcode != Instruction::FAdd)
4892 assert(!Red->isPartialReduction() &&
4893 "This path does not support partial reductions");
4897 auto IsMulAccValidAndClampRange =
4909 (Ext0->getOpcode() != Ext1->getOpcode() ||
4910 Ext0->getOpcode() == Instruction::CastOps::FPExt))
4914 !Ext0 || Ext0->getOpcode() == Instruction::CastOps::ZExt;
4916 MulAccCost = Ctx.TTI.getMulAccReductionCost(IsZExt, Opcode, RedTy,
4923 ExtCost += Ext0->computeCost(VF, Ctx);
4925 ExtCost += Ext1->computeCost(VF, Ctx);
4927 ExtCost += OuterExt->computeCost(VF, Ctx);
4929 return MulAccCost.
isValid() &&
4930 MulAccCost < ExtCost + MulCost + RedCost;
4935 VPValue *VecOp = Red->getVecOp();
4973 Builder.createWidenCast(Instruction::CastOps::Trunc, ValB, NarrowTy);
4975 ValB = ExtB = Builder.createWidenCast(ExtOpc, Trunc, WideTy);
4976 Mul->setOperand(1, ExtB);
4986 ExtendAndReplaceConstantOp(RecipeA, RecipeB,
B,
Mul);
4991 IsMulAccValidAndClampRange(
Mul, RecipeA, RecipeB,
nullptr)) {
4998 if (!
Sub && IsMulAccValidAndClampRange(
Mul,
nullptr,
nullptr,
nullptr))
5015 ExtendAndReplaceConstantOp(Ext0, Ext1,
B,
Mul);
5024 (Ext->getOpcode() == Ext0->getOpcode() || Ext0 == Ext1) &&
5025 Ext0->getOpcode() == Ext1->getOpcode() &&
5026 IsMulAccValidAndClampRange(
Mul, Ext0, Ext1, Ext) &&
Mul->hasOneUse()) {
5028 Ext0->getOpcode(), Ext0->getOperand(0), Ext->getScalarType(),
nullptr,
5029 *Ext0, *Ext0, Ext0->getDebugLoc());
5030 NewExt0->insertBefore(Ext0);
5035 Ext->getScalarType(),
nullptr, *Ext1,
5036 *Ext1, Ext1->getDebugLoc());
5039 auto *NewMul =
Mul->cloneWithOperands({NewExt0, NewExt1});
5040 NewMul->insertBefore(
Mul);
5041 Ext->replaceAllUsesWith(NewMul);
5042 Ext->eraseFromParent();
5043 Mul->eraseFromParent();
5057 assert(!Red->isPartialReduction() &&
5058 "This path does not support partial reductions");
5061 auto IP = std::next(Red->getIterator());
5062 auto *VPBB = Red->getParent();
5072 Red->replaceAllUsesWith(AbstractR);
5102 for (
VPValue *VPV : VPValues) {
5111 if (
User->usesScalars(VPV))
5114 HoistPoint = HoistBlock->
begin();
5118 "All users must be in the vector preheader or dominated by it");
5123 VPV->replaceUsesWithIf(Broadcast,
5124 [VPV, Broadcast](
VPUser &U,
unsigned Idx) {
5125 return Broadcast != &U && !U.usesScalars(VPV);
5136 return CommonMetadata;
5139template <
unsigned Opcode>
5144 static_assert(Opcode == Instruction::Load || Opcode == Instruction::Store,
5145 "Only Load and Store opcodes supported");
5146 [[maybe_unused]]
constexpr bool IsLoad = (Opcode == Instruction::Load);
5153 for (
auto Recipes :
Groups) {
5154 if (Recipes.size() < 2)
5159 "Expected all recipes in group to have the same load-store type");
5166 VPValue *MaskI = RecipeI->getMask();
5172 bool HasComplementaryMask =
false;
5177 VPValue *MaskJ = RecipeJ->getMask();
5186 if (HasComplementaryMask) {
5187 assert(Group.
size() >= 2 &&
"must have at least 2 entries");
5197template <
typename InstType>
5215 for (
auto &Group :
Groups) {
5235 return R->isSingleScalar() == IsSingleScalar;
5237 "all members in group must agree on IsSingleScalar");
5242 LoadWithMinAlign->getUnderlyingInstr(), {EarliestLoad->getOperand(0)},
5243 IsSingleScalar,
nullptr, *EarliestLoad, CommonMetadata);
5245 UnpredicatedLoad->insertBefore(EarliestLoad);
5249 Load->replaceAllUsesWith(UnpredicatedLoad);
5250 Load->eraseFromParent();
5259 if (!StoreLoc || !StoreLoc->AATags.Scope)
5266 SinkStoreInfo SinkInfo(StoresToSink, *StoresToSink[0], PSE, L);
5278 for (
auto &Group :
Groups) {
5291 VPValue *SelectedValue = Group[0]->getOperand(0);
5294 bool IsSingleScalar = Group[0]->isSingleScalar();
5295 for (
unsigned I = 1;
I < Group.size(); ++
I) {
5296 assert(IsSingleScalar == Group[
I]->isSingleScalar() &&
5297 "all members in group must agree on IsSingleScalar");
5298 VPValue *Mask = Group[
I]->getMask();
5300 SelectedValue = Builder.createSelect(Mask,
Value, SelectedValue,
5309 StoreWithMinAlign->getUnderlyingInstr(),
5310 {SelectedValue, LastStore->getOperand(1)}, IsSingleScalar,
5311 nullptr, *LastStore, CommonMetadata);
5312 UnpredicatedStore->insertBefore(*InsertBB, LastStore->
getIterator());
5316 Store->eraseFromParent();
5323 assert(Plan.
hasVF(BestVF) &&
"BestVF is not available in Plan");
5324 assert(Plan.
hasUF(BestUF) &&
"BestUF is not available in Plan");
5387 auto UsesVectorOrInsideReplicateRegion = [DefR, LoopRegion](
VPUser *U) {
5389 return !U->usesScalars(DefR) || ParentRegion != LoopRegion;
5396 none_of(DefR->users(), UsesVectorOrInsideReplicateRegion))
5406 DefR->replaceUsesWithIf(
5407 BuildVector, [BuildVector, &UsesVectorOrInsideReplicateRegion](
5409 return &U != BuildVector && UsesVectorOrInsideReplicateRegion(&U);
5423 for (
VPValue *Def : R.definedValues()) {
5433 unsigned NumFirstLaneUsers =
count_if(Def->users(), [&Def](
VPUser *U) {
5434 return U->usesFirstLaneOnly(Def);
5436 if (!NumFirstLaneUsers || NumFirstLaneUsers == Def->getNumUsers())
5443 Unpack->insertAfter(&R);
5444 Def->replaceUsesWithIf(Unpack, [&Def](
VPUser &U,
unsigned) {
5445 return U.usesFirstLaneOnly(Def);
5454 bool RequiresScalarEpilogue,
VPValue *Step,
5455 std::optional<uint64_t> MaxRuntimeStep) {
5467 "Step VPBB must dominate VectorPHVPBB");
5469 InsertPt = std::next(StepR->getIterator());
5471 VPBuilder Builder(VectorPHVPBB, InsertPt);
5477 if (!RequiresScalarEpilogue &&
match(TC,
m_APInt(TCVal)) && MaxRuntimeStep &&
5478 TCVal->
urem(*MaxRuntimeStep) == 0) {
5489 if (TailByMasking) {
5490 TC = Builder.createAdd(
5501 Builder.createNaryOp(Instruction::URem, {TC, Step},
5510 if (RequiresScalarEpilogue) {
5512 "requiring scalar epilogue is not supported with fail folding");
5515 R = Builder.createSelect(IsZero, Step, R);
5529 "VF and VFxUF must be materialized together");
5541 Builder.createElementCount(TCTy, VFEC * Plan.
getConcreteUF());
5548 VPValue *RuntimeVF = Builder.createElementCount(TCTy, VFEC);
5552 BC, [&VF](
VPUser &U,
unsigned) {
return !U.usesScalars(&VF); });
5556 VPValue *MulByUF = Builder.createOverflowingOp(
5569 auto *AliasMask = Builder.createNaryOp(
5573 if (HeaderMaskDef->isPhi())
5574 Builder =
VPBuilder(&*HeaderMaskDef->getParent()->getFirstNonPhi());
5579 auto *ClampedHeaderMask = Builder.createAnd(HeaderMask, AliasMask);
5581 return &U != ClampedHeaderMask;
5592 assert(IncomingAliasMask &&
"Expected an alias mask!");
5602 if (
Check.NeedsFreeze) {
5612 Intrinsic::loop_dependence_war_mask,
5616 AliasMask = Builder.createAnd(AliasMask, WARMask);
5618 AliasMask = WARMask;
5623 VPValue *NumActive = Builder.createNaryOp(
5626 VPValue *ClampedVF = Builder.createScalarZExtOrTrunc(
5652 VPValue *DistanceToMax = Builder.createSub(MaxUIntTripCount, TripCount);
5660 VPValue *TripCountCheck = Builder.createICmp(
5663 VPValue *
Cond = Builder.createOr(IsScalar, TripCountCheck,
DL);
5674 "Clamped VF not supported with interleaving");
5682 VPBuilder Builder(Entry, Entry->begin());
5694 if (!ExpSCEV || ExpSCEV->user_empty())
5696 Builder.setInsertPoint(ExpSCEV);
5705 ExpSCEV->eraseFromParent();
5714 BasicBlock *EntryBB = Entry->getIRBasicBlock();
5721 const SCEV *Expr = ExpSCEV->getSCEV();
5724 ExpandedSCEVs[Expr] = Res;
5729 ExpSCEV->eraseFromParent();
5732 "all VPExpandSCEVRecipes must have been expanded");
5735 auto EI = Entry->begin();
5745 return ExpandedSCEVs;
5759 VPValue *OpV,
unsigned Idx,
bool IsScalable) {
5764 if (Member0Op == OpV)
5774 return !IsScalable && !W->getMask() && W->isConsecutive() &&
5777 return IR->getInterleaveGroup()->isFull() &&
IR->getVPValue(Idx) == OpV;
5792 if (R->getScalarType() != WideMember0->getScalarType())
5794 if (R->hasPredicate() && R->getPredicate() != WideMember0->getPredicate())
5798 for (
unsigned Idx = 0; Idx != WideMember0->getNumOperands(); ++Idx) {
5801 OpsI.
push_back(
Op->getDefiningRecipe()->getOperand(Idx));
5806 if (
any_of(
enumerate(OpsI), [WideMember0, Idx, IsScalable](
const auto &
P) {
5807 const auto &[
OpIdx, OpV] =
P;
5819static std::optional<ElementCount>
5823 if (!InterleaveR || InterleaveR->
getMask())
5824 return std::nullopt;
5826 Type *GroupElementTy =
nullptr;
5830 return Op->getScalarType() == GroupElementTy;
5832 return std::nullopt;
5836 return Op->getScalarType() == GroupElementTy;
5838 return std::nullopt;
5842 if (IG->getFactor() != IG->getNumMembers())
5843 return std::nullopt;
5849 assert(
Size.isScalable() == VF.isScalable() &&
5850 "if Size is scalable, VF must be scalable and vice versa");
5851 return Size.getKnownMinValue();
5855 unsigned MinVal = VF.getKnownMinValue();
5857 if (IG->getFactor() == MinVal && GroupSize == GetVectorBitWidthForVF(VF))
5860 return std::nullopt;
5868 return RepR && RepR->isSingleScalar();
5882 if (V->isDefinedOutsideLoopRegions()) {
5885 return M->isDefinedOutsideLoopRegions() &&
5886 M->getScalarType() == V->getScalarType();
5888 "expected distinct loop-invariant values of matching scalar type");
5903 for (
unsigned Idx = 0,
E = WideMember0->getNumOperands(); Idx !=
E; ++Idx) {
5905 for (
VPValue *Member : Members)
5906 OpsI.
push_back(Member->getDefiningRecipe()->getOperand(Idx));
5907 WideMember0->setOperand(
5916 auto *LI =
cast<LoadInst>(LoadGroup->getInterleaveGroup()->getInsertPos());
5918 LoadGroup->getMask(),
true,
5919 *LoadGroup, LoadGroup->getDebugLoc());
5920 L->insertBefore(LoadGroup);
5926 assert(RepR->isSingleScalar() && RepR->getOpcode() == Instruction::Load &&
5927 "must be a single scalar load");
5928 NarrowedOps.
insert(RepR);
5933 VPValue *PtrOp = WideLoad->getAddr();
5935 PtrOp = VecPtr->getOperand(0);
5940 nullptr, {}, *WideLoad);
5941 N->insertBefore(WideLoad);
5946std::unique_ptr<VPlan>
5966 "unexpected branch-on-count");
5969 std::optional<ElementCount> VFToOptimize;
5983 if (R.mayWriteToMemory() && !InterleaveR)
5989 return any_of(V->users(), [&](VPUser *U) {
5990 auto *UR = cast<VPRecipeBase>(U);
5991 return UR->getParent()->getParent() != VectorLoop;
6008 std::optional<ElementCount> NarrowedVF =
6010 if (!NarrowedVF || (VFToOptimize && NarrowedVF != VFToOptimize))
6012 VFToOptimize = NarrowedVF;
6015 if (InterleaveR->getStoredValues().empty())
6020 auto *Member0 = InterleaveR->getStoredValues()[0];
6030 VPRecipeBase *DefR = Op.value()->getDefiningRecipe();
6033 auto *IR = dyn_cast<VPInterleaveRecipe>(DefR);
6034 return IR && IR->getInterleaveGroup()->isFull() &&
6035 IR->getVPValue(Op.index()) == Op.value();
6044 VFToOptimize->isScalable()))
6049 if (StoreGroups.empty())
6053 bool RequiresScalarEpilogue =
6064 std::unique_ptr<VPlan> NewPlan;
6066 NewPlan = std::unique_ptr<VPlan>(Plan.
duplicate());
6067 Plan.
setVF(*VFToOptimize);
6068 NewPlan->removeVF(*VFToOptimize);
6075 for (
auto *StoreGroup : StoreGroups) {
6077 NarrowedOps, Preheader);
6082 StoreGroup->getDebugLoc());
6083 S->insertBefore(StoreGroup);
6084 StoreGroup->eraseFromParent();
6090 Type *CanIVTy = VectorLoop->getCanonicalIVType();
6096 if (VFToOptimize->isScalable()) {
6099 Step = PHBuilder.createOverflowingOp(Instruction::Mul, {VScale,
UF},
6107 materializeVectorTripCount(Plan, VectorPH,
false,
6108 RequiresScalarEpilogue, Step);
6113 removeDeadRecipes(Plan);
6116 "All VPVectorPointerRecipes should have been removed");
6132 "must have a BranchOnCond");
6135 if (VF.
isScalable() && VScaleForTuning.has_value())
6136 VectorStep *= *VScaleForTuning;
6137 assert(VectorStep > 0 &&
"trip count should not be zero");
6141 MiddleTerm->setMetadata(LLVMContext::MD_prof, BranchWeights);
6160 "Cannot handle loops with uncountable early exits");
6167 assert(RecurSplice &&
"expected FirstOrderRecurrenceSplice");
6174 if (
any_of(RecurSplice->users(),
6175 [](
VPUser *U) { return !cast<VPRecipeBase>(U)->getRegion(); }) &&
6256 {},
"vector.recur.extract.for.phi");
6259 ExitPhi->replaceUsesOfWith(ExtractR, PenultimateElement);
6273 VPValue *WidenIVCandidate = BinOp->getOperand(0);
6274 VPValue *InvariantCandidate = BinOp->getOperand(1);
6276 std::swap(WidenIVCandidate, InvariantCandidate);
6290 auto *ClonedOp = BinOp->
clone();
6291 if (ClonedOp->getOperand(0) == WidenIV) {
6292 ClonedOp->setOperand(0, ScalarIV);
6294 assert(ClonedOp->getOperand(1) == WidenIV &&
"one operand must be WideIV");
6295 ClonedOp->setOperand(1, ScalarIV);
6310 auto CheckSentinel = [&SE](
const SCEV *IVSCEV,
6311 bool UseMax) -> std::optional<APSInt> {
6313 for (
bool Signed : {
true,
false}) {
6322 return std::nullopt;
6330 PhiR->getRecurrenceKind()))
6339 VPValue *BackedgeVal = PhiR->getBackedgeValue();
6353 !
match(FindLastSelect,
6362 IVOfExpressionToSink ? IVOfExpressionToSink : FindLastExpression, PSE,
6368 "IVOfExpressionToSink not being an AddRec must imply "
6369 "FindLastExpression not being an AddRec.");
6380 std::optional<APSInt> SentinelVal = CheckSentinel(IVSCEV, UseMax);
6381 bool UseSigned = SentinelVal && SentinelVal->isSigned();
6388 if (IVOfExpressionToSink) {
6389 const SCEV *FindLastExpressionSCEV =
6391 if (
match(FindLastExpressionSCEV,
6394 if (
auto NewSentinel =
6395 CheckSentinel(FindLastExpressionSCEV, NewUseMax)) {
6398 SentinelVal = *NewSentinel;
6399 UseSigned = NewSentinel->isSigned();
6401 IVSCEV = FindLastExpressionSCEV;
6402 IVOfExpressionToSink =
nullptr;
6412 if (AR->hasNoSignedWrap())
6414 else if (AR->hasNoUnsignedWrap())
6424 VPValue *NewFindLastSelect = BackedgeVal;
6426 if (!SentinelVal || IVOfExpressionToSink) {
6429 DebugLoc DL = FindLastSelect->getDefiningRecipe()->getDebugLoc();
6430 VPBuilder LoopBuilder(FindLastSelect->getDefiningRecipe());
6431 if (FindLastSelect->getDefiningRecipe()->getOperand(1) == PhiR)
6432 SelectCond = LoopBuilder.
createNot(SelectCond);
6439 if (SelectCond !=
Cond || IVOfExpressionToSink) {
6442 IVOfExpressionToSink ? IVOfExpressionToSink : FindLastExpression,
6451 VPIRFlags Flags(MinMaxKind,
false,
false,
6457 NewFindLastSelect, Flags, ExitDL);
6460 VPValue *VectorRegionExitingVal = ReducedIV;
6461 if (IVOfExpressionToSink)
6462 VectorRegionExitingVal =
6464 ReducedIV, IVOfExpressionToSink);
6467 VPValue *StartVPV = PhiR->getStartValue();
6474 NewRdxResult = MiddleBuilder.
createSelect(Cmp, VectorRegionExitingVal,
6484 AnyOfPhi->insertAfter(PhiR);
6491 OrVal, VectorRegionExitingVal, StartVPV, ExitDL);
6504 PhiR->hasUsesOutsideReductionChain());
6505 NewPhiR->insertBefore(PhiR);
6506 PhiR->replaceAllUsesWith(NewPhiR);
6507 PhiR->eraseFromParent();
6514struct ReductionExtend {
6515 Type *SrcType =
nullptr;
6516 ExtendKind Kind = ExtendKind::PR_None;
6522struct ExtendedReductionOperand {
6526 ReductionExtend ExtendA, ExtendB;
6534struct VPPartialReductionChain {
6537 VPWidenRecipe *ReductionBinOp =
nullptr;
6539 ExtendedReductionOperand ExtendedOp;
6546 unsigned AccumulatorOpIdx;
6547 unsigned ScaleFactor;
6559 if (!
Op->hasOneUse() ||
6565 auto *Trunc = Builder.createWidenCast(Instruction::CastOps::Trunc,
6566 Op->getOperand(1), NarrowTy);
6568 Op->setOperand(1, Builder.createWidenCast(ExtOpc, Trunc, WideTy));
6577 auto *
Sub =
Op->getOperand(0)->getDefiningRecipe();
6579 assert(Ext->getOpcode() ==
6581 "Expected both the LHS and RHS extends to be the same");
6582 bool IsSigned = Ext->getOpcode() == Instruction::SExt;
6585 auto *FreezeX = Builder.insert(
new VPWidenRecipe(Instruction::Freeze, {
X}));
6586 auto *FreezeY = Builder.insert(
new VPWidenRecipe(Instruction::Freeze, {
Y}));
6587 auto *
Max = Builder.insert(
6589 {FreezeX, FreezeY}, SrcTy));
6590 auto *Min = Builder.insert(
6592 {FreezeX, FreezeY}, SrcTy));
6595 return Builder.createWidenCast(Instruction::CastOps::ZExt, AbsDiff,
6596 Op->getScalarType());
6608 if (!
Mul->hasOneUse() ||
6609 (Ext->getOpcode() != MulLHS->getOpcode() && MulLHS != MulRHS) ||
6610 MulLHS->getOpcode() != MulRHS->getOpcode())
6613 auto *NewLHS = Builder.createWidenCast(
6614 MulLHS->getOpcode(), MulLHS->getOperand(0), Ext->getScalarType());
6615 auto *NewRHS = MulLHS == MulRHS
6617 : Builder.createWidenCast(MulRHS->getOpcode(),
6618 MulRHS->getOperand(0),
6619 Ext->getScalarType());
6620 auto *NewMul =
Mul->cloneWithOperands({NewLHS, NewRHS});
6621 Builder.insert(NewMul);
6622 Op->replaceAllUsesWith(NewMul);
6623 Op->eraseFromParent();
6624 Mul->eraseFromParent();
6633 VPValue *VecOp = Red->getVecOp();
6687static void transformToPartialReduction(
const VPPartialReductionChain &Chain,
6695 WidenRecipe->
getOperand(1 - Chain.AccumulatorOpIdx));
6698 ExtendedOp = optimizeExtendsForPartialReduction(ExtendedOp);
6714 if ((WidenRecipe->
getOpcode() == Instruction::Sub &&
6716 (WidenRecipe->
getOpcode() == Instruction::FSub &&
6721 if (WidenRecipe->
getOpcode() == Instruction::FSub) {
6731 Builder.insert(NegRecipe);
6732 ExtendedOp = NegRecipe;
6743 assert((!ExitValue || IsLastInChain) &&
6744 "if we found ExitValue, it must match RdxPhi's backedge value");
6755 PartialRed->insertBefore(WidenRecipe);
6763 E->insertBefore(WidenRecipe);
6764 PartialRed->replaceAllUsesWith(
E);
6777 auto *NewScaleFactor = Plan.
getConstantInt(32, Chain.ScaleFactor);
6778 StartInst->setOperand(2, NewScaleFactor);
6786 VPValue *OldStartValue = StartInst->getOperand(0);
6787 StartInst->setOperand(0, StartInst->getOperand(1));
6791 assert(RdxResult &&
"Could not find reduction result");
6794 unsigned SubOpc = Chain.RK ==
RecurKind::FSub ? Instruction::BinaryOps::FSub
6795 : Instruction::BinaryOps::Sub;
6801 [&NewResult](
VPUser &U,
unsigned Idx) {
return &
U != NewResult; });
6807 const VPPartialReductionChain &Link,
6810 const ExtendedReductionOperand &ExtendedOp = Link.ExtendedOp;
6811 std::optional<unsigned> BinOpc = std::nullopt;
6813 if (ExtendedOp.ExtendB.Kind != ExtendKind::PR_None)
6814 BinOpc = ExtendedOp.ExtendsUser->
getOpcode();
6816 std::optional<llvm::FastMathFlags>
Flags;
6820 auto GetLinkOpcode = [&Link]() ->
unsigned {
6823 return Instruction::Add;
6825 return Instruction::FAdd;
6827 return Link.ReductionBinOp->
getOpcode();
6832 GetLinkOpcode(), ExtendedOp.ExtendA.SrcType, ExtendedOp.ExtendB.SrcType,
6833 RdxType, VF, ExtendedOp.ExtendA.Kind, ExtendedOp.ExtendB.Kind, BinOpc,
6854static std::optional<ExtendedReductionOperand>
6857 "Op should be operand of UpdateR");
6865 if (
Op->hasOneUse() &&
6874 Type *RHSInputType =
Y->getScalarType();
6875 if (LHSInputType != RHSInputType ||
6876 LHSExt->getOpcode() != RHSExt->getOpcode())
6877 return std::nullopt;
6880 return ExtendedReductionOperand{
6882 {LHSInputType, getPartialReductionExtendKind(LHSExt)},
6886 std::optional<TTI::PartialReductionExtendKind> OuterExtKind;
6889 VPValue *CastSource = CastRecipe->getOperand(0);
6890 OuterExtKind = getPartialReductionExtendKind(CastRecipe);
6900 return ExtendedReductionOperand{
6907 if (!
Op->hasOneUse())
6908 return std::nullopt;
6913 return std::nullopt;
6923 return std::nullopt;
6927 ExtendKind LHSExtendKind = getPartialReductionExtendKind(LHSCast);
6930 const APInt *RHSConst =
nullptr;
6936 return std::nullopt;
6940 if (Cast && OuterExtKind &&
6941 getPartialReductionExtendKind(Cast) != OuterExtKind)
6942 return std::nullopt;
6944 Type *RHSInputType = LHSInputType;
6945 ExtendKind RHSExtendKind = LHSExtendKind;
6948 RHSExtendKind = getPartialReductionExtendKind(RHSCast);
6951 return ExtendedReductionOperand{
6952 MulOp, {LHSInputType, LHSExtendKind}, {RHSInputType, RHSExtendKind}};
6959static std::optional<SmallVector<VPPartialReductionChain>>
6966 return std::nullopt;
6976 VPValue *CurrentValue = ExitValue;
6977 while (CurrentValue != RedPhiR) {
6980 return std::nullopt;
6987 std::optional<ExtendedReductionOperand> ExtendedOp =
6988 matchExtendedReductionOperand(UpdateR,
Op);
6990 ExtendedOp = matchExtendedReductionOperand(UpdateR, PrevValue);
6992 return std::nullopt;
6996 Type *ExtSrcType = ExtendedOp->ExtendA.SrcType;
6999 return std::nullopt;
7001 VPPartialReductionChain Link(
7002 {UpdateR, *ExtendedOp, RK,
7006 CurrentValue = PrevValue;
7011 std::reverse(Chain.
begin(), Chain.
end());
7030 if (
auto Chains = getScaledReductions(RedPhiR))
7031 ChainsByPhi.
try_emplace(RedPhiR, std::move(*Chains));
7034 if (ChainsByPhi.
empty())
7041 for (
const auto &[
_, Chains] : ChainsByPhi)
7042 for (
const VPPartialReductionChain &Chain : Chains) {
7043 PartialReductionOps.
insert(Chain.ExtendedOp.ExtendsUser);
7044 ScaledReductionMap[Chain.ReductionBinOp] = Chain.ScaleFactor;
7050 auto ExtendUsersValid = [&](
VPValue *Ext) {
7052 return PartialReductionOps.contains(cast<VPRecipeBase>(U));
7056 auto IsProfitablePartialReductionChainForVF =
7063 for (
const VPPartialReductionChain &Link : Chain) {
7064 const ExtendedReductionOperand &ExtendedOp = Link.ExtendedOp;
7065 InstructionCost LinkCost = getPartialReductionLinkCost(CostCtx, Link, VF);
7069 PartialCost += LinkCost;
7070 RegularCost += Link.ReductionBinOp->
computeCost(VF, CostCtx);
7072 if (ExtendedOp.ExtendB.Kind != ExtendKind::PR_None)
7073 RegularCost += ExtendedOp.ExtendsUser->
computeCost(VF, CostCtx);
7076 RegularCost += Extend->computeCost(VF, CostCtx);
7078 return PartialCost.
isValid() && PartialCost < RegularCost;
7086 for (
auto &[RedPhiR, Chains] : ChainsByPhi) {
7087 for (
const VPPartialReductionChain &Chain : Chains) {
7088 if (!
all_of(Chain.ExtendedOp.ExtendsUser->operands(), ExtendUsersValid)) {
7092 auto UseIsValid = [&, RedPhiR = RedPhiR](
VPUser *U) {
7094 return PhiR == RedPhiR;
7096 return Chain.ScaleFactor == ScaledReductionMap.
lookup_or(R, 0) ||
7102 if (!
all_of(Chain.ReductionBinOp->users(), UseIsValid)) {
7111 auto *RepR = dyn_cast<VPReplicateRecipe>(U);
7112 return RepR && RepR->getOpcode() == Instruction::Store;
7123 return IsProfitablePartialReductionChainForVF(Chains, VF);
7129 for (
auto &[Phi, Chains] : ChainsByPhi)
7130 for (
const VPPartialReductionChain &Chain : Chains)
7131 transformToPartialReduction(Chain, Plan, Phi);
7160 if (VPI && VPI->getUnderlyingValue() &&
7171 auto ProcessSubset = [&](
VPlan &,
auto ProcessVPInst) {
7174 if (!ProcessVPInst(VPI))
7183 New->insertBefore(VPI);
7184 if (VPI->
getOpcode() == Instruction::Load)
7199 "lowerMemoryIdioms", ProcessSubset, Plan, [&](
VPInstruction *VPI) {
7201 VPI, FinalRedStoresBuilder))
7210 return ReplaceWith(VPI, Histogram);
7223 "scalarizeMemOpsWithIrregularTypes", ProcessSubset, Plan,
7227 return Scalarize(VPI);
7234 "makeVPlanMemOpDecision", ProcessSubset, Plan, [&](
VPInstruction *VPI) {
7236 bool IsLoad = VPI->
getOpcode() == Instruction::Load;
7246 const SCEV *PtrSCEV =
7248 bool IsSingleScalarLoad =
7254 I, Ptr, IsSingleScalarLoad,
7262 "widenConsecutiveMemOps", ProcessSubset, Plan, [&](
VPInstruction *VPI) {
7267 bool IsLoad = VPI->
getOpcode() == Instruction::Load;
7280 VectorPtr->insertBefore(VPI);
7291 return ReplaceWith(VPI, WidenedR);
7298 return ReplaceWith(VPI, Recipe);
7300 return Scalarize(VPI);
7323 if (VPI->mayHaveSideEffects())
7327 if (VPI->isMasked() && !VPI->isSafeToSpeculativelyExecute())
7332 if (VPI->getOpcode() == Instruction::Add &&
7341 VPI->getOpcode(), VPI->operandsWithoutMask(),
nullptr, *VPI,
7342 *VPI, VPI->getDebugLoc(),
I);
7343 Recipe->insertBefore(VPI);
7344 VPI->replaceAllUsesWith(Recipe);
7345 VPI->eraseFromParent();
7355 switch (Param.ParamKind) {
7356 case VFParamKind::Vector:
7357 case VFParamKind::GlobalPredicate:
7359 case VFParamKind::OMP_Uniform:
7360 return SE->isSCEVable(Args[Param.ParamPos]->getScalarType()) &&
7361 SE->isLoopInvariant(
7362 vputils::getSCEVExprForVPValue(Args[Param.ParamPos], PSE, L),
7364 case VFParamKind::OMP_Linear:
7365 return match(vputils::getSCEVExprForVPValue(Args[Param.ParamPos], PSE, L),
7366 m_scev_AffineAddRec(
7367 m_SCEV(), m_scev_SpecificSInt(Param.LinearStepOrPos),
7368 m_SpecificLoop(L)));
7385 const auto *It =
find_if(Mappings, [&](
const VFInfo &Info) {
7386 return Info.Shape.VF == VF && (!MaskRequired || Info.isMasked()) &&
7389 if (It == Mappings.end())
7396struct CallWideningDecision {
7397 enum class KindTy { Scalarize,
Intrinsic, VectorVariant };
7398 CallWideningDecision(KindTy Kind, Function *Variant =
nullptr)
7421 return CallWideningDecision::KindTy::Scalarize;
7431 return CallWideningDecision::KindTy::Scalarize;
7435 false, VF, CostCtx);
7450 return CallWideningDecision::KindTy::Intrinsic;
7454 if (VecFunc && ScalarCost >= VecCallCost)
7455 return {CallWideningDecision::KindTy::VectorVariant, VecFunc};
7457 return CallWideningDecision::KindTy::Scalarize;
7467 if (!VPI || !VPI->getUnderlyingValue() ||
7468 VPI->getOpcode() != Instruction::Call)
7473 VPI->op_begin() + CI->arg_size());
7475 CallWideningDecision Decision =
7484 switch (Decision.Kind) {
7485 case CallWideningDecision::KindTy::Intrinsic: {
7489 *VPI, VPI->getDebugLoc());
7492 case CallWideningDecision::KindTy::VectorVariant: {
7496 VPValue *Mask = VPI->isMasked() ? VPI->getMask() : Plan.
getTrue();
7497 Ops.push_back(Mask);
7499 Ops.push_back(VPI->getOperand(VPI->getNumOperandsWithoutMask() - 1));
7501 *VPI, VPI->getDebugLoc());
7504 case CallWideningDecision::KindTy::Scalarize:
7510 VPI->replaceAllUsesWith(Replacement);
7511 VPI->eraseFromParent();
7534 if (!LoadR || LoadR->isConsecutive())
7553 Align Alignment = LoadR->getAlign();
7556 if (!Ctx.TTI.isLegalStridedLoadStore(DataTy, Alignment))
7561 Intrinsic::experimental_vp_strided_load, DataTy,
7562 LoadR->isMasked(), Alignment, Ctx);
7563 return StridedLoadStoreCost < CurrentCost;
7574 Ctx.invalidateWideningDecision(&LoadR->getIngredient(), VF);
7579 I32VF = Builder.createScalarZExtOrTrunc(
7592 "Stride type from SCEV must match the index type");
7593 VPValue *CanIV = Builder.createScalarSExtOrTrunc(
7597 auto *
Offset = Builder.createOverflowingOp(
7598 Instruction::Mul, {CanIV, StrideInBytes},
7599 {AddRecPtr->hasNoUnsignedWrap(), AddRecPtr->hasNoSignedWrap()});
7600 auto *BasePtr = Builder.createNoWrapPtrAdd(
7606 VPValue *NewPtr = Builder.createVectorPointer(
7608 Ptr->getGEPNoWrapFlags(), Ptr->getDebugLoc());
7610 VPValue *Mask = LoadR->getMask();
7613 auto *StridedLoad = Builder.createWidenMemIntrinsic(
7614 Intrinsic::experimental_vp_strided_load,
7615 {NewPtr, StrideInBytes, Mask, I32VF}, LoadTy, Alignment, *LoadR,
7616 LoadR->getDebugLoc());
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static bool isEqual(const Function &Caller, const Function &Callee)
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
static cl::opt< IntrinsicCostStrategy > IntrinsicCost("intrinsic-cost-strategy", cl::desc("Costing strategy for intrinsic instructions"), cl::init(IntrinsicCostStrategy::InstructionCost), cl::values(clEnumValN(IntrinsicCostStrategy::InstructionCost, "instruction-cost", "Use TargetTransformInfo::getInstructionCost"), clEnumValN(IntrinsicCostStrategy::IntrinsicCost, "intrinsic-cost", "Use TargetTransformInfo::getIntrinsicInstrCost"), clEnumValN(IntrinsicCostStrategy::TypeBasedIntrinsicCost, "type-based-intrinsic-cost", "Calculate the intrinsic cost based only on argument types")))
iv Induction Variable Users
static std::pair< Value *, APInt > getMask(Value *WideMask, unsigned Factor, ElementCount LeafValueEC)
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
Legalize the Machine IR a function s Machine IR
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first DebugLoc that has line number information, given a range of instructions.
This file provides utility analysis objects describing memory locations.
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
const SmallVectorImpl< MachineOperand > & Cond
static bool dominates(InstrPosIndexes &PosIndexes, const MachineInstr &A, const MachineInstr &B)
This is the interface for a metadata-based scoped no-alias analysis.
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static SymbolRef::Type getType(const Symbol *Sym)
This file implements the TypeSwitch template, which mimics a switch() statement whose cases are type ...
This file implements dominator tree analysis for a single level of a VPlan's H-CFG.
This file contains the declarations of different VPlan-related auxiliary helpers.
This file declares the class VPlanVerifier, which contains utility functions to check the consistency...
This file contains the declarations of the Vectorization Plan base classes:
static const X86InstrFMA3Group Groups[]
static const uint32_t IV[8]
Helper for extra no-alias checks via known-safe recipe and SCEV.
SinkStoreInfo(ArrayRef< VPReplicateRecipe * > ExcludeRecipes, VPReplicateRecipe &GroupLeader, PredicatedScalarEvolution &PSE, const Loop &L)
SinkStoreInfo(VPReplicateRecipe &GroupLeader)
bool shouldSkip(VPRecipeBase &R) const
Return true if R should be skipped during alias checking, either because it's in the exclude set or b...
Class for arbitrary precision integers.
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
unsigned getActiveBits() const
Compute the number of active bits in the value.
APInt abs() const
Get the absolute value.
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
unsigned getBitWidth() const
Return the number of bits in the APInt.
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
An arbitrary precision integer that knows its signedness.
static APSInt getMinValue(uint32_t numBits, bool Unsigned)
Return the APSInt representing the minimum integer value with the given bit width and signedness.
static APSInt getMaxValue(uint32_t numBits, bool Unsigned)
Return the APSInt representing the maximum integer value with the given bit width and signedness.
@ NoAlias
The two locations do not alias at all.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & back() const
Get the last element.
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
const T & front() const
Get the first element.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
const Function * getParent() const
Return the enclosing method, or null if none.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
bool isNoBuiltin() const
Return true if the call should not be treated as a call to a builtin.
This class represents a function call, abstracting a target machine's calling convention.
@ ICMP_ULT
unsigned less than
@ ICMP_ULE
unsigned less or equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
This class represents a range of values.
LLVM_ABI bool contains(const APInt &Val) const
Return true if the specified value is in the set.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
A parsed version of the target data layout string and methods for querying it.
LLVM_ABI IntegerType * getIndexType(LLVMContext &C, unsigned AddressSpace) const
Returns the type of a GEP index in AddressSpace.
static DebugLoc getCompilerGenerated()
static DebugLoc getUnknown()
ValueT lookup(const_arg_type_t< KeyT > Val) const
Return the entry for the specified key, or a default constructed value if no such entry exists.
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
ValueT lookup_or(const_arg_type_t< KeyT > Val, U &&Default) const
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
constexpr bool isVector() const
One or more elements.
static constexpr ElementCount getScalable(ScalarTy MinVal)
constexpr bool isScalar() const
Exactly one element.
Utility class for floating point operations which can have information about relaxed accuracy require...
FastMathFlags getFastMathFlags() const
Convenience function for getting all the fast-math flags.
Convenience struct for specifying and reasoning about fast-math flags.
Represents flags for the getelementptr instruction/expression.
static GEPNoWrapFlags noUnsignedWrap()
GEPNoWrapFlags withoutNoUnsignedWrap() const
static GEPNoWrapFlags none()
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
A struct for saving information about induction variables.
static LLVM_ABI InductionDescriptor getCanonicalIntInduction(Type *Ty, ScalarEvolution &SE)
Returns the canonical integer induction for type Ty with start = 0 and step = 1.
InductionKind
This enum represents the kinds of inductions that we support.
@ IK_NoInduction
Not an induction variable.
@ IK_FpInduction
Floating point induction variable.
@ IK_PtrInduction
Pointer induction var. Step = C.
@ IK_IntInduction
Integer induction variable. Step = C.
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
static InstructionCost getInvalid(CostType Val=0)
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
The group of interleaved loads/stores sharing the same stride and close to each other.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
static bool getDecisionAndClampRange(const std::function< bool(ElementCount)> &Predicate, VFRange &Range)
Test a Predicate on a Range of VF's.
Represents a single loop in the control flow graph.
LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
This class implements a map that also provides access to all stored values in a deterministic order.
ValueT lookup(const KeyT &Key) const
std::pair< iterator, bool > try_emplace(const KeyT &Key, Ts &&...Args)
Representation for a specific memory location.
AAMDNodes AATags
The metadata nodes which describes the aliasing of the location (each member is null if that kind of ...
Function * getFunction(StringRef Name) const
Look up the specified function in the module symbol table.
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
Post-order traversal of a graph.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
LLVM_ABI const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
static LLVM_ABI unsigned getOpcode(RecurKind Kind)
Returns the opcode corresponding to the RecurrenceKind.
unsigned getOpcode() const
static bool isFindLastRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
RegionT * getParent() const
Get the parent of the Region.
This class represents a constant integer value.
ConstantInt * getValue() const
This class uses information about analyzed scalars to rewrite expressions in canonical form.
LLVM_ABI Value * expandCodeFor(SCEVUse SH, Type *Ty, BasicBlock::iterator I)
Insert code to directly compute the specified SCEV expression into the program.
static const SCEV * rewrite(const SCEV *Scev, ScalarEvolution &SE, ValueToSCEVMapTy &Map)
This class represents an analyzed expression in the program.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
LLVM_ABI const SCEV * getUDivExpr(SCEVUse LHS, SCEVUse RHS)
Get a canonical unsigned division expression, or something simpler if possible.
const DataLayout & getDataLayout() const
Return the DataLayout associated with the module this SCEV instance is operating on.
LLVM_ABI const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
LLVM_ABI bool isKnownNonZero(const SCEV *S)
Test if the given expression is known to be non-zero.
LLVM_ABI const SCEV * getConstant(ConstantInt *V)
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI const SCEV * getMinusSCEV(SCEVUse LHS, SCEVUse RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
ConstantRange getSignedRange(const SCEV *S)
Determine the signed range for a particular SCEV.
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LLVM_ABI bool isKnownPositive(const SCEV *S)
Test if the given expression is known to be positive.
LLVM_ABI const SCEV * getElementCount(Type *Ty, ElementCount EC, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
ConstantRange getUnsignedRange(const SCEV *S)
Determine the unsigned range for a particular SCEV.
LLVM_ABI const SCEV * getMulExpr(SmallVectorImpl< SCEVUse > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
LLVM_ABI bool isKnownPredicate(CmpPredicate Pred, SCEVUse LHS, SCEVUse RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
static LLVM_ABI AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB)
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Provides information about what library functions are available for the current target.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
This class implements a switch-like dispatch statement for a value of 'T' using dyn_cast functionalit...
TypeSwitch< T, ResultT > & Case(CallableT &&caseFn)
Add a case on the given type.
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
bool isStructTy() const
True if this is an instance of StructType.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static SmallVector< VFInfo, 8 > getMappings(const CallInst &CI)
Retrieve all the VFInfo instances associated to the CallInst CI.
A recipe for generating the active lane mask for the vector loop that is used to predicate the vector...
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
RecipeListTy::iterator iterator
Instruction iterators...
iterator begin()
Recipe iterator methods.
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
VPBasicBlock * splitAt(iterator SplitAt)
Split current block at SplitAt by inserting a new block between the current block and its successors ...
const VPRecipeBase & front() const
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
const VPRecipeBase & back() const
A recipe for vectorizing a phi-node as a sequence of mask-based select instructions.
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
void setMask(unsigned Idx, VPValue *V)
Set mask number Idx to V.
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have...
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccs as successor of this VPBlockBase.
VPRegionBlock * getParent()
const VPBasicBlock * getExitingBasicBlock() const
size_t getNumSuccessors() const
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
const VPBlocksTy & getPredecessors() const
const std::string & getName() const
void clearSuccessors()
Remove all the successors of this block.
VPBlockBase * getSinglePredecessor() const
void clearPredecessors()
Remove all the predecessors of this block.
const VPBasicBlock * getEntryBasicBlock() const
VPBlockBase * getSingleHierarchicalPredecessor()
VPBlockBase * getSingleSuccessor() const
const VPBlocksTy & getSuccessors() const
static auto blocksAs(T &&Range)
Return an iterator range over Range with each block cast to BlockTy.
static void insertOnEdge(VPBlockBase *From, VPBlockBase *To, VPBlockBase *BlockPtr)
Inserts BlockPtr on the edge between From and To.
static bool isLatch(const VPBlockBase *VPB, const VPDominatorTree &VPDT)
Returns true if VPB is a loop latch, using isHeader().
static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBases IfTrue and IfFalse after BlockPtr.
static void connectBlocks(VPBlockBase *From, VPBlockBase *To, unsigned PredIdx=-1u, unsigned SuccIdx=-1u)
Connect VPBlockBases From and To bi-directionally.
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
static auto blocksOnly(T &&Range)
Return an iterator range over Range which only includes BlockTy blocks.
static void transferSuccessors(VPBlockBase *Old, VPBlockBase *New)
Transfer successors from Old to New. New must have no successors.
static SmallVector< VPBasicBlock * > blocksInSingleSuccessorChainBetween(VPBasicBlock *FirstBB, VPBasicBlock *LastBB)
Returns the blocks between FirstBB and LastBB, where FirstBB to LastBB forms a single-successor chain.
A recipe for generating conditional branches on the bits of a mask.
RAII object that stores the current insertion point and restores it when the object is destroyed.
VPlan-based builder utility analogous to IRBuilder.
VPInstruction * createFirstActiveLane(ArrayRef< VPValue * > Masks, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createAdd(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", VPRecipeWithIRFlags::WrapFlagsTy WrapFlags={false, false})
VPInstruction * createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPValue * createScalarZExtOrTrunc(VPValue *Op, Type *ResultTy, Type *SrcTy, DebugLoc DL)
VPInstruction * createLogicalOr(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createNot(VPValue *Operand, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createAnyOfReduction(VPValue *ChainOp, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL=DebugLoc::getUnknown())
Create an AnyOf reduction pattern: or-reduce ChainOp, freeze the result, then select between TrueVal ...
VPInstruction * createLogicalAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPDerivedIVRecipe * createDerivedIV(InductionDescriptor::InductionKind Kind, FPMathOperator *FPBinOp, VPIRValue *Start, VPValue *Current, VPValue *Step)
Convert the input value Current to the corresponding value of an induction with Start and Step values...
VPInstruction * createScalarCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, DebugLoc DL, const VPIRMetadata &Metadata={})
VPWidenPHIRecipe * createWidenPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
static VPBuilder getToInsertAfter(VPRecipeBase *R)
Create a VPBuilder to insert after R.
VPWidenCastRecipe * createWidenCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
VPInstruction * createICmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new ICmp VPInstruction with predicate Pred and operands A and B.
VPPhi * createScalarPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={}, Type *ResultTy=nullptr)
VPInstruction * createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={})
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", Type *ResultTy=nullptr)
Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.
static VPSingleDefRecipe * createSingleScalarOp(unsigned Opcode, ArrayRef< VPValue * > Operands, VPValue *Mask, const VPIRFlags &Flags, const VPIRMetadata &Metadata, DebugLoc DL, Instruction *UV)
Create a single-scalar recipe with Opcode and Operands without inserting it.
void setInsertPoint(VPBasicBlock *TheBB)
This specifies that created VPInstructions should be appended to the end of the specified block.
A recipe for generating the phi node tracking the current scalar iteration index.
unsigned getNumDefinedValues() const
Returns the number of values defined by the VPDef.
VPValue * getVPSingleValue()
Returns the only VPValue defined by the VPDef.
VPValue * getVPValue(unsigned I)
Returns the VPValue with index I defined by the VPDef.
ArrayRef< VPRecipeValue * > definedValues()
Returns an ArrayRef of the values defined by the VPDef.
A recipe for converting the input IV value to the corresponding value of an IV with different s...
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
bool properlyDominates(const VPRecipeBase *A, const VPRecipeBase *B) const
A recipe to combine multiple recipes into a single 'expression' recipe, which should be considered a ...
A recipe representing a sequence of load -> update -> store as part of a histogram operation.
A special type of VPBasicBlock that wraps an existing IR basic block.
Class to record and manage LLVM IR flags.
static VPIRFlags getDefaultFlags(unsigned Opcode)
Returns default flags for Opcode for opcodes that support it, asserts otherwise.
LLVM_ABI_FOR_TEST FastMathFlags getFastMathFlagsOrNone() const
void dropPoisonGeneratingFlags()
Drop all poison-generating flags.
static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)
Create a new VPIRPhi for I, if it is a PHINode, otherwise create a VPIRInstruction.
This is a concrete Recipe that models a single VPlan-level instruction.
unsigned getNumOperandsWithoutMask() const
Returns the number of operands, excluding the mask if the VPInstruction is masked.
@ ExtractLane
Extracts a single lane (first operand) from a set of vector operands.
@ ExtractPenultimateElement
@ Unpack
Extracts all lanes from its (non-scalable) vector operand.
@ ReductionStartVector
Start vector for reductions with 3 operands: the original start value, the identity value for the red...
@ BuildVector
Creates a fixed-width vector containing all operands.
@ BuildStructVector
Given operands of (the same) struct type, creates a struct of fixed- width vectors each containing a ...
@ CanonicalIVIncrementForPart
@ ComputeReductionResult
Reduce the operands to the final reduction result using the operation specified via the operation's V...
unsigned getOpcode() const
const InterleaveGroup< Instruction > * getInterleaveGroup() const
VPValue * getMask() const
Return the mask used by this recipe.
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
A recipe for interleaved memory operations with vector-predication intrinsics.
VPInterleaveRecipe is a recipe for transforming an interleave group of load or stores into one wide l...
void addIncoming(VPValue *IncomingV)
Append IncomingV as an incoming value to the phi-like recipe.
VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when control converges back from ...
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
VPBasicBlock * getParent()
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Helper class to create VPRecipes from IR instructions.
VPHistogramRecipe * widenIfHistogram(VPInstruction *VPI)
If VPI represents a histogram operation (as determined by LoopVectorizationLegality) make that safe f...
bool prefersVectorizedAddressing() const
Returns true if the target prefers vectorized addressing.
VPRecipeBase * tryToWidenMemory(VPInstruction *VPI, VFRange &Range)
Check if the load or store instruction VPI should widened for Range.Start and potentially masked.
bool replaceWithFinalIfReductionStore(VPInstruction *VPI, VPBuilder &FinalRedStoresBuilder)
If VPI is a store of a reduction into an invariant address, delete it.
VPSingleDefRecipe * handleReplication(VPInstruction *VPI, VFRange &Range)
Build a replicating or single-scalar recipe for VPI.
bool isPredicatedInst(Instruction *I) const
Returns true if I needs to be predicated (i.e.
Type * getScalarType() const
Returns the scalar type of this VPRecipeValue.
A recipe to represent inloop reduction operations with vector-predication intrinsics,...
A recipe for handling reduction phis.
void setVFScaleFactor(unsigned ScaleFactor)
Set the VFScaleFactor for this reduction phi.
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by, or 1 if it isn't scaled.
RecurKind getRecurrenceKind() const
Returns the recurrence kind of the reduction.
A recipe to represent inloop, ordered or partial reduction operations.
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
const VPBlockBase * getEntry() const
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
VPInstruction * getOrCreateCanonicalIVIncrement()
Get the canonical IV increment instruction if it exists.
void setExiting(VPBlockBase *ExitingBlock)
Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.
Type * getCanonicalIVType() const
Return the type of the canonical IV for loop regions.
void clearCanonicalIVNUW(VPInstruction *Increment)
Unsets NUW for the canonical IV increment Increment, for loop regions.
VPRegionValue * getCanonicalIV()
Return the canonical induction variable of the region, null for replicating regions.
const VPBlockBase * getExiting() const
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
bool isSingleScalar() const
static InstructionCost computeCallCost(Function *CalledFn, Type *ResultTy, ArrayRef< const VPValue * > ArgOps, bool IsSingleScalar, ElementCount VF, VPCostContext &Ctx)
Return the cost of scalarizing a call to CalledFn with argument operands ArgOps for a given VF.
operand_range operandsWithoutMask()
Return the recipe's operands, excluding the mask of a predicated recipe.
bool isPredicated() const
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
Lightweight SCEV-to-VPlan expander.
VPValue * tryToExpand(const SCEV *S)
Try to expand S into recipes and live-ins using the builder.
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...
VPSingleDefRecipe is a base class for recipes that model a sequence of one or more output IR that def...
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
VPSingleDefRecipe * clone() override=0
Clone the current recipe.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
void setOperand(unsigned I, VPValue *New)
unsigned getNumOperands() const
VPValue * getOperand(unsigned N) const
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Type * getScalarType() const
Returns the scalar type of this VPValue, dispatching based on the concrete subclass.
Value * getLiveInIRValue() const
Return the underlying IR value for a VPIRValue.
bool isDefinedOutsideLoopRegions() const
Returns true if the VPValue is defined outside any loop.
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
bool hasMoreThanOneUniqueUser() const
Returns true if the value has more than one unique user.
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
void setUnderlyingValue(Value *Val)
VPUser * getSingleUser()
Return the single user of this value, or nullptr if there is not exactly one user.
void replaceAllUsesWith(VPValue *New)
unsigned getNumUsers() const
void replaceUsesWithIf(VPValue *New, llvm::function_ref< bool(VPUser &U, unsigned Idx)> ShouldReplace)
Go through the uses list for this VPValue and make each use point to New if the callback ShouldReplac...
A recipe to compute a pointer to the last element of each part of a widened memory access for widened...
A recipe to compute the pointers for widened memory accesses of SourceElementTy, with the Stride expr...
A recipe for widening Call instructions using library calls.
static InstructionCost computeCallCost(Function *Variant, VPCostContext &Ctx)
Return the cost of widening a call using the vector function Variant.
VPWidenCastRecipe is a recipe to create vector cast instructions.
Instruction::CastOps getOpcode() const
A recipe for handling GEP instructions.
Base class for widened induction (VPWidenIntOrFpInductionRecipe and VPWidenPointerInductionRecipe),...
VPIRValue * getStartValue() const
Returns the start value of the induction.
PHINode * getPHINode() const
Returns the underlying PHINode if one exists, or null otherwise.
VPValue * getStepValue()
Returns the step value of the induction.
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
A recipe for handling phi nodes of integer and floating-point inductions, producing their vector valu...
VPIRValue * getStartValue() const
Returns the start value of the induction.
VPValue * getSplatVFValue() const
If the recipe has been unrolled, return the VPValue for the induction increment, otherwise return nul...
TruncInst * getTruncInst()
Returns the first defined value as TruncInst, if it is one or nullptr otherwise.
VPValue * getLastUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the last unrolled part,...
A recipe for widening vector intrinsics.
static InstructionCost computeCallCost(Intrinsic::ID ID, ArrayRef< const VPValue * > Operands, const VPRecipeWithIRFlags &R, ElementCount VF, VPCostContext &Ctx)
Compute the cost of a vector intrinsic with ID and Operands.
static InstructionCost computeMemIntrinsicCost(Intrinsic::ID IID, Type *Ty, bool IsMasked, Align Alignment, VPCostContext &Ctx)
Helper function for computing the cost of vector memory intrinsic.
A common mixin class for widening memory operations.
virtual VPRecipeBase * getAsRecipe()=0
Return a VPRecipeBase* to the current object.
A recipe for widened phis.
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenRecipe.
VPWidenRecipe * clone() override
Clone the current recipe.
unsigned getOpcode() const
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
VPIRValue * getLiveIn(Value *V) const
Return the live-in VPIRValue for V, if there is one or nullptr otherwise.
bool hasVF(ElementCount VF) const
const DataLayout & getDataLayout() const
LLVMContext & getContext() const
VPBasicBlock * getEntry()
bool hasScalableVF() const
VPValue * getTripCount() const
The trip count of the original loop.
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
iterator_range< SmallSetVector< ElementCount, 2 >::iterator > vectorFactors() const
Returns an iterator range over all VFs of the plan.
VPIRValue * getFalse()
Return a VPIRValue wrapping i1 false.
VPSymbolicValue & getVFxUF()
Returns VF * UF of the vector loop region.
VPIRValue * getAllOnesValue(Type *Ty)
Return a VPIRValue wrapping the AllOnes value of type Ty.
VPRegionBlock * createReplicateRegion(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="")
Create a new replicate region with Entry, Exiting and Name.
auto getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
bool hasUF(unsigned UF) const
VPIRValue * getPoison(Type *Ty)
Return a VPIRValue wrapping a poison value of type Ty.
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
VPSymbolicValue & getVectorTripCount()
The vector trip count.
VPValue * getBackedgeTakenCount() const
VPIRValue * getOrAddLiveIn(Value *V)
Gets the live-in VPIRValue for V or adds a new live-in (if none exists yet) for V.
VPIRValue * getZero(Type *Ty)
Return a VPIRValue wrapping the null value of type Ty.
void setVF(ElementCount VF)
bool isUnrolled() const
Returns true if the VPlan already has been unrolled, i.e.
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
unsigned getConcreteUF() const
Returns the concrete UF of the plan, after unrolling.
void resetTripCount(VPValue *NewTripCount)
Resets the trip count for the VPlan.
VPBasicBlock * getMiddleBlock()
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
VPIRValue * getTrue()
Return a VPIRValue wrapping i1 true.
VPBasicBlock * getVectorPreheader() const
Returns the preheader of the vector loop region, if one exists, or null otherwise.
VPSymbolicValue & getUF()
Returns the UF of the vector loop region.
bool hasScalarVFOnly() const
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
VPSymbolicValue & getVF()
Returns the VF of the vector loop region.
bool hasScalarTail() const
Returns true if the scalar tail may execute after the vector loop, i.e.
LLVM_ABI_FOR_TEST VPlan * duplicate()
Clone the current VPlan, update all VPValues of the new VPlan and cloned recipes to refer to the clon...
VPIRValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPIRValue wrapping a ConstantInt with the given type and value.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
iterator_range< user_iterator > users()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
constexpr bool hasKnownScalarFactor(const FixedOrScalableQuantity &RHS) const
Returns true if there exists a value X where RHS.multiplyCoefficientBy(X) will result in a value whos...
constexpr ScalarTy getFixedValue() const
constexpr ScalarTy getKnownScalarFactor(const FixedOrScalableQuantity &RHS) const
Returns a value X where RHS.multiplyCoefficientBy(X) will result in a value whose quantity matches ou...
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
An efficient, type-erasing, non-owning reference to a callable.
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_ABI APInt RoundingUDiv(const APInt &A, const APInt &B, APInt::Rounding RM)
Return A unsign-divided by B, rounded by the given rounding mode.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
std::variant< std::monostate, Loc::Single, Loc::Multi, Loc::MMI, Loc::EntryValue > Variant
Alias for the std::variant specialization base class of DbgVariable.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
match_isa< To... > m_Isa()
match_combine_or< Ty... > m_CombineOr(const Ty &...Ps)
Combine pattern matchers matching any of Ps patterns.
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
auto m_Cmp()
Matches any compare instruction and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_MaskedStore(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
Matches MaskedStore Intrinsic.
match_combine_or< CastInst_match< OpTy, TruncInst >, OpTy > m_TruncOrSelf(const OpTy &Op)
auto m_Poison()
Match an arbitrary poison constant.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
LogicalOp_match< LHS, RHS, Instruction::And > m_LogicalAnd(const LHS &L, const RHS &R)
Matches L && R either in the form of L & R or L ? R : false.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
BinaryOp_match< LHS, RHS, Instruction::FMul > m_FMul(const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, OpTy > m_ZExtOrSelf(const OpTy &Op)
bool match(Val *V, const Pattern &P)
match_deferred< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
auto match_fn(const Pattern &P)
A match functor that can be used as a UnaryPredicate in functional algorithms like all_of.
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_MaskedLoad(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
Matches MaskedLoad Intrinsic.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
SpecificCmpClass_match< LHS, RHS, CmpInst > m_SpecificCmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
CastInst_match< OpTy, FPExtInst > m_FPExt(const OpTy &Op)
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::UDiv > m_UDiv(const LHS &L, const RHS &R)
SelectLike_match< CondTy, LTy, RTy > m_SelectLike(const CondTy &C, const LTy &TrueC, const RTy &FalseC)
Matches a value that behaves like a boolean-controlled select, i.e.
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches an Add with LHS and RHS in either order.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinaryOp_match< LHS, RHS, Instruction::FAdd, true > m_c_FAdd(const LHS &L, const RHS &R)
Matches FAdd with LHS and RHS in either order.
LogicalOp_match< LHS, RHS, Instruction::And, true > m_c_LogicalAnd(const LHS &L, const RHS &R)
Matches L && R with LHS and RHS in either order.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
BinaryOp_match< LHS, RHS, Instruction::Mul, true > m_c_Mul(const LHS &L, const RHS &R)
Matches a Mul with LHS and RHS in either order.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
auto m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
bind_cst_ty m_scev_APInt(const APInt *&C)
Match an SCEV constant and bind it to an APInt.
specificloop_ty m_SpecificLoop(const Loop *L)
bool match(const SCEV *S, const Pattern &P)
SCEVAffineAddRec_match< Op0_t, Op1_t, match_isa< const Loop > > m_scev_AffineAddRec(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::ExtractLastLane, VPInstruction_match< VPInstruction::ExtractLastPart, Op0_t > > m_ExtractLastLaneOfLastPart(const Op0_t &Op0)
AllRecipe_commutative_match< Instruction::And, Op0_t, Op1_t > m_c_BinaryAnd(const Op0_t &Op0, const Op1_t &Op1)
Match a binary AND operation.
AllRecipe_match< Instruction::Or, Op0_t, Op1_t > m_BinaryOr(const Op0_t &Op0, const Op1_t &Op1)
Match a binary OR operation.
VPInstruction_match< VPInstruction::AnyOf > m_AnyOf()
AllRecipe_commutative_match< Instruction::Or, Op0_t, Op1_t > m_c_BinaryOr(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::ComputeReductionResult, Op0_t > m_ComputeReductionResult(const Op0_t &Op0)
auto m_WidenAnyExtend(const Op0_t &Op0)
match_bind< VPIRValue > m_VPIRValue(VPIRValue *&V)
Match a VPIRValue.
VPInstruction_match< VPInstruction::StepVector > m_StepVector()
auto m_VPPhi(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::BranchOnTwoConds > m_BranchOnTwoConds()
AllRecipe_match< Opcode, Op0_t, Op1_t > m_Binary(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::LastActiveLane, Op0_t > m_LastActiveLane(const Op0_t &Op0)
auto m_WidenIntrinsic(const T &...Ops)
canonical_widen_iv_match m_CanonicalWidenIV()
VPInstruction_match< VPInstruction::ExitingIVValue, Op0_t > m_ExitingIVValue(const Op0_t &Op0)
VPInstruction_match< Instruction::ExtractElement, Op0_t, Op1_t > m_ExtractElement(const Op0_t &Op0, const Op1_t &Op1)
specific_intval< 1 > m_False()
VPInstruction_match< VPInstruction::ExtractLastLane, Op0_t > m_ExtractLastLane(const Op0_t &Op0)
VPInstruction_match< VPInstruction::ActiveLaneMask, Op0_t, Op1_t, Op2_t > m_ActiveLaneMask(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
match_bind< VPSingleDefRecipe > m_VPSingleDefRecipe(VPSingleDefRecipe *&V)
Match a VPSingleDefRecipe, capturing if we match.
VPInstruction_match< VPInstruction::BranchOnCount > m_BranchOnCount()
auto m_GetElementPtr(const Op0_t &Op0, const Op1_t &Op1)
specific_intval< 1 > m_True()
auto m_VPValue()
Match an arbitrary VPValue and ignore it.
VectorEndPointerRecipe_match< Op0_t, Op1_t > m_VecEndPtr(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::ExtractLastPart, Op0_t > m_ExtractLastPart(const Op0_t &Op0)
VPRecipeBase * findUserOf(VPValue *V, const MatchT &P)
If V is used by a recipe matching pattern P, return it.
VPInstruction_match< VPInstruction::Broadcast, Op0_t > m_Broadcast(const Op0_t &Op0)
VPInstruction_match< VPInstruction::ExplicitVectorLength, Op0_t > m_EVL(const Op0_t &Op0)
VPInstruction_match< VPInstruction::BuildVector > m_BuildVector()
BuildVector matches only its opcode, w/o matching its operands as the number of operands is not fi...
VPInstruction_match< VPInstruction::ExtractPenultimateElement, Op0_t > m_ExtractPenultimateElement(const Op0_t &Op0)
match_bind< VPInstruction > m_VPInstruction(VPInstruction *&V)
Match a VPInstruction, capturing if we match.
VPInstruction_match< VPInstruction::FirstActiveLane, Op0_t > m_FirstActiveLane(const Op0_t &Op0)
auto m_DerivedIV(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
VPInstruction_match< VPInstruction::BranchOnCond > m_BranchOnCond()
VPInstruction_match< VPInstruction::ExtractLane, Op0_t, Op1_t > m_ExtractLane(const Op0_t &Op0, const Op1_t &Op1)
auto m_AnyNeg(const Op0_t &Op0)
VPInstruction_match< VPInstruction::Reverse, Op0_t > m_Reverse(const Op0_t &Op0)
NodeAddr< DefNode * > Def
bool isSingleScalar(const VPValue *VPV)
Returns true if VPV is a single scalar, either because it produces the same value for all lanes or on...
VPValue * getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr)
Get or create a VPValue that corresponds to the expansion of Expr.
bool cannotHoistOrSinkRecipe(const VPRecipeBase &R, bool Sinking=false)
Return true if we do not know how to (mechanically) hoist or sink R.
VPInstruction * findComputeReductionResult(VPReductionPHIRecipe *PhiR)
Find the ComputeReductionResult recipe for PhiR, looking through selects inserted for predicated redu...
VPInstruction * findCanonicalIVIncrement(VPlan &Plan)
Find the canonical IV increment of Plan's vector loop region.
std::optional< MemoryLocation > getMemoryLocation(const VPRecipeBase &R)
Return a MemoryLocation for R with noalias metadata populated from R, if the recipe is supported and ...
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
VPValue * findIncomingAliasMask(const VPlan &Plan)
Finds the incoming alias-mask within the vector preheader.
VPRecipeBase * findRecipe(VPValue *Start, PredT Pred)
Search Start's users for a recipe satisfying Pred, looking through recipes with definitions.
VPSingleDefRecipe * findHeaderMask(VPlan &Plan)
Collect the header mask with the pattern: (ICMP_ULE, WideCanonicalIV, backedge-taken-count) Note: If ...
bool onlyScalarValuesUsed(const VPValue *Def)
Returns true if only scalar values of Def are used by all users.
bool isUniformAcrossVFsAndUFs(const VPValue *V)
Checks if V is uniform across all VF lanes and UF parts.
bool isUsedByLoadStoreAddress(const VPValue *V)
Returns true if V is used as part of the address of another load or store.
GEPNoWrapFlags getGEPFlagsForPtr(VPValue *Ptr)
Returns the GEP nowrap flags for Ptr, looking through pointer casts mirroring Value::stripPointerCast...
const SCEV * getSCEVExprForVPValue(const VPValue *V, PredicatedScalarEvolution &PSE, const Loop *L=nullptr)
Return the SCEV expression for V.
bool isHeaderMask(const VPValue *V, const VPlan &Plan)
Return true if V is a header mask in Plan.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
SmallVector< VPBasicBlock * > vp_rpo_plain_cfg_loop_body(VPBasicBlock *Header)
Returns the VPBasicBlocks forming the loop body of a plain (pre-region) VPlan in reverse post-order s...
constexpr auto not_equal_to(T &&Arg)
Functor variant of std::not_equal_to that can be used as a UnaryPredicate in functional algorithms li...
void stable_sort(R &&Range)
auto min_element(R &&Range)
Provide wrappers to std::min_element which take ranges instead of having to pass begin/end explicitly...
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
DenseMap< const Value *, const SCEV * > ValueToSCEVMapTy
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
constexpr from_range_t from_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
auto cast_or_null(const Y &Val)
iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order.
constexpr auto bind_back(FnT &&Fn, BindArgsT &&...BindArgs)
C++23 bind_back.
iterator_range< df_iterator< VPBlockDeepTraversalWrapper< VPBlockBase * > > > vp_depth_first_deep(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order while traversing t...
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like a...
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
SmallVector< VPRegisterUsage, 8 > calculateRegisterUsageForPlan(VPlan &Plan, ArrayRef< ElementCount > VFs, const TargetTransformInfo &TTI, const SmallPtrSetImpl< const Value * > &ValuesToIgnore)
Estimate the register usage for Plan and vectorization factors in VFs by calculating the highest numb...
auto map_range(ContainerTy &&C, FuncTy F)
Return a range that applies F to the elements of C.
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
auto dyn_cast_or_null(const Y &Val)
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
constexpr size_t range_size(R &&Range)
Returns the size of the Range, i.e., the number of elements.
void sort(IteratorTy Start, IteratorTy End)
bool hasIrregularType(Type *Ty, const DataLayout &DL)
A helper function that returns true if the given type is irregular.
LLVM_ABI_FOR_TEST cl::opt< bool > EnableWideActiveLaneMask
UncountableExitStyle
Different methods of handling early exits.
@ MaskedHandleExitInScalarLoop
All memory operations other than the load(s) required to determine whether an uncountable exit occurr...
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
iterator_range< filter_iterator< detail::IterOfRange< RangeT >, PredicateT > > make_filter_range(RangeT &&Range, PredicateT Pred)
Convenience function that takes a range of elements and a predicate, and return a new filter_iterator...
bool canConstantBeExtended(const APInt *C, Type *NarrowType, TTI::PartialReductionExtendKind ExtKind)
Check if a constant CI can be safely treated as having been extended from a narrower type with the gi...
T * find_singleton(R &&Range, Predicate P, bool AllowRepeats=false)
Return the single value in Range that satisfies P(<member of Range> *, AllowRepeats)->T * returning n...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
RecurKind
These are the kinds of recurrences that we support.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ FindIV
FindIV reduction with select(icmp(),x,y) where one of (x,y) is a loop induction variable (increasing ...
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
@ FSub
Subtraction of floats.
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ Sub
Subtraction of integers.
@ AddChainWithSubs
A chain of adds and subs.
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
LLVM_ABI Value * getRecurrenceIdentity(RecurKind K, Type *Tp, FastMathFlags FMF)
Given information about a recurrence kind, return the identity for the @llvm.vector....
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the specified block at the specified instruction.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
DWARFExpression::Operation Op
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...
ArrayRef(const T &OneElt) -> ArrayRef< T >
auto make_second_range(ContainerTy &&c)
Given a container of pairs, return a range over the second elements.
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
RelativeUniformCounterPtr ValuesPtrExpr VTableAddr Next
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list is empty.
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
LLVM_ABI std::optional< int64_t > getStrideFromAddRec(const SCEVAddRecExpr *AR, const Loop *Lp, Type *AccessTy, Value *Ptr, PredicatedScalarEvolution &PSE)
If AR is an affine AddRec for Lp with a constant step, return the step in units of AccessTy's allocat...
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
LLVM_ABI bool isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L, ScalarEvolution &SE, DominatorTree &DT, AssumptionCache *AC=nullptr, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
Return true if we can prove that the given load (which is assumed to be within the specified loop) wo...
@ Default
The result value is uniform if and only if all operands are uniform.
constexpr detail::IsaCheckPredicate< Types... > IsaPred
Function object wrapper for the llvm::isa type check.
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
VPBasicBlock * EarlyExitingVPBB
VPIRBasicBlock * EarlyExitVPBB
RemoveMask_match(const Op0_t &In, Op1_t &Out)
bool match(OpTy *V) const
MDNode * Scope
The tag for alias scope specification (used with noalias).
This struct is a compact representation of a valid (non-zero power of two) alignment.
An information struct used to provide DenseMap with the various necessary components for a given valu...
This reduction is unordered with the partial result scaled down by some factor.
Holds the VFShape for a specific scalar to vector function mapping.
Encapsulates information needed to describe a parameter.
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
Struct to hold various analysis needed for cost computations.
static bool isFreeScalarIntrinsic(Intrinsic::ID ID)
Returns true if ID is a pseudo intrinsic that is dropped via scalarization rather than widened.
bool isMaskRequired(Instruction *I) const
Forwards to LoopVectorizationCostModel::isMaskRequired.
PredicatedScalarEvolution & PSE
bool willBeScalarized(Instruction *I, ElementCount VF) const
Returns true if I is known to be scalarized at VF.
TargetTransformInfo::TargetCostKind CostKind
const TargetLibraryInfo & TLI
const TargetTransformInfo & TTI
A VPValue representing a live-in from the input IR or a constant.
Type * getType() const
Returns the type of the underlying IR value.
A struct that represents some properties of the register usage of a loop.
SmallMapVector< unsigned, unsigned, 4 > MaxLocalUsers
Holds the maximum number of concurrent live intervals in the loop.
InstructionCost spillCost(const TargetTransformInfo &TTI, TargetTransformInfo::TargetCostKind CostKind, unsigned OverrideMaxNumRegs=0) const
Calculate the estimated cost of any spills due to using more registers than the number available for ...
A symbolic live-in VPValue, used for values like vector trip count, VF, and VFxUF.
bool isMaterialized() const
Returns true if this symbolic value has been materialized.
A recipe for widening load operations with vector-predication intrinsics, using the address to load f...
A recipe for widening load operations, using the address to load from and an optional mask.
A recipe for widening store operations with vector-predication intrinsics, using the value to store,...
A recipe for widening store operations, using the stored value, the address to store to and an option...