57 if (!VPBB->getParent())
60 auto EndIter = Term ? Term->getIterator() : VPBB->end();
65 VPValue *VPV = Ingredient.getVPSingleValue();
81 *Load, Ingredient.getOperand(0),
nullptr ,
82 false , *VPI, Ingredient.getDebugLoc());
85 *Store, Ingredient.getOperand(1), Ingredient.getOperand(0),
86 nullptr ,
false , *VPI,
87 Ingredient.getDebugLoc());
90 Ingredient.operands(), *VPI,
91 Ingredient.getDebugLoc(),
GEP);
103 if (VectorID == Intrinsic::experimental_noalias_scope_decl)
108 if (VectorID == Intrinsic::assume ||
109 VectorID == Intrinsic::lifetime_end ||
110 VectorID == Intrinsic::lifetime_start ||
111 VectorID == Intrinsic::sideeffect ||
112 VectorID == Intrinsic::pseudoprobe) {
117 const bool IsSingleScalar = VectorID != Intrinsic::assume &&
118 VectorID != Intrinsic::pseudoprobe;
122 Ingredient.getDebugLoc());
125 *CI, VectorID,
drop_end(Ingredient.operands()), CI->getType(),
126 VPIRFlags(*CI), *VPI, CI->getDebugLoc());
130 CI->getOpcode(), Ingredient.getOperand(0), CI->getType(), CI,
134 *VPI, Ingredient.getDebugLoc());
138 "inductions must be created earlier");
147 "Only recpies with zero or one defined values expected");
148 Ingredient.eraseFromParent();
164 if (
A->getOpcode() != Instruction::Store ||
165 B->getOpcode() != Instruction::Store)
175 const APInt *Distance;
181 Type *TyA =
A->getOperand(0)->getScalarType();
183 Type *TyB =
B->getOperand(0)->getScalarType();
189 uint64_t MaxStoreSize = std::max(SizeA, SizeB);
191 auto VFs =
B->getParent()->getPlan()->vectorFactors();
195 return Distance->
abs().
uge(
203 : ExcludeRecipes(ExcludeRecipes), GroupLeader(GroupLeader), PSE(PSE),
211 return ExcludeRecipes.contains(&R) ||
212 (Store && isNoAliasViaDistance(Store, &GroupLeader));
225 std::optional<SinkStoreInfo> SinkInfo = {}) {
226 bool CheckReads = SinkInfo.has_value();
233 if (SinkInfo && SinkInfo->shouldSkip(R))
237 if (!
R.mayWriteToMemory() && !(CheckReads &&
R.mayReadFromMemory()))
262template <
unsigned Opcode>
267 static_assert(Opcode == Instruction::Load || Opcode == Instruction::Store,
268 "Only Load and Store opcodes supported");
269 constexpr bool IsLoad = (Opcode == Instruction::Load);
272 RecipesByAddressAndType;
277 if (!RepR || RepR->getOpcode() != Opcode || !FilterFn(RepR))
281 VPValue *Addr = RepR->getOperand(IsLoad ? 0 : 1);
285 RecipesByAddressAndType[{AddrSCEV, LoadStoreTy}].push_back(RepR);
290 for (
auto &Group :
Groups) {
305 auto InsertIfValidSinkCandidate = [ScalarVFOnly, &WorkList](
317 if (Candidate->getParent() == SinkTo ||
322 if (!ScalarVFOnly && RepR->isSingleScalar())
325 WorkList.
insert({SinkTo, Candidate});
337 for (
auto &Recipe : *VPBB)
339 InsertIfValidSinkCandidate(VPBB,
Op);
343 for (
unsigned I = 0;
I != WorkList.
size(); ++
I) {
346 std::tie(SinkTo, SinkCandidate) = WorkList[
I];
351 auto UsersOutsideSinkTo =
353 return cast<VPRecipeBase>(U)->getParent() != SinkTo;
355 if (
any_of(UsersOutsideSinkTo, [SinkCandidate](
VPUser *U) {
356 return !U->usesFirstLaneOnly(SinkCandidate);
359 bool NeedsDuplicating = !UsersOutsideSinkTo.empty();
361 if (NeedsDuplicating) {
365 if (
auto *SinkCandidateRepR =
370 SinkCandidateRepR->getOpcode(), SinkCandidate->
operands(),
371 nullptr, *SinkCandidateRepR, *SinkCandidateRepR,
375 Clone = SinkCandidate->
clone();
385 InsertIfValidSinkCandidate(SinkTo,
Op);
395 if (!EntryBB || EntryBB->size() != 1 ||
405 if (EntryBB->getNumSuccessors() != 2)
410 if (!Succ0 || !Succ1)
413 if (Succ0->getNumSuccessors() + Succ1->getNumSuccessors() != 1)
415 if (Succ0->getSingleSuccessor() == Succ1)
417 if (Succ1->getSingleSuccessor() == Succ0)
434 if (!Region1->isReplicator())
436 auto *MiddleBasicBlock =
438 if (!MiddleBasicBlock || !MiddleBasicBlock->empty())
443 if (!Region2 || !Region2->isReplicator())
448 if (!Mask1 || Mask1 != Mask2)
451 assert(Mask1 && Mask2 &&
"both region must have conditions");
457 if (TransformedRegions.
contains(Region1))
464 if (!Then1 || !Then2)
484 VPValue *Phi1ToMoveV = Phi1ToMove.getVPSingleValue();
490 if (Phi1ToMove.getVPSingleValue()->getNumUsers() == 0) {
491 Phi1ToMove.eraseFromParent();
494 Phi1ToMove.moveBefore(*Merge2, Merge2->begin());
508 TransformedRegions.
insert(Region1);
511 return !TransformedRegions.
empty();
519 std::string RegionName = (
Twine(
"pred.") + Instr->getOpcodeName()).str();
520 assert(Instr->getParent() &&
"Predicated instruction not in any basic block");
521 auto *BlockInMask = PredRecipe->
getMask();
542 Region->setParent(ParentRegion);
548 RecipeWithoutMask->getDebugLoc());
549 Exiting->appendRecipe(PHIRecipe);
562 if (RepR->isPredicated())
581 if (ParentRegion && ParentRegion->
getExiting() == CurrentBlock)
593 if (!VPBB->getParent())
597 if (!PredVPBB || PredVPBB->getNumSuccessors() != 1 ||
606 R.moveBefore(*PredVPBB, PredVPBB->
end());
608 auto *ParentRegion = VPBB->getParent();
609 if (ParentRegion && ParentRegion->getExiting() == VPBB)
610 ParentRegion->setExiting(PredVPBB);
614 return !WorkList.
empty();
621 bool ShouldSimplify =
true;
622 while (ShouldSimplify) {
638 if (!
IV ||
IV->getTruncInst())
653 for (
auto *U : FindMyCast->
users()) {
655 if (UserCast && UserCast->getUnderlyingValue() == IRCast) {
656 FoundUserCast = UserCast;
663 FindMyCast = FoundUserCast;
665 if (FindMyCast !=
IV)
680 Builder.createDerivedIV(Kind, FPBinOp, StartV, CanonicalIV, Step);
689 BaseIV = Builder.createScalarCast(Instruction::Trunc, BaseIV, TruncTy,
DL);
695 if (ResultTy != StepTy) {
702 Builder.setInsertPoint(VecPreheader);
703 Step = Builder.createScalarCast(Instruction::Trunc, Step, ResultTy,
DL);
705 return Builder.createScalarIVSteps(InductionOpcode, FPBinOp, BaseIV, Step,
731 WideCanIV->getDebugLoc(), Builder));
732 WideCanIV->eraseFromParent();
749 WideCanIV->replaceAllUsesWith(WidenIV);
750 WideCanIV->eraseFromParent();
759 if (PHICost > BroadcastCost)
768 unsigned RegClass =
TTI.getRegisterClassForType(
true, VecTy);
780 WideCanIV->getNoWrapFlags(), WideCanIV->getDebugLoc());
781 NewWideIV->insertBefore(&*Header->getFirstNonPhi());
782 WideCanIV->replaceAllUsesWith(NewWideIV);
783 WideCanIV->eraseFromParent();
791 bool IsConditionalAssume = RepR && RepR->isPredicated() &&
793 if (IsConditionalAssume)
796 if (R.mayHaveSideEffects())
800 return all_of(R.definedValues(),
801 [](
VPValue *V) { return V->getNumUsers() == 0; });
821 VPUser *PhiUser = PhiR->getSingleUser();
827 PhiR->replaceAllUsesWith(Start);
828 PhiR->eraseFromParent();
836 for (
unsigned I = 0;
I !=
Users.size(); ++
I) {
839 Users.insert_range(V->users());
841 return Users.takeVector();
855 nullptr, StartV, StepV, PtrIV->
getDebugLoc(), Builder);
892 Def->getNumUsers() == 0 || !Def->getUnderlyingValue() ||
893 (RepR && (RepR->isSingleScalar() || RepR->isPredicated())))
906 Def->getUnderlyingInstr()->getOpcode(), Def->operands(),
908 Def->getUnderlyingInstr());
909 Clone->insertAfter(Def);
910 Def->replaceAllUsesWith(Clone);
921 PtrIV->replaceAllUsesWith(PtrAdd);
928 if (HasOnlyVectorVFs &&
none_of(WideIV->users(), [WideIV](
VPUser *U) {
929 return U->usesScalars(WideIV);
935 Plan,
ID.getKind(),
ID.getInductionOpcode(),
937 WideIV->getTruncInst(), WideIV->getStartValue(), WideIV->getStepValue(),
938 WideIV->getDebugLoc(), Builder);
941 if (!HasOnlyVectorVFs) {
943 "plans containing a scalar VF cannot also include scalable VFs");
944 WideIV->replaceAllUsesWith(Steps);
947 WideIV->replaceUsesWithIf(Steps,
948 [WideIV, HasScalableVF](
VPUser &U,
unsigned) {
950 return U.usesFirstLaneOnly(WideIV);
951 return U.usesScalars(WideIV);
967 return (IntOrFpIV && IntOrFpIV->getTruncInst()) ? nullptr : WideIV;
972 if (!Def || Def->getNumOperands() != 2)
980 auto IsWideIVInc = [&]() {
981 auto &
ID = WideIV->getInductionDescriptor();
984 VPValue *IVStep = WideIV->getStepValue();
985 switch (
ID.getInductionOpcode()) {
986 case Instruction::Add:
988 case Instruction::FAdd:
990 case Instruction::FSub:
993 case Instruction::Sub: {
1013 return IsWideIVInc() ? WideIV :
nullptr;
1030 if (WideIntOrFp && WideIntOrFp->getTruncInst())
1041 VPValue *FirstActiveLane =
B.createFirstActiveLane(Mask,
DL);
1042 FirstActiveLane =
B.createScalarZExtOrTrunc(
1043 FirstActiveLane, CanonicalIVType, FirstActiveLane->
getScalarType(),
DL);
1044 VPValue *EndValue =
B.createAdd(CanonicalIV, FirstActiveLane,
DL);
1049 if (Incoming != WideIV) {
1051 EndValue =
B.createAdd(EndValue, One,
DL);
1056 VPIRValue *Start = WideIV->getStartValue();
1057 VPValue *Step = WideIV->getStepValue();
1058 EndValue =
B.createDerivedIV(
1060 Start, EndValue, Step);
1074 if (WideIntOrFp && WideIntOrFp->getTruncInst())
1084 Start, VectorTC, Step);
1114 assert(EndValue &&
"Must have computed the end value up front");
1119 if (Incoming != WideIV)
1131 auto *Zero = Plan.
getZero(StepTy);
1132 return B.createPtrAdd(EndValue,
B.createSub(Zero, Step),
1137 return B.createNaryOp(
1138 ID.getInductionBinOp()->getOpcode() == Instruction::FAdd
1140 : Instruction::FAdd,
1141 {EndValue, Step}, {ID.getInductionBinOp()->getFastMathFlags()});
1152 VPBuilder VectorPHBuilder(VectorPH, VectorPH->begin());
1162 EndValues[WideIV] = EndValue;
1172 R.getVPSingleValue()->replaceAllUsesWith(EndValue);
1173 R.eraseFromParent();
1182 for (
auto [Idx, PredVPBB] :
enumerate(ExitVPBB->getPredecessors())) {
1184 if (PredVPBB == MiddleVPBB)
1186 Plan, ExitIRI->getOperand(Idx), EndValues, PSE);
1189 Plan, ExitIRI->getOperand(Idx), PSE);
1191 ExitIRI->setOperand(Idx, Escape);
1208 const auto &[V, Inserted] = SCEV2VPV.
try_emplace(ExpR->getSCEV(), ExpR);
1212 ExpR->replaceAllUsesWith(V->second);
1216 ExpR->eraseFromParent();
1225 while (!WorkList.
empty()) {
1227 if (!Seen.
insert(Cur).second)
1235 R->eraseFromParent();
1242static std::optional<std::pair<bool, unsigned>>
1245 std::optional<std::pair<bool, unsigned>>>(R)
1248 [](
auto *
I) {
return std::make_pair(
false,
I->getOpcode()); })
1250 return std::make_pair(
true,
I->getVectorIntrinsicID());
1252 .Case<VPVectorPointerRecipe, VPPredInstPHIRecipe, VPScalarIVStepsRecipe>(
1258 I->getVPRecipeID());
1260 .
Default([](
auto *) {
return std::nullopt; });
1285 VPlan &Plan = *R.getParent()->getPlan();
1286 auto FoldToIRValue = [&]() ->
Value * {
1288 if (OpcodeOrIID->first) {
1289 if (R.getNumOperands() != 2)
1291 unsigned ID = OpcodeOrIID->second;
1292 return Folder.FoldBinaryIntrinsic(
ID,
Ops[0],
Ops[1], R.getScalarType());
1294 unsigned Opcode = OpcodeOrIID->second;
1300 R.getVPSingleValue()->getScalarType());
1303 return Folder.FoldBinOp(Instruction::BinaryOps::Xor,
Ops[0],
1305 case Instruction::Select:
1306 return Folder.FoldSelect(
Ops[0],
Ops[1],
Ops[2]);
1307 case Instruction::ICmp:
1308 case Instruction::FCmp:
1311 case Instruction::GetElementPtr: {
1314 return Folder.FoldGEP(
GEP->getSourceElementType(),
Ops[0],
1324 case Instruction::ExtractElement:
1331 if (
Value *V = FoldToIRValue())
1338 bool CanCreateNewRecipe) {
1339 VPlan *Plan = Def->getParent()->getPlan();
1349 Def->replaceAllUsesWith(
X);
1350 Def->eraseFromParent();
1362 Def->replaceAllUsesWith(
X);
1374 Def->replaceAllUsesWith(Plan->
getZero(Def->getScalarType()));
1380 Def->replaceAllUsesWith(
X);
1386 Def->replaceAllUsesWith(Plan->
getFalse());
1392 Def->replaceAllUsesWith(
X);
1397 if (CanCreateNewRecipe &&
1402 (!Def->getOperand(0)->hasMoreThanOneUniqueUser() ||
1403 !Def->getOperand(1)->hasMoreThanOneUniqueUser())) {
1404 Def->replaceAllUsesWith(
1405 Builder.createLogicalAnd(
X, Builder.createOr(
Y, Z)));
1412 Def->replaceAllUsesWith(Def->getOperand(1));
1419 Def->replaceAllUsesWith(Builder.createLogicalAnd(
X,
Y));
1425 Def->replaceAllUsesWith(Plan->
getFalse());
1430 Def->replaceAllUsesWith(
X);
1436 if (CanCreateNewRecipe &&
1438 Def->replaceAllUsesWith(Builder.createNot(
C));
1444 Def->setOperand(0,
C);
1445 Def->setOperand(1,
Y);
1446 Def->setOperand(2,
X);
1451 if (CanCreateNewRecipe &&
1455 Y->getScalarType()->isIntegerTy(1)) {
1456 Def->replaceAllUsesWith(
1457 Builder.createOr(
Y, Builder.createLogicalAnd(
X, Z)));
1466 VPlan *Plan = Def->getParent()->getPlan();
1472 return Def->replaceAllUsesWith(V);
1478 PredPHI->replaceAllUsesWith(
Op);
1491 bool CanCreateNewRecipe =
1496 Type *TruncTy = Def->getScalarType();
1497 Type *ATy =
A->getScalarType();
1498 if (TruncTy == ATy) {
1499 Def->replaceAllUsesWith(
A);
1508 : Instruction::ZExt;
1511 if (
auto *UnderlyingExt = Def->getOperand(0)->getUnderlyingValue()) {
1513 Ext->setUnderlyingValue(UnderlyingExt);
1515 Def->replaceAllUsesWith(Ext);
1517 auto *Trunc = Builder.createWidenCast(Instruction::Trunc,
A, TruncTy);
1518 Def->replaceAllUsesWith(Trunc);
1528 return Def->replaceAllUsesWith(
A);
1531 return Def->replaceAllUsesWith(
A);
1534 return Def->replaceAllUsesWith(Plan->
getZero(Def->getScalarType()));
1540 return Def->replaceAllUsesWith(Builder.createSub(
1541 Plan->
getZero(
A->getScalarType()),
A, Def->getDebugLoc(),
"", NW));
1544 if (CanCreateNewRecipe &&
1552 ->hasNoSignedWrap()};
1553 return Def->replaceAllUsesWith(
1554 Builder.createSub(
X,
Y, Def->getDebugLoc(),
"", NW));
1560 return Def->replaceAllUsesWith(Builder.createNaryOp(
1562 {A, Plan->getConstantInt(APC->getBitWidth(), APC->exactLogBase2())},
1567 return Def->replaceAllUsesWith(Builder.createNaryOp(
1569 {A, Plan->getConstantInt(APC->getBitWidth(), APC->exactLogBase2())},
1574 return Def->replaceAllUsesWith(
A);
1589 R->setOperand(1,
Y);
1590 R->setOperand(2,
X);
1594 R->replaceAllUsesWith(Cmp);
1599 if (!Cmp->getDebugLoc() && Def->getDebugLoc())
1600 Cmp->setDebugLoc(Def->getDebugLoc());
1612 if (
Op->getNumUsers() > 1 ||
1616 }
else if (!UnpairedCmp) {
1617 UnpairedCmp =
Op->getDefiningRecipe();
1621 UnpairedCmp =
nullptr;
1628 if (NewOps.
size() < Def->getNumOperands()) {
1630 return Def->replaceAllUsesWith(NewAnyOf);
1637 if (CanCreateNewRecipe &&
1643 return Def->replaceAllUsesWith(NewCmp);
1649 Def->getOperand(1)->getScalarType() == Def->getScalarType())
1650 return Def->replaceAllUsesWith(Def->getOperand(1));
1654 Type *WideStepTy = Def->getScalarType();
1655 if (
X->getScalarType() != WideStepTy)
1656 X = Builder.createWidenCast(Instruction::Trunc,
X, WideStepTy);
1657 Def->replaceAllUsesWith(
X);
1666 Def->getScalarType()->isIntegerTy(1)) {
1667 Def->setOperand(1, Def->getOperand(0));
1668 Def->setOperand(0,
Y);
1675 return Def->replaceAllUsesWith(Def->getOperand(0));
1681 Def->replaceAllUsesWith(
1682 BuildVector->getOperand(BuildVector->getNumOperands() - 1));
1687 return Def->replaceAllUsesWith(
X);
1690 return Def->replaceAllUsesWith(
A);
1693 return Def->replaceAllUsesWith(
A);
1699 Def->replaceAllUsesWith(
1700 BuildVector->getOperand(BuildVector->getNumOperands() - 2));
1707 Def->replaceAllUsesWith(BuildVector->getOperand(Idx));
1712 Def->replaceAllUsesWith(
1722 "broadcast operand must be single-scalar");
1723 Def->setOperand(0,
C);
1728 return Def->replaceUsesWithIf(
1729 X, [Def](
const VPUser &U,
unsigned) {
return U.usesScalars(Def); });
1732 if (Def->getNumOperands() == 1) {
1733 Def->replaceAllUsesWith(Def->getOperand(0));
1738 Phi->replaceAllUsesWith(Phi->getOperand(0));
1744 if (Def->getNumOperands() == 1 &&
1746 return Def->replaceAllUsesWith(IRV);
1759 return Def->replaceAllUsesWith(
A);
1766 return Def->replaceAllUsesWith(WidenIV->getRegion()->getCanonicalIV());
1769 Def->replaceAllUsesWith(Builder.createNaryOp(
1770 Instruction::ExtractElement, {A, LaneToExtract}, Def->getDebugLoc()));
1784 auto *IVInc = Def->getOperand(0);
1785 if (IVInc->getNumUsers() == 2) {
1790 if (Phi->getNumUsers() == 1 || (Phi->getNumUsers() == 2 && Inc)) {
1791 Def->replaceAllUsesWith(IVInc);
1793 Inc->replaceAllUsesWith(Phi);
1794 Phi->setOperand(0,
Y);
1810 Steps->replaceAllUsesWith(Steps->getOperand(0));
1818 Def->replaceUsesWithIf(StartV, [](
const VPUser &U,
unsigned Idx) {
1820 return PhiR && PhiR->isInLoop();
1826 return Def->replaceAllUsesWith(
A);
1852 while (!Worklist.
empty()) {
1861 R->replaceAllUsesWith(
1862 Builder.createLogicalAnd(HeaderMask, Builder.createLogicalAnd(
X,
Y)));
1866static std::optional<Instruction::BinaryOps>
1869 case Intrinsic::masked_udiv:
1870 return Instruction::UDiv;
1871 case Intrinsic::masked_sdiv:
1872 return Instruction::SDiv;
1873 case Intrinsic::masked_urem:
1874 return Instruction::URem;
1875 case Intrinsic::masked_srem:
1876 return Instruction::SRem;
1893 if (RepR && (RepR->isSingleScalar() || RepR->isPredicated()))
1897 if (RepR && RepR->getOpcode() == Instruction::Store &&
1900 RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(),
1901 true ,
nullptr , *RepR ,
1902 *RepR , RepR->getDebugLoc());
1903 Clone->insertBefore(RepOrWidenR);
1905 VPValue *ExtractOp = Clone->getOperand(0);
1911 Clone->setOperand(0, ExtractOp);
1912 RepR->eraseFromParent();
1924 VPValue *SafeDivisor = Builder.createSelect(
1925 IntrR->getOperand(2), IntrR->getOperand(1),
1927 VPValue *Clone = Builder.createNaryOp(
1928 *
Opc, {IntrR->getOperand(0), SafeDivisor},
1931 IntrR->eraseFromParent();
1940 auto IntroducesBCastOf = [](
const VPValue *
Op) {
1949 return !U->usesScalars(
Op);
1953 if (
any_of(RepOrWidenR->users(), IntroducesBCastOf(RepOrWidenR)) &&
1956 make_filter_range(Op->users(), not_equal_to(RepOrWidenR)),
1957 IntroducesBCastOf(Op)))
1961 auto *IRV = dyn_cast<VPIRValue>(Op);
1962 bool LiveInNeedsBroadcast = IRV && !isa<Constant>(IRV->getValue());
1963 auto *OpR = dyn_cast<VPReplicateRecipe>(Op);
1964 return LiveInNeedsBroadcast || (OpR && OpR->isSingleScalar());
1971 RepOrWidenR->getUnderlyingInstr());
1972 Clone->insertBefore(RepOrWidenR);
1973 RepOrWidenR->replaceAllUsesWith(Clone);
1975 RepOrWidenR->eraseFromParent();
2011 if (Blend->isNormalized() || !
match(Blend->getMask(0),
m_False()))
2012 UniqueValues.
insert(Blend->getIncomingValue(0));
2013 for (
unsigned I = 1;
I != Blend->getNumIncomingValues(); ++
I)
2015 UniqueValues.
insert(Blend->getIncomingValue(
I));
2017 if (UniqueValues.
size() == 1) {
2018 Blend->replaceAllUsesWith(*UniqueValues.
begin());
2019 Blend->eraseFromParent();
2023 if (Blend->isNormalized())
2029 unsigned StartIndex = 0;
2030 for (
unsigned I = 0;
I != Blend->getNumIncomingValues(); ++
I) {
2035 if (Mask->getNumUsers() == 1 && !
match(Mask,
m_False())) {
2042 OperandsWithMask.
push_back(Blend->getIncomingValue(StartIndex));
2044 for (
unsigned I = 0;
I != Blend->getNumIncomingValues(); ++
I) {
2045 if (
I == StartIndex)
2047 OperandsWithMask.
push_back(Blend->getIncomingValue(
I));
2048 OperandsWithMask.
push_back(Blend->getMask(
I));
2053 OperandsWithMask, *Blend, Blend->getDebugLoc());
2054 NewBlend->insertBefore(&R);
2056 VPValue *DeadMask = Blend->getMask(StartIndex);
2058 Blend->eraseFromParent();
2063 if (NewBlend->getNumOperands() == 3 &&
2065 VPValue *Inc0 = NewBlend->getOperand(0);
2066 VPValue *Inc1 = NewBlend->getOperand(1);
2067 VPValue *OldMask = NewBlend->getOperand(2);
2068 NewBlend->setOperand(0, Inc1);
2069 NewBlend->setOperand(1, Inc0);
2070 NewBlend->setOperand(2, NewMask);
2097 APInt MaxVal = AlignedTC - 1;
2100 unsigned NewBitWidth =
2106 bool MadeChange =
false;
2131 "canonical IV is not expected to have a truncation");
2136 NewWideIV->insertBefore(WideIV);
2143 Cmp->replaceAllUsesWith(
2144 VPBuilder(Cmp).createICmp(Cmp->getPredicate(), NewWideIV, NewBTC));
2158 return any_of(
Cond->getDefiningRecipe()->operands(), [&Plan, BestVF, BestUF,
2160 return isConditionTrueViaVFAndUF(C, Plan, BestVF, BestUF, PSE);
2174 const SCEV *VectorTripCount =
2179 "Trip count SCEV must be computable");
2200 auto *Term = &ExitingVPBB->
back();
2213 for (
unsigned Part = 0; Part < UF; ++Part) {
2219 Extracts[Part] = Ext;
2231 match(Phi->getBackedgeValue(),
2233 assert(Index &&
"Expected index from ActiveLaneMask instruction");
2250 "Expected one VPActiveLaneMaskPHIRecipe for each unroll part");
2257 "Expected incoming values of Phi to be ActiveLaneMasks");
2262 EntryALM->setOperand(2, ALMMultiplier);
2263 LoopALM->setOperand(2, ALMMultiplier);
2267 ExtractFromALM(EntryALM, EntryExtracts);
2272 ExtractFromALM(LoopALM, LoopExtracts);
2274 Not->setOperand(0, LoopExtracts[0]);
2277 for (
unsigned Part = 0; Part < UF; ++Part) {
2278 Phis[Part]->setStartValue(EntryExtracts[Part]);
2279 Phis[Part]->setBackedgeValue(LoopExtracts[Part]);
2292 auto *Term = &ExitingVPBB->
back();
2304 const SCEV *VectorTripCount =
2310 "Trip count SCEV must be computable");
2329 Term->setOperand(1, Plan.
getTrue());
2334 {}, Term->getDebugLoc());
2336 Term->eraseFromParent();
2369 R.getVPSingleValue()->replaceAllUsesWith(Trunc);
2379 assert(Plan.
hasVF(BestVF) &&
"BestVF is not available in Plan");
2380 assert(Plan.
hasUF(BestUF) &&
"BestUF is not available in Plan");
2398 RecurKind RK = PhiR->getRecurrenceKind();
2405 RecWithFlags->dropPoisonGeneratingFlags();
2411struct VPCSEDenseMapInfo :
public DenseMapInfo<VPSingleDefRecipe *> {
2420 return GEP->getSourceElementType();
2423 .Case<VPVectorPointerRecipe, VPWidenGEPRecipe>(
2424 [](
auto *
I) {
return I->getSourceElementType(); })
2425 .
Default([](
auto *) {
return nullptr; });
2429 static bool canHandle(
const VPSingleDefRecipe *Def) {
2438 if (!
C || (!
C->first && (
C->second == Instruction::InsertValue ||
2439 C->second == Instruction::ExtractValue)))
2445 return !
Def->mayReadFromMemory();
2449 static unsigned getHashValue(
const VPSingleDefRecipe *Def) {
2452 getGEPSourceElementType(Def),
Def->getScalarType(),
2455 if (RFlags->hasPredicate())
2458 return hash_combine(Result, SIVSteps->getInductionOpcode());
2463 static bool isEqual(
const VPSingleDefRecipe *L,
const VPSingleDefRecipe *R) {
2464 if (
L->getVPRecipeID() !=
R->getVPRecipeID() ||
2466 getGEPSourceElementType(L) != getGEPSourceElementType(R) ||
2468 !
equal(
L->operands(),
R->operands()))
2471 "must have valid opcode info for both recipes");
2473 if (LFlags->hasPredicate() &&
2474 LFlags->getPredicate() !=
2478 if (LSIV->getInductionOpcode() !=
2484 const VPRegionBlock *RegionL =
L->getRegion();
2485 const VPRegionBlock *RegionR =
R->getRegion();
2488 L->getParent() !=
R->getParent())
2490 return L->getScalarType() ==
R->getScalarType();
2506 if (!Def || !VPCSEDenseMapInfo::canHandle(Def))
2510 if (!VPDT.
dominates(V->getParent(), VPBB))
2515 Def->replaceAllUsesWith(V);
2546 "Expected vector prehader's successor to be the vector loop region");
2554 return !Op->isDefinedOutsideLoopRegions();
2557 R.moveBefore(*Preheader, Preheader->
end());
2575 assert(!RepR->isPredicated() &&
2576 "Expected prior transformation of predicated replicates to "
2577 "replicate regions");
2582 if (!RepR->isSingleScalar())
2594 if (
any_of(Def->users(), [&SinkBB, &LoopRegion](
VPUser *U) {
2595 auto *UserR = cast<VPRecipeBase>(U);
2596 VPBasicBlock *Parent = UserR->getParent();
2598 if (SinkBB && SinkBB != Parent)
2603 return UserR->isPhi() || Parent->getEnclosingLoopRegion() ||
2604 Parent->getSinglePredecessor() != LoopRegion;
2614 "Defining block must dominate sink block");
2639 VPValue *ResultVPV = R.getVPSingleValue();
2641 unsigned NewResSizeInBits = MinBWs.
lookup(UI);
2642 if (!NewResSizeInBits)
2655 (void)OldResSizeInBits;
2663 VPW->dropPoisonGeneratingFlags();
2665 assert((OldResSizeInBits != NewResSizeInBits ||
2667 "Only ICmps should not need extending the result.");
2673 if (OldResSizeInBits != NewResSizeInBits) {
2675 Instruction::ZExt, ResultVPV, OldResTy);
2677 Ext->setOperand(0, ResultVPV);
2687 unsigned OpSizeInBits =
Op->getScalarType()->getScalarSizeInBits();
2688 if (OpSizeInBits == NewResSizeInBits)
2690 assert(OpSizeInBits > NewResSizeInBits &&
"nothing to truncate");
2691 auto [ProcessedIter, Inserted] = ProcessedTruncs.
try_emplace(
Op);
2697 Builder.setInsertPoint(&R);
2698 ProcessedIter->second =
2699 Builder.createWidenCast(Instruction::Trunc,
Op, NewResTy);
2701 Op = ProcessedIter->second;
2705 NWR->insertBefore(&R);
2709 VPValue *Replacement = NWR->getVPSingleValue();
2710 if (OldResSizeInBits != NewResSizeInBits)
2716 R.eraseFromParent();
2722 std::optional<VPDominatorTree> VPDT;
2730 bool SimplifiedPhi =
false;
2740 assert(VPBB->getNumSuccessors() == 2 &&
2741 "Two successors expected for BranchOnCond");
2742 unsigned RemovedIdx;
2753 "There must be a single edge between VPBB and its successor");
2756 auto Phis = RemovedSucc->
phis();
2759 SimplifiedPhi |= !std::empty(Phis);
2763 VPBB->back().eraseFromParent();
2775 if (Reachable.contains(
B))
2786 for (
VPValue *Def : R.definedValues())
2787 Def->replaceAllUsesWith(&Tmp);
2788 R.eraseFromParent();
2792 return SimplifiedPhi;
2846 DebugLoc DL = CanonicalIVIncrement->getDebugLoc();
2857 auto *EntryIncrement = Builder.createOverflowingOp(
2859 DL,
"index.part.next");
2865 {EntryIncrement, TC, ALMMultiplier},
DL,
2866 "active.lane.mask.entry");
2873 LaneMaskPhi->insertBefore(*HeaderVPBB, HeaderVPBB->begin());
2878 Builder.setInsertPoint(OriginalTerminator);
2879 auto *InLoopIncrement = Builder.createOverflowingOp(
2881 {CanonicalIVIncrement, &Plan.
getVF()}, {
false,
false},
DL);
2883 {InLoopIncrement, TC, ALMMultiplier},
DL,
2884 "active.lane.mask.next");
2885 LaneMaskPhi->addBackedgeValue(ALM);
2889 auto *NotMask = Builder.createNot(ALM,
DL);
2896 bool UseActiveLaneMaskForControlFlow) {
2898 auto *WideCanonicalIV =
2900 assert(WideCanonicalIV &&
2901 "Must have widened canonical IV when tail folding!");
2904 if (UseActiveLaneMaskForControlFlow) {
2913 nullptr,
"active.lane.mask");
2929 template <
typename OpTy>
bool match(OpTy *V)
const {
2940template <
typename Op0_t,
typename Op1_t>
2948 case Intrinsic::masked_udiv:
2949 return Intrinsic::vp_udiv;
2950 case Intrinsic::masked_sdiv:
2951 return Intrinsic::vp_sdiv;
2952 case Intrinsic::masked_urem:
2953 return Intrinsic::vp_urem;
2954 case Intrinsic::masked_srem:
2955 return Intrinsic::vp_srem;
2957 return std::nullopt;
2972 VPValue *Addr, *Mask, *EndPtr;
2975 auto AdjustEndPtr = [&CurRecipe, &EVL](
VPValue *EndPtr) {
2977 EVLEndPtr->insertBefore(&CurRecipe);
2982 EVLEndPtr->setOperand(1, EVLAsVF);
2986 auto GetVPReverse = [&CurRecipe, &EVL, Plan,
2991 Intrinsic::experimental_vp_reverse, {V, Plan->
getTrue(), &EVL},
2992 V->getScalarType(), {}, {},
DL);
2993 Reverse->insertBefore(&CurRecipe);
2997 if (
match(&CurRecipe,
3002 if (
match(&CurRecipe,
3006 Mask = GetVPReverse(Mask);
3007 Addr = AdjustEndPtr(EndPtr);
3010 LoadR->insertBefore(&CurRecipe);
3015 LoadR->getScalarType(), {}, {},
DL);
3026 NewLoad->setOperand(2, Mask);
3027 NewLoad->setOperand(3, &EVL);
3035 StoredVal, EVL, Mask);
3037 if (
match(&CurRecipe,
3041 Mask = GetVPReverse(Mask);
3042 Addr = AdjustEndPtr(EndPtr);
3046 Intrinsic::vector_splice_right, {StoredVal,
Poison, &EVL},
3050 SpliceR, EVL, Mask);
3054 if (Rdx->isConditional() &&
3059 if (Interleave->getMask() &&
3067 Intrinsic::vp_merge, {Mask ? Mask : Plan->
getTrue(),
LHS,
RHS, &EVL},
3068 LHS->getScalarType(), {}, {},
DL);
3081 if (
match(&CurRecipe,
3086 LHS->getScalarType(), {}, {},
DL);
3092 {IntrR->getOperand(0),
3093 IntrR->getOperand(1),
3094 Mask ? Mask : Plan->
getTrue(), &EVL},
3095 IntrR->getScalarType(), {}, {},
DL);
3104 VPValue *HeaderMask =
nullptr, *EVL =
nullptr;
3109 HeaderMask = R.getVPSingleValue();
3120 NewR->insertBefore(R);
3121 for (
auto [Old, New] :
3122 zip_equal(R->definedValues(), NewR->definedValues()))
3123 Old->replaceAllUsesWith(New);
3136 Mask->getScalarType(), {}, {}, LogicalAnd->getDebugLoc());
3137 Merge->insertBefore(LogicalAnd);
3138 LogicalAnd->replaceAllUsesWith(
Merge);
3158 Intrinsic::experimental_vp_reverse, {
X, Plan.
getTrue(), EVL},
3159 X->getScalarType(), {}, {}, Def->getDebugLoc());
3160 VPReverse->insertBefore(Def);
3161 Def->replaceAllUsesWith(VPReverse);
3167 R->eraseFromParent();
3188 auto IsAllowedUser =
3189 IsaPred<VPVectorEndPointerRecipe, VPScalarIVStepsRecipe,
3190 VPWidenIntOrFpInductionRecipe,
3191 VPWidenMemIntrinsicRecipe>;
3192 if (match(U, m_Trunc(m_Specific(&Plan.getVF()))))
3193 return all_of(cast<VPSingleDefRecipe>(U)->users(),
3195 return IsAllowedUser(U);
3197 "User of VF that we can't transform to EVL.");
3207 "Only users of VFxUF should be VPWidenPointerInductionRecipe and the "
3208 "increment of the canonical induction.");
3224 MaxEVL = Builder.createScalarZExtOrTrunc(
3228 Builder.setInsertPoint(Header, Header->getFirstNonPhi());
3229 VPValue *PrevEVL = Builder.createScalarPhi(
3243 Intrinsic::experimental_vp_splice,
3244 {
V1, V2, Imm, Plan.
getTrue(), PrevEVL, &EVL},
3245 R.getVPSingleValue()->getScalarType(), {}, {}, R.getDebugLoc());
3247 R.getVPSingleValue()->replaceAllUsesWith(VPSplice);
3260 if (match(&R, m_ComputeReductionResult(m_Select(m_Specific(HeaderMask),
3261 m_VPValue(), m_VPValue()))))
3262 return R.getOperand(0)->getDefiningRecipe()->getRegion() ==
3263 Plan.getVectorLoopRegion();
3275 VPValue *EVLMask = Builder.createICmp(
3335 VPlan &Plan,
const std::optional<unsigned> &MaxSafeElements) {
3347 auto *CurrentIteration =
3349 CurrentIteration->insertBefore(*Header, Header->begin());
3350 VPBuilder Builder(Header, Header->getFirstNonPhi());
3353 VPPhi *AVLPhi = Builder.createScalarPhi(
3357 if (MaxSafeElements) {
3367 Builder.setInsertPoint(CanonicalIVIncrement);
3371 OpVPEVL = Builder.createScalarZExtOrTrunc(
3372 OpVPEVL, CanIVTy, I32Ty, CanonicalIVIncrement->getDebugLoc());
3374 auto *NextIter = Builder.createAdd(
3375 OpVPEVL, CurrentIteration, CanonicalIVIncrement->getDebugLoc(),
3376 "current.iteration.next", CanonicalIVIncrement->getNoWrapFlags());
3377 CurrentIteration->addBackedgeValue(NextIter);
3381 "avl.next", {
true,
false});
3389 CanonicalIV->replaceAllUsesWith(CurrentIteration);
3390 CanonicalIVIncrement->setOperand(0, CanonicalIV);
3404 assert(!CurrentIteration &&
3405 "Found multiple CurrentIteration. Only one expected");
3406 CurrentIteration = PhiR;
3410 if (!CurrentIteration)
3421 CurrentIteration->
getDebugLoc(),
"current.iteration.iv");
3430 CanIVInc->eraseFromParent();
3439 if (Header->empty())
3448 if (!
match(EVLPhi->getBackedgeValue(),
3461 [[maybe_unused]]
bool FoundAVLNext =
3464 assert(FoundAVLNext &&
"Didn't find AVL backedge?");
3472 [[maybe_unused]]
bool FoundIncrement =
match(
3479 "Expected BranchOnCond with ICmp comparing CanIV + VFxUF with vector "
3484 LatchBr->setOperand(
3496 "expected to run before loop regions are created");
3498 auto CanUseVersionedStride = [&VPDT, Preheader](
VPUser &U,
unsigned) {
3501 return VPDT.
dominates(Preheader, Parent);
3504 for (
const SCEV *Stride : StridesMap.
values()) {
3507 const APInt *StrideConst;
3530 RewriteMap[StrideV] = PSE.
getSCEV(StrideV);
3537 const SCEV *ScevExpr = ExpSCEV->getSCEV();
3540 if (NewSCEV != ScevExpr) {
3542 ExpSCEV->replaceAllUsesWith(NewExp);
3553 auto CollectPoisonGeneratingInstrsInBackwardSlice([&](
VPRecipeBase *Root) {
3558 while (!Worklist.
empty()) {
3561 if (!Visited.
insert(CurRec).second)
3583 RecWithFlags->isDisjoint()) {
3586 Builder.createAdd(
A,
B, RecWithFlags->getDebugLoc());
3587 New->setUnderlyingValue(RecWithFlags->getUnderlyingValue());
3588 RecWithFlags->replaceAllUsesWith(New);
3589 RecWithFlags->eraseFromParent();
3592 RecWithFlags->dropPoisonGeneratingFlags();
3597 assert((!Instr || !Instr->hasPoisonGeneratingFlags()) &&
3598 "found instruction with poison generating flags not covered by "
3599 "VPRecipeWithIRFlags");
3604 if (
VPRecipeBase *OpDef = Operand->getDefiningRecipe())
3612 auto IsNotHeaderMask = [&Plan](
VPValue *Mask) {
3624 VPRecipeBase *AddrDef = WidenRec->getAddr()->getDefiningRecipe();
3625 if (AddrDef && WidenRec->isConsecutive() &&
3626 IsNotHeaderMask(WidenRec->getMask()))
3627 CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
3629 VPRecipeBase *AddrDef = InterleaveRec->getAddr()->getDefiningRecipe();
3630 if (AddrDef && IsNotHeaderMask(InterleaveRec->getMask()))
3631 CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
3641 const bool &EpilogueAllowed) {
3642 if (InterleaveGroups.empty())
3653 IRMemberToRecipe[&MemR->getIngredient()] = MemR;
3660 for (
const auto *IG : InterleaveGroups) {
3665 return !IRMemberToRecipe.contains(Member);
3669 auto *Start = IRMemberToRecipe.
lookup(IG->getMember(0));
3673 StoredValues.
push_back(StoreR->getStoredValue());
3674 for (
unsigned I = 1;
I < IG->getFactor(); ++
I) {
3680 StoredValues.
push_back(StoreR->getStoredValue());
3684 bool NeedsMaskForGaps =
3685 (IG->requiresScalarEpilogue() && !EpilogueAllowed) ||
3686 (!StoredValues.
empty() && !IG->isFull());
3689 auto *InsertPos = IRMemberToRecipe.
lookup(IRInsertPos);
3698 VPValue *Addr = Start->getAddr();
3707 assert(IG->getIndex(IRInsertPos) != 0 &&
3708 "index of insert position shouldn't be zero");
3712 IG->getIndex(IRInsertPos),
3716 Addr =
B.createNoWrapPtrAdd(InsertPos->getAddr(), OffsetVPV, NW);
3722 if (IG->isReverse()) {
3725 -(int64_t)IG->getFactor(), NW, InsertPosR->
getDebugLoc());
3726 ReversePtr->insertBefore(InsertPosR);
3730 IG, Addr, StoredValues, InsertPos->getMask(), NeedsMaskForGaps,
3732 VPIG->insertBefore(InsertPosR);
3735 for (
unsigned i = 0; i < IG->getFactor(); ++i)
3738 if (!Member->getType()->isVoidTy()) {
3796 AddOp = Instruction::Add;
3797 MulOp = Instruction::Mul;
3799 AddOp =
ID.getInductionOpcode();
3800 MulOp = Instruction::FMul;
3808 Step = Builder.createScalarCast(Instruction::Trunc, Step, Ty,
DL);
3809 Start = Builder.createScalarCast(Instruction::Trunc, Start, Ty,
DL);
3818 Init = Builder.createWidenCast(Instruction::UIToFP,
Init, StepTy);
3823 Init = Builder.createNaryOp(MulOp, {
Init, SplatStep}, Flags);
3824 Init = Builder.createNaryOp(AddOp, {SplatStart,
Init}, Flags,
3842 if (R->getParent()->getEnclosingLoopRegion())
3843 Builder.setInsertPoint(R->getParent(), std::next(R->getIterator()));
3848 VF = Builder.createScalarCast(Instruction::CastOps::UIToFP, VF, StepTy,
3851 VF = Builder.createScalarZExtOrTrunc(VF, StepTy, VF->
getScalarType(),
DL);
3853 Inc = Builder.createNaryOp(MulOp, {Step, VF}, Flags);
3860 auto *
Next = Builder.createNaryOp(AddOp, {Prev, Inc}, Flags,
3863 WidePHI->addIncoming(
Next);
3890 VPlan *Plan = R->getParent()->getPlan();
3891 VPValue *Start = R->getStartValue();
3892 VPValue *Step = R->getStepValue();
3893 VPValue *VF = R->getVFValue();
3895 assert(R->getInductionDescriptor().getKind() ==
3897 "Not a pointer induction according to InductionDescriptor!");
3898 assert(R->getScalarType()->isPointerTy() &&
"Unexpected type.");
3900 "Recipe should have been replaced");
3906 VPPhi *ScalarPtrPhi = Builder.createScalarPhi(Start,
DL,
"pointer.phi");
3910 Builder.setInsertPoint(R->getParent(), R->getParent()->getFirstNonPhi());
3913 Offset = Builder.createOverflowingOp(Instruction::Mul, {
Offset, Step});
3915 Builder.createWidePtrAdd(ScalarPtrPhi,
Offset,
DL,
"vector.gep");
3916 R->replaceAllUsesWith(PtrAdd);
3921 VF = Builder.createScalarZExtOrTrunc(VF, StepTy, VF->
getScalarType(),
DL);
3922 VPValue *Inc = Builder.createOverflowingOp(Instruction::Mul, {Step, VF});
3925 Builder.createPtrAdd(ScalarPtrPhi, Inc,
DL,
"ptr.ind");
3933 VPValue *Step = R->getStepValue();
3934 VPValue *Index = R->getIndex();
3938 ? Builder.createScalarSExtOrTrunc(
3940 : Builder.createScalarCast(Instruction::SIToFP, Index, StepTy,
3942 switch (R->getInductionKind()) {
3944 assert(Index->getScalarType() == Start->getScalarType() &&
3945 "Index type does not match StartValue type");
3946 return R->replaceAllUsesWith(Builder.createAdd(
3947 Start, Builder.createOverflowingOp(Instruction::Mul, {Index, Step})));
3950 return R->replaceAllUsesWith(Builder.createPtrAdd(
3951 Start, Builder.createOverflowingOp(Instruction::Mul, {Index, Step})));
3956 (FPBinOp->
getOpcode() == Instruction::FAdd ||
3957 FPBinOp->
getOpcode() == Instruction::FSub) &&
3958 "Original BinOp should be defined for FP induction");
3960 VPValue *
FMul = Builder.createNaryOp(Instruction::FMul, {Step, Index}, FMF);
3961 return R->replaceAllUsesWith(
3962 Builder.createNaryOp(FPBinOp->
getOpcode(), {Start, FMul}, FMF));
3975 if (!R->isReplicator())
3979 R->dissolveToCFGLoop();
4000 assert(Br->getNumOperands() == 2 &&
4001 "BranchOnTwoConds must have exactly 2 conditions");
4005 assert(Successors.size() == 3 &&
4006 "BranchOnTwoConds must have exactly 3 successors");
4011 VPValue *Cond0 = Br->getOperand(0);
4012 VPValue *Cond1 = Br->getOperand(1);
4019 if (Succ0 == Succ1) {
4021 VPValue *Combined = Builder.createOr(Cond0, Cond1,
DL);
4025 Br->eraseFromParent();
4030 !BrOnTwoCondsBB->
getParent() &&
"regions must already be dissolved");
4043 Br->eraseFromParent();
4054 WidenIVR->eraseFromParent();
4064 WidenIVR->replaceAllUsesWith(PtrAdd);
4065 WidenIVR->eraseFromParent();
4069 WidenIVR->eraseFromParent();
4075 DerivedIVR->eraseFromParent();
4080 VPValue *CanIV = WideCanIV->getCanonicalIV();
4082 VPValue *Step = WideCanIV->getStepValue();
4085 "Expected unroller to have materialized step for UF != 1");
4090 Step = Builder.createAdd(
4093 Builder.createAdd(CanIV, Step, WideCanIV->getDebugLoc(),
"vec.iv",
4094 WideCanIV->getNoWrapFlags());
4096 WideCanIV->eraseFromParent();
4103 for (
unsigned I = 1;
I != Blend->getNumIncomingValues(); ++
I)
4104 Select = Builder.createSelect(Blend->getMask(
I),
4105 Blend->getIncomingValue(
I),
Select,
4106 R.getDebugLoc(),
"predphi", *Blend);
4107 Blend->replaceAllUsesWith(
Select);
4108 Blend->eraseFromParent();
4113 if (!VEPR->getOffset()) {
4115 "Expected unroller to have materialized offset for UF != 1");
4116 VEPR->materializeOffset();
4123 Expr->eraseFromParent();
4133 for (
VPValue *
Op : LastActiveL->operands()) {
4134 VPValue *NotMask = Builder.createNot(
Op, LastActiveL->getDebugLoc());
4139 VPValue *FirstInactiveLane = Builder.createFirstActiveLane(
4140 NotMasks, LastActiveL->getDebugLoc(),
"first.inactive.lane");
4146 Builder.createSub(FirstInactiveLane, One,
4147 LastActiveL->getDebugLoc(),
"last.active.lane");
4150 LastActiveL->eraseFromParent();
4157 assert(VPI->isMasked() &&
4158 "Unmasked MaskedCond should be simplified earlier");
4159 VPI->replaceAllUsesWith(Builder.createNaryOp(
4161 VPI->eraseFromParent();
4171 Instruction::Add, VPI->operands(), VPI->getNoWrapFlags(),
4172 VPI->getDebugLoc());
4173 VPI->replaceAllUsesWith(
Add);
4174 VPI->eraseFromParent();
4182 DebugLoc DL = BranchOnCountInst->getDebugLoc();
4185 BranchOnCountInst->eraseFromParent();
4200 ? Instruction::UIToFP
4201 : Instruction::Trunc;
4202 VectorStep = Builder.createWidenCast(CastOp, VectorStep, IVTy);
4208 Builder.createWidenCast(Instruction::Trunc, ScalarStep, IVTy);
4214 MulOpc = Instruction::FMul;
4215 Flags = VPI->getFastMathFlagsOrNone();
4217 MulOpc = Instruction::Mul;
4222 MulOpc, {VectorStep, ScalarStep}, Flags, R.getDebugLoc());
4224 VPI->replaceAllUsesWith(VectorStep);
4225 VPI->eraseFromParent();
4247 for (
auto &Exit : Exits) {
4248 if (Exit.EarlyExitingVPBB == LatchVPBB)
4252 cast<VPIRPhi>(&R)->removeIncomingValueFor(Exit.EarlyExitingVPBB);
4253 Exit.EarlyExitingVPBB->getTerminator()->eraseFromParent();
4265 std::optional<VPValue *>
Cond =
4272 for (
auto *Recipe : ConditionRecipes) {
4275 assert(CondLoad ==
nullptr &&
"Too many condition loads");
4279 assert(CondLoad &&
"Couldn't find load");
4290 VPValue *Ptr = Load->getOperand(0);
4294 DL.getTypeStoreSize(Load->getScalarType()).getFixedValue());
4305 for (
auto *
GEP : GEPs) {
4322 auto InsertIt = HeaderVPBB->
end();
4324 bool CondMoveNeeded = CondR->
getParent() != HeaderVPBB;
4329 if (R.mayReadOrWriteMemory()) {
4331 CondMoveNeeded =
true;
4332 InsertIt = R.getIterator();
4342 for (
auto *Recipe :
reverse(ConditionRecipes))
4343 Recipe->moveBefore(*HeaderVPBB, InsertIt);
4347 VPBuilder MaskBuilder(HeaderVPBB, InsertIt);
4350 Type *IVScalarTy =
IV->getScalarType();
4357 {Zero, FirstActive, ALMMultiplier},
4358 DebugLoc(),
"uncountable.exit.mask");
4363 if (R.mayReadOrWriteMemory() && &R != CondLoad) {
4365 if (!VPDT.
dominates(R.getParent(), LatchVPBB))
4374 VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->
end());
4384 auto Phis = ScalarPH->
phis();
4403 if (Pred == MiddleVPBB)
4408 VPValue *CondOfEarlyExitingVPBB;
4409 [[maybe_unused]]
bool Matched =
4410 match(EarlyExitingVPBB->getTerminator(),
4412 assert(Matched &&
"Terminator must be BranchOnCond");
4416 VPBuilder EarlyExitingBuilder(EarlyExitingVPBB->getTerminator());
4417 auto *CondToEarlyExit = EarlyExitingBuilder.
createNaryOp(
4419 TrueSucc == ExitBlock
4420 ? CondOfEarlyExitingVPBB
4421 : EarlyExitingBuilder.
createNot(CondOfEarlyExitingVPBB));
4427 "exit condition must dominate the latch");
4436 assert(!Exits.
empty() &&
"must have at least one early exit");
4443 for (
const auto &[Num, VPB] :
enumerate(RPOT))
4446 return RPOIdx[
A.EarlyExitingVPBB] < RPOIdx[
B.EarlyExitingVPBB];
4452 for (
unsigned I = 0;
I + 1 < Exits.
size(); ++
I)
4453 for (
unsigned J =
I + 1; J < Exits.
size(); ++J)
4455 Exits[
I].EarlyExitingVPBB) &&
4456 "RPO sort must place dominating exits before dominated ones");
4462 VPValue *Combined = Exits[0].CondToExit;
4475 "Unexpected terminator");
4476 VPValue *IsLatchExitTaken = LatchExitingBranch->getOperand(0);
4477 DebugLoc LatchDL = LatchExitingBranch->getDebugLoc();
4478 LatchExitingBranch->eraseFromParent();
4481 {IsAnyExitTaken, IsLatchExitTaken}, LatchDL);
4487 LatchVPBB->
setSuccessors({MiddleVPBB, MiddleVPBB, HeaderVPBB});
4491 LatchVPBB, MiddleVPBB, TheLoop,
4497 for (
unsigned Idx = 0; Idx != Exits.
size(); ++Idx) {
4501 VectorEarlyExitVPBBs[Idx] = VectorEarlyExitVPBB;
4509 Exits.
size() == 1 ? VectorEarlyExitVPBBs[0]
4512 LatchVPBB->
setSuccessors({DispatchVPBB, MiddleVPBB, HeaderVPBB});
4544 for (
auto [Exit, VectorEarlyExitVPBB] :
4545 zip_equal(Exits, VectorEarlyExitVPBBs)) {
4546 auto &[EarlyExitingVPBB, EarlyExitVPBB,
_] = Exit;
4558 ExitIRI->getIncomingValueForBlock(EarlyExitingVPBB);
4559 VPValue *NewIncoming = IncomingVal;
4561 VPBuilder EarlyExitBuilder(VectorEarlyExitVPBB);
4566 ExitIRI->removeIncomingValueFor(EarlyExitingVPBB);
4567 ExitIRI->addIncoming(NewIncoming);
4570 EarlyExitingVPBB->getTerminator()->eraseFromParent();
4604 bool IsLastDispatch = (
I + 2 == Exits.
size());
4606 IsLastDispatch ? VectorEarlyExitVPBBs.
back()
4612 VectorEarlyExitVPBBs[
I]->setPredecessors({CurrentBB});
4615 CurrentBB = FalseBB;
4630 VPValue *VecOp = Red->getVecOp();
4632 assert(!Red->isPartialReduction() &&
4633 "This path does not support partial reductions");
4636 auto IsExtendedRedValidAndClampRange =
4649 "getExtendedReductionCost only supports integer types");
4650 ExtRedCost = Ctx.TTI.getExtendedReductionCost(
4651 Opcode, ExtOpc == Instruction::CastOps::ZExt, RedTy, SrcVecTy,
4652 Red->getFastMathFlagsOrNone(),
CostKind);
4653 return ExtRedCost.
isValid() && ExtRedCost < ExtCost + RedCost;
4661 IsExtendedRedValidAndClampRange(
4682 if (Opcode != Instruction::Add && Opcode != Instruction::Sub &&
4683 Opcode != Instruction::FAdd)
4686 assert(!Red->isPartialReduction() &&
4687 "This path does not support partial reductions");
4691 auto IsMulAccValidAndClampRange =
4703 (Ext0->getOpcode() != Ext1->getOpcode() ||
4704 Ext0->getOpcode() == Instruction::CastOps::FPExt))
4708 !Ext0 || Ext0->getOpcode() == Instruction::CastOps::ZExt;
4710 MulAccCost = Ctx.TTI.getMulAccReductionCost(IsZExt, Opcode, RedTy,
4717 ExtCost += Ext0->computeCost(VF, Ctx);
4719 ExtCost += Ext1->computeCost(VF, Ctx);
4721 ExtCost += OuterExt->computeCost(VF, Ctx);
4723 return MulAccCost.
isValid() &&
4724 MulAccCost < ExtCost + MulCost + RedCost;
4729 VPValue *VecOp = Red->getVecOp();
4767 Builder.createWidenCast(Instruction::CastOps::Trunc, ValB, NarrowTy);
4769 ValB = ExtB = Builder.createWidenCast(ExtOpc, Trunc, WideTy);
4770 Mul->setOperand(1, ExtB);
4780 ExtendAndReplaceConstantOp(RecipeA, RecipeB,
B,
Mul);
4785 IsMulAccValidAndClampRange(
Mul, RecipeA, RecipeB,
nullptr)) {
4792 if (!
Sub && IsMulAccValidAndClampRange(
Mul,
nullptr,
nullptr,
nullptr))
4809 ExtendAndReplaceConstantOp(Ext0, Ext1,
B,
Mul);
4818 (Ext->getOpcode() == Ext0->getOpcode() || Ext0 == Ext1) &&
4819 Ext0->getOpcode() == Ext1->getOpcode() &&
4820 IsMulAccValidAndClampRange(
Mul, Ext0, Ext1, Ext) &&
Mul->hasOneUse()) {
4822 Ext0->getOpcode(), Ext0->getOperand(0), Ext->getScalarType(),
nullptr,
4823 *Ext0, *Ext0, Ext0->getDebugLoc());
4824 NewExt0->insertBefore(Ext0);
4829 Ext->getScalarType(),
nullptr, *Ext1,
4830 *Ext1, Ext1->getDebugLoc());
4833 auto *NewMul =
Mul->cloneWithOperands({NewExt0, NewExt1});
4834 NewMul->insertBefore(
Mul);
4835 Ext->replaceAllUsesWith(NewMul);
4836 Ext->eraseFromParent();
4837 Mul->eraseFromParent();
4851 assert(!Red->isPartialReduction() &&
4852 "This path does not support partial reductions");
4855 auto IP = std::next(Red->getIterator());
4856 auto *VPBB = Red->getParent();
4866 Red->replaceAllUsesWith(AbstractR);
4896 for (
VPValue *VPV : VPValues) {
4905 if (
User->usesScalars(VPV))
4908 HoistPoint = HoistBlock->
begin();
4912 "All users must be in the vector preheader or dominated by it");
4917 VPV->replaceUsesWithIf(Broadcast,
4918 [VPV, Broadcast](
VPUser &U,
unsigned Idx) {
4919 return Broadcast != &U && !U.usesScalars(VPV);
4930 return CommonMetadata;
4933template <
unsigned Opcode>
4938 static_assert(Opcode == Instruction::Load || Opcode == Instruction::Store,
4939 "Only Load and Store opcodes supported");
4940 [[maybe_unused]]
constexpr bool IsLoad = (Opcode == Instruction::Load);
4947 for (
auto Recipes :
Groups) {
4948 if (Recipes.size() < 2)
4953 "Expected all recipes in group to have the same load-store type");
4960 VPValue *MaskI = RecipeI->getMask();
4966 bool HasComplementaryMask =
false;
4971 VPValue *MaskJ = RecipeJ->getMask();
4980 if (HasComplementaryMask) {
4981 assert(Group.
size() >= 2 &&
"must have at least 2 entries");
4991template <
typename InstType>
5009 for (
auto &Group :
Groups) {
5029 return R->isSingleScalar() == IsSingleScalar;
5031 "all members in group must agree on IsSingleScalar");
5036 LoadWithMinAlign->getUnderlyingInstr(), {EarliestLoad->getOperand(0)},
5037 IsSingleScalar,
nullptr, *EarliestLoad, CommonMetadata);
5039 UnpredicatedLoad->insertBefore(EarliestLoad);
5043 Load->replaceAllUsesWith(UnpredicatedLoad);
5044 Load->eraseFromParent();
5053 if (!StoreLoc || !StoreLoc->AATags.Scope)
5059 StoresToSink.
end());
5063 SinkStoreInfo SinkInfo(StoresToSinkSet, *StoresToSink[0], PSE, L);
5075 for (
auto &Group :
Groups) {
5088 VPValue *SelectedValue = Group[0]->getOperand(0);
5091 bool IsSingleScalar = Group[0]->isSingleScalar();
5092 for (
unsigned I = 1;
I < Group.size(); ++
I) {
5093 assert(IsSingleScalar == Group[
I]->isSingleScalar() &&
5094 "all members in group must agree on IsSingleScalar");
5095 VPValue *Mask = Group[
I]->getMask();
5097 SelectedValue = Builder.createSelect(Mask,
Value, SelectedValue,
5106 StoreWithMinAlign->getUnderlyingInstr(),
5107 {SelectedValue, LastStore->getOperand(1)}, IsSingleScalar,
5108 nullptr, *LastStore, CommonMetadata);
5109 UnpredicatedStore->insertBefore(*InsertBB, LastStore->
getIterator());
5113 Store->eraseFromParent();
5120 assert(Plan.
hasVF(BestVF) &&
"BestVF is not available in Plan");
5121 assert(Plan.
hasUF(BestUF) &&
"BestUF is not available in Plan");
5184 auto UsesVectorOrInsideReplicateRegion = [DefR, LoopRegion](
VPUser *U) {
5186 return !U->usesScalars(DefR) || ParentRegion != LoopRegion;
5193 none_of(DefR->users(), UsesVectorOrInsideReplicateRegion))
5203 DefR->replaceUsesWithIf(
5204 BuildVector, [BuildVector, &UsesVectorOrInsideReplicateRegion](
5206 return &U != BuildVector && UsesVectorOrInsideReplicateRegion(&U);
5220 for (
VPValue *Def : R.definedValues()) {
5233 auto IsCandidateUnpackUser = [Def](
VPUser *U) {
5235 return U->usesScalars(Def) &&
5238 if (
none_of(Def->users(), IsCandidateUnpackUser))
5245 Unpack->insertAfter(&R);
5246 Def->replaceUsesWithIf(Unpack,
5247 [&IsCandidateUnpackUser](
VPUser &U,
unsigned) {
5248 return IsCandidateUnpackUser(&U);
5257 bool RequiresScalarEpilogue,
VPValue *Step,
5258 std::optional<uint64_t> MaxRuntimeStep) {
5270 "Step VPBB must dominate VectorPHVPBB");
5272 InsertPt = std::next(StepR->getIterator());
5274 VPBuilder Builder(VectorPHVPBB, InsertPt);
5280 if (!RequiresScalarEpilogue &&
match(TC,
m_APInt(TCVal)) && MaxRuntimeStep &&
5281 TCVal->
urem(*MaxRuntimeStep) == 0) {
5292 if (TailByMasking) {
5293 TC = Builder.createAdd(
5304 Builder.createNaryOp(Instruction::URem, {TC, Step},
5313 if (RequiresScalarEpilogue) {
5315 "requiring scalar epilogue is not supported with fail folding");
5318 R = Builder.createSelect(IsZero, Step, R);
5332 "VF and VFxUF must be materialized together");
5344 Builder.createElementCount(TCTy, VFEC * Plan.
getConcreteUF());
5351 VPValue *RuntimeVF = Builder.createElementCount(TCTy, VFEC);
5355 BC, [&VF](
VPUser &U,
unsigned) {
return !U.usesScalars(&VF); });
5359 VPValue *MulByUF = Builder.createOverflowingOp(
5372 auto *AliasMask = Builder.createNaryOp(
5376 if (HeaderMaskDef->isPhi())
5377 Builder =
VPBuilder(&*HeaderMaskDef->getParent()->getFirstNonPhi());
5382 auto *ClampedHeaderMask = Builder.createAnd(HeaderMask, AliasMask);
5384 return &U != ClampedHeaderMask;
5395 assert(IncomingAliasMask &&
"Expected an alias mask!");
5405 if (
Check.NeedsFreeze) {
5415 Intrinsic::loop_dependence_war_mask,
5419 AliasMask = Builder.createAnd(AliasMask, WARMask);
5421 AliasMask = WARMask;
5426 VPValue *NumActive = Builder.createNaryOp(
5429 VPValue *ClampedVF = Builder.createScalarZExtOrTrunc(
5455 VPValue *DistanceToMax = Builder.createSub(MaxUIntTripCount, TripCount);
5463 VPValue *TripCountCheck = Builder.createICmp(
5466 VPValue *
Cond = Builder.createOr(IsScalar, TripCountCheck,
DL);
5477 "Clamped VF not supported with interleaving");
5485 VPBuilder Builder(Entry, Entry->begin());
5497 if (!ExpSCEV || ExpSCEV->getNumUsers() == 0)
5499 Builder.setInsertPoint(ExpSCEV);
5509 ExpSCEV->eraseFromParent();
5518 BasicBlock *EntryBB = Entry->getIRBasicBlock();
5525 const SCEV *Expr = ExpSCEV->getSCEV();
5528 ExpandedSCEVs[Expr] = Res;
5533 ExpSCEV->eraseFromParent();
5536 "all VPExpandSCEVRecipes must have been expanded");
5539 auto EI = Entry->begin();
5549 return ExpandedSCEVs;
5561 VPValue *OpV,
unsigned Idx,
bool IsScalable) {
5565 return Member0Op == OpV;
5569 return !IsScalable && !W->getMask() && W->isConsecutive() &&
5572 return IR->getInterleaveGroup()->isFull() &&
IR->getVPValue(Idx) == OpV;
5589 for (
unsigned Idx = 0; Idx != WideMember0->getNumOperands(); ++Idx) {
5592 OpsI.
push_back(
Op->getDefiningRecipe()->getOperand(Idx));
5597 if (
any_of(
enumerate(OpsI), [WideMember0, Idx, IsScalable](
const auto &
P) {
5598 const auto &[
OpIdx, OpV] =
P;
5610static std::optional<ElementCount>
5614 if (!InterleaveR || InterleaveR->
getMask())
5615 return std::nullopt;
5617 Type *GroupElementTy =
nullptr;
5621 return Op->getScalarType() == GroupElementTy;
5623 return std::nullopt;
5627 return Op->getScalarType() == GroupElementTy;
5629 return std::nullopt;
5633 if (IG->getFactor() != IG->getNumMembers())
5634 return std::nullopt;
5640 assert(
Size.isScalable() == VF.isScalable() &&
5641 "if Size is scalable, VF must be scalable and vice versa");
5642 return Size.getKnownMinValue();
5646 unsigned MinVal = VF.getKnownMinValue();
5648 if (IG->getFactor() == MinVal && GroupSize == GetVectorBitWidthForVF(VF))
5651 return std::nullopt;
5659 return RepR && RepR->isSingleScalar();
5669 auto *R = V->getDefiningRecipe();
5680 for (
unsigned Idx = 0,
E = WideMember0->getNumOperands(); Idx !=
E; ++Idx) {
5682 for (
VPValue *Member : Members)
5683 OpsI.
push_back(Member->getDefiningRecipe()->getOperand(Idx));
5692 auto *LI =
cast<LoadInst>(LoadGroup->getInterleaveGroup()->getInsertPos());
5694 LoadGroup->getMask(),
true,
5695 *LoadGroup, LoadGroup->getDebugLoc());
5696 L->insertBefore(LoadGroup);
5702 assert(RepR->isSingleScalar() && RepR->getOpcode() == Instruction::Load &&
5703 "must be a single scalar load");
5704 NarrowedOps.
insert(RepR);
5709 VPValue *PtrOp = WideLoad->getAddr();
5711 PtrOp = VecPtr->getOperand(0);
5716 nullptr, {}, *WideLoad);
5717 N->insertBefore(WideLoad);
5722std::unique_ptr<VPlan>
5742 "unexpected branch-on-count");
5745 std::optional<ElementCount> VFToOptimize;
5759 if (R.mayWriteToMemory() && !InterleaveR)
5765 return any_of(V->users(), [&](VPUser *U) {
5766 auto *UR = cast<VPRecipeBase>(U);
5767 return UR->getParent()->getParent() != VectorLoop;
5784 std::optional<ElementCount> NarrowedVF =
5786 if (!NarrowedVF || (VFToOptimize && NarrowedVF != VFToOptimize))
5788 VFToOptimize = NarrowedVF;
5791 if (InterleaveR->getStoredValues().empty())
5796 auto *Member0 = InterleaveR->getStoredValues()[0];
5806 VPRecipeBase *DefR = Op.value()->getDefiningRecipe();
5809 auto *IR = dyn_cast<VPInterleaveRecipe>(DefR);
5810 return IR && IR->getInterleaveGroup()->isFull() &&
5811 IR->getVPValue(Op.index()) == Op.value();
5820 VFToOptimize->isScalable()))
5825 if (StoreGroups.empty())
5829 bool RequiresScalarEpilogue =
5840 std::unique_ptr<VPlan> NewPlan;
5842 NewPlan = std::unique_ptr<VPlan>(Plan.
duplicate());
5843 Plan.
setVF(*VFToOptimize);
5844 NewPlan->removeVF(*VFToOptimize);
5850 for (
auto *StoreGroup : StoreGroups) {
5857 StoreGroup->getDebugLoc());
5858 S->insertBefore(StoreGroup);
5859 StoreGroup->eraseFromParent();
5865 Type *CanIVTy = VectorLoop->getCanonicalIVType();
5871 if (VFToOptimize->isScalable()) {
5874 Step = PHBuilder.createOverflowingOp(Instruction::Mul, {VScale,
UF},
5882 materializeVectorTripCount(Plan, VectorPH,
false,
5883 RequiresScalarEpilogue, Step);
5888 removeDeadRecipes(Plan);
5891 "All VPVectorPointerRecipes should have been removed");
5907 "must have a BranchOnCond");
5910 if (VF.
isScalable() && VScaleForTuning.has_value())
5911 VectorStep *= *VScaleForTuning;
5912 assert(VectorStep > 0 &&
"trip count should not be zero");
5916 MiddleTerm->setMetadata(LLVMContext::MD_prof, BranchWeights);
5935 "Cannot handle loops with uncountable early exits");
5942 assert(RecurSplice &&
"expected FirstOrderRecurrenceSplice");
5949 if (
any_of(RecurSplice->users(),
5950 [](
VPUser *U) { return !cast<VPRecipeBase>(U)->getRegion(); }) &&
6031 {},
"vector.recur.extract.for.phi");
6034 ExitPhi->replaceUsesOfWith(ExtractR, PenultimateElement);
6048 VPValue *WidenIVCandidate = BinOp->getOperand(0);
6049 VPValue *InvariantCandidate = BinOp->getOperand(1);
6051 std::swap(WidenIVCandidate, InvariantCandidate);
6065 auto *ClonedOp = BinOp->
clone();
6066 if (ClonedOp->getOperand(0) == WidenIV) {
6067 ClonedOp->setOperand(0, ScalarIV);
6069 assert(ClonedOp->getOperand(1) == WidenIV &&
"one operand must be WideIV");
6070 ClonedOp->setOperand(1, ScalarIV);
6085 auto CheckSentinel = [&SE](
const SCEV *IVSCEV,
6086 bool UseMax) -> std::optional<APSInt> {
6088 for (
bool Signed : {
true,
false}) {
6097 return std::nullopt;
6105 PhiR->getRecurrenceKind()))
6114 VPValue *BackedgeVal = PhiR->getBackedgeValue();
6128 !
match(FindLastSelect,
6137 IVOfExpressionToSink ? IVOfExpressionToSink : FindLastExpression, PSE,
6143 "IVOfExpressionToSink not being an AddRec must imply "
6144 "FindLastExpression not being an AddRec.");
6155 std::optional<APSInt> SentinelVal = CheckSentinel(IVSCEV, UseMax);
6156 bool UseSigned = SentinelVal && SentinelVal->isSigned();
6163 if (IVOfExpressionToSink) {
6164 const SCEV *FindLastExpressionSCEV =
6166 if (
match(FindLastExpressionSCEV,
6169 if (
auto NewSentinel =
6170 CheckSentinel(FindLastExpressionSCEV, NewUseMax)) {
6173 SentinelVal = *NewSentinel;
6174 UseSigned = NewSentinel->isSigned();
6176 IVSCEV = FindLastExpressionSCEV;
6177 IVOfExpressionToSink =
nullptr;
6187 if (AR->hasNoSignedWrap())
6189 else if (AR->hasNoUnsignedWrap())
6199 VPValue *NewFindLastSelect = BackedgeVal;
6201 if (!SentinelVal || IVOfExpressionToSink) {
6204 DebugLoc DL = FindLastSelect->getDefiningRecipe()->getDebugLoc();
6205 VPBuilder LoopBuilder(FindLastSelect->getDefiningRecipe());
6206 if (FindLastSelect->getDefiningRecipe()->getOperand(1) == PhiR)
6207 SelectCond = LoopBuilder.
createNot(SelectCond);
6214 if (SelectCond !=
Cond || IVOfExpressionToSink) {
6217 IVOfExpressionToSink ? IVOfExpressionToSink : FindLastExpression,
6226 VPIRFlags Flags(MinMaxKind,
false,
false,
6232 NewFindLastSelect, Flags, ExitDL);
6235 VPValue *VectorRegionExitingVal = ReducedIV;
6236 if (IVOfExpressionToSink)
6237 VectorRegionExitingVal =
6239 ReducedIV, IVOfExpressionToSink);
6242 VPValue *StartVPV = PhiR->getStartValue();
6249 NewRdxResult = MiddleBuilder.
createSelect(Cmp, VectorRegionExitingVal,
6259 AnyOfPhi->insertAfter(PhiR);
6266 OrVal, VectorRegionExitingVal, StartVPV, ExitDL);
6279 PhiR->hasUsesOutsideReductionChain());
6280 NewPhiR->insertBefore(PhiR);
6281 PhiR->replaceAllUsesWith(NewPhiR);
6282 PhiR->eraseFromParent();
6289struct ReductionExtend {
6290 Type *SrcType =
nullptr;
6291 ExtendKind Kind = ExtendKind::PR_None;
6297struct ExtendedReductionOperand {
6301 ReductionExtend ExtendA, ExtendB;
6309struct VPPartialReductionChain {
6312 VPWidenRecipe *ReductionBinOp =
nullptr;
6314 ExtendedReductionOperand ExtendedOp;
6321 unsigned AccumulatorOpIdx;
6322 unsigned ScaleFactor;
6334 if (!
Op->hasOneUse() ||
6340 auto *Trunc = Builder.createWidenCast(Instruction::CastOps::Trunc,
6341 Op->getOperand(1), NarrowTy);
6343 Op->setOperand(1, Builder.createWidenCast(ExtOpc, Trunc, WideTy));
6352 auto *
Sub =
Op->getOperand(0)->getDefiningRecipe();
6354 assert(Ext->getOpcode() ==
6356 "Expected both the LHS and RHS extends to be the same");
6357 bool IsSigned = Ext->getOpcode() == Instruction::SExt;
6360 auto *FreezeX = Builder.insert(
new VPWidenRecipe(Instruction::Freeze, {
X}));
6361 auto *FreezeY = Builder.insert(
new VPWidenRecipe(Instruction::Freeze, {
Y}));
6362 auto *
Max = Builder.insert(
6364 {FreezeX, FreezeY}, SrcTy));
6365 auto *Min = Builder.insert(
6367 {FreezeX, FreezeY}, SrcTy));
6370 return Builder.createWidenCast(Instruction::CastOps::ZExt, AbsDiff,
6371 Op->getScalarType());
6383 if (!
Mul->hasOneUse() ||
6384 (Ext->getOpcode() != MulLHS->getOpcode() && MulLHS != MulRHS) ||
6385 MulLHS->getOpcode() != MulRHS->getOpcode())
6388 auto *NewLHS = Builder.createWidenCast(
6389 MulLHS->getOpcode(), MulLHS->getOperand(0), Ext->getScalarType());
6390 auto *NewRHS = MulLHS == MulRHS
6392 : Builder.createWidenCast(MulRHS->getOpcode(),
6393 MulRHS->getOperand(0),
6394 Ext->getScalarType());
6395 auto *NewMul =
Mul->cloneWithOperands({NewLHS, NewRHS});
6396 Builder.insert(NewMul);
6397 Op->replaceAllUsesWith(NewMul);
6398 Op->eraseFromParent();
6399 Mul->eraseFromParent();
6408 VPValue *VecOp = Red->getVecOp();
6462static void transformToPartialReduction(
const VPPartialReductionChain &Chain,
6470 WidenRecipe->
getOperand(1 - Chain.AccumulatorOpIdx));
6473 ExtendedOp = optimizeExtendsForPartialReduction(ExtendedOp);
6489 if ((WidenRecipe->
getOpcode() == Instruction::Sub &&
6491 (WidenRecipe->
getOpcode() == Instruction::FSub &&
6496 if (WidenRecipe->
getOpcode() == Instruction::FSub) {
6506 Builder.insert(NegRecipe);
6507 ExtendedOp = NegRecipe;
6518 assert((!ExitValue || IsLastInChain) &&
6519 "if we found ExitValue, it must match RdxPhi's backedge value");
6530 PartialRed->insertBefore(WidenRecipe);
6538 E->insertBefore(WidenRecipe);
6539 PartialRed->replaceAllUsesWith(
E);
6552 auto *NewScaleFactor = Plan.
getConstantInt(32, Chain.ScaleFactor);
6553 StartInst->setOperand(2, NewScaleFactor);
6561 VPValue *OldStartValue = StartInst->getOperand(0);
6562 StartInst->setOperand(0, StartInst->getOperand(1));
6566 assert(RdxResult &&
"Could not find reduction result");
6569 unsigned SubOpc = Chain.RK ==
RecurKind::FSub ? Instruction::BinaryOps::FSub
6570 : Instruction::BinaryOps::Sub;
6576 [&NewResult](
VPUser &U,
unsigned Idx) {
return &
U != NewResult; });
6582 const VPPartialReductionChain &Link,
6585 const ExtendedReductionOperand &ExtendedOp = Link.ExtendedOp;
6586 std::optional<unsigned> BinOpc = std::nullopt;
6588 if (ExtendedOp.ExtendB.Kind != ExtendKind::PR_None)
6589 BinOpc = ExtendedOp.ExtendsUser->
getOpcode();
6591 std::optional<llvm::FastMathFlags>
Flags;
6595 auto GetLinkOpcode = [&Link]() ->
unsigned {
6598 return Instruction::Add;
6600 return Instruction::FAdd;
6602 return Link.ReductionBinOp->
getOpcode();
6607 GetLinkOpcode(), ExtendedOp.ExtendA.SrcType, ExtendedOp.ExtendB.SrcType,
6608 RdxType, VF, ExtendedOp.ExtendA.Kind, ExtendedOp.ExtendB.Kind, BinOpc,
6629static std::optional<ExtendedReductionOperand>
6632 "Op should be operand of UpdateR");
6640 if (
Op->hasOneUse() &&
6649 Type *RHSInputType =
Y->getScalarType();
6650 if (LHSInputType != RHSInputType ||
6651 LHSExt->getOpcode() != RHSExt->getOpcode())
6652 return std::nullopt;
6655 return ExtendedReductionOperand{
6657 {LHSInputType, getPartialReductionExtendKind(LHSExt)},
6661 std::optional<TTI::PartialReductionExtendKind> OuterExtKind;
6664 VPValue *CastSource = CastRecipe->getOperand(0);
6665 OuterExtKind = getPartialReductionExtendKind(CastRecipe);
6675 return ExtendedReductionOperand{
6682 if (!
Op->hasOneUse())
6683 return std::nullopt;
6688 return std::nullopt;
6698 return std::nullopt;
6702 ExtendKind LHSExtendKind = getPartialReductionExtendKind(LHSCast);
6705 const APInt *RHSConst =
nullptr;
6711 return std::nullopt;
6715 if (Cast && OuterExtKind &&
6716 getPartialReductionExtendKind(Cast) != OuterExtKind)
6717 return std::nullopt;
6719 Type *RHSInputType = LHSInputType;
6720 ExtendKind RHSExtendKind = LHSExtendKind;
6723 RHSExtendKind = getPartialReductionExtendKind(RHSCast);
6726 return ExtendedReductionOperand{
6727 MulOp, {LHSInputType, LHSExtendKind}, {RHSInputType, RHSExtendKind}};
6734static std::optional<SmallVector<VPPartialReductionChain>>
6742 return std::nullopt;
6752 VPValue *CurrentValue = ExitValue;
6753 while (CurrentValue != RedPhiR) {
6756 return std::nullopt;
6763 std::optional<ExtendedReductionOperand> ExtendedOp =
6764 matchExtendedReductionOperand(UpdateR,
Op);
6766 ExtendedOp = matchExtendedReductionOperand(UpdateR, PrevValue);
6768 return std::nullopt;
6772 Type *ExtSrcType = ExtendedOp->ExtendA.SrcType;
6775 return std::nullopt;
6780 VPPartialReductionChain Link(
6781 {UpdateR, *ExtendedOp, RK,
6785 CurrentValue = PrevValue;
6790 std::reverse(Chain.
begin(), Chain.
end());
6809 if (
auto Chains = getScaledReductions(RedPhiR, CostCtx,
Range))
6810 ChainsByPhi.
try_emplace(RedPhiR, std::move(*Chains));
6813 if (ChainsByPhi.
empty())
6820 for (
const auto &[
_, Chains] : ChainsByPhi)
6821 for (
const VPPartialReductionChain &Chain : Chains) {
6822 PartialReductionOps.
insert(Chain.ExtendedOp.ExtendsUser);
6823 ScaledReductionMap[Chain.ReductionBinOp] = Chain.ScaleFactor;
6829 auto ExtendUsersValid = [&](
VPValue *Ext) {
6831 return PartialReductionOps.contains(cast<VPRecipeBase>(U));
6835 auto IsProfitablePartialReductionChainForVF =
6842 for (
const VPPartialReductionChain &Link : Chain) {
6843 const ExtendedReductionOperand &ExtendedOp = Link.ExtendedOp;
6844 InstructionCost LinkCost = getPartialReductionLinkCost(CostCtx, Link, VF);
6848 PartialCost += LinkCost;
6849 RegularCost += Link.ReductionBinOp->
computeCost(VF, CostCtx);
6851 if (ExtendedOp.ExtendB.Kind != ExtendKind::PR_None)
6852 RegularCost += ExtendedOp.ExtendsUser->
computeCost(VF, CostCtx);
6855 RegularCost += Extend->computeCost(VF, CostCtx);
6857 return PartialCost.
isValid() && PartialCost < RegularCost;
6865 for (
auto &[RedPhiR, Chains] : ChainsByPhi) {
6866 for (
const VPPartialReductionChain &Chain : Chains) {
6867 if (!
all_of(Chain.ExtendedOp.ExtendsUser->operands(), ExtendUsersValid)) {
6871 auto UseIsValid = [&, RedPhiR = RedPhiR](
VPUser *U) {
6873 return PhiR == RedPhiR;
6875 return Chain.ScaleFactor == ScaledReductionMap.
lookup_or(R, 0) ||
6881 if (!
all_of(Chain.ReductionBinOp->users(), UseIsValid)) {
6890 auto *RepR = dyn_cast<VPReplicateRecipe>(U);
6891 return RepR && RepR->getOpcode() == Instruction::Store;
6902 return IsProfitablePartialReductionChainForVF(Chains, VF);
6908 for (
auto &[Phi, Chains] : ChainsByPhi)
6909 for (
const VPPartialReductionChain &Chain : Chains)
6910 transformToPartialReduction(Chain, Plan, Phi);
6924 if (VPI && VPI->getUnderlyingValue() &&
6936 New->insertBefore(VPI);
6937 if (VPI->getOpcode() == Instruction::Load)
6938 VPI->replaceAllUsesWith(New->getVPSingleValue());
6939 VPI->eraseFromParent();
6944 FinalRedStoresBuilder))
6953 ReplaceWith(Histogram);
6961 ReplaceWith(Recipe);
6984 if (VPI->mayHaveSideEffects())
6988 if (VPI->isMasked() && !VPI->isSafeToSpeculativelyExecute())
6993 if (VPI->getOpcode() == Instruction::Add &&
7002 VPI->getOpcode(), VPI->operandsWithoutMask(),
nullptr, *VPI,
7003 *VPI, VPI->getDebugLoc(),
I);
7004 Recipe->insertBefore(VPI);
7005 VPI->replaceAllUsesWith(Recipe);
7006 VPI->eraseFromParent();
7016 switch (Param.ParamKind) {
7017 case VFParamKind::Vector:
7018 case VFParamKind::GlobalPredicate:
7020 case VFParamKind::OMP_Uniform:
7021 return SE->isSCEVable(Args[Param.ParamPos]->getScalarType()) &&
7022 SE->isLoopInvariant(
7023 vputils::getSCEVExprForVPValue(Args[Param.ParamPos], PSE, L),
7025 case VFParamKind::OMP_Linear:
7026 return match(vputils::getSCEVExprForVPValue(Args[Param.ParamPos], PSE, L),
7027 m_scev_AffineAddRec(
7028 m_SCEV(), m_scev_SpecificSInt(Param.LinearStepOrPos),
7029 m_SpecificLoop(L)));
7046 const auto *It =
find_if(Mappings, [&](
const VFInfo &Info) {
7047 return Info.Shape.VF == VF && (!MaskRequired || Info.isMasked()) &&
7050 if (It == Mappings.end())
7057struct CallWideningDecision {
7058 enum class KindTy { Scalarize,
Intrinsic, VectorVariant };
7059 CallWideningDecision(KindTy Kind, Function *Variant =
nullptr)
7082 return CallWideningDecision::KindTy::Scalarize;
7092 return CallWideningDecision::KindTy::Scalarize;
7096 false, VF, CostCtx);
7111 return CallWideningDecision::KindTy::Intrinsic;
7115 if (VecFunc && ScalarCost >= VecCallCost)
7116 return {CallWideningDecision::KindTy::VectorVariant, VecFunc};
7118 return CallWideningDecision::KindTy::Scalarize;
7128 if (!VPI || !VPI->getUnderlyingValue() ||
7129 VPI->getOpcode() != Instruction::Call)
7134 VPI->op_begin() + CI->arg_size());
7136 CallWideningDecision Decision =
7145 switch (Decision.Kind) {
7146 case CallWideningDecision::KindTy::Intrinsic: {
7150 *VPI, VPI->getDebugLoc());
7153 case CallWideningDecision::KindTy::VectorVariant: {
7157 VPValue *Mask = VPI->isMasked() ? VPI->getMask() : Plan.
getTrue();
7158 Ops.push_back(Mask);
7160 Ops.push_back(VPI->getOperand(VPI->getNumOperandsWithoutMask() - 1));
7162 *VPI, VPI->getDebugLoc());
7165 case CallWideningDecision::KindTy::Scalarize:
7171 VPI->replaceAllUsesWith(Replacement);
7172 VPI->eraseFromParent();
7195 if (!LoadR || LoadR->isConsecutive())
7214 Align Alignment = LoadR->getAlign();
7217 if (!Ctx.TTI.isLegalStridedLoadStore(DataTy, Alignment))
7222 Intrinsic::experimental_vp_strided_load, DataTy,
7223 LoadR->isMasked(), Alignment, Ctx);
7224 return StridedLoadStoreCost < CurrentCost;
7235 Ctx.invalidateWideningDecision(&LoadR->getIngredient(), VF);
7240 I32VF = Builder.createScalarZExtOrTrunc(
7253 "Stride type from SCEV must match the index type");
7254 VPValue *CanIV = Builder.createScalarSExtOrTrunc(
7258 auto *
Offset = Builder.createOverflowingOp(
7259 Instruction::Mul, {CanIV, StrideInBytes},
7260 {AddRecPtr->hasNoUnsignedWrap(), AddRecPtr->hasNoSignedWrap()});
7261 auto *BasePtr = Builder.createNoWrapPtrAdd(
7267 VPValue *NewPtr = Builder.createVectorPointer(
7269 Ptr->getGEPNoWrapFlags(), Ptr->getDebugLoc());
7271 VPValue *Mask = LoadR->getMask();
7274 auto *StridedLoad = Builder.createWidenMemIntrinsic(
7275 Intrinsic::experimental_vp_strided_load,
7276 {NewPtr, StrideInBytes, Mask, I32VF}, LoadTy, Alignment, *LoadR,
7277 LoadR->getDebugLoc());
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static bool isEqual(const Function &Caller, const Function &Callee)
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
static cl::opt< IntrinsicCostStrategy > IntrinsicCost("intrinsic-cost-strategy", cl::desc("Costing strategy for intrinsic instructions"), cl::init(IntrinsicCostStrategy::InstructionCost), cl::values(clEnumValN(IntrinsicCostStrategy::InstructionCost, "instruction-cost", "Use TargetTransformInfo::getInstructionCost"), clEnumValN(IntrinsicCostStrategy::IntrinsicCost, "intrinsic-cost", "Use TargetTransformInfo::getIntrinsicInstrCost"), clEnumValN(IntrinsicCostStrategy::TypeBasedIntrinsicCost, "type-based-intrinsic-cost", "Calculate the intrinsic cost based only on argument types")))
iv Induction Variable Users
static std::pair< Value *, APInt > getMask(Value *WideMask, unsigned Factor, ElementCount LeafValueEC)
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
Legalize the Machine IR a function s Machine IR
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first DebugLoc that has line number information, given a range of instructions.
This file provides utility analysis objects describing memory locations.
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
const SmallVectorImpl< MachineOperand > & Cond
static bool dominates(InstrPosIndexes &PosIndexes, const MachineInstr &A, const MachineInstr &B)
This is the interface for a metadata-based scoped no-alias analysis.
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static SymbolRef::Type getType(const Symbol *Sym)
This file implements the TypeSwitch template, which mimics a switch() statement whose cases are type ...
This file implements dominator tree analysis for a single level of a VPlan's H-CFG.
This file contains the declarations of different VPlan-related auxiliary helpers.
This file declares the class VPlanVerifier, which contains utility functions to check the consistency...
This file contains the declarations of the Vectorization Plan base classes:
static const X86InstrFMA3Group Groups[]
static const uint32_t IV[8]
Helper for extra no-alias checks via known-safe recipe and SCEV.
SinkStoreInfo(const SmallPtrSetImpl< VPRecipeBase * > &ExcludeRecipes, VPReplicateRecipe &GroupLeader, PredicatedScalarEvolution &PSE, const Loop &L)
bool shouldSkip(VPRecipeBase &R) const
Return true if R should be skipped during alias checking, either because it's in the exclude set or b...
Class for arbitrary precision integers.
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
unsigned getActiveBits() const
Compute the number of active bits in the value.
APInt abs() const
Get the absolute value.
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
unsigned getBitWidth() const
Return the number of bits in the APInt.
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
An arbitrary precision integer that knows its signedness.
static APSInt getMinValue(uint32_t numBits, bool Unsigned)
Return the APSInt representing the minimum integer value with the given bit width and signedness.
static APSInt getMaxValue(uint32_t numBits, bool Unsigned)
Return the APSInt representing the maximum integer value with the given bit width and signedness.
@ NoAlias
The two locations do not alias at all.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & back() const
Get the last element.
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
const T & front() const
Get the first element.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
const Function * getParent() const
Return the enclosing method, or null if none.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
bool isNoBuiltin() const
Return true if the call should not be treated as a call to a builtin.
This class represents a function call, abstracting a target machine's calling convention.
@ ICMP_ULT
unsigned less than
@ ICMP_ULE
unsigned less or equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
This class represents a range of values.
LLVM_ABI bool contains(const APInt &Val) const
Return true if the specified value is in the set.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
A parsed version of the target data layout string and methods for querying it.
LLVM_ABI IntegerType * getIndexType(LLVMContext &C, unsigned AddressSpace) const
Returns the type of a GEP index in AddressSpace.
static DebugLoc getCompilerGenerated()
static DebugLoc getUnknown()
ValueT lookup(const_arg_type_t< KeyT > Val) const
Return the entry for the specified key, or a default constructed value if no such entry exists.
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
ValueT lookup_or(const_arg_type_t< KeyT > Val, U &&Default) const
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
void recalculate(ParentType &Func)
recalculate - compute a dominator tree for the given function
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
constexpr bool isVector() const
One or more elements.
static constexpr ElementCount getScalable(ScalarTy MinVal)
constexpr bool isScalar() const
Exactly one element.
Utility class for floating point operations which can have information about relaxed accuracy require...
FastMathFlags getFastMathFlags() const
Convenience function for getting all the fast-math flags.
Convenience struct for specifying and reasoning about fast-math flags.
Represents flags for the getelementptr instruction/expression.
static GEPNoWrapFlags noUnsignedWrap()
GEPNoWrapFlags withoutNoUnsignedWrap() const
static GEPNoWrapFlags none()
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
A struct for saving information about induction variables.
static LLVM_ABI InductionDescriptor getCanonicalIntInduction(Type *Ty, ScalarEvolution &SE)
Returns the canonical integer induction for type Ty with start = 0 and step = 1.
InductionKind
This enum represents the kinds of inductions that we support.
@ IK_NoInduction
Not an induction variable.
@ IK_FpInduction
Floating point induction variable.
@ IK_PtrInduction
Pointer induction var. Step = C.
@ IK_IntInduction
Integer induction variable. Step = C.
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
static InstructionCost getInvalid(CostType Val=0)
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
The group of interleaved loads/stores sharing the same stride and close to each other.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
static bool getDecisionAndClampRange(const std::function< bool(ElementCount)> &Predicate, VFRange &Range)
Test a Predicate on a Range of VF's.
Represents a single loop in the control flow graph.
LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
This class implements a map that also provides access to all stored values in a deterministic order.
ValueT lookup(const KeyT &Key) const
std::pair< iterator, bool > try_emplace(const KeyT &Key, Ts &&...Args)
Representation for a specific memory location.
AAMDNodes AATags
The metadata nodes which describe the aliasing of the location (each member is null if that kind of ...
Function * getFunction(StringRef Name) const
Look up the specified function in the module symbol table.
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Post-order traversal of a graph.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
LLVM_ABI const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
static LLVM_ABI unsigned getOpcode(RecurKind Kind)
Returns the opcode corresponding to the RecurrenceKind.
unsigned getOpcode() const
static bool isFindLastRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
RegionT * getParent() const
Get the parent of the Region.
This class represents a constant integer value.
ConstantInt * getValue() const
This class uses information about analyzed scalars to rewrite expressions in canonical form.
LLVM_ABI Value * expandCodeFor(SCEVUse SH, Type *Ty, BasicBlock::iterator I)
Insert code to directly compute the specified SCEV expression into the program.
static const SCEV * rewrite(const SCEV *Scev, ScalarEvolution &SE, ValueToSCEVMapTy &Map)
This class represents an analyzed expression in the program.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
LLVM_ABI const SCEV * getUDivExpr(SCEVUse LHS, SCEVUse RHS)
Get a canonical unsigned division expression, or something simpler if possible.
const DataLayout & getDataLayout() const
Return the DataLayout associated with the module this SCEV instance is operating on.
LLVM_ABI const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
LLVM_ABI bool isKnownNonZero(const SCEV *S)
Test if the given expression is known to be non-zero.
LLVM_ABI const SCEV * getConstant(ConstantInt *V)
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI const SCEV * getMinusSCEV(SCEVUse LHS, SCEVUse RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
ConstantRange getSignedRange(const SCEV *S)
Determine the signed range for a particular SCEV.
LLVM_ABI bool isKnownPositive(const SCEV *S)
Test if the given expression is known to be positive.
LLVM_ABI const SCEV * getElementCount(Type *Ty, ElementCount EC, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
ConstantRange getUnsignedRange(const SCEV *S)
Determine the unsigned range for a particular SCEV.
LLVM_ABI const SCEV * getMulExpr(SmallVectorImpl< SCEVUse > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
LLVM_ABI bool isKnownPredicate(CmpPredicate Pred, SCEVUse LHS, SCEVUse RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
static LLVM_ABI AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB)
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Provides information about what library functions are available for the current target.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
This class implements a switch-like dispatch statement for a value of 'T' using dyn_cast functionalit...
TypeSwitch< T, ResultT > & Case(CallableT &&caseFn)
Add a case on the given type.
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
bool isStructTy() const
True if this is an instance of StructType.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static SmallVector< VFInfo, 8 > getMappings(const CallInst &CI)
Retrieve all the VFInfo instances associated to the CallInst CI.
A recipe for generating the active lane mask for the vector loop that is used to predicate the vector...
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
RecipeListTy::iterator iterator
Instruction iterators...
iterator begin()
Recipe iterator methods.
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
VPBasicBlock * splitAt(iterator SplitAt)
Split current block at SplitAt by inserting a new block between the current block and its successors ...
const VPRecipeBase & front() const
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
const VPRecipeBase & back() const
A recipe for vectorizing a phi-node as a sequence of mask-based select instructions.
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
void setMask(unsigned Idx, VPValue *V)
Set mask number Idx to V.
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have...
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccs as successor of this VPBlockBase.
VPRegionBlock * getParent()
const VPBasicBlock * getExitingBasicBlock() const
size_t getNumSuccessors() const
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
const VPBlocksTy & getPredecessors() const
const std::string & getName() const
void clearSuccessors()
Remove all the successors of this block.
VPBlockBase * getSinglePredecessor() const
void clearPredecessors()
Remove all the predecessors of this block.
const VPBasicBlock * getEntryBasicBlock() const
VPBlockBase * getSingleHierarchicalPredecessor()
VPBlockBase * getSingleSuccessor() const
const VPBlocksTy & getSuccessors() const
static auto blocksAs(T &&Range)
Return an iterator range over Range with each block cast to BlockTy.
static void insertOnEdge(VPBlockBase *From, VPBlockBase *To, VPBlockBase *BlockPtr)
Inserts BlockPtr on the edge between From and To.
static bool isLatch(const VPBlockBase *VPB, const VPDominatorTree &VPDT)
Returns true if VPB is a loop latch, using isHeader().
static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBases IfTrue and IfFalse after BlockPtr.
static void connectBlocks(VPBlockBase *From, VPBlockBase *To, unsigned PredIdx=-1u, unsigned SuccIdx=-1u)
Connect VPBlockBases From and To bi-directionally.
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
static auto blocksOnly(T &&Range)
Return an iterator range over Range which only includes BlockTy blocks.
static void transferSuccessors(VPBlockBase *Old, VPBlockBase *New)
Transfer successors from Old to New. New must have no successors.
static SmallVector< VPBasicBlock * > blocksInSingleSuccessorChainBetween(VPBasicBlock *FirstBB, VPBasicBlock *LastBB)
Returns the blocks between FirstBB and LastBB, where FirstBB to LastBB forms a single-successor chain.
A recipe for generating conditional branches on the bits of a mask.
RAII object that stores the current insertion point and restores it when the object is destroyed.
VPlan-based builder utility analogous to IRBuilder.
VPInstruction * createFirstActiveLane(ArrayRef< VPValue * > Masks, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createAdd(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", VPRecipeWithIRFlags::WrapFlagsTy WrapFlags={false, false})
VPInstruction * createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPValue * createScalarZExtOrTrunc(VPValue *Op, Type *ResultTy, Type *SrcTy, DebugLoc DL)
VPInstruction * createLogicalOr(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createNot(VPValue *Operand, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createAnyOfReduction(VPValue *ChainOp, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL=DebugLoc::getUnknown())
Create an AnyOf reduction pattern: or-reduce ChainOp, freeze the result, then select between TrueVal ...
VPInstruction * createLogicalAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPDerivedIVRecipe * createDerivedIV(InductionDescriptor::InductionKind Kind, FPMathOperator *FPBinOp, VPIRValue *Start, VPValue *Current, VPValue *Step)
Convert the input value Current to the corresponding value of an induction with Start and Step values...
VPInstruction * createScalarCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, DebugLoc DL, const VPIRMetadata &Metadata={})
VPWidenPHIRecipe * createWidenPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
static VPBuilder getToInsertAfter(VPRecipeBase *R)
Create a VPBuilder to insert after R.
VPWidenCastRecipe * createWidenCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
VPInstruction * createICmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new ICmp VPInstruction with predicate Pred and operands A and B.
VPPhi * createScalarPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={}, Type *ResultTy=nullptr)
VPInstruction * createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={})
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", Type *ResultTy=nullptr)
Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.
static VPSingleDefRecipe * createSingleScalarOp(unsigned Opcode, ArrayRef< VPValue * > Operands, VPValue *Mask, const VPIRFlags &Flags, const VPIRMetadata &Metadata, DebugLoc DL, Instruction *UV)
Create a single-scalar recipe with Opcode and Operands without inserting it.
void setInsertPoint(VPBasicBlock *TheBB)
This specifies that created VPInstructions should be appended to the end of the specified block.
A recipe for generating the phi node tracking the current scalar iteration index.
unsigned getNumDefinedValues() const
Returns the number of values defined by the VPDef.
VPValue * getVPSingleValue()
Returns the only VPValue defined by the VPDef.
VPValue * getVPValue(unsigned I)
Returns the VPValue with index I defined by the VPDef.
ArrayRef< VPRecipeValue * > definedValues()
Returns an ArrayRef of the values defined by the VPDef.
A recipe for converting the input value IV value to the corresponding value of an IV with different s...
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
bool properlyDominates(const VPRecipeBase *A, const VPRecipeBase *B) const
A recipe to combine multiple recipes into a single 'expression' recipe, which should be considered a ...
A recipe representing a sequence of load -> update -> store as part of a histogram operation.
A special type of VPBasicBlock that wraps an existing IR basic block.
Class to record and manage LLVM IR flags.
static VPIRFlags getDefaultFlags(unsigned Opcode)
Returns default flags for Opcode for opcodes that support it, asserts otherwise.
LLVM_ABI_FOR_TEST FastMathFlags getFastMathFlagsOrNone() const
void dropPoisonGeneratingFlags()
Drop all poison-generating flags.
static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)
Create a new VPIRPhi for I, if it is a PHINode, otherwise create a VPIRInstruction.
This is a concrete Recipe that models a single VPlan-level instruction.
unsigned getNumOperandsWithoutMask() const
Returns the number of operands, excluding the mask if the VPInstruction is masked.
@ ExtractLane
Extracts a single lane (first operand) from a set of vector operands.
@ ExtractPenultimateElement
@ Unpack
Extracts all lanes from its (non-scalable) vector operand.
@ ReductionStartVector
Start vector for reductions with 3 operands: the original start value, the identity value for the red...
@ BuildVector
Creates a fixed-width vector containing all operands.
@ BuildStructVector
Given operands of (the same) struct type, creates a struct of fixed- width vectors each containing a ...
@ CanonicalIVIncrementForPart
@ ComputeReductionResult
Reduce the operands to the final reduction result using the operation specified via the operation's V...
const InterleaveGroup< Instruction > * getInterleaveGroup() const
VPValue * getMask() const
Return the mask used by this recipe.
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
A recipe for interleaved memory operations with vector-predication intrinsics.
VPInterleaveRecipe is a recipe for transforming an interleave group of load or stores into one wide l...
void addIncoming(VPValue *IncomingV)
Append IncomingV as an incoming value to the phi-like recipe.
VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when control converges back from ...
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
VPBasicBlock * getParent()
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Helper class to create VPRecipes from IR instructions.
VPHistogramRecipe * widenIfHistogram(VPInstruction *VPI)
If VPI represents a histogram operation (as determined by LoopVectorizationLegality) make that safe f...
VPRecipeBase * tryToWidenMemory(VPInstruction *VPI, VFRange &Range)
Check if the load or store instruction VPI should widened for Range.Start and potentially masked.
bool replaceWithFinalIfReductionStore(VPInstruction *VPI, VPBuilder &FinalRedStoresBuilder)
If VPI is a store of a reduction into an invariant address, delete it.
VPSingleDefRecipe * handleReplication(VPInstruction *VPI, VFRange &Range)
Build a replicating or single-scalar recipe for VPI.
Type * getScalarType() const
Returns the scalar type of this VPRecipeValue.
A recipe to represent inloop reduction operations with vector-predication intrinsics,...
A recipe for handling reduction phis.
void setVFScaleFactor(unsigned ScaleFactor)
Set the VFScaleFactor for this reduction phi.
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by, or 1 if it isn't scaled.
RecurKind getRecurrenceKind() const
Returns the recurrence kind of the reduction.
A recipe to represent inloop, ordered or partial reduction operations.
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
const VPBlockBase * getEntry() const
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
VPInstruction * getOrCreateCanonicalIVIncrement()
Get the canonical IV increment instruction if it exists.
void setExiting(VPBlockBase *ExitingBlock)
Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.
Type * getCanonicalIVType() const
Return the type of the canonical IV for loop regions.
void clearCanonicalIVNUW(VPInstruction *Increment)
Unsets NUW for the canonical IV increment Increment, for loop regions.
VPRegionValue * getCanonicalIV()
Return the canonical induction variable of the region, null for replicating regions.
const VPBlockBase * getExiting() const
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
bool isSingleScalar() const
static InstructionCost computeCallCost(Function *CalledFn, Type *ResultTy, ArrayRef< const VPValue * > ArgOps, bool IsSingleScalar, ElementCount VF, VPCostContext &Ctx)
Return the cost of scalarizing a call to CalledFn with argument operands ArgOps for a given VF.
operand_range operandsWithoutMask()
Return the recipe's operands, excluding the mask of a predicated recipe.
bool isPredicated() const
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
Lightweight SCEV-to-VPlan expander.
VPValue * tryToExpand(const SCEV *S)
Try to expand S into recipes and live-ins using the builder.
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...
VPSingleDefRecipe is a base class for recipes that model a sequence of one or more output IR that def...
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
VPSingleDefRecipe * clone() override=0
Clone the current recipe.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
void setOperand(unsigned I, VPValue *New)
unsigned getNumOperands() const
VPValue * getOperand(unsigned N) const
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Type * getScalarType() const
Returns the scalar type of this VPValue, dispatching based on the concrete subclass.
Value * getLiveInIRValue() const
Return the underlying IR value for a VPIRValue.
bool isDefinedOutsideLoopRegions() const
Returns true if the VPValue is defined outside any loop.
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
bool hasMoreThanOneUniqueUser() const
Returns true if the value has more than one unique user.
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
void setUnderlyingValue(Value *Val)
VPUser * getSingleUser()
Return the single user of this value, or nullptr if there is not exactly one user.
void replaceAllUsesWith(VPValue *New)
unsigned getNumUsers() const
void replaceUsesWithIf(VPValue *New, llvm::function_ref< bool(VPUser &U, unsigned Idx)> ShouldReplace)
Go through the uses list for this VPValue and make each use point to New if the callback ShouldReplac...
A recipe to compute a pointer to the last element of each part of a widened memory access for widened...
A recipe for widening Call instructions using library calls.
static InstructionCost computeCallCost(Function *Variant, VPCostContext &Ctx)
Return the cost of widening a call using the vector function Variant.
VPWidenCastRecipe is a recipe to create vector cast instructions.
Instruction::CastOps getOpcode() const
A recipe for handling GEP instructions.
Base class for widened induction (VPWidenIntOrFpInductionRecipe and VPWidenPointerInductionRecipe),...
VPIRValue * getStartValue() const
Returns the start value of the induction.
PHINode * getPHINode() const
Returns the underlying PHINode if one exists, or null otherwise.
VPValue * getStepValue()
Returns the step value of the induction.
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
A recipe for handling phi nodes of integer and floating-point inductions, producing their vector valu...
VPIRValue * getStartValue() const
Returns the start value of the induction.
VPValue * getSplatVFValue() const
If the recipe has been unrolled, return the VPValue for the induction increment, otherwise return nul...
TruncInst * getTruncInst()
Returns the first defined value as TruncInst, if it is one or nullptr otherwise.
VPValue * getLastUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the last unrolled part,...
A recipe for widening vector intrinsics.
static InstructionCost computeCallCost(Intrinsic::ID ID, ArrayRef< const VPValue * > Operands, const VPRecipeWithIRFlags &R, ElementCount VF, VPCostContext &Ctx)
Compute the cost of a vector intrinsic with ID and Operands.
static InstructionCost computeMemIntrinsicCost(Intrinsic::ID IID, Type *Ty, bool IsMasked, Align Alignment, VPCostContext &Ctx)
Helper function for computing the cost of a vector memory intrinsic.
A common mixin class for widening memory operations.
virtual VPRecipeBase * getAsRecipe()=0
Return a VPRecipeBase* to the current object.
A recipe for widened phis.
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenRecipe.
VPWidenRecipe * clone() override
Clone the current recipe.
unsigned getOpcode() const
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
VPIRValue * getLiveIn(Value *V) const
Return the live-in VPIRValue for V, if there is one or nullptr otherwise.
bool hasVF(ElementCount VF) const
const DataLayout & getDataLayout() const
LLVMContext & getContext() const
VPBasicBlock * getEntry()
bool hasScalableVF() const
VPValue * getTripCount() const
The trip count of the original loop.
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
iterator_range< SmallSetVector< ElementCount, 2 >::iterator > vectorFactors() const
Returns an iterator range over all VFs of the plan.
VPIRValue * getFalse()
Return a VPIRValue wrapping i1 false.
VPSymbolicValue & getVFxUF()
Returns VF * UF of the vector loop region.
VPIRValue * getAllOnesValue(Type *Ty)
Return a VPIRValue wrapping the AllOnes value of type Ty.
VPRegionBlock * createReplicateRegion(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="")
Create a new replicate region with Entry, Exiting and Name.
auto getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
bool hasUF(unsigned UF) const
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
VPSymbolicValue & getVectorTripCount()
The vector trip count.
VPValue * getBackedgeTakenCount() const
VPIRValue * getOrAddLiveIn(Value *V)
Gets the live-in VPIRValue for V or adds a new live-in (if none exists yet) for V.
VPIRValue * getZero(Type *Ty)
Return a VPIRValue wrapping the null value of type Ty.
void setVF(ElementCount VF)
bool isUnrolled() const
Returns true if the VPlan already has been unrolled, i.e.
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
unsigned getConcreteUF() const
Returns the concrete UF of the plan, after unrolling.
void resetTripCount(VPValue *NewTripCount)
Resets the trip count for the VPlan.
VPBasicBlock * getMiddleBlock()
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
VPIRValue * getTrue()
Return a VPIRValue wrapping i1 true.
VPBasicBlock * getVectorPreheader() const
Returns the preheader of the vector loop region, if one exists, or null otherwise.
VPSymbolicValue & getUF()
Returns the UF of the vector loop region.
bool hasScalarVFOnly() const
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
VPSymbolicValue & getVF()
Returns the VF of the vector loop region.
bool hasScalarTail() const
Returns true if the scalar tail may execute after the vector loop, i.e.
LLVM_ABI_FOR_TEST VPlan * duplicate()
Clone the current VPlan, update all VPValues of the new VPlan and cloned recipes to refer to the clon...
VPIRValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPIRValue wrapping a ConstantInt with the given type and value.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
iterator_range< user_iterator > users()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
constexpr bool hasKnownScalarFactor(const FixedOrScalableQuantity &RHS) const
Returns true if there exists a value X where RHS.multiplyCoefficientBy(X) will result in a value whos...
constexpr ScalarTy getFixedValue() const
constexpr ScalarTy getKnownScalarFactor(const FixedOrScalableQuantity &RHS) const
Returns a value X where RHS.multiplyCoefficientBy(X) will result in a value whose quantity matches ou...
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
An efficient, type-erasing, non-owning reference to a callable.
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_ABI APInt RoundingUDiv(const APInt &A, const APInt &B, APInt::Rounding RM)
Return A unsign-divided by B, rounded by the given rounding mode.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
std::variant< std::monostate, Loc::Single, Loc::Multi, Loc::MMI, Loc::EntryValue > Variant
Alias for the std::variant specialization base class of DbgVariable.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
match_isa< To... > m_Isa()
match_combine_or< Ty... > m_CombineOr(const Ty &...Ps)
Combine pattern matchers matching any of Ps patterns.
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
auto m_Cmp()
Matches any compare instruction and ignores it.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_MaskedStore(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
Matches MaskedStore Intrinsic.
match_combine_or< CastInst_match< OpTy, TruncInst >, OpTy > m_TruncOrSelf(const OpTy &Op)
auto m_Poison()
Match an arbitrary poison constant.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
LogicalOp_match< LHS, RHS, Instruction::And > m_LogicalAnd(const LHS &L, const RHS &R)
Matches L && R either in the form of L & R or L ? R : false.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
BinaryOp_match< LHS, RHS, Instruction::FMul > m_FMul(const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, OpTy > m_ZExtOrSelf(const OpTy &Op)
bool match(Val *V, const Pattern &P)
match_deferred< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
auto match_fn(const Pattern &P)
A match functor that can be used as a UnaryPredicate in functional algorithms like all_of.
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_MaskedLoad(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
Matches MaskedLoad Intrinsic.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
SpecificCmpClass_match< LHS, RHS, CmpInst > m_SpecificCmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
CastInst_match< OpTy, FPExtInst > m_FPExt(const OpTy &Op)
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::UDiv > m_UDiv(const LHS &L, const RHS &R)
SelectLike_match< CondTy, LTy, RTy > m_SelectLike(const CondTy &C, const LTy &TrueC, const RTy &FalseC)
Matches a value that behaves like a boolean-controlled select, i.e.
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinaryOp_match< LHS, RHS, Instruction::FAdd, true > m_c_FAdd(const LHS &L, const RHS &R)
Matches FAdd with LHS and RHS in either order.
LogicalOp_match< LHS, RHS, Instruction::And, true > m_c_LogicalAnd(const LHS &L, const RHS &R)
Matches L && R with LHS and RHS in either order.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
BinaryOp_match< LHS, RHS, Instruction::Mul, true > m_c_Mul(const LHS &L, const RHS &R)
Matches a Mul with LHS and RHS in either order.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
auto m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
bind_cst_ty m_scev_APInt(const APInt *&C)
Match an SCEV constant and bind it to an APInt.
specificloop_ty m_SpecificLoop(const Loop *L)
bool match(const SCEV *S, const Pattern &P)
SCEVAffineAddRec_match< Op0_t, Op1_t, match_isa< const Loop > > m_scev_AffineAddRec(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::ExtractLastLane, VPInstruction_match< VPInstruction::ExtractLastPart, Op0_t > > m_ExtractLastLaneOfLastPart(const Op0_t &Op0)
AllRecipe_commutative_match< Instruction::And, Op0_t, Op1_t > m_c_BinaryAnd(const Op0_t &Op0, const Op1_t &Op1)
Match a binary AND operation.
AllRecipe_match< Instruction::Or, Op0_t, Op1_t > m_BinaryOr(const Op0_t &Op0, const Op1_t &Op1)
Match a binary OR operation.
VPInstruction_match< VPInstruction::AnyOf > m_AnyOf()
AllRecipe_commutative_match< Instruction::Or, Op0_t, Op1_t > m_c_BinaryOr(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::ComputeReductionResult, Op0_t > m_ComputeReductionResult(const Op0_t &Op0)
auto m_WidenAnyExtend(const Op0_t &Op0)
match_bind< VPIRValue > m_VPIRValue(VPIRValue *&V)
Match a VPIRValue.
VPInstruction_match< VPInstruction::StepVector > m_StepVector()
auto m_VPPhi(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::BranchOnTwoConds > m_BranchOnTwoConds()
AllRecipe_match< Opcode, Op0_t, Op1_t > m_Binary(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::LastActiveLane, Op0_t > m_LastActiveLane(const Op0_t &Op0)
auto m_WidenIntrinsic(const T &...Ops)
canonical_widen_iv_match m_CanonicalWidenIV()
VPInstruction_match< VPInstruction::ExitingIVValue, Op0_t > m_ExitingIVValue(const Op0_t &Op0)
VPInstruction_match< Instruction::ExtractElement, Op0_t, Op1_t > m_ExtractElement(const Op0_t &Op0, const Op1_t &Op1)
specific_intval< 1 > m_False()
VPInstruction_match< VPInstruction::ExtractLastLane, Op0_t > m_ExtractLastLane(const Op0_t &Op0)
VPInstruction_match< VPInstruction::ActiveLaneMask, Op0_t, Op1_t, Op2_t > m_ActiveLaneMask(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
match_bind< VPSingleDefRecipe > m_VPSingleDefRecipe(VPSingleDefRecipe *&V)
Match a VPSingleDefRecipe, capturing if we match.
VPInstruction_match< VPInstruction::BranchOnCount > m_BranchOnCount()
auto m_GetElementPtr(const Op0_t &Op0, const Op1_t &Op1)
specific_intval< 1 > m_True()
auto m_VPValue()
Match an arbitrary VPValue and ignore it.
VectorEndPointerRecipe_match< Op0_t, Op1_t > m_VecEndPtr(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::ExtractLastPart, Op0_t > m_ExtractLastPart(const Op0_t &Op0)
VPInstruction_match< VPInstruction::Broadcast, Op0_t > m_Broadcast(const Op0_t &Op0)
VPInstruction_match< VPInstruction::ExplicitVectorLength, Op0_t > m_EVL(const Op0_t &Op0)
VPInstruction_match< VPInstruction::BuildVector > m_BuildVector()
BuildVector matches only its opcode, w/o matching its operands as the number of operands is not fi...
VPInstruction_match< VPInstruction::ExtractPenultimateElement, Op0_t > m_ExtractPenultimateElement(const Op0_t &Op0)
match_bind< VPInstruction > m_VPInstruction(VPInstruction *&V)
Match a VPInstruction, capturing if we match.
VPInstruction_match< VPInstruction::FirstActiveLane, Op0_t > m_FirstActiveLane(const Op0_t &Op0)
auto m_DerivedIV(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
VPInstruction_match< VPInstruction::BranchOnCond > m_BranchOnCond()
static VPRecipeBase * findUserOf(VPValue *V, const MatchT &P)
If V is used by a recipe matching pattern P, return it.
VPInstruction_match< VPInstruction::ExtractLane, Op0_t, Op1_t > m_ExtractLane(const Op0_t &Op0, const Op1_t &Op1)
auto m_AnyNeg(const Op0_t &Op0)
VPInstruction_match< VPInstruction::Reverse, Op0_t > m_Reverse(const Op0_t &Op0)
NodeAddr< DefNode * > Def
bool isSingleScalar(const VPValue *VPV)
Returns true if VPV is a single scalar, either because it produces the same value for all lanes or on...
VPValue * getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr)
Get or create a VPValue that corresponds to the expansion of Expr.
bool cannotHoistOrSinkRecipe(const VPRecipeBase &R, bool Sinking=false)
Return true if we do not know how to (mechanically) hoist or sink R.
VPInstruction * findComputeReductionResult(VPReductionPHIRecipe *PhiR)
Find the ComputeReductionResult recipe for PhiR, looking through selects inserted for predicated redu...
VPInstruction * findCanonicalIVIncrement(VPlan &Plan)
Find the canonical IV increment of Plan's vector loop region.
std::optional< MemoryLocation > getMemoryLocation(const VPRecipeBase &R)
Return a MemoryLocation for R with noalias metadata populated from R, if the recipe is supported and ...
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
VPValue * findIncomingAliasMask(const VPlan &Plan)
Finds the incoming alias-mask within the vector preheader.
VPRecipeBase * findRecipe(VPValue *Start, PredT Pred)
Search Start's users for a recipe satisfying Pred, looking through recipes with definitions.
VPSingleDefRecipe * findHeaderMask(VPlan &Plan)
Collect the header mask with the pattern: (ICMP_ULE, WideCanonicalIV, backedge-taken-count) Note: If ...
bool onlyScalarValuesUsed(const VPValue *Def)
Returns true if only scalar values of Def are used by all users.
bool isUniformAcrossVFsAndUFs(const VPValue *V)
Checks if V is uniform across all VF lanes and UF parts.
LLVM_ABI_FOR_TEST std::optional< VPValue * > getRecipesForUncountableExit(SmallVectorImpl< VPInstruction * > &Recipes, SmallVectorImpl< VPInstruction * > &GEPs, VPBasicBlock *LatchVPBB)
Returns the VPValue representing the uncountable exit comparison used by AnyOf if the recipes it depe...
const SCEV * getSCEVExprForVPValue(const VPValue *V, PredicatedScalarEvolution &PSE, const Loop *L=nullptr)
Return the SCEV expression for V.
bool isHeaderMask(const VPValue *V, const VPlan &Plan)
Return true if V is a header mask in Plan.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
SmallVector< VPBasicBlock * > vp_rpo_plain_cfg_loop_body(VPBasicBlock *Header)
Returns the VPBasicBlocks forming the loop body of a plain (pre-region) VPlan in reverse post-order s...
constexpr auto not_equal_to(T &&Arg)
Functor variant of std::not_equal_to that can be used as a UnaryPredicate in functional algorithms li...
void stable_sort(R &&Range)
auto min_element(R &&Range)
Provide wrappers to std::min_element which take ranges instead of having to pass begin/end explicitly...
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
DenseMap< const Value *, const SCEV * > ValueToSCEVMapTy
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
constexpr from_range_t from_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
auto cast_or_null(const Y &Val)
iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order.
constexpr auto bind_back(FnT &&Fn, BindArgsT &&...BindArgs)
C++23 bind_back.
iterator_range< df_iterator< VPBlockDeepTraversalWrapper< VPBlockBase * > > > vp_depth_first_deep(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order while traversing t...
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like a...
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
SmallVector< VPRegisterUsage, 8 > calculateRegisterUsageForPlan(VPlan &Plan, ArrayRef< ElementCount > VFs, const TargetTransformInfo &TTI, const SmallPtrSetImpl< const Value * > &ValuesToIgnore)
Estimate the register usage for Plan and vectorization factors in VFs by calculating the highest numb...
auto map_range(ContainerTy &&C, FuncTy F)
Return a range that applies F to the elements of C.
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
constexpr size_t range_size(R &&Range)
Returns the size of the Range, i.e., the number of elements.
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI_FOR_TEST cl::opt< bool > EnableWideActiveLaneMask
UncountableExitStyle
Different methods of handling early exits.
@ MaskedHandleExitInScalarLoop
All memory operations other than the load(s) required to determine whether an uncountable exit occurr...
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
iterator_range< filter_iterator< detail::IterOfRange< RangeT >, PredicateT > > make_filter_range(RangeT &&Range, PredicateT Pred)
Convenience function that takes a range of elements and a predicate, and return a new filter_iterator...
bool canConstantBeExtended(const APInt *C, Type *NarrowType, TTI::PartialReductionExtendKind ExtKind)
Check if a constant CI can be safely treated as having been extended from a narrower type with the gi...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
RecurKind
These are the kinds of recurrences that we support.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ FindIV
FindIV reduction with select(icmp(),x,y) where one of (x,y) is a loop induction variable (increasing ...
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
@ FSub
Subtraction of floats.
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ Sub
Subtraction of integers.
@ AddChainWithSubs
A chain of adds and subs.
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
LLVM_ABI Value * getRecurrenceIdentity(RecurKind K, Type *Tp, FastMathFlags FMF)
Given information about a recurrence kind, return the identity for the @llvm.vector....
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the specified block at the specified instruction.
FunctionAddr VTableAddr Next
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
DWARFExpression::Operation Op
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...
ArrayRef(const T &OneElt) -> ArrayRef< T >
auto make_second_range(ContainerTy &&c)
Given a container of pairs, return a range over the second elements.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list is empty.
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
LLVM_ABI bool isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L, ScalarEvolution &SE, DominatorTree &DT, AssumptionCache *AC=nullptr, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
Return true if we can prove that the given load (which is assumed to be within the specified loop) wo...
@ Default
The result value is uniform if and only if all operands are uniform.
constexpr detail::IsaCheckPredicate< Types... > IsaPred
Function object wrapper for the llvm::isa type check.
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
VPBasicBlock * EarlyExitingVPBB
VPIRBasicBlock * EarlyExitVPBB
RemoveMask_match(const Op0_t &In, Op1_t &Out)
bool match(OpTy *V) const
MDNode * Scope
The tag for alias scope specification (used with noalias).
This struct is a compact representation of a valid (non-zero power of two) alignment.
An information struct used to provide DenseMap with the various necessary components for a given valu...
This reduction is unordered with the partial result scaled down by some factor.
Holds the VFShape for a specific scalar to vector function mapping.
Encapsulates information needed to describe a parameter.
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
Struct to hold various analysis needed for cost computations.
static bool isFreeScalarIntrinsic(Intrinsic::ID ID)
Returns true if ID is a pseudo intrinsic that is dropped via scalarization rather than widened.
bool isMaskRequired(Instruction *I) const
Forwards to LoopVectorizationCostModel::isMaskRequired.
PredicatedScalarEvolution & PSE
bool willBeScalarized(Instruction *I, ElementCount VF) const
Returns true if I is known to be scalarized at VF.
TargetTransformInfo::TargetCostKind CostKind
const TargetLibraryInfo & TLI
const TargetTransformInfo & TTI
A VPValue representing a live-in from the input IR or a constant.
Type * getType() const
Returns the type of the underlying IR value.
A struct that represents some properties of the register usage of a loop.
SmallMapVector< unsigned, unsigned, 4 > MaxLocalUsers
Holds the maximum number of concurrent live intervals in the loop.
InstructionCost spillCost(const TargetTransformInfo &TTI, TargetTransformInfo::TargetCostKind CostKind, unsigned OverrideMaxNumRegs=0) const
Calculate the estimated cost of any spills due to using more registers than the number available for ...
A symbolic live-in VPValue, used for values like vector trip count, VF, and VFxUF.
bool isMaterialized() const
Returns true if this symbolic value has been materialized.
A recipe for widening load operations with vector-predication intrinsics, using the address to load f...
A recipe for widening load operations, using the address to load from and an optional mask.
A recipe for widening store operations with vector-predication intrinsics, using the value to store,...
A recipe for widening store operations, using the stored value, the address to store to and an option...