58 if (!VPBB->getParent())
61 auto EndIter = Term ? Term->getIterator() : VPBB->end();
66 VPValue *VPV = Ingredient.getVPSingleValue();
82 *Load, Ingredient.getOperand(0),
nullptr ,
83 false , *VPI, Ingredient.getDebugLoc());
86 *Store, Ingredient.getOperand(1), Ingredient.getOperand(0),
87 nullptr ,
false , *VPI,
88 Ingredient.getDebugLoc());
91 Ingredient.operands(), *VPI,
92 Ingredient.getDebugLoc(),
GEP);
104 if (VectorID == Intrinsic::experimental_noalias_scope_decl)
109 if (VectorID == Intrinsic::assume ||
110 VectorID == Intrinsic::lifetime_end ||
111 VectorID == Intrinsic::lifetime_start ||
112 VectorID == Intrinsic::sideeffect ||
113 VectorID == Intrinsic::pseudoprobe) {
118 const bool IsSingleScalar = VectorID != Intrinsic::assume &&
119 VectorID != Intrinsic::pseudoprobe;
123 Ingredient.getDebugLoc());
126 *CI, VectorID,
drop_end(Ingredient.operands()), CI->getType(),
127 VPIRFlags(*CI), *VPI, CI->getDebugLoc());
131 CI->getOpcode(), Ingredient.getOperand(0), CI->getType(), CI,
135 *VPI, Ingredient.getDebugLoc());
139 "inductions must be created earlier");
148 "Only recpies with zero or one defined values expected");
149 Ingredient.eraseFromParent();
165 if (
A->getOpcode() != Instruction::Store ||
166 B->getOpcode() != Instruction::Store)
176 const APInt *Distance;
182 Type *TyA =
A->getOperand(0)->getScalarType();
184 Type *TyB =
B->getOperand(0)->getScalarType();
190 uint64_t MaxStoreSize = std::max(SizeA, SizeB);
192 auto VFs =
B->getParent()->getPlan()->vectorFactors();
196 return Distance->
abs().
uge(
204 : ExcludeRecipes(ExcludeRecipes), GroupLeader(GroupLeader), PSE(PSE),
212 return ExcludeRecipes.contains(&R) ||
213 (Store && isNoAliasViaDistance(Store, &GroupLeader));
226 std::optional<SinkStoreInfo> SinkInfo = {}) {
227 bool CheckReads = SinkInfo.has_value();
234 if (SinkInfo && SinkInfo->shouldSkip(R))
238 if (!
R.mayWriteToMemory() && !(CheckReads &&
R.mayReadFromMemory()))
263template <
unsigned Opcode>
268 static_assert(Opcode == Instruction::Load || Opcode == Instruction::Store,
269 "Only Load and Store opcodes supported");
270 constexpr bool IsLoad = (Opcode == Instruction::Load);
273 RecipesByAddressAndType;
278 if (!RepR || RepR->getOpcode() != Opcode || !FilterFn(RepR))
282 VPValue *Addr = RepR->getOperand(IsLoad ? 0 : 1);
286 RecipesByAddressAndType[{AddrSCEV, LoadStoreTy}].push_back(RepR);
291 for (
auto &Group :
Groups) {
306 auto InsertIfValidSinkCandidate = [ScalarVFOnly, &WorkList](
318 if (Candidate->getParent() == SinkTo ||
323 if (!ScalarVFOnly && RepR->isSingleScalar())
326 WorkList.
insert({SinkTo, Candidate});
338 for (
auto &Recipe : *VPBB)
340 InsertIfValidSinkCandidate(VPBB,
Op);
344 for (
unsigned I = 0;
I != WorkList.
size(); ++
I) {
347 std::tie(SinkTo, SinkCandidate) = WorkList[
I];
352 auto UsersOutsideSinkTo =
354 return cast<VPRecipeBase>(U)->getParent() != SinkTo;
356 if (
any_of(UsersOutsideSinkTo, [SinkCandidate](
VPUser *U) {
357 return !U->usesFirstLaneOnly(SinkCandidate);
360 bool NeedsDuplicating = !UsersOutsideSinkTo.empty();
362 if (NeedsDuplicating) {
366 if (
auto *SinkCandidateRepR =
372 nullptr , *SinkCandidateRepR,
376 Clone = SinkCandidate->
clone();
386 InsertIfValidSinkCandidate(SinkTo,
Op);
396 if (!EntryBB || EntryBB->size() != 1 ||
406 if (EntryBB->getNumSuccessors() != 2)
411 if (!Succ0 || !Succ1)
414 if (Succ0->getNumSuccessors() + Succ1->getNumSuccessors() != 1)
416 if (Succ0->getSingleSuccessor() == Succ1)
418 if (Succ1->getSingleSuccessor() == Succ0)
435 if (!Region1->isReplicator())
437 auto *MiddleBasicBlock =
439 if (!MiddleBasicBlock || !MiddleBasicBlock->empty())
444 if (!Region2 || !Region2->isReplicator())
449 if (!Mask1 || Mask1 != Mask2)
452 assert(Mask1 && Mask2 &&
"both region must have conditions");
458 if (TransformedRegions.
contains(Region1))
465 if (!Then1 || !Then2)
485 VPValue *Phi1ToMoveV = Phi1ToMove.getVPSingleValue();
491 if (Phi1ToMove.getVPSingleValue()->getNumUsers() == 0) {
492 Phi1ToMove.eraseFromParent();
495 Phi1ToMove.moveBefore(*Merge2, Merge2->begin());
509 TransformedRegions.
insert(Region1);
512 return !TransformedRegions.
empty();
520 std::string RegionName = (
Twine(
"pred.") + Instr->getOpcodeName()).str();
521 assert(Instr->getParent() &&
"Predicated instruction not in any basic block");
522 auto *BlockInMask = PredRecipe->
getMask();
543 Region->setParent(ParentRegion);
549 RecipeWithoutMask->getDebugLoc());
550 Exiting->appendRecipe(PHIRecipe);
563 if (RepR->isPredicated())
582 if (ParentRegion && ParentRegion->
getExiting() == CurrentBlock)
594 if (!VPBB->getParent())
598 if (!PredVPBB || PredVPBB->getNumSuccessors() != 1 ||
607 R.moveBefore(*PredVPBB, PredVPBB->
end());
609 auto *ParentRegion = VPBB->getParent();
610 if (ParentRegion && ParentRegion->getExiting() == VPBB)
611 ParentRegion->setExiting(PredVPBB);
615 return !WorkList.
empty();
622 bool ShouldSimplify =
true;
623 while (ShouldSimplify) {
639 if (!
IV ||
IV->getTruncInst())
654 for (
auto *U : FindMyCast->
users()) {
656 if (UserCast && UserCast->getUnderlyingValue() == IRCast) {
657 FoundUserCast = UserCast;
664 FindMyCast = FoundUserCast;
666 if (FindMyCast !=
IV)
681 Builder.createDerivedIV(Kind, FPBinOp, StartV, CanonicalIV, Step);
690 BaseIV = Builder.createScalarCast(Instruction::Trunc, BaseIV, TruncTy,
DL);
696 if (ResultTy != StepTy) {
703 Builder.setInsertPoint(VecPreheader);
704 Step = Builder.createScalarCast(Instruction::Trunc, Step, ResultTy,
DL);
706 return Builder.createScalarIVSteps(InductionOpcode, FPBinOp, BaseIV, Step,
732 WideCanIV->getDebugLoc(), Builder));
733 WideCanIV->eraseFromParent();
750 WideCanIV->replaceAllUsesWith(WidenIV);
751 WideCanIV->eraseFromParent();
760 if (PHICost > BroadcastCost)
769 unsigned RegClass =
TTI.getRegisterClassForType(
true, VecTy);
781 WideCanIV->getNoWrapFlags(), WideCanIV->getDebugLoc());
782 NewWideIV->insertBefore(&*Header->getFirstNonPhi());
783 WideCanIV->replaceAllUsesWith(NewWideIV);
784 WideCanIV->eraseFromParent();
792 bool IsConditionalAssume = RepR && RepR->isPredicated() &&
794 if (IsConditionalAssume)
797 if (R.mayHaveSideEffects())
801 return all_of(R.definedValues(),
802 [](
VPValue *V) { return V->getNumUsers() == 0; });
822 VPUser *PhiUser = PhiR->getSingleUser();
828 PhiR->replaceAllUsesWith(Start);
829 PhiR->eraseFromParent();
837 for (
unsigned I = 0;
I !=
Users.size(); ++
I) {
840 Users.insert_range(V->users());
842 return Users.takeVector();
856 nullptr, StartV, StepV, PtrIV->
getDebugLoc(), Builder);
893 Def->getNumUsers() == 0 || !Def->getUnderlyingValue() ||
894 (RepR && (RepR->isSingleScalar() || RepR->isPredicated())))
907 Def->operands(),
true,
909 Clone->insertAfter(Def);
910 Def->replaceAllUsesWith(Clone);
921 PtrIV->replaceAllUsesWith(PtrAdd);
928 if (HasOnlyVectorVFs &&
none_of(WideIV->users(), [WideIV](
VPUser *U) {
929 return U->usesScalars(WideIV);
935 Plan,
ID.getKind(),
ID.getInductionOpcode(),
937 WideIV->getTruncInst(), WideIV->getStartValue(), WideIV->getStepValue(),
938 WideIV->getDebugLoc(), Builder);
941 if (!HasOnlyVectorVFs) {
943 "plans containing a scalar VF cannot also include scalable VFs");
944 WideIV->replaceAllUsesWith(Steps);
947 WideIV->replaceUsesWithIf(Steps,
948 [WideIV, HasScalableVF](
VPUser &U,
unsigned) {
950 return U.usesFirstLaneOnly(WideIV);
951 return U.usesScalars(WideIV);
967 return (IntOrFpIV && IntOrFpIV->getTruncInst()) ? nullptr : WideIV;
972 if (!Def || Def->getNumOperands() != 2)
980 auto IsWideIVInc = [&]() {
981 auto &
ID = WideIV->getInductionDescriptor();
984 VPValue *IVStep = WideIV->getStepValue();
985 switch (
ID.getInductionOpcode()) {
986 case Instruction::Add:
988 case Instruction::FAdd:
990 case Instruction::FSub:
993 case Instruction::Sub: {
1013 return IsWideIVInc() ? WideIV :
nullptr;
1030 if (WideIntOrFp && WideIntOrFp->getTruncInst())
1041 VPValue *FirstActiveLane =
B.createFirstActiveLane(Mask,
DL);
1042 FirstActiveLane =
B.createScalarZExtOrTrunc(
1043 FirstActiveLane, CanonicalIVType, FirstActiveLane->
getScalarType(),
DL);
1044 VPValue *EndValue =
B.createAdd(CanonicalIV, FirstActiveLane,
DL);
1049 if (Incoming != WideIV) {
1051 EndValue =
B.createAdd(EndValue, One,
DL);
1056 VPIRValue *Start = WideIV->getStartValue();
1057 VPValue *Step = WideIV->getStepValue();
1058 EndValue =
B.createDerivedIV(
1060 Start, EndValue, Step);
1074 if (WideIntOrFp && WideIntOrFp->getTruncInst())
1084 Start, VectorTC, Step);
1114 assert(EndValue &&
"Must have computed the end value up front");
1119 if (Incoming != WideIV)
1131 auto *Zero = Plan.
getZero(StepTy);
1132 return B.createPtrAdd(EndValue,
B.createSub(Zero, Step),
1137 return B.createNaryOp(
1138 ID.getInductionBinOp()->getOpcode() == Instruction::FAdd
1140 : Instruction::FAdd,
1141 {EndValue, Step}, {ID.getInductionBinOp()->getFastMathFlags()});
1152 VPBuilder VectorPHBuilder(VectorPH, VectorPH->begin());
1162 EndValues[WideIV] = EndValue;
1172 R.getVPSingleValue()->replaceAllUsesWith(EndValue);
1173 R.eraseFromParent();
1182 for (
auto [Idx, PredVPBB] :
enumerate(ExitVPBB->getPredecessors())) {
1184 if (PredVPBB == MiddleVPBB)
1186 Plan, ExitIRI->getOperand(Idx), EndValues, PSE);
1189 Plan, ExitIRI->getOperand(Idx), PSE);
1191 ExitIRI->setOperand(Idx, Escape);
1208 const auto &[V, Inserted] = SCEV2VPV.
try_emplace(ExpR->getSCEV(), ExpR);
1212 ExpR->replaceAllUsesWith(V->second);
1216 ExpR->eraseFromParent();
1225 while (!WorkList.
empty()) {
1227 if (!Seen.
insert(Cur).second)
1235 R->eraseFromParent();
1242static std::optional<std::pair<bool, unsigned>>
1245 std::optional<std::pair<bool, unsigned>>>(R)
1248 [](
auto *
I) {
return std::make_pair(
false,
I->getOpcode()); })
1250 return std::make_pair(
true,
I->getVectorIntrinsicID());
1252 .Case<VPVectorPointerRecipe, VPPredInstPHIRecipe, VPScalarIVStepsRecipe>(
1258 I->getVPRecipeID());
1260 .
Default([](
auto *) {
return std::nullopt; });
1285 VPlan &Plan = *R.getParent()->getPlan();
1286 auto FoldToIRValue = [&]() ->
Value * {
1288 if (OpcodeOrIID->first) {
1289 if (R.getNumOperands() != 2)
1291 unsigned ID = OpcodeOrIID->second;
1292 return Folder.FoldBinaryIntrinsic(
ID,
Ops[0],
Ops[1], R.getScalarType());
1294 unsigned Opcode = OpcodeOrIID->second;
1300 R.getVPSingleValue()->getScalarType());
1303 return Folder.FoldSelect(
Ops[0],
Ops[1],
1306 return Folder.FoldBinOp(Instruction::BinaryOps::Xor,
Ops[0],
1308 case Instruction::Select:
1309 return Folder.FoldSelect(
Ops[0],
Ops[1],
Ops[2]);
1310 case Instruction::ICmp:
1311 case Instruction::FCmp:
1314 case Instruction::GetElementPtr: {
1317 return Folder.FoldGEP(
GEP->getSourceElementType(),
Ops[0],
1327 case Instruction::ExtractElement:
1334 if (
Value *V = FoldToIRValue())
1341 bool CanCreateNewRecipe) {
1342 VPlan *Plan = Def->getParent()->getPlan();
1352 Def->replaceAllUsesWith(
X);
1353 Def->eraseFromParent();
1365 Def->replaceAllUsesWith(
X);
1377 Def->replaceAllUsesWith(Plan->
getZero(Def->getScalarType()));
1383 Def->replaceAllUsesWith(
X);
1389 Def->replaceAllUsesWith(Plan->
getFalse());
1395 Def->replaceAllUsesWith(
X);
1400 if (CanCreateNewRecipe &&
1405 (!Def->getOperand(0)->hasMoreThanOneUniqueUser() ||
1406 !Def->getOperand(1)->hasMoreThanOneUniqueUser())) {
1407 Def->replaceAllUsesWith(
1408 Builder.createLogicalAnd(
X, Builder.createOr(
Y, Z)));
1415 Def->replaceAllUsesWith(Def->getOperand(1));
1422 Def->replaceAllUsesWith(Builder.createLogicalAnd(
X,
Y));
1428 Def->replaceAllUsesWith(Plan->
getFalse());
1433 Def->replaceAllUsesWith(
X);
1439 if (CanCreateNewRecipe &&
1441 Def->replaceAllUsesWith(Builder.createNot(
C));
1447 Def->setOperand(0,
C);
1448 Def->setOperand(1,
Y);
1449 Def->setOperand(2,
X);
1454 if (CanCreateNewRecipe &&
1458 Y->getScalarType()->isIntegerTy(1)) {
1459 Def->replaceAllUsesWith(
1460 Builder.createOr(
Y, Builder.createLogicalAnd(
X, Z)));
1469 VPlan *Plan = Def->getParent()->getPlan();
1475 return Def->replaceAllUsesWith(V);
1481 PredPHI->replaceAllUsesWith(
Op);
1494 bool CanCreateNewRecipe =
1499 Type *TruncTy = Def->getScalarType();
1500 Type *ATy =
A->getScalarType();
1501 if (TruncTy == ATy) {
1502 Def->replaceAllUsesWith(
A);
1511 : Instruction::ZExt;
1514 if (
auto *UnderlyingExt = Def->getOperand(0)->getUnderlyingValue()) {
1516 Ext->setUnderlyingValue(UnderlyingExt);
1518 Def->replaceAllUsesWith(Ext);
1520 auto *Trunc = Builder.createWidenCast(Instruction::Trunc,
A, TruncTy);
1521 Def->replaceAllUsesWith(Trunc);
1531 return Def->replaceAllUsesWith(
A);
1534 return Def->replaceAllUsesWith(
A);
1537 return Def->replaceAllUsesWith(Plan->
getZero(Def->getScalarType()));
1543 return Def->replaceAllUsesWith(Builder.createSub(
1544 Plan->
getZero(
A->getScalarType()),
A, Def->getDebugLoc(),
"", NW));
1547 if (CanCreateNewRecipe &&
1555 ->hasNoSignedWrap()};
1556 return Def->replaceAllUsesWith(
1557 Builder.createSub(
X,
Y, Def->getDebugLoc(),
"", NW));
1563 return Def->replaceAllUsesWith(Builder.createNaryOp(
1565 {A, Plan->getConstantInt(APC->getBitWidth(), APC->exactLogBase2())},
1570 return Def->replaceAllUsesWith(Builder.createNaryOp(
1572 {A, Plan->getConstantInt(APC->getBitWidth(), APC->exactLogBase2())},
1577 return Def->replaceAllUsesWith(
A);
1592 R->setOperand(1,
Y);
1593 R->setOperand(2,
X);
1597 R->replaceAllUsesWith(Cmp);
1602 if (!Cmp->getDebugLoc() && Def->getDebugLoc())
1603 Cmp->setDebugLoc(Def->getDebugLoc());
1615 if (
Op->getNumUsers() > 1 ||
1619 }
else if (!UnpairedCmp) {
1620 UnpairedCmp =
Op->getDefiningRecipe();
1624 UnpairedCmp =
nullptr;
1631 if (NewOps.
size() < Def->getNumOperands()) {
1633 return Def->replaceAllUsesWith(NewAnyOf);
1640 if (CanCreateNewRecipe &&
1646 return Def->replaceAllUsesWith(NewCmp);
1652 Def->getOperand(1)->getScalarType() == Def->getScalarType())
1653 return Def->replaceAllUsesWith(Def->getOperand(1));
1657 Type *WideStepTy = Def->getScalarType();
1658 if (
X->getScalarType() != WideStepTy)
1659 X = Builder.createWidenCast(Instruction::Trunc,
X, WideStepTy);
1660 Def->replaceAllUsesWith(
X);
1669 Def->getScalarType()->isIntegerTy(1)) {
1670 Def->setOperand(1, Def->getOperand(0));
1671 Def->setOperand(0,
Y);
1678 return Def->replaceAllUsesWith(Def->getOperand(0));
1684 Def->replaceAllUsesWith(
1685 BuildVector->getOperand(BuildVector->getNumOperands() - 1));
1690 return Def->replaceAllUsesWith(
X);
1693 return Def->replaceAllUsesWith(
A);
1696 return Def->replaceAllUsesWith(
A);
1702 Def->replaceAllUsesWith(
1703 BuildVector->getOperand(BuildVector->getNumOperands() - 2));
1710 Def->replaceAllUsesWith(BuildVector->getOperand(Idx));
1715 Def->replaceAllUsesWith(
1725 "broadcast operand must be single-scalar");
1726 Def->setOperand(0,
C);
1731 return Def->replaceUsesWithIf(
1732 X, [Def](
const VPUser &U,
unsigned) {
return U.usesScalars(Def); });
1735 if (Def->getNumOperands() == 1) {
1736 Def->replaceAllUsesWith(Def->getOperand(0));
1741 Phi->replaceAllUsesWith(Phi->getOperand(0));
1747 if (Def->getNumOperands() == 1 &&
1749 return Def->replaceAllUsesWith(IRV);
1762 return Def->replaceAllUsesWith(
A);
1769 return Def->replaceAllUsesWith(WidenIV->getRegion()->getCanonicalIV());
1772 Def->replaceAllUsesWith(Builder.createNaryOp(
1773 Instruction::ExtractElement, {A, LaneToExtract}, Def->getDebugLoc()));
1787 auto *IVInc = Def->getOperand(0);
1788 if (IVInc->getNumUsers() == 2) {
1793 if (Phi->getNumUsers() == 1 || (Phi->getNumUsers() == 2 && Inc)) {
1794 Def->replaceAllUsesWith(IVInc);
1796 Inc->replaceAllUsesWith(Phi);
1797 Phi->setOperand(0,
Y);
1813 Steps->replaceAllUsesWith(Steps->getOperand(0));
1821 Def->replaceUsesWithIf(StartV, [](
const VPUser &U,
unsigned Idx) {
1823 return PhiR && PhiR->isInLoop();
1829 return Def->replaceAllUsesWith(
A);
1855 while (!Worklist.
empty()) {
1864 R->replaceAllUsesWith(
1865 Builder.createLogicalAnd(HeaderMask, Builder.createLogicalAnd(
X,
Y)));
1869static std::optional<Instruction::BinaryOps>
1872 case Intrinsic::masked_udiv:
1873 return Instruction::UDiv;
1874 case Intrinsic::masked_sdiv:
1875 return Instruction::SDiv;
1876 case Intrinsic::masked_urem:
1877 return Instruction::URem;
1878 case Intrinsic::masked_srem:
1879 return Instruction::SRem;
1896 if (RepR && (RepR->isSingleScalar() || RepR->isPredicated()))
1900 if (RepR && RepR->getOpcode() == Instruction::Store &&
1903 RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(),
1904 true ,
nullptr , *RepR ,
1905 *RepR , RepR->getDebugLoc());
1906 Clone->insertBefore(RepOrWidenR);
1908 VPValue *ExtractOp = Clone->getOperand(0);
1914 Clone->setOperand(0, ExtractOp);
1915 RepR->eraseFromParent();
1927 VPValue *SafeDivisor = Builder.createSelect(
1928 IntrR->getOperand(2), IntrR->getOperand(1),
1930 VPValue *Clone = Builder.createNaryOp(
1931 *
Opc, {IntrR->getOperand(0), SafeDivisor},
1934 IntrR->eraseFromParent();
1943 auto IntroducesBCastOf = [](
const VPValue *
Op) {
1952 return !U->usesScalars(
Op);
1956 if (
any_of(RepOrWidenR->users(), IntroducesBCastOf(RepOrWidenR)) &&
1959 make_filter_range(Op->users(), not_equal_to(RepOrWidenR)),
1960 IntroducesBCastOf(Op)))
1964 auto *IRV = dyn_cast<VPIRValue>(Op);
1965 bool LiveInNeedsBroadcast = IRV && !isa<Constant>(IRV->getValue());
1966 auto *OpR = dyn_cast<VPReplicateRecipe>(Op);
1967 return LiveInNeedsBroadcast || (OpR && OpR->isSingleScalar());
1972 RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(),
1973 true ,
nullptr, *RepOrWidenR);
1974 Clone->insertBefore(RepOrWidenR);
1975 RepOrWidenR->replaceAllUsesWith(Clone);
1977 RepOrWidenR->eraseFromParent();
2013 if (Blend->isNormalized() || !
match(Blend->getMask(0),
m_False()))
2014 UniqueValues.
insert(Blend->getIncomingValue(0));
2015 for (
unsigned I = 1;
I != Blend->getNumIncomingValues(); ++
I)
2017 UniqueValues.
insert(Blend->getIncomingValue(
I));
2019 if (UniqueValues.
size() == 1) {
2020 Blend->replaceAllUsesWith(*UniqueValues.
begin());
2021 Blend->eraseFromParent();
2025 if (Blend->isNormalized())
2031 unsigned StartIndex = 0;
2032 for (
unsigned I = 0;
I != Blend->getNumIncomingValues(); ++
I) {
2037 if (Mask->getNumUsers() == 1 && !
match(Mask,
m_False())) {
2044 OperandsWithMask.
push_back(Blend->getIncomingValue(StartIndex));
2046 for (
unsigned I = 0;
I != Blend->getNumIncomingValues(); ++
I) {
2047 if (
I == StartIndex)
2049 OperandsWithMask.
push_back(Blend->getIncomingValue(
I));
2050 OperandsWithMask.
push_back(Blend->getMask(
I));
2055 OperandsWithMask, *Blend, Blend->getDebugLoc());
2056 NewBlend->insertBefore(&R);
2058 VPValue *DeadMask = Blend->getMask(StartIndex);
2060 Blend->eraseFromParent();
2065 if (NewBlend->getNumOperands() == 3 &&
2067 VPValue *Inc0 = NewBlend->getOperand(0);
2068 VPValue *Inc1 = NewBlend->getOperand(1);
2069 VPValue *OldMask = NewBlend->getOperand(2);
2070 NewBlend->setOperand(0, Inc1);
2071 NewBlend->setOperand(1, Inc0);
2072 NewBlend->setOperand(2, NewMask);
2099 APInt MaxVal = AlignedTC - 1;
2102 unsigned NewBitWidth =
2108 bool MadeChange =
false;
2133 "canonical IV is not expected to have a truncation");
2138 NewWideIV->insertBefore(WideIV);
2145 Cmp->replaceAllUsesWith(
2146 VPBuilder(Cmp).createICmp(Cmp->getPredicate(), NewWideIV, NewBTC));
2160 return any_of(
Cond->getDefiningRecipe()->operands(), [&Plan, BestVF, BestUF,
2162 return isConditionTrueViaVFAndUF(C, Plan, BestVF, BestUF, PSE);
2176 const SCEV *VectorTripCount =
2181 "Trip count SCEV must be computable");
2202 auto *Term = &ExitingVPBB->
back();
2215 for (
unsigned Part = 0; Part < UF; ++Part) {
2221 Extracts[Part] = Ext;
2233 match(Phi->getBackedgeValue(),
2235 assert(Index &&
"Expected index from ActiveLaneMask instruction");
2252 "Expected one VPActiveLaneMaskPHIRecipe for each unroll part");
2259 "Expected incoming values of Phi to be ActiveLaneMasks");
2264 EntryALM->setOperand(2, ALMMultiplier);
2265 LoopALM->setOperand(2, ALMMultiplier);
2269 ExtractFromALM(EntryALM, EntryExtracts);
2274 ExtractFromALM(LoopALM, LoopExtracts);
2276 Not->setOperand(0, LoopExtracts[0]);
2279 for (
unsigned Part = 0; Part < UF; ++Part) {
2280 Phis[Part]->setStartValue(EntryExtracts[Part]);
2281 Phis[Part]->setBackedgeValue(LoopExtracts[Part]);
2294 auto *Term = &ExitingVPBB->
back();
2306 const SCEV *VectorTripCount =
2312 "Trip count SCEV must be computable");
2331 Term->setOperand(1, Plan.
getTrue());
2336 {}, Term->getDebugLoc());
2338 Term->eraseFromParent();
2371 R.getVPSingleValue()->replaceAllUsesWith(Trunc);
2381 assert(Plan.
hasVF(BestVF) &&
"BestVF is not available in Plan");
2382 assert(Plan.
hasUF(BestUF) &&
"BestUF is not available in Plan");
2400 RecurKind RK = PhiR->getRecurrenceKind();
2407 RecWithFlags->dropPoisonGeneratingFlags();
2413struct VPCSEDenseMapInfo :
public DenseMapInfo<VPSingleDefRecipe *> {
2422 return GEP->getSourceElementType();
2425 .Case<VPVectorPointerRecipe, VPWidenGEPRecipe>(
2426 [](
auto *
I) {
return I->getSourceElementType(); })
2427 .
Default([](
auto *) {
return nullptr; });
2431 static bool canHandle(
const VPSingleDefRecipe *Def) {
2440 if (!
C || (!
C->first && (
C->second == Instruction::InsertValue ||
2441 C->second == Instruction::ExtractValue)))
2447 return !
Def->mayReadFromMemory();
2451 static unsigned getHashValue(
const VPSingleDefRecipe *Def) {
2454 getGEPSourceElementType(Def),
Def->getScalarType(),
2457 if (RFlags->hasPredicate())
2460 return hash_combine(Result, SIVSteps->getInductionOpcode());
2465 static bool isEqual(
const VPSingleDefRecipe *L,
const VPSingleDefRecipe *R) {
2466 if (
L->getVPRecipeID() !=
R->getVPRecipeID() ||
2468 getGEPSourceElementType(L) != getGEPSourceElementType(R) ||
2470 !
equal(
L->operands(),
R->operands()))
2473 "must have valid opcode info for both recipes");
2475 if (LFlags->hasPredicate() &&
2476 LFlags->getPredicate() !=
2480 if (LSIV->getInductionOpcode() !=
2486 const VPRegionBlock *RegionL =
L->getRegion();
2487 const VPRegionBlock *RegionR =
R->getRegion();
2490 L->getParent() !=
R->getParent())
2492 return L->getScalarType() ==
R->getScalarType();
2508 if (!Def || !VPCSEDenseMapInfo::canHandle(Def))
2512 if (!VPDT.
dominates(V->getParent(), VPBB))
2517 Def->replaceAllUsesWith(V);
2548 "Expected vector prehader's successor to be the vector loop region");
2556 return !Op->isDefinedOutsideLoopRegions();
2559 R.moveBefore(*Preheader, Preheader->
end());
2577 assert(!RepR->isPredicated() &&
2578 "Expected prior transformation of predicated replicates to "
2579 "replicate regions");
2584 if (!RepR->isSingleScalar())
2596 if (
any_of(Def->users(), [&SinkBB, &LoopRegion](
VPUser *U) {
2597 auto *UserR = cast<VPRecipeBase>(U);
2598 VPBasicBlock *Parent = UserR->getParent();
2600 if (SinkBB && SinkBB != Parent)
2605 return UserR->isPhi() || Parent->getEnclosingLoopRegion() ||
2606 Parent->getSinglePredecessor() != LoopRegion;
2616 "Defining block must dominate sink block");
2641 VPValue *ResultVPV = R.getVPSingleValue();
2643 unsigned NewResSizeInBits = MinBWs.
lookup(UI);
2644 if (!NewResSizeInBits)
2657 (void)OldResSizeInBits;
2665 VPW->dropPoisonGeneratingFlags();
2667 assert((OldResSizeInBits != NewResSizeInBits ||
2669 "Only ICmps should not need extending the result.");
2675 if (OldResSizeInBits != NewResSizeInBits) {
2677 Instruction::ZExt, ResultVPV, OldResTy);
2679 Ext->setOperand(0, ResultVPV);
2689 unsigned OpSizeInBits =
Op->getScalarType()->getScalarSizeInBits();
2690 if (OpSizeInBits == NewResSizeInBits)
2692 assert(OpSizeInBits > NewResSizeInBits &&
"nothing to truncate");
2693 auto [ProcessedIter, Inserted] = ProcessedTruncs.
try_emplace(
Op);
2699 Builder.setInsertPoint(&R);
2700 ProcessedIter->second =
2701 Builder.createWidenCast(Instruction::Trunc,
Op, NewResTy);
2703 Op = ProcessedIter->second;
2707 NWR->insertBefore(&R);
2711 VPValue *Replacement = NWR->getVPSingleValue();
2712 if (OldResSizeInBits != NewResSizeInBits)
2718 R.eraseFromParent();
2724 std::optional<VPDominatorTree> VPDT;
2741 assert(VPBB->getNumSuccessors() == 2 &&
2742 "Two successors expected for BranchOnCond");
2743 unsigned RemovedIdx;
2754 "There must be a single edge between VPBB and its successor");
2762 VPBB->back().eraseFromParent();
2774 if (Reachable.contains(
B))
2785 for (
VPValue *Def : R.definedValues())
2786 Def->replaceAllUsesWith(&Tmp);
2787 R.eraseFromParent();
2844 DebugLoc DL = CanonicalIVIncrement->getDebugLoc();
2855 auto *EntryIncrement = Builder.createOverflowingOp(
2857 DL,
"index.part.next");
2863 {EntryIncrement, TC, ALMMultiplier},
DL,
2864 "active.lane.mask.entry");
2871 LaneMaskPhi->insertBefore(*HeaderVPBB, HeaderVPBB->begin());
2876 Builder.setInsertPoint(OriginalTerminator);
2877 auto *InLoopIncrement = Builder.createOverflowingOp(
2879 {CanonicalIVIncrement, &Plan.
getVF()}, {
false,
false},
DL);
2881 {InLoopIncrement, TC, ALMMultiplier},
DL,
2882 "active.lane.mask.next");
2883 LaneMaskPhi->addBackedgeValue(ALM);
2887 auto *NotMask = Builder.createNot(ALM,
DL);
2894 bool UseActiveLaneMaskForControlFlow) {
2896 auto *WideCanonicalIV =
2898 assert(WideCanonicalIV &&
2899 "Must have widened canonical IV when tail folding!");
2902 if (UseActiveLaneMaskForControlFlow) {
2911 nullptr,
"active.lane.mask");
2927 template <
typename OpTy>
bool match(OpTy *V)
const {
2938template <
typename Op0_t,
typename Op1_t>
2946 case Intrinsic::masked_udiv:
2947 return Intrinsic::vp_udiv;
2948 case Intrinsic::masked_sdiv:
2949 return Intrinsic::vp_sdiv;
2950 case Intrinsic::masked_urem:
2951 return Intrinsic::vp_urem;
2952 case Intrinsic::masked_srem:
2953 return Intrinsic::vp_srem;
2955 return std::nullopt;
2970 VPValue *Addr, *Mask, *EndPtr;
2973 auto AdjustEndPtr = [&CurRecipe, &EVL](
VPValue *EndPtr) {
2975 EVLEndPtr->insertBefore(&CurRecipe);
2980 EVLEndPtr->setOperand(1, EVLAsVF);
2984 auto GetVPReverse = [&CurRecipe, &EVL, Plan,
2989 Intrinsic::experimental_vp_reverse, {V, Plan->
getTrue(), &EVL},
2990 V->getScalarType(), {}, {},
DL);
2991 Reverse->insertBefore(&CurRecipe);
2995 if (
match(&CurRecipe,
3000 if (
match(&CurRecipe,
3004 Mask = GetVPReverse(Mask);
3005 Addr = AdjustEndPtr(EndPtr);
3008 LoadR->insertBefore(&CurRecipe);
3013 LoadR->getScalarType(), {}, {},
DL);
3024 NewLoad->setOperand(2, Mask);
3025 NewLoad->setOperand(3, &EVL);
3033 StoredVal, EVL, Mask);
3035 if (
match(&CurRecipe,
3039 Mask = GetVPReverse(Mask);
3040 Addr = AdjustEndPtr(EndPtr);
3044 Intrinsic::vector_splice_right, {StoredVal,
Poison, &EVL},
3048 SpliceR, EVL, Mask);
3052 if (Rdx->isConditional() &&
3057 if (Interleave->getMask() &&
3065 Intrinsic::vp_merge, {Mask ? Mask : Plan->
getTrue(),
LHS,
RHS, &EVL},
3066 LHS->getScalarType(), {}, {},
DL);
3079 if (
match(&CurRecipe,
3084 LHS->getScalarType(), {}, {},
DL);
3090 {IntrR->getOperand(0),
3091 IntrR->getOperand(1),
3092 Mask ? Mask : Plan->
getTrue(), &EVL},
3093 IntrR->getScalarType(), {}, {},
DL);
3102 VPValue *HeaderMask =
nullptr, *EVL =
nullptr;
3107 HeaderMask = R.getVPSingleValue();
3118 NewR->insertBefore(R);
3119 for (
auto [Old, New] :
3120 zip_equal(R->definedValues(), NewR->definedValues()))
3121 Old->replaceAllUsesWith(New);
3134 Mask->getScalarType(), {}, {}, LogicalAnd->getDebugLoc());
3135 Merge->insertBefore(LogicalAnd);
3136 LogicalAnd->replaceAllUsesWith(
Merge);
3156 Intrinsic::experimental_vp_reverse, {
X, Plan.
getTrue(), EVL},
3157 X->getScalarType(), {}, {}, Def->getDebugLoc());
3158 VPReverse->insertBefore(Def);
3159 Def->replaceAllUsesWith(VPReverse);
3165 R->eraseFromParent();
3186 auto IsAllowedUser =
3187 IsaPred<VPVectorEndPointerRecipe, VPScalarIVStepsRecipe,
3188 VPWidenIntOrFpInductionRecipe,
3189 VPWidenMemIntrinsicRecipe>;
3190 if (match(U, m_Trunc(m_Specific(&Plan.getVF()))))
3191 return all_of(cast<VPSingleDefRecipe>(U)->users(),
3193 return IsAllowedUser(U);
3195 "User of VF that we can't transform to EVL.");
3205 "Only users of VFxUF should be VPWidenPointerInductionRecipe and the "
3206 "increment of the canonical induction.");
3222 MaxEVL = Builder.createScalarZExtOrTrunc(
3226 Builder.setInsertPoint(Header, Header->getFirstNonPhi());
3227 VPValue *PrevEVL = Builder.createScalarPhi(
3241 Intrinsic::experimental_vp_splice,
3242 {
V1, V2, Imm, Plan.
getTrue(), PrevEVL, &EVL},
3243 R.getVPSingleValue()->getScalarType(), {}, {}, R.getDebugLoc());
3245 R.getVPSingleValue()->replaceAllUsesWith(VPSplice);
3258 if (match(&R, m_ComputeReductionResult(m_Select(m_Specific(HeaderMask),
3259 m_VPValue(), m_VPValue()))))
3260 return R.getOperand(0)->getDefiningRecipe()->getRegion() ==
3261 Plan.getVectorLoopRegion();
3273 VPValue *EVLMask = Builder.createICmp(
3333 VPlan &Plan,
const std::optional<unsigned> &MaxSafeElements) {
3345 auto *CurrentIteration =
3347 CurrentIteration->insertBefore(*Header, Header->begin());
3348 VPBuilder Builder(Header, Header->getFirstNonPhi());
3351 VPPhi *AVLPhi = Builder.createScalarPhi(
3355 if (MaxSafeElements) {
3365 Builder.setInsertPoint(CanonicalIVIncrement);
3369 OpVPEVL = Builder.createScalarZExtOrTrunc(
3370 OpVPEVL, CanIVTy, I32Ty, CanonicalIVIncrement->getDebugLoc());
3372 auto *NextIter = Builder.createAdd(
3373 OpVPEVL, CurrentIteration, CanonicalIVIncrement->getDebugLoc(),
3374 "current.iteration.next", CanonicalIVIncrement->getNoWrapFlags());
3375 CurrentIteration->addBackedgeValue(NextIter);
3379 "avl.next", {
true,
false});
3387 CanonicalIV->replaceAllUsesWith(CurrentIteration);
3388 CanonicalIVIncrement->setOperand(0, CanonicalIV);
3402 assert(!CurrentIteration &&
3403 "Found multiple CurrentIteration. Only one expected");
3404 CurrentIteration = PhiR;
3408 if (!CurrentIteration)
3419 CurrentIteration->
getDebugLoc(),
"current.iteration.iv");
3428 CanIVInc->eraseFromParent();
3437 if (Header->empty())
3446 if (!
match(EVLPhi->getBackedgeValue(),
3459 [[maybe_unused]]
bool FoundAVLNext =
3462 assert(FoundAVLNext &&
"Didn't find AVL backedge?");
3470 [[maybe_unused]]
bool FoundIncrement =
match(
3477 "Expected BranchOnCond with ICmp comparing CanIV + VFxUF with vector "
3482 LatchBr->setOperand(
3493 "expected to run before loop regions are created");
3496 auto CanUseVersionedStride = [&VPDT, Preheader](
VPUser &U,
unsigned) {
3499 return VPDT.
dominates(Preheader, Parent);
3502 for (
const SCEV *Stride : StridesMap.
values()) {
3505 const APInt *StrideConst;
3528 RewriteMap[StrideV] = PSE.
getSCEV(StrideV);
3535 const SCEV *ScevExpr = ExpSCEV->getSCEV();
3538 if (NewSCEV != ScevExpr) {
3540 ExpSCEV->replaceAllUsesWith(NewExp);
3551 auto CollectPoisonGeneratingInstrsInBackwardSlice([&](
VPRecipeBase *Root) {
3556 while (!Worklist.
empty()) {
3559 if (!Visited.
insert(CurRec).second)
3581 RecWithFlags->isDisjoint()) {
3584 Builder.createAdd(
A,
B, RecWithFlags->getDebugLoc());
3585 New->setUnderlyingValue(RecWithFlags->getUnderlyingValue());
3586 RecWithFlags->replaceAllUsesWith(New);
3587 RecWithFlags->eraseFromParent();
3590 RecWithFlags->dropPoisonGeneratingFlags();
3595 assert((!Instr || !Instr->hasPoisonGeneratingFlags()) &&
3596 "found instruction with poison generating flags not covered by "
3597 "VPRecipeWithIRFlags");
3602 if (
VPRecipeBase *OpDef = Operand->getDefiningRecipe())
3610 auto IsNotHeaderMask = [&Plan](
VPValue *Mask) {
3622 VPRecipeBase *AddrDef = WidenRec->getAddr()->getDefiningRecipe();
3623 if (AddrDef && WidenRec->isConsecutive() &&
3624 IsNotHeaderMask(WidenRec->getMask()))
3625 CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
3627 VPRecipeBase *AddrDef = InterleaveRec->getAddr()->getDefiningRecipe();
3628 if (AddrDef && IsNotHeaderMask(InterleaveRec->getMask()))
3629 CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
3639 const bool &EpilogueAllowed) {
3640 if (InterleaveGroups.empty())
3651 IRMemberToRecipe[&MemR->getIngredient()] = MemR;
3658 for (
const auto *IG : InterleaveGroups) {
3663 return !IRMemberToRecipe.contains(Member);
3667 auto *Start = IRMemberToRecipe.
lookup(IG->getMember(0));
3671 StoredValues.
push_back(StoreR->getStoredValue());
3672 for (
unsigned I = 1;
I < IG->getFactor(); ++
I) {
3678 StoredValues.
push_back(StoreR->getStoredValue());
3682 bool NeedsMaskForGaps =
3683 (IG->requiresScalarEpilogue() && !EpilogueAllowed) ||
3684 (!StoredValues.
empty() && !IG->isFull());
3687 auto *InsertPos = IRMemberToRecipe.
lookup(IRInsertPos);
3696 VPValue *Addr = Start->getAddr();
3705 assert(IG->getIndex(IRInsertPos) != 0 &&
3706 "index of insert position shouldn't be zero");
3710 IG->getIndex(IRInsertPos),
3714 Addr =
B.createNoWrapPtrAdd(InsertPos->getAddr(), OffsetVPV, NW);
3720 if (IG->isReverse()) {
3723 -(int64_t)IG->getFactor(), NW, InsertPosR->
getDebugLoc());
3724 ReversePtr->insertBefore(InsertPosR);
3728 IG, Addr, StoredValues, InsertPos->getMask(), NeedsMaskForGaps,
3730 VPIG->insertBefore(InsertPosR);
3733 for (
unsigned i = 0; i < IG->getFactor(); ++i)
3736 if (!Member->getType()->isVoidTy()) {
3794 AddOp = Instruction::Add;
3795 MulOp = Instruction::Mul;
3797 AddOp =
ID.getInductionOpcode();
3798 MulOp = Instruction::FMul;
3806 Step = Builder.createScalarCast(Instruction::Trunc, Step, Ty,
DL);
3807 Start = Builder.createScalarCast(Instruction::Trunc, Start, Ty,
DL);
3816 Init = Builder.createWidenCast(Instruction::UIToFP,
Init, StepTy);
3821 Init = Builder.createNaryOp(MulOp, {
Init, SplatStep}, Flags);
3822 Init = Builder.createNaryOp(AddOp, {SplatStart,
Init}, Flags,
3840 if (R->getParent()->getEnclosingLoopRegion())
3841 Builder.setInsertPoint(R->getParent(), std::next(R->getIterator()));
3846 VF = Builder.createScalarCast(Instruction::CastOps::UIToFP, VF, StepTy,
3849 VF = Builder.createScalarZExtOrTrunc(VF, StepTy, VF->
getScalarType(),
DL);
3851 Inc = Builder.createNaryOp(MulOp, {Step, VF}, Flags);
3858 auto *
Next = Builder.createNaryOp(AddOp, {Prev, Inc}, Flags,
3861 WidePHI->addIncoming(
Next);
3888 VPlan *Plan = R->getParent()->getPlan();
3889 VPValue *Start = R->getStartValue();
3890 VPValue *Step = R->getStepValue();
3891 VPValue *VF = R->getVFValue();
3893 assert(R->getInductionDescriptor().getKind() ==
3895 "Not a pointer induction according to InductionDescriptor!");
3896 assert(R->getScalarType()->isPointerTy() &&
"Unexpected type.");
3898 "Recipe should have been replaced");
3904 VPPhi *ScalarPtrPhi = Builder.createScalarPhi(Start,
DL,
"pointer.phi");
3908 Builder.setInsertPoint(R->getParent(), R->getParent()->getFirstNonPhi());
3911 Offset = Builder.createOverflowingOp(Instruction::Mul, {
Offset, Step});
3913 Builder.createWidePtrAdd(ScalarPtrPhi,
Offset,
DL,
"vector.gep");
3914 R->replaceAllUsesWith(PtrAdd);
3919 VF = Builder.createScalarZExtOrTrunc(VF, StepTy, VF->
getScalarType(),
DL);
3920 VPValue *Inc = Builder.createOverflowingOp(Instruction::Mul, {Step, VF});
3923 Builder.createPtrAdd(ScalarPtrPhi, Inc,
DL,
"ptr.ind");
3931 VPValue *Step = R->getStepValue();
3932 VPValue *Index = R->getIndex();
3936 ? Builder.createScalarSExtOrTrunc(
3938 : Builder.createScalarCast(Instruction::SIToFP, Index, StepTy,
3940 switch (R->getInductionKind()) {
3942 assert(Index->getScalarType() == Start->getScalarType() &&
3943 "Index type does not match StartValue type");
3944 return R->replaceAllUsesWith(Builder.createAdd(
3945 Start, Builder.createOverflowingOp(Instruction::Mul, {Index, Step})));
3948 return R->replaceAllUsesWith(Builder.createPtrAdd(
3949 Start, Builder.createOverflowingOp(Instruction::Mul, {Index, Step})));
3954 (FPBinOp->
getOpcode() == Instruction::FAdd ||
3955 FPBinOp->
getOpcode() == Instruction::FSub) &&
3956 "Original BinOp should be defined for FP induction");
3958 VPValue *
FMul = Builder.createNaryOp(Instruction::FMul, {Step, Index}, FMF);
3959 return R->replaceAllUsesWith(
3960 Builder.createNaryOp(FPBinOp->
getOpcode(), {Start, FMul}, FMF));
3973 if (!R->isReplicator())
3977 R->dissolveToCFGLoop();
3998 assert(Br->getNumOperands() == 2 &&
3999 "BranchOnTwoConds must have exactly 2 conditions");
4003 assert(Successors.size() == 3 &&
4004 "BranchOnTwoConds must have exactly 3 successors");
4009 VPValue *Cond0 = Br->getOperand(0);
4010 VPValue *Cond1 = Br->getOperand(1);
4017 if (Succ0 == Succ1) {
4019 VPValue *Combined = Builder.createOr(Cond0, Cond1,
DL);
4023 Br->eraseFromParent();
4028 !BrOnTwoCondsBB->
getParent() &&
"regions must already be dissolved");
4041 Br->eraseFromParent();
4052 WidenIVR->eraseFromParent();
4062 WidenIVR->replaceAllUsesWith(PtrAdd);
4063 WidenIVR->eraseFromParent();
4067 WidenIVR->eraseFromParent();
4073 DerivedIVR->eraseFromParent();
4078 VPValue *CanIV = WideCanIV->getCanonicalIV();
4080 VPValue *Step = WideCanIV->getStepValue();
4083 "Expected unroller to have materialized step for UF != 1");
4088 Step = Builder.createAdd(
4091 Builder.createAdd(CanIV, Step, WideCanIV->getDebugLoc(),
"vec.iv",
4092 WideCanIV->getNoWrapFlags());
4094 WideCanIV->eraseFromParent();
4101 for (
unsigned I = 1;
I != Blend->getNumIncomingValues(); ++
I)
4102 Select = Builder.createSelect(Blend->getMask(
I),
4103 Blend->getIncomingValue(
I),
Select,
4104 R.getDebugLoc(),
"predphi", *Blend);
4105 Blend->replaceAllUsesWith(
Select);
4106 Blend->eraseFromParent();
4111 if (!VEPR->getOffset()) {
4113 "Expected unroller to have materialized offset for UF != 1");
4114 VEPR->materializeOffset();
4121 Expr->eraseFromParent();
4131 for (
VPValue *
Op : LastActiveL->operands()) {
4132 VPValue *NotMask = Builder.createNot(
Op, LastActiveL->getDebugLoc());
4137 VPValue *FirstInactiveLane = Builder.createFirstActiveLane(
4138 NotMasks, LastActiveL->getDebugLoc(),
"first.inactive.lane");
4144 Builder.createSub(FirstInactiveLane, One,
4145 LastActiveL->getDebugLoc(),
"last.active.lane");
4148 LastActiveL->eraseFromParent();
4155 assert(VPI->isMasked() &&
4156 "Unmasked MaskedCond should be simplified earlier");
4157 VPI->replaceAllUsesWith(Builder.createNaryOp(
4159 VPI->eraseFromParent();
4169 Instruction::Add, VPI->operands(), VPI->getNoWrapFlags(),
4170 VPI->getDebugLoc());
4171 VPI->replaceAllUsesWith(
Add);
4172 VPI->eraseFromParent();
4180 DebugLoc DL = BranchOnCountInst->getDebugLoc();
4183 BranchOnCountInst->eraseFromParent();
4198 ? Instruction::UIToFP
4199 : Instruction::Trunc;
4200 VectorStep = Builder.createWidenCast(CastOp, VectorStep, IVTy);
4206 Builder.createWidenCast(Instruction::Trunc, ScalarStep, IVTy);
4212 MulOpc = Instruction::FMul;
4213 Flags = VPI->getFastMathFlagsOrNone();
4215 MulOpc = Instruction::Mul;
4220 MulOpc, {VectorStep, ScalarStep}, Flags, R.getDebugLoc());
4222 VPI->replaceAllUsesWith(VectorStep);
4223 VPI->eraseFromParent();
4245 for (
auto &Exit : Exits) {
4246 if (Exit.EarlyExitingVPBB == LatchVPBB)
4250 cast<VPIRPhi>(&R)->removeIncomingValueFor(Exit.EarlyExitingVPBB);
4251 Exit.EarlyExitingVPBB->getTerminator()->eraseFromParent();
4261 std::optional<VPValue *>
Cond =
4268 for (
auto *Recipe : ConditionRecipes) {
4271 assert(CondLoad ==
nullptr &&
"Too many condition loads");
4275 assert(CondLoad &&
"Couldn't find load");
4286 VPValue *Ptr = Load->getOperand(0);
4290 DL.getTypeStoreSize(Load->getScalarType()).getFixedValue());
4301 for (
auto *
GEP : GEPs) {
4318 auto InsertIt = HeaderVPBB->
end();
4320 bool CondMoveNeeded = CondR->
getParent() != HeaderVPBB;
4325 if (R.mayReadOrWriteMemory()) {
4327 CondMoveNeeded =
true;
4328 InsertIt = R.getIterator();
4338 for (
auto *Recipe :
reverse(ConditionRecipes))
4339 Recipe->moveBefore(*HeaderVPBB, InsertIt);
4343 VPBuilder MaskBuilder(HeaderVPBB, InsertIt);
4346 Type *IVScalarTy =
IV->getScalarType();
4353 {Zero, FirstActive, ALMMultiplier},
4354 DebugLoc(),
"uncountable.exit.mask");
4359 if (R.mayReadOrWriteMemory() && &R != CondLoad) {
4361 if (!VPDT.
dominates(R.getParent(), LatchVPBB))
4370 VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->
end());
4380 auto Phis = ScalarPH->
phis();
4399 if (Pred == MiddleVPBB)
4404 VPValue *CondOfEarlyExitingVPBB;
4405 [[maybe_unused]]
bool Matched =
4406 match(EarlyExitingVPBB->getTerminator(),
4408 assert(Matched &&
"Terminator must be BranchOnCond");
4412 VPBuilder EarlyExitingBuilder(EarlyExitingVPBB->getTerminator());
4413 auto *CondToEarlyExit = EarlyExitingBuilder.
createNaryOp(
4415 TrueSucc == ExitBlock
4416 ? CondOfEarlyExitingVPBB
4417 : EarlyExitingBuilder.
createNot(CondOfEarlyExitingVPBB));
4423 "exit condition must dominate the latch");
4432 assert(!Exits.
empty() &&
"must have at least one early exit");
4439 for (
const auto &[Num, VPB] :
enumerate(RPOT))
4442 return RPOIdx[
A.EarlyExitingVPBB] < RPOIdx[
B.EarlyExitingVPBB];
4448 for (
unsigned I = 0;
I + 1 < Exits.
size(); ++
I)
4449 for (
unsigned J =
I + 1; J < Exits.
size(); ++J)
4451 Exits[
I].EarlyExitingVPBB) &&
4452 "RPO sort must place dominating exits before dominated ones");
4458 VPValue *Combined = Exits[0].CondToExit;
4471 "Unexpected terminator");
4472 VPValue *IsLatchExitTaken = LatchExitingBranch->getOperand(0);
4473 DebugLoc LatchDL = LatchExitingBranch->getDebugLoc();
4474 LatchExitingBranch->eraseFromParent();
4477 {IsAnyExitTaken, IsLatchExitTaken}, LatchDL);
4483 LatchVPBB->
setSuccessors({MiddleVPBB, MiddleVPBB, HeaderVPBB});
4487 LatchVPBB, MiddleVPBB, TheLoop,
4493 for (
unsigned Idx = 0; Idx != Exits.
size(); ++Idx) {
4497 VectorEarlyExitVPBBs[Idx] = VectorEarlyExitVPBB;
4505 Exits.
size() == 1 ? VectorEarlyExitVPBBs[0]
4508 LatchVPBB->
setSuccessors({DispatchVPBB, MiddleVPBB, HeaderVPBB});
4540 for (
auto [Exit, VectorEarlyExitVPBB] :
4541 zip_equal(Exits, VectorEarlyExitVPBBs)) {
4542 auto &[EarlyExitingVPBB, EarlyExitVPBB,
_] = Exit;
4554 ExitIRI->getIncomingValueForBlock(EarlyExitingVPBB);
4555 VPValue *NewIncoming = IncomingVal;
4557 VPBuilder EarlyExitBuilder(VectorEarlyExitVPBB);
4562 ExitIRI->removeIncomingValueFor(EarlyExitingVPBB);
4563 ExitIRI->addIncoming(NewIncoming);
4566 EarlyExitingVPBB->getTerminator()->eraseFromParent();
4600 bool IsLastDispatch = (
I + 2 == Exits.
size());
4602 IsLastDispatch ? VectorEarlyExitVPBBs.
back()
4608 VectorEarlyExitVPBBs[
I]->setPredecessors({CurrentBB});
4611 CurrentBB = FalseBB;
4626 VPValue *VecOp = Red->getVecOp();
4628 assert(!Red->isPartialReduction() &&
4629 "This path does not support partial reductions");
4632 auto IsExtendedRedValidAndClampRange =
4645 "getExtendedReductionCost only supports integer types");
4646 ExtRedCost = Ctx.TTI.getExtendedReductionCost(
4647 Opcode, ExtOpc == Instruction::CastOps::ZExt, RedTy, SrcVecTy,
4648 Red->getFastMathFlagsOrNone(),
CostKind);
4649 return ExtRedCost.
isValid() && ExtRedCost < ExtCost + RedCost;
4657 IsExtendedRedValidAndClampRange(
4678 if (Opcode != Instruction::Add && Opcode != Instruction::Sub &&
4679 Opcode != Instruction::FAdd)
4682 assert(!Red->isPartialReduction() &&
4683 "This path does not support partial reductions");
4687 auto IsMulAccValidAndClampRange =
4699 (Ext0->getOpcode() != Ext1->getOpcode() ||
4700 Ext0->getOpcode() == Instruction::CastOps::FPExt))
4704 !Ext0 || Ext0->getOpcode() == Instruction::CastOps::ZExt;
4706 MulAccCost = Ctx.TTI.getMulAccReductionCost(IsZExt, Opcode, RedTy,
4713 ExtCost += Ext0->computeCost(VF, Ctx);
4715 ExtCost += Ext1->computeCost(VF, Ctx);
4717 ExtCost += OuterExt->computeCost(VF, Ctx);
4719 return MulAccCost.
isValid() &&
4720 MulAccCost < ExtCost + MulCost + RedCost;
4725 VPValue *VecOp = Red->getVecOp();
4763 Builder.createWidenCast(Instruction::CastOps::Trunc, ValB, NarrowTy);
4765 ValB = ExtB = Builder.createWidenCast(ExtOpc, Trunc, WideTy);
4766 Mul->setOperand(1, ExtB);
4776 ExtendAndReplaceConstantOp(RecipeA, RecipeB,
B,
Mul);
4781 IsMulAccValidAndClampRange(
Mul, RecipeA, RecipeB,
nullptr)) {
4788 if (!
Sub && IsMulAccValidAndClampRange(
Mul,
nullptr,
nullptr,
nullptr))
4805 ExtendAndReplaceConstantOp(Ext0, Ext1,
B,
Mul);
4814 (Ext->getOpcode() == Ext0->getOpcode() || Ext0 == Ext1) &&
4815 Ext0->getOpcode() == Ext1->getOpcode() &&
4816 IsMulAccValidAndClampRange(
Mul, Ext0, Ext1, Ext) &&
Mul->hasOneUse()) {
4818 Ext0->getOpcode(), Ext0->getOperand(0), Ext->getScalarType(),
nullptr,
4819 *Ext0, *Ext0, Ext0->getDebugLoc());
4820 NewExt0->insertBefore(Ext0);
4825 Ext->getScalarType(),
nullptr, *Ext1,
4826 *Ext1, Ext1->getDebugLoc());
4829 auto *NewMul =
Mul->cloneWithOperands({NewExt0, NewExt1});
4830 NewMul->insertBefore(
Mul);
4831 Ext->replaceAllUsesWith(NewMul);
4832 Ext->eraseFromParent();
4833 Mul->eraseFromParent();
4847 assert(!Red->isPartialReduction() &&
4848 "This path does not support partial reductions");
4851 auto IP = std::next(Red->getIterator());
4852 auto *VPBB = Red->getParent();
4862 Red->replaceAllUsesWith(AbstractR);
4892 for (
VPValue *VPV : VPValues) {
4901 if (
User->usesScalars(VPV))
4904 HoistPoint = HoistBlock->
begin();
4908 "All users must be in the vector preheader or dominated by it");
4913 VPV->replaceUsesWithIf(Broadcast,
4914 [VPV, Broadcast](
VPUser &U,
unsigned Idx) {
4915 return Broadcast != &U && !U.usesScalars(VPV);
4926 return CommonMetadata;
4929template <
unsigned Opcode>
4934 static_assert(Opcode == Instruction::Load || Opcode == Instruction::Store,
4935 "Only Load and Store opcodes supported");
4936 [[maybe_unused]]
constexpr bool IsLoad = (Opcode == Instruction::Load);
4943 for (
auto Recipes :
Groups) {
4944 if (Recipes.size() < 2)
4949 "Expected all recipes in group to have the same load-store type");
4956 VPValue *MaskI = RecipeI->getMask();
4962 bool HasComplementaryMask =
false;
4967 VPValue *MaskJ = RecipeJ->getMask();
4976 if (HasComplementaryMask) {
4977 assert(Group.
size() >= 2 &&
"must have at least 2 entries");
4987template <
typename InstType>
5005 for (
auto &Group :
Groups) {
5025 return R->isSingleScalar() == IsSingleScalar;
5027 "all members in group must agree on IsSingleScalar");
5032 LoadWithMinAlign->getUnderlyingInstr(), {EarliestLoad->getOperand(0)},
5033 IsSingleScalar,
nullptr, *EarliestLoad, CommonMetadata);
5035 UnpredicatedLoad->insertBefore(EarliestLoad);
5039 Load->replaceAllUsesWith(UnpredicatedLoad);
5040 Load->eraseFromParent();
5049 if (!StoreLoc || !StoreLoc->AATags.Scope)
5055 StoresToSink.
end());
5059 SinkStoreInfo SinkInfo(StoresToSinkSet, *StoresToSink[0], PSE, L);
5071 for (
auto &Group :
Groups) {
5084 VPValue *SelectedValue = Group[0]->getOperand(0);
5087 bool IsSingleScalar = Group[0]->isSingleScalar();
5088 for (
unsigned I = 1;
I < Group.size(); ++
I) {
5089 assert(IsSingleScalar == Group[
I]->isSingleScalar() &&
5090 "all members in group must agree on IsSingleScalar");
5091 VPValue *Mask = Group[
I]->getMask();
5093 SelectedValue = Builder.createSelect(Mask,
Value, SelectedValue,
5102 StoreWithMinAlign->getUnderlyingInstr(),
5103 {SelectedValue, LastStore->getOperand(1)}, IsSingleScalar,
5104 nullptr, *LastStore, CommonMetadata);
5105 UnpredicatedStore->insertBefore(*InsertBB, LastStore->
getIterator());
5109 Store->eraseFromParent();
5116 assert(Plan.
hasVF(BestVF) &&
"BestVF is not available in Plan");
5117 assert(Plan.
hasUF(BestUF) &&
"BestUF is not available in Plan");
5180 auto UsesVectorOrInsideReplicateRegion = [DefR, LoopRegion](
VPUser *U) {
5182 return !U->usesScalars(DefR) || ParentRegion != LoopRegion;
5189 none_of(DefR->users(), UsesVectorOrInsideReplicateRegion))
5199 DefR->replaceUsesWithIf(
5200 BuildVector, [BuildVector, &UsesVectorOrInsideReplicateRegion](
5202 return &U != BuildVector && UsesVectorOrInsideReplicateRegion(&U);
5216 for (
VPValue *Def : R.definedValues()) {
5229 auto IsCandidateUnpackUser = [Def](
VPUser *U) {
5231 return U->usesScalars(Def) &&
5234 if (
none_of(Def->users(), IsCandidateUnpackUser))
5241 Unpack->insertAfter(&R);
5242 Def->replaceUsesWithIf(Unpack,
5243 [&IsCandidateUnpackUser](
VPUser &U,
unsigned) {
5244 return IsCandidateUnpackUser(&U);
5253 bool RequiresScalarEpilogue,
VPValue *Step,
5254 std::optional<uint64_t> MaxRuntimeStep) {
5266 "Step VPBB must dominate VectorPHVPBB");
5268 InsertPt = std::next(StepR->getIterator());
5270 VPBuilder Builder(VectorPHVPBB, InsertPt);
5276 if (!RequiresScalarEpilogue &&
match(TC,
m_APInt(TCVal)) && MaxRuntimeStep &&
5288 if (TailByMasking) {
5289 TC = Builder.createAdd(
5300 Builder.createNaryOp(Instruction::URem, {TC, Step},
5309 if (RequiresScalarEpilogue) {
5311 "requiring scalar epilogue is not supported with fail folding");
5314 R = Builder.createSelect(IsZero, Step, R);
5328 "VF and VFxUF must be materialized together");
5340 Builder.createElementCount(TCTy, VFEC * Plan.
getConcreteUF());
5347 VPValue *RuntimeVF = Builder.createElementCount(TCTy, VFEC);
5351 BC, [&VF](
VPUser &U,
unsigned) {
return !U.usesScalars(&VF); });
5355 VPValue *MulByUF = Builder.createOverflowingOp(
5368 auto *AliasMask = Builder.createNaryOp(
5372 if (HeaderMaskDef->isPhi())
5373 Builder =
VPBuilder(&*HeaderMaskDef->getParent()->getFirstNonPhi());
5378 auto *ClampedHeaderMask = Builder.createAnd(HeaderMask, AliasMask);
5380 return &U != ClampedHeaderMask;
5391 assert(IncomingAliasMask &&
"Expected an alias mask!");
5401 if (
Check.NeedsFreeze) {
5411 Intrinsic::loop_dependence_war_mask,
5415 AliasMask = Builder.createAnd(AliasMask, WARMask);
5417 AliasMask = WARMask;
5422 VPValue *NumActive = Builder.createNaryOp(
5425 VPValue *ClampedVF = Builder.createScalarZExtOrTrunc(
5451 VPValue *DistanceToMax = Builder.createSub(MaxUIntTripCount, TripCount);
5459 VPValue *TripCountCheck = Builder.createICmp(
5462 VPValue *
Cond = Builder.createOr(IsScalar, TripCountCheck,
DL);
5473 "Clamped VF not supported with interleaving");
5481 VPBuilder Builder(Entry, Entry->begin());
5493 if (!ExpSCEV || ExpSCEV->getNumUsers() == 0)
5495 Builder.setInsertPoint(ExpSCEV);
5505 ExpSCEV->eraseFromParent();
5514 BasicBlock *EntryBB = Entry->getIRBasicBlock();
5521 const SCEV *Expr = ExpSCEV->getSCEV();
5524 ExpandedSCEVs[Expr] = Res;
5529 ExpSCEV->eraseFromParent();
5532 "all VPExpandSCEVRecipes must have been expanded");
5535 auto EI = Entry->begin();
5545 return ExpandedSCEVs;
5557 VPValue *OpV,
unsigned Idx,
bool IsScalable) {
5561 return Member0Op == OpV;
5565 return !IsScalable && !W->getMask() && W->isConsecutive() &&
5568 return IR->getInterleaveGroup()->isFull() &&
IR->getVPValue(Idx) == OpV;
5585 for (
unsigned Idx = 0; Idx != WideMember0->getNumOperands(); ++Idx) {
5588 OpsI.
push_back(
Op->getDefiningRecipe()->getOperand(Idx));
5593 if (
any_of(
enumerate(OpsI), [WideMember0, Idx, IsScalable](
const auto &
P) {
5594 const auto &[
OpIdx, OpV] =
P;
5606static std::optional<ElementCount>
5610 if (!InterleaveR || InterleaveR->
getMask())
5611 return std::nullopt;
5613 Type *GroupElementTy =
nullptr;
5617 return Op->getScalarType() == GroupElementTy;
5619 return std::nullopt;
5623 return Op->getScalarType() == GroupElementTy;
5625 return std::nullopt;
5629 if (IG->getFactor() != IG->getNumMembers())
5630 return std::nullopt;
5636 assert(
Size.isScalable() == VF.isScalable() &&
5637 "if Size is scalable, VF must be scalable and vice versa");
5638 return Size.getKnownMinValue();
5642 unsigned MinVal = VF.getKnownMinValue();
5644 if (IG->getFactor() == MinVal && GroupSize == GetVectorBitWidthForVF(VF))
5647 return std::nullopt;
5655 return RepR && RepR->isSingleScalar();
5665 auto *R = V->getDefiningRecipe();
5676 for (
unsigned Idx = 0,
E = WideMember0->getNumOperands(); Idx !=
E; ++Idx) {
5678 for (
VPValue *Member : Members)
5679 OpsI.
push_back(Member->getDefiningRecipe()->getOperand(Idx));
5688 auto *LI =
cast<LoadInst>(LoadGroup->getInterleaveGroup()->getInsertPos());
5690 LoadGroup->getMask(),
true,
5691 *LoadGroup, LoadGroup->getDebugLoc());
5692 L->insertBefore(LoadGroup);
5698 assert(RepR->isSingleScalar() && RepR->getOpcode() == Instruction::Load &&
5699 "must be a single scalar load");
5700 NarrowedOps.
insert(RepR);
5705 VPValue *PtrOp = WideLoad->getAddr();
5707 PtrOp = VecPtr->getOperand(0);
5712 nullptr, {}, *WideLoad);
5713 N->insertBefore(WideLoad);
5718std::unique_ptr<VPlan>
5738 "unexpected branch-on-count");
5741 std::optional<ElementCount> VFToOptimize;
5755 if (R.mayWriteToMemory() && !InterleaveR)
5761 return any_of(V->users(), [&](VPUser *U) {
5762 auto *UR = cast<VPRecipeBase>(U);
5763 return UR->getParent()->getParent() != VectorLoop;
5780 std::optional<ElementCount> NarrowedVF =
5782 if (!NarrowedVF || (VFToOptimize && NarrowedVF != VFToOptimize))
5784 VFToOptimize = NarrowedVF;
5787 if (InterleaveR->getStoredValues().empty())
5792 auto *Member0 = InterleaveR->getStoredValues()[0];
5802 VPRecipeBase *DefR = Op.value()->getDefiningRecipe();
5805 auto *IR = dyn_cast<VPInterleaveRecipe>(DefR);
5806 return IR && IR->getInterleaveGroup()->isFull() &&
5807 IR->getVPValue(Op.index()) == Op.value();
5816 VFToOptimize->isScalable()))
5821 if (StoreGroups.empty())
5825 bool RequiresScalarEpilogue =
5836 std::unique_ptr<VPlan> NewPlan;
5838 NewPlan = std::unique_ptr<VPlan>(Plan.
duplicate());
5839 Plan.
setVF(*VFToOptimize);
5840 NewPlan->removeVF(*VFToOptimize);
5846 for (
auto *StoreGroup : StoreGroups) {
5853 StoreGroup->getDebugLoc());
5854 S->insertBefore(StoreGroup);
5855 StoreGroup->eraseFromParent();
5861 Type *CanIVTy = VectorLoop->getCanonicalIVType();
5867 if (VFToOptimize->isScalable()) {
5870 Step = PHBuilder.createOverflowingOp(Instruction::Mul, {VScale,
UF},
5878 materializeVectorTripCount(Plan, VectorPH,
false,
5879 RequiresScalarEpilogue, Step);
5884 removeDeadRecipes(Plan);
5887 "All VPVectorPointerRecipes should have been removed");
5903 "must have a BranchOnCond");
5906 if (VF.
isScalable() && VScaleForTuning.has_value())
5907 VectorStep *= *VScaleForTuning;
5908 assert(VectorStep > 0 &&
"trip count should not be zero");
5912 MiddleTerm->setMetadata(LLVMContext::MD_prof, BranchWeights);
5931 "Cannot handle loops with uncountable early exits");
5938 assert(RecurSplice &&
"expected FirstOrderRecurrenceSplice");
5945 if (
any_of(RecurSplice->users(),
5946 [](
VPUser *U) { return !cast<VPRecipeBase>(U)->getRegion(); }) &&
6027 {},
"vector.recur.extract.for.phi");
6030 ExitPhi->replaceUsesOfWith(ExtractR, PenultimateElement);
6044 VPValue *WidenIVCandidate = BinOp->getOperand(0);
6045 VPValue *InvariantCandidate = BinOp->getOperand(1);
6047 std::swap(WidenIVCandidate, InvariantCandidate);
6061 auto *ClonedOp = BinOp->
clone();
6062 if (ClonedOp->getOperand(0) == WidenIV) {
6063 ClonedOp->setOperand(0, ScalarIV);
6065 assert(ClonedOp->getOperand(1) == WidenIV &&
"one operand must be WideIV");
6066 ClonedOp->setOperand(1, ScalarIV);
6081 auto CheckSentinel = [&SE](
const SCEV *IVSCEV,
6082 bool UseMax) -> std::optional<APSInt> {
6084 for (
bool Signed : {
true,
false}) {
6093 return std::nullopt;
6101 PhiR->getRecurrenceKind()))
6110 VPValue *BackedgeVal = PhiR->getBackedgeValue();
6124 !
match(FindLastSelect,
6133 IVOfExpressionToSink ? IVOfExpressionToSink : FindLastExpression, PSE,
6139 "IVOfExpressionToSink not being an AddRec must imply "
6140 "FindLastExpression not being an AddRec.");
6151 std::optional<APSInt> SentinelVal = CheckSentinel(IVSCEV, UseMax);
6152 bool UseSigned = SentinelVal && SentinelVal->isSigned();
6159 if (IVOfExpressionToSink) {
6160 const SCEV *FindLastExpressionSCEV =
6162 if (
match(FindLastExpressionSCEV,
6165 if (
auto NewSentinel =
6166 CheckSentinel(FindLastExpressionSCEV, NewUseMax)) {
6169 SentinelVal = *NewSentinel;
6170 UseSigned = NewSentinel->isSigned();
6172 IVSCEV = FindLastExpressionSCEV;
6173 IVOfExpressionToSink =
nullptr;
6183 if (AR->hasNoSignedWrap())
6185 else if (AR->hasNoUnsignedWrap())
6195 VPValue *NewFindLastSelect = BackedgeVal;
6197 if (!SentinelVal || IVOfExpressionToSink) {
6200 DebugLoc DL = FindLastSelect->getDefiningRecipe()->getDebugLoc();
6201 VPBuilder LoopBuilder(FindLastSelect->getDefiningRecipe());
6202 if (FindLastSelect->getDefiningRecipe()->getOperand(1) == PhiR)
6203 SelectCond = LoopBuilder.
createNot(SelectCond);
6210 if (SelectCond !=
Cond || IVOfExpressionToSink) {
6213 IVOfExpressionToSink ? IVOfExpressionToSink : FindLastExpression,
6222 VPIRFlags Flags(MinMaxKind,
false,
false,
6228 NewFindLastSelect, Flags, ExitDL);
6231 VPValue *VectorRegionExitingVal = ReducedIV;
6232 if (IVOfExpressionToSink)
6233 VectorRegionExitingVal =
6235 ReducedIV, IVOfExpressionToSink);
6238 VPValue *StartVPV = PhiR->getStartValue();
6245 NewRdxResult = MiddleBuilder.
createSelect(Cmp, VectorRegionExitingVal,
6255 AnyOfPhi->insertAfter(PhiR);
6262 OrVal, VectorRegionExitingVal, StartVPV, ExitDL);
6275 PhiR->hasUsesOutsideReductionChain());
6276 NewPhiR->insertBefore(PhiR);
6277 PhiR->replaceAllUsesWith(NewPhiR);
6278 PhiR->eraseFromParent();
6285struct ReductionExtend {
6286 Type *SrcType =
nullptr;
6287 ExtendKind Kind = ExtendKind::PR_None;
6293struct ExtendedReductionOperand {
6297 ReductionExtend ExtendA, ExtendB;
6305struct VPPartialReductionChain {
6308 VPWidenRecipe *ReductionBinOp =
nullptr;
6310 ExtendedReductionOperand ExtendedOp;
6317 unsigned AccumulatorOpIdx;
6318 unsigned ScaleFactor;
6330 if (!
Op->hasOneUse() ||
6336 auto *Trunc = Builder.createWidenCast(Instruction::CastOps::Trunc,
6337 Op->getOperand(1), NarrowTy);
6339 Op->setOperand(1, Builder.createWidenCast(ExtOpc, Trunc, WideTy));
6348 auto *
Sub =
Op->getOperand(0)->getDefiningRecipe();
6350 assert(Ext->getOpcode() ==
6352 "Expected both the LHS and RHS extends to be the same");
6353 bool IsSigned = Ext->getOpcode() == Instruction::SExt;
6356 auto *FreezeX = Builder.insert(
new VPWidenRecipe(Instruction::Freeze, {
X}));
6357 auto *FreezeY = Builder.insert(
new VPWidenRecipe(Instruction::Freeze, {
Y}));
6358 auto *
Max = Builder.insert(
6360 {FreezeX, FreezeY}, SrcTy));
6361 auto *Min = Builder.insert(
6363 {FreezeX, FreezeY}, SrcTy));
6366 return Builder.createWidenCast(Instruction::CastOps::ZExt, AbsDiff,
6367 Op->getScalarType());
6379 if (!
Mul->hasOneUse() ||
6380 (Ext->getOpcode() != MulLHS->getOpcode() && MulLHS != MulRHS) ||
6381 MulLHS->getOpcode() != MulRHS->getOpcode())
6384 auto *NewLHS = Builder.createWidenCast(
6385 MulLHS->getOpcode(), MulLHS->getOperand(0), Ext->getScalarType());
6386 auto *NewRHS = MulLHS == MulRHS
6388 : Builder.createWidenCast(MulRHS->getOpcode(),
6389 MulRHS->getOperand(0),
6390 Ext->getScalarType());
6391 auto *NewMul =
Mul->cloneWithOperands({NewLHS, NewRHS});
6392 Builder.insert(NewMul);
6393 Op->replaceAllUsesWith(NewMul);
6394 Op->eraseFromParent();
6395 Mul->eraseFromParent();
6404 VPValue *VecOp = Red->getVecOp();
6458static void transformToPartialReduction(
const VPPartialReductionChain &Chain,
6466 WidenRecipe->
getOperand(1 - Chain.AccumulatorOpIdx));
6469 ExtendedOp = optimizeExtendsForPartialReduction(ExtendedOp);
6485 if ((WidenRecipe->
getOpcode() == Instruction::Sub &&
6487 (WidenRecipe->
getOpcode() == Instruction::FSub &&
6492 if (WidenRecipe->
getOpcode() == Instruction::FSub) {
6502 Builder.insert(NegRecipe);
6503 ExtendedOp = NegRecipe;
6514 assert((!ExitValue || IsLastInChain) &&
6515 "if we found ExitValue, it must match RdxPhi's backedge value");
6526 PartialRed->insertBefore(WidenRecipe);
6534 E->insertBefore(WidenRecipe);
6535 PartialRed->replaceAllUsesWith(
E);
6548 auto *NewScaleFactor = Plan.
getConstantInt(32, Chain.ScaleFactor);
6549 StartInst->setOperand(2, NewScaleFactor);
6557 VPValue *OldStartValue = StartInst->getOperand(0);
6558 StartInst->setOperand(0, StartInst->getOperand(1));
6562 assert(RdxResult &&
"Could not find reduction result");
6565 unsigned SubOpc = Chain.RK ==
RecurKind::FSub ? Instruction::BinaryOps::FSub
6566 : Instruction::BinaryOps::Sub;
6572 [&NewResult](
VPUser &U,
unsigned Idx) {
return &
U != NewResult; });
6578 const VPPartialReductionChain &Link,
6581 const ExtendedReductionOperand &ExtendedOp = Link.ExtendedOp;
6582 std::optional<unsigned> BinOpc = std::nullopt;
6584 if (ExtendedOp.ExtendB.Kind != ExtendKind::PR_None)
6585 BinOpc = ExtendedOp.ExtendsUser->
getOpcode();
6587 std::optional<llvm::FastMathFlags>
Flags;
6591 auto GetLinkOpcode = [&Link]() ->
unsigned {
6594 return Instruction::Add;
6596 return Instruction::FAdd;
6598 return Link.ReductionBinOp->
getOpcode();
6603 GetLinkOpcode(), ExtendedOp.ExtendA.SrcType, ExtendedOp.ExtendB.SrcType,
6604 RdxType, VF, ExtendedOp.ExtendA.Kind, ExtendedOp.ExtendB.Kind, BinOpc,
6625static std::optional<ExtendedReductionOperand>
6628 "Op should be operand of UpdateR");
6636 if (
Op->hasOneUse() &&
6645 Type *RHSInputType =
Y->getScalarType();
6646 if (LHSInputType != RHSInputType ||
6647 LHSExt->getOpcode() != RHSExt->getOpcode())
6648 return std::nullopt;
6651 return ExtendedReductionOperand{
6653 {LHSInputType, getPartialReductionExtendKind(LHSExt)},
6657 std::optional<TTI::PartialReductionExtendKind> OuterExtKind;
6660 VPValue *CastSource = CastRecipe->getOperand(0);
6661 OuterExtKind = getPartialReductionExtendKind(CastRecipe);
6671 return ExtendedReductionOperand{
6678 if (!
Op->hasOneUse())
6679 return std::nullopt;
6684 return std::nullopt;
6694 return std::nullopt;
6698 ExtendKind LHSExtendKind = getPartialReductionExtendKind(LHSCast);
6701 const APInt *RHSConst =
nullptr;
6707 return std::nullopt;
6711 if (Cast && OuterExtKind &&
6712 getPartialReductionExtendKind(Cast) != OuterExtKind)
6713 return std::nullopt;
6715 Type *RHSInputType = LHSInputType;
6716 ExtendKind RHSExtendKind = LHSExtendKind;
6719 RHSExtendKind = getPartialReductionExtendKind(RHSCast);
6722 return ExtendedReductionOperand{
6723 MulOp, {LHSInputType, LHSExtendKind}, {RHSInputType, RHSExtendKind}};
6730static std::optional<SmallVector<VPPartialReductionChain>>
6738 return std::nullopt;
6748 VPValue *CurrentValue = ExitValue;
6749 while (CurrentValue != RedPhiR) {
6752 return std::nullopt;
6759 std::optional<ExtendedReductionOperand> ExtendedOp =
6760 matchExtendedReductionOperand(UpdateR,
Op);
6762 ExtendedOp = matchExtendedReductionOperand(UpdateR, PrevValue);
6764 return std::nullopt;
6768 Type *ExtSrcType = ExtendedOp->ExtendA.SrcType;
6771 return std::nullopt;
6776 VPPartialReductionChain Link(
6777 {UpdateR, *ExtendedOp, RK,
6781 CurrentValue = PrevValue;
6786 std::reverse(Chain.
begin(), Chain.
end());
6805 if (
auto Chains = getScaledReductions(RedPhiR, CostCtx,
Range))
6806 ChainsByPhi.
try_emplace(RedPhiR, std::move(*Chains));
6809 if (ChainsByPhi.
empty())
6816 for (
const auto &[
_, Chains] : ChainsByPhi)
6817 for (
const VPPartialReductionChain &Chain : Chains) {
6818 PartialReductionOps.
insert(Chain.ExtendedOp.ExtendsUser);
6819 ScaledReductionMap[Chain.ReductionBinOp] = Chain.ScaleFactor;
6825 auto ExtendUsersValid = [&](
VPValue *Ext) {
6827 return PartialReductionOps.contains(cast<VPRecipeBase>(U));
6831 auto IsProfitablePartialReductionChainForVF =
6838 for (
const VPPartialReductionChain &Link : Chain) {
6839 const ExtendedReductionOperand &ExtendedOp = Link.ExtendedOp;
6840 InstructionCost LinkCost = getPartialReductionLinkCost(CostCtx, Link, VF);
6844 PartialCost += LinkCost;
6845 RegularCost += Link.ReductionBinOp->
computeCost(VF, CostCtx);
6847 if (ExtendedOp.ExtendB.Kind != ExtendKind::PR_None)
6848 RegularCost += ExtendedOp.ExtendsUser->
computeCost(VF, CostCtx);
6851 RegularCost += Extend->computeCost(VF, CostCtx);
6853 return PartialCost.
isValid() && PartialCost < RegularCost;
6861 for (
auto &[RedPhiR, Chains] : ChainsByPhi) {
6862 for (
const VPPartialReductionChain &Chain : Chains) {
6863 if (!
all_of(Chain.ExtendedOp.ExtendsUser->operands(), ExtendUsersValid)) {
6867 auto UseIsValid = [&, RedPhiR = RedPhiR](
VPUser *U) {
6869 return PhiR == RedPhiR;
6871 return Chain.ScaleFactor == ScaledReductionMap.
lookup_or(R, 0) ||
6877 if (!
all_of(Chain.ReductionBinOp->users(), UseIsValid)) {
6886 auto *RepR = dyn_cast<VPReplicateRecipe>(U);
6887 return RepR && RepR->getOpcode() == Instruction::Store;
6898 return IsProfitablePartialReductionChainForVF(Chains, VF);
6904 for (
auto &[Phi, Chains] : ChainsByPhi)
6905 for (
const VPPartialReductionChain &Chain : Chains)
6906 transformToPartialReduction(Chain, Plan, Phi);
6920 if (VPI && VPI->getUnderlyingValue() &&
6932 New->insertBefore(VPI);
6933 if (VPI->getOpcode() == Instruction::Load)
6934 VPI->replaceAllUsesWith(New->getVPSingleValue());
6935 VPI->eraseFromParent();
6940 FinalRedStoresBuilder))
6949 ReplaceWith(Histogram);
6957 ReplaceWith(Recipe);
6980 if (VPI->mayHaveSideEffects())
6984 if (VPI->isMasked() && !VPI->isSafeToSpeculativelyExecute())
6989 if (VPI->getOpcode() == Instruction::Add &&
6998 I, VPI->operandsWithoutMask(),
true,
6999 nullptr, *VPI, *VPI, VPI->getDebugLoc());
7000 Recipe->insertBefore(VPI);
7001 VPI->replaceAllUsesWith(Recipe);
7002 VPI->eraseFromParent();
7012 switch (Param.ParamKind) {
7013 case VFParamKind::Vector:
7014 case VFParamKind::GlobalPredicate:
7016 case VFParamKind::OMP_Uniform:
7017 return SE->isSCEVable(Args[Param.ParamPos]->getScalarType()) &&
7018 SE->isLoopInvariant(
7019 vputils::getSCEVExprForVPValue(Args[Param.ParamPos], PSE, L),
7021 case VFParamKind::OMP_Linear:
7022 return match(vputils::getSCEVExprForVPValue(Args[Param.ParamPos], PSE, L),
7023 m_scev_AffineAddRec(
7024 m_SCEV(), m_scev_SpecificSInt(Param.LinearStepOrPos),
7025 m_SpecificLoop(L)));
7042 const auto *It =
find_if(Mappings, [&](
const VFInfo &Info) {
7043 return Info.Shape.VF == VF && (!MaskRequired || Info.isMasked()) &&
7046 if (It == Mappings.end())
7053struct CallWideningDecision {
7054 enum class KindTy { Scalarize,
Intrinsic, VectorVariant };
7055 CallWideningDecision(KindTy Kind, Function *Variant =
nullptr)
7078 return CallWideningDecision::KindTy::Scalarize;
7088 return CallWideningDecision::KindTy::Scalarize;
7092 false, VF, CostCtx);
7107 return CallWideningDecision::KindTy::Intrinsic;
7111 if (VecFunc && ScalarCost >= VecCallCost)
7112 return {CallWideningDecision::KindTy::VectorVariant, VecFunc};
7114 return CallWideningDecision::KindTy::Scalarize;
7124 if (!VPI || !VPI->getUnderlyingValue() ||
7125 VPI->getOpcode() != Instruction::Call)
7130 VPI->op_begin() + CI->arg_size());
7132 CallWideningDecision Decision =
7141 switch (Decision.Kind) {
7142 case CallWideningDecision::KindTy::Intrinsic: {
7146 *VPI, VPI->getDebugLoc());
7149 case CallWideningDecision::KindTy::VectorVariant: {
7153 VPValue *Mask = VPI->isMasked() ? VPI->getMask() : Plan.
getTrue();
7154 Ops.push_back(Mask);
7156 Ops.push_back(VPI->getOperand(VPI->getNumOperandsWithoutMask() - 1));
7158 *VPI, VPI->getDebugLoc());
7161 case CallWideningDecision::KindTy::Scalarize:
7167 VPI->replaceAllUsesWith(Replacement);
7168 VPI->eraseFromParent();
7191 if (!LoadR || LoadR->isConsecutive())
7210 Align Alignment = LoadR->getAlign();
7213 if (!Ctx.TTI.isLegalStridedLoadStore(DataTy, Alignment))
7218 Intrinsic::experimental_vp_strided_load, DataTy,
7219 LoadR->isMasked(), Alignment, Ctx);
7220 return StridedLoadStoreCost < CurrentCost;
7231 Ctx.invalidateWideningDecision(&LoadR->getIngredient(), VF);
7236 I32VF = Builder.createScalarZExtOrTrunc(
7249 "Stride type from SCEV must match the index type");
7250 VPValue *CanIV = Builder.createScalarSExtOrTrunc(
7254 auto *
Offset = Builder.createOverflowingOp(
7255 Instruction::Mul, {CanIV, StrideInBytes},
7256 {AddRecPtr->hasNoUnsignedWrap(), AddRecPtr->hasNoSignedWrap()});
7257 auto *BasePtr = Builder.createNoWrapPtrAdd(
7263 VPValue *NewPtr = Builder.createVectorPointer(
7265 Ptr->getGEPNoWrapFlags(), Ptr->getDebugLoc());
7267 VPValue *Mask = LoadR->getMask();
7270 auto *StridedLoad = Builder.createWidenMemIntrinsic(
7271 Intrinsic::experimental_vp_strided_load,
7272 {NewPtr, StrideInBytes, Mask, I32VF}, LoadTy, Alignment, *LoadR,
7273 LoadR->getDebugLoc());
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static bool isEqual(const Function &Caller, const Function &Callee)
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
static cl::opt< IntrinsicCostStrategy > IntrinsicCost("intrinsic-cost-strategy", cl::desc("Costing strategy for intrinsic instructions"), cl::init(IntrinsicCostStrategy::InstructionCost), cl::values(clEnumValN(IntrinsicCostStrategy::InstructionCost, "instruction-cost", "Use TargetTransformInfo::getInstructionCost"), clEnumValN(IntrinsicCostStrategy::IntrinsicCost, "intrinsic-cost", "Use TargetTransformInfo::getIntrinsicInstrCost"), clEnumValN(IntrinsicCostStrategy::TypeBasedIntrinsicCost, "type-based-intrinsic-cost", "Calculate the intrinsic cost based only on argument types")))
iv Induction Variable Users
static std::pair< Value *, APInt > getMask(Value *WideMask, unsigned Factor, ElementCount LeafValueEC)
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
Legalize the Machine IR a function s Machine IR
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first DebugLoc that has line number information, given a range of instructions.
This file provides utility analysis objects describing memory locations.
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
const SmallVectorImpl< MachineOperand > & Cond
static bool dominates(InstrPosIndexes &PosIndexes, const MachineInstr &A, const MachineInstr &B)
This is the interface for a metadata-based scoped no-alias analysis.
This file defines generic set operations that may be used on set's of different types,...
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static SymbolRef::Type getType(const Symbol *Sym)
This file implements the TypeSwitch template, which mimics a switch() statement whose cases are type ...
This file implements dominator tree analysis for a single level of a VPlan's H-CFG.
This file contains the declarations of different VPlan-related auxiliary helpers.
This file declares the class VPlanVerifier, which contains utility functions to check the consistency...
This file contains the declarations of the Vectorization Plan base classes:
static const X86InstrFMA3Group Groups[]
static const uint32_t IV[8]
Helper for extra no-alias checks via known-safe recipe and SCEV.
SinkStoreInfo(const SmallPtrSetImpl< VPRecipeBase * > &ExcludeRecipes, VPReplicateRecipe &GroupLeader, PredicatedScalarEvolution &PSE, const Loop &L)
bool shouldSkip(VPRecipeBase &R) const
Return true if R should be skipped during alias checking, either because it's in the exclude set or b...
Class for arbitrary precision integers.
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
uint64_t getZExtValue() const
Get zero extended value.
unsigned getActiveBits() const
Compute the number of active bits in the value.
APInt abs() const
Get the absolute value.
unsigned getBitWidth() const
Return the number of bits in the APInt.
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
An arbitrary precision integer that knows its signedness.
static APSInt getMinValue(uint32_t numBits, bool Unsigned)
Return the APSInt representing the minimum integer value with the given bit width and signedness.
static APSInt getMaxValue(uint32_t numBits, bool Unsigned)
Return the APSInt representing the maximum integer value with the given bit width and signedness.
@ NoAlias
The two locations do not alias at all.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & back() const
Get the last element.
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
const T & front() const
Get the first element.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
const Function * getParent() const
Return the enclosing method, or null if none.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
bool isNoBuiltin() const
Return true if the call should not be treated as a call to a builtin.
This class represents a function call, abstracting a target machine's calling convention.
@ ICMP_ULT
unsigned less than
@ ICMP_ULE
unsigned less or equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
This class represents a range of values.
LLVM_ABI bool contains(const APInt &Val) const
Return true if the specified value is in the set.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
LLVM_ABI IntegerType * getIndexType(LLVMContext &C, unsigned AddressSpace) const
Returns the type of a GEP index in AddressSpace.
static DebugLoc getCompilerGenerated()
static DebugLoc getUnknown()
ValueT lookup(const_arg_type_t< KeyT > Val) const
Return the entry for the specified key, or a default constructed value if no such entry exists.
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
ValueT lookup_or(const_arg_type_t< KeyT > Val, U &&Default) const
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
constexpr bool isVector() const
One or more elements.
static constexpr ElementCount getScalable(ScalarTy MinVal)
constexpr bool isScalar() const
Exactly one element.
Utility class for floating point operations which can have information about relaxed accuracy require...
FastMathFlags getFastMathFlags() const
Convenience function for getting all the fast-math flags.
Convenience struct for specifying and reasoning about fast-math flags.
Represents flags for the getelementptr instruction/expression.
static GEPNoWrapFlags noUnsignedWrap()
GEPNoWrapFlags withoutNoUnsignedWrap() const
static GEPNoWrapFlags none()
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
A struct for saving information about induction variables.
static LLVM_ABI InductionDescriptor getCanonicalIntInduction(Type *Ty, ScalarEvolution &SE)
Returns the canonical integer induction for type Ty with start = 0 and step = 1.
InductionKind
This enum represents the kinds of inductions that we support.
@ IK_NoInduction
Not an induction variable.
@ IK_FpInduction
Floating point induction variable.
@ IK_PtrInduction
Pointer induction var. Step = C.
@ IK_IntInduction
Integer induction variable. Step = C.
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
static InstructionCost getInvalid(CostType Val=0)
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
The group of interleaved loads/stores sharing the same stride and close to each other.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
static bool getDecisionAndClampRange(const std::function< bool(ElementCount)> &Predicate, VFRange &Range)
Test a Predicate on a Range of VF's.
Represents a single loop in the control flow graph.
LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
This class implements a map that also provides access to all stored values in a deterministic order.
ValueT lookup(const KeyT &Key) const
std::pair< iterator, bool > try_emplace(const KeyT &Key, Ts &&...Args)
Representation for a specific memory location.
AAMDNodes AATags
The metadata nodes which describes the aliasing of the location (each member is null if that kind of ...
Function * getFunction(StringRef Name) const
Look up the specified function in the module symbol table.
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Post-order traversal of a graph.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
LLVM_ABI const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
static LLVM_ABI unsigned getOpcode(RecurKind Kind)
Returns the opcode corresponding to the RecurrenceKind.
unsigned getOpcode() const
static bool isFindLastRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
RegionT * getParent() const
Get the parent of the Region.
This class represents a constant integer value.
ConstantInt * getValue() const
This class uses information about analyze scalars to rewrite expressions in canonical form.
LLVM_ABI Value * expandCodeFor(SCEVUse SH, Type *Ty, BasicBlock::iterator I)
Insert code to directly compute the specified SCEV expression into the program.
static const SCEV * rewrite(const SCEV *Scev, ScalarEvolution &SE, ValueToSCEVMapTy &Map)
This class represents an analyzed expression in the program.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
LLVM_ABI const SCEV * getUDivExpr(SCEVUse LHS, SCEVUse RHS)
Get a canonical unsigned division expression, or something simpler if possible.
const DataLayout & getDataLayout() const
Return the DataLayout associated with the module this SCEV instance is operating on.
LLVM_ABI const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
LLVM_ABI bool isKnownNonZero(const SCEV *S)
Test if the given expression is known to be non-zero.
LLVM_ABI const SCEV * getConstant(ConstantInt *V)
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI const SCEV * getMinusSCEV(SCEVUse LHS, SCEVUse RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
ConstantRange getSignedRange(const SCEV *S)
Determine the signed range for a particular SCEV.
LLVM_ABI bool isKnownPositive(const SCEV *S)
Test if the given expression is known to be positive.
LLVM_ABI const SCEV * getElementCount(Type *Ty, ElementCount EC, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
ConstantRange getUnsignedRange(const SCEV *S)
Determine the unsigned range for a particular SCEV.
LLVM_ABI const SCEV * getMulExpr(SmallVectorImpl< SCEVUse > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
LLVM_ABI bool isKnownPredicate(CmpPredicate Pred, SCEVUse LHS, SCEVUse RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
static LLVM_ABI AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB)
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Provides information about what library functions are available for the current target.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
This class implements a switch-like dispatch statement for a value of 'T' using dyn_cast functionalit...
TypeSwitch< T, ResultT > & Case(CallableT &&caseFn)
Add a case on the given type.
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
bool isStructTy() const
True if this is an instance of StructType.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static SmallVector< VFInfo, 8 > getMappings(const CallInst &CI)
Retrieve all the VFInfo instances associated to the CallInst CI.
A recipe for generating the active lane mask for the vector loop that is used to predicate the vector...
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
RecipeListTy::iterator iterator
Instruction iterators...
iterator begin()
Recipe iterator methods.
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
VPBasicBlock * splitAt(iterator SplitAt)
Split current block at SplitAt by inserting a new block between the current block and its successors ...
const VPRecipeBase & front() const
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
const VPRecipeBase & back() const
A recipe for vectorizing a phi-node as a sequence of mask-based select instructions.
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
void setMask(unsigned Idx, VPValue *V)
Set mask number Idx to V.
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have...
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccss as successor of this VPBlockBase.
VPRegionBlock * getParent()
const VPBasicBlock * getExitingBasicBlock() const
size_t getNumSuccessors() const
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
const VPBlocksTy & getPredecessors() const
const std::string & getName() const
void clearSuccessors()
Remove all the successors of this block.
VPBlockBase * getSinglePredecessor() const
void clearPredecessors()
Remove all the predecessor of this block.
const VPBasicBlock * getEntryBasicBlock() const
VPBlockBase * getSingleHierarchicalPredecessor()
VPBlockBase * getSingleSuccessor() const
const VPBlocksTy & getSuccessors() const
static auto blocksAs(T &&Range)
Return an iterator range over Range with each block cast to BlockTy.
static void insertOnEdge(VPBlockBase *From, VPBlockBase *To, VPBlockBase *BlockPtr)
Inserts BlockPtr on the edge between From and To.
static bool isLatch(const VPBlockBase *VPB, const VPDominatorTree &VPDT)
Returns true if VPB is a loop latch, using isHeader().
static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBases IfTrue and IfFalse after BlockPtr.
static void connectBlocks(VPBlockBase *From, VPBlockBase *To, unsigned PredIdx=-1u, unsigned SuccIdx=-1u)
Connect VPBlockBases From and To bi-directionally.
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
static auto blocksOnly(T &&Range)
Return an iterator range over Range which only includes BlockTy blocks.
static void transferSuccessors(VPBlockBase *Old, VPBlockBase *New)
Transfer successors from Old to New. New must have no successors.
static SmallVector< VPBasicBlock * > blocksInSingleSuccessorChainBetween(VPBasicBlock *FirstBB, VPBasicBlock *LastBB)
Returns the blocks between FirstBB and LastBB, where FirstBB to LastBB forms a single-sucessor chain.
A recipe for generating conditional branches on the bits of a mask.
RAII object that stores the current insertion point and restores it when the object is destroyed.
VPlan-based builder utility analogous to IRBuilder.
VPInstruction * createFirstActiveLane(ArrayRef< VPValue * > Masks, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createAdd(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", VPRecipeWithIRFlags::WrapFlagsTy WrapFlags={false, false})
VPInstruction * createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPValue * createScalarZExtOrTrunc(VPValue *Op, Type *ResultTy, Type *SrcTy, DebugLoc DL)
VPInstruction * createLogicalOr(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createNot(VPValue *Operand, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createAnyOfReduction(VPValue *ChainOp, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL=DebugLoc::getUnknown())
Create an AnyOf reduction pattern: or-reduce ChainOp, freeze the result, then select between TrueVal ...
VPInstruction * createLogicalAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPDerivedIVRecipe * createDerivedIV(InductionDescriptor::InductionKind Kind, FPMathOperator *FPBinOp, VPIRValue *Start, VPValue *Current, VPValue *Step)
Convert the input value Current to the corresponding value of an induction with Start and Step values...
VPInstruction * createScalarCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, DebugLoc DL, const VPIRMetadata &Metadata={})
VPWidenPHIRecipe * createWidenPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
static VPBuilder getToInsertAfter(VPRecipeBase *R)
Create a VPBuilder to insert after R.
VPWidenCastRecipe * createWidenCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
VPInstruction * createICmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new ICmp VPInstruction with predicate Pred and operands A and B.
VPPhi * createScalarPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={}, Type *ResultTy=nullptr)
VPInstruction * createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={})
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", Type *ResultTy=nullptr)
Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.
void setInsertPoint(VPBasicBlock *TheBB)
This specifies that created VPInstructions should be appended to the end of the specified block.
A recipe for generating the phi node tracking the current scalar iteration index.
unsigned getNumDefinedValues() const
Returns the number of values defined by the VPDef.
VPValue * getVPSingleValue()
Returns the only VPValue defined by the VPDef.
VPValue * getVPValue(unsigned I)
Returns the VPValue with index I defined by the VPDef.
ArrayRef< VPRecipeValue * > definedValues()
Returns an ArrayRef of the values defined by the VPDef.
A recipe for converting the input value IV value to the corresponding value of an IV with different s...
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
bool properlyDominates(const VPRecipeBase *A, const VPRecipeBase *B)
A recipe to combine multiple recipes into a single 'expression' recipe, which should be considered a ...
A recipe representing a sequence of load -> update -> store as part of a histogram operation.
A special type of VPBasicBlock that wraps an existing IR basic block.
Class to record and manage LLVM IR flags.
static VPIRFlags getDefaultFlags(unsigned Opcode)
Returns default flags for Opcode for opcodes that support it, asserts otherwise.
LLVM_ABI_FOR_TEST FastMathFlags getFastMathFlagsOrNone() const
void dropPoisonGeneratingFlags()
Drop all poison-generating flags.
static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)
Create a new VPIRPhi for \I , if it is a PHINode, otherwise create a VPIRInstruction.
This is a concrete Recipe that models a single VPlan-level instruction.
unsigned getNumOperandsWithoutMask() const
Returns the number of operands, excluding the mask if the VPInstruction is masked.
@ ExtractLane
Extracts a single lane (first operand) from a set of vector operands.
@ ExtractPenultimateElement
@ Unpack
Extracts all lanes from its (non-scalable) vector operand.
@ ReductionStartVector
Start vector for reductions with 3 operands: the original start value, the identity value for the red...
@ BuildVector
Creates a fixed-width vector containing all operands.
@ BuildStructVector
Given operands of (the same) struct type, creates a struct of fixed- width vectors each containing a ...
@ CanonicalIVIncrementForPart
@ ComputeReductionResult
Reduce the operands to the final reduction result using the operation specified via the operation's V...
const InterleaveGroup< Instruction > * getInterleaveGroup() const
VPValue * getMask() const
Return the mask used by this recipe.
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
A recipe for interleaved memory operations with vector-predication intrinsics.
VPInterleaveRecipe is a recipe for transforming an interleave group of load or stores into one wide l...
void addIncoming(VPValue *IncomingV)
Append IncomingV as an incoming value to the phi-like recipe.
VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when control converges back from ...
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
VPBasicBlock * getParent()
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Helper class to create VPRecipies from IR instructions.
VPHistogramRecipe * widenIfHistogram(VPInstruction *VPI)
If VPI represents a histogram operation (as determined by LoopVectorizationLegality) make that safe f...
VPRecipeBase * tryToWidenMemory(VPInstruction *VPI, VFRange &Range)
Check if the load or store instruction VPI should widened for Range.Start and potentially masked.
bool replaceWithFinalIfReductionStore(VPInstruction *VPI, VPBuilder &FinalRedStoresBuilder)
If VPI is a store of a reduction into an invariant address, delete it.
VPReplicateRecipe * handleReplication(VPInstruction *VPI, VFRange &Range)
Build a VPReplicationRecipe for VPI.
Type * getScalarType() const
Returns the scalar type of this VPRecipeValue.
A recipe to represent inloop reduction operations with vector-predication intrinsics,...
A recipe for handling reduction phis.
void setVFScaleFactor(unsigned ScaleFactor)
Set the VFScaleFactor for this reduction phi.
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by, or 1 if it isn't scaled.
RecurKind getRecurrenceKind() const
Returns the recurrence kind of the reduction.
A recipe to represent inloop, ordered or partial reduction operations.
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
const VPBlockBase * getEntry() const
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
VPInstruction * getOrCreateCanonicalIVIncrement()
Get the canonical IV increment instruction if it exists.
void setExiting(VPBlockBase *ExitingBlock)
Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.
Type * getCanonicalIVType() const
Return the type of the canonical IV for loop regions.
void clearCanonicalIVNUW(VPInstruction *Increment)
Unsets NUW for the canonical IV increment Increment, for loop regions.
VPRegionValue * getCanonicalIV()
Return the canonical induction variable of the region, null for replicating regions.
const VPBlockBase * getExiting() const
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
bool isSingleScalar() const
static InstructionCost computeCallCost(Function *CalledFn, Type *ResultTy, ArrayRef< const VPValue * > ArgOps, bool IsSingleScalar, ElementCount VF, VPCostContext &Ctx)
Return the cost of scalarizing a call to CalledFn with argument operands ArgOps for a given VF.
operand_range operandsWithoutMask()
Return the recipe's operands, excluding the mask of a predicated recipe.
bool isPredicated() const
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
Lightweight SCEV-to-VPlan expander.
VPValue * tryToExpand(const SCEV *S)
Try to expand S into recipes and live-ins using the builder.
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...
VPSingleDefRecipe is a base class for recipes that model a sequence of one or more output IR that def...
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
VPSingleDefRecipe * clone() override=0
Clone the current recipe.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
void setOperand(unsigned I, VPValue *New)
unsigned getNumOperands() const
VPValue * getOperand(unsigned N) const
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Type * getScalarType() const
Returns the scalar type of this VPValue, dispatching based on the concrete subclass.
Value * getLiveInIRValue() const
Return the underlying IR value for a VPIRValue.
bool isDefinedOutsideLoopRegions() const
Returns true if the VPValue is defined outside any loop.
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
bool hasMoreThanOneUniqueUser() const
Returns true if the value has more than one unique user.
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
void setUnderlyingValue(Value *Val)
VPUser * getSingleUser()
Return the single user of this value, or nullptr if there is not exactly one user.
void replaceAllUsesWith(VPValue *New)
unsigned getNumUsers() const
void replaceUsesWithIf(VPValue *New, llvm::function_ref< bool(VPUser &U, unsigned Idx)> ShouldReplace)
Go through the uses list for this VPValue and make each use point to New if the callback ShouldReplac...
A recipe to compute a pointer to the last element of each part of a widened memory access for widened...
A recipe for widening Call instructions using library calls.
static InstructionCost computeCallCost(Function *Variant, VPCostContext &Ctx)
Return the cost of widening a call using the vector function Variant.
VPWidenCastRecipe is a recipe to create vector cast instructions.
Instruction::CastOps getOpcode() const
A recipe for handling GEP instructions.
Base class for widened induction (VPWidenIntOrFpInductionRecipe and VPWidenPointerInductionRecipe),...
VPIRValue * getStartValue() const
Returns the start value of the induction.
PHINode * getPHINode() const
Returns the underlying PHINode if one exists, or null otherwise.
VPValue * getStepValue()
Returns the step value of the induction.
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
A recipe for handling phi nodes of integer and floating-point inductions, producing their vector valu...
VPIRValue * getStartValue() const
Returns the start value of the induction.
VPValue * getSplatVFValue() const
If the recipe has been unrolled, return the VPValue for the induction increment, otherwise return nul...
TruncInst * getTruncInst()
Returns the first defined value as TruncInst, if it is one or nullptr otherwise.
VPValue * getLastUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the last unrolled part,...
A recipe for widening vector intrinsics.
static InstructionCost computeCallCost(Intrinsic::ID ID, ArrayRef< const VPValue * > Operands, const VPRecipeWithIRFlags &R, ElementCount VF, VPCostContext &Ctx)
Compute the cost of a vector intrinsic with ID and Operands.
static InstructionCost computeMemIntrinsicCost(Intrinsic::ID IID, Type *Ty, bool IsMasked, Align Alignment, VPCostContext &Ctx)
Helper function for computing the cost of vector memory intrinsic.
A common mixin class for widening memory operations.
virtual VPRecipeBase * getAsRecipe()=0
Return a VPRecipeBase* to the current object.
A recipe for widened phis.
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenRecipe.
VPWidenRecipe * clone() override
Clone the current recipe.
unsigned getOpcode() const
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
VPIRValue * getLiveIn(Value *V) const
Return the live-in VPIRValue for V, if there is one or nullptr otherwise.
bool hasVF(ElementCount VF) const
const DataLayout & getDataLayout() const
LLVMContext & getContext() const
VPBasicBlock * getEntry()
bool hasScalableVF() const
VPValue * getTripCount() const
The trip count of the original loop.
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
iterator_range< SmallSetVector< ElementCount, 2 >::iterator > vectorFactors() const
Returns an iterator range over all VFs of the plan.
VPIRValue * getFalse()
Return a VPIRValue wrapping i1 false.
VPSymbolicValue & getVFxUF()
Returns VF * UF of the vector loop region.
VPIRValue * getAllOnesValue(Type *Ty)
Return a VPIRValue wrapping the AllOnes value of type Ty.
VPRegionBlock * createReplicateRegion(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="")
Create a new replicate region with Entry, Exiting and Name.
auto getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
bool hasUF(unsigned UF) const
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
VPSymbolicValue & getVectorTripCount()
The vector trip count.
VPValue * getBackedgeTakenCount() const
VPIRValue * getOrAddLiveIn(Value *V)
Gets the live-in VPIRValue for V or adds a new live-in (if none exists yet) for V.
VPIRValue * getZero(Type *Ty)
Return a VPIRValue wrapping the null value of type Ty.
void setVF(ElementCount VF)
bool isUnrolled() const
Returns true if the VPlan has already been unrolled.
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
unsigned getConcreteUF() const
Returns the concrete UF of the plan, after unrolling.
void resetTripCount(VPValue *NewTripCount)
Resets the trip count for the VPlan.
VPBasicBlock * getMiddleBlock()
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
VPIRValue * getTrue()
Return a VPIRValue wrapping i1 true.
VPBasicBlock * getVectorPreheader() const
Returns the preheader of the vector loop region, if one exists, or null otherwise.
VPSymbolicValue & getUF()
Returns the UF of the vector loop region.
bool hasScalarVFOnly() const
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
VPSymbolicValue & getVF()
Returns the VF of the vector loop region.
bool hasScalarTail() const
Returns true if the scalar tail may execute after the vector loop.
LLVM_ABI_FOR_TEST VPlan * duplicate()
Clone the current VPlan, update all VPValues of the new VPlan and cloned recipes to refer to the clon...
VPIRValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPIRValue wrapping a ConstantInt with the given type and value.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
iterator_range< user_iterator > users()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
constexpr bool hasKnownScalarFactor(const FixedOrScalableQuantity &RHS) const
Returns true if there exists a value X where RHS.multiplyCoefficientBy(X) will result in a value whos...
constexpr ScalarTy getFixedValue() const
constexpr ScalarTy getKnownScalarFactor(const FixedOrScalableQuantity &RHS) const
Returns a value X where RHS.multiplyCoefficientBy(X) will result in a value whose quantity matches ou...
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
An efficient, type-erasing, non-owning reference to a callable.
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_ABI APInt RoundingUDiv(const APInt &A, const APInt &B, APInt::Rounding RM)
Return A unsign-divided by B, rounded by the given rounding mode.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
std::variant< std::monostate, Loc::Single, Loc::Multi, Loc::MMI, Loc::EntryValue > Variant
Alias for the std::variant specialization base class of DbgVariable.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
match_isa< To... > m_Isa()
match_combine_or< Ty... > m_CombineOr(const Ty &...Ps)
Combine pattern matchers matching any of Ps patterns.
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
auto m_Cmp()
Matches any compare instruction and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_MaskedStore(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
Matches MaskedStore Intrinsic.
match_combine_or< CastInst_match< OpTy, TruncInst >, OpTy > m_TruncOrSelf(const OpTy &Op)
auto m_Poison()
Match an arbitrary poison constant.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
LogicalOp_match< LHS, RHS, Instruction::And > m_LogicalAnd(const LHS &L, const RHS &R)
Matches L && R either in the form of L & R or L ? R : false.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
BinaryOp_match< LHS, RHS, Instruction::FMul > m_FMul(const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, OpTy > m_ZExtOrSelf(const OpTy &Op)
bool match(Val *V, const Pattern &P)
match_deferred< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
auto match_fn(const Pattern &P)
A match functor that can be used as a UnaryPredicate in functional algorithms like all_of.
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_MaskedLoad(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
Matches MaskedLoad Intrinsic.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
SpecificCmpClass_match< LHS, RHS, CmpInst > m_SpecificCmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
CastInst_match< OpTy, FPExtInst > m_FPExt(const OpTy &Op)
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::UDiv > m_UDiv(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinaryOp_match< LHS, RHS, Instruction::FAdd, true > m_c_FAdd(const LHS &L, const RHS &R)
Matches FAdd with LHS and RHS in either order.
LogicalOp_match< LHS, RHS, Instruction::And, true > m_c_LogicalAnd(const LHS &L, const RHS &R)
Matches L && R with LHS and RHS in either order.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
BinaryOp_match< LHS, RHS, Instruction::Mul, true > m_c_Mul(const LHS &L, const RHS &R)
Matches a Mul with LHS and RHS in either order.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
auto m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
bind_cst_ty m_scev_APInt(const APInt *&C)
Match an SCEV constant and bind it to an APInt.
specificloop_ty m_SpecificLoop(const Loop *L)
bool match(const SCEV *S, const Pattern &P)
SCEVAffineAddRec_match< Op0_t, Op1_t, match_isa< const Loop > > m_scev_AffineAddRec(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::ExtractLastLane, VPInstruction_match< VPInstruction::ExtractLastPart, Op0_t > > m_ExtractLastLaneOfLastPart(const Op0_t &Op0)
AllRecipe_commutative_match< Instruction::And, Op0_t, Op1_t > m_c_BinaryAnd(const Op0_t &Op0, const Op1_t &Op1)
Match a binary AND operation.
AllRecipe_match< Instruction::Or, Op0_t, Op1_t > m_BinaryOr(const Op0_t &Op0, const Op1_t &Op1)
Match a binary OR operation.
VPInstruction_match< VPInstruction::AnyOf > m_AnyOf()
AllRecipe_commutative_match< Instruction::Or, Op0_t, Op1_t > m_c_BinaryOr(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::ComputeReductionResult, Op0_t > m_ComputeReductionResult(const Op0_t &Op0)
auto m_WidenAnyExtend(const Op0_t &Op0)
match_bind< VPIRValue > m_VPIRValue(VPIRValue *&V)
Match a VPIRValue.
VPInstruction_match< VPInstruction::StepVector > m_StepVector()
auto m_VPPhi(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::BranchOnTwoConds > m_BranchOnTwoConds()
AllRecipe_match< Opcode, Op0_t, Op1_t > m_Binary(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::LastActiveLane, Op0_t > m_LastActiveLane(const Op0_t &Op0)
auto m_WidenIntrinsic(const T &...Ops)
canonical_widen_iv_match m_CanonicalWidenIV()
VPInstruction_match< VPInstruction::ExitingIVValue, Op0_t > m_ExitingIVValue(const Op0_t &Op0)
VPInstruction_match< Instruction::ExtractElement, Op0_t, Op1_t > m_ExtractElement(const Op0_t &Op0, const Op1_t &Op1)
specific_intval< 1 > m_False()
VPInstruction_match< VPInstruction::ExtractLastLane, Op0_t > m_ExtractLastLane(const Op0_t &Op0)
VPInstruction_match< VPInstruction::ActiveLaneMask, Op0_t, Op1_t, Op2_t > m_ActiveLaneMask(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
match_bind< VPSingleDefRecipe > m_VPSingleDefRecipe(VPSingleDefRecipe *&V)
Match a VPSingleDefRecipe, capturing if we match.
VPInstruction_match< VPInstruction::BranchOnCount > m_BranchOnCount()
auto m_GetElementPtr(const Op0_t &Op0, const Op1_t &Op1)
specific_intval< 1 > m_True()
auto m_VPValue()
Match an arbitrary VPValue and ignore it.
VectorEndPointerRecipe_match< Op0_t, Op1_t > m_VecEndPtr(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::ExtractLastPart, Op0_t > m_ExtractLastPart(const Op0_t &Op0)
VPInstruction_match< VPInstruction::Broadcast, Op0_t > m_Broadcast(const Op0_t &Op0)
VPInstruction_match< VPInstruction::ExplicitVectorLength, Op0_t > m_EVL(const Op0_t &Op0)
VPInstruction_match< VPInstruction::BuildVector > m_BuildVector()
BuildVector matches only its opcode, w/o matching its operands as the number of operands is not fi...
VPInstruction_match< VPInstruction::ExtractPenultimateElement, Op0_t > m_ExtractPenultimateElement(const Op0_t &Op0)
match_bind< VPInstruction > m_VPInstruction(VPInstruction *&V)
Match a VPInstruction, capturing if we match.
VPInstruction_match< VPInstruction::FirstActiveLane, Op0_t > m_FirstActiveLane(const Op0_t &Op0)
auto m_DerivedIV(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
VPInstruction_match< VPInstruction::BranchOnCond > m_BranchOnCond()
static VPRecipeBase * findUserOf(VPValue *V, const MatchT &P)
If V is used by a recipe matching pattern P, return it.
VPInstruction_match< VPInstruction::ExtractLane, Op0_t, Op1_t > m_ExtractLane(const Op0_t &Op0, const Op1_t &Op1)
auto m_AnyNeg(const Op0_t &Op0)
VPInstruction_match< VPInstruction::Reverse, Op0_t > m_Reverse(const Op0_t &Op0)
NodeAddr< DefNode * > Def
bool isSingleScalar(const VPValue *VPV)
Returns true if VPV is a single scalar, either because it produces the same value for all lanes or on...
VPValue * getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr)
Get or create a VPValue that corresponds to the expansion of Expr.
bool cannotHoistOrSinkRecipe(const VPRecipeBase &R, bool Sinking=false)
Return true if we do not know how to (mechanically) hoist or sink R.
VPInstruction * findComputeReductionResult(VPReductionPHIRecipe *PhiR)
Find the ComputeReductionResult recipe for PhiR, looking through selects inserted for predicated redu...
VPInstruction * findCanonicalIVIncrement(VPlan &Plan)
Find the canonical IV increment of Plan's vector loop region.
std::optional< MemoryLocation > getMemoryLocation(const VPRecipeBase &R)
Return a MemoryLocation for R with noalias metadata populated from R, if the recipe is supported and ...
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
VPValue * findIncomingAliasMask(const VPlan &Plan)
Finds the incoming alias-mask within the vector preheader.
VPRecipeBase * findRecipe(VPValue *Start, PredT Pred)
Search Start's users for a recipe satisfying Pred, looking through recipes with definitions.
VPSingleDefRecipe * findHeaderMask(VPlan &Plan)
Collect the header mask with the pattern: (ICMP_ULE, WideCanonicalIV, backedge-taken-count) Note: If ...
bool onlyScalarValuesUsed(const VPValue *Def)
Returns true if only scalar values of Def are used by all users.
bool isUniformAcrossVFsAndUFs(const VPValue *V)
Checks if V is uniform across all VF lanes and UF parts.
LLVM_ABI_FOR_TEST std::optional< VPValue * > getRecipesForUncountableExit(SmallVectorImpl< VPInstruction * > &Recipes, SmallVectorImpl< VPInstruction * > &GEPs, VPBasicBlock *LatchVPBB)
Returns the VPValue representing the uncountable exit comparison used by AnyOf if the recipes it depe...
const SCEV * getSCEVExprForVPValue(const VPValue *V, PredicatedScalarEvolution &PSE, const Loop *L=nullptr)
Return the SCEV expression for V.
bool isHeaderMask(const VPValue *V, const VPlan &Plan)
Return true if V is a header mask in Plan.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
SmallVector< VPBasicBlock * > vp_rpo_plain_cfg_loop_body(VPBasicBlock *Header)
Returns the VPBasicBlocks forming the loop body of a plain (pre-region) VPlan in reverse post-order s...
constexpr auto not_equal_to(T &&Arg)
Functor variant of std::not_equal_to that can be used as a UnaryPredicate in functional algorithms li...
void stable_sort(R &&Range)
auto min_element(R &&Range)
Provide wrappers to std::min_element which take ranges instead of having to pass begin/end explicitly...
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
DenseMap< const Value *, const SCEV * > ValueToSCEVMapTy
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
constexpr from_range_t from_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
auto cast_or_null(const Y &Val)
iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order.
constexpr auto bind_back(FnT &&Fn, BindArgsT &&...BindArgs)
C++23 bind_back.
iterator_range< df_iterator< VPBlockDeepTraversalWrapper< VPBlockBase * > > > vp_depth_first_deep(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order while traversing t...
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like a...
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
SmallVector< VPRegisterUsage, 8 > calculateRegisterUsageForPlan(VPlan &Plan, ArrayRef< ElementCount > VFs, const TargetTransformInfo &TTI, const SmallPtrSetImpl< const Value * > &ValuesToIgnore)
Estimate the register usage for Plan and vectorization factors in VFs by calculating the highest numb...
auto map_range(ContainerTy &&C, FuncTy F)
Return a range that applies F to the elements of C.
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
constexpr size_t range_size(R &&Range)
Returns the size of the Range, i.e., the number of elements.
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI_FOR_TEST cl::opt< bool > EnableWideActiveLaneMask
UncountableExitStyle
Different methods of handling early exits.
@ MaskedHandleExitInScalarLoop
All memory operations other than the load(s) required to determine whether an uncountable exit occurr...
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
iterator_range< filter_iterator< detail::IterOfRange< RangeT >, PredicateT > > make_filter_range(RangeT &&Range, PredicateT Pred)
Convenience function that takes a range of elements and a predicate, and returns a new filter_iterator...
bool canConstantBeExtended(const APInt *C, Type *NarrowType, TTI::PartialReductionExtendKind ExtKind)
Check if a constant CI can be safely treated as having been extended from a narrower type with the gi...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
RecurKind
These are the kinds of recurrences that we support.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ FindIV
FindIV reduction with select(icmp(),x,y) where one of (x,y) is a loop induction variable (increasing ...
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
@ FSub
Subtraction of floats.
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ Sub
Subtraction of integers.
@ AddChainWithSubs
A chain of adds and subs.
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
LLVM_ABI Value * getRecurrenceIdentity(RecurKind K, Type *Tp, FastMathFlags FMF)
Given information about a recurrence kind, return the identity for the @llvm.vector....
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the specified block at the specified instruction.
FunctionAddr VTableAddr Next
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
DWARFExpression::Operation Op
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...
ArrayRef(const T &OneElt) -> ArrayRef< T >
auto make_second_range(ContainerTy &&c)
Given a container of pairs, return a range over the second elements.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer list are equal or the list is empty.
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
LLVM_ABI bool isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L, ScalarEvolution &SE, DominatorTree &DT, AssumptionCache *AC=nullptr, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
Return true if we can prove that the given load (which is assumed to be within the specified loop) wo...
@ Default
The result value is uniform if and only if all operands are uniform.
constexpr detail::IsaCheckPredicate< Types... > IsaPred
Function object wrapper for the llvm::isa type check.
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
VPBasicBlock * EarlyExitingVPBB
VPIRBasicBlock * EarlyExitVPBB
RemoveMask_match(const Op0_t &In, Op1_t &Out)
bool match(OpTy *V) const
MDNode * Scope
The tag for alias scope specification (used with noalias).
This struct is a compact representation of a valid (non-zero power of two) alignment.
An information struct used to provide DenseMap with the various necessary components for a given valu...
This reduction is unordered with the partial result scaled down by some factor.
Holds the VFShape for a specific scalar to vector function mapping.
Encapsulates information needed to describe a parameter.
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
Struct to hold various analysis needed for cost computations.
static bool isFreeScalarIntrinsic(Intrinsic::ID ID)
Returns true if ID is a pseudo intrinsic that is dropped via scalarization rather than widened.
bool isMaskRequired(Instruction *I) const
Forwards to LoopVectorizationCostModel::isMaskRequired.
PredicatedScalarEvolution & PSE
bool willBeScalarized(Instruction *I, ElementCount VF) const
Returns true if I is known to be scalarized at VF.
TargetTransformInfo::TargetCostKind CostKind
const TargetLibraryInfo & TLI
const TargetTransformInfo & TTI
A VPValue representing a live-in from the input IR or a constant.
Type * getType() const
Returns the type of the underlying IR value.
A struct that represents some properties of the register usage of a loop.
SmallMapVector< unsigned, unsigned, 4 > MaxLocalUsers
Holds the maximum number of concurrent live intervals in the loop.
InstructionCost spillCost(const TargetTransformInfo &TTI, TargetTransformInfo::TargetCostKind CostKind, unsigned OverrideMaxNumRegs=0) const
Calculate the estimated cost of any spills due to using more registers than the number available for ...
A symbolic live-in VPValue, used for values like vector trip count, VF, and VFxUF.
bool isMaterialized() const
Returns true if this symbolic value has been materialized.
A recipe for widening load operations with vector-predication intrinsics, using the address to load f...
A recipe for widening load operations, using the address to load from and an optional mask.
A recipe for widening store operations with vector-predication intrinsics, using the value to store,...
A recipe for widening store operations, using the stored value, the address to store to and an option...