52 GetIntOrFpInductionDescriptor,
59 if (!VPBB->getParent())
62 auto EndIter = Term ? Term->getIterator() : VPBB->end();
67 VPValue *VPV = Ingredient.getVPSingleValue();
76   const auto *II = GetIntOrFpInductionDescriptor(Phi);
90             Phi, Start, Step, &Plan.getVF(), *II, Flags,
91             Ingredient.getDebugLoc());
99             *Load, Ingredient.getOperand(0), nullptr,
100            false, false, *VPI,
101            Ingredient.getDebugLoc());
104            *Store, Ingredient.getOperand(1), Ingredient.getOperand(0),
105            nullptr, false, false, *VPI,
106            Ingredient.getDebugLoc());
109 Ingredient.getDebugLoc());
117 *VPI, CI->getDebugLoc());
120 *VPI, Ingredient.getDebugLoc());
123 CI->getOpcode(), Ingredient.getOperand(0), CI->getType(), CI,
127 *VPI, Ingredient.getDebugLoc());
136 "Only recpies with zero or one defined values expected");
137 Ingredient.eraseFromParent();
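// Editor's note (assumption): the fragment above appears to come from the pass
// that lowers VPInstruction ingredients into widened recipes (wide inductions,
// loads, stores, casts and calls); once the replacement recipe is in place, the
// original ingredient is erased from its parent block.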
154   if (A->getOpcode() != Instruction::Store ||
155 B->getOpcode() != Instruction::Store)
165 const APInt *Distance;
170   Type *TyA = TypeInfo.inferScalarType(A->getOperand(0));
172   Type *TyB = TypeInfo.inferScalarType(B->getOperand(0));
178   uint64_t MaxStoreSize = std::max(SizeA, SizeB);
180   auto VFs = B->getParent()->getPlan()->vectorFactors();
182   return Distance->abs().uge(
190 : ExcludeRecipes(ExcludeRecipes), GroupLeader(GroupLeader), SE(SE), L(L),
191 TypeInfo(TypeInfo) {}
198 return ExcludeRecipes.contains(&R) ||
199 (Store && isNoAliasViaDistance(Store, &GroupLeader));
212 std::optional<SinkStoreInfo> SinkInfo = {}) {
213 bool CheckReads = SinkInfo.has_value();
222 "Expected at most one successor in block chain");
225 if (SinkInfo && SinkInfo->shouldSkip(R))
229     if (!R.mayWriteToMemory() && !(CheckReads && R.mayReadFromMemory()))
240     if (CheckReads && R.mayReadFromMemory() &&
247                                         Loc->AATags.NoAlias))
267 if (R.mayHaveSideEffects() || R.mayReadFromMemory() || R.isPhi())
272 return RepR && RepR->getOpcode() == Instruction::Alloca;
281 auto InsertIfValidSinkCandidate = [ScalarVFOnly, &WorkList](
297 if (!ScalarVFOnly && RepR->isSingleScalar())
300     WorkList.insert({SinkTo, Candidate});
312     for (auto &Recipe : *VPBB)
314         InsertIfValidSinkCandidate(VPBB, Op);
318   for (unsigned I = 0; I != WorkList.size(); ++I) {
321     std::tie(SinkTo, SinkCandidate) = WorkList[I];
326     auto UsersOutsideSinkTo =
328           return cast<VPRecipeBase>(U)->getParent() != SinkTo;
330     if (any_of(UsersOutsideSinkTo, [SinkCandidate](VPUser *U) {
331 return !U->usesFirstLaneOnly(SinkCandidate);
334 bool NeedsDuplicating = !UsersOutsideSinkTo.empty();
336 if (NeedsDuplicating) {
340       if (auto *SinkCandidateRepR =
346             nullptr, *SinkCandidateRepR,
350         Clone = SinkCandidate->clone();
360       InsertIfValidSinkCandidate(SinkTo, Op);
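// Editor's note (assumption): the lines above look like sinkScalarOperands(),
// which keeps a worklist of (sink-target block, candidate recipe) pairs, clones a
// candidate when it still has users outside the target block, and then re-queues
// the operands of each sunk recipe as new candidates.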
370 if (!EntryBB || EntryBB->size() != 1 ||
380 if (EntryBB->getNumSuccessors() != 2)
385 if (!Succ0 || !Succ1)
388 if (Succ0->getNumSuccessors() + Succ1->getNumSuccessors() != 1)
390 if (Succ0->getSingleSuccessor() == Succ1)
392 if (Succ1->getSingleSuccessor() == Succ0)
409 if (!Region1->isReplicator())
411 auto *MiddleBasicBlock =
413 if (!MiddleBasicBlock || !MiddleBasicBlock->empty())
418 if (!Region2 || !Region2->isReplicator())
423 if (!Mask1 || Mask1 != Mask2)
426   assert(Mask1 && Mask2 && "both regions must have conditions");
432   if (TransformedRegions.contains(Region1))
439 if (!Then1 || !Then2)
459 VPValue *Phi1ToMoveV = Phi1ToMove.getVPSingleValue();
465 if (Phi1ToMove.getVPSingleValue()->getNumUsers() == 0) {
466 Phi1ToMove.eraseFromParent();
469 Phi1ToMove.moveBefore(*Merge2, Merge2->begin());
483     TransformedRegions.insert(Region1);
486   return !TransformedRegions.empty();
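// Editor's note (assumption): the preceding fragment likely merges adjacent
// replicate regions that are guarded by the same mask, moving the phi-like
// recipes of the first region's merge block into the second region and reporting
// whether any region was transformed.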
493   std::string RegionName = (Twine("pred.") + Instr->getOpcodeName()).str();
494   assert(Instr->getParent() && "Predicated instruction not in any basic block");
495   auto *BlockInMask = PredRecipe->getMask();
514 RecipeWithoutMask->getDebugLoc());
538 if (RepR->isPredicated())
557     if (ParentRegion && ParentRegion->getExiting() == CurrentBlock)
571 if (!VPBB->getParent())
575 if (!PredVPBB || PredVPBB->getNumSuccessors() != 1 ||
584       R.moveBefore(*PredVPBB, PredVPBB->end());
586 auto *ParentRegion = VPBB->getParent();
587 if (ParentRegion && ParentRegion->getExiting() == VPBB)
588 ParentRegion->setExiting(PredVPBB);
589     for (auto *Succ : to_vector(VPBB->successors())) {
595   return !WorkList.empty();
602   bool ShouldSimplify = true;
603   while (ShouldSimplify) {
619   if (!IV || IV->getTruncInst())
634     for (auto *U : FindMyCast->users()) {
636 if (UserCast && UserCast->getUnderlyingValue() == IRCast) {
637 FoundUserCast = UserCast;
641 FindMyCast = FoundUserCast;
666 if (!WidenOriginalIV || !WidenOriginalIV->isCanonical())
679 WidenOriginalIV->dropPoisonGeneratingFlags();
692 bool IsConditionalAssume = RepR && RepR->isPredicated() &&
694 if (IsConditionalAssume)
697 if (R.mayHaveSideEffects())
701 return all_of(R.definedValues(),
702                 [](VPValue *V) { return V->getNumUsers() == 0; });
718 if (!PhiR || PhiR->getNumOperands() != 2)
720 VPUser *PhiUser = PhiR->getSingleUser();
724   if (PhiUser != Incoming->getDefiningRecipe() ||
727 PhiR->replaceAllUsesWith(PhiR->getOperand(0));
728 PhiR->eraseFromParent();
729 Incoming->getDefiningRecipe()->eraseFromParent();
744         Kind, FPBinOp, StartV, CanonicalIV, Step, "offset.idx");
754     BaseIV = Builder.createScalarCast(Instruction::Trunc, BaseIV, TruncTy, DL);
760 if (ResultTy != StepTy) {
767 Builder.setInsertPoint(VecPreheader);
768     Step = Builder.createScalarCast(Instruction::Trunc, Step, ResultTy, DL);
770 return Builder.createScalarIVSteps(InductionOpcode, FPBinOp, BaseIV, Step,
776   for (unsigned I = 0; I != Users.size(); ++I) {
781       Users.insert_range(V->users());
783   return Users.takeVector();
797       nullptr, StartV, StepV, PtrIV->getDebugLoc(), Builder);
834 Def->getNumUsers() == 0 || !Def->getUnderlyingValue() ||
835 (RepR && (RepR->isSingleScalar() || RepR->isPredicated())))
843         Def->operands(), true,
845 Clone->insertAfter(Def);
846 Def->replaceAllUsesWith(Clone);
857 PtrIV->replaceAllUsesWith(PtrAdd);
864     if (HasOnlyVectorVFs && none_of(WideIV->users(), [WideIV](VPUser *U) {
865           return U->usesScalars(WideIV);
871         Plan, ID.getKind(), ID.getInductionOpcode(),
873 WideIV->getTruncInst(), WideIV->getStartValue(), WideIV->getStepValue(),
874 WideIV->getDebugLoc(), Builder);
877 if (!HasOnlyVectorVFs) {
879 "plans containing a scalar VF cannot also include scalable VFs");
880 WideIV->replaceAllUsesWith(Steps);
883       WideIV->replaceUsesWithIf(Steps,
884                                 [WideIV, HasScalableVF](VPUser &U, unsigned) {
886                                     return U.usesFirstLaneOnly(WideIV);
887                                   return U.usesScalars(WideIV);
903 return (IntOrFpIV && IntOrFpIV->getTruncInst()) ? nullptr : WideIV;
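// Editor's note (assumption): the code above appears to replace users of a wide
// induction with scalar induction steps when only scalar values (or only the
// first lane) are demanded, taking the plan's vector factors into account.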
908 if (!Def || Def->getNumOperands() != 2)
916 auto IsWideIVInc = [&]() {
917     auto &ID = WideIV->getInductionDescriptor();
920     VPValue *IVStep = WideIV->getStepValue();
921     switch (ID.getInductionOpcode()) {
922 case Instruction::Add:
924 case Instruction::FAdd:
927 case Instruction::FSub:
930 case Instruction::Sub: {
949   return IsWideIVInc() ? WideIV : nullptr;
969 if (WideIntOrFp && WideIntOrFp->getTruncInst())
982     FirstActiveLane = B.createScalarZExtOrTrunc(FirstActiveLane, CanonicalIVType,
983                                                 FirstActiveLaneType, DL);
985       B.createNaryOp(Instruction::Add, {CanonicalIV, FirstActiveLane}, DL);
992     EndValue = B.createNaryOp(Instruction::Add, {EndValue, One}, DL);
995 if (!WideIntOrFp || !WideIntOrFp->isCanonical()) {
997 VPValue *Start = WideIV->getStartValue();
998 VPValue *Step = WideIV->getStepValue();
999     EndValue = B.createDerivedIV(
1001        Start, EndValue, Step);
1021  assert(EndValue && "end value must have been pre-computed");
1031 VPValue *Step = WideIV->getStepValue();
1034 return B.createNaryOp(Instruction::Sub, {EndValue, Step},
1039 return B.createPtrAdd(EndValue,
1040 B.createNaryOp(Instruction::Sub, {Zero, Step}),
1044  const auto &ID = WideIV->getInductionDescriptor();
1045 return B.createNaryOp(
1046 ID.getInductionBinOp()->getOpcode() == Instruction::FAdd
1048 : Instruction::FAdd,
1049 {EndValue, Step}, {ID.getInductionBinOp()->getFastMathFlags()});
1064    for (auto [Idx, PredVPBB] : enumerate(ExitVPBB->getPredecessors())) {
1066 if (PredVPBB == MiddleVPBB)
1068 ExitIRI->getOperand(Idx),
1072 ExitIRI->getOperand(Idx), SE);
1074 ExitIRI->setOperand(Idx, Escape);
1091    const auto &[V, Inserted] = SCEV2VPV.try_emplace(ExpR->getSCEV(), ExpR);
1094 ExpR->replaceAllUsesWith(V->second);
1095 ExpR->eraseFromParent();
1104  while (!WorkList.empty()) {
1106    if (!Seen.insert(Cur).second)
1114 R->eraseFromParent();
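// Editor's note (assumption): a standard dead-recipe worklist walk; recipes whose
// defined values have no remaining users are erased and their operands revisited.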
1121 static std::optional<std::pair<bool, unsigned>>
1124 std::optional<std::pair<bool, unsigned>>>(R)
1127            [](auto *I) { return std::make_pair(false, I->getOpcode()); })
1128      .Case<VPWidenIntrinsicRecipe>([](auto *I) {
1129        return std::make_pair(true, I->getVectorIntrinsicID());
1131      .Case<VPVectorPointerRecipe, VPPredInstPHIRecipe>([](auto *I) {
1135        return std::make_pair(false,
1138      .Default([](auto *) { return std::nullopt; });
1154    if (!Op->isLiveIn() || !Op->getLiveInIRValue())
1156    Ops.push_back(Op->getLiveInIRValue());
1159  auto FoldToIRValue = [&]() -> Value * {
1161 if (OpcodeOrIID->first) {
1162 if (R.getNumOperands() != 2)
1164 unsigned ID = OpcodeOrIID->second;
1165      return Folder.FoldBinaryIntrinsic(ID, Ops[0], Ops[1],
1168 unsigned Opcode = OpcodeOrIID->second;
1177      return Folder.FoldSelect(Ops[0], Ops[1],
1180      return Folder.FoldBinOp(Instruction::BinaryOps::Xor, Ops[0],
1182 case Instruction::Select:
1183      return Folder.FoldSelect(Ops[0], Ops[1], Ops[2]);
1184 case Instruction::ICmp:
1185 case Instruction::FCmp:
1188 case Instruction::GetElementPtr: {
1191      return Folder.FoldGEP(GEP->getSourceElementType(), Ops[0],
1201 case Instruction::ExtractElement:
1208  if (Value *V = FoldToIRValue())
1209 return R.getParent()->getPlan()->getOrAddLiveIn(V);
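// Editor's note (assumption): this helper seems to fold recipes whose operands
// are all live-in IR values by dispatching on the opcode (or vector intrinsic ID)
// to InstSimplifyFolder; on success the folded IR value is re-imported into the
// plan as a live-in.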
1215 VPlan *Plan = Def->getParent()->getPlan();
1222 return Def->replaceAllUsesWith(V);
1228    PredPHI->replaceAllUsesWith(Op);
1236    if (TruncTy == ATy) {
1237      Def->replaceAllUsesWith(A);
1246 : Instruction::ZExt;
1249        if (auto *UnderlyingExt = Def->getOperand(0)->getUnderlyingValue()) {
1251 Ext->setUnderlyingValue(UnderlyingExt);
1253 Def->replaceAllUsesWith(Ext);
1255      auto *Trunc = Builder.createWidenCast(Instruction::Trunc, A, TruncTy);
1256 Def->replaceAllUsesWith(Trunc);
1264    for (VPUser *U : A->users()) {
1266      for (VPValue *VPV : R->definedValues())
1280    Def->replaceAllUsesWith(X);
1281    Def->eraseFromParent();
1287    return Def->replaceAllUsesWith(Def->getOperand(Def->getOperand(0) == X));
1291    return Def->replaceAllUsesWith(X);
1295    return Def->replaceAllUsesWith(Def->getOperand(Def->getOperand(0) == X));
1299 return Def->replaceAllUsesWith(Def->getOperand(1));
1306 (!Def->getOperand(0)->hasMoreThanOneUniqueUser() ||
1307 !Def->getOperand(1)->hasMoreThanOneUniqueUser()))
1308    return Def->replaceAllUsesWith(
1309        Builder.createLogicalAnd(X, Builder.createOr(Y, Z)));
1313    return Def->replaceAllUsesWith(Plan->getFalse());
1316    return Def->replaceAllUsesWith(X);
1321    Def->setOperand(0, C);
1322    Def->setOperand(1, Y);
1323    Def->setOperand(2, X);
1332 X->hasMoreThanOneUniqueUser())
1333 return Def->replaceAllUsesWith(
1334        Builder.createLogicalAnd(X, Builder.createLogicalAnd(Y, Z)));
1337    return Def->replaceAllUsesWith(A);
1340    return Def->replaceAllUsesWith(A);
1343    return Def->replaceAllUsesWith(
1344        Def->getOperand(0) == A ? Def->getOperand(1) : Def->getOperand(0));
1348    return Def->replaceAllUsesWith(A);
1363      R->setOperand(1, Y);
1364      R->setOperand(2, X);
1368 R->replaceAllUsesWith(Cmp);
1373 if (!Cmp->getDebugLoc() && Def->getDebugLoc())
1374 Cmp->setDebugLoc(Def->getDebugLoc());
1386      if (Op->getNumUsers() > 1 ||
1390      } else if (!UnpairedCmp) {
1391        UnpairedCmp = Op->getDefiningRecipe();
1395        UnpairedCmp = nullptr;
1402    if (NewOps.size() < Def->getNumOperands()) {
1404 return Def->replaceAllUsesWith(NewAnyOf);
1416 return Def->replaceAllUsesWith(NewCmp);
1424 return Def->replaceAllUsesWith(Def->getOperand(1));
1430      X = Builder.createWidenCast(Instruction::Trunc, X, WideStepTy);
1431    Def->replaceAllUsesWith(X);
1441    Def->setOperand(1, Def->getOperand(0));
1442    Def->setOperand(0, Y);
1447 if (Phi->getOperand(0) == Phi->getOperand(1))
1448 Phi->replaceAllUsesWith(Phi->getOperand(0));
1456 Def->replaceAllUsesWith(
1457 BuildVector->getOperand(BuildVector->getNumOperands() - 1));
1461    return Def->replaceAllUsesWith(A);
1467 Def->replaceAllUsesWith(
1468 BuildVector->getOperand(BuildVector->getNumOperands() - 2));
1475 Def->replaceAllUsesWith(BuildVector->getOperand(Idx));
1480 Def->replaceAllUsesWith(
1490 "broadcast operand must be single-scalar");
1491    Def->setOperand(0, C);
1496 if (Phi->getNumOperands() == 1)
1497 Phi->replaceAllUsesWith(Phi->getOperand(0));
1510    if (Phi->getOperand(1) != Def && match(Phi->getOperand(0), m_ZeroInt()) &&
1511        Phi->getSingleUser() == Def) {
1512      Phi->setOperand(0, Y);
1513 Def->replaceAllUsesWith(Phi);
1522 return VPR->replaceAllUsesWith(VPR->getOperand(0));
1528 Steps->replaceAllUsesWith(Steps->getOperand(0));
1536    Def->replaceUsesWithIf(StartV, [](const VPUser &U, unsigned Idx) {
1538      return PhiR && PhiR->isInLoop();
1544    Def->replaceAllUsesWith(A);
1553             [Def, A](VPUser *U) { return U->usesScalars(A) || Def == U; })) {
1554      return Def->replaceAllUsesWith(A);
1558    return Def->replaceAllUsesWith(A);
1587 if (RepR && (RepR->isSingleScalar() || RepR->isPredicated()))
1596 !WidenStoreR->isConsecutive()) {
1597 assert(!WidenStoreR->isReverse() &&
1598 "Not consecutive memory recipes shouldn't be reversed");
1599 VPValue *Mask = WidenStoreR->getMask();
1608 {WidenStoreR->getOperand(1)});
1613 &WidenStoreR->getIngredient(), {Extract, WidenStoreR->getAddr()},
1614        true, nullptr, {},
1616 ScalarStore->insertBefore(WidenStoreR);
1617 WidenStoreR->eraseFromParent();
1625 RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(),
1626          true, nullptr, *RepR,
1627          *RepR, RepR->getDebugLoc());
1628 Clone->insertBefore(RepOrWidenR);
1630 VPValue *ExtractOp = Clone->getOperand(0);
1636 Clone->setOperand(0, ExtractOp);
1637 RepR->eraseFromParent();
1650    if (!all_of(RepOrWidenR->users(),
1651                [RepOrWidenR](const VPUser *U) {
1652 if (auto *VPI = dyn_cast<VPInstruction>(U)) {
1653 unsigned Opcode = VPI->getOpcode();
1654 if (Opcode == VPInstruction::ExtractLastLane ||
1655 Opcode == VPInstruction::ExtractLastPart ||
1656 Opcode == VPInstruction::ExtractPenultimateElement)
1660 return U->usesScalars(RepOrWidenR);
1663 if (Op->getSingleUser() != RepOrWidenR)
1667 bool LiveInNeedsBroadcast =
1668 Op->isLiveIn() && !isa<Constant>(Op->getLiveInIRValue());
1669 auto *OpR = dyn_cast<VPReplicateRecipe>(Op);
1670 return LiveInNeedsBroadcast || (OpR && OpR->isSingleScalar());
1675 RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(),
1676        true, nullptr, *RepOrWidenR);
1677 Clone->insertBefore(RepOrWidenR);
1678 RepOrWidenR->replaceAllUsesWith(Clone);
1680 RepOrWidenR->eraseFromParent();
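// Editor's note (assumption): the block above appears to narrow wide or
// replicating recipes to single-scalar replicate recipes when every user only
// demands scalar values, cloning the recipe and replacing all of its uses.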
1716    if (Blend->isNormalized() || !match(Blend->getMask(0), m_False()))
1717      UniqueValues.insert(Blend->getIncomingValue(0));
1718    for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)
1720        UniqueValues.insert(Blend->getIncomingValue(I));
1722    if (UniqueValues.size() == 1) {
1723      Blend->replaceAllUsesWith(*UniqueValues.begin());
1724      Blend->eraseFromParent();
1728    if (Blend->isNormalized())
1734    unsigned StartIndex = 0;
1735    for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
1740      if (Mask->getNumUsers() == 1 && !match(Mask, m_False())) {
1747    OperandsWithMask.push_back(Blend->getIncomingValue(StartIndex));
1749    for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
1750      if (I == StartIndex)
1752      OperandsWithMask.push_back(Blend->getIncomingValue(I));
1753      OperandsWithMask.push_back(Blend->getMask(I));
1758 OperandsWithMask, Blend->getDebugLoc());
1759 NewBlend->insertBefore(&R);
1761 VPValue *DeadMask = Blend->getMask(StartIndex);
1763 Blend->eraseFromParent();
1768 if (NewBlend->getNumOperands() == 3 &&
1770 VPValue *Inc0 = NewBlend->getOperand(0);
1771 VPValue *Inc1 = NewBlend->getOperand(1);
1772 VPValue *OldMask = NewBlend->getOperand(2);
1773 NewBlend->setOperand(0, Inc1);
1774 NewBlend->setOperand(1, Inc0);
1775 NewBlend->setOperand(2, NewMask);
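// Editor's note (assumption): blend simplification; a blend whose incoming
// values are all identical is replaced by that single value, otherwise the blend
// is normalized so that a preferred incoming value becomes operand zero and a
// dead mask can be removed.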
1802 APInt MaxVal = AlignedTC - 1;
1805 unsigned NewBitWidth =
1811  bool MadeChange = false;
1820 if (!WideIV || !WideIV->isCanonical() ||
1821 WideIV->hasMoreThanOneUniqueUser() ||
1822 NewIVTy == WideIV->getScalarType())
1827 VPUser *SingleUser = WideIV->getSingleUser();
1836 WideIV->setStartValue(NewStart);
1838 WideIV->setStepValue(NewStep);
1844 Cmp->setOperand(1, NewBTC);
1858  return any_of(Cond->getDefiningRecipe()->operands(), [&Plan, BestVF, BestUF,
1860 return isConditionTrueViaVFAndUF(C, Plan, BestVF, BestUF, SE);
1873 const SCEV *VectorTripCount =
1878 "Trip count SCEV must be computable");
1898  auto *Term = &ExitingVPBB->back();
1911    for (unsigned Part = 0; Part < UF; ++Part) {
1919 Extracts[Part] = Ext;
1931 match(Phi->getBackedgeValue(),
1933    assert(Index && "Expected index from ActiveLaneMask instruction");
1946 "Expected one VPActiveLaneMaskPHIRecipe for each unroll part");
1953 "Expected incoming values of Phi to be ActiveLaneMasks");
1958 EntryALM->setOperand(2, ALMMultiplier);
1959 LoopALM->setOperand(2, ALMMultiplier);
1963 ExtractFromALM(EntryALM, EntryExtracts);
1968 ExtractFromALM(LoopALM, LoopExtracts);
1970 Not->setOperand(0, LoopExtracts[0]);
1973  for (unsigned Part = 0; Part < UF; ++Part) {
1974 Phis[Part]->setStartValue(EntryExtracts[Part]);
1975 Phis[Part]->setBackedgeValue(LoopExtracts[Part]);
1988  auto *Term = &ExitingVPBB->back();
1996 const SCEV *VectorTripCount =
2001 "Trip count SCEV must be computable");
2023 if (auto *R = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi))
2024 return R->isCanonical();
2025 return isa<VPCanonicalIVPHIRecipe, VPEVLBasedIVPHIRecipe,
2026 VPFirstOrderRecurrencePHIRecipe, VPPhi>(&Phi);
2032 R->getScalarType());
2034 HeaderR.eraseFromParent();
2038 HeaderR.getVPSingleValue()->replaceAllUsesWith(Phi->getIncomingValue(0));
2039 HeaderR.eraseFromParent();
2048    B->setParent(nullptr);
2057 {}, {}, Term->getDebugLoc());
2061 Term->eraseFromParent();
2088 R.getVPSingleValue()->replaceAllUsesWith(Trunc);
2098  assert(Plan.hasVF(BestVF) && "BestVF is not available in Plan");
2099  assert(Plan.hasUF(BestUF) && "BestUF is not available in Plan");
2108  assert(Plan.getUF() == BestUF && "BestUF must match the Plan's UF");
2123  auto TryToPushSinkCandidate = [&](VPRecipeBase *SinkCandidate) {
2126    if (SinkCandidate == Previous)
2130        !Seen.insert(SinkCandidate).second ||
2143  for (unsigned I = 0; I != WorkList.size(); ++I) {
2146 "only recipes with a single defined value expected");
2161 if (SinkCandidate == FOR)
2164 SinkCandidate->moveAfter(Previous);
2165 Previous = SinkCandidate;
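// Editor's note (assumption): this looks like the sinking of users of a
// first-order recurrence after the recurrence's previous value, moving each sink
// candidate directly after "Previous" and chaining from there.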
2183  for (VPUser *U : FOR->users()) {
2189                [&VPDT, HoistPoint](VPUser *U) {
2190 auto *R = cast<VPRecipeBase>(U);
2191 return HoistPoint == R ||
2192 VPDT.properlyDominates(HoistPoint, R);
2194 "HoistPoint must dominate all users of FOR");
2196 auto NeedsHoisting = [HoistPoint, &VPDT,
2198 VPRecipeBase *HoistCandidate = HoistCandidateV->getDefiningRecipe();
2199 if (!HoistCandidate)
2204            HoistCandidate->getRegion() == EnclosingLoopRegion) &&
2205           "CFG in VPlan should still be flat, without replicate regions");
2207    if (!Visited.insert(HoistCandidate).second)
2219 return HoistCandidate;
2228  for (unsigned I = 0; I != HoistCandidates.size(); ++I) {
2231 "only recipes with a single defined value expected");
2243      if (auto *R = NeedsHoisting(Op)) {
2246 if (R->getNumDefinedValues() != 1)
2260    HoistCandidate->moveBefore(*HoistPoint->getParent(),
2279 VPRecipeBase *Previous = FOR->getBackedgeValue()->getDefiningRecipe();
2282    while (auto *PrevPhi =
2284 assert(PrevPhi->getParent() == FOR->getParent());
2286 Previous = PrevPhi->getBackedgeValue()->getDefiningRecipe();
2304 {FOR, FOR->getBackedgeValue()});
2306 FOR->replaceAllUsesWith(RecurSplice);
2309 RecurSplice->setOperand(0, FOR);
2315    for (VPUser *U : RecurSplice->users()) {
2326 B.createNaryOp(Instruction::Sub, {LastActiveLane, One});
2327 VPValue *PenultimateLastIter =
2329 {PenultimateIndex, FOR->getBackedgeValue()});
2334      VPValue *Sel = B.createSelect(Cmp, LastPrevIter, PenultimateLastIter);
2347 RecurKind RK = PhiR->getRecurrenceKind();
2354 RecWithFlags->dropPoisonGeneratingFlags();
2360 struct VPCSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> {
2362 return Def == getEmptyKey() || Def == getTombstoneKey();
2373 return GEP->getSourceElementType();
2376 .Case<VPVectorPointerRecipe, VPWidenGEPRecipe>(
2377            [](auto *I) { return I->getSourceElementType(); })
2378        .Default([](auto *) { return nullptr; });
2382  static bool canHandle(const VPSingleDefRecipe *Def) {
2391    if (!C || (!C->first && (C->second == Instruction::InsertValue ||
2392                             C->second == Instruction::ExtractValue)))
2398    return !Def->mayReadFromMemory();
2402  static unsigned getHashValue(const VPSingleDefRecipe *Def) {
2403    const VPlan *Plan = Def->getParent()->getPlan();
2404 VPTypeAnalysis TypeInfo(*Plan);
2407 getGEPSourceElementType(Def), TypeInfo.inferScalarType(Def),
2410 if (RFlags->hasPredicate())
2416  static bool isEqual(const VPSingleDefRecipe *L, const VPSingleDefRecipe *R) {
2419    if (L->getVPDefID() != R->getVPDefID() ||
2421        getGEPSourceElementType(L) != getGEPSourceElementType(R) ||
2423        !equal(L->operands(), R->operands()))
2426 "must have valid opcode info for both recipes");
2428 if (LFlags->hasPredicate() &&
2429 LFlags->getPredicate() !=
2435    const VPRegionBlock *RegionL = L->getRegion();
2436    const VPRegionBlock *RegionR = R->getRegion();
2439        L->getParent() != R->getParent())
2441    const VPlan *Plan = L->getParent()->getPlan();
2442 VPTypeAnalysis TypeInfo(*Plan);
2443 return TypeInfo.inferScalarType(L) == TypeInfo.inferScalarType(R);
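// Editor's note (assumption): VPCSEDenseMapInfo hashes and compares recipes by
// their VPDefID, opcode or intrinsic ID, GEP source element type, inferred scalar
// type, flags and operands, so structurally identical recipes can be commoned.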
2458 if (!Def || !VPCSEDenseMapInfo::canHandle(Def))
2462      if (!VPDT.dominates(V->getParent(), VPBB))
2467 Def->replaceAllUsesWith(V);
2486 "Expected vector prehader's successor to be the vector loop region");
2493 return !Op->isDefinedOutsideLoopRegions();
2496      R.moveBefore(*Preheader, Preheader->end());
2520 VPValue *ResultVPV = R.getVPSingleValue();
2522    unsigned NewResSizeInBits = MinBWs.lookup(UI);
2523 if (!NewResSizeInBits)
2536 (void)OldResSizeInBits;
2544 VPW->dropPoisonGeneratingFlags();
2546 if (OldResSizeInBits != NewResSizeInBits &&
2551 Ext->insertAfter(&R);
2553 Ext->setOperand(0, ResultVPV);
2554      assert(OldResSizeInBits > NewResSizeInBits && "Nothing to shrink?");
2557 "Only ICmps should not need extending the result.");
2566    for (unsigned Idx = StartIdx; Idx != R.getNumOperands(); ++Idx) {
2567      auto *Op = R.getOperand(Idx);
2568 unsigned OpSizeInBits =
2570 if (OpSizeInBits == NewResSizeInBits)
2572      assert(OpSizeInBits > NewResSizeInBits && "nothing to truncate");
2573      auto [ProcessedIter, IterIsEmpty] = ProcessedTruncs.try_emplace(Op);
2575 R.setOperand(Idx, ProcessedIter->second);
2583 Builder.setInsertPoint(&R);
2585          Builder.createWidenCast(Instruction::Trunc, Op, NewResTy);
2586 ProcessedIter->second = NewOp;
2587 R.setOperand(Idx, NewOp);
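// Editor's note (assumption): minimal-bitwidth shrinking; results are truncated
// to the width recorded in MinBWs, operands are truncated on demand (with a cache
// of already-processed truncations), and an extend is inserted after the recipe
// to restore the original width where needed.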
2602 assert(VPBB->getNumSuccessors() == 2 &&
2603 "Two successors expected for BranchOnCond");
2604 unsigned RemovedIdx;
2615 "There must be a single edge between VPBB and its successor");
2624 VPBB->back().eraseFromParent();
2686 VPValue *StartV = CanonicalIVPHI->getStartValue();
2688 auto *CanonicalIVIncrement =
2692 CanonicalIVIncrement->dropPoisonGeneratingFlags();
2693 DebugLoc DL = CanonicalIVIncrement->getDebugLoc();
2703 VPValue *TripCount, *IncrementValue;
2708 IncrementValue = CanonicalIVIncrement;
2714 IncrementValue = CanonicalIVPHI;
2718 auto *EntryIncrement = Builder.createOverflowingOp(
2726      {EntryIncrement, TC, ALMMultiplier}, DL,
2727 "active.lane.mask.entry");
2733 LaneMaskPhi->insertAfter(CanonicalIVPHI);
2738 Builder.setInsertPoint(OriginalTerminator);
2739  auto *InLoopIncrement =
2741                                    {IncrementValue}, {false, false}, DL);
2743 {InLoopIncrement, TripCount, ALMMultiplier},
2744                                   DL, "active.lane.mask.next");
2749  auto *NotMask = Builder.createNot(ALM, DL);
2762  auto *FoundWidenCanonicalIVUser = find_if(
2766 "Must have at most one VPWideCanonicalIVRecipe");
2767 if (FoundWidenCanonicalIVUser !=
2769 auto *WideCanonicalIV =
2771    WideCanonicalIVs.push_back(WideCanonicalIV);
2779    if (WidenOriginalIV && WidenOriginalIV->isCanonical())
2780      WideCanonicalIVs.push_back(WidenOriginalIV);
2786  for (auto *Wide : WideCanonicalIVs) {
2792 assert(VPI->getOperand(0) == Wide &&
2793 "WidenCanonicalIV must be the first operand of the compare");
2794      assert(!HeaderMask && "Multiple header masks found?");
2802    VPlan &Plan, bool UseActiveLaneMaskForControlFlow,
2805 UseActiveLaneMaskForControlFlow) &&
2806 "DataAndControlFlowWithoutRuntimeCheck implies "
2807 "UseActiveLaneMaskForControlFlow");
2810  auto *FoundWidenCanonicalIVUser = find_if(
2812 assert(FoundWidenCanonicalIVUser &&
2813 "Must have widened canonical IV when tail folding!");
2815 auto *WideCanonicalIV =
2818 if (UseActiveLaneMaskForControlFlow) {
2828        nullptr, "active.lane.mask");
2844  template <typename OpTy> bool match(OpTy *V) const {
2855 template <typename Op0_t, typename Op1_t>
2874 VPValue *Addr, *Mask, *EndPtr;
2877  auto AdjustEndPtr = [&CurRecipe, &EVL](VPValue *EndPtr) {
2879 EVLEndPtr->insertBefore(&CurRecipe);
2880 EVLEndPtr->setOperand(1, &EVL);
2884  if (match(&CurRecipe,
2890  if (match(&CurRecipe,
2895 AdjustEndPtr(EndPtr), EVL, Mask);
2908 AdjustEndPtr(EndPtr), EVL, Mask);
2911 if (Rdx->isConditional() &&
2916 if (Interleave->getMask() &&
2921  if (match(&CurRecipe,
2930        Intrinsic::vp_merge, {Mask, LHS, RHS, &EVL},
2953 "User of VF that we can't transform to EVL.");
2959                [&LoopRegion, &Plan](VPUser *U) {
2961 m_c_Add(m_Specific(LoopRegion->getCanonicalIV()),
2962 m_Specific(&Plan.getVFxUF()))) ||
2963 isa<VPWidenPointerInductionRecipe>(U);
2965 "Only users of VFxUF should be VPWidenPointerInductionRecipe and the "
2966 "increment of the canonical induction.");
2986 MaxEVL = Builder.createScalarZExtOrTrunc(
2990 Builder.setInsertPoint(Header, Header->getFirstNonPhi());
2991 VPValue *PrevEVL = Builder.createScalarPhi(
3005 Intrinsic::experimental_vp_splice,
3006          {V1, V2, Imm, Plan.getTrue(), PrevEVL, &EVL},
3010 R.getVPSingleValue()->replaceAllUsesWith(VPSplice);
3028 VPValue *EVLMask = Builder.createICmp(
3046 assert(NumDefVal == CurRecipe->getNumDefinedValues() &&
3047 "New recipe must define the same number of values as the "
3052  for (unsigned I = 0; I < NumDefVal; ++I) {
3053    VPValue *CurVPV = CurRecipe->getVPValue(I);
3065 R->eraseFromParent();
3115    VPlan &Plan, const std::optional<unsigned> &MaxSafeElements) {
3123 VPValue *StartV = CanonicalIVPHI->getStartValue();
3127 EVLPhi->insertAfter(CanonicalIVPHI);
3128 VPBuilder Builder(Header, Header->getFirstNonPhi());
3131 VPPhi *AVLPhi = Builder.createScalarPhi(
3135 if (MaxSafeElements) {
3145 auto *CanonicalIVIncrement =
3147 Builder.setInsertPoint(CanonicalIVIncrement);
3151 OpVPEVL = Builder.createScalarZExtOrTrunc(
3152 OpVPEVL, CanIVTy, I32Ty, CanonicalIVIncrement->getDebugLoc());
3154 auto *NextEVLIV = Builder.createOverflowingOp(
3155 Instruction::Add, {OpVPEVL, EVLPhi},
3156 {CanonicalIVIncrement->hasNoUnsignedWrap(),
3157 CanonicalIVIncrement->hasNoSignedWrap()},
3158      CanonicalIVIncrement->getDebugLoc(), "index.evl.next");
3159 EVLPhi->addOperand(NextEVLIV);
3161 VPValue *NextAVL = Builder.createOverflowingOp(
3162      Instruction::Sub, {AVLPhi, OpVPEVL}, {true, false},
3170 CanonicalIVPHI->replaceAllUsesWith(EVLPhi);
3171 CanonicalIVIncrement->setOperand(0, CanonicalIVPHI);
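// Editor's note (assumption): the explicit-vector-length transform; an EVL-based
// IV phi and an AVL (remaining trip count) phi are introduced, the EVL is added
// to the IV and subtracted from the AVL each iteration, and the canonical IV is
// rewired to use the EVL-based phi.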
3185    assert(!EVLPhi && "Found multiple EVL PHIs. Only one expected");
3196  [[maybe_unused]] bool FoundAVL =
3199  assert(FoundAVL && "Didn't find AVL?");
3207  [[maybe_unused]] bool FoundAVLNext =
3210  assert(FoundAVLNext && "Didn't find AVL backedge?");
3221 VPValue *Backedge = CanonicalIV->getIncomingValue(1);
3224 "Unexpected canonical iv");
3230 CanonicalIV->eraseFromParent();
3244 "Expected BranchOnCond with ICmp comparing EVL increment with vector "
3249 LatchExitingBr->setOperand(0,
3261 return R->getRegion() ||
3265  for (const SCEV *Stride : StridesMap.values()) {
3268 const APInt *StrideConst;
3285 unsigned BW = U->getType()->getScalarSizeInBits();
3291    RewriteMap[StrideV] = PSE.getSCEV(StrideV);
3298 const SCEV *ScevExpr = ExpSCEV->getSCEV();
3301 if (NewSCEV != ScevExpr) {
3303 ExpSCEV->replaceAllUsesWith(NewExp);
3312    const std::function<bool(BasicBlock *)> &BlockNeedsPredication) {
3316  auto CollectPoisonGeneratingInstrsInBackwardSlice([&](VPRecipeBase *Root) {
3321    while (!Worklist.empty()) {
3324      if (!Visited.insert(CurRec).second)
3346 RecWithFlags->isDisjoint()) {
3349              Instruction::Add, {A, B}, {false, false},
3350 RecWithFlags->getDebugLoc());
3351 New->setUnderlyingValue(RecWithFlags->getUnderlyingValue());
3352 RecWithFlags->replaceAllUsesWith(New);
3353 RecWithFlags->eraseFromParent();
3356 RecWithFlags->dropPoisonGeneratingFlags();
3361 assert((!Instr || !Instr->hasPoisonGeneratingFlags()) &&
3362 "found instruction with poison generating flags not covered by "
3363 "VPRecipeWithIRFlags");
3368        if (VPRecipeBase *OpDef = Operand->getDefiningRecipe())
3380 Instruction &UnderlyingInstr = WidenRec->getIngredient();
3381 VPRecipeBase *AddrDef = WidenRec->getAddr()->getDefiningRecipe();
3382 if (AddrDef && WidenRec->isConsecutive() &&
3383          BlockNeedsPredication(UnderlyingInstr.getParent()))
3384 CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
3386 VPRecipeBase *AddrDef = InterleaveRec->getAddr()->getDefiningRecipe();
3390 InterleaveRec->getInterleaveGroup();
3391      bool NeedPredication = false;
3393           I < NumMembers; ++I) {
3396 NeedPredication |= BlockNeedsPredication(Member->getParent());
3399 if (NeedPredication)
3400 CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
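// Editor's note (assumption): for consecutive memory recipes in blocks that need
// predication, the backward slice of the address computation is walked and
// poison-generating flags are dropped (a disjoint Or may be rewritten to an Add).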
3412 if (InterleaveGroups.empty())
3419  for (const auto *IG : InterleaveGroups) {
3425      StoredValues.push_back(StoreR->getStoredValue());
3426    for (unsigned I = 1; I < IG->getFactor(); ++I) {
3433        StoredValues.push_back(StoreR->getStoredValue());
3437    bool NeedsMaskForGaps =
3438        (IG->requiresScalarEpilogue() && !ScalarEpilogueAllowed) ||
3439        (!StoredValues.empty() && !IG->isFull());
3451 VPValue *Addr = Start->getAddr();
3460 assert(IG->getIndex(IRInsertPos) != 0 &&
3461 "index of insert position shouldn't be zero");
3465 IG->getIndex(IRInsertPos),
3469      Addr = B.createNoWrapPtrAdd(InsertPos->getAddr(), OffsetVPV, NW);
3475 if (IG->isReverse()) {
3478 -(int64_t)IG->getFactor(), NW, InsertPos->getDebugLoc());
3479 ReversePtr->insertBefore(InsertPos);
3483 InsertPos->getMask(), NeedsMaskForGaps,
3484 InterleaveMD, InsertPos->getDebugLoc());
3485 VPIG->insertBefore(InsertPos);
3488    for (unsigned i = 0; i < IG->getFactor(); ++i)
3491 if (!Member->getType()->isVoidTy()) {
3550 AddOp = Instruction::Add;
3551 MulOp = Instruction::Mul;
3553    AddOp = ID.getInductionOpcode();
3554 MulOp = Instruction::FMul;
3562    Step = Builder.createScalarCast(Instruction::Trunc, Step, Ty, DL);
3563    Start = Builder.createScalarCast(Instruction::Trunc, Start, Ty, DL);
3565 Flags.dropPoisonGeneratingFlags();
3574    Init = Builder.createWidenCast(Instruction::UIToFP, Init, StepTy);
3579  Init = Builder.createNaryOp(MulOp, {Init, SplatStep}, Flags);
3580  Init = Builder.createNaryOp(AddOp, {SplatStart, Init}, Flags,
3586 WidePHI->insertBefore(WidenIVR);
3597 Builder.setInsertPoint(R->getParent(), std::next(R->getIterator()));
3601 VF = Builder.createScalarCast(Instruction::CastOps::UIToFP, VF, StepTy,
3604 VF = Builder.createScalarZExtOrTrunc(VF, StepTy,
3607 Inc = Builder.createNaryOp(MulOp, {Step, VF}, Flags);
3614  auto *Next = Builder.createNaryOp(AddOp, {Prev, Inc}, Flags,
3617  WidePHI->addOperand(Next);
3645 VPlan *Plan = R->getParent()->getPlan();
3646 VPValue *Start = R->getStartValue();
3647 VPValue *Step = R->getStepValue();
3648 VPValue *VF = R->getVFValue();
3650 assert(R->getInductionDescriptor().getKind() ==
3652 "Not a pointer induction according to InductionDescriptor!");
3655 "Recipe should have been replaced");
3661  VPPhi *ScalarPtrPhi = Builder.createScalarPhi(Start, DL, "pointer.phi");
3665 Builder.setInsertPoint(R->getParent(), R->getParent()->getFirstNonPhi());
3668  Offset = Builder.createOverflowingOp(Instruction::Mul, {Offset, Step});
3669 VPValue *PtrAdd = Builder.createNaryOp(
3671 R->replaceAllUsesWith(PtrAdd);
3676  VF = Builder.createScalarZExtOrTrunc(VF, StepTy, TypeInfo.inferScalarType(VF),
3678 VPValue *Inc = Builder.createOverflowingOp(Instruction::Mul, {Step, VF});
3681      Builder.createPtrAdd(ScalarPtrPhi, Inc, DL, "ptr.ind");
3690 if (!R->isReplicator())
3694 R->dissolveToCFGLoop();
3716 WidenIVR->replaceAllUsesWith(PtrAdd);
3729      for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)
3730        Select = Builder.createSelect(Blend->getMask(I),
3731                                      Blend->getIncomingValue(I), Select,
3732                                      R.getDebugLoc(), "predphi");
3733      Blend->replaceAllUsesWith(Select);
3748      for (VPValue *Op : LastActiveL->operands()) {
3749        VPValue *NotMask = Builder.createNot(Op, LastActiveL->getDebugLoc());
3754 VPValue *FirstInactiveLane = Builder.createNaryOp(
3756          LastActiveL->getDebugLoc(), "first.inactive.lane");
3761 VPValue *LastLane = Builder.createNaryOp(
3762 Instruction::Sub, {FirstInactiveLane, One},
3763          LastActiveL->getDebugLoc(), "last.active.lane");
3774 DebugLoc DL = BranchOnCountInst->getDebugLoc();
3777 ToRemove.push_back(BranchOnCountInst);
3792 ? Instruction::UIToFP
3793 : Instruction::Trunc;
3794 VectorStep = Builder.createWidenCast(CastOp, VectorStep, IVTy);
3800 Builder.createWidenCast(Instruction::Trunc, ScalarStep, IVTy);
3805 Flags = {VPI->getFastMathFlags()};
3810 MulOpc, {VectorStep, ScalarStep}, Flags, R.getDebugLoc());
3812 VPI->replaceAllUsesWith(VectorStep);
3818 R->eraseFromParent();
3831 "unsupported early exit VPBB");
3842 "Terminator must be be BranchOnCond");
3843 VPValue *CondOfEarlyExitingVPBB =
3845 auto *CondToEarlyExit = TrueSucc == EarlyExitVPBB
3846 ? CondOfEarlyExitingVPBB
3847 : Builder.createNot(CondOfEarlyExitingVPBB);
3864 VPBuilder EarlyExitB(VectorEarlyExitVPBB);
3869 unsigned EarlyExitIdx = ExitIRI->getNumOperands() - 1;
3870 if (ExitIRI->getNumOperands() != 1) {
3873 ExitIRI->extractLastLaneOfLastPartOfFirstOperand(MiddleBuilder);
3876 VPValue *IncomingFromEarlyExit = ExitIRI->getOperand(EarlyExitIdx);
3877    if (!IncomingFromEarlyExit->isLiveIn()) {
3885      ExitIRI->setOperand(EarlyExitIdx, IncomingFromEarlyExit);
3896 "Unexpected terminator");
3897 auto *IsLatchExitTaken =
3899 LatchExitingBranch->getOperand(1));
3900 auto *AnyExitTaken = Builder.createNaryOp(
3901 Instruction::Or, {IsEarlyExitTaken, IsLatchExitTaken});
3903 LatchExitingBranch->eraseFromParent();
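// Editor's note (assumption): uncountable early-exit handling; the early-exit
// condition is combined (Or) with the latch exit condition, and exit phis receive
// the value extracted from the last active lane of the early-exiting iteration.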
3913 Type *RedTy = Ctx.Types.inferScalarType(Red);
3914 VPValue *VecOp = Red->getVecOp();
3917 auto IsExtendedRedValidAndClampRange =
3929 if (Red->isPartialReduction()) {
3934 ExtRedCost = Ctx.TTI.getPartialReductionCost(
3935          Opcode, SrcTy, nullptr, RedTy, VF, ExtKind,
3938 ExtRedCost = Ctx.TTI.getExtendedReductionCost(
3939 Opcode, ExtOpc == Instruction::CastOps::ZExt, RedTy, SrcVecTy,
3940          Red->getFastMathFlags(), CostKind);
3942    return ExtRedCost.isValid() && ExtRedCost < ExtCost + RedCost;
3950 IsExtendedRedValidAndClampRange(
3953          Ctx.Types.inferScalarType(A)))
3971 if (Opcode != Instruction::Add && Opcode != Instruction::Sub)
3974 Type *RedTy = Ctx.Types.inferScalarType(Red);
3977 auto IsMulAccValidAndClampRange =
3984 Ext0 ? Ctx.Types.inferScalarType(Ext0->getOperand(0)) : RedTy;
3987 if (Red->isPartialReduction()) {
3989          Ext1 ? Ctx.Types.inferScalarType(Ext1->getOperand(0)) : nullptr;
3992 MulAccCost = Ctx.TTI.getPartialReductionCost(
3993 Opcode, SrcTy, SrcTy2, RedTy, VF,
4003 if (Ext0 && Ext1 && Ext0->getOpcode() != Ext1->getOpcode())
4007 !Ext0 || Ext0->getOpcode() == Instruction::CastOps::ZExt;
4009 MulAccCost = Ctx.TTI.getMulAccReductionCost(IsZExt, Opcode, RedTy,
4017 ExtCost += Ext0->computeCost(VF, Ctx);
4019 ExtCost += Ext1->computeCost(VF, Ctx);
4021 ExtCost += OuterExt->computeCost(VF, Ctx);
4023    return MulAccCost.isValid() &&
4024 MulAccCost < ExtCost + MulCost + RedCost;
4029 VPValue *VecOp = Red->getVecOp();
4047    if (!ExtA || ExtB || !ValB->isLiveIn())
4063 Builder.createWidenCast(Instruction::CastOps::Trunc, ValB, NarrowTy);
4064 Type *WideTy = Ctx.Types.inferScalarType(ExtA);
4065 ValB = ExtB = Builder.createWidenCast(ExtOpc, Trunc, WideTy);
4066 Mul->setOperand(1, ExtB);
4076    ExtendAndReplaceConstantOp(RecipeA, RecipeB, B, Mul);
4081        IsMulAccValidAndClampRange(Mul, RecipeA, RecipeB, nullptr)) {
4088    if (!Sub && IsMulAccValidAndClampRange(Mul, nullptr, nullptr, nullptr))
4105    ExtendAndReplaceConstantOp(Ext0, Ext1, B, Mul);
4114 (Ext->getOpcode() == Ext0->getOpcode() || Ext0 == Ext1) &&
4115 Ext0->getOpcode() == Ext1->getOpcode() &&
4116        IsMulAccValidAndClampRange(Mul, Ext0, Ext1, Ext) && Mul->hasOneUse()) {
4118          Ext0->getOpcode(), Ext0->getOperand(0), Ext->getResultType(), nullptr,
4119          *Ext0, *Ext0, Ext0->getDebugLoc());
4120 NewExt0->insertBefore(Ext0);
4125            Ext->getResultType(), nullptr, *Ext1,
4126 *Ext1, Ext1->getDebugLoc());
4129 Mul->setOperand(0, NewExt0);
4130 Mul->setOperand(1, NewExt1);
4131      Red->setOperand(1, Mul);
4144 auto IP = std::next(Red->getIterator());
4145 auto *VPBB = Red->getParent();
4155 Red->replaceAllUsesWith(AbstractR);
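// Editor's note (assumption): reduction bundling; an extend/multiply feeding a
// reduction is folded into an abstract (possibly partial) reduction recipe when
// the target reports a lower cost for the combined operation across the
// candidate vectorization factors.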
4185  for (VPValue *VPV : VPValues) {
4187        (VPV->isLiveIn() && VPV->getLiveInIRValue() &&
4195      if (User->usesScalars(VPV))
4198      HoistPoint = HoistBlock->begin();
4202 "All users must be in the vector preheader or dominated by it");
4207 VPV->replaceUsesWithIf(Broadcast,
4208                           [VPV, Broadcast](VPUser &U, unsigned Idx) {
4209 return Broadcast != &U && !U.usesScalars(VPV);
4226 if (RepR->isPredicated() || !RepR->isSingleScalar() ||
4227 RepR->getOpcode() != Instruction::Load)
4230 VPValue *Addr = RepR->getOperand(0);
4233      if (!Loc.AATags.Scope)
4238 if (R.mayWriteToMemory()) {
4240      if (!Loc || !Loc->AATags.Scope || !Loc->AATags.NoAlias)
4248  for (auto &[LoadRecipe, LoadLoc] : CandidateLoads) {
4252 const AAMDNodes &LoadAA = LoadLoc.AATags;
4268 return CommonMetadata;
4271 template <unsigned Opcode>
4275 static_assert(Opcode == Instruction::Load || Opcode == Instruction::Store,
4276 "Only Load and Store opcodes supported");
4277 constexpr bool IsLoad = (Opcode == Instruction::Load);
4287 if (!RepR || RepR->getOpcode() != Opcode || !RepR->isPredicated())
4291 VPValue *Addr = RepR->getOperand(IsLoad ? 0 : 1);
4294 RecipesByAddress[AddrSCEV].push_back(RepR);
4301    return TypeInfo.inferScalarType(IsLoad ? Recipe : Recipe->getOperand(0));
4303  for (auto &[Addr, Recipes] : RecipesByAddress) {
4304 if (Recipes.size() < 2)
4312 VPValue *MaskI = RecipeI->getMask();
4313 Type *TypeI = GetLoadStoreValueType(RecipeI);
4319      bool HasComplementaryMask = false;
4324 VPValue *MaskJ = RecipeJ->getMask();
4325 Type *TypeJ = GetLoadStoreValueType(RecipeJ);
4326 if (TypeI == TypeJ) {
4336 if (HasComplementaryMask) {
4337      assert(Group.size() >= 2 && "must have at least 2 entries");
4347 template <typename InstType>
4366  for (auto &Group : Groups) {
4391 LoadWithMinAlign->getUnderlyingInstr(), {EarliestLoad->getOperand(0)},
4392        false, nullptr, *EarliestLoad,
4395 UnpredicatedLoad->insertBefore(EarliestLoad);
4399 Load->replaceAllUsesWith(UnpredicatedLoad);
4400 Load->eraseFromParent();
4410 if (!StoreLoc || !StoreLoc->AATags.Scope)
4416                                                     StoresToSink.end());
4420 SinkStoreInfo SinkInfo(StoresToSinkSet, *StoresToSink[0], SE, L, TypeInfo);
4434  for (auto &Group : Groups) {
4451 VPValue *SelectedValue = Group[0]->getOperand(0);
4454    for (unsigned I = 1; I < Group.size(); ++I) {
4455      VPValue *Mask = Group[I]->getMask();
4457      SelectedValue = Builder.createSelect(Mask, Value, SelectedValue,
4465 auto *UnpredicatedStore =
4467 {SelectedValue, LastStore->getOperand(1)},
4469 nullptr, *LastStore, CommonMetadata);
4470    UnpredicatedStore->insertBefore(*InsertBB, LastStore->getIterator());
4474 Store->eraseFromParent();
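// Editor's note (assumption): predicated loads/stores to the same address whose
// masks are complementary are merged; loads collapse into one unpredicated load,
// while stores select the stored value by mask and emit a single unpredicated
// store carrying the common alias metadata.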
4481  assert(Plan.hasVF(BestVF) && "BestVF is not available in Plan");
4482  assert(Plan.hasUF(BestUF) && "BestUF is not available in Plan");
4516 auto *TCMO = Builder.createNaryOp(
4544    auto UsesVectorOrInsideReplicateRegion = [DefR, LoopRegion](VPUser *U) {
4546 return !U->usesScalars(DefR) || ParentRegion != LoopRegion;
4553 none_of(DefR->users(), UsesVectorOrInsideReplicateRegion))
4563 DefR->replaceUsesWithIf(
4564        BuildVector, [BuildVector, &UsesVectorOrInsideReplicateRegion](
4580    for (VPValue *Def : R.definedValues()) {
4593      auto IsCandidateUnpackUser = [Def](VPUser *U) {
4595        return U->usesScalars(Def) &&
4598      if (none_of(Def->users(), IsCandidateUnpackUser))
4605 Unpack->insertAfter(&R);
4606 Def->replaceUsesWithIf(Unpack,
4607                             [&IsCandidateUnpackUser](VPUser &U, unsigned) {
4608 return IsCandidateUnpackUser(&U);
4618 bool RequiresScalarEpilogue) {
4620  assert(VectorTC.isLiveIn() && "vector-trip-count must be a live-in");
4639 if (TailByMasking) {
4640 TC = Builder.createNaryOp(
4642 {TC, Builder.createNaryOp(Instruction::Sub,
4653 Builder.createNaryOp(Instruction::URem, {TC, Step},
4662 if (RequiresScalarEpilogue) {
4664 "requiring scalar epilogue is not supported with fail folding");
4667 R = Builder.createSelect(IsZero, Step, R);
4670 VPValue *Res = Builder.createNaryOp(
4689      Builder.createElementCount(TCTy, VFEC * Plan.getUF());
4696 VPValue *RuntimeVF = Builder.createElementCount(TCTy, VFEC);
4700      BC, [&VF](VPUser &U, unsigned) { return !U.usesScalars(&VF); });
4705 VPValue *MulByUF = Builder.createOverflowingOp(
4706      Instruction::Mul, {RuntimeVF, UF}, {true, false});
4715 BasicBlock *EntryBB = Entry->getIRBasicBlock();
4723 const SCEV *Expr = ExpSCEV->getSCEV();
4726 ExpandedSCEVs[ExpSCEV->getSCEV()] = Res;
4731 ExpSCEV->eraseFromParent();
4734 "VPExpandSCEVRecipes must be at the beginning of the entry block, "
4735 "after any VPIRInstructions");
4738 auto EI = Entry->begin();
4748 return ExpandedSCEVs;
4764 return Member0Op == OpV;
4766 return !W->getMask() && Member0Op == OpV;
4768    return IR->getInterleaveGroup()->isFull() && IR->getVPValue(Idx) == OpV;
4779  if (!InterleaveR || InterleaveR->getMask())
4782  Type *GroupElementTy = nullptr;
4786                [&TypeInfo, GroupElementTy](VPValue *Op) {
4787                  return TypeInfo.inferScalarType(Op) == GroupElementTy;
4794                [&TypeInfo, GroupElementTy](VPValue *Op) {
4795 return TypeInfo.inferScalarType(Op) == GroupElementTy;
4804 return IG->getFactor() == VFMin && IG->getNumMembers() == VFMin &&
4805 GroupSize == VectorRegWidth;
4813 return RepR && RepR->isSingleScalar();
4820 auto *R = V->getDefiningRecipe();
4828  for (unsigned Idx = 0, E = WideMember0->getNumOperands(); Idx != E; ++Idx)
4829 WideMember0->setOperand(
4838    auto *LI = cast<LoadInst>(LoadGroup->getInterleaveGroup()->getInsertPos());
4840        *LI, LoadGroup->getAddr(), LoadGroup->getMask(), true,
4841 false, {}, LoadGroup->getDebugLoc());
4842 L->insertBefore(LoadGroup);
4848 assert(RepR->isSingleScalar() &&
4850 "must be a single scalar load");
4851    NarrowedOps.insert(RepR);
4856 VPValue *PtrOp = WideLoad->getAddr();
4858 PtrOp = VecPtr->getOperand(0);
4863 nullptr, {}, *WideLoad);
4864 N->insertBefore(WideLoad);
4894 if (R.mayWriteToMemory() && !InterleaveR)
4916 if (InterleaveR->getStoredValues().empty())
4921 auto *Member0 = InterleaveR->getStoredValues()[0];
4923 all_of(InterleaveR->getStoredValues(),
4924 [Member0](
VPValue *VPV) { return Member0 == VPV; })) {
4932 VPRecipeBase *DefR = Op.value()->getDefiningRecipe();
4935 auto *IR = dyn_cast<VPInterleaveRecipe>(DefR);
4936 return IR && IR->getInterleaveGroup()->isFull() &&
4937 IR->getVPValue(Op.index()) == Op.value();
4949    for (const auto &[I, V] : enumerate(InterleaveR->getStoredValues())) {
4951 if (!R || R->getOpcode() != WideMember0->getOpcode() ||
4952 R->getNumOperands() > 2)
4955                 [WideMember0, Idx = I](const auto &P) {
4956 const auto &[OpIdx, OpV] = P;
4957 return !canNarrowLoad(WideMember0, OpIdx, OpV, Idx);
4964  if (StoreGroups.empty())
4970  for (auto *StoreGroup : StoreGroups) {
4976        *SI, StoreGroup->getAddr(), Res, nullptr, true,
4977 false, {}, StoreGroup->getDebugLoc());
4978 S->insertBefore(StoreGroup);
4979 StoreGroup->eraseFromParent();
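// Editor's note (assumption): interleave-group narrowing; when every member of a
// full load/store group is used uniformly, the wide group is replaced by a single
// narrow (or single-scalar) load/store so the loop handles one element per member
// instead of a full vector per member.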
4994      Instruction::Mul, {VScale, UF}, {true, false});
4998 Inc->setOperand(1, UF);
5017 "must have a BranchOnCond");
5020  if (VF.isScalable() && VScaleForTuning.has_value())
5021    VectorStep *= *VScaleForTuning;
5022  assert(VectorStep > 0 && "trip count should not be zero");
5026 MiddleTerm->setMetadata(LLVMContext::MD_prof, BranchWeights);
5039 if (WideIntOrFp && WideIntOrFp->getTruncInst())
5046 if (!WideIntOrFp || !WideIntOrFp->isCanonical()) {
5049 Start, VectorTC, Step);
5072 VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
5082 IVEndValues[WideIVR] = EndValue;
5083 ResumePhiR->setOperand(0, EndValue);
5084    ResumePhiR->setName("bc.resume.val");
5091 "should only skip truncated wide inductions");
5099 auto *ResumeFromVectorLoop = VectorPhiR->getBackedgeValue();
5101 "Cannot handle loops with uncountable early exits");
5107 "vector.recur.extract");
5109    ResumePhiR->setName(IsFOR ? "scalar.recur.init" : "bc.merge.rdx");
5110 ResumePhiR->setOperand(0, ResumeFromVectorLoop);
5119 VPBuilder ScalarPHBuilder(ScalarPHVPBB);
5120 VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
5132 "Cannot handle loops with uncountable early exits");
5205 make_range(MiddleVPBB->getFirstNonPhi(), MiddleVPBB->end()))) {
5219 "vector.recur.extract.for.phi");
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
This file implements a class to represent arbitrary precision integral constant values and operations...
ReachingDefInfo InstSet & ToRemove
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static bool isEqual(const Function &Caller, const Function &Callee)
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
static bool isSentinel(const DWARFDebugNames::AttributeEncoding &AE)
iv Induction Variable Users
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
Legalize the Machine IR a function s Machine IR
static bool mergeBlocksIntoPredecessors(Loop &L, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution &SE)
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first DebugLoc that has line number information, given a range of instructions.
This file provides utility analysis objects describing memory locations.
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
const SmallVectorImpl< MachineOperand > & Cond
This is the interface for a metadata-based scoped no-alias analysis.
This file defines generic set operations that may be used on set's of different types,...
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static SymbolRef::Type getType(const Symbol *Sym)
This file implements the TypeSwitch template, which mimics a switch() statement whose cases are type ...
This file implements dominator tree analysis for a single level of a VPlan's H-CFG.
This file contains the declarations of different VPlan-related auxiliary helpers.
This file declares the class VPlanVerifier, which contains utility functions to check the consistency...
This file contains the declarations of the Vectorization Plan base classes:
static const X86InstrFMA3Group Groups[]
static const uint32_t IV[8]
Helper for extra no-alias checks via known-safe recipe and SCEV.
SinkStoreInfo(const SmallPtrSetImpl< VPRecipeBase * > &ExcludeRecipes, VPReplicateRecipe &GroupLeader, ScalarEvolution &SE, const Loop &L, VPTypeAnalysis &TypeInfo)
bool shouldSkip(VPRecipeBase &R) const
Return true if R should be skipped during alias checking, either because it's in the exclude set or b...
Class for arbitrary precision integers.
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
unsigned getActiveBits() const
Compute the number of active bits in the value.
APInt abs() const
Get the absolute value.
unsigned getBitWidth() const
Return the number of bits in the APInt.
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & back() const
back - Get the last element.
const T & front() const
front - Get the first element.
LLVM Basic Block Representation.
const Function * getParent() const
Return the enclosing method, or null if none.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
This class represents a function call, abstracting a target machine's calling convention.
@ ICMP_ULT
unsigned less than
@ ICMP_ULE
unsigned less or equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
static DebugLoc getCompilerGenerated()
static DebugLoc getUnknown()
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
constexpr bool isVector() const
One or more elements.
static constexpr ElementCount getScalable(ScalarTy MinVal)
Utility class for floating point operations which can have information about relaxed accuracy require...
Represents flags for the getelementptr instruction/expression.
GEPNoWrapFlags withoutNoUnsignedWrap() const
static GEPNoWrapFlags none()
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
A struct for saving information about induction variables.
InductionKind
This enum represents the kinds of inductions that we support.
@ IK_PtrInduction
Pointer induction var. Step = C.
@ IK_IntInduction
Integer induction variable. Step = C.
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
The group of interleaved loads/stores sharing the same stride and close to each other.
InstTy * getMember(uint32_t Index) const
Get the member with the given index Index.
uint32_t getNumMembers() const
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
static bool getDecisionAndClampRange(const std::function< bool(ElementCount)> &Predicate, VFRange &Range)
Test a Predicate on a Range of VF's.
Represents a single loop in the control flow graph.
LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
This class implements a map that also provides access to all stored values in a deterministic order.
ValueT lookup(const KeyT &Key) const
Representation for a specific memory location.
AAMDNodes AATags
The metadata nodes which describes the aliasing of the location (each member is null if that kind of ...
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
LLVM_ABI const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
static LLVM_ABI unsigned getOpcode(RecurKind Kind)
Returns the opcode corresponding to the RecurrenceKind.
unsigned getOpcode() const
RegionT * getParent() const
Get the parent of the Region.
This class uses information about analyze scalars to rewrite expressions in canonical form.
LLVM_ABI Value * expandCodeFor(const SCEV *SH, Type *Ty, BasicBlock::iterator I)
Insert code to directly compute the specified SCEV expression into the program.
static const SCEV * rewrite(const SCEV *Scev, ScalarEvolution &SE, ValueToSCEVMapTy &Map)
This class represents an analyzed expression in the program.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
LLVM_ABI const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI const SCEV * getUDivExpr(const SCEV *LHS, const SCEV *RHS)
Get a canonical unsigned division expression, or something simpler if possible.
LLVM_ABI const SCEV * getElementCount(Type *Ty, ElementCount EC, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
LLVM_ABI const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
LLVM_ABI bool isKnownPredicate(CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
static LLVM_ABI bool mayAliasInScopes(const MDNode *Scopes, const MDNode *NoAlias)
This class represents the LLVM 'select' instruction.
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Provides information about what library functions are available for the current target.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
static constexpr TypeSize get(ScalarTy Quantity, bool Scalable)
This class implements a switch-like dispatch statement for a value of 'T' using dyn_cast functionalit...
TypeSwitch< T, ResultT > & Case(CallableT &&caseFn)
Add a case on the given type.
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
bool isStructTy() const
True if this is an instance of StructType.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
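A small sketch of these Type queries; Ctx is an LLVMContext and Ty a placeholder Type pointer:

  Type *I32 = Type::getInt32Ty(Ctx);
  // True for any integer type no wider than 32 bits.
  bool FitsInI32 = Ty->isIntegerTy() &&
                   Ty->getScalarSizeInBits() <= I32->getScalarSizeInBits();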
A recipe for generating the active lane mask for the vector loop that is used to predicate the vector...
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
RecipeListTy::iterator iterator
Instruction iterators...
iterator begin()
Recipe iterator methods.
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
VPRegionBlock * getEnclosingLoopRegion()
VPBasicBlock * splitAt(iterator SplitAt)
Split current block at SplitAt by inserting a new block between the current block and its successors ...
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
const VPRecipeBase & back() const
A recipe for vectorizing a phi-node as a sequence of mask-based select instructions.
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account that when normalized the first incoming value wi...
void setMask(unsigned Idx, VPValue *V)
Set mask number Idx to V.
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have...
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
VPRegionBlock * getParent()
const VPBasicBlock * getExitingBasicBlock() const
size_t getNumSuccessors() const
void swapSuccessors()
Swap successors of the block. The block must have exactly 2 successors.
size_t getNumPredecessors() const
const VPBlocksTy & getPredecessors() const
VPBlockBase * getSinglePredecessor() const
const VPBasicBlock * getEntryBasicBlock() const
VPBlockBase * getSingleHierarchicalPredecessor()
VPBlockBase * getSingleSuccessor() const
const VPBlocksTy & getSuccessors() const
static auto blocksOnly(const T &Range)
Return an iterator range over Range which only includes BlockTy blocks.
static void insertOnEdge(VPBlockBase *From, VPBlockBase *To, VPBlockBase *BlockPtr)
Inserts BlockPtr on the edge between From and To.
static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBases IfTrue and IfFalse after BlockPtr.
static void connectBlocks(VPBlockBase *From, VPBlockBase *To, unsigned PredIdx=-1u, unsigned SuccIdx=-1u)
Connect VPBlockBases From and To bi-directionally.
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
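A hedged sketch of rewiring the CFG with these static helpers (the enclosing class is VPBlockUtils in the VPlan sources; Pred, Succ and NewBlock are placeholders):

  // Splice NewBlock onto the existing edge Pred -> Succ.
  VPBlockUtils::insertOnEdge(Pred, Succ, NewBlock);
  // A manual alternative using the lower-level helpers:
  //   VPBlockUtils::disconnectBlocks(Pred, Succ);
  //   VPBlockUtils::connectBlocks(Pred, NewBlock);
  //   VPBlockUtils::connectBlocks(NewBlock, Succ);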
A recipe for generating conditional branches on the bits of a mask.
RAII object that stores the current insertion point and restores it when the object is destroyed.
VPlan-based builder utility analogous to IRBuilder.
VPValue * createScalarZExtOrTrunc(VPValue *Op, Type *ResultTy, Type *SrcTy, DebugLoc DL)
VPValue * createElementCount(Type *Ty, ElementCount EC)
VPInstruction * createScalarCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, DebugLoc DL, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={})
VPDerivedIVRecipe * createDerivedIV(InductionDescriptor::InductionKind Kind, FPMathOperator *FPBinOp, VPValue *Start, VPValue *Current, VPValue *Step, const Twine &Name="")
Convert the input value Current to the corresponding value of an induction with Start and Step values...
static VPBuilder getToInsertAfter(VPRecipeBase *R)
Create a VPBuilder to insert after R.
VPInstruction * createOverflowingOp(unsigned Opcode, ArrayRef< VPValue * > Operands, VPRecipeWithIRFlags::WrapFlagsTy WrapFlags={false, false}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPPhi * createScalarPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL, const Twine &Name="")
void setInsertPoint(VPBasicBlock *TheBB)
This specifies that created VPInstructions should be appended to the end of the specified block.
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.
Canonical scalar induction phi of the vector loop.
unsigned getNumDefinedValues() const
Returns the number of values defined by the VPDef.
ArrayRef< VPValue * > definedValues()
Returns an ArrayRef of the values defined by the VPDef.
VPValue * getVPSingleValue()
Returns the only VPValue defined by the VPDef.
VPValue * getVPValue(unsigned I)
Returns the VPValue with index I defined by the VPDef.
A recipe for converting the input value IV to the corresponding value of an IV with different s...
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
bool properlyDominates(const VPRecipeBase *A, const VPRecipeBase *B)
A recipe for generating the phi node for the current index of elements, adjusted in accordance with E...
A recipe to combine multiple recipes into a single 'expression' recipe, which should be considered a ...
A special type of VPBasicBlock that wraps an existing IR basic block.
BasicBlock * getIRBasicBlock() const
Class to record and manage LLVM IR flags.
static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)
Create a new VPIRPhi for I if it is a PHINode; otherwise create a VPIRInstruction.
This is a concrete Recipe that models a single VPlan-level instruction.
@ ExtractLane
Extracts a single lane (first operand) from a set of vector operands.
@ ExtractPenultimateElement
@ Unpack
Extracts all lanes from its (non-scalable) vector operand.
@ FirstOrderRecurrenceSplice
@ BuildVector
Creates a fixed-width vector containing all operands.
@ BuildStructVector
Given operands of (the same) struct type, creates a struct of fixed- width vectors each containing a ...
@ CanonicalIVIncrementForPart
@ CalculateTripCountMinusVF
const InterleaveGroup< Instruction > * getInterleaveGroup() const
VPValue * getMask() const
Return the mask used by this recipe.
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
A recipe for interleaved memory operations with vector-predication intrinsics.
VPInterleaveRecipe is a recipe for transforming an interleave group of load or stores into one wide l...
VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when control converges back from ...
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
VPRegionBlock * getRegion()
VPBasicBlock * getParent()
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Helper class to create VPRecipes from IR instructions.
VPRecipeBase * getRecipe(Instruction *I)
Return the recipe created for given ingredient.
A recipe to represent inloop reduction operations with vector-predication intrinsics,...
A recipe to represent inloop, ordered or partial reduction operations.
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
const VPBlockBase * getEntry() const
Type * getCanonicalIVType()
Return the type of the canonical IV for loop regions.
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
void setExiting(VPBlockBase *ExitingBlock)
Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.
VPCanonicalIVPHIRecipe * getCanonicalIV()
Returns the canonical induction recipe of the region.
const VPBlockBase * getExiting() const
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
bool isSingleScalar() const
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...
VPSingleDef is a base class for recipes for modeling a sequence of one or more output IR that define ...
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
VPSingleDefRecipe * clone() override=0
Clone the current recipe.
An analysis for type-inference for VPValues.
LLVMContext & getContext()
Return the LLVMContext used by the analysis.
Type * inferScalarType(const VPValue *V)
Infer the type of V. Returns the scalar type of V.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
void setOperand(unsigned I, VPValue *New)
VPValue * getOperand(unsigned N) const
void addOperand(VPValue *Operand)
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
bool isDefinedOutsideLoopRegions() const
Returns true if the VPValue is defined outside any loop.
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Value * getLiveInIRValue() const
Returns the underlying IR value, if this VPValue is defined outside the scope of VPlan.
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
void setUnderlyingValue(Value *Val)
void replaceAllUsesWith(VPValue *New)
unsigned getNumUsers() const
bool isLiveIn() const
Returns true if this VPValue is a live-in, i.e. defined outside the VPlan.
void replaceUsesWithIf(VPValue *New, llvm::function_ref< bool(VPUser &U, unsigned Idx)> ShouldReplace)
Go through the uses list for this VPValue and make each use point to New if the callback ShouldReplac...
A recipe to compute a pointer to the last element of each part of a widened memory access for widened...
A Recipe for widening the canonical induction variable of the vector loop.
VPWidenCastRecipe is a recipe to create vector cast instructions.
Instruction::CastOps getOpcode() const
A recipe for handling GEP instructions.
Base class for widened induction (VPWidenIntOrFpInductionRecipe and VPWidenPointerInductionRecipe),...
PHINode * getPHINode() const
VPValue * getStepValue()
Returns the step value of the induction.
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
A recipe for handling phi nodes of integer and floating-point inductions, producing their vector valu...
VPValue * getLastUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the last unrolled part,...
VPValue * getSplatVFValue()
A recipe for widening vector intrinsics.
A common base class for widening memory operations.
A recipe for widened phis.
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
bool hasVF(ElementCount VF) const
LLVMContext & getContext() const
VPBasicBlock * getEntry()
VPValue & getVectorTripCount()
The vector trip count.
bool hasScalableVF() const
VPValue & getVFxUF()
Returns VF * UF of the vector loop region.
VPValue & getVF()
Returns the VF of the vector loop region.
VPValue * getTripCount() const
The trip count of the original loop.
VPValue * getTrue()
Return a VPValue wrapping i1 true.
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
VPRegionBlock * createReplicateRegion(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="")
Create a new replicate region with Entry, Exiting and Name.
auto getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
bool hasUF(unsigned UF) const
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
VPValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPValue wrapping a ConstantInt with the given type and value.
void setVF(ElementCount VF)
bool isUnrolled() const
Returns true if the VPlan already has been unrolled, i.e.
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
void resetTripCount(VPValue *NewTripCount)
Resets the trip count for the VPlan.
VPBasicBlock * getMiddleBlock()
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
VPValue * getFalse()
Return a VPValue wrapping i1 false.
VPValue * getOrAddLiveIn(Value *V)
Gets the live-in VPValue for V or adds a new live-in (if none exists yet) for V.
bool hasScalarVFOnly() const
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
VPValue * getLiveIn(Value *V) const
Return the live-in VPValue for V, if there is one or nullptr otherwise.
VPBasicBlock * getVectorPreheader()
Returns the preheader of the vector loop region, if one exists, or null otherwise.
bool hasScalarTail() const
Returns true if the scalar tail may execute after the vector loop.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
iterator_range< user_iterator > users()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
constexpr ScalarTy getFixedValue() const
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_ABI APInt RoundingUDiv(const APInt &A, const APInt &B, APInt::Rounding RM)
Return A unsign-divided by B, rounded by the given rounding mode.
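For example, a ceiling division of two 32-bit APInts:

  APInt A(32, 10), B(32, 4);
  // 10 / 4 rounded up yields 3.
  APInt Ceil = APIntOps::RoundingUDiv(A, B, APInt::Rounding::UP);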
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_MaskedStore(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
Matches MaskedStore Intrinsic.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
LogicalOp_match< LHS, RHS, Instruction::And > m_LogicalAnd(const LHS &L, const RHS &R)
Matches L && R either in the form of L & R or L ?
match_combine_or< CastInst_match< OpTy, ZExtInst >, OpTy > m_ZExtOrSelf(const OpTy &Op)
bool match(Val *V, const Pattern &P)
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_MaskedLoad(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
Matches MaskedLoad Intrinsic.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
SpecificCmpClass_match< LHS, RHS, CmpInst > m_SpecificCmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches an Add with LHS and RHS in either order.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
BinaryOp_match< LHS, RHS, Instruction::Mul, true > m_c_Mul(const LHS &L, const RHS &R)
Matches a Mul with LHS and RHS in either order.
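These matchers compose; a hedged sketch where V and X are placeholder Values already in scope:

  using namespace llvm::PatternMatch;
  const APInt *C;
  // Matches X * C or C * X and binds the constant to C.
  if (match(V, m_c_Mul(m_Specific(X), m_APInt(C)))) {
    // ...
  }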
MatchFunctor< Val, Pattern > match_fn(const Pattern &P)
A match functor that can be used as a UnaryPredicate in functional algorithms like all_of.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
bind_cst_ty m_scev_APInt(const APInt *&C)
Match an SCEV constant and bind it to an APInt.
bool match(const SCEV *S, const Pattern &P)
VPInstruction_match< VPInstruction::ExtractLastLane, VPInstruction_match< VPInstruction::ExtractLastPart, Op0_t > > m_ExtractLastLaneOfLastPart(const Op0_t &Op0)
AllRecipe_commutative_match< Instruction::And, Op0_t, Op1_t > m_c_BinaryAnd(const Op0_t &Op0, const Op1_t &Op1)
Match a binary AND operation.
AllRecipe_match< Instruction::Or, Op0_t, Op1_t > m_BinaryOr(const Op0_t &Op0, const Op1_t &Op1)
Match a binary OR operation.
VPInstruction_match< VPInstruction::AnyOf > m_AnyOf()
AllRecipe_commutative_match< Opcode, Op0_t, Op1_t > m_c_Binary(const Op0_t &Op0, const Op1_t &Op1)
AllRecipe_commutative_match< Instruction::Or, Op0_t, Op1_t > m_c_BinaryOr(const Op0_t &Op0, const Op1_t &Op1)
GEPLikeRecipe_match< Op0_t, Op1_t > m_GetElementPtr(const Op0_t &Op0, const Op1_t &Op1)
AllRecipe_match< Opcode, Op0_t, Op1_t > m_Binary(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::LastActiveLane, Op0_t > m_LastActiveLane(const Op0_t &Op0)
VPInstruction_match< Instruction::ExtractElement, Op0_t, Op1_t > m_ExtractElement(const Op0_t &Op0, const Op1_t &Op1)
specific_intval< 1 > m_False()
VPDerivedIV_match< Op0_t, Op1_t, Op2_t > m_DerivedIV(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
VPInstruction_match< VPInstruction::ExtractLastLane, Op0_t > m_ExtractLastLane(const Op0_t &Op0)
VPInstruction_match< VPInstruction::ActiveLaneMask, Op0_t, Op1_t, Op2_t > m_ActiveLaneMask(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
VPInstruction_match< VPInstruction::BranchOnCount > m_BranchOnCount()
specific_intval< 1 > m_True()
VectorEndPointerRecipe_match< Op0_t, Op1_t > m_VecEndPtr(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::ExtractLastPart, Op0_t > m_ExtractLastPart(const Op0_t &Op0)
VPInstruction_match< VPInstruction::Broadcast, Op0_t > m_Broadcast(const Op0_t &Op0)
class_match< VPValue > m_VPValue()
Match an arbitrary VPValue and ignore it.
VPInstruction_match< VPInstruction::ExplicitVectorLength, Op0_t > m_EVL(const Op0_t &Op0)
VPInstruction_match< VPInstruction::BuildVector > m_BuildVector()
BuildVector matches only its opcode, without matching its operands, as the number of operands is not fi...
VPInstruction_match< VPInstruction::ExtractPenultimateElement, Op0_t > m_ExtractPenultimateElement(const Op0_t &Op0)
VPInstruction_match< VPInstruction::FirstActiveLane, Op0_t > m_FirstActiveLane(const Op0_t &Op0)
bind_ty< VPInstruction > m_VPInstruction(VPInstruction *&V)
Match a VPInstruction, capturing if we match.
VPInstruction_match< VPInstruction::BranchOnCond > m_BranchOnCond()
VPInstruction_match< VPInstruction::ExtractLane, Op0_t, Op1_t > m_ExtractLane(const Op0_t &Op0, const Op1_t &Op1)
NodeAddr< DefNode * > Def
bool isSingleScalar(const VPValue *VPV)
Returns true if VPV is a single scalar, either because it produces the same value for all lanes or on...
bool isUniformAcrossVFsAndUFs(VPValue *V)
Checks if V is uniform across all VF lanes and UF parts.
VPValue * getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr)
Get or create a VPValue that corresponds to the expansion of Expr.
std::optional< MemoryLocation > getMemoryLocation(const VPRecipeBase &R)
Return a MemoryLocation for R with noalias metadata populated from R, if the recipe is supported and ...
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
VPIRFlags getFlagsFromIndDesc(const InductionDescriptor &ID)
Extracts and returns NoWrap and FastMath flags from the induction binop in ID.
bool onlyScalarValuesUsed(const VPValue *Def)
Returns true if only scalar values of Def are used by all users.
bool isHeaderMask(const VPValue *V, const VPlan &Plan)
Return true if V is a header mask in Plan.
const SCEV * getSCEVExprForVPValue(const VPValue *V, ScalarEvolution &SE, const Loop *L=nullptr)
Return the SCEV expression for V.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
auto min_element(R &&Range)
Provide wrappers to std::min_element which take ranges instead of having to pass begin/end explicitly...
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
DenseMap< const Value *, const SCEV * > ValueToSCEVMapTy
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
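A small usage sketch; Ops is a placeholder container of printable elements:

  for (const auto &En : enumerate(Ops))
    errs() << "operand " << En.index() << ": " << En.value() << "\n";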
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
constexpr from_range_t from_range
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
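A typical use is erasing recipes while walking a VPBasicBlock; a hedged sketch where isDead is a placeholder predicate:

  for (VPRecipeBase &R : make_early_inc_range(*VPBB))
    if (isDead(R))
      R.eraseFromParent();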
auto cast_or_null(const Y &Val)
iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order.
iterator_range< df_iterator< VPBlockDeepTraversalWrapper< VPBlockBase * > > > vp_depth_first_deep(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order while traversing t...
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
iterator_range< po_iterator< VPBlockDeepTraversalWrapper< VPBlockBase * > > > vp_post_order_deep(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in post order while traversing through ...
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI_FOR_TEST cl::opt< bool > EnableWideActiveLaneMask
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
iterator_range< filter_iterator< detail::IterOfRange< RangeT >, PredicateT > > make_filter_range(RangeT &&Range, PredicateT Pred)
Convenience function that takes a range of elements and a predicate, and return a new filter_iterator...
bool canConstantBeExtended(const APInt *C, Type *NarrowType, TTI::PartialReductionExtendKind ExtKind)
Check if a constant CI can be safely treated as having been extended from a narrower type with the gi...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
RecurKind
These are the kinds of recurrences that we support.
@ Mul
Product of integers.
@ Sub
Subtraction of integers.
@ AddChainWithSubs
A chain of adds and subs.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
DWARFExpression::Operation Op
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list is empty.
@ DataAndControlFlowWithoutRuntimeCheck
Use predicate to control both data and control flow, but modify the trip count so that a runtime over...
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
@ Default
The result values are uniform if and only if all operands are uniform.
constexpr detail::IsaCheckPredicate< Types... > IsaPred
Function object wrapper for the llvm::isa type check.
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
RemoveMask_match(const Op0_t &In, Op1_t &Out)
bool match(OpTy *V) const
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
MDNode * Scope
The tag for alias scope specification (used with noalias).
MDNode * NoAlias
The tag specifying the noalias scope.
This struct is a compact representation of a valid (non-zero power of two) alignment.
An information struct used to provide DenseMap with the various necessary components for a given valu...
Incoming for lane mask phi as machine instruction; incoming register Reg and incoming block Block are...
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
Struct to hold various analysis needed for cost computations.
A recipe for handling first-order recurrence phis.
A recipe for widening load operations with vector-predication intrinsics, using the address to load f...
A recipe for widening load operations, using the address to load from and an optional mask.
A recipe for widening select instructions.
A recipe for widening store operations with vector-predication intrinsics, using the value to store,...
A recipe for widening store operations, using the stored value, the address to store to and an option...