if (!VPBB->getParent())
auto EndIter = Term ? Term->getIterator() : VPBB->end();
VPValue *VPV = Ingredient.getVPSingleValue();
    *Load, Ingredient.getOperand(0), nullptr,
    Ingredient.getDebugLoc());
    *Store, Ingredient.getOperand(1), Ingredient.getOperand(0),
    nullptr, false, false, *VPI,
    Ingredient.getDebugLoc());
    Ingredient.getDebugLoc());
    *VPI, CI->getDebugLoc());
    CI->getOpcode(), Ingredient.getOperand(0), CI->getType(), CI,
    *VPI, Ingredient.getDebugLoc());
       "inductions must be created earlier");
       "Only recipes with zero or one defined values expected");
Ingredient.eraseFromParent();
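// Note: a rough sketch of the conversion above, inferred from the fragments:
// each VPInstruction wrapping an IR "ingredient" is replaced by a dedicated
// recipe (LoadInst -> VPWidenLoadRecipe, StoreInst -> VPWidenStoreRecipe,
// CallInst -> widened call/intrinsic, casts -> VPWidenCastRecipe), the old
// VPValue's users are rewired, and the ingredient recipe is erased.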
if (A->getOpcode() != Instruction::Store ||
    B->getOpcode() != Instruction::Store)
const APInt *Distance;
Type *TyA = TypeInfo.inferScalarType(A->getOperand(0));
Type *TyB = TypeInfo.inferScalarType(B->getOperand(0));
uint64_t MaxStoreSize = std::max(SizeA, SizeB);
auto VFs = B->getParent()->getPlan()->vectorFactors();
return Distance->abs().uge(
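// In essence, the distance check concluded here is: two stores whose
// addresses differ by a known constant Distance cannot overlap for any
// candidate VF if
//   |Distance| >= MaxVF * max(SizeA, SizeB)
// which is why both stored types and the plan's vector factors are consulted.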
    : ExcludeRecipes(ExcludeRecipes), GroupLeader(GroupLeader), PSE(PSE),
      L(L), TypeInfo(TypeInfo) {}
return ExcludeRecipes.contains(&R) ||
       (Store && isNoAliasViaDistance(Store, &GroupLeader));
                     std::optional<SinkStoreInfo> SinkInfo = {}) {
bool CheckReads = SinkInfo.has_value();
       "Expected at most one successor in block chain");
if (SinkInfo && SinkInfo->shouldSkip(R))
if (!R.mayWriteToMemory() && !(CheckReads && R.mayReadFromMemory()))
if (CheckReads && R.mayReadFromMemory() &&
                      Loc->AATags.NoAlias))
if (R.mayHaveSideEffects() || R.mayReadFromMemory() || R.isPhi())
return RepR && RepR->getOpcode() == Instruction::Alloca;
auto InsertIfValidSinkCandidate = [ScalarVFOnly, &WorkList](
if (!ScalarVFOnly && RepR->isSingleScalar())
WorkList.insert({SinkTo, Candidate});
for (auto &Recipe : *VPBB)
  InsertIfValidSinkCandidate(VPBB, Op);
for (unsigned I = 0; I != WorkList.size(); ++I) {
std::tie(SinkTo, SinkCandidate) = WorkList[I];
auto UsersOutsideSinkTo =
  return cast<VPRecipeBase>(U)->getParent() != SinkTo;
if (any_of(UsersOutsideSinkTo, [SinkCandidate](VPUser *U) {
      return !U->usesFirstLaneOnly(SinkCandidate);
bool NeedsDuplicating = !UsersOutsideSinkTo.empty();
if (NeedsDuplicating) {
if (auto *SinkCandidateRepR =
    nullptr, *SinkCandidateRepR,
Clone = SinkCandidate->clone();
InsertIfValidSinkCandidate(SinkTo, Op);
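// Worklist sketch: a scalar-defining recipe may be sunk to the block of its
// users; if some users outside SinkTo only need its first lane, the
// candidate is cloned so the clone serves SinkTo and the original serves the
// remaining users, and the sunk recipe's operands become new candidates.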
if (!EntryBB || EntryBB->size() != 1 ||
if (EntryBB->getNumSuccessors() != 2)
if (!Succ0 || !Succ1)
if (Succ0->getNumSuccessors() + Succ1->getNumSuccessors() != 1)
if (Succ0->getSingleSuccessor() == Succ1)
if (Succ1->getSingleSuccessor() == Succ0)
if (!Region1->isReplicator())
auto *MiddleBasicBlock =
if (!MiddleBasicBlock || !MiddleBasicBlock->empty())
if (!Region2 || !Region2->isReplicator())
if (!Mask1 || Mask1 != Mask2)
assert(Mask1 && Mask2 && "both regions must have conditions");
if (TransformedRegions.contains(Region1))
if (!Then1 || !Then2)
VPValue *Phi1ToMoveV = Phi1ToMove.getVPSingleValue();
if (Phi1ToMove.getVPSingleValue()->getNumUsers() == 0) {
  Phi1ToMove.eraseFromParent();
Phi1ToMove.moveBefore(*Merge2, Merge2->begin());
TransformedRegions.insert(Region1);
return !TransformedRegions.empty();
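// Merge sketch: two replicate regions guarded by the same mask, separated
// only by an empty middle block, are combined; phis from the first region's
// merge block move to the second region's merge block and the first region
// is unlinked from the CFG.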
std::string RegionName = (Twine("pred.") + Instr->getOpcodeName()).str();
assert(Instr->getParent() && "Predicated instruction not in any basic block");
auto *BlockInMask = PredRecipe->getMask();
    RecipeWithoutMask->getDebugLoc());
if (RepR->isPredicated())
if (ParentRegion && ParentRegion->getExiting() == CurrentBlock)
if (!VPBB->getParent())
if (!PredVPBB || PredVPBB->getNumSuccessors() != 1 ||
R.moveBefore(*PredVPBB, PredVPBB->end());
auto *ParentRegion = VPBB->getParent();
if (ParentRegion && ParentRegion->getExiting() == VPBB)
  ParentRegion->setExiting(PredVPBB);
for (auto *Succ : to_vector(VPBB->successors())) {
return !WorkList.empty();
bool ShouldSimplify = true;
while (ShouldSimplify) {
if (!IV || IV->getTruncInst())
for (auto *U : FindMyCast->users()) {
  if (UserCast && UserCast->getUnderlyingValue() == IRCast) {
    FoundUserCast = UserCast;
FindMyCast = FoundUserCast;
if (!WidenOriginalIV || !WidenOriginalIV->isCanonical())
WidenOriginalIV->dropPoisonGeneratingFlags();
bool IsConditionalAssume = RepR && RepR->isPredicated() &&
if (IsConditionalAssume)
if (R.mayHaveSideEffects())
return all_of(R.definedValues(),
              [](VPValue *V) { return V->getNumUsers() == 0; });
if (!PhiR || PhiR->getNumOperands() != 2)
VPUser *PhiUser = PhiR->getSingleUser();
if (PhiUser != Incoming->getDefiningRecipe() ||
PhiR->replaceAllUsesWith(PhiR->getOperand(0));
PhiR->eraseFromParent();
Incoming->getDefiningRecipe()->eraseFromParent();
    Kind, FPBinOp, StartV, CanonicalIV, Step, "offset.idx");
BaseIV = Builder.createScalarCast(Instruction::Trunc, BaseIV, TruncTy, DL);
if (ResultTy != StepTy) {
Builder.setInsertPoint(VecPreheader);
Step = Builder.createScalarCast(Instruction::Trunc, Step, ResultTy, DL);
return Builder.createScalarIVSteps(InductionOpcode, FPBinOp, BaseIV, Step,
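// The scalar steps materialized here compute, per lane,
//   scalar_iv(lane) = BaseIV + lane * Step
// truncating BaseIV and Step to the narrower result type first when the
// induction was truncated (the two createScalarCast calls above).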
for (unsigned I = 0; I != Users.size(); ++I) {
Users.insert_range(V->users());
return Users.takeVector();
    nullptr, StartV, StepV, PtrIV->getDebugLoc(), Builder);
Def->getNumUsers() == 0 || !Def->getUnderlyingValue() ||
(RepR && (RepR->isSingleScalar() || RepR->isPredicated())))
    Def->operands(), true,
Clone->insertAfter(Def);
Def->replaceAllUsesWith(Clone);
PtrIV->replaceAllUsesWith(PtrAdd);
if (HasOnlyVectorVFs && none_of(WideIV->users(), [WideIV](VPUser *U) {
      return U->usesScalars(WideIV);
    Plan, ID.getKind(), ID.getInductionOpcode(),
    WideIV->getTruncInst(), WideIV->getStartValue(), WideIV->getStepValue(),
    WideIV->getDebugLoc(), Builder);
if (!HasOnlyVectorVFs) {
       "plans containing a scalar VF cannot also include scalable VFs");
WideIV->replaceAllUsesWith(Steps);
WideIV->replaceUsesWithIf(Steps,
                          [WideIV, HasScalableVF](VPUser &U, unsigned) {
  return U.usesFirstLaneOnly(WideIV);
return U.usesScalars(WideIV);
return (IntOrFpIV && IntOrFpIV->getTruncInst()) ? nullptr : WideIV;
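// In essence: when every (relevant) user of the wide induction only needs
// scalar lane values, the wide IV is replaced by scalar IV steps; if the
// plan also contains scalable VFs, only users restricted to the first lane
// are rewritten.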
if (!Def || Def->getNumOperands() != 2)
auto IsWideIVInc = [&]() {
auto &ID = WideIV->getInductionDescriptor();
VPValue *IVStep = WideIV->getStepValue();
switch (ID.getInductionOpcode()) {
case Instruction::Add:
case Instruction::FAdd:
case Instruction::FSub:
case Instruction::Sub: {
return IsWideIVInc() ? WideIV : nullptr;
if (WideIntOrFp && WideIntOrFp->getTruncInst())
FirstActiveLane = B.createScalarZExtOrTrunc(FirstActiveLane, CanonicalIVType,
                                            FirstActiveLaneType, DL);
B.createNaryOp(Instruction::Add, {CanonicalIV, FirstActiveLane}, DL);
EndValue = B.createNaryOp(Instruction::Add, {EndValue, One}, DL);
if (!WideIntOrFp || !WideIntOrFp->isCanonical()) {
VPIRValue *Start = WideIV->getStartValue();
VPValue *Step = WideIV->getStepValue();
EndValue = B.createDerivedIV(
    Start, EndValue, Step);
assert(EndValue && "end value must have been pre-computed");
VPValue *Step = WideIV->getStepValue();
return B.createNaryOp(Instruction::Sub, {EndValue, Step},
return B.createPtrAdd(EndValue,
                      B.createNaryOp(Instruction::Sub, {Zero, Step}),
const auto &ID = WideIV->getInductionDescriptor();
return B.createNaryOp(
    ID.getInductionBinOp()->getOpcode() == Instruction::FAdd
        ? Instruction::FSub
        : Instruction::FAdd,
    {EndValue, Step}, {ID.getInductionBinOp()->getFastMathFlags()});
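// End-value sketch (assuming the usual derived-IV semantics): for exit users
// the code derives
//   EndValue = Start + TripCount * Step
// and for users of the penultimate iteration steps back by one Step: integer
// IVs subtract Step, pointer IVs add -Step, and FP IVs apply the inverse of
// the induction's FAdd/FSub, as the ternary above shows.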
for (auto [Idx, PredVPBB] : enumerate(ExitVPBB->getPredecessors())) {
if (PredVPBB == MiddleVPBB)
    ExitIRI->getOperand(Idx),
    Plan, TypeInfo, PredVPBB, ExitIRI->getOperand(Idx), PSE);
ExitIRI->setOperand(Idx, Escape);
const auto &[V, Inserted] = SCEV2VPV.try_emplace(ExpR->getSCEV(), ExpR);
ExpR->replaceAllUsesWith(V->second);
ExpR->eraseFromParent();
while (!WorkList.empty()) {
if (!Seen.insert(Cur).second)
R->eraseFromParent();
static std::optional<std::pair<bool, unsigned>>
    std::optional<std::pair<bool, unsigned>>>(R)
    [](auto *I) { return std::make_pair(false, I->getOpcode()); })
    return std::make_pair(true, I->getVectorIntrinsicID());
    .Case<VPVectorPointerRecipe, VPPredInstPHIRecipe>([](auto *I) {
    return std::make_pair(false,
    .Default([](auto *) { return std::nullopt; });
Value *V = Op->getUnderlyingValue();
auto FoldToIRValue = [&]() -> Value * {
if (OpcodeOrIID->first) {
if (R.getNumOperands() != 2)
unsigned ID = OpcodeOrIID->second;
return Folder.FoldBinaryIntrinsic(ID, Ops[0], Ops[1],
unsigned Opcode = OpcodeOrIID->second;
return Folder.FoldSelect(Ops[0], Ops[1],
return Folder.FoldBinOp(Instruction::BinaryOps::Xor, Ops[0],
case Instruction::Select:
  return Folder.FoldSelect(Ops[0], Ops[1], Ops[2]);
case Instruction::ICmp:
case Instruction::FCmp:
case Instruction::GetElementPtr: {
  return Folder.FoldGEP(GEP->getSourceElementType(), Ops[0],
case Instruction::ExtractElement:
if (Value *V = FoldToIRValue())
  return R.getParent()->getPlan()->getOrAddLiveIn(V);
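// Folding sketch: operands are mapped to their underlying IR values; when
// all are available, InstSimplifyFolder folds the recipe (binary ops and
// intrinsics, selects, compares, GEPs, extracts), and a successful fold is
// re-imported into the plan as a live-in VPValue.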
VPlan *Plan = Def->getParent()->getPlan();
return Def->replaceAllUsesWith(V);
PredPHI->replaceAllUsesWith(Op);
if (TruncTy == ATy) {
  Def->replaceAllUsesWith(A);
    : Instruction::ZExt;
if (auto *UnderlyingExt = Def->getOperand(0)->getUnderlyingValue()) {
  Ext->setUnderlyingValue(UnderlyingExt);
Def->replaceAllUsesWith(Ext);
auto *Trunc = Builder.createWidenCast(Instruction::Trunc, A, TruncTy);
Def->replaceAllUsesWith(Trunc);
for (VPUser *U : A->users()) {
for (VPValue *VPV : R->definedValues())
Def->replaceAllUsesWith(X);
Def->eraseFromParent();
return Def->replaceAllUsesWith(Def->getOperand(Def->getOperand(0) == X));
return Def->replaceAllUsesWith(X);
return Def->replaceAllUsesWith(Def->getOperand(Def->getOperand(0) == X));
return Def->replaceAllUsesWith(Def->getOperand(1));
(!Def->getOperand(0)->hasMoreThanOneUniqueUser() ||
 !Def->getOperand(1)->hasMoreThanOneUniqueUser()))
return Def->replaceAllUsesWith(
    Builder.createLogicalAnd(X, Builder.createOr(Y, Z)));
return Def->replaceAllUsesWith(Plan->getFalse());
return Def->replaceAllUsesWith(X);
return Def->replaceAllUsesWith(Builder.createNot(C));
Def->setOperand(0, C);
Def->setOperand(1, Y);
Def->setOperand(2, X);
X->hasMoreThanOneUniqueUser())
return Def->replaceAllUsesWith(
    Builder.createLogicalAnd(X, Builder.createLogicalAnd(Y, Z)));
return Def->replaceAllUsesWith(A);
return Def->replaceAllUsesWith(A);
return Def->replaceAllUsesWith(
    Def->getOperand(0) == A ? Def->getOperand(1) : Def->getOperand(0));
return Def->replaceAllUsesWith(Builder.createNaryOp(
    {A, Plan->getConstantInt(APC->getBitWidth(), APC->exactLogBase2())},
const VPRegionBlock *ParentRegion = Def->getParent()->getParent();
bool IsInReplicateRegion = ParentRegion && ParentRegion->isReplicator();
return Def->replaceAllUsesWith(Builder.createNaryOp(
    {A, Plan->getConstantInt(APC->getBitWidth(), APC->exactLogBase2())}, {},
    Def->getDebugLoc()));
return Def->replaceAllUsesWith(A);
R->setOperand(1, Y);
R->setOperand(2, X);
R->replaceAllUsesWith(Cmp);
if (!Cmp->getDebugLoc() && Def->getDebugLoc())
  Cmp->setDebugLoc(Def->getDebugLoc());
if (Op->getNumUsers() > 1 ||
} else if (!UnpairedCmp) {
  UnpairedCmp = Op->getDefiningRecipe();
UnpairedCmp = nullptr;
if (NewOps.size() < Def->getNumOperands()) {
return Def->replaceAllUsesWith(NewAnyOf);
return Def->replaceAllUsesWith(NewCmp);
return Def->replaceAllUsesWith(Def->getOperand(1));
X = Builder.createWidenCast(Instruction::Trunc, X, WideStepTy);
Def->replaceAllUsesWith(X);
Def->setOperand(1, Def->getOperand(0));
Def->setOperand(0, Y);
if (Phi->getOperand(0) == Phi->getOperand(1))
  Phi->replaceAllUsesWith(Phi->getOperand(0));
Def->replaceAllUsesWith(
    BuildVector->getOperand(BuildVector->getNumOperands() - 1));
return Def->replaceAllUsesWith(A);
Def->replaceAllUsesWith(
    BuildVector->getOperand(BuildVector->getNumOperands() - 2));
Def->replaceAllUsesWith(BuildVector->getOperand(Idx));
Def->replaceAllUsesWith(
       "broadcast operand must be single-scalar");
Def->setOperand(0, C);
if (Phi->getNumOperands() == 1)
  Phi->replaceAllUsesWith(Phi->getOperand(0));
if (Def->getNumOperands() == 1 &&
  return Def->replaceAllUsesWith(IRV);
return Def->replaceAllUsesWith(A);
Def->replaceAllUsesWith(Builder.createNaryOp(
    Instruction::ExtractElement, {A, LaneToExtract}, Def->getDebugLoc()));
if (Phi->getOperand(1) != Def && match(Phi->getOperand(0), m_ZeroInt()) &&
    Phi->getSingleUser() == Def) {
  Phi->setOperand(0, Y);
  Def->replaceAllUsesWith(Phi);
Steps->replaceAllUsesWith(Steps->getOperand(0));
Def->replaceUsesWithIf(StartV, [](const VPUser &U, unsigned Idx) {
return PhiR && PhiR->isInLoop();
Def->replaceAllUsesWith(A);
    [Def, A](VPUser *U) { return U->usesScalars(A) || Def == U; })) {
  return Def->replaceAllUsesWith(A);
return Def->replaceAllUsesWith(A);
if (RepR && (RepR->isSingleScalar() || RepR->isPredicated()))
    !WidenStoreR->isConsecutive()) {
assert(!WidenStoreR->isReverse() &&
       "Non-consecutive memory recipes shouldn't be reversed");
VPValue *Mask = WidenStoreR->getMask();
    {WidenStoreR->getOperand(1)});
    &WidenStoreR->getIngredient(), {Extract, WidenStoreR->getAddr()},
    true, nullptr, {},
ScalarStore->insertBefore(WidenStoreR);
WidenStoreR->eraseFromParent();
    RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(),
    true, nullptr, *RepR,
    *RepR, RepR->getDebugLoc());
Clone->insertBefore(RepOrWidenR);
VPValue *ExtractOp = Clone->getOperand(0);
Clone->setOperand(0, ExtractOp);
RepR->eraseFromParent();
if (!all_of(RepOrWidenR->users(),
            [RepOrWidenR](const VPUser *U) {
              if (auto *VPI = dyn_cast<VPInstruction>(U)) {
                unsigned Opcode = VPI->getOpcode();
                if (Opcode == VPInstruction::ExtractLastLane ||
                    Opcode == VPInstruction::ExtractLastPart ||
                    Opcode == VPInstruction::ExtractPenultimateElement)
              return U->usesScalars(RepOrWidenR);
if (Op->getSingleUser() != RepOrWidenR)
auto *IRV = dyn_cast<VPIRValue>(Op);
bool LiveInNeedsBroadcast = IRV && !isa<Constant>(IRV->getValue());
auto *OpR = dyn_cast<VPReplicateRecipe>(Op);
return LiveInNeedsBroadcast || (OpR && OpR->isSingleScalar());
    RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(),
    true, nullptr, *RepOrWidenR);
Clone->insertBefore(RepOrWidenR);
RepOrWidenR->replaceAllUsesWith(Clone);
RepOrWidenR->eraseFromParent();
if (Blend->isNormalized() || !match(Blend->getMask(0), m_False()))
UniqueValues.insert(Blend->getIncomingValue(0));
for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)
  UniqueValues.insert(Blend->getIncomingValue(I));
if (UniqueValues.size() == 1) {
  Blend->replaceAllUsesWith(*UniqueValues.begin());
  Blend->eraseFromParent();
if (Blend->isNormalized())
unsigned StartIndex = 0;
for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
if (Mask->getNumUsers() == 1 && !match(Mask, m_False())) {
OperandsWithMask.push_back(Blend->getIncomingValue(StartIndex));
for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
  if (I == StartIndex)
  OperandsWithMask.push_back(Blend->getIncomingValue(I));
  OperandsWithMask.push_back(Blend->getMask(I));
    OperandsWithMask, Blend->getDebugLoc());
NewBlend->insertBefore(&R);
VPValue *DeadMask = Blend->getMask(StartIndex);
Blend->eraseFromParent();
if (NewBlend->getNumOperands() == 3 &&
VPValue *Inc0 = NewBlend->getOperand(0);
VPValue *Inc1 = NewBlend->getOperand(1);
VPValue *OldMask = NewBlend->getOperand(2);
NewBlend->setOperand(0, Inc1);
NewBlend->setOperand(1, Inc0);
NewBlend->setOperand(2, NewMask);
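// Blend normalization sketch: the incoming value chosen as the default is
// moved to position 0 (StartIndex), every other incoming value keeps its
// mask, and a two-incoming blend whose remaining mask is a negation is
// flipped so the negated mask can be replaced by the plain one.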
APInt MaxVal = AlignedTC - 1;
unsigned NewBitWidth =
bool MadeChange = false;
if (!WideIV || !WideIV->isCanonical() ||
    WideIV->hasMoreThanOneUniqueUser() ||
    NewIVTy == WideIV->getScalarType())
VPUser *SingleUser = WideIV->getSingleUser();
WideIV->setStartValue(NewStart);
WideIV->setStepValue(NewStep);
Cmp->setOperand(1, NewBTC);
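// IV narrowing sketch: if the trip count fits in a narrower integer type, a
// canonical wide IV with a single user is retyped; start, step, and the
// backedge-taken count in the exit compare are all replaced with narrowed
// equivalents.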
return any_of(Cond->getDefiningRecipe()->operands(), [&Plan, BestVF, BestUF,
  return isConditionTrueViaVFAndUF(C, Plan, BestVF, BestUF, PSE);
const SCEV *VectorTripCount =
       "Trip count SCEV must be computable");
auto *Term = &ExitingVPBB->back();
for (unsigned Part = 0; Part < UF; ++Part) {
Extracts[Part] = Ext;
match(Phi->getBackedgeValue(),
assert(Index && "Expected index from ActiveLaneMask instruction");
       "Expected one VPActiveLaneMaskPHIRecipe for each unroll part");
       "Expected incoming values of Phi to be ActiveLaneMasks");
EntryALM->setOperand(2, ALMMultiplier);
LoopALM->setOperand(2, ALMMultiplier);
ExtractFromALM(EntryALM, EntryExtracts);
ExtractFromALM(LoopALM, LoopExtracts);
Not->setOperand(0, LoopExtracts[0]);
for (unsigned Part = 0; Part < UF; ++Part) {
  Phis[Part]->setStartValue(EntryExtracts[Part]);
  Phis[Part]->setBackedgeValue(LoopExtracts[Part]);
auto *Term = &ExitingVPBB->back();
const SCEV *VectorTripCount =
       "Trip count SCEV must be computable");
if (auto *R = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi))
  return R->isCanonical();
return isa<VPCanonicalIVPHIRecipe, VPEVLBasedIVPHIRecipe,
           VPFirstOrderRecurrencePHIRecipe, VPPhi>(&Phi);
    R->getScalarType());
HeaderR.eraseFromParent();
HeaderR.getVPSingleValue()->replaceAllUsesWith(Phi->getIncomingValue(0));
HeaderR.eraseFromParent();
B->setParent(nullptr);
if (Exits.size() != 1) {
       "BranchOnTwoConds needs 2 remaining exits");
    Term->getOperand(0));
Term->setOperand(1, Plan.getTrue());
    {}, {}, Term->getDebugLoc());
Term->eraseFromParent();
R.getVPSingleValue()->replaceAllUsesWith(Trunc);
assert(Plan.hasVF(BestVF) && "BestVF is not available in Plan");
assert(Plan.hasUF(BestUF) && "BestUF is not available in Plan");
assert(Plan.getUF() == BestUF && "BestUF must match the Plan's UF");
auto TryToPushSinkCandidate = [&](VPRecipeBase *SinkCandidate) {
if (SinkCandidate == Previous)
    !Seen.insert(SinkCandidate).second ||
for (unsigned I = 0; I != WorkList.size(); ++I) {
       "only recipes with a single defined value expected");
if (SinkCandidate == FOR)
SinkCandidate->moveAfter(Previous);
Previous = SinkCandidate;
for (VPUser *U : FOR->users()) {
          [&VPDT, HoistPoint](VPUser *U) {
            auto *R = cast<VPRecipeBase>(U);
            return HoistPoint == R ||
                   VPDT.properlyDominates(HoistPoint, R);
       "HoistPoint must dominate all users of FOR");
auto NeedsHoisting = [HoistPoint, &VPDT,
VPRecipeBase *HoistCandidate = HoistCandidateV->getDefiningRecipe();
if (!HoistCandidate)
        HoistCandidate->getRegion() == EnclosingLoopRegion) &&
       "CFG in VPlan should still be flat, without replicate regions");
if (!Visited.insert(HoistCandidate).second)
return HoistCandidate;
for (unsigned I = 0; I != HoistCandidates.size(); ++I) {
       "only recipes with a single defined value expected");
if (auto *R = NeedsHoisting(Op)) {
if (R->getNumDefinedValues() != 1)
HoistCandidate->moveBefore(*HoistPoint->getParent(),
VPRecipeBase *Previous = FOR->getBackedgeValue()->getDefiningRecipe();
while (auto *PrevPhi =
assert(PrevPhi->getParent() == FOR->getParent());
Previous = PrevPhi->getBackedgeValue()->getDefiningRecipe();
    {FOR, FOR->getBackedgeValue()});
FOR->replaceAllUsesWith(RecurSplice);
RecurSplice->setOperand(0, FOR);
for (VPUser *U : RecurSplice->users()) {
B.createNaryOp(Instruction::Sub, {LastActiveLane, One});
VPValue *PenultimateLastIter =
    {PenultimateIndex, FOR->getBackedgeValue()});
VPValue *Sel = B.createSelect(Cmp, LastPrevIter, PenultimateLastIter);
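// FirstOrderRecurrenceSplice conceptually computes
//   splice(Prev, Cur)[i] = (i == 0) ? lastlane(Prev) : Cur[i - 1]
// i.e. the recurrence shifted by one lane; the select above picks between
// the last and the penultimate value for scalar users of the exit value.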
RecurKind RK = PhiR->getRecurrenceKind();
RecWithFlags->dropPoisonGeneratingFlags();
struct VPCSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> {
return Def == getEmptyKey() || Def == getTombstoneKey();
return GEP->getSourceElementType();
    .Case<VPVectorPointerRecipe, VPWidenGEPRecipe>(
        [](auto *I) { return I->getSourceElementType(); })
    .Default([](auto *) { return nullptr; });
static bool canHandle(const VPSingleDefRecipe *Def) {
if (!C || (!C->first && (C->second == Instruction::InsertValue ||
                         C->second == Instruction::ExtractValue)))
return !Def->mayReadFromMemory();
static unsigned getHashValue(const VPSingleDefRecipe *Def) {
const VPlan *Plan = Def->getParent()->getPlan();
VPTypeAnalysis TypeInfo(*Plan);
    getGEPSourceElementType(Def), TypeInfo.inferScalarType(Def),
if (RFlags->hasPredicate())
static bool isEqual(const VPSingleDefRecipe *L, const VPSingleDefRecipe *R) {
if (L->getVPRecipeID() != R->getVPRecipeID() ||
    getGEPSourceElementType(L) != getGEPSourceElementType(R) ||
    !equal(L->operands(), R->operands()))
       "must have valid opcode info for both recipes");
if (LFlags->hasPredicate() &&
    LFlags->getPredicate() !=
const VPRegionBlock *RegionL = L->getRegion();
const VPRegionBlock *RegionR = R->getRegion();
    L->getParent() != R->getParent())
const VPlan *Plan = L->getParent()->getPlan();
VPTypeAnalysis TypeInfo(*Plan);
return TypeInfo.inferScalarType(L) == TypeInfo.inferScalarType(R);
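// CSE sketch: recipes hash over (recipe ID, opcode/intrinsic, GEP source
// element type, inferred scalar type, operands, and predicate when present);
// isEqual additionally requires the same region/parent so inferred types
// agree, and memory-reading recipes are rejected outright by canHandle.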
if (!Def || !VPCSEDenseMapInfo::canHandle(Def))
if (!VPDT.dominates(V->getParent(), VPBB))
Def->replaceAllUsesWith(V);
       "Expected vector preheader's successor to be the vector loop region");
return !Op->isDefinedOutsideLoopRegions();
R.moveBefore(*Preheader, Preheader->end());
VPValue *ResultVPV = R.getVPSingleValue();
unsigned NewResSizeInBits = MinBWs.lookup(UI);
if (!NewResSizeInBits)
(void)OldResSizeInBits;
VPW->dropPoisonGeneratingFlags();
if (OldResSizeInBits != NewResSizeInBits &&
Ext->insertAfter(&R);
Ext->setOperand(0, ResultVPV);
assert(OldResSizeInBits > NewResSizeInBits && "Nothing to shrink?");
       "Only ICmps should not need extending the result.");
for (unsigned Idx = StartIdx; Idx != R.getNumOperands(); ++Idx) {
auto *Op = R.getOperand(Idx);
unsigned OpSizeInBits =
if (OpSizeInBits == NewResSizeInBits)
assert(OpSizeInBits > NewResSizeInBits && "nothing to truncate");
auto [ProcessedIter, IterIsEmpty] = ProcessedTruncs.try_emplace(Op);
R.setOperand(Idx, ProcessedIter->second);
Builder.setInsertPoint(&R);
    Builder.createWidenCast(Instruction::Trunc, Op, NewResTy);
ProcessedIter->second = NewOp;
R.setOperand(Idx, NewOp);
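// Minimal-bitwidth sketch: when analysis (MinBWs) proves a result needs only
// N bits, the recipe is rewritten at the narrow type, each wider operand is
// fed through a cached Trunc, and a single extend back to the old width is
// inserted after the recipe; ICmp results are the one case needing no extend.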
assert(VPBB->getNumSuccessors() == 2 &&
       "Two successors expected for BranchOnCond");
unsigned RemovedIdx;
       "There must be a single edge between VPBB and its successor");
VPBB->back().eraseFromParent();
VPValue *StartV = CanonicalIVPHI->getStartValue();
auto *CanonicalIVIncrement =
CanonicalIVIncrement->dropPoisonGeneratingFlags();
DebugLoc DL = CanonicalIVIncrement->getDebugLoc();
VPValue *TripCount, *IncrementValue;
IncrementValue = CanonicalIVIncrement;
IncrementValue = CanonicalIVPHI;
auto *EntryIncrement = Builder.createOverflowingOp(
    {EntryIncrement, TC, ALMMultiplier}, DL, "active.lane.mask.entry");
LaneMaskPhi->insertAfter(CanonicalIVPHI);
Builder.setInsertPoint(OriginalTerminator);
auto *InLoopIncrement =
    {IncrementValue}, {false, false}, DL);
    {InLoopIncrement, TripCount, ALMMultiplier}, DL, "active.lane.mask.next");
auto *NotMask = Builder.createNot(ALM, DL);
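// Active-lane-mask sketch: the control flow is rewritten around
//   mask = get.active.lane.mask(IV, TripCount)
// carried by a phi ("active.lane.mask.entry" / ".next"); the latch branches
// on the negated mask, exiting once no lane remains active.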
auto *FoundWidenCanonicalIVUser = find_if(
       "Must have at most one VPWideCanonicalIVRecipe");
if (FoundWidenCanonicalIVUser !=
auto *WideCanonicalIV =
WideCanonicalIVs.push_back(WideCanonicalIV);
if (WidenOriginalIV && WidenOriginalIV->isCanonical())
  WideCanonicalIVs.push_back(WidenOriginalIV);
for (auto *Wide : WideCanonicalIVs) {
for (VPUser *U : Wide->users()) {
assert(VPI->getOperand(0) == Wide &&
       "WidenCanonicalIV must be the first operand of the compare");
assert(!HeaderMask && "Multiple header masks found?");
    VPlan &Plan, bool UseActiveLaneMaskForControlFlow,
        UseActiveLaneMaskForControlFlow) &&
       "DataAndControlFlowWithoutRuntimeCheck implies "
       "UseActiveLaneMaskForControlFlow");
auto *FoundWidenCanonicalIVUser = find_if(
assert(FoundWidenCanonicalIVUser &&
       "Must have widened canonical IV when tail folding!");
auto *WideCanonicalIV =
if (UseActiveLaneMaskForControlFlow) {
    nullptr, "active.lane.mask");
template <typename OpTy> bool match(OpTy *V) const {
template <typename Op0_t, typename Op1_t>
VPValue *Addr, *Mask, *EndPtr;
auto AdjustEndPtr = [&CurRecipe, &EVL](VPValue *EndPtr) {
EVLEndPtr->insertBefore(&CurRecipe);
EVLEndPtr->setOperand(1, &EVL);
if (match(&CurRecipe,
LoadR->insertBefore(&CurRecipe);
    Intrinsic::experimental_vp_reverse, {LoadR, Plan->getTrue(), &EVL},
    StoredVal, EVL, Mask);
if (match(&CurRecipe,
    Intrinsic::experimental_vp_reverse,
    {ReversedVal, Plan->getTrue(), &EVL},
    AdjustEndPtr(EndPtr), NewReverse, EVL,
if (Rdx->isConditional() &&
if (Interleave->getMask() &&
if (match(&CurRecipe,
    Intrinsic::vp_merge, {Mask, LHS, RHS, &EVL},
VPValue *HeaderMask = nullptr, *EVL = nullptr;
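// EVL sketch: header-masked wide loads/stores become vp intrinsics governed
// by the explicit vector length; reversed accesses are wrapped in
// llvm.experimental.vp.reverse (with the end pointer adjusted by EVL), and
// selects under the header mask become llvm.vp.merge(Mask, LHS, RHS, EVL).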
HeaderMask = R.getVPSingleValue();
NewR->insertBefore(R);
for (auto [Old, New] :
     zip_equal(R->definedValues(), NewR->definedValues()))
  Old->replaceAllUsesWith(New);
R->eraseFromParent();
       "User of VF that we can't transform to EVL.");
    [&LoopRegion, &Plan](VPUser *U) {
          m_c_Add(m_Specific(LoopRegion->getCanonicalIV()),
                  m_Specific(&Plan.getVFxUF()))) ||
      isa<VPWidenPointerInductionRecipe>(U);
       "Only users of VFxUF should be VPWidenPointerInductionRecipe and the "
       "increment of the canonical induction.");
MaxEVL = Builder.createScalarZExtOrTrunc(
Builder.setInsertPoint(Header, Header->getFirstNonPhi());
VPValue *PrevEVL = Builder.createScalarPhi(
    Intrinsic::experimental_vp_splice,
    {V1, V2, Imm, Plan.getTrue(), PrevEVL, &EVL},
R.getVPSingleValue()->replaceAllUsesWith(VPSplice);
VPValue *EVLMask = Builder.createICmp(
    VPlan &Plan, const std::optional<unsigned> &MaxSafeElements) {
VPValue *StartV = CanonicalIVPHI->getStartValue();
EVLPhi->insertAfter(CanonicalIVPHI);
VPBuilder Builder(Header, Header->getFirstNonPhi());
VPPhi *AVLPhi = Builder.createScalarPhi(
if (MaxSafeElements) {
auto *CanonicalIVIncrement =
Builder.setInsertPoint(CanonicalIVIncrement);
OpVPEVL = Builder.createScalarZExtOrTrunc(
    OpVPEVL, CanIVTy, I32Ty, CanonicalIVIncrement->getDebugLoc());
auto *NextEVLIV = Builder.createOverflowingOp(
    Instruction::Add, {OpVPEVL, EVLPhi},
    {CanonicalIVIncrement->hasNoUnsignedWrap(),
     CanonicalIVIncrement->hasNoSignedWrap()},
    CanonicalIVIncrement->getDebugLoc(), "index.evl.next");
EVLPhi->addOperand(NextEVLIV);
VPValue *NextAVL = Builder.createOverflowingOp(
    Instruction::Sub, {AVLPhi, OpVPEVL}, {true, false},
CanonicalIVPHI->replaceAllUsesWith(EVLPhi);
CanonicalIVIncrement->setOperand(0, CanonicalIVPHI);
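// EVL IV sketch: next to the canonical IV the loop keeps
//   AVL = remaining elements (phi, AVL -= EVL per iteration)
//   EVL = elements processed this iteration (optionally capped by
//         MaxSafeElements)
// and the canonical increment is replaced by "index.evl.next" = IV + EVL.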
assert(!EVLPhi && "Found multiple EVL PHIs. Only one expected");
[[maybe_unused]] bool FoundAVL =
assert(FoundAVL && "Didn't find AVL?");
[[maybe_unused]] bool FoundAVLNext =
assert(FoundAVLNext && "Didn't find AVL backedge?");
VPValue *Backedge = CanonicalIV->getIncomingValue(1);
       "Unexpected canonical iv");
CanonicalIV->eraseFromParent();
       "Expected BranchOnCond with ICmp comparing EVL increment with vector "
LatchExitingBr->setOperand(0,
return R->getRegion() ||
for (const SCEV *Stride : StridesMap.values()) {
const APInt *StrideConst;
RewriteMap[StrideV] = PSE.getSCEV(StrideV);
const SCEV *ScevExpr = ExpSCEV->getSCEV();
if (NewSCEV != ScevExpr) {
ExpSCEV->replaceAllUsesWith(NewExp);
    const std::function<bool(BasicBlock *)> &BlockNeedsPredication) {
auto CollectPoisonGeneratingInstrsInBackwardSlice([&](VPRecipeBase *Root) {
while (!Worklist.empty()) {
if (!Visited.insert(CurRec).second)
    RecWithFlags->isDisjoint()) {
    Instruction::Add, {A, B}, {false, false},
    RecWithFlags->getDebugLoc());
New->setUnderlyingValue(RecWithFlags->getUnderlyingValue());
RecWithFlags->replaceAllUsesWith(New);
RecWithFlags->eraseFromParent();
RecWithFlags->dropPoisonGeneratingFlags();
assert((!Instr || !Instr->hasPoisonGeneratingFlags()) &&
       "found instruction with poison generating flags not covered by "
       "VPRecipeWithIRFlags");
if (VPRecipeBase *OpDef = Operand->getDefiningRecipe())
Instruction &UnderlyingInstr = WidenRec->getIngredient();
VPRecipeBase *AddrDef = WidenRec->getAddr()->getDefiningRecipe();
if (AddrDef && WidenRec->isConsecutive() &&
    BlockNeedsPredication(UnderlyingInstr.getParent()))
  CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
VPRecipeBase *AddrDef = InterleaveRec->getAddr()->getDefiningRecipe();
    InterleaveRec->getInterleaveGroup();
bool NeedPredication = false;
     I < NumMembers; ++I) {
  NeedPredication |= BlockNeedsPredication(Member->getParent());
if (NeedPredication)
  CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
if (InterleaveGroups.empty())
for (const auto *IG : InterleaveGroups) {
StoredValues.push_back(StoreR->getStoredValue());
for (unsigned I = 1; I < IG->getFactor(); ++I) {
  StoredValues.push_back(StoreR->getStoredValue());
bool NeedsMaskForGaps =
    (IG->requiresScalarEpilogue() && !ScalarEpilogueAllowed) ||
    (!StoredValues.empty() && !IG->isFull());
VPValue *Addr = Start->getAddr();
assert(IG->getIndex(IRInsertPos) != 0 &&
       "index of insert position shouldn't be zero");
    IG->getIndex(IRInsertPos),
Addr = B.createNoWrapPtrAdd(InsertPos->getAddr(), OffsetVPV, NW);
if (IG->isReverse()) {
    -(int64_t)IG->getFactor(), NW, InsertPos->getDebugLoc());
ReversePtr->insertBefore(InsertPos);
    InsertPos->getMask(), NeedsMaskForGaps,
    InterleaveMD, InsertPos->getDebugLoc());
VPIG->insertBefore(InsertPos);
for (unsigned i = 0; i < IG->getFactor(); ++i)
  if (!Member->getType()->isVoidTy()) {
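// Interleave lowering sketch: all group members are folded into a single
// VPInterleaveRecipe at the insert position; when the insert position is not
// member 0 the address is offset back by the member index, reversed groups
// step the pointer by -Factor, and incomplete groups may need a gap mask.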
AddOp = Instruction::Add;
MulOp = Instruction::Mul;
AddOp = ID.getInductionOpcode();
MulOp = Instruction::FMul;
Step = Builder.createScalarCast(Instruction::Trunc, Step, Ty, DL);
Start = Builder.createScalarCast(Instruction::Trunc, Start, Ty, DL);
Flags.dropPoisonGeneratingFlags();
Init = Builder.createWidenCast(Instruction::UIToFP, Init, StepTy);
Init = Builder.createNaryOp(MulOp, {Init, SplatStep}, Flags);
Init = Builder.createNaryOp(AddOp, {SplatStart, Init}, Flags,
WidePHI->insertBefore(WidenIVR);
Builder.setInsertPoint(R->getParent(), std::next(R->getIterator()));
VF = Builder.createScalarCast(Instruction::CastOps::UIToFP, VF, StepTy,
VF = Builder.createScalarZExtOrTrunc(VF, StepTy,
Inc = Builder.createNaryOp(MulOp, {Step, VF}, Flags);
auto *Next = Builder.createNaryOp(AddOp, {Prev, Inc}, Flags,
WidePHI->addOperand(Next);
VPlan *Plan = R->getParent()->getPlan();
VPValue *Start = R->getStartValue();
VPValue *Step = R->getStepValue();
VPValue *VF = R->getVFValue();
assert(R->getInductionDescriptor().getKind() ==
           InductionDescriptor::IK_PtrInduction &&
       "Not a pointer induction according to InductionDescriptor!");
       "Recipe should have been replaced");
VPPhi *ScalarPtrPhi = Builder.createScalarPhi(Start, DL, "pointer.phi");
Builder.setInsertPoint(R->getParent(), R->getParent()->getFirstNonPhi());
Offset = Builder.createOverflowingOp(Instruction::Mul, {Offset, Step});
VPValue *PtrAdd = Builder.createNaryOp(
R->replaceAllUsesWith(PtrAdd);
VF = Builder.createScalarZExtOrTrunc(VF, StepTy, TypeInfo.inferScalarType(VF),
VPValue *Inc = Builder.createOverflowingOp(Instruction::Mul, {Step, VF});
Builder.createPtrAdd(ScalarPtrPhi, Inc, DL, "ptr.ind");
if (!R->isReplicator())
R->dissolveToCFGLoop();
assert(Br->getNumOperands() == 2 &&
       "BranchOnTwoConds must have exactly 2 conditions");
assert(Successors.size() == 3 &&
       "BranchOnTwoConds must have exactly 3 successors");
VPValue *Cond0 = Br->getOperand(0);
VPValue *Cond1 = Br->getOperand(1);
    !BrOnTwoCondsBB->getParent() && "regions must already be dissolved");
Br->eraseFromParent();
WidenIVR->replaceAllUsesWith(PtrAdd);
for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)
  Select = Builder.createSelect(Blend->getMask(I),
                                Blend->getIncomingValue(I), Select,
                                R.getDebugLoc(), "predphi");
Blend->replaceAllUsesWith(Select);
for (VPValue *Op : LastActiveL->operands()) {
VPValue *NotMask = Builder.createNot(Op, LastActiveL->getDebugLoc());
VPValue *FirstInactiveLane = Builder.createNaryOp(
    LastActiveL->getDebugLoc(), "first.inactive.lane");
VPValue *LastLane = Builder.createNaryOp(
    Instruction::Sub, {FirstInactiveLane, One},
    LastActiveL->getDebugLoc(), "last.active.lane");
DebugLoc DL = BranchOnCountInst->getDebugLoc();
ToRemove.push_back(BranchOnCountInst);
    ? Instruction::UIToFP
    : Instruction::Trunc;
VectorStep = Builder.createWidenCast(CastOp, VectorStep, IVTy);
Builder.createWidenCast(Instruction::Trunc, ScalarStep, IVTy);
Flags = {VPI->getFastMathFlags()};
    MulOpc, {VectorStep, ScalarStep}, Flags, R.getDebugLoc());
VPI->replaceAllUsesWith(VectorStep);
R->eraseFromParent();
       "unsupported early exit VPBB");
       "Terminator must be BranchOnCond");
VPValue *CondOfEarlyExitingVPBB =
auto *CondToEarlyExit = TrueSucc == EarlyExitVPBB
                            ? CondOfEarlyExitingVPBB
                            : Builder.createNot(CondOfEarlyExitingVPBB);
VPBuilder EarlyExitB(VectorEarlyExitVPBB);
unsigned EarlyExitIdx = ExitIRI->getNumOperands() - 1;
if (ExitIRI->getNumOperands() != 1) {
ExitIRI->extractLastLaneOfLastPartOfFirstOperand(MiddleBuilder);
VPValue *IncomingFromEarlyExit = ExitIRI->getOperand(EarlyExitIdx);
ExitIRI->setOperand(EarlyExitIdx, IncomingFromEarlyExit);
       "Unexpected terminator");
auto *IsLatchExitTaken =
    LatchExitingBranch->getOperand(1));
DebugLoc LatchDL = LatchExitingBranch->getDebugLoc();
LatchExitingBranch->eraseFromParent();
Builder.setInsertPoint(LatchVPBB);
    {IsEarlyExitTaken, IsLatchExitTaken}, LatchDL);
LatchVPBB->setSuccessors({VectorEarlyExitVPBB, MiddleVPBB, HeaderVPBB});
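// Early-exit sketch: the latch now branches on two conditions,
//   {IsEarlyExitTaken, IsLatchExitTaken}
// with three successors (vector early exit, middle block, header); exit phis
// receive their early-exit incoming value via the dedicated exit block.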
Type *RedTy = Ctx.Types.inferScalarType(Red);
VPValue *VecOp = Red->getVecOp();
auto IsExtendedRedValidAndClampRange =
if (Red->isPartialReduction()) {
ExtRedCost = Ctx.TTI.getPartialReductionCost(
    Opcode, SrcTy, nullptr, RedTy, VF, ExtKind,
    ? std::optional{Red->getFastMathFlags()}
ExtRedCost = Ctx.TTI.getExtendedReductionCost(
    Opcode, ExtOpc == Instruction::CastOps::ZExt, RedTy, SrcVecTy,
    Red->getFastMathFlags(), CostKind);
return ExtRedCost.isValid() && ExtRedCost < ExtCost + RedCost;
IsExtendedRedValidAndClampRange(
    Ctx.Types.inferScalarType(A)))
if (Opcode != Instruction::Add && Opcode != Instruction::Sub &&
    Opcode != Instruction::FAdd)
Type *RedTy = Ctx.Types.inferScalarType(Red);
auto IsMulAccValidAndClampRange =
    Ext0 ? Ctx.Types.inferScalarType(Ext0->getOperand(0)) : RedTy;
if (Red->isPartialReduction()) {
    Ext1 ? Ctx.Types.inferScalarType(Ext1->getOperand(0)) : nullptr;
MulAccCost = Ctx.TTI.getPartialReductionCost(
    Opcode, SrcTy, SrcTy2, RedTy, VF,
    ? std::optional{Red->getFastMathFlags()}
(Ext0->getOpcode() != Ext1->getOpcode() ||
 Ext0->getOpcode() == Instruction::CastOps::FPExt))
    !Ext0 || Ext0->getOpcode() == Instruction::CastOps::ZExt;
MulAccCost = Ctx.TTI.getMulAccReductionCost(IsZExt, Opcode, RedTy,
ExtCost += Ext0->computeCost(VF, Ctx);
ExtCost += Ext1->computeCost(VF, Ctx);
ExtCost += OuterExt->computeCost(VF, Ctx);
return MulAccCost.isValid() &&
       MulAccCost < ExtCost + MulCost + RedCost;
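// The gate above accepts the abstract mul-accumulate form only when
//   MulAccCost < ExtCost + MulCost + RedCost
// i.e. folding extend + multiply + reduce into one target operation must be
// strictly cheaper than the separate recipes it replaces.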
VPValue *VecOp = Red->getVecOp();
assert(Opcode == Instruction::FAdd &&
       "MulAccumulateReduction from an FMul must accumulate into an FAdd "
if (RecipeA && RecipeB &&
    IsMulAccValidAndClampRange(FMul, RecipeA, RecipeB, nullptr)) {
    Builder.createWidenCast(Instruction::CastOps::Trunc, ValB, NarrowTy);
Type *WideTy = Ctx.Types.inferScalarType(ExtA);
ValB = ExtB = Builder.createWidenCast(ExtOpc, Trunc, WideTy);
Mul->setOperand(1, ExtB);
ExtendAndReplaceConstantOp(RecipeA, RecipeB, B, Mul);
    IsMulAccValidAndClampRange(Mul, RecipeA, RecipeB, nullptr)) {
if (!Sub && IsMulAccValidAndClampRange(Mul, nullptr, nullptr, nullptr))
ExtendAndReplaceConstantOp(Ext0, Ext1, B, Mul);
(Ext->getOpcode() == Ext0->getOpcode() || Ext0 == Ext1) &&
Ext0->getOpcode() == Ext1->getOpcode() &&
IsMulAccValidAndClampRange(Mul, Ext0, Ext1, Ext) && Mul->hasOneUse()) {
    Ext0->getOpcode(), Ext0->getOperand(0), Ext->getResultType(), nullptr,
    *Ext0, *Ext0, Ext0->getDebugLoc());
NewExt0->insertBefore(Ext0);
    Ext->getResultType(), nullptr, *Ext1,
    *Ext1, Ext1->getDebugLoc());
Mul->setOperand(0, NewExt0);
Mul->setOperand(1, NewExt1);
Red->setOperand(1, Mul);
auto IP = std::next(Red->getIterator());
auto *VPBB = Red->getParent();
Red->replaceAllUsesWith(AbstractR);
for (VPValue *VPV : VPValues) {
if (User->usesScalars(VPV))
HoistPoint = HoistBlock->begin();
       "All users must be in the vector preheader or dominated by it");
VPV->replaceUsesWithIf(Broadcast,
                       [VPV, Broadcast](VPUser &U, unsigned Idx) {
                         return Broadcast != &U && !U.usesScalars(VPV);
if (RepR->isPredicated() || !RepR->isSingleScalar() ||
    RepR->getOpcode() != Instruction::Load)
VPValue *Addr = RepR->getOperand(0);
if (!Loc.AATags.Scope)
if (R.mayWriteToMemory()) {
if (!Loc || !Loc->AATags.Scope || !Loc->AATags.NoAlias)
for (auto &[LoadRecipe, LoadLoc] : CandidateLoads) {
const AAMDNodes &LoadAA = LoadLoc.AATags;
return CommonMetadata;
template <unsigned Opcode>
static_assert(Opcode == Instruction::Load || Opcode == Instruction::Store,
              "Only Load and Store opcodes supported");
constexpr bool IsLoad = (Opcode == Instruction::Load);
if (!RepR || RepR->getOpcode() != Opcode || !RepR->isPredicated())
VPValue *Addr = RepR->getOperand(IsLoad ? 0 : 1);
RecipesByAddress[AddrSCEV].push_back(RepR);
return TypeInfo.inferScalarType(IsLoad ? Recipe : Recipe->getOperand(0));
for (auto &[Addr, Recipes] : RecipesByAddress) {
if (Recipes.size() < 2)
VPValue *MaskI = RecipeI->getMask();
Type *TypeI = GetLoadStoreValueType(RecipeI);
bool HasComplementaryMask = false;
VPValue *MaskJ = RecipeJ->getMask();
Type *TypeJ = GetLoadStoreValueType(RecipeJ);
if (TypeI == TypeJ) {
if (HasComplementaryMask) {
assert(Group.size() >= 2 && "must have at least 2 entries");
template <typename InstType>
for (auto &Group : Groups) {
    LoadWithMinAlign->getUnderlyingInstr(), {EarliestLoad->getOperand(0)},
    false, nullptr, *EarliestLoad,
UnpredicatedLoad->insertBefore(EarliestLoad);
Load->replaceAllUsesWith(UnpredicatedLoad);
Load->eraseFromParent();
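// Hoisting sketch: predicated loads of the same address whose masks are
// complementary (together covering all lanes) collapse into one
// unpredicated load placed at the earliest member, presumably using the
// most conservative alignment in the group (LoadWithMinAlign above).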
if (!StoreLoc || !StoreLoc->AATags.Scope)
    StoresToSink.end());
SinkStoreInfo SinkInfo(StoresToSinkSet, *StoresToSink[0], PSE, L, TypeInfo);
for (auto &Group : Groups) {
VPValue *SelectedValue = Group[0]->getOperand(0);
for (unsigned I = 1; I < Group.size(); ++I) {
VPValue *Mask = Group[I]->getMask();
SelectedValue = Builder.createSelect(Mask, Value, SelectedValue,
auto *UnpredicatedStore =
    {SelectedValue, LastStore->getOperand(1)},
    nullptr, *LastStore, CommonMetadata);
UnpredicatedStore->insertBefore(*InsertBB, LastStore->getIterator());
Store->eraseFromParent();
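// Store-sinking sketch: complementary predicated stores to one address fold
// into a single unpredicated store whose value is a select chain,
//   Selected = select(MaskI, ValueI, Selected)
// built in program order, so the store whose mask is true wins.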
assert(Plan.hasVF(BestVF) && "BestVF is not available in Plan");
assert(Plan.hasUF(BestUF) && "BestUF is not available in Plan");
auto *TCMO = Builder.createNaryOp(
auto UsesVectorOrInsideReplicateRegion = [DefR, LoopRegion](VPUser *U) {
  return !U->usesScalars(DefR) || ParentRegion != LoopRegion;
    none_of(DefR->users(), UsesVectorOrInsideReplicateRegion))
DefR->replaceUsesWithIf(
    BuildVector, [BuildVector, &UsesVectorOrInsideReplicateRegion](
      return &U != BuildVector && UsesVectorOrInsideReplicateRegion(&U);
for (VPValue *Def : R.definedValues()) {
auto IsCandidateUnpackUser = [Def](VPUser *U) {
  return U->usesScalars(Def) &&
if (none_of(Def->users(), IsCandidateUnpackUser))
Unpack->insertAfter(&R);
Def->replaceUsesWithIf(Unpack,
                       [&IsCandidateUnpackUser](VPUser &U, unsigned) {
                         return IsCandidateUnpackUser(&U);
    bool RequiresScalarEpilogue) {
if (TailByMasking) {
TC = Builder.createNaryOp(
    {TC, Builder.createNaryOp(Instruction::Sub,
Builder.createNaryOp(Instruction::URem, {TC, Step},
if (RequiresScalarEpilogue) {
       "requiring scalar epilogue is not supported with tail folding");
R = Builder.createSelect(IsZero, Step, R);
VPValue *Res = Builder.createNaryOp(
Builder.createElementCount(TCTy, VFEC * Plan.getUF());
VPValue *RuntimeVF = Builder.createElementCount(TCTy, VFEC);
    BC, [&VF](VPUser &U, unsigned) { return !U.usesScalars(&VF); });
VPValue *MulByUF = Builder.createOverflowingOp(
    Instruction::Mul, {RuntimeVF, UF}, {true, false});
BasicBlock *EntryBB = Entry->getIRBasicBlock();
const SCEV *Expr = ExpSCEV->getSCEV();
ExpandedSCEVs[ExpSCEV->getSCEV()] = Res;
ExpSCEV->eraseFromParent();
       "VPExpandSCEVRecipes must be at the beginning of the entry block, "
       "before any VPIRInstructions");
auto EI = Entry->begin();
return ExpandedSCEVs;
return Member0Op == OpV;
return !W->getMask() && Member0Op == OpV;
return IR->getInterleaveGroup()->isFull() && IR->getVPValue(Idx) == OpV;
if (!InterleaveR || InterleaveR->getMask())
Type *GroupElementTy = nullptr;
    [&TypeInfo, GroupElementTy](VPValue *Op) {
      return TypeInfo.inferScalarType(Op) == GroupElementTy;
    [&TypeInfo, GroupElementTy](VPValue *Op) {
      return TypeInfo.inferScalarType(Op) == GroupElementTy;
return IG->getFactor() == VFMin && IG->getNumMembers() == VFMin &&
       GroupSize == VectorRegWidth;
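// Narrowing is only attempted when the group exactly tiles one vector
// register: Factor == NumMembers == VFMin and the group's total size equals
// the vector register width.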
return RepR && RepR->isSingleScalar();
auto *R = V->getDefiningRecipe();
for (unsigned Idx = 0, E = WideMember0->getNumOperands(); Idx != E; ++Idx)
  WideMember0->setOperand(
auto *LI = cast<LoadInst>(LoadGroup->getInterleaveGroup()->getInsertPos());
    *LI, LoadGroup->getAddr(), LoadGroup->getMask(), true,
    false, {}, LoadGroup->getDebugLoc());
L->insertBefore(LoadGroup);
assert(RepR->isSingleScalar() && "must be a single scalar load");
NarrowedOps.insert(RepR);
VPValue *PtrOp = WideLoad->getAddr();
PtrOp = VecPtr->getOperand(0);
    nullptr, {}, *WideLoad);
N->insertBefore(WideLoad);
if (R.mayWriteToMemory() && !InterleaveR)
if (InterleaveR->getStoredValues().empty())
auto *Member0 = InterleaveR->getStoredValues()[0];
VPRecipeBase *DefR = Op.value()->getDefiningRecipe();
auto *IR = dyn_cast<VPInterleaveRecipe>(DefR);
return IR && IR->getInterleaveGroup()->isFull() &&
       IR->getVPValue(Op.index()) == Op.value();
for (const auto &[I, V] : enumerate(InterleaveR->getStoredValues())) {
if (!R || R->getOpcode() != WideMember0->getOpcode() ||
    R->getNumOperands() > 2)
    [WideMember0, Idx = I](const auto &P) {
      const auto &[OpIdx, OpV] = P;
      return !canNarrowLoad(WideMember0, OpIdx, OpV, Idx);
if (StoreGroups.empty())
for (auto *StoreGroup : StoreGroups) {
    *SI, StoreGroup->getAddr(), Res, nullptr, true,
    false, {}, StoreGroup->getDebugLoc());
S->insertBefore(StoreGroup);
StoreGroup->eraseFromParent();
    Instruction::Mul, {VScale, UF}, {true, false});
Inc->setOperand(1, UF);
       "must have a BranchOnCond");
if (VF.isScalable() && VScaleForTuning.has_value())
  VectorStep *= *VScaleForTuning;
assert(VectorStep > 0 && "trip count should not be zero");
MiddleTerm->setMetadata(LLVMContext::MD_prof, BranchWeights);
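// Branch-weight sketch (assuming the usual 1 : VectorStep - 1 split): the
// middle block's terminator is annotated so the scalar epilogue is treated
// as reached once per VectorStep ~ VF * UF iterations, with VectorStep
// scaled by VScaleForTuning for scalable VFs.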
if (WideIntOrFp && WideIntOrFp->getTruncInst())
if (!WideIntOrFp || !WideIntOrFp->isCanonical()) {
    Start, VectorTC, Step);
VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
IVEndValues[WideIVR] = EndValue;
ResumePhiR->setOperand(0, EndValue);
ResumePhiR->setName("bc.resume.val");
       "should only skip truncated wide inductions");
auto *ResumeFromVectorLoop = VectorPhiR->getBackedgeValue();
       "Cannot handle loops with uncountable early exits");
       "vector.recur.extract");
ResumePhiR->setName(IsFOR ? "scalar.recur.init" : "bc.merge.rdx");
ResumePhiR->setOperand(0, ResumeFromVectorLoop);
VPBuilder ScalarPHBuilder(ScalarPHVPBB);
VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
       "Cannot handle loops with uncountable early exits");
     make_range(MiddleVPBB->getFirstNonPhi(), MiddleVPBB->end()))) {
    "vector.recur.extract.for.phi");
struct VPPartialReductionChain {
unsigned ScaleFactor;
static bool transformToPartialReduction(const VPPartialReductionChain &Chain,
unsigned ScaleFactor = Chain.ScaleFactor;
auto *AccumRecipe = Accumulator->getDefiningRecipe();
    -> std::pair<Type *, TargetTransformInfo::PartialReductionExtendKind> {
return {ExtOpType, ExtKind};
auto ExtInfoA = GetExtInfo(Chain.ExtendA);
auto ExtInfoB = GetExtInfo(Chain.ExtendB);
Type *ExtOpTypeA = ExtInfoA.first;
Type *ExtOpTypeB = ExtInfoB.first;
auto ExtKindA = ExtInfoA.second;
auto ExtKindB = ExtInfoB.second;
if (!Chain.ExtendB && Chain.BinOp && Chain.BinOp != Chain.ReductionBinOp) {
ExtOpTypeB = ExtOpTypeA;
ExtKindB = ExtKindA;
std::optional<unsigned> BinOpc =
    (Chain.BinOp && Chain.BinOp != Chain.ReductionBinOp)
        ? std::make_optional(Chain.BinOp->getOpcode())
    WidenRecipe->getOpcode(), ExtOpTypeA, ExtOpTypeB, PhiType,
    VF, ExtKindA, ExtKindB, BinOpc, CostCtx.CostKind,
    ? std::optional{WidenRecipe->getFastMathFlags()}
assert(RdxPhi->getVFScaleFactor() == 1 && "scale factor must not be set");
RdxPhi->setVFScaleFactor(ScaleFactor);
VPValue *StartValue = RdxPhi->getOperand(0);
StartInst->setOperand(2, NewScaleFactor);
ExitValue = RdxResult->getOperand(0);
if (WidenRecipe->getOpcode() == Instruction::Sub) {
Builder.insert(NegRecipe);
PartialRed->insertBefore(WidenRecipe);
VPValue *PhiOp = UpdateR->getOperand(1);
std::optional<TTI::PartialReductionExtendKind> OuterExtKind;
Op = CastRecipe->getOperand(0);
if (getScaledReductions(RedPhiR, Op, Chains, TypeInfo)) {
RedPhiR = Chains.rbegin()->ReductionBinOp;
Op = UpdateR->getOperand(0);
PhiOp = UpdateR->getOperand(1);
if (RedPhiR != PhiOp)
auto MatchExtends = [OuterExtKind,
assert(Operands.size() <= 2 && "expected at most 2 operands");
for (const auto &[I, OpVal] : enumerate(Operands)) {
if (I > 0 && CastRecipes[0] && match(OpVal, m_APInt(Unused)))
if (!CastRecipes[I])
return CastRecipes[0] != nullptr;
    !MatchExtends(BinOp->operands()))
if (!MatchExtends({Op}))
{UpdateR, CastRecipes[0], CastRecipes[1], BinOp,
VPValue *ExitValue = RdxResult->getOperand(0);
getScaledReductions(RedPhiR, ExitValue, ChainsByPhi[RedPhiR],
if (ChainsByPhi.empty())
for (const auto &[_, Chains] : ChainsByPhi)
  for (const VPPartialReductionChain &Chain : Chains) {
    PartialReductionOps.insert(Chain.BinOp);
    ScaledReductionMap[Chain.ReductionBinOp] = Chain.ScaleFactor;
return PartialReductionOps.contains(cast<VPRecipeBase>(U));
for (auto &[RedPhiR, Chains] : ChainsByPhi) {
for (const VPPartialReductionChain &Chain : Chains) {
if (!ExtendUsersValid(Chain.ExtendA) ||
    !ExtendUsersValid(Chain.ExtendB)) {
auto UseIsValid = [&, RedPhiR = RedPhiR](VPUser *U) {
  return PhiR == RedPhiR;
return Chain.ScaleFactor == ScaledReductionMap.lookup_or(R, 0) ||
if (!all_of(Chain.ReductionBinOp->users(), UseIsValid)) {
auto *RepR = dyn_cast<VPReplicateRecipe>(U);
return RepR && isa<StoreInst>(RepR->getUnderlyingInstr());
for (const auto &[_, Chains] : ChainsByPhi)
  for (const VPPartialReductionChain &Chain : Chains)
    transformToPartialReduction(Chain, Range, CostCtx, Plan);
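// Partial-reduction sketch: a chain ext(A) * ext(B) accumulated into a
// reduction phi becomes a partial reduction producing VF / ScaleFactor lanes
// (e.g. an i8 x i8 -> i32 dot product with ScaleFactor 4); the phi's
// VFScaleFactor is updated and a Sub accumulator is handled by negating the
// multiplied value before accumulation.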
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
This file implements a class to represent arbitrary precision integral constant values and operations...
ReachingDefInfo InstSet & ToRemove
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static bool isEqual(const Function &Caller, const Function &Callee)
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
static bool isSentinel(const DWARFDebugNames::AttributeEncoding &AE)
iv Induction Variable Users
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
Legalize the Machine IR a function s Machine IR
static bool mergeBlocksIntoPredecessors(Loop &L, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution &SE)
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first DebugLoc that has line number information, given a range of instructions.
This file provides utility analysis objects describing memory locations.
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
const SmallVectorImpl< MachineOperand > & Cond
This is the interface for a metadata-based scoped no-alias analysis.
This file defines generic set operations that may be used on set's of different types,...
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static SymbolRef::Type getType(const Symbol *Sym)
This file implements the TypeSwitch template, which mimics a switch() statement whose cases are type ...
This file implements dominator tree analysis for a single level of a VPlan's H-CFG.
This file contains the declarations of different VPlan-related auxiliary helpers.
This file declares the class VPlanVerifier, which contains utility functions to check the consistency...
This file contains the declarations of the Vectorization Plan base classes:
static const X86InstrFMA3Group Groups[]
static const uint32_t IV[8]
Helper for extra no-alias checks via known-safe recipe and SCEV.
SinkStoreInfo(const SmallPtrSetImpl< VPRecipeBase * > &ExcludeRecipes, VPReplicateRecipe &GroupLeader, PredicatedScalarEvolution &PSE, const Loop &L, VPTypeAnalysis &TypeInfo)
bool shouldSkip(VPRecipeBase &R) const
Return true if R should be skipped during alias checking, either because it's in the exclude set or b...
Class for arbitrary precision integers.
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
unsigned getActiveBits() const
Compute the number of active bits in the value.
APInt abs() const
Get the absolute value.
unsigned getBitWidth() const
Return the number of bits in the APInt.
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & back() const
back - Get the last element.
const T & front() const
front - Get the first element.
LLVM Basic Block Representation.
const Function * getParent() const
Return the enclosing method, or null if none.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
This class represents a function call, abstracting a target machine's calling convention.
@ ICMP_ULT
unsigned less than
@ ICMP_ULE
unsigned less or equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
static DebugLoc getCompilerGenerated()
static DebugLoc getUnknown()
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
ValueT lookup_or(const_arg_type_t< KeyT > Val, U &&Default) const
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
constexpr bool isVector() const
One or more elements.
static constexpr ElementCount getScalable(ScalarTy MinVal)
Utility class for floating point operations which can have information about relaxed accuracy require...
Convenience struct for specifying and reasoning about fast-math flags.
Represents flags for the getelementptr instruction/expression.
GEPNoWrapFlags withoutNoUnsignedWrap() const
static GEPNoWrapFlags none()
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
A struct for saving information about induction variables.
InductionKind
This enum represents the kinds of inductions that we support.
@ IK_PtrInduction
Pointer induction var. Step = C.
@ IK_IntInduction
Integer induction variable. Step = C.
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
static InstructionCost getInvalid(CostType Val=0)
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
The group of interleaved loads/stores sharing the same stride and close to each other.
InstTy * getMember(uint32_t Index) const
Get the member with the given index Index.
uint32_t getNumMembers() const
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
static bool getDecisionAndClampRange(const std::function< bool(ElementCount)> &Predicate, VFRange &Range)
Test a Predicate on a Range of VF's.
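getDecisionAndClampRange is the idiom these transforms use to make a per-VF decision once for a whole range: it clamps Range to the maximal subrange on which the predicate agrees with its value at Range.Start, and returns that value. A sketch, assuming the planner's in-tree header is available:

// Returns true iff the decision for Range.Start is "scalar", clamping Range
// so that all remaining VFs share that decision.
static bool decideScalar(VFRange &Range) {
  return LoopVectorizationPlanner::getDecisionAndClampRange(
      [](ElementCount VF) { return VF.isScalar(); }, Range);
}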
Represents a single loop in the control flow graph.
LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
This class implements a map that also provides access to all stored values in a deterministic order.
ValueT lookup(const KeyT &Key) const
Representation for a specific memory location.
AAMDNodes AATags
The metadata nodes which describe the aliasing of the location (each member is null if that kind of metadata is unavailable).
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of existing predicates.
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
LLVM_ABI const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
static LLVM_ABI unsigned getOpcode(RecurKind Kind)
Returns the opcode corresponding to the RecurrenceKind.
unsigned getOpcode() const
RegionT * getParent() const
Get the parent of the Region.
This class uses information about analyzed scalars to rewrite expressions in canonical form.
LLVM_ABI Value * expandCodeFor(const SCEV *SH, Type *Ty, BasicBlock::iterator I)
Insert code to directly compute the specified SCEV expression into the program.
static const SCEV * rewrite(const SCEV *Scev, ScalarEvolution &SE, ValueToSCEVMapTy &Map)
This class represents an analyzed expression in the program.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
const DataLayout & getDataLayout() const
Return the DataLayout associated with the module this SCEV instance is operating on.
LLVM_ABI const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI const SCEV * getUDivExpr(const SCEV *LHS, const SCEV *RHS)
Get a canonical unsigned division expression, or something simpler if possible.
LLVM_ABI const SCEV * getElementCount(Type *Ty, ElementCount EC, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
LLVM_ABI const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
LLVM_ABI const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
LLVM_ABI bool isKnownPredicate(CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS and RHS.
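The SCEV entries above combine into the usual "prove a pointer distance" pattern used by the no-alias reasoning in this file. A hedged sketch (names are illustrative):

#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/InstrTypes.h"

// Ask SCEV whether (B - A) >=u K holds for all executions.
static bool knownDistanceAtLeast(llvm::ScalarEvolution &SE, llvm::Value *A,
                                 llvm::Value *B, const llvm::SCEV *K) {
  const llvm::SCEV *Dist = SE.getMinusSCEV(SE.getSCEV(B), SE.getSCEV(A));
  return SE.isKnownPredicate(llvm::CmpInst::ICMP_UGE, Dist, K);
}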
static LLVM_ABI bool mayAliasInScopes(const MDNode *Scopes, const MDNode *NoAlias)
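mayAliasInScopes is what the read-aliasing check in the store-sinking code relies on, via MemoryLocation::AATags. A conservative sketch:

#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ScopedNoAliasAA.h"

// Two locations may alias unless each one's noalias scopes rule out the
// other's alias scopes.
static bool mayAliasByScopes(const llvm::MemoryLocation &L1,
                             const llvm::MemoryLocation &L2) {
  return llvm::ScopedNoAliasAAResult::mayAliasInScopes(L1.AATags.Scope,
                                                       L2.AATags.NoAlias) &&
         llvm::ScopedNoAliasAAResult::mayAliasInScopes(L2.AATags.Scope,
                                                       L1.AATags.NoAlias);
}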
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
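SetVector is the worklist type used by the sink-scalar-operands loop in this file: insertion de-duplicates, iteration order is stable, and indexed access stays valid because new candidates are only ever appended. A sketch of the idiom:

#include "llvm/ADT/SetVector.h"

static void drainWorkList(llvm::SetVector<int> &WorkList) {
  for (unsigned I = 0; I != WorkList.size(); ++I) {
    int Item = WorkList[I];      // indexing is safe while appending
    if (Item < 100)
      WorkList.insert(Item * 2); // no-op if Item * 2 is already present
  }
}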
A templated base class for SmallPtrSet which provides the typesafe interface that is common across all small sizes.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
void push_back(const T &Elt)
reverse_iterator rbegin()
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Provides information about what library functions are available for the current target.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary values as strings.
static constexpr TypeSize get(ScalarTy Quantity, bool Scalable)
This class implements a switch-like dispatch statement for a value of 'T' using dyn_cast functionality.
TypeSwitch< T, ResultT > & Case(CallableT &&caseFn)
Add a case on the given type.
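TypeSwitch drives the ingredient-lowering dispatch near the top of this file. A self-contained sketch of the same shape:

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/IR/Instructions.h"

static llvm::StringRef classify(llvm::Instruction *I) {
  return llvm::TypeSwitch<llvm::Instruction *, llvm::StringRef>(I)
      .Case<llvm::LoadInst>([](llvm::LoadInst *) { return "load"; })
      .Case<llvm::StoreInst>([](llvm::StoreInst *) { return "store"; })
      .Default([](llvm::Instruction *) { return "other"; });
}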
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
bool isStructTy() const
True if this is an instance of StructType.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
A recipe for generating the active lane mask for the vector loop that is used to predicate the vector loop.
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
RecipeListTy::iterator iterator
Instruction iterators...
iterator begin()
Recipe iterator methods.
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
VPRegionBlock * getEnclosingLoopRegion()
VPBasicBlock * splitAt(iterator SplitAt)
Split current block at SplitAt by inserting a new block between the current block and its successors, and moving all recipes starting at SplitAt to the new block.
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
const VPRecipeBase & back() const
A recipe for vectorizing a phi-node as a sequence of mask-based select instructions.
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account that when normalized the first incoming value will have no mask.
void setMask(unsigned Idx, VPValue *V)
Set mask number Idx to V.
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have an associated mask.
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccs as successor of this VPBlockBase.
VPRegionBlock * getParent()
const VPBasicBlock * getExitingBasicBlock() const
size_t getNumSuccessors() const
size_t getNumPredecessors() const
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
const VPBlocksTy & getPredecessors() const
const std::string & getName() const
void clearSuccessors()
Remove all the successors of this block.
VPBlockBase * getSinglePredecessor() const
const VPBasicBlock * getEntryBasicBlock() const
void setParent(VPRegionBlock *P)
VPBlockBase * getSingleHierarchicalPredecessor()
VPBlockBase * getSingleSuccessor() const
const VPBlocksTy & getSuccessors() const
static auto blocksOnly(const T &Range)
Return an iterator range over Range which only includes BlockTy blocks.
static void insertOnEdge(VPBlockBase *From, VPBlockBase *To, VPBlockBase *BlockPtr)
Inserts BlockPtr on the edge between From and To.
static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBases IfTrue and IfFalse after BlockPtr.
static void connectBlocks(VPBlockBase *From, VPBlockBase *To, unsigned PredIdx=-1u, unsigned SuccIdx=-1u)
Connect VPBlockBases From and To bi-directionally.
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
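A sketch of the rewiring primitive built from connectBlocks/disconnectBlocks; the single-call insertOnEdge above does the same while also preserving the edge's position in the predecessor/successor lists:

static void insertBetween(VPBlockBase *From, VPBlockBase *To,
                          VPBlockBase *New) {
  VPBlockUtils::disconnectBlocks(From, To);
  VPBlockUtils::connectBlocks(From, New);
  VPBlockUtils::connectBlocks(New, To);
  // Roughly equivalent: VPBlockUtils::insertOnEdge(From, To, New);
}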
A recipe for generating conditional branches on the bits of a mask.
RAII object that stores the current insertion point and restores it when the object is destroyed.
VPlan-based builder utility analogous to IRBuilder.
VPValue * createScalarZExtOrTrunc(VPValue *Op, Type *ResultTy, Type *SrcTy, DebugLoc DL)
VPValue * createElementCount(Type *Ty, ElementCount EC)
VPInstruction * createScalarCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, DebugLoc DL, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={})
static VPBuilder getToInsertAfter(VPRecipeBase *R)
Create a VPBuilder to insert after R.
VPInstruction * createOverflowingOp(unsigned Opcode, ArrayRef< VPValue * > Operands, VPRecipeWithIRFlags::WrapFlagsTy WrapFlags={false, false}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPDerivedIVRecipe * createDerivedIV(InductionDescriptor::InductionKind Kind, FPMathOperator *FPBinOp, VPIRValue *Start, VPValue *Current, VPValue *Step, const Twine &Name="")
Convert the input value Current to the corresponding value of an induction with Start and Step values, using Start + Current * Step.
VPPhi * createScalarPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL, const Twine &Name="")
void setInsertPoint(VPBasicBlock *TheBB)
This specifies that created VPInstructions should be appended to the end of the specified block.
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.
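A minimal sketch of using VPBuilder with the entries above; VPBB, A and B are assumed to be an existing block and operands:

static VPValue *emitAdd(VPBasicBlock *VPBB, VPValue *A, VPValue *B) {
  VPBuilder Builder;
  Builder.setInsertPoint(VPBB); // append to the end of VPBB
  return Builder.createNaryOp(Instruction::Add, {A, B});
}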
Canonical scalar induction phi of the vector loop.
unsigned getNumDefinedValues() const
Returns the number of values defined by the VPDef.
VPValue * getVPSingleValue()
Returns the only VPValue defined by the VPDef.
VPValue * getVPValue(unsigned I)
Returns the VPValue with index I defined by the VPDef.
ArrayRef< VPRecipeValue * > definedValues()
Returns an ArrayRef of the values defined by the VPDef.
A recipe for converting the input value IV to the corresponding value of an IV with different start and step values, using Start + IV * Step.
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
bool properlyDominates(const VPRecipeBase *A, const VPRecipeBase *B)
A recipe for generating the phi node for the current index of elements, adjusted in accordance with the EVL value.
A recipe to combine multiple recipes into a single 'expression' recipe, which should be considered a single entity for cost-modeling and transforms.
A special type of VPBasicBlock that wraps an existing IR basic block.
BasicBlock * getIRBasicBlock() const
Class to record and manage LLVM IR flags.
LLVM_ABI_FOR_TEST FastMathFlags getFastMathFlags() const
static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)
Create a new VPIRPhi for I, if it is a PHINode, otherwise create a VPIRInstruction.
This is a concrete Recipe that models a single VPlan-level instruction.
@ ExtractLane
Extracts a single lane (first operand) from a set of vector operands.
@ ExtractPenultimateElement
@ Unpack
Extracts all lanes from its (non-scalable) vector operand.
@ FirstOrderRecurrenceSplice
@ ReductionStartVector
Start vector for reductions with 3 operands: the original start value, the identity value for the reduction, and a scaling factor.
@ BuildVector
Creates a fixed-width vector containing all operands.
@ BuildStructVector
Given operands of (the same) struct type, creates a struct of fixed-width vectors each containing a struct element of all operands.
@ CanonicalIVIncrementForPart
@ CalculateTripCountMinusVF
const InterleaveGroup< Instruction > * getInterleaveGroup() const
VPValue * getMask() const
Return the mask used by this recipe.
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
A recipe for interleaved memory operations with vector-predication intrinsics.
VPInterleaveRecipe is a recipe for transforming an interleave group of loads or stores into one wide load/store and shuffles.
VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when control converges back from a Branch-on-Mask.
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
VPRegionBlock * getRegion()
VPBasicBlock * getParent()
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
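These are the primitives behind the phi hoisting in the region-merging transform of this file; a sketch of its move-or-erase pattern:

// Erase R if its value is dead; otherwise move it to the start of Dest.
static void hoistOrErase(VPRecipeBase &R, VPBasicBlock &Dest) {
  if (R.getVPSingleValue()->getNumUsers() == 0) {
    R.eraseFromParent();
    return;
  }
  R.moveBefore(Dest, Dest.begin());
}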
Helper class to create VPRecipes from IR instructions.
VPRecipeBase * getRecipe(Instruction *I)
Return the recipe created for given ingredient.
A recipe to represent inloop reduction operations with vector-predication intrinsics, performing a reduction on a vector operand with the explicit vector length (EVL) into a scalar value.
A recipe to represent inloop, ordered or partial reduction operations.
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-Single-Exiting subgraph of the output IR CFG.
const VPBlockBase * getEntry() const
Type * getCanonicalIVType()
Return the type of the canonical IV for loop regions.
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR corresponding to its VPBlockBases.
void setExiting(VPBlockBase *ExitingBlock)
Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.
VPCanonicalIVPHIRecipe * getCanonicalIV()
Returns the canonical induction recipe of the region.
const VPBlockBase * getExiting() const
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original scalar type, one per lane, instead of its vectorized counterpart.
bool isSingleScalar() const
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar values.
VPSingleDef is a base class for recipes modeling a sequence of one or more output IR instructions that define a single result VPValue.
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
VPSingleDefRecipe * clone() override=0
Clone the current recipe.
An analysis for type-inference for VPValues.
LLVMContext & getContext()
Return the LLVMContext used by the analysis.
Type * inferScalarType(const VPValue *V)
Infer the type of V. Returns the scalar type of V.
This class augments VPValue with operands which provide the inverse def-use edges from a VPValue's users to the VPValues they use.
void setOperand(unsigned I, VPValue *New)
unsigned getNumOperands() const
VPValue * getOperand(unsigned N) const
void addOperand(VPValue *Operand)
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into, within and out of the VPlan.
Value * getLiveInIRValue() const
Return the underlying IR value for a VPIRValue.
bool isDefinedOutsideLoopRegions() const
Returns true if the VPValue is defined outside any loop.
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe, i.e. if it is a live-in.
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
void setUnderlyingValue(Value *Val)
void replaceAllUsesWith(VPValue *New)
unsigned getNumUsers() const
void replaceUsesWithIf(VPValue *New, llvm::function_ref< bool(VPUser &U, unsigned Idx)> ShouldReplace)
Go through the uses list for this VPValue and make each use point to New if the callback ShouldReplace returns true for the given use, identified by its user and operand index.
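replaceUsesWithIf enables targeted rewrites; a sketch of restricting replacement to users inside one block, as the region-merging code does:

static void replaceUsesInBlock(VPValue *Old, VPValue *New,
                               VPBasicBlock *VPBB) {
  Old->replaceUsesWithIf(New, [VPBB](VPUser &U, unsigned) {
    return llvm::cast<VPRecipeBase>(&U)->getParent() == VPBB;
  });
}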
A recipe to compute a pointer to the last element of each part of a widened memory access, for widened memory accesses of IndexedTy.
A Recipe for widening the canonical induction variable of the vector loop.
VPWidenCastRecipe is a recipe to create vector cast instructions.
Instruction::CastOps getOpcode() const
A recipe for handling GEP instructions.
Base class for widened induction recipes (VPWidenIntOrFpInductionRecipe and VPWidenPointerInductionRecipe), providing shared functionality.
VPIRValue * getStartValue() const
Returns the start value of the induction.
PHINode * getPHINode() const
VPValue * getStepValue()
Returns the step value of the induction.
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
A recipe for handling phi nodes of integer and floating-point inductions, producing their vector values.
VPIRValue * getStartValue() const
Returns the start value of the induction.
VPValue * getSplatVFValue() const
If the recipe has been unrolled, return the VPValue for the induction increment; otherwise return nullptr.
VPValue * getLastUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the last unrolled part, if it exists; returns itself if unrolling did not take place.
A recipe for widening vector intrinsics.
A common base class for widening memory operations.
A recipe for widened phis.
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
unsigned getOpcode() const
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient output IR.
VPIRValue * getLiveIn(Value *V) const
Return the live-in VPIRValue for V, if there is one or nullptr otherwise.
bool hasVF(ElementCount VF) const
LLVMContext & getContext() const
VPBasicBlock * getEntry()
bool hasScalableVF() const
VPValue & getVFxUF()
Returns VF * UF of the vector loop region.
VPValue & getVF()
Returns the VF of the vector loop region.
VPValue * getTripCount() const
The trip count of the original loop.
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
VPIRValue * getFalse()
Return a VPIRValue wrapping i1 false.
VPRegionBlock * createReplicateRegion(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="")
Create a new replicate region with Entry, Exiting and Name.
auto getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
bool hasUF(unsigned UF) const
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
VPSymbolicValue & getVectorTripCount()
The vector trip count.
VPIRValue * getOrAddLiveIn(Value *V)
Gets the live-in VPIRValue for V or adds a new live-in (if none exists yet) for V.
void setVF(ElementCount VF)
bool isUnrolled() const
Returns true if the VPlan has already been unrolled.
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
void resetTripCount(VPValue *NewTripCount)
Resets the trip count for the VPlan.
VPBasicBlock * getMiddleBlock()
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar tail loop or the exit block from the loop latch.
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
VPIRValue * getTrue()
Return a VPIRValue wrapping i1 true.
bool hasScalarVFOnly() const
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
VPBasicBlock * getVectorPreheader()
Returns the preheader of the vector loop region, if one exists, or null otherwise.
bool hasScalarTail() const
Returns true if the scalar tail may execute after the vector loop.
VPIRValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPIRValue wrapping a ConstantInt with the given type and value.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
iterator_range< user_iterator > users()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
constexpr bool hasKnownScalarFactor(const FixedOrScalableQuantity &RHS) const
Returns true if there exists a value X where RHS.multiplyCoefficientBy(X) will result in a value whose quantity matches our own.
constexpr ScalarTy getFixedValue() const
constexpr ScalarTy getKnownScalarFactor(const FixedOrScalableQuantity &RHS) const
Returns a value X where RHS.multiplyCoefficientBy(X) will result in a value whose quantity matches our own.
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
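ElementCount and TypeSize are both FixedOrScalableQuantity instantiations, so the helpers above read like plain arithmetic on the known-minimum value. A minimal sketch:

#include "llvm/Support/TypeSize.h"

static void elementCountDemo() {
  auto EC = llvm::ElementCount::getScalable(4); // <vscale x 4 x ...>
  (void)EC.isScalable();                        // true
  (void)EC.getKnownMinValue();                  // 4
  (void)EC.multiplyCoefficientBy(2);            // <vscale x 8 x ...>
}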
const ParentTy * getParent() const
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_ABI APInt RoundingUDiv(const APInt &A, const APInt &B, APInt::Rounding RM)
Return A unsign-divided by B, rounded by the given rounding mode.
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
BinaryOp_match< ValTy, cst_pred_ty< is_all_ones >, Instruction::Xor, true > m_Not(const ValTy &V)
Matches a 'Not' as 'xor V, -1' or '-1, V'.
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_MaskedStore(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
Matches MaskedStore Intrinsic.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
LogicalOp_match< LHS, RHS, Instruction::And > m_LogicalAnd(const LHS &L, const RHS &R)
Matches L && R either in the form of L & R or L ? R : false.
BinaryOp_match< LHS, RHS, Instruction::FMul > m_FMul(const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, OpTy > m_ZExtOrSelf(const OpTy &Op)
bool match(Val *V, const Pattern &P)
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
auto match_fn(const Pattern &P)
A match functor that can be used as a UnaryPredicate in functional algorithms like all_of.
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_MaskedLoad(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
Matches MaskedLoad Intrinsic.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
BinaryOp_match< LHS, RHS, Instruction::FAdd > m_FAdd(const LHS &L, const RHS &R)
SpecificCmpClass_match< LHS, RHS, CmpInst > m_SpecificCmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match() expression.
CastInst_match< OpTy, FPExtInst > m_FPExt(const OpTy &Op)
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::UDiv > m_UDiv(const LHS &L, const RHS &R)
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
BinaryOp_match< LHS, RHS, Instruction::FAdd, true > m_c_FAdd(const LHS &L, const RHS &R)
Matches FAdd with LHS and RHS in either order.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
BinaryOp_match< LHS, RHS, Instruction::Mul, true > m_c_Mul(const LHS &L, const RHS &R)
Matches a Mul with LHS and RHS in either order.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
bind_cst_ty m_scev_APInt(const APInt *&C)
Match an SCEV constant and bind it to an APInt.
bool match(const SCEV *S, const Pattern &P)
VPInstruction_match< VPInstruction::ExtractLastLane, VPInstruction_match< VPInstruction::ExtractLastPart, Op0_t > > m_ExtractLastLaneOfLastPart(const Op0_t &Op0)
AllRecipe_commutative_match< Instruction::And, Op0_t, Op1_t > m_c_BinaryAnd(const Op0_t &Op0, const Op1_t &Op1)
Match a binary AND operation.
AllRecipe_match< Instruction::Or, Op0_t, Op1_t > m_BinaryOr(const Op0_t &Op0, const Op1_t &Op1)
Match a binary OR operation.
VPInstruction_match< VPInstruction::AnyOf > m_AnyOf()
AllRecipe_commutative_match< Instruction::Or, Op0_t, Op1_t > m_c_BinaryOr(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::ComputeReductionResult, Op0_t > m_ComputeReductionResult(const Op0_t &Op0)
VPInstruction_match< VPInstruction::StepVector > m_StepVector()
GEPLikeRecipe_match< Op0_t, Op1_t > m_GetElementPtr(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::BranchOnTwoConds > m_BranchOnTwoConds()
AllRecipe_match< Opcode, Op0_t, Op1_t > m_Binary(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::LastActiveLane, Op0_t > m_LastActiveLane(const Op0_t &Op0)
VPInstruction_match< Instruction::ExtractElement, Op0_t, Op1_t > m_ExtractElement(const Op0_t &Op0, const Op1_t &Op1)
specific_intval< 1 > m_False()
VPDerivedIV_match< Op0_t, Op1_t, Op2_t > m_DerivedIV(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
VPInstruction_match< VPInstruction::ExtractLastLane, Op0_t > m_ExtractLastLane(const Op0_t &Op0)
VPInstruction_match< VPInstruction::ActiveLaneMask, Op0_t, Op1_t, Op2_t > m_ActiveLaneMask(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
VPInstruction_match< VPInstruction::BranchOnCount > m_BranchOnCount()
bind_ty< VPIRValue > m_VPIRValue(VPIRValue *&V)
Match a VPIRValue.
specific_intval< 1 > m_True()
VectorEndPointerRecipe_match< Op0_t, Op1_t > m_VecEndPtr(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::ExtractLastPart, Op0_t > m_ExtractLastPart(const Op0_t &Op0)
VPInstruction_match< VPInstruction::Broadcast, Op0_t > m_Broadcast(const Op0_t &Op0)
class_match< VPValue > m_VPValue()
Match an arbitrary VPValue and ignore it.
VPInstruction_match< VPInstruction::ExplicitVectorLength, Op0_t > m_EVL(const Op0_t &Op0)
VPInstruction_match< VPInstruction::BuildVector > m_BuildVector()
BuildVector matches only its opcode; its operands are not matched, as the number of operands is not fixed.
VPInstruction_match< VPInstruction::ExtractPenultimateElement, Op0_t > m_ExtractPenultimateElement(const Op0_t &Op0)
VPInstruction_match< VPInstruction::FirstActiveLane, Op0_t > m_FirstActiveLane(const Op0_t &Op0)
bind_ty< VPInstruction > m_VPInstruction(VPInstruction *&V)
Match a VPInstruction, capturing if we match.
VPInstruction_match< VPInstruction::BranchOnCond > m_BranchOnCond()
VPInstruction_match< VPInstruction::ExtractLane, Op0_t, Op1_t > m_ExtractLane(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::Reverse, Op0_t > m_Reverse(const Op0_t &Op0)
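Put together, the VPlanPatternMatch helpers make simplifications read declaratively. An illustrative sketch (not a transform from this file) that folds "or X, true" to true, assuming the binding m_VPValue(VPValue *&) overload:

using namespace llvm::VPlanPatternMatch;

static bool foldOrTrue(VPSingleDefRecipe *Def, VPlan &Plan) {
  VPValue *X;
  if (!match(Def, m_c_BinaryOr(m_VPValue(X), m_True())))
    return false;
  Def->replaceAllUsesWith(Plan.getTrue());
  return true;
}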
bool isSingleScalar(const VPValue *VPV)
Returns true if VPV is a single scalar, either because it produces the same value for all lanes or only has its first lane used.
bool isUniformAcrossVFsAndUFs(VPValue *V)
Checks if V is uniform across all VF lanes and UF parts.
VPValue * getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr)
Get or create a VPValue that corresponds to the expansion of Expr.
VPInstruction * findComputeReductionResult(VPReductionPHIRecipe *PhiR)
Find the ComputeReductionResult recipe for PhiR, looking through selects inserted for predicated reductions.
std::optional< MemoryLocation > getMemoryLocation(const VPRecipeBase &R)
Return a MemoryLocation for R with noalias metadata populated from R, if the recipe is supported, and std::nullopt otherwise.
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
bool onlyScalarValuesUsed(const VPValue *Def)
Returns true if only scalar values of Def are used by all users.
const SCEV * getSCEVExprForVPValue(const VPValue *V, PredicatedScalarEvolution &PSE, const Loop *L=nullptr)
Return the SCEV expression for V.
bool isHeaderMask(const VPValue *V, const VPlan &Plan)
Return true if V is a header mask in Plan.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
auto min_element(R &&Range)
Provide wrappers to std::min_element which take ranges instead of having to pass begin/end explicitly.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
DenseMap< const Value *, const SCEV * > ValueToSCEVMapTy
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B, C, ...), such that A is the 0-based index of the item in the sequence, and B, C, ... are the values from the original input ranges.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
constexpr from_range_t from_range
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case of optionals) value is accepted.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting iteration.
auto cast_or_null(const Y &Val)
iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order.
iterator_range< df_iterator< VPBlockDeepTraversalWrapper< VPBlockBase * > > > vp_depth_first_deep(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order while traversing through region blocks.
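A sketch of the traversal idiom used throughout these transforms, combining the deep depth-first range with VPBlockUtils::blocksOnly to visit every VPBasicBlock, including those nested in replicate regions:

static void forEachBlock(VPlan &Plan) {
  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
           vp_depth_first_deep(Plan.getEntry()))) {
    (void)VPBB; // visit VPBB here
  }
}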
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like all_of.
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
iterator_range< po_iterator< VPBlockDeepTraversalWrapper< VPBlockBase * > > > vp_post_order_deep(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in post order while traversing through region blocks.
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI_FOR_TEST cl::opt< bool > EnableWideActiveLaneMask
bool isa_and_present(const Y &Val)
isa_and_present<X> - Functionally identical to isa, except that a null value is accepted.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vector type.
iterator_range< filter_iterator< detail::IterOfRange< RangeT >, PredicateT > > make_filter_range(RangeT &&Range, PredicateT Pred)
Convenience function that takes a range of elements and a predicate, and returns a new filter_iterator range.
bool canConstantBeExtended(const APInt *C, Type *NarrowType, TTI::PartialReductionExtendKind ExtKind)
Check if a constant C can be safely treated as having been extended from a narrower type with the given extension kind.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
RecurKind
These are the kinds of recurrences that we support.
@ Mul
Product of integers.
@ Sub
Subtraction of integers.
@ AddChainWithSubs
A chain of adds and subs.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the given range.
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly.
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given predicate occurs in a range.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer list are equal or the list is empty.
@ DataAndControlFlowWithoutRuntimeCheck
Use predicate to control both data and control flow, but modify the trip count so that a runtime overflow check can be avoided.
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
@ Default
The result values are uniform if and only if all operands are uniform.
constexpr detail::IsaCheckPredicate< Types... > IsaPred
Function object wrapper for the llvm::isa type check.
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
RemoveMask_match(const Op0_t &In, Op1_t &Out)
bool match(OpTy *V) const
A collection of metadata nodes that might be associated with a memory access, used by the alias-analysis infrastructure.
MDNode * Scope
The tag for alias scope specification (used with noalias).
MDNode * NoAlias
The tag specifying the noalias scope.
This struct is a compact representation of a valid (non-zero power of two) alignment.
An information struct used to provide DenseMap with the various necessary components for a given value type.
This reduction is unordered with the partial result scaled down by some factor.
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
Struct to hold various analysis needed for cost computations.
TargetTransformInfo::TargetCostKind CostKind
const TargetTransformInfo & TTI
A recipe for handling first-order recurrence phis.
A VPValue representing a live-in from the input IR or a constant.
Type * getType() const
Returns the type of the underlying IR value.
A symbolic live-in VPValue, used for values like vector trip count, VF, and VFxUF.
A recipe for widening load operations with vector-predication intrinsics, using the address to load from, the explicit vector length and an optional mask.
A recipe for widening load operations, using the address to load from and an optional mask.
A recipe for widening store operations with vector-predication intrinsics, using the value to store, the address to store to, the explicit vector length and an optional mask.
A recipe for widening store operations, using the stored value, the address to store to and an optional mask.