51 GetIntOrFpInductionDescriptor,
58 if (!VPBB->getParent())
61 auto EndIter = Term ? Term->getIterator() : VPBB->end();
66 VPValue *VPV = Ingredient.getVPSingleValue();
75 const auto *II = GetIntOrFpInductionDescriptor(Phi);
89 Phi, Start, Step, &Plan.getVF(), *II, Flags,
90 Ingredient.getDebugLoc());
98 *Load, Ingredient.getOperand(0), nullptr,
100 Ingredient.getDebugLoc());
103 *Store, Ingredient.getOperand(1), Ingredient.getOperand(0),
104 nullptr, false, false, *VPI,
105 Ingredient.getDebugLoc());
108 Ingredient.getDebugLoc());
116 *VPI, CI->getDebugLoc());
119 *VPI, Ingredient.getDebugLoc());
122 CI->getOpcode(), Ingredient.getOperand(0), CI->getType(), CI,
126 *VPI, Ingredient.getDebugLoc());
135 "Only recpies with zero or one defined values expected");
136 Ingredient.eraseFromParent();
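// Reading of the fragment above: this lowering loop appears to walk each
// block's recipes, replace every VPInstruction wrapping an IR "Ingredient"
// (load, store, GEP, call, cast, phi, ...) with the matching concrete
// widened recipe, and erase the original recipe from its parent.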
158 "Expected at most one successor in block chain");
161 if (ExcludeRecipes && ExcludeRecipes->contains(&R))
165 if (!R.mayWriteToMemory() && !(CheckReads && R.mayReadFromMemory()))
176 if (CheckReads && R.mayReadFromMemory() &&
183 Loc->AATags.NoAlias))
203 if (R.mayHaveSideEffects() || R.mayReadFromMemory() || R.isPhi())
208 return RepR && RepR->getOpcode() == Instruction::Alloca;
217 auto InsertIfValidSinkCandidate = [ScalarVFOnly, &WorkList](
233 if (!ScalarVFOnly && RepR->isSingleScalar())
236 WorkList.insert({SinkTo, Candidate});
248 for (auto &Recipe : *VPBB)
250 InsertIfValidSinkCandidate(VPBB, Op);
254 for (unsigned I = 0; I != WorkList.size(); ++I) {
257 std::tie(SinkTo, SinkCandidate) = WorkList[I];
262 auto UsersOutsideSinkTo =
264 return cast<VPRecipeBase>(U)->getParent() != SinkTo;
266 if (any_of(UsersOutsideSinkTo, [SinkCandidate](VPUser *U) {
267 return !U->usesFirstLaneOnly(SinkCandidate);
270 bool NeedsDuplicating = !UsersOutsideSinkTo.empty();
272 if (NeedsDuplicating) {
276 if (auto *SinkCandidateRepR =
282 nullptr, *SinkCandidateRepR,
286 Clone = SinkCandidate->clone();
296 InsertIfValidSinkCandidate(SinkTo, Op);
306 if (!EntryBB || EntryBB->size() != 1 ||
316 if (EntryBB->getNumSuccessors() != 2)
321 if (!Succ0 || !Succ1)
324 if (Succ0->getNumSuccessors() + Succ1->getNumSuccessors() != 1)
326 if (Succ0->getSingleSuccessor() == Succ1)
328 if (Succ1->getSingleSuccessor() == Succ0)
345 if (!Region1->isReplicator())
347 auto *MiddleBasicBlock =
349 if (!MiddleBasicBlock || !MiddleBasicBlock->empty())
354 if (!Region2 || !Region2->isReplicator())
359 if (!Mask1 || Mask1 != Mask2)
362 assert(Mask1 && Mask2 && "both regions must have conditions");
368 if (TransformedRegions.contains(Region1))
375 if (!Then1 || !Then2)
395 VPValue *Phi1ToMoveV = Phi1ToMove.getVPSingleValue();
401 if (Phi1ToMove.getVPSingleValue()->getNumUsers() == 0) {
402 Phi1ToMove.eraseFromParent();
405 Phi1ToMove.moveBefore(*Merge2, Merge2->begin());
419 TransformedRegions.insert(Region1);
422 return !TransformedRegions.empty();
429 std::string RegionName = (Twine("pred.") + Instr->getOpcodeName()).str();
430 assert(Instr->getParent() && "Predicated instruction not in any basic block");
431 auto *BlockInMask = PredRecipe->getMask();
450 RecipeWithoutMask->getDebugLoc());
474 if (RepR->isPredicated())
493 if (ParentRegion && ParentRegion->getExiting() == CurrentBlock)
507 if (!VPBB->getParent())
511 if (!PredVPBB || PredVPBB->getNumSuccessors() != 1 ||
520 R.moveBefore(*PredVPBB, PredVPBB->end());
522 auto *ParentRegion = VPBB->getParent();
523 if (ParentRegion && ParentRegion->getExiting() == VPBB)
524 ParentRegion->setExiting(PredVPBB);
525 for (auto *Succ : to_vector(VPBB->successors())) {
531 return !WorkList.empty();
538 bool ShouldSimplify = true;
539 while (ShouldSimplify) {
555 if (!IV || IV->getTruncInst())
570 for (auto *U : FindMyCast->users()) {
572 if (UserCast && UserCast->getUnderlyingValue() == IRCast) {
573 FoundUserCast = UserCast;
577 FindMyCast = FoundUserCast;
602 if (!WidenOriginalIV || !WidenOriginalIV->isCanonical())
615 WidenOriginalIV->dropPoisonGeneratingFlags();
628 bool IsConditionalAssume = RepR && RepR->isPredicated() &&
630 if (IsConditionalAssume)
633 if (R.mayHaveSideEffects())
637 return all_of(R.definedValues(),
638 [](VPValue *V) { return V->getNumUsers() == 0; });
654 if (!PhiR || PhiR->getNumOperands() != 2)
656 VPUser *PhiUser = PhiR->getSingleUser();
660 if (PhiUser != Incoming->getDefiningRecipe() ||
663 PhiR->replaceAllUsesWith(PhiR->getOperand(0));
664 PhiR->eraseFromParent();
665 Incoming->getDefiningRecipe()->eraseFromParent();
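// Hedged reading: a phi whose single user is the recipe computing its own
// backedge value forms a dead recurrence, so the phi is replaced by its
// start value and both recipes are erased.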
680 Kind, FPBinOp, StartV, CanonicalIV, Step, "offset.idx");
690 BaseIV = Builder.createScalarCast(Instruction::Trunc, BaseIV, TruncTy, DL);
696 if (ResultTy != StepTy) {
703 Builder.setInsertPoint(VecPreheader);
704 Step = Builder.createScalarCast(Instruction::Trunc, Step, ResultTy, DL);
706 return Builder.createScalarIVSteps(InductionOpcode, FPBinOp, BaseIV, Step,
712 for (unsigned I = 0; I != Users.size(); ++I) {
717 Users.insert_range(V->users());
719 return Users.takeVector();
733 nullptr, StartV, StepV, PtrIV->getDebugLoc(), Builder);
770 Def->getNumUsers() == 0 || !Def->getUnderlyingValue() ||
771 (RepR && (RepR->isSingleScalar() || RepR->isPredicated())))
779 Def->operands(), true,
781 Clone->insertAfter(Def);
782 Def->replaceAllUsesWith(Clone);
793 PtrIV->replaceAllUsesWith(PtrAdd);
800 if (HasOnlyVectorVFs && none_of(WideIV->users(), [WideIV](VPUser *U) {
801 return U->usesScalars(WideIV);
807 Plan, ID.getKind(), ID.getInductionOpcode(),
809 WideIV->getTruncInst(), WideIV->getStartValue(), WideIV->getStepValue(),
810 WideIV->getDebugLoc(), Builder);
813 if (!HasOnlyVectorVFs) {
815 "plans containing a scalar VF cannot also include scalable VFs");
816 WideIV->replaceAllUsesWith(Steps);
819 WideIV->replaceUsesWithIf(Steps,
820 [WideIV, HasScalableVF](VPUser &U, unsigned) {
822 return U.usesFirstLaneOnly(WideIV);
823 return U.usesScalars(WideIV);
839 return (IntOrFpIV && IntOrFpIV->getTruncInst()) ? nullptr : WideIV;
844 if (!Def || Def->getNumOperands() != 2)
852 auto IsWideIVInc = [&]() {
853 auto &ID = WideIV->getInductionDescriptor();
856 VPValue *IVStep = WideIV->getStepValue();
857 switch (ID.getInductionOpcode()) {
858 case Instruction::Add:
860 case Instruction::FAdd:
863 case Instruction::FSub:
866 case Instruction::Sub: {
885 return IsWideIVInc() ? WideIV : nullptr;
905 if (WideIntOrFp && WideIntOrFp->getTruncInst())
918 FirstActiveLane = B.createScalarZExtOrTrunc(FirstActiveLane, CanonicalIVType,
919 FirstActiveLaneType, DL);
921 B.createNaryOp(Instruction::Add, {CanonicalIV, FirstActiveLane}, DL);
928 EndValue = B.createNaryOp(Instruction::Add, {EndValue, One}, DL);
931 if (!WideIntOrFp || !WideIntOrFp->isCanonical()) {
933 VPValue *Start = WideIV->getStartValue();
934 VPValue *Step = WideIV->getStepValue();
935 EndValue = B.createDerivedIV(
937 Start, EndValue, Step);
957 assert(EndValue && "end value must have been pre-computed");
967 VPValue *Step = WideIV->getStepValue();
970 return B.createNaryOp(Instruction::Sub, {EndValue, Step},
975 return B.createPtrAdd(EndValue,
976 B.createNaryOp(Instruction::Sub, {Zero, Step}),
980 const auto &ID = WideIV->getInductionDescriptor();
981 return B.createNaryOp(
982 ID.getInductionBinOp()->getOpcode() == Instruction::FAdd
985 {EndValue, Step}, {ID.getInductionBinOp()->getFastMathFlags()});
1000 for (auto [Idx, PredVPBB] : enumerate(ExitVPBB->getPredecessors())) {
1002 if (PredVPBB == MiddleVPBB)
1004 ExitIRI->getOperand(Idx),
1008 ExitIRI->getOperand(Idx), SE);
1010 ExitIRI->setOperand(Idx, Escape);
1027 const auto &[V, Inserted] = SCEV2VPV.try_emplace(ExpR->getSCEV(), ExpR);
1030 ExpR->replaceAllUsesWith(V->second);
1031 ExpR->eraseFromParent();
1040 while (!WorkList.empty()) {
1042 if (!Seen.insert(Cur).second)
1050 R->eraseFromParent();
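// Likely dead-code elimination: recipes whose defined values have no users
// and which have no side effects are erased, with a worklist plus Seen set
// so newly dead operands are revisited exactly once.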
1057 static std::optional<std::pair<bool, unsigned>>
1060 std::optional<std::pair<bool, unsigned>>>(R)
1063 [](auto *I) { return std::make_pair(false, I->getOpcode()); })
1064 .Case<VPWidenIntrinsicRecipe>([](auto *I) {
1065 return std::make_pair(true, I->getVectorIntrinsicID());
1067 .Case<VPVectorPointerRecipe, VPPredInstPHIRecipe>([](auto *I) {
1071 return std::make_pair(false,
1074 .Default([](auto *) { return std::nullopt; });
1090 if (!Op->isLiveIn() || !Op->getLiveInIRValue())
1092 Ops.push_back(Op->getLiveInIRValue());
1095 auto FoldToIRValue = [&]() -> Value * {
1097 if (OpcodeOrIID->first) {
1098 if (R.getNumOperands() != 2)
1100 unsigned ID = OpcodeOrIID->second;
1101 return Folder.FoldBinaryIntrinsic(ID, Ops[0], Ops[1],
1104 unsigned Opcode = OpcodeOrIID->second;
1113 return Folder.FoldSelect(Ops[0], Ops[1],
1116 return Folder.FoldBinOp(Instruction::BinaryOps::Xor, Ops[0],
1118 case Instruction::Select:
1119 return Folder.FoldSelect(Ops[0], Ops[1], Ops[2]);
1120 case Instruction::ICmp:
1121 case Instruction::FCmp:
1124 case Instruction::GetElementPtr: {
1127 return Folder.FoldGEP(GEP->getSourceElementType(), Ops[0],
1137 case Instruction::ExtractElement:
1144 if (Value *V = FoldToIRValue())
1145 return R.getParent()->getPlan()->getOrAddLiveIn(V);
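// Constant-folding sketch: when all operands are live-in IR values, the
// recipe's opcode (or vector intrinsic ID) is folded through an
// InstSimplifyFolder; on success the folded IR value is re-imported as a
// live-in VPValue replacing the recipe.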
1151 VPlan *Plan = Def->getParent()->getPlan();
1158 return Def->replaceAllUsesWith(V);
1164 PredPHI->replaceAllUsesWith(Op);
1172 if (TruncTy == ATy) {
1173 Def->replaceAllUsesWith(A);
1182 : Instruction::ZExt;
1185 if (auto *UnderlyingExt = Def->getOperand(0)->getUnderlyingValue()) {
1187 Ext->setUnderlyingValue(UnderlyingExt);
1189 Def->replaceAllUsesWith(Ext);
1191 auto *Trunc = Builder.createWidenCast(Instruction::Trunc, A, TruncTy);
1192 Def->replaceAllUsesWith(Trunc);
1200 for (VPUser *U : A->users()) {
1202 for (VPValue *VPV : R->definedValues())
1216 Def->replaceAllUsesWith(X);
1217 Def->eraseFromParent();
1223 return Def->replaceAllUsesWith(Def->getOperand(Def->getOperand(0) == X));
1227 return Def->replaceAllUsesWith(X);
1231 return Def->replaceAllUsesWith(Def->getOperand(Def->getOperand(0) == X));
1235 return Def->replaceAllUsesWith(Def->getOperand(1));
1242 (!Def->getOperand(0)->hasMoreThanOneUniqueUser() ||
1243 !Def->getOperand(1)->hasMoreThanOneUniqueUser()))
1244 return Def->replaceAllUsesWith(
1245 Builder.createLogicalAnd(X, Builder.createOr(Y, Z)));
1249 return Def->replaceAllUsesWith(Plan->getFalse());
1252 return Def->replaceAllUsesWith(X);
1257 Def->setOperand(0, C);
1258 Def->setOperand(1, Y);
1259 Def->setOperand(2, X);
1268 X->hasMoreThanOneUniqueUser())
1269 return Def->replaceAllUsesWith(
1270 Builder.createLogicalAnd(X, Builder.createLogicalAnd(Y, Z)));
1273 return Def->replaceAllUsesWith(A);
1276 return Def->replaceAllUsesWith(A);
1279 return Def->replaceAllUsesWith(
1280 Def->getOperand(0) == A ? Def->getOperand(1) : Def->getOperand(0));
1284 return Def->replaceAllUsesWith(A);
1299 R->setOperand(1, Y);
1300 R->setOperand(2, X);
1304 R->replaceAllUsesWith(Cmp);
1309 if (!Cmp->getDebugLoc() && Def->getDebugLoc())
1310 Cmp->setDebugLoc(Def->getDebugLoc());
1322 if (Op->getNumUsers() > 1 ||
1326 } else if (!UnpairedCmp) {
1327 UnpairedCmp = Op->getDefiningRecipe();
1331 UnpairedCmp = nullptr;
1338 if (NewOps.size() < Def->getNumOperands()) {
1340 return Def->replaceAllUsesWith(NewAnyOf);
1352 return Def->replaceAllUsesWith(NewCmp);
1360 return Def->replaceAllUsesWith(Def->getOperand(1));
1366 X = Builder.createWidenCast(Instruction::Trunc, X, WideStepTy);
1367 Def->replaceAllUsesWith(X);
1377 Def->setOperand(1, Def->getOperand(0));
1378 Def->setOperand(0, Y);
1383 if (Phi->getOperand(0) == Phi->getOperand(1))
1384 Phi->replaceAllUsesWith(Phi->getOperand(0));
1391 Def->replaceAllUsesWith(
1392 BuildVector->getOperand(BuildVector->getNumOperands() - 1));
1399 Def->replaceAllUsesWith(
1400 BuildVector->getOperand(BuildVector->getNumOperands() - 2));
1407 Def->replaceAllUsesWith(BuildVector->getOperand(Idx));
1412 Def->replaceAllUsesWith(
1422 "broadcast operand must be single-scalar");
1423 Def->setOperand(0, C);
1428 if (Phi->getNumOperands() == 1)
1429 Phi->replaceAllUsesWith(Phi->getOperand(0));
1442 if (Phi->getOperand(1) != Def && match(Phi->getOperand(0), m_ZeroInt()) &&
1443 Phi->getSingleUser() == Def) {
1444 Phi->setOperand(0, Y);
1445 Def->replaceAllUsesWith(Phi);
1452 if (VecPtr->isFirstPart()) {
1453 VecPtr->replaceAllUsesWith(VecPtr->getOperand(0));
1462 Steps->replaceAllUsesWith(Steps->getOperand(0));
1470 Def->replaceUsesWithIf(StartV, [](const VPUser &U, unsigned Idx) {
1472 return PhiR && PhiR->isInLoop();
1478 Def->replaceAllUsesWith(A);
1487 [Def, A](VPUser *U) { return U->usesScalars(A) || Def == U; })) {
1488 return Def->replaceAllUsesWith(A);
1492 return Def->replaceAllUsesWith(A);
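// The pattern-based simplifications above appear to follow InstCombine-style
// rules lifted to VPValues: redundant trunc/ext pairs collapse, logical
// and/or over compares re-associate, and selects with known conditions fold
// to one of their operands.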
1521 if (RepR && (RepR->isSingleScalar() || RepR->isPredicated()))
1528 RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(),
1529 true, nullptr, *RepR,
1530 *RepR, RepR->getDebugLoc());
1531 Clone->insertBefore(RepOrWidenR);
1533 VPValue *ExtractOp = Clone->getOperand(0);
1539 Clone->setOperand(0, ExtractOp);
1540 RepR->eraseFromParent();
1553 if (!all_of(RepOrWidenR->users(),
1554 [RepOrWidenR](const VPUser *U) {
1555 if (auto *VPI = dyn_cast<VPInstruction>(U)) {
1556 unsigned Opcode = VPI->getOpcode();
1557 if (Opcode == VPInstruction::ExtractLastLane ||
1558 Opcode == VPInstruction::ExtractLastPart ||
1559 Opcode == VPInstruction::ExtractPenultimateElement)
1563 return U->usesScalars(RepOrWidenR);
1566 if (Op->getSingleUser() != RepOrWidenR)
1570 bool LiveInNeedsBroadcast =
1571 Op->isLiveIn() && !isa<Constant>(Op->getLiveInIRValue());
1572 auto *OpR = dyn_cast<VPReplicateRecipe>(Op);
1573 return LiveInNeedsBroadcast || (OpR && OpR->isSingleScalar());
1578 RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(),
1579 true, nullptr, *RepOrWidenR);
1580 Clone->insertBefore(RepOrWidenR);
1581 RepOrWidenR->replaceAllUsesWith(Clone);
1583 RepOrWidenR->eraseFromParent();
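// Hedged reading: replicate/widen recipes whose results are only ever used
// as single scalars are re-emitted as single-scalar VPReplicateRecipe
// clones, and the original wide recipes are erased.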
1619 if (Blend->isNormalized() || !match(Blend->getMask(0), m_False()))
1620 UniqueValues.insert(Blend->getIncomingValue(0));
1621 for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)
1623 UniqueValues.insert(Blend->getIncomingValue(I));
1625 if (UniqueValues.size() == 1) {
1626 Blend->replaceAllUsesWith(*UniqueValues.begin());
1627 Blend->eraseFromParent();
1631 if (Blend->isNormalized())
1637 unsigned StartIndex = 0;
1638 for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
1643 if (Mask->getNumUsers() == 1 && !match(Mask, m_False())) {
1650 OperandsWithMask.push_back(Blend->getIncomingValue(StartIndex));
1652 for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
1653 if (I == StartIndex)
1655 OperandsWithMask.push_back(Blend->getIncomingValue(I));
1656 OperandsWithMask.push_back(Blend->getMask(I));
1661 OperandsWithMask, Blend->getDebugLoc());
1662 NewBlend->insertBefore(&R);
1664 VPValue *DeadMask = Blend->getMask(StartIndex);
1666 Blend->eraseFromParent();
1671 if (NewBlend->getNumOperands() == 3 &&
1673 VPValue *Inc0 = NewBlend->getOperand(0);
1674 VPValue *Inc1 = NewBlend->getOperand(1);
1675 VPValue *OldMask = NewBlend->getOperand(2);
1676 NewBlend->setOperand(0, Inc1);
1677 NewBlend->setOperand(1, Inc0);
1678 NewBlend->setOperand(2, NewMask);
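// Blend normalization sketch: incoming values with dead or known-false
// masks are dropped, the remaining (value, mask) pairs are rebuilt with the
// chosen start value first, and a two-operand blend with a negated mask is
// apparently canonicalized by swapping its incoming values.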
1705 APInt MaxVal = AlignedTC - 1;
1708 unsigned NewBitWidth =
1714 bool MadeChange = false;
1723 if (!WideIV || !WideIV->isCanonical() ||
1724 WideIV->hasMoreThanOneUniqueUser() ||
1725 NewIVTy == WideIV->getScalarType())
1730 VPUser *SingleUser = WideIV->getSingleUser();
1739 WideIV->setStartValue(NewStart);
1741 WideIV->setStepValue(NewStep);
1747 Cmp->setOperand(1, NewBTC);
1761 return any_of(Cond->getDefiningRecipe()->operands(), [&Plan, BestVF, BestUF,
1763 return isConditionTrueViaVFAndUF(C, Plan, BestVF, BestUF, SE);
1776 const SCEV *VectorTripCount =
1781 "Trip count SCEV must be computable");
1801 auto *Term = &ExitingVPBB->back();
1814 for (unsigned Part = 0; Part < UF; ++Part) {
1822 Extracts[Part] = Ext;
1834 match(Phi->getBackedgeValue(),
1836 assert(Index && "Expected index from ActiveLaneMask instruction");
1849 "Expected one VPActiveLaneMaskPHIRecipe for each unroll part");
1856 "Expected incoming values of Phi to be ActiveLaneMasks");
1861 EntryALM->setOperand(2, ALMMultiplier);
1862 LoopALM->setOperand(2, ALMMultiplier);
1866 ExtractFromALM(EntryALM, EntryExtracts);
1871 ExtractFromALM(LoopALM, LoopExtracts);
1873 Not->setOperand(0, LoopExtracts[0]);
1876 for (unsigned Part = 0; Part < UF; ++Part) {
1877 Phis[Part]->setStartValue(EntryExtracts[Part]);
1878 Phis[Part]->setBackedgeValue(LoopExtracts[Part]);
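// For unrolled plans, each unroll part seemingly receives its own extract
// of the wide active-lane mask: entry and loop masks are recomputed with an
// ALM multiplier, and every VPActiveLaneMaskPHIRecipe gets the per-part
// start and backedge extracts assigned above.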
1891 auto *Term = &ExitingVPBB->back();
1899 const SCEV *VectorTripCount =
1904 "Trip count SCEV must be computable");
1926 if (auto *R = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi))
1927 return R->isCanonical();
1928 return isa<VPCanonicalIVPHIRecipe, VPEVLBasedIVPHIRecipe,
1929 VPFirstOrderRecurrencePHIRecipe, VPPhi>(&Phi);
1935 R->getScalarType());
1937 HeaderR.eraseFromParent();
1941 HeaderR.getVPSingleValue()->replaceAllUsesWith(Phi->getIncomingValue(0));
1942 HeaderR.eraseFromParent();
1951 B->setParent(nullptr);
1960 {}, {}, Term->getDebugLoc());
1964 Term->eraseFromParent();
1991 R.getVPSingleValue()->replaceAllUsesWith(Trunc);
2001 assert(Plan.hasVF(BestVF) && "BestVF is not available in Plan");
2002 assert(Plan.hasUF(BestUF) && "BestUF is not available in Plan");
2011 assert(Plan.getUF() == BestUF && "BestUF must match the Plan's UF");
2026 auto TryToPushSinkCandidate = [&](VPRecipeBase *SinkCandidate) {
2029 if (SinkCandidate == Previous)
2033 !Seen.insert(SinkCandidate).second ||
2046 for (unsigned I = 0; I != WorkList.size(); ++I) {
2049 "only recipes with a single defined value expected");
2064 if (SinkCandidate == FOR)
2067 SinkCandidate->moveAfter(Previous);
2068 Previous = SinkCandidate;
2086 for (VPUser *U : FOR->users()) {
2092 [&VPDT, HoistPoint](VPUser *U) {
2093 auto *R = cast<VPRecipeBase>(U);
2094 return HoistPoint == R ||
2095 VPDT.properlyDominates(HoistPoint, R);
2097 "HoistPoint must dominate all users of FOR");
2099 auto NeedsHoisting = [HoistPoint, &VPDT,
2101 VPRecipeBase *HoistCandidate = HoistCandidateV->getDefiningRecipe();
2102 if (!HoistCandidate)
2107 HoistCandidate->getRegion() == EnclosingLoopRegion) &&
2108 "CFG in VPlan should still be flat, without replicate regions");
2110 if (!Visited.insert(HoistCandidate).second)
2122 return HoistCandidate;
2131 for (unsigned I = 0; I != HoistCandidates.size(); ++I) {
2134 "only recipes with a single defined value expected");
2146 if (auto *R = NeedsHoisting(Op)) {
2149 if (R->getNumDefinedValues() != 1)
2163 HoistCandidate->moveBefore(*HoistPoint->getParent(),
2182 VPRecipeBase *Previous = FOR->getBackedgeValue()->getDefiningRecipe();
2185 while (auto *PrevPhi =
2187 assert(PrevPhi->getParent() == FOR->getParent());
2189 Previous = PrevPhi->getBackedgeValue()->getDefiningRecipe();
2207 {FOR, FOR->getBackedgeValue()});
2209 FOR->replaceAllUsesWith(RecurSplice);
2212 RecurSplice->setOperand(0, FOR);
2218 for (VPUser *U : RecurSplice->users()) {
2229 B.createNaryOp(Instruction::Sub, {LastActiveLane, One});
2230 VPValue *PenultimateLastIter =
2232 {PenultimateIndex, FOR->getBackedgeValue()});
2237 VPValue *Sel = B.createSelect(Cmp, LastPrevIter, PenultimateLastIter);
2250 RecurKind RK = PhiR->getRecurrenceKind();
2257 RecWithFlags->dropPoisonGeneratingFlags();
2263 struct VPCSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> {
2265 return Def == getEmptyKey() || Def == getTombstoneKey();
2276 return GEP->getSourceElementType();
2279 .Case<VPVectorPointerRecipe, VPWidenGEPRecipe>(
2280 [](auto *I) { return I->getSourceElementType(); })
2281 .Default([](auto *) { return nullptr; });
2285 static bool canHandle(const VPSingleDefRecipe *Def) {
2294 if (!C || (!C->first && (C->second == Instruction::InsertValue ||
2295 C->second == Instruction::ExtractValue)))
2301 return !Def->mayReadFromMemory();
2305 static unsigned getHashValue(const VPSingleDefRecipe *Def) {
2306 const VPlan *Plan = Def->getParent()->getPlan();
2307 VPTypeAnalysis TypeInfo(*Plan);
2310 getGEPSourceElementType(Def), TypeInfo.inferScalarType(Def),
2313 if (RFlags->hasPredicate())
2319 static bool isEqual(const VPSingleDefRecipe *L, const VPSingleDefRecipe *R) {
2322 if (L->getVPDefID() != R->getVPDefID() ||
2324 getGEPSourceElementType(L) != getGEPSourceElementType(R) ||
2326 !equal(L->operands(), R->operands()))
2329 "must have valid opcode info for both recipes");
2331 if (LFlags->hasPredicate() &&
2332 LFlags->getPredicate() !=
2338 const VPRegionBlock *RegionL = L->getRegion();
2339 const VPRegionBlock *RegionR = R->getRegion();
2342 L->getParent() != R->getParent())
2344 const VPlan *Plan = L->getParent()->getPlan();
2345 VPTypeAnalysis TypeInfo(*Plan);
2346 return TypeInfo.inferScalarType(L) == TypeInfo.inferScalarType(R);
2361 if (!Def || !VPCSEDenseMapInfo::canHandle(Def))
2365 if (!VPDT.dominates(V->getParent(), VPBB))
2370 Def->replaceAllUsesWith(V);
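// CSE sketch: VPCSEDenseMapInfo appears to hash handled recipes on their
// VPDefID, opcode or intrinsic ID, GEP source element type, inferred scalar
// type, and operands; a recipe equal to an earlier, dominating definition
// is replaced by that definition.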
2389 "Expected vector prehader's successor to be the vector loop region");
2396 return !Op->isDefinedOutsideLoopRegions();
2399 R.moveBefore(*Preheader, Preheader->end());
2423 VPValue *ResultVPV = R.getVPSingleValue();
2425 unsigned NewResSizeInBits = MinBWs.lookup(UI);
2426 if (!NewResSizeInBits)
2439 (void)OldResSizeInBits;
2447 VPW->dropPoisonGeneratingFlags();
2449 if (OldResSizeInBits != NewResSizeInBits &&
2454 Ext->insertAfter(&R);
2456 Ext->setOperand(0, ResultVPV);
2457 assert(OldResSizeInBits > NewResSizeInBits && "Nothing to shrink?");
2460 "Only ICmps should not need extending the result.");
2469 for (unsigned Idx = StartIdx; Idx != R.getNumOperands(); ++Idx) {
2470 auto *Op = R.getOperand(Idx);
2471 unsigned OpSizeInBits =
2473 if (OpSizeInBits == NewResSizeInBits)
2475 assert(OpSizeInBits > NewResSizeInBits && "nothing to truncate");
2476 auto [ProcessedIter, IterIsEmpty] = ProcessedTruncs.try_emplace(Op);
2478 R.setOperand(Idx, ProcessedIter->second);
2486 Builder.setInsertPoint(&R);
2488 Builder.createWidenCast(Instruction::Trunc, Op, NewResTy);
2489 ProcessedIter->second = NewOp;
2490 R.setOperand(Idx, NewOp);
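// Minimum-bit-width narrowing, as far as the fragment shows: results are
// shrunk to NewResSizeInBits, an extend back to the old type is inserted
// after the recipe, and operands wider than the new type share a trunc
// cached in ProcessedTruncs.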
2505 assert(VPBB->getNumSuccessors() == 2 &&
2506 "Two successors expected for BranchOnCond");
2507 unsigned RemovedIdx;
2518 "There must be a single edge between VPBB and its successor");
2527 VPBB->back().eraseFromParent();
2589 VPValue *StartV = CanonicalIVPHI->getStartValue();
2591 auto *CanonicalIVIncrement =
2595 CanonicalIVIncrement->dropPoisonGeneratingFlags();
2596 DebugLoc DL = CanonicalIVIncrement->getDebugLoc();
2606 VPValue *TripCount, *IncrementValue;
2611 IncrementValue = CanonicalIVIncrement;
2617 IncrementValue = CanonicalIVPHI;
2621 auto *EntryIncrement = Builder.createOverflowingOp(
2629 {EntryIncrement, TC, ALMMultiplier}, DL,
2630 "active.lane.mask.entry");
2636 LaneMaskPhi->insertAfter(CanonicalIVPHI);
2641 Builder.setInsertPoint(OriginalTerminator);
2642 auto *InLoopIncrement =
2644 {IncrementValue}, {false, false}, DL);
2646 {InLoopIncrement, TripCount, ALMMultiplier},
2647 DL, "active.lane.mask.next");
2652 auto *NotMask = Builder.createNot(ALM, DL);
2665 auto *FoundWidenCanonicalIVUser = find_if(
2669 "Must have at most one VPWidenCanonicalIVRecipe");
2670 if (FoundWidenCanonicalIVUser !=
2672 auto *WideCanonicalIV =
2674 WideCanonicalIVs.push_back(WideCanonicalIV);
2682 if (WidenOriginalIV && WidenOriginalIV->isCanonical())
2683 WideCanonicalIVs.push_back(WidenOriginalIV);
2689 for (auto *Wide : WideCanonicalIVs) {
2695 assert(VPI->getOperand(0) == Wide &&
2696 "WidenCanonicalIV must be the first operand of the compare");
2697 assert(!HeaderMask && "Multiple header masks found?");
2705 VPlan &Plan, bool UseActiveLaneMaskForControlFlow,
2708 UseActiveLaneMaskForControlFlow) &&
2709 "DataAndControlFlowWithoutRuntimeCheck implies "
2710 "UseActiveLaneMaskForControlFlow");
2713 auto *FoundWidenCanonicalIVUser = find_if(
2715 assert(FoundWidenCanonicalIVUser &&
2716 "Must have widened canonical IV when tail folding!");
2718 auto *WideCanonicalIV =
2721 if (UseActiveLaneMaskForControlFlow) {
2731 nullptr, "active.lane.mask");
2747 template <typename OpTy> bool match(OpTy *V) const {
2758 template <typename Op0_t, typename Op1_t>
2777 VPValue *Addr, *Mask, *EndPtr;
2780 auto AdjustEndPtr = [&CurRecipe, &EVL](VPValue *EndPtr) {
2782 EVLEndPtr->insertBefore(&CurRecipe);
2783 EVLEndPtr->setOperand(1, &EVL);
2787 if (match(&CurRecipe,
2793 if (match(&CurRecipe,
2798 AdjustEndPtr(EndPtr), EVL, Mask);
2811 AdjustEndPtr(EndPtr), EVL, Mask);
2814 if (Rdx->isConditional() &&
2819 if (Interleave->getMask() &&
2824 if (match(&CurRecipe,
2833 Intrinsic::vp_merge, {Mask, LHS, RHS, &EVL},
2856 "User of VF that we can't transform to EVL.");
2862 [&LoopRegion, &Plan](VPUser *U) {
2864 m_c_Add(m_Specific(LoopRegion->getCanonicalIV()),
2865 m_Specific(&Plan.getVFxUF()))) ||
2866 isa<VPWidenPointerInductionRecipe>(U);
2868 "Only users of VFxUF should be VPWidenPointerInductionRecipe and the "
2869 "increment of the canonical induction.");
2889 MaxEVL = Builder.createScalarZExtOrTrunc(
2893 Builder.setInsertPoint(Header, Header->getFirstNonPhi());
2894 VPValue *PrevEVL = Builder.createScalarPhi(
2908 Intrinsic::experimental_vp_splice,
2909 {V1, V2, Imm, Plan.getTrue(), PrevEVL, &EVL},
2913 R.getVPSingleValue()->replaceAllUsesWith(VPSplice);
2931 VPValue *EVLMask = Builder.createICmp(
2949 assert(NumDefVal == CurRecipe->getNumDefinedValues() &&
2950 "New recipe must define the same number of values as the "
2955 for (unsigned I = 0; I < NumDefVal; ++I) {
2956 VPValue *CurVPV = CurRecipe->getVPValue(I);
2968 R->eraseFromParent();
3018 VPlan &Plan, const std::optional<unsigned> &MaxSafeElements) {
3026 VPValue *StartV = CanonicalIVPHI->getStartValue();
3030 EVLPhi->insertAfter(CanonicalIVPHI);
3031 VPBuilder Builder(Header, Header->getFirstNonPhi());
3034 VPPhi *AVLPhi = Builder.createScalarPhi(
3038 if (MaxSafeElements) {
3048 auto *CanonicalIVIncrement =
3050 Builder.setInsertPoint(CanonicalIVIncrement);
3054 OpVPEVL = Builder.createScalarZExtOrTrunc(
3055 OpVPEVL, CanIVTy, I32Ty, CanonicalIVIncrement->getDebugLoc());
3057 auto *NextEVLIV = Builder.createOverflowingOp(
3058 Instruction::Add, {OpVPEVL, EVLPhi},
3059 {CanonicalIVIncrement->hasNoUnsignedWrap(),
3060 CanonicalIVIncrement->hasNoSignedWrap()},
3061 CanonicalIVIncrement->getDebugLoc(),
"index.evl.next");
3062 EVLPhi->addOperand(NextEVLIV);
3064 VPValue *NextAVL = Builder.createOverflowingOp(
3065 Instruction::Sub, {AVLPhi, OpVPEVL}, {true, false},
3073 CanonicalIVPHI->replaceAllUsesWith(EVLPhi);
3074 CanonicalIVIncrement->setOperand(0, CanonicalIVPHI);
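// EVL-based IV sketch: an EVL phi ("index.evl.next") presumably advances by
// the explicit vector length computed each iteration, an AVL phi counts
// down the remaining elements, and the canonical IV increment is rewired
// while keeping its wrap flags.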
3088 assert(!EVLPhi && "Found multiple EVL PHIs. Only one expected");
3099 [[maybe_unused]] bool FoundAVL =
3102 assert(FoundAVL && "Didn't find AVL?");
3110 [[maybe_unused]] bool FoundAVLNext =
3113 assert(FoundAVLNext && "Didn't find AVL backedge?");
3124 VPValue *Backedge = CanonicalIV->getIncomingValue(1);
3127 "Unexpected canonical iv");
3133 CanonicalIV->eraseFromParent();
3146 match(LatchExitingBr,
3149 "Unexpected terminator in EVL loop");
3156 LatchExitingBr->eraseFromParent();
3166 return R->getRegion() ||
3170 for (const SCEV *Stride : StridesMap.values()) {
3173 const APInt *StrideConst;
3174 if (!match(PSE.getSCEV(StrideV), m_scev_APInt(StrideConst)))
3190 unsigned BW = U->getType()->getScalarSizeInBits();
3196 RewriteMap[StrideV] = PSE.getSCEV(StrideV);
3203 const SCEV *ScevExpr = ExpSCEV->getSCEV();
3206 if (NewSCEV != ScevExpr) {
3208 ExpSCEV->replaceAllUsesWith(NewExp);
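// Stride specialization: constant strides from StridesMap are looked up via
// PSE, versioned strides are rewritten through RewriteMap, and any
// VPExpandSCEVRecipe whose expression simplified is apparently replaced
// with a fresh expansion of the new SCEV.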
3217 const std::function<bool(BasicBlock *)> &BlockNeedsPredication) {
3221 auto CollectPoisonGeneratingInstrsInBackwardSlice([&](VPRecipeBase *Root) {
3226 while (!Worklist.empty()) {
3229 if (!Visited.insert(CurRec).second)
3251 RecWithFlags->isDisjoint()) {
3254 Instruction::Add, {A, B}, {false, false},
3255 RecWithFlags->getDebugLoc());
3256 New->setUnderlyingValue(RecWithFlags->getUnderlyingValue());
3257 RecWithFlags->replaceAllUsesWith(New);
3258 RecWithFlags->eraseFromParent();
3261 RecWithFlags->dropPoisonGeneratingFlags();
3266 assert((!Instr || !Instr->hasPoisonGeneratingFlags()) &&
3267 "found instruction with poison generating flags not covered by "
3268 "VPRecipeWithIRFlags");
3273 if (VPRecipeBase *OpDef = Operand->getDefiningRecipe())
3285 Instruction &UnderlyingInstr = WidenRec->getIngredient();
3286 VPRecipeBase *AddrDef = WidenRec->getAddr()->getDefiningRecipe();
3287 if (AddrDef && WidenRec->isConsecutive() &&
3288 BlockNeedsPredication(UnderlyingInstr.getParent()))
3289 CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
3291 VPRecipeBase *AddrDef = InterleaveRec->getAddr()->getDefiningRecipe();
3295 InterleaveRec->getInterleaveGroup();
3296 bool NeedPredication = false;
3298 I < NumMembers; ++I) {
3301 NeedPredication |= BlockNeedsPredication(Member->getParent());
3304 if (NeedPredication)
3305 CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
3317 if (InterleaveGroups.empty())
3324 for (const auto *IG : InterleaveGroups) {
3330 StoredValues.push_back(StoreR->getStoredValue());
3331 for (unsigned I = 1; I < IG->getFactor(); ++I) {
3338 StoredValues.push_back(StoreR->getStoredValue());
3342 bool NeedsMaskForGaps =
3343 (IG->requiresScalarEpilogue() && !ScalarEpilogueAllowed) ||
3344 (!StoredValues.empty() && !IG->isFull());
3356 VPValue *Addr = Start->getAddr();
3365 assert(IG->getIndex(IRInsertPos) != 0 &&
3366 "index of insert position shouldn't be zero");
3370 IG->getIndex(IRInsertPos),
3374 Addr = B.createNoWrapPtrAdd(InsertPos->getAddr(), OffsetVPV, NW);
3380 if (IG->isReverse()) {
3383 -(int64_t)IG->getFactor(), NW, InsertPos->getDebugLoc());
3384 ReversePtr->insertBefore(InsertPos);
3388 InsertPos->getMask(), NeedsMaskForGaps,
3389 InterleaveMD, InsertPos->getDebugLoc());
3390 VPIG->insertBefore(InsertPos);
3393 for (unsigned i = 0; i < IG->getFactor(); ++i)
3396 if (!Member->getType()->isVoidTy()) {
3455 AddOp = Instruction::Add;
3456 MulOp = Instruction::Mul;
3458 AddOp = ID.getInductionOpcode();
3459 MulOp = Instruction::FMul;
3467 Step = Builder.createScalarCast(Instruction::Trunc, Step, Ty, DL);
3468 Start = Builder.createScalarCast(Instruction::Trunc, Start, Ty, DL);
3470 Flags.dropPoisonGeneratingFlags();
3479 Init = Builder.createWidenCast(Instruction::UIToFP, Init, StepTy);
3484 Init = Builder.createNaryOp(MulOp, {Init, SplatStep}, Flags);
3485 Init = Builder.createNaryOp(AddOp, {SplatStart, Init}, Flags,
3491 WidePHI->insertBefore(WidenIVR);
3502 Builder.setInsertPoint(R->getParent(), std::next(R->getIterator()));
3506 VF = Builder.createScalarCast(Instruction::CastOps::UIToFP, VF, StepTy,
3509 VF = Builder.createScalarZExtOrTrunc(VF, StepTy,
3512 Inc = Builder.createNaryOp(MulOp, {Step, VF}, Flags);
3519 auto *Next = Builder.createNaryOp(AddOp, {Prev, Inc}, Flags,
3522 WidePHI->addOperand(Next);
3550 VPlan *Plan = R->getParent()->getPlan();
3551 VPValue *Start = R->getStartValue();
3552 VPValue *Step = R->getStepValue();
3553 VPValue *VF = R->getVFValue();
3555 assert(R->getInductionDescriptor().getKind() ==
3557 "Not a pointer induction according to InductionDescriptor!");
3560 "Recipe should have been replaced");
3566 VPPhi *ScalarPtrPhi = Builder.createScalarPhi(Start, DL, "pointer.phi");
3570 Builder.setInsertPoint(R->getParent(), R->getParent()->getFirstNonPhi());
3573 Offset = Builder.createOverflowingOp(Instruction::Mul, {Offset, Step});
3574 VPValue *PtrAdd = Builder.createNaryOp(
3576 R->replaceAllUsesWith(PtrAdd);
3581 VF = Builder.createScalarZExtOrTrunc(VF, StepTy, TypeInfo.inferScalarType(VF),
3583 VPValue *Inc = Builder.createOverflowingOp(Instruction::Mul, {Step, VF});
3586 Builder.createPtrAdd(ScalarPtrPhi, Inc, DL, "ptr.ind");
3595 if (!R->isReplicator())
3599 R->dissolveToCFGLoop();
3621 WidenIVR->replaceAllUsesWith(PtrAdd);
3634 for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)
3635 Select = Builder.createSelect(Blend->getMask(I),
3636 Blend->getIncomingValue(I), Select,
3637 R.getDebugLoc(), "predphi");
3638 Blend->replaceAllUsesWith(Select);
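// Blend expansion sketch: a normalized blend lowers to a select chain, e.g.
//   %phi = blend %v0, (%v1, %m1), (%v2, %m2)
// becomes select(%m2, %v2, select(%m1, %v1, %v0)), matching the loop above.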
3653 for (VPValue *Op : LastActiveL->operands()) {
3654 VPValue *NotMask = Builder.createNot(Op, LastActiveL->getDebugLoc());
3659 VPValue *FirstInactiveLane = Builder.createNaryOp(
3661 LastActiveL->getDebugLoc(), "first.inactive.lane");
3666 VPValue *LastLane = Builder.createNaryOp(
3667 Instruction::Sub, {FirstInactiveLane, One},
3668 LastActiveL->getDebugLoc(), "last.active.lane");
3686 ? Instruction::UIToFP
3687 : Instruction::Trunc;
3688 VectorStep = Builder.createWidenCast(CastOp, VectorStep, IVTy);
3694 Builder.createWidenCast(Instruction::Trunc, ScalarStep, IVTy);
3699 Flags = {VPI->getFastMathFlags()};
3704 MulOpc, {VectorStep, ScalarStep}, Flags, R.getDebugLoc());
3706 VPI->replaceAllUsesWith(VectorStep);
3712 R->eraseFromParent();
3725 "unsupported early exit VPBB");
3736 "Terminator must be be BranchOnCond");
3737 VPValue *CondOfEarlyExitingVPBB =
3739 auto *CondToEarlyExit = TrueSucc == EarlyExitVPBB
3740 ? CondOfEarlyExitingVPBB
3741 : Builder.createNot(CondOfEarlyExitingVPBB);
3758 VPBuilder EarlyExitB(VectorEarlyExitVPBB);
3763 unsigned EarlyExitIdx = ExitIRI->getNumOperands() - 1;
3764 if (ExitIRI->getNumOperands() != 1) {
3767 ExitIRI->extractLastLaneOfLastPartOfFirstOperand(MiddleBuilder);
3770 VPValue *IncomingFromEarlyExit = ExitIRI->getOperand(EarlyExitIdx);
3771 if (!IncomingFromEarlyExit->isLiveIn()) {
3779 ExitIRI->setOperand(EarlyExitIdx, IncomingFromEarlyExit);
3789 "Unexpected terminator");
3790 auto *IsLatchExitTaken =
3792 LatchExitingBranch->getOperand(1));
3793 auto *AnyExitTaken = Builder.createNaryOp(
3794 Instruction::Or, {IsEarlyExitTaken, IsLatchExitTaken});
3796 LatchExitingBranch->eraseFromParent();
3806 Type *RedTy = Ctx.Types.inferScalarType(Red);
3807 VPValue *VecOp = Red->getVecOp();
3810 auto IsExtendedRedValidAndClampRange =
3822 if (Red->isPartialReduction()) {
3827 ExtRedCost = Ctx.TTI.getPartialReductionCost(
3828 Opcode, SrcTy, nullptr, RedTy, VF, ExtKind,
3831 ExtRedCost = Ctx.TTI.getExtendedReductionCost(
3832 Opcode, ExtOpc == Instruction::CastOps::ZExt, RedTy, SrcVecTy,
3833 Red->getFastMathFlags(), CostKind);
3835 return ExtRedCost.isValid() && ExtRedCost < ExtCost + RedCost;
3843 IsExtendedRedValidAndClampRange(
3846 Ctx.Types.inferScalarType(A)))
3864 if (Opcode != Instruction::Add && Opcode != Instruction::Sub)
3867 Type *RedTy = Ctx.Types.inferScalarType(Red);
3870 auto IsMulAccValidAndClampRange =
3877 Ext0 ? Ctx.Types.inferScalarType(Ext0->getOperand(0)) : RedTy;
3880 if (Red->isPartialReduction()) {
3882 Ext1 ? Ctx.Types.inferScalarType(Ext1->getOperand(0)) : nullptr;
3885 MulAccCost = Ctx.TTI.getPartialReductionCost(
3886 Opcode, SrcTy, SrcTy2, RedTy, VF,
3896 if (Ext0 && Ext1 && Ext0->getOpcode() != Ext1->getOpcode())
3900 !Ext0 || Ext0->getOpcode() == Instruction::CastOps::ZExt;
3902 MulAccCost = Ctx.TTI.getMulAccReductionCost(IsZExt, Opcode, RedTy,
3910 ExtCost += Ext0->computeCost(VF, Ctx);
3912 ExtCost += Ext1->computeCost(VF, Ctx);
3914 ExtCost += OuterExt->computeCost(VF, Ctx);
3916 return MulAccCost.isValid() &&
3917 MulAccCost < ExtCost + MulCost + RedCost;
3922 VPValue *VecOp = Red->getVecOp();
3940 if (!ExtA || ExtB || !ValB->isLiveIn())
3956 Builder.createWidenCast(Instruction::CastOps::Trunc, ValB, NarrowTy);
3957 Type *WideTy = Ctx.Types.inferScalarType(ExtA);
3958 ValB = ExtB = Builder.createWidenCast(ExtOpc, Trunc, WideTy);
3959 Mul->setOperand(1, ExtB);
3969 ExtendAndReplaceConstantOp(RecipeA, RecipeB, B, Mul);
3974 IsMulAccValidAndClampRange(Mul, RecipeA, RecipeB, nullptr)) {
3981 if (!Sub && IsMulAccValidAndClampRange(Mul, nullptr, nullptr, nullptr))
3998 ExtendAndReplaceConstantOp(Ext0, Ext1, B, Mul);
4007 (Ext->getOpcode() == Ext0->getOpcode() || Ext0 == Ext1) &&
4008 Ext0->getOpcode() == Ext1->getOpcode() &&
4009 IsMulAccValidAndClampRange(Mul, Ext0, Ext1, Ext) && Mul->hasOneUse()) {
4011 Ext0->getOpcode(), Ext0->getOperand(0), Ext->getResultType(), nullptr,
4012 *Ext0, *Ext0, Ext0->getDebugLoc());
4013 NewExt0->insertBefore(Ext0);
4018 Ext->getResultType(), nullptr, *Ext1,
4019 *Ext1, Ext1->getDebugLoc());
4022 Mul->setOperand(0, NewExt0);
4023 Mul->setOperand(1, NewExt1);
4024 Red->setOperand(1, Mul);
4037 auto IP = std::next(Red->getIterator());
4038 auto *VPBB = Red->getParent();
4048 Red->replaceAllUsesWith(AbstractR);
4078 for (VPValue *VPV : VPValues) {
4080 (VPV->isLiveIn() && VPV->getLiveInIRValue() &&
4088 if (User->usesScalars(VPV))
4091 HoistPoint = HoistBlock->begin();
4095 "All users must be in the vector preheader or dominated by it");
4100 VPV->replaceUsesWithIf(Broadcast,
4101 [VPV, Broadcast](VPUser &U, unsigned Idx) {
4102 return Broadcast != &U && !U.usesScalars(VPV);
4119 if (RepR->isPredicated() || !RepR->isSingleScalar() ||
4120 RepR->getOpcode() != Instruction::Load)
4123 VPValue *Addr = RepR->getOperand(0);
4126 if (!Loc.AATags.Scope)
4131 if (R.mayWriteToMemory()) {
4133 if (!Loc || !Loc->AATags.Scope || !Loc->AATags.NoAlias)
4141 for (auto &[LoadRecipe, LoadLoc] : CandidateLoads) {
4145 const AAMDNodes &LoadAA = LoadLoc.AATags;
4161 return CommonMetadata;
4164 template <unsigned Opcode>
4168 static_assert(Opcode == Instruction::Load || Opcode == Instruction::Store,
4169 "Only Load and Store opcodes supported");
4170 constexpr bool IsLoad = (Opcode == Instruction::Load);
4180 if (!RepR || RepR->getOpcode() != Opcode || !RepR->isPredicated())
4184 VPValue *Addr = RepR->getOperand(IsLoad ? 0 : 1);
4187 RecipesByAddress[AddrSCEV].push_back(RepR);
4194 return TypeInfo.inferScalarType(IsLoad ? Recipe : Recipe->getOperand(0));
4196 for (auto &[Addr, Recipes] : RecipesByAddress) {
4197 if (Recipes.size() < 2)
4205 VPValue *MaskI = RecipeI->getMask();
4206 Type *TypeI = GetLoadStoreValueType(RecipeI);
4212 bool HasComplementaryMask = false;
4217 VPValue *MaskJ = RecipeJ->getMask();
4218 Type *TypeJ = GetLoadStoreValueType(RecipeJ);
4219 if (TypeI == TypeJ) {
4229 if (HasComplementaryMask) {
4230 assert(Group.size() >= 2 && "must have at least 2 entries");
4240 template <typename InstType>
4259 for (auto &Group : Groups) {
4285 LoadWithMinAlign->getUnderlyingInstr(), {EarliestLoad->getOperand(0)},
4286 false, nullptr, *EarliestLoad,
4289 UnpredicatedLoad->insertBefore(EarliestLoad);
4293 Load->replaceAllUsesWith(UnpredicatedLoad);
4294 Load->eraseFromParent();
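// Load-merging sketch: predicated loads from the same address whose masks
// are complementary are apparently replaced by one unpredicated load (built
// from the least-aligned member), which then serves every group member.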
4302 if (!StoreLoc || !StoreLoc->AATags.Scope)
4308 StoresToSink.end());
4313 true, &StoresToSinkSet);
4325 for (auto &Group : Groups) {
4342 VPValue *SelectedValue = Group[0]->getOperand(0);
4345 for (unsigned I = 1; I < Group.size(); ++I) {
4346 VPValue *Mask = Group[I]->getMask();
4348 SelectedValue = Builder.createSelect(Mask, Value, SelectedValue,
4356 auto *UnpredicatedStore =
4358 {SelectedValue, LastStore->getOperand(1)},
4360 nullptr, *LastStore, CommonMetadata);
4361 UnpredicatedStore->insertBefore(*InsertBB, LastStore->getIterator());
4365 Store->eraseFromParent();
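// Store-merging counterpart: complementary predicated stores to one address
// seemingly become a select chain over the stored values feeding a single
// unpredicated store placed at the last member's position.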
4372 assert(Plan.hasVF(BestVF) && "BestVF is not available in Plan");
4373 assert(Plan.hasUF(BestUF) && "BestUF is not available in Plan");
4407 auto *TCMO = Builder.createNaryOp(
4435 auto UsesVectorOrInsideReplicateRegion = [DefR, LoopRegion](VPUser *U) {
4437 return !U->usesScalars(DefR) || ParentRegion != LoopRegion;
4444 none_of(DefR->users(), UsesVectorOrInsideReplicateRegion))
4454 DefR->replaceUsesWithIf(
4455 BuildVector, [BuildVector, &UsesVectorOrInsideReplicateRegion](
4457 return &U != BuildVector && UsesVectorOrInsideReplicateRegion(&U);
4471 for (VPValue *Def : R.definedValues()) {
4484 auto IsCandidateUnpackUser = [Def](VPUser *U) {
4486 return U->usesScalars(Def) &&
4489 if (none_of(Def->users(), IsCandidateUnpackUser))
4496 Unpack->insertAfter(&R);
4497 Def->replaceUsesWithIf(Unpack,
4498 [&IsCandidateUnpackUser](VPUser &U, unsigned) {
4499 return IsCandidateUnpackUser(&U);
4509 bool RequiresScalarEpilogue) {
4511 assert(VectorTC.isLiveIn() && "vector-trip-count must be a live-in");
4530 if (TailByMasking) {
4531 TC = Builder.createNaryOp(
4533 {TC, Builder.createNaryOp(Instruction::Sub,
4544 Builder.createNaryOp(Instruction::URem, {TC, Step},
4553 if (RequiresScalarEpilogue) {
4555 "requiring scalar epilogue is not supported with fail folding");
4558 R = Builder.createSelect(IsZero, Step, R);
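// Vector trip count (hedged reconstruction of the math above), with
// Step = VF * UF:
//   VectorTC = TC - (TC urem Step)
// and when a scalar epilogue is required, a zero remainder is bumped to a
// full Step so at least one iteration remains for the epilogue.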
4561 VPValue *Res = Builder.createNaryOp(
4580 Builder.createElementCount(TCTy, VFEC * Plan.getUF());
4587 VPValue *RuntimeVF = Builder.createElementCount(TCTy, VFEC);
4591 BC, [&VF](VPUser &U, unsigned) { return !U.usesScalars(&VF); });
4596 VPValue *MulByUF = Builder.createOverflowingOp(
4597 Instruction::Mul, {RuntimeVF, UF}, {true, false});
4607 BasicBlock *EntryBB = Entry->getIRBasicBlock();
4615 const SCEV *Expr = ExpSCEV->getSCEV();
4618 ExpandedSCEVs[ExpSCEV->getSCEV()] = Res;
4623 ExpSCEV->eraseFromParent();
4626 "VPExpandSCEVRecipes must be at the beginning of the entry block, "
4627 "after any VPIRInstructions");
4630 auto EI = Entry->begin();
4640 return ExpandedSCEVs;
4656 return Member0Op == OpV;
4658 return !W->getMask() && Member0Op == OpV;
4660 return IR->getInterleaveGroup()->isFull() && IR->getVPValue(Idx) == OpV;
4671 if (!InterleaveR || InterleaveR->getMask())
4674 Type *GroupElementTy = nullptr;
4678 [&TypeInfo, GroupElementTy](VPValue *Op) {
4679 return TypeInfo.inferScalarType(Op) == GroupElementTy;
4686 [&TypeInfo, GroupElementTy](VPValue *Op) {
4687 return TypeInfo.inferScalarType(Op) == GroupElementTy;
4696 return IG->getFactor() == VFMin && IG->getNumMembers() == VFMin &&
4697 GroupSize == VectorRegWidth;
4705 return RepR && RepR->isSingleScalar();
4712 auto *R = V->getDefiningRecipe();
4720 for (unsigned Idx = 0, E = WideMember0->getNumOperands(); Idx != E; ++Idx)
4721 WideMember0->setOperand(
4730 auto *LI = cast<LoadInst>(LoadGroup->getInterleaveGroup()->getInsertPos());
4732 *LI, LoadGroup->getAddr(), LoadGroup->getMask(), true,
4733 false, {}, LoadGroup->getDebugLoc());
4734 L->insertBefore(LoadGroup);
4740 assert(RepR->isSingleScalar() &&
4742 "must be a single scalar load");
4743 NarrowedOps.insert(RepR);
4748 VPValue *PtrOp = WideLoad->getAddr();
4750 PtrOp = VecPtr->getOperand(0);
4755 nullptr, {}, *WideLoad);
4756 N->insertBefore(WideLoad);
4786 if (R.mayWriteToMemory() && !InterleaveR)
4808 if (InterleaveR->getStoredValues().empty())
4813 auto *Member0 = InterleaveR->getStoredValues()[0];
4815 all_of(InterleaveR->getStoredValues(),
4816 [Member0](VPValue *VPV) { return Member0 == VPV; })) {
4824 VPRecipeBase *DefR = Op.value()->getDefiningRecipe();
4827 auto *IR = dyn_cast<VPInterleaveRecipe>(DefR);
4828 return IR && IR->getInterleaveGroup()->isFull() &&
4829 IR->getVPValue(Op.index()) == Op.value();
4841 for (const auto &[I, V] : enumerate(InterleaveR->getStoredValues())) {
4843 if (!R || R->getOpcode() != WideMember0->getOpcode() ||
4844 R->getNumOperands() > 2)
4847 [WideMember0, Idx = I](const auto &P) {
4848 const auto &[OpIdx, OpV] = P;
4849 return !canNarrowLoad(WideMember0, OpIdx, OpV, Idx);
4856 if (StoreGroups.empty())
4862 for (auto *StoreGroup : StoreGroups) {
4868 *SI, StoreGroup->getAddr(), Res, nullptr, true,
4869 false, {}, StoreGroup->getDebugLoc());
4870 S->insertBefore(StoreGroup);
4871 StoreGroup->eraseFromParent();
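// Interleave-group narrowing: when every member of a full load/store group
// is the same value (or the members are lane-consistent wide ops), the
// group is seemingly rewritten as a single narrow wide-load/wide-store
// recipe, with consumers repointed before the old group recipes are erased.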
4886 Instruction::Mul, {VScale, UF}, {true, false});
4890 Inc->setOperand(1, UF);
4909 "must have a BranchOnCond");
4912 if (VF.isScalable() && VScaleForTuning.has_value())
4913 VectorStep *= *VScaleForTuning;
4914 assert(VectorStep > 0 && "trip count should not be zero");
4918 MiddleTerm->setMetadata(LLVMContext::MD_prof, BranchWeights);
4931 if (WideIntOrFp && WideIntOrFp->getTruncInst())
4938 if (!WideIntOrFp || !WideIntOrFp->isCanonical()) {
4941 Start, VectorTC, Step);
4964 VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
4974 IVEndValues[WideIVR] = EndValue;
4975 ResumePhiR->setOperand(0, EndValue);
4976 ResumePhiR->setName("bc.resume.val");
4983 "should only skip truncated wide inductions");
4991 auto *ResumeFromVectorLoop = VectorPhiR->getBackedgeValue();
4993 "Cannot handle loops with uncountable early exits");
4999 "vector.recur.extract");
5001 ResumePhiR->setName(IsFOR ? "scalar.recur.init" : "bc.merge.rdx");
5002 ResumePhiR->setOperand(0, ResumeFromVectorLoop);
5011 VPBuilder ScalarPHBuilder(ScalarPHVPBB);
5012 VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
5024 "Cannot handle loops with uncountable early exits");
5097 make_range(MiddleVPBB->getFirstNonPhi(), MiddleVPBB->end()))) {
5111 "vector.recur.extract.for.phi");
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
This file implements a class to represent arbitrary precision integral constant values and operations...
ReachingDefInfo InstSet & ToRemove
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static bool isEqual(const Function &Caller, const Function &Callee)
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
static bool isSentinel(const DWARFDebugNames::AttributeEncoding &AE)
iv Induction Variable Users
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
Legalize the Machine IR a function s Machine IR
static bool mergeBlocksIntoPredecessors(Loop &L, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution &SE)
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first DebugLoc that has line number information, given a range of instructions.
This file provides utility analysis objects describing memory locations.
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
const SmallVectorImpl< MachineOperand > & Cond
This is the interface for a metadata-based scoped no-alias analysis.
This file defines generic set operations that may be used on set's of different types,...
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static SymbolRef::Type getType(const Symbol *Sym)
This file implements the TypeSwitch template, which mimics a switch() statement whose cases are type ...
This file implements dominator tree analysis for a single level of a VPlan's H-CFG.
This file contains the declarations of different VPlan-related auxiliary helpers.
This file declares the class VPlanVerifier, which contains utility functions to check the consistency...
This file contains the declarations of the Vectorization Plan base classes:
static const X86InstrFMA3Group Groups[]
static const uint32_t IV[8]
Class for arbitrary precision integers.
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
unsigned getActiveBits() const
Compute the number of active bits in the value.
unsigned getBitWidth() const
Return the number of bits in the APInt.
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & back() const
back - Get the last element.
const T & front() const
front - Get the first element.
LLVM Basic Block Representation.
const Function * getParent() const
Return the enclosing method, or null if none.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
This class represents a function call, abstracting a target machine's calling convention.
@ ICMP_ULT
unsigned less than
@ ICMP_ULE
unsigned less or equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
static DebugLoc getCompilerGenerated()
static DebugLoc getUnknown()
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
constexpr bool isVector() const
One or more elements.
static constexpr ElementCount getScalable(ScalarTy MinVal)
Utility class for floating point operations which can have information about relaxed accuracy require...
Represents flags for the getelementptr instruction/expression.
GEPNoWrapFlags withoutNoUnsignedWrap() const
static GEPNoWrapFlags none()
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
A struct for saving information about induction variables.
InductionKind
This enum represents the kinds of inductions that we support.
@ IK_PtrInduction
Pointer induction var. Step = C.
@ IK_IntInduction
Integer induction variable. Step = C.
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
The group of interleaved loads/stores sharing the same stride and close to each other.
InstTy * getMember(uint32_t Index) const
Get the member with the given index Index.
uint32_t getNumMembers() const
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
static bool getDecisionAndClampRange(const std::function< bool(ElementCount)> &Predicate, VFRange &Range)
Test a Predicate on a Range of VF's.
Represents a single loop in the control flow graph.
LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
This class implements a map that also provides access to all stored values in a deterministic order.
ValueT lookup(const KeyT &Key) const
Representation for a specific memory location.
AAMDNodes AATags
The metadata nodes which describes the aliasing of the location (each member is null if that kind of ...
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
LLVM_ABI const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
static LLVM_ABI unsigned getOpcode(RecurKind Kind)
Returns the opcode corresponding to the RecurrenceKind.
unsigned getOpcode() const
RegionT * getParent() const
Get the parent of the Region.
This class uses information about analyze scalars to rewrite expressions in canonical form.
LLVM_ABI Value * expandCodeFor(const SCEV *SH, Type *Ty, BasicBlock::iterator I)
Insert code to directly compute the specified SCEV expression into the program.
static const SCEV * rewrite(const SCEV *Scev, ScalarEvolution &SE, ValueToSCEVMapTy &Map)
This class represents an analyzed expression in the program.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
const DataLayout & getDataLayout() const
Return the DataLayout associated with the module this SCEV instance is operating on.
LLVM_ABI const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI const SCEV * getUDivExpr(const SCEV *LHS, const SCEV *RHS)
Get a canonical unsigned division expression, or something simpler if possible.
LLVM_ABI const SCEV * getElementCount(Type *Ty, ElementCount EC, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
LLVM_ABI const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
LLVM_ABI bool isKnownPredicate(CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
static LLVM_ABI bool mayAliasInScopes(const MDNode *Scopes, const MDNode *NoAlias)
This class represents the LLVM 'select' instruction.
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Provides information about what library functions are available for the current target.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
static constexpr TypeSize get(ScalarTy Quantity, bool Scalable)
This class implements a switch-like dispatch statement for a value of 'T' using dyn_cast functionalit...
TypeSwitch< T, ResultT > & Case(CallableT &&caseFn)
Add a case on the given type.
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
bool isStructTy() const
True if this is an instance of StructType.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
A recipe for generating the active lane mask for the vector loop that is used to predicate the vector...
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
RecipeListTy::iterator iterator
Instruction iterators...
iterator begin()
Recipe iterator methods.
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
VPRegionBlock * getEnclosingLoopRegion()
VPBasicBlock * splitAt(iterator SplitAt)
Split current block at SplitAt by inserting a new block between the current block and its successors ...
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
const VPRecipeBase & back() const
A recipe for vectorizing a phi-node as a sequence of mask-based select instructions.
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account that when normalized the first incoming value will have no mask.
void setMask(unsigned Idx, VPValue *V)
Set mask number Idx to V.
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have an associated mask.
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
VPRegionBlock * getParent()
const VPBasicBlock * getExitingBasicBlock() const
size_t getNumSuccessors() const
void swapSuccessors()
Swap successors of the block. The block must have exactly 2 successors.
size_t getNumPredecessors() const
const VPBlocksTy & getPredecessors() const
VPBlockBase * getSinglePredecessor() const
const VPBasicBlock * getEntryBasicBlock() const
VPBlockBase * getSingleHierarchicalPredecessor()
VPBlockBase * getSingleSuccessor() const
const VPBlocksTy & getSuccessors() const
static auto blocksOnly(const T &Range)
Return an iterator range over Range which only includes BlockTy blocks.
static void insertOnEdge(VPBlockBase *From, VPBlockBase *To, VPBlockBase *BlockPtr)
Inserts BlockPtr on the edge between From and To.
static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBases IfTrue and IfFalse after BlockPtr.
static void connectBlocks(VPBlockBase *From, VPBlockBase *To, unsigned PredIdx=-1u, unsigned SuccIdx=-1u)
Connect VPBlockBases From and To bi-directionally.
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
A recipe for generating conditional branches on the bits of a mask.
RAII object that stores the current insertion point and restores it when the object is destroyed.
VPlan-based builder utility analogous to IRBuilder.
VPValue * createScalarZExtOrTrunc(VPValue *Op, Type *ResultTy, Type *SrcTy, DebugLoc DL)
VPValue * createElementCount(Type *Ty, ElementCount EC)
VPInstruction * createScalarCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, DebugLoc DL, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={})
VPDerivedIVRecipe * createDerivedIV(InductionDescriptor::InductionKind Kind, FPMathOperator *FPBinOp, VPValue *Start, VPValue *Current, VPValue *Step, const Twine &Name="")
Convert the input value Current to the corresponding value of an induction with Start and Step values, using Start + Current * Step.
static VPBuilder getToInsertAfter(VPRecipeBase *R)
Create a VPBuilder to insert after R.
VPInstruction * createOverflowingOp(unsigned Opcode, ArrayRef< VPValue * > Operands, VPRecipeWithIRFlags::WrapFlagsTy WrapFlags={false, false}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPPhi * createScalarPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL, const Twine &Name="")
void setInsertPoint(VPBasicBlock *TheBB)
This specifies that created VPInstructions should be appended to the end of the specified block.
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.
Canonical scalar induction phi of the vector loop.
unsigned getNumDefinedValues() const
Returns the number of values defined by the VPDef.
ArrayRef< VPValue * > definedValues()
Returns an ArrayRef of the values defined by the VPDef.
VPValue * getVPSingleValue()
Returns the only VPValue defined by the VPDef.
VPValue * getVPValue(unsigned I)
Returns the VPValue with index I defined by the VPDef.
A recipe for converting the input IV value to the corresponding value of an IV with different start and step values, using Start + IV * Step.
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
bool properlyDominates(const VPRecipeBase *A, const VPRecipeBase *B)
A recipe for generating the phi node for the current index of elements, adjusted in accordance with the EVL value.
A recipe to combine multiple recipes into a single 'expression' recipe, which should be considered a single entity for cost-modeling.
A special type of VPBasicBlock that wraps an existing IR basic block.
BasicBlock * getIRBasicBlock() const
Class to record and manage LLVM IR flags.
static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)
Create a new VPIRPhi for I, if it is a PHINode; otherwise create a VPIRInstruction.
This is a concrete Recipe that models a single VPlan-level instruction.
@ ExtractLane
Extracts a single lane (first operand) from a set of vector operands.
@ ExtractPenultimateElement
@ Unpack
Extracts all lanes from its (non-scalable) vector operand.
@ FirstOrderRecurrenceSplice
@ BuildVector
Creates a fixed-width vector containing all operands.
@ BuildStructVector
Given operands of (the same) struct type, creates a struct of fixed-width vectors each containing a struct field of all operands.
@ CanonicalIVIncrementForPart
@ CalculateTripCountMinusVF
const InterleaveGroup< Instruction > * getInterleaveGroup() const
VPValue * getMask() const
Return the mask used by this recipe.
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
A recipe for interleaved memory operations with vector-predication intrinsics.
VPInterleaveRecipe is a recipe for transforming an interleave group of load or stores into one wide load/store and shuffles.
VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when control converges back from a Branch-on-Mask.
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
VPRegionBlock * getRegion()
VPBasicBlock * getParent()
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Helper class to create VPRecipes from IR instructions.
VPRecipeBase * getRecipe(Instruction *I)
Return the recipe created for given ingredient.
A recipe to represent inloop reduction operations with vector-predication intrinsics,...
A recipe to represent inloop, ordered or partial reduction operations.
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-Single-Exiting subgraph of the output IR CFG.
const VPBlockBase * getEntry() const
Type * getCanonicalIVType()
Return the type of the canonical IV for loop regions.
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR corresponding to its VPBlockBases.
void setExiting(VPBlockBase *ExitingBlock)
Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.
VPCanonicalIVPHIRecipe * getCanonicalIV()
Returns the canonical induction recipe of the region.
const VPBlockBase * getExiting() const
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original scalar type, one per lane, instead of producing a single copy of widened type for all lanes.
bool isSingleScalar() const
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar values.
VPSingleDef is a base class for recipes for modeling a sequence of one or more output IR that define a single result VPValue.
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
VPSingleDefRecipe * clone() override=0
Clone the current recipe.
An analysis for type-inference for VPValues.
LLVMContext & getContext()
Return the LLVMContext used by the analysis.
Type * inferScalarType(const VPValue *V)
Infer the type of V. Returns the scalar type of V.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's users to its defs.
void setOperand(unsigned I, VPValue *New)
VPValue * getOperand(unsigned N) const
void addOperand(VPValue *Operand)
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into, within and out of the VPlan.
bool isDefinedOutsideLoopRegions() const
Returns true if the VPValue is defined outside any loop.
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue, or nullptr if it is not defined by a recipe, i.e. is a live-in.
Value * getLiveInIRValue() const
Returns the underlying IR value, if this VPValue is defined outside the scope of VPlan.
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
void setUnderlyingValue(Value *Val)
void replaceAllUsesWith(VPValue *New)
unsigned getNumUsers() const
bool isLiveIn() const
Returns true if this VPValue is a live-in, i.e. defined outside the VPlan.
void replaceUsesWithIf(VPValue *New, llvm::function_ref< bool(VPUser &U, unsigned Idx)> ShouldReplace)
Go through the uses list for this VPValue and make each use point to New if the callback ShouldReplace returns true for the given use, specified by its user and operand index.
A recipe to compute a pointer to the last element of each part of a widened memory access, for widened memory accesses of IndexedTy.
A Recipe for widening the canonical induction variable of the vector loop.
VPWidenCastRecipe is a recipe to create vector cast instructions.
Instruction::CastOps getOpcode() const
A recipe for handling GEP instructions.
Base class for widened induction (VPWidenIntOrFpInductionRecipe and VPWidenPointerInductionRecipe),...
PHINode * getPHINode() const
VPValue * getStepValue()
Returns the step value of the induction.
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
A recipe for handling phi nodes of integer and floating-point inductions, producing their vector valu...
VPValue * getLastUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the last unrolled part, if it exists.
VPValue * getSplatVFValue()
A recipe for widening vector intrinsics.
A common base class for widening memory operations.
A recipe for widened phis.
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient output IR.
bool hasVF(ElementCount VF) const
LLVMContext & getContext() const
VPBasicBlock * getEntry()
VPValue & getVectorTripCount()
The vector trip count.
bool hasScalableVF() const
VPValue & getVFxUF()
Returns VF * UF of the vector loop region.
VPValue & getVF()
Returns the VF of the vector loop region.
VPValue * getTripCount() const
The trip count of the original loop.
VPValue * getTrue()
Return a VPValue wrapping i1 true.
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
VPRegionBlock * createReplicateRegion(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="")
Create a new replicate region with Entry, Exiting and Name.
bool hasUF(unsigned UF) const
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
VPValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPValue wrapping a ConstantInt with the given type and value.
void setVF(ElementCount VF)
bool isUnrolled() const
Returns true if the VPlan already has been unrolled, i.e. UF will always be 1.
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
void resetTripCount(VPValue *NewTripCount)
Resets the trip count for the VPlan.
VPBasicBlock * getMiddleBlock()
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar tail loop or the exit block from the loop latch.
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
VPValue * getFalse()
Return a VPValue wrapping i1 false.
VPValue * getOrAddLiveIn(Value *V)
Gets the live-in VPValue for V or adds a new live-in (if none exists yet) for V.
bool hasScalarVFOnly() const
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
ArrayRef< VPValue * > getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
VPValue * getLiveIn(Value *V) const
Return the live-in VPValue for V, if there is one or nullptr otherwise.
VPBasicBlock * getVectorPreheader()
Returns the preheader of the vector loop region, if one exists, or null otherwise.
bool hasScalarTail() const
Returns true if the scalar tail may execute after the vector loop.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
iterator_range< user_iterator > users()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
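A brief sketch of how these quantities behave (function name is illustrative): a scalable count is implicitly multiplied by the runtime vscale, so compile-time code can only reason about its known minimum.

#include "llvm/Support/TypeSize.h"

unsigned knownMinLanes(llvm::ElementCount EC, unsigned Interleave) {
  llvm::ElementCount Scaled = EC.multiplyCoefficientBy(Interleave);
  // A lower bound for scalable counts; exact only when isFixed().
  return Scaled.getKnownMinValue();
}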
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_ABI APInt RoundingUDiv(const APInt &A, const APInt &B, APInt::Rounding RM)
Return A unsign-divided by B, rounded by the given rounding mode.
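For instance, a ceiling division can be expressed with the UP rounding mode (wrapper name is illustrative):

#include "llvm/ADT/APInt.h"

llvm::APInt ceilUDiv(const llvm::APInt &A, const llvm::APInt &B) {
  // E.g. 7 ceil-div 2 == 4; Rounding::DOWN gives the truncating udiv.
  return llvm::APIntOps::RoundingUDiv(A, B, llvm::APInt::Rounding::UP);
}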
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_MaskedStore(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
Matches MaskedStore Intrinsic.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
LogicalOp_match< LHS, RHS, Instruction::And > m_LogicalAnd(const LHS &L, const RHS &R)
Matches L && R either in the form of L & R or L ? R : false.
match_combine_or< CastInst_match< OpTy, ZExtInst >, OpTy > m_ZExtOrSelf(const OpTy &Op)
bool match(Val *V, const Pattern &P)
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_MaskedLoad(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
Matches MaskedLoad Intrinsic.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
SpecificCmpClass_match< LHS, RHS, CmpInst > m_SpecificCmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match() expression.
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
BinaryOp_match< LHS, RHS, Instruction::Mul, true > m_c_Mul(const LHS &L, const RHS &R)
Matches a Mul with LHS and RHS in either order.
MatchFunctor< Val, Pattern > match_fn(const Pattern &P)
A match functor that can be used as a UnaryPredicate in functional algorithms like all_of.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
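A hedged sketch of how these matchers compose (function name is illustrative): recognize a multiply by the constant 1 in either operand order and bind the other operand.

#include "llvm/IR/PatternMatch.h"

using namespace llvm;
using namespace llvm::PatternMatch;

Value *stripMulByOne(Value *V) {
  Value *X = nullptr;
  // m_c_Mul tries both operand orders; m_One matches the constant 1.
  if (match(V, m_c_Mul(m_Value(X), m_One())))
    return X;
  return nullptr;
}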
VPInstruction_match< VPInstruction::ExtractLastLane, VPInstruction_match< VPInstruction::ExtractLastPart, Op0_t > > m_ExtractLastLaneOfLastPart(const Op0_t &Op0)
AllRecipe_commutative_match< Instruction::And, Op0_t, Op1_t > m_c_BinaryAnd(const Op0_t &Op0, const Op1_t &Op1)
Match a binary AND operation.
AllRecipe_match< Instruction::Or, Op0_t, Op1_t > m_BinaryOr(const Op0_t &Op0, const Op1_t &Op1)
Match a binary OR operation.
VPInstruction_match< VPInstruction::AnyOf > m_AnyOf()
AllRecipe_commutative_match< Opcode, Op0_t, Op1_t > m_c_Binary(const Op0_t &Op0, const Op1_t &Op1)
AllRecipe_commutative_match< Instruction::Or, Op0_t, Op1_t > m_c_BinaryOr(const Op0_t &Op0, const Op1_t &Op1)
GEPLikeRecipe_match< Op0_t, Op1_t > m_GetElementPtr(const Op0_t &Op0, const Op1_t &Op1)
AllRecipe_match< Opcode, Op0_t, Op1_t > m_Binary(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::LastActiveLane, Op0_t > m_LastActiveLane(const Op0_t &Op0)
VPInstruction_match< Instruction::ExtractElement, Op0_t, Op1_t > m_ExtractElement(const Op0_t &Op0, const Op1_t &Op1)
specific_intval< 1 > m_False()
VPDerivedIV_match< Op0_t, Op1_t, Op2_t > m_DerivedIV(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
VPInstruction_match< VPInstruction::ExtractLastLane, Op0_t > m_ExtractLastLane(const Op0_t &Op0)
VPInstruction_match< VPInstruction::ActiveLaneMask, Op0_t, Op1_t, Op2_t > m_ActiveLaneMask(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
VPInstruction_match< VPInstruction::BranchOnCount > m_BranchOnCount()
specific_intval< 1 > m_True()
VectorEndPointerRecipe_match< Op0_t, Op1_t > m_VecEndPtr(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::ExtractLastPart, Op0_t > m_ExtractLastPart(const Op0_t &Op0)
VPInstruction_match< VPInstruction::Broadcast, Op0_t > m_Broadcast(const Op0_t &Op0)
class_match< VPValue > m_VPValue()
Match an arbitrary VPValue and ignore it.
VPInstruction_match< VPInstruction::ExplicitVectorLength, Op0_t > m_EVL(const Op0_t &Op0)
VPInstruction_match< VPInstruction::BuildVector > m_BuildVector()
BuildVector matches only its opcode, without matching its operands, as the number of operands is not fixed.
VPInstruction_match< VPInstruction::ExtractPenultimateElement, Op0_t > m_ExtractPenultimateElement(const Op0_t &Op0)
VPInstruction_match< VPInstruction::FirstActiveLane, Op0_t > m_FirstActiveLane(const Op0_t &Op0)
bind_ty< VPInstruction > m_VPInstruction(VPInstruction *&V)
Match a VPInstruction, capturing if we match.
VPInstruction_match< VPInstruction::BranchOnCond > m_BranchOnCond()
VPInstruction_match< VPInstruction::ExtractLane, Op0_t, Op1_t > m_ExtractLane(const Op0_t &Op0, const Op1_t &Op1)
NodeAddr< DefNode * > Def
bool isSingleScalar(const VPValue *VPV)
Returns true if VPV is a single scalar, either because it produces the same value for all lanes or only has its first lane used.
bool isUniformAcrossVFsAndUFs(VPValue *V)
Checks if V is uniform across all VF lanes and UF parts.
VPValue * getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr)
Get or create a VPValue that corresponds to the expansion of Expr.
std::optional< MemoryLocation > getMemoryLocation(const VPRecipeBase &R)
Return a MemoryLocation for R with noalias metadata populated from R, if the recipe is supported, and std::nullopt otherwise.
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
VPIRFlags getFlagsFromIndDesc(const InductionDescriptor &ID)
Extracts and returns NoWrap and FastMath flags from the induction binop in ID.
bool onlyScalarValuesUsed(const VPValue *Def)
Returns true if only scalar values of Def are used by all users.
bool isHeaderMask(const VPValue *V, const VPlan &Plan)
Return true if V is a header mask in Plan.
const SCEV * getSCEVExprForVPValue(const VPValue *V, ScalarEvolution &SE, const Loop *L=nullptr)
Return the SCEV expression for V.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
auto min_element(R &&Range)
Provide wrappers to std::min_element which take ranges instead of having to pass begin/end explicitly.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
DenseMap< const Value *, const SCEV * > ValueToSCEVMapTy
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B, C, ...), where A is the 0-based index of the item and B, C, ... are the corresponding values from each input range.
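A minimal usage sketch:

#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <vector>

void printIndexed(const std::vector<int> &Vals) {
  // Each enumerated element carries its 0-based index alongside the value.
  for (const auto &En : llvm::enumerate(Vals))
    llvm::outs() << En.index() << ": " << En.value() << "\n";
}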
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
constexpr from_range_t from_range
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case of optionals) value is accepted.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting iteration.
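A common use is deleting elements while walking a block: the iterator is advanced before each element is yielded, so erasing the current element is safe (function name is illustrative):

#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"

void eraseTriviallyDead(llvm::BasicBlock &BB) {
  for (llvm::Instruction &I : llvm::make_early_inc_range(BB))
    if (!I.isTerminator() && I.use_empty() && !I.mayHaveSideEffects())
      I.eraseFromParent(); // Safe: traversal already moved past I.
}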
auto cast_or_null(const Y &Val)
iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order.
iterator_range< df_iterator< VPBlockDeepTraversalWrapper< VPBlockBase * > > > vp_depth_first_deep(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order while traversing through region blocks.
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
iterator_range< po_iterator< VPBlockDeepTraversalWrapper< VPBlockBase * > > > vp_post_order_deep(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in post order while traversing through region blocks.
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI_FOR_TEST cl::opt< bool > EnableWideActiveLaneMask
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vector type.
iterator_range< filter_iterator< detail::IterOfRange< RangeT >, PredicateT > > make_filter_range(RangeT &&Range, PredicateT Pred)
Convenience function that takes a range of elements and a predicate, and returns a new filter_iterator range.
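make_filter_range and to_vector (listed above) compose naturally: filter lazily, then materialize the survivors (function name is illustrative):

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include <vector>

llvm::SmallVector<int> keepPositive(const std::vector<int> &In) {
  auto Pos = llvm::make_filter_range(In, [](int V) { return V > 0; });
  return llvm::to_vector(Pos); // Copies only the elements that pass.
}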
bool canConstantBeExtended(const APInt *C, Type *NarrowType, TTI::PartialReductionExtendKind ExtKind)
Check if a constant CI can be safely treated as having been extended from a narrower type with the given extension kind.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
RecurKind
These are the kinds of recurrences that we support.
@ Mul
Product of integers.
@ Sub
Subtraction of integers.
@ AddChainWithSubs
A chain of adds and subs.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the given range Range.
DWARFExpression::Operation Op
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given predicate occurs in a range.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list is empty.
@ DataAndControlFlowWithoutRuntimeCheck
Use predicate to control both data and control flow, but modify the trip count so that a runtime overflow check can be avoided.
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
@ Default
The result values are uniform if and only if all operands are uniform.
constexpr detail::IsaCheckPredicate< Types... > IsaPred
Function object wrapper for the llvm::isa type check.
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
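A short sketch combining hash_combine with hash_combine_range (function name is illustrative):

#include "llvm/ADT/Hashing.h"
#include <vector>

llvm::hash_code hashKey(unsigned Opcode, const std::vector<int> &Operands) {
  // Mix a scalar key with the hash of a whole sequence of values.
  return llvm::hash_combine(
      Opcode, llvm::hash_combine_range(Operands.begin(), Operands.end()));
}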
RemoveMask_match(const Op0_t &In, Op1_t &Out)
bool match(OpTy *V) const
A collection of metadata nodes that might be associated with a memory access used by the alias-analysis infrastructure.
MDNode * Scope
The tag for alias scope specification (used with noalias).
MDNode * NoAlias
The tag specifying the noalias scope.
This struct is a compact representation of a valid (non-zero power of two) alignment.
An information struct used to provide DenseMap with the various necessary components for a given value type T.
Incoming for lane mask phi as machine instruction; incoming register Reg and incoming block Block are selected.
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
Struct to hold various analysis needed for cost computations.
A recipe for handling first-order recurrence phis.
A recipe for widening load operations with vector-predication intrinsics, using the address to load from, the explicit vector length and an optional mask.
A recipe for widening load operations, using the address to load from and an optional mask.
A recipe for widening select instructions.
A recipe for widening store operations with vector-predication intrinsics, using the value to store, the address to store to, the explicit vector length and an optional mask.
A recipe for widening store operations, using the stored value, the address to store to and an optional mask.