#define LV_NAME "loop-vectorize"
#define DEBUG_TYPE LV_NAME
  case VPInterleaveEVLSC:
  case VPWidenStoreEVLSC:
        ->getCalledScalarFunction()
  case VPWidenIntrinsicSC:
  case VPCanonicalIVPHISC:
  case VPBranchOnMaskSC:
  case VPFirstOrderRecurrencePHISC:
  case VPReductionPHISC:
  case VPScalarIVStepsSC:
  case VPReductionEVLSC:
  case VPVectorPointerSC:
  case VPWidenCanonicalIVSC:
  case VPWidenIntOrFpInductionSC:
  case VPWidenLoadEVLSC:
  case VPWidenPointerInductionSC:
  case VPWidenSelectSC: {
    assert((!I || !I->mayWriteToMemory()) &&
           "underlying instruction may write to memory");
  case VPInstructionSC:
  case VPWidenLoadEVLSC:
        ->mayReadFromMemory();
        ->getCalledScalarFunction()
        ->onlyWritesMemory();
  case VPWidenIntrinsicSC:
  case VPBranchOnMaskSC:
  case VPFirstOrderRecurrencePHISC:
  case VPPredInstPHISC:
  case VPScalarIVStepsSC:
  case VPWidenStoreEVLSC:
  case VPReductionEVLSC:
  case VPVectorPointerSC:
  case VPWidenCanonicalIVSC:
  case VPWidenIntOrFpInductionSC:
  case VPWidenPointerInductionSC:
  case VPWidenSelectSC: {
    assert((!I || !I->mayReadFromMemory()) &&
           "underlying instruction may read from memory");
  case VPFirstOrderRecurrencePHISC:
  case VPPredInstPHISC:
  case VPVectorEndPointerSC:
  case VPInstructionSC: {
  case VPWidenCallSC: {
  case VPWidenIntrinsicSC:
  case VPReductionEVLSC:
  case VPPartialReductionSC:
  case VPScalarIVStepsSC:
  case VPVectorPointerSC:
  case VPWidenCanonicalIVSC:
  case VPWidenIntOrFpInductionSC:
  case VPWidenPointerInductionSC:
  case VPWidenSelectSC: {
    assert((!I || !I->mayHaveSideEffects()) &&
           "underlying instruction has side-effects");
  case VPInterleaveEVLSC:
  case VPWidenLoadEVLSC:
  case VPWidenStoreEVLSC:
        "mayHaveSideEffects result for ingredient differs from this "
  case VPReplicateSC: {
    return R->getUnderlyingInstr()->mayHaveSideEffects();
  assert(!Parent && "Recipe already in some VPBasicBlock");
         "Insertion position not in any VPBasicBlock");
  assert(!Parent && "Recipe already in some VPBasicBlock");
  assert(!Parent && "Recipe already in some VPBasicBlock");
         "Insertion position not in any VPBasicBlock");
    UI = IG->getInsertPos();
    UI = &WidenMem->getIngredient();
  if (UI && Ctx.skipCostComputation(UI, VF.isVector())) {
    dbgs() << "Cost of " << RecipeCost << " for VF " << VF << ": ";
  std::optional<unsigned> Opcode;
    auto *VecTy = Ctx.Types.inferScalarType(Op);
    auto *CondTy = Ctx.Types.inferScalarType(getCondOp());
    CondCost = Ctx.TTI.getCmpSelInstrCost(Instruction::Select, VecTy, CondTy,
    auto *PhiType = Ctx.Types.inferScalarType(getChainOp());
    auto *InputType = Ctx.Types.inferScalarType(getVecOp());
    return CondCost + Ctx.TTI.getPartialReductionCost(
                          getOpcode(), InputType, InputType, PhiType, VF,
  Type *InputTypeA = nullptr, *InputTypeB = nullptr;
    if (WidenCastR->getOpcode() == Instruction::CastOps::ZExt)
    if (WidenCastR->getOpcode() == Instruction::CastOps::SExt)
    Opcode = Widen->getOpcode();
    InputTypeA = Ctx.Types.inferScalarType(ExtAR ? ExtAR->getOperand(0)
                                                 : Widen->getOperand(0));
    InputTypeB = Ctx.Types.inferScalarType(ExtBR ? ExtBR->getOperand(0)
                                                 : Widen->getOperand(1));
    ExtAType = GetExtendKind(ExtAR);
    ExtBType = GetExtendKind(ExtBR);
      InputTypeB = InputTypeA;
      InputTypeA = Ctx.Types.inferScalarType(OpR->getOperand(0));
      ExtAType = GetExtendKind(OpR);
      InputTypeA = Ctx.Types.inferScalarType(RedPhiOp1R->getOperand(0));
      ExtAType = GetExtendKind(RedPhiOp1R);
    return CondCost + Reduction->computeCost(VF, Ctx);
  auto *PhiType = Ctx.Types.inferScalarType(getOperand(1));
  return CondCost + Ctx.TTI.getPartialReductionCost(
                        getOpcode(), InputTypeA, InputTypeB, PhiType, VF,
                        ExtAType, ExtBType, Opcode, Ctx.CostKind);
  auto &Builder = State.Builder;
    "Unhandled partial reduction opcode");
  assert(PhiVal && BinOpVal && "Phi and Mul must be set");
    BinOpVal = Builder.CreateSelect(Cond, BinOpVal, Zero);
      Builder.CreateIntrinsic(RetTy, Intrinsic::vector_partial_reduce_add,
                              {PhiVal, BinOpVal}, nullptr, "partial.reduce");
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  O << Indent << "PARTIAL-REDUCE ";
  assert(OpType == Other.OpType && "OpType must match");
  case OperationType::OverflowingBinOp:
    WrapFlags.HasNUW &= Other.WrapFlags.HasNUW;
    WrapFlags.HasNSW &= Other.WrapFlags.HasNSW;
  case OperationType::Trunc:
  case OperationType::DisjointOp:
  case OperationType::PossiblyExactOp:
    ExactFlags.IsExact &= Other.ExactFlags.IsExact;
  case OperationType::GEPOp:
  case OperationType::FPMathOp:
  case OperationType::FCmp:
    assert((OpType != OperationType::FCmp ||
            FCmpFlags.Pred == Other.FCmpFlags.Pred) &&
           "Cannot drop CmpPredicate");
    getFMFsRef().NoNaNs &= Other.getFMFsRef().NoNaNs;
    getFMFsRef().NoInfs &= Other.getFMFsRef().NoInfs;
  case OperationType::NonNegOp:
  case OperationType::Cmp:
  case OperationType::Other:
  assert((OpType == OperationType::FPMathOp || OpType == OperationType::FCmp) &&
         "recipe doesn't have fast math flags");
  const FastMathFlagsTy &F = getFMFsRef();
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
template <unsigned PartOpIdx>
  if (U.getNumOperands() == PartOpIdx + 1)
    return U.getOperand(PartOpIdx);
template <unsigned PartOpIdx>
         "Set flags not supported for the provided opcode");
  assert((getNumOperandsForOpcode(Opcode) == -1u ||
         "number of operands does not match opcode");
unsigned VPInstruction::getNumOperandsForOpcode(unsigned Opcode) {
  case Instruction::Alloca:
  case Instruction::ExtractValue:
  case Instruction::Freeze:
  case Instruction::Load:
  case Instruction::ICmp:
  case Instruction::FCmp:
  case Instruction::ExtractElement:
  case Instruction::Store:
  case Instruction::Select:
  case Instruction::Call:
  case Instruction::GetElementPtr:
  case Instruction::PHI:
  case Instruction::Switch:
bool VPInstruction::canGenerateScalarForFirstLane() const {
  case Instruction::Freeze:
  case Instruction::ICmp:
  case Instruction::PHI:
  case Instruction::Select:
  BasicBlock *SecondIRSucc = State.CFG.VPBB2IRBB.lookup(SecondVPSucc);
  BranchInst *CondBr = State.Builder.CreateCondBr(Cond, IRBB, SecondIRSucc);
  IRBuilderBase &Builder = State.Builder;
  case Instruction::ExtractElement: {
    unsigned IdxToExtract =
  case Instruction::Freeze: {
  case Instruction::FCmp:
  case Instruction::ICmp: {
  case Instruction::PHI: {
  case Instruction::Select: {
                                   {VIVElem0, ScalarTC}, nullptr, Name);
    if (!V1->getType()->isVectorTy())
           "Requested vector length should be an integer.");
                                   {AVL, VFArg, State.Builder.getTrue()});
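    // Illustrative sketch (assumed types, not from the surrounding code):
    // the explicit-vector-length computation lowers to a call such as
    //   %evl = call i32 @llvm.experimental.get.vector.length.i64(
    //              i64 %avl, i32 4, i1 true)
    // clamping the remaining trip count %avl to the (scalable) vector
    // length; the trailing i1 true marks the VF as scalable.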
  assert(Part != 0 && "Must have a positive part");
    for (unsigned FieldIndex = 0; FieldIndex != StructTy->getNumElements();
    IRBuilderBase::FastMathFlagGuard FMFG(Builder);
                                   ReducedPartRdx, "bin.rdx");
    RecurKind RK = PhiR->getRecurrenceKind();
           "Unexpected reduction kind");
    assert(!PhiR->isInLoop() &&
           "In-loop FindLastIV reduction is not supported yet");
    for (unsigned Part = 1; Part < UF; ++Part)
      ReducedPartRdx = createMinMaxOp(Builder, MinMaxKind, ReducedPartRdx,
    RecurKind RK = PhiR->getRecurrenceKind();
           "should be handled by ComputeFindIVResult");
    for (unsigned Part = 0; Part < UF; ++Part)
      RdxParts[Part] = State.get(getOperand(1 + Part), PhiR->isInLoop());
    IRBuilderBase::FastMathFlagGuard FMFG(Builder);
    Value *ReducedPartRdx = RdxParts[0];
    if (PhiR->isOrdered()) {
      ReducedPartRdx = RdxParts[UF - 1];
      for (unsigned Part = 1; Part < UF; ++Part) {
        Value *RdxPart = RdxParts[Part];
          ReducedPartRdx = createMinMaxOp(Builder, RK, ReducedPartRdx, RdxPart);
          Opcode = Instruction::Add;
              Builder.CreateBinOp(Opcode, RdxPart, ReducedPartRdx, "bin.rdx");
    if (State.VF.isVector() && !PhiR->isInLoop()) {
    return ReducedPartRdx;
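  // Illustrative sketch (assumed UF = 2, add reduction): the per-part
  // values are combined serially and then reduced horizontally, e.g.
  //   %bin.rdx = add <4 x i32> %rdx.part.1, %rdx.part.0
  //   %res = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %bin.rdx)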
944 "invalid offset to extract from");
948 assert(
Offset <= 1 &&
"invalid offset to extract from");
962 "can only generate first lane for PtrAdd");
976 Res = Builder.CreateOr(Res, Builder.CreateFreeze(State.get(
Op)));
977 return State.VF.isScalar() ? Res : Builder.CreateOrReduce(Res);
982 Value *Res =
nullptr;
987 Builder.CreateMul(RuntimeVF, ConstantInt::get(IdxTy, Idx - 1));
988 Value *VectorIdx = Idx == 1
990 : Builder.CreateSub(LaneToExtract, VectorStart);
991 Value *Ext = State.VF.isScalar()
993 : Builder.CreateExtractElement(
996 Value *Cmp = Builder.CreateICmpUGE(LaneToExtract, VectorStart);
997 Res = Builder.CreateSelect(Cmp, Ext, Res);
  Value *Res = nullptr;
  for (int Idx = LastOpIdx; Idx >= 0; --Idx) {
    Value *TrailingZeros =
  Type *ScalarTy = Ctx.Types.inferScalarType(this);
  case Instruction::FNeg:
    return Ctx.TTI.getArithmeticInstrCost(Opcode, ResultTy, Ctx.CostKind);
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::SRem:
  case Instruction::URem:
  case Instruction::Add:
  case Instruction::FAdd:
  case Instruction::Sub:
  case Instruction::FSub:
  case Instruction::Mul:
  case Instruction::FMul:
  case Instruction::FDiv:
  case Instruction::FRem:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor: {
      RHSInfo = Ctx.getOperandInfo(RHS);
    return Ctx.TTI.getArithmeticInstrCost(
        Opcode, ResultTy, Ctx.CostKind,
        {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
        RHSInfo, Operands, CtxI, &Ctx.TLI);
  case Instruction::Freeze:
    return Ctx.TTI.getArithmeticInstrCost(Instruction::Mul, ResultTy,
  case Instruction::ExtractValue:
    return Ctx.TTI.getInsertExtractValueCost(Instruction::ExtractValue,
  case Instruction::ICmp:
  case Instruction::FCmp: {
    return Ctx.TTI.getCmpSelInstrCost(
        Ctx.CostKind, {TTI::OK_AnyValue, TTI::OP_None},
        {TTI::OK_AnyValue, TTI::OP_None}, CtxI);
1125 "Should only generate a vector value or single scalar, not scalars "
1133 case Instruction::Select: {
1137 auto *CondTy = Ctx.Types.inferScalarType(
getOperand(0));
1138 auto *VecTy = Ctx.Types.inferScalarType(
getOperand(1));
1143 return Ctx.TTI.getCmpSelInstrCost(Instruction::Select, VecTy, CondTy, Pred,
1146 case Instruction::ExtractElement:
1156 return Ctx.TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy,
1160 auto *VecTy =
toVectorTy(Ctx.Types.inferScalarType(
this), VF);
1161 return Ctx.TTI.getArithmeticReductionCost(
1167 return Ctx.TTI.getCmpSelInstrCost(Instruction::ICmp, ScalarTy,
1174 {PredTy, Type::getInt1Ty(Ctx.LLVMCtx)});
1175 return Ctx.TTI.getIntrinsicInstrCost(Attrs, Ctx.CostKind);
1181 Type *VectorTy =
toVectorTy(Ctx.Types.inferScalarType(
this), VF);
1190 unsigned Multiplier =
1195 return Ctx.TTI.getIntrinsicInstrCost(Attrs, Ctx.CostKind);
1202 I32Ty, {Arg0Ty, I32Ty, I1Ty});
1203 return Ctx.TTI.getIntrinsicInstrCost(Attrs, Ctx.CostKind);
1208 return Ctx.TTI.getIndexedVectorInstrCostFromEnd(Instruction::ExtractElement,
1209 VecTy, Ctx.CostKind, 0);
1219 "unexpected VPInstruction witht underlying value");
1228 getOpcode() == Instruction::ExtractElement ||
1239 case Instruction::PHI:
1250 assert(!State.Lane &&
"VPInstruction executing an Lane");
1253 "Set flags not supported for the provided opcode");
1256 Value *GeneratedValue = generate(State);
1259 assert(GeneratedValue &&
"generate must produce a value");
1260 bool GeneratesPerFirstLaneOnly = canGenerateScalarForFirstLane() &&
1265 !GeneratesPerFirstLaneOnly) ||
1266 State.VF.isScalar()) &&
1267 "scalar value but not only first lane defined");
1268 State.set(
this, GeneratedValue,
1269 GeneratesPerFirstLaneOnly);
  case Instruction::ExtractElement:
  case Instruction::Freeze:
  case Instruction::FCmp:
  case Instruction::ICmp:
  case Instruction::Select:
  case Instruction::PHI:
  case Instruction::ExtractElement:
  case Instruction::PHI:
  case Instruction::FCmp:
  case Instruction::ICmp:
  case Instruction::Select:
  case Instruction::Or:
  case Instruction::Freeze:
  case Instruction::FCmp:
  case Instruction::ICmp:
  case Instruction::Select:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  O << Indent << "EMIT" << (isSingleScalar() ? "-SCALAR" : "") << " ";
    O << "combined load";
    O << "combined store";
    O << "active lane mask";
    O << "EXPLICIT-VECTOR-LENGTH";
    O << "first-order splice";
    O << "branch-on-cond";
    O << "TC > VF ? TC - VF : 0";
    O << "branch-on-count";
    O << "buildstructvector";
    O << "extract-lane";
    O << "extract-last-element";
    O << "extract-last-lane-per-part";
    O << "extract-penultimate-element";
    O << "compute-anyof-result";
    O << "compute-find-iv-result";
    O << "compute-reduction-result";
    O << "first-active-lane";
    O << "reduction-start-vector";
    O << "resume-for-epilogue";
  State.set(this, Cast, VPLane(0));
  Value *VScale = State.Builder.CreateVScale(ResultTy);
  State.set(this, VScale, true);
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  O << Indent << "EMIT" << (isSingleScalar() ? "-SCALAR" : "") << " ";
    O << "wide-iv-step ";
    O << "step-vector " << *ResultTy;
    O << "vscale " << *ResultTy;
  O << " to " << *ResultTy;
  PHINode *NewPhi = State.Builder.CreatePHI(
      State.TypeAnalysis.inferScalarType(this), 2, getName());
  for (unsigned Idx = 0; Idx != NumIncoming; ++Idx) {
  State.set(this, NewPhi, VPLane(0));
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  O << Indent << "EMIT" << (isSingleScalar() ? "-SCALAR" : "") << " ";
         "PHINodes must be handled by VPIRPhi");
  State.Builder.SetInsertPoint(I.getParent(), std::next(I.getIterator()));
         "can only update exiting operands to phi nodes");
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  O << Indent << "IR " << I;
    auto *PredVPBB = Pred->getExitingBasicBlock();
    BasicBlock *PredBB = State.CFG.VPBB2IRBB[PredVPBB];
    if (Phi->getBasicBlockIndex(PredBB) == -1)
      Phi->addIncoming(V, PredBB);
      Phi->setIncomingValueForBlock(PredBB, V);
  State.Builder.SetInsertPoint(Phi->getParent(), std::next(Phi->getIterator()));
  assert(R->getNumOperands() == R->getParent()->getNumPredecessors() &&
         "Number of phi operands must match number of predecessors");
  unsigned Position = R->getParent()->getIndexForPredecessor(IncomingBlock);
  R->removeOperand(Position);
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    O << " (extra operand" << (getNumOperands() > 1 ? "s" : "") << ": ";
      std::get<1>(Op)->printAsOperand(O);
  for (const auto &[Kind, Node] : Metadata)
    I.setMetadata(Kind, Node);
  for (const auto &[KindA, MDA] : Metadata) {
    for (const auto &[KindB, MDB] : Other.Metadata) {
      if (KindA == KindB && MDA == MDB) {
  Metadata = std::move(MetadataIntersection);
  assert(State.VF.isVector() && "not widening");
  assert(Variant != nullptr && "Can't create vector function.");
      Arg = State.get(I.value(), VPLane(0));
    Args.push_back(Arg);
    CI->getOperandBundlesAsDefs(OpBundles);
  CallInst *V = State.Builder.CreateCall(Variant, Args, OpBundles);
  V->setCallingConv(Variant->getCallingConv());
  if (!V->getType()->isVoidTy())
  return Ctx.TTI.getCallInstrCost(nullptr, Variant->getReturnType(),
                                  Variant->getFunctionType()->params(),
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  O << Indent << "WIDEN-CALL ";
  O << " @" << CalledFn->getName() << "(";
    O << " (using library function";
    if (Variant->hasName())
      O << ": " << Variant->getName();
  assert(State.VF.isVector() && "not widening");
      Arg = State.get(I.value(), VPLane(0));
    Args.push_back(Arg);
    Module *M = State.Builder.GetInsertBlock()->getModule();
        "Can't retrieve vector intrinsic or vector-predication intrinsics.");
    CI->getOperandBundlesAsDefs(OpBundles);
  CallInst *V = State.Builder.CreateCall(VectorF, Args, OpBundles);
  if (!V->getType()->isVoidTy())
  for (const auto &[Idx, Op] : enumerate(Operands)) {
    auto *V = Op->getUnderlyingValue();
      Arguments.push_back(UI->getArgOperand(Idx));
  Type *ScalarRetTy = Ctx.Types.inferScalarType(&R);
                      : Ctx.Types.inferScalarType(Op));
      R.hasFastMathFlags() ? R.getFastMathFlags() : FastMathFlags();
  return Ctx.TTI.getIntrinsicInstrCost(CostAttrs, Ctx.CostKind);
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  O << Indent << "WIDEN-INTRINSIC ";
  if (ResultTy->isVoidTy()) {
  Value *Mask = nullptr;
    Mask = State.get(VPMask);
        Builder.CreateVectorSplat(VTy->getElementCount(), Builder.getInt1(1));
  if (Opcode == Instruction::Sub)
    IncAmt = Builder.CreateNeg(IncAmt);
    assert(Opcode == Instruction::Add && "only add or sub supported for now");
  State.Builder.CreateIntrinsic(Intrinsic::experimental_vector_histogram_add,
  Type *IncTy = Ctx.Types.inferScalarType(IncAmt);
      Ctx.TTI.getArithmeticInstrCost(Instruction::Mul, VTy, Ctx.CostKind);
      {PtrTy, IncTy, MaskTy});
  return Ctx.TTI.getIntrinsicInstrCost(ICA, Ctx.CostKind) + MulCost +
         Ctx.TTI.getArithmeticInstrCost(Opcode, VTy, Ctx.CostKind);
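  // Illustrative sketch (assumed operand types and name mangling): a
  // histogram subtraction is modeled by negating the increment and emitting
  // the same add intrinsic, e.g.
  //   call void @llvm.experimental.vector.histogram.add.v4p0.i32(
  //       <4 x ptr> %buckets, i32 -1, <4 x i1> %mask)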
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  O << Indent << "WIDEN-HISTOGRAM buckets: ";
  if (Opcode == Instruction::Sub)
    assert(Opcode == Instruction::Add);
  O << Indent << "WIDEN-SELECT ";
  Value *Sel = State.Builder.CreateSelect(Cond, Op0, Op1);
  State.set(this, Sel);
  Type *ScalarTy = Ctx.Types.inferScalarType(this);
  Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
    const auto [Op1VK, Op1VP] = Ctx.getOperandInfo(Op0);
    const auto [Op2VK, Op2VP] = Ctx.getOperandInfo(Op1);
               [](VPValue *Op) { return Op->getUnderlyingValue(); }))
      Operands.append(SI->op_begin(), SI->op_end());
    return Ctx.TTI.getArithmeticInstrCost(
        IsLogicalOr ? Instruction::Or : Instruction::And, VectorTy,
        Ctx.CostKind, {Op1VK, Op1VP}, {Op2VK, Op2VP}, Operands, SI);
    Pred = Cmp->getPredicate();
  return Ctx.TTI.getCmpSelInstrCost(
      Instruction::Select, VectorTy, CondTy, Pred, Ctx.CostKind,
      {TTI::OK_AnyValue, TTI::OP_None}, {TTI::OK_AnyValue, TTI::OP_None}, SI);
VPIRFlags::FastMathFlagsTy::FastMathFlagsTy(const FastMathFlags &FMF) {
  case OperationType::OverflowingBinOp:
    return Opcode == Instruction::Add || Opcode == Instruction::Sub ||
           Opcode == Instruction::Mul || Opcode == Instruction::Shl ||
           Opcode == VPInstruction::CanonicalIVIncrementForPart;
  case OperationType::Trunc:
    return Opcode == Instruction::Trunc;
  case OperationType::DisjointOp:
    return Opcode == Instruction::Or;
  case OperationType::PossiblyExactOp:
    return Opcode == Instruction::AShr || Opcode == Instruction::LShr ||
           Opcode == Instruction::UDiv || Opcode == Instruction::SDiv;
  case OperationType::GEPOp:
    return Opcode == Instruction::GetElementPtr ||
  case OperationType::FPMathOp:
    return Opcode == Instruction::Call || Opcode == Instruction::FAdd ||
           Opcode == Instruction::FMul || Opcode == Instruction::FSub ||
           Opcode == Instruction::FNeg || Opcode == Instruction::FDiv ||
           Opcode == Instruction::FRem || Opcode == Instruction::FPExt ||
           Opcode == Instruction::FPTrunc || Opcode == Instruction::Select ||
  case OperationType::FCmp:
    return Opcode == Instruction::FCmp;
  case OperationType::NonNegOp:
    return Opcode == Instruction::ZExt || Opcode == Instruction::UIToFP;
  case OperationType::Cmp:
    return Opcode == Instruction::FCmp || Opcode == Instruction::ICmp;
  case OperationType::Other:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  case OperationType::Cmp:
  case OperationType::FCmp:
  case OperationType::DisjointOp:
  case OperationType::PossiblyExactOp:
  case OperationType::OverflowingBinOp:
  case OperationType::Trunc:
  case OperationType::FPMathOp:
  case OperationType::GEPOp:
    else if (GEPFlags.hasNoUnsignedSignedWrap())
  case OperationType::NonNegOp:
  case OperationType::Other:
  auto &Builder = State.Builder;
  case Instruction::Call:
  case Instruction::Br:
  case Instruction::PHI:
  case Instruction::GetElementPtr:
  case Instruction::Select:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::SRem:
  case Instruction::URem:
  case Instruction::Add:
  case Instruction::FAdd:
  case Instruction::Sub:
  case Instruction::FSub:
  case Instruction::FNeg:
  case Instruction::Mul:
  case Instruction::FMul:
  case Instruction::FDiv:
  case Instruction::FRem:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor: {
      Ops.push_back(State.get(VPOp));
    Value *V = Builder.CreateNAryOp(Opcode, Ops);
  case Instruction::ExtractValue: {
    Value *Extract = Builder.CreateExtractValue(Op, CI->getZExtValue());
    State.set(this, Extract);
  case Instruction::Freeze: {
    Value *Freeze = Builder.CreateFreeze(Op);
    State.set(this, Freeze);
  case Instruction::ICmp:
  case Instruction::FCmp: {
    bool FCmp = Opcode == Instruction::FCmp;
             State.get(this)->getType() &&
         "inferred type and type from generated instructions do not match");
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::SRem:
  case Instruction::URem:
  case Instruction::FNeg:
  case Instruction::Add:
  case Instruction::FAdd:
  case Instruction::Sub:
  case Instruction::FSub:
  case Instruction::Mul:
  case Instruction::FMul:
  case Instruction::FDiv:
  case Instruction::FRem:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Freeze:
  case Instruction::ExtractValue:
  case Instruction::ICmp:
  case Instruction::FCmp:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  O << Indent << "WIDEN ";
  auto &Builder = State.Builder;
  assert(State.VF.isVector() && "Not vectorizing?");
  State.set(this, Cast);
    if (WidenMemoryRecipe == nullptr)
    if (!WidenMemoryRecipe->isConsecutive())
    if (WidenMemoryRecipe->isReverse())
    if (WidenMemoryRecipe->isMasked())
  if ((Opcode == Instruction::Trunc || Opcode == Instruction::FPTrunc) &&
      CCH = ComputeCCH(StoreRecipe);
  else if (Opcode == Instruction::ZExt || Opcode == Instruction::SExt ||
           Opcode == Instruction::FPExt) {
  return Ctx.TTI.getCastInstrCost(
      Opcode, DestTy, SrcTy, CCH, Ctx.CostKind,
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  O << Indent << "WIDEN-CAST ";
  return Ctx.TTI.getCFInstrCost(Instruction::PHI, Ctx.CostKind);
                         : ConstantFP::get(Ty, C);
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  O << " = WIDEN-INDUCTION";
    O << " (truncated to " << *TI->getType() << ")";
  return StartC && StartC->isZero() && StepC && StepC->isOne() &&
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  O << " = DERIVED-IV ";
  assert(BaseIVTy == Step->getType() && "Types of BaseIV and Step must match!");
    AddOp = Instruction::Add;
    MulOp = Instruction::Mul;
    AddOp = InductionOpcode;
    MulOp = Instruction::FMul;
  Type *VecIVTy = nullptr;
  Value *UnitStepVec = nullptr, *SplatStep = nullptr, *SplatIV = nullptr;
  if (!FirstLaneOnly && State.VF.isScalable()) {
    SplatStep = Builder.CreateVectorSplat(State.VF, Step);
    SplatIV = Builder.CreateVectorSplat(State.VF, BaseIV);
  unsigned StartLane = 0;
  unsigned EndLane = FirstLaneOnly ? 1 : State.VF.getKnownMinValue();
    StartLane = State.Lane->getKnownLane();
    EndLane = StartLane + 1;
    StartIdx0 = ConstantInt::get(IntStepTy, 0);
        Builder.CreateMul(StartIdx0, ConstantInt::get(StartIdx0->getType(),
    StartIdx0 = Builder.CreateSExtOrTrunc(StartIdx0, IntStepTy);
  if (!FirstLaneOnly && State.VF.isScalable()) {
    auto *SplatStartIdx = Builder.CreateVectorSplat(State.VF, StartIdx0);
    auto *InitVec = Builder.CreateAdd(SplatStartIdx, UnitStepVec);
      InitVec = Builder.CreateSIToFP(InitVec, VecIVTy);
    auto *Mul = Builder.CreateBinOp(MulOp, InitVec, SplatStep);
    auto *Add = Builder.CreateBinOp(AddOp, SplatIV, Mul);
    State.set(this, Add);
    StartIdx0 = Builder.CreateSIToFP(StartIdx0, BaseIVTy);
  for (unsigned Lane = StartLane; Lane < EndLane; ++Lane) {
    Value *StartIdx = Builder.CreateBinOp(
           "Expected StartIdx to be folded to a constant when VF is not "
    auto *Mul = Builder.CreateBinOp(MulOp, StartIdx, Step);
    auto *Add = Builder.CreateBinOp(AddOp, BaseIV, Mul);
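  // Illustrative sketch (fixed VF, lane-by-lane form): each scalar step is
  //   Add = BaseIV + (StartIdx0 + Lane) * Step
  // so with BaseIV = 0, Step = 2 and VF = 4 the lanes hold 0, 2, 4, 6.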
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  O << " = SCALAR-STEPS ";
  assert(State.VF.isVector() && "not widening");
  if (areAllOperandsInvariant()) {
    Value *Splat = State.Builder.CreateVectorSplat(State.VF, NewGEP);
    State.set(this, Splat);
    auto *Ptr = State.get(getOperand(0), isPointerLoopInvariant());
      Indices.push_back(State.get(Operand, isIndexLoopInvariant(I - 1)));
    assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) &&
           "NewGEP is not a pointer vector");
    State.set(this, NewGEP);
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  O << Indent << "WIDEN-GEP ";
  O << (isPointerLoopInvariant() ? "Inv" : "Var");
    O << "[" << (isIndexLoopInvariant(I) ? "Inv" : "Var") << "]";
  O << " = getelementptr";
  const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout();
  return !IsUnitStride || (IsScalable && (IsReverse || CurrentPart > 0))
             ? DL.getIndexType(Builder.getPtrTy(0))
             : Builder.getInt32Ty();
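// Illustrative note (assumed data layout): unit-stride accesses with fixed
// offsets can index with i32, while scalable, reversed, or later-part
// accesses use the target's pointer-index type (typically i64 on 64-bit
// targets), since their offsets are computed from the runtime VF.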
  auto &Builder = State.Builder;
  bool IsUnitStride = Stride == 1 || Stride == -1;
                                IsUnitStride, CurrentPart, Builder);
  if (IndexTy != RunTimeVF->getType())
    RunTimeVF = Builder.CreateZExtOrTrunc(RunTimeVF, IndexTy);
  Value *NumElt = Builder.CreateMul(
      ConstantInt::get(IndexTy, Stride * (int64_t)CurrentPart), RunTimeVF);
  Value *LastLane = Builder.CreateSub(RunTimeVF, ConstantInt::get(IndexTy, 1));
  LastLane = Builder.CreateMul(ConstantInt::get(IndexTy, Stride), LastLane);
  ResultPtr = Builder.CreateGEP(IndexedTy, ResultPtr, LastLane, "",
  State.set(this, ResultPtr, true);
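  // Illustrative arithmetic (assumed Stride = -1, VF = 4, CurrentPart = 1):
  // NumElt = -1 * 1 * 4 = -4 and LastLane = -1 * (4 - 1) = -3, so the
  // result points at the last element of the reversed second part:
  //   ptr + (-4) + (-3) == ptr - 7.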
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  O << " = vector-end-pointer";
  auto &Builder = State.Builder;
                                true, CurrentPart, Builder);
  State.set(this, ResultPtr, true);
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  O << " = vector-pointer ";
    return Ctx.TTI.getCFInstrCost(Instruction::PHI, Ctx.CostKind);
  Type *ResultTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
         Ctx.TTI.getCmpSelInstrCost(Instruction::Select, ResultTy, CmpTy,
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  O << Indent << "BLEND ";
  assert(!State.Lane && "Reduction being replicated.");
         "In-loop AnyOf reductions aren't currently supported");
  Value *NewCond = State.get(Cond, State.VF.isScalar());
    if (State.VF.isVector())
      Start = State.Builder.CreateVectorSplat(VecTy->getElementCount(), Start);
    Value *Select = State.Builder.CreateSelect(NewCond, NewVecOp, Start);
    if (State.VF.isVector())
      NewRed = State.Builder.CreateBinOp(
          PrevInChain, NewVecOp);
      PrevInChain = NewRed;
      NextInChain = NewRed;
      NextInChain = createMinMaxOp(State.Builder, Kind, NewRed, PrevInChain);
      NextInChain = State.Builder.CreateBinOp(
          PrevInChain, NewRed);
  State.set(this, NextInChain, true);
  assert(!State.Lane && "Reduction being replicated.");
  auto &Builder = State.Builder;
    Mask = State.get(CondOp);
    Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
    NewRed = Builder.CreateBinOp(
  State.set(this, NewRed, true);
  Type *ElementTy = Ctx.Types.inferScalarType(this);
  std::optional<FastMathFlags> OptionalFMF =
      "Any-of reduction not implemented in VPlan-based cost model currently.");
    return Ctx.TTI.getMinMaxReductionCost(Id, VectorTy, FMFs, Ctx.CostKind);
  return Ctx.TTI.getArithmeticReductionCost(Opcode, VectorTy, OptionalFMF,
                                       ExpressionTypes ExpressionType,
      ExpressionRecipes(ExpressionRecipes), ExpressionType(ExpressionType) {
  assert(!ExpressionRecipes.empty() && "Nothing to combine?");
         "expression cannot contain recipes with side-effects");
  for (auto *R : ExpressionRecipes)
    ExpressionRecipesAsSetOfUsers.insert(R);
    if (R != ExpressionRecipes.back() &&
        any_of(R->users(), [&ExpressionRecipesAsSetOfUsers](VPUser *U) {
          return !ExpressionRecipesAsSetOfUsers.contains(U);
      R->replaceUsesWithIf(CopyForExtUsers, [&ExpressionRecipesAsSetOfUsers](
        return !ExpressionRecipesAsSetOfUsers.contains(&U);
    R->removeFromParent();
  for (auto *R : ExpressionRecipes) {
    for (const auto &[Idx, Op] : enumerate(R->operands())) {
      auto *Def = Op->getDefiningRecipe();
      if (Def && ExpressionRecipesAsSetOfUsers.contains(Def))
      LiveInPlaceholders.push_back(new VPValue());
  for (auto *R : ExpressionRecipes)
    for (auto const &[LiveIn, Tmp] : zip(operands(), LiveInPlaceholders))
      R->replaceUsesOfWith(LiveIn, Tmp);
  for (auto *R : ExpressionRecipes)
    if (!R->getParent())
      R->insertBefore(this);
    LiveInPlaceholders[Idx]->replaceAllUsesWith(Op);
  ExpressionRecipes.clear();
  Type *RedTy = Ctx.Types.inferScalarType(this);
         "VPExpressionRecipe only supports integer types currently.");
  switch (ExpressionType) {
  case ExpressionTypes::ExtendedReduction: {
               ? Ctx.TTI.getPartialReductionCost(
                     Opcode, Ctx.Types.inferScalarType(getOperand(0)), nullptr,
               : Ctx.TTI.getExtendedReductionCost(
                     Opcode, ExtR->getOpcode() == Instruction::ZExt, RedTy,
                     SrcVecTy, std::nullopt, Ctx.CostKind);
  case ExpressionTypes::MulAccReduction:
    return Ctx.TTI.getMulAccReductionCost(false, Opcode, RedTy, SrcVecTy,
  case ExpressionTypes::ExtNegatedMulAccReduction:
    assert(Opcode == Instruction::Add && "Unexpected opcode");
    Opcode = Instruction::Sub;
  case ExpressionTypes::ExtMulAccReduction: {
      return Ctx.TTI.getPartialReductionCost(
          Opcode, Ctx.Types.inferScalarType(getOperand(0)),
          Ctx.Types.inferScalarType(getOperand(1)), RedTy, VF,
              Ext0R->getOpcode()),
              Ext1R->getOpcode()),
          Mul->getOpcode(), Ctx.CostKind);
    return Ctx.TTI.getMulAccReductionCost(
        Opcode, RedTy, SrcVecTy, Ctx.CostKind);
    return R->mayReadFromMemory() || R->mayWriteToMemory();
         "expression cannot contain recipes with side-effects");
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  O << Indent << "EXPRESSION ";
  switch (ExpressionType) {
  case ExpressionTypes::ExtendedReduction: {
    O << " + " << (IsPartialReduction ? "partial." : "") << "reduce.";
      << *Ext0->getResultType();
    if (Red->isConditional()) {
  case ExpressionTypes::ExtNegatedMulAccReduction: {
    O << " + " << (IsPartialReduction ? "partial." : "") << "reduce.";
      << *Ext0->getResultType() << "), (";
      << *Ext1->getResultType() << ")";
    if (Red->isConditional()) {
  case ExpressionTypes::MulAccReduction:
  case ExpressionTypes::ExtMulAccReduction: {
    O << " + " << (IsPartialReduction ? "partial." : "") << "reduce.";
    bool IsExtended = ExpressionType == ExpressionTypes::ExtMulAccReduction;
                                        : ExpressionRecipes[0]);
      << *Ext0->getResultType() << "), (";
      << *Ext1->getResultType() << ")";
    if (Red->isConditional()) {
  O << Indent << "REDUCE ";
  O << Indent << "REDUCE ";
  assert((!Instr->getType()->isAggregateType() ||
         "Expected vectorizable or non-aggregate type.");
  bool IsVoidRetTy = Instr->getType()->isVoidTy();
    Cloned->setName(Instr->getName() + ".cloned");
  Type *ResultTy = State.TypeAnalysis.inferScalarType(RepRecipe);
  if (ResultTy != Cloned->getType())
  State.setDebugLocFrom(DL);
    auto InputLane = Lane;
    Cloned->setOperand(I.index(), State.get(Operand, InputLane));
  State.Builder.Insert(Cloned);
  State.set(RepRecipe, Cloned, Lane);
    State.AC->registerAssumption(II);
            [](VPValue *Op) { return Op->isDefinedOutsideLoopRegions(); })) &&
         "Expected a recipe is either within a region or all of its operands "
         "are defined outside the vectorized region.");
  assert(IsSingleScalar && "VPReplicateRecipes outside replicate regions "
                           "must have already been unrolled");
         "uniform recipe shouldn't be predicated");
  assert(!State.VF.isScalable() && "Can't scalarize a scalable vector");
    State.Lane->isFirstLane()
  State.set(this, State.packScalarIntoVectorizedValue(this, WideValue,
             Instruction::GetElementPtr) ||
    if (!Opd->isDefinedOutsideLoopRegions() &&
  while (!WorkList.empty()) {
    if (!Cur || !Seen.insert(Cur).second)
        return Seen.contains(
            Blend->getIncomingValue(I)->getDefiningRecipe());
    for (VPUser *U : Cur->users()) {
        if (InterleaveR->getAddr() == Cur)
        if (RepR->getOpcode() == Instruction::Load &&
            RepR->getOperand(0) == Cur)
        if (RepR->getOpcode() == Instruction::Store &&
            RepR->getOperand(1) == Cur)
        if (MemR->getAddr() == Cur && MemR->isConsecutive())
    Ctx.SkipCostComputation.insert(UI);
  case Instruction::GetElementPtr:
  case Instruction::Call: {
    for (const VPValue *ArgOp : ArgOps)
      Tys.push_back(Ctx.Types.inferScalarType(ArgOp));
    if (CalledFn->isIntrinsic())
      switch (CalledFn->getIntrinsicID()) {
      case Intrinsic::assume:
      case Intrinsic::lifetime_end:
      case Intrinsic::lifetime_start:
      case Intrinsic::sideeffect:
      case Intrinsic::pseudoprobe:
      case Intrinsic::experimental_noalias_scope_decl: {
               "scalarizing intrinsic should be free");
    Type *ResultTy = Ctx.Types.inferScalarType(this);
        Ctx.TTI.getCallInstrCost(CalledFn, ResultTy, Tys, Ctx.CostKind);
    if (CalledFn->isIntrinsic())
      ScalarCallCost = std::min(
      return ScalarCallCost;
           Ctx.getScalarizationOverhead(ResultTy, ArgOps, VF);
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::FAdd:
  case Instruction::FSub:
  case Instruction::Mul:
  case Instruction::FMul:
  case Instruction::FDiv:
  case Instruction::FRem:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::ICmp:
  case Instruction::FCmp:
  case Instruction::SDiv:
  case Instruction::UDiv:
  case Instruction::SRem:
  case Instruction::URem: {
        Ctx.getScalarizationOverhead(Ctx.Types.inferScalarType(this),
        Ctx.TTI.getCFInstrCost(Instruction::PHI, Ctx.CostKind);
    ScalarCost /= Ctx.getPredBlockCostDivisor(UI->getParent());
  case Instruction::Load:
  case Instruction::Store: {
    bool IsLoad = UI->getOpcode() == Instruction::Load;
    Type *ValTy = Ctx.Types.inferScalarType(IsLoad ? this : getOperand(0));
    Type *ScalarPtrTy = Ctx.Types.inferScalarType(PtrOp);
        UI->getOpcode(), ValTy, Alignment, AS, Ctx.CostKind, OpInfo);
    bool PreferVectorizedAddressing = Ctx.TTI.prefersVectorizedAddressing();
    bool UsedByLoadStoreAddress =
        ScalarMemOpCost + Ctx.TTI.getAddressComputationCost(
                              PtrTy, UsedByLoadStoreAddress ? nullptr : &Ctx.SE,
                              PtrSCEV, Ctx.CostKind);
    if (!UsedByLoadStoreAddress) {
      bool EfficientVectorLoadStore =
          Ctx.TTI.supportsEfficientVectorElementLoadStore();
      if (!(IsLoad && !PreferVectorizedAddressing) &&
          !(!IsLoad && EfficientVectorLoadStore))
      if (!EfficientVectorLoadStore)
        ResultTy = Ctx.Types.inferScalarType(this);
           Ctx.getScalarizationOverhead(ResultTy, OpsToScalarize, VF, true);
  return Ctx.getLegacyCost(UI, VF);
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  O << Indent << (IsSingleScalar ? "CLONE " : "REPLICATE ");
    O << "@" << CB->getCalledFunction()->getName() << "(";
  assert(State.Lane && "Branch on Mask works only on single instance.");
    Value *ConditionBit = State.get(BlockInMask, *State.Lane);
  auto *CurrentTerminator = State.CFG.PrevBB->getTerminator();
         "Expected to replace unreachable terminator with conditional branch.");
      State.Builder.CreateCondBr(ConditionBit, State.CFG.PrevBB, nullptr);
  CondBr->setSuccessor(0, nullptr);
  CurrentTerminator->eraseFromParent();
  assert(State.Lane && "Predicated instruction PHI works per instance.");
  assert(PredicatingBB && "Predicated block has no single predecessor.");
         "operand must be VPReplicateRecipe");
           "Packed operands must generate an insertelement or insertvalue");
      for (unsigned I = 0; I < StructTy->getNumContainedTypes() - 1; I++)
    PHINode *VPhi = State.Builder.CreatePHI(VecI->getType(), 2);
    VPhi->addIncoming(VecI->getOperand(0), PredicatingBB);
    if (State.hasVectorValue(this))
      State.reset(this, VPhi);
      State.set(this, VPhi);
    Type *PredInstType = State.TypeAnalysis.inferScalarType(getOperand(0));
    PHINode *Phi = State.Builder.CreatePHI(PredInstType, 2);
    Phi->addIncoming(ScalarPredInst, PredicatedBB);
    if (State.hasScalarValue(this, *State.Lane))
      State.reset(this, Phi, *State.Lane);
      State.set(this, Phi, *State.Lane);
      State.reset(getOperand(0), Phi, *State.Lane);
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  O << Indent << "PHI-PREDICATED-INSTRUCTION ";
                    ->getAddressSpace();
                    : Instruction::Store;
         "Inconsecutive memory access should not have the order.");
    return Ctx.TTI.getAddressComputationCost(PtrTy, nullptr, nullptr,
                     : Intrinsic::masked_store;
        Ctx.TTI.getMaskedMemoryOpCost({IID, Ty, Alignment, AS}, Ctx.CostKind);
    Cost += Ctx.TTI.getMemoryOpCost(Opcode, Ty, Alignment, AS, Ctx.CostKind,
  return Cost += Ctx.TTI.getShuffleCost(
  auto &Builder = State.Builder;
  Value *Mask = nullptr;
  if (auto *VPMask = getMask()) {
    Mask = State.get(VPMask);
      Mask = Builder.CreateVectorReverse(Mask, "reverse");
    NewLI = Builder.CreateMaskedGather(DataTy, Addr, Alignment, Mask, nullptr,
                                       "wide.masked.gather");
        Builder.CreateMaskedLoad(DataTy, Addr, Alignment, Mask,
    NewLI = Builder.CreateAlignedLoad(DataTy, Addr, Alignment, "wide.load");
    NewLI = Builder.CreateVectorReverse(NewLI, "reverse");
  State.set(this, NewLI);
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  O << Indent << "WIDEN ";
  Value *AllTrueMask =
      Builder.CreateVectorSplat(ValTy->getElementCount(), Builder.getTrue());
  return Builder.CreateIntrinsic(ValTy, Intrinsic::experimental_vp_reverse,
                                 {Operand, AllTrueMask, EVL}, nullptr, Name);
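// Illustrative sketch (assumed types, not from the surrounding code): with
// EVL = 3 on a <vscale x 4 x i32> value, the emitted call
//   %rev = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(
//              <vscale x 4 x i32> %v, <vscale x 4 x i1> splat (i1 true),
//              i32 3)
// reverses only the first 3 lanes; the all-true mask avoids a dependence
// on the real mask without affecting the result.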
  auto &Builder = State.Builder;
  Value *Mask = nullptr;
    Mask = State.get(VPMask);
    Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
        Builder.CreateIntrinsic(DataTy, Intrinsic::vp_gather, {Addr, Mask, EVL},
                                nullptr, "wide.masked.gather");
    NewLI = Builder.CreateIntrinsic(DataTy, Intrinsic::vp_load,
                                    {Addr, Mask, EVL}, nullptr, "vp.op.load");
  State.set(this, Res);
                    ->getAddressSpace();
        {Intrinsic::masked_load, Ty, Alignment, AS}, Ctx.CostKind);
  return Cost + Ctx.TTI.getShuffleCost(
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  O << Indent << "WIDEN ";
  auto &Builder = State.Builder;
  Value *Mask = nullptr;
  if (auto *VPMask = getMask()) {
    Mask = State.get(VPMask);
      Mask = Builder.CreateVectorReverse(Mask, "reverse");
  Value *StoredVal = State.get(StoredVPValue);
    StoredVal = Builder.CreateVectorReverse(StoredVal, "reverse");
    NewSI = Builder.CreateMaskedScatter(StoredVal, Addr, Alignment, Mask);
    NewSI = Builder.CreateMaskedStore(StoredVal, Addr, Alignment, Mask);
    NewSI = Builder.CreateAlignedStore(StoredVal, Addr, Alignment);
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  O << Indent << "WIDEN store ";
  auto &Builder = State.Builder;
  Value *StoredVal = State.get(StoredValue);
  Value *Mask = nullptr;
    Mask = State.get(VPMask);
    Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
  if (CreateScatter) {
                                    Intrinsic::vp_scatter,
                                    {StoredVal, Addr, Mask, EVL});
                                    Intrinsic::vp_store,
                                    {StoredVal, Addr, Mask, EVL});
                    ->getAddressSpace();
        {Intrinsic::masked_store, Ty, Alignment, AS}, Ctx.CostKind);
  return Cost + Ctx.TTI.getShuffleCost(
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  O << Indent << "WIDEN vp.store ";
  auto VF = DstVTy->getElementCount();
  assert(VF == SrcVecTy->getElementCount() && "Vector dimensions do not match");
  Type *SrcElemTy = SrcVecTy->getElementType();
  Type *DstElemTy = DstVTy->getElementType();
  assert((DL.getTypeSizeInBits(SrcElemTy) == DL.getTypeSizeInBits(DstElemTy)) &&
         "Vector elements must have same size");
    return Builder.CreateBitOrPointerCast(V, DstVTy);
         "Only one type should be a pointer type");
         "Only one type should be a floating point type");
  Value *CastVal = Builder.CreateBitOrPointerCast(V, VecIntTy);
  return Builder.CreateBitOrPointerCast(CastVal, DstVTy);
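// Illustrative note (assumed 64-bit pointers): casting <2 x double> to
// <2 x ptr> cannot be done in one bitcast, so the value is routed through
// an integer vector of matching width, e.g.
//   <2 x double> --bitcast--> <2 x i64> --inttoptr--> <2 x ptr>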
                                 const Twine &Name) {
  unsigned Factor = Vals.size();
  assert(Factor > 1 && "Tried to interleave invalid number of vectors");
  for (Value *Val : Vals)
    assert(Val->getType() == VecTy && "Tried to interleave mismatched types");
  if (VecTy->isScalableTy()) {
    assert(Factor <= 8 && "Unsupported interleave factor for scalable vectors");
    return Builder.CreateVectorInterleave(Vals, Name);
  const unsigned NumElts = VecTy->getElementCount().getFixedValue();
  return Builder.CreateShuffleVector(
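// Illustrative sketch (fixed VF = 4, factor 2, assumed i32 elements): the
// fixed-width path concatenates the inputs and applies an interleaving
// shuffle mask, e.g.
//   %il = shufflevector <4 x i32> %a, <4 x i32> %b,
//             <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6,
//                        i32 3, i32 7>
// so the result lanes alternate a0, b0, a1, b1, ...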
  assert(!State.Lane && "Interleave group being replicated.");
         "Masking gaps for scalable vectors is not yet supported.");
  unsigned InterleaveFactor = Group->getFactor();
  auto CreateGroupMask = [&BlockInMask, &State,
                          &InterleaveFactor](Value *MaskForGaps) -> Value * {
    if (State.VF.isScalable()) {
      assert(!MaskForGaps && "Interleaved groups with gaps are not supported.");
      assert(InterleaveFactor <= 8 &&
             "Unsupported deinterleave factor for scalable vectors");
      auto *ResBlockInMask = State.get(BlockInMask);
    Value *ResBlockInMask = State.get(BlockInMask);
    Value *ShuffledMask = State.Builder.CreateShuffleVector(
        "interleaved.mask");
    return MaskForGaps ? State.Builder.CreateBinOp(Instruction::And,
                                                   ShuffledMask, MaskForGaps)
  const DataLayout &DL = Instr->getDataLayout();
    Value *MaskForGaps = nullptr;
      assert(MaskForGaps && "Mask for Gaps is required but it is null");
    if (BlockInMask || MaskForGaps) {
      Value *GroupMask = CreateGroupMask(MaskForGaps);
      NewLoad = State.Builder.CreateMaskedLoad(VecTy, ResAddr,
                                               PoisonVec, "wide.masked.vec");
      NewLoad = State.Builder.CreateAlignedLoad(VecTy, ResAddr,
    if (VecTy->isScalableTy()) {
      assert(InterleaveFactor <= 8 &&
             "Unsupported deinterleave factor for scalable vectors");
      NewLoad = State.Builder.CreateIntrinsic(
          nullptr, "strided.vec");
    auto CreateStridedVector = [&InterleaveFactor, &State,
                                &NewLoad](unsigned Index) -> Value * {
      assert(Index < InterleaveFactor && "Illegal group index");
      if (State.VF.isScalable())
        return State.Builder.CreateExtractValue(NewLoad, Index);
      return State.Builder.CreateShuffleVector(NewLoad, StrideMask,
    for (unsigned I = 0, J = 0; I < InterleaveFactor; ++I) {
      Value *StridedVec = CreateStridedVector(I);
      if (Member->getType() != ScalarTy) {
        StridedVec = State.Builder.CreateVectorReverse(StridedVec, "reverse");
      State.set(VPDefs[J], StridedVec);
  Value *MaskForGaps =
         "Mismatch between NeedsMaskForGaps and MaskForGaps");
  unsigned StoredIdx = 0;
  for (unsigned i = 0; i < InterleaveFactor; i++) {
           "Fail to get a member from an interleaved store group");
    Value *StoredVec = State.get(StoredValues[StoredIdx]);
      StoredVec = State.Builder.CreateVectorReverse(StoredVec, "reverse");
    if (StoredVec->getType() != SubVT)
  if (BlockInMask || MaskForGaps) {
    Value *GroupMask = CreateGroupMask(MaskForGaps);
    NewStoreInstr = State.Builder.CreateMaskedStore(
        IVec, ResAddr, Group->getAlign(), GroupMask);
        State.Builder.CreateAlignedStore(IVec, ResAddr, Group->getAlign());
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  O << Indent << "INTERLEAVE-GROUP with factor " << IG->getFactor() << " at ";
  IG->getInsertPos()->printAsOperand(O, false);
  for (unsigned i = 0; i < IG->getFactor(); ++i) {
    if (!IG->getMember(i))
      O << "\n" << Indent << "  store ";
      O << " to index " << i;
      O << "\n" << Indent << "  ";
      O << " = load from index " << i;
  assert(!State.Lane && "Interleave group being replicated.");
  assert(State.VF.isScalable() &&
         "Only support scalable VF for EVL tail-folding.");
         "Masking gaps for scalable vectors is not yet supported.");
  unsigned InterleaveFactor = Group->getFactor();
  assert(InterleaveFactor <= 8 &&
         "Unsupported deinterleave/interleave factor for scalable vectors");
  Value *InterleaveEVL = State.Builder.CreateMul(
      EVL, ConstantInt::get(EVL->getType(), InterleaveFactor), "interleave.evl",
  Value *GroupMask = nullptr;
        State.Builder.CreateVectorSplat(WideVF, State.Builder.getTrue());
    CallInst *NewLoad = State.Builder.CreateIntrinsic(
        VecTy, Intrinsic::vp_load, {ResAddr, GroupMask, InterleaveEVL}, nullptr,
      NewLoad = State.Builder.CreateIntrinsic(
          nullptr, "strided.vec");
    const DataLayout &DL = Instr->getDataLayout();
    for (unsigned I = 0, J = 0; I < InterleaveFactor; ++I) {
      Value *StridedVec = State.Builder.CreateExtractValue(NewLoad, I);
      if (Member->getType() != ScalarTy) {
  const DataLayout &DL = Instr->getDataLayout();
  for (unsigned I = 0, StoredIdx = 0; I < InterleaveFactor; I++) {
    Value *StoredVec = State.get(StoredValues[StoredIdx]);
    if (StoredVec->getType() != SubVT)
  State.Builder.CreateIntrinsic(Type::getVoidTy(Ctx), Intrinsic::vp_store,
                                {IVec, ResAddr, GroupMask, InterleaveEVL});
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  O << Indent << "INTERLEAVE-GROUP with factor " << IG->getFactor() << " at ";
  IG->getInsertPos()->printAsOperand(O, false);
  for (unsigned i = 0; i < IG->getFactor(); ++i) {
    if (!IG->getMember(i))
      O << "\n" << Indent << "  vp.store ";
      O << " to index " << i;
      O << "\n" << Indent << "  ";
      O << " = vp.load from index " << i;
  unsigned InsertPosIdx = 0;
  for (unsigned Idx = 0; IG->getFactor(); ++Idx)
    if (auto *Member = IG->getMember(Idx)) {
      if (Member == InsertPos)
  Type *ValTy = Ctx.Types.inferScalarType(
                    ->getAddressSpace();
  unsigned InterleaveFactor = IG->getFactor();
  for (unsigned IF = 0; IF < InterleaveFactor; IF++)
    if (IG->getMember(IF))
      InsertPos->getOpcode(), WideVecTy, IG->getFactor(), Indices,
      IG->getAlign(), AS, Ctx.CostKind, getMask(), NeedsMaskForGaps);
  if (!IG->isReverse())
  return Cost + IG->getNumMembers() *
                    VectorTy, VectorTy, {}, Ctx.CostKind,
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  O << Indent << "EMIT ";
  O << " = CANONICAL-INDUCTION ";
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
         "unexpected number of operands");
  O << Indent << "EMIT ";
  O << " = WIDEN-POINTER-INDUCTION ";
  O << Indent << "EMIT ";
  O << " = EXPAND SCEV " << *Expr;
  IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
          : Builder.CreateVectorSplat(VF, CanonicalIV, "broadcast");
    VStep = Builder.CreateVectorSplat(VF, VStep);
        Builder.CreateAdd(VStep, Builder.CreateStepVector(VStep->getType()));
  Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv");
  State.set(this, CanonicalVectorIV);
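  // Illustrative sketch (fixed VF = 4, i64 IV): the widened canonical IV is
  //   %vec.iv = add <4 x i64> %broadcast, <i64 0, i64 1, i64 2, i64 3>
  // i.e. a splat of the scalar canonical IV plus a step vector.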
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  O << Indent << "EMIT ";
  O << " = WIDEN-CANONICAL-INDUCTION ";
  auto &Builder = State.Builder;
  Type *VecTy = State.VF.isScalar()
                    ? VectorInit->getType()
      State.CFG.VPBB2IRBB.at(getParent()->getCFGPredecessor(0));
  if (State.VF.isVector()) {
    auto *IdxTy = Builder.getInt32Ty();
    auto *One = ConstantInt::get(IdxTy, 1);
    auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);
    auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
    VectorInit = Builder.CreateInsertElement(
  Phi->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
  Phi->addIncoming(VectorInit, VectorPH);
  State.set(this, Phi);
  return Ctx.TTI.getCFInstrCost(Instruction::PHI, Ctx.CostKind);
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  O << Indent << "FIRST-ORDER-RECURRENCE-PHI ";
      State.CFG.VPBB2IRBB.at(getParent()->getCFGPredecessor(0));
  bool ScalarPHI = State.VF.isScalar() || IsInLoop;
  Value *StartV = State.get(StartVPV, ScalarPHI);
  assert(State.CurrentParentLoop->getHeader() == HeaderBB &&
         "recipe must be in the vector loop header");
  State.set(this, Phi, IsInLoop);
  Phi->addIncoming(StartV, VectorPH);
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  O << Indent << "WIDEN-REDUCTION-PHI ";
  if (VFScaleFactor != 1)
    O << " (VF scaled by 1/" << VFScaleFactor << ")";
  Instruction *VecPhi = State.Builder.CreatePHI(VecTy, 2, Name);
  State.set(this, VecPhi);
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  O << Indent << "WIDEN-PHI ";
      State.CFG.VPBB2IRBB.at(getParent()->getCFGPredecessor(0));
      State.Builder.CreatePHI(StartMask->getType(), 2, "active.lane.mask");
  Phi->addIncoming(StartMask, VectorPH);
  State.set(this, Phi);
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  O << Indent << "ACTIVE-LANE-MASK-PHI ";
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  O << Indent << "EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI ";
static SDValue Widen(SelectionDAG *CurDAG, SDValue N)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static MCDisassembler::DecodeStatus addOperand(MCInst &Inst, const MCOperand &Opnd)
AMDGPU Lower Kernel Arguments
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static std::pair< Value *, APInt > getMask(Value *WideMask, unsigned Factor, ElementCount LeafValueEC)
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
This file provides a LoopVectorizationPlanner class.
static const SCEV * getAddressAccessSCEV(Value *Ptr, LoopVectorizationLegality *Legal, PredicatedScalarEvolution &PSE, const Loop *TheLoop)
Gets Address Access SCEV after verifying that the access pattern is loop invariant except the inducti...
static bool isOrdered(const Instruction *I)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
const SmallVectorImpl< MachineOperand > & Cond
This file defines the SmallVector class.
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static SymbolRef::Type getType(const Symbol *Sym)
This file contains the declarations of different VPlan-related auxiliary helpers.
static Instruction * createReverseEVL(IRBuilderBase &Builder, Value *Operand, Value *EVL, const Twine &Name)
Use all-true mask for reverse rather than actual mask, as it avoids a dependence w/o affecting the re...
static Value * interleaveVectors(IRBuilderBase &Builder, ArrayRef< Value * > Vals, const Twine &Name)
Return a vector containing interleaved elements from multiple smaller input vectors.
static InstructionCost getCostForIntrinsics(Intrinsic::ID ID, ArrayRef< const VPValue * > Operands, const VPRecipeWithIRFlags &R, ElementCount VF, VPCostContext &Ctx)
Compute the cost for the intrinsic ID with Operands, produced by R.
static Value * createBitOrPointerCast(IRBuilderBase &Builder, Value *V, VectorType *DstVTy, const DataLayout &DL)
static Type * getGEPIndexTy(bool IsScalable, bool IsReverse, bool IsUnitStride, unsigned CurrentPart, IRBuilderBase &Builder)
SmallVector< Value *, 2 > VectorParts
static bool isUsedByLoadStoreAddress(const VPUser *V)
Returns true if V is used as part of the address of another load or store.
static void scalarizeInstruction(const Instruction *Instr, VPReplicateRecipe *RepRecipe, const VPLane &Lane, VPTransformState &State)
A helper function to scalarize a single Instruction in the innermost loop.
static Constant * getSignedIntOrFpConstant(Type *Ty, int64_t C)
A helper function that returns an integer or floating-point constant with value C.
static BranchInst * createCondBranch(Value *Cond, VPBasicBlock *VPBB, VPTransformState &State)
Create a conditional branch using Cond branching to the successors of VPBB.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
This file contains the declarations of the Vectorization Plan base classes:
static const uint32_t IV[8]
Class for arbitrary precision integers.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
static LLVM_ABI Attribute getWithAlignment(LLVMContext &Context, Align Alignment)
Return a uniquified Attribute object that has the specific alignment set.
LLVM Basic Block Representation.
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Conditional or Unconditional Branch instruction.
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
Adds the attribute to the indicated argument.
This class represents a function call, abstracting a target machine's calling convention.
static LLVM_ABI bool isBitOrNoopPointerCastable(Type *SrcTy, Type *DestTy, const DataLayout &DL)
Check whether a bitcast, inttoptr, or ptrtoint cast between these types is valid and a no-op.
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_UGT
unsigned greater than
@ ICMP_ULT
unsigned less than
static LLVM_ABI StringRef getPredicateName(Predicate P)
This is the shared class of boolean and integer constants.
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
This is an important base class in LLVM.
A parsed version of the target data layout string in and methods for querying it.
constexpr bool isVector() const
One or more elements.
static constexpr ElementCount getScalable(ScalarTy MinVal)
static constexpr ElementCount getFixed(ScalarTy MinVal)
constexpr bool isScalar() const
Exactly one element.
Convenience struct for specifying and reasoning about fast-math flags.
LLVM_ABI void print(raw_ostream &O) const
Print fast-math flags to O.
void setAllowContract(bool B=true)
bool noSignedZeros() const
void setAllowReciprocal(bool B=true)
bool allowReciprocal() const
void setNoSignedZeros(bool B=true)
bool allowReassoc() const
Flag queries.
void setNoNaNs(bool B=true)
void setAllowReassoc(bool B=true)
Flag setters.
void setApproxFunc(bool B=true)
void setNoInfs(bool B=true)
bool allowContract() const
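A small sketch combining the FastMathFlags setters and queries above:
FastMathFlags FMF;
FMF.setAllowReassoc();
FMF.setAllowContract();
FMF.setNoNaNs();
assert(FMF.allowReassoc() && FMF.allowContract());
FMF.print(dbgs()); // flags can also be printed for debugging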
Class to represent function types.
Type * getParamType(unsigned i) const
Parameter type accessors.
bool willReturn() const
Determine if the function will return.
bool doesNotThrow() const
Determine if the function cannot unwind.
Type * getReturnType() const
Returns the type of the ret val.
Common base class shared among various IRBuilders.
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Value * CreateInsertValue(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &Name="")
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
LLVM_ABI Value * CreateVectorSplice(Value *V1, Value *V2, int64_t Imm, const Twine &Name="")
Return a vector splice intrinsic if using scalable vectors, otherwise return a shufflevector.
LLVM_ABI Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains V broadcast to NumElts elements.
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Value * CreateFreeze(Value *V, const Twine &Name="")
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Value * CreatePtrAdd(Value *Ptr, Value *Offset, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
void setFastMathFlags(FastMathFlags NewFMF)
Set the fast-math flags to be used with generated fp-math operators.
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
Value * CreateLogicalAnd(Value *Cond1, Value *Cond2, const Twine &Name="", Instruction *MDFrom=nullptr)
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateNot(Value *V, const Twine &Name="")
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateCountTrailingZeroElems(Type *ResTy, Value *Mask, bool ZeroIsPoison=true, const Twine &Name="")
Create a call to llvm.experimental_cttz_elts.
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
LLVMContext & getContext() const
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
ConstantInt * getFalse()
Get the constant value for i1 false.
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
This provides a uniform API for creating instructions and inserting them into a basic block: either at the end of a BasicBlock, or at a specific iterator location in a block.
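A sketch tying several of these IRBuilderBase calls together; it assumes Builder is an IRBuilder already positioned at a valid insertion point and that A and B are i64 Values (all names are illustrative only):
Value *Sum = Builder.CreateAdd(A, B, "sum");
Value *LowBit = Builder.CreateBinOp(Instruction::And, Sum, Builder.getInt64(1));
Value *IsOdd = Builder.CreateICmpNE(LowBit, Builder.getInt64(0), "is.odd");
Value *Pick = Builder.CreateSelect(IsOdd, A, B, "pick");
Value *Splat = Builder.CreateVectorSplat(/*NumElts=*/4, Pick, "splat"); // <4 x i64>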
static InstructionCost getInvalid(CostType Val=0)
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
const char * getOpcodeName() const
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
The group of interleaved loads/stores sharing the same stride and close to each other.
uint32_t getFactor() const
InstTy * getMember(uint32_t Index) const
Get the member with the given index Index.
InstTy * getInsertPos() const
void addMetadata(InstTy *NewInst) const
Add metadata (e.g. alias info) from the instruction group to NewInst.
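Typical traversal of an interleave group using the accessors above; Group is assumed to be an InterleaveGroup<Instruction> *, gaps in the group yield null members, and propagate() is a hypothetical per-member action:
for (uint32_t I = 0; I < Group->getFactor(); ++I)
  if (Instruction *Member = Group->getMember(I)) // null when index I is a gap
    propagate(Member);
Instruction *InsertAt = Group->getInsertPos();   // where the wide access is emitted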
This is an important class for using LLVM in a threaded context.
Represents a single loop in the control flow graph.
A Module instance is used to store all the information related to an LLVM module.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will have (use 0 if you really have no idea).
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
static bool isSignedRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is a signed reduction kind.
static LLVM_ABI unsigned getOpcode(RecurKind Kind)
Returns the opcode corresponding to the RecurrenceKind.
unsigned getOpcode() const
static bool isAnyOfRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,y) is loop invariant.
static bool isFindLastIVRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,y) is an increasing loop induction.
static bool isFindIVRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,y) is an increasing or decreasing loop induction.
static bool isMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is any min/max kind.
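These predicates partition the RecurKind values; a sketch of dispatching on them (Kind is a RecurKind in scope):
if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind)) {
  // e.g. RecurKind::SMin, SMax, UMin, UMax
} else if (RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind)) {
  // select(cmp(), x, y) with a loop-invariant arm
} else {
  unsigned Opc = RecurrenceDescriptor::getOpcode(Kind); // e.g. Instruction::Add
  (void)Opc;
}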
This class represents an analyzed expression in the program.
The main scalar evolution driver.
This class represents the LLVM 'select' instruction.
This class provides computation of slot numbers for LLVM Assembly writing.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
reference emplace_back(ArgTypes &&... Args)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary values as strings.
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
bool isVectorTy() const
True if this is an instance of VectorType.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
bool isStructTy() const
True if this is an instance of StructType.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
bool isVoidTy() const
Return true if this is 'void'.
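These Type queries compose naturally; a sketch (Ty is any llvm::Type *):
Type *Scalar = Ty->getScalarType(); // element type if Ty is a vector, else Ty
bool Widenable = Scalar->isIntegerTy() || Scalar->isFloatingPointTy() ||
                 Scalar->isPointerTy();
unsigned Bits = Scalar->getScalarSizeInBits(); // 0 for pointers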
value_op_iterator value_op_end()
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
value_op_iterator value_op_begin()
void execute(VPTransformState &State) override
Generate the active lane mask phi of the vector loop.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
RecipeListTy & getRecipeList()
Returns a reference to the list of recipes.
void insert(VPRecipeBase *Recipe, iterator InsertPt)
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenMemoryRecipe.
VPValue * getIncomingValue(unsigned Idx) const
Return incoming value number Idx.
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
const VPBlocksTy & getPredecessors() const
void printAsOperand(raw_ostream &OS, bool PrintType=false) const
const VPBlocksTy & getSuccessors() const
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPBranchOnMaskRecipe.
void execute(VPTransformState &State) override
Generate the extraction of the appropriate bit from the block mask and the conditional branch.
VPlan-based builder utility analogous to IRBuilder.
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
This class augments a recipe with a set of VPValues defined by the recipe.
LLVM_ABI_FOR_TEST void dump() const
Dump the VPDef to stderr (for debugging).
unsigned getNumDefinedValues() const
Returns the number of values defined by the VPDef.
ArrayRef< VPValue * > definedValues()
Returns an ArrayRef of the values defined by the VPDef.
VPValue * getVPSingleValue()
Returns the only VPValue defined by the VPDef.
VPValue * getVPValue(unsigned I)
Returns the VPValue with index I defined by the VPDef.
unsigned getVPDefID() const
VPValue * getStepValue() const
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getStartValue() const
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void decompose()
Insert the recipes of the expression back into the VPlan, directly before the current recipe.
bool isSingleScalar() const
Returns true if the result of this VPExpressionRecipe is a single-scalar.
bool mayHaveSideEffects() const
Returns true if this expression contains recipes that may have side effects.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Compute the cost of this recipe either using a recipe's specialized implementation or using the legacy cost model and the underlying instructions.
bool mayReadOrWriteMemory() const
Returns true if this expression contains recipes that may read from or write to memory.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Produce a vectorized histogram operation.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPHistogramRecipe.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getMask() const
Return the mask operand if one was provided, or a null pointer if all lanes should be executed unconditionally.
Class to record and manage LLVM IR flags.
bool flagsValidForOpcode(unsigned Opcode) const
Returns true if the set flags are valid for Opcode.
CmpInst::Predicate CmpPredicate
void printFlags(raw_ostream &O) const
bool hasFastMathFlags() const
Returns true if the recipe has fast-math flags.
LLVM_ABI_FOR_TEST FastMathFlags getFastMathFlags() const
CmpInst::Predicate getPredicate() const
bool hasNoSignedWrap() const
void intersectFlags(const VPIRFlags &Other)
Only keep flags also present in Other.
GEPNoWrapFlags getGEPNoWrapFlags() const
bool hasPredicate() const
Returns true if the recipe has a comparison predicate.
DisjointFlagsTy DisjointFlags
bool hasNoUnsignedWrap() const
NonNegFlagsTy NonNegFlags
void applyFlags(Instruction &I) const
Apply the IR flags to I.
Instruction & getInstruction() const
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
void extractLastLaneOfFirstOperand(VPBuilder &Builder)
Update the recipe's first operand to the last lane of the operand using Builder.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPIRInstruction.
VPIRInstruction(Instruction &I)
VPIRInstruction::create() should be used to create VPIRInstructions, as subclasses may need to be created, e.g. VPIRPhi.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the instruction.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPInstruction.
bool doesGeneratePerAllLanes() const
Returns true if this VPInstruction generates scalar values for all lanes.
@ ExtractLane
Extracts a single lane (first operand) from a set of vector operands.
@ ComputeAnyOfResult
Compute the final result of an AnyOf reduction with select(cmp(),x,y), where one of (x,y) is loop invariant.
@ WideIVStep
Scale the first operand (vector step) by the second operand (scalar-step).
@ ExtractPenultimateElement
@ ResumeForEpilogue
Explicit user for the resume phi of the canonical induction in the main VPlan, used by the epilogue vector loop.
@ Unpack
Extracts all lanes from its (non-scalable) vector operand.
@ FirstOrderRecurrenceSplice
@ ReductionStartVector
Start vector for reductions with 3 operands: the original start value, the identity value for the reduction, and an integer indicating the scaling factor.
@ BuildVector
Creates a fixed-width vector containing all operands.
@ BuildStructVector
Given operands of (the same) struct type, creates a struct of fixed-width vectors, each containing a struct element of all operands.
@ VScale
Returns the value for vscale.
@ CanonicalIVIncrementForPart
@ CalculateTripCountMinusVF
bool opcodeMayReadOrWriteFromMemory() const
Returns true if the underlying opcode may read from or write to memory.
LLVM_DUMP_METHOD void dump() const
Print the VPInstruction to dbgs() (for debugging).
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the VPInstruction to O.
StringRef getName() const
Returns the symbolic name assigned to the VPInstruction.
unsigned getOpcode() const
VPInstruction(unsigned Opcode, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
bool isVectorToScalar() const
Returns true if this VPInstruction produces a scalar value from a vector, e.g. by performing a reduction or extracting a lane.
bool isSingleScalar() const
Returns true if this VPInstruction's operands are single scalars and the result is also a single scalar.
void execute(VPTransformState &State) override
Generate the instruction.
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
bool needsMaskForGaps() const
Return true if the access needs a mask because of the gaps.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this recipe.
Instruction * getInsertPos() const
const InterleaveGroup< Instruction > * getInterleaveGroup() const
VPValue * getMask() const
Return the mask used by this recipe.
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
VPValue * getAddr() const
Return the address accessed by this recipe.
VPValue * getEVL() const
The VPValue of the explicit vector length.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
unsigned getNumStoreOperands() const override
Returns the number of stored operands of this interleave group.
void execute(VPTransformState &State) override
Generate the wide load or store, and shuffles.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
unsigned getNumStoreOperands() const override
Returns the number of stored operands of this interleave group.
void execute(VPTransformState &State) override
Generate the wide load or store, and shuffles.
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term "output IR" refers to code that is generated by the vectorizer.
static VPLane getLastLaneForVF(const ElementCount &VF)
static VPLane getLaneFromEnd(const ElementCount &VF, unsigned Offset)
static VPLane getFirstLane()
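A sketch of the VPLane factories above; offsets from the end are how the last lanes of a scalable vector are addressed (Offset 1 denotes the last lane):
VPLane First = VPLane::getFirstLane();                              // lane 0
VPLane Last = VPLane::getLastLaneForVF(ElementCount::getFixed(8));  // lane 7
VPLane Penult = VPLane::getLaneFromEnd(ElementCount::getScalable(4),
                                       /*Offset=*/2);               // lane vscale*4 - 2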
void execute(VPTransformState &State) override
Generate the reduction in the loop.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPPartialReductionRecipe.
unsigned getOpcode() const
Get the binary op's opcode.
virtual const VPRecipeBase * getAsRecipe() const =0
Return a VPRecipeBase* to the current object.
virtual unsigned getNumIncoming() const
Returns the number of incoming values, also number of incoming blocks.
void removeIncomingValueFor(VPBlockBase *IncomingBlock) const
Removes the incoming value for IncomingBlock, which must be a predecessor.
const VPBasicBlock * getIncomingBlock(unsigned Idx) const
Returns the incoming block with index Idx.
detail::zippy< llvm::detail::zip_first, VPUser::const_operand_range, const_incoming_blocks_range > incoming_values_and_blocks() const
Returns an iterator range over pairs of incoming values and corresponding incoming blocks.
VPValue * getIncomingValue(unsigned Idx) const
Returns the incoming VPValue with index Idx.
void printPhiOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the recipe.
void execute(VPTransformState &State) override
Generates phi nodes for live-outs (from a replicate region) as needed to retain SSA form.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
bool mayReadFromMemory() const
Returns true if the recipe may read from memory.
bool mayHaveSideEffects() const
Returns true if the recipe may have side-effects.
virtual void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Each concrete VPRecipe prints itself, without printing common information, like debug info or metadata.
VPRegionBlock * getRegion()
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override final
Print the recipe, delegating to printRecipe().
bool isPhi() const
Returns true for PHI-like recipes.
bool mayWriteToMemory() const
Returns true if the recipe may write to memory.
virtual InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const
Compute the cost of this recipe either using a recipe's specialized implementation or using the legacy cost model and the underlying instructions.
VPBasicBlock * getParent()
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this recipe, taking into account if the cost computation should be skipped and the ForceTargetInstructionCost flag.
bool isScalarCast() const
Return true if the recipe is a scalar cast.
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
void moveAfter(VPRecipeBase *MovePos)
Unlink this recipe from its current VPBasicBlock and insert it into the VPBasicBlock that MovePos lives in, right after MovePos.
VPRecipeBase(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
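A common VPlan-to-VPlan transform step built from the insertion and erasure APIs above; a sketch, assuming Old and New are single-def recipes:
New->insertBefore(Old); // link New into Old's VPBasicBlock
Old->getVPSingleValue()->replaceAllUsesWith(New->getVPSingleValue());
Old->eraseFromParent(); // unlink and delete Old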
void execute(VPTransformState &State) override
Generate the reduction in the loop.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getEVL() const
The VPValue of the explicit vector length.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
bool isConditional() const
Return true if the in-loop reduction is conditional.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of VPReductionRecipe.
VPValue * getVecOp() const
The VPValue of the vector value to be reduced.
VPValue * getCondOp() const
The VPValue of the condition for the block.
RecurKind getRecurrenceKind() const
Return the recurrence kind for the in-loop reduction.
VPValue * getChainOp() const
The VPValue of the scalar Chain being accumulated.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the reduction in the loop.
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-Single-Exiting subgraph of the output IR CFG.
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR corresponding to its VPBlockBases.
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original scalar type, one per lane, instead of producing a single copy of widened type for all lanes.
void execute(VPTransformState &State) override
Generate replicas of the desired Ingredient.
bool isSingleScalar() const
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPReplicateRecipe.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
unsigned getOpcode() const
bool shouldPack() const
Returns true if the recipe is used by a widened recipe via an intervening VPPredInstPHIRecipe.
VPValue * getStepValue() const
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the scalarized versions of the phi node as needed by their users.
VPSingleDef is a base class for recipes for modeling a sequence of one or more output IR that define a single result VPValue.
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
LLVM_ABI_FOR_TEST LLVM_DUMP_METHOD void dump() const
Print this VPSingleDefRecipe to dbgs() (for debugging).
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
This class can be used to assign names to VPValues.
Type * inferScalarType(const VPValue *V)
Infer the type of V. Returns the scalar type of V.
Helper to access the operand that contains the unroll part for this recipe after unrolling.
VPValue * getUnrollPartOperand(const VPUser &U) const
Return the VPValue operand containing the unroll part or null if there is no such operand.
unsigned getUnrollPart(const VPUser &U) const
Return the unroll part.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's users to their defs.
void printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the operands to O.
void setOperand(unsigned I, VPValue *New)
unsigned getNumOperands() const
operand_iterator op_begin()
VPValue * getOperand(unsigned N) const
virtual bool usesFirstLaneOnly(const VPValue *Op) const
Returns true if the VPUser only uses the first lane of operand Op.
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into, within and out of the VPlan.
bool isDefinedOutsideLoopRegions() const
Returns true if the VPValue is defined outside any loop.
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe, i.e. is a live-in.
friend class VPExpressionRecipe
void printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const
bool hasMoreThanOneUniqueUser() const
Returns true if the value has more than one unique user.
Value * getLiveInIRValue() const
Returns the underlying IR value, if this VPValue is defined outside the scope of VPlan.
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
VPValue(const unsigned char SC, Value *UV=nullptr, VPDef *Def=nullptr)
void replaceAllUsesWith(VPValue *New)
user_iterator user_begin()
unsigned getNumUsers() const
bool isLiveIn() const
Returns true if this VPValue is a live-in, i.e. defined outside the VPlan.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getSourceElementType() const
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Function * getCalledScalarFunction() const
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCallRecipe.
void execute(VPTransformState &State) override
Produce a widened version of the call instruction.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with start = {<Part*VF, Part*VF+1, ..., Part*VF+VF-1> for 0 <= Part < UF}, and step = <VF*UF, VF*UF, ..., VF*UF>.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getResultType() const
Returns the result type of the cast.
void execute(VPTransformState &State) override
Produce widened copies of the cast.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCastRecipe.
void execute(VPTransformState &State) override
Generate the gep nodes.
Type * getSourceElementType() const
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getStepValue()
Returns the step value of the induction.
TruncInst * getTruncInst()
Returns the first defined value as TruncInst, if it is one or nullptr otherwise.
Type * getScalarType() const
Returns the scalar type of the induction.
bool isCanonical() const
Returns true if the induction is canonical, i.e. starts at 0 and has a step of 1.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Intrinsic::ID getVectorIntrinsicID() const
Return the ID of the intrinsic.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
StringRef getIntrinsicName() const
Returns the name of the intrinsic as a string.
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Type * getResultType() const
Return the scalar return type of the intrinsic.
void execute(VPTransformState &State) override
Produce a widened version of the vector intrinsic.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this vector intrinsic.
bool IsMasked
Whether the memory access is masked.
bool Reverse
Whether the consecutive accessed addresses are in reverse order.
bool isConsecutive() const
Return whether the loaded-from / stored-to addresses are consecutive.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenMemoryRecipe.
bool Consecutive
Whether the accessed addresses are consecutive.
VPValue * getMask() const
Return the mask used by this recipe.
Align Alignment
Alignment information for this memory access.
VPValue * getAddr() const
Return the address accessed by this recipe.
bool isReverse() const
Return whether the consecutive loaded/stored addresses are in reverse order.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
bool onlyScalarsGenerated(bool IsScalable)
Returns true if only scalar values will be generated.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenRecipe.
void execute(VPTransformState &State) override
Produce a widened instruction using the opcode and operands of the recipe, processing State.VF elements.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
void mutateType(Type *Ty)
Mutate the type of this Value to be of the specified type.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Base class of all SIMD vector types.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector.
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Type * getElementType() const
constexpr ScalarTy getFixedValue() const
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the same way as for basic types.
const ParentTy * getParent() const
self_iterator getIterator()
typename base_list_type::iterator iterator
iterator erase(iterator where)
pointer remove(iterator &IT)
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
LLVM_ABI Intrinsic::ID getDeinterleaveIntrinsicID(unsigned Factor)
Returns the corresponding llvm.vector.deinterleaveN intrinsic for factor N.
LLVM_ABI StringRef getBaseName(ID id)
Return the LLVM name for an intrinsic, without encoded types for overloading, such as "llvm.ssa.copy".
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
bool match(Val *V, const Pattern &P)
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
GEPLikeRecipe_match< Op0_t, Op1_t > m_GetElementPtr(const Op0_t &Op0, const Op1_t &Op1)
class_match< VPValue > m_VPValue()
Match an arbitrary VPValue and ignore it.
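The VPlan matchers mirror llvm::PatternMatch. A sketch of matching a multiply and binding its operands; the binding overload m_VPValue(VPValue *&) is assumed to exist alongside the ignore-everything form listed above:
VPValue *A, *B;
if (match(&R, m_Mul(m_VPValue(A), m_VPValue(B)))) {
  // R is a multiply; A and B now bind its two operands.
}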
NodeAddr< DefNode * > Def
bool isSingleScalar(const VPValue *VPV)
Returns true if VPV is a single scalar, either because it produces the same value for all lanes or only has its first lane used.
bool onlyFirstPartUsed(const VPValue *Def)
Returns true if only the first part of Def is used.
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
bool onlyScalarValuesUsed(const VPValue *Def)
Returns true if only scalar values of Def are used by all users.
const SCEV * getSCEVExprForVPValue(const VPValue *V, ScalarEvolution &SE, const Loop *L=nullptr)
Return the SCEV expression for V.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
LLVM_ABI Value * createSimpleReduction(IRBuilderBase &B, Value *Src, RecurKind RdxKind)
Create a reduction of the given vector.
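For instance, reducing a vector of i32 values to their sum; Builder and the vector value Src are assumed to be in scope:
Value *Sum = createSimpleReduction(Builder, Src, RecurKind::Add);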
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
FunctionAddr VTableAddr Value
LLVM_ABI Value * createFindLastIVReduction(IRBuilderBase &B, Value *Src, RecurKind RdxKind, Value *Start, Value *Sentinel)
Create a reduction of the given vector Src for a reduction of the kind RecurKind::FindLastIV.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI Intrinsic::ID getMinMaxReductionIntrinsicOp(Intrinsic::ID RdxID)
Returns the min/max intrinsic used when expanding a min/max reduction.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
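These range helpers avoid manual index bookkeeping; a sketch over a SmallVector<VPValue *> Ops:
bool AllLiveIn = all_of(Ops, [](const VPValue *Op) { return Op->isLiveIn(); });
for (auto [Idx, Op] : enumerate(Ops))
  dbgs() << "operand " << Idx << " is "
         << (Op->isLiveIn() ? "live-in" : "recipe-defined") << "\n";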
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
Value * getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF)
Return the runtime value for VF.
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case of optionals) value is accepted.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
void interleaveComma(const Container &c, StreamT &os, UnaryFunctor each_fn)
auto cast_or_null(const Y &Val)
LLVM_ABI Value * concatenateVectors(IRBuilderBase &Builder, ArrayRef< Value * > Vecs)
Concatenate a list of vectors.
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
bool isa_and_nonnull(const Y &Val)
LLVM_ABI Value * createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left, Value *Right)
Returns a Min/Max operation corresponding to MinMaxRecurrenceKind.
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI Constant * createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF, const InterleaveGroup< Instruction > &Group)
Create a mask that filters the members of an interleave group where there are gaps.
LLVM_ABI llvm::SmallVector< int, 16 > createStrideMask(unsigned Start, unsigned Stride, unsigned VF)
Create a stride shuffle mask.
auto reverse(ContainerTy &&C)
LLVM_ABI llvm::SmallVector< int, 16 > createReplicatedMask(unsigned ReplicationFactor, unsigned VF)
Create a mask with replicated elements.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
Type * toVectorizedTy(Type *Ty, ElementCount EC)
A helper for converting to vectorized types.
bool canConstantBeExtended(const APInt *C, Type *NarrowType, TTI::PartialReductionExtendKind ExtKind)
Check if a constant C can be safely treated as having been extended from a narrower type with the given extend kind.
cl::opt< unsigned > ForceTargetInstructionCost
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
bool canVectorizeTy(Type *Ty)
Returns true if Ty is a valid vector element type, void, or an unpacked literal struct where all elements are vectorizable.
LLVM_ABI llvm::SmallVector< int, 16 > createInterleaveMask(unsigned VF, unsigned NumVecs)
Create an interleave shuffle mask.
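Concrete values these mask helpers produce for small inputs, per their documented semantics:
// createInterleaveMask(/*VF=*/4, /*NumVecs=*/2)            -> <0,4,1,5,2,6,3,7>
// createStrideMask(/*Start=*/0, /*Stride=*/2, /*VF=*/4)    -> <0,2,4,6>
// createReplicatedMask(/*ReplicationFactor=*/2, /*VF=*/3)  -> <0,0,1,1,2,2>
SmallVector<int, 16> Mask = createInterleaveMask(4, 2);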
RecurKind
These are the kinds of recurrences that we support.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ Mul
Product of integers.
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ Sub
Subtraction of integers.
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
LLVM_ABI bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
LLVM_ABI Value * getRecurrenceIdentity(RecurKind K, Type *Tp, FastMathFlags FMF)
Given information about a recurrence kind, return the identity for the @llvm.vector.reduce.* intrinsic used to generate it.
DWARFExpression::Operation Op
Value * createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF, int64_t Step)
Return a value for Step multiplied by VF.
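A sketch of emitting VF-dependent values with these helpers; Builder is assumed in scope, and for a fixed VF both calls fold to constants:
ElementCount VF = ElementCount::getScalable(4);
Value *RuntimeVF = getRuntimeVF(Builder, Builder.getInt64Ty(), VF);             // vscale * 4
Value *NegVF = createStepForVF(Builder, Builder.getInt64Ty(), VF, /*Step=*/-1); // -(vscale * 4)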
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
LLVM_ABI Value * createOrderedReduction(IRBuilderBase &B, RecurKind RdxKind, Value *Src, Value *Start)
Create an ordered reduction intrinsic using the given recurrence kind RdxKind.
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
@ Increment
Incrementally increasing token ID.
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
LLVM_ABI Value * createAnyOfReduction(IRBuilderBase &B, Value *Src, Value *InitVal, PHINode *OrigPhi)
Create a reduction of the given vector Src for a reduction of kind RecurKind::AnyOf.
LLVM_ABI bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic is overloaded on the type of the operand at index OpdI...
This struct is a compact representation of a valid (non-zero power of two) alignment.
Struct to hold various analysis needed for cost computations.
void execute(VPTransformState &State) override
Generate the phi nodes.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this first-order recurrence phi recipe.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
An overlay for VPIRInstructions wrapping PHI nodes, enabling convenient use of cast/dyn_cast/isa and a phi-specific execute().
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
void execute(VPTransformState &State) override
Generate the instruction.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
A pure-virtual common base class for recipes defining a single VPValue and using IR flags.
InstructionCost getCostForRecipeWithOpcode(unsigned Opcode, ElementCount VF, VPCostContext &Ctx) const
Compute the cost for this recipe for VF, using Opcode and Ctx.
VPRecipeWithIRFlags(const unsigned char SC, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, DebugLoc DL=DebugLoc::getUnknown())
void execute(VPTransformState &State) override
Generate the wide load or gather.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenLoadEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate a wide load or gather.
VPValue * getCond() const
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenSelectRecipe.
void execute(VPTransformState &State) override
Produce a widened version of the select instruction.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getStoredValue() const
Return the value stored by this recipe.
void execute(VPTransformState &State) override
Generate the wide store or scatter.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenStoreEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
void execute(VPTransformState &State) override
Generate a wide store or scatter.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getStoredValue() const
Return the value stored by this recipe.