47#define LV_NAME "loop-vectorize"
48#define DEBUG_TYPE LV_NAME
50#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
56 cl::desc(
"Controls the printing of recipe metadata when debugging."));
63 case VPInstructionSC: {
66 if (VPI->getOpcode() == Instruction::Load)
68 return VPI->opcodeMayReadOrWriteFromMemory();
70 case VPInterleaveEVLSC:
73 case VPWidenStoreEVLSC:
81 ->getCalledScalarFunction()
83 case VPWidenMemIntrinsicSC:
84 case VPWidenIntrinsicSC:
86 case VPActiveLaneMaskPHISC:
87 case VPCurrentIterationPHISC:
88 case VPBranchOnMaskSC:
90 case VPFirstOrderRecurrencePHISC:
91 case VPReductionPHISC:
92 case VPScalarIVStepsSC:
96 case VPReductionEVLSC:
98 case VPVectorPointerSC:
99 case VPWidenCanonicalIVSC:
102 case VPWidenIntOrFpInductionSC:
103 case VPWidenLoadEVLSC:
106 case VPWidenPointerInductionSC:
111 assert((!
I || !
I->mayWriteToMemory()) &&
112 "underlying instruction may write to memory");
124 case VPInstructionSC:
126 case VPWidenLoadEVLSC:
131 ->mayReadFromMemory();
134 ->getCalledScalarFunction()
135 ->onlyWritesMemory();
136 case VPWidenMemIntrinsicSC:
137 case VPWidenIntrinsicSC:
139 case VPBranchOnMaskSC:
141 case VPCurrentIterationPHISC:
142 case VPFirstOrderRecurrencePHISC:
143 case VPReductionPHISC:
144 case VPPredInstPHISC:
145 case VPScalarIVStepsSC:
146 case VPWidenStoreEVLSC:
150 case VPReductionEVLSC:
152 case VPVectorPointerSC:
153 case VPWidenCanonicalIVSC:
156 case VPWidenIntOrFpInductionSC:
158 case VPWidenPointerInductionSC:
163 assert((!
I || !
I->mayReadFromMemory()) &&
164 "underlying instruction may read from memory");
177 case VPActiveLaneMaskPHISC:
179 case VPCurrentIterationPHISC:
180 case VPFirstOrderRecurrencePHISC:
181 case VPReductionPHISC:
182 case VPPredInstPHISC:
183 case VPVectorEndPointerSC:
185 case VPInstructionSC: {
192 case VPWidenCallSC: {
196 case VPWidenMemIntrinsicSC:
197 case VPWidenIntrinsicSC:
200 case VPReductionEVLSC:
202 case VPScalarIVStepsSC:
203 case VPVectorPointerSC:
204 case VPWidenCanonicalIVSC:
207 case VPWidenIntOrFpInductionSC:
209 case VPWidenPointerInductionSC:
214 assert((!
I || !
I->mayHaveSideEffects()) &&
215 "underlying instruction has side-effects");
218 case VPInterleaveEVLSC:
221 case VPWidenLoadEVLSC:
223 case VPWidenStoreEVLSC:
228 "mayHaveSideffects result for ingredient differs from this "
231 case VPReplicateSC: {
233 return R->getUnderlyingInstr()->mayHaveSideEffects();
244 case VPInstructionSC: {
252 case Instruction::Add:
253 case Instruction::Sub:
254 case Instruction::Mul:
255 case Instruction::GetElementPtr:
263 assert(!Parent &&
"Recipe already in some VPBasicBlock");
265 "Insertion position not in any VPBasicBlock");
271 assert(!Parent &&
"Recipe already in some VPBasicBlock");
277 assert(!Parent &&
"Recipe already in some VPBasicBlock");
279 "Insertion position not in any VPBasicBlock");
314 UI = IG->getInsertPos();
316 UI = &WidenMem->getIngredient();
319 if (UI && Ctx.skipCostComputation(UI, VF.
isVector())) {
333 dbgs() <<
"Cost of " << RecipeCost <<
" for VF " << VF <<
": ";
350 assert(OpType == Other.OpType &&
"OpType must match");
352 case OperationType::OverflowingBinOp:
353 WrapFlags.HasNUW &= Other.WrapFlags.HasNUW;
354 WrapFlags.HasNSW &= Other.WrapFlags.HasNSW;
356 case OperationType::Trunc:
360 case OperationType::DisjointOp:
363 case OperationType::PossiblyExactOp:
364 ExactFlags.IsExact &= Other.ExactFlags.IsExact;
366 case OperationType::GEPOp:
369 case OperationType::FPMathOp:
370 case OperationType::FCmp:
371 assert((OpType != OperationType::FCmp ||
372 FCmpFlags.CmpPredStorage == Other.FCmpFlags.CmpPredStorage) &&
373 "Cannot drop CmpPredicate");
374 getFMFsRef().NoNaNs &= Other.getFMFsRef().NoNaNs;
375 getFMFsRef().NoInfs &= Other.getFMFsRef().NoInfs;
377 case OperationType::NonNegOp:
380 case OperationType::Cmp:
382 "Cannot drop CmpPredicate");
384 case OperationType::ReductionOp:
386 "Cannot change RecurKind");
388 "Cannot change IsOrdered");
390 "Cannot change IsInLoop");
391 getFMFsRef().NoNaNs &= Other.getFMFsRef().NoNaNs;
392 getFMFsRef().NoInfs &= Other.getFMFsRef().NoInfs;
394 case OperationType::Other:
402 const FastMathFlagsTy &
F = getFMFsRef();
414#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
443 "expected function operand");
456 "zero-operand VPInstruction opcodes must pass explicit ResultTy");
458 [[maybe_unused]]
auto AssertOperandType = [&Operands](
unsigned Idx,
460 if (!ExpectedTy || Operands.
size() <= Idx)
462 [[maybe_unused]]
Type *OpTy = Operands[Idx]->getScalarType();
463 assert((!OpTy || OpTy == ExpectedTy) &&
464 "different types inferred for different operands");
467 Type *Op0Ty = Operands[0]->getScalarType();
479 AssertOperandType(1, Op0Ty);
484 for (
unsigned Idx = 1; Idx != Operands.
size(); ++Idx)
485 AssertOperandType(Idx, Op0Ty);
487 case Instruction::Switch:
488 for (
unsigned Idx = 1; Idx != Operands.
size(); ++Idx)
489 AssertOperandType(Idx, Op0Ty);
491 case Instruction::Store:
493 case Instruction::ICmp:
495 AssertOperandType(1, Op0Ty);
497 case Instruction::FCmp:
499 AssertOperandType(1, Op0Ty);
503 AssertOperandType(1, Op0Ty);
511 AssertOperandType(1, Op0Ty);
515 for (
unsigned Idx = 1; Idx != Operands.
size(); ++Idx)
516 AssertOperandType(Idx, Op0Ty);
521 case Instruction::Select: {
523 "select condition must be bool");
524 Type *Op1Ty = Operands[1]->getScalarType();
525 AssertOperandType(2, Op1Ty);
528 case Instruction::InsertElement:
531 AssertOperandType(1, Op0Ty);
532 assert(Operands[2]->getScalarType()->isIntegerTy() &&
533 "expected integer operand");
538 AssertOperandType(1, Op0Ty);
541 assert(Operands.
size() >= 2 &&
"ExtractLane requires a lane operand and "
542 "at least one source vector operand");
545 Type *Op1Ty = Operands[1]->getScalarType();
546 for (
unsigned Idx = 2; Idx != Operands.
size(); ++Idx)
547 AssertOperandType(Idx, Op1Ty);
553 "expected pointer operand");
554 assert(Operands[1]->getScalarType()->isIntegerTy() &&
555 "expected integer operand");
557 case Instruction::ExtractValue: {
558 assert(Operands.
size() == 2 &&
"expected single level extractvalue");
560 return StructTy->getTypeAtIndex(
567 case Instruction::Load:
568 case Instruction::Alloca:
570 case Instruction::Call:
578 bool AllOperandsSameType =
584 if (AllOperandsSameType)
585 for (
unsigned Idx = 1; Idx != Operands.
size(); ++Idx)
586 AssertOperandType(Idx, Op0Ty);
593 unsigned Opcode =
I->getOpcode();
596 Instruction::Load, Instruction::Alloca}),
612 "Set flags not supported for the provided opcode");
614 "Opcode requires specific flags to be set");
618 "number of operands does not match opcode");
633 case Instruction::Alloca:
634 case Instruction::ExtractValue:
635 case Instruction::Freeze:
636 case Instruction::Load:
650 case Instruction::ICmp:
651 case Instruction::FCmp:
652 case Instruction::ExtractElement:
653 case Instruction::Store:
665 case Instruction::InsertElement:
666 case Instruction::Select:
670 case Instruction::Call:
673 case Instruction::GetElementPtr:
674 case Instruction::PHI:
675 case Instruction::Switch:
695bool VPInstruction::canGenerateScalarForFirstLane()
const {
701 case Instruction::Freeze:
702 case Instruction::ICmp:
703 case Instruction::PHI:
704 case Instruction::Select:
722 return Instruction::Add;
724 return Instruction::FAdd;
729 IRBuilderBase &Builder = State.
Builder;
748 case Instruction::ExtractElement: {
751 return State.
get(
getOperand(0), VPLane(Idx->getZExtValue()));
756 case Instruction::InsertElement: {
763 case Instruction::Freeze: {
767 case Instruction::FCmp:
768 case Instruction::ICmp: {
774 case Instruction::PHI: {
777 case Instruction::Select: {
804 {VIVElem0, ScalarTC},
nullptr, Name);
809 assert(VecTy->getScalarSizeInBits() == 1 &&
810 "NumActiveLanes only implemented for i1 vectors");
833 if (!
V1->getType()->isVectorTy())
853 "Requested vector length should be an integer.");
859 Builder.
getInt32Ty(), Intrinsic::experimental_get_vector_length,
860 {AVL, VFArg, Builder.getTrue()});
869 VPBasicBlock *SecondVPSucc =
890 for (
unsigned FieldIndex = 0; FieldIndex != StructTy->getNumElements();
914 IRBuilderBase::FastMathFlagGuard FMFG(Builder);
929 "FindIV should use min/max reduction kinds");
934 for (
unsigned Part = 0; Part < NumOperandsToReduce; ++Part)
937 IRBuilderBase::FastMathFlagGuard FMFG(Builder);
941 Value *ReducedPartRdx = RdxParts[0];
943 ReducedPartRdx = RdxParts[NumOperandsToReduce - 1];
946 for (
unsigned Part = 1; Part < NumOperandsToReduce; ++Part) {
947 Value *RdxPart = RdxParts[Part];
949 ReducedPartRdx =
createMinMaxOp(Builder, RK, ReducedPartRdx, RdxPart);
958 Builder.
CreateBinOp(Opcode, RdxPart, ReducedPartRdx,
"bin.rdx");
972 return ReducedPartRdx;
981 "invalid offset to extract from");
986 assert(
Offset <= 1 &&
"invalid offset to extract from");
1005 "can only generate first lane for PtrAdd");
1024 "simplified to ExtractElement.");
1027 Value *Res =
nullptr;
1031 Value *VectorStart =
1032 Builder.
CreateMul(RuntimeVF, ConstantInt::get(IdxTy, Idx - 1));
1033 Value *VectorIdx = Idx == 1
1035 : Builder.
CreateSub(LaneToExtract, VectorStart);
1061 Value *Res =
nullptr;
1062 for (
int Idx = LastOpIdx; Idx >= 0; --Idx) {
1063 Value *TrailingZeros =
1073 Builder.
CreateMul(RuntimeVF, ConstantInt::get(Ty, Idx)),
1100 Intrinsic::experimental_vector_extract_last_active, {VTy},
1116 case Instruction::FNeg:
1117 return Ctx.TTI.getArithmeticInstrCost(Opcode, ResultTy, Ctx.CostKind);
1118 case Instruction::UDiv:
1119 case Instruction::SDiv:
1120 case Instruction::SRem:
1121 case Instruction::URem:
1122 case Instruction::Add:
1123 case Instruction::FAdd:
1124 case Instruction::Sub:
1125 case Instruction::FSub:
1126 case Instruction::Mul:
1127 case Instruction::FMul:
1128 case Instruction::FDiv:
1129 case Instruction::FRem:
1130 case Instruction::Shl:
1131 case Instruction::LShr:
1132 case Instruction::AShr:
1133 case Instruction::And:
1134 case Instruction::Or:
1135 case Instruction::Xor: {
1149 return Ctx.TTI.getArithmeticInstrCost(
1150 Opcode, ResultTy, Ctx.CostKind,
1151 {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
1152 RHSInfo, Operands, CtxI, &Ctx.TLI);
1154 case Instruction::Freeze:
1161 case Instruction::ExtractValue:
1162 return Ctx.TTI.getInsertExtractValueCost(Instruction::ExtractValue,
1164 case Instruction::ICmp:
1165 case Instruction::FCmp: {
1169 return Ctx.TTI.getCmpSelInstrCost(
1171 Ctx.CostKind, {TTI::OK_AnyValue, TTI::OP_None},
1172 {TTI::OK_AnyValue, TTI::OP_None}, CtxI);
1174 case Instruction::BitCast: {
1180 case Instruction::SExt:
1181 case Instruction::ZExt:
1182 case Instruction::FPToUI:
1183 case Instruction::FPToSI:
1184 case Instruction::FPExt:
1185 case Instruction::PtrToInt:
1186 case Instruction::PtrToAddr:
1187 case Instruction::IntToPtr:
1188 case Instruction::SIToFP:
1189 case Instruction::UIToFP:
1190 case Instruction::Trunc:
1191 case Instruction::FPTrunc:
1192 case Instruction::AddrSpaceCast: {
1207 if (WidenMemoryRecipe ==
nullptr)
1211 if (!WidenMemoryRecipe->isConsecutive())
1213 if (WidenMemoryRecipe->isMasked())
1220 bool IsReverse =
false;
1222 if (Opcode == Instruction::Trunc || Opcode == Instruction::FPTrunc) {
1230 Recipe->getVPSingleValue()->getSingleUser());
1233 CCH = ComputeCCH(Recipe);
1237 else if (Opcode == Instruction::ZExt || Opcode == Instruction::SExt ||
1238 Opcode == Instruction::FPExt) {
1249 CCH = ComputeCCH(Recipe);
1258 return Ctx.TTI.getCastInstrCost(
1259 Opcode, ResultTy, SrcTy, CCH, Ctx.CostKind,
1262 case Instruction::Select: {
1281 (IsLogicalAnd || IsLogicalOr)) {
1284 const auto [Op1VK, Op1VP] = Ctx.getOperandInfo(Op0);
1285 const auto [Op2VK, Op2VP] = Ctx.getOperandInfo(Op1);
1289 [](
VPValue *
Op) {
return Op->getUnderlyingValue(); }))
1291 return Ctx.TTI.getArithmeticInstrCost(
1292 IsLogicalOr ? Instruction::Or : Instruction::And, ResultTy,
1293 Ctx.CostKind, {Op1VK, Op1VP}, {Op2VK, Op2VP}, Operands,
SI);
1297 if (!IsScalarCond && VF.
isVector())
1304 Pred = Cmp->getPredicate();
1306 return Ctx.TTI.getCmpSelInstrCost(
1307 Instruction::Select, VectorTy, CondTy, Pred, Ctx.CostKind,
1308 {TTI::OK_AnyValue, TTI::OP_None}, {TTI::OK_AnyValue, TTI::OP_None},
SI);
1324 "Should only generate a vector value or single scalar, not scalars "
1332 case Instruction::Select: {
1341 return Ctx.TTI.getCmpSelInstrCost(Instruction::Select, VecTy, CondTy, Pred,
1344 case Instruction::ExtractElement:
1354 return Ctx.TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy,
1359 return Ctx.TTI.getArithmeticReductionCost(
1366 return Ctx.TTI.getCmpSelInstrCost(Instruction::ICmp, ScalarTy,
1373 return Ctx.TTI.getIntrinsicInstrCost(Attrs, Ctx.CostKind);
1379 return Ctx.TTI.getCmpSelInstrCost(Instruction::ICmp, ScalarTy,
1388 Cost += Ctx.TTI.getArithmeticInstrCost(
1389 Instruction::Xor, PredTy, Ctx.CostKind,
1390 {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
1391 {TargetTransformInfo::OK_UniformConstantValue,
1392 TargetTransformInfo::OP_None});
1394 Cost += Ctx.TTI.getArithmeticInstrCost(Instruction::Sub, Ty, Ctx.CostKind);
1402 Intrinsic::experimental_vector_extract_last_active, ScalarTy,
1403 {VecTy, MaskTy, ScalarTy});
1404 return Ctx.TTI.getIntrinsicInstrCost(ICA, Ctx.CostKind);
1409 return Ctx.TTI.getShuffleCost(
1419 return Ctx.TTI.getIntrinsicInstrCost(Attrs, Ctx.CostKind);
1426 I32Ty, {Arg0Ty, I32Ty, I1Ty});
1427 return Ctx.TTI.getIntrinsicInstrCost(Attrs, Ctx.CostKind);
1430 assert(VF.
isVector() &&
"Reverse operation must be vector type");
1439 VectorTy, {}, Ctx.CostKind,
1445 return Ctx.TTI.getIndexedVectorInstrCostFromEnd(Instruction::ExtractElement,
1446 VecTy, Ctx.CostKind, 0);
1456 return Ctx.TTI.getArithmeticInstrCost(Instruction::Xor, ValTy,
1474 return Ctx.TTI.getCmpSelInstrCost(Instruction::ICmp, ValTy,
1478 case Instruction::FCmp:
1479 case Instruction::ICmp:
1491 "unexpected VPInstruction witht underlying value");
1499 getOpcode() == Instruction::ExtractElement ||
1511 case Instruction::Load:
1512 case Instruction::PHI:
1524 Type *Ty =
Op->getScalarType();
1530 "types of operand 0 and new operand must match");
1536 "appended operand must match operand 0's scalar type");
1540 "appended operand must match operand 1's scalar type");
1545 constexpr unsigned NumInitialOperands = 3;
1547 "ExtractLastActive must have at least the initial 3 operands");
1548 bool IsMaskSlot = ((
getNumOperands() - NumInitialOperands) & 1u) == 1u;
1549 assert((IsMaskSlot ? Ty->isIntegerTy(1)
1551 "ExtractLastActive expects alternating data/mask operands "
1552 "matching operand 1's type and i1, respectively");
1557 "outside of construction");
1567 "Set flags not supported for the provided opcode");
1569 "Opcode requires specific flags to be set");
1571 Value *GeneratedValue = generate(State);
1574 assert(GeneratedValue &&
"generate must produce a value");
1575 bool GeneratesPerFirstLaneOnly = canGenerateScalarForFirstLane() &&
1580 !GeneratesPerFirstLaneOnly) ||
1581 State.VF.isScalar()) &&
1582 "scalar value but not only first lane defined");
1583 State.set(
this, GeneratedValue,
1584 GeneratesPerFirstLaneOnly);
1600 case Instruction::ExtractValue:
1601 case Instruction::InsertValue:
1602 case Instruction::GetElementPtr:
1603 case Instruction::ExtractElement:
1604 case Instruction::InsertElement:
1605 case Instruction::Freeze:
1606 case Instruction::FCmp:
1607 case Instruction::ICmp:
1608 case Instruction::Select:
1609 case Instruction::PHI:
1644 case Instruction::Call:
1660 case Instruction::ExtractElement:
1662 case Instruction::InsertElement:
1664 case Instruction::PHI:
1666 case Instruction::FCmp:
1667 case Instruction::ICmp:
1668 case Instruction::Select:
1669 case Instruction::Or:
1670 case Instruction::Freeze:
1674 case Instruction::Load:
1712 case Instruction::FCmp:
1713 case Instruction::ICmp:
1714 case Instruction::Select:
1725#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1733 O << Indent <<
"EMIT" << (
isSingleScalar() ?
"-SCALAR" :
"") <<
" ";
1745 O <<
"active lane mask";
1748 O <<
"incoming-alias-mask";
1751 O <<
"EXPLICIT-VECTOR-LENGTH";
1754 O <<
"first-order splice";
1757 O <<
"branch-on-cond";
1760 O <<
"branch-on-two-conds";
1763 O <<
"TC > VF ? TC - VF : 0";
1769 O <<
"branch-on-count";
1775 O <<
"buildstructvector";
1781 O <<
"exiting-iv-value";
1787 O <<
"extract-lane";
1790 O <<
"extract-last-lane";
1793 O <<
"extract-last-part";
1796 O <<
"extract-penultimate-element";
1799 O <<
"compute-reduction-result";
1817 O <<
"first-active-lane";
1820 O <<
"last-active-lane";
1823 O <<
"reduction-start-vector";
1826 O <<
"resume-for-epilogue";
1835 O <<
"extract-last-active";
1838 O <<
"num-active-lanes";
1859 State.set(
this, Cast,
VPLane(0));
1870 Value *
VScale = State.Builder.CreateVScale(ResultTy);
1871 State.set(
this,
VScale,
true);
1894 return Ctx.TTI.getIntrinsicInstrCost(Attrs, Ctx.CostKind);
1912#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1915 O << Indent <<
"EMIT" << (
isSingleScalar() ?
"-SCALAR" :
"") <<
" ";
1922 O <<
"wide-iv-step ";
1926 O <<
"step-vector " << *ResultTy;
1929 O <<
"vscale " << *ResultTy;
1931 case Instruction::Load:
1940 O <<
" to " << *ResultTy;
1951 const Twine &Name) {
1954 : Phi.getNumIncoming();
1955 Value *FirstInc = State.get(Phi.getIncomingValue(0), IsScalar);
1956 PHINode *NewPhi = State.Builder.CreatePHI(FirstInc->
getType(), 2, Name);
1958 State.CFG.VPBB2IRBB.at(Phi.getIncomingBlock(0)));
1959 for (
unsigned Idx = 1; Idx != NumIncoming; ++Idx)
1960 NewPhi->
addIncoming(State.get(Phi.getIncomingValue(Idx), IsScalar),
1961 State.CFG.VPBB2IRBB.at(Phi.getIncomingBlock(Idx)));
1962 State.set(R, NewPhi, IsScalar);
1969#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1972 O << Indent <<
"EMIT" << (
isSingleScalar() ?
"-SCALAR" :
"") <<
" ";
1988 "PHINodes must be handled by VPIRPhi");
1991 State.Builder.SetInsertPoint(I.getParent(), std::next(I.getIterator()));
2001#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2004 O << Indent <<
"IR " << I;
2016 auto *PredVPBB = Pred->getExitingBasicBlock();
2017 BasicBlock *PredBB = State.CFG.VPBB2IRBB[PredVPBB];
2024 if (Phi->getBasicBlockIndex(PredBB) == -1)
2025 Phi->addIncoming(V, PredBB);
2027 Phi->setIncomingValueForBlock(PredBB, V);
2032 State.Builder.SetInsertPoint(Phi->getParent(), std::next(Phi->getIterator()));
2037 assert(R->getNumOperands() == R->getParent()->getNumPredecessors() &&
2038 "Number of phi operands must match number of predecessors");
2039 unsigned Position = R->getParent()->getIndexForPredecessor(IncomingBlock);
2040 R->removeOperand(Position);
2052 R->setOperand(R->getParent()->getIndexForPredecessor(VPBB), V);
2055#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2069#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2075 O <<
" (extra operand" << (
getNumOperands() > 1 ?
"s" :
"") <<
": ";
2080 std::get<1>(
Op)->printAsOperand(O);
2088 for (
const auto &[Kind,
Node] : Metadata)
2089 I.setMetadata(Kind,
Node);
2094 for (
const auto &[KindA, MDA] : Metadata) {
2095 for (
const auto &[KindB, MDB] :
Other.Metadata) {
2096 if (KindA == KindB && MDA == MDB) {
2102 Metadata = std::move(MetadataIntersection);
2105#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2114 auto [Kind,
Node] = KindNodePair;
2116 "Unexpected unnamed metadata kind");
2117 O <<
"!" << MDNames[Kind] <<
" ";
2125 assert(State.VF.isVector() &&
"not widening");
2126 assert(Variant !=
nullptr &&
"Can't create vector function.");
2137 Arg = State.get(
I.value(),
VPLane(0));
2140 Args.push_back(Arg);
2146 CI->getOperandBundlesAsDefs(OpBundles);
2148 CallInst *V = State.Builder.CreateCall(Variant, Args, OpBundles);
2151 V->setCallingConv(Variant->getCallingConv());
2153 if (!V->getType()->isVoidTy())
2160 "Variant return type must match VF");
2166 return Ctx.TTI.getCallInstrCost(
nullptr, Variant->getReturnType(),
2167 Variant->getFunctionType()->params(),
2173 assert(Variant &&
"Variant not set");
2176 auto [Idx, V] = Arg;
2183#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2186 O << Indent <<
"WIDEN-CALL ";
2198 O <<
" @" << CalledFn->
getName() <<
"(";
2204 O <<
" (using library function";
2205 if (Variant->hasName())
2206 O <<
": " << Variant->getName();
2212 assert(State.VF.isVector() &&
"not widening");
2220 for (
auto [Idx, Ty] :
enumerate(ContainedTys)) {
2233 Arg = State.get(
I.value(),
VPLane(0));
2239 Args.push_back(Arg);
2243 Module *M = State.Builder.GetInsertBlock()->getModule();
2247 "Can't retrieve vector intrinsic or vector-predication intrinsics.");
2252 CI->getOperandBundlesAsDefs(OpBundles);
2254 CallInst *V = State.Builder.CreateCall(VectorF, Args, OpBundles);
2264 if (!V->getType()->isVoidTy())
2271 Type *ScalarRetTy = R.getScalarType();
2275 if (
ID == Intrinsic::experimental_vp_reverse && ScalarRetTy->
isIntegerTy(1))
2284 for (
const auto &[Idx,
Op] :
enumerate(Operands)) {
2285 auto *V =
Op->getUnderlyingValue();
2288 Arguments.push_back(UI->getArgOperand(Idx));
2305 ID, RetTy,
Arguments, ParamTys, R.getFastMathFlagsOrNone(),
2308 return Ctx.TTI.getIntrinsicInstrCost(CostAttrs, Ctx.CostKind);
2330#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2333 O << Indent <<
"WIDEN-INTRINSIC ";
2356 State.set(
this, MemI);
2362 return Ctx.TTI.getMemIntrinsicInstrCost(
2386 Value *Mask =
nullptr;
2388 Mask = State.get(VPMask);
2391 Builder.CreateVectorSplat(VTy->
getElementCount(), Builder.getInt1(1));
2395 if (Opcode == Instruction::Sub)
2396 IncAmt = Builder.CreateNeg(IncAmt);
2398 assert(Opcode == Instruction::Add &&
"only add or sub supported for now");
2400 Instruction *HistogramInst = State.Builder.CreateIntrinsicWithoutFolding(
2401 Intrinsic::experimental_vector_histogram_add, {VTy, IncAmt->
getType()},
2422 Ctx.TTI.getArithmeticInstrCost(Instruction::Mul, VTy, Ctx.CostKind);
2431 {PtrTy, IncTy, MaskTy});
2434 return Ctx.TTI.getIntrinsicInstrCost(ICA, Ctx.CostKind) + MulCost +
2435 Ctx.TTI.getArithmeticInstrCost(Opcode, VTy, Ctx.CostKind);
2438#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2441 O << Indent <<
"WIDEN-HISTOGRAM buckets: ";
2444 if (Opcode == Instruction::Sub)
2447 assert(Opcode == Instruction::Add);
2459VPIRFlags::FastMathFlagsTy::FastMathFlagsTy(
const FastMathFlags &FMF) {
2471 case Instruction::Add:
2472 case Instruction::Sub:
2473 case Instruction::Mul:
2474 case Instruction::Shl:
2477 case Instruction::Trunc:
2479 case Instruction::Or:
2481 case Instruction::AShr:
2482 case Instruction::LShr:
2483 case Instruction::UDiv:
2484 case Instruction::SDiv:
2485 return ExactFlagsTy(
false);
2486 case Instruction::GetElementPtr:
2490 case Instruction::ZExt:
2491 case Instruction::UIToFP:
2493 case Instruction::FAdd:
2494 case Instruction::FSub:
2495 case Instruction::FMul:
2496 case Instruction::FDiv:
2497 case Instruction::FRem:
2498 case Instruction::FNeg:
2499 case Instruction::FPExt:
2500 case Instruction::FPTrunc:
2502 case Instruction::ICmp:
2503 case Instruction::FCmp:
2514 case OperationType::OverflowingBinOp:
2515 return Opcode == Instruction::Add || Opcode == Instruction::Sub ||
2516 Opcode == Instruction::Mul || Opcode == Instruction::Shl ||
2517 Opcode == VPInstruction::VPInstruction::CanonicalIVIncrementForPart;
2518 case OperationType::Trunc:
2519 return Opcode == Instruction::Trunc;
2520 case OperationType::DisjointOp:
2521 return Opcode == Instruction::Or;
2522 case OperationType::PossiblyExactOp:
2523 return Opcode == Instruction::AShr || Opcode == Instruction::LShr ||
2524 Opcode == Instruction::UDiv || Opcode == Instruction::SDiv;
2525 case OperationType::GEPOp:
2526 return Opcode == Instruction::GetElementPtr ||
2529 case OperationType::FPMathOp:
2530 return Opcode == Instruction::Call || Opcode == Instruction::FAdd ||
2531 Opcode == Instruction::FMul || Opcode == Instruction::FSub ||
2532 Opcode == Instruction::FNeg || Opcode == Instruction::FDiv ||
2533 Opcode == Instruction::FRem || Opcode == Instruction::FPExt ||
2534 Opcode == Instruction::FPTrunc || Opcode == Instruction::PHI ||
2535 Opcode == Instruction::Select || Opcode == Instruction::SIToFP ||
2536 Opcode == Instruction::UIToFP ||
2539 case OperationType::FCmp:
2540 return Opcode == Instruction::FCmp;
2541 case OperationType::NonNegOp:
2542 return Opcode == Instruction::ZExt || Opcode == Instruction::UIToFP;
2543 case OperationType::Cmp:
2544 return Opcode == Instruction::FCmp || Opcode == Instruction::ICmp;
2545 case OperationType::ReductionOp:
2547 case OperationType::Other:
2555 if (Opcode == Instruction::ICmp)
2556 return OpType == OperationType::Cmp;
2557 if (Opcode == Instruction::FCmp)
2558 return OpType == OperationType::FCmp;
2560 return OpType == OperationType::ReductionOp;
2563 return Required == OperationType::Other || Required == OpType;
2567#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2580 OS <<
"add-chain-with-subs";
2610 OS <<
"fadd-chain-with-subs";
2637 OS <<
"fminimumnum";
2640 OS <<
"fmaximumnum";
2659 case OperationType::Cmp:
2662 case OperationType::FCmp:
2666 case OperationType::DisjointOp:
2670 case OperationType::PossiblyExactOp:
2674 case OperationType::OverflowingBinOp:
2680 case OperationType::Trunc:
2686 case OperationType::FPMathOp:
2689 case OperationType::GEPOp: {
2691 if (Flags.isInBounds())
2693 else if (Flags.hasNoUnsignedSignedWrap())
2695 if (Flags.hasNoUnsignedWrap())
2699 case OperationType::NonNegOp:
2703 case OperationType::ReductionOp: {
2714 case OperationType::Other:
2722 auto &Builder = State.Builder;
2724 case Instruction::Call:
2725 case Instruction::UncondBr:
2726 case Instruction::CondBr:
2727 case Instruction::PHI:
2728 case Instruction::GetElementPtr:
2730 case Instruction::UDiv:
2731 case Instruction::SDiv:
2732 case Instruction::SRem:
2733 case Instruction::URem:
2734 case Instruction::Add:
2735 case Instruction::FAdd:
2736 case Instruction::Sub:
2737 case Instruction::FSub:
2738 case Instruction::FNeg:
2739 case Instruction::Mul:
2740 case Instruction::FMul:
2741 case Instruction::FDiv:
2742 case Instruction::FRem:
2743 case Instruction::Shl:
2744 case Instruction::LShr:
2745 case Instruction::AShr:
2746 case Instruction::And:
2747 case Instruction::Or:
2748 case Instruction::Xor: {
2752 Ops.push_back(State.get(VPOp));
2754 Value *V = Builder.CreateNAryOp(Opcode,
Ops);
2765 case Instruction::ExtractValue: {
2768 Value *Extract = Builder.CreateExtractValue(
2770 State.set(
this, Extract);
2773 case Instruction::Freeze: {
2775 Value *Freeze = Builder.CreateFreeze(
Op);
2776 State.set(
this, Freeze);
2779 case Instruction::ICmp:
2780 case Instruction::FCmp: {
2782 bool FCmp = Opcode == Instruction::FCmp;
2798 case Instruction::Select: {
2803 Value *Sel = State.Builder.CreateSelect(
Cond, Op0, Op1);
2804 State.set(
this, Sel);
2823 State.get(
this)->getType() &&
2824 "inferred type and type from generated instructions do not match");
2831 case Instruction::UDiv:
2832 case Instruction::SDiv:
2833 case Instruction::SRem:
2834 case Instruction::URem:
2839 case Instruction::FNeg:
2840 case Instruction::Add:
2841 case Instruction::FAdd:
2842 case Instruction::Sub:
2843 case Instruction::FSub:
2844 case Instruction::Mul:
2845 case Instruction::FMul:
2846 case Instruction::FDiv:
2847 case Instruction::FRem:
2848 case Instruction::Shl:
2849 case Instruction::LShr:
2850 case Instruction::AShr:
2851 case Instruction::And:
2852 case Instruction::Or:
2853 case Instruction::Xor:
2854 case Instruction::Freeze:
2855 case Instruction::ExtractValue:
2856 case Instruction::ICmp:
2857 case Instruction::FCmp:
2858 case Instruction::Select:
2865#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2868 O << Indent <<
"WIDEN ";
2877 auto &Builder = State.Builder;
2879 assert(State.VF.isVector() &&
"Not vectorizing?");
2884 State.set(
this, Cast);
2896#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2899 O << Indent <<
"WIDEN-CAST ";
2910 return Ctx.TTI.getCFInstrCost(Instruction::PHI, Ctx.CostKind);
2913#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2918 O <<
" = WIDEN-INDUCTION";
2923 O <<
" (truncated to " << *TI->getType() <<
")";
2953 bool NeedsMul =
true, NeedsAdd =
true, NeedsShl =
false;
2958 NeedsAdd = !StartC->isZero();
2970 else if (StepC->isMinusOne()) {
2977 }
else if (StepC->getValue().isPowerOf2()) {
2990 if ((NeedsAdd || NeedsMul || NeedsShl) && StepTySize != IndexTySize) {
2992 StepTySize < IndexTySize ? Instruction::Trunc : Instruction::SExt;
2993 Cost += Ctx.TTI.getCastInstrCost(
2998 Cost += Ctx.TTI.getArithmeticInstrCost(Instruction::Mul, StepTy,
3001 Cost += Ctx.TTI.getArithmeticInstrCost(
3002 Instruction::Shl, StepTy, Ctx.CostKind,
3003 {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
3004 {TargetTransformInfo::OK_UniformConstantValue,
3005 TargetTransformInfo::OP_None});
3007 Cost += Ctx.TTI.getArithmeticInstrCost(Instruction::Add, StepTy,
3016#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3021 O <<
" = DERIVED-IV ";
3066 return Ctx.TTI.getArithmeticInstrCost(Instruction::Add, BaseIVTy,
3084 assert(BaseIVTy == Step->
getType() &&
"Types of BaseIV and Step must match!");
3091 AddOp = Instruction::Add;
3092 MulOp = Instruction::Mul;
3094 AddOp = InductionOpcode;
3095 MulOp = Instruction::FMul;
3102 unsigned EndLane = FirstLaneOnly ? 1 : State.VF.getKnownMinValue();
3106 for (
unsigned Lane = 0; Lane < EndLane; ++Lane) {
3111 ? ConstantInt::get(BaseIVTy, Lane,
false,
3113 : ConstantFP::get(BaseIVTy, Lane);
3114 Value *StartIdx = Builder.CreateBinOp(AddOp, StartIdx0, LaneValue);
3116 "Expected StartIdx to be folded to a constant when VF is not "
3118 auto *
Mul = Builder.CreateBinOp(MulOp, StartIdx, Step);
3119 auto *
Add = Builder.CreateBinOp(AddOp, BaseIV,
Mul);
3124#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3129 O <<
" = SCALAR-STEPS ";
3140 assert(State.VF.isVector() &&
"not widening");
3150#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3153 O << Indent <<
"WIDEN-GEP ";
3155 O <<
" = getelementptr";
3171 VPValue *VF = Builder.createScalarZExtOrTrunc(VFVal, IndexTy, VFTy,
3179 Builder.createOverflowingOp(Instruction::Mul, {VFMinusOne, Stride});
3186 Builder.createOverflowingOp(Instruction::Mul, {PartxStride, VF}));
3191 auto &Builder = State.Builder;
3197 State.set(
this, ResultPtr,
true);
3200#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3205 O <<
" = vector-end-pointer";
3213 "Expected prior simplification of recipe without VFxPart");
3215 auto &Builder = State.Builder;
3220 Value *Stride = Builder.CreateZExtOrTrunc(State.get(
getStride(),
true),
3226 State.set(
this, ResultPtr,
true);
3229#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3234 O <<
" = vector-pointer";
3250 Ctx.TTI.getCmpSelInstrCost(Instruction::Select, ResultTy, CmpTy,
3254#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3257 O << Indent <<
"BLEND ";
3282 "In-loop AnyOf reductions aren't currently supported");
3288 Value *NewCond = State.get(
Cond, State.VF.isScalar());
3294 if (State.VF.isVector())
3295 Start = State.Builder.CreateVectorSplat(VecTy->
getElementCount(), Start);
3297 Value *
Select = State.Builder.CreateSelect(NewCond, NewVecOp, Start);
3304 if (State.VF.isVector())
3308 NewRed = State.Builder.CreateBinOp(
3310 PrevInChain, NewVecOp);
3311 PrevInChain = NewRed;
3312 NextInChain = NewRed;
3315 "Unexpected partial reduction kind");
3317 NewRed = State.Builder.CreateIntrinsic(
3320 : Intrinsic::vector_partial_reduce_fadd,
3321 {PrevInChain, NewVecOp}, State.Builder.getFastMathFlags(),
3323 PrevInChain = NewRed;
3324 NextInChain = NewRed;
3327 "The reduction must either be ordered, partial or in-loop");
3331 NextInChain =
createMinMaxOp(State.Builder, Kind, NewRed, PrevInChain);
3333 NextInChain = State.Builder.CreateBinOp(
3335 PrevInChain, NewRed);
3342 auto &Builder = State.Builder;
3354 Mask = State.get(CondOp);
3356 Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
3366 NewRed = Builder.CreateBinOp(
3370 State.set(
this, NewRed,
true);
3380 std::optional<FastMathFlags> OptionalFMF =
3389 CondCost = Ctx.TTI.getCmpSelInstrCost(Instruction::Select, VectorTy,
3390 CondTy, Pred, Ctx.CostKind);
3392 return CondCost + Ctx.TTI.getPartialReductionCost(
3393 Opcode, ElementTy, ElementTy, ElementTy, VF,
3402 "Any-of reduction not implemented in VPlan-based cost model currently.");
3408 return Ctx.TTI.getMinMaxReductionCost(Id, VectorTy,
FMFs, Ctx.CostKind);
3413 return Ctx.TTI.getArithmeticReductionCost(Opcode, VectorTy, OptionalFMF,
3417VPExpressionRecipe::VPExpressionRecipe(
3418 ExpressionTypes ExpressionType,
3424 ExpressionRecipes(ExpressionRecipes), ExpressionType(ExpressionType) {
3425 assert(!ExpressionRecipes.empty() &&
"Nothing to combine?");
3429 "expression cannot contain recipes with side-effects");
3433 for (
auto *R : ExpressionRecipes)
3434 ExpressionRecipesAsSetOfUsers.
insert(R);
3440 if (R != ExpressionRecipes.back() &&
3441 any_of(
R->users(), [&ExpressionRecipesAsSetOfUsers](
VPUser *U) {
3442 return !ExpressionRecipesAsSetOfUsers.contains(U);
3447 R->replaceUsesWithIf(CopyForExtUsers, [&ExpressionRecipesAsSetOfUsers](
3449 return !ExpressionRecipesAsSetOfUsers.contains(&U);
3454 R->removeFromParent();
3461 for (
auto *R : ExpressionRecipes) {
3462 for (
const auto &[Idx,
Op] :
enumerate(
R->operands())) {
3463 auto *
Def =
Op->getDefiningRecipe();
3464 if (Def && ExpressionRecipesAsSetOfUsers.contains(Def))
3473 for (
auto *R : ExpressionRecipes)
3474 for (
auto const &[LiveIn, Tmp] :
zip(operands(), LiveInPlaceholders))
3475 R->replaceUsesOfWith(LiveIn, Tmp);
3479 for (
auto *R : ExpressionRecipes)
3482 if (!R->getParent())
3483 R->insertBefore(
this);
3486 LiveInPlaceholders[Idx]->replaceAllUsesWith(
Op);
3489 ExpressionRecipes.clear();
3499 switch (ExpressionType) {
3500 case ExpressionTypes::NegatedExtendedReduction:
3501 assert((Opcode == Instruction::Add || Opcode == Instruction::FAdd) &&
3502 "Unexpected opcode");
3503 Opcode = Opcode == Instruction::Add ? Instruction::Sub : Instruction::FSub;
3505 case ExpressionTypes::ExtendedReduction: {
3509 if (RedR->isPartialReduction())
3510 return Ctx.TTI.getPartialReductionCost(
3515 ? std::optional{RedR->getFastMathFlagsOrNone()}
3519 return Ctx.TTI.getExtendedReductionCost(
3520 Opcode, ExtR->getOpcode() == Instruction::ZExt, RedTy, SrcVecTy,
3521 std::nullopt, Ctx.CostKind);
3525 case ExpressionTypes::MulAccReduction:
3526 return Ctx.TTI.getMulAccReductionCost(
false, Opcode, RedTy, SrcVecTy,
3529 case ExpressionTypes::ExtNegatedMulAccReduction:
3531 case Instruction::Add:
3532 Opcode = Instruction::Sub;
3534 case Instruction::FAdd:
3535 Opcode = Instruction::FSub;
3541 case ExpressionTypes::ExtMulAccReduction: {
3543 if (RedR->isPartialReduction()) {
3547 return Ctx.TTI.getPartialReductionCost(
3551 Ext0R->getOpcode()),
3553 Ext1R->getOpcode()),
3554 Mul->getOpcode(), Ctx.CostKind,
3556 ? std::optional{RedR->getFastMathFlagsOrNone()}
3559 assert(Opcode != Instruction::FSub &&
"Only integer types are supported");
3560 return Ctx.TTI.getMulAccReductionCost(
3563 Opcode, RedTy, SrcVecTy, Ctx.CostKind);
3571 return R->mayReadFromMemory() || R->mayWriteToMemory();
3579 "expression cannot contain recipes with side-effects");
3585 return RR && !RR->isPartialReduction();
3588#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3592 O << Indent <<
"EXPRESSION ";
3600 switch (ExpressionType) {
3601 case ExpressionTypes::NegatedExtendedReduction:
3602 case ExpressionTypes::ExtendedReduction: {
3603 bool Negated = ExpressionType == ExpressionTypes::NegatedExtendedReduction;
3605 O <<
" + " << (Red->isPartialReduction() ?
"partial." :
"") <<
"reduce.";
3608 O << (Opcode == Instruction::Add ?
"sub (0, " :
"fneg(");
3616 << *Ext0->getScalarType();
3617 if (Red->isConditional()) {
3624 case ExpressionTypes::ExtNegatedMulAccReduction: {
3626 O <<
" + " << (Red->isPartialReduction() ?
"partial." :
"") <<
"reduce.";
3636 << *Ext0->getScalarType() <<
"), (";
3640 << *Ext1->getScalarType() <<
")";
3641 if (Red->isConditional()) {
3648 case ExpressionTypes::MulAccReduction:
3649 case ExpressionTypes::ExtMulAccReduction: {
3651 O <<
" + " << (Red->isPartialReduction() ?
"partial." :
"") <<
"reduce.";
3656 bool IsExtended = ExpressionType == ExpressionTypes::ExtMulAccReduction;
3658 : ExpressionRecipes[0]);
3666 << *Ext0->getScalarType() <<
"), (";
3674 << *Ext1->getScalarType() <<
")";
3676 if (Red->isConditional()) {
3689 O << Indent <<
"PARTIAL-REDUCE ";
3691 O << Indent <<
"REDUCE ";
3710 O << Indent <<
"REDUCE ";
3734 "VPReplicateRecipes must be unrolled before ::execute");
3739 Cloned->
setName(Instr->getName() +
".cloned");
3743 if (ResultTy != Cloned->
getType())
3759 State.Builder.Insert(Cloned);
3761 State.set(
this, Cloned,
true);
3765 State.AC->registerAssumption(
II);
3788 Ctx.SkipCostComputation.insert(UI);
3794 case Instruction::Alloca:
3797 return Ctx.TTI.getArithmeticInstrCost(Instruction::Mul,
3799 case Instruction::GetElementPtr:
3805 case Instruction::Call: {
3813 case Instruction::Add:
3814 case Instruction::Sub:
3815 case Instruction::FAdd:
3816 case Instruction::FSub:
3817 case Instruction::Mul:
3818 case Instruction::FMul:
3819 case Instruction::FDiv:
3820 case Instruction::FRem:
3821 case Instruction::Shl:
3822 case Instruction::LShr:
3823 case Instruction::AShr:
3824 case Instruction::And:
3825 case Instruction::Or:
3826 case Instruction::Xor:
3827 case Instruction::ICmp:
3828 case Instruction::FCmp:
3832 case Instruction::SDiv:
3833 case Instruction::UDiv:
3834 case Instruction::SRem:
3835 case Instruction::URem: {
3848 return Ctx.skipCostComputation(
3850 PredR->getOperand(0)->getUnderlyingValue()),
3865 Ctx.TTI.getCFInstrCost(Instruction::PHI, Ctx.CostKind);
3869 ScalarCost /= Ctx.getPredBlockCostDivisor(UI->
getParent());
3872 case Instruction::Load:
3873 case Instruction::Store: {
3874 bool IsLoad = UI->
getOpcode() == Instruction::Load;
3885 bool PreferVectorizedAddressing = Ctx.TTI.prefersVectorizedAddressing();
3886 bool UsedByLoadStoreAddress =
3889 UI->
getOpcode(), ValTy, Alignment, AS, Ctx.CostKind, OpInfo,
3890 UsedByLoadStoreAddress ? UI :
nullptr);
3895 Ctx.TTI.getAddressComputationCost(
3896 PtrTy, UsedByLoadStoreAddress ?
nullptr : Ctx.PSE.getSE(), PtrSCEV,
3907 if (!UsedByLoadStoreAddress) {
3908 bool EfficientVectorLoadStore =
3909 Ctx.TTI.supportsEfficientVectorElementLoadStore();
3910 if (!(IsLoad && !PreferVectorizedAddressing) &&
3911 !(!IsLoad && EfficientVectorLoadStore))
3914 if (!EfficientVectorLoadStore)
3922 Ctx.getScalarizationOverhead(ResultTy, OpsToScalarize, VF, VIC,
true);
3928 Cost /= Ctx.getPredBlockCostDivisor(UI->getParent());
3929 Cost += Ctx.TTI.getCFInstrCost(Instruction::CondBr, Ctx.CostKind);
3933 Cost += Ctx.TTI.getScalarizationOverhead(
3935 false,
true, Ctx.CostKind);
3937 if (Ctx.useEmulatedMaskMemRefHack(
this, VF)) {
3945 case Instruction::SExt:
3946 case Instruction::ZExt:
3947 case Instruction::FPToUI:
3948 case Instruction::FPToSI:
3949 case Instruction::FPExt:
3950 case Instruction::PtrToInt:
3951 case Instruction::PtrToAddr:
3952 case Instruction::IntToPtr:
3953 case Instruction::SIToFP:
3954 case Instruction::UIToFP:
3955 case Instruction::Trunc:
3956 case Instruction::FPTrunc:
3957 case Instruction::Select:
3958 case Instruction::AddrSpaceCast: {
3963 case Instruction::ExtractValue:
3964 case Instruction::InsertValue:
3965 return Ctx.TTI.getInsertExtractValueCost(
getOpcode(), Ctx.CostKind);
3968 return Ctx.getLegacyCost(UI, VF);
3975 ArgOps, [&](
const VPValue *
Op) {
return Op->getScalarType(); });
3978 auto GetIntrinsicCost = [&] {
3981 return Ctx.TTI.getIntrinsicInstrCost(
3986 assert(GetIntrinsicCost() == 0 &&
"scalarizing intrinsic should be free");
3991 Ctx.TTI.getCallInstrCost(CalledFn, ResultTy, Tys, Ctx.CostKind);
3992 if (IsSingleScalar) {
3993 ScalarCallCost = std::min(ScalarCallCost, GetIntrinsicCost());
3994 return ScalarCallCost;
4002 Ctx.getScalarizationOverhead(ResultTy, ArgOps, VF);
4005#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4008 O << Indent << (IsSingleScalar ?
"CLONE " :
"REPLICATE ");
4017 O <<
"@" << CB->getCalledFunction()->getName() <<
"(";
4041 llvm_unreachable(
"recipe must be removed when dissolving replicate region");
4053 llvm_unreachable(
"recipe must be removed when dissolving replicate region");
4056#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4059 O << Indent <<
"PHI-PREDICATED-INSTRUCTION ";
4083 : R->getOperand(1)->getScalarType();
4087 unsigned Opcode = IsLoad ? Instruction::Load : Instruction::Store;
4093 [[maybe_unused]]
auto IsReverseMask = [
this, R]() {
4103 assert(!IsReverseMask() &&
4104 "Inconsecutive memory access should not have reverse order");
4116 : Intrinsic::vp_scatter;
4117 return Ctx.TTI.getAddressComputationCost(PtrTy,
nullptr,
nullptr,
4119 Ctx.TTI.getMemIntrinsicInstrCost(
4128 : Intrinsic::masked_store;
4129 Cost += Ctx.TTI.getMemIntrinsicInstrCost(
4134 : R->getOperand(1));
4135 Cost += Ctx.TTI.getMemoryOpCost(Opcode, Ty,
Alignment, AS, Ctx.CostKind,
4146 auto &Builder = State.Builder;
4147 Value *Mask =
nullptr;
4149 Mask = State.get(VPMask);
4154 NewLI = Builder.CreateMaskedGather(DataTy, Addr,
Alignment, Mask,
nullptr,
4155 "wide.masked.gather");
4158 Builder.CreateMaskedLoad(DataTy, Addr,
Alignment, Mask,
4161 NewLI = Builder.CreateAlignedLoad(DataTy, Addr,
Alignment,
"wide.load");
4164 State.set(
this, NewLI);
4167#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4170 O << Indent <<
"WIDEN ";
4182 auto &Builder = State.Builder;
4186 Value *Mask =
nullptr;
4188 Mask = State.get(VPMask);
4190 Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
4193 NewLI = Builder.CreateIntrinsicWithoutFolding(DataTy, Intrinsic::vp_gather,
4194 {Addr, Mask, EVL},
nullptr,
4195 "wide.masked.gather");
4197 NewLI = Builder.CreateIntrinsicWithoutFolding(
4198 DataTy, Intrinsic::vp_load, {Addr, Mask, EVL},
nullptr,
"vp.op.load");
4203 State.set(
this, NewLI);
4219 return Ctx.TTI.getMemIntrinsicInstrCost(
4224#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4227 O << Indent <<
"WIDEN ";
4238 auto &Builder = State.Builder;
4240 Value *Mask =
nullptr;
4242 Mask = State.get(VPMask);
4244 Value *StoredVal = State.get(StoredVPValue);
4248 NewSI = Builder.CreateMaskedScatter(StoredVal, Addr,
Alignment, Mask);
4250 NewSI = Builder.CreateMaskedStore(StoredVal, Addr,
Alignment, Mask);
4252 NewSI = Builder.CreateAlignedStore(StoredVal, Addr,
Alignment);
4256#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4259 O << Indent <<
"WIDEN store ";
4268 auto &Builder = State.Builder;
4271 Value *StoredVal = State.get(StoredValue);
4273 Value *Mask =
nullptr;
4275 Mask = State.get(VPMask);
4277 Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
4280 if (CreateScatter) {
4281 NewSI = Builder.CreateIntrinsicWithoutFolding(
4283 {StoredVal, Addr, Mask, EVL});
4285 NewSI = Builder.CreateIntrinsicWithoutFolding(
4287 {StoredVal, Addr, Mask, EVL});
4307 return Ctx.TTI.getMemIntrinsicInstrCost(
4312#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4315 O << Indent <<
"WIDEN vp.store ";
4323 auto VF = DstVTy->getElementCount();
4325 assert(VF == SrcVecTy->getElementCount() &&
"Vector dimensions do not match");
4326 Type *SrcElemTy = SrcVecTy->getElementType();
4327 Type *DstElemTy = DstVTy->getElementType();
4328 assert((
DL.getTypeSizeInBits(SrcElemTy) ==
DL.getTypeSizeInBits(DstElemTy)) &&
4329 "Vector elements must have same size");
4333 return Builder.CreateBitOrPointerCast(V, DstVTy);
4340 "Only one type should be a pointer type");
4342 "Only one type should be a floating point type");
4346 Value *CastVal = Builder.CreateBitOrPointerCast(V, VecIntTy);
4347 return Builder.CreateBitOrPointerCast(CastVal, DstVTy);
4353 const Twine &Name) {
4354 unsigned Factor = Vals.
size();
4355 assert(Factor > 1 &&
"Tried to interleave invalid number of vectors");
4359 for (
Value *Val : Vals)
4360 assert(Val->getType() == VecTy &&
"Tried to interleave mismatched types");
4365 if (VecTy->isScalableTy()) {
4366 assert(Factor <= 8 &&
"Unsupported interleave factor for scalable vectors");
4367 return Builder.CreateVectorInterleave(Vals, Name);
4374 const unsigned NumElts = VecTy->getElementCount().getFixedValue();
4375 return Builder.CreateShuffleVector(
4409 "Masking gaps for scalable vectors is not yet supported.");
4415 unsigned InterleaveFactor = Group->
getFactor();
4422 auto CreateGroupMask = [&BlockInMask, &State,
4423 &InterleaveFactor](
Value *MaskForGaps) ->
Value * {
4424 if (State.VF.isScalable()) {
4425 assert(!MaskForGaps &&
"Interleaved groups with gaps are not supported.");
4426 assert(InterleaveFactor <= 8 &&
4427 "Unsupported deinterleave factor for scalable vectors");
4428 auto *ResBlockInMask = State.get(BlockInMask);
4436 Value *ResBlockInMask = State.get(BlockInMask);
4437 Value *ShuffledMask = State.Builder.CreateShuffleVector(
4440 "interleaved.mask");
4441 return MaskForGaps ? State.Builder.CreateBinOp(Instruction::And,
4442 ShuffledMask, MaskForGaps)
4446 const DataLayout &DL = Instr->getDataLayout();
4449 Value *MaskForGaps =
nullptr;
4453 assert(MaskForGaps &&
"Mask for Gaps is required but it is null");
4457 if (BlockInMask || MaskForGaps) {
4458 Value *GroupMask = CreateGroupMask(MaskForGaps);
4460 NewLoad = State.Builder.CreateMaskedLoad(VecTy, ResAddr,
4462 PoisonVec,
"wide.masked.vec");
4464 NewLoad = State.Builder.CreateAlignedLoad(VecTy, ResAddr,
4471 if (VecTy->isScalableTy()) {
4474 assert(InterleaveFactor <= 8 &&
4475 "Unsupported deinterleave factor for scalable vectors");
4476 NewLoad = State.Builder.CreateIntrinsicWithoutFolding(
4479 nullptr,
"strided.vec");
4482 auto CreateStridedVector = [&InterleaveFactor, &State,
4483 &NewLoad](
unsigned Index) ->
Value * {
4484 assert(Index < InterleaveFactor &&
"Illegal group index");
4485 if (State.VF.isScalable())
4486 return State.Builder.CreateExtractValue(NewLoad, Index);
4492 return State.Builder.CreateShuffleVector(NewLoad, StrideMask,
4496 for (
unsigned I = 0, J = 0;
I < InterleaveFactor; ++
I) {
4503 Value *StridedVec = CreateStridedVector(
I);
4506 if (Member->getType() != ScalarTy) {
4513 StridedVec = State.Builder.CreateVectorReverse(StridedVec,
"reverse");
4515 State.set(VPDefs[J], StridedVec);
4525 Value *MaskForGaps =
4528 "Mismatch between NeedsMaskForGaps and MaskForGaps");
4532 unsigned StoredIdx = 0;
4533 for (
unsigned i = 0; i < InterleaveFactor; i++) {
4535 "Fail to get a member from an interleaved store group");
4545 Value *StoredVec = State.get(StoredValues[StoredIdx]);
4549 StoredVec = State.Builder.CreateVectorReverse(StoredVec,
"reverse");
4553 if (StoredVec->
getType() != SubVT)
4562 if (BlockInMask || MaskForGaps) {
4563 Value *GroupMask = CreateGroupMask(MaskForGaps);
4564 NewStoreInstr = State.Builder.CreateMaskedStore(
4565 IVec, ResAddr, Group->
getAlign(), GroupMask);
4568 State.Builder.CreateAlignedStore(IVec, ResAddr, Group->
getAlign());
4575#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4579 O << Indent <<
"INTERLEAVE-GROUP with factor " << IG->getFactor() <<
", ";
4588 for (
unsigned i = 0; i < IG->getFactor(); ++i) {
4589 if (!IG->getMember(i))
4592 O <<
"\n" << Indent <<
" store ";
4594 O <<
" to index " << i;
4596 O <<
"\n" << Indent <<
" ";
4598 O <<
" = load from index " << i;
4606 assert(State.VF.isScalable() &&
4607 "Only support scalable VF for EVL tail-folding.");
4609 "Masking gaps for scalable vectors is not yet supported.");
4615 unsigned InterleaveFactor = Group->
getFactor();
4616 assert(InterleaveFactor <= 8 &&
4617 "Unsupported deinterleave/interleave factor for scalable vectors");
4624 Value *InterleaveEVL = State.Builder.CreateMul(
4625 EVL, ConstantInt::get(EVL->
getType(), InterleaveFactor),
"interleave.evl",
4629 Value *GroupMask =
nullptr;
4635 State.Builder.CreateVectorSplat(WideVF, State.Builder.getTrue());
4640 CallInst *NewLoad = State.Builder.CreateIntrinsicWithoutFolding(
4641 VecTy, Intrinsic::vp_load, {ResAddr, GroupMask, InterleaveEVL},
nullptr,
4652 NewLoad = State.Builder.CreateIntrinsicWithoutFolding(
4655 nullptr,
"strided.vec");
4657 const DataLayout &DL = Instr->getDataLayout();
4658 for (
unsigned I = 0, J = 0;
I < InterleaveFactor; ++
I) {
4664 Value *StridedVec = State.Builder.CreateExtractValue(NewLoad,
I);
4666 if (Member->getType() != ScalarTy) {
4684 const DataLayout &DL = Instr->getDataLayout();
4685 for (
unsigned I = 0, StoredIdx = 0;
I < InterleaveFactor;
I++) {
4693 Value *StoredVec = State.get(StoredValues[StoredIdx]);
4695 if (StoredVec->
getType() != SubVT)
4704 CallInst *NewStore = State.Builder.CreateIntrinsicWithoutFolding(
4706 {IVec, ResAddr, GroupMask, InterleaveEVL});
4716#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4720 O << Indent <<
"INTERLEAVE-GROUP with factor " << IG->getFactor() <<
", ";
4730 for (
unsigned i = 0; i < IG->getFactor(); ++i) {
4731 if (!IG->getMember(i))
4734 O <<
"\n" << Indent <<
" vp.store ";
4736 O <<
" to index " << i;
4738 O <<
"\n" << Indent <<
" ";
4740 O <<
" = vp.load from index " << i;
4751 unsigned InsertPosIdx = 0;
4752 for (
unsigned Idx = 0; IG->getFactor(); ++Idx)
4753 if (
auto *Member = IG->getMember(Idx)) {
4754 if (Member == InsertPos)
4766 unsigned InterleaveFactor = IG->getFactor();
4771 for (
unsigned IF = 0; IF < InterleaveFactor; IF++)
4772 if (IG->getMember(IF))
4777 InsertPos->
getOpcode(), WideVecTy, IG->getFactor(), Indices,
4778 IG->getAlign(), AS, Ctx.CostKind,
getMask(), NeedsMaskForGaps);
4780 if (!IG->isReverse())
4783 return Cost + IG->getNumMembers() *
4785 VectorTy, VectorTy, {}, Ctx.CostKind,
4794#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4798 "unexpected number of operands");
4799 O << Indent <<
"EMIT ";
4801 O <<
" = WIDEN-POINTER-INDUCTION ";
4817 O << Indent <<
"EMIT ";
4819 O <<
" = EXPAND SCEV " << *Expr;
4823#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4826 O << Indent <<
"EMIT ";
4828 O <<
" = WIDEN-CANONICAL-INDUCTION";
4835 auto &Builder = State.Builder;
4839 Type *VecTy = State.VF.isScalar()
4840 ? VectorInit->getType()
4844 State.CFG.VPBB2IRBB.at(
getParent()->getCFGPredecessor(0));
4845 if (State.VF.isVector()) {
4847 auto *One = ConstantInt::get(IdxTy, 1);
4850 auto *RuntimeVF =
getRuntimeVF(Builder, IdxTy, State.VF);
4851 auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
4852 VectorInit = Builder.CreateInsertElement(
4858 Phi->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
4859 Phi->addIncoming(VectorInit, VectorPH);
4860 State.set(
this, Phi);
4867 return Ctx.TTI.getCFInstrCost(Instruction::PHI, Ctx.CostKind);
4872#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4875 O << Indent <<
"FIRST-ORDER-RECURRENCE-PHI ";
4892 State.CFG.VPBB2IRBB.at(
getParent()->getCFGPredecessor(0));
4893 bool ScalarPHI = State.VF.isScalar() ||
isInLoop();
4894 Value *StartV = State.get(StartVPV, ScalarPHI);
4898 assert(State.CurrentParentLoop->getHeader() == HeaderBB &&
4899 "recipe must be in the vector loop header");
4904 Phi->addIncoming(StartV, VectorPH);
4907#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4910 O << Indent <<
"WIDEN-REDUCTION-PHI ";
4934 return Ctx.TTI.getCFInstrCost(Instruction::PHI, Ctx.CostKind);
4937#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4940 O << Indent <<
"WIDEN-PHI ";
4950 State.CFG.VPBB2IRBB.at(
getParent()->getCFGPredecessor(0));
4953 State.Builder.CreatePHI(StartMask->
getType(), 2,
"active.lane.mask");
4954 Phi->addIncoming(StartMask, VectorPH);
4955 State.set(
this, Phi);
4958#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4961 O << Indent <<
"ACTIVE-LANE-MASK-PHI ";
4969#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4972 O << Indent <<
"CURRENT-ITERATION-PHI ";
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static MCDisassembler::DecodeStatus addOperand(MCInst &Inst, const MCOperand &Opnd)
AMDGPU Lower Kernel Arguments
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static void replaceAllUsesWith(Value *Old, Value *New, SmallPtrSet< BasicBlock *, 32 > &FreshBBs, bool IsHuge)
Replace all old uses with new ones, and push the updated BBs into FreshBBs.
Value * getPointer(Value *Ptr)
static constexpr Value * getValue(Ty &ValueOrUse)
static Value * getOpcode(Value &V, Type &Ty, InstrumentationConfig &IConf, InstrumentorIRBuilderTy &IIRB)
static std::pair< Value *, APInt > getMask(Value *WideMask, unsigned Factor, ElementCount LeafValueEC)
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
This file provides a LoopVectorizationPlanner class.
static const SCEV * getAddressAccessSCEV(Value *Ptr, PredicatedScalarEvolution &PSE, const Loop *TheLoop)
Gets the address access SCEV for Ptr, if it should be used for cost modeling according to isAddressSC...
static const Function * getCalledFunction(const Value *V)
static bool isOrdered(const Instruction *I)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
const SmallVectorImpl< MachineOperand > & Cond
This file defines the SmallVector class.
static SymbolRef::Type getType(const Symbol *Sym)
This file contains the declarations of different VPlan-related auxiliary helpers.
static Value * interleaveVectors(IRBuilderBase &Builder, ArrayRef< Value * > Vals, const Twine &Name)
Return a vector containing interleaved elements from multiple smaller input vectors.
static void executePhiRecipe(VPSingleDefRecipe *R, VPPhiAccessors &Phi, VPTransformState &State, bool IsScalar, const Twine &Name)
Shared execute logic for VPPhi and VPWidenPHIRecipe.
static Value * createBitOrPointerCast(IRBuilderBase &Builder, Value *V, VectorType *DstVTy, const DataLayout &DL)
static Instruction::BinaryOps getSubRecurOpcode(RecurKind Kind)
SmallVector< Value *, 2 > VectorParts
static cl::opt< bool > VPlanPrintMetadata("vplan-print-metadata", cl::init(true), cl::Hidden, cl::desc("Controls the printing of recipe metadata when debugging."))
static void printRecurrenceKind(raw_ostream &OS, const RecurKind &Kind)
static unsigned getCalledFnOperandIndex(ArrayRef< VPValue * > Operands)
For call VPInstruction operands, return the operand index of the called function.
This file contains the declarations of the Vectorization Plan base classes:
void printAsOperand(OutputBuffer &OB, Prec P=Prec::Default, bool StrictlyWorse=false) const
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
bool ule(const APInt &RHS) const
Unsigned less or equal comparison.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
Get the array size.
bool empty() const
Check if the array is empty.
static LLVM_ABI Attribute getWithAlignment(LLVMContext &Context, Align Alignment)
Return a uniquified Attribute object that has the specific alignment set.
LLVM Basic Block Representation.
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
Adds the attribute to the indicated argument.
This class represents a function call, abstracting a target machine's calling convention.
static LLVM_ABI bool isBitOrNoopPointerCastable(Type *SrcTy, Type *DestTy, const DataLayout &DL)
Check whether a bitcast, inttoptr, or ptrtoint cast between these types is valid and a no-op.
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_UGT
unsigned greater than
@ ICMP_ULT
unsigned less than
static LLVM_ABI StringRef getPredicateName(Predicate P)
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
This is the shared class of boolean and integer constants.
const APInt & getValue() const
Return the constant as an APInt value reference.
This is an important base class in LLVM.
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string, and methods for querying it.
static DebugLoc getUnknown()
constexpr bool isVector() const
One or more elements.
static constexpr ElementCount getScalable(ScalarTy MinVal)
static constexpr ElementCount getFixed(ScalarTy MinVal)
constexpr bool isScalar() const
Exactly one element.
Convenience struct for specifying and reasoning about fast-math flags.
LLVM_ABI void print(raw_ostream &O) const
Print fast-math flags to O.
void setAllowContract(bool B=true)
bool noSignedZeros() const
void setAllowReciprocal(bool B=true)
bool allowReciprocal() const
void setNoSignedZeros(bool B=true)
bool allowReassoc() const
Flag queries.
void setNoNaNs(bool B=true)
void setAllowReassoc(bool B=true)
Flag setters.
void setApproxFunc(bool B=true)
void setNoInfs(bool B=true)
bool allowContract() const
Class to represent function types.
Type * getParamType(unsigned i) const
Parameter type accessors.
bool willReturn() const
Determine if the function will return.
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
bool doesNotThrow() const
Determine if the function cannot unwind.
bool doesNotAccessMemory() const
Determine if the function does not access memory.
Type * getReturnType() const
Returns the type of the ret val.
Represents flags for the getelementptr instruction/expression.
static GEPNoWrapFlags none()
Common base class shared among various IRBuilders.
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
IntegerType * getInt1Ty()
Fetch the type representing a single bit.
Value * CreateInsertValue(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &Name="")
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
LLVM_ABI Value * CreateVectorSpliceRight(Value *V1, Value *V2, Value *Offset, const Twine &Name="")
Create a vector.splice.right intrinsic call, or a shufflevector that produces the same result if the ...
CondBrInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
LLVM_ABI Value * CreateSelectFMF(Value *C, Value *True, Value *False, FMFSource FMFSource, const Twine &Name="", Instruction *MDFrom=nullptr)
LLVM_ABI Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains V broadcasted to NumElts elements.
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Value * CreateFreeze(Value *V, const Twine &Name="")
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Value * CreatePtrAdd(Value *Ptr, Value *Offset, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="", MDNode *FPMathTag=nullptr, FMFSource FMFSource={})
void setFastMathFlags(FastMathFlags NewFMF)
Set the fast-math flags to be used with generated fp-math operators.
LLVM_ABI Value * CreateVectorReverse(Value *V, const Twine &Name="")
Return a vector value that contains the vector V reversed.
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateLogicalAnd(Value *Cond1, Value *Cond2, const Twine &Name="", Instruction *MDFrom=nullptr)
LLVM_ABI Value * CreateOrReduce(Value *Src)
Create a vector int OR reduction intrinsic of the source vector.
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateNot(Value *V, const Twine &Name="")
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateCountTrailingZeroElems(Type *ResTy, Value *Mask, bool ZeroIsPoison=true, const Twine &Name="")
Create a call to llvm.experimental_cttz_elts.
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
LLVM_ABI Value * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > OverloadTypes, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="", ArrayRef< OperandBundleDef > OpBundles={}, function_ref< void(CallInst *)> SetFn=[](CallInst *) {})
Variant to create a possibly constant-folded intrinsic.
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
ConstantInt * getFalse()
Get the constant value for i1 false.
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateICmpUGE(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateLogicalOr(Value *Cond1, Value *Cond2, const Twine &Name="", Instruction *MDFrom=nullptr)
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
LLVM_ABI Value * CreateUnaryIntrinsic(Intrinsic::ID ID, Value *Op, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with 1 operand which is mangled on its type.
@ IK_IntInduction
Integer induction variable. Step = C.
static InstructionCost getInvalid(CostType Val=0)
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
const char * getOpcodeName() const
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
The group of interleaved loads/stores sharing the same stride and close to each other.
uint32_t getFactor() const
InstTy * getMember(uint32_t Index) const
Get the member with the given index Index.
InstTy * getInsertPos() const
void addMetadata(InstTy *NewInst) const
Add metadata (e.g. alias info) from the instructions in this group to NewInst.
This is an important class for using LLVM in a threaded context.
Represents a single loop in the control flow graph.
Information for memory intrinsic cost model.
A Module instance is used to store all the information related to an LLVM module.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
static LLVM_ABI unsigned getOpcode(RecurKind Kind)
Returns the opcode corresponding to the RecurrenceKind.
unsigned getOpcode() const
static bool isAnyOfRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
static LLVM_ABI bool isSubRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is for a sub operation.
static bool isFindIVRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
static bool isMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is any min/max kind.
This class represents an analyzed expression in the program.
This class represents the LLVM 'select' instruction.
This class provides computation of slot numbers for LLVM Assembly writing.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
reference emplace_back(ArgTypes &&... Args)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Represent a constant reference to a string, i.e.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
bool isByteTy() const
True if this is an instance of ByteType.
bool isVectorTy() const
True if this is an instance of VectorType.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
bool isStructTy() const
True if this is an instance of StructType.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntOrPtrTy() const
Return true if this is an integer type or a pointer type.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
bool isVoidTy() const
Return true if this is 'void'.
value_op_iterator value_op_end()
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
value_op_iterator value_op_begin()
void execute(VPTransformState &State) override
Generate the active lane mask phi of the vector loop.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
RecipeListTy & getRecipeList()
Returns a reference to the list of recipes.
void insert(VPRecipeBase *Recipe, iterator InsertPt)
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenMemoryRecipe.
VPValue * getIncomingValue(unsigned Idx) const
Return incoming value number Idx.
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have...
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
const VPBlocksTy & getPredecessors() const
void printAsOperand(raw_ostream &OS, bool PrintType=false) const
static bool isHeader(const VPBlockBase *VPB, const VPDominatorTree &VPDT)
Returns true if VPB is a loop header, based on regions or VPDT in their absence.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPBranchOnMaskRecipe.
void execute(VPTransformState &State) override
Generate the extraction of the appropriate bit from the block mask and the conditional branch.
VPlan-based builder utility analogous to IRBuilder.
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
unsigned getNumDefinedValues() const
Returns the number of values defined by the VPDef.
VPValue * getVPSingleValue()
Returns the only VPValue defined by the VPDef.
VPValue * getVPValue(unsigned I)
Returns the VPValue with index I defined by the VPDef.
ArrayRef< VPRecipeValue * > definedValues()
Returns an ArrayRef of the values defined by the VPDef.
InductionDescriptor::InductionKind getInductionKind() const
VPValue * getIndex() const
VPIRValue * getStartValue() const
VPValue * getStepValue() const
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPDerivedIVRecipe.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPExpandSCEVRecipe(const SCEV *Expr)
bool isVectorToScalar() const
Returns true if this VPExpressionRecipe produces a single scalar.
void decompose()
Insert the recipes of the expression back into the VPlan, directly before the current recipe.
bool mayHaveSideEffects() const
Returns true if this expression contains recipes that may have side effects.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Compute the cost of this recipe either using a recipe's specialized implementation or using the legac...
bool mayReadOrWriteMemory() const
Returns true if this expression contains recipes that may read from or write to memory.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Produce a vectorized histogram operation.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPHistogramRecipe.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getMask() const
Return the mask operand if one was provided, or a null pointer if all lanes should be executed uncond...
Class to record and manage LLVM IR flags.
ReductionFlagsTy ReductionFlags
LLVM_ABI_FOR_TEST bool hasRequiredFlagsForOpcode(unsigned Opcode) const
Returns true if Opcode has its required flags set.
LLVM_ABI_FOR_TEST bool flagsValidForOpcode(unsigned Opcode) const
Returns true if the set flags are valid for Opcode.
static VPIRFlags getDefaultFlags(unsigned Opcode)
Returns default flags for Opcode for opcodes that support it, asserts otherwise.
void printFlags(raw_ostream &O) const
bool hasFastMathFlags() const
Returns true if the recipe has fast-math flags.
bool isReductionOrdered() const
CmpInst::Predicate getPredicate() const
LLVM_ABI_FOR_TEST FastMathFlags getFastMathFlagsOrNone() const
void intersectFlags(const VPIRFlags &Other)
Only keep flags also present in Other.
GEPNoWrapFlags getGEPNoWrapFlags() const
bool hasPredicate() const
Returns true if the recipe has a comparison predicate.
DisjointFlagsTy DisjointFlags
NonNegFlagsTy NonNegFlags
bool isReductionInLoop() const
void applyFlags(Instruction &I) const
Apply the IR flags to I.
RecurKind getRecurKind() const
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPIRInstruction.
VPIRInstruction(Instruction &I)
VPIRInstruction::create() should be used to create VPIRInstructions, as subclasses may need to be cre...
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getResultType() const
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the instruction.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPInstruction.
This is a concrete Recipe that models a single VPlan-level instruction.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPInstruction.
VPInstruction(unsigned Opcode, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", Type *ResultTy=nullptr)
bool doesGeneratePerAllLanes() const
Returns true if this VPInstruction generates scalar values for all lanes.
@ ExtractLastActive
Extracts the last active lane from a set of vectors.
@ ExtractLane
Extracts a single lane (first operand) from a set of vector operands.
@ ExitingIVValue
Compute the exiting value of a wide induction after vectorization, that is the value of the last lane...
@ WideIVStep
Scale the first operand (vector step) by the second operand (scalar-step).
@ ExtractPenultimateElement
@ ResumeForEpilogue
Explicit user for the resume phi of the canonical induction in the main VPlan, used by the epilogue v...
@ Unpack
Extracts all lanes from its (non-scalable) vector operand.
@ FirstOrderRecurrenceSplice
@ ReductionStartVector
Start vector for reductions with 3 operands: the original start value, the identity value for the red...
@ BuildVector
Creates a fixed-width vector containing all operands.
@ BuildStructVector
Given operands of (the same) struct type, creates a struct of fixed- width vectors each containing a ...
@ VScale
Returns the value for vscale.
@ CanonicalIVIncrementForPart
@ ComputeReductionResult
Reduce the operands to the final reduction result using the operation specified via the operation's V...
@ CalculateTripCountMinusVF
bool opcodeMayReadOrWriteFromMemory() const
Returns true if the underlying opcode may read from or write to memory.
LLVM_DUMP_METHOD void dump() const
Print the VPInstruction to dbgs() (for debugging).
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the VPInstruction to O.
StringRef getName() const
Returns the symbolic name assigned to the VPInstruction.
unsigned getOpcode() const
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
void addOperand(VPValue *Op)
Add Op as operand of this VPInstruction.
bool isVectorToScalar() const
Returns true if this VPInstruction produces a scalar value from a vector, e.g.
bool isSingleScalar() const
Returns true if this VPInstruction's operands are single scalars and the result is also a single scal...
unsigned getNumOperandsForOpcode() const
Return the number of operands determined by the opcode of the VPInstruction, excluding mask.
bool isMasked() const
Returns true if the VPInstruction has a mask operand.
void execute(VPTransformState &State) override
Generate the instruction.
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
bool needsMaskForGaps() const
Return true if the access needs a mask because of the gaps.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this recipe.
Instruction * getInsertPos() const
const InterleaveGroup< Instruction > * getInterleaveGroup() const
VPValue * getMask() const
Return the mask used by this recipe.
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
VPValue * getAddr() const
Return the address accessed by this recipe.
VPValue * getEVL() const
The VPValue of the explicit vector length.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
unsigned getNumStoreOperands() const override
Returns the number of stored operands of this interleave group.
void execute(VPTransformState &State) override
Generate the wide load or store, and shuffles.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
unsigned getNumStoreOperands() const override
Returns the number of stored operands of this interleave group.
void execute(VPTransformState &State) override
Generate the wide load or store, and shuffles.
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
static VPLane getLastLaneForVF(const ElementCount &VF)
static VPLane getLaneFromEnd(const ElementCount &VF, unsigned Offset)
static VPLane getFirstLane()
Helper type to provide functions to access incoming values and blocks for phi-like recipes.
virtual const VPRecipeBase * getAsRecipe() const =0
Return a VPRecipeBase* to the current object.
VPValue * getIncomingValueForBlock(const VPBasicBlock *VPBB) const
Returns the incoming value for VPBB. VPBB must be an incoming block.
void removeIncomingValueFor(VPBlockBase *IncomingBlock) const
Removes the incoming value for IncomingBlock, which must be a predecessor.
const VPBasicBlock * getIncomingBlock(unsigned Idx) const
Returns the incoming block with index Idx.
detail::zippy< llvm::detail::zip_first, VPUser::const_operand_range, const_incoming_blocks_range > incoming_values_and_blocks() const
Returns an iterator range over pairs of incoming values and corresponding incoming blocks.
VPValue * getIncomingValue(unsigned Idx) const
Returns the incoming VPValue with index Idx.
void printPhiOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the recipe.
void setIncomingValueForBlock(const VPBasicBlock *VPBB, VPValue *V) const
Sets the incoming value for VPBB to V.
void execute(VPTransformState &State) override
Generates phi nodes for live-outs (from a replicate region) as needed to retain SSA form.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
bool mayReadFromMemory() const
Returns true if the recipe may read from memory.
bool mayHaveSideEffects() const
Returns true if the recipe may have side-effects.
virtual void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Each concrete VPRecipe prints itself, without printing common information, like debug info or metadat...
VPRegionBlock * getRegion()
LLVM_ABI_FOR_TEST void dump() const
Dump the recipe to stderr (for debugging).
bool isPhi() const
Returns true for PHI-like recipes.
bool mayWriteToMemory() const
Returns true if the recipe may write to memory.
virtual InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const
Compute the cost of this recipe either using a recipe's specialized implementation or using the legac...
VPBasicBlock * getParent()
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
bool isSafeToSpeculativelyExecute() const
Return true if we can safely execute this recipe unconditionally even if it is masked originally.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this recipe, taking into account if the cost computation should be skipped and the...
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const
Print the recipe, delegating to printRecipe().
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
unsigned getVPRecipeID() const
void moveAfter(VPRecipeBase *MovePos)
Unlink this recipe from its current VPBasicBlock and insert it into the VPBasicBlock that MovePos liv...
VPRecipeBase(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Type * getScalarType() const
Returns the scalar type of this VPRecipeValue.
void execute(VPTransformState &State) override
Generate the reduction in the loop.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getEVL() const
The VPValue of the explicit vector length.
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by, or 1 if it isn't scaled.
bool isInLoop() const
Returns true if the phi is part of an in-loop reduction.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
bool isConditional() const
Return true if the in-loop reduction is conditional.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of VPReductionRecipe.
VPValue * getVecOp() const
The VPValue of the vector value to be reduced.
VPValue * getCondOp() const
The VPValue of the condition for the block.
RecurKind getRecurrenceKind() const
Return the recurrence kind for the in-loop reduction.
bool isPartialReduction() const
Returns true if the reduction outputs a vector with a scaled down VF.
VPValue * getChainOp() const
The VPValue of the scalar Chain being accumulated.
bool isInLoop() const
Returns true if the reduction is in-loop.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the reduction in the loop.
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
void execute(VPTransformState &State) override
Generate replicas of the desired Ingredient.
bool isSingleScalar() const
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPReplicateRecipe.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
static Type * computeScalarType(const Instruction *I, ArrayRef< VPValue * > Operands)
Compute the scalar result type for a VPReplicateRecipe wrapping I with Operands (excluding any predic...
static InstructionCost computeCallCost(Function *CalledFn, Type *ResultTy, ArrayRef< const VPValue * > ArgOps, bool IsSingleScalar, ElementCount VF, VPCostContext &Ctx)
Return the cost of scalarizing a call to CalledFn with argument operands ArgOps for a given VF.
unsigned getOpcode() const
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPScalarIVStepsRecipe.
VPValue * getStepValue() const
VPValue * getStartIndex() const
Return the StartIndex, or null if known to be zero, valid only after unrolling.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the scalarized versions of the phi node as needed by their users.
VPSingleDefRecipe is a base class for recipes that model a sequence of one or more output IR that def...
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
LLVM_ABI_FOR_TEST LLVM_DUMP_METHOD void dump() const
Print this VPSingleDefRecipe to dbgs() (for debugging).
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
This class can be used to assign names to VPValues.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
void printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the operands to O.
unsigned getNumOperands() const
operand_iterator op_end()
operand_iterator op_begin()
VPValue * getOperand(unsigned N) const
void addOperand(VPValue *Operand)
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Type * getScalarType() const
Returns the scalar type of this VPValue, dispatching based on the concrete subclass.
Value * getLiveInIRValue() const
Return the underlying IR value for a VPIRValue.
bool isDefinedOutsideLoopRegions() const
Returns true if the VPValue is defined outside any loop.
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
void printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
void setUnderlyingValue(Value *Val)
VPUser * getSingleUser()
Return the single user of this value, or nullptr if there is not exactly one user.
VPValue * getVFValue() const
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getSourceElementType() const
int64_t getStride() const
void materializeOffset(unsigned Part=0)
Adds the offset operand to the recipe.
VPValue * getStride() const
Type * getSourceElementType() const
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
VPValue * getVFxPart() const
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Function * getCalledScalarFunction() const
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCallRecipe.
void execute(VPTransformState &State) override
Produce a widened version of the call instruction.
static InstructionCost computeCallCost(Function *Variant, VPCostContext &Ctx)
Return the cost of widening a call using the vector function Variant.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Produce widened copies of the cast.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCastRecipe.
void execute(VPTransformState &State) override
Generate the gep nodes.
Type * getSourceElementType() const
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
VPIRValue * getStartValue() const
Returns the start value of the induction.
VPValue * getStepValue()
Returns the step value of the induction.
VPIRValue * getStartValue() const
Returns the start value of the induction.
TruncInst * getTruncInst()
Returns the first defined value as TruncInst, if it is one or nullptr otherwise.
bool isCanonical() const
Returns true if the induction is canonical, i.e.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
CallInst * createVectorCall(VPTransformState &State)
Helper function to produce the widened intrinsic call.
Intrinsic::ID getVectorIntrinsicID() const
Return the ID of the intrinsic.
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
StringRef getIntrinsicName() const
Return the name of the intrinsic as a string.
static InstructionCost computeCallCost(Intrinsic::ID ID, ArrayRef< const VPValue * > Operands, const VPRecipeWithIRFlags &R, ElementCount VF, VPCostContext &Ctx)
Compute the cost of a vector intrinsic with ID and Operands.
LLVM_ABI_FOR_TEST bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Produce a widened version of the vector intrinsic.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this vector intrinsic.
static InstructionCost computeMemIntrinsicCost(Intrinsic::ID IID, Type *Ty, bool IsMasked, Align Alignment, VPCostContext &Ctx)
Helper function for computing the cost of vector memory intrinsic.
void execute(VPTransformState &State) override
Produce a widened version of the vector memory intrinsic.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this vector memory intrinsic.
bool IsMasked
Whether the memory access is masked.
bool isConsecutive() const
Return whether the loaded-from / stored-to addresses are consecutive.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const
Return the cost of this VPWidenMemoryRecipe.
bool Consecutive
Whether the accessed addresses are consecutive.
VPValue * getMask() const
Return the mask used by this recipe.
Align Alignment
Alignment information for this memory access.
virtual VPRecipeBase * getAsRecipe()=0
Return a VPRecipeBase* to the current object.
VPValue * getAddr() const
Return the address accessed by this recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenPHIRecipe.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
bool onlyScalarsGenerated(bool IsScalable)
Returns true if only scalar values will be generated.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenRecipe.
void execute(VPTransformState &State) override
Produce a widened instruction using the opcode and operands of the recipe, processing State....
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
const DataLayout & getDataLayout() const
VPValue * getTripCount() const
The trip count of the original loop.
VPIRValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPIRValue wrapping a ConstantInt with the given type and value.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
LLVMContext & getContext() const
All values hold a context through their type.
void mutateType(Type *Ty)
Mutate the type of this Value to be of the specified type.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Base class of all SIMD vector types.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
Type * getElementType() const
constexpr ScalarTy getFixedValue() const
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
const ParentTy * getParent() const
self_iterator getIterator()
typename base_list_type::iterator iterator
iterator erase(iterator where)
pointer remove(iterator &IT)
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
LLVM_ABI Intrinsic::ID getDeinterleaveIntrinsicID(unsigned Factor)
Returns the corresponding llvm.vector.deinterleaveN intrinsic for factor N.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
LLVM_ABI StringRef getBaseName(ID id)
Return the LLVM name for an intrinsic, without encoded types for overloading, such as "llvm....
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
match_combine_or< Ty... > m_CombineOr(const Ty &...Ps)
Combine pattern matchers matching any of Ps patterns.
auto m_Cmp()
Matches any compare instruction and ignore it.
bool match(Val *V, const Pattern &P)
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
LogicalOp_match< LHS, RHS, Instruction::And, true > m_c_LogicalAnd(const LHS &L, const RHS &R)
Matches L && R with LHS and RHS in either order.
LogicalOp_match< LHS, RHS, Instruction::Or, true > m_c_LogicalOr(const LHS &L, const RHS &R)
Matches L || R with LHS and RHS in either order.
specific_intval< 1 > m_False()
specific_intval< 1 > m_True()
auto m_VPValue()
Match an arbitrary VPValue and ignore it.
VPInstruction_match< VPInstruction::BranchOnCond > m_BranchOnCond()
VPInstruction_match< VPInstruction::Reverse, Op0_t > m_Reverse(const Op0_t &Op0)
initializer< Ty > init(const Ty &Val)
NodeAddr< DefNode * > Def
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
bool isSingleScalar(const VPValue *VPV)
Returns true if VPV is a single scalar, either because it produces the same value for all lanes or on...
bool isAddressSCEVForCost(const SCEV *Addr, ScalarEvolution &SE, const Loop *L)
Returns true if Addr is an address SCEV that can be passed to TTI::getAddressComputationCost,...
bool onlyFirstPartUsed(const VPValue *Def)
Returns true if only the first part of Def is used.
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
bool onlyScalarValuesUsed(const VPValue *Def)
Returns true if only scalar values of Def are used by all users.
bool isUsedByLoadStoreAddress(const VPValue *V)
Returns true if V is used as part of the address of another load or store.
const SCEV * getSCEVExprForVPValue(const VPValue *V, PredicatedScalarEvolution &PSE, const Loop *L=nullptr)
Return the SCEV expression for V.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
LLVM_ABI Value * createSimpleReduction(IRBuilderBase &B, Value *Src, RecurKind RdxKind)
Create a reduction of the given vector.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
FunctionAddr VTableAddr Value
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI Intrinsic::ID getMinMaxReductionIntrinsicOp(Intrinsic::ID RdxID)
Returns the min/max intrinsic used when expanding a min/max reduction.
@ Undef
Value of the register doesn't matter.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
auto map_to_vector(ContainerTy &&C, FuncTy &&F)
Map a range to a SmallVector with element types deduced from the mapping.
Value * getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF)
Return the runtime value for VF.
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
void interleaveComma(const Container &c, StreamT &os, UnaryFunctor each_fn)
auto cast_or_null(const Y &Val)
LLVM_ABI Value * concatenateVectors(IRBuilderBase &Builder, ArrayRef< Value * > Vecs)
Concatenate a list of vectors.
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
bool isa_and_nonnull(const Y &Val)
LLVM_ABI Value * createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left, Value *Right)
Returns a Min/Max operation corresponding to MinMaxRecurrenceKind.
auto dyn_cast_or_null(const Y &Val)
static Error getOffset(const SymbolRef &Sym, SectionRef Sec, uint64_t &Result)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI Constant * createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF, const InterleaveGroup< Instruction > &Group)
Create a mask that filters the members of an interleave group where there are gaps.
LLVM_ABI llvm::SmallVector< int, 16 > createStrideMask(unsigned Start, unsigned Stride, unsigned VF)
Create a stride shuffle mask.
auto reverse(ContainerTy &&C)
ElementCount getVectorizedTypeVF(Type *Ty)
Returns the number of vector elements for a vectorized type.
LLVM_ABI llvm::SmallVector< int, 16 > createReplicatedMask(unsigned ReplicationFactor, unsigned VF)
Create a mask with replicated elements.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool isPointerTy(const Type *T)
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
Type * toVectorizedTy(Type *Ty, ElementCount EC)
A helper for converting to vectorized types.
cl::opt< unsigned > ForceTargetInstructionCost
LLVM_ABI Type * computeScalarTypeForInstruction(unsigned Opcode, ArrayRef< VPValue * > Operands)
Compute the scalar result type for an IR Opcode given Operands.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
LLVM_ABI bool isVectorIntrinsicWithStructReturnOverloadAtField(Intrinsic::ID ID, int RetIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic that returns a struct is overloaded at the struct elem...
static const MachineInstrBuilder & addOffset(const MachineInstrBuilder &MIB, int Offset)
FunctionAddr VTableAddr uintptr_t uintptr_t Data
LLVM_ABI llvm::SmallVector< int, 16 > createInterleaveMask(unsigned VF, unsigned NumVecs)
Create an interleave shuffle mask.
RecurKind
These are the kinds of recurrences that we support.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ FMinimumNum
FP min with llvm.minimumnum semantics.
@ FindIV
FindIV reduction with select(icmp(),x,y) where one of (x,y) is a loop induction variable (increasing ...
@ Or
Bitwise or logical OR of integers.
@ FMinimum
FP min with llvm.minimum semantics.
@ FMaxNum
FP max with llvm.maxnum semantics including NaNs.
@ Mul
Product of integers.
@ FSub
Subtraction of floats.
@ FAddChainWithSubs
A chain of fadds and fsubs.
@ AnyOf
AnyOf reduction with select(cmp(),x,y) where one of (x,y) is loop invariant, and both x and y are int...
@ Xor
Bitwise or logical XOR of integers.
@ FindLast
FindLast reduction with select(cmp(),x,y) where x and y.
@ FMax
FP max implemented in terms of select(cmp()).
@ FMaximum
FP max with llvm.maximum semantics.
@ FMulAdd
Sum of float products with llvm.fmuladd(a * b + sum).
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ FMin
FP min implemented in terms of select(cmp()).
@ FMinNum
FP min with llvm.minnum semantics including NaNs.
@ Sub
Subtraction of integers.
@ AddChainWithSubs
A chain of adds and subs.
@ FMaximumNum
FP max with llvm.maximumnum semantics.
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
LLVM_ABI bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
LLVM_ABI Value * getRecurrenceIdentity(RecurKind K, Type *Tp, FastMathFlags FMF)
Given information about a recurrence kind, return the identity for the @llvm.vector....
DWARFExpression::Operation Op
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
LLVM_ABI Value * createOrderedReduction(IRBuilderBase &B, RecurKind RdxKind, Value *Src, Value *Start)
Create an ordered reduction intrinsic using the given recurrence kind RdxKind.
ArrayRef< Type * > getContainedTypes(Type *const &Ty)
Returns the types contained in Ty.
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
LLVM_ABI bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic is overloaded on the type of the operand at index OpdI...
This struct is a compact representation of a valid (non-zero power of two) alignment.
Struct to hold various analysis needed for cost computations.
static bool isFreeScalarIntrinsic(Intrinsic::ID ID)
Returns true if ID is a pseudo intrinsic that is dropped via scalarization rather than widened.
TargetTransformInfo::TargetCostKind CostKind
void execute(VPTransformState &State) override
Generate the phi nodes.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this first-order recurrence phi recipe.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
An overlay for VPIRInstructions wrapping PHI nodes enabling convenient use cast/dyn_cast/isa and exec...
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
void execute(VPTransformState &State) override
Generate the instruction.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost getCostForRecipeWithOpcode(unsigned Opcode, ElementCount VF, VPCostContext &Ctx) const
Compute the cost for this recipe for VF, using Opcode and Ctx.
VPRecipeWithIRFlags(const unsigned char SC, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, DebugLoc DL=DebugLoc::getUnknown())
A symbolic live-in VPValue, used for values like vector trip count, VF, and VFxUF.
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Generate the wide load or gather.
LLVM_ABI_FOR_TEST VPRecipeBase * getAsRecipe() override
Return a VPRecipeBase* to the current object.
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenLoadEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate a wide load or gather.
VPRecipeBase * getAsRecipe() override
Return a VPRecipeBase* to the current object.
VPValue * getStoredValue() const
Return the value stored by this recipe.
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Generate the wide store or scatter.
LLVM_ABI_FOR_TEST VPRecipeBase * getAsRecipe() override
Return a VPRecipeBase* to the current object.
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenStoreEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
void execute(VPTransformState &State) override
Generate a wide store or scatter.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPRecipeBase * getAsRecipe() override
Return a VPRecipeBase* to the current object.
VPValue * getStoredValue() const
Return the value stored by this recipe.