namespace {

/// Helper to hold state needed for unrolling. It holds the Plan to unroll by
/// UF. It also holds copies of VPValues across UF-1 parts for recipes modeling
/// header phis, along with helpers to create VPValues for the additional
/// parts.
class UnrollState {
  /// Plan to unroll.
  VPlan &Plan;
  /// Unroll factor to unroll by.
  const unsigned UF;
  /// Analysis for types.
  VPTypeAnalysis TypeInfo;

  /// Unrolling may create recipes that should not be unrolled themselves.
  /// Those are tracked in ToSkip.
  SmallPtrSet<VPRecipeBase *, 8> ToSkip;

  /// Associate with each VPValue of part 0 its unrolled instances of parts
  /// 1, ..., UF-1.
  DenseMap<VPValue *, SmallVector<VPValue *>> VPV2Parts;
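  // Illustrative sketch (value names invented for exposition): with UF = 3,
  // after unrolling a recipe defining %v, VPV2Parts[%v] holds {%v.1, %v.2};
  // part 0 is always represented by the original %v and is not stored in the
  // map.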
  /// Unroll replicate region \p VPR by cloning the region UF - 1 times.
  void unrollReplicateRegionByUF(VPRegionBlock *VPR);

  /// Compute the start index for \p Steps for unroll part \p Part and add it
  /// as an additional operand.
  void addStartIndexForScalarSteps(VPScalarIVStepsRecipe *Steps,
                                   unsigned Part);

  /// Unroll recipe \p R by cloning it UF - 1 times, unless it is uniform
  /// across all parts.
  void unrollRecipeByUF(VPRecipeBase &R);

  /// Unroll header phi recipe \p R. How the recipe gets unrolled depends on
  /// the concrete header phi. Inserts newly created recipes at
  /// \p InsertPtForPhi.
  void unrollHeaderPHIByUF(VPHeaderPHIRecipe *R,
                           VPBasicBlock::iterator InsertPtForPhi);

  /// Unroll a widen induction recipe \p IV. This introduces recipes to
  /// compute the induction steps for each part.
  void unrollWidenInductionByUF(VPWidenInductionRecipe *IV,
                                VPBasicBlock::iterator InsertPtForPhi);

  VPValue *getConstantVPV(unsigned Part) {
    Type *CanIVIntTy = Plan.getVectorLoopRegion()->getCanonicalIVType();
    return Plan.getConstantInt(CanIVIntTy, Part);
  }

public:
  UnrollState(VPlan &Plan, unsigned UF) : Plan(Plan), UF(UF), TypeInfo(Plan) {}
  void unrollBlock(VPBlockBase *VPB);

  VPValue *getValueForPart(VPValue *V, unsigned Part) {
    if (Part == 0 || V->isLiveIn())
      return V;
    assert((VPV2Parts.contains(V) && VPV2Parts[V].size() >= Part) &&
           "accessed value does not exist");
    return VPV2Parts[V][Part - 1];
  }
  /// Given a single original recipe \p OrigR (of part zero), and its copy
  /// \p CopyR for part \p Part, map every VPValue defined by \p OrigR to its
  /// corresponding VPValue defined by \p CopyR.
  void addRecipeForPart(VPRecipeBase *OrigR, VPRecipeBase *CopyR,
                        unsigned Part) {
    for (const auto &[CopyVPV, VPV] :
         zip(CopyR->definedValues(), OrigR->definedValues())) {
      const auto &[V, _] = VPV2Parts.try_emplace(VPV);
      assert(V->second.size() == Part - 1 && "earlier parts not set");
      V->second.push_back(CopyVPV);
    }
  }
  /// Add a single instance of \p R for all UF parts; used for values that
  /// are uniform across parts.
  void addUniformForAllParts(VPSingleDefRecipe *R) {
    const auto &[V, Inserted] = VPV2Parts.try_emplace(R);
    assert(Inserted && "uniform value already added");
    for (unsigned Part = 0; Part != UF; ++Part)
      V->second.push_back(R);
  }
  bool contains(VPValue *VPV) const { return VPV2Parts.contains(VPV); }
  /// Update operand \p OpIdx of \p R to use the value for part \p Part.
  void remapOperand(VPRecipeBase *R, unsigned OpIdx, unsigned Part) {
    auto *Op = R->getOperand(OpIdx);
    R->setOperand(OpIdx, getValueForPart(Op, Part));
  }

  /// Update all operands of \p R to use the values for part \p Part.
  void remapOperands(VPRecipeBase *R, unsigned Part) {
    for (const auto &[OpIdx, Op] : enumerate(R->operands()))
      R->setOperand(OpIdx, getValueForPart(Op, Part));
  }
};
} // namespace
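// Illustrative sketch (value names invented for exposition): remapping the
// clone of %a = add %b, %c for part 2 rewrites its operands to their part-2
// values, i.e. %a.2 = add %b.2, %c.2, with getValueForPart falling back to
// the original value for live-ins and part 0.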
void UnrollState::addStartIndexForScalarSteps(VPScalarIVStepsRecipe *Steps,
                                              unsigned Part) {
  VPBuilder Builder(Steps);
  // The scalar steps of part Part start at Part * (number of scalars
  // produced per part). Compute that index, resize it to the step type and,
  // for floating-point inductions, convert it to FP. The exact operand lists
  // are elided in this excerpt.
  VPValue *StartIndex = Builder.createOverflowingOp(
      Instruction::Mul, {Steps->getVFValue(), getConstantVPV(Part)}
      /* wrap flags elided */);
  StartIndex = Builder.createScalarSExtOrTrunc(
      StartIndex /* destination and source types elided */);
  if (TypeInfo.inferScalarType(Steps)->isFloatingPointTy())
    StartIndex = Builder.createScalarCast(Instruction::SIToFP, StartIndex
                                          /* result type elided */);
  // Attach the computed start index to the steps recipe.
  Steps->addOperand(StartIndex);
}
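// Illustrative sketch (concrete numbers invented for exposition): with
// VF = 4 and UF = 4, the scalar steps of part 2 start at index 2 * 4 = 8, so
// the part-2 clone produces lanes 8..11 of the induction.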
void UnrollState::unrollReplicateRegionByUF(VPRegionBlock *VPR) {
  VPBlockBase *InsertPt = VPR->getSingleSuccessor();
  for (unsigned Part = 1; Part != UF; ++Part) {
    auto *Copy = VPR->clone();
    VPBlockUtils::insertBlockBefore(Copy, InsertPt);

    auto PartI = vp_depth_first_shallow(Copy->getEntry());
    auto Part0 = vp_depth_first_shallow(VPR->getEntry());
    for (const auto &[PartIVPBB, Part0VPBB] :
         zip(VPBlockUtils::blocksOnly<VPBasicBlock>(PartI),
             VPBlockUtils::blocksOnly<VPBasicBlock>(Part0))) {
      for (const auto &[PartIR, Part0R] : zip(*PartIVPBB, *Part0VPBB)) {
        remapOperands(&PartIR, Part);
        if (auto *Steps = dyn_cast<VPScalarIVStepsRecipe>(&PartIR))
          addStartIndexForScalarSteps(Steps, Part);
        addRecipeForPart(&Part0R, &PartIR, Part);
      }
    }
  }
}
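// Illustrative sketch: cloning a replicate region for UF = 3 yields
//   [region part 0] -> [region part 1] -> [region part 2] -> successor,
// with the recipes of each copy remapped to the operands of its part.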
void UnrollState::unrollWidenInductionByUF(
    VPWidenInductionRecipe *IV, VPBasicBlock::iterator InsertPtForPhi) {
  VPBasicBlock *PH = cast<VPBasicBlock>(
      IV->getParent()->getEnclosingLoopRegion()->getSinglePredecessor());
  Type *IVTy = TypeInfo.inferScalarType(IV);
  auto &ID = IV->getInductionDescriptor();
  VPIRFlags Flags;
  if (isa_and_present<FPMathOperator>(ID.getInductionBinOp()))
    Flags = ID.getInductionBinOp()->getFastMathFlags();

  VPValue *ScalarStep = IV->getStepValue();
  VPBuilder Builder(PH);
  VPInstruction *VectorStep = Builder.createNaryOp(
      VPInstruction::WideIVStep, {&Plan.getVF(), ScalarStep}, IVTy, Flags,
      IV->getDebugLoc());
  ToSkip.insert(VectorStep);
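  // Now create recipes to compute the induction steps for parts 1 .. UF - 1.
  // Part 0 remains the header phi; each later part adds VectorStep to the
  // previous part. The phi gets two extra operands, the single-part step and
  // the last part, used to compute the backedge value. Sketch for UF = 4:
  //   %Part.0 = VPWidenIntOrFpInductionRecipe %Start, %ScalarStep,
  //             %VectorStep, %Part.3
  //   %Part.1 = %Part.0 + %VectorStep   ; "step.add"
  //   %Part.2 = %Part.1 + %VectorStep   ; "step.add.2"
  //   %Part.3 = %Part.2 + %VectorStep   ; "step.add.3"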
  Builder.setInsertPoint(IV->getParent(), InsertPtForPhi);
  unsigned AddOpc;
  if (IVTy->isFloatingPointTy())
    AddOpc = ID.getInductionOpcode();
  else
    AddOpc = Instruction::Add;

  VPValue *Prev = IV;
  for (unsigned Part = 1; Part != UF; ++Part) {
    std::string Name =
        Part > 1 ? "step.add." + std::to_string(Part) : "step.add";
    VPInstruction *Add = Builder.createNaryOp(AddOpc, {Prev, VectorStep},
                                              Flags, IV->getDebugLoc(), Name);
    ToSkip.insert(Add);
    addRecipeForPart(IV, Add, Part);
    Prev = Add;
  }
  IV->addOperand(VectorStep);
  IV->addOperand(Prev);
}
void UnrollState::unrollHeaderPHIByUF(VPHeaderPHIRecipe *R,
                                      VPBasicBlock::iterator InsertPtForPhi) {
  // First-order recurrences pass a single vector or scalar through their
  // header phis, irrespective of interleaving.
  if (isa<VPFirstOrderRecurrencePHIRecipe>(R))
    return;

  // Generate step vectors for each unrolled part.
  if (auto *IV = dyn_cast<VPWidenInductionRecipe>(R)) {
    unrollWidenInductionByUF(IV, InsertPtForPhi);
    return;
  }

  // Ordered reductions are kept as a single chain; their phi is updated when
  // the reduction recipes themselves are unrolled.
  auto *RdxPhi = dyn_cast<VPReductionPHIRecipe>(R);
  if (RdxPhi && RdxPhi->isOrdered())
    return;

  auto InsertPt = std::next(R->getIterator());
  for (unsigned Part = 1; Part != UF; ++Part) {
    VPRecipeBase *Copy = R->clone();
    Copy->insertBefore(*R->getParent(), InsertPt);
    addRecipeForPart(R, Copy, Part);
    if (isa<VPWidenPointerInductionRecipe>(R)) {
      Copy->addOperand(R);
      Copy->addOperand(getConstantVPV(Part));
    } else if (RdxPhi) {
      // The unrolled parts of a reduction restart from the reduction's
      // identity value: if the start value is a ReductionStartVector, map
      // all further parts of it to its identity operand, so the cloned phis
      // pick it up when their operands are remapped. (Parts of this branch
      // are reconstructed for this excerpt.)
      auto *VPI = cast<VPInstruction>(RdxPhi->getStartValue());
      assert(VPI->getOpcode() == VPInstruction::ReductionStartVector &&
             "unexpected start VPInstruction");
      if (Part == 1) {
        VPValue *StartV;
        if (match(VPI->getOperand(2), m_One())) {
          StartV = VPI->getOperand(1);
        } else {
          auto *C = VPI->clone();
          C->setOperand(0, C->getOperand(1));
          C->insertAfter(VPI);
          StartV = C;
        }
        for (unsigned Part = 1; Part != UF; ++Part)
          VPV2Parts[VPI][Part - 1] = StartV;
      }
    } else {
      assert(isa<VPActiveLaneMaskPHIRecipe>(R) &&
             "unexpected header phi recipe not needing unrolled part");
    }
  }
}
/// Handle non-header-phi recipes.
void UnrollState::unrollRecipeByUF(VPRecipeBase &R) {
  if (match(&R, m_BranchOnCond()) || match(&R, m_BranchOnCount()))
    return;

  if (auto *VPI = dyn_cast<VPInstruction>(&R)) {
    if (vputils::onlyFirstPartUsed(VPI)) {
      // VPInstructions of which only the first part is used stay uniform;
      // all parts reuse the single instance.
      addUniformForAllParts(VPI);
      return;
    }
  }
  if (auto *RepR = dyn_cast<VPReplicateRecipe>(&R)) {
    if (isa<StoreInst>(RepR->getUnderlyingValue()) &&
        RepR->getOperand(1)->isDefinedOutsideLoopRegions()) {
      // Stores to an invariant address only need to store the value of the
      // last part.
      remapOperands(&R, UF - 1);
      return;
    }
    if (match(RepR,
              m_Intrinsic<Intrinsic::experimental_noalias_scope_decl>())) {
      addUniformForAllParts(RepR);
      return;
    }
  }

  // Unroll non-uniform recipes.
  auto InsertPt = std::next(R.getIterator());
  VPBasicBlock &VPBB = *R.getParent();
  for (unsigned Part = 1; Part != UF; ++Part) {
    VPRecipeBase *Copy = R.clone();
    Copy->insertBefore(VPBB, InsertPt);
    addRecipeForPart(&R, Copy, Part);

    VPValue *Op;
    if (match(&R, m_VPInstruction<VPInstruction::FirstOrderRecurrenceSplice>(
                      m_VPValue(), m_VPValue(Op)))) {
      Copy->setOperand(0, getValueForPart(Op, Part - 1));
      Copy->setOperand(1, getValueForPart(Op, Part));
      continue;
    }

    if (auto *VPR = dyn_cast<VPVectorPointerRecipe>(Copy)) {
      // The pointer of part Part is the part-0 pointer advanced by
      // VF * Part elements; compute that offset explicitly. (The guard and
      // exact operand lists are reconstructed for this excerpt.)
      VPBuilder Builder(VPR);
      const DataLayout &DL =
          Plan.getScalarHeader()->getIRBasicBlock()->getDataLayout();
      VPValue *VF = Builder.createScalarZExtOrTrunc(
          &Plan.getVF() /* index type, source type and debug loc elided */);
      VPValue *VFxPart = Builder.createOverflowingOp(
          Instruction::Mul, {VF, getConstantVPV(Part)}
          /* wrap flags elided */);
      Copy->setOperand(0, VPR->getOperand(0));
      Copy->addOperand(VFxPart);
      continue;
    }

    // Ordered reductions are kept in order by chaining the parts: each part
    // reduces into the result of the previous one and the phi's backedge
    // value is updated to the last part.
    if (auto *Red = dyn_cast<VPReductionRecipe>(&R)) {
      auto *Phi = dyn_cast<VPReductionPHIRecipe>(R.getOperand(0));
      if (Phi && Phi->isOrdered()) {
        auto &Parts = VPV2Parts[Phi];
        if (Part == 1) {
          Parts.clear();
          Parts.push_back(Red);
        }
        Parts.push_back(Copy->getVPSingleValue());
        Phi->setOperand(1, Copy->getVPSingleValue());
      }
    }
    remapOperands(Copy, Part);

    // Compute the start index for scalar-steps recipes of this part.
    if (auto *ScalarIVSteps = dyn_cast<VPScalarIVStepsRecipe>(Copy))
      addStartIndexForScalarSteps(ScalarIVSteps, Part);

    // Vector (end) pointers of all parts are based on the part-0 pointer
    // operand.
    if (isa<VPVectorPointerRecipe, VPVectorEndPointerRecipe>(R))
      Copy->setOperand(0, R.getOperand(0));
  }
}
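// Illustrative sketch: a FirstOrderRecurrenceSplice for part P combines the
// recurrence's previous value with the current one, so the part-P clone uses
// the operand's part P-1 and part P values:
//   %splice.1 = splice(%rec.0, %rec.1)
//   %splice.2 = splice(%rec.1, %rec.2)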
void UnrollState::unrollBlock(VPBlockBase *VPB) {
  auto *VPR = dyn_cast<VPRegionBlock>(VPB);
  if (VPR) {
    if (VPR->isReplicator())
      return unrollReplicateRegionByUF(VPR);

    // Traverse the blocks in the region in RPO, to ensure defs are visited
    // before their uses across blocks.
    ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>>
        RPOT(VPR->getEntry());
    for (VPBlockBase *VPB : RPOT)
      unrollBlock(VPB);
    return;
  }

  // VPB is a VPBasicBlock; unroll it, i.e. unroll its recipes.
  auto *VPBB = cast<VPBasicBlock>(VPB);
  auto InsertPtForPhi = VPBB->getFirstNonPhi();
  for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
    if (ToSkip.contains(&R) || isa<VPIRInstruction>(&R))
      continue;

    // Result-computing VPInstructions stay uniform but receive the values of
    // all parts as extra operands.
    VPValue *Op0, *Op1;
    if (match(&R, m_ComputeAnyOfResult(m_VPValue(), m_VPValue(),
                                       m_VPValue(Op1))) ||
        match(&R, m_ComputeReductionResult(m_VPValue(Op1))) ||
        match(&R, m_ComputeFindIVResult(m_VPValue(), m_VPValue(),
                                        m_VPValue(Op1)))) {
      addUniformForAllParts(cast<VPInstruction>(&R));
      for (unsigned Part = 1; Part != UF; ++Part)
        R.addOperand(getValueForPart(Op1, Part));
      continue;
    }
    // Lane-selecting VPInstructions similarly receive all parts of their
    // vector operand.
    if (match(&R, m_ExtractLane(m_VPValue(Op0), m_VPValue(Op1))) ||
        match(&R, m_FirstActiveLane(m_VPValue(Op1))) ||
        match(&R, m_LastActiveLane(m_VPValue(Op1)))) {
      addUniformForAllParts(cast<VPSingleDefRecipe>(&R));
      for (unsigned Part = 1; Part != UF; ++Part)
        R.addOperand(getValueForPart(Op1, Part));
      continue;
    }
    if (match(&R, m_ExtractLastLaneOfLastPart(m_VPValue(Op0))) ||
        match(&R, m_ExtractPenultimateElement(m_VPValue(Op0)))) {
      addUniformForAllParts(cast<VPSingleDefRecipe>(&R));
      if (Plan.hasScalarVFOnly()) {
        auto *I = cast<VPInstruction>(&R);
        // Extracting from the end with VF = 1 means retrieving the last or
        // penultimate scalar part (UF - 1 or UF - 2).
        bool IsPenultimatePart =
            match(&R, m_ExtractPenultimateElement(m_VPValue()));
        unsigned PartIdx = IsPenultimatePart ? UF - 2 : UF - 1;
        I->replaceAllUsesWith(getValueForPart(Op0, PartIdx));
        R.eraseFromParent();
      } else {
        // Otherwise extract from the last part.
        remapOperands(&R, UF - 1);
      }
      continue;
    }
    if (match(&R, m_ExtractLastPart(m_VPValue(Op0)))) {
      R.setOperand(0, getValueForPart(Op0, UF - 1));
      continue;
    }

    auto *SingleDef = dyn_cast<VPSingleDefRecipe>(&R);
    if (SingleDef && vputils::isUniformAcrossVFsAndUFs(SingleDef)) {
      addUniformForAllParts(SingleDef);
      continue;
    }

    if (auto *H = dyn_cast<VPHeaderPHIRecipe>(&R)) {
      unrollHeaderPHIByUF(H, InsertPtForPhi);
      continue;
    }

    unrollRecipeByUF(R);
  }
}
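// Illustrative sketch: with VF = 1 (scalar plans) and UF = 4, extracting the
// penultimate element of a value reduces to directly using its part
// UF - 2 = 2, so the extract folds away entirely.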
void VPlanTransforms::unrollByUF(VPlan &Plan, unsigned UF) {
  assert(UF > 0 && "Unroll factor must be positive");
  Plan.setUF(UF);
  if (UF == 1) {
    // With a single part, single-operand part-extraction VPInstructions are
    // no-ops; fold them away. (The exact opcode check is elided in this
    // excerpt.)
    for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
             vp_depth_first_deep(Plan.getEntry()))) {
      for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
        auto *VPI = dyn_cast<VPInstruction>(&R);
        if (VPI && /* part-extraction opcode check elided && */
            VPI->getNumOperands() == 1) {
          VPI->replaceAllUsesWith(VPI->getOperand(0));
          VPI->eraseFromParent();
        }
      }
    }
    return;
  }

  UnrollState Unroller(Plan, UF);

  // Iterate over all blocks in the plan, starting from Entry, and unroll
  // the recipes in them.
  ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>>
      RPOT(Plan.getEntry());
  for (VPBlockBase *VPB : RPOT)
    Unroller.unrollBlock(VPB);

  unsigned Part = 1;
  // Remap operands of cloned header phis to update their backedge values.
  // The phis cloned during unrolling sit directly after the part-0 phi.
  for (VPRecipeBase &H :
       Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
    // The second operand of fixed-order recurrence phis, feeding the value
    // across the backedge, must use the last part of the spliced value.
    if (isa<VPFirstOrderRecurrencePHIRecipe>(&H)) {
      Unroller.remapOperand(&H, 1, UF - 1);
      continue;
    }
    // Reaching a phi known to the unroller means the clones of the next
    // original (part-0) phi follow; reset Part to 1 for them.
    if (Unroller.contains(H.getVPSingleValue())) {
      Part = 1;
      continue;
    }
    Unroller.remapOperands(&H, Part);
    Part++;
  }
}
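// Illustrative sketch (value names invented for exposition): with UF = 2, a
// cloned header phi initially still uses the part-0 operands,
//   %v   = phi [ %start, %ph ], [ %v.next, %latch ]   ; part 0
//   %v.1 = phi [ %start, %ph ], [ %v.next, %latch ]   ; clone
// and the loop above rewrites %v.1 to use the part-1 values, e.g.
//   %v.1 = phi [ %start.1, %ph ], [ %v.next.1, %latch ].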
/// Create a single-scalar clone of \p DefR (must be a VPReplicateRecipe or
/// VPInstruction) for lane \p Lane. Use \p Def2LaneDefs to look up scalar
/// definitions for operands of \p DefR.
static VPValue *
cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy,
             VPSingleDefRecipe *DefR, VPLane Lane,
             const DenseMap<VPValue *, SmallVector<VPValue *>> &Def2LaneDefs) {
  // Collect the operands of DefR at Lane, creating extracts as needed.
  SmallVector<VPValue *> NewOps;
  for (VPValue *Op : DefR->operands()) {
    // If Op is a definition that has already been replicated, directly use
    // the clone for the requested lane.
    auto LaneDefs = Def2LaneDefs.find(Op);
    if (LaneDefs != Def2LaneDefs.end()) {
      NewOps.push_back(LaneDefs->second[Lane.getKnownLane()]);
      continue;
    }
    if (Lane.getKind() == VPLane::Kind::ScalableLast) {
      // For the last lane of a scalable vector, look through the mandatory
      // Unpack and extract the last lane of the vector operand.
      [[maybe_unused]] bool Matched =
          match(Op, m_VPInstruction<VPInstruction::Unpack>(m_VPValue(Op)));
      assert(Matched && "original op must have been Unpack");
      NewOps.push_back(
          Builder.createNaryOp(VPInstruction::ExtractLastLane, {Op}));
      continue;
    }
    // Otherwise extract the requested lane from the vector operand.
    VPValue *Idx = Plan.getConstantInt(IdxTy, Lane.getKnownLane());
    VPValue *Ext =
        Builder.createNaryOp(Instruction::ExtractElement, {Op, Idx});
    NewOps.push_back(Ext);
  }

  VPSingleDefRecipe *New;
  if (auto *RepR = dyn_cast<VPReplicateRecipe>(DefR)) {
    New = new VPReplicateRecipe(RepR->getUnderlyingInstr(), NewOps,
                                /*IsSingleScalar=*/true, /*Mask=*/nullptr,
                                *RepR, *RepR, RepR->getDebugLoc());
  } else {
    assert(isa<VPInstruction>(DefR) &&
           "DefR must be a VPReplicateRecipe or VPInstruction");
    New = DefR->clone();
    for (const auto &[Idx, Op] : enumerate(NewOps))
      New->setOperand(Idx, Op);
  }
  New->insertBefore(DefR);
  return New;
}
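// Illustrative sketch (value names invented for exposition): cloning
//   %r = udiv %a, %b
// for lane 2 creates
//   %a.2 = extractelement %a, 2
//   %b.2 = extractelement %b, 2
//   %r.2 = udiv %a.2, %b.2
// as a single-scalar recipe inserted before the original.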
/// Replicate each single-scalar-producing recipe by VF lanes, creating a
/// scalar clone per lane and feeding Build(Struct)Vector users with the
/// per-lane definitions. (Parts of the set-up are reconstructed for this
/// excerpt.)
void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
  Type *IdxTy = IntegerType::get(
      Plan.getScalarHeader()->getIRBasicBlock()->getContext(), 32);
  VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
  // Visit all VPBBs outside the loop region and directly inside the
  // top-level loop region.
  auto VPBBsOutsideLoopRegion = VPBlockUtils::blocksOnly<VPBasicBlock>(
      vp_depth_first_shallow(Plan.getEntry()));
  auto VPBBsInsideLoopRegion = VPBlockUtils::blocksOnly<VPBasicBlock>(
      vp_depth_first_shallow(LoopRegion->getEntry()));
  auto VPBBsToUnroll =
      concat<VPBasicBlock *>(VPBBsOutsideLoopRegion, VPBBsInsideLoopRegion);
  /// Map each replicated definition to its per-lane scalar definitions.
  DenseMap<VPValue *, SmallVector<VPValue *>> Def2LaneDefs;
  for (VPBasicBlock *VPBB : VPBBsToUnroll) {
    for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
      auto *DefR = dyn_cast<VPSingleDefRecipe>(&R);
      // Only replicate recipes producing per-lane scalars (the exact filter
      // is elided in this excerpt).
      if (!DefR /* || further filtering elided */)
        continue;

      VPBuilder Builder(DefR);
      if (DefR->getNumUsers() == 0) {
        // No users: clone per lane purely for side-effects and drop the
        // original.
        for (unsigned I = 0; I != VF.getKnownMinValue(); ++I)
          cloneForLane(Plan, Builder, IdxTy, DefR, VPLane(I), Def2LaneDefs);
        DefR->eraseFromParent();
        continue;
      }
      // Create a single-scalar clone of DefR for each lane.
      SmallVector<VPValue *> LaneDefs;
      for (unsigned I = 0; I != VF.getKnownMinValue(); ++I)
        LaneDefs.push_back(
            cloneForLane(Plan, Builder, IdxTy, DefR, VPLane(I), Def2LaneDefs));
      Def2LaneDefs[DefR] = LaneDefs;

      // Users that demand only the first lane can use the lane-0 clone
      // directly.
      DefR->replaceUsesWithIf(LaneDefs[0], [DefR](VPUser &U, unsigned) {
        return U.usesFirstLaneOnly(DefR);
      });
      // Feed Build(Struct)Vector users with the per-lane definitions.
      for (VPUser *U : to_vector(DefR->users())) {
        auto *VPI = dyn_cast<VPInstruction>(U);
        if (!VPI || (VPI->getOpcode() != VPInstruction::BuildVector &&
                     VPI->getOpcode() != VPInstruction::BuildStructVector))
          continue;
        assert(VPI->getNumOperands() == 1 &&
               "Build(Struct)Vector must have a single operand before "
               "replicating by VF");
        VPI->setOperand(0, LaneDefs[0]);
        for (VPValue *LaneDef : drop_begin(LaneDefs))
          VPI->addOperand(LaneDef);
      }
      R.eraseFromParent();
    }
  }
}
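// Illustrative sketch (value names invented for exposition): with VF = 2, a
// BuildVector user of a replicated definition %r is rewritten from
// BuildVector(%r) to BuildVector(%r.0, %r.1), where %r.0 and %r.1 are the
// single-scalar clones for lanes 0 and 1.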