71 VPValue *getConstantVPV(
unsigned Part) {
77 UnrollState(
VPlan &Plan,
unsigned UF) : Plan(Plan),
UF(
UF), TypeInfo(Plan) {}
82 if (Part == 0 ||
V->isLiveIn())
85 "accessed value does not exist");
86 return VPV2Parts[
V][Part - 1];
96 assert(
V->second.size() == Part - 1 &&
"earlier parts not set");
104 assert(Inserted &&
"uniform value already added");
105 for (
unsigned Part = 0; Part !=
UF; ++Part)
106 V->second.push_back(R);
115 R->setOperand(
OpIdx, getValueForPart(
Op, Part));
121 R->setOperand(
OpIdx, getValueForPart(
Op, Part));
126void UnrollState::unrollReplicateRegionByUF(
VPRegionBlock *VPR) {
128 for (
unsigned Part = 1; Part !=
UF; ++Part) {
134 for (
const auto &[PartIVPBB, Part0VPBB] :
135 zip(VPBlockUtils::blocksOnly<VPBasicBlock>(PartI),
136 VPBlockUtils::blocksOnly<VPBasicBlock>(Part0))) {
137 for (
const auto &[PartIR, Part0R] :
zip(*PartIVPBB, *Part0VPBB)) {
139 if (
auto *ScalarIVSteps = dyn_cast<VPScalarIVStepsRecipe>(&PartIR)) {
140 ScalarIVSteps->addOperand(getConstantVPV(Part));
143 addRecipeForPart(&Part0R, &PartIR, Part);
149void UnrollState::unrollWidenInductionByUF(
152 IV->getParent()->getEnclosingLoopRegion()->getSinglePredecessor());
154 auto &
ID =
IV->getInductionDescriptor();
156 if (isa_and_present<FPMathOperator>(
ID.getInductionBinOp()))
157 Flags =
ID.getInductionBinOp()->getFastMathFlags();
159 VPValue *ScalarStep =
IV->getStepValue();
167 ToSkip.
insert(VectorStep);
182 Builder.setInsertPoint(
IV->getParent(), InsertPtForPhi);
187 AddOpc =
ID.getInductionOpcode();
189 AddOpc = Instruction::Add;
190 for (
unsigned Part = 1; Part !=
UF; ++Part) {
192 Part > 1 ?
"step.add." + std::to_string(Part) :
"step.add";
201 addRecipeForPart(
IV,
Add, Part);
204 IV->addOperand(VectorStep);
205 IV->addOperand(Prev);
212 if (isa<VPFirstOrderRecurrencePHIRecipe>(R))
216 if (
auto *
IV = dyn_cast<VPWidenInductionRecipe>(R)) {
217 unrollWidenInductionByUF(
IV, InsertPtForPhi);
221 auto *RdxPhi = dyn_cast<VPReductionPHIRecipe>(R);
222 if (RdxPhi && RdxPhi->isOrdered())
225 auto InsertPt = std::next(
R->getIterator());
226 for (
unsigned Part = 1; Part !=
UF; ++Part) {
228 Copy->insertBefore(*
R->getParent(), InsertPt);
229 addRecipeForPart(R, Copy, Part);
235 if (
auto *VPI = dyn_cast<VPInstruction>(RdxPhi->getStartValue())) {
237 "unexpected start VPInstruction");
242 StartV = VPI->getOperand(1);
244 auto *
C = VPI->clone();
245 C->setOperand(0,
C->getOperand(1));
249 for (
unsigned Part = 1; Part !=
UF; ++Part)
250 VPV2Parts[VPI][Part - 1] = StartV;
252 Copy->addOperand(getConstantVPV(Part));
254 assert(isa<VPActiveLaneMaskPHIRecipe>(R) &&
255 "unexpected header phi recipe not needing unrolled part");
266 if (
auto *VPI = dyn_cast<VPInstruction>(&R)) {
268 addUniformForAllParts(VPI);
272 if (
auto *RepR = dyn_cast<VPReplicateRecipe>(&R)) {
273 if (isa<StoreInst>(RepR->getUnderlyingValue()) &&
274 RepR->getOperand(1)->isDefinedOutsideLoopRegions()) {
279 if (
auto *
II = dyn_cast<IntrinsicInst>(RepR->getUnderlyingValue())) {
280 if (
II->getIntrinsicID() == Intrinsic::experimental_noalias_scope_decl) {
281 addUniformForAllParts(RepR);
288 auto InsertPt = std::next(
R.getIterator());
290 for (
unsigned Part = 1; Part !=
UF; ++Part) {
292 Copy->insertBefore(VPBB, InsertPt);
293 addRecipeForPart(&R, Copy, Part);
296 if (
match(&R, m_VPInstruction<VPInstruction::FirstOrderRecurrenceSplice>(
298 Copy->setOperand(0, getValueForPart(
Op, Part - 1));
299 Copy->setOperand(1, getValueForPart(
Op, Part));
302 if (
auto *Red = dyn_cast<VPReductionRecipe>(&R)) {
303 auto *
Phi = dyn_cast<VPReductionPHIRecipe>(
R.getOperand(0));
304 if (Phi &&
Phi->isOrdered()) {
305 auto &Parts = VPV2Parts[
Phi];
308 Parts.push_back(Red);
310 Parts.push_back(
Copy->getVPSingleValue());
311 Phi->setOperand(1,
Copy->getVPSingleValue());
320 match(Copy, m_VPInstruction<VPInstruction::CanonicalIVIncrementForPart>(
322 Copy->addOperand(getConstantVPV(Part));
324 if (isa<VPVectorPointerRecipe, VPVectorEndPointerRecipe>(R))
325 Copy->setOperand(0,
R.getOperand(0));
330 auto *VPR = dyn_cast<VPRegionBlock>(VPB);
333 return unrollReplicateRegionByUF(VPR);
345 auto *VPBB = cast<VPBasicBlock>(VPB);
348 if (ToSkip.
contains(&R) || isa<VPIRInstruction>(&R))
354 if (
match(&R, m_VPInstruction<VPInstruction::AnyOf>(
m_VPValue(Op1))) ||
355 match(&R, m_VPInstruction<VPInstruction::FirstActiveLane>(
357 match(&R, m_VPInstruction<VPInstruction::ComputeAnyOfResult>(
359 match(&R, m_VPInstruction<VPInstruction::ComputeReductionResult>(
361 match(&R, m_VPInstruction<VPInstruction::ComputeFindIVResult>(
363 addUniformForAllParts(cast<VPInstruction>(&R));
364 for (
unsigned Part = 1; Part !=
UF; ++Part)
365 R.addOperand(getValueForPart(Op1, Part));
369 if (
match(&R, m_VPInstruction<VPInstruction::ExtractLane>(
371 addUniformForAllParts(cast<VPInstruction>(&R));
372 for (
unsigned Part = 1; Part !=
UF; ++Part)
373 R.addOperand(getValueForPart(Op1, Part));
377 match(&R, m_VPInstruction<VPInstruction::ExtractPenultimateElement>(
379 addUniformForAllParts(cast<VPSingleDefRecipe>(&R));
381 auto *
I = cast<VPInstruction>(&R);
386 I->replaceAllUsesWith(getValueForPart(Op0, UF -
Offset));
395 auto *SingleDef = dyn_cast<VPSingleDefRecipe>(&R);
397 addUniformForAllParts(SingleDef);
401 if (
auto *
H = dyn_cast<VPHeaderPHIRecipe>(&R)) {
402 unrollHeaderPHIByUF(
H, InsertPtForPhi);
411 assert(UF > 0 &&
"Unroll factor must be positive");
416 for (
VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) {
418 auto *VPI = dyn_cast<VPInstruction>(&R);
421 VPI->getNumOperands() == 1) {
422 VPI->replaceAllUsesWith(VPI->getOperand(0));
423 VPI->eraseFromParent();
432 UnrollState Unroller(Plan, UF);
440 Unroller.unrollBlock(VPB);
451 if (isa<VPFirstOrderRecurrencePHIRecipe>(&
H)) {
452 Unroller.remapOperand(&
H, 1, UF - 1);
455 if (Unroller.contains(
H.getVPSingleValue())) {
459 Unroller.remapOperands(&
H, Part);
477 auto LaneDefs = Def2LaneDefs.find(
Op);
478 if (LaneDefs != Def2LaneDefs.end()) {
506 true,
nullptr, *RepR);
507 New->transferFlags(*RepR);
508 New->insertBefore(RepR);
518 auto VPBBsOutsideLoopRegion = VPBlockUtils::blocksOnly<VPBasicBlock>(
520 auto VPBBsInsideLoopRegion = VPBlockUtils::blocksOnly<VPBasicBlock>(
523 concat<VPBasicBlock *>(VPBBsOutsideLoopRegion, VPBBsInsideLoopRegion);
533 auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
534 if (!RepR || RepR->isSingleScalar())
538 if (RepR->getNumUsers() == 0) {
539 if (isa<StoreInst>(RepR->getUnderlyingInstr()) &&
558 Def2LaneDefs[RepR] = LaneDefs;
561 RepR->replaceUsesWithIf(LaneDefs[0], [RepR](
VPUser &U,
unsigned) {
562 return U.onlyFirstLaneUsed(RepR);
568 auto *VPI = dyn_cast<VPInstruction>(U);
572 assert(VPI->getNumOperands() == 1 &&
573 "Build(Struct)Vector must have a single operand before "
574 "replicating by VF");
575 VPI->setOperand(0, LaneDefs[0]);
577 VPI->addOperand(LaneDef);
583 R->eraseFromParent();
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
ReachingDefAnalysis InstSet & ToRemove
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
static const HTTPClientCleanup Cleanup
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...
This file contains the declarations of different VPlan-related auxiliary helpers.
static VPReplicateRecipe * cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy, VPReplicateRecipe *RepR, VPLane Lane, const DenseMap< VPValue *, SmallVector< VPValue * > > &Def2LaneDefs)
Create a single-scalar clone of RepR for lane Lane.
static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry, DenseMap< VPValue *, VPValue * > &Old2NewVPValues)
This file contains the declarations of the Vectorization Plan base classes:
static const uint32_t IV[8]
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
This class represents an Operation in the Expression.
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isPointerTy() const
True if this is an instance of PointerType.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
RecipeListTy::iterator iterator
Instruction iterators...
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
const VPBasicBlock * getEntryBasicBlock() const
VPBlockBase * getSingleSuccessor() const
static void insertBlockBefore(VPBlockBase *NewBlock, VPBlockBase *BlockPtr)
Insert disconnected block NewBlock before Blockptr.
VPlan-based builder utility analogous to IRBuilder.
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const Twine &Name="")
Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.
Type * getScalarType() const
Returns the scalar type of the induction.
ArrayRef< VPValue * > definedValues()
Returns an ArrayRef of the values defined by the VPDef.
VPValue * getVPValue(unsigned I)
Returns the VPValue with index I defined by the VPDef.
BasicBlock * getIRBasicBlock() const
Class to record and manage LLVM IR flags.
This is a concrete Recipe that models a single VPlan-level instruction.
@ WideIVStep
Scale the first operand (vector step) by the second operand (scalar-step).
@ ReductionStartVector
Start vector for reductions with 3 operands: the original start value, the identity value for the red...
@ BuildVector
Creates a fixed-width vector containing all operands.
@ BuildStructVector
Given operands of (the same) struct type, creates a struct of fixed- width vectors each containing a ...
@ CanonicalIVIncrementForPart
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
static VPLane getLastLaneForVF(const ElementCount &VF)
Kind getKind() const
Returns the Kind of lane offset.
unsigned getKnownLane() const
Returns a compile-time known value for the lane index and asserts if the lane can only be calculated ...
@ ScalableLast
For ScalableLast, Lane is the offset from the start of the last N-element subvector in a scalable vec...
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
VPRegionBlock * clone() override
Clone all blocks in the single-entry single-exit region of the block and their recipes without updati...
const VPBlockBase * getEntry() const
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...
VPSingleDef is a base class for recipes for modeling a sequence of one or more output IR that define ...
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
An analysis for type-inference for VPValues.
Type * inferScalarType(const VPValue *V)
Infer the type of V. Returns the scalar type of V.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
A recipe to compute a pointer to the last element of each part of a widened memory access for widened...
A recipe to compute the pointers for widened memory accesses of IndexTy.
A Recipe for widening the canonical induction variable of the vector loop.
Base class for widened induction (VPWidenIntOrFpInductionRecipe and VPWidenPointerInductionRecipe),...
VPlan models a candidate for vectorization, encoding various decisions take to produce efficient outp...
VPBasicBlock * getEntry()
VPValue & getVF()
Returns the VF of the vector loop region.
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
VPValue * getOrAddLiveIn(Value *V)
Gets the live-in VPValue for V or adds a new live-in (if none exists yet) for V.
bool hasScalarVFOnly() const
VPCanonicalIVPHIRecipe * getCanonicalIV()
Returns the canonical induction recipe of the vector loop.
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
@ C
The default llvm calling convention, compatible with C.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
VPInstruction_match< VPInstruction::ExtractLastElement, Op0_t > m_ExtractLastElement(const Op0_t &Op0)
VPInstruction_match< VPInstruction::BranchOnCount, Op0_t, Op1_t > m_BranchOnCount(const Op0_t &Op0, const Op1_t &Op1)
class_match< VPValue > m_VPValue()
Match an arbitrary VPValue and ignore it.
VPInstruction_match< VPInstruction::BuildVector > m_BuildVector()
BuildVector is matches only its opcode, w/o matching its operands as the number of operands is not fi...
VPInstruction_match< VPInstruction::BranchOnCond, Op0_t > m_BranchOnCond(const Op0_t &Op0)
NodeAddr< PhiNode * > Phi
bool isSingleScalar(const VPValue *VPV)
Returns true if VPV is a single scalar, either because it produces the same value for all lanes or on...
bool isUniformAcrossVFsAndUFs(VPValue *V)
Checks if V is uniform across all VF lanes and UF parts.
bool onlyFirstPartUsed(const VPValue *Def)
Returns true if only the first part of Def is used.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
detail::scope_exit< std::decay_t< Callable > > make_scope_exit(Callable &&F)
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order.
iterator_range< df_iterator< VPBlockDeepTraversalWrapper< VPBlockBase * > > > vp_depth_first_deep(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order while traversing t...
auto reverse(ContainerTy &&C)
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
DWARFExpression::Operation Op