// State for unrolling a VPlan by UF. VPV2Parts maps each part-0 VPValue to
// its copies for parts 1..UF-1.
class UnrollState {
  // ... (fields: Plan, UF, TypeInfo, ToSkip and the VPV2Parts map)

  // Return a live-in constant holding the part number Part.
  VPValue *getConstantVPV(unsigned Part) {
    Type *CanIVIntTy = Plan.getCanonicalIV()->getScalarType();
    return Plan.getOrAddLiveIn(ConstantInt::get(CanIVIntTy, Part));
  }

public:
  UnrollState(VPlan &Plan, unsigned UF, LLVMContext &Ctx)
      : Plan(Plan), UF(UF), TypeInfo(Plan.getCanonicalIV()->getScalarType()) {}

  // Return the VPValue to use for V in part Part.
  VPValue *getValueForPart(VPValue *V, unsigned Part) {
    if (Part == 0 || V->isLiveIn())
      return V;
    assert((VPV2Parts.contains(V) && VPV2Parts[V].size() >= Part) &&
           "accessed value does not exist");
    return VPV2Parts[V][Part - 1];
  }

  // Map each VPValue defined by the part-0 recipe OrigR to the corresponding
  // value defined by its copy CopyR for part Part.
  void addRecipeForPart(VPRecipeBase *OrigR, VPRecipeBase *CopyR,
                        unsigned Part) {
    for (const auto &[Idx, VPV] : enumerate(OrigR->definedValues())) {
      auto Ins = VPV2Parts.insert({VPV, {}});
      assert(Ins.first->second.size() == Part - 1 && "earlier parts not set");
      Ins.first->second.push_back(CopyR->getVPValue(Idx));
    }
  }

  // Record a recipe that is uniform across all parts as the value of every
  // part.
  void addUniformForAllParts(VPSingleDefRecipe *R) {
    auto Ins = VPV2Parts.insert({R, {}});
    assert(Ins.second && "uniform value already added");
    for (unsigned Part = 0; Part != UF; ++Part)
      Ins.first->second.push_back(R);
  }
  // Update R's operand OpIdx with its corresponding VPValue for part Part.
  void remapOperand(VPRecipeBase *R, unsigned OpIdx, unsigned Part) {
    auto *Op = R->getOperand(OpIdx);
    R->setOperand(OpIdx, getValueForPart(Op, Part));
  }

  // Update all of R's operands with their corresponding VPValues for part
  // Part.
  void remapOperands(VPRecipeBase *R, unsigned Part) {
    for (const auto &[OpIdx, Op] : enumerate(R->operands()))
      R->setOperand(OpIdx, getValueForPart(Op, Part));
  }
};
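
// Unroll replicate region VPR by cloning the whole region UF - 1 times: each
// clone's operands are remapped to the values of its part, and its recipes
// are recorded as the per-part copies of the part-0 recipes.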
void UnrollState::unrollReplicateRegionByUF(VPRegionBlock *VPR) {
  VPBlockBase *InsertPt = VPR->getSingleSuccessor();
  for (unsigned Part = 1; Part != UF; ++Part) {
    auto *Copy = VPR->clone();
    VPBlockUtils::insertBlockBefore(Copy, InsertPt);

    // Walk the cloned region and the original region in lockstep.
    auto PartI = vp_depth_first_shallow(Copy->getEntry());
    auto Part0 = vp_depth_first_shallow(VPR->getEntry());
    for (const auto &[PartIVPBB, Part0VPBB] :
         zip(VPBlockUtils::blocksOnly<VPBasicBlock>(PartI),
             VPBlockUtils::blocksOnly<VPBasicBlock>(Part0))) {
      for (const auto &[PartIR, Part0R] : zip(*PartIVPBB, *Part0VPBB)) {
        remapOperands(&PartIR, Part);
        if (auto *ScalarIVSteps = dyn_cast<VPScalarIVStepsRecipe>(&PartIR)) {
          ScalarIVSteps->addOperand(getConstantVPV(Part));
        }
        addRecipeForPart(&Part0R, &PartIR, Part);
      }
    }
  }
}
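
// Unroll a widened integer/FP induction: compute a vector-wide step
// (VF * Step) in the loop preheader and add per-part "step.add" recipes that
// advance the induction by that step for parts 1..UF-1.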
void UnrollState::unrollWidenInductionByUF(
    VPWidenIntOrFpInductionRecipe *IV, VPBasicBlock::iterator InsertPtForPhi) {
  VPBasicBlock *PH = cast<VPBasicBlock>(
      IV->getParent()->getEnclosingLoopRegion()->getSinglePredecessor());
  Type *IVTy = TypeInfo.inferScalarType(IV);
  auto &ID = IV->getInductionDescriptor();
  std::optional<FastMathFlags> FMFs;
  if (isa_and_present<FPMathOperator>(ID.getInductionBinOp()))
    FMFs = ID.getInductionBinOp()->getFastMathFlags();

  // Build the vector-wide step in the preheader, casting VF to the
  // induction's type if needed.
  VPValue *VectorStep = &Plan.getVF();
  VPBuilder Builder(PH);
  if (TypeInfo.inferScalarType(VectorStep) != IVTy) {
    Instruction::CastOps CastOp =
        IVTy->isFloatingPointTy() ? Instruction::UIToFP : Instruction::Trunc;
    VectorStep = Builder.createWidenCast(CastOp, VectorStep, IVTy);
    ToSkip.insert(VectorStep->getDefiningRecipe());
  }

  // Multiply by the scalar step, unless it is known to be 1.
  VPValue *ScalarStep = IV->getStepValue();
  auto *ConstStep = ScalarStep->isLiveIn()
                        ? dyn_cast<ConstantInt>(ScalarStep->getLiveInIRValue())
                        : nullptr;
  if (!ConstStep || ConstStep->getValue() != 1) {
    if (TypeInfo.inferScalarType(ScalarStep) != IVTy) {
      ScalarStep =
          Builder.createWidenCast(Instruction::Trunc, ScalarStep, IVTy);
      ToSkip.insert(ScalarStep->getDefiningRecipe());
    }
    unsigned MulOpc =
        IVTy->isFloatingPointTy() ? Instruction::FMul : Instruction::Mul;
    VPInstruction *Mul = Builder.createNaryOp(MulOpc, {VectorStep, ScalarStep},
                                              FMFs, IV->getDebugLoc());
    VectorStep = Mul;
    ToSkip.insert(Mul);
  }
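
  // Parts 1..UF-1 are computed by adding VectorStep to the previous part,
  // while part 0 remains the header phi; the phi also receives the step and
  // the last part as extra operands to compute its backedge value. For
  // example, with UF = 4:
  //   %Part.0 = widen-induction %Start, %ScalarStep, %VectorStep, %Part.3
  //   %Part.1 = %Part.0 + %VectorStep
  //   %Part.2 = %Part.1 + %VectorStep
  //   %Part.3 = %Part.2 + %VectorStep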
  VPValue *Prev = IV;
  Builder.setInsertPoint(IV->getParent(), InsertPtForPhi);
  unsigned AddOpc =
      IVTy->isFloatingPointTy() ? Instruction::FAdd : Instruction::Add;
  for (unsigned Part = 1; Part != UF; ++Part) {
    std::string Name =
        Part > 1 ? "step.add." + std::to_string(Part) : "step.add";

    VPInstruction *Add = Builder.createNaryOp(AddOpc, {Prev, VectorStep}, FMFs,
                                              IV->getDebugLoc(), Name);
    ToSkip.insert(Add);
    addRecipeForPart(IV, Add, Part);
    Prev = Add;
  }
  IV->addOperand(VectorStep);
  IV->addOperand(Prev);
}
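
// Unroll a header phi recipe. First-order recurrence phis and ordered
// reduction phis stay single, widened inductions dispatch to
// unrollWidenInductionByUF, and all other header phis are cloned per part.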
void UnrollState::unrollHeaderPHIByUF(VPHeaderPHIRecipe *R,
                                      VPBasicBlock::iterator InsertPtForPhi) {
  // First-order recurrences pass a single vector or scalar through their
  // header phis, irrespective of interleaving.
  if (isa<VPFirstOrderRecurrencePHIRecipe>(R))
    return;

  // Generate step vectors for each unrolled part.
  if (auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(R)) {
    unrollWidenInductionByUF(IV, InsertPtForPhi);
    return;
  }

  // Ordered reductions keep a single accumulator, so no per-part phis.
  auto *RdxPhi = dyn_cast<VPReductionPHIRecipe>(R);
  if (RdxPhi && RdxPhi->isOrdered())
    return;

  // Otherwise clone the header phi for parts 1..UF-1.
  auto InsertPt = std::next(R->getIterator());
  for (unsigned Part = 1; Part != UF; ++Part) {
    VPRecipeBase *Copy = R->clone();
    Copy->insertBefore(*R->getParent(), InsertPt);
    addRecipeForPart(R, Copy, Part);
    if (isa<VPWidenPointerInductionRecipe>(R)) {
      Copy->addOperand(R);
      Copy->addOperand(getConstantVPV(Part));
    } else if (RdxPhi) {
      Copy->addOperand(getConstantVPV(Part));
    } else {
      assert(isa<VPActiveLaneMaskPHIRecipe>(R) &&
             "unexpected header phi recipe not needing unrolled part");
    }
  }
}
void UnrollState::unrollRecipeByUF(VPRecipeBase &R) {
  // Branch recipes are not unrolled.
  if (match(&R, m_BranchOnCond(m_VPValue())) ||
      match(&R, m_BranchOnCount(m_VPValue(), m_VPValue())))
    return;

  if (auto *VPI = dyn_cast<VPInstruction>(&R)) {
    if (vputils::onlyFirstPartUsed(VPI)) {
      addUniformForAllParts(VPI);
      return;
    }
  }
  if (auto *RepR = dyn_cast<VPReplicateRecipe>(&R)) {
    if (isa<StoreInst>(RepR->getUnderlyingValue()) &&
        RepR->getOperand(1)->isDefinedOutsideLoopRegions()) {
      // Stores to an invariant address only need to store the last part.
      remapOperands(&R, UF - 1);
      return;
    }
    if (auto *II = dyn_cast<IntrinsicInst>(RepR->getUnderlyingValue())) {
      if (II->getIntrinsicID() == Intrinsic::experimental_noalias_scope_decl) {
        addUniformForAllParts(RepR);
        return;
      }
    }
  }

  // Clone the recipe for parts 1..UF-1 and fix up part-specific operands.
  auto InsertPt = std::next(R.getIterator());
  VPBasicBlock &VPBB = *R.getParent();
  for (unsigned Part = 1; Part != UF; ++Part) {
    VPRecipeBase *Copy = R.clone();
    Copy->insertBefore(VPBB, InsertPt);
    addRecipeForPart(&R, Copy, Part);

    // For part P, a first-order recurrence splice uses part P-1 and part P of
    // the recurring value.
    VPValue *Op;
    if (match(&R, m_VPInstruction<VPInstruction::FirstOrderRecurrenceSplice>(
                      m_VPValue(), m_VPValue(Op)))) {
      Copy->setOperand(0, getValueForPart(Op, Part - 1));
      Copy->setOperand(1, getValueForPart(Op, Part));
      continue;
    }
    if (auto *Red = dyn_cast<VPReductionRecipe>(&R)) {
      auto *Phi = cast<VPReductionPHIRecipe>(R.getOperand(0));
      if (Phi->isOrdered()) {
        // Ordered reductions form a single chain across all parts; record the
        // per-part results and thread the chain through the phi's backedge.
        auto &Parts = VPV2Parts[Phi];
        if (Part == 1) {
          Parts.clear();
          Parts.push_back(Red);
        }
        Parts.push_back(Copy->getVPSingleValue());
        Phi->setOperand(1, Copy->getVPSingleValue());
      }
    }
    remapOperands(Copy, Part);

    // Add an operand holding the part number to recipes that still need it.
    if (isa<VPScalarIVStepsRecipe, VPWidenCanonicalIVRecipe,
            VPVectorPointerRecipe, VPReverseVectorPointerRecipe>(Copy) ||
        match(Copy,
              m_VPInstruction<VPInstruction::CanonicalIVIncrementForPart>(
                  m_VPValue())))
      Copy->addOperand(getConstantVPV(Part));

    // Vector-pointer recipes keep using the part-0 pointer operand.
    if (isa<VPVectorPointerRecipe, VPReverseVectorPointerRecipe>(R))
      Copy->setOperand(0, R.getOperand(0));
  }
}
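
// Unroll all recipes in a block. Replicator regions are unrolled as a whole
// by unrollReplicateRegionByUF; other regions are traversed block by block,
// and recipes in basic blocks are dispatched to the handlers above.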
void UnrollState::unrollBlock(VPBlockBase *VPB) {
  auto *VPR = dyn_cast<VPRegionBlock>(VPB);
  if (VPR) {
    if (VPR->isReplicator())
      return unrollReplicateRegionByUF(VPR);
    // Otherwise traverse the region's blocks in RPO and unroll each of them.
    ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>>
        RPOT(VPR->getEntry());
    for (VPBlockBase *VPB : RPOT)
      unrollBlock(VPB);
    return;
  }

  // VPB is a basic block; unroll its recipes one by one.
  auto *VPBB = cast<VPBasicBlock>(VPB);
  auto InsertPtForPhi = VPBB->getFirstNonPhi();
  for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
    if (ToSkip.contains(&R) || isa<VPIRInstruction>(&R))
      continue;

    // ComputeReductionResult combines the reduction values of all parts.
    VPValue *Op0, *Op1;
    if (match(&R, m_VPInstruction<VPInstruction::ComputeReductionResult>(
                      m_VPValue(), m_VPValue(Op1)))) {
      addUniformForAllParts(cast<VPInstruction>(&R));
      for (unsigned Part = 1; Part != UF; ++Part)
        R.addOperand(getValueForPart(Op1, Part));
      continue;
    }
    if (match(&R, m_VPInstruction<VPInstruction::ExtractFromEnd>(
                      m_VPValue(Op0), m_VPValue(Op1)))) {
      addUniformForAllParts(cast<VPSingleDefRecipe>(&R));
      if (Plan.hasScalarVFOnly()) {
        // With a scalar VF, the extract is simply the value of part
        // UF - Offset.
        unsigned Offset =
            cast<ConstantInt>(Op1->getLiveInIRValue())->getZExtValue();
        R.getVPSingleValue()->replaceAllUsesWith(
            getValueForPart(Op0, UF - Offset));
        R.eraseFromParent();
      } else {
        remapOperands(&R, UF - 1);
      }
      continue;
    }

    auto *SingleDef = dyn_cast<VPSingleDefRecipe>(&R);
    if (SingleDef && vputils::isUniformAcrossVFsAndUFs(SingleDef)) {
      addUniformForAllParts(SingleDef);
      continue;
    }

    if (auto *H = dyn_cast<VPHeaderPHIRecipe>(&R)) {
      unrollHeaderPHIByUF(H, InsertPtForPhi);
      continue;
    }

    unrollRecipeByUF(R);
  }
}
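
// Entry point: unroll Plan by UF. Single-operand CanonicalIVIncrementForPart
// instructions that become redundant are cleaned up on exit; after unrolling
// all blocks, the backedge operands of the cloned header phis are remapped to
// the values of their respective parts.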
void VPlanTransforms::unrollByUF(VPlan &Plan, unsigned UF, LLVMContext &Ctx) {
  assert(UF > 0 && "Unroll factor must be positive");
  Plan.setUF(UF);
  auto Cleanup = make_scope_exit([&Plan]() {
    auto Iter = vp_depth_first_deep(Plan.getEntry());
    // Remove CanonicalIVIncrementForPart instructions left with only their
    // single part-0 operand.
    for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) {
      for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
        auto *VPI = dyn_cast<VPInstruction>(&R);
        if (VPI &&
            VPI->getOpcode() == VPInstruction::CanonicalIVIncrementForPart &&
            VPI->getNumOperands() == 1) {
          VPI->replaceAllUsesWith(VPI->getOperand(0));
          VPI->eraseFromParent();
        }
      }
    }
  });
  if (UF == 1)
    return;

  UnrollState Unroller(Plan, UF, Ctx);

  // Unroll the recipes in all blocks of the plan.
  ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>>
      RPOT(Plan.getEntry());
  for (VPBlockBase *VPB : RPOT)
    Unroller.unrollBlock(VPB);

  // Remap the backedge operands of the cloned header phis: each part-0 phi
  // resets the part counter, and its clones remap to parts 1, 2, ...
  unsigned Part = 1;
  for (VPRecipeBase &H :
       Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
    // First-order recurrence phis take the last part of the spliced value
    // across the backedge via their second operand.
    if (isa<VPFirstOrderRecurrencePHIRecipe>(&H)) {
      Unroller.remapOperand(&H, 1, UF - 1);
      continue;
    }
    if (Unroller.contains(H.getVPSingleValue()) ||
        isa<VPWidenPointerInductionRecipe>(&H)) {
      Part = 1;
      continue;
    }
    Unroller.remapOperands(&H, Part);
    Part++;
  }
}