LLVM 23.0.0git
VPlanTransforms.h
Go to the documentation of this file.
1//===- VPlanTransforms.h - Utility VPlan to VPlan transforms --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file provides utility VPlan to VPlan transformations.
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLANTRANSFORMS_H
14#define LLVM_TRANSFORMS_VECTORIZE_VPLANTRANSFORMS_H
15
16#include "VPlan.h"
17#include "VPlanVerifier.h"
19#include "llvm/ADT/ScopeExit.h"
22
23namespace llvm {
24
26class Instruction;
27class LoopVersioning;
28class PHINode;
29class ScalarEvolution;
33class VPBuilder;
34class VPRecipeBuilder;
35struct VFRange;
36
39
40#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
42#endif
43
45 /// Helper to run a VPlan pass \p Pass on \p VPlan, forwarding extra arguments
46 /// to the pass. Performs verification/printing after each VPlan pass if
47 /// requested via command line options.
48 template <bool EnableVerify = true, typename PassTy, typename... ArgsTy>
49 static decltype(auto) runPass(StringRef PassName, PassTy &&Pass, VPlan &Plan,
50 ArgsTy &&...Args) {
51 scope_exit PostTransformActions{[&]() {
52#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
53 // Make sure to print before verification, so that output is more useful
54 // in case of failures:
56 dbgs() << "VPlan after " << PassName << '\n';
57 dbgs() << Plan << '\n';
58 }
59#endif
60 if (VerifyEachVPlan && EnableVerify)
62 }};
63
64 return std::forward<PassTy>(Pass)(Plan, std::forward<ArgsTy>(Args)...);
65 }
66#define RUN_VPLAN_PASS(PASS, ...) \
67 llvm::VPlanTransforms::runPass(#PASS, PASS, __VA_ARGS__)
68#define RUN_VPLAN_PASS_NO_VERIFY(PASS, ...) \
69 llvm::VPlanTransforms::runPass<false>(#PASS, PASS, __VA_ARGS__)
70
71 /// Create a base VPlan0, serving as the common starting point for all later
72 /// candidates. It consists of an initial plain CFG loop with loop blocks from
73 /// \p TheLoop being directly translated to VPBasicBlocks with VPInstruction
74 /// corresponding to the input IR.
75 ///
76 /// The created loop is wrapped in an initial skeleton to facilitate
77 /// vectorization, consisting of a vector pre-header, an exit block for the
78 /// main vector loop (middle.block) and a new block as preheader of the scalar
79 /// loop (scalar.ph). See below for an illustration. It also adds a canonical
80 /// IV and its increment, using \p InductionTy and \p IVDL, and creates a
81 /// VPValue expression for the original trip count.
82 ///
83 /// [ ] <-- Plan's entry VPIRBasicBlock, wrapping the original loop's
84 /// / \ old preheader. Will contain iteration number check and SCEV
85 /// | | expansions.
86 /// | |
87 /// / v
88 /// | [ ] <-- vector loop bypass (may consist of multiple blocks) will be
89 /// | / | added later.
90 /// | / v
91 /// || [ ] <-- vector pre header.
92 /// |/ |
93 /// | v
94 /// | [ ] \ <-- plain CFG loop wrapping original loop to be vectorized.
95 /// | [ ]_|
96 /// | |
97 /// | v
98 /// | [ ] <--- middle-block with the branch to successors
99 /// | / |
100 /// | / |
101 /// | | v
102 /// \--->[ ] <--- scalar preheader (initial a VPBasicBlock, which will be
103 /// | | replaced later by a VPIRBasicBlock wrapping the scalar
104 /// | | preheader basic block.
105 /// | |
106 /// v <-- edge from middle to exit iff epilogue is not required.
107 /// | [ ] \
108 /// | [ ]_| <-- old scalar loop to handle remainder (scalar epilogue,
109 /// | | header wrapped in VPIRBasicBlock).
110 /// \ |
111 /// \ v
112 /// >[ ] <-- original loop exit block(s), wrapped in VPIRBasicBlocks.
113 LLVM_ABI_FOR_TEST static std::unique_ptr<VPlan>
114 buildVPlan0(Loop *TheLoop, LoopInfo &LI, Type *InductionTy, DebugLoc IVDL,
115 PredicatedScalarEvolution &PSE, LoopVersioning *LVer = nullptr);
116
117 /// Replace VPPhi recipes in \p Plan's header with corresponding
118 /// VPHeaderPHIRecipe subclasses for inductions, reductions, and
119 /// fixed-order recurrences. This processes all header phis and creates
120 /// the appropriate widened recipe for each one.
121 static void createHeaderPhiRecipes(
122 VPlan &Plan, PredicatedScalarEvolution &PSE, Loop &OrigLoop,
123 const MapVector<PHINode *, InductionDescriptor> &Inductions,
124 const MapVector<PHINode *, RecurrenceDescriptor> &Reductions,
125 const SmallPtrSetImpl<const PHINode *> &FixedOrderRecurrences,
126 const SmallPtrSetImpl<PHINode *> &InLoopReductions, bool AllowReordering);
127
128 /// Create VPReductionRecipes for in-loop reductions. This processes chains
129 /// of operations contributing to in-loop reductions and creates appropriate
130 /// VPReductionRecipe instances. Block masks from \p BlockMaskCache are used
131 /// to add predication for blocks in \p BlocksNeedingPredication.
133 VPlan &Plan, const DenseMap<VPBasicBlock *, VPValue *> &BlockMaskCache,
134 const DenseSet<BasicBlock *> &BlocksNeedingPredication,
135 ElementCount MinVF);
136
137 /// Update \p Plan to account for all early exits.
138 LLVM_ABI_FOR_TEST static void handleEarlyExits(VPlan &Plan,
139 bool HasUncountableExit);
140
141 /// If a check is needed to guard executing the scalar epilogue loop, it will
142 /// be added to the middle block.
143 LLVM_ABI_FOR_TEST static void addMiddleCheck(VPlan &Plan,
144 bool RequiresScalarEpilogueCheck,
145 bool TailFolded);
146
147 // Create a check to \p Plan to see if the vector loop should be executed.
148 static void
149 addMinimumIterationCheck(VPlan &Plan, ElementCount VF, unsigned UF,
150 ElementCount MinProfitableTripCount,
151 bool RequiresScalarEpilogue, bool TailFolded,
152 bool CheckNeededWithTailFolding, Loop *OrigLoop,
154 PredicatedScalarEvolution &PSE);
155
156 /// Add a check to \p Plan to see if the epilogue vector loop should be
157 /// executed.
159 VPlan &Plan, Value *TripCount, Value *VectorTripCount,
160 bool RequiresScalarEpilogue, ElementCount EpilogueVF, unsigned EpilogueUF,
161 unsigned MainLoopStep, unsigned EpilogueLoopStep, ScalarEvolution &SE);
162
163 /// Replace loops in \p Plan's flat CFG with VPRegionBlocks, turning \p Plan's
164 /// flat CFG into a hierarchical CFG.
165 LLVM_ABI_FOR_TEST static void createLoopRegions(VPlan &Plan);
166
167 /// Wrap runtime check block \p CheckBlock in a VPIRBB and \p Cond in a
168 /// VPValue and connect the block to \p Plan, using the VPValue as branch
169 /// condition.
170 static void attachCheckBlock(VPlan &Plan, Value *Cond, BasicBlock *CheckBlock,
171 bool AddBranchWeights);
172
173 /// Replaces the VPInstructions in \p Plan with corresponding
174 /// widen recipes. Returns false if any VPInstructions could not be converted
175 /// to a wide recipe if needed.
176 LLVM_ABI_FOR_TEST static bool
178 const TargetLibraryInfo &TLI);
179
180 /// Try to legalize reductions with multiple in-loop uses. Currently only
181 /// min/max reductions used by FindLastIV reductions are supported. Otherwise
182 /// return false.
183 static bool handleMultiUseReductions(VPlan &Plan);
184
185 /// Try to have all users of fixed-order recurrences appear after the recipe
186 /// defining their previous value, by either sinking users or hoisting recipes
187 /// defining their previous value (and its operands). Then introduce
188 /// FirstOrderRecurrenceSplice VPInstructions to combine the value from the
189 /// recurrence phis and previous values.
190 /// \returns true if all users of fixed-order recurrences could be re-arranged
191 /// as needed or false if it is not possible. In the latter case, \p Plan is
192 /// not valid.
193 static bool adjustFixedOrderRecurrences(VPlan &Plan, VPBuilder &Builder);
194
195 /// Check if \p Plan contains any FMaxNum or FMinNum reductions. If they do,
196 /// try to update the vector loop to exit early if any input is NaN and resume
197 /// executing in the scalar loop to handle the NaNs there. Return false if
198 /// this attempt was unsuccessful.
199 static bool handleMaxMinNumReductions(VPlan &Plan);
200
201 /// Check if \p Plan contains any FindLast reductions. If it does, try to
202 /// update the vector loop to save the appropriate state using selects
203 /// for entire vectors for both the latest mask containing at least one active
204 /// element and the corresponding data vector. Return false if this attempt
205 /// was unsuccessful.
206 static bool handleFindLastReductions(VPlan &Plan);
207
208 /// Clear NSW/NUW flags from reduction instructions if necessary.
209 static void clearReductionWrapFlags(VPlan &Plan);
210
211 /// Explicitly unroll \p Plan by \p UF.
212 static void unrollByUF(VPlan &Plan, unsigned UF);
213
214 /// Replace each replicating VPReplicateRecipe and VPInstruction outside of
215 /// any replicate region in \p Plan with \p VF single-scalar recipes.
216 /// TODO: Also replicate VPScalarIVSteps and VPReplicateRecipes inside
217 /// replicate regions, thereby dissolving the latter.
218 static void replicateByVF(VPlan &Plan, ElementCount VF);
219
220 /// Optimize \p Plan based on \p BestVF and \p BestUF. This may restrict the
221 /// resulting plan to \p BestVF and \p BestUF.
222 static void optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
223 unsigned BestUF,
224 PredicatedScalarEvolution &PSE);
225
226 /// Apply VPlan-to-VPlan optimizations to \p Plan, including induction recipe
227 /// optimizations, dead recipe removal, replicate region optimizations and
228 /// block merging.
229 LLVM_ABI_FOR_TEST static void optimize(VPlan &Plan);
230
231 /// Wrap predicated VPReplicateRecipes with a mask operand in an if-then
232 /// region block and remove the mask operand. Optimize the created regions by
233 /// iteratively sinking scalar operands into the region, followed by merging
234 /// regions until no improvements are remaining.
235 static void createAndOptimizeReplicateRegions(VPlan &Plan);
236
237 /// Replace (ICMP_ULE, wide canonical IV, backedge-taken-count) checks with an
238 /// (active-lane-mask recipe, wide canonical IV, trip-count). If \p
239 /// UseActiveLaneMaskForControlFlow is true, introduce an
240 /// VPActiveLaneMaskPHIRecipe. If \p DataAndControlFlowWithoutRuntimeCheck is
241 /// true, no minimum-iteration runtime check will be created (during skeleton
242 /// creation) and instead it is handled using active-lane-mask. \p
243 /// DataAndControlFlowWithoutRuntimeCheck implies \p
244 /// UseActiveLaneMaskForControlFlow.
245 static void addActiveLaneMask(VPlan &Plan,
246 bool UseActiveLaneMaskForControlFlow,
248
249 /// Insert truncates and extends for any truncated recipe. Redundant casts
250 /// will be folded later.
251 static void
252 truncateToMinimalBitwidths(VPlan &Plan,
253 const MapVector<Instruction *, uint64_t> &MinBWs);
254
255 /// Replace symbolic strides from \p StridesMap in \p Plan with constants when
256 /// possible.
257 static void
258 replaceSymbolicStrides(VPlan &Plan, PredicatedScalarEvolution &PSE,
259 const DenseMap<Value *, const SCEV *> &StridesMap);
260
261 /// Drop poison flags from recipes that may generate a poison value that is
262 /// used after vectorization, even when their operands are not poison. Those
263 /// recipes meet the following conditions:
264 /// * Contribute to the address computation of a recipe generating a widen
265 /// memory load/store (VPWidenMemoryInstructionRecipe or
266 /// VPInterleaveRecipe).
267 /// * Such a widen memory load/store has at least one underlying Instruction
268 /// that is in a basic block that needs predication and after vectorization
269 /// the generated instruction won't be predicated.
270 /// Uses \p BlockNeedsPredication to check if a block needs predicating.
271 /// TODO: Replace BlockNeedsPredication callback with retrieving info from
272 /// VPlan directly.
273 static void dropPoisonGeneratingRecipes(
274 VPlan &Plan,
275 const std::function<bool(BasicBlock *)> &BlockNeedsPredication);
276
277 /// Add a VPEVLBasedIVPHIRecipe and related recipes to \p Plan and
278 /// replaces all uses except the canonical IV increment of
279 /// VPCanonicalIVPHIRecipe with a VPEVLBasedIVPHIRecipe.
280 /// VPCanonicalIVPHIRecipe is only used to control the loop after
281 /// this transformation.
282 static void
283 addExplicitVectorLength(VPlan &Plan,
284 const std::optional<unsigned> &MaxEVLSafeElements);
285
286 /// Optimize recipes which use an EVL-based header mask to VP intrinsics, for
287 /// example:
288 ///
289 /// %mask = icmp ult step-vector, EVL
290 /// %load = load %ptr, %mask
291 /// -->
292 /// %load = vp.load %ptr, EVL
293 static void optimizeEVLMasks(VPlan &Plan);
294
295 // For each Interleave Group in \p InterleaveGroups replace the Recipes
296 // widening its memory instructions with a single VPInterleaveRecipe at its
297 // insertion point.
298 static void createInterleaveGroups(
299 VPlan &Plan,
300 const SmallPtrSetImpl<const InterleaveGroup<Instruction> *>
301 &InterleaveGroups,
302 VPRecipeBuilder &RecipeBuilder, const bool &ScalarEpilogueAllowed);
303
304 /// Remove dead recipes from \p Plan.
305 static void removeDeadRecipes(VPlan &Plan);
306
307 /// Update \p Plan to account for the uncountable early exit from \p
308 /// EarlyExitingVPBB to \p EarlyExitVPBB by introducing a BranchOnTwoConds
309 /// terminator in the latch that handles the early exit and the latch exit
310 /// condition.
311 static void handleUncountableEarlyExit(VPBasicBlock *EarlyExitingVPBB,
312 VPBasicBlock *EarlyExitVPBB,
313 VPlan &Plan, VPBasicBlock *HeaderVPBB,
314 VPBasicBlock *LatchVPBB);
315
316 /// Replace loop regions with explicit CFG.
317 static void dissolveLoopRegions(VPlan &Plan);
318
319 /// Expand BranchOnTwoConds instructions into explicit CFG with
320 /// BranchOnCond instructions. Should be called after dissolveLoopRegions.
321 static void expandBranchOnTwoConds(VPlan &Plan);
322
323 /// Transform EVL loops to use variable-length stepping after region
324 /// dissolution.
325 ///
326 /// Once loop regions are replaced with explicit CFG, EVL loops can step with
327 /// variable vector lengths instead of fixed lengths. This transformation:
328 /// * Makes EVL-Phi concrete.
329 // * Removes CanonicalIV and increment.
330 /// * Replaces the exit condition from
331 /// (branch-on-count CanonicalIVInc, VectorTripCount)
332 /// to
333 /// (branch-on-cond eq AVLNext, 0)
334 static void canonicalizeEVLLoops(VPlan &Plan);
335
336 /// Lower abstract recipes to concrete ones, that can be codegen'd.
337 static void convertToConcreteRecipes(VPlan &Plan);
338
339 /// This function converts initial recipes to the abstract recipes and clamps
340 /// \p Range based on cost model for following optimizations and cost
341 /// estimations. The converted abstract recipes will lower to concrete
342 /// recipes before codegen.
343 static void convertToAbstractRecipes(VPlan &Plan, VPCostContext &Ctx,
344 VFRange &Range);
345
346 /// Perform instcombine-like simplifications on recipes in \p Plan.
347 static void simplifyRecipes(VPlan &Plan);
348
349 /// Remove BranchOnCond recipes with true or false conditions together with
350 /// removing dead edges to their successors.
351 static void removeBranchOnConst(VPlan &Plan);
352
353 /// Perform common-subexpression-elimination on \p Plan.
354 static void cse(VPlan &Plan);
355
356 /// If there's a single exit block, optimize its phi recipes that use exiting
357 /// IV values by feeding them precomputed end values instead, possibly taken
358 /// one step backwards.
359 static void
360 optimizeInductionExitUsers(VPlan &Plan,
361 DenseMap<VPValue *, VPValue *> &EndValues,
362 PredicatedScalarEvolution &PSE);
363
364 /// Add explicit broadcasts for live-ins and VPValues defined in \p Plan's entry block if they are used as vectors.
365 static void materializeBroadcasts(VPlan &Plan);
366
367 /// Hoist single-scalar loads with invariant addresses out of the vector loop
368 /// to the preheader, if they are proven not to alias with any stores in the
369 /// plan using noalias metadata.
370 static void hoistInvariantLoads(VPlan &Plan);
371
372 /// Hoist predicated loads from the same address to the loop entry block, if
373 /// they are guaranteed to execute on both paths (i.e., in replicate regions
374 /// with complementary masks P and NOT P).
375 static void hoistPredicatedLoads(VPlan &Plan, PredicatedScalarEvolution &PSE,
376 const Loop *L);
377
378 /// Sink predicated stores to the same address with complementary predicates
379 /// (P and NOT P) to an unconditional store with select recipes for the
380 /// stored values. This eliminates branching overhead when all paths
381 /// unconditionally store to the same location.
382 static void sinkPredicatedStores(VPlan &Plan, PredicatedScalarEvolution &PSE,
383 const Loop *L);
384
385 // Materialize vector trip counts for constants early if it can simply be
386 // computed as (Original TC / VF * UF) * VF * UF.
387 static void
388 materializeConstantVectorTripCount(VPlan &Plan, ElementCount BestVF,
389 unsigned BestUF,
390 PredicatedScalarEvolution &PSE);
391
392 /// Materialize vector trip count computations to a set of VPInstructions.
393 static void materializeVectorTripCount(VPlan &Plan,
394 VPBasicBlock *VectorPHVPBB,
395 bool TailByMasking,
396 bool RequiresScalarEpilogue);
397
398 /// Materialize the backedge-taken count to be computed explicitly using
399 /// VPInstructions.
400 static void materializeBackedgeTakenCount(VPlan &Plan,
401 VPBasicBlock *VectorPH);
402
403 /// Add explicit Build[Struct]Vector recipes to Pack multiple scalar values
404 /// into vectors and Unpack recipes to extract scalars from vectors as
405 /// needed.
406 static void materializePacksAndUnpacks(VPlan &Plan);
407
408 /// Materialize VF and VFxUF to be computed explicitly using VPInstructions.
409 static void materializeVFAndVFxUF(VPlan &Plan, VPBasicBlock *VectorPH,
410 ElementCount VF);
411
412 /// Expand VPExpandSCEVRecipes in \p Plan's entry block. Each
413 /// VPExpandSCEVRecipe is replaced with a live-in wrapping the expanded IR
414 /// value. A mapping from SCEV expressions to their expanded IR value is
415 /// returned.
416 static DenseMap<const SCEV *, Value *> expandSCEVs(VPlan &Plan,
417 ScalarEvolution &SE);
418
419 /// Try to convert a plan with interleave groups with VF elements to a plan
420 /// with the interleave groups replaced by wide loads and stores processing VF
421 /// elements, if all transformed interleave groups access the full vector
422 /// width (checked via \o VectorRegWidth). This effectively is a very simple
423 /// form of loop-aware SLP, where we use interleave groups to identify
424 /// candidates.
425 static void narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
426 TypeSize VectorRegWidth);
427
428 /// Predicate and linearize the control-flow in the only loop region of
429 /// \p Plan. If \p FoldTail is true, create a mask guarding the loop
430 /// header, otherwise use all-true for the header mask. Masks for blocks are
431 /// added to a block-to-mask map which is returned in order to be used later
432 /// for wide recipe construction. This argument is temporary and will be
433 /// removed in the future.
434 static DenseMap<VPBasicBlock *, VPValue *>
435 introduceMasksAndLinearize(VPlan &Plan, bool FoldTail);
436
437 /// Add branch weight metadata, if the \p Plan's middle block is terminated by
438 /// a BranchOnCond recipe.
439 static void
440 addBranchWeightToMiddleTerminator(VPlan &Plan, ElementCount VF,
441 std::optional<unsigned> VScaleForTuning);
442
443 /// Update the resume phis in the scalar preheader after creating wide recipes
444 /// for first-order recurrences, reductions and inductions. End values for
445 /// inductions are added to \p IVEndValues.
446 static void
447 updateScalarResumePhis(VPlan &Plan,
448 DenseMap<VPValue *, VPValue *> &IVEndValues);
449
450 /// Handle users in the exit block for first order reductions in the original
451 /// exit block. The penultimate value of recurrences is fed to their LCSSA phi
452 /// users in the original exit block using the VPIRInstruction wrapping to the
453 /// LCSSA phi.
454 static void addExitUsersForFirstOrderRecurrences(VPlan &Plan, VFRange &Range);
455
456 /// Detect and create partial reduction recipes for scaled reductions in
457 /// \p Plan. Must be called after recipe construction. If partial reductions
458 /// are only valid for a subset of VFs in Range, Range.End is updated.
459 static void createPartialReductions(VPlan &Plan, VPCostContext &CostCtx,
460 VFRange &Range);
461};
462
463} // namespace llvm
464
465#endif // LLVM_TRANSFORMS_VECTORIZE_VPLANTRANSFORMS_H
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
#define LLVM_ABI_FOR_TEST
Definition Compiler.h:218
static constexpr uint32_t MinItersBypassWeights[]
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
const SmallVectorImpl< MachineOperand > & Cond
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...
This file declares the class VPlanVerifier, which contains utility functions to check the consistency...
This file contains the declarations of the Vectorization Plan base classes:
static const char PassName[]
A struct for saving information about induction variables.
This class emits a version of the loop where run-time checks ensure that may-alias pointers can't ove...
Pass interface - Implemented by all 'passes'.
Definition Pass.h:99
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
The main scalar evolution driver.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
VPlan-based builder utility analogous to IRBuilder.
Helper class to create VPRecipies from IR instructions.
VPlan models a candidate for vectorization, encoding various decisions take to produce efficient outp...
Definition VPlan.h:4408
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
LLVM_ABI_FOR_TEST cl::opt< bool > VerifyEachVPlan
LLVM_ABI_FOR_TEST bool verifyVPlanIsValid(const VPlan &Plan, bool VerifyLate=false)
Verify invariants for general VPlans.
LLVM_ABI_FOR_TEST cl::opt< bool > EnableWideActiveLaneMask
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
@ DataAndControlFlowWithoutRuntimeCheck
Use predicate to control both data and control flow, but modify the trip count so that a runtime over...
LLVM_ABI_FOR_TEST cl::opt< bool > PrintAfterEachVPlanPass
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
static LLVM_ABI_FOR_TEST bool tryToConvertVPInstructionsToVPRecipes(VPlan &Plan, const TargetLibraryInfo &TLI)
Replaces the VPInstructions in Plan with corresponding widen recipes.
static decltype(auto) runPass(StringRef PassName, PassTy &&Pass, VPlan &Plan, ArgsTy &&...Args)
Helper to run a VPlan pass Pass on VPlan, forwarding extra arguments to the pass.
static void materializeBroadcasts(VPlan &Plan)
Add explicit broadcasts for live-ins and VPValues defined in Plan's entry block if they are used as v...
static void materializePacksAndUnpacks(VPlan &Plan)
Add explicit Build[Struct]Vector recipes to Pack multiple scalar values into vectors and Unpack recip...
static bool handleMultiUseReductions(VPlan &Plan)
Try to legalize reductions with multiple in-loop uses.
static LLVM_ABI_FOR_TEST std::unique_ptr< VPlan > buildVPlan0(Loop *TheLoop, LoopInfo &LI, Type *InductionTy, DebugLoc IVDL, PredicatedScalarEvolution &PSE, LoopVersioning *LVer=nullptr)
Create a base VPlan0, serving as the common starting point for all later candidates.
static void optimizeInductionExitUsers(VPlan &Plan, DenseMap< VPValue *, VPValue * > &EndValues, PredicatedScalarEvolution &PSE)
If there's a single exit block, optimize its phi recipes that use exiting IV values by feeding them p...
static void materializeBackedgeTakenCount(VPlan &Plan, VPBasicBlock *VectorPH)
Materialize the backedge-taken count to be computed explicitly using VPInstructions.
static void hoistInvariantLoads(VPlan &Plan)
Hoist single-scalar loads with invariant addresses out of the vector loop to the preheader,...
static LLVM_ABI_FOR_TEST void handleEarlyExits(VPlan &Plan, bool HasUncountableExit)
Update Plan to account for all early exits.
static void canonicalizeEVLLoops(VPlan &Plan)
Transform EVL loops to use variable-length stepping after region dissolution.
static void createInLoopReductionRecipes(VPlan &Plan, const DenseMap< VPBasicBlock *, VPValue * > &BlockMaskCache, const DenseSet< BasicBlock * > &BlocksNeedingPredication, ElementCount MinVF)
Create VPReductionRecipes for in-loop reductions.
static void dropPoisonGeneratingRecipes(VPlan &Plan, const std::function< bool(BasicBlock *)> &BlockNeedsPredication)
Drop poison flags from recipes that may generate a poison value that is used after vectorization,...
static void addMinimumIterationCheck(VPlan &Plan, ElementCount VF, unsigned UF, ElementCount MinProfitableTripCount, bool RequiresScalarEpilogue, bool TailFolded, bool CheckNeededWithTailFolding, Loop *OrigLoop, const uint32_t *MinItersBypassWeights, DebugLoc DL, PredicatedScalarEvolution &PSE)
static void createAndOptimizeReplicateRegions(VPlan &Plan)
Wrap predicated VPReplicateRecipes with a mask operand in an if-then region block and remove the mask...
static void createInterleaveGroups(VPlan &Plan, const SmallPtrSetImpl< const InterleaveGroup< Instruction > * > &InterleaveGroups, VPRecipeBuilder &RecipeBuilder, const bool &ScalarEpilogueAllowed)
static void addBranchWeightToMiddleTerminator(VPlan &Plan, ElementCount VF, std::optional< unsigned > VScaleForTuning)
Add branch weight metadata, if the Plan's middle block is terminated by a BranchOnCond recipe.
static bool handleFindLastReductions(VPlan &Plan)
Check if Plan contains any FindLast reductions.
static void narrowInterleaveGroups(VPlan &Plan, ElementCount VF, TypeSize VectorRegWidth)
Try to convert a plan with interleave groups with VF elements to a plan with the interleave groups re...
static void unrollByUF(VPlan &Plan, unsigned UF)
Explicitly unroll Plan by UF.
static DenseMap< const SCEV *, Value * > expandSCEVs(VPlan &Plan, ScalarEvolution &SE)
Expand VPExpandSCEVRecipes in Plan's entry block.
static void convertToConcreteRecipes(VPlan &Plan)
Lower abstract recipes to concrete ones, that can be codegen'd.
static void expandBranchOnTwoConds(VPlan &Plan)
Expand BranchOnTwoConds instructions into explicit CFG with BranchOnCond instructions.
static void hoistPredicatedLoads(VPlan &Plan, PredicatedScalarEvolution &PSE, const Loop *L)
Hoist predicated loads from the same address to the loop entry block, if they are guaranteed to execu...
static void convertToAbstractRecipes(VPlan &Plan, VPCostContext &Ctx, VFRange &Range)
This function converts initial recipes to the abstract recipes and clamps Range based on cost model f...
static void materializeConstantVectorTripCount(VPlan &Plan, ElementCount BestVF, unsigned BestUF, PredicatedScalarEvolution &PSE)
static void addExitUsersForFirstOrderRecurrences(VPlan &Plan, VFRange &Range)
Handle users in the exit block for first order reductions in the original exit block.
static void createHeaderPhiRecipes(VPlan &Plan, PredicatedScalarEvolution &PSE, Loop &OrigLoop, const MapVector< PHINode *, InductionDescriptor > &Inductions, const MapVector< PHINode *, RecurrenceDescriptor > &Reductions, const SmallPtrSetImpl< const PHINode * > &FixedOrderRecurrences, const SmallPtrSetImpl< PHINode * > &InLoopReductions, bool AllowReordering)
Replace VPPhi recipes in Plan's header with corresponding VPHeaderPHIRecipe subclasses for inductions...
static DenseMap< VPBasicBlock *, VPValue * > introduceMasksAndLinearize(VPlan &Plan, bool FoldTail)
Predicate and linearize the control-flow in the only loop region of Plan.
static void addExplicitVectorLength(VPlan &Plan, const std::optional< unsigned > &MaxEVLSafeElements)
Add a VPEVLBasedIVPHIRecipe and related recipes to Plan and replaces all uses except the canonical IV...
static void optimizeEVLMasks(VPlan &Plan)
Optimize recipes which use an EVL-based header mask to VP intrinsics, for example:
static void replaceSymbolicStrides(VPlan &Plan, PredicatedScalarEvolution &PSE, const DenseMap< Value *, const SCEV * > &StridesMap)
Replace symbolic strides from StridesMap in Plan with constants when possible.
static bool handleMaxMinNumReductions(VPlan &Plan)
Check if Plan contains any FMaxNum or FMinNum reductions.
static void removeBranchOnConst(VPlan &Plan)
Remove BranchOnCond recipes with true or false conditions together with removing dead edges to their ...
static LLVM_ABI_FOR_TEST void createLoopRegions(VPlan &Plan)
Replace loops in Plan's flat CFG with VPRegionBlocks, turning Plan's flat CFG into a hierarchical CFG...
static void removeDeadRecipes(VPlan &Plan)
Remove dead recipes from Plan.
static void attachCheckBlock(VPlan &Plan, Value *Cond, BasicBlock *CheckBlock, bool AddBranchWeights)
Wrap runtime check block CheckBlock in a VPIRBB and Cond in a VPValue and connect the block to Plan,...
static void materializeVectorTripCount(VPlan &Plan, VPBasicBlock *VectorPHVPBB, bool TailByMasking, bool RequiresScalarEpilogue)
Materialize vector trip count computations to a set of VPInstructions.
static void simplifyRecipes(VPlan &Plan)
Perform instcombine-like simplifications on recipes in Plan.
static void sinkPredicatedStores(VPlan &Plan, PredicatedScalarEvolution &PSE, const Loop *L)
Sink predicated stores to the same address with complementary predicates (P and NOT P) to an uncondit...
static void handleUncountableEarlyExit(VPBasicBlock *EarlyExitingVPBB, VPBasicBlock *EarlyExitVPBB, VPlan &Plan, VPBasicBlock *HeaderVPBB, VPBasicBlock *LatchVPBB)
Update Plan to account for the uncountable early exit from EarlyExitingVPBB to EarlyExitVPBB by intro...
static void replicateByVF(VPlan &Plan, ElementCount VF)
Replace each replicating VPReplicateRecipe and VPInstruction outside of any replicate region in Plan ...
static void clearReductionWrapFlags(VPlan &Plan)
Clear NSW/NUW flags from reduction instructions if necessary.
static void createPartialReductions(VPlan &Plan, VPCostContext &CostCtx, VFRange &Range)
Detect and create partial reduction recipes for scaled reductions in Plan.
static void cse(VPlan &Plan)
Perform common-subexpression-elimination on Plan.
static void addActiveLaneMask(VPlan &Plan, bool UseActiveLaneMaskForControlFlow, bool DataAndControlFlowWithoutRuntimeCheck)
Replace (ICMP_ULE, wide canonical IV, backedge-taken-count) checks with an (active-lane-mask recipe,...
static LLVM_ABI_FOR_TEST void optimize(VPlan &Plan)
Apply VPlan-to-VPlan optimizations to Plan, including induction recipe optimizations,...
static void dissolveLoopRegions(VPlan &Plan)
Replace loop regions with explicit CFG.
static void truncateToMinimalBitwidths(VPlan &Plan, const MapVector< Instruction *, uint64_t > &MinBWs)
Insert truncates and extends for any truncated recipe.
static bool adjustFixedOrderRecurrences(VPlan &Plan, VPBuilder &Builder)
Try to have all users of fixed-order recurrences appear after the recipe defining their previous valu...
static void optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF, unsigned BestUF, PredicatedScalarEvolution &PSE)
Optimize Plan based on BestVF and BestUF.
static void materializeVFAndVFxUF(VPlan &Plan, VPBasicBlock *VectorPH, ElementCount VF)
Materialize VF and VFxUF to be computed explicitly using VPInstructions.
static void addMinimumVectorEpilogueIterationCheck(VPlan &Plan, Value *TripCount, Value *VectorTripCount, bool RequiresScalarEpilogue, ElementCount EpilogueVF, unsigned EpilogueUF, unsigned MainLoopStep, unsigned EpilogueLoopStep, ScalarEvolution &SE)
Add a check to Plan to see if the epilogue vector loop should be executed.
static void updateScalarResumePhis(VPlan &Plan, DenseMap< VPValue *, VPValue * > &IVEndValues)
Update the resume phis in the scalar preheader after creating wide recipes for first-order recurrence...
static LLVM_ABI_FOR_TEST void addMiddleCheck(VPlan &Plan, bool RequiresScalarEpilogueCheck, bool TailFolded)
If a check is needed to guard executing the scalar epilogue loop, it will be added to the middle bloc...