LLVM 23.0.0git
VPlanTransforms.h
Go to the documentation of this file.
1//===- VPlanTransforms.h - Utility VPlan to VPlan transforms --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file provides utility VPlan to VPlan transformations.
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLANTRANSFORMS_H
14#define LLVM_TRANSFORMS_VECTORIZE_VPLANTRANSFORMS_H
15
16#include "VPlan.h"
17#include "VPlanVerifier.h"
19#include "llvm/ADT/ScopeExit.h"
23#include "llvm/Support/Regex.h"
24
25namespace llvm {
26
28class Instruction;
29class Loop;
30class LoopVersioning;
32class PHINode;
33class ScalarEvolution;
37class VPBuilder;
38class VPRecipeBuilder;
39struct VFRange;
40
43
44#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
48#endif
49
51 /// Helper to run a VPlan pass \p Pass on \p Plan, forwarding extra arguments
52 /// to the pass. Prints and, unless \p EnableVerify is false, verifies \p Plan
53 /// after the pass if requested via command line options.
54 template <bool EnableVerify = true, typename PassTy, typename... ArgsTy>
55 static decltype(auto) runPass(StringRef PassName, PassTy &&Pass, VPlan &Plan,
56 ArgsTy &&...Args) {
57 scope_exit PostTransformActions{[&]() {
58#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
59 // Make sure to print before verification, so that output is more useful
60 // in case of failures:
62 (VPlanPrintAfterPasses.getNumOccurrences() > 0 &&
64 return Regex(Entry).match(PassName);
65 }))) {
66 dbgs()
67 << "VPlan for loop in '"
69 << "' after " << PassName << '\n';
72 else
73 dbgs() << Plan << '\n';
74 }
75#endif
76 if (VerifyEachVPlan && EnableVerify) {
77 if (!verifyVPlanIsValid(Plan))
78 report_fatal_error("Broken VPlan found, compilation aborted!");
79 }
80 }};
81
82 return std::forward<PassTy>(Pass)(Plan, std::forward<ArgsTy>(Args)...);
83 }
84#define RUN_VPLAN_PASS(PASS, ...) \
85 llvm::VPlanTransforms::runPass(#PASS, PASS, __VA_ARGS__)
86#define RUN_VPLAN_PASS_NO_VERIFY(PASS, ...) \
87 llvm::VPlanTransforms::runPass<false>(#PASS, PASS, __VA_ARGS__)
88
89 /// Create a base VPlan0, serving as the common starting point for all later
90 /// candidates. It consists of an initial plain CFG loop with loop blocks from
91 /// \p TheLoop being directly translated to VPBasicBlocks with VPInstruction
92 /// corresponding to the input IR.
93 ///
94 /// The created loop is wrapped in an initial skeleton to facilitate
95 /// vectorization, consisting of a vector pre-header, an exit block for the
96 /// main vector loop (middle.block) and a new block as preheader of the scalar
97 /// loop (scalar.ph). See below for an illustration. It also creates a
98 /// VPValue expression for the original trip count.
99 ///
100 /// [ ] <-- Plan's entry VPIRBasicBlock, wrapping the original loop's
101 /// / \ old preheader. Will contain iteration number check and SCEV
102 /// | | expansions.
103 /// | |
104 /// / v
105 /// | [ ] <-- vector loop bypass (may consist of multiple blocks) will be
106 /// | / | added later.
107 /// | / v
108 /// || [ ] <-- vector pre header.
109 /// |/ |
110 /// | v
111 /// | [ ] \ <-- plain CFG loop wrapping original loop to be vectorized.
112 /// | [ ]_|
113 /// | |
114 /// | v
115 /// | [ ] <--- middle-block with the branch to successors
116 /// | / |
117 /// | / |
118 /// | | v
119 /// \--->[ ] <--- scalar preheader (initially a VPBasicBlock, which will be
120 /// | | replaced later by a VPIRBasicBlock wrapping the scalar
121 /// | | preheader basic block).
122 /// | |
123 /// v <-- edge from middle to exit iff epilogue is not required.
124 /// | [ ] \
125 /// | [ ]_| <-- old scalar loop to handle remainder (scalar epilogue,
126 /// | | header wrapped in VPIRBasicBlock).
127 /// \ |
128 /// \ v
129 /// >[ ] <-- original loop exit block(s), wrapped in VPIRBasicBlocks.
130 LLVM_ABI_FOR_TEST static std::unique_ptr<VPlan>
131 buildVPlan0(Loop *TheLoop, LoopInfo &LI, Type *InductionTy,
132 PredicatedScalarEvolution &PSE, LoopVersioning *LVer = nullptr);
133
134 /// Add a canonical IV and its increment to \p Plan, using the DebugLoc
135 /// \p DL.
136 static void addCanonicalIVRecipes(VPlan &Plan, DebugLoc DL);
137
138 /// Replace VPPhi recipes in \p Plan's header with corresponding
139 /// VPHeaderPHIRecipe subclasses for inductions, reductions, and
140 /// fixed-order recurrences. This processes all header phis and creates
141 /// the appropriate widened recipe for each one. For fixed-order
142 /// recurrences, also creates FirstOrderRecurrenceSplice instructions and
143 /// sinks/hoists users as needed. Returns false if any fixed-order
144 /// recurrence cannot be handled.
145 static bool createHeaderPhiRecipes(
146 VPlan &Plan, PredicatedScalarEvolution &PSE, Loop &OrigLoop,
147 const MapVector<PHINode *, InductionDescriptor> &Inductions,
148 const MapVector<PHINode *, RecurrenceDescriptor> &Reductions,
149 const SmallPtrSetImpl<const PHINode *> &FixedOrderRecurrences,
150 const SmallPtrSetImpl<PHINode *> &InLoopReductions, bool AllowReordering);
151
152 /// Create VPReductionRecipes for in-loop reductions. This processes chains
153 /// of operations contributing to in-loop reductions and creates appropriate
154 /// VPReductionRecipe instances.
155 static void createInLoopReductionRecipes(VPlan &Plan, ElementCount MinVF);
156
157 /// Update \p Plan to account for all early exits. If \p Style is not
158 /// NoUncountableExit, handles uncountable early exits and checks that all
159 /// loads are dereferenceable. Returns false if a non-dereferenceable load is
160 /// found.
161 LLVM_ABI_FOR_TEST static bool
162 handleEarlyExits(VPlan &Plan, UncountableExitStyle Style, Loop *TheLoop,
163 PredicatedScalarEvolution &PSE, DominatorTree &DT,
164 AssumptionCache *AC);
165
166 /// If a check is needed to guard executing the scalar epilogue loop, it will
167 /// be added to the middle block.
168 LLVM_ABI_FOR_TEST static void addMiddleCheck(VPlan &Plan, bool TailFolded);
169
170 /// Create a check in \p CheckBlock to see if the vector loop should be
171 /// executed.
172 static void addMinimumIterationCheck(
173 VPlan &Plan, ElementCount VF, unsigned UF,
174 ElementCount MinProfitableTripCount, bool RequiresScalarEpilogue,
175 bool TailFolded, Loop *OrigLoop, const uint32_t *MinItersBypassWeights,
176 DebugLoc DL, PredicatedScalarEvolution &PSE, VPBasicBlock *CheckBlock);
177
178 /// Add a new check block before the vector preheader to \p Plan to check if
179 /// the main vector loop should be executed (TC >= VF * UF).
180 static void
181 addIterationCountCheckBlock(VPlan &Plan, ElementCount VF, unsigned UF,
182 bool RequiresScalarEpilogue, Loop *OrigLoop,
184 DebugLoc DL, PredicatedScalarEvolution &PSE);
185
186 /// Add a check to \p Plan to see if the epilogue vector loop should be
187 /// executed.
189 VPlan &Plan, Value *VectorTripCount, bool RequiresScalarEpilogue,
190 ElementCount EpilogueVF, unsigned EpilogueUF, unsigned MainLoopStep,
191 unsigned EpilogueLoopStep, ScalarEvolution &SE);
192
193 /// Replace loops in \p Plan's flat CFG with VPRegionBlocks, turning \p Plan's
194 /// flat CFG into a hierarchical CFG.
195 LLVM_ABI_FOR_TEST static void createLoopRegions(VPlan &Plan);
196
197 /// Wrap runtime check block \p CheckBlock in a VPIRBB and \p Cond in a
198 /// VPValue and connect the block to \p Plan, using the VPValue as branch
199 /// condition.
200 static void attachCheckBlock(VPlan &Plan, Value *Cond, BasicBlock *CheckBlock,
201 bool AddBranchWeights);
202
203 /// Replaces the VPInstructions in \p Plan with corresponding
204 /// widen recipes. Returns false if any VPInstructions could not be converted
205 /// to a wide recipe if needed.
206 LLVM_ABI_FOR_TEST static bool
208 const TargetLibraryInfo &TLI);
209
210 /// Try to legalize reductions with multiple in-loop uses. Currently only
211 /// strict and non-strict min/max reductions used by FindLastIV reductions are
212 /// supported, corresponding to computing the first and last argmin/argmax,
213 /// respectively. Otherwise return false.
214 static bool handleMultiUseReductions(VPlan &Plan,
215 OptimizationRemarkEmitter *ORE,
216 Loop *TheLoop);
217
218 /// Check if \p Plan contains any FMaxNum or FMinNum reductions. If it does,
219 /// try to update the vector loop to exit early if any input is NaN and resume
220 /// executing in the scalar loop to handle the NaNs there. Return false if
221 /// this attempt was unsuccessful.
222 static bool handleMaxMinNumReductions(VPlan &Plan);
223
224 /// Check if \p Plan contains any FindLast reductions. If it does, try to
225 /// update the vector loop to save the appropriate state using selects
226 /// for entire vectors for both the latest mask containing at least one active
227 /// element and the corresponding data vector. Return false if this attempt
228 /// was unsuccessful.
229 static bool handleFindLastReductions(VPlan &Plan);
230
231 /// Clear NSW/NUW flags from reduction instructions if necessary.
232 static void clearReductionWrapFlags(VPlan &Plan);
233
234 /// Explicitly unroll \p Plan by \p UF.
235 static void unrollByUF(VPlan &Plan, unsigned UF);
236
237 /// Replace replicating VPReplicateRecipe, VPScalarIVStepsRecipe and
238 /// VPInstruction in \p Plan with \p VF single-scalar recipes. Replicate
239 /// regions are dissolved by replicating their blocks and their recipes \p VF
240 /// times.
241 /// TODO: Also dissolve replicate regions with live outs.
242 static void replicateByVF(VPlan &Plan, ElementCount VF);
243
244 /// Optimize \p Plan based on \p BestVF and \p BestUF. This may restrict the
245 /// resulting plan to \p BestVF and \p BestUF.
246 static void optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
247 unsigned BestUF,
248 PredicatedScalarEvolution &PSE);
249
250 /// Try to simplify VPInstruction::ExplicitVectorLength recipes when the AVL
251 /// is known to be <= VF, replacing them with the AVL directly.
252 static bool simplifyKnownEVL(VPlan &Plan, ElementCount VF,
253 PredicatedScalarEvolution &PSE);
254
255 /// Apply VPlan-to-VPlan optimizations to \p Plan, including induction recipe
256 /// optimizations, dead recipe removal, replicate region optimizations and
257 /// block merging.
258 LLVM_ABI_FOR_TEST static void optimize(VPlan &Plan);
259
260 /// Remove redundant VPBasicBlocks by merging them into their single
261 /// predecessor if the latter has a single successor.
262 static bool mergeBlocksIntoPredecessors(VPlan &Plan);
263
264 /// Wrap predicated VPReplicateRecipes with a mask operand in an if-then
265 /// region block and remove the mask operand. Optimize the created regions by
266 /// iteratively sinking scalar operands into the region, followed by merging
267 /// regions until no improvements are remaining.
268 static void createAndOptimizeReplicateRegions(VPlan &Plan);
269
270 /// Replace (ICMP_ULE, wide canonical IV, backedge-taken-count) checks with an
271 /// (active-lane-mask recipe, wide canonical IV, trip-count). If \p
272 /// UseActiveLaneMaskForControlFlow is true, introduce a
273 /// VPActiveLaneMaskPHIRecipe.
274 static void addActiveLaneMask(VPlan &Plan,
275 bool UseActiveLaneMaskForControlFlow);
276
277 /// Insert truncates and extends for any truncated recipe. Redundant casts
278 /// will be folded later.
279 static void
280 truncateToMinimalBitwidths(VPlan &Plan,
281 const MapVector<Instruction *, uint64_t> &MinBWs);
282
283 /// Replace symbolic strides from \p StridesMap in \p Plan with constants when
284 /// possible.
285 static void
286 replaceSymbolicStrides(VPlan &Plan, PredicatedScalarEvolution &PSE,
287 const DenseMap<Value *, const SCEV *> &StridesMap);
288
289 /// Drop poison flags from recipes that may generate a poison value that is
290 /// used after vectorization, even when their operands are not poison. Those
291 /// recipes meet the following conditions:
292 /// * Contribute to the address computation of a recipe generating a widen
293 /// memory load/store (VPWidenMemoryInstructionRecipe or
294 /// VPInterleaveRecipe).
295 /// * Such a widen memory load/store is masked, but not with the header mask.
296 static void dropPoisonGeneratingRecipes(VPlan &Plan);
297
298 /// Add a VPCurrentIterationPHIRecipe and related recipes to \p Plan and
299 /// replace all uses of the canonical IV except for the canonical IV
300 /// increment with a VPCurrentIterationPHIRecipe. The canonical IV is only
301 /// used to control the loop after this transformation.
302 static void
303 addExplicitVectorLength(VPlan &Plan,
304 const std::optional<unsigned> &MaxEVLSafeElements);
305
306 /// Optimize recipes which use an EVL-based header mask to VP intrinsics, for
307 /// example:
308 ///
309 /// %mask = icmp ult step-vector, EVL
310 /// %load = load %ptr, %mask
311 /// -->
312 /// %load = vp.load %ptr, EVL
313 static void optimizeEVLMasks(VPlan &Plan);
314
315 /// For each interleave group in \p InterleaveGroups replace the recipes
316 /// widening its memory instructions with a single VPInterleaveRecipe at its
317 /// insertion point.
318 static void createInterleaveGroups(
319 VPlan &Plan,
320 const SmallPtrSetImpl<const InterleaveGroup<Instruction> *>
321 &InterleaveGroups,
322 const bool &EpilogueAllowed);
323
324 /// Transform widen memory recipes into strided access recipes when legal
325 /// and profitable. Clamps \p Range to maintain consistency with widen
326 /// decisions of \p Plan, and uses \p Ctx to evaluate the cost.
327 static void convertToStridedAccesses(VPlan &Plan,
328 PredicatedScalarEvolution &PSE, Loop &L,
329 VPCostContext &Ctx, VFRange &Range);
330
331 /// Remove dead recipes from \p Plan.
332 static void removeDeadRecipes(VPlan &Plan);
333
334 /// Update \p Plan to account for uncountable early exits by introducing
335 /// appropriate branching logic in the latch that handles early exits and the
336 /// latch exit condition. Multiple exits are handled with a dispatch block
337 /// that determines which exit to take based on lane-by-lane semantics.
338 static void handleUncountableEarlyExits(VPlan &Plan, VPBasicBlock *HeaderVPBB,
339 VPBasicBlock *LatchVPBB,
340 VPBasicBlock *MiddleVPBB,
342
343 /// Replaces the exit condition from
344 /// (branch-on-cond eq CanonicalIVInc, VectorTripCount)
345 /// to
346 /// (branch-on-cond eq AVLNext, 0)
347 static void convertEVLExitCond(VPlan &Plan);
348
349 /// Replace loop regions with explicit CFG.
350 static void dissolveLoopRegions(VPlan &Plan);
351
352 /// Expand BranchOnTwoConds instructions into explicit CFG with
353 /// BranchOnCond instructions. Should be called after dissolveLoopRegions.
354 static void expandBranchOnTwoConds(VPlan &Plan);
355
356 /// Transform loops with variable-length stepping after region
357 /// dissolution.
358 ///
359 /// Once loop regions are replaced with explicit CFG, loops can step with
360 /// variable vector lengths instead of fixed lengths. This transformation:
361 /// * Makes CurrentIteration-Phi concrete.
362 /// * Removes CanonicalIV and increment.
363 static void convertToVariableLengthStep(VPlan &Plan);
364
365 /// Lower abstract recipes to concrete ones, that can be codegen'd.
366 static void convertToConcreteRecipes(VPlan &Plan);
367
368 /// This function converts initial recipes to the abstract recipes and clamps
369 /// \p Range based on cost model for following optimizations and cost
370 /// estimations. The converted abstract recipes will lower to concrete
371 /// recipes before codegen.
372 static void convertToAbstractRecipes(VPlan &Plan, VPCostContext &Ctx,
373 VFRange &Range);
374
375 /// Perform instcombine-like simplifications on recipes in \p Plan.
376 static void simplifyRecipes(VPlan &Plan);
377
378 /// Remove BranchOnCond recipes with true or false conditions together with
379 /// removing dead edges to their successors. If \p OnlyLatches is true, only
380 /// process loop latches.
381 static void removeBranchOnConst(VPlan &Plan, bool OnlyLatches = false);
382
383 /// Perform common-subexpression-elimination on \p Plan.
384 static void cse(VPlan &Plan);
385
386 /// If there's a single exit block, optimize its phi recipes that use exiting
387 /// IV values by feeding them precomputed end values instead, possibly taken
388 /// one step backwards.
389 static void optimizeInductionLiveOutUsers(VPlan &Plan,
390 PredicatedScalarEvolution &PSE,
391 bool FoldTail);
392
393 /// Add explicit broadcasts for live-ins and VPValues defined in \p Plan's entry block if they are used as vectors.
394 static void materializeBroadcasts(VPlan &Plan);
395
396 /// Hoist predicated loads from the same address to the loop entry block, if
397 /// they are guaranteed to execute on both paths (i.e., in replicate regions
398 /// with complementary masks P and NOT P).
399 static void hoistPredicatedLoads(VPlan &Plan, PredicatedScalarEvolution &PSE,
400 const Loop *L);
401
402 /// Sink predicated stores to the same address with complementary predicates
403 /// (P and NOT P) to an unconditional store with select recipes for the
404 /// stored values. This eliminates branching overhead when all paths
405 /// unconditionally store to the same location.
406 static void sinkPredicatedStores(VPlan &Plan, PredicatedScalarEvolution &PSE,
407 const Loop *L);
408
409 /// Materialize vector trip counts for constants early if it can simply be
410 /// computed as (Original TC / VF * UF) * VF * UF.
411 static void
412 materializeConstantVectorTripCount(VPlan &Plan, ElementCount BestVF,
413 unsigned BestUF,
414 PredicatedScalarEvolution &PSE);
415
416 /// Materialize vector trip count computations to a set of VPInstructions.
417 /// \p Step is used as the step value for the trip count computation.
418 /// \p MaxRuntimeStep is the maximum possible runtime value of Step, used to
419 /// prove the trip count is divisible by the step for scalable VFs.
420 static void materializeVectorTripCount(
421 VPlan &Plan, VPBasicBlock *VectorPHVPBB, bool TailByMasking,
422 bool RequiresScalarEpilogue, VPValue *Step,
423 std::optional<uint64_t> MaxRuntimeStep = std::nullopt);
424
425 /// Materialize the backedge-taken count to be computed explicitly using
426 /// VPInstructions.
427 static void materializeBackedgeTakenCount(VPlan &Plan,
428 VPBasicBlock *VectorPH);
429
430 /// Add explicit Build[Struct]Vector recipes to Pack multiple scalar values
431 /// into vectors and Unpack recipes to extract scalars from vectors as
432 /// needed.
433 static void materializePacksAndUnpacks(VPlan &Plan);
434
435 /// Materialize UF, VF and VFxUF to be computed explicitly using
436 /// VPInstructions.
437 static void materializeFactors(VPlan &Plan, VPBasicBlock *VectorPH,
438 ElementCount VF);
439
440 /// Expand VPExpandSCEVRecipes in \p Plan's entry block. Each
441 /// VPExpandSCEVRecipe is replaced with a live-in wrapping the expanded IR
442 /// value. A mapping from SCEV expressions to their expanded IR value is
443 /// returned.
444 static DenseMap<const SCEV *, Value *> expandSCEVs(VPlan &Plan,
445 ScalarEvolution &SE);
446
447 /// Try to find a single VF among \p Plan's VFs for which all interleave
448 /// groups (with known minimum VF elements) can be replaced by wide loads and
449 /// stores processing VF elements, if all transformed interleave groups access
450 /// the full vector width (checked via the maximum vector register width). If
451 /// the transformation can be applied, the original \p Plan will be split in
452 /// 2:
453 /// 1. The original Plan with the single VF containing the optimized recipes
454 /// using wide loads instead of interleave groups.
455 /// 2. A new clone which contains all VFs of Plan except the optimized VF.
456 ///
457 /// This effectively is a very simple form of loop-aware SLP, where we use
458 /// interleave groups to identify candidates.
459 static std::unique_ptr<VPlan>
460 narrowInterleaveGroups(VPlan &Plan, const TargetTransformInfo &TTI);
461
462 /// Adapts the vector loop region for tail folding by introducing a header
463 /// mask and conditionally executing the content of the region:
464 ///
465 /// Vector loop region before:
466 /// +-------------------------------------------+
467 /// |%iv = ... |
468 /// |... |
469 /// |%iv.next = add %iv, vfxuf |
470 /// |branch-on-count %iv.next, vector-trip-count|
471 /// +-------------------------------------------+
472 ///
473 /// Vector loop region after:
474 /// +-------------------------------------------+
475 /// |%iv = ... |
476 /// |%wide.iv = widen-canonical-iv ... |
477 /// |%header-mask = icmp ule %wide.iv, BTC |
478 /// |branch-on-cond %header-mask |---+
479 /// +-------------------------------------------+ |
480 /// | |
481 /// v |
482 /// +-------------------------------------------+ |
483 /// | ... | |
484 /// +-------------------------------------------+ |
485 /// | |
486 /// v |
487 /// +-------------------------------------------+ |
488 /// |<phis> = phi [..., ...], [poison, header] |
489 /// |%iv.next = add %iv, vfxuf |<--+
490 /// |branch-on-count %iv.next, vector-trip-count|
491 /// +-------------------------------------------+
492 ///
493 /// Any VPInstruction::ExtractLastLanes are also updated to extract from the
494 /// last active lane of the header mask.
495 static void foldTailByMasking(VPlan &Plan);
496
497 /// Predicate and linearize the control-flow in the only loop region of
498 /// \p Plan.
499 static void introduceMasksAndLinearize(VPlan &Plan);
500
501 /// Replace a VPWidenCanonicalIVRecipe if it is present in \p Plan, with a
502 /// VPWidenIntOrFpInductionRecipe, provided it would not cause additional
503 /// spills for \p VF at unroll factor \p UF.
505 VPlan &Plan, ScalarEvolution &SE, const TargetTransformInfo &TTI,
507 unsigned UF, const SmallPtrSetImpl<const Value *> &ValuesToIgnore);
508
509 /// Add branch weight metadata, if the \p Plan's middle block is terminated by
510 /// a BranchOnCond recipe.
511 static void
512 addBranchWeightToMiddleTerminator(VPlan &Plan, ElementCount VF,
513 std::optional<unsigned> VScaleForTuning);
514
515 /// Adjust first-order recurrence users in the middle block: create
516 /// penultimate element extracts for LCSSA phi users, and handle penultimate
517 /// extracts of the last active lane edge.
518 static void adjustFirstOrderRecurrenceMiddleUsers(VPlan &Plan,
519 VFRange &Range);
520
521 /// Optimize FindLast reductions selecting IVs (or expressions of IVs) by
522 /// converting them to FindIV reductions, if their IV range excludes a
523 /// suitable sentinel value. For expressions of IVs, the expression is sunk
524 /// to the middle block.
525 static void optimizeFindIVReductions(VPlan &Plan,
526 PredicatedScalarEvolution &PSE, Loop &L);
527
528 /// Detect and create partial reduction recipes for scaled reductions in
529 /// \p Plan. Must be called after recipe construction. If partial reductions
530 /// are only valid for a subset of VFs in Range, Range.End is updated.
531 static void createPartialReductions(VPlan &Plan, VPCostContext &CostCtx,
532 VFRange &Range);
533
534 /// Convert load/store VPInstructions in \p Plan into widened or replicate
535 /// recipes. Non load/store input instructions are left unchanged.
536 static void makeMemOpWideningDecisions(VPlan &Plan, VFRange &Range,
537 VPRecipeBuilder &RecipeBuilder);
538
539 /// Make VPlan-based scalarization decisions prior to delegating to the ones
540 /// made by the legacy CM. Only transforms `usesFirstLaneOnly` def-use chains
541 /// enabled by prior widening of consecutive memory operations for now.
542 static void makeScalarizationDecisions(VPlan &Plan, VFRange &Range);
543
544 /// Convert call VPInstructions in \p Plan into widened call, vector
545 /// intrinsic or replicate recipes based on a cost comparison via \p CostCtx.
546 static void makeCallWideningDecisions(VPlan &Plan, VFRange &Range,
547 VPRecipeBuilder &RecipeBuilder,
548 VPCostContext &CostCtx);
549};
550
551} // namespace llvm
552
553#endif // LLVM_TRANSFORMS_VECTORIZE_VPLANTRANSFORMS_H
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
#define LLVM_ABI_FOR_TEST
Definition Compiler.h:218
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
static constexpr uint32_t MinItersBypassWeights[]
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
const SmallVectorImpl< MachineOperand > & Cond
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...
This pass exposes codegen information to IR-level passes.
This file declares the class VPlanVerifier, which contains utility functions to check the consistency...
This file contains the declarations of the Vectorization Plan base classes:
static const char PassName[]
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
A struct for saving information about induction variables.
This class emits a version of the loop where run-time checks ensure that may-alias pointers can't ove...
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
The optimization diagnostic interface.
Pass interface - Implemented by all 'passes'.
Definition Pass.h:99
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
LLVM_ABI bool match(StringRef String, SmallVectorImpl< StringRef > *Matches=nullptr, std::string *Error=nullptr) const
matches - Match the regex against a given String.
Definition Regex.cpp:83
The main scalar evolution driver.
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
TargetCostKind
The kind of cost model.
VPlan-based builder utility analogous to IRBuilder.
BasicBlock * getIRBasicBlock() const
Definition VPlan.h:4425
Helper class to create VPRecipes from IR instructions.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print this VPRegionBlock to O (recursively), prefixing all lines with Indent.
Definition VPlan.cpp:834
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
Definition VPlan.h:4606
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition VPlan.cpp:1098
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
Definition VPlan.h:4751
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:318
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI_FOR_TEST cl::opt< bool > VerifyEachVPlan
LLVM_ABI_FOR_TEST cl::opt< bool > VPlanPrintAfterAll
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
LLVM_ABI_FOR_TEST cl::opt< bool > EnableWideActiveLaneMask
UncountableExitStyle
Different methods of handling early exits.
Definition VPlan.h:78
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
LLVM_ABI_FOR_TEST cl::list< std::string > VPlanPrintAfterPasses
TargetTransformInfo TTI
LLVM_ABI_FOR_TEST bool verifyVPlanIsValid(const VPlan &Plan)
Verify invariants for general VPlans.
LLVM_ABI_FOR_TEST cl::opt< bool > VPlanPrintVectorRegionScope
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
static void handleUncountableEarlyExits(VPlan &Plan, VPBasicBlock *HeaderVPBB, VPBasicBlock *LatchVPBB, VPBasicBlock *MiddleVPBB, UncountableExitStyle Style)
Update Plan to account for uncountable early exits by introducing appropriate branching logic in the ...
static LLVM_ABI_FOR_TEST bool tryToConvertVPInstructionsToVPRecipes(VPlan &Plan, const TargetLibraryInfo &TLI)
Replaces the VPInstructions in Plan with corresponding widen recipes.
static void makeMemOpWideningDecisions(VPlan &Plan, VFRange &Range, VPRecipeBuilder &RecipeBuilder)
Convert load/store VPInstructions in Plan into widened or replicate recipes.
static decltype(auto) runPass(StringRef PassName, PassTy &&Pass, VPlan &Plan, ArgsTy &&...Args)
Helper to run a VPlan pass Pass on VPlan, forwarding extra arguments to the pass.
static bool createHeaderPhiRecipes(VPlan &Plan, PredicatedScalarEvolution &PSE, Loop &OrigLoop, const MapVector< PHINode *, InductionDescriptor > &Inductions, const MapVector< PHINode *, RecurrenceDescriptor > &Reductions, const SmallPtrSetImpl< const PHINode * > &FixedOrderRecurrences, const SmallPtrSetImpl< PHINode * > &InLoopReductions, bool AllowReordering)
Replace VPPhi recipes in Plan's header with corresponding VPHeaderPHIRecipe subclasses for inductions...
static void materializeBroadcasts(VPlan &Plan)
Add explicit broadcasts for live-ins and VPValues defined in Plan's entry block if they are used as v...
static void materializePacksAndUnpacks(VPlan &Plan)
Add explicit Build[Struct]Vector recipes to Pack multiple scalar values into vectors and Unpack recip...
static void createInterleaveGroups(VPlan &Plan, const SmallPtrSetImpl< const InterleaveGroup< Instruction > * > &InterleaveGroups, const bool &EpilogueAllowed)
static bool simplifyKnownEVL(VPlan &Plan, ElementCount VF, PredicatedScalarEvolution &PSE)
Try to simplify VPInstruction::ExplicitVectorLength recipes when the AVL is known to be <= VF,...
static void removeBranchOnConst(VPlan &Plan, bool OnlyLatches=false)
Remove BranchOnCond recipes with true or false conditions together with removing dead edges to their ...
static void introduceMasksAndLinearize(VPlan &Plan)
Predicate and linearize the control-flow in the only loop region of Plan.
static void materializeFactors(VPlan &Plan, VPBasicBlock *VectorPH, ElementCount VF)
Materialize UF, VF and VFxUF to be computed explicitly using VPInstructions.
static void foldTailByMasking(VPlan &Plan)
Adapts the vector loop region for tail folding by introducing a header mask and conditionally executi...
static void materializeBackedgeTakenCount(VPlan &Plan, VPBasicBlock *VectorPH)
Materialize the backedge-taken count to be computed explicitly using VPInstructions.
static void addMinimumVectorEpilogueIterationCheck(VPlan &Plan, Value *VectorTripCount, bool RequiresScalarEpilogue, ElementCount EpilogueVF, unsigned EpilogueUF, unsigned MainLoopStep, unsigned EpilogueLoopStep, ScalarEvolution &SE)
Add a check to Plan to see if the epilogue vector loop should be executed.
static void addActiveLaneMask(VPlan &Plan, bool UseActiveLaneMaskForControlFlow)
Replace (ICMP_ULE, wide canonical IV, backedge-taken-count) checks with an (active-lane-mask recipe,...
static bool handleMultiUseReductions(VPlan &Plan, OptimizationRemarkEmitter *ORE, Loop *TheLoop)
Try to legalize reductions with multiple in-loop uses.
static void replaceWideCanonicalIVWithWideIV(VPlan &Plan, ScalarEvolution &SE, const TargetTransformInfo &TTI, TargetTransformInfo::TargetCostKind CostKind, ElementCount VF, unsigned UF, const SmallPtrSetImpl< const Value * > &ValuesToIgnore)
Replace a VPWidenCanonicalIVRecipe if it is present in Plan, with a VPWidenIntOrFpInductionRecipe,...
static void createAndOptimizeReplicateRegions(VPlan &Plan)
Wrap predicated VPReplicateRecipes with a mask operand in an if-then region block and remove the mask...
static void convertToVariableLengthStep(VPlan &Plan)
Transform loops with variable-length stepping after region dissolution.
static void addBranchWeightToMiddleTerminator(VPlan &Plan, ElementCount VF, std::optional< unsigned > VScaleForTuning)
Add branch weight metadata, if the Plan's middle block is terminated by a BranchOnCond recipe.
static std::unique_ptr< VPlan > narrowInterleaveGroups(VPlan &Plan, const TargetTransformInfo &TTI)
Try to find a single VF among Plan's VFs for which all interleave groups (with known minimum VF eleme...
static bool handleFindLastReductions(VPlan &Plan)
Check if Plan contains any FindLast reductions.
static void createInLoopReductionRecipes(VPlan &Plan, ElementCount MinVF)
Create VPReductionRecipes for in-loop reductions.
static void unrollByUF(VPlan &Plan, unsigned UF)
Explicitly unroll Plan by UF.
static DenseMap< const SCEV *, Value * > expandSCEVs(VPlan &Plan, ScalarEvolution &SE)
Expand VPExpandSCEVRecipes in Plan's entry block.
static void convertToConcreteRecipes(VPlan &Plan)
Lower abstract recipes to concrete ones that can be codegen'd.
static LLVM_ABI_FOR_TEST std::unique_ptr< VPlan > buildVPlan0(Loop *TheLoop, LoopInfo &LI, Type *InductionTy, PredicatedScalarEvolution &PSE, LoopVersioning *LVer=nullptr)
Create a base VPlan0, serving as the common starting point for all later candidates.
static void expandBranchOnTwoConds(VPlan &Plan)
Expand BranchOnTwoConds instructions into explicit CFG with BranchOnCond instructions.
static void materializeVectorTripCount(VPlan &Plan, VPBasicBlock *VectorPHVPBB, bool TailByMasking, bool RequiresScalarEpilogue, VPValue *Step, std::optional< uint64_t > MaxRuntimeStep=std::nullopt)
Materialize vector trip count computations to a set of VPInstructions.
static void hoistPredicatedLoads(VPlan &Plan, PredicatedScalarEvolution &PSE, const Loop *L)
Hoist predicated loads from the same address to the loop entry block, if they are guaranteed to execu...
static bool mergeBlocksIntoPredecessors(VPlan &Plan)
Remove redundant VPBasicBlocks by merging them into their single predecessor if the latter has a sing...
static void addCanonicalIVRecipes(VPlan &Plan, DebugLoc DL)
Add a canonical IV and its increment to Plan, using InductionTy and DL.
static void optimizeFindIVReductions(VPlan &Plan, PredicatedScalarEvolution &PSE, Loop &L)
Optimize FindLast reductions selecting IVs (or expressions of IVs) by converting them to FindIV reduc...
static void convertToAbstractRecipes(VPlan &Plan, VPCostContext &Ctx, VFRange &Range)
This function converts initial recipes to the abstract recipes and clamps Range based on cost model f...
static void materializeConstantVectorTripCount(VPlan &Plan, ElementCount BestVF, unsigned BestUF, PredicatedScalarEvolution &PSE)
static void makeScalarizationDecisions(VPlan &Plan, VFRange &Range)
Make VPlan-based scalarization decisions prior to delegating to the ones made by the legacy CM.
static void addExplicitVectorLength(VPlan &Plan, const std::optional< unsigned > &MaxEVLSafeElements)
Add a VPCurrentIterationPHIRecipe and related recipes to Plan and replace all uses of the canonical ...
static void makeCallWideningDecisions(VPlan &Plan, VFRange &Range, VPRecipeBuilder &RecipeBuilder, VPCostContext &CostCtx)
Convert call VPInstructions in Plan into widened call, vector intrinsic or replicate recipes based on...
static void adjustFirstOrderRecurrenceMiddleUsers(VPlan &Plan, VFRange &Range)
Adjust first-order recurrence users in the middle block: create penultimate element extracts for LCSS...
static void optimizeEVLMasks(VPlan &Plan)
Optimize recipes which use an EVL-based header mask to VP intrinsics, for example:
static LLVM_ABI_FOR_TEST bool handleEarlyExits(VPlan &Plan, UncountableExitStyle Style, Loop *TheLoop, PredicatedScalarEvolution &PSE, DominatorTree &DT, AssumptionCache *AC)
Update Plan to account for all early exits.
static void replaceSymbolicStrides(VPlan &Plan, PredicatedScalarEvolution &PSE, const DenseMap< Value *, const SCEV * > &StridesMap)
Replace symbolic strides from StridesMap in Plan with constants when possible.
static bool handleMaxMinNumReductions(VPlan &Plan)
Check if Plan contains any FMaxNum or FMinNum reductions.
static LLVM_ABI_FOR_TEST void createLoopRegions(VPlan &Plan)
Replace loops in Plan's flat CFG with VPRegionBlocks, turning Plan's flat CFG into a hierarchical CFG...
static void removeDeadRecipes(VPlan &Plan)
Remove dead recipes from Plan.
static void attachCheckBlock(VPlan &Plan, Value *Cond, BasicBlock *CheckBlock, bool AddBranchWeights)
Wrap runtime check block CheckBlock in a VPIRBB and Cond in a VPValue and connect the block to Plan,...
static void simplifyRecipes(VPlan &Plan)
Perform instcombine-like simplifications on recipes in Plan.
static void sinkPredicatedStores(VPlan &Plan, PredicatedScalarEvolution &PSE, const Loop *L)
Sink predicated stores to the same address with complementary predicates (P and NOT P) to an uncondit...
static void replicateByVF(VPlan &Plan, ElementCount VF)
Replace replicating VPReplicateRecipe, VPScalarIVStepsRecipe and VPInstruction in Plan with VF single...
static void convertToStridedAccesses(VPlan &Plan, PredicatedScalarEvolution &PSE, Loop &L, VPCostContext &Ctx, VFRange &Range)
Transform widen memory recipes into strided access recipes when legal and profitable.
static void addIterationCountCheckBlock(VPlan &Plan, ElementCount VF, unsigned UF, bool RequiresScalarEpilogue, Loop *OrigLoop, const uint32_t *MinItersBypassWeights, DebugLoc DL, PredicatedScalarEvolution &PSE)
Add a new check block before the vector preheader to Plan to check if the main vector loop should be ...
static void clearReductionWrapFlags(VPlan &Plan)
Clear NSW/NUW flags from reduction instructions if necessary.
static void optimizeInductionLiveOutUsers(VPlan &Plan, PredicatedScalarEvolution &PSE, bool FoldTail)
If there's a single exit block, optimize its phi recipes that use exiting IV values by feeding them p...
static void createPartialReductions(VPlan &Plan, VPCostContext &CostCtx, VFRange &Range)
Detect and create partial reduction recipes for scaled reductions in Plan.
static void addMinimumIterationCheck(VPlan &Plan, ElementCount VF, unsigned UF, ElementCount MinProfitableTripCount, bool RequiresScalarEpilogue, bool TailFolded, Loop *OrigLoop, const uint32_t *MinItersBypassWeights, DebugLoc DL, PredicatedScalarEvolution &PSE, VPBasicBlock *CheckBlock)
static void cse(VPlan &Plan)
Perform common-subexpression-elimination on Plan.
static LLVM_ABI_FOR_TEST void optimize(VPlan &Plan)
Apply VPlan-to-VPlan optimizations to Plan, including induction recipe optimizations,...
static void dissolveLoopRegions(VPlan &Plan)
Replace loop regions with explicit CFG.
static void truncateToMinimalBitwidths(VPlan &Plan, const MapVector< Instruction *, uint64_t > &MinBWs)
Insert truncates and extends for any truncated recipe.
static void dropPoisonGeneratingRecipes(VPlan &Plan)
Drop poison flags from recipes that may generate a poison value that is used after vectorization,...
static void optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF, unsigned BestUF, PredicatedScalarEvolution &PSE)
Optimize Plan based on BestVF and BestUF.
static void convertEVLExitCond(VPlan &Plan)
Replace the exit condition from (branch-on-cond eq CanonicalIVInc, VectorTripCount) to (branch-on-co...
static LLVM_ABI_FOR_TEST void addMiddleCheck(VPlan &Plan, bool TailFolded)
If a check is needed to guard executing the scalar epilogue loop, it will be added to the middle bloc...