LLVM 23.0.0git
VPlanTransforms.h
Go to the documentation of this file.
1//===- VPlanTransforms.h - Utility VPlan to VPlan transforms --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file provides utility VPlan to VPlan transformations.
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLANTRANSFORMS_H
14#define LLVM_TRANSFORMS_VECTORIZE_VPLANTRANSFORMS_H
15
16#include "VPlan.h"
17#include "VPlanVerifier.h"
19#include "llvm/ADT/ScopeExit.h"
23#include "llvm/Support/Regex.h"
24
25namespace llvm {
26
28class Instruction;
29class Loop;
30class LoopVersioning;
32class PHINode;
33class ScalarEvolution;
37class VPBuilder;
38class VPRecipeBuilder;
39struct VFRange;
40
43
44#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
48#endif
49
51 /// Helper to run a VPlan pass \p Pass on \p Plan, forwarding extra arguments
52 /// to the pass. Performs verification/printing after each VPlan pass if
53 /// requested via command line options.
54 template <bool EnableVerify = true, typename PassTy, typename... ArgsTy>
55 static decltype(auto) runPass(StringRef PassName, PassTy &&Pass, VPlan &Plan,
56 ArgsTy &&...Args) {
57 scope_exit PostTransformActions{[&]() {
58#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
59 // Make sure to print before verification, so that output is more useful
60 // in case of failures:
62 (VPlanPrintAfterPasses.getNumOccurrences() > 0 &&
64 return Regex(Entry).match(PassName);
65 }))) {
66 dbgs()
67 << "VPlan for loop in '"
69 << "' after " << PassName << '\n';
72 else
73 dbgs() << Plan << '\n';
74 }
75#endif
76 if (VerifyEachVPlan && EnableVerify) {
77 if (!verifyVPlanIsValid(Plan))
78 report_fatal_error("Broken VPlan found, compilation aborted!");
79 }
80 }};
81
82 return std::forward<PassTy>(Pass)(Plan, std::forward<ArgsTy>(Args)...);
83 }
84#define RUN_VPLAN_PASS(PASS, ...) \
85 llvm::VPlanTransforms::runPass(#PASS, PASS, __VA_ARGS__)
86#define RUN_VPLAN_PASS_NO_VERIFY(PASS, ...) \
87 llvm::VPlanTransforms::runPass<false>(#PASS, PASS, __VA_ARGS__)
88
89 /// Create a base VPlan0, serving as the common starting point for all later
90 /// candidates. It consists of an initial plain CFG loop with loop blocks from
91 /// \p TheLoop being directly translated to VPBasicBlocks with VPInstruction
92 /// corresponding to the input IR.
93 ///
94 /// The created loop is wrapped in an initial skeleton to facilitate
95 /// vectorization, consisting of a vector pre-header, an exit block for the
96 /// main vector loop (middle.block) and a new block as preheader of the scalar
97 /// loop (scalar.ph). See below for an illustration. It also adds a canonical
98 /// IV and its increment, using \p InductionTy and \p IVDL, and creates a
99 /// VPValue expression for the original trip count.
100 ///
101 /// [ ] <-- Plan's entry VPIRBasicBlock, wrapping the original loop's
102 /// / \ old preheader. Will contain iteration number check and SCEV
103 /// | | expansions.
104 /// | |
105 /// / v
106 /// | [ ] <-- vector loop bypass (may consist of multiple blocks) will be
107 /// | / | added later.
108 /// | / v
109 /// || [ ] <-- vector pre header.
110 /// |/ |
111 /// | v
112 /// | [ ] \ <-- plain CFG loop wrapping original loop to be vectorized.
113 /// | [ ]_|
114 /// | |
115 /// | v
116 /// | [ ] <--- middle-block with the branch to successors
117 /// | / |
118 /// | / |
119 /// | | v
120 /// \--->[ ] <--- scalar preheader (initially a VPBasicBlock, which will
121 /// | | be replaced later by a VPIRBasicBlock wrapping the
122 /// | | scalar preheader basic block).
123 /// | |
124 /// v <-- edge from middle to exit iff epilogue is not required.
125 /// | [ ] \
126 /// | [ ]_| <-- old scalar loop to handle remainder (scalar epilogue,
127 /// | | header wrapped in VPIRBasicBlock).
128 /// \ |
129 /// \ v
130 /// >[ ] <-- original loop exit block(s), wrapped in VPIRBasicBlocks.
131 LLVM_ABI_FOR_TEST static std::unique_ptr<VPlan>
132 buildVPlan0(Loop *TheLoop, LoopInfo &LI, Type *InductionTy, DebugLoc IVDL,
133 PredicatedScalarEvolution &PSE, LoopVersioning *LVer = nullptr);
134
135 /// Replace VPPhi recipes in \p Plan's header with corresponding
136 /// VPHeaderPHIRecipe subclasses for inductions, reductions, and
137 /// fixed-order recurrences. This processes all header phis and creates
138 /// the appropriate widened recipe for each one. For fixed-order
139 /// recurrences, also creates FirstOrderRecurrenceSplice instructions and
140 /// sinks/hoists users as needed. Returns false if any fixed-order
141 /// recurrence cannot be handled.
142 static bool createHeaderPhiRecipes(
143 VPlan &Plan, PredicatedScalarEvolution &PSE, Loop &OrigLoop,
144 const MapVector<PHINode *, InductionDescriptor> &Inductions,
145 const MapVector<PHINode *, RecurrenceDescriptor> &Reductions,
146 const SmallPtrSetImpl<const PHINode *> &FixedOrderRecurrences,
147 const SmallPtrSetImpl<PHINode *> &InLoopReductions, bool AllowReordering);
148
149 /// Create VPReductionRecipes for in-loop reductions. This processes chains
150 /// of operations contributing to in-loop reductions and creates appropriate
151 /// VPReductionRecipe instances.
153 VPlan &Plan, const DenseSet<BasicBlock *> &BlocksNeedingPredication,
154 ElementCount MinVF);
155
156 /// Update \p Plan to account for all early exits. If \p Style is not
157 /// NoUncountableExit, handles uncountable early exits and checks that all
158 /// loads are dereferenceable. Returns false if a non-dereferenceable load is
159 /// found.
160 LLVM_ABI_FOR_TEST static bool
161 handleEarlyExits(VPlan &Plan, UncountableExitStyle Style, Loop *TheLoop,
162 PredicatedScalarEvolution &PSE, DominatorTree &DT,
163 AssumptionCache *AC);
164
165 /// If a check is needed to guard executing the scalar epilogue loop, it will
166 /// be added to the middle block.
167 LLVM_ABI_FOR_TEST static void addMiddleCheck(VPlan &Plan, bool TailFolded);
168
169 /// Add a check to \p Plan to see if the vector loop should be executed.
170 /// If \p CheckBlock is non-null, the compare and branch are placed there;
171 /// ExpandSCEV recipes are always placed in Entry.
172 static void addMinimumIterationCheck(
173 VPlan &Plan, ElementCount VF, unsigned UF,
174 ElementCount MinProfitableTripCount, bool RequiresScalarEpilogue,
175 bool TailFolded, Loop *OrigLoop, const uint32_t *MinItersBypassWeights,
176 DebugLoc DL, PredicatedScalarEvolution &PSE,
177 VPBasicBlock *CheckBlock = nullptr);
178
179 /// Add a new check block before the vector preheader to \p Plan to check if
180 /// the main vector loop should be executed (TC >= VF * UF).
181 static void
182 addIterationCountCheckBlock(VPlan &Plan, ElementCount VF, unsigned UF,
183 bool RequiresScalarEpilogue, Loop *OrigLoop,
185 DebugLoc DL, PredicatedScalarEvolution &PSE);
186
187 /// Add a check to \p Plan to see if the epilogue vector loop should be
188 /// executed.
190 VPlan &Plan, Value *VectorTripCount, bool RequiresScalarEpilogue,
191 ElementCount EpilogueVF, unsigned EpilogueUF, unsigned MainLoopStep,
192 unsigned EpilogueLoopStep, ScalarEvolution &SE);
193
194 /// Replace loops in \p Plan's flat CFG with VPRegionBlocks, turning \p Plan's
195 /// flat CFG into a hierarchical CFG.
196 LLVM_ABI_FOR_TEST static void createLoopRegions(VPlan &Plan);
197
198 /// Wrap runtime check block \p CheckBlock in a VPIRBB and \p Cond in a
199 /// VPValue and connect the block to \p Plan, using the VPValue as branch
200 /// condition.
201 static void attachCheckBlock(VPlan &Plan, Value *Cond, BasicBlock *CheckBlock,
202 bool AddBranchWeights);
203
204 /// Replaces the VPInstructions in \p Plan with corresponding
205 /// widen recipes. Returns false if any VPInstructions could not be converted
206 /// to a wide recipe if needed.
207 LLVM_ABI_FOR_TEST static bool
209 const TargetLibraryInfo &TLI);
210
211 /// Try to legalize reductions with multiple in-loop uses. Currently only
212 /// strict and non-strict min/max reductions used by FindLastIV reductions are
213 /// supported, corresponding to computing the first and last argmin/argmax,
214 /// respectively. Otherwise return false.
215 static bool handleMultiUseReductions(VPlan &Plan,
216 OptimizationRemarkEmitter *ORE,
217 Loop *TheLoop);
218
219 /// Check if \p Plan contains any FMaxNum or FMinNum reductions. If it does,
220 /// try to update the vector loop to exit early if any input is NaN and resume
221 /// executing in the scalar loop to handle the NaNs there. Return false if
222 /// this attempt was unsuccessful.
223 static bool handleMaxMinNumReductions(VPlan &Plan);
224
225 /// Check if \p Plan contains any FindLast reductions. If it does, try to
226 /// update the vector loop to save the appropriate state using selects
227 /// for entire vectors for both the latest mask containing at least one active
228 /// element and the corresponding data vector. Return false if this attempt
229 /// was unsuccessful.
230 static bool handleFindLastReductions(VPlan &Plan);
231
232 /// Clear NSW/NUW flags from reduction instructions if necessary.
233 static void clearReductionWrapFlags(VPlan &Plan);
234
235 /// Explicitly unroll \p Plan by \p UF.
236 static void unrollByUF(VPlan &Plan, unsigned UF);
237
238 /// Replace replicating VPReplicateRecipe, VPScalarIVStepsRecipe and
239 /// VPInstruction in \p Plan with \p VF single-scalar recipes. Replicate
240 /// regions are dissolved by replicating their blocks and their recipes \p VF
241 /// times.
242 /// TODO: Also dissolve replicate regions with live outs.
243 static void replicateByVF(VPlan &Plan, ElementCount VF);
244
245 /// Optimize \p Plan based on \p BestVF and \p BestUF. This may restrict the
246 /// resulting plan to \p BestVF and \p BestUF.
247 static void optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
248 unsigned BestUF,
249 PredicatedScalarEvolution &PSE);
250
251 /// Try to simplify VPInstruction::ExplicitVectorLength recipes when the AVL
252 /// is known to be <= VF, replacing them with the AVL directly.
253 static bool simplifyKnownEVL(VPlan &Plan, ElementCount VF,
254 PredicatedScalarEvolution &PSE);
255
256 /// Apply VPlan-to-VPlan optimizations to \p Plan, including induction recipe
257 /// optimizations, dead recipe removal, replicate region optimizations and
258 /// block merging.
259 LLVM_ABI_FOR_TEST static void optimize(VPlan &Plan);
260
261 /// Remove redundant VPBasicBlocks by merging them into their single
262 /// predecessor if the latter has a single successor.
263 static bool mergeBlocksIntoPredecessors(VPlan &Plan);
264
265 /// Wrap predicated VPReplicateRecipes with a mask operand in an if-then
266 /// region block and remove the mask operand. Optimize the created regions by
267 /// iteratively sinking scalar operands into the region, followed by merging
268 /// regions until no improvements are remaining.
269 static void createAndOptimizeReplicateRegions(VPlan &Plan);
270
271 /// Replace (ICMP_ULE, wide canonical IV, backedge-taken-count) checks with an
272 /// (active-lane-mask recipe, wide canonical IV, trip-count). If \p
273 /// UseActiveLaneMaskForControlFlow is true, introduce a
274 /// VPActiveLaneMaskPHIRecipe.
275 static void addActiveLaneMask(VPlan &Plan,
276 bool UseActiveLaneMaskForControlFlow);
277
278 /// Insert truncates and extends for any truncated recipe. Redundant casts
279 /// will be folded later.
280 static void
281 truncateToMinimalBitwidths(VPlan &Plan,
282 const MapVector<Instruction *, uint64_t> &MinBWs);
283
284 /// Replace symbolic strides from \p StridesMap in \p Plan with constants when
285 /// possible.
286 static void
287 replaceSymbolicStrides(VPlan &Plan, PredicatedScalarEvolution &PSE,
288 const DenseMap<Value *, const SCEV *> &StridesMap);
289
290 /// Drop poison flags from recipes that may generate a poison value that is
291 /// used after vectorization, even when their operands are not poison. Those
292 /// recipes meet the following conditions:
293 /// * Contribute to the address computation of a recipe generating a widen
294 /// memory load/store (VPWidenMemoryInstructionRecipe or
295 /// VPInterleaveRecipe).
296 /// * Such a widen memory load/store has at least one underlying Instruction
297 /// that is in a basic block that needs predication and after vectorization
298 /// the generated instruction won't be predicated.
299 /// Uses \p BlockNeedsPredication to check if a block needs predicating.
300 /// TODO: Replace BlockNeedsPredication callback with retrieving info from
301 /// VPlan directly.
302 static void dropPoisonGeneratingRecipes(
303 VPlan &Plan,
304 const std::function<bool(BasicBlock *)> &BlockNeedsPredication);
305
306 /// Add a VPCurrentIterationPHIRecipe and related recipes to \p Plan and
307 /// replaces all uses of the canonical IV except for the canonical IV
308 /// increment with a VPCurrentIterationPHIRecipe. The canonical IV is only
309 /// used to control the loop after this transformation.
310 static void
311 addExplicitVectorLength(VPlan &Plan,
312 const std::optional<unsigned> &MaxEVLSafeElements);
313
314 /// Optimize recipes which use an EVL-based header mask to VP intrinsics, for
315 /// example:
316 ///
317 /// %mask = icmp ult step-vector, EVL
318 /// %load = load %ptr, %mask
319 /// -->
320 /// %load = vp.load %ptr, EVL
321 static void optimizeEVLMasks(VPlan &Plan);
322
323 /// For each interleave group in \p InterleaveGroups, replace the recipes
324 /// widening its memory instructions with a single VPInterleaveRecipe at its
325 /// insertion point.
326 static void createInterleaveGroups(
327 VPlan &Plan,
328 const SmallPtrSetImpl<const InterleaveGroup<Instruction> *>
329 &InterleaveGroups,
330 const bool &EpilogueAllowed);
331
332 /// Remove dead recipes from \p Plan.
333 static void removeDeadRecipes(VPlan &Plan);
334
335 /// Update \p Plan to account for uncountable early exits by introducing
336 /// appropriate branching logic in the latch that handles early exits and the
337 /// latch exit condition. Multiple exits are handled with a dispatch block
338 /// that determines which exit to take based on lane-by-lane semantics.
339 static void handleUncountableEarlyExits(VPlan &Plan, VPBasicBlock *HeaderVPBB,
340 VPBasicBlock *LatchVPBB,
341 VPBasicBlock *MiddleVPBB,
343
344 /// Replaces the exit condition from
345 /// (branch-on-cond eq CanonicalIVInc, VectorTripCount)
346 /// to
347 /// (branch-on-cond eq AVLNext, 0)
348 static void convertEVLExitCond(VPlan &Plan);
349
350 /// Replace loop regions with explicit CFG.
351 static void dissolveLoopRegions(VPlan &Plan);
352
353 /// Expand BranchOnTwoConds instructions into explicit CFG with
354 /// BranchOnCond instructions. Should be called after dissolveLoopRegions.
355 static void expandBranchOnTwoConds(VPlan &Plan);
356
357 /// Transform loops with variable-length stepping after region
358 /// dissolution.
359 ///
360 /// Once loop regions are replaced with explicit CFG, loops can step with
361 /// variable vector lengths instead of fixed lengths. This transformation:
362 /// * Makes CurrentIteration-Phi concrete.
363 /// * Removes CanonicalIV and increment.
364 static void convertToVariableLengthStep(VPlan &Plan);
365
366 /// Lower abstract recipes to concrete ones, that can be codegen'd.
367 static void convertToConcreteRecipes(VPlan &Plan);
368
369 /// This function converts initial recipes to the abstract recipes and clamps
370 /// \p Range based on cost model for following optimizations and cost
371 /// estimations. The converted abstract recipes will lower to concrete
372 /// recipes before codegen.
373 static void convertToAbstractRecipes(VPlan &Plan, VPCostContext &Ctx,
374 VFRange &Range);
375
376 /// Perform instcombine-like simplifications on recipes in \p Plan.
377 static void simplifyRecipes(VPlan &Plan);
378
379 /// Remove BranchOnCond recipes with true or false conditions together with
380 /// removing dead edges to their successors. If \p OnlyLatches is true, only
381 /// process loop latches.
382 static void removeBranchOnConst(VPlan &Plan, bool OnlyLatches = false);
383
384 /// Perform common-subexpression-elimination on \p Plan.
385 static void cse(VPlan &Plan);
386
387 /// If there's a single exit block, optimize its phi recipes that use exiting
388 /// IV values by feeding them precomputed end values instead, possibly taken
389 /// one step backwards.
390 static void optimizeInductionLiveOutUsers(VPlan &Plan,
391 PredicatedScalarEvolution &PSE,
392 bool FoldTail);
393
394 /// Add explicit broadcasts for live-ins and VPValues defined in \p Plan's entry block if they are used as vectors.
395 static void materializeBroadcasts(VPlan &Plan);
396
397 /// Hoist single-scalar loads with invariant addresses out of the vector loop
398 /// to the preheader, if they are proven not to alias with any stores in the
399 /// plan using noalias metadata.
400 static void hoistInvariantLoads(VPlan &Plan);
401
402 /// Hoist predicated loads from the same address to the loop entry block, if
403 /// they are guaranteed to execute on both paths (i.e., in replicate regions
404 /// with complementary masks P and NOT P).
405 static void hoistPredicatedLoads(VPlan &Plan, PredicatedScalarEvolution &PSE,
406 const Loop *L);
407
408 /// Sink predicated stores to the same address with complementary predicates
409 /// (P and NOT P) to an unconditional store with select recipes for the
410 /// stored values. This eliminates branching overhead when all paths
411 /// unconditionally store to the same location.
412 static void sinkPredicatedStores(VPlan &Plan, PredicatedScalarEvolution &PSE,
413 const Loop *L);
414
415 /// Materialize vector trip counts for constants early if they can simply be
416 /// computed as (Original TC / VF * UF) * VF * UF.
417 static void
418 materializeConstantVectorTripCount(VPlan &Plan, ElementCount BestVF,
419 unsigned BestUF,
420 PredicatedScalarEvolution &PSE);
421
422 /// Materialize vector trip count computations to a set of VPInstructions.
423 /// \p Step is used as the step value for the trip count computation.
424 /// \p MaxRuntimeStep is the maximum possible runtime value of Step, used to
425 /// prove the trip count is divisible by the step for scalable VFs.
426 static void materializeVectorTripCount(
427 VPlan &Plan, VPBasicBlock *VectorPHVPBB, bool TailByMasking,
428 bool RequiresScalarEpilogue, VPValue *Step,
429 std::optional<uint64_t> MaxRuntimeStep = std::nullopt);
430
431 /// Materialize the backedge-taken count to be computed explicitly using
432 /// VPInstructions.
433 static void materializeBackedgeTakenCount(VPlan &Plan,
434 VPBasicBlock *VectorPH);
435
436 /// Add explicit Build[Struct]Vector recipes to Pack multiple scalar values
437 /// into vectors and Unpack recipes to extract scalars from vectors as
438 /// needed.
439 static void materializePacksAndUnpacks(VPlan &Plan);
440
441 /// Materialize UF, VF and VFxUF to be computed explicitly using
442 /// VPInstructions.
443 static void materializeFactors(VPlan &Plan, VPBasicBlock *VectorPH,
444 ElementCount VF);
445
446 /// Expand VPExpandSCEVRecipes in \p Plan's entry block. Each
447 /// VPExpandSCEVRecipe is replaced with a live-in wrapping the expanded IR
448 /// value. A mapping from SCEV expressions to their expanded IR value is
449 /// returned.
450 static DenseMap<const SCEV *, Value *> expandSCEVs(VPlan &Plan,
451 ScalarEvolution &SE);
452
453 /// Try to find a single VF among \p Plan's VFs for which all interleave
454 /// groups (with known minimum VF elements) can be replaced by wide loads and
455 /// stores processing VF elements, if all transformed interleave groups access
456 /// the full vector width (checked via the maximum vector register width). If
457 /// the transformation can be applied, the original \p Plan will be split in
458 /// 2:
459 /// 1. The original Plan with the single VF containing the optimized recipes
460 /// using wide loads instead of interleave groups.
461 /// 2. A new clone which contains all VFs of Plan except the optimized VF.
462 ///
463 /// This effectively is a very simple form of loop-aware SLP, where we use
464 /// interleave groups to identify candidates.
465 static std::unique_ptr<VPlan>
466 narrowInterleaveGroups(VPlan &Plan, const TargetTransformInfo &TTI);
467
468 /// Adapts the vector loop region for tail folding by introducing a header
469 /// mask and conditionally executing the content of the region:
470 ///
471 /// Vector loop region before:
472 /// +-------------------------------------------+
473 /// |%iv = ... |
474 /// |... |
475 /// |%iv.next = add %iv, vfxuf |
476 /// |branch-on-count %iv.next, vector-trip-count|
477 /// +-------------------------------------------+
478 ///
479 /// Vector loop region after:
480 /// +-------------------------------------------+
481 /// |%iv = ... |
482 /// |%wide.iv = widen-canonical-iv ... |
483 /// |%header-mask = icmp ule %wide.iv, BTC |
484 /// |branch-on-cond %header-mask |---+
485 /// +-------------------------------------------+ |
486 /// | |
487 /// v |
488 /// +-------------------------------------------+ |
489 /// | ... | |
490 /// +-------------------------------------------+ |
491 /// | |
492 /// v |
493 /// +-------------------------------------------+ |
494 /// |<phis> = phi [..., ...], [poison, header] |
495 /// |%iv.next = add %iv, vfxuf |<--+
496 /// |branch-on-count %iv.next, vector-trip-count|
497 /// +-------------------------------------------+
498 ///
499 /// Any VPInstruction::ExtractLastLanes are also updated to extract from the
500 /// last active lane of the header mask.
501 static void foldTailByMasking(VPlan &Plan);
502
503 /// Predicate and linearize the control-flow in the only loop region of
504 /// \p Plan.
505 static void introduceMasksAndLinearize(VPlan &Plan);
506
507 /// Replace a VPWidenCanonicalIVRecipe if it is present in \p Plan, with a
508 /// VPWidenIntOrFpInductionRecipe, provided it would not cause additional
509 /// spills for \p VF at unroll factor \p UF.
511 VPlan &Plan, ScalarEvolution &SE, const TargetTransformInfo &TTI,
513 unsigned UF, const SmallPtrSetImpl<const Value *> &ValuesToIgnore);
514
515 /// Add branch weight metadata, if the \p Plan's middle block is terminated by
516 /// a BranchOnCond recipe.
517 static void
518 addBranchWeightToMiddleTerminator(VPlan &Plan, ElementCount VF,
519 std::optional<unsigned> VScaleForTuning);
520
521 /// Adjust first-order recurrence users in the middle block: create
522 /// penultimate element extracts for LCSSA phi users, and handle penultimate
523 /// extracts of the last active lane edge.
524 static void adjustFirstOrderRecurrenceMiddleUsers(VPlan &Plan,
525 VFRange &Range);
526
527 /// Optimize FindLast reductions selecting IVs (or expressions of IVs) by
528 /// converting them to FindIV reductions, if their IV range excludes a
529 /// suitable sentinel value. For expressions of IVs, the expression is sunk
530 /// to the middle block.
531 static void optimizeFindIVReductions(VPlan &Plan,
532 PredicatedScalarEvolution &PSE, Loop &L);
533
534 /// Detect and create partial reduction recipes for scaled reductions in
535 /// \p Plan. Must be called after recipe construction. If partial reductions
536 /// are only valid for a subset of VFs in Range, Range.End is updated.
537 static void createPartialReductions(VPlan &Plan, VPCostContext &CostCtx,
538 VFRange &Range);
539
540 /// Convert load/store VPInstructions in \p Plan into widened or replicate
541 /// recipes. Non load/store input instructions are left unchanged.
542 static void makeMemOpWideningDecisions(VPlan &Plan, VFRange &Range,
543 VPRecipeBuilder &RecipeBuilder);
544};
545
546} // namespace llvm
547
548#endif // LLVM_TRANSFORMS_VECTORIZE_VPLANTRANSFORMS_H
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
#define LLVM_ABI_FOR_TEST
Definition Compiler.h:218
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
static constexpr uint32_t MinItersBypassWeights[]
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
const SmallVectorImpl< MachineOperand > & Cond
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...
This pass exposes codegen information to IR-level passes.
This file declares the class VPlanVerifier, which contains utility functions to check the consistency...
This file contains the declarations of the Vectorization Plan base classes:
static const char PassName[]
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
A struct for saving information about induction variables.
This class emits a version of the loop where run-time checks ensure that may-alias pointers can't ove...
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
The optimization diagnostic interface.
Pass interface - Implemented by all 'passes'.
Definition Pass.h:99
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
LLVM_ABI bool match(StringRef String, SmallVectorImpl< StringRef > *Matches=nullptr, std::string *Error=nullptr) const
matches - Match the regex against a given String.
Definition Regex.cpp:83
The main scalar evolution driver.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
TargetCostKind
The kind of cost model.
VPlan-based builder utility analogous to IRBuilder.
BasicBlock * getIRBasicBlock() const
Definition VPlan.h:4331
Helper class to create VPRecipes from IR instructions.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print this VPRegionBlock to O (recursively), prefixing all lines with Indent.
Definition VPlan.cpp:836
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
Definition VPlan.h:4512
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition VPlan.cpp:1096
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
Definition VPlan.h:4649
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:318
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI_FOR_TEST cl::opt< bool > VerifyEachVPlan
LLVM_ABI_FOR_TEST cl::opt< bool > VPlanPrintAfterAll
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
LLVM_ABI_FOR_TEST cl::opt< bool > EnableWideActiveLaneMask
UncountableExitStyle
Different methods of handling early exits.
Definition VPlan.h:82
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
LLVM_ABI_FOR_TEST cl::list< std::string > VPlanPrintAfterPasses
TargetTransformInfo TTI
LLVM_ABI_FOR_TEST bool verifyVPlanIsValid(const VPlan &Plan)
Verify invariants for general VPlans.
LLVM_ABI_FOR_TEST cl::opt< bool > VPlanPrintVectorRegionScope
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
static void handleUncountableEarlyExits(VPlan &Plan, VPBasicBlock *HeaderVPBB, VPBasicBlock *LatchVPBB, VPBasicBlock *MiddleVPBB, UncountableExitStyle Style)
Update Plan to account for uncountable early exits by introducing appropriate branching logic in the ...
static LLVM_ABI_FOR_TEST bool tryToConvertVPInstructionsToVPRecipes(VPlan &Plan, const TargetLibraryInfo &TLI)
Replaces the VPInstructions in Plan with corresponding widen recipes.
static void makeMemOpWideningDecisions(VPlan &Plan, VFRange &Range, VPRecipeBuilder &RecipeBuilder)
Convert load/store VPInstructions in Plan into widened or replicate recipes.
static void addMinimumIterationCheck(VPlan &Plan, ElementCount VF, unsigned UF, ElementCount MinProfitableTripCount, bool RequiresScalarEpilogue, bool TailFolded, Loop *OrigLoop, const uint32_t *MinItersBypassWeights, DebugLoc DL, PredicatedScalarEvolution &PSE, VPBasicBlock *CheckBlock=nullptr)
static decltype(auto) runPass(StringRef PassName, PassTy &&Pass, VPlan &Plan, ArgsTy &&...Args)
Helper to run a VPlan pass Pass on VPlan, forwarding extra arguments to the pass.
static bool createHeaderPhiRecipes(VPlan &Plan, PredicatedScalarEvolution &PSE, Loop &OrigLoop, const MapVector< PHINode *, InductionDescriptor > &Inductions, const MapVector< PHINode *, RecurrenceDescriptor > &Reductions, const SmallPtrSetImpl< const PHINode * > &FixedOrderRecurrences, const SmallPtrSetImpl< PHINode * > &InLoopReductions, bool AllowReordering)
Replace VPPhi recipes in Plan's header with corresponding VPHeaderPHIRecipe subclasses for inductions...
static void materializeBroadcasts(VPlan &Plan)
Add explicit broadcasts for live-ins and VPValues defined in Plan's entry block if they are used as v...
static void materializePacksAndUnpacks(VPlan &Plan)
Add explicit Build[Struct]Vector recipes to Pack multiple scalar values into vectors and Unpack recip...
static void createInterleaveGroups(VPlan &Plan, const SmallPtrSetImpl< const InterleaveGroup< Instruction > * > &InterleaveGroups, const bool &EpilogueAllowed)
static LLVM_ABI_FOR_TEST std::unique_ptr< VPlan > buildVPlan0(Loop *TheLoop, LoopInfo &LI, Type *InductionTy, DebugLoc IVDL, PredicatedScalarEvolution &PSE, LoopVersioning *LVer=nullptr)
Create a base VPlan0, serving as the common starting point for all later candidates.
static bool simplifyKnownEVL(VPlan &Plan, ElementCount VF, PredicatedScalarEvolution &PSE)
Try to simplify VPInstruction::ExplicitVectorLength recipes when the AVL is known to be <= VF,...
static void removeBranchOnConst(VPlan &Plan, bool OnlyLatches=false)
Remove BranchOnCond recipes with true or false conditions together with removing dead edges to their ...
static void introduceMasksAndLinearize(VPlan &Plan)
Predicate and linearize the control-flow in the only loop region of Plan.
static void materializeFactors(VPlan &Plan, VPBasicBlock *VectorPH, ElementCount VF)
Materialize UF, VF and VFxUF to be computed explicitly using VPInstructions.
static void createInLoopReductionRecipes(VPlan &Plan, const DenseSet< BasicBlock * > &BlocksNeedingPredication, ElementCount MinVF)
Create VPReductionRecipes for in-loop reductions.
static void foldTailByMasking(VPlan &Plan)
Adapts the vector loop region for tail folding by introducing a header mask and conditionally executi...
static void materializeBackedgeTakenCount(VPlan &Plan, VPBasicBlock *VectorPH)
Materialize the backedge-taken count to be computed explicitly using VPInstructions.
static void addMinimumVectorEpilogueIterationCheck(VPlan &Plan, Value *VectorTripCount, bool RequiresScalarEpilogue, ElementCount EpilogueVF, unsigned EpilogueUF, unsigned MainLoopStep, unsigned EpilogueLoopStep, ScalarEvolution &SE)
Add a check to Plan to see if the epilogue vector loop should be executed.
static void hoistInvariantLoads(VPlan &Plan)
Hoist single-scalar loads with invariant addresses out of the vector loop to the preheader,...
static void addActiveLaneMask(VPlan &Plan, bool UseActiveLaneMaskForControlFlow)
Replace (ICMP_ULE, wide canonical IV, backedge-taken-count) checks with an (active-lane-mask recipe,...
static bool handleMultiUseReductions(VPlan &Plan, OptimizationRemarkEmitter *ORE, Loop *TheLoop)
Try to legalize reductions with multiple in-loop uses.
static void dropPoisonGeneratingRecipes(VPlan &Plan, const std::function< bool(BasicBlock *)> &BlockNeedsPredication)
Drop poison flags from recipes that may generate a poison value that is used after vectorization,...
static void replaceWideCanonicalIVWithWideIV(VPlan &Plan, ScalarEvolution &SE, const TargetTransformInfo &TTI, TargetTransformInfo::TargetCostKind CostKind, ElementCount VF, unsigned UF, const SmallPtrSetImpl< const Value * > &ValuesToIgnore)
Replace a VPWidenCanonicalIVRecipe if it is present in Plan, with a VPWidenIntOrFpInductionRecipe,...
static void createAndOptimizeReplicateRegions(VPlan &Plan)
Wrap predicated VPReplicateRecipes with a mask operand in an if-then region block and remove the mask...
static void convertToVariableLengthStep(VPlan &Plan)
Transform loops with variable-length stepping after region dissolution.
static void addBranchWeightToMiddleTerminator(VPlan &Plan, ElementCount VF, std::optional< unsigned > VScaleForTuning)
Add branch weight metadata, if the Plan's middle block is terminated by a BranchOnCond recipe.
static std::unique_ptr< VPlan > narrowInterleaveGroups(VPlan &Plan, const TargetTransformInfo &TTI)
Try to find a single VF among Plan's VFs for which all interleave groups (with known minimum VF eleme...
static bool handleFindLastReductions(VPlan &Plan)
Check if Plan contains any FindLast reductions.
static void unrollByUF(VPlan &Plan, unsigned UF)
Explicitly unroll Plan by UF.
static DenseMap< const SCEV *, Value * > expandSCEVs(VPlan &Plan, ScalarEvolution &SE)
Expand VPExpandSCEVRecipes in Plan's entry block.
static void convertToConcreteRecipes(VPlan &Plan)
Lower abstract recipes to concrete ones that can be codegen'd.
static void expandBranchOnTwoConds(VPlan &Plan)
Expand BranchOnTwoConds instructions into explicit CFG with BranchOnCond instructions.
static void materializeVectorTripCount(VPlan &Plan, VPBasicBlock *VectorPHVPBB, bool TailByMasking, bool RequiresScalarEpilogue, VPValue *Step, std::optional< uint64_t > MaxRuntimeStep=std::nullopt)
Materialize vector trip count computations to a set of VPInstructions.
static void hoistPredicatedLoads(VPlan &Plan, PredicatedScalarEvolution &PSE, const Loop *L)
Hoist predicated loads from the same address to the loop entry block, if they are guaranteed to execu...
static bool mergeBlocksIntoPredecessors(VPlan &Plan)
Remove redundant VPBasicBlocks by merging them into their single predecessor if the latter has a sing...
static void optimizeFindIVReductions(VPlan &Plan, PredicatedScalarEvolution &PSE, Loop &L)
Optimize FindLast reductions selecting IVs (or expressions of IVs) by converting them to FindIV reduc...
static void convertToAbstractRecipes(VPlan &Plan, VPCostContext &Ctx, VFRange &Range)
This function converts initial recipes to the abstract recipes and clamps Range based on cost model f...
static void materializeConstantVectorTripCount(VPlan &Plan, ElementCount BestVF, unsigned BestUF, PredicatedScalarEvolution &PSE)
static void addExplicitVectorLength(VPlan &Plan, const std::optional< unsigned > &MaxEVLSafeElements)
Add a VPCurrentIterationPHIRecipe and related recipes to Plan and replace all uses of the canonical ...
static void adjustFirstOrderRecurrenceMiddleUsers(VPlan &Plan, VFRange &Range)
Adjust first-order recurrence users in the middle block: create penultimate element extracts for LCSS...
static void optimizeEVLMasks(VPlan &Plan)
Optimize recipes which use an EVL-based header mask to VP intrinsics, for example:
static LLVM_ABI_FOR_TEST bool handleEarlyExits(VPlan &Plan, UncountableExitStyle Style, Loop *TheLoop, PredicatedScalarEvolution &PSE, DominatorTree &DT, AssumptionCache *AC)
Update Plan to account for all early exits.
static void replaceSymbolicStrides(VPlan &Plan, PredicatedScalarEvolution &PSE, const DenseMap< Value *, const SCEV * > &StridesMap)
Replace symbolic strides from StridesMap in Plan with constants when possible.
static bool handleMaxMinNumReductions(VPlan &Plan)
Check if Plan contains any FMaxNum or FMinNum reductions.
static LLVM_ABI_FOR_TEST void createLoopRegions(VPlan &Plan)
Replace loops in Plan's flat CFG with VPRegionBlocks, turning Plan's flat CFG into a hierarchical CFG...
static void removeDeadRecipes(VPlan &Plan)
Remove dead recipes from Plan.
static void attachCheckBlock(VPlan &Plan, Value *Cond, BasicBlock *CheckBlock, bool AddBranchWeights)
Wrap runtime check block CheckBlock in a VPIRBB and Cond in a VPValue and connect the block to Plan,...
static void simplifyRecipes(VPlan &Plan)
Perform instcombine-like simplifications on recipes in Plan.
static void sinkPredicatedStores(VPlan &Plan, PredicatedScalarEvolution &PSE, const Loop *L)
Sink predicated stores to the same address with complementary predicates (P and NOT P) to an uncondit...
static void replicateByVF(VPlan &Plan, ElementCount VF)
Replace replicating VPReplicateRecipe, VPScalarIVStepsRecipe and VPInstruction in Plan with VF single...
static void addIterationCountCheckBlock(VPlan &Plan, ElementCount VF, unsigned UF, bool RequiresScalarEpilogue, Loop *OrigLoop, const uint32_t *MinItersBypassWeights, DebugLoc DL, PredicatedScalarEvolution &PSE)
Add a new check block before the vector preheader to Plan to check if the main vector loop should be ...
static void clearReductionWrapFlags(VPlan &Plan)
Clear NSW/NUW flags from reduction instructions if necessary.
static void optimizeInductionLiveOutUsers(VPlan &Plan, PredicatedScalarEvolution &PSE, bool FoldTail)
If there's a single exit block, optimize its phi recipes that use exiting IV values by feeding them p...
static void createPartialReductions(VPlan &Plan, VPCostContext &CostCtx, VFRange &Range)
Detect and create partial reduction recipes for scaled reductions in Plan.
static void cse(VPlan &Plan)
Perform common-subexpression-elimination on Plan.
static LLVM_ABI_FOR_TEST void optimize(VPlan &Plan)
Apply VPlan-to-VPlan optimizations to Plan, including induction recipe optimizations,...
static void dissolveLoopRegions(VPlan &Plan)
Replace loop regions with explicit CFG.
static void truncateToMinimalBitwidths(VPlan &Plan, const MapVector< Instruction *, uint64_t > &MinBWs)
Insert truncates and extends for any truncated recipe.
static void optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF, unsigned BestUF, PredicatedScalarEvolution &PSE)
Optimize Plan based on BestVF and BestUF.
static void convertEVLExitCond(VPlan &Plan)
Replace the exit condition from (branch-on-cond eq CanonicalIVInc, VectorTripCount) to (branch-on-co...
static LLVM_ABI_FOR_TEST void addMiddleCheck(VPlan &Plan, bool TailFolded)
If a check is needed to guard executing the scalar epilogue loop, it will be added to the middle bloc...