LLVM 23.0.0git
VPlanTransforms.h
Go to the documentation of this file.
1//===- VPlanTransforms.h - Utility VPlan to VPlan transforms --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file provides utility VPlan to VPlan transformations.
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLANTRANSFORMS_H
14#define LLVM_TRANSFORMS_VECTORIZE_VPLANTRANSFORMS_H
15
16#include "VPlan.h"
17#include "VPlanVerifier.h"
19#include "llvm/ADT/ScopeExit.h"
23#include "llvm/Support/Regex.h"
24
25namespace llvm {
26
28class Instruction;
29class Loop;
30class LoopVersioning;
32class PHINode;
33class ScalarEvolution;
37class VPBuilder;
38class VPRecipeBuilder;
39struct VFRange;
40
43
44#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
48#endif
49
51 /// Helper to run a VPlan pass \p Pass on \p VPlan, forwarding extra arguments
52 /// to the pass. Performs verification/printing after each VPlan pass if
53 /// requested via command line options.
54 template <bool EnableVerify = true, typename PassTy, typename... ArgsTy>
55 static decltype(auto) runPass(StringRef PassName, PassTy &&Pass, VPlan &Plan,
56 ArgsTy &&...Args) {
57 scope_exit PostTransformActions{[&]() {
58#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
59 // Make sure to print before verification, so that output is more useful
60 // in case of failures:
62 (VPlanPrintAfterPasses.getNumOccurrences() > 0 &&
64 return Regex(Entry).match(PassName);
65 }))) {
66 dbgs()
67 << "VPlan for loop in '"
69 << "' after " << PassName << '\n';
72 else
73 dbgs() << Plan << '\n';
74 }
75#endif
76 if (VerifyEachVPlan && EnableVerify) {
77 if (!verifyVPlanIsValid(Plan))
78 report_fatal_error("Broken VPlan found, compilation aborted!");
79 }
80 }};
81
82 return std::forward<PassTy>(Pass)(Plan, std::forward<ArgsTy>(Args)...);
83 }
84#define RUN_VPLAN_PASS(PASS, ...) \
85 llvm::VPlanTransforms::runPass(#PASS, PASS, __VA_ARGS__)
86#define RUN_VPLAN_PASS_NO_VERIFY(PASS, ...) \
87 llvm::VPlanTransforms::runPass<false>(#PASS, PASS, __VA_ARGS__)
88
89 /// Create a base VPlan0, serving as the common starting point for all later
90 /// candidates. It consists of an initial plain CFG loop with loop blocks from
91 /// \p TheLoop being directly translated to VPBasicBlocks with VPInstruction
92 /// corresponding to the input IR.
93 ///
94 /// The created loop is wrapped in an initial skeleton to facilitate
95 /// vectorization, consisting of a vector pre-header, an exit block for the
96 /// main vector loop (middle.block) and a new block as preheader of the scalar
97 /// loop (scalar.ph). See below for an illustration. It also creates a
98 /// VPValue expression for the original trip count.
99 ///
100 /// [ ] <-- Plan's entry VPIRBasicBlock, wrapping the original loop's
101 /// / \ old preheader. Will contain iteration number check and SCEV
102 /// | | expansions.
103 /// | |
104 /// / v
105 /// | [ ] <-- vector loop bypass (may consist of multiple blocks) will be
106 /// | / | added later.
107 /// | / v
108 /// || [ ] <-- vector pre header.
109 /// |/ |
110 /// | v
111 /// | [ ] \ <-- plain CFG loop wrapping original loop to be vectorized.
112 /// | [ ]_|
113 /// | |
114 /// | v
115 /// | [ ] <--- middle-block with the branch to successors
116 /// | / |
117 /// | / |
118 /// | | v
119 /// \--->[ ] <--- scalar preheader (initial a VPBasicBlock, which will be
120 /// | | replaced later by a VPIRBasicBlock wrapping the scalar
121 /// | | preheader basic block.
122 /// | |
123 /// v <-- edge from middle to exit iff epilogue is not required.
124 /// | [ ] \
125 /// | [ ]_| <-- old scalar loop to handle remainder (scalar epilogue,
126 /// | | header wrapped in VPIRBasicBlock).
127 /// \ |
128 /// \ v
129 /// >[ ] <-- original loop exit block(s), wrapped in VPIRBasicBlocks.
130 LLVM_ABI_FOR_TEST static std::unique_ptr<VPlan>
131 buildVPlan0(Loop *TheLoop, LoopInfo &LI, Type *InductionTy,
132 PredicatedScalarEvolution &PSE, LoopVersioning *LVer = nullptr);
133
134 /// Replace VPPhi recipes in \p Plan's header with corresponding
135 /// VPHeaderPHIRecipe subclasses for inductions, reductions, and
136 /// fixed-order recurrences. This processes all header phis and creates
137 /// the appropriate widened recipe for each one. For fixed-order
138 /// recurrences, also creates FirstOrderRecurrenceSplice instructions and
139 /// sinks/hoists users as needed. Returns false if any fixed-order
140 /// recurrence cannot be handled.
141 static bool createHeaderPhiRecipes(
142 VPlan &Plan, PredicatedScalarEvolution &PSE, Loop &OrigLoop,
143 const VPDominatorTree &VPDT,
144 const MapVector<PHINode *, InductionDescriptor> &Inductions,
145 const MapVector<PHINode *, RecurrenceDescriptor> &Reductions,
146 const SmallPtrSetImpl<const PHINode *> &FixedOrderRecurrences,
147 const SmallPtrSetImpl<PHINode *> &InLoopReductions, bool AllowReordering);
148
149 /// Finalize SCEV predicates by adding induction predicates from \p Plan to
150 /// \p PSE and checking constraints. Returns false if predicated IVs have
151 /// outside-loop uses via ExitingIVValue, if SCEV predicate complexity exceeds
152 /// \p SCEVCheckThreshold, or if predicates are needed but \p OptForSize is
153 /// true.
154 static bool
155 finalizeSCEVPredicates(VPlan &Plan, PredicatedScalarEvolution &PSE,
156 bool OptForSize, unsigned SCEVCheckThreshold,
157 OptimizationRemarkEmitter *ORE, Loop *TheLoop);
158
159 /// Create VPReductionRecipes for in-loop reductions. This processes chains
160 /// of operations contributing to in-loop reductions and creates appropriate
161 /// VPReductionRecipe instances.
162 static void createInLoopReductionRecipes(VPlan &Plan, ElementCount MinVF);
163
164 /// Update \p Plan to account for all early exits. If \p Style is not
165 /// NoUncountableExit, handles uncountable early exits and checks that all
166 /// loads are dereferenceable. Returns false if a non-dereferenceable load is
167 /// found.
168 LLVM_ABI_FOR_TEST static bool
169 handleEarlyExits(VPlan &Plan, UncountableExitStyle Style, Loop *TheLoop,
170 PredicatedScalarEvolution &PSE, DominatorTree &DT,
171 AssumptionCache *AC);
172
173 /// If a check is needed to guard executing the scalar epilogue loop, it will
174 /// be added to the middle block.
175 LLVM_ABI_FOR_TEST static void addMiddleCheck(VPlan &Plan, bool TailFolded);
176
177 // Create a check in \p CheckBlock to see if the vector loop should be
178 // executed. May create VPExpandSCEV recipes in the plan's entry block.
179 static void addMinimumIterationCheck(
180 VPlan &Plan, ElementCount VF, unsigned UF,
181 ElementCount MinProfitableTripCount, bool RequiresScalarEpilogue,
182 bool TailFolded, Loop *OrigLoop, const uint32_t *MinItersBypassWeights,
183 DebugLoc DL, PredicatedScalarEvolution &PSE, VPBasicBlock *CheckBlock);
184
185 /// Add a new check block before the vector preheader to \p Plan to check if
186 /// the main vector loop should be executed (TC >= VF * UF).
187 static void
188 addIterationCountCheckBlock(VPlan &Plan, ElementCount VF, unsigned UF,
189 bool RequiresScalarEpilogue, Loop *OrigLoop,
191 DebugLoc DL, PredicatedScalarEvolution &PSE);
192
193 /// Add a check to \p Plan to see if the epilogue vector loop should be
194 /// executed.
196 VPlan &Plan, Value *VectorTripCount, bool RequiresScalarEpilogue,
197 ElementCount EpilogueVF, unsigned EpilogueUF, unsigned MainLoopStep,
198 unsigned EpilogueLoopStep, ScalarEvolution &SE);
199
200 /// Replace loops in \p Plan's flat CFG with VPRegionBlocks, turning \p Plan's
201 /// flat CFG into a hierarchical CFG. For the outermost loop, also create the
202 /// canonical IV's increment and adjust the latch terminator: replace
203 /// BranchOnCond with BranchOnCount, using \p DL for the canonical IV.
204 LLVM_ABI_FOR_TEST static void createLoopRegions(VPlan &Plan, DebugLoc DL);
205
206 /// Wrap runtime check block \p CheckBlock in a VPIRBB and \p Cond in a
207 /// VPValue and connect the block to \p Plan, using the VPValue as branch
208 /// condition.
209 static void attachVPCheckBlock(VPlan &Plan, VPValue *Cond,
210 VPBasicBlock *CheckBlock,
211 bool AddBranchWeights);
212 static void attachCheckBlock(VPlan &Plan, Value *Cond, BasicBlock *CheckBlock,
213 bool AddBranchWeights);
214
215 /// Replaces the VPInstructions in \p Plan with corresponding
216 /// widen recipes. Returns false if any VPInstructions could not be converted
217 /// to a wide recipe if needed.
218 LLVM_ABI_FOR_TEST static bool
220 const TargetLibraryInfo &TLI);
221
222 /// Try to legalize reductions with multiple in-loop uses. Currently only
223 /// strict and non-strict min/max reductions used by FindLastIV reductions are
224 /// supported, corresponding to computing the first and last argmin/argmax,
225 /// respectively. Otherwise return false.
226 static bool handleMultiUseReductions(VPlan &Plan,
227 OptimizationRemarkEmitter *ORE,
228 Loop *TheLoop);
229
230 /// Check if \p Plan contains any FMaxNum or FMinNum reductions. If they do,
231 /// try to update the vector loop to exit early if any input is NaN and resume
232 /// executing in the scalar loop to handle the NaNs there. Return false if
233 /// this attempt was unsuccessful.
234 static bool handleMaxMinNumReductions(VPlan &Plan);
235
236 /// Check if \p Plan contains any FindLast reductions. If it does, try to
237 /// update the vector loop to save the appropriate state using selects
238 /// for entire vectors for both the latest mask containing at least one active
239 /// element and the corresponding data vector. Return false if this attempt
240 /// was unsuccessful.
241 static bool handleFindLastReductions(VPlan &Plan);
242
243 /// Clear NSW/NUW flags from reduction instructions if necessary.
244 static void clearReductionWrapFlags(VPlan &Plan);
245
246 /// Explicitly unroll \p Plan by \p UF.
247 static void unrollByUF(VPlan &Plan, unsigned UF);
248
249 /// Replace replicating VPReplicateRecipe, VPScalarIVStepsRecipe and
250 /// VPInstruction in \p Plan with \p VF single-scalar recipes. Replicate
251 /// regions are dissolved by replicating their blocks and their recipes \p VF
252 /// times.
253 /// TODO: Also dissolve replicate regions with live outs.
254 static void replicateByVF(VPlan &Plan, ElementCount VF);
255
256 /// Optimize \p Plan based on \p BestVF and \p BestUF. This may restrict the
257 /// resulting plan to \p BestVF and \p BestUF.
258 static void optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
259 unsigned BestUF,
260 PredicatedScalarEvolution &PSE);
261
262 /// Try to simplify VPInstruction::ExplicitVectorLength recipes when the AVL
263 /// is known to be <= VF, replacing them with the AVL directly.
264 static bool simplifyKnownEVL(VPlan &Plan, ElementCount VF,
265 PredicatedScalarEvolution &PSE);
266
267 /// Apply VPlan-to-VPlan optimizations to \p Plan, including induction recipe
268 /// optimizations, dead recipe removal, replicate region optimizations and
269 /// block merging.
270 LLVM_ABI_FOR_TEST static void optimize(VPlan &Plan);
271
272 /// Remove redundant VPBasicBlocks by merging them into their single
273 /// predecessor if the latter has a single successor.
274 static bool mergeBlocksIntoPredecessors(VPlan &Plan);
275
276 /// Wrap predicated VPReplicateRecipes with a mask operand in an if-then
277 /// region block and remove the mask operand. Optimize the created regions by
278 /// iteratively sinking scalar operands into the region, followed by merging
279 /// regions until no improvements are remaining.
280 static void createAndOptimizeReplicateRegions(VPlan &Plan);
281
282 /// Replace (ICMP_ULE, wide canonical IV, backedge-taken-count) checks with an
283 /// (active-lane-mask recipe, wide canonical IV, trip-count). If \p
284 /// UseActiveLaneMaskForControlFlow is true, introduce an
285 /// VPActiveLaneMaskPHIRecipe.
286 static void addActiveLaneMask(VPlan &Plan,
287 bool UseActiveLaneMaskForControlFlow);
288
289 /// Insert truncates and extends for any truncated recipe. Redundant casts
290 /// will be folded later.
291 static void
292 truncateToMinimalBitwidths(VPlan &Plan,
293 const MapVector<Instruction *, uint64_t> &MinBWs);
294
295 /// Replace symbolic strides from \p StridesMap in \p Plan with constants when
296 /// possible.
297 static void
298 replaceSymbolicStrides(VPlan &Plan, PredicatedScalarEvolution &PSE,
299 const DenseMap<Value *, const SCEV *> &StridesMap,
300 const VPDominatorTree &VPDT);
301
302 /// Drop poison flags from recipes that may generate a poison value that is
303 /// used after vectorization, even when their operands are not poison. Those
304 /// recipes meet the following conditions:
305 /// * Contribute to the address computation of a recipe generating a widen
306 /// memory load/store (VPWidenMemoryInstructionRecipe or
307 /// VPInterleaveRecipe).
308 /// * Such a widen memory load/store is masked, but not with the header mask.
309 static void dropPoisonGeneratingRecipes(VPlan &Plan);
310
311 /// Add a VPCurrentIterationPHIRecipe and related recipes to \p Plan and
312 /// replaces all uses of the canonical IV except for the canonical IV
313 /// increment with a VPCurrentIterationPHIRecipe. The canonical IV is only
314 /// used to control the loop after this transformation.
315 static void
316 addExplicitVectorLength(VPlan &Plan,
317 const std::optional<unsigned> &MaxEVLSafeElements);
318
319 /// Optimize recipes which use an EVL-based header mask to VP intrinsics, for
320 /// example:
321 ///
322 /// %mask = icmp ult step-vector, EVL
323 /// %load = load %ptr, %mask
324 /// -->
325 /// %load = vp.load %ptr, EVL
326 static void optimizeEVLMasks(VPlan &Plan);
327
328 // For each Interleave Group in \p InterleaveGroups replace the Recipes
329 // widening its memory instructions with a single VPInterleaveRecipe at its
330 // insertion point.
331 static void createInterleaveGroups(
332 VPlan &Plan,
333 const SmallPtrSetImpl<const InterleaveGroup<Instruction> *>
334 &InterleaveGroups,
335 const bool &EpilogueAllowed);
336
337 /// Transform widen memory recipes into strided access recipes when legal
338 /// and profitable. Clamps \p Range to maintain consistency with widen
339 /// decisions of \p Plan, and uses \p Ctx to evaluate the cost.
340 static void convertToStridedAccesses(VPlan &Plan,
341 PredicatedScalarEvolution &PSE, Loop &L,
342 VPCostContext &Ctx, VFRange &Range);
343
344 /// Remove dead recipes from \p Plan.
345 static void removeDeadRecipes(VPlan &Plan);
346
347 /// Update \p Plan to account for uncountable early exits by introducing
348 /// appropriate branching logic in the latch that handles early exits and the
349 /// latch exit condition. Multiple exits are handled with a dispatch block
350 /// that determines which exit to take based on lane-by-lane semantics.
351 static bool handleUncountableEarlyExits(
352 VPlan &Plan, VPBasicBlock *HeaderVPBB, VPBasicBlock *LatchVPBB,
353 VPBasicBlock *MiddleVPBB, Loop *TheLoop, PredicatedScalarEvolution &PSE,
354 DominatorTree &DT, AssumptionCache *AC, UncountableExitStyle Style);
355
356 /// Replaces the exit condition from
357 /// (branch-on-cond eq CanonicalIVInc, VectorTripCount)
358 /// to
359 /// (branch-on-cond eq AVLNext, 0)
360 static void convertEVLExitCond(VPlan &Plan);
361
362 /// Replace loop regions with explicit CFG.
363 static void dissolveLoopRegions(VPlan &Plan);
364
365 /// Expand BranchOnTwoConds instructions into explicit CFG with
366 /// BranchOnCond instructions. Should be called after dissolveLoopRegions.
367 static void expandBranchOnTwoConds(VPlan &Plan);
368
369 /// Transform loops with variable-length stepping after region
370 /// dissolution.
371 ///
372 /// Once loop regions are replaced with explicit CFG, loops can step with
373 /// variable vector lengths instead of fixed lengths. This transformation:
374 /// * Makes CurrentIteration-Phi concrete.
375 // * Removes CanonicalIV and increment.
376 static void convertToVariableLengthStep(VPlan &Plan);
377
378 /// Lower abstract recipes to concrete ones, that can be codegen'd.
379 static void convertToConcreteRecipes(VPlan &Plan);
380
381 /// This function converts initial recipes to the abstract recipes and clamps
382 /// \p Range based on cost model for following optimizations and cost
383 /// estimations. The converted abstract recipes will lower to concrete
384 /// recipes before codegen.
385 static void convertToAbstractRecipes(VPlan &Plan, VPCostContext &Ctx,
386 VFRange &Range);
387
388 /// Perform instcombine-like simplifications on recipes in \p Plan.
389 static void simplifyRecipes(VPlan &Plan);
390
391 /// Remove BranchOnCond recipes with true or false conditions together with
392 /// removing dead edges to their successors. If \p OnlyLatches is true, only
393 /// process loop latches. Returns true if incoming values from any phi-like
394 /// recipe have been removed.
395 static bool removeBranchOnConst(VPlan &Plan, bool OnlyLatches = false);
396
397 /// Perform common-subexpression-elimination on \p Plan.
398 static void cse(VPlan &Plan);
399
400 /// If there's a single exit block, optimize its phi recipes that use exiting
401 /// IV values by feeding them precomputed end values instead, possibly taken
402 /// one step backwards.
403 static void optimizeInductionLiveOutUsers(VPlan &Plan,
404 PredicatedScalarEvolution &PSE,
405 bool FoldTail);
406
407 /// Add explicit broadcasts for live-ins and VPValues defined in \p Plan's entry block if they are used as vectors.
408 static void materializeBroadcasts(VPlan &Plan);
409
410 /// Hoist predicated loads from the same address to the loop entry block, if
411 /// they are guaranteed to execute on both paths (i.e., in replicate regions
412 /// with complementary masks P and NOT P).
413 static void hoistPredicatedLoads(VPlan &Plan, PredicatedScalarEvolution &PSE,
414 const Loop *L);
415
416 /// Sink predicated stores to the same address with complementary predicates
417 /// (P and NOT P) to an unconditional store with select recipes for the
418 /// stored values. This eliminates branching overhead when all paths
419 /// unconditionally store to the same location.
420 static void sinkPredicatedStores(VPlan &Plan, PredicatedScalarEvolution &PSE,
421 const Loop *L);
422
423 // Materialize vector trip counts for constants early if it can simply be
424 // computed as (Original TC / VF * UF) * VF * UF.
425 static void
426 materializeConstantVectorTripCount(VPlan &Plan, ElementCount BestVF,
427 unsigned BestUF,
428 PredicatedScalarEvolution &PSE);
429
430 /// Materialize vector trip count computations to a set of VPInstructions.
431 /// \p Step is used as the step value for the trip count computation.
432 /// \p MaxRuntimeStep is the maximum possible runtime value of Step, used to
433 /// prove the trip count is divisible by the step for scalable VFs.
434 static void materializeVectorTripCount(
435 VPlan &Plan, VPBasicBlock *VectorPHVPBB, bool TailByMasking,
436 bool RequiresScalarEpilogue, VPValue *Step,
437 std::optional<uint64_t> MaxRuntimeStep = std::nullopt);
438
439 /// Materialize the backedge-taken count to be computed explicitly using
440 /// VPInstructions.
441 static void materializeBackedgeTakenCount(VPlan &Plan,
442 VPBasicBlock *VectorPH);
443
444 /// Add explicit Build[Struct]Vector recipes to Pack multiple scalar values
445 /// into vectors and Unpack recipes to extract scalars from vectors as
446 /// needed.
447 static void materializePacksAndUnpacks(VPlan &Plan);
448
449 /// Materialize UF, VF and VFxUF to be computed explicitly using
450 /// VPInstructions.
451 static void materializeFactors(VPlan &Plan, VPBasicBlock *VectorPH,
452 ElementCount VF);
453
454 /// Attaches the alias-mask to the existing header-mask.
455 static void attachAliasMaskToHeaderMask(VPlan &Plan);
456
457 /// Materializes within the \p AliasCheckVPBB block. Updates the header mask
458 /// of the loop to use the alias mask. Returns the clamped VF.
459 static VPValue *materializeAliasMask(VPlan &Plan,
460 VPBasicBlock *AliasCheckVPBB,
461 ArrayRef<PointerDiffInfo> DiffChecks);
462
463 /// Materializes the alias mask within a check block before the loop. The
464 /// vector loop will only be entered if the clamped VF from the alias mask
465 /// is not scalar.
467 VPlan &Plan, ArrayRef<PointerDiffInfo> DiffChecks, bool HasBranchWeights);
468
469 /// Try to expand VPExpandSCEVRecipes in \p Plan's entry block to
470 /// VPInstructions. Recipes that cannot be expanded (like casts, min/max) are
471 /// kept for later IR-level expansion.
472 static void expandSCEVsToVPInstructions(VPlan &Plan, ScalarEvolution &SE);
473
474 /// Expand remaining VPExpandSCEVRecipes in \p Plan's entry block using
475 /// SCEVExpander. Each VPExpandSCEVRecipe is replaced with a live-in wrapping
476 /// the expanded IR value. A mapping from SCEV expressions to their expanded
477 /// IR value is returned.
478 static DenseMap<const SCEV *, Value *> expandSCEVs(VPlan &Plan,
479 ScalarEvolution &SE);
480
481 /// Try to find a single VF among \p Plan's VFs for which all interleave
482 /// groups (with known minimum VF elements) can be replaced by wide loads and
483 /// stores processing VF elements, if all transformed interleave groups access
484 /// the full vector width (checked via the maximum vector register width). If
485 /// the transformation can be applied, the original \p Plan will be split in
486 /// 2:
487 /// 1. The original Plan with the single VF containing the optimized recipes
488 /// using wide loads instead of interleave groups.
489 /// 2. A new clone which contains all VFs of Plan except the optimized VF.
490 ///
491 /// This effectively is a very simple form of loop-aware SLP, where we use
492 /// interleave groups to identify candidates.
493 static std::unique_ptr<VPlan>
494 narrowInterleaveGroups(VPlan &Plan, const TargetTransformInfo &TTI);
495
496 /// Adapts the vector loop region for tail folding by introducing a header
497 /// mask and conditionally executing the content of the region:
498 ///
499 /// Vector loop region before:
500 /// +-------------------------------------------+
501 /// |%iv = ... |
502 /// |... |
503 /// |%iv.next = add %iv, vfxuf |
504 /// |branch-on-count %iv.next, vector-trip-count|
505 /// +-------------------------------------------+
506 ///
507 /// Vector loop region after:
508 /// +-------------------------------------------+
509 /// |%iv = ... |
510 /// |%wide.iv = widen-canonical-iv ... |
511 /// |%header-mask = icmp ule %wide.iv, BTC |
512 /// |branch-on-cond %header-mask |---+
513 /// +-------------------------------------------+ |
514 /// | |
515 /// v |
516 /// +-------------------------------------------+ |
517 /// | ... | |
518 /// +-------------------------------------------+ |
519 /// | |
520 /// v |
521 /// +-------------------------------------------+ |
522 /// |<phis> = phi [..., ...], [poison, header] |
523 /// |%iv.next = add %iv, vfxuf |<--+
524 /// |branch-on-count %iv.next, vector-trip-count|
525 /// +-------------------------------------------+
526 ///
527 /// Any VPInstruction::ExtractLastLanes are also updated to extract from the
528 /// last active lane of the header mask.
529 static void foldTailByMasking(VPlan &Plan);
530
531 /// Predicate and linearize the control-flow in the only loop region of
532 /// \p Plan.
533 static void introduceMasksAndLinearize(VPlan &Plan);
534
535 /// Replace a VPWidenCanonicalIVRecipe if it is present in \p Plan, with a
536 /// VPWidenIntOrFpInductionRecipe, provided it would not cause additional
537 /// spills for \p VF at unroll factor \p UF.
539 VPlan &Plan, ScalarEvolution &SE, const TargetTransformInfo &TTI,
541 unsigned UF, const SmallPtrSetImpl<const Value *> &ValuesToIgnore);
542
543 /// Add branch weight metadata, if the \p Plan's middle block is terminated by
544 /// a BranchOnCond recipe.
545 static void
546 addBranchWeightToMiddleTerminator(VPlan &Plan, ElementCount VF,
547 std::optional<unsigned> VScaleForTuning);
548
549 /// Adjust first-order recurrence users in the middle block: create
550 /// penultimate element extracts for LCSSA phi users, and handle penultimate
551 /// extracts of the last active lane edge.
552 static void adjustFirstOrderRecurrenceMiddleUsers(VPlan &Plan,
553 VFRange &Range);
554
555 /// Optimize FindLast reductions selecting IVs (or expressions of IVs) by
556 /// converting them to FindIV reductions, if their IV range excludes a
557 /// suitable sentinel value. For expressions of IVs, the expression is sunk
558 /// to the middle block.
559 static void optimizeFindIVReductions(VPlan &Plan,
560 PredicatedScalarEvolution &PSE, Loop &L);
561
562 /// Detect and create partial reduction recipes for scaled reductions in
563 /// \p Plan. Must be called after recipe construction. If partial reductions
564 /// are only valid for a subset of VFs in Range, Range.End is updated.
565 static void createPartialReductions(VPlan &Plan, VPCostContext &CostCtx,
566 VFRange &Range);
567
568 /// Convert load/store VPInstructions in \p Plan into widened or replicate
569 /// recipes. Non load/store input instructions are left unchanged.
570 static void makeMemOpWideningDecisions(VPlan &Plan, VFRange &Range,
571 VPRecipeBuilder &RecipeBuilder);
572
573 /// Make VPlan-based scalarization decision prior to delegating to the ones
574 /// made by the legacy CM. Only transforms "usesFirstLaneOnly` def-use chains
575 /// enabled by prior widening of consecutive memory operations for now.
576 static void makeScalarizationDecisions(VPlan &Plan, VFRange &Range);
577
578 /// Convert call VPInstructions in \p Plan into widened call, vector
579 /// intrinsic or replicate recipes based on a cost comparison via \p CostCtx.
580 static void makeCallWideningDecisions(VPlan &Plan, VFRange &Range,
581 VPRecipeBuilder &RecipeBuilder,
582 VPCostContext &CostCtx);
583};
584
585} // namespace llvm
586
587#endif // LLVM_TRANSFORMS_VECTORIZE_VPLANTRANSFORMS_H
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
#define LLVM_ABI_FOR_TEST
Definition Compiler.h:218
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
static constexpr uint32_t MinItersBypassWeights[]
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
const SmallVectorImpl< MachineOperand > & Cond
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...
This pass exposes codegen information to IR-level passes.
This file declares the class VPlanVerifier, which contains utility functions to check the consistency...
This file contains the declarations of the Vectorization Plan base classes:
static const char PassName[]
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
A struct for saving information about induction variables.
This class emits a version of the loop where run-time checks ensure that may-alias pointers can't ove...
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
The optimization diagnostic interface.
Pass interface - Implemented by all 'passes'.
Definition Pass.h:99
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
LLVM_ABI bool match(StringRef String, SmallVectorImpl< StringRef > *Matches=nullptr, std::string *Error=nullptr) const
matches - Match the regex against a given String.
Definition Regex.cpp:83
The main scalar evolution driver.
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
TargetCostKind
The kind of cost model.
VPlan-based builder utility analogous to IRBuilder.
BasicBlock * getIRBasicBlock() const
Definition VPlan.h:4581
Helper class to create VPRecipies from IR instructions.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print this VPRegionBlock to O (recursively), prefixing all lines with Indent.
Definition VPlan.cpp:812
VPlan models a candidate for vectorization, encoding various decisions take to produce efficient outp...
Definition VPlan.h:4762
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition VPlan.cpp:1068
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
Definition VPlan.h:4907
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:319
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI_FOR_TEST cl::opt< bool > VerifyEachVPlan
LLVM_ABI_FOR_TEST cl::opt< bool > VPlanPrintAfterAll
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
LLVM_ABI_FOR_TEST cl::opt< bool > EnableWideActiveLaneMask
UncountableExitStyle
Different methods of handling early exits.
Definition VPlan.h:79
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
LLVM_ABI_FOR_TEST cl::list< std::string > VPlanPrintAfterPasses
TargetTransformInfo TTI
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI_FOR_TEST bool verifyVPlanIsValid(const VPlan &Plan)
Verify invariants for general VPlans.
LLVM_ABI_FOR_TEST cl::opt< bool > VPlanPrintVectorRegionScope
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
static VPValue * materializeAliasMask(VPlan &Plan, VPBasicBlock *AliasCheckVPBB, ArrayRef< PointerDiffInfo > DiffChecks)
Materializes within the AliasCheckVPBB block.
static LLVM_ABI_FOR_TEST bool tryToConvertVPInstructionsToVPRecipes(VPlan &Plan, const TargetLibraryInfo &TLI)
Replaces the VPInstructions in Plan with corresponding widen recipes.
static void makeMemOpWideningDecisions(VPlan &Plan, VFRange &Range, VPRecipeBuilder &RecipeBuilder)
Convert load/store VPInstructions in Plan into widened or replicate recipes.
static decltype(auto) runPass(StringRef PassName, PassTy &&Pass, VPlan &Plan, ArgsTy &&...Args)
Helper to run a VPlan pass Pass on VPlan, forwarding extra arguments to the pass.
static void expandSCEVsToVPInstructions(VPlan &Plan, ScalarEvolution &SE)
Try to expand VPExpandSCEVRecipes in Plan's entry block to VPInstructions.
static void materializeBroadcasts(VPlan &Plan)
Add explicit broadcasts for live-ins and VPValues defined in Plan's entry block if they are used as v...
static void materializePacksAndUnpacks(VPlan &Plan)
Add explicit Build[Struct]Vector recipes to Pack multiple scalar values into vectors and Unpack recip...
static void createInterleaveGroups(VPlan &Plan, const SmallPtrSetImpl< const InterleaveGroup< Instruction > * > &InterleaveGroups, const bool &EpilogueAllowed)
static bool simplifyKnownEVL(VPlan &Plan, ElementCount VF, PredicatedScalarEvolution &PSE)
Try to simplify VPInstruction::ExplicitVectorLength recipes when the AVL is known to be <= VF,...
static void introduceMasksAndLinearize(VPlan &Plan)
Predicate and linearize the control-flow in the only loop region of Plan.
static void materializeFactors(VPlan &Plan, VPBasicBlock *VectorPH, ElementCount VF)
Materialize UF, VF and VFxUF to be computed explicitly using VPInstructions.
static void foldTailByMasking(VPlan &Plan)
Adapts the vector loop region for tail folding by introducing a header mask and conditionally executi...
static void materializeBackedgeTakenCount(VPlan &Plan, VPBasicBlock *VectorPH)
Materialize the backedge-taken count to be computed explicitly using VPInstructions.
static void addMinimumVectorEpilogueIterationCheck(VPlan &Plan, Value *VectorTripCount, bool RequiresScalarEpilogue, ElementCount EpilogueVF, unsigned EpilogueUF, unsigned MainLoopStep, unsigned EpilogueLoopStep, ScalarEvolution &SE)
Add a check to Plan to see if the epilogue vector loop should be executed.
static void addActiveLaneMask(VPlan &Plan, bool UseActiveLaneMaskForControlFlow)
Replace (ICMP_ULE, wide canonical IV, backedge-taken-count) checks with an (active-lane-mask recipe,...
static bool handleMultiUseReductions(VPlan &Plan, OptimizationRemarkEmitter *ORE, Loop *TheLoop)
Try to legalize reductions with multiple in-loop uses.
static void replaceWideCanonicalIVWithWideIV(VPlan &Plan, ScalarEvolution &SE, const TargetTransformInfo &TTI, TargetTransformInfo::TargetCostKind CostKind, ElementCount VF, unsigned UF, const SmallPtrSetImpl< const Value * > &ValuesToIgnore)
Replace a VPWidenCanonicalIVRecipe if it is present in Plan, with a VPWidenIntOrFpInductionRecipe,...
static void createAndOptimizeReplicateRegions(VPlan &Plan)
Wrap predicated VPReplicateRecipes with a mask operand in an if-then region block and remove the mask...
static void convertToVariableLengthStep(VPlan &Plan)
Transform loops with variable-length stepping after region dissolution.
static void addBranchWeightToMiddleTerminator(VPlan &Plan, ElementCount VF, std::optional< unsigned > VScaleForTuning)
Add branch weight metadata, if the Plan's middle block is terminated by a BranchOnCond recipe.
static std::unique_ptr< VPlan > narrowInterleaveGroups(VPlan &Plan, const TargetTransformInfo &TTI)
Try to find a single VF among Plan's VFs for which all interleave groups (with known minimum VF eleme...
static bool handleFindLastReductions(VPlan &Plan)
Check if Plan contains any FindLast reductions.
static void createInLoopReductionRecipes(VPlan &Plan, ElementCount MinVF)
Create VPReductionRecipes for in-loop reductions.
static void materializeAliasMaskCheckBlock(VPlan &Plan, ArrayRef< PointerDiffInfo > DiffChecks, bool HasBranchWeights)
Materializes the alias mask within a check block before the loop.
static void unrollByUF(VPlan &Plan, unsigned UF)
Explicitly unroll Plan by UF.
static DenseMap< const SCEV *, Value * > expandSCEVs(VPlan &Plan, ScalarEvolution &SE)
Expand remaining VPExpandSCEVRecipes in Plan's entry block using SCEVExpander.
static void convertToConcreteRecipes(VPlan &Plan)
Lower abstract recipes to concrete ones, that can be codegen'd.
static LLVM_ABI_FOR_TEST void createLoopRegions(VPlan &Plan, DebugLoc DL)
Replace loops in Plan's flat CFG with VPRegionBlocks, turning Plan's flat CFG into a hierarchical CFG...
static LLVM_ABI_FOR_TEST std::unique_ptr< VPlan > buildVPlan0(Loop *TheLoop, LoopInfo &LI, Type *InductionTy, PredicatedScalarEvolution &PSE, LoopVersioning *LVer=nullptr)
Create a base VPlan0, serving as the common starting point for all later candidates.
static bool createHeaderPhiRecipes(VPlan &Plan, PredicatedScalarEvolution &PSE, Loop &OrigLoop, const VPDominatorTree &VPDT, const MapVector< PHINode *, InductionDescriptor > &Inductions, const MapVector< PHINode *, RecurrenceDescriptor > &Reductions, const SmallPtrSetImpl< const PHINode * > &FixedOrderRecurrences, const SmallPtrSetImpl< PHINode * > &InLoopReductions, bool AllowReordering)
Replace VPPhi recipes in Plan's header with corresponding VPHeaderPHIRecipe subclasses for inductions...
static void expandBranchOnTwoConds(VPlan &Plan)
Expand BranchOnTwoConds instructions into explicit CFG with BranchOnCond instructions.
static void materializeVectorTripCount(VPlan &Plan, VPBasicBlock *VectorPHVPBB, bool TailByMasking, bool RequiresScalarEpilogue, VPValue *Step, std::optional< uint64_t > MaxRuntimeStep=std::nullopt)
Materialize vector trip count computations to a set of VPInstructions.
static void hoistPredicatedLoads(VPlan &Plan, PredicatedScalarEvolution &PSE, const Loop *L)
Hoist predicated loads from the same address to the loop entry block, if they are guaranteed to execu...
static bool mergeBlocksIntoPredecessors(VPlan &Plan)
Remove redundant VPBasicBlocks by merging them into their single predecessor if the latter has a sing...
static void attachAliasMaskToHeaderMask(VPlan &Plan)
Attaches the alias-mask to the existing header-mask.
static void optimizeFindIVReductions(VPlan &Plan, PredicatedScalarEvolution &PSE, Loop &L)
Optimize FindLast reductions selecting IVs (or expressions of IVs) by converting them to FindIV reduc...
static void convertToAbstractRecipes(VPlan &Plan, VPCostContext &Ctx, VFRange &Range)
This function converts initial recipes to the abstract recipes and clamps Range based on cost model f...
static void materializeConstantVectorTripCount(VPlan &Plan, ElementCount BestVF, unsigned BestUF, PredicatedScalarEvolution &PSE)
static void makeScalarizationDecisions(VPlan &Plan, VFRange &Range)
Make VPlan-based scalarization decision prior to delegating to the ones made by the legacy CM.
static void addExplicitVectorLength(VPlan &Plan, const std::optional< unsigned > &MaxEVLSafeElements)
Add a VPCurrentIterationPHIRecipe and related recipes to Plan and replaces all uses of the canonical ...
static void makeCallWideningDecisions(VPlan &Plan, VFRange &Range, VPRecipeBuilder &RecipeBuilder, VPCostContext &CostCtx)
Convert call VPInstructions in Plan into widened call, vector intrinsic or replicate recipes based on...
static void adjustFirstOrderRecurrenceMiddleUsers(VPlan &Plan, VFRange &Range)
Adjust first-order recurrence users in the middle block: create penultimate element extracts for LCSS...
static void optimizeEVLMasks(VPlan &Plan)
Optimize recipes which use an EVL-based header mask to VP intrinsics, for example:
static LLVM_ABI_FOR_TEST bool handleEarlyExits(VPlan &Plan, UncountableExitStyle Style, Loop *TheLoop, PredicatedScalarEvolution &PSE, DominatorTree &DT, AssumptionCache *AC)
Update Plan to account for all early exits.
static bool handleMaxMinNumReductions(VPlan &Plan)
Check if Plan contains any FMaxNum or FMinNum reductions.
static void removeDeadRecipes(VPlan &Plan)
Remove dead recipes from Plan.
static void attachCheckBlock(VPlan &Plan, Value *Cond, BasicBlock *CheckBlock, bool AddBranchWeights)
static void simplifyRecipes(VPlan &Plan)
Perform instcombine-like simplifications on recipes in Plan.
static void sinkPredicatedStores(VPlan &Plan, PredicatedScalarEvolution &PSE, const Loop *L)
Sink predicated stores to the same address with complementary predicates (P and NOT P) to an uncondit...
static bool finalizeSCEVPredicates(VPlan &Plan, PredicatedScalarEvolution &PSE, bool OptForSize, unsigned SCEVCheckThreshold, OptimizationRemarkEmitter *ORE, Loop *TheLoop)
Finalize SCEV predicates by adding induction predicates from Plan to PSE and checking constraints.
static void replaceSymbolicStrides(VPlan &Plan, PredicatedScalarEvolution &PSE, const DenseMap< Value *, const SCEV * > &StridesMap, const VPDominatorTree &VPDT)
Replace symbolic strides from StridesMap in Plan with constants when possible.
static void replicateByVF(VPlan &Plan, ElementCount VF)
Replace replicating VPReplicateRecipe, VPScalarIVStepsRecipe and VPInstruction in Plan with VF single...
static bool removeBranchOnConst(VPlan &Plan, bool OnlyLatches=false)
Remove BranchOnCond recipes with true or false conditions together with removing dead edges to their ...
static void convertToStridedAccesses(VPlan &Plan, PredicatedScalarEvolution &PSE, Loop &L, VPCostContext &Ctx, VFRange &Range)
Transform widen memory recipes into strided access recipes when legal and profitable.
static void addIterationCountCheckBlock(VPlan &Plan, ElementCount VF, unsigned UF, bool RequiresScalarEpilogue, Loop *OrigLoop, const uint32_t *MinItersBypassWeights, DebugLoc DL, PredicatedScalarEvolution &PSE)
Add a new check block before the vector preheader to Plan to check if the main vector loop should be ...
static bool handleUncountableEarlyExits(VPlan &Plan, VPBasicBlock *HeaderVPBB, VPBasicBlock *LatchVPBB, VPBasicBlock *MiddleVPBB, Loop *TheLoop, PredicatedScalarEvolution &PSE, DominatorTree &DT, AssumptionCache *AC, UncountableExitStyle Style)
Update Plan to account for uncountable early exits by introducing appropriate branching logic in the ...
static void clearReductionWrapFlags(VPlan &Plan)
Clear NSW/NUW flags from reduction instructions if necessary.
static void optimizeInductionLiveOutUsers(VPlan &Plan, PredicatedScalarEvolution &PSE, bool FoldTail)
If there's a single exit block, optimize its phi recipes that use exiting IV values by feeding them p...
static void createPartialReductions(VPlan &Plan, VPCostContext &CostCtx, VFRange &Range)
Detect and create partial reduction recipes for scaled reductions in Plan.
static void addMinimumIterationCheck(VPlan &Plan, ElementCount VF, unsigned UF, ElementCount MinProfitableTripCount, bool RequiresScalarEpilogue, bool TailFolded, Loop *OrigLoop, const uint32_t *MinItersBypassWeights, DebugLoc DL, PredicatedScalarEvolution &PSE, VPBasicBlock *CheckBlock)
static void cse(VPlan &Plan)
Perform common-subexpression-elimination on Plan.
static void attachVPCheckBlock(VPlan &Plan, VPValue *Cond, VPBasicBlock *CheckBlock, bool AddBranchWeights)
Wrap runtime check block CheckBlock in a VPIRBB and Cond in a VPValue and connect the block to Plan,...
static LLVM_ABI_FOR_TEST void optimize(VPlan &Plan)
Apply VPlan-to-VPlan optimizations to Plan, including induction recipe optimizations,...
static void dissolveLoopRegions(VPlan &Plan)
Replace loop regions with explicit CFG.
static void truncateToMinimalBitwidths(VPlan &Plan, const MapVector< Instruction *, uint64_t > &MinBWs)
Insert truncates and extends for any truncated recipe.
static void dropPoisonGeneratingRecipes(VPlan &Plan)
Drop poison flags from recipes that may generate a poison value that is used after vectorization,...
static void optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF, unsigned BestUF, PredicatedScalarEvolution &PSE)
Optimize Plan based on BestVF and BestUF.
static void convertEVLExitCond(VPlan &Plan)
Replaces the exit condition from (branch-on-cond eq CanonicalIVInc, VectorTripCount) to (branch-on-co...
static LLVM_ABI_FOR_TEST void addMiddleCheck(VPlan &Plan, bool TailFolded)
If a check is needed to guard executing the scalar epilogue loop, it will be added to the middle bloc...