LLVM 23.0.0git
LoopVectorizationPlanner.cpp
Go to the documentation of this file.
1//===- LoopVectorizationPlanner.cpp - VF selection and planning -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements VFSelectionContext methods for loop vectorization
11/// VF selection, independent of cost-modeling decisions.
12///
13//===----------------------------------------------------------------------===//
14
21#include "llvm/Support/Debug.h"
25
26using namespace llvm;
27
28#define DEBUG_TYPE "loop-vectorize"
29
31 "vectorizer-maximize-bandwidth", cl::init(false), cl::Hidden,
32 cl::desc("Maximize bandwidth when selecting vectorization factor which "
33 "will be determined by the smallest type in loop."));
34
36 "vectorizer-maximize-bandwidth-for-vector-calls", cl::init(true),
38 cl::desc("Try wider VFs if they enable the use of vector variants"));
39
41 "vectorizer-consider-reg-pressure", cl::init(false), cl::Hidden,
42 cl::desc("Discard VFs if their register pressure is too high."));
43
45 "force-target-supports-scalable-vectors", cl::init(false), cl::Hidden,
47 "Pretend that scalable vectors are supported, even if the target does "
48 "not support them. This flag should only be used for testing."));
49
51 "prefer-inloop-reductions", cl::init(false), cl::Hidden,
52 cl::desc("Prefer in-loop vector reductions, "
53 "overriding the targets preference."));
54
55/// Note: This currently only applies to `llvm.masked.load` and
56/// `llvm.masked.store`. TODO: Extend this to cover other operations as needed.
58 "force-target-supports-masked-memory-ops", cl::init(false), cl::Hidden,
59 cl::desc("Assume the target supports masked memory operations (used for "
60 "testing)."));
61
63 Align Alignment,
64 unsigned AddressSpace) const {
65 return Legal->isConsecutivePtr(DataType, Ptr) &&
67 TTI.isLegalMaskedStore(DataType, Alignment, AddressSpace));
68}
69
71 Align Alignment,
72 unsigned AddressSpace) const {
73 return Legal->isConsecutivePtr(DataType, Ptr) &&
75 TTI.isLegalMaskedLoad(DataType, Alignment, AddressSpace));
76}
77
79 ElementCount VF) const {
80 bool LI = isa<LoadInst>(V);
81 bool SI = isa<StoreInst>(V);
82 if (!LI && !SI)
83 return false;
84 auto *Ty = getLoadStoreType(V);
86 if (VF.isVector())
87 Ty = VectorType::get(Ty, VF);
88 return (LI && TTI.isLegalMaskedGather(Ty, Align)) ||
89 (SI && TTI.isLegalMaskedScatter(Ty, Align));
90}
91
93 return TTI.supportsScalableVectors() || ForceTargetSupportsScalableVectors;
94}
95
/// Returns whether VF selection should maximize vector bandwidth.
///
/// True when the MaximizeBandwidth option evaluates to true. Otherwise, and
/// only when that option has zero occurrences (getNumOccurrences() == 0), the
/// decision falls to TTI.shouldMaximizeVectorBandwidth(RegKind) or to a
/// condition involving Legal->hasVectorCallVariants().
///
/// NOTE(review): the definition of RegKind and part of the final operand are
/// not visible in this listing; presumably RegKind is chosen from
/// \p IsScalable -- confirm against the full source.
96bool VFSelectionContext::useMaxBandwidth(bool IsScalable) const {
100 return MaximizeBandwidth || (MaximizeBandwidth.getNumOccurrences() == 0 &&
101 (TTI.shouldMaximizeVectorBandwidth(RegKind) ||
103 Legal->hasVectorCallVariants())));
104}
105
107 if (ConsiderRegPressure.getNumOccurrences())
108 return ConsiderRegPressure;
109
110 // TODO: We should eventually consider register pressure for all targets. The
111 // TTI hook is temporary whilst target-specific issues are being fixed.
112 if (TTI.shouldConsiderVectorizationRegPressure())
113 return true;
114
115 if (!useMaxBandwidth(VF.isScalable()))
116 return false;
117 // Only calculate register pressure for VFs enabled by MaxBandwidth.
119 VF, VF.isScalable() ? MaxPermissibleVFWithoutMaxBW.ScalableVF
120 : MaxPermissibleVFWithoutMaxBW.FixedVF);
121}
122
123ElementCount VFSelectionContext::clampVFByMaxTripCount(
124 ElementCount VF, unsigned MaxTripCount, unsigned UserIC,
125 bool FoldTailByMasking, bool RequiresScalarEpilogue) const {
126 unsigned EstimatedVF = VF.getKnownMinValue();
127 if (VF.isScalable() && F.hasFnAttribute(Attribute::VScaleRange)) {
128 auto Attr = F.getFnAttribute(Attribute::VScaleRange);
129 auto Min = Attr.getVScaleRangeMin();
130 EstimatedVF *= Min;
131 }
132
133 // When a scalar epilogue is required, at least one iteration of the scalar
134 // loop has to execute. Adjust MaxTripCount accordingly to avoid picking a
135 // max VF that results in a dead vector loop.
136 if (MaxTripCount > 0 && RequiresScalarEpilogue)
137 MaxTripCount -= 1;
138
139 // When the user specifies an interleave count, we need to ensure that
140 // VF * UserIC <= MaxTripCount to avoid a dead vector loop.
141 unsigned IC = UserIC > 0 ? UserIC : 1;
142 unsigned EstimatedVFTimesIC = EstimatedVF * IC;
143
144 if (MaxTripCount && MaxTripCount <= EstimatedVFTimesIC &&
145 (!FoldTailByMasking || isPowerOf2_32(MaxTripCount))) {
146 // If upper bound loop trip count (TC) is known at compile time there is no
147 // point in choosing VF greater than TC / IC (as done in the loop below).
148 // Select maximum power of two which doesn't exceed TC / IC. If VF is
149 // scalable, we only fall back on a fixed VF when the TC is less than or
150 // equal to the known number of lanes.
151 auto ClampedUpperTripCount = llvm::bit_floor(MaxTripCount / IC);
152 if (ClampedUpperTripCount == 0)
153 ClampedUpperTripCount = 1;
154 LLVM_DEBUG(dbgs() << "LV: Clamping the MaxVF to maximum power of two not "
155 "exceeding the constant trip count"
156 << (UserIC > 0 ? " divided by UserIC" : "") << ": "
157 << ClampedUpperTripCount << "\n");
158 return ElementCount::get(ClampedUpperTripCount,
159 FoldTailByMasking ? VF.isScalable() : false);
160 }
161 return VF;
162}
163
/// Compute the maximum VF to consider for the target, for fixed-width or
/// scalable vectors depending on whether \p MaxSafeVF is scalable.
///
/// The starting point is the widest register width divided by \p WidestType,
/// rounded down to a power of two, capped by \p MaxSafeVF and then clamped by
/// the trip count through clampVFByMaxTripCount(). If that clamp changed the
/// value it is returned directly. Otherwise the value is recorded in
/// MaxPermissibleVFWithoutMaxBW (ScalableVF or FixedVF) and, when
/// useMaxBandwidth() holds, the VF is recomputed from \p SmallestType, raised
/// to TTI.getMinimumVF() if it is known to be smaller, and clamped again.
///
/// \returns the selected maximum VF, or a fixed VF of 1 when the computed
///          element count is zero.
164ElementCount VFSelectionContext::getMaximizedVFForTarget(
165 unsigned MaxTripCount, unsigned SmallestType, unsigned WidestType,
166 ElementCount MaxSafeVF, unsigned UserIC, bool FoldTailByMasking,
167 bool RequiresScalarEpilogue) {
// The scalable flag of MaxSafeVF selects which kind of maximum is computed.
168 bool ComputeScalableMaxVF = MaxSafeVF.isScalable();
169 const TypeSize WidestRegister = TTI.getRegisterBitWidth(
170 ComputeScalableMaxVF ? TargetTransformInfo::RGK_ScalableVector
// NOTE(review): the remainder of the expression above is not visible in this
// listing -- confirm against the full source.
172
173 // Convenience function to return the minimum of two ElementCounts.
174 auto MinVF = [](const ElementCount &LHS, const ElementCount &RHS) {
175 assert((LHS.isScalable() == RHS.isScalable()) &&
176 "Scalable flags must match");
// NOTE(review): the lambda's return statement is not visible in this listing.
178 };
179
180 // Ensure MaxVF is a power of 2; the dependence distance bound may not be.
181 // Note that both WidestRegister and WidestType may not be powers of 2.
182 auto MaxVectorElementCount = ElementCount::get(
183 llvm::bit_floor(WidestRegister.getKnownMinValue() / WidestType),
184 ComputeScalableMaxVF);
185 MaxVectorElementCount = MinVF(MaxVectorElementCount, MaxSafeVF);
186 LLVM_DEBUG(dbgs() << "LV: The Widest register safe to use is: "
187 << (MaxVectorElementCount * WidestType) << " bits.\n");
188
// A zero element count is reported as the target having no vector registers
// of this kind; fall back to a fixed VF of 1.
189 if (!MaxVectorElementCount) {
190 LLVM_DEBUG(dbgs() << "LV: The target has no "
191 << (ComputeScalableMaxVF ? "scalable" : "fixed")
192 << " vector registers.\n");
193 return ElementCount::getFixed(1);
194 }
195
196 ElementCount MaxVF =
197 clampVFByMaxTripCount(MaxVectorElementCount, MaxTripCount, UserIC,
198 FoldTailByMasking, RequiresScalarEpilogue);
199 // If the MaxVF was already clamped, there's no point in trying to pick a
200 // larger one.
201 if (MaxVF != MaxVectorElementCount)
202 return MaxVF;
203
// Record the maximum for this vector kind before any max-bandwidth widening.
204 if (MaxVF.isScalable())
205 MaxPermissibleVFWithoutMaxBW.ScalableVF = MaxVF;
206 else
207 MaxPermissibleVFWithoutMaxBW.FixedVF = MaxVF;
208
// With max bandwidth enabled, size the VF by the smallest type instead of the
// widest one, still bounded by MaxSafeVF.
209 if (useMaxBandwidth(ComputeScalableMaxVF)) {
210 auto MaxVectorElementCountMaxBW = ElementCount::get(
211 llvm::bit_floor(WidestRegister.getKnownMinValue() / SmallestType),
212 ComputeScalableMaxVF);
213 MaxVF = MinVF(MaxVectorElementCountMaxBW, MaxSafeVF);
214
// Within this if-statement the local ElementCount named MinVF (the target's
// minimum VF) shadows the MinVF lambda declared above.
215 if (ElementCount MinVF =
216 TTI.getMinimumVF(SmallestType, ComputeScalableMaxVF)) {
217 if (ElementCount::isKnownLT(MaxVF, MinVF)) {
218 LLVM_DEBUG(dbgs() << "LV: Overriding calculated MaxVF(" << MaxVF
219 << ") with target's minimum: " << MinVF << '\n');
220 MaxVF = MinVF;
221 }
222 }
223
// The widened VF must still respect the trip-count bound.
224 MaxVF = clampVFByMaxTripCount(MaxVF, MaxTripCount, UserIC,
225 FoldTailByMasking, RequiresScalarEpilogue);
226 }
227 return MaxVF;
228}
229
230std::optional<unsigned> llvm::getMaxVScale(const Function &F,
231 const TargetTransformInfo &TTI) {
232 if (std::optional<unsigned> MaxVScale = TTI.getMaxVScale())
233 return MaxVScale;
234
235 if (F.hasFnAttribute(Attribute::VScaleRange))
236 return F.getFnAttribute(Attribute::VScaleRange).getVScaleRangeMax();
237
238 return std::nullopt;
239}
240
/// Determine, and cache in IsScalableVectorizationAllowed, whether scalable
/// vectorization may be used for this loop.
///
/// A previously cached answer is returned as-is. Otherwise the cache is set
/// to false up front, so every early `return false` below leaves a negative
/// answer cached; it is set to true only after all checks have passed:
///  - the hints do not disable scalable vectorization,
///  - TTI can vectorize every reduction at the maximum scalable VF,
///  - every non-void type in ElementTypesInLoop is legal for scalable vectors,
///  - the loop is safe for any vector width, or a maximum vscale is known.
241bool VFSelectionContext::isScalableVectorizationAllowed() {
// Fast path: reuse the cached decision.
242 if (IsScalableVectorizationAllowed)
243 return *IsScalableVectorizationAllowed;
244
// Pessimistically cache "not allowed" before running the checks.
245 IsScalableVectorizationAllowed = false;
// NOTE(review): the condition guarding the return below is not visible in
// this listing -- confirm against the full source.
247 return false;
248
249 if (Hints->isScalableVectorizationDisabled()) {
250 reportVectorizationInfo("Scalable vectorization is explicitly disabled",
251 "ScalableVectorizationDisabled", ORE, TheLoop);
252 return false;
253 }
254
255 LLVM_DEBUG(dbgs() << "LV: Scalable vectorization is available\n");
256
// The legality queries below are made against the largest representable
// scalable element count.
257 auto MaxScalableVF = ElementCount::getScalable(
258 std::numeric_limits<ElementCount::ScalarTy>::max());
259
260 // Test that the loop-vectorizer can legalize all operations for this MaxVF.
261 // FIXME: While for scalable vectors this is currently sufficient, this should
262 // be replaced by a more detailed mechanism that filters out specific VFs,
263 // instead of invalidating vectorization for a whole set of VFs based on the
264 // MaxVF.
265
266 // Disable scalable vectorization if the loop contains unsupported reductions.
267 if (!all_of(Legal->getReductionVars(), [&](const auto &Reduction) -> bool {
268 return TTI.isLegalToVectorizeReduction(Reduction.second, MaxScalableVF);
269 })) {
// NOTE(review): the opening of the reporting call that takes the arguments
// below is not visible in this listing.
271 "Scalable vectorization not supported for the reduction "
272 "operations found in this loop.",
273 "ScalableVFUnfeasible", ORE, TheLoop);
274 return false;
275 }
276
277 // Disable scalable vectorization if the loop contains any instructions
278 // with element types not supported for scalable vectors.
279 if (any_of(ElementTypesInLoop, [&](Type *Ty) {
280 return !Ty->isVoidTy() && !TTI.isElementTypeLegalForScalableVector(Ty);
281 })) {
282 reportVectorizationInfo("Scalable vectorization is not supported "
283 "for all element types found in this loop.",
284 "ScalableVFUnfeasible", ORE, TheLoop);
285 return false;
286 }
287
// When the loop is not safe for every vector width, a maximum vscale is
// required; without one, scalable vectorization is rejected.
288 if (!Legal->isSafeForAnyVectorWidth() && !getMaxVScale(F, TTI)) {
289 reportVectorizationInfo("The target does not provide maximum vscale value "
290 "for safe distance analysis.",
291 "ScalableVFUnfeasible", ORE, TheLoop);
292 return false;
293 }
294
// All checks passed: overwrite the pessimistic cache entry.
295 IsScalableVectorizationAllowed = true;
296 return true;
297}
298
/// Compute the maximum legal scalable VF given \p MaxSafeElements, the number
/// of elements that is safe with respect to dependence distances.
///
/// If the loop is safe for any vector width, the largest representable
/// scalable element count is returned. Otherwise the result is
/// MaxSafeElements divided by the maximum vscale obtained from getMaxVScale().
/// The returned value may be zero.
///
/// NOTE(review): the return-type line of this definition is not visible in
/// this listing.
300VFSelectionContext::getMaxLegalScalableVF(unsigned MaxSafeElements) {
301 if (!isScalableVectorizationAllowed())
// NOTE(review): the statement controlled by the check above is not visible in
// this listing -- presumably an early return; confirm against the full source.
303
304 auto MaxScalableVF = ElementCount::getScalable(
305 std::numeric_limits<ElementCount::ScalarTy>::max());
306 if (Legal->isSafeForAnyVectorWidth())
307 return MaxScalableVF;
308
// MaxVScale is dereferenced below without a check. Presumably this is safe
// because isScalableVectorizationAllowed() rejects loops that are not safe for
// any vector width when getMaxVScale() yields no value -- TODO confirm.
309 std::optional<unsigned> MaxVScale = getMaxVScale(F, TTI);
310 // Limit MaxScalableVF by the maximum safe dependence distance.
311 MaxScalableVF = ElementCount::getScalable(MaxSafeElements / *MaxVScale);
312
313 if (!MaxScalableVF)
// NOTE(review): the opening of the reporting call that takes the arguments
// below is not visible in this listing.
315 "Max legal vector width too small, scalable vectorization "
316 "unfeasible.",
317 "ScalableVFUnfeasible", ORE, TheLoop);
318
319 return MaxScalableVF;
320}
321
323 unsigned MaxTripCount, ElementCount UserVF, unsigned UserIC,
324 bool FoldTailByMasking, bool RequiresScalarEpilogue) {
325 auto [SmallestType, WidestType] = getSmallestAndWidestTypes();
326
327 // Get the maximum safe dependence distance in bits computed by LAA.
328 // It is computed by MaxVF * sizeOf(type) * 8, where type is taken from
329 // the memory accesses that is most restrictive (involved in the smallest
330 // dependence distance).
331 unsigned MaxSafeElementsPowerOf2 =
332 llvm::bit_floor(Legal->getMaxSafeVectorWidthInBits() / WidestType);
333 if (!Legal->isSafeForAnyStoreLoadForwardDistances()) {
334 unsigned SLDist = Legal->getMaxStoreLoadForwardSafeDistanceInBits();
335 MaxSafeElementsPowerOf2 =
336 std::min(MaxSafeElementsPowerOf2, SLDist / WidestType);
337 }
338
339 auto MaxSafeFixedVF = ElementCount::getFixed(MaxSafeElementsPowerOf2);
340 auto MaxSafeScalableVF = getMaxLegalScalableVF(MaxSafeElementsPowerOf2);
341
342 if (!Legal->isSafeForAnyVectorWidth())
343 MaxSafeElements = MaxSafeElementsPowerOf2;
344
345 LLVM_DEBUG(dbgs() << "LV: The max safe fixed VF is: " << MaxSafeFixedVF
346 << ".\n");
347 LLVM_DEBUG(dbgs() << "LV: The max safe scalable VF is: " << MaxSafeScalableVF
348 << ".\n");
349
350 // First analyze the UserVF, fall back if the UserVF should be ignored.
351 if (UserVF) {
352 auto MaxSafeUserVF =
353 UserVF.isScalable() ? MaxSafeScalableVF : MaxSafeFixedVF;
354
355 if (ElementCount::isKnownLE(UserVF, MaxSafeUserVF)) {
356 // If `VF=vscale x N` is safe, then so is `VF=N`
357 if (UserVF.isScalable())
358 return FixedScalableVFPair(
359 ElementCount::getFixed(UserVF.getKnownMinValue()), UserVF);
360
361 return UserVF;
362 }
363
364 assert(ElementCount::isKnownGT(UserVF, MaxSafeUserVF));
365
366 // Only clamp if the UserVF is not scalable. If the UserVF is scalable, it
367 // is better to ignore the hint and let the compiler choose a suitable VF.
368 if (!UserVF.isScalable()) {
369 LLVM_DEBUG(dbgs() << "LV: User VF=" << UserVF
370 << " is unsafe, clamping to max safe VF="
371 << MaxSafeFixedVF << ".\n");
372 ORE->emit([&]() {
373 return OptimizationRemarkAnalysis(DEBUG_TYPE, "VectorizationFactor",
374 TheLoop->getStartLoc(),
375 TheLoop->getHeader())
376 << "User-specified vectorization factor "
377 << ore::NV("UserVectorizationFactor", UserVF)
378 << " is unsafe, clamping to maximum safe vectorization factor "
379 << ore::NV("VectorizationFactor", MaxSafeFixedVF);
380 });
381 return MaxSafeFixedVF;
382 }
383
385 LLVM_DEBUG(dbgs() << "LV: User VF=" << UserVF
386 << " is ignored because scalable vectors are not "
387 "available.\n");
388 ORE->emit([&]() {
389 return OptimizationRemarkAnalysis(DEBUG_TYPE, "VectorizationFactor",
390 TheLoop->getStartLoc(),
391 TheLoop->getHeader())
392 << "User-specified vectorization factor "
393 << ore::NV("UserVectorizationFactor", UserVF)
394 << " is ignored because the target does not support scalable "
395 "vectors. The compiler will pick a more suitable value.";
396 });
397 } else {
398 LLVM_DEBUG(dbgs() << "LV: User VF=" << UserVF
399 << " is unsafe. Ignoring scalable UserVF.\n");
400 ORE->emit([&]() {
401 return OptimizationRemarkAnalysis(DEBUG_TYPE, "VectorizationFactor",
402 TheLoop->getStartLoc(),
403 TheLoop->getHeader())
404 << "User-specified vectorization factor "
405 << ore::NV("UserVectorizationFactor", UserVF)
406 << " is unsafe. Ignoring the hint to let the compiler pick a "
407 "more suitable value.";
408 });
409 }
410 }
411
412 LLVM_DEBUG(dbgs() << "LV: The Smallest and Widest types: " << SmallestType
413 << " / " << WidestType << " bits.\n");
414
417 if (auto MaxVF = getMaximizedVFForTarget(
418 MaxTripCount, SmallestType, WidestType, MaxSafeFixedVF, UserIC,
419 FoldTailByMasking, RequiresScalarEpilogue))
420 Result.FixedVF = MaxVF;
421
422 if (auto MaxVF = getMaximizedVFForTarget(
423 MaxTripCount, SmallestType, WidestType, MaxSafeScalableVF, UserIC,
424 FoldTailByMasking, RequiresScalarEpilogue))
425 if (MaxVF.isScalable()) {
426 Result.ScalableVF = MaxVF;
427 LLVM_DEBUG(dbgs() << "LV: Found feasible scalable VF = " << MaxVF
428 << "\n");
429 }
430
431 return Result;
432}
433
434std::pair<unsigned, unsigned>
436 unsigned MinWidth = -1U;
437 unsigned MaxWidth = 8;
438 const DataLayout &DL = F.getDataLayout();
439 // For in-loop reductions, no element types are added to ElementTypesInLoop
440 // if there are no loads/stores in the loop. In this case, check through the
441 // reduction variables to determine the maximum width.
442 if (ElementTypesInLoop.empty() && !Legal->getReductionVars().empty()) {
443 for (const auto &[_, RdxDesc] : Legal->getReductionVars()) {
444 // When finding the min width used by the recurrence we need to account
445 // for casts on the input operands of the recurrence.
446 MinWidth = std::min(
447 MinWidth,
448 std::min(RdxDesc.getMinWidthCastToRecurrenceTypeInBits(),
449 RdxDesc.getRecurrenceType()->getScalarSizeInBits()));
450 MaxWidth = std::max(MaxWidth,
451 RdxDesc.getRecurrenceType()->getScalarSizeInBits());
452 }
453 } else {
454 for (Type *T : ElementTypesInLoop) {
455 MinWidth = std::min<unsigned>(
456 MinWidth, DL.getTypeSizeInBits(T->getScalarType()).getFixedValue());
457 MaxWidth = std::max<unsigned>(
458 MaxWidth, DL.getTypeSizeInBits(T->getScalarType()).getFixedValue());
459 }
460 }
461 return {MinWidth, MaxWidth};
462}
463
465 const SmallPtrSetImpl<const Value *> *ValuesToIgnore) {
466 ElementTypesInLoop.clear();
467 // For each block.
468 for (BasicBlock *BB : TheLoop->blocks()) {
469 // For each instruction in the loop.
470 for (Instruction &I : *BB) {
471 Type *T = I.getType();
472
473 // Skip ignored values.
474 if (ValuesToIgnore && ValuesToIgnore->contains(&I))
475 continue;
476
477 // Only examine Loads, Stores and PHINodes.
479 continue;
480
481 // Examine PHI nodes that are reduction variables. Update the type to
482 // account for the recurrence type.
483 if (auto *PN = dyn_cast<PHINode>(&I)) {
484 if (!Legal->isReductionVariable(PN))
485 continue;
486 const RecurrenceDescriptor &RdxDesc =
487 Legal->getRecurrenceDescriptor(PN);
489 TTI.preferInLoopReduction(RdxDesc.getRecurrenceKind(),
490 RdxDesc.getRecurrenceType()))
491 continue;
492 T = RdxDesc.getRecurrenceType();
493 }
494
495 // Examine the stored values.
496 if (auto *ST = dyn_cast<StoreInst>(&I))
497 T = ST->getValueOperand()->getType();
498
499 assert(T->isSized() &&
500 "Expected the load/store/recurrence type to be sized");
501
502 ElementTypesInLoop.insert(T);
503 }
504 }
505}
506
/// Initialize VScaleForTuning.
///
/// If the function carries a vscale_range attribute whose maximum is set and
/// equals its minimum, that value is used. In every other case the value
/// comes from TTI.getVScaleForTuning().
507void VFSelectionContext::initializeVScaleForTuning() {
// NOTE(review): the condition guarding the return below is not visible in
// this listing -- confirm against the full source.
509 return;
510
511 if (F.hasFnAttribute(Attribute::VScaleRange)) {
512 auto Attr = F.getFnAttribute(Attribute::VScaleRange);
513 auto Min = Attr.getVScaleRangeMin();
514 auto Max = Attr.getVScaleRangeMax();
// A range whose bounds coincide pins vscale to a single value.
515 if (Max && Min == Max) {
516 VScaleForTuning = Max;
517 return;
518 }
519 }
520
// Otherwise fall back to the target's tuning value.
521 VScaleForTuning = TTI.getVScaleForTuning();
522}
523
525 const RecurrenceDescriptor &RdxDesc) const {
526 return !Hints->allowReordering() && RdxDesc.isOrdered();
527}
528
530 LLVM_DEBUG(dbgs() << "LV: Performing code size checks.\n");
531
532 Loop *L = const_cast<Loop *>(TheLoop);
533 if (Legal->getRuntimePointerChecking()->Need) {
535 "Runtime ptr check is required with -Os/-Oz",
536 "runtime pointer checks needed. Enable vectorization of this "
537 "loop with '#pragma clang loop vectorize(enable)' when "
538 "compiling with -Os/-Oz",
539 "CantVersionLoopWithOptForSize", ORE, L);
540 return true;
541 }
542
543 if (!PSE.getPredicate().isAlwaysTrue()) {
545 "Runtime SCEV check is required with -Os/-Oz",
546 "runtime SCEV checks needed. Enable vectorization of this "
547 "loop with '#pragma clang loop vectorize(enable)' when "
548 "compiling with -Os/-Oz",
549 "CantVersionLoopWithOptForSize", ORE, L);
550 return true;
551 }
552
553 // FIXME: Avoid specializing for stride==1 instead of bailing out.
554 if (!Legal->getLAI()->getSymbolicStrides().empty()) {
556 "Runtime stride check for small trip count",
557 "runtime stride == 1 checks needed. Enable vectorization of "
558 "this loop without such check by compiling with -Os/-Oz",
559 "CantVersionLoopWithOptForSize", ORE, L);
560 return true;
561 }
562
563 return false;
564}
565
567 // Avoid duplicating work finding in-loop reductions.
568 if (!InLoopReductions.empty())
569 return;
570
571 for (const auto &Reduction : Legal->getReductionVars()) {
572 PHINode *Phi = Reduction.first;
573 const RecurrenceDescriptor &RdxDesc = Reduction.second;
574
575 // Multi-use reductions (e.g., used in FindLastIV patterns) are handled
576 // separately and should not be considered for in-loop reductions.
577 if (RdxDesc.hasUsesOutsideReductionChain())
578 continue;
579
580 // We don't collect reductions that are type promoted (yet).
581 if (RdxDesc.getRecurrenceType() != Phi->getType())
582 continue;
583
584 // In-loop AnyOf and FindIV reductions are not yet supported.
585 RecurKind Kind = RdxDesc.getRecurrenceKind();
589 continue;
590
591 // If the target would prefer this reduction to happen "in-loop", then we
592 // want to record it as such.
594 !TTI.preferInLoopReduction(Kind, Phi->getType()))
595 continue;
596
597 // Check that we can correctly put the reductions into the loop, by
598 // finding the chain of operations that leads from the phi to the loop
599 // exit value.
600 SmallVector<Instruction *, 4> ReductionOperations =
601 RdxDesc.getReductionOpChain(Phi, const_cast<Loop *>(TheLoop));
602 bool InLoop = !ReductionOperations.empty();
603
604 if (InLoop) {
605 InLoopReductions.insert(Phi);
606 // Add the elements to InLoopReductionImmediateChains for cost modelling.
607 Instruction *LastChain = Phi;
608 for (auto *I : ReductionOperations) {
609 InLoopReductionImmediateChains[I] = LastChain;
610 LastChain = I;
611 }
612 }
613 LLVM_DEBUG(dbgs() << "LV: Using " << (InLoop ? "inloop" : "out of loop")
614 << " reduction for phi: " << *Phi << "\n");
615 }
616}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
#define DEBUG_TYPE
#define _
loop Loop Strength Reduction
This file defines the LoopVectorizationLegality class.
static cl::opt< bool > ForceTargetSupportsScalableVectors("force-target-supports-scalable-vectors", cl::init(false), cl::Hidden, cl::desc("Pretend that scalable vectors are supported, even if the target does " "not support them. This flag should only be used for testing."))
static cl::opt< bool > ConsiderRegPressure("vectorizer-consider-reg-pressure", cl::init(false), cl::Hidden, cl::desc("Discard VFs if their register pressure is too high."))
static cl::opt< bool > UseWiderVFIfCallVariantsPresent("vectorizer-maximize-bandwidth-for-vector-calls", cl::init(true), cl::Hidden, cl::desc("Try wider VFs if they enable the use of vector variants"))
static cl::opt< bool > ForceTargetSupportsMaskedMemoryOps("force-target-supports-masked-memory-ops", cl::init(false), cl::Hidden, cl::desc("Assume the target supports masked memory operations (used for " "testing)."))
Note: This currently only applies to llvm.masked.load and llvm.masked.store.
static cl::opt< bool > MaximizeBandwidth("vectorizer-maximize-bandwidth", cl::init(false), cl::Hidden, cl::desc("Maximize bandwidth when selecting vectorization factor which " "will be determined by the smallest type in loop."))
This file provides a LoopVectorizationPlanner class.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define T
#define LLVM_DEBUG(...)
Definition Debug.h:114
Value * RHS
Value * LHS
LLVM Basic Block Representation.
Definition BasicBlock.h:62
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
constexpr bool isVector() const
One or more elements.
Definition TypeSize.h:324
static constexpr ElementCount getScalable(ScalarTy MinVal)
Definition TypeSize.h:312
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
Definition TypeSize.h:315
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
Diagnostic information for optimization analysis remarks.
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Type * getRecurrenceType() const
Returns the type of the recurrence.
bool hasUsesOutsideReductionChain() const
Returns true if the reduction PHI has any uses outside the reduction chain.
static bool isFindLastRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
LLVM_ABI SmallVector< Instruction *, 4 > getReductionOpChain(PHINode *Phi, Loop *L) const
Attempts to find a chain of operations from Phi to LoopExitInst that can be treated as a set of reduc...
static bool isAnyOfRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
RecurKind getRecurrenceKind() const
bool isOrdered() const
Expose an ordered FP reduction to the instance users.
static bool isFindIVRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool contains(ConstPtrType Ptr) const
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
bool isVoidTy() const
Return true if this is 'void'.
Definition Type.h:141
std::pair< unsigned, unsigned > getSmallestAndWidestTypes() const
bool isLegalMaskedStore(Type *DataType, Value *Ptr, Align Alignment, unsigned AddressSpace) const
Returns true if the target machine supports masked store operation for the given DataType and kind of...
bool isLegalMaskedLoad(Type *DataType, Value *Ptr, Align Alignment, unsigned AddressSpace) const
Returns true if the target machine supports masked load operation for the given DataType and kind of ...
bool runtimeChecksRequired()
Check whether vectorization would require runtime checks.
bool isLegalGatherOrScatter(Value *V, ElementCount VF) const
Returns true if the target machine can represent V as a masked gather or scatter operation.
void collectInLoopReductions()
Split reductions into those that happen in the loop, and those that happen outside.
FixedScalableVFPair computeFeasibleMaxVF(unsigned MaxTripCount, ElementCount UserVF, unsigned UserIC, bool FoldTailByMasking, bool RequiresScalarEpilogue)
bool useOrderedReductions(const RecurrenceDescriptor &RdxDesc) const
Returns true if we should use strict in-order reductions for the given RdxDesc.
bool shouldConsiderRegPressureForVF(ElementCount VF) const
void collectElementTypesForWidening(const SmallPtrSetImpl< const Value * > *ValuesToIgnore=nullptr)
Collect element types in the loop that need widening.
LLVM Value Representation.
Definition Value.h:75
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:230
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:216
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:168
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
static constexpr bool isKnownGT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:223
initializer< Ty > init(const Ty &Val)
DiagnosticInfoOptimizationBase::Argument NV
This is an optimization pass for GlobalISel generic memory operations.
void reportVectorizationInfo(const StringRef Msg, const StringRef ORETag, OptimizationRemarkEmitter *ORE, const Loop *TheLoop, Instruction *I=nullptr, DebugLoc DL={})
Reports an informative message: print Msg for debugging purposes as well as an optimization remark.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
std::optional< unsigned > getMaxVScale(const Function &F, const TargetTransformInfo &TTI)
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
TargetTransformInfo TTI
RecurKind
These are the kinds of recurrences that we support.
LLVM_ABI void reportVectorizationFailure(const StringRef DebugMsg, const StringRef OREMsg, const StringRef ORETag, OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I=nullptr)
Reports a vectorization failure: print DebugMsg for debugging purposes along with the corresponding o...
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition bit.h:347
cl::opt< bool > PreferInLoopReductions
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
A class that represents two vectorization factors (initialized with 0 by default).