//===- LoopVectorizationLegality.cpp --------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file provides loop vectorization legality analysis. Original code
// resided in LoopVectorize.cpp for a long time.
//
// At this point, it is implemented as a utility class, not as an analysis
// pass. It should be easy to create an analysis pass around it if there
// is a need (but D45420 needs to happen first).
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Transforms/Utils/SizeOpts.h"
#include "llvm/Transforms/Vectorize/LoopVectorize.h"

using namespace llvm;
using namespace PatternMatch;

#define LV_NAME "loop-vectorize"
#define DEBUG_TYPE LV_NAME

static cl::opt<bool>
    EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
                       cl::desc("Enable if-conversion during vectorization."));

static cl::opt<bool>
    AllowStridedPointerIVs("lv-strided-pointer-ivs", cl::init(false),
                           cl::Hidden,
                           cl::desc("Enable recognition of non-constant strided "
                                    "pointer induction variables."));

namespace llvm {
cl::opt<bool>
    HintsAllowReordering("hints-allow-reordering", cl::init(true), cl::Hidden,
                         cl::desc("Allow enabling loop hints to reorder "
                                  "FP operations during vectorization."));
} // namespace llvm

// TODO: Move size-based thresholds out of legality checking, make cost based
// decisions instead of hard thresholds.
static cl::opt<unsigned> VectorizeSCEVCheckThreshold(
    "vectorize-scev-check-threshold", cl::init(16), cl::Hidden,
    cl::desc("The maximum number of SCEV checks allowed."));

static cl::opt<unsigned> PragmaVectorizeSCEVCheckThreshold(
    "pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden,
    cl::desc("The maximum number of SCEV checks allowed with a "
             "vectorize(enable) pragma"));

static cl::opt<LoopVectorizeHints::ScalableForceKind>
    ForceScalableVectorization(
        "scalable-vectorization", cl::init(LoopVectorizeHints::SK_Unspecified),
        cl::Hidden,
        cl::desc("Control whether the compiler can use scalable vectors to "
                 "vectorize a loop"),
        cl::values(
            clEnumValN(LoopVectorizeHints::SK_FixedWidthOnly, "off",
                       "Scalable vectorization is disabled."),
            clEnumValN(LoopVectorizeHints::SK_PreferScalable, "preferred",
                       "Scalable vectorization is available and favored when "
                       "the cost is inconclusive."),
            clEnumValN(LoopVectorizeHints::SK_PreferScalable, "on",
                       "Scalable vectorization is available and favored when "
                       "the cost is inconclusive.")));

/// Maximum vectorization interleave count.
static const unsigned MaxInterleaveFactor = 16;

namespace llvm {

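// Validate a hint value parsed from loop metadata. For example,
// "llvm.loop.vectorize.width" accepts 8 (a power of two within the supported
// maximum) but rejects 24, while "llvm.loop.vectorize.enable" accepts only 0
// and 1.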
bool LoopVectorizeHints::Hint::validate(unsigned Val) {
  switch (Kind) {
  case HK_WIDTH:
    return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth;
  case HK_INTERLEAVE:
    return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
  case HK_FORCE:
    return (Val <= 1);
  case HK_ISVECTORIZED:
  case HK_PREDICATE:
  case HK_SCALABLE:
    return (Val == 0 || Val == 1);
  }
  return false;
}

LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
                                       bool InterleaveOnlyWhenForced,
                                       OptimizationRemarkEmitter &ORE,
                                       const TargetTransformInfo *TTI)
    : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH),
      Interleave("interleave.count", InterleaveOnlyWhenForced, HK_INTERLEAVE),
      Force("vectorize.enable", FK_Undefined, HK_FORCE),
      IsVectorized("isvectorized", 0, HK_ISVECTORIZED),
      Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE),
      Scalable("vectorize.scalable.enable", SK_Unspecified, HK_SCALABLE),
      TheLoop(L), ORE(ORE) {
  // Populate values with existing loop metadata.
  getHintsFromMetadata();

  // force-vector-interleave overrides DisableInterleaving.
  if (VectorizerParams::isInterleaveForced())
    Interleave.Value = VectorizerParams::VectorizationInterleave;

  // If the metadata doesn't explicitly specify whether to enable scalable
  // vectorization, then decide based on the following criteria (increasing
  // level of priority):
  //  - Target default
  //  - Metadata width
  //  - Force option (always overrides)
  if ((LoopVectorizeHints::ScalableForceKind)Scalable.Value == SK_Unspecified) {
    if (TTI)
      Scalable.Value = TTI->enableScalableVectorization() ? SK_PreferScalable
                                                          : SK_FixedWidthOnly;

    if (Width.Value)
      // If the width is set, but the metadata says nothing about the scalable
      // property, then assume it concerns only a fixed-width UserVF.
      // If width is not set, the flag takes precedence.
      Scalable.Value = SK_FixedWidthOnly;
  }

  // If the flag is set to force any use of scalable vectors, override the loop
  // hints.
  if (ForceScalableVectorization.getValue() !=
      LoopVectorizeHints::SK_Unspecified)
    Scalable.Value = ForceScalableVectorization.getValue();

  // Scalable vectorization is disabled if no preference is specified.
  if ((LoopVectorizeHints::ScalableForceKind)Scalable.Value == SK_Unspecified)
    Scalable.Value = SK_FixedWidthOnly;

  if (IsVectorized.Value != 1)
    // If the vectorization width and interleaving count are both 1 then
    // consider the loop to have been already vectorized because there's
    // nothing more that we can do.
    IsVectorized.Value =
        getWidth() == ElementCount::getFixed(1) && getInterleave() == 1;
  LLVM_DEBUG(if (InterleaveOnlyWhenForced && getInterleave() == 1) dbgs()
             << "LV: Interleaving disabled by the pass manager\n");
}

void LoopVectorizeHints::setAlreadyVectorized() {
  LLVMContext &Context = TheLoop->getHeader()->getContext();

  MDNode *IsVectorizedMD = MDNode::get(
      Context,
      {MDString::get(Context, "llvm.loop.isvectorized"),
       ConstantAsMetadata::get(ConstantInt::get(Context, APInt(32, 1)))});
  MDNode *LoopID = TheLoop->getLoopID();
  MDNode *NewLoopID =
      makePostTransformationMetadata(Context, LoopID,
                                     {Twine(Prefix(), "vectorize.").str(),
                                      Twine(Prefix(), "interleave.").str()},
                                     {IsVectorizedMD});
  TheLoop->setLoopID(NewLoopID);

  // Update internal cache.
  IsVectorized.Value = 1;
}

bool LoopVectorizeHints::allowVectorization(
    Function *F, Loop *L, bool VectorizeOnlyWhenForced) const {
  if (getForce() == LoopVectorizeHints::FK_Disabled) {
    LLVM_DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n");
    emitRemarkWithHints();
    return false;
  }

  if (VectorizeOnlyWhenForced && getForce() != LoopVectorizeHints::FK_Enabled) {
    LLVM_DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n");
    emitRemarkWithHints();
    return false;
  }

  if (getIsVectorized() == 1) {
    LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
    // FIXME: Add interleave.disable metadata. This will allow
    // vectorize.disable to be used without disabling the pass and errors
    // to differentiate between disabled vectorization and a width of 1.
    ORE.emit([&]() {
      return OptimizationRemarkMissed(vectorizeAnalysisPassName(),
                                      "AllDisabled", L->getStartLoc(),
                                      L->getHeader())
             << "loop not vectorized: vectorization and interleaving are "
                "explicitly disabled, or the loop has already been "
                "vectorized";
    });
    return false;
  }

  return true;
}

void LoopVectorizeHints::emitRemarkWithHints() const {
  using namespace ore;

  ORE.emit([&]() {
    if (Force.Value == LoopVectorizeHints::FK_Disabled)
      return OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled",
                                      TheLoop->getStartLoc(),
                                      TheLoop->getHeader())
             << "loop not vectorized: vectorization is explicitly disabled";

    OptimizationRemarkMissed R(LV_NAME, "MissedDetails", TheLoop->getStartLoc(),
                               TheLoop->getHeader());
    R << "loop not vectorized";
    if (Force.Value == LoopVectorizeHints::FK_Enabled) {
      R << " (Force=" << NV("Force", true);
      if (Width.Value != 0)
        R << ", Vector Width=" << NV("VectorWidth", getWidth());
      if (getInterleave() != 0)
        R << ", Interleave Count=" << NV("InterleaveCount", getInterleave());
      R << ")";
    }
    return R;
  });
}

const char *LoopVectorizeHints::vectorizeAnalysisPassName() const {
  if (getWidth() == ElementCount::getFixed(1))
    return LV_NAME;
  if (getForce() == LoopVectorizeHints::FK_Disabled)
    return LV_NAME;
  if (getForce() == LoopVectorizeHints::FK_Undefined && getWidth().isZero())
    return LV_NAME;
  return OptimizationRemarkAnalysis::AlwaysPrint;
}

bool LoopVectorizeHints::allowReordering() const {
  // Allow the vectorizer to change the order of operations if enabling
  // loop hints are provided.
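  // (Reordering matters chiefly for FP reductions: vectorizing a
  // floating-point sum turns it into per-lane partial sums, a reassociation
  // that is only legal when the user opted in via fast-math flags or an
  // explicit enabling hint.)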
  ElementCount EC = getWidth();
  return HintsAllowReordering &&
         (getForce() == LoopVectorizeHints::FK_Enabled ||
          EC.getKnownMinValue() > 1);
}

void LoopVectorizeHints::getHintsFromMetadata() {
  MDNode *LoopID = TheLoop->getLoopID();
  if (!LoopID)
    return;

  // First operand should refer to the loop id itself.
  assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
  assert(LoopID->getOperand(0) == LoopID && "invalid loop id");

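  // A typical loop ID built from "#pragma clang loop vectorize_width(4)"
  // looks like (illustrative):
  //   !0 = distinct !{!0, !1}
  //   !1 = !{!"llvm.loop.vectorize.width", i32 4}
  // Each operand after the self-referencing first operand is examined below.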
  for (const MDOperand &MDO : llvm::drop_begin(LoopID->operands())) {
    const MDString *S = nullptr;
    SmallVector<Metadata *, 4> Args;

    // The expected hint is either a MDString or a MDNode with the first
    // operand a MDString.
    if (const MDNode *MD = dyn_cast<MDNode>(MDO)) {
      if (!MD || MD->getNumOperands() == 0)
        continue;
      S = dyn_cast<MDString>(MD->getOperand(0));
      for (unsigned Idx = 1; Idx < MD->getNumOperands(); ++Idx)
        Args.push_back(MD->getOperand(Idx));
    } else {
      S = dyn_cast<MDString>(MDO);
      assert(Args.size() == 0 && "too many arguments for MDString");
    }

    if (!S)
      continue;

    // Check if the hint starts with the loop metadata prefix.
    StringRef Name = S->getString();
    if (Args.size() == 1)
      setHint(Name, Args[0]);
  }
}

void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) {
  if (!Name.starts_with(Prefix()))
    return;
  Name = Name.substr(Prefix().size(), StringRef::npos);

  const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg);
  if (!C)
    return;
  unsigned Val = C->getZExtValue();

  Hint *Hints[] = {&Width, &Interleave, &Force,
                   &IsVectorized, &Predicate, &Scalable};
  for (auto *H : Hints) {
    if (Name == H->Name) {
      if (H->validate(Val))
        H->Value = Val;
      else
        LLVM_DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n");
      break;
    }
  }
}
313
314// Return true if the inner loop \p Lp is uniform with regard to the outer loop
315// \p OuterLp (i.e., if the outer loop is vectorized, all the vector lanes
316// executing the inner loop will execute the same iterations). This check is
317// very constrained for now but it will be relaxed in the future. \p Lp is
318// considered uniform if it meets all the following conditions:
319// 1) it has a canonical IV (starting from 0 and with stride 1),
320// 2) its latch terminator is a conditional branch and,
321// 3) its latch condition is a compare instruction whose operands are the
322// canonical IV and an OuterLp invariant.
323// This check doesn't take into account the uniformity of other conditions not
324// related to the loop latch because they don't affect the loop uniformity.
325//
326// NOTE: We decided to keep all these checks and its associated documentation
327// together so that we can easily have a picture of the current supported loop
328// nests. However, some of the current checks don't depend on \p OuterLp and
329// would be redundantly executed for each \p Lp if we invoked this function for
330// different candidate outer loops. This is not the case for now because we
331// don't currently have the infrastructure to evaluate multiple candidate outer
332// loops and \p OuterLp will be a fixed parameter while we only support explicit
333// outer loop vectorization. It's also very likely that these checks go away
334// before introducing the aforementioned infrastructure. However, if this is not
335// the case, we should move the \p OuterLp independent checks to a separate
336// function that is only executed once for each \p Lp.
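// For example, in the nest below the inner loop is uniform with regard to the
// outer loop, since every vector lane of the vectorized outer loop executes
// the same inner iterations (assuming M is invariant in the outer loop):
//   for (int i = 0; i < N; ++i)   // OuterLp
//     for (int j = 0; j < M; ++j) // Lp
//       A[i][j] = 0;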
static bool isUniformLoop(Loop *Lp, Loop *OuterLp) {
  assert(Lp->getLoopLatch() && "Expected loop with a single latch.");

  // If Lp is the outer loop, it's uniform by definition.
  if (Lp == OuterLp)
    return true;
  assert(OuterLp->contains(Lp) && "OuterLp must contain Lp.");

  // 1.
  PHINode *IV = Lp->getCanonicalInductionVariable();
  if (!IV) {
    LLVM_DEBUG(dbgs() << "LV: Canonical IV not found.\n");
    return false;
  }

  // 2.
  BasicBlock *Latch = Lp->getLoopLatch();
  auto *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator());
  if (!LatchBr || LatchBr->isUnconditional()) {
    LLVM_DEBUG(dbgs() << "LV: Unsupported loop latch branch.\n");
    return false;
  }

  // 3.
  auto *LatchCmp = dyn_cast<CmpInst>(LatchBr->getCondition());
  if (!LatchCmp) {
    LLVM_DEBUG(
        dbgs() << "LV: Loop latch condition is not a compare instruction.\n");
    return false;
  }

  Value *CondOp0 = LatchCmp->getOperand(0);
  Value *CondOp1 = LatchCmp->getOperand(1);
  Value *IVUpdate = IV->getIncomingValueForBlock(Latch);
  if (!(CondOp0 == IVUpdate && OuterLp->isLoopInvariant(CondOp1)) &&
      !(CondOp1 == IVUpdate && OuterLp->isLoopInvariant(CondOp0))) {
    LLVM_DEBUG(dbgs() << "LV: Loop latch condition is not uniform.\n");
    return false;
  }

  return true;
}

// Return true if \p Lp and all its nested loops are uniform with regard to \p
// OuterLp.
static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp) {
  if (!isUniformLoop(Lp, OuterLp))
    return false;

  // Check if nested loops are uniform.
  for (Loop *SubLp : *Lp)
    if (!isUniformLoopNest(SubLp, OuterLp))
      return false;

  return true;
}

static Type *convertPointerToIntegerType(const DataLayout &DL, Type *Ty) {
  if (Ty->isPointerTy())
    return DL.getIntPtrType(Ty);

  // It is possible that char's or short's overflow when we ask for the loop's
  // trip count, work around this by changing the type size.
  if (Ty->getScalarSizeInBits() < 32)
    return Type::getInt32Ty(Ty->getContext());

  return Ty;
}

static Type *getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) {
  Ty0 = convertPointerToIntegerType(DL, Ty0);
  Ty1 = convertPointerToIntegerType(DL, Ty1);
  if (Ty0->getScalarSizeInBits() > Ty1->getScalarSizeInBits())
    return Ty0;
  return Ty1;
}

/// Check that the instruction has outside loop users and is not an
/// identified reduction variable.
static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
                               SmallPtrSetImpl<Value *> &AllowedExit) {
  // Reductions, Inductions and non-header phis are allowed to have exit users.
  // All other instructions must not have external users.
  if (!AllowedExit.count(Inst))
    // Check that all of the users of the loop are inside the BB.
    for (User *U : Inst->users()) {
      Instruction *UI = cast<Instruction>(U);
      // This user may be a reduction exit value.
      if (!TheLoop->contains(UI)) {
        LLVM_DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n');
        return true;
      }
    }
  return false;
}

/// Returns true if A and B have same pointer operands or same SCEVs addresses
static bool storeToSameAddress(ScalarEvolution *SE, StoreInst *A,
                               StoreInst *B) {
  // Compare store
  if (A == B)
    return true;

  // Otherwise Compare pointers
  Value *APtr = A->getPointerOperand();
  Value *BPtr = B->getPointerOperand();
  if (APtr == BPtr)
    return true;

  // Otherwise compare address SCEVs
  return SE->getSCEV(APtr) == SE->getSCEV(BPtr);
}

int LoopVectorizationLegality::isConsecutivePtr(Type *AccessTy,
                                                Value *Ptr) const {
  // FIXME: Currently, the set of symbolic strides is sometimes queried before
  // it's collected. This happens from canVectorizeWithIfConvert, when the
  // pointer is checked to reference consecutive elements suitable for a
  // masked access.
  const auto &Strides =
      LAI ? LAI->getSymbolicStrides() : DenseMap<Value *, const SCEV *>();

  Function *F = TheLoop->getHeader()->getParent();
  bool OptForSize = F->hasOptSize() ||
                    llvm::shouldOptimizeForSize(TheLoop->getHeader(), PSI, BFI,
                                                PGSOQueryType::IRPass);
  bool CanAddPredicate = !OptForSize;
  int Stride = getPtrStride(PSE, AccessTy, Ptr, TheLoop, Strides,
                            CanAddPredicate, false).value_or(0);
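  // A stride of +1 means the pointer advances one element per iteration (e.g.
  // a[i]), and -1 means it walks backwards (e.g. a[n - i]); any other stride
  // is reported as non-consecutive below.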
  if (Stride == 1 || Stride == -1)
    return Stride;
  return 0;
}

bool LoopVectorizationLegality::isInvariant(Value *V) const {
  return LAI->isInvariant(V);
}

namespace {
/// A rewriter to build the SCEVs for each of the VF lanes in the expected
/// vectorized loop, which can then be compared to detect their uniformity.
/// This is done by replacing the AddRec SCEVs of the original scalar loop
/// (TheLoop) with new AddRecs where the step is multiplied by StepMultiplier
/// and Offset * Step is added. Also checks if all sub-expressions are
/// analyzable w.r.t. uniformity.
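///
/// For example, with VF = 4 the canonical IV {0,+,1} of TheLoop is rewritten
/// to {0,+,4} for lane 0 and to {2,+,4} for lane 2. An expression such as
/// (IV /u 4) is uniform across the four lanes whenever SCEV can prove the
/// rewritten per-lane expressions equal, whereas IV itself never is.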
class SCEVAddRecForUniformityRewriter
    : public SCEVRewriteVisitor<SCEVAddRecForUniformityRewriter> {
  /// Multiplier to be applied to the step of AddRecs in TheLoop.
  unsigned StepMultiplier;

  /// Offset to be added to the AddRecs in TheLoop.
  unsigned Offset;

  /// Loop for which to rewrite AddRecs.
  Loop *TheLoop;

  /// Is any sub-expression not analyzable w.r.t. uniformity?
  bool CannotAnalyze = false;

  bool canAnalyze() const { return !CannotAnalyze; }

public:
  SCEVAddRecForUniformityRewriter(ScalarEvolution &SE, unsigned StepMultiplier,
                                  unsigned Offset, Loop *TheLoop)
      : SCEVRewriteVisitor(SE), StepMultiplier(StepMultiplier), Offset(Offset),
        TheLoop(TheLoop) {}

  const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
    assert(Expr->getLoop() == TheLoop &&
           "addrec outside of TheLoop must be invariant and should have been "
           "handled earlier");
    // Build a new AddRec by multiplying the step by StepMultiplier and
    // incrementing the start by Offset * step.
    Type *Ty = Expr->getType();
    const SCEV *Step = Expr->getStepRecurrence(SE);
    if (!SE.isLoopInvariant(Step, TheLoop)) {
      CannotAnalyze = true;
      return Expr;
    }
    const SCEV *NewStep =
        SE.getMulExpr(Step, SE.getConstant(Ty, StepMultiplier));
    const SCEV *ScaledOffset = SE.getMulExpr(Step, SE.getConstant(Ty, Offset));
    const SCEV *NewStart = SE.getAddExpr(Expr->getStart(), ScaledOffset);
    return SE.getAddRecExpr(NewStart, NewStep, TheLoop, SCEV::FlagAnyWrap);
  }

  const SCEV *visit(const SCEV *S) {
    if (CannotAnalyze || SE.isLoopInvariant(S, TheLoop))
      return S;
    return SCEVRewriteVisitor<SCEVAddRecForUniformityRewriter>::visit(S);
  }

  const SCEV *visitUnknown(const SCEVUnknown *S) {
    if (SE.isLoopInvariant(S, TheLoop))
      return S;
    // The value could vary across iterations.
    CannotAnalyze = true;
    return S;
  }

  const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *S) {
    // Could not analyze the expression.
    CannotAnalyze = true;
    return S;
  }

  static const SCEV *rewrite(const SCEV *S, ScalarEvolution &SE,
                             unsigned StepMultiplier, unsigned Offset,
                             Loop *TheLoop) {
    /// Bail out if the expression does not contain an UDiv expression.
    /// Uniform values which are not loop invariant require operations to strip
    /// out the lowest bits. For now just look for UDivs and use it to avoid
    /// re-writing UDIV-free expressions for other lanes to limit compile time.
    if (!SCEVExprContains(S,
                          [](const SCEV *S) { return isa<SCEVUDivExpr>(S); }))
      return SE.getCouldNotCompute();

    SCEVAddRecForUniformityRewriter Rewriter(SE, StepMultiplier, Offset,
                                             TheLoop);
    const SCEV *Result = Rewriter.visit(S);

    if (Rewriter.canAnalyze())
      return Result;
    return SE.getCouldNotCompute();
  }
};

} // namespace

bool LoopVectorizationLegality::isUniform(Value *V, ElementCount VF) const {
  if (isInvariant(V))
    return true;
  if (VF.isScalable())
    return false;
  if (VF.isScalar())
    return true;

  // Since we rely on SCEV for uniformity, if the type is not SCEVable, it is
  // never considered uniform.
  auto *SE = PSE.getSE();
  if (!SE->isSCEVable(V->getType()))
    return false;
  const SCEV *S = SE->getSCEV(V);

  // Rewrite AddRecs in TheLoop to step by VF and check if the expression for
  // lane 0 matches the expressions for all other lanes.
  unsigned FixedVF = VF.getKnownMinValue();
  const SCEV *FirstLaneExpr =
      SCEVAddRecForUniformityRewriter::rewrite(S, *SE, FixedVF, 0, TheLoop);
  if (isa<SCEVCouldNotCompute>(FirstLaneExpr))
    return false;

  // Make sure the expressions for lanes FixedVF-1..1 match the expression for
  // lane 0. We check lanes in reverse order for compile-time, as frequently
  // checking the last lane is sufficient to rule out uniformity.
  return all_of(reverse(seq<unsigned>(1, FixedVF)), [&](unsigned I) {
    const SCEV *IthLaneExpr =
        SCEVAddRecForUniformityRewriter::rewrite(S, *SE, FixedVF, I, TheLoop);
    return FirstLaneExpr == IthLaneExpr;
  });
}

bool LoopVectorizationLegality::isUniformMemOp(Instruction &I,
                                               ElementCount VF) const {
  Value *Ptr = getLoadStorePointerOperand(&I);
  if (!Ptr)
    return false;
  // Note: There's nothing inherent which prevents predicated loads and
  // stores from being uniform. The current lowering simply doesn't handle
  // it; in particular, the cost model distinguishes scatter/gather from
  // scalar w/predication, and we currently rely on the scalar path.
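  // (A uniform load can typically be lowered as one scalar load plus a
  // broadcast, and a uniform store as one scalar store of the last lane's
  // value, which is far cheaper than a gather or scatter.)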
  return isUniform(Ptr, VF) && !blockNeedsPredication(I.getParent());
}

bool LoopVectorizationLegality::canVectorizeOuterLoop() {
  assert(!TheLoop->isInnermost() && "We are not vectorizing an outer loop.");
  // Store the result and return it at the end instead of exiting early, in
  // case allowExtraAnalysis is used to report multiple reasons for not
  // vectorizing.
  bool Result = true;
  bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);

  for (BasicBlock *BB : TheLoop->blocks()) {
    // Check whether the BB terminator is a BranchInst. Any other terminator is
    // not supported yet.
    auto *Br = dyn_cast<BranchInst>(BB->getTerminator());
    if (!Br) {
      reportVectorizationFailure(
          "Unsupported basic block terminator",
          "loop control flow is not understood by vectorizer",
          "CFGNotUnderstood", ORE, TheLoop);
      if (DoExtraAnalysis)
        Result = false;
      else
        return false;
    }

    // Check whether the BranchInst is a supported one. Only unconditional
    // branches, conditional branches with an outer loop invariant condition or
    // backedges are supported.
    // FIXME: We skip these checks when VPlan predication is enabled as we
    // want to allow divergent branches. This whole check will be removed
    // once VPlan predication is on by default.
    if (Br && Br->isConditional() &&
        !TheLoop->isLoopInvariant(Br->getCondition()) &&
        !LI->isLoopHeader(Br->getSuccessor(0)) &&
        !LI->isLoopHeader(Br->getSuccessor(1))) {
      reportVectorizationFailure(
          "Unsupported conditional branch",
          "loop control flow is not understood by vectorizer",
          "CFGNotUnderstood", ORE, TheLoop);
      if (DoExtraAnalysis)
        Result = false;
      else
        return false;
    }
  }

  // Check whether inner loops are uniform. At this point, we only support
  // simple outer loop scenarios with uniform nested loops.
  if (!isUniformLoopNest(TheLoop /*loop nest*/,
                         TheLoop /*context outer loop*/)) {
    reportVectorizationFailure(
        "Outer loop contains divergent loops",
        "loop control flow is not understood by vectorizer",
        "CFGNotUnderstood", ORE, TheLoop);
    if (DoExtraAnalysis)
      Result = false;
    else
      return false;
  }

  // Check whether we are able to set up outer loop induction.
  if (!setupOuterLoopInductions()) {
    reportVectorizationFailure("Unsupported outer loop Phi(s)",
                               "Unsupported outer loop Phi(s)",
                               "UnsupportedPhi", ORE, TheLoop);
    if (DoExtraAnalysis)
      Result = false;
    else
      return false;
  }

  return Result;
}

void LoopVectorizationLegality::addInductionPhi(
    PHINode *Phi, const InductionDescriptor &ID,
    SmallPtrSetImpl<Value *> &AllowedExit) {
  Inductions[Phi] = ID;

  // In case this induction also comes with casts that we know we can ignore
  // in the vectorized loop body, record them here. All casts could be recorded
  // here for ignoring, but it suffices to record only the first (as it is the
  // only one that may be used outside the cast sequence).
  const SmallVectorImpl<Instruction *> &Casts = ID.getCastInsts();
  if (!Casts.empty())
    InductionCastsToIgnore.insert(*Casts.begin());

  Type *PhiTy = Phi->getType();
  const DataLayout &DL = Phi->getDataLayout();

  // Get the widest type.
  if (!PhiTy->isFloatingPointTy()) {
    if (!WidestIndTy)
      WidestIndTy = convertPointerToIntegerType(DL, PhiTy);
    else
      WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy);
  }

  // Int inductions are special because we only allow one IV.
  if (ID.getKind() == InductionDescriptor::IK_IntInduction &&
      ID.getConstIntStepValue() && ID.getConstIntStepValue()->isOne() &&
      isa<Constant>(ID.getStartValue()) &&
      cast<Constant>(ID.getStartValue())->isNullValue()) {

    // Use the phi node with the widest type as induction. Use the last
    // one if there are multiple (no good reason for doing this other
    // than it is expedient). We've checked that it begins at zero and
    // steps by one, so this is a canonical induction variable.
    if (!PrimaryInduction || PhiTy == WidestIndTy)
      PrimaryInduction = Phi;
  }

  // Both the PHI node itself, and the "post-increment" value feeding
  // back into the PHI node may have external users.
  // We can allow those uses, except if the SCEVs we have for them rely
  // on predicates that only hold within the loop, since allowing the exit
  // currently means re-using this SCEV outside the loop (see PR33706 for more
  // details).
  if (PSE.getPredicate().isAlwaysTrue()) {
    AllowedExit.insert(Phi);
    AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch()));
  }

  LLVM_DEBUG(dbgs() << "LV: Found an induction variable.\n");
}

bool LoopVectorizationLegality::setupOuterLoopInductions() {
  BasicBlock *Header = TheLoop->getHeader();

  // Returns true if a given Phi is a supported induction.
  auto IsSupportedPhi = [&](PHINode &Phi) -> bool {
    InductionDescriptor ID;
    if (InductionDescriptor::isInductionPHI(&Phi, TheLoop, PSE, ID) &&
        ID.getKind() == InductionDescriptor::IK_IntInduction) {
      addInductionPhi(&Phi, ID, AllowedExit);
      return true;
    }
    // Bail out for any Phi in the outer loop header that is not a supported
    // induction.
    LLVM_DEBUG(
        dbgs() << "LV: Found unsupported PHI for outer loop vectorization.\n");
    return false;
  };

  return llvm::all_of(Header->phis(), IsSupportedPhi);
}

/// Checks if a function is scalarizable according to the TLI, in
/// the sense that it should be vectorized and then expanded in
/// multiple scalar calls. This is represented in the
/// TLI via mappings that do not specify a vector name, as in the
/// following example:
///
///    const VecDesc VecIntrinsics[] = {
///      {"llvm.phx.abs.i32", "", 4}
///    };
static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI) {
  const StringRef ScalarName = CI.getCalledFunction()->getName();
  bool Scalarize = TLI.isFunctionVectorizable(ScalarName);
  // Check that all known VFs are not associated to a vector
  // function, i.e. the vector name is empty.
  if (Scalarize) {
    ElementCount WidestFixedVF, WidestScalableVF;
    TLI.getWidestVF(ScalarName, WidestFixedVF, WidestScalableVF);
    for (ElementCount VF = ElementCount::getFixed(2);
         ElementCount::isKnownLE(VF, WidestFixedVF); VF *= 2)
      Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF);
    for (ElementCount VF = ElementCount::getScalable(1);
         ElementCount::isKnownLE(VF, WidestScalableVF); VF *= 2)
      Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF);
    assert((WidestScalableVF.isZero() || !Scalarize) &&
           "Caller may decide to scalarize a variant using a scalable VF");
  }
  return Scalarize;
}

bool LoopVectorizationLegality::canVectorizeInstrs() {
  BasicBlock *Header = TheLoop->getHeader();

  // For each block in the loop.
  for (BasicBlock *BB : TheLoop->blocks()) {
    // Scan the instructions in the block and look for hazards.
    for (Instruction &I : *BB) {
      if (auto *Phi = dyn_cast<PHINode>(&I)) {
        Type *PhiTy = Phi->getType();
        // Check that this PHI type is allowed.
        if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
            !PhiTy->isPointerTy()) {
          reportVectorizationFailure(
              "Found a non-int non-pointer PHI",
              "loop control flow is not understood by vectorizer",
              "CFGNotUnderstood", ORE, TheLoop);
          return false;
        }

        // If this PHINode is not in the header block, then we know that we
        // can convert it to select during if-conversion. No need to check if
        // the PHIs in this block are induction or reduction variables.
        if (BB != Header) {
          // Non-header phi nodes that have outside uses can be vectorized. Add
          // them to the list of allowed exits.
          // Unsafe cyclic dependencies with header phis are identified during
          // legalization for reduction, induction and fixed order
          // recurrences.
          AllowedExit.insert(&I);
          continue;
        }

        // We only allow if-converted PHIs with exactly two incoming values.
        if (Phi->getNumIncomingValues() != 2) {
          reportVectorizationFailure(
              "Found an invalid PHI",
              "loop control flow is not understood by vectorizer",
              "CFGNotUnderstood", ORE, TheLoop, Phi);
          return false;
        }

        RecurrenceDescriptor RedDes;
        if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC,
                                                 DT, PSE.getSE())) {
          Requirements->addExactFPMathInst(RedDes.getExactFPMathInst());
          AllowedExit.insert(RedDes.getLoopExitInstr());
          Reductions[Phi] = RedDes;
          continue;
        }

        // We prevent matching non-constant strided pointer IVs to preserve
        // historical vectorizer behavior after a generalization of the
        // IVDescriptor code. The intent is to remove this check, but we
        // have to fix issues around code quality for such loops first.
        auto IsDisallowedStridedPointerInduction =
            [](const InductionDescriptor &ID) {
              if (AllowStridedPointerIVs)
                return false;
              return ID.getKind() == InductionDescriptor::IK_PtrInduction &&
                     ID.getConstIntStepValue() == nullptr;
            };

        // TODO: Instead of recording the AllowedExit, it would be good to
        // record the complementary set: NotAllowedExit. These include (but may
        // not be limited to):
        // 1. Reduction phis as they represent the one-before-last value, which
        // is not available when vectorized
        // 2. Induction phis and increment when SCEV predicates cannot be used
        // outside the loop - see addInductionPhi
        // 3. Non-Phis with outside uses when SCEV predicates cannot be used
        // outside the loop - see call to hasOutsideLoopUser in the non-phi
        // handling below
        // 4. FixedOrderRecurrence phis that can possibly be handled by
        // extraction.
        // By recording these, we can then reason about ways to vectorize each
        // of these NotAllowedExit.
        InductionDescriptor ID;
        if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID) &&
            !IsDisallowedStridedPointerInduction(ID)) {
          addInductionPhi(Phi, ID, AllowedExit);
          Requirements->addExactFPMathInst(ID.getExactFPMathInst());
          continue;
        }

        if (RecurrenceDescriptor::isFixedOrderRecurrence(Phi, TheLoop, DT)) {
          AllowedExit.insert(Phi);
          FixedOrderRecurrences.insert(Phi);
          continue;
        }

        // As a last resort, coerce the PHI to an AddRec expression
        // and re-try classifying it as an induction PHI.
        if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true) &&
            !IsDisallowedStridedPointerInduction(ID)) {
          addInductionPhi(Phi, ID, AllowedExit);
          continue;
        }

        reportVectorizationFailure(
            "Found an unidentified PHI",
            "value that could not be identified as "
            "reduction is used outside the loop",
            "NonReductionValueUsedOutsideLoop", ORE, TheLoop, Phi);
        return false;
      } // end of PHI handling

      // We handle calls that:
      //   * Are debug info intrinsics.
      //   * Have a mapping to an IR intrinsic.
      //   * Have a vector version available.
      auto *CI = dyn_cast<CallInst>(&I);

      if (CI && !getVectorIntrinsicIDForCall(CI, TLI) &&
          !isa<DbgInfoIntrinsic>(CI) &&
          !(CI->getCalledFunction() && TLI &&
            (!VFDatabase::getMappings(*CI).empty() ||
             isTLIScalarize(*TLI, *CI)))) {
        // If the call is a recognized math library call, it is likely that
        // we can vectorize it given loosened floating-point constraints.
        LibFunc Func;
        bool IsMathLibCall =
            TLI && CI->getCalledFunction() &&
            CI->getType()->isFloatingPointTy() &&
            TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) &&
            TLI->hasOptimizedCodeGen(Func);

        if (IsMathLibCall) {
          // TODO: Ideally, we should not use clang-specific language here,
          // but it's hard to provide meaningful yet generic advice.
          // Also, should this be guarded by allowExtraAnalysis() and/or be part
          // of the returned info from isFunctionVectorizable()?
          reportVectorizationFailure(
              "Found a non-intrinsic callsite",
              "library call cannot be vectorized. "
              "Try compiling with -fno-math-errno, -ffast-math, "
              "or similar flags",
              "CantVectorizeLibcall", ORE, TheLoop, CI);
        } else {
          reportVectorizationFailure("Found a non-intrinsic callsite",
                                     "call instruction cannot be vectorized",
                                     "CantVectorizeLibcall", ORE, TheLoop, CI);
        }
        return false;
      }

      // Some intrinsics have scalar arguments, and those arguments must be
      // loop invariant for the call to be vectorized.
      if (CI) {
        auto *SE = PSE.getSE();
        Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI);
        for (unsigned Idx = 0; Idx < CI->arg_size(); ++Idx)
          if (isVectorIntrinsicWithScalarOpAtArg(IntrinID, Idx)) {
            if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(Idx)),
                                     TheLoop)) {
              reportVectorizationFailure(
                  "Found unvectorizable intrinsic",
                  "intrinsic instruction cannot be vectorized",
                  "CantVectorizeIntrinsic", ORE, TheLoop, CI);
              return false;
            }
          }
      }

      // If we found a vectorized variant of a function, note that so LV can
      // make better decisions about maximum VF.
      if (CI && !VFDatabase::getMappings(*CI).empty())
        VecCallVariantsFound = true;

      // Check that the instruction return type is vectorizable.
      // Also, we can't vectorize extractelement instructions.
      if ((!VectorType::isValidElementType(I.getType()) &&
           !I.getType()->isVoidTy()) ||
          isa<ExtractElementInst>(I)) {
        reportVectorizationFailure(
            "Found unvectorizable type",
            "instruction return type cannot be vectorized",
            "CantVectorizeInstructionReturnType", ORE, TheLoop, &I);
        return false;
      }

      // Check that the stored type is vectorizable.
      if (auto *ST = dyn_cast<StoreInst>(&I)) {
        Type *T = ST->getValueOperand()->getType();
        if (!VectorType::isValidElementType(T)) {
          reportVectorizationFailure("Store instruction cannot be vectorized",
                                     "store instruction cannot be vectorized",
                                     "CantVectorizeStore", ORE, TheLoop, ST);
          return false;
        }

        // For nontemporal stores, check that a nontemporal vector version is
        // supported on the target.
        if (ST->getMetadata(LLVMContext::MD_nontemporal)) {
          // Arbitrarily try a vector of 2 elements.
          auto *VecTy = FixedVectorType::get(T, /*NumElts=*/2);
          assert(VecTy && "did not find vectorized version of stored type");
          if (!TTI->isLegalNTStore(VecTy, ST->getAlign())) {
            reportVectorizationFailure(
                "nontemporal store instruction cannot be vectorized",
                "nontemporal store instruction cannot be vectorized",
                "CantVectorizeNontemporalStore", ORE, TheLoop, ST);
            return false;
          }
        }

      } else if (auto *LD = dyn_cast<LoadInst>(&I)) {
        if (LD->getMetadata(LLVMContext::MD_nontemporal)) {
          // For nontemporal loads, check that a nontemporal vector version is
          // supported on the target (arbitrarily try a vector of 2 elements).
          auto *VecTy = FixedVectorType::get(I.getType(), /*NumElts=*/2);
          assert(VecTy && "did not find vectorized version of load type");
          if (!TTI->isLegalNTLoad(VecTy, LD->getAlign())) {
            reportVectorizationFailure(
                "nontemporal load instruction cannot be vectorized",
                "nontemporal load instruction cannot be vectorized",
                "CantVectorizeNontemporalLoad", ORE, TheLoop, LD);
            return false;
          }
        }

        // FP instructions can allow unsafe algebra, thus vectorizable by
        // non-IEEE-754 compliant SIMD units.
        // This applies to floating-point math operations and calls, not memory
        // operations, shuffles, or casts, as they don't change precision or
        // semantics.
      } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) &&
                 !I.isFast()) {
        LLVM_DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n");
        Hints->setPotentiallyUnsafe();
      }

      // Reduction instructions are allowed to have exit users.
      // All other instructions must not have external users.
      if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) {
        // We can safely vectorize loops where instructions within the loop are
        // used outside the loop only if the SCEV predicates within the loop
        // are the same as outside the loop. Allowing the exit means reusing
        // the SCEV outside the loop.
        if (PSE.getPredicate().isAlwaysTrue()) {
          AllowedExit.insert(&I);
          continue;
        }
        reportVectorizationFailure("Value cannot be used outside the loop",
                                   "value cannot be used outside the loop",
                                   "ValueUsedOutsideLoop", ORE, TheLoop, &I);
        return false;
      }
    } // next instr.
  }

  if (!PrimaryInduction) {
    if (Inductions.empty()) {
      reportVectorizationFailure(
          "Did not find one integer induction var",
          "loop induction variable could not be identified",
          "NoInductionVariable", ORE, TheLoop);
      return false;
    }
    if (!WidestIndTy) {
      reportVectorizationFailure(
          "Did not find one integer induction var",
          "integer loop induction variable could not be identified",
          "NoIntegerInductionVariable", ORE, TheLoop);
      return false;
    }
    LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
  }

  // Now we know the widest induction type, check if our found induction
  // is the same size. If it's not, unset it here and InnerLoopVectorizer
  // will create another.
  if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType())
    PrimaryInduction = nullptr;

  return true;
}

bool LoopVectorizationLegality::canVectorizeMemory() {
  LAI = &LAIs.getInfo(*TheLoop);
  const OptimizationRemarkAnalysis *LAR = LAI->getReport();
  if (LAR) {
    ORE->emit([&]() {
      return OptimizationRemarkAnalysis(Hints->vectorizeAnalysisPassName(),
                                        "loop not vectorized: ", *LAR);
    });
  }

  if (!LAI->canVectorizeMemory())
    return false;

  if (LAI->hasLoadStoreDependenceInvolvingLoopInvariantAddress()) {
    reportVectorizationFailure("We don't allow storing to uniform addresses",
                               "write to a loop invariant address could not "
                               "be vectorized",
                               "CantVectorizeStoreToLoopInvariantAddress", ORE,
                               TheLoop);
    return false;
  }

  // We can vectorize stores to an invariant address when the final reduction
  // value is guaranteed to be stored at the end of the loop. Also, if the
  // decision to vectorize the loop is made, runtime checks are added so as to
  // make sure that the invariant address won't alias with any other objects.
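  // For example (illustrative):
  //   for (i = 0; i < n; ++i) { sum += a[i]; *p = sum; }
  // stores to the invariant address p, but the last value written is the
  // reduction result, which can legally be stored once after the loop.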
  if (!LAI->getStoresToInvariantAddresses().empty()) {
    // For each invariant address, check if the last stored value is
    // unconditional and the address is not calculated inside the loop.
    for (StoreInst *SI : LAI->getStoresToInvariantAddresses()) {
      if (isInvariantStoreOfReduction(SI))
        continue;

      if (blockNeedsPredication(SI->getParent())) {
        reportVectorizationFailure(
            "We don't allow storing to uniform addresses",
            "write of conditional recurring variant value to a loop "
            "invariant address could not be vectorized",
            "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
        return false;
      }

      // Invariant address should be defined outside of loop. LICM pass usually
      // makes sure it happens, but in rare cases it does not, and we do not
      // want to overcomplicate vectorization to support this case.
      if (Instruction *Ptr = dyn_cast<Instruction>(SI->getPointerOperand())) {
        if (TheLoop->contains(Ptr)) {
          reportVectorizationFailure(
              "Invariant address is calculated inside the loop",
              "write to a loop invariant address could not "
              "be vectorized",
              "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
          return false;
        }
      }
    }

    if (LAI->hasStoreStoreDependenceInvolvingLoopInvariantAddress()) {
      // For each invariant address, check its last stored value is the result
      // of one of our reductions.
      //
      // We do not check if dependence with loads exists because that is
      // already checked via hasLoadStoreDependenceInvolvingLoopInvariantAddress.
      ScalarEvolution *SE = PSE.getSE();
      SmallVector<StoreInst *, 4> UnhandledStores;
      for (StoreInst *SI : LAI->getStoresToInvariantAddresses()) {
        if (isInvariantStoreOfReduction(SI)) {
          // Earlier stores to this address are effectively dead code.
          // With opaque pointers it is possible for one pointer to be used
          // with different sizes of stored values:
          //    store i32 0, ptr %x
          //    store i8 0, ptr %x
          // The latest store doesn't completely overwrite the first one in the
          // example. That is why we have to make sure that types of stored
          // values are the same.
          // TODO: Check that the bitwidth of an unhandled store is smaller
          // than the one that overwrites it and add a test.
          erase_if(UnhandledStores, [SE, SI](StoreInst *I) {
            return storeToSameAddress(SE, SI, I) &&
                   I->getValueOperand()->getType() ==
                       SI->getValueOperand()->getType();
          });
          continue;
        }
        UnhandledStores.push_back(SI);
      }

      bool IsOK = UnhandledStores.empty();
      // TODO: we should also validate against InvariantMemSets.
      if (!IsOK) {
        reportVectorizationFailure(
            "We don't allow storing to uniform addresses",
            "write to a loop invariant address could not "
            "be vectorized",
            "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
        return false;
      }
    }
  }

  PSE.addPredicate(LAI->getPSE().getPredicate());
  return true;
}

bool LoopVectorizationLegality::canVectorizeFPMath(
    bool EnableStrictReductions) {

  // First check if there is any ExactFP math or if we allow reassociations
  if (!Requirements->getExactFPInst() || Hints->allowReordering())
    return true;

  // If the above is false, we have ExactFPMath & do not allow reordering.
  // If the EnableStrictReductions flag is set, first check if we have any
  // Exact FP induction vars, which we cannot vectorize.
  if (!EnableStrictReductions ||
      any_of(getInductionVars(), [&](auto &Induction) -> bool {
        InductionDescriptor IndDesc = Induction.second;
        return IndDesc.getExactFPMathInst();
      }))
    return false;

  // We can now only vectorize if all reductions with Exact FP math also
  // have the isOrdered flag set, which indicates that we can move the
  // reduction operations in-loop.
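  // (An ordered reduction stays in the loop and is emitted strictly in order,
  // e.g. via llvm.vector.reduce.fadd seeded with the running scalar value, so
  // no reassociation is introduced.)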
  return (all_of(getReductionVars(), [&](auto &Reduction) -> bool {
    const RecurrenceDescriptor &RdxDesc = Reduction.second;
    return !RdxDesc.hasExactFPMath() || RdxDesc.isOrdered();
  }));
}

bool LoopVectorizationLegality::isInvariantStoreOfReduction(StoreInst *SI) {
  return any_of(getReductionVars(), [&](auto &Reduction) -> bool {
    const RecurrenceDescriptor &RdxDesc = Reduction.second;
    return RdxDesc.IntermediateStore == SI;
  });
}

bool LoopVectorizationLegality::isInvariantAddressOfReduction(Value *V) {
  return any_of(getReductionVars(), [&](auto &Reduction) -> bool {
    const RecurrenceDescriptor &RdxDesc = Reduction.second;
    if (!RdxDesc.IntermediateStore)
      return false;

    ScalarEvolution *SE = PSE.getSE();
    Value *InvariantAddress = RdxDesc.IntermediateStore->getPointerOperand();
    return V == InvariantAddress ||
           SE->getSCEV(V) == SE->getSCEV(InvariantAddress);
  });
}

bool LoopVectorizationLegality::isInductionPhi(const Value *V) const {
  Value *In0 = const_cast<Value *>(V);
  PHINode *PN = dyn_cast_or_null<PHINode>(In0);
  if (!PN)
    return false;

  return Inductions.count(PN);
}

const InductionDescriptor *
LoopVectorizationLegality::getIntOrFpInductionDescriptor(PHINode *Phi) const {
  if (!isInductionPhi(Phi))
    return nullptr;
  auto &ID = getInductionVars().find(Phi)->second;
  if (ID.getKind() == InductionDescriptor::IK_IntInduction ||
      ID.getKind() == InductionDescriptor::IK_FpInduction)
    return &ID;
  return nullptr;
}

const InductionDescriptor *
LoopVectorizationLegality::getPointerInductionDescriptor(PHINode *Phi) const {
  if (!isInductionPhi(Phi))
    return nullptr;
  auto &ID = getInductionVars().find(Phi)->second;
  if (ID.getKind() == InductionDescriptor::IK_PtrInduction)
    return &ID;
  return nullptr;
}

bool LoopVectorizationLegality::isCastedInductionVariable(
    const Value *V) const {
  auto *Inst = dyn_cast<Instruction>(V);
  return (Inst && InductionCastsToIgnore.count(Inst));
}

bool LoopVectorizationLegality::isInductionVariable(const Value *V) const {
  return isInductionPhi(V) || isCastedInductionVariable(V);
}

bool LoopVectorizationLegality::isFixedOrderRecurrence(
    const PHINode *Phi) const {
  return FixedOrderRecurrences.count(Phi);
}

bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) const {
  return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
}

bool LoopVectorizationLegality::blockCanBePredicated(
    BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs,
    SmallPtrSetImpl<const Instruction *> &MaskedOp) const {
  for (Instruction &I : *BB) {
    // We can predicate blocks with calls to assume, as long as we drop them in
    // case we flatten the CFG via predication.
    if (match(&I, m_Intrinsic<Intrinsic::assume>())) {
      MaskedOp.insert(&I);
      continue;
    }

    // Do not let llvm.experimental.noalias.scope.decl block the vectorization.
    // TODO: there might be cases that it should block the vectorization. Let's
    // ignore those for now.
    if (isa<NoAliasScopeDeclInst>(&I))
      continue;

    // We can allow masked calls if there's at least one vector variant, even
    // if we end up scalarizing due to the cost model calculations.
    // TODO: Allow other calls if they have appropriate attributes... readonly
    // and argmemonly?
    if (CallInst *CI = dyn_cast<CallInst>(&I))
      if (VFDatabase::hasMaskedVariant(*CI)) {
        MaskedOp.insert(CI);
        continue;
      }

    // Loads are handled via masking (or speculated if safe to do so.)
    if (auto *LI = dyn_cast<LoadInst>(&I)) {
      if (!SafePtrs.count(LI->getPointerOperand()))
        MaskedOp.insert(LI);
      continue;
    }

    // Predicated store requires some form of masking:
    // 1) masked store HW instruction,
    // 2) emulation via load-blend-store (only if safe and legal to do so,
    //    be aware of the race conditions), or
    // 3) element-by-element predicate check and scalar store.
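    // For example, a store guarded by "if (c[i])" in the source lands here
    // and is recorded in MaskedOp; the cost model later chooses among these
    // strategies.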
    if (auto *SI = dyn_cast<StoreInst>(&I)) {
      MaskedOp.insert(SI);
      continue;
    }

    if (I.mayReadFromMemory() || I.mayWriteToMemory() || I.mayThrow())
      return false;
  }

  return true;
}

bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
  if (!EnableIfConversion) {
    reportVectorizationFailure("If-conversion is disabled",
                               "if-conversion is disabled",
                               "IfConversionDisabled",
                               ORE, TheLoop);
    return false;
  }

  assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");

  // A list of pointers which are known to be dereferenceable within scope of
  // the loop body for each iteration of the loop which executes. That is,
  // the memory pointed to can be dereferenced (with the access size implied by
  // the value's type) unconditionally within the loop header without
  // introducing a new fault.
  SmallPtrSet<Value *, 8> SafePointers;

  // Collect safe addresses.
  for (BasicBlock *BB : TheLoop->blocks()) {
    if (!blockNeedsPredication(BB)) {
      for (Instruction &I : *BB)
        if (auto *Ptr = getLoadStorePointerOperand(&I))
          SafePointers.insert(Ptr);
      continue;
    }

    // For a block which requires predication, an address may be safe to access
    // in the loop w/o predication if we can prove dereferenceability facts
    // sufficient to ensure it'll never fault within the loop. For the moment,
    // we restrict this to loads; stores are more complicated due to
    // concurrency restrictions.
    ScalarEvolution &SE = *PSE.getSE();
    for (Instruction &I : *BB) {
      LoadInst *LI = dyn_cast<LoadInst>(&I);
      if (LI && !LI->getType()->isVectorTy() && !mustSuppressSpeculation(*LI) &&
          isDereferenceableAndAlignedInLoop(LI, TheLoop, SE, *DT, AC))
        SafePointers.insert(LI->getPointerOperand());
    }
  }

  // Collect the blocks that need predication.
  for (BasicBlock *BB : TheLoop->blocks()) {
    // We support only branches and switch statements as terminators inside the
    // loop.
    if (isa<SwitchInst>(BB->getTerminator())) {
      if (TheLoop->isLoopExiting(BB)) {
        reportVectorizationFailure("Loop contains an unsupported switch",
                                   "loop contains an unsupported switch",
                                   "LoopContainsUnsupportedSwitch", ORE,
                                   TheLoop, BB->getTerminator());
        return false;
      }
    } else if (!isa<BranchInst>(BB->getTerminator())) {
      reportVectorizationFailure("Loop contains an unsupported terminator",
                                 "loop contains an unsupported terminator",
                                 "LoopContainsUnsupportedTerminator", ORE,
                                 TheLoop, BB->getTerminator());
      return false;
    }

    // We must be able to predicate all blocks that need to be predicated.
    if (blockNeedsPredication(BB) &&
        !blockCanBePredicated(BB, SafePointers, MaskedOp)) {
      reportVectorizationFailure(
          "Control flow cannot be substituted for a select",
          "control flow cannot be substituted for a select", "NoCFGForSelect",
          ORE, TheLoop, BB->getTerminator());
      return false;
    }
  }

  // We can if-convert this loop.
  return true;
}

// Helper function to canVectorizeLoopNestCFG.
bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
                                                    bool UseVPlanNativePath) {
  assert((UseVPlanNativePath || Lp->isInnermost()) &&
         "VPlan-native path is not enabled.");

  // TODO: ORE should be improved to show more accurate information when an
  // outer loop can't be vectorized because a nested loop is not understood or
  // legal. Something like: "outer_loop_location: loop not vectorized:
  // (inner_loop_location) loop control flow is not understood by vectorizer".

  // Store the result and return it at the end instead of exiting early, in
  // case allowExtraAnalysis is used to report multiple reasons for not
  // vectorizing.
  bool Result = true;
  bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);

  // We must have a loop in canonical form. Loops with indirectbr in them
  // cannot be canonicalized.
  if (!Lp->getLoopPreheader()) {
    reportVectorizationFailure(
        "Loop doesn't have a legal pre-header",
        "loop control flow is not understood by vectorizer",
        "CFGNotUnderstood", ORE, TheLoop);
    if (DoExtraAnalysis)
      Result = false;
    else
      return false;
  }

  // We must have a single backedge.
  if (Lp->getNumBackEdges() != 1) {
    reportVectorizationFailure(
        "The loop must have a single backedge",
        "loop control flow is not understood by vectorizer",
        "CFGNotUnderstood", ORE, TheLoop);
    if (DoExtraAnalysis)
      Result = false;
    else
      return false;
  }

  return Result;
}

bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
    Loop *Lp, bool UseVPlanNativePath) {
  // Store the result and return it at the end instead of exiting early, in
  // case allowExtraAnalysis is used to report multiple reasons for not
  // vectorizing.
  bool Result = true;
  bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
  if (!canVectorizeLoopCFG(Lp, UseVPlanNativePath)) {
    if (DoExtraAnalysis)
      Result = false;
    else
      return false;
  }

  // Recursively check whether the loop control flow of nested loops is
  // understood.
  for (Loop *SubLp : *Lp)
    if (!canVectorizeLoopNestCFG(SubLp, UseVPlanNativePath)) {
      if (DoExtraAnalysis)
        Result = false;
      else
        return false;
    }

  return Result;
}

bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
  // Store the result and return it at the end instead of exiting early, in
  // case allowExtraAnalysis is used to report multiple reasons for not
  // vectorizing.
  bool Result = true;

  bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
  // Check whether the loop-related control flow in the loop nest is expected
  // by the vectorizer.
  if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) {
    if (DoExtraAnalysis)
      Result = false;
    else
      return false;
  }

  // We need to have a loop header.
  LLVM_DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName()
                    << '\n');

  // Specific checks for outer loops. We skip the remaining legal checks at
  // this point because they don't support outer loops.
  if (!TheLoop->isInnermost()) {
    assert(UseVPlanNativePath && "VPlan-native path is not enabled.");

    if (!canVectorizeOuterLoop()) {
      reportVectorizationFailure("Unsupported outer loop",
                                 "unsupported outer loop",
                                 "UnsupportedOuterLoop",
                                 ORE, TheLoop);
      // TODO: Implement DoExtraAnalysis when subsequent legal checks support
      // outer loops.
      return false;
    }

    LLVM_DEBUG(dbgs() << "LV: We can vectorize this outer loop!\n");
    return Result;
  }

  assert(TheLoop->isInnermost() && "Inner loop expected.");
  // Check if we can if-convert non-single-bb loops.
  unsigned NumBlocks = TheLoop->getNumBlocks();
  if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
    LLVM_DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");
    if (DoExtraAnalysis)
      Result = false;
    else
      return false;
  }

  // Check if we can vectorize the instructions and CFG in this loop.
  if (!canVectorizeInstrs()) {
    LLVM_DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n");
    if (DoExtraAnalysis)
      Result = false;
    else
      return false;
  }

  // Go over each instruction and look at memory deps.
  if (!canVectorizeMemory()) {
    LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
    if (DoExtraAnalysis)
      Result = false;
    else
      return false;
  }

  if (isa<SCEVCouldNotCompute>(PSE.getBackedgeTakenCount())) {
    reportVectorizationFailure("could not determine number of loop iterations",
                               "could not determine number of loop iterations",
                               "CantComputeNumberOfIterations", ORE, TheLoop);
    if (DoExtraAnalysis)
      Result = false;
    else
      return false;
  }

  LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
                    << (LAI->getRuntimePointerChecking()->Need
                            ? " (with a runtime bound check)"
                            : "")
                    << "!\n");

  unsigned SCEVThreshold = VectorizeSCEVCheckThreshold;
  if (Hints->getForce() == LoopVectorizeHints::FK_Enabled)
    SCEVThreshold = PragmaVectorizeSCEVCheckThreshold;

  if (PSE.getPredicate().getComplexity() > SCEVThreshold) {
    reportVectorizationFailure(
        "Too many SCEV checks needed",
        "Too many SCEV assumptions need to be made and checked at runtime",
        "TooManySCEVRunTimeChecks", ORE, TheLoop);
    if (DoExtraAnalysis)
      Result = false;
    else
      return false;
  }

  // Okay! We've done all the tests. If any have failed, return false.
  // Otherwise we can vectorize, and at this point we don't have any other mem
  // analysis which may limit our maximum vectorization factor, so just return
  // true with no restrictions.
  return Result;
}

bool LoopVectorizationLegality::canFoldTailByMasking() const {

  LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");

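  // Folding the tail by masking replaces the scalar epilogue with masked
  // vector iterations. For example, with VF = 4 and a trip count of 10, the
  // loop runs three masked vector iterations, and in the last one only the
  // lanes satisfying (i + lane) < 10 are active.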
  SmallPtrSet<const Value *, 8> ReductionLiveOuts;

  for (const auto &Reduction : getReductionVars())
    ReductionLiveOuts.insert(Reduction.second.getLoopExitInstr());

  // TODO: handle non-reduction outside users when tail is folded by masking.
  for (auto *AE : AllowedExit) {
    // Check that all users of allowed exit values are inside the loop or
    // are the live-out of a reduction.
    if (ReductionLiveOuts.count(AE))
      continue;
    for (User *U : AE->users()) {
      Instruction *UI = cast<Instruction>(U);
      if (TheLoop->contains(UI))
        continue;
      LLVM_DEBUG(
          dbgs()
          << "LV: Cannot fold tail by masking, loop has an outside user for "
          << *UI << "\n");
      return false;
    }
  }

  for (const auto &Entry : getInductionVars()) {
    PHINode *OrigPhi = Entry.first;
    for (User *U : OrigPhi->users()) {
      auto *UI = cast<Instruction>(U);
      if (!TheLoop->contains(UI)) {
        LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking, loop IV has an "
                             "outside user for "
                          << *UI << "\n");
        return false;
      }
    }
  }

  // The list of pointers that we can safely read and write to remains empty.
  SmallPtrSet<Value *, 8> SafePointers;

  // Check all blocks for predication, including those that ordinarily do not
  // need predication such as the header block.
  SmallPtrSet<const Instruction *, 8> TmpMaskedOp;
  for (BasicBlock *BB : TheLoop->blocks()) {
    if (!blockCanBePredicated(BB, SafePointers, TmpMaskedOp)) {
      LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking.\n");
      return false;
    }
  }

  LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n");

  return true;
}

void LoopVectorizationLegality::prepareToFoldTailByMasking() {
  // The list of pointers that we can safely read and write to remains empty.
  SmallPtrSet<Value *, 8> SafePointers;

  // Mark all blocks for predication, including those that ordinarily do not
  // need predication such as the header block.
  for (BasicBlock *BB : TheLoop->blocks()) {
    [[maybe_unused]] bool R = blockCanBePredicated(BB, SafePointers, MaskedOp);
    assert(R && "Must be able to predicate block when tail-folding.");
  }
}

} // namespace llvm
Definition: Type.cpp:680
A struct for saving information about induction variables.
@ IK_FpInduction
Floating point induction variable.
@ IK_PtrInduction
Pointer induction var. Step = C.
@ IK_IntInduction
Integer induction variable. Step = C.
static bool isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D, const SCEV *Expr=nullptr, SmallVectorImpl< Instruction * > *CastsToIgnore=nullptr)
Returns true if Phi is an induction in the loop L.
Instruction * getExactFPMathInst()
Returns floating-point induction operator that does not allow reassociation (transforming the inducti...
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
An instruction for reading from memory.
Definition: Instructions.h:174
Value * getPointerOperand()
Definition: Instructions.h:253
const LoopAccessInfo & getInfo(Loop &L)
ArrayRef< StoreInst * > getStoresToInvariantAddresses() const
Return the list of stores to invariant addresses.
const OptimizationRemarkAnalysis * getReport() const
The diagnostics report generated for the analysis.
const RuntimePointerChecking * getRuntimePointerChecking() const
bool canVectorizeMemory() const
Return true we can analyze the memory accesses in the loop and there are no memory dependence cycles.
bool isInvariant(Value *V) const
Returns true if value V is loop invariant.
bool hasLoadStoreDependenceInvolvingLoopInvariantAddress() const
Return true if the loop has memory dependence involving a load and a store to an invariant address,...
const PredicatedScalarEvolution & getPSE() const
Used to add runtime SCEV checks.
static bool blockNeedsPredication(BasicBlock *BB, Loop *TheLoop, DominatorTree *DT)
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
const DenseMap< Value *, const SCEV * > & getSymbolicStrides() const
If an access has a symbolic strides, this maps the pointer value to the stride symbol.
bool hasStoreStoreDependenceInvolvingLoopInvariantAddress() const
Return true if the loop has memory dependence involving two stores to an invariant address,...
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
unsigned getNumBackEdges() const
Calculate the number of back edges to the loop header.
BlockT * getHeader() const
iterator_range< block_iterator > blocks() const
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
bool isLoopExiting(const BlockT *BB) const
True if terminator in the block can branch to another block that is outside of the current loop.
bool isLoopHeader(const BlockT *BB) const
bool isInvariantStoreOfReduction(StoreInst *SI)
Returns True if given store is a final invariant store of one of the reductions found in the loop.
bool isInvariantAddressOfReduction(Value *V)
Returns True if given address is invariant and is used to store recurrent expression.
bool blockNeedsPredication(BasicBlock *BB) const
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
bool canVectorize(bool UseVPlanNativePath)
Returns true if it is legal to vectorize this loop.
int isConsecutivePtr(Type *AccessTy, Value *Ptr) const
Check if this pointer is consecutive when vectorizing.
bool canVectorizeFPMath(bool EnableStrictReductions)
Returns true if it is legal to vectorize the FP math operations in this loop.
bool isFixedOrderRecurrence(const PHINode *Phi) const
Returns True if Phi is a fixed-order recurrence in this loop.
const InductionDescriptor * getPointerInductionDescriptor(PHINode *Phi) const
Returns a pointer to the induction descriptor, if Phi is pointer induction.
const InductionDescriptor * getIntOrFpInductionDescriptor(PHINode *Phi) const
Returns a pointer to the induction descriptor, if Phi is an integer or floating point induction.
bool isInductionPhi(const Value *V) const
Returns True if V is a Phi node of an induction variable in this loop.
bool isUniform(Value *V, ElementCount VF) const
Returns true if value V is uniform across VF lanes, when VF is provided, and otherwise if V is invari...
const InductionList & getInductionVars() const
Returns the induction variables found in the loop.
bool isInvariant(Value *V) const
Returns true if V is invariant across all loop iterations according to SCEV.
const ReductionList & getReductionVars() const
Returns the reduction variables found in the loop.
bool canFoldTailByMasking() const
Return true if we can vectorize this loop while folding its tail by masking.
void prepareToFoldTailByMasking()
Mark all respective loads/stores for masking.
bool isUniformMemOp(Instruction &I, ElementCount VF) const
A uniform memory op is a load or store which accesses the same memory location on all VF lanes,...
bool isInductionVariable(const Value *V) const
Returns True if V can be considered as an induction variable in this loop.
bool isCastedInductionVariable(const Value *V) const
Returns True if V is a cast that is part of an induction def-use chain, and had been proven to be red...
void addExactFPMathInst(Instruction *I)
Track the 1st floating-point instruction that can not be reassociated.
@ SK_PreferScalable
Vectorize loops using scalable vectors or fixed-width vectors, but favor scalable vectors when the co...
@ SK_FixedWidthOnly
Disables vectorization with scalable vectors.
bool allowVectorization(Function *F, Loop *L, bool VectorizeOnlyWhenForced) const
bool allowReordering() const
When enabling loop hints are provided we allow the vectorizer to change the order of operations that ...
void emitRemarkWithHints() const
Dumps all the hint information.
void setAlreadyVectorized()
Mark the loop L as already vectorized by setting the width to 1.
LoopVectorizeHints(const Loop *L, bool InterleaveOnlyWhenForced, OptimizationRemarkEmitter &ORE, const TargetTransformInfo *TTI=nullptr)
const char * vectorizeAnalysisPassName() const
If hints are provided that force vectorization, use the AlwaysPrint pass name to force the frontend t...
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:39
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:632
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:61
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition: LoopInfo.cpp:526
PHINode * getCanonicalInductionVariable() const
Check to see if the loop has a canonical induction variable: an integer recurrence that starts at 0 a...
Definition: LoopInfo.cpp:151
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
Definition: LoopInfo.cpp:502
Metadata node.
Definition: Metadata.h:1069
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1430
ArrayRef< MDOperand > operands() const
Definition: Metadata.h:1428
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1542
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1436
Tracking metadata reference owned by Metadata.
Definition: Metadata.h:891
A single uniqued string.
Definition: Metadata.h:720
StringRef getString() const
Definition: Metadata.cpp:616
static MDString * get(LLVMContext &Context, StringRef Str)
Definition: Metadata.cpp:606
size_type count(const KeyT &Key) const
Definition: MapVector.h:165
iterator find(const KeyT &Key)
Definition: MapVector.h:167
bool empty() const
Definition: MapVector.h:79
Root of the metadata hierarchy.
Definition: Metadata.h:62
Diagnostic information for optimization analysis remarks.
The optimization diagnostic interface.
bool allowExtraAnalysis(StringRef PassName) const
Whether we allow for extra compile-time budget to perform more analysis to produce fewer false positi...
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for missed-optimization remarks.
void addPredicate(const SCEVPredicate &Pred)
Adds a new predicate.
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
const SCEVPredicate & getPredicate() const
const SCEV * getBackedgeTakenCount()
Get the (predicated) backedge count for the analyzed loop.
const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition: IVDescriptors.h:70
Instruction * getExactFPMathInst() const
Returns 1st non-reassociative FP instruction in the PHI node's use-chain.
static bool isFixedOrderRecurrence(PHINode *Phi, Loop *TheLoop, DominatorTree *DT)
Returns true if Phi is a fixed-order recurrence.
bool hasExactFPMath() const
Returns true if the recurrence has floating-point math that requires precise (ordered) operations.
Instruction * getLoopExitInstr() const
static bool isReductionPHI(PHINode *Phi, Loop *TheLoop, RecurrenceDescriptor &RedDes, DemandedBits *DB=nullptr, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr, ScalarEvolution *SE=nullptr)
Returns true if Phi is a reduction in TheLoop.
bool isOrdered() const
Expose an ordered FP reduction to the instance users.
StoreInst * IntermediateStore
Reductions may store temporary or final result to an invariant address.
bool Need
This flag indicates if we need to add the runtime check.
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
virtual unsigned getComplexity() const
Returns the estimated complexity of this predicate.
virtual bool isAlwaysTrue() const =0
Returns true if the predicate is always true.
This visitor recursively visits a SCEV expression and re-writes it.
const SCEV * visit(const SCEV *S)
This means that we are dealing with an entirely unknown SCEV value, and only represent it as its LLVM...
This class represents an analyzed expression in the program.
The main scalar evolution driver.
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
const SCEV * getCouldNotCompute()
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:346
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:435
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:367
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:502
bool empty() const
Definition: SmallVector.h:94
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
An instruction for storing to memory.
Definition: Instructions.h:290
Value * getPointerOperand()
Definition: Instructions.h:377
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
static constexpr size_t npos
Definition: StringRef.h:52
Provides information about what library functions are available for the current target.
bool hasOptimizedCodeGen(LibFunc F) const
Tests if the function is both available and a candidate for optimized code generation.
void getWidestVF(StringRef ScalarF, ElementCount &FixedVF, ElementCount &ScalableVF) const
Returns the largest vectorization factor used in the list of vector functions.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
bool isFunctionVectorizable(StringRef F, const ElementCount &VF) const
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
bool isLegalNTLoad(Type *DataType, Align Alignment) const
Return true if the target supports nontemporal load.
bool isLegalNTStore(Type *DataType, Align Alignment) const
Return true if the target supports nontemporal store.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
std::string str() const
Return the twine contents as a std::string.
Definition: Twine.cpp:17
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:261
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:251
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:128
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:184
static IntegerType * getInt32Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:224
static bool hasMaskedVariant(const CallInst &CI, std::optional< ElementCount > VF=std::nullopt)
Definition: VectorUtils.h:82
static SmallVector< VFInfo, 8 > getMappings(const CallInst &CI)
Retrieve all the VFInfo instances associated to the CallInst CI.
Definition: VectorUtils.h:71
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
iterator_range< user_iterator > users()
Definition: Value.h:421
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
static bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as a element type.
Definition: Type.cpp:671
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition: TypeSize.h:232
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
constexpr bool isZero() const
Definition: TypeSize.h:156
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:711
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
NodeAddr< PhiNode * > Phi
Definition: RDFGraph.h:390
NodeAddr< FuncNode * > Func
Definition: RDFGraph.h:393
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1680
Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
cl::opt< bool > HintsAllowReordering("hints-allow-reordering", cl::init(true), cl::Hidden, cl::desc("Allow enabling loop hints to reorder " "FP operations during vectorization."))
static Type * getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1)
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
static Type * convertPointerToIntegerType(const DataLayout &DL, Type *Ty)
static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp)
bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
static bool isUniformLoop(Loop *Lp, Loop *OuterLp)
bool mustSuppressSpeculation(const LoadInst &LI)
Return true if speculation of the given load must be suppressed to avoid ordering or interfering with...
Definition: Loads.cpp:357
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:291
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const DenseMap< Value *, const SCEV * > &StridesMap=DenseMap< Value *, const SCEV * >(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of the access type size.
bool isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L, ScalarEvolution &SE, DominatorTree &DT, AssumptionCache *AC=nullptr)
Return true if we can prove that the given load (which is assumed to be within the specified loop) wo...
Definition: Loads.cpp:262
static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst, SmallPtrSetImpl< Value * > &AllowedExit)
Check that the instruction has outside loop users and is not an identified reduction variable.
static bool storeToSameAddress(ScalarEvolution *SE, StoreInst *A, StoreInst *B)
Returns true if A and B have same pointer operands or same SCEVs addresses.
void reportVectorizationFailure(const StringRef DebugMsg, const StringRef OREMsg, const StringRef ORETag, OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I=nullptr)
Reports a vectorization failure: print DebugMsg for debugging purposes along with the corresponding o...
llvm::MDNode * makePostTransformationMetadata(llvm::LLVMContext &Context, MDNode *OrigLoopID, llvm::ArrayRef< llvm::StringRef > RemovePrefixes, llvm::ArrayRef< llvm::MDNode * > AddAttrs)
Create a new LoopID after the loop has been transformed.
Definition: LoopInfo.cpp:1158
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:2082
bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx)
Identifies if the vector form of the intrinsic has a scalar operand.
static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI)
Checks if a function is scalarizable according to the TLI, in the sense that it should be vectorized ...
bool SCEVExprContains(const SCEV *Root, PredTy Pred)
Return true if any node in Root satisfies the predicate Pred.
An object of this class is returned by queries that could not be answered.
TODO: The following VectorizationFactor was pulled out of LoopVectorizationCostModel class.
Collection of parameters shared beetween the Loop Vectorizer and the Loop Access Analysis.
static const unsigned MaxVectorWidth
Maximum SIMD width.
static bool isInterleaveForced()
True if force-vector-interleave was specified by the user.
static unsigned VectorizationInterleave
Interleave factor as overridden by the user.