//===- LoopVectorizationLegality.cpp --------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file provides loop vectorization legality analysis. Original code
// resided in LoopVectorize.cpp for a long time.
//
// At this point, it is implemented as a utility class, not as an analysis
// pass. It should be easy to create an analysis pass around it if there
// is a need (but D45420 needs to happen first).
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Transforms/Utils/SizeOpts.h"
#include "llvm/Transforms/Vectorize/LoopVectorize.h"

using namespace llvm;
using namespace PatternMatch;

#define LV_NAME "loop-vectorize"
#define DEBUG_TYPE LV_NAME

static cl::opt<bool>
    EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
                       cl::desc("Enable if-conversion during vectorization."));

static cl::opt<bool> AllowStridedPointerIVs(
    "lv-strided-pointer-ivs", cl::init(false), cl::Hidden,
    cl::desc("Enable recognition of non-constant strided "
             "pointer induction variables."));

namespace llvm {
cl::opt<bool>
    HintsAllowReordering("hints-allow-reordering", cl::init(true), cl::Hidden,
                         cl::desc("Allow enabling loop hints to reorder "
                                  "FP operations during vectorization."));
} // namespace llvm

// TODO: Move size-based thresholds out of legality checking, make cost based
// decisions instead of hard thresholds.
static cl::opt<unsigned> VectorizeSCEVCheckThreshold(
    "vectorize-scev-check-threshold", cl::init(16), cl::Hidden,
    cl::desc("The maximum number of SCEV checks allowed."));

static cl::opt<unsigned> PragmaVectorizeSCEVCheckThreshold(
    "pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden,
    cl::desc("The maximum number of SCEV checks allowed with a "
             "vectorize(enable) pragma"));

static cl::opt<LoopVectorizeHints::ScalableForceKind>
    ForceScalableVectorization(
        "scalable-vectorization", cl::init(LoopVectorizeHints::SK_Unspecified),
        cl::Hidden,
        cl::desc("Control whether the compiler can use scalable vectors to "
                 "vectorize a loop"),
        cl::values(
            clEnumValN(LoopVectorizeHints::SK_FixedWidthOnly, "off",
                       "Scalable vectorization is disabled."),
            clEnumValN(LoopVectorizeHints::SK_PreferScalable, "preferred",
                       "Scalable vectorization is available and favored when "
                       "the cost is inconclusive."),
            clEnumValN(LoopVectorizeHints::SK_PreferScalable, "on",
                       "Scalable vectorization is available and favored when "
                       "the cost is inconclusive.")));

/// Maximum vectorization interleave count.
static const unsigned MaxInterleaveFactor = 16;

namespace llvm {

bool LoopVectorizeHints::Hint::validate(unsigned Val) {
  switch (Kind) {
  case HK_WIDTH:
    return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth;
  case HK_INTERLEAVE:
    return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
  case HK_FORCE:
    return (Val <= 1);
  case HK_ISVECTORIZED:
  case HK_PREDICATE:
  case HK_SCALABLE:
    return (Val == 0 || Val == 1);
  }
  return false;
}

LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
                                       bool InterleaveOnlyWhenForced,
                                       OptimizationRemarkEmitter &ORE,
                                       const TargetTransformInfo *TTI)
    : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH),
      Interleave("interleave.count", InterleaveOnlyWhenForced, HK_INTERLEAVE),
      Force("vectorize.enable", FK_Undefined, HK_FORCE),
      IsVectorized("isvectorized", 0, HK_ISVECTORIZED),
      Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE),
      Scalable("vectorize.scalable.enable", SK_Unspecified, HK_SCALABLE),
      TheLoop(L), ORE(ORE) {
  // Populate values with existing loop metadata.
  getHintsFromMetadata();

  // force-vector-interleave overrides DisableInterleaving.
  if (VectorizerParams::isInterleaveForced())
    Interleave.Value = VectorizerParams::VectorizationInterleave;

  // If the metadata doesn't explicitly specify whether to enable scalable
  // vectorization, then decide based on the following criteria (increasing
  // level of priority):
  //  - Target default
  //  - Metadata width
  //  - Force option (always overrides)
  if (Scalable.Value == SK_Unspecified) {
    if (TTI)
      Scalable.Value = TTI->enableScalableVectorization() ? SK_PreferScalable
                                                          : SK_FixedWidthOnly;

    if (Width.Value)
      // If the width is set, but the metadata says nothing about the scalable
      // property, then assume it concerns only a fixed-width UserVF.
      // If width is not set, the flag takes precedence.
      Scalable.Value = SK_FixedWidthOnly;
  }

  // If the flag is set to force any use of scalable vectors, override the loop
  // hints.
  if (ForceScalableVectorization.getValue() !=
      LoopVectorizeHints::SK_Unspecified)
    Scalable.Value = ForceScalableVectorization.getValue();

  // Scalable vectorization is disabled if no preference is specified.
  if (Scalable.Value == SK_Unspecified)
    Scalable.Value = SK_FixedWidthOnly;

  if (IsVectorized.Value != 1)
    // If the vectorization width and interleaving count are both 1 then
    // consider the loop to have been already vectorized because there's
    // nothing more that we can do.
    IsVectorized.Value =
        getWidth() == ElementCount::getFixed(1) && getInterleave() == 1;
  LLVM_DEBUG(if (InterleaveOnlyWhenForced && getInterleave() == 1) dbgs()
             << "LV: Interleaving disabled by the pass manager\n");
}

void LoopVectorizeHints::setAlreadyVectorized() {
  LLVMContext &Context = TheLoop->getHeader()->getContext();

  MDNode *IsVectorizedMD = MDNode::get(
      Context,
      {MDString::get(Context, "llvm.loop.isvectorized"),
       ConstantAsMetadata::get(ConstantInt::get(Context, APInt(32, 1)))});
  MDNode *LoopID = TheLoop->getLoopID();
  MDNode *NewLoopID =
      makePostTransformationMetadata(Context, LoopID,
                                     {Twine(Prefix(), "vectorize.").str(),
                                      Twine(Prefix(), "interleave.").str()},
                                     {IsVectorizedMD});
  TheLoop->setLoopID(NewLoopID);

  // Update internal cache.
  IsVectorized.Value = 1;
}

bool LoopVectorizeHints::allowVectorization(
    Function *F, Loop *L, bool VectorizeOnlyWhenForced) const {
  if (getForce() == LoopVectorizeHints::FK_Disabled) {
    LLVM_DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n");
    emitRemarkWithHints();
    return false;
  }

  if (VectorizeOnlyWhenForced && getForce() != LoopVectorizeHints::FK_Enabled) {
    LLVM_DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n");
    emitRemarkWithHints();
    return false;
  }

  if (getIsVectorized() == 1) {
    LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
    // FIXME: Add interleave.disable metadata. This will allow
    // vectorize.disable to be used without disabling the pass and errors
    // to differentiate between disabled vectorization and a width of 1.
    ORE.emit([&]() {
      return OptimizationRemarkAnalysis(vectorizeAnalysisPassName(),
                                        "AllDisabled", L->getStartLoc(),
                                        L->getHeader())
             << "loop not vectorized: vectorization and interleaving are "
                "explicitly disabled, or the loop has already been "
                "vectorized";
    });
    return false;
  }

  return true;
}

void LoopVectorizeHints::emitRemarkWithHints() const {
  using namespace ore;

  ORE.emit([&]() {
    if (Force.Value == LoopVectorizeHints::FK_Disabled)
      return OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled",
                                      TheLoop->getStartLoc(),
                                      TheLoop->getHeader())
             << "loop not vectorized: vectorization is explicitly disabled";

    OptimizationRemarkMissed R(LV_NAME, "MissedDetails", TheLoop->getStartLoc(),
                               TheLoop->getHeader());
    R << "loop not vectorized";
    if (Force.Value == LoopVectorizeHints::FK_Enabled) {
      R << " (Force=" << NV("Force", true);
      if (Width.Value != 0)
        R << ", Vector Width=" << NV("VectorWidth", getWidth());
      if (getInterleave() != 0)
        R << ", Interleave Count=" << NV("InterleaveCount", getInterleave());
      R << ")";
    }
    return R;
  });
}

const char *LoopVectorizeHints::vectorizeAnalysisPassName() const {
  if (getWidth() == ElementCount::getFixed(1))
    return LV_NAME;
  if (getForce() == LoopVectorizeHints::FK_Disabled)
    return LV_NAME;
  if (getForce() == LoopVectorizeHints::FK_Undefined && getWidth().isZero())
    return LV_NAME;
  return OptimizationRemarkAnalysis::AlwaysPrint;
}

bool LoopVectorizeHints::allowReordering() const {
  // Allow the vectorizer to change the order of operations if enabling
  // loop hints are provided
  ElementCount EC = getWidth();
  return HintsAllowReordering &&
         (getForce() == LoopVectorizeHints::FK_Enabled ||
          EC.getKnownMinValue() > 1);
}

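// Illustrative sketch (not from the original source): a loop annotated with
// "#pragma clang loop vectorize_width(4)" carries loop metadata of the form
//   br i1 %exitcond, label %exit, label %loop, !llvm.loop !0
//   !0 = distinct !{!0, !1}
//   !1 = !{!"llvm.loop.vectorize.width", i32 4}
// getHintsFromMetadata() below walks the operands of !0, and setHint()
// records the value 4 for the "vectorize.width" hint.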
void LoopVectorizeHints::getHintsFromMetadata() {
  MDNode *LoopID = TheLoop->getLoopID();
  if (!LoopID)
    return;

  // First operand should refer to the loop id itself.
  assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
  assert(LoopID->getOperand(0) == LoopID && "invalid loop id");

  for (const MDOperand &MDO : llvm::drop_begin(LoopID->operands())) {
    const MDString *S = nullptr;
    SmallVector<Metadata *, 4> Args;

    // The expected hint is either a MDString or a MDNode with the first
    // operand a MDString.
    if (const MDNode *MD = dyn_cast<MDNode>(MDO)) {
      if (!MD || MD->getNumOperands() == 0)
        continue;
      S = dyn_cast<MDString>(MD->getOperand(0));
      for (unsigned Idx = 1; Idx < MD->getNumOperands(); ++Idx)
        Args.push_back(MD->getOperand(Idx));
    } else {
      S = dyn_cast<MDString>(MDO);
      assert(Args.size() == 0 && "too many arguments for MDString");
    }

    if (!S)
      continue;

    // Check if the hint starts with the loop metadata prefix.
    StringRef Name = S->getString();
    if (Args.size() == 1)
      setHint(Name, Args[0]);
  }
}

void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) {
  if (!Name.starts_with(Prefix()))
    return;
  Name = Name.substr(Prefix().size(), StringRef::npos);

  const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg);
  if (!C)
    return;
  unsigned Val = C->getZExtValue();

  Hint *Hints[] = {&Width,        &Interleave, &Force,
                   &IsVectorized, &Predicate,  &Scalable};
  for (auto *H : Hints) {
    if (Name == H->Name) {
      if (H->validate(Val))
        H->Value = Val;
      else
        LLVM_DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n");
      break;
    }
  }
}

// Return true if the inner loop \p Lp is uniform with regard to the outer loop
// \p OuterLp (i.e., if the outer loop is vectorized, all the vector lanes
// executing the inner loop will execute the same iterations). This check is
// very constrained for now but it will be relaxed in the future. \p Lp is
// considered uniform if it meets all the following conditions:
//  1) it has a canonical IV (starting from 0 and with stride 1),
//  2) its latch terminator is a conditional branch and,
//  3) its latch condition is a compare instruction whose operands are the
//     canonical IV and an OuterLp invariant.
// This check doesn't take into account the uniformity of other conditions not
// related to the loop latch because they don't affect the loop uniformity.
//
// NOTE: We decided to keep all these checks and their associated documentation
// together so that we can easily have a picture of the current supported loop
// nests. However, some of the current checks don't depend on \p OuterLp and
// would be redundantly executed for each \p Lp if we invoked this function for
// different candidate outer loops. This is not the case for now because we
// don't currently have the infrastructure to evaluate multiple candidate outer
// loops and \p OuterLp will be a fixed parameter while we only support explicit
// outer loop vectorization. It's also very likely that these checks go away
// before introducing the aforementioned infrastructure. However, if this is not
// the case, we should move the \p OuterLp independent checks to a separate
// function that is only executed once for each \p Lp.
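//
// For example (illustrative, not from the original source), the inner loop
// below is uniform w.r.t. the outer loop: it has a canonical IV 'j', its
// latch is a conditional branch, and the latch compares the IV update
// against 'N', which is invariant in the outer loop, so every vector lane
// of the outer loop executes the same inner iterations:
//   for (int i = 0; i < M; ++i)    // OuterLp (explicitly vectorized)
//     for (int j = 0; j < N; ++j)  // Lp: uniform w.r.t. OuterLp
//       A[i][j] += B[i][j];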
static bool isUniformLoop(Loop *Lp, Loop *OuterLp) {
  assert(Lp->getLoopLatch() && "Expected loop with a single latch.");

  // If Lp is the outer loop, it's uniform by definition.
  if (Lp == OuterLp)
    return true;
  assert(OuterLp->contains(Lp) && "OuterLp must contain Lp.");

  // 1.
  PHINode *IV = Lp->getCanonicalInductionVariable();
  if (!IV) {
    LLVM_DEBUG(dbgs() << "LV: Canonical IV not found.\n");
    return false;
  }

  // 2.
  BasicBlock *Latch = Lp->getLoopLatch();
  auto *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator());
  if (!LatchBr || LatchBr->isUnconditional()) {
    LLVM_DEBUG(dbgs() << "LV: Unsupported loop latch branch.\n");
    return false;
  }

  // 3.
  auto *LatchCmp = dyn_cast<CmpInst>(LatchBr->getCondition());
  if (!LatchCmp) {
    LLVM_DEBUG(
        dbgs() << "LV: Loop latch condition is not a compare instruction.\n");
    return false;
  }

  Value *CondOp0 = LatchCmp->getOperand(0);
  Value *CondOp1 = LatchCmp->getOperand(1);
  Value *IVUpdate = IV->getIncomingValueForBlock(Latch);
  if (!(CondOp0 == IVUpdate && OuterLp->isLoopInvariant(CondOp1)) &&
      !(CondOp1 == IVUpdate && OuterLp->isLoopInvariant(CondOp0))) {
    LLVM_DEBUG(dbgs() << "LV: Loop latch condition is not uniform.\n");
    return false;
  }

  return true;
}

// Return true if \p Lp and all its nested loops are uniform with regard to \p
// OuterLp.
static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp) {
  if (!isUniformLoop(Lp, OuterLp))
    return false;

  // Check if nested loops are uniform.
  for (Loop *SubLp : *Lp)
    if (!isUniformLoopNest(SubLp, OuterLp))
      return false;

  return true;
}

static Type *convertPointerToIntegerType(const DataLayout &DL, Type *Ty) {
  if (Ty->isPointerTy())
    return DL.getIntPtrType(Ty);

  // It is possible that char's or short's overflow when we ask for the loop's
  // trip count, work around this by changing the type size.
  if (Ty->getScalarSizeInBits() < 32)
    return Type::getInt32Ty(Ty->getContext());

  return Ty;
}

static Type *getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) {
  Ty0 = convertPointerToIntegerType(DL, Ty0);
  Ty1 = convertPointerToIntegerType(DL, Ty1);
  if (Ty0->getScalarSizeInBits() > Ty1->getScalarSizeInBits())
    return Ty0;
  return Ty1;
}

/// Check that the instruction has outside loop users and is not an
/// identified reduction variable.
static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
                               SmallPtrSetImpl<Value *> &AllowedExit) {
  // Reductions, inductions and non-header phis are allowed to have exit
  // users. All other instructions must not have external users.
  if (!AllowedExit.count(Inst))
    // Check that all of the users of the loop are inside the BB.
    for (User *U : Inst->users()) {
      Instruction *UI = cast<Instruction>(U);
      // This user may be a reduction exit value.
      if (!TheLoop->contains(UI)) {
        LLVM_DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n');
        return true;
      }
    }
  return false;
}

/// Returns true if A and B have same pointer operands or same SCEV addresses
static bool storeToSameAddress(ScalarEvolution *SE, StoreInst *A,
                               StoreInst *B) {
  // Compare store
  if (A == B)
    return true;

  // Otherwise compare pointers
  Value *APtr = A->getPointerOperand();
  Value *BPtr = B->getPointerOperand();
  if (APtr == BPtr)
    return true;

  // Otherwise compare address SCEVs
  return SE->getSCEV(APtr) == SE->getSCEV(BPtr);
}

int LoopVectorizationLegality::isConsecutivePtr(Type *AccessTy,
                                                Value *Ptr) const {
  // FIXME: Currently, the set of symbolic strides is sometimes queried before
  // it's collected. This happens from canVectorizeWithIfConvert, when the
  // pointer is checked to reference consecutive elements suitable for a
  // masked access.
  const auto &Strides =
      LAI ? LAI->getSymbolicStrides() : DenseMap<Value *, const SCEV *>();

  Function *F = TheLoop->getHeader()->getParent();
  bool OptForSize = F->hasOptSize() ||
                    llvm::shouldOptimizeForSize(TheLoop->getHeader(), PSI, BFI,
                                                PGSOQueryType::IRPass);
  bool CanAddPredicate = !OptForSize;
  int Stride = getPtrStride(PSE, AccessTy, Ptr, TheLoop, Strides,
                            CanAddPredicate, false).value_or(0);
  if (Stride == 1 || Stride == -1)
    return Stride;
  return 0;
}

bool LoopVectorizationLegality::isInvariant(Value *V) const {
  return LAI->isInvariant(V);
}

namespace {
/// A rewriter to build the SCEVs for each of the VF lanes in the expected
/// vectorized loop, which can then be compared to detect their uniformity.
/// This is done by replacing the AddRec SCEVs of the original scalar loop
/// (TheLoop) with new AddRecs where the step is multiplied by StepMultiplier
/// and Offset * Step is added. Also checks if all sub-expressions are
/// analyzable w.r.t. uniformity.
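///
/// For example (illustrative, not from the original source), with VF = 2 the
/// scalar AddRec {Start,+,Step} is rewritten to {Start,+,2*Step} for lane 0
/// and {Start+Step,+,2*Step} for lane 1. For a value such as (iv /u 2) the
/// rewritten expressions for both lanes fold to the same SCEV, so the udiv
/// is uniform across the two lanes.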
class SCEVAddRecForUniformityRewriter
    : public SCEVRewriteVisitor<SCEVAddRecForUniformityRewriter> {
  /// Multiplier to be applied to the step of AddRecs in TheLoop.
  unsigned StepMultiplier;

  /// Offset to be added to the AddRecs in TheLoop.
  unsigned Offset;

  /// Loop for which to rewrite AddRecs.
  Loop *TheLoop;

  /// Is any sub-expression not analyzable w.r.t. uniformity?
  bool CannotAnalyze = false;

  bool canAnalyze() const { return !CannotAnalyze; }

public:
  SCEVAddRecForUniformityRewriter(ScalarEvolution &SE, unsigned StepMultiplier,
                                  unsigned Offset, Loop *TheLoop)
      : SCEVRewriteVisitor(SE), StepMultiplier(StepMultiplier), Offset(Offset),
        TheLoop(TheLoop) {}

  const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
    assert(Expr->getLoop() == TheLoop &&
           "addrec outside of TheLoop must be invariant and should have been "
           "handled earlier");
    // Build a new AddRec by multiplying the step by StepMultiplier and
    // incrementing the start by Offset * step.
    Type *Ty = Expr->getType();
    const SCEV *Step = Expr->getStepRecurrence(SE);
    if (!SE.isLoopInvariant(Step, TheLoop)) {
      CannotAnalyze = true;
      return Expr;
    }
    const SCEV *NewStep =
        SE.getMulExpr(Step, SE.getConstant(Ty, StepMultiplier));
    const SCEV *ScaledOffset = SE.getMulExpr(Step, SE.getConstant(Ty, Offset));
    const SCEV *NewStart = SE.getAddExpr(Expr->getStart(), ScaledOffset);
    return SE.getAddRecExpr(NewStart, NewStep, TheLoop, SCEV::FlagAnyWrap);
  }

  const SCEV *visit(const SCEV *S) {
    if (CannotAnalyze || SE.isLoopInvariant(S, TheLoop))
      return S;
    return SCEVRewriteVisitor<SCEVAddRecForUniformityRewriter>::visit(S);
  }

  const SCEV *visitUnknown(const SCEVUnknown *S) {
    if (SE.isLoopInvariant(S, TheLoop))
      return S;
    // The value could vary across iterations.
    CannotAnalyze = true;
    return S;
  }

  const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *S) {
    // Could not analyze the expression.
    CannotAnalyze = true;
    return S;
  }

  static const SCEV *rewrite(const SCEV *S, ScalarEvolution &SE,
                             unsigned StepMultiplier, unsigned Offset,
                             Loop *TheLoop) {
    /// Bail out if the expression does not contain an UDiv expression.
    /// Uniform values which are not loop invariant require operations to strip
    /// out the lowest bits. For now just look for UDivs and use it to avoid
    /// re-writing UDIV-free expressions for other lanes to limit compile time.
    if (!SCEVExprContains(S,
                          [](const SCEV *S) { return isa<SCEVUDivExpr>(S); }))
      return SE.getCouldNotCompute();

    SCEVAddRecForUniformityRewriter Rewriter(SE, StepMultiplier, Offset,
                                             TheLoop);
    const SCEV *Result = Rewriter.visit(S);

    if (Rewriter.canAnalyze())
      return Result;
    return SE.getCouldNotCompute();
  }
};

} // namespace

bool LoopVectorizationLegality::isUniform(Value *V, ElementCount VF) const {
  if (isInvariant(V))
    return true;
  if (VF.isScalable())
    return false;
  if (VF.isScalar())
    return true;

  // Since we rely on SCEV for uniformity, if the type is not SCEVable, it is
  // never considered uniform.
  auto *SE = PSE.getSE();
  if (!SE->isSCEVable(V->getType()))
    return false;
  const SCEV *S = SE->getSCEV(V);

  // Rewrite AddRecs in TheLoop to step by VF and check if the expression for
  // lane 0 matches the expressions for all other lanes.
  unsigned FixedVF = VF.getKnownMinValue();
  const SCEV *FirstLaneExpr =
      SCEVAddRecForUniformityRewriter::rewrite(S, *SE, FixedVF, 0, TheLoop);
  if (isa<SCEVCouldNotCompute>(FirstLaneExpr))
    return false;

  // Make sure the expressions for lanes FixedVF-1..1 match the expression for
  // lane 0. We check lanes in reverse order for compile-time, as frequently
  // checking the last lane is sufficient to rule out uniformity.
  return all_of(reverse(seq<unsigned>(1, FixedVF)), [&](unsigned I) {
    const SCEV *IthLaneExpr =
        SCEVAddRecForUniformityRewriter::rewrite(S, *SE, FixedVF, I, TheLoop);
    return FirstLaneExpr == IthLaneExpr;
  });
}

bool LoopVectorizationLegality::isUniformMemOp(Instruction &I,
                                               ElementCount VF) const {
  Value *Ptr = getLoadStorePointerOperand(&I);
  if (!Ptr)
    return false;
  // Note: There's nothing inherent which prevents predicated loads and
  // stores from being uniform. The current lowering simply doesn't handle
  // it; in particular, the cost model distinguishes scatter/gather from
  // scalar w/predication, and we currently rely on the scalar path.
  return isUniform(Ptr, VF) && !blockNeedsPredication(I.getParent());
}

bool LoopVectorizationLegality::canVectorizeOuterLoop() {
  assert(!TheLoop->isInnermost() && "We are not vectorizing an outer loop.");
  // Store the result and return it at the end instead of exiting early, in
  // case allowExtraAnalysis is used to report multiple reasons for not
  // vectorizing.
  bool Result = true;
  bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);

  for (BasicBlock *BB : TheLoop->blocks()) {
    // Check whether the BB terminator is a BranchInst. Any other terminator is
    // not supported yet.
    auto *Br = dyn_cast<BranchInst>(BB->getTerminator());
    if (!Br) {
      reportVectorizationFailure("Unsupported basic block terminator",
                                 "loop control flow is not understood by vectorizer",
                                 "CFGNotUnderstood", ORE, TheLoop);
      if (DoExtraAnalysis)
        Result = false;
      else
        return false;
    }

    // Check whether the BranchInst is a supported one. Only unconditional
    // branches, conditional branches with an outer loop invariant condition or
    // backedges are supported.
    // FIXME: We skip these checks when VPlan predication is enabled as we
    // want to allow divergent branches. This whole check will be removed
    // once VPlan predication is on by default.
    if (Br && Br->isConditional() &&
        !TheLoop->isLoopInvariant(Br->getCondition()) &&
        !LI->isLoopHeader(Br->getSuccessor(0)) &&
        !LI->isLoopHeader(Br->getSuccessor(1))) {
      reportVectorizationFailure("Unsupported conditional branch",
                                 "loop control flow is not understood by vectorizer",
                                 "CFGNotUnderstood", ORE, TheLoop);
      if (DoExtraAnalysis)
        Result = false;
      else
        return false;
    }
  }

  // Check whether inner loops are uniform. At this point, we only support
  // simple outer loops scenarios with uniform nested loops.
  if (!isUniformLoopNest(TheLoop /*loop nest*/,
                         TheLoop /*context outer loop*/)) {
    reportVectorizationFailure("Outer loop contains divergent loops",
                               "loop control flow is not understood by vectorizer",
                               "CFGNotUnderstood", ORE, TheLoop);
    if (DoExtraAnalysis)
      Result = false;
    else
      return false;
  }

  // Check whether we are able to set up outer loop induction.
  if (!setupOuterLoopInductions()) {
    reportVectorizationFailure("Unsupported outer loop Phi(s)",
                               "Unsupported outer loop Phi(s)",
                               "UnsupportedPhi", ORE, TheLoop);
    if (DoExtraAnalysis)
      Result = false;
    else
      return false;
  }

  return Result;
}

void LoopVectorizationLegality::addInductionPhi(
    PHINode *Phi, const InductionDescriptor &ID,
    SmallPtrSetImpl<Value *> &AllowedExit) {
  Inductions[Phi] = ID;

  // In case this induction also comes with casts that we know we can ignore
  // in the vectorized loop body, record them here. All casts could be recorded
  // here for ignoring, but it suffices to record only the first (as it is the
  // only one that may be used outside the cast sequence).
  const SmallVectorImpl<Instruction *> &Casts = ID.getCastInsts();
  if (!Casts.empty())
    InductionCastsToIgnore.insert(*Casts.begin());

  Type *PhiTy = Phi->getType();
  const DataLayout &DL = Phi->getDataLayout();

  // Get the widest type.
  if (!PhiTy->isFloatingPointTy()) {
    if (!WidestIndTy)
      WidestIndTy = convertPointerToIntegerType(DL, PhiTy);
    else
      WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy);
  }

  // Int inductions are special because we only allow one IV.
  if (ID.getKind() == InductionDescriptor::IK_IntInduction &&
      ID.getConstIntStepValue() && ID.getConstIntStepValue()->isOne() &&
      isa<Constant>(ID.getStartValue()) &&
      cast<Constant>(ID.getStartValue())->isNullValue()) {

    // Use the phi node with the widest type as induction. Use the last
    // one if there are multiple (no good reason for doing this other
    // than it is expedient). We've checked that it begins at zero and
    // steps by one, so this is a canonical induction variable.
    if (!PrimaryInduction || PhiTy == WidestIndTy)
      PrimaryInduction = Phi;
  }

  // Both the PHI node itself, and the "post-increment" value feeding
  // back into the PHI node may have external users.
  // We can allow those uses, except if the SCEVs we have for them rely
  // on predicates that only hold within the loop, since allowing the exit
  // currently means re-using this SCEV outside the loop (see PR33706 for more
  // details).
  if (PSE.getPredicate().isAlwaysTrue()) {
    AllowedExit.insert(Phi);
    AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch()));
  }

  LLVM_DEBUG(dbgs() << "LV: Found an induction variable.\n");
}

bool LoopVectorizationLegality::setupOuterLoopInductions() {
  BasicBlock *Header = TheLoop->getHeader();

  // Returns true if a given Phi is a supported induction.
  auto IsSupportedPhi = [&](PHINode &Phi) -> bool {
    InductionDescriptor ID;
    if (InductionDescriptor::isInductionPHI(&Phi, TheLoop, PSE, ID) &&
        ID.getKind() == InductionDescriptor::IK_IntInduction) {
      addInductionPhi(&Phi, ID, AllowedExit);
      return true;
    }
    // Bail out for any Phi in the outer loop header that is not a supported
    // induction.
    LLVM_DEBUG(
        dbgs() << "LV: Found unsupported PHI for outer loop vectorization.\n");
    return false;
  };

  return llvm::all_of(Header->phis(), IsSupportedPhi);
}

/// Checks if a function is scalarizable according to the TLI, in
/// the sense that it should be vectorized and then expanded in
/// multiple scalar calls. This is represented in the
/// TLI via mappings that do not specify a vector name, as in the
/// following example:
///
/// const VecDesc VecIntrinsics[] = {
///   {"llvm.phx.abs.i32", "", 4}
/// };
static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI) {
  const StringRef ScalarName = CI.getCalledFunction()->getName();
  bool Scalarize = TLI.isFunctionVectorizable(ScalarName);
  // Check that all known VFs are not associated to a vector
  // function, i.e. the vector name is empty.
  if (Scalarize) {
    ElementCount WidestFixedVF, WidestScalableVF;
    TLI.getWidestVF(ScalarName, WidestFixedVF, WidestScalableVF);
    for (ElementCount VF = ElementCount::getFixed(2);
         ElementCount::isKnownLE(VF, WidestFixedVF); VF *= 2)
      Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF);
    for (ElementCount VF = ElementCount::getScalable(1);
         ElementCount::isKnownLE(VF, WidestScalableVF); VF *= 2)
      Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF);
    assert((WidestScalableVF.isZero() || !Scalarize) &&
           "Caller may decide to scalarize a variant using a scalable VF");
  }
  return Scalarize;
}

bool LoopVectorizationLegality::canVectorizeInstrs() {
  BasicBlock *Header = TheLoop->getHeader();

  // For each block in the loop.
  for (BasicBlock *BB : TheLoop->blocks()) {
    // Scan the instructions in the block and look for hazards.
    for (Instruction &I : *BB) {
      if (auto *Phi = dyn_cast<PHINode>(&I)) {
        Type *PhiTy = Phi->getType();
        // Check that this PHI type is allowed.
        if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
            !PhiTy->isPointerTy()) {
          reportVectorizationFailure("Found a non-int non-pointer PHI",
                                     "loop control flow is not understood by vectorizer",
                                     "CFGNotUnderstood", ORE, TheLoop);
          return false;
        }

        // If this PHINode is not in the header block, then we know that we
        // can convert it to select during if-conversion. No need to check if
        // the PHIs in this block are induction or reduction variables.
        if (BB != Header) {
          // Non-header phi nodes that have outside uses can be vectorized. Add
          // them to the list of allowed exits.
          // Unsafe cyclic dependencies with header phis are identified during
          // legalization for reduction, induction and fixed order
          // recurrences.
          AllowedExit.insert(&I);
          continue;
        }

        // We only allow if-converted PHIs with exactly two incoming values.
        if (Phi->getNumIncomingValues() != 2) {
          reportVectorizationFailure("Found an invalid PHI",
                                     "loop control flow is not understood by vectorizer",
                                     "CFGNotUnderstood", ORE, TheLoop, Phi);
          return false;
        }

        RecurrenceDescriptor RedDes;
        if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC,
                                                 DT, PSE.getSE())) {
          Requirements->addExactFPMathInst(RedDes.getExactFPMathInst());
          AllowedExit.insert(RedDes.getLoopExitInstr());
          Reductions[Phi] = RedDes;
          continue;
        }

        // We prevent matching non-constant strided pointer IVs to preserve
        // historical vectorizer behavior after a generalization of the
        // IVDescriptor code. The intent is to remove this check, but we
        // have to fix issues around code quality for such loops first.
        auto IsDisallowedStridedPointerInduction =
            [](const InductionDescriptor &ID) {
              if (AllowStridedPointerIVs)
                return false;
              return ID.getKind() == InductionDescriptor::IK_PtrInduction &&
                     ID.getConstIntStepValue() == nullptr;
            };

        // TODO: Instead of recording the AllowedExit, it would be good to
        // record the complementary set: NotAllowedExit. These include (but may
        // not be limited to):
        // 1. Reduction phis as they represent the one-before-last value, which
        // is not available when vectorized
        // 2. Induction phis and increment when SCEV predicates cannot be used
        // outside the loop - see addInductionPhi
        // 3. Non-Phis with outside uses when SCEV predicates cannot be used
        // outside the loop - see call to hasOutsideLoopUser in the non-phi
        // handling below
        // 4. FixedOrderRecurrence phis that can possibly be handled by
        // extraction.
        // By recording these, we can then reason about ways to vectorize each
        // of these NotAllowedExit.
        InductionDescriptor ID;
        if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID) &&
            !IsDisallowedStridedPointerInduction(ID)) {
          addInductionPhi(Phi, ID, AllowedExit);
          Requirements->addExactFPMathInst(ID.getExactFPMathInst());
          continue;
        }

        if (RecurrenceDescriptor::isFixedOrderRecurrence(Phi, TheLoop, DT)) {
          AllowedExit.insert(Phi);
          FixedOrderRecurrences.insert(Phi);
          continue;
        }

        // As a last resort, coerce the PHI to an AddRec expression
        // and re-try classifying it as an induction PHI.
        if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true) &&
            !IsDisallowedStridedPointerInduction(ID)) {
          addInductionPhi(Phi, ID, AllowedExit);
          continue;
        }

        reportVectorizationFailure("Found an unidentified PHI",
                                   "value that could not be identified as "
                                   "reduction is used outside the loop",
                                   "NonReductionValueUsedOutsideLoop", ORE, TheLoop, Phi);
        return false;
      } // end of PHI handling

      // We handle calls that:
      //   * Are debug info intrinsics.
      //   * Have a mapping to an IR intrinsic.
      //   * Have a vector version available.
      auto *CI = dyn_cast<CallInst>(&I);

      if (CI && !getVectorIntrinsicIDForCall(CI, TLI) &&
          !isa<DbgInfoIntrinsic>(CI) &&
          !(CI->getCalledFunction() && TLI &&
            (!VFDatabase::getMappings(*CI).empty() ||
             isTLIScalarize(*TLI, *CI)))) {
        // If the call is a recognized math library call, it is likely that
        // we can vectorize it given loosened floating-point constraints.
        LibFunc Func;
        bool IsMathLibCall =
            TLI && CI->getCalledFunction() &&
            CI->getType()->isFloatingPointTy() &&
            TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) &&
            TLI->hasOptimizedCodeGen(Func);

        if (IsMathLibCall) {
          // TODO: Ideally, we should not use clang-specific language here,
          // but it's hard to provide meaningful yet generic advice.
          // Also, should this be guarded by allowExtraAnalysis() and/or be part
          // of the returned info from isFunctionVectorizable()?
          reportVectorizationFailure(
              "Found a non-intrinsic callsite",
              "library call cannot be vectorized. "
              "Try compiling with -fno-math-errno, -ffast-math, "
              "or similar flags",
              "CantVectorizeLibcall", ORE, TheLoop, CI);
        } else {
          reportVectorizationFailure("Found a non-intrinsic callsite",
                                     "call instruction cannot be vectorized",
                                     "CantVectorizeLibcall", ORE, TheLoop, CI);
        }
        return false;
      }

      // Some intrinsics have scalar arguments, which must stay scalar
      // (i.e. be loop invariant) for the call to be vectorized.
      if (CI) {
        auto *SE = PSE.getSE();
        Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI);
        for (unsigned Idx = 0; Idx < CI->arg_size(); ++Idx)
          if (isVectorIntrinsicWithScalarOpAtArg(IntrinID, Idx)) {
            if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(Idx)),
                                     TheLoop)) {
              reportVectorizationFailure("Found unvectorizable intrinsic",
                                         "intrinsic instruction cannot be vectorized",
                                         "CantVectorizeIntrinsic", ORE, TheLoop, CI);
              return false;
            }
          }
      }

      // If we found a vectorized variant of a function, note that so LV can
      // make better decisions about maximum VF.
      if (CI && !VFDatabase::getMappings(*CI).empty())
        VecCallVariantsFound = true;

      // Check that the instruction return type is vectorizable.
      // Also, we can't vectorize extractelement instructions.
      if ((!VectorType::isValidElementType(I.getType()) &&
           !I.getType()->isVoidTy()) ||
          isa<ExtractElementInst>(I)) {
        reportVectorizationFailure("Found unvectorizable type",
                                   "instruction return type cannot be vectorized",
                                   "CantVectorizeInstructionReturnType", ORE, TheLoop, &I);
        return false;
      }

      // Check that the stored type is vectorizable.
      if (auto *ST = dyn_cast<StoreInst>(&I)) {
        Type *T = ST->getValueOperand()->getType();
        if (!VectorType::isValidElementType(T)) {
          reportVectorizationFailure("Store instruction cannot be vectorized",
                                     "store instruction cannot be vectorized",
                                     "CantVectorizeStore", ORE, TheLoop, ST);
          return false;
        }

        // For nontemporal stores, check that a nontemporal vector version is
        // supported on the target.
        if (ST->getMetadata(LLVMContext::MD_nontemporal)) {
          // Arbitrarily try a vector of 2 elements.
          auto *VecTy = FixedVectorType::get(T, /*NumElts=*/2);
          assert(VecTy && "did not find vectorized version of stored type");
          if (!TTI->isLegalNTStore(VecTy, ST->getAlign())) {
            reportVectorizationFailure(
                "nontemporal store instruction cannot be vectorized",
                "nontemporal store instruction cannot be vectorized",
                "CantVectorizeNontemporalStore", ORE, TheLoop, ST);
            return false;
          }
        }

      } else if (auto *LD = dyn_cast<LoadInst>(&I)) {
        if (LD->getMetadata(LLVMContext::MD_nontemporal)) {
          // For nontemporal loads, check that a nontemporal vector version is
          // supported on the target (arbitrarily try a vector of 2 elements).
          auto *VecTy = FixedVectorType::get(I.getType(), /*NumElts=*/2);
          assert(VecTy && "did not find vectorized version of load type");
          if (!TTI->isLegalNTLoad(VecTy, LD->getAlign())) {
            reportVectorizationFailure(
                "nontemporal load instruction cannot be vectorized",
                "nontemporal load instruction cannot be vectorized",
                "CantVectorizeNontemporalLoad", ORE, TheLoop, LD);
            return false;
          }
        }

        // FP instructions can allow unsafe algebra, thus vectorizable by
        // non-IEEE-754 compliant SIMD units.
        // This applies to floating-point math operations and calls, not memory
        // operations, shuffles, or casts, as they don't change precision or
        // semantics.
      } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) &&
                 !I.isFast()) {
        LLVM_DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n");
        Hints->setPotentiallyUnsafe();
      }

      // Reduction instructions are allowed to have exit users.
      // All other instructions must not have external users.
      if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) {
        // We can safely vectorize loops where instructions within the loop are
        // used outside the loop only if the SCEV predicates within the loop
        // are the same as outside the loop. Allowing the exit means reusing
        // the SCEV outside the loop.
        if (PSE.getPredicate().isAlwaysTrue()) {
          AllowedExit.insert(&I);
          continue;
        }
        reportVectorizationFailure("Value cannot be used outside the loop",
                                   "value cannot be used outside the loop",
                                   "ValueUsedOutsideLoop", ORE, TheLoop, &I);
        return false;
      }
    } // next instr.
  }

  if (!PrimaryInduction) {
    if (Inductions.empty()) {
      reportVectorizationFailure("Did not find one integer induction var",
                                 "loop induction variable could not be identified",
                                 "NoInductionVariable", ORE, TheLoop);
      return false;
    }
    if (!WidestIndTy) {
      reportVectorizationFailure("Did not find one integer induction var",
                                 "integer loop induction variable could not be identified",
                                 "NoIntegerInductionVariable", ORE, TheLoop);
      return false;
    }
    LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
  }

  // Now we know the widest induction type, check if our found induction
  // is the same size. If it's not, unset it here and InnerLoopVectorizer
  // will create another.
  if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType())
    PrimaryInduction = nullptr;

  return true;
}

bool LoopVectorizationLegality::canVectorizeMemory() {
  LAI = &LAIs.getInfo(*TheLoop);
  const OptimizationRemarkAnalysis *LAR = LAI->getReport();
  if (LAR) {
    ORE->emit([&]() {
      return OptimizationRemarkAnalysis(Hints->vectorizeAnalysisPassName(),
                                        "loop not vectorized: ", *LAR);
    });
  }

  if (!LAI->canVectorizeMemory())
    return false;

  if (LAI->hasLoadStoreDependenceInvolvingLoopInvariantAddress()) {
    reportVectorizationFailure("We don't allow storing to uniform addresses",
                               "write to a loop invariant address could not "
                               "be vectorized",
                               "CantVectorizeStoreToLoopInvariantAddress", ORE,
                               TheLoop);
    return false;
  }

  // We can vectorize stores to invariant address when final reduction value is
  // guaranteed to be stored at the end of the loop. Also, if decision to
  // vectorize loop is made, runtime checks are added so as to make sure that
  // invariant address won't alias with any other objects.
  if (!LAI->getStoresToInvariantAddresses().empty()) {
    // For each invariant address, check if last stored value is unconditional
    // and the address is not calculated inside the loop.
    for (StoreInst *SI : LAI->getStoresToInvariantAddresses()) {
      if (!isInvariantStoreOfReduction(SI))
        continue;

      if (blockNeedsPredication(SI->getParent())) {
        reportVectorizationFailure(
            "We don't allow storing to uniform addresses",
            "write of conditional recurring variant value to a loop "
            "invariant address could not be vectorized",
            "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
        return false;
      }

      // Invariant address should be defined outside of loop. LICM pass usually
      // makes sure it happens, but in rare cases it does not, we do not want
      // to overcomplicate vectorization to support this case.
      if (Instruction *Ptr = dyn_cast<Instruction>(SI->getPointerOperand())) {
        if (TheLoop->contains(Ptr)) {
          reportVectorizationFailure(
              "Invariant address is calculated inside the loop",
              "write to a loop invariant address could not "
              "be vectorized",
              "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
          return false;
        }
      }
    }

    if (LAI->hasStoreStoreDependenceInvolvingLoopInvariantAddress()) {
      // For each invariant address, check its last stored value is the result
      // of one of our reductions.
      //
      // We do not check if dependence with loads exists because that is already
      // checked via hasLoadStoreDependenceInvolvingLoopInvariantAddress.
      ScalarEvolution *SE = PSE.getSE();
      SmallVector<StoreInst *, 4> UnhandledStores;
      for (StoreInst *SI : LAI->getStoresToInvariantAddresses()) {
        if (isInvariantStoreOfReduction(SI)) {
          // Earlier stores to this address are effectively dead code.
          // With opaque pointers it is possible for one pointer to be used with
          // different sizes of stored values:
          //    store i32 0, ptr %x
          //    store i8 0, ptr %x
          // The latest store doesn't completely overwrite the first one in the
          // example. That is why we have to make sure that types of stored
          // values are same.
          // TODO: Check that bitwidth of unhandled store is smaller than the
          // one that overwrites it and add a test.
          erase_if(UnhandledStores, [SE, SI](StoreInst *I) {
            return storeToSameAddress(SE, SI, I) &&
                   I->getValueOperand()->getType() ==
                       SI->getValueOperand()->getType();
          });
          continue;
        }
        UnhandledStores.push_back(SI);
      }

      bool IsOK = UnhandledStores.empty();
      // TODO: we should also validate against InvariantMemSets.
      if (!IsOK) {
        reportVectorizationFailure(
            "We don't allow storing to uniform addresses",
            "write to a loop invariant address could not "
            "be vectorized",
            "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
        return false;
      }
    }
  }

  PSE.addPredicate(LAI->getPSE().getPredicate());
  return true;
}

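// Illustrative example (not from the original source): a floating-point
// reduction such as
//   for (i = 0; i < n; i++) Sum += A[i];   // no fast-math flags
// carries an ExactFPMath requirement. Without permission to reorder FP
// operations it can only be vectorized if the reduction is 'ordered', i.e.
// performed in-loop in the original order (e.g. via llvm.vector.reduce.fadd
// each vector iteration) rather than with reassociated partial sums.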
bool LoopVectorizationLegality::canVectorizeFPMath(
    bool EnableStrictReductions) {

  // First check if there is any ExactFP math or if we allow reassociations
  if (!Requirements->getExactFPInst() || Hints->allowReordering())
    return true;

  // If the above is false, we have ExactFPMath & do not allow reordering.
  // If the EnableStrictReductions flag is set, first check if we have any
  // Exact FP induction vars, which we cannot vectorize.
  if (!EnableStrictReductions ||
      any_of(getInductionVars(), [&](auto &Induction) -> bool {
        InductionDescriptor IndDesc = Induction.second;
        return IndDesc.getExactFPMathInst();
      }))
    return false;

  // We can now only vectorize if all reductions with Exact FP math also
  // have the isOrdered flag set, which indicates that we can move the
  // reduction operations in-loop.
  return (all_of(getReductionVars(), [&](auto &Reduction) -> bool {
    const RecurrenceDescriptor &RdxDesc = Reduction.second;
    return !RdxDesc.hasExactFPMath() || RdxDesc.isOrdered();
  }));
}

bool LoopVectorizationLegality::isInvariantStoreOfReduction(StoreInst *SI) {
  return any_of(getReductionVars(), [&](auto &Reduction) -> bool {
    const RecurrenceDescriptor &RdxDesc = Reduction.second;
    return RdxDesc.IntermediateStore == SI;
  });
}

bool LoopVectorizationLegality::isInvariantAddressOfReduction(Value *V) {
  return any_of(getReductionVars(), [&](auto &Reduction) -> bool {
    const RecurrenceDescriptor &RdxDesc = Reduction.second;
    if (!RdxDesc.IntermediateStore)
      return false;

    ScalarEvolution *SE = PSE.getSE();
    Value *InvariantAddress = RdxDesc.IntermediateStore->getPointerOperand();
    return V == InvariantAddress ||
           SE->getSCEV(V) == SE->getSCEV(InvariantAddress);
  });
}

bool LoopVectorizationLegality::isInductionPhi(const Value *V) const {
  Value *In0 = const_cast<Value *>(V);
  PHINode *PN = dyn_cast_or_null<PHINode>(In0);
  if (!PN)
    return false;

  return Inductions.count(PN);
}

const InductionDescriptor *
LoopVectorizationLegality::getIntOrFpInductionDescriptor(PHINode *Phi) const {
  if (!isInductionPhi(Phi))
    return nullptr;
  auto &ID = getInductionVars().find(Phi)->second;
  if (ID.getKind() == InductionDescriptor::IK_IntInduction ||
      ID.getKind() == InductionDescriptor::IK_FpInduction)
    return &ID;
  return nullptr;
}

const InductionDescriptor *
LoopVectorizationLegality::getPointerInductionDescriptor(PHINode *Phi) const {
  if (!isInductionPhi(Phi))
    return nullptr;
  auto &ID = getInductionVars().find(Phi)->second;
  if (ID.getKind() == InductionDescriptor::IK_PtrInduction)
    return &ID;
  return nullptr;
}

bool LoopVectorizationLegality::isCastedInductionVariable(
    const Value *V) const {
  auto *Inst = dyn_cast<Instruction>(V);
  return (Inst && InductionCastsToIgnore.count(Inst));
}

bool LoopVectorizationLegality::isInductionVariable(const Value *V) const {
  return isInductionPhi(V) || isCastedInductionVariable(V);
}

bool LoopVectorizationLegality::isFixedOrderRecurrence(
    const PHINode *Phi) const {
  return FixedOrderRecurrences.count(Phi);
}

bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) const {
  return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
}

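// Illustrative example (not from the original source): in
//   for (i = 0; i < n; i++)
//     if (c[i]) a[i] = x;
// the store to a[i] executes only when c[i] is true. To if-convert the body,
// blockCanBePredicated() records such instructions in MaskedOp so that they
// can later be emitted in masked form (e.g. as a masked store).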
bool LoopVectorizationLegality::blockCanBePredicated(
    BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs,
    SmallPtrSetImpl<const Instruction *> &MaskedOp) const {
  for (Instruction &I : *BB) {
    // We can predicate blocks with calls to assume, as long as we drop them in
    // case we flatten the CFG via predication.
    if (match(&I, m_Intrinsic<Intrinsic::assume>())) {
      MaskedOp.insert(&I);
      continue;
    }

    // Do not let llvm.experimental.noalias.scope.decl block the vectorization.
    // TODO: there might be cases that it should block the vectorization. Let's
    // ignore those for now.
    if (isa<NoAliasScopeDeclInst>(&I))
      continue;

    // We can allow masked calls if there's at least one vector variant, even
    // if we end up scalarizing due to the cost model calculations.
    // TODO: Allow other calls if they have appropriate attributes... readonly
    // and argmemonly?
    if (CallInst *CI = dyn_cast<CallInst>(&I))
      if (VFDatabase::hasMaskedVariant(*CI)) {
        MaskedOp.insert(CI);
        continue;
      }

    // Loads are handled via masking (or speculated if safe to do so.)
    if (auto *LI = dyn_cast<LoadInst>(&I)) {
      if (!SafePtrs.count(LI->getPointerOperand()))
        MaskedOp.insert(LI);
      continue;
    }

    // Predicated store requires some form of masking:
    // 1) masked store HW instruction,
    // 2) emulation via load-blend-store (only if safe and legal to do so,
    //    be aware of the race conditions), or
    // 3) element-by-element predicate check and scalar store.
    if (auto *SI = dyn_cast<StoreInst>(&I)) {
      MaskedOp.insert(SI);
      continue;
    }

    if (I.mayReadFromMemory() || I.mayWriteToMemory() || I.mayThrow())
      return false;
  }

  return true;
}

bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
  if (!EnableIfConversion) {
    reportVectorizationFailure("If-conversion is disabled",
                               "if-conversion is disabled",
                               "IfConversionDisabled",
                               ORE, TheLoop);
    return false;
  }

  assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");

  // A list of pointers which are known to be dereferenceable within scope of
  // the loop body for each iteration of the loop which executes. That is,
  // the memory pointed to can be dereferenced (with the access size implied by
  // the value's type) unconditionally within the loop header without
  // introducing a new fault.
  SmallPtrSet<Value *, 8> SafePointers;

  // Collect safe addresses.
  for (BasicBlock *BB : TheLoop->blocks()) {
    if (!blockNeedsPredication(BB)) {
      for (Instruction &I : *BB)
        if (auto *Ptr = getLoadStorePointerOperand(&I))
          SafePointers.insert(Ptr);
      continue;
    }

    // For a block which requires predication, an address may be safe to access
    // in the loop w/o predication if we can prove dereferenceability facts
    // sufficient to ensure it'll never fault within the loop. For the moment,
    // we restrict this to loads; stores are more complicated due to
    // concurrency restrictions.
    ScalarEvolution &SE = *PSE.getSE();
    for (Instruction &I : *BB) {
      LoadInst *LI = dyn_cast<LoadInst>(&I);
      if (LI && !LI->getType()->isVectorTy() && !mustSuppressSpeculation(*LI) &&
          isDereferenceableAndAlignedInLoop(LI, TheLoop, SE, *DT, AC))
        SafePointers.insert(LI->getPointerOperand());
    }
  }

  // Collect the blocks that need predication.
  for (BasicBlock *BB : TheLoop->blocks()) {
    // We support only branches and switch statements as terminators inside the
    // loop.
    if (isa<SwitchInst>(BB->getTerminator())) {
      if (TheLoop->isLoopExiting(BB)) {
        reportVectorizationFailure("Loop contains an unsupported switch",
                                   "loop contains an unsupported switch",
                                   "LoopContainsUnsupportedSwitch", ORE,
                                   TheLoop, BB->getTerminator());
        return false;
      }
    } else if (!isa<BranchInst>(BB->getTerminator())) {
      reportVectorizationFailure("Loop contains an unsupported terminator",
                                 "loop contains an unsupported terminator",
                                 "LoopContainsUnsupportedTerminator", ORE,
                                 TheLoop, BB->getTerminator());
      return false;
    }

    // We must be able to predicate all blocks that need to be predicated.
    if (blockNeedsPredication(BB) &&
        !blockCanBePredicated(BB, SafePointers, MaskedOp)) {
      reportVectorizationFailure(
          "Control flow cannot be substituted for a select",
          "control flow cannot be substituted for a select", "NoCFGForSelect",
          ORE, TheLoop, BB->getTerminator());
      return false;
    }
  }

  // We can if-convert this loop.
  return true;
}

// Helper function to canVectorizeLoopNestCFG.
bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
                                                    bool UseVPlanNativePath) {
  assert((UseVPlanNativePath || Lp->isInnermost()) &&
         "VPlan-native path is not enabled.");

  // TODO: ORE should be improved to show more accurate information when an
  // outer loop can't be vectorized because a nested loop is not understood or
  // legal. Something like: "outer_loop_location: loop not vectorized:
  // (inner_loop_location) loop control flow is not understood by vectorizer".

  // Store the result and return it at the end instead of exiting early, in
  // case allowExtraAnalysis is used to report multiple reasons for not
  // vectorizing.
  bool Result = true;
  bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);

  // We must have a loop in canonical form. Loops with indirectbr in them cannot
  // be canonicalized.
  if (!Lp->getLoopPreheader()) {
    reportVectorizationFailure("Loop doesn't have a legal pre-header",
                               "loop control flow is not understood by vectorizer",
                               "CFGNotUnderstood", ORE, TheLoop);
    if (DoExtraAnalysis)
      Result = false;
    else
      return false;
  }

  // We must have a single backedge.
  if (Lp->getNumBackEdges() != 1) {
    reportVectorizationFailure("The loop must have a single backedge",
                               "loop control flow is not understood by vectorizer",
                               "CFGNotUnderstood", ORE, TheLoop);
    if (DoExtraAnalysis)
      Result = false;
    else
      return false;
  }

  return Result;
}

bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
    Loop *Lp, bool UseVPlanNativePath) {
  // Store the result and return it at the end instead of exiting early, in
  // case allowExtraAnalysis is used to report multiple reasons for not
  // vectorizing.
  bool Result = true;
  bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
  if (!canVectorizeLoopCFG(Lp, UseVPlanNativePath)) {
    if (DoExtraAnalysis)
      Result = false;
    else
      return false;
  }

  // Recursively check whether the loop control flow of nested loops is
  // understood.
  for (Loop *SubLp : *Lp)
    if (!canVectorizeLoopNestCFG(SubLp, UseVPlanNativePath)) {
      if (DoExtraAnalysis)
        Result = false;
      else
        return false;
    }

  return Result;
}

bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
  // Store the result and return it at the end instead of exiting early, in
  // case allowExtraAnalysis is used to report multiple reasons for not
  // vectorizing.
  bool Result = true;

  bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
  // Check whether the loop-related control flow in the loop nest is expected
  // by the vectorizer.
  if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) {
    if (DoExtraAnalysis)
      Result = false;
    else
      return false;
  }

  // We need to have a loop header.
  LLVM_DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName()
                    << '\n');

  // Specific checks for outer loops. We skip the remaining legal checks at
  // this point because they don't support outer loops.
  if (!TheLoop->isInnermost()) {
    assert(UseVPlanNativePath && "VPlan-native path is not enabled.");

    if (!canVectorizeOuterLoop()) {
      reportVectorizationFailure("Unsupported outer loop",
                                 "unsupported outer loop",
                                 "UnsupportedOuterLoop",
                                 ORE, TheLoop);
      // TODO: Implement DoExtraAnalysis when subsequent legal checks support
      // outer loops.
      return false;
    }

    LLVM_DEBUG(dbgs() << "LV: We can vectorize this outer loop!\n");
    return Result;
  }

  assert(TheLoop->isInnermost() && "Inner loop expected.");
  // Check if we can if-convert non-single-bb loops.
  unsigned NumBlocks = TheLoop->getNumBlocks();
  if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
    LLVM_DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");
    if (DoExtraAnalysis)
      Result = false;
    else
      return false;
  }

  // Check if we can vectorize the instructions and CFG in this loop.
  if (!canVectorizeInstrs()) {
    LLVM_DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n");
    if (DoExtraAnalysis)
      Result = false;
    else
      return false;
  }

  // Go over each instruction and look at memory deps.
  if (!canVectorizeMemory()) {
    LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
    if (DoExtraAnalysis)
      Result = false;
    else
      return false;
  }

  if (isa<SCEVCouldNotCompute>(PSE.getBackedgeTakenCount())) {
    reportVectorizationFailure("could not determine number of loop iterations",
                               "could not determine number of loop iterations",
                               "CantComputeNumberOfIterations", ORE, TheLoop);
    if (DoExtraAnalysis)
      Result = false;
    else
      return false;
  }

  LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
                    << (LAI->getRuntimePointerChecking()->Need
                            ? " (with a runtime bound check)"
                            : "")
                    << "!\n");

  unsigned SCEVThreshold = VectorizeSCEVCheckThreshold;
  if (Hints->getForce() == LoopVectorizeHints::FK_Enabled)
    SCEVThreshold = PragmaVectorizeSCEVCheckThreshold;

  if (PSE.getPredicate().getComplexity() > SCEVThreshold) {
    reportVectorizationFailure("Too many SCEV checks needed",
                               "Too many SCEV assumptions need to be made and checked at runtime",
                               "TooManySCEVRunTimeChecks", ORE, TheLoop);
    if (DoExtraAnalysis)
      Result = false;
    else
      return false;
  }

  // Okay! We've done all the tests. If any have failed, return false.
  // Otherwise we can vectorize, and at this point we don't have any other mem
  // analysis which may limit our maximum vectorization factor, so just return
  // true with no restrictions.
  return Result;
}

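// Illustrative note (not from the original source): folding the tail by
// masking lets the vector loop handle trip counts that are not a multiple of
// the VF. For n = 10 and VF = 4, the last two iterations execute in a final
// vector iteration under a mask instead of in a scalar epilogue; this
// requires every block to be predicable and no values (other than reduction
// live-outs) to be used outside the loop.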
1549
1550 LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");
1551
1552 SmallPtrSet<const Value *, 8> ReductionLiveOuts;
1553
1554 for (const auto &Reduction : getReductionVars())
1555 ReductionLiveOuts.insert(Reduction.second.getLoopExitInstr());
1556
1557 // TODO: handle non-reduction outside users when tail is folded by masking.
1558 for (auto *AE : AllowedExit) {
1559 // Check that all users of allowed exit values are inside the loop or
1560 // are the live-out of a reduction.
1561 if (ReductionLiveOuts.count(AE))
1562 continue;
1563 for (User *U : AE->users()) {
1564 Instruction *UI = cast<Instruction>(U);
1565 if (TheLoop->contains(UI))
1566 continue;
1567 LLVM_DEBUG(
1568 dbgs()
1569 << "LV: Cannot fold tail by masking, loop has an outside user for "
1570 << *UI << "\n");
1571 return false;
1572 }
1573 }
1574
1575 for (const auto &Entry : getInductionVars()) {
1576 PHINode *OrigPhi = Entry.first;
1577 for (User *U : OrigPhi->users()) {
1578 auto *UI = cast<Instruction>(U);
1579 if (!TheLoop->contains(UI)) {
1580 LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking, loop IV has an "
1581 "outside user for "
1582 << *UI << "\n");
1583 return false;
1584 }
1585 }
1586 }
1587
1588 // The list of pointers that we can safely read and write to remains empty.
1589 SmallPtrSet<Value *, 8> SafePointers;
1590
1591 // Check all blocks for predication, including those that ordinarily do not
1592 // need predication such as the header block.
1594 for (BasicBlock *BB : TheLoop->blocks()) {
1595 if (!blockCanBePredicated(BB, SafePointers, TmpMaskedOp)) {
1596 LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking.\n");
1597 return false;
1598 }
1599 }
1600
1601 LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n");
1602
1603 return true;
1604}

void LoopVectorizationLegality::prepareToFoldTailByMasking() {
  // The list of pointers that we can safely read and write to remains empty.
  SmallPtrSet<Value *, 8> SafePointers;

  // Mark all blocks for predication, including those that ordinarily do not
  // need predication such as the header block.
  for (BasicBlock *BB : TheLoop->blocks()) {
    [[maybe_unused]] bool R = blockCanBePredicated(BB, SafePointers, MaskedOp);
    assert(R && "Must be able to predicate block when tail-folding.");
  }
}

} // namespace llvm
@ IK_FpInduction
Floating point induction variable.
@ IK_PtrInduction
Pointer induction var. Step = C.
@ IK_IntInduction
Integer induction variable. Step = C.
static bool isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D, const SCEV *Expr=nullptr, SmallVectorImpl< Instruction * > *CastsToIgnore=nullptr)
Returns true if Phi is an induction in the loop L.
Instruction * getExactFPMathInst()
Returns floating-point induction operator that does not allow reassociation (transforming the inducti...
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
An instruction for reading from memory.
Definition: Instructions.h:174
Value * getPointerOperand()
Definition: Instructions.h:253
const LoopAccessInfo & getInfo(Loop &L)
ArrayRef< StoreInst * > getStoresToInvariantAddresses() const
Return the list of stores to invariant addresses.
const OptimizationRemarkAnalysis * getReport() const
The diagnostics report generated for the analysis.
const RuntimePointerChecking * getRuntimePointerChecking() const
bool canVectorizeMemory() const
Return true we can analyze the memory accesses in the loop and there are no memory dependence cycles.
bool isInvariant(Value *V) const
Returns true if value V is loop invariant.
bool hasLoadStoreDependenceInvolvingLoopInvariantAddress() const
Return true if the loop has memory dependence involving a load and a store to an invariant address,...
const PredicatedScalarEvolution & getPSE() const
Used to add runtime SCEV checks.
static bool blockNeedsPredication(BasicBlock *BB, Loop *TheLoop, DominatorTree *DT)
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
const DenseMap< Value *, const SCEV * > & getSymbolicStrides() const
If an access has a symbolic strides, this maps the pointer value to the stride symbol.
bool hasStoreStoreDependenceInvolvingLoopInvariantAddress() const
Return true if the loop has memory dependence involving two stores to an invariant address,...
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
unsigned getNumBackEdges() const
Calculate the number of back edges to the loop header.
BlockT * getHeader() const
iterator_range< block_iterator > blocks() const
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
bool isLoopExiting(const BlockT *BB) const
True if terminator in the block can branch to another block that is outside of the current loop.
bool isLoopHeader(const BlockT *BB) const
bool isInvariantStoreOfReduction(StoreInst *SI)
Returns True if given store is a final invariant store of one of the reductions found in the loop.
bool isInvariantAddressOfReduction(Value *V)
Returns True if given address is invariant and is used to store recurrent expression.
bool blockNeedsPredication(BasicBlock *BB) const
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
bool canVectorize(bool UseVPlanNativePath)
Returns true if it is legal to vectorize this loop.
int isConsecutivePtr(Type *AccessTy, Value *Ptr) const
Check if this pointer is consecutive when vectorizing.
bool canVectorizeFPMath(bool EnableStrictReductions)
Returns true if it is legal to vectorize the FP math operations in this loop.
bool isFixedOrderRecurrence(const PHINode *Phi) const
Returns True if Phi is a fixed-order recurrence in this loop.
const InductionDescriptor * getPointerInductionDescriptor(PHINode *Phi) const
Returns a pointer to the induction descriptor, if Phi is pointer induction.
const InductionDescriptor * getIntOrFpInductionDescriptor(PHINode *Phi) const
Returns a pointer to the induction descriptor, if Phi is an integer or floating point induction.
bool isInductionPhi(const Value *V) const
Returns True if V is a Phi node of an induction variable in this loop.
bool isUniform(Value *V, ElementCount VF) const
Returns true if value V is uniform across VF lanes, when VF is provided, and otherwise if V is invari...
const InductionList & getInductionVars() const
Returns the induction variables found in the loop.
bool isInvariant(Value *V) const
Returns true if V is invariant across all loop iterations according to SCEV.
const ReductionList & getReductionVars() const
Returns the reduction variables found in the loop.
bool canFoldTailByMasking() const
Return true if we can vectorize this loop while folding its tail by masking.
void prepareToFoldTailByMasking()
Mark all respective loads/stores for masking.
bool isUniformMemOp(Instruction &I, ElementCount VF) const
A uniform memory op is a load or store which accesses the same memory location on all VF lanes,...
bool isInductionVariable(const Value *V) const
Returns True if V can be considered as an induction variable in this loop.
bool isCastedInductionVariable(const Value *V) const
Returns True if V is a cast that is part of an induction def-use chain, and had been proven to be red...
void addExactFPMathInst(Instruction *I)
Track the 1st floating-point instruction that can not be reassociated.
@ SK_PreferScalable
Vectorize loops using scalable vectors or fixed-width vectors, but favor scalable vectors when the co...
@ SK_FixedWidthOnly
Disables vectorization with scalable vectors.
bool allowVectorization(Function *F, Loop *L, bool VectorizeOnlyWhenForced) const
bool allowReordering() const
When enabling loop hints are provided we allow the vectorizer to change the order of operations that ...
void emitRemarkWithHints() const
Dumps all the hint information.
void setAlreadyVectorized()
Mark the loop L as already vectorized by setting the width to 1.
LoopVectorizeHints(const Loop *L, bool InterleaveOnlyWhenForced, OptimizationRemarkEmitter &ORE, const TargetTransformInfo *TTI=nullptr)
const char * vectorizeAnalysisPassName() const
If hints are provided that force vectorization, use the AlwaysPrint pass name to force the frontend t...
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:44
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:632
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:61
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition: LoopInfo.cpp:526
PHINode * getCanonicalInductionVariable() const
Check to see if the loop has a canonical induction variable: an integer recurrence that starts at 0 a...
Definition: LoopInfo.cpp:151
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
Definition: LoopInfo.cpp:502
Metadata node.
Definition: Metadata.h:1069
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1430
ArrayRef< MDOperand > operands() const
Definition: Metadata.h:1428
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1542
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1436
Tracking metadata reference owned by Metadata.
Definition: Metadata.h:891
A single uniqued string.
Definition: Metadata.h:720
StringRef getString() const
Definition: Metadata.cpp:616
static MDString * get(LLVMContext &Context, StringRef Str)
Definition: Metadata.cpp:606
size_type count(const KeyT &Key) const
Definition: MapVector.h:165
iterator find(const KeyT &Key)
Definition: MapVector.h:167
bool empty() const
Definition: MapVector.h:79
Root of the metadata hierarchy.
Definition: Metadata.h:62
Diagnostic information for optimization analysis remarks.
The optimization diagnostic interface.
bool allowExtraAnalysis(StringRef PassName) const
Whether we allow for extra compile-time budget to perform more analysis to produce fewer false positi...
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for missed-optimization remarks.
void addPredicate(const SCEVPredicate &Pred)
Adds a new predicate.
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
const SCEVPredicate & getPredicate() const
const SCEV * getBackedgeTakenCount()
Get the (predicated) backedge count for the analyzed loop.
const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition: IVDescriptors.h:71
Instruction * getExactFPMathInst() const
Returns 1st non-reassociative FP instruction in the PHI node's use-chain.
static bool isFixedOrderRecurrence(PHINode *Phi, Loop *TheLoop, DominatorTree *DT)
Returns true if Phi is a fixed-order recurrence.
bool hasExactFPMath() const
Returns true if the recurrence has floating-point math that requires precise (ordered) operations.
Instruction * getLoopExitInstr() const
static bool isReductionPHI(PHINode *Phi, Loop *TheLoop, RecurrenceDescriptor &RedDes, DemandedBits *DB=nullptr, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr, ScalarEvolution *SE=nullptr)
Returns true if Phi is a reduction in TheLoop.
bool isOrdered() const
Expose an ordered FP reduction to the instance users.
StoreInst * IntermediateStore
Reductions may store temporary or final result to an invariant address.
bool Need
This flag indicates if we need to add the runtime check.
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
virtual unsigned getComplexity() const
Returns the estimated complexity of this predicate.
virtual bool isAlwaysTrue() const =0
Returns true if the predicate is always true.
This visitor recursively visits a SCEV expression and re-writes it.
const SCEV * visit(const SCEV *S)
This means that we are dealing with an entirely unknown SCEV value, and only represent it as its LLVM...
This class represents an analyzed expression in the program.
The main scalar evolution driver.
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
const SCEV * getCouldNotCompute()
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:347
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:436
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:368
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:503
bool empty() const
Definition: SmallVector.h:95
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:587
void push_back(const T &Elt)
Definition: SmallVector.h:427
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1210
An instruction for storing to memory.
Definition: Instructions.h:290
Value * getPointerOperand()
Definition: Instructions.h:377
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
static constexpr size_t npos
Definition: StringRef.h:52
Provides information about what library functions are available for the current target.
bool hasOptimizedCodeGen(LibFunc F) const
Tests if the function is both available and a candidate for optimized code generation.
void getWidestVF(StringRef ScalarF, ElementCount &FixedVF, ElementCount &ScalableVF) const
Returns the largest vectorization factor used in the list of vector functions.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
bool isFunctionVectorizable(StringRef F, const ElementCount &VF) const
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
bool isLegalNTLoad(Type *DataType, Align Alignment) const
Return true if the target supports nontemporal load.
bool isLegalNTStore(Type *DataType, Align Alignment) const
Return true if the target supports nontemporal store.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
std::string str() const
Return the twine contents as a std::string.
Definition: Twine.cpp:17
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:261
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:251
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:128
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:184
static IntegerType * getInt32Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:224
static bool hasMaskedVariant(const CallInst &CI, std::optional< ElementCount > VF=std::nullopt)
Definition: VectorUtils.h:82
static SmallVector< VFInfo, 8 > getMappings(const CallInst &CI)
Retrieve all the VFInfo instances associated to the CallInst CI.
Definition: VectorUtils.h:71
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
iterator_range< user_iterator > users()
Definition: Value.h:421
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
static bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as a element type.
Definition: Type.cpp:671
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition: TypeSize.h:232
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
constexpr bool isZero() const
Definition: TypeSize.h:156
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:711
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
NodeAddr< PhiNode * > Phi
Definition: RDFGraph.h:390
NodeAddr< FuncNode * > Func
Definition: RDFGraph.h:393
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1680
Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
cl::opt< bool > HintsAllowReordering("hints-allow-reordering", cl::init(true), cl::Hidden, cl::desc("Allow enabling loop hints to reorder " "FP operations during vectorization."))
static Type * getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1)
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
static Type * convertPointerToIntegerType(const DataLayout &DL, Type *Ty)
static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp)
bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
static bool isUniformLoop(Loop *Lp, Loop *OuterLp)
bool mustSuppressSpeculation(const LoadInst &LI)
Return true if speculation of the given load must be suppressed to avoid ordering or interfering with...
Definition: Loads.cpp:357
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:291
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const DenseMap< Value *, const SCEV * > &StridesMap=DenseMap< Value *, const SCEV * >(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of the access type size.
bool isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L, ScalarEvolution &SE, DominatorTree &DT, AssumptionCache *AC=nullptr)
Return true if we can prove that the given load (which is assumed to be within the specified loop) wo...
Definition: Loads.cpp:262
static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst, SmallPtrSetImpl< Value * > &AllowedExit)
Check that the instruction has outside loop users and is not an identified reduction variable.
static bool storeToSameAddress(ScalarEvolution *SE, StoreInst *A, StoreInst *B)
Returns true if A and B have same pointer operands or same SCEVs addresses.
void reportVectorizationFailure(const StringRef DebugMsg, const StringRef OREMsg, const StringRef ORETag, OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I=nullptr)
Reports a vectorization failure: print DebugMsg for debugging purposes along with the corresponding o...
llvm::MDNode * makePostTransformationMetadata(llvm::LLVMContext &Context, MDNode *OrigLoopID, llvm::ArrayRef< llvm::StringRef > RemovePrefixes, llvm::ArrayRef< llvm::MDNode * > AddAttrs)
Create a new LoopID after the loop has been transformed.
Definition: LoopInfo.cpp:1158
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:2057
bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx)
Identifies if the vector form of the intrinsic has a scalar operand.
static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI)
Checks if a function is scalarizable according to the TLI, in the sense that it should be vectorized ...
bool SCEVExprContains(const SCEV *Root, PredTy Pred)
Return true if any node in Root satisfies the predicate Pred.
An object of this class is returned by queries that could not be answered.
TODO: The following VectorizationFactor was pulled out of LoopVectorizationCostModel class.
Collection of parameters shared beetween the Loop Vectorizer and the Loop Access Analysis.
static const unsigned MaxVectorWidth
Maximum SIMD width.
static bool isInterleaveForced()
True if force-vector-interleave was specified by the user.
static unsigned VectorizationInterleave
Interleave factor as overridden by the user.