//===- LoopVectorizationLegality.cpp --------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file provides loop vectorization legality analysis. Original code
// resided in LoopVectorize.cpp for a long time.
//
// At this point, it is implemented as a utility class, not as an analysis
// pass. It should be easy to create an analysis pass around it if there
// is a need (but D45420 needs to happen first).
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils/SizeOpts.h"
#include "llvm/Transforms/Vectorize/LoopVectorize.h"

using namespace llvm;
using namespace PatternMatch;

#define LV_NAME "loop-vectorize"
#define DEBUG_TYPE LV_NAME

static cl::opt<bool>
    EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
                       cl::desc("Enable if-conversion during vectorization."));

static cl::opt<bool> AllowStridedPointerIVs(
    "lv-strided-pointer-ivs", cl::init(false), cl::Hidden,
    cl::desc("Enable recognition of non-constant strided "
             "pointer induction variables."));

static cl::opt<bool>
    HintsAllowReordering("hints-allow-reordering", cl::init(true), cl::Hidden,
                         cl::desc("Allow enabling loop hints to reorder "
                                  "FP operations during vectorization."));

// TODO: Move size-based thresholds out of legality checking, make cost based
// decisions instead of hard thresholds.
static cl::opt<unsigned> VectorizeSCEVCheckThreshold(
    "vectorize-scev-check-threshold", cl::init(16), cl::Hidden,
    cl::desc("The maximum number of SCEV checks allowed."));

static cl::opt<unsigned> PragmaVectorizeSCEVCheckThreshold(
    "pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden,
    cl::desc("The maximum number of SCEV checks allowed with a "
             "vectorize(enable) pragma"));

static cl::opt<LoopVectorizeHints::ScalableForceKind>
    ForceScalableVectorization(
        "scalable-vectorization", cl::init(LoopVectorizeHints::SK_Unspecified),
        cl::Hidden,
        cl::desc("Control whether the compiler can use scalable vectors to "
                 "vectorize a loop"),
        cl::values(
            clEnumValN(LoopVectorizeHints::SK_FixedWidthOnly, "off",
                       "Scalable vectorization is disabled."),
            clEnumValN(LoopVectorizeHints::SK_PreferScalable, "preferred",
                       "Scalable vectorization is available and favored when "
                       "the cost is inconclusive."),
            clEnumValN(LoopVectorizeHints::SK_PreferScalable, "on",
                       "Scalable vectorization is available and favored when "
                       "the cost is inconclusive.")));

static cl::opt<bool> EnableHistogramVectorization(
    "enable-histogram-loop-vectorization", cl::init(false), cl::Hidden,
    cl::desc("Enables autovectorization of some loops containing histograms"));

/// Maximum vectorization interleave count.
static const unsigned MaxInterleaveFactor = 16;

namespace llvm {

bool LoopVectorizeHints::Hint::validate(unsigned Val) {
  switch (Kind) {
  case HK_WIDTH:
    return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth;
  case HK_INTERLEAVE:
    return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
  case HK_FORCE:
    return (Val <= 1);
  case HK_ISVECTORIZED:
  case HK_PREDICATE:
  case HK_SCALABLE:
    return (Val == 0 || Val == 1);
  }
  return false;
}
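
// Illustrative examples (not part of the original source): a hint value of 3
// for "vectorize.width" fails validate() because it is not a power of two,
// and an "interleave.count" of 32 fails because it exceeds
// MaxInterleaveFactor (16); both would be ignored with a debug message in
// setHint() below.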

LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
                                       bool InterleaveOnlyWhenForced,
                                       OptimizationRemarkEmitter &ORE,
                                       const TargetTransformInfo *TTI)
    : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH),
      Interleave("interleave.count", InterleaveOnlyWhenForced, HK_INTERLEAVE),
      Force("vectorize.enable", FK_Undefined, HK_FORCE),
      IsVectorized("isvectorized", 0, HK_ISVECTORIZED),
      Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE),
      Scalable("vectorize.scalable.enable", SK_Unspecified, HK_SCALABLE),
      TheLoop(L), ORE(ORE) {
  // Populate values with existing loop metadata.
  getHintsFromMetadata();

  // force-vector-interleave overrides DisableInterleaving.
  if (VectorizerParams::isInterleaveForced())
    Interleave.Value = VectorizerParams::VectorizationInterleave;

  // If the metadata doesn't explicitly specify whether to enable scalable
  // vectorization, then decide based on the following criteria (increasing
  // level of priority):
  //  - Target default
  //  - Metadata width
  //  - Force option (always overrides)
  if ((LoopVectorizeHints::ScalableForceKind)Scalable.Value == SK_Unspecified) {
    if (TTI)
      Scalable.Value = TTI->enableScalableVectorization() ? SK_PreferScalable
                                                          : SK_FixedWidthOnly;

    if (Width.Value)
      // If the width is set, but the metadata says nothing about the scalable
      // property, then assume it concerns only a fixed-width UserVF.
      // If width is not set, the flag takes precedence.
      Scalable.Value = SK_FixedWidthOnly;
  }

  // If the flag is set to force any use of scalable vectors, override the loop
  // hints.
  if (ForceScalableVectorization.getValue() !=
      LoopVectorizeHints::SK_Unspecified)
    Scalable.Value = ForceScalableVectorization.getValue();

  // Scalable vectorization is disabled if no preference is specified.
  if ((LoopVectorizeHints::ScalableForceKind)Scalable.Value == SK_Unspecified)
    Scalable.Value = SK_FixedWidthOnly;

  if (IsVectorized.Value != 1)
    // If the vectorization width and interleaving count are both 1 then
    // consider the loop to have been already vectorized because there's
    // nothing more that we can do.
    IsVectorized.Value =
        getWidth() == ElementCount::getFixed(1) && getInterleave() == 1;
  LLVM_DEBUG(if (InterleaveOnlyWhenForced && getInterleave() == 1) dbgs()
             << "LV: Interleaving disabled by the pass manager\n");
}

void LoopVectorizeHints::setAlreadyVectorized() {
  LLVMContext &Context = TheLoop->getHeader()->getContext();

  MDNode *IsVectorizedMD = MDNode::get(
      Context,
      {MDString::get(Context, "llvm.loop.isvectorized"),
       ConstantAsMetadata::get(ConstantInt::get(Context, APInt(32, 1)))});
  MDNode *LoopID = TheLoop->getLoopID();
  MDNode *NewLoopID =
      makePostTransformationMetadata(Context, LoopID,
                                     {Twine(Prefix(), "vectorize.").str(),
                                      Twine(Prefix(), "interleave.").str()},
                                     {IsVectorizedMD});
  TheLoop->setLoopID(NewLoopID);

  // Update internal cache.
  IsVectorized.Value = 1;
}
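
// Illustrative sketch of the effect (assumed shape, not from this source):
// after setAlreadyVectorized(), the loop's latch branch carries metadata
// roughly like
//   br i1 %cond, label %header, label %exit, !llvm.loop !0
//   !0 = distinct !{!0, !1}
//   !1 = !{!"llvm.loop.isvectorized", i32 1}
// with any previous llvm.loop.vectorize.* / llvm.loop.interleave.* hints
// stripped by makePostTransformationMetadata.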

bool LoopVectorizeHints::allowVectorization(
    Function *F, Loop *L, bool VectorizeOnlyWhenForced) const {
  if (getForce() == LoopVectorizeHints::FK_Disabled) {
    LLVM_DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n");
    emitRemarkWithHints();
    return false;
  }

  if (VectorizeOnlyWhenForced && getForce() != LoopVectorizeHints::FK_Enabled) {
    LLVM_DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n");
    emitRemarkWithHints();
    return false;
  }

  if (getIsVectorized() == 1) {
    LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
    // FIXME: Add interleave.disable metadata. This will allow
    // vectorize.disable to be used without disabling the pass and errors
    // to differentiate between disabled vectorization and a width of 1.
    ORE.emit([&]() {
      return OptimizationRemarkMissed(vectorizeAnalysisPassName(),
                                      "AllDisabled", L->getStartLoc(),
                                      L->getHeader())
             << "loop not vectorized: vectorization and interleaving are "
                "explicitly disabled, or the loop has already been "
                "vectorized";
    });
    return false;
  }

  return true;
}

void LoopVectorizeHints::emitRemarkWithHints() const {
  using namespace ore;

  ORE.emit([&]() {
    if (Force.Value == LoopVectorizeHints::FK_Disabled)
      return OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled",
                                      TheLoop->getStartLoc(),
                                      TheLoop->getHeader())
             << "loop not vectorized: vectorization is explicitly disabled";

    OptimizationRemarkMissed R(LV_NAME, "MissedDetails", TheLoop->getStartLoc(),
                               TheLoop->getHeader());
    R << "loop not vectorized";
    if (Force.Value == LoopVectorizeHints::FK_Enabled) {
      R << " (Force=" << NV("Force", true);
      if (Width.Value != 0)
        R << ", Vector Width=" << NV("VectorWidth", getWidth());
      if (getInterleave() != 0)
        R << ", Interleave Count=" << NV("InterleaveCount", getInterleave());
      R << ")";
    }
    return R;
  });
}

const char *LoopVectorizeHints::vectorizeAnalysisPassName() const {
  if (getWidth() == ElementCount::getFixed(1))
    return LV_NAME;
  if (getForce() == LoopVectorizeHints::FK_Disabled)
    return LV_NAME;
  if (getForce() == LoopVectorizeHints::FK_Undefined && getWidth().isZero())
    return LV_NAME;
  return OptimizationRemarkAnalysis::AlwaysPrint;
}

bool LoopVectorizeHints::allowReordering() const {
  // Allow the vectorizer to change the order of operations if enabling
  // loop hints are provided
  ElementCount EC = getWidth();
  return HintsAllowReordering &&
         (getForce() == LoopVectorizeHints::FK_Enabled ||
          EC.getKnownMinValue() > 1);
}

void LoopVectorizeHints::getHintsFromMetadata() {
  MDNode *LoopID = TheLoop->getLoopID();
  if (!LoopID)
    return;

  // First operand should refer to the loop id itself.
  assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
  assert(LoopID->getOperand(0) == LoopID && "invalid loop id");

  for (const MDOperand &MDO : llvm::drop_begin(LoopID->operands())) {
    const MDString *S = nullptr;
    SmallVector<Metadata *, 4> Args;

    // The expected hint is either a MDString or a MDNode with the first
    // operand a MDString.
    if (const MDNode *MD = dyn_cast<MDNode>(MDO)) {
      if (!MD || MD->getNumOperands() == 0)
        continue;
      S = dyn_cast<MDString>(MD->getOperand(0));
      for (unsigned Idx = 1; Idx < MD->getNumOperands(); ++Idx)
        Args.push_back(MD->getOperand(Idx));
    } else {
      S = dyn_cast<MDString>(MDO);
      assert(Args.size() == 0 && "too many arguments for MDString");
    }

    if (!S)
      continue;

    // Check if the hint starts with the loop metadata prefix.
    StringRef Name = S->getString();
    if (Args.size() == 1)
      setHint(Name, Args[0]);
  }
}
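
// Illustrative example (not from this source) of the metadata consumed above:
//   !0 = distinct !{!0, !1, !2}
//   !1 = !{!"llvm.loop.vectorize.width", i32 4}
//   !2 = !{!"llvm.loop.vectorize.enable", i1 1}
// results in setHint being called with ("llvm.loop.vectorize.width", 4) and
// ("llvm.loop.vectorize.enable", 1).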

void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) {
  if (!Name.starts_with(Prefix()))
    return;
  Name = Name.substr(Prefix().size(), StringRef::npos);

  const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg);
  if (!C)
    return;
  unsigned Val = C->getZExtValue();

  Hint *Hints[] = {&Width, &Interleave, &Force,
                   &IsVectorized, &Predicate, &Scalable};
  for (auto *H : Hints) {
    if (Name == H->Name) {
      if (H->validate(Val))
        H->Value = Val;
      else
        LLVM_DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n");
      break;
    }
  }
}

// Return true if the inner loop \p Lp is uniform with regard to the outer loop
// \p OuterLp (i.e., if the outer loop is vectorized, all the vector lanes
// executing the inner loop will execute the same iterations). This check is
// very constrained for now but it will be relaxed in the future. \p Lp is
// considered uniform if it meets all the following conditions:
//  1) it has a canonical IV (starting from 0 and with stride 1),
//  2) its latch terminator is a conditional branch and,
//  3) its latch condition is a compare instruction whose operands are the
//     canonical IV and an OuterLp invariant.
// This check doesn't take into account the uniformity of other conditions not
// related to the loop latch because they don't affect the loop uniformity.
//
// NOTE: We decided to keep all these checks and their associated documentation
// together so that we can easily have a picture of the currently supported
// loop nests. However, some of the current checks don't depend on \p OuterLp
// and would be redundantly executed for each \p Lp if we invoked this function
// for different candidate outer loops. This is not the case for now because we
// don't currently have the infrastructure to evaluate multiple candidate outer
// loops and \p OuterLp will be a fixed parameter while we only support explicit
// outer loop vectorization. It's also very likely that these checks go away
// before introducing the aforementioned infrastructure. However, if this is not
// the case, we should move the \p OuterLp independent checks to a separate
// function that is only executed once for each \p Lp.
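//
// As an illustrative example (not from this source), in the nest
//   for (int i = 0; i < n; ++i)     // OuterLp
//     for (int j = 0; j < m; ++j)   // Lp, with m invariant in OuterLp
//       a[i][j] = 0;
// the inner loop is uniform, while an inner trip count that depends on the
// outer iteration (e.g. j < b[i]) would make it divergent.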
static bool isUniformLoop(Loop *Lp, Loop *OuterLp) {
  assert(Lp->getLoopLatch() && "Expected loop with a single latch.");

  // If Lp is the outer loop, it's uniform by definition.
  if (Lp == OuterLp)
    return true;
  assert(OuterLp->contains(Lp) && "OuterLp must contain Lp.");

  // 1.
  PHINode *IV = Lp->getCanonicalInductionVariable();
  if (!IV) {
    LLVM_DEBUG(dbgs() << "LV: Canonical IV not found.\n");
    return false;
  }

  // 2.
  BasicBlock *Latch = Lp->getLoopLatch();
  auto *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator());
  if (!LatchBr || LatchBr->isUnconditional()) {
    LLVM_DEBUG(dbgs() << "LV: Unsupported loop latch branch.\n");
    return false;
  }

  // 3.
  auto *LatchCmp = dyn_cast<CmpInst>(LatchBr->getCondition());
  if (!LatchCmp) {
    LLVM_DEBUG(
        dbgs() << "LV: Loop latch condition is not a compare instruction.\n");
    return false;
  }

  Value *CondOp0 = LatchCmp->getOperand(0);
  Value *CondOp1 = LatchCmp->getOperand(1);
  Value *IVUpdate = IV->getIncomingValueForBlock(Latch);
  if (!(CondOp0 == IVUpdate && OuterLp->isLoopInvariant(CondOp1)) &&
      !(CondOp1 == IVUpdate && OuterLp->isLoopInvariant(CondOp0))) {
    LLVM_DEBUG(dbgs() << "LV: Loop latch condition is not uniform.\n");
    return false;
  }

  return true;
}

// Return true if \p Lp and all its nested loops are uniform with regard to \p
// OuterLp.
static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp) {
  if (!isUniformLoop(Lp, OuterLp))
    return false;

  // Check if nested loops are uniform.
  for (Loop *SubLp : *Lp)
    if (!isUniformLoopNest(SubLp, OuterLp))
      return false;

  return true;
}

static IntegerType *getInductionIntegerTy(const DataLayout &DL, Type *Ty) {
  assert(Ty->isIntOrPtrTy() && "Expected integer or pointer type");

  if (Ty->isPointerTy())
    return DL.getIntPtrType(Ty->getContext(), Ty->getPointerAddressSpace());

  // It is possible that chars or shorts overflow when we ask for the loop's
  // trip count; work around this by changing the type size.
  if (Ty->getScalarSizeInBits() < 32)
    return Type::getInt32Ty(Ty->getContext());

  return cast<IntegerType>(Ty);
}

static Type *getWiderInductionTy(const DataLayout &DL, Type *Ty0, Type *Ty1) {
  IntegerType *TyA = getInductionIntegerTy(DL, Ty0);
  IntegerType *TyB = getInductionIntegerTy(DL, Ty1);
  return TyA->getScalarSizeInBits() > TyB->getScalarSizeInBits() ? TyA : TyB;
}

/// Check that the instruction has outside loop users and is not an
/// identified reduction variable.
static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
                               SmallPtrSetImpl<Value *> &AllowedExit) {
  // Reductions, inductions and non-header phis are allowed to have exit users.
  // All other instructions must not have external users.
  if (!AllowedExit.count(Inst))
    // Check that all of the users of the loop are inside the BB.
    for (User *U : Inst->users()) {
      auto *UI = cast<Instruction>(U);
      // This user may be a reduction exit value.
      if (!TheLoop->contains(UI)) {
        LLVM_DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n');
        return true;
      }
    }
  return false;
}

/// Returns true if A and B have the same pointer operand or the same SCEV
/// address.
static bool storeToSameAddress(ScalarEvolution *SE, StoreInst *A,
                               StoreInst *B) {
  // Compare store
  if (A == B)
    return true;

  // Otherwise Compare pointers
  Value *APtr = A->getPointerOperand();
  Value *BPtr = B->getPointerOperand();
  if (APtr == BPtr)
    return true;

  // Otherwise compare address SCEVs
  return SE->getSCEV(APtr) == SE->getSCEV(BPtr);
}
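
// E.g. (illustrative, not from this source): two stores through %p and %q are
// treated as storing to the same address if %p == %q, or if SCEV proves both
// pointers evaluate to the same expression, say &base[42] reached through
// different GEP chains.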

int LoopVectorizationLegality::isConsecutivePtr(Type *AccessTy,
                                                Value *Ptr) const {
  // FIXME: Currently, the set of symbolic strides is sometimes queried before
  // it's collected. This happens from canVectorizeWithIfConvert, when the
  // pointer is checked to reference consecutive elements suitable for a
  // masked access.
  const auto &Strides =
      LAI ? LAI->getSymbolicStrides() : DenseMap<Value *, const SCEV *>();

  bool CanAddPredicate = !llvm::shouldOptimizeForSize(
      TheLoop->getHeader(), PSI, BFI, PGSOQueryType::IRPass);
  int Stride = getPtrStride(PSE, AccessTy, Ptr, TheLoop, Strides,
                            CanAddPredicate, false).value_or(0);
  if (Stride == 1 || Stride == -1)
    return Stride;
  return 0;
}

bool LoopVectorizationLegality::isInvariant(Value *V) const {
  return LAI->isInvariant(V);
}

namespace {
/// A rewriter to build the SCEVs for each of the VF lanes in the expected
/// vectorized loop, which can then be compared to detect their uniformity.
/// This is done by replacing the AddRec SCEVs of the original scalar loop
/// (TheLoop) with new AddRecs where the step is multiplied by StepMultiplier
/// and Offset * Step is added. Also checks if all sub-expressions are
/// analyzable w.r.t. uniformity.
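///
/// For example (illustrative, assuming VF = 4): rewriting for lane 0 turns
/// {0,+,1} into {0,+,4}, while rewriting for lane 1 yields {1,+,4}; the
/// per-lane results differ, so a plain induction is not uniform. Expressions
/// that strip the low bits, e.g. (i /u 4), can instead produce identical
/// per-lane SCEVs and thus be proven uniform.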
class SCEVAddRecForUniformityRewriter
    : public SCEVRewriteVisitor<SCEVAddRecForUniformityRewriter> {
  /// Multiplier to be applied to the step of AddRecs in TheLoop.
  unsigned StepMultiplier;

  /// Offset to be added to the AddRecs in TheLoop.
  unsigned Offset;

  /// Loop for which to rewrite AddRecs.
  Loop *TheLoop;

  /// Is any sub-expression not analyzable w.r.t. uniformity?
  bool CannotAnalyze = false;

  bool canAnalyze() const { return !CannotAnalyze; }

public:
  SCEVAddRecForUniformityRewriter(ScalarEvolution &SE, unsigned StepMultiplier,
                                  unsigned Offset, Loop *TheLoop)
      : SCEVRewriteVisitor(SE), StepMultiplier(StepMultiplier), Offset(Offset),
        TheLoop(TheLoop) {}

  const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
    assert(Expr->getLoop() == TheLoop &&
           "addrec outside of TheLoop must be invariant and should have been "
           "handled earlier");
    // Build a new AddRec by multiplying the step by StepMultiplier and
    // incrementing the start by Offset * step.
    Type *Ty = Expr->getType();
    const SCEV *Step = Expr->getStepRecurrence(SE);
    if (!SE.isLoopInvariant(Step, TheLoop)) {
      CannotAnalyze = true;
      return Expr;
    }
    const SCEV *NewStep =
        SE.getMulExpr(Step, SE.getConstant(Ty, StepMultiplier));
    const SCEV *ScaledOffset = SE.getMulExpr(Step, SE.getConstant(Ty, Offset));
    const SCEV *NewStart = SE.getAddExpr(Expr->getStart(), ScaledOffset);
    return SE.getAddRecExpr(NewStart, NewStep, TheLoop, SCEV::FlagAnyWrap);
  }

  const SCEV *visit(const SCEV *S) {
    if (CannotAnalyze || SE.isLoopInvariant(S, TheLoop))
      return S;
    return SCEVRewriteVisitor<SCEVAddRecForUniformityRewriter>::visit(S);
  }

  const SCEV *visitUnknown(const SCEVUnknown *S) {
    if (SE.isLoopInvariant(S, TheLoop))
      return S;
    // The value could vary across iterations.
    CannotAnalyze = true;
    return S;
  }

  const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *S) {
    // Could not analyze the expression.
    CannotAnalyze = true;
    return S;
  }

  static const SCEV *rewrite(const SCEV *S, ScalarEvolution &SE,
                             unsigned StepMultiplier, unsigned Offset,
                             Loop *TheLoop) {
    // Bail out if the expression does not contain an UDiv expression.
    // Uniform values which are not loop invariant require operations to strip
    // out the lowest bits. For now just look for UDivs and use it to avoid
    // re-writing UDIV-free expressions for other lanes to limit compile time.
    if (!SCEVExprContains(S,
                          [](const SCEV *S) { return isa<SCEVUDivExpr>(S); }))
      return SE.getCouldNotCompute();

    SCEVAddRecForUniformityRewriter Rewriter(SE, StepMultiplier, Offset,
                                             TheLoop);
    const SCEV *Result = Rewriter.visit(S);

    if (Rewriter.canAnalyze())
      return Result;
    return SE.getCouldNotCompute();
  }
};

} // namespace

bool LoopVectorizationLegality::isUniform(Value *V, ElementCount VF) const {
  if (isInvariant(V))
    return true;
  if (VF.isScalable())
    return false;
  if (VF.isScalar())
    return true;

  // Since we rely on SCEV for uniformity, if the type is not SCEVable, it is
  // never considered uniform.
  auto *SE = PSE.getSE();
  if (!SE->isSCEVable(V->getType()))
    return false;
  const SCEV *S = SE->getSCEV(V);

  // Rewrite AddRecs in TheLoop to step by VF and check if the expression for
  // lane 0 matches the expressions for all other lanes.
  unsigned FixedVF = VF.getKnownMinValue();
  const SCEV *FirstLaneExpr =
      SCEVAddRecForUniformityRewriter::rewrite(S, *SE, FixedVF, 0, TheLoop);
  if (isa<SCEVCouldNotCompute>(FirstLaneExpr))
    return false;

  // Make sure the expressions for lanes FixedVF-1..1 match the expression for
  // lane 0. We check lanes in reverse order for compile-time, as frequently
  // checking the last lane is sufficient to rule out uniformity.
  return all_of(reverse(seq<unsigned>(1, FixedVF)), [&](unsigned I) {
    const SCEV *IthLaneExpr =
        SCEVAddRecForUniformityRewriter::rewrite(S, *SE, FixedVF, I, TheLoop);
    return FirstLaneExpr == IthLaneExpr;
  });
}

bool LoopVectorizationLegality::isUniformMemOp(Instruction &I,
                                               ElementCount VF) const {
  Value *Ptr = getLoadStorePointerOperand(&I);
  if (!Ptr)
    return false;
  // Note: There's nothing inherent which prevents predicated loads and
  // stores from being uniform. The current lowering simply doesn't handle
  // it; in particular, the cost model distinguishes scatter/gather from
  // scalar w/predication, and we currently rely on the scalar path.
  return isUniform(Ptr, VF) && !blockNeedsPredication(I.getParent());
}
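
// E.g. (illustrative, not from this source): an unconditional store to a
// loop-invariant address inside the loop body is a uniform memory op, while
// the same store in a block that needs predication is not, since predicated
// lanes would currently have to go down the scatter/gather path.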

bool LoopVectorizationLegality::canVectorizeOuterLoop() {
  assert(!TheLoop->isInnermost() && "We are not vectorizing an outer loop.");
  // Store the result and return it at the end instead of exiting early, in
  // case allowExtraAnalysis is used to report multiple reasons for not
  // vectorizing.
  bool Result = true;
  bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);

  for (BasicBlock *BB : TheLoop->blocks()) {
    // Check whether the BB terminator is a BranchInst. Any other terminator is
    // not supported yet.
    auto *Br = dyn_cast<BranchInst>(BB->getTerminator());
    if (!Br) {
      reportVectorizationFailure(
          "Unsupported basic block terminator",
          "loop control flow is not understood by vectorizer",
          "CFGNotUnderstood", ORE, TheLoop);
      if (DoExtraAnalysis)
        Result = false;
      else
        return false;
    }

    // Check whether the BranchInst is a supported one. Only unconditional
    // branches, conditional branches with an outer loop invariant condition or
    // backedges are supported.
    // FIXME: We skip these checks when VPlan predication is enabled as we
    // want to allow divergent branches. This whole check will be removed
    // once VPlan predication is on by default.
    if (Br && Br->isConditional() &&
        !TheLoop->isLoopInvariant(Br->getCondition()) &&
        !LI->isLoopHeader(Br->getSuccessor(0)) &&
        !LI->isLoopHeader(Br->getSuccessor(1))) {
      reportVectorizationFailure(
          "Unsupported conditional branch",
          "loop control flow is not understood by vectorizer",
          "CFGNotUnderstood", ORE, TheLoop);
      if (DoExtraAnalysis)
        Result = false;
      else
        return false;
    }
  }

  // Check whether inner loops are uniform. At this point, we only support
  // simple outer loops scenarios with uniform nested loops.
  if (!isUniformLoopNest(TheLoop /*loop nest*/,
                         TheLoop /*context outer loop*/)) {
    reportVectorizationFailure(
        "Outer loop contains divergent loops",
        "loop control flow is not understood by vectorizer",
        "CFGNotUnderstood", ORE, TheLoop);
    if (DoExtraAnalysis)
      Result = false;
    else
      return false;
  }

  // Check whether we are able to set up outer loop induction.
  if (!setupOuterLoopInductions()) {
    reportVectorizationFailure("Unsupported outer loop Phi(s)",
                               "UnsupportedPhi", ORE, TheLoop);
    if (DoExtraAnalysis)
      Result = false;
    else
      return false;
  }

  return Result;
}

void LoopVectorizationLegality::addInductionPhi(
    PHINode *Phi, const InductionDescriptor &ID,
    SmallPtrSetImpl<Value *> &AllowedExit) {
  Inductions[Phi] = ID;

  // In case this induction also comes with casts that we know we can ignore
  // in the vectorized loop body, record them here. All casts could be recorded
  // here for ignoring, but it suffices to record only the first (as it is the
  // only one that may be used outside the cast sequence).
  const SmallVectorImpl<Instruction *> &Casts = ID.getCastInsts();
  if (!Casts.empty())
    InductionCastsToIgnore.insert(*Casts.begin());

  Type *PhiTy = Phi->getType();
  const DataLayout &DL = Phi->getDataLayout();

  assert((PhiTy->isIntOrPtrTy() || PhiTy->isFloatingPointTy()) &&
         "Expected int, ptr, or FP induction phi type");

  // Get the widest type.
  if (PhiTy->isIntOrPtrTy()) {
    if (!WidestIndTy)
      WidestIndTy = getInductionIntegerTy(DL, PhiTy);
    else
      WidestIndTy = getWiderInductionTy(DL, PhiTy, WidestIndTy);
  }

  // Int inductions are special because we only allow one IV.
  if (ID.getKind() == InductionDescriptor::IK_IntInduction &&
      ID.getConstIntStepValue() && ID.getConstIntStepValue()->isOne() &&
      isa<Constant>(ID.getStartValue()) &&
      cast<Constant>(ID.getStartValue())->isNullValue()) {

    // Use the phi node with the widest type as induction. Use the last
    // one if there are multiple (no good reason for doing this other
    // than it is expedient). We've checked that it begins at zero and
    // steps by one, so this is a canonical induction variable.
    if (!PrimaryInduction || PhiTy == WidestIndTy)
      PrimaryInduction = Phi;
  }

  // Both the PHI node itself, and the "post-increment" value feeding
  // back into the PHI node may have external users.
  // We can allow those uses, except if the SCEVs we have for them rely
  // on predicates that only hold within the loop, since allowing the exit
  // currently means re-using this SCEV outside the loop (see PR33706 for more
  // details).
  if (PSE.getPredicate().isAlwaysTrue()) {
    AllowedExit.insert(Phi);
    AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch()));
  }

  LLVM_DEBUG(dbgs() << "LV: Found an induction variable.\n");
}

bool LoopVectorizationLegality::setupOuterLoopInductions() {
  BasicBlock *Header = TheLoop->getHeader();

  // Returns true if a given Phi is a supported induction.
  auto IsSupportedPhi = [&](PHINode &Phi) -> bool {
    InductionDescriptor ID;
    if (InductionDescriptor::isInductionPHI(&Phi, TheLoop, PSE, ID) &&
        ID.getKind() == InductionDescriptor::IK_IntInduction) {
      addInductionPhi(&Phi, ID, AllowedExit);
      return true;
    }
    // Bail out for any Phi in the outer loop header that is not a supported
    // induction.
    LLVM_DEBUG(
        dbgs() << "LV: Found unsupported PHI for outer loop vectorization.\n");
    return false;
  };

  return llvm::all_of(Header->phis(), IsSupportedPhi);
}

/// Checks if a function is scalarizable according to the TLI, in
/// the sense that it should be vectorized and then expanded in
/// multiple scalar calls. This is represented in the
/// TLI via mappings that do not specify a vector name, as in the
/// following example:
///
/// const VecDesc VecIntrinsics[] = {
///   {"llvm.phx.abs.i32", "", 4}
/// };
static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI) {
  const StringRef ScalarName = CI.getCalledFunction()->getName();
  bool Scalarize = TLI.isFunctionVectorizable(ScalarName);
  // Check that all known VFs are not associated to a vector
  // function, i.e. the vector name is empty.
  if (Scalarize) {
    ElementCount WidestFixedVF, WidestScalableVF;
    TLI.getWidestVF(ScalarName, WidestFixedVF, WidestScalableVF);
    for (ElementCount VF = ElementCount::getFixed(2);
         ElementCount::isKnownLE(VF, WidestFixedVF); VF *= 2)
      Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF);
    for (ElementCount VF = ElementCount::getScalable(1);
         ElementCount::isKnownLE(VF, WidestScalableVF); VF *= 2)
      Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF);
    assert((WidestScalableVF.isZero() || !Scalarize) &&
           "Caller may decide to scalarize a variant using a scalable VF");
  }
  return Scalarize;
}

/// Returns true if the call return type `Ty` can be widened by the loop
/// vectorizer.
static bool canWidenCallReturnType(Type *Ty) {
  auto *StructTy = dyn_cast<StructType>(Ty);
  // TODO: Remove the homogeneous types restriction. This is just an initial
  // simplification. When we want to support things like the overflow
  // intrinsics we will have to lift this restriction.
  if (StructTy && !StructTy->containsHomogeneousTypes())
    return false;
  return canVectorizeTy(StructTy);
}
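
// For example (illustrative, not from this source): a call returning
// {float, float} whose only users are extractvalue instructions can be
// widened to return {<4 x float>, <4 x float>}, whereas a mixed struct such
// as {i32, float} is rejected by the homogeneity check above.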

bool LoopVectorizationLegality::canVectorizeInstrs() {
  bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
  bool Result = true;

  // For each block in the loop.
  for (BasicBlock *BB : TheLoop->blocks()) {
    // Scan the instructions in the block and look for hazards.
    for (Instruction &I : *BB) {
      Result &= canVectorizeInstr(I);
      if (!DoExtraAnalysis && !Result)
        return false;
    }
  }

  if (!PrimaryInduction) {
    if (Inductions.empty()) {
      reportVectorizationFailure(
          "Did not find one integer induction var",
          "loop induction variable could not be identified",
          "NoInductionVariable", ORE, TheLoop);
      return false;
    }
    if (!WidestIndTy) {
      reportVectorizationFailure(
          "Did not find one integer induction var",
          "integer loop induction variable could not be identified",
          "NoIntegerInductionVariable", ORE, TheLoop);
      return false;
    }
    LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
  }

  // Now we know the widest induction type, check if our found induction
  // is the same size. If it's not, unset it here and InnerLoopVectorizer
  // will create another.
  if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType())
    PrimaryInduction = nullptr;

  return Result;
}

bool LoopVectorizationLegality::canVectorizeInstr(Instruction &I) {
  BasicBlock *BB = I.getParent();
  BasicBlock *Header = TheLoop->getHeader();

  if (auto *Phi = dyn_cast<PHINode>(&I)) {
    Type *PhiTy = Phi->getType();
    // Check that this PHI type is allowed.
    if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
        !PhiTy->isPointerTy()) {
      reportVectorizationFailure(
          "Found a non-int non-pointer PHI",
          "loop control flow is not understood by vectorizer",
          "CFGNotUnderstood", ORE, TheLoop);
      return false;
    }

    // If this PHINode is not in the header block, then we know that we
    // can convert it to select during if-conversion. No need to check if
    // the PHIs in this block are induction or reduction variables.
    if (BB != Header) {
      // Non-header phi nodes that have outside uses can be vectorized. Add
      // them to the list of allowed exits.
      // Unsafe cyclic dependencies with header phis are identified during
      // legalization for reduction, induction and fixed order
      // recurrences.
      AllowedExit.insert(&I);
      return true;
    }

    // We only allow if-converted PHIs with exactly two incoming values.
    if (Phi->getNumIncomingValues() != 2) {
      reportVectorizationFailure(
          "Found an invalid PHI",
          "loop control flow is not understood by vectorizer",
          "CFGNotUnderstood", ORE, TheLoop, Phi);
      return false;
    }

    RecurrenceDescriptor RedDes;
    if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC, DT,
                                             PSE.getSE())) {
      Requirements->addExactFPMathInst(RedDes.getExactFPMathInst());
      AllowedExit.insert(RedDes.getLoopExitInstr());
      Reductions[Phi] = RedDes;
      return true;
    }

    // We prevent matching non-constant strided pointer IVS to preserve
    // historical vectorizer behavior after a generalization of the
    // IVDescriptor code. The intent is to remove this check, but we
    // have to fix issues around code quality for such loops first.
    auto IsDisallowedStridedPointerInduction =
        [](const InductionDescriptor &ID) {
          if (AllowStridedPointerIVs)
            return false;
          return ID.getKind() == InductionDescriptor::IK_PtrInduction &&
                 ID.getConstIntStepValue() == nullptr;
        };

    // TODO: Instead of recording the AllowedExit, it would be good to
    // record the complementary set: NotAllowedExit. These include (but may
    // not be limited to):
    // 1. Reduction phis as they represent the one-before-last value, which
    // is not available when vectorized
    // 2. Induction phis and increment when SCEV predicates cannot be used
    // outside the loop - see addInductionPhi
    // 3. Non-Phis with outside uses when SCEV predicates cannot be used
    // outside the loop - see call to hasOutsideLoopUser in the non-phi
    // handling below
    // 4. FixedOrderRecurrence phis that can possibly be handled by
    // extraction.
    // By recording these, we can then reason about ways to vectorize each
    // of these NotAllowedExit.
    InductionDescriptor ID;
    if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID) &&
        !IsDisallowedStridedPointerInduction(ID)) {
      addInductionPhi(Phi, ID, AllowedExit);
      Requirements->addExactFPMathInst(ID.getExactFPMathInst());
      return true;
    }

    if (RecurrenceDescriptor::isFixedOrderRecurrence(Phi, TheLoop, DT)) {
      AllowedExit.insert(Phi);
      FixedOrderRecurrences.insert(Phi);
      return true;
    }

    // As a last resort, coerce the PHI to an AddRec expression
    // and re-try classifying it as an induction PHI.
    if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true) &&
        !IsDisallowedStridedPointerInduction(ID)) {
      addInductionPhi(Phi, ID, AllowedExit);
      return true;
    }

    reportVectorizationFailure("Found an unidentified PHI",
                               "value that could not be identified as "
                               "reduction is used outside the loop",
                               "NonReductionValueUsedOutsideLoop", ORE, TheLoop,
                               Phi);
    return false;
  } // end of PHI handling

  // We handle calls that:
  //   * Have a mapping to an IR intrinsic.
  //   * Have a vector version available.
  auto *CI = dyn_cast<CallInst>(&I);

  if (CI && !getVectorIntrinsicIDForCall(CI, TLI) &&
      !(CI->getCalledFunction() && TLI &&
        (!VFDatabase::getMappings(*CI).empty() || isTLIScalarize(*TLI, *CI)))) {
    // If the call is a recognized math library call, it is likely that
    // we can vectorize it given loosened floating-point constraints.
    LibFunc Func;
    bool IsMathLibCall =
        TLI && CI->getCalledFunction() && CI->getType()->isFloatingPointTy() &&
        TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) &&
        TLI->hasOptimizedCodeGen(Func);

    if (IsMathLibCall) {
      // TODO: Ideally, we should not use clang-specific language here,
      // but it's hard to provide meaningful yet generic advice.
      // Also, should this be guarded by allowExtraAnalysis() and/or be part
      // of the returned info from isFunctionVectorizable()?
      reportVectorizationFailure(
          "Found a non-intrinsic callsite",
          "library call cannot be vectorized. "
          "Try compiling with -fno-math-errno, -ffast-math, "
          "or similar flags",
          "CantVectorizeLibcall", ORE, TheLoop, CI);
    } else {
      reportVectorizationFailure("Found a non-intrinsic callsite",
                                 "call instruction cannot be vectorized",
                                 "CantVectorizeLibcall", ORE, TheLoop, CI);
    }
    return false;
  }

  // Some intrinsics have scalar arguments, which must be loop invariant for
  // the call to be vectorized.
  if (CI) {
    auto *SE = PSE.getSE();
    Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI);
    for (unsigned Idx = 0; Idx < CI->arg_size(); ++Idx)
      if (isVectorIntrinsicWithScalarOpAtArg(IntrinID, Idx, TTI)) {
        if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(Idx)), TheLoop)) {
          reportVectorizationFailure(
              "Found unvectorizable intrinsic",
              "intrinsic instruction cannot be vectorized",
              "CantVectorizeIntrinsic", ORE, TheLoop, CI);
          return false;
        }
      }
  }

  // If we found a vectorized variant of a function, note that so LV can
  // make better decisions about maximum VF.
  if (CI && !VFDatabase::getMappings(*CI).empty())
    VecCallVariantsFound = true;

  auto CanWidenInstructionTy = [](Instruction const &Inst) {
    Type *InstTy = Inst.getType();
    if (!isa<StructType>(InstTy))
      return canVectorizeTy(InstTy);

    // For now, we only recognize struct values returned from calls where
    // all users are extractvalue as vectorizable. All element types of the
    // struct must be types that can be widened.
    return isa<CallInst>(Inst) && canWidenCallReturnType(InstTy) &&
           all_of(Inst.users(), IsaPred<ExtractValueInst>);
  };

  // Check that the instruction return type is vectorizable.
  // We can't vectorize casts from vector type to scalar type.
  // Also, we can't vectorize extractelement instructions.
  if (!CanWidenInstructionTy(I) ||
      (isa<CastInst>(I) &&
       !VectorType::isValidElementType(I.getOperand(0)->getType())) ||
      isa<ExtractElementInst>(I)) {
    reportVectorizationFailure("Found unvectorizable type",
                               "instruction return type cannot be vectorized",
                               "CantVectorizeInstructionReturnType", ORE,
                               TheLoop, &I);
    return false;
  }

  // Check that the stored type is vectorizable.
  if (auto *ST = dyn_cast<StoreInst>(&I)) {
    Type *T = ST->getValueOperand()->getType();
    if (!VectorType::isValidElementType(T)) {
      reportVectorizationFailure("Store instruction cannot be vectorized",
                                 "CantVectorizeStore", ORE, TheLoop, ST);
      return false;
    }

    // For nontemporal stores, check that a nontemporal vector version is
    // supported on the target.
    if (ST->getMetadata(LLVMContext::MD_nontemporal)) {
      // Arbitrarily try a vector of 2 elements.
      auto *VecTy = FixedVectorType::get(T, /*NumElts=*/2);
      assert(VecTy && "did not find vectorized version of stored type");
      if (!TTI->isLegalNTStore(VecTy, ST->getAlign())) {
        reportVectorizationFailure(
            "nontemporal store instruction cannot be vectorized",
            "CantVectorizeNontemporalStore", ORE, TheLoop, ST);
        return false;
      }
    }

  } else if (auto *LD = dyn_cast<LoadInst>(&I)) {
    if (LD->getMetadata(LLVMContext::MD_nontemporal)) {
      // For nontemporal loads, check that a nontemporal vector version is
      // supported on the target (arbitrarily try a vector of 2 elements).
      auto *VecTy = FixedVectorType::get(I.getType(), /*NumElts=*/2);
      assert(VecTy && "did not find vectorized version of load type");
      if (!TTI->isLegalNTLoad(VecTy, LD->getAlign())) {
        reportVectorizationFailure(
            "nontemporal load instruction cannot be vectorized",
            "CantVectorizeNontemporalLoad", ORE, TheLoop, LD);
        return false;
      }
    }

    // FP instructions can allow unsafe algebra, thus vectorizable by
    // non-IEEE-754 compliant SIMD units.
    // This applies to floating-point math operations and calls, not memory
    // operations, shuffles, or casts, as they don't change precision or
    // semantics.
  } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) &&
             !I.isFast()) {
    LLVM_DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n");
    Hints->setPotentiallyUnsafe();
  }

  // Reduction instructions are allowed to have exit users.
  // All other instructions must not have external users.
  if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) {
    // We can safely vectorize loops where instructions within the loop are
    // used outside the loop only if the SCEV predicates within the loop are
    // the same as outside the loop. Allowing the exit means reusing the SCEV
    // outside the loop.
    if (PSE.getPredicate().isAlwaysTrue()) {
      AllowedExit.insert(&I);
      return true;
    }
    reportVectorizationFailure("Value cannot be used outside the loop",
                               "ValueUsedOutsideLoop", ORE, TheLoop, &I);
    return false;
  }

  return true;
}

/// Find histogram operations that match high-level code in loops:
/// \code
/// buckets[indices[i]] += step;
/// \endcode
///
/// It matches a pattern starting from \p HSt, which stores to the 'buckets'
/// array the computed histogram. It uses a BinOp to sum all counts, storing
/// them using a loop-variant index Load from the 'indices' input array.
///
/// On successful matches it updates the STATISTIC 'HistogramsDetected',
/// regardless of hardware support. When there is support, it additionally
/// stores the BinOp/Load pairs in \p HistogramCounts, as well as the pointers
/// used to update the histogram in \p HistogramPtrs.
static bool findHistogram(LoadInst *LI, StoreInst *HSt, Loop *TheLoop,
                          const PredicatedScalarEvolution &PSE,
                          SmallVectorImpl<HistogramInfo> &Histograms) {

  // Store value must come from a Binary Operation.
  Instruction *HPtrInstr = nullptr;
  BinaryOperator *HBinOp = nullptr;
  if (!match(HSt, m_Store(m_BinOp(HBinOp), m_Instruction(HPtrInstr))))
    return false;

  // BinOp must be an Add or a Sub modifying the bucket value by a
  // loop invariant amount.
  // FIXME: We assume the loop invariant term is on the RHS.
  //        Fine for an immediate/constant, but maybe not a generic value?
  Value *HIncVal = nullptr;
  if (!match(HBinOp, m_Add(m_Load(m_Specific(HPtrInstr)), m_Value(HIncVal))) &&
      !match(HBinOp, m_Sub(m_Load(m_Specific(HPtrInstr)), m_Value(HIncVal))))
    return false;

  // Make sure the increment value is loop invariant.
  if (!TheLoop->isLoopInvariant(HIncVal))
    return false;

  // The address to store is calculated through a GEP Instruction.
  auto *GEP = dyn_cast<GetElementPtrInst>(HPtrInstr);
  if (!GEP)
    return false;

  // Restrict address calculation to constant indices except for the last term.
  Value *HIdx = nullptr;
  for (Value *Index : GEP->indices()) {
    if (HIdx)
      return false;
    if (!isa<ConstantInt>(Index))
      HIdx = Index;
  }

  if (!HIdx)
    return false;

  // Check that the index is calculated by loading from another array. Ignore
  // any extensions.
  // FIXME: Support indices from other sources than a linear load from memory?
  //        We're currently trying to match an operation looping over an array
  //        of indices, but there could be additional levels of indirection
  //        in place, or possibly some additional calculation to form the index
  //        from the loaded data.
  Value *VPtrVal;
  if (!match(HIdx, m_ZExtOrSExtOrSelf(m_Load(m_Value(VPtrVal)))))
    return false;

  // Make sure the index address varies in this loop, not an outer loop.
  const auto *AR = dyn_cast<SCEVAddRecExpr>(PSE.getSE()->getSCEV(VPtrVal));
  if (!AR || AR->getLoop() != TheLoop)
    return false;

  // Ensure we'll have the same mask by checking that all parts of the
  // histogram (gather load, update, scatter store) are in the same block.
  LoadInst *IndexedLoad = cast<LoadInst>(HBinOp->getOperand(0));
  BasicBlock *LdBB = IndexedLoad->getParent();
  if (LdBB != HBinOp->getParent() || LdBB != HSt->getParent())
    return false;

  LLVM_DEBUG(dbgs() << "LV: Found histogram for: " << *HSt << "\n");

  // Store the operations that make up the histogram.
  Histograms.emplace_back(IndexedLoad, HBinOp, HSt);
  return true;
}

bool LoopVectorizationLegality::canVectorizeIndirectUnsafeDependences() {
  // For now, we only support an IndirectUnsafe dependency that calculates
  // a histogram.
  if (!EnableHistogramVectorization)
    return false;

  // Find a single IndirectUnsafe dependency.
  const MemoryDepChecker::Dependence *IUDep = nullptr;
  const MemoryDepChecker &DepChecker = LAI->getDepChecker();
  const auto *Deps = DepChecker.getDependences();
  // If there were too many dependences, LAA abandons recording them. We can't
  // proceed safely if we don't know what the dependences are.
  if (!Deps)
    return false;

  for (const MemoryDepChecker::Dependence &Dep : *Deps) {
    // Ignore dependencies that are either known to be safe or can be
    // checked at runtime.
    if (MemoryDepChecker::Dependence::isSafeForVectorization(Dep.Type) !=
        MemoryDepChecker::VectorizationSafetyStatus::Unsafe)
      continue;

    // We're only interested in IndirectUnsafe dependencies here, where the
    // address might come from a load from memory. We also only want to handle
    // one such dependency, at least for now.
    if (Dep.Type != MemoryDepChecker::Dependence::IndirectUnsafe || IUDep)
      return false;

    IUDep = &Dep;
  }
  if (!IUDep)
    return false;

  // For now only normal loads and stores are supported.
  LoadInst *LI = dyn_cast<LoadInst>(IUDep->getSource(DepChecker));
  StoreInst *SI = dyn_cast<StoreInst>(IUDep->getDestination(DepChecker));

  if (!LI || !SI)
    return false;

  LLVM_DEBUG(dbgs() << "LV: Checking for a histogram on: " << *SI << "\n");
  return findHistogram(LI, SI, TheLoop, LAI->getPSE(), Histograms);
}

bool LoopVectorizationLegality::canVectorizeMemory() {
  LAI = &LAIs.getInfo(*TheLoop);
  const OptimizationRemarkAnalysis *LAR = LAI->getReport();
  if (LAR) {
    ORE->emit([&]() {
      return OptimizationRemarkAnalysis(Hints->vectorizeAnalysisPassName(),
                                        "loop not vectorized: ", *LAR);
    });
  }

  if (!LAI->canVectorizeMemory()) {
    if (hasUncountableEarlyExit()) {
      reportVectorizationFailure(
          "Cannot vectorize unsafe dependencies in uncountable exit loop with "
          "side effects",
          "CantVectorizeUnsafeDependencyForEELoopWithSideEffects", ORE,
          TheLoop);
      return false;
    }

    return canVectorizeIndirectUnsafeDependences();
  }

  if (LAI->hasLoadStoreDependenceInvolvingLoopInvariantAddress()) {
    reportVectorizationFailure("We don't allow storing to uniform addresses",
                               "write to a loop invariant address could not "
                               "be vectorized",
                               "CantVectorizeStoreToLoopInvariantAddress", ORE,
                               TheLoop);
    return false;
  }

  // We can vectorize stores to an invariant address when the final reduction
  // value is guaranteed to be stored at the end of the loop. Also, if the
  // decision to vectorize the loop is made, runtime checks are added so as
  // to make sure that the invariant address won't alias with any other
  // objects.
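  //
  // E.g. (illustrative, not from this source):
  //   for (i = 0; i < n; i++) { sum += a[i]; *p = sum; }
  // stores the running value of the 'sum' reduction to the invariant address
  // p; only the final value needs to be stored after the vector loop.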
  if (!LAI->getStoresToInvariantAddresses().empty()) {
    // For each invariant address, check if the last stored value is
    // unconditional and the address is not calculated inside the loop.
    for (StoreInst *SI : LAI->getStoresToInvariantAddresses()) {
      if (!isInvariantStoreOfReduction(SI))
        continue;

      if (blockNeedsPredication(SI->getParent())) {
        reportVectorizationFailure(
            "We don't allow storing to uniform addresses",
            "write of conditional recurring variant value to a loop "
            "invariant address could not be vectorized",
            "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
        return false;
      }

      // Invariant address should be defined outside of loop. LICM pass usually
      // makes sure it happens, but in rare cases it does not; we do not want
      // to overcomplicate vectorization to support this case.
      if (Instruction *Ptr = dyn_cast<Instruction>(SI->getPointerOperand())) {
        if (TheLoop->contains(Ptr)) {
          reportVectorizationFailure(
              "Invariant address is calculated inside the loop",
              "write to a loop invariant address could not "
              "be vectorized",
              "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
          return false;
        }
      }
    }

    if (LAI->hasStoreStoreDependenceInvolvingLoopInvariantAddress()) {
      // For each invariant address, check its last stored value is the result
      // of one of our reductions.
      //
      // We do not check if dependence with loads exists because that is
      // already checked via hasLoadStoreDependenceInvolvingLoopInvariantAddress.
      ScalarEvolution *SE = PSE.getSE();
      SmallVector<StoreInst *, 4> UnhandledStores;
      for (StoreInst *SI : LAI->getStoresToInvariantAddresses()) {
        if (isInvariantStoreOfReduction(SI)) {
          // Earlier stores to this address are effectively deadcode.
          // With opaque pointers it is possible for one pointer to be used
          // with different sizes of stored values:
          //    store i32 0, ptr %x
          //    store i8 0, ptr %x
          // The latest store doesn't completely overwrite the first one in
          // the example. That is why we have to make sure that types of
          // stored values are same.
          // TODO: Check that bitwidth of unhandled store is smaller than the
          // one that overwrites it and add a test.
          erase_if(UnhandledStores, [SE, SI](StoreInst *I) {
            return storeToSameAddress(SE, SI, I) &&
                   I->getValueOperand()->getType() ==
                       SI->getValueOperand()->getType();
          });
          continue;
        }
        UnhandledStores.push_back(SI);
      }

      bool IsOK = UnhandledStores.empty();
      // TODO: we should also validate against InvariantMemSets.
      if (!IsOK) {
        reportVectorizationFailure(
            "We don't allow storing to uniform addresses",
            "write to a loop invariant address could not "
            "be vectorized",
            "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
        return false;
      }
    }
  }

  PSE.addPredicate(LAI->getPSE().getPredicate());
  return true;
}

bool LoopVectorizationLegality::canVectorizeFPMath(
    bool EnableStrictReductions) {

  // First check if there is any ExactFP math or if we allow reassociations
  if (!Requirements->getExactFPInst() || Hints->allowReordering())
    return true;

  // If the above is false, we have ExactFPMath & do not allow reordering.
  // If the EnableStrictReductions flag is set, first check if we have any
  // Exact FP induction vars, which we cannot vectorize.
  if (!EnableStrictReductions ||
      any_of(getInductionVars(), [&](auto &Induction) -> bool {
        InductionDescriptor IndDesc = Induction.second;
        return IndDesc.getExactFPMathInst();
      }))
    return false;

  // We can now only vectorize if all reductions with Exact FP math also
  // have the isOrdered flag set, which indicates that we can move the
  // reduction operations in-loop.
  return (all_of(getReductionVars(), [&](auto &Reduction) -> bool {
    const RecurrenceDescriptor &RdxDesc = Reduction.second;
    return !RdxDesc.hasExactFPMath() || RdxDesc.isOrdered();
  }));
}
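
// For example (illustrative, not from this source): a loop computing
// 'sum += a[i]' with exact (unreordered) FP semantics is only vectorizable
// here when the reduction is marked isOrdered(), i.e. it can be performed
// in-loop as a strict, in-order reduction.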

bool LoopVectorizationLegality::isInvariantStoreOfReduction(StoreInst *SI) {
  return any_of(getReductionVars(), [&](auto &Reduction) -> bool {
    const RecurrenceDescriptor &RdxDesc = Reduction.second;
    return RdxDesc.IntermediateStore == SI;
  });
}

bool LoopVectorizationLegality::isInvariantAddressOfReduction(Value *V) {
  return any_of(getReductionVars(), [&](auto &Reduction) -> bool {
    const RecurrenceDescriptor &RdxDesc = Reduction.second;
    if (!RdxDesc.IntermediateStore)
      return false;

    ScalarEvolution *SE = PSE.getSE();
    Value *InvariantAddress = RdxDesc.IntermediateStore->getPointerOperand();
    return V == InvariantAddress ||
           SE->getSCEV(V) == SE->getSCEV(InvariantAddress);
  });
}

bool LoopVectorizationLegality::isInductionPhi(const Value *V) const {
  Value *In0 = const_cast<Value *>(V);
  PHINode *PN = dyn_cast_or_null<PHINode>(In0);
  if (!PN)
    return false;

  return Inductions.count(PN);
}

const InductionDescriptor *
LoopVectorizationLegality::getIntOrFpInductionDescriptor(PHINode *Phi) const {
  if (!isInductionPhi(Phi))
    return nullptr;
  auto &ID = getInductionVars().find(Phi)->second;
  if (ID.getKind() == InductionDescriptor::IK_IntInduction ||
      ID.getKind() == InductionDescriptor::IK_FpInduction)
    return &ID;
  return nullptr;
}

const InductionDescriptor *
LoopVectorizationLegality::getPointerInductionDescriptor(PHINode *Phi) const {
  if (!isInductionPhi(Phi))
    return nullptr;
  auto &ID = getInductionVars().find(Phi)->second;
  if (ID.getKind() == InductionDescriptor::IK_PtrInduction)
    return &ID;
  return nullptr;
}

bool LoopVectorizationLegality::isCastedInductionVariable(
    const Value *V) const {
  auto *Inst = dyn_cast<Instruction>(V);
  return (Inst && InductionCastsToIgnore.count(Inst));
}

bool LoopVectorizationLegality::isInductionVariable(const Value *V) const {
  return isInductionPhi(V) || isCastedInductionVariable(V);
}

bool LoopVectorizationLegality::isFixedOrderRecurrence(
    const PHINode *Phi) const {
  return FixedOrderRecurrences.count(Phi);
}

bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) const {
  // When vectorizing early exits, create predicates for the latch block only.
  // The early exiting block must be a direct predecessor of the latch at the
  // moment.
  BasicBlock *Latch = TheLoop->getLoopLatch();
  if (hasUncountableEarlyExit()) {
    assert(
        is_contained(predecessors(Latch), getUncountableEarlyExitingBlock()) &&
        "Uncountable exiting block must be a direct predecessor of latch");
    return BB == Latch;
  }
  return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
}

bool LoopVectorizationLegality::blockCanBePredicated(
    BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs,
    SmallPtrSetImpl<const Instruction *> &MaskedOp) const {
  for (Instruction &I : *BB) {
    // We can predicate blocks with calls to assume, as long as we drop them in
    // case we flatten the CFG via predication.
    if (match(&I, m_Intrinsic<Intrinsic::assume>())) {
      MaskedOp.insert(&I);
      continue;
    }

    // Do not let llvm.experimental.noalias.scope.decl block the vectorization.
    // TODO: there might be cases that it should block the vectorization. Let's
    // ignore those for now.
    if (isa<NoAliasScopeDeclInst>(&I))
      continue;

    // We can allow masked calls if there's at least one vector variant, even
    // if we end up scalarizing due to the cost model calculations.
    // TODO: Allow other calls if they have appropriate attributes... readonly
    // and argmemonly?
    if (CallInst *CI = dyn_cast<CallInst>(&I))
      if (VFDatabase::hasMaskedVariant(*CI)) {
        MaskedOp.insert(CI);
        continue;
      }

    // Loads are handled via masking (or speculated if safe to do so.)
    if (auto *LI = dyn_cast<LoadInst>(&I)) {
      if (!SafePtrs.count(LI->getPointerOperand()))
        MaskedOp.insert(LI);
      continue;
    }

    // Predicated store requires some form of masking:
    //  1) masked store HW instruction,
    //  2) emulation via load-blend-store (only if safe and legal to do so,
    //     be aware on the race conditions), or
    //  3) element-by-element predicate check and scalar store.
    if (auto *SI = dyn_cast<StoreInst>(&I)) {
      MaskedOp.insert(SI);
      continue;
    }

    if (I.mayReadFromMemory() || I.mayWriteToMemory() || I.mayThrow())
      return false;
  }

  return true;
}

bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
  if (!EnableIfConversion) {
    reportVectorizationFailure("If-conversion is disabled",
                               "IfConversionDisabled", ORE, TheLoop);
    return false;
  }

  assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");

  // A list of pointers which are known to be dereferenceable within scope of
  // the loop body for each iteration of the loop which executes. That is,
  // the memory pointed to can be dereferenced (with the access size implied by
  // the value's type) unconditionally within the loop header without
  // introducing a new fault.
  SmallPtrSet<Value *, 8> SafePointers;

  // Collect safe addresses.
  for (BasicBlock *BB : TheLoop->blocks()) {
    if (!blockNeedsPredication(BB)) {
      for (Instruction &I : *BB)
        if (auto *Ptr = getLoadStorePointerOperand(&I))
          SafePointers.insert(Ptr);
      continue;
    }

    // For a block which requires predication, an address may be safe to access
    // in the loop w/o predication if we can prove dereferenceability facts
    // sufficient to ensure it'll never fault within the loop. For the moment,
    // we restrict this to loads; stores are more complicated due to
    // concurrency restrictions.
    ScalarEvolution &SE = *PSE.getSE();
    SmallVector<const SCEVPredicate *, 4> Predicates;
    for (Instruction &I : *BB) {
      LoadInst *LI = dyn_cast<LoadInst>(&I);

      // Make sure we can execute all computations feeding into Ptr in the loop
      // w/o triggering UB and that none of the out-of-loop operands are
      // poison. We do not need to check if operations inside the loop can
      // produce poison due to flags (e.g. due to an inbounds GEP going out of
      // bounds), because flags will be dropped when executing them
      // unconditionally.
      // TODO: Results could be improved by considering poison-propagation
      // properties of visited ops.
      auto CanSpeculatePointerOp = [this](Value *Ptr) {
        SmallVector<Value *> Worklist = {Ptr};
        SmallPtrSet<Value *, 4> Visited;
        while (!Worklist.empty()) {
          Value *CurrV = Worklist.pop_back_val();
          if (!Visited.insert(CurrV).second)
            continue;

          auto *CurrI = dyn_cast<Instruction>(CurrV);
          if (!CurrI || !TheLoop->contains(CurrI)) {
            // If operands from outside the loop may be poison then Ptr may
            // also be poison.
            if (!isGuaranteedNotToBePoison(CurrV, AC,
                                           TheLoop->getLoopPredecessor()
                                               ->getTerminator()
                                               ->getIterator(),
                                           DT))
              return false;
            continue;
          }

          // A loaded value may be poison, independent of any flags.
          if (isa<LoadInst>(CurrI) && !isGuaranteedNotToBePoison(CurrV, AC))
            return false;

          // For other ops, assume poison can only be introduced via flags,
          // which can be dropped.
          if (!isa<PHINode>(CurrI) && !isSafeToSpeculativelyExecute(CurrI))
            return false;
          append_range(Worklist, CurrI->operands());
        }
        return true;
      };
      // Pass the Predicates pointer to isDereferenceableAndAlignedInLoop so
      // that it will consider loops that need guarding by SCEV checks. The
      // vectoriser will generate these checks if we decide to vectorise.
      if (LI && !LI->getType()->isVectorTy() && !mustSuppressSpeculation(*LI) &&
          CanSpeculatePointerOp(LI->getPointerOperand()) &&
          isDereferenceableAndAlignedInLoop(LI, TheLoop, SE, *DT, AC,
                                            &Predicates))
        SafePointers.insert(LI->getPointerOperand());
      Predicates.clear();
    }
  }

  // Collect the blocks that need predication.
  for (BasicBlock *BB : TheLoop->blocks()) {
    // We support only branches and switch statements as terminators inside the
    // loop.
    if (isa<SwitchInst>(BB->getTerminator())) {
      if (TheLoop->isLoopExiting(BB)) {
        reportVectorizationFailure("Loop contains an unsupported switch",
                                   "LoopContainsUnsupportedSwitch", ORE,
                                   TheLoop, BB->getTerminator());
        return false;
      }
    } else if (!isa<BranchInst>(BB->getTerminator())) {
      reportVectorizationFailure("Loop contains an unsupported terminator",
                                 "LoopContainsUnsupportedTerminator", ORE,
                                 TheLoop, BB->getTerminator());
      return false;
    }

    // We must be able to predicate all blocks that need to be predicated.
    if (blockNeedsPredication(BB) &&
        !blockCanBePredicated(BB, SafePointers, MaskedOp)) {
      reportVectorizationFailure(
          "Control flow cannot be substituted for a select", "NoCFGForSelect",
          ORE, TheLoop, BB->getTerminator());
      return false;
    }
  }

  // We can if-convert this loop.
  return true;
}

// Helper function to canVectorizeLoopNestCFG.
bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
                                                    bool UseVPlanNativePath) {
  assert((UseVPlanNativePath || Lp->isInnermost()) &&
         "VPlan-native path is not enabled.");

  // TODO: ORE should be improved to show more accurate information when an
  // outer loop can't be vectorized because a nested loop is not understood or
  // legal. Something like: "outer_loop_location: loop not vectorized:
  // (inner_loop_location) loop control flow is not understood by vectorizer".

  // Store the result and return it at the end instead of exiting early, in
  // case allowExtraAnalysis is used to report multiple reasons for not
  // vectorizing.
  bool Result = true;
  bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);

  // We must have a loop in canonical form. Loops with indirectbr in them
  // cannot be canonicalized.
  if (!Lp->getLoopPreheader()) {
    reportVectorizationFailure(
        "Loop doesn't have a legal pre-header",
        "loop control flow is not understood by vectorizer",
        "CFGNotUnderstood", ORE, TheLoop);
    if (DoExtraAnalysis)
      Result = false;
    else
      return false;
  }

  // We must have a single backedge.
  if (Lp->getNumBackEdges() != 1) {
    reportVectorizationFailure(
        "The loop must have a single backedge",
        "loop control flow is not understood by vectorizer",
        "CFGNotUnderstood", ORE, TheLoop);
    if (DoExtraAnalysis)
      Result = false;
    else
      return false;
  }

  return Result;
}
1648
1649bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
1650 Loop *Lp, bool UseVPlanNativePath) {
1651 // Store the result and return it at the end instead of exiting early, in case
1652 // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1653 bool Result = true;
1654 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1655 if (!canVectorizeLoopCFG(Lp, UseVPlanNativePath)) {
1656 if (DoExtraAnalysis)
1657 Result = false;
1658 else
1659 return false;
1660 }
1661
1662 // Recursively check whether the loop control flow of nested loops is
1663 // understood.
1664 for (Loop *SubLp : *Lp)
1665 if (!canVectorizeLoopNestCFG(SubLp, UseVPlanNativePath)) {
1666 if (DoExtraAnalysis)
1667 Result = false;
1668 else
1669 return false;
1670 }
1671
1672 return Result;
1673}
1674
1675bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
1676 BasicBlock *LatchBB = TheLoop->getLoopLatch();
1677 if (!LatchBB) {
1678 reportVectorizationFailure("Loop does not have a latch",
1679 "Cannot vectorize early exit loop",
1680 "NoLatchEarlyExit", ORE, TheLoop);
1681 return false;
1682 }
1683
1684 if (Reductions.size() || FixedOrderRecurrences.size()) {
1685 reportVectorizationFailure(
1686 "Found reductions or recurrences in early-exit loop",
1687 "Cannot vectorize early exit loop with reductions or recurrences",
1688 "RecurrencesInEarlyExitLoop", ORE, TheLoop);
1689 return false;
1690 }
1691
1692 SmallVector<BasicBlock *, 8> ExitingBlocks;
1693 TheLoop->getExitingBlocks(ExitingBlocks);
1694
1695 // Keep a record of all the exiting blocks.
1696 SmallVector<const SCEVPredicate *, 4> Predicates;
1697 BasicBlock *SingleUncountableExitingBlock = nullptr;
1698 for (BasicBlock *BB : ExitingBlocks) {
1699 const SCEV *EC =
1700 PSE.getSE()->getPredicatedExitCount(TheLoop, BB, &Predicates);
1701 if (isa<SCEVCouldNotCompute>(EC)) {
1702 if (size(successors(BB)) != 2) {
1703 reportVectorizationFailure(
1704 "Early exiting block does not have exactly two successors",
1705 "Incorrect number of successors from early exiting block",
1706 "EarlyExitTooManySuccessors", ORE, TheLoop);
1707 return false;
1708 }
1709
1710 if (SingleUncountableExitingBlock) {
1711 reportVectorizationFailure(
1712 "Loop has too many uncountable exits",
1713 "Cannot vectorize early exit loop with more than one early exit",
1714 "TooManyUncountableEarlyExits", ORE, TheLoop);
1715 return false;
1716 }
1717
1718 SingleUncountableExitingBlock = BB;
1719 } else
1720 CountableExitingBlocks.push_back(BB);
1721 }
1722 // We can safely ignore the predicates here because when vectorizing the loop
1723 // the PredicatedScalarEvolution class will keep track of all predicates
1724 // for each exiting block anyway. This happens when calling
1725 // PSE.getSymbolicMaxBackedgeTakenCount() below.
1726 Predicates.clear();
1727
1728 if (!SingleUncountableExitingBlock) {
1729 LLVM_DEBUG(dbgs() << "LV: Could not find any uncountable exits");
1730 return false;
1731 }
1732
1733 // The only supported early exit loops so far are ones where the early
1734 // exiting block is a unique predecessor of the latch block.
1735 BasicBlock *LatchPredBB = LatchBB->getUniquePredecessor();
1736 if (LatchPredBB != SingleUncountableExitingBlock) {
1737 reportVectorizationFailure("Early exit is not the latch predecessor",
1738 "Cannot vectorize early exit loop",
1739 "EarlyExitNotLatchPredecessor", ORE, TheLoop);
1740 return false;
1741 }
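  // The supported shape is thus, illustratively:
  //   for (i = 0; i != N; i++) {   // latch block with a countable exit
  //     if (p[i] == x)             // single uncountable early exit, the
  //       break;                   // unique predecessor of the latch
  //   }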
1742
1743 // The latch block must have a countable exit.
1744 if (isa<SCEVCouldNotCompute>(
1745 PSE.getSE()->getPredicatedExitCount(TheLoop, LatchBB, &Predicates))) {
1746 reportVectorizationFailure(
1747 "Cannot determine exact exit count for latch block",
1748 "Cannot vectorize early exit loop",
1749 "UnknownLatchExitCountEarlyExitLoop", ORE, TheLoop);
1750 return false;
1751 }
1752 assert(llvm::is_contained(CountableExitingBlocks, LatchBB) &&
1753 "Latch block not found in list of countable exits!");
1754
1755 // Check to see if there are instructions that could potentially generate
1756 // exceptions or have side-effects.
1757 auto IsSafeOperation = [](Instruction *I) -> bool {
1758 switch (I->getOpcode()) {
1759 case Instruction::Load:
1760 case Instruction::Store:
1761 case Instruction::PHI:
1762 case Instruction::Br:
1763 // These are checked separately.
1764 return true;
1765 default:
1766 return isSafeToSpeculativelyExecute(I);
1767 }
1768 };
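  // Illustration of why speculation matters here: an operation such as a
  // udiv with a variable divisor may trap, and the vector body could
  // execute it on iterations that the scalar loop, having already taken
  // the early exit, would never reach; such operations are rejected.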
1769
1770 bool HasSideEffects = false;
1771 for (auto *BB : TheLoop->blocks())
1772 for (auto &I : *BB) {
1773 if (I.mayWriteToMemory()) {
1774 if (isa<StoreInst>(&I) && cast<StoreInst>(&I)->isSimple()) {
1775 HasSideEffects = true;
1776 continue;
1777 }
1778
1779 // We don't support complex writes to memory.
1780 reportVectorizationFailure(
1781 "Complex writes to memory unsupported in early exit loops",
1782 "Cannot vectorize early exit loop with complex writes to memory",
1783 "WritesInEarlyExitLoop", ORE, TheLoop);
1784 return false;
1785 }
1786
1787 if (!IsSafeOperation(&I)) {
1788 reportVectorizationFailure("Early exit loop contains operations that "
1789 "cannot be speculatively executed",
1790 "UnsafeOperationsEarlyExitLoop", ORE,
1791 TheLoop);
1792 return false;
1793 }
1794 }
1795
1796 // The vectoriser cannot handle loads that occur after the early exit block.
1797 assert(LatchBB->getUniquePredecessor() == SingleUncountableExitingBlock &&
1798 "Expected latch predecessor to be the early exiting block");
1799
1800 SmallVector<LoadInst *, 4> NonDerefLoads;
1801 // TODO: Handle loops that may fault.
1802 if (!HasSideEffects) {
1803 // Read-only loop.
1804 Predicates.clear();
1805 if (!isReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC, NonDerefLoads,
1806 &Predicates)) {
1807 reportVectorizationFailure(
1808 "Loop may fault", "Cannot vectorize non-read-only early exit loop",
1809 "NonReadOnlyEarlyExitLoop", ORE, TheLoop);
1810 return false;
1811 }
1812 } else if (!canUncountableExitConditionLoadBeMoved(
1813 SingleUncountableExitingBlock))
1814 return false;
1815
1816 // Check non-dereferenceable loads if any.
1817 for (LoadInst *LI : NonDerefLoads) {
1818 // Only support unit-stride access for now.
1819 int Stride = isConsecutivePtr(LI->getType(), LI->getPointerOperand());
1820 if (Stride != 1) {
1821 reportVectorizationFailure(
1822 "Loop contains potentially faulting strided load",
1823 "Cannot vectorize early exit loop with "
1824 "strided fault-only-first load",
1825 "EarlyExitLoopWithStridedFaultOnlyFirstLoad", ORE, TheLoop);
1826 return false;
1827 }
1828 PotentiallyFaultingLoads.insert(LI);
1829 LLVM_DEBUG(dbgs() << "LV: Found potentially faulting load: " << *LI
1830 << "\n");
1831 }
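  // Illustration: a load of p[i] is consecutive (stride 1) and can use a
  // fault-only-first access, whereas a load of p[2*i] (stride 2) is
  // rejected by the check above.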
1832
1833 [[maybe_unused]] const SCEV *SymbolicMaxBTC =
1834 PSE.getSymbolicMaxBackedgeTakenCount();
1835 // Since we have an exact exit count for the latch and the early exit
1836 // dominates the latch, this should guarantee a computed SCEV value.
1837 assert(!isa<SCEVCouldNotCompute>(SymbolicMaxBTC) &&
1838 "Failed to get symbolic expression for backedge taken count");
1839 LLVM_DEBUG(dbgs() << "LV: Found an early exit loop with symbolic max "
1840 "backedge taken count: "
1841 << *SymbolicMaxBTC << '\n');
1842 UncountableExitingBB = SingleUncountableExitingBlock;
1843 UncountableExitWithSideEffects = HasSideEffects;
1844 return true;
1845}
1846
1847bool LoopVectorizationLegality::canUncountableExitConditionLoadBeMoved(
1848 BasicBlock *ExitingBlock) {
1849 // Try to find a load in the critical path for the uncountable exit condition.
1850 // This currently matches roughly the simplest form we can handle: a
1851 // single in-loop load whose result is directly compared against a
1852 // loop-invariant value.
1853 // FIXME: We're insisting on a single use for now, because otherwise we will
1854 // need to make PHI nodes for other users. That can be done once the initial
1855 // transform code lands.
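  // Illustratively, the expected shape is (hypothetical IR):
  //   %l   = load i32, ptr %p        ; the single in-loop load
  //   %cmp = icmp eq i32 %l, %inv    ; %inv is loop-invariant
  //   br i1 %cmp, label %exit, label %latch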
1856 auto *Br = cast<BranchInst>(ExitingBlock->getTerminator());
1857
1858 using namespace llvm::PatternMatch;
1859 Instruction *L = nullptr;
1860 Value *Ptr = nullptr;
1861 Value *R = nullptr;
1862 if (!match(Br->getCondition(),
1863 m_OneUse(m_ICmp(m_CombineAnd(m_Instruction(L), m_Load(m_Value(Ptr))),
1864 m_Value(R))))) {
1865 reportVectorizationFailure(
1866 "Early exit loop with store but no supported condition load",
1867 "NoConditionLoadForEarlyExitLoop", ORE, TheLoop);
1868 return false;
1869 }
1870
1871 // FIXME: Don't rely on operand ordering for the comparison.
1872 if (!TheLoop->isLoopInvariant(R)) {
1873 reportVectorizationFailure(
1874 "Early exit loop with store but no supported condition load",
1875 "NoConditionLoadForEarlyExitLoop", ORE, TheLoop);
1876 return false;
1877 }
1878
1879 // Make sure that the load address is not loop invariant; we want an
1880 // address calculation that we can rotate to the next vector iteration.
1881 const SCEV *PtrScev = PSE.getSE()->getSCEV(Ptr);
1882 if (!isa<SCEVAddRecExpr>(PtrScev)) {
1883 reportVectorizationFailure(
1884 "Uncountable exit condition depends on load with an address that is "
1885 "not an add recurrence",
1886 "EarlyExitLoadInvariantAddress", ORE, TheLoop);
1887 return false;
1888 }
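  // E.g. a condition load from p[i] has an add-recurrence address and is
  // accepted; a load from a fixed location *p would re-read the same
  // address every iteration, leaving nothing to rotate forward.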
1889
1890 // FIXME: Support gathers after first-faulting load support lands.
1892 LoadInst *Load = cast<LoadInst>(L);
1893 if (!isDereferenceableAndAlignedInLoop(Load, TheLoop, *PSE.getSE(), *DT, AC,
1894 &Predicates)) {
1895 reportVectorizationFailure(
1896 "Loop may fault",
1897 "Cannot vectorize potentially faulting early exit loop",
1898 "PotentiallyFaultingEarlyExitLoop", ORE, TheLoop);
1899 return false;
1900 }
1901
1902 ICFLoopSafetyInfo SafetyInfo;
1903 SafetyInfo.computeLoopSafetyInfo(TheLoop);
1904 // We need to know that the load will be executed before we can hoist a
1905 // copy out to run just before the first iteration.
1906 // FIXME: Currently, other restrictions prevent us from reaching this point
1907 // with a loop where the uncountable exit condition is determined
1908 // by a conditional load.
1909 assert(SafetyInfo.isGuaranteedToExecute(*Load, DT, TheLoop) &&
1910 "Unhandled control flow in uncountable exit loop with side effects");
1911
1912 // Prohibit any potential aliasing with any instruction in the loop which
1913 // might store to memory.
1914 // FIXME: Relax this constraint where possible.
1915 for (auto *BB : TheLoop->blocks()) {
1916 for (auto &I : *BB) {
1917 if (&I == Load)
1918 continue;
1919
1920 if (I.mayWriteToMemory()) {
1921 if (auto *SI = dyn_cast<StoreInst>(&I)) {
1922 AliasResult AR = AA->alias(Ptr, SI->getPointerOperand());
1923 if (AR == AliasResult::NoAlias)
1924 continue;
1925 }
1926
1927 reportVectorizationFailure(
1928 "Cannot determine whether critical uncountable exit load address "
1929 "does not alias with a memory write",
1930 "CantVectorizeAliasWithCriticalUncountableExitLoad", ORE, TheLoop);
1931 return false;
1932 }
1933 }
1934 }
1935
1936 return true;
1937}
1938
1939bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
1940 // Store the result and return it at the end instead of exiting early, in case
1941 // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1942 bool Result = true;
1943
1944 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1945 // Check whether the loop-related control flow in the loop nest is
1946 // understood by the vectorizer.
1947 if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) {
1948 if (DoExtraAnalysis) {
1949 LLVM_DEBUG(dbgs() << "LV: legality check failed: loop nest");
1950 Result = false;
1951 } else {
1952 return false;
1953 }
1954 }
1955
1956 // We need to have a loop header.
1957 LLVM_DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName()
1958 << '\n');
1959
1960 // Specific checks for outer loops. We skip the remaining legal checks at this
1961 // point because they don't support outer loops.
1962 if (!TheLoop->isInnermost()) {
1963 assert(UseVPlanNativePath && "VPlan-native path is not enabled.");
1964
1965 if (!canVectorizeOuterLoop()) {
1966 reportVectorizationFailure("Unsupported outer loop",
1967 "UnsupportedOuterLoop", ORE, TheLoop);
1968 // TODO: Implement DoExtraAnalysis when subsequent legal checks support
1969 // outer loops.
1970 return false;
1971 }
1972
1973 LLVM_DEBUG(dbgs() << "LV: We can vectorize this outer loop!\n");
1974 return Result;
1975 }
1976
1977 assert(TheLoop->isInnermost() && "Inner loop expected.");
1978 // Check if we can if-convert non-single-bb loops.
1979 unsigned NumBlocks = TheLoop->getNumBlocks();
1980 if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
1981 LLVM_DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");
1982 if (DoExtraAnalysis)
1983 Result = false;
1984 else
1985 return false;
1986 }
1987
1988 // Check if we can vectorize the instructions and CFG in this loop.
1989 if (!canVectorizeInstrs()) {
1990 LLVM_DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n");
1991 if (DoExtraAnalysis)
1992 Result = false;
1993 else
1994 return false;
1995 }
1996
1997 if (isa<SCEVCouldNotCompute>(PSE.getBackedgeTakenCount())) {
1998 if (TheLoop->getExitingBlock()) {
1999 reportVectorizationFailure("Cannot vectorize uncountable loop",
2000 "UnsupportedUncountableLoop", ORE, TheLoop);
2001 if (DoExtraAnalysis)
2002 Result = false;
2003 else
2004 return false;
2005 } else {
2006 if (!isVectorizableEarlyExitLoop()) {
2008 assert(!hasUncountableEarlyExit() &&
2009 "Must be false without vectorizable early-exit loop");
2010 if (DoExtraAnalysis)
2011 Result = false;
2012 else
2013 return false;
2014 }
2015 }
2016 }
2017
2018 // Go over each instruction and look at memory deps.
2019 if (!canVectorizeMemory()) {
2020 LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
2021 if (DoExtraAnalysis)
2022 Result = false;
2023 else
2024 return false;
2025 }
2026
2027 // Bail out for state-changing loops with uncountable exits for now.
2028 if (UncountableExitWithSideEffects) {
2029 reportVectorizationFailure(
2030 "Writes to memory unsupported in early exit loops",
2031 "Cannot vectorize early exit loop with writes to memory",
2032 "WritesInEarlyExitLoop", ORE, TheLoop);
2033 return false;
2034 }
2035
2036 if (Result) {
2037 LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
2038 << (LAI->getRuntimePointerChecking()->Need
2039 ? " (with a runtime bound check)"
2040 : "")
2041 << "!\n");
2042 }
2043
2044 unsigned SCEVThreshold = VectorizeSCEVCheckThreshold;
2045 if (Hints->getForce() == LoopVectorizeHints::FK_Enabled)
2046 SCEVThreshold = PragmaVectorizeSCEVCheckThreshold;
2047
2048 if (PSE.getPredicate().getComplexity() > SCEVThreshold) {
2049 LLVM_DEBUG(dbgs() << "LV: Vectorization not profitable "
2050 "due to SCEVThreshold");
2051 reportVectorizationFailure("Too many SCEV checks needed",
2052 "Too many SCEV assumptions need to be made and checked at runtime",
2053 "TooManySCEVRunTimeChecks", ORE, TheLoop);
2054 if (DoExtraAnalysis)
2055 Result = false;
2056 else
2057 return false;
2058 }
2059
2060 // Okay! We've done all the tests. If any have failed, return false. Otherwise
2061 // we can vectorize, and at this point we don't have any other mem analysis
2062 // which may limit our maximum vectorization factor, so just return true with
2063 // no restrictions.
2064 return Result;
2065}
2066
2067bool LoopVectorizationLegality::canFoldTailByMasking() const {
2068 // The only loops we can vectorize without a scalar epilogue are loops with
2069 // a bottom-test and a single exiting block. We'd have to handle the fact
2070 // that not every instruction executes on the last iteration. This will
2071 // require a lane mask which varies through the vector loop body. (TODO)
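  // Illustration: with a trip count of 10 and VF = 4, the last vector
  // iteration covers elements 8..11, so lanes 2 and 3 would have to be
  // masked off rather than handled by a scalar epilogue.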
2072 if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) {
2073 LLVM_DEBUG(
2074 dbgs()
2075 << "LV: Cannot fold tail by masking. Requires a singe latch exit\n");
2076 return false;
2077 }
2078
2079 LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");
2080
2081 SmallPtrSet<const Value *, 8> ReductionLiveOuts;
2082
2083 for (const auto &Reduction : getReductionVars())
2084 ReductionLiveOuts.insert(Reduction.second.getLoopExitInstr());
2085
2086 // TODO: handle non-reduction outside users when tail is folded by masking.
2087 for (auto *AE : AllowedExit) {
2088 // Check that all users of allowed exit values are inside the loop or
2089 // are the live-out of a reduction.
2090 if (ReductionLiveOuts.count(AE))
2091 continue;
2092 for (User *U : AE->users()) {
2093 auto *UI = cast<Instruction>(U);
2094 if (TheLoop->contains(UI))
2095 continue;
2096 LLVM_DEBUG(
2097 dbgs()
2098 << "LV: Cannot fold tail by masking, loop has an outside user for "
2099 << *UI << "\n");
2100 return false;
2101 }
2102 }
2103
2104 for (const auto &Entry : getInductionVars()) {
2105 PHINode *OrigPhi = Entry.first;
2106 for (User *U : OrigPhi->users()) {
2107 auto *UI = cast<Instruction>(U);
2108 if (!TheLoop->contains(UI)) {
2109 LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking, loop IV has an "
2110 "outside user for "
2111 << *UI << "\n");
2112 return false;
2113 }
2114 }
2115 }
2116
2117 // The list of pointers that we can safely read and write to remains empty.
2118 SmallPtrSet<Value *, 8> SafePointers;
2119
2120 // Check all blocks for predication, including those that ordinarily do not
2121 // need predication such as the header block.
2122 SmallPtrSet<const Instruction *, 8> TmpMaskedOp;
2123 for (BasicBlock *BB : TheLoop->blocks()) {
2124 if (!blockCanBePredicated(BB, SafePointers, TmpMaskedOp)) {
2125 LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking.\n");
2126 return false;
2127 }
2128 }
2129
2130 LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n");
2131
2132 return true;
2133}
2134
2135void LoopVectorizationLegality::prepareToFoldTailByMasking() {
2136 // The list of pointers that we can safely read and write to remains empty.
2137 SmallPtrSet<Value *, 8> SafePointers;
2138
2139 // Mark all blocks for predication, including those that ordinarily do not
2140 // need predication such as the header block.
2141 for (BasicBlock *BB : TheLoop->blocks()) {
2142 [[maybe_unused]] bool R = blockCanBePredicated(BB, SafePointers, MaskedOp);
2143 assert(R && "Must be able to predicate block when tail-folding.");
2144 }
2145}
2146
2147} // namespace llvm