//===- LoopVectorizationLegality.cpp --------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file provides loop vectorization legality analysis. Original code
// resided in LoopVectorize.cpp for a long time.
//
// At this point, it is implemented as a utility class, not as an analysis
// pass. It should be easy to create an analysis pass around it if there
// is a need (but D45420 needs to happen first).
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"

using namespace llvm;
using namespace PatternMatch;

#define LV_NAME "loop-vectorize"
#define DEBUG_TYPE LV_NAME

static cl::opt<bool>
    EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
                       cl::desc("Enable if-conversion during vectorization."));

static cl::opt<bool>
    AllowStridedPointerIVs("lv-strided-pointer-ivs", cl::init(false),
                           cl::Hidden,
                           cl::desc("Enable recognition of non-constant strided "
                                    "pointer induction variables."));

static cl::opt<bool>
    HintsAllowReordering("hints-allow-reordering", cl::init(true), cl::Hidden,
                         cl::desc("Allow enabling loop hints to reorder "
                                  "FP operations during vectorization."));

// TODO: Move size-based thresholds out of legality checking, make cost based
// decisions instead of hard thresholds.
static cl::opt<unsigned> VectorizeSCEVCheckThreshold(
    "vectorize-scev-check-threshold", cl::init(16), cl::Hidden,
    cl::desc("The maximum number of SCEV checks allowed."));

static cl::opt<unsigned> PragmaVectorizeSCEVCheckThreshold(
    "pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden,
    cl::desc("The maximum number of SCEV checks allowed with a "
             "vectorize(enable) pragma"));

66 "scalable-vectorization", cl::init(LoopVectorizeHints::SK_Unspecified),
68 cl::desc("Control whether the compiler can use scalable vectors to "
69 "vectorize a loop"),
72 "Scalable vectorization is disabled."),
75 "Scalable vectorization is available and favored when the "
76 "cost is inconclusive."),
79 "Scalable vectorization is available and favored when the "
80 "cost is inconclusive.")));
81
83 "enable-histogram-loop-vectorization", cl::init(false), cl::Hidden,
84 cl::desc("Enables autovectorization of some loops containing histograms"));
85
/// Maximum vectorization interleave count.
static const unsigned MaxInterleaveFactor = 16;

namespace llvm {

bool LoopVectorizeHints::Hint::validate(unsigned Val) {
  switch (Kind) {
  case HK_WIDTH:
    return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth;
  case HK_INTERLEAVE:
    return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
  case HK_FORCE:
    return (Val <= 1);
  case HK_ISVECTORIZED:
  case HK_PREDICATE:
  case HK_SCALABLE:
    return (Val == 0 || Val == 1);
  }
  return false;
}

LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
                                       bool InterleaveOnlyWhenForced,
                                       OptimizationRemarkEmitter &ORE,
                                       const TargetTransformInfo *TTI)
    : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH),
      Interleave("interleave.count", InterleaveOnlyWhenForced, HK_INTERLEAVE),
      Force("vectorize.enable", FK_Undefined, HK_FORCE),
      IsVectorized("isvectorized", 0, HK_ISVECTORIZED),
      Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE),
      Scalable("vectorize.scalable.enable", SK_Unspecified, HK_SCALABLE),
      TheLoop(L), ORE(ORE) {
  // Populate values with existing loop metadata.
  getHintsFromMetadata();

  // force-vector-interleave overrides DisableInterleaving.
  if (VectorizerParams::isInterleaveForced())
    Interleave.Value = VectorizerParams::VectorizationInterleave;

  // If the metadata doesn't explicitly specify whether to enable scalable
  // vectorization, then decide based on the following criteria (increasing
  // level of priority):
  //  - Target default
  //  - Metadata width
  //  - Force option (always overrides)
  if (Scalable.Value == SK_Unspecified) {
    if (TTI)
      Scalable.Value = TTI->enableScalableVectorization() ? SK_PreferScalable
                                                          : SK_FixedWidthOnly;

    if (Width.Value)
      // If the width is set, but the metadata says nothing about the scalable
      // property, then assume it concerns only a fixed-width UserVF.
      // If width is not set, the flag takes precedence.
      Scalable.Value = SK_FixedWidthOnly;
  }

  // If the flag is set to force any use of scalable vectors, override the loop
  // hints.
  if (ForceScalableVectorization.getValue() !=
      LoopVectorizeHints::SK_Unspecified)
    Scalable.Value = ForceScalableVectorization.getValue();

  // Scalable vectorization is disabled if no preference is specified.
  if (Scalable.Value == SK_Unspecified)
    Scalable.Value = SK_FixedWidthOnly;

  if (IsVectorized.Value != 1)
    // If the vectorization width and interleaving count are both 1 then
    // consider the loop to have been already vectorized because there's
    // nothing more that we can do.
    IsVectorized.Value =
        getWidth() == ElementCount::getFixed(1) && getInterleave() == 1;
  LLVM_DEBUG(if (InterleaveOnlyWhenForced && getInterleave() == 1) dbgs()
             << "LV: Interleaving disabled by the pass manager\n");
}

void LoopVectorizeHints::setAlreadyVectorized() {
  LLVMContext &Context = TheLoop->getHeader()->getContext();

  MDNode *IsVectorizedMD = MDNode::get(
      Context,
      {MDString::get(Context, "llvm.loop.isvectorized"),
       ConstantAsMetadata::get(ConstantInt::get(Context, APInt(32, 1)))});
  MDNode *LoopID = TheLoop->getLoopID();
  MDNode *NewLoopID =
      makePostTransformationMetadata(Context, LoopID,
                                     {Twine(Prefix(), "vectorize.").str(),
                                      Twine(Prefix(), "interleave.").str()},
                                     {IsVectorizedMD});
  TheLoop->setLoopID(NewLoopID);

  // Update internal cache.
  IsVectorized.Value = 1;
}
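
// For illustration (not part of the pass): after setAlreadyVectorized() the
// loop's latch branch carries metadata of roughly this shape, which stops a
// later vectorizer run from revisiting the loop:
//
//   br i1 %exitcond, label %exit, label %loop, !llvm.loop !0
//   !0 = distinct !{!0, !1}
//   !1 = !{!"llvm.loop.isvectorized", i32 1}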

bool LoopVectorizeHints::allowVectorization(
    Function *F, Loop *L, bool VectorizeOnlyWhenForced) const {
  if (getForce() == LoopVectorizeHints::FK_Disabled) {
    LLVM_DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n");
    emitRemarkWithHints();
    return false;
  }

  if (VectorizeOnlyWhenForced && getForce() != LoopVectorizeHints::FK_Enabled) {
    LLVM_DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n");
    emitRemarkWithHints();
    return false;
  }

  if (getIsVectorized() == 1) {
    LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
    // FIXME: Add interleave.disable metadata. This will allow
    // vectorize.disable to be used without disabling the pass and errors
    // to differentiate between disabled vectorization and a width of 1.
    ORE.emit([&]() {
      return OptimizationRemarkMissed(vectorizeAnalysisPassName(),
                                      "AllDisabled", L->getStartLoc(),
                                      L->getHeader())
             << "loop not vectorized: vectorization and interleaving are "
                "explicitly disabled, or the loop has already been "
                "vectorized";
    });
    return false;
  }

  return true;
}

void LoopVectorizeHints::emitRemarkWithHints() const {
  using namespace ore;

  ORE.emit([&]() {
    if (Force.Value == LoopVectorizeHints::FK_Disabled)
      return OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled",
                                      TheLoop->getStartLoc(),
                                      TheLoop->getHeader())
             << "loop not vectorized: vectorization is explicitly disabled";

    OptimizationRemarkMissed R(LV_NAME, "MissedDetails", TheLoop->getStartLoc(),
                               TheLoop->getHeader());
    R << "loop not vectorized";
    if (Force.Value == LoopVectorizeHints::FK_Enabled) {
      R << " (Force=" << NV("Force", true);
      if (Width.Value != 0)
        R << ", Vector Width=" << NV("VectorWidth", getWidth());
      if (getInterleave() != 0)
        R << ", Interleave Count=" << NV("InterleaveCount", getInterleave());
      R << ")";
    }
    return R;
  });
}

const char *LoopVectorizeHints::vectorizeAnalysisPassName() const {
  if (getWidth() == ElementCount::getFixed(1))
    return LV_NAME;
  if (getForce() == LoopVectorizeHints::FK_Disabled)
    return LV_NAME;
  if (getForce() == LoopVectorizeHints::FK_Undefined && getWidth().isZero())
    return LV_NAME;
  return OptimizationRemarkAnalysis::AlwaysPrint;
}

bool LoopVectorizeHints::allowReordering() const {
  // Allow the vectorizer to change the order of operations if loop hints
  // enabling vectorization are provided.
  ElementCount EC = getWidth();
  return HintsAllowReordering &&
         (getForce() == LoopVectorizeHints::FK_Enabled ||
          EC.getKnownMinValue() > 1);
}

void LoopVectorizeHints::getHintsFromMetadata() {
  MDNode *LoopID = TheLoop->getLoopID();
  if (!LoopID)
    return;

  // First operand should refer to the loop id itself.
  assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
  assert(LoopID->getOperand(0) == LoopID && "invalid loop id");

  for (const MDOperand &MDO : llvm::drop_begin(LoopID->operands())) {
    const MDString *S = nullptr;
    SmallVector<Metadata *, 4> Args;

    // The expected hint is either a MDString or a MDNode with the first
    // operand a MDString.
    if (const MDNode *MD = dyn_cast<MDNode>(MDO)) {
      if (!MD || MD->getNumOperands() == 0)
        continue;
      S = dyn_cast<MDString>(MD->getOperand(0));
      for (unsigned Idx = 1; Idx < MD->getNumOperands(); ++Idx)
        Args.push_back(MD->getOperand(Idx));
    } else {
      S = dyn_cast<MDString>(MDO);
      assert(Args.size() == 0 && "too many arguments for MDString");
    }

    if (!S)
      continue;

    // Check if the hint starts with the loop metadata prefix.
    StringRef Name = S->getString();
    if (Args.size() == 1)
      setHint(Name, Args[0]);
  }
}

void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) {
  if (!Name.consume_front(Prefix()))
    return;

  const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg);
  if (!C)
    return;
  unsigned Val = C->getZExtValue();

  Hint *Hints[] = {&Width, &Interleave, &Force,
                   &IsVectorized, &Predicate, &Scalable};
  for (auto *H : Hints) {
    if (Name == H->Name) {
      if (H->validate(Val))
        H->Value = Val;
      else
        LLVM_DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n");
      break;
    }
  }
}
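
// For illustration (not part of the pass): getHintsFromMetadata()/setHint()
// consume loop metadata of roughly the shape clang emits for
// '#pragma clang loop vectorize_width(4)':
//
//   br i1 %cond, label %exit, label %loop, !llvm.loop !0
//   !0 = distinct !{!0, !1, !2}
//   !1 = !{!"llvm.loop.vectorize.width", i32 4}
//   !2 = !{!"llvm.loop.vectorize.enable", i1 true}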

// Return true if the inner loop \p Lp is uniform with regard to the outer loop
// \p OuterLp (i.e., if the outer loop is vectorized, all the vector lanes
// executing the inner loop will execute the same iterations). This check is
// very constrained for now but it will be relaxed in the future. \p Lp is
// considered uniform if it meets all the following conditions:
//   1) it has a canonical IV (starting from 0 and with stride 1),
//   2) its latch terminator is a conditional branch and,
//   3) its latch condition is a compare instruction whose operands are the
//      canonical IV and an OuterLp invariant.
// This check doesn't take into account the uniformity of other conditions not
// related to the loop latch because they don't affect the loop uniformity.
//
// NOTE: We decided to keep all these checks and their associated documentation
// together so that we can easily have a picture of the current supported loop
// nests. However, some of the current checks don't depend on \p OuterLp and
// would be redundantly executed for each \p Lp if we invoked this function for
// different candidate outer loops. This is not the case for now because we
// don't currently have the infrastructure to evaluate multiple candidate outer
// loops and \p OuterLp will be a fixed parameter while we only support explicit
// outer loop vectorization. It's also very likely that these checks go away
// before introducing the aforementioned infrastructure. However, if this is not
// the case, we should move the \p OuterLp independent checks to a separate
// function that is only executed once for each \p Lp.
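//
// For illustration (hypothetical source, not from this file): in the nest
//
//   for (int i = 0; i < m; ++i)     // OuterLp, vectorization candidate
//     for (int j = 0; j < n; ++j)   // Lp: canonical IV 'j'; latch compares
//       a[i][j] += 1;               // 'j + 1' against OuterLp-invariant 'n'
//
// every vector lane of the outer loop executes the same 'j' iterations, so Lp
// is uniform; if the inner bound were 'i' instead of 'n', the check would
// fail.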
static bool isUniformLoop(Loop *Lp, Loop *OuterLp) {
  assert(Lp->getLoopLatch() && "Expected loop with a single latch.");

  // If Lp is the outer loop, it's uniform by definition.
  if (Lp == OuterLp)
    return true;
  assert(OuterLp->contains(Lp) && "OuterLp must contain Lp.");

  // 1.
  PHINode *IV = Lp->getCanonicalInductionVariable();
  if (!IV) {
    LLVM_DEBUG(dbgs() << "LV: Canonical IV not found.\n");
    return false;
  }

  // 2.
  BasicBlock *Latch = Lp->getLoopLatch();
  auto *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator());
  if (!LatchBr || LatchBr->isUnconditional()) {
    LLVM_DEBUG(dbgs() << "LV: Unsupported loop latch branch.\n");
    return false;
  }

  // 3.
  auto *LatchCmp = dyn_cast<CmpInst>(LatchBr->getCondition());
  if (!LatchCmp) {
    LLVM_DEBUG(
        dbgs() << "LV: Loop latch condition is not a compare instruction.\n");
    return false;
  }

  Value *CondOp0 = LatchCmp->getOperand(0);
  Value *CondOp1 = LatchCmp->getOperand(1);
  Value *IVUpdate = IV->getIncomingValueForBlock(Latch);
  if (!(CondOp0 == IVUpdate && OuterLp->isLoopInvariant(CondOp1)) &&
      !(CondOp1 == IVUpdate && OuterLp->isLoopInvariant(CondOp0))) {
    LLVM_DEBUG(dbgs() << "LV: Loop latch condition is not uniform.\n");
    return false;
  }

  return true;
}

// Return true if \p Lp and all its nested loops are uniform with regard to \p
// OuterLp.
static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp) {
  if (!isUniformLoop(Lp, OuterLp))
    return false;

  // Check if nested loops are uniform.
  for (Loop *SubLp : *Lp)
    if (!isUniformLoopNest(SubLp, OuterLp))
      return false;

  return true;
}

static IntegerType *getInductionIntegerTy(const DataLayout &DL, Type *Ty) {
  assert(Ty->isIntOrPtrTy() && "Expected integer or pointer type");

  if (Ty->isPointerTy())
    return DL.getIntPtrType(Ty->getContext(), Ty->getPointerAddressSpace());

  // It is possible that char's or short's overflow when we ask for the loop's
  // trip count, work around this by changing the type size.
  if (Ty->getScalarSizeInBits() < 32)
    return Type::getInt32Ty(Ty->getContext());

  return cast<IntegerType>(Ty);
}
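
// For example (illustrative): an i8 induction would wrap long before a trip
// count of 300 is reached, so it is reasoned about as i32 instead; a pointer
// induction in address space 0 on a 64-bit target maps to i64 via the data
// layout.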

static IntegerType *getWiderInductionTy(const DataLayout &DL, Type *Ty0,
                                        Type *Ty1) {
  IntegerType *TyA = getInductionIntegerTy(DL, Ty0);
  IntegerType *TyB = getInductionIntegerTy(DL, Ty1);
  return TyA->getScalarSizeInBits() > TyB->getScalarSizeInBits() ? TyA : TyB;
}

/// Check that the instruction has outside loop users and is not an
/// identified reduction variable.
static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
                               SmallPtrSetImpl<Value *> &AllowedExit) {
  // Reductions, inductions and non-header phis are allowed to have exit users.
  // All other instructions must not have external users.
  if (!AllowedExit.count(Inst))
    // Check that all of the users of the loop are inside the BB.
    for (User *U : Inst->users()) {
      Instruction *UI = cast<Instruction>(U);
      // This user may be a reduction exit value.
      if (!TheLoop->contains(UI)) {
        LLVM_DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n');
        return true;
      }
    }
  return false;
}

/// Returns true if A and B have the same pointer operand or the same SCEV
/// for their addresses.
static bool storeToSameAddress(ScalarEvolution *SE, StoreInst *A,
                               StoreInst *B) {
  // Compare the stores themselves.
  if (A == B)
    return true;

  // Otherwise compare the pointer operands.
  Value *APtr = A->getPointerOperand();
  Value *BPtr = B->getPointerOperand();
  if (APtr == BPtr)
    return true;

  // Otherwise compare the address SCEVs.
  return SE->getSCEV(APtr) == SE->getSCEV(BPtr);
}

int LoopVectorizationLegality::isConsecutivePtr(Type *AccessTy,
                                                Value *Ptr) const {
  // FIXME: Currently, the set of symbolic strides is sometimes queried before
  // it's collected. This happens from canVectorizeWithIfConvert, when the
  // pointer is checked to reference consecutive elements suitable for a
  // masked access.
  const auto &Strides =
      LAI ? LAI->getSymbolicStrides() : DenseMap<Value *, const SCEV *>();

  int Stride = getPtrStride(PSE, AccessTy, Ptr, TheLoop, *DT, Strides,
                            AllowRuntimeSCEVChecks, false)
                   .value_or(0);
  if (Stride == 1 || Stride == -1)
    return Stride;
  return 0;
}
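
// For example (illustrative): with IV 'i', the pointer of 'a[i]' has stride 1
// and 'a[n - i]' has stride -1, both of which are returned as-is, while
// 'a[2 * i]' has stride 2 and yields 0 (not consecutive).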

bool LoopVectorizationLegality::isInvariant(Value *V) const {
  return LAI->isInvariant(V);
}

namespace {
/// A rewriter to build the SCEVs for each of the VF lanes in the expected
/// vectorized loop, which can then be compared to detect their uniformity.
/// This is done by replacing the AddRec SCEVs of the original scalar loop
/// (TheLoop) with new AddRecs where the step is multiplied by StepMultiplier
/// and Offset * Step is added. Also checks if all sub-expressions are
/// analyzable w.r.t. uniformity.
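///
/// For example (illustrative): with StepMultiplier = 4 (the VF) and
/// Offset = 1, the AddRec {0,+,2} for TheLoop is rewritten to {2,+,8}: the
/// per-vector-iteration SCEV of lane 1 when the vector loop consumes 4
/// elements of stride 2 at a time.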
class SCEVAddRecForUniformityRewriter
    : public SCEVRewriteVisitor<SCEVAddRecForUniformityRewriter> {
  /// Multiplier to be applied to the step of AddRecs in TheLoop.
  unsigned StepMultiplier;

  /// Offset to be added to the AddRecs in TheLoop.
  unsigned Offset;

  /// Loop for which to rewrite AddRecs.
  Loop *TheLoop;

  /// Is any sub-expression not analyzable w.r.t. uniformity?
  bool CannotAnalyze = false;

  bool canAnalyze() const { return !CannotAnalyze; }

public:
  SCEVAddRecForUniformityRewriter(ScalarEvolution &SE, unsigned StepMultiplier,
                                  unsigned Offset, Loop *TheLoop)
      : SCEVRewriteVisitor(SE), StepMultiplier(StepMultiplier), Offset(Offset),
        TheLoop(TheLoop) {}

  const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
    assert(Expr->getLoop() == TheLoop &&
           "addrec outside of TheLoop must be invariant and should have been "
           "handled earlier");
    // Build a new AddRec by multiplying the step by StepMultiplier and
    // incrementing the start by Offset * step.
    Type *Ty = Expr->getType();
    const SCEV *Step = Expr->getStepRecurrence(SE);
    if (!SE.isLoopInvariant(Step, TheLoop)) {
      CannotAnalyze = true;
      return Expr;
    }
    const SCEV *NewStep =
        SE.getMulExpr(Step, SE.getConstant(Ty, StepMultiplier));
    const SCEV *ScaledOffset = SE.getMulExpr(Step, SE.getConstant(Ty, Offset));
    const SCEV *NewStart = SE.getAddExpr(Expr->getStart(), ScaledOffset);
    return SE.getAddRecExpr(NewStart, NewStep, TheLoop, SCEV::FlagAnyWrap);
  }

  const SCEV *visit(const SCEV *S) {
    if (CannotAnalyze || SE.isLoopInvariant(S, TheLoop))
      return S;
    return SCEVRewriteVisitor<SCEVAddRecForUniformityRewriter>::visit(S);
  }

  const SCEV *visitUnknown(const SCEVUnknown *S) {
    if (SE.isLoopInvariant(S, TheLoop))
      return S;
    // The value could vary across iterations.
    CannotAnalyze = true;
    return S;
  }

  const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *S) {
    // Could not analyze the expression.
    CannotAnalyze = true;
    return S;
  }

  static const SCEV *rewrite(const SCEV *S, ScalarEvolution &SE,
                             unsigned StepMultiplier, unsigned Offset,
                             Loop *TheLoop) {
    // Bail out if the expression does not contain a UDiv expression. Uniform
    // values which are not loop invariant require operations to strip out the
    // lowest bits. For now just look for UDivs, and use that to avoid
    // rewriting UDiv-free expressions for other lanes, to limit compile time.
    if (!SCEVExprContains(S,
                          [](const SCEV *S) { return isa<SCEVUDivExpr>(S); }))
      return SE.getCouldNotCompute();

    SCEVAddRecForUniformityRewriter Rewriter(SE, StepMultiplier, Offset,
                                             TheLoop);
    const SCEV *Result = Rewriter.visit(S);

    if (Rewriter.canAnalyze())
      return Result;
    return SE.getCouldNotCompute();
  }
};

} // namespace

bool LoopVectorizationLegality::isUniform(Value *V, ElementCount VF) const {
  if (isInvariant(V))
    return true;
  if (VF.isScalable())
    return false;
  if (VF.isScalar())
    return true;

  // Since we rely on SCEV for uniformity, if the type is not SCEVable, it is
  // never considered uniform.
  auto *SE = PSE.getSE();
  if (!SE->isSCEVable(V->getType()))
    return false;
  const SCEV *S = SE->getSCEV(V);

  // Rewrite AddRecs in TheLoop to step by VF and check if the expression for
  // lane 0 matches the expressions for all other lanes.
  unsigned FixedVF = VF.getKnownMinValue();
  const SCEV *FirstLaneExpr =
      SCEVAddRecForUniformityRewriter::rewrite(S, *SE, FixedVF, 0, TheLoop);
  if (isa<SCEVCouldNotCompute>(FirstLaneExpr))
    return false;

  // Make sure the expressions for lanes FixedVF-1..1 match the expression for
  // lane 0. We check lanes in reverse order for compile-time, as frequently
  // checking the last lane is sufficient to rule out uniformity.
  return all_of(reverse(seq<unsigned>(1, FixedVF)), [&](unsigned I) {
    const SCEV *IthLaneExpr =
        SCEVAddRecForUniformityRewriter::rewrite(S, *SE, FixedVF, I, TheLoop);
    return FirstLaneExpr == IthLaneExpr;
  });
}
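
// For example (illustrative, and assuming SCEV folds the divisions): for
// 'V = IV udiv 4' with IV = {0,+,1} and VF = 4, lane 0 rewrites to
// {0,+,4} /u 4 and lane 3 to {3,+,4} /u 4; both simplify to {0,+,1}, all lane
// expressions compare equal, and V is uniform: each vector iteration computes
// 0,0,0,0 then 1,1,1,1, and so on.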

bool LoopVectorizationLegality::isUniformMemOp(Instruction &I,
                                               ElementCount VF) const {
  Value *Ptr = getLoadStorePointerOperand(&I);
  if (!Ptr)
    return false;
  // Note: There's nothing inherent which prevents predicated loads and
  // stores from being uniform. The current lowering simply doesn't handle
  // it; in particular, the cost model distinguishes scatter/gather from
  // scalar w/predication, and we currently rely on the scalar path.
  return isUniform(Ptr, VF) && !blockNeedsPredication(I.getParent());
}

bool LoopVectorizationLegality::canVectorizeOuterLoop() {
  assert(!TheLoop->isInnermost() && "We are not vectorizing an outer loop.");
  // Store the result and return it at the end instead of exiting early, in case
  // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
  bool Result = true;
  bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);

  for (BasicBlock *BB : TheLoop->blocks()) {
    // Check whether the BB terminator is a BranchInst. Any other terminator is
    // not supported yet.
    auto *Br = dyn_cast<BranchInst>(BB->getTerminator());
    if (!Br) {
      reportVectorizationFailure("Unsupported basic block terminator",
                                 "loop control flow is not understood by vectorizer",
                                 "CFGNotUnderstood", ORE, TheLoop);
      if (DoExtraAnalysis)
        Result = false;
      else
        return false;
    }

    // Check whether the BranchInst is a supported one. Only unconditional
    // branches, conditional branches with an outer loop invariant condition or
    // backedges are supported.
    // FIXME: We skip these checks when VPlan predication is enabled as we
    // want to allow divergent branches. This whole check will be removed
    // once VPlan predication is on by default.
    if (Br && Br->isConditional() &&
        !TheLoop->isLoopInvariant(Br->getCondition()) &&
        !LI->isLoopHeader(Br->getSuccessor(0)) &&
        !LI->isLoopHeader(Br->getSuccessor(1))) {
      reportVectorizationFailure("Unsupported conditional branch",
                                 "loop control flow is not understood by vectorizer",
                                 "CFGNotUnderstood", ORE, TheLoop);
      if (DoExtraAnalysis)
        Result = false;
      else
        return false;
    }
  }

  // Check whether inner loops are uniform. At this point, we only support
  // simple outer loops scenarios with uniform nested loops.
  if (!isUniformLoopNest(TheLoop /*loop nest*/,
                         TheLoop /*context outer loop*/)) {
    reportVectorizationFailure("Outer loop contains divergent loops",
                               "loop control flow is not understood by vectorizer",
                               "CFGNotUnderstood", ORE, TheLoop);
    if (DoExtraAnalysis)
      Result = false;
    else
      return false;
  }

  // Check whether we are able to set up outer loop induction.
  if (!setupOuterLoopInductions()) {
    reportVectorizationFailure("Unsupported outer loop Phi(s)",
                               "UnsupportedPhi", ORE, TheLoop);
    if (DoExtraAnalysis)
      Result = false;
    else
      return false;
  }

  return Result;
}

void LoopVectorizationLegality::addInductionPhi(
    PHINode *Phi, const InductionDescriptor &ID,
    SmallPtrSetImpl<Value *> &AllowedExit) {
  Inductions[Phi] = ID;

  // In case this induction also comes with casts that we know we can ignore
  // in the vectorized loop body, record them here. All casts could be recorded
  // here for ignoring, but it suffices to record only the first (as it is the
  // only one that may be used outside the cast sequence).
  ArrayRef<Instruction *> Casts = ID.getCastInsts();
  if (!Casts.empty())
    InductionCastsToIgnore.insert(*Casts.begin());

  Type *PhiTy = Phi->getType();
  const DataLayout &DL = Phi->getDataLayout();

  assert((PhiTy->isIntOrPtrTy() || PhiTy->isFloatingPointTy()) &&
         "Expected int, ptr, or FP induction phi type");

  // Get the widest type.
  if (PhiTy->isIntOrPtrTy()) {
    if (!WidestIndTy)
      WidestIndTy = getInductionIntegerTy(DL, PhiTy);
    else
      WidestIndTy = getWiderInductionTy(DL, PhiTy, WidestIndTy);
  }

  // Int inductions are special because we only allow one IV.
  if (ID.getKind() == InductionDescriptor::IK_IntInduction &&
      ID.getConstIntStepValue() && ID.getConstIntStepValue()->isOne() &&
      isa<Constant>(ID.getStartValue()) &&
      cast<Constant>(ID.getStartValue())->isNullValue()) {

    // Use the phi node with the widest type as induction. Use the last
    // one if there are multiple (no good reason for doing this other
    // than it is expedient). We've checked that it begins at zero and
    // steps by one, so this is a canonical induction variable.
    if (!PrimaryInduction || PhiTy == WidestIndTy)
      PrimaryInduction = Phi;
  }

  // Both the PHI node itself, and the "post-increment" value feeding
  // back into the PHI node may have external users.
  // We can allow those uses, except if the SCEVs we have for them rely
  // on predicates that only hold within the loop, since allowing the exit
  // currently means re-using this SCEV outside the loop (see PR33706 for more
  // details).
  if (PSE.getPredicate().isAlwaysTrue()) {
    AllowedExit.insert(Phi);
    AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch()));
  }

  LLVM_DEBUG(dbgs() << "LV: Found an induction variable.\n");
}

bool LoopVectorizationLegality::setupOuterLoopInductions() {
  BasicBlock *Header = TheLoop->getHeader();

  // Returns true if a given Phi is a supported induction.
  auto IsSupportedPhi = [&](PHINode &Phi) -> bool {
    InductionDescriptor ID;
    if (InductionDescriptor::isInductionPHI(&Phi, TheLoop, PSE, ID) &&
        ID.getKind() == InductionDescriptor::IK_IntInduction) {
      addInductionPhi(&Phi, ID, AllowedExit);
      return true;
    }
    // Bail out for any Phi in the outer loop header that is not a supported
    // induction.
    LLVM_DEBUG(
        dbgs() << "LV: Found unsupported PHI for outer loop vectorization.\n");
    return false;
  };

  return llvm::all_of(Header->phis(), IsSupportedPhi);
}

/// Checks if a function is scalarizable according to the TLI, in
/// the sense that it should be vectorized and then expanded in
/// multiple scalar calls. This is represented in the
/// TLI via mappings that do not specify a vector name, as in the
/// following example:
///
///    const VecDesc VecIntrinsics[] = {
///      {"llvm.phx.abs.i32", "", 4}
///    };
static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI) {
  const StringRef ScalarName = CI.getCalledFunction()->getName();
  bool Scalarize = TLI.isFunctionVectorizable(ScalarName);
  // Check that all known VFs are not associated to a vector
  // function, i.e. the vector name is empty.
  if (Scalarize) {
    ElementCount WidestFixedVF, WidestScalableVF;
    TLI.getWidestVF(ScalarName, WidestFixedVF, WidestScalableVF);
    for (ElementCount VF = ElementCount::getFixed(2);
         ElementCount::isKnownLE(VF, WidestFixedVF); VF *= 2)
      Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF);
    for (ElementCount VF = ElementCount::getScalable(1);
         ElementCount::isKnownLE(VF, WidestScalableVF); VF *= 2)
      Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF);
    assert((WidestScalableVF.isZero() || !Scalarize) &&
           "Caller may decide to scalarize a variant using a scalable VF");
  }
  return Scalarize;
}

/// Returns true if the call return type `Ty` can be widened by the loop
/// vectorizer.
static bool canWidenCallReturnType(Type *Ty) {
  auto *StructTy = dyn_cast<StructType>(Ty);
  // TODO: Remove the homogeneous types restriction. This is just an initial
  // simplification. When we want to support things like the overflow
  // intrinsics we will have to lift this restriction.
  if (StructTy && !StructTy->containsHomogeneousTypes())
    return false;
  return canVectorizeTy(StructTy);
}
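
// For illustration: a call returning the homogeneous struct {float, float},
// all of whose users are extractvalue instructions, can be widened to return
// {<4 x float>, <4 x float>}; a {float, i32} return is rejected by the
// homogeneity check above.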

bool LoopVectorizationLegality::canVectorizeInstrs() {
  bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
  bool Result = true;

  // For each block in the loop.
  for (BasicBlock *BB : TheLoop->blocks()) {
    // Scan the instructions in the block and look for hazards.
    for (Instruction &I : *BB) {
      Result &= canVectorizeInstr(I);
      if (!DoExtraAnalysis && !Result)
        return false;
    }
  }

  if (!PrimaryInduction) {
    if (Inductions.empty()) {
      reportVectorizationFailure(
          "Did not find one integer induction var",
          "loop induction variable could not be identified",
          "NoInductionVariable", ORE, TheLoop);
      return false;
    }
    if (!WidestIndTy) {
      reportVectorizationFailure(
          "Did not find one integer induction var",
          "integer loop induction variable could not be identified",
          "NoIntegerInductionVariable", ORE, TheLoop);
      return false;
    }
    LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
  }

  // Now we know the widest induction type, check if our found induction
  // is the same size. If it's not, unset it here and InnerLoopVectorizer
  // will create another.
  if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType())
    PrimaryInduction = nullptr;

  return Result;
}

bool LoopVectorizationLegality::canVectorizeInstr(Instruction &I) {
  BasicBlock *BB = I.getParent();
  BasicBlock *Header = TheLoop->getHeader();

  if (auto *Phi = dyn_cast<PHINode>(&I)) {
    Type *PhiTy = Phi->getType();
    // Check that this PHI type is allowed.
    if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
        !PhiTy->isPointerTy()) {
      reportVectorizationFailure(
          "Found a non-int non-pointer PHI",
          "loop control flow is not understood by vectorizer",
          "CFGNotUnderstood", ORE, TheLoop);
      return false;
    }

    // If this PHINode is not in the header block, then we know that we
    // can convert it to select during if-conversion. No need to check if
    // the PHIs in this block are induction or reduction variables.
    if (BB != Header) {
      // Non-header phi nodes that have outside uses can be vectorized. Add
      // them to the list of allowed exits.
      // Unsafe cyclic dependencies with header phis are identified during
      // legalization for reduction, induction and fixed order
      // recurrences.
      AllowedExit.insert(&I);
      return true;
    }

    // We only allow if-converted PHIs with exactly two incoming values.
    if (Phi->getNumIncomingValues() != 2) {
      reportVectorizationFailure(
          "Found an invalid PHI",
          "loop control flow is not understood by vectorizer",
          "CFGNotUnderstood", ORE, TheLoop, Phi);
      return false;
    }

    RecurrenceDescriptor RedDes;
    if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC, DT,
                                             PSE.getSE())) {
      Requirements->addExactFPMathInst(RedDes.getExactFPMathInst());
      AllowedExit.insert(RedDes.getLoopExitInstr());
      Reductions[Phi] = RedDes;
      assert((Phi->hasOneUse() ||
              RecurrenceDescriptor::isMinMaxRecurrenceKind(
                  RedDes.getRecurrenceKind())) &&
             "Only min/max recurrences are allowed to have multiple uses "
             "currently");
      return true;
    }

    // We prevent matching non-constant strided pointer IVs to preserve
    // historical vectorizer behavior after a generalization of the
    // IVDescriptor code. The intent is to remove this check, but we
    // have to fix issues around code quality for such loops first.
    auto IsDisallowedStridedPointerInduction =
        [](const InductionDescriptor &ID) {
          if (AllowStridedPointerIVs)
            return false;
          return ID.getKind() == InductionDescriptor::IK_PtrInduction &&
                 ID.getConstIntStepValue() == nullptr;
        };

    // TODO: Instead of recording the AllowedExit, it would be good to
    // record the complementary set: NotAllowedExit. These include (but may
    // not be limited to):
    // 1. Reduction phis as they represent the one-before-last value, which
    //    is not available when vectorized
    // 2. Induction phis and increment when SCEV predicates cannot be used
    //    outside the loop - see addInductionPhi
    // 3. Non-Phis with outside uses when SCEV predicates cannot be used
    //    outside the loop - see call to hasOutsideLoopUser in the non-phi
    //    handling below
    // 4. FixedOrderRecurrence phis that can possibly be handled by
    //    extraction.
    // By recording these, we can then reason about ways to vectorize each
    // of these NotAllowedExit.
    InductionDescriptor ID;
    if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID) &&
        !IsDisallowedStridedPointerInduction(ID)) {
      addInductionPhi(Phi, ID, AllowedExit);
      Requirements->addExactFPMathInst(ID.getExactFPMathInst());
      return true;
    }

    if (RecurrenceDescriptor::isFixedOrderRecurrence(Phi, TheLoop, DT)) {
      AllowedExit.insert(Phi);
      FixedOrderRecurrences.insert(Phi);
      return true;
    }

    // As a last resort, coerce the PHI to an AddRec expression
    // and re-try classifying it as an induction PHI.
    if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true) &&
        !IsDisallowedStridedPointerInduction(ID)) {
      addInductionPhi(Phi, ID, AllowedExit);
      return true;
    }

    reportVectorizationFailure("Found an unidentified PHI",
                               "value that could not be identified as "
                               "reduction is used outside the loop",
                               "NonReductionValueUsedOutsideLoop", ORE, TheLoop,
                               Phi);
    return false;
  } // end of PHI handling

  // We handle calls that:
  //   * Have a mapping to an IR intrinsic.
  //   * Have a vector version available.
  auto *CI = dyn_cast<CallInst>(&I);

  if (CI && !getVectorIntrinsicIDForCall(CI, TLI) &&
      !(CI->getCalledFunction() && TLI &&
        (!VFDatabase::getMappings(*CI).empty() || isTLIScalarize(*TLI, *CI)))) {
    // If the call is a recognized math library call, it is likely that
    // we can vectorize it given loosened floating-point constraints.
    LibFunc Func;
    bool IsMathLibCall =
        TLI && CI->getCalledFunction() && CI->getType()->isFloatingPointTy() &&
        TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) &&
        TLI->hasOptimizedCodeGen(Func);

    if (IsMathLibCall) {
      // TODO: Ideally, we should not use clang-specific language here,
      // but it's hard to provide meaningful yet generic advice.
      // Also, should this be guarded by allowExtraAnalysis() and/or be part
      // of the returned info from isFunctionVectorizable()?
      reportVectorizationFailure(
          "Found a non-intrinsic callsite",
          "library call cannot be vectorized. "
          "Try compiling with -fno-math-errno, -ffast-math, "
          "or similar flags",
          "CantVectorizeLibcall", ORE, TheLoop, CI);
    } else {
      reportVectorizationFailure("Found a non-intrinsic callsite",
                                 "call instruction cannot be vectorized",
                                 "CantVectorizeLibcall", ORE, TheLoop, CI);
    }
    return false;
  }

  // Some intrinsics have scalar arguments, and these must be loop invariant
  // for the call to be vectorized.
  if (CI) {
    auto *SE = PSE.getSE();
    Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI);
    for (unsigned Idx = 0; Idx < CI->arg_size(); ++Idx)
      if (isVectorIntrinsicWithScalarOpAtArg(IntrinID, Idx, TTI)) {
        if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(Idx)), TheLoop)) {
          reportVectorizationFailure(
              "Found unvectorizable intrinsic",
              "intrinsic instruction cannot be vectorized",
              "CantVectorizeIntrinsic", ORE, TheLoop, CI);
          return false;
        }
      }
  }

  // If we found a vectorized variant of a function, note that so LV can
  // make better decisions about maximum VF.
  if (CI && !VFDatabase::getMappings(*CI).empty())
    VecCallVariantsFound = true;

  auto CanWidenInstructionTy = [](Instruction const &Inst) {
    Type *InstTy = Inst.getType();
    if (!isa<StructType>(InstTy))
      return canVectorizeTy(InstTy);

    // For now, we only recognize struct values returned from calls where
    // all users are extractvalue as vectorizable. All element types of the
    // struct must be types that can be widened.
    return isa<CallInst>(Inst) && canWidenCallReturnType(InstTy) &&
           all_of(Inst.users(), IsaPred<ExtractValueInst>);
  };

  // Check that the instruction return type is vectorizable.
  // We can't vectorize casts from vector type to scalar type.
  // Also, we can't vectorize extractelement instructions.
  if (!CanWidenInstructionTy(I) ||
      (isa<CastInst>(I) &&
       !VectorType::isValidElementType(I.getOperand(0)->getType())) ||
      isa<ExtractElementInst>(I)) {
    reportVectorizationFailure("Found unvectorizable type",
                               "instruction return type cannot be vectorized",
                               "CantVectorizeInstructionReturnType", ORE,
                               TheLoop, &I);
    return false;
  }

  // Check that the stored type is vectorizable.
  if (auto *ST = dyn_cast<StoreInst>(&I)) {
    Type *T = ST->getValueOperand()->getType();
    if (!VectorType::isValidElementType(T)) {
      reportVectorizationFailure("Store instruction cannot be vectorized",
                                 "CantVectorizeStore", ORE, TheLoop, ST);
      return false;
    }

    // For nontemporal stores, check that a nontemporal vector version is
    // supported on the target.
    if (ST->getMetadata(LLVMContext::MD_nontemporal)) {
      // Arbitrarily try a vector of 2 elements.
      auto *VecTy = FixedVectorType::get(T, /*NumElts=*/2);
      assert(VecTy && "did not find vectorized version of stored type");
      if (!TTI->isLegalNTStore(VecTy, ST->getAlign())) {
        reportVectorizationFailure(
            "nontemporal store instruction cannot be vectorized",
            "CantVectorizeNontemporalStore", ORE, TheLoop, ST);
        return false;
      }
    }

  } else if (auto *LD = dyn_cast<LoadInst>(&I)) {
    if (LD->getMetadata(LLVMContext::MD_nontemporal)) {
      // For nontemporal loads, check that a nontemporal vector version is
      // supported on the target (arbitrarily try a vector of 2 elements).
      auto *VecTy = FixedVectorType::get(I.getType(), /*NumElts=*/2);
      assert(VecTy && "did not find vectorized version of load type");
      if (!TTI->isLegalNTLoad(VecTy, LD->getAlign())) {
        reportVectorizationFailure(
            "nontemporal load instruction cannot be vectorized",
            "CantVectorizeNontemporalLoad", ORE, TheLoop, LD);
        return false;
      }
    }

    // FP instructions can allow unsafe algebra, thus vectorizable by
    // non-IEEE-754 compliant SIMD units.
    // This applies to floating-point math operations and calls, not memory
    // operations, shuffles, or casts, as they don't change precision or
    // semantics.
  } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) &&
             !I.isFast()) {
    LLVM_DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n");
    Hints->setPotentiallyUnsafe();
  }

  // Reduction instructions are allowed to have exit users.
  // All other instructions must not have external users.
  if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) {
    // We can safely vectorize loops where instructions within the loop are
    // used outside the loop only if the SCEV predicates within the loop are
    // the same as outside the loop. Allowing the exit means reusing the SCEV
    // outside the loop.
    if (PSE.getPredicate().isAlwaysTrue()) {
      AllowedExit.insert(&I);
      return true;
    }
    reportVectorizationFailure("Value cannot be used outside the loop",
                               "ValueUsedOutsideLoop", ORE, TheLoop, &I);
    return false;
  }

  return true;
}

/// Find histogram operations that match high-level code in loops:
/// \code
/// buckets[indices[i]] += step;
/// \endcode
///
/// It matches a pattern starting from \p HSt, which stores to the 'buckets'
/// array the computed histogram. It uses a BinOp to sum all counts, storing
/// them using a loop-variant index Load from the 'indices' input array.
///
/// On successful matches it updates the STATISTIC 'HistogramsDetected',
/// regardless of hardware support. When there is support, it additionally
/// stores the BinOp/Load pairs in \p HistogramCounts, as well as the pointers
/// used to update the histogram in \p HistogramPtrs.
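///
/// A matching IR shape (illustrative, simplified):
/// \code
///   %idx = load i32, ptr %gep.indices
///   %idx.ext = zext i32 %idx to i64
///   %gep.bucket = getelementptr i32, ptr %buckets, i64 %idx.ext
///   %old = load i32, ptr %gep.bucket
///   %new = add i32 %old, 1
///   store i32 %new, ptr %gep.bucket
/// \endcode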
static bool findHistogram(LoadInst *LI, StoreInst *HSt, Loop *TheLoop,
                          const PredicatedScalarEvolution &PSE,
                          SmallVectorImpl<HistogramInfo> &Histograms) {

  // Store value must come from a Binary Operation.
  Instruction *HPtrInstr = nullptr;
  BinaryOperator *HBinOp = nullptr;
  if (!match(HSt, m_Store(m_BinOp(HBinOp), m_Instruction(HPtrInstr))))
    return false;

  // BinOp must be an Add or a Sub modifying the bucket value by a
  // loop invariant amount.
  // FIXME: We assume the loop invariant term is on the RHS.
  //        Fine for an immediate/constant, but maybe not a generic value?
  Value *HIncVal = nullptr;
  if (!match(HBinOp, m_Add(m_Load(m_Specific(HPtrInstr)), m_Value(HIncVal))) &&
      !match(HBinOp, m_Sub(m_Load(m_Specific(HPtrInstr)), m_Value(HIncVal))))
    return false;

  // Make sure the increment value is loop invariant.
  if (!TheLoop->isLoopInvariant(HIncVal))
    return false;

  // The address to store is calculated through a GEP Instruction.
  GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(HPtrInstr);
  if (!GEP)
    return false;

  // Restrict address calculation to constant indices except for the last term.
  Value *HIdx = nullptr;
  for (Value *Index : GEP->indices()) {
    if (HIdx)
      return false;
    if (!isa<ConstantInt>(Index))
      HIdx = Index;
  }

  if (!HIdx)
    return false;

  // Check that the index is calculated by loading from another array. Ignore
  // any extensions.
  // FIXME: Support indices from other sources than a linear load from memory?
  //        We're currently trying to match an operation looping over an array
  //        of indices, but there could be additional levels of indirection
  //        in place, or possibly some additional calculation to form the index
  //        from the loaded data.
  Value *VPtrVal;
  if (!match(HIdx, m_ZExtOrSExtOrSelf(m_Load(m_Value(VPtrVal)))))
    return false;

  // Make sure the index address varies in this loop, not an outer loop.
  const auto *AR = dyn_cast<SCEVAddRecExpr>(PSE.getSE()->getSCEV(VPtrVal));
  if (!AR || AR->getLoop() != TheLoop)
    return false;

  // Ensure we'll have the same mask by checking that all parts of the
  // histogram (gather load, update, scatter store) are in the same block.
  LoadInst *IndexedLoad = cast<LoadInst>(HBinOp->getOperand(0));
  BasicBlock *LdBB = IndexedLoad->getParent();
  if (LdBB != HBinOp->getParent() || LdBB != HSt->getParent())
    return false;

  LLVM_DEBUG(dbgs() << "LV: Found histogram for: " << *HSt << "\n");

  // Store the operations that make up the histogram.
  Histograms.emplace_back(IndexedLoad, HBinOp, HSt);
  return true;
}

bool LoopVectorizationLegality::canVectorizeIndirectUnsafeDependences() {
  // For now, we only support an IndirectUnsafe dependency that calculates
  // a histogram.
  if (!EnableHistogramVectorization)
    return false;

  // Find a single IndirectUnsafe dependency.
  const MemoryDepChecker::Dependence *IUDep = nullptr;
  const MemoryDepChecker &DepChecker = LAI->getDepChecker();
  const auto *Deps = DepChecker.getDependences();
  // If there were too many dependences, LAA abandons recording them. We can't
  // proceed safely if we don't know what the dependences are.
  if (!Deps)
    return false;

  for (const MemoryDepChecker::Dependence &Dep : *Deps) {
    // Ignore dependencies that are either known to be safe or can be
    // checked at runtime.
    if (MemoryDepChecker::Dependence::isSafeForVectorization(Dep.Type) !=
        MemoryDepChecker::VectorizationSafetyStatus::Unsafe)
      continue;

    // We're only interested in IndirectUnsafe dependencies here, where the
    // address might come from a load from memory. We also only want to handle
    // one such dependency, at least for now.
    if (Dep.Type != MemoryDepChecker::Dependence::IndirectUnsafe || IUDep)
      return false;

    IUDep = &Dep;
  }
  if (!IUDep)
    return false;

  // For now only normal loads and stores are supported.
  LoadInst *LI = dyn_cast<LoadInst>(IUDep->getSource(DepChecker));
  StoreInst *SI = dyn_cast<StoreInst>(IUDep->getDestination(DepChecker));

  if (!LI || !SI)
    return false;

  LLVM_DEBUG(dbgs() << "LV: Checking for a histogram on: " << *SI << "\n");
  return findHistogram(LI, SI, TheLoop, LAI->getPSE(), Histograms);
}

bool LoopVectorizationLegality::canVectorizeMemory() {
  LAI = &LAIs.getInfo(*TheLoop);
  const OptimizationRemarkAnalysis *LAR = LAI->getReport();
  if (LAR) {
    ORE->emit([&]() {
      return OptimizationRemarkAnalysis(Hints->vectorizeAnalysisPassName(),
                                        "loop not vectorized: ", *LAR);
    });
  }

  if (!LAI->canVectorizeMemory()) {
    if (hasUncountableEarlyExit()) {
      reportVectorizationFailure(
          "Cannot vectorize unsafe dependencies in uncountable exit loop with "
          "side effects",
          "CantVectorizeUnsafeDependencyForEELoopWithSideEffects", ORE,
          TheLoop);
      return false;
    }

    return canVectorizeIndirectUnsafeDependences();
  }

  if (LAI->hasLoadStoreDependenceInvolvingLoopInvariantAddress()) {
    reportVectorizationFailure("We don't allow storing to uniform addresses",
                               "write to a loop invariant address could not "
                               "be vectorized",
                               "CantVectorizeStoreToLoopInvariantAddress", ORE,
                               TheLoop);
    return false;
  }

  // We can vectorize stores to an invariant address when the final reduction
  // value is guaranteed to be stored at the end of the loop. Also, if the
  // decision to vectorize the loop is made, runtime checks are added so as to
  // make sure that the invariant address won't alias with any other objects.
  if (!LAI->getStoresToInvariantAddresses().empty()) {
    // For each invariant address, check if the last stored value is
    // unconditional and the address is not calculated inside the loop.
    for (StoreInst *SI : LAI->getStoresToInvariantAddresses()) {
      if (!isInvariantStoreOfReduction(SI))
        continue;

      if (blockNeedsPredication(SI->getParent())) {
        reportVectorizationFailure(
            "We don't allow storing to uniform addresses",
            "write of conditional recurring variant value to a loop "
            "invariant address could not be vectorized",
            "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
        return false;
      }

      // The invariant address should be defined outside of the loop. The LICM
      // pass usually makes sure this happens, but in rare cases it does not,
      // and we do not want to overcomplicate vectorization to support this
      // case.
      if (Instruction *Ptr = dyn_cast<Instruction>(SI->getPointerOperand())) {
        if (TheLoop->contains(Ptr)) {
          reportVectorizationFailure(
              "Invariant address is calculated inside the loop",
              "write to a loop invariant address could not "
              "be vectorized",
              "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
          return false;
        }
      }
    }

    if (LAI->hasStoreStoreDependenceInvolvingLoopInvariantAddress()) {
      // For each invariant address, check that its last stored value is the
      // result of one of our reductions.
      //
      // We do not check if dependence with loads exists because that is
      // already checked via
      // hasLoadStoreDependenceInvolvingLoopInvariantAddress.
      ScalarEvolution *SE = PSE.getSE();
      SmallVector<StoreInst *, 4> UnhandledStores;
      for (StoreInst *SI : LAI->getStoresToInvariantAddresses()) {
        if (isInvariantStoreOfReduction(SI)) {
          // Earlier stores to this address are effectively dead code.
          // With opaque pointers it is possible for one pointer to be used
          // with different sizes of stored values:
          //    store i32 0, ptr %x
          //    store i8 0, ptr %x
          // The latest store doesn't completely overwrite the first one in
          // the example. That is why we have to make sure that the types of
          // the stored values are the same.
          // TODO: Check that the bitwidth of an unhandled store is smaller
          // than the one that overwrites it, and add a test.
          erase_if(UnhandledStores, [SE, SI](StoreInst *I) {
            return storeToSameAddress(SE, SI, I) &&
                   I->getValueOperand()->getType() ==
                       SI->getValueOperand()->getType();
          });
          continue;
        }
        UnhandledStores.push_back(SI);
      }

      bool IsOK = UnhandledStores.empty();
      // TODO: we should also validate against InvariantMemSets.
      if (!IsOK) {
        reportVectorizationFailure(
            "We don't allow storing to uniform addresses",
            "write to a loop invariant address could not "
            "be vectorized",
            "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
        return false;
      }
    }
  }

  PSE.addPredicate(LAI->getPSE().getPredicate());
  return true;
}

bool LoopVectorizationLegality::canVectorizeFPMath(
    bool EnableStrictReductions) {

  // First check if there is any ExactFP math or if we allow reassociations
  if (!Requirements->getExactFPInst() || Hints->allowReordering())
    return true;

  // If the above is false, we have ExactFPMath & do not allow reordering.
  // If the EnableStrictReductions flag is set, first check if we have any
  // Exact FP induction vars, which we cannot vectorize.
  if (!EnableStrictReductions ||
      any_of(getInductionVars(), [&](auto &Induction) -> bool {
        InductionDescriptor IndDesc = Induction.second;
        return IndDesc.getExactFPMathInst();
      }))
    return false;

  // We can now only vectorize if all reductions with Exact FP math also
  // have the isOrdered flag set, which indicates that we can move the
  // reduction operations in-loop.
  return (all_of(getReductionVars(), [&](auto &Reduction) -> bool {
    const RecurrenceDescriptor &RdxDesc = Reduction.second;
    return !RdxDesc.hasExactFPMath() || RdxDesc.isOrdered();
  }));
}

bool LoopVectorizationLegality::isInvariantStoreOfReduction(StoreInst *SI) {
  return any_of(getReductionVars(), [&](auto &Reduction) -> bool {
    const RecurrenceDescriptor &RdxDesc = Reduction.second;
    return RdxDesc.IntermediateStore == SI;
  });
}

bool LoopVectorizationLegality::isInvariantAddressOfReduction(Value *V) {
  return any_of(getReductionVars(), [&](auto &Reduction) -> bool {
    const RecurrenceDescriptor &RdxDesc = Reduction.second;
    if (!RdxDesc.IntermediateStore)
      return false;

    ScalarEvolution *SE = PSE.getSE();
    Value *InvariantAddress = RdxDesc.IntermediateStore->getPointerOperand();
    return V == InvariantAddress ||
           SE->getSCEV(V) == SE->getSCEV(InvariantAddress);
  });
}

bool LoopVectorizationLegality::isInductionPhi(const Value *V) const {
  Value *In0 = const_cast<Value *>(V);
  PHINode *PN = dyn_cast<PHINode>(In0);
  if (!PN)
    return false;

  return Inductions.count(PN);
}

const InductionDescriptor *
LoopVectorizationLegality::getIntOrFpInductionDescriptor(PHINode *Phi) const {
  if (!isInductionPhi(Phi))
    return nullptr;
  auto &ID = getInductionVars().find(Phi)->second;
  if (ID.getKind() == InductionDescriptor::IK_IntInduction ||
      ID.getKind() == InductionDescriptor::IK_FpInduction)
    return &ID;
  return nullptr;
}

const InductionDescriptor *
LoopVectorizationLegality::getPointerInductionDescriptor(PHINode *Phi) const {
  if (!isInductionPhi(Phi))
    return nullptr;
  auto &ID = getInductionVars().find(Phi)->second;
  if (ID.getKind() == InductionDescriptor::IK_PtrInduction)
    return &ID;
  return nullptr;
}

bool LoopVectorizationLegality::isCastedInductionVariable(
    const Value *V) const {
  auto *Inst = dyn_cast<Instruction>(V);
  return (Inst && InductionCastsToIgnore.count(Inst));
}

bool LoopVectorizationLegality::isInductionVariable(const Value *V) const {
  return isInductionPhi(V) || isCastedInductionVariable(V);
}

bool LoopVectorizationLegality::isFixedOrderRecurrence(
    const PHINode *Phi) const {
  return FixedOrderRecurrences.count(Phi);
}

bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) const {
  // When vectorizing early exits, create predicates for the latch block only.
  // The early exiting block must be a direct predecessor of the latch at the
  // moment.
  BasicBlock *Latch = TheLoop->getLoopLatch();
  if (hasUncountableEarlyExit()) {
    assert(
        is_contained(predecessors(Latch), getUncountableEarlyExitingBlock()) &&
        "Uncountable exiting block must be a direct predecessor of latch");
    return BB == Latch;
  }
  return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
}

bool LoopVectorizationLegality::blockCanBePredicated(
    BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs,
    SmallPtrSetImpl<const Instruction *> &MaskedOp) const {
  for (Instruction &I : *BB) {
    // We can predicate blocks with calls to assume, as long as we drop them in
    // case we flatten the CFG via predication.
    if (match(&I, m_Intrinsic<Intrinsic::assume>())) {
      MaskedOp.insert(&I);
      continue;
    }

    // Do not let llvm.experimental.noalias.scope.decl block the vectorization.
    // TODO: there might be cases that it should block the vectorization. Let's
    // ignore those for now.
    if (isa<NoAliasScopeDeclInst>(&I))
      continue;

    // We can allow masked calls if there's at least one vector variant, even
    // if we end up scalarizing due to the cost model calculations.
    // TODO: Allow other calls if they have appropriate attributes... readonly
    // and argmemonly?
    if (CallInst *CI = dyn_cast<CallInst>(&I))
      if (!VFDatabase::getMappings(*CI).empty()) {
        MaskedOp.insert(CI);
        continue;
      }

    // Loads are handled via masking (or speculated if safe to do so.)
    if (auto *LI = dyn_cast<LoadInst>(&I)) {
      if (!SafePtrs.count(LI->getPointerOperand()))
        MaskedOp.insert(LI);
      continue;
    }

    // Predicated stores require some form of masking (see the illustrative
    // example below):
    //   1) masked store HW instruction,
    //   2) emulation via load-blend-store (only if safe and legal to do so,
    //      be aware of the race conditions), or
    //   3) element-by-element predicate check and scalar store.
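    // For example (illustrative): with option 1), the conditional store in
    // 'if (m[i]) a[i] = x[i];' can be emitted as a single
    // '@llvm.masked.store.v4i32.p0(<4 x i32> %x, ptr %a, i32 4, <4 x i1> %m)'
    // on targets that support masked stores.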
    if (auto *SI = dyn_cast<StoreInst>(&I)) {
      MaskedOp.insert(SI);
      continue;
    }

    if (I.mayReadFromMemory() || I.mayWriteToMemory() || I.mayThrow())
      return false;
  }

  return true;
}

bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
  if (!EnableIfConversion) {
    reportVectorizationFailure("If-conversion is disabled",
                               "IfConversionDisabled", ORE, TheLoop);
    return false;
  }

  assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");

  // A list of pointers which are known to be dereferenceable within scope of
  // the loop body for each iteration of the loop which executes. That is,
  // the memory pointed to can be dereferenced (with the access size implied by
  // the value's type) unconditionally within the loop header without
  // introducing a new fault.
  SmallPtrSet<Value *, 8> SafePointers;

  // Collect safe addresses.
  for (BasicBlock *BB : TheLoop->blocks()) {
    if (!blockNeedsPredication(BB)) {
      for (Instruction &I : *BB)
        if (auto *Ptr = getLoadStorePointerOperand(&I))
          SafePointers.insert(Ptr);
      continue;
    }

    // For a block which requires predication, an address may be safe to access
    // in the loop w/o predication if we can prove dereferenceability facts
    // sufficient to ensure it'll never fault within the loop. For the moment,
    // we restrict this to loads; stores are more complicated due to
    // concurrency restrictions.
    ScalarEvolution &SE = *PSE.getSE();
    SmallVector<const SCEVPredicate *, 4> Predicates;
    for (Instruction &I : *BB) {
      LoadInst *LI = dyn_cast<LoadInst>(&I);

      // Make sure we can execute all computations feeding into Ptr in the
      // loop w/o triggering UB and that none of the out-of-loop operands are
      // poison. We do not need to check if operations inside the loop can
      // produce poison due to flags (e.g. due to an inbounds GEP going out of
      // bounds), because flags will be dropped when executing them
      // unconditionally.
      // TODO: Results could be improved by considering poison-propagation
      // properties of visited ops.
      auto CanSpeculatePointerOp = [this](Value *Ptr) {
        SmallVector<Value *> Worklist = {Ptr};
        SmallPtrSet<Value *, 4> Visited;
        while (!Worklist.empty()) {
          Value *CurrV = Worklist.pop_back_val();
          if (!Visited.insert(CurrV).second)
            continue;

          auto *CurrI = dyn_cast<Instruction>(CurrV);
          if (!CurrI || !TheLoop->contains(CurrI)) {
            // If operands from outside the loop may be poison then Ptr may
            // also be poison.
            if (!isGuaranteedNotToBePoison(CurrV, AC,
                                           TheLoop->getLoopPredecessor()
                                               ->getTerminator()
                                               ->getIterator(),
                                           DT))
              return false;
            continue;
          }

          // A loaded value may be poison, independent of any flags.
          if (isa<LoadInst>(CurrI) && !isGuaranteedNotToBePoison(CurrV, AC))
            return false;

          // For other ops, assume poison can only be introduced via flags,
          // which can be dropped.
          if (!isa<PHINode>(CurrI) && !isSafeToSpeculativelyExecute(CurrI))
            return false;
          append_range(Worklist, CurrI->operands());
        }
        return true;
      };
      // Pass the Predicates pointer to isDereferenceableAndAlignedInLoop so
      // that it will consider loops that need guarding by SCEV checks. The
      // vectoriser will generate these checks if we decide to vectorise.
      if (LI && !LI->getType()->isVectorTy() && !mustSuppressSpeculation(*LI) &&
          CanSpeculatePointerOp(LI->getPointerOperand()) &&
          isDereferenceableAndAlignedInLoop(LI, TheLoop, SE, *DT, AC,
                                            &Predicates))
        SafePointers.insert(LI->getPointerOperand());
      Predicates.clear();
    }
  }

  // Collect the blocks that need predication.
  for (BasicBlock *BB : TheLoop->blocks()) {
    // We support only branches and switch statements as terminators inside the
    // loop.
    if (isa<SwitchInst>(BB->getTerminator())) {
      if (TheLoop->isLoopExiting(BB)) {
        reportVectorizationFailure("Loop contains an unsupported switch",
                                   "LoopContainsUnsupportedSwitch", ORE,
                                   TheLoop, BB->getTerminator());
        return false;
      }
    } else if (!isa<BranchInst>(BB->getTerminator())) {
      reportVectorizationFailure("Loop contains an unsupported terminator",
                                 "LoopContainsUnsupportedTerminator", ORE,
                                 TheLoop, BB->getTerminator());
      return false;
    }

    // We must be able to predicate all blocks that need to be predicated.
    if (blockNeedsPredication(BB) &&
        !blockCanBePredicated(BB, SafePointers, MaskedOp)) {
      reportVectorizationFailure(
          "Control flow cannot be substituted for a select", "NoCFGForSelect",
          ORE, TheLoop, BB->getTerminator());
      return false;
    }
  }

  // We can if-convert this loop.
  return true;
}

1610// Helper function to canVectorizeLoopNestCFG.
1611bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
1612 bool UseVPlanNativePath) {
1613 assert((UseVPlanNativePath || Lp->isInnermost()) &&
1614 "VPlan-native path is not enabled.");
1615
1616 // TODO: ORE should be improved to show more accurate information when an
1617 // outer loop can't be vectorized because a nested loop is not understood or
1618 // legal. Something like: "outer_loop_location: loop not vectorized:
1619 // (inner_loop_location) loop control flow is not understood by vectorizer".
1620
1621 // Store the result and return it at the end instead of exiting early, in case
1622 // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1623 bool Result = true;
1624 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1625
1626 // We must have a loop in canonical form. Loops with indirectbr in them cannot
1627 // be canonicalized.
1628 if (!Lp->getLoopPreheader()) {
1629 reportVectorizationFailure("Loop doesn't have a legal pre-header",
1630 "loop control flow is not understood by vectorizer",
1631 "CFGNotUnderstood", ORE, TheLoop);
1632 if (DoExtraAnalysis)
1633 Result = false;
1634 else
1635 return false;
1636 }
1637
1638 // We must have a single backedge.
1639 if (Lp->getNumBackEdges() != 1) {
1640 reportVectorizationFailure("The loop must have a single backedge",
1641 "loop control flow is not understood by vectorizer",
1642 "CFGNotUnderstood", ORE, TheLoop);
1643 if (DoExtraAnalysis)
1644 Result = false;
1645 else
1646 return false;
1647 }
1648
1649 // The latch must be terminated by a BranchInst.
1650 BasicBlock *Latch = Lp->getLoopLatch();
1651 if (Latch && !isa<BranchInst>(Latch->getTerminator())) {
1653 "The loop latch terminator is not a BranchInst",
1654 "loop control flow is not understood by vectorizer", "CFGNotUnderstood",
1655 ORE, TheLoop);
1656 if (DoExtraAnalysis)
1657 Result = false;
1658 else
1659 return false;
1660 }
1661
1662 return Result;
1663}
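// Illustrative note (assumed sketch): after loop-simplify, the accepted
// shape is
//
//   preheader -> header -> ... -> latch --(conditional BranchInst)--> header
//
// i.e. a dedicated preheader, exactly one backedge, and a latch terminated by
// a branch. Loops entered via indirectbr cannot be put into this canonical
// form and are therefore rejected here.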
1664
1665bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
1666 Loop *Lp, bool UseVPlanNativePath) {
1667 // Store the result and return it at the end instead of exiting early, in case
1668 // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1669 bool Result = true;
1670 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1671 if (!canVectorizeLoopCFG(Lp, UseVPlanNativePath)) {
1672 if (DoExtraAnalysis)
1673 Result = false;
1674 else
1675 return false;
1676 }
1677
1678 // Recursively check whether the loop control flow of nested loops is
1679 // understood.
1680 for (Loop *SubLp : *Lp)
1681 if (!canVectorizeLoopNestCFG(SubLp, UseVPlanNativePath)) {
1682 if (DoExtraAnalysis)
1683 Result = false;
1684 else
1685 return false;
1686 }
1687
1688 return Result;
1689}
1690
1691bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
1692 BasicBlock *LatchBB = TheLoop->getLoopLatch();
1693 if (!LatchBB) {
1694 reportVectorizationFailure("Loop does not have a latch",
1695 "Cannot vectorize early exit loop",
1696 "NoLatchEarlyExit", ORE, TheLoop);
1697 return false;
1698 }
1699
1700 if (Reductions.size() || FixedOrderRecurrences.size()) {
1702 "Found reductions or recurrences in early-exit loop",
1703 "Cannot vectorize early exit loop with reductions or recurrences",
1704 "RecurrencesInEarlyExitLoop", ORE, TheLoop);
1705 return false;
1706 }
1707
1708 SmallVector<BasicBlock *, 8> ExitingBlocks;
1709 TheLoop->getExitingBlocks(ExitingBlocks);
1710
1711 // Keep a record of all the exiting blocks.
1712 SmallVector<const SCEVPredicate *, 4> Predicates;
1713 BasicBlock *SingleUncountableExitingBlock = nullptr;
1714 for (BasicBlock *BB : ExitingBlocks) {
1715 const SCEV *EC =
1716 PSE.getSE()->getPredicatedExitCount(TheLoop, BB, &Predicates);
1717 if (isa<SCEVCouldNotCompute>(EC)) {
1718 if (size(successors(BB)) != 2) {
1720 "Early exiting block does not have exactly two successors",
1721 "Incorrect number of successors from early exiting block",
1722 "EarlyExitTooManySuccessors", ORE, TheLoop);
1723 return false;
1724 }
1725
1726 if (SingleUncountableExitingBlock) {
1728 "Loop has too many uncountable exits",
1729 "Cannot vectorize early exit loop with more than one early exit",
1730 "TooManyUncountableEarlyExits", ORE, TheLoop);
1731 return false;
1732 }
1733
1734 SingleUncountableExitingBlock = BB;
1735 } else
1736 CountableExitingBlocks.push_back(BB);
1737 }
1738 // We can safely ignore the predicates here because when vectorizing the loop
1739 // the PredicatedScalarEvolution class will keep track of all predicates
1740 // for each exiting block anyway. This happens when calling
1741 // PSE.getSymbolicMaxBackedgeTakenCount() below.
1742 Predicates.clear();
1743
1744 if (!SingleUncountableExitingBlock) {
1745 LLVM_DEBUG(dbgs() << "LV: Could not find any uncountable exits\n");
1746 return false;
1747 }
1748
1749 // The only supported early exit loops so far are ones where the early
1750 // exiting block is a unique predecessor of the latch block.
1751 BasicBlock *LatchPredBB = LatchBB->getUniquePredecessor();
1752 if (LatchPredBB != SingleUncountableExitingBlock) {
1753 reportVectorizationFailure("Early exit is not the latch predecessor",
1754 "Cannot vectorize early exit loop",
1755 "EarlyExitNotLatchPredecessor", ORE, TheLoop);
1756 return false;
1757 }
1758
1759 // The latch block must have a countable exit.
1760 if (isa<SCEVCouldNotCompute>(
1761 PSE.getSE()->getPredicatedExitCount(TheLoop, LatchBB, &Predicates))) {
1762 reportVectorizationFailure(
1763 "Cannot determine exact exit count for latch block",
1764 "Cannot vectorize early exit loop",
1765 "UnknownLatchExitCountEarlyExitLoop", ORE, TheLoop);
1766 return false;
1767 }
1768 assert(llvm::is_contained(CountableExitingBlocks, LatchBB) &&
1769 "Latch block not found in list of countable exits!");
1770
1771 // Check to see if there are instructions that could potentially generate
1772 // exceptions or have side-effects.
1773 auto IsSafeOperation = [](Instruction *I) -> bool {
1774 switch (I->getOpcode()) {
1775 case Instruction::Load:
1776 case Instruction::Store:
1777 case Instruction::PHI:
1778 case Instruction::Br:
1779 // These are checked separately.
1780 return true;
1781 default:
1782 return isSafeToSpeculativelyExecute(I);
1783 }
1784 };
1785
1786 bool HasSideEffects = false;
1787 for (auto *BB : TheLoop->blocks())
1788 for (auto &I : *BB) {
1789 if (I.mayWriteToMemory()) {
1790 if (isa<StoreInst>(&I) && cast<StoreInst>(&I)->isSimple()) {
1791 HasSideEffects = true;
1792 continue;
1793 }
1794
1795 // We don't support complex writes to memory.
1797 "Complex writes to memory unsupported in early exit loops",
1798 "Cannot vectorize early exit loop with complex writes to memory",
1799 "WritesInEarlyExitLoop", ORE, TheLoop);
1800 return false;
1801 }
1802
1803 if (!IsSafeOperation(&I)) {
1804 reportVectorizationFailure("Early exit loop contains operations that "
1805 "cannot be speculatively executed",
1806 "UnsafeOperationsEarlyExitLoop", ORE,
1807 TheLoop);
1808 return false;
1809 }
1810 }
1811
1812 // The vectoriser cannot handle loads that occur after the early exit block.
1813 assert(LatchBB->getUniquePredecessor() == SingleUncountableExitingBlock &&
1814 "Expected latch predecessor to be the early exiting block");
1815
1816 SmallVector<LoadInst *, 4> NonDerefLoads;
1817 // TODO: Handle loops that may fault.
1818 if (!HasSideEffects) {
1819 // Read-only loop.
1820 Predicates.clear();
1821 if (!isReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC, NonDerefLoads,
1822 &Predicates)) {
1824 "Loop may fault", "Cannot vectorize non-read-only early exit loop",
1825 "NonReadOnlyEarlyExitLoop", ORE, TheLoop);
1826 return false;
1827 }
1828 } else if (!canUncountableExitConditionLoadBeMoved(
1829 SingleUncountableExitingBlock))
1830 return false;
1831
1832 // Check non-dereferenceable loads if any.
1833 for (LoadInst *LI : NonDerefLoads) {
1834 // Only support unit-stride access for now.
1835 int Stride = isConsecutivePtr(LI->getType(), LI->getPointerOperand());
1836 if (Stride != 1) {
1838 "Loop contains potentially faulting strided load",
1839 "Cannot vectorize early exit loop with "
1840 "strided fault-only-first load",
1841 "EarlyExitLoopWithStridedFaultOnlyFirstLoad", ORE, TheLoop);
1842 return false;
1843 }
1844 PotentiallyFaultingLoads.insert(LI);
1845 LLVM_DEBUG(dbgs() << "LV: Found potentially faulting load: " << *LI
1846 << "\n");
1847 }
1848
1849 [[maybe_unused]] const SCEV *SymbolicMaxBTC =
1850 PSE.getSymbolicMaxBackedgeTakenCount();
1851 // Since we have an exact exit count for the latch and the early exit
1852 // dominates the latch, this should guarantee a computed SCEV value.
1853 assert(!isa<SCEVCouldNotCompute>(SymbolicMaxBTC) &&
1854 "Failed to get symbolic expression for backedge taken count");
1855 LLVM_DEBUG(dbgs() << "LV: Found an early exit loop with symbolic max "
1856 "backedge taken count: "
1857 << *SymbolicMaxBTC << '\n');
1858 UncountableExitingBB = SingleUncountableExitingBlock;
1859 UncountableExitWithSideEffects = HasSideEffects;
1860 return true;
1861}
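// Illustrative sketch (assumed example): the accepted early-exit shape is a
// search loop such as
//
//   for (int i = 0; i < n; ++i)   // latch exit with a computable count
//     if (a[i] == key)            // single uncountable exit, sole
//       return i;                 // predecessor of the latch
//
// A read-only loop like this is vectorizable when 'a' is known to be
// dereferenceable across the full iteration space (possibly under SCEV
// predicates, or via potentially-faulting unit-stride loads).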
1862
1863bool LoopVectorizationLegality::canUncountableExitConditionLoadBeMoved(
1864 BasicBlock *ExitingBlock) {
1865 // Try to find a load in the critical path for the uncountable exit condition.
1866 // This currently matches just about the simplest form we can, expecting
1867 // only one in-loop load, the result of which is directly compared against
1868 // a loop-invariant value.
1869 // FIXME: We're insisting on a single use for now, because otherwise we will
1870 // need to make PHI nodes for other users. That can be done once the initial
1871 // transform code lands.
1872 auto *Br = cast<BranchInst>(ExitingBlock->getTerminator());
1873
1874 using namespace llvm::PatternMatch;
1875 Instruction *L = nullptr;
1876 Value *Ptr = nullptr;
1877 Value *R = nullptr;
1878 if (!match(Br->getCondition(),
1879 m_OneUse(m_ICmp(m_OneUse(m_CombineAnd(m_Instruction(L), m_Load(m_Value(Ptr)))),
1880 m_Value(R))))) {
1882 "Early exit loop with store but no supported condition load",
1883 "NoConditionLoadForEarlyExitLoop", ORE, TheLoop);
1884 return false;
1885 }
1886
1887 // FIXME: Don't rely on operand ordering for the comparison.
1888 if (!TheLoop->isLoopInvariant(R)) {
1890 "Early exit loop with store but no supported condition load",
1891 "NoConditionLoadForEarlyExitLoop", ORE, TheLoop);
1892 return false;
1893 }
1894
1895 // Make sure that the load address is not loop invariant; we want an
1896 // address calculation that we can rotate to the next vector iteration.
1897 const auto *AR = dyn_cast<SCEVAddRecExpr>(PSE.getSE()->getSCEV(Ptr));
1898 if (!AR || AR->getLoop() != TheLoop || !AR->isAffine()) {
1900 "Uncountable exit condition depends on load with an address that is "
1901 "not an add recurrence in the loop",
1902 "EarlyExitLoadInvariantAddress", ORE, TheLoop);
1903 return false;
1904 }
1905
1906 // FIXME: Support gathers after first-faulting load support lands.
1908 LoadInst *Load = cast<LoadInst>(L);
1909 if (!isDereferenceableAndAlignedInLoop(Load, TheLoop, *PSE.getSE(), *DT, AC,
1910 &Predicates)) {
1912 "Loop may fault",
1913 "Cannot vectorize potentially faulting early exit loop",
1914 "PotentiallyFaultingEarlyExitLoop", ORE, TheLoop);
1915 return false;
1916 }
1917
1918 ICFLoopSafetyInfo SafetyInfo;
1919 SafetyInfo.computeLoopSafetyInfo(TheLoop);
1920 // We need to know that the load will be executed before we can hoist a
1921 // copy out to run just before the first iteration.
1922 if (!SafetyInfo.isGuaranteedToExecute(*Load, DT, TheLoop)) {
1924 "Load for uncountable exit not guaranteed to execute",
1925 "ConditionalUncountableExitLoad", ORE, TheLoop);
1926 return false;
1927 }
1928
1929 // Prohibit any potential aliasing with any instruction in the loop which
1930 // might store to memory.
1931 // FIXME: Relax this constraint where possible.
1932 for (auto *BB : TheLoop->blocks()) {
1933 for (auto &I : *BB) {
1934 if (&I == Load)
1935 continue;
1936
1937 if (I.mayWriteToMemory()) {
1938 if (auto *SI = dyn_cast<StoreInst>(&I)) {
1939 AliasResult AR = AA->alias(Ptr, SI->getPointerOperand());
1940 if (AR == AliasResult::NoAlias)
1941 continue;
1942 }
1943
1945 "Cannot determine whether critical uncountable exit load address "
1946 "does not alias with a memory write",
1947 "CantVectorizeAliasWithCriticalUncountableExitLoad", ORE, TheLoop);
1948 return false;
1949 }
1950 }
1951 }
1952
1953 return true;
1954}
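// Illustrative sketch (assumed example): the pattern accepted above is
// roughly
//
//   while (p[i] != sentinel) {   // exit compare: single-use load vs. a
//     q[i] = v;                  // loop-invariant value
//     ++i;                       // p[i] is an affine AddRec, guaranteed
//   }                            // to execute, and must not alias q
//
// which permits hoisting a copy of the load so the exit condition for the
// next vector iteration can be evaluated early.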
1955
1956bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
1957 // Store the result and return it at the end instead of exiting early, in case
1958 // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1959 bool Result = true;
1960
1961 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1962 // Check whether the loop-related control flow in the loop nest is expected by
1963 // the vectorizer.
1964 if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) {
1965 if (DoExtraAnalysis) {
1966 LLVM_DEBUG(dbgs() << "LV: legality check failed: loop nest\n");
1967 Result = false;
1968 } else {
1969 return false;
1970 }
1971 }
1972
1973 // We need to have a loop header.
1974 LLVM_DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName()
1975 << '\n');
1976
1977 // Specific checks for outer loops. We skip the remaining legal checks at this
1978 // point because they don't support outer loops.
1979 if (!TheLoop->isInnermost()) {
1980 assert(UseVPlanNativePath && "VPlan-native path is not enabled.");
1981
1982 if (!canVectorizeOuterLoop()) {
1983 reportVectorizationFailure("Unsupported outer loop",
1984 "UnsupportedOuterLoop", ORE, TheLoop);
1985 // TODO: Implement DoExtraAnalysis when subsequent legal checks support
1986 // outer loops.
1987 return false;
1988 }
1989
1990 LLVM_DEBUG(dbgs() << "LV: We can vectorize this outer loop!\n");
1991 return Result;
1992 }
1993
1994 assert(TheLoop->isInnermost() && "Inner loop expected.");
1995 // Check if we can if-convert non-single-bb loops.
1996 unsigned NumBlocks = TheLoop->getNumBlocks();
1997 if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
1998 LLVM_DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");
1999 if (DoExtraAnalysis)
2000 Result = false;
2001 else
2002 return false;
2003 }
2004
2005 // Check if we can vectorize the instructions and CFG in this loop.
2006 if (!canVectorizeInstrs()) {
2007 LLVM_DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n");
2008 if (DoExtraAnalysis)
2009 Result = false;
2010 else
2011 return false;
2012 }
2013
2014 if (isa<SCEVCouldNotCompute>(PSE.getBackedgeTakenCount())) {
2015 if (TheLoop->getExitingBlock()) {
2016 reportVectorizationFailure("Cannot vectorize uncountable loop",
2017 "UnsupportedUncountableLoop", ORE, TheLoop);
2018 if (DoExtraAnalysis)
2019 Result = false;
2020 else
2021 return false;
2022 } else {
2023 if (!isVectorizableEarlyExitLoop()) {
2026 "Must be false without vectorizable early-exit loop");
2027 if (DoExtraAnalysis)
2028 Result = false;
2029 else
2030 return false;
2031 }
2032 }
2033 }
2034
2035 // Go over each instruction and look at memory deps.
2036 if (!canVectorizeMemory()) {
2037 LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
2038 if (DoExtraAnalysis)
2039 Result = false;
2040 else
2041 return false;
2042 }
2043
2044 // Bail out for state-changing loops with uncountable exits for now.
2045 if (UncountableExitWithSideEffects) {
2047 "Writes to memory unsupported in early exit loops",
2048 "Cannot vectorize early exit loop with writes to memory",
2049 "WritesInEarlyExitLoop", ORE, TheLoop);
2050 return false;
2051 }
2052
2053 if (Result) {
2054 LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
2055 << (LAI->getRuntimePointerChecking()->Need
2056 ? " (with a runtime bound check)"
2057 : "")
2058 << "!\n");
2059 }
2060
2061 unsigned SCEVThreshold = VectorizeSCEVCheckThreshold;
2062 if (Hints->getForce() == LoopVectorizeHints::FK_Enabled)
2063 SCEVThreshold = PragmaVectorizeSCEVCheckThreshold;
2064
2065 if (PSE.getPredicate().getComplexity() > SCEVThreshold) {
2066 LLVM_DEBUG(dbgs() << "LV: Vectorization not profitable "
2067 "due to SCEVThreshold");
2068 reportVectorizationFailure("Too many SCEV checks needed",
2069 "Too many SCEV assumptions need to be made and checked at runtime",
2070 "TooManySCEVRunTimeChecks", ORE, TheLoop);
2071 if (DoExtraAnalysis)
2072 Result = false;
2073 else
2074 return false;
2075 }
2076
2077 // Okay! We've done all the tests. If any have failed, return false. Otherwise
2078 // we can vectorize, and at this point we don't have any other mem analysis
2079 // which may limit our maximum vectorization factor, so just return true with
2080 // no restrictions.
2081 return Result;
2082}
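// Illustrative note (assumed usage): the SCEV-check budget tested above is
// tunable from the command line, e.g.
//
//   opt -passes=loop-vectorize -vectorize-scev-check-threshold=32 ...
//
// and loops carrying a vectorize(enable) pragma use the larger
// pragma-vectorize-scev-check-threshold budget (default 128) instead.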
2083
2084bool LoopVectorizationLegality::canFoldTailByMasking() const {
2085 // The only loops we can vectorize without a scalar epilogue are loops with
2086 // a bottom-test and a single exiting block. We'd have to handle the fact
2087 // that not every instruction executes on the last iteration. This will
2088 // require a lane mask which varies through the vector loop body. (TODO)
2089 if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) {
2090 LLVM_DEBUG(
2091 dbgs()
2092 << "LV: Cannot fold tail by masking. Requires a singe latch exit\n");
2093 return false;
2094 }
2095
2096 LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");
2097
2098 SmallPtrSet<const Value *, 8> ReductionLiveOuts;
2099
2100 for (const auto &Reduction : getReductionVars())
2101 ReductionLiveOuts.insert(Reduction.second.getLoopExitInstr());
2102
2103 for (const auto &Entry : getInductionVars()) {
2104 PHINode *OrigPhi = Entry.first;
2105 for (User *U : OrigPhi->users()) {
2106 auto *UI = cast<Instruction>(U);
2107 if (!TheLoop->contains(UI)) {
2108 LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking, loop IV has an "
2109 "outside user for "
2110 << *UI << "\n");
2111 return false;
2112 }
2113 }
2114 }
2115
2116 // The list of pointers that we can safely read and write to remains empty.
2117 SmallPtrSet<Value *, 8> SafePointers;
2118
2119 // Check all blocks for predication, including those that ordinarily do not
2120 // need predication such as the header block.
2121 SmallPtrSet<const Instruction *, 8> TmpMaskedOp;
2122 for (BasicBlock *BB : TheLoop->blocks()) {
2123 if (!blockCanBePredicated(BB, SafePointers, TmpMaskedOp)) {
2124 LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking.\n");
2125 return false;
2126 }
2127 }
2128
2129 LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n");
2130
2131 return true;
2132}
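// Illustrative sketch (assumed example): folding the tail by masking turns a
// trip count of 10 at VF=4 into three masked vector iterations; the last one
// executes lanes {8, 9} under the mask {1, 1, 0, 0} instead of falling back
// to a scalar epilogue. This is why every block, including the header, must
// be predicable and why inductions may not have users outside the loop.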
2133
2134void LoopVectorizationLegality::prepareToFoldTailByMasking() {
2135 // The list of pointers that we can safely read and write to remains empty.
2136 SmallPtrSet<Value *, 8> SafePointers;
2137
2138 // Mark all blocks for predication, including those that ordinarily do not
2139 // need predication such as the header block.
2140 for (BasicBlock *BB : TheLoop->blocks()) {
2141 [[maybe_unused]] bool R = blockCanBePredicated(BB, SafePointers, MaskedOp);
2142 assert(R && "Must be able to predicate block when tail-folding.");
2143 }
2144}
2145
2146} // namespace llvm