LLVM 23.0.0git
LoopVectorizationLegality.cpp
Go to the documentation of this file.
1//===- LoopVectorizationLegality.cpp --------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file provides loop vectorization legality analysis. Original code
10// resided in LoopVectorize.cpp for a long time.
11//
12// At this point, it is implemented as a utility class, not as an analysis
13// pass. It should be easy to create an analysis pass around it if there
14// is a need (but D45420 needs to happen first).
15//
16
19#include "llvm/Analysis/Loads.h"
28#include "llvm/IR/Dominators.h"
33
34using namespace llvm;
35using namespace PatternMatch;
36
37#define LV_NAME "loop-vectorize"
38#define DEBUG_TYPE LV_NAME
39
40static cl::opt<bool>
41 EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
42 cl::desc("Enable if-conversion during vectorization."));
43
44static cl::opt<bool>
45AllowStridedPointerIVs("lv-strided-pointer-ivs", cl::init(false), cl::Hidden,
46 cl::desc("Enable recognition of non-constant strided "
47 "pointer induction variables."));
48
49static cl::opt<bool>
50 HintsAllowReordering("hints-allow-reordering", cl::init(true), cl::Hidden,
51 cl::desc("Allow enabling loop hints to reorder "
52 "FP operations during vectorization."));
53
54// TODO: Move size-based thresholds out of legality checking, make cost based
55// decisions instead of hard thresholds.
57 "vectorize-scev-check-threshold", cl::init(16), cl::Hidden,
58 cl::desc("The maximum number of SCEV checks allowed."));
59
61 "pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden,
62 cl::desc("The maximum number of SCEV checks allowed with a "
63 "vectorize(enable) pragma"));
64
67 "scalable-vectorization", cl::init(LoopVectorizeHints::SK_Unspecified),
69 cl::desc("Control whether the compiler can use scalable vectors to "
70 "vectorize a loop"),
73 "Scalable vectorization is disabled."),
76 "Scalable vectorization is available and favored when the "
77 "cost is inconclusive."),
80 "Scalable vectorization is available and favored when the "
81 "cost is inconclusive."),
84 "Scalable vectorization is available and always favored when "
85 "feasible")));
86
88 "enable-histogram-loop-vectorization", cl::init(false), cl::Hidden,
89 cl::desc("Enables autovectorization of some loops containing histograms"));
90
/// Maximum vectorization interleave count.
static const unsigned MaxInterleaveFactor = 16;
93
94namespace llvm {
95
96bool LoopVectorizeHints::Hint::validate(unsigned Val) {
97 switch (Kind) {
98 case HK_WIDTH:
100 case HK_INTERLEAVE:
101 return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
102 case HK_FORCE:
103 return (Val <= 1);
104 case HK_ISVECTORIZED:
105 case HK_PREDICATE:
106 case HK_SCALABLE:
107 return (Val == 0 || Val == 1);
108 }
109 return false;
110}
111
113 bool InterleaveOnlyWhenForced,
116 : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH),
117 Interleave("interleave.count", InterleaveOnlyWhenForced, HK_INTERLEAVE),
118 Force("vectorize.enable", FK_Undefined, HK_FORCE),
119 IsVectorized("isvectorized", 0, HK_ISVECTORIZED),
120 Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE),
121 Scalable("vectorize.scalable.enable", SK_Unspecified, HK_SCALABLE),
122 TheLoop(L), ORE(ORE) {
123 // Populate values with existing loop metadata.
124 getHintsFromMetadata();
125
126 // force-vector-interleave overrides DisableInterleaving.
129
130 // If the metadata doesn't explicitly specify whether to enable scalable
131 // vectorization, then decide based on the following criteria (increasing
132 // level of priority):
133 // - Target default
134 // - Metadata width
135 // - Force option (always overrides)
137 if (TTI)
138 Scalable.Value = TTI->enableScalableVectorization() ? SK_PreferScalable
140
141 if (Width.Value)
142 // If the width is set, but the metadata says nothing about the scalable
143 // property, then assume it concerns only a fixed-width UserVF.
144 // If width is not set, the flag takes precedence.
145 Scalable.Value = SK_FixedWidthOnly;
146 }
147
148 // If the flag is set to force any use of scalable vectors, override the loop
149 // hints.
150 if (ForceScalableVectorization.getValue() !=
152 Scalable.Value = ForceScalableVectorization.getValue();
153
154 // Scalable vectorization is disabled if no preference is specified.
156 Scalable.Value = SK_FixedWidthOnly;
157
158 if (IsVectorized.Value != 1)
159 // If the vectorization width and interleaving count are both 1 then
160 // consider the loop to have been already vectorized because there's
161 // nothing more that we can do.
162 IsVectorized.Value =
164 LLVM_DEBUG(if (InterleaveOnlyWhenForced && getInterleave() == 1) dbgs()
165 << "LV: Interleaving disabled by the pass manager\n");
166}
167
169 TheLoop->addIntLoopAttribute("llvm.loop.isvectorized", 1,
170 {Twine(Prefix(), "vectorize.").str(),
171 Twine(Prefix(), "interleave.").str()});
172
173 // Update internal cache.
174 IsVectorized.Value = 1;
175}
176
177void LoopVectorizeHints::reportDisallowedVectorization(
178 const StringRef DebugMsg, const StringRef RemarkName,
179 const StringRef RemarkMsg, const Loop *L) const {
180 LLVM_DEBUG(dbgs() << "LV: Not vectorizing: " << DebugMsg << ".\n");
181 ORE.emit(OptimizationRemarkMissed(LV_NAME, RemarkName, L->getStartLoc(),
182 L->getHeader())
183 << "loop not vectorized: " << RemarkMsg);
184}
185
187 Function *F, Loop *L, bool VectorizeOnlyWhenForced) const {
189 if (Force.Value == LoopVectorizeHints::FK_Disabled) {
190 reportDisallowedVectorization("#pragma vectorize disable",
191 "MissedExplicitlyDisabled",
192 "vectorization is explicitly disabled", L);
193 } else if (hasDisableAllTransformsHint(L)) {
194 reportDisallowedVectorization("loop hasDisableAllTransformsHint",
195 "MissedTransformsDisabled",
196 "loop transformations are disabled", L);
197 } else {
198 llvm_unreachable("loop vect disabled for an unknown reason");
199 }
200 return false;
201 }
202
203 if (VectorizeOnlyWhenForced && getForce() != LoopVectorizeHints::FK_Enabled) {
204 reportDisallowedVectorization(
205 "VectorizeOnlyWhenForced is set, and no #pragma vectorize enable",
206 "MissedForceOnly", "only vectorizing loops that explicitly request it",
207 L);
208 return false;
209 }
210
211 if (getIsVectorized() == 1) {
212 LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
213 // FIXME: Add interleave.disable metadata. This will allow
214 // vectorize.disable to be used without disabling the pass and errors
215 // to differentiate between disabled vectorization and a width of 1.
216 ORE.emit([&]() {
218 "AllDisabled", L->getStartLoc(),
219 L->getHeader())
220 << "loop not vectorized: vectorization and interleaving are "
221 "explicitly disabled, or the loop has already been "
222 "vectorized";
223 });
224 return false;
225 }
226
227 return true;
228}
229
231 using namespace ore;
232
233 ORE.emit([&]() {
234 if (Force.Value == LoopVectorizeHints::FK_Disabled)
235 return OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled",
236 TheLoop->getStartLoc(),
237 TheLoop->getHeader())
238 << "loop not vectorized: vectorization is explicitly disabled";
239
240 OptimizationRemarkMissed R(LV_NAME, "MissedDetails", TheLoop->getStartLoc(),
241 TheLoop->getHeader());
242 R << "loop not vectorized";
243 if (Force.Value == LoopVectorizeHints::FK_Enabled) {
244 R << " (Force=" << NV("Force", true);
245 if (Width.Value != 0)
246 R << ", Vector Width=" << NV("VectorWidth", getWidth());
247 if (getInterleave() != 0)
248 R << ", Interleave Count=" << NV("InterleaveCount", getInterleave());
249 R << ")";
250 }
251 return R;
252 });
253}
254
264
266 // Allow the vectorizer to change the order of operations if enabling
267 // loop hints are provided
268 ElementCount EC = getWidth();
269 return HintsAllowReordering &&
271 EC.getKnownMinValue() > 1);
272}
273
274void LoopVectorizeHints::getHintsFromMetadata() {
275 MDNode *LoopID = TheLoop->getLoopID();
276 if (!LoopID)
277 return;
278
279 // First operand should refer to the loop id itself.
280 assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
281 assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
282
283 for (const MDOperand &MDO : llvm::drop_begin(LoopID->operands())) {
284 const MDString *S = nullptr;
286
287 // The expected hint is either a MDString or a MDNode with the first
288 // operand a MDString.
289 if (const MDNode *MD = dyn_cast<MDNode>(MDO)) {
290 if (!MD || MD->getNumOperands() == 0)
291 continue;
292 S = dyn_cast<MDString>(MD->getOperand(0));
293 for (unsigned Idx = 1; Idx < MD->getNumOperands(); ++Idx)
294 Args.push_back(MD->getOperand(Idx));
295 } else {
296 S = dyn_cast<MDString>(MDO);
297 assert(Args.size() == 0 && "too many arguments for MDString");
298 }
299
300 if (!S)
301 continue;
302
303 // Check if the hint starts with the loop metadata prefix.
304 StringRef Name = S->getString();
305 if (Args.size() == 1)
306 setHint(Name, Args[0]);
307 }
308}
309
310void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) {
311 if (!Name.consume_front(Prefix()))
312 return;
313
314 const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg);
315 if (!C)
316 return;
317 unsigned Val = C->getZExtValue();
318
319 Hint *Hints[] = {&Width, &Interleave, &Force,
320 &IsVectorized, &Predicate, &Scalable};
321 for (auto *H : Hints) {
322 if (Name == H->Name) {
323 if (H->validate(Val))
324 H->Value = Val;
325 else
326 LLVM_DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n");
327 break;
328 }
329 }
330}
331
332// Return true if the inner loop \p Lp is uniform with regard to the outer loop
333// \p OuterLp (i.e., if the outer loop is vectorized, all the vector lanes
334// executing the inner loop will execute the same iterations). This check is
335// very constrained for now but it will be relaxed in the future. \p Lp is
336// considered uniform if it meets all the following conditions:
337// 1) it has a canonical IV (starting from 0 and with stride 1),
338// 2) its latch terminator is a conditional branch and,
339// 3) its latch condition is a compare instruction whose operands are the
340// canonical IV and an OuterLp invariant.
341// This check doesn't take into account the uniformity of other conditions not
342// related to the loop latch because they don't affect the loop uniformity.
343//
344// NOTE: We decided to keep all these checks and its associated documentation
345// together so that we can easily have a picture of the current supported loop
346// nests. However, some of the current checks don't depend on \p OuterLp and
347// would be redundantly executed for each \p Lp if we invoked this function for
348// different candidate outer loops. This is not the case for now because we
349// don't currently have the infrastructure to evaluate multiple candidate outer
350// loops and \p OuterLp will be a fixed parameter while we only support explicit
351// outer loop vectorization. It's also very likely that these checks go away
352// before introducing the aforementioned infrastructure. However, if this is not
353// the case, we should move the \p OuterLp independent checks to a separate
354// function that is only executed once for each \p Lp.
355static bool isUniformLoop(Loop *Lp, Loop *OuterLp) {
356 assert(Lp->getLoopLatch() && "Expected loop with a single latch.");
357
358 // If Lp is the outer loop, it's uniform by definition.
359 if (Lp == OuterLp)
360 return true;
361 assert(OuterLp->contains(Lp) && "OuterLp must contain Lp.");
362
363 // 1.
365 if (!IV) {
366 LLVM_DEBUG(dbgs() << "LV: Canonical IV not found.\n");
367 return false;
368 }
369
370 // 2.
371 BasicBlock *Latch = Lp->getLoopLatch();
372 auto *LatchBr = dyn_cast<CondBrInst>(Latch->getTerminator());
373 if (!LatchBr) {
374 LLVM_DEBUG(dbgs() << "LV: Unsupported loop latch branch.\n");
375 return false;
376 }
377
378 // 3.
379 auto *LatchCmp = dyn_cast<CmpInst>(LatchBr->getCondition());
380 if (!LatchCmp) {
382 dbgs() << "LV: Loop latch condition is not a compare instruction.\n");
383 return false;
384 }
385
386 Value *CondOp0 = LatchCmp->getOperand(0);
387 Value *CondOp1 = LatchCmp->getOperand(1);
388 Value *IVUpdate = IV->getIncomingValueForBlock(Latch);
389 if (!(CondOp0 == IVUpdate && OuterLp->isLoopInvariant(CondOp1)) &&
390 !(CondOp1 == IVUpdate && OuterLp->isLoopInvariant(CondOp0))) {
391 LLVM_DEBUG(dbgs() << "LV: Loop latch condition is not uniform.\n");
392 return false;
393 }
394
395 return true;
396}
397
398// Return true if \p Lp and all its nested loops are uniform with regard to \p
399// OuterLp.
400static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp) {
401 if (!isUniformLoop(Lp, OuterLp))
402 return false;
403
404 // Check if nested loops are uniform.
405 for (Loop *SubLp : *Lp)
406 if (!isUniformLoopNest(SubLp, OuterLp))
407 return false;
408
409 return true;
410}
411
413 assert(Ty->isIntOrPtrTy() && "Expected integer or pointer type");
414
415 if (Ty->isPointerTy())
416 return DL.getIntPtrType(Ty->getContext(), Ty->getPointerAddressSpace());
417
418 // It is possible that char's or short's overflow when we ask for the loop's
419 // trip count, work around this by changing the type size.
420 if (Ty->getScalarSizeInBits() < 32)
421 return Type::getInt32Ty(Ty->getContext());
422
423 return cast<IntegerType>(Ty);
424}
425
427 Type *Ty1) {
430 return TyA->getScalarSizeInBits() > TyB->getScalarSizeInBits() ? TyA : TyB;
431}
432
433/// Check that the instruction has outside loop users and is not an
434/// identified reduction variable.
435static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
436 SmallPtrSetImpl<Value *> &AllowedExit) {
437 // Reductions, Inductions and non-header phis are allowed to have exit users. All
438 // other instructions must not have external users.
439 if (!AllowedExit.count(Inst))
440 // Check that all of the users of the loop are inside the BB.
441 for (User *U : Inst->users()) {
443 // This user may be a reduction exit value.
444 if (!TheLoop->contains(UI)) {
445 LLVM_DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n');
446 return true;
447 }
448 }
449 return false;
450}
451
452/// Returns true if A and B have same pointer operands or same SCEVs addresses
454 StoreInst *B) {
455 // Compare store
456 if (A == B)
457 return true;
458
459 // Otherwise Compare pointers
460 Value *APtr = A->getPointerOperand();
461 Value *BPtr = B->getPointerOperand();
462 if (APtr == BPtr)
463 return true;
464
465 // Otherwise compare address SCEVs
466 return SE->getSCEV(APtr) == SE->getSCEV(BPtr);
467}
468
470 Value *Ptr) const {
471 // FIXME: Currently, the set of symbolic strides is sometimes queried before
472 // it's collected. This happens from canVectorizeWithIfConvert, when the
473 // pointer is checked to reference consecutive elements suitable for a
474 // masked access.
475 const auto &Strides =
476 LAI ? LAI->getSymbolicStrides() : DenseMap<Value *, const SCEV *>();
477
478 int Stride = getPtrStride(PSE, AccessTy, Ptr, TheLoop, *DT, Strides,
479 AllowRuntimeSCEVChecks, false)
480 .value_or(0);
481 if (Stride == 1 || Stride == -1)
482 return Stride;
483 return 0;
484}
485
487 return LAI->isInvariant(V);
488}
489
490namespace {
491/// A rewriter to build the SCEVs for each of the VF lanes in the expected
492/// vectorized loop, which can then be compared to detect their uniformity. This
493/// is done by replacing the AddRec SCEVs of the original scalar loop (TheLoop)
494/// with new AddRecs where the step is multiplied by StepMultiplier and Offset *
495/// Step is added. Also checks if all sub-expressions are analyzable w.r.t.
496/// uniformity.
497class SCEVAddRecForUniformityRewriter
498 : public SCEVRewriteVisitor<SCEVAddRecForUniformityRewriter> {
499 /// Multiplier to be applied to the step of AddRecs in TheLoop.
500 unsigned StepMultiplier;
501
502 /// Offset to be added to the AddRecs in TheLoop.
503 unsigned Offset;
504
505 /// Loop for which to rewrite AddRecsFor.
506 Loop *TheLoop;
507
508 /// Is any sub-expressions not analyzable w.r.t. uniformity?
509 bool CannotAnalyze = false;
510
511 bool canAnalyze() const { return !CannotAnalyze; }
512
513public:
514 SCEVAddRecForUniformityRewriter(ScalarEvolution &SE, unsigned StepMultiplier,
515 unsigned Offset, Loop *TheLoop)
516 : SCEVRewriteVisitor(SE), StepMultiplier(StepMultiplier), Offset(Offset),
517 TheLoop(TheLoop) {}
518
519 const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
520 assert(Expr->getLoop() == TheLoop &&
521 "addrec outside of TheLoop must be invariant and should have been "
522 "handled earlier");
523 // Build a new AddRec by multiplying the step by StepMultiplier and
524 // incrementing the start by Offset * step.
525 Type *Ty = Expr->getType();
526 const SCEV *Step = Expr->getStepRecurrence(SE);
527 if (!SE.isLoopInvariant(Step, TheLoop)) {
528 CannotAnalyze = true;
529 return Expr;
530 }
531 const SCEV *NewStep =
532 SE.getMulExpr(Step, SE.getConstant(Ty, StepMultiplier));
533 const SCEV *ScaledOffset = SE.getMulExpr(Step, SE.getConstant(Ty, Offset));
534 const SCEV *NewStart =
535 SE.getAddExpr(Expr->getStart(), SCEVUse(ScaledOffset));
536 return SE.getAddRecExpr(NewStart, NewStep, TheLoop, SCEV::FlagAnyWrap);
537 }
538
539 const SCEV *visit(const SCEV *S) {
540 if (CannotAnalyze || SE.isLoopInvariant(S, TheLoop))
541 return S;
543 }
544
545 const SCEV *visitUnknown(const SCEVUnknown *S) {
546 if (SE.isLoopInvariant(S, TheLoop))
547 return S;
548 // The value could vary across iterations.
549 CannotAnalyze = true;
550 return S;
551 }
552
553 const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *S) {
554 // Could not analyze the expression.
555 CannotAnalyze = true;
556 return S;
557 }
558
559 static const SCEV *rewrite(const SCEV *S, ScalarEvolution &SE,
560 unsigned StepMultiplier, unsigned Offset,
561 Loop *TheLoop) {
562 /// Bail out if the expression does not contain an UDiv expression.
563 /// Uniform values which are not loop invariant require operations to strip
564 /// out the lowest bits. For now just look for UDivs and use it to avoid
565 /// re-writing UDIV-free expressions for other lanes to limit compile time.
566 if (!SCEVExprContains(S,
567 [](const SCEV *S) { return isa<SCEVUDivExpr>(S); }))
568 return SE.getCouldNotCompute();
569
570 SCEVAddRecForUniformityRewriter Rewriter(SE, StepMultiplier, Offset,
571 TheLoop);
572 const SCEV *Result = Rewriter.visit(S);
573
574 if (Rewriter.canAnalyze())
575 return Result;
576 return SE.getCouldNotCompute();
577 }
578};
579
580} // namespace
581
583 if (isInvariant(V))
584 return true;
585 if (VF.isScalable())
586 return false;
587 if (VF.isScalar())
588 return true;
589
590 // Since we rely on SCEV for uniformity, if the type is not SCEVable, it is
591 // never considered uniform.
592 auto *SE = PSE.getSE();
593 if (!SE->isSCEVable(V->getType()))
594 return false;
595 const SCEV *S = SE->getSCEV(V);
596
597 // Rewrite AddRecs in TheLoop to step by VF and check if the expression for
598 // lane 0 matches the expressions for all other lanes.
599 unsigned FixedVF = VF.getKnownMinValue();
600 const SCEV *FirstLaneExpr =
601 SCEVAddRecForUniformityRewriter::rewrite(S, *SE, FixedVF, 0, TheLoop);
602 if (isa<SCEVCouldNotCompute>(FirstLaneExpr))
603 return false;
604
605 // Make sure the expressions for lanes FixedVF-1..1 match the expression for
606 // lane 0. We check lanes in reverse order for compile-time, as frequently
607 // checking the last lane is sufficient to rule out uniformity.
608 return all_of(reverse(seq<unsigned>(1, FixedVF)), [&](unsigned I) {
609 const SCEV *IthLaneExpr =
610 SCEVAddRecForUniformityRewriter::rewrite(S, *SE, FixedVF, I, TheLoop);
611 return FirstLaneExpr == IthLaneExpr;
612 });
613}
614
616 ElementCount VF) const {
618 if (!Ptr)
619 return false;
620 // Note: There's nothing inherent which prevents predicated loads and
621 // stores from being uniform. The current lowering simply doesn't handle
622 // it; in particular, the cost model distinguishes scatter/gather from
623 // scalar w/predication, and we currently rely on the scalar path.
624 return isUniform(Ptr, VF) && !blockNeedsPredication(I.getParent());
625}
626
627bool LoopVectorizationLegality::canVectorizeOuterLoop() {
628 assert(!TheLoop->isInnermost() && "We are not vectorizing an outer loop.");
629 // Store the result and return it at the end instead of exiting early, in case
630 // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
631 bool Result = true;
632 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
633
634 for (BasicBlock *BB : TheLoop->blocks()) {
635 // Check whether the BB terminator is a branch. Any other terminator is
636 // not supported yet.
637 Instruction *Term = BB->getTerminator();
639 reportVectorizationFailure("Unsupported basic block terminator",
640 "loop control flow is not understood by vectorizer",
641 "CFGNotUnderstood", ORE, TheLoop);
642 if (DoExtraAnalysis)
643 Result = false;
644 else
645 return false;
646 }
647
648 // Check whether the branch is a supported one. Only unconditional
649 // branches, conditional branches with an outer loop invariant condition or
650 // backedges are supported.
651 // FIXME: We skip these checks when VPlan predication is enabled as we
652 // want to allow divergent branches. This whole check will be removed
653 // once VPlan predication is on by default.
654 auto *Br = dyn_cast<CondBrInst>(Term);
655 if (Br && !TheLoop->isLoopInvariant(Br->getCondition()) &&
656 !LI->isLoopHeader(Br->getSuccessor(0)) &&
657 !LI->isLoopHeader(Br->getSuccessor(1))) {
658 reportVectorizationFailure("Unsupported conditional branch",
659 "loop control flow is not understood by vectorizer",
660 "CFGNotUnderstood", ORE, TheLoop);
661 if (DoExtraAnalysis)
662 Result = false;
663 else
664 return false;
665 }
666 }
667
668 // Check whether inner loops are uniform. At this point, we only support
669 // simple outer loops scenarios with uniform nested loops.
670 if (!isUniformLoopNest(TheLoop /*loop nest*/,
671 TheLoop /*context outer loop*/)) {
672 reportVectorizationFailure("Outer loop contains divergent loops",
673 "loop control flow is not understood by vectorizer",
674 "CFGNotUnderstood", ORE, TheLoop);
675 if (DoExtraAnalysis)
676 Result = false;
677 else
678 return false;
679 }
680
681 // Check whether we are able to set up outer loop induction.
682 if (!setupOuterLoopInductions()) {
683 reportVectorizationFailure("Unsupported outer loop Phi(s)",
684 "UnsupportedPhi", ORE, TheLoop);
685 if (DoExtraAnalysis)
686 Result = false;
687 else
688 return false;
689 }
690
691 return Result;
692}
693
694void LoopVectorizationLegality::addInductionPhi(
695 PHINode *Phi, const InductionDescriptor &ID,
696 SmallPtrSetImpl<Value *> &AllowedExit) {
697 Inductions[Phi] = ID;
698
699 // In case this induction also comes with casts that we know we can ignore
700 // in the vectorized loop body, record them here. All casts could be recorded
701 // here for ignoring, but suffices to record only the first (as it is the
702 // only one that may bw used outside the cast sequence).
703 ArrayRef<Instruction *> Casts = ID.getCastInsts();
704 if (!Casts.empty())
705 InductionCastsToIgnore.insert(*Casts.begin());
706
707 Type *PhiTy = Phi->getType();
708 const DataLayout &DL = Phi->getDataLayout();
709
710 assert((PhiTy->isIntOrPtrTy() || PhiTy->isFloatingPointTy()) &&
711 "Expected int, ptr, or FP induction phi type");
712
713 // Get the widest type.
714 if (PhiTy->isIntOrPtrTy()) {
715 if (!WidestIndTy)
716 WidestIndTy = getInductionIntegerTy(DL, PhiTy);
717 else
718 WidestIndTy = getWiderInductionTy(DL, PhiTy, WidestIndTy);
719 }
720
721 // Int inductions are special because we only allow one IV.
722 if (ID.getKind() == InductionDescriptor::IK_IntInduction &&
723 ID.getConstIntStepValue() && ID.getConstIntStepValue()->isOne() &&
724 isa<Constant>(ID.getStartValue()) &&
725 cast<Constant>(ID.getStartValue())->isNullValue()) {
726
727 // Use the phi node with the widest type as induction. Use the last
728 // one if there are multiple (no good reason for doing this other
729 // than it is expedient). We've checked that it begins at zero and
730 // steps by one, so this is a canonical induction variable.
731 if (!PrimaryInduction || PhiTy == WidestIndTy)
732 PrimaryInduction = Phi;
733 }
734
735 // Both the PHI node itself, and the "post-increment" value feeding
736 // back into the PHI node may have external users.
737 // We can allow those uses, except if the SCEVs we have for them rely
738 // on predicates that only hold within the loop, since allowing the exit
739 // currently means re-using this SCEV outside the loop (see PR33706 for more
740 // details).
741 if (PSE.getPredicate().isAlwaysTrue()) {
742 AllowedExit.insert(Phi);
743 AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch()));
744 }
745
746 LLVM_DEBUG(dbgs() << "LV: Found an induction variable.\n");
747}
748
749bool LoopVectorizationLegality::setupOuterLoopInductions() {
750 BasicBlock *Header = TheLoop->getHeader();
751
752 // Returns true if a given Phi is a supported induction.
753 auto IsSupportedPhi = [&](PHINode &Phi) -> bool {
754 InductionDescriptor ID;
755 if (InductionDescriptor::isInductionPHI(&Phi, TheLoop, PSE, ID) &&
757 addInductionPhi(&Phi, ID, AllowedExit);
758 return true;
759 }
760 // Bail out for any Phi in the outer loop header that is not a supported
761 // induction.
763 dbgs() << "LV: Found unsupported PHI for outer loop vectorization.\n");
764 return false;
765 };
766
767 return llvm::all_of(Header->phis(), IsSupportedPhi);
768}
769
770/// Checks if a function is scalarizable according to the TLI, in
771/// the sense that it should be vectorized and then expanded in
772/// multiple scalar calls. This is represented in the
773/// TLI via mappings that do not specify a vector name, as in the
774/// following example:
775///
776/// const VecDesc VecIntrinsics[] = {
777/// {"llvm.phx.abs.i32", "", 4}
778/// };
779static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI) {
780 const StringRef ScalarName = CI.getCalledFunction()->getName();
781 bool Scalarize = TLI.isFunctionVectorizable(ScalarName);
782 // Check that all known VFs are not associated to a vector
783 // function, i.e. the vector name is emty.
784 if (Scalarize) {
785 ElementCount WidestFixedVF, WidestScalableVF;
786 TLI.getWidestVF(ScalarName, WidestFixedVF, WidestScalableVF);
788 ElementCount::isKnownLE(VF, WidestFixedVF); VF *= 2)
789 Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF);
791 ElementCount::isKnownLE(VF, WidestScalableVF); VF *= 2)
792 Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF);
793 assert((WidestScalableVF.isZero() || !Scalarize) &&
794 "Caller may decide to scalarize a variant using a scalable VF");
795 }
796 return Scalarize;
797}
798
799bool LoopVectorizationLegality::canVectorizeInstrs() {
800 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
801 bool Result = true;
802
803 // For each block in the loop.
804 for (BasicBlock *BB : TheLoop->blocks()) {
805 // Scan the instructions in the block and look for hazards.
806 for (Instruction &I : *BB) {
807 Result &= canVectorizeInstr(I);
808 if (!DoExtraAnalysis && !Result)
809 return false;
810 }
811 }
812
813 if (!PrimaryInduction) {
814 if (Inductions.empty()) {
816 "Did not find one integer induction var",
817 "loop induction variable could not be identified",
818 "NoInductionVariable", ORE, TheLoop);
819 return false;
820 }
821 if (!WidestIndTy) {
823 "Did not find one integer induction var",
824 "integer loop induction variable could not be identified",
825 "NoIntegerInductionVariable", ORE, TheLoop);
826 return false;
827 }
828 LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
829 }
830
831 // Now we know the widest induction type, check if our found induction
832 // is the same size. If it's not, unset it here and InnerLoopVectorizer
833 // will create another.
834 if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType())
835 PrimaryInduction = nullptr;
836
837 return Result;
838}
839
840bool LoopVectorizationLegality::canVectorizeInstr(Instruction &I) {
841 BasicBlock *BB = I.getParent();
842 BasicBlock *Header = TheLoop->getHeader();
843
844 if (auto *Phi = dyn_cast<PHINode>(&I)) {
845 Type *PhiTy = Phi->getType();
846 // Check that this PHI type is allowed.
847 if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
848 !PhiTy->isPointerTy()) {
850 "Found a non-int non-pointer PHI",
851 "loop control flow is not understood by vectorizer",
852 "CFGNotUnderstood", ORE, TheLoop);
853 return false;
854 }
855
856 // If this PHINode is not in the header block, then we know that we
857 // can convert it to select during if-conversion. No need to check if
858 // the PHIs in this block are induction or reduction variables.
859 if (BB != Header) {
860 // Non-header phi nodes that have outside uses can be vectorized. Add
861 // them to the list of allowed exits.
862 // Unsafe cyclic dependencies with header phis are identified during
863 // legalization for reduction, induction and fixed order
864 // recurrences.
865 AllowedExit.insert(&I);
866 return true;
867 }
868
869 // We only allow if-converted PHIs with exactly two incoming values.
870 if (Phi->getNumIncomingValues() != 2) {
872 "Found an invalid PHI",
873 "loop control flow is not understood by vectorizer",
874 "CFGNotUnderstood", ORE, TheLoop, Phi);
875 return false;
876 }
877
878 RecurrenceDescriptor RedDes;
879 if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC, DT,
880 PSE.getSE())) {
881 Requirements->addExactFPMathInst(RedDes.getExactFPMathInst());
882 AllowedExit.insert(RedDes.getLoopExitInstr());
883 Reductions[Phi] = std::move(RedDes);
886 RedDes.getRecurrenceKind())) &&
887 "Only min/max recurrences are allowed to have multiple uses "
888 "currently");
889 return true;
890 }
891
892 // We prevent matching non-constant strided pointer IVS to preserve
893 // historical vectorizer behavior after a generalization of the
894 // IVDescriptor code. The intent is to remove this check, but we
895 // have to fix issues around code quality for such loops first.
896 auto IsDisallowedStridedPointerInduction =
897 [](const InductionDescriptor &ID) {
899 return false;
900 return ID.getKind() == InductionDescriptor::IK_PtrInduction &&
901 ID.getConstIntStepValue() == nullptr;
902 };
903
904 // TODO: Instead of recording the AllowedExit, it would be good to
905 // record the complementary set: NotAllowedExit. These include (but may
906 // not be limited to):
907 // 1. Reduction phis as they represent the one-before-last value, which
908 // is not available when vectorized
909 // 2. Induction phis and increment when SCEV predicates cannot be used
910 // outside the loop - see addInductionPhi
911 // 3. Non-Phis with outside uses when SCEV predicates cannot be used
912 // outside the loop - see call to hasOutsideLoopUser in the non-phi
913 // handling below
914 // 4. FixedOrderRecurrence phis that can possibly be handled by
915 // extraction.
916 // By recording these, we can then reason about ways to vectorize each
917 // of these NotAllowedExit.
918 InductionDescriptor ID;
919 if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID) &&
920 !IsDisallowedStridedPointerInduction(ID)) {
921 addInductionPhi(Phi, ID, AllowedExit);
922 Requirements->addExactFPMathInst(ID.getExactFPMathInst());
923 return true;
924 }
925
926 if (RecurrenceDescriptor::isFixedOrderRecurrence(Phi, TheLoop, DT)) {
927 AllowedExit.insert(Phi);
928 FixedOrderRecurrences.insert(Phi);
929 return true;
930 }
931
932 // As a last resort, coerce the PHI to a AddRec expression
933 // and re-try classifying it a an induction PHI.
934 if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true) &&
935 !IsDisallowedStridedPointerInduction(ID)) {
936 addInductionPhi(Phi, ID, AllowedExit);
937 return true;
938 }
939
940 reportVectorizationFailure("Found an unidentified PHI",
941 "value that could not be identified as "
942 "reduction is used outside the loop",
943 "NonReductionValueUsedOutsideLoop", ORE, TheLoop,
944 Phi);
945 return false;
946 } // end of PHI handling
947
948 // We handle calls that:
949 // * Have a mapping to an IR intrinsic.
950 // * Have a vector version available.
951 auto *CI = dyn_cast<CallInst>(&I);
952
953 if (CI && !getVectorIntrinsicIDForCall(CI, TLI) &&
954 !(CI->getCalledFunction() && TLI &&
955 (!VFDatabase::getMappings(*CI).empty() || isTLIScalarize(*TLI, *CI)))) {
956 // If the call is a recognized math libary call, it is likely that
957 // we can vectorize it given loosened floating-point constraints.
958 LibFunc Func;
959 bool IsMathLibCall =
960 TLI && CI->getCalledFunction() && CI->getType()->isFloatingPointTy() &&
961 TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) &&
962 TLI->hasOptimizedCodeGen(Func);
963
964 if (IsMathLibCall) {
965 // TODO: Ideally, we should not use clang-specific language here,
966 // but it's hard to provide meaningful yet generic advice.
967 // Also, should this be guarded by allowExtraAnalysis() and/or be part
968 // of the returned info from isFunctionVectorizable()?
970 "Found a non-intrinsic callsite",
971 "library call cannot be vectorized. "
972 "Try compiling with -fno-math-errno, -ffast-math, "
973 "or similar flags",
974 "CantVectorizeLibcall", ORE, TheLoop, CI);
975 } else {
976 reportVectorizationFailure("Found a non-intrinsic callsite",
977 "call instruction cannot be vectorized",
978 "CantVectorizeLibcall", ORE, TheLoop, CI);
979 }
980 return false;
981 }
982
983 // Some intrinsics have scalar arguments and should be same in order for
984 // them to be vectorized (i.e. loop invariant).
985 if (CI) {
986 auto *SE = PSE.getSE();
987 Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI);
988 for (unsigned Idx = 0; Idx < CI->arg_size(); ++Idx)
989 if (isVectorIntrinsicWithScalarOpAtArg(IntrinID, Idx, TTI)) {
990 if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(Idx)), TheLoop)) {
992 "Found unvectorizable intrinsic",
993 "intrinsic instruction cannot be vectorized",
994 "CantVectorizeIntrinsic", ORE, TheLoop, CI);
995 return false;
996 }
997 }
998 }
999
1000 // If we found a vectorized variant of a function, note that so LV can
1001 // make better decisions about maximum VF.
1002 if (CI && !VFDatabase::getMappings(*CI).empty())
1003 VecCallVariantsFound = true;
1004
1005 auto CanWidenInstructionTy = [](Instruction const &Inst) {
1006 Type *InstTy = Inst.getType();
1007 if (!isa<StructType>(InstTy))
1008 return canVectorizeTy(InstTy);
1009
1010 // For now, we only recognize struct values returned from calls where
1011 // all users are extractvalue as vectorizable. All element types of the
1012 // struct must be types that can be widened.
1013 return isa<CallInst>(Inst) && canVectorizeTy(InstTy) &&
1014 all_of(Inst.users(), IsaPred<ExtractValueInst>);
1015 };
1016
1017 // Check that the instruction return type is vectorizable.
1018 // We can't vectorize casts from vector type to scalar type.
1019 // Also, we can't vectorize extractelement instructions.
1020 if (!CanWidenInstructionTy(I) ||
1021 (isa<CastInst>(I) &&
1022 !VectorType::isValidElementType(I.getOperand(0)->getType())) ||
1024 reportVectorizationFailure("Found unvectorizable type",
1025 "instruction return type cannot be vectorized",
1026 "CantVectorizeInstructionReturnType", ORE,
1027 TheLoop, &I);
1028 return false;
1029 }
1030
1031 // Check that the stored type is vectorizable.
1032 if (auto *ST = dyn_cast<StoreInst>(&I)) {
1033 Type *T = ST->getValueOperand()->getType();
1035 reportVectorizationFailure("Store instruction cannot be vectorized",
1036 "CantVectorizeStore", ORE, TheLoop, ST);
1037 return false;
1038 }
1039
1040 // For nontemporal stores, check that a nontemporal vector version is
1041 // supported on the target.
1042 if (ST->getMetadata(LLVMContext::MD_nontemporal)) {
1043 // Arbitrarily try a vector of 2 elements.
1044 auto *VecTy = FixedVectorType::get(T, /*NumElts=*/2);
1045 assert(VecTy && "did not find vectorized version of stored type");
1046 if (!TTI->isLegalNTStore(VecTy, ST->getAlign())) {
1048 "nontemporal store instruction cannot be vectorized",
1049 "CantVectorizeNontemporalStore", ORE, TheLoop, ST);
1050 return false;
1051 }
1052 }
1053
1054 } else if (auto *LD = dyn_cast<LoadInst>(&I)) {
1055 if (LD->getMetadata(LLVMContext::MD_nontemporal)) {
1056 // For nontemporal loads, check that a nontemporal vector version is
1057 // supported on the target (arbitrarily try a vector of 2 elements).
1058 auto *VecTy = FixedVectorType::get(I.getType(), /*NumElts=*/2);
1059 assert(VecTy && "did not find vectorized version of load type");
1060 if (!TTI->isLegalNTLoad(VecTy, LD->getAlign())) {
1062 "nontemporal load instruction cannot be vectorized",
1063 "CantVectorizeNontemporalLoad", ORE, TheLoop, LD);
1064 return false;
1065 }
1066 }
1067
1068 // FP instructions can allow unsafe algebra, thus vectorizable by
1069 // non-IEEE-754 compliant SIMD units.
1070 // This applies to floating-point math operations and calls, not memory
1071 // operations, shuffles, or casts, as they don't change precision or
1072 // semantics.
1073 } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) &&
1074 !I.isFast()) {
1075 LLVM_DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n");
1076 Hints->setPotentiallyUnsafe();
1077 }
1078
1079 // Reduction instructions are allowed to have exit users.
1080 // All other instructions must not have external users.
1081 if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) {
1082 // We can safely vectorize loops where instructions within the loop are
1083 // used outside the loop only if the SCEV predicates within the loop is
1084 // same as outside the loop. Allowing the exit means reusing the SCEV
1085 // outside the loop.
1086 if (PSE.getPredicate().isAlwaysTrue()) {
1087 AllowedExit.insert(&I);
1088 return true;
1089 }
1090 reportVectorizationFailure("Value cannot be used outside the loop",
1091 "ValueUsedOutsideLoop", ORE, TheLoop, &I);
1092 return false;
1093 }
1094
1095 return true;
1096}
1097
1098/// Find histogram operations that match high-level code in loops:
1099/// \code
1100/// buckets[indices[i]]+=step;
1101/// \endcode
1102///
1103/// It matches a pattern starting from \p HSt, which Stores to the 'buckets'
1104/// array the computed histogram. It uses a BinOp to sum all counts, storing
1105/// them using a loop-variant index Load from the 'indices' input array.
1106///
1107/// On successful matches it updates the STATISTIC 'HistogramsDetected',
1108/// regardless of hardware support. When there is support, it additionally
1109/// stores the BinOp/Load pairs in \p HistogramCounts, as well the pointers
1110/// used to update histogram in \p HistogramPtrs.
/// NOTE(review): the \p HistogramCounts / \p HistogramPtrs names above look
/// stale; the actual output parameter below is \p Histograms, which receives
/// (Load, BinOp, Store) triples -- confirm and refresh the doxygen comment.
1111static bool findHistogram(LoadInst *LI, StoreInst *HSt, Loop *TheLoop,
1112 const PredicatedScalarEvolution &PSE,
1113 SmallVectorImpl<HistogramInfo> &Histograms) {
1114
1115 // Store value must come from a Binary Operation.
1116 Instruction *HPtrInstr = nullptr;
1117 BinaryOperator *HBinOp = nullptr;
1118 if (!match(HSt, m_Store(m_BinOp(HBinOp), m_Instruction(HPtrInstr))))
1119 return false;
1120
1121 // BinOp must be an Add or a Sub modifying the bucket value by a
1122 // loop invariant amount.
1123 // FIXME: We assume the loop invariant term is on the RHS.
1124 // Fine for an immediate/constant, but maybe not a generic value?
1125 Value *HIncVal = nullptr;
1126 if (!match(HBinOp, m_Add(m_Load(m_Specific(HPtrInstr)), m_Value(HIncVal))) &&
1127 !match(HBinOp, m_Sub(m_Load(m_Specific(HPtrInstr)), m_Value(HIncVal))))
1128 return false;
1129
1130 // Make sure the increment value is loop invariant.
1131 if (!TheLoop->isLoopInvariant(HIncVal))
1132 return false;
1133
1134 // The address to store is calculated through a GEP Instruction.
 // NOTE(review): the definition of GEP (presumably a dyn_cast of HPtrInstr to
 // GetElementPtrInst) is elided in this listing -- confirm against upstream.
1136 if (!GEP)
1137 return false;
1138
1139 // Restrict address calculation to constant indices except for the last term.
1140 Value *HIdx = nullptr;
1141 for (Value *Index : GEP->indices()) {
 // A variable index must be the last term: once one is found, seeing any
 // further index (constant or not) rejects the pattern.
1142 if (HIdx)
1143 return false;
1144 if (!isa<ConstantInt>(Index))
1145 HIdx = Index;
1146 }
1147
1148 if (!HIdx)
1149 return false;
1150
1151 // Check that the index is calculated by loading from another array. Ignore
1152 // any extensions.
1153 // FIXME: Support indices from other sources than a linear load from memory?
1154 // We're currently trying to match an operation looping over an array
1155 // of indices, but there could be additional levels of indirection
1156 // in place, or possibly some additional calculation to form the index
1157 // from the loaded data.
1158 Value *VPtrVal;
1159 if (!match(HIdx, m_ZExtOrSExtOrSelf(m_Load(m_Value(VPtrVal)))))
1160 return false;
1161
1162 // Make sure the index address varies in this loop, not an outer loop.
1163 const auto *AR = dyn_cast<SCEVAddRecExpr>(PSE.getSE()->getSCEV(VPtrVal));
1164 if (!AR || AR->getLoop() != TheLoop)
1165 return false;
1166
1167 // Ensure we'll have the same mask by checking that all parts of the histogram
1168 // (gather load, update, scatter store) are in the same block.
1169 LoadInst *IndexedLoad = cast<LoadInst>(HBinOp->getOperand(0));
1170 BasicBlock *LdBB = IndexedLoad->getParent();
1171 if (LdBB != HBinOp->getParent() || LdBB != HSt->getParent())
1172 return false;
1173
1174 LLVM_DEBUG(dbgs() << "LV: Found histogram for: " << *HSt << "\n");
1175
1176 // Store the operations that make up the histogram.
1177 Histograms.emplace_back(IndexedLoad, HBinOp, HSt);
1178 return true;
1179}
1180
// Returns true if the only "IndirectUnsafe" dependence in the loop forms a
// histogram update we know how to vectorize (recorded in Histograms).
1181bool LoopVectorizationLegality::canVectorizeIndirectUnsafeDependences() {
1182 // For now, we only support an IndirectUnsafe dependency that calculates
1183 // a histogram
 // NOTE(review): the guard condition for the early return below (original
 // line 1184, presumably a histogram-vectorization enable flag) is elided in
 // this listing -- confirm against upstream.
1185 return false;
1186
1187 // Find a single IndirectUnsafe dependency.
1188 const MemoryDepChecker::Dependence *IUDep = nullptr;
1189 const MemoryDepChecker &DepChecker = LAI->getDepChecker();
1190 const auto *Deps = DepChecker.getDependences();
1191 // If there were too many dependences, LAA abandons recording them. We can't
1192 // proceed safely if we don't know what the dependences are.
1193 if (!Deps)
1194 return false;
1195
1196 for (const MemoryDepChecker::Dependence &Dep : *Deps) {
1197 // Ignore dependencies that are either known to be safe or can be
1198 // checked at runtime.
 // NOTE(review): the condition for this `continue` (original lines
 // 1199-1200, a check on Dep's safety classification) is elided here.
1201 continue;
1202
1203 // We're only interested in IndirectUnsafe dependencies here, where the
1204 // address might come from a load from memory. We also only want to handle
1205 // one such dependency, at least for now.
1206 if (Dep.Type != MemoryDepChecker::Dependence::IndirectUnsafe || IUDep)
1207 return false;
1208
1209 IUDep = &Dep;
1210 }
1211 if (!IUDep)
1212 return false;
1213
1214 // For now only normal loads and stores are supported.
1215 LoadInst *LI = dyn_cast<LoadInst>(IUDep->getSource(DepChecker));
1216 StoreInst *SI = dyn_cast<StoreInst>(IUDep->getDestination(DepChecker));
1217
1218 if (!LI || !SI)
1219 return false;
1220
1221 LLVM_DEBUG(dbgs() << "LV: Checking for a histogram on: " << *SI << "\n");
1222 return findHistogram(LI, SI, TheLoop, LAI->getPSE(), Histograms);
1223}
1224
// Checks, via LoopAccessInfo, that all memory accesses in the loop can be
// vectorized: dependences are safe (or form a supported histogram), and
// stores to loop-invariant addresses are restricted to supported shapes.
// On success, folds LAA's runtime predicate into this pass's PSE.
1225bool LoopVectorizationLegality::canVectorizeMemory() {
1226 LAI = &LAIs.getInfo(*TheLoop);
1227 const OptimizationRemarkAnalysis *LAR = LAI->getReport();
1228 if (LAR) {
 // Forward LAA's analysis remark under the vectorizer's pass name.
1229 ORE->emit([&]() {
1230 return OptimizationRemarkAnalysis(Hints->vectorizeAnalysisPassName(),
1231 "loop not vectorized: ", *LAR);
1232 });
1233 }
1234
1235 if (!LAI->canVectorizeMemory()) {
 // NOTE(review): original lines 1236-1237 are elided here -- presumably a
 // guard (early-exit loop with side effects) plus the start of the
 // reportVectorizationFailure(...) call whose arguments follow. Confirm.
1238 "Cannot vectorize unsafe dependencies in uncountable exit loop with "
1239 "side effects",
1240 "CantVectorizeUnsafeDependencyForEELoopWithSideEffects", ORE,
1241 TheLoop);
1242 return false;
1243 }
1244
 // Unsafe dependences may still be vectorizable if they form a histogram.
1245 return canVectorizeIndirectUnsafeDependences();
1246 }
1247
1248 if (LAI->hasLoadStoreDependenceInvolvingLoopInvariantAddress()) {
1249 reportVectorizationFailure("We don't allow storing to uniform addresses",
1250 "write to a loop invariant address could not "
1251 "be vectorized",
1252 "CantVectorizeStoreToLoopInvariantAddress", ORE,
1253 TheLoop);
1254 return false;
1255 }
1256
1257 // We can vectorize stores to invariant address when final reduction value is
1258 // guaranteed to be stored at the end of the loop. Also, if decision to
1259 // vectorize loop is made, runtime checks are added so as to make sure that
1260 // invariant address won't alias with any other objects.
1261 if (!LAI->getStoresToInvariantAddresses().empty()) {
1262 // For each invariant address, check if last stored value is unconditional
1263 // and the address is not calculated inside the loop.
1264 for (StoreInst *SI : LAI->getStoresToInvariantAddresses()) {
 // NOTE(review): the condition for this `continue` (original line 1265,
 // presumably a check that SI is not an invariant store of a reduction)
 // is elided in this listing.
1266 continue;
1267
1268 if (blockNeedsPredication(SI->getParent())) {
 // Conditional stores of a recurring value cannot be sunk out safely.
1270 "We don't allow storing to uniform addresses",
1271 "write of conditional recurring variant value to a loop "
1272 "invariant address could not be vectorized",
1273 "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
1274 return false;
1275 }
1276
1277 // Invariant address should be defined outside of loop. LICM pass usually
1278 // makes sure it happens, but in rare cases it does not, we do not want
1279 // to overcomplicate vectorization to support this case.
1280 if (Instruction *Ptr = dyn_cast<Instruction>(SI->getPointerOperand())) {
1281 if (TheLoop->contains(Ptr)) {
 // NOTE(review): the start of the reportVectorizationFailure(...) call
 // (original line 1282) is elided; only its arguments remain below.
1283 "Invariant address is calculated inside the loop",
1284 "write to a loop invariant address could not "
1285 "be vectorized",
1286 "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
1287 return false;
1288 }
1289 }
1290 }
1291
1292 if (LAI->hasStoreStoreDependenceInvolvingLoopInvariantAddress()) {
1293 // For each invariant address, check its last stored value is the result
1294 // of one of our reductions.
1295 //
1296 // We do not check if dependence with loads exists because that is already
1297 // checked via hasLoadStoreDependenceInvolvingLoopInvariantAddress.
1298 ScalarEvolution *SE = PSE.getSE();
1299 SmallVector<StoreInst *, 4> UnhandledStores;
1300 for (StoreInst *SI : LAI->getStoresToInvariantAddresses()) {
 // NOTE(review): the condition opening this block (original line 1301,
 // presumably "is SI an invariant store of a reduction") is elided.
1302 // Earlier stores to this address are effectively deadcode.
1303 // With opaque pointers it is possible for one pointer to be used with
1304 // different sizes of stored values:
1305 // store i32 0, ptr %x
1306 // store i8 0, ptr %x
1307 // The latest store doesn't complitely overwrite the first one in the
1308 // example. That is why we have to make sure that types of stored
1309 // values are same.
1310 // TODO: Check that bitwidth of unhandled store is smaller then the
1311 // one that overwrites it and add a test.
1312 erase_if(UnhandledStores, [SE, SI](StoreInst *I) {
1313 return storeToSameAddress(SE, SI, I) &&
1314 I->getValueOperand()->getType() ==
1315 SI->getValueOperand()->getType();
1316 });
1317 continue;
1318 }
1319 UnhandledStores.push_back(SI);
1320 }
1321
1322 bool IsOK = UnhandledStores.empty();
1323 // TODO: we should also validate against InvariantMemSets.
1324 if (!IsOK) {
 // NOTE(review): the start of the reportVectorizationFailure(...) call
 // (original line 1325) is elided; only its arguments remain below.
1326 "We don't allow storing to uniform addresses",
1327 "write to a loop invariant address could not "
1328 "be vectorized",
1329 "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
1330 return false;
1331 }
1332 }
1333 }
1334
 // Adopt any runtime SCEV predicates LAA required for its analysis.
1335 PSE.addPredicate(LAI->getPSE().getPredicate());
1336 return true;
1337}
1338
// NOTE(review): the first line of this definition (original line 1339,
// presumably "bool LoopVectorizationLegality::canVectorizeFPMath(") is elided
// in this listing; only the trailing parameter remains below. Confirm.
// Returns true if FP math in the loop can be vectorized: either there is no
// exact-FP-math requirement (or hints allow reordering), or strict (in-loop,
// ordered) reductions can honor it.
1340 bool EnableStrictReductions) {
1341
1342 // First check if there is any ExactFP math or if we allow reassociations
1343 if (!Requirements->getExactFPInst() || Hints->allowReordering())
1344 return true;
1345
1346 // If the above is false, we have ExactFPMath & do not allow reordering.
1347 // If the EnableStrictReductions flag is set, first check if we have any
1348 // Exact FP induction vars, which we cannot vectorize.
1349 if (!EnableStrictReductions ||
1350 any_of(getInductionVars(), [&](auto &Induction) -> bool {
1351 InductionDescriptor IndDesc = Induction.second;
1352 return IndDesc.getExactFPMathInst();
1353 }))
1354 return false;
1355
1356 // We can now only vectorize if all reductions with Exact FP math also
1357 // have the isOrdered flag set, which indicates that we can move the
1358 // reduction operations in-loop.
1359 return (all_of(getReductionVars(), [&](auto &Reduction) -> bool {
1360 const RecurrenceDescriptor &RdxDesc = Reduction.second;
1361 return !RdxDesc.hasExactFPMath() || RdxDesc.isOrdered();
1362 }));
1363}
1364
// NOTE(review): the signature line of this definition (original line 1365,
// presumably "bool LoopVectorizationLegality::isInvariantStoreOfReduction(
// StoreInst *SI)") is elided in this listing. The body returns true if SI is
// the intermediate store of one of the recognized reductions.
1366 return any_of(getReductionVars(), [&](auto &Reduction) -> bool {
1367 const RecurrenceDescriptor &RdxDesc = Reduction.second;
1368 return RdxDesc.IntermediateStore == SI;
1369 });
1370}
1371
// NOTE(review): the signature line of this definition (original line 1372,
// presumably "bool LoopVectorizationLegality::isInvariantAddressOfReduction(
// Value *V)") is elided in this listing. The body returns true if V (by
// identity or by SCEV equivalence) is the pointer operand of some reduction's
// intermediate store.
1373 return any_of(getReductionVars(), [&](auto &Reduction) -> bool {
1374 const RecurrenceDescriptor &RdxDesc = Reduction.second;
1375 if (!RdxDesc.IntermediateStore)
1376 return false;
1377
1378 ScalarEvolution *SE = PSE.getSE();
1379 Value *InvariantAddress = RdxDesc.IntermediateStore->getPointerOperand();
1380 return V == InvariantAddress ||
1381 SE->getSCEV(V) == SE->getSCEV(InvariantAddress);
1382 });
1383}
1384
// NOTE(review): the signature line (original 1385, presumably
// "bool LoopVectorizationLegality::isInductionPhi(const Value *V) const")
// and the definition of PN (original 1387, presumably a dyn_cast of In0 to
// PHINode) are elided in this listing -- confirm against upstream.
// Returns true if V is a PHI recorded in the Inductions map.
1386 Value *In0 = const_cast<Value *>(V);
1388 if (!PN)
1389 return false;
1390
1391 return Inductions.count(PN);
1392}
1393
// Returns the induction descriptor for Phi if it is an integer or FP
// induction, otherwise nullptr.
// NOTE(review): the qualified function-name line (original 1395) and the
// second half of the kind check (original 1400, presumably
// "ID.getKind() == InductionDescriptor::IK_FpInduction)") are elided in this
// listing -- confirm against upstream.
1394const InductionDescriptor *
1396 if (!isInductionPhi(Phi))
1397 return nullptr;
1398 auto &ID = getInductionVars().find(Phi)->second;
1399 if (ID.getKind() == InductionDescriptor::IK_IntInduction ||
1401 return &ID;
1402 return nullptr;
1403}
1404
// Returns the induction descriptor for Phi if it is a pointer induction,
// otherwise nullptr.
// NOTE(review): the qualified function-name line (original 1406) and the
// kind check guarding the return (original 1410, presumably
// "if (ID.getKind() == InductionDescriptor::IK_PtrInduction)") are elided in
// this listing -- confirm against upstream.
1405const InductionDescriptor *
1407 if (!isInductionPhi(Phi))
1408 return nullptr;
1409 auto &ID = getInductionVars().find(Phi)->second;
1411 return &ID;
1412 return nullptr;
1413}
1414
// NOTE(review): the first signature line (original 1415, presumably
// "bool LoopVectorizationLegality::isCastedInductionVariable(") is elided in
// this listing. Returns true if V is an instruction recorded as a cast of an
// induction variable that can be ignored during vectorization.
1416 const Value *V) const {
1417 auto *Inst = dyn_cast<Instruction>(V);
1418 return (Inst && InductionCastsToIgnore.count(Inst));
1419}
1420
1424
// NOTE(review): the first signature line (original 1425, presumably
// "bool LoopVectorizationLegality::isFixedOrderRecurrence(") is elided in
// this listing. Returns true if Phi was classified as a fixed-order
// recurrence during legality analysis.
1426 const PHINode *Phi) const {
1427 return FixedOrderRecurrences.count(Phi);
1428}
1429
// NOTE(review): the first signature line (original 1430, presumably
// "bool LoopVectorizationLegality::blockNeedsPredication(") and the guard
// before "return BB == Latch;" (original 1437, presumably an
// uncountable-early-exit check) are elided in this listing -- confirm.
1431 const BasicBlock *BB) const {
1432 // When vectorizing early exits, create predicates for the latch block only.
1433 // For a single early exit, it must be a direct predecessor of the latch.
1434 // For multiple early exits, they form a chain where each exiting block
1435 // dominates all subsequent blocks up to the latch.
1436 BasicBlock *Latch = TheLoop->getLoopLatch();
1438 return BB == Latch;
 // Otherwise defer to LAA's standard CFG-based predication query.
1439 return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
1440}
1441
// Returns true if every instruction in BB can be executed under a mask when
// the CFG is flattened. Instructions that require masking (calls with vector
// variants, unsafe loads, all stores) are recorded in MaskedOp.
1442bool LoopVectorizationLegality::blockCanBePredicated(
1443 BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs,
1444 SmallPtrSetImpl<const Instruction *> &MaskedOp) const {
1445 for (Instruction &I : *BB) {
1446 // We can predicate blocks with calls to assume, as long as we drop them in
1447 // case we flatten the CFG via predication.
 // NOTE(review): the match condition opening this block (original line
 // 1448, presumably an llvm.assume intrinsic match) is elided here.
1449 MaskedOp.insert(&I);
1450 continue;
1451 }
1452
1453 // Do not let llvm.experimental.noalias.scope.decl block the vectorization.
1454 // TODO: there might be cases that it should block the vectorization. Let's
1455 // ignore those for now.
 // NOTE(review): the condition for this `continue` (original line 1456, a
 // noalias.scope.decl intrinsic match) is elided here.
1457 continue;
1458
1459 // We can allow masked calls if there's at least one vector variant, even
1460 // if we end up scalarizing due to the cost model calculations.
1461 // TODO: Allow other calls if they have appropriate attributes... readonly
1462 // and argmemonly?
1463 if (CallInst *CI = dyn_cast<CallInst>(&I))
 // NOTE(review): the inner condition (original line 1464, presumably a
 // check that CI has a masked vector variant) is elided here.
1465 MaskedOp.insert(CI);
1466 continue;
1467 }
1468
1469 // Loads are handled via masking (or speculated if safe to do so.)
1470 if (auto *LI = dyn_cast<LoadInst>(&I)) {
1471 if (!SafePtrs.count(LI->getPointerOperand()))
1472 MaskedOp.insert(LI);
1473 continue;
1474 }
1475
1476 // Predicated store requires some form of masking:
1477 // 1) masked store HW instruction,
1478 // 2) emulation via load-blend-store (only if safe and legal to do so,
1479 // be aware on the race conditions), or
1480 // 3) element-by-element predicate check and scalar store.
1481 if (auto *SI = dyn_cast<StoreInst>(&I)) {
1482 MaskedOp.insert(SI);
1483 continue;
1484 }
1485
 // Anything else that touches memory or may throw cannot be predicated.
1486 if (I.mayReadFromMemory() || I.mayWriteToMemory() || I.mayThrow())
1487 return false;
1488 }
1489
1490 return true;
1491}
1492
// Returns true if the multi-block loop body can be if-converted (flattened
// into predicated straight-line code): collects pointers proven safe to
// access unconditionally, then checks every predicated block via
// blockCanBePredicated.
1493bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
1494 if (!EnableIfConversion) {
1495 reportVectorizationFailure("If-conversion is disabled",
1496 "IfConversionDisabled", ORE, TheLoop);
1497 return false;
1498 }
1499
1500 assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");
1501
1502 // A list of pointers which are known to be dereferenceable within scope of
1503 // the loop body for each iteration of the loop which executes. That is,
1504 // the memory pointed to can be dereferenced (with the access size implied by
1505 // the value's type) unconditionally within the loop header without
1506 // introducing a new fault.
1507 SmallPtrSet<Value *, 8> SafePointers;
1508
1509 // Collect safe addresses.
1510 for (BasicBlock *BB : TheLoop->blocks()) {
1511 if (!blockNeedsPredication(BB)) {
 // Unconditionally executed blocks: all their accesses are safe as-is.
1512 for (Instruction &I : *BB)
1513 if (auto *Ptr = getLoadStorePointerOperand(&I))
1514 SafePointers.insert(Ptr);
1515 continue;
1516 }
1517
1518 // For a block which requires predication, a address may be safe to access
1519 // in the loop w/o predication if we can prove dereferenceability facts
1520 // sufficient to ensure it'll never fault within the loop. For the moment,
1521 // we restrict this to loads; stores are more complicated due to
1522 // concurrency restrictions.
1523 ScalarEvolution &SE = *PSE.getSE();
 // NOTE(review): the declaration of Predicates (original line 1524,
 // presumably a SmallVector of SCEV predicates) is elided in this listing.
1525 for (Instruction &I : *BB) {
1526 LoadInst *LI = dyn_cast<LoadInst>(&I);
1527
1528 // Make sure we can execute all computations feeding into Ptr in the loop
1529 // w/o triggering UB and that none of the out-of-loop operands are poison.
1530 // We do not need to check if operations inside the loop can produce
1531 // poison due to flags (e.g. due to an inbounds GEP going out of bounds),
1532 // because flags will be dropped when executing them unconditionally.
1533 // TODO: Results could be improved by considering poison-propagation
1534 // properties of visited ops.
1535 auto CanSpeculatePointerOp = [this](Value *Ptr) {
1536 SmallVector<Value *> Worklist = {Ptr};
1537 SmallPtrSet<Value *, 4> Visited;
1538 while (!Worklist.empty()) {
1539 Value *CurrV = Worklist.pop_back_val();
1540 if (!Visited.insert(CurrV).second)
1541 continue;
1542
1543 auto *CurrI = dyn_cast<Instruction>(CurrV);
1544 if (!CurrI || !TheLoop->contains(CurrI)) {
1545 // If operands from outside the loop may be poison then Ptr may also
1546 // be poison.
1547 if (!isGuaranteedNotToBePoison(CurrV, AC,
1548 TheLoop->getLoopPredecessor()
1549 ->getTerminator()
1550 ->getIterator(),
1551 DT))
1552 return false;
1553 continue;
1554 }
1555
1556 // A loaded value may be poison, independent of any flags.
1557 if (isa<LoadInst>(CurrI) && !isGuaranteedNotToBePoison(CurrV, AC))
1558 return false;
1559
1560 // For other ops, assume poison can only be introduced via flags,
1561 // which can be dropped.
1562 if (!isa<PHINode>(CurrI) && !isSafeToSpeculativelyExecute(CurrI))
1563 return false;
1564 append_range(Worklist, CurrI->operands());
1565 }
1566 return true;
1567 };
1568 // Pass the Predicates pointer to isDereferenceableAndAlignedInLoop so
1569 // that it will consider loops that need guarding by SCEV checks. The
1570 // vectoriser will generate these checks if we decide to vectorise.
1571 if (LI && !LI->getType()->isVectorTy() && !mustSuppressSpeculation(*LI) &&
1572 CanSpeculatePointerOp(LI->getPointerOperand()) &&
1573 isDereferenceableAndAlignedInLoop(LI, TheLoop, SE, *DT, AC,
1574 &Predicates))
1575 SafePointers.insert(LI->getPointerOperand());
1576 Predicates.clear();
1577 }
1578 }
1579
1580 // Collect the blocks that need predication.
1581 for (BasicBlock *BB : TheLoop->blocks()) {
1582 // We support only branches and switch statements as terminators inside the
1583 // loop.
1584 if (isa<SwitchInst>(BB->getTerminator())) {
1585 if (TheLoop->isLoopExiting(BB)) {
1586 reportVectorizationFailure("Loop contains an unsupported switch",
1587 "LoopContainsUnsupportedSwitch", ORE,
1588 TheLoop, BB->getTerminator());
1589 return false;
1590 }
1591 } else if (!isa<UncondBrInst, CondBrInst>(BB->getTerminator())) {
1592 reportVectorizationFailure("Loop contains an unsupported terminator",
1593 "LoopContainsUnsupportedTerminator", ORE,
1594 TheLoop, BB->getTerminator());
1595 return false;
1596 }
1597
1598 // We must be able to predicate all blocks that need to be predicated.
1599 if (blockNeedsPredication(BB) &&
1600 !blockCanBePredicated(BB, SafePointers, ConditionallyExecutedOps)) {
 // NOTE(review): the start of the reportVectorizationFailure(...) call
 // (original line 1601) is elided; only its arguments remain below.
1602 "Control flow cannot be substituted for a select", "NoCFGForSelect",
1603 ORE, TheLoop, BB->getTerminator());
1604 return false;
1605 }
1606 }
1607
1608 // We can if-convert this loop.
1609 return true;
1610}
1611
1612// Helper function to canVectorizeLoopNestCFG.
// Checks that a single loop has a vectorizable CFG shape: a preheader, a
// single backedge, and a latch terminated by a branch. When extra ORE
// analysis is enabled, all failures are reported before returning.
1613bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
1614 bool UseVPlanNativePath) {
1615 assert((UseVPlanNativePath || Lp->isInnermost()) &&
1616 "VPlan-native path is not enabled.");
1617
1618 // TODO: ORE should be improved to show more accurate information when an
1619 // outer loop can't be vectorized because a nested loop is not understood or
1620 // legal. Something like: "outer_loop_location: loop not vectorized:
1621 // (inner_loop_location) loop control flow is not understood by vectorizer".
1622
1623 // Store the result and return it at the end instead of exiting early, in case
1624 // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1625 bool Result = true;
1626 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1627
1628 // We must have a loop in canonical form. Loops with indirectbr in them cannot
1629 // be canonicalized.
1630 if (!Lp->getLoopPreheader()) {
1631 reportVectorizationFailure("Loop doesn't have a legal pre-header",
1632 "loop control flow is not understood by vectorizer",
1633 "CFGNotUnderstood", ORE, TheLoop);
1634 if (DoExtraAnalysis)
1635 Result = false;
1636 else
1637 return false;
1638 }
1639
1640 // We must have a single backedge.
1641 if (Lp->getNumBackEdges() != 1) {
1642 reportVectorizationFailure("The loop must have a single backedge",
1643 "loop control flow is not understood by vectorizer",
1644 "CFGNotUnderstood", ORE, TheLoop);
1645 if (DoExtraAnalysis)
1646 Result = false;
1647 else
1648 return false;
1649 }
1650
1651 // The latch must be terminated by a branch.
1652 BasicBlock *Latch = Lp->getLoopLatch();
1653 if (Latch && !isa<UncondBrInst, CondBrInst>(Latch->getTerminator())) {
 // NOTE(review): the start of the reportVectorizationFailure(...) call
 // (original line 1654) is elided; only its arguments remain below.
1655 "The loop latch terminator is not a UncondBrInst/CondBrInst",
1656 "loop control flow is not understood by vectorizer", "CFGNotUnderstood",
1657 ORE, TheLoop);
1658 if (DoExtraAnalysis)
1659 Result = false;
1660 else
1661 return false;
1662 }
1663
1664 return Result;
1665}
1666
1667bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
1668 Loop *Lp, bool UseVPlanNativePath) {
1669 // Store the result and return it at the end instead of exiting early, in case
1670 // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1671 bool Result = true;
1672 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1673 if (!canVectorizeLoopCFG(Lp, UseVPlanNativePath)) {
1674 if (DoExtraAnalysis)
1675 Result = false;
1676 else
1677 return false;
1678 }
1679
1680 // Recursively check whether the loop control flow of nested loops is
1681 // understood.
1682 for (Loop *SubLp : *Lp)
1683 if (!canVectorizeLoopNestCFG(SubLp, UseVPlanNativePath)) {
1684 if (DoExtraAnalysis)
1685 Result = false;
1686 else
1687 return false;
1688 }
1689
1690 return Result;
1691}
1692
// Determines whether a loop with uncountable (early) exits can still be
// vectorized: the latch exit must be countable, reductions/recurrences are
// not supported, and memory operations must be either read-only and proven
// non-faulting or simple stores plus movable exit-condition loads.
1693bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
1694 BasicBlock *LatchBB = TheLoop->getLoopLatch();
1695 if (!LatchBB) {
1696 reportVectorizationFailure("Loop does not have a latch",
1697 "Cannot vectorize early exit loop",
1698 "NoLatchEarlyExit", ORE, TheLoop);
1699 return false;
1700 }
1701
1702 if (Reductions.size() || FixedOrderRecurrences.size()) {
 // NOTE(review): the start of the reportVectorizationFailure(...) call
 // (original line 1703) is elided; only its arguments remain below.
1704 "Found reductions or recurrences in early-exit loop",
1705 "Cannot vectorize early exit loop with reductions or recurrences",
1706 "RecurrencesInEarlyExitLoop", ORE, TheLoop);
1707 return false;
1708 }
1709
1710 SmallVector<BasicBlock *, 8> ExitingBlocks;
1711 TheLoop->getExitingBlocks(ExitingBlocks);
1712
1713 // Keep a record of all the exiting blocks.
 // NOTE(review): the declaration of Predicates (original line 1714,
 // presumably a SmallVector of SCEV predicates) is elided in this listing.
1715 SmallVector<BasicBlock *> UncountableExitingBlocks;
1716 for (BasicBlock *BB : ExitingBlocks) {
1717 const SCEV *EC =
1718 PSE.getSE()->getPredicatedExitCount(TheLoop, BB, &Predicates);
1719 if (isa<SCEVCouldNotCompute>(EC)) {
1720 if (size(successors(BB)) != 2) {
 // NOTE(review): the start of this reportVectorizationFailure(...) call
 // (original line 1721) is elided; only its arguments remain below.
1722 "Early exiting block does not have exactly two successors",
1723 "Incorrect number of successors from early exiting block",
1724 "EarlyExitTooManySuccessors", ORE, TheLoop);
1725 return false;
1726 }
1727
1728 UncountableExitingBlocks.push_back(BB);
1729 } else
1730 CountableExitingBlocks.push_back(BB);
1731 }
1732 // We can safely ignore the predicates here because when vectorizing the loop
1733 // the PredicatatedScalarEvolution class will keep track of all predicates
1734 // for each exiting block anyway. This happens when calling
1735 // PSE.getSymbolicMaxBackedgeTakenCount() below.
1736 Predicates.clear();
1737
1738 if (UncountableExitingBlocks.empty()) {
1739 LLVM_DEBUG(dbgs() << "LV: Could not find any uncountable exits");
1740 return false;
1741 }
1742
1743 // The latch block must have a countable exit.
 // NOTE(review): the opening of this condition (original line 1744,
 // presumably "if (isa<SCEVCouldNotCompute>(") is elided in this listing.
1745 PSE.getSE()->getPredicatedExitCount(TheLoop, LatchBB, &Predicates))) {
1747 "Cannot determine exact exit count for latch block",
1748 "Cannot vectorize early exit loop",
1749 "UnknownLatchExitCountEarlyExitLoop", ORE, TheLoop);
1750 return false;
1751 }
1752 assert(llvm::is_contained(CountableExitingBlocks, LatchBB) &&
1753 "Latch block not found in list of countable exits!");
1754
1755 // Check to see if there are instructions that could potentially generate
1756 // exceptions or have side-effects.
1757 auto IsSafeOperation = [](Instruction *I) -> bool {
1758 switch (I->getOpcode()) {
1759 case Instruction::Load:
1760 case Instruction::Store:
1761 case Instruction::PHI:
1762 case Instruction::UncondBr:
1763 case Instruction::CondBr:
1764 // These are checked separately.
1765 return true;
1766 default:
 // NOTE(review): the default-case body (original line 1767, presumably a
 // speculative-execution safety query) is elided in this listing.
1768 }
1769 };
1770
1771 bool HasSideEffects = false;
1772 for (auto *BB : TheLoop->blocks())
1773 for (auto &I : *BB) {
1774 if (I.mayWriteToMemory()) {
 // Simple (unordered, non-volatile) stores are tolerated but flag the
 // loop as having side effects; anything else writing memory bails.
1775 if (isa<StoreInst>(&I) && cast<StoreInst>(&I)->isSimple()) {
1776 HasSideEffects = true;
1777 continue;
1778 }
1779
1780 // We don't support complex writes to memory.
 // NOTE(review): the start of this reportVectorizationFailure(...) call
 // (original line 1781) is elided; only its arguments remain below.
1782 "Complex writes to memory unsupported in early exit loops",
1783 "Cannot vectorize early exit loop with complex writes to memory",
1784 "WritesInEarlyExitLoop", ORE, TheLoop);
1785 return false;
1786 }
1787
1788 if (!IsSafeOperation(&I)) {
1789 reportVectorizationFailure("Early exit loop contains operations that "
1790 "cannot be speculatively executed",
1791 "UnsafeOperationsEarlyExitLoop", ORE,
1792 TheLoop);
1793 return false;
1794 }
1795 }
1796
1797 SmallVector<LoadInst *, 4> NonDerefLoads;
1798 // TODO: Handle loops that may fault.
1799 if (!HasSideEffects) {
1800 // Read-only loop.
1801 Predicates.clear();
1802 if (!isReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC, NonDerefLoads,
1803 &Predicates)) {
 // NOTE(review): the start of this reportVectorizationFailure(...) call
 // (original line 1804) is elided; only its arguments remain below.
1805 "Loop may fault", "Cannot vectorize non-read-only early exit loop",
1806 "NonReadOnlyEarlyExitLoop", ORE, TheLoop);
1807 return false;
1808 }
1809 } else {
1810 // Check all uncountable exiting blocks for movable loads.
1811 for (BasicBlock *ExitingBB : UncountableExitingBlocks) {
1812 if (!canUncountableExitConditionLoadBeMoved(ExitingBB))
1813 return false;
1814 }
1815 }
1816
1817 // Check non-dereferenceable loads if any.
1818 for (LoadInst *LI : NonDerefLoads) {
1819 // Only support unit-stride access for now.
1820 int Stride = isConsecutivePtr(LI->getType(), LI->getPointerOperand());
1821 if (Stride != 1) {
 // NOTE(review): the start of this reportVectorizationFailure(...) call
 // (original line 1822) is elided; only its arguments remain below.
1823 "Loop contains potentially faulting strided load",
1824 "Cannot vectorize early exit loop with "
1825 "strided fault-only-first load",
1826 "EarlyExitLoopWithStridedFaultOnlyFirstLoad", ORE, TheLoop);
1827 return false;
1828 }
1829 }
1830
1831 [[maybe_unused]] const SCEV *SymbolicMaxBTC =
1832 PSE.getSymbolicMaxBackedgeTakenCount();
1833 // Since we have an exact exit count for the latch and the early exit
1834 // dominates the latch, then this should guarantee a computed SCEV value.
1835 assert(!isa<SCEVCouldNotCompute>(SymbolicMaxBTC) &&
1836 "Failed to get symbolic expression for backedge taken count");
1837 LLVM_DEBUG(dbgs() << "LV: Found an early exit loop with symbolic max "
1838 "backedge taken count: "
1839 << *SymbolicMaxBTC << '\n');
 // NOTE(review): the false-branch of this ternary (original line 1841,
 // presumably ": UncountableExitTrait::ReadOnly;") is elided in this listing.
1840 UncountableExitType = HasSideEffects ? UncountableExitTrait::ReadWrite
1842 return true;
1843}
1844
1845bool LoopVectorizationLegality::canUncountableExitConditionLoadBeMoved(
1846 BasicBlock *ExitingBlock) {
1847 // Try to find a load in the critical path for the uncountable exit condition.
1848 // This is currently matching about the simplest form we can, expecting
1849 // only one in-loop load, the result of which is directly compared against
1850 // a loop-invariant value.
1851 // FIXME: We're insisting on a single use for now, because otherwise we will
1852 // need to make PHI nodes for other users. That can be done once the initial
1853 // transform code lands.
1854 auto *Br = cast<CondBrInst>(ExitingBlock->getTerminator());
1855
1856 using namespace llvm::PatternMatch;
1857 Instruction *L = nullptr;
1858 Value *Ptr = nullptr;
1859 Value *R = nullptr;
1860 if (!match(Br->getCondition(),
1862 m_Value(R))))) {
1864 "Early exit loop with store but no supported condition load",
1865 "NoConditionLoadForEarlyExitLoop", ORE, TheLoop);
1866 return false;
1867 }
1868
1869 // FIXME: Don't rely on operand ordering for the comparison.
1870 if (!TheLoop->isLoopInvariant(R)) {
1872 "Early exit loop with store but no supported condition load",
1873 "NoConditionLoadForEarlyExitLoop", ORE, TheLoop);
1874 return false;
1875 }
1876
1877 // Make sure that the load address is not loop invariant; we want an
1878 // address calculation that we can rotate to the next vector iteration.
1879 const auto *AR = dyn_cast<SCEVAddRecExpr>(PSE.getSE()->getSCEV(Ptr));
1880 if (!AR || AR->getLoop() != TheLoop || !AR->isAffine()) {
1882 "Uncountable exit condition depends on load with an address that is "
1883 "not an add recurrence in the loop",
1884 "EarlyExitLoadInvariantAddress", ORE, TheLoop);
1885 return false;
1886 }
1887
1888 ICFLoopSafetyInfo SafetyInfo;
1889 SafetyInfo.computeLoopSafetyInfo(TheLoop);
1890 LoadInst *Load = cast<LoadInst>(L);
1891 // We need to know that load will be executed before we can hoist a
1892 // copy out to run just before the first iteration.
1893 if (!SafetyInfo.isGuaranteedToExecute(*Load, DT, TheLoop)) {
1895 "Load for uncountable exit not guaranteed to execute",
1896 "ConditionalUncountableExitLoad", ORE, TheLoop);
1897 return false;
1898 }
1899
1900 // Prohibit any potential aliasing with any instruction in the loop which
1901 // might store to memory.
1902 // FIXME: Relax this constraint where possible.
1903 for (auto *BB : TheLoop->blocks()) {
1904 for (auto &I : *BB) {
1905 if (&I == Load)
1906 continue;
1907
1908 if (I.mayWriteToMemory()) {
1909 if (auto *SI = dyn_cast<StoreInst>(&I)) {
1910 AliasResult AR = AA->alias(Ptr, SI->getPointerOperand());
1911 if (AR == AliasResult::NoAlias)
1912 continue;
1913 }
1914
1916 "Cannot determine whether critical uncountable exit load address "
1917 "does not alias with a memory write",
1918 "CantVectorizeAliasWithCriticalUncountableExitLoad", ORE, TheLoop);
1919 return false;
1920 }
1921 }
1922 }
1923
1924 return true;
1925}
1926
1927bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
1928 // Store the result and return it at the end instead of exiting early, in case
1929 // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1930 bool Result = true;
1931
1932 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1933 // Check whether the loop-related control flow in the loop nest is expected by
1934 // vectorizer.
1935 if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) {
1936 if (DoExtraAnalysis) {
1937 LLVM_DEBUG(dbgs() << "LV: legality check failed: loop nest");
1938 Result = false;
1939 } else {
1940 return false;
1941 }
1942 }
1943
1944 // We need to have a loop header.
1945 LLVM_DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName()
1946 << '\n');
1947
1948 // Specific checks for outer loops. We skip the remaining legal checks at this
1949 // point because they don't support outer loops.
1950 if (!TheLoop->isInnermost()) {
1951 assert(UseVPlanNativePath && "VPlan-native path is not enabled.");
1952
1953 if (!canVectorizeOuterLoop()) {
1954 reportVectorizationFailure("Unsupported outer loop",
1955 "UnsupportedOuterLoop", ORE, TheLoop);
1956 // TODO: Implement DoExtraAnalysis when subsequent legal checks support
1957 // outer loops.
1958 return false;
1959 }
1960
1961 LLVM_DEBUG(dbgs() << "LV: We can vectorize this outer loop!\n");
1962 return Result;
1963 }
1964
1965 assert(TheLoop->isInnermost() && "Inner loop expected.");
1966 // Check if we can if-convert non-single-bb loops.
1967 unsigned NumBlocks = TheLoop->getNumBlocks();
1968 if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
1969 LLVM_DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");
1970 if (DoExtraAnalysis)
1971 Result = false;
1972 else
1973 return false;
1974 }
1975
1976 // Check if we can vectorize the instructions and CFG in this loop.
1977 if (!canVectorizeInstrs()) {
1978 LLVM_DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n");
1979 if (DoExtraAnalysis)
1980 Result = false;
1981 else
1982 return false;
1983 }
1984
1985 if (isa<SCEVCouldNotCompute>(PSE.getBackedgeTakenCount())) {
1986 if (TheLoop->getExitingBlock()) {
1987 reportVectorizationFailure("Cannot vectorize uncountable loop",
1988 "UnsupportedUncountableLoop", ORE, TheLoop);
1989 if (DoExtraAnalysis)
1990 Result = false;
1991 else
1992 return false;
1993 } else {
1994 if (!isVectorizableEarlyExitLoop()) {
1995 assert(UncountableExitType == UncountableExitTrait::None &&
1996 "Must be false without vectorizable early-exit loop");
1997 if (DoExtraAnalysis)
1998 Result = false;
1999 else
2000 return false;
2001 }
2002 }
2003 }
2004
2005 // Go over each instruction and look at memory deps.
2006 if (!canVectorizeMemory()) {
2007 LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
2008 if (DoExtraAnalysis)
2009 Result = false;
2010 else
2011 return false;
2012 }
2013
2014 // Bail out for ReadWrite loops with uncountable exits for now.
2015 if (UncountableExitType == UncountableExitTrait::ReadWrite) {
2017 "Writes to memory unsupported in early exit loops",
2018 "Cannot vectorize early exit loop with writes to memory",
2019 "WritesInEarlyExitLoop", ORE, TheLoop);
2020 return false;
2021 }
2022
2023 if (Result) {
2024 LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
2025 << (LAI->getRuntimePointerChecking()->Need
2026 ? " (with a runtime bound check)"
2027 : "")
2028 << "!\n");
2029 }
2030
2031 unsigned SCEVThreshold = VectorizeSCEVCheckThreshold;
2032 if (Hints->getForce() == LoopVectorizeHints::FK_Enabled)
2033 SCEVThreshold = PragmaVectorizeSCEVCheckThreshold;
2034
2035 if (PSE.getPredicate().getComplexity() > SCEVThreshold) {
2036 LLVM_DEBUG(dbgs() << "LV: Vectorization not profitable "
2037 "due to SCEVThreshold");
2038 reportVectorizationFailure("Too many SCEV checks needed",
2039 "Too many SCEV assumptions need to be made and checked at runtime",
2040 "TooManySCEVRunTimeChecks", ORE, TheLoop);
2041 if (DoExtraAnalysis)
2042 Result = false;
2043 else
2044 return false;
2045 }
2046
2047 // Okay! We've done all the tests. If any have failed, return false. Otherwise
2048 // we can vectorize, and at this point we don't have any other mem analysis
2049 // which may limit our maximum vectorization factor, so just return true with
2050 // no restrictions.
2051 return Result;
2052}
2053
2055 // The only loops we can vectorize without a scalar epilogue, are loops with
2056 // a bottom-test and a single exiting block. We'd have to handle the fact
2057 // that not every instruction executes on the last iteration. This will
2058 // require a lane mask which varies through the vector loop body. (TODO)
2059 if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) {
2060 LLVM_DEBUG(
2061 dbgs()
2062 << "LV: Cannot fold tail by masking. Requires a singe latch exit\n");
2063 return false;
2064 }
2065
2066 LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");
2067
2068 // The list of pointers that we can safely read and write to remains empty.
2069 SmallPtrSet<Value *, 8> SafePointers;
2070
2071 // Check all blocks for predication, including those that ordinarily do not
2072 // need predication such as the header block.
2074 for (BasicBlock *BB : TheLoop->blocks()) {
2075 if (!blockCanBePredicated(BB, SafePointers, TmpMaskedOp)) {
2076 LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking.\n");
2077 return false;
2078 }
2079 }
2080
2081 LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n");
2082
2083 return true;
2084}
2085
2087 // The list of pointers that we can safely read and write to remains empty.
2088 SmallPtrSet<Value *, 8> SafePointers;
2089
2090 // Mark all blocks for predication, including those that ordinarily do not
2091 // need predication such as the header block, and collect instructions needing
2092 // predication in TailFoldedMaskedOp.
2093 for (BasicBlock *BB : TheLoop->blocks()) {
2094 [[maybe_unused]] bool R =
2095 blockCanBePredicated(BB, SafePointers, TailFoldedMaskedOp);
2096 assert(R && "Must be able to predicate block when tail-folding.");
2097 }
2098}
2099
2100} // namespace llvm
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
#define DEBUG_TYPE
Hexagon Common GEP
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition Lint.cpp:539
#define LV_NAME
static cl::opt< unsigned > PragmaVectorizeSCEVCheckThreshold("pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed with a " "vectorize(enable) pragma"))
static cl::opt< bool > HintsAllowReordering("hints-allow-reordering", cl::init(true), cl::Hidden, cl::desc("Allow enabling loop hints to reorder " "FP operations during vectorization."))
static const unsigned MaxInterleaveFactor
Maximum vectorization interleave count.
static cl::opt< bool > AllowStridedPointerIVs("lv-strided-pointer-ivs", cl::init(false), cl::Hidden, cl::desc("Enable recognition of non-constant strided " "pointer induction variables."))
static cl::opt< LoopVectorizeHints::ScalableForceKind > ForceScalableVectorization("scalable-vectorization", cl::init(LoopVectorizeHints::SK_Unspecified), cl::Hidden, cl::desc("Control whether the compiler can use scalable vectors to " "vectorize a loop"), cl::values(clEnumValN(LoopVectorizeHints::SK_FixedWidthOnly, "off", "Scalable vectorization is disabled."), clEnumValN(LoopVectorizeHints::SK_PreferScalable, "preferred", "Scalable vectorization is available and favored when the " "cost is inconclusive."), clEnumValN(LoopVectorizeHints::SK_PreferScalable, "on", "Scalable vectorization is available and favored when the " "cost is inconclusive."), clEnumValN(LoopVectorizeHints::SK_AlwaysScalable, "always", "Scalable vectorization is available and always favored when " "feasible")))
static cl::opt< unsigned > VectorizeSCEVCheckThreshold("vectorize-scev-check-threshold", cl::init(16), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed."))
static cl::opt< bool > EnableHistogramVectorization("enable-histogram-loop-vectorization", cl::init(false), cl::Hidden, cl::desc("Enables autovectorization of some loops containing histograms"))
static cl::opt< bool > EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden, cl::desc("Enable if-conversion during vectorization."))
This file defines the LoopVectorizationLegality class.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define H(x, y, z)
Definition MD5.cpp:56
#define T
Contains a collection of routines for determining if a given instruction is guaranteed to execute if ...
static bool isSimple(Instruction *I)
static void visit(BasicBlock &Start, std::function< bool(BasicBlock *)> op)
#define LLVM_DEBUG(...)
Definition Debug.h:114
This pass exposes codegen information to IR-level passes.
Virtual Register Rewriter
static const uint32_t IV[8]
Definition blake3_impl.h:83
@ NoAlias
The two locations do not alias at all.
iterator begin() const
Definition ArrayRef.h:130
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:137
LLVM Basic Block Representation.
Definition BasicBlock.h:62
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
Definition BasicBlock.h:237
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
This class represents a function call, abstracting a target machine's calling convention.
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
static constexpr ElementCount getScalable(ScalarTy MinVal)
Definition TypeSize.h:312
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
constexpr bool isScalar() const
Exactly one element.
Definition TypeSize.h:320
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:873
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
bool isGuaranteedToExecute(const Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop) const override
Returns true if the instruction in a loop is guaranteed to execute at least once (under the assumptio...
void computeLoopSafetyInfo(const Loop *CurLoop) override
Computes safety information for a loop checks loop body & header for the possibility of may throw exc...
A struct for saving information about induction variables.
@ IK_FpInduction
Floating point induction variable.
@ IK_PtrInduction
Pointer induction var. Step = C.
@ IK_IntInduction
Integer induction variable. Step = C.
static LLVM_ABI bool isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D, const SCEV *Expr=nullptr, SmallVectorImpl< Instruction * > *CastsToIgnore=nullptr)
Returns true if Phi is an induction in the loop L.
Instruction * getExactFPMathInst()
Returns floating-point induction operator that does not allow reassociation (transforming the inducti...
Class to represent integer types.
An instruction for reading from memory.
const MemoryDepChecker & getDepChecker() const
the Memory Dependence Checker which can determine the loop-independent and loop-carried dependences b...
static LLVM_ABI bool blockNeedsPredication(const BasicBlock *BB, const Loop *TheLoop, const DominatorTree *DT)
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
unsigned getNumBackEdges() const
Calculate the number of back edges to the loop header.
iterator_range< block_iterator > blocks() const
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
bool isLoopHeader(const BlockT *BB) const
bool isInvariantStoreOfReduction(StoreInst *SI)
Returns True if given store is a final invariant store of one of the reductions found in the loop.
bool isInvariantAddressOfReduction(Value *V)
Returns True if given address is invariant and is used to store recurrent expression.
bool canVectorize(bool UseVPlanNativePath)
Returns true if it is legal to vectorize this loop.
bool blockNeedsPredication(const BasicBlock *BB) const
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
int isConsecutivePtr(Type *AccessTy, Value *Ptr) const
Check if this pointer is consecutive when vectorizing.
bool hasUncountableExitWithSideEffects() const
Returns true if this is an early exit loop with state-changing or potentially-faulting operations and...
bool canVectorizeFPMath(bool EnableStrictReductions)
Returns true if it is legal to vectorize the FP math operations in this loop.
bool isFixedOrderRecurrence(const PHINode *Phi) const
Returns True if Phi is a fixed-order recurrence in this loop.
const InductionDescriptor * getPointerInductionDescriptor(PHINode *Phi) const
Returns a pointer to the induction descriptor, if Phi is pointer induction.
const InductionDescriptor * getIntOrFpInductionDescriptor(PHINode *Phi) const
Returns a pointer to the induction descriptor, if Phi is an integer or floating point induction.
bool isInductionPhi(const Value *V) const
Returns True if V is a Phi node of an induction variable in this loop.
bool isUniform(Value *V, ElementCount VF) const
Returns true if value V is uniform across VF lanes, when VF is provided, and otherwise if V is invari...
const InductionList & getInductionVars() const
Returns the induction variables found in the loop.
bool isInvariant(Value *V) const
Returns true if V is invariant across all loop iterations according to SCEV.
const ReductionList & getReductionVars() const
Returns the reduction variables found in the loop.
bool canFoldTailByMasking() const
Return true if we can vectorize this loop while folding its tail by masking.
void prepareToFoldTailByMasking()
Mark all respective loads/stores for masking.
bool hasUncountableEarlyExit() const
Returns true if the loop has uncountable early exits, i.e.
bool isUniformMemOp(Instruction &I, ElementCount VF) const
A uniform memory op is a load or store which accesses the same memory location on all VF lanes,...
bool isInductionVariable(const Value *V) const
Returns True if V can be considered as an induction variable in this loop.
bool isCastedInductionVariable(const Value *V) const
Returns True if V is a cast that is part of an induction def-use chain, and had been proven to be red...
@ SK_PreferScalable
Vectorize loops using scalable vectors or fixed-width vectors, but favor scalable vectors when the co...
@ SK_AlwaysScalable
Always vectorize loops using scalable vectors if feasible (i.e.
@ SK_FixedWidthOnly
Disables vectorization with scalable vectors.
bool allowVectorization(Function *F, Loop *L, bool VectorizeOnlyWhenForced) const
bool allowReordering() const
When enabling loop hints are provided we allow the vectorizer to change the order of operations that ...
void emitRemarkWithHints() const
Dumps all the hint information.
void setAlreadyVectorized()
Mark the loop L as already vectorized by setting the width to 1.
LoopVectorizeHints(const Loop *L, bool InterleaveOnlyWhenForced, OptimizationRemarkEmitter &ORE, const TargetTransformInfo *TTI=nullptr)
const char * vectorizeAnalysisPassName() const
If hints are provided that force vectorization, use the AlwaysPrint pass name to force the frontend t...
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition LoopInfo.cpp:67
PHINode * getCanonicalInductionVariable() const
Check to see if the loop has a canonical induction variable: an integer recurrence that starts at 0 a...
Definition LoopInfo.cpp:174
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
Definition LoopInfo.cpp:523
Metadata node.
Definition Metadata.h:1080
const MDOperand & getOperand(unsigned I) const
Definition Metadata.h:1444
ArrayRef< MDOperand > operands() const
Definition Metadata.h:1442
unsigned getNumOperands() const
Return number of MDNode operands.
Definition Metadata.h:1450
Tracking metadata reference owned by Metadata.
Definition Metadata.h:902
A single uniqued string.
Definition Metadata.h:722
LLVM_ABI StringRef getString() const
Definition Metadata.cpp:632
iterator find(const KeyT &Key)
Definition MapVector.h:154
Checks memory dependences among accesses to the same underlying object to determine whether there vec...
const SmallVectorImpl< Dependence > * getDependences() const
Returns the memory dependences.
Root of the metadata hierarchy.
Definition Metadata.h:64
Diagnostic information for optimization analysis remarks.
The optimization diagnostic interface.
bool allowExtraAnalysis(StringRef PassName) const
Whether we allow for extra compile-time budget to perform more analysis to produce fewer false positi...
LLVM_ABI void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for missed-optimization remarks.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Instruction * getExactFPMathInst() const
Returns 1st non-reassociative FP instruction in the PHI node's use-chain.
static LLVM_ABI bool isFixedOrderRecurrence(PHINode *Phi, Loop *TheLoop, DominatorTree *DT)
Returns true if Phi is a fixed-order recurrence.
bool hasExactFPMath() const
Returns true if the recurrence has floating-point math that requires precise (ordered) operations.
Instruction * getLoopExitInstr() const
static LLVM_ABI bool isReductionPHI(PHINode *Phi, Loop *TheLoop, RecurrenceDescriptor &RedDes, DemandedBits *DB=nullptr, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr, ScalarEvolution *SE=nullptr)
Returns true if Phi is a reduction in TheLoop.
bool hasUsesOutsideReductionChain() const
Returns true if the reduction PHI has any uses outside the reduction chain.
RecurKind getRecurrenceKind() const
bool isOrdered() const
Expose an ordered FP reduction to the instance users.
StoreInst * IntermediateStore
Reductions may store temporary or final result to an invariant address.
static bool isMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is any min/max kind.
SCEVUse getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
This visitor recursively visits a SCEV expression and re-writes it.
const SCEV * visit(const SCEV *S)
This class represents an analyzed expression in the program.
static constexpr auto FlagAnyWrap
The main scalar evolution driver.
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LLVM_ABI bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
LLVM_ABI const SCEV * getCouldNotCompute()
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Value * getPointerOperand()
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
Provides information about what library functions are available for the current target.
void getWidestVF(StringRef ScalarF, ElementCount &FixedVF, ElementCount &ScalableVF) const
Returns the largest vectorization factor used in the list of vector functions.
bool isFunctionVectorizable(StringRef F, const ElementCount &VF) const
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
Definition Twine.cpp:17
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:313
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:284
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:236
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition Type.h:186
bool isIntOrPtrTy() const
Return true if this is an integer type or a pointer type.
Definition Type.h:272
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
Value * getOperand(unsigned i) const
Definition User.h:207
static bool hasMaskedVariant(const CallInst &CI, std::optional< ElementCount > VF=std::nullopt)
Definition VectorUtils.h:87
static SmallVector< VFInfo, 8 > getMappings(const CallInst &CI)
Retrieve all the VFInfo instances associated to the CallInst CI.
Definition VectorUtils.h:76
LLVM Value Representation.
Definition Value.h:75
iterator_range< user_iterator > users()
Definition Value.h:426
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:318
static LLVM_ABI bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as a element type.
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:230
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:168
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
constexpr bool isZero() const
Definition TypeSize.h:153
const ParentTy * getParent() const
Definition ilist_node.h:34
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
TwoOps_match< ValueOpTy, PointerOpTy, Instruction::Store > m_Store(const ValueOpTy &ValueOp, const PointerOpTy &PointerOp)
Matches StoreInst.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
bool match(Val *V, const Pattern &P)
match_bind< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
auto m_BinOp()
Match an arbitrary binary operation and ignore it.
auto m_Value()
Match an arbitrary value and ignore it.
match_combine_or< match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > >, OpTy > m_ZExtOrSExtOrSelf(const OpTy &Op)
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract(Y &&MD)
Extract a Value from Metadata, if any.
Definition Metadata.h:696
Add a small namespace to avoid name clashes with the classes used in the streaming interface.
NodeAddr< PhiNode * > Phi
Definition RDFGraph.h:390
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:315
@ Offset
Definition DWP.cpp:557
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1668
LLVM_ABI Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
auto successors(const MachineBasicBlock *BB)
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2207
static bool isUniformLoop(Loop *Lp, Loop *OuterLp)
LLVM_ABI bool mustSuppressSpeculation(const LoadInst &LI)
Return true if speculation of the given load must be suppressed to avoid ordering or interfering with...
Definition Loads.cpp:431
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
auto reverse(ContainerTy &&C)
Definition STLExtras.h:407
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
static IntegerType * getWiderInductionTy(const DataLayout &DL, Type *Ty0, Type *Ty1)
static IntegerType * getInductionIntegerTy(const DataLayout &DL, Type *Ty)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI bool hasDisableAllTransformsHint(const Loop *L)
Look for the loop attribute that disables all transformation heuristic.
static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst, SmallPtrSetImpl< Value * > &AllowedExit)
Check that the instruction has outside loop users and is not an identified reduction variable.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
static bool storeToSameAddress(ScalarEvolution *SE, StoreInst *A, StoreInst *B)
Returns true if A and B have same pointer operands or same SCEVs addresses.
bool canVectorizeTy(Type *Ty)
Returns true if Ty is a valid vector element type, void, or an unpacked literal struct where all elem...
TargetTransformInfo TTI
LLVM_ABI bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
LLVM_ABI void reportVectorizationFailure(const StringRef DebugMsg, const StringRef OREMsg, const StringRef ORETag, OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I=nullptr)
Reports a vectorization failure: print DebugMsg for debugging purposes along with the corresponding o...
ArrayRef(const T &OneElt) -> ArrayRef< T >
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isReadOnlyLoop(Loop *L, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, SmallVectorImpl< LoadInst * > &NonDereferenceableAndAlignedLoads, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
Returns true if the loop contains read-only memory accesses and doesn't throw.
Definition Loads.cpp:891
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2191
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1946
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
static bool findHistogram(LoadInst *LI, StoreInst *HSt, Loop *TheLoop, const PredicatedScalarEvolution &PSE, SmallVectorImpl< HistogramInfo > &Histograms)
Find histogram operations that match high-level code in loops:
LLVM_ABI bool isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Returns true if V cannot be poison, but may be undef.
static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI)
Checks if a function is scalarizable according to the TLI, in the sense that it should be vectorized ...
LLVM_ABI bool isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L, ScalarEvolution &SE, DominatorTree &DT, AssumptionCache *AC=nullptr, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
Return true if we can prove that the given load (which is assumed to be within the specified loop) wo...
Definition Loads.cpp:289
constexpr detail::IsaCheckPredicate< Types... > IsaPred
Function object wrapper for the llvm::isa type check.
Definition Casting.h:866
LLVM_ABI std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const DominatorTree &DT, const DenseMap< Value *, const SCEV * > &StridesMap=DenseMap< Value *, const SCEV * >(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of the access type size.
SCEVUseT< const SCEV * > SCEVUse
bool SCEVExprContains(const SCEV *Root, PredTy Pred)
Return true if any node in Root satisfies the predicate Pred.
Dependece between memory access instructions.
Instruction * getDestination(const MemoryDepChecker &DepChecker) const
Return the destination instruction of the dependence.
Instruction * getSource(const MemoryDepChecker &DepChecker) const
Return the source instruction of the dependence.
static LLVM_ABI VectorizationSafetyStatus isSafeForVectorization(DepType Type)
Dependence types that don't prevent vectorization.
TODO: The following VectorizationFactor was pulled out of LoopVectorizationCostModel class.
Collection of parameters shared beetween the Loop Vectorizer and the Loop Access Analysis.
static LLVM_ABI const unsigned MaxVectorWidth
Maximum SIMD width.
static LLVM_ABI bool isInterleaveForced()
True if force-vector-interleave was specified by the user.
static LLVM_ABI unsigned VectorizationInterleave
Interleave factor as overridden by the user.