LLVM 23.0.0git
LoopVectorizationLegality.cpp
Go to the documentation of this file.
1//===- LoopVectorizationLegality.cpp --------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file provides loop vectorization legality analysis. Original code
10// resided in LoopVectorize.cpp for a long time.
11//
12// At this point, it is implemented as a utility class, not as an analysis
13// pass. It should be easy to create an analysis pass around it if there
14// is a need (but D45420 needs to happen first).
15//
16
20#include "llvm/Analysis/Loads.h"
29#include "llvm/IR/Dominators.h"
34
35using namespace llvm;
36using namespace PatternMatch;
37using namespace LoopVectorizationUtils;
38
39#define LV_NAME "loop-vectorize"
40#define DEBUG_TYPE LV_NAME
41
42static cl::opt<bool>
43 EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
44 cl::desc("Enable if-conversion during vectorization."));
45
46static cl::opt<bool>
47AllowStridedPointerIVs("lv-strided-pointer-ivs", cl::init(false), cl::Hidden,
48 cl::desc("Enable recognition of non-constant strided "
49 "pointer induction variables."));
50
51static cl::opt<bool>
52 HintsAllowReordering("hints-allow-reordering", cl::init(true), cl::Hidden,
53 cl::desc("Allow enabling loop hints to reorder "
54 "FP operations during vectorization."));
55
58 "scalable-vectorization", cl::init(LoopVectorizeHints::SK_Unspecified),
60 cl::desc("Control whether the compiler can use scalable vectors to "
61 "vectorize a loop"),
64 "Scalable vectorization is disabled."),
67 "Scalable vectorization is available and favored when the "
68 "cost is inconclusive."),
71 "Scalable vectorization is available and favored when the "
72 "cost is inconclusive."),
75 "Scalable vectorization is available and always favored when "
76 "feasible")));
77
79 "enable-histogram-loop-vectorization", cl::init(false), cl::Hidden,
80 cl::desc("Enables autovectorization of some loops containing histograms"));
81
82/// Maximum vectorization interleave count.
83static const unsigned MaxInterleaveFactor = 16;
84
85namespace llvm {
86
87bool LoopVectorizeHints::Hint::validate(unsigned Val) {
88 switch (Kind) {
89 case HK_WIDTH:
91 case HK_INTERLEAVE:
92 return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
93 case HK_FORCE:
94 return (Val <= 1);
95 case HK_ISVECTORIZED:
96 case HK_PREDICATE:
97 case HK_SCALABLE:
98 return (Val == 0 || Val == 1);
99 }
100 return false;
101}
102
104 bool InterleaveOnlyWhenForced,
107 : Width("vectorize.width",
108 VectorizerParams::VectorizationFactor.getKnownMinValue(), HK_WIDTH),
109 Interleave("interleave.count", InterleaveOnlyWhenForced, HK_INTERLEAVE),
110 Force("vectorize.enable", FK_Undefined, HK_FORCE),
111 IsVectorized("isvectorized", 0, HK_ISVECTORIZED),
112 Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE),
113 Scalable("vectorize.scalable.enable", SK_Unspecified, HK_SCALABLE),
114 TheLoop(L), ORE(ORE) {
115 // Populate values with existing loop metadata.
116 getHintsFromMetadata();
117
118 // force-vector-interleave overrides DisableInterleaving.
121
122 // If the metadata doesn't explicitly specify whether to enable scalable
123 // vectorization, then decide based on the following criteria (increasing
124 // level of priority):
125 // - Target default
126 // - Metadata width
127 // - Force option (always overrides)
129 if (TTI)
130 Scalable.Value = TTI->enableScalableVectorization() ? SK_PreferScalable
132
133 if (Width.Value)
134 // If the width is set, but the metadata says nothing about the scalable
135 // property, then assume it concerns only a fixed-width UserVF.
136 // If width is not set, the flag takes precedence.
137 Scalable.Value = SK_FixedWidthOnly;
138 }
139
140 // If the flag is set to force any use of scalable vectors, override the loop
141 // hints.
142 if (ForceScalableVectorization.getValue() !=
144 Scalable.Value = ForceScalableVectorization.getValue();
145
146 // If force-vector-width is scalable, force scalable vectorization.
148 Scalable.Value = SK_AlwaysScalable;
149
150 // Scalable vectorization is disabled if no preference is specified.
152 Scalable.Value = SK_FixedWidthOnly;
153
154 if (IsVectorized.Value != 1)
155 // If the vectorization width and interleaving count are both 1 then
156 // consider the loop to have been already vectorized because there's
157 // nothing more that we can do.
158 IsVectorized.Value =
160 LLVM_DEBUG(if (InterleaveOnlyWhenForced && getInterleave() == 1) dbgs()
161 << "LV: Interleaving disabled by the pass manager\n");
162}
163
165 TheLoop->addIntLoopAttribute("llvm.loop.isvectorized", 1,
166 {Twine(Prefix(), "vectorize.").str(),
167 Twine(Prefix(), "interleave.").str()});
168
169 // Update internal cache.
170 IsVectorized.Value = 1;
171}
172
173void LoopVectorizeHints::reportDisallowedVectorization(
174 const StringRef DebugMsg, const StringRef RemarkName,
175 const StringRef RemarkMsg, const Loop *L) const {
176 LLVM_DEBUG(dbgs() << "LV: Not vectorizing: " << DebugMsg << ".\n");
177 ORE.emit(OptimizationRemarkMissed(LV_NAME, RemarkName, L->getStartLoc(),
178 L->getHeader())
179 << "loop not vectorized: " << RemarkMsg);
180}
181
183 Function *F, Loop *L, bool VectorizeOnlyWhenForced) const {
185 if (Force.Value == LoopVectorizeHints::FK_Disabled) {
186 reportDisallowedVectorization("#pragma vectorize disable",
187 "MissedExplicitlyDisabled",
188 "vectorization is explicitly disabled", L);
189 } else if (hasDisableAllTransformsHint(L)) {
190 reportDisallowedVectorization("loop hasDisableAllTransformsHint",
191 "MissedTransformsDisabled",
192 "loop transformations are disabled", L);
193 } else {
194 llvm_unreachable("loop vect disabled for an unknown reason");
195 }
196 return false;
197 }
198
199 if (VectorizeOnlyWhenForced && getForce() != LoopVectorizeHints::FK_Enabled) {
200 reportDisallowedVectorization(
201 "VectorizeOnlyWhenForced is set, and no #pragma vectorize enable",
202 "MissedForceOnly", "only vectorizing loops that explicitly request it",
203 L);
204 return false;
205 }
206
207 if (getIsVectorized() == 1) {
208 LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
209 // FIXME: Add interleave.disable metadata. This will allow
210 // vectorize.disable to be used without disabling the pass and errors
211 // to differentiate between disabled vectorization and a width of 1.
212 ORE.emit([&]() {
213 return OptimizationRemarkAnalysis(LV_NAME, "AllDisabled",
214 L->getStartLoc(), L->getHeader())
215 << "loop not vectorized: vectorization and interleaving are "
216 "explicitly disabled, or the loop has already been "
217 "vectorized";
218 });
219 return false;
220 }
221
222 return true;
223}
224
226 using namespace ore;
227
228 ORE.emit([&]() {
229 if (Force.Value == LoopVectorizeHints::FK_Disabled)
230 return OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled",
231 TheLoop->getStartLoc(),
232 TheLoop->getHeader())
233 << "loop not vectorized: vectorization is explicitly disabled";
234
235 OptimizationRemarkMissed R(LV_NAME, "MissedDetails", TheLoop->getStartLoc(),
236 TheLoop->getHeader());
237 R << "loop not vectorized";
238 if (Force.Value == LoopVectorizeHints::FK_Enabled) {
239 R << " (Force=" << NV("Force", true);
240 if (Width.Value != 0)
241 R << ", Vector Width=" << NV("VectorWidth", getWidth());
242 if (getInterleave() != 0)
243 R << ", Interleave Count=" << NV("InterleaveCount", getInterleave());
244 R << ")";
245 }
246 return R;
247 });
248}
249
251 // Allow the vectorizer to change the order of operations if enabling
252 // loop hints are provided
253 ElementCount EC = getWidth();
254 return HintsAllowReordering &&
256 EC.getKnownMinValue() > 1);
257}
258
259void LoopVectorizeHints::getHintsFromMetadata() {
260 MDNode *LoopID = TheLoop->getLoopID();
261 if (!LoopID)
262 return;
263
264 // First operand should refer to the loop id itself.
265 assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
266 assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
267
268 for (const MDOperand &MDO : llvm::drop_begin(LoopID->operands())) {
269 const MDString *S = nullptr;
271
272 // The expected hint is either a MDString or a MDNode with the first
273 // operand a MDString.
274 if (const MDNode *MD = dyn_cast<MDNode>(MDO)) {
275 if (!MD || MD->getNumOperands() == 0)
276 continue;
277 S = dyn_cast<MDString>(MD->getOperand(0));
278 for (unsigned Idx = 1; Idx < MD->getNumOperands(); ++Idx)
279 Args.push_back(MD->getOperand(Idx));
280 } else {
281 S = dyn_cast<MDString>(MDO);
282 assert(Args.size() == 0 && "too many arguments for MDString");
283 }
284
285 if (!S)
286 continue;
287
288 // Check if the hint starts with the loop metadata prefix.
289 StringRef Name = S->getString();
290 if (Args.size() == 1)
291 setHint(Name, Args[0]);
292 }
293}
294
295void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) {
296 if (!Name.consume_front(Prefix()))
297 return;
298
299 const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg);
300 if (!C)
301 return;
302 unsigned Val = C->getZExtValue();
303
304 Hint *Hints[] = {&Width, &Interleave, &Force,
305 &IsVectorized, &Predicate, &Scalable};
306 for (auto *H : Hints) {
307 if (Name == H->Name) {
308 if (H->validate(Val))
309 H->Value = Val;
310 else
311 LLVM_DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n");
312 break;
313 }
314 }
315}
316
317// Return true if the inner loop \p Lp is uniform with regard to the outer loop
318// \p OuterLp (i.e., if the outer loop is vectorized, all the vector lanes
319// executing the inner loop will execute the same iterations). This check is
320// very constrained for now but it will be relaxed in the future. \p Lp is
321// considered uniform if it meets all the following conditions:
322// 1) it has a canonical IV (starting from 0 and with stride 1),
323// 2) its latch terminator is a conditional branch and,
324// 3) its latch condition is a compare instruction whose operands are the
325// canonical IV and an OuterLp invariant.
326// This check doesn't take into account the uniformity of other conditions not
327// related to the loop latch because they don't affect the loop uniformity.
328//
329// NOTE: We decided to keep all these checks and its associated documentation
330// together so that we can easily have a picture of the current supported loop
331// nests. However, some of the current checks don't depend on \p OuterLp and
332// would be redundantly executed for each \p Lp if we invoked this function for
333// different candidate outer loops. This is not the case for now because we
334// don't currently have the infrastructure to evaluate multiple candidate outer
335// loops and \p OuterLp will be a fixed parameter while we only support explicit
336// outer loop vectorization. It's also very likely that these checks go away
337// before introducing the aforementioned infrastructure. However, if this is not
338// the case, we should move the \p OuterLp independent checks to a separate
339// function that is only executed once for each \p Lp.
340static bool isUniformLoop(Loop *Lp, Loop *OuterLp) {
341 assert(Lp->getLoopLatch() && "Expected loop with a single latch.");
342
343 // If Lp is the outer loop, it's uniform by definition.
344 if (Lp == OuterLp)
345 return true;
346 assert(OuterLp->contains(Lp) && "OuterLp must contain Lp.");
347
348 // 1.
350 if (!IV) {
351 LLVM_DEBUG(dbgs() << "LV: Canonical IV not found.\n");
352 return false;
353 }
354
355 // 2.
356 BasicBlock *Latch = Lp->getLoopLatch();
357 auto *LatchBr = dyn_cast<CondBrInst>(Latch->getTerminator());
358 if (!LatchBr) {
359 LLVM_DEBUG(dbgs() << "LV: Unsupported loop latch branch.\n");
360 return false;
361 }
362
363 // 3.
364 auto *LatchCmp = dyn_cast<CmpInst>(LatchBr->getCondition());
365 if (!LatchCmp) {
367 dbgs() << "LV: Loop latch condition is not a compare instruction.\n");
368 return false;
369 }
370
371 Value *CondOp0 = LatchCmp->getOperand(0);
372 Value *CondOp1 = LatchCmp->getOperand(1);
373 Value *IVUpdate = IV->getIncomingValueForBlock(Latch);
374 if (!(CondOp0 == IVUpdate && OuterLp->isLoopInvariant(CondOp1)) &&
375 !(CondOp1 == IVUpdate && OuterLp->isLoopInvariant(CondOp0))) {
376 LLVM_DEBUG(dbgs() << "LV: Loop latch condition is not uniform.\n");
377 return false;
378 }
379
380 return true;
381}
382
383// Return true if \p Lp and all its nested loops are uniform with regard to \p
384// OuterLp.
385static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp) {
386 if (!isUniformLoop(Lp, OuterLp))
387 return false;
388
389 // Check if nested loops are uniform.
390 for (Loop *SubLp : *Lp)
391 if (!isUniformLoopNest(SubLp, OuterLp))
392 return false;
393
394 return true;
395}
396
398 assert(Ty->isIntOrPtrTy() && "Expected integer or pointer type");
399
400 if (Ty->isPointerTy())
401 return DL.getIntPtrType(Ty->getContext(), Ty->getPointerAddressSpace());
402
403 // It is possible that char's or short's overflow when we ask for the loop's
404 // trip count, work around this by changing the type size.
405 if (Ty->getScalarSizeInBits() < 32)
406 return Type::getInt32Ty(Ty->getContext());
407
408 return cast<IntegerType>(Ty);
409}
410
412 Type *Ty1) {
415 return TyA->getScalarSizeInBits() > TyB->getScalarSizeInBits() ? TyA : TyB;
416}
417
418/// Returns true if A and B have same pointer operands or same SCEVs addresses
420 StoreInst *B) {
421 // Compare store
422 if (A == B)
423 return true;
424
425 // Otherwise Compare pointers
426 Value *APtr = A->getPointerOperand();
427 Value *BPtr = B->getPointerOperand();
428 if (APtr == BPtr)
429 return true;
430
431 // Otherwise compare address SCEVs
432 return SE->getSCEV(APtr) == SE->getSCEV(BPtr);
433}
434
436 if (!AllowRuntimeSCEVChecks || !TheLoop->isInnermost())
437 return;
438
439 for (BasicBlock *BB : TheLoop->blocks())
440 for (Instruction &I : *BB)
443}
444
446 Value *Ptr) const {
447 // FIXME: Currently, the set of symbolic strides is sometimes queried before
448 // it's collected. This happens from canVectorizeWithIfConvert, when the
449 // pointer is checked to reference consecutive elements suitable for a
450 // masked access.
451 // Stride versioning requires adding a SCEV equality predicate; only consult
452 // the symbolic strides when runtime SCEV checks are permitted.
453 const auto &Strides = LAI && AllowRuntimeSCEVChecks
454 ? LAI->getSymbolicStrides()
456 int Stride = getPtrStride(PSE, AccessTy, Ptr, TheLoop, *DT, Strides,
457 AllowRuntimeSCEVChecks, false)
458 .value_or(0);
459 if (Stride == 1 || Stride == -1)
460 return Stride;
461 return 0;
462}
463
465 return LAI->isInvariant(V);
466}
467
468namespace {
469/// A rewriter to build the SCEVs for each of the VF lanes in the expected
470/// vectorized loop, which can then be compared to detect their uniformity. This
471/// is done by replacing the AddRec SCEVs of the original scalar loop (TheLoop)
472/// with new AddRecs where the step is multiplied by StepMultiplier and Offset *
473/// Step is added. Also checks if all sub-expressions are analyzable w.r.t.
474/// uniformity.
475class SCEVAddRecForUniformityRewriter
476 : public SCEVRewriteVisitor<SCEVAddRecForUniformityRewriter> {
477 /// Multiplier to be applied to the step of AddRecs in TheLoop.
478 unsigned StepMultiplier;
479
480 /// Offset to be added to the AddRecs in TheLoop.
481 unsigned Offset;
482
483 /// Loop for which to rewrite AddRecsFor.
484 Loop *TheLoop;
485
486 /// Is any sub-expressions not analyzable w.r.t. uniformity?
487 bool CannotAnalyze = false;
488
489 bool canAnalyze() const { return !CannotAnalyze; }
490
491public:
492 SCEVAddRecForUniformityRewriter(ScalarEvolution &SE, unsigned StepMultiplier,
493 unsigned Offset, Loop *TheLoop)
494 : SCEVRewriteVisitor(SE), StepMultiplier(StepMultiplier), Offset(Offset),
495 TheLoop(TheLoop) {}
496
497 const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
498 assert(Expr->getLoop() == TheLoop &&
499 "addrec outside of TheLoop must be invariant and should have been "
500 "handled earlier");
501 // Build a new AddRec by multiplying the step by StepMultiplier and
502 // incrementing the start by Offset * step.
503 Type *Ty = Expr->getType();
504 const SCEV *Step = Expr->getStepRecurrence(SE);
505 if (!SE.isLoopInvariant(Step, TheLoop)) {
506 CannotAnalyze = true;
507 return Expr;
508 }
509 const SCEV *NewStep =
510 SE.getMulExpr(Step, SE.getConstant(Ty, StepMultiplier));
511 const SCEV *ScaledOffset = SE.getMulExpr(Step, SE.getConstant(Ty, Offset));
512 const SCEV *NewStart =
513 SE.getAddExpr(Expr->getStart(), SCEVUse(ScaledOffset));
514 return SE.getAddRecExpr(NewStart, NewStep, TheLoop, SCEV::FlagAnyWrap);
515 }
516
517 const SCEV *visit(const SCEV *S) {
518 if (CannotAnalyze || SE.isLoopInvariant(S, TheLoop))
519 return S;
521 }
522
523 const SCEV *visitUnknown(const SCEVUnknown *S) {
524 if (SE.isLoopInvariant(S, TheLoop))
525 return S;
526 // The value could vary across iterations.
527 CannotAnalyze = true;
528 return S;
529 }
530
531 const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *S) {
532 // Could not analyze the expression.
533 CannotAnalyze = true;
534 return S;
535 }
536
537 static const SCEV *rewrite(const SCEV *S, ScalarEvolution &SE,
538 unsigned StepMultiplier, unsigned Offset,
539 Loop *TheLoop) {
540 /// Bail out if the expression does not contain an UDiv expression.
541 /// Uniform values which are not loop invariant require operations to strip
542 /// out the lowest bits. For now just look for UDivs and use it to avoid
543 /// re-writing UDIV-free expressions for other lanes to limit compile time.
544 if (!SCEVExprContains(S,
545 [](const SCEV *S) { return isa<SCEVUDivExpr>(S); }))
546 return SE.getCouldNotCompute();
547
548 SCEVAddRecForUniformityRewriter Rewriter(SE, StepMultiplier, Offset,
549 TheLoop);
550 const SCEV *Result = Rewriter.visit(S);
551
552 if (Rewriter.canAnalyze())
553 return Result;
554 return SE.getCouldNotCompute();
555 }
556};
557
558} // namespace
559
561 Value *V, std::optional<ElementCount> VF) const {
562 if (isInvariant(V))
563 return true;
564 if (!VF || VF->isScalable())
565 return false;
566 if (VF->isScalar())
567 return true;
568
569 // Since we rely on SCEV for uniformity, if the type is not SCEVable, it is
570 // never considered uniform.
571 auto *SE = PSE.getSE();
572 if (!SE->isSCEVable(V->getType()))
573 return false;
574 const SCEV *S = SE->getSCEV(V);
575
576 // Rewrite AddRecs in TheLoop to step by VF and check if the expression for
577 // lane 0 matches the expressions for all other lanes.
578 unsigned FixedVF = VF->getKnownMinValue();
579 const SCEV *FirstLaneExpr =
580 SCEVAddRecForUniformityRewriter::rewrite(S, *SE, FixedVF, 0, TheLoop);
581 if (isa<SCEVCouldNotCompute>(FirstLaneExpr))
582 return false;
583
584 // Make sure the expressions for lanes FixedVF-1..1 match the expression for
585 // lane 0. We check lanes in reverse order for compile-time, as frequently
586 // checking the last lane is sufficient to rule out uniformity.
587 return all_of(reverse(seq<unsigned>(1, FixedVF)), [&](unsigned I) {
588 const SCEV *IthLaneExpr =
589 SCEVAddRecForUniformityRewriter::rewrite(S, *SE, FixedVF, I, TheLoop);
590 return FirstLaneExpr == IthLaneExpr;
591 });
592}
593
595 Instruction &I, std::optional<ElementCount> VF) const {
597 if (!Ptr)
598 return false;
599 // Note: There's nothing inherent which prevents predicated loads and
600 // stores from being uniform. The current lowering simply doesn't handle
601 // it; in particular, the cost model distinguishes scatter/gather from
602 // scalar w/predication, and we currently rely on the scalar path.
603 return isUniform(Ptr, VF) && !blockNeedsPredication(I.getParent());
604}
605
606bool LoopVectorizationLegality::canVectorizeOuterLoop() {
607 assert(!TheLoop->isInnermost() && "We are not vectorizing an outer loop.");
608 // Store the result and return it at the end instead of exiting early, in case
609 // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
610 bool Result = true;
611 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
612
613 for (BasicBlock *BB : TheLoop->blocks()) {
614 // Check whether the BB terminator is a branch. Any other terminator is
615 // not supported yet.
616 Instruction *Term = BB->getTerminator();
619 "Unsupported basic block terminator",
620 "loop control flow is not understood by vectorizer",
621 "CFGNotUnderstood", ORE, TheLoop);
622 if (DoExtraAnalysis)
623 Result = false;
624 else
625 return false;
626 }
627
628 // Check whether the branch is a supported one. Only unconditional
629 // branches, conditional branches with an outer loop invariant condition or
630 // backedges are supported.
631 // FIXME: We skip these checks when VPlan predication is enabled as we
632 // want to allow divergent branches. This whole check will be removed
633 // once VPlan predication is on by default.
634 auto *Br = dyn_cast<CondBrInst>(Term);
635 if (Br && !TheLoop->isLoopInvariant(Br->getCondition()) &&
636 !LI->isLoopHeader(Br->getSuccessor(0)) &&
637 !LI->isLoopHeader(Br->getSuccessor(1))) {
639 "Unsupported conditional branch",
640 "loop control flow is not understood by vectorizer",
641 "CFGNotUnderstood", ORE, TheLoop);
642 if (DoExtraAnalysis)
643 Result = false;
644 else
645 return false;
646 }
647 }
648
649 // Check whether inner loops are uniform. At this point, we only support
650 // simple outer loops scenarios with uniform nested loops.
651 if (!isUniformLoopNest(TheLoop /*loop nest*/,
652 TheLoop /*context outer loop*/)) {
654 "Outer loop contains divergent loops",
655 "loop control flow is not understood by vectorizer", "CFGNotUnderstood",
656 ORE, TheLoop);
657 if (DoExtraAnalysis)
658 Result = false;
659 else
660 return false;
661 }
662
663 // Check whether we are able to set up outer loop induction.
664 if (!setupOuterLoopInductions()) {
665 reportVectorizationFailure("Unsupported outer loop Phi(s)",
666 "UnsupportedPhi", ORE, TheLoop);
667 if (DoExtraAnalysis)
668 Result = false;
669 else
670 return false;
671 }
672
673 return Result;
674}
675
676void LoopVectorizationLegality::addInductionPhi(PHINode *Phi,
677 const InductionDescriptor &ID) {
678 Inductions[Phi] = ID;
679
680 // In case this induction also comes with casts that we know we can ignore
681 // in the vectorized loop body, record them here. All casts could be recorded
682 // here for ignoring, but suffices to record only the first (as it is the
683 // only one that may bw used outside the cast sequence).
684 ArrayRef<Instruction *> Casts = ID.getCastInsts();
685 if (!Casts.empty())
686 InductionCastsToIgnore.insert(*Casts.begin());
687
688 Type *PhiTy = Phi->getType();
689 const DataLayout &DL = Phi->getDataLayout();
690
691 assert((PhiTy->isIntOrPtrTy() || PhiTy->isFloatingPointTy()) &&
692 "Expected int, ptr, or FP induction phi type");
693
694 // Get the widest type.
695 if (PhiTy->isIntOrPtrTy()) {
696 if (!WidestIndTy)
697 WidestIndTy = getInductionIntegerTy(DL, PhiTy);
698 else
699 WidestIndTy = getWiderInductionTy(DL, PhiTy, WidestIndTy);
700 }
701
702 // Int inductions are special because we only allow one IV.
703 if (ID.getKind() == InductionDescriptor::IK_IntInduction &&
704 ID.getConstIntStepValue() && ID.getConstIntStepValue()->isOne() &&
705 isa<Constant>(ID.getStartValue()) &&
706 cast<Constant>(ID.getStartValue())->isNullValue()) {
707
708 // Use the phi node with the widest type as induction. Use the last
709 // one if there are multiple (no good reason for doing this other
710 // than it is expedient). We've checked that it begins at zero and
711 // steps by one, so this is a canonical induction variable.
712 if (!PrimaryInduction || PhiTy == WidestIndTy)
713 PrimaryInduction = Phi;
714 }
715
716 LLVM_DEBUG(dbgs() << "LV: Found an induction variable.\n");
717}
718
719bool LoopVectorizationLegality::setupOuterLoopInductions() {
720 BasicBlock *Header = TheLoop->getHeader();
721
722 // Returns true if a given Phi is a supported induction.
723 auto IsSupportedPhi = [&](PHINode &Phi) -> bool {
724 InductionDescriptor ID;
725 if (InductionDescriptor::isInductionPHI(&Phi, TheLoop, PSE, ID) &&
727 addInductionPhi(&Phi, ID);
728 return true;
729 }
730 // Bail out for any Phi in the outer loop header that is not a supported
731 // induction.
733 dbgs() << "LV: Found unsupported PHI for outer loop vectorization.\n");
734 return false;
735 };
736
737 return llvm::all_of(Header->phis(), IsSupportedPhi);
738}
739
740/// Checks if a function is scalarizable according to the TLI, in
741/// the sense that it should be vectorized and then expanded in
742/// multiple scalar calls. This is represented in the
743/// TLI via mappings that do not specify a vector name, as in the
744/// following example:
745///
746/// const VecDesc VecIntrinsics[] = {
747/// {"llvm.phx.abs.i32", "", 4}
748/// };
749static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI) {
750 const StringRef ScalarName = CI.getCalledFunction()->getName();
751 bool Scalarize = TLI.isFunctionVectorizable(ScalarName);
752 // Check that all known VFs are not associated to a vector
753 // function, i.e. the vector name is emty.
754 if (Scalarize) {
755 ElementCount WidestFixedVF, WidestScalableVF;
756 TLI.getWidestVF(ScalarName, WidestFixedVF, WidestScalableVF);
758 ElementCount::isKnownLE(VF, WidestFixedVF); VF *= 2)
759 Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF);
761 ElementCount::isKnownLE(VF, WidestScalableVF); VF *= 2)
762 Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF);
763 assert((WidestScalableVF.isZero() || !Scalarize) &&
764 "Caller may decide to scalarize a variant using a scalable VF");
765 }
766 return Scalarize;
767}
768
769bool LoopVectorizationLegality::canVectorizeInstrs() {
770 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
771 bool Result = true;
772
773 // For each block in the loop.
774 for (BasicBlock *BB : TheLoop->blocks()) {
775 // Scan the instructions in the block and look for hazards.
776 for (Instruction &I : *BB) {
777 Result &= canVectorizeInstr(I);
778 if (!DoExtraAnalysis && !Result)
779 return false;
780 }
781 }
782
783 if (!PrimaryInduction) {
784 if (Inductions.empty()) {
786 "Did not find one integer induction var",
787 "loop induction variable could not be identified",
788 "NoInductionVariable", ORE, TheLoop);
789 return false;
790 }
791 if (!WidestIndTy) {
793 "Did not find one integer induction var",
794 "integer loop induction variable could not be identified",
795 "NoIntegerInductionVariable", ORE, TheLoop);
796 return false;
797 }
798 LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
799 }
800
801 // Now we know the widest induction type, check if our found induction
802 // is the same size. If it's not, unset it here and InnerLoopVectorizer
803 // will create another.
804 if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType())
805 PrimaryInduction = nullptr;
806
807 return Result;
808}
809
810bool LoopVectorizationLegality::canVectorizeInstr(Instruction &I) {
811 BasicBlock *BB = I.getParent();
812 BasicBlock *Header = TheLoop->getHeader();
813
814 if (auto *Phi = dyn_cast<PHINode>(&I)) {
815 Type *PhiTy = Phi->getType();
816 // Check that this PHI type is allowed.
817 if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
818 !PhiTy->isPointerTy()) {
820 "Found a non-int non-pointer PHI",
821 "loop control flow is not understood by vectorizer",
822 "CFGNotUnderstood", ORE, TheLoop);
823 return false;
824 }
825
826 // If this PHINode is not in the header block, then we know that we
827 // can convert it to select during if-conversion. No need to check if
828 // the PHIs in this block are induction or reduction variables.
829 if (BB != Header) {
830 // Non-header phi nodes that have outside uses can be vectorized. Unsafe
831 // cyclic dependencies with header phis are identified during legalization
832 // for reduction, induction and fixed order recurrences.
833 return true;
834 }
835
836 // We only allow if-converted PHIs with exactly two incoming values.
837 if (Phi->getNumIncomingValues() != 2) {
839 "Found an invalid PHI",
840 "loop control flow is not understood by vectorizer",
841 "CFGNotUnderstood", ORE, TheLoop, Phi);
842 return false;
843 }
844
845 RecurrenceDescriptor RedDes;
846 if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC, DT,
847 PSE.getSE())) {
848 Requirements->addExactFPMathInst(RedDes.getExactFPMathInst());
849 Reductions[Phi] = std::move(RedDes);
852 RedDes.getRecurrenceKind())) &&
853 "Only min/max recurrences are allowed to have multiple uses "
854 "currently");
855 return true;
856 }
857
858 // We prevent matching non-constant strided pointer IVS to preserve
859 // historical vectorizer behavior after a generalization of the
860 // IVDescriptor code. The intent is to remove this check, but we
861 // have to fix issues around code quality for such loops first.
862 auto IsDisallowedStridedPointerInduction =
863 [](const InductionDescriptor &ID) {
865 return false;
866 return ID.getKind() == InductionDescriptor::IK_PtrInduction &&
867 ID.getConstIntStepValue() == nullptr;
868 };
869
870 InductionDescriptor ID;
871 if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID) &&
872 !IsDisallowedStridedPointerInduction(ID)) {
873 addInductionPhi(Phi, ID);
874 Requirements->addExactFPMathInst(ID.getExactFPMathInst());
875 return true;
876 }
877
878 if (RecurrenceDescriptor::isFixedOrderRecurrence(Phi, TheLoop, DT)) {
879 FixedOrderRecurrences.insert(Phi);
880 return true;
881 }
882
883 // As a last resort, coerce the PHI to a AddRec expression
884 // and re-try classifying it a an induction PHI.
885 if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true) &&
886 !IsDisallowedStridedPointerInduction(ID)) {
887 addInductionPhi(Phi, ID);
888 return true;
889 }
890
891 reportVectorizationFailure("Found an unidentified PHI",
892 "value that could not be identified as "
893 "reduction is used outside the loop",
894 "NonReductionValueUsedOutsideLoop", ORE, TheLoop,
895 Phi);
896 return false;
897 } // end of PHI handling
898
899 // We handle calls that:
900 // * Have a mapping to an IR intrinsic.
901 // * Have a vector version available.
902 auto *CI = dyn_cast<CallInst>(&I);
903
904 if (CI && !getVectorIntrinsicIDForCall(CI, TLI) &&
905 !(CI->getCalledFunction() && TLI &&
906 (!VFDatabase::getMappings(*CI).empty() || isTLIScalarize(*TLI, *CI)))) {
907 // If the call is a recognized math libary call, it is likely that
908 // we can vectorize it given loosened floating-point constraints.
909 LibFunc Func;
910 bool IsMathLibCall =
911 TLI && CI->getCalledFunction() && CI->getType()->isFloatingPointTy() &&
912 TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) &&
913 TLI->hasOptimizedCodeGen(Func);
914
915 if (IsMathLibCall) {
916 // TODO: Ideally, we should not use clang-specific language here,
917 // but it's hard to provide meaningful yet generic advice.
918 // Also, should this be guarded by allowExtraAnalysis() and/or be part
919 // of the returned info from isFunctionVectorizable()?
921 "Found a non-intrinsic callsite",
922 "library call cannot be vectorized. "
923 "Try compiling with -fno-math-errno, -ffast-math, "
924 "or similar flags",
925 "CantVectorizeLibcall", ORE, TheLoop, CI);
926 } else {
927 reportVectorizationFailure("Found a non-intrinsic callsite",
928 "call instruction cannot be vectorized",
929 "CantVectorizeLibcall", ORE, TheLoop, CI);
930 }
931 return false;
932 }
933
934 // Some intrinsics have scalar arguments and should be same in order for
935 // them to be vectorized (i.e. loop invariant).
936 if (CI) {
937 auto *SE = PSE.getSE();
938 Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI);
939 for (unsigned Idx = 0; Idx < CI->arg_size(); ++Idx)
940 if (isVectorIntrinsicWithScalarOpAtArg(IntrinID, Idx, TTI)) {
941 if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(Idx)), TheLoop)) {
943 "Found unvectorizable intrinsic",
944 "intrinsic instruction cannot be vectorized",
945 "CantVectorizeIntrinsic", ORE, TheLoop, CI);
946 return false;
947 }
948 }
949 }
950
951 // If we found a vectorized variant of a function, note that so LV can
952 // make better decisions about maximum VF.
953 if (CI && !VFDatabase::getMappings(*CI).empty())
954 VecCallVariantsFound = true;
955
956 auto CanWidenInstructionTy = [](Instruction const &Inst) {
957 Type *InstTy = Inst.getType();
958 if (!isa<StructType>(InstTy))
959 return canVectorizeTy(InstTy);
960
961 // For now, we only recognize struct values returned from calls where
962 // all users are extractvalue as vectorizable. All element types of the
963 // struct must be types that can be widened.
964 return isa<CallInst>(Inst) && canVectorizeTy(InstTy) &&
965 all_of(Inst.users(), IsaPred<ExtractValueInst>);
966 };
967
968 // Check that the instruction return type is vectorizable.
969 // We can't vectorize casts from vector type to scalar type.
970 // Also, we can't vectorize extractelement instructions.
971 if (!CanWidenInstructionTy(I) ||
972 (isa<CastInst>(I) &&
973 !VectorType::isValidElementType(I.getOperand(0)->getType())) ||
975 reportVectorizationFailure("Found unvectorizable type",
976 "instruction return type cannot be vectorized",
977 "CantVectorizeInstructionReturnType", ORE,
978 TheLoop, &I);
979 return false;
980 }
981
982 // Check that the stored type is vectorizable.
983 if (auto *ST = dyn_cast<StoreInst>(&I)) {
984 Type *T = ST->getValueOperand()->getType();
986 reportVectorizationFailure("Store instruction cannot be vectorized",
987 "CantVectorizeStore", ORE, TheLoop, ST);
988 return false;
989 }
990
991 // For nontemporal stores, check that a nontemporal vector version is
992 // supported on the target.
993 if (ST->getMetadata(LLVMContext::MD_nontemporal)) {
994 // Arbitrarily try a vector of 2 elements.
995 auto *VecTy = FixedVectorType::get(T, /*NumElts=*/2);
996 assert(VecTy && "did not find vectorized version of stored type");
997 if (!TTI->isLegalNTStore(VecTy, ST->getAlign())) {
999 "nontemporal store instruction cannot be vectorized",
1000 "CantVectorizeNontemporalStore", ORE, TheLoop, ST);
1001 return false;
1002 }
1003 }
1004
1005 } else if (auto *LD = dyn_cast<LoadInst>(&I)) {
1006 if (LD->getMetadata(LLVMContext::MD_nontemporal)) {
1007 // For nontemporal loads, check that a nontemporal vector version is
1008 // supported on the target (arbitrarily try a vector of 2 elements).
1009 auto *VecTy = FixedVectorType::get(I.getType(), /*NumElts=*/2);
1010 assert(VecTy && "did not find vectorized version of load type");
1011 if (!TTI->isLegalNTLoad(VecTy, LD->getAlign())) {
1013 "nontemporal load instruction cannot be vectorized",
1014 "CantVectorizeNontemporalLoad", ORE, TheLoop, LD);
1015 return false;
1016 }
1017 }
1018
1019 // FP instructions can allow unsafe algebra, thus vectorizable by
1020 // non-IEEE-754 compliant SIMD units.
1021 // This applies to floating-point math operations and calls, not memory
1022 // operations, shuffles, or casts, as they don't change precision or
1023 // semantics.
1024 } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) &&
1025 !I.isFast()) {
1026 LLVM_DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n");
1027 Hints->setPotentiallyUnsafe();
1028 }
1029
1030 return true;
1031}
1032
1033/// Find histogram operations that match high-level code in loops:
1034/// \code
1035/// buckets[indices[i]]+=step;
1036/// \endcode
1037///
1038/// It matches a pattern starting from \p HSt, which Stores to the 'buckets'
1039/// array the computed histogram. It uses a BinOp to sum all counts, storing
1040/// them using a loop-variant index Load from the 'indices' input array.
1041///
1042/// On successful matches it updates the STATISTIC 'HistogramsDetected',
1043/// regardless of hardware support. When there is support, it additionally
1044/// stores the BinOp/Load pairs in \p HistogramCounts, as well the pointers
1045/// used to update histogram in \p HistogramPtrs.
1046static bool findHistogram(LoadInst *LI, StoreInst *HSt, Loop *TheLoop,
1047 const PredicatedScalarEvolution &PSE,
1048 SmallVectorImpl<HistogramInfo> &Histograms) {
1049
1050 // Store value must come from a Binary Operation.
1051 Instruction *HPtrInstr = nullptr;
1052 BinaryOperator *HBinOp = nullptr;
1053 if (!match(HSt, m_Store(m_BinOp(HBinOp), m_Instruction(HPtrInstr))))
1054 return false;
1055
1056 // BinOp must be an Add or a Sub modifying the bucket value by a
1057 // loop invariant amount.
1058 // FIXME: We assume the loop invariant term is on the RHS.
1059 // Fine for an immediate/constant, but maybe not a generic value?
1060 Value *HIncVal = nullptr;
1061 if (!match(HBinOp, m_Add(m_Load(m_Specific(HPtrInstr)), m_Value(HIncVal))) &&
1062 !match(HBinOp, m_Sub(m_Load(m_Specific(HPtrInstr)), m_Value(HIncVal))))
1063 return false;
1064
1065 // Make sure the increment value is loop invariant.
1066 if (!TheLoop->isLoopInvariant(HIncVal))
1067 return false;
1068
1069 // The address to store is calculated through a GEP Instruction.
1071 if (!GEP)
1072 return false;
1073
1074 // Restrict address calculation to constant indices except for the last term.
1075 Value *HIdx = nullptr;
1076 for (Value *Index : GEP->indices()) {
1077 if (HIdx)
1078 return false;
1079 if (!isa<ConstantInt>(Index))
1080 HIdx = Index;
1081 }
1082
1083 if (!HIdx)
1084 return false;
1085
1086 // Check that the index is calculated by loading from another array. Ignore
1087 // any extensions.
1088 // FIXME: Support indices from other sources than a linear load from memory?
1089 // We're currently trying to match an operation looping over an array
1090 // of indices, but there could be additional levels of indirection
1091 // in place, or possibly some additional calculation to form the index
1092 // from the loaded data.
1093 Value *VPtrVal;
1094 if (!match(HIdx, m_ZExtOrSExtOrSelf(m_Load(m_Value(VPtrVal)))))
1095 return false;
1096
1097 // Make sure the index address varies in this loop, not an outer loop.
1098 const auto *AR = dyn_cast<SCEVAddRecExpr>(PSE.getSE()->getSCEV(VPtrVal));
1099 if (!AR || AR->getLoop() != TheLoop)
1100 return false;
1101
1102 // Ensure we'll have the same mask by checking that all parts of the histogram
1103 // (gather load, update, scatter store) are in the same block.
1104 LoadInst *IndexedLoad = cast<LoadInst>(HBinOp->getOperand(0));
1105 BasicBlock *LdBB = IndexedLoad->getParent();
1106 if (LdBB != HBinOp->getParent() || LdBB != HSt->getParent())
1107 return false;
1108
1109 LLVM_DEBUG(dbgs() << "LV: Found histogram for: " << *HSt << "\n");
1110
1111 // Store the operations that make up the histogram.
1112 Histograms.emplace_back(IndexedLoad, HBinOp, HSt);
1113 return true;
1114}
1115
1116bool LoopVectorizationLegality::canVectorizeIndirectUnsafeDependences() {
1117 // For now, we only support an IndirectUnsafe dependency that calculates
1118 // a histogram
1120 return false;
1121
1122 // Find a single IndirectUnsafe dependency.
1123 const MemoryDepChecker::Dependence *IUDep = nullptr;
1124 const MemoryDepChecker &DepChecker = LAI->getDepChecker();
1125 const auto *Deps = DepChecker.getDependences();
1126 // If there were too many dependences, LAA abandons recording them. We can't
1127 // proceed safely if we don't know what the dependences are.
1128 if (!Deps)
1129 return false;
1130
1131 for (const MemoryDepChecker::Dependence &Dep : *Deps) {
1132 // Ignore dependencies that are either known to be safe or can be
1133 // checked at runtime.
1136 continue;
1137
1138 // We're only interested in IndirectUnsafe dependencies here, where the
1139 // address might come from a load from memory. We also only want to handle
1140 // one such dependency, at least for now.
1141 if (Dep.Type != MemoryDepChecker::Dependence::IndirectUnsafe || IUDep)
1142 return false;
1143
1144 IUDep = &Dep;
1145 }
1146 if (!IUDep)
1147 return false;
1148
1149 // For now only normal loads and stores are supported.
1150 LoadInst *LI = dyn_cast<LoadInst>(IUDep->getSource(DepChecker));
1151 StoreInst *SI = dyn_cast<StoreInst>(IUDep->getDestination(DepChecker));
1152
1153 if (!LI || !SI)
1154 return false;
1155
1156 LLVM_DEBUG(dbgs() << "LV: Checking for a histogram on: " << *SI << "\n");
1157 return findHistogram(LI, SI, TheLoop, LAI->getPSE(), Histograms);
1158}
1159
1160bool LoopVectorizationLegality::canVectorizeMemory() {
1161 LAI = &LAIs.getInfo(*TheLoop);
1162 const OptimizationRemarkAnalysis *LAR = LAI->getReport();
1163 if (LAR) {
1164 ORE->emit([&]() {
1165 return OptimizationRemarkAnalysis(LV_NAME, "loop not vectorized: ", *LAR);
1166 });
1167 }
1168
1169 if (!LAI->canVectorizeMemory()) {
1172 "Cannot vectorize unsafe dependencies in uncountable exit loop with "
1173 "side effects",
1174 "CantVectorizeUnsafeDependencyForEELoopWithSideEffects", ORE,
1175 TheLoop);
1176 return false;
1177 }
1178
1179 return canVectorizeIndirectUnsafeDependences();
1180 }
1181
1182 if (LAI->hasLoadStoreDependenceInvolvingLoopInvariantAddress()) {
1183 reportVectorizationFailure("We don't allow storing to uniform addresses",
1184 "write to a loop invariant address could not "
1185 "be vectorized",
1186 "CantVectorizeStoreToLoopInvariantAddress", ORE,
1187 TheLoop);
1188 return false;
1189 }
1190
1191 // We can vectorize stores to invariant address when final reduction value is
1192 // guaranteed to be stored at the end of the loop. Also, if decision to
1193 // vectorize loop is made, runtime checks are added so as to make sure that
1194 // invariant address won't alias with any other objects.
1195 if (!LAI->getStoresToInvariantAddresses().empty()) {
1196 // For each invariant address, check if last stored value is unconditional
1197 // and the address is not calculated inside the loop.
1198 for (StoreInst *SI : LAI->getStoresToInvariantAddresses()) {
1200 continue;
1201
1202 if (blockNeedsPredication(SI->getParent())) {
1204 "We don't allow storing to uniform addresses",
1205 "write of conditional recurring variant value to a loop "
1206 "invariant address could not be vectorized",
1207 "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
1208 return false;
1209 }
1210
1211 // Invariant address should be defined outside of loop. LICM pass usually
1212 // makes sure it happens, but in rare cases it does not, we do not want
1213 // to overcomplicate vectorization to support this case.
1214 if (Instruction *Ptr = dyn_cast<Instruction>(SI->getPointerOperand())) {
1215 if (TheLoop->contains(Ptr)) {
1217 "Invariant address is calculated inside the loop",
1218 "write to a loop invariant address could not "
1219 "be vectorized",
1220 "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
1221 return false;
1222 }
1223 }
1224 }
1225
1226 if (LAI->hasStoreStoreDependenceInvolvingLoopInvariantAddress()) {
1227 // For each invariant address, check its last stored value is the result
1228 // of one of our reductions.
1229 //
1230 // We do not check if dependence with loads exists because that is already
1231 // checked via hasLoadStoreDependenceInvolvingLoopInvariantAddress.
1232 ScalarEvolution *SE = PSE.getSE();
1233 SmallVector<StoreInst *, 4> UnhandledStores;
1234 for (StoreInst *SI : LAI->getStoresToInvariantAddresses()) {
1236 // Earlier stores to this address are effectively deadcode.
1237 // With opaque pointers it is possible for one pointer to be used with
1238 // different sizes of stored values:
1239 // store i32 0, ptr %x
1240 // store i8 0, ptr %x
1241 // The latest store doesn't complitely overwrite the first one in the
1242 // example. That is why we have to make sure that types of stored
1243 // values are same.
1244 // TODO: Check that bitwidth of unhandled store is smaller then the
1245 // one that overwrites it and add a test.
1246 erase_if(UnhandledStores, [SE, SI](StoreInst *I) {
1247 return storeToSameAddress(SE, SI, I) &&
1248 I->getValueOperand()->getType() ==
1249 SI->getValueOperand()->getType();
1250 });
1251 continue;
1252 }
1253 UnhandledStores.push_back(SI);
1254 }
1255
1256 bool IsOK = UnhandledStores.empty();
1257 // TODO: we should also validate against InvariantMemSets.
1258 if (!IsOK) {
1260 "We don't allow storing to uniform addresses",
1261 "write to a loop invariant address could not "
1262 "be vectorized",
1263 "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
1264 return false;
1265 }
1266 }
1267 }
1268
1269 PSE.addPredicate(LAI->getPSE().getPredicate());
1270 return true;
1271}
1272
1274 bool EnableStrictReductions) {
1275
1276 // First check if there is any ExactFP math or if we allow reassociations
1277 if (!Requirements->getExactFPInst() || Hints->allowReordering())
1278 return true;
1279
1280 // If the above is false, we have ExactFPMath & do not allow reordering.
1281 // If the EnableStrictReductions flag is set, first check if we have any
1282 // Exact FP induction vars, which we cannot vectorize.
1283 if (!EnableStrictReductions ||
1284 any_of(getInductionVars(), [&](auto &Induction) -> bool {
1285 InductionDescriptor IndDesc = Induction.second;
1286 return IndDesc.getExactFPMathInst();
1287 }))
1288 return false;
1289
1290 // We can now only vectorize if all reductions with Exact FP math also
1291 // have the isOrdered flag set, which indicates that we can move the
1292 // reduction operations in-loop.
1293 return (all_of(getReductionVars(), [&](auto &Reduction) -> bool {
1294 const RecurrenceDescriptor &RdxDesc = Reduction.second;
1295 return !RdxDesc.hasExactFPMath() || RdxDesc.isOrdered();
1296 }));
1297}
1298
1300 return any_of(getReductionVars(), [&](auto &Reduction) -> bool {
1301 const RecurrenceDescriptor &RdxDesc = Reduction.second;
1302 return RdxDesc.IntermediateStore == SI;
1303 });
1304}
1305
1307 return any_of(getReductionVars(), [&](auto &Reduction) -> bool {
1308 const RecurrenceDescriptor &RdxDesc = Reduction.second;
1309 if (!RdxDesc.IntermediateStore)
1310 return false;
1311
1312 ScalarEvolution *SE = PSE.getSE();
1313 Value *InvariantAddress = RdxDesc.IntermediateStore->getPointerOperand();
1314 return V == InvariantAddress ||
1315 SE->getSCEV(V) == SE->getSCEV(InvariantAddress);
1316 });
1317}
1318
1320 Value *In0 = const_cast<Value *>(V);
1322 if (!PN)
1323 return false;
1324
1325 return Inductions.count(PN);
1326}
1327
1328const InductionDescriptor *
1330 if (!isInductionPhi(Phi))
1331 return nullptr;
1332 auto &ID = getInductionVars().find(Phi)->second;
1333 if (ID.getKind() == InductionDescriptor::IK_IntInduction ||
1335 return &ID;
1336 return nullptr;
1337}
1338
1339const InductionDescriptor *
1341 if (!isInductionPhi(Phi))
1342 return nullptr;
1343 auto &ID = getInductionVars().find(Phi)->second;
1345 return &ID;
1346 return nullptr;
1347}
1348
1350 const Value *V) const {
1351 auto *Inst = dyn_cast<Instruction>(V);
1352 return (Inst && InductionCastsToIgnore.count(Inst));
1353}
1354
1358
1360 const PHINode *Phi) const {
1361 return FixedOrderRecurrences.count(Phi);
1362}
1363
1365 const BasicBlock *BB) const {
1366 BasicBlock *Latch = TheLoop->getLoopLatch();
1367
1368 // Without a latch, we cannot properly answer blockNeedsPredication,
1369 // return early.
1370 if (!Latch) {
1371 assert(ORE->allowExtraAnalysis(DEBUG_TYPE) &&
1372 !canVectorizeLoopCFG(TheLoop, /*UseVPlanNativePath=*/false) &&
1373 "Loop shape should have been rejected by earlier checks");
1374 return false;
1375 }
1376
1377 // When vectorizing early exits, create predicates for the latch block only.
1378 // For a single early exit, it must be a direct predecessor of the latch.
1379 // For multiple early exits, they form a chain where each exiting block
1380 // dominates all subsequent blocks up to the latch.
1382 return BB == Latch;
1383 return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
1384}
1385
1386bool LoopVectorizationLegality::blockCanBePredicated(
1387 BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs,
1388 SmallPtrSetImpl<const Instruction *> &MaskedOp) const {
1389 for (Instruction &I : *BB) {
1390 // We can predicate blocks with calls to assume, as long as we drop them in
1391 // case we flatten the CFG via predication.
1393 MaskedOp.insert(&I);
1394 continue;
1395 }
1396
1397 // Do not let llvm.experimental.noalias.scope.decl block the vectorization.
1398 // TODO: there might be cases that it should block the vectorization. Let's
1399 // ignore those for now.
1401 continue;
1402
1403 // We can allow masked calls if there's at least one vector variant, even
1404 // if we end up scalarizing due to the cost model calculations.
1405 // TODO: Allow other calls if they have appropriate attributes... readonly
1406 // and argmemonly?
1407 if (CallInst *CI = dyn_cast<CallInst>(&I))
1409 MaskedOp.insert(CI);
1410 continue;
1411 }
1412
1413 // Loads are handled via masking (or speculated if safe to do so.)
1414 if (auto *LI = dyn_cast<LoadInst>(&I)) {
1415 if (!SafePtrs.count(LI->getPointerOperand()))
1416 MaskedOp.insert(LI);
1417 continue;
1418 }
1419
1420 // Predicated store requires some form of masking:
1421 // 1) masked store HW instruction,
1422 // 2) emulation via load-blend-store (only if safe and legal to do so,
1423 // be aware on the race conditions), or
1424 // 3) element-by-element predicate check and scalar store.
1425 if (auto *SI = dyn_cast<StoreInst>(&I)) {
1426 MaskedOp.insert(SI);
1427 continue;
1428 }
1429
1430 if (I.mayReadFromMemory() || I.mayWriteToMemory() || I.mayThrow())
1431 return false;
1432 }
1433
1434 return true;
1435}
1436
1437bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
1438 if (!EnableIfConversion) {
1439 reportVectorizationFailure("If-conversion is disabled",
1440 "IfConversionDisabled", ORE, TheLoop);
1441 return false;
1442 }
1443
1444 assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");
1445
1446 // A list of pointers which are known to be dereferenceable within scope of
1447 // the loop body for each iteration of the loop which executes. That is,
1448 // the memory pointed to can be dereferenced (with the access size implied by
1449 // the value's type) unconditionally within the loop header without
1450 // introducing a new fault.
1451 SmallPtrSet<Value *, 8> SafePointers;
1452
1453 // Collect safe addresses.
1454 for (BasicBlock *BB : TheLoop->blocks()) {
1455 if (!blockNeedsPredication(BB)) {
1456 for (Instruction &I : *BB)
1457 if (auto *Ptr = getLoadStorePointerOperand(&I))
1458 SafePointers.insert(Ptr);
1459 continue;
1460 }
1461
1462 // For a block which requires predication, a address may be safe to access
1463 // in the loop w/o predication if we can prove dereferenceability facts
1464 // sufficient to ensure it'll never fault within the loop. For the moment,
1465 // we restrict this to loads; stores are more complicated due to
1466 // concurrency restrictions.
1467 ScalarEvolution &SE = *PSE.getSE();
1469 for (Instruction &I : *BB) {
1470 LoadInst *LI = dyn_cast<LoadInst>(&I);
1471
1472 // Make sure we can execute all computations feeding into Ptr in the loop
1473 // w/o triggering UB and that none of the out-of-loop operands are poison.
1474 // We do not need to check if operations inside the loop can produce
1475 // poison due to flags (e.g. due to an inbounds GEP going out of bounds),
1476 // because flags will be dropped when executing them unconditionally.
1477 // TODO: Results could be improved by considering poison-propagation
1478 // properties of visited ops.
1479 auto CanSpeculatePointerOp = [this](Value *Ptr) {
1480 SmallVector<Value *> Worklist = {Ptr};
1481 SmallPtrSet<Value *, 4> Visited;
1482 while (!Worklist.empty()) {
1483 Value *CurrV = Worklist.pop_back_val();
1484 if (!Visited.insert(CurrV).second)
1485 continue;
1486
1487 auto *CurrI = dyn_cast<Instruction>(CurrV);
1488 if (!CurrI || !TheLoop->contains(CurrI)) {
1489 BasicBlock *LoopPred = TheLoop->getLoopPredecessor();
1490 Instruction *CtxI = LoopPred ? LoopPred->getTerminator() : nullptr;
1491 assert((CtxI || ORE->allowExtraAnalysis(DEBUG_TYPE)) &&
1492 "Loop with multiple predecessors should have been rejected "
1493 "early.");
1494 // If operands from outside the loop may be poison then Ptr may also
1495 // be poison.
1496 if (!isGuaranteedNotToBePoison(CurrV, AC, CtxI, DT))
1497 return false;
1498 continue;
1499 }
1500
1501 // A loaded value may be poison, independent of any flags.
1502 if (isa<LoadInst>(CurrI) && !isGuaranteedNotToBePoison(CurrV, AC))
1503 return false;
1504
1505 // For other ops, assume poison can only be introduced via flags,
1506 // which can be dropped.
1507 if (!isa<PHINode>(CurrI) && !isSafeToSpeculativelyExecute(CurrI))
1508 return false;
1509 append_range(Worklist, CurrI->operands());
1510 }
1511 return true;
1512 };
1513 // Pass the Predicates pointer to isDereferenceableAndAlignedInLoop so
1514 // that it will consider loops that need guarding by SCEV checks. The
1515 // vectoriser will generate these checks if we decide to vectorise.
1516 if (LI && !LI->getType()->isVectorTy() && !mustSuppressSpeculation(*LI) &&
1517 CanSpeculatePointerOp(LI->getPointerOperand()) &&
1518 isDereferenceableAndAlignedInLoop(LI, TheLoop, SE, *DT, AC,
1519 &Predicates))
1520 SafePointers.insert(LI->getPointerOperand());
1521 Predicates.clear();
1522 }
1523 }
1524
1525 // Collect the blocks that need predication.
1526 for (BasicBlock *BB : TheLoop->blocks()) {
1527 // We support only branches and switch statements as terminators inside the
1528 // loop.
1529 if (isa<SwitchInst>(BB->getTerminator())) {
1530 if (TheLoop->isLoopExiting(BB)) {
1531 reportVectorizationFailure("Loop contains an unsupported switch",
1532 "LoopContainsUnsupportedSwitch", ORE,
1533 TheLoop, BB->getTerminator());
1534 return false;
1535 }
1536 } else if (!isa<UncondBrInst, CondBrInst>(BB->getTerminator())) {
1537 reportVectorizationFailure("Loop contains an unsupported terminator",
1538 "LoopContainsUnsupportedTerminator", ORE,
1539 TheLoop, BB->getTerminator());
1540 return false;
1541 }
1542
1543 // We must be able to predicate all blocks that need to be predicated.
1544 if (blockNeedsPredication(BB) &&
1545 !blockCanBePredicated(BB, SafePointers, ConditionallyExecutedOps)) {
1547 "Control flow cannot be substituted for a select", "NoCFGForSelect",
1548 ORE, TheLoop, BB->getTerminator());
1549 return false;
1550 }
1551 }
1552
1553 // We can if-convert this loop.
1554 return true;
1555}
1556
1557// Helper function to canVectorizeLoopNestCFG.
1558bool LoopVectorizationLegality::canVectorizeLoopCFG(
1559 Loop *Lp, bool UseVPlanNativePath) const {
1560 assert((UseVPlanNativePath || Lp->isInnermost()) &&
1561 "VPlan-native path is not enabled.");
1562
1563 // TODO: ORE should be improved to show more accurate information when an
1564 // outer loop can't be vectorized because a nested loop is not understood or
1565 // legal. Something like: "outer_loop_location: loop not vectorized:
1566 // (inner_loop_location) loop control flow is not understood by vectorizer".
1567
1568 // Store the result and return it at the end instead of exiting early, in case
1569 // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1570 bool Result = true;
1571 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1572
1573 // We must have a loop in canonical form. Loops with indirectbr in them cannot
1574 // be canonicalized.
1575 if (!Lp->getLoopPreheader()) {
1577 "Loop doesn't have a legal pre-header",
1578 "loop control flow is not understood by vectorizer", "CFGNotUnderstood",
1579 ORE, TheLoop);
1580 if (DoExtraAnalysis)
1581 Result = false;
1582 else
1583 return false;
1584 }
1585
1586 // We must have a single backedge.
1587 if (Lp->getNumBackEdges() != 1) {
1589 "The loop must have a single backedge",
1590 "loop control flow is not understood by vectorizer", "CFGNotUnderstood",
1591 ORE, TheLoop);
1592 if (DoExtraAnalysis)
1593 Result = false;
1594 else
1595 return false;
1596 }
1597
1598 // The latch must be terminated by a branch.
1599 BasicBlock *Latch = Lp->getLoopLatch();
1600 if (Latch && !isa<UncondBrInst, CondBrInst>(Latch->getTerminator())) {
1602 "The loop latch terminator is not a UncondBrInst/CondBrInst",
1603 "loop control flow is not understood by vectorizer", "CFGNotUnderstood",
1604 ORE, TheLoop);
1605 if (DoExtraAnalysis)
1606 Result = false;
1607 else
1608 return false;
1609 }
1610
1611 return Result;
1612}
1613
1614bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
1615 Loop *Lp, bool UseVPlanNativePath) {
1616 // Store the result and return it at the end instead of exiting early, in case
1617 // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1618 bool Result = true;
1619 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1620 if (!canVectorizeLoopCFG(Lp, UseVPlanNativePath)) {
1621 if (DoExtraAnalysis)
1622 Result = false;
1623 else
1624 return false;
1625 }
1626
1627 // Recursively check whether the loop control flow of nested loops is
1628 // understood.
1629 for (Loop *SubLp : *Lp)
1630 if (!canVectorizeLoopNestCFG(SubLp, UseVPlanNativePath)) {
1631 if (DoExtraAnalysis)
1632 Result = false;
1633 else
1634 return false;
1635 }
1636
1637 return Result;
1638}
1639
1640bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
1641 BasicBlock *LatchBB = TheLoop->getLoopLatch();
1642 if (!LatchBB) {
1643 reportVectorizationFailure("Loop does not have a latch",
1644 "Cannot vectorize early exit loop",
1645 "NoLatchEarlyExit", ORE, TheLoop);
1646 return false;
1647 }
1648
1649 if (Reductions.size() || FixedOrderRecurrences.size()) {
1651 "Found reductions or recurrences in early-exit loop",
1652 "Cannot vectorize early exit loop with reductions or recurrences",
1653 "RecurrencesInEarlyExitLoop", ORE, TheLoop);
1654 return false;
1655 }
1656
1657 SmallVector<BasicBlock *, 8> ExitingBlocks;
1658 TheLoop->getExitingBlocks(ExitingBlocks);
1659
1660 // Keep a record of all the exiting blocks.
1662 SmallVector<BasicBlock *> UncountableExitingBlocks;
1663 for (BasicBlock *BB : ExitingBlocks) {
1664 const SCEV *EC =
1665 PSE.getSE()->getPredicatedExitCount(TheLoop, BB, &Predicates);
1666 if (isa<SCEVCouldNotCompute>(EC)) {
1667 if (size(successors(BB)) != 2) {
1669 "Early exiting block does not have exactly two successors",
1670 "Incorrect number of successors from early exiting block",
1671 "EarlyExitTooManySuccessors", ORE, TheLoop);
1672 return false;
1673 }
1674
1675 UncountableExitingBlocks.push_back(BB);
1676 } else
1677 CountableExitingBlocks.push_back(BB);
1678 }
1679 // We can safely ignore the predicates here because when vectorizing the loop
1680 // the PredicatatedScalarEvolution class will keep track of all predicates
1681 // for each exiting block anyway. This happens when calling
1682 // PSE.getSymbolicMaxBackedgeTakenCount() below.
1683 Predicates.clear();
1684
1685 if (UncountableExitingBlocks.empty()) {
1686 LLVM_DEBUG(dbgs() << "LV: Could not find any uncountable exits");
1687 return false;
1688 }
1689
1690 // The latch block must have a countable exit.
1692 PSE.getSE()->getPredicatedExitCount(TheLoop, LatchBB, &Predicates))) {
1694 "Cannot determine exact exit count for latch block",
1695 "Cannot vectorize early exit loop",
1696 "UnknownLatchExitCountEarlyExitLoop", ORE, TheLoop);
1697 return false;
1698 }
1699 assert(llvm::is_contained(CountableExitingBlocks, LatchBB) &&
1700 "Latch block not found in list of countable exits!");
1701
1702 // Check to see if there are instructions that could potentially generate
1703 // exceptions or have side-effects.
1704 auto IsSafeOperation = [](Instruction *I) -> bool {
1705 switch (I->getOpcode()) {
1706 case Instruction::Load:
1707 case Instruction::Store:
1708 case Instruction::PHI:
1709 case Instruction::UncondBr:
1710 case Instruction::CondBr:
1711 // These are checked separately.
1712 return true;
1713 default:
1715 }
1716 };
1717
1718 bool HasSideEffects = false;
1719 for (auto *BB : TheLoop->blocks())
1720 for (auto &I : *BB) {
1721 if (I.mayWriteToMemory()) {
1722 if (isa<StoreInst>(&I) && cast<StoreInst>(&I)->isSimple()) {
1723 HasSideEffects = true;
1724 continue;
1725 }
1726
1727 // We don't support complex writes to memory.
1729 "Complex writes to memory unsupported in early exit loops",
1730 "Cannot vectorize early exit loop with complex writes to memory",
1731 "WritesInEarlyExitLoop", ORE, TheLoop);
1732 return false;
1733 }
1734
1735 if (!IsSafeOperation(&I)) {
1736 reportVectorizationFailure("Early exit loop contains operations that "
1737 "cannot be speculatively executed",
1738 "UnsafeOperationsEarlyExitLoop", ORE,
1739 TheLoop);
1740 return false;
1741 }
1742 }
1743
1744 SmallVector<LoadInst *, 4> NonDerefLoads;
1745 // TODO: Handle loops that may fault.
1746 if (!HasSideEffects) {
1747 // Read-only loop.
1748 Predicates.clear();
1749 if (!isReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC, NonDerefLoads,
1750 &Predicates)) {
1752 "Loop may fault", "Cannot vectorize non-read-only early exit loop",
1753 "NonReadOnlyEarlyExitLoop", ORE, TheLoop);
1754 return false;
1755 }
1756 } else {
1757 // Check all uncountable exiting blocks for movable loads.
1758 for (BasicBlock *ExitingBB : UncountableExitingBlocks) {
1759 if (!canUncountableExitConditionLoadBeMoved(ExitingBB))
1760 return false;
1761 }
1762 }
1763
1764 // Check non-dereferenceable loads if any.
1765 for (LoadInst *LI : NonDerefLoads) {
1766 // Only support unit-stride access for now.
1767 int Stride = isConsecutivePtr(LI->getType(), LI->getPointerOperand());
1768 if (Stride != 1) {
1770 "Loop contains potentially faulting strided load",
1771 "Cannot vectorize early exit loop with "
1772 "strided fault-only-first load",
1773 "EarlyExitLoopWithStridedFaultOnlyFirstLoad", ORE, TheLoop);
1774 return false;
1775 }
1776 }
1777
1778 [[maybe_unused]] const SCEV *SymbolicMaxBTC =
1779 PSE.getSymbolicMaxBackedgeTakenCount();
1780 // Since we have an exact exit count for the latch and the early exit
1781 // dominates the latch, then this should guarantee a computed SCEV value.
1782 assert(!isa<SCEVCouldNotCompute>(SymbolicMaxBTC) &&
1783 "Failed to get symbolic expression for backedge taken count");
1784 LLVM_DEBUG(dbgs() << "LV: Found an early exit loop with symbolic max "
1785 "backedge taken count: "
1786 << *SymbolicMaxBTC << '\n');
1787 UncountableExitType = HasSideEffects ? UncountableExitTrait::ReadWrite
1789 return true;
1790}
1791
1792bool LoopVectorizationLegality::canUncountableExitConditionLoadBeMoved(
1793 BasicBlock *ExitingBlock) {
1794 // Try to find a load in the critical path for the uncountable exit condition.
1795 // This is currently matching about the simplest form we can, expecting
1796 // only one in-loop load, the result of which is directly compared against
1797 // a loop-invariant value.
1798 // FIXME: We're insisting on a single use for now, because otherwise we will
1799 // need to make PHI nodes for other users. That can be done once the initial
1800 // transform code lands.
1801 auto *Br = cast<CondBrInst>(ExitingBlock->getTerminator());
1802
1803 using namespace llvm::PatternMatch;
1804 Instruction *L = nullptr;
1805 Value *Ptr = nullptr;
1806 Value *R = nullptr;
1807 if (!match(Br->getCondition(),
1809 m_Value(R))))) {
1811 "Early exit loop with store but no supported condition load",
1812 "NoConditionLoadForEarlyExitLoop", ORE, TheLoop);
1813 return false;
1814 }
1815
1816 // FIXME: Don't rely on operand ordering for the comparison.
1817 if (!TheLoop->isLoopInvariant(R)) {
1819 "Early exit loop with store but no supported condition load",
1820 "NoConditionLoadForEarlyExitLoop", ORE, TheLoop);
1821 return false;
1822 }
1823
1824 // Make sure that the load address is not loop invariant; we want an
1825 // address calculation that we can rotate to the next vector iteration.
1826 const auto *AR = dyn_cast<SCEVAddRecExpr>(PSE.getSE()->getSCEV(Ptr));
1827 if (!AR || AR->getLoop() != TheLoop || !AR->isAffine()) {
1829 "Uncountable exit condition depends on load with an address that is "
1830 "not an add recurrence in the loop",
1831 "EarlyExitLoadInvariantAddress", ORE, TheLoop);
1832 return false;
1833 }
1834
1835 ICFLoopSafetyInfo SafetyInfo;
1836 SafetyInfo.computeLoopSafetyInfo(TheLoop);
1837 LoadInst *Load = cast<LoadInst>(L);
1838 // We need to know that load will be executed before we can hoist a
1839 // copy out to run just before the first iteration.
1840 if (!SafetyInfo.isGuaranteedToExecute(*Load, DT, TheLoop)) {
1842 "Load for uncountable exit not guaranteed to execute",
1843 "ConditionalUncountableExitLoad", ORE, TheLoop);
1844 return false;
1845 }
1846
1847 // Prohibit any potential aliasing with any instruction in the loop which
1848 // might store to memory.
1849 // FIXME: Relax this constraint where possible.
1850 for (auto *BB : TheLoop->blocks()) {
1851 for (auto &I : *BB) {
1852 if (&I == Load)
1853 continue;
1854
1855 if (I.mayReadOrWriteMemory()) {
1856 // We need to mask all other memory ops.
1857 ConditionallyExecutedOps.insert(&I);
1858 if (isa<LoadInst>(&I))
1859 continue;
1860 if (auto *SI = dyn_cast<StoreInst>(&I)) {
1861 AliasResult AR = AA->alias(Ptr, SI->getPointerOperand());
1862 if (AR == AliasResult::NoAlias)
1863 continue;
1864 }
1865
1867 "Cannot determine whether critical uncountable exit load address "
1868 "does not alias with a memory write",
1869 "CantVectorizeAliasWithCriticalUncountableExitLoad", ORE, TheLoop);
1870 return false;
1871 }
1872 }
1873 }
1874
1875 return true;
1876}
1877
1878bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
1879 // Store the result and return it at the end instead of exiting early, in case
1880 // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1881 bool Result = true;
1882
1883 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1884 // Check whether the loop-related control flow in the loop nest is expected by
1885 // vectorizer.
1886 if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) {
1887 if (DoExtraAnalysis) {
1888 LLVM_DEBUG(dbgs() << "LV: legality check failed: loop nest");
1889 Result = false;
1890 } else {
1891 return false;
1892 }
1893 }
1894
1895 // We need to have a loop header.
1896 LLVM_DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName()
1897 << '\n');
1898
1899 // Specific checks for outer loops. We skip the remaining legal checks at this
1900 // point because they don't support outer loops.
1901 if (!TheLoop->isInnermost()) {
1902 assert(UseVPlanNativePath && "VPlan-native path is not enabled.");
1903
1904 if (!canVectorizeOuterLoop()) {
1905 reportVectorizationFailure("Unsupported outer loop",
1906 "UnsupportedOuterLoop", ORE, TheLoop);
1907 // TODO: Implement DoExtraAnalysis when subsequent legal checks support
1908 // outer loops.
1909 return false;
1910 }
1911
1912 LLVM_DEBUG(dbgs() << "LV: We can vectorize this outer loop!\n");
1913 return Result;
1914 }
1915
1916 assert(TheLoop->isInnermost() && "Inner loop expected.");
1917 // Check if we can if-convert non-single-bb loops.
1918 unsigned NumBlocks = TheLoop->getNumBlocks();
1919 if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
1920 LLVM_DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");
1921 if (DoExtraAnalysis)
1922 Result = false;
1923 else
1924 return false;
1925 }
1926
1927 // Check if we can vectorize the instructions and CFG in this loop.
1928 if (!canVectorizeInstrs()) {
1929 LLVM_DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n");
1930 if (DoExtraAnalysis)
1931 Result = false;
1932 else
1933 return false;
1934 }
1935
1936 if (isa<SCEVCouldNotCompute>(PSE.getBackedgeTakenCount())) {
1937 if (TheLoop->getExitingBlock()) {
1938 reportVectorizationFailure("Cannot vectorize uncountable loop",
1939 "UnsupportedUncountableLoop", ORE, TheLoop);
1940 if (DoExtraAnalysis)
1941 Result = false;
1942 else
1943 return false;
1944 } else {
1945 if (!isVectorizableEarlyExitLoop()) {
1946 assert(UncountableExitType == UncountableExitTrait::None &&
1947 "Must be false without vectorizable early-exit loop");
1948 if (DoExtraAnalysis)
1949 Result = false;
1950 else
1951 return false;
1952 }
1953 }
1954 }
1955
1956 // Go over each instruction and look at memory deps.
1957 if (!canVectorizeMemory()) {
1958 LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
1959 if (DoExtraAnalysis)
1960 Result = false;
1961 else
1962 return false;
1963 }
1964
1965 // TODO: Remove this restriction, should be straightforward to support.
1966 if (UncountableExitType != UncountableExitTrait::None &&
1967 !LAI->getStoresToInvariantAddresses().empty()) {
1968 LLVM_DEBUG(dbgs() << "LV: Cannot vectorize early exit loops with stores to "
1969 "loop-invariant addresses\n");
1970 reportVectorizationFailure("Cannot vectorize early exit loops with stores "
1971 "to loop-invariant addresses",
1972 "LoopInvariantStoresInEELoop", ORE, TheLoop);
1973 return false;
1974 }
1975
1976 if (Result) {
1977 LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
1978 << (LAI->getRuntimePointerChecking()->Need
1979 ? " (with a runtime bound check)"
1980 : "")
1981 << "!\n");
1982 }
1983
1984 // Okay! We've done all the tests. If any have failed, return false. Otherwise
1985 // we can vectorize, and at this point we don't have any other mem analysis
1986 // which may limit our maximum vectorization factor, so just return true with
1987 // no restrictions.
1988 return Result;
1989}
1990
1992 // The only loops we can vectorize without a scalar epilogue, are loops with
1993 // a bottom-test and a single exiting block. We'd have to handle the fact
1994 // that not every instruction executes on the last iteration. This will
1995 // require a lane mask which varies through the vector loop body. (TODO)
1996 if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) {
1997 LLVM_DEBUG(
1998 dbgs()
1999 << "LV: Cannot fold tail by masking. Requires a singe latch exit\n");
2000 return false;
2001 }
2002
2003 LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");
2004
2005 // The list of pointers that we can safely read and write to remains empty.
2006 SmallPtrSet<Value *, 8> SafePointers;
2007
2008 // Check all blocks for predication, including those that ordinarily do not
2009 // need predication such as the header block.
2011 for (BasicBlock *BB : TheLoop->blocks()) {
2012 if (!blockCanBePredicated(BB, SafePointers, TmpMaskedOp)) {
2013 LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking.\n");
2014 return false;
2015 }
2016 }
2017
2018 LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n");
2019
2020 return true;
2021}
2022
2024 // The list of pointers that we can safely read and write to remains empty.
2025 SmallPtrSet<Value *, 8> SafePointers;
2026
2027 // Mark all blocks for predication, including those that ordinarily do not
2028 // need predication such as the header block, and collect instructions needing
2029 // predication in TailFoldedMaskedOp.
2030 for (BasicBlock *BB : TheLoop->blocks()) {
2031 [[maybe_unused]] bool R =
2032 blockCanBePredicated(BB, SafePointers, TailFoldedMaskedOp);
2033 assert(R && "Must be able to predicate block when tail-folding.");
2034 }
2035}
2036
2037} // namespace llvm
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
#define DEBUG_TYPE
Hexagon Common GEP
#define LV_NAME
static cl::opt< bool > HintsAllowReordering("hints-allow-reordering", cl::init(true), cl::Hidden, cl::desc("Allow enabling loop hints to reorder " "FP operations during vectorization."))
static const unsigned MaxInterleaveFactor
Maximum vectorization interleave count.
static cl::opt< bool > AllowStridedPointerIVs("lv-strided-pointer-ivs", cl::init(false), cl::Hidden, cl::desc("Enable recognition of non-constant strided " "pointer induction variables."))
static cl::opt< LoopVectorizeHints::ScalableForceKind > ForceScalableVectorization("scalable-vectorization", cl::init(LoopVectorizeHints::SK_Unspecified), cl::Hidden, cl::desc("Control whether the compiler can use scalable vectors to " "vectorize a loop"), cl::values(clEnumValN(LoopVectorizeHints::SK_FixedWidthOnly, "off", "Scalable vectorization is disabled."), clEnumValN(LoopVectorizeHints::SK_PreferScalable, "preferred", "Scalable vectorization is available and favored when the " "cost is inconclusive."), clEnumValN(LoopVectorizeHints::SK_PreferScalable, "on", "Scalable vectorization is available and favored when the " "cost is inconclusive."), clEnumValN(LoopVectorizeHints::SK_AlwaysScalable, "always", "Scalable vectorization is available and always favored when " "feasible")))
static cl::opt< bool > EnableHistogramVectorization("enable-histogram-loop-vectorization", cl::init(false), cl::Hidden, cl::desc("Enables autovectorization of some loops containing histograms"))
static cl::opt< bool > EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden, cl::desc("Enable if-conversion during vectorization."))
This file defines the LoopVectorizationLegality class.
This file provides a LoopVectorizationPlanner class.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define H(x, y, z)
Definition MD5.cpp:56
#define T
Contains a collection of routines for determining if a given instruction is guaranteed to execute if ...
static bool isSimple(Instruction *I)
static void visit(BasicBlock &Start, std::function< bool(BasicBlock *)> op)
#define LLVM_DEBUG(...)
Definition Debug.h:119
This pass exposes codegen information to IR-level passes.
Virtual Register Rewriter
static const uint32_t IV[8]
Definition blake3_impl.h:83
@ NoAlias
The two locations do not alias at all.
iterator begin() const
Definition ArrayRef.h:129
bool empty() const
Check if the array is empty.
Definition ArrayRef.h:136
LLVM Basic Block Representation.
Definition BasicBlock.h:62
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
Definition BasicBlock.h:237
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
This class represents a function call, abstracting a target machine's calling convention.
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
static constexpr ElementCount getScalable(ScalarTy MinVal)
Definition TypeSize.h:312
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:869
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
bool isGuaranteedToExecute(const Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop) const override
Returns true if the instruction in a loop is guaranteed to execute at least once (under the assumptio...
void computeLoopSafetyInfo(const Loop *CurLoop) override
Computes safety information for a loop checks loop body & header for the possibility of may throw exc...
A struct for saving information about induction variables.
static LLVM_ABI bool isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D, ArrayRef< const SCEVPredicate * > NoWrapPreds={}, const SCEV *Expr=nullptr, SmallVectorImpl< Instruction * > *CastsToIgnore=nullptr)
Returns true if Phi is an induction in the loop L.
@ IK_FpInduction
Floating point induction variable.
@ IK_PtrInduction
Pointer induction var. Step = C.
@ IK_IntInduction
Integer induction variable. Step = C.
Instruction * getExactFPMathInst()
Returns floating-point induction operator that does not allow reassociation (transforming the inducti...
Class to represent integer types.
An instruction for reading from memory.
const MemoryDepChecker & getDepChecker() const
the Memory Dependence Checker which can determine the loop-independent and loop-carried dependences b...
static LLVM_ABI bool blockNeedsPredication(const BasicBlock *BB, const Loop *TheLoop, const DominatorTree *DT)
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
unsigned getNumBackEdges() const
Calculate the number of back edges to the loop header.
iterator_range< block_iterator > blocks() const
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
bool isLoopHeader(const BlockT *BB) const
LLVM_ABI bool isInvariantStoreOfReduction(StoreInst *SI)
Returns True if given store is a final invariant store of one of the reductions found in the loop.
LLVM_ABI void collectUnitStridePredicates() const
Add unit stride predicates for memory accesses to PSE, if runtime checks are allowed and an inner loo...
LLVM_ABI bool isInvariantAddressOfReduction(Value *V)
Returns True if given address is invariant and is used to store recurrent expression.
LLVM_ABI bool canVectorize(bool UseVPlanNativePath)
Returns true if it is legal to vectorize this loop.
LLVM_ABI bool blockNeedsPredication(const BasicBlock *BB) const
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
LLVM_ABI int isConsecutivePtr(Type *AccessTy, Value *Ptr) const
Check if this pointer is consecutive when vectorizing.
bool hasUncountableExitWithSideEffects() const
Returns true if this is an early exit loop with state-changing or potentially-faulting operations and...
LLVM_ABI bool canVectorizeFPMath(bool EnableStrictReductions)
Returns true if it is legal to vectorize the FP math operations in this loop.
LLVM_ABI bool isFixedOrderRecurrence(const PHINode *Phi) const
Returns True if Phi is a fixed-order recurrence in this loop.
LLVM_ABI const InductionDescriptor * getPointerInductionDescriptor(PHINode *Phi) const
Returns a pointer to the induction descriptor, if Phi is pointer induction.
LLVM_ABI const InductionDescriptor * getIntOrFpInductionDescriptor(PHINode *Phi) const
Returns a pointer to the induction descriptor, if Phi is an integer or floating point induction.
LLVM_ABI bool isInductionPhi(const Value *V) const
Returns True if V is a Phi node of an induction variable in this loop.
const InductionList & getInductionVars() const
Returns the induction variables found in the loop.
LLVM_ABI bool isInvariant(Value *V) const
Returns true if V is invariant across all loop iterations according to SCEV.
const ReductionList & getReductionVars() const
Returns the reduction variables found in the loop.
LLVM_ABI bool canFoldTailByMasking() const
Return true if we can vectorize this loop while folding its tail by masking.
LLVM_ABI void prepareToFoldTailByMasking()
Mark all respective loads/stores for masking.
bool hasUncountableEarlyExit() const
Returns true if the loop has uncountable early exits, i.e.
LLVM_ABI bool isUniformMemOp(Instruction &I, std::optional< ElementCount > VF) const
A uniform memory op is a load or store which accesses the same memory location on all VF lanes,...
LLVM_ABI bool isUniform(Value *V, std::optional< ElementCount > VF) const
Returns true if value V is uniform across VF lanes, when VF is provided, and otherwise if V is invari...
LLVM_ABI bool isInductionVariable(const Value *V) const
Returns True if V can be considered as an induction variable in this loop.
LLVM_ABI bool isCastedInductionVariable(const Value *V) const
Returns True if V is a cast that is part of an induction def-use chain, and had been proven to be red...
@ SK_PreferScalable
Vectorize loops using scalable vectors or fixed-width vectors, but favor scalable vectors when the co...
@ SK_AlwaysScalable
Always vectorize loops using scalable vectors if feasible (i.e.
@ SK_FixedWidthOnly
Disables vectorization with scalable vectors.
LLVM_ABI bool allowVectorization(Function *F, Loop *L, bool VectorizeOnlyWhenForced) const
LLVM_ABI bool allowReordering() const
When enabling loop hints are provided we allow the vectorizer to change the order of operations that ...
LLVM_ABI void emitRemarkWithHints() const
Dumps all the hint information.
LLVM_ABI void setAlreadyVectorized()
Mark the loop L as already vectorized by setting the width to 1.
LLVM_ABI LoopVectorizeHints(const Loop *L, bool InterleaveOnlyWhenForced, OptimizationRemarkEmitter &ORE, const TargetTransformInfo *TTI=nullptr)
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition LoopInfo.cpp:67
PHINode * getCanonicalInductionVariable() const
Check to see if the loop has a canonical induction variable: an integer recurrence that starts at 0 a...
Definition LoopInfo.cpp:174
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
Definition LoopInfo.cpp:523
Metadata node.
Definition Metadata.h:1069
const MDOperand & getOperand(unsigned I) const
Definition Metadata.h:1433
ArrayRef< MDOperand > operands() const
Definition Metadata.h:1431
unsigned getNumOperands() const
Return number of MDNode operands.
Definition Metadata.h:1439
Tracking metadata reference owned by Metadata.
Definition Metadata.h:891
A single uniqued string.
Definition Metadata.h:722
LLVM_ABI StringRef getString() const
Definition Metadata.cpp:632
iterator find(const KeyT &Key)
Definition MapVector.h:156
Checks memory dependences among accesses to the same underlying object to determine whether there vec...
const SmallVectorImpl< Dependence > * getDependences() const
Returns the memory dependences.
Root of the metadata hierarchy.
Definition Metadata.h:64
Diagnostic information for optimization analysis remarks.
The optimization diagnostic interface.
bool allowExtraAnalysis(StringRef PassName) const
Whether we allow for extra compile-time budget to perform more analysis to produce fewer false positi...
LLVM_ABI void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for missed-optimization remarks.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Instruction * getExactFPMathInst() const
Returns 1st non-reassociative FP instruction in the PHI node's use-chain.
static LLVM_ABI bool isFixedOrderRecurrence(PHINode *Phi, Loop *TheLoop, DominatorTree *DT)
Returns true if Phi is a fixed-order recurrence.
bool hasExactFPMath() const
Returns true if the recurrence has floating-point math that requires precise (ordered) operations.
static LLVM_ABI bool isReductionPHI(PHINode *Phi, Loop *TheLoop, RecurrenceDescriptor &RedDes, DemandedBits *DB=nullptr, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr, ScalarEvolution *SE=nullptr)
Returns true if Phi is a reduction in TheLoop.
bool hasUsesOutsideReductionChain() const
Returns true if the reduction PHI has any uses outside the reduction chain.
RecurKind getRecurrenceKind() const
bool isOrdered() const
Expose an ordered FP reduction to the instance users.
StoreInst * IntermediateStore
Reductions may store temporary or final result to an invariant address.
static bool isMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is any min/max kind.
SCEVUse getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
This visitor recursively visits a SCEV expression and re-writes it.
const SCEV * visit(const SCEV *S)
This class represents an analyzed expression in the program.
static constexpr auto FlagAnyWrap
The main scalar evolution driver.
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LLVM_ABI bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
LLVM_ABI const SCEV * getCouldNotCompute()
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Value * getPointerOperand()
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
Provides information about what library functions are available for the current target.
void getWidestVF(StringRef ScalarF, ElementCount &FixedVF, ElementCount &ScalableVF) const
Returns the largest vectorization factor used in the list of vector functions.
bool isFunctionVectorizable(StringRef F, const ElementCount &VF) const
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
Definition Twine.cpp:17
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:309
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:282
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:232
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition Type.h:186
bool isIntOrPtrTy() const
Return true if this is an integer type or a pointer type.
Definition Type.h:270
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
Value * getOperand(unsigned i) const
Definition User.h:207
static bool hasMaskedVariant(const CallInst &CI, std::optional< ElementCount > VF=std::nullopt)
Definition VectorUtils.h:87
static SmallVector< VFInfo, 8 > getMappings(const CallInst &CI)
Retrieve all the VFInfo instances associated to the CallInst CI.
Definition VectorUtils.h:76
LLVM Value Representation.
Definition Value.h:75
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:319
static LLVM_ABI bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as a element type.
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:230
constexpr bool isZero() const
Definition TypeSize.h:153
const ParentTy * getParent() const
Definition ilist_node.h:34
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
void reportVectorizationFailure(const StringRef DebugMsg, const StringRef OREMsg, const StringRef ORETag, OptimizationRemarkEmitter *ORE, const Loop *TheLoop, Instruction *I=nullptr)
Reports a vectorization failure: print DebugMsg for debugging purposes along with the corresponding o...
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
TwoOps_match< ValueOpTy, PointerOpTy, Instruction::Store > m_Store(const ValueOpTy &ValueOp, const PointerOpTy &PointerOp)
Matches StoreInst.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
bool match(Val *V, const Pattern &P)
match_bind< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
auto m_BinOp()
Match an arbitrary binary operation and ignore it.
auto m_Value()
Match an arbitrary value and ignore it.
match_combine_or< match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > >, OpTy > m_ZExtOrSExtOrSelf(const OpTy &Op)
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract(Y &&MD)
Extract a Value from Metadata, if any.
Definition Metadata.h:696
Add a small namespace to avoid name clashes with the classes used in the streaming interface.
NodeAddr< PhiNode * > Phi
Definition RDFGraph.h:390
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:315
@ Offset
Definition DWP.cpp:558
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1668
LLVM_ABI Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
auto successors(const MachineBasicBlock *BB)
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2207
static bool isUniformLoop(Loop *Lp, Loop *OuterLp)
LLVM_ABI bool mustSuppressSpeculation(const LoadInst &LI)
Return true if speculation of the given load must be suppressed to avoid ordering or interfering with...
Definition Loads.cpp:445
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
auto reverse(ContainerTy &&C)
Definition STLExtras.h:407
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
static IntegerType * getWiderInductionTy(const DataLayout &DL, Type *Ty0, Type *Ty1)
static IntegerType * getInductionIntegerTy(const DataLayout &DL, Type *Ty)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
LLVM_ABI bool hasDisableAllTransformsHint(const Loop *L)
Look for the loop attribute that disables all transformation heuristic.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
static bool storeToSameAddress(ScalarEvolution *SE, StoreInst *A, StoreInst *B)
Returns true if A and B have same pointer operands or same SCEVs addresses.
bool canVectorizeTy(Type *Ty)
Returns true if Ty is a valid vector element type, void, or an unpacked literal struct where all elem...
TargetTransformInfo TTI
LLVM_ABI bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
ArrayRef(const T &OneElt) -> ArrayRef< T >
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isReadOnlyLoop(Loop *L, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, SmallVectorImpl< LoadInst * > &NonDereferenceableAndAlignedLoads, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
Returns true if the loop contains read-only memory accesses and doesn't throw.
Definition Loads.cpp:892
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2191
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1946
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
static bool findHistogram(LoadInst *LI, StoreInst *HSt, Loop *TheLoop, const PredicatedScalarEvolution &PSE, SmallVectorImpl< HistogramInfo > &Histograms)
Find histogram operations that match high-level code in loops:
LLVM_ABI bool isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Returns true if V cannot be poison, but may be undef.
static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI)
Checks if a function is scalarizable according to the TLI, in the sense that it should be vectorized ...
LLVM_ABI bool isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L, ScalarEvolution &SE, DominatorTree &DT, AssumptionCache *AC=nullptr, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
Return true if we can prove that the given load (which is assumed to be within the specified loop) wo...
Definition Loads.cpp:304
constexpr detail::IsaCheckPredicate< Types... > IsaPred
Function object wrapper for the llvm::isa type check.
Definition Casting.h:866
LLVM_ABI std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const DominatorTree &DT, const DenseMap< Value *, const SCEV * > &StridesMap=DenseMap< Value *, const SCEV * >(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of the access type size.
SCEVUseT< const SCEV * > SCEVUse
bool SCEVExprContains(const SCEV *Root, PredTy Pred)
Return true if any node in Root satisfies the predicate Pred.
Dependece between memory access instructions.
Instruction * getDestination(const MemoryDepChecker &DepChecker) const
Return the destination instruction of the dependence.
Instruction * getSource(const MemoryDepChecker &DepChecker) const
Return the source instruction of the dependence.
static LLVM_ABI VectorizationSafetyStatus isSafeForVectorization(DepType Type)
Dependence types that don't prevent vectorization.
TODO: The following VectorizationFactor was pulled out of LoopVectorizationCostModel class.
Collection of parameters shared beetween the Loop Vectorizer and the Loop Access Analysis.
static LLVM_ABI const unsigned MaxVectorWidth
Maximum SIMD width.
static LLVM_ABI bool isInterleaveForced()
True if force-vector-interleave was specified by the user.
static LLVM_ABI unsigned VectorizationInterleave
Interleave factor as overridden by the user.
static LLVM_ABI ElementCount VectorizationFactor
VF as overridden by the user.