//===- LoopVectorizationLegality.cpp --------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file provides loop vectorization legality analysis. Original code
// resided in LoopVectorize.cpp for a long time.
//
// At this point, it is implemented as a utility class, not as an analysis
// pass. It should be easy to create an analysis pass around it if there
// is a need (but D45420 needs to happen first).
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Transforms/Utils/SizeOpts.h"
#include "llvm/Transforms/Vectorize/LoopVectorize.h"

using namespace llvm;
using namespace PatternMatch;

#define LV_NAME "loop-vectorize"
#define DEBUG_TYPE LV_NAME

static cl::opt<bool>
    EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
                       cl::desc("Enable if-conversion during vectorization."));

static cl::opt<bool>
    AllowStridedPointerIVs("lv-strided-pointer-ivs", cl::init(false),
                           cl::Hidden,
                           cl::desc("Enable recognition of non-constant strided "
                                    "pointer induction variables."));

namespace llvm {
cl::opt<bool>
    HintsAllowReordering("hints-allow-reordering", cl::init(true), cl::Hidden,
                         cl::desc("Allow enabling loop hints to reorder "
                                  "FP operations during vectorization."));
} // namespace llvm

// TODO: Move size-based thresholds out of legality checking, make cost-based
// decisions instead of hard thresholds.
static cl::opt<unsigned> VectorizeSCEVCheckThreshold(
    "vectorize-scev-check-threshold", cl::init(16), cl::Hidden,
    cl::desc("The maximum number of SCEV checks allowed."));

60 "pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden,
61 cl::desc("The maximum number of SCEV checks allowed with a "
62 "vectorize(enable) pragma"));
63
66 "scalable-vectorization", cl::init(LoopVectorizeHints::SK_Unspecified),
68 cl::desc("Control whether the compiler can use scalable vectors to "
69 "vectorize a loop"),
72 "Scalable vectorization is disabled."),
75 "Scalable vectorization is available and favored when the "
76 "cost is inconclusive."),
79 "Scalable vectorization is available and favored when the "
80 "cost is inconclusive.")));
81
83 "enable-histogram-loop-vectorization", cl::init(false), cl::Hidden,
84 cl::desc("Enables autovectorization of some loops containing histograms"));
85
/// Maximum vectorization interleave count.
static const unsigned MaxInterleaveFactor = 16;

namespace llvm {

bool LoopVectorizeHints::Hint::validate(unsigned Val) {
  switch (Kind) {
  case HK_WIDTH:
    return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth;
  case HK_INTERLEAVE:
    return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
  case HK_FORCE:
    return (Val <= 1);
  case HK_ISVECTORIZED:
  case HK_PREDICATE:
  case HK_SCALABLE:
    return (Val == 0 || Val == 1);
  }
  return false;
}

LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
                                       bool InterleaveOnlyWhenForced,
                                       OptimizationRemarkEmitter &ORE,
                                       const TargetTransformInfo *TTI)
    : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH),
      Interleave("interleave.count", InterleaveOnlyWhenForced, HK_INTERLEAVE),
      Force("vectorize.enable", FK_Undefined, HK_FORCE),
      IsVectorized("isvectorized", 0, HK_ISVECTORIZED),
      Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE),
      Scalable("vectorize.scalable.enable", SK_Unspecified, HK_SCALABLE),
      TheLoop(L), ORE(ORE) {
  // Populate values with existing loop metadata.
  getHintsFromMetadata();

  // force-vector-interleave overrides DisableInterleaving.
  if (VectorizerParams::isInterleaveForced())
    Interleave.Value = VectorizerParams::VectorizationInterleave;

  // If the metadata doesn't explicitly specify whether to enable scalable
  // vectorization, then decide based on the following criteria (increasing
  // level of priority):
  //  - Target default
  //  - Metadata width
  //  - Force option (always overrides)
  if ((LoopVectorizeHints::ScalableForceKind)Scalable.Value == SK_Unspecified) {
    if (TTI)
      Scalable.Value = TTI->enableScalableVectorization() ? SK_PreferScalable
                                                          : SK_FixedWidthOnly;

    if (Width.Value)
      // If the width is set, but the metadata says nothing about the scalable
      // property, then assume it concerns only a fixed-width UserVF.
      // If width is not set, the flag takes precedence.
      Scalable.Value = SK_FixedWidthOnly;
  }

  // If the flag is set to force any use of scalable vectors, override the loop
  // hints.
  if (ForceScalableVectorization.getValue() !=
      LoopVectorizeHints::SK_Unspecified)
    Scalable.Value = ForceScalableVectorization.getValue();

  // Scalable vectorization is disabled if no preference is specified.
  if ((LoopVectorizeHints::ScalableForceKind)Scalable.Value == SK_Unspecified)
    Scalable.Value = SK_FixedWidthOnly;

  if (IsVectorized.Value != 1)
    // If the vectorization width and interleaving count are both 1 then
    // consider the loop to have been already vectorized because there's
    // nothing more that we can do.
    IsVectorized.Value =
        getWidth() == ElementCount::getFixed(1) && getInterleave() == 1;
  LLVM_DEBUG(if (InterleaveOnlyWhenForced && getInterleave() == 1) dbgs()
             << "LV: Interleaving disabled by the pass manager\n");
}

void LoopVectorizeHints::setAlreadyVectorized() {
  LLVMContext &Context = TheLoop->getHeader()->getContext();

  MDNode *IsVectorizedMD = MDNode::get(
      Context,
      {MDString::get(Context, "llvm.loop.isvectorized"),
       ConstantAsMetadata::get(ConstantInt::get(Context, APInt(32, 1)))});
  MDNode *LoopID = TheLoop->getLoopID();
  MDNode *NewLoopID =
      makePostTransformationMetadata(Context, LoopID,
                                     {Twine(Prefix(), "vectorize.").str(),
                                      Twine(Prefix(), "interleave.").str()},
                                     {IsVectorizedMD});
  TheLoop->setLoopID(NewLoopID);

  // Update internal cache.
  IsVectorized.Value = 1;
}

bool LoopVectorizeHints::allowVectorization(
    Function *F, Loop *L, bool VectorizeOnlyWhenForced) const {
  if (getForce() == LoopVectorizeHints::FK_Disabled) {
    LLVM_DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n");
    emitRemarkWithHints();
    return false;
  }

  if (VectorizeOnlyWhenForced && getForce() != LoopVectorizeHints::FK_Enabled) {
    LLVM_DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n");
    emitRemarkWithHints();
    return false;
  }

  if (getIsVectorized() == 1) {
    LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
    // FIXME: Add interleave.disable metadata. This will allow
    // vectorize.disable to be used without disabling the pass and errors
    // to differentiate between disabled vectorization and a width of 1.
    ORE.emit([&]() {
      return OptimizationRemarkMissed(LV_NAME, "AllDisabled", L->getStartLoc(),
                                      L->getHeader())
             << "loop not vectorized: vectorization and interleaving are "
                "explicitly disabled, or the loop has already been "
                "vectorized";
    });
    return false;
  }

  return true;
}

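// For illustration only (example usage, not part of the original source): the
// Force/Width hints consulted above originate from source-level pragmas such
// as
//
//   #pragma clang loop vectorize(enable) vectorize_width(4)
//   for (int i = 0; i < n; ++i)
//     a[i] += b[i];
//
// vectorize(disable) maps to FK_Disabled, vectorize(enable) to FK_Enabled, and
// the absence of a pragma leaves Force as FK_Undefined.
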
void LoopVectorizeHints::emitRemarkWithHints() const {
  using namespace ore;

  ORE.emit([&]() {
    if (Force.Value == LoopVectorizeHints::FK_Disabled)
      return OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled",
                                      TheLoop->getStartLoc(),
                                      TheLoop->getHeader())
             << "loop not vectorized: vectorization is explicitly disabled";

    OptimizationRemarkMissed R(LV_NAME, "MissedDetails", TheLoop->getStartLoc(),
                               TheLoop->getHeader());
    R << "loop not vectorized";
    if (Force.Value == LoopVectorizeHints::FK_Enabled) {
      R << " (Force=" << NV("Force", true);
      if (Width.Value != 0)
        R << ", Vector Width=" << NV("VectorWidth", getWidth());
      if (getInterleave() != 0)
        R << ", Interleave Count=" << NV("InterleaveCount", getInterleave());
      R << ")";
    }
    return R;
  });
}

const char *LoopVectorizeHints::vectorizeAnalysisPassName() const {
  if (getWidth() == ElementCount::getFixed(1))
    return LV_NAME;
  if (getForce() == LoopVectorizeHints::FK_Disabled)
    return LV_NAME;
  if (getForce() == LoopVectorizeHints::FK_Undefined && getWidth().isZero())
    return LV_NAME;
  return OptimizationRemarkAnalysis::AlwaysPrint;
}

bool LoopVectorizeHints::allowReordering() const {
  // Allow the vectorizer to change the order of operations if enabling
  // loop hints are provided.
  ElementCount EC = getWidth();
  return HintsAllowReordering &&
         (getForce() == LoopVectorizeHints::FK_Enabled ||
          EC.getKnownMinValue() > 1);
}

void LoopVectorizeHints::getHintsFromMetadata() {
  MDNode *LoopID = TheLoop->getLoopID();
  if (!LoopID)
    return;

  // First operand should refer to the loop id itself.
  assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
  assert(LoopID->getOperand(0) == LoopID && "invalid loop id");

  for (const MDOperand &MDO : llvm::drop_begin(LoopID->operands())) {
    const MDString *S = nullptr;
    SmallVector<Metadata *, 4> Args;

    // The expected hint is either an MDString or an MDNode whose first
    // operand is an MDString.
    if (const MDNode *MD = dyn_cast<MDNode>(MDO)) {
      if (!MD || MD->getNumOperands() == 0)
        continue;
      S = dyn_cast<MDString>(MD->getOperand(0));
      for (unsigned Idx = 1; Idx < MD->getNumOperands(); ++Idx)
        Args.push_back(MD->getOperand(Idx));
    } else {
      S = dyn_cast<MDString>(MDO);
      assert(Args.size() == 0 && "too many arguments for MDString");
    }

    if (!S)
      continue;

    // Check if the hint starts with the loop metadata prefix.
    StringRef Name = S->getString();
    if (Args.size() == 1)
      setHint(Name, Args[0]);
  }
}

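// For illustration only (example IR, not part of the original source): the
// metadata walked above typically looks like the following after the frontend
// lowers "#pragma clang loop vectorize_width(4) interleave_count(2)":
//
//   br i1 %exitcond, label %exit, label %body, !llvm.loop !0
//   !0 = distinct !{!0, !1, !2}
//   !1 = !{!"llvm.loop.vectorize.width", i32 4}
//   !2 = !{!"llvm.loop.interleave.count", i32 2}
//
// Every operand after the self-referential first one is either an MDString or
// an MDNode whose first operand is the hint name and whose second operand is
// the value handed to setHint() below.
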
void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) {
  if (!Name.starts_with(Prefix()))
    return;
  Name = Name.substr(Prefix().size(), StringRef::npos);

  const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg);
  if (!C)
    return;
  unsigned Val = C->getZExtValue();

  Hint *Hints[] = {&Width,        &Interleave, &Force,
                   &IsVectorized, &Predicate,  &Scalable};
  for (auto *H : Hints) {
    if (Name == H->Name) {
      if (H->validate(Val))
        H->Value = Val;
      else
        LLVM_DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n");
      break;
    }
  }
}

// Return true if the inner loop \p Lp is uniform with regard to the outer loop
// \p OuterLp (i.e., if the outer loop is vectorized, all the vector lanes
// executing the inner loop will execute the same iterations). This check is
// very constrained for now but it will be relaxed in the future. \p Lp is
// considered uniform if it meets all the following conditions:
//   1) it has a canonical IV (starting from 0 and with stride 1),
//   2) its latch terminator is a conditional branch and,
//   3) its latch condition is a compare instruction whose operands are the
//      canonical IV and an OuterLp invariant.
// This check doesn't take into account the uniformity of other conditions not
// related to the loop latch because they don't affect the loop uniformity.
//
// NOTE: We decided to keep all these checks and their associated documentation
// together so that we can easily have a picture of the currently supported
// loop nests. However, some of the current checks don't depend on \p OuterLp
// and would be redundantly executed for each \p Lp if we invoked this function
// for different candidate outer loops. This is not the case for now because we
// don't currently have the infrastructure to evaluate multiple candidate outer
// loops and \p OuterLp will be a fixed parameter while we only support
// explicit outer loop vectorization. It's also very likely that these checks
// go away before introducing the aforementioned infrastructure. However, if
// this is not the case, we should move the \p OuterLp independent checks to a
// separate function that is only executed once for each \p Lp.
static bool isUniformLoop(Loop *Lp, Loop *OuterLp) {
  assert(Lp->getLoopLatch() && "Expected loop with a single latch.");

  // If Lp is the outer loop, it's uniform by definition.
  if (Lp == OuterLp)
    return true;
  assert(OuterLp->contains(Lp) && "OuterLp must contain Lp.");

  // 1.
  PHINode *IV = Lp->getCanonicalInductionVariable();
  if (!IV) {
    LLVM_DEBUG(dbgs() << "LV: Canonical IV not found.\n");
    return false;
  }

  // 2.
  BasicBlock *Latch = Lp->getLoopLatch();
  auto *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator());
  if (!LatchBr || LatchBr->isUnconditional()) {
    LLVM_DEBUG(dbgs() << "LV: Unsupported loop latch branch.\n");
    return false;
  }

  // 3.
  auto *LatchCmp = dyn_cast<CmpInst>(LatchBr->getCondition());
  if (!LatchCmp) {
    LLVM_DEBUG(
        dbgs() << "LV: Loop latch condition is not a compare instruction.\n");
    return false;
  }

  Value *CondOp0 = LatchCmp->getOperand(0);
  Value *CondOp1 = LatchCmp->getOperand(1);
  Value *IVUpdate = IV->getIncomingValueForBlock(Latch);
  if (!(CondOp0 == IVUpdate && OuterLp->isLoopInvariant(CondOp1)) &&
      !(CondOp1 == IVUpdate && OuterLp->isLoopInvariant(CondOp0))) {
    LLVM_DEBUG(dbgs() << "LV: Loop latch condition is not uniform.\n");
    return false;
  }

  return true;
}

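// For illustration only (not part of the original source): under the checks
// above, an inner loop such as
//
//   for (int J = 0; J < OuterLoopInvariantN; ++J)  // canonical IV, invariant
//     A[I][J] = ...;                               // I is the outer-loop IV
//
// is uniform w.r.t. the outer loop over I: every vector lane executes the same
// inner iterations. If the latch bound depended on I, the inner trip count
// would diverge per lane and condition 3) above would reject the nest.
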
// Return true if \p Lp and all its nested loops are uniform with regard to \p
// OuterLp.
static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp) {
  if (!isUniformLoop(Lp, OuterLp))
    return false;

  // Check if nested loops are uniform.
  for (Loop *SubLp : *Lp)
    if (!isUniformLoopNest(SubLp, OuterLp))
      return false;

  return true;
}

static Type *convertPointerToIntegerType(const DataLayout &DL, Type *Ty) {
  if (Ty->isPointerTy())
    return DL.getIntPtrType(Ty);

  // It is possible that chars or shorts overflow when we ask for the loop's
  // trip count; work around this by changing the type size.
  if (Ty->getScalarSizeInBits() < 32)
    return Type::getInt32Ty(Ty->getContext());

  return Ty;
}

static Type *getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) {
  Ty0 = convertPointerToIntegerType(DL, Ty0);
  Ty1 = convertPointerToIntegerType(DL, Ty1);
  if (Ty0->getScalarSizeInBits() > Ty1->getScalarSizeInBits())
    return Ty0;
  return Ty1;
}

/// Check that the instruction has outside loop users and is not an
/// identified reduction variable.
static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
                               SmallPtrSetImpl<Value *> &AllowedExit) {
  // Reductions, Inductions and non-header phis are allowed to have exit users.
  // All other instructions must not have external users.
  if (!AllowedExit.count(Inst))
    // Check that all of the users of the loop are inside the BB.
    for (User *U : Inst->users()) {
      Instruction *UI = cast<Instruction>(U);
      // This user may be a reduction exit value.
      if (!TheLoop->contains(UI)) {
        LLVM_DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n');
        return true;
      }
    }
  return false;
}

/// Returns true if A and B have the same pointer operand or the same SCEV
/// address.
static bool storeToSameAddress(ScalarEvolution *SE, StoreInst *A,
                               StoreInst *B) {
  // Compare store
  if (A == B)
    return true;

  // Otherwise compare pointers
  Value *APtr = A->getPointerOperand();
  Value *BPtr = B->getPointerOperand();
  if (APtr == BPtr)
    return true;

  // Otherwise compare address SCEVs
  return SE->getSCEV(APtr) == SE->getSCEV(BPtr);
}

int LoopVectorizationLegality::isConsecutivePtr(Type *AccessTy,
                                                Value *Ptr) const {
  // FIXME: Currently, the set of symbolic strides is sometimes queried before
  // it's collected. This happens from canVectorizeWithIfConvert, when the
  // pointer is checked to reference consecutive elements suitable for a
  // masked access.
  const auto &Strides =
      LAI ? LAI->getSymbolicStrides() : DenseMap<Value *, const SCEV *>();

  bool CanAddPredicate = !llvm::shouldOptimizeForSize(
      TheLoop->getHeader(), PSI, BFI, PGSOQueryType::IRPass);
  int Stride = getPtrStride(PSE, AccessTy, Ptr, TheLoop, Strides,
                            CanAddPredicate, false).value_or(0);
  if (Stride == 1 || Stride == -1)
    return Stride;
  return 0;
}

bool LoopVectorizationLegality::isInvariant(Value *V) const {
  return LAI->isInvariant(V);
}

namespace {
/// A rewriter to build the SCEVs for each of the VF lanes in the expected
/// vectorized loop, which can then be compared to detect their uniformity.
/// This is done by replacing the AddRec SCEVs of the original scalar loop
/// (TheLoop) with new AddRecs where the step is multiplied by StepMultiplier
/// and Offset * Step is added. Also checks if all sub-expressions are
/// analyzable w.r.t. uniformity.
class SCEVAddRecForUniformityRewriter
    : public SCEVRewriteVisitor<SCEVAddRecForUniformityRewriter> {
  /// Multiplier to be applied to the step of AddRecs in TheLoop.
  unsigned StepMultiplier;

  /// Offset to be added to the AddRecs in TheLoop.
  unsigned Offset;

  /// Loop for which to rewrite AddRecs.
  Loop *TheLoop;

  /// Is any sub-expression not analyzable w.r.t. uniformity?
  bool CannotAnalyze = false;

  bool canAnalyze() const { return !CannotAnalyze; }

public:
  SCEVAddRecForUniformityRewriter(ScalarEvolution &SE, unsigned StepMultiplier,
                                  unsigned Offset, Loop *TheLoop)
      : SCEVRewriteVisitor(SE), StepMultiplier(StepMultiplier), Offset(Offset),
        TheLoop(TheLoop) {}

  const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
    assert(Expr->getLoop() == TheLoop &&
           "addrec outside of TheLoop must be invariant and should have been "
           "handled earlier");
    // Build a new AddRec by multiplying the step by StepMultiplier and
    // incrementing the start by Offset * step.
    Type *Ty = Expr->getType();
    const SCEV *Step = Expr->getStepRecurrence(SE);
    if (!SE.isLoopInvariant(Step, TheLoop)) {
      CannotAnalyze = true;
      return Expr;
    }
    const SCEV *NewStep =
        SE.getMulExpr(Step, SE.getConstant(Ty, StepMultiplier));
    const SCEV *ScaledOffset = SE.getMulExpr(Step, SE.getConstant(Ty, Offset));
    const SCEV *NewStart = SE.getAddExpr(Expr->getStart(), ScaledOffset);
    return SE.getAddRecExpr(NewStart, NewStep, TheLoop, SCEV::FlagAnyWrap);
  }

  const SCEV *visit(const SCEV *S) {
    if (CannotAnalyze || SE.isLoopInvariant(S, TheLoop))
      return S;
    return SCEVRewriteVisitor<SCEVAddRecForUniformityRewriter>::visit(S);
  }

  const SCEV *visitUnknown(const SCEVUnknown *S) {
    if (SE.isLoopInvariant(S, TheLoop))
      return S;
    // The value could vary across iterations.
    CannotAnalyze = true;
    return S;
  }

  const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *S) {
    // Could not analyze the expression.
    CannotAnalyze = true;
    return S;
  }

  static const SCEV *rewrite(const SCEV *S, ScalarEvolution &SE,
                             unsigned StepMultiplier, unsigned Offset,
                             Loop *TheLoop) {
    /// Bail out if the expression does not contain an UDiv expression.
    /// Uniform values which are not loop invariant require operations to strip
    /// out the lowest bits. For now just look for UDivs and use it to avoid
    /// re-writing UDIV-free expressions for other lanes to limit compile time.
    if (!SCEVExprContains(S,
                          [](const SCEV *S) { return isa<SCEVUDivExpr>(S); }))
      return SE.getCouldNotCompute();

    SCEVAddRecForUniformityRewriter Rewriter(SE, StepMultiplier, Offset,
                                             TheLoop);
    const SCEV *Result = Rewriter.visit(S);

    if (Rewriter.canAnalyze())
      return Result;
    return SE.getCouldNotCompute();
  }
};

} // namespace

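// For illustration only (not part of the original source): for VF = 4 the
// rewriter above maps a recurrence {Start,+,Step} in TheLoop to
//   lane 0: {Start + 0*Step,+,4*Step}
//   lane I: {Start + I*Step,+,4*Step}
// isUniform() below compares each lane-I expression against lane 0; they can
// only fold to the same SCEV when the surrounding computation strips the
// per-lane contribution back out, e.g. via the UDiv (something like
// "Idx udiv 4") that rewrite() insists on before doing any work.
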
bool LoopVectorizationLegality::isUniform(Value *V, ElementCount VF) const {
  if (isInvariant(V))
    return true;
  if (VF.isScalable())
    return false;
  if (VF.isScalar())
    return true;

  // Since we rely on SCEV for uniformity, if the type is not SCEVable, it is
  // never considered uniform.
  auto *SE = PSE.getSE();
  if (!SE->isSCEVable(V->getType()))
    return false;
  const SCEV *S = SE->getSCEV(V);

  // Rewrite AddRecs in TheLoop to step by VF and check if the expression for
  // lane 0 matches the expressions for all other lanes.
  unsigned FixedVF = VF.getKnownMinValue();
  const SCEV *FirstLaneExpr =
      SCEVAddRecForUniformityRewriter::rewrite(S, *SE, FixedVF, 0, TheLoop);
  if (isa<SCEVCouldNotCompute>(FirstLaneExpr))
    return false;

  // Make sure the expressions for lanes FixedVF-1..1 match the expression for
  // lane 0. We check lanes in reverse order for compile-time, as frequently
  // checking the last lane is sufficient to rule out uniformity.
  return all_of(reverse(seq<unsigned>(1, FixedVF)), [&](unsigned I) {
    const SCEV *IthLaneExpr =
        SCEVAddRecForUniformityRewriter::rewrite(S, *SE, FixedVF, I, TheLoop);
    return FirstLaneExpr == IthLaneExpr;
  });
}

bool LoopVectorizationLegality::isUniformMemOp(Instruction &I,
                                               ElementCount VF) const {
  Value *Ptr = getLoadStorePointerOperand(&I);
  if (!Ptr)
    return false;
  // Note: There's nothing inherent which prevents predicated loads and
  // stores from being uniform. The current lowering simply doesn't handle
  // it; in particular, the cost model distinguishes scatter/gather from
  // scalar w/predication, and we currently rely on the scalar path.
  return isUniform(Ptr, VF) && !blockNeedsPredication(I.getParent());
}

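// For illustration only (not part of the original source): in the sense used
// above, the load of *S below is a uniform memory op, because its address is
// identical for every lane of a vector iteration:
//
//   for (int I = 0; I < N; ++I)
//     A[I] = *S + I;   // *S: uniform; A[I]: consecutive but not uniform
//
// An access like A[I / 4] is where the SCEV-based per-lane comparison in
// isUniform() earns its keep.
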
bool LoopVectorizationLegality::canVectorizeOuterLoop() {
  assert(!TheLoop->isInnermost() && "We are not vectorizing an outer loop.");
  // Store the result and return it at the end instead of exiting early, in
  // case allowExtraAnalysis is used to report multiple reasons for not
  // vectorizing.
  bool Result = true;
  bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);

  for (BasicBlock *BB : TheLoop->blocks()) {
    // Check whether the BB terminator is a BranchInst. Any other terminator is
    // not supported yet.
    auto *Br = dyn_cast<BranchInst>(BB->getTerminator());
    if (!Br) {
      reportVectorizationFailure("Unsupported basic block terminator",
                                 "loop control flow is not understood by vectorizer",
                                 "CFGNotUnderstood", ORE, TheLoop);
      if (DoExtraAnalysis)
        Result = false;
      else
        return false;
    }

    // Check whether the BranchInst is a supported one. Only unconditional
    // branches, conditional branches with an outer loop invariant condition or
    // backedges are supported.
    // FIXME: We skip these checks when VPlan predication is enabled as we
    // want to allow divergent branches. This whole check will be removed
    // once VPlan predication is on by default.
    if (Br && Br->isConditional() &&
        !TheLoop->isLoopInvariant(Br->getCondition()) &&
        !LI->isLoopHeader(Br->getSuccessor(0)) &&
        !LI->isLoopHeader(Br->getSuccessor(1))) {
      reportVectorizationFailure("Unsupported conditional branch",
                                 "loop control flow is not understood by vectorizer",
                                 "CFGNotUnderstood", ORE, TheLoop);
      if (DoExtraAnalysis)
        Result = false;
      else
        return false;
    }
  }

  // Check whether inner loops are uniform. At this point, we only support
  // simple outer loop scenarios with uniform nested loops.
  if (!isUniformLoopNest(TheLoop /*loop nest*/,
                         TheLoop /*context outer loop*/)) {
    reportVectorizationFailure("Outer loop contains divergent loops",
                               "loop control flow is not understood by vectorizer",
                               "CFGNotUnderstood", ORE, TheLoop);
    if (DoExtraAnalysis)
      Result = false;
    else
      return false;
  }

  // Check whether we are able to set up outer loop induction.
  if (!setupOuterLoopInductions()) {
    reportVectorizationFailure("Unsupported outer loop Phi(s)",
                               "UnsupportedPhi", ORE, TheLoop);
    if (DoExtraAnalysis)
      Result = false;
    else
      return false;
  }

  return Result;
}

void LoopVectorizationLegality::addInductionPhi(
    PHINode *Phi, const InductionDescriptor &ID,
    SmallPtrSetImpl<Value *> &AllowedExit) {
  Inductions[Phi] = ID;

  // In case this induction also comes with casts that we know we can ignore
  // in the vectorized loop body, record them here. All casts could be recorded
  // here for ignoring, but it suffices to record only the first (as it is the
  // only one that may be used outside the cast sequence).
  const SmallVectorImpl<Instruction *> &Casts = ID.getCastInsts();
  if (!Casts.empty())
    InductionCastsToIgnore.insert(*Casts.begin());

  Type *PhiTy = Phi->getType();
  const DataLayout &DL = Phi->getDataLayout();

  // Get the widest type.
  if (!PhiTy->isFloatingPointTy()) {
    if (!WidestIndTy)
      WidestIndTy = convertPointerToIntegerType(DL, PhiTy);
    else
      WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy);
  }

  // Int inductions are special because we only allow one IV.
  if (ID.getKind() == InductionDescriptor::IK_IntInduction &&
      ID.getConstIntStepValue() && ID.getConstIntStepValue()->isOne() &&
      isa<Constant>(ID.getStartValue()) &&
      cast<Constant>(ID.getStartValue())->isNullValue()) {

    // Use the phi node with the widest type as induction. Use the last
    // one if there are multiple (no good reason for doing this other
    // than it is expedient). We've checked that it begins at zero and
    // steps by one, so this is a canonical induction variable.
    if (!PrimaryInduction || PhiTy == WidestIndTy)
      PrimaryInduction = Phi;
  }

  // Both the PHI node itself, and the "post-increment" value feeding
  // back into the PHI node may have external users.
  // We can allow those uses, except if the SCEVs we have for them rely
  // on predicates that only hold within the loop, since allowing the exit
  // currently means re-using this SCEV outside the loop (see PR33706 for more
  // details).
  if (PSE.getPredicate().isAlwaysTrue()) {
    AllowedExit.insert(Phi);
    AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch()));
  }

  LLVM_DEBUG(dbgs() << "LV: Found an induction variable.\n");
}

bool LoopVectorizationLegality::setupOuterLoopInductions() {
  BasicBlock *Header = TheLoop->getHeader();

  // Returns true if a given Phi is a supported induction.
  auto IsSupportedPhi = [&](PHINode &Phi) -> bool {
    InductionDescriptor ID;
    if (InductionDescriptor::isInductionPHI(&Phi, TheLoop, PSE, ID) &&
        ID.getKind() == InductionDescriptor::IK_IntInduction) {
      addInductionPhi(&Phi, ID, AllowedExit);
      return true;
    }
    // Bail out for any Phi in the outer loop header that is not a supported
    // induction.
    LLVM_DEBUG(
        dbgs() << "LV: Found unsupported PHI for outer loop vectorization.\n");
    return false;
  };

  return llvm::all_of(Header->phis(), IsSupportedPhi);
}

/// Checks if a function is scalarizable according to the TLI, in
/// the sense that it should be vectorized and then expanded in
/// multiple scalar calls. This is represented in the
/// TLI via mappings that do not specify a vector name, as in the
/// following example:
///
///    const VecDesc VecIntrinsics[] = {
///      {"llvm.phx.abs.i32", "", 4}
///    };
static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI) {
  const StringRef ScalarName = CI.getCalledFunction()->getName();
  bool Scalarize = TLI.isFunctionVectorizable(ScalarName);
  // Check that all known VFs are not associated to a vector
  // function, i.e. the vector name is empty.
  if (Scalarize) {
    ElementCount WidestFixedVF, WidestScalableVF;
    TLI.getWidestVF(ScalarName, WidestFixedVF, WidestScalableVF);
    for (ElementCount VF = ElementCount::getFixed(2);
         ElementCount::isKnownLE(VF, WidestFixedVF); VF *= 2)
      Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF);
    for (ElementCount VF = ElementCount::getScalable(1);
         ElementCount::isKnownLE(VF, WidestScalableVF); VF *= 2)
      Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF);
    assert((WidestScalableVF.isZero() || !Scalarize) &&
           "Caller may decide to scalarize a variant using a scalable VF");
  }
  return Scalarize;
}

bool LoopVectorizationLegality::canVectorizeInstrs() {
  BasicBlock *Header = TheLoop->getHeader();

  // For each block in the loop.
  for (BasicBlock *BB : TheLoop->blocks()) {
    // Scan the instructions in the block and look for hazards.
    for (Instruction &I : *BB) {
      if (auto *Phi = dyn_cast<PHINode>(&I)) {
        Type *PhiTy = Phi->getType();
        // Check that this PHI type is allowed.
        if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
            !PhiTy->isPointerTy()) {
          reportVectorizationFailure("Found a non-int non-pointer PHI",
                                     "loop control flow is not understood by vectorizer",
                                     "CFGNotUnderstood", ORE, TheLoop);
          return false;
        }

        // If this PHINode is not in the header block, then we know that we
        // can convert it to select during if-conversion. No need to check if
        // the PHIs in this block are induction or reduction variables.
        if (BB != Header) {
          // Non-header phi nodes that have outside uses can be vectorized. Add
          // them to the list of allowed exits.
          // Unsafe cyclic dependencies with header phis are identified during
          // legalization for reduction, induction and fixed order
          // recurrences.
          AllowedExit.insert(&I);
          continue;
        }

        // We only allow if-converted PHIs with exactly two incoming values.
        if (Phi->getNumIncomingValues() != 2) {
          reportVectorizationFailure("Found an invalid PHI",
                                     "loop control flow is not understood by vectorizer",
                                     "CFGNotUnderstood", ORE, TheLoop, Phi);
          return false;
        }

        RecurrenceDescriptor RedDes;
        if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC,
                                                 DT, PSE.getSE())) {
          Requirements->addExactFPMathInst(RedDes.getExactFPMathInst());
          AllowedExit.insert(RedDes.getLoopExitInstr());
          Reductions[Phi] = RedDes;
          continue;
        }

        // We prevent matching non-constant strided pointer IVs to preserve
        // historical vectorizer behavior after a generalization of the
        // IVDescriptor code. The intent is to remove this check, but we
        // have to fix issues around code quality for such loops first.
        auto IsDisallowedStridedPointerInduction =
            [](const InductionDescriptor &ID) {
              if (AllowStridedPointerIVs)
                return false;
              return ID.getKind() == InductionDescriptor::IK_PtrInduction &&
                     ID.getConstIntStepValue() == nullptr;
            };

        // TODO: Instead of recording the AllowedExit, it would be good to
        // record the complementary set: NotAllowedExit. These include (but may
        // not be limited to):
        // 1. Reduction phis as they represent the one-before-last value, which
        //    is not available when vectorized
        // 2. Induction phis and increment when SCEV predicates cannot be used
        //    outside the loop - see addInductionPhi
        // 3. Non-Phis with outside uses when SCEV predicates cannot be used
        //    outside the loop - see call to hasOutsideLoopUser in the non-phi
        //    handling below
        // 4. FixedOrderRecurrence phis that can possibly be handled by
        //    extraction.
        // By recording these, we can then reason about ways to vectorize each
        // of these NotAllowedExit.
        InductionDescriptor ID;
        if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID) &&
            !IsDisallowedStridedPointerInduction(ID)) {
          addInductionPhi(Phi, ID, AllowedExit);
          Requirements->addExactFPMathInst(ID.getExactFPMathInst());
          continue;
        }

        if (RecurrenceDescriptor::isFixedOrderRecurrence(Phi, TheLoop, DT)) {
          AllowedExit.insert(Phi);
          FixedOrderRecurrences.insert(Phi);
          continue;
        }

        // As a last resort, coerce the PHI to an AddRec expression
        // and re-try classifying it as an induction PHI.
        if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true) &&
            !IsDisallowedStridedPointerInduction(ID)) {
          addInductionPhi(Phi, ID, AllowedExit);
          continue;
        }

        reportVectorizationFailure("Found an unidentified PHI",
                                   "value that could not be identified as "
                                   "reduction is used outside the loop",
                                   "NonReductionValueUsedOutsideLoop", ORE,
                                   TheLoop, Phi);
        return false;
      } // end of PHI handling

      // We handle calls that:
      //   * Are debug info intrinsics.
      //   * Have a mapping to an IR intrinsic.
      //   * Have a vector version available.
      auto *CI = dyn_cast<CallInst>(&I);

      if (CI && !getVectorIntrinsicIDForCall(CI, TLI) &&
          !isa<DbgInfoIntrinsic>(CI) &&
          !(CI->getCalledFunction() && TLI &&
            (!VFDatabase::getMappings(*CI).empty() ||
             isTLIScalarize(*TLI, *CI)))) {
        // If the call is a recognized math library call, it is likely that
        // we can vectorize it given loosened floating-point constraints.
        LibFunc Func;
        bool IsMathLibCall =
            TLI && CI->getCalledFunction() &&
            CI->getType()->isFloatingPointTy() &&
            TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) &&
            TLI->hasOptimizedCodeGen(Func);

        if (IsMathLibCall) {
          // TODO: Ideally, we should not use clang-specific language here,
          // but it's hard to provide meaningful yet generic advice.
          // Also, should this be guarded by allowExtraAnalysis() and/or be part
          // of the returned info from isFunctionVectorizable()?
          reportVectorizationFailure(
              "Found a non-intrinsic callsite",
              "library call cannot be vectorized. "
              "Try compiling with -fno-math-errno, -ffast-math, "
              "or similar flags",
              "CantVectorizeLibcall", ORE, TheLoop, CI);
        } else {
          reportVectorizationFailure("Found a non-intrinsic callsite",
                                     "call instruction cannot be vectorized",
                                     "CantVectorizeLibcall", ORE, TheLoop, CI);
        }
        return false;
      }

      // Some intrinsics have scalar arguments which must be the same for all
      // lanes in order for the call to be vectorized (i.e. loop invariant).
      if (CI) {
        auto *SE = PSE.getSE();
        Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI);
        for (unsigned Idx = 0; Idx < CI->arg_size(); ++Idx)
          if (isVectorIntrinsicWithScalarOpAtArg(IntrinID, Idx)) {
            if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(Idx)),
                                     TheLoop)) {
              reportVectorizationFailure("Found unvectorizable intrinsic",
                                         "intrinsic instruction cannot be vectorized",
                                         "CantVectorizeIntrinsic", ORE, TheLoop,
                                         CI);
              return false;
            }
          }
      }

      // If we found a vectorized variant of a function, note that so LV can
      // make better decisions about maximum VF.
      if (CI && !VFDatabase::getMappings(*CI).empty())
        VecCallVariantsFound = true;

      // Check that the instruction return type is vectorizable.
      // We can't vectorize casts from vector type to scalar type.
      // Also, we can't vectorize extractelement instructions.
      if ((!VectorType::isValidElementType(I.getType()) &&
           !I.getType()->isVoidTy()) ||
          (isa<CastInst>(I) &&
           !VectorType::isValidElementType(I.getOperand(0)->getType())) ||
          isa<ExtractElementInst>(I)) {
        reportVectorizationFailure("Found unvectorizable type",
                                   "instruction return type cannot be vectorized",
                                   "CantVectorizeInstructionReturnType", ORE,
                                   TheLoop, &I);
        return false;
      }

      // Check that the stored type is vectorizable.
      if (auto *ST = dyn_cast<StoreInst>(&I)) {
        Type *T = ST->getValueOperand()->getType();
        if (!VectorType::isValidElementType(T)) {
          reportVectorizationFailure("Store instruction cannot be vectorized",
                                     "CantVectorizeStore", ORE, TheLoop, ST);
          return false;
        }

        // For nontemporal stores, check that a nontemporal vector version is
        // supported on the target.
        if (ST->getMetadata(LLVMContext::MD_nontemporal)) {
          // Arbitrarily try a vector of 2 elements.
          auto *VecTy = FixedVectorType::get(T, /*NumElts=*/2);
          assert(VecTy && "did not find vectorized version of stored type");
          if (!TTI->isLegalNTStore(VecTy, ST->getAlign())) {
            reportVectorizationFailure(
                "nontemporal store instruction cannot be vectorized",
                "CantVectorizeNontemporalStore", ORE, TheLoop, ST);
            return false;
          }
        }

      } else if (auto *LD = dyn_cast<LoadInst>(&I)) {
        if (LD->getMetadata(LLVMContext::MD_nontemporal)) {
          // For nontemporal loads, check that a nontemporal vector version is
          // supported on the target (arbitrarily try a vector of 2 elements).
          auto *VecTy = FixedVectorType::get(I.getType(), /*NumElts=*/2);
          assert(VecTy && "did not find vectorized version of load type");
          if (!TTI->isLegalNTLoad(VecTy, LD->getAlign())) {
            reportVectorizationFailure(
                "nontemporal load instruction cannot be vectorized",
                "CantVectorizeNontemporalLoad", ORE, TheLoop, LD);
            return false;
          }
        }

        // FP instructions can allow unsafe algebra, thus vectorizable by
        // non-IEEE-754 compliant SIMD units.
        // This applies to floating-point math operations and calls, not memory
        // operations, shuffles, or casts, as they don't change precision or
        // semantics.
      } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) &&
                 !I.isFast()) {
        LLVM_DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n");
        Hints->setPotentiallyUnsafe();
      }

      // Reduction instructions are allowed to have exit users.
      // All other instructions must not have external users.
      if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) {
        // We can safely vectorize loops where instructions within the loop are
        // used outside the loop only if the SCEV predicates within the loop
        // are the same as outside the loop. Allowing the exit means reusing
        // the SCEV outside the loop.
        if (PSE.getPredicate().isAlwaysTrue()) {
          AllowedExit.insert(&I);
          continue;
        }
        reportVectorizationFailure("Value cannot be used outside the loop",
                                   "ValueUsedOutsideLoop", ORE, TheLoop, &I);
        return false;
      }
    } // next instr.
  }

  if (!PrimaryInduction) {
    if (Inductions.empty()) {
      reportVectorizationFailure("Did not find one integer induction var",
                                 "loop induction variable could not be identified",
                                 "NoInductionVariable", ORE, TheLoop);
      return false;
    }
    if (!WidestIndTy) {
      reportVectorizationFailure("Did not find one integer induction var",
                                 "integer loop induction variable could not be identified",
                                 "NoIntegerInductionVariable", ORE, TheLoop);
      return false;
    }
    LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
  }

  // Now we know the widest induction type, check if our found induction
  // is the same size. If it's not, unset it here and InnerLoopVectorizer
  // will create another.
  if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType())
    PrimaryInduction = nullptr;

  return true;
}

/// Find histogram operations that match high-level code in loops:
/// \code
/// buckets[indices[i]] += step;
/// \endcode
///
/// It matches a pattern starting from \p HSt, which stores the computed
/// histogram to the 'buckets' array. It uses a BinOp to sum all counts,
/// storing them using a loop-variant index Load from the 'indices' input
/// array.
///
/// On successful matches it updates the STATISTIC 'HistogramsDetected',
/// regardless of hardware support. When there is support, it additionally
/// stores the BinOp/Load pairs in \p HistogramCounts, as well as the pointers
/// used to update the histogram in \p HistogramPtrs.
static bool findHistogram(LoadInst *LI, StoreInst *HSt, Loop *TheLoop,
                          const PredicatedScalarEvolution &PSE,
                          SmallVectorImpl<HistogramInfo> &Histograms) {

  // Store value must come from a Binary Operation.
  Instruction *HPtrInstr = nullptr;
  BinaryOperator *HBinOp = nullptr;
  if (!match(HSt, m_Store(m_BinOp(HBinOp), m_Instruction(HPtrInstr))))
    return false;

  // BinOp must be an Add or a Sub modifying the bucket value by a
  // loop invariant amount.
  // FIXME: We assume the loop invariant term is on the RHS.
  //        Fine for an immediate/constant, but maybe not a generic value?
  Value *HIncVal = nullptr;
  if (!match(HBinOp, m_Add(m_Load(m_Specific(HPtrInstr)), m_Value(HIncVal))) &&
      !match(HBinOp, m_Sub(m_Load(m_Specific(HPtrInstr)), m_Value(HIncVal))))
    return false;

  // Make sure the increment value is loop invariant.
  if (!TheLoop->isLoopInvariant(HIncVal))
    return false;

  // The address to store is calculated through a GEP Instruction.
  GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(HPtrInstr);
  if (!GEP)
    return false;

  // Restrict address calculation to constant indices except for the last term.
  Value *HIdx = nullptr;
  for (Value *Index : GEP->indices()) {
    if (HIdx)
      return false;
    if (!isa<ConstantInt>(Index))
      HIdx = Index;
  }

  if (!HIdx)
    return false;

  // Check that the index is calculated by loading from another array. Ignore
  // any extensions.
  // FIXME: Support indices from other sources than a linear load from memory?
  //        We're currently trying to match an operation looping over an array
  //        of indices, but there could be additional levels of indirection
  //        in place, or possibly some additional calculation to form the index
  //        from the loaded data.
  Value *VPtrVal;
  if (!match(HIdx, m_ZExtOrSExtOrSelf(m_Load(m_Value(VPtrVal)))))
    return false;

  // Make sure the index address varies in this loop, not an outer loop.
  const auto *AR = dyn_cast<SCEVAddRecExpr>(PSE.getSE()->getSCEV(VPtrVal));
  if (!AR || AR->getLoop() != TheLoop)
    return false;

  // Ensure we'll have the same mask by checking that all parts of the
  // histogram (gather load, update, scatter store) are in the same block.
  LoadInst *IndexedLoad = cast<LoadInst>(HBinOp->getOperand(0));
  BasicBlock *LdBB = IndexedLoad->getParent();
  if (LdBB != HBinOp->getParent() || LdBB != HSt->getParent())
    return false;

  LLVM_DEBUG(dbgs() << "LV: Found histogram for: " << *HSt << "\n");

  // Store the operations that make up the histogram.
  Histograms.emplace_back(IndexedLoad, HBinOp, HSt);
  return true;
}

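// For illustration only (not part of the original source): the IR shape
// findHistogram() above accepts for "buckets[indices[i]] += step" is roughly
//
//   %idx = load i32, ptr %gep.indices        ; loop-varying index
//   %ext = zext i32 %idx to i64
//   %gep = getelementptr i32, ptr %buckets, i64 %ext
//   %old = load i32, ptr %gep                ; IndexedLoad
//   %inc = add i32 %old, %step               ; HBinOp, %step loop-invariant
//   store i32 %inc, ptr %gep                 ; HSt
//
// all within one basic block, so the gather, update and scatter can share a
// mask after vectorization.
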
bool LoopVectorizationLegality::canVectorizeIndirectUnsafeDependences() {
  // For now, we only support an IndirectUnsafe dependency that calculates
  // a histogram.
  if (!EnableHistogramVectorization)
    return false;

  // Find a single IndirectUnsafe dependency.
  const MemoryDepChecker::Dependence *IUDep = nullptr;
  const MemoryDepChecker &DepChecker = LAI->getDepChecker();
  const auto *Deps = DepChecker.getDependences();
  // If there were too many dependences, LAA abandons recording them. We can't
  // proceed safely if we don't know what the dependences are.
  if (!Deps)
    return false;

  for (const MemoryDepChecker::Dependence &Dep : *Deps) {
    // Ignore dependencies that are either known to be safe or can be
    // checked at runtime.
    if (MemoryDepChecker::Dependence::isSafeForVectorization(Dep.Type) !=
        MemoryDepChecker::VectorizationSafetyStatus::Unsafe)
      continue;

    // We're only interested in IndirectUnsafe dependencies here, where the
    // address might come from a load from memory. We also only want to handle
    // one such dependency, at least for now.
    if (Dep.Type != MemoryDepChecker::Dependence::IndirectUnsafe || IUDep)
      return false;

    IUDep = &Dep;
  }
  if (!IUDep)
    return false;

  // For now only normal loads and stores are supported.
  LoadInst *LI = dyn_cast<LoadInst>(IUDep->getSource(DepChecker));
  StoreInst *SI = dyn_cast<StoreInst>(IUDep->getDestination(DepChecker));

  if (!LI || !SI)
    return false;

  LLVM_DEBUG(dbgs() << "LV: Checking for a histogram on: " << *SI << "\n");
  return findHistogram(LI, SI, TheLoop, LAI->getPSE(), Histograms);
}

bool LoopVectorizationLegality::canVectorizeMemory() {
  LAI = &LAIs.getInfo(*TheLoop);
  const OptimizationRemarkAnalysis *LAR = LAI->getReport();
  if (LAR) {
    ORE->emit([&]() {
      return OptimizationRemarkAnalysis(Hints->vectorizeAnalysisPassName(),
                                        "loop not vectorized: ", *LAR);
    });
  }

  if (!LAI->canVectorizeMemory())
    return canVectorizeIndirectUnsafeDependences();

  if (LAI->hasLoadStoreDependenceInvolvingLoopInvariantAddress()) {
    reportVectorizationFailure("We don't allow storing to uniform addresses",
                               "write to a loop invariant address could not "
                               "be vectorized",
                               "CantVectorizeStoreToLoopInvariantAddress", ORE,
                               TheLoop);
    return false;
  }

  // We can vectorize stores to an invariant address when the final reduction
  // value is guaranteed to be stored at the end of the loop. Also, if the
  // decision to vectorize the loop is made, runtime checks are added so as to
  // make sure that the invariant address won't alias with any other objects.
  if (!LAI->getStoresToInvariantAddresses().empty()) {
    // For each invariant address, check if the last stored value is
    // unconditional and the address is not calculated inside the loop.
    for (StoreInst *SI : LAI->getStoresToInvariantAddresses()) {
      if (!isInvariantStoreOfReduction(SI))
        continue;

      if (blockNeedsPredication(SI->getParent())) {
        reportVectorizationFailure(
            "We don't allow storing to uniform addresses",
            "write of conditional recurring variant value to a loop "
            "invariant address could not be vectorized",
            "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
        return false;
      }

      // The invariant address should be defined outside of the loop. The LICM
      // pass usually makes sure that happens, but in rare cases it does not,
      // and we do not want to overcomplicate vectorization to support this
      // case.
      if (Instruction *Ptr = dyn_cast<Instruction>(SI->getPointerOperand())) {
        if (TheLoop->contains(Ptr)) {
          reportVectorizationFailure(
              "Invariant address is calculated inside the loop",
              "write to a loop invariant address could not "
              "be vectorized",
              "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
          return false;
        }
      }
    }

    if (LAI->hasStoreStoreDependenceInvolvingLoopInvariantAddress()) {
      // For each invariant address, check that its last stored value is the
      // result of one of our reductions.
      //
      // We do not check if dependence with loads exists because that is
      // already checked via hasLoadStoreDependenceInvolvingLoopInvariantAddress.
      ScalarEvolution *SE = PSE.getSE();
      SmallVector<StoreInst *, 4> UnhandledStores;
      for (StoreInst *SI : LAI->getStoresToInvariantAddresses()) {
        if (isInvariantStoreOfReduction(SI)) {
          // Earlier stores to this address are effectively deadcode.
          // With opaque pointers it is possible for one pointer to be used
          // with different sizes of stored values:
          //    store i32 0, ptr %x
          //    store i8 0, ptr %x
          // The latest store doesn't completely overwrite the first one in the
          // example. That is why we have to make sure that the types of stored
          // values are the same.
          // TODO: Check that the bitwidth of an unhandled store is smaller
          // than the one that overwrites it, and add a test.
          erase_if(UnhandledStores, [SE, SI](StoreInst *I) {
            return storeToSameAddress(SE, SI, I) &&
                   I->getValueOperand()->getType() ==
                       SI->getValueOperand()->getType();
          });
          continue;
        }
        UnhandledStores.push_back(SI);
      }

      bool IsOK = UnhandledStores.empty();
      // TODO: we should also validate against InvariantMemSets.
      if (!IsOK) {
        reportVectorizationFailure(
            "We don't allow storing to uniform addresses",
            "write to a loop invariant address could not "
            "be vectorized",
            "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
        return false;
      }
    }
  }

  PSE.addPredicate(LAI->getPSE().getPredicate());
  return true;
}

bool LoopVectorizationLegality::canVectorizeFPMath(
    bool EnableStrictReductions) {

  // First check if there is any ExactFP math or if we allow reassociations
  if (!Requirements->getExactFPInst() || Hints->allowReordering())
    return true;

  // If the above is false, we have ExactFPMath & do not allow reordering.
  // If the EnableStrictReductions flag is set, first check if we have any
  // Exact FP induction vars, which we cannot vectorize.
  if (!EnableStrictReductions ||
      any_of(getInductionVars(), [&](auto &Induction) -> bool {
        InductionDescriptor IndDesc = Induction.second;
        return IndDesc.getExactFPMathInst();
      }))
    return false;

  // We can now only vectorize if all reductions with Exact FP math also
  // have the isOrdered flag set, which indicates that we can move the
  // reduction operations in-loop.
  return (all_of(getReductionVars(), [&](auto &Reduction) -> bool {
    const RecurrenceDescriptor &RdxDesc = Reduction.second;
    return !RdxDesc.hasExactFPMath() || RdxDesc.isOrdered();
  }));
}

bool LoopVectorizationLegality::isInvariantStoreOfReduction(StoreInst *SI) {
  return any_of(getReductionVars(), [&](auto &Reduction) -> bool {
    const RecurrenceDescriptor &RdxDesc = Reduction.second;
    return RdxDesc.IntermediateStore == SI;
  });
}

bool LoopVectorizationLegality::isInvariantAddressOfReduction(Value *V) {
  return any_of(getReductionVars(), [&](auto &Reduction) -> bool {
    const RecurrenceDescriptor &RdxDesc = Reduction.second;
    if (!RdxDesc.IntermediateStore)
      return false;

    ScalarEvolution *SE = PSE.getSE();
    Value *InvariantAddress = RdxDesc.IntermediateStore->getPointerOperand();
    return V == InvariantAddress ||
           SE->getSCEV(V) == SE->getSCEV(InvariantAddress);
  });
}

bool LoopVectorizationLegality::isInductionPhi(const Value *V) const {
  Value *In0 = const_cast<Value *>(V);
  PHINode *PN = dyn_cast_or_null<PHINode>(In0);
  if (!PN)
    return false;

  return Inductions.count(PN);
}

const InductionDescriptor *
LoopVectorizationLegality::getIntOrFpInductionDescriptor(PHINode *Phi) const {
  if (!isInductionPhi(Phi))
    return nullptr;
  auto &ID = getInductionVars().find(Phi)->second;
  if (ID.getKind() == InductionDescriptor::IK_IntInduction ||
      ID.getKind() == InductionDescriptor::IK_FpInduction)
    return &ID;
  return nullptr;
}

const InductionDescriptor *
LoopVectorizationLegality::getPointerInductionDescriptor(PHINode *Phi) const {
  if (!isInductionPhi(Phi))
    return nullptr;
  auto &ID = getInductionVars().find(Phi)->second;
  if (ID.getKind() == InductionDescriptor::IK_PtrInduction)
    return &ID;
  return nullptr;
}

bool LoopVectorizationLegality::isCastedInductionVariable(
    const Value *V) const {
  auto *Inst = dyn_cast<Instruction>(V);
  return (Inst && InductionCastsToIgnore.count(Inst));
}

bool LoopVectorizationLegality::isInductionVariable(const Value *V) const {
  return isInductionPhi(V) || isCastedInductionVariable(V);
}

bool LoopVectorizationLegality::isFixedOrderRecurrence(
    const PHINode *Phi) const {
  return FixedOrderRecurrences.count(Phi);
}

bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) const {
  // When vectorizing early exits, create predicates for the latch block only.
  // The early exiting block must be a direct predecessor of the latch at the
  // moment.
  BasicBlock *Latch = TheLoop->getLoopLatch();
  if (hasUncountableEarlyExit()) {
    assert(
        is_contained(predecessors(Latch), getUncountableEarlyExitingBlock()) &&
        "Uncountable exiting block must be a direct predecessor of latch");
    return BB == Latch;
  }
  return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
}

bool LoopVectorizationLegality::blockCanBePredicated(
    BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs,
    SmallPtrSetImpl<const Instruction *> &MaskedOp) const {
  for (Instruction &I : *BB) {
    // We can predicate blocks with calls to assume, as long as we drop them in
    // case we flatten the CFG via predication.
    if (match(&I, m_Intrinsic<Intrinsic::assume>())) {
      MaskedOp.insert(&I);
      continue;
    }

    // Do not let llvm.experimental.noalias.scope.decl block the vectorization.
    // TODO: there might be cases that it should block the vectorization. Let's
    // ignore those for now.
    if (isa<NoAliasScopeDeclInst>(&I))
      continue;

    // We can allow masked calls if there's at least one vector variant, even
    // if we end up scalarizing due to the cost model calculations.
    // TODO: Allow other calls if they have appropriate attributes... readonly
    // and argmemonly?
    if (CallInst *CI = dyn_cast<CallInst>(&I))
      if (VFDatabase::hasMaskedVariant(*CI)) {
        MaskedOp.insert(CI);
        continue;
      }

    // Loads are handled via masking (or speculated if safe to do so.)
    if (auto *LI = dyn_cast<LoadInst>(&I)) {
      if (!SafePtrs.count(LI->getPointerOperand()))
        MaskedOp.insert(LI);
      continue;
    }

    // Predicated store requires some form of masking:
    // 1) masked store HW instruction,
    // 2) emulation via load-blend-store (only if safe and legal to do so,
    //    be aware of the race conditions), or
    // 3) element-by-element predicate check and scalar store.
    if (auto *SI = dyn_cast<StoreInst>(&I)) {
      MaskedOp.insert(SI);
      continue;
    }

    if (I.mayReadFromMemory() || I.mayWriteToMemory() || I.mayThrow())
      return false;
  }

  return true;
}

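// For illustration only (not part of the original source): blockCanBePredicated
// above is what allows a conditional body such as
//
//   for (int I = 0; I < N; ++I)
//     if (C[I])
//       A[I] = B[I] + 1;  // load of B[I] and store to A[I] become masked ops
//
// to be flattened during if-conversion: the memory operations land in MaskedOp
// and are later emitted as masked loads/stores, while any other instruction
// that may read or write memory or throw makes the block non-predicable.
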
bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
  if (!EnableIfConversion) {
    reportVectorizationFailure("If-conversion is disabled",
                               "IfConversionDisabled", ORE, TheLoop);
    return false;
  }

  assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");

  // A list of pointers which are known to be dereferenceable within scope of
  // the loop body for each iteration of the loop which executes. That is,
  // the memory pointed to can be dereferenced (with the access size implied by
  // the value's type) unconditionally within the loop header without
  // introducing a new fault.
  SmallPtrSet<Value *, 8> SafePointers;

  // Collect safe addresses.
  for (BasicBlock *BB : TheLoop->blocks()) {
    if (!blockNeedsPredication(BB)) {
      for (Instruction &I : *BB)
        if (auto *Ptr = getLoadStorePointerOperand(&I))
          SafePointers.insert(Ptr);
      continue;
    }

    // For a block which requires predication, an address may be safe to access
    // in the loop w/o predication if we can prove dereferenceability facts
    // sufficient to ensure it'll never fault within the loop. For the moment,
    // we restrict this to loads; stores are more complicated due to
    // concurrency restrictions.
    ScalarEvolution &SE = *PSE.getSE();
    SmallVector<const SCEVPredicate *, 4> Predicates;
    for (Instruction &I : *BB) {
      LoadInst *LI = dyn_cast<LoadInst>(&I);
      // Pass the Predicates pointer to isDereferenceableAndAlignedInLoop so
      // that it will consider loops that need guarding by SCEV checks. The
      // vectoriser will generate these checks if we decide to vectorise.
      if (LI && !LI->getType()->isVectorTy() && !mustSuppressSpeculation(*LI) &&
          isDereferenceableAndAlignedInLoop(LI, TheLoop, SE, *DT, AC,
                                            &Predicates))
        SafePointers.insert(LI->getPointerOperand());
      Predicates.clear();
    }
  }

  // Collect the blocks that need predication.
  for (BasicBlock *BB : TheLoop->blocks()) {
    // We support only branches and switch statements as terminators inside the
    // loop.
    if (isa<SwitchInst>(BB->getTerminator())) {
      if (TheLoop->isLoopExiting(BB)) {
        reportVectorizationFailure("Loop contains an unsupported switch",
                                   "LoopContainsUnsupportedSwitch", ORE,
                                   TheLoop, BB->getTerminator());
        return false;
      }
    } else if (!isa<BranchInst>(BB->getTerminator())) {
      reportVectorizationFailure("Loop contains an unsupported terminator",
                                 "LoopContainsUnsupportedTerminator", ORE,
                                 TheLoop, BB->getTerminator());
      return false;
    }

    // We must be able to predicate all blocks that need to be predicated.
    if (blockNeedsPredication(BB) &&
        !blockCanBePredicated(BB, SafePointers, MaskedOp)) {
      reportVectorizationFailure(
          "Control flow cannot be substituted for a select", "NoCFGForSelect",
          ORE, TheLoop, BB->getTerminator());
      return false;
    }
  }

  // We can if-convert this loop.
  return true;
}

// Helper function to canVectorizeLoopNestCFG.
bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
                                                    bool UseVPlanNativePath) {
  assert((UseVPlanNativePath || Lp->isInnermost()) &&
         "VPlan-native path is not enabled.");

  // TODO: ORE should be improved to show more accurate information when an
  // outer loop can't be vectorized because a nested loop is not understood or
  // legal. Something like: "outer_loop_location: loop not vectorized:
  // (inner_loop_location) loop control flow is not understood by vectorizer".

  // Store the result and return it at the end instead of exiting early, in
  // case allowExtraAnalysis is used to report multiple reasons for not
  // vectorizing.
  bool Result = true;
  bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);

  // We must have a loop in canonical form. Loops with indirectbr in them cannot
  // be canonicalized.
  if (!Lp->getLoopPreheader()) {
    reportVectorizationFailure("Loop doesn't have a legal pre-header",
                               "loop control flow is not understood by vectorizer",
                               "CFGNotUnderstood", ORE, TheLoop);
    if (DoExtraAnalysis)
      Result = false;
    else
      return false;
  }

  // We must have a single backedge.
  if (Lp->getNumBackEdges() != 1) {
    reportVectorizationFailure("The loop must have a single backedge",
                               "loop control flow is not understood by vectorizer",
                               "CFGNotUnderstood", ORE, TheLoop);
    if (DoExtraAnalysis)
      Result = false;
    else
      return false;
  }

  return Result;
}

bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
    Loop *Lp, bool UseVPlanNativePath) {
  // Store the result and return it at the end instead of exiting early, in
  // case allowExtraAnalysis is used to report multiple reasons for not
  // vectorizing.
  bool Result = true;
  bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
  if (!canVectorizeLoopCFG(Lp, UseVPlanNativePath)) {
    if (DoExtraAnalysis)
      Result = false;
    else
      return false;
  }

  // Recursively check whether the loop control flow of nested loops is
  // understood.
  for (Loop *SubLp : *Lp)
    if (!canVectorizeLoopNestCFG(SubLp, UseVPlanNativePath)) {
      if (DoExtraAnalysis)
        Result = false;
      else
        return false;
    }

  return Result;
}

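// For illustration only (not part of the original source): the shape accepted
// by isVectorizableEarlyExitLoop() below is a read-only search loop with a
// single uncountable exit in addition to the countable latch exit, e.g.
//
//   for (unsigned I = 0; I != N; ++I)  // countable latch exit
//     if (A[I] == X)                   // uncountable early exit
//       break;
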
1582bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
1583 BasicBlock *LatchBB = TheLoop->getLoopLatch();
1584 if (!LatchBB) {
1585 reportVectorizationFailure("Loop does not have a latch",
1586 "Cannot vectorize early exit loop",
1587 "NoLatchEarlyExit", ORE, TheLoop);
1588 return false;
1589 }
1590
1591 if (Reductions.size() || FixedOrderRecurrences.size()) {
1593 "Found reductions or recurrences in early-exit loop",
1594 "Cannot vectorize early exit loop with reductions or recurrences",
1595 "RecurrencesInEarlyExitLoop", ORE, TheLoop);
1596 return false;
1597 }
1598
1599 SmallVector<BasicBlock *, 8> ExitingBlocks;
1600 TheLoop->getExitingBlocks(ExitingBlocks);
1601
1602 // Keep a record of all the exiting blocks.
1604 for (BasicBlock *BB : ExitingBlocks) {
1605 const SCEV *EC =
1606 PSE.getSE()->getPredicatedExitCount(TheLoop, BB, &Predicates);
1607 if (isa<SCEVCouldNotCompute>(EC)) {
1608 UncountableExitingBlocks.push_back(BB);
1609
1610 SmallVector<BasicBlock *, 2> Succs(successors(BB));
1611 if (Succs.size() != 2) {
1612 reportVectorizationFailure(
1613 "Early exiting block does not have exactly two successors",
1614 "Incorrect number of successors from early exiting block",
1615 "EarlyExitTooManySuccessors", ORE, TheLoop);
1616 return false;
1617 }
1618
1619 BasicBlock *ExitBlock;
1620 if (!TheLoop->contains(Succs[0]))
1621 ExitBlock = Succs[0];
1622 else {
1623 assert(!TheLoop->contains(Succs[1]));
1624 ExitBlock = Succs[1];
1625 }
1626 UncountableExitBlocks.push_back(ExitBlock);
1627 } else
1628 CountableExitingBlocks.push_back(BB);
1629 }
1630 // We can safely ignore the predicates here because when vectorizing the loop
1631 // the PredicatedScalarEvolution class will keep track of all predicates
1632 // for each exiting block anyway. This happens when calling
1633 // PSE.getSymbolicMaxBackedgeTakenCount() below.
1634 Predicates.clear();
1635
1636 // We only support one uncountable early exit.
1637 if (getUncountableExitingBlocks().size() != 1) {
1638 reportVectorizationFailure(
1639 "Loop has too many uncountable exits",
1640 "Cannot vectorize early exit loop with more than one early exit",
1641 "TooManyUncountableEarlyExits", ORE, TheLoop);
1642 return false;
1643 }
1644
1645 // The only supported early exit loops so far are ones where the early
1646 // exiting block is a unique predecessor of the latch block.
1647 BasicBlock *LatchPredBB = LatchBB->getUniquePredecessor();
1648 if (LatchPredBB != getUncountableEarlyExitingBlock()) {
1649 reportVectorizationFailure("Early exit is not the latch predecessor",
1650 "Cannot vectorize early exit loop",
1651 "EarlyExitNotLatchPredecessor", ORE, TheLoop);
1652 return false;
1653 }
1654
1655 // The latch block must have a countable exit.
1656 if (isa<SCEVCouldNotCompute>(
1657 PSE.getSE()->getPredicatedExitCount(TheLoop, LatchBB, &Predicates))) {
1658 reportVectorizationFailure(
1659 "Cannot determine exact exit count for latch block",
1660 "Cannot vectorize early exit loop",
1661 "UnknownLatchExitCountEarlyExitLoop", ORE, TheLoop);
1662 return false;
1663 }
1664 assert(llvm::is_contained(CountableExitingBlocks, LatchBB) &&
1665 "Latch block not found in list of countable exits!");
1666
1667 // Check to see if there are instructions that could potentially generate
1668 // exceptions or have side-effects.
1669 auto IsSafeOperation = [](Instruction *I) -> bool {
1670 switch (I->getOpcode()) {
1671 case Instruction::Load:
1672 case Instruction::Store:
1673 case Instruction::PHI:
1674 case Instruction::Br:
1675 // These are checked separately.
1676 return true;
1677 default:
1678 return isSafeToSpeculativelyExecute(I);
1679 }
1680 };
1681
1682 for (auto *BB : TheLoop->blocks())
1683 for (auto &I : *BB) {
1684 if (I.mayWriteToMemory()) {
1685 // We don't support writes to memory.
1686 reportVectorizationFailure(
1687 "Writes to memory unsupported in early exit loops",
1688 "Cannot vectorize early exit loop with writes to memory",
1689 "WritesInEarlyExitLoop", ORE, TheLoop);
1690 return false;
1691 } else if (!IsSafeOperation(&I)) {
1692 reportVectorizationFailure("Early exit loop contains operations that "
1693 "cannot be speculatively executed",
1694 "UnsafeOperationsEarlyExitLoop", ORE,
1695 TheLoop);
1696 return false;
1697 }
1698 }
1699
1700 // The vectoriser cannot handle loads that occur after the early exit block.
1701 assert(LatchBB->getUniquePredecessor() == getUncountableEarlyExitingBlock() &&
1702 "Expected latch predecessor to be the early exiting block");
1703
1704 // TODO: Handle loops that may fault.
1705 Predicates.clear();
1706 if (!isDereferenceableReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC,
1707 &Predicates)) {
1709 "Loop may fault",
1710 "Cannot vectorize potentially faulting early exit loop",
1711 "PotentiallyFaultingEarlyExitLoop", ORE, TheLoop);
1712 return false;
1713 }
1714
1715 [[maybe_unused]] const SCEV *SymbolicMaxBTC =
1716 PSE.getSymbolicMaxBackedgeTakenCount();
1717 // Since we have an exact exit count for the latch and the early exit
1718 // dominates the latch, this should guarantee a computed SCEV value.
1719 assert(!isa<SCEVCouldNotCompute>(SymbolicMaxBTC) &&
1720 "Failed to get symbolic expression for backedge taken count");
1721 LLVM_DEBUG(dbgs() << "LV: Found an early exit loop with symbolic max "
1722 "backedge taken count: "
1723 << *SymbolicMaxBTC << '\n');
1724 return true;
1725}
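// Illustrative sketch (editorial note, not part of the upstream source): the
// checks above accept a read-only search loop such as
//
//   for (int i = 0; i < n; ++i)   // latch exit: countable (i reaches n)
//     if (a[i] == key)            // early exit: trip count unknown to SCEV
//       return i;
//
// Here the early-exiting block is the latch's unique predecessor, the loop
// writes no memory, and a[0..n-1] must be provably dereferenceable so that
// speculative vector loads cannot fault.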
1726
1727bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
1728 // Store the result and return it at the end instead of exiting early, in case
1729 // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1730 bool Result = true;
1731
1732 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1733 // Check whether the loop-related control flow in the loop nest is
1734 // expected by the vectorizer.
1735 if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) {
1736 if (DoExtraAnalysis) {
1737 LLVM_DEBUG(dbgs() << "LV: legality check failed: loop nest");
1738 Result = false;
1739 } else {
1740 return false;
1741 }
1742 }
1743
1744 // We need to have a loop header.
1745 LLVM_DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName()
1746 << '\n');
1747
1748 // Specific checks for outer loops. We skip the remaining legal checks at this
1749 // point because they don't support outer loops.
1750 if (!TheLoop->isInnermost()) {
1751 assert(UseVPlanNativePath && "VPlan-native path is not enabled.");
1752
1753 if (!canVectorizeOuterLoop()) {
1754 reportVectorizationFailure("Unsupported outer loop",
1755 "UnsupportedOuterLoop", ORE, TheLoop);
1756 // TODO: Implement DoExtraAnalysis when subsequent legal checks support
1757 // outer loops.
1758 return false;
1759 }
1760
1761 LLVM_DEBUG(dbgs() << "LV: We can vectorize this outer loop!\n");
1762 return Result;
1763 }
1764
1765 assert(TheLoop->isInnermost() && "Inner loop expected.");
1766 // Check if we can if-convert non-single-bb loops.
1767 unsigned NumBlocks = TheLoop->getNumBlocks();
1768 if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
1769 LLVM_DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");
1770 if (DoExtraAnalysis)
1771 Result = false;
1772 else
1773 return false;
1774 }
1775
1776 // Check if we can vectorize the instructions and CFG in this loop.
1777 if (!canVectorizeInstrs()) {
1778 LLVM_DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n");
1779 if (DoExtraAnalysis)
1780 Result = false;
1781 else
1782 return false;
1783 }
1784
1785 HasUncountableEarlyExit = false;
1786 if (isa<SCEVCouldNotCompute>(PSE.getBackedgeTakenCount())) {
1787 HasUncountableEarlyExit = true;
1788 if (!isVectorizableEarlyExitLoop()) {
1789 UncountableExitingBlocks.clear();
1790 HasUncountableEarlyExit = false;
1791 if (DoExtraAnalysis)
1792 Result = false;
1793 else
1794 return false;
1795 }
1796 }
1797
1798 // Go over each instruction and look at memory deps.
1799 if (!canVectorizeMemory()) {
1800 LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
1801 if (DoExtraAnalysis)
1802 Result = false;
1803 else
1804 return false;
1805 }
1806
1807 if (Result) {
1808 LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
1809 << (LAI->getRuntimePointerChecking()->Need
1810 ? " (with a runtime bound check)"
1811 : "")
1812 << "!\n");
1813 }
1814
1815 unsigned SCEVThreshold = VectorizeSCEVCheckThreshold;
1816 if (Hints->getForce() == LoopVectorizeHints::FK_Enabled)
1817 SCEVThreshold = PragmaVectorizeSCEVCheckThreshold;
1818
1819 if (PSE.getPredicate().getComplexity() > SCEVThreshold) {
1820 LLVM_DEBUG(dbgs() << "LV: Vectorization not profitable "
1821 "due to SCEVThreshold");
1822 reportVectorizationFailure("Too many SCEV checks needed",
1823 "Too many SCEV assumptions need to be made and checked at runtime",
1824 "TooManySCEVRunTimeChecks", ORE, TheLoop);
1825 if (DoExtraAnalysis)
1826 Result = false;
1827 else
1828 return false;
1829 }
1830
1831 // Okay! We've done all the tests. If any have failed, return false. Otherwise
1832 // we can vectorize, and at this point we don't have any other mem analysis
1833 // which may limit our maximum vectorization factor, so just return true with
1834 // no restrictions.
1835 return Result;
1836}
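// Editorial note: the runtime-check budget applied above defaults to 16 SCEV
// checks (-vectorize-scev-check-threshold) and is raised to 128
// (-pragma-vectorize-scev-check-threshold) when the user forces
// vectorization, e.g. from Clang with:
//
//   #pragma clang loop vectorize(enable)
//   for (int i = 0; i < n; ++i)
//     a[i] = b[i] + c[i];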
1837
1838bool LoopVectorizationLegality::canFoldTailByMasking() const {
1839
1840 LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");
1841
1842 SmallPtrSet<const Value *, 8> ReductionLiveOuts;
1843
1844 for (const auto &Reduction : getReductionVars())
1845 ReductionLiveOuts.insert(Reduction.second.getLoopExitInstr());
1846
1847 // TODO: handle non-reduction outside users when tail is folded by masking.
1848 for (auto *AE : AllowedExit) {
1849 // Check that all users of allowed exit values are inside the loop or
1850 // are the live-out of a reduction.
1851 if (ReductionLiveOuts.count(AE))
1852 continue;
1853 for (User *U : AE->users()) {
1854 Instruction *UI = cast<Instruction>(U);
1855 if (TheLoop->contains(UI))
1856 continue;
1857 LLVM_DEBUG(
1858 dbgs()
1859 << "LV: Cannot fold tail by masking, loop has an outside user for "
1860 << *UI << "\n");
1861 return false;
1862 }
1863 }
1864
1865 for (const auto &Entry : getInductionVars()) {
1866 PHINode *OrigPhi = Entry.first;
1867 for (User *U : OrigPhi->users()) {
1868 auto *UI = cast<Instruction>(U);
1869 if (!TheLoop->contains(UI)) {
1870 LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking, loop IV has an "
1871 "outside user for "
1872 << *UI << "\n");
1873 return false;
1874 }
1875 }
1876 }
1877
1878 // The list of pointers that we can safely read and write to remains empty.
1879 SmallPtrSet<Value *, 8> SafePointers;
1880
1881 // Check all blocks for predication, including those that ordinarily do not
1882 // need predication such as the header block.
1883 SmallPtrSet<const Instruction *, 8> TmpMaskedOp;
1884 for (BasicBlock *BB : TheLoop->blocks()) {
1885 if (!blockCanBePredicated(BB, SafePointers, TmpMaskedOp)) {
1886 LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking.\n");
1887 return false;
1888 }
1889 }
1890
1891 LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n");
1892
1893 return true;
1894}
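// Illustrative sketch (editorial note, not part of the upstream source):
// with VF = 4 and a trip count of 10, folding the tail by masking runs
// ceil(10/4) = 3 vector iterations and needs no scalar epilogue:
//
//   iteration 0: lanes 0..3, mask <1,1,1,1>
//   iteration 1: lanes 4..7, mask <1,1,1,1>
//   iteration 2: lanes 8..9, mask <1,1,0,0>
//
// As checked above, this is only legal when every block can be predicated
// and no value other than a reduction live-out is used outside the loop.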
1895
1896void LoopVectorizationLegality::prepareToFoldTailByMasking() {
1897 // The list of pointers that we can safely read and write to remains empty.
1898 SmallPtrSet<Value *, 8> SafePointers;
1899
1900 // Mark all blocks for predication, including those that ordinarily do not
1901 // need predication such as the header block.
1902 for (BasicBlock *BB : TheLoop->blocks()) {
1903 [[maybe_unused]] bool R = blockCanBePredicated(BB, SafePointers, MaskedOp);
1904 assert(R && "Must be able to predicate block when tail-folding.");
1905 }
1906}
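// Editorial note: canFoldTailByMasking() above is the side-effect-free query
// (it predicates into the temporary set TmpMaskedOp), while
// prepareToFoldTailByMasking() commits the decision by recording every
// masked operation in the member set MaskedOp for later use by the
// vectorizer.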
1907
1908} // namespace llvm