//===- LoopVectorizationLegality.cpp --------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file provides loop vectorization legality analysis. Original code
// resided in LoopVectorize.cpp for a long time.
//
// At this point, it is implemented as a utility class, not as an analysis
// pass. It should be easy to create an analysis pass around it if there
// is a need (but D45420 needs to happen first).
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"

using namespace llvm;
using namespace PatternMatch;

#define LV_NAME "loop-vectorize"
#define DEBUG_TYPE LV_NAME

static cl::opt<bool>
    EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
                       cl::desc("Enable if-conversion during vectorization."));

static cl::opt<bool> AllowStridedPointerIVs(
    "lv-strided-pointer-ivs", cl::init(false), cl::Hidden,
    cl::desc("Enable recognition of non-constant strided "
             "pointer induction variables."));

static cl::opt<bool>
    HintsAllowReordering("hints-allow-reordering", cl::init(true), cl::Hidden,
                         cl::desc("Allow enabling loop hints to reorder "
                                  "FP operations during vectorization."));
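
// Illustrative usage (a sketch, not part of this file): the options above are
// hidden cl::opt flags, so they can be toggled explicitly when running the
// pass, e.g.:
//   opt -passes=loop-vectorize -enable-if-conversion=false input.ll
//   opt -passes=loop-vectorize -hints-allow-reordering=false input.ll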

// TODO: Move size-based thresholds out of legality checking, make cost based
// decisions instead of hard thresholds.
static cl::opt<unsigned> VectorizeSCEVCheckThreshold(
    "vectorize-scev-check-threshold", cl::init(16), cl::Hidden,
    cl::desc("The maximum number of SCEV checks allowed."));

static cl::opt<unsigned> PragmaVectorizeSCEVCheckThreshold(
    "pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden,
    cl::desc("The maximum number of SCEV checks allowed with a "
             "vectorize(enable) pragma"));

static cl::opt<LoopVectorizeHints::ScalableForceKind>
    ForceScalableVectorization(
        "scalable-vectorization", cl::init(LoopVectorizeHints::SK_Unspecified),
        cl::Hidden,
        cl::desc("Control whether the compiler can use scalable vectors to "
                 "vectorize a loop"),
        cl::values(
            clEnumValN(LoopVectorizeHints::SK_FixedWidthOnly, "off",
                       "Scalable vectorization is disabled."),
            clEnumValN(LoopVectorizeHints::SK_PreferScalable, "preferred",
                       "Scalable vectorization is available and favored when "
                       "the cost is inconclusive."),
            clEnumValN(LoopVectorizeHints::SK_PreferScalable, "on",
                       "Scalable vectorization is available and favored when "
                       "the cost is inconclusive.")));

static cl::opt<bool> EnableHistogramVectorization(
    "enable-histogram-loop-vectorization", cl::init(false), cl::Hidden,
    cl::desc("Enables autovectorization of some loops containing histograms"));

/// Maximum vectorization interleave count.
static const unsigned MaxInterleaveFactor = 16;

namespace llvm {

bool LoopVectorizeHints::Hint::validate(unsigned Val) {
  switch (Kind) {
  case HK_WIDTH:
    return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth;
  case HK_INTERLEAVE:
    return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
  case HK_FORCE:
    return (Val <= 1);
  case HK_ISVECTORIZED:
  case HK_PREDICATE:
  case HK_SCALABLE:
    return (Val == 0 || Val == 1);
  }
  return false;
}
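
// For example (derived from the checks above): validate(4) holds for HK_WIDTH
// and HK_INTERLEAVE (a power of two within the respective bound), validate(3)
// fails for HK_INTERLEAVE (not a power of two), and validate(2) fails for
// HK_FORCE and the boolean-like hints, which only accept 0 or 1.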

LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
                                       bool InterleaveOnlyWhenForced,
                                       OptimizationRemarkEmitter &ORE,
                                       const TargetTransformInfo *TTI)
    : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH),
      Interleave("interleave.count", InterleaveOnlyWhenForced, HK_INTERLEAVE),
      Force("vectorize.enable", FK_Undefined, HK_FORCE),
      IsVectorized("isvectorized", 0, HK_ISVECTORIZED),
      Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE),
      Scalable("vectorize.scalable.enable", SK_Unspecified, HK_SCALABLE),
      TheLoop(L), ORE(ORE) {
  // Populate values with existing loop metadata.
  getHintsFromMetadata();

  // force-vector-interleave overrides DisableInterleaving.
  if (VectorizerParams::isInterleaveForced())
    Interleave.Value = VectorizerParams::VectorizationInterleave;

  // If the metadata doesn't explicitly specify whether to enable scalable
  // vectorization, then decide based on the following criteria (increasing
  // level of priority):
  //  - Target default
  //  - Metadata width
  //  - Force option (always overrides)
  if (SK_Unspecified == Scalable.Value) {
    if (TTI)
      Scalable.Value = TTI->enableScalableVectorization() ? SK_PreferScalable
                                                          : SK_FixedWidthOnly;

    if (Width.Value)
      // If the width is set, but the metadata says nothing about the scalable
      // property, then assume it concerns only a fixed-width UserVF.
      // If width is not set, the flag takes precedence.
      Scalable.Value = SK_FixedWidthOnly;
  }

  // If the flag is set to force any use of scalable vectors, override the loop
  // hints.
  if (ForceScalableVectorization.getValue() !=
      LoopVectorizeHints::SK_Unspecified)
    Scalable.Value = ForceScalableVectorization.getValue();

  // Scalable vectorization is disabled if no preference is specified.
  if (SK_Unspecified == Scalable.Value)
    Scalable.Value = SK_FixedWidthOnly;

  if (IsVectorized.Value != 1)
    // If the vectorization width and interleaving count are both 1 then
    // consider the loop to have been already vectorized because there's
    // nothing more that we can do.
    IsVectorized.Value =
        getWidth() == ElementCount::getFixed(1) && getInterleave() == 1;
  LLVM_DEBUG(if (InterleaveOnlyWhenForced && getInterleave() == 1) dbgs()
             << "LV: Interleaving disabled by the pass manager\n");
}
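
// As a sketch of the input this constructor consumes (hypothetical values),
// loop metadata carrying these hints looks roughly like:
//
//   br i1 %ec, label %exit, label %loop, !llvm.loop !0
//   !0 = distinct !{!0, !1, !2}
//   !1 = !{!"llvm.loop.vectorize.width", i32 4}
//   !2 = !{!"llvm.loop.vectorize.enable", i1 true}
//
// With this input, Width becomes 4 and, since the metadata says nothing about
// the scalable property, Scalable resolves to SK_FixedWidthOnly above.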

void LoopVectorizeHints::setAlreadyVectorized() {
  LLVMContext &Context = TheLoop->getHeader()->getContext();

  MDNode *IsVectorizedMD = MDNode::get(
      Context,
      {MDString::get(Context, "llvm.loop.isvectorized"),
       ConstantAsMetadata::get(ConstantInt::get(Context, APInt(32, 1)))});
  MDNode *LoopID = TheLoop->getLoopID();
  MDNode *NewLoopID =
      makePostTransformationMetadata(Context, LoopID,
                                     {Twine(Prefix(), "vectorize.").str(),
                                      Twine(Prefix(), "interleave.").str()},
                                     {IsVectorizedMD});
  TheLoop->setLoopID(NewLoopID);

  // Update internal cache.
  IsVectorized.Value = 1;
}
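
// Sketch of the effect: after this runs, the loop ID carries
//   !{!"llvm.loop.isvectorized", i32 1}
// and all llvm.loop.vectorize.* / llvm.loop.interleave.* hints are dropped,
// so a later vectorizer invocation treats the loop as already processed.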

void LoopVectorizeHints::reportDisallowedVectorization(
    const StringRef DebugMsg, const StringRef RemarkName,
    const StringRef RemarkMsg, const Loop *L) const {
  LLVM_DEBUG(dbgs() << "LV: Not vectorizing: " << DebugMsg << ".\n");
  ORE.emit(OptimizationRemarkMissed(LV_NAME, RemarkName, L->getStartLoc(),
                                    L->getHeader())
           << "loop not vectorized: " << RemarkMsg);
}

bool LoopVectorizeHints::allowVectorization(
    Function *F, Loop *L, bool VectorizeOnlyWhenForced) const {
  if (Force.Value == LoopVectorizeHints::FK_Disabled ||
      hasDisableAllTransformsHint(L)) {
    if (Force.Value == LoopVectorizeHints::FK_Disabled) {
      reportDisallowedVectorization("#pragma vectorize disable",
                                    "MissedExplicitlyDisabled",
                                    "vectorization is explicitly disabled", L);
    } else if (hasDisableAllTransformsHint(L)) {
      reportDisallowedVectorization("loop hasDisableAllTransformsHint",
                                    "MissedTransformsDisabled",
                                    "loop transformations are disabled", L);
    } else {
      llvm_unreachable("loop vect disabled for an unknown reason");
    }
    return false;
  }

  if (VectorizeOnlyWhenForced && getForce() != LoopVectorizeHints::FK_Enabled) {
    reportDisallowedVectorization(
        "VectorizeOnlyWhenForced is set, and no #pragma vectorize enable",
        "MissedForceOnly", "only vectorizing loops that explicitly request it",
        L);
    return false;
  }

  if (getIsVectorized() == 1) {
    LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
    // FIXME: Add interleave.disable metadata. This will allow
    // vectorize.disable to be used without disabling the pass and errors
    // to differentiate between disabled vectorization and a width of 1.
    ORE.emit([&]() {
      return OptimizationRemarkMissed(LV_NAME, "AllDisabled", L->getStartLoc(),
                                      L->getHeader())
             << "loop not vectorized: vectorization and interleaving are "
                "explicitly disabled, or the loop has already been "
                "vectorized";
    });
    return false;
  }

  return true;
}

void LoopVectorizeHints::emitRemarkWithHints() const {
  using namespace ore;

  ORE.emit([&]() {
    if (Force.Value == LoopVectorizeHints::FK_Disabled)
      return OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled",
                                      TheLoop->getStartLoc(),
                                      TheLoop->getHeader())
             << "loop not vectorized: vectorization is explicitly disabled";

    OptimizationRemarkMissed R(LV_NAME, "MissedDetails", TheLoop->getStartLoc(),
                               TheLoop->getHeader());
    R << "loop not vectorized";
    if (Force.Value == LoopVectorizeHints::FK_Enabled) {
      R << " (Force=" << NV("Force", true);
      if (Width.Value != 0)
        R << ", Vector Width=" << NV("VectorWidth", getWidth());
      if (getInterleave() != 0)
        R << ", Interleave Count=" << NV("InterleaveCount", getInterleave());
      R << ")";
    }
    return R;
  });
}

const char *LoopVectorizeHints::vectorizeAnalysisPassName() const {
  if (getWidth() == ElementCount::getFixed(1))
    return LV_NAME;
  if (getForce() == LoopVectorizeHints::FK_Disabled)
    return LV_NAME;
  if (getForce() == LoopVectorizeHints::FK_Undefined && getWidth().isZero())
    return LV_NAME;
  return OptimizationRemarkAnalysis::AlwaysPrint;
}

bool LoopVectorizeHints::allowReordering() const {
  // Allow the vectorizer to change the order of operations if enabling
  // loop hints are provided
  ElementCount EC = getWidth();
  return HintsAllowReordering &&
         (getForce() == LoopVectorizeHints::FK_Enabled ||
          EC.getKnownMinValue() > 1);
}

void LoopVectorizeHints::getHintsFromMetadata() {
  MDNode *LoopID = TheLoop->getLoopID();
  if (!LoopID)
    return;

  // First operand should refer to the loop id itself.
  assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
  assert(LoopID->getOperand(0) == LoopID && "invalid loop id");

  for (const MDOperand &MDO : llvm::drop_begin(LoopID->operands())) {
    const MDString *S = nullptr;
    SmallVector<Metadata *, 4> Args;

    // The expected hint is either an MDString or an MDNode with the first
    // operand an MDString.
    if (const MDNode *MD = dyn_cast<MDNode>(MDO)) {
      if (!MD || MD->getNumOperands() == 0)
        continue;
      S = dyn_cast<MDString>(MD->getOperand(0));
      for (unsigned Idx = 1; Idx < MD->getNumOperands(); ++Idx)
        Args.push_back(MD->getOperand(Idx));
    } else {
      S = dyn_cast<MDString>(MDO);
      assert(Args.size() == 0 && "too many arguments for MDString");
    }

    if (!S)
      continue;

    // Check if the hint starts with the loop metadata prefix.
    StringRef Name = S->getString();
    if (Args.size() == 1)
      setHint(Name, Args[0]);
  }
}
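
// The two accepted forms mentioned above, as a sketch: a bare MDString such
// as
//   !{!"llvm.loop.unroll.disable"}          ; no arguments, skipped here
// and an MDNode whose first operand is an MDString, e.g.
//   !{!"llvm.loop.vectorize.width", i32 8}  ; one argument -> setHint()
// Only hints with exactly one argument are forwarded to setHint().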

void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) {
  if (!Name.consume_front(Prefix()))
    return;

  const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg);
  if (!C)
    return;
  unsigned Val = C->getZExtValue();

  Hint *Hints[] = {&Width, &Interleave, &Force,
                   &IsVectorized, &Predicate, &Scalable};
  for (auto *H : Hints) {
    if (Name == H->Name) {
      if (H->validate(Val))
        H->Value = Val;
      else
        LLVM_DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n");
      break;
    }
  }
}

// Return true if the inner loop \p Lp is uniform with regard to the outer loop
// \p OuterLp (i.e., if the outer loop is vectorized, all the vector lanes
// executing the inner loop will execute the same iterations). This check is
// very constrained for now but it will be relaxed in the future. \p Lp is
// considered uniform if it meets all the following conditions:
//   1) it has a canonical IV (starting from 0 and with stride 1),
//   2) its latch terminator is a conditional branch and,
//   3) its latch condition is a compare instruction whose operands are the
//      canonical IV and an OuterLp invariant.
// This check doesn't take into account the uniformity of other conditions not
// related to the loop latch because they don't affect the loop uniformity.
//
// NOTE: We decided to keep all these checks and their associated documentation
// together so that we can easily have a picture of the currently supported
// loop nests. However, some of the current checks don't depend on \p OuterLp
// and would be redundantly executed for each \p Lp if we invoked this function
// for different candidate outer loops. This is not the case for now because we
// don't currently have the infrastructure to evaluate multiple candidate outer
// loops and \p OuterLp will be a fixed parameter while we only support explicit
// outer loop vectorization. It's also very likely that these checks go away
// before introducing the aforementioned infrastructure. However, if this is not
// the case, we should move the \p OuterLp independent checks to a separate
// function that is only executed once for each \p Lp.
static bool isUniformLoop(Loop *Lp, Loop *OuterLp) {
  assert(Lp->getLoopLatch() && "Expected loop with a single latch.");

  // If Lp is the outer loop, it's uniform by definition.
  if (Lp == OuterLp)
    return true;
  assert(OuterLp->contains(Lp) && "OuterLp must contain Lp.");

  // 1.
  PHINode *IV = Lp->getCanonicalInductionVariable();
  if (!IV) {
    LLVM_DEBUG(dbgs() << "LV: Canonical IV not found.\n");
    return false;
  }

  // 2.
  BasicBlock *Latch = Lp->getLoopLatch();
  auto *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator());
  if (!LatchBr || LatchBr->isUnconditional()) {
    LLVM_DEBUG(dbgs() << "LV: Unsupported loop latch branch.\n");
    return false;
  }

  // 3.
  auto *LatchCmp = dyn_cast<CmpInst>(LatchBr->getCondition());
  if (!LatchCmp) {
    LLVM_DEBUG(
        dbgs() << "LV: Loop latch condition is not a compare instruction.\n");
    return false;
  }

  Value *CondOp0 = LatchCmp->getOperand(0);
  Value *CondOp1 = LatchCmp->getOperand(1);
  Value *IVUpdate = IV->getIncomingValueForBlock(Latch);
  if (!(CondOp0 == IVUpdate && OuterLp->isLoopInvariant(CondOp1)) &&
      !(CondOp1 == IVUpdate && OuterLp->isLoopInvariant(CondOp0))) {
    LLVM_DEBUG(dbgs() << "LV: Loop latch condition is not uniform.\n");
    return false;
  }

  return true;
}
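
// An illustrative (hypothetical) C loop nest satisfying the three conditions
// above, where the inner loop is uniform w.r.t. the outer loop:
//
//   for (int i = 0; i < N; ++i)    // OuterLp, explicit vectorization target.
//     for (int j = 0; j < M; ++j)  // Lp: canonical IV j (starts at 0, stride
//       A[i][j] += B[i][j];        // 1); latch compares the IV update against
//                                  // M, which is invariant in OuterLp.
//
// If the inner bound were i-dependent (e.g. j < i), different vector lanes of
// the outer loop would execute different inner trip counts and the latch
// condition check above would fail.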

// Return true if \p Lp and all its nested loops are uniform with regard to \p
// OuterLp.
static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp) {
  if (!isUniformLoop(Lp, OuterLp))
    return false;

  // Check if nested loops are uniform.
  for (Loop *SubLp : *Lp)
    if (!isUniformLoopNest(SubLp, OuterLp))
      return false;

  return true;
}

static IntegerType *getInductionIntegerTy(const DataLayout &DL, Type *Ty) {
  assert(Ty->isIntOrPtrTy() && "Expected integer or pointer type");

  if (Ty->isPointerTy())
    return DL.getIntPtrType(Ty->getContext(), Ty->getPointerAddressSpace());

  // It is possible that char's or short's overflow when we ask for the loop's
  // trip count, work around this by changing the type size.
  if (Ty->getScalarSizeInBits() < 32)
    return Type::getInt32Ty(Ty->getContext());

  return cast<IntegerType>(Ty);
}
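
// For example (sketch): an i8 or i16 induction is widened to i32 here so that
// trip-count arithmetic cannot wrap in the narrow type, an i64 induction is
// returned unchanged, and a pointer induction is mapped to the DataLayout's
// integer pointer type for its address space (e.g. i64 on typical 64-bit
// targets).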

static Type *getWiderInductionTy(const DataLayout &DL, Type *Ty0,
                                 Type *Ty1) {
  IntegerType *TyA = getInductionIntegerTy(DL, Ty0);
  IntegerType *TyB = getInductionIntegerTy(DL, Ty1);
  return TyA->getScalarSizeInBits() > TyB->getScalarSizeInBits() ? TyA : TyB;
}

/// Check that the instruction has outside loop users and is not an
/// identified reduction variable.
static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
                               SmallPtrSetImpl<Value *> &AllowedExit) {
  // Reductions, inductions and non-header phis are allowed to have exit
  // users. All other instructions must not have external users.
  if (!AllowedExit.count(Inst))
    // Check that all of the users of the loop are inside the BB.
    for (User *U : Inst->users()) {
      auto *UI = cast<Instruction>(U);
      // This user may be a reduction exit value.
      if (!TheLoop->contains(UI)) {
        LLVM_DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n');
        return true;
      }
    }
  return false;
}

/// Returns true if \p A and \p B have the same pointer operand or their
/// pointer operands have the same SCEV address.
static bool storeToSameAddress(ScalarEvolution *SE, StoreInst *A,
                               StoreInst *B) {
  // Compare the stores themselves.
  if (A == B)
    return true;

  // Otherwise compare pointers.
  Value *APtr = A->getPointerOperand();
  Value *BPtr = B->getPointerOperand();
  if (APtr == BPtr)
    return true;

  // Otherwise compare address SCEVs.
  return SE->getSCEV(APtr) == SE->getSCEV(BPtr);
}

int LoopVectorizationLegality::isConsecutivePtr(Type *AccessTy,
                                                Value *Ptr) const {
  // FIXME: Currently, the set of symbolic strides is sometimes queried before
  // it's collected. This happens from canVectorizeWithIfConvert, when the
  // pointer is checked to reference consecutive elements suitable for a
  // masked access.
  const auto &Strides =
      LAI ? LAI->getSymbolicStrides() : DenseMap<Value *, const SCEV *>();

  int Stride = getPtrStride(PSE, AccessTy, Ptr, TheLoop, *DT, Strides,
                            AllowRuntimeSCEVChecks, false)
                   .value_or(0);
  if (Stride == 1 || Stride == -1)
    return Stride;
  return 0;
}

bool LoopVectorizationLegality::isInvariant(Value *V) const {
  return LAI->isInvariant(V);
}

namespace {
/// A rewriter to build the SCEVs for each of the VF lanes in the expected
/// vectorized loop, which can then be compared to detect their uniformity.
/// This is done by replacing the AddRec SCEVs of the original scalar loop
/// (TheLoop) with new AddRecs where the step is multiplied by StepMultiplier
/// and Offset * Step is added. Also checks if all sub-expressions are
/// analyzable w.r.t. uniformity.
class SCEVAddRecForUniformityRewriter
    : public SCEVRewriteVisitor<SCEVAddRecForUniformityRewriter> {
  /// Multiplier to be applied to the step of AddRecs in TheLoop.
  unsigned StepMultiplier;

  /// Offset to be added to the AddRecs in TheLoop.
  unsigned Offset;

  /// Loop for which to rewrite AddRecs.
  Loop *TheLoop;

  /// Is any sub-expression not analyzable w.r.t. uniformity?
  bool CannotAnalyze = false;

  bool canAnalyze() const { return !CannotAnalyze; }

public:
  SCEVAddRecForUniformityRewriter(ScalarEvolution &SE, unsigned StepMultiplier,
                                  unsigned Offset, Loop *TheLoop)
      : SCEVRewriteVisitor(SE), StepMultiplier(StepMultiplier), Offset(Offset),
        TheLoop(TheLoop) {}

  const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
    assert(Expr->getLoop() == TheLoop &&
           "addrec outside of TheLoop must be invariant and should have been "
           "handled earlier");
    // Build a new AddRec by multiplying the step by StepMultiplier and
    // incrementing the start by Offset * step.
    Type *Ty = Expr->getType();
    const SCEV *Step = Expr->getStepRecurrence(SE);
    if (!SE.isLoopInvariant(Step, TheLoop)) {
      CannotAnalyze = true;
      return Expr;
    }
    const SCEV *NewStep =
        SE.getMulExpr(Step, SE.getConstant(Ty, StepMultiplier));
    const SCEV *ScaledOffset = SE.getMulExpr(Step, SE.getConstant(Ty, Offset));
    const SCEV *NewStart = SE.getAddExpr(Expr->getStart(), ScaledOffset);
    return SE.getAddRecExpr(NewStart, NewStep, TheLoop, SCEV::FlagAnyWrap);
  }

  const SCEV *visit(const SCEV *S) {
    if (CannotAnalyze || SE.isLoopInvariant(S, TheLoop))
      return S;
    return SCEVRewriteVisitor<SCEVAddRecForUniformityRewriter>::visit(S);
  }

  const SCEV *visitUnknown(const SCEVUnknown *S) {
    if (SE.isLoopInvariant(S, TheLoop))
      return S;
    // The value could vary across iterations.
    CannotAnalyze = true;
    return S;
  }

  const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *S) {
    // Could not analyze the expression.
    CannotAnalyze = true;
    return S;
  }

  static const SCEV *rewrite(const SCEV *S, ScalarEvolution &SE,
                             unsigned StepMultiplier, unsigned Offset,
                             Loop *TheLoop) {
    /// Bail out if the expression does not contain a UDiv expression.
    /// Uniform values which are not loop invariant require operations to strip
    /// out the lowest bits. For now just look for UDivs and use them to avoid
    /// re-writing UDIV-free expressions for other lanes to limit compile time.
    if (!SCEVExprContains(S,
                          [](const SCEV *S) { return isa<SCEVUDivExpr>(S); }))
      return SE.getCouldNotCompute();

    SCEVAddRecForUniformityRewriter Rewriter(SE, StepMultiplier, Offset,
                                             TheLoop);
    const SCEV *Result = Rewriter.visit(S);

    if (Rewriter.canAnalyze())
      return Result;
    return SE.getCouldNotCompute();
  }
};

} // namespace
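
// A worked sketch of the rewrite above: with StepMultiplier = VF and lane
// offset I, an AddRec {Start,+,Step} of TheLoop becomes
// {Start + I * Step,+,VF * Step}, i.e. lane I's per-vector-iteration
// sequence. isUniform() below builds this expression for every lane and
// treats the original value as uniform only if all lanes' rewritten SCEVs
// compare equal; e.g. for VF = 2, {0,+,1} yields {0,+,2} for lane 0 but
// {1,+,2} for lane 1, so a plain IV is correctly reported as non-uniform.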

bool LoopVectorizationLegality::isUniform(Value *V, ElementCount VF) const {
  if (isInvariant(V))
    return true;
  if (VF.isScalable())
    return false;
  if (VF.isScalar())
    return true;

  // Since we rely on SCEV for uniformity, if the type is not SCEVable, it is
  // never considered uniform.
  auto *SE = PSE.getSE();
  if (!SE->isSCEVable(V->getType()))
    return false;
  const SCEV *S = SE->getSCEV(V);

  // Rewrite AddRecs in TheLoop to step by VF and check if the expression for
  // lane 0 matches the expressions for all other lanes.
  unsigned FixedVF = VF.getKnownMinValue();
  const SCEV *FirstLaneExpr =
      SCEVAddRecForUniformityRewriter::rewrite(S, *SE, FixedVF, 0, TheLoop);
  if (isa<SCEVCouldNotCompute>(FirstLaneExpr))
    return false;

  // Make sure the expressions for lanes FixedVF-1..1 match the expression for
  // lane 0. We check lanes in reverse order for compile-time, as frequently
  // checking the last lane is sufficient to rule out uniformity.
  return all_of(reverse(seq<unsigned>(1, FixedVF)), [&](unsigned I) {
    const SCEV *IthLaneExpr =
        SCEVAddRecForUniformityRewriter::rewrite(S, *SE, FixedVF, I, TheLoop);
    return FirstLaneExpr == IthLaneExpr;
  });
}

bool LoopVectorizationLegality::isUniformMemOp(Instruction &I,
                                               ElementCount VF) const {
  Value *Ptr = getLoadStorePointerOperand(&I);
  if (!Ptr)
    return false;
  // Note: There's nothing inherent which prevents predicated loads and
  // stores from being uniform. The current lowering simply doesn't handle
  // it; in particular, the cost model distinguishes scatter/gather from
  // scalar w/predication, and we currently rely on the scalar path.
  return isUniform(Ptr, VF) && !blockNeedsPredication(I.getParent());
}

bool LoopVectorizationLegality::canVectorizeOuterLoop() {
  assert(!TheLoop->isInnermost() && "We are not vectorizing an outer loop.");
  // Store the result and return it at the end instead of exiting early, in
  // case allowExtraAnalysis is used to report multiple reasons for not
  // vectorizing.
  bool Result = true;
  bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);

  for (BasicBlock *BB : TheLoop->blocks()) {
    // Check whether the BB terminator is a BranchInst. Any other terminator is
    // not supported yet.
    auto *Br = dyn_cast<BranchInst>(BB->getTerminator());
    if (!Br) {
      reportVectorizationFailure("Unsupported basic block terminator",
                                 "loop control flow is not understood by vectorizer",
                                 "CFGNotUnderstood", ORE, TheLoop);
      if (DoExtraAnalysis)
        Result = false;
      else
        return false;
    }

    // Check whether the BranchInst is a supported one. Only unconditional
    // branches, conditional branches with an outer loop invariant condition or
    // backedges are supported.
    // FIXME: We skip these checks when VPlan predication is enabled as we
    // want to allow divergent branches. This whole check will be removed
    // once VPlan predication is on by default.
    if (Br && Br->isConditional() &&
        !TheLoop->isLoopInvariant(Br->getCondition()) &&
        !LI->isLoopHeader(Br->getSuccessor(0)) &&
        !LI->isLoopHeader(Br->getSuccessor(1))) {
      reportVectorizationFailure("Unsupported conditional branch",
                                 "loop control flow is not understood by vectorizer",
                                 "CFGNotUnderstood", ORE, TheLoop);
      if (DoExtraAnalysis)
        Result = false;
      else
        return false;
    }
  }

  // Check whether inner loops are uniform. At this point, we only support
  // simple outer-loop scenarios with uniform nested loops.
  if (!isUniformLoopNest(TheLoop /*loop nest*/,
                         TheLoop /*context outer loop*/)) {
    reportVectorizationFailure("Outer loop contains divergent loops",
                               "loop control flow is not understood by vectorizer",
                               "CFGNotUnderstood", ORE, TheLoop);
    if (DoExtraAnalysis)
      Result = false;
    else
      return false;
  }

  // Check whether we are able to set up outer loop induction.
  if (!setupOuterLoopInductions()) {
    reportVectorizationFailure("Unsupported outer loop Phi(s)",
                               "UnsupportedPhi", ORE, TheLoop);
    if (DoExtraAnalysis)
      Result = false;
    else
      return false;
  }

  return Result;
}

void LoopVectorizationLegality::addInductionPhi(
    PHINode *Phi, const InductionDescriptor &ID,
    SmallPtrSetImpl<Value *> &AllowedExit) {
  Inductions[Phi] = ID;

  // In case this induction also comes with casts that we know we can ignore
  // in the vectorized loop body, record them here. All casts could be recorded
  // here for ignoring, but it suffices to record only the first (as it is the
  // only one that may be used outside the cast sequence).
  ArrayRef<Instruction *> Casts = ID.getCastInsts();
  if (!Casts.empty())
    InductionCastsToIgnore.insert(*Casts.begin());

  Type *PhiTy = Phi->getType();
  const DataLayout &DL = Phi->getDataLayout();

  assert((PhiTy->isIntOrPtrTy() || PhiTy->isFloatingPointTy()) &&
         "Expected int, ptr, or FP induction phi type");

  // Get the widest type.
  if (PhiTy->isIntOrPtrTy()) {
    if (!WidestIndTy)
      WidestIndTy = getInductionIntegerTy(DL, PhiTy);
    else
      WidestIndTy = getWiderInductionTy(DL, PhiTy, WidestIndTy);
  }

  // Int inductions are special because we only allow one IV.
  if (ID.getKind() == InductionDescriptor::IK_IntInduction &&
      ID.getConstIntStepValue() && ID.getConstIntStepValue()->isOne() &&
      isa<Constant>(ID.getStartValue()) &&
      cast<Constant>(ID.getStartValue())->isNullValue()) {

    // Use the phi node with the widest type as induction. Use the last
    // one if there are multiple (no good reason for doing this other
    // than it is expedient). We've checked that it begins at zero and
    // steps by one, so this is a canonical induction variable.
    if (!PrimaryInduction || PhiTy == WidestIndTy)
      PrimaryInduction = Phi;
  }

  // Both the PHI node itself, and the "post-increment" value feeding
  // back into the PHI node may have external users.
  // We can allow those uses, except if the SCEVs we have for them rely
  // on predicates that only hold within the loop, since allowing the exit
  // currently means re-using this SCEV outside the loop (see PR33706 for more
  // details).
  if (PSE.getPredicate().isAlwaysTrue()) {
    AllowedExit.insert(Phi);
    AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch()));
  }

  LLVM_DEBUG(dbgs() << "LV: Found an induction variable.\n");
}

bool LoopVectorizationLegality::setupOuterLoopInductions() {
  BasicBlock *Header = TheLoop->getHeader();

  // Returns true if a given Phi is a supported induction.
  auto IsSupportedPhi = [&](PHINode &Phi) -> bool {
    InductionDescriptor ID;
    if (InductionDescriptor::isInductionPHI(&Phi, TheLoop, PSE, ID) &&
        ID.getKind() == InductionDescriptor::IK_IntInduction) {
      addInductionPhi(&Phi, ID, AllowedExit);
      return true;
    }
    // Bail out for any Phi in the outer loop header that is not a supported
    // induction.
    LLVM_DEBUG(
        dbgs() << "LV: Found unsupported PHI for outer loop vectorization.\n");
    return false;
  };

  return llvm::all_of(Header->phis(), IsSupportedPhi);
}

/// Checks if a function is scalarizable according to the TLI, in
/// the sense that it should be vectorized and then expanded in
/// multiple scalar calls. This is represented in the
/// TLI via mappings that do not specify a vector name, as in the
/// following example:
///
///    const VecDesc VecIntrinsics[] = {
///      {"llvm.phx.abs.i32", "", 4}
///    };
static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI) {
  const StringRef ScalarName = CI.getCalledFunction()->getName();
  bool Scalarize = TLI.isFunctionVectorizable(ScalarName);
  // Check that all known VFs are not associated to a vector
  // function, i.e. the vector name is empty.
  if (Scalarize) {
    ElementCount WidestFixedVF, WidestScalableVF;
    TLI.getWidestVF(ScalarName, WidestFixedVF, WidestScalableVF);
    for (ElementCount VF = ElementCount::getFixed(2);
         ElementCount::isKnownLE(VF, WidestFixedVF); VF *= 2)
      Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF);
    for (ElementCount VF = ElementCount::getScalable(1);
         ElementCount::isKnownLE(VF, WidestScalableVF); VF *= 2)
      Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF);
    assert((WidestScalableVF.isZero() || !Scalarize) &&
           "Caller may decide to scalarize a variant using a scalable VF");
  }
  return Scalarize;
}
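
// Sketch of the intent: given a mapping like {"llvm.phx.abs.i32", "", 4} as
// in the example above, isFunctionVectorizable(ScalarName) is true, but no
// queried VF names a vector function, so the call is classified as
// scalarizable: the loop may still be vectorized, with the call expanded into
// VF scalar calls afterwards.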

bool LoopVectorizationLegality::canVectorizeInstrs() {
  bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
  bool Result = true;

  // For each block in the loop.
  for (BasicBlock *BB : TheLoop->blocks()) {
    // Scan the instructions in the block and look for hazards.
    for (Instruction &I : *BB) {
      Result &= canVectorizeInstr(I);
      if (!DoExtraAnalysis && !Result)
        return false;
    }
  }

  if (!PrimaryInduction) {
    if (Inductions.empty()) {
      reportVectorizationFailure(
          "Did not find one integer induction var",
          "loop induction variable could not be identified",
          "NoInductionVariable", ORE, TheLoop);
      return false;
    }
    if (!WidestIndTy) {
      reportVectorizationFailure(
          "Did not find one integer induction var",
          "integer loop induction variable could not be identified",
          "NoIntegerInductionVariable", ORE, TheLoop);
      return false;
    }
    LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
  }

  // Now we know the widest induction type, check if our found induction
  // is the same size. If it's not, unset it here and InnerLoopVectorizer
  // will create another.
  if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType())
    PrimaryInduction = nullptr;

  return Result;
}

bool LoopVectorizationLegality::canVectorizeInstr(Instruction &I) {
  BasicBlock *BB = I.getParent();
  BasicBlock *Header = TheLoop->getHeader();

  if (auto *Phi = dyn_cast<PHINode>(&I)) {
    Type *PhiTy = Phi->getType();
    // Check that this PHI type is allowed.
    if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
        !PhiTy->isPointerTy()) {
      reportVectorizationFailure(
          "Found a non-int non-pointer PHI",
          "loop control flow is not understood by vectorizer",
          "CFGNotUnderstood", ORE, TheLoop);
      return false;
    }

    // If this PHINode is not in the header block, then we know that we
    // can convert it to select during if-conversion. No need to check if
    // the PHIs in this block are induction or reduction variables.
    if (BB != Header) {
      // Non-header phi nodes that have outside uses can be vectorized. Add
      // them to the list of allowed exits.
      // Unsafe cyclic dependencies with header phis are identified during
      // legalization for reduction, induction and fixed order
      // recurrences.
      AllowedExit.insert(&I);
      return true;
    }

    // We only allow if-converted PHIs with exactly two incoming values.
    if (Phi->getNumIncomingValues() != 2) {
      reportVectorizationFailure(
          "Found an invalid PHI",
          "loop control flow is not understood by vectorizer",
          "CFGNotUnderstood", ORE, TheLoop, Phi);
      return false;
    }

    RecurrenceDescriptor RedDes;
    if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC, DT,
                                             PSE.getSE())) {
      Requirements->addExactFPMathInst(RedDes.getExactFPMathInst());
      AllowedExit.insert(RedDes.getLoopExitInstr());
      Reductions[Phi] = RedDes;
      assert((Phi->hasOneUse() ||
              RecurrenceDescriptor::isMinMaxRecurrenceKind(
                  RedDes.getRecurrenceKind())) &&
             "Only min/max recurrences are allowed to have multiple uses "
             "currently");
      return true;
    }

    // We prevent matching non-constant strided pointer IVS to preserve
    // historical vectorizer behavior after a generalization of the
    // IVDescriptor code. The intent is to remove this check, but we
    // have to fix issues around code quality for such loops first.
    auto IsDisallowedStridedPointerInduction =
        [](const InductionDescriptor &ID) {
          if (AllowStridedPointerIVs)
            return false;
          return ID.getKind() == InductionDescriptor::IK_PtrInduction &&
                 ID.getConstIntStepValue() == nullptr;
        };

    // TODO: Instead of recording the AllowedExit, it would be good to
    // record the complementary set: NotAllowedExit. These include (but may
    // not be limited to):
    // 1. Reduction phis as they represent the one-before-last value, which
    //    is not available when vectorized
    // 2. Induction phis and increment when SCEV predicates cannot be used
    //    outside the loop - see addInductionPhi
    // 3. Non-Phis with outside uses when SCEV predicates cannot be used
    //    outside the loop - see call to hasOutsideLoopUser in the non-phi
    //    handling below
    // 4. FixedOrderRecurrence phis that can possibly be handled by
    //    extraction.
    // By recording these, we can then reason about ways to vectorize each
    // of these NotAllowedExit.
    InductionDescriptor ID;
    if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID) &&
        !IsDisallowedStridedPointerInduction(ID)) {
      addInductionPhi(Phi, ID, AllowedExit);
      Requirements->addExactFPMathInst(ID.getExactFPMathInst());
      return true;
    }

    if (RecurrenceDescriptor::isFixedOrderRecurrence(Phi, TheLoop, DT)) {
      AllowedExit.insert(Phi);
      FixedOrderRecurrences.insert(Phi);
      return true;
    }

    // As a last resort, coerce the PHI to an AddRec expression
    // and re-try classifying it as an induction PHI.
    if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true) &&
        !IsDisallowedStridedPointerInduction(ID)) {
      addInductionPhi(Phi, ID, AllowedExit);
      return true;
    }

    reportVectorizationFailure("Found an unidentified PHI",
                               "value that could not be identified as "
                               "reduction is used outside the loop",
                               "NonReductionValueUsedOutsideLoop", ORE, TheLoop,
                               Phi);
    return false;
  } // end of PHI handling

  // We handle calls that:
  //   * Have a mapping to an IR intrinsic.
  //   * Have a vector version available.
  auto *CI = dyn_cast<CallInst>(&I);

  if (CI && !getVectorIntrinsicIDForCall(CI, TLI) &&
      !(CI->getCalledFunction() && TLI &&
        (!VFDatabase::getMappings(*CI).empty() || isTLIScalarize(*TLI, *CI)))) {
    // If the call is a recognized math library call, it is likely that
    // we can vectorize it given loosened floating-point constraints.
    LibFunc Func;
    bool IsMathLibCall =
        TLI && CI->getCalledFunction() && CI->getType()->isFloatingPointTy() &&
        TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) &&
        TLI->hasOptimizedCodeGen(Func);

    if (IsMathLibCall) {
      // TODO: Ideally, we should not use clang-specific language here,
      // but it's hard to provide meaningful yet generic advice.
      // Also, should this be guarded by allowExtraAnalysis() and/or be part
      // of the returned info from isFunctionVectorizable()?
      reportVectorizationFailure(
          "Found a non-intrinsic callsite",
          "library call cannot be vectorized. "
          "Try compiling with -fno-math-errno, -ffast-math, "
          "or similar flags",
          "CantVectorizeLibcall", ORE, TheLoop, CI);
    } else {
      reportVectorizationFailure("Found a non-intrinsic callsite",
                                 "call instruction cannot be vectorized",
                                 "CantVectorizeLibcall", ORE, TheLoop, CI);
    }
    return false;
  }

  // Some intrinsics have scalar arguments, which must stay scalar and
  // therefore be loop invariant for the call to be vectorized.
  if (CI) {
    auto *SE = PSE.getSE();
    Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI);
    for (unsigned Idx = 0; Idx < CI->arg_size(); ++Idx)
      if (isVectorIntrinsicWithScalarOpAtArg(IntrinID, Idx, TTI)) {
        if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(Idx)), TheLoop)) {
          reportVectorizationFailure(
              "Found unvectorizable intrinsic",
              "intrinsic instruction cannot be vectorized",
              "CantVectorizeIntrinsic", ORE, TheLoop, CI);
          return false;
        }
      }
  }

  // If we found a vectorized variant of a function, note that so LV can
  // make better decisions about maximum VF.
  if (CI && !VFDatabase::getMappings(*CI).empty())
    VecCallVariantsFound = true;

  auto CanWidenInstructionTy = [](Instruction const &Inst) {
    Type *InstTy = Inst.getType();
    if (!isa<StructType>(InstTy))
      return canVectorizeTy(InstTy);

    // For now, we only recognize struct values returned from calls where
    // all users are extractvalue as vectorizable. All element types of the
    // struct must be types that can be widened.
    return isa<CallInst>(Inst) && canVectorizeTy(InstTy) &&
           all_of(Inst.users(), IsaPred<ExtractValueInst>);
  };

  // Check that the instruction return type is vectorizable.
  // We can't vectorize casts from vector type to scalar type.
  // Also, we can't vectorize extractelement instructions.
  if (!CanWidenInstructionTy(I) ||
      (isa<CastInst>(I) &&
       !VectorType::isValidElementType(I.getOperand(0)->getType())) ||
      isa<ExtractElementInst>(I)) {
    reportVectorizationFailure("Found unvectorizable type",
                               "instruction return type cannot be vectorized",
                               "CantVectorizeInstructionReturnType", ORE,
                               TheLoop, &I);
    return false;
  }

  // Check that the stored type is vectorizable.
  if (auto *ST = dyn_cast<StoreInst>(&I)) {
    Type *T = ST->getValueOperand()->getType();
    if (!VectorType::isValidElementType(T)) {
      reportVectorizationFailure("Store instruction cannot be vectorized",
                                 "CantVectorizeStore", ORE, TheLoop, ST);
      return false;
    }

    // For nontemporal stores, check that a nontemporal vector version is
    // supported on the target.
    if (ST->getMetadata(LLVMContext::MD_nontemporal)) {
      // Arbitrarily try a vector of 2 elements.
      auto *VecTy = FixedVectorType::get(T, /*NumElts=*/2);
      assert(VecTy && "did not find vectorized version of stored type");
      if (!TTI->isLegalNTStore(VecTy, ST->getAlign())) {
        reportVectorizationFailure(
            "nontemporal store instruction cannot be vectorized",
            "CantVectorizeNontemporalStore", ORE, TheLoop, ST);
        return false;
      }
    }

  } else if (auto *LD = dyn_cast<LoadInst>(&I)) {
    if (LD->getMetadata(LLVMContext::MD_nontemporal)) {
      // For nontemporal loads, check that a nontemporal vector version is
      // supported on the target (arbitrarily try a vector of 2 elements).
      auto *VecTy = FixedVectorType::get(I.getType(), /*NumElts=*/2);
      assert(VecTy && "did not find vectorized version of load type");
      if (!TTI->isLegalNTLoad(VecTy, LD->getAlign())) {
        reportVectorizationFailure(
            "nontemporal load instruction cannot be vectorized",
            "CantVectorizeNontemporalLoad", ORE, TheLoop, LD);
        return false;
      }
    }

    // FP instructions can allow unsafe algebra, thus vectorizable by
    // non-IEEE-754 compliant SIMD units.
    // This applies to floating-point math operations and calls, not memory
    // operations, shuffles, or casts, as they don't change precision or
    // semantics.
  } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) &&
             !I.isFast()) {
    LLVM_DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n");
    Hints->setPotentiallyUnsafe();
  }

  // Reduction instructions are allowed to have exit users.
  // All other instructions must not have external users.
  if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) {
    // We can safely vectorize loops where instructions within the loop are
    // used outside the loop only if the SCEV predicates within the loop are
    // the same as outside the loop. Allowing the exit means reusing the SCEV
    // outside the loop.
    if (PSE.getPredicate().isAlwaysTrue()) {
      AllowedExit.insert(&I);
      return true;
    }
    reportVectorizationFailure("Value cannot be used outside the loop",
                               "ValueUsedOutsideLoop", ORE, TheLoop, &I);
    return false;
  }

  return true;
}

/// Find histogram operations that match high-level code in loops:
/// \code
/// buckets[indices[i]] += step;
/// \endcode
///
/// It matches a pattern starting from \p HSt, which stores to the 'buckets'
/// array the computed histogram. It uses a BinOp to sum all counts, storing
/// them using a loop-variant index Load from the 'indices' input array.
///
/// On successful matches it updates the STATISTIC 'HistogramsDetected',
/// regardless of hardware support. When there is support, it additionally
/// stores the matched load/update/store operations in \p Histograms, along
/// with the pointer used to update the histogram.
static bool findHistogram(LoadInst *LI, StoreInst *HSt, Loop *TheLoop,
                          const PredicatedScalarEvolution &PSE,
                          SmallVectorImpl<HistogramInfo> &Histograms) {

  // Store value must come from a Binary Operation.
  Instruction *HPtrInstr = nullptr;
  BinaryOperator *HBinOp = nullptr;
  if (!match(HSt, m_Store(m_BinOp(HBinOp), m_Instruction(HPtrInstr))))
    return false;

  // BinOp must be an Add or a Sub modifying the bucket value by a
  // loop invariant amount.
  // FIXME: We assume the loop invariant term is on the RHS.
  //        Fine for an immediate/constant, but maybe not a generic value?
  Value *HIncVal = nullptr;
  if (!match(HBinOp, m_Add(m_Load(m_Specific(HPtrInstr)), m_Value(HIncVal))) &&
      !match(HBinOp, m_Sub(m_Load(m_Specific(HPtrInstr)), m_Value(HIncVal))))
    return false;

  // Make sure the increment value is loop invariant.
  if (!TheLoop->isLoopInvariant(HIncVal))
    return false;

  // The address to store is calculated through a GEP Instruction.
  GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(HPtrInstr);
  if (!GEP)
    return false;

  // Restrict address calculation to constant indices except for the last term.
  Value *HIdx = nullptr;
  for (Value *Index : GEP->indices()) {
    if (HIdx)
      return false;
    if (!isa<ConstantInt>(Index))
      HIdx = Index;
  }

  if (!HIdx)
    return false;

  // Check that the index is calculated by loading from another array. Ignore
  // any extensions.
  // FIXME: Support indices from other sources than a linear load from memory?
  //        We're currently trying to match an operation looping over an array
  //        of indices, but there could be additional levels of indirection
  //        in place, or possibly some additional calculation to form the index
  //        from the loaded data.
  Value *VPtrVal;
  if (!match(HIdx, m_ZExtOrSExtOrSelf(m_Load(m_Value(VPtrVal)))))
    return false;

  // Make sure the index address varies in this loop, not an outer loop.
  const auto *AR = dyn_cast<SCEVAddRecExpr>(PSE.getSE()->getSCEV(VPtrVal));
  if (!AR || AR->getLoop() != TheLoop)
    return false;

  // Ensure we'll have the same mask by checking that all parts of the
  // histogram (gather load, update, scatter store) are in the same block.
  LoadInst *IndexedLoad = cast<LoadInst>(HBinOp->getOperand(0));
  BasicBlock *LdBB = IndexedLoad->getParent();
  if (LdBB != HBinOp->getParent() || LdBB != HSt->getParent())
    return false;

  LLVM_DEBUG(dbgs() << "LV: Found histogram for: " << *HSt << "\n");

  // Store the operations that make up the histogram.
  Histograms.emplace_back(IndexedLoad, HBinOp, HSt);
  return true;
}
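
// For reference, a hypothetical IR shape accepted by the matcher above (all
// names invented for illustration):
//
//   %idx    = load i32, ptr %gep.indices        ; loop-varying index load
//   %idxext = zext i32 %idx to i64              ; m_ZExtOrSExtOrSelf
//   %gep    = getelementptr i32, ptr %buckets, i64 %idxext
//   %old    = load i32, ptr %gep                ; gather side
//   %inc    = add i32 %old, 1                   ; loop-invariant increment
//   store i32 %inc, ptr %gep                    ; scatter side, same block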

bool LoopVectorizationLegality::canVectorizeIndirectUnsafeDependences() {
  // For now, we only support an IndirectUnsafe dependency that calculates
  // a histogram.
  if (!EnableHistogramVectorization)
    return false;

  // Find a single IndirectUnsafe dependency.
  const MemoryDepChecker::Dependence *IUDep = nullptr;
  const MemoryDepChecker &DepChecker = LAI->getDepChecker();
  const auto *Deps = DepChecker.getDependences();
  // If there were too many dependences, LAA abandons recording them. We can't
  // proceed safely if we don't know what the dependences are.
  if (!Deps)
    return false;

  for (const MemoryDepChecker::Dependence &Dep : *Deps) {
    // Ignore dependencies that are either known to be safe or can be
    // checked at runtime.
    if (MemoryDepChecker::Dependence::isSafeForVectorization(Dep.Type) !=
        MemoryDepChecker::VectorizationSafetyStatus::Unsafe)
      continue;

    // We're only interested in IndirectUnsafe dependencies here, where the
    // address might come from a load from memory. We also only want to handle
    // one such dependency, at least for now.
    if (Dep.Type != MemoryDepChecker::Dependence::IndirectUnsafe || IUDep)
      return false;

    IUDep = &Dep;
  }
  if (!IUDep)
    return false;

  // For now only normal loads and stores are supported.
  LoadInst *LI = dyn_cast<LoadInst>(IUDep->getSource(DepChecker));
  StoreInst *SI = dyn_cast<StoreInst>(IUDep->getDestination(DepChecker));

  if (!LI || !SI)
    return false;

  LLVM_DEBUG(dbgs() << "LV: Checking for a histogram on: " << *SI << "\n");
  return findHistogram(LI, SI, TheLoop, LAI->getPSE(), Histograms);
}

bool LoopVectorizationLegality::canVectorizeMemory() {
  LAI = &LAIs.getInfo(*TheLoop);
  const OptimizationRemarkAnalysis *LAR = LAI->getReport();
  if (LAR) {
    ORE->emit([&]() {
      return OptimizationRemarkAnalysis(Hints->vectorizeAnalysisPassName(),
                                        "loop not vectorized: ", *LAR);
    });
  }

  if (!LAI->canVectorizeMemory()) {
    if (hasUncountableEarlyExit()) {
      reportVectorizationFailure(
          "Cannot vectorize unsafe dependencies in uncountable exit loop with "
          "side effects",
          "CantVectorizeUnsafeDependencyForEELoopWithSideEffects", ORE,
          TheLoop);
      return false;
    }

    return canVectorizeIndirectUnsafeDependences();
  }

  if (LAI->hasLoadStoreDependenceInvolvingLoopInvariantAddress()) {
    reportVectorizationFailure("We don't allow storing to uniform addresses",
                               "write to a loop invariant address could not "
                               "be vectorized",
                               "CantVectorizeStoreToLoopInvariantAddress", ORE,
                               TheLoop);
    return false;
  }

  // We can vectorize stores to an invariant address when the final reduction
  // value is guaranteed to be stored at the end of the loop. Also, if the
  // decision to vectorize the loop is made, runtime checks are added so as to
  // make sure that the invariant address won't alias with any other objects.
  if (!LAI->getStoresToInvariantAddresses().empty()) {
    // For each invariant address, check if the last stored value is
    // unconditional and the address is not calculated inside the loop.
    for (StoreInst *SI : LAI->getStoresToInvariantAddresses()) {
      if (!isInvariantStoreOfReduction(SI))
        continue;

      if (blockNeedsPredication(SI->getParent())) {
        reportVectorizationFailure(
            "We don't allow storing to uniform addresses",
            "write of conditional recurring variant value to a loop "
            "invariant address could not be vectorized",
            "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
        return false;
      }

      // Invariant address should be defined outside of the loop. The LICM
      // pass usually makes sure this happens, but in rare cases it does not,
      // and we do not want to overcomplicate vectorization to support this
      // case.
      if (Instruction *Ptr = dyn_cast<Instruction>(SI->getPointerOperand())) {
        if (TheLoop->contains(Ptr)) {
          reportVectorizationFailure(
              "Invariant address is calculated inside the loop",
              "write to a loop invariant address could not "
              "be vectorized",
              "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
          return false;
        }
      }
    }

    if (LAI->hasStoreStoreDependenceInvolvingLoopInvariantAddress()) {
      // For each invariant address, check that its last stored value is the
      // result of one of our reductions.
      //
      // We do not check if dependence with loads exists because that is
      // already checked via
      // hasLoadStoreDependenceInvolvingLoopInvariantAddress.
      ScalarEvolution *SE = PSE.getSE();
      SmallVector<StoreInst *, 4> UnhandledStores;
      for (StoreInst *SI : LAI->getStoresToInvariantAddresses()) {
        if (isInvariantStoreOfReduction(SI)) {
          // Earlier stores to this address are effectively dead code.
          // With opaque pointers it is possible for one pointer to be used
          // with different sizes of stored values:
          //    store i32 0, ptr %x
          //    store i8 0, ptr %x
          // The latest store doesn't completely overwrite the first one in
          // the example. That is why we have to make sure that the types of
          // the stored values are the same.
          // TODO: Check that the bitwidth of an unhandled store is smaller
          // than the one that overwrites it, and add a test.
          erase_if(UnhandledStores, [SE, SI](StoreInst *I) {
            return storeToSameAddress(SE, SI, I) &&
                   I->getValueOperand()->getType() ==
                       SI->getValueOperand()->getType();
          });
          continue;
        }
        UnhandledStores.push_back(SI);
      }

      bool IsOK = UnhandledStores.empty();
      // TODO: we should also validate against InvariantMemSets.
      if (!IsOK) {
        reportVectorizationFailure(
            "We don't allow storing to uniform addresses",
            "write to a loop invariant address could not "
            "be vectorized",
            "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
        return false;
      }
    }
  }

  PSE.addPredicate(LAI->getPSE().getPredicate());
  return true;
}

bool LoopVectorizationLegality::canVectorizeFPMath(
    bool EnableStrictReductions) {

  // First check if there is any ExactFP math or if we allow reassociations
  if (!Requirements->getExactFPInst() || Hints->allowReordering())
    return true;

  // If the above is false, we have ExactFPMath & do not allow reordering.
  // If the EnableStrictReductions flag is set, first check if we have any
  // Exact FP induction vars, which we cannot vectorize.
  if (!EnableStrictReductions ||
      any_of(getInductionVars(), [&](auto &Induction) -> bool {
        InductionDescriptor IndDesc = Induction.second;
        return IndDesc.getExactFPMathInst();
      }))
    return false;

  // We can now only vectorize if all reductions with Exact FP math also
  // have the isOrdered flag set, which indicates that we can move the
  // reduction operations in-loop.
  return (all_of(getReductionVars(), [&](auto &Reduction) -> bool {
    const RecurrenceDescriptor &RdxDesc = Reduction.second;
    return !RdxDesc.hasExactFPMath() || RdxDesc.isOrdered();
  }));
}

bool LoopVectorizationLegality::isInvariantStoreOfReduction(StoreInst *SI) {
  return any_of(getReductionVars(), [&](auto &Reduction) -> bool {
    const RecurrenceDescriptor &RdxDesc = Reduction.second;
    return RdxDesc.IntermediateStore == SI;
  });
}

bool LoopVectorizationLegality::isInvariantAddressOfReduction(Value *V) {
  return any_of(getReductionVars(), [&](auto &Reduction) -> bool {
    const RecurrenceDescriptor &RdxDesc = Reduction.second;
    if (!RdxDesc.IntermediateStore)
      return false;

    ScalarEvolution *SE = PSE.getSE();
    Value *InvariantAddress = RdxDesc.IntermediateStore->getPointerOperand();
    return V == InvariantAddress ||
           SE->getSCEV(V) == SE->getSCEV(InvariantAddress);
  });
}

bool LoopVectorizationLegality::isInductionPhi(const Value *V) const {
  Value *In0 = const_cast<Value *>(V);
  PHINode *PN = dyn_cast_or_null<PHINode>(In0);
  if (!PN)
    return false;

  return Inductions.count(PN);
}

const InductionDescriptor *
LoopVectorizationLegality::getIntOrFpInductionDescriptor(PHINode *Phi) const {
  if (!isInductionPhi(Phi))
    return nullptr;
  auto &ID = getInductionVars().find(Phi)->second;
  if (ID.getKind() == InductionDescriptor::IK_IntInduction ||
      ID.getKind() == InductionDescriptor::IK_FpInduction)
    return &ID;
  return nullptr;
}

const InductionDescriptor *
LoopVectorizationLegality::getPointerInductionDescriptor(PHINode *Phi) const {
  if (!isInductionPhi(Phi))
    return nullptr;
  auto &ID = getInductionVars().find(Phi)->second;
  if (ID.getKind() == InductionDescriptor::IK_PtrInduction)
    return &ID;
  return nullptr;
}

bool LoopVectorizationLegality::isCastedInductionVariable(
    const Value *V) const {
  auto *Inst = dyn_cast<Instruction>(V);
  return (Inst && InductionCastsToIgnore.count(Inst));
}

bool LoopVectorizationLegality::isInductionVariable(const Value *V) const {
  return isInductionPhi(V) || isCastedInductionVariable(V);
}

bool LoopVectorizationLegality::isFixedOrderRecurrence(
    const PHINode *Phi) const {
  return FixedOrderRecurrences.count(Phi);
}

bool LoopVectorizationLegality::blockNeedsPredication(
    const BasicBlock *BB) const {
  // When vectorizing early exits, create predicates for the latch block only.
  // For a single early exit, it must be a direct predecessor of the latch.
  // For multiple early exits, they form a chain where each exiting block
  // dominates all subsequent blocks up to the latch.
  BasicBlock *Latch = TheLoop->getLoopLatch();
  if (hasUncountableEarlyExit())
    return BB == Latch;
  return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
}

bool LoopVectorizationLegality::blockCanBePredicated(
    BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs,
    SmallPtrSetImpl<const Instruction *> &MaskedOp) const {
  for (Instruction &I : *BB) {
    // We can predicate blocks with calls to assume, as long as we drop them in
    // case we flatten the CFG via predication.
    if (match(&I, m_Intrinsic<Intrinsic::assume>())) {
      MaskedOp.insert(&I);
      continue;
    }

    // Do not let llvm.experimental.noalias.scope.decl block the vectorization.
    // TODO: there might be cases that it should block the vectorization. Let's
    // ignore those for now.
    if (isa<NoAliasScopeDeclInst>(&I))
      continue;

    // We can allow masked calls if there's at least one vector variant, even
    // if we end up scalarizing due to the cost model calculations.
    // TODO: Allow other calls if they have appropriate attributes... readonly
    // and argmemonly?
    if (CallInst *CI = dyn_cast<CallInst>(&I))
      if (VFDatabase::hasMaskedVariant(*CI)) {
        MaskedOp.insert(CI);
        continue;
      }

    // Loads are handled via masking (or speculated if safe to do so.)
    if (auto *LI = dyn_cast<LoadInst>(&I)) {
      if (!SafePtrs.count(LI->getPointerOperand()))
        MaskedOp.insert(LI);
      continue;
    }

    // Predicated store requires some form of masking:
    //  1) masked store HW instruction,
    //  2) emulation via load-blend-store (only if safe and legal to do so,
    //     be aware on the race conditions), or
    //  3) element-by-element predicate check and scalar store.
    if (auto *SI = dyn_cast<StoreInst>(&I)) {
      MaskedOp.insert(SI);
      continue;
    }

    if (I.mayReadFromMemory() || I.mayWriteToMemory() || I.mayThrow())
      return false;
  }

  return true;
}
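
// An illustrative (hypothetical) C loop whose body this routine would accept
// for predication:
//
//   for (int i = 0; i < n; ++i)
//     if (c[i])
//       a[i] = b[i] + 1;  // The load of b[i] is masked unless its pointer
//                         // was proven safe (SafePtrs); the store to a[i]
//                         // is always added to MaskedOp.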
1497
1498bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
1499 if (!EnableIfConversion) {
1500 reportVectorizationFailure("If-conversion is disabled",
1501 "IfConversionDisabled", ORE, TheLoop);
1502 return false;
1503 }
1504
1505 assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");
1506
1507 // A list of pointers which are known to be dereferenceable within scope of
1508 // the loop body for each iteration of the loop which executes. That is,
1509 // the memory pointed to can be dereferenced (with the access size implied by
1510 // the value's type) unconditionally within the loop header without
1511 // introducing a new fault.
1512 SmallPtrSet<Value *, 8> SafePointers;
1513
1514 // Collect safe addresses.
1515 for (BasicBlock *BB : TheLoop->blocks()) {
1516 if (!blockNeedsPredication(BB)) {
1517 for (Instruction &I : *BB)
1518 if (auto *Ptr = getLoadStorePointerOperand(&I))
1519 SafePointers.insert(Ptr);
1520 continue;
1521 }
1522
1523 // For a block which requires predication, a address may be safe to access
1524 // in the loop w/o predication if we can prove dereferenceability facts
1525 // sufficient to ensure it'll never fault within the loop. For the moment,
1526 // we restrict this to loads; stores are more complicated due to
1527 // concurrency restrictions.
1528 ScalarEvolution &SE = *PSE.getSE();
1530 for (Instruction &I : *BB) {
1531 LoadInst *LI = dyn_cast<LoadInst>(&I);
1532
1533 // Make sure we can execute all computations feeding into Ptr in the loop
1534 // w/o triggering UB and that none of the out-of-loop operands are poison.
1535 // We do not need to check if operations inside the loop can produce
1536 // poison due to flags (e.g. due to an inbounds GEP going out of bounds),
1537 // because flags will be dropped when executing them unconditionally.
1538 // TODO: Results could be improved by considering poison-propagation
1539 // properties of visited ops.
1540 auto CanSpeculatePointerOp = [this](Value *Ptr) {
1541 SmallVector<Value *> Worklist = {Ptr};
1542 SmallPtrSet<Value *, 4> Visited;
1543 while (!Worklist.empty()) {
1544 Value *CurrV = Worklist.pop_back_val();
1545 if (!Visited.insert(CurrV).second)
1546 continue;
1547
1548 auto *CurrI = dyn_cast<Instruction>(CurrV);
1549 if (!CurrI || !TheLoop->contains(CurrI)) {
1550 // If operands from outside the loop may be poison then Ptr may also
1551 // be poison.
1552 if (!isGuaranteedNotToBePoison(CurrV, AC,
1553 TheLoop->getLoopPredecessor()
1554 ->getTerminator()
1555 ->getIterator(),
1556 DT))
1557 return false;
1558 continue;
1559 }
1560
1561 // A loaded value may be poison, independent of any flags.
1562 if (isa<LoadInst>(CurrI) && !isGuaranteedNotToBePoison(CurrV, AC))
1563 return false;
1564
1565 // For other ops, assume poison can only be introduced via flags,
1566 // which can be dropped.
1567 if (!isa<PHINode>(CurrI) && !isSafeToSpeculativelyExecute(CurrI))
1568 return false;
1569 append_range(Worklist, CurrI->operands());
1570 }
1571 return true;
1572 };
1573 // Pass the Predicates pointer to isDereferenceableAndAlignedInLoop so
1574 // that it will consider loops that need guarding by SCEV checks. The
1575 // vectoriser will generate these checks if we decide to vectorise.
1576 if (LI && !LI->getType()->isVectorTy() && !mustSuppressSpeculation(*LI) &&
1577 CanSpeculatePointerOp(LI->getPointerOperand()) &&
1578 isDereferenceableAndAlignedInLoop(LI, TheLoop, SE, *DT, AC,
1579 &Predicates))
1580 SafePointers.insert(LI->getPointerOperand());
1581 Predicates.clear();
1582 }
1583 }
1584
1585 // Collect the blocks that need predication.
1586 for (BasicBlock *BB : TheLoop->blocks()) {
1587 // We support only branches and switch statements as terminators inside the
1588 // loop.
1589 if (isa<SwitchInst>(BB->getTerminator())) {
1590 if (TheLoop->isLoopExiting(BB)) {
1591 reportVectorizationFailure("Loop contains an unsupported switch",
1592 "LoopContainsUnsupportedSwitch", ORE,
1593 TheLoop, BB->getTerminator());
1594 return false;
1595 }
1596 } else if (!isa<BranchInst>(BB->getTerminator())) {
1597 reportVectorizationFailure("Loop contains an unsupported terminator",
1598 "LoopContainsUnsupportedTerminator", ORE,
1599 TheLoop, BB->getTerminator());
1600 return false;
1601 }
1602
1603 // We must be able to predicate all blocks that need to be predicated.
1604 if (blockNeedsPredication(BB) &&
1605 !blockCanBePredicated(BB, SafePointers, MaskedOp)) {
1606       reportVectorizationFailure(
1607 "Control flow cannot be substituted for a select", "NoCFGForSelect",
1608 ORE, TheLoop, BB->getTerminator());
1609 return false;
1610 }
1611 }
1612
1613 // We can if-convert this loop.
1614 return true;
1615}
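// Illustrative sketch (editor-added): if-conversion rewrites control flow like
//   for (int i = 0; i < n; ++i)
//     if (a[i] > 0)
//       b[i] = a[i];
// into straight-line code in which the guarded store becomes a masked
// operation; such instructions are collected in MaskedOp above.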
1616
1617// Helper function for canVectorizeLoopNestCFG.
1618bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
1619 bool UseVPlanNativePath) {
1620 assert((UseVPlanNativePath || Lp->isInnermost()) &&
1621 "VPlan-native path is not enabled.");
1622
1623 // TODO: ORE should be improved to show more accurate information when an
1624 // outer loop can't be vectorized because a nested loop is not understood or
1625 // legal. Something like: "outer_loop_location: loop not vectorized:
1626 // (inner_loop_location) loop control flow is not understood by vectorizer".
1627
1628 // Store the result and return it at the end instead of exiting early, in case
1629 // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1630 bool Result = true;
1631 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1632
1633 // We must have a loop in canonical form. Loops with indirectbr in them cannot
1634 // be canonicalized.
1635 if (!Lp->getLoopPreheader()) {
1636 reportVectorizationFailure("Loop doesn't have a legal pre-header",
1637 "loop control flow is not understood by vectorizer",
1638 "CFGNotUnderstood", ORE, TheLoop);
1639 if (DoExtraAnalysis)
1640 Result = false;
1641 else
1642 return false;
1643 }
1644
1645 // We must have a single backedge.
1646 if (Lp->getNumBackEdges() != 1) {
1647 reportVectorizationFailure("The loop must have a single backedge",
1648 "loop control flow is not understood by vectorizer",
1649 "CFGNotUnderstood", ORE, TheLoop);
1650 if (DoExtraAnalysis)
1651 Result = false;
1652 else
1653 return false;
1654 }
1655
1656 // The latch must be terminated by a BranchInst.
1657 BasicBlock *Latch = Lp->getLoopLatch();
1658 if (Latch && !isa<BranchInst>(Latch->getTerminator())) {
1659     reportVectorizationFailure(
1660 "The loop latch terminator is not a BranchInst",
1661 "loop control flow is not understood by vectorizer", "CFGNotUnderstood",
1662 ORE, TheLoop);
1663 if (DoExtraAnalysis)
1664 Result = false;
1665 else
1666 return false;
1667 }
1668
1669 return Result;
1670}
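// Illustrative sketch (editor-added): the canonical shape checked above
// corresponds to IR along the lines of
//   preheader:
//     br label %header
//   header:                                          ; single backedge from %latch
//     ...
//   latch:
//     br i1 %exit.cond, label %exit, label %header   ; BranchInst terminator
// A loop whose latch ends in an indirectbr, for example, cannot be
// canonicalized into this form.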
1671
1672bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
1673 Loop *Lp, bool UseVPlanNativePath) {
1674 // Store the result and return it at the end instead of exiting early, in case
1675 // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1676 bool Result = true;
1677 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1678 if (!canVectorizeLoopCFG(Lp, UseVPlanNativePath)) {
1679 if (DoExtraAnalysis)
1680 Result = false;
1681 else
1682 return false;
1683 }
1684
1685 // Recursively check whether the loop control flow of nested loops is
1686 // understood.
1687 for (Loop *SubLp : *Lp)
1688 if (!canVectorizeLoopNestCFG(SubLp, UseVPlanNativePath)) {
1689 if (DoExtraAnalysis)
1690 Result = false;
1691 else
1692 return false;
1693 }
1694
1695 return Result;
1696}
1697
1698bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
1699 BasicBlock *LatchBB = TheLoop->getLoopLatch();
1700 if (!LatchBB) {
1701 reportVectorizationFailure("Loop does not have a latch",
1702 "Cannot vectorize early exit loop",
1703 "NoLatchEarlyExit", ORE, TheLoop);
1704 return false;
1705 }
1706
1707 if (Reductions.size() || FixedOrderRecurrences.size()) {
1708     reportVectorizationFailure(
1709 "Found reductions or recurrences in early-exit loop",
1710 "Cannot vectorize early exit loop with reductions or recurrences",
1711 "RecurrencesInEarlyExitLoop", ORE, TheLoop);
1712 return false;
1713 }
1714
1715 SmallVector<BasicBlock *, 8> ExitingBlocks;
1716 TheLoop->getExitingBlocks(ExitingBlocks);
1717
1718 // Keep a record of all the exiting blocks.
1719   SmallVector<const SCEVPredicate *, 4> Predicates;
1720 SmallVector<BasicBlock *> UncountableExitingBlocks;
1721 for (BasicBlock *BB : ExitingBlocks) {
1722 const SCEV *EC =
1723 PSE.getSE()->getPredicatedExitCount(TheLoop, BB, &Predicates);
1724 if (isa<SCEVCouldNotCompute>(EC)) {
1725 if (size(successors(BB)) != 2) {
1726         reportVectorizationFailure(
1727 "Early exiting block does not have exactly two successors",
1728 "Incorrect number of successors from early exiting block",
1729 "EarlyExitTooManySuccessors", ORE, TheLoop);
1730 return false;
1731 }
1732
1733 UncountableExitingBlocks.push_back(BB);
1734 } else
1735 CountableExitingBlocks.push_back(BB);
1736 }
1737 // We can safely ignore the predicates here because when vectorizing the loop
1738   // the PredicatedScalarEvolution class will keep track of all predicates
1739 // for each exiting block anyway. This happens when calling
1740 // PSE.getSymbolicMaxBackedgeTakenCount() below.
1741 Predicates.clear();
1742
1743 if (UncountableExitingBlocks.empty()) {
1744     LLVM_DEBUG(dbgs() << "LV: Could not find any uncountable exits\n");
1745 return false;
1746 }
1747
1748 // Sort exiting blocks by dominance order to establish a clear chain.
1749 llvm::sort(UncountableExitingBlocks, [this](BasicBlock *A, BasicBlock *B) {
1750 return DT->properlyDominates(A, B);
1751 });
1752
1753 // Verify that exits form a strict dominance chain: each block must
1754 // dominate the next. This ensures each exit is only dominated by its
1755 // predecessors in the chain.
1756 for (unsigned I = 0; I + 1 < UncountableExitingBlocks.size(); ++I) {
1757 if (!DT->properlyDominates(UncountableExitingBlocks[I],
1758 UncountableExitingBlocks[I + 1])) {
1759       reportVectorizationFailure(
1760 "Uncountable early exits do not form a dominance chain",
1761 "Cannot vectorize early exit loop with non-dominating exits",
1762 "NonDominatingEarlyExits", ORE, TheLoop);
1763 return false;
1764 }
1765 }
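  // Illustrative sketch (editor-added): in
  //   for (i = 0; i < n; ++i) {
  //     if (a[i] == x) break;    // first uncountable exit
  //     if (b[i] == y) break;    // second uncountable exit
  //   }
  // the block testing a[i] properly dominates the block testing b[i], so the
  // two uncountable exits form the dominance chain required here.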
1766
1767 BasicBlock *LatchPredBB = LatchBB->getUniquePredecessor();
1768 if (LatchPredBB != UncountableExitingBlocks.back()) {
1769     reportVectorizationFailure(
1770 "Last early exiting block in the chain is not the latch predecessor",
1771 "Cannot vectorize early exit loop", "EarlyExitNotLatchPredecessor", ORE,
1772 TheLoop);
1773 return false;
1774 }
1775
1776 // The latch block must have a countable exit.
1777   if (isa<SCEVCouldNotCompute>(
1778 PSE.getSE()->getPredicatedExitCount(TheLoop, LatchBB, &Predicates))) {
1779     reportVectorizationFailure(
1780 "Cannot determine exact exit count for latch block",
1781 "Cannot vectorize early exit loop",
1782 "UnknownLatchExitCountEarlyExitLoop", ORE, TheLoop);
1783 return false;
1784 }
1785 assert(llvm::is_contained(CountableExitingBlocks, LatchBB) &&
1786 "Latch block not found in list of countable exits!");
1787
1788 // Check to see if there are instructions that could potentially generate
1789 // exceptions or have side-effects.
1790 auto IsSafeOperation = [](Instruction *I) -> bool {
1791 switch (I->getOpcode()) {
1792 case Instruction::Load:
1793 case Instruction::Store:
1794 case Instruction::PHI:
1795 case Instruction::Br:
1796 // These are checked separately.
1797 return true;
1798 default:
1799       return isSafeToSpeculativelyExecute(I);
1800 }
1801 };
1802
1803 bool HasSideEffects = false;
1804 for (auto *BB : TheLoop->blocks())
1805 for (auto &I : *BB) {
1806 if (I.mayWriteToMemory()) {
1807 if (isa<StoreInst>(&I) && cast<StoreInst>(&I)->isSimple()) {
1808 HasSideEffects = true;
1809 continue;
1810 }
1811
1812 // We don't support complex writes to memory.
1813         reportVectorizationFailure(
1814 "Complex writes to memory unsupported in early exit loops",
1815 "Cannot vectorize early exit loop with complex writes to memory",
1816 "WritesInEarlyExitLoop", ORE, TheLoop);
1817 return false;
1818 }
1819
1820 if (!IsSafeOperation(&I)) {
1821 reportVectorizationFailure("Early exit loop contains operations that "
1822 "cannot be speculatively executed",
1823 "UnsafeOperationsEarlyExitLoop", ORE,
1824 TheLoop);
1825 return false;
1826 }
1827 }
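  // Editor-added note: every instruction in the loop is now known to be a
  // load, a simple store, a PHI, a branch, or something that is safe to
  // speculatively execute; e.g. a udiv with a possibly-zero divisor would
  // have been rejected by IsSafeOperation above.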
1828
1829 SmallVector<LoadInst *, 4> NonDerefLoads;
1830 // TODO: Handle loops that may fault.
1831 if (!HasSideEffects) {
1832 // Read-only loop.
1833 Predicates.clear();
1834 if (!isReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC, NonDerefLoads,
1835 &Predicates)) {
1836       reportVectorizationFailure(
1837 "Loop may fault", "Cannot vectorize non-read-only early exit loop",
1838 "NonReadOnlyEarlyExitLoop", ORE, TheLoop);
1839 return false;
1840 }
1841 } else {
1842 // Check all uncountable exiting blocks for movable loads.
1843 for (BasicBlock *ExitingBB : UncountableExitingBlocks) {
1844 if (!canUncountableExitConditionLoadBeMoved(ExitingBB))
1845 return false;
1846 }
1847 }
1848
1849 // Check non-dereferenceable loads if any.
1850 for (LoadInst *LI : NonDerefLoads) {
1851 // Only support unit-stride access for now.
1852 int Stride = isConsecutivePtr(LI->getType(), LI->getPointerOperand());
1853 if (Stride != 1) {
1854       reportVectorizationFailure(
1855 "Loop contains potentially faulting strided load",
1856 "Cannot vectorize early exit loop with "
1857 "strided fault-only-first load",
1858 "EarlyExitLoopWithStridedFaultOnlyFirstLoad", ORE, TheLoop);
1859 return false;
1860 }
1861 PotentiallyFaultingLoads.insert(LI);
1862 LLVM_DEBUG(dbgs() << "LV: Found potentially faulting load: " << *LI
1863 << "\n");
1864 }
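  // Editor-added note: the remaining unit-stride loads are only recorded as
  // potentially faulting; targets with first-faulting loads (e.g. SVE's
  // ldff1 or RISC-V's vle*ff.v) can execute them speculatively and report
  // how many lanes were read successfully.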
1865
1866 [[maybe_unused]] const SCEV *SymbolicMaxBTC =
1867 PSE.getSymbolicMaxBackedgeTakenCount();
1868 // Since we have an exact exit count for the latch and the early exit
1869   // dominates the latch, this should guarantee a computed SCEV value.
1870 assert(!isa<SCEVCouldNotCompute>(SymbolicMaxBTC) &&
1871 "Failed to get symbolic expression for backedge taken count");
1872 LLVM_DEBUG(dbgs() << "LV: Found an early exit loop with symbolic max "
1873 "backedge taken count: "
1874 << *SymbolicMaxBTC << '\n');
1875 HasUncountableEarlyExit = true;
1876 UncountableExitWithSideEffects = HasSideEffects;
1877 return true;
1878}
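// Illustrative sketch (editor-added) of a loop shape this analysis accepts,
// assuming a[0..n-1] is provably dereferenceable:
//   for (i = 0; i < n; ++i)     // countable exit at the latch
//     if (a[i] == key) break;   // single uncountable early exit
// The break makes the exact backedge-taken count uncomputable, so
// vectorization relies on the symbolic maximum derived from the latch exit.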
1879
1880bool LoopVectorizationLegality::canUncountableExitConditionLoadBeMoved(
1881 BasicBlock *ExitingBlock) {
1882   // Try to find a load on the critical path for the uncountable exit
1883   // condition. This currently matches about the simplest form we can handle:
1884   // a single in-loop load, the result of which is directly compared against
1885   // a loop-invariant value.
1886 // FIXME: We're insisting on a single use for now, because otherwise we will
1887 // need to make PHI nodes for other users. That can be done once the initial
1888 // transform code lands.
1889 auto *Br = cast<BranchInst>(ExitingBlock->getTerminator());
1890
1891 using namespace llvm::PatternMatch;
1892 Instruction *L = nullptr;
1893 Value *Ptr = nullptr;
1894 Value *R = nullptr;
1895 if (!match(Br->getCondition(),
1896              m_OneUse(m_ICmp(m_CombineAnd(m_Instruction(L), m_Load(m_Value(Ptr))),
1897 m_Value(R))))) {
1898     reportVectorizationFailure(
1899 "Early exit loop with store but no supported condition load",
1900 "NoConditionLoadForEarlyExitLoop", ORE, TheLoop);
1901 return false;
1902 }
1903
1904 // FIXME: Don't rely on operand ordering for the comparison.
1905 if (!TheLoop->isLoopInvariant(R)) {
1906     reportVectorizationFailure(
1907 "Early exit loop with store but no supported condition load",
1908 "NoConditionLoadForEarlyExitLoop", ORE, TheLoop);
1909 return false;
1910 }
1911
1912 // Make sure that the load address is not loop invariant; we want an
1913 // address calculation that we can rotate to the next vector iteration.
1914 const auto *AR = dyn_cast<SCEVAddRecExpr>(PSE.getSE()->getSCEV(Ptr));
1915 if (!AR || AR->getLoop() != TheLoop || !AR->isAffine()) {
1916     reportVectorizationFailure(
1917 "Uncountable exit condition depends on load with an address that is "
1918 "not an add recurrence in the loop",
1919 "EarlyExitLoadInvariantAddress", ORE, TheLoop);
1920 return false;
1921 }
1922
1923 // FIXME: Support gathers after first-faulting load support lands.
1924   SmallVector<const SCEVPredicate *, 4> Predicates;
1925 LoadInst *Load = cast<LoadInst>(L);
1926 if (!isDereferenceableAndAlignedInLoop(Load, TheLoop, *PSE.getSE(), *DT, AC,
1927 &Predicates)) {
1928     reportVectorizationFailure(
1929 "Loop may fault",
1930 "Cannot vectorize potentially faulting early exit loop",
1931 "PotentiallyFaultingEarlyExitLoop", ORE, TheLoop);
1932 return false;
1933 }
1934
1935 ICFLoopSafetyInfo SafetyInfo;
1936 SafetyInfo.computeLoopSafetyInfo(TheLoop);
1937   // We need to know that the load will be executed before we can hoist a
1938 // copy out to run just before the first iteration.
1939 if (!SafetyInfo.isGuaranteedToExecute(*Load, DT, TheLoop)) {
1940     reportVectorizationFailure(
1941 "Load for uncountable exit not guaranteed to execute",
1942 "ConditionalUncountableExitLoad", ORE, TheLoop);
1943 return false;
1944 }
1945
1946 // Prohibit any potential aliasing with any instruction in the loop which
1947 // might store to memory.
1948 // FIXME: Relax this constraint where possible.
1949 for (auto *BB : TheLoop->blocks()) {
1950 for (auto &I : *BB) {
1951 if (&I == Load)
1952 continue;
1953
1954 if (I.mayWriteToMemory()) {
1955 if (auto *SI = dyn_cast<StoreInst>(&I)) {
1956 AliasResult AR = AA->alias(Ptr, SI->getPointerOperand());
1957 if (AR == AliasResult::NoAlias)
1958 continue;
1959 }
1960
1961         reportVectorizationFailure(
1962 "Cannot determine whether critical uncountable exit load address "
1963 "does not alias with a memory write",
1964 "CantVectorizeAliasWithCriticalUncountableExitLoad", ORE, TheLoop);
1965 return false;
1966 }
1967 }
1968 }
1969
1970 return true;
1971}
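// Illustrative sketch (editor-added) of the shape matched above, assuming the
// store never aliases the condition load:
//   for (i = 0; i < n; ++i) {
//     b[i] = i;                 // simple store: the loop has side effects
//     if (a[i] == key) break;   // load whose only use is the exit compare
//   }
// Here a[i]'s address is an affine add-recurrence, the load executes on every
// iteration, and key is loop-invariant, so a copy of the load can be hoisted
// and rotated across vector iterations.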
1972
1973bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
1974 // Store the result and return it at the end instead of exiting early, in case
1975 // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1976 bool Result = true;
1977
1978 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1979   // Check whether the loop-related control flow in the loop nest is understood
1980   // by the vectorizer.
1981 if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) {
1982 if (DoExtraAnalysis) {
1983 LLVM_DEBUG(dbgs() << "LV: legality check failed: loop nest");
1984 Result = false;
1985 } else {
1986 return false;
1987 }
1988 }
1989
1990 // We need to have a loop header.
1991 LLVM_DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName()
1992 << '\n');
1993
1994 // Specific checks for outer loops. We skip the remaining legal checks at this
1995 // point because they don't support outer loops.
1996 if (!TheLoop->isInnermost()) {
1997 assert(UseVPlanNativePath && "VPlan-native path is not enabled.");
1998
1999 if (!canVectorizeOuterLoop()) {
2000 reportVectorizationFailure("Unsupported outer loop",
2001 "UnsupportedOuterLoop", ORE, TheLoop);
2002 // TODO: Implement DoExtraAnalysis when subsequent legal checks support
2003 // outer loops.
2004 return false;
2005 }
2006
2007 LLVM_DEBUG(dbgs() << "LV: We can vectorize this outer loop!\n");
2008 return Result;
2009 }
2010
2011 assert(TheLoop->isInnermost() && "Inner loop expected.");
2012 // Check if we can if-convert non-single-bb loops.
2013 unsigned NumBlocks = TheLoop->getNumBlocks();
2014 if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
2015 LLVM_DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");
2016 if (DoExtraAnalysis)
2017 Result = false;
2018 else
2019 return false;
2020 }
2021
2022 // Check if we can vectorize the instructions and CFG in this loop.
2023 if (!canVectorizeInstrs()) {
2024 LLVM_DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n");
2025 if (DoExtraAnalysis)
2026 Result = false;
2027 else
2028 return false;
2029 }
2030
2031 if (isa<SCEVCouldNotCompute>(PSE.getBackedgeTakenCount())) {
2032 if (TheLoop->getExitingBlock()) {
2033 reportVectorizationFailure("Cannot vectorize uncountable loop",
2034 "UnsupportedUncountableLoop", ORE, TheLoop);
2035 if (DoExtraAnalysis)
2036 Result = false;
2037 else
2038 return false;
2039 } else {
2040 if (!isVectorizableEarlyExitLoop()) {
2042         assert(!HasUncountableEarlyExit &&
2043 "Must be false without vectorizable early-exit loop");
2044 if (DoExtraAnalysis)
2045 Result = false;
2046 else
2047 return false;
2048 }
2049 }
2050 }
2051
2052 // Go over each instruction and look at memory deps.
2053 if (!canVectorizeMemory()) {
2054 LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
2055 if (DoExtraAnalysis)
2056 Result = false;
2057 else
2058 return false;
2059 }
2060
2061 // Bail out for state-changing loops with uncountable exits for now.
2062 if (UncountableExitWithSideEffects) {
2063     reportVectorizationFailure(
2064 "Writes to memory unsupported in early exit loops",
2065 "Cannot vectorize early exit loop with writes to memory",
2066 "WritesInEarlyExitLoop", ORE, TheLoop);
2067 return false;
2068 }
2069
2070 if (Result) {
2071 LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
2072 << (LAI->getRuntimePointerChecking()->Need
2073 ? " (with a runtime bound check)"
2074 : "")
2075 << "!\n");
2076 }
2077
2078 unsigned SCEVThreshold = VectorizeSCEVCheckThreshold;
2079 if (Hints->getForce() == LoopVectorizeHints::FK_Enabled)
2080 SCEVThreshold = PragmaVectorizeSCEVCheckThreshold;
2081
2082 if (PSE.getPredicate().getComplexity() > SCEVThreshold) {
2083 LLVM_DEBUG(dbgs() << "LV: Vectorization not profitable "
2084                     "due to SCEVThreshold\n");
2085 reportVectorizationFailure("Too many SCEV checks needed",
2086 "Too many SCEV assumptions need to be made and checked at runtime",
2087 "TooManySCEVRunTimeChecks", ORE, TheLoop);
2088 if (DoExtraAnalysis)
2089 Result = false;
2090 else
2091 return false;
2092 }
2093
2094 // Okay! We've done all the tests. If any have failed, return false. Otherwise
2095 // we can vectorize, and at this point we don't have any other mem analysis
2096 // which may limit our maximum vectorization factor, so just return true with
2097 // no restrictions.
2098 return Result;
2099}
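// Minimal usage sketch (editor-added; constructor argument list elided since
// it is not shown in this file):
//   LoopVectorizationLegality LVL(/* loop, PSE, DT, TTI, TLI, ... */);
//   if (!LVL.canVectorize(UseVPlanNativePath))
//     return; // not legal to vectorize; remarks were emitted through ORE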
2100
2101bool LoopVectorizationLegality::canFoldTailByMasking() const {
2102   // The only loops we can vectorize without a scalar epilogue are loops with
2103 // a bottom-test and a single exiting block. We'd have to handle the fact
2104 // that not every instruction executes on the last iteration. This will
2105 // require a lane mask which varies through the vector loop body. (TODO)
2106 if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) {
2107 LLVM_DEBUG(
2108 dbgs()
2109       << "LV: Cannot fold tail by masking. Requires a single latch exit\n");
2110 return false;
2111 }
2112
2113 LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");
2114
2115 SmallPtrSet<const Value *, 8> ReductionLiveOuts;
2116
2117 for (const auto &Reduction : getReductionVars())
2118 ReductionLiveOuts.insert(Reduction.second.getLoopExitInstr());
2119
2120 for (const auto &Entry : getInductionVars()) {
2121 PHINode *OrigPhi = Entry.first;
2122 for (User *U : OrigPhi->users()) {
2123 auto *UI = cast<Instruction>(U);
2124 if (!TheLoop->contains(UI)) {
2125 LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking, loop IV has an "
2126 "outside user for "
2127 << *UI << "\n");
2128 return false;
2129 }
2130 }
2131 }
2132
2133 // The list of pointers that we can safely read and write to remains empty.
2134 SmallPtrSet<Value *, 8> SafePointers;
2135
2136 // Check all blocks for predication, including those that ordinarily do not
2137 // need predication such as the header block.
2138   SmallPtrSet<const Instruction *, 8> TmpMaskedOp;
2139 for (BasicBlock *BB : TheLoop->blocks()) {
2140 if (!blockCanBePredicated(BB, SafePointers, TmpMaskedOp)) {
2141 LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking.\n");
2142 return false;
2143 }
2144 }
2145
2146 LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n");
2147
2148 return true;
2149}
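// Editor-added illustration: with trip count 10 and VF 4, folding the tail by
// masking runs ceil(10/4) = 3 vector iterations and disables the two unused
// lanes of the last iteration with a mask like <1,1,0,0>, instead of emitting
// a scalar epilogue for the remainder.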
2150
2151void LoopVectorizationLegality::prepareToFoldTailByMasking() {
2152 // The list of pointers that we can safely read and write to remains empty.
2153 SmallPtrSet<Value *, 8> SafePointers;
2154
2155 // Mark all blocks for predication, including those that ordinarily do not
2156 // need predication such as the header block.
2157 for (BasicBlock *BB : TheLoop->blocks()) {
2158 [[maybe_unused]] bool R = blockCanBePredicated(BB, SafePointers, MaskedOp);
2159 assert(R && "Must be able to predicate block when tail-folding.");
2160 }
2161}
2162
2163} // namespace llvm