LLVM 23.0.0git
LoopVectorizationLegality.cpp
Go to the documentation of this file.
1//===- LoopVectorizationLegality.cpp --------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file provides loop vectorization legality analysis. Original code
10// resided in LoopVectorize.cpp for a long time.
11//
12// At this point, it is implemented as a utility class, not as an analysis
13// pass. It should be easy to create an analysis pass around it if there
14// is a need (but D45420 needs to happen first).
15//
16
19#include "llvm/Analysis/Loads.h"
28#include "llvm/IR/Dominators.h"
33
34using namespace llvm;
35using namespace PatternMatch;
36
37#define LV_NAME "loop-vectorize"
38#define DEBUG_TYPE LV_NAME
39
// Command-line knobs controlling loop-vectorization legality and hint
// handling, plus the interleave-factor ceiling used by hint validation.
// NOTE(review): this capture is a numbered listing with several lines
// missing (the cl::opt variable names / cl::values clauses at original lines
// 56, 60, 65-66, 68, 71-72, 74-75, 78-79, 82-83, 87); annotate only.
40static cl::opt<bool>
41 EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
42 cl::desc("Enable if-conversion during vectorization."));
43
44static cl::opt<bool>
45AllowStridedPointerIVs("lv-strided-pointer-ivs", cl::init(false), cl::Hidden,
46 cl::desc("Enable recognition of non-constant strided "
47 "pointer induction variables."));
48
49static cl::opt<bool>
50 HintsAllowReordering("hints-allow-reordering", cl::init(true), cl::Hidden,
51 cl::desc("Allow enabling loop hints to reorder "
52 "FP operations during vectorization."));
53
54// TODO: Move size-based thresholds out of legality checking, make cost based
55// decisions instead of hard thresholds.
57 "vectorize-scev-check-threshold", cl::init(16), cl::Hidden,
58 cl::desc("The maximum number of SCEV checks allowed."));
59
61 "pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden,
62 cl::desc("The maximum number of SCEV checks allowed with a "
63 "vectorize(enable) pragma"));
64
67 "scalable-vectorization", cl::init(LoopVectorizeHints::SK_Unspecified),
69 cl::desc("Control whether the compiler can use scalable vectors to "
70 "vectorize a loop"),
73 "Scalable vectorization is disabled."),
76 "Scalable vectorization is available and favored when the "
77 "cost is inconclusive."),
80 "Scalable vectorization is available and favored when the "
81 "cost is inconclusive."),
84 "Scalable vectorization is available and always favored when "
85 "feasible")));
86
88 "enable-histogram-loop-vectorization", cl::init(false), cl::Hidden,
89 cl::desc("Enables autovectorization of some loops containing histograms"));
90
91/// Maximum vectorization interleave count.
92static const unsigned MaxInterleaveFactor = 16;
93
94namespace llvm {
95
// Returns true if \p Val is a legal value for this hint kind: interleave
// counts must be powers of two no larger than MaxInterleaveFactor; the
// flag-like hints accept only 0 or 1.
96bool LoopVectorizeHints::Hint::validate(unsigned Val) {
97 switch (Kind) {
98 case HK_WIDTH:
// NOTE(review): the HK_WIDTH return statement (original line 99) is missing
// from this capture; as listed, HK_WIDTH appears to fall through to
// HK_INTERLEAVE — confirm against upstream.
100 case HK_INTERLEAVE:
101 return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
102 case HK_FORCE:
103 return (Val <= 1);
104 case HK_ISVECTORIZED:
105 case HK_PREDICATE:
106 case HK_SCALABLE:
// Boolean-valued hints: only 0 or 1 are meaningful.
107 return (Val == 0 || Val == 1);
108 }
// Unknown hint kinds are rejected.
109 return false;
110}
111
// LoopVectorizeHints constructor: each Hint member is seeded with its
// metadata suffix and default value, then overridden from loop metadata
// (getHintsFromMetadata) and from command-line flags.
// NOTE(review): the leading signature lines and several statements are
// absent from this capture (original lines 112, 114-115, 127-128, 136, 139,
// 151, 155, 163); the comments below describe only what is visible.
113 bool InterleaveOnlyWhenForced,
116 : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH),
117 Interleave("interleave.count", InterleaveOnlyWhenForced, HK_INTERLEAVE),
118 Force("vectorize.enable", FK_Undefined, HK_FORCE),
119 IsVectorized("isvectorized", 0, HK_ISVECTORIZED),
120 Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE),
121 Scalable("vectorize.scalable.enable", SK_Unspecified, HK_SCALABLE),
122 TheLoop(L), ORE(ORE) {
123 // Populate values with existing loop metadata.
124 getHintsFromMetadata();
125
126 // force-vector-interleave overrides DisableInterleaving.
129
130 // If the metadata doesn't explicitly specify whether to enable scalable
131 // vectorization, then decide based on the following criteria (increasing
132 // level of priority):
133 // - Target default
134 // - Metadata width
135 // - Force option (always overrides)
137 if (TTI)
138 Scalable.Value = TTI->enableScalableVectorization() ? SK_PreferScalable
140
141 if (Width.Value)
142 // If the width is set, but the metadata says nothing about the scalable
143 // property, then assume it concerns only a fixed-width UserVF.
144 // If width is not set, the flag takes precedence.
145 Scalable.Value = SK_FixedWidthOnly;
146 }
147
148 // If the flag is set to force any use of scalable vectors, override the loop
149 // hints.
150 if (ForceScalableVectorization.getValue() !=
152 Scalable.Value = ForceScalableVectorization.getValue();
153
154 // Scalable vectorization is disabled if no preference is specified.
156 Scalable.Value = SK_FixedWidthOnly;
157
158 if (IsVectorized.Value != 1)
159 // If the vectorization width and interleaving count are both 1 then
160 // consider the loop to have been already vectorized because there's
161 // nothing more that we can do.
162 IsVectorized.Value =
164 LLVM_DEBUG(if (InterleaveOnlyWhenForced && getInterleave() == 1) dbgs()
165 << "LV: Interleaving disabled by the pass manager\n");
166}
167
// Body of (presumably) LoopVectorizeHints::setAlreadyVectorized — the
// function signature at original line 168 is missing from this capture;
// confirm against upstream. Attaches "llvm.loop.isvectorized" metadata to
// the loop, dropping the now-consumed vectorize./interleave. hint prefixes.
169 LLVMContext &Context = TheLoop->getHeader()->getContext();
170
171 MDNode *IsVectorizedMD = MDNode::get(
172 Context,
173 {MDString::get(Context, "llvm.loop.isvectorized"),
174 ConstantAsMetadata::get(ConstantInt::get(Context, APInt(32, 1)))});
175 MDNode *LoopID = TheLoop->getLoopID();
176 MDNode *NewLoopID =
177 makePostTransformationMetadata(Context, LoopID,
178 {Twine(Prefix(), "vectorize.").str(),
179 Twine(Prefix(), "interleave.").str()},
180 {IsVectorizedMD});
181 TheLoop->setLoopID(NewLoopID);
182
// Keep the cached hint value in sync with the metadata just written.
183 // Update internal cache.
184 IsVectorized.Value = 1;
185}
186
187void LoopVectorizeHints::reportDisallowedVectorization(
188 const StringRef DebugMsg, const StringRef RemarkName,
189 const StringRef RemarkMsg, const Loop *L) const {
190 LLVM_DEBUG(dbgs() << "LV: Not vectorizing: " << DebugMsg << ".\n");
191 ORE.emit(OptimizationRemarkMissed(LV_NAME, RemarkName, L->getStartLoc(),
192 L->getHeader())
193 << "loop not vectorized: " << RemarkMsg);
194}
195
// Body of LoopVectorizeHints::allowVectorization — the opening signature
// line (original line 196) and the first disable-check condition (original
// line 198) are missing from this capture. Decides whether the vectorizer
// may consider this loop at all, emitting missed-optimization remarks when
// it may not.
197 Function *F, Loop *L, bool VectorizeOnlyWhenForced) const {
199 if (Force.Value == LoopVectorizeHints::FK_Disabled) {
200 reportDisallowedVectorization("#pragma vectorize disable",
201 "MissedExplicitlyDisabled",
202 "vectorization is explicitly disabled", L);
203 } else if (hasDisableAllTransformsHint(L)) {
204 reportDisallowedVectorization("loop hasDisableAllTransformsHint",
205 "MissedTransformsDisabled",
206 "loop transformations are disabled", L);
207 } else {
208 llvm_unreachable("loop vect disabled for an unknown reason");
209 }
210 return false;
211 }
212
// Honor -vectorize-only-when-forced: without an explicit enable pragma,
// refuse the loop.
213 if (VectorizeOnlyWhenForced && getForce() != LoopVectorizeHints::FK_Enabled) {
214 reportDisallowedVectorization(
215 "VectorizeOnlyWhenForced is set, and no #pragma vectorize enable",
216 "MissedForceOnly", "only vectorizing loops that explicitly request it",
217 L);
218 return false;
219 }
220
221 if (getIsVectorized() == 1) {
222 LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
223 // FIXME: Add interleave.disable metadata. This will allow
224 // vectorize.disable to be used without disabling the pass and errors
225 // to differentiate between disabled vectorization and a width of 1.
226 ORE.emit([&]() {
// NOTE(review): the remark-construction line (original line 227) is missing
// from this capture.
228 "AllDisabled", L->getStartLoc(),
229 L->getHeader())
230 << "loop not vectorized: vectorization and interleaving are "
231 "explicitly disabled, or the loop has already been "
232 "vectorized";
233 });
234 return false;
235 }
236
237 return true;
238}
239
// Body of (presumably) LoopVectorizeHints::emitRemarkWithHints — the
// signature at original line 240 is missing from this capture. Emits a
// "loop not vectorized" remark, decorated with the Force/Width/Interleave
// hint values when vectorization was explicitly requested.
241 using namespace ore;
242
243 ORE.emit([&]() {
// Explicit disable gets its own, more specific remark.
244 if (Force.Value == LoopVectorizeHints::FK_Disabled)
245 return OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled",
246 TheLoop->getStartLoc(),
247 TheLoop->getHeader())
248 << "loop not vectorized: vectorization is explicitly disabled";
249
250 OptimizationRemarkMissed R(LV_NAME, "MissedDetails", TheLoop->getStartLoc(),
251 TheLoop->getHeader());
252 R << "loop not vectorized";
// If the user forced vectorization, echo the hint values so the remark
// explains what was requested.
253 if (Force.Value == LoopVectorizeHints::FK_Enabled) {
254 R << " (Force=" << NV("Force", true);
255 if (Width.Value != 0)
256 R << ", Vector Width=" << NV("VectorWidth", getWidth());
257 if (getInterleave() != 0)
258 R << ", Interleave Count=" << NV("InterleaveCount", getInterleave());
259 R << ")";
260 }
261 return R;
262 });
263}
264
274
// Body of (presumably) LoopVectorizeHints::allowReordering — the signature
// (original line 275) and the first half of the return expression (original
// line 280) are missing from this capture; confirm against upstream.
276 // Allow the vectorizer to change the order of operations if enabling
277 // loop hints are provided
278 ElementCount EC = getWidth();
279 return HintsAllowReordering &&
281 EC.getKnownMinValue() > 1);
282}
283
// Scans the loop-ID metadata and forwards each recognized single-argument
// hint (an MDString name plus one operand) to setHint.
284void LoopVectorizeHints::getHintsFromMetadata() {
285 MDNode *LoopID = TheLoop->getLoopID();
286 if (!LoopID)
287 return;
288
289 // First operand should refer to the loop id itself.
290 assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
291 assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
292
293 for (const MDOperand &MDO : llvm::drop_begin(LoopID->operands())) {
294 const MDString *S = nullptr;
// NOTE(review): the declaration of 'Args' (original line 295, the small
// vector collecting the hint's Metadata operands) is missing from this
// capture.
296
297 // The expected hint is either a MDString or a MDNode with the first
298 // operand a MDString.
299 if (const MDNode *MD = dyn_cast<MDNode>(MDO)) {
300 if (!MD || MD->getNumOperands() == 0)
301 continue;
302 S = dyn_cast<MDString>(MD->getOperand(0));
303 for (unsigned Idx = 1; Idx < MD->getNumOperands(); ++Idx)
304 Args.push_back(MD->getOperand(Idx));
305 } else {
306 S = dyn_cast<MDString>(MDO);
307 assert(Args.size() == 0 && "too many arguments for MDString");
308 }
309
310 if (!S)
311 continue;
312
313 // Check if the hint starts with the loop metadata prefix.
314 StringRef Name = S->getString();
// Only single-argument hints are interpreted; others are ignored.
315 if (Args.size() == 1)
316 setHint(Name, Args[0]);
317 }
318}
319
320void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) {
321 if (!Name.consume_front(Prefix()))
322 return;
323
324 const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg);
325 if (!C)
326 return;
327 unsigned Val = C->getZExtValue();
328
329 Hint *Hints[] = {&Width, &Interleave, &Force,
330 &IsVectorized, &Predicate, &Scalable};
331 for (auto *H : Hints) {
332 if (Name == H->Name) {
333 if (H->validate(Val))
334 H->Value = Val;
335 else
336 LLVM_DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n");
337 break;
338 }
339 }
340}
341
342// Return true if the inner loop \p Lp is uniform with regard to the outer loop
343// \p OuterLp (i.e., if the outer loop is vectorized, all the vector lanes
344// executing the inner loop will execute the same iterations). This check is
345// very constrained for now but it will be relaxed in the future. \p Lp is
346// considered uniform if it meets all the following conditions:
347// 1) it has a canonical IV (starting from 0 and with stride 1),
348// 2) its latch terminator is a conditional branch and,
349// 3) its latch condition is a compare instruction whose operands are the
350// canonical IV and an OuterLp invariant.
351// This check doesn't take into account the uniformity of other conditions not
352// related to the loop latch because they don't affect the loop uniformity.
353//
354// NOTE: We decided to keep all these checks and its associated documentation
355// together so that we can easily have a picture of the current supported loop
356// nests. However, some of the current checks don't depend on \p OuterLp and
357// would be redundantly executed for each \p Lp if we invoked this function for
358// different candidate outer loops. This is not the case for now because we
359// don't currently have the infrastructure to evaluate multiple candidate outer
360// loops and \p OuterLp will be a fixed parameter while we only support explicit
361// outer loop vectorization. It's also very likely that these checks go away
362// before introducing the aforementioned infrastructure. However, if this is not
363// the case, we should move the \p OuterLp independent checks to a separate
364// function that is only executed once for each \p Lp.
// Returns true if inner loop \p Lp is uniform w.r.t. \p OuterLp; see the
// block comment above for the precise conditions 1-3 being checked.
// NOTE(review): two lines are missing from this capture: original line 374
// (the definition of 'IV', the canonical induction PHI of Lp) and original
// line 391 (the opening 'LLVM_DEBUG(' of the message below).
365static bool isUniformLoop(Loop *Lp, Loop *OuterLp) {
366 assert(Lp->getLoopLatch() && "Expected loop with a single latch.");
367
368 // If Lp is the outer loop, it's uniform by definition.
369 if (Lp == OuterLp)
370 return true;
371 assert(OuterLp->contains(Lp) && "OuterLp must contain Lp.");
372
// 1) Lp must have a canonical IV ('IV'; its definition — original line 374 —
// is missing from this capture).
373 // 1.
375 if (!IV) {
376 LLVM_DEBUG(dbgs() << "LV: Canonical IV not found.\n");
377 return false;
378 }
379
// 2) The latch must terminate in a conditional branch.
380 // 2.
381 BasicBlock *Latch = Lp->getLoopLatch();
382 auto *LatchBr = dyn_cast<CondBrInst>(Latch->getTerminator());
383 if (!LatchBr) {
384 LLVM_DEBUG(dbgs() << "LV: Unsupported loop latch branch.\n");
385 return false;
386 }
387
// 3) The latch condition must be a compare of the IV update against an
// OuterLp-invariant value (in either operand order).
388 // 3.
389 auto *LatchCmp = dyn_cast<CmpInst>(LatchBr->getCondition());
390 if (!LatchCmp) {
392 dbgs() << "LV: Loop latch condition is not a compare instruction.\n");
393 return false;
394 }
395
396 Value *CondOp0 = LatchCmp->getOperand(0);
397 Value *CondOp1 = LatchCmp->getOperand(1);
398 Value *IVUpdate = IV->getIncomingValueForBlock(Latch);
399 if (!(CondOp0 == IVUpdate && OuterLp->isLoopInvariant(CondOp1)) &&
400 !(CondOp1 == IVUpdate && OuterLp->isLoopInvariant(CondOp0))) {
401 LLVM_DEBUG(dbgs() << "LV: Loop latch condition is not uniform.\n");
402 return false;
403 }
404
405 return true;
406}
407
408// Return true if \p Lp and all its nested loops are uniform with regard to \p
409// OuterLp.
410static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp) {
411 if (!isUniformLoop(Lp, OuterLp))
412 return false;
413
414 // Check if nested loops are uniform.
415 for (Loop *SubLp : *Lp)
416 if (!isUniformLoopNest(SubLp, OuterLp))
417 return false;
418
419 return true;
420}
421
// Body of a helper mapping an induction's type to the integer type used for
// trip-count reasoning — the signature (original line 422) is missing from
// this capture. Pointers map to the DataLayout's index integer type; narrow
// integers are widened to i32.
423 assert(Ty->isIntOrPtrTy() && "Expected integer or pointer type");
424
425 if (Ty->isPointerTy())
426 return DL.getIntPtrType(Ty->getContext(), Ty->getPointerAddressSpace());
427
428 // It is possible that char's or short's overflow when we ask for the loop's
429 // trip count, work around this by changing the type size.
430 if (Ty->getScalarSizeInBits() < 32)
431 return Type::getInt32Ty(Ty->getContext());
432
433 return cast<IntegerType>(Ty);
434}
435
// Tail of a helper returning the wider of two induction types — the
// signature line (original line 436) and the lines defining TyA/TyB from
// Ty0/Ty1 (original lines 438-439) are missing from this capture.
437 Type *Ty1) {
440 return TyA->getScalarSizeInBits() > TyB->getScalarSizeInBits() ? TyA : TyB;
441}
442
443/// Check that the instruction has outside loop users and is not an
444/// identified reduction variable.
445static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
446 SmallPtrSetImpl<Value *> &AllowedExit) {
447 // Reductions, Inductions and non-header phis are allowed to have exit users. All
448 // other instructions must not have external users.
449 if (!AllowedExit.count(Inst))
450 // Check that all of the users of the loop are inside the BB.
451 for (User *U : Inst->users()) {
// NOTE(review): the definition of 'UI' (original line 452, the user cast to
// an Instruction) is missing from this capture.
453 // This user may be a reduction exit value.
454 if (!TheLoop->contains(UI)) {
455 LLVM_DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n');
456 return true;
457 }
458 }
459 return false;
460}
461
462/// Returns true if A and B have same pointer operands or same SCEVs addresses
// NOTE(review): the first signature line (original line 463, presumably
// 'static bool storeToSameAddress(ScalarEvolution *SE, StoreInst *A,') is
// missing from this capture — confirm against upstream.
464 StoreInst *B) {
// Identical instructions trivially store to the same address.
465 // Compare store
466 if (A == B)
467 return true;
468
469 // Otherwise Compare pointers
470 Value *APtr = A->getPointerOperand();
471 Value *BPtr = B->getPointerOperand();
472 if (APtr == BPtr)
473 return true;
474
// Fall back to SCEV equality of the two addresses.
475 // Otherwise compare address SCEVs
476 return SE->getSCEV(APtr) == SE->getSCEV(BPtr);
477}
478
// Body of LoopVectorizationLegality::isConsecutivePtr — the opening
// signature line (original line 479) is missing from this capture. Returns
// +1/-1 for unit-stride forward/reverse access through \p Ptr, 0 otherwise.
480 Value *Ptr) const {
481 // FIXME: Currently, the set of symbolic strides is sometimes queried before
482 // it's collected. This happens from canVectorizeWithIfConvert, when the
483 // pointer is checked to reference consecutive elements suitable for a
484 // masked access.
485 const auto &Strides =
486 LAI ? LAI->getSymbolicStrides() : DenseMap<Value *, const SCEV *>();
487
488 int Stride = getPtrStride(PSE, AccessTy, Ptr, TheLoop, *DT, Strides,
489 AllowRuntimeSCEVChecks, false)
490 .value_or(0);
// Only unit strides (forward or reverse) count as consecutive.
491 if (Stride == 1 || Stride == -1)
492 return Stride;
493 return 0;
494}
495
// Body of LoopVectorizationLegality::isInvariant — the signature (original
// line 496) is missing from this capture. Delegates to LoopAccessInfo.
497 return LAI->isInvariant(V);
498}
499
500namespace {
501/// A rewriter to build the SCEVs for each of the VF lanes in the expected
502/// vectorized loop, which can then be compared to detect their uniformity. This
503/// is done by replacing the AddRec SCEVs of the original scalar loop (TheLoop)
504/// with new AddRecs where the step is multiplied by StepMultiplier and Offset *
505/// Step is added. Also checks if all sub-expressions are analyzable w.r.t.
506/// uniformity.
507class SCEVAddRecForUniformityRewriter
508 : public SCEVRewriteVisitor<SCEVAddRecForUniformityRewriter> {
509 /// Multiplier to be applied to the step of AddRecs in TheLoop.
510 unsigned StepMultiplier;
511
512 /// Offset to be added to the AddRecs in TheLoop.
513 unsigned Offset;
514
515 /// Loop for which to rewrite AddRecsFor.
516 Loop *TheLoop;
517
518 /// Is any sub-expressions not analyzable w.r.t. uniformity?
519 bool CannotAnalyze = false;
520
521 bool canAnalyze() const { return !CannotAnalyze; }
522
523public:
524 SCEVAddRecForUniformityRewriter(ScalarEvolution &SE, unsigned StepMultiplier,
525 unsigned Offset, Loop *TheLoop)
526 : SCEVRewriteVisitor(SE), StepMultiplier(StepMultiplier), Offset(Offset),
527 TheLoop(TheLoop) {}
528
529 const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
530 assert(Expr->getLoop() == TheLoop &&
531 "addrec outside of TheLoop must be invariant and should have been "
532 "handled earlier");
533 // Build a new AddRec by multiplying the step by StepMultiplier and
534 // incrementing the start by Offset * step.
535 Type *Ty = Expr->getType();
536 const SCEV *Step = Expr->getStepRecurrence(SE);
// A step that varies inside TheLoop cannot be analyzed for uniformity.
537 if (!SE.isLoopInvariant(Step, TheLoop)) {
538 CannotAnalyze = true;
539 return Expr;
540 }
541 const SCEV *NewStep =
542 SE.getMulExpr(Step, SE.getConstant(Ty, StepMultiplier));
543 const SCEV *ScaledOffset = SE.getMulExpr(Step, SE.getConstant(Ty, Offset));
544 const SCEV *NewStart =
545 SE.getAddExpr(Expr->getStart(), SCEVUse(ScaledOffset));
546 return SE.getAddRecExpr(NewStart, NewStep, TheLoop, SCEV::FlagAnyWrap);
547 }
548
// Loop-invariant sub-expressions are returned unchanged; otherwise the
// traversal continues into the expression.
549 const SCEV *visit(const SCEV *S) {
550 if (CannotAnalyze || SE.isLoopInvariant(S, TheLoop))
551 return S;
// NOTE(review): the delegation to the base SCEVRewriteVisitor::visit
// (original line 552) is missing from this capture.
553 }
554
555 const SCEV *visitUnknown(const SCEVUnknown *S) {
556 if (SE.isLoopInvariant(S, TheLoop))
557 return S;
558 // The value could vary across iterations.
559 CannotAnalyze = true;
560 return S;
561 }
562
563 const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *S) {
564 // Could not analyze the expression.
565 CannotAnalyze = true;
566 return S;
567 }
568
569 static const SCEV *rewrite(const SCEV *S, ScalarEvolution &SE,
570 unsigned StepMultiplier, unsigned Offset,
571 Loop *TheLoop) {
572 /// Bail out if the expression does not contain an UDiv expression.
573 /// Uniform values which are not loop invariant require operations to strip
574 /// out the lowest bits. For now just look for UDivs and use it to avoid
575 /// re-writing UDIV-free expressions for other lanes to limit compile time.
576 if (!SCEVExprContains(S,
577 [](const SCEV *S) { return isa<SCEVUDivExpr>(S); }))
578 return SE.getCouldNotCompute();
579
580 SCEVAddRecForUniformityRewriter Rewriter(SE, StepMultiplier, Offset,
581 TheLoop);
582 const SCEV *Result = Rewriter.visit(S);
583
584 if (Rewriter.canAnalyze())
585 return Result;
586 return SE.getCouldNotCompute();
587 }
588};
589
590} // namespace
591
// Body of LoopVectorizationLegality::isUniform — the signature (original
// line 592) is missing from this capture. Returns true if V evaluates to
// the same value for every lane of a VF-wide vector iteration, using the
// SCEV lane-rewriter above.
593 if (isInvariant(V))
594 return true;
// The lane-by-lane SCEV comparison below only works for fixed VFs.
595 if (VF.isScalable())
596 return false;
597 if (VF.isScalar())
598 return true;
599
600 // Since we rely on SCEV for uniformity, if the type is not SCEVable, it is
601 // never considered uniform.
602 auto *SE = PSE.getSE();
603 if (!SE->isSCEVable(V->getType()))
604 return false;
605 const SCEV *S = SE->getSCEV(V);
606
607 // Rewrite AddRecs in TheLoop to step by VF and check if the expression for
608 // lane 0 matches the expressions for all other lanes.
609 unsigned FixedVF = VF.getKnownMinValue();
610 const SCEV *FirstLaneExpr =
611 SCEVAddRecForUniformityRewriter::rewrite(S, *SE, FixedVF, 0, TheLoop);
612 if (isa<SCEVCouldNotCompute>(FirstLaneExpr))
613 return false;
614
615 // Make sure the expressions for lanes FixedVF-1..1 match the expression for
616 // lane 0. We check lanes in reverse order for compile-time, as frequently
617 // checking the last lane is sufficient to rule out uniformity.
618 return all_of(reverse(seq<unsigned>(1, FixedVF)), [&](unsigned I) {
619 const SCEV *IthLaneExpr =
620 SCEVAddRecForUniformityRewriter::rewrite(S, *SE, FixedVF, I, TheLoop);
621 return FirstLaneExpr == IthLaneExpr;
622 });
623}
624
// Tail of LoopVectorizationLegality::isUniformMemOp — the signature start
// (original line 625) and the definition of 'Ptr' (original line 627,
// presumably the load/store pointer operand of I) are missing from this
// capture.
626 ElementCount VF) const {
628 if (!Ptr)
629 return false;
630 // Note: There's nothing inherent which prevents predicated loads and
631 // stores from being uniform. The current lowering simply doesn't handle
632 // it; in particular, the cost model distinguishes scatter/gather from
633 // scalar w/predication, and we currently rely on the scalar path.
634 return isUniform(Ptr, VF) && !blockNeedsPredication(I.getParent());
635}
636
// Checks the legality constraints specific to explicit outer-loop
// vectorization: supported terminators/branches, a uniform inner-loop nest,
// and recognizable outer-loop inductions.
// NOTE(review): the terminator-check condition line (original line 648) is
// missing from this capture.
637bool LoopVectorizationLegality::canVectorizeOuterLoop() {
638 assert(!TheLoop->isInnermost() && "We are not vectorizing an outer loop.");
639 // Store the result and return it at the end instead of exiting early, in case
640 // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
641 bool Result = true;
642 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
643
644 for (BasicBlock *BB : TheLoop->blocks()) {
645 // Check whether the BB terminator is a branch. Any other terminator is
646 // not supported yet.
647 Instruction *Term = BB->getTerminator();
649 reportVectorizationFailure("Unsupported basic block terminator",
650 "loop control flow is not understood by vectorizer",
651 "CFGNotUnderstood", ORE, TheLoop);
652 if (DoExtraAnalysis)
653 Result = false;
654 else
655 return false;
656 }
657
658 // Check whether the branch is a supported one. Only unconditional
659 // branches, conditional branches with an outer loop invariant condition or
660 // backedges are supported.
661 // FIXME: We skip these checks when VPlan predication is enabled as we
662 // want to allow divergent branches. This whole check will be removed
663 // once VPlan predication is on by default.
664 auto *Br = dyn_cast<CondBrInst>(Term);
665 if (Br && !TheLoop->isLoopInvariant(Br->getCondition()) &&
666 !LI->isLoopHeader(Br->getSuccessor(0)) &&
667 !LI->isLoopHeader(Br->getSuccessor(1))) {
668 reportVectorizationFailure("Unsupported conditional branch",
669 "loop control flow is not understood by vectorizer",
670 "CFGNotUnderstood", ORE, TheLoop);
671 if (DoExtraAnalysis)
672 Result = false;
673 else
674 return false;
675 }
676 }
677
678 // Check whether inner loops are uniform. At this point, we only support
679 // simple outer loops scenarios with uniform nested loops.
680 if (!isUniformLoopNest(TheLoop /*loop nest*/,
681 TheLoop /*context outer loop*/)) {
682 reportVectorizationFailure("Outer loop contains divergent loops",
683 "loop control flow is not understood by vectorizer",
684 "CFGNotUnderstood", ORE, TheLoop);
685 if (DoExtraAnalysis)
686 Result = false;
687 else
688 return false;
689 }
690
691 // Check whether we are able to set up outer loop induction.
692 if (!setupOuterLoopInductions()) {
693 reportVectorizationFailure("Unsupported outer loop Phi(s)",
694 "UnsupportedPhi", ORE, TheLoop);
695 if (DoExtraAnalysis)
696 Result = false;
697 else
698 return false;
699 }
700
701 return Result;
702}
703
704void LoopVectorizationLegality::addInductionPhi(
705 PHINode *Phi, const InductionDescriptor &ID,
706 SmallPtrSetImpl<Value *> &AllowedExit) {
707 Inductions[Phi] = ID;
708
709 // In case this induction also comes with casts that we know we can ignore
710 // in the vectorized loop body, record them here. All casts could be recorded
711 // here for ignoring, but suffices to record only the first (as it is the
712 // only one that may bw used outside the cast sequence).
713 ArrayRef<Instruction *> Casts = ID.getCastInsts();
714 if (!Casts.empty())
715 InductionCastsToIgnore.insert(*Casts.begin());
716
717 Type *PhiTy = Phi->getType();
718 const DataLayout &DL = Phi->getDataLayout();
719
720 assert((PhiTy->isIntOrPtrTy() || PhiTy->isFloatingPointTy()) &&
721 "Expected int, ptr, or FP induction phi type");
722
723 // Get the widest type.
724 if (PhiTy->isIntOrPtrTy()) {
725 if (!WidestIndTy)
726 WidestIndTy = getInductionIntegerTy(DL, PhiTy);
727 else
728 WidestIndTy = getWiderInductionTy(DL, PhiTy, WidestIndTy);
729 }
730
731 // Int inductions are special because we only allow one IV.
732 if (ID.getKind() == InductionDescriptor::IK_IntInduction &&
733 ID.getConstIntStepValue() && ID.getConstIntStepValue()->isOne() &&
734 isa<Constant>(ID.getStartValue()) &&
735 cast<Constant>(ID.getStartValue())->isNullValue()) {
736
737 // Use the phi node with the widest type as induction. Use the last
738 // one if there are multiple (no good reason for doing this other
739 // than it is expedient). We've checked that it begins at zero and
740 // steps by one, so this is a canonical induction variable.
741 if (!PrimaryInduction || PhiTy == WidestIndTy)
742 PrimaryInduction = Phi;
743 }
744
745 // Both the PHI node itself, and the "post-increment" value feeding
746 // back into the PHI node may have external users.
747 // We can allow those uses, except if the SCEVs we have for them rely
748 // on predicates that only hold within the loop, since allowing the exit
749 // currently means re-using this SCEV outside the loop (see PR33706 for more
750 // details).
751 if (PSE.getPredicate().isAlwaysTrue()) {
752 AllowedExit.insert(Phi);
753 AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch()));
754 }
755
756 LLVM_DEBUG(dbgs() << "LV: Found an induction variable.\n");
757}
758
// Records every outer-loop header phi that is a supported induction; fails
// (returns false) as soon as one header phi is not recognized.
// NOTE(review): two lines are missing from this capture: original line 766
// (the second half of the isInductionPHI condition) and original line 772
// (the opening 'LLVM_DEBUG(' of the message below).
759bool LoopVectorizationLegality::setupOuterLoopInductions() {
760 BasicBlock *Header = TheLoop->getHeader();
761
762 // Returns true if a given Phi is a supported induction.
763 auto IsSupportedPhi = [&](PHINode &Phi) -> bool {
764 InductionDescriptor ID;
765 if (InductionDescriptor::isInductionPHI(&Phi, TheLoop, PSE, ID) &&
767 addInductionPhi(&Phi, ID, AllowedExit);
768 return true;
769 }
770 // Bail out for any Phi in the outer loop header that is not a supported
771 // induction.
773 dbgs() << "LV: Found unsupported PHI for outer loop vectorization.\n");
774 return false;
775 };
776
777 return llvm::all_of(Header->phis(), IsSupportedPhi);
778}
779
780/// Checks if a function is scalarizable according to the TLI, in
781/// the sense that it should be vectorized and then expanded in
782/// multiple scalar calls. This is represented in the
783/// TLI via mappings that do not specify a vector name, as in the
784/// following example:
785///
786/// const VecDesc VecIntrinsics[] = {
787/// {"llvm.phx.abs.i32", "", 4}
788/// };
// NOTE(review): the two for-loop headers iterating the fixed and scalable
// VFs (original lines 797 and 800) are missing from this capture.
789static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI) {
790 const StringRef ScalarName = CI.getCalledFunction()->getName();
791 bool Scalarize = TLI.isFunctionVectorizable(ScalarName);
792 // Check that all known VFs are not associated to a vector
793 // function, i.e. the vector name is empty.
794 if (Scalarize) {
795 ElementCount WidestFixedVF, WidestScalableVF;
796 TLI.getWidestVF(ScalarName, WidestFixedVF, WidestScalableVF);
798 ElementCount::isKnownLE(VF, WidestFixedVF); VF *= 2)
799 Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF);
801 ElementCount::isKnownLE(VF, WidestScalableVF); VF *= 2)
802 Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF);
803 assert((WidestScalableVF.isZero() || !Scalarize) &&
804 "Caller may decide to scalarize a variant using a scalable VF");
805 }
806 return Scalarize;
807}
808
// Walks every instruction in the loop, delegating the per-instruction checks
// to canVectorizeInstr, then validates the induction bookkeeping gathered
// along the way (a usable induction variable and its widest type).
// NOTE(review): the two 'reportVectorizationFailure(' opening lines
// (original lines 825 and 832) are missing from this capture.
809bool LoopVectorizationLegality::canVectorizeInstrs() {
810 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
811 bool Result = true;
812
813 // For each block in the loop.
814 for (BasicBlock *BB : TheLoop->blocks()) {
815 // Scan the instructions in the block and look for hazards.
816 for (Instruction &I : *BB) {
817 Result &= canVectorizeInstr(I);
// Without extra analysis, stop at the first hazard found.
818 if (!DoExtraAnalysis && !Result)
819 return false;
820 }
821 }
822
823 if (!PrimaryInduction) {
824 if (Inductions.empty()) {
826 "Did not find one integer induction var",
827 "loop induction variable could not be identified",
828 "NoInductionVariable", ORE, TheLoop);
829 return false;
830 }
831 if (!WidestIndTy) {
833 "Did not find one integer induction var",
834 "integer loop induction variable could not be identified",
835 "NoIntegerInductionVariable", ORE, TheLoop);
836 return false;
837 }
838 LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
839 }
840
841 // Now we know the widest induction type, check if our found induction
842 // is the same size. If it's not, unset it here and InnerLoopVectorizer
843 // will create another.
844 if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType())
845 PrimaryInduction = nullptr;
846
847 return Result;
848}
849
850bool LoopVectorizationLegality::canVectorizeInstr(Instruction &I) {
851 BasicBlock *BB = I.getParent();
852 BasicBlock *Header = TheLoop->getHeader();
853
854 if (auto *Phi = dyn_cast<PHINode>(&I)) {
855 Type *PhiTy = Phi->getType();
856 // Check that this PHI type is allowed.
857 if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
858 !PhiTy->isPointerTy()) {
860 "Found a non-int non-pointer PHI",
861 "loop control flow is not understood by vectorizer",
862 "CFGNotUnderstood", ORE, TheLoop);
863 return false;
864 }
865
866 // If this PHINode is not in the header block, then we know that we
867 // can convert it to select during if-conversion. No need to check if
868 // the PHIs in this block are induction or reduction variables.
869 if (BB != Header) {
870 // Non-header phi nodes that have outside uses can be vectorized. Add
871 // them to the list of allowed exits.
872 // Unsafe cyclic dependencies with header phis are identified during
873 // legalization for reduction, induction and fixed order
874 // recurrences.
875 AllowedExit.insert(&I);
876 return true;
877 }
878
879 // We only allow if-converted PHIs with exactly two incoming values.
880 if (Phi->getNumIncomingValues() != 2) {
882 "Found an invalid PHI",
883 "loop control flow is not understood by vectorizer",
884 "CFGNotUnderstood", ORE, TheLoop, Phi);
885 return false;
886 }
887
888 RecurrenceDescriptor RedDes;
889 if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC, DT,
890 PSE.getSE())) {
891 Requirements->addExactFPMathInst(RedDes.getExactFPMathInst());
892 AllowedExit.insert(RedDes.getLoopExitInstr());
893 Reductions[Phi] = std::move(RedDes);
896 RedDes.getRecurrenceKind())) &&
897 "Only min/max recurrences are allowed to have multiple uses "
898 "currently");
899 return true;
900 }
901
902 // We prevent matching non-constant strided pointer IVS to preserve
903 // historical vectorizer behavior after a generalization of the
904 // IVDescriptor code. The intent is to remove this check, but we
905 // have to fix issues around code quality for such loops first.
906 auto IsDisallowedStridedPointerInduction =
907 [](const InductionDescriptor &ID) {
909 return false;
910 return ID.getKind() == InductionDescriptor::IK_PtrInduction &&
911 ID.getConstIntStepValue() == nullptr;
912 };
913
914 // TODO: Instead of recording the AllowedExit, it would be good to
915 // record the complementary set: NotAllowedExit. These include (but may
916 // not be limited to):
917 // 1. Reduction phis as they represent the one-before-last value, which
918 // is not available when vectorized
919 // 2. Induction phis and increment when SCEV predicates cannot be used
920 // outside the loop - see addInductionPhi
921 // 3. Non-Phis with outside uses when SCEV predicates cannot be used
922 // outside the loop - see call to hasOutsideLoopUser in the non-phi
923 // handling below
924 // 4. FixedOrderRecurrence phis that can possibly be handled by
925 // extraction.
926 // By recording these, we can then reason about ways to vectorize each
927 // of these NotAllowedExit.
928 InductionDescriptor ID;
929 if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID) &&
930 !IsDisallowedStridedPointerInduction(ID)) {
931 addInductionPhi(Phi, ID, AllowedExit);
932 Requirements->addExactFPMathInst(ID.getExactFPMathInst());
933 return true;
934 }
935
936 if (RecurrenceDescriptor::isFixedOrderRecurrence(Phi, TheLoop, DT)) {
937 AllowedExit.insert(Phi);
938 FixedOrderRecurrences.insert(Phi);
939 return true;
940 }
941
942 // As a last resort, coerce the PHI to a AddRec expression
943 // and re-try classifying it a an induction PHI.
944 if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true) &&
945 !IsDisallowedStridedPointerInduction(ID)) {
946 addInductionPhi(Phi, ID, AllowedExit);
947 return true;
948 }
949
950 reportVectorizationFailure("Found an unidentified PHI",
951 "value that could not be identified as "
952 "reduction is used outside the loop",
953 "NonReductionValueUsedOutsideLoop", ORE, TheLoop,
954 Phi);
955 return false;
956 } // end of PHI handling
957
958 // We handle calls that:
959 // * Have a mapping to an IR intrinsic.
960 // * Have a vector version available.
961 auto *CI = dyn_cast<CallInst>(&I);
962
963 if (CI && !getVectorIntrinsicIDForCall(CI, TLI) &&
964 !(CI->getCalledFunction() && TLI &&
965 (!VFDatabase::getMappings(*CI).empty() || isTLIScalarize(*TLI, *CI)))) {
966 // If the call is a recognized math libary call, it is likely that
967 // we can vectorize it given loosened floating-point constraints.
968 LibFunc Func;
969 bool IsMathLibCall =
970 TLI && CI->getCalledFunction() && CI->getType()->isFloatingPointTy() &&
971 TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) &&
972 TLI->hasOptimizedCodeGen(Func);
973
974 if (IsMathLibCall) {
975 // TODO: Ideally, we should not use clang-specific language here,
976 // but it's hard to provide meaningful yet generic advice.
977 // Also, should this be guarded by allowExtraAnalysis() and/or be part
978 // of the returned info from isFunctionVectorizable()?
980 "Found a non-intrinsic callsite",
981 "library call cannot be vectorized. "
982 "Try compiling with -fno-math-errno, -ffast-math, "
983 "or similar flags",
984 "CantVectorizeLibcall", ORE, TheLoop, CI);
985 } else {
986 reportVectorizationFailure("Found a non-intrinsic callsite",
987 "call instruction cannot be vectorized",
988 "CantVectorizeLibcall", ORE, TheLoop, CI);
989 }
990 return false;
991 }
992
993 // Some intrinsics have scalar arguments and should be same in order for
994 // them to be vectorized (i.e. loop invariant).
995 if (CI) {
996 auto *SE = PSE.getSE();
997 Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI);
998 for (unsigned Idx = 0; Idx < CI->arg_size(); ++Idx)
999 if (isVectorIntrinsicWithScalarOpAtArg(IntrinID, Idx, TTI)) {
1000 if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(Idx)), TheLoop)) {
1002 "Found unvectorizable intrinsic",
1003 "intrinsic instruction cannot be vectorized",
1004 "CantVectorizeIntrinsic", ORE, TheLoop, CI);
1005 return false;
1006 }
1007 }
1008 }
1009
1010 // If we found a vectorized variant of a function, note that so LV can
1011 // make better decisions about maximum VF.
1012 if (CI && !VFDatabase::getMappings(*CI).empty())
1013 VecCallVariantsFound = true;
1014
1015 auto CanWidenInstructionTy = [](Instruction const &Inst) {
1016 Type *InstTy = Inst.getType();
1017 if (!isa<StructType>(InstTy))
1018 return canVectorizeTy(InstTy);
1019
1020 // For now, we only recognize struct values returned from calls where
1021 // all users are extractvalue as vectorizable. All element types of the
1022 // struct must be types that can be widened.
1023 return isa<CallInst>(Inst) && canVectorizeTy(InstTy) &&
1024 all_of(Inst.users(), IsaPred<ExtractValueInst>);
1025 };
1026
1027 // Check that the instruction return type is vectorizable.
1028 // We can't vectorize casts from vector type to scalar type.
1029 // Also, we can't vectorize extractelement instructions.
1030 if (!CanWidenInstructionTy(I) ||
1031 (isa<CastInst>(I) &&
1032 !VectorType::isValidElementType(I.getOperand(0)->getType())) ||
1034 reportVectorizationFailure("Found unvectorizable type",
1035 "instruction return type cannot be vectorized",
1036 "CantVectorizeInstructionReturnType", ORE,
1037 TheLoop, &I);
1038 return false;
1039 }
1040
1041 // Check that the stored type is vectorizable.
1042 if (auto *ST = dyn_cast<StoreInst>(&I)) {
1043 Type *T = ST->getValueOperand()->getType();
1045 reportVectorizationFailure("Store instruction cannot be vectorized",
1046 "CantVectorizeStore", ORE, TheLoop, ST);
1047 return false;
1048 }
1049
1050 // For nontemporal stores, check that a nontemporal vector version is
1051 // supported on the target.
1052 if (ST->getMetadata(LLVMContext::MD_nontemporal)) {
1053 // Arbitrarily try a vector of 2 elements.
1054 auto *VecTy = FixedVectorType::get(T, /*NumElts=*/2);
1055 assert(VecTy && "did not find vectorized version of stored type");
1056 if (!TTI->isLegalNTStore(VecTy, ST->getAlign())) {
1058 "nontemporal store instruction cannot be vectorized",
1059 "CantVectorizeNontemporalStore", ORE, TheLoop, ST);
1060 return false;
1061 }
1062 }
1063
1064 } else if (auto *LD = dyn_cast<LoadInst>(&I)) {
1065 if (LD->getMetadata(LLVMContext::MD_nontemporal)) {
1066 // For nontemporal loads, check that a nontemporal vector version is
1067 // supported on the target (arbitrarily try a vector of 2 elements).
1068 auto *VecTy = FixedVectorType::get(I.getType(), /*NumElts=*/2);
1069 assert(VecTy && "did not find vectorized version of load type");
1070 if (!TTI->isLegalNTLoad(VecTy, LD->getAlign())) {
1072 "nontemporal load instruction cannot be vectorized",
1073 "CantVectorizeNontemporalLoad", ORE, TheLoop, LD);
1074 return false;
1075 }
1076 }
1077
1078 // FP instructions can allow unsafe algebra, thus vectorizable by
1079 // non-IEEE-754 compliant SIMD units.
1080 // This applies to floating-point math operations and calls, not memory
1081 // operations, shuffles, or casts, as they don't change precision or
1082 // semantics.
1083 } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) &&
1084 !I.isFast()) {
1085 LLVM_DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n");
1086 Hints->setPotentiallyUnsafe();
1087 }
1088
1089 // Reduction instructions are allowed to have exit users.
1090 // All other instructions must not have external users.
1091 if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) {
1092 // We can safely vectorize loops where instructions within the loop are
1093 // used outside the loop only if the SCEV predicates within the loop is
1094 // same as outside the loop. Allowing the exit means reusing the SCEV
1095 // outside the loop.
1096 if (PSE.getPredicate().isAlwaysTrue()) {
1097 AllowedExit.insert(&I);
1098 return true;
1099 }
1100 reportVectorizationFailure("Value cannot be used outside the loop",
1101 "ValueUsedOutsideLoop", ORE, TheLoop, &I);
1102 return false;
1103 }
1104
1105 return true;
1106}
1107
1108/// Find histogram operations that match high-level code in loops:
1109/// \code
1110/// buckets[indices[i]]+=step;
1111/// \endcode
1112///
1113/// It matches a pattern starting from \p HSt, which Stores to the 'buckets'
1114/// array the computed histogram. It uses a BinOp to sum all counts, storing
1115/// them using a loop-variant index Load from the 'indices' input array.
1116///
1117/// On successful matches it updates the STATISTIC 'HistogramsDetected',
1118/// regardless of hardware support. When there is support, it additionally
1119/// stores the BinOp/Load pairs in \p HistogramCounts, as well the pointers
1120/// used to update histogram in \p HistogramPtrs.
1121static bool findHistogram(LoadInst *LI, StoreInst *HSt, Loop *TheLoop,
1122 const PredicatedScalarEvolution &PSE,
1123 SmallVectorImpl<HistogramInfo> &Histograms) {
1124
1125 // Store value must come from a Binary Operation.
1126 Instruction *HPtrInstr = nullptr;
1127 BinaryOperator *HBinOp = nullptr;
1128 if (!match(HSt, m_Store(m_BinOp(HBinOp), m_Instruction(HPtrInstr))))
1129 return false;
1130
1131 // BinOp must be an Add or a Sub modifying the bucket value by a
1132 // loop invariant amount.
1133 // FIXME: We assume the loop invariant term is on the RHS.
1134 // Fine for an immediate/constant, but maybe not a generic value?
1135 Value *HIncVal = nullptr;
1136 if (!match(HBinOp, m_Add(m_Load(m_Specific(HPtrInstr)), m_Value(HIncVal))) &&
1137 !match(HBinOp, m_Sub(m_Load(m_Specific(HPtrInstr)), m_Value(HIncVal))))
1138 return false;
1139
1140 // Make sure the increment value is loop invariant.
1141 if (!TheLoop->isLoopInvariant(HIncVal))
1142 return false;
1143
1144 // The address to store is calculated through a GEP Instruction.
1146 if (!GEP)
1147 return false;
1148
1149 // Restrict address calculation to constant indices except for the last term.
1150 Value *HIdx = nullptr;
1151 for (Value *Index : GEP->indices()) {
1152 if (HIdx)
1153 return false;
1154 if (!isa<ConstantInt>(Index))
1155 HIdx = Index;
1156 }
1157
1158 if (!HIdx)
1159 return false;
1160
1161 // Check that the index is calculated by loading from another array. Ignore
1162 // any extensions.
1163 // FIXME: Support indices from other sources than a linear load from memory?
1164 // We're currently trying to match an operation looping over an array
1165 // of indices, but there could be additional levels of indirection
1166 // in place, or possibly some additional calculation to form the index
1167 // from the loaded data.
1168 Value *VPtrVal;
1169 if (!match(HIdx, m_ZExtOrSExtOrSelf(m_Load(m_Value(VPtrVal)))))
1170 return false;
1171
1172 // Make sure the index address varies in this loop, not an outer loop.
1173 const auto *AR = dyn_cast<SCEVAddRecExpr>(PSE.getSE()->getSCEV(VPtrVal));
1174 if (!AR || AR->getLoop() != TheLoop)
1175 return false;
1176
1177 // Ensure we'll have the same mask by checking that all parts of the histogram
1178 // (gather load, update, scatter store) are in the same block.
1179 LoadInst *IndexedLoad = cast<LoadInst>(HBinOp->getOperand(0));
1180 BasicBlock *LdBB = IndexedLoad->getParent();
1181 if (LdBB != HBinOp->getParent() || LdBB != HSt->getParent())
1182 return false;
1183
1184 LLVM_DEBUG(dbgs() << "LV: Found histogram for: " << *HSt << "\n");
1185
1186 // Store the operations that make up the histogram.
1187 Histograms.emplace_back(IndexedLoad, HBinOp, HSt);
1188 return true;
1189}
1190
// Returns true when the loop's only IndirectUnsafe memory dependence forms a
// recognizable histogram update (buckets[indices[i]] += step); on success the
// matched operations are recorded in Histograms via findHistogram().
1191 bool LoopVectorizationLegality::canVectorizeIndirectUnsafeDependences() {
1192 // For now, we only support an IndirectUnsafe dependency that calculates
1193 // a histogram
// NOTE(review): original line 1194 was lost in extraction — presumably a
// guard (an enable-flag check) that owns the 'return false' below. Restore
// from upstream before relying on this code.
1195 return false;
1196 
1197 // Find a single IndirectUnsafe dependency.
1198 const MemoryDepChecker::Dependence *IUDep = nullptr;
1199 const MemoryDepChecker &DepChecker = LAI->getDepChecker();
1200 const auto *Deps = DepChecker.getDependences();
1201 // If there were too many dependences, LAA abandons recording them. We can't
1202 // proceed safely if we don't know what the dependences are.
1203 if (!Deps)
1204 return false;
1205 
1206 for (const MemoryDepChecker::Dependence &Dep : *Deps) {
1207 // Ignore dependencies that are either known to be safe or can be
1208 // checked at runtime.
// NOTE(review): original lines 1209-1210 were lost in extraction — the
// condition that this 'continue' belongs to (a safe-for-vectorization
// check on Dep.Type) is missing. Restore from upstream.
1211 continue;
1212 
1213 // We're only interested in IndirectUnsafe dependencies here, where the
1214 // address might come from a load from memory. We also only want to handle
1215 // one such dependency, at least for now.
1216 if (Dep.Type != MemoryDepChecker::Dependence::IndirectUnsafe || IUDep)
1217 return false;
1218 
1219 IUDep = &Dep;
1220 }
1221 if (!IUDep)
1222 return false;
1223 
1224 // For now only normal loads and stores are supported.
1225 LoadInst *LI = dyn_cast<LoadInst>(IUDep->getSource(DepChecker));
1226 StoreInst *SI = dyn_cast<StoreInst>(IUDep->getDestination(DepChecker));
1227 
1228 if (!LI || !SI)
1229 return false;
1230 
1231 LLVM_DEBUG(dbgs() << "LV: Checking for a histogram on: " << *SI << "\n");
1232 return findHistogram(LI, SI, TheLoop, LAI->getPSE(), Histograms);
1233 }
1234
// Queries LoopAccessInfo for this loop and decides whether its memory
// accesses are legal to vectorize: forwards any LAA analysis remark via ORE,
// rejects unvectorizable invariant-address stores, and finally merges LAA's
// SCEV predicate into PSE.
// NOTE(review): the embedded line numbers show extraction gaps in this
// function (1246-1247, 1275, 1279, 1292, 1311, 1335); each gap is marked
// inline below. Restore the missing statements from upstream before use.
1235 bool LoopVectorizationLegality::canVectorizeMemory() {
1236 LAI = &LAIs.getInfo(*TheLoop);
1237 const OptimizationRemarkAnalysis *LAR = LAI->getReport();
1238 if (LAR) {
// Re-emit LAA's failure remark under the vectorizer's pass name.
1239 ORE->emit([&]() {
1240 return OptimizationRemarkAnalysis(Hints->vectorizeAnalysisPassName(),
1241 "loop not vectorized: ", *LAR);
1242 });
1243 }
1244 
1245 if (!LAI->canVectorizeMemory()) {
// NOTE(review): lines 1246-1247 missing — an inner condition plus the start
// of a reportVectorizationFailure( call; the string arguments below and the
// 'return canVectorizeIndirectUnsafeDependences()' fallback belong to that
// missing nested structure.
1248 "Cannot vectorize unsafe dependencies in uncountable exit loop with "
1249 "side effects",
1250 "CantVectorizeUnsafeDependencyForEELoopWithSideEffects", ORE,
1251 TheLoop);
1252 return false;
1253 }
1254 
1255 return canVectorizeIndirectUnsafeDependences();
1256 }
1257 
1258 if (LAI->hasLoadStoreDependenceInvolvingLoopInvariantAddress()) {
1259 reportVectorizationFailure("We don't allow storing to uniform addresses",
1260 "write to a loop invariant address could not "
1261 "be vectorized",
1262 "CantVectorizeStoreToLoopInvariantAddress", ORE,
1263 TheLoop);
1264 return false;
1265 }
1266 
1267 // We can vectorize stores to invariant address when final reduction value is
1268 // guaranteed to be stored at the end of the loop. Also, if decision to
1269 // vectorize loop is made, runtime checks are added so as to make sure that
1270 // invariant address won't alias with any other objects.
1271 if (!LAI->getStoresToInvariantAddresses().empty()) {
1272 // For each invariant address, check if last stored value is unconditional
1273 // and the address is not calculated inside the loop.
1274 for (StoreInst *SI : LAI->getStoresToInvariantAddresses()) {
// NOTE(review): line 1275 missing — the condition this 'continue' belongs to.
1276 continue;
1277 
1278 if (blockNeedsPredication(SI->getParent())) {
// NOTE(review): line 1279 missing — the reportVectorizationFailure( opener.
1280 "We don't allow storing to uniform addresses",
1281 "write of conditional recurring variant value to a loop "
1282 "invariant address could not be vectorized",
1283 "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
1284 return false;
1285 }
1286 
1287 // Invariant address should be defined outside of loop. LICM pass usually
1288 // makes sure it happens, but in rare cases it does not, we do not want
1289 // to overcomplicate vectorization to support this case.
1290 if (Instruction *Ptr = dyn_cast<Instruction>(SI->getPointerOperand())) {
1291 if (TheLoop->contains(Ptr)) {
// NOTE(review): line 1292 missing — the reportVectorizationFailure( opener.
1293 "Invariant address is calculated inside the loop",
1294 "write to a loop invariant address could not "
1295 "be vectorized",
1296 "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
1297 return false;
1298 }
1299 }
1300 }
1301 
1302 if (LAI->hasStoreStoreDependenceInvolvingLoopInvariantAddress()) {
1303 // For each invariant address, check its last stored value is the result
1304 // of one of our reductions.
1305 //
1306 // We do not check if dependence with loads exists because that is already
1307 // checked via hasLoadStoreDependenceInvolvingLoopInvariantAddress.
1308 ScalarEvolution *SE = PSE.getSE();
1309 SmallVector<StoreInst *, 4> UnhandledStores;
1310 for (StoreInst *SI : LAI->getStoresToInvariantAddresses()) {
// NOTE(review): line 1311 missing — the condition opening this then-block.
1312 // Earlier stores to this address are effectively deadcode.
1313 // With opaque pointers it is possible for one pointer to be used with
1314 // different sizes of stored values:
1315 // store i32 0, ptr %x
1316 // store i8 0, ptr %x
1317 // The latest store doesn't complitely overwrite the first one in the
1318 // example. That is why we have to make sure that types of stored
1319 // values are same.
1320 // TODO: Check that bitwidth of unhandled store is smaller then the
1321 // one that overwrites it and add a test.
1322 erase_if(UnhandledStores, [SE, SI](StoreInst *I) {
1323 return storeToSameAddress(SE, SI, I) &&
1324 I->getValueOperand()->getType() ==
1325 SI->getValueOperand()->getType();
1326 });
1327 continue;
1328 }
1329 UnhandledStores.push_back(SI);
1330 }
1331 
1332 bool IsOK = UnhandledStores.empty();
1333 // TODO: we should also validate against InvariantMemSets.
1334 if (!IsOK) {
// NOTE(review): line 1335 missing — the reportVectorizationFailure( opener.
1336 "We don't allow storing to uniform addresses",
1337 "write to a loop invariant address could not "
1338 "be vectorized",
1339 "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
1340 return false;
1341 }
1342 }
1343 }
1344 
// Record LAA's runtime SCEV predicate so the vectorizer emits its checks.
1345 PSE.addPredicate(LAI->getPSE().getPredicate());
1346 return true;
1347 }
1348
// NOTE(review): the opening line of this definition (original line 1349,
// presumably `bool LoopVectorizationLegality::canVectorizeFPMath(`) was lost
// in extraction — confirm the name against upstream.
// Returns true when recorded exact-FP-math requirements can be honored:
// trivially when there is no exact FP instruction or hints allow reordering;
// otherwise only with strict reductions enabled, no exact-FP inductions, and
// every exact-FP reduction ordered (movable in-loop).
1350 bool EnableStrictReductions) {
1351 
1352 // First check if there is any ExactFP math or if we allow reassociations
1353 if (!Requirements->getExactFPInst() || Hints->allowReordering())
1354 return true;
1355 
1356 // If the above is false, we have ExactFPMath & do not allow reordering.
1357 // If the EnableStrictReductions flag is set, first check if we have any
1358 // Exact FP induction vars, which we cannot vectorize.
1359 if (!EnableStrictReductions ||
1360 any_of(getInductionVars(), [&](auto &Induction) -> bool {
1361 InductionDescriptor IndDesc = Induction.second;
1362 return IndDesc.getExactFPMathInst();
1363 }))
1364 return false;
1365 
1366 // We can now only vectorize if all reductions with Exact FP math also
1367 // have the isOrdered flag set, which indicates that we can move the
1368 // reduction operations in-loop.
1369 return (all_of(getReductionVars(), [&](auto &Reduction) -> bool {
1370 const RecurrenceDescriptor &RdxDesc = Reduction.second;
1371 return !RdxDesc.hasExactFPMath() || RdxDesc.isOrdered();
1372 }));
1373 }
1374
// NOTE(review): the signature line (original 1375) was lost in extraction —
// presumably `bool LoopVectorizationLegality::isInvariantStoreOfReduction(
// StoreInst *SI)`; confirm against upstream.
// True iff SI is recorded as the intermediate (invariant-address) store of
// one of this loop's reductions.
1376 return any_of(getReductionVars(), [&](auto &Reduction) -> bool {
1377 const RecurrenceDescriptor &RdxDesc = Reduction.second;
1378 return RdxDesc.IntermediateStore == SI;
1379 });
1380 }
1381
// NOTE(review): the signature line (original 1382) was lost in extraction —
// presumably `bool LoopVectorizationLegality::isInvariantAddressOfReduction(
// Value *V)`; confirm against upstream.
// True iff V equals (directly or by SCEV equality) the pointer operand of
// some reduction's intermediate store.
1383 return any_of(getReductionVars(), [&](auto &Reduction) -> bool {
1384 const RecurrenceDescriptor &RdxDesc = Reduction.second;
1385 if (!RdxDesc.IntermediateStore)
1386 return false;
1387 
1388 ScalarEvolution *SE = PSE.getSE();
1389 Value *InvariantAddress = RdxDesc.IntermediateStore->getPointerOperand();
// Compare SCEVs as well so equivalent address computations also match.
1390 return V == InvariantAddress ||
1391 SE->getSCEV(V) == SE->getSCEV(InvariantAddress);
1392 });
1393 }
1394
// NOTE(review): the signature line (original 1395) and line 1397 (the
// declaration of PN, presumably a dyn_cast of In0 to PHINode) were lost in
// extraction — restore from upstream.
// True iff V is one of the PHIs recorded in the Inductions map.
1396 Value *In0 = const_cast<Value *>(V);
1398 if (!PN)
1399 return false;
1400 
1401 return Inductions.count(PN);
1402 }
1403
// Returns the induction descriptor for Phi when it is an integer induction
// (or, presumably per the missing second condition, an FP induction);
// otherwise nullptr.
// NOTE(review): original line 1405 (the qualified function name and
// parameter list) and line 1410 (the second operand of the '||', likely an
// IK_FpInduction check) were lost in extraction — restore from upstream.
1404 const InductionDescriptor *
1406 if (!isInductionPhi(Phi))
1407 return nullptr;
1408 auto &ID = getInductionVars().find(Phi)->second;
1409 if (ID.getKind() == InductionDescriptor::IK_IntInduction ||
1411 return &ID;
1412 return nullptr;
1413 }
1414
// Returns the induction descriptor for Phi when it is a pointer induction;
// otherwise nullptr.
// NOTE(review): original line 1416 (the qualified function name and
// parameter list) and line 1420 (the condition guarding 'return &ID', likely
// an IK_PtrInduction kind check) were lost in extraction — restore from
// upstream.
1415 const InductionDescriptor *
1417 if (!isInductionPhi(Phi))
1418 return nullptr;
1419 auto &ID = getInductionVars().find(Phi)->second;
1421 return &ID;
1422 return nullptr;
1423 }
1424
// NOTE(review): the start of this signature (original line 1425, presumably
// `bool LoopVectorizationLegality::isCastedInductionVariable(`) was lost in
// extraction — confirm against upstream.
// True iff V is an instruction recorded in InductionCastsToIgnore.
1426 const Value *V) const {
1427 auto *Inst = dyn_cast<Instruction>(V);
1428 return (Inst && InductionCastsToIgnore.count(Inst));
1429 }
1430
1434
// NOTE(review): the start of this signature (original line 1435, presumably
// `bool LoopVectorizationLegality::isFixedOrderRecurrence(`) was lost in
// extraction — confirm against upstream.
// True iff Phi was classified as a fixed-order recurrence during legality
// analysis.
1436 const PHINode *Phi) const {
1437 return FixedOrderRecurrences.count(Phi);
1438 }
1439
// NOTE(review): the start of this signature (original line 1440, presumably
// `bool LoopVectorizationLegality::blockNeedsPredication(`) and line 1447
// (the condition guarding 'return BB == Latch', presumably an
// early-exit-loop check) were lost in extraction — restore from upstream.
1441 const BasicBlock *BB) const {
1442 // When vectorizing early exits, create predicates for the latch block only.
1443 // For a single early exit, it must be a direct predecessor of the latch.
1444 // For multiple early exits, they form a chain where each exiting block
1445 // dominates all subsequent blocks up to the latch.
1446 BasicBlock *Latch = TheLoop->getLoopLatch();
1448 return BB == Latch;
// Otherwise fall back to LAA's dominator-based predication query.
1449 return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
1450 }
1451
// Returns true when every instruction in BB can be executed under a mask
// during if-conversion. Instructions that need masking (unsafe loads, all
// stores, maskable calls, droppable assumes) are collected into MaskedOp;
// any other instruction that reads/writes memory or may throw makes the
// block unpredicatable.
// NOTE(review): original lines 1458, 1466 and 1474 (the conditions that own
// the orphaned bodies marked below) were lost in extraction — restore from
// upstream.
1452 bool LoopVectorizationLegality::blockCanBePredicated(
1453 BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs,
1454 SmallPtrSetImpl<const Instruction *> &MaskedOp) const {
1455 for (Instruction &I : *BB) {
1456 // We can predicate blocks with calls to assume, as long as we drop them in
1457 // case we flatten the CFG via predication.
// NOTE(review): missing line 1458 — the match/condition for assume calls.
1459 MaskedOp.insert(&I);
1460 continue;
1461 }
1462 
1463 // Do not let llvm.experimental.noalias.scope.decl block the vectorization.
1464 // TODO: there might be cases that it should block the vectorization. Let's
1465 // ignore those for now.
// NOTE(review): missing line 1466 — the condition for this 'continue'.
1467 continue;
1468 
1469 // We can allow masked calls if there's at least one vector variant, even
1470 // if we end up scalarizing due to the cost model calculations.
1471 // TODO: Allow other calls if they have appropriate attributes... readonly
1472 // and argmemonly?
1473 if (CallInst *CI = dyn_cast<CallInst>(&I))
// NOTE(review): missing line 1474 — the vector-variant availability check.
1475 MaskedOp.insert(CI);
1476 continue;
1477 }
1478 
1479 // Loads are handled via masking (or speculated if safe to do so.)
1480 if (auto *LI = dyn_cast<LoadInst>(&I)) {
1481 if (!SafePtrs.count(LI->getPointerOperand()))
1482 MaskedOp.insert(LI);
1483 continue;
1484 }
1485 
1486 // Predicated store requires some form of masking:
1487 // 1) masked store HW instruction,
1488 // 2) emulation via load-blend-store (only if safe and legal to do so,
1489 // be aware on the race conditions), or
1490 // 3) element-by-element predicate check and scalar store.
1491 if (auto *SI = dyn_cast<StoreInst>(&I)) {
1492 MaskedOp.insert(SI);
1493 continue;
1494 }
1495 
// Anything else touching memory or possibly throwing cannot be predicated.
1496 if (I.mayReadFromMemory() || I.mayWriteToMemory() || I.mayThrow())
1497 return false;
1498 }
1499 
1500 return true;
1501 }
1502
// Decides whether a multi-block loop can be flattened via if-conversion:
// collects pointers proven safe to access unconditionally, then verifies
// every terminator is supported and every predicated block passes
// blockCanBePredicated().
// NOTE(review): original lines 1534 and 1611 were lost in extraction (marked
// inline below) — restore from upstream.
1503 bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
1504 if (!EnableIfConversion) {
1505 reportVectorizationFailure("If-conversion is disabled",
1506 "IfConversionDisabled", ORE, TheLoop);
1507 return false;
1508 }
1509 
1510 assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");
1511 
1512 // A list of pointers which are known to be dereferenceable within scope of
1513 // the loop body for each iteration of the loop which executes. That is,
1514 // the memory pointed to can be dereferenced (with the access size implied by
1515 // the value's type) unconditionally within the loop header without
1516 // introducing a new fault.
1517 SmallPtrSet<Value *, 8> SafePointers;
1518 
1519 // Collect safe addresses.
1520 for (BasicBlock *BB : TheLoop->blocks()) {
1521 if (!blockNeedsPredication(BB)) {
// Unpredicated blocks execute every iteration, so their accesses are safe.
1522 for (Instruction &I : *BB)
1523 if (auto *Ptr = getLoadStorePointerOperand(&I))
1524 SafePointers.insert(Ptr);
1525 continue;
1526 }
1527 
1528 // For a block which requires predication, a address may be safe to access
1529 // in the loop w/o predication if we can prove dereferenceability facts
1530 // sufficient to ensure it'll never fault within the loop. For the moment,
1531 // we restrict this to loads; stores are more complicated due to
1532 // concurrency restrictions.
1533 ScalarEvolution &SE = *PSE.getSE();
// NOTE(review): missing line 1534 — presumably the local declaration of
// 'Predicates' used below; confirm against upstream.
1535 for (Instruction &I : *BB) {
1536 LoadInst *LI = dyn_cast<LoadInst>(&I);
1537 
1538 // Make sure we can execute all computations feeding into Ptr in the loop
1539 // w/o triggering UB and that none of the out-of-loop operands are poison.
1540 // We do not need to check if operations inside the loop can produce
1541 // poison due to flags (e.g. due to an inbounds GEP going out of bounds),
1542 // because flags will be dropped when executing them unconditionally.
1543 // TODO: Results could be improved by considering poison-propagation
1544 // properties of visited ops.
1545 auto CanSpeculatePointerOp = [this](Value *Ptr) {
// Iterative DFS over the operands feeding Ptr.
1546 SmallVector<Value *> Worklist = {Ptr};
1547 SmallPtrSet<Value *, 4> Visited;
1548 while (!Worklist.empty()) {
1549 Value *CurrV = Worklist.pop_back_val();
1550 if (!Visited.insert(CurrV).second)
1551 continue;
1552 
1553 auto *CurrI = dyn_cast<Instruction>(CurrV);
1554 if (!CurrI || !TheLoop->contains(CurrI)) {
1555 // If operands from outside the loop may be poison then Ptr may also
1556 // be poison.
1557 if (!isGuaranteedNotToBePoison(CurrV, AC,
1558 TheLoop->getLoopPredecessor()
1559 ->getTerminator()
1560 ->getIterator(),
1561 DT))
1562 return false;
1563 continue;
1564 }
1565 
1566 // A loaded value may be poison, independent of any flags.
1567 if (isa<LoadInst>(CurrI) && !isGuaranteedNotToBePoison(CurrV, AC))
1568 return false;
1569 
1570 // For other ops, assume poison can only be introduced via flags,
1571 // which can be dropped.
1572 if (!isa<PHINode>(CurrI) && !isSafeToSpeculativelyExecute(CurrI))
1573 return false;
1574 append_range(Worklist, CurrI->operands());
1575 }
1576 return true;
1577 };
1578 // Pass the Predicates pointer to isDereferenceableAndAlignedInLoop so
1579 // that it will consider loops that need guarding by SCEV checks. The
1580 // vectoriser will generate these checks if we decide to vectorise.
1581 if (LI && !LI->getType()->isVectorTy() && !mustSuppressSpeculation(*LI) &&
1582 CanSpeculatePointerOp(LI->getPointerOperand()) &&
1583 isDereferenceableAndAlignedInLoop(LI, TheLoop, SE, *DT, AC,
1584 &Predicates))
1585 SafePointers.insert(LI->getPointerOperand());
1586 Predicates.clear();
1587 }
1588 }
1589 
1590 // Collect the blocks that need predication.
1591 for (BasicBlock *BB : TheLoop->blocks()) {
1592 // We support only branches and switch statements as terminators inside the
1593 // loop.
1594 if (isa<SwitchInst>(BB->getTerminator())) {
1595 if (TheLoop->isLoopExiting(BB)) {
1596 reportVectorizationFailure("Loop contains an unsupported switch",
1597 "LoopContainsUnsupportedSwitch", ORE,
1598 TheLoop, BB->getTerminator());
1599 return false;
1600 }
1601 } else if (!isa<UncondBrInst, CondBrInst>(BB->getTerminator())) {
1602 reportVectorizationFailure("Loop contains an unsupported terminator",
1603 "LoopContainsUnsupportedTerminator", ORE,
1604 TheLoop, BB->getTerminator());
1605 return false;
1606 }
1607 
1608 // We must be able to predicate all blocks that need to be predicated.
1609 if (blockNeedsPredication(BB) &&
1610 !blockCanBePredicated(BB, SafePointers, ConditionallyExecutedOps)) {
// NOTE(review): missing line 1611 — the reportVectorizationFailure( opener.
1612 "Control flow cannot be substituted for a select", "NoCFGForSelect",
1613 ORE, TheLoop, BB->getTerminator());
1614 return false;
1615 }
1616 }
1617 
1618 // We can if-convert this loop.
1619 return true;
1620 }
1621
1622// Helper function to canVectorizeLoopNestCFG.
// Validates the control-flow shape of a single loop: legal preheader, exactly
// one backedge, and a branch-terminated latch. With extra-analysis remarks
// enabled it accumulates all failures instead of returning at the first one.
// NOTE(review): original line 1664 (a reportVectorizationFailure( opener) was
// lost in extraction — marked inline below.
1623 bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
1624 bool UseVPlanNativePath) {
1625 assert((UseVPlanNativePath || Lp->isInnermost()) &&
1626 "VPlan-native path is not enabled.");
1627 
1628 // TODO: ORE should be improved to show more accurate information when an
1629 // outer loop can't be vectorized because a nested loop is not understood or
1630 // legal. Something like: "outer_loop_location: loop not vectorized:
1631 // (inner_loop_location) loop control flow is not understood by vectorizer".
1632 
1633 // Store the result and return it at the end instead of exiting early, in case
1634 // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1635 bool Result = true;
1636 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1637 
1638 // We must have a loop in canonical form. Loops with indirectbr in them cannot
1639 // be canonicalized.
1640 if (!Lp->getLoopPreheader()) {
1641 reportVectorizationFailure("Loop doesn't have a legal pre-header",
1642 "loop control flow is not understood by vectorizer",
1643 "CFGNotUnderstood", ORE, TheLoop);
1644 if (DoExtraAnalysis)
1645 Result = false;
1646 else
1647 return false;
1648 }
1649 
1650 // We must have a single backedge.
1651 if (Lp->getNumBackEdges() != 1) {
1652 reportVectorizationFailure("The loop must have a single backedge",
1653 "loop control flow is not understood by vectorizer",
1654 "CFGNotUnderstood", ORE, TheLoop);
1655 if (DoExtraAnalysis)
1656 Result = false;
1657 else
1658 return false;
1659 }
1660 
1661 // The latch must be terminated by a branch.
1662 BasicBlock *Latch = Lp->getLoopLatch();
1663 if (Latch && !isa<UncondBrInst, CondBrInst>(Latch->getTerminator())) {
// NOTE(review): missing line 1664 — the reportVectorizationFailure( opener.
1665 "The loop latch terminator is not a UncondBrInst/CondBrInst",
1666 "loop control flow is not understood by vectorizer", "CFGNotUnderstood",
1667 ORE, TheLoop);
1668 if (DoExtraAnalysis)
1669 Result = false;
1670 else
1671 return false;
1672 }
1673 
1674 return Result;
1675 }
1676
1677bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
1678 Loop *Lp, bool UseVPlanNativePath) {
1679 // Store the result and return it at the end instead of exiting early, in case
1680 // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1681 bool Result = true;
1682 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1683 if (!canVectorizeLoopCFG(Lp, UseVPlanNativePath)) {
1684 if (DoExtraAnalysis)
1685 Result = false;
1686 else
1687 return false;
1688 }
1689
1690 // Recursively check whether the loop control flow of nested loops is
1691 // understood.
1692 for (Loop *SubLp : *Lp)
1693 if (!canVectorizeLoopNestCFG(SubLp, UseVPlanNativePath)) {
1694 if (DoExtraAnalysis)
1695 Result = false;
1696 else
1697 return false;
1698 }
1699
1700 return Result;
1701}
1702
// Determines whether a loop with one or more uncountable (early) exits can be
// vectorized: requires a latch with a countable exit, no reductions or
// fixed-order recurrences, only speculation-safe operations, and either a
// provably non-faulting read-only body or movable exit-condition loads.
// NOTE(review): the embedded line numbers show extraction gaps in this
// function (1713, 1724, 1731, 1754, 1756, 1777, 1791, 1814, 1832, 1851);
// each gap is marked inline below — restore the missing statements from
// upstream before use.
1703 bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
1704 BasicBlock *LatchBB = TheLoop->getLoopLatch();
1705 if (!LatchBB) {
1706 reportVectorizationFailure("Loop does not have a latch",
1707 "Cannot vectorize early exit loop",
1708 "NoLatchEarlyExit", ORE, TheLoop);
1709 return false;
1710 }
1711 
1712 if (Reductions.size() || FixedOrderRecurrences.size()) {
// NOTE(review): missing line 1713 — the reportVectorizationFailure( opener.
1714 "Found reductions or recurrences in early-exit loop",
1715 "Cannot vectorize early exit loop with reductions or recurrences",
1716 "RecurrencesInEarlyExitLoop", ORE, TheLoop);
1717 return false;
1718 }
1719 
1720 SmallVector<BasicBlock *, 8> ExitingBlocks;
1721 TheLoop->getExitingBlocks(ExitingBlocks);
1722 
1723 // Keep a record of all the exiting blocks.
// NOTE(review): missing line 1724 — presumably the declaration of
// 'CountableExitingBlocks' used below; confirm against upstream.
1725 SmallVector<BasicBlock *> UncountableExitingBlocks;
1726 for (BasicBlock *BB : ExitingBlocks) {
// An exit whose count SCEV cannot compute is "uncountable" (data-dependent).
1727 const SCEV *EC =
1728 PSE.getSE()->getPredicatedExitCount(TheLoop, BB, &Predicates);
1729 if (isa<SCEVCouldNotCompute>(EC)) {
1730 if (size(successors(BB)) != 2) {
// NOTE(review): missing line 1731 — the reportVectorizationFailure( opener.
1732 "Early exiting block does not have exactly two successors",
1733 "Incorrect number of successors from early exiting block",
1734 "EarlyExitTooManySuccessors", ORE, TheLoop);
1735 return false;
1736 }
1737 
1738 UncountableExitingBlocks.push_back(BB);
1739 } else
1740 CountableExitingBlocks.push_back(BB);
1741 }
1742 // We can safely ignore the predicates here because when vectorizing the loop
1743 // the PredicatatedScalarEvolution class will keep track of all predicates
1744 // for each exiting block anyway. This happens when calling
1745 // PSE.getSymbolicMaxBackedgeTakenCount() below.
1746 Predicates.clear();
1747 
1748 if (UncountableExitingBlocks.empty()) {
1749 LLVM_DEBUG(dbgs() << "LV: Could not find any uncountable exits");
1750 return false;
1751 }
1752 
1753 // The latch block must have a countable exit.
// NOTE(review): missing line 1754 — presumably the opening
// `if (isa<SCEVCouldNotCompute>(` of this condition; confirm upstream.
1755 PSE.getSE()->getPredicatedExitCount(TheLoop, LatchBB, &Predicates))) {
// NOTE(review): missing line 1756 — the reportVectorizationFailure( opener.
1757 "Cannot determine exact exit count for latch block",
1758 "Cannot vectorize early exit loop",
1759 "UnknownLatchExitCountEarlyExitLoop", ORE, TheLoop);
1760 return false;
1761 }
1762 assert(llvm::is_contained(CountableExitingBlocks, LatchBB) &&
1763 "Latch block not found in list of countable exits!");
1764 
1765 // Check to see if there are instructions that could potentially generate
1766 // exceptions or have side-effects.
1767 auto IsSafeOperation = [](Instruction *I) -> bool {
1768 switch (I->getOpcode()) {
1769 case Instruction::Load:
1770 case Instruction::Store:
1771 case Instruction::PHI:
1772 case Instruction::UncondBr:
1773 case Instruction::CondBr:
1774 // These are checked separately.
1775 return true;
1776 default:
// NOTE(review): missing line 1777 — the default-case return expression.
1778 }
1779 };
1780 
1781 bool HasSideEffects = false;
1782 for (auto *BB : TheLoop->blocks())
1783 for (auto &I : *BB) {
1784 if (I.mayWriteToMemory()) {
// Simple stores are tolerated but mark the loop as having side effects.
1785 if (isa<StoreInst>(&I) && cast<StoreInst>(&I)->isSimple()) {
1786 HasSideEffects = true;
1787 continue;
1788 }
1789 
1790 // We don't support complex writes to memory.
// NOTE(review): missing line 1791 — the reportVectorizationFailure( opener.
1792 "Complex writes to memory unsupported in early exit loops",
1793 "Cannot vectorize early exit loop with complex writes to memory",
1794 "WritesInEarlyExitLoop", ORE, TheLoop);
1795 return false;
1796 }
1797 
1798 if (!IsSafeOperation(&I)) {
1799 reportVectorizationFailure("Early exit loop contains operations that "
1800 "cannot be speculatively executed",
1801 "UnsafeOperationsEarlyExitLoop", ORE,
1802 TheLoop);
1803 return false;
1804 }
1805 }
1806 
1807 SmallVector<LoadInst *, 4> NonDerefLoads;
1808 // TODO: Handle loops that may fault.
1809 if (!HasSideEffects) {
1810 // Read-only loop.
1811 Predicates.clear();
1812 if (!isReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC, NonDerefLoads,
1813 &Predicates)) {
// NOTE(review): missing line 1814 — the reportVectorizationFailure( opener.
1815 "Loop may fault", "Cannot vectorize non-read-only early exit loop",
1816 "NonReadOnlyEarlyExitLoop", ORE, TheLoop);
1817 return false;
1818 }
1819 } else {
1820 // Check all uncountable exiting blocks for movable loads.
1821 for (BasicBlock *ExitingBB : UncountableExitingBlocks) {
1822 if (!canUncountableExitConditionLoadBeMoved(ExitingBB))
1823 return false;
1824 }
1825 }
1826 
1827 // Check non-dereferenceable loads if any.
1828 for (LoadInst *LI : NonDerefLoads) {
1829 // Only support unit-stride access for now.
1830 int Stride = isConsecutivePtr(LI->getType(), LI->getPointerOperand());
1831 if (Stride != 1) {
// NOTE(review): missing line 1832 — the reportVectorizationFailure( opener.
1833 "Loop contains potentially faulting strided load",
1834 "Cannot vectorize early exit loop with "
1835 "strided fault-only-first load",
1836 "EarlyExitLoopWithStridedFaultOnlyFirstLoad", ORE, TheLoop);
1837 return false;
1838 }
1839 }
1840 
1841 [[maybe_unused]] const SCEV *SymbolicMaxBTC =
1842 PSE.getSymbolicMaxBackedgeTakenCount();
1843 // Since we have an exact exit count for the latch and the early exit
1844 // dominates the latch, then this should guarantee a computed SCEV value.
1845 assert(!isa<SCEVCouldNotCompute>(SymbolicMaxBTC) &&
1846 "Failed to get symbolic expression for backedge taken count");
1847 LLVM_DEBUG(dbgs() << "LV: Found an early exit loop with symbolic max "
1848 "backedge taken count: "
1849 << *SymbolicMaxBTC << '\n');
// NOTE(review): missing line 1851 — the false-branch of this conditional
// expression (presumably a read-only UncountableExitTrait value).
1850 UncountableExitType = HasSideEffects ? UncountableExitTrait::ReadWrite
1852 return true;
1853 }
1854
1855bool LoopVectorizationLegality::canUncountableExitConditionLoadBeMoved(
1856 BasicBlock *ExitingBlock) {
1857 // Try to find a load in the critical path for the uncountable exit condition.
1858 // This is currently matching about the simplest form we can, expecting
1859 // only one in-loop load, the result of which is directly compared against
1860 // a loop-invariant value.
1861 // FIXME: We're insisting on a single use for now, because otherwise we will
1862 // need to make PHI nodes for other users. That can be done once the initial
1863 // transform code lands.
1864 auto *Br = cast<CondBrInst>(ExitingBlock->getTerminator());
1865
1866 using namespace llvm::PatternMatch;
1867 Instruction *L = nullptr;
1868 Value *Ptr = nullptr;
1869 Value *R = nullptr;
1870 if (!match(Br->getCondition(),
1872 m_Value(R))))) {
1874 "Early exit loop with store but no supported condition load",
1875 "NoConditionLoadForEarlyExitLoop", ORE, TheLoop);
1876 return false;
1877 }
1878
1879 // FIXME: Don't rely on operand ordering for the comparison.
1880 if (!TheLoop->isLoopInvariant(R)) {
1882 "Early exit loop with store but no supported condition load",
1883 "NoConditionLoadForEarlyExitLoop", ORE, TheLoop);
1884 return false;
1885 }
1886
1887 // Make sure that the load address is not loop invariant; we want an
1888 // address calculation that we can rotate to the next vector iteration.
1889 const auto *AR = dyn_cast<SCEVAddRecExpr>(PSE.getSE()->getSCEV(Ptr));
1890 if (!AR || AR->getLoop() != TheLoop || !AR->isAffine()) {
1892 "Uncountable exit condition depends on load with an address that is "
1893 "not an add recurrence in the loop",
1894 "EarlyExitLoadInvariantAddress", ORE, TheLoop);
1895 return false;
1896 }
1897
1898 ICFLoopSafetyInfo SafetyInfo;
1899 SafetyInfo.computeLoopSafetyInfo(TheLoop);
1900 LoadInst *Load = cast<LoadInst>(L);
1901 // We need to know that load will be executed before we can hoist a
1902 // copy out to run just before the first iteration.
1903 if (!SafetyInfo.isGuaranteedToExecute(*Load, DT, TheLoop)) {
1905 "Load for uncountable exit not guaranteed to execute",
1906 "ConditionalUncountableExitLoad", ORE, TheLoop);
1907 return false;
1908 }
1909
1910 // Prohibit any potential aliasing with any instruction in the loop which
1911 // might store to memory.
1912 // FIXME: Relax this constraint where possible.
1913 for (auto *BB : TheLoop->blocks()) {
1914 for (auto &I : *BB) {
1915 if (&I == Load)
1916 continue;
1917
1918 if (I.mayWriteToMemory()) {
1919 if (auto *SI = dyn_cast<StoreInst>(&I)) {
1920 AliasResult AR = AA->alias(Ptr, SI->getPointerOperand());
1921 if (AR == AliasResult::NoAlias)
1922 continue;
1923 }
1924
1926 "Cannot determine whether critical uncountable exit load address "
1927 "does not alias with a memory write",
1928 "CantVectorizeAliasWithCriticalUncountableExitLoad", ORE, TheLoop);
1929 return false;
1930 }
1931 }
1932 }
1933
1934 return true;
1935}
1936
1937bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
1938 // Store the result and return it at the end instead of exiting early, in case
1939 // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1940 bool Result = true;
1941
1942 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1943 // Check whether the loop-related control flow in the loop nest is expected by
1944 // vectorizer.
1945 if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) {
1946 if (DoExtraAnalysis) {
1947 LLVM_DEBUG(dbgs() << "LV: legality check failed: loop nest");
1948 Result = false;
1949 } else {
1950 return false;
1951 }
1952 }
1953
1954 // We need to have a loop header.
1955 LLVM_DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName()
1956 << '\n');
1957
1958 // Specific checks for outer loops. We skip the remaining legal checks at this
1959 // point because they don't support outer loops.
1960 if (!TheLoop->isInnermost()) {
1961 assert(UseVPlanNativePath && "VPlan-native path is not enabled.");
1962
1963 if (!canVectorizeOuterLoop()) {
1964 reportVectorizationFailure("Unsupported outer loop",
1965 "UnsupportedOuterLoop", ORE, TheLoop);
1966 // TODO: Implement DoExtraAnalysis when subsequent legal checks support
1967 // outer loops.
1968 return false;
1969 }
1970
1971 LLVM_DEBUG(dbgs() << "LV: We can vectorize this outer loop!\n");
1972 return Result;
1973 }
1974
1975 assert(TheLoop->isInnermost() && "Inner loop expected.");
1976 // Check if we can if-convert non-single-bb loops.
1977 unsigned NumBlocks = TheLoop->getNumBlocks();
1978 if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
1979 LLVM_DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");
1980 if (DoExtraAnalysis)
1981 Result = false;
1982 else
1983 return false;
1984 }
1985
1986 // Check if we can vectorize the instructions and CFG in this loop.
1987 if (!canVectorizeInstrs()) {
1988 LLVM_DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n");
1989 if (DoExtraAnalysis)
1990 Result = false;
1991 else
1992 return false;
1993 }
1994
1995 if (isa<SCEVCouldNotCompute>(PSE.getBackedgeTakenCount())) {
1996 if (TheLoop->getExitingBlock()) {
1997 reportVectorizationFailure("Cannot vectorize uncountable loop",
1998 "UnsupportedUncountableLoop", ORE, TheLoop);
1999 if (DoExtraAnalysis)
2000 Result = false;
2001 else
2002 return false;
2003 } else {
2004 if (!isVectorizableEarlyExitLoop()) {
2005 assert(UncountableExitType == UncountableExitTrait::None &&
2006 "Must be false without vectorizable early-exit loop");
2007 if (DoExtraAnalysis)
2008 Result = false;
2009 else
2010 return false;
2011 }
2012 }
2013 }
2014
2015 // Go over each instruction and look at memory deps.
2016 if (!canVectorizeMemory()) {
2017 LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
2018 if (DoExtraAnalysis)
2019 Result = false;
2020 else
2021 return false;
2022 }
2023
2024 // Bail out for ReadWrite loops with uncountable exits for now.
2025 if (UncountableExitType == UncountableExitTrait::ReadWrite) {
2027 "Writes to memory unsupported in early exit loops",
2028 "Cannot vectorize early exit loop with writes to memory",
2029 "WritesInEarlyExitLoop", ORE, TheLoop);
2030 return false;
2031 }
2032
2033 if (Result) {
2034 LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
2035 << (LAI->getRuntimePointerChecking()->Need
2036 ? " (with a runtime bound check)"
2037 : "")
2038 << "!\n");
2039 }
2040
2041 unsigned SCEVThreshold = VectorizeSCEVCheckThreshold;
2042 if (Hints->getForce() == LoopVectorizeHints::FK_Enabled)
2043 SCEVThreshold = PragmaVectorizeSCEVCheckThreshold;
2044
2045 if (PSE.getPredicate().getComplexity() > SCEVThreshold) {
2046 LLVM_DEBUG(dbgs() << "LV: Vectorization not profitable "
2047 "due to SCEVThreshold");
2048 reportVectorizationFailure("Too many SCEV checks needed",
2049 "Too many SCEV assumptions need to be made and checked at runtime",
2050 "TooManySCEVRunTimeChecks", ORE, TheLoop);
2051 if (DoExtraAnalysis)
2052 Result = false;
2053 else
2054 return false;
2055 }
2056
2057 // Okay! We've done all the tests. If any have failed, return false. Otherwise
2058 // we can vectorize, and at this point we don't have any other mem analysis
2059 // which may limit our maximum vectorization factor, so just return true with
2060 // no restrictions.
2061 return Result;
2062}
2063
2065 // The only loops we can vectorize without a scalar epilogue, are loops with
2066 // a bottom-test and a single exiting block. We'd have to handle the fact
2067 // that not every instruction executes on the last iteration. This will
2068 // require a lane mask which varies through the vector loop body. (TODO)
2069 if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) {
2070 LLVM_DEBUG(
2071 dbgs()
2072 << "LV: Cannot fold tail by masking. Requires a singe latch exit\n");
2073 return false;
2074 }
2075
2076 LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");
2077
2078 // The list of pointers that we can safely read and write to remains empty.
2079 SmallPtrSet<Value *, 8> SafePointers;
2080
2081 // Check all blocks for predication, including those that ordinarily do not
2082 // need predication such as the header block.
2084 for (BasicBlock *BB : TheLoop->blocks()) {
2085 if (!blockCanBePredicated(BB, SafePointers, TmpMaskedOp)) {
2086 LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking.\n");
2087 return false;
2088 }
2089 }
2090
2091 LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n");
2092
2093 return true;
2094}
2095
2097 // The list of pointers that we can safely read and write to remains empty.
2098 SmallPtrSet<Value *, 8> SafePointers;
2099
2100 // Mark all blocks for predication, including those that ordinarily do not
2101 // need predication such as the header block, and collect instructions needing
2102 // predication in TailFoldedMaskedOp.
2103 for (BasicBlock *BB : TheLoop->blocks()) {
2104 [[maybe_unused]] bool R =
2105 blockCanBePredicated(BB, SafePointers, TailFoldedMaskedOp);
2106 assert(R && "Must be able to predicate block when tail-folding.");
2107 }
2108}
2109
2110} // namespace llvm
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
#define DEBUG_TYPE
Hexagon Common GEP
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition Lint.cpp:539
#define LV_NAME
static cl::opt< unsigned > PragmaVectorizeSCEVCheckThreshold("pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed with a " "vectorize(enable) pragma"))
static cl::opt< bool > HintsAllowReordering("hints-allow-reordering", cl::init(true), cl::Hidden, cl::desc("Allow enabling loop hints to reorder " "FP operations during vectorization."))
static const unsigned MaxInterleaveFactor
Maximum vectorization interleave count.
static cl::opt< bool > AllowStridedPointerIVs("lv-strided-pointer-ivs", cl::init(false), cl::Hidden, cl::desc("Enable recognition of non-constant strided " "pointer induction variables."))
static cl::opt< LoopVectorizeHints::ScalableForceKind > ForceScalableVectorization("scalable-vectorization", cl::init(LoopVectorizeHints::SK_Unspecified), cl::Hidden, cl::desc("Control whether the compiler can use scalable vectors to " "vectorize a loop"), cl::values(clEnumValN(LoopVectorizeHints::SK_FixedWidthOnly, "off", "Scalable vectorization is disabled."), clEnumValN(LoopVectorizeHints::SK_PreferScalable, "preferred", "Scalable vectorization is available and favored when the " "cost is inconclusive."), clEnumValN(LoopVectorizeHints::SK_PreferScalable, "on", "Scalable vectorization is available and favored when the " "cost is inconclusive."), clEnumValN(LoopVectorizeHints::SK_AlwaysScalable, "always", "Scalable vectorization is available and always favored when " "feasible")))
static cl::opt< unsigned > VectorizeSCEVCheckThreshold("vectorize-scev-check-threshold", cl::init(16), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed."))
static cl::opt< bool > EnableHistogramVectorization("enable-histogram-loop-vectorization", cl::init(false), cl::Hidden, cl::desc("Enables autovectorization of some loops containing histograms"))
static cl::opt< bool > EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden, cl::desc("Enable if-conversion during vectorization."))
This file defines the LoopVectorizationLegality class.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define H(x, y, z)
Definition MD5.cpp:56
#define T
Contains a collection of routines for determining if a given instruction is guaranteed to execute if ...
static bool isSimple(Instruction *I)
static void visit(BasicBlock &Start, std::function< bool(BasicBlock *)> op)
#define LLVM_DEBUG(...)
Definition Debug.h:114
This pass exposes codegen information to IR-level passes.
Virtual Register Rewriter
static const uint32_t IV[8]
Definition blake3_impl.h:83
Class for arbitrary precision integers.
Definition APInt.h:78
@ NoAlias
The two locations do not alias at all.
iterator begin() const
Definition ArrayRef.h:130
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:137
LLVM Basic Block Representation.
Definition BasicBlock.h:62
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
Definition BasicBlock.h:237
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
This class represents a function call, abstracting a target machine's calling convention.
static ConstantAsMetadata * get(Constant *C)
Definition Metadata.h:537
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
static constexpr ElementCount getScalable(ScalarTy MinVal)
Definition TypeSize.h:312
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
constexpr bool isScalar() const
Exactly one element.
Definition TypeSize.h:320
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:873
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
bool isGuaranteedToExecute(const Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop) const override
Returns true if the instruction in a loop is guaranteed to execute at least once (under the assumptio...
void computeLoopSafetyInfo(const Loop *CurLoop) override
Computes safety information for a loop checks loop body & header for the possibility of may throw exc...
A struct for saving information about induction variables.
@ IK_FpInduction
Floating point induction variable.
@ IK_PtrInduction
Pointer induction var. Step = C.
@ IK_IntInduction
Integer induction variable. Step = C.
static LLVM_ABI bool isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D, const SCEV *Expr=nullptr, SmallVectorImpl< Instruction * > *CastsToIgnore=nullptr)
Returns true if Phi is an induction in the loop L.
Instruction * getExactFPMathInst()
Returns floating-point induction operator that does not allow reassociation (transforming the inducti...
Class to represent integer types.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
An instruction for reading from memory.
const MemoryDepChecker & getDepChecker() const
the Memory Dependence Checker which can determine the loop-independent and loop-carried dependences b...
static LLVM_ABI bool blockNeedsPredication(const BasicBlock *BB, const Loop *TheLoop, const DominatorTree *DT)
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
unsigned getNumBackEdges() const
Calculate the number of back edges to the loop header.
iterator_range< block_iterator > blocks() const
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
bool isLoopHeader(const BlockT *BB) const
bool isInvariantStoreOfReduction(StoreInst *SI)
Returns True if given store is a final invariant store of one of the reductions found in the loop.
bool isInvariantAddressOfReduction(Value *V)
Returns True if given address is invariant and is used to store recurrent expression.
bool canVectorize(bool UseVPlanNativePath)
Returns true if it is legal to vectorize this loop.
bool blockNeedsPredication(const BasicBlock *BB) const
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
int isConsecutivePtr(Type *AccessTy, Value *Ptr) const
Check if this pointer is consecutive when vectorizing.
bool hasUncountableExitWithSideEffects() const
Returns true if this is an early exit loop with state-changing or potentially-faulting operations and...
bool canVectorizeFPMath(bool EnableStrictReductions)
Returns true if it is legal to vectorize the FP math operations in this loop.
bool isFixedOrderRecurrence(const PHINode *Phi) const
Returns True if Phi is a fixed-order recurrence in this loop.
const InductionDescriptor * getPointerInductionDescriptor(PHINode *Phi) const
Returns a pointer to the induction descriptor, if Phi is pointer induction.
const InductionDescriptor * getIntOrFpInductionDescriptor(PHINode *Phi) const
Returns a pointer to the induction descriptor, if Phi is an integer or floating point induction.
bool isInductionPhi(const Value *V) const
Returns True if V is a Phi node of an induction variable in this loop.
bool isUniform(Value *V, ElementCount VF) const
Returns true if value V is uniform across VF lanes, when VF is provided, and otherwise if V is invari...
const InductionList & getInductionVars() const
Returns the induction variables found in the loop.
bool isInvariant(Value *V) const
Returns true if V is invariant across all loop iterations according to SCEV.
const ReductionList & getReductionVars() const
Returns the reduction variables found in the loop.
bool canFoldTailByMasking() const
Return true if we can vectorize this loop while folding its tail by masking.
void prepareToFoldTailByMasking()
Mark all respective loads/stores for masking.
bool hasUncountableEarlyExit() const
Returns true if the loop has uncountable early exits, i.e.
bool isUniformMemOp(Instruction &I, ElementCount VF) const
A uniform memory op is a load or store which accesses the same memory location on all VF lanes,...
bool isInductionVariable(const Value *V) const
Returns True if V can be considered as an induction variable in this loop.
bool isCastedInductionVariable(const Value *V) const
Returns True if V is a cast that is part of an induction def-use chain, and had been proven to be red...
@ SK_PreferScalable
Vectorize loops using scalable vectors or fixed-width vectors, but favor scalable vectors when the co...
@ SK_AlwaysScalable
Always vectorize loops using scalable vectors if feasible (i.e.
@ SK_FixedWidthOnly
Disables vectorization with scalable vectors.
bool allowVectorization(Function *F, Loop *L, bool VectorizeOnlyWhenForced) const
bool allowReordering() const
When enabling loop hints are provided we allow the vectorizer to change the order of operations that ...
void emitRemarkWithHints() const
Dumps all the hint information.
void setAlreadyVectorized()
Mark the loop L as already vectorized by setting the width to 1.
LoopVectorizeHints(const Loop *L, bool InterleaveOnlyWhenForced, OptimizationRemarkEmitter &ORE, const TargetTransformInfo *TTI=nullptr)
const char * vectorizeAnalysisPassName() const
If hints are provided that force vectorization, use the AlwaysPrint pass name to force the frontend t...
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition LoopInfo.cpp:67
PHINode * getCanonicalInductionVariable() const
Check to see if the loop has a canonical induction variable: an integer recurrence that starts at 0 a...
Definition LoopInfo.cpp:174
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
Definition LoopInfo.cpp:523
Metadata node.
Definition Metadata.h:1080
const MDOperand & getOperand(unsigned I) const
Definition Metadata.h:1444
ArrayRef< MDOperand > operands() const
Definition Metadata.h:1442
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1572
unsigned getNumOperands() const
Return number of MDNode operands.
Definition Metadata.h:1450
Tracking metadata reference owned by Metadata.
Definition Metadata.h:902
A single uniqued string.
Definition Metadata.h:722
LLVM_ABI StringRef getString() const
Definition Metadata.cpp:632
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
Definition Metadata.cpp:614
iterator find(const KeyT &Key)
Definition MapVector.h:154
Checks memory dependences among accesses to the same underlying object to determine whether there vec...
const SmallVectorImpl< Dependence > * getDependences() const
Returns the memory dependences.
Root of the metadata hierarchy.
Definition Metadata.h:64
Diagnostic information for optimization analysis remarks.
The optimization diagnostic interface.
bool allowExtraAnalysis(StringRef PassName) const
Whether we allow for extra compile-time budget to perform more analysis to produce fewer false positi...
LLVM_ABI void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for missed-optimization remarks.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Instruction * getExactFPMathInst() const
Returns 1st non-reassociative FP instruction in the PHI node's use-chain.
static LLVM_ABI bool isFixedOrderRecurrence(PHINode *Phi, Loop *TheLoop, DominatorTree *DT)
Returns true if Phi is a fixed-order recurrence.
bool hasExactFPMath() const
Returns true if the recurrence has floating-point math that requires precise (ordered) operations.
Instruction * getLoopExitInstr() const
static LLVM_ABI bool isReductionPHI(PHINode *Phi, Loop *TheLoop, RecurrenceDescriptor &RedDes, DemandedBits *DB=nullptr, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr, ScalarEvolution *SE=nullptr)
Returns true if Phi is a reduction in TheLoop.
bool hasUsesOutsideReductionChain() const
Returns true if the reduction PHI has any uses outside the reduction chain.
RecurKind getRecurrenceKind() const
bool isOrdered() const
Expose an ordered FP reduction to the instance users.
StoreInst * IntermediateStore
Reductions may store temporary or final result to an invariant address.
static bool isMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is any min/max kind.
SCEVUse getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
This visitor recursively visits a SCEV expression and re-writes it.
const SCEV * visit(const SCEV *S)
This class represents an analyzed expression in the program.
static constexpr auto FlagAnyWrap
The main scalar evolution driver.
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LLVM_ABI bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
LLVM_ABI const SCEV * getCouldNotCompute()
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Value * getPointerOperand()
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
Provides information about what library functions are available for the current target.
void getWidestVF(StringRef ScalarF, ElementCount &FixedVF, ElementCount &ScalableVF) const
Returns the largest vectorization factor used in the list of vector functions.
bool isFunctionVectorizable(StringRef F, const ElementCount &VF) const
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
Definition Twine.cpp:17
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:313
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:284
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:236
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition Type.h:186
bool isIntOrPtrTy() const
Return true if this is an integer type or a pointer type.
Definition Type.h:272
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
Value * getOperand(unsigned i) const
Definition User.h:207
static bool hasMaskedVariant(const CallInst &CI, std::optional< ElementCount > VF=std::nullopt)
Definition VectorUtils.h:85
static SmallVector< VFInfo, 8 > getMappings(const CallInst &CI)
Retrieve all the VFInfo instances associated to the CallInst CI.
Definition VectorUtils.h:74
LLVM Value Representation.
Definition Value.h:75
iterator_range< user_iterator > users()
Definition Value.h:426
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:318
static LLVM_ABI bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as a element type.
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:230
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:168
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
constexpr bool isZero() const
Definition TypeSize.h:153
const ParentTy * getParent() const
Definition ilist_node.h:34
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
TwoOps_match< ValueOpTy, PointerOpTy, Instruction::Store > m_Store(const ValueOpTy &ValueOp, const PointerOpTy &PointerOp)
Matches StoreInst.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
bool match(Val *V, const Pattern &P)
match_bind< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
auto m_BinOp()
Match an arbitrary binary operation and ignore it.
auto m_Value()
Match an arbitrary value and ignore it.
match_combine_or< match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > >, OpTy > m_ZExtOrSExtOrSelf(const OpTy &Op)
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract(Y &&MD)
Extract a Value from Metadata, if any.
Definition Metadata.h:696
Add a small namespace to avoid name clashes with the classes used in the streaming interface.
NodeAddr< PhiNode * > Phi
Definition RDFGraph.h:390
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1669
LLVM_ABI Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
auto successors(const MachineBasicBlock *BB)
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2208
static bool isUniformLoop(Loop *Lp, Loop *OuterLp)
LLVM_ABI bool mustSuppressSpeculation(const LoadInst &LI)
Return true if speculation of the given load must be suppressed to avoid ordering or interfering with...
Definition Loads.cpp:431
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
auto reverse(ContainerTy &&C)
Definition STLExtras.h:408
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
static IntegerType * getWiderInductionTy(const DataLayout &DL, Type *Ty0, Type *Ty1)
static IntegerType * getInductionIntegerTy(const DataLayout &DL, Type *Ty)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI bool hasDisableAllTransformsHint(const Loop *L)
Look for the loop attribute that disables all transformation heuristic.
static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst, SmallPtrSetImpl< Value * > &AllowedExit)
Check that the instruction has outside loop users and is not an identified reduction variable.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
static bool storeToSameAddress(ScalarEvolution *SE, StoreInst *A, StoreInst *B)
Returns true if A and B have same pointer operands or same SCEVs addresses.
bool canVectorizeTy(Type *Ty)
Returns true if Ty is a valid vector element type, void, or an unpacked literal struct where all elem...
TargetTransformInfo TTI
LLVM_ABI bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
LLVM_ABI void reportVectorizationFailure(const StringRef DebugMsg, const StringRef OREMsg, const StringRef ORETag, OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I=nullptr)
Reports a vectorization failure: print DebugMsg for debugging purposes along with the corresponding o...
ArrayRef(const T &OneElt) -> ArrayRef< T >
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isReadOnlyLoop(Loop *L, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, SmallVectorImpl< LoadInst * > &NonDereferenceableAndAlignedLoads, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
Returns true if the loop contains read-only memory accesses and doesn't throw.
Definition Loads.cpp:889
LLVM_ABI llvm::MDNode * makePostTransformationMetadata(llvm::LLVMContext &Context, MDNode *OrigLoopID, llvm::ArrayRef< llvm::StringRef > RemovePrefixes, llvm::ArrayRef< llvm::MDNode * > AddAttrs)
Create a new LoopID after the loop has been transformed.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2192
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1947
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
static bool findHistogram(LoadInst *LI, StoreInst *HSt, Loop *TheLoop, const PredicatedScalarEvolution &PSE, SmallVectorImpl< HistogramInfo > &Histograms)
Find histogram operations that match high-level code in loops:
LLVM_ABI bool isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Returns true if V cannot be poison, but may be undef.
static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI)
Checks if a function is scalarizable according to the TLI, in the sense that it should be vectorized ...
LLVM_ABI bool isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L, ScalarEvolution &SE, DominatorTree &DT, AssumptionCache *AC=nullptr, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
Return true if we can prove that the given load (which is assumed to be within the specified loop) wo...
Definition Loads.cpp:289
constexpr detail::IsaCheckPredicate< Types... > IsaPred
Function object wrapper for the llvm::isa type check.
Definition Casting.h:866
LLVM_ABI std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const DominatorTree &DT, const DenseMap< Value *, const SCEV * > &StridesMap=DenseMap< Value *, const SCEV * >(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of the access type size.
SCEVUseT< const SCEV * > SCEVUse
bool SCEVExprContains(const SCEV *Root, PredTy Pred)
Return true if any node in Root satisfies the predicate Pred.
Dependence between memory access instructions.
Instruction * getDestination(const MemoryDepChecker &DepChecker) const
Return the destination instruction of the dependence.
Instruction * getSource(const MemoryDepChecker &DepChecker) const
Return the source instruction of the dependence.
static LLVM_ABI VectorizationSafetyStatus isSafeForVectorization(DepType Type)
Dependence types that don't prevent vectorization.
TODO: The following VectorizationFactor was pulled out of LoopVectorizationCostModel class.
Collection of parameters shared between the Loop Vectorizer and the Loop Access Analysis.
static LLVM_ABI const unsigned MaxVectorWidth
Maximum SIMD width.
static LLVM_ABI bool isInterleaveForced()
True if force-vector-interleave was specified by the user.
static LLVM_ABI unsigned VectorizationInterleave
Interleave factor as overridden by the user.