1//===- LoopVectorizationLegality.cpp --------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file provides loop vectorization legality analysis. Original code
10// resided in LoopVectorize.cpp for a long time.
11//
12// At this point, it is implemented as a utility class, not as an analysis
13// pass. It should be easy to create an analysis pass around it if there
14// is a need (but D45420 needs to happen first).
15//
16
19#include "llvm/Analysis/Loads.h"
32
33using namespace llvm;
34using namespace PatternMatch;
35
36#define LV_NAME "loop-vectorize"
37#define DEBUG_TYPE LV_NAME
38
39static cl::opt<bool>
40 EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
41 cl::desc("Enable if-conversion during vectorization."));
42
43static cl::opt<bool>
44AllowStridedPointerIVs("lv-strided-pointer-ivs", cl::init(false), cl::Hidden,
45 cl::desc("Enable recognition of non-constant strided "
46 "pointer induction variables."));
47
48static cl::opt<bool>
49 HintsAllowReordering("hints-allow-reordering", cl::init(true), cl::Hidden,
50 cl::desc("Allow enabling loop hints to reorder "
51 "FP operations during vectorization."));
52
53// TODO: Move size-based thresholds out of legality checking, make cost based
54// decisions instead of hard thresholds.
55static cl::opt<unsigned> VectorizeSCEVCheckThreshold(
56 "vectorize-scev-check-threshold", cl::init(16), cl::Hidden,
57 cl::desc("The maximum number of SCEV checks allowed."));
58
59static cl::opt<unsigned> PragmaVectorizeSCEVCheckThreshold(
60 "pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden,
61 cl::desc("The maximum number of SCEV checks allowed with a "
62 "vectorize(enable) pragma"));
63
64static cl::opt<LoopVectorizeHints::ScalableForceKind>
65 ForceScalableVectorization(
66 "scalable-vectorization", cl::init(LoopVectorizeHints::SK_Unspecified),
67 cl::Hidden,
68 cl::desc("Control whether the compiler can use scalable vectors to "
69 "vectorize a loop"),
70 cl::values(
71 clEnumValN(LoopVectorizeHints::SK_FixedWidthOnly, "off",
72 "Scalable vectorization is disabled."),
73 clEnumValN(
74 LoopVectorizeHints::SK_PreferScalable, "preferred",
75 "Scalable vectorization is available and favored when the "
76 "cost is inconclusive."),
77 clEnumValN(
78 LoopVectorizeHints::SK_PreferScalable, "on",
79 "Scalable vectorization is available and favored when the "
80 "cost is inconclusive.")));
81
82static cl::opt<bool> EnableHistogramVectorization(
83 "enable-histogram-loop-vectorization", cl::init(false), cl::Hidden,
84 cl::desc("Enables autovectorization of some loops containing histograms"));
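// The flags above are ordinary cl::opt options, so they can be toggled from
// the command line when experimenting. Illustrative invocations (using the
// option spellings declared above; enum value names follow the clEnumValN
// entries):
//   opt -passes=loop-vectorize -enable-if-conversion=false in.ll -S
//   clang -O2 -mllvm -scalable-vectorization=off file.c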
85
86/// Maximum vectorization interleave count.
87static const unsigned MaxInterleaveFactor = 16;
88
89namespace llvm {
90
91bool LoopVectorizeHints::Hint::validate(unsigned Val) {
92 switch (Kind) {
93 case HK_WIDTH:
94 return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth;
95 case HK_INTERLEAVE:
96 return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
97 case HK_FORCE:
98 return (Val <= 1);
99 case HK_ISVECTORIZED:
100 case HK_PREDICATE:
101 case HK_SCALABLE:
102 return (Val == 0 || Val == 1);
103 }
104 return false;
105}
106
107LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
108 bool InterleaveOnlyWhenForced,
109 OptimizationRemarkEmitter &ORE,
110 const TargetTransformInfo *TTI)
111 : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH),
112 Interleave("interleave.count", InterleaveOnlyWhenForced, HK_INTERLEAVE),
113 Force("vectorize.enable", FK_Undefined, HK_FORCE),
114 IsVectorized("isvectorized", 0, HK_ISVECTORIZED),
115 Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE),
116 Scalable("vectorize.scalable.enable", SK_Unspecified, HK_SCALABLE),
117 TheLoop(L), ORE(ORE) {
118 // Populate values with existing loop metadata.
119 getHintsFromMetadata();
120
121 // force-vector-interleave overrides DisableInterleaving.
122 if (VectorizerParams::isInterleaveForced())
123 Interleave.Value = VectorizerParams::VectorizationInterleave;
124
125 // If the metadata doesn't explicitly specify whether to enable scalable
126 // vectorization, then decide based on the following criteria (increasing
127 // level of priority):
128 // - Target default
129 // - Metadata width
130 // - Force option (always overrides)
131 if (Scalable.Value == SK_Unspecified) {
132 if (TTI)
133 Scalable.Value = TTI->enableScalableVectorization() ? SK_PreferScalable
134 : SK_FixedWidthOnly;
135
136 if (Width.Value)
137 // If the width is set, but the metadata says nothing about the scalable
138 // property, then assume it concerns only a fixed-width UserVF.
139 // If width is not set, the flag takes precedence.
140 Scalable.Value = SK_FixedWidthOnly;
141 }
142
143 // If the flag is set to force any use of scalable vectors, override the loop
144 // hints.
145 if (ForceScalableVectorization.getValue() !=
146 LoopVectorizeHints::SK_Unspecified)
147 Scalable.Value = ForceScalableVectorization.getValue();
148
149 // Scalable vectorization is disabled if no preference is specified.
150 if (Scalable.Value == SK_Unspecified)
151 Scalable.Value = SK_FixedWidthOnly;
152
153 if (IsVectorized.Value != 1)
154 // If the vectorization width and interleaving count are both 1 then
155 // consider the loop to have been already vectorized because there's
156 // nothing more that we can do.
157 IsVectorized.Value =
158 getWidth() == ElementCount::getFixed(1) && getInterleave() == 1;
159 LLVM_DEBUG(if (InterleaveOnlyWhenForced && getInterleave() == 1) dbgs()
160 << "LV: Interleaving disabled by the pass manager\n");
161}
162
163void LoopVectorizeHints::setAlreadyVectorized() {
164 LLVMContext &Context = TheLoop->getHeader()->getContext();
165
166 MDNode *IsVectorizedMD = MDNode::get(
167 Context,
168 {MDString::get(Context, "llvm.loop.isvectorized"),
169 ConstantAsMetadata::get(ConstantInt::get(Context, APInt(32, 1)))});
170 MDNode *LoopID = TheLoop->getLoopID();
171 MDNode *NewLoopID =
172 makePostTransformationMetadata(Context, LoopID,
173 {Twine(Prefix(), "vectorize.").str(),
174 Twine(Prefix(), "interleave.").str()},
175 {IsVectorizedMD});
176 TheLoop->setLoopID(NewLoopID);
177
178 // Update internal cache.
179 IsVectorized.Value = 1;
180}
181
182void LoopVectorizeHints::reportDisallowedVectorization(
183 const StringRef DebugMsg, const StringRef RemarkName,
184 const StringRef RemarkMsg, const Loop *L) const {
185 LLVM_DEBUG(dbgs() << "LV: Not vectorizing: " << DebugMsg << ".\n");
186 ORE.emit(OptimizationRemarkMissed(LV_NAME, RemarkName, L->getStartLoc(),
187 L->getHeader())
188 << "loop not vectorized: " << RemarkMsg);
189}
190
191bool LoopVectorizeHints::allowVectorization(
192 Function *F, Loop *L, bool VectorizeOnlyWhenForced) const {
193 if (getForce() == FK_Disabled || hasDisableAllTransformsHint(L)) {
194 if (Force.Value == LoopVectorizeHints::FK_Disabled) {
195 reportDisallowedVectorization("#pragma vectorize disable",
196 "MissedExplicitlyDisabled",
197 "vectorization is explicitly disabled", L);
198 } else if (hasDisableAllTransformsHint(L)) {
199 reportDisallowedVectorization("loop hasDisableAllTransformsHint",
200 "MissedTransformsDisabled",
201 "loop transformations are disabled", L);
202 } else {
203 llvm_unreachable("loop vect disabled for an unknown reason");
204 }
205 return false;
206 }
207
208 if (VectorizeOnlyWhenForced && getForce() != LoopVectorizeHints::FK_Enabled) {
209 reportDisallowedVectorization(
210 "VectorizeOnlyWhenForced is set, and no #pragma vectorize enable",
211 "MissedForceOnly", "only vectorizing loops that explicitly request it",
212 L);
213 return false;
214 }
215
216 if (getIsVectorized() == 1) {
217 LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
218 // FIXME: Add interleave.disable metadata. This will allow
219 // vectorize.disable to be used without disabling the pass and errors
220 // to differentiate between disabled vectorization and a width of 1.
221 ORE.emit([&]() {
222 return OptimizationRemarkMissed(LV_NAME,
223 "AllDisabled", L->getStartLoc(),
224 L->getHeader())
225 << "loop not vectorized: vectorization and interleaving are "
226 "explicitly disabled, or the loop has already been "
227 "vectorized";
228 });
229 return false;
230 }
231
232 return true;
233}
234
235void LoopVectorizeHints::emitRemarkWithHints() const {
236 using namespace ore;
237
238 ORE.emit([&]() {
239 if (Force.Value == LoopVectorizeHints::FK_Disabled)
240 return OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled",
241 TheLoop->getStartLoc(),
242 TheLoop->getHeader())
243 << "loop not vectorized: vectorization is explicitly disabled";
244
245 OptimizationRemarkMissed R(LV_NAME, "MissedDetails", TheLoop->getStartLoc(),
246 TheLoop->getHeader());
247 R << "loop not vectorized";
248 if (Force.Value == LoopVectorizeHints::FK_Enabled) {
249 R << " (Force=" << NV("Force", true);
250 if (Width.Value != 0)
251 R << ", Vector Width=" << NV("VectorWidth", getWidth());
252 if (getInterleave() != 0)
253 R << ", Interleave Count=" << NV("InterleaveCount", getInterleave());
254 R << ")";
255 }
256 return R;
257 });
258}
259
260const char *LoopVectorizeHints::vectorizeAnalysisPassName() const {
261 if (getWidth() == ElementCount::getFixed(1))
262 return LV_NAME;
263 if (getForce() == LoopVectorizeHints::FK_Disabled)
264 return LV_NAME;
265 if (getForce() == LoopVectorizeHints::FK_Undefined && getWidth().isZero())
266 return LV_NAME;
267 return OptimizationRemarkAnalysis::AlwaysPrint;
268}
269
270bool LoopVectorizeHints::allowReordering() const {
271 // Allow the vectorizer to change the order of operations if enabling
272 // loop hints are provided
273 ElementCount EC = getWidth();
274 return HintsAllowReordering &&
275 (getForce() == LoopVectorizeHints::FK_Enabled ||
276 EC.getKnownMinValue() > 1);
277}
278
279void LoopVectorizeHints::getHintsFromMetadata() {
280 MDNode *LoopID = TheLoop->getLoopID();
281 if (!LoopID)
282 return;
283
284 // First operand should refer to the loop id itself.
285 assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
286 assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
287
288 for (const MDOperand &MDO : llvm::drop_begin(LoopID->operands())) {
289 const MDString *S = nullptr;
290 SmallVector<Metadata *, 4> Args;
291
292 // The expected hint is either a MDString or a MDNode with the first
293 // operand a MDString.
294 if (const MDNode *MD = dyn_cast<MDNode>(MDO)) {
295 if (!MD || MD->getNumOperands() == 0)
296 continue;
297 S = dyn_cast<MDString>(MD->getOperand(0));
298 for (unsigned Idx = 1; Idx < MD->getNumOperands(); ++Idx)
299 Args.push_back(MD->getOperand(Idx));
300 } else {
301 S = dyn_cast<MDString>(MDO);
302 assert(Args.size() == 0 && "too many arguments for MDString");
303 }
304
305 if (!S)
306 continue;
307
308 // Check if the hint starts with the loop metadata prefix.
309 StringRef Name = S->getString();
310 if (Args.size() == 1)
311 setHint(Name, Args[0]);
312 }
313}
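// As a sketch of the metadata shape consumed by getHintsFromMetadata() above
// and setHint() below (illustrative IR, not tied to a particular frontend):
// a loop annotated with
//   #pragma clang loop vectorize(enable) vectorize_width(4)
// typically carries
//   br i1 %exitcond, label %exit, label %loop.header, !llvm.loop !0
//   !0 = distinct !{!0, !1, !2}
//   !1 = !{!"llvm.loop.vectorize.enable", i1 true}
//   !2 = !{!"llvm.loop.vectorize.width", i32 4}
// The operands of !0 are matched against the "llvm.loop." prefix and mapped
// onto the Hint fields (Width, Interleave, Force, ...).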
314
315void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) {
316 if (!Name.consume_front(Prefix()))
317 return;
318
319 const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg);
320 if (!C)
321 return;
322 unsigned Val = C->getZExtValue();
323
324 Hint *Hints[] = {&Width, &Interleave, &Force,
325 &IsVectorized, &Predicate, &Scalable};
326 for (auto *H : Hints) {
327 if (Name == H->Name) {
328 if (H->validate(Val))
329 H->Value = Val;
330 else
331 LLVM_DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n");
332 break;
333 }
334 }
335}
336
337// Return true if the inner loop \p Lp is uniform with regard to the outer loop
338// \p OuterLp (i.e., if the outer loop is vectorized, all the vector lanes
339// executing the inner loop will execute the same iterations). This check is
340// very constrained for now but it will be relaxed in the future. \p Lp is
341// considered uniform if it meets all the following conditions:
342// 1) it has a canonical IV (starting from 0 and with stride 1),
343// 2) its latch terminator is a conditional branch and,
344// 3) its latch condition is a compare instruction whose operands are the
345// canonical IV and an OuterLp invariant.
346// This check doesn't take into account the uniformity of other conditions not
347// related to the loop latch because they don't affect the loop uniformity.
348//
349// NOTE: We decided to keep all these checks and their associated documentation
350// together so that we can easily have a picture of the current supported loop
351// nests. However, some of the current checks don't depend on \p OuterLp and
352// would be redundantly executed for each \p Lp if we invoked this function for
353// different candidate outer loops. This is not the case for now because we
354// don't currently have the infrastructure to evaluate multiple candidate outer
355// loops and \p OuterLp will be a fixed parameter while we only support explicit
356// outer loop vectorization. It's also very likely that these checks go away
357// before introducing the aforementioned infrastructure. However, if this is not
358// the case, we should move the \p OuterLp independent checks to a separate
359// function that is only executed once for each \p Lp.
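// As an illustration of conditions 1-3 above, the inner loop in the nest
// below is uniform with regard to the outer loop: it has a canonical IV
// (j starts at 0 and steps by 1) and its latch compares that IV against a
// bound N that is invariant in the outer loop:
//
//   for (i = 0; i < M; ++i)   // OuterLp
//     for (j = 0; j < N; ++j) // Lp: uniform w.r.t. OuterLp
//       A[i][j] = 0;
//
// If the inner bound depended on i instead, the vector lanes of the outer
// loop would execute different numbers of inner iterations and Lp would not
// be uniform.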
360static bool isUniformLoop(Loop *Lp, Loop *OuterLp) {
361 assert(Lp->getLoopLatch() && "Expected loop with a single latch.");
362
363 // If Lp is the outer loop, it's uniform by definition.
364 if (Lp == OuterLp)
365 return true;
366 assert(OuterLp->contains(Lp) && "OuterLp must contain Lp.");
367
368 // 1.
369 PHINode *IV = Lp->getCanonicalInductionVariable();
370 if (!IV) {
371 LLVM_DEBUG(dbgs() << "LV: Canonical IV not found.\n");
372 return false;
373 }
374
375 // 2.
376 BasicBlock *Latch = Lp->getLoopLatch();
377 auto *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator());
378 if (!LatchBr || LatchBr->isUnconditional()) {
379 LLVM_DEBUG(dbgs() << "LV: Unsupported loop latch branch.\n");
380 return false;
381 }
382
383 // 3.
384 auto *LatchCmp = dyn_cast<CmpInst>(LatchBr->getCondition());
385 if (!LatchCmp) {
386 LLVM_DEBUG(
387 dbgs() << "LV: Loop latch condition is not a compare instruction.\n");
388 return false;
389 }
390
391 Value *CondOp0 = LatchCmp->getOperand(0);
392 Value *CondOp1 = LatchCmp->getOperand(1);
393 Value *IVUpdate = IV->getIncomingValueForBlock(Latch);
394 if (!(CondOp0 == IVUpdate && OuterLp->isLoopInvariant(CondOp1)) &&
395 !(CondOp1 == IVUpdate && OuterLp->isLoopInvariant(CondOp0))) {
396 LLVM_DEBUG(dbgs() << "LV: Loop latch condition is not uniform.\n");
397 return false;
398 }
399
400 return true;
401}
402
403// Return true if \p Lp and all its nested loops are uniform with regard to \p
404// OuterLp.
405static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp) {
406 if (!isUniformLoop(Lp, OuterLp))
407 return false;
408
409 // Check if nested loops are uniform.
410 for (Loop *SubLp : *Lp)
411 if (!isUniformLoopNest(SubLp, OuterLp))
412 return false;
413
414 return true;
415}
416
417static IntegerType *getInductionIntegerTy(const DataLayout &DL, Type *Ty) {
418 assert(Ty->isIntOrPtrTy() && "Expected integer or pointer type");
419
420 if (Ty->isPointerTy())
421 return DL.getIntPtrType(Ty->getContext(), Ty->getPointerAddressSpace());
422
423 // It is possible that chars or shorts overflow when we ask for the loop's
424 // trip count; work around this by changing the type size.
425 if (Ty->getScalarSizeInBits() < 32)
426 return Type::getInt32Ty(Ty->getContext());
427
428 return cast<IntegerType>(Ty);
429}
430
431static IntegerType *getWiderInductionTy(const DataLayout &DL, Type *Ty0,
432 Type *Ty1) {
433 IntegerType *TyA = getInductionIntegerTy(DL, Ty0);
434 IntegerType *TyB = getInductionIntegerTy(DL, Ty1);
435 return TyA->getScalarSizeInBits() > TyB->getScalarSizeInBits() ? TyA : TyB;
436}
437
438/// Check that the instruction has outside loop users and is not an
439/// identified reduction variable.
440static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
441 SmallPtrSetImpl<Value *> &AllowedExit) {
442 // Reductions, Inductions and non-header phis are allowed to have exit users. All
443 // other instructions must not have external users.
444 if (!AllowedExit.count(Inst))
445 // Check that all of the users of the loop are inside the BB.
446 for (User *U : Inst->users()) {
447 Instruction *UI = cast<Instruction>(U);
448 // This user may be a reduction exit value.
449 if (!TheLoop->contains(UI)) {
450 LLVM_DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n');
451 return true;
452 }
453 }
454 return false;
455}
456
457/// Returns true if A and B have the same pointer operand or the same SCEV address.
458static bool storeToSameAddress(ScalarEvolution *SE, StoreInst *A,
459 StoreInst *B) {
460 // Compare store
461 if (A == B)
462 return true;
463
464 // Otherwise Compare pointers
465 Value *APtr = A->getPointerOperand();
466 Value *BPtr = B->getPointerOperand();
467 if (APtr == BPtr)
468 return true;
469
470 // Otherwise compare address SCEVs
471 return SE->getSCEV(APtr) == SE->getSCEV(BPtr);
472}
473
474int LoopVectorizationLegality::isConsecutivePtr(Type *AccessTy,
475 Value *Ptr) const {
476 // FIXME: Currently, the set of symbolic strides is sometimes queried before
477 // it's collected. This happens from canVectorizeWithIfConvert, when the
478 // pointer is checked to reference consecutive elements suitable for a
479 // masked access.
480 const auto &Strides =
481 LAI ? LAI->getSymbolicStrides() : DenseMap<Value *, const SCEV *>();
482
483 int Stride = getPtrStride(PSE, AccessTy, Ptr, TheLoop, *DT, Strides,
484 AllowRuntimeSCEVChecks, false)
485 .value_or(0);
486 if (Stride == 1 || Stride == -1)
487 return Stride;
488 return 0;
489}
490
491bool LoopVectorizationLegality::isInvariant(Value *V) const {
492 return LAI->isInvariant(V);
493}
494
495namespace {
496/// A rewriter to build the SCEVs for each of the VF lanes in the expected
497/// vectorized loop, which can then be compared to detect their uniformity. This
498/// is done by replacing the AddRec SCEVs of the original scalar loop (TheLoop)
499/// with new AddRecs where the step is multiplied by StepMultiplier and Offset *
500/// Step is added. Also checks if all sub-expressions are analyzable w.r.t.
501/// uniformity.
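/// For instance, with StepMultiplier == VF == 4 and Offset == 2, an AddRec
/// {A,+,B}<TheLoop> is rewritten to {A + 2*B,+,4*B}<TheLoop>, i.e. the value
/// lane 2 would see in the vectorized loop. Comparing the lane-0 expression
/// with the expressions for the remaining lanes (see isUniform() further
/// down) then tells whether the value is the same across the whole vector.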
502class SCEVAddRecForUniformityRewriter
503 : public SCEVRewriteVisitor<SCEVAddRecForUniformityRewriter> {
504 /// Multiplier to be applied to the step of AddRecs in TheLoop.
505 unsigned StepMultiplier;
506
507 /// Offset to be added to the AddRecs in TheLoop.
508 unsigned Offset;
509
510 /// Loop for which to rewrite AddRecs.
511 Loop *TheLoop;
512
513 /// Are any sub-expressions not analyzable w.r.t. uniformity?
514 bool CannotAnalyze = false;
515
516 bool canAnalyze() const { return !CannotAnalyze; }
517
518public:
519 SCEVAddRecForUniformityRewriter(ScalarEvolution &SE, unsigned StepMultiplier,
520 unsigned Offset, Loop *TheLoop)
521 : SCEVRewriteVisitor(SE), StepMultiplier(StepMultiplier), Offset(Offset),
522 TheLoop(TheLoop) {}
523
524 const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
525 assert(Expr->getLoop() == TheLoop &&
526 "addrec outside of TheLoop must be invariant and should have been "
527 "handled earlier");
528 // Build a new AddRec by multiplying the step by StepMultiplier and
529 // incrementing the start by Offset * step.
530 Type *Ty = Expr->getType();
531 const SCEV *Step = Expr->getStepRecurrence(SE);
532 if (!SE.isLoopInvariant(Step, TheLoop)) {
533 CannotAnalyze = true;
534 return Expr;
535 }
536 const SCEV *NewStep =
537 SE.getMulExpr(Step, SE.getConstant(Ty, StepMultiplier));
538 const SCEV *ScaledOffset = SE.getMulExpr(Step, SE.getConstant(Ty, Offset));
539 const SCEV *NewStart = SE.getAddExpr(Expr->getStart(), ScaledOffset);
540 return SE.getAddRecExpr(NewStart, NewStep, TheLoop, SCEV::FlagAnyWrap);
541 }
542
543 const SCEV *visit(const SCEV *S) {
544 if (CannotAnalyze || SE.isLoopInvariant(S, TheLoop))
545 return S;
546 return SCEVRewriteVisitor<SCEVAddRecForUniformityRewriter>::visit(S);
547 }
548
549 const SCEV *visitUnknown(const SCEVUnknown *S) {
550 if (SE.isLoopInvariant(S, TheLoop))
551 return S;
552 // The value could vary across iterations.
553 CannotAnalyze = true;
554 return S;
555 }
556
557 const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *S) {
558 // Could not analyze the expression.
559 CannotAnalyze = true;
560 return S;
561 }
562
563 static const SCEV *rewrite(const SCEV *S, ScalarEvolution &SE,
564 unsigned StepMultiplier, unsigned Offset,
565 Loop *TheLoop) {
566 /// Bail out if the expression does not contain a UDiv expression.
567 /// Uniform values which are not loop invariant require operations to strip
568 /// out the lowest bits. For now just look for UDivs, and use that to avoid
569 /// rewriting UDiv-free expressions for other lanes, to limit compile time.
570 if (!SCEVExprContains(S,
571 [](const SCEV *S) { return isa<SCEVUDivExpr>(S); }))
572 return SE.getCouldNotCompute();
573
574 SCEVAddRecForUniformityRewriter Rewriter(SE, StepMultiplier, Offset,
575 TheLoop);
576 const SCEV *Result = Rewriter.visit(S);
577
578 if (Rewriter.canAnalyze())
579 return Result;
580 return SE.getCouldNotCompute();
581 }
582};
583
584} // namespace
585
586bool LoopVectorizationLegality::isUniform(Value *V, ElementCount VF) const {
587 if (isInvariant(V))
588 return true;
589 if (VF.isScalable())
590 return false;
591 if (VF.isScalar())
592 return true;
593
594 // Since we rely on SCEV for uniformity, if the type is not SCEVable, it is
595 // never considered uniform.
596 auto *SE = PSE.getSE();
597 if (!SE->isSCEVable(V->getType()))
598 return false;
599 const SCEV *S = SE->getSCEV(V);
600
601 // Rewrite AddRecs in TheLoop to step by VF and check if the expression for
602 // lane 0 matches the expressions for all other lanes.
603 unsigned FixedVF = VF.getKnownMinValue();
604 const SCEV *FirstLaneExpr =
605 SCEVAddRecForUniformityRewriter::rewrite(S, *SE, FixedVF, 0, TheLoop);
606 if (isa<SCEVCouldNotCompute>(FirstLaneExpr))
607 return false;
608
609 // Make sure the expressions for lanes FixedVF-1..1 match the expression for
610 // lane 0. We check lanes in reverse order for compile-time, as frequently
611 // checking the last lane is sufficient to rule out uniformity.
612 return all_of(reverse(seq<unsigned>(1, FixedVF)), [&](unsigned I) {
613 const SCEV *IthLaneExpr =
614 SCEVAddRecForUniformityRewriter::rewrite(S, *SE, FixedVF, I, TheLoop);
615 return FirstLaneExpr == IthLaneExpr;
616 });
617}
618
619bool LoopVectorizationLegality::isUniformMemOp(Instruction &I,
620 ElementCount VF) const {
621 Value *Ptr = getLoadStorePointerOperand(&I);
622 if (!Ptr)
623 return false;
624 // Note: There's nothing inherent which prevents predicated loads and
625 // stores from being uniform. The current lowering simply doesn't handle
626 // it; in particular, the cost model distinguishes scatter/gather from
627 // scalar w/predication, and we currently rely on the scalar path.
628 return isUniform(Ptr, VF) && !blockNeedsPredication(I.getParent());
629}
630
631bool LoopVectorizationLegality::canVectorizeOuterLoop() {
632 assert(!TheLoop->isInnermost() && "We are not vectorizing an outer loop.");
633 // Store the result and return it at the end instead of exiting early, in case
634 // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
635 bool Result = true;
636 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
637
638 for (BasicBlock *BB : TheLoop->blocks()) {
639 // Check whether the BB terminator is a BranchInst. Any other terminator is
640 // not supported yet.
641 auto *Br = dyn_cast<BranchInst>(BB->getTerminator());
642 if (!Br) {
643 reportVectorizationFailure("Unsupported basic block terminator",
644 "loop control flow is not understood by vectorizer",
645 "CFGNotUnderstood", ORE, TheLoop);
646 if (DoExtraAnalysis)
647 Result = false;
648 else
649 return false;
650 }
651
652 // Check whether the BranchInst is a supported one. Only unconditional
653 // branches, conditional branches with an outer loop invariant condition or
654 // backedges are supported.
655 // FIXME: We skip these checks when VPlan predication is enabled as we
656 // want to allow divergent branches. This whole check will be removed
657 // once VPlan predication is on by default.
658 if (Br && Br->isConditional() &&
659 !TheLoop->isLoopInvariant(Br->getCondition()) &&
660 !LI->isLoopHeader(Br->getSuccessor(0)) &&
661 !LI->isLoopHeader(Br->getSuccessor(1))) {
662 reportVectorizationFailure("Unsupported conditional branch",
663 "loop control flow is not understood by vectorizer",
664 "CFGNotUnderstood", ORE, TheLoop);
665 if (DoExtraAnalysis)
666 Result = false;
667 else
668 return false;
669 }
670 }
671
672 // Check whether inner loops are uniform. At this point, we only support
673 // simple outer loops scenarios with uniform nested loops.
674 if (!isUniformLoopNest(TheLoop /*loop nest*/,
675 TheLoop /*context outer loop*/)) {
676 reportVectorizationFailure("Outer loop contains divergent loops",
677 "loop control flow is not understood by vectorizer",
678 "CFGNotUnderstood", ORE, TheLoop);
679 if (DoExtraAnalysis)
680 Result = false;
681 else
682 return false;
683 }
684
685 // Check whether we are able to set up outer loop induction.
686 if (!setupOuterLoopInductions()) {
687 reportVectorizationFailure("Unsupported outer loop Phi(s)",
688 "UnsupportedPhi", ORE, TheLoop);
689 if (DoExtraAnalysis)
690 Result = false;
691 else
692 return false;
693 }
694
695 return Result;
696}
697
698void LoopVectorizationLegality::addInductionPhi(
699 PHINode *Phi, const InductionDescriptor &ID,
700 SmallPtrSetImpl<Value *> &AllowedExit) {
701 Inductions[Phi] = ID;
702
703 // In case this induction also comes with casts that we know we can ignore
704 // in the vectorized loop body, record them here. All casts could be recorded
705 // here for ignoring, but it suffices to record only the first (as it is the
706 // only one that may be used outside the cast sequence).
707 ArrayRef<Instruction *> Casts = ID.getCastInsts();
708 if (!Casts.empty())
709 InductionCastsToIgnore.insert(*Casts.begin());
710
711 Type *PhiTy = Phi->getType();
712 const DataLayout &DL = Phi->getDataLayout();
713
714 assert((PhiTy->isIntOrPtrTy() || PhiTy->isFloatingPointTy()) &&
715 "Expected int, ptr, or FP induction phi type");
716
717 // Get the widest type.
718 if (PhiTy->isIntOrPtrTy()) {
719 if (!WidestIndTy)
720 WidestIndTy = getInductionIntegerTy(DL, PhiTy);
721 else
722 WidestIndTy = getWiderInductionTy(DL, PhiTy, WidestIndTy);
723 }
724
725 // Int inductions are special because we only allow one IV.
726 if (ID.getKind() == InductionDescriptor::IK_IntInduction &&
727 ID.getConstIntStepValue() && ID.getConstIntStepValue()->isOne() &&
728 isa<Constant>(ID.getStartValue()) &&
729 cast<Constant>(ID.getStartValue())->isNullValue()) {
730
731 // Use the phi node with the widest type as induction. Use the last
732 // one if there are multiple (no good reason for doing this other
733 // than it is expedient). We've checked that it begins at zero and
734 // steps by one, so this is a canonical induction variable.
735 if (!PrimaryInduction || PhiTy == WidestIndTy)
736 PrimaryInduction = Phi;
737 }
738
739 // Both the PHI node itself, and the "post-increment" value feeding
740 // back into the PHI node may have external users.
741 // We can allow those uses, except if the SCEVs we have for them rely
742 // on predicates that only hold within the loop, since allowing the exit
743 // currently means re-using this SCEV outside the loop (see PR33706 for more
744 // details).
745 if (PSE.getPredicate().isAlwaysTrue()) {
746 AllowedExit.insert(Phi);
747 AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch()));
748 }
749
750 LLVM_DEBUG(dbgs() << "LV: Found an induction variable.\n");
751}
752
753bool LoopVectorizationLegality::setupOuterLoopInductions() {
754 BasicBlock *Header = TheLoop->getHeader();
755
756 // Returns true if a given Phi is a supported induction.
757 auto IsSupportedPhi = [&](PHINode &Phi) -> bool {
758 InductionDescriptor ID;
759 if (InductionDescriptor::isInductionPHI(&Phi, TheLoop, PSE, ID) &&
760 ID.getKind() == InductionDescriptor::IK_IntInduction) {
761 addInductionPhi(&Phi, ID, AllowedExit);
762 return true;
763 }
764 // Bail out for any Phi in the outer loop header that is not a supported
765 // induction.
766 LLVM_DEBUG(
767 dbgs() << "LV: Found unsupported PHI for outer loop vectorization.\n");
768 return false;
769 };
770
771 return llvm::all_of(Header->phis(), IsSupportedPhi);
772}
773
774/// Checks if a function is scalarizable according to the TLI, in
775/// the sense that it should be vectorized and then expanded in
776/// multiple scalar calls. This is represented in the
777/// TLI via mappings that do not specify a vector name, as in the
778/// following example:
779///
780/// const VecDesc VecIntrinsics[] = {
781/// {"llvm.phx.abs.i32", "", 4}
782/// };
783static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI) {
784 const StringRef ScalarName = CI.getCalledFunction()->getName();
785 bool Scalarize = TLI.isFunctionVectorizable(ScalarName);
786 // Check that all known VFs are not associated to a vector
787 // function, i.e. the vector name is empty.
788 if (Scalarize) {
789 ElementCount WidestFixedVF, WidestScalableVF;
790 TLI.getWidestVF(ScalarName, WidestFixedVF, WidestScalableVF);
791 for (ElementCount VF = ElementCount::getFixed(2);
792 ElementCount::isKnownLE(VF, WidestFixedVF); VF *= 2)
793 Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF);
794 for (ElementCount VF = ElementCount::getScalable(1);
795 ElementCount::isKnownLE(VF, WidestScalableVF); VF *= 2)
796 Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF);
797 assert((WidestScalableVF.isZero() || !Scalarize) &&
798 "Caller may decide to scalarize a variant using a scalable VF");
799 }
800 return Scalarize;
801}
802
803/// Returns true if the call return type `Ty` can be widened by the loop
804/// vectorizer.
805static bool canWidenCallReturnType(Type *Ty) {
806 auto *StructTy = dyn_cast<StructType>(Ty);
807 // TODO: Remove the homogeneous types restriction. This is just an initial
808 // simplification. When we want to support things like the overflow intrinsics
809 // we will have to lift this restriction.
810 if (StructTy && !StructTy->containsHomogeneousTypes())
811 return false;
812 return canVectorizeTy(StructTy);
813}
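// For example (assuming a VF of 4): a call returning the homogeneous struct
// {float, float}, whose results are only consumed through extractvalue, can
// be widened to a call returning {<4 x float>, <4 x float>}; a struct mixing
// element types such as {i32, float} is rejected by the check above.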
814
815bool LoopVectorizationLegality::canVectorizeInstrs() {
816 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
817 bool Result = true;
818
819 // For each block in the loop.
820 for (BasicBlock *BB : TheLoop->blocks()) {
821 // Scan the instructions in the block and look for hazards.
822 for (Instruction &I : *BB) {
823 Result &= canVectorizeInstr(I);
824 if (!DoExtraAnalysis && !Result)
825 return false;
826 }
827 }
828
829 if (!PrimaryInduction) {
830 if (Inductions.empty()) {
831 reportVectorizationFailure(
832 "Did not find one integer induction var",
833 "loop induction variable could not be identified",
834 "NoInductionVariable", ORE, TheLoop);
835 return false;
836 }
837 if (!WidestIndTy) {
838 reportVectorizationFailure(
839 "Did not find one integer induction var",
840 "integer loop induction variable could not be identified",
841 "NoIntegerInductionVariable", ORE, TheLoop);
842 return false;
843 }
844 LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
845 }
846
847 // Now we know the widest induction type, check if our found induction
848 // is the same size. If it's not, unset it here and InnerLoopVectorizer
849 // will create another.
850 if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType())
851 PrimaryInduction = nullptr;
852
853 return Result;
854}
855
856bool LoopVectorizationLegality::canVectorizeInstr(Instruction &I) {
857 BasicBlock *BB = I.getParent();
858 BasicBlock *Header = TheLoop->getHeader();
859
860 if (auto *Phi = dyn_cast<PHINode>(&I)) {
861 Type *PhiTy = Phi->getType();
862 // Check that this PHI type is allowed.
863 if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
864 !PhiTy->isPointerTy()) {
865 reportVectorizationFailure(
866 "Found a non-int non-pointer PHI",
867 "loop control flow is not understood by vectorizer",
868 "CFGNotUnderstood", ORE, TheLoop);
869 return false;
870 }
871
872 // If this PHINode is not in the header block, then we know that we
873 // can convert it to select during if-conversion. No need to check if
874 // the PHIs in this block are induction or reduction variables.
875 if (BB != Header) {
876 // Non-header phi nodes that have outside uses can be vectorized. Add
877 // them to the list of allowed exits.
878 // Unsafe cyclic dependencies with header phis are identified during
879 // legalization for reduction, induction and fixed order
880 // recurrences.
881 AllowedExit.insert(&I);
882 return true;
883 }
884
885 // We only allow if-converted PHIs with exactly two incoming values.
886 if (Phi->getNumIncomingValues() != 2) {
887 reportVectorizationFailure(
888 "Found an invalid PHI",
889 "loop control flow is not understood by vectorizer",
890 "CFGNotUnderstood", ORE, TheLoop, Phi);
891 return false;
892 }
893
894 RecurrenceDescriptor RedDes;
895 if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC, DT,
896 PSE.getSE())) {
897 Requirements->addExactFPMathInst(RedDes.getExactFPMathInst());
898 AllowedExit.insert(RedDes.getLoopExitInstr());
899 Reductions[Phi] = RedDes;
902 RedDes.getRecurrenceKind())) &&
903 "Only min/max recurrences are allowed to have multiple uses "
904 "currently");
905 return true;
906 }
907
908 // We prevent matching non-constant strided pointer IVs to preserve
909 // historical vectorizer behavior after a generalization of the
910 // IVDescriptor code. The intent is to remove this check, but we
911 // have to fix issues around code quality for such loops first.
912 auto IsDisallowedStridedPointerInduction =
913 [](const InductionDescriptor &ID) {
914 if (AllowStridedPointerIVs)
915 return false;
916 return ID.getKind() == InductionDescriptor::IK_PtrInduction &&
917 ID.getConstIntStepValue() == nullptr;
918 };
919
920 // TODO: Instead of recording the AllowedExit, it would be good to
921 // record the complementary set: NotAllowedExit. These include (but may
922 // not be limited to):
923 // 1. Reduction phis as they represent the one-before-last value, which
924 // is not available when vectorized
925 // 2. Induction phis and increment when SCEV predicates cannot be used
926 // outside the loop - see addInductionPhi
927 // 3. Non-Phis with outside uses when SCEV predicates cannot be used
928 // outside the loop - see call to hasOutsideLoopUser in the non-phi
929 // handling below
930 // 4. FixedOrderRecurrence phis that can possibly be handled by
931 // extraction.
932 // By recording these, we can then reason about ways to vectorize each
933 // of these NotAllowedExit.
934 InductionDescriptor ID;
935 if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID) &&
936 !IsDisallowedStridedPointerInduction(ID)) {
937 addInductionPhi(Phi, ID, AllowedExit);
938 Requirements->addExactFPMathInst(ID.getExactFPMathInst());
939 return true;
940 }
941
942 if (RecurrenceDescriptor::isFixedOrderRecurrence(Phi, TheLoop, DT)) {
943 AllowedExit.insert(Phi);
944 FixedOrderRecurrences.insert(Phi);
945 return true;
946 }
947
948 // As a last resort, coerce the PHI to an AddRec expression
949 // and re-try classifying it as an induction PHI.
950 if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true) &&
951 !IsDisallowedStridedPointerInduction(ID)) {
952 addInductionPhi(Phi, ID, AllowedExit);
953 return true;
954 }
955
956 reportVectorizationFailure("Found an unidentified PHI",
957 "value that could not be identified as "
958 "reduction is used outside the loop",
959 "NonReductionValueUsedOutsideLoop", ORE, TheLoop,
960 Phi);
961 return false;
962 } // end of PHI handling
963
964 // We handle calls that:
965 // * Have a mapping to an IR intrinsic.
966 // * Have a vector version available.
967 auto *CI = dyn_cast<CallInst>(&I);
968
969 if (CI && !getVectorIntrinsicIDForCall(CI, TLI) &&
970 !(CI->getCalledFunction() && TLI &&
971 (!VFDatabase::getMappings(*CI).empty() || isTLIScalarize(*TLI, *CI)))) {
972 // If the call is a recognized math library call, it is likely that
973 // we can vectorize it given loosened floating-point constraints.
974 LibFunc Func;
975 bool IsMathLibCall =
976 TLI && CI->getCalledFunction() && CI->getType()->isFloatingPointTy() &&
977 TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) &&
978 TLI->hasOptimizedCodeGen(Func);
979
980 if (IsMathLibCall) {
981 // TODO: Ideally, we should not use clang-specific language here,
982 // but it's hard to provide meaningful yet generic advice.
983 // Also, should this be guarded by allowExtraAnalysis() and/or be part
984 // of the returned info from isFunctionVectorizable()?
985 reportVectorizationFailure(
986 "Found a non-intrinsic callsite",
987 "library call cannot be vectorized. "
988 "Try compiling with -fno-math-errno, -ffast-math, "
989 "or similar flags",
990 "CantVectorizeLibcall", ORE, TheLoop, CI);
991 } else {
992 reportVectorizationFailure("Found a non-intrinsic callsite",
993 "call instruction cannot be vectorized",
994 "CantVectorizeLibcall", ORE, TheLoop, CI);
995 }
996 return false;
997 }
998
999 // Some intrinsics have scalar arguments which must be the same (i.e. loop
1000 // invariant) on every iteration for the call to be vectorized.
1001 if (CI) {
1002 auto *SE = PSE.getSE();
1003 Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI);
1004 for (unsigned Idx = 0; Idx < CI->arg_size(); ++Idx)
1005 if (isVectorIntrinsicWithScalarOpAtArg(IntrinID, Idx, TTI)) {
1006 if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(Idx)), TheLoop)) {
1007 reportVectorizationFailure(
1008 "Found unvectorizable intrinsic",
1009 "intrinsic instruction cannot be vectorized",
1010 "CantVectorizeIntrinsic", ORE, TheLoop, CI);
1011 return false;
1012 }
1013 }
1014 }
1015
1016 // If we found a vectorized variant of a function, note that so LV can
1017 // make better decisions about maximum VF.
1018 if (CI && !VFDatabase::getMappings(*CI).empty())
1019 VecCallVariantsFound = true;
1020
1021 auto CanWidenInstructionTy = [](Instruction const &Inst) {
1022 Type *InstTy = Inst.getType();
1023 if (!isa<StructType>(InstTy))
1024 return canVectorizeTy(InstTy);
1025
1026 // For now, we only recognize struct values returned from calls where
1027 // all users are extractvalue as vectorizable. All element types of the
1028 // struct must be types that can be widened.
1029 return isa<CallInst>(Inst) && canWidenCallReturnType(InstTy) &&
1030 all_of(Inst.users(), IsaPred<ExtractValueInst>);
1031 };
1032
1033 // Check that the instruction return type is vectorizable.
1034 // We can't vectorize casts from vector type to scalar type.
1035 // Also, we can't vectorize extractelement instructions.
1036 if (!CanWidenInstructionTy(I) ||
1037 (isa<CastInst>(I) &&
1038 !VectorType::isValidElementType(I.getOperand(0)->getType())) ||
1039 isa<ExtractElementInst>(I)) {
1040 reportVectorizationFailure("Found unvectorizable type",
1041 "instruction return type cannot be vectorized",
1042 "CantVectorizeInstructionReturnType", ORE,
1043 TheLoop, &I);
1044 return false;
1045 }
1046
1047 // Check that the stored type is vectorizable.
1048 if (auto *ST = dyn_cast<StoreInst>(&I)) {
1049 Type *T = ST->getValueOperand()->getType();
1050 if (!VectorType::isValidElementType(T)) {
1051 reportVectorizationFailure("Store instruction cannot be vectorized",
1052 "CantVectorizeStore", ORE, TheLoop, ST);
1053 return false;
1054 }
1055
1056 // For nontemporal stores, check that a nontemporal vector version is
1057 // supported on the target.
1058 if (ST->getMetadata(LLVMContext::MD_nontemporal)) {
1059 // Arbitrarily try a vector of 2 elements.
1060 auto *VecTy = FixedVectorType::get(T, /*NumElts=*/2);
1061 assert(VecTy && "did not find vectorized version of stored type");
1062 if (!TTI->isLegalNTStore(VecTy, ST->getAlign())) {
1063 reportVectorizationFailure(
1064 "nontemporal store instruction cannot be vectorized",
1065 "CantVectorizeNontemporalStore", ORE, TheLoop, ST);
1066 return false;
1067 }
1068 }
1069
1070 } else if (auto *LD = dyn_cast<LoadInst>(&I)) {
1071 if (LD->getMetadata(LLVMContext::MD_nontemporal)) {
1072 // For nontemporal loads, check that a nontemporal vector version is
1073 // supported on the target (arbitrarily try a vector of 2 elements).
1074 auto *VecTy = FixedVectorType::get(I.getType(), /*NumElts=*/2);
1075 assert(VecTy && "did not find vectorized version of load type");
1076 if (!TTI->isLegalNTLoad(VecTy, LD->getAlign())) {
1077 reportVectorizationFailure(
1078 "nontemporal load instruction cannot be vectorized",
1079 "CantVectorizeNontemporalLoad", ORE, TheLoop, LD);
1080 return false;
1081 }
1082 }
1083
1084 // FP instructions can allow unsafe algebra, thus vectorizable by
1085 // non-IEEE-754 compliant SIMD units.
1086 // This applies to floating-point math operations and calls, not memory
1087 // operations, shuffles, or casts, as they don't change precision or
1088 // semantics.
1089 } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) &&
1090 !I.isFast()) {
1091 LLVM_DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n");
1092 Hints->setPotentiallyUnsafe();
1093 }
1094
1095 // Reduction instructions are allowed to have exit users.
1096 // All other instructions must not have external users.
1097 if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) {
1098 // We can safely vectorize loops where instructions within the loop are
1099 // used outside the loop only if the SCEV predicates within the loop is
1100 // same as outside the loop. Allowing the exit means reusing the SCEV
1101 // outside the loop.
1102 if (PSE.getPredicate().isAlwaysTrue()) {
1103 AllowedExit.insert(&I);
1104 return true;
1105 }
1106 reportVectorizationFailure("Value cannot be used outside the loop",
1107 "ValueUsedOutsideLoop", ORE, TheLoop, &I);
1108 return false;
1109 }
1110
1111 return true;
1112}
1113
1114/// Find histogram operations that match high-level code in loops:
1115/// \code
1116/// buckets[indices[i]]+=step;
1117/// \endcode
1118///
1119/// It matches a pattern starting from \p HSt, which Stores to the 'buckets'
1120/// array the computed histogram. It uses a BinOp to sum all counts, storing
1121/// them using a loop-variant index Load from the 'indices' input array.
1122///
1123/// On successful matches it updates the STATISTIC 'HistogramsDetected',
1124/// regardless of hardware support. When there is support, it additionally
1125/// stores the BinOp/Load pairs in \p HistogramCounts, as well the pointers
1126/// used to update histogram in \p HistogramPtrs.
1127static bool findHistogram(LoadInst *LI, StoreInst *HSt, Loop *TheLoop,
1128 const PredicatedScalarEvolution &PSE,
1129 SmallVectorImpl<HistogramInfo> &Histograms) {
1130
1131 // Store value must come from a Binary Operation.
1132 Instruction *HPtrInstr = nullptr;
1133 BinaryOperator *HBinOp = nullptr;
1134 if (!match(HSt, m_Store(m_BinOp(HBinOp), m_Instruction(HPtrInstr))))
1135 return false;
1136
1137 // BinOp must be an Add or a Sub modifying the bucket value by a
1138 // loop invariant amount.
1139 // FIXME: We assume the loop invariant term is on the RHS.
1140 // Fine for an immediate/constant, but maybe not a generic value?
1141 Value *HIncVal = nullptr;
1142 if (!match(HBinOp, m_Add(m_Load(m_Specific(HPtrInstr)), m_Value(HIncVal))) &&
1143 !match(HBinOp, m_Sub(m_Load(m_Specific(HPtrInstr)), m_Value(HIncVal))))
1144 return false;
1145
1146 // Make sure the increment value is loop invariant.
1147 if (!TheLoop->isLoopInvariant(HIncVal))
1148 return false;
1149
1150 // The address to store is calculated through a GEP Instruction.
1151 GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(HPtrInstr);
1152 if (!GEP)
1153 return false;
1154
1155 // Restrict address calculation to constant indices except for the last term.
1156 Value *HIdx = nullptr;
1157 for (Value *Index : GEP->indices()) {
1158 if (HIdx)
1159 return false;
1160 if (!isa<ConstantInt>(Index))
1161 HIdx = Index;
1162 }
1163
1164 if (!HIdx)
1165 return false;
1166
1167 // Check that the index is calculated by loading from another array. Ignore
1168 // any extensions.
1169 // FIXME: Support indices from other sources than a linear load from memory?
1170 // We're currently trying to match an operation looping over an array
1171 // of indices, but there could be additional levels of indirection
1172 // in place, or possibly some additional calculation to form the index
1173 // from the loaded data.
1174 Value *VPtrVal;
1175 if (!match(HIdx, m_ZExtOrSExtOrSelf(m_Load(m_Value(VPtrVal)))))
1176 return false;
1177
1178 // Make sure the index address varies in this loop, not an outer loop.
1179 const auto *AR = dyn_cast<SCEVAddRecExpr>(PSE.getSE()->getSCEV(VPtrVal));
1180 if (!AR || AR->getLoop() != TheLoop)
1181 return false;
1182
1183 // Ensure we'll have the same mask by checking that all parts of the histogram
1184 // (gather load, update, scatter store) are in the same block.
1185 LoadInst *IndexedLoad = cast<LoadInst>(HBinOp->getOperand(0));
1186 BasicBlock *LdBB = IndexedLoad->getParent();
1187 if (LdBB != HBinOp->getParent() || LdBB != HSt->getParent())
1188 return false;
1189
1190 LLVM_DEBUG(dbgs() << "LV: Found histogram for: " << *HSt << "\n");
1191
1192 // Store the operations that make up the histogram.
1193 Histograms.emplace_back(IndexedLoad, HBinOp, HSt);
1194 return true;
1195}
1196
1197bool LoopVectorizationLegality::canVectorizeIndirectUnsafeDependences() {
1198 // For now, we only support an IndirectUnsafe dependency that calculates
1199 // a histogram
1200 if (!EnableHistogramVectorization)
1201 return false;
1202
1203 // Find a single IndirectUnsafe dependency.
1204 const MemoryDepChecker::Dependence *IUDep = nullptr;
1205 const MemoryDepChecker &DepChecker = LAI->getDepChecker();
1206 const auto *Deps = DepChecker.getDependences();
1207 // If there were too many dependences, LAA abandons recording them. We can't
1208 // proceed safely if we don't know what the dependences are.
1209 if (!Deps)
1210 return false;
1211
1212 for (const MemoryDepChecker::Dependence &Dep : *Deps) {
1213 // Ignore dependencies that are either known to be safe or can be
1214 // checked at runtime.
1215 if (MemoryDepChecker::Dependence::isSafeForVectorization(Dep.Type) !=
1216 MemoryDepChecker::VectorizationSafetyStatus::Unsafe)
1217 continue;
1218
1219 // We're only interested in IndirectUnsafe dependencies here, where the
1220 // address might come from a load from memory. We also only want to handle
1221 // one such dependency, at least for now.
1222 if (Dep.Type != MemoryDepChecker::Dependence::IndirectUnsafe || IUDep)
1223 return false;
1224
1225 IUDep = &Dep;
1226 }
1227 if (!IUDep)
1228 return false;
1229
1230 // For now only normal loads and stores are supported.
1231 LoadInst *LI = dyn_cast<LoadInst>(IUDep->getSource(DepChecker));
1232 StoreInst *SI = dyn_cast<StoreInst>(IUDep->getDestination(DepChecker));
1233
1234 if (!LI || !SI)
1235 return false;
1236
1237 LLVM_DEBUG(dbgs() << "LV: Checking for a histogram on: " << *SI << "\n");
1238 return findHistogram(LI, SI, TheLoop, LAI->getPSE(), Histograms);
1239}
1240
1241bool LoopVectorizationLegality::canVectorizeMemory() {
1242 LAI = &LAIs.getInfo(*TheLoop);
1243 const OptimizationRemarkAnalysis *LAR = LAI->getReport();
1244 if (LAR) {
1245 ORE->emit([&]() {
1246 return OptimizationRemarkAnalysis(Hints->vectorizeAnalysisPassName(),
1247 "loop not vectorized: ", *LAR);
1248 });
1249 }
1250
1251 if (!LAI->canVectorizeMemory()) {
1252 if (hasUncountableEarlyExit()) {
1253 reportVectorizationFailure(
1254 "Cannot vectorize unsafe dependencies in uncountable exit loop with "
1255 "side effects",
1256 "CantVectorizeUnsafeDependencyForEELoopWithSideEffects", ORE,
1257 TheLoop);
1258 return false;
1259 }
1260
1261 return canVectorizeIndirectUnsafeDependences();
1262 }
1263
1264 if (LAI->hasLoadStoreDependenceInvolvingLoopInvariantAddress()) {
1265 reportVectorizationFailure("We don't allow storing to uniform addresses",
1266 "write to a loop invariant address could not "
1267 "be vectorized",
1268 "CantVectorizeStoreToLoopInvariantAddress", ORE,
1269 TheLoop);
1270 return false;
1271 }
1272
1273 // We can vectorize stores to an invariant address when the final reduction
1274 // value is guaranteed to be stored at the end of the loop. Also, if the
1275 // decision to vectorize the loop is made, runtime checks are added to make
1276 // sure that the invariant address won't alias with any other objects.
1277 if (!LAI->getStoresToInvariantAddresses().empty()) {
1278 // For each invariant address, check if last stored value is unconditional
1279 // and the address is not calculated inside the loop.
1280 for (StoreInst *SI : LAI->getStoresToInvariantAddresses()) {
1281 if (!isInvariantStoreOfReduction(SI))
1282 continue;
1283
1284 if (blockNeedsPredication(SI->getParent())) {
1285 reportVectorizationFailure(
1286 "We don't allow storing to uniform addresses",
1287 "write of conditional recurring variant value to a loop "
1288 "invariant address could not be vectorized",
1289 "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
1290 return false;
1291 }
1292
1293 // An invariant address should be defined outside of the loop. The LICM pass
1294 // usually makes sure that happens, but in rare cases it does not; we do not
1295 // want to overcomplicate vectorization to support this case.
1296 if (Instruction *Ptr = dyn_cast<Instruction>(SI->getPointerOperand())) {
1297 if (TheLoop->contains(Ptr)) {
1298 reportVectorizationFailure(
1299 "Invariant address is calculated inside the loop",
1300 "write to a loop invariant address could not "
1301 "be vectorized",
1302 "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
1303 return false;
1304 }
1305 }
1306 }
1307
1308 if (LAI->hasStoreStoreDependenceInvolvingLoopInvariantAddress()) {
1309 // For each invariant address, check its last stored value is the result
1310 // of one of our reductions.
1311 //
1312 // We do not check if dependence with loads exists because that is already
1313 // checked via hasLoadStoreDependenceInvolvingLoopInvariantAddress.
1314 ScalarEvolution *SE = PSE.getSE();
1315 SmallVector<StoreInst *, 4> UnhandledStores;
1316 for (StoreInst *SI : LAI->getStoresToInvariantAddresses()) {
1317 if (isInvariantStoreOfReduction(SI)) {
1318 // Earlier stores to this address are effectively deadcode.
1319 // With opaque pointers it is possible for one pointer to be used with
1320 // different sizes of stored values:
1321 // store i32 0, ptr %x
1322 // store i8 0, ptr %x
1323 // The latest store doesn't completely overwrite the first one in the
1324 // example. That is why we have to make sure that types of stored
1325 // values are same.
1326 // TODO: Check that the bitwidth of the unhandled store is smaller than the
1327 // one that overwrites it and add a test.
1328 erase_if(UnhandledStores, [SE, SI](StoreInst *I) {
1329 return storeToSameAddress(SE, SI, I) &&
1330 I->getValueOperand()->getType() ==
1331 SI->getValueOperand()->getType();
1332 });
1333 continue;
1334 }
1335 UnhandledStores.push_back(SI);
1336 }
1337
1338 bool IsOK = UnhandledStores.empty();
1339 // TODO: we should also validate against InvariantMemSets.
1340 if (!IsOK) {
1341 reportVectorizationFailure(
1342 "We don't allow storing to uniform addresses",
1343 "write to a loop invariant address could not "
1344 "be vectorized",
1345 "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
1346 return false;
1347 }
1348 }
1349 }
1350
1351 PSE.addPredicate(LAI->getPSE().getPredicate());
1352 return true;
1353}
1354
1355bool LoopVectorizationLegality::canVectorizeFPMath(
1356 bool EnableStrictReductions) {
1357
1358 // First check if there is any ExactFP math or if we allow reassociations
1359 if (!Requirements->getExactFPInst() || Hints->allowReordering())
1360 return true;
1361
1362 // If the above is false, we have ExactFPMath & do not allow reordering.
1363 // If the EnableStrictReductions flag is set, first check if we have any
1364 // Exact FP induction vars, which we cannot vectorize.
1365 if (!EnableStrictReductions ||
1366 any_of(getInductionVars(), [&](auto &Induction) -> bool {
1367 InductionDescriptor IndDesc = Induction.second;
1368 return IndDesc.getExactFPMathInst();
1369 }))
1370 return false;
1371
1372 // We can now only vectorize if all reductions with Exact FP math also
1373 // have the isOrdered flag set, which indicates that we can move the
1374 // reduction operations in-loop.
1375 return (all_of(getReductionVars(), [&](auto &Reduction) -> bool {
1376 const RecurrenceDescriptor &RdxDesc = Reduction.second;
1377 return !RdxDesc.hasExactFPMath() || RdxDesc.isOrdered();
1378 }));
1379}
1380
1381bool LoopVectorizationLegality::isInvariantStoreOfReduction(StoreInst *SI) {
1382 return any_of(getReductionVars(), [&](auto &Reduction) -> bool {
1383 const RecurrenceDescriptor &RdxDesc = Reduction.second;
1384 return RdxDesc.IntermediateStore == SI;
1385 });
1386}
1387
1388bool LoopVectorizationLegality::isInvariantAddressOfReduction(Value *V) {
1389 return any_of(getReductionVars(), [&](auto &Reduction) -> bool {
1390 const RecurrenceDescriptor &RdxDesc = Reduction.second;
1391 if (!RdxDesc.IntermediateStore)
1392 return false;
1393
1394 ScalarEvolution *SE = PSE.getSE();
1395 Value *InvariantAddress = RdxDesc.IntermediateStore->getPointerOperand();
1396 return V == InvariantAddress ||
1397 SE->getSCEV(V) == SE->getSCEV(InvariantAddress);
1398 });
1399}
1400
1401bool LoopVectorizationLegality::isInductionPhi(const Value *V) const {
1402 Value *In0 = const_cast<Value *>(V);
1403 PHINode *PN = dyn_cast_or_null<PHINode>(In0);
1404 if (!PN)
1405 return false;
1406
1407 return Inductions.count(PN);
1408}
1409
1410const InductionDescriptor *
1411LoopVectorizationLegality::getIntOrFpInductionDescriptor(PHINode *Phi) const {
1412 if (!isInductionPhi(Phi))
1413 return nullptr;
1414 auto &ID = getInductionVars().find(Phi)->second;
1415 if (ID.getKind() == InductionDescriptor::IK_IntInduction ||
1416 ID.getKind() == InductionDescriptor::IK_FpInduction)
1417 return &ID;
1418 return nullptr;
1419}
1420
1421const InductionDescriptor *
1422LoopVectorizationLegality::getPointerInductionDescriptor(PHINode *Phi) const {
1423 if (!isInductionPhi(Phi))
1424 return nullptr;
1425 auto &ID = getInductionVars().find(Phi)->second;
1426 if (ID.getKind() == InductionDescriptor::IK_PtrInduction)
1427 return &ID;
1428 return nullptr;
1429}
1430
1431bool LoopVectorizationLegality::isCastedInductionVariable(
1432 const Value *V) const {
1433 auto *Inst = dyn_cast<Instruction>(V);
1434 return (Inst && InductionCastsToIgnore.count(Inst));
1435}
1436
1437bool LoopVectorizationLegality::isInductionVariable(const Value *V) const {
1438 return isInductionPhi(V) || isCastedInductionVariable(V);
1439}
1440
1441bool LoopVectorizationLegality::isFixedOrderRecurrence(
1442 const PHINode *Phi) const {
1443 return FixedOrderRecurrences.count(Phi);
1444}
1445
1446bool LoopVectorizationLegality::blockNeedsPredication(
1447 const BasicBlock *BB) const {
1448 // When vectorizing early exits, create predicates for the latch block only.
1449 // The early exiting block must be a direct predecessor of the latch at the
1450 // moment.
1451 BasicBlock *Latch = TheLoop->getLoopLatch();
1452 if (hasUncountableEarlyExit()) {
1453 assert(
1454 is_contained(predecessors(Latch), getUncountableEarlyExitingBlock()) &&
1455 "Uncountable exiting block must be a direct predecessor of latch");
1456 return BB == Latch;
1457 }
1458 return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
1459}
1460
1461bool LoopVectorizationLegality::blockCanBePredicated(
1462 BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs,
1463 SmallPtrSetImpl<const Instruction *> &MaskedOp) const {
1464 for (Instruction &I : *BB) {
1465 // We can predicate blocks with calls to assume, as long as we drop them in
1466 // case we flatten the CFG via predication.
1467 if (match(&I, m_Intrinsic<Intrinsic::assume>())) {
1468 MaskedOp.insert(&I);
1469 continue;
1470 }
1471
1472 // Do not let llvm.experimental.noalias.scope.decl block the vectorization.
1473 // TODO: there might be cases where it should block the vectorization. Let's
1474 // ignore those for now.
1475 if (isa<NoAliasScopeDeclInst>(&I))
1476 continue;
1477
1478 // We can allow masked calls if there's at least one vector variant, even
1479 // if we end up scalarizing due to the cost model calculations.
1480 // TODO: Allow other calls if they have appropriate attributes... readonly
1481 // and argmemonly?
1482 if (CallInst *CI = dyn_cast<CallInst>(&I))
1483 if (VFDatabase::hasMaskedVariant(*CI)) {
1484 MaskedOp.insert(CI);
1485 continue;
1486 }
1487
1488 // Loads are handled via masking (or speculated if safe to do so.)
1489 if (auto *LI = dyn_cast<LoadInst>(&I)) {
1490 if (!SafePtrs.count(LI->getPointerOperand()))
1491 MaskedOp.insert(LI);
1492 continue;
1493 }
1494
1495 // Predicated store requires some form of masking:
1496 // 1) masked store HW instruction,
1497 // 2) emulation via load-blend-store (only if safe and legal to do so,
1498 // be aware of the race conditions), or
1499 // 3) element-by-element predicate check and scalar store.
1500 if (auto *SI = dyn_cast<StoreInst>(&I)) {
1501 MaskedOp.insert(SI);
1502 continue;
1503 }
1504
1505 if (I.mayReadFromMemory() || I.mayWriteToMemory() || I.mayThrow())
1506 return false;
1507 }
1508
1509 return true;
1510}
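// As a small illustration of the rules above: in a loop body such as
//   if (cond[i]) { sum += a[i]; b[i] = 0; }
// the conditional block needs predication, so the store to b[i] always ends
// up in MaskedOp, and the load of a[i] does too unless its pointer was proven
// safe to dereference unconditionally (SafePtrs), in which case the load can
// simply be speculated.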
1511
1512bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
1513 if (!EnableIfConversion) {
1514 reportVectorizationFailure("If-conversion is disabled",
1515 "IfConversionDisabled", ORE, TheLoop);
1516 return false;
1517 }
1518
1519 assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");
1520
1521 // A list of pointers which are known to be dereferenceable within scope of
1522 // the loop body for each iteration of the loop which executes. That is,
1523 // the memory pointed to can be dereferenced (with the access size implied by
1524 // the value's type) unconditionally within the loop header without
1525 // introducing a new fault.
1526 SmallPtrSet<Value *, 8> SafePointers;
1527
1528 // Collect safe addresses.
1529 for (BasicBlock *BB : TheLoop->blocks()) {
1530 if (!blockNeedsPredication(BB)) {
1531 for (Instruction &I : *BB)
1532 if (auto *Ptr = getLoadStorePointerOperand(&I))
1533 SafePointers.insert(Ptr);
1534 continue;
1535 }
1536
1537 // For a block which requires predication, an address may be safe to access
1538 // in the loop w/o predication if we can prove dereferenceability facts
1539 // sufficient to ensure it'll never fault within the loop. For the moment,
1540 // we restrict this to loads; stores are more complicated due to
1541 // concurrency restrictions.
1542 ScalarEvolution &SE = *PSE.getSE();
1543 SmallVector<const SCEVPredicate *, 4> Predicates;
1544 for (Instruction &I : *BB) {
1545 LoadInst *LI = dyn_cast<LoadInst>(&I);
1546
1547 // Make sure we can execute all computations feeding into Ptr in the loop
1548 // w/o triggering UB and that none of the out-of-loop operands are poison.
1549 // We do not need to check if operations inside the loop can produce
1550 // poison due to flags (e.g. due to an inbounds GEP going out of bounds),
1551 // because flags will be dropped when executing them unconditionally.
1552 // TODO: Results could be improved by considering poison-propagation
1553 // properties of visited ops.
1554 auto CanSpeculatePointerOp = [this](Value *Ptr) {
1555 SmallVector<Value *> Worklist = {Ptr};
1556 SmallPtrSet<Value *, 4> Visited;
1557 while (!Worklist.empty()) {
1558 Value *CurrV = Worklist.pop_back_val();
1559 if (!Visited.insert(CurrV).second)
1560 continue;
1561
1562 auto *CurrI = dyn_cast<Instruction>(CurrV);
1563 if (!CurrI || !TheLoop->contains(CurrI)) {
1564 // If operands from outside the loop may be poison then Ptr may also
1565 // be poison.
1566 if (!isGuaranteedNotToBePoison(CurrV, AC,
1567 TheLoop->getLoopPredecessor()
1568 ->getTerminator()
1569 ->getIterator(),
1570 DT))
1571 return false;
1572 continue;
1573 }
1574
1575 // A loaded value may be poison, independent of any flags.
1576 if (isa<LoadInst>(CurrI) && !isGuaranteedNotToBePoison(CurrV, AC))
1577 return false;
1578
1579 // For other ops, assume poison can only be introduced via flags,
1580 // which can be dropped.
1581 if (!isa<PHINode>(CurrI) && !isSafeToSpeculativelyExecute(CurrI))
1582 return false;
1583 append_range(Worklist, CurrI->operands());
1584 }
1585 return true;
1586 };
1587 // Pass the Predicates pointer to isDereferenceableAndAlignedInLoop so
1588 // that it will consider loops that need guarding by SCEV checks. The
1589 // vectoriser will generate these checks if we decide to vectorise.
1590 if (LI && !LI->getType()->isVectorTy() && !mustSuppressSpeculation(*LI) &&
1591 CanSpeculatePointerOp(LI->getPointerOperand()) &&
1592 isDereferenceableAndAlignedInLoop(LI, TheLoop, SE, *DT, AC,
1593 &Predicates))
1594 SafePointers.insert(LI->getPointerOperand());
1595 Predicates.clear();
1596 }
1597 }
1598
1599 // Collect the blocks that need predication.
1600 for (BasicBlock *BB : TheLoop->blocks()) {
1601 // We support only branches and switch statements as terminators inside the
1602 // loop.
1603 if (isa<SwitchInst>(BB->getTerminator())) {
1604 if (TheLoop->isLoopExiting(BB)) {
1605 reportVectorizationFailure("Loop contains an unsupported switch",
1606 "LoopContainsUnsupportedSwitch", ORE,
1607 TheLoop, BB->getTerminator());
1608 return false;
1609 }
1610 } else if (!isa<BranchInst>(BB->getTerminator())) {
1611 reportVectorizationFailure("Loop contains an unsupported terminator",
1612 "LoopContainsUnsupportedTerminator", ORE,
1613 TheLoop, BB->getTerminator());
1614 return false;
1615 }
1616
1617 // We must be able to predicate all blocks that need to be predicated.
1618 if (blockNeedsPredication(BB) &&
1619 !blockCanBePredicated(BB, SafePointers, MaskedOp)) {
1621 "Control flow cannot be substituted for a select", "NoCFGForSelect",
1622 ORE, TheLoop, BB->getTerminator());
1623 return false;
1624 }
1625 }
1626
1627 // We can if-convert this loop.
1628 return true;
1629}
1630
1631 // Helper function for canVectorizeLoopNestCFG.
1632bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
1633 bool UseVPlanNativePath) {
1634 assert((UseVPlanNativePath || Lp->isInnermost()) &&
1635 "VPlan-native path is not enabled.");
1636
1637 // TODO: ORE should be improved to show more accurate information when an
1638 // outer loop can't be vectorized because a nested loop is not understood or
1639 // legal. Something like: "outer_loop_location: loop not vectorized:
1640 // (inner_loop_location) loop control flow is not understood by vectorizer".
1641
1642 // Store the result and return it at the end instead of exiting early, in case
1643 // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1644 bool Result = true;
1645 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1646
1647 // We must have a loop in canonical form. Loops with indirectbr in them cannot
1648 // be canonicalized.
1649 if (!Lp->getLoopPreheader()) {
1650 reportVectorizationFailure("Loop doesn't have a legal pre-header",
1651 "loop control flow is not understood by vectorizer",
1652 "CFGNotUnderstood", ORE, TheLoop);
1653 if (DoExtraAnalysis)
1654 Result = false;
1655 else
1656 return false;
1657 }
1658
1659 // We must have a single backedge.
1660 if (Lp->getNumBackEdges() != 1) {
1661 reportVectorizationFailure("The loop must have a single backedge",
1662 "loop control flow is not understood by vectorizer",
1663 "CFGNotUnderstood", ORE, TheLoop);
1664 if (DoExtraAnalysis)
1665 Result = false;
1666 else
1667 return false;
1668 }
1669
1670 // The latch must be terminated by a BranchInst.
1671 BasicBlock *Latch = Lp->getLoopLatch();
1672 if (Latch && !isa<BranchInst>(Latch->getTerminator())) {
1674 "The loop latch terminator is not a BranchInst",
1675 "loop control flow is not understood by vectorizer", "CFGNotUnderstood",
1676 ORE, TheLoop);
1677 if (DoExtraAnalysis)
1678 Result = false;
1679 else
1680 return false;
1681 }
1682
1683 return Result;
1684}
1685
1686bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
1687 Loop *Lp, bool UseVPlanNativePath) {
1688 // Store the result and return it at the end instead of exiting early, in case
1689 // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1690 bool Result = true;
1691 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1692 if (!canVectorizeLoopCFG(Lp, UseVPlanNativePath)) {
1693 if (DoExtraAnalysis)
1694 Result = false;
1695 else
1696 return false;
1697 }
1698
1699 // Recursively check whether the loop control flow of nested loops is
1700 // understood.
1701 for (Loop *SubLp : *Lp)
1702 if (!canVectorizeLoopNestCFG(SubLp, UseVPlanNativePath)) {
1703 if (DoExtraAnalysis)
1704 Result = false;
1705 else
1706 return false;
1707 }
1708
1709 return Result;
1710}
1711
1712bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
1713 BasicBlock *LatchBB = TheLoop->getLoopLatch();
1714 if (!LatchBB) {
1715 reportVectorizationFailure("Loop does not have a latch",
1716 "Cannot vectorize early exit loop",
1717 "NoLatchEarlyExit", ORE, TheLoop);
1718 return false;
1719 }
1720
1721 if (Reductions.size() || FixedOrderRecurrences.size()) {
1723 "Found reductions or recurrences in early-exit loop",
1724 "Cannot vectorize early exit loop with reductions or recurrences",
1725 "RecurrencesInEarlyExitLoop", ORE, TheLoop);
1726 return false;
1727 }
1728
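// The supported shape is a loop with exactly one uncountable exiting block
// (e.g. a data-dependent `if (a[i] == key) break;` test), which must also be
// the sole predecessor of the latch, together with a countable exit from the
// latch itself; the checks below enforce each of these constraints in turn.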
1729 SmallVector<BasicBlock *, 8> ExitingBlocks;
1730 TheLoop->getExitingBlocks(ExitingBlocks);
1731
1732 // Keep a record of all the exiting blocks.
1733 SmallVector<const SCEVPredicate *, 4> Predicates;
1734 BasicBlock *SingleUncountableExitingBlock = nullptr;
1735 for (BasicBlock *BB : ExitingBlocks) {
1736 const SCEV *EC =
1737 PSE.getSE()->getPredicatedExitCount(TheLoop, BB, &Predicates);
1738 if (isa<SCEVCouldNotCompute>(EC)) {
1739 if (size(successors(BB)) != 2) {
1741 "Early exiting block does not have exactly two successors",
1742 "Incorrect number of successors from early exiting block",
1743 "EarlyExitTooManySuccessors", ORE, TheLoop);
1744 return false;
1745 }
1746
1747 if (SingleUncountableExitingBlock) {
1749 "Loop has too many uncountable exits",
1750 "Cannot vectorize early exit loop with more than one early exit",
1751 "TooManyUncountableEarlyExits", ORE, TheLoop);
1752 return false;
1753 }
1754
1755 SingleUncountableExitingBlock = BB;
1756 } else
1757 CountableExitingBlocks.push_back(BB);
1758 }
1759 // We can safely ignore the predicates here because when vectorizing the loop
1760 // the PredicatedScalarEvolution class will keep track of all predicates
1761 // for each exiting block anyway. This happens when calling
1762 // PSE.getSymbolicMaxBackedgeTakenCount() below.
1763 Predicates.clear();
1764
1765 if (!SingleUncountableExitingBlock) {
1766 LLVM_DEBUG(dbgs() << "LV: Could not find any uncountable exits");
1767 return false;
1768 }
1769
1770 // The only supported early exit loops so far are ones where the early
1771 // exiting block is a unique predecessor of the latch block.
1772 BasicBlock *LatchPredBB = LatchBB->getUniquePredecessor();
1773 if (LatchPredBB != SingleUncountableExitingBlock) {
1774 reportVectorizationFailure("Early exit is not the latch predecessor",
1775 "Cannot vectorize early exit loop",
1776 "EarlyExitNotLatchPredecessor", ORE, TheLoop);
1777 return false;
1778 }
1779
1780 // The latch block must have a countable exit.
1781 if (isa<SCEVCouldNotCompute>(
1782 PSE.getSE()->getPredicatedExitCount(TheLoop, LatchBB, &Predicates))) {
1783 reportVectorizationFailure(
1784 "Cannot determine exact exit count for latch block",
1785 "Cannot vectorize early exit loop",
1786 "UnknownLatchExitCountEarlyExitLoop", ORE, TheLoop);
1787 return false;
1788 }
1789 assert(llvm::is_contained(CountableExitingBlocks, LatchBB) &&
1790 "Latch block not found in list of countable exits!");
1791
1792 // Check to see if there are instructions that could potentially generate
1793 // exceptions or have side-effects.
1794 auto IsSafeOperation = [](Instruction *I) -> bool {
1795 switch (I->getOpcode()) {
1796 case Instruction::Load:
1797 case Instruction::Store:
1798 case Instruction::PHI:
1799 case Instruction::Br:
1800 // These are checked separately.
1801 return true;
1802 default:
1803 return isSafeToSpeculativelyExecute(I);
1804 }
1805 };
1806
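// Simple (non-atomic, non-volatile) stores are tolerated below but set
// HasSideEffects, forcing the stricter handling later; any other write to
// memory, or any operation IsSafeOperation rejects, bails out immediately.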
1807 bool HasSideEffects = false;
1808 for (auto *BB : TheLoop->blocks())
1809 for (auto &I : *BB) {
1810 if (I.mayWriteToMemory()) {
1811 if (isa<StoreInst>(&I) && cast<StoreInst>(&I)->isSimple()) {
1812 HasSideEffects = true;
1813 continue;
1814 }
1815
1816 // We don't support complex writes to memory.
1818 "Complex writes to memory unsupported in early exit loops",
1819 "Cannot vectorize early exit loop with complex writes to memory",
1820 "WritesInEarlyExitLoop", ORE, TheLoop);
1821 return false;
1822 }
1823
1824 if (!IsSafeOperation(&I)) {
1825 reportVectorizationFailure("Early exit loop contains operations that "
1826 "cannot be speculatively executed",
1827 "UnsafeOperationsEarlyExitLoop", ORE,
1828 TheLoop);
1829 return false;
1830 }
1831 }
1832
1833 // The vectoriser cannot handle loads that occur after the early exit block.
1834 assert(LatchBB->getUniquePredecessor() == SingleUncountableExitingBlock &&
1835 "Expected latch predecessor to be the early exiting block");
1836
1837 SmallVector<LoadInst *, 4> NonDerefLoads;
1838 // TODO: Handle loops that may fault.
1839 if (!HasSideEffects) {
1840 // Read-only loop.
1841 Predicates.clear();
1842 if (!isReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC, NonDerefLoads,
1843 &Predicates)) {
1845 "Loop may fault", "Cannot vectorize non-read-only early exit loop",
1846 "NonReadOnlyEarlyExitLoop", ORE, TheLoop);
1847 return false;
1848 }
1849 } else if (!canUncountableExitConditionLoadBeMoved(
1850 SingleUncountableExitingBlock))
1851 return false;
1852
1853 // Check non-dereferenceable loads if any.
1854 for (LoadInst *LI : NonDerefLoads) {
1855 // Only support unit-stride access for now.
1856 int Stride = isConsecutivePtr(LI->getType(), LI->getPointerOperand());
1857 if (Stride != 1) {
1859 "Loop contains potentially faulting strided load",
1860 "Cannot vectorize early exit loop with "
1861 "strided fault-only-first load",
1862 "EarlyExitLoopWithStridedFaultOnlyFirstLoad", ORE, TheLoop);
1863 return false;
1864 }
1865 PotentiallyFaultingLoads.insert(LI);
1866 LLVM_DEBUG(dbgs() << "LV: Found potentially faulting load: " << *LI
1867 << "\n");
1868 }
1869
1870 [[maybe_unused]] const SCEV *SymbolicMaxBTC =
1871 PSE.getSymbolicMaxBackedgeTakenCount();
1872 // Since we have an exact exit count for the latch and the early exit
1873 // dominates the latch, this should guarantee a computed SCEV value.
1874 assert(!isa<SCEVCouldNotCompute>(SymbolicMaxBTC) &&
1875 "Failed to get symbolic expression for backedge taken count");
1876 LLVM_DEBUG(dbgs() << "LV: Found an early exit loop with symbolic max "
1877 "backedge taken count: "
1878 << *SymbolicMaxBTC << '\n');
1879 UncountableExitingBB = SingleUncountableExitingBlock;
1880 UncountableExitWithSideEffects = HasSideEffects;
1881 return true;
1882}
1883
1884bool LoopVectorizationLegality::canUncountableExitConditionLoadBeMoved(
1885 BasicBlock *ExitingBlock) {
1886 // Try to find a load in the critical path for the uncountable exit condition.
1887 // This currently matches roughly the simplest form we can, expecting
1888 // only one in-loop load, the result of which is directly compared against
1889 // a loop-invariant value.
1890 // FIXME: We're insisting on a single use for now, because otherwise we will
1891 // need to make PHI nodes for other users. That can be done once the initial
1892 // transform code lands.
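// Concretely, the condition being matched is an integer compare of a single
// in-loop load against a loop-invariant value, e.g.
//   %v = load i32, ptr %p        ; %p must be an affine AddRec in this loop
//   %cmp = icmp eq i32 %v, %inv  ; %inv is loop-invariant
//   br i1 %cmp, ...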
1893 auto *Br = cast<BranchInst>(ExitingBlock->getTerminator());
1894
1895 using namespace llvm::PatternMatch;
1896 Instruction *L = nullptr;
1897 Value *Ptr = nullptr;
1898 Value *R = nullptr;
1899 if (!match(Br->getCondition(),
1901 m_Value(R))))) {
1903 "Early exit loop with store but no supported condition load",
1904 "NoConditionLoadForEarlyExitLoop", ORE, TheLoop);
1905 return false;
1906 }
1907
1908 // FIXME: Don't rely on operand ordering for the comparison.
1909 if (!TheLoop->isLoopInvariant(R)) {
1911 "Early exit loop with store but no supported condition load",
1912 "NoConditionLoadForEarlyExitLoop", ORE, TheLoop);
1913 return false;
1914 }
1915
1916 // Make sure that the load address is not loop invariant; we want an
1917 // address calculation that we can rotate to the next vector iteration.
1918 const auto *AR = dyn_cast<SCEVAddRecExpr>(PSE.getSE()->getSCEV(Ptr));
1919 if (!AR || AR->getLoop() != TheLoop || !AR->isAffine()) {
1921 "Uncountable exit condition depends on load with an address that is "
1922 "not an add recurrence in the loop",
1923 "EarlyExitLoadInvariantAddress", ORE, TheLoop);
1924 return false;
1925 }
1926
1927 // FIXME: Support gathers after first-faulting load support lands.
1929 LoadInst *Load = cast<LoadInst>(L);
1930 if (!isDereferenceableAndAlignedInLoop(Load, TheLoop, *PSE.getSE(), *DT, AC,
1931 &Predicates)) {
1933 "Loop may fault",
1934 "Cannot vectorize potentially faulting early exit loop",
1935 "PotentiallyFaultingEarlyExitLoop", ORE, TheLoop);
1936 return false;
1937 }
1938
1939 ICFLoopSafetyInfo SafetyInfo;
1940 SafetyInfo.computeLoopSafetyInfo(TheLoop);
1941 // We need to know that the load will be executed before we can hoist a
1942 // copy out to run just before the first iteration.
1943 if (!SafetyInfo.isGuaranteedToExecute(*Load, DT, TheLoop)) {
1945 "Load for uncountable exit not guaranteed to execute",
1946 "ConditionalUncountableExitLoad", ORE, TheLoop);
1947 return false;
1948 }
1949
1950 // Prohibit any potential aliasing with any instruction in the loop which
1951 // might store to memory.
1952 // FIXME: Relax this constraint where possible.
1953 for (auto *BB : TheLoop->blocks()) {
1954 for (auto &I : *BB) {
1955 if (&I == Load)
1956 continue;
1957
1958 if (I.mayWriteToMemory()) {
1959 if (auto *SI = dyn_cast<StoreInst>(&I)) {
1960 AliasResult AR = AA->alias(Ptr, SI->getPointerOperand());
1961 if (AR == AliasResult::NoAlias)
1962 continue;
1963 }
1964
1966 "Cannot determine whether critical uncountable exit load address "
1967 "does not alias with a memory write",
1968 "CantVectorizeAliasWithCriticalUncountableExitLoad", ORE, TheLoop);
1969 return false;
1970 }
1971 }
1972 }
1973
1974 return true;
1975}
1976
1977bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
1978 // Store the result and return it at the end instead of exiting early, in case
1979 // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1980 bool Result = true;
1981
1982 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1983 // Check whether the loop-related control flow in the loop nest is expected by
1984 // the vectorizer.
1985 if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) {
1986 if (DoExtraAnalysis) {
1987 LLVM_DEBUG(dbgs() << "LV: legality check failed: loop nest");
1988 Result = false;
1989 } else {
1990 return false;
1991 }
1992 }
1993
1994 // We need to have a loop header.
1995 LLVM_DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName()
1996 << '\n');
1997
1998 // Specific checks for outer loops. We skip the remaining legal checks at this
1999 // point because they don't support outer loops.
2000 if (!TheLoop->isInnermost()) {
2001 assert(UseVPlanNativePath && "VPlan-native path is not enabled.");
2002
2003 if (!canVectorizeOuterLoop()) {
2004 reportVectorizationFailure("Unsupported outer loop",
2005 "UnsupportedOuterLoop", ORE, TheLoop);
2006 // TODO: Implement DoExtraAnalysis when subsequent legal checks support
2007 // outer loops.
2008 return false;
2009 }
2010
2011 LLVM_DEBUG(dbgs() << "LV: We can vectorize this outer loop!\n");
2012 return Result;
2013 }
2014
2015 assert(TheLoop->isInnermost() && "Inner loop expected.");
2016 // Check if we can if-convert non-single-bb loops.
2017 unsigned NumBlocks = TheLoop->getNumBlocks();
2018 if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
2019 LLVM_DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");
2020 if (DoExtraAnalysis)
2021 Result = false;
2022 else
2023 return false;
2024 }
2025
2026 // Check if we can vectorize the instructions and CFG in this loop.
2027 if (!canVectorizeInstrs()) {
2028 LLVM_DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n");
2029 if (DoExtraAnalysis)
2030 Result = false;
2031 else
2032 return false;
2033 }
2034
2035 if (isa<SCEVCouldNotCompute>(PSE.getBackedgeTakenCount())) {
2036 if (TheLoop->getExitingBlock()) {
2037 reportVectorizationFailure("Cannot vectorize uncountable loop",
2038 "UnsupportedUncountableLoop", ORE, TheLoop);
2039 if (DoExtraAnalysis)
2040 Result = false;
2041 else
2042 return false;
2043 } else {
2044 if (!isVectorizableEarlyExitLoop()) {
2047 "Must be false without vectorizable early-exit loop");
2048 if (DoExtraAnalysis)
2049 Result = false;
2050 else
2051 return false;
2052 }
2053 }
2054 }
2055
2056 // Go over each instruction and look at memory deps.
2057 if (!canVectorizeMemory()) {
2058 LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
2059 if (DoExtraAnalysis)
2060 Result = false;
2061 else
2062 return false;
2063 }
2064
2065 // Bail out for state-changing loops with uncountable exits for now.
2066 if (UncountableExitWithSideEffects) {
2068 "Writes to memory unsupported in early exit loops",
2069 "Cannot vectorize early exit loop with writes to memory",
2070 "WritesInEarlyExitLoop", ORE, TheLoop);
2071 return false;
2072 }
2073
2074 if (Result) {
2075 LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
2076 << (LAI->getRuntimePointerChecking()->Need
2077 ? " (with a runtime bound check)"
2078 : "")
2079 << "!\n");
2080 }
2081
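// Select the runtime SCEV-check budget: loops where vectorization is
// explicitly forced (e.g. via #pragma clang loop vectorize(enable)) get the
// larger pragma-vectorize-scev-check-threshold budget.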
2082 unsigned SCEVThreshold = VectorizeSCEVCheckThreshold;
2083 if (Hints->getForce() == LoopVectorizeHints::FK_Enabled)
2084 SCEVThreshold = PragmaVectorizeSCEVCheckThreshold;
2085
2086 if (PSE.getPredicate().getComplexity() > SCEVThreshold) {
2087 LLVM_DEBUG(dbgs() << "LV: Vectorization not profitable "
2088 "due to SCEVThreshold");
2089 reportVectorizationFailure("Too many SCEV checks needed",
2090 "Too many SCEV assumptions need to be made and checked at runtime",
2091 "TooManySCEVRunTimeChecks", ORE, TheLoop);
2092 if (DoExtraAnalysis)
2093 Result = false;
2094 else
2095 return false;
2096 }
2097
2098 // Okay! We've done all the tests. If any have failed, return false. Otherwise
2099 // we can vectorize, and at this point we don't have any other mem analysis
2100 // which may limit our maximum vectorization factor, so just return true with
2101 // no restrictions.
2102 return Result;
2103}
2104
2105 bool LoopVectorizationLegality::canFoldTailByMasking() const {
2106 // The only loops we can vectorize without a scalar epilogue are loops with
2107 // a bottom-test and a single exiting block. We'd have to handle the fact
2108 // that not every instruction executes on the last iteration. This will
2109 // require a lane mask which varies through the vector loop body. (TODO)
2110 if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) {
2111 LLVM_DEBUG(
2112 dbgs()
2113 << "LV: Cannot fold tail by masking. Requires a singe latch exit\n");
2114 return false;
2115 }
2116
2117 LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");
2118
2119 SmallPtrSet<const Value *, 8> ReductionLiveOuts;
2120
2121 for (const auto &Reduction : getReductionVars())
2122 ReductionLiveOuts.insert(Reduction.second.getLoopExitInstr());
2123
2124 for (const auto &Entry : getInductionVars()) {
2125 PHINode *OrigPhi = Entry.first;
2126 for (User *U : OrigPhi->users()) {
2127 auto *UI = cast<Instruction>(U);
2128 if (!TheLoop->contains(UI)) {
2129 LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking, loop IV has an "
2130 "outside user for "
2131 << *UI << "\n");
2132 return false;
2133 }
2134 }
2135 }
2136
2137 // The list of pointers that we can safely read and write to remains empty.
2138 SmallPtrSet<Value *, 8> SafePointers;
2139
2140 // Check all blocks for predication, including those that ordinarily do not
2141 // need predication such as the header block.
2142 SmallPtrSet<const Instruction *, 8> TmpMaskedOp;
2143 for (BasicBlock *BB : TheLoop->blocks()) {
2144 if (!blockCanBePredicated(BB, SafePointers, TmpMaskedOp)) {
2145 LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking.\n");
2146 return false;
2147 }
2148 }
2149
2150 LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n");
2151
2152 return true;
2153}
2154
2155 void LoopVectorizationLegality::prepareToFoldTailByMasking() {
2156 // The list of pointers that we can safely read and write to remains empty.
2157 SmallPtrSet<Value *, 8> SafePointers;
2158
2159 // Mark all blocks for predication, including those that ordinarily do not
2160 // need predication such as the header block.
2161 for (BasicBlock *BB : TheLoop->blocks()) {
2162 [[maybe_unused]] bool R = blockCanBePredicated(BB, SafePointers, MaskedOp);
2163 assert(R && "Must be able to predicate block when tail-folding.");
2164 }
2165}
2166
2167} // namespace llvm