LLVM  15.0.0git
LoopVectorizationLegality.cpp
Go to the documentation of this file.
1 //===- LoopVectorizationLegality.cpp --------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file provides loop vectorization legality analysis. Original code
10 // resided in LoopVectorize.cpp for a long time.
11 //
12 // At this point, it is implemented as a utility class, not as an analysis
13 // pass. It should be easy to create an analysis pass around it if there
14 // is a need (but D45420 needs to happen first).
15 //
16 
18 #include "llvm/Analysis/Loads.h"
19 #include "llvm/Analysis/LoopInfo.h"
25 #include "llvm/IR/IntrinsicInst.h"
26 #include "llvm/IR/PatternMatch.h"
29 
30 using namespace llvm;
31 using namespace PatternMatch;
32 
33 #define LV_NAME "loop-vectorize"
34 #define DEBUG_TYPE LV_NAME
35 
36 static cl::opt<bool>
37  EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
38  cl::desc("Enable if-conversion during vectorization."));
39 
40 namespace llvm {
42  HintsAllowReordering("hints-allow-reordering", cl::init(true), cl::Hidden,
43  cl::desc("Allow enabling loop hints to reorder "
44  "FP operations during vectorization."));
45 }
46 
47 // TODO: Move size-based thresholds out of legality checking, make cost based
48 // decisions instead of hard thresholds.
50  "vectorize-scev-check-threshold", cl::init(16), cl::Hidden,
51  cl::desc("The maximum number of SCEV checks allowed."));
52 
54  "pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden,
55  cl::desc("The maximum number of SCEV checks allowed with a "
56  "vectorize(enable) pragma"));
57 
60  "scalable-vectorization", cl::init(LoopVectorizeHints::SK_Unspecified),
61  cl::Hidden,
62  cl::desc("Control whether the compiler can use scalable vectors to "
63  "vectorize a loop"),
64  cl::values(
66  "Scalable vectorization is disabled."),
67  clEnumValN(
69  "Scalable vectorization is available and favored when the "
70  "cost is inconclusive."),
71  clEnumValN(
73  "Scalable vectorization is available and favored when the "
74  "cost is inconclusive.")));
75 
76 /// Maximum vectorization interleave count.
77 static const unsigned MaxInterleaveFactor = 16;
78 
79 namespace llvm {
80 
81 bool LoopVectorizeHints::Hint::validate(unsigned Val) {
82  switch (Kind) {
83  case HK_WIDTH:
84  return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth;
85  case HK_INTERLEAVE:
86  return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
87  case HK_FORCE:
88  return (Val <= 1);
89  case HK_ISVECTORIZED:
90  case HK_PREDICATE:
91  case HK_SCALABLE:
92  return (Val == 0 || Val == 1);
93  }
94  return false;
95 }
96 
98  bool InterleaveOnlyWhenForced,
100  const TargetTransformInfo *TTI)
101  : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH),
102  Interleave("interleave.count", InterleaveOnlyWhenForced, HK_INTERLEAVE),
103  Force("vectorize.enable", FK_Undefined, HK_FORCE),
104  IsVectorized("isvectorized", 0, HK_ISVECTORIZED),
105  Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE),
106  Scalable("vectorize.scalable.enable", SK_Unspecified, HK_SCALABLE),
107  TheLoop(L), ORE(ORE) {
108  // Populate values with existing loop metadata.
109  getHintsFromMetadata();
110 
111  // force-vector-interleave overrides DisableInterleaving.
114 
115  // If the metadata doesn't explicitly specify whether to enable scalable
116  // vectorization, then decide based on the following criteria (increasing
117  // level of priority):
118  // - Target default
119  // - Metadata width
120  // - Force option (always overrides)
121  if ((LoopVectorizeHints::ScalableForceKind)Scalable.Value == SK_Unspecified) {
122  if (TTI)
125 
126  if (Width.Value)
127  // If the width is set, but the metadata says nothing about the scalable
128  // property, then assume it concerns only a fixed-width UserVF.
129  // If width is not set, the flag takes precedence.
130  Scalable.Value = SK_FixedWidthOnly;
131  }
132 
133  // If the flag is set to force any use of scalable vectors, override the loop
134  // hints.
135  if (ForceScalableVectorization.getValue() !=
137  Scalable.Value = ForceScalableVectorization.getValue();
138 
139  // Scalable vectorization is disabled if no preference is specified.
141  Scalable.Value = SK_FixedWidthOnly;
142 
143  if (IsVectorized.Value != 1)
144  // If the vectorization width and interleaving count are both 1 then
145  // consider the loop to have been already vectorized because there's
146  // nothing more that we can do.
147  IsVectorized.Value =
149  LLVM_DEBUG(if (InterleaveOnlyWhenForced && getInterleave() == 1) dbgs()
150  << "LV: Interleaving disabled by the pass manager\n");
151 }
152 
154  LLVMContext &Context = TheLoop->getHeader()->getContext();
155 
156  MDNode *IsVectorizedMD = MDNode::get(
157  Context,
158  {MDString::get(Context, "llvm.loop.isvectorized"),
160  MDNode *LoopID = TheLoop->getLoopID();
161  MDNode *NewLoopID =
163  {Twine(Prefix(), "vectorize.").str(),
164  Twine(Prefix(), "interleave.").str()},
165  {IsVectorizedMD});
166  TheLoop->setLoopID(NewLoopID);
167 
168  // Update internal cache.
169  IsVectorized.Value = 1;
170 }
171 
173  Function *F, Loop *L, bool VectorizeOnlyWhenForced) const {
175  LLVM_DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n");
177  return false;
178  }
179 
180  if (VectorizeOnlyWhenForced && getForce() != LoopVectorizeHints::FK_Enabled) {
181  LLVM_DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n");
183  return false;
184  }
185 
186  if (getIsVectorized() == 1) {
187  LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
188  // FIXME: Add interleave.disable metadata. This will allow
189  // vectorize.disable to be used without disabling the pass and errors
190  // to differentiate between disabled vectorization and a width of 1.
191  ORE.emit([&]() {
193  "AllDisabled", L->getStartLoc(),
194  L->getHeader())
195  << "loop not vectorized: vectorization and interleaving are "
196  "explicitly disabled, or the loop has already been "
197  "vectorized";
198  });
199  return false;
200  }
201 
202  return true;
203 }
204 
206  using namespace ore;
207 
208  ORE.emit([&]() {
209  if (Force.Value == LoopVectorizeHints::FK_Disabled)
210  return OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled",
211  TheLoop->getStartLoc(),
212  TheLoop->getHeader())
213  << "loop not vectorized: vectorization is explicitly disabled";
214  else {
215  OptimizationRemarkMissed R(LV_NAME, "MissedDetails",
216  TheLoop->getStartLoc(), TheLoop->getHeader());
217  R << "loop not vectorized";
218  if (Force.Value == LoopVectorizeHints::FK_Enabled) {
219  R << " (Force=" << NV("Force", true);
220  if (Width.Value != 0)
221  R << ", Vector Width=" << NV("VectorWidth", getWidth());
222  if (getInterleave() != 0)
223  R << ", Interleave Count=" << NV("InterleaveCount", getInterleave());
224  R << ")";
225  }
226  return R;
227  }
228  });
229 }
230 
232  if (getWidth() == ElementCount::getFixed(1))
233  return LV_NAME;
235  return LV_NAME;
237  return LV_NAME;
239 }
240 
242  // Allow the vectorizer to change the order of operations if enabling
243  // loop hints are provided
244  ElementCount EC = getWidth();
245  return HintsAllowReordering &&
247  EC.getKnownMinValue() > 1);
248 }
249 
250 void LoopVectorizeHints::getHintsFromMetadata() {
251  MDNode *LoopID = TheLoop->getLoopID();
252  if (!LoopID)
253  return;
254 
255  // First operand should refer to the loop id itself.
256  assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
257  assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
258 
259  for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
260  const MDString *S = nullptr;
262 
263  // The expected hint is either a MDString or a MDNode with the first
264  // operand a MDString.
265  if (const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i))) {
266  if (!MD || MD->getNumOperands() == 0)
267  continue;
268  S = dyn_cast<MDString>(MD->getOperand(0));
269  for (unsigned i = 1, ie = MD->getNumOperands(); i < ie; ++i)
270  Args.push_back(MD->getOperand(i));
271  } else {
272  S = dyn_cast<MDString>(LoopID->getOperand(i));
273  assert(Args.size() == 0 && "too many arguments for MDString");
274  }
275 
276  if (!S)
277  continue;
278 
279  // Check if the hint starts with the loop metadata prefix.
280  StringRef Name = S->getString();
281  if (Args.size() == 1)
282  setHint(Name, Args[0]);
283  }
284 }
285 
286 void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) {
287  if (!Name.startswith(Prefix()))
288  return;
289  Name = Name.substr(Prefix().size(), StringRef::npos);
290 
291  const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg);
292  if (!C)
293  return;
294  unsigned Val = C->getZExtValue();
295 
296  Hint *Hints[] = {&Width, &Interleave, &Force,
297  &IsVectorized, &Predicate, &Scalable};
298  for (auto H : Hints) {
299  if (Name == H->Name) {
300  if (H->validate(Val))
301  H->Value = Val;
302  else
303  LLVM_DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n");
304  break;
305  }
306  }
307 }
308 
309 // Return true if the inner loop \p Lp is uniform with regard to the outer loop
310 // \p OuterLp (i.e., if the outer loop is vectorized, all the vector lanes
311 // executing the inner loop will execute the same iterations). This check is
312 // very constrained for now but it will be relaxed in the future. \p Lp is
313 // considered uniform if it meets all the following conditions:
314 // 1) it has a canonical IV (starting from 0 and with stride 1),
315 // 2) its latch terminator is a conditional branch and,
316 // 3) its latch condition is a compare instruction whose operands are the
317 // canonical IV and an OuterLp invariant.
318 // This check doesn't take into account the uniformity of other conditions not
319 // related to the loop latch because they don't affect the loop uniformity.
320 //
321 // NOTE: We decided to keep all these checks and its associated documentation
322 // together so that we can easily have a picture of the current supported loop
323 // nests. However, some of the current checks don't depend on \p OuterLp and
324 // would be redundantly executed for each \p Lp if we invoked this function for
325 // different candidate outer loops. This is not the case for now because we
326 // don't currently have the infrastructure to evaluate multiple candidate outer
327 // loops and \p OuterLp will be a fixed parameter while we only support explicit
328 // outer loop vectorization. It's also very likely that these checks go away
329 // before introducing the aforementioned infrastructure. However, if this is not
330 // the case, we should move the \p OuterLp independent checks to a separate
331 // function that is only executed once for each \p Lp.
332 static bool isUniformLoop(Loop *Lp, Loop *OuterLp) {
333  assert(Lp->getLoopLatch() && "Expected loop with a single latch.");
334 
335  // If Lp is the outer loop, it's uniform by definition.
336  if (Lp == OuterLp)
337  return true;
338  assert(OuterLp->contains(Lp) && "OuterLp must contain Lp.");
339 
340  // 1.
342  if (!IV) {
343  LLVM_DEBUG(dbgs() << "LV: Canonical IV not found.\n");
344  return false;
345  }
346 
347  // 2.
348  BasicBlock *Latch = Lp->getLoopLatch();
349  auto *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator());
350  if (!LatchBr || LatchBr->isUnconditional()) {
351  LLVM_DEBUG(dbgs() << "LV: Unsupported loop latch branch.\n");
352  return false;
353  }
354 
355  // 3.
356  auto *LatchCmp = dyn_cast<CmpInst>(LatchBr->getCondition());
357  if (!LatchCmp) {
358  LLVM_DEBUG(
359  dbgs() << "LV: Loop latch condition is not a compare instruction.\n");
360  return false;
361  }
362 
363  Value *CondOp0 = LatchCmp->getOperand(0);
364  Value *CondOp1 = LatchCmp->getOperand(1);
365  Value *IVUpdate = IV->getIncomingValueForBlock(Latch);
366  if (!(CondOp0 == IVUpdate && OuterLp->isLoopInvariant(CondOp1)) &&
367  !(CondOp1 == IVUpdate && OuterLp->isLoopInvariant(CondOp0))) {
368  LLVM_DEBUG(dbgs() << "LV: Loop latch condition is not uniform.\n");
369  return false;
370  }
371 
372  return true;
373 }
374 
375 // Return true if \p Lp and all its nested loops are uniform with regard to \p
376 // OuterLp.
377 static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp) {
378  if (!isUniformLoop(Lp, OuterLp))
379  return false;
380 
381  // Check if nested loops are uniform.
382  for (Loop *SubLp : *Lp)
383  if (!isUniformLoopNest(SubLp, OuterLp))
384  return false;
385 
386  return true;
387 }
388 
389 /// Check whether it is safe to if-convert this phi node.
390 ///
391 /// Phi nodes with constant expressions that can trap are not safe to if
392 /// convert.
394  for (PHINode &Phi : BB->phis()) {
395  for (Value *V : Phi.incoming_values())
396  if (auto *C = dyn_cast<Constant>(V))
397  if (C->canTrap())
398  return false;
399  }
400  return true;
401 }
402 
404  if (Ty->isPointerTy())
405  return DL.getIntPtrType(Ty);
406 
407  // It is possible that char's or short's overflow when we ask for the loop's
408  // trip count, work around this by changing the type size.
409  if (Ty->getScalarSizeInBits() < 32)
410  return Type::getInt32Ty(Ty->getContext());
411 
412  return Ty;
413 }
414 
415 static Type *getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) {
416  Ty0 = convertPointerToIntegerType(DL, Ty0);
417  Ty1 = convertPointerToIntegerType(DL, Ty1);
418  if (Ty0->getScalarSizeInBits() > Ty1->getScalarSizeInBits())
419  return Ty0;
420  return Ty1;
421 }
422 
423 /// Check that the instruction has outside loop users and is not an
424 /// identified reduction variable.
425 static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
426  SmallPtrSetImpl<Value *> &AllowedExit) {
427  // Reductions, Inductions and non-header phis are allowed to have exit users. All
428  // other instructions must not have external users.
429  if (!AllowedExit.count(Inst))
430  // Check that all of the users of the loop are inside the BB.
431  for (User *U : Inst->users()) {
432  Instruction *UI = cast<Instruction>(U);
433  // This user may be a reduction exit value.
434  if (!TheLoop->contains(UI)) {
435  LLVM_DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n');
436  return true;
437  }
438  }
439  return false;
440 }
441 
442 /// Returns true if A and B have same pointer operands or same SCEVs addresses
444  StoreInst *B) {
445  // Compare store
446  if (A == B)
447  return true;
448 
449  // Otherwise Compare pointers
450  Value *APtr = A->getPointerOperand();
451  Value *BPtr = B->getPointerOperand();
452  if (APtr == BPtr)
453  return true;
454 
455  // Otherwise compare address SCEVs
456  if (SE->getSCEV(APtr) == SE->getSCEV(BPtr))
457  return true;
458 
459  return false;
460 }
461 
463  Value *Ptr) const {
464  const ValueToValueMap &Strides =
465  getSymbolicStrides() ? *getSymbolicStrides() : ValueToValueMap();
466 
467  Function *F = TheLoop->getHeader()->getParent();
468  bool OptForSize = F->hasOptSize() ||
469  llvm::shouldOptimizeForSize(TheLoop->getHeader(), PSI, BFI,
471  bool CanAddPredicate = !OptForSize;
472  int Stride = getPtrStride(PSE, AccessTy, Ptr, TheLoop, Strides,
473  CanAddPredicate, false);
474  if (Stride == 1 || Stride == -1)
475  return Stride;
476  return 0;
477 }
478 
480  return LAI->isUniform(V);
481 }
482 
483 bool LoopVectorizationLegality::canVectorizeOuterLoop() {
484  assert(!TheLoop->isInnermost() && "We are not vectorizing an outer loop.");
485  // Store the result and return it at the end instead of exiting early, in case
486  // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
487  bool Result = true;
488  bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
489 
490  for (BasicBlock *BB : TheLoop->blocks()) {
491  // Check whether the BB terminator is a BranchInst. Any other terminator is
492  // not supported yet.
493  auto *Br = dyn_cast<BranchInst>(BB->getTerminator());
494  if (!Br) {
495  reportVectorizationFailure("Unsupported basic block terminator",
496  "loop control flow is not understood by vectorizer",
497  "CFGNotUnderstood", ORE, TheLoop);
498  if (DoExtraAnalysis)
499  Result = false;
500  else
501  return false;
502  }
503 
504  // Check whether the BranchInst is a supported one. Only unconditional
505  // branches, conditional branches with an outer loop invariant condition or
506  // backedges are supported.
507  // FIXME: We skip these checks when VPlan predication is enabled as we
508  // want to allow divergent branches. This whole check will be removed
509  // once VPlan predication is on by default.
510  if (Br && Br->isConditional() &&
511  !TheLoop->isLoopInvariant(Br->getCondition()) &&
512  !LI->isLoopHeader(Br->getSuccessor(0)) &&
513  !LI->isLoopHeader(Br->getSuccessor(1))) {
514  reportVectorizationFailure("Unsupported conditional branch",
515  "loop control flow is not understood by vectorizer",
516  "CFGNotUnderstood", ORE, TheLoop);
517  if (DoExtraAnalysis)
518  Result = false;
519  else
520  return false;
521  }
522  }
523 
524  // Check whether inner loops are uniform. At this point, we only support
525  // simple outer loops scenarios with uniform nested loops.
526  if (!isUniformLoopNest(TheLoop /*loop nest*/,
527  TheLoop /*context outer loop*/)) {
528  reportVectorizationFailure("Outer loop contains divergent loops",
529  "loop control flow is not understood by vectorizer",
530  "CFGNotUnderstood", ORE, TheLoop);
531  if (DoExtraAnalysis)
532  Result = false;
533  else
534  return false;
535  }
536 
537  // Check whether we are able to set up outer loop induction.
538  if (!setupOuterLoopInductions()) {
539  reportVectorizationFailure("Unsupported outer loop Phi(s)",
540  "Unsupported outer loop Phi(s)",
541  "UnsupportedPhi", ORE, TheLoop);
542  if (DoExtraAnalysis)
543  Result = false;
544  else
545  return false;
546  }
547 
548  return Result;
549 }
550 
551 void LoopVectorizationLegality::addInductionPhi(
552  PHINode *Phi, const InductionDescriptor &ID,
553  SmallPtrSetImpl<Value *> &AllowedExit) {
554  Inductions[Phi] = ID;
555 
556  // In case this induction also comes with casts that we know we can ignore
557  // in the vectorized loop body, record them here. All casts could be recorded
558  // here for ignoring, but suffices to record only the first (as it is the
559  // only one that may bw used outside the cast sequence).
560  const SmallVectorImpl<Instruction *> &Casts = ID.getCastInsts();
561  if (!Casts.empty())
562  InductionCastsToIgnore.insert(*Casts.begin());
563 
564  Type *PhiTy = Phi->getType();
565  const DataLayout &DL = Phi->getModule()->getDataLayout();
566 
567  // Get the widest type.
568  if (!PhiTy->isFloatingPointTy()) {
569  if (!WidestIndTy)
570  WidestIndTy = convertPointerToIntegerType(DL, PhiTy);
571  else
572  WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy);
573  }
574 
575  // Int inductions are special because we only allow one IV.
576  if (ID.getKind() == InductionDescriptor::IK_IntInduction &&
577  ID.getConstIntStepValue() && ID.getConstIntStepValue()->isOne() &&
578  isa<Constant>(ID.getStartValue()) &&
579  cast<Constant>(ID.getStartValue())->isNullValue()) {
580 
581  // Use the phi node with the widest type as induction. Use the last
582  // one if there are multiple (no good reason for doing this other
583  // than it is expedient). We've checked that it begins at zero and
584  // steps by one, so this is a canonical induction variable.
585  if (!PrimaryInduction || PhiTy == WidestIndTy)
586  PrimaryInduction = Phi;
587  }
588 
589  // Both the PHI node itself, and the "post-increment" value feeding
590  // back into the PHI node may have external users.
591  // We can allow those uses, except if the SCEVs we have for them rely
592  // on predicates that only hold within the loop, since allowing the exit
593  // currently means re-using this SCEV outside the loop (see PR33706 for more
594  // details).
595  if (PSE.getPredicate().isAlwaysTrue()) {
596  AllowedExit.insert(Phi);
597  AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch()));
598  }
599 
600  LLVM_DEBUG(dbgs() << "LV: Found an induction variable.\n");
601 }
602 
603 bool LoopVectorizationLegality::setupOuterLoopInductions() {
604  BasicBlock *Header = TheLoop->getHeader();
605 
606  // Returns true if a given Phi is a supported induction.
607  auto isSupportedPhi = [&](PHINode &Phi) -> bool {
609  if (InductionDescriptor::isInductionPHI(&Phi, TheLoop, PSE, ID) &&
611  addInductionPhi(&Phi, ID, AllowedExit);
612  return true;
613  } else {
614  // Bail out for any Phi in the outer loop header that is not a supported
615  // induction.
616  LLVM_DEBUG(
617  dbgs()
618  << "LV: Found unsupported PHI for outer loop vectorization.\n");
619  return false;
620  }
621  };
622 
623  if (llvm::all_of(Header->phis(), isSupportedPhi))
624  return true;
625  else
626  return false;
627 }
628 
629 /// Checks if a function is scalarizable according to the TLI, in
630 /// the sense that it should be vectorized and then expanded in
631 /// multiple scalar calls. This is represented in the
632 /// TLI via mappings that do not specify a vector name, as in the
633 /// following example:
634 ///
635 /// const VecDesc VecIntrinsics[] = {
636 /// {"llvm.phx.abs.i32", "", 4}
637 /// };
638 static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI) {
639  const StringRef ScalarName = CI.getCalledFunction()->getName();
640  bool Scalarize = TLI.isFunctionVectorizable(ScalarName);
641  // Check that all known VFs are not associated to a vector
642  // function, i.e. the vector name is emty.
643  if (Scalarize) {
644  ElementCount WidestFixedVF, WidestScalableVF;
645  TLI.getWidestVF(ScalarName, WidestFixedVF, WidestScalableVF);
647  ElementCount::isKnownLE(VF, WidestFixedVF); VF *= 2)
648  Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF);
650  ElementCount::isKnownLE(VF, WidestScalableVF); VF *= 2)
651  Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF);
652  assert((WidestScalableVF.isZero() || !Scalarize) &&
653  "Caller may decide to scalarize a variant using a scalable VF");
654  }
655  return Scalarize;
656 }
657 
658 bool LoopVectorizationLegality::canVectorizeInstrs() {
659  BasicBlock *Header = TheLoop->getHeader();
660 
661  // For each block in the loop.
662  for (BasicBlock *BB : TheLoop->blocks()) {
663  // Scan the instructions in the block and look for hazards.
664  for (Instruction &I : *BB) {
665  if (auto *Phi = dyn_cast<PHINode>(&I)) {
666  Type *PhiTy = Phi->getType();
667  // Check that this PHI type is allowed.
668  if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
669  !PhiTy->isPointerTy()) {
670  reportVectorizationFailure("Found a non-int non-pointer PHI",
671  "loop control flow is not understood by vectorizer",
672  "CFGNotUnderstood", ORE, TheLoop);
673  return false;
674  }
675 
676  // If this PHINode is not in the header block, then we know that we
677  // can convert it to select during if-conversion. No need to check if
678  // the PHIs in this block are induction or reduction variables.
679  if (BB != Header) {
680  // Non-header phi nodes that have outside uses can be vectorized. Add
681  // them to the list of allowed exits.
682  // Unsafe cyclic dependencies with header phis are identified during
683  // legalization for reduction, induction and first order
684  // recurrences.
685  AllowedExit.insert(&I);
686  continue;
687  }
688 
689  // We only allow if-converted PHIs with exactly two incoming values.
690  if (Phi->getNumIncomingValues() != 2) {
691  reportVectorizationFailure("Found an invalid PHI",
692  "loop control flow is not understood by vectorizer",
693  "CFGNotUnderstood", ORE, TheLoop, Phi);
694  return false;
695  }
696 
697  RecurrenceDescriptor RedDes;
698  if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC,
699  DT, PSE.getSE())) {
700  Requirements->addExactFPMathInst(RedDes.getExactFPMathInst());
701  AllowedExit.insert(RedDes.getLoopExitInstr());
702  Reductions[Phi] = RedDes;
703  continue;
704  }
705 
706  // TODO: Instead of recording the AllowedExit, it would be good to record the
707  // complementary set: NotAllowedExit. These include (but may not be
708  // limited to):
709  // 1. Reduction phis as they represent the one-before-last value, which
710  // is not available when vectorized
711  // 2. Induction phis and increment when SCEV predicates cannot be used
712  // outside the loop - see addInductionPhi
713  // 3. Non-Phis with outside uses when SCEV predicates cannot be used
714  // outside the loop - see call to hasOutsideLoopUser in the non-phi
715  // handling below
716  // 4. FirstOrderRecurrence phis that can possibly be handled by
717  // extraction.
718  // By recording these, we can then reason about ways to vectorize each
719  // of these NotAllowedExit.
721  if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID)) {
722  addInductionPhi(Phi, ID, AllowedExit);
723  Requirements->addExactFPMathInst(ID.getExactFPMathInst());
724  continue;
725  }
726 
728  SinkAfter, DT)) {
729  AllowedExit.insert(Phi);
730  FirstOrderRecurrences.insert(Phi);
731  continue;
732  }
733 
734  // As a last resort, coerce the PHI to a AddRec expression
735  // and re-try classifying it a an induction PHI.
736  if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true)) {
737  addInductionPhi(Phi, ID, AllowedExit);
738  continue;
739  }
740 
741  reportVectorizationFailure("Found an unidentified PHI",
742  "value that could not be identified as "
743  "reduction is used outside the loop",
744  "NonReductionValueUsedOutsideLoop", ORE, TheLoop, Phi);
745  return false;
746  } // end of PHI handling
747 
748  // We handle calls that:
749  // * Are debug info intrinsics.
750  // * Have a mapping to an IR intrinsic.
751  // * Have a vector version available.
752  auto *CI = dyn_cast<CallInst>(&I);
753 
754  if (CI && !getVectorIntrinsicIDForCall(CI, TLI) &&
755  !isa<DbgInfoIntrinsic>(CI) &&
756  !(CI->getCalledFunction() && TLI &&
757  (!VFDatabase::getMappings(*CI).empty() ||
758  isTLIScalarize(*TLI, *CI)))) {
759  // If the call is a recognized math libary call, it is likely that
760  // we can vectorize it given loosened floating-point constraints.
761  LibFunc Func;
762  bool IsMathLibCall =
763  TLI && CI->getCalledFunction() &&
764  CI->getType()->isFloatingPointTy() &&
765  TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) &&
766  TLI->hasOptimizedCodeGen(Func);
767 
768  if (IsMathLibCall) {
769  // TODO: Ideally, we should not use clang-specific language here,
770  // but it's hard to provide meaningful yet generic advice.
771  // Also, should this be guarded by allowExtraAnalysis() and/or be part
772  // of the returned info from isFunctionVectorizable()?
774  "Found a non-intrinsic callsite",
775  "library call cannot be vectorized. "
776  "Try compiling with -fno-math-errno, -ffast-math, "
777  "or similar flags",
778  "CantVectorizeLibcall", ORE, TheLoop, CI);
779  } else {
780  reportVectorizationFailure("Found a non-intrinsic callsite",
781  "call instruction cannot be vectorized",
782  "CantVectorizeLibcall", ORE, TheLoop, CI);
783  }
784  return false;
785  }
786 
787  // Some intrinsics have scalar arguments and should be same in order for
788  // them to be vectorized (i.e. loop invariant).
789  if (CI) {
790  auto *SE = PSE.getSE();
791  Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI);
792  for (unsigned i = 0, e = CI->arg_size(); i != e; ++i)
793  if (isVectorIntrinsicWithScalarOpAtArg(IntrinID, i)) {
794  if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(i)), TheLoop)) {
795  reportVectorizationFailure("Found unvectorizable intrinsic",
796  "intrinsic instruction cannot be vectorized",
797  "CantVectorizeIntrinsic", ORE, TheLoop, CI);
798  return false;
799  }
800  }
801  }
802 
803  // Check that the instruction return type is vectorizable.
804  // Also, we can't vectorize extractelement instructions.
805  if ((!VectorType::isValidElementType(I.getType()) &&
806  !I.getType()->isVoidTy()) ||
807  isa<ExtractElementInst>(I)) {
808  reportVectorizationFailure("Found unvectorizable type",
809  "instruction return type cannot be vectorized",
810  "CantVectorizeInstructionReturnType", ORE, TheLoop, &I);
811  return false;
812  }
813 
814  // Check that the stored type is vectorizable.
815  if (auto *ST = dyn_cast<StoreInst>(&I)) {
816  Type *T = ST->getValueOperand()->getType();
818  reportVectorizationFailure("Store instruction cannot be vectorized",
819  "store instruction cannot be vectorized",
820  "CantVectorizeStore", ORE, TheLoop, ST);
821  return false;
822  }
823 
824  // For nontemporal stores, check that a nontemporal vector version is
825  // supported on the target.
826  if (ST->getMetadata(LLVMContext::MD_nontemporal)) {
827  // Arbitrarily try a vector of 2 elements.
828  auto *VecTy = FixedVectorType::get(T, /*NumElts=*/2);
829  assert(VecTy && "did not find vectorized version of stored type");
830  if (!TTI->isLegalNTStore(VecTy, ST->getAlign())) {
832  "nontemporal store instruction cannot be vectorized",
833  "nontemporal store instruction cannot be vectorized",
834  "CantVectorizeNontemporalStore", ORE, TheLoop, ST);
835  return false;
836  }
837  }
838 
839  } else if (auto *LD = dyn_cast<LoadInst>(&I)) {
840  if (LD->getMetadata(LLVMContext::MD_nontemporal)) {
841  // For nontemporal loads, check that a nontemporal vector version is
842  // supported on the target (arbitrarily try a vector of 2 elements).
843  auto *VecTy = FixedVectorType::get(I.getType(), /*NumElts=*/2);
844  assert(VecTy && "did not find vectorized version of load type");
845  if (!TTI->isLegalNTLoad(VecTy, LD->getAlign())) {
847  "nontemporal load instruction cannot be vectorized",
848  "nontemporal load instruction cannot be vectorized",
849  "CantVectorizeNontemporalLoad", ORE, TheLoop, LD);
850  return false;
851  }
852  }
853 
854  // FP instructions can allow unsafe algebra, thus vectorizable by
855  // non-IEEE-754 compliant SIMD units.
856  // This applies to floating-point math operations and calls, not memory
857  // operations, shuffles, or casts, as they don't change precision or
858  // semantics.
859  } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) &&
860  !I.isFast()) {
861  LLVM_DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n");
862  Hints->setPotentiallyUnsafe();
863  }
864 
865  // Reduction instructions are allowed to have exit users.
866  // All other instructions must not have external users.
867  if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) {
868  // We can safely vectorize loops where instructions within the loop are
869  // used outside the loop only if the SCEV predicates within the loop is
870  // same as outside the loop. Allowing the exit means reusing the SCEV
871  // outside the loop.
872  if (PSE.getPredicate().isAlwaysTrue()) {
873  AllowedExit.insert(&I);
874  continue;
875  }
876  reportVectorizationFailure("Value cannot be used outside the loop",
877  "value cannot be used outside the loop",
878  "ValueUsedOutsideLoop", ORE, TheLoop, &I);
879  return false;
880  }
881  } // next instr.
882  }
883 
884  if (!PrimaryInduction) {
885  if (Inductions.empty()) {
886  reportVectorizationFailure("Did not find one integer induction var",
887  "loop induction variable could not be identified",
888  "NoInductionVariable", ORE, TheLoop);
889  return false;
890  } else if (!WidestIndTy) {
891  reportVectorizationFailure("Did not find one integer induction var",
892  "integer loop induction variable could not be identified",
893  "NoIntegerInductionVariable", ORE, TheLoop);
894  return false;
895  } else {
896  LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
897  }
898  }
899 
900  // For first order recurrences, we use the previous value (incoming value from
901  // the latch) to check if it dominates all users of the recurrence. Bail out
902  // if we have to sink such an instruction for another recurrence, as the
903  // dominance requirement may not hold after sinking.
904  BasicBlock *LoopLatch = TheLoop->getLoopLatch();
905  if (any_of(FirstOrderRecurrences, [LoopLatch, this](const PHINode *Phi) {
906  Instruction *V =
907  cast<Instruction>(Phi->getIncomingValueForBlock(LoopLatch));
908  return SinkAfter.find(V) != SinkAfter.end();
909  }))
910  return false;
911 
912  // Now we know the widest induction type, check if our found induction
913  // is the same size. If it's not, unset it here and InnerLoopVectorizer
914  // will create another.
915  if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType())
916  PrimaryInduction = nullptr;
917 
918  return true;
919 }
920 
921 bool LoopVectorizationLegality::canVectorizeMemory() {
922  LAI = &(*GetLAA)(*TheLoop);
923  const OptimizationRemarkAnalysis *LAR = LAI->getReport();
924  if (LAR) {
925  ORE->emit([&]() {
926  return OptimizationRemarkAnalysis(Hints->vectorizeAnalysisPassName(),
927  "loop not vectorized: ", *LAR);
928  });
929  }
930 
931  if (!LAI->canVectorizeMemory())
932  return false;
933 
934  // We can vectorize stores to invariant address when final reduction value is
935  // guaranteed to be stored at the end of the loop. Also, if decision to
936  // vectorize loop is made, runtime checks are added so as to make sure that
937  // invariant address won't alias with any other objects.
938  if (!LAI->getStoresToInvariantAddresses().empty()) {
939  // For each invariant address, check its last stored value is unconditional.
940  for (StoreInst *SI : LAI->getStoresToInvariantAddresses()) {
942  blockNeedsPredication(SI->getParent())) {
944  "We don't allow storing to uniform addresses",
945  "write of conditional recurring variant value to a loop "
946  "invariant address could not be vectorized",
947  "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
948  return false;
949  }
950  }
951 
953  // For each invariant address, check its last stored value is the result
954  // of one of our reductions.
955  //
956  // We do not check if dependence with loads exists because they are
957  // currently rejected earlier in LoopAccessInfo::analyzeLoop. In case this
958  // behaviour changes we have to modify this code.
959  ScalarEvolution *SE = PSE.getSE();
960  SmallVector<StoreInst *, 4> UnhandledStores;
961  for (StoreInst *SI : LAI->getStoresToInvariantAddresses()) {
963  // Earlier stores to this address are effectively deadcode.
964  // With opaque pointers it is possible for one pointer to be used with
965  // different sizes of stored values:
966  // store i32 0, ptr %x
967  // store i8 0, ptr %x
968  // The latest store doesn't complitely overwrite the first one in the
969  // example. That is why we have to make sure that types of stored
970  // values are same.
971  // TODO: Check that bitwidth of unhandled store is smaller then the
972  // one that overwrites it and add a test.
973  erase_if(UnhandledStores, [SE, SI](StoreInst *I) {
974  return storeToSameAddress(SE, SI, I) &&
975  I->getValueOperand()->getType() ==
976  SI->getValueOperand()->getType();
977  });
978  continue;
979  }
980  UnhandledStores.push_back(SI);
981  }
982 
983  bool IsOK = UnhandledStores.empty();
984  // TODO: we should also validate against InvariantMemSets.
985  if (!IsOK) {
987  "We don't allow storing to uniform addresses",
988  "write to a loop invariant address could not "
989  "be vectorized",
990  "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
991  return false;
992  }
993  }
994  }
995 
997  PSE.addPredicate(LAI->getPSE().getPredicate());
998  return true;
999 }
1000 
1002  bool EnableStrictReductions) {
1003 
1004  // First check if there is any ExactFP math or if we allow reassociations
1005  if (!Requirements->getExactFPInst() || Hints->allowReordering())
1006  return true;
1007 
1008  // If the above is false, we have ExactFPMath & do not allow reordering.
1009  // If the EnableStrictReductions flag is set, first check if we have any
1010  // Exact FP induction vars, which we cannot vectorize.
1011  if (!EnableStrictReductions ||
1012  any_of(getInductionVars(), [&](auto &Induction) -> bool {
1013  InductionDescriptor IndDesc = Induction.second;
1014  return IndDesc.getExactFPMathInst();
1015  }))
1016  return false;
1017 
1018  // We can now only vectorize if all reductions with Exact FP math also
1019  // have the isOrdered flag set, which indicates that we can move the
1020  // reduction operations in-loop.
1021  return (all_of(getReductionVars(), [&](auto &Reduction) -> bool {
1022  const RecurrenceDescriptor &RdxDesc = Reduction.second;
1023  return !RdxDesc.hasExactFPMath() || RdxDesc.isOrdered();
1024  }));
1025 }
1026 
1028  return any_of(getReductionVars(), [&](auto &Reduction) -> bool {
1029  const RecurrenceDescriptor &RdxDesc = Reduction.second;
1030  return RdxDesc.IntermediateStore == SI;
1031  });
1032 }
1033 
1035  return any_of(getReductionVars(), [&](auto &Reduction) -> bool {
1036  const RecurrenceDescriptor &RdxDesc = Reduction.second;
1037  if (!RdxDesc.IntermediateStore)
1038  return false;
1039 
1040  ScalarEvolution *SE = PSE.getSE();
1041  Value *InvariantAddress = RdxDesc.IntermediateStore->getPointerOperand();
1042  return V == InvariantAddress ||
1043  SE->getSCEV(V) == SE->getSCEV(InvariantAddress);
1044  });
1045 }
1046 
1048  Value *In0 = const_cast<Value *>(V);
1049  PHINode *PN = dyn_cast_or_null<PHINode>(In0);
1050  if (!PN)
1051  return false;
1052 
1053  return Inductions.count(PN);
1054 }
1055 
1056 const InductionDescriptor *
1058  if (!isInductionPhi(Phi))
1059  return nullptr;
1060  auto &ID = getInductionVars().find(Phi)->second;
1061  if (ID.getKind() == InductionDescriptor::IK_IntInduction ||
1063  return &ID;
1064  return nullptr;
1065 }
1066 
1067 const InductionDescriptor *
1069  if (!isInductionPhi(Phi))
1070  return nullptr;
1071  auto &ID = getInductionVars().find(Phi)->second;
1072  if (ID.getKind() == InductionDescriptor::IK_PtrInduction)
1073  return &ID;
1074  return nullptr;
1075 }
1076 
1078  const Value *V) const {
1079  auto *Inst = dyn_cast<Instruction>(V);
1080  return (Inst && InductionCastsToIgnore.count(Inst));
1081 }
1082 
1084  return isInductionPhi(V) || isCastedInductionVariable(V);
1085 }
1086 
1088  const PHINode *Phi) const {
1089  return FirstOrderRecurrences.count(Phi);
1090 }
1091 
1093  return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
1094 }
1095 
1096 bool LoopVectorizationLegality::blockCanBePredicated(
1099  SmallPtrSetImpl<Instruction *> &ConditionalAssumes) const {
1100  for (Instruction &I : *BB) {
1101  // Check that we don't have a constant expression that can trap as operand.
1102  for (Value *Operand : I.operands()) {
1103  if (auto *C = dyn_cast<Constant>(Operand))
1104  if (C->canTrap())
1105  return false;
1106  }
1107 
1108  // We can predicate blocks with calls to assume, as long as we drop them in
1109  // case we flatten the CFG via predication.
1110  if (match(&I, m_Intrinsic<Intrinsic::assume>())) {
1111  ConditionalAssumes.insert(&I);
1112  continue;
1113  }
1114 
1115  // Do not let llvm.experimental.noalias.scope.decl block the vectorization.
1116  // TODO: there might be cases that it should block the vectorization. Let's
1117  // ignore those for now.
1118  if (isa<NoAliasScopeDeclInst>(&I))
1119  continue;
1120 
1121  // We might be able to hoist the load.
1122  if (I.mayReadFromMemory()) {
1123  auto *LI = dyn_cast<LoadInst>(&I);
1124  if (!LI)
1125  return false;
1126  if (!SafePtrs.count(LI->getPointerOperand())) {
1127  MaskedOp.insert(LI);
1128  continue;
1129  }
1130  }
1131 
1132  if (I.mayWriteToMemory()) {
1133  auto *SI = dyn_cast<StoreInst>(&I);
1134  if (!SI)
1135  return false;
1136  // Predicated store requires some form of masking:
1137  // 1) masked store HW instruction,
1138  // 2) emulation via load-blend-store (only if safe and legal to do so,
1139  // be aware on the race conditions), or
1140  // 3) element-by-element predicate check and scalar store.
1141  MaskedOp.insert(SI);
1142  continue;
1143  }
1144  if (I.mayThrow())
1145  return false;
1146  }
1147 
1148  return true;
1149 }
1150 
1151 bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
1152  if (!EnableIfConversion) {
1153  reportVectorizationFailure("If-conversion is disabled",
1154  "if-conversion is disabled",
1155  "IfConversionDisabled",
1156  ORE, TheLoop);
1157  return false;
1158  }
1159 
1160  assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");
1161 
1162  // A list of pointers which are known to be dereferenceable within scope of
1163  // the loop body for each iteration of the loop which executes. That is,
1164  // the memory pointed to can be dereferenced (with the access size implied by
1165  // the value's type) unconditionally within the loop header without
1166  // introducing a new fault.
1167  SmallPtrSet<Value *, 8> SafePointers;
1168 
1169  // Collect safe addresses.
1170  for (BasicBlock *BB : TheLoop->blocks()) {
1171  if (!blockNeedsPredication(BB)) {
1172  for (Instruction &I : *BB)
1173  if (auto *Ptr = getLoadStorePointerOperand(&I))
1174  SafePointers.insert(Ptr);
1175  continue;
1176  }
1177 
1178  // For a block which requires predication, a address may be safe to access
1179  // in the loop w/o predication if we can prove dereferenceability facts
1180  // sufficient to ensure it'll never fault within the loop. For the moment,
1181  // we restrict this to loads; stores are more complicated due to
1182  // concurrency restrictions.
1183  ScalarEvolution &SE = *PSE.getSE();
1184  for (Instruction &I : *BB) {
1185  LoadInst *LI = dyn_cast<LoadInst>(&I);
1186  if (LI && !LI->getType()->isVectorTy() && !mustSuppressSpeculation(*LI) &&
1187  isDereferenceableAndAlignedInLoop(LI, TheLoop, SE, *DT))
1188  SafePointers.insert(LI->getPointerOperand());
1189  }
1190  }
1191 
1192  // Collect the blocks that need predication.
1193  BasicBlock *Header = TheLoop->getHeader();
1194  for (BasicBlock *BB : TheLoop->blocks()) {
1195  // We don't support switch statements inside loops.
1196  if (!isa<BranchInst>(BB->getTerminator())) {
1197  reportVectorizationFailure("Loop contains a switch statement",
1198  "loop contains a switch statement",
1199  "LoopContainsSwitch", ORE, TheLoop,
1200  BB->getTerminator());
1201  return false;
1202  }
1203 
1204  // We must be able to predicate all blocks that need to be predicated.
1205  if (blockNeedsPredication(BB)) {
1206  if (!blockCanBePredicated(BB, SafePointers, MaskedOp,
1207  ConditionalAssumes)) {
1209  "Control flow cannot be substituted for a select",
1210  "control flow cannot be substituted for a select",
1211  "NoCFGForSelect", ORE, TheLoop,
1212  BB->getTerminator());
1213  return false;
1214  }
1215  } else if (BB != Header && !canIfConvertPHINodes(BB)) {
1217  "Control flow cannot be substituted for a select",
1218  "control flow cannot be substituted for a select",
1219  "NoCFGForSelect", ORE, TheLoop,
1220  BB->getTerminator());
1221  return false;
1222  }
1223  }
1224 
1225  // We can if-convert this loop.
1226  return true;
1227 }
1228 
1229 // Helper function to canVectorizeLoopNestCFG.
1230 bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
1231  bool UseVPlanNativePath) {
1232  assert((UseVPlanNativePath || Lp->isInnermost()) &&
1233  "VPlan-native path is not enabled.");
1234 
1235  // TODO: ORE should be improved to show more accurate information when an
1236  // outer loop can't be vectorized because a nested loop is not understood or
1237  // legal. Something like: "outer_loop_location: loop not vectorized:
1238  // (inner_loop_location) loop control flow is not understood by vectorizer".
1239 
1240  // Store the result and return it at the end instead of exiting early, in case
1241  // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1242  bool Result = true;
1243  bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1244 
1245  // We must have a loop in canonical form. Loops with indirectbr in them cannot
1246  // be canonicalized.
1247  if (!Lp->getLoopPreheader()) {
1248  reportVectorizationFailure("Loop doesn't have a legal pre-header",
1249  "loop control flow is not understood by vectorizer",
1250  "CFGNotUnderstood", ORE, TheLoop);
1251  if (DoExtraAnalysis)
1252  Result = false;
1253  else
1254  return false;
1255  }
1256 
1257  // We must have a single backedge.
1258  if (Lp->getNumBackEdges() != 1) {
1259  reportVectorizationFailure("The loop must have a single backedge",
1260  "loop control flow is not understood by vectorizer",
1261  "CFGNotUnderstood", ORE, TheLoop);
1262  if (DoExtraAnalysis)
1263  Result = false;
1264  else
1265  return false;
1266  }
1267 
1268  return Result;
1269 }
1270 
1271 bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
1272  Loop *Lp, bool UseVPlanNativePath) {
1273  // Store the result and return it at the end instead of exiting early, in case
1274  // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1275  bool Result = true;
1276  bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1277  if (!canVectorizeLoopCFG(Lp, UseVPlanNativePath)) {
1278  if (DoExtraAnalysis)
1279  Result = false;
1280  else
1281  return false;
1282  }
1283 
1284  // Recursively check whether the loop control flow of nested loops is
1285  // understood.
1286  for (Loop *SubLp : *Lp)
1287  if (!canVectorizeLoopNestCFG(SubLp, UseVPlanNativePath)) {
1288  if (DoExtraAnalysis)
1289  Result = false;
1290  else
1291  return false;
1292  }
1293 
1294  return Result;
1295 }
1296 
1297 bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
1298  // Store the result and return it at the end instead of exiting early, in case
1299  // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1300  bool Result = true;
1301 
1302  bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1303  // Check whether the loop-related control flow in the loop nest is expected by
1304  // vectorizer.
1305  if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) {
1306  if (DoExtraAnalysis)
1307  Result = false;
1308  else
1309  return false;
1310  }
1311 
1312  // We need to have a loop header.
1313  LLVM_DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName()
1314  << '\n');
1315 
1316  // Specific checks for outer loops. We skip the remaining legal checks at this
1317  // point because they don't support outer loops.
1318  if (!TheLoop->isInnermost()) {
1319  assert(UseVPlanNativePath && "VPlan-native path is not enabled.");
1320 
1321  if (!canVectorizeOuterLoop()) {
1322  reportVectorizationFailure("Unsupported outer loop",
1323  "unsupported outer loop",
1324  "UnsupportedOuterLoop",
1325  ORE, TheLoop);
1326  // TODO: Implement DoExtraAnalysis when subsequent legal checks support
1327  // outer loops.
1328  return false;
1329  }
1330 
1331  LLVM_DEBUG(dbgs() << "LV: We can vectorize this outer loop!\n");
1332  return Result;
1333  }
1334 
1335  assert(TheLoop->isInnermost() && "Inner loop expected.");
1336  // Check if we can if-convert non-single-bb loops.
1337  unsigned NumBlocks = TheLoop->getNumBlocks();
1338  if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
1339  LLVM_DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");
1340  if (DoExtraAnalysis)
1341  Result = false;
1342  else
1343  return false;
1344  }
1345 
1346  // Check if we can vectorize the instructions and CFG in this loop.
1347  if (!canVectorizeInstrs()) {
1348  LLVM_DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n");
1349  if (DoExtraAnalysis)
1350  Result = false;
1351  else
1352  return false;
1353  }
1354 
1355  // Go over each instruction and look at memory deps.
1356  if (!canVectorizeMemory()) {
1357  LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
1358  if (DoExtraAnalysis)
1359  Result = false;
1360  else
1361  return false;
1362  }
1363 
1364  LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
1365  << (LAI->getRuntimePointerChecking()->Need
1366  ? " (with a runtime bound check)"
1367  : "")
1368  << "!\n");
1369 
1370  unsigned SCEVThreshold = VectorizeSCEVCheckThreshold;
1371  if (Hints->getForce() == LoopVectorizeHints::FK_Enabled)
1372  SCEVThreshold = PragmaVectorizeSCEVCheckThreshold;
1373 
1374  if (PSE.getPredicate().getComplexity() > SCEVThreshold) {
1375  reportVectorizationFailure("Too many SCEV checks needed",
1376  "Too many SCEV assumptions need to be made and checked at runtime",
1377  "TooManySCEVRunTimeChecks", ORE, TheLoop);
1378  if (DoExtraAnalysis)
1379  Result = false;
1380  else
1381  return false;
1382  }
1383 
1384  // Okay! We've done all the tests. If any have failed, return false. Otherwise
1385  // we can vectorize, and at this point we don't have any other mem analysis
1386  // which may limit our maximum vectorization factor, so just return true with
1387  // no restrictions.
1388  return Result;
1389 }
1390 
1392 
1393  LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");
1394 
1395  SmallPtrSet<const Value *, 8> ReductionLiveOuts;
1396 
1397  for (auto &Reduction : getReductionVars())
1398  ReductionLiveOuts.insert(Reduction.second.getLoopExitInstr());
1399 
1400  // TODO: handle non-reduction outside users when tail is folded by masking.
1401  for (auto *AE : AllowedExit) {
1402  // Check that all users of allowed exit values are inside the loop or
1403  // are the live-out of a reduction.
1404  if (ReductionLiveOuts.count(AE))
1405  continue;
1406  for (User *U : AE->users()) {
1407  Instruction *UI = cast<Instruction>(U);
1408  if (TheLoop->contains(UI))
1409  continue;
1410  LLVM_DEBUG(
1411  dbgs()
1412  << "LV: Cannot fold tail by masking, loop has an outside user for "
1413  << *UI << "\n");
1414  return false;
1415  }
1416  }
1417 
1418  // The list of pointers that we can safely read and write to remains empty.
1419  SmallPtrSet<Value *, 8> SafePointers;
1420 
1422  SmallPtrSet<Instruction *, 8> TmpConditionalAssumes;
1423 
1424  // Check and mark all blocks for predication, including those that ordinarily
1425  // do not need predication such as the header block.
1426  for (BasicBlock *BB : TheLoop->blocks()) {
1427  if (!blockCanBePredicated(BB, SafePointers, TmpMaskedOp,
1428  TmpConditionalAssumes)) {
1429  LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking as requested.\n");
1430  return false;
1431  }
1432  }
1433 
1434  LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n");
1435 
1436  MaskedOp.insert(TmpMaskedOp.begin(), TmpMaskedOp.end());
1437  ConditionalAssumes.insert(TmpConditionalAssumes.begin(),
1438  TmpConditionalAssumes.end());
1439 
1440  return true;
1441 }
1442 
1443 } // namespace llvm
i
i
Definition: README.txt:29
llvm::mustSuppressSpeculation
bool mustSuppressSpeculation(const LoadInst &LI)
Return true if speculation of the given load must be suppressed to avoid ordering or interfering with...
Definition: ValueTracking.cpp:4671
llvm::OptimizationRemarkMissed
Diagnostic information for missed-optimization remarks.
Definition: DiagnosticInfo.h:735
llvm::Loop::isLoopInvariant
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:60
llvm::LoopVectorizationLegality::getReductionVars
const ReductionList & getReductionVars() const
Returns the reduction variables found in the loop.
Definition: LoopVectorizationLegality.h:296
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
llvm::Instruction::getModule
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:65
llvm::storeToSameAddress
static bool storeToSameAddress(ScalarEvolution *SE, StoreInst *A, StoreInst *B)
Returns true if A and B have same pointer operands or same SCEVs addresses.
Definition: LoopVectorizationLegality.cpp:443
llvm::PredicatedScalarEvolution::getPredicate
const SCEVPredicate & getPredicate() const
Definition: ScalarEvolution.cpp:14327
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
llvm::LoopAccessInfo::isUniform
bool isUniform(Value *V) const
Returns true if the value V is uniform within the loop.
Definition: LoopAccessAnalysis.cpp:2363
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:104
IntrinsicInst.h
llvm::Type::isPointerTy
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:218
llvm::ElementCount
Definition: TypeSize.h:404
EnableIfConversion
static cl::opt< bool > EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden, cl::desc("Enable if-conversion during vectorization."))
llvm::getVectorIntrinsicIDForCall
Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
Definition: VectorUtils.cpp:135
Loads.h
T
llvm::InductionDescriptor::getExactFPMathInst
Instruction * getExactFPMathInst()
Returns floating-point induction operator that does not allow reassociation (transforming the inducti...
Definition: IVDescriptors.h:357
llvm::Function
Definition: Function.h:60
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:546
llvm::RecurrenceDescriptor::hasExactFPMath
bool hasExactFPMath() const
Returns true if the recurrence has floating-point math that requires precise (ordered) operations.
Definition: IVDescriptors.h:207
llvm::LoopBase::contains
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:138
llvm::ARM_MB::LD
@ LD
Definition: ARMBaseInfo.h:72
llvm::StringRef::npos
static constexpr size_t npos
Definition: StringRef.h:60
llvm::LinearPolySize< ElementCount >::isKnownLE
static bool isKnownLE(const LinearPolySize &LHS, const LinearPolySize &RHS)
Definition: TypeSize.h:340
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1185
llvm::LoopVectorizationRequirements::addRuntimePointerChecks
void addRuntimePointerChecks(unsigned Num)
Definition: LoopVectorizationLegality.h:222
SizeOpts.h
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:168
llvm::Loop::getStartLoc
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:630
llvm::TargetLibraryInfo::isFunctionVectorizable
bool isFunctionVectorizable(StringRef F, const ElementCount &VF) const
Definition: TargetLibraryInfo.h:331
llvm::erase_if
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:1807
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:449
llvm::VectorizerParams::VectorizationInterleave
static unsigned VectorizationInterleave
Interleave factor as overridden by the user.
Definition: LoopAccessAnalysis.h:43
llvm::LoopVectorizeHints::SK_Unspecified
@ SK_Unspecified
Not selected.
Definition: LoopVectorizationLegality.h:116
ValueTracking.h
OptimizationRemarkEmitter.h
llvm::isVectorIntrinsicWithScalarOpAtArg
bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx)
Identifies if the vector form of the intrinsic has a scalar operand.
Definition: VectorUtils.cpp:101
llvm::OptimizationRemarkEmitter::allowExtraAnalysis
bool allowExtraAnalysis(StringRef PassName) const
Whether we allow for extra compile-time budget to perform more analysis to produce fewer false positi...
Definition: OptimizationRemarkEmitter.h:98
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:139
VectorizeSCEVCheckThreshold
static cl::opt< unsigned > VectorizeSCEVCheckThreshold("vectorize-scev-check-threshold", cl::init(16), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed."))
llvm::InductionDescriptor::IK_IntInduction
@ IK_IntInduction
Integer induction variable. Step = C.
Definition: IVDescriptors.h:311
llvm::LoopVectorizeHints::SK_FixedWidthOnly
@ SK_FixedWidthOnly
Disables vectorization with scalable vectors.
Definition: LoopVectorizationLegality.h:118
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::getPtrStride
int64_t getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const ValueToValueMap &StridesMap=ValueToValueMap(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of the access type size.
Definition: LoopAccessAnalysis.cpp:1186
llvm::LoopVectorizeHints::getWidth
ElementCount getWidth() const
Definition: LoopVectorizationLegality.h:138
llvm::LoopVectorizationLegality::canVectorizeFPMath
bool canVectorizeFPMath(bool EnableStrictReductions)
Returns true if it is legal to vectorize the FP math operations in this loop.
Definition: LoopVectorizationLegality.cpp:1001
llvm::LoopVectorizeHints::getIsVectorized
unsigned getIsVectorized() const
Definition: LoopVectorizationLegality.h:152
llvm::TargetTransformInfo::isLegalNTLoad
bool isLegalNTLoad(Type *DataType, Align Alignment) const
Return true if the target supports nontemporal load.
Definition: TargetTransformInfo.cpp:401
llvm::TargetTransformInfo::isLegalNTStore
bool isLegalNTStore(Type *DataType, Align Alignment) const
Return true if the target supports nontemporal store.
Definition: TargetTransformInfo.cpp:396
llvm::LoopVectorizeHints::LoopVectorizeHints
LoopVectorizeHints(const Loop *L, bool InterleaveOnlyWhenForced, OptimizationRemarkEmitter &ORE, const TargetTransformInfo *TTI=nullptr)
Definition: LoopVectorizationLegality.cpp:97
llvm::ConstantAsMetadata::get
static ConstantAsMetadata * get(Constant *C)
Definition: Metadata.h:420
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
llvm::reportVectorizationFailure
void reportVectorizationFailure(const StringRef DebugMsg, const StringRef OREMsg, const StringRef ORETag, OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I=nullptr)
Reports a vectorization failure: print DebugMsg for debugging purposes along with the corresponding o...
Definition: LoopVectorize.cpp:984
llvm::LoadInst::getPointerOperand
Value * getPointerOperand()
Definition: Instructions.h:260
llvm::isPowerOf2_32
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:491
llvm::isUniformLoopNest
static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp)
Definition: LoopVectorizationLegality.cpp:377
llvm::RISCVFeatures::validate
void validate(const Triple &TT, const FeatureBitset &FeatureBits)
Definition: RISCVBaseInfo.cpp:97
llvm::LoopVectorizationLegality::prepareToFoldTailByMasking
bool prepareToFoldTailByMasking()
Return true if we can vectorize this loop while folding its tail by masking, and mark all respective ...
Definition: LoopVectorizationLegality.cpp:1391
llvm::Type::isFloatingPointTy
bool isFloatingPointTy() const
Return true if this is one of the six floating-point types.
Definition: Type.h:163
llvm::Type::getInt32Ty
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:239
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::MDNode::get
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1384
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::MDNode::getNumOperands
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1284
llvm::makePostTransformationMetadata
llvm::MDNode * makePostTransformationMetadata(llvm::LLVMContext &Context, MDNode *OrigLoopID, llvm::ArrayRef< llvm::StringRef > RemovePrefixes, llvm::ArrayRef< llvm::MDNode * > AddAttrs)
Create a new LoopID after the loop has been transformed.
Definition: LoopInfo.cpp:1125
Context
LLVMContext & Context
Definition: NVVMIntrRange.cpp:66
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::LoopVectorizeHints::FK_Undefined
@ FK_Undefined
Not selected.
Definition: LoopVectorizationLegality.h:109
Arg
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Definition: AMDGPULibCalls.cpp:186
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1617
llvm::shouldOptimizeForSize
bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
Definition: MachineSizeOpts.cpp:183
llvm::InductionDescriptor
A struct for saving information about induction variables.
Definition: IVDescriptors.h:306
llvm::LoopAccessInfo::hasDependenceInvolvingLoopInvariantAddress
bool hasDependenceInvolvingLoopInvariantAddress() const
If the loop has memory dependence involving an invariant address, i.e.
Definition: LoopAccessAnalysis.h:622
MaxInterleaveFactor
static const unsigned MaxInterleaveFactor
Maximum vectorization interleave count.
Definition: LoopVectorizationLegality.cpp:77
llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
isZero
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition: Lint.cpp:524
llvm::InductionDescriptor::IK_PtrInduction
@ IK_PtrInduction
Pointer induction var. Step = C / sizeof(elem).
Definition: IVDescriptors.h:312
llvm::LoopAccessInfo::blockNeedsPredication
static bool blockNeedsPredication(BasicBlock *BB, Loop *TheLoop, DominatorTree *DT)
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
Definition: LoopAccessAnalysis.cpp:2334
llvm::ValueToValueMap
DenseMap< const Value *, Value * > ValueToValueMap
Definition: ScalarEvolutionExpressions.h:906
llvm::LoopVectorizationLegality::isInvariantStoreOfReduction
bool isInvariantStoreOfReduction(StoreInst *SI)
Returns True if given store is a final invariant store of one of the reductions found in the loop.
Definition: LoopVectorizationLegality.cpp:1027
llvm::User
Definition: User.h:44
llvm::LibFunc
LibFunc
Definition: TargetLibraryInfo.h:35
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::isUniformLoop
static bool isUniformLoop(Loop *Lp, Loop *OuterLp)
Definition: LoopVectorizationLegality.cpp:332
llvm::RecurrenceDescriptor::getExactFPMathInst
Instruction * getExactFPMathInst() const
Returns 1st non-reassociative FP instruction in the PHI node's use-chain.
Definition: IVDescriptors.h:210
llvm::CallBase::getCalledFunction
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1396
llvm::LoopVectorizeHints::vectorizeAnalysisPassName
const char * vectorizeAnalysisPassName() const
If hints are provided that force vectorization, use the AlwaysPrint pass name to force the frontend t...
Definition: LoopVectorizationLegality.cpp:231
llvm::ms_demangle::QualifierMangleMode::Result
@ Result
llvm::LoopVectorizationRequirements::addExactFPMathInst
void addExactFPMathInst(Instruction *I)
Track the 1st floating-point instruction that can not be reassociated.
Definition: LoopVectorizationLegality.h:217
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:194
llvm::LoopVectorizationLegality::blockNeedsPredication
bool blockNeedsPredication(BasicBlock *BB) const
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
Definition: LoopVectorizationLegality.cpp:1092
TargetLibraryInfo.h
llvm::Type::isVectorTy
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:227
llvm::LoopVectorizationLegality::getPointerInductionDescriptor
const InductionDescriptor * getPointerInductionDescriptor(PHINode *Phi) const
Returns a pointer to the induction descriptor, if Phi is pointer induction.
Definition: LoopVectorizationLegality.cpp:1068
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::PHINode::getIncomingValueForBlock
Value * getIncomingValueForBlock(const BasicBlock *BB) const
Definition: Instructions.h:2836
llvm::TargetTransformInfo::enableScalableVectorization
bool enableScalableVectorization() const
Definition: TargetTransformInfo.cpp:1113
llvm::TargetLibraryInfo::getLibFunc
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
Definition: TargetLibraryInfo.h:294
llvm::Instruction
Definition: Instruction.h:42
llvm::Type::getScalarSizeInBits
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition: Type.cpp:189
llvm::PGSOQueryType::IRPass
@ IRPass
llvm::LoopVectorizeHints::getForce
enum ForceKind getForce() const
Definition: LoopVectorizationLegality.h:154
llvm::BasicBlock::phis
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:355
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:926
PatternMatch.h
llvm::FixedVectorType::get
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:684
llvm::Metadata
Root of the metadata hierarchy.
Definition: Metadata.h:62
llvm::PHINode::getNumIncomingValues
unsigned getNumIncomingValues() const
Return the number of incoming edges.
Definition: Instructions.h:2743
llvm::LinearPolySize< ElementCount >::getFixed
static ElementCount getFixed(ScalarTy MinVal)
Definition: TypeSize.h:283
llvm::lltok::Kind
Kind
Definition: LLToken.h:18
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
LoopInfo.h
llvm::Twine::str
std::string str() const
Return the twine contents as a std::string.
Definition: Twine.cpp:17
llvm::ScalarEvolution::getSCEV
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
Definition: ScalarEvolution.cpp:4402
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:191
llvm::MDNode::getOperand
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1278
llvm::VectorType::isValidElementType
static bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as a element type.
Definition: Type.cpp:675
llvm::canIfConvertPHINodes
static bool canIfConvertPHINodes(BasicBlock *BB)
Check whether it is safe to if-convert this phi node.
Definition: LoopVectorizationLegality.cpp:393
VectorUtils.h
llvm::cl::opt< bool >
llvm::PPC::Predicate
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:297
llvm::LoopVectorizeHints::SK_PreferScalable
@ SK_PreferScalable
Vectorize loops using scalable vectors or fixed-width vectors, but favor scalable vectors when the co...
Definition: LoopVectorizationLegality.h:122
llvm::cl::values
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:685
llvm::RuntimePointerChecking::Need
bool Need
This flag indicates if we need to add the runtime check.
Definition: LoopAccessAnalysis.h:477
llvm::MapVector::find
iterator find(const KeyT &Key)
Definition: MapVector.h:148
llvm::VFDatabase::getMappings
static SmallVector< VFInfo, 8 > getMappings(const CallInst &CI)
Retrieve all the VFInfo instances associated to the CallInst CI.
Definition: VectorUtils.h:249
llvm::HintsAllowReordering
cl::opt< bool > HintsAllowReordering("hints-allow-reordering", cl::init(true), cl::Hidden, cl::desc("Allow enabling loop hints to reorder " "FP operations during vectorization."))
llvm::SmallPtrSetImpl::end
iterator end() const
Definition: SmallPtrSet.h:408
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
llvm::LoopVectorizeHints::FK_Enabled
@ FK_Enabled
Forcing enabled.
Definition: LoopVectorizationLegality.h:111
llvm::LoopVectorizeHints::getInterleave
unsigned getInterleave() const
Definition: LoopVectorizationLegality.h:143
llvm::LoopVectorizeHints::allowVectorization
bool allowVectorization(Function *F, Loop *L, bool VectorizeOnlyWhenForced) const
Definition: LoopVectorizationLegality.cpp:172
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
llvm::DenseMap< const Value *, Value * >
llvm::LoopAccessInfo::getRuntimePointerChecking
const RuntimePointerChecking * getRuntimePointerChecking() const
Definition: LoopAccessAnalysis.h:573
I
#define I(x, y, z)
Definition: MD5.cpp:58
ForceScalableVectorization
static cl::opt< LoopVectorizeHints::ScalableForceKind > ForceScalableVectorization("scalable-vectorization", cl::init(LoopVectorizeHints::SK_Unspecified), cl::Hidden, cl::desc("Control whether the compiler can use scalable vectors to " "vectorize a loop"), cl::values(clEnumValN(LoopVectorizeHints::SK_FixedWidthOnly, "off", "Scalable vectorization is disabled."), clEnumValN(LoopVectorizeHints::SK_PreferScalable, "preferred", "Scalable vectorization is available and favored when the " "cost is inconclusive."), clEnumValN(LoopVectorizeHints::SK_PreferScalable, "on", "Scalable vectorization is available and favored when the " "cost is inconclusive.")))
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
llvm::LoopBase::getLoopPreheader
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:166
llvm::SmallPtrSetImpl::begin
iterator begin() const
Definition: SmallPtrSet.h:403
llvm::VectorizerParams::MaxVectorWidth
static const unsigned MaxVectorWidth
Maximum SIMD width.
Definition: LoopAccessAnalysis.h:38
llvm::LoopVectorizationLegality::isInvariantAddressOfReduction
bool isInvariantAddressOfReduction(Value *V)
Returns True if given address is invariant and is used to store recurrent expression.
Definition: LoopVectorizationLegality.cpp:1034
llvm::LoopAccessInfo::getReport
const OptimizationRemarkAnalysis * getReport() const
The diagnostics report generated for the analysis.
Definition: LoopAccessAnalysis.h:597
llvm::MDString::get
static MDString * get(LLVMContext &Context, StringRef Str)
Definition: Metadata.cpp:497
llvm::LoopAccessInfo::getStoresToInvariantAddresses
const ArrayRef< StoreInst * > getStoresToInvariantAddresses() const
Return the list of stores to invariant addresses.
Definition: LoopAccessAnalysis.h:627
llvm::LoopBase::getLoopLatch
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:215
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::PredicatedScalarEvolution::getSCEV
const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
Definition: ScalarEvolution.cpp:14287
llvm::LoopVectorizationLegality::isCastedInductionVariable
bool isCastedInductionVariable(const Value *V) const
Returns True if V is a cast that is part of an induction def-use chain, and had been proven to be red...
Definition: LoopVectorizationLegality.cpp:1077
llvm::LoopVectorizationLegality::isInductionVariable
bool isInductionVariable(const Value *V) const
Returns True if V can be considered as an induction variable in this loop.
Definition: LoopVectorizationLegality.cpp:1083
SI
StandardInstrumentations SI(Debug, VerifyEach)
llvm::OptimizationRemarkEmitter::emit
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Definition: OptimizationRemarkEmitter.cpp:77
llvm::LoopAccessInfo::canVectorizeMemory
bool canVectorizeMemory() const
Return true we can analyze the memory accesses in the loop and there are no memory dependence cycles.
Definition: LoopAccessAnalysis.h:566
llvm::SCEVPredicate::isAlwaysTrue
virtual bool isAlwaysTrue() const =0
Returns true if the predicate is always true.
llvm::InductionDescriptor::IK_FpInduction
@ IK_FpInduction
Floating point induction variable.
Definition: IVDescriptors.h:313
llvm::isTLIScalarize
static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI)
Checks if a function is scalarizable according to the TLI, in the sense that it should be vectorized ...
Definition: LoopVectorizationLegality.cpp:638
llvm::LoopVectorizationRequirements::getExactFPInst
Instruction * getExactFPInst()
Definition: LoopVectorizationLegality.h:224
llvm::MDNode
Metadata node.
Definition: Metadata.h:944
llvm::isDereferenceableAndAlignedInLoop
bool isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L, ScalarEvolution &SE, DominatorTree &DT)
Return true if we can prove that the given load (which is assumed to be within the specified loop) wo...
Definition: Loads.cpp:266
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:383
llvm::LoopVectorizationLegality::isUniform
bool isUniform(Value *V)
Returns true if the value V is uniform within the loop.
Definition: LoopVectorizationLegality.cpp:479
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
LV_NAME
#define LV_NAME
Definition: LoopVectorizationLegality.cpp:33
llvm::size
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1598
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
llvm::RecurrenceDescriptor::IntermediateStore
StoreInst * IntermediateStore
Reductions may store temporary or final result to an invariant address.
Definition: IVDescriptors.h:276
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1624
llvm::LoopVectorizationLegality::isConsecutivePtr
int isConsecutivePtr(Type *AccessTy, Value *Ptr) const
Check if this pointer is consecutive when vectorizing.
Definition: LoopVectorizationLegality.cpp:462
llvm::SCEVPredicate::getComplexity
virtual unsigned getComplexity() const
Returns the estimated complexity of this predicate.
Definition: ScalarEvolution.h:238
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm::LoopVectorizeHints::ScalableForceKind
ScalableForceKind
Definition: LoopVectorizationLegality.h:114
llvm::Loop::getCanonicalInductionVariable
PHINode * getCanonicalInductionVariable() const
Check to see if the loop has a canonical induction variable: an integer recurrence that starts at 0 a...
Definition: LoopInfo.cpp:146
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
clEnumValN
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:660
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::LoopVectorizeHints::emitRemarkWithHints
void emitRemarkWithHints() const
Dumps all the hint information.
Definition: LoopVectorizationLegality.cpp:205
llvm::convertPointerToIntegerType
static Type * convertPointerToIntegerType(const DataLayout &DL, Type *Ty)
Definition: LoopVectorizationLegality.cpp:403
llvm::LoopVectorizationLegality::getIntOrFpInductionDescriptor
const InductionDescriptor * getIntOrFpInductionDescriptor(PHINode *Phi) const
Returns a pointer to the induction descriptor, if Phi is an integer or floating point induction.
Definition: LoopVectorizationLegality.cpp:1057
llvm::OptimizationRemarkAnalysis
Diagnostic information for optimization analysis remarks.
Definition: DiagnosticInfo.h:781
llvm::ifs::IFSSymbolType::Func
@ Func
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:305
llvm::Type::getContext
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:128
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:173
llvm::VectorizerParams::isInterleaveForced
static bool isInterleaveForced()
True if force-vector-interleave was specified by the user.
Definition: LoopAccessAnalysis.cpp:133
LoopVectorize.h
llvm::BasicBlock::getContext
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:35
llvm::MapVector::empty
bool empty() const
Definition: MapVector.h:80
llvm::LoopBase::isInnermost
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
Definition: LoopInfo.h:181
llvm::InductionDescriptor::isInductionPHI
static bool isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D, const SCEV *Expr=nullptr, SmallVectorImpl< Instruction * > *CastsToIgnore=nullptr)
Returns true if Phi is an induction in the loop L.
Definition: IVDescriptors.cpp:1504
PragmaVectorizeSCEVCheckThreshold
static cl::opt< unsigned > PragmaVectorizeSCEVCheckThreshold("pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed with a " "vectorize(enable) pragma"))
llvm::LoopInfoBase::isLoopHeader
bool isLoopHeader(const BlockT *BB) const
Definition: LoopInfo.h:999
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:83
llvm::MapVector::count
size_type count(const KeyT &Key) const
Definition: MapVector.h:143
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:50
llvm::LoopVectorizationLegality::isFirstOrderRecurrence
bool isFirstOrderRecurrence(const PHINode *Phi) const
Returns True if Phi is a first-order recurrence in this loop.
Definition: LoopVectorizationLegality.cpp:1087
H
#define H(x, y, z)
Definition: MD5.cpp:57
llvm::Loop::setLoopID
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition: LoopInfo.cpp:524
llvm::VectorizationFactor
TODO: The following VectorizationFactor was pulled out of LoopVectorizationCostModel class.
Definition: LoopVectorizationPlanner.h:185
llvm::Loop::getLoopID
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
Definition: LoopInfo.cpp:500
llvm::RecurrenceDescriptor::isFirstOrderRecurrence
static bool isFirstOrderRecurrence(PHINode *Phi, Loop *TheLoop, MapVector< Instruction *, Instruction * > &SinkAfter, DominatorTree *DT)
Returns true if Phi is a first-order recurrence.
Definition: IVDescriptors.cpp:938
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
llvm::LinearPolySize< ElementCount >::getScalable
static ElementCount getScalable(ScalarTy MinVal)
Definition: TypeSize.h:286
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:222
LoopVectorizationLegality.h
llvm::LoopAccessInfo::getNumRuntimePointerChecks
unsigned getNumRuntimePointerChecks() const
Number of memchecks required to prove independence of otherwise may-alias pointers.
Definition: LoopAccessAnalysis.h:579
llvm::PredicatedScalarEvolution::addPredicate
void addPredicate(const SCEVPredicate &Pred)
Adds a new predicate.
Definition: ScalarEvolution.cpp:14316
llvm::LoopBase::getNumBackEdges
unsigned getNumBackEdges() const
Calculate the number of back edges to the loop header.
Definition: LoopInfo.h:266
llvm::OptimizationRemarkAnalysis::AlwaysPrint
static const char * AlwaysPrint
Definition: DiagnosticInfo.h:821
llvm::UnivariateLinearPolyBase::isZero
bool isZero() const
Definition: TypeSize.h:229
llvm::AMDGPU::Hwreg::Width
Width
Definition: SIDefines.h:439
llvm::RecurrenceDescriptor
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition: IVDescriptors.h:69
DEBUG_TYPE
#define DEBUG_TYPE
Definition: LoopVectorizationLegality.cpp:34
llvm::LoopVectorizationLegality::isInductionPhi
bool isInductionPhi(const Value *V) const
Returns True if V is a Phi node of an induction variable in this loop.
Definition: LoopVectorizationLegality.cpp:1047
llvm::StoreInst::getPointerOperand
Value * getPointerOperand()
Definition: Instructions.h:389
llvm::LoopVectorizeHints::setAlreadyVectorized
void setAlreadyVectorized()
Mark the loop L as already vectorized by setting the width to 1.
Definition: LoopVectorizationLegality.cpp:153
llvm::VectorizerParams
Collection of parameters shared beetween the Loop Vectorizer and the Loop Access Analysis.
Definition: LoopAccessAnalysis.h:36
TargetTransformInfo.h
llvm::PHINode
Definition: Instructions.h:2651
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:119
llvm::LoopVectorizeHints::allowReordering
bool allowReordering() const
When enabling loop hints are provided we allow the vectorizer to change the order of operations that ...
Definition: LoopVectorizationLegality.cpp:241
IV
static const uint32_t IV[8]
Definition: blake3_impl.h:85
llvm::SmallVectorImpl< Instruction * >
llvm::TargetLibraryInfo::getWidestVF
void getWidestVF(StringRef ScalarF, ElementCount &FixedVF, ElementCount &ScalableVF) const
Returns the largest vectorization factor used in the list of vector functions.
Definition: TargetLibraryInfo.h:428
llvm::Module::getDataLayout
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:398
llvm::SmallPtrSetImpl< Value * >
llvm::getLoadStorePointerOperand
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
Definition: Instructions.h:5317
llvm::RecurrenceDescriptor::isOrdered
bool isOrdered() const
Expose an ordered FP reduction to the instance users.
Definition: IVDescriptors.h:260
llvm::RecurrenceDescriptor::isReductionPHI
static bool isReductionPHI(PHINode *Phi, Loop *TheLoop, RecurrenceDescriptor &RedDes, DemandedBits *DB=nullptr, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr, ScalarEvolution *SE=nullptr)
Returns true if Phi is a reduction in TheLoop.
Definition: IVDescriptors.cpp:839
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1461
llvm::LoopVectorizationLegality::getInductionVars
const InductionList & getInductionVars() const
Returns the induction variables found in the loop.
Definition: LoopVectorizationLegality.h:299
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::getWiderType
static Type * getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1)
Definition: LoopVectorizationLegality.cpp:415
llvm::TargetLibraryInfo::hasOptimizedCodeGen
bool hasOptimizedCodeGen(LibFunc F) const
Tests if the function is both available and a candidate for optimized code generation.
Definition: TargetLibraryInfo.h:343
llvm::RecurrenceDescriptor::getLoopExitInstr
Instruction * getLoopExitInstr() const
Definition: IVDescriptors.h:203
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:394
llvm::cl::desc
Definition: CommandLine.h:405
llvm::hasOutsideLoopUser
static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst, SmallPtrSetImpl< Value * > &AllowedExit)
Check that the instruction has outside loop users and is not an identified reduction variable.
Definition: LoopVectorizationLegality.cpp:425
llvm::LoopAccessInfo::getPSE
const PredicatedScalarEvolution & getPSE() const
Used to add runtime SCEV checks.
Definition: LoopAccessAnalysis.h:636
llvm::PredicatedScalarEvolution::getSE
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
Definition: ScalarEvolution.h:2216
llvm::LoopVectorizeHints::FK_Disabled
@ FK_Disabled
Forcing disabled.
Definition: LoopVectorizationLegality.h:110
Reduction
loop Loop Strength Reduction
Definition: LoopStrengthReduce.cpp:6711
llvm::MDString
A single uniqued string.
Definition: Metadata.h:612
llvm::LoopVectorizationLegality::canVectorize
bool canVectorize(bool UseVPlanNativePath)
Returns true if it is legal to vectorize this loop.
Definition: LoopVectorizationLegality.cpp:1297
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:421
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38