LLVM  10.0.0svn
LoopVectorizationLegality.cpp
Go to the documentation of this file.
1 //===- LoopVectorizationLegality.cpp --------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file provides loop vectorization legality analysis. Original code
10 // resided in LoopVectorize.cpp for a long time.
11 //
12 // At this point, it is implemented as a utility class, not as an analysis
13 // pass. It should be easy to create an analysis pass around it if there
14 // is a need (but D45420 needs to happen first).
15 //
19 #include "llvm/IR/IntrinsicInst.h"
20 
21 using namespace llvm;
22 
23 #define LV_NAME "loop-vectorize"
24 #define DEBUG_TYPE LV_NAME
25 
27 
28 static cl::opt<bool>
29  EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
30  cl::desc("Enable if-conversion during vectorization."));
31 
33  "pragma-vectorize-memory-check-threshold", cl::init(128), cl::Hidden,
34  cl::desc("The maximum allowed number of runtime memory checks with a "
35  "vectorize(enable) pragma."));
36 
38  "vectorize-scev-check-threshold", cl::init(16), cl::Hidden,
39  cl::desc("The maximum number of SCEV checks allowed."));
40 
42  "pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden,
43  cl::desc("The maximum number of SCEV checks allowed with a "
44  "vectorize(enable) pragma"));
45 
46 /// Maximum vectorization interleave count.
47 static const unsigned MaxInterleaveFactor = 16;
48 
49 namespace llvm {
50 
51 bool LoopVectorizeHints::Hint::validate(unsigned Val) {
52  switch (Kind) {
53  case HK_WIDTH:
54  return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth;
55  case HK_UNROLL:
56  return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
57  case HK_FORCE:
58  return (Val <= 1);
59  case HK_ISVECTORIZED:
60  case HK_PREDICATE:
61  return (Val == 0 || Val == 1);
62  }
63  return false;
64 }
65 
67  bool InterleaveOnlyWhenForced,
69  : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH),
70  Interleave("interleave.count", InterleaveOnlyWhenForced, HK_UNROLL),
71  Force("vectorize.enable", FK_Undefined, HK_FORCE),
72  IsVectorized("isvectorized", 0, HK_ISVECTORIZED),
73  Predicate("vectorize.predicate.enable", 0, HK_PREDICATE), TheLoop(L),
74  ORE(ORE) {
75  // Populate values with existing loop metadata.
76  getHintsFromMetadata();
77 
78  // force-vector-interleave overrides DisableInterleaving.
81 
82  if (IsVectorized.Value != 1)
83  // If the vectorization width and interleaving count are both 1 then
84  // consider the loop to have been already vectorized because there's
85  // nothing more that we can do.
86  IsVectorized.Value = Width.Value == 1 && Interleave.Value == 1;
87  LLVM_DEBUG(if (InterleaveOnlyWhenForced && Interleave.Value == 1) dbgs()
88  << "LV: Interleaving disabled by the pass manager\n");
89 }
90 
92  LLVMContext &Context = TheLoop->getHeader()->getContext();
93 
94  MDNode *IsVectorizedMD = MDNode::get(
95  Context,
96  {MDString::get(Context, "llvm.loop.isvectorized"),
98  MDNode *LoopID = TheLoop->getLoopID();
99  MDNode *NewLoopID =
100  makePostTransformationMetadata(Context, LoopID,
101  {Twine(Prefix(), "vectorize.").str(),
102  Twine(Prefix(), "interleave.").str()},
103  {IsVectorizedMD});
104  TheLoop->setLoopID(NewLoopID);
105 
106  // Update internal cache.
107  IsVectorized.Value = 1;
108 }
109 
111  Function *F, Loop *L, bool VectorizeOnlyWhenForced) const {
113  LLVM_DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n");
115  return false;
116  }
117 
118  if (VectorizeOnlyWhenForced && getForce() != LoopVectorizeHints::FK_Enabled) {
119  LLVM_DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n");
121  return false;
122  }
123 
124  if (getIsVectorized() == 1) {
125  LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
126  // FIXME: Add interleave.disable metadata. This will allow
127  // vectorize.disable to be used without disabling the pass and errors
128  // to differentiate between disabled vectorization and a width of 1.
129  ORE.emit([&]() {
131  "AllDisabled", L->getStartLoc(),
132  L->getHeader())
133  << "loop not vectorized: vectorization and interleaving are "
134  "explicitly disabled, or the loop has already been "
135  "vectorized";
136  });
137  return false;
138  }
139 
140  return true;
141 }
142 
144  using namespace ore;
145 
146  ORE.emit([&]() {
147  if (Force.Value == LoopVectorizeHints::FK_Disabled)
148  return OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled",
149  TheLoop->getStartLoc(),
150  TheLoop->getHeader())
151  << "loop not vectorized: vectorization is explicitly disabled";
152  else {
153  OptimizationRemarkMissed R(LV_NAME, "MissedDetails",
154  TheLoop->getStartLoc(), TheLoop->getHeader());
155  R << "loop not vectorized";
156  if (Force.Value == LoopVectorizeHints::FK_Enabled) {
157  R << " (Force=" << NV("Force", true);
158  if (Width.Value != 0)
159  R << ", Vector Width=" << NV("VectorWidth", Width.Value);
160  if (Interleave.Value != 0)
161  R << ", Interleave Count=" << NV("InterleaveCount", Interleave.Value);
162  R << ")";
163  }
164  return R;
165  }
166  });
167 }
168 
170  if (getWidth() == 1)
171  return LV_NAME;
173  return LV_NAME;
175  return LV_NAME;
177 }
178 
179 void LoopVectorizeHints::getHintsFromMetadata() {
180  MDNode *LoopID = TheLoop->getLoopID();
181  if (!LoopID)
182  return;
183 
184  // First operand should refer to the loop id itself.
185  assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
186  assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
187 
188  for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
189  const MDString *S = nullptr;
191 
192  // The expected hint is either a MDString or a MDNode with the first
193  // operand a MDString.
194  if (const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i))) {
195  if (!MD || MD->getNumOperands() == 0)
196  continue;
197  S = dyn_cast<MDString>(MD->getOperand(0));
198  for (unsigned i = 1, ie = MD->getNumOperands(); i < ie; ++i)
199  Args.push_back(MD->getOperand(i));
200  } else {
201  S = dyn_cast<MDString>(LoopID->getOperand(i));
202  assert(Args.size() == 0 && "too many arguments for MDString");
203  }
204 
205  if (!S)
206  continue;
207 
208  // Check if the hint starts with the loop metadata prefix.
209  StringRef Name = S->getString();
210  if (Args.size() == 1)
211  setHint(Name, Args[0]);
212  }
213 }
214 
215 void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) {
216  if (!Name.startswith(Prefix()))
217  return;
218  Name = Name.substr(Prefix().size(), StringRef::npos);
219 
220  const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg);
221  if (!C)
222  return;
223  unsigned Val = C->getZExtValue();
224 
225  Hint *Hints[] = {&Width, &Interleave, &Force, &IsVectorized, &Predicate};
226  for (auto H : Hints) {
227  if (Name == H->Name) {
228  if (H->validate(Val))
229  H->Value = Val;
230  else
231  LLVM_DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n");
232  break;
233  }
234  }
235 }
236 
238  Function *F, Loop *L, const LoopVectorizeHints &Hints) {
239  const char *PassName = Hints.vectorizeAnalysisPassName();
240  bool Failed = false;
241  if (UnsafeAlgebraInst && !Hints.allowReordering()) {
242  ORE.emit([&]() {
244  PassName, "CantReorderFPOps", UnsafeAlgebraInst->getDebugLoc(),
245  UnsafeAlgebraInst->getParent())
246  << "loop not vectorized: cannot prove it is safe to reorder "
247  "floating-point operations";
248  });
249  Failed = true;
250  }
251 
252  // Test if runtime memcheck thresholds are exceeded.
253  bool PragmaThresholdReached =
254  NumRuntimePointerChecks > PragmaVectorizeMemoryCheckThreshold;
255  bool ThresholdReached =
256  NumRuntimePointerChecks > VectorizerParams::RuntimeMemoryCheckThreshold;
257  if ((ThresholdReached && !Hints.allowReordering()) ||
258  PragmaThresholdReached) {
259  ORE.emit([&]() {
260  return OptimizationRemarkAnalysisAliasing(PassName, "CantReorderMemOps",
261  L->getStartLoc(),
262  L->getHeader())
263  << "loop not vectorized: cannot prove it is safe to reorder "
264  "memory operations";
265  });
266  LLVM_DEBUG(dbgs() << "LV: Too many memory checks needed.\n");
267  Failed = true;
268  }
269 
270  return Failed;
271 }
272 
273 // Return true if the inner loop \p Lp is uniform with regard to the outer loop
274 // \p OuterLp (i.e., if the outer loop is vectorized, all the vector lanes
275 // executing the inner loop will execute the same iterations). This check is
276 // very constrained for now but it will be relaxed in the future. \p Lp is
277 // considered uniform if it meets all the following conditions:
278 // 1) it has a canonical IV (starting from 0 and with stride 1),
279 // 2) its latch terminator is a conditional branch and,
280 // 3) its latch condition is a compare instruction whose operands are the
281 // canonical IV and an OuterLp invariant.
282 // This check doesn't take into account the uniformity of other conditions not
283 // related to the loop latch because they don't affect the loop uniformity.
284 //
285 // NOTE: We decided to keep all these checks and its associated documentation
286 // together so that we can easily have a picture of the current supported loop
287 // nests. However, some of the current checks don't depend on \p OuterLp and
288 // would be redundantly executed for each \p Lp if we invoked this function for
289 // different candidate outer loops. This is not the case for now because we
290 // don't currently have the infrastructure to evaluate multiple candidate outer
291 // loops and \p OuterLp will be a fixed parameter while we only support explicit
292 // outer loop vectorization. It's also very likely that these checks go away
293 // before introducing the aforementioned infrastructure. However, if this is not
294 // the case, we should move the \p OuterLp independent checks to a separate
295 // function that is only executed once for each \p Lp.
296 static bool isUniformLoop(Loop *Lp, Loop *OuterLp) {
297  assert(Lp->getLoopLatch() && "Expected loop with a single latch.");
298 
299  // If Lp is the outer loop, it's uniform by definition.
300  if (Lp == OuterLp)
301  return true;
302  assert(OuterLp->contains(Lp) && "OuterLp must contain Lp.");
303 
304  // 1.
306  if (!IV) {
307  LLVM_DEBUG(dbgs() << "LV: Canonical IV not found.\n");
308  return false;
309  }
310 
311  // 2.
312  BasicBlock *Latch = Lp->getLoopLatch();
313  auto *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator());
314  if (!LatchBr || LatchBr->isUnconditional()) {
315  LLVM_DEBUG(dbgs() << "LV: Unsupported loop latch branch.\n");
316  return false;
317  }
318 
319  // 3.
320  auto *LatchCmp = dyn_cast<CmpInst>(LatchBr->getCondition());
321  if (!LatchCmp) {
322  LLVM_DEBUG(
323  dbgs() << "LV: Loop latch condition is not a compare instruction.\n");
324  return false;
325  }
326 
327  Value *CondOp0 = LatchCmp->getOperand(0);
328  Value *CondOp1 = LatchCmp->getOperand(1);
329  Value *IVUpdate = IV->getIncomingValueForBlock(Latch);
330  if (!(CondOp0 == IVUpdate && OuterLp->isLoopInvariant(CondOp1)) &&
331  !(CondOp1 == IVUpdate && OuterLp->isLoopInvariant(CondOp0))) {
332  LLVM_DEBUG(dbgs() << "LV: Loop latch condition is not uniform.\n");
333  return false;
334  }
335 
336  return true;
337 }
338 
339 // Return true if \p Lp and all its nested loops are uniform with regard to \p
340 // OuterLp.
341 static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp) {
342  if (!isUniformLoop(Lp, OuterLp))
343  return false;
344 
345  // Check if nested loops are uniform.
346  for (Loop *SubLp : *Lp)
347  if (!isUniformLoopNest(SubLp, OuterLp))
348  return false;
349 
350  return true;
351 }
352 
353 /// Check whether it is safe to if-convert this phi node.
354 ///
355 /// Phi nodes with constant expressions that can trap are not safe to if
356 /// convert.
358  for (PHINode &Phi : BB->phis()) {
359  for (Value *V : Phi.incoming_values())
360  if (auto *C = dyn_cast<Constant>(V))
361  if (C->canTrap())
362  return false;
363  }
364  return true;
365 }
366 
368  if (Ty->isPointerTy())
369  return DL.getIntPtrType(Ty);
370 
371  // It is possible that char's or short's overflow when we ask for the loop's
372  // trip count, work around this by changing the type size.
373  if (Ty->getScalarSizeInBits() < 32)
374  return Type::getInt32Ty(Ty->getContext());
375 
376  return Ty;
377 }
378 
379 static Type *getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) {
380  Ty0 = convertPointerToIntegerType(DL, Ty0);
381  Ty1 = convertPointerToIntegerType(DL, Ty1);
382  if (Ty0->getScalarSizeInBits() > Ty1->getScalarSizeInBits())
383  return Ty0;
384  return Ty1;
385 }
386 
387 /// Check that the instruction has outside loop users and is not an
388 /// identified reduction variable.
389 static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
390  SmallPtrSetImpl<Value *> &AllowedExit) {
391  // Reductions, Inductions and non-header phis are allowed to have exit users. All
392  // other instructions must not have external users.
393  if (!AllowedExit.count(Inst))
394  // Check that all of the users of the loop are inside the BB.
395  for (User *U : Inst->users()) {
396  Instruction *UI = cast<Instruction>(U);
397  // This user may be a reduction exit value.
398  if (!TheLoop->contains(UI)) {
399  LLVM_DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n');
400  return true;
401  }
402  }
403  return false;
404 }
405 
407  const ValueToValueMap &Strides =
408  getSymbolicStrides() ? *getSymbolicStrides() : ValueToValueMap();
409 
410  int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, true, false);
411  if (Stride == 1 || Stride == -1)
412  return Stride;
413  return 0;
414 }
415 
417  return LAI->isUniform(V);
418 }
419 
420 bool LoopVectorizationLegality::canVectorizeOuterLoop() {
421  assert(!TheLoop->empty() && "We are not vectorizing an outer loop.");
422  // Store the result and return it at the end instead of exiting early, in case
423  // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
424  bool Result = true;
425  bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
426 
427  for (BasicBlock *BB : TheLoop->blocks()) {
428  // Check whether the BB terminator is a BranchInst. Any other terminator is
429  // not supported yet.
430  auto *Br = dyn_cast<BranchInst>(BB->getTerminator());
431  if (!Br) {
432  reportVectorizationFailure("Unsupported basic block terminator",
433  "loop control flow is not understood by vectorizer",
434  "CFGNotUnderstood", ORE, TheLoop);
435  if (DoExtraAnalysis)
436  Result = false;
437  else
438  return false;
439  }
440 
441  // Check whether the BranchInst is a supported one. Only unconditional
442  // branches, conditional branches with an outer loop invariant condition or
443  // backedges are supported.
444  // FIXME: We skip these checks when VPlan predication is enabled as we
445  // want to allow divergent branches. This whole check will be removed
446  // once VPlan predication is on by default.
447  if (!EnableVPlanPredication && Br && Br->isConditional() &&
448  !TheLoop->isLoopInvariant(Br->getCondition()) &&
449  !LI->isLoopHeader(Br->getSuccessor(0)) &&
450  !LI->isLoopHeader(Br->getSuccessor(1))) {
451  reportVectorizationFailure("Unsupported conditional branch",
452  "loop control flow is not understood by vectorizer",
453  "CFGNotUnderstood", ORE, TheLoop);
454  if (DoExtraAnalysis)
455  Result = false;
456  else
457  return false;
458  }
459  }
460 
461  // Check whether inner loops are uniform. At this point, we only support
462  // simple outer loops scenarios with uniform nested loops.
463  if (!isUniformLoopNest(TheLoop /*loop nest*/,
464  TheLoop /*context outer loop*/)) {
465  reportVectorizationFailure("Outer loop contains divergent loops",
466  "loop control flow is not understood by vectorizer",
467  "CFGNotUnderstood", ORE, TheLoop);
468  if (DoExtraAnalysis)
469  Result = false;
470  else
471  return false;
472  }
473 
474  // Check whether we are able to set up outer loop induction.
475  if (!setupOuterLoopInductions()) {
476  reportVectorizationFailure("Unsupported outer loop Phi(s)",
477  "Unsupported outer loop Phi(s)",
478  "UnsupportedPhi", ORE, TheLoop);
479  if (DoExtraAnalysis)
480  Result = false;
481  else
482  return false;
483  }
484 
485  return Result;
486 }
487 
488 void LoopVectorizationLegality::addInductionPhi(
489  PHINode *Phi, const InductionDescriptor &ID,
490  SmallPtrSetImpl<Value *> &AllowedExit) {
491  Inductions[Phi] = ID;
492 
493  // In case this induction also comes with casts that we know we can ignore
494  // in the vectorized loop body, record them here. All casts could be recorded
495  // here for ignoring, but suffices to record only the first (as it is the
496  // only one that may bw used outside the cast sequence).
497  const SmallVectorImpl<Instruction *> &Casts = ID.getCastInsts();
498  if (!Casts.empty())
499  InductionCastsToIgnore.insert(*Casts.begin());
500 
501  Type *PhiTy = Phi->getType();
502  const DataLayout &DL = Phi->getModule()->getDataLayout();
503 
504  // Get the widest type.
505  if (!PhiTy->isFloatingPointTy()) {
506  if (!WidestIndTy)
507  WidestIndTy = convertPointerToIntegerType(DL, PhiTy);
508  else
509  WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy);
510  }
511 
512  // Int inductions are special because we only allow one IV.
515  isa<Constant>(ID.getStartValue()) &&
516  cast<Constant>(ID.getStartValue())->isNullValue()) {
517 
518  // Use the phi node with the widest type as induction. Use the last
519  // one if there are multiple (no good reason for doing this other
520  // than it is expedient). We've checked that it begins at zero and
521  // steps by one, so this is a canonical induction variable.
522  if (!PrimaryInduction || PhiTy == WidestIndTy)
523  PrimaryInduction = Phi;
524  }
525 
526  // Both the PHI node itself, and the "post-increment" value feeding
527  // back into the PHI node may have external users.
528  // We can allow those uses, except if the SCEVs we have for them rely
529  // on predicates that only hold within the loop, since allowing the exit
530  // currently means re-using this SCEV outside the loop (see PR33706 for more
531  // details).
532  if (PSE.getUnionPredicate().isAlwaysTrue()) {
533  AllowedExit.insert(Phi);
534  AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch()));
535  }
536 
537  LLVM_DEBUG(dbgs() << "LV: Found an induction variable.\n");
538 }
539 
540 bool LoopVectorizationLegality::setupOuterLoopInductions() {
541  BasicBlock *Header = TheLoop->getHeader();
542 
543  // Returns true if a given Phi is a supported induction.
544  auto isSupportedPhi = [&](PHINode &Phi) -> bool {
546  if (InductionDescriptor::isInductionPHI(&Phi, TheLoop, PSE, ID) &&
548  addInductionPhi(&Phi, ID, AllowedExit);
549  return true;
550  } else {
551  // Bail out for any Phi in the outer loop header that is not a supported
552  // induction.
553  LLVM_DEBUG(
554  dbgs()
555  << "LV: Found unsupported PHI for outer loop vectorization.\n");
556  return false;
557  }
558  };
559 
560  if (llvm::all_of(Header->phis(), isSupportedPhi))
561  return true;
562  else
563  return false;
564 }
565 
566 bool LoopVectorizationLegality::canVectorizeInstrs() {
567  BasicBlock *Header = TheLoop->getHeader();
568 
569  // Look for the attribute signaling the absence of NaNs.
570  Function &F = *Header->getParent();
571  HasFunNoNaNAttr =
572  F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true";
573 
574  // For each block in the loop.
575  for (BasicBlock *BB : TheLoop->blocks()) {
576  // Scan the instructions in the block and look for hazards.
577  for (Instruction &I : *BB) {
578  if (auto *Phi = dyn_cast<PHINode>(&I)) {
579  Type *PhiTy = Phi->getType();
580  // Check that this PHI type is allowed.
581  if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
582  !PhiTy->isPointerTy()) {
583  reportVectorizationFailure("Found a non-int non-pointer PHI",
584  "loop control flow is not understood by vectorizer",
585  "CFGNotUnderstood", ORE, TheLoop);
586  return false;
587  }
588 
589  // If this PHINode is not in the header block, then we know that we
590  // can convert it to select during if-conversion. No need to check if
591  // the PHIs in this block are induction or reduction variables.
592  if (BB != Header) {
593  // Non-header phi nodes that have outside uses can be vectorized. Add
594  // them to the list of allowed exits.
595  // Unsafe cyclic dependencies with header phis are identified during
596  // legalization for reduction, induction and first order
597  // recurrences.
598  continue;
599  }
600 
601  // We only allow if-converted PHIs with exactly two incoming values.
602  if (Phi->getNumIncomingValues() != 2) {
603  reportVectorizationFailure("Found an invalid PHI",
604  "loop control flow is not understood by vectorizer",
605  "CFGNotUnderstood", ORE, TheLoop, Phi);
606  return false;
607  }
608 
609  RecurrenceDescriptor RedDes;
610  if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC,
611  DT)) {
612  if (RedDes.hasUnsafeAlgebra())
613  Requirements->addUnsafeAlgebraInst(RedDes.getUnsafeAlgebraInst());
614  AllowedExit.insert(RedDes.getLoopExitInstr());
615  Reductions[Phi] = RedDes;
616  continue;
617  }
618 
619  // TODO: Instead of recording the AllowedExit, it would be good to record the
620  // complementary set: NotAllowedExit. These include (but may not be
621  // limited to):
622  // 1. Reduction phis as they represent the one-before-last value, which
623  // is not available when vectorized
624  // 2. Induction phis and increment when SCEV predicates cannot be used
625  // outside the loop - see addInductionPhi
626  // 3. Non-Phis with outside uses when SCEV predicates cannot be used
627  // outside the loop - see call to hasOutsideLoopUser in the non-phi
628  // handling below
629  // 4. FirstOrderRecurrence phis that can possibly be handled by
630  // extraction.
631  // By recording these, we can then reason about ways to vectorize each
632  // of these NotAllowedExit.
634  if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID)) {
635  addInductionPhi(Phi, ID, AllowedExit);
636  if (ID.hasUnsafeAlgebra() && !HasFunNoNaNAttr)
637  Requirements->addUnsafeAlgebraInst(ID.getUnsafeAlgebraInst());
638  continue;
639  }
640 
642  SinkAfter, DT)) {
643  FirstOrderRecurrences.insert(Phi);
644  continue;
645  }
646 
647  // As a last resort, coerce the PHI to a AddRec expression
648  // and re-try classifying it a an induction PHI.
649  if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true)) {
650  addInductionPhi(Phi, ID, AllowedExit);
651  continue;
652  }
653 
654  reportVectorizationFailure("Found an unidentified PHI",
655  "value that could not be identified as "
656  "reduction is used outside the loop",
657  "NonReductionValueUsedOutsideLoop", ORE, TheLoop, Phi);
658  return false;
659  } // end of PHI handling
660 
661  // We handle calls that:
662  // * Are debug info intrinsics.
663  // * Have a mapping to an IR intrinsic.
664  // * Have a vector version available.
665  auto *CI = dyn_cast<CallInst>(&I);
666  if (CI && !getVectorIntrinsicIDForCall(CI, TLI) &&
667  !isa<DbgInfoIntrinsic>(CI) &&
668  !(CI->getCalledFunction() && TLI &&
669  TLI->isFunctionVectorizable(CI->getCalledFunction()->getName()))) {
670  // If the call is a recognized math libary call, it is likely that
671  // we can vectorize it given loosened floating-point constraints.
672  LibFunc Func;
673  bool IsMathLibCall =
674  TLI && CI->getCalledFunction() &&
675  CI->getType()->isFloatingPointTy() &&
676  TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) &&
677  TLI->hasOptimizedCodeGen(Func);
678 
679  if (IsMathLibCall) {
680  // TODO: Ideally, we should not use clang-specific language here,
681  // but it's hard to provide meaningful yet generic advice.
682  // Also, should this be guarded by allowExtraAnalysis() and/or be part
683  // of the returned info from isFunctionVectorizable()?
684  reportVectorizationFailure("Found a non-intrinsic callsite",
685  "library call cannot be vectorized. "
686  "Try compiling with -fno-math-errno, -ffast-math, "
687  "or similar flags",
688  "CantVectorizeLibcall", ORE, TheLoop, CI);
689  } else {
690  reportVectorizationFailure("Found a non-intrinsic callsite",
691  "call instruction cannot be vectorized",
692  "CantVectorizeLibcall", ORE, TheLoop, CI);
693  }
694  return false;
695  }
696 
697  // Some intrinsics have scalar arguments and should be same in order for
698  // them to be vectorized (i.e. loop invariant).
699  if (CI) {
700  auto *SE = PSE.getSE();
701  Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI);
702  for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i)
703  if (hasVectorInstrinsicScalarOpd(IntrinID, i)) {
704  if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(i)), TheLoop)) {
705  reportVectorizationFailure("Found unvectorizable intrinsic",
706  "intrinsic instruction cannot be vectorized",
707  "CantVectorizeIntrinsic", ORE, TheLoop, CI);
708  return false;
709  }
710  }
711  }
712 
713  // Check that the instruction return type is vectorizable.
714  // Also, we can't vectorize extractelement instructions.
715  if ((!VectorType::isValidElementType(I.getType()) &&
716  !I.getType()->isVoidTy()) ||
717  isa<ExtractElementInst>(I)) {
718  reportVectorizationFailure("Found unvectorizable type",
719  "instruction return type cannot be vectorized",
720  "CantVectorizeInstructionReturnType", ORE, TheLoop, &I);
721  return false;
722  }
723 
724  // Check that the stored type is vectorizable.
725  if (auto *ST = dyn_cast<StoreInst>(&I)) {
726  Type *T = ST->getValueOperand()->getType();
728  reportVectorizationFailure("Store instruction cannot be vectorized",
729  "store instruction cannot be vectorized",
730  "CantVectorizeStore", ORE, TheLoop, ST);
731  return false;
732  }
733 
734  // For nontemporal stores, check that a nontemporal vector version is
735  // supported on the target.
736  if (ST->getMetadata(LLVMContext::MD_nontemporal)) {
737  // Arbitrarily try a vector of 2 elements.
738  Type *VecTy = VectorType::get(T, /*NumElements=*/2);
739  assert(VecTy && "did not find vectorized version of stored type");
740  unsigned Alignment = getLoadStoreAlignment(ST);
741  if (!TTI->isLegalNTStore(VecTy, Alignment)) {
743  "nontemporal store instruction cannot be vectorized",
744  "nontemporal store instruction cannot be vectorized",
745  "CantVectorizeNontemporalStore", ORE, TheLoop, ST);
746  return false;
747  }
748  }
749 
750  } else if (auto *LD = dyn_cast<LoadInst>(&I)) {
751  if (LD->getMetadata(LLVMContext::MD_nontemporal)) {
752  // For nontemporal loads, check that a nontemporal vector version is
753  // supported on the target (arbitrarily try a vector of 2 elements).
754  Type *VecTy = VectorType::get(I.getType(), /*NumElements=*/2);
755  assert(VecTy && "did not find vectorized version of load type");
756  unsigned Alignment = getLoadStoreAlignment(LD);
757  if (!TTI->isLegalNTLoad(VecTy, Alignment)) {
759  "nontemporal load instruction cannot be vectorized",
760  "nontemporal load instruction cannot be vectorized",
761  "CantVectorizeNontemporalLoad", ORE, TheLoop, LD);
762  return false;
763  }
764  }
765 
766  // FP instructions can allow unsafe algebra, thus vectorizable by
767  // non-IEEE-754 compliant SIMD units.
768  // This applies to floating-point math operations and calls, not memory
769  // operations, shuffles, or casts, as they don't change precision or
770  // semantics.
771  } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) &&
772  !I.isFast()) {
773  LLVM_DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n");
774  Hints->setPotentiallyUnsafe();
775  }
776 
777  // Reduction instructions are allowed to have exit users.
778  // All other instructions must not have external users.
779  if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) {
780  // We can safely vectorize loops where instructions within the loop are
781  // used outside the loop only if the SCEV predicates within the loop is
782  // same as outside the loop. Allowing the exit means reusing the SCEV
783  // outside the loop.
784  if (PSE.getUnionPredicate().isAlwaysTrue()) {
785  AllowedExit.insert(&I);
786  continue;
787  }
788  reportVectorizationFailure("Value cannot be used outside the loop",
789  "value cannot be used outside the loop",
790  "ValueUsedOutsideLoop", ORE, TheLoop, &I);
791  return false;
792  }
793  } // next instr.
794  }
795 
796  if (!PrimaryInduction) {
797  if (Inductions.empty()) {
798  reportVectorizationFailure("Did not find one integer induction var",
799  "loop induction variable could not be identified",
800  "NoInductionVariable", ORE, TheLoop);
801  return false;
802  } else if (!WidestIndTy) {
803  reportVectorizationFailure("Did not find one integer induction var",
804  "integer loop induction variable could not be identified",
805  "NoIntegerInductionVariable", ORE, TheLoop);
806  return false;
807  } else {
808  LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
809  }
810  }
811 
812  // Now we know the widest induction type, check if our found induction
813  // is the same size. If it's not, unset it here and InnerLoopVectorizer
814  // will create another.
815  if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType())
816  PrimaryInduction = nullptr;
817 
818  return true;
819 }
820 
821 bool LoopVectorizationLegality::canVectorizeMemory() {
822  LAI = &(*GetLAA)(*TheLoop);
823  const OptimizationRemarkAnalysis *LAR = LAI->getReport();
824  if (LAR) {
825  ORE->emit([&]() {
826  return OptimizationRemarkAnalysis(Hints->vectorizeAnalysisPassName(),
827  "loop not vectorized: ", *LAR);
828  });
829  }
830  if (!LAI->canVectorizeMemory())
831  return false;
832 
833  if (LAI->hasDependenceInvolvingLoopInvariantAddress()) {
834  reportVectorizationFailure("Stores to a uniform address",
835  "write to a loop invariant address could not be vectorized",
836  "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
837  return false;
838  }
839  Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks());
840  PSE.addPredicate(LAI->getPSE().getUnionPredicate());
841 
842  return true;
843 }
844 
846  Value *In0 = const_cast<Value *>(V);
847  PHINode *PN = dyn_cast_or_null<PHINode>(In0);
848  if (!PN)
849  return false;
850 
851  return Inductions.count(PN);
852 }
853 
855  auto *Inst = dyn_cast<Instruction>(V);
856  return (Inst && InductionCastsToIgnore.count(Inst));
857 }
858 
860  return isInductionPhi(V) || isCastedInductionVariable(V);
861 }
862 
864  return FirstOrderRecurrences.count(Phi);
865 }
866 
868  return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
869 }
870 
871 bool LoopVectorizationLegality::blockCanBePredicated(
872  BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs, bool PreserveGuards) {
873  const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
874 
875  for (Instruction &I : *BB) {
876  // Check that we don't have a constant expression that can trap as operand.
877  for (Value *Operand : I.operands()) {
878  if (auto *C = dyn_cast<Constant>(Operand))
879  if (C->canTrap())
880  return false;
881  }
882  // We might be able to hoist the load.
883  if (I.mayReadFromMemory()) {
884  auto *LI = dyn_cast<LoadInst>(&I);
885  if (!LI)
886  return false;
887  if (!SafePtrs.count(LI->getPointerOperand())) {
888  // !llvm.mem.parallel_loop_access implies if-conversion safety.
889  // Otherwise, record that the load needs (real or emulated) masking
890  // and let the cost model decide.
891  if (!IsAnnotatedParallel || PreserveGuards)
892  MaskedOp.insert(LI);
893  continue;
894  }
895  }
896 
897  if (I.mayWriteToMemory()) {
898  auto *SI = dyn_cast<StoreInst>(&I);
899  if (!SI)
900  return false;
901  // Predicated store requires some form of masking:
902  // 1) masked store HW instruction,
903  // 2) emulation via load-blend-store (only if safe and legal to do so,
904  // be aware on the race conditions), or
905  // 3) element-by-element predicate check and scalar store.
906  MaskedOp.insert(SI);
907  continue;
908  }
909  if (I.mayThrow())
910  return false;
911  }
912 
913  return true;
914 }
915 
916 bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
917  if (!EnableIfConversion) {
918  reportVectorizationFailure("If-conversion is disabled",
919  "if-conversion is disabled",
920  "IfConversionDisabled",
921  ORE, TheLoop);
922  return false;
923  }
924 
925  assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");
926 
927  // A list of pointers that we can safely read and write to.
928  SmallPtrSet<Value *, 8> SafePointes;
929 
930  // Collect safe addresses.
931  for (BasicBlock *BB : TheLoop->blocks()) {
932  if (blockNeedsPredication(BB))
933  continue;
934 
935  for (Instruction &I : *BB)
936  if (auto *Ptr = getLoadStorePointerOperand(&I))
937  SafePointes.insert(Ptr);
938  }
939 
940  // Collect the blocks that need predication.
941  BasicBlock *Header = TheLoop->getHeader();
942  for (BasicBlock *BB : TheLoop->blocks()) {
943  // We don't support switch statements inside loops.
944  if (!isa<BranchInst>(BB->getTerminator())) {
945  reportVectorizationFailure("Loop contains a switch statement",
946  "loop contains a switch statement",
947  "LoopContainsSwitch", ORE, TheLoop,
948  BB->getTerminator());
949  return false;
950  }
951 
952  // We must be able to predicate all blocks that need to be predicated.
953  if (blockNeedsPredication(BB)) {
954  if (!blockCanBePredicated(BB, SafePointes)) {
956  "Control flow cannot be substituted for a select",
957  "control flow cannot be substituted for a select",
958  "NoCFGForSelect", ORE, TheLoop,
959  BB->getTerminator());
960  return false;
961  }
962  } else if (BB != Header && !canIfConvertPHINodes(BB)) {
964  "Control flow cannot be substituted for a select",
965  "control flow cannot be substituted for a select",
966  "NoCFGForSelect", ORE, TheLoop,
967  BB->getTerminator());
968  return false;
969  }
970  }
971 
972  // We can if-convert this loop.
973  return true;
974 }
975 
976 // Helper function to canVectorizeLoopNestCFG.
977 bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
978  bool UseVPlanNativePath) {
979  assert((UseVPlanNativePath || Lp->empty()) &&
980  "VPlan-native path is not enabled.");
981 
982  // TODO: ORE should be improved to show more accurate information when an
983  // outer loop can't be vectorized because a nested loop is not understood or
984  // legal. Something like: "outer_loop_location: loop not vectorized:
985  // (inner_loop_location) loop control flow is not understood by vectorizer".
986 
987  // Store the result and return it at the end instead of exiting early, in case
988  // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
989  bool Result = true;
990  bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
991 
992  // We must have a loop in canonical form. Loops with indirectbr in them cannot
993  // be canonicalized.
994  if (!Lp->getLoopPreheader()) {
995  reportVectorizationFailure("Loop doesn't have a legal pre-header",
996  "loop control flow is not understood by vectorizer",
997  "CFGNotUnderstood", ORE, TheLoop);
998  if (DoExtraAnalysis)
999  Result = false;
1000  else
1001  return false;
1002  }
1003 
1004  // We must have a single backedge.
1005  if (Lp->getNumBackEdges() != 1) {
1006  reportVectorizationFailure("The loop must have a single backedge",
1007  "loop control flow is not understood by vectorizer",
1008  "CFGNotUnderstood", ORE, TheLoop);
1009  if (DoExtraAnalysis)
1010  Result = false;
1011  else
1012  return false;
1013  }
1014 
1015  // We must have a single exiting block.
1016  if (!Lp->getExitingBlock()) {
1017  reportVectorizationFailure("The loop must have an exiting block",
1018  "loop control flow is not understood by vectorizer",
1019  "CFGNotUnderstood", ORE, TheLoop);
1020  if (DoExtraAnalysis)
1021  Result = false;
1022  else
1023  return false;
1024  }
1025 
1026  // We only handle bottom-tested loops, i.e. loop in which the condition is
1027  // checked at the end of each iteration. With that we can assume that all
1028  // instructions in the loop are executed the same number of times.
1029  if (Lp->getExitingBlock() != Lp->getLoopLatch()) {
1030  reportVectorizationFailure("The exiting block is not the loop latch",
1031  "loop control flow is not understood by vectorizer",
1032  "CFGNotUnderstood", ORE, TheLoop);
1033  if (DoExtraAnalysis)
1034  Result = false;
1035  else
1036  return false;
1037  }
1038 
1039  return Result;
1040 }
1041 
1042 bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
1043  Loop *Lp, bool UseVPlanNativePath) {
1044  // Store the result and return it at the end instead of exiting early, in case
1045  // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1046  bool Result = true;
1047  bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1048  if (!canVectorizeLoopCFG(Lp, UseVPlanNativePath)) {
1049  if (DoExtraAnalysis)
1050  Result = false;
1051  else
1052  return false;
1053  }
1054 
1055  // Recursively check whether the loop control flow of nested loops is
1056  // understood.
1057  for (Loop *SubLp : *Lp)
1058  if (!canVectorizeLoopNestCFG(SubLp, UseVPlanNativePath)) {
1059  if (DoExtraAnalysis)
1060  Result = false;
1061  else
1062  return false;
1063  }
1064 
1065  return Result;
1066 }
1067 
1068 bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
1069  // Store the result and return it at the end instead of exiting early, in case
1070  // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1071  bool Result = true;
1072 
1073  bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1074  // Check whether the loop-related control flow in the loop nest is expected by
1075  // vectorizer.
1076  if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) {
1077  if (DoExtraAnalysis)
1078  Result = false;
1079  else
1080  return false;
1081  }
1082 
1083  // We need to have a loop header.
1084  LLVM_DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName()
1085  << '\n');
1086 
1087  // Specific checks for outer loops. We skip the remaining legal checks at this
1088  // point because they don't support outer loops.
1089  if (!TheLoop->empty()) {
1090  assert(UseVPlanNativePath && "VPlan-native path is not enabled.");
1091 
1092  if (!canVectorizeOuterLoop()) {
1093  reportVectorizationFailure("Unsupported outer loop",
1094  "unsupported outer loop",
1095  "UnsupportedOuterLoop",
1096  ORE, TheLoop);
1097  // TODO: Implement DoExtraAnalysis when subsequent legal checks support
1098  // outer loops.
1099  return false;
1100  }
1101 
1102  LLVM_DEBUG(dbgs() << "LV: We can vectorize this outer loop!\n");
1103  return Result;
1104  }
1105 
1106  assert(TheLoop->empty() && "Inner loop expected.");
1107  // Check if we can if-convert non-single-bb loops.
1108  unsigned NumBlocks = TheLoop->getNumBlocks();
1109  if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
1110  LLVM_DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");
1111  if (DoExtraAnalysis)
1112  Result = false;
1113  else
1114  return false;
1115  }
1116 
1117  // Check if we can vectorize the instructions and CFG in this loop.
1118  if (!canVectorizeInstrs()) {
1119  LLVM_DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n");
1120  if (DoExtraAnalysis)
1121  Result = false;
1122  else
1123  return false;
1124  }
1125 
1126  // Go over each instruction and look at memory deps.
1127  if (!canVectorizeMemory()) {
1128  LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
1129  if (DoExtraAnalysis)
1130  Result = false;
1131  else
1132  return false;
1133  }
1134 
1135  LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
1136  << (LAI->getRuntimePointerChecking()->Need
1137  ? " (with a runtime bound check)"
1138  : "")
1139  << "!\n");
1140 
1141  unsigned SCEVThreshold = VectorizeSCEVCheckThreshold;
1142  if (Hints->getForce() == LoopVectorizeHints::FK_Enabled)
1143  SCEVThreshold = PragmaVectorizeSCEVCheckThreshold;
1144 
1145  if (PSE.getUnionPredicate().getComplexity() > SCEVThreshold) {
1146  reportVectorizationFailure("Too many SCEV checks needed",
1147  "Too many SCEV assumptions need to be made and checked at runtime",
1148  "TooManySCEVRunTimeChecks", ORE, TheLoop);
1149  if (DoExtraAnalysis)
1150  Result = false;
1151  else
1152  return false;
1153  }
1154 
1155  // Okay! We've done all the tests. If any have failed, return false. Otherwise
1156  // we can vectorize, and at this point we don't have any other mem analysis
1157  // which may limit our maximum vectorization factor, so just return true with
1158  // no restrictions.
1159  return Result;
1160 }
1161 
1163 
1164  LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");
1165 
1166  if (!PrimaryInduction) {
1168  "No primary induction, cannot fold tail by masking",
1169  "Missing a primary induction variable in the loop, which is "
1170  "needed in order to fold tail by masking as required.",
1171  "NoPrimaryInduction", ORE, TheLoop);
1172  return false;
1173  }
1174 
1175  // TODO: handle reductions when tail is folded by masking.
1176  if (!Reductions.empty()) {
1178  "Loop has reductions, cannot fold tail by masking",
1179  "Cannot fold tail by masking in the presence of reductions.",
1180  "ReductionFoldingTailByMasking", ORE, TheLoop);
1181  return false;
1182  }
1183 
1184  // TODO: handle outside users when tail is folded by masking.
1185  for (auto *AE : AllowedExit) {
1186  // Check that all users of allowed exit values are inside the loop.
1187  for (User *U : AE->users()) {
1188  Instruction *UI = cast<Instruction>(U);
1189  if (TheLoop->contains(UI))
1190  continue;
1192  "Cannot fold tail by masking, loop has an outside user for",
1193  "Cannot fold tail by masking in the presence of live outs.",
1194  "LiveOutFoldingTailByMasking", ORE, TheLoop, UI);
1195  return false;
1196  }
1197  }
1198 
1199  // The list of pointers that we can safely read and write to remains empty.
1200  SmallPtrSet<Value *, 8> SafePointers;
1201 
1202  // Check and mark all blocks for predication, including those that ordinarily
1203  // do not need predication such as the header block.
1204  for (BasicBlock *BB : TheLoop->blocks()) {
1205  if (!blockCanBePredicated(BB, SafePointers, /* MaskAllLoads= */ true)) {
1207  "Cannot fold tail by masking as required",
1208  "control flow cannot be substituted for a select",
1209  "NoCFGForSelect", ORE, TheLoop,
1210  BB->getTerminator());
1211  return false;
1212  }
1213  }
1214 
1215  LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n");
1216  return true;
1217 }
1218 
1219 } // namespace llvm
static bool isUniformLoop(Loop *Lp, Loop *OuterLp)
static unsigned RuntimeMemoryCheckThreshold
performing memory disambiguation checks at runtime do not make more than this number of comparisons...
uint64_t CallInst * C
#define LV_NAME
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:722
Diagnostic information for missed-optimization remarks.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:211
LLVMContext & Context
static Type * getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1)
DiagnosticInfoOptimizationBase::Argument NV
This class represents lattice values for constants.
Definition: AllocatorList.h:23
void reportVectorizationFailure(const StringRef DebugMsg, const StringRef OREMsg, const StringRef ORETag, OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I=nullptr)
Reports a vectorization failure: print DebugMsg for debugging purposes along with the corresponding o...
Instruction * getUnsafeAlgebraInst()
Returns first unsafe algebra instruction in the PHI node&#39;s use-chain.
bool isCastedInductionVariable(const Value *V)
Returns True if V is a cast that is part of an induction def-use chain, and had been proven to be red...
static bool isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D, const SCEV *Expr=nullptr, SmallVectorImpl< Instruction *> *CastsToIgnore=nullptr)
Returns true if Phi is an induction in the loop L.
amdgpu Simplify well known AMD library false FunctionCallee Value const Twine & Name
static MDString * get(LLVMContext &Context, StringRef Str)
Definition: Metadata.cpp:453
ConstantInt * getConstIntStepValue() const
LLVM_NODISCARD bool startswith(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:256
llvm::MDNode * makePostTransformationMetadata(llvm::LLVMContext &Context, MDNode *OrigLoopID, llvm::ArrayRef< llvm::StringRef > RemovePrefixes, llvm::ArrayRef< llvm::MDNode *> AddAttrs)
Create a new LoopID after the loop has been transformed.
Definition: LoopInfo.cpp:1006
bool isUniform(Value *V)
Returns true if the value V is uniform within the loop.
TODO: The following VectorizationFactor was pulled out of LoopVectorizationCostModel class...
This class represents a function call, abstracting a target machine&#39;s calling convention.
int64_t getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp, const ValueToValueMap &StridesMap=ValueToValueMap(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of its element size.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:160
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1165
InductionKind getKind() const
Metadata node.
Definition: Metadata.h:863
F(f)
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1068
An instruction for reading from memory.
Definition: Instructions.h:167
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:137
cl::opt< bool > EnableVPlanPredication
Value * getStartValue() const
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:32
#define DEBUG_TYPE
const char * vectorizeAnalysisPassName() const
If hints are provided that force vectorization, use the AlwaysPrint pass name to force the frontend t...
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:129
Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
bool hasUnsafeAlgebra()
Returns true if the recurrence has unsafe algebra which requires a relaxed floating-point model...
unsigned getNumBackEdges() const
Calculate the number of back edges to the loop header.
Definition: LoopInfo.h:233
This file defines the LoopVectorizationLegality class.
const DataLayout & getDataLayout() const
Get the data layout for the module&#39;s target platform.
Definition: Module.cpp:369
static const unsigned MaxVectorWidth
Maximum SIMD width.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:80
bool isFloatingPointTy() const
Return true if this is one of the six floating-point types.
Definition: Type.h:161
static cl::opt< bool > EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden, cl::desc("Enable if-conversion during vectorization."))
Diagnostic information for optimization analysis remarks.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:41
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:196
LLVM_NODISCARD StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition: StringRef.h:578
BlockT * getHeader() const
Definition: LoopInfo.h:105
int isConsecutivePtr(Value *Ptr)
Check if this pointer is consecutive when vectorizing.
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:200
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
bool isInductionVariable(const Value *V)
Returns True if V can be considered as an induction variable in this loop.
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition: LoopInfo.cpp:509
static bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as a element type.
Definition: Type.cpp:623
Value * getLoadStorePointerOperand(Value *V)
A helper function that returns the pointer operand of a load or store instruction.
An instruction for storing to memory.
Definition: Instructions.h:320
static cl::opt< unsigned > PragmaVectorizeMemoryCheckThreshold("pragma-vectorize-memory-check-threshold", cl::init(128), cl::Hidden, cl::desc("The maximum allowed number of runtime memory checks with a " "vectorize(enable) pragma."))
bool blockNeedsPredication(BasicBlock *BB)
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
Diagnostic information for optimization analysis remarks related to pointer aliasing.
static ConstantAsMetadata * get(Constant *C)
Definition: Metadata.h:409
StringRef getString() const
Definition: Metadata.cpp:463
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space...
Definition: DataLayout.cpp:766
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata *> MDs)
Definition: Metadata.h:1165
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
Integer induction variable. Step = C.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:148
static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst, SmallPtrSetImpl< Value *> &AllowedExit)
Check that the instruction has outside loop users and is not an identified reduction variable...
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:428
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:64
Conditional or Unconditional Branch instruction.
bool prepareToFoldTailByMasking()
Return true if we can vectorize this loop while folding its tail by masking, and mark all respective ...
Instruction * getUnsafeAlgebraInst()
Returns induction operator that does not have "fast-math" property and requires FP unsafe mode...
Value * getIncomingValueForBlock(const BasicBlock *BB) const
bool allowVectorization(Function *F, Loop *L, bool VectorizeOnlyWhenForced) const
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:223
#define H(x, y, z)
Definition: MD5.cpp:57
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:370
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:381
static bool isReductionPHI(PHINode *Phi, Loop *TheLoop, RecurrenceDescriptor &RedDes, DemandedBits *DB=nullptr, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr)
Returns true if Phi is a reduction in TheLoop.
static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp)
bool doesNotMeet(Function *F, Loop *L, const LoopVectorizeHints &Hints)
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:607
size_t size() const
Definition: SmallVector.h:52
static bool isFirstOrderRecurrence(PHINode *Phi, Loop *TheLoop, DenseMap< Instruction *, Instruction *> &SinkAfter, DominatorTree *DT)
Returns true if Phi is a first-order recurrence.
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:61
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition: IVDescriptors.h:62
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:115
Diagnostic information for optimization analysis remarks related to floating-point non-commutativity...
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:417
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
auto size(R &&Range, typename std::enable_if< std::is_same< typename std::iterator_traits< decltype(Range.begin())>::iterator_category, std::random_access_iterator_tag >::value, void >::type *=nullptr) -> decltype(std::distance(Range.begin(), Range.end()))
Get the size of a range.
Definition: STLExtras.h:1146
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file. ...
A struct for saving information about induction variables.
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition: Error.h:147
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type...
Definition: Type.cpp:129
DenseMap< const Value *, Value * > ValueToValueMap
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
bool isFirstOrderRecurrence(const PHINode *Phi)
Returns True if Phi is a first-order recurrence in this loop.
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:653
static const unsigned MaxInterleaveFactor
Maximum vectorization interleave count.
bool hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, unsigned ScalarOpdIdx)
Identifies if the vector form of the intrinsic has a scalar operand.
Definition: VectorUtils.cpp:91
unsigned getNumIncomingValues() const
Return the number of incoming edges.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
PHINode * getCanonicalInductionVariable() const
Check to see if the loop has a canonical induction variable: an integer recurrence that starts at 0 a...
Definition: LoopInfo.cpp:146
static bool canIfConvertPHINodes(BasicBlock *BB)
Check whether it is safe to if-convert this phi node.
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:55
bool isInductionPhi(const Value *V)
Returns True if V is a Phi node of an induction variable in this loop.
Class for arbitrary precision integers.
Definition: APInt.h:69
iterator_range< user_iterator > users()
Definition: Value.h:419
bool hasUnsafeAlgebra()
Returns true if the induction type is FP and the binary operator does not have the "fast-math" proper...
const SmallVectorImpl< Instruction * > & getCastInsts() const
Returns a reference to the type cast instructions in the induction update chain, that are redundant w...
LoopVectorizeHints(const Loop *L, bool InterleaveOnlyWhenForced, OptimizationRemarkEmitter &ORE)
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
Definition: LoopInfo.cpp:485
static const size_t npos
Definition: StringRef.h:50
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:175
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:55
Instruction * getLoopExitInstr()
StringRef getValueAsString() const
Return the attribute&#39;s value as a string.
Definition: Attributes.cpp:223
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:509
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Definition: Type.cpp:609
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:106
#define I(x, y, z)
Definition: MD5.cpp:58
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:332
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:324
Collection of parameters shared beetween the Loop Vectorizer and the Loop Access Analysis.
bool canVectorize(bool UseVPlanNativePath)
Returns true if it is legal to vectorize this loop.
std::string str() const
Return the twine contents as a std::string.
Definition: Twine.cpp:17
bool empty() const
Definition: LoopInfo.h:151
unsigned getLoadStoreAlignment(Value *I)
A helper function that returns the alignment of load or store instruction.
void setAlreadyVectorized()
Mark the loop L as already vectorized by setting the width to 1.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVM Value Representation.
Definition: Value.h:73
static bool blockNeedsPredication(BasicBlock *BB, Loop *TheLoop, DominatorTree *DT)
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.h:333
static unsigned VectorizationInterleave
Interleave factor as overridden by the user.
static Type * convertPointerToIntegerType(const DataLayout &DL, Type *Ty)
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
static cl::opt< unsigned > VectorizeSCEVCheckThreshold("vectorize-scev-check-threshold", cl::init(16), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed."))
A single uniqued string.
Definition: Metadata.h:603
static cl::opt< unsigned > PragmaVectorizeSCEVCheckThreshold("pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed with a " "vectorize(enable) pragma"))
Utility class for getting and setting loop vectorizer hints in the form of loop metadata.
static bool isInterleaveForced()
True if force-vector-interleave was specified by the user.
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1074
#define LLVM_DEBUG(X)
Definition: Debug.h:122
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:49
Root of the metadata hierarchy.
Definition: Metadata.h:57
The optimization diagnostic interface.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
void emitRemarkWithHints() const
Dumps all the hint information.
void validate(const Triple &TT, const FeatureBitset &FeatureBits)