LLVM  10.0.0svn
LoopVectorizationLegality.cpp
Go to the documentation of this file.
1 //===- LoopVectorizationLegality.cpp --------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file provides loop vectorization legality analysis. Original code
10 // resided in LoopVectorize.cpp for a long time.
11 //
12 // At this point, it is implemented as a utility class, not as an analysis
13 // pass. It should be easy to create an analysis pass around it if there
14 // is a need (but D45420 needs to happen first).
15 //
18 #include "llvm/Analysis/Loads.h"
21 #include "llvm/IR/IntrinsicInst.h"
22 
23 using namespace llvm;
24 
25 #define LV_NAME "loop-vectorize"
26 #define DEBUG_TYPE LV_NAME
27 
29 
30 static cl::opt<bool>
31  EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
32  cl::desc("Enable if-conversion during vectorization."));
33 
35  "pragma-vectorize-memory-check-threshold", cl::init(128), cl::Hidden,
36  cl::desc("The maximum allowed number of runtime memory checks with a "
37  "vectorize(enable) pragma."));
38 
40  "vectorize-scev-check-threshold", cl::init(16), cl::Hidden,
41  cl::desc("The maximum number of SCEV checks allowed."));
42 
44  "pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden,
45  cl::desc("The maximum number of SCEV checks allowed with a "
46  "vectorize(enable) pragma"));
47 
48 /// Maximum vectorization interleave count.
49 static const unsigned MaxInterleaveFactor = 16;
50 
51 namespace llvm {
52 
53 bool LoopVectorizeHints::Hint::validate(unsigned Val) {
54  switch (Kind) {
55  case HK_WIDTH:
56  return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth;
57  case HK_UNROLL:
58  return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
59  case HK_FORCE:
60  return (Val <= 1);
61  case HK_ISVECTORIZED:
62  case HK_PREDICATE:
63  return (Val == 0 || Val == 1);
64  }
65  return false;
66 }
67 
69  bool InterleaveOnlyWhenForced,
71  : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH),
72  Interleave("interleave.count", InterleaveOnlyWhenForced, HK_UNROLL),
73  Force("vectorize.enable", FK_Undefined, HK_FORCE),
74  IsVectorized("isvectorized", 0, HK_ISVECTORIZED),
75  Predicate("vectorize.predicate.enable", 0, HK_PREDICATE), TheLoop(L),
76  ORE(ORE) {
77  // Populate values with existing loop metadata.
78  getHintsFromMetadata();
79 
80  // force-vector-interleave overrides DisableInterleaving.
83 
84  if (IsVectorized.Value != 1)
85  // If the vectorization width and interleaving count are both 1 then
86  // consider the loop to have been already vectorized because there's
87  // nothing more that we can do.
88  IsVectorized.Value = Width.Value == 1 && Interleave.Value == 1;
89  LLVM_DEBUG(if (InterleaveOnlyWhenForced && Interleave.Value == 1) dbgs()
90  << "LV: Interleaving disabled by the pass manager\n");
91 }
92 
94  LLVMContext &Context = TheLoop->getHeader()->getContext();
95 
96  MDNode *IsVectorizedMD = MDNode::get(
97  Context,
98  {MDString::get(Context, "llvm.loop.isvectorized"),
100  MDNode *LoopID = TheLoop->getLoopID();
101  MDNode *NewLoopID =
102  makePostTransformationMetadata(Context, LoopID,
103  {Twine(Prefix(), "vectorize.").str(),
104  Twine(Prefix(), "interleave.").str()},
105  {IsVectorizedMD});
106  TheLoop->setLoopID(NewLoopID);
107 
108  // Update internal cache.
109  IsVectorized.Value = 1;
110 }
111 
113  Function *F, Loop *L, bool VectorizeOnlyWhenForced) const {
115  LLVM_DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n");
117  return false;
118  }
119 
120  if (VectorizeOnlyWhenForced && getForce() != LoopVectorizeHints::FK_Enabled) {
121  LLVM_DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n");
123  return false;
124  }
125 
126  if (getIsVectorized() == 1) {
127  LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
128  // FIXME: Add interleave.disable metadata. This will allow
129  // vectorize.disable to be used without disabling the pass and errors
130  // to differentiate between disabled vectorization and a width of 1.
131  ORE.emit([&]() {
133  "AllDisabled", L->getStartLoc(),
134  L->getHeader())
135  << "loop not vectorized: vectorization and interleaving are "
136  "explicitly disabled, or the loop has already been "
137  "vectorized";
138  });
139  return false;
140  }
141 
142  return true;
143 }
144 
146  using namespace ore;
147 
148  ORE.emit([&]() {
149  if (Force.Value == LoopVectorizeHints::FK_Disabled)
150  return OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled",
151  TheLoop->getStartLoc(),
152  TheLoop->getHeader())
153  << "loop not vectorized: vectorization is explicitly disabled";
154  else {
155  OptimizationRemarkMissed R(LV_NAME, "MissedDetails",
156  TheLoop->getStartLoc(), TheLoop->getHeader());
157  R << "loop not vectorized";
158  if (Force.Value == LoopVectorizeHints::FK_Enabled) {
159  R << " (Force=" << NV("Force", true);
160  if (Width.Value != 0)
161  R << ", Vector Width=" << NV("VectorWidth", Width.Value);
162  if (Interleave.Value != 0)
163  R << ", Interleave Count=" << NV("InterleaveCount", Interleave.Value);
164  R << ")";
165  }
166  return R;
167  }
168  });
169 }
170 
172  if (getWidth() == 1)
173  return LV_NAME;
175  return LV_NAME;
177  return LV_NAME;
179 }
180 
181 void LoopVectorizeHints::getHintsFromMetadata() {
182  MDNode *LoopID = TheLoop->getLoopID();
183  if (!LoopID)
184  return;
185 
186  // First operand should refer to the loop id itself.
187  assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
188  assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
189 
190  for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
191  const MDString *S = nullptr;
193 
194  // The expected hint is either a MDString or a MDNode with the first
195  // operand a MDString.
196  if (const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i))) {
197  if (!MD || MD->getNumOperands() == 0)
198  continue;
199  S = dyn_cast<MDString>(MD->getOperand(0));
200  for (unsigned i = 1, ie = MD->getNumOperands(); i < ie; ++i)
201  Args.push_back(MD->getOperand(i));
202  } else {
203  S = dyn_cast<MDString>(LoopID->getOperand(i));
204  assert(Args.size() == 0 && "too many arguments for MDString");
205  }
206 
207  if (!S)
208  continue;
209 
210  // Check if the hint starts with the loop metadata prefix.
211  StringRef Name = S->getString();
212  if (Args.size() == 1)
213  setHint(Name, Args[0]);
214  }
215 }
216 
217 void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) {
218  if (!Name.startswith(Prefix()))
219  return;
220  Name = Name.substr(Prefix().size(), StringRef::npos);
221 
222  const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg);
223  if (!C)
224  return;
225  unsigned Val = C->getZExtValue();
226 
227  Hint *Hints[] = {&Width, &Interleave, &Force, &IsVectorized, &Predicate};
228  for (auto H : Hints) {
229  if (Name == H->Name) {
230  if (H->validate(Val))
231  H->Value = Val;
232  else
233  LLVM_DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n");
234  break;
235  }
236  }
237 }
238 
240  Function *F, Loop *L, const LoopVectorizeHints &Hints) {
241  const char *PassName = Hints.vectorizeAnalysisPassName();
242  bool Failed = false;
243  if (UnsafeAlgebraInst && !Hints.allowReordering()) {
244  ORE.emit([&]() {
246  PassName, "CantReorderFPOps", UnsafeAlgebraInst->getDebugLoc(),
247  UnsafeAlgebraInst->getParent())
248  << "loop not vectorized: cannot prove it is safe to reorder "
249  "floating-point operations";
250  });
251  Failed = true;
252  }
253 
254  // Test if runtime memcheck thresholds are exceeded.
255  bool PragmaThresholdReached =
256  NumRuntimePointerChecks > PragmaVectorizeMemoryCheckThreshold;
257  bool ThresholdReached =
258  NumRuntimePointerChecks > VectorizerParams::RuntimeMemoryCheckThreshold;
259  if ((ThresholdReached && !Hints.allowReordering()) ||
260  PragmaThresholdReached) {
261  ORE.emit([&]() {
262  return OptimizationRemarkAnalysisAliasing(PassName, "CantReorderMemOps",
263  L->getStartLoc(),
264  L->getHeader())
265  << "loop not vectorized: cannot prove it is safe to reorder "
266  "memory operations";
267  });
268  LLVM_DEBUG(dbgs() << "LV: Too many memory checks needed.\n");
269  Failed = true;
270  }
271 
272  return Failed;
273 }
274 
275 // Return true if the inner loop \p Lp is uniform with regard to the outer loop
276 // \p OuterLp (i.e., if the outer loop is vectorized, all the vector lanes
277 // executing the inner loop will execute the same iterations). This check is
278 // very constrained for now but it will be relaxed in the future. \p Lp is
279 // considered uniform if it meets all the following conditions:
280 // 1) it has a canonical IV (starting from 0 and with stride 1),
281 // 2) its latch terminator is a conditional branch and,
282 // 3) its latch condition is a compare instruction whose operands are the
283 // canonical IV and an OuterLp invariant.
284 // This check doesn't take into account the uniformity of other conditions not
285 // related to the loop latch because they don't affect the loop uniformity.
286 //
287 // NOTE: We decided to keep all these checks and its associated documentation
288 // together so that we can easily have a picture of the current supported loop
289 // nests. However, some of the current checks don't depend on \p OuterLp and
290 // would be redundantly executed for each \p Lp if we invoked this function for
291 // different candidate outer loops. This is not the case for now because we
292 // don't currently have the infrastructure to evaluate multiple candidate outer
293 // loops and \p OuterLp will be a fixed parameter while we only support explicit
294 // outer loop vectorization. It's also very likely that these checks go away
295 // before introducing the aforementioned infrastructure. However, if this is not
296 // the case, we should move the \p OuterLp independent checks to a separate
297 // function that is only executed once for each \p Lp.
298 static bool isUniformLoop(Loop *Lp, Loop *OuterLp) {
299  assert(Lp->getLoopLatch() && "Expected loop with a single latch.");
300 
301  // If Lp is the outer loop, it's uniform by definition.
302  if (Lp == OuterLp)
303  return true;
304  assert(OuterLp->contains(Lp) && "OuterLp must contain Lp.");
305 
306  // 1.
308  if (!IV) {
309  LLVM_DEBUG(dbgs() << "LV: Canonical IV not found.\n");
310  return false;
311  }
312 
313  // 2.
314  BasicBlock *Latch = Lp->getLoopLatch();
315  auto *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator());
316  if (!LatchBr || LatchBr->isUnconditional()) {
317  LLVM_DEBUG(dbgs() << "LV: Unsupported loop latch branch.\n");
318  return false;
319  }
320 
321  // 3.
322  auto *LatchCmp = dyn_cast<CmpInst>(LatchBr->getCondition());
323  if (!LatchCmp) {
324  LLVM_DEBUG(
325  dbgs() << "LV: Loop latch condition is not a compare instruction.\n");
326  return false;
327  }
328 
329  Value *CondOp0 = LatchCmp->getOperand(0);
330  Value *CondOp1 = LatchCmp->getOperand(1);
331  Value *IVUpdate = IV->getIncomingValueForBlock(Latch);
332  if (!(CondOp0 == IVUpdate && OuterLp->isLoopInvariant(CondOp1)) &&
333  !(CondOp1 == IVUpdate && OuterLp->isLoopInvariant(CondOp0))) {
334  LLVM_DEBUG(dbgs() << "LV: Loop latch condition is not uniform.\n");
335  return false;
336  }
337 
338  return true;
339 }
340 
341 // Return true if \p Lp and all its nested loops are uniform with regard to \p
342 // OuterLp.
343 static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp) {
344  if (!isUniformLoop(Lp, OuterLp))
345  return false;
346 
347  // Check if nested loops are uniform.
348  for (Loop *SubLp : *Lp)
349  if (!isUniformLoopNest(SubLp, OuterLp))
350  return false;
351 
352  return true;
353 }
354 
355 /// Check whether it is safe to if-convert this phi node.
356 ///
357 /// Phi nodes with constant expressions that can trap are not safe to if
358 /// convert.
360  for (PHINode &Phi : BB->phis()) {
361  for (Value *V : Phi.incoming_values())
362  if (auto *C = dyn_cast<Constant>(V))
363  if (C->canTrap())
364  return false;
365  }
366  return true;
367 }
368 
370  if (Ty->isPointerTy())
371  return DL.getIntPtrType(Ty);
372 
373  // It is possible that char's or short's overflow when we ask for the loop's
374  // trip count, work around this by changing the type size.
375  if (Ty->getScalarSizeInBits() < 32)
376  return Type::getInt32Ty(Ty->getContext());
377 
378  return Ty;
379 }
380 
381 static Type *getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) {
382  Ty0 = convertPointerToIntegerType(DL, Ty0);
383  Ty1 = convertPointerToIntegerType(DL, Ty1);
384  if (Ty0->getScalarSizeInBits() > Ty1->getScalarSizeInBits())
385  return Ty0;
386  return Ty1;
387 }
388 
389 /// Check that the instruction has outside loop users and is not an
390 /// identified reduction variable.
391 static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
392  SmallPtrSetImpl<Value *> &AllowedExit) {
393  // Reductions, Inductions and non-header phis are allowed to have exit users. All
394  // other instructions must not have external users.
395  if (!AllowedExit.count(Inst))
396  // Check that all of the users of the loop are inside the BB.
397  for (User *U : Inst->users()) {
398  Instruction *UI = cast<Instruction>(U);
399  // This user may be a reduction exit value.
400  if (!TheLoop->contains(UI)) {
401  LLVM_DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n');
402  return true;
403  }
404  }
405  return false;
406 }
407 
409  const ValueToValueMap &Strides =
410  getSymbolicStrides() ? *getSymbolicStrides() : ValueToValueMap();
411 
412  bool CanAddPredicate = !TheLoop->getHeader()->getParent()->hasOptSize();
413  int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, CanAddPredicate, false);
414  if (Stride == 1 || Stride == -1)
415  return Stride;
416  return 0;
417 }
418 
420  return LAI->isUniform(V);
421 }
422 
423 bool LoopVectorizationLegality::canVectorizeOuterLoop() {
424  assert(!TheLoop->empty() && "We are not vectorizing an outer loop.");
425  // Store the result and return it at the end instead of exiting early, in case
426  // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
427  bool Result = true;
428  bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
429 
430  for (BasicBlock *BB : TheLoop->blocks()) {
431  // Check whether the BB terminator is a BranchInst. Any other terminator is
432  // not supported yet.
433  auto *Br = dyn_cast<BranchInst>(BB->getTerminator());
434  if (!Br) {
435  reportVectorizationFailure("Unsupported basic block terminator",
436  "loop control flow is not understood by vectorizer",
437  "CFGNotUnderstood", ORE, TheLoop);
438  if (DoExtraAnalysis)
439  Result = false;
440  else
441  return false;
442  }
443 
444  // Check whether the BranchInst is a supported one. Only unconditional
445  // branches, conditional branches with an outer loop invariant condition or
446  // backedges are supported.
447  // FIXME: We skip these checks when VPlan predication is enabled as we
448  // want to allow divergent branches. This whole check will be removed
449  // once VPlan predication is on by default.
450  if (!EnableVPlanPredication && Br && Br->isConditional() &&
451  !TheLoop->isLoopInvariant(Br->getCondition()) &&
452  !LI->isLoopHeader(Br->getSuccessor(0)) &&
453  !LI->isLoopHeader(Br->getSuccessor(1))) {
454  reportVectorizationFailure("Unsupported conditional branch",
455  "loop control flow is not understood by vectorizer",
456  "CFGNotUnderstood", ORE, TheLoop);
457  if (DoExtraAnalysis)
458  Result = false;
459  else
460  return false;
461  }
462  }
463 
464  // Check whether inner loops are uniform. At this point, we only support
465  // simple outer loops scenarios with uniform nested loops.
466  if (!isUniformLoopNest(TheLoop /*loop nest*/,
467  TheLoop /*context outer loop*/)) {
468  reportVectorizationFailure("Outer loop contains divergent loops",
469  "loop control flow is not understood by vectorizer",
470  "CFGNotUnderstood", ORE, TheLoop);
471  if (DoExtraAnalysis)
472  Result = false;
473  else
474  return false;
475  }
476 
477  // Check whether we are able to set up outer loop induction.
478  if (!setupOuterLoopInductions()) {
479  reportVectorizationFailure("Unsupported outer loop Phi(s)",
480  "Unsupported outer loop Phi(s)",
481  "UnsupportedPhi", ORE, TheLoop);
482  if (DoExtraAnalysis)
483  Result = false;
484  else
485  return false;
486  }
487 
488  return Result;
489 }
490 
491 void LoopVectorizationLegality::addInductionPhi(
492  PHINode *Phi, const InductionDescriptor &ID,
493  SmallPtrSetImpl<Value *> &AllowedExit) {
494  Inductions[Phi] = ID;
495 
496  // In case this induction also comes with casts that we know we can ignore
497  // in the vectorized loop body, record them here. All casts could be recorded
498  // here for ignoring, but suffices to record only the first (as it is the
499  // only one that may bw used outside the cast sequence).
500  const SmallVectorImpl<Instruction *> &Casts = ID.getCastInsts();
501  if (!Casts.empty())
502  InductionCastsToIgnore.insert(*Casts.begin());
503 
504  Type *PhiTy = Phi->getType();
505  const DataLayout &DL = Phi->getModule()->getDataLayout();
506 
507  // Get the widest type.
508  if (!PhiTy->isFloatingPointTy()) {
509  if (!WidestIndTy)
510  WidestIndTy = convertPointerToIntegerType(DL, PhiTy);
511  else
512  WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy);
513  }
514 
515  // Int inductions are special because we only allow one IV.
518  isa<Constant>(ID.getStartValue()) &&
519  cast<Constant>(ID.getStartValue())->isNullValue()) {
520 
521  // Use the phi node with the widest type as induction. Use the last
522  // one if there are multiple (no good reason for doing this other
523  // than it is expedient). We've checked that it begins at zero and
524  // steps by one, so this is a canonical induction variable.
525  if (!PrimaryInduction || PhiTy == WidestIndTy)
526  PrimaryInduction = Phi;
527  }
528 
529  // Both the PHI node itself, and the "post-increment" value feeding
530  // back into the PHI node may have external users.
531  // We can allow those uses, except if the SCEVs we have for them rely
532  // on predicates that only hold within the loop, since allowing the exit
533  // currently means re-using this SCEV outside the loop (see PR33706 for more
534  // details).
535  if (PSE.getUnionPredicate().isAlwaysTrue()) {
536  AllowedExit.insert(Phi);
537  AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch()));
538  }
539 
540  LLVM_DEBUG(dbgs() << "LV: Found an induction variable.\n");
541 }
542 
543 bool LoopVectorizationLegality::setupOuterLoopInductions() {
544  BasicBlock *Header = TheLoop->getHeader();
545 
546  // Returns true if a given Phi is a supported induction.
547  auto isSupportedPhi = [&](PHINode &Phi) -> bool {
549  if (InductionDescriptor::isInductionPHI(&Phi, TheLoop, PSE, ID) &&
551  addInductionPhi(&Phi, ID, AllowedExit);
552  return true;
553  } else {
554  // Bail out for any Phi in the outer loop header that is not a supported
555  // induction.
556  LLVM_DEBUG(
557  dbgs()
558  << "LV: Found unsupported PHI for outer loop vectorization.\n");
559  return false;
560  }
561  };
562 
563  if (llvm::all_of(Header->phis(), isSupportedPhi))
564  return true;
565  else
566  return false;
567 }
568 
569 bool LoopVectorizationLegality::canVectorizeInstrs() {
570  BasicBlock *Header = TheLoop->getHeader();
571 
572  // Look for the attribute signaling the absence of NaNs.
573  Function &F = *Header->getParent();
574  HasFunNoNaNAttr =
575  F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true";
576 
577  // For each block in the loop.
578  for (BasicBlock *BB : TheLoop->blocks()) {
579  // Scan the instructions in the block and look for hazards.
580  for (Instruction &I : *BB) {
581  if (auto *Phi = dyn_cast<PHINode>(&I)) {
582  Type *PhiTy = Phi->getType();
583  // Check that this PHI type is allowed.
584  if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
585  !PhiTy->isPointerTy()) {
586  reportVectorizationFailure("Found a non-int non-pointer PHI",
587  "loop control flow is not understood by vectorizer",
588  "CFGNotUnderstood", ORE, TheLoop);
589  return false;
590  }
591 
592  // If this PHINode is not in the header block, then we know that we
593  // can convert it to select during if-conversion. No need to check if
594  // the PHIs in this block are induction or reduction variables.
595  if (BB != Header) {
596  // Non-header phi nodes that have outside uses can be vectorized. Add
597  // them to the list of allowed exits.
598  // Unsafe cyclic dependencies with header phis are identified during
599  // legalization for reduction, induction and first order
600  // recurrences.
601  AllowedExit.insert(&I);
602  continue;
603  }
604 
605  // We only allow if-converted PHIs with exactly two incoming values.
606  if (Phi->getNumIncomingValues() != 2) {
607  reportVectorizationFailure("Found an invalid PHI",
608  "loop control flow is not understood by vectorizer",
609  "CFGNotUnderstood", ORE, TheLoop, Phi);
610  return false;
611  }
612 
613  RecurrenceDescriptor RedDes;
614  if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC,
615  DT)) {
616  if (RedDes.hasUnsafeAlgebra())
617  Requirements->addUnsafeAlgebraInst(RedDes.getUnsafeAlgebraInst());
618  AllowedExit.insert(RedDes.getLoopExitInstr());
619  Reductions[Phi] = RedDes;
620  continue;
621  }
622 
623  // TODO: Instead of recording the AllowedExit, it would be good to record the
624  // complementary set: NotAllowedExit. These include (but may not be
625  // limited to):
626  // 1. Reduction phis as they represent the one-before-last value, which
627  // is not available when vectorized
628  // 2. Induction phis and increment when SCEV predicates cannot be used
629  // outside the loop - see addInductionPhi
630  // 3. Non-Phis with outside uses when SCEV predicates cannot be used
631  // outside the loop - see call to hasOutsideLoopUser in the non-phi
632  // handling below
633  // 4. FirstOrderRecurrence phis that can possibly be handled by
634  // extraction.
635  // By recording these, we can then reason about ways to vectorize each
636  // of these NotAllowedExit.
638  if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID)) {
639  addInductionPhi(Phi, ID, AllowedExit);
640  if (ID.hasUnsafeAlgebra() && !HasFunNoNaNAttr)
641  Requirements->addUnsafeAlgebraInst(ID.getUnsafeAlgebraInst());
642  continue;
643  }
644 
646  SinkAfter, DT)) {
647  FirstOrderRecurrences.insert(Phi);
648  continue;
649  }
650 
651  // As a last resort, coerce the PHI to a AddRec expression
652  // and re-try classifying it a an induction PHI.
653  if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true)) {
654  addInductionPhi(Phi, ID, AllowedExit);
655  continue;
656  }
657 
658  reportVectorizationFailure("Found an unidentified PHI",
659  "value that could not be identified as "
660  "reduction is used outside the loop",
661  "NonReductionValueUsedOutsideLoop", ORE, TheLoop, Phi);
662  return false;
663  } // end of PHI handling
664 
665  // We handle calls that:
666  // * Are debug info intrinsics.
667  // * Have a mapping to an IR intrinsic.
668  // * Have a vector version available.
669  auto *CI = dyn_cast<CallInst>(&I);
670  if (CI && !getVectorIntrinsicIDForCall(CI, TLI) &&
671  !isa<DbgInfoIntrinsic>(CI) &&
672  !(CI->getCalledFunction() && TLI &&
673  TLI->isFunctionVectorizable(CI->getCalledFunction()->getName()))) {
674  // If the call is a recognized math libary call, it is likely that
675  // we can vectorize it given loosened floating-point constraints.
676  LibFunc Func;
677  bool IsMathLibCall =
678  TLI && CI->getCalledFunction() &&
679  CI->getType()->isFloatingPointTy() &&
680  TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) &&
681  TLI->hasOptimizedCodeGen(Func);
682 
683  if (IsMathLibCall) {
684  // TODO: Ideally, we should not use clang-specific language here,
685  // but it's hard to provide meaningful yet generic advice.
686  // Also, should this be guarded by allowExtraAnalysis() and/or be part
687  // of the returned info from isFunctionVectorizable()?
688  reportVectorizationFailure("Found a non-intrinsic callsite",
689  "library call cannot be vectorized. "
690  "Try compiling with -fno-math-errno, -ffast-math, "
691  "or similar flags",
692  "CantVectorizeLibcall", ORE, TheLoop, CI);
693  } else {
694  reportVectorizationFailure("Found a non-intrinsic callsite",
695  "call instruction cannot be vectorized",
696  "CantVectorizeLibcall", ORE, TheLoop, CI);
697  }
698  return false;
699  }
700 
701  // Some intrinsics have scalar arguments and should be same in order for
702  // them to be vectorized (i.e. loop invariant).
703  if (CI) {
704  auto *SE = PSE.getSE();
705  Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI);
706  for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i)
707  if (hasVectorInstrinsicScalarOpd(IntrinID, i)) {
708  if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(i)), TheLoop)) {
709  reportVectorizationFailure("Found unvectorizable intrinsic",
710  "intrinsic instruction cannot be vectorized",
711  "CantVectorizeIntrinsic", ORE, TheLoop, CI);
712  return false;
713  }
714  }
715  }
716 
717  // Check that the instruction return type is vectorizable.
718  // Also, we can't vectorize extractelement instructions.
719  if ((!VectorType::isValidElementType(I.getType()) &&
720  !I.getType()->isVoidTy()) ||
721  isa<ExtractElementInst>(I)) {
722  reportVectorizationFailure("Found unvectorizable type",
723  "instruction return type cannot be vectorized",
724  "CantVectorizeInstructionReturnType", ORE, TheLoop, &I);
725  return false;
726  }
727 
728  // Check that the stored type is vectorizable.
729  if (auto *ST = dyn_cast<StoreInst>(&I)) {
730  Type *T = ST->getValueOperand()->getType();
732  reportVectorizationFailure("Store instruction cannot be vectorized",
733  "store instruction cannot be vectorized",
734  "CantVectorizeStore", ORE, TheLoop, ST);
735  return false;
736  }
737 
738  // For nontemporal stores, check that a nontemporal vector version is
739  // supported on the target.
740  if (ST->getMetadata(LLVMContext::MD_nontemporal)) {
741  // Arbitrarily try a vector of 2 elements.
742  Type *VecTy = VectorType::get(T, /*NumElements=*/2);
743  assert(VecTy && "did not find vectorized version of stored type");
744  const MaybeAlign Alignment = getLoadStoreAlignment(ST);
745  assert(Alignment && "Alignment should be set");
746  if (!TTI->isLegalNTStore(VecTy, *Alignment)) {
748  "nontemporal store instruction cannot be vectorized",
749  "nontemporal store instruction cannot be vectorized",
750  "CantVectorizeNontemporalStore", ORE, TheLoop, ST);
751  return false;
752  }
753  }
754 
755  } else if (auto *LD = dyn_cast<LoadInst>(&I)) {
756  if (LD->getMetadata(LLVMContext::MD_nontemporal)) {
757  // For nontemporal loads, check that a nontemporal vector version is
758  // supported on the target (arbitrarily try a vector of 2 elements).
759  Type *VecTy = VectorType::get(I.getType(), /*NumElements=*/2);
760  assert(VecTy && "did not find vectorized version of load type");
761  const MaybeAlign Alignment = getLoadStoreAlignment(LD);
762  assert(Alignment && "Alignment should be set");
763  if (!TTI->isLegalNTLoad(VecTy, *Alignment)) {
765  "nontemporal load instruction cannot be vectorized",
766  "nontemporal load instruction cannot be vectorized",
767  "CantVectorizeNontemporalLoad", ORE, TheLoop, LD);
768  return false;
769  }
770  }
771 
772  // FP instructions can allow unsafe algebra, thus vectorizable by
773  // non-IEEE-754 compliant SIMD units.
774  // This applies to floating-point math operations and calls, not memory
775  // operations, shuffles, or casts, as they don't change precision or
776  // semantics.
777  } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) &&
778  !I.isFast()) {
779  LLVM_DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n");
780  Hints->setPotentiallyUnsafe();
781  }
782 
783  // Reduction instructions are allowed to have exit users.
784  // All other instructions must not have external users.
785  if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) {
786  // We can safely vectorize loops where instructions within the loop are
787  // used outside the loop only if the SCEV predicates within the loop is
788  // same as outside the loop. Allowing the exit means reusing the SCEV
789  // outside the loop.
790  if (PSE.getUnionPredicate().isAlwaysTrue()) {
791  AllowedExit.insert(&I);
792  continue;
793  }
794  reportVectorizationFailure("Value cannot be used outside the loop",
795  "value cannot be used outside the loop",
796  "ValueUsedOutsideLoop", ORE, TheLoop, &I);
797  return false;
798  }
799  } // next instr.
800  }
801 
802  if (!PrimaryInduction) {
803  if (Inductions.empty()) {
804  reportVectorizationFailure("Did not find one integer induction var",
805  "loop induction variable could not be identified",
806  "NoInductionVariable", ORE, TheLoop);
807  return false;
808  } else if (!WidestIndTy) {
809  reportVectorizationFailure("Did not find one integer induction var",
810  "integer loop induction variable could not be identified",
811  "NoIntegerInductionVariable", ORE, TheLoop);
812  return false;
813  } else {
814  LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
815  }
816  }
817 
818  // Now we know the widest induction type, check if our found induction
819  // is the same size. If it's not, unset it here and InnerLoopVectorizer
820  // will create another.
821  if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType())
822  PrimaryInduction = nullptr;
823 
824  return true;
825 }
826 
827 bool LoopVectorizationLegality::canVectorizeMemory() {
828  LAI = &(*GetLAA)(*TheLoop);
829  const OptimizationRemarkAnalysis *LAR = LAI->getReport();
830  if (LAR) {
831  ORE->emit([&]() {
832  return OptimizationRemarkAnalysis(Hints->vectorizeAnalysisPassName(),
833  "loop not vectorized: ", *LAR);
834  });
835  }
836  if (!LAI->canVectorizeMemory())
837  return false;
838 
839  if (LAI->hasDependenceInvolvingLoopInvariantAddress()) {
840  reportVectorizationFailure("Stores to a uniform address",
841  "write to a loop invariant address could not be vectorized",
842  "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
843  return false;
844  }
845  Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks());
846  PSE.addPredicate(LAI->getPSE().getUnionPredicate());
847 
848  return true;
849 }
850 
852  Value *In0 = const_cast<Value *>(V);
853  PHINode *PN = dyn_cast_or_null<PHINode>(In0);
854  if (!PN)
855  return false;
856 
857  return Inductions.count(PN);
858 }
859 
861  auto *Inst = dyn_cast<Instruction>(V);
862  return (Inst && InductionCastsToIgnore.count(Inst));
863 }
864 
866  return isInductionPhi(V) || isCastedInductionVariable(V);
867 }
868 
870  return FirstOrderRecurrences.count(Phi);
871 }
872 
874  return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
875 }
876 
877 bool LoopVectorizationLegality::blockCanBePredicated(
878  BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs, bool PreserveGuards) {
879  const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
880 
881  for (Instruction &I : *BB) {
882  // Check that we don't have a constant expression that can trap as operand.
883  for (Value *Operand : I.operands()) {
884  if (auto *C = dyn_cast<Constant>(Operand))
885  if (C->canTrap())
886  return false;
887  }
888  // We might be able to hoist the load.
889  if (I.mayReadFromMemory()) {
890  auto *LI = dyn_cast<LoadInst>(&I);
891  if (!LI)
892  return false;
893  if (!SafePtrs.count(LI->getPointerOperand())) {
894  // !llvm.mem.parallel_loop_access implies if-conversion safety.
895  // Otherwise, record that the load needs (real or emulated) masking
896  // and let the cost model decide.
897  if (!IsAnnotatedParallel || PreserveGuards)
898  MaskedOp.insert(LI);
899  continue;
900  }
901  }
902 
903  if (I.mayWriteToMemory()) {
904  auto *SI = dyn_cast<StoreInst>(&I);
905  if (!SI)
906  return false;
907  // Predicated store requires some form of masking:
908  // 1) masked store HW instruction,
909  // 2) emulation via load-blend-store (only if safe and legal to do so,
910  // be aware on the race conditions), or
911  // 3) element-by-element predicate check and scalar store.
912  MaskedOp.insert(SI);
913  continue;
914  }
915  if (I.mayThrow())
916  return false;
917  }
918 
919  return true;
920 }
921 
922 bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
923  if (!EnableIfConversion) {
924  reportVectorizationFailure("If-conversion is disabled",
925  "if-conversion is disabled",
926  "IfConversionDisabled",
927  ORE, TheLoop);
928  return false;
929  }
930 
931  assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");
932 
933  // A list of pointers which are known to be dereferenceable within scope of
934  // the loop body for each iteration of the loop which executes. That is,
935  // the memory pointed to can be dereferenced (with the access size implied by
936  // the value's type) unconditionally within the loop header without
937  // introducing a new fault.
938  SmallPtrSet<Value *, 8> SafePointes;
939 
940  // Collect safe addresses.
941  for (BasicBlock *BB : TheLoop->blocks()) {
942  if (!blockNeedsPredication(BB)) {
943  for (Instruction &I : *BB)
944  if (auto *Ptr = getLoadStorePointerOperand(&I))
945  SafePointes.insert(Ptr);
946  continue;
947  }
948 
949  // For a block which requires predication, a address may be safe to access
950  // in the loop w/o predication if we can prove dereferenceability facts
951  // sufficient to ensure it'll never fault within the loop. For the moment,
952  // we restrict this to loads; stores are more complicated due to
953  // concurrency restrictions.
954  ScalarEvolution &SE = *PSE.getSE();
955  for (Instruction &I : *BB) {
956  LoadInst *LI = dyn_cast<LoadInst>(&I);
957  if (LI && !mustSuppressSpeculation(*LI) &&
958  isDereferenceableAndAlignedInLoop(LI, TheLoop, SE, *DT))
959  SafePointes.insert(LI->getPointerOperand());
960  }
961  }
962 
963  // Collect the blocks that need predication.
964  BasicBlock *Header = TheLoop->getHeader();
965  for (BasicBlock *BB : TheLoop->blocks()) {
966  // We don't support switch statements inside loops.
967  if (!isa<BranchInst>(BB->getTerminator())) {
968  reportVectorizationFailure("Loop contains a switch statement",
969  "loop contains a switch statement",
970  "LoopContainsSwitch", ORE, TheLoop,
971  BB->getTerminator());
972  return false;
973  }
974 
975  // We must be able to predicate all blocks that need to be predicated.
976  if (blockNeedsPredication(BB)) {
977  if (!blockCanBePredicated(BB, SafePointes)) {
979  "Control flow cannot be substituted for a select",
980  "control flow cannot be substituted for a select",
981  "NoCFGForSelect", ORE, TheLoop,
982  BB->getTerminator());
983  return false;
984  }
985  } else if (BB != Header && !canIfConvertPHINodes(BB)) {
987  "Control flow cannot be substituted for a select",
988  "control flow cannot be substituted for a select",
989  "NoCFGForSelect", ORE, TheLoop,
990  BB->getTerminator());
991  return false;
992  }
993  }
994 
995  // We can if-convert this loop.
996  return true;
997 }
998 
999 // Helper function to canVectorizeLoopNestCFG.
1000 bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
1001  bool UseVPlanNativePath) {
1002  assert((UseVPlanNativePath || Lp->empty()) &&
1003  "VPlan-native path is not enabled.");
1004 
1005  // TODO: ORE should be improved to show more accurate information when an
1006  // outer loop can't be vectorized because a nested loop is not understood or
1007  // legal. Something like: "outer_loop_location: loop not vectorized:
1008  // (inner_loop_location) loop control flow is not understood by vectorizer".
1009 
1010  // Store the result and return it at the end instead of exiting early, in case
1011  // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1012  bool Result = true;
1013  bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1014 
1015  // We must have a loop in canonical form. Loops with indirectbr in them cannot
1016  // be canonicalized.
1017  if (!Lp->getLoopPreheader()) {
1018  reportVectorizationFailure("Loop doesn't have a legal pre-header",
1019  "loop control flow is not understood by vectorizer",
1020  "CFGNotUnderstood", ORE, TheLoop);
1021  if (DoExtraAnalysis)
1022  Result = false;
1023  else
1024  return false;
1025  }
1026 
1027  // We must have a single backedge.
1028  if (Lp->getNumBackEdges() != 1) {
1029  reportVectorizationFailure("The loop must have a single backedge",
1030  "loop control flow is not understood by vectorizer",
1031  "CFGNotUnderstood", ORE, TheLoop);
1032  if (DoExtraAnalysis)
1033  Result = false;
1034  else
1035  return false;
1036  }
1037 
1038  // We must have a single exiting block.
1039  if (!Lp->getExitingBlock()) {
1040  reportVectorizationFailure("The loop must have an exiting block",
1041  "loop control flow is not understood by vectorizer",
1042  "CFGNotUnderstood", ORE, TheLoop);
1043  if (DoExtraAnalysis)
1044  Result = false;
1045  else
1046  return false;
1047  }
1048 
1049  // We only handle bottom-tested loops, i.e. loop in which the condition is
1050  // checked at the end of each iteration. With that we can assume that all
1051  // instructions in the loop are executed the same number of times.
1052  if (Lp->getExitingBlock() != Lp->getLoopLatch()) {
1053  reportVectorizationFailure("The exiting block is not the loop latch",
1054  "loop control flow is not understood by vectorizer",
1055  "CFGNotUnderstood", ORE, TheLoop);
1056  if (DoExtraAnalysis)
1057  Result = false;
1058  else
1059  return false;
1060  }
1061 
1062  return Result;
1063 }
1064 
1065 bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
1066  Loop *Lp, bool UseVPlanNativePath) {
1067  // Store the result and return it at the end instead of exiting early, in case
1068  // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1069  bool Result = true;
1070  bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1071  if (!canVectorizeLoopCFG(Lp, UseVPlanNativePath)) {
1072  if (DoExtraAnalysis)
1073  Result = false;
1074  else
1075  return false;
1076  }
1077 
1078  // Recursively check whether the loop control flow of nested loops is
1079  // understood.
1080  for (Loop *SubLp : *Lp)
1081  if (!canVectorizeLoopNestCFG(SubLp, UseVPlanNativePath)) {
1082  if (DoExtraAnalysis)
1083  Result = false;
1084  else
1085  return false;
1086  }
1087 
1088  return Result;
1089 }
1090 
1091 bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
1092  // Store the result and return it at the end instead of exiting early, in case
1093  // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1094  bool Result = true;
1095 
1096  bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1097  // Check whether the loop-related control flow in the loop nest is expected by
1098  // vectorizer.
1099  if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) {
1100  if (DoExtraAnalysis)
1101  Result = false;
1102  else
1103  return false;
1104  }
1105 
1106  // We need to have a loop header.
1107  LLVM_DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName()
1108  << '\n');
1109 
1110  // Specific checks for outer loops. We skip the remaining legal checks at this
1111  // point because they don't support outer loops.
1112  if (!TheLoop->empty()) {
1113  assert(UseVPlanNativePath && "VPlan-native path is not enabled.");
1114 
1115  if (!canVectorizeOuterLoop()) {
1116  reportVectorizationFailure("Unsupported outer loop",
1117  "unsupported outer loop",
1118  "UnsupportedOuterLoop",
1119  ORE, TheLoop);
1120  // TODO: Implement DoExtraAnalysis when subsequent legal checks support
1121  // outer loops.
1122  return false;
1123  }
1124 
1125  LLVM_DEBUG(dbgs() << "LV: We can vectorize this outer loop!\n");
1126  return Result;
1127  }
1128 
1129  assert(TheLoop->empty() && "Inner loop expected.");
1130  // Check if we can if-convert non-single-bb loops.
1131  unsigned NumBlocks = TheLoop->getNumBlocks();
1132  if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
1133  LLVM_DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");
1134  if (DoExtraAnalysis)
1135  Result = false;
1136  else
1137  return false;
1138  }
1139 
1140  // Check if we can vectorize the instructions and CFG in this loop.
1141  if (!canVectorizeInstrs()) {
1142  LLVM_DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n");
1143  if (DoExtraAnalysis)
1144  Result = false;
1145  else
1146  return false;
1147  }
1148 
1149  // Go over each instruction and look at memory deps.
1150  if (!canVectorizeMemory()) {
1151  LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
1152  if (DoExtraAnalysis)
1153  Result = false;
1154  else
1155  return false;
1156  }
1157 
1158  LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
1159  << (LAI->getRuntimePointerChecking()->Need
1160  ? " (with a runtime bound check)"
1161  : "")
1162  << "!\n");
1163 
1164  unsigned SCEVThreshold = VectorizeSCEVCheckThreshold;
1165  if (Hints->getForce() == LoopVectorizeHints::FK_Enabled)
1166  SCEVThreshold = PragmaVectorizeSCEVCheckThreshold;
1167 
1168  if (PSE.getUnionPredicate().getComplexity() > SCEVThreshold) {
1169  reportVectorizationFailure("Too many SCEV checks needed",
1170  "Too many SCEV assumptions need to be made and checked at runtime",
1171  "TooManySCEVRunTimeChecks", ORE, TheLoop);
1172  if (DoExtraAnalysis)
1173  Result = false;
1174  else
1175  return false;
1176  }
1177 
1178  // Okay! We've done all the tests. If any have failed, return false. Otherwise
1179  // we can vectorize, and at this point we don't have any other mem analysis
1180  // which may limit our maximum vectorization factor, so just return true with
1181  // no restrictions.
1182  return Result;
1183 }
1184 
1186 
1187  LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");
1188 
1189  if (!PrimaryInduction) {
1191  "No primary induction, cannot fold tail by masking",
1192  "Missing a primary induction variable in the loop, which is "
1193  "needed in order to fold tail by masking as required.",
1194  "NoPrimaryInduction", ORE, TheLoop);
1195  return false;
1196  }
1197 
1198  SmallPtrSet<const Value *, 8> ReductionLiveOuts;
1199 
1200  for (auto &Reduction : *getReductionVars())
1201  ReductionLiveOuts.insert(Reduction.second.getLoopExitInstr());
1202 
1203  // TODO: handle non-reduction outside users when tail is folded by masking.
1204  for (auto *AE : AllowedExit) {
1205  // Check that all users of allowed exit values are inside the loop or
1206  // are the live-out of a reduction.
1207  if (ReductionLiveOuts.count(AE))
1208  continue;
1209  for (User *U : AE->users()) {
1210  Instruction *UI = cast<Instruction>(U);
1211  if (TheLoop->contains(UI))
1212  continue;
1214  "Cannot fold tail by masking, loop has an outside user for",
1215  "Cannot fold tail by masking in the presence of live outs.",
1216  "LiveOutFoldingTailByMasking", ORE, TheLoop, UI);
1217  return false;
1218  }
1219  }
1220 
1221  // The list of pointers that we can safely read and write to remains empty.
1222  SmallPtrSet<Value *, 8> SafePointers;
1223 
1224  // Check and mark all blocks for predication, including those that ordinarily
1225  // do not need predication such as the header block.
1226  for (BasicBlock *BB : TheLoop->blocks()) {
1227  if (!blockCanBePredicated(BB, SafePointers, /* MaskAllLoads= */ true)) {
1229  "Cannot fold tail by masking as required",
1230  "control flow cannot be substituted for a select",
1231  "NoCFGForSelect", ORE, TheLoop,
1232  BB->getTerminator());
1233  return false;
1234  }
1235  }
1236 
1237  LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n");
1238  return true;
1239 }
1240 
1241 } // namespace llvm
static bool isUniformLoop(Loop *Lp, Loop *OuterLp)
static unsigned RuntimeMemoryCheckThreshold
performing memory disambiguation checks at runtime do not make more than this number of comparisons...
uint64_t CallInst * C
#define LV_NAME
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:722
Diagnostic information for missed-optimization remarks.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:209
LLVMContext & Context
static Type * getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1)
DiagnosticInfoOptimizationBase::Argument NV
This class represents lattice values for constants.
Definition: AllocatorList.h:23
void reportVectorizationFailure(const StringRef DebugMsg, const StringRef OREMsg, const StringRef ORETag, OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I=nullptr)
Reports a vectorization failure: print DebugMsg for debugging purposes along with the corresponding o...
Instruction * getUnsafeAlgebraInst()
Returns first unsafe algebra instruction in the PHI node&#39;s use-chain.
bool isCastedInductionVariable(const Value *V)
Returns True if V is a cast that is part of an induction def-use chain, and had been proven to be red...
static bool isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D, const SCEV *Expr=nullptr, SmallVectorImpl< Instruction *> *CastsToIgnore=nullptr)
Returns true if Phi is an induction in the loop L.
amdgpu Simplify well known AMD library false FunctionCallee Value const Twine & Name
static MDString * get(LLVMContext &Context, StringRef Str)
Definition: Metadata.cpp:453
ConstantInt * getConstIntStepValue() const
LLVM_NODISCARD bool startswith(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:270
llvm::MDNode * makePostTransformationMetadata(llvm::LLVMContext &Context, MDNode *OrigLoopID, llvm::ArrayRef< llvm::StringRef > RemovePrefixes, llvm::ArrayRef< llvm::MDNode *> AddAttrs)
Create a new LoopID after the loop has been transformed.
Definition: LoopInfo.cpp:1010
The main scalar evolution driver.
bool isUniform(Value *V)
Returns true if the value V is uniform within the loop.
TODO: The following VectorizationFactor was pulled out of LoopVectorizationCostModel class...
This class represents a function call, abstracting a target machine&#39;s calling convention.
int64_t getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp, const ValueToValueMap &StridesMap=ValueToValueMap(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of its element size.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:160
bool mustSuppressSpeculation(const LoadInst &LI)
Return true if speculation of the given load must be suppressed to avoid ordering or interfering with...
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1165
InductionKind getKind() const
Metadata node.
Definition: Metadata.h:863
F(f)
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1068
An instruction for reading from memory.
Definition: Instructions.h:169
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:144
cl::opt< bool > EnableVPlanPredication
Value * getStartValue() const
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:32
#define DEBUG_TYPE
const char * vectorizeAnalysisPassName() const
If hints are provided that force vectorization, use the AlwaysPrint pass name to force the frontend t...
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:130
Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
bool hasUnsafeAlgebra()
Returns true if the recurrence has unsafe algebra which requires a relaxed floating-point model...
unsigned getNumBackEdges() const
Calculate the number of back edges to the loop header.
Definition: LoopInfo.h:233
This file defines the LoopVectorizationLegality class.
const DataLayout & getDataLayout() const
Get the data layout for the module&#39;s target platform.
Definition: Module.cpp:369
static const unsigned MaxVectorWidth
Maximum SIMD width.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:80
bool isFloatingPointTy() const
Return true if this is one of the six floating-point types.
Definition: Type.h:162
static cl::opt< bool > EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden, cl::desc("Enable if-conversion during vectorization."))
Diagnostic information for optimization analysis remarks.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:41
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:197
LLVM_NODISCARD StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition: StringRef.h:592
BlockT * getHeader() const
Definition: LoopInfo.h:105
int isConsecutivePtr(Value *Ptr)
Check if this pointer is consecutive when vectorizing.
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:200
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:246
bool isInductionVariable(const Value *V)
Returns True if V can be considered as an induction variable in this loop.
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition: LoopInfo.cpp:513
static bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as a element type.
Definition: Type.cpp:628
An instruction for storing to memory.
Definition: Instructions.h:325
static cl::opt< unsigned > PragmaVectorizeMemoryCheckThreshold("pragma-vectorize-memory-check-threshold", cl::init(128), cl::Hidden, cl::desc("The maximum allowed number of runtime memory checks with a " "vectorize(enable) pragma."))
bool blockNeedsPredication(BasicBlock *BB)
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
Diagnostic information for optimization analysis remarks related to pointer aliasing.
static ConstantAsMetadata * get(Constant *C)
Definition: Metadata.h:409
StringRef getString() const
Definition: Metadata.cpp:463
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space...
Definition: DataLayout.cpp:769
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata *> MDs)
Definition: Metadata.h:1165
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
Integer induction variable. Step = C.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:148
static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst, SmallPtrSetImpl< Value *> &AllowedExit)
Check that the instruction has outside loop users and is not an identified reduction variable...
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:465
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:64
Conditional or Unconditional Branch instruction.
bool prepareToFoldTailByMasking()
Return true if we can vectorize this loop while folding its tail by masking, and mark all respective ...
Instruction * getUnsafeAlgebraInst()
Returns induction operator that does not have "fast-math" property and requires FP unsafe mode...
Value * getIncomingValueForBlock(const BasicBlock *BB) const
bool allowVectorization(Function *F, Loop *L, bool VectorizeOnlyWhenForced) const
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:224
#define H(x, y, z)
Definition: MD5.cpp:57
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:370
constexpr double e
Definition: MathExtras.h:57
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Value * getPointerOperand()
Definition: Instructions.h:289
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:381
static bool isReductionPHI(PHINode *Phi, Loop *TheLoop, RecurrenceDescriptor &RedDes, DemandedBits *DB=nullptr, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr)
Returns true if Phi is a reduction in TheLoop.
static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp)
bool doesNotMeet(Function *F, Loop *L, const LoopVectorizeHints &Hints)
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:611
size_t size() const
Definition: SmallVector.h:52
static bool isFirstOrderRecurrence(PHINode *Phi, Loop *TheLoop, DenseMap< Instruction *, Instruction *> &SinkAfter, DominatorTree *DT)
Returns true if Phi is a first-order recurrence.
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:61
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition: IVDescriptors.h:62
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:115
Diagnostic information for optimization analysis remarks related to floating-point non-commutativity...
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:417
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
auto size(R &&Range, typename std::enable_if< std::is_same< typename std::iterator_traits< decltype(Range.begin())>::iterator_category, std::random_access_iterator_tag >::value, void >::type *=nullptr) -> decltype(std::distance(Range.begin(), Range.end()))
Get the size of a range.
Definition: STLExtras.h:1146
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file. ...
A struct for saving information about induction variables.
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition: Error.h:147
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type...
Definition: Type.cpp:134
This struct is a compact representation of a valid (power of two) or undefined (0) alignment...
Definition: Alignment.h:117
DenseMap< const Value *, Value * > ValueToValueMap
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
bool isFirstOrderRecurrence(const PHINode *Phi)
Returns True if Phi is a first-order recurrence in this loop.
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:653
static const unsigned MaxInterleaveFactor
Maximum vectorization interleave count.
bool hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, unsigned ScalarOpdIdx)
Identifies if the vector form of the intrinsic has a scalar operand.
Definition: VectorUtils.cpp:92
unsigned getNumIncomingValues() const
Return the number of incoming edges.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
PHINode * getCanonicalInductionVariable() const
Check to see if the loop has a canonical induction variable: an integer recurrence that starts at 0 a...
Definition: LoopInfo.cpp:146
static bool canIfConvertPHINodes(BasicBlock *BB)
Check whether it is safe to if-convert this phi node.
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:55
bool isInductionPhi(const Value *V)
Returns True if V is a Phi node of an induction variable in this loop.
Class for arbitrary precision integers.
Definition: APInt.h:69
iterator_range< user_iterator > users()
Definition: Value.h:420
loop Loop Strength Reduction
bool hasUnsafeAlgebra()
Returns true if the induction type is FP and the binary operator does not have the "fast-math" proper...
const SmallVectorImpl< Instruction * > & getCastInsts() const
Returns a reference to the type cast instructions in the induction update chain, that are redundant w...
bool isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L, ScalarEvolution &SE, DominatorTree &DT)
Return true if we can prove that the given load (which is assumed to be within the specified loop) wo...
Definition: Loads.cpp:197
LoopVectorizeHints(const Loop *L, bool InterleaveOnlyWhenForced, OptimizationRemarkEmitter &ORE)
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
Definition: LoopInfo.cpp:489
static const size_t npos
Definition: StringRef.h:50
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:180
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:55
Instruction * getLoopExitInstr()
StringRef getValueAsString() const
Return the attribute&#39;s value as a string.
Definition: Attributes.cpp:220
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:509
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Definition: Type.cpp:614
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:106
#define I(x, y, z)
Definition: MD5.cpp:58
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:332
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:329
Collection of parameters shared beetween the Loop Vectorizer and the Loop Access Analysis.
bool canVectorize(bool UseVPlanNativePath)
Returns true if it is legal to vectorize this loop.
std::string str() const
Return the twine contents as a std::string.
Definition: Twine.cpp:17
bool empty() const
Definition: LoopInfo.h:151
void setAlreadyVectorized()
Mark the loop L as already vectorized by setting the width to 1.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVM Value Representation.
Definition: Value.h:74
static bool blockNeedsPredication(BasicBlock *BB, Loop *TheLoop, DominatorTree *DT)
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.h:333
static unsigned VectorizationInterleave
Interleave factor as overridden by the user.
static Type * convertPointerToIntegerType(const DataLayout &DL, Type *Ty)
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
static cl::opt< unsigned > VectorizeSCEVCheckThreshold("vectorize-scev-check-threshold", cl::init(16), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed."))
A single uniqued string.
Definition: Metadata.h:603
static cl::opt< unsigned > PragmaVectorizeSCEVCheckThreshold("pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed with a " "vectorize(enable) pragma"))
Utility class for getting and setting loop vectorizer hints in the form of loop metadata.
static bool isInterleaveForced()
True if force-vector-interleave was specified by the user.
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1074
#define LLVM_DEBUG(X)
Definition: Debug.h:122
MaybeAlign getLoadStoreAlignment(Value *I)
A helper function that returns the alignment of load or store instruction.
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:49
Root of the metadata hierarchy.
Definition: Metadata.h:57
The optimization diagnostic interface.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
void emitRemarkWithHints() const
Dumps all the hint information.
void validate(const Triple &TT, const FeatureBitset &FeatureBits)