LLVM  9.0.0svn
LoopVectorizationLegality.cpp
Go to the documentation of this file.
1 //===- LoopVectorizationLegality.cpp --------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file provides loop vectorization legality analysis. Original code
10 // resided in LoopVectorize.cpp for a long time.
11 //
12 // At this point, it is implemented as a utility class, not as an analysis
13 // pass. It should be easy to create an analysis pass around it if there
14 // is a need (but D45420 needs to happen first).
15 //
18 #include "llvm/IR/IntrinsicInst.h"
19 
20 using namespace llvm;
21 
22 #define LV_NAME "loop-vectorize"
23 #define DEBUG_TYPE LV_NAME
24 
26 
27 static cl::opt<bool>
28  EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
29  cl::desc("Enable if-conversion during vectorization."));
30 
32  "pragma-vectorize-memory-check-threshold", cl::init(128), cl::Hidden,
33  cl::desc("The maximum allowed number of runtime memory checks with a "
34  "vectorize(enable) pragma."));
35 
37  "vectorize-scev-check-threshold", cl::init(16), cl::Hidden,
38  cl::desc("The maximum number of SCEV checks allowed."));
39 
41  "pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden,
42  cl::desc("The maximum number of SCEV checks allowed with a "
43  "vectorize(enable) pragma"));
44 
45 /// Maximum vectorization interleave count.
46 static const unsigned MaxInterleaveFactor = 16;
47 
48 namespace llvm {
49 
50 #ifndef NDEBUG
51 static void debugVectorizationFailure(const StringRef DebugMsg,
52  Instruction *I) {
53  dbgs() << "LV: Not vectorizing: " << DebugMsg;
54  if (I != nullptr)
55  dbgs() << " " << *I;
56  else
57  dbgs() << '.';
58  dbgs() << '\n';
59 }
60 #endif
61 
63  StringRef RemarkName,
64  Loop *TheLoop,
65  Instruction *I) {
66  Value *CodeRegion = TheLoop->getHeader();
67  DebugLoc DL = TheLoop->getStartLoc();
68 
69  if (I) {
70  CodeRegion = I->getParent();
71  // If there is no debug location attached to the instruction, revert back to
72  // using the loop's.
73  if (I->getDebugLoc())
74  DL = I->getDebugLoc();
75  }
76 
77  OptimizationRemarkAnalysis R(PassName, RemarkName, DL, CodeRegion);
78  R << "loop not vectorized: ";
79  return R;
80 }
81 
82 bool LoopVectorizeHints::Hint::validate(unsigned Val) {
83  switch (Kind) {
84  case HK_WIDTH:
85  return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth;
86  case HK_UNROLL:
87  return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
88  case HK_FORCE:
89  return (Val <= 1);
90  case HK_ISVECTORIZED:
91  return (Val == 0 || Val == 1);
92  }
93  return false;
94 }
95 
97  bool InterleaveOnlyWhenForced,
99  : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH),
100  Interleave("interleave.count", InterleaveOnlyWhenForced, HK_UNROLL),
101  Force("vectorize.enable", FK_Undefined, HK_FORCE),
102  IsVectorized("isvectorized", 0, HK_ISVECTORIZED), TheLoop(L), ORE(ORE) {
103  // Populate values with existing loop metadata.
104  getHintsFromMetadata();
105 
106  // force-vector-interleave overrides DisableInterleaving.
109 
110  if (IsVectorized.Value != 1)
111  // If the vectorization width and interleaving count are both 1 then
112  // consider the loop to have been already vectorized because there's
113  // nothing more that we can do.
114  IsVectorized.Value = Width.Value == 1 && Interleave.Value == 1;
115  LLVM_DEBUG(if (InterleaveOnlyWhenForced && Interleave.Value == 1) dbgs()
116  << "LV: Interleaving disabled by the pass manager\n");
117 }
118 
120  LLVMContext &Context = TheLoop->getHeader()->getContext();
121 
122  MDNode *IsVectorizedMD = MDNode::get(
123  Context,
124  {MDString::get(Context, "llvm.loop.isvectorized"),
125  ConstantAsMetadata::get(ConstantInt::get(Context, APInt(32, 1)))});
126  MDNode *LoopID = TheLoop->getLoopID();
127  MDNode *NewLoopID =
128  makePostTransformationMetadata(Context, LoopID,
129  {Twine(Prefix(), "vectorize.").str(),
130  Twine(Prefix(), "interleave.").str()},
131  {IsVectorizedMD});
132  TheLoop->setLoopID(NewLoopID);
133 
134  // Update internal cache.
135  IsVectorized.Value = 1;
136 }
137 
139  Function *F, Loop *L, bool VectorizeOnlyWhenForced) const {
141  LLVM_DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n");
143  return false;
144  }
145 
146  if (VectorizeOnlyWhenForced && getForce() != LoopVectorizeHints::FK_Enabled) {
147  LLVM_DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n");
149  return false;
150  }
151 
152  if (getIsVectorized() == 1) {
153  LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
154  // FIXME: Add interleave.disable metadata. This will allow
155  // vectorize.disable to be used without disabling the pass and errors
156  // to differentiate between disabled vectorization and a width of 1.
157  ORE.emit([&]() {
159  "AllDisabled", L->getStartLoc(),
160  L->getHeader())
161  << "loop not vectorized: vectorization and interleaving are "
162  "explicitly disabled, or the loop has already been "
163  "vectorized";
164  });
165  return false;
166  }
167 
168  return true;
169 }
170 
172  using namespace ore;
173 
174  ORE.emit([&]() {
175  if (Force.Value == LoopVectorizeHints::FK_Disabled)
176  return OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled",
177  TheLoop->getStartLoc(),
178  TheLoop->getHeader())
179  << "loop not vectorized: vectorization is explicitly disabled";
180  else {
181  OptimizationRemarkMissed R(LV_NAME, "MissedDetails",
182  TheLoop->getStartLoc(), TheLoop->getHeader());
183  R << "loop not vectorized";
184  if (Force.Value == LoopVectorizeHints::FK_Enabled) {
185  R << " (Force=" << NV("Force", true);
186  if (Width.Value != 0)
187  R << ", Vector Width=" << NV("VectorWidth", Width.Value);
188  if (Interleave.Value != 0)
189  R << ", Interleave Count=" << NV("InterleaveCount", Interleave.Value);
190  R << ")";
191  }
192  return R;
193  }
194  });
195 }
196 
198  if (getWidth() == 1)
199  return LV_NAME;
201  return LV_NAME;
203  return LV_NAME;
205 }
206 
207 void LoopVectorizeHints::getHintsFromMetadata() {
208  MDNode *LoopID = TheLoop->getLoopID();
209  if (!LoopID)
210  return;
211 
212  // First operand should refer to the loop id itself.
213  assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
214  assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
215 
216  for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
217  const MDString *S = nullptr;
219 
220  // The expected hint is either a MDString or a MDNode with the first
221  // operand a MDString.
222  if (const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i))) {
223  if (!MD || MD->getNumOperands() == 0)
224  continue;
225  S = dyn_cast<MDString>(MD->getOperand(0));
226  for (unsigned i = 1, ie = MD->getNumOperands(); i < ie; ++i)
227  Args.push_back(MD->getOperand(i));
228  } else {
229  S = dyn_cast<MDString>(LoopID->getOperand(i));
230  assert(Args.size() == 0 && "too many arguments for MDString");
231  }
232 
233  if (!S)
234  continue;
235 
236  // Check if the hint starts with the loop metadata prefix.
237  StringRef Name = S->getString();
238  if (Args.size() == 1)
239  setHint(Name, Args[0]);
240  }
241 }
242 
243 void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) {
244  if (!Name.startswith(Prefix()))
245  return;
246  Name = Name.substr(Prefix().size(), StringRef::npos);
247 
248  const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg);
249  if (!C)
250  return;
251  unsigned Val = C->getZExtValue();
252 
253  Hint *Hints[] = {&Width, &Interleave, &Force, &IsVectorized};
254  for (auto H : Hints) {
255  if (Name == H->Name) {
256  if (H->validate(Val))
257  H->Value = Val;
258  else
259  LLVM_DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n");
260  break;
261  }
262  }
263 }
264 
266  Function *F, Loop *L, const LoopVectorizeHints &Hints) {
267  const char *PassName = Hints.vectorizeAnalysisPassName();
268  bool Failed = false;
269  if (UnsafeAlgebraInst && !Hints.allowReordering()) {
270  ORE.emit([&]() {
272  PassName, "CantReorderFPOps", UnsafeAlgebraInst->getDebugLoc(),
273  UnsafeAlgebraInst->getParent())
274  << "loop not vectorized: cannot prove it is safe to reorder "
275  "floating-point operations";
276  });
277  Failed = true;
278  }
279 
280  // Test if runtime memcheck thresholds are exceeded.
281  bool PragmaThresholdReached =
282  NumRuntimePointerChecks > PragmaVectorizeMemoryCheckThreshold;
283  bool ThresholdReached =
284  NumRuntimePointerChecks > VectorizerParams::RuntimeMemoryCheckThreshold;
285  if ((ThresholdReached && !Hints.allowReordering()) ||
286  PragmaThresholdReached) {
287  ORE.emit([&]() {
288  return OptimizationRemarkAnalysisAliasing(PassName, "CantReorderMemOps",
289  L->getStartLoc(),
290  L->getHeader())
291  << "loop not vectorized: cannot prove it is safe to reorder "
292  "memory operations";
293  });
294  LLVM_DEBUG(dbgs() << "LV: Too many memory checks needed.\n");
295  Failed = true;
296  }
297 
298  return Failed;
299 }
300 
301 // Return true if the inner loop \p Lp is uniform with regard to the outer loop
302 // \p OuterLp (i.e., if the outer loop is vectorized, all the vector lanes
303 // executing the inner loop will execute the same iterations). This check is
304 // very constrained for now but it will be relaxed in the future. \p Lp is
305 // considered uniform if it meets all the following conditions:
306 // 1) it has a canonical IV (starting from 0 and with stride 1),
307 // 2) its latch terminator is a conditional branch and,
308 // 3) its latch condition is a compare instruction whose operands are the
309 // canonical IV and an OuterLp invariant.
310 // This check doesn't take into account the uniformity of other conditions not
311 // related to the loop latch because they don't affect the loop uniformity.
312 //
313 // NOTE: We decided to keep all these checks and its associated documentation
314 // together so that we can easily have a picture of the current supported loop
315 // nests. However, some of the current checks don't depend on \p OuterLp and
316 // would be redundantly executed for each \p Lp if we invoked this function for
317 // different candidate outer loops. This is not the case for now because we
318 // don't currently have the infrastructure to evaluate multiple candidate outer
319 // loops and \p OuterLp will be a fixed parameter while we only support explicit
320 // outer loop vectorization. It's also very likely that these checks go away
321 // before introducing the aforementioned infrastructure. However, if this is not
322 // the case, we should move the \p OuterLp independent checks to a separate
323 // function that is only executed once for each \p Lp.
324 static bool isUniformLoop(Loop *Lp, Loop *OuterLp) {
325  assert(Lp->getLoopLatch() && "Expected loop with a single latch.");
326 
327  // If Lp is the outer loop, it's uniform by definition.
328  if (Lp == OuterLp)
329  return true;
330  assert(OuterLp->contains(Lp) && "OuterLp must contain Lp.");
331 
332  // 1.
334  if (!IV) {
335  LLVM_DEBUG(dbgs() << "LV: Canonical IV not found.\n");
336  return false;
337  }
338 
339  // 2.
340  BasicBlock *Latch = Lp->getLoopLatch();
341  auto *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator());
342  if (!LatchBr || LatchBr->isUnconditional()) {
343  LLVM_DEBUG(dbgs() << "LV: Unsupported loop latch branch.\n");
344  return false;
345  }
346 
347  // 3.
348  auto *LatchCmp = dyn_cast<CmpInst>(LatchBr->getCondition());
349  if (!LatchCmp) {
350  LLVM_DEBUG(
351  dbgs() << "LV: Loop latch condition is not a compare instruction.\n");
352  return false;
353  }
354 
355  Value *CondOp0 = LatchCmp->getOperand(0);
356  Value *CondOp1 = LatchCmp->getOperand(1);
357  Value *IVUpdate = IV->getIncomingValueForBlock(Latch);
358  if (!(CondOp0 == IVUpdate && OuterLp->isLoopInvariant(CondOp1)) &&
359  !(CondOp1 == IVUpdate && OuterLp->isLoopInvariant(CondOp0))) {
360  LLVM_DEBUG(dbgs() << "LV: Loop latch condition is not uniform.\n");
361  return false;
362  }
363 
364  return true;
365 }
366 
367 // Return true if \p Lp and all its nested loops are uniform with regard to \p
368 // OuterLp.
369 static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp) {
370  if (!isUniformLoop(Lp, OuterLp))
371  return false;
372 
373  // Check if nested loops are uniform.
374  for (Loop *SubLp : *Lp)
375  if (!isUniformLoopNest(SubLp, OuterLp))
376  return false;
377 
378  return true;
379 }
380 
381 /// Check whether it is safe to if-convert this phi node.
382 ///
383 /// Phi nodes with constant expressions that can trap are not safe to if
384 /// convert.
386  for (PHINode &Phi : BB->phis()) {
387  for (Value *V : Phi.incoming_values())
388  if (auto *C = dyn_cast<Constant>(V))
389  if (C->canTrap())
390  return false;
391  }
392  return true;
393 }
394 
396  if (Ty->isPointerTy())
397  return DL.getIntPtrType(Ty);
398 
399  // It is possible that char's or short's overflow when we ask for the loop's
400  // trip count, work around this by changing the type size.
401  if (Ty->getScalarSizeInBits() < 32)
402  return Type::getInt32Ty(Ty->getContext());
403 
404  return Ty;
405 }
406 
407 static Type *getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) {
408  Ty0 = convertPointerToIntegerType(DL, Ty0);
409  Ty1 = convertPointerToIntegerType(DL, Ty1);
410  if (Ty0->getScalarSizeInBits() > Ty1->getScalarSizeInBits())
411  return Ty0;
412  return Ty1;
413 }
414 
415 /// Check that the instruction has outside loop users and is not an
416 /// identified reduction variable.
417 static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
418  SmallPtrSetImpl<Value *> &AllowedExit) {
419  // Reductions, Inductions and non-header phis are allowed to have exit users. All
420  // other instructions must not have external users.
421  if (!AllowedExit.count(Inst))
422  // Check that all of the users of the loop are inside the BB.
423  for (User *U : Inst->users()) {
424  Instruction *UI = cast<Instruction>(U);
425  // This user may be a reduction exit value.
426  if (!TheLoop->contains(UI)) {
427  LLVM_DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n');
428  return true;
429  }
430  }
431  return false;
432 }
433 
435  const ValueToValueMap &Strides =
436  getSymbolicStrides() ? *getSymbolicStrides() : ValueToValueMap();
437 
438  int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, true, false);
439  if (Stride == 1 || Stride == -1)
440  return Stride;
441  return 0;
442 }
443 
445  return LAI->isUniform(V);
446 }
447 
448 void LoopVectorizationLegality::reportVectorizationFailure(
449  const StringRef DebugMsg, const StringRef OREMsg,
450  const StringRef ORETag, Instruction *I) const {
452  ORE->emit(createLVMissedAnalysis(Hints->vectorizeAnalysisPassName(),
453  ORETag, TheLoop, I) << OREMsg);
454 }
455 
456 bool LoopVectorizationLegality::canVectorizeOuterLoop() {
457  assert(!TheLoop->empty() && "We are not vectorizing an outer loop.");
458  // Store the result and return it at the end instead of exiting early, in case
459  // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
460  bool Result = true;
461  bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
462 
463  for (BasicBlock *BB : TheLoop->blocks()) {
464  // Check whether the BB terminator is a BranchInst. Any other terminator is
465  // not supported yet.
466  auto *Br = dyn_cast<BranchInst>(BB->getTerminator());
467  if (!Br) {
468  reportVectorizationFailure("Unsupported basic block terminator",
469  "loop control flow is not understood by vectorizer",
470  "CFGNotUnderstood");
471  if (DoExtraAnalysis)
472  Result = false;
473  else
474  return false;
475  }
476 
477  // Check whether the BranchInst is a supported one. Only unconditional
478  // branches, conditional branches with an outer loop invariant condition or
479  // backedges are supported.
480  // FIXME: We skip these checks when VPlan predication is enabled as we
481  // want to allow divergent branches. This whole check will be removed
482  // once VPlan predication is on by default.
483  if (!EnableVPlanPredication && Br && Br->isConditional() &&
484  !TheLoop->isLoopInvariant(Br->getCondition()) &&
485  !LI->isLoopHeader(Br->getSuccessor(0)) &&
486  !LI->isLoopHeader(Br->getSuccessor(1))) {
487  reportVectorizationFailure("Unsupported conditional branch",
488  "loop control flow is not understood by vectorizer",
489  "CFGNotUnderstood");
490  if (DoExtraAnalysis)
491  Result = false;
492  else
493  return false;
494  }
495  }
496 
497  // Check whether inner loops are uniform. At this point, we only support
498  // simple outer loops scenarios with uniform nested loops.
499  if (!isUniformLoopNest(TheLoop /*loop nest*/,
500  TheLoop /*context outer loop*/)) {
501  reportVectorizationFailure("Outer loop contains divergent loops",
502  "loop control flow is not understood by vectorizer",
503  "CFGNotUnderstood");
504  if (DoExtraAnalysis)
505  Result = false;
506  else
507  return false;
508  }
509 
510  // Check whether we are able to set up outer loop induction.
511  if (!setupOuterLoopInductions()) {
512  reportVectorizationFailure("Unsupported outer loop Phi(s)",
513  "Unsupported outer loop Phi(s)",
514  "UnsupportedPhi");
515  if (DoExtraAnalysis)
516  Result = false;
517  else
518  return false;
519  }
520 
521  return Result;
522 }
523 
524 void LoopVectorizationLegality::addInductionPhi(
525  PHINode *Phi, const InductionDescriptor &ID,
526  SmallPtrSetImpl<Value *> &AllowedExit) {
527  Inductions[Phi] = ID;
528 
529  // In case this induction also comes with casts that we know we can ignore
530  // in the vectorized loop body, record them here. All casts could be recorded
531  // here for ignoring, but suffices to record only the first (as it is the
532  // only one that may bw used outside the cast sequence).
533  const SmallVectorImpl<Instruction *> &Casts = ID.getCastInsts();
534  if (!Casts.empty())
535  InductionCastsToIgnore.insert(*Casts.begin());
536 
537  Type *PhiTy = Phi->getType();
538  const DataLayout &DL = Phi->getModule()->getDataLayout();
539 
540  // Get the widest type.
541  if (!PhiTy->isFloatingPointTy()) {
542  if (!WidestIndTy)
543  WidestIndTy = convertPointerToIntegerType(DL, PhiTy);
544  else
545  WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy);
546  }
547 
548  // Int inductions are special because we only allow one IV.
551  isa<Constant>(ID.getStartValue()) &&
552  cast<Constant>(ID.getStartValue())->isNullValue()) {
553 
554  // Use the phi node with the widest type as induction. Use the last
555  // one if there are multiple (no good reason for doing this other
556  // than it is expedient). We've checked that it begins at zero and
557  // steps by one, so this is a canonical induction variable.
558  if (!PrimaryInduction || PhiTy == WidestIndTy)
559  PrimaryInduction = Phi;
560  }
561 
562  // Both the PHI node itself, and the "post-increment" value feeding
563  // back into the PHI node may have external users.
564  // We can allow those uses, except if the SCEVs we have for them rely
565  // on predicates that only hold within the loop, since allowing the exit
566  // currently means re-using this SCEV outside the loop (see PR33706 for more
567  // details).
568  if (PSE.getUnionPredicate().isAlwaysTrue()) {
569  AllowedExit.insert(Phi);
570  AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch()));
571  }
572 
573  LLVM_DEBUG(dbgs() << "LV: Found an induction variable.\n");
574 }
575 
576 bool LoopVectorizationLegality::setupOuterLoopInductions() {
577  BasicBlock *Header = TheLoop->getHeader();
578 
579  // Returns true if a given Phi is a supported induction.
580  auto isSupportedPhi = [&](PHINode &Phi) -> bool {
582  if (InductionDescriptor::isInductionPHI(&Phi, TheLoop, PSE, ID) &&
584  addInductionPhi(&Phi, ID, AllowedExit);
585  return true;
586  } else {
587  // Bail out for any Phi in the outer loop header that is not a supported
588  // induction.
589  LLVM_DEBUG(
590  dbgs()
591  << "LV: Found unsupported PHI for outer loop vectorization.\n");
592  return false;
593  }
594  };
595 
596  if (llvm::all_of(Header->phis(), isSupportedPhi))
597  return true;
598  else
599  return false;
600 }
601 
602 bool LoopVectorizationLegality::canVectorizeInstrs() {
603  BasicBlock *Header = TheLoop->getHeader();
604 
605  // Look for the attribute signaling the absence of NaNs.
606  Function &F = *Header->getParent();
607  HasFunNoNaNAttr =
608  F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true";
609 
610  // For each block in the loop.
611  for (BasicBlock *BB : TheLoop->blocks()) {
612  // Scan the instructions in the block and look for hazards.
613  for (Instruction &I : *BB) {
614  if (auto *Phi = dyn_cast<PHINode>(&I)) {
615  Type *PhiTy = Phi->getType();
616  // Check that this PHI type is allowed.
617  if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
618  !PhiTy->isPointerTy()) {
619  reportVectorizationFailure("Found a non-int non-pointer PHI",
620  "loop control flow is not understood by vectorizer",
621  "CFGNotUnderstood");
622  return false;
623  }
624 
625  // If this PHINode is not in the header block, then we know that we
626  // can convert it to select during if-conversion. No need to check if
627  // the PHIs in this block are induction or reduction variables.
628  if (BB != Header) {
629  // Non-header phi nodes that have outside uses can be vectorized. Add
630  // them to the list of allowed exits.
631  // Unsafe cyclic dependencies with header phis are identified during
632  // legalization for reduction, induction and first order
633  // recurrences.
634  continue;
635  }
636 
637  // We only allow if-converted PHIs with exactly two incoming values.
638  if (Phi->getNumIncomingValues() != 2) {
639  reportVectorizationFailure("Found an invalid PHI",
640  "loop control flow is not understood by vectorizer",
641  "CFGNotUnderstood", Phi);
642  return false;
643  }
644 
645  RecurrenceDescriptor RedDes;
646  if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC,
647  DT)) {
648  if (RedDes.hasUnsafeAlgebra())
649  Requirements->addUnsafeAlgebraInst(RedDes.getUnsafeAlgebraInst());
650  AllowedExit.insert(RedDes.getLoopExitInstr());
651  Reductions[Phi] = RedDes;
652  continue;
653  }
654 
655  // TODO: Instead of recording the AllowedExit, it would be good to record the
656  // complementary set: NotAllowedExit. These include (but may not be
657  // limited to):
658  // 1. Reduction phis as they represent the one-before-last value, which
659  // is not available when vectorized
660  // 2. Induction phis and increment when SCEV predicates cannot be used
661  // outside the loop - see addInductionPhi
662  // 3. Non-Phis with outside uses when SCEV predicates cannot be used
663  // outside the loop - see call to hasOutsideLoopUser in the non-phi
664  // handling below
665  // 4. FirstOrderRecurrence phis that can possibly be handled by
666  // extraction.
667  // By recording these, we can then reason about ways to vectorize each
668  // of these NotAllowedExit.
670  if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID)) {
671  addInductionPhi(Phi, ID, AllowedExit);
672  if (ID.hasUnsafeAlgebra() && !HasFunNoNaNAttr)
673  Requirements->addUnsafeAlgebraInst(ID.getUnsafeAlgebraInst());
674  continue;
675  }
676 
678  SinkAfter, DT)) {
679  FirstOrderRecurrences.insert(Phi);
680  continue;
681  }
682 
683  // As a last resort, coerce the PHI to a AddRec expression
684  // and re-try classifying it a an induction PHI.
685  if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true)) {
686  addInductionPhi(Phi, ID, AllowedExit);
687  continue;
688  }
689 
690  reportVectorizationFailure("Found an unidentified PHI",
691  "value that could not be identified as "
692  "reduction is used outside the loop",
693  "NonReductionValueUsedOutsideLoop", Phi);
694  return false;
695  } // end of PHI handling
696 
697  // We handle calls that:
698  // * Are debug info intrinsics.
699  // * Have a mapping to an IR intrinsic.
700  // * Have a vector version available.
701  auto *CI = dyn_cast<CallInst>(&I);
702  if (CI && !getVectorIntrinsicIDForCall(CI, TLI) &&
703  !isa<DbgInfoIntrinsic>(CI) &&
704  !(CI->getCalledFunction() && TLI &&
705  TLI->isFunctionVectorizable(CI->getCalledFunction()->getName()))) {
706  // If the call is a recognized math libary call, it is likely that
707  // we can vectorize it given loosened floating-point constraints.
708  LibFunc Func;
709  bool IsMathLibCall =
710  TLI && CI->getCalledFunction() &&
711  CI->getType()->isFloatingPointTy() &&
712  TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) &&
713  TLI->hasOptimizedCodeGen(Func);
714 
715  if (IsMathLibCall) {
716  // TODO: Ideally, we should not use clang-specific language here,
717  // but it's hard to provide meaningful yet generic advice.
718  // Also, should this be guarded by allowExtraAnalysis() and/or be part
719  // of the returned info from isFunctionVectorizable()?
720  reportVectorizationFailure("Found a non-intrinsic callsite",
721  "library call cannot be vectorized. "
722  "Try compiling with -fno-math-errno, -ffast-math, "
723  "or similar flags",
724  "CantVectorizeLibcall", CI);
725  } else {
726  reportVectorizationFailure("Found a non-intrinsic callsite",
727  "call instruction cannot be vectorized",
728  "CantVectorizeLibcall", CI);
729  }
730  return false;
731  }
732 
733  // Some intrinsics have scalar arguments and should be same in order for
734  // them to be vectorized (i.e. loop invariant).
735  if (CI) {
736  auto *SE = PSE.getSE();
737  Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI);
738  for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i)
739  if (hasVectorInstrinsicScalarOpd(IntrinID, i)) {
740  if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(i)), TheLoop)) {
741  reportVectorizationFailure("Found unvectorizable intrinsic",
742  "intrinsic instruction cannot be vectorized",
743  "CantVectorizeIntrinsic", CI);
744  return false;
745  }
746  }
747  }
748 
749  // Check that the instruction return type is vectorizable.
750  // Also, we can't vectorize extractelement instructions.
752  !I.getType()->isVoidTy()) ||
753  isa<ExtractElementInst>(I)) {
754  reportVectorizationFailure("Found unvectorizable type",
755  "instruction return type cannot be vectorized",
756  "CantVectorizeInstructionReturnType", &I);
757  return false;
758  }
759 
760  // Check that the stored type is vectorizable.
761  if (auto *ST = dyn_cast<StoreInst>(&I)) {
762  Type *T = ST->getValueOperand()->getType();
764  reportVectorizationFailure("Store instruction cannot be vectorized",
765  "store instruction cannot be vectorized",
766  "CantVectorizeStore", ST);
767  return false;
768  }
769 
770  // FP instructions can allow unsafe algebra, thus vectorizable by
771  // non-IEEE-754 compliant SIMD units.
772  // This applies to floating-point math operations and calls, not memory
773  // operations, shuffles, or casts, as they don't change precision or
774  // semantics.
775  } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) &&
776  !I.isFast()) {
777  LLVM_DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n");
778  Hints->setPotentiallyUnsafe();
779  }
780 
781  // Reduction instructions are allowed to have exit users.
782  // All other instructions must not have external users.
783  if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) {
784  // We can safely vectorize loops where instructions within the loop are
785  // used outside the loop only if the SCEV predicates within the loop is
786  // same as outside the loop. Allowing the exit means reusing the SCEV
787  // outside the loop.
788  if (PSE.getUnionPredicate().isAlwaysTrue()) {
789  AllowedExit.insert(&I);
790  continue;
791  }
792  reportVectorizationFailure("Value cannot be used outside the loop",
793  "value cannot be used outside the loop",
794  "ValueUsedOutsideLoop", &I);
795  return false;
796  }
797  } // next instr.
798  }
799 
800  if (!PrimaryInduction) {
801  if (Inductions.empty()) {
802  reportVectorizationFailure("Did not find one integer induction var",
803  "loop induction variable could not be identified",
804  "NoInductionVariable");
805  return false;
806  } else if (!WidestIndTy) {
807  reportVectorizationFailure("Did not find one integer induction var",
808  "integer loop induction variable could not be identified",
809  "NoIntegerInductionVariable");
810  return false;
811  } else {
812  LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
813  }
814  }
815 
816  // Now we know the widest induction type, check if our found induction
817  // is the same size. If it's not, unset it here and InnerLoopVectorizer
818  // will create another.
819  if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType())
820  PrimaryInduction = nullptr;
821 
822  return true;
823 }
824 
825 bool LoopVectorizationLegality::canVectorizeMemory() {
826  LAI = &(*GetLAA)(*TheLoop);
827  const OptimizationRemarkAnalysis *LAR = LAI->getReport();
828  if (LAR) {
829  ORE->emit([&]() {
830  return OptimizationRemarkAnalysis(Hints->vectorizeAnalysisPassName(),
831  "loop not vectorized: ", *LAR);
832  });
833  }
834  if (!LAI->canVectorizeMemory())
835  return false;
836 
837  if (LAI->hasDependenceInvolvingLoopInvariantAddress()) {
838  reportVectorizationFailure("Stores to a uniform address",
839  "write to a loop invariant address could not be vectorized",
840  "CantVectorizeStoreToLoopInvariantAddress");
841  return false;
842  }
843  Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks());
844  PSE.addPredicate(LAI->getPSE().getUnionPredicate());
845 
846  return true;
847 }
848 
850  Value *In0 = const_cast<Value *>(V);
851  PHINode *PN = dyn_cast_or_null<PHINode>(In0);
852  if (!PN)
853  return false;
854 
855  return Inductions.count(PN);
856 }
857 
859  auto *Inst = dyn_cast<Instruction>(V);
860  return (Inst && InductionCastsToIgnore.count(Inst));
861 }
862 
864  return isInductionPhi(V) || isCastedInductionVariable(V);
865 }
866 
868  return FirstOrderRecurrences.count(Phi);
869 }
870 
872  return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
873 }
874 
875 bool LoopVectorizationLegality::blockCanBePredicated(
876  BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs) {
877  const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
878 
879  for (Instruction &I : *BB) {
880  // Check that we don't have a constant expression that can trap as operand.
881  for (Value *Operand : I.operands()) {
882  if (auto *C = dyn_cast<Constant>(Operand))
883  if (C->canTrap())
884  return false;
885  }
886  // We might be able to hoist the load.
887  if (I.mayReadFromMemory()) {
888  auto *LI = dyn_cast<LoadInst>(&I);
889  if (!LI)
890  return false;
891  if (!SafePtrs.count(LI->getPointerOperand())) {
892  // !llvm.mem.parallel_loop_access implies if-conversion safety.
893  // Otherwise, record that the load needs (real or emulated) masking
894  // and let the cost model decide.
895  if (!IsAnnotatedParallel)
896  MaskedOp.insert(LI);
897  continue;
898  }
899  }
900 
901  if (I.mayWriteToMemory()) {
902  auto *SI = dyn_cast<StoreInst>(&I);
903  if (!SI)
904  return false;
905  // Predicated store requires some form of masking:
906  // 1) masked store HW instruction,
907  // 2) emulation via load-blend-store (only if safe and legal to do so,
908  // be aware on the race conditions), or
909  // 3) element-by-element predicate check and scalar store.
910  MaskedOp.insert(SI);
911  continue;
912  }
913  if (I.mayThrow())
914  return false;
915  }
916 
917  return true;
918 }
919 
920 bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
921  if (!EnableIfConversion) {
922  reportVectorizationFailure("If-conversion is disabled",
923  "if-conversion is disabled",
924  "IfConversionDisabled");
925  return false;
926  }
927 
928  assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");
929 
930  // A list of pointers that we can safely read and write to.
931  SmallPtrSet<Value *, 8> SafePointes;
932 
933  // Collect safe addresses.
934  for (BasicBlock *BB : TheLoop->blocks()) {
935  if (blockNeedsPredication(BB))
936  continue;
937 
938  for (Instruction &I : *BB)
939  if (auto *Ptr = getLoadStorePointerOperand(&I))
940  SafePointes.insert(Ptr);
941  }
942 
943  // Collect the blocks that need predication.
944  BasicBlock *Header = TheLoop->getHeader();
945  for (BasicBlock *BB : TheLoop->blocks()) {
946  // We don't support switch statements inside loops.
947  if (!isa<BranchInst>(BB->getTerminator())) {
948  reportVectorizationFailure("Loop contains a switch statement",
949  "loop contains a switch statement",
950  "LoopContainsSwitch", BB->getTerminator());
951  return false;
952  }
953 
954  // We must be able to predicate all blocks that need to be predicated.
955  if (blockNeedsPredication(BB)) {
956  if (!blockCanBePredicated(BB, SafePointes)) {
957  reportVectorizationFailure(
958  "Control flow cannot be substituted for a select",
959  "control flow cannot be substituted for a select",
960  "NoCFGForSelect", BB->getTerminator());
961  return false;
962  }
963  } else if (BB != Header && !canIfConvertPHINodes(BB)) {
964  reportVectorizationFailure(
965  "Control flow cannot be substituted for a select",
966  "control flow cannot be substituted for a select",
967  "NoCFGForSelect", BB->getTerminator());
968  return false;
969  }
970  }
971 
972  // We can if-convert this loop.
973  return true;
974 }
975 
976 // Helper function to canVectorizeLoopNestCFG.
977 bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
978  bool UseVPlanNativePath) {
979  assert((UseVPlanNativePath || Lp->empty()) &&
980  "VPlan-native path is not enabled.");
981 
982  // TODO: ORE should be improved to show more accurate information when an
983  // outer loop can't be vectorized because a nested loop is not understood or
984  // legal. Something like: "outer_loop_location: loop not vectorized:
985  // (inner_loop_location) loop control flow is not understood by vectorizer".
986 
987  // Store the result and return it at the end instead of exiting early, in case
988  // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
989  bool Result = true;
990  bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
991 
992  // We must have a loop in canonical form. Loops with indirectbr in them cannot
993  // be canonicalized.
994  if (!Lp->getLoopPreheader()) {
995  reportVectorizationFailure("Loop doesn't have a legal pre-header",
996  "loop control flow is not understood by vectorizer",
997  "CFGNotUnderstood");
998  if (DoExtraAnalysis)
999  Result = false;
1000  else
1001  return false;
1002  }
1003 
1004  // We must have a single backedge.
1005  if (Lp->getNumBackEdges() != 1) {
1006  reportVectorizationFailure("The loop must have a single backedge",
1007  "loop control flow is not understood by vectorizer",
1008  "CFGNotUnderstood");
1009  if (DoExtraAnalysis)
1010  Result = false;
1011  else
1012  return false;
1013  }
1014 
1015  // We must have a single exiting block.
1016  if (!Lp->getExitingBlock()) {
1017  reportVectorizationFailure("The loop must have an exiting block",
1018  "loop control flow is not understood by vectorizer",
1019  "CFGNotUnderstood");
1020  if (DoExtraAnalysis)
1021  Result = false;
1022  else
1023  return false;
1024  }
1025 
1026  // We only handle bottom-tested loops, i.e. loop in which the condition is
1027  // checked at the end of each iteration. With that we can assume that all
1028  // instructions in the loop are executed the same number of times.
1029  if (Lp->getExitingBlock() != Lp->getLoopLatch()) {
1030  reportVectorizationFailure("The exiting block is not the loop latch",
1031  "loop control flow is not understood by vectorizer",
1032  "CFGNotUnderstood");
1033  if (DoExtraAnalysis)
1034  Result = false;
1035  else
1036  return false;
1037  }
1038 
1039  return Result;
1040 }
1041 
1042 bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
1043  Loop *Lp, bool UseVPlanNativePath) {
1044  // Store the result and return it at the end instead of exiting early, in case
1045  // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1046  bool Result = true;
1047  bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1048  if (!canVectorizeLoopCFG(Lp, UseVPlanNativePath)) {
1049  if (DoExtraAnalysis)
1050  Result = false;
1051  else
1052  return false;
1053  }
1054 
1055  // Recursively check whether the loop control flow of nested loops is
1056  // understood.
1057  for (Loop *SubLp : *Lp)
1058  if (!canVectorizeLoopNestCFG(SubLp, UseVPlanNativePath)) {
1059  if (DoExtraAnalysis)
1060  Result = false;
1061  else
1062  return false;
1063  }
1064 
1065  return Result;
1066 }
1067 
1068 bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
1069  // Store the result and return it at the end instead of exiting early, in case
1070  // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1071  bool Result = true;
1072 
1073  bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1074  // Check whether the loop-related control flow in the loop nest is expected by
1075  // vectorizer.
1076  if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) {
1077  if (DoExtraAnalysis)
1078  Result = false;
1079  else
1080  return false;
1081  }
1082 
1083  // We need to have a loop header.
1084  LLVM_DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName()
1085  << '\n');
1086 
1087  // Specific checks for outer loops. We skip the remaining legal checks at this
1088  // point because they don't support outer loops.
1089  if (!TheLoop->empty()) {
1090  assert(UseVPlanNativePath && "VPlan-native path is not enabled.");
1091 
1092  if (!canVectorizeOuterLoop()) {
1093  reportVectorizationFailure("Unsupported outer loop",
1094  "unsupported outer loop",
1095  "UnsupportedOuterLoop");
1096  // TODO: Implement DoExtraAnalysis when subsequent legal checks support
1097  // outer loops.
1098  return false;
1099  }
1100 
1101  LLVM_DEBUG(dbgs() << "LV: We can vectorize this outer loop!\n");
1102  return Result;
1103  }
1104 
1105  assert(TheLoop->empty() && "Inner loop expected.");
1106  // Check if we can if-convert non-single-bb loops.
1107  unsigned NumBlocks = TheLoop->getNumBlocks();
1108  if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
1109  LLVM_DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");
1110  if (DoExtraAnalysis)
1111  Result = false;
1112  else
1113  return false;
1114  }
1115 
1116  // Check if we can vectorize the instructions and CFG in this loop.
1117  if (!canVectorizeInstrs()) {
1118  LLVM_DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n");
1119  if (DoExtraAnalysis)
1120  Result = false;
1121  else
1122  return false;
1123  }
1124 
1125  // Go over each instruction and look at memory deps.
1126  if (!canVectorizeMemory()) {
1127  LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
1128  if (DoExtraAnalysis)
1129  Result = false;
1130  else
1131  return false;
1132  }
1133 
1134  LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
1135  << (LAI->getRuntimePointerChecking()->Need
1136  ? " (with a runtime bound check)"
1137  : "")
1138  << "!\n");
1139 
1140  unsigned SCEVThreshold = VectorizeSCEVCheckThreshold;
1141  if (Hints->getForce() == LoopVectorizeHints::FK_Enabled)
1142  SCEVThreshold = PragmaVectorizeSCEVCheckThreshold;
1143 
1144  if (PSE.getUnionPredicate().getComplexity() > SCEVThreshold) {
1145  reportVectorizationFailure("Too many SCEV checks needed",
1146  "Too many SCEV assumptions need to be made and checked at runtime",
1147  "TooManySCEVRunTimeChecks");
1148  if (DoExtraAnalysis)
1149  Result = false;
1150  else
1151  return false;
1152  }
1153 
1154  // Okay! We've done all the tests. If any have failed, return false. Otherwise
1155  // we can vectorize, and at this point we don't have any other mem analysis
1156  // which may limit our maximum vectorization factor, so just return true with
1157  // no restrictions.
1158  return Result;
1159 }
1160 
1162 
1163  LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");
1164 
1165  if (!PrimaryInduction) {
1166  reportVectorizationFailure(
1167  "No primary induction, cannot fold tail by masking",
1168  "Missing a primary induction variable in the loop, which is "
1169  "needed in order to fold tail by masking as required.",
1170  "NoPrimaryInduction");
1171  return false;
1172  }
1173 
1174  // TODO: handle reductions when tail is folded by masking.
1175  if (!Reductions.empty()) {
1176  reportVectorizationFailure(
1177  "Loop has reductions, cannot fold tail by masking",
1178  "Cannot fold tail by masking in the presence of reductions.",
1179  "ReductionFoldingTailByMasking");
1180  return false;
1181  }
1182 
1183  // TODO: handle outside users when tail is folded by masking.
1184  for (auto *AE : AllowedExit) {
1185  // Check that all users of allowed exit values are inside the loop.
1186  for (User *U : AE->users()) {
1187  Instruction *UI = cast<Instruction>(U);
1188  if (TheLoop->contains(UI))
1189  continue;
1190  reportVectorizationFailure(
1191  "Cannot fold tail by masking, loop has an outside user for",
1192  "Cannot fold tail by masking in the presence of live outs.",
1193  "LiveOutFoldingTailByMasking", UI);
1194  return false;
1195  }
1196  }
1197 
1198  // The list of pointers that we can safely read and write to remains empty.
1199  SmallPtrSet<Value *, 8> SafePointers;
1200 
1201  // Check and mark all blocks for predication, including those that ordinarily
1202  // do not need predication such as the header block.
1203  for (BasicBlock *BB : TheLoop->blocks()) {
1204  if (!blockCanBePredicated(BB, SafePointers)) {
1205  reportVectorizationFailure(
1206  "Cannot fold tail by masking as required",
1207  "control flow cannot be substituted for a select",
1208  "NoCFGForSelect", BB->getTerminator());
1209  return false;
1210  }
1211  }
1212 
1213  LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n");
1214  return true;
1215 }
1216 
1217 } // namespace llvm
static bool isUniformLoop(Loop *Lp, Loop *OuterLp)
static unsigned RuntimeMemoryCheckThreshold
performing memory disambiguation checks at runtime do not make more than this number of comparisons...
uint64_t CallInst * C
#define LV_NAME
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:110
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:722
Diagnostic information for missed-optimization remarks.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:224
LLVMContext & Context
static Type * getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1)
DiagnosticInfoOptimizationBase::Argument NV
This class represents lattice values for constants.
Definition: AllocatorList.h:23
Instruction * getUnsafeAlgebraInst()
Returns first unsafe algebra instruction in the PHI node&#39;s use-chain.
bool isCastedInductionVariable(const Value *V)
Returns True if V is a cast that is part of an induction def-use chain, and had been proven to be red...
static bool isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D, const SCEV *Expr=nullptr, SmallVectorImpl< Instruction *> *CastsToIgnore=nullptr)
Returns true if Phi is an induction in the loop L.
amdgpu Simplify well known AMD library false FunctionCallee Value const Twine & Name
static MDString * get(LLVMContext &Context, StringRef Str)
Definition: Metadata.cpp:453
ConstantInt * getConstIntStepValue() const
LLVM_NODISCARD bool startswith(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:256
llvm::MDNode * makePostTransformationMetadata(llvm::LLVMContext &Context, MDNode *OrigLoopID, llvm::ArrayRef< llvm::StringRef > RemovePrefixes, llvm::ArrayRef< llvm::MDNode *> AddAttrs)
Create a new LoopID after the loop has been transformed.
Definition: LoopInfo.cpp:968
bool isUniform(Value *V)
Returns true if the value V is uniform within the loop.
TODO: The following VectorizationFactor was pulled out of LoopVectorizationCostModel class...
This class represents a function call, abstracting a target machine&#39;s calling convention.
int64_t getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp, const ValueToValueMap &StridesMap=ValueToValueMap(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of its element size.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:173
bool mayWriteToMemory() const
Return true if this instruction may modify memory.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1192
InductionKind getKind() const
A debug info location.
Definition: DebugLoc.h:33
Metadata node.
Definition: Metadata.h:863
F(f)
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1068
An instruction for reading from memory.
Definition: Instructions.h:167
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:137
cl::opt< bool > EnableVPlanPredication
Value * getStartValue() const
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:32
#define DEBUG_TYPE
const char * vectorizeAnalysisPassName() const
If hints are provided that force vectorization, use the AlwaysPrint pass name to force the frontend t...
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:129
Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
bool hasUnsafeAlgebra()
Returns true if the recurrence has unsafe algebra which requires a relaxed floating-point model...
unsigned getNumBackEdges() const
Calculate the number of back edges to the loop header.
Definition: LoopInfo.h:229
This file defines the LoopVectorizationLegality class.
const DataLayout & getDataLayout() const
Get the data layout for the module&#39;s target platform.
Definition: Module.cpp:369
static const unsigned MaxVectorWidth
Maximum SIMD width.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:80
bool isFloatingPointTy() const
Return true if this is one of the six floating-point types.
Definition: Type.h:161
static cl::opt< bool > EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden, cl::desc("Enable if-conversion during vectorization."))
Diagnostic information for optimization analysis remarks.
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:196
LLVM_NODISCARD StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition: StringRef.h:578
BlockT * getHeader() const
Definition: LoopInfo.h:102
int isConsecutivePtr(Value *Ptr)
Check if this pointer is consecutive when vectorizing.
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:200
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:244
bool isInductionVariable(const Value *V)
Returns True if V can be considered as an induction variable in this loop.
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition: LoopInfo.cpp:471
static bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as a element type.
Definition: Type.cpp:620
Value * getLoadStorePointerOperand(Value *V)
A helper function that returns the pointer operand of a load or store instruction.
An instruction for storing to memory.
Definition: Instructions.h:320
static cl::opt< unsigned > PragmaVectorizeMemoryCheckThreshold("pragma-vectorize-memory-check-threshold", cl::init(128), cl::Hidden, cl::desc("The maximum allowed number of runtime memory checks with a " "vectorize(enable) pragma."))
bool blockNeedsPredication(BasicBlock *BB)
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
Diagnostic information for optimization analysis remarks related to pointer aliasing.
static ConstantAsMetadata * get(Constant *C)
Definition: Metadata.h:409
StringRef getString() const
Definition: Metadata.cpp:463
bool isVoidTy() const
Return true if this is &#39;void&#39;.
Definition: Type.h:140
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space...
Definition: DataLayout.cpp:769
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata *> MDs)
Definition: Metadata.h:1165
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
Integer induction variable. Step = C.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:148
static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst, SmallPtrSetImpl< Value *> &AllowedExit)
Check that the instruction has outside loop users and is not an identified reduction variable...
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:428
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:64
Conditional or Unconditional Branch instruction.
Instruction * getUnsafeAlgebraInst()
Returns induction operator that does not have "fast-math" property and requires FP unsafe mode...
Value * getIncomingValueForBlock(const BasicBlock *BB) const
bool allowVectorization(Function *F, Loop *L, bool VectorizeOnlyWhenForced) const
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:223
#define H(x, y, z)
Definition: MD5.cpp:57
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:370
bool mayThrow() const
Return true if this instruction may throw an exception.
OptimizationRemarkAnalysis createLVMissedAnalysis(const char *PassName, StringRef RemarkName, Loop *TheLoop, Instruction *I=nullptr)
Create an analysis remark that explains why vectorization failed.
bool isFast() const
Determine whether all fast-math-flags are set.
bool isBinaryOp() const
Definition: Instruction.h:130
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
op_range operands()
Definition: User.h:237
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:381
static bool isReductionPHI(PHINode *Phi, Loop *TheLoop, RecurrenceDescriptor &RedDes, DemandedBits *DB=nullptr, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr)
Returns true if Phi is a reduction in TheLoop.
static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp)
bool doesNotMeet(Function *F, Loop *L, const LoopVectorizeHints &Hints)
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:569
size_t size() const
Definition: SmallVector.h:52
static bool isFirstOrderRecurrence(PHINode *Phi, Loop *TheLoop, DenseMap< Instruction *, Instruction *> &SinkAfter, DominatorTree *DT)
Returns true if Phi is a first-order recurrence.
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:61
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition: IVDescriptors.h:62
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:112
Diagnostic information for optimization analysis remarks related to floating-point non-commutativity...
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:417
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
auto size(R &&Range, typename std::enable_if< std::is_same< typename std::iterator_traits< decltype(Range.begin())>::iterator_category, std::random_access_iterator_tag >::value, void >::type *=nullptr) -> decltype(std::distance(Range.begin(), Range.end()))
Get the size of a range.
Definition: STLExtras.h:1173
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file. ...
A struct for saving information about induction variables.
bool canFoldTailByMasking()
Return true if we can vectorize this loop while folding its tail by masking.
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition: Error.h:147
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type...
Definition: Type.cpp:129
DenseMap< const Value *, Value * > ValueToValueMap
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
bool isFirstOrderRecurrence(const PHINode *Phi)
Returns True if Phi is a first-order recurrence in this loop.
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:631
static const unsigned MaxInterleaveFactor
Maximum vectorization interleave count.
bool hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, unsigned ScalarOpdIdx)
Identifies if the intrinsic has a scalar operand.
Definition: VectorUtils.cpp:90
unsigned getNumIncomingValues() const
Return the number of incoming edges.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
PHINode * getCanonicalInductionVariable() const
Check to see if the loop has a canonical induction variable: an integer recurrence that starts at 0 a...
Definition: LoopInfo.cpp:146
static bool canIfConvertPHINodes(BasicBlock *BB)
Check whether it is safe to if-convert this phi node.
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:55
bool isInductionPhi(const Value *V)
Returns True if V is a Phi node of an induction variable in this loop.
Class for arbitrary precision integers.
Definition: APInt.h:69
iterator_range< user_iterator > users()
Definition: Value.h:399
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:467
bool hasUnsafeAlgebra()
Returns true if the induction type is FP and the binary operator does not have the "fast-math" proper...
const SmallVectorImpl< Instruction * > & getCastInsts() const
Returns a reference to the type cast instructions in the induction update chain, that are redundant w...
LoopVectorizeHints(const Loop *L, bool InterleaveOnlyWhenForced, OptimizationRemarkEmitter &ORE)
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
Definition: LoopInfo.cpp:447
static void debugVectorizationFailure(const StringRef DebugMsg, Instruction *I)
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:321
static const size_t npos
Definition: StringRef.h:50
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:175
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:55
Instruction * getLoopExitInstr()
StringRef getValueAsString() const
Return the attribute&#39;s value as a string.
Definition: Attributes.cpp:223
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:467
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:106
#define I(x, y, z)
Definition: MD5.cpp:58
bool mayReadFromMemory() const
Return true if this instruction may read memory.
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:332
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:324
Collection of parameters shared beetween the Loop Vectorizer and the Loop Access Analysis.
bool canVectorize(bool UseVPlanNativePath)
Returns true if it is legal to vectorize this loop.
std::string str() const
Return the twine contents as a std::string.
Definition: Twine.cpp:17
bool empty() const
Definition: LoopInfo.h:148
void setAlreadyVectorized()
Mark the loop L as already vectorized by setting the width to 1.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVM Value Representation.
Definition: Value.h:72
static bool blockNeedsPredication(BasicBlock *BB, Loop *TheLoop, DominatorTree *DT)
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.h:333
static unsigned VectorizationInterleave
Interleave factor as overridden by the user.
static Type * convertPointerToIntegerType(const DataLayout &DL, Type *Ty)
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
static cl::opt< unsigned > VectorizeSCEVCheckThreshold("vectorize-scev-check-threshold", cl::init(16), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed."))
A single uniqued string.
Definition: Metadata.h:603
static cl::opt< unsigned > PragmaVectorizeSCEVCheckThreshold("pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed with a " "vectorize(enable) pragma"))
Utility class for getting and setting loop vectorizer hints in the form of loop metadata.
static bool isInterleaveForced()
True if force-vector-interleave was specified by the user.
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1074
#define LLVM_DEBUG(X)
Definition: Debug.h:122
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:49
Root of the metadata hierarchy.
Definition: Metadata.h:57
The optimization diagnostic interface.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
void emitRemarkWithHints() const
Dumps all the hint information.
void validate(const Triple &TT, const FeatureBitset &FeatureBits)
const BasicBlock * getParent() const
Definition: Instruction.h:66