LLVM  3.7.0
LoopUnrollPass.cpp
Go to the documentation of this file.
1 //===-- LoopUnroll.cpp - Loop unroller pass -------------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass implements a simple loop unroller. It works best when loops have
11 // been canonicalized by the -indvars pass, allowing it to determine the trip
12 // counts of loops easily.
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/Transforms/Scalar.h"
16 #include "llvm/ADT/SetVector.h"
20 #include "llvm/Analysis/LoopPass.h"
24 #include "llvm/IR/DataLayout.h"
25 #include "llvm/IR/DiagnosticInfo.h"
26 #include "llvm/IR/Dominators.h"
27 #include "llvm/IR/InstVisitor.h"
28 #include "llvm/IR/IntrinsicInst.h"
29 #include "llvm/IR/Metadata.h"
31 #include "llvm/Support/Debug.h"
34 #include <climits>
35 
36 using namespace llvm;
37 
38 #define DEBUG_TYPE "loop-unroll"
39 
40 static cl::opt<unsigned>
41  UnrollThreshold("unroll-threshold", cl::init(150), cl::Hidden,
42  cl::desc("The baseline cost threshold for loop unrolling"));
43 
45  "unroll-percent-dynamic-cost-saved-threshold", cl::init(20), cl::Hidden,
46  cl::desc("The percentage of estimated dynamic cost which must be saved by "
47  "unrolling to allow unrolling up to the max threshold."));
48 
50  "unroll-dynamic-cost-savings-discount", cl::init(2000), cl::Hidden,
51  cl::desc("This is the amount discounted from the total unroll cost when "
52  "the unrolled form has a high dynamic cost savings (triggered by "
53  "the '-unroll-perecent-dynamic-cost-saved-threshold' flag)."));
54 
56  "unroll-max-iteration-count-to-analyze", cl::init(0), cl::Hidden,
57  cl::desc("Don't allow loop unrolling to simulate more than this number of"
58  "iterations when checking full unroll profitability"));
59 
60 static cl::opt<unsigned>
61 UnrollCount("unroll-count", cl::init(0), cl::Hidden,
62  cl::desc("Use this unroll count for all loops including those with "
63  "unroll_count pragma values, for testing purposes"));
64 
65 static cl::opt<bool>
66 UnrollAllowPartial("unroll-allow-partial", cl::init(false), cl::Hidden,
67  cl::desc("Allows loops to be partially unrolled until "
68  "-unroll-threshold loop size is reached."));
69 
70 static cl::opt<bool>
71 UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::init(false), cl::Hidden,
72  cl::desc("Unroll loops with run-time trip counts"));
73 
74 static cl::opt<unsigned>
75 PragmaUnrollThreshold("pragma-unroll-threshold", cl::init(16 * 1024), cl::Hidden,
76  cl::desc("Unrolled size limit for loops with an unroll(full) or "
77  "unroll_count pragma."));
78 
79 namespace {
80  class LoopUnroll : public LoopPass {
81  public:
82  static char ID; // Pass ID, replacement for typeid
// Construct the pass. For each of T (threshold), C (count), P (allow partial)
// and R (runtime), the value -1 means "use the matching command-line option";
// any other value overrides that option.
83  LoopUnroll(int T = -1, int C = -1, int P = -1, int R = -1) : LoopPass(ID) {
84  CurrentThreshold = (T == -1) ? UnrollThreshold : unsigned(T);
85  CurrentPercentDynamicCostSavedThreshold =
87  CurrentDynamicCostSavingsDiscount = UnrollDynamicCostSavingsDiscount;
88  CurrentCount = (C == -1) ? UnrollCount : unsigned(C);
89  CurrentAllowPartial = (P == -1) ? UnrollAllowPartial : (bool)P;
90  CurrentRuntime = (R == -1) ? UnrollRuntime : (bool)R;
91 
// A setting counts as user-specified when it was passed to the constructor
// (where the constructor has a parameter for it) or given on the command
// line. selectThresholds and runOnLoop consult these flags before falling
// back to the target's UnrollingPreferences.
92  UserThreshold = (T != -1) || (UnrollThreshold.getNumOccurrences() > 0);
93  UserPercentDynamicCostSavedThreshold =
94  (UnrollPercentDynamicCostSavedThreshold.getNumOccurrences() > 0);
95  UserDynamicCostSavingsDiscount =
96  (UnrollDynamicCostSavingsDiscount.getNumOccurrences() > 0);
97  UserAllowPartial = (P != -1) ||
98  (UnrollAllowPartial.getNumOccurrences() > 0);
99  UserRuntime = (R != -1) || (UnrollRuntime.getNumOccurrences() > 0);
100  UserCount = (C != -1) || (UnrollCount.getNumOccurrences() > 0);
101 
103  }
104 
105  /// A magic value for use with the Threshold parameter to indicate
106  /// that the loop unroll should be performed regardless of how much
107  /// code expansion would result.
108  static const unsigned NoThreshold = UINT_MAX;
109 
110  // Threshold to use when optsize is specified (and there is no
111  // explicit -unroll-threshold).
112  static const unsigned OptSizeUnrollThreshold = 50;
113 
114  // Default unroll count for loops with run-time trip count if
115  // -unroll-count is not set
116  static const unsigned UnrollRuntimeCount = 8;
117 
118  unsigned CurrentCount;
119  unsigned CurrentThreshold;
120  unsigned CurrentPercentDynamicCostSavedThreshold;
121  unsigned CurrentDynamicCostSavingsDiscount;
122  bool CurrentAllowPartial;
123  bool CurrentRuntime;
124 
125  // Flags for whether the 'current' settings are user-specified.
126  bool UserCount;
127  bool UserThreshold;
128  bool UserPercentDynamicCostSavedThreshold;
129  bool UserDynamicCostSavingsDiscount;
130  bool UserAllowPartial;
131  bool UserRuntime;
132 
133  bool runOnLoop(Loop *L, LPPassManager &LPM) override;
134 
135  /// This transformation requires natural loop information & requires that
136  /// loop preheaders be inserted into the CFG...
137  ///
138  void getAnalysisUsage(AnalysisUsage &AU) const override {
149  // FIXME: Loop unroll requires LCSSA. And LCSSA requires dom info.
150  // If loop unroll does not preserve dom info then LCSSA pass on next
151  // loop will receive invalid dom info.
152  // For now, recreate dom info, if loop is unrolled.
154  }
155 
156  // Fill in the UnrollingPreferences parameter: first seed it with this
157  // pass's current settings and built-in constants, then pass it to the
// TargetTransformInfo. The TTI call comes last, presumably so that the
// target can adjust the seeded values.
158  void getUnrollingPreferences(Loop *L, const TargetTransformInfo &TTI,
160  UP.Threshold = CurrentThreshold;
162  CurrentPercentDynamicCostSavedThreshold;
163  UP.DynamicCostSavingsDiscount = CurrentDynamicCostSavingsDiscount;
164  UP.OptSizeThreshold = OptSizeUnrollThreshold;
// Partial unrolling starts from the same limits as full unrolling.
165  UP.PartialThreshold = CurrentThreshold;
166  UP.PartialOptSizeThreshold = OptSizeUnrollThreshold;
167  UP.Count = CurrentCount;
// UINT_MAX: no cap on the unroll count by default.
168  UP.MaxCount = UINT_MAX;
169  UP.Partial = CurrentAllowPartial;
170  UP.Runtime = CurrentRuntime;
171  UP.AllowExpensiveTripCount = false;
172  TTI.getUnrollingPreferences(L, UP);
173  }
174 
175  // Select and return an unroll count based on parameters from
176  // user, unroll preferences, unroll pragmas, or a heuristic.
177  // SetExplicitly is set to true if the unroll count is set by
178  // the user or a pragma rather than selected heuristically.
179  unsigned
180  selectUnrollCount(const Loop *L, unsigned TripCount, bool PragmaFullUnroll,
181  unsigned PragmaCount,
183  bool &SetExplicitly);
184 
185  // Select threshold values used to limit unrolling based on a
186  // total unrolled size. Parameters Threshold and PartialThreshold
187  // are set to the maximum unrolled size for fully and partially
188  // unrolled loops respectively. PercentDynamicCostSavedThreshold and
// DynamicCostSavingsDiscount are output parameters as well.
189  void selectThresholds(const Loop *L, bool HasPragma,
191  unsigned &Threshold, unsigned &PartialThreshold,
192  unsigned &PercentDynamicCostSavedThreshold,
193  unsigned &DynamicCostSavingsDiscount) {
194  // Determine the current unrolling threshold. While this is
195  // normally set from UnrollThreshold, it is overridden to a
196  // smaller value if the current function is marked as
197  // optimize-for-size, and the unroll threshold was not user
198  // specified.
// A user-specified threshold applies to both full and partial unrolling
// and wins over the values carried in UP.
199  Threshold = UserThreshold ? CurrentThreshold : UP.Threshold;
200  PartialThreshold = UserThreshold ? CurrentThreshold : UP.PartialThreshold;
201  PercentDynamicCostSavedThreshold =
202  UserPercentDynamicCostSavedThreshold
203  ? CurrentPercentDynamicCostSavedThreshold
205  DynamicCostSavingsDiscount = UserDynamicCostSavingsDiscount
206  ? CurrentDynamicCostSavingsDiscount
208 
209  if (!UserThreshold &&
210  L->getHeader()->getParent()->hasFnAttribute(
212  Threshold = UP.OptSizeThreshold;
213  PartialThreshold = UP.PartialOptSizeThreshold;
214  }
215  if (HasPragma) {
216  // If the loop has an unrolling pragma, we want to be more
217  // aggressive with unrolling limits. Set thresholds to at
218  // least the PragmaUnrollThreshold value which is larger than the
219  // default limits. std::max keeps the larger operand, so a pragma can
// only raise a threshold, never lower it.
220  if (Threshold != NoThreshold)
221  Threshold = std::max<unsigned>(Threshold, PragmaUnrollThreshold);
222  if (PartialThreshold != NoThreshold)
223  PartialThreshold =
224  std::max<unsigned>(PartialThreshold, PragmaUnrollThreshold);
225  }
226  }
227  bool canUnrollCompletely(Loop *L, unsigned Threshold,
228  unsigned PercentDynamicCostSavedThreshold,
229  unsigned DynamicCostSavingsDiscount,
230  uint64_t UnrolledCost, uint64_t RolledDynamicCost);
231  };
232 }
233 
234 char LoopUnroll::ID = 0;
235 INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
239 INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
242 INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
243 
244 Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial,
245  int Runtime) {
246  return new LoopUnroll(Threshold, Count, AllowPartial, Runtime);
247 }
248 
250  return llvm::createLoopUnrollPass(-1, -1, 0, 0);
251 }
252 
253 namespace {
254 // This class is used to get an estimate of the optimization effects that we
255 // could get from complete loop unrolling. It comes from the fact that some
256 // loads might be replaced with concrete constant values and that could trigger
257 // a chain of instruction simplifications.
258 //
259 // E.g. we might have:
260 // int a[] = {0, 1, 0};
261 // v = 0;
262 // for (i = 0; i < 3; i ++)
263 // v += b[i]*a[i];
264 // If we completely unroll the loop, we would get:
265 // v = b[0]*a[0] + b[1]*a[1] + b[2]*a[2]
266 // Which then will be simplified to:
267 // v = b[0]* 0 + b[1]* 1 + b[2]* 0
268 // And finally:
269 // v = b[1]
270 class UnrolledInstAnalyzer : private InstVisitor<UnrolledInstAnalyzer, bool> {
272  friend class InstVisitor<UnrolledInstAnalyzer, bool>;
273  struct SimplifiedAddress {
274  Value *Base = nullptr;
275  ConstantInt *Offset = nullptr;
276  };
277 
278 public:
279  UnrolledInstAnalyzer(unsigned Iteration,
280  DenseMap<Value *, Constant *> &SimplifiedValues,
281  const Loop *L, ScalarEvolution &SE)
282  : Iteration(Iteration), SimplifiedValues(SimplifiedValues), L(L), SE(SE) {
283  IterationNumber = SE.getConstant(APInt(64, Iteration));
284  }
285 
286  // Allow access to the initial visit method.
287  using Base::visit;
288 
289 private:
290  /// \brief A cache of pointer bases and constant-folded offsets corresponding
291  /// to GEP (or derived from GEP) instructions.
292  ///
293  /// In order to find the base pointer one needs to perform non-trivial
294  /// traversal of the corresponding SCEV expression, so it's good to have the
295  /// results saved.
296  DenseMap<Value *, SimplifiedAddress> SimplifiedAddresses;
297 
298  /// \brief Number of currently simulated iteration.
299  ///
300  /// If an expression is ConstAddress+Constant, then the Constant is
301  /// Start + Iteration*Step, where Start and Step could be obtained from
302  /// SCEVGEPCache.
303  unsigned Iteration;
304 
305  /// \brief SCEV expression corresponding to number of currently simulated
306  /// iteration.
307  const SCEV *IterationNumber;
308 
309  /// \brief A Value->Constant map for keeping values that we managed to
310  /// constant-fold on the given iteration.
311  ///
312  /// While we walk the loop instructions, we build up and maintain a mapping
313  /// of simplified values specific to this iteration. The idea is to propagate
314  /// any special information we have about loads that can be replaced with
315  /// constants after complete unrolling, and account for likely simplifications
316  /// post-unrolling.
317  DenseMap<Value *, Constant *> &SimplifiedValues;
318 
319  const Loop *L;
320  ScalarEvolution &SE;
321 
322  /// \brief Try to simplify instruction \param I using its SCEV expression.
323  ///
324  /// The idea is that some AddRec expressions become constants, which then
325  /// could trigger folding of other instructions. However, that only happens
326  /// for expressions whose start value is also constant, which isn't always the
327  /// case. In another common and important case the start value is just some
328  /// address (i.e. SCEVUnknown) - in this case we compute the offset and save
329  /// it along with the base address instead.
330  bool simplifyInstWithSCEV(Instruction *I) {
331  if (!SE.isSCEVable(I->getType()))
332  return false;
333 
334  const SCEV *S = SE.getSCEV(I);
335  if (auto *SC = dyn_cast<SCEVConstant>(S)) {
336  SimplifiedValues[I] = SC->getValue();
337  return true;
338  }
339 
340  auto *AR = dyn_cast<SCEVAddRecExpr>(S);
341  if (!AR)
342  return false;
343 
344  const SCEV *ValueAtIteration = AR->evaluateAtIteration(IterationNumber, SE);
345  // Check if the AddRec expression becomes a constant.
346  if (auto *SC = dyn_cast<SCEVConstant>(ValueAtIteration)) {
347  SimplifiedValues[I] = SC->getValue();
348  return true;
349  }
350 
351  // Check if the offset from the base address becomes a constant.
352  auto *Base = dyn_cast<SCEVUnknown>(SE.getPointerBase(S));
353  if (!Base)
354  return false;
355  auto *Offset =
356  dyn_cast<SCEVConstant>(SE.getMinusSCEV(ValueAtIteration, Base));
357  if (!Offset)
358  return false;
359  SimplifiedAddress Address;
360  Address.Base = Base->getValue();
361  Address.Offset = Offset->getValue();
362  SimplifiedAddresses[I] = Address;
363  return true;
364  }
365 
366  /// Base case for the instruction visitor.
367  bool visitInstruction(Instruction &I) {
368  return simplifyInstWithSCEV(&I);
369  }
370 
371  /// TODO: Add visitors for other instruction types, e.g. ZExt, SExt.
372 
373  /// Try to simplify binary operator I.
374  ///
375  /// TODO: Probaly it's worth to hoist the code for estimating the
376  /// simplifications effects to a separate class, since we have a very similar
377  /// code in InlineCost already.
378  bool visitBinaryOperator(BinaryOperator &I) {
379  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
380  if (!isa<Constant>(LHS))
381  if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS))
382  LHS = SimpleLHS;
383  if (!isa<Constant>(RHS))
384  if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS))
385  RHS = SimpleRHS;
386 
387  Value *SimpleV = nullptr;
388  const DataLayout &DL = I.getModule()->getDataLayout();
389  if (auto FI = dyn_cast<FPMathOperator>(&I))
390  SimpleV =
391  SimplifyFPBinOp(I.getOpcode(), LHS, RHS, FI->getFastMathFlags(), DL);
392  else
393  SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, DL);
394 
395  if (Constant *C = dyn_cast_or_null<Constant>(SimpleV))
396  SimplifiedValues[&I] = C;
397 
398  if (SimpleV)
399  return true;
400  return Base::visitBinaryOperator(I);
401  }
402 
403  /// Try to fold load I.
404  bool visitLoad(LoadInst &I) {
405  Value *AddrOp = I.getPointerOperand();
406 
407  auto AddressIt = SimplifiedAddresses.find(AddrOp);
408  if (AddressIt == SimplifiedAddresses.end())
409  return false;
410  ConstantInt *SimplifiedAddrOp = AddressIt->second.Offset;
411 
412  auto *GV = dyn_cast<GlobalVariable>(AddressIt->second.Base);
413  // We're only interested in loads that can be completely folded to a
414  // constant.
415  if (!GV || !GV->hasInitializer())
416  return false;
417 
419  dyn_cast<ConstantDataSequential>(GV->getInitializer());
420  if (!CDS)
421  return false;
422 
423  int ElemSize = CDS->getElementType()->getPrimitiveSizeInBits() / 8U;
424  assert(SimplifiedAddrOp->getValue().getActiveBits() < 64 &&
425  "Unexpectedly large index value.");
426  int64_t Index = SimplifiedAddrOp->getSExtValue() / ElemSize;
427  if (Index >= CDS->getNumElements()) {
428  // FIXME: For now we conservatively ignore out of bound accesses, but
429  // we're allowed to perform the optimization in this case.
430  return false;
431  }
432 
433  Constant *CV = CDS->getElementAsConstant(Index);
434  assert(CV && "Constant expected.");
435  SimplifiedValues[&I] = CV;
436 
437  return true;
438  }
439 };
440 } // namespace
441 
442 
443 namespace {
/// \brief Pair of cost estimates produced by the full-unroll analysis.
///
/// Kept as a plain aggregate so it can be brace-initialized.
struct EstimatedUnrollCost {
  /// \brief Estimated cost of the loop after it has been unrolled.
  unsigned UnrolledCost;

  /// \brief Estimated dynamic cost of executing the instructions of the loop
  /// in its rolled form.
  unsigned RolledDynamicCost;
};
452 }
453 
454 /// \brief Figure out if the loop is worth full unrolling.
455 ///
456 /// Complete loop unrolling can make some loads constant, and we need to know
457 /// if that would expose any further optimization opportunities. This routine
458 /// estimates this optimization. It computes cost of unrolled loop
459 /// (UnrolledCost) and dynamic cost of the original loop (RolledDynamicCost). By
460 /// dynamic cost we mean that we won't count costs of blocks that are known not
461 /// to be executed (i.e. if we have a branch in the loop and we know that at the
462 /// given iteration its condition would be resolved to true, we won't add up the
463 /// cost of the 'false'-block).
464 /// \returns Optional value, holding the RolledDynamicCost and UnrolledCost. If
465 /// the analysis failed (no benefits expected from the unrolling, or the loop is
466 /// too big to analyze), the returned value is None.
// Simulates every iteration of L (TripCount of them), visiting each reachable
// in-loop block once per iteration, and accumulates two totals: the cost of
// the instructions the analyzer could not simplify (UnrolledCost) and the cost
// of all visited instructions (RolledDynamicCost).
468 analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, ScalarEvolution &SE,
469  const TargetTransformInfo &TTI,
470  unsigned MaxUnrolledLoopSize) {
471  // We want to be able to scale offsets by the trip count and add more offsets
472  // to them without checking for overflows, and we already don't want to
473  // analyze *massive* trip counts, so we force the max to be reasonably small.
474  assert(UnrollMaxIterationsCountToAnalyze < (INT_MAX / 2) &&
475  "The unroll iterations max is too large!");
476 
477  // Don't simulate loops with a big or unknown tripcount. A zero
// UnrollMaxIterationsCountToAnalyze also takes this exit, i.e. it disables
// the analysis altogether.
478  if (!UnrollMaxIterationsCountToAnalyze || !TripCount ||
480  return None;
481 
483  DenseMap<Value *, Constant *> SimplifiedValues;
484 
485  // The estimated cost of the unrolled form of the loop. We try to estimate
486  // this by simplifying as much as we can while computing the estimate.
487  unsigned UnrolledCost = 0;
488  // We also track the estimated dynamic (that is, actually executed) cost in
489  // the rolled form. This helps identify cases when the savings from unrolling
490  // aren't just exposing dead control flows, but actual reduced dynamic
491  // instructions due to the simplifications which we expect to occur after
492  // unrolling.
493  unsigned RolledDynamicCost = 0;
494 
495  // Simulate execution of each iteration of the loop counting instructions,
496  // which would be simplified.
497  // Since the same load will take different values on different iterations,
498  // we literally have to go through all loop's iterations.
499  for (unsigned Iteration = 0; Iteration < TripCount; ++Iteration) {
// Simplified values are per-iteration, so start each iteration from scratch.
500  SimplifiedValues.clear();
501  UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, L, SE);
502 
503  BBWorklist.clear();
504  BBWorklist.insert(L->getHeader());
505  // Note that we *must not* cache the size, this loop grows the worklist.
506  for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
507  BasicBlock *BB = BBWorklist[Idx];
508 
509  // Visit all instructions in the given basic block and try to simplify
510  // it. We don't change the actual IR, just count optimization
511  // opportunities.
512  for (Instruction &I : *BB) {
513  unsigned InstCost = TTI.getUserCost(&I);
514 
515  // Visit the instruction to analyze its loop cost after unrolling,
516  // and if the visitor returns false, include this instruction in the
517  // unrolled cost.
518  if (!Analyzer.visit(I))
519  UnrolledCost += InstCost;
520 
521  // Also track this instructions expected cost when executing the rolled
522  // loop form.
523  RolledDynamicCost += InstCost;
524 
525  // If unrolled body turns out to be too big, bail out.
526  if (UnrolledCost > MaxUnrolledLoopSize)
527  return None;
528  }
529 
530  // Add BB's successors to the worklist. Only blocks inside the loop are
// followed.
531  for (BasicBlock *Succ : successors(BB))
532  if (L->contains(Succ))
533  BBWorklist.insert(Succ);
534  }
535 
536  // If we found no optimization opportunities on the first iteration, we
537  // won't find them on later ones too.
// NOTE(review): this comparison runs after every iteration on the
// accumulated totals, not only after the first one; the totals can only be
// equal if nothing has been simplified so far.
538  if (UnrolledCost == RolledDynamicCost)
539  return None;
540  }
541  return {{UnrolledCost, RolledDynamicCost}};
542 }
543 
544 /// ApproximateLoopSize - Approximate the size of the loop.
///
/// Runs Metrics.analyzeBasicBlock over every block of \p L (passing the
/// ephemeral values collected for the loop) and returns Metrics.NumInsts,
/// clamped to at least 3.
/// \param NumCalls [out] receives Metrics.NumInlineCandidates.
/// \param NotDuplicatable [out] receives Metrics.notDuplicatable.
545 static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls,
546  bool &NotDuplicatable,
547  const TargetTransformInfo &TTI,
548  AssumptionCache *AC) {
550  CodeMetrics::collectEphemeralValues(L, AC, EphValues);
551 
553  for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
554  I != E; ++I)
555  Metrics.analyzeBasicBlock(*I, TTI, EphValues);
556  NumCalls = Metrics.NumInlineCandidates;
557  NotDuplicatable = Metrics.notDuplicatable;
558 
559  unsigned LoopSize = Metrics.NumInsts;
560 
561  // Don't allow an estimate of size zero. This would allow unrolling of loops
562  // with huge iteration counts, which is a compile time problem even if it's
563  // not a problem for code quality. Also, the code using this size may assume
564  // that each loop has at least three instructions (likely a conditional
565  // branch, a comparison feeding that branch, and some kind of loop increment
566  // feeding that comparison instruction).
567  LoopSize = std::max(LoopSize, 3u);
568 
569  return LoopSize;
570 }
571 
572 // Returns the loop hint metadata node with the given name (for example,
573 // "llvm.loop.unroll.count"). If no such metadata node exists, then nullptr is
574 // returned.
576  if (MDNode *LoopID = L->getLoopID())
577  return GetUnrollMetadata(LoopID, Name);
578  return nullptr;
579 }
580 
581 // Returns true if the loop has an unroll(full) pragma.
582 static bool HasUnrollFullPragma(const Loop *L) {
583  return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.full");
584 }
585 
586 // Returns true if the loop has an unroll(disable) pragma.
587 static bool HasUnrollDisablePragma(const Loop *L) {
588  return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.disable");
589 }
590 
591 // Returns true if the loop has an runtime unroll(disable) pragma.
592 static bool HasRuntimeUnrollDisablePragma(const Loop *L) {
593  return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.runtime.disable");
594 }
595 
596 // If loop has an unroll_count pragma return the (necessarily
597 // positive) value from the pragma. Otherwise return 0.
598 static unsigned UnrollCountPragmaValue(const Loop *L) {
599  MDNode *MD = GetUnrollMetadataForLoop(L, "llvm.loop.unroll.count");
600  if (MD) {
601  assert(MD->getNumOperands() == 2 &&
602  "Unroll count hint metadata should have two operands.");
603  unsigned Count =
604  mdconst::extract<ConstantInt>(MD->getOperand(1))->getZExtValue();
605  assert(Count >= 1 && "Unroll count must be positive.");
606  return Count;
607  }
608  return 0;
609 }
610 
611 // Remove existing unroll metadata and add unroll disable metadata to
612 // indicate the loop has already been unrolled. This prevents a loop
613 // from being unrolled more than is directed by a pragma if the loop
614 // unrolling pass is run more than once (which it generally is).
615 static void SetLoopAlreadyUnrolled(Loop *L) {
616  MDNode *LoopID = L->getLoopID();
617  if (!LoopID) return;
618 
619  // First remove any existing loop unrolling metadata.
621  // Reserve first location for self reference to the LoopID metadata node.
622  MDs.push_back(nullptr);
623  for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
624  bool IsUnrollMetadata = false;
625  MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
626  if (MD) {
627  const MDString *S = dyn_cast<MDString>(MD->getOperand(0));
628  IsUnrollMetadata = S && S->getString().startswith("llvm.loop.unroll.");
629  }
630  if (!IsUnrollMetadata)
631  MDs.push_back(LoopID->getOperand(i));
632  }
633 
634  // Add unroll(disable) metadata to disable future unrolling.
635  LLVMContext &Context = L->getHeader()->getContext();
636  SmallVector<Metadata *, 1> DisableOperands;
637  DisableOperands.push_back(MDString::get(Context, "llvm.loop.unroll.disable"));
638  MDNode *DisableNode = MDNode::get(Context, DisableOperands);
639  MDs.push_back(DisableNode);
640 
641  MDNode *NewLoopID = MDNode::get(Context, MDs);
642  // Set operand 0 to refer to the loop id itself.
643  NewLoopID->replaceOperandWith(0, NewLoopID);
644  L->setLoopID(NewLoopID);
645 }
646 
647 bool LoopUnroll::canUnrollCompletely(Loop *L, unsigned Threshold,
648  unsigned PercentDynamicCostSavedThreshold,
649  unsigned DynamicCostSavingsDiscount,
650  uint64_t UnrolledCost,
651  uint64_t RolledDynamicCost) {
652 
653  if (Threshold == NoThreshold) {
654  DEBUG(dbgs() << " Can fully unroll, because no threshold is set.\n");
655  return true;
656  }
657 
658  if (UnrolledCost <= Threshold) {
659  DEBUG(dbgs() << " Can fully unroll, because unrolled cost: "
660  << UnrolledCost << "<" << Threshold << "\n");
661  return true;
662  }
663 
664  assert(UnrolledCost && "UnrolledCost can't be 0 at this point.");
665  assert(RolledDynamicCost >= UnrolledCost &&
666  "Cannot have a higher unrolled cost than a rolled cost!");
667 
668  // Compute the percentage of the dynamic cost in the rolled form that is
669  // saved when unrolled. If unrolling dramatically reduces the estimated
670  // dynamic cost of the loop, we use a higher threshold to allow more
671  // unrolling.
672  unsigned PercentDynamicCostSaved =
673  (uint64_t)(RolledDynamicCost - UnrolledCost) * 100ull / RolledDynamicCost;
674 
675  if (PercentDynamicCostSaved >= PercentDynamicCostSavedThreshold &&
676  (int64_t)UnrolledCost - (int64_t)DynamicCostSavingsDiscount <=
677  (int64_t)Threshold) {
678  DEBUG(dbgs() << " Can fully unroll, because unrolling will reduce the "
679  "expected dynamic cost by " << PercentDynamicCostSaved
680  << "% (threshold: " << PercentDynamicCostSavedThreshold
681  << "%)\n"
682  << " and the unrolled cost (" << UnrolledCost
683  << ") is less than the max threshold ("
684  << DynamicCostSavingsDiscount << ").\n");
685  return true;
686  }
687 
688  DEBUG(dbgs() << " Too large to fully unroll:\n");
689  DEBUG(dbgs() << " Threshold: " << Threshold << "\n");
690  DEBUG(dbgs() << " Max threshold: " << DynamicCostSavingsDiscount << "\n");
691  DEBUG(dbgs() << " Percent cost saved threshold: "
692  << PercentDynamicCostSavedThreshold << "%\n");
693  DEBUG(dbgs() << " Unrolled cost: " << UnrolledCost << "\n");
694  DEBUG(dbgs() << " Rolled dynamic cost: " << RolledDynamicCost << "\n");
695  DEBUG(dbgs() << " Percent cost saved: " << PercentDynamicCostSaved
696  << "\n");
697  return false;
698 }
699 
700 unsigned LoopUnroll::selectUnrollCount(
701  const Loop *L, unsigned TripCount, bool PragmaFullUnroll,
702  unsigned PragmaCount, const TargetTransformInfo::UnrollingPreferences &UP,
703  bool &SetExplicitly) {
704  SetExplicitly = true;
705 
706  // User-specified count (either as a command-line option or
707  // constructor parameter) has highest precedence.
708  unsigned Count = UserCount ? CurrentCount : 0;
709 
710  // If there is no user-specified count, unroll pragmas have the next
711  // highest precendence.
712  if (Count == 0) {
713  if (PragmaCount) {
714  Count = PragmaCount;
715  } else if (PragmaFullUnroll) {
716  Count = TripCount;
717  }
718  }
719 
720  if (Count == 0)
721  Count = UP.Count;
722 
723  if (Count == 0) {
724  SetExplicitly = false;
725  if (TripCount == 0)
726  // Runtime trip count.
727  Count = UnrollRuntimeCount;
728  else
729  // Conservative heuristic: if we know the trip count, see if we can
730  // completely unroll (subject to the threshold, checked below); otherwise
731  // try to find greatest modulo of the trip count which is still under
732  // threshold value.
733  Count = TripCount;
734  }
735  if (TripCount && Count > TripCount)
736  return TripCount;
737  return Count;
738 }
739 
// Per-loop entry point of the pass. Gathers pragmas, unrolling preferences,
// trip count information and a size estimate for L, classifies the request as
// full, partial or runtime unrolling, reduces the count to fit the selected
// thresholds, and finally calls UnrollLoop. Returns true only if UnrollLoop
// reports success; every early exit returns false.
740 bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
741  if (skipOptnoneFunction(L))
742  return false;
743 
744  Function &F = *L->getHeader()->getParent();
745 
746  LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
747  ScalarEvolution *SE = &getAnalysis<ScalarEvolution>();
748  const TargetTransformInfo &TTI =
749  getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
750  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
751 
752  BasicBlock *Header = L->getHeader();
753  DEBUG(dbgs() << "Loop Unroll: F[" << Header->getParent()->getName()
754  << "] Loop %" << Header->getName() << "\n");
755 
// An unroll(disable) pragma wins over everything else.
756  if (HasUnrollDisablePragma(L)) {
757  return false;
758  }
759  bool PragmaFullUnroll = HasUnrollFullPragma(L);
760  unsigned PragmaCount = UnrollCountPragmaValue(L);
761  bool HasPragma = PragmaFullUnroll || PragmaCount > 0;
762 
764  getUnrollingPreferences(L, TTI, UP);
765 
766  // Find trip count and trip multiple if count is not available
767  unsigned TripCount = 0;
768  unsigned TripMultiple = 1;
769  // If there are multiple exiting blocks but one of them is the latch, use the
770  // latch for the trip count estimation. Otherwise insist on a single exiting
771  // block for the trip count estimation.
772  BasicBlock *ExitingBlock = L->getLoopLatch();
773  if (!ExitingBlock || !L->isLoopExiting(ExitingBlock))
774  ExitingBlock = L->getExitingBlock();
775  if (ExitingBlock) {
776  TripCount = SE->getSmallConstantTripCount(L, ExitingBlock);
777  TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock);
778  }
779 
780  // Select an initial unroll count. This may be reduced later based
781  // on size thresholds.
782  bool CountSetExplicitly;
783  unsigned Count = selectUnrollCount(L, TripCount, PragmaFullUnroll,
784  PragmaCount, UP, CountSetExplicitly);
785 
786  unsigned NumInlineCandidates;
787  bool notDuplicatable;
788  unsigned LoopSize =
789  ApproximateLoopSize(L, NumInlineCandidates, notDuplicatable, TTI, &AC);
790  DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n");
791 
792  // When computing the unrolled size, note that the conditional branch on the
793  // backedge and the comparison feeding it are not replicated like the rest of
794  // the loop body (which is why 2 is subtracted). ApproximateLoopSize never
// returns less than 3, so LoopSize-2 is at least 1. The cast makes the
// multiplication 64-bit.
795  uint64_t UnrolledSize = (uint64_t)(LoopSize-2) * Count + 2;
796  if (notDuplicatable) {
797  DEBUG(dbgs() << " Not unrolling loop which contains non-duplicatable"
798  << " instructions.\n");
799  return false;
800  }
801  if (NumInlineCandidates != 0) {
802  DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n");
803  return false;
804  }
805 
806  unsigned Threshold, PartialThreshold;
807  unsigned PercentDynamicCostSavedThreshold;
808  unsigned DynamicCostSavingsDiscount;
809  selectThresholds(L, HasPragma, UP, Threshold, PartialThreshold,
810  PercentDynamicCostSavedThreshold,
811  DynamicCostSavingsDiscount);
812 
813  // Given Count, TripCount and thresholds determine the type of
814  // unrolling which is to be performed.
815  enum { Full = 0, Partial = 1, Runtime = 2 };
816  int Unrolling;
817  if (TripCount && Count == TripCount) {
// Provisionally Partial; upgraded to Full only if one of the
// canUnrollCompletely checks below accepts.
818  Unrolling = Partial;
819  // If the loop is really small, we don't need to run an expensive analysis.
820  if (canUnrollCompletely(L, Threshold, 100, DynamicCostSavingsDiscount,
821  UnrolledSize, UnrolledSize)) {
822  Unrolling = Full;
823  } else {
824  // The loop isn't that small, but we still can fully unroll it if that
825  // helps to remove a significant number of instructions.
826  // To check that, run additional analysis on the loop.
828  L, TripCount, *SE, TTI, Threshold + DynamicCostSavingsDiscount))
829  if (canUnrollCompletely(L, Threshold, PercentDynamicCostSavedThreshold,
830  DynamicCostSavingsDiscount, Cost->UnrolledCost,
831  Cost->RolledDynamicCost)) {
832  Unrolling = Full;
833  }
834  }
835  } else if (TripCount && Count < TripCount) {
836  Unrolling = Partial;
837  } else {
838  Unrolling = Runtime;
839  }
840 
841  // Reduce count based on the type of unrolling and the threshold values.
842  unsigned OriginalCount = Count;
// An unroll_count pragma enables runtime unrolling by itself; otherwise the
// user's setting is used if there is one, else the target's preference.
843  bool AllowRuntime =
844  (PragmaCount > 0) || (UserRuntime ? CurrentRuntime : UP.Runtime);
845  // Don't unroll a runtime trip count loop with unroll full pragma.
846  if (HasRuntimeUnrollDisablePragma(L) || PragmaFullUnroll) {
847  AllowRuntime = false;
848  }
849  if (Unrolling == Partial) {
850  bool AllowPartial = UserAllowPartial ? CurrentAllowPartial : UP.Partial;
851  if (!AllowPartial && !CountSetExplicitly) {
852  DEBUG(dbgs() << " will not try to unroll partially because "
853  << "-unroll-allow-partial not given\n");
854  return false;
855  }
856  if (PartialThreshold != NoThreshold && UnrolledSize > PartialThreshold) {
857  // Reduce unroll count to be modulo of TripCount for partial unrolling.
// Start from the largest count that fits PartialThreshold, then step down
// to the nearest divisor of TripCount.
858  Count = (std::max(PartialThreshold, 3u)-2) / (LoopSize-2);
859  while (Count != 0 && TripCount % Count != 0)
860  Count--;
861  }
862  } else if (Unrolling == Runtime) {
863  if (!AllowRuntime && !CountSetExplicitly) {
864  DEBUG(dbgs() << " will not try to unroll loop with runtime trip count "
865  << "-unroll-runtime not given\n");
866  return false;
867  }
868  // Reduce unroll count to be the largest power-of-two factor of
869  // the original count which satisfies the threshold limit.
870  while (Count != 0 && UnrolledSize > PartialThreshold) {
871  Count >>= 1;
// NOTE(review): LoopSize and Count are both unsigned, so this product is
// computed in unsigned arithmetic before being widened, unlike the initial
// UnrolledSize computation above which casts to uint64_t first.
872  UnrolledSize = (LoopSize-2) * Count + 2;
873  }
874  if (Count > UP.MaxCount)
875  Count = UP.MaxCount;
876  DEBUG(dbgs() << " partially unrolling with count: " << Count << "\n");
877  }
878 
879  if (HasPragma) {
880  if (PragmaCount != 0)
881  // If loop has an unroll count pragma mark loop as unrolled to prevent
882  // unrolling beyond that requested by the pragma.
884 
885  // Emit optimization remarks if we are unable to unroll the loop
886  // as directed by a pragma.
887  DebugLoc LoopLoc = L->getStartLoc();
// NOTE(review): this pointer F shadows the `Function &F` declared at the top
// of the function.
888  Function *F = Header->getParent();
889  LLVMContext &Ctx = F->getContext();
890  if (PragmaFullUnroll && PragmaCount == 0) {
891  if (TripCount && Count != TripCount) {
893  Ctx, DEBUG_TYPE, *F, LoopLoc,
894  "Unable to fully unroll loop as directed by unroll(full) pragma "
895  "because unrolled size is too large.");
896  } else if (!TripCount) {
898  Ctx, DEBUG_TYPE, *F, LoopLoc,
899  "Unable to fully unroll loop as directed by unroll(full) pragma "
900  "because loop has a runtime trip count.");
901  }
902  } else if (PragmaCount > 0 && Count != OriginalCount) {
904  Ctx, DEBUG_TYPE, *F, LoopLoc,
905  "Unable to unroll loop the number of times directed by "
906  "unroll_count pragma because unrolled size is too large.");
907  }
908  }
909 
910  if (Unrolling != Full && Count < 2) {
911  // Partial unrolling by 1 is a nop. For full unrolling, a factor
912  // of 1 makes sense because loop control can be eliminated.
913  return false;
914  }
915 
916  // Unroll the loop.
917  if (!UnrollLoop(L, Count, TripCount, AllowRuntime, UP.AllowExpensiveTripCount,
918  TripMultiple, LI, this, &LPM, &AC))
919  return false;
920 
921  return true;
922 }
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:82
const NoneType None
Definition: None.h:23
A parsed version of the target data layout string and methods for querying it. ...
Definition: DataLayout.h:104
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:223
Base class for instruction visitors.
Definition: InstVisitor.h:81
const SCEV * getConstant(ConstantInt *V)
ValueT lookup(const KeyT &Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:159
void replaceOperandWith(unsigned I, Metadata *New)
Replace a specific operand.
Definition: Metadata.cpp:743
static MDString * get(LLVMContext &Context, StringRef Str)
Definition: Metadata.cpp:360
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:942
const SCEV * getPointerBase(const SCEV *V)
getPointerBase - Transitively follow the chain of pointer-type operands until reaching a SCEV that do...
ScalarEvolution - This class is the main scalar evolution driver.
#define DEBUG_TYPE
unsigned PartialOptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold, but used for partial/runtime unrolling (set to UINT_MAX to disable).
Constant * getElementAsConstant(unsigned i) const
getElementAsConstant - Return a Constant for a specified index's element.
Definition: Constants.cpp:2784
This file contains the declarations for metadata subclasses.
An immutable pass that tracks lazily created AssumptionCache objects.
unsigned PartialThreshold
The cost threshold for the unrolled loop, like Threshold, but used for partial/runtime unrolling (set...
A cache of .assume calls within a function.
bool isLoopExiting(const BlockT *BB) const
isLoopExiting - True if terminator in the block can branch to another block that is outside of the cu...
Definition: LoopInfo.h:152
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:111
unsigned NumInlineCandidates
The number of calls to internal functions with a single caller.
Definition: CodeMetrics.h:75
A debug info location.
Definition: DebugLoc.h:34
Metadata node.
Definition: Metadata.h:740
static cl::opt< unsigned > UnrollCount("unroll-count", cl::init(0), cl::Hidden, cl::desc("Use this unroll count for all loops including those with ""unroll_count pragma values, for testing purposes"))
F(f)
LoadInst - an instruction for reading from memory.
Definition: Instructions.h:177
bool notDuplicatable
True if this function cannot be duplicated.
Definition: CodeMetrics.h:54
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:64
BlockT * getHeader() const
Definition: LoopInfo.h:96
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:188
BlockT * getLoopLatch() const
getLoopLatch - If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:156
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:70
static cl::opt< unsigned > UnrollMaxIterationsCountToAnalyze("unroll-max-iteration-count-to-analyze", cl::init(0), cl::Hidden, cl::desc("Don't allow loop unrolling to simulate more than this number of""iterations when checking full unroll profitability"))
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:106
static bool HasUnrollFullPragma(const Loop *L)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:75
bool AllowExpensiveTripCount
Allow emitting expensive instructions (such as divisions) when computing the trip count of a loop for...
#define false
Definition: ConvertUTF.c:65
static MDNode * GetUnrollMetadataForLoop(const Loop *L, StringRef Name)
void emitOptimizationRemarkMissed(LLVMContext &Ctx, const char *PassName, const Function &Fn, const DebugLoc &DLoc, const Twine &Msg)
Emit an optimization-missed message.
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:102
SCEVAddRecExpr - This node represents a polynomial recurrence on the trip count of the specified loop...
ConstantDataSequential - A vector or array constant whose element type is a simple 1/2/4/8-byte integ...
Definition: Constants.h:557
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1297
AnalysisUsage & addPreservedID(const void *ID)
unsigned getUserCost(const User *U) const
Estimate the cost of a given IR user when lowered.
static cl::opt< unsigned > UnrollThreshold("unroll-threshold", cl::init(150), cl::Hidden, cl::desc("The baseline cost threshold for loop unrolling"))
void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const
Get target-customized preferences for the generic loop unrolling transformation.
Pass * createLoopUnrollPass(int Threshold=-1, int Count=-1, int AllowPartial=-1, int Runtime=-1)
void initializeLoopUnrollPass(PassRegistry &)
void analyzeBasicBlock(const BasicBlock *BB, const TargetTransformInfo &TTI, SmallPtrSetImpl< const Value * > &EphValues)
Add information about a block to the current state.
#define P(N)
SCEVUnknown - This means that we are dealing with an entirely unknown SCEV value, and only represent ...
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:325
bool isSCEVable(Type *Ty) const
isSCEVable - Test if values of the given type are analyzable within the SCEV framework.
Wrapper pass for TargetTransformInfo.
static cl::opt< bool > UnrollAllowPartial("unroll-allow-partial", cl::init(false), cl::Hidden, cl::desc("Allows loops to be partially unrolled until ""-unroll-threshold loop size is reached."))
LLVM Basic Block Representation.
Definition: BasicBlock.h:65
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:41
This is an important base class in LLVM.
Definition: Constant.h:41
char & LCSSAID
Definition: LCSSA.cpp:312
unsigned DynamicCostSavingsDiscount
The discount applied to the unrolled cost when the dynamic cost savings of unrolling exceed the Perce...
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
Definition: LoopInfo.cpp:228
Represent the analysis usage information of a pass.
bool contains(const LoopT *L) const
contains - Return true if the specified loop is contained within this loop.
Definition: LoopInfo.h:105
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
getMinusSCEV - Return LHS-RHS. Minus is represented in SCEV as A+B*-1.
static cl::opt< bool > UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::init(false), cl::Hidden, cl::desc("Unroll loops with run-time trip counts"))
BlockT * getExitingBlock() const
getExitingBlock - If getExitingBlocks would return exactly one block, return that block...
Definition: LoopInfoImpl.h:51
Value * getOperand(unsigned i) const
Definition: User.h:118
Value * getPointerOperand()
Definition: Instructions.h:284
unsigned Count
A forced unrolling factor (the number of concatenated bodies of the original loop in the unrolled loo...
Value * SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS, const FastMathFlags &FMF, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const DominatorTree *DT=nullptr, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr)
SimplifyFPBinOp - Given operands for a BinaryOperator, see if we can fold the result.
unsigned getSmallConstantTripCount(Loop *L)
Returns the maximum trip count of the loop if it is a single-exit loop and we can compute a small max...
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.h:460
char & LoopSimplifyID
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition: LoopInfo.cpp:262
Optional< EstimatedUnrollCost > analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, ScalarEvolution &SE, const TargetTransformInfo &TTI, unsigned MaxUnrolledLoopSize)
Figure out if the loop is worth full unrolling.
BinaryOps getOpcode() const
Definition: InstrTypes.h:323
StringRef getString() const
Definition: Metadata.cpp:375
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:936
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:217
machine trace Machine Trace Metrics
bool startswith(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:215
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements...
Definition: SmallPtrSet.h:299
This is the shared class of boolean and integer constants.
Definition: Constants.h:47
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
AnalysisUsage & addRequiredID(const void *ID)
Definition: Pass.cpp:276
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
Definition: Instruction.cpp:57
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:861
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:222
unsigned getSmallConstantTripMultiple(Loop *L)
Returns the largest constant divisor of the trip count of the loop if it is a single-exit loop and we...
unsigned PercentDynamicCostSavedThreshold
If complete unrolling will reduce the cost of the loop below its expected dynamic cost while rolled b...
Utility to calculate the size and a few similar metrics for a set of basic blocks.
Definition: CodeMetrics.h:42
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:67
static unsigned UnrollCountPragmaValue(const Loop *L)
CHAIN = SC CHAIN, Imm128 - System call.
static cl::opt< unsigned > PragmaUnrollThreshold("pragma-unroll-threshold", cl::init(16 *1024), cl::Hidden, cl::desc("Unrolled size limit for loops with an unroll(full) or ""unroll_count pragma."))
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:123
static void SetLoopAlreadyUnrolled(Loop *L)
static cl::opt< unsigned > UnrollPercentDynamicCostSavedThreshold("unroll-percent-dynamic-cost-saved-threshold", cl::init(20), cl::Hidden, cl::desc("The percentage of estimated dynamic cost which must be saved by ""unrolling to allow unrolling up to the max threshold."))
void clear()
Completely clear the SetVector.
Definition: SetVector.h:161
static bool HasUnrollDisablePragma(const Loop *L)
Class for arbitrary precision integers.
Definition: APInt.h:73
Value * SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const DominatorTree *DT=nullptr, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr)
SimplifyBinOp - Given operands for a BinaryOperator, see if we can fold the result.
LLVM_ATTRIBUTE_UNUSED_RESULT std::enable_if< !is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:285
std::vector< BlockT * >::const_iterator block_iterator
Definition: LoopInfo.h:140
MDNode * GetUnrollMetadata(MDNode *LoopID, StringRef Name)
Given an llvm.loop loop id metadata node, returns the loop hint metadata node with the given name (fo...
Definition: LoopUnroll.cpp:564
block_iterator block_end() const
Definition: LoopInfo.h:142
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1030
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:372
unsigned Threshold
The cost threshold for the unrolled loop.
SCEV - This class represents an analyzed expression in the program.
Parameters that control the generic loop unrolling transformation.
unsigned OptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size (set to UINT_MAX to disable)...
#define I(x, y, z)
Definition: MD5.cpp:54
unsigned getNumElements() const
getNumElements - Return the number of elements in the array or vector.
Definition: Constants.cpp:2449
Type * getElementType() const
getElementType - Return the element type of the array/vector.
Definition: Constants.cpp:2421
static int const Threshold
TODO: Write a new FunctionPass AliasAnalysis so that it can keep a cache.
static bool HasRuntimeUnrollDisablePragma(const Loop *L)
static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls, bool &NotDuplicatable, const TargetTransformInfo &TTI, AssumptionCache *AC)
ApproximateLoopSize - Approximate the size of the loop.
unsigned getPrimitiveSizeInBits() const LLVM_READONLY
getPrimitiveSizeInBits - Return the basic size of this type if it is a primitive type.
Definition: Type.cpp:121
LLVM Value Representation.
Definition: Value.h:69
Pass * createSimpleLoopUnrollPass()
succ_range successors(BasicBlock *BB)
Definition: IR/CFG.h:271
const SCEV * getSCEV(Value *V)
getSCEV - Return a SCEV expression for the full generality of the specified expression.
#define DEBUG(X)
Definition: Debug.h:92
block_iterator block_begin() const
Definition: LoopInfo.h:141
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:737
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:40
A single uniqued string.
Definition: Metadata.h:508
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:203
This pass exposes codegen information to IR-level passes.
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop)...
Definition: CodeMetrics.cpp:70
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition: Constants.h:125
unsigned NumInsts
Number of instructions in the analyzed blocks.
Definition: CodeMetrics.h:60
bool UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool AllowRuntime, bool AllowExpensiveTripCount, unsigned TripMultiple, LoopInfo *LI, Pass *PP, LPPassManager *LPM, AssumptionCache *AC)
Unroll the given loop by Count.
Definition: LoopUnroll.cpp:163
static cl::opt< unsigned > UnrollDynamicCostSavingsDiscount("unroll-dynamic-cost-savings-discount", cl::init(2000), cl::Hidden, cl::desc("This is the amount discounted from the total unroll cost when ""the unrolled form has a high dynamic cost savings (triggered by ""the '-unroll-perecent-dynamic-cost-saved-threshold' flag)."))
loops
Definition: LoopInfo.cpp:696
SCEVConstant - This class represents a constant integer value.