LLVM  4.0.0
LoopUnrollPass.cpp
Go to the documentation of this file.
1 //===-- LoopUnroll.cpp - Loop unroller pass -------------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass implements a simple loop unroller. It works best when loops have
11 // been canonicalized by the -indvars pass, allowing it to determine the trip
12 // counts of loops easily.
13 //===----------------------------------------------------------------------===//
14 
16 #include "llvm/ADT/SetVector.h"
21 #include "llvm/Analysis/LoopPass.h"
26 #include "llvm/IR/DataLayout.h"
27 #include "llvm/IR/Dominators.h"
28 #include "llvm/IR/InstVisitor.h"
29 #include "llvm/IR/IntrinsicInst.h"
30 #include "llvm/IR/Metadata.h"
32 #include "llvm/Support/Debug.h"
34 #include "llvm/Transforms/Scalar.h"
38 #include <climits>
39 #include <utility>
40 
41 using namespace llvm;
42 
43 #define DEBUG_TYPE "loop-unroll"
44 
45 static cl::opt<unsigned>
46  UnrollThreshold("unroll-threshold", cl::Hidden,
47  cl::desc("The baseline cost threshold for loop unrolling"));
48 
50  "unroll-max-percent-threshold-boost", cl::init(400), cl::Hidden,
51  cl::desc("The maximum 'boost' (represented as a percentage >= 100) applied "
52  "to the threshold when aggressively unrolling a loop due to the "
53  "dynamic cost savings. If completely unrolling a loop will reduce "
54  "the total runtime from X to Y, we boost the loop unroll "
55  "threshold to DefaultThreshold*std::min(MaxPercentThresholdBoost, "
56  "X/Y). This limit avoids excessive code bloat."));
57 
59  "unroll-max-iteration-count-to-analyze", cl::init(10), cl::Hidden,
60  cl::desc("Don't allow loop unrolling to simulate more than this number of"
61  "iterations when checking full unroll profitability"));
62 
64  "unroll-count", cl::Hidden,
65  cl::desc("Use this unroll count for all loops including those with "
66  "unroll_count pragma values, for testing purposes"));
67 
69  "unroll-max-count", cl::Hidden,
70  cl::desc("Set the max unroll count for partial and runtime unrolling, for"
71  "testing purposes"));
72 
74  "unroll-full-max-count", cl::Hidden,
75  cl::desc(
76  "Set the max unroll count for full unrolling, for testing purposes"));
77 
78 static cl::opt<bool>
79  UnrollAllowPartial("unroll-allow-partial", cl::Hidden,
80  cl::desc("Allows loops to be partially unrolled until "
81  "-unroll-threshold loop size is reached."));
82 
84  "unroll-allow-remainder", cl::Hidden,
85  cl::desc("Allow generation of a loop remainder (extra iterations) "
86  "when unrolling a loop."));
87 
88 static cl::opt<bool>
89  UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::Hidden,
90  cl::desc("Unroll loops with run-time trip counts"));
91 
93  "unroll-max-upperbound", cl::init(8), cl::Hidden,
94  cl::desc(
95  "The max of trip count upper bound that is considered in unrolling"));
96 
98  "pragma-unroll-threshold", cl::init(16 * 1024), cl::Hidden,
99  cl::desc("Unrolled size limit for loops with an unroll(full) or "
100  "unroll_count pragma."));
101 
103  "flat-loop-tripcount-threshold", cl::init(5), cl::Hidden,
104  cl::desc("If the runtime tripcount for the loop is lower than the "
105  "threshold, the loop is considered as flat and will be less "
106  "aggressively unrolled."));
107 
108 static cl::opt<bool>
109  UnrollAllowPeeling("unroll-allow-peeling", cl::Hidden,
110  cl::desc("Allows loops to be peeled when the dynamic "
111  "trip count is known to be low."));
112 
113 /// A magic value for use with the Threshold parameter to indicate
114 /// that the loop unroll should be performed regardless of how much
115 /// code expansion would result.
116 static const unsigned NoThreshold = UINT_MAX;
117 
118 /// Gather the various unrolling parameters based on the defaults, compiler
119 /// flags, TTI overrides and user specified parameters.
121  Loop *L, const TargetTransformInfo &TTI, Optional<unsigned> UserThreshold,
122  Optional<unsigned> UserCount, Optional<bool> UserAllowPartial,
123  Optional<bool> UserRuntime, Optional<bool> UserUpperBound) {
125 
126  // Set up the defaults
127  UP.Threshold = 150;
128  UP.MaxPercentThresholdBoost = 400;
129  UP.OptSizeThreshold = 0;
130  UP.PartialThreshold = UP.Threshold;
132  UP.Count = 0;
133  UP.PeelCount = 0;
135  UP.MaxCount = UINT_MAX;
136  UP.FullUnrollMaxCount = UINT_MAX;
137  UP.BEInsns = 2;
138  UP.Partial = false;
139  UP.Runtime = false;
140  UP.AllowRemainder = true;
141  UP.AllowExpensiveTripCount = false;
142  UP.Force = false;
143  UP.UpperBound = false;
144  UP.AllowPeeling = false;
145 
146  // Override with any target specific settings
147  TTI.getUnrollingPreferences(L, UP);
148 
149  // Apply size attributes
150  if (L->getHeader()->getParent()->optForSize()) {
151  UP.Threshold = UP.OptSizeThreshold;
153  }
154 
155  // Apply any user values specified by cl::opt
156  if (UnrollThreshold.getNumOccurrences() > 0) {
159  }
160  if (UnrollMaxPercentThresholdBoost.getNumOccurrences() > 0)
162  if (UnrollMaxCount.getNumOccurrences() > 0)
164  if (UnrollFullMaxCount.getNumOccurrences() > 0)
166  if (UnrollAllowPartial.getNumOccurrences() > 0)
168  if (UnrollAllowRemainder.getNumOccurrences() > 0)
170  if (UnrollRuntime.getNumOccurrences() > 0)
171  UP.Runtime = UnrollRuntime;
172  if (UnrollMaxUpperBound == 0)
173  UP.UpperBound = false;
174  if (UnrollAllowPeeling.getNumOccurrences() > 0)
176 
177  // Apply user values provided by argument
178  if (UserThreshold.hasValue()) {
179  UP.Threshold = *UserThreshold;
180  UP.PartialThreshold = *UserThreshold;
181  }
182  if (UserCount.hasValue())
183  UP.Count = *UserCount;
184  if (UserAllowPartial.hasValue())
185  UP.Partial = *UserAllowPartial;
186  if (UserRuntime.hasValue())
187  UP.Runtime = *UserRuntime;
188  if (UserUpperBound.hasValue())
189  UP.UpperBound = *UserUpperBound;
190 
191  return UP;
192 }
193 
194 namespace {
195 /// A struct to densely store the state of an instruction after unrolling at
196 /// each iteration.
197 ///
198 /// This is designed to work like a tuple of <Instruction *, int> for the
199 /// purposes of hashing and lookup, but to be able to associate two boolean
200 /// states with each key.
201 struct UnrolledInstState {
202  Instruction *I;
203  int Iteration : 30;
204  unsigned IsFree : 1;
205  unsigned IsCounted : 1;
206 };
207 
208 /// Hashing and equality testing for a set of the instruction states.
209 struct UnrolledInstStateKeyInfo {
210  typedef DenseMapInfo<Instruction *> PtrInfo;
212  static inline UnrolledInstState getEmptyKey() {
213  return {PtrInfo::getEmptyKey(), 0, 0, 0};
214  }
215  static inline UnrolledInstState getTombstoneKey() {
216  return {PtrInfo::getTombstoneKey(), 0, 0, 0};
217  }
218  static inline unsigned getHashValue(const UnrolledInstState &S) {
219  return PairInfo::getHashValue({S.I, S.Iteration});
220  }
221  static inline bool isEqual(const UnrolledInstState &LHS,
222  const UnrolledInstState &RHS) {
223  return PairInfo::isEqual({LHS.I, LHS.Iteration}, {RHS.I, RHS.Iteration});
224  }
225 };
226 }
227 
228 namespace {
229 struct EstimatedUnrollCost {
230  /// \brief The estimated cost after unrolling.
231  unsigned UnrolledCost;
232 
233  /// \brief The estimated dynamic cost of executing the instructions in the
234  /// rolled form.
235  unsigned RolledDynamicCost;
236 };
237 }
238 
239 /// \brief Figure out if the loop is worth full unrolling.
240 ///
241 /// Complete loop unrolling can make some loads constant, and we need to know
242 /// if that would expose any further optimization opportunities. This routine
243 /// estimates this optimization. It computes cost of unrolled loop
244 /// (UnrolledCost) and dynamic cost of the original loop (RolledDynamicCost). By
245 /// dynamic cost we mean that we won't count costs of blocks that are known not
246 /// to be executed (i.e. if we have a branch in the loop and we know that at the
247 /// given iteration its condition would be resolved to true, we won't add up the
248 /// cost of the 'false'-block).
249 /// \returns Optional value, holding the RolledDynamicCost and UnrolledCost. If
250 /// the analysis failed (no benefits expected from the unrolling, or the loop is
251 /// too big to analyze), the returned value is None.
253 analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT,
254  ScalarEvolution &SE, const TargetTransformInfo &TTI,
255  unsigned MaxUnrolledLoopSize) {
256  // We want to be able to scale offsets by the trip count and add more offsets
257  // to them without checking for overflows, and we already don't want to
258  // analyze *massive* trip counts, so we force the max to be reasonably small.
259  assert(UnrollMaxIterationsCountToAnalyze < (INT_MAX / 2) &&
260  "The unroll iterations max is too large!");
261 
262  // Only analyze inner loops. We can't properly estimate cost of nested loops
263  // and we won't visit inner loops again anyway.
264  if (!L->empty())
265  return None;
266 
267  // Don't simulate loops with a big or unknown tripcount
268  if (!UnrollMaxIterationsCountToAnalyze || !TripCount ||
270  return None;
271 
274  DenseMap<Value *, Constant *> SimplifiedValues;
275  SmallVector<std::pair<Value *, Constant *>, 4> SimplifiedInputValues;
276 
277  // The estimated cost of the unrolled form of the loop. We try to estimate
278  // this by simplifying as much as we can while computing the estimate.
279  unsigned UnrolledCost = 0;
280 
281  // We also track the estimated dynamic (that is, actually executed) cost in
282  // the rolled form. This helps identify cases when the savings from unrolling
283  // aren't just exposing dead control flows, but actual reduced dynamic
284  // instructions due to the simplifications which we expect to occur after
285  // unrolling.
286  unsigned RolledDynamicCost = 0;
287 
288  // We track the simplification of each instruction in each iteration. We use
289  // this to recursively merge costs into the unrolled cost on-demand so that
290  // we don't count the cost of any dead code. This is essentially a map from
291  // <instruction, int> to <bool, bool>, but stored as a densely packed struct.
293 
294  // A small worklist used to accumulate cost of instructions from each
295  // observable and reached root in the loop.
296  SmallVector<Instruction *, 16> CostWorklist;
297 
298  // PHI-used worklist used between iterations while accumulating cost.
299  SmallVector<Instruction *, 4> PHIUsedList;
300 
301  // Helper function to accumulate cost for instructions in the loop.
302  auto AddCostRecursively = [&](Instruction &RootI, int Iteration) {
303  assert(Iteration >= 0 && "Cannot have a negative iteration!");
304  assert(CostWorklist.empty() && "Must start with an empty cost list");
305  assert(PHIUsedList.empty() && "Must start with an empty phi used list");
306  CostWorklist.push_back(&RootI);
307  for (;; --Iteration) {
308  do {
309  Instruction *I = CostWorklist.pop_back_val();
310 
311  // InstCostMap only uses I and Iteration as a key, the other two values
312  // don't matter here.
313  auto CostIter = InstCostMap.find({I, Iteration, 0, 0});
314  if (CostIter == InstCostMap.end())
315  // If an input to a PHI node comes from a dead path through the loop
316  // we may have no cost data for it here. What that actually means is
317  // that it is free.
318  continue;
319  auto &Cost = *CostIter;
320  if (Cost.IsCounted)
321  // Already counted this instruction.
322  continue;
323 
324  // Mark that we are counting the cost of this instruction now.
325  Cost.IsCounted = true;
326 
327  // If this is a PHI node in the loop header, just add it to the PHI set.
328  if (auto *PhiI = dyn_cast<PHINode>(I))
329  if (PhiI->getParent() == L->getHeader()) {
330  assert(Cost.IsFree && "Loop PHIs shouldn't be evaluated as they "
331  "inherently simplify during unrolling.");
332  if (Iteration == 0)
333  continue;
334 
335  // Push the incoming value from the backedge into the PHI used list
336  // if it is an in-loop instruction. We'll use this to populate the
337  // cost worklist for the next iteration (as we count backwards).
338  if (auto *OpI = dyn_cast<Instruction>(
339  PhiI->getIncomingValueForBlock(L->getLoopLatch())))
340  if (L->contains(OpI))
341  PHIUsedList.push_back(OpI);
342  continue;
343  }
344 
345  // First accumulate the cost of this instruction.
346  if (!Cost.IsFree) {
347  UnrolledCost += TTI.getUserCost(I);
348  DEBUG(dbgs() << "Adding cost of instruction (iteration " << Iteration
349  << "): ");
350  DEBUG(I->dump());
351  }
352 
353  // We must count the cost of every operand which is not free,
354  // recursively. If we reach a loop PHI node, simply add it to the set
355  // to be considered on the next iteration (backwards!).
356  for (Value *Op : I->operands()) {
357  // Check whether this operand is free due to being a constant or
358  // outside the loop.
359  auto *OpI = dyn_cast<Instruction>(Op);
360  if (!OpI || !L->contains(OpI))
361  continue;
362 
363  // Otherwise accumulate its cost.
364  CostWorklist.push_back(OpI);
365  }
366  } while (!CostWorklist.empty());
367 
368  if (PHIUsedList.empty())
369  // We've exhausted the search.
370  break;
371 
372  assert(Iteration > 0 &&
373  "Cannot track PHI-used values past the first iteration!");
374  CostWorklist.append(PHIUsedList.begin(), PHIUsedList.end());
375  PHIUsedList.clear();
376  }
377  };
378 
379  // Ensure that we don't violate the loop structure invariants relied on by
380  // this analysis.
381  assert(L->isLoopSimplifyForm() && "Must put loop into normal form first.");
382  assert(L->isLCSSAForm(DT) &&
383  "Must have loops in LCSSA form to track live-out values.");
384 
385  DEBUG(dbgs() << "Starting LoopUnroll profitability analysis...\n");
386 
387  // Simulate execution of each iteration of the loop counting instructions,
388  // which would be simplified.
389  // Since the same load will take different values on different iterations,
390  // we literally have to go through all loop's iterations.
391  for (unsigned Iteration = 0; Iteration < TripCount; ++Iteration) {
392  DEBUG(dbgs() << " Analyzing iteration " << Iteration << "\n");
393 
394  // Prepare for the iteration by collecting any simplified entry or backedge
395  // inputs.
396  for (Instruction &I : *L->getHeader()) {
397  auto *PHI = dyn_cast<PHINode>(&I);
398  if (!PHI)
399  break;
400 
401  // The loop header PHI nodes must have exactly two input: one from the
402  // loop preheader and one from the loop latch.
403  assert(
404  PHI->getNumIncomingValues() == 2 &&
405  "Must have an incoming value only for the preheader and the latch.");
406 
407  Value *V = PHI->getIncomingValueForBlock(
408  Iteration == 0 ? L->getLoopPreheader() : L->getLoopLatch());
409  Constant *C = dyn_cast<Constant>(V);
410  if (Iteration != 0 && !C)
411  C = SimplifiedValues.lookup(V);
412  if (C)
413  SimplifiedInputValues.push_back({PHI, C});
414  }
415 
416  // Now clear and re-populate the map for the next iteration.
417  SimplifiedValues.clear();
418  while (!SimplifiedInputValues.empty())
419  SimplifiedValues.insert(SimplifiedInputValues.pop_back_val());
420 
421  UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, SE, L);
422 
423  BBWorklist.clear();
424  BBWorklist.insert(L->getHeader());
425  // Note that we *must not* cache the size, this loop grows the worklist.
426  for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
427  BasicBlock *BB = BBWorklist[Idx];
428 
429  // Visit all instructions in the given basic block and try to simplify
430  // it. We don't change the actual IR, just count optimization
431  // opportunities.
432  for (Instruction &I : *BB) {
433  if (isa<DbgInfoIntrinsic>(I))
434  continue;
435 
436  // Track this instruction's expected baseline cost when executing the
437  // rolled loop form.
438  RolledDynamicCost += TTI.getUserCost(&I);
439 
440  // Visit the instruction to analyze its loop cost after unrolling,
441  // and if the visitor returns true, mark the instruction as free after
442  // unrolling and continue.
443  bool IsFree = Analyzer.visit(I);
444  bool Inserted = InstCostMap.insert({&I, (int)Iteration,
445  (unsigned)IsFree,
446  /*IsCounted*/ false}).second;
447  (void)Inserted;
448  assert(Inserted && "Cannot have a state for an unvisited instruction!");
449 
450  if (IsFree)
451  continue;
452 
453  // Can't properly model a cost of a call.
454  // FIXME: With a proper cost model we should be able to do it.
455  if(isa<CallInst>(&I))
456  return None;
457 
458  // If the instruction might have a side-effect recursively account for
459  // the cost of it and all the instructions leading up to it.
460  if (I.mayHaveSideEffects())
461  AddCostRecursively(I, Iteration);
462 
463  // If unrolled body turns out to be too big, bail out.
464  if (UnrolledCost > MaxUnrolledLoopSize) {
465  DEBUG(dbgs() << " Exceeded threshold.. exiting.\n"
466  << " UnrolledCost: " << UnrolledCost
467  << ", MaxUnrolledLoopSize: " << MaxUnrolledLoopSize
468  << "\n");
469  return None;
470  }
471  }
472 
473  TerminatorInst *TI = BB->getTerminator();
474 
475  // Add in the live successors by first checking whether we have terminator
476  // that may be simplified based on the values simplified by this call.
477  BasicBlock *KnownSucc = nullptr;
478  if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
479  if (BI->isConditional()) {
480  if (Constant *SimpleCond =
481  SimplifiedValues.lookup(BI->getCondition())) {
482  // Just take the first successor if condition is undef
483  if (isa<UndefValue>(SimpleCond))
484  KnownSucc = BI->getSuccessor(0);
485  else if (ConstantInt *SimpleCondVal =
486  dyn_cast<ConstantInt>(SimpleCond))
487  KnownSucc = BI->getSuccessor(SimpleCondVal->isZero() ? 1 : 0);
488  }
489  }
490  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
491  if (Constant *SimpleCond =
492  SimplifiedValues.lookup(SI->getCondition())) {
493  // Just take the first successor if condition is undef
494  if (isa<UndefValue>(SimpleCond))
495  KnownSucc = SI->getSuccessor(0);
496  else if (ConstantInt *SimpleCondVal =
497  dyn_cast<ConstantInt>(SimpleCond))
498  KnownSucc = SI->findCaseValue(SimpleCondVal).getCaseSuccessor();
499  }
500  }
501  if (KnownSucc) {
502  if (L->contains(KnownSucc))
503  BBWorklist.insert(KnownSucc);
504  else
505  ExitWorklist.insert({BB, KnownSucc});
506  continue;
507  }
508 
509  // Add BB's successors to the worklist.
510  for (BasicBlock *Succ : successors(BB))
511  if (L->contains(Succ))
512  BBWorklist.insert(Succ);
513  else
514  ExitWorklist.insert({BB, Succ});
515  AddCostRecursively(*TI, Iteration);
516  }
517 
518  // If we found no optimization opportunities on the first iteration, we
519  // won't find them on later ones too.
520  if (UnrolledCost == RolledDynamicCost) {
521  DEBUG(dbgs() << " No opportunities found.. exiting.\n"
522  << " UnrolledCost: " << UnrolledCost << "\n");
523  return None;
524  }
525  }
526 
527  while (!ExitWorklist.empty()) {
528  BasicBlock *ExitingBB, *ExitBB;
529  std::tie(ExitingBB, ExitBB) = ExitWorklist.pop_back_val();
530 
531  for (Instruction &I : *ExitBB) {
532  auto *PN = dyn_cast<PHINode>(&I);
533  if (!PN)
534  break;
535 
536  Value *Op = PN->getIncomingValueForBlock(ExitingBB);
537  if (auto *OpI = dyn_cast<Instruction>(Op))
538  if (L->contains(OpI))
539  AddCostRecursively(*OpI, TripCount - 1);
540  }
541  }
542 
543  DEBUG(dbgs() << "Analysis finished:\n"
544  << "UnrolledCost: " << UnrolledCost << ", "
545  << "RolledDynamicCost: " << RolledDynamicCost << "\n");
546  return {{UnrolledCost, RolledDynamicCost}};
547 }
548 
549 /// ApproximateLoopSize - Approximate the size of the loop.
550 static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls,
551  bool &NotDuplicatable, bool &Convergent,
552  const TargetTransformInfo &TTI,
553  AssumptionCache *AC, unsigned BEInsns) {
555  CodeMetrics::collectEphemeralValues(L, AC, EphValues);
556 
558  for (BasicBlock *BB : L->blocks())
559  Metrics.analyzeBasicBlock(BB, TTI, EphValues);
560  NumCalls = Metrics.NumInlineCandidates;
561  NotDuplicatable = Metrics.notDuplicatable;
562  Convergent = Metrics.convergent;
563 
564  unsigned LoopSize = Metrics.NumInsts;
565 
566  // Don't allow an estimate of size zero. This would allows unrolling of loops
567  // with huge iteration counts, which is a compile time problem even if it's
568  // not a problem for code quality. Also, the code using this size may assume
569  // that each loop has at least three instructions (likely a conditional
570  // branch, a comparison feeding that branch, and some kind of loop increment
571  // feeding that comparison instruction).
572  LoopSize = std::max(LoopSize, BEInsns + 1);
573 
574  return LoopSize;
575 }
576 
577 // Returns the loop hint metadata node with the given name (for example,
578 // "llvm.loop.unroll.count"). If no such metadata node exists, then nullptr is
579 // returned.
581  if (MDNode *LoopID = L->getLoopID())
582  return GetUnrollMetadata(LoopID, Name);
583  return nullptr;
584 }
585 
586 // Returns true if the loop has an unroll(full) pragma.
587 static bool HasUnrollFullPragma(const Loop *L) {
588  return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.full");
589 }
590 
591 // Returns true if the loop has an unroll(enable) pragma. This metadata is used
592 // for both "#pragma unroll" and "#pragma clang loop unroll(enable)" directives.
593 static bool HasUnrollEnablePragma(const Loop *L) {
594  return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.enable");
595 }
596 
597 // Returns true if the loop has an unroll(disable) pragma.
598 static bool HasUnrollDisablePragma(const Loop *L) {
599  return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.disable");
600 }
601 
602 // Returns true if the loop has an runtime unroll(disable) pragma.
603 static bool HasRuntimeUnrollDisablePragma(const Loop *L) {
604  return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.runtime.disable");
605 }
606 
607 // If loop has an unroll_count pragma return the (necessarily
608 // positive) value from the pragma. Otherwise return 0.
609 static unsigned UnrollCountPragmaValue(const Loop *L) {
610  MDNode *MD = GetUnrollMetadataForLoop(L, "llvm.loop.unroll.count");
611  if (MD) {
612  assert(MD->getNumOperands() == 2 &&
613  "Unroll count hint metadata should have two operands.");
614  unsigned Count =
615  mdconst::extract<ConstantInt>(MD->getOperand(1))->getZExtValue();
616  assert(Count >= 1 && "Unroll count must be positive.");
617  return Count;
618  }
619  return 0;
620 }
621 
622 // Remove existing unroll metadata and add unroll disable metadata to
623 // indicate the loop has already been unrolled. This prevents a loop
624 // from being unrolled more than is directed by a pragma if the loop
625 // unrolling pass is run more than once (which it generally is).
627  MDNode *LoopID = L->getLoopID();
628  // First remove any existing loop unrolling metadata.
630  // Reserve first location for self reference to the LoopID metadata node.
631  MDs.push_back(nullptr);
632 
633  if (LoopID) {
634  for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
635  bool IsUnrollMetadata = false;
636  MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
637  if (MD) {
638  const MDString *S = dyn_cast<MDString>(MD->getOperand(0));
639  IsUnrollMetadata = S && S->getString().startswith("llvm.loop.unroll.");
640  }
641  if (!IsUnrollMetadata)
642  MDs.push_back(LoopID->getOperand(i));
643  }
644  }
645 
646  // Add unroll(disable) metadata to disable future unrolling.
647  LLVMContext &Context = L->getHeader()->getContext();
648  SmallVector<Metadata *, 1> DisableOperands;
649  DisableOperands.push_back(MDString::get(Context, "llvm.loop.unroll.disable"));
650  MDNode *DisableNode = MDNode::get(Context, DisableOperands);
651  MDs.push_back(DisableNode);
652 
653  MDNode *NewLoopID = MDNode::get(Context, MDs);
654  // Set operand 0 to refer to the loop id itself.
655  NewLoopID->replaceOperandWith(0, NewLoopID);
656  L->setLoopID(NewLoopID);
657 }
658 
659 // Computes the boosting factor for complete unrolling.
660 // If fully unrolling the loop would save a lot of RolledDynamicCost, it would
661 // be beneficial to fully unroll the loop even if unrolledcost is large. We
662 // use (RolledDynamicCost / UnrolledCost) to model the unroll benefits to adjust
663 // the unroll threshold.
664 static unsigned getFullUnrollBoostingFactor(const EstimatedUnrollCost &Cost,
665  unsigned MaxPercentThresholdBoost) {
666  if (Cost.RolledDynamicCost >= UINT_MAX / 100)
667  return 100;
668  else if (Cost.UnrolledCost != 0)
669  // The boosting factor is RolledDynamicCost / UnrolledCost
670  return std::min(100 * Cost.RolledDynamicCost / Cost.UnrolledCost,
671  MaxPercentThresholdBoost);
672  else
673  return MaxPercentThresholdBoost;
674 }
675 
676 // Returns loop size estimation for unrolled loop.
677 static uint64_t getUnrolledLoopSize(
678  unsigned LoopSize,
680  assert(LoopSize >= UP.BEInsns && "LoopSize should not be less than BEInsns!");
681  return (uint64_t)(LoopSize - UP.BEInsns) * UP.Count + UP.BEInsns;
682 }
683 
684 // Returns true if unroll count was set explicitly.
685 // Calculates unroll count and writes it to UP.Count.
686 static bool computeUnrollCount(
687  Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI,
688  ScalarEvolution *SE, OptimizationRemarkEmitter *ORE, unsigned &TripCount,
689  unsigned MaxTripCount, unsigned &TripMultiple, unsigned LoopSize,
690  TargetTransformInfo::UnrollingPreferences &UP, bool &UseUpperBound) {
691  // Check for explicit Count.
692  // 1st priority is unroll count set by "unroll-count" option.
693  bool UserUnrollCount = UnrollCount.getNumOccurrences() > 0;
694  if (UserUnrollCount) {
695  UP.Count = UnrollCount;
696  UP.AllowExpensiveTripCount = true;
697  UP.Force = true;
698  if (UP.AllowRemainder && getUnrolledLoopSize(LoopSize, UP) < UP.Threshold)
699  return true;
700  }
701 
702  // 2nd priority is unroll count set by pragma.
703  unsigned PragmaCount = UnrollCountPragmaValue(L);
704  if (PragmaCount > 0) {
705  UP.Count = PragmaCount;
706  UP.Runtime = true;
707  UP.AllowExpensiveTripCount = true;
708  UP.Force = true;
709  if (UP.AllowRemainder &&
711  return true;
712  }
713  bool PragmaFullUnroll = HasUnrollFullPragma(L);
714  if (PragmaFullUnroll && TripCount != 0) {
715  UP.Count = TripCount;
716  if (getUnrolledLoopSize(LoopSize, UP) < PragmaUnrollThreshold)
717  return false;
718  }
719 
720  bool PragmaEnableUnroll = HasUnrollEnablePragma(L);
721  bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll ||
722  PragmaEnableUnroll || UserUnrollCount;
723 
724  if (ExplicitUnroll && TripCount != 0) {
725  // If the loop has an unrolling pragma, we want to be more aggressive with
726  // unrolling limits. Set thresholds to at least the PragmaThreshold value
727  // which is larger than the default limits.
728  UP.Threshold = std::max<unsigned>(UP.Threshold, PragmaUnrollThreshold);
729  UP.PartialThreshold =
730  std::max<unsigned>(UP.PartialThreshold, PragmaUnrollThreshold);
731  }
732 
733  // 3rd priority is full unroll count.
734  // Full unroll makes sense only when TripCount or its upper bound could be
735  // statically calculated.
736  // Also we need to check if we exceed FullUnrollMaxCount.
737  // If using the upper bound to unroll, TripMultiple should be set to 1 because
738  // we do not know when loop may exit.
739  // MaxTripCount and ExactTripCount cannot both be non zero since we only
740  // compute the former when the latter is zero.
741  unsigned ExactTripCount = TripCount;
742  assert((ExactTripCount == 0 || MaxTripCount == 0) &&
743  "ExtractTripCound and MaxTripCount cannot both be non zero.");
744  unsigned FullUnrollTripCount = ExactTripCount ? ExactTripCount : MaxTripCount;
745  UP.Count = FullUnrollTripCount;
746  if (FullUnrollTripCount && FullUnrollTripCount <= UP.FullUnrollMaxCount) {
747  // When computing the unrolled size, note that BEInsns are not replicated
748  // like the rest of the loop body.
749  if (getUnrolledLoopSize(LoopSize, UP) < UP.Threshold) {
750  UseUpperBound = (MaxTripCount == FullUnrollTripCount);
751  TripCount = FullUnrollTripCount;
752  TripMultiple = UP.UpperBound ? 1 : TripMultiple;
753  return ExplicitUnroll;
754  } else {
755  // The loop isn't that small, but we still can fully unroll it if that
756  // helps to remove a significant number of instructions.
757  // To check that, run additional analysis on the loop.
759  L, FullUnrollTripCount, DT, *SE, TTI,
760  UP.Threshold * UP.MaxPercentThresholdBoost / 100)) {
761  unsigned Boost =
763  if (Cost->UnrolledCost < UP.Threshold * Boost / 100) {
764  UseUpperBound = (MaxTripCount == FullUnrollTripCount);
765  TripCount = FullUnrollTripCount;
766  TripMultiple = UP.UpperBound ? 1 : TripMultiple;
767  return ExplicitUnroll;
768  }
769  }
770  }
771  }
772 
773  // 4rd priority is partial unrolling.
774  // Try partial unroll only when TripCount could be staticaly calculated.
775  if (TripCount) {
776  UP.Partial |= ExplicitUnroll;
777  if (!UP.Partial) {
778  DEBUG(dbgs() << " will not try to unroll partially because "
779  << "-unroll-allow-partial not given\n");
780  UP.Count = 0;
781  return false;
782  }
783  if (UP.Count == 0)
784  UP.Count = TripCount;
785  if (UP.PartialThreshold != NoThreshold) {
786  // Reduce unroll count to be modulo of TripCount for partial unrolling.
787  if (getUnrolledLoopSize(LoopSize, UP) > UP.PartialThreshold)
788  UP.Count =
789  (std::max(UP.PartialThreshold, UP.BEInsns + 1) - UP.BEInsns) /
790  (LoopSize - UP.BEInsns);
791  if (UP.Count > UP.MaxCount)
792  UP.Count = UP.MaxCount;
793  while (UP.Count != 0 && TripCount % UP.Count != 0)
794  UP.Count--;
795  if (UP.AllowRemainder && UP.Count <= 1) {
796  // If there is no Count that is modulo of TripCount, set Count to
797  // largest power-of-two factor that satisfies the threshold limit.
798  // As we'll create fixup loop, do the type of unrolling only if
799  // remainder loop is allowed.
801  while (UP.Count != 0 &&
802  getUnrolledLoopSize(LoopSize, UP) > UP.PartialThreshold)
803  UP.Count >>= 1;
804  }
805  if (UP.Count < 2) {
806  if (PragmaEnableUnroll)
807  ORE->emit(
808  OptimizationRemarkMissed(DEBUG_TYPE, "UnrollAsDirectedTooLarge",
809  L->getStartLoc(), L->getHeader())
810  << "Unable to unroll loop as directed by unroll(enable) pragma "
811  "because unrolled size is too large.");
812  UP.Count = 0;
813  }
814  } else {
815  UP.Count = TripCount;
816  }
817  if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount &&
818  UP.Count != TripCount)
819  ORE->emit(
820  OptimizationRemarkMissed(DEBUG_TYPE, "FullUnrollAsDirectedTooLarge",
821  L->getStartLoc(), L->getHeader())
822  << "Unable to fully unroll loop as directed by unroll pragma because "
823  "unrolled size is too large.");
824  return ExplicitUnroll;
825  }
826  assert(TripCount == 0 &&
827  "All cases when TripCount is constant should be covered here.");
828  if (PragmaFullUnroll)
829  ORE->emit(
831  "CantFullUnrollAsDirectedRuntimeTripCount",
832  L->getStartLoc(), L->getHeader())
833  << "Unable to fully unroll loop as directed by unroll(full) pragma "
834  "because loop has a runtime trip count.");
835 
836  // 5th priority is loop peeling
837  computePeelCount(L, LoopSize, UP);
838  if (UP.PeelCount) {
839  UP.Runtime = false;
840  UP.Count = 1;
841  return ExplicitUnroll;
842  }
843 
844  // 6th priority is runtime unrolling.
845  // Don't unroll a runtime trip count loop when it is disabled.
847  UP.Count = 0;
848  return false;
849  }
850 
851  // Check if the runtime trip count is too small when profile is available.
852  if (L->getHeader()->getParent()->getEntryCount()) {
853  if (auto ProfileTripCount = getLoopEstimatedTripCount(L)) {
854  if (*ProfileTripCount < FlatLoopTripCountThreshold)
855  return false;
856  else
857  UP.AllowExpensiveTripCount = true;
858  }
859  }
860 
861  // Reduce count based on the type of unrolling and the threshold values.
862  UP.Runtime |= PragmaEnableUnroll || PragmaCount > 0 || UserUnrollCount;
863  if (!UP.Runtime) {
864  DEBUG(dbgs() << " will not try to unroll loop with runtime trip count "
865  << "-unroll-runtime not given\n");
866  UP.Count = 0;
867  return false;
868  }
869  if (UP.Count == 0)
871 
872  // Reduce unroll count to be the largest power-of-two factor of
873  // the original count which satisfies the threshold limit.
874  while (UP.Count != 0 &&
875  getUnrolledLoopSize(LoopSize, UP) > UP.PartialThreshold)
876  UP.Count >>= 1;
877 
878 #ifndef NDEBUG
879  unsigned OrigCount = UP.Count;
880 #endif
881 
882  if (!UP.AllowRemainder && UP.Count != 0 && (TripMultiple % UP.Count) != 0) {
883  while (UP.Count != 0 && TripMultiple % UP.Count != 0)
884  UP.Count >>= 1;
885  DEBUG(dbgs() << "Remainder loop is restricted (that could architecture "
886  "specific or because the loop contains a convergent "
887  "instruction), so unroll count must divide the trip "
888  "multiple, "
889  << TripMultiple << ". Reducing unroll count from "
890  << OrigCount << " to " << UP.Count << ".\n");
891  using namespace ore;
892  if (PragmaCount > 0 && !UP.AllowRemainder)
893  ORE->emit(
895  "DifferentUnrollCountFromDirected",
896  L->getStartLoc(), L->getHeader())
897  << "Unable to unroll loop the number of times directed by "
898  "unroll_count pragma because remainder loop is restricted "
899  "(that could architecture specific or because the loop "
900  "contains a convergent instruction) and so must have an unroll "
901  "count that divides the loop trip multiple of "
902  << NV("TripMultiple", TripMultiple) << ". Unrolling instead "
903  << NV("UnrollCount", UP.Count) << " time(s).");
904  }
905 
906  if (UP.Count > UP.MaxCount)
907  UP.Count = UP.MaxCount;
908  DEBUG(dbgs() << " partially unrolling with count: " << UP.Count << "\n");
909  if (UP.Count < 2)
910  UP.Count = 0;
911  return ExplicitUnroll;
912 }
913 
914 static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
915  ScalarEvolution *SE, const TargetTransformInfo &TTI,
917  bool PreserveLCSSA,
918  Optional<unsigned> ProvidedCount,
919  Optional<unsigned> ProvidedThreshold,
920  Optional<bool> ProvidedAllowPartial,
921  Optional<bool> ProvidedRuntime,
922  Optional<bool> ProvidedUpperBound) {
923  DEBUG(dbgs() << "Loop Unroll: F[" << L->getHeader()->getParent()->getName()
924  << "] Loop %" << L->getHeader()->getName() << "\n");
925  if (HasUnrollDisablePragma(L))
926  return false;
927  if (!L->isLoopSimplifyForm()) {
928  DEBUG(
929  dbgs() << " Not unrolling loop which is not in loop-simplify form.\n");
930  return false;
931  }
932 
933  unsigned NumInlineCandidates;
934  bool NotDuplicatable;
935  bool Convergent;
937  L, TTI, ProvidedThreshold, ProvidedCount, ProvidedAllowPartial,
938  ProvidedRuntime, ProvidedUpperBound);
939  // Exit early if unrolling is disabled.
940  if (UP.Threshold == 0 && (!UP.Partial || UP.PartialThreshold == 0))
941  return false;
942  unsigned LoopSize = ApproximateLoopSize(
943  L, NumInlineCandidates, NotDuplicatable, Convergent, TTI, &AC, UP.BEInsns);
944  DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n");
945  if (NotDuplicatable) {
946  DEBUG(dbgs() << " Not unrolling loop which contains non-duplicatable"
947  << " instructions.\n");
948  return false;
949  }
950  if (NumInlineCandidates != 0) {
951  DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n");
952  return false;
953  }
954 
955  // Find trip count and trip multiple if count is not available
956  unsigned TripCount = 0;
957  unsigned MaxTripCount = 0;
958  unsigned TripMultiple = 1;
959  // If there are multiple exiting blocks but one of them is the latch, use the
960  // latch for the trip count estimation. Otherwise insist on a single exiting
961  // block for the trip count estimation.
962  BasicBlock *ExitingBlock = L->getLoopLatch();
963  if (!ExitingBlock || !L->isLoopExiting(ExitingBlock))
964  ExitingBlock = L->getExitingBlock();
965  if (ExitingBlock) {
966  TripCount = SE->getSmallConstantTripCount(L, ExitingBlock);
967  TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock);
968  }
969 
970  // If the loop contains a convergent operation, the prelude we'd add
971  // to do the first few instructions before we hit the unrolled loop
972  // is unsafe -- it adds a control-flow dependency to the convergent
973  // operation. Therefore restrict remainder loop (try unrollig without).
974  //
975  // TODO: This is quite conservative. In practice, convergent_op()
976  // is likely to be called unconditionally in the loop. In this
977  // case, the program would be ill-formed (on most architectures)
978  // unless n were the same on all threads in a thread group.
979  // Assuming n is the same on all threads, any kind of unrolling is
980  // safe. But currently llvm's notion of convergence isn't powerful
981  // enough to express this.
982  if (Convergent)
983  UP.AllowRemainder = false;
984 
985  // Try to find the trip count upper bound if we cannot find the exact trip
986  // count.
987  bool MaxOrZero = false;
988  if (!TripCount) {
989  MaxTripCount = SE->getSmallConstantMaxTripCount(L);
990  MaxOrZero = SE->isBackedgeTakenCountMaxOrZero(L);
991  // We can unroll by the upper bound amount if it's generally allowed or if
992  // we know that the loop is executed either the upper bound or zero times.
993  // (MaxOrZero unrolling keeps only the first loop test, so the number of
994  // loop tests remains the same compared to the non-unrolled version, whereas
995  // the generic upper bound unrolling keeps all but the last loop test so the
996  // number of loop tests goes up which may end up being worse on targets with
997  // constriained branch predictor resources so is controlled by an option.)
998  // In addition we only unroll small upper bounds.
999  if (!(UP.UpperBound || MaxOrZero) || MaxTripCount > UnrollMaxUpperBound) {
1000  MaxTripCount = 0;
1001  }
1002  }
1003 
1004  // computeUnrollCount() decides whether it is beneficial to use upper bound to
1005  // fully unroll the loop.
1006  bool UseUpperBound = false;
1007  bool IsCountSetExplicitly =
1008  computeUnrollCount(L, TTI, DT, LI, SE, &ORE, TripCount, MaxTripCount,
1009  TripMultiple, LoopSize, UP, UseUpperBound);
1010  if (!UP.Count)
1011  return false;
1012  // Unroll factor (Count) must be less or equal to TripCount.
1013  if (TripCount && UP.Count > TripCount)
1014  UP.Count = TripCount;
1015 
1016  // Unroll the loop.
1017  if (!UnrollLoop(L, UP.Count, TripCount, UP.Force, UP.Runtime,
1018  UP.AllowExpensiveTripCount, UseUpperBound, MaxOrZero,
1019  TripMultiple, UP.PeelCount, LI, SE, &DT, &AC, &ORE,
1020  PreserveLCSSA))
1021  return false;
1022 
1023  // If loop has an unroll count pragma or unrolled by explicitly set count
1024  // mark loop as unrolled to prevent unrolling beyond that requested.
1025  // If the loop was peeled, we already "used up" the profile information
1026  // we had, so we don't want to unroll or peel again.
1027  if (IsCountSetExplicitly || UP.PeelCount)
1029 
1030  return true;
1031 }
1032 
1033 namespace {
1034 class LoopUnroll : public LoopPass {
1035 public:
1036  static char ID; // Pass ID, replacement for typeid
1037  LoopUnroll(Optional<unsigned> Threshold = None,
1038  Optional<unsigned> Count = None,
1039  Optional<bool> AllowPartial = None, Optional<bool> Runtime = None,
1040  Optional<bool> UpperBound = None)
1041  : LoopPass(ID), ProvidedCount(std::move(Count)),
1042  ProvidedThreshold(Threshold), ProvidedAllowPartial(AllowPartial),
1043  ProvidedRuntime(Runtime), ProvidedUpperBound(UpperBound) {
1045  }
1046 
1047  Optional<unsigned> ProvidedCount;
1048  Optional<unsigned> ProvidedThreshold;
1049  Optional<bool> ProvidedAllowPartial;
1050  Optional<bool> ProvidedRuntime;
1051  Optional<bool> ProvidedUpperBound;
1052 
1053  bool runOnLoop(Loop *L, LPPassManager &) override {
1054  if (skipLoop(L))
1055  return false;
1056 
1057  Function &F = *L->getHeader()->getParent();
1058 
1059  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1060  LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1061  ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1062  const TargetTransformInfo &TTI =
1063  getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
1064  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1065  // For the old PM, we can't use OptimizationRemarkEmitter as an analysis
1066  // pass. Function analyses need to be preserved across loop transformations
1067  // but ORE cannot be preserved (see comment before the pass definition).
1068  OptimizationRemarkEmitter ORE(&F);
1069  bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
1070 
1071  return tryToUnrollLoop(L, DT, LI, SE, TTI, AC, ORE, PreserveLCSSA,
1072  ProvidedCount, ProvidedThreshold,
1073  ProvidedAllowPartial, ProvidedRuntime,
1074  ProvidedUpperBound);
1075  }
1076 
1077  /// This transformation requires natural loop information & requires that
1078  /// loop preheaders be inserted into the CFG...
1079  ///
1080  void getAnalysisUsage(AnalysisUsage &AU) const override {
1083  // FIXME: Loop passes are required to preserve domtree, and for now we just
1084  // recreate dom info if anything gets unrolled.
1086  }
1087 };
1088 }
1089 
1090 char LoopUnroll::ID = 0;
1091 INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
1095 INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
1096 
1097 Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial,
1098  int Runtime, int UpperBound) {
1099  // TODO: It would make more sense for this function to take the optionals
1100  // directly, but that's dangerous since it would silently break out of tree
1101  // callers.
1102  return new LoopUnroll(Threshold == -1 ? None : Optional<unsigned>(Threshold),
1103  Count == -1 ? None : Optional<unsigned>(Count),
1104  AllowPartial == -1 ? None
1105  : Optional<bool>(AllowPartial),
1106  Runtime == -1 ? None : Optional<bool>(Runtime),
1107  UpperBound == -1 ? None : Optional<bool>(UpperBound));
1108 }
1109 
1111  return llvm::createLoopUnrollPass(-1, -1, 0, 0, 0);
1112 }
1113 
1116  LPMUpdater &) {
1117  const auto &FAM =
1118  AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager();
1119  Function *F = L.getHeader()->getParent();
1120 
1121  auto *ORE = FAM.getCachedResult<OptimizationRemarkEmitterAnalysis>(*F);
1122  // FIXME: This should probably be optional rather than required.
1123  if (!ORE)
1124  report_fatal_error("LoopUnrollPass: OptimizationRemarkEmitterAnalysis not "
1125  "cached at a higher level");
1126 
1127  bool Changed = tryToUnrollLoop(&L, AR.DT, &AR.LI, &AR.SE, AR.TTI, AR.AC, *ORE,
1128  /*PreserveLCSSA*/ true, ProvidedCount,
1131 
1132  if (!Changed)
1133  return PreservedAnalyses::all();
1135 }
static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution *SE, const TargetTransformInfo &TTI, AssumptionCache &AC, OptimizationRemarkEmitter &ORE, bool PreserveLCSSA, Optional< unsigned > ProvidedCount, Optional< unsigned > ProvidedThreshold, Optional< bool > ProvidedAllowPartial, Optional< bool > ProvidedRuntime, Optional< bool > ProvidedUpperBound)
MachineLoop * L
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:81
const NoneType None
Definition: None.h:23
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...
Diagnostic information for missed-optimization remarks.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
LLVMContext & Context
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
DiagnosticInfoOptimizationBase::Argument NV
bool hasValue() const
Definition: Optional.h:125
static cl::opt< unsigned > UnrollCount("unroll-count", cl::Hidden, cl::desc("Use this unroll count for all loops including those with ""unroll_count pragma values, for testing purposes"))
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
size_t i
ValueT lookup(const KeyT &Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:162
This header provides classes for managing a pipeline of passes over loops in LLVM IR...
This is the interface for a simple mod/ref and alias analysis over globals.
bool convergent
True if this function contains a call to a convergent function.
Definition: CodeMetrics.h:57
Optional< bool > ProvidedAllowPartial
void replaceOperandWith(unsigned I, Metadata *New)
Replace a specific operand.
Definition: Metadata.cpp:819
static MDString * get(LLVMContext &Context, StringRef Str)
Definition: Metadata.cpp:414
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:307
static bool HasUnrollEnablePragma(const Loop *L)
Implements a dense probed hash-table based set.
Definition: DenseSet.h:202
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1040
static bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, ScalarEvolution *SE, OptimizationRemarkEmitter *ORE, unsigned &TripCount, unsigned MaxTripCount, unsigned &TripMultiple, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, bool &UseUpperBound)
The main scalar evolution driver.
#define DEBUG_TYPE
unsigned PartialOptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold, but used for partial/runtime unrolling (set to UINT_MAX to disable).
static unsigned getFullUnrollBoostingFactor(const EstimatedUnrollCost &Cost, unsigned MaxPercentThresholdBoost)
This file contains the declarations for metadata subclasses.
An immutable pass that tracks lazily created AssumptionCache objects.
static cl::opt< bool > UnrollAllowPartial("unroll-allow-partial", cl::Hidden, cl::desc("Allows loops to be partially unrolled until ""-unroll-threshold loop size is reached."))
unsigned PartialThreshold
The cost threshold for the unrolled loop, like Threshold, but used for partial/runtime unrolling (set...
A cache of .assume calls within a function.
bool Force
Apply loop unroll on any kind of loop (mainly to loops that fail runtime unrolling).
static cl::opt< bool > UnrollAllowPeeling("unroll-allow-peeling", cl::Hidden, cl::desc("Allows loops to be peeled when the dynamic ""trip count is known to be low."))
bool isLoopExiting(const BlockT *BB) const
True if terminator in the block can branch to another block that is outside of the current loop...
Definition: LoopInfo.h:160
unsigned NumInlineCandidates
The number of calls to internal functions with a single caller.
Definition: CodeMetrics.h:78
Metadata node.
Definition: Metadata.h:830
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
bool notDuplicatable
True if this function cannot be duplicated.
Definition: CodeMetrics.h:54
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:78
BlockT * getHeader() const
Definition: LoopInfo.h:102
bool UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, bool AllowRuntime, bool AllowExpensiveTripCount, bool PreserveCondBr, bool PreserveOnlyFirst, unsigned TripMultiple, unsigned PeelCount, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, OptimizationRemarkEmitter *ORE, bool PreserveLCSSA)
Unroll the given loop by Count.
Definition: LoopUnroll.cpp:245
static cl::opt< bool > UnrollAllowRemainder("unroll-allow-remainder", cl::Hidden, cl::desc("Allow generation of a loop remainder (extra iterations) ""when unrolling a loop."))
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:157
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:172
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:53
static cl::opt< unsigned > UnrollMaxPercentThresholdBoost("unroll-max-percent-threshold-boost", cl::init(400), cl::Hidden, cl::desc("The maximum 'boost' (represented as a percentage >= 100) applied ""to the threshold when aggressively unrolling a loop due to the ""dynamic cost savings. If completely unrolling a loop will reduce ""the total runtime from X to Y, we boost the loop unroll ""threshold to DefaultThreshold*std::min(MaxPercentThresholdBoost, ""X/Y). This limit avoids excessive code bloat."))
static bool HasUnrollFullPragma(const Loop *L)
bool AllowPeeling
Allow peeling off loop iterations for loops with low dynamic tripcount.
bool AllowExpensiveTripCount
Allow emitting expensive instructions (such as divisions) when computing the trip count of a loop for...
unsigned FullUnrollMaxCount
Set the maximum unrolling factor for full unrolling.
static cl::opt< bool > UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::Hidden, cl::desc("Unroll loops with run-time trip counts"))
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to...
Definition: LoopInfo.cpp:190
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:60
static MDNode * GetUnrollMetadataForLoop(const Loop *L, StringRef Name)
void visit(Iterator Start, Iterator End)
Definition: InstVisitor.h:90
#define F(x, y, z)
Definition: MD5.cpp:51
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:136
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE bool startswith(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:264
unsigned getSmallConstantMaxTripCount(Loop *L)
Returns the upper bound of the loop trip count as a normal unsigned value.
static bool isEqual(const Function &Caller, const Function &Callee)
Function Alias Analysis false
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:73
void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const
Get target-customized preferences for the generic loop unrolling transformation.
void initializeLoopUnrollPass(PassRegistry &)
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:96
bool AllowRemainder
Allow generation of a loop remainder (extra iterations after unroll).
Maximum length of the test input libFuzzer tries to guess a good value based on the corpus and reports it always prefer smaller inputs during the corpus shuffle When libFuzzer itself reports a bug this exit code will be used If indicates the maximal total time in seconds to run the fuzzer minimizes the provided crash input Use with etc Experimental Use value profile to guide fuzzing Number of simultaneous worker processes to run the jobs If min(jobs, NumberOfCpuCores()/2)\" is used.") FUZZER_FLAG_INT(reload
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:143
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:395
static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls, bool &NotDuplicatable, bool &Convergent, const TargetTransformInfo &TTI, AssumptionCache *AC, unsigned BEInsns)
ApproximateLoopSize - Approximate the size of the loop.
static cl::opt< unsigned > UnrollMaxCount("unroll-max-count", cl::Hidden, cl::desc("Set the max unroll count for partial and runtime unrolling, for""testing purposes"))
Subclasses of this class are all able to terminate a basic block.
Definition: InstrTypes.h:52
Wrapper pass for TargetTransformInfo.
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:107
* if(!EatIfPresent(lltok::kw_thread_local)) return false
ParseOptionalThreadLocal := /*empty.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:109
static cl::opt< unsigned > UnrollMaxUpperBound("unroll-max-upperbound", cl::init(8), cl::Hidden, cl::desc("The max of trip count upper bound that is considered in unrolling"))
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs...ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:653
LLVM Basic Block Representation.
Definition: BasicBlock.h:51
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:48
Conditional or Unconditional Branch instruction.
static Optional< EstimatedUnrollCost > analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT, ScalarEvolution &SE, const TargetTransformInfo &TTI, unsigned MaxUnrolledLoopSize)
Figure out if the loop is worth full unrolling.
Optional< bool > ProvidedUpperBound
This is an important base class in LLVM.
Definition: Constant.h:42
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:115
char & LCSSAID
Definition: LCSSA.cpp:379
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
Definition: LoopInfo.cpp:212
Represent the analysis usage information of a pass.
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:109
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE,"Assign register bank of generic virtual registers", false, false) RegBankSelect
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:52
op_range operands()
Definition: User.h:213
unsigned Count
A forced unrolling factor (the number of concatenated bodies of the original loop in the unrolled loo...
unsigned getSmallConstantTripCount(Loop *L)
Returns the maximum trip count of the loop if it is a single-exit loop and we can compute a small max...
Optional< unsigned > getLoopEstimatedTripCount(Loop *L)
Get a loop's estimated trip count based on branch weight metadata.
Definition: LoopUtils.cpp:1076
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:392
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:113
void dump() const
Support for debugging, callable in GDB: V->dump()
Definition: AsmWriter.cpp:3540
Optional< bool > ProvidedRuntime
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition: LoopInfo.cpp:246
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
StringRef getString() const
Definition: Metadata.cpp:424
static cl::opt< unsigned > FlatLoopTripCountThreshold("flat-loop-tripcount-threshold", cl::init(5), cl::Hidden, cl::desc("If the runtime tripcount for the loop is lower than the ""threshold, the loop is considered as flat and will be less ""aggressively unrolled."))
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1034
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:292
machine trace Machine Trace Metrics
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:425
This is the shared class of boolean and integer constants.
Definition: Constants.h:88
void emit(DiagnosticInfoOptimizationBase &OptDiag)
The new interface to emit remarks.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...
Definition: PassManager.h:1017
bool isLCSSAForm(DominatorTree &DT) const
Return true if the Loop is in LCSSA form.
Definition: LoopInfo.cpp:174
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:843
static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(Loop *L, const TargetTransformInfo &TTI, Optional< unsigned > UserThreshold, Optional< unsigned > UserCount, Optional< bool > UserAllowPartial, Optional< bool > UserRuntime, Optional< bool > UserUpperBound)
Gather the various unrolling parameters based on the defaults, compiler flags, TTI overrides and user...
unsigned getSmallConstantTripMultiple(Loop *L)
Returns the largest constant divisor of the trip count of the loop if it is a single-exit loop and we...
Utility to calculate the size and a few similar metrics for a set of basic blocks.
Definition: CodeMetrics.h:42
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:50
static unsigned UnrollCountPragmaValue(const Loop *L)
unsigned DefaultUnrollRuntimeCount
Default unroll count for loops with run-time trip count.
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:382
Optional< unsigned > ProvidedThreshold
Pass * createLoopUnrollPass(int Threshold=-1, int Count=-1, int AllowPartial=-1, int Runtime=-1, int UpperBound=-1)
static cl::opt< unsigned > PragmaUnrollThreshold("pragma-unroll-threshold", cl::init(16 *1024), cl::Hidden, cl::desc("Unrolled size limit for loops with an unroll(full) or ""unroll_count pragma."))
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
static uint64_t getUnrolledLoopSize(unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP)
static GCRegistry::Add< ShadowStackGC > C("shadow-stack","Very portable GC for uncooperative code generators")
static cl::opt< unsigned > UnrollFullMaxCount("unroll-full-max-count", cl::Hidden, cl::desc("Set the max unroll count for full unrolling, for testing purposes"))
static void SetLoopAlreadyUnrolled(Loop *L)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
static bool HasUnrollDisablePragma(const Loop *L)
MDNode * GetUnrollMetadata(MDNode *LoopID, StringRef Name)
Given an llvm.loop loop id metadata node, returns the loop hint metadata node with the given name (fo...
Definition: LoopUnroll.cpp:760
static cl::opt< unsigned > UnrollMaxIterationsCountToAnalyze("unroll-max-iteration-count-to-analyze", cl::init(10), cl::Hidden, cl::desc("Don't allow loop unrolling to simulate more than this number of""iterations when checking full unroll profitability"))
Optional< unsigned > ProvidedCount
static const unsigned NoThreshold
A magic value for use with the Threshold parameter to indicate that the loop unroll should be perform...
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1132
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:119
unsigned PeelCount
A forced peeling factor (the number of bodied of the original loop that should be peeled off before t...
unsigned Threshold
The cost threshold for the unrolled loop.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:368
Parameters that control the generic loop unrolling transformation.
unsigned OptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size (set to UINT_MAX to disable)...
static cl::opt< unsigned > UnrollThreshold("unroll-threshold", cl::Hidden, cl::desc("The baseline cost threshold for loop unrolling"))
#define I(x, y, z)
Definition: MD5.cpp:54
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.
Definition: LoopUtils.cpp:938
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:287
void computePeelCount(Loop *L, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP)
void analyzeBasicBlock(const BasicBlock *BB, const TargetTransformInfo &TTI, const SmallPtrSetImpl< const Value * > &EphValues)
Add information about a block to the current state.
static int const Threshold
TODO: Write a new FunctionPass AliasAnalysis so that it can keep a cache.
Multiway switch.
static bool HasRuntimeUnrollDisablePragma(const Loop *L)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVM Value Representation.
Definition: Value.h:71
Pass * createSimpleLoopUnrollPass()
succ_range successors(BasicBlock *BB)
Definition: IR/CFG.h:143
unsigned MaxPercentThresholdBoost
If complete unrolling will reduce the cost of the loop, we will boost the Threshold by a certain perc...
bool empty() const
Definition: LoopInfo.h:136
#define DEBUG(X)
Definition: Debug.h:100
bool isBackedgeTakenCountMaxOrZero(const Loop *L)
Return true if the backedge taken count is either the value returned by getMaxBackedgeTakenCount or z...
bool UpperBound
Allow using trip count upper bound to unroll loops.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:47
A single uniqued string.
Definition: Metadata.h:586
A container for analyses that lazily runs them and caches their results.
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop)...
Definition: CodeMetrics.cpp:73
int getUserCost(const User *U) const
Estimate the cost of a given IR user when lowered.
unsigned NumInsts
Number of instructions in the analyzed blocks.
Definition: CodeMetrics.h:63
The optimization diagnostic interface.
loops
Definition: LoopInfo.cpp:709