LLVM  9.0.0svn
LoopUnrollPass.cpp
Go to the documentation of this file.
1 //===- LoopUnroll.cpp - Loop unroller pass --------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass implements a simple loop unroller. It works best when loops have
10 // been canonicalized by the -indvars pass, allowing it to determine the trip
11 // counts of loops easily.
12 //===----------------------------------------------------------------------===//
13 
15 #include "llvm/ADT/DenseMap.h"
16 #include "llvm/ADT/DenseMapInfo.h"
17 #include "llvm/ADT/DenseSet.h"
18 #include "llvm/ADT/None.h"
19 #include "llvm/ADT/Optional.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/SetVector.h"
22 #include "llvm/ADT/SmallPtrSet.h"
23 #include "llvm/ADT/SmallVector.h"
24 #include "llvm/ADT/StringRef.h"
30 #include "llvm/Analysis/LoopInfo.h"
31 #include "llvm/Analysis/LoopPass.h"
37 #include "llvm/IR/BasicBlock.h"
38 #include "llvm/IR/CFG.h"
39 #include "llvm/IR/Constant.h"
40 #include "llvm/IR/Constants.h"
41 #include "llvm/IR/DiagnosticInfo.h"
42 #include "llvm/IR/Dominators.h"
43 #include "llvm/IR/Function.h"
44 #include "llvm/IR/Instruction.h"
45 #include "llvm/IR/Instructions.h"
46 #include "llvm/IR/IntrinsicInst.h"
47 #include "llvm/IR/Metadata.h"
48 #include "llvm/IR/PassManager.h"
49 #include "llvm/Pass.h"
50 #include "llvm/Support/Casting.h"
52 #include "llvm/Support/Debug.h"
55 #include "llvm/Transforms/Scalar.h"
57 #include "llvm/Transforms/Utils.h"
62 #include <algorithm>
63 #include <cassert>
64 #include <cstdint>
65 #include <limits>
66 #include <string>
67 #include <tuple>
68 #include <utility>
69 
70 using namespace llvm;
71 
72 #define DEBUG_TYPE "loop-unroll"
73 
75  "forget-scev-loop-unroll", cl::init(false), cl::Hidden,
76  cl::desc("Forget everything in SCEV when doing LoopUnroll, instead of just"
77  " the current top-most loop. This is somtimes preferred to reduce"
78  " compile time."));
79 
80 static cl::opt<unsigned>
81  UnrollThreshold("unroll-threshold", cl::Hidden,
82  cl::desc("The cost threshold for loop unrolling"));
83 
85  "unroll-partial-threshold", cl::Hidden,
86  cl::desc("The cost threshold for partial loop unrolling"));
87 
89  "unroll-max-percent-threshold-boost", cl::init(400), cl::Hidden,
90  cl::desc("The maximum 'boost' (represented as a percentage >= 100) applied "
91  "to the threshold when aggressively unrolling a loop due to the "
92  "dynamic cost savings. If completely unrolling a loop will reduce "
93  "the total runtime from X to Y, we boost the loop unroll "
94  "threshold to DefaultThreshold*std::min(MaxPercentThresholdBoost, "
95  "X/Y). This limit avoids excessive code bloat."));
96 
98  "unroll-max-iteration-count-to-analyze", cl::init(10), cl::Hidden,
99  cl::desc("Don't allow loop unrolling to simulate more than this number of"
100  "iterations when checking full unroll profitability"));
101 
103  "unroll-count", cl::Hidden,
104  cl::desc("Use this unroll count for all loops including those with "
105  "unroll_count pragma values, for testing purposes"));
106 
108  "unroll-max-count", cl::Hidden,
109  cl::desc("Set the max unroll count for partial and runtime unrolling, for"
110  "testing purposes"));
111 
113  "unroll-full-max-count", cl::Hidden,
114  cl::desc(
115  "Set the max unroll count for full unrolling, for testing purposes"));
116 
118  "unroll-peel-count", cl::Hidden,
119  cl::desc("Set the unroll peeling count, for testing purposes"));
120 
121 static cl::opt<bool>
122  UnrollAllowPartial("unroll-allow-partial", cl::Hidden,
123  cl::desc("Allows loops to be partially unrolled until "
124  "-unroll-threshold loop size is reached."));
125 
127  "unroll-allow-remainder", cl::Hidden,
128  cl::desc("Allow generation of a loop remainder (extra iterations) "
129  "when unrolling a loop."));
130 
131 static cl::opt<bool>
132  UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::Hidden,
133  cl::desc("Unroll loops with run-time trip counts"));
134 
136  "unroll-max-upperbound", cl::init(8), cl::Hidden,
137  cl::desc(
138  "The max of trip count upper bound that is considered in unrolling"));
139 
141  "pragma-unroll-threshold", cl::init(16 * 1024), cl::Hidden,
142  cl::desc("Unrolled size limit for loops with an unroll(full) or "
143  "unroll_count pragma."));
144 
146  "flat-loop-tripcount-threshold", cl::init(5), cl::Hidden,
147  cl::desc("If the runtime tripcount for the loop is lower than the "
148  "threshold, the loop is considered as flat and will be less "
149  "aggressively unrolled."));
150 
151 static cl::opt<bool>
152  UnrollAllowPeeling("unroll-allow-peeling", cl::init(true), cl::Hidden,
153  cl::desc("Allows loops to be peeled when the dynamic "
154  "trip count is known to be low."));
155 
157  "unroll-remainder", cl::Hidden,
158  cl::desc("Allow the loop remainder to be unrolled."));
159 
160 // This option isn't ever intended to be enabled, it serves to allow
161 // experiments to check the assumptions about when this kind of revisit is
162 // necessary.
164  "unroll-revisit-child-loops", cl::Hidden,
165  cl::desc("Enqueue and re-visit child loops in the loop PM after unrolling. "
166  "This shouldn't typically be needed as child loops (or their "
167  "clones) were already visited."));
168 
169 /// A magic value for use with the Threshold parameter to indicate
170 /// that the loop unroll should be performed regardless of how much
171 /// code expansion would result.
173 
174 /// Gather the various unrolling parameters based on the defaults, compiler
175 /// flags, TTI overrides and user specified parameters.
177  Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI,
178  BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, int OptLevel,
179  Optional<unsigned> UserThreshold, Optional<unsigned> UserCount,
180  Optional<bool> UserAllowPartial, Optional<bool> UserRuntime,
181  Optional<bool> UserUpperBound, Optional<bool> UserAllowPeeling) {
183 
184  // Set up the defaults
185  UP.Threshold = OptLevel > 2 ? 300 : 150;
186  UP.MaxPercentThresholdBoost = 400;
187  UP.OptSizeThreshold = 0;
188  UP.PartialThreshold = 150;
190  UP.Count = 0;
191  UP.PeelCount = 0;
195  UP.BEInsns = 2;
196  UP.Partial = false;
197  UP.Runtime = false;
198  UP.AllowRemainder = true;
199  UP.UnrollRemainder = false;
200  UP.AllowExpensiveTripCount = false;
201  UP.Force = false;
202  UP.UpperBound = false;
203  UP.AllowPeeling = true;
204  UP.UnrollAndJam = false;
206 
207  // Override with any target specific settings
208  TTI.getUnrollingPreferences(L, SE, UP);
209 
210  // Apply size attributes
211  bool OptForSize = L->getHeader()->getParent()->hasOptSize() ||
213  if (OptForSize) {
214  UP.Threshold = UP.OptSizeThreshold;
216  UP.MaxPercentThresholdBoost = 100;
217  }
218 
219  // Apply any user values specified by cl::opt
220  if (UnrollThreshold.getNumOccurrences() > 0)
222  if (UnrollPartialThreshold.getNumOccurrences() > 0)
224  if (UnrollMaxPercentThresholdBoost.getNumOccurrences() > 0)
226  if (UnrollMaxCount.getNumOccurrences() > 0)
228  if (UnrollFullMaxCount.getNumOccurrences() > 0)
230  if (UnrollPeelCount.getNumOccurrences() > 0)
232  if (UnrollAllowPartial.getNumOccurrences() > 0)
234  if (UnrollAllowRemainder.getNumOccurrences() > 0)
236  if (UnrollRuntime.getNumOccurrences() > 0)
237  UP.Runtime = UnrollRuntime;
238  if (UnrollMaxUpperBound == 0)
239  UP.UpperBound = false;
240  if (UnrollAllowPeeling.getNumOccurrences() > 0)
242  if (UnrollUnrollRemainder.getNumOccurrences() > 0)
244 
245  // Apply user values provided by argument
246  if (UserThreshold.hasValue()) {
247  UP.Threshold = *UserThreshold;
248  UP.PartialThreshold = *UserThreshold;
249  }
250  if (UserCount.hasValue())
251  UP.Count = *UserCount;
252  if (UserAllowPartial.hasValue())
253  UP.Partial = *UserAllowPartial;
254  if (UserRuntime.hasValue())
255  UP.Runtime = *UserRuntime;
256  if (UserUpperBound.hasValue())
257  UP.UpperBound = *UserUpperBound;
258  if (UserAllowPeeling.hasValue())
259  UP.AllowPeeling = *UserAllowPeeling;
260 
261  return UP;
262 }
263 
264 namespace {
265 
266 /// A struct to densely store the state of an instruction after unrolling at
267 /// each iteration.
268 ///
269 /// This is designed to work like a tuple of <Instruction *, int> for the
270 /// purposes of hashing and lookup, but to be able to associate two boolean
271 /// states with each key.
272 struct UnrolledInstState {
273  Instruction *I;
274  int Iteration : 30;
275  unsigned IsFree : 1;
276  unsigned IsCounted : 1;
277 };
278 
279 /// Hashing and equality testing for a set of the instruction states.
280 struct UnrolledInstStateKeyInfo {
281  using PtrInfo = DenseMapInfo<Instruction *>;
283 
284  static inline UnrolledInstState getEmptyKey() {
285  return {PtrInfo::getEmptyKey(), 0, 0, 0};
286  }
287 
288  static inline UnrolledInstState getTombstoneKey() {
289  return {PtrInfo::getTombstoneKey(), 0, 0, 0};
290  }
291 
292  static inline unsigned getHashValue(const UnrolledInstState &S) {
293  return PairInfo::getHashValue({S.I, S.Iteration});
294  }
295 
296  static inline bool isEqual(const UnrolledInstState &LHS,
297  const UnrolledInstState &RHS) {
298  return PairInfo::isEqual({LHS.I, LHS.Iteration}, {RHS.I, RHS.Iteration});
299  }
300 };
301 
302 struct EstimatedUnrollCost {
303  /// The estimated cost after unrolling.
304  unsigned UnrolledCost;
305 
306  /// The estimated dynamic cost of executing the instructions in the
307  /// rolled form.
308  unsigned RolledDynamicCost;
309 };
310 
311 } // end anonymous namespace
312 
313 /// Figure out if the loop is worth full unrolling.
314 ///
315 /// Complete loop unrolling can make some loads constant, and we need to know
316 /// if that would expose any further optimization opportunities. This routine
317 /// estimates this optimization. It computes cost of unrolled loop
318 /// (UnrolledCost) and dynamic cost of the original loop (RolledDynamicCost). By
319 /// dynamic cost we mean that we won't count costs of blocks that are known not
320 /// to be executed (i.e. if we have a branch in the loop and we know that at the
321 /// given iteration its condition would be resolved to true, we won't add up the
322 /// cost of the 'false'-block).
323 /// \returns Optional value, holding the RolledDynamicCost and UnrolledCost. If
324 /// the analysis failed (no benefits expected from the unrolling, or the loop is
325 /// too big to analyze), the returned value is None.
327  const Loop *L, unsigned TripCount, DominatorTree &DT, ScalarEvolution &SE,
328  const SmallPtrSetImpl<const Value *> &EphValues,
329  const TargetTransformInfo &TTI, unsigned MaxUnrolledLoopSize) {
330  // We want to be able to scale offsets by the trip count and add more offsets
331  // to them without checking for overflows, and we already don't want to
332  // analyze *massive* trip counts, so we force the max to be reasonably small.
334  (unsigned)(std::numeric_limits<int>::max() / 2) &&
335  "The unroll iterations max is too large!");
336 
337  // Only analyze inner loops. We can't properly estimate cost of nested loops
338  // and we won't visit inner loops again anyway.
339  if (!L->empty())
340  return None;
341 
342  // Don't simulate loops with a big or unknown tripcount
343  if (!UnrollMaxIterationsCountToAnalyze || !TripCount ||
345  return None;
346 
349  DenseMap<Value *, Constant *> SimplifiedValues;
350  SmallVector<std::pair<Value *, Constant *>, 4> SimplifiedInputValues;
351 
352  // The estimated cost of the unrolled form of the loop. We try to estimate
353  // this by simplifying as much as we can while computing the estimate.
354  unsigned UnrolledCost = 0;
355 
356  // We also track the estimated dynamic (that is, actually executed) cost in
357  // the rolled form. This helps identify cases when the savings from unrolling
358  // aren't just exposing dead control flows, but actual reduced dynamic
359  // instructions due to the simplifications which we expect to occur after
360  // unrolling.
361  unsigned RolledDynamicCost = 0;
362 
363  // We track the simplification of each instruction in each iteration. We use
364  // this to recursively merge costs into the unrolled cost on-demand so that
365  // we don't count the cost of any dead code. This is essentially a map from
366  // <instruction, int> to <bool, bool>, but stored as a densely packed struct.
368 
369  // A small worklist used to accumulate cost of instructions from each
370  // observable and reached root in the loop.
371  SmallVector<Instruction *, 16> CostWorklist;
372 
373  // PHI-used worklist used between iterations while accumulating cost.
374  SmallVector<Instruction *, 4> PHIUsedList;
375 
376  // Helper function to accumulate cost for instructions in the loop.
377  auto AddCostRecursively = [&](Instruction &RootI, int Iteration) {
378  assert(Iteration >= 0 && "Cannot have a negative iteration!");
379  assert(CostWorklist.empty() && "Must start with an empty cost list");
380  assert(PHIUsedList.empty() && "Must start with an empty phi used list");
381  CostWorklist.push_back(&RootI);
382  for (;; --Iteration) {
383  do {
384  Instruction *I = CostWorklist.pop_back_val();
385 
386  // InstCostMap only uses I and Iteration as a key, the other two values
387  // don't matter here.
388  auto CostIter = InstCostMap.find({I, Iteration, 0, 0});
389  if (CostIter == InstCostMap.end())
390  // If an input to a PHI node comes from a dead path through the loop
391  // we may have no cost data for it here. What that actually means is
392  // that it is free.
393  continue;
394  auto &Cost = *CostIter;
395  if (Cost.IsCounted)
396  // Already counted this instruction.
397  continue;
398 
399  // Mark that we are counting the cost of this instruction now.
400  Cost.IsCounted = true;
401 
402  // If this is a PHI node in the loop header, just add it to the PHI set.
403  if (auto *PhiI = dyn_cast<PHINode>(I))
404  if (PhiI->getParent() == L->getHeader()) {
405  assert(Cost.IsFree && "Loop PHIs shouldn't be evaluated as they "
406  "inherently simplify during unrolling.");
407  if (Iteration == 0)
408  continue;
409 
410  // Push the incoming value from the backedge into the PHI used list
411  // if it is an in-loop instruction. We'll use this to populate the
412  // cost worklist for the next iteration (as we count backwards).
413  if (auto *OpI = dyn_cast<Instruction>(
414  PhiI->getIncomingValueForBlock(L->getLoopLatch())))
415  if (L->contains(OpI))
416  PHIUsedList.push_back(OpI);
417  continue;
418  }
419 
420  // First accumulate the cost of this instruction.
421  if (!Cost.IsFree) {
422  UnrolledCost += TTI.getUserCost(I);
423  LLVM_DEBUG(dbgs() << "Adding cost of instruction (iteration "
424  << Iteration << "): ");
425  LLVM_DEBUG(I->dump());
426  }
427 
428  // We must count the cost of every operand which is not free,
429  // recursively. If we reach a loop PHI node, simply add it to the set
430  // to be considered on the next iteration (backwards!).
431  for (Value *Op : I->operands()) {
432  // Check whether this operand is free due to being a constant or
433  // outside the loop.
434  auto *OpI = dyn_cast<Instruction>(Op);
435  if (!OpI || !L->contains(OpI))
436  continue;
437 
438  // Otherwise accumulate its cost.
439  CostWorklist.push_back(OpI);
440  }
441  } while (!CostWorklist.empty());
442 
443  if (PHIUsedList.empty())
444  // We've exhausted the search.
445  break;
446 
447  assert(Iteration > 0 &&
448  "Cannot track PHI-used values past the first iteration!");
449  CostWorklist.append(PHIUsedList.begin(), PHIUsedList.end());
450  PHIUsedList.clear();
451  }
452  };
453 
454  // Ensure that we don't violate the loop structure invariants relied on by
455  // this analysis.
456  assert(L->isLoopSimplifyForm() && "Must put loop into normal form first.");
457  assert(L->isLCSSAForm(DT) &&
458  "Must have loops in LCSSA form to track live-out values.");
459 
460  LLVM_DEBUG(dbgs() << "Starting LoopUnroll profitability analysis...\n");
461 
462  // Simulate execution of each iteration of the loop counting instructions,
463  // which would be simplified.
464  // Since the same load will take different values on different iterations,
465  // we literally have to go through all loop's iterations.
466  for (unsigned Iteration = 0; Iteration < TripCount; ++Iteration) {
467  LLVM_DEBUG(dbgs() << " Analyzing iteration " << Iteration << "\n");
468 
469  // Prepare for the iteration by collecting any simplified entry or backedge
470  // inputs.
471  for (Instruction &I : *L->getHeader()) {
472  auto *PHI = dyn_cast<PHINode>(&I);
473  if (!PHI)
474  break;
475 
476  // The loop header PHI nodes must have exactly two input: one from the
477  // loop preheader and one from the loop latch.
478  assert(
479  PHI->getNumIncomingValues() == 2 &&
480  "Must have an incoming value only for the preheader and the latch.");
481 
482  Value *V = PHI->getIncomingValueForBlock(
483  Iteration == 0 ? L->getLoopPreheader() : L->getLoopLatch());
484  Constant *C = dyn_cast<Constant>(V);
485  if (Iteration != 0 && !C)
486  C = SimplifiedValues.lookup(V);
487  if (C)
488  SimplifiedInputValues.push_back({PHI, C});
489  }
490 
491  // Now clear and re-populate the map for the next iteration.
492  SimplifiedValues.clear();
493  while (!SimplifiedInputValues.empty())
494  SimplifiedValues.insert(SimplifiedInputValues.pop_back_val());
495 
496  UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, SE, L);
497 
498  BBWorklist.clear();
499  BBWorklist.insert(L->getHeader());
500  // Note that we *must not* cache the size, this loop grows the worklist.
501  for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
502  BasicBlock *BB = BBWorklist[Idx];
503 
504  // Visit all instructions in the given basic block and try to simplify
505  // it. We don't change the actual IR, just count optimization
506  // opportunities.
507  for (Instruction &I : *BB) {
508  // These won't get into the final code - don't even try calculating the
509  // cost for them.
510  if (isa<DbgInfoIntrinsic>(I) || EphValues.count(&I))
511  continue;
512 
513  // Track this instruction's expected baseline cost when executing the
514  // rolled loop form.
515  RolledDynamicCost += TTI.getUserCost(&I);
516 
517  // Visit the instruction to analyze its loop cost after unrolling,
518  // and if the visitor returns true, mark the instruction as free after
519  // unrolling and continue.
520  bool IsFree = Analyzer.visit(I);
521  bool Inserted = InstCostMap.insert({&I, (int)Iteration,
522  (unsigned)IsFree,
523  /*IsCounted*/ false}).second;
524  (void)Inserted;
525  assert(Inserted && "Cannot have a state for an unvisited instruction!");
526 
527  if (IsFree)
528  continue;
529 
530  // Can't properly model a cost of a call.
531  // FIXME: With a proper cost model we should be able to do it.
532  if (auto *CI = dyn_cast<CallInst>(&I)) {
533  const Function *Callee = CI->getCalledFunction();
534  if (!Callee || TTI.isLoweredToCall(Callee)) {
535  LLVM_DEBUG(dbgs() << "Can't analyze cost of loop with call\n");
536  return None;
537  }
538  }
539 
540  // If the instruction might have a side-effect recursively account for
541  // the cost of it and all the instructions leading up to it.
542  if (I.mayHaveSideEffects())
543  AddCostRecursively(I, Iteration);
544 
545  // If unrolled body turns out to be too big, bail out.
546  if (UnrolledCost > MaxUnrolledLoopSize) {
547  LLVM_DEBUG(dbgs() << " Exceeded threshold.. exiting.\n"
548  << " UnrolledCost: " << UnrolledCost
549  << ", MaxUnrolledLoopSize: " << MaxUnrolledLoopSize
550  << "\n");
551  return None;
552  }
553  }
554 
555  Instruction *TI = BB->getTerminator();
556 
557  // Add in the live successors by first checking whether we have terminator
558  // that may be simplified based on the values simplified by this call.
559  BasicBlock *KnownSucc = nullptr;
560  if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
561  if (BI->isConditional()) {
562  if (Constant *SimpleCond =
563  SimplifiedValues.lookup(BI->getCondition())) {
564  // Just take the first successor if condition is undef
565  if (isa<UndefValue>(SimpleCond))
566  KnownSucc = BI->getSuccessor(0);
567  else if (ConstantInt *SimpleCondVal =
568  dyn_cast<ConstantInt>(SimpleCond))
569  KnownSucc = BI->getSuccessor(SimpleCondVal->isZero() ? 1 : 0);
570  }
571  }
572  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
573  if (Constant *SimpleCond =
574  SimplifiedValues.lookup(SI->getCondition())) {
575  // Just take the first successor if condition is undef
576  if (isa<UndefValue>(SimpleCond))
577  KnownSucc = SI->getSuccessor(0);
578  else if (ConstantInt *SimpleCondVal =
579  dyn_cast<ConstantInt>(SimpleCond))
580  KnownSucc = SI->findCaseValue(SimpleCondVal)->getCaseSuccessor();
581  }
582  }
583  if (KnownSucc) {
584  if (L->contains(KnownSucc))
585  BBWorklist.insert(KnownSucc);
586  else
587  ExitWorklist.insert({BB, KnownSucc});
588  continue;
589  }
590 
591  // Add BB's successors to the worklist.
592  for (BasicBlock *Succ : successors(BB))
593  if (L->contains(Succ))
594  BBWorklist.insert(Succ);
595  else
596  ExitWorklist.insert({BB, Succ});
597  AddCostRecursively(*TI, Iteration);
598  }
599 
600  // If we found no optimization opportunities on the first iteration, we
601  // won't find them on later ones too.
602  if (UnrolledCost == RolledDynamicCost) {
603  LLVM_DEBUG(dbgs() << " No opportunities found.. exiting.\n"
604  << " UnrolledCost: " << UnrolledCost << "\n");
605  return None;
606  }
607  }
608 
609  while (!ExitWorklist.empty()) {
610  BasicBlock *ExitingBB, *ExitBB;
611  std::tie(ExitingBB, ExitBB) = ExitWorklist.pop_back_val();
612 
613  for (Instruction &I : *ExitBB) {
614  auto *PN = dyn_cast<PHINode>(&I);
615  if (!PN)
616  break;
617 
618  Value *Op = PN->getIncomingValueForBlock(ExitingBB);
619  if (auto *OpI = dyn_cast<Instruction>(Op))
620  if (L->contains(OpI))
621  AddCostRecursively(*OpI, TripCount - 1);
622  }
623  }
624 
625  LLVM_DEBUG(dbgs() << "Analysis finished:\n"
626  << "UnrolledCost: " << UnrolledCost << ", "
627  << "RolledDynamicCost: " << RolledDynamicCost << "\n");
628  return {{UnrolledCost, RolledDynamicCost}};
629 }
630 
631 /// ApproximateLoopSize - Approximate the size of the loop.
633  const Loop *L, unsigned &NumCalls, bool &NotDuplicatable, bool &Convergent,
634  const TargetTransformInfo &TTI,
635  const SmallPtrSetImpl<const Value *> &EphValues, unsigned BEInsns) {
637  for (BasicBlock *BB : L->blocks())
638  Metrics.analyzeBasicBlock(BB, TTI, EphValues);
639  NumCalls = Metrics.NumInlineCandidates;
640  NotDuplicatable = Metrics.notDuplicatable;
641  Convergent = Metrics.convergent;
642 
643  unsigned LoopSize = Metrics.NumInsts;
644 
645  // Don't allow an estimate of size zero. This would allows unrolling of loops
646  // with huge iteration counts, which is a compile time problem even if it's
647  // not a problem for code quality. Also, the code using this size may assume
648  // that each loop has at least three instructions (likely a conditional
649  // branch, a comparison feeding that branch, and some kind of loop increment
650  // feeding that comparison instruction).
651  LoopSize = std::max(LoopSize, BEInsns + 1);
652 
653  return LoopSize;
654 }
655 
656 // Returns the loop hint metadata node with the given name (for example,
657 // "llvm.loop.unroll.count"). If no such metadata node exists, then nullptr is
658 // returned.
660  if (MDNode *LoopID = L->getLoopID())
661  return GetUnrollMetadata(LoopID, Name);
662  return nullptr;
663 }
664 
665 // Returns true if the loop has an unroll(full) pragma.
666 static bool HasUnrollFullPragma(const Loop *L) {
667  return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.full");
668 }
669 
670 // Returns true if the loop has an unroll(enable) pragma. This metadata is used
671 // for both "#pragma unroll" and "#pragma clang loop unroll(enable)" directives.
672 static bool HasUnrollEnablePragma(const Loop *L) {
673  return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.enable");
674 }
675 
676 // Returns true if the loop has an runtime unroll(disable) pragma.
677 static bool HasRuntimeUnrollDisablePragma(const Loop *L) {
678  return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.runtime.disable");
679 }
680 
681 // If loop has an unroll_count pragma return the (necessarily
682 // positive) value from the pragma. Otherwise return 0.
683 static unsigned UnrollCountPragmaValue(const Loop *L) {
684  MDNode *MD = GetUnrollMetadataForLoop(L, "llvm.loop.unroll.count");
685  if (MD) {
686  assert(MD->getNumOperands() == 2 &&
687  "Unroll count hint metadata should have two operands.");
688  unsigned Count =
689  mdconst::extract<ConstantInt>(MD->getOperand(1))->getZExtValue();
690  assert(Count >= 1 && "Unroll count must be positive.");
691  return Count;
692  }
693  return 0;
694 }
695 
696 // Computes the boosting factor for complete unrolling.
697 // If fully unrolling the loop would save a lot of RolledDynamicCost, it would
698 // be beneficial to fully unroll the loop even if unrolledcost is large. We
699 // use (RolledDynamicCost / UnrolledCost) to model the unroll benefits to adjust
700 // the unroll threshold.
701 static unsigned getFullUnrollBoostingFactor(const EstimatedUnrollCost &Cost,
702  unsigned MaxPercentThresholdBoost) {
703  if (Cost.RolledDynamicCost >= std::numeric_limits<unsigned>::max() / 100)
704  return 100;
705  else if (Cost.UnrolledCost != 0)
706  // The boosting factor is RolledDynamicCost / UnrolledCost
707  return std::min(100 * Cost.RolledDynamicCost / Cost.UnrolledCost,
708  MaxPercentThresholdBoost);
709  else
710  return MaxPercentThresholdBoost;
711 }
712 
713 // Returns loop size estimation for unrolled loop.
714 static uint64_t getUnrolledLoopSize(
715  unsigned LoopSize,
717  assert(LoopSize >= UP.BEInsns && "LoopSize should not be less than BEInsns!");
718  return (uint64_t)(LoopSize - UP.BEInsns) * UP.Count + UP.BEInsns;
719 }
720 
721 // Returns true if unroll count was set explicitly.
722 // Calculates unroll count and writes it to UP.Count.
723 // Unless IgnoreUser is true, will also use metadata and command-line options
724 // that are specific to to the LoopUnroll pass (which, for instance, are
725 // irrelevant for the LoopUnrollAndJam pass).
726 // FIXME: This function is used by LoopUnroll and LoopUnrollAndJam, but consumes
727 // many LoopUnroll-specific options. The shared functionality should be
728 // refactored into it own function.
730  Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI,
731  ScalarEvolution &SE, const SmallPtrSetImpl<const Value *> &EphValues,
732  OptimizationRemarkEmitter *ORE, unsigned &TripCount, unsigned MaxTripCount,
733  unsigned &TripMultiple, unsigned LoopSize,
734  TargetTransformInfo::UnrollingPreferences &UP, bool &UseUpperBound) {
735 
736  // Check for explicit Count.
737  // 1st priority is unroll count set by "unroll-count" option.
738  bool UserUnrollCount = UnrollCount.getNumOccurrences() > 0;
739  if (UserUnrollCount) {
740  UP.Count = UnrollCount;
741  UP.AllowExpensiveTripCount = true;
742  UP.Force = true;
743  if (UP.AllowRemainder && getUnrolledLoopSize(LoopSize, UP) < UP.Threshold)
744  return true;
745  }
746 
747  // 2nd priority is unroll count set by pragma.
748  unsigned PragmaCount = UnrollCountPragmaValue(L);
749  if (PragmaCount > 0) {
750  UP.Count = PragmaCount;
751  UP.Runtime = true;
752  UP.AllowExpensiveTripCount = true;
753  UP.Force = true;
754  if ((UP.AllowRemainder || (TripMultiple % PragmaCount == 0)) &&
756  return true;
757  }
758  bool PragmaFullUnroll = HasUnrollFullPragma(L);
759  if (PragmaFullUnroll && TripCount != 0) {
760  UP.Count = TripCount;
761  if (getUnrolledLoopSize(LoopSize, UP) < PragmaUnrollThreshold)
762  return false;
763  }
764 
765  bool PragmaEnableUnroll = HasUnrollEnablePragma(L);
766  bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll ||
767  PragmaEnableUnroll || UserUnrollCount;
768 
769  if (ExplicitUnroll && TripCount != 0) {
770  // If the loop has an unrolling pragma, we want to be more aggressive with
771  // unrolling limits. Set thresholds to at least the PragmaUnrollThreshold
772  // value which is larger than the default limits.
773  UP.Threshold = std::max<unsigned>(UP.Threshold, PragmaUnrollThreshold);
774  UP.PartialThreshold =
775  std::max<unsigned>(UP.PartialThreshold, PragmaUnrollThreshold);
776  }
777 
778  // 3rd priority is full unroll count.
779  // Full unroll makes sense only when TripCount or its upper bound could be
780  // statically calculated.
781  // Also we need to check if we exceed FullUnrollMaxCount.
782  // If using the upper bound to unroll, TripMultiple should be set to 1 because
783  // we do not know when loop may exit.
784  // MaxTripCount and ExactTripCount cannot both be non zero since we only
785  // compute the former when the latter is zero.
786  unsigned ExactTripCount = TripCount;
787  assert((ExactTripCount == 0 || MaxTripCount == 0) &&
788  "ExtractTripCount and MaxTripCount cannot both be non zero.");
789  unsigned FullUnrollTripCount = ExactTripCount ? ExactTripCount : MaxTripCount;
790  UP.Count = FullUnrollTripCount;
791  if (FullUnrollTripCount && FullUnrollTripCount <= UP.FullUnrollMaxCount) {
792  // When computing the unrolled size, note that BEInsns are not replicated
793  // like the rest of the loop body.
794  if (getUnrolledLoopSize(LoopSize, UP) < UP.Threshold) {
795  UseUpperBound = (MaxTripCount == FullUnrollTripCount);
796  TripCount = FullUnrollTripCount;
797  TripMultiple = UP.UpperBound ? 1 : TripMultiple;
798  return ExplicitUnroll;
799  } else {
800  // The loop isn't that small, but we still can fully unroll it if that
801  // helps to remove a significant number of instructions.
802  // To check that, run additional analysis on the loop.
804  L, FullUnrollTripCount, DT, SE, EphValues, TTI,
805  UP.Threshold * UP.MaxPercentThresholdBoost / 100)) {
806  unsigned Boost =
808  if (Cost->UnrolledCost < UP.Threshold * Boost / 100) {
809  UseUpperBound = (MaxTripCount == FullUnrollTripCount);
810  TripCount = FullUnrollTripCount;
811  TripMultiple = UP.UpperBound ? 1 : TripMultiple;
812  return ExplicitUnroll;
813  }
814  }
815  }
816  }
817 
818  // 4th priority is loop peeling.
819  computePeelCount(L, LoopSize, UP, TripCount, SE);
820  if (UP.PeelCount) {
821  UP.Runtime = false;
822  UP.Count = 1;
823  return ExplicitUnroll;
824  }
825 
826  // 5th priority is partial unrolling.
827  // Try partial unroll only when TripCount could be statically calculated.
828  if (TripCount) {
829  UP.Partial |= ExplicitUnroll;
830  if (!UP.Partial) {
831  LLVM_DEBUG(dbgs() << " will not try to unroll partially because "
832  << "-unroll-allow-partial not given\n");
833  UP.Count = 0;
834  return false;
835  }
836  if (UP.Count == 0)
837  UP.Count = TripCount;
838  if (UP.PartialThreshold != NoThreshold) {
839  // Reduce unroll count to be modulo of TripCount for partial unrolling.
840  if (getUnrolledLoopSize(LoopSize, UP) > UP.PartialThreshold)
841  UP.Count =
842  (std::max(UP.PartialThreshold, UP.BEInsns + 1) - UP.BEInsns) /
843  (LoopSize - UP.BEInsns);
844  if (UP.Count > UP.MaxCount)
845  UP.Count = UP.MaxCount;
846  while (UP.Count != 0 && TripCount % UP.Count != 0)
847  UP.Count--;
848  if (UP.AllowRemainder && UP.Count <= 1) {
849  // If there is no Count that is modulo of TripCount, set Count to
850  // largest power-of-two factor that satisfies the threshold limit.
851  // As we'll create fixup loop, do the type of unrolling only if
852  // remainder loop is allowed.
854  while (UP.Count != 0 &&
855  getUnrolledLoopSize(LoopSize, UP) > UP.PartialThreshold)
856  UP.Count >>= 1;
857  }
858  if (UP.Count < 2) {
859  if (PragmaEnableUnroll)
860  ORE->emit([&]() {
862  "UnrollAsDirectedTooLarge",
863  L->getStartLoc(), L->getHeader())
864  << "Unable to unroll loop as directed by unroll(enable) "
865  "pragma "
866  "because unrolled size is too large.";
867  });
868  UP.Count = 0;
869  }
870  } else {
871  UP.Count = TripCount;
872  }
873  if (UP.Count > UP.MaxCount)
874  UP.Count = UP.MaxCount;
875  if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount &&
876  UP.Count != TripCount)
877  ORE->emit([&]() {
878  return OptimizationRemarkMissed(DEBUG_TYPE,
879  "FullUnrollAsDirectedTooLarge",
880  L->getStartLoc(), L->getHeader())
881  << "Unable to fully unroll loop as directed by unroll pragma "
882  "because "
883  "unrolled size is too large.";
884  });
885  return ExplicitUnroll;
886  }
887  assert(TripCount == 0 &&
888  "All cases when TripCount is constant should be covered here.");
889  if (PragmaFullUnroll)
890  ORE->emit([&]() {
892  DEBUG_TYPE, "CantFullUnrollAsDirectedRuntimeTripCount",
893  L->getStartLoc(), L->getHeader())
894  << "Unable to fully unroll loop as directed by unroll(full) "
895  "pragma "
896  "because loop has a runtime trip count.";
897  });
898 
899  // 6th priority is runtime unrolling.
900  // Don't unroll a runtime trip count loop when it is disabled.
902  UP.Count = 0;
903  return false;
904  }
905 
906  // Check if the runtime trip count is too small when profile is available.
907  if (L->getHeader()->getParent()->hasProfileData()) {
908  if (auto ProfileTripCount = getLoopEstimatedTripCount(L)) {
909  if (*ProfileTripCount < FlatLoopTripCountThreshold)
910  return false;
911  else
912  UP.AllowExpensiveTripCount = true;
913  }
914  }
915 
916  // Reduce count based on the type of unrolling and the threshold values.
917  UP.Runtime |= PragmaEnableUnroll || PragmaCount > 0 || UserUnrollCount;
918  if (!UP.Runtime) {
919  LLVM_DEBUG(
920  dbgs() << " will not try to unroll loop with runtime trip count "
921  << "-unroll-runtime not given\n");
922  UP.Count = 0;
923  return false;
924  }
925  if (UP.Count == 0)
927 
928  // Reduce unroll count to be the largest power-of-two factor of
929  // the original count which satisfies the threshold limit.
930  while (UP.Count != 0 &&
931  getUnrolledLoopSize(LoopSize, UP) > UP.PartialThreshold)
932  UP.Count >>= 1;
933 
934 #ifndef NDEBUG
935  unsigned OrigCount = UP.Count;
936 #endif
937 
938  if (!UP.AllowRemainder && UP.Count != 0 && (TripMultiple % UP.Count) != 0) {
939  while (UP.Count != 0 && TripMultiple % UP.Count != 0)
940  UP.Count >>= 1;
941  LLVM_DEBUG(
942  dbgs() << "Remainder loop is restricted (that could architecture "
943  "specific or because the loop contains a convergent "
944  "instruction), so unroll count must divide the trip "
945  "multiple, "
946  << TripMultiple << ". Reducing unroll count from " << OrigCount
947  << " to " << UP.Count << ".\n");
948 
949  using namespace ore;
950 
951  if (PragmaCount > 0 && !UP.AllowRemainder)
952  ORE->emit([&]() {
954  "DifferentUnrollCountFromDirected",
955  L->getStartLoc(), L->getHeader())
956  << "Unable to unroll loop the number of times directed by "
957  "unroll_count pragma because remainder loop is restricted "
958  "(that could architecture specific or because the loop "
959  "contains a convergent instruction) and so must have an "
960  "unroll "
961  "count that divides the loop trip multiple of "
962  << NV("TripMultiple", TripMultiple) << ". Unrolling instead "
963  << NV("UnrollCount", UP.Count) << " time(s).";
964  });
965  }
966 
967  if (UP.Count > UP.MaxCount)
968  UP.Count = UP.MaxCount;
969  LLVM_DEBUG(dbgs() << " partially unrolling with count: " << UP.Count
970  << "\n");
971  if (UP.Count < 2)
972  UP.Count = 0;
973  return ExplicitUnroll;
974 }
975 
977  Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
978  const TargetTransformInfo &TTI, AssumptionCache &AC,
981  bool PreserveLCSSA, int OptLevel,
982  bool OnlyWhenForced, bool ForgetAllSCEV, Optional<unsigned> ProvidedCount,
983  Optional<unsigned> ProvidedThreshold, Optional<bool> ProvidedAllowPartial,
984  Optional<bool> ProvidedRuntime, Optional<bool> ProvidedUpperBound,
985  Optional<bool> ProvidedAllowPeeling) {
986  LLVM_DEBUG(dbgs() << "Loop Unroll: F["
987  << L->getHeader()->getParent()->getName() << "] Loop %"
988  << L->getHeader()->getName() << "\n");
990  if (TM & TM_Disable)
992  if (!L->isLoopSimplifyForm()) {
993  LLVM_DEBUG(
994  dbgs() << " Not unrolling loop which is not in loop-simplify form.\n");
996  }
997 
998  // When automtatic unrolling is disabled, do not unroll unless overridden for
999  // this loop.
1000  if (OnlyWhenForced && !(TM & TM_Enable))
1002 
1003  bool OptForSize = L->getHeader()->getParent()->hasOptSize();
1004  unsigned NumInlineCandidates;
1005  bool NotDuplicatable;
1006  bool Convergent;
1008  L, SE, TTI, BFI, PSI, OptLevel, ProvidedThreshold, ProvidedCount,
1009  ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound,
1010  ProvidedAllowPeeling);
1011 
1012  // Exit early if unrolling is disabled. For OptForSize, we pick the loop size
1013  // as threshold later on.
1014  if (UP.Threshold == 0 && (!UP.Partial || UP.PartialThreshold == 0) &&
1015  !OptForSize)
1017 
1019  CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
1020 
1021  unsigned LoopSize =
1022  ApproximateLoopSize(L, NumInlineCandidates, NotDuplicatable, Convergent,
1023  TTI, EphValues, UP.BEInsns);
1024  LLVM_DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n");
1025  if (NotDuplicatable) {
1026  LLVM_DEBUG(dbgs() << " Not unrolling loop which contains non-duplicatable"
1027  << " instructions.\n");
1029  }
1030 
1031  // When optimizing for size, use LoopSize as threshold, to (fully) unroll
1032  // loops, if it does not increase code size.
1033  if (OptForSize)
1034  UP.Threshold = std::max(UP.Threshold, LoopSize);
1035 
1036  if (NumInlineCandidates != 0) {
1037  LLVM_DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n");
1039  }
1040 
1041  // Find trip count and trip multiple if count is not available
1042  unsigned TripCount = 0;
1043  unsigned MaxTripCount = 0;
1044  unsigned TripMultiple = 1;
1045  // If there are multiple exiting blocks but one of them is the latch, use the
1046  // latch for the trip count estimation. Otherwise insist on a single exiting
1047  // block for the trip count estimation.
1048  BasicBlock *ExitingBlock = L->getLoopLatch();
1049  if (!ExitingBlock || !L->isLoopExiting(ExitingBlock))
1050  ExitingBlock = L->getExitingBlock();
1051  if (ExitingBlock) {
1052  TripCount = SE.getSmallConstantTripCount(L, ExitingBlock);
1053  TripMultiple = SE.getSmallConstantTripMultiple(L, ExitingBlock);
1054  }
1055 
1056  // If the loop contains a convergent operation, the prelude we'd add
1057  // to do the first few instructions before we hit the unrolled loop
1058  // is unsafe -- it adds a control-flow dependency to the convergent
1059  // operation. Therefore restrict remainder loop (try unrollig without).
1060  //
1061  // TODO: This is quite conservative. In practice, convergent_op()
1062  // is likely to be called unconditionally in the loop. In this
1063  // case, the program would be ill-formed (on most architectures)
1064  // unless n were the same on all threads in a thread group.
1065  // Assuming n is the same on all threads, any kind of unrolling is
1066  // safe. But currently llvm's notion of convergence isn't powerful
1067  // enough to express this.
1068  if (Convergent)
1069  UP.AllowRemainder = false;
1070 
1071  // Try to find the trip count upper bound if we cannot find the exact trip
1072  // count.
1073  bool MaxOrZero = false;
1074  if (!TripCount) {
1075  MaxTripCount = SE.getSmallConstantMaxTripCount(L);
1076  MaxOrZero = SE.isBackedgeTakenCountMaxOrZero(L);
1077  // We can unroll by the upper bound amount if it's generally allowed or if
1078  // we know that the loop is executed either the upper bound or zero times.
1079  // (MaxOrZero unrolling keeps only the first loop test, so the number of
1080  // loop tests remains the same compared to the non-unrolled version, whereas
1081  // the generic upper bound unrolling keeps all but the last loop test so the
1082  // number of loop tests goes up which may end up being worse on targets with
1083  // constrained branch predictor resources so is controlled by an option.)
1084  // In addition we only unroll small upper bounds.
1085  if (!(UP.UpperBound || MaxOrZero) || MaxTripCount > UnrollMaxUpperBound) {
1086  MaxTripCount = 0;
1087  }
1088  }
1089 
1090  // computeUnrollCount() decides whether it is beneficial to use upper bound to
1091  // fully unroll the loop.
1092  bool UseUpperBound = false;
1093  bool IsCountSetExplicitly = computeUnrollCount(
1094  L, TTI, DT, LI, SE, EphValues, &ORE, TripCount, MaxTripCount,
1095  TripMultiple, LoopSize, UP, UseUpperBound);
1096  if (!UP.Count)
1098  // Unroll factor (Count) must be less or equal to TripCount.
1099  if (TripCount && UP.Count > TripCount)
1100  UP.Count = TripCount;
1101 
1102  // Save loop properties before it is transformed.
1103  MDNode *OrigLoopID = L->getLoopID();
1104 
1105  // Unroll the loop.
1106  Loop *RemainderLoop = nullptr;
1107  LoopUnrollResult UnrollResult = UnrollLoop(
1108  L,
1109  {UP.Count, TripCount, UP.Force, UP.Runtime, UP.AllowExpensiveTripCount,
1110  UseUpperBound, MaxOrZero, TripMultiple, UP.PeelCount, UP.UnrollRemainder,
1111  ForgetAllSCEV},
1112  LI, &SE, &DT, &AC, &ORE, PreserveLCSSA, &RemainderLoop);
1113  if (UnrollResult == LoopUnrollResult::Unmodified)
1115 
1116  if (RemainderLoop) {
1117  Optional<MDNode *> RemainderLoopID =
1120  if (RemainderLoopID.hasValue())
1121  RemainderLoop->setLoopID(RemainderLoopID.getValue());
1122  }
1123 
1124  if (UnrollResult != LoopUnrollResult::FullyUnrolled) {
1125  Optional<MDNode *> NewLoopID =
1128  if (NewLoopID.hasValue()) {
1129  L->setLoopID(NewLoopID.getValue());
1130 
1131  // Do not setLoopAlreadyUnrolled if loop attributes have been specified
1132  // explicitly.
1133  return UnrollResult;
1134  }
1135  }
1136 
1137  // If loop has an unroll count pragma or unrolled by explicitly set count
1138  // mark loop as unrolled to prevent unrolling beyond that requested.
1139  // If the loop was peeled, we already "used up" the profile information
1140  // we had, so we don't want to unroll or peel again.
1141  if (UnrollResult != LoopUnrollResult::FullyUnrolled &&
1142  (IsCountSetExplicitly || UP.PeelCount))
1144 
1145  return UnrollResult;
1146 }
1147 
1148 namespace {
1149 
1150 class LoopUnroll : public LoopPass {
1151 public:
1152  static char ID; // Pass ID, replacement for typeid
1153 
1154  int OptLevel;
1155 
1156  /// If false, use a cost model to determine whether unrolling of a loop is
1157  /// profitable. If true, only loops that explicitly request unrolling via
1158  /// metadata are considered. All other loops are skipped.
1159  bool OnlyWhenForced;
1160 
1161  /// If false, when SCEV is invalidated, only forget everything in the
1162  /// top-most loop (call forgetTopMostLoop), of the loop being processed.
1163  /// Otherwise, forgetAllLoops and rebuild when needed next.
1164  bool ForgetAllSCEV;
1165 
1166  Optional<unsigned> ProvidedCount;
1167  Optional<unsigned> ProvidedThreshold;
1168  Optional<bool> ProvidedAllowPartial;
1169  Optional<bool> ProvidedRuntime;
1170  Optional<bool> ProvidedUpperBound;
1171  Optional<bool> ProvidedAllowPeeling;
1172 
1173  LoopUnroll(int OptLevel = 2, bool OnlyWhenForced = false,
1174  bool ForgetAllSCEV = false, Optional<unsigned> Threshold = None,
1175  Optional<unsigned> Count = None,
1176  Optional<bool> AllowPartial = None, Optional<bool> Runtime = None,
1177  Optional<bool> UpperBound = None,
1178  Optional<bool> AllowPeeling = None)
1179  : LoopPass(ID), OptLevel(OptLevel), OnlyWhenForced(OnlyWhenForced),
1180  ForgetAllSCEV(ForgetAllSCEV), ProvidedCount(std::move(Count)),
1181  ProvidedThreshold(Threshold), ProvidedAllowPartial(AllowPartial),
1182  ProvidedRuntime(Runtime), ProvidedUpperBound(UpperBound),
1183  ProvidedAllowPeeling(AllowPeeling) {
1185  }
1186 
1187  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
1188  if (skipLoop(L))
1189  return false;
1190 
1191  Function &F = *L->getHeader()->getParent();
1192 
1193  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1194  LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1195  ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1196  const TargetTransformInfo &TTI =
1197  getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
1198  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1199  // For the old PM, we can't use OptimizationRemarkEmitter as an analysis
1200  // pass. Function analyses need to be preserved across loop transformations
1201  // but ORE cannot be preserved (see comment before the pass definition).
1202  OptimizationRemarkEmitter ORE(&F);
1203  bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
1204 
1206  L, DT, LI, SE, TTI, AC, ORE, nullptr, nullptr,
1207  PreserveLCSSA, OptLevel, OnlyWhenForced,
1208  ForgetAllSCEV, ProvidedCount, ProvidedThreshold, ProvidedAllowPartial,
1209  ProvidedRuntime, ProvidedUpperBound, ProvidedAllowPeeling);
1210 
1211  if (Result == LoopUnrollResult::FullyUnrolled)
1212  LPM.markLoopAsDeleted(*L);
1213 
1214  return Result != LoopUnrollResult::Unmodified;
1215  }
1216 
1217  /// This transformation requires natural loop information & requires that
1218  /// loop preheaders be inserted into the CFG...
1219  void getAnalysisUsage(AnalysisUsage &AU) const override {
1222  // FIXME: Loop passes are required to preserve domtree, and for now we just
1223  // recreate dom info if anything gets unrolled.
1225  }
1226 };
1227 
1228 } // end anonymous namespace
1229 
1230 char LoopUnroll::ID = 0;
1231 
1232 INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
1236 INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
1237 
1238 Pass *llvm::createLoopUnrollPass(int OptLevel, bool OnlyWhenForced,
1239  bool ForgetAllSCEV, int Threshold, int Count,
1240  int AllowPartial, int Runtime, int UpperBound,
1241  int AllowPeeling) {
1242  // TODO: It would make more sense for this function to take the optionals
1243  // directly, but that's dangerous since it would silently break out of tree
1244  // callers.
1245  return new LoopUnroll(
1246  OptLevel, OnlyWhenForced, ForgetAllSCEV,
1247  Threshold == -1 ? None : Optional<unsigned>(Threshold),
1248  Count == -1 ? None : Optional<unsigned>(Count),
1249  AllowPartial == -1 ? None : Optional<bool>(AllowPartial),
1250  Runtime == -1 ? None : Optional<bool>(Runtime),
1251  UpperBound == -1 ? None : Optional<bool>(UpperBound),
1252  AllowPeeling == -1 ? None : Optional<bool>(AllowPeeling));
1253 }
1254 
1255 Pass *llvm::createSimpleLoopUnrollPass(int OptLevel, bool OnlyWhenForced,
1256  bool ForgetAllSCEV) {
1257  return createLoopUnrollPass(OptLevel, OnlyWhenForced, ForgetAllSCEV, -1, -1,
1258  0, 0, 0, 0);
1259 }
1260 
1263  LPMUpdater &Updater) {
1264  const auto &FAM =
1265  AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager();
1266  Function *F = L.getHeader()->getParent();
1267 
1268  auto *ORE = FAM.getCachedResult<OptimizationRemarkEmitterAnalysis>(*F);
1269  // FIXME: This should probably be optional rather than required.
1270  if (!ORE)
1272  "LoopFullUnrollPass: OptimizationRemarkEmitterAnalysis not "
1273  "cached at a higher level");
1274 
1275  // Keep track of the previous loop structure so we can identify new loops
1276  // created by unrolling.
1277  Loop *ParentL = L.getParentLoop();
1278  SmallPtrSet<Loop *, 4> OldLoops;
1279  if (ParentL)
1280  OldLoops.insert(ParentL->begin(), ParentL->end());
1281  else
1282  OldLoops.insert(AR.LI.begin(), AR.LI.end());
1283 
1284  std::string LoopName = L.getName();
1285 
1286  bool Changed =
1287  tryToUnrollLoop(&L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, *ORE,
1288  /*BFI*/ nullptr, /*PSI*/ nullptr,
1289  /*PreserveLCSSA*/ true, OptLevel, OnlyWhenForced,
1290  ForgetSCEV, /*Count*/ None,
1291  /*Threshold*/ None, /*AllowPartial*/ false,
1292  /*Runtime*/ false, /*UpperBound*/ false,
1293  /*AllowPeeling*/ false) != LoopUnrollResult::Unmodified;
1294  if (!Changed)
1295  return PreservedAnalyses::all();
1296 
1297  // The parent must not be damaged by unrolling!
1298 #ifndef NDEBUG
1299  if (ParentL)
1300  ParentL->verifyLoop();
1301 #endif
1302 
1303  // Unrolling can do several things to introduce new loops into a loop nest:
1304  // - Full unrolling clones child loops within the current loop but then
1305  // removes the current loop making all of the children appear to be new
1306  // sibling loops.
1307  //
1308  // When a new loop appears as a sibling loop after fully unrolling,
1309  // its nesting structure has fundamentally changed and we want to revisit
1310  // it to reflect that.
1311  //
1312  // When unrolling has removed the current loop, we need to tell the
1313  // infrastructure that it is gone.
1314  //
1315  // Finally, we support a debugging/testing mode where we revisit child loops
1316  // as well. These are not expected to require further optimizations as either
1317  // they or the loop they were cloned from have been directly visited already.
1318  // But the debugging mode allows us to check this assumption.
1319  bool IsCurrentLoopValid = false;
1320  SmallVector<Loop *, 4> SibLoops;
1321  if (ParentL)
1322  SibLoops.append(ParentL->begin(), ParentL->end());
1323  else
1324  SibLoops.append(AR.LI.begin(), AR.LI.end());
1325  erase_if(SibLoops, [&](Loop *SibLoop) {
1326  if (SibLoop == &L) {
1327  IsCurrentLoopValid = true;
1328  return true;
1329  }
1330 
1331  // Otherwise erase the loop from the list if it was in the old loops.
1332  return OldLoops.count(SibLoop) != 0;
1333  });
1334  Updater.addSiblingLoops(SibLoops);
1335 
1336  if (!IsCurrentLoopValid) {
1337  Updater.markLoopAsDeleted(L, LoopName);
1338  } else {
1339  // We can only walk child loops if the current loop remained valid.
1341  // Walk *all* of the child loops.
1342  SmallVector<Loop *, 4> ChildLoops(L.begin(), L.end());
1343  Updater.addChildLoops(ChildLoops);
1344  }
1345  }
1346 
1348 }
1349 
1350 template <typename RangeT>
1352  SmallVector<Loop *, 8> Worklist;
1353  // We use an internal worklist to build up the preorder traversal without
1354  // recursion.
1355  SmallVector<Loop *, 4> PreOrderLoops, PreOrderWorklist;
1356 
1357  for (Loop *RootL : Loops) {
1358  assert(PreOrderLoops.empty() && "Must start with an empty preorder walk.");
1359  assert(PreOrderWorklist.empty() &&
1360  "Must start with an empty preorder walk worklist.");
1361  PreOrderWorklist.push_back(RootL);
1362  do {
1363  Loop *L = PreOrderWorklist.pop_back_val();
1364  PreOrderWorklist.append(L->begin(), L->end());
1365  PreOrderLoops.push_back(L);
1366  } while (!PreOrderWorklist.empty());
1367 
1368  Worklist.append(PreOrderLoops.begin(), PreOrderLoops.end());
1369  PreOrderLoops.clear();
1370  }
1371  return Worklist;
1372 }
1373 
1376  auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
1377  auto &LI = AM.getResult<LoopAnalysis>(F);
1378  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
1379  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
1380  auto &AC = AM.getResult<AssumptionAnalysis>(F);
1382 
1383  LoopAnalysisManager *LAM = nullptr;
1384  if (auto *LAMProxy = AM.getCachedResult<LoopAnalysisManagerFunctionProxy>(F))
1385  LAM = &LAMProxy->getManager();
1386 
1387  const ModuleAnalysisManager &MAM =
1389  ProfileSummaryInfo *PSI =
1391  auto *BFI = (PSI && PSI->hasProfileSummary()) ?
1392  &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
1393 
1394  bool Changed = false;
1395 
1396  // The unroller requires loops to be in simplified form, and also needs LCSSA.
1397  // Since simplification may add new inner loops, it has to run before the
1398  // legality and profitability checks. This means running the loop unroller
1399  // will simplify all loops, regardless of whether anything end up being
1400  // unrolled.
1401  for (auto &L : LI) {
1402  Changed |=
1403  simplifyLoop(L, &DT, &LI, &SE, &AC, nullptr, false /* PreserveLCSSA */);
1404  Changed |= formLCSSARecursively(*L, DT, &LI, &SE);
1405  }
1406 
1408 
1409  while (!Worklist.empty()) {
1410  // Because the LoopInfo stores the loops in RPO, we walk the worklist
1411  // from back to front so that we work forward across the CFG, which
1412  // for unrolling is only needed to get optimization remarks emitted in
1413  // a forward order.
1414  Loop &L = *Worklist.pop_back_val();
1415 #ifndef NDEBUG
1416  Loop *ParentL = L.getParentLoop();
1417 #endif
1418 
1419  // Check if the profile summary indicates that the profiled application
1420  // has a huge working set size, in which case we disable peeling to avoid
1421  // bloating it further.
1422  Optional<bool> LocalAllowPeeling = UnrollOpts.AllowPeeling;
1423  if (PSI && PSI->hasHugeWorkingSetSize())
1424  LocalAllowPeeling = false;
1425  std::string LoopName = L.getName();
1426  // The API here is quite complex to call and we allow to select some
1427  // flavors of unrolling during construction time (by setting UnrollOpts).
1429  &L, DT, &LI, SE, TTI, AC, ORE, BFI, PSI,
1430  /*PreserveLCSSA*/ true, UnrollOpts.OptLevel, UnrollOpts.OnlyWhenForced,
1431  UnrollOpts.ForgetSCEV, /*Count*/ None,
1432  /*Threshold*/ None, UnrollOpts.AllowPartial, UnrollOpts.AllowRuntime,
1433  UnrollOpts.AllowUpperBound, LocalAllowPeeling);
1434  Changed |= Result != LoopUnrollResult::Unmodified;
1435 
1436  // The parent must not be damaged by unrolling!
1437 #ifndef NDEBUG
1438  if (Result != LoopUnrollResult::Unmodified && ParentL)
1439  ParentL->verifyLoop();
1440 #endif
1441 
1442  // Clear any cached analysis results for L if we removed it completely.
1443  if (LAM && Result == LoopUnrollResult::FullyUnrolled)
1444  LAM->clear(L, LoopName);
1445  }
1446 
1447  if (!Changed)
1448  return PreservedAnalyses::all();
1449 
1451 }
Pass interface - Implemented by all &#39;passes&#39;.
Definition: Pass.h:80
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value *> &EphValues)
Collect a loop&#39;s ephemeral values (those used only by an assume or similar intrinsics in the loop)...
Definition: CodeMetrics.cpp:70
uint64_t CallInst * C
unsigned getSmallConstantTripCount(const Loop *L)
Returns the maximum trip count of the loop if it is a single-exit loop and we can compute a small max...
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...
Pass * createSimpleLoopUnrollPass(int OptLevel=2, bool OnlyWhenForced=false, bool ForgetAllSCEV=false)
bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Simplify each loop in a loop nest recursively.
Diagnostic information for missed-optimization remarks.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:244
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
unsigned getSmallConstantTripMultiple(const Loop *L)
Returns the largest constant divisor of the trip count of the loop if it is a single-exit loop and we...
DiagnosticInfoOptimizationBase::Argument NV
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:776
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:139
This class represents lattice values for constants.
Definition: AllocatorList.h:23
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:77
This header provides classes for managing a pipeline of passes over loops in LLVM IR...
cl::opt< bool > ForgetSCEVInLoopUnroll
bool convergent
True if this function contains a call to a convergent function.
Definition: CodeMetrics.h:47
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:622
amdgpu Simplify well known AMD library false FunctionCallee Value const Twine & Name
static bool HasUnrollEnablePragma(const Loop *L)
bool isLCSSAForm(DominatorTree &DT) const
Return true if the Loop is in LCSSA form.
Definition: LoopInfo.cpp:409
Implements a dense probed hash-table based set.
Definition: DenseSet.h:249
void push_back(const T &Elt)
Definition: SmallVector.h:211
Analysis providing profile information.
The main scalar evolution driver.
#define DEBUG_TYPE
unsigned PartialOptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold, but used for partial/runtime unrolling (set to UINT_MAX to disable).
static unsigned getFullUnrollBoostingFactor(const EstimatedUnrollCost &Cost, unsigned MaxPercentThresholdBoost)
This file contains the declarations for metadata subclasses.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:193
An immutable pass that tracks lazily created AssumptionCache objects.
unsigned PartialThreshold
The cost threshold for the unrolled loop, like Threshold, but used for partial/runtime unrolling (set...
A cache of @llvm.assume calls within a function.
Analysis pass providing the TargetTransformInfo.
bool Force
Apply loop unroll on any kind of loop (mainly to loops that fail runtime unrolling).
unsigned second
unsigned NumInlineCandidates
The number of calls to internal functions with a single caller.
Definition: CodeMetrics.h:68
Metadata node.
Definition: Metadata.h:863
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:230
F(f)
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1068
bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const SmallPtrSetImpl< const Value *> &EphValues, OptimizationRemarkEmitter *ORE, unsigned &TripCount, unsigned MaxTripCount, unsigned &TripMultiple, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, bool &UseUpperBound)
TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, int OptLevel, Optional< unsigned > UserThreshold, Optional< unsigned > UserCount, Optional< bool > UserAllowPartial, Optional< bool > UserRuntime, Optional< bool > UserUpperBound, Optional< bool > UserAllowPeeling)
Gather the various unrolling parameters based on the defaults, compiler flags, TTI overrides and user...
bool notDuplicatable
True if this function cannot be duplicated.
Definition: CodeMetrics.h:44
static cl::opt< unsigned > UnrollFullMaxCount("unroll-full-max-count", cl::Hidden, cl::desc("Set the max unroll count for full unrolling, for testing purposes"))
bool shouldOptimizeForSize(Function *F, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)
Returns true if function F is suggested to be size-optimized base on the profile. ...
Definition: SizeOpts.cpp:23
bool UnrollAndJam
Allow unroll and jam. Used to enable unroll and jam for the target.
static LoopUnrollResult tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const TargetTransformInfo &TTI, AssumptionCache &AC, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, bool PreserveLCSSA, int OptLevel, bool OnlyWhenForced, bool ForgetAllSCEV, Optional< unsigned > ProvidedCount, Optional< unsigned > ProvidedThreshold, Optional< bool > ProvidedAllowPartial, Optional< bool > ProvidedRuntime, Optional< bool > ProvidedUpperBound, Optional< bool > ProvidedAllowPeeling)
unsigned getSmallConstantMaxTripCount(const Loop *L)
Returns the upper bound of the loop trip count as a normal unsigned value.
const char *const LLVMLoopUnrollFollowupUnrolled
Definition: UnrollLoop.h:42
void addChildLoops(ArrayRef< Loop *> NewChildLoops)
Loop passes should use this method to indicate they have added new child loops of the current loop...
static cl::opt< unsigned > UnrollPartialThreshold("unroll-partial-threshold", cl::Hidden, cl::desc("The cost threshold for partial loop unrolling"))
bool formLCSSARecursively(Loop &L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution *SE)
Put a loop nest into LCSSA form.
Definition: LCSSA.cpp:385
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:221
void dump() const
Support for debugging, callable in GDB: V->dump()
Definition: AsmWriter.cpp:4424
static cl::opt< bool > UnrollRevisitChildLoops("unroll-revisit-child-loops", cl::Hidden, cl::desc("Enqueue and re-visit child loops in the loop PM after unrolling. " "This shouldn't typically be needed as child loops (or their " "clones) were already visited."))
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:50
Hexagon Hardware Loops
static cl::opt< unsigned > FlatLoopTripCountThreshold("flat-loop-tripcount-threshold", cl::init(5), cl::Hidden, cl::desc("If the runtime tripcount for the loop is lower than the " "threshold, the loop is considered as flat and will be less " "aggressively unrolled."))
static bool HasUnrollFullPragma(const Loop *L)
bool AllowPeeling
Allow peeling off loop iterations for loops with low dynamic tripcount.
static cl::opt< unsigned > UnrollCount("unroll-count", cl::Hidden, cl::desc("Use this unroll count for all loops including those with " "unroll_count pragma values, for testing purposes"))
bool AllowExpensiveTripCount
Allow emitting expensive instructions (such as divisions) when computing the trip count of a loop for...
unsigned FullUnrollMaxCount
Set the maximum unrolling factor for full unrolling.
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:1156
static cl::opt< bool > UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::Hidden, cl::desc("Unroll loops with run-time trip counts"))
static Optional< EstimatedUnrollCost > analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT, ScalarEvolution &SE, const SmallPtrSetImpl< const Value *> &EphValues, const TargetTransformInfo &TTI, unsigned MaxUnrolledLoopSize)
Figure out if the loop is worth full unrolling.
BlockT * getHeader() const
Definition: LoopInfo.h:102
static MDNode * GetUnrollMetadataForLoop(const Loop *L, StringRef Name)
The transformation should be applied without considering a cost model.
Definition: LoopUtils.h:225
void computePeelCount(Loop *L, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, unsigned &TripCount, ScalarEvolution &SE)
void visit(Iterator Start, Iterator End)
Definition: InstVisitor.h:89
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
static cl::opt< unsigned > UnrollMaxUpperBound("unroll-max-upperbound", cl::init(8), cl::Hidden, cl::desc("The max of trip count upper bound that is considered in unrolling"))
static bool isEqual(const Function &Caller, const Function &Callee)
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition: LoopInfo.cpp:471
const T & getValue() const LLVM_LVALUE_FUNCTION
Definition: Optional.h:255
const char *const LLVMLoopUnrollFollowupRemainder
Definition: UnrollLoop.h:44
This header provides classes for managing per-loop analyses.
static cl::opt< bool > UnrollAllowRemainder("unroll-allow-remainder", cl::Hidden, cl::desc("Allow generation of a loop remainder (extra iterations) " "when unrolling a loop."))
static cl::opt< unsigned > UnrollMaxPercentThresholdBoost("unroll-max-percent-threshold-boost", cl::init(400), cl::Hidden, cl::desc("The maximum 'boost' (represented as a percentage >= 100) applied " "to the threshold when aggressively unrolling a loop due to the " "dynamic cost savings. If completely unrolling a loop will reduce " "the total runtime from X to Y, we boost the loop unroll " "threshold to DefaultThreshold*std::min(MaxPercentThresholdBoost, " "X/Y). This limit avoids excessive code bloat."))
void initializeLoopUnrollPass(PassRegistry &)
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:144
bool AllowRemainder
Allow generation of a loop remainder (extra iterations after unroll).
bool hasProfileSummary()
Returns true if profile summary is available.
The loop was fully unrolled into straight-line code.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
static cl::opt< bool > UnrollAllowPeeling("unroll-allow-peeling", cl::init(true), cl::Hidden, cl::desc("Allows loops to be peeled when the dynamic " "trip count is known to be low."))
Wrapper pass for TargetTransformInfo.
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:153
* if(!EatIfPresent(lltok::kw_thread_local)) return false
ParseOptionalThreadLocal := /*empty.
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
bool isLoopExiting(const BlockT *BB) const
True if terminator in the block can branch to another block that is outside of the current loop...
Definition: LoopInfo.h:205
Conditional or Unconditional Branch instruction.
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
unsigned UnrollAndJamInnerLoopThreshold
Threshold for unroll and jam, for inner loop size.
This is an important base class in LLVM.
Definition: Constant.h:41
static cl::opt< unsigned > UnrollMaxCount("unroll-max-count", cl::Hidden, cl::desc("Set the max unroll count for partial and runtime unrolling, for" "testing purposes"))
This file contains the declarations for the subclasses of Constant, which represent the different fla...
iterator end() const
Definition: LoopInfo.h:877
void setLoopAlreadyUnrolled()
Add llvm.loop.unroll.disable to this loop&#39;s loop id metadata.
Definition: LoopInfo.cpp:483
Machine Trace Metrics
char & LCSSAID
Definition: LCSSA.cpp:467
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:370
Represent the analysis usage information of a pass.
void analyzeBasicBlock(const BasicBlock *BB, const TargetTransformInfo &TTI, const SmallPtrSetImpl< const Value *> &EphValues)
Add information about a block to the current state.
static cl::opt< unsigned > UnrollThreshold("unroll-threshold", cl::Hidden, cl::desc("The cost threshold for loop unrolling"))
op_range operands()
Definition: User.h:237
unsigned Count
A forced unrolling factor (the number of concatenated bodies of the original loop in the unrolled loo...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:381
Optional< unsigned > getLoopEstimatedTripCount(Loop *L)
Get a loop&#39;s estimated trip count based on branch weight metadata.
Definition: LoopUtils.cpp:623
bool hasProfileData(bool IncludeSynthetic=false) const
Return true if the function is annotated with profile data.
Definition: Function.h:308
static cl::opt< unsigned > UnrollPeelCount("unroll-peel-count", cl::Hidden, cl::desc("Set the unroll peeling count, for testing purposes"))
void addSiblingLoops(ArrayRef< Loop *> NewSibLoops)
Loop passes should use this method to indicate they have added new sibling loops to the current loop...
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:159
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:569
void markLoopAsDeleted(Loop &L, llvm::StringRef Name)
Loop passes should use this method to indicate they have deleted a loop from the nest.
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
const char *const LLVMLoopUnrollFollowupAll
Definition: UnrollLoop.h:41
The transformation should not be applied.
Definition: LoopUtils.h:228
A function analysis which provides an AssumptionCache.
unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls, bool &NotDuplicatable, bool &Convergent, const TargetTransformInfo &TTI, const SmallPtrSetImpl< const Value *> &EphValues, unsigned BEInsns)
ApproximateLoopSize - Approximate the size of the loop.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:112
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:297
Analysis pass which computes BlockFrequencyInfo.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:417
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file. ...
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
static cl::opt< bool > UnrollAllowPartial("unroll-allow-partial", cl::Hidden, cl::desc("Allows loops to be partially unrolled until " "-unroll-threshold loop size is reached."))
An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...
Definition: PassManager.h:1160
static cl::opt< unsigned > PragmaUnrollThreshold("pragma-unroll-threshold", cl::init(16 *1024), cl::Hidden, cl::desc("Unrolled size limit for loops with an unroll(full) or " "unroll_count pragma."))
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
iterator begin() const
Definition: LoopInfo.h:144
Utility to calculate the size and a few similar metrics for a set of basic blocks.
Definition: CodeMetrics.h:32
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:47
static unsigned UnrollCountPragmaValue(const Loop *L)
unsigned DefaultUnrollRuntimeCount
Default unroll count for loops with run-time trip count.
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:374
void markLoopAsDeleted(Loop &L)
Definition: LoopPass.cpp:143
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
static uint64_t getUnrolledLoopSize(unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP)
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2&#39;s erase_if which is equivalent t...
Definition: STLExtras.h:1359
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
Optional< MDNode * > makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef< StringRef > FollowupAttrs, const char *InheritOptionsAttrsPrefix="", bool AlwaysNew=false)
Create a new loop identifier for a loop created from a loop transformation.
Definition: LoopUtils.cpp:250
iterator begin() const
Definition: LoopInfo.h:876
amdgpu Simplify well known AMD library false FunctionCallee Callee
bool UnrollRemainder
Allow unrolling of all the iterations of the runtime loop remainder.
static cl::opt< bool > UnrollUnrollRemainder("unroll-remainder", cl::Hidden, cl::desc("Allow the loop remainder to be unrolled."))
static cl::opt< unsigned > Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"), cl::init(100), cl::Hidden)
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:387
MDNode * GetUnrollMetadata(MDNode *LoopID, StringRef Name)
Given an llvm.loop loop id metadata node, returns the loop hint metadata node with the given name (fo...
Definition: LoopUnroll.cpp:957
Analysis pass that exposes the ScalarEvolution for a function.
static const unsigned NoThreshold
A magic value for use with the Threshold parameter to indicate that the loop unroll should be perform...
LoopT * getParentLoop() const
Definition: LoopInfo.h:103
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
bool hasValue() const
Definition: Optional.h:259
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to...
Definition: LoopInfo.cpp:425
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
Definition: LoopInfo.cpp:447
unsigned PeelCount
A forced peeling factor (the number of bodied of the original loop that should be peeled off before t...
unsigned Threshold
The cost threshold for the unrolled loop.
int getUserCost(const User *U, ArrayRef< const Value *> Operands) const
Estimate the cost of a given IR user when lowered.
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:55
StringRef getName() const
Definition: LoopInfo.h:800
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:510
Parameters that control the generic loop unrolling transformation.
unsigned OptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size (set to UINT_MAX to disable)...
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:214
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:106
#define I(x, y, z)
Definition: MD5.cpp:58
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:72
LoopUnrollResult UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, OptimizationRemarkEmitter *ORE, bool PreserveLCSSA, Loop **RemainderLoop=nullptr)
Unroll the given loop by Count.
Definition: LoopUnroll.cpp:276
TransformationMode hasUnrollTransformation(Loop *L)
Definition: LoopUtils.cpp:335
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass&#39;s AnalysisUsage.
Definition: LoopUtils.cpp:136
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
Definition: PassManager.h:795
iterator end() const
Definition: LoopInfo.h:145
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:332
TransformationMode
The mode sets how eager a transformation should be applied.
Definition: LoopUtils.h:219
static SmallVector< Loop *, 8 > appendLoopsToWorklist(RangeT &&Loops)
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:211
Pass * createLoopUnrollPass(int OptLevel=2, bool OnlyWhenForced=false, bool ForgetAllSCEV=false, int Threshold=-1, int Count=-1, int AllowPartial=-1, int Runtime=-1, int UpperBound=-1, int AllowPeeling=-1)
bool empty() const
Definition: LoopInfo.h:148
Multiway switch.
static bool HasRuntimeUnrollDisablePragma(const Loop *L)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
bool hasHugeWorkingSetSize()
Returns true if the working set size of the code is considered huge.
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:575
LLVM Value Representation.
Definition: Value.h:72
succ_range successors(Instruction *I)
Definition: CFG.h:259
unsigned MaxPercentThresholdBoost
If complete unrolling will reduce the cost of the loop, we will boost the Threshold by a certain perc...
bool isLoweredToCall(const Function *F) const
Test whether calls to a function lower to actual program function calls.
The loop was not modified.
static cl::opt< unsigned > UnrollMaxIterationsCountToAnalyze("unroll-max-iteration-count-to-analyze", cl::init(10), cl::Hidden, cl::desc("Don't allow loop unrolling to simulate more than this number of" "iterations when checking full unroll profitability"))
bool isBackedgeTakenCountMaxOrZero(const Loop *L)
Return true if the backedge taken count is either the value returned by getMaxBackedgeTakenCount or z...
bool isEqual(const GCNRPTracker::LiveRegSet &S1, const GCNRPTracker::LiveRegSet &S2)
bool UpperBound
Allow using trip count upper bound to unroll loops.
void verifyLoop() const
Verify loop structure.
Definition: LoopInfoImpl.h:314
void getUnrollingPreferences(Loop *L, ScalarEvolution &, UnrollingPreferences &UP) const
Get target-customized preferences for the generic loop unrolling transformation.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
A container for analyses that lazily runs them and caches their results.
This pass exposes codegen information to IR-level passes.
This header defines various interfaces for pass management in LLVM.
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1074
#define LLVM_DEBUG(X)
Definition: Debug.h:122
unsigned NumInsts
Number of instructions in the analyzed blocks.
Definition: CodeMetrics.h:53
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:158
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:49
The optimization diagnostic interface.
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:1044
LoopUnrollResult
Represents the result of a UnrollLoop invocation.
Definition: UnrollLoop.h:53