LLVM  14.0.0git
LoopUnrollAndJamPass.cpp
Go to the documentation of this file.
1 //===- LoopUnrollAndJam.cpp - Loop unroll and jam pass --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass implements an unroll and jam pass. Most of the work is done by
10 // Utils/UnrollLoopAndJam.cpp.
11 //===----------------------------------------------------------------------===//
12 
14 #include "llvm/ADT/ArrayRef.h"
15 #include "llvm/ADT/None.h"
16 #include "llvm/ADT/Optional.h"
18 #include "llvm/ADT/SmallPtrSet.h"
19 #include "llvm/ADT/StringRef.h"
24 #include "llvm/Analysis/LoopInfo.h"
25 #include "llvm/Analysis/LoopPass.h"
29 #include "llvm/IR/BasicBlock.h"
30 #include "llvm/IR/Constants.h"
31 #include "llvm/IR/Dominators.h"
32 #include "llvm/IR/Function.h"
33 #include "llvm/IR/Instructions.h"
34 #include "llvm/IR/Metadata.h"
35 #include "llvm/IR/PassManager.h"
36 #include "llvm/InitializePasses.h"
37 #include "llvm/Pass.h"
38 #include "llvm/PassRegistry.h"
39 #include "llvm/Support/Casting.h"
41 #include "llvm/Support/Compiler.h"
42 #include "llvm/Support/Debug.h"
44 #include "llvm/Transforms/Scalar.h"
45 #include "llvm/Transforms/Utils.h"
51 #include <cassert>
52 #include <cstdint>
53 
54 namespace llvm {
55 class Instruction;
56 class Value;
57 } // namespace llvm
58 
59 using namespace llvm;
60 
61 #define DEBUG_TYPE "loop-unroll-and-jam"
62 
/// @{
/// Metadata attribute names.
///
/// Each constant names a "followup" attribute list. In this file they are
/// handed to makeFollowupLoopID() to build the loop ID attached to a loop that
/// results from unroll-and-jam (the jammed inner loop, the remaining outer
/// loop, and the inner/outer epilogue loops).
static const char *const LLVMLoopUnrollAndJamFollowupAll =
    "llvm.loop.unroll_and_jam.followup_all";
static const char *const LLVMLoopUnrollAndJamFollowupInner =
    "llvm.loop.unroll_and_jam.followup_inner";
static const char *const LLVMLoopUnrollAndJamFollowupOuter =
    "llvm.loop.unroll_and_jam.followup_outer";
static const char *const LLVMLoopUnrollAndJamFollowupRemainderInner =
    "llvm.loop.unroll_and_jam.followup_remainder_inner";
// NOTE(review): the declarator for this literal was missing from the listing;
// the identifier is inferred from the sibling constants -- confirm.
static const char *const LLVMLoopUnrollAndJamFollowupRemainderOuter =
    "llvm.loop.unroll_and_jam.followup_remainder_outer";
/// @}
76 
77 static cl::opt<bool>
78  AllowUnrollAndJam("allow-unroll-and-jam", cl::Hidden,
79  cl::desc("Allows loops to be unroll-and-jammed."));
80 
82  "unroll-and-jam-count", cl::Hidden,
83  cl::desc("Use this unroll count for all loops including those with "
84  "unroll_and_jam_count pragma values, for testing purposes"));
85 
87  "unroll-and-jam-threshold", cl::init(60), cl::Hidden,
88  cl::desc("Threshold to use for inner loop when doing unroll and jam."));
89 
91  "pragma-unroll-and-jam-threshold", cl::init(1024), cl::Hidden,
92  cl::desc("Unrolled size limit for loops with an unroll_and_jam(full) or "
93  "unroll_count pragma."));
94 
95 // Returns the loop hint metadata node with the given name (for example,
96 // "llvm.loop.unroll.count"). If no such metadata node exists, then nullptr is
97 // returned.
99  if (MDNode *LoopID = L->getLoopID())
100  return GetUnrollMetadata(LoopID, Name);
101  return nullptr;
102 }
103 
104 // Returns true if the loop has any metadata starting with Prefix. For example a
105 // Prefix of "llvm.loop.unroll." returns true if we have any unroll metadata.
106 static bool hasAnyUnrollPragma(const Loop *L, StringRef Prefix) {
107  if (MDNode *LoopID = L->getLoopID()) {
108  // First operand should refer to the loop id itself.
109  assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
110  assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
111 
112  for (unsigned I = 1, E = LoopID->getNumOperands(); I < E; ++I) {
113  MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(I));
114  if (!MD)
115  continue;
116 
117  MDString *S = dyn_cast<MDString>(MD->getOperand(0));
118  if (!S)
119  continue;
120 
121  if (S->getString().startswith(Prefix))
122  return true;
123  }
124  }
125  return false;
126 }
127 
128 // Returns true if the loop has an unroll_and_jam(enable) pragma.
129 static bool hasUnrollAndJamEnablePragma(const Loop *L) {
130  return getUnrollMetadataForLoop(L, "llvm.loop.unroll_and_jam.enable");
131 }
132 
133 // If loop has an unroll_and_jam_count pragma return the (necessarily
134 // positive) value from the pragma. Otherwise return 0.
135 static unsigned unrollAndJamCountPragmaValue(const Loop *L) {
136  MDNode *MD = getUnrollMetadataForLoop(L, "llvm.loop.unroll_and_jam.count");
137  if (MD) {
138  assert(MD->getNumOperands() == 2 &&
139  "Unroll count hint metadata should have two operands.");
140  unsigned Count =
141  mdconst::extract<ConstantInt>(MD->getOperand(1))->getZExtValue();
142  assert(Count >= 1 && "Unroll count must be positive.");
143  return Count;
144  }
145  return 0;
146 }
147 
148 // Returns loop size estimation for unrolled loop.
149 static uint64_t
150 getUnrollAndJammedLoopSize(unsigned LoopSize,
152  assert(LoopSize >= UP.BEInsns && "LoopSize should not be less than BEInsns!");
153  return static_cast<uint64_t>(LoopSize - UP.BEInsns) * UP.Count + UP.BEInsns;
154 }
155 
156 // Calculates unroll and jam count and writes it to UP.Count. Returns true if
157 // unroll count was set explicitly.
//
// The `return true` paths are: the "unroll-and-jam-count" option or an
// unroll_and_jam count pragma whose size checks pass, and any explicit
// unroll-and-jam request (enable pragma, count pragma or option) that reaches
// the ExplicitUnrollAndJam check. Every path that rejects the transformation
// zeroes UP.Count and returns false. The final fall-through, taken when all
// heuristics pass, also returns false but leaves UP.Count as computed.
//
// NOTE(review): this listing skips source lines 158, 165, 189, 194, 207, 218,
// 221 and 233 (see the numbering gaps), so the function header and several
// statement tails are not visible here -- confirm against the full file.
159  Loop *L, Loop *SubLoop, const TargetTransformInfo &TTI, DominatorTree &DT,
160  LoopInfo *LI, ScalarEvolution &SE,
161  const SmallPtrSetImpl<const Value *> &EphValues,
162  OptimizationRemarkEmitter *ORE, unsigned OuterTripCount,
163  unsigned OuterTripMultiple, unsigned OuterLoopSize, unsigned InnerTripCount,
164  unsigned InnerLoopSize, TargetTransformInfo::UnrollingPreferences &UP,
166  // First up use computeUnrollCount from the loop unroller to get a count
167  // for unrolling the outer loop, plus any loops requiring explicit
168  // unrolling we leave to the unroller. This uses UP.Threshold /
169  // UP.PartialThreshold / UP.MaxCount to come up with sensible loop values.
170  // We have already checked that the loop has no unroll.* pragmas.
171  unsigned MaxTripCount = 0;
172  bool UseUpperBound = false;
173  bool ExplicitUnroll = computeUnrollCount(
174  L, TTI, DT, LI, SE, EphValues, ORE, OuterTripCount, MaxTripCount,
175  /*MaxOrZero*/ false, OuterTripMultiple, OuterLoopSize, UP, PP,
176  UseUpperBound);
177  if (ExplicitUnroll || UseUpperBound) {
178  // If the user explicitly set the loop as unrolled, don't unroll-and-jam
179  // it. Leave it for the unroller instead.
180  LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; explicit count set by "
181  "computeUnrollCount\n");
182  UP.Count = 0;
183  return false;
184  }
185 
186  // Override with any explicit Count from the "unroll-and-jam-count" option.
187  bool UserUnrollCount = UnrollAndJamCount.getNumOccurrences() > 0;
188  if (UserUnrollCount) {
190  UP.Force = true;
191  if (UP.AllowRemainder &&
192  getUnrollAndJammedLoopSize(OuterLoopSize, UP) < UP.Threshold &&
193  getUnrollAndJammedLoopSize(InnerLoopSize, UP) <
195  return true;
196  }
197 
198  // Check for unroll_and_jam pragmas
199  unsigned PragmaCount = unrollAndJamCountPragmaValue(L);
200  if (PragmaCount > 0) {
201  UP.Count = PragmaCount;
202  UP.Runtime = true;
203  UP.Force = true;
  // Without a remainder loop the pragma count is only usable when it divides
  // the outer trip multiple.
204  if ((UP.AllowRemainder || (OuterTripMultiple % PragmaCount == 0)) &&
205  getUnrollAndJammedLoopSize(OuterLoopSize, UP) < UP.Threshold &&
206  getUnrollAndJammedLoopSize(InnerLoopSize, UP) <
208  return true;
209  }
210 
  // ExplicitUnrollAndJamCount: a count came from the pragma or the option.
  // ExplicitUnrollAndJam: additionally covers the plain enable pragma.
211  bool PragmaEnableUnroll = hasUnrollAndJamEnablePragma(L);
212  bool ExplicitUnrollAndJamCount = PragmaCount > 0 || UserUnrollCount;
213  bool ExplicitUnrollAndJam = PragmaEnableUnroll || ExplicitUnrollAndJamCount;
214 
215  // If the loop has an unrolling pragma, we want to be more aggressive with
216  // unrolling limits.
217  if (ExplicitUnrollAndJam)
219 
220  if (!UP.AllowRemainder && getUnrollAndJammedLoopSize(InnerLoopSize, UP) >=
222  LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; can't create remainder and "
223  "inner loop too large\n");
224  UP.Count = 0;
225  return false;
226  }
227 
228  // We have a sensible limit for the outer loop, now adjust it for the inner
229  // loop and UP.UnrollAndJamInnerLoopThreshold. If the outer limit was set
230  // explicitly, we want to stick to it.
231  if (!ExplicitUnrollAndJamCount && UP.AllowRemainder) {
232  while (UP.Count != 0 && getUnrollAndJammedLoopSize(InnerLoopSize, UP) >=
234  UP.Count--;
235  }
236 
237  // If we are explicitly unroll and jamming, we are done. Otherwise there are a
238  // number of extra performance heuristics to check.
239  if (ExplicitUnrollAndJam)
240  return true;
241 
242  // If the inner loop count is known and small, leave the entire loop nest to
243  // the unroller
244  if (InnerTripCount && InnerLoopSize * InnerTripCount < UP.Threshold) {
245  LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; small inner loop count is "
246  "being left for the unroller\n");
247  UP.Count = 0;
248  return false;
249  }
250 
251  // Check for situations where UnJ is likely to be unprofitable. Including
252  // subloops with more than 1 block.
253  if (SubLoop->getBlocks().size() != 1) {
254  LLVM_DEBUG(
255  dbgs() << "Won't unroll-and-jam; More than one inner loop block\n");
256  UP.Count = 0;
257  return false;
258  }
259 
260  // Limit to loops where there is something to gain from unrolling and
261  // jamming the loop. In this case, look for loads that are invariant in the
262  // outer loop and can become shared.
  // A load is counted when the SCEV of its pointer operand, evaluated at the
  // scope of L, is invariant in L.
263  unsigned NumInvariant = 0;
264  for (BasicBlock *BB : SubLoop->getBlocks()) {
265  for (Instruction &I : *BB) {
266  if (auto *Ld = dyn_cast<LoadInst>(&I)) {
267  Value *V = Ld->getPointerOperand();
268  const SCEV *LSCEV = SE.getSCEVAtScope(V, L);
269  if (SE.isLoopInvariant(LSCEV, L))
270  NumInvariant++;
271  }
272  }
273  }
274  if (NumInvariant == 0) {
275  LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; No loop invariant loads\n");
276  UP.Count = 0;
277  return false;
278  }
279 
  // The count was chosen heuristically, not explicitly; UP.Count stays set.
280  return false;
281 }
282 
// Attempts to unroll-and-jam the outer loop L, using its first sub-loop as the
// inner loop, and reports the outcome as a LoopUnrollResult.
//
// Visible steps: gather unrolling preferences; check EnableMode against
// TM_Disable / TM_ForcedByUser; reject loops with plain unroll pragmas, unsafe
// nests, and non-duplicatable, inlinable or convergent code; compute the count
// with computeUnrollAndJamCount; run UnrollAndJamLoop; finally attach followup
// loop IDs to the resulting loops.
//
// NOTE(review): this listing skips many source lines (284-286, 288, 291-292,
// 294, 296, 300-301, 303, 306, 319, 324, 331, 345, 349, 354, 366, 382, 396,
// 402-403, 412, 424). Most of the parameter list, several declarations and the
// statement following each "Disabled"/"Not unrolling" message are therefore not
// visible here -- confirm against the full file.
283 static LoopUnrollResult
287  OptimizationRemarkEmitter &ORE, int OptLevel) {
289  gatherUnrollingPreferences(L, SE, TTI, nullptr, nullptr, ORE, OptLevel,
290  None, None, None, None, None, None);
293 
295  if (EnableMode & TM_Disable)
297  if (EnableMode & TM_ForcedByUser)
298  UP.UnrollAndJam = true;
299 
  // The threshold option only takes effect when given on the command line.
302  if (UnrollAndJamThreshold.getNumOccurrences() > 0)
304  // Exit early if unrolling is disabled.
305  if (!UP.UnrollAndJam || UP.UnrollAndJamInnerLoopThreshold == 0)
307 
308  LLVM_DEBUG(dbgs() << "Loop Unroll and Jam: F["
309  << L->getHeader()->getParent()->getName() << "] Loop %"
310  << L->getHeader()->getName() << "\n");
311 
312  // A loop with any unroll pragma (enabling/disabling/count/etc) is left for
313  // the unroller, so long as it does not explicitly have unroll_and_jam
314  // metadata. This means #pragma nounroll will disable unroll and jam as well
315  // as unrolling
316  if (hasAnyUnrollPragma(L, "llvm.loop.unroll.") &&
317  !hasAnyUnrollPragma(L, "llvm.loop.unroll_and_jam.")) {
318  LLVM_DEBUG(dbgs() << " Disabled due to pragma.\n");
320  }
321 
322  if (!isSafeToUnrollAndJam(L, SE, DT, DI, *LI)) {
323  LLVM_DEBUG(dbgs() << " Disabled due to not being safe.\n");
325  }
326 
327  // Approximate the loop size and collect useful info
328  unsigned NumInlineCandidates;
329  bool NotDuplicatable;
330  bool Convergent;
332  CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
  // Only the first sub-loop is treated as the inner loop.
333  Loop *SubLoop = L->getSubLoops()[0];
334  unsigned InnerLoopSize =
335  ApproximateLoopSize(SubLoop, NumInlineCandidates, NotDuplicatable,
336  Convergent, TTI, EphValues, UP.BEInsns);
337  unsigned OuterLoopSize =
338  ApproximateLoopSize(L, NumInlineCandidates, NotDuplicatable, Convergent,
339  TTI, EphValues, UP.BEInsns);
340  LLVM_DEBUG(dbgs() << " Outer Loop Size: " << OuterLoopSize << "\n");
341  LLVM_DEBUG(dbgs() << " Inner Loop Size: " << InnerLoopSize << "\n");
342  if (NotDuplicatable) {
343  LLVM_DEBUG(dbgs() << " Not unrolling loop which contains non-duplicatable "
344  "instructions.\n");
346  }
347  if (NumInlineCandidates != 0) {
348  LLVM_DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n");
350  }
351  if (Convergent) {
352  LLVM_DEBUG(
353  dbgs() << " Not unrolling loop with convergent instructions.\n");
355  }
356 
357  // Save original loop IDs for after the transformation.
358  MDNode *OrigOuterLoopID = L->getLoopID();
359  MDNode *OrigSubLoopID = SubLoop->getLoopID();
360 
361  // To assign the loop id of the epilogue, assign it before unrolling it so it
362  // is applied to every inner loop of the epilogue. We later apply the loop ID
363  // for the jammed inner loop.
364  Optional<MDNode *> NewInnerEpilogueLoopID = makeFollowupLoopID(
365  OrigOuterLoopID, {LLVMLoopUnrollAndJamFollowupAll,
367  if (NewInnerEpilogueLoopID.hasValue())
368  SubLoop->setLoopID(NewInnerEpilogueLoopID.getValue());
369 
370  // Find trip count and trip multiple
371  BasicBlock *Latch = L->getLoopLatch();
372  BasicBlock *SubLoopLatch = SubLoop->getLoopLatch();
373  unsigned OuterTripCount = SE.getSmallConstantTripCount(L, Latch);
374  unsigned OuterTripMultiple = SE.getSmallConstantTripMultiple(L, Latch);
375  unsigned InnerTripCount = SE.getSmallConstantTripCount(SubLoop, SubLoopLatch);
376 
377  // Decide if, and by how much, to unroll
378  bool IsCountSetExplicitly = computeUnrollAndJamCount(
379  L, SubLoop, TTI, DT, LI, SE, EphValues, &ORE, OuterTripCount,
380  OuterTripMultiple, OuterLoopSize, InnerTripCount, InnerLoopSize, UP, PP);
381  if (UP.Count <= 1)
383  // Unroll factor (Count) must be less or equal to TripCount.
384  if (OuterTripCount && UP.Count > OuterTripCount)
385  UP.Count = OuterTripCount;
386 
387  Loop *EpilogueOuterLoop = nullptr;
388  LoopUnrollResult UnrollResult = UnrollAndJamLoop(
389  L, UP.Count, OuterTripCount, OuterTripMultiple, UP.UnrollRemainder, LI,
390  &SE, &DT, &AC, &TTI, &ORE, &EpilogueOuterLoop);
391 
392  // Assign new loop attributes.
393  if (EpilogueOuterLoop) {
394  Optional<MDNode *> NewOuterEpilogueLoopID = makeFollowupLoopID(
395  OrigOuterLoopID, {LLVMLoopUnrollAndJamFollowupAll,
397  if (NewOuterEpilogueLoopID.hasValue())
398  EpilogueOuterLoop->setLoopID(NewOuterEpilogueLoopID.getValue());
399  }
400 
401  Optional<MDNode *> NewInnerLoopID =
404  if (NewInnerLoopID.hasValue())
405  SubLoop->setLoopID(NewInnerLoopID.getValue());
406  else
  // No followup for the jammed inner loop: put back the ID saved above, since
  // it may have been replaced by the epilogue ID before unrolling.
407  SubLoop->setLoopID(OrigSubLoopID);
408 
409  if (UnrollResult == LoopUnrollResult::PartiallyUnrolled) {
410  Optional<MDNode *> NewOuterLoopID = makeFollowupLoopID(
411  OrigOuterLoopID,
413  if (NewOuterLoopID.hasValue()) {
414  L->setLoopID(NewOuterLoopID.getValue());
415 
416  // Do not setLoopAlreadyUnrolled if a followup was given.
417  return UnrollResult;
418  }
419  }
420 
421  // If loop has an unroll count pragma or unrolled by explicitly set count
422  // mark loop as unrolled to prevent unrolling beyond that requested.
423  if (UnrollResult != LoopUnrollResult::FullyUnrolled && IsCountSetExplicitly)
425 
426  return UnrollResult;
427 }
428 
// Loop-nest driver: runs the per-loop tryToUnrollAndJamLoop on every loop
// popped from the worklist and returns true if any call reported a result
// other than LoopUnrollResult::Unmodified. When the outermost loop of LN ends
// up fully unrolled it is reported to the LPMUpdater as deleted.
//
// NOTE(review): this listing skips source lines 429, 432, 436 and 441, so the
// start of the signature and the declarations of `Loops` and `Worklist` are not
// visible here -- confirm against the full file.
430  ScalarEvolution &SE,
431  const TargetTransformInfo &TTI,
433  OptimizationRemarkEmitter &ORE, int OptLevel,
434  LPMUpdater &U) {
435  bool DidSomething = false;
437  Loop *OutmostLoop = &LN.getOutermostLoop();
438 
439  // Add the loop nests in the reverse order of LN. See method
440  // declaration.
442  appendLoopsToWorklist(Loops, Worklist);
443  while (!Worklist.empty()) {
444  Loop *L = Worklist.pop_back_val();
  // The name is copied before the transformation runs -- presumably because L
  // may no longer be usable once it has been fully unrolled (confirm).
445  std::string LoopName = std::string(L->getName());
446  LoopUnrollResult Result =
447  tryToUnrollAndJamLoop(L, DT, &LI, SE, TTI, AC, DI, ORE, OptLevel);
448  if (Result != LoopUnrollResult::Unmodified)
449  DidSomething = true;
450  if (L == OutmostLoop && Result == LoopUnrollResult::FullyUnrolled)
451  U.markLoopAsDeleted(*L, LoopName);
452  }
453 
454  return DidSomething;
455 }
456 
457 namespace {
458 
// LoopPass wrapper around tryToUnrollAndJamLoop: runOnLoop fetches the
// analyses listed below and hands them, together with OptLevel, to the shared
// implementation.
//
// NOTE(review): this listing skips source lines 465, 481, 487 and 493-500, so
// the constructor body, the declaration of `Result`, the return statement of
// runOnLoop and the body of getAnalysisUsage are not visible here -- confirm
// against the full file.
459 class LoopUnrollAndJam : public LoopPass {
460 public:
461  static char ID; // Pass ID, replacement for typeid
462  unsigned OptLevel;
463 
464  LoopUnrollAndJam(int OptLevel = 2) : LoopPass(ID), OptLevel(OptLevel) {
466  }
467 
468  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
469  if (skipLoop(L))
470  return false;
471 
  // F is the function that contains the loop header; it keys the
  // per-function analyses below.
472  auto *F = L->getHeader()->getParent();
473  auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
474  auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
475  auto &DI = getAnalysis<DependenceAnalysisWrapperPass>().getDI();
476  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
477  auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(*F);
478  auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
479  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(*F);
480 
482  tryToUnrollAndJamLoop(L, DT, LI, SE, TTI, AC, DI, ORE, OptLevel);
483 
  // A fully unrolled loop is reported to the pass manager as deleted.
484  if (Result == LoopUnrollResult::FullyUnrolled)
485  LPM.markLoopAsDeleted(*L);
486 
488  }
489 
490  /// This transformation requires natural loop information & requires that
491  /// loop preheaders be inserted into the CFG...
492  void getAnalysisUsage(AnalysisUsage &AU) const override {
501  }
502 };
503 
504 } // end anonymous namespace
505 
// Storage for the pass ID declared in the class above.
506 char LoopUnrollAndJam::ID = 0;
507 
// Registers the LoopUnrollAndJam pass under the name "loop-unroll-and-jam",
// together with the passes it depends on.
// NOTE(review): this listing skips source lines 510-512 and 515-519, so only
// two of the dependency entries are visible here -- confirm against the full
// file.
508 INITIALIZE_PASS_BEGIN(LoopUnrollAndJam, "loop-unroll-and-jam",
509  "Unroll and Jam loops", false, false)
513 INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
514 INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)
520 INITIALIZE_PASS_END(LoopUnrollAndJam, "loop-unroll-and-jam",
521  "Unroll and Jam loops", false, false)
522 
// Factory body: allocates a LoopUnrollAndJam pass, forwarding OptLevel to its
// constructor.
// NOTE(review): the signature (source line 523) is missing from this listing
// -- confirm the function name and return type against the full file.
524  return new LoopUnrollAndJam(OptLevel);
525 }
526 
// Entry point that runs unroll-and-jam over the loop nest LN and reports which
// analyses remain valid. It builds a DependenceInfo for the enclosing function
// from the AA, SE and LI results in AR and calls the loop-nest
// tryToUnrollAndJamLoop. If that reports no change, everything is preserved;
// otherwise the result of getLoopPassPreservedAnalyses() plus LoopNestAnalysis
// is returned.
//
// NOTE(review): this listing skips source lines 527-529 and 534, so the start
// of the signature and the declaration of `ORE` are not visible here --
// confirm against the full file.
530  LPMUpdater &U) {
531  Function &F = *LN.getParent();
532 
533  DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI);
535 
536  if (!tryToUnrollAndJamLoop(LN, AR.DT, AR.LI, AR.SE, AR.TTI, AR.AC, DI, ORE,
537  OptLevel, U))
538  return PreservedAnalyses::all();
539 
540  auto PA = getLoopPassPreservedAnalyses();
541  PA.preserve<LoopNestAnalysis>();
542  return PA;
543 }
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
llvm::TargetTransformInfo::UnrollingPreferences::BEInsns
unsigned BEInsns
Definition: TargetTransformInfo.h:478
AssumptionCache.h
llvm::TargetTransformInfo::UnrollingPreferences::Runtime
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
Definition: TargetTransformInfo.h:485
llvm
---------------------— PointerInfo ------------------------------------—
Definition: AllocatorList.h:23
LoopSimplify.h
llvm::LoopStandardAnalysisResults::AC
AssumptionCache & AC
Definition: LoopAnalysisManager.h:54
Optional.h
Metadata.h
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
hasUnrollAndJamEnablePragma
static bool hasUnrollAndJamEnablePragma(const Loop *L)
Definition: LoopUnrollAndJamPass.cpp:129
UnrollAndJamCount
static cl::opt< unsigned > UnrollAndJamCount("unroll-and-jam-count", cl::Hidden, cl::desc("Use this unroll count for all loops including those with " "unroll_and_jam_count pragma values, for testing purposes"))
llvm::cl::Prefix
@ Prefix
Definition: CommandLine.h:164
Scalar.h
llvm::DependenceAnalysisWrapperPass
Legacy pass manager pass to access dependence information.
Definition: DependenceAnalysis.h:979
llvm::ScalarEvolution::getSmallConstantTripMultiple
unsigned getSmallConstantTripMultiple(const Loop *L, const SCEV *ExitCount)
Returns the largest constant divisor of the trip count as a normal unsigned value,...
Definition: ScalarEvolution.cpp:7215
llvm::Function
Definition: Function.h:61
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
StringRef.h
Pass.h
Loops
Hexagon Hardware Loops
Definition: HexagonHardwareLoops.cpp:372
LCSSA.h
llvm::UnrollAndJamLoop
LoopUnrollResult UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount, unsigned TripMultiple, bool UnrollRemainder, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, const TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE, Loop **EpilogueLoop=nullptr)
Definition: LoopUnrollAndJam.cpp:225
LLVMLoopUnrollAndJamFollowupRemainderInner
static const char *const LLVMLoopUnrollAndJamFollowupRemainderInner
Definition: LoopUnrollAndJamPass.cpp:71
LLVMLoopUnrollAndJamFollowupOuter
static const char *const LLVMLoopUnrollAndJamFollowupOuter
Definition: LoopUnrollAndJamPass.cpp:69
computeUnrollAndJamCount
static bool computeUnrollAndJamCount(Loop *L, Loop *SubLoop, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, OptimizationRemarkEmitter *ORE, unsigned OuterTripCount, unsigned OuterTripMultiple, unsigned OuterLoopSize, unsigned InnerTripCount, unsigned InnerLoopSize, TargetTransformInfo::UnrollingPreferences &UP, TargetTransformInfo::PeelingPreferences &PP)
Definition: LoopUnrollAndJamPass.cpp:158
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:168
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:461
llvm::makeFollowupLoopID
Optional< MDNode * > makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef< StringRef > FollowupAttrs, const char *InheritOptionsAttrsPrefix="", bool AlwaysNew=false)
Create a new loop identifier for a loop created from a loop transformation.
Definition: LoopUtils.cpp:271
llvm::TargetTransformInfo::UnrollingPreferences::UnrollAndJamInnerLoopThreshold
unsigned UnrollAndJamInnerLoopThreshold
Threshold for unroll and jam, for inner loop size.
Definition: TargetTransformInfo.h:504
OptimizationRemarkEmitter.h
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
llvm::TargetTransformInfo::UnrollingPreferences::UnrollRemainder
bool UnrollRemainder
Allow unrolling of all the iterations of the runtime loop remainder.
Definition: TargetTransformInfo.h:497
llvm::getLoopAnalysisUsage
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.
Definition: LoopUtils.cpp:149
llvm::TargetTransformInfo::UnrollingPreferences::Count
unsigned Count
A forced unrolling factor (the number of concatenated bodies of the original loop in the unrolled loo...
Definition: TargetTransformInfo.h:462
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
ScalarEvolution.h
llvm::TargetTransformInfo::PeelingPreferences
Definition: TargetTransformInfo.h:535
llvm::LoopInfoWrapperPass
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:1268
llvm::LoopStandardAnalysisResults
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Definition: LoopAnalysisManager.h:52
llvm::ApproximateLoopSize
unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls, bool &NotDuplicatable, bool &Convergent, const TargetTransformInfo &TTI, const SmallPtrSetImpl< const Value * > &EphValues, unsigned BEInsns)
ApproximateLoopSize - Approximate the size of the loop.
Definition: LoopUnrollPass.cpp:667
llvm::Optional
Definition: APInt.h:33
LoopUnrollAndJamPass.h
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:449
llvm::MCID::Convergent
@ Convergent
Definition: MCInstrDesc.h:182
llvm::computeUnrollCount
bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, OptimizationRemarkEmitter *ORE, unsigned TripCount, unsigned MaxTripCount, bool MaxOrZero, unsigned TripMultiple, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound)
Definition: LoopUnrollPass.cpp:894
AllowUnrollAndJam
static cl::opt< bool > AllowUnrollAndJam("allow-unroll-and-jam", cl::Hidden, cl::desc("Allows loops to be unroll-and-jammed."))
llvm::LoopStandardAnalysisResults::DT
DominatorTree & DT
Definition: LoopAnalysisManager.h:55
and
We currently generate a but we really shouldn eax ecx xorl edx divl ecx eax divl ecx movl eax ret A similar code sequence works for division We currently compile i32 v2 eax eax jo LBB1_2 and
Definition: README.txt:1271
PassRegistry.h
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
F
#define F(x, y, z)
Definition: MD5.cpp:56
getUnrollMetadataForLoop
static MDNode * getUnrollMetadataForLoop(const Loop *L, StringRef Name)
Definition: LoopUnrollAndJamPass.cpp:98
loop
Analysis the ScalarEvolution expression for r is< loop > Outside the loop
Definition: README.txt:8
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::MDNode::getNumOperands
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1113
llvm::gatherUnrollingPreferences
TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, llvm::OptimizationRemarkEmitter &ORE, int OptLevel, Optional< unsigned > UserThreshold, Optional< unsigned > UserCount, Optional< bool > UserAllowPartial, Optional< bool > UserRuntime, Optional< bool > UserUpperBound, Optional< unsigned > UserFullUnrollMaxCount)
Gather the various unrolling parameters based on the defaults, compiler flags, TTI overrides and user...
Definition: LoopUnrollPass.cpp:185
LoopAnalysisManager.h
llvm::Optional::hasValue
constexpr bool hasValue() const
Definition: Optional.h:288
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
PriorityWorklist.h
CommandLine.h
CodeMetrics.h
llvm::LoopBase::getSubLoops
const std::vector< LoopT * > & getSubLoops() const
Return the loops contained entirely within this loop.
Definition: LoopInfo.h:143
llvm::Loop::setLoopAlreadyUnrolled
void setLoopAlreadyUnrolled()
Add llvm.loop.unroll.disable to this loop's loop id metadata.
Definition: LoopInfo.cpp:539
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
llvm::GetUnrollMetadata
MDNode * GetUnrollMetadata(MDNode *LoopID, StringRef Name)
Given an llvm.loop loop id metadata node, returns the loop hint metadata node with the given name (fo...
Definition: LoopUnroll.cpp:826
Constants.h
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
LLVMLoopUnrollAndJamFollowupInner
static const char *const LLVMLoopUnrollAndJamFollowupInner
Definition: LoopUnrollAndJamPass.cpp:67
llvm::TargetTransformInfo::UnrollingPreferences::Force
bool Force
Apply loop unroll on any kind of loop (mainly to loops that fail runtime unrolling).
Definition: TargetTransformInfo.h:493
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::PriorityWorklist< T, SmallVector< T, N >, SmallDenseMap< T, ptrdiff_t > >::pop_back_val
LLVM_NODISCARD T pop_back_val()
Definition: PriorityWorklist.h:154
llvm::ms_demangle::QualifierMangleMode::Result
@ Result
false
Definition: StackSlotColoring.cpp:142
llvm::TargetTransformInfo::UnrollingPreferences::UnrollAndJam
bool UnrollAndJam
Allow unroll and jam. Used to enable unroll and jam for the target.
Definition: TargetTransformInfo.h:499
llvm::LoopBase::getBlocks
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:171
tryToUnrollAndJamLoop
static LoopUnrollResult tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const TargetTransformInfo &TTI, AssumptionCache &AC, DependenceInfo &DI, OptimizationRemarkEmitter &ORE, int OptLevel)
Definition: LoopUnrollAndJamPass.cpp:284
llvm::Instruction
Definition: Instruction.h:45
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:287
llvm::cl::Option::getNumOccurrences
int getNumOccurrences() const
Definition: CommandLine.h:404
LoopUtils.h
llvm::CodeMetrics::collectEphemeralValues
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
Definition: CodeMetrics.cpp:70
llvm::ScalarEvolutionWrapperPass
Definition: ScalarEvolution.h:2067
llvm::LPPassManager
Definition: LoopPass.h:75
SmallPtrSet.h
Utils.h
llvm::Loop::getName
StringRef getName() const
Definition: LoopInfo.h:866
llvm::None
const NoneType None
Definition: None.h:23
llvm::LoopNestAnalysis
This analysis provides information for a loop nest.
Definition: LoopNestAnalysis.h:179
loops
loop unroll and Unroll and Jam loops
Definition: LoopUnrollAndJamPass.cpp:521
llvm::LoopUnrollResult::FullyUnrolled
@ FullyUnrolled
The loop was fully unrolled into straight-line code.
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
LoopInfo.h
llvm::ScalarEvolution::getSmallConstantTripCount
unsigned getSmallConstantTripCount(const Loop *L)
Returns the exact trip count of the loop if we can compute it, and the result is a small constant.
Definition: ScalarEvolution.cpp:7179
llvm::MDNode::getOperand
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1107
llvm::getLoopPassPreservedAnalyses
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
Definition: LoopAnalysisManager.cpp:140
BasicBlock.h
llvm::cl::opt< bool >
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:78
llvm::DependenceInfo
DependenceInfo - This class is the main dependence-analysis driver.
Definition: DependenceAnalysis.h:272
uint64_t
llvm::LoopPass
Definition: LoopPass.h:27
llvm::hasUnrollAndJamTransformation
TransformationMode hasUnrollAndJamTransformation(const Loop *L)
Definition: LoopUtils.cpp:381
llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition: TargetTransformInfo.h:2387
llvm::LPMUpdater
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
Definition: LoopPassManager.h:249
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::SmallPriorityWorklist
A version of PriorityWorklist that selects small size optimized data structures for the vector and ma...
Definition: PriorityWorklist.h:256
llvm::TargetTransformInfo::UnrollingPreferences
Parameters that control the generic loop unrolling transformation.
Definition: TargetTransformInfo.h:428
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::LoopUnrollResult::PartiallyUnrolled
@ PartiallyUnrolled
The loop was partially unrolled – we still have a loop, but with a smaller trip count.
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
llvm::LoopUnrollResult
LoopUnrollResult
Represents the result of a UnrollLoop invocation.
Definition: UnrollLoop.h:53
ArrayRef.h
llvm::PriorityWorklist< T, SmallVector< T, N >, SmallDenseMap< T, ptrdiff_t > >::empty
bool empty() const
Determine if the PriorityWorklist is empty or not.
Definition: PriorityWorklist.h:68
llvm::LoopBase::getLoopLatch
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:216
getUnrollAndJammedLoopSize
static uint64_t getUnrollAndJammedLoopSize(unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP)
Definition: LoopUnrollAndJamPass.cpp:150
llvm::TM_Disable
@ TM_Disable
The transformation should not be applied.
Definition: LoopUtils.h:275
llvm::initializeLoopUnrollAndJamPass
void initializeLoopUnrollAndJamPass(PassRegistry &)
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::LPMUpdater::markLoopAsDeleted
void markLoopAsDeleted(Loop &L, llvm::StringRef Name)
Loop passes should use this method to indicate they have deleted a loop from the nest.
Definition: LoopPassManager.h:269
llvm::MDNode
Metadata node.
Definition: Metadata.h:901
llvm::LoopNest::getOutermostLoop
Loop & getOutermostLoop() const
Return the outermost loop in the loop nest.
Definition: LoopNestAnalysis.h:80
llvm::AssumptionCacheTracker
An immutable pass that tracks lazily created AssumptionCache objects.
Definition: AssumptionCache.h:200
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
Definition: APInt.h:32
llvm::LoopInfo
Definition: LoopInfo.h:1083
None.h
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition: StringRef.h:58
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:41
llvm::TM_ForcedByUser
@ TM_ForcedByUser
The transformation was directed by the user, e.g. by a #pragma in the source code.
Definition: LoopUtils.h:283
LoopPass.h
llvm::appendLoopsToWorklist
void appendLoopsToWorklist(RangeT &&, SmallPriorityWorklist< Loop *, 4 > &)
Utility that implements appending of loops onto a worklist given a range.
Definition: LoopUtils.cpp:1429
UnrollAndJamThreshold
static cl::opt< unsigned > UnrollAndJamThreshold("unroll-and-jam-threshold", cl::init(60), cl::Hidden, cl::desc("Threshold to use for inner loop when doing unroll and jam."))
Compiler.h
llvm::LoopStandardAnalysisResults::LI
LoopInfo & LI
Definition: LoopAnalysisManager.h:56
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::LoopNest::getParent
Function * getParent() const
Return the function to which the loop-nest belongs.
Definition: LoopNestAnalysis.h:153
LLVMLoopUnrollAndJamFollowupRemainderOuter
static const char *const LLVMLoopUnrollAndJamFollowupRemainderOuter
Definition: LoopUnrollAndJamPass.cpp:73
hasAnyUnrollPragma
static bool hasAnyUnrollPragma(const Loop *L, StringRef Prefix)
Definition: LoopUnrollAndJamPass.cpp:106
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:297
llvm::isSafeToUnrollAndJam
bool isSafeToUnrollAndJam(Loop *L, ScalarEvolution &SE, DominatorTree &DT, DependenceInfo &DI, LoopInfo &LI)
Definition: LoopUnrollAndJam.cpp:867
llvm::ScalarEvolution::isLoopInvariant
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
Definition: ScalarEvolution.cpp:12612
llvm::OptimizationRemarkEmitterWrapperPass
OptimizationRemarkEmitter legacy analysis pass.
Definition: OptimizationRemarkEmitter.h:146
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:52
llvm::Loop::setLoopID
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition: LoopInfo.cpp:527
llvm::LPPassManager::markLoopAsDeleted
void markLoopAsDeleted(Loop &L)
Definition: LoopPass.cpp:113
llvm::LoopStandardAnalysisResults::TTI
TargetTransformInfo & TTI
Definition: LoopAnalysisManager.h:59
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:161
llvm::Loop::getLoopID
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
Definition: LoopInfo.cpp:503
llvm::TargetTransformInfo::UnrollingPreferences::AllowRemainder
bool AllowRemainder
Allow generation of a loop remainder (extra iterations after unroll).
Definition: TargetTransformInfo.h:487
Casting.h
Function.h
LLVMLoopUnrollAndJamFollowupAll
static const char *const LLVMLoopUnrollAndJamFollowupAll
Definition: LoopUnrollAndJamPass.cpp:65
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
llvm::LoopUnrollAndJamPass::run
PreservedAnalyses run(LoopNest &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Definition: LoopUnrollAndJamPass.cpp:527
PassManager.h
PragmaUnrollAndJamThreshold
static cl::opt< unsigned > PragmaUnrollAndJamThreshold("pragma-unroll-and-jam-threshold", cl::init(1024), cl::Hidden, cl::desc("Unrolled size limit for loops with an unroll_and_jam(full) or " "unroll_count pragma."))
llvm::LoopStandardAnalysisResults::SE
ScalarEvolution & SE
Definition: LoopAnalysisManager.h:57
llvm::LoopStandardAnalysisResults::AA
AAResults & AA
Definition: LoopAnalysisManager.h:53
llvm::LoopNest::getLoops
ArrayRef< Loop * > getLoops() const
Get the loops in the nest.
Definition: LoopNestAnalysis.h:109
llvm::Pass
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:91
Instructions.h
jam
loop unroll and jam
Definition: LoopUnrollAndJamPass.cpp:520
Dominators.h
llvm::TargetTransformInfo::UnrollingPreferences::Threshold
unsigned Threshold
The cost threshold for the unrolled loop.
Definition: TargetTransformInfo.h:436
UnrollLoop.h
TargetTransformInfo.h
unrollAndJamCountPragmaValue
static unsigned unrollAndJamCountPragmaValue(const Loop *L)
Definition: LoopUnrollAndJamPass.cpp:135
llvm::SmallPtrSetImpl< const Value * >
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
DependenceAnalysis.h
llvm::cl::desc
Definition: CommandLine.h:414
llvm::LoopUnrollResult::Unmodified
@ Unmodified
The loop was not modified.
raw_ostream.h
LoopPeel.h
llvm::MDString
A single uniqued string.
Definition: Metadata.h:611
InitializePasses.h
llvm::ScalarEvolution::getSCEVAtScope
const SCEV * getSCEVAtScope(const SCEV *S, const Loop *L)
Return a SCEV expression for the specified value at the specified scope in the program.
Definition: ScalarEvolution.cpp:8727
llvm::LoopNest
This class represents a loop nest and can be used to query its properties.
Definition: LoopNestAnalysis.h:28
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
llvm::TransformationMode
TransformationMode
The mode sets how eager a transformation should be applied.
Definition: LoopUtils.h:266
Debug.h
llvm::gatherPeelingPreferences
TargetTransformInfo::PeelingPreferences gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, Optional< bool > UserAllowPeeling, Optional< bool > UserAllowProfileBasedPeeling, bool UnrollingSpecficValues=false)
Definition: LoopPeel.cpp:612
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(LoopUnrollAndJam, "loop-unroll-and-jam", "Unroll and Jam loops", false, false) INITIALIZE_PASS_END(LoopUnrollAndJam
llvm::Optional::getValue
constexpr const T & getValue() const LLVM_LVALUE_FUNCTION
Definition: Optional.h:282
llvm::createLoopUnrollAndJamPass
Pass * createLoopUnrollAndJamPass(int OptLevel=2)
Definition: LoopUnrollAndJamPass.cpp:523
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:37