LLVM  13.0.0git
LoopUnrollAndJamPass.cpp
Go to the documentation of this file.
1 //===- LoopUnrollAndJam.cpp - Loop unroll and jam pass --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass implements an unroll and jam pass. Most of the work is done by
10 // Utils/UnrollLoopAndJam.cpp.
11 //===----------------------------------------------------------------------===//
12 
14 #include "llvm/ADT/ArrayRef.h"
15 #include "llvm/ADT/None.h"
16 #include "llvm/ADT/Optional.h"
18 #include "llvm/ADT/SmallPtrSet.h"
19 #include "llvm/ADT/StringRef.h"
24 #include "llvm/Analysis/LoopInfo.h"
28 #include "llvm/IR/BasicBlock.h"
29 #include "llvm/IR/Constants.h"
30 #include "llvm/IR/Dominators.h"
31 #include "llvm/IR/Function.h"
32 #include "llvm/IR/Instructions.h"
33 #include "llvm/IR/Metadata.h"
34 #include "llvm/IR/PassManager.h"
35 #include "llvm/InitializePasses.h"
36 #include "llvm/Pass.h"
37 #include "llvm/PassRegistry.h"
38 #include "llvm/Support/Casting.h"
40 #include "llvm/Support/Compiler.h"
41 #include "llvm/Support/Debug.h"
43 #include "llvm/Transforms/Scalar.h"
48 #include <cassert>
49 #include <cstdint>
50 #include <vector>
51 
52 namespace llvm {
53 class Instruction;
54 class Value;
55 } // namespace llvm
56 
57 using namespace llvm;
58 
59 #define DEBUG_TYPE "loop-unroll-and-jam"
60 
61 /// @{
62 /// Metadata attribute names
63 static const char *const LLVMLoopUnrollAndJamFollowupAll =
64  "llvm.loop.unroll_and_jam.followup_all";
65 static const char *const LLVMLoopUnrollAndJamFollowupInner =
66  "llvm.loop.unroll_and_jam.followup_inner";
67 static const char *const LLVMLoopUnrollAndJamFollowupOuter =
68  "llvm.loop.unroll_and_jam.followup_outer";
70  "llvm.loop.unroll_and_jam.followup_remainder_inner";
72  "llvm.loop.unroll_and_jam.followup_remainder_outer";
73 /// @}
74 
75 static cl::opt<bool>
76  AllowUnrollAndJam("allow-unroll-and-jam", cl::Hidden,
77  cl::desc("Allows loops to be unroll-and-jammed."));
78 
80  "unroll-and-jam-count", cl::Hidden,
81  cl::desc("Use this unroll count for all loops including those with "
82  "unroll_and_jam_count pragma values, for testing purposes"));
83 
85  "unroll-and-jam-threshold", cl::init(60), cl::Hidden,
86  cl::desc("Threshold to use for inner loop when doing unroll and jam."));
87 
89  "pragma-unroll-and-jam-threshold", cl::init(1024), cl::Hidden,
90  cl::desc("Unrolled size limit for loops with an unroll_and_jam(full) or "
91  "unroll_count pragma."));
92 
93 // Returns the loop hint metadata node with the given name (for example,
94 // "llvm.loop.unroll.count"). If no such metadata node exists, then nullptr is
95 // returned.
97  if (MDNode *LoopID = L->getLoopID())
98  return GetUnrollMetadata(LoopID, Name);
99  return nullptr;
100 }
101 
102 // Returns true if the loop has any metadata starting with Prefix. For example a
103 // Prefix of "llvm.loop.unroll." returns true if we have any unroll metadata.
104 static bool hasAnyUnrollPragma(const Loop *L, StringRef Prefix) {
105  if (MDNode *LoopID = L->getLoopID()) {
106  // First operand should refer to the loop id itself.
107  assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
108  assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
109 
110  for (unsigned I = 1, E = LoopID->getNumOperands(); I < E; ++I) {
111  MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(I));
112  if (!MD)
113  continue;
114 
115  MDString *S = dyn_cast<MDString>(MD->getOperand(0));
116  if (!S)
117  continue;
118 
119  if (S->getString().startswith(Prefix))
120  return true;
121  }
122  }
123  return false;
124 }
125 
126 // Returns true if the loop has an unroll_and_jam(enable) pragma.
127 static bool hasUnrollAndJamEnablePragma(const Loop *L) {
128  return getUnrollMetadataForLoop(L, "llvm.loop.unroll_and_jam.enable");
129 }
130 
131 // If loop has an unroll_and_jam_count pragma return the (necessarily
132 // positive) value from the pragma. Otherwise return 0.
133 static unsigned unrollAndJamCountPragmaValue(const Loop *L) {
134  MDNode *MD = getUnrollMetadataForLoop(L, "llvm.loop.unroll_and_jam.count");
135  if (MD) {
136  assert(MD->getNumOperands() == 2 &&
137  "Unroll count hint metadata should have two operands.");
138  unsigned Count =
139  mdconst::extract<ConstantInt>(MD->getOperand(1))->getZExtValue();
140  assert(Count >= 1 && "Unroll count must be positive.");
141  return Count;
142  }
143  return 0;
144 }
145 
146 // Returns loop size estimation for unrolled loop.
147 static uint64_t
148 getUnrollAndJammedLoopSize(unsigned LoopSize,
150  assert(LoopSize >= UP.BEInsns && "LoopSize should not be less than BEInsns!");
151  return static_cast<uint64_t>(LoopSize - UP.BEInsns) * UP.Count + UP.BEInsns;
152 }
153 
154 // Calculates unroll and jam count and writes it to UP.Count. Returns true if
155 // unroll count was set explicitly.
157  Loop *L, Loop *SubLoop, const TargetTransformInfo &TTI, DominatorTree &DT,
158  LoopInfo *LI, ScalarEvolution &SE,
159  const SmallPtrSetImpl<const Value *> &EphValues,
160  OptimizationRemarkEmitter *ORE, unsigned OuterTripCount,
161  unsigned OuterTripMultiple, unsigned OuterLoopSize, unsigned InnerTripCount,
162  unsigned InnerLoopSize, TargetTransformInfo::UnrollingPreferences &UP,
164  // First up use computeUnrollCount from the loop unroller to get a count
165  // for unrolling the outer loop, plus any loops requiring explicit
166  // unrolling we leave to the unroller. This uses UP.Threshold /
167  // UP.PartialThreshold / UP.MaxCount to come up with sensible loop values.
168  // We have already checked that the loop has no unroll.* pragmas.
169  unsigned MaxTripCount = 0;
170  bool UseUpperBound = false;
171  bool ExplicitUnroll = computeUnrollCount(
172  L, TTI, DT, LI, SE, EphValues, ORE, OuterTripCount, MaxTripCount,
173  /*MaxOrZero*/ false, OuterTripMultiple, OuterLoopSize, UP, PP,
174  UseUpperBound);
175  if (ExplicitUnroll || UseUpperBound) {
176  // If the user explicitly set the loop as unrolled, dont UnJ it. Leave it
177  // for the unroller instead.
178  LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; explicit count set by "
179  "computeUnrollCount\n");
180  UP.Count = 0;
181  return false;
182  }
183 
184  // Override with any explicit Count from the "unroll-and-jam-count" option.
185  bool UserUnrollCount = UnrollAndJamCount.getNumOccurrences() > 0;
186  if (UserUnrollCount) {
188  UP.Force = true;
189  if (UP.AllowRemainder &&
190  getUnrollAndJammedLoopSize(OuterLoopSize, UP) < UP.Threshold &&
191  getUnrollAndJammedLoopSize(InnerLoopSize, UP) <
193  return true;
194  }
195 
196  // Check for unroll_and_jam pragmas
197  unsigned PragmaCount = unrollAndJamCountPragmaValue(L);
198  if (PragmaCount > 0) {
199  UP.Count = PragmaCount;
200  UP.Runtime = true;
201  UP.Force = true;
202  if ((UP.AllowRemainder || (OuterTripMultiple % PragmaCount == 0)) &&
203  getUnrollAndJammedLoopSize(OuterLoopSize, UP) < UP.Threshold &&
204  getUnrollAndJammedLoopSize(InnerLoopSize, UP) <
206  return true;
207  }
208 
209  bool PragmaEnableUnroll = hasUnrollAndJamEnablePragma(L);
210  bool ExplicitUnrollAndJamCount = PragmaCount > 0 || UserUnrollCount;
211  bool ExplicitUnrollAndJam = PragmaEnableUnroll || ExplicitUnrollAndJamCount;
212 
213  // If the loop has an unrolling pragma, we want to be more aggressive with
214  // unrolling limits.
215  if (ExplicitUnrollAndJam)
217 
218  if (!UP.AllowRemainder && getUnrollAndJammedLoopSize(InnerLoopSize, UP) >=
220  LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; can't create remainder and "
221  "inner loop too large\n");
222  UP.Count = 0;
223  return false;
224  }
225 
226  // We have a sensible limit for the outer loop, now adjust it for the inner
227  // loop and UP.UnrollAndJamInnerLoopThreshold. If the outer limit was set
228  // explicitly, we want to stick to it.
229  if (!ExplicitUnrollAndJamCount && UP.AllowRemainder) {
230  while (UP.Count != 0 && getUnrollAndJammedLoopSize(InnerLoopSize, UP) >=
232  UP.Count--;
233  }
234 
235  // If we are explicitly unroll and jamming, we are done. Otherwise there are a
236  // number of extra performance heuristics to check.
237  if (ExplicitUnrollAndJam)
238  return true;
239 
240  // If the inner loop count is known and small, leave the entire loop nest to
241  // be the unroller
242  if (InnerTripCount && InnerLoopSize * InnerTripCount < UP.Threshold) {
243  LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; small inner loop count is "
244  "being left for the unroller\n");
245  UP.Count = 0;
246  return false;
247  }
248 
249  // Check for situations where UnJ is likely to be unprofitable. Including
250  // subloops with more than 1 block.
251  if (SubLoop->getBlocks().size() != 1) {
252  LLVM_DEBUG(
253  dbgs() << "Won't unroll-and-jam; More than one inner loop block\n");
254  UP.Count = 0;
255  return false;
256  }
257 
258  // Limit to loops where there is something to gain from unrolling and
259  // jamming the loop. In this case, look for loads that are invariant in the
260  // outer loop and can become shared.
261  unsigned NumInvariant = 0;
262  for (BasicBlock *BB : SubLoop->getBlocks()) {
263  for (Instruction &I : *BB) {
264  if (auto *Ld = dyn_cast<LoadInst>(&I)) {
265  Value *V = Ld->getPointerOperand();
266  const SCEV *LSCEV = SE.getSCEVAtScope(V, L);
267  if (SE.isLoopInvariant(LSCEV, L))
268  NumInvariant++;
269  }
270  }
271  }
272  if (NumInvariant == 0) {
273  LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; No loop invariant loads\n");
274  UP.Count = 0;
275  return false;
276  }
277 
278  return false;
279 }
280 
281 static LoopUnrollResult
285  OptimizationRemarkEmitter &ORE, int OptLevel) {
287  gatherUnrollingPreferences(L, SE, TTI, nullptr, nullptr, OptLevel, None,
288  None, None, None, None, None);
291 
293  if (EnableMode & TM_Disable)
295  if (EnableMode & TM_ForcedByUser)
296  UP.UnrollAndJam = true;
297 
300  if (UnrollAndJamThreshold.getNumOccurrences() > 0)
302  // Exit early if unrolling is disabled.
303  if (!UP.UnrollAndJam || UP.UnrollAndJamInnerLoopThreshold == 0)
305 
306  LLVM_DEBUG(dbgs() << "Loop Unroll and Jam: F["
307  << L->getHeader()->getParent()->getName() << "] Loop %"
308  << L->getHeader()->getName() << "\n");
309 
310  // A loop with any unroll pragma (enabling/disabling/count/etc) is left for
311  // the unroller, so long as it does not explicitly have unroll_and_jam
312  // metadata. This means #pragma nounroll will disable unroll and jam as well
313  // as unrolling
314  if (hasAnyUnrollPragma(L, "llvm.loop.unroll.") &&
315  !hasAnyUnrollPragma(L, "llvm.loop.unroll_and_jam.")) {
316  LLVM_DEBUG(dbgs() << " Disabled due to pragma.\n");
318  }
319 
320  if (!isSafeToUnrollAndJam(L, SE, DT, DI, *LI)) {
321  LLVM_DEBUG(dbgs() << " Disabled due to not being safe.\n");
323  }
324 
325  // Approximate the loop size and collect useful info
326  unsigned NumInlineCandidates;
327  bool NotDuplicatable;
328  bool Convergent;
330  CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
331  Loop *SubLoop = L->getSubLoops()[0];
332  unsigned InnerLoopSize =
333  ApproximateLoopSize(SubLoop, NumInlineCandidates, NotDuplicatable,
334  Convergent, TTI, EphValues, UP.BEInsns);
335  unsigned OuterLoopSize =
336  ApproximateLoopSize(L, NumInlineCandidates, NotDuplicatable, Convergent,
337  TTI, EphValues, UP.BEInsns);
338  LLVM_DEBUG(dbgs() << " Outer Loop Size: " << OuterLoopSize << "\n");
339  LLVM_DEBUG(dbgs() << " Inner Loop Size: " << InnerLoopSize << "\n");
340  if (NotDuplicatable) {
341  LLVM_DEBUG(dbgs() << " Not unrolling loop which contains non-duplicatable "
342  "instructions.\n");
344  }
345  if (NumInlineCandidates != 0) {
346  LLVM_DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n");
348  }
349  if (Convergent) {
350  LLVM_DEBUG(
351  dbgs() << " Not unrolling loop with convergent instructions.\n");
353  }
354 
355  // Save original loop IDs for after the transformation.
356  MDNode *OrigOuterLoopID = L->getLoopID();
357  MDNode *OrigSubLoopID = SubLoop->getLoopID();
358 
359  // To assign the loop id of the epilogue, assign it before unrolling it so it
360  // is applied to every inner loop of the epilogue. We later apply the loop ID
361  // for the jammed inner loop.
362  Optional<MDNode *> NewInnerEpilogueLoopID = makeFollowupLoopID(
363  OrigOuterLoopID, {LLVMLoopUnrollAndJamFollowupAll,
365  if (NewInnerEpilogueLoopID.hasValue())
366  SubLoop->setLoopID(NewInnerEpilogueLoopID.getValue());
367 
368  // Find trip count and trip multiple
369  BasicBlock *Latch = L->getLoopLatch();
370  BasicBlock *SubLoopLatch = SubLoop->getLoopLatch();
371  unsigned OuterTripCount = SE.getSmallConstantTripCount(L, Latch);
372  unsigned OuterTripMultiple = SE.getSmallConstantTripMultiple(L, Latch);
373  unsigned InnerTripCount = SE.getSmallConstantTripCount(SubLoop, SubLoopLatch);
374 
375  // Decide if, and by how much, to unroll
376  bool IsCountSetExplicitly = computeUnrollAndJamCount(
377  L, SubLoop, TTI, DT, LI, SE, EphValues, &ORE, OuterTripCount,
378  OuterTripMultiple, OuterLoopSize, InnerTripCount, InnerLoopSize, UP, PP);
379  if (UP.Count <= 1)
381  // Unroll factor (Count) must be less or equal to TripCount.
382  if (OuterTripCount && UP.Count > OuterTripCount)
383  UP.Count = OuterTripCount;
384 
385  Loop *EpilogueOuterLoop = nullptr;
386  LoopUnrollResult UnrollResult = UnrollAndJamLoop(
387  L, UP.Count, OuterTripCount, OuterTripMultiple, UP.UnrollRemainder, LI,
388  &SE, &DT, &AC, &TTI, &ORE, &EpilogueOuterLoop);
389 
390  // Assign new loop attributes.
391  if (EpilogueOuterLoop) {
392  Optional<MDNode *> NewOuterEpilogueLoopID = makeFollowupLoopID(
393  OrigOuterLoopID, {LLVMLoopUnrollAndJamFollowupAll,
395  if (NewOuterEpilogueLoopID.hasValue())
396  EpilogueOuterLoop->setLoopID(NewOuterEpilogueLoopID.getValue());
397  }
398 
399  Optional<MDNode *> NewInnerLoopID =
402  if (NewInnerLoopID.hasValue())
403  SubLoop->setLoopID(NewInnerLoopID.getValue());
404  else
405  SubLoop->setLoopID(OrigSubLoopID);
406 
407  if (UnrollResult == LoopUnrollResult::PartiallyUnrolled) {
408  Optional<MDNode *> NewOuterLoopID = makeFollowupLoopID(
409  OrigOuterLoopID,
411  if (NewOuterLoopID.hasValue()) {
412  L->setLoopID(NewOuterLoopID.getValue());
413 
414  // Do not setLoopAlreadyUnrolled if a followup was given.
415  return UnrollResult;
416  }
417  }
418 
419  // If loop has an unroll count pragma or unrolled by explicitly set count
420  // mark loop as unrolled to prevent unrolling beyond that requested.
421  if (UnrollResult != LoopUnrollResult::FullyUnrolled && IsCountSetExplicitly)
423 
424  return UnrollResult;
425 }
426 
428  ScalarEvolution &SE,
429  const TargetTransformInfo &TTI,
432  int OptLevel) {
433  bool DidSomething = false;
434 
435  // The loop unroll and jam pass requires loops to be in simplified form, and
436  // also needs LCSSA. Since simplification may add new inner loops, it has to
437  // run before the legality and profitability checks. This means running the
438  // loop unroll and jam pass will simplify all loops, regardless of whether
439  // anything end up being unroll and jammed.
440  for (auto &L : LI) {
441  DidSomething |=
442  simplifyLoop(L, &DT, &LI, &SE, &AC, nullptr, false /* PreserveLCSSA */);
443  DidSomething |= formLCSSARecursively(*L, DT, &LI, &SE);
444  }
445 
446  // Add the loop nests in the reverse order of LoopInfo. See method
447  // declaration.
449  appendLoopsToWorklist(LI, Worklist);
450  while (!Worklist.empty()) {
451  Loop *L = Worklist.pop_back_val();
452  LoopUnrollResult Result =
453  tryToUnrollAndJamLoop(L, DT, &LI, SE, TTI, AC, DI, ORE, OptLevel);
454  if (Result != LoopUnrollResult::Unmodified)
455  DidSomething = true;
456  }
457 
458  return DidSomething;
459 }
460 
461 namespace {
462 
463 class LoopUnrollAndJam : public FunctionPass {
464 public:
465  static char ID; // Pass ID, replacement for typeid
466  unsigned OptLevel;
467 
468  LoopUnrollAndJam(int OptLevel = 2) : FunctionPass(ID), OptLevel(OptLevel) {
470  }
471 
472  bool runOnFunction(Function &F) override {
473  if (skipFunction(F))
474  return false;
475 
476  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
477  LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
478  ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
479  const TargetTransformInfo &TTI =
480  getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
481  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
482  auto &DI = getAnalysis<DependenceAnalysisWrapperPass>().getDI();
483  auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
484 
485  return tryToUnrollAndJamLoop(F, DT, LI, SE, TTI, AC, DI, ORE, OptLevel);
486  }
487 
488  /// This transformation requires natural loop information & requires that
489  /// loop preheaders be inserted into the CFG...
490  void getAnalysisUsage(AnalysisUsage &AU) const override {
498  }
499 };
500 
501 } // end anonymous namespace
502 
503 char LoopUnrollAndJam::ID = 0;
504 
505 INITIALIZE_PASS_BEGIN(LoopUnrollAndJam, "loop-unroll-and-jam",
506  "Unroll and Jam loops", false, false)
514 INITIALIZE_PASS_END(LoopUnrollAndJam, "loop-unroll-and-jam",
515  "Unroll and Jam loops", false, false)
516 
518  return new LoopUnrollAndJam(OptLevel);
519 }
520 
524  LoopInfo &LI = AM.getResult<LoopAnalysis>(F);
531 
532  if (!tryToUnrollAndJamLoop(F, DT, LI, SE, TTI, AC, DI, ORE, OptLevel))
533  return PreservedAnalyses::all();
534 
536 }
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
llvm::TargetTransformInfo::UnrollingPreferences::BEInsns
unsigned BEInsns
Definition: TargetTransformInfo.h:473
AssumptionCache.h
llvm::TargetIRAnalysis
Analysis pass providing the TargetTransformInfo.
Definition: TargetTransformInfo.h:2320
llvm::TargetTransformInfo::UnrollingPreferences::Runtime
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
Definition: TargetTransformInfo.h:480
llvm::ScalarEvolutionAnalysis
Analysis pass that exposes the ScalarEvolution for a function.
Definition: ScalarEvolution.h:2105
llvm
Definition: AllocatorList.h:23
LoopSimplify.h
Optional.h
Metadata.h
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
hasUnrollAndJamEnablePragma
static bool hasUnrollAndJamEnablePragma(const Loop *L)
Definition: LoopUnrollAndJamPass.cpp:127
UnrollAndJamCount
static cl::opt< unsigned > UnrollAndJamCount("unroll-and-jam-count", cl::Hidden, cl::desc("Use this unroll count for all loops including those with " "unroll_and_jam_count pragma values, for testing purposes"))
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:769
llvm::cl::Prefix
@ Prefix
Definition: CommandLine.h:164
Scalar.h
llvm::DependenceAnalysisWrapperPass
Legacy pass manager pass to access dependence information.
Definition: DependenceAnalysis.h:979
llvm::ScalarEvolution::getSmallConstantTripMultiple
unsigned getSmallConstantTripMultiple(const Loop *L, const SCEV *ExitCount)
Returns the largest constant divisor of the trip count as a normal unsigned value,...
Definition: ScalarEvolution.cpp:7079
llvm::Function
Definition: Function.h:61
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
StringRef.h
Pass.h
llvm::UnrollAndJamLoop
LoopUnrollResult UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount, unsigned TripMultiple, bool UnrollRemainder, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, const TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE, Loop **EpilogueLoop=nullptr)
Definition: LoopUnrollAndJam.cpp:225
LLVMLoopUnrollAndJamFollowupRemainderInner
static const char *const LLVMLoopUnrollAndJamFollowupRemainderInner
Definition: LoopUnrollAndJamPass.cpp:69
LLVMLoopUnrollAndJamFollowupOuter
static const char *const LLVMLoopUnrollAndJamFollowupOuter
Definition: LoopUnrollAndJamPass.cpp:67
computeUnrollAndJamCount
static bool computeUnrollAndJamCount(Loop *L, Loop *SubLoop, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, OptimizationRemarkEmitter *ORE, unsigned OuterTripCount, unsigned OuterTripMultiple, unsigned OuterLoopSize, unsigned InnerTripCount, unsigned InnerLoopSize, TargetTransformInfo::UnrollingPreferences &UP, TargetTransformInfo::PeelingPreferences &PP)
Definition: LoopUnrollAndJamPass.cpp:156
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:167
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:443
llvm::makeFollowupLoopID
Optional< MDNode * > makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef< StringRef > FollowupAttrs, const char *InheritOptionsAttrsPrefix="", bool AlwaysNew=false)
Create a new loop identifier for a loop created from a loop transformation.
Definition: LoopUtils.cpp:328
llvm::TargetTransformInfo::UnrollingPreferences::UnrollAndJamInnerLoopThreshold
unsigned UnrollAndJamInnerLoopThreshold
Threshold for unroll and jam, for inner loop size.
Definition: TargetTransformInfo.h:499
OptimizationRemarkEmitter.h
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
llvm::TargetTransformInfo::UnrollingPreferences::UnrollRemainder
bool UnrollRemainder
Allow unrolling of all the iterations of the runtime loop remainder.
Definition: TargetTransformInfo.h:492
llvm::TargetTransformInfo::UnrollingPreferences::Count
unsigned Count
A forced unrolling factor (the number of concatenated bodies of the original loop in the unrolled loo...
Definition: TargetTransformInfo.h:457
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
ScalarEvolution.h
llvm::TargetTransformInfo::PeelingPreferences
Definition: TargetTransformInfo.h:529
llvm::LoopInfoWrapperPass
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:1258
llvm::ApproximateLoopSize
unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls, bool &NotDuplicatable, bool &Convergent, const TargetTransformInfo &TTI, const SmallPtrSetImpl< const Value * > &EphValues, unsigned BEInsns)
ApproximateLoopSize - Approximate the size of the loop.
Definition: LoopUnrollPass.cpp:656
llvm::Optional
Definition: APInt.h:33
llvm::formLCSSARecursively
bool formLCSSARecursively(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put a loop nest into LCSSA form.
Definition: LCSSA.cpp:403
LoopUnrollAndJamPass.h
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:449
llvm::MCID::Convergent
@ Convergent
Definition: MCInstrDesc.h:182
AllowUnrollAndJam
static cl::opt< bool > AllowUnrollAndJam("allow-unroll-and-jam", cl::Hidden, cl::desc("Allows loops to be unroll-and-jammed."))
and
We currently generate a but we really shouldn eax ecx xorl edx divl ecx eax divl ecx movl eax ret A similar code sequence works for division We currently compile i32 v2 eax eax jo LBB1_2 and
Definition: README.txt:1271
PassRegistry.h
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:122
F
#define F(x, y, z)
Definition: MD5.cpp:56
getUnrollMetadataForLoop
static MDNode * getUnrollMetadataForLoop(const Loop *L, StringRef Name)
Definition: LoopUnrollAndJamPass.cpp:96
loop
Analysis the ScalarEvolution expression for r is< loop > Outside the loop
Definition: README.txt:8
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::MDNode::getNumOperands
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1108
LoopAnalysisManager.h
llvm::Optional::hasValue
constexpr bool hasValue() const
Definition: Optional.h:288
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
PriorityWorklist.h
CommandLine.h
CodeMetrics.h
llvm::LoopBase::getSubLoops
const std::vector< LoopT * > & getSubLoops() const
Return the loops contained entirely within this loop.
Definition: LoopInfo.h:143
llvm::Loop::setLoopAlreadyUnrolled
void setLoopAlreadyUnrolled()
Add llvm.loop.unroll.disable to this loop's loop id metadata.
Definition: LoopInfo.cpp:539
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
llvm::GetUnrollMetadata
MDNode * GetUnrollMetadata(MDNode *LoopID, StringRef Name)
Given an llvm.loop loop id metadata node, returns the loop hint metadata node with the given name (fo...
Definition: LoopUnroll.cpp:857
Constants.h
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
LLVMLoopUnrollAndJamFollowupInner
static const char *const LLVMLoopUnrollAndJamFollowupInner
Definition: LoopUnrollAndJamPass.cpp:65
llvm::TargetTransformInfo::UnrollingPreferences::Force
bool Force
Apply loop unroll on any kind of loop (mainly to loops that fail runtime unrolling).
Definition: TargetTransformInfo.h:488
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::PriorityWorklist< T, SmallVector< T, N >, SmallDenseMap< T, ptrdiff_t > >::pop_back_val
LLVM_NODISCARD T pop_back_val()
Definition: PriorityWorklist.h:154
false
Definition: StackSlotColoring.cpp:142
llvm::TargetTransformInfo::UnrollingPreferences::UnrollAndJam
bool UnrollAndJam
Allow unroll and jam. Used to enable unroll and jam for the target.
Definition: TargetTransformInfo.h:494
llvm::LoopBase::getBlocks
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:171
tryToUnrollAndJamLoop
static LoopUnrollResult tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const TargetTransformInfo &TTI, AssumptionCache &AC, DependenceInfo &DI, OptimizationRemarkEmitter &ORE, int OptLevel)
Definition: LoopUnrollAndJamPass.cpp:282
llvm::Instruction
Definition: Instruction.h:45
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:281
llvm::cl::Option::getNumOccurrences
int getNumOccurrences() const
Definition: CommandLine.h:404
LoopUtils.h
llvm::CodeMetrics::collectEphemeralValues
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
Definition: CodeMetrics.cpp:70
llvm::ScalarEvolutionWrapperPass
Definition: ScalarEvolution.h:2135
SmallPtrSet.h
llvm::None
const NoneType None
Definition: None.h:23
loops
loop unroll and Unroll and Jam loops
Definition: LoopUnrollAndJamPass.cpp:515
llvm::LoopUnrollResult::FullyUnrolled
@ FullyUnrolled
The loop was fully unrolled into straight-line code.
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
LoopInfo.h
llvm::ScalarEvolution::getSmallConstantTripCount
unsigned getSmallConstantTripCount(const Loop *L)
Returns the exact trip count of the loop if we can compute it, and the result is a small constant.
Definition: ScalarEvolution.cpp:7043
llvm::LoopUnrollAndJamPass::run
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Definition: LoopUnrollAndJamPass.cpp:521
llvm::MDNode::getOperand
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1102
llvm::getLoopPassPreservedAnalyses
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
Definition: LoopAnalysisManager.cpp:140
BasicBlock.h
llvm::cl::opt< bool >
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:78
llvm::DependenceInfo
DependenceInfo - This class is the main dependence-analysis driver.
Definition: DependenceAnalysis.h:272
llvm::hasUnrollAndJamTransformation
TransformationMode hasUnrollAndJamTransformation(const Loop *L)
Definition: LoopUtils.cpp:442
llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition: TargetTransformInfo.h:2376
llvm::AssumptionAnalysis
A function analysis which provides an AssumptionCache.
Definition: AssumptionCache.h:169
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::SmallPriorityWorklist
A version of PriorityWorklist that selects small size optimized data structures for the vector and ma...
Definition: PriorityWorklist.h:256
llvm::TargetTransformInfo::UnrollingPreferences
Parameters that control the generic loop unrolling transformation.
Definition: TargetTransformInfo.h:423
llvm::gatherUnrollingPreferences
TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, int OptLevel, Optional< unsigned > UserThreshold, Optional< unsigned > UserCount, Optional< bool > UserAllowPartial, Optional< bool > UserRuntime, Optional< bool > UserUpperBound, Optional< unsigned > UserFullUnrollMaxCount)
Gather the various unrolling parameters based on the defaults, compiler flags, TTI overrides and user...
Definition: LoopUnrollPass.cpp:185
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::LoopUnrollResult::PartiallyUnrolled
@ PartiallyUnrolled
The loop was partially unrolled – we still have a loop, but with a smaller trip count.
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
llvm::LoopUnrollResult
LoopUnrollResult
Represents the result of a UnrollLoop invocation.
Definition: UnrollLoop.h:53
ArrayRef.h
llvm::PriorityWorklist< T, SmallVector< T, N >, SmallDenseMap< T, ptrdiff_t > >::empty
bool empty() const
Determine if the PriorityWorklist is empty or not.
Definition: PriorityWorklist.h:68
llvm::LoopBase::getLoopLatch
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:216
getUnrollAndJammedLoopSize
static uint64_t getUnrollAndJammedLoopSize(unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP)
Definition: LoopUnrollAndJamPass.cpp:148
llvm::TM_Disable
@ TM_Disable
The transformation should not be applied.
Definition: LoopUtils.h:280
llvm::initializeLoopUnrollAndJamPass
void initializeLoopUnrollAndJamPass(PassRegistry &)
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::MDNode
Metadata node.
Definition: Metadata.h:897
llvm::AssumptionCacheTracker
An immutable pass that tracks lazily created AssumptionCache objects.
Definition: AssumptionCache.h:200
llvm::LoopInfo
Definition: LoopInfo.h:1080
None.h
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:41
llvm::TM_ForcedByUser
@ TM_ForcedByUser
The transformation was directed by the user, e.g.
Definition: LoopUtils.h:288
llvm::appendLoopsToWorklist
void appendLoopsToWorklist(RangeT &&, SmallPriorityWorklist< Loop *, 4 > &)
Utility that implements appending of loops onto a worklist given a range.
Definition: LoopUtils.cpp:1532
UnrollAndJamThreshold
static cl::opt< unsigned > UnrollAndJamThreshold("unroll-and-jam-threshold", cl::init(60), cl::Hidden, cl::desc("Threshold to use for inner loop when doing unroll and jam."))
Compiler.h
llvm::DependenceAnalysis
AnalysisPass to compute dependence information in a function.
Definition: DependenceAnalysis.h:957
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
LLVMLoopUnrollAndJamFollowupRemainderOuter
static const char *const LLVMLoopUnrollAndJamFollowupRemainderOuter
Definition: LoopUnrollAndJamPass.cpp:71
hasAnyUnrollPragma
static bool hasAnyUnrollPragma(const Loop *L, StringRef Prefix)
Definition: LoopUnrollAndJamPass.cpp:104
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:294
llvm::isSafeToUnrollAndJam
bool isSafeToUnrollAndJam(Loop *L, ScalarEvolution &SE, DominatorTree &DT, DependenceInfo &DI, LoopInfo &LI)
Definition: LoopUnrollAndJam.cpp:867
llvm::ScalarEvolution::isLoopInvariant
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
Definition: ScalarEvolution.cpp:12652
llvm::computeUnrollCount
bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, OptimizationRemarkEmitter *ORE, unsigned &TripCount, unsigned MaxTripCount, bool MaxOrZero, unsigned &TripMultiple, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound)
Definition: LoopUnrollPass.cpp:764
runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:69
llvm::OptimizationRemarkEmitterWrapperPass
OptimizationRemarkEmitter legacy analysis pass.
Definition: OptimizationRemarkEmitter.h:146
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:52
llvm::Loop::setLoopID
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition: LoopInfo.cpp:527
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:161
llvm::Loop::getLoopID
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
Definition: LoopInfo.cpp:503
llvm::TargetTransformInfo::UnrollingPreferences::AllowRemainder
bool AllowRemainder
Allow generation of a loop remainder (extra iterations after unroll).
Definition: TargetTransformInfo.h:482
Casting.h
Function.h
LLVMLoopUnrollAndJamFollowupAll
static const char *const LLVMLoopUnrollAndJamFollowupAll
Definition: LoopUnrollAndJamPass.cpp:63
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
PassManager.h
llvm::simplifyLoop
bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Simplify each loop in a loop nest recursively.
Definition: LoopSimplify.cpp:719
PragmaUnrollAndJamThreshold
static cl::opt< unsigned > PragmaUnrollAndJamThreshold("pragma-unroll-and-jam-threshold", cl::init(1024), cl::Hidden, cl::desc("Unrolled size limit for loops with an unroll_and_jam(full) or " "unroll_count pragma."))
llvm::DominatorTreeAnalysis
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:252
llvm::Pass
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:91
Instructions.h
jam
loop unroll and jam
Definition: LoopUnrollAndJamPass.cpp:514
Dominators.h
llvm::TargetTransformInfo::UnrollingPreferences::Threshold
unsigned Threshold
The cost threshold for the unrolled loop.
Definition: TargetTransformInfo.h:431
UnrollLoop.h
TargetTransformInfo.h
unrollAndJamCountPragmaValue
static unsigned unrollAndJamCountPragmaValue(const Loop *L)
Definition: LoopUnrollAndJamPass.cpp:133
llvm::SmallPtrSetImpl< const Value * >
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
DependenceAnalysis.h
llvm::cl::desc
Definition: CommandLine.h:414
llvm::LoopUnrollResult::Unmodified
@ Unmodified
The loop was not modified.
raw_ostream.h
LoopPeel.h
llvm::MDString
A single uniqued string.
Definition: Metadata.h:611
InitializePasses.h
llvm::OptimizationRemarkEmitterAnalysis
Definition: OptimizationRemarkEmitter.h:164
llvm::ScalarEvolution::getSCEVAtScope
const SCEV * getSCEVAtScope(const SCEV *S, const Loop *L)
Return a SCEV expression for the specified value at the specified scope in the program.
Definition: ScalarEvolution.cpp:8570
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
llvm::TransformationMode
TransformationMode
The mode sets how eager a transformation should be applied.
Definition: LoopUtils.h:271
Debug.h
llvm::gatherPeelingPreferences
TargetTransformInfo::PeelingPreferences gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, Optional< bool > UserAllowPeeling, Optional< bool > UserAllowProfileBasedPeeling, bool UnrollingSpecficValues=false)
Definition: LoopPeel.cpp:624
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(LoopUnrollAndJam, "loop-unroll-and-jam", "Unroll and Jam loops", false, false) INITIALIZE_PASS_END(LoopUnrollAndJam
llvm::Optional::getValue
constexpr const T & getValue() const LLVM_LVALUE_FUNCTION
Definition: Optional.h:282
llvm::createLoopUnrollAndJamPass
Pass * createLoopUnrollAndJamPass(int OptLevel=2)
Definition: LoopUnrollAndJamPass.cpp:517
llvm::LoopAnalysis
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:1233
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38