LLVM 20.0.0git
LoopUnrollAndJamPass.cpp
Go to the documentation of this file.
1//===- LoopUnrollAndJamPass.cpp - Loop unroll and jam pass ----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass implements an unroll and jam pass. Most of the work is done by
10// Utils/UnrollLoopAndJam.cpp.
11//===----------------------------------------------------------------------===//
12
14#include "llvm/ADT/ArrayRef.h"
17#include "llvm/ADT/StringRef.h"
28#include "llvm/IR/BasicBlock.h"
29#include "llvm/IR/Constants.h"
30#include "llvm/IR/Dominators.h"
31#include "llvm/IR/Function.h"
33#include "llvm/IR/Metadata.h"
34#include "llvm/IR/PassManager.h"
37#include "llvm/Support/Debug.h"
43#include <cassert>
44#include <cstdint>
45
46namespace llvm {
47class Instruction;
48class Value;
49} // namespace llvm
50
51using namespace llvm;
52
53#define DEBUG_TYPE "loop-unroll-and-jam"
54
/// @{
/// Metadata attribute names
///
/// Names of the follow-up attributes that may be attached to an
/// unroll-and-jammed loop's llvm.loop metadata; they are handed to
/// makeFollowupLoopID when loop IDs are assigned after the transformation.
static const char *const LLVMLoopUnrollAndJamFollowupAll =
    "llvm.loop.unroll_and_jam.followup_all";
static const char *const LLVMLoopUnrollAndJamFollowupInner =
    "llvm.loop.unroll_and_jam.followup_inner";
static const char *const LLVMLoopUnrollAndJamFollowupOuter =
    "llvm.loop.unroll_and_jam.followup_outer";
static const char *const LLVMLoopUnrollAndJamFollowupRemainderInner =
    "llvm.loop.unroll_and_jam.followup_remainder_inner";
static const char *const LLVMLoopUnrollAndJamFollowupRemainderOuter =
    "llvm.loop.unroll_and_jam.followup_remainder_outer";
/// @}
68
69static cl::opt<bool>
70 AllowUnrollAndJam("allow-unroll-and-jam", cl::Hidden,
71 cl::desc("Allows loops to be unroll-and-jammed."));
72
74 "unroll-and-jam-count", cl::Hidden,
75 cl::desc("Use this unroll count for all loops including those with "
76 "unroll_and_jam_count pragma values, for testing purposes"));
77
79 "unroll-and-jam-threshold", cl::init(60), cl::Hidden,
80 cl::desc("Threshold to use for inner loop when doing unroll and jam."));
81
83 "pragma-unroll-and-jam-threshold", cl::init(1024), cl::Hidden,
84 cl::desc("Unrolled size limit for loops with an unroll_and_jam(full) or "
85 "unroll_count pragma."));
86
87// Returns the loop hint metadata node with the given name (for example,
88// "llvm.loop.unroll.count"). If no such metadata node exists, then nullptr is
89// returned.
91 if (MDNode *LoopID = L->getLoopID())
92 return GetUnrollMetadata(LoopID, Name);
93 return nullptr;
94}
95
96// Returns true if the loop has any metadata starting with Prefix. For example a
97// Prefix of "llvm.loop.unroll." returns true if we have any unroll metadata.
98static bool hasAnyUnrollPragma(const Loop *L, StringRef Prefix) {
99 if (MDNode *LoopID = L->getLoopID()) {
100 // First operand should refer to the loop id itself.
101 assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
102 assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
103
104 for (unsigned I = 1, E = LoopID->getNumOperands(); I < E; ++I) {
105 MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(I));
106 if (!MD)
107 continue;
108
109 MDString *S = dyn_cast<MDString>(MD->getOperand(0));
110 if (!S)
111 continue;
112
113 if (S->getString().starts_with(Prefix))
114 return true;
115 }
116 }
117 return false;
118}
119
120// Returns true if the loop has an unroll_and_jam(enable) pragma.
121static bool hasUnrollAndJamEnablePragma(const Loop *L) {
122 return getUnrollMetadataForLoop(L, "llvm.loop.unroll_and_jam.enable");
123}
124
125// If loop has an unroll_and_jam_count pragma return the (necessarily
126// positive) value from the pragma. Otherwise return 0.
127static unsigned unrollAndJamCountPragmaValue(const Loop *L) {
128 MDNode *MD = getUnrollMetadataForLoop(L, "llvm.loop.unroll_and_jam.count");
129 if (MD) {
130 assert(MD->getNumOperands() == 2 &&
131 "Unroll count hint metadata should have two operands.");
132 unsigned Count =
133 mdconst::extract<ConstantInt>(MD->getOperand(1))->getZExtValue();
134 assert(Count >= 1 && "Unroll count must be positive.");
135 return Count;
136 }
137 return 0;
138}
139
140// Returns loop size estimation for unrolled loop.
141static uint64_t
144 assert(LoopSize >= UP.BEInsns && "LoopSize should not be less than BEInsns!");
145 return static_cast<uint64_t>(LoopSize - UP.BEInsns) * UP.Count + UP.BEInsns;
146}
147
148// Calculates unroll and jam count and writes it to UP.Count. Returns true if
149// unroll count was set explicitly.
//
// Every path that rejects the transformation sets UP.Count to 0 and returns
// false. The final fall-through also returns false, but leaves the computed
// UP.Count in place.
//
// NOTE(review): this listing drops the lines at embedded numbers 150, 152,
// 157-158, 183, 188, 201, 212, 215, 227 and 248. The start of the signature and
// the tails of several statements are therefore missing below and must be
// restored from the upstream file before this text can compile.
151 Loop *L, Loop *SubLoop, const TargetTransformInfo &TTI, DominatorTree &DT,
153 const SmallPtrSetImpl<const Value *> &EphValues,
154 OptimizationRemarkEmitter *ORE, unsigned OuterTripCount,
155 unsigned OuterTripMultiple, const UnrollCostEstimator &OuterUCE,
156 unsigned InnerTripCount, unsigned InnerLoopSize,
159 unsigned OuterLoopSize = OuterUCE.getRolledLoopSize();
160 // First up use computeUnrollCount from the loop unroller to get a count
161 // for unrolling the outer loop, plus any loops requiring explicit
162 // unrolling we leave to the unroller. This uses UP.Threshold /
163 // UP.PartialThreshold / UP.MaxCount to come up with sensible loop values.
164 // We have already checked that the loop has no unroll.* pragmas.
165 unsigned MaxTripCount = 0;
166 bool UseUpperBound = false;
167 bool ExplicitUnroll = computeUnrollCount(
168 L, TTI, DT, LI, AC, SE, EphValues, ORE, OuterTripCount, MaxTripCount,
169 /*MaxOrZero*/ false, OuterTripMultiple, OuterUCE, UP, PP,
170 UseUpperBound);
171 if (ExplicitUnroll || UseUpperBound) {
172 // If the user explicitly set the loop as unrolled, don't UnJ it. Leave it
173 // for the unroller instead.
174 LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; explicit count set by "
175 "computeUnrollCount\n");
176 UP.Count = 0;
177 return false;
178 }
179
180 // Override with any explicit Count from the "unroll-and-jam-count" option.
181 bool UserUnrollCount = UnrollAndJamCount.getNumOccurrences() > 0;
182 if (UserUnrollCount) {
184 UP.Force = true;
185 if (UP.AllowRemainder &&
186 getUnrollAndJammedLoopSize(OuterLoopSize, UP) < UP.Threshold &&
187 getUnrollAndJammedLoopSize(InnerLoopSize, UP) <
189 return true;
190 }
191
192 // Check for unroll_and_jam pragmas
193 unsigned PragmaCount = unrollAndJamCountPragmaValue(L);
194 if (PragmaCount > 0) {
195 UP.Count = PragmaCount;
196 UP.Runtime = true;
197 UP.Force = true;
 // The pragma count is accepted right away when a remainder loop is allowed
 // or the count divides the outer trip multiple, and the size checks pass.
198 if ((UP.AllowRemainder || (OuterTripMultiple % PragmaCount == 0)) &&
199 getUnrollAndJammedLoopSize(OuterLoopSize, UP) < UP.Threshold &&
200 getUnrollAndJammedLoopSize(InnerLoopSize, UP) <
202 return true;
203 }
204
205 bool PragmaEnableUnroll = hasUnrollAndJamEnablePragma(L);
206 bool ExplicitUnrollAndJamCount = PragmaCount > 0 || UserUnrollCount;
207 bool ExplicitUnrollAndJam = PragmaEnableUnroll || ExplicitUnrollAndJamCount;
208
209 // If the loop has an unrolling pragma, we want to be more aggressive with
210 // unrolling limits.
211 if (ExplicitUnrollAndJam)
213
214 if (!UP.AllowRemainder && getUnrollAndJammedLoopSize(InnerLoopSize, UP) >=
216 LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; can't create remainder and "
217 "inner loop too large\n");
218 UP.Count = 0;
219 return false;
220 }
221
222 // We have a sensible limit for the outer loop, now adjust it for the inner
223 // loop and UP.UnrollAndJamInnerLoopThreshold. If the outer limit was set
224 // explicitly, we want to stick to it.
225 if (!ExplicitUnrollAndJamCount && UP.AllowRemainder) {
226 while (UP.Count != 0 && getUnrollAndJammedLoopSize(InnerLoopSize, UP) >=
228 UP.Count--;
229 }
230
231 // If we are explicitly unroll and jamming, we are done. Otherwise there are a
232 // number of extra performance heuristics to check.
233 if (ExplicitUnrollAndJam)
234 return true;
235
236 // If the inner loop count is known and small, leave the entire loop nest to
237 // be the unroller
238 if (InnerTripCount && InnerLoopSize * InnerTripCount < UP.Threshold) {
239 LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; small inner loop count is "
240 "being left for the unroller\n");
241 UP.Count = 0;
242 return false;
243 }
244
245 // Check for situations where UnJ is likely to be unprofitable. Including
246 // subloops with more than 1 block.
247 if (SubLoop->getBlocks().size() != 1) {
249 dbgs() << "Won't unroll-and-jam; More than one inner loop block\n");
250 UP.Count = 0;
251 return false;
252 }
253
254 // Limit to loops where there is something to gain from unrolling and
255 // jamming the loop. In this case, look for loads that are invariant in the
256 // outer loop and can become shared.
257 unsigned NumInvariant = 0;
 // Counts the loads in the inner loop whose pointer operand, evaluated as a
 // SCEV at the scope of the outer loop L, is invariant in L.
258 for (BasicBlock *BB : SubLoop->getBlocks()) {
259 for (Instruction &I : *BB) {
260 if (auto *Ld = dyn_cast<LoadInst>(&I)) {
261 Value *V = Ld->getPointerOperand();
262 const SCEV *LSCEV = SE.getSCEVAtScope(V, L);
263 if (SE.isLoopInvariant(LSCEV, L))
264 NumInvariant++;
265 }
266 }
267 }
268 if (NumInvariant == 0) {
269 LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; No loop invariant loads\n");
270 UP.Count = 0;
271 return false;
272 }
273
 // All heuristics passed. UP.Count keeps the value worked out above; it was
 // not requested explicitly, so the result is false.
274 return false;
275}
276
// Attempts to unroll-and-jam the outer loop L together with its first
// sub-loop. On the paths visible here the result of UnrollAndJamLoop is
// returned, after the loop IDs of the resulting loops have been updated from
// the follow-up attributes of the original outer loop ID.
//
// NOTE(review): this listing drops the lines at embedded numbers 278-280, 282,
// 285, 288, 290, 295, 297, 299-300, 313, 318, 322, 330, 340, 345-346, 348, 360,
// 376, 390, 396-397 and 406. Most of the signature, several declarations and
// the statements guarded by many of the `if`s below (including the early-exit
// returns) are therefore missing and must be restored from the upstream file.
277static LoopUnrollResult
281 OptimizationRemarkEmitter &ORE, int OptLevel) {
283 L, SE, TTI, nullptr, nullptr, ORE, OptLevel, std::nullopt, std::nullopt,
284 std::nullopt, std::nullopt, std::nullopt, std::nullopt);
286 gatherPeelingPreferences(L, SE, TTI, std::nullopt, std::nullopt);
287
289 if (EnableMode & TM_Disable)
291 if (EnableMode & TM_ForcedByUser)
292 UP.UnrollAndJam = true;
293
294 if (AllowUnrollAndJam.getNumOccurrences() > 0)
296 if (UnrollAndJamThreshold.getNumOccurrences() > 0)
298 // Exit early if unrolling is disabled.
301
302 LLVM_DEBUG(dbgs() << "Loop Unroll and Jam: F["
303 << L->getHeader()->getParent()->getName() << "] Loop %"
304 << L->getHeader()->getName() << "\n");
305
306 // A loop with any unroll pragma (enabling/disabling/count/etc) is left for
307 // the unroller, so long as it does not explicitly have unroll_and_jam
308 // metadata. This means #pragma nounroll will disable unroll and jam as well
309 // as unrolling
310 if (hasAnyUnrollPragma(L, "llvm.loop.unroll.") &&
311 !hasAnyUnrollPragma(L, "llvm.loop.unroll_and_jam.")) {
312 LLVM_DEBUG(dbgs() << " Disabled due to pragma.\n");
314 }
315
316 if (!isSafeToUnrollAndJam(L, SE, DT, DI, *LI)) {
317 LLVM_DEBUG(dbgs() << " Disabled due to not being safe.\n");
319 }
320
321 // Approximate the loop size and collect useful info
323 CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
 // The first sub-loop is used as the inner loop. NOTE(review): presumably the
 // isSafeToUnrollAndJam check above guarantees that it exists — confirm.
324 Loop *SubLoop = L->getSubLoops()[0];
325 UnrollCostEstimator InnerUCE(SubLoop, TTI, EphValues, UP.BEInsns);
326 UnrollCostEstimator OuterUCE(L, TTI, EphValues, UP.BEInsns);
327
328 if (!InnerUCE.canUnroll() || !OuterUCE.canUnroll()) {
329 LLVM_DEBUG(dbgs() << " Loop not considered unrollable\n");
331 }
332
333 unsigned InnerLoopSize = InnerUCE.getRolledLoopSize();
334 LLVM_DEBUG(dbgs() << " Outer Loop Size: " << OuterUCE.getRolledLoopSize()
335 << "\n");
336 LLVM_DEBUG(dbgs() << " Inner Loop Size: " << InnerLoopSize << "\n");
337
338 if (InnerUCE.NumInlineCandidates != 0 || OuterUCE.NumInlineCandidates != 0) {
339 LLVM_DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n");
341 }
342 // FIXME: The call to canUnroll() allows some controlled convergent
343 // operations, but we block them here for future changes.
344 if (InnerUCE.Convergence != ConvergenceKind::None ||
347 dbgs() << " Not unrolling loop with convergent instructions.\n");
349 }
350
351 // Save original loop IDs for after the transformation.
352 MDNode *OrigOuterLoopID = L->getLoopID();
353 MDNode *OrigSubLoopID = SubLoop->getLoopID();
354
355 // To assign the loop id of the epilogue, assign it before unrolling it so it
356 // is applied to every inner loop of the epilogue. We later apply the loop ID
357 // for the jammed inner loop.
358 std::optional<MDNode *> NewInnerEpilogueLoopID = makeFollowupLoopID(
359 OrigOuterLoopID, {LLVMLoopUnrollAndJamFollowupAll,
361 if (NewInnerEpilogueLoopID)
362 SubLoop->setLoopID(*NewInnerEpilogueLoopID);
363
364 // Find trip count and trip multiple
365 BasicBlock *Latch = L->getLoopLatch();
366 BasicBlock *SubLoopLatch = SubLoop->getLoopLatch();
367 unsigned OuterTripCount = SE.getSmallConstantTripCount(L, Latch);
368 unsigned OuterTripMultiple = SE.getSmallConstantTripMultiple(L, Latch);
369 unsigned InnerTripCount = SE.getSmallConstantTripCount(SubLoop, SubLoopLatch);
370
371 // Decide if, and by how much, to unroll
372 bool IsCountSetExplicitly = computeUnrollAndJamCount(
373 L, SubLoop, TTI, DT, LI, &AC, SE, EphValues, &ORE, OuterTripCount,
374 OuterTripMultiple, OuterUCE, InnerTripCount, InnerLoopSize, UP, PP);
375 if (UP.Count <= 1)
377 // Unroll factor (Count) must be less or equal to TripCount.
 // A zero OuterTripCount skips this clamp.
378 if (OuterTripCount && UP.Count > OuterTripCount)
379 UP.Count = OuterTripCount;
380
381 Loop *EpilogueOuterLoop = nullptr;
382 LoopUnrollResult UnrollResult = UnrollAndJamLoop(
383 L, UP.Count, OuterTripCount, OuterTripMultiple, UP.UnrollRemainder, LI,
384 &SE, &DT, &AC, &TTI, &ORE, &EpilogueOuterLoop);
385
386 // Assign new loop attributes.
387 if (EpilogueOuterLoop) {
388 std::optional<MDNode *> NewOuterEpilogueLoopID = makeFollowupLoopID(
389 OrigOuterLoopID, {LLVMLoopUnrollAndJamFollowupAll,
391 if (NewOuterEpilogueLoopID)
392 EpilogueOuterLoop->setLoopID(*NewOuterEpilogueLoopID);
393 }
394
395 std::optional<MDNode *> NewInnerLoopID =
398 if (NewInnerLoopID)
399 SubLoop->setLoopID(*NewInnerLoopID);
400 else
 // No follow-up ID for the inner loop: put back the ID saved before the
 // transformation, replacing any epilogue ID assigned to SubLoop earlier.
401 SubLoop->setLoopID(OrigSubLoopID);
402
403 if (UnrollResult == LoopUnrollResult::PartiallyUnrolled) {
404 std::optional<MDNode *> NewOuterLoopID = makeFollowupLoopID(
405 OrigOuterLoopID,
407 if (NewOuterLoopID) {
408 L->setLoopID(*NewOuterLoopID);
409
410 // Do not setLoopAlreadyUnrolled if a followup was given.
411 return UnrollResult;
412 }
413 }
414
415 // If loop has an unroll count pragma or unrolled by explicitly set count
416 // mark loop as unrolled to prevent unrolling beyond that requested.
417 if (UnrollResult != LoopUnrollResult::FullyUnrolled && IsCountSetExplicitly)
418 L->setLoopAlreadyUnrolled();
419
420 return UnrollResult;
421}
422
// Loop-nest driver: pops loops off a worklist, runs the single-loop
// tryToUnrollAndJamLoop on each, and returns true if any call reported a
// result other than LoopUnrollResult::Unmodified.
//
// NOTE(review): this listing drops the lines at embedded numbers 423, 425-426,
// 430 and 435. The start of the signature and the declarations of `Loops` and
// `Worklist` are therefore missing and must be restored from the upstream
// file.
424 ScalarEvolution &SE,
427 OptimizationRemarkEmitter &ORE, int OptLevel,
428 LPMUpdater &U) {
429 bool DidSomething = false;
431 Loop *OutmostLoop = &LN.getOutermostLoop();
432
433 // Add the loop nests in the reverse order of LN. See method
434 // declaration.
436 appendLoopsToWorklist(Loops, Worklist);
437 while (!Worklist.empty()) {
438 Loop *L = Worklist.pop_back_val();
 // The name is copied up front so that it is available to markLoopAsDeleted
 // after the transformation has run.
439 std::string LoopName = std::string(L->getName());
440 LoopUnrollResult Result =
441 tryToUnrollAndJamLoop(L, DT, &LI, SE, TTI, AC, DI, ORE, OptLevel);
442 if (Result != LoopUnrollResult::Unmodified)
443 DidSomething = true;
 // Only a fully unrolled outermost loop is reported to the updater as
 // deleted.
444 if (L == OutmostLoop && Result == LoopUnrollResult::FullyUnrolled)
445 U.markLoopAsDeleted(*L, LoopName);
446 }
447
448 return DidSomething;
449}
450
// Pass entry point: builds a DependenceInfo for the function that contains the
// loop nest and runs the loop-nest driver on LN.
//
// NOTE(review): this listing drops the lines at embedded numbers 451-453, 458
// and 464. The start of the signature and the declarations of `ORE` and `PA`
// are therefore missing and must be restored from the upstream file.
454 LPMUpdater &U) {
455 Function &F = *LN.getParent();
456
457 DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI);
459
 // Nothing in the nest was changed, so every analysis is reported as
 // preserved.
460 if (!tryToUnrollAndJamLoop(LN, AR.DT, AR.LI, AR.SE, AR.TTI, AR.AC, DI, ORE,
461 OptLevel, U))
462 return PreservedAnalyses::all();
463
 // Something changed: return PA with LoopNestAnalysis additionally marked as
 // preserved.
465 PA.preserve<LoopNestAnalysis>();
466 return PA;
467}
This file contains the declarations for the subclasses of Constant, which represent the different fla...
#define LLVM_DEBUG(...)
Definition: Debug.h:106
std::string Name
Hexagon Hardware Loops
This header defines various interfaces for pass management in LLVM.
This header provides classes for managing per-loop analyses.
This file defines the interface for the loop nest analysis.
This header provides classes for managing a pipeline of passes over loops in LLVM IR.
static const char *const LLVMLoopUnrollAndJamFollowupInner
static const char *const LLVMLoopUnrollAndJamFollowupRemainderInner
static const char *const LLVMLoopUnrollAndJamFollowupRemainderOuter
static MDNode * getUnrollMetadataForLoop(const Loop *L, StringRef Name)
static const char *const LLVMLoopUnrollAndJamFollowupOuter
static bool computeUnrollAndJamCount(Loop *L, Loop *SubLoop, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, AssumptionCache *AC, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, OptimizationRemarkEmitter *ORE, unsigned OuterTripCount, unsigned OuterTripMultiple, const UnrollCostEstimator &OuterUCE, unsigned InnerTripCount, unsigned InnerLoopSize, TargetTransformInfo::UnrollingPreferences &UP, TargetTransformInfo::PeelingPreferences &PP)
static cl::opt< bool > AllowUnrollAndJam("allow-unroll-and-jam", cl::Hidden, cl::desc("Allows loops to be unroll-and-jammed."))
static uint64_t getUnrollAndJammedLoopSize(unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP)
static cl::opt< unsigned > UnrollAndJamCount("unroll-and-jam-count", cl::Hidden, cl::desc("Use this unroll count for all loops including those with " "unroll_and_jam_count pragma values, for testing purposes"))
static LoopUnrollResult tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const TargetTransformInfo &TTI, AssumptionCache &AC, DependenceInfo &DI, OptimizationRemarkEmitter &ORE, int OptLevel)
static bool hasAnyUnrollPragma(const Loop *L, StringRef Prefix)
static cl::opt< unsigned > PragmaUnrollAndJamThreshold("pragma-unroll-and-jam-threshold", cl::init(1024), cl::Hidden, cl::desc("Unrolled size limit for loops with an unroll_and_jam(full) or " "unroll_count pragma."))
static cl::opt< unsigned > UnrollAndJamThreshold("unroll-and-jam-threshold", cl::init(60), cl::Hidden, cl::desc("Threshold to use for inner loop when doing unroll and jam."))
static unsigned unrollAndJamCountPragmaValue(const Loop *L)
static bool hasUnrollAndJamEnablePragma(const Loop *L)
static const char *const LLVMLoopUnrollAndJamFollowupAll
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file contains the declarations for metadata subclasses.
This file provides a priority worklist.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallPtrSet class.
This pass exposes codegen information to IR-level passes.
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
DependenceInfo - This class is the main dependence-analysis driver.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
This analysis provides information for a loop nest.
This class represents a loop nest and can be used to query its properties.
ArrayRef< Loop * > getLoops() const
Get the loops in the nest.
Function * getParent() const
Return the function to which the loop-nest belongs.
Loop & getOutermostLoop() const
Return the outermost loop in the loop nest.
PreservedAnalyses run(LoopNest &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:39
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition: LoopInfo.cpp:526
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
Definition: LoopInfo.cpp:502
Metadata node.
Definition: Metadata.h:1069
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1430
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1436
A single uniqued string.
Definition: Metadata.h:720
StringRef getString() const
Definition: Metadata.cpp:616
The optimization diagnostic interface.
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
bool empty() const
Determine if the PriorityWorklist is empty or not.
This class represents an analyzed expression in the program.
The main scalar evolution driver.
const SCEV * getSCEVAtScope(const SCEV *S, const Loop *L)
Return a SCEV expression for the specified value at the specified scope in the program.
unsigned getSmallConstantTripMultiple(const Loop *L, const SCEV *ExitCount)
Returns the largest constant divisor of the trip count as a normal unsigned value,...
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
unsigned getSmallConstantTripCount(const Loop *L)
Returns the exact trip count of the loop if we can compute it, and the result is a small constant.
A version of PriorityWorklist that selects small size optimized data structures for the vector and ma...
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:363
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:519
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:265
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Produce an estimate of the unrolled cost of the specified loop.
Definition: UnrollLoop.h:128
ConvergenceKind Convergence
Definition: UnrollLoop.h:134
bool canUnroll() const
Whether it is legal to unroll this loop.
uint64_t getRolledLoopSize() const
Definition: UnrollLoop.h:144
LLVM Value Representation.
Definition: Value.h:74
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
bool isSafeToUnrollAndJam(Loop *L, ScalarEvolution &SE, DominatorTree &DT, DependenceInfo &DI, LoopInfo &LI)
std::optional< MDNode * > makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef< StringRef > FollowupAttrs, const char *InheritOptionsAttrsPrefix="", bool AlwaysNew=false)
Create a new loop identifier for a loop created from a loop transformation.
Definition: LoopUtils.cpp:263
TargetTransformInfo::PeelingPreferences gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, std::optional< bool > UserAllowPeeling, std::optional< bool > UserAllowProfileBasedPeeling, bool UnrollingSpecficValues=false)
Definition: LoopPeel.cpp:870
TransformationMode hasUnrollAndJamTransformation(const Loop *L)
Definition: LoopUtils.cpp:373
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
LoopUnrollResult
Represents the result of a UnrollLoop invocation.
Definition: UnrollLoop.h:56
@ PartiallyUnrolled
The loop was partially unrolled – we still have a loop, but with a smaller trip count.
@ Unmodified
The loop was not modified.
@ FullyUnrolled
The loop was fully unrolled into straight-line code.
bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, AssumptionCache *AC, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, OptimizationRemarkEmitter *ORE, unsigned TripCount, unsigned MaxTripCount, bool MaxOrZero, unsigned TripMultiple, const UnrollCostEstimator &UCE, TargetTransformInfo::UnrollingPreferences &UP, TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound)
TransformationMode
The mode sets how eager a transformation should be applied.
Definition: LoopUtils.h:277
@ TM_ForcedByUser
The transformation was directed by the user, e.g.
Definition: LoopUtils.h:294
@ TM_Disable
The transformation should not be applied.
Definition: LoopUtils.h:286
void appendLoopsToWorklist(RangeT &&, SmallPriorityWorklist< Loop *, 4 > &)
Utility that implements appending of loops onto a worklist given a range.
Definition: LoopUtils.cpp:1814
TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, llvm::OptimizationRemarkEmitter &ORE, int OptLevel, std::optional< unsigned > UserThreshold, std::optional< unsigned > UserCount, std::optional< bool > UserAllowPartial, std::optional< bool > UserRuntime, std::optional< bool > UserUpperBound, std::optional< unsigned > UserFullUnrollMaxCount)
Gather the various unrolling parameters based on the defaults, compiler flags, TTI overrides and user...
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
LoopUnrollResult UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount, unsigned TripMultiple, bool UnrollRemainder, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, const TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE, Loop **EpilogueLoop=nullptr)
MDNode * GetUnrollMetadata(MDNode *LoopID, StringRef Name)
Given an llvm.loop loop id metadata node, returns the loop hint metadata node with the given name (fo...
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
Definition: CodeMetrics.cpp:71
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Parameters that control the generic loop unrolling transformation.
unsigned Count
A forced unrolling factor (the number of concatenated bodies of the original loop in the unrolled loo...
unsigned Threshold
The cost threshold for the unrolled loop.
bool Force
Apply loop unroll on any kind of loop (mainly to loops that fail runtime unrolling).
unsigned UnrollAndJamInnerLoopThreshold
Threshold for unroll and jam, for inner loop size.
bool AllowRemainder
Allow generation of a loop remainder (extra iterations after unroll).
bool UnrollAndJam
Allow unroll and jam. Used to enable unroll and jam for the target.
bool UnrollRemainder
Allow unrolling of all the iterations of the runtime loop remainder.
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...