File: | llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp |
Warning: | line 987, column 5 Value stored to 'TripCount' is never read |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //===- LoopUnroll.cpp - Loop unroller pass --------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This pass implements a simple loop unroller. It works best when loops have |
10 | // been canonicalized by the -indvars pass, allowing it to determine the trip |
11 | // counts of loops easily. |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "llvm/Transforms/Scalar/LoopUnrollPass.h" |
15 | #include "llvm/ADT/DenseMap.h" |
16 | #include "llvm/ADT/DenseMapInfo.h" |
17 | #include "llvm/ADT/DenseSet.h" |
18 | #include "llvm/ADT/None.h" |
19 | #include "llvm/ADT/Optional.h" |
20 | #include "llvm/ADT/STLExtras.h" |
21 | #include "llvm/ADT/SetVector.h" |
22 | #include "llvm/ADT/SmallPtrSet.h" |
23 | #include "llvm/ADT/SmallVector.h" |
24 | #include "llvm/ADT/StringRef.h" |
25 | #include "llvm/Analysis/AssumptionCache.h" |
26 | #include "llvm/Analysis/BlockFrequencyInfo.h" |
27 | #include "llvm/Analysis/CodeMetrics.h" |
28 | #include "llvm/Analysis/LazyBlockFrequencyInfo.h" |
29 | #include "llvm/Analysis/LoopAnalysisManager.h" |
30 | #include "llvm/Analysis/LoopInfo.h" |
31 | #include "llvm/Analysis/LoopPass.h" |
32 | #include "llvm/Analysis/LoopUnrollAnalyzer.h" |
33 | #include "llvm/Analysis/OptimizationRemarkEmitter.h" |
34 | #include "llvm/Analysis/ProfileSummaryInfo.h" |
35 | #include "llvm/Analysis/ScalarEvolution.h" |
36 | #include "llvm/Analysis/TargetTransformInfo.h" |
37 | #include "llvm/IR/BasicBlock.h" |
38 | #include "llvm/IR/CFG.h" |
39 | #include "llvm/IR/Constant.h" |
40 | #include "llvm/IR/Constants.h" |
41 | #include "llvm/IR/DiagnosticInfo.h" |
42 | #include "llvm/IR/Dominators.h" |
43 | #include "llvm/IR/Function.h" |
44 | #include "llvm/IR/Instruction.h" |
45 | #include "llvm/IR/Instructions.h" |
46 | #include "llvm/IR/IntrinsicInst.h" |
47 | #include "llvm/IR/Metadata.h" |
48 | #include "llvm/IR/PassManager.h" |
49 | #include "llvm/InitializePasses.h" |
50 | #include "llvm/Pass.h" |
51 | #include "llvm/Support/Casting.h" |
52 | #include "llvm/Support/CommandLine.h" |
53 | #include "llvm/Support/Debug.h" |
54 | #include "llvm/Support/ErrorHandling.h" |
55 | #include "llvm/Support/raw_ostream.h" |
56 | #include "llvm/Transforms/Scalar.h" |
57 | #include "llvm/Transforms/Scalar/LoopPassManager.h" |
58 | #include "llvm/Transforms/Utils.h" |
59 | #include "llvm/Transforms/Utils/LoopPeel.h" |
60 | #include "llvm/Transforms/Utils/LoopSimplify.h" |
61 | #include "llvm/Transforms/Utils/LoopUtils.h" |
62 | #include "llvm/Transforms/Utils/SizeOpts.h" |
63 | #include "llvm/Transforms/Utils/UnrollLoop.h" |
64 | #include <algorithm> |
65 | #include <cassert> |
66 | #include <cstdint> |
67 | #include <limits> |
68 | #include <string> |
69 | #include <tuple> |
70 | #include <utility> |
71 | |
72 | using namespace llvm; |
73 | |
74 | #define DEBUG_TYPE"loop-unroll" "loop-unroll" |
75 | |
76 | cl::opt<bool> llvm::ForgetSCEVInLoopUnroll( |
77 | "forget-scev-loop-unroll", cl::init(false), cl::Hidden, |
78 | cl::desc("Forget everything in SCEV when doing LoopUnroll, instead of just" |
79 | " the current top-most loop. This is sometimes preferred to reduce" |
80 | " compile time.")); |
81 | |
82 | static cl::opt<unsigned> |
83 | UnrollThreshold("unroll-threshold", cl::Hidden, |
84 | cl::desc("The cost threshold for loop unrolling")); |
85 | |
86 | static cl::opt<unsigned> |
87 | UnrollOptSizeThreshold( |
88 | "unroll-optsize-threshold", cl::init(0), cl::Hidden, |
89 | cl::desc("The cost threshold for loop unrolling when optimizing for " |
90 | "size")); |
91 | |
92 | static cl::opt<unsigned> UnrollPartialThreshold( |
93 | "unroll-partial-threshold", cl::Hidden, |
94 | cl::desc("The cost threshold for partial loop unrolling")); |
95 | |
96 | static cl::opt<unsigned> UnrollMaxPercentThresholdBoost( |
97 | "unroll-max-percent-threshold-boost", cl::init(400), cl::Hidden, |
98 | cl::desc("The maximum 'boost' (represented as a percentage >= 100) applied " |
99 | "to the threshold when aggressively unrolling a loop due to the " |
100 | "dynamic cost savings. If completely unrolling a loop will reduce " |
101 | "the total runtime from X to Y, we boost the loop unroll " |
102 | "threshold to DefaultThreshold*std::min(MaxPercentThresholdBoost, " |
103 | "X/Y). This limit avoids excessive code bloat.")); |
104 | |
105 | static cl::opt<unsigned> UnrollMaxIterationsCountToAnalyze( |
106 | "unroll-max-iteration-count-to-analyze", cl::init(10), cl::Hidden, |
107 | cl::desc("Don't allow loop unrolling to simulate more than this number of" |
108 | "iterations when checking full unroll profitability")); |
109 | |
110 | static cl::opt<unsigned> UnrollCount( |
111 | "unroll-count", cl::Hidden, |
112 | cl::desc("Use this unroll count for all loops including those with " |
113 | "unroll_count pragma values, for testing purposes")); |
114 | |
115 | static cl::opt<unsigned> UnrollMaxCount( |
116 | "unroll-max-count", cl::Hidden, |
117 | cl::desc("Set the max unroll count for partial and runtime unrolling, for" |
118 | "testing purposes")); |
119 | |
120 | static cl::opt<unsigned> UnrollFullMaxCount( |
121 | "unroll-full-max-count", cl::Hidden, |
122 | cl::desc( |
123 | "Set the max unroll count for full unrolling, for testing purposes")); |
124 | |
125 | static cl::opt<bool> |
126 | UnrollAllowPartial("unroll-allow-partial", cl::Hidden, |
127 | cl::desc("Allows loops to be partially unrolled until " |
128 | "-unroll-threshold loop size is reached.")); |
129 | |
130 | static cl::opt<bool> UnrollAllowRemainder( |
131 | "unroll-allow-remainder", cl::Hidden, |
132 | cl::desc("Allow generation of a loop remainder (extra iterations) " |
133 | "when unrolling a loop.")); |
134 | |
135 | static cl::opt<bool> |
136 | UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::Hidden, |
137 | cl::desc("Unroll loops with run-time trip counts")); |
138 | |
139 | static cl::opt<unsigned> UnrollMaxUpperBound( |
140 | "unroll-max-upperbound", cl::init(8), cl::Hidden, |
141 | cl::desc( |
142 | "The max of trip count upper bound that is considered in unrolling")); |
143 | |
144 | static cl::opt<unsigned> PragmaUnrollThreshold( |
145 | "pragma-unroll-threshold", cl::init(16 * 1024), cl::Hidden, |
146 | cl::desc("Unrolled size limit for loops with an unroll(full) or " |
147 | "unroll_count pragma.")); |
148 | |
149 | static cl::opt<unsigned> FlatLoopTripCountThreshold( |
150 | "flat-loop-tripcount-threshold", cl::init(5), cl::Hidden, |
151 | cl::desc("If the runtime tripcount for the loop is lower than the " |
152 | "threshold, the loop is considered as flat and will be less " |
153 | "aggressively unrolled.")); |
154 | |
155 | static cl::opt<bool> UnrollUnrollRemainder( |
156 | "unroll-remainder", cl::Hidden, |
157 | cl::desc("Allow the loop remainder to be unrolled.")); |
158 | |
159 | // This option isn't ever intended to be enabled, it serves to allow |
160 | // experiments to check the assumptions about when this kind of revisit is |
161 | // necessary. |
162 | static cl::opt<bool> UnrollRevisitChildLoops( |
163 | "unroll-revisit-child-loops", cl::Hidden, |
164 | cl::desc("Enqueue and re-visit child loops in the loop PM after unrolling. " |
165 | "This shouldn't typically be needed as child loops (or their " |
166 | "clones) were already visited.")); |
167 | |
168 | static cl::opt<unsigned> UnrollThresholdAggressive( |
169 | "unroll-threshold-aggressive", cl::init(300), cl::Hidden, |
170 | cl::desc("Threshold (max size of unrolled loop) to use in aggressive (O3) " |
171 | "optimizations")); |
172 | static cl::opt<unsigned> |
173 | UnrollThresholdDefault("unroll-threshold-default", cl::init(150), |
174 | cl::Hidden, |
175 | cl::desc("Default threshold (max size of unrolled " |
176 | "loop), used in all but O3 optimizations")); |
177 | |
178 | /// A magic value for use with the Threshold parameter to indicate |
179 | /// that the loop unroll should be performed regardless of how much |
180 | /// code expansion would result. |
181 | static const unsigned NoThreshold = std::numeric_limits<unsigned>::max(); |
182 | |
183 | /// Gather the various unrolling parameters based on the defaults, compiler |
184 | /// flags, TTI overrides and user specified parameters. |
185 | TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences( |
186 | Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, |
187 | BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, |
188 | OptimizationRemarkEmitter &ORE, int OptLevel, |
189 | Optional<unsigned> UserThreshold, Optional<unsigned> UserCount, |
190 | Optional<bool> UserAllowPartial, Optional<bool> UserRuntime, |
191 | Optional<bool> UserUpperBound, Optional<unsigned> UserFullUnrollMaxCount) { |
192 | TargetTransformInfo::UnrollingPreferences UP; |
193 | |
194 | // Set up the defaults |
195 | UP.Threshold = |
196 | OptLevel > 2 ? UnrollThresholdAggressive : UnrollThresholdDefault; |
197 | UP.MaxPercentThresholdBoost = 400; |
198 | UP.OptSizeThreshold = UnrollOptSizeThreshold; |
199 | UP.PartialThreshold = 150; |
200 | UP.PartialOptSizeThreshold = UnrollOptSizeThreshold; |
201 | UP.Count = 0; |
202 | UP.DefaultUnrollRuntimeCount = 8; |
203 | UP.MaxCount = std::numeric_limits<unsigned>::max(); |
204 | UP.FullUnrollMaxCount = std::numeric_limits<unsigned>::max(); |
205 | UP.BEInsns = 2; |
206 | UP.Partial = false; |
207 | UP.Runtime = false; |
208 | UP.AllowRemainder = true; |
209 | UP.UnrollRemainder = false; |
210 | UP.AllowExpensiveTripCount = false; |
211 | UP.Force = false; |
212 | UP.UpperBound = false; |
213 | UP.UnrollAndJam = false; |
214 | UP.UnrollAndJamInnerLoopThreshold = 60; |
215 | UP.MaxIterationsCountToAnalyze = UnrollMaxIterationsCountToAnalyze; |
216 | |
217 | // Override with any target specific settings |
218 | TTI.getUnrollingPreferences(L, SE, UP, &ORE); |
219 | |
220 | // Apply size attributes |
221 | bool OptForSize = L->getHeader()->getParent()->hasOptSize() || |
222 | // Let unroll hints / pragmas take precedence over PGSO. |
223 | (hasUnrollTransformation(L) != TM_ForcedByUser && |
224 | llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI, |
225 | PGSOQueryType::IRPass)); |
226 | if (OptForSize) { |
227 | UP.Threshold = UP.OptSizeThreshold; |
228 | UP.PartialThreshold = UP.PartialOptSizeThreshold; |
229 | UP.MaxPercentThresholdBoost = 100; |
230 | } |
231 | |
232 | // Apply any user values specified by cl::opt |
233 | if (UnrollThreshold.getNumOccurrences() > 0) |
234 | UP.Threshold = UnrollThreshold; |
235 | if (UnrollPartialThreshold.getNumOccurrences() > 0) |
236 | UP.PartialThreshold = UnrollPartialThreshold; |
237 | if (UnrollMaxPercentThresholdBoost.getNumOccurrences() > 0) |
238 | UP.MaxPercentThresholdBoost = UnrollMaxPercentThresholdBoost; |
239 | if (UnrollMaxCount.getNumOccurrences() > 0) |
240 | UP.MaxCount = UnrollMaxCount; |
241 | if (UnrollFullMaxCount.getNumOccurrences() > 0) |
242 | UP.FullUnrollMaxCount = UnrollFullMaxCount; |
243 | if (UnrollAllowPartial.getNumOccurrences() > 0) |
244 | UP.Partial = UnrollAllowPartial; |
245 | if (UnrollAllowRemainder.getNumOccurrences() > 0) |
246 | UP.AllowRemainder = UnrollAllowRemainder; |
247 | if (UnrollRuntime.getNumOccurrences() > 0) |
248 | UP.Runtime = UnrollRuntime; |
249 | if (UnrollMaxUpperBound == 0) |
250 | UP.UpperBound = false; |
251 | if (UnrollUnrollRemainder.getNumOccurrences() > 0) |
252 | UP.UnrollRemainder = UnrollUnrollRemainder; |
253 | if (UnrollMaxIterationsCountToAnalyze.getNumOccurrences() > 0) |
254 | UP.MaxIterationsCountToAnalyze = UnrollMaxIterationsCountToAnalyze; |
255 | |
256 | // Apply user values provided by argument |
257 | if (UserThreshold.hasValue()) { |
258 | UP.Threshold = *UserThreshold; |
259 | UP.PartialThreshold = *UserThreshold; |
260 | } |
261 | if (UserCount.hasValue()) |
262 | UP.Count = *UserCount; |
263 | if (UserAllowPartial.hasValue()) |
264 | UP.Partial = *UserAllowPartial; |
265 | if (UserRuntime.hasValue()) |
266 | UP.Runtime = *UserRuntime; |
267 | if (UserUpperBound.hasValue()) |
268 | UP.UpperBound = *UserUpperBound; |
269 | if (UserFullUnrollMaxCount.hasValue()) |
270 | UP.FullUnrollMaxCount = *UserFullUnrollMaxCount; |
271 | |
272 | return UP; |
273 | } |
274 | |
275 | namespace { |
276 | |
277 | /// A struct to densely store the state of an instruction after unrolling at |
278 | /// each iteration. |
279 | /// |
280 | /// This is designed to work like a tuple of <Instruction *, int> for the |
281 | /// purposes of hashing and lookup, but to be able to associate two boolean |
282 | /// states with each key. |
283 | struct UnrolledInstState { |
284 | Instruction *I; |
285 | int Iteration : 30; |
286 | unsigned IsFree : 1; |
287 | unsigned IsCounted : 1; |
288 | }; |
289 | |
290 | /// Hashing and equality testing for a set of the instruction states. |
291 | struct UnrolledInstStateKeyInfo { |
292 | using PtrInfo = DenseMapInfo<Instruction *>; |
293 | using PairInfo = DenseMapInfo<std::pair<Instruction *, int>>; |
294 | |
295 | static inline UnrolledInstState getEmptyKey() { |
296 | return {PtrInfo::getEmptyKey(), 0, 0, 0}; |
297 | } |
298 | |
299 | static inline UnrolledInstState getTombstoneKey() { |
300 | return {PtrInfo::getTombstoneKey(), 0, 0, 0}; |
301 | } |
302 | |
303 | static inline unsigned getHashValue(const UnrolledInstState &S) { |
304 | return PairInfo::getHashValue({S.I, S.Iteration}); |
305 | } |
306 | |
307 | static inline bool isEqual(const UnrolledInstState &LHS, |
308 | const UnrolledInstState &RHS) { |
309 | return PairInfo::isEqual({LHS.I, LHS.Iteration}, {RHS.I, RHS.Iteration}); |
310 | } |
311 | }; |
312 | |
313 | struct EstimatedUnrollCost { |
314 | /// The estimated cost after unrolling. |
315 | unsigned UnrolledCost; |
316 | |
317 | /// The estimated dynamic cost of executing the instructions in the |
318 | /// rolled form. |
319 | unsigned RolledDynamicCost; |
320 | }; |
321 | |
322 | struct PragmaInfo { |
323 | PragmaInfo(bool UUC, bool PFU, unsigned PC, bool PEU) |
324 | : UserUnrollCount(UUC), PragmaFullUnroll(PFU), PragmaCount(PC), |
325 | PragmaEnableUnroll(PEU) {} |
326 | const bool UserUnrollCount; |
327 | const bool PragmaFullUnroll; |
328 | const unsigned PragmaCount; |
329 | const bool PragmaEnableUnroll; |
330 | }; |
331 | |
332 | } // end anonymous namespace |
333 | |
334 | /// Figure out if the loop is worth full unrolling. |
335 | /// |
336 | /// Complete loop unrolling can make some loads constant, and we need to know |
337 | /// if that would expose any further optimization opportunities. This routine |
338 | /// estimates this optimization. It computes cost of unrolled loop |
339 | /// (UnrolledCost) and dynamic cost of the original loop (RolledDynamicCost). By |
340 | /// dynamic cost we mean that we won't count costs of blocks that are known not |
341 | /// to be executed (i.e. if we have a branch in the loop and we know that at the |
342 | /// given iteration its condition would be resolved to true, we won't add up the |
343 | /// cost of the 'false'-block). |
344 | /// \returns Optional value, holding the RolledDynamicCost and UnrolledCost. If |
345 | /// the analysis failed (no benefits expected from the unrolling, or the loop is |
346 | /// too big to analyze), the returned value is None. |
347 | static Optional<EstimatedUnrollCost> analyzeLoopUnrollCost( |
348 | const Loop *L, unsigned TripCount, DominatorTree &DT, ScalarEvolution &SE, |
349 | const SmallPtrSetImpl<const Value *> &EphValues, |
350 | const TargetTransformInfo &TTI, unsigned MaxUnrolledLoopSize, |
351 | unsigned MaxIterationsCountToAnalyze) { |
352 | // We want to be able to scale offsets by the trip count and add more offsets |
353 | // to them without checking for overflows, and we already don't want to |
354 | // analyze *massive* trip counts, so we force the max to be reasonably small. |
355 | assert(MaxIterationsCountToAnalyze <(static_cast <bool> (MaxIterationsCountToAnalyze < ( unsigned)(std::numeric_limits<int>::max() / 2) && "The unroll iterations max is too large!") ? void (0) : __assert_fail ("MaxIterationsCountToAnalyze < (unsigned)(std::numeric_limits<int>::max() / 2) && \"The unroll iterations max is too large!\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp" , 357, __extension__ __PRETTY_FUNCTION__)) |
356 | (unsigned)(std::numeric_limits<int>::max() / 2) &&(static_cast <bool> (MaxIterationsCountToAnalyze < ( unsigned)(std::numeric_limits<int>::max() / 2) && "The unroll iterations max is too large!") ? void (0) : __assert_fail ("MaxIterationsCountToAnalyze < (unsigned)(std::numeric_limits<int>::max() / 2) && \"The unroll iterations max is too large!\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp" , 357, __extension__ __PRETTY_FUNCTION__)) |
357 | "The unroll iterations max is too large!")(static_cast <bool> (MaxIterationsCountToAnalyze < ( unsigned)(std::numeric_limits<int>::max() / 2) && "The unroll iterations max is too large!") ? void (0) : __assert_fail ("MaxIterationsCountToAnalyze < (unsigned)(std::numeric_limits<int>::max() / 2) && \"The unroll iterations max is too large!\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp" , 357, __extension__ __PRETTY_FUNCTION__)); |
358 | |
359 | // Only analyze inner loops. We can't properly estimate cost of nested loops |
360 | // and we won't visit inner loops again anyway. |
361 | if (!L->isInnermost()) |
362 | return None; |
363 | |
364 | // Don't simulate loops with a big or unknown tripcount |
365 | if (!TripCount || TripCount > MaxIterationsCountToAnalyze) |
366 | return None; |
367 | |
368 | SmallSetVector<BasicBlock *, 16> BBWorklist; |
369 | SmallSetVector<std::pair<BasicBlock *, BasicBlock *>, 4> ExitWorklist; |
370 | DenseMap<Value *, Value *> SimplifiedValues; |
371 | SmallVector<std::pair<Value *, Value *>, 4> SimplifiedInputValues; |
372 | |
373 | // The estimated cost of the unrolled form of the loop. We try to estimate |
374 | // this by simplifying as much as we can while computing the estimate. |
375 | InstructionCost UnrolledCost = 0; |
376 | |
377 | // We also track the estimated dynamic (that is, actually executed) cost in |
378 | // the rolled form. This helps identify cases when the savings from unrolling |
379 | // aren't just exposing dead control flows, but actual reduced dynamic |
380 | // instructions due to the simplifications which we expect to occur after |
381 | // unrolling. |
382 | InstructionCost RolledDynamicCost = 0; |
383 | |
384 | // We track the simplification of each instruction in each iteration. We use |
385 | // this to recursively merge costs into the unrolled cost on-demand so that |
386 | // we don't count the cost of any dead code. This is essentially a map from |
387 | // <instruction, int> to <bool, bool>, but stored as a densely packed struct. |
388 | DenseSet<UnrolledInstState, UnrolledInstStateKeyInfo> InstCostMap; |
389 | |
390 | // A small worklist used to accumulate cost of instructions from each |
391 | // observable and reached root in the loop. |
392 | SmallVector<Instruction *, 16> CostWorklist; |
393 | |
394 | // PHI-used worklist used between iterations while accumulating cost. |
395 | SmallVector<Instruction *, 4> PHIUsedList; |
396 | |
397 | // Helper function to accumulate cost for instructions in the loop. |
398 | auto AddCostRecursively = [&](Instruction &RootI, int Iteration) { |
399 | assert(Iteration >= 0 && "Cannot have a negative iteration!")(static_cast <bool> (Iteration >= 0 && "Cannot have a negative iteration!" ) ? void (0) : __assert_fail ("Iteration >= 0 && \"Cannot have a negative iteration!\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp" , 399, __extension__ __PRETTY_FUNCTION__)); |
400 | assert(CostWorklist.empty() && "Must start with an empty cost list")(static_cast <bool> (CostWorklist.empty() && "Must start with an empty cost list" ) ? void (0) : __assert_fail ("CostWorklist.empty() && \"Must start with an empty cost list\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp" , 400, __extension__ __PRETTY_FUNCTION__)); |
401 | assert(PHIUsedList.empty() && "Must start with an empty phi used list")(static_cast <bool> (PHIUsedList.empty() && "Must start with an empty phi used list" ) ? void (0) : __assert_fail ("PHIUsedList.empty() && \"Must start with an empty phi used list\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp" , 401, __extension__ __PRETTY_FUNCTION__)); |
402 | CostWorklist.push_back(&RootI); |
403 | TargetTransformInfo::TargetCostKind CostKind = |
404 | RootI.getFunction()->hasMinSize() ? |
405 | TargetTransformInfo::TCK_CodeSize : |
406 | TargetTransformInfo::TCK_SizeAndLatency; |
407 | for (;; --Iteration) { |
408 | do { |
409 | Instruction *I = CostWorklist.pop_back_val(); |
410 | |
411 | // InstCostMap only uses I and Iteration as a key, the other two values |
412 | // don't matter here. |
413 | auto CostIter = InstCostMap.find({I, Iteration, 0, 0}); |
414 | if (CostIter == InstCostMap.end()) |
415 | // If an input to a PHI node comes from a dead path through the loop |
416 | // we may have no cost data for it here. What that actually means is |
417 | // that it is free. |
418 | continue; |
419 | auto &Cost = *CostIter; |
420 | if (Cost.IsCounted) |
421 | // Already counted this instruction. |
422 | continue; |
423 | |
424 | // Mark that we are counting the cost of this instruction now. |
425 | Cost.IsCounted = true; |
426 | |
427 | // If this is a PHI node in the loop header, just add it to the PHI set. |
428 | if (auto *PhiI = dyn_cast<PHINode>(I)) |
429 | if (PhiI->getParent() == L->getHeader()) { |
430 | assert(Cost.IsFree && "Loop PHIs shouldn't be evaluated as they "(static_cast <bool> (Cost.IsFree && "Loop PHIs shouldn't be evaluated as they " "inherently simplify during unrolling.") ? void (0) : __assert_fail ("Cost.IsFree && \"Loop PHIs shouldn't be evaluated as they \" \"inherently simplify during unrolling.\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp" , 431, __extension__ __PRETTY_FUNCTION__)) |
431 | "inherently simplify during unrolling.")(static_cast <bool> (Cost.IsFree && "Loop PHIs shouldn't be evaluated as they " "inherently simplify during unrolling.") ? void (0) : __assert_fail ("Cost.IsFree && \"Loop PHIs shouldn't be evaluated as they \" \"inherently simplify during unrolling.\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp" , 431, __extension__ __PRETTY_FUNCTION__)); |
432 | if (Iteration == 0) |
433 | continue; |
434 | |
435 | // Push the incoming value from the backedge into the PHI used list |
436 | // if it is an in-loop instruction. We'll use this to populate the |
437 | // cost worklist for the next iteration (as we count backwards). |
438 | if (auto *OpI = dyn_cast<Instruction>( |
439 | PhiI->getIncomingValueForBlock(L->getLoopLatch()))) |
440 | if (L->contains(OpI)) |
441 | PHIUsedList.push_back(OpI); |
442 | continue; |
443 | } |
444 | |
445 | // First accumulate the cost of this instruction. |
446 | if (!Cost.IsFree) { |
447 | UnrolledCost += TTI.getUserCost(I, CostKind); |
448 | LLVM_DEBUG(dbgs() << "Adding cost of instruction (iteration "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << "Adding cost of instruction (iteration " << Iteration << "): "; } } while (false) |
449 | << Iteration << "): ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << "Adding cost of instruction (iteration " << Iteration << "): "; } } while (false); |
450 | LLVM_DEBUG(I->dump())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { I->dump(); } } while (false); |
451 | } |
452 | |
453 | // We must count the cost of every operand which is not free, |
454 | // recursively. If we reach a loop PHI node, simply add it to the set |
455 | // to be considered on the next iteration (backwards!). |
456 | for (Value *Op : I->operands()) { |
457 | // Check whether this operand is free due to being a constant or |
458 | // outside the loop. |
459 | auto *OpI = dyn_cast<Instruction>(Op); |
460 | if (!OpI || !L->contains(OpI)) |
461 | continue; |
462 | |
463 | // Otherwise accumulate its cost. |
464 | CostWorklist.push_back(OpI); |
465 | } |
466 | } while (!CostWorklist.empty()); |
467 | |
468 | if (PHIUsedList.empty()) |
469 | // We've exhausted the search. |
470 | break; |
471 | |
472 | assert(Iteration > 0 &&(static_cast <bool> (Iteration > 0 && "Cannot track PHI-used values past the first iteration!" ) ? void (0) : __assert_fail ("Iteration > 0 && \"Cannot track PHI-used values past the first iteration!\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp" , 473, __extension__ __PRETTY_FUNCTION__)) |
473 | "Cannot track PHI-used values past the first iteration!")(static_cast <bool> (Iteration > 0 && "Cannot track PHI-used values past the first iteration!" ) ? void (0) : __assert_fail ("Iteration > 0 && \"Cannot track PHI-used values past the first iteration!\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp" , 473, __extension__ __PRETTY_FUNCTION__)); |
474 | CostWorklist.append(PHIUsedList.begin(), PHIUsedList.end()); |
475 | PHIUsedList.clear(); |
476 | } |
477 | }; |
478 | |
479 | // Ensure that we don't violate the loop structure invariants relied on by |
480 | // this analysis. |
481 | assert(L->isLoopSimplifyForm() && "Must put loop into normal form first.")(static_cast <bool> (L->isLoopSimplifyForm() && "Must put loop into normal form first.") ? void (0) : __assert_fail ("L->isLoopSimplifyForm() && \"Must put loop into normal form first.\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp" , 481, __extension__ __PRETTY_FUNCTION__)); |
482 | assert(L->isLCSSAForm(DT) &&(static_cast <bool> (L->isLCSSAForm(DT) && "Must have loops in LCSSA form to track live-out values." ) ? void (0) : __assert_fail ("L->isLCSSAForm(DT) && \"Must have loops in LCSSA form to track live-out values.\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp" , 483, __extension__ __PRETTY_FUNCTION__)) |
483 | "Must have loops in LCSSA form to track live-out values.")(static_cast <bool> (L->isLCSSAForm(DT) && "Must have loops in LCSSA form to track live-out values." ) ? void (0) : __assert_fail ("L->isLCSSAForm(DT) && \"Must have loops in LCSSA form to track live-out values.\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp" , 483, __extension__ __PRETTY_FUNCTION__)); |
484 | |
485 | LLVM_DEBUG(dbgs() << "Starting LoopUnroll profitability analysis...\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << "Starting LoopUnroll profitability analysis...\n" ; } } while (false); |
486 | |
487 | TargetTransformInfo::TargetCostKind CostKind = |
488 | L->getHeader()->getParent()->hasMinSize() ? |
489 | TargetTransformInfo::TCK_CodeSize : TargetTransformInfo::TCK_SizeAndLatency; |
490 | // Simulate execution of each iteration of the loop counting instructions, |
491 | // which would be simplified. |
492 | // Since the same load will take different values on different iterations, |
493 | // we literally have to go through all loop's iterations. |
494 | for (unsigned Iteration = 0; Iteration < TripCount; ++Iteration) { |
495 | LLVM_DEBUG(dbgs() << " Analyzing iteration " << Iteration << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << " Analyzing iteration " << Iteration << "\n"; } } while (false); |
496 | |
497 | // Prepare for the iteration by collecting any simplified entry or backedge |
498 | // inputs. |
499 | for (Instruction &I : *L->getHeader()) { |
500 | auto *PHI = dyn_cast<PHINode>(&I); |
501 | if (!PHI) |
502 | break; |
503 | |
504 | // The loop header PHI nodes must have exactly two input: one from the |
505 | // loop preheader and one from the loop latch. |
506 | assert((static_cast <bool> (PHI->getNumIncomingValues() == 2 && "Must have an incoming value only for the preheader and the latch." ) ? void (0) : __assert_fail ("PHI->getNumIncomingValues() == 2 && \"Must have an incoming value only for the preheader and the latch.\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp" , 508, __extension__ __PRETTY_FUNCTION__)) |
507 | PHI->getNumIncomingValues() == 2 &&(static_cast <bool> (PHI->getNumIncomingValues() == 2 && "Must have an incoming value only for the preheader and the latch." ) ? void (0) : __assert_fail ("PHI->getNumIncomingValues() == 2 && \"Must have an incoming value only for the preheader and the latch.\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp" , 508, __extension__ __PRETTY_FUNCTION__)) |
508 | "Must have an incoming value only for the preheader and the latch.")(static_cast <bool> (PHI->getNumIncomingValues() == 2 && "Must have an incoming value only for the preheader and the latch." ) ? void (0) : __assert_fail ("PHI->getNumIncomingValues() == 2 && \"Must have an incoming value only for the preheader and the latch.\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp" , 508, __extension__ __PRETTY_FUNCTION__)); |
509 | |
510 | Value *V = PHI->getIncomingValueForBlock( |
511 | Iteration == 0 ? L->getLoopPreheader() : L->getLoopLatch()); |
512 | if (Iteration != 0 && SimplifiedValues.count(V)) |
513 | V = SimplifiedValues.lookup(V); |
514 | SimplifiedInputValues.push_back({PHI, V}); |
515 | } |
516 | |
517 | // Now clear and re-populate the map for the next iteration. |
518 | SimplifiedValues.clear(); |
519 | while (!SimplifiedInputValues.empty()) |
520 | SimplifiedValues.insert(SimplifiedInputValues.pop_back_val()); |
521 | |
522 | UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, SE, L); |
523 | |
524 | BBWorklist.clear(); |
525 | BBWorklist.insert(L->getHeader()); |
526 | // Note that we *must not* cache the size, this loop grows the worklist. |
527 | for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) { |
528 | BasicBlock *BB = BBWorklist[Idx]; |
529 | |
530 | // Visit all instructions in the given basic block and try to simplify |
531 | // it. We don't change the actual IR, just count optimization |
532 | // opportunities. |
533 | for (Instruction &I : *BB) { |
534 | // These won't get into the final code - don't even try calculating the |
535 | // cost for them. |
536 | if (isa<DbgInfoIntrinsic>(I) || EphValues.count(&I)) |
537 | continue; |
538 | |
539 | // Track this instruction's expected baseline cost when executing the |
540 | // rolled loop form. |
541 | RolledDynamicCost += TTI.getUserCost(&I, CostKind); |
542 | |
543 | // Visit the instruction to analyze its loop cost after unrolling, |
544 | // and if the visitor returns true, mark the instruction as free after |
545 | // unrolling and continue. |
546 | bool IsFree = Analyzer.visit(I); |
547 | bool Inserted = InstCostMap.insert({&I, (int)Iteration, |
548 | (unsigned)IsFree, |
549 | /*IsCounted*/ false}).second; |
550 | (void)Inserted; |
551 | assert(Inserted && "Cannot have a state for an unvisited instruction!")(static_cast <bool> (Inserted && "Cannot have a state for an unvisited instruction!" ) ? void (0) : __assert_fail ("Inserted && \"Cannot have a state for an unvisited instruction!\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp" , 551, __extension__ __PRETTY_FUNCTION__)); |
552 | |
553 | if (IsFree) |
554 | continue; |
555 | |
556 | // Can't properly model a cost of a call. |
557 | // FIXME: With a proper cost model we should be able to do it. |
558 | if (auto *CI = dyn_cast<CallInst>(&I)) { |
559 | const Function *Callee = CI->getCalledFunction(); |
560 | if (!Callee || TTI.isLoweredToCall(Callee)) { |
561 | LLVM_DEBUG(dbgs() << "Can't analyze cost of loop with call\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << "Can't analyze cost of loop with call\n" ; } } while (false); |
562 | return None; |
563 | } |
564 | } |
565 | |
566 | // If the instruction might have a side-effect recursively account for |
567 | // the cost of it and all the instructions leading up to it. |
568 | if (I.mayHaveSideEffects()) |
569 | AddCostRecursively(I, Iteration); |
570 | |
571 | // If unrolled body turns out to be too big, bail out. |
572 | if (UnrolledCost > MaxUnrolledLoopSize) { |
573 | LLVM_DEBUG(dbgs() << " Exceeded threshold.. exiting.\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << " Exceeded threshold.. exiting.\n" << " UnrolledCost: " << UnrolledCost << ", MaxUnrolledLoopSize: " << MaxUnrolledLoopSize << "\n"; } } while (false ) |
574 | << " UnrolledCost: " << UnrolledCostdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << " Exceeded threshold.. exiting.\n" << " UnrolledCost: " << UnrolledCost << ", MaxUnrolledLoopSize: " << MaxUnrolledLoopSize << "\n"; } } while (false ) |
575 | << ", MaxUnrolledLoopSize: " << MaxUnrolledLoopSizedo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << " Exceeded threshold.. exiting.\n" << " UnrolledCost: " << UnrolledCost << ", MaxUnrolledLoopSize: " << MaxUnrolledLoopSize << "\n"; } } while (false ) |
576 | << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << " Exceeded threshold.. exiting.\n" << " UnrolledCost: " << UnrolledCost << ", MaxUnrolledLoopSize: " << MaxUnrolledLoopSize << "\n"; } } while (false ); |
577 | return None; |
578 | } |
579 | } |
580 | |
581 | Instruction *TI = BB->getTerminator(); |
582 | |
583 | auto getSimplifiedConstant = [&](Value *V) -> Constant * { |
584 | if (SimplifiedValues.count(V)) |
585 | V = SimplifiedValues.lookup(V); |
586 | return dyn_cast<Constant>(V); |
587 | }; |
588 | |
589 | // Add in the live successors by first checking whether we have terminator |
590 | // that may be simplified based on the values simplified by this call. |
591 | BasicBlock *KnownSucc = nullptr; |
592 | if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { |
593 | if (BI->isConditional()) { |
594 | if (auto *SimpleCond = getSimplifiedConstant(BI->getCondition())) { |
595 | // Just take the first successor if condition is undef |
596 | if (isa<UndefValue>(SimpleCond)) |
597 | KnownSucc = BI->getSuccessor(0); |
598 | else if (ConstantInt *SimpleCondVal = |
599 | dyn_cast<ConstantInt>(SimpleCond)) |
600 | KnownSucc = BI->getSuccessor(SimpleCondVal->isZero() ? 1 : 0); |
601 | } |
602 | } |
603 | } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { |
604 | if (auto *SimpleCond = getSimplifiedConstant(SI->getCondition())) { |
605 | // Just take the first successor if condition is undef |
606 | if (isa<UndefValue>(SimpleCond)) |
607 | KnownSucc = SI->getSuccessor(0); |
608 | else if (ConstantInt *SimpleCondVal = |
609 | dyn_cast<ConstantInt>(SimpleCond)) |
610 | KnownSucc = SI->findCaseValue(SimpleCondVal)->getCaseSuccessor(); |
611 | } |
612 | } |
613 | if (KnownSucc) { |
614 | if (L->contains(KnownSucc)) |
615 | BBWorklist.insert(KnownSucc); |
616 | else |
617 | ExitWorklist.insert({BB, KnownSucc}); |
618 | continue; |
619 | } |
620 | |
621 | // Add BB's successors to the worklist. |
622 | for (BasicBlock *Succ : successors(BB)) |
623 | if (L->contains(Succ)) |
624 | BBWorklist.insert(Succ); |
625 | else |
626 | ExitWorklist.insert({BB, Succ}); |
627 | AddCostRecursively(*TI, Iteration); |
628 | } |
629 | |
630 | // If we found no optimization opportunities on the first iteration, we |
631 | // won't find them on later ones too. |
632 | if (UnrolledCost == RolledDynamicCost) { |
633 | LLVM_DEBUG(dbgs() << " No opportunities found.. exiting.\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << " No opportunities found.. exiting.\n" << " UnrolledCost: " << UnrolledCost << "\n" ; } } while (false) |
634 | << " UnrolledCost: " << UnrolledCost << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << " No opportunities found.. exiting.\n" << " UnrolledCost: " << UnrolledCost << "\n" ; } } while (false); |
635 | return None; |
636 | } |
637 | } |
638 | |
639 | while (!ExitWorklist.empty()) { |
640 | BasicBlock *ExitingBB, *ExitBB; |
641 | std::tie(ExitingBB, ExitBB) = ExitWorklist.pop_back_val(); |
642 | |
643 | for (Instruction &I : *ExitBB) { |
644 | auto *PN = dyn_cast<PHINode>(&I); |
645 | if (!PN) |
646 | break; |
647 | |
648 | Value *Op = PN->getIncomingValueForBlock(ExitingBB); |
649 | if (auto *OpI = dyn_cast<Instruction>(Op)) |
650 | if (L->contains(OpI)) |
651 | AddCostRecursively(*OpI, TripCount - 1); |
652 | } |
653 | } |
654 | |
655 | assert(UnrolledCost.isValid() && RolledDynamicCost.isValid() &&(static_cast <bool> (UnrolledCost.isValid() && RolledDynamicCost .isValid() && "All instructions must have a valid cost, whether the " "loop is rolled or unrolled.") ? void (0) : __assert_fail ("UnrolledCost.isValid() && RolledDynamicCost.isValid() && \"All instructions must have a valid cost, whether the \" \"loop is rolled or unrolled.\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp" , 657, __extension__ __PRETTY_FUNCTION__)) |
656 | "All instructions must have a valid cost, whether the "(static_cast <bool> (UnrolledCost.isValid() && RolledDynamicCost .isValid() && "All instructions must have a valid cost, whether the " "loop is rolled or unrolled.") ? void (0) : __assert_fail ("UnrolledCost.isValid() && RolledDynamicCost.isValid() && \"All instructions must have a valid cost, whether the \" \"loop is rolled or unrolled.\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp" , 657, __extension__ __PRETTY_FUNCTION__)) |
657 | "loop is rolled or unrolled.")(static_cast <bool> (UnrolledCost.isValid() && RolledDynamicCost .isValid() && "All instructions must have a valid cost, whether the " "loop is rolled or unrolled.") ? void (0) : __assert_fail ("UnrolledCost.isValid() && RolledDynamicCost.isValid() && \"All instructions must have a valid cost, whether the \" \"loop is rolled or unrolled.\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp" , 657, __extension__ __PRETTY_FUNCTION__)); |
658 | |
659 | LLVM_DEBUG(dbgs() << "Analysis finished:\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << "Analysis finished:\n" << "UnrolledCost: " << UnrolledCost << ", " << "RolledDynamicCost: " << RolledDynamicCost << "\n" ; } } while (false) |
660 | << "UnrolledCost: " << UnrolledCost << ", "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << "Analysis finished:\n" << "UnrolledCost: " << UnrolledCost << ", " << "RolledDynamicCost: " << RolledDynamicCost << "\n" ; } } while (false) |
661 | << "RolledDynamicCost: " << RolledDynamicCost << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << "Analysis finished:\n" << "UnrolledCost: " << UnrolledCost << ", " << "RolledDynamicCost: " << RolledDynamicCost << "\n" ; } } while (false); |
662 | return {{unsigned(*UnrolledCost.getValue()), |
663 | unsigned(*RolledDynamicCost.getValue())}}; |
664 | } |
665 | |
666 | /// ApproximateLoopSize - Approximate the size of the loop. |
667 | unsigned llvm::ApproximateLoopSize( |
668 | const Loop *L, unsigned &NumCalls, bool &NotDuplicatable, bool &Convergent, |
669 | const TargetTransformInfo &TTI, |
670 | const SmallPtrSetImpl<const Value *> &EphValues, unsigned BEInsns) { |
671 | CodeMetrics Metrics; |
672 | for (BasicBlock *BB : L->blocks()) |
673 | Metrics.analyzeBasicBlock(BB, TTI, EphValues); |
674 | NumCalls = Metrics.NumInlineCandidates; |
675 | NotDuplicatable = Metrics.notDuplicatable; |
676 | Convergent = Metrics.convergent; |
677 | |
678 | unsigned LoopSize = Metrics.NumInsts; |
679 | |
680 | // Don't allow an estimate of size zero. This would allows unrolling of loops |
681 | // with huge iteration counts, which is a compile time problem even if it's |
682 | // not a problem for code quality. Also, the code using this size may assume |
683 | // that each loop has at least three instructions (likely a conditional |
684 | // branch, a comparison feeding that branch, and some kind of loop increment |
685 | // feeding that comparison instruction). |
686 | LoopSize = std::max(LoopSize, BEInsns + 1); |
687 | |
688 | return LoopSize; |
689 | } |
690 | |
691 | // Returns the loop hint metadata node with the given name (for example, |
692 | // "llvm.loop.unroll.count"). If no such metadata node exists, then nullptr is |
693 | // returned. |
694 | static MDNode *getUnrollMetadataForLoop(const Loop *L, StringRef Name) { |
695 | if (MDNode *LoopID = L->getLoopID()) |
696 | return GetUnrollMetadata(LoopID, Name); |
697 | return nullptr; |
698 | } |
699 | |
700 | // Returns true if the loop has an unroll(full) pragma. |
701 | static bool hasUnrollFullPragma(const Loop *L) { |
702 | return getUnrollMetadataForLoop(L, "llvm.loop.unroll.full"); |
703 | } |
704 | |
705 | // Returns true if the loop has an unroll(enable) pragma. This metadata is used |
706 | // for both "#pragma unroll" and "#pragma clang loop unroll(enable)" directives. |
707 | static bool hasUnrollEnablePragma(const Loop *L) { |
708 | return getUnrollMetadataForLoop(L, "llvm.loop.unroll.enable"); |
709 | } |
710 | |
711 | // Returns true if the loop has an runtime unroll(disable) pragma. |
712 | static bool hasRuntimeUnrollDisablePragma(const Loop *L) { |
713 | return getUnrollMetadataForLoop(L, "llvm.loop.unroll.runtime.disable"); |
714 | } |
715 | |
716 | // If loop has an unroll_count pragma return the (necessarily |
717 | // positive) value from the pragma. Otherwise return 0. |
718 | static unsigned unrollCountPragmaValue(const Loop *L) { |
719 | MDNode *MD = getUnrollMetadataForLoop(L, "llvm.loop.unroll.count"); |
720 | if (MD) { |
721 | assert(MD->getNumOperands() == 2 &&(static_cast <bool> (MD->getNumOperands() == 2 && "Unroll count hint metadata should have two operands.") ? void (0) : __assert_fail ("MD->getNumOperands() == 2 && \"Unroll count hint metadata should have two operands.\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp" , 722, __extension__ __PRETTY_FUNCTION__)) |
722 | "Unroll count hint metadata should have two operands.")(static_cast <bool> (MD->getNumOperands() == 2 && "Unroll count hint metadata should have two operands.") ? void (0) : __assert_fail ("MD->getNumOperands() == 2 && \"Unroll count hint metadata should have two operands.\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp" , 722, __extension__ __PRETTY_FUNCTION__)); |
723 | unsigned Count = |
724 | mdconst::extract<ConstantInt>(MD->getOperand(1))->getZExtValue(); |
725 | assert(Count >= 1 && "Unroll count must be positive.")(static_cast <bool> (Count >= 1 && "Unroll count must be positive." ) ? void (0) : __assert_fail ("Count >= 1 && \"Unroll count must be positive.\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp" , 725, __extension__ __PRETTY_FUNCTION__)); |
726 | return Count; |
727 | } |
728 | return 0; |
729 | } |
730 | |
731 | // Computes the boosting factor for complete unrolling. |
732 | // If fully unrolling the loop would save a lot of RolledDynamicCost, it would |
733 | // be beneficial to fully unroll the loop even if unrolledcost is large. We |
734 | // use (RolledDynamicCost / UnrolledCost) to model the unroll benefits to adjust |
735 | // the unroll threshold. |
736 | static unsigned getFullUnrollBoostingFactor(const EstimatedUnrollCost &Cost, |
737 | unsigned MaxPercentThresholdBoost) { |
738 | if (Cost.RolledDynamicCost >= std::numeric_limits<unsigned>::max() / 100) |
739 | return 100; |
740 | else if (Cost.UnrolledCost != 0) |
741 | // The boosting factor is RolledDynamicCost / UnrolledCost |
742 | return std::min(100 * Cost.RolledDynamicCost / Cost.UnrolledCost, |
743 | MaxPercentThresholdBoost); |
744 | else |
745 | return MaxPercentThresholdBoost; |
746 | } |
747 | |
748 | // Produce an estimate of the unrolled cost of the specified loop. This |
749 | // is used to a) produce a cost estimate for partial unrolling and b) to |
750 | // cheaply estimate cost for full unrolling when we don't want to symbolically |
751 | // evaluate all iterations. |
752 | class UnrollCostEstimator { |
753 | const unsigned LoopSize; |
754 | |
755 | public: |
756 | UnrollCostEstimator(Loop &L, unsigned LoopSize) : LoopSize(LoopSize) {} |
757 | |
758 | // Returns loop size estimation for unrolled loop, given the unrolling |
759 | // configuration specified by UP. |
760 | uint64_t |
761 | getUnrolledLoopSize(const TargetTransformInfo::UnrollingPreferences &UP, |
762 | const unsigned CountOverwrite = 0) const { |
763 | assert(LoopSize >= UP.BEInsns &&(static_cast <bool> (LoopSize >= UP.BEInsns && "LoopSize should not be less than BEInsns!") ? void (0) : __assert_fail ("LoopSize >= UP.BEInsns && \"LoopSize should not be less than BEInsns!\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp" , 764, __extension__ __PRETTY_FUNCTION__)) |
764 | "LoopSize should not be less than BEInsns!")(static_cast <bool> (LoopSize >= UP.BEInsns && "LoopSize should not be less than BEInsns!") ? void (0) : __assert_fail ("LoopSize >= UP.BEInsns && \"LoopSize should not be less than BEInsns!\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp" , 764, __extension__ __PRETTY_FUNCTION__)); |
765 | if (CountOverwrite) |
766 | return static_cast<uint64_t>(LoopSize - UP.BEInsns) * CountOverwrite + |
767 | UP.BEInsns; |
768 | else |
769 | return static_cast<uint64_t>(LoopSize - UP.BEInsns) * UP.Count + |
770 | UP.BEInsns; |
771 | } |
772 | }; |
773 | |
774 | static Optional<unsigned> |
775 | shouldPragmaUnroll(Loop *L, const PragmaInfo &PInfo, |
776 | const unsigned TripMultiple, const unsigned TripCount, |
777 | const UnrollCostEstimator UCE, |
778 | const TargetTransformInfo::UnrollingPreferences &UP) { |
779 | |
780 | // Using unroll pragma |
781 | // 1st priority is unroll count set by "unroll-count" option. |
782 | |
783 | if (PInfo.UserUnrollCount) { |
784 | if (UP.AllowRemainder && |
785 | UCE.getUnrolledLoopSize(UP, (unsigned)UnrollCount) < UP.Threshold) |
786 | return (unsigned)UnrollCount; |
787 | } |
788 | |
789 | // 2nd priority is unroll count set by pragma. |
790 | if (PInfo.PragmaCount > 0) { |
791 | if ((UP.AllowRemainder || (TripMultiple % PInfo.PragmaCount == 0)) && |
792 | UCE.getUnrolledLoopSize(UP, PInfo.PragmaCount) < PragmaUnrollThreshold) |
793 | return PInfo.PragmaCount; |
794 | } |
795 | |
796 | if (PInfo.PragmaFullUnroll && TripCount != 0) { |
797 | if (UCE.getUnrolledLoopSize(UP, TripCount) < PragmaUnrollThreshold) |
798 | return TripCount; |
799 | } |
800 | // if didn't return until here, should continue to other priorties |
801 | return None; |
802 | } |
803 | |
804 | static Optional<unsigned> shouldFullUnroll( |
805 | Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, |
806 | ScalarEvolution &SE, const SmallPtrSetImpl<const Value *> &EphValues, |
807 | const unsigned FullUnrollTripCount, const UnrollCostEstimator UCE, |
808 | const TargetTransformInfo::UnrollingPreferences &UP) { |
809 | |
810 | if (FullUnrollTripCount && FullUnrollTripCount <= UP.FullUnrollMaxCount) { |
811 | // When computing the unrolled size, note that BEInsns are not replicated |
812 | // like the rest of the loop body. |
813 | if (UCE.getUnrolledLoopSize(UP) < UP.Threshold) { |
814 | return FullUnrollTripCount; |
815 | |
816 | } else { |
817 | // The loop isn't that small, but we still can fully unroll it if that |
818 | // helps to remove a significant number of instructions. |
819 | // To check that, run additional analysis on the loop. |
820 | if (Optional<EstimatedUnrollCost> Cost = analyzeLoopUnrollCost( |
821 | L, FullUnrollTripCount, DT, SE, EphValues, TTI, |
822 | UP.Threshold * UP.MaxPercentThresholdBoost / 100, |
823 | UP.MaxIterationsCountToAnalyze)) { |
824 | unsigned Boost = |
825 | getFullUnrollBoostingFactor(*Cost, UP.MaxPercentThresholdBoost); |
826 | if (Cost->UnrolledCost < UP.Threshold * Boost / 100) { |
827 | return FullUnrollTripCount; |
828 | } |
829 | } |
830 | } |
831 | } |
832 | return None; |
833 | } |
834 | |
835 | static Optional<unsigned> |
836 | shouldPartialUnroll(const unsigned LoopSize, const unsigned TripCount, |
837 | const UnrollCostEstimator UCE, |
838 | const TargetTransformInfo::UnrollingPreferences &UP) { |
839 | |
840 | unsigned count = UP.Count; |
841 | if (TripCount) { |
842 | if (!UP.Partial) { |
843 | LLVM_DEBUG(dbgs() << " will not try to unroll partially because "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << " will not try to unroll partially because " << "-unroll-allow-partial not given\n"; } } while (false ) |
844 | << "-unroll-allow-partial not given\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << " will not try to unroll partially because " << "-unroll-allow-partial not given\n"; } } while (false ); |
845 | count = 0; |
846 | return count; |
847 | } |
848 | if (count == 0) |
849 | count = TripCount; |
850 | if (UP.PartialThreshold != NoThreshold) { |
851 | // Reduce unroll count to be modulo of TripCount for partial unrolling. |
852 | if (UCE.getUnrolledLoopSize(UP, count) > UP.PartialThreshold) |
853 | count = (std::max(UP.PartialThreshold, UP.BEInsns + 1) - UP.BEInsns) / |
854 | (LoopSize - UP.BEInsns); |
855 | if (count > UP.MaxCount) |
856 | count = UP.MaxCount; |
857 | while (count != 0 && TripCount % count != 0) |
858 | count--; |
859 | if (UP.AllowRemainder && count <= 1) { |
860 | // If there is no Count that is modulo of TripCount, set Count to |
861 | // largest power-of-two factor that satisfies the threshold limit. |
862 | // As we'll create fixup loop, do the type of unrolling only if |
863 | // remainder loop is allowed. |
864 | count = UP.DefaultUnrollRuntimeCount; |
865 | while (count != 0 && |
866 | UCE.getUnrolledLoopSize(UP, count) > UP.PartialThreshold) |
867 | count >>= 1; |
868 | } |
869 | if (count < 2) { |
870 | count = 0; |
871 | } |
872 | } else { |
873 | count = TripCount; |
874 | } |
875 | if (count > UP.MaxCount) |
876 | count = UP.MaxCount; |
877 | |
878 | LLVM_DEBUG(dbgs() << " partially unrolling with count: " << count << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << " partially unrolling with count: " << count << "\n"; } } while (false); |
879 | |
880 | return count; |
881 | } |
882 | |
883 | // if didn't return until here, should continue to other priorties |
884 | return None; |
885 | } |
886 | // Returns true if unroll count was set explicitly. |
887 | // Calculates unroll count and writes it to UP.Count. |
888 | // Unless IgnoreUser is true, will also use metadata and command-line options |
889 | // that are specific to to the LoopUnroll pass (which, for instance, are |
890 | // irrelevant for the LoopUnrollAndJam pass). |
891 | // FIXME: This function is used by LoopUnroll and LoopUnrollAndJam, but consumes |
892 | // many LoopUnroll-specific options. The shared functionality should be |
893 | // refactored into it own function. |
894 | bool llvm::computeUnrollCount( |
895 | Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, |
896 | ScalarEvolution &SE, const SmallPtrSetImpl<const Value *> &EphValues, |
897 | OptimizationRemarkEmitter *ORE, unsigned TripCount, unsigned MaxTripCount, |
898 | bool MaxOrZero, unsigned TripMultiple, unsigned LoopSize, |
899 | TargetTransformInfo::UnrollingPreferences &UP, |
900 | TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound) { |
901 | |
902 | UnrollCostEstimator UCE(*L, LoopSize); |
903 | Optional<unsigned> UnrollFactor; |
904 | |
905 | const bool UserUnrollCount = UnrollCount.getNumOccurrences() > 0; |
906 | const bool PragmaFullUnroll = hasUnrollFullPragma(L); |
907 | const unsigned PragmaCount = unrollCountPragmaValue(L); |
908 | const bool PragmaEnableUnroll = hasUnrollEnablePragma(L); |
909 | |
910 | const bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll || |
911 | PragmaEnableUnroll || UserUnrollCount; |
912 | |
913 | PragmaInfo PInfo(UserUnrollCount, PragmaFullUnroll, PragmaCount, |
914 | PragmaEnableUnroll); |
915 | // Use an explicit peel count that has been specified for testing. In this |
916 | // case it's not permitted to also specify an explicit unroll count. |
917 | if (PP.PeelCount) { |
918 | if (UnrollCount.getNumOccurrences() > 0) { |
919 | report_fatal_error("Cannot specify both explicit peel count and " |
920 | "explicit unroll count"); |
921 | } |
922 | UP.Count = 1; |
923 | UP.Runtime = false; |
924 | return true; |
925 | } |
926 | // Check for explicit Count. |
927 | // 1st priority is unroll count set by "unroll-count" option. |
928 | // 2nd priority is unroll count set by pragma. |
929 | UnrollFactor = shouldPragmaUnroll(L, PInfo, TripMultiple, TripCount, UCE, UP); |
930 | |
931 | if (UnrollFactor) { |
932 | UP.Count = *UnrollFactor; |
933 | |
934 | if (UserUnrollCount || (PragmaCount > 0)) { |
935 | UP.AllowExpensiveTripCount = true; |
936 | UP.Force = true; |
937 | } |
938 | UP.Runtime |= (PragmaCount > 0); |
939 | return ExplicitUnroll; |
940 | } else { |
941 | if (ExplicitUnroll && TripCount != 0) { |
942 | // If the loop has an unrolling pragma, we want to be more aggressive with |
943 | // unrolling limits. Set thresholds to at least the PragmaUnrollThreshold |
944 | // value which is larger than the default limits. |
945 | UP.Threshold = std::max<unsigned>(UP.Threshold, PragmaUnrollThreshold); |
946 | UP.PartialThreshold = |
947 | std::max<unsigned>(UP.PartialThreshold, PragmaUnrollThreshold); |
948 | } |
949 | } |
950 | |
951 | // 3rd priority is full unroll count. |
952 | // Full unroll makes sense only when TripCount or its upper bound could be |
953 | // statically calculated. |
954 | // Also we need to check if we exceed FullUnrollMaxCount. |
955 | |
956 | // We can unroll by the upper bound amount if it's generally allowed or if |
957 | // we know that the loop is executed either the upper bound or zero times. |
958 | // (MaxOrZero unrolling keeps only the first loop test, so the number of |
959 | // loop tests remains the same compared to the non-unrolled version, whereas |
960 | // the generic upper bound unrolling keeps all but the last loop test so the |
961 | // number of loop tests goes up which may end up being worse on targets with |
962 | // constrained branch predictor resources so is controlled by an option.) |
963 | // In addition we only unroll small upper bounds. |
964 | unsigned FullUnrollMaxTripCount = MaxTripCount; |
965 | if (!(UP.UpperBound || MaxOrZero) || |
966 | FullUnrollMaxTripCount > UnrollMaxUpperBound) |
967 | FullUnrollMaxTripCount = 0; |
968 | |
969 | // UnrollByMaxCount and ExactTripCount cannot both be non zero since we only |
970 | // compute the former when the latter is zero. |
971 | unsigned ExactTripCount = TripCount; |
972 | assert((ExactTripCount == 0 || FullUnrollMaxTripCount == 0) &&(static_cast <bool> ((ExactTripCount == 0 || FullUnrollMaxTripCount == 0) && "ExtractTripCount and UnrollByMaxCount cannot both be non zero." ) ? void (0) : __assert_fail ("(ExactTripCount == 0 || FullUnrollMaxTripCount == 0) && \"ExtractTripCount and UnrollByMaxCount cannot both be non zero.\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp" , 973, __extension__ __PRETTY_FUNCTION__)) |
973 | "ExtractTripCount and UnrollByMaxCount cannot both be non zero.")(static_cast <bool> ((ExactTripCount == 0 || FullUnrollMaxTripCount == 0) && "ExtractTripCount and UnrollByMaxCount cannot both be non zero." ) ? void (0) : __assert_fail ("(ExactTripCount == 0 || FullUnrollMaxTripCount == 0) && \"ExtractTripCount and UnrollByMaxCount cannot both be non zero.\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp" , 973, __extension__ __PRETTY_FUNCTION__)); |
974 | |
975 | unsigned FullUnrollTripCount = |
976 | ExactTripCount ? ExactTripCount : FullUnrollMaxTripCount; |
977 | UP.Count = FullUnrollTripCount; |
978 | |
979 | UnrollFactor = |
980 | shouldFullUnroll(L, TTI, DT, SE, EphValues, FullUnrollTripCount, UCE, UP); |
981 | |
982 | // if shouldFullUnroll can do the unrolling, some side parameteres should be |
983 | // set |
984 | if (UnrollFactor) { |
985 | UP.Count = *UnrollFactor; |
986 | UseUpperBound = (FullUnrollMaxTripCount == FullUnrollTripCount); |
987 | TripCount = FullUnrollTripCount; |
Value stored to 'TripCount' is never read | |
988 | TripMultiple = UP.UpperBound ? 1 : TripMultiple; |
989 | return ExplicitUnroll; |
990 | } else { |
991 | UP.Count = FullUnrollTripCount; |
992 | } |
993 | |
994 | // 4th priority is loop peeling. |
995 | computePeelCount(L, LoopSize, PP, TripCount, DT, SE, UP.Threshold); |
996 | if (PP.PeelCount) { |
997 | UP.Runtime = false; |
998 | UP.Count = 1; |
999 | return ExplicitUnroll; |
1000 | } |
1001 | |
1002 | // Before starting partial unrolling, set up.partial to true, |
1003 | // if user explicitly asked for unrolling |
1004 | if (TripCount) |
1005 | UP.Partial |= ExplicitUnroll; |
1006 | |
1007 | // 5th priority is partial unrolling. |
1008 | // Try partial unroll only when TripCount could be statically calculated. |
1009 | UnrollFactor = shouldPartialUnroll(LoopSize, TripCount, UCE, UP); |
1010 | |
1011 | if (UnrollFactor) { |
1012 | UP.Count = *UnrollFactor; |
1013 | |
1014 | if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount && |
1015 | UP.Count != TripCount) |
1016 | ORE->emit([&]() { |
1017 | return OptimizationRemarkMissed(DEBUG_TYPE"loop-unroll", |
1018 | "FullUnrollAsDirectedTooLarge", |
1019 | L->getStartLoc(), L->getHeader()) |
1020 | << "Unable to fully unroll loop as directed by unroll pragma " |
1021 | "because " |
1022 | "unrolled size is too large."; |
1023 | }); |
1024 | |
1025 | if (UP.PartialThreshold != NoThreshold) { |
1026 | if (UP.Count == 0) { |
1027 | if (PragmaEnableUnroll) |
1028 | ORE->emit([&]() { |
1029 | return OptimizationRemarkMissed(DEBUG_TYPE"loop-unroll", |
1030 | "UnrollAsDirectedTooLarge", |
1031 | L->getStartLoc(), L->getHeader()) |
1032 | << "Unable to unroll loop as directed by unroll(enable) " |
1033 | "pragma " |
1034 | "because unrolled size is too large."; |
1035 | }); |
1036 | } |
1037 | } |
1038 | return ExplicitUnroll; |
1039 | } |
1040 | assert(TripCount == 0 &&(static_cast <bool> (TripCount == 0 && "All cases when TripCount is constant should be covered here." ) ? void (0) : __assert_fail ("TripCount == 0 && \"All cases when TripCount is constant should be covered here.\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp" , 1041, __extension__ __PRETTY_FUNCTION__)) |
1041 | "All cases when TripCount is constant should be covered here.")(static_cast <bool> (TripCount == 0 && "All cases when TripCount is constant should be covered here." ) ? void (0) : __assert_fail ("TripCount == 0 && \"All cases when TripCount is constant should be covered here.\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp" , 1041, __extension__ __PRETTY_FUNCTION__)); |
1042 | if (PragmaFullUnroll) |
1043 | ORE->emit([&]() { |
1044 | return OptimizationRemarkMissed( |
1045 | DEBUG_TYPE"loop-unroll", "CantFullUnrollAsDirectedRuntimeTripCount", |
1046 | L->getStartLoc(), L->getHeader()) |
1047 | << "Unable to fully unroll loop as directed by unroll(full) " |
1048 | "pragma " |
1049 | "because loop has a runtime trip count."; |
1050 | }); |
1051 | |
1052 | // 6th priority is runtime unrolling. |
1053 | // Don't unroll a runtime trip count loop when it is disabled. |
1054 | if (hasRuntimeUnrollDisablePragma(L)) { |
1055 | UP.Count = 0; |
1056 | return false; |
1057 | } |
1058 | |
1059 | // Don't unroll a small upper bound loop unless user or TTI asked to do so. |
1060 | if (MaxTripCount && !UP.Force && MaxTripCount < UnrollMaxUpperBound) { |
1061 | UP.Count = 0; |
1062 | return false; |
1063 | } |
1064 | |
1065 | // Check if the runtime trip count is too small when profile is available. |
1066 | if (L->getHeader()->getParent()->hasProfileData()) { |
1067 | if (auto ProfileTripCount = getLoopEstimatedTripCount(L)) { |
1068 | if (*ProfileTripCount < FlatLoopTripCountThreshold) |
1069 | return false; |
1070 | else |
1071 | UP.AllowExpensiveTripCount = true; |
1072 | } |
1073 | } |
1074 | UP.Runtime |= PragmaEnableUnroll || PragmaCount > 0 || UserUnrollCount; |
1075 | if (!UP.Runtime) { |
1076 | LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << " will not try to unroll loop with runtime trip count " << "-unroll-runtime not given\n"; } } while (false) |
1077 | dbgs() << " will not try to unroll loop with runtime trip count "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << " will not try to unroll loop with runtime trip count " << "-unroll-runtime not given\n"; } } while (false) |
1078 | << "-unroll-runtime not given\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << " will not try to unroll loop with runtime trip count " << "-unroll-runtime not given\n"; } } while (false); |
1079 | UP.Count = 0; |
1080 | return false; |
1081 | } |
1082 | if (UP.Count == 0) |
1083 | UP.Count = UP.DefaultUnrollRuntimeCount; |
1084 | |
1085 | // Reduce unroll count to be the largest power-of-two factor of |
1086 | // the original count which satisfies the threshold limit. |
1087 | while (UP.Count != 0 && |
1088 | UCE.getUnrolledLoopSize(UP) > UP.PartialThreshold) |
1089 | UP.Count >>= 1; |
1090 | |
1091 | #ifndef NDEBUG |
1092 | unsigned OrigCount = UP.Count; |
1093 | #endif |
1094 | |
1095 | if (!UP.AllowRemainder && UP.Count != 0 && (TripMultiple % UP.Count) != 0) { |
1096 | while (UP.Count != 0 && TripMultiple % UP.Count != 0) |
1097 | UP.Count >>= 1; |
1098 | LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << "Remainder loop is restricted (that could architecture " "specific or because the loop contains a convergent " "instruction), so unroll count must divide the trip " "multiple, " << TripMultiple << ". Reducing unroll count from " << OrigCount << " to " << UP.Count << ".\n"; } } while (false) |
1099 | dbgs() << "Remainder loop is restricted (that could architecture "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << "Remainder loop is restricted (that could architecture " "specific or because the loop contains a convergent " "instruction), so unroll count must divide the trip " "multiple, " << TripMultiple << ". Reducing unroll count from " << OrigCount << " to " << UP.Count << ".\n"; } } while (false) |
1100 | "specific or because the loop contains a convergent "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << "Remainder loop is restricted (that could architecture " "specific or because the loop contains a convergent " "instruction), so unroll count must divide the trip " "multiple, " << TripMultiple << ". Reducing unroll count from " << OrigCount << " to " << UP.Count << ".\n"; } } while (false) |
1101 | "instruction), so unroll count must divide the trip "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << "Remainder loop is restricted (that could architecture " "specific or because the loop contains a convergent " "instruction), so unroll count must divide the trip " "multiple, " << TripMultiple << ". Reducing unroll count from " << OrigCount << " to " << UP.Count << ".\n"; } } while (false) |
1102 | "multiple, "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << "Remainder loop is restricted (that could architecture " "specific or because the loop contains a convergent " "instruction), so unroll count must divide the trip " "multiple, " << TripMultiple << ". Reducing unroll count from " << OrigCount << " to " << UP.Count << ".\n"; } } while (false) |
1103 | << TripMultiple << ". Reducing unroll count from " << OrigCountdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << "Remainder loop is restricted (that could architecture " "specific or because the loop contains a convergent " "instruction), so unroll count must divide the trip " "multiple, " << TripMultiple << ". Reducing unroll count from " << OrigCount << " to " << UP.Count << ".\n"; } } while (false) |
1104 | << " to " << UP.Count << ".\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << "Remainder loop is restricted (that could architecture " "specific or because the loop contains a convergent " "instruction), so unroll count must divide the trip " "multiple, " << TripMultiple << ". Reducing unroll count from " << OrigCount << " to " << UP.Count << ".\n"; } } while (false); |
1105 | |
1106 | using namespace ore; |
1107 | |
1108 | if (unrollCountPragmaValue(L) > 0 && !UP.AllowRemainder) |
1109 | ORE->emit([&]() { |
1110 | return OptimizationRemarkMissed(DEBUG_TYPE"loop-unroll", |
1111 | "DifferentUnrollCountFromDirected", |
1112 | L->getStartLoc(), L->getHeader()) |
1113 | << "Unable to unroll loop the number of times directed by " |
1114 | "unroll_count pragma because remainder loop is restricted " |
1115 | "(that could architecture specific or because the loop " |
1116 | "contains a convergent instruction) and so must have an " |
1117 | "unroll " |
1118 | "count that divides the loop trip multiple of " |
1119 | << NV("TripMultiple", TripMultiple) << ". Unrolling instead " |
1120 | << NV("UnrollCount", UP.Count) << " time(s)."; |
1121 | }); |
1122 | } |
1123 | |
1124 | if (UP.Count > UP.MaxCount) |
1125 | UP.Count = UP.MaxCount; |
1126 | |
1127 | if (MaxTripCount && UP.Count > MaxTripCount) |
1128 | UP.Count = MaxTripCount; |
1129 | |
1130 | LLVM_DEBUG(dbgs() << " runtime unrolling with count: " << UP.Countdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << " runtime unrolling with count: " << UP.Count << "\n"; } } while (false) |
1131 | << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << " runtime unrolling with count: " << UP.Count << "\n"; } } while (false); |
1132 | if (UP.Count < 2) |
1133 | UP.Count = 0; |
1134 | return ExplicitUnroll; |
1135 | } |
1136 | |
1137 | static LoopUnrollResult tryToUnrollLoop( |
1138 | Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, |
1139 | const TargetTransformInfo &TTI, AssumptionCache &AC, |
1140 | OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI, |
1141 | ProfileSummaryInfo *PSI, bool PreserveLCSSA, int OptLevel, |
1142 | bool OnlyWhenForced, bool ForgetAllSCEV, Optional<unsigned> ProvidedCount, |
1143 | Optional<unsigned> ProvidedThreshold, Optional<bool> ProvidedAllowPartial, |
1144 | Optional<bool> ProvidedRuntime, Optional<bool> ProvidedUpperBound, |
1145 | Optional<bool> ProvidedAllowPeeling, |
1146 | Optional<bool> ProvidedAllowProfileBasedPeeling, |
1147 | Optional<unsigned> ProvidedFullUnrollMaxCount) { |
1148 | LLVM_DEBUG(dbgs() << "Loop Unroll: F["do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << "Loop Unroll: F[" << L->getHeader()->getParent()->getName() << "] Loop %" << L->getHeader()->getName() << "\n"; } } while (false) |
1149 | << L->getHeader()->getParent()->getName() << "] Loop %"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << "Loop Unroll: F[" << L->getHeader()->getParent()->getName() << "] Loop %" << L->getHeader()->getName() << "\n"; } } while (false) |
1150 | << L->getHeader()->getName() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << "Loop Unroll: F[" << L->getHeader()->getParent()->getName() << "] Loop %" << L->getHeader()->getName() << "\n"; } } while (false); |
1151 | TransformationMode TM = hasUnrollTransformation(L); |
1152 | if (TM & TM_Disable) |
1153 | return LoopUnrollResult::Unmodified; |
1154 | if (!L->isLoopSimplifyForm()) { |
1155 | LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << " Not unrolling loop which is not in loop-simplify form.\n" ; } } while (false) |
1156 | dbgs() << " Not unrolling loop which is not in loop-simplify form.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << " Not unrolling loop which is not in loop-simplify form.\n" ; } } while (false); |
1157 | return LoopUnrollResult::Unmodified; |
1158 | } |
1159 | |
1160 | // When automatic unrolling is disabled, do not unroll unless overridden for |
1161 | // this loop. |
1162 | if (OnlyWhenForced && !(TM & TM_Enable)) |
1163 | return LoopUnrollResult::Unmodified; |
1164 | |
1165 | bool OptForSize = L->getHeader()->getParent()->hasOptSize(); |
1166 | unsigned NumInlineCandidates; |
1167 | bool NotDuplicatable; |
1168 | bool Convergent; |
1169 | TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences( |
1170 | L, SE, TTI, BFI, PSI, ORE, OptLevel, ProvidedThreshold, ProvidedCount, |
1171 | ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound, |
1172 | ProvidedFullUnrollMaxCount); |
1173 | TargetTransformInfo::PeelingPreferences PP = gatherPeelingPreferences( |
1174 | L, SE, TTI, ProvidedAllowPeeling, ProvidedAllowProfileBasedPeeling, true); |
1175 | |
1176 | // Exit early if unrolling is disabled. For OptForSize, we pick the loop size |
1177 | // as threshold later on. |
1178 | if (UP.Threshold == 0 && (!UP.Partial || UP.PartialThreshold == 0) && |
1179 | !OptForSize) |
1180 | return LoopUnrollResult::Unmodified; |
1181 | |
1182 | SmallPtrSet<const Value *, 32> EphValues; |
1183 | CodeMetrics::collectEphemeralValues(L, &AC, EphValues); |
1184 | |
1185 | unsigned LoopSize = |
1186 | ApproximateLoopSize(L, NumInlineCandidates, NotDuplicatable, Convergent, |
1187 | TTI, EphValues, UP.BEInsns); |
1188 | LLVM_DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << " Loop Size = " << LoopSize << "\n"; } } while (false); |
1189 | if (NotDuplicatable) { |
1190 | LLVM_DEBUG(dbgs() << " Not unrolling loop which contains non-duplicatable"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << " Not unrolling loop which contains non-duplicatable" << " instructions.\n"; } } while (false) |
1191 | << " instructions.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << " Not unrolling loop which contains non-duplicatable" << " instructions.\n"; } } while (false); |
1192 | return LoopUnrollResult::Unmodified; |
1193 | } |
1194 | |
1195 | // When optimizing for size, use LoopSize + 1 as threshold (we use < Threshold |
1196 | // later), to (fully) unroll loops, if it does not increase code size. |
1197 | if (OptForSize) |
1198 | UP.Threshold = std::max(UP.Threshold, LoopSize + 1); |
1199 | |
1200 | if (NumInlineCandidates != 0) { |
1201 | LLVM_DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << " Not unrolling loop with inlinable calls.\n" ; } } while (false); |
1202 | return LoopUnrollResult::Unmodified; |
1203 | } |
1204 | |
1205 | // Find the smallest exact trip count for any exit. This is an upper bound |
1206 | // on the loop trip count, but an exit at an earlier iteration is still |
1207 | // possible. An unroll by the smallest exact trip count guarantees that all |
1208 | // brnaches relating to at least one exit can be eliminated. This is unlike |
1209 | // the max trip count, which only guarantees that the backedge can be broken. |
1210 | unsigned TripCount = 0; |
1211 | unsigned TripMultiple = 1; |
1212 | SmallVector<BasicBlock *, 8> ExitingBlocks; |
1213 | L->getExitingBlocks(ExitingBlocks); |
1214 | for (BasicBlock *ExitingBlock : ExitingBlocks) |
1215 | if (unsigned TC = SE.getSmallConstantTripCount(L, ExitingBlock)) |
1216 | if (!TripCount || TC < TripCount) |
1217 | TripCount = TripMultiple = TC; |
1218 | |
1219 | if (!TripCount) { |
1220 | // If no exact trip count is known, determine the trip multiple of either |
1221 | // the loop latch or the single exiting block. |
1222 | // TODO: Relax for multiple exits. |
1223 | BasicBlock *ExitingBlock = L->getLoopLatch(); |
1224 | if (!ExitingBlock || !L->isLoopExiting(ExitingBlock)) |
1225 | ExitingBlock = L->getExitingBlock(); |
1226 | if (ExitingBlock) |
1227 | TripMultiple = SE.getSmallConstantTripMultiple(L, ExitingBlock); |
1228 | } |
1229 | |
1230 | // If the loop contains a convergent operation, the prelude we'd add |
1231 | // to do the first few instructions before we hit the unrolled loop |
1232 | // is unsafe -- it adds a control-flow dependency to the convergent |
1233 | // operation. Therefore restrict remainder loop (try unrolling without). |
1234 | // |
1235 | // TODO: This is quite conservative. In practice, convergent_op() |
1236 | // is likely to be called unconditionally in the loop. In this |
1237 | // case, the program would be ill-formed (on most architectures) |
1238 | // unless n were the same on all threads in a thread group. |
1239 | // Assuming n is the same on all threads, any kind of unrolling is |
1240 | // safe. But currently llvm's notion of convergence isn't powerful |
1241 | // enough to express this. |
1242 | if (Convergent) |
1243 | UP.AllowRemainder = false; |
1244 | |
1245 | // Try to find the trip count upper bound if we cannot find the exact trip |
1246 | // count. |
1247 | unsigned MaxTripCount = 0; |
1248 | bool MaxOrZero = false; |
1249 | if (!TripCount) { |
1250 | MaxTripCount = SE.getSmallConstantMaxTripCount(L); |
1251 | MaxOrZero = SE.isBackedgeTakenCountMaxOrZero(L); |
1252 | } |
1253 | |
1254 | // computeUnrollCount() decides whether it is beneficial to use upper bound to |
1255 | // fully unroll the loop. |
1256 | bool UseUpperBound = false; |
1257 | bool IsCountSetExplicitly = computeUnrollCount( |
1258 | L, TTI, DT, LI, SE, EphValues, &ORE, TripCount, MaxTripCount, MaxOrZero, |
1259 | TripMultiple, LoopSize, UP, PP, UseUpperBound); |
1260 | if (!UP.Count) |
1261 | return LoopUnrollResult::Unmodified; |
1262 | |
1263 | if (PP.PeelCount) { |
1264 | assert(UP.Count == 1 && "Cannot perform peel and unroll in the same step")(static_cast <bool> (UP.Count == 1 && "Cannot perform peel and unroll in the same step" ) ? void (0) : __assert_fail ("UP.Count == 1 && \"Cannot perform peel and unroll in the same step\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp" , 1264, __extension__ __PRETTY_FUNCTION__)); |
1265 | LLVM_DEBUG(dbgs() << "PEELING loop %" << L->getHeader()->getName()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << "PEELING loop %" << L ->getHeader()->getName() << " with iteration count " << PP.PeelCount << "!\n"; } } while (false) |
1266 | << " with iteration count " << PP.PeelCount << "!\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << "PEELING loop %" << L ->getHeader()->getName() << " with iteration count " << PP.PeelCount << "!\n"; } } while (false); |
1267 | ORE.emit([&]() { |
1268 | return OptimizationRemark(DEBUG_TYPE"loop-unroll", "Peeled", L->getStartLoc(), |
1269 | L->getHeader()) |
1270 | << " peeled loop by " << ore::NV("PeelCount", PP.PeelCount) |
1271 | << " iterations"; |
1272 | }); |
1273 | |
1274 | if (peelLoop(L, PP.PeelCount, LI, &SE, &DT, &AC, PreserveLCSSA)) { |
1275 | simplifyLoopAfterUnroll(L, true, LI, &SE, &DT, &AC, &TTI); |
1276 | // If the loop was peeled, we already "used up" the profile information |
1277 | // we had, so we don't want to unroll or peel again. |
1278 | if (PP.PeelProfiledIterations) |
1279 | L->setLoopAlreadyUnrolled(); |
1280 | return LoopUnrollResult::PartiallyUnrolled; |
1281 | } |
1282 | return LoopUnrollResult::Unmodified; |
1283 | } |
1284 | |
1285 | // At this point, UP.Runtime indicates that run-time unrolling is allowed. |
1286 | // However, we only want to actually perform it if we don't know the trip |
1287 | // count and the unroll count doesn't divide the known trip multiple. |
1288 | // TODO: This decision should probably be pushed up into |
1289 | // computeUnrollCount(). |
1290 | UP.Runtime &= TripCount == 0 && TripMultiple % UP.Count != 0; |
1291 | |
1292 | // Save loop properties before it is transformed. |
1293 | MDNode *OrigLoopID = L->getLoopID(); |
1294 | |
1295 | // Unroll the loop. |
1296 | Loop *RemainderLoop = nullptr; |
1297 | LoopUnrollResult UnrollResult = UnrollLoop( |
1298 | L, |
1299 | {UP.Count, UP.Force, UP.Runtime, UP.AllowExpensiveTripCount, |
1300 | UP.UnrollRemainder, ForgetAllSCEV}, |
1301 | LI, &SE, &DT, &AC, &TTI, &ORE, PreserveLCSSA, &RemainderLoop); |
1302 | if (UnrollResult == LoopUnrollResult::Unmodified) |
1303 | return LoopUnrollResult::Unmodified; |
1304 | |
1305 | if (RemainderLoop) { |
1306 | Optional<MDNode *> RemainderLoopID = |
1307 | makeFollowupLoopID(OrigLoopID, {LLVMLoopUnrollFollowupAll, |
1308 | LLVMLoopUnrollFollowupRemainder}); |
1309 | if (RemainderLoopID.hasValue()) |
1310 | RemainderLoop->setLoopID(RemainderLoopID.getValue()); |
1311 | } |
1312 | |
1313 | if (UnrollResult != LoopUnrollResult::FullyUnrolled) { |
1314 | Optional<MDNode *> NewLoopID = |
1315 | makeFollowupLoopID(OrigLoopID, {LLVMLoopUnrollFollowupAll, |
1316 | LLVMLoopUnrollFollowupUnrolled}); |
1317 | if (NewLoopID.hasValue()) { |
1318 | L->setLoopID(NewLoopID.getValue()); |
1319 | |
1320 | // Do not setLoopAlreadyUnrolled if loop attributes have been specified |
1321 | // explicitly. |
1322 | return UnrollResult; |
1323 | } |
1324 | } |
1325 | |
1326 | // If loop has an unroll count pragma or unrolled by explicitly set count |
1327 | // mark loop as unrolled to prevent unrolling beyond that requested. |
1328 | if (UnrollResult != LoopUnrollResult::FullyUnrolled && IsCountSetExplicitly) |
1329 | L->setLoopAlreadyUnrolled(); |
1330 | |
1331 | return UnrollResult; |
1332 | } |
1333 | |
1334 | namespace { |
1335 | |
1336 | class LoopUnroll : public LoopPass { |
1337 | public: |
1338 | static char ID; // Pass ID, replacement for typeid |
1339 | |
1340 | int OptLevel; |
1341 | |
1342 | /// If false, use a cost model to determine whether unrolling of a loop is |
1343 | /// profitable. If true, only loops that explicitly request unrolling via |
1344 | /// metadata are considered. All other loops are skipped. |
1345 | bool OnlyWhenForced; |
1346 | |
1347 | /// If false, when SCEV is invalidated, only forget everything in the |
1348 | /// top-most loop (call forgetTopMostLoop), of the loop being processed. |
1349 | /// Otherwise, forgetAllLoops and rebuild when needed next. |
1350 | bool ForgetAllSCEV; |
1351 | |
1352 | Optional<unsigned> ProvidedCount; |
1353 | Optional<unsigned> ProvidedThreshold; |
1354 | Optional<bool> ProvidedAllowPartial; |
1355 | Optional<bool> ProvidedRuntime; |
1356 | Optional<bool> ProvidedUpperBound; |
1357 | Optional<bool> ProvidedAllowPeeling; |
1358 | Optional<bool> ProvidedAllowProfileBasedPeeling; |
1359 | Optional<unsigned> ProvidedFullUnrollMaxCount; |
1360 | |
1361 | LoopUnroll(int OptLevel = 2, bool OnlyWhenForced = false, |
1362 | bool ForgetAllSCEV = false, Optional<unsigned> Threshold = None, |
1363 | Optional<unsigned> Count = None, |
1364 | Optional<bool> AllowPartial = None, Optional<bool> Runtime = None, |
1365 | Optional<bool> UpperBound = None, |
1366 | Optional<bool> AllowPeeling = None, |
1367 | Optional<bool> AllowProfileBasedPeeling = None, |
1368 | Optional<unsigned> ProvidedFullUnrollMaxCount = None) |
1369 | : LoopPass(ID), OptLevel(OptLevel), OnlyWhenForced(OnlyWhenForced), |
1370 | ForgetAllSCEV(ForgetAllSCEV), ProvidedCount(std::move(Count)), |
1371 | ProvidedThreshold(Threshold), ProvidedAllowPartial(AllowPartial), |
1372 | ProvidedRuntime(Runtime), ProvidedUpperBound(UpperBound), |
1373 | ProvidedAllowPeeling(AllowPeeling), |
1374 | ProvidedAllowProfileBasedPeeling(AllowProfileBasedPeeling), |
1375 | ProvidedFullUnrollMaxCount(ProvidedFullUnrollMaxCount) { |
1376 | initializeLoopUnrollPass(*PassRegistry::getPassRegistry()); |
1377 | } |
1378 | |
1379 | bool runOnLoop(Loop *L, LPPassManager &LPM) override { |
1380 | if (skipLoop(L)) |
1381 | return false; |
1382 | |
1383 | Function &F = *L->getHeader()->getParent(); |
1384 | |
1385 | auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); |
1386 | LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); |
1387 | ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE(); |
1388 | const TargetTransformInfo &TTI = |
1389 | getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); |
1390 | auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); |
1391 | // For the old PM, we can't use OptimizationRemarkEmitter as an analysis |
1392 | // pass. Function analyses need to be preserved across loop transformations |
1393 | // but ORE cannot be preserved (see comment before the pass definition). |
1394 | OptimizationRemarkEmitter ORE(&F); |
1395 | bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); |
1396 | |
1397 | LoopUnrollResult Result = tryToUnrollLoop( |
1398 | L, DT, LI, SE, TTI, AC, ORE, nullptr, nullptr, PreserveLCSSA, OptLevel, |
1399 | OnlyWhenForced, ForgetAllSCEV, ProvidedCount, ProvidedThreshold, |
1400 | ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound, |
1401 | ProvidedAllowPeeling, ProvidedAllowProfileBasedPeeling, |
1402 | ProvidedFullUnrollMaxCount); |
1403 | |
1404 | if (Result == LoopUnrollResult::FullyUnrolled) |
1405 | LPM.markLoopAsDeleted(*L); |
1406 | |
1407 | return Result != LoopUnrollResult::Unmodified; |
1408 | } |
1409 | |
1410 | /// This transformation requires natural loop information & requires that |
1411 | /// loop preheaders be inserted into the CFG... |
1412 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
1413 | AU.addRequired<AssumptionCacheTracker>(); |
1414 | AU.addRequired<TargetTransformInfoWrapperPass>(); |
1415 | // FIXME: Loop passes are required to preserve domtree, and for now we just |
1416 | // recreate dom info if anything gets unrolled. |
1417 | getLoopAnalysisUsage(AU); |
1418 | } |
1419 | }; |
1420 | |
1421 | } // end anonymous namespace |
1422 | |
1423 | char LoopUnroll::ID = 0; |
1424 | |
1425 | INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false)static void *initializeLoopUnrollPassOnce(PassRegistry &Registry ) { |
1426 | INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)initializeAssumptionCacheTrackerPass(Registry); |
1427 | INITIALIZE_PASS_DEPENDENCY(LoopPass)initializeLoopPassPass(Registry); |
1428 | INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)initializeTargetTransformInfoWrapperPassPass(Registry); |
1429 | INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)PassInfo *PI = new PassInfo( "Unroll loops", "loop-unroll", & LoopUnroll::ID, PassInfo::NormalCtor_t(callDefaultCtor<LoopUnroll >), false, false); Registry.registerPass(*PI, true); return PI; } static llvm::once_flag InitializeLoopUnrollPassFlag; void llvm::initializeLoopUnrollPass(PassRegistry &Registry) { llvm::call_once(InitializeLoopUnrollPassFlag, initializeLoopUnrollPassOnce , std::ref(Registry)); } |
1430 | |
1431 | Pass *llvm::createLoopUnrollPass(int OptLevel, bool OnlyWhenForced, |
1432 | bool ForgetAllSCEV, int Threshold, int Count, |
1433 | int AllowPartial, int Runtime, int UpperBound, |
1434 | int AllowPeeling) { |
1435 | // TODO: It would make more sense for this function to take the optionals |
1436 | // directly, but that's dangerous since it would silently break out of tree |
1437 | // callers. |
1438 | return new LoopUnroll( |
1439 | OptLevel, OnlyWhenForced, ForgetAllSCEV, |
1440 | Threshold == -1 ? None : Optional<unsigned>(Threshold), |
1441 | Count == -1 ? None : Optional<unsigned>(Count), |
1442 | AllowPartial == -1 ? None : Optional<bool>(AllowPartial), |
1443 | Runtime == -1 ? None : Optional<bool>(Runtime), |
1444 | UpperBound == -1 ? None : Optional<bool>(UpperBound), |
1445 | AllowPeeling == -1 ? None : Optional<bool>(AllowPeeling)); |
1446 | } |
1447 | |
1448 | Pass *llvm::createSimpleLoopUnrollPass(int OptLevel, bool OnlyWhenForced, |
1449 | bool ForgetAllSCEV) { |
1450 | return createLoopUnrollPass(OptLevel, OnlyWhenForced, ForgetAllSCEV, -1, -1, |
1451 | 0, 0, 0, 1); |
1452 | } |
1453 | |
1454 | PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager &AM, |
1455 | LoopStandardAnalysisResults &AR, |
1456 | LPMUpdater &Updater) { |
1457 | // For the new PM, we can't use OptimizationRemarkEmitter as an analysis |
1458 | // pass. Function analyses need to be preserved across loop transformations |
1459 | // but ORE cannot be preserved (see comment before the pass definition). |
1460 | OptimizationRemarkEmitter ORE(L.getHeader()->getParent()); |
1461 | |
1462 | // Keep track of the previous loop structure so we can identify new loops |
1463 | // created by unrolling. |
1464 | Loop *ParentL = L.getParentLoop(); |
1465 | SmallPtrSet<Loop *, 4> OldLoops; |
1466 | if (ParentL) |
1467 | OldLoops.insert(ParentL->begin(), ParentL->end()); |
1468 | else |
1469 | OldLoops.insert(AR.LI.begin(), AR.LI.end()); |
1470 | |
1471 | std::string LoopName = std::string(L.getName()); |
1472 | |
1473 | bool Changed = tryToUnrollLoop(&L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, ORE, |
1474 | /*BFI*/ nullptr, /*PSI*/ nullptr, |
1475 | /*PreserveLCSSA*/ true, OptLevel, |
1476 | OnlyWhenForced, ForgetSCEV, /*Count*/ None, |
1477 | /*Threshold*/ None, /*AllowPartial*/ false, |
1478 | /*Runtime*/ false, /*UpperBound*/ false, |
1479 | /*AllowPeeling*/ true, |
1480 | /*AllowProfileBasedPeeling*/ false, |
1481 | /*FullUnrollMaxCount*/ None) != |
1482 | LoopUnrollResult::Unmodified; |
1483 | if (!Changed) |
1484 | return PreservedAnalyses::all(); |
1485 | |
1486 | // The parent must not be damaged by unrolling! |
1487 | #ifndef NDEBUG |
1488 | if (ParentL) |
1489 | ParentL->verifyLoop(); |
1490 | #endif |
1491 | |
1492 | // Unrolling can do several things to introduce new loops into a loop nest: |
1493 | // - Full unrolling clones child loops within the current loop but then |
1494 | // removes the current loop making all of the children appear to be new |
1495 | // sibling loops. |
1496 | // |
1497 | // When a new loop appears as a sibling loop after fully unrolling, |
1498 | // its nesting structure has fundamentally changed and we want to revisit |
1499 | // it to reflect that. |
1500 | // |
1501 | // When unrolling has removed the current loop, we need to tell the |
1502 | // infrastructure that it is gone. |
1503 | // |
1504 | // Finally, we support a debugging/testing mode where we revisit child loops |
1505 | // as well. These are not expected to require further optimizations as either |
1506 | // they or the loop they were cloned from have been directly visited already. |
1507 | // But the debugging mode allows us to check this assumption. |
1508 | bool IsCurrentLoopValid = false; |
1509 | SmallVector<Loop *, 4> SibLoops; |
1510 | if (ParentL) |
1511 | SibLoops.append(ParentL->begin(), ParentL->end()); |
1512 | else |
1513 | SibLoops.append(AR.LI.begin(), AR.LI.end()); |
1514 | erase_if(SibLoops, [&](Loop *SibLoop) { |
1515 | if (SibLoop == &L) { |
1516 | IsCurrentLoopValid = true; |
1517 | return true; |
1518 | } |
1519 | |
1520 | // Otherwise erase the loop from the list if it was in the old loops. |
1521 | return OldLoops.contains(SibLoop); |
1522 | }); |
1523 | Updater.addSiblingLoops(SibLoops); |
1524 | |
1525 | if (!IsCurrentLoopValid) { |
1526 | Updater.markLoopAsDeleted(L, LoopName); |
1527 | } else { |
1528 | // We can only walk child loops if the current loop remained valid. |
1529 | if (UnrollRevisitChildLoops) { |
1530 | // Walk *all* of the child loops. |
1531 | SmallVector<Loop *, 4> ChildLoops(L.begin(), L.end()); |
1532 | Updater.addChildLoops(ChildLoops); |
1533 | } |
1534 | } |
1535 | |
1536 | return getLoopPassPreservedAnalyses(); |
1537 | } |
1538 | |
1539 | PreservedAnalyses LoopUnrollPass::run(Function &F, |
1540 | FunctionAnalysisManager &AM) { |
1541 | auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F); |
1542 | auto &LI = AM.getResult<LoopAnalysis>(F); |
1543 | auto &TTI = AM.getResult<TargetIRAnalysis>(F); |
1544 | auto &DT = AM.getResult<DominatorTreeAnalysis>(F); |
1545 | auto &AC = AM.getResult<AssumptionAnalysis>(F); |
1546 | auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F); |
1547 | |
1548 | LoopAnalysisManager *LAM = nullptr; |
1549 | if (auto *LAMProxy = AM.getCachedResult<LoopAnalysisManagerFunctionProxy>(F)) |
1550 | LAM = &LAMProxy->getManager(); |
1551 | |
1552 | auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F); |
1553 | ProfileSummaryInfo *PSI = |
1554 | MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent()); |
1555 | auto *BFI = (PSI && PSI->hasProfileSummary()) ? |
1556 | &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr; |
1557 | |
1558 | bool Changed = false; |
1559 | |
1560 | // The unroller requires loops to be in simplified form, and also needs LCSSA. |
1561 | // Since simplification may add new inner loops, it has to run before the |
1562 | // legality and profitability checks. This means running the loop unroller |
1563 | // will simplify all loops, regardless of whether anything end up being |
1564 | // unrolled. |
1565 | for (auto &L : LI) { |
1566 | Changed |= |
1567 | simplifyLoop(L, &DT, &LI, &SE, &AC, nullptr, false /* PreserveLCSSA */); |
1568 | Changed |= formLCSSARecursively(*L, DT, &LI, &SE); |
1569 | } |
1570 | |
1571 | // Add the loop nests in the reverse order of LoopInfo. See method |
1572 | // declaration. |
1573 | SmallPriorityWorklist<Loop *, 4> Worklist; |
1574 | appendLoopsToWorklist(LI, Worklist); |
1575 | |
1576 | while (!Worklist.empty()) { |
1577 | // Because the LoopInfo stores the loops in RPO, we walk the worklist |
1578 | // from back to front so that we work forward across the CFG, which |
1579 | // for unrolling is only needed to get optimization remarks emitted in |
1580 | // a forward order. |
1581 | Loop &L = *Worklist.pop_back_val(); |
1582 | #ifndef NDEBUG |
1583 | Loop *ParentL = L.getParentLoop(); |
1584 | #endif |
1585 | |
1586 | // Check if the profile summary indicates that the profiled application |
1587 | // has a huge working set size, in which case we disable peeling to avoid |
1588 | // bloating it further. |
1589 | Optional<bool> LocalAllowPeeling = UnrollOpts.AllowPeeling; |
1590 | if (PSI && PSI->hasHugeWorkingSetSize()) |
1591 | LocalAllowPeeling = false; |
1592 | std::string LoopName = std::string(L.getName()); |
1593 | // The API here is quite complex to call and we allow to select some |
1594 | // flavors of unrolling during construction time (by setting UnrollOpts). |
1595 | LoopUnrollResult Result = tryToUnrollLoop( |
1596 | &L, DT, &LI, SE, TTI, AC, ORE, BFI, PSI, |
1597 | /*PreserveLCSSA*/ true, UnrollOpts.OptLevel, UnrollOpts.OnlyWhenForced, |
1598 | UnrollOpts.ForgetSCEV, /*Count*/ None, |
1599 | /*Threshold*/ None, UnrollOpts.AllowPartial, UnrollOpts.AllowRuntime, |
1600 | UnrollOpts.AllowUpperBound, LocalAllowPeeling, |
1601 | UnrollOpts.AllowProfileBasedPeeling, UnrollOpts.FullUnrollMaxCount); |
1602 | Changed |= Result != LoopUnrollResult::Unmodified; |
1603 | |
1604 | // The parent must not be damaged by unrolling! |
1605 | #ifndef NDEBUG |
1606 | if (Result != LoopUnrollResult::Unmodified && ParentL) |
1607 | ParentL->verifyLoop(); |
1608 | #endif |
1609 | |
1610 | // Clear any cached analysis results for L if we removed it completely. |
1611 | if (LAM && Result == LoopUnrollResult::FullyUnrolled) |
1612 | LAM->clear(L, LoopName); |
1613 | } |
1614 | |
1615 | if (!Changed) |
1616 | return PreservedAnalyses::all(); |
1617 | |
1618 | return getLoopPassPreservedAnalyses(); |
1619 | } |
1620 | |
1621 | void LoopUnrollPass::printPipeline( |
1622 | raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) { |
1623 | static_cast<PassInfoMixin<LoopUnrollPass> *>(this)->printPipeline( |
1624 | OS, MapClassName2PassName); |
1625 | OS << "<"; |
1626 | if (UnrollOpts.AllowPartial != None) |
1627 | OS << (UnrollOpts.AllowPartial.getValue() ? "" : "no-") << "partial;"; |
1628 | if (UnrollOpts.AllowPeeling != None) |
1629 | OS << (UnrollOpts.AllowPeeling.getValue() ? "" : "no-") << "peeling;"; |
1630 | if (UnrollOpts.AllowRuntime != None) |
1631 | OS << (UnrollOpts.AllowRuntime.getValue() ? "" : "no-") << "runtime;"; |
1632 | if (UnrollOpts.AllowUpperBound != None) |
1633 | OS << (UnrollOpts.AllowUpperBound.getValue() ? "" : "no-") << "upperbound;"; |
1634 | if (UnrollOpts.AllowProfileBasedPeeling != None) |
1635 | OS << (UnrollOpts.AllowProfileBasedPeeling.getValue() ? "" : "no-") |
1636 | << "profile-peeling;"; |
1637 | if (UnrollOpts.FullUnrollMaxCount != None) |
1638 | OS << "full-unroll-max=" << UnrollOpts.FullUnrollMaxCount << ";"; |
1639 | OS << "O" << UnrollOpts.OptLevel; |
1640 | OS << ">"; |
1641 | } |