/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp

Bug Summary

File:	llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
Warning:	line 987, column 5 Value stored to 'TripCount' is never read

Annotated Source Code

Press '?' to see keyboard shortcuts

Show analyzer invocation

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name LoopUnrollPass.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/build-llvm -resource-dir /usr/lib/llvm-14/lib/clang/14.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I lib/Transforms/Scalar -I /build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/Scalar -I include -I /build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/include -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-14/lib/clang/14.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-command-line-argument -Wno-unknown-warning-option -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/build-llvm -ferror-limit 19 -fvisibility-inlines-hidden -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2021-11-10-160236-22541-1 -x c++ /build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp

1	//===- LoopUnroll.cpp - Loop unroller pass --------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This pass implements a simple loop unroller. It works best when loops have
10	// been canonicalized by the -indvars pass, allowing it to determine the trip
11	// counts of loops easily.
12	//===----------------------------------------------------------------------===//
13
14	#include "llvm/Transforms/Scalar/LoopUnrollPass.h"
15	#include "llvm/ADT/DenseMap.h"
16	#include "llvm/ADT/DenseMapInfo.h"
17	#include "llvm/ADT/DenseSet.h"
18	#include "llvm/ADT/None.h"
19	#include "llvm/ADT/Optional.h"
20	#include "llvm/ADT/STLExtras.h"
21	#include "llvm/ADT/SetVector.h"
22	#include "llvm/ADT/SmallPtrSet.h"
23	#include "llvm/ADT/SmallVector.h"
24	#include "llvm/ADT/StringRef.h"
25	#include "llvm/Analysis/AssumptionCache.h"
26	#include "llvm/Analysis/BlockFrequencyInfo.h"
27	#include "llvm/Analysis/CodeMetrics.h"
28	#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
29	#include "llvm/Analysis/LoopAnalysisManager.h"
30	#include "llvm/Analysis/LoopInfo.h"
31	#include "llvm/Analysis/LoopPass.h"
32	#include "llvm/Analysis/LoopUnrollAnalyzer.h"
33	#include "llvm/Analysis/OptimizationRemarkEmitter.h"
34	#include "llvm/Analysis/ProfileSummaryInfo.h"
35	#include "llvm/Analysis/ScalarEvolution.h"
36	#include "llvm/Analysis/TargetTransformInfo.h"
37	#include "llvm/IR/BasicBlock.h"
38	#include "llvm/IR/CFG.h"
39	#include "llvm/IR/Constant.h"
40	#include "llvm/IR/Constants.h"
41	#include "llvm/IR/DiagnosticInfo.h"
42	#include "llvm/IR/Dominators.h"
43	#include "llvm/IR/Function.h"
44	#include "llvm/IR/Instruction.h"
45	#include "llvm/IR/Instructions.h"
46	#include "llvm/IR/IntrinsicInst.h"
47	#include "llvm/IR/Metadata.h"
48	#include "llvm/IR/PassManager.h"
49	#include "llvm/InitializePasses.h"
50	#include "llvm/Pass.h"
51	#include "llvm/Support/Casting.h"
52	#include "llvm/Support/CommandLine.h"
53	#include "llvm/Support/Debug.h"
54	#include "llvm/Support/ErrorHandling.h"
55	#include "llvm/Support/raw_ostream.h"
56	#include "llvm/Transforms/Scalar.h"
57	#include "llvm/Transforms/Scalar/LoopPassManager.h"
58	#include "llvm/Transforms/Utils.h"
59	#include "llvm/Transforms/Utils/LoopPeel.h"
60	#include "llvm/Transforms/Utils/LoopSimplify.h"
61	#include "llvm/Transforms/Utils/LoopUtils.h"
62	#include "llvm/Transforms/Utils/SizeOpts.h"
63	#include "llvm/Transforms/Utils/UnrollLoop.h"
64	#include <algorithm>
65	#include <cassert>
66	#include <cstdint>
67	#include <limits>
68	#include <string>
69	#include <tuple>
70	#include <utility>
71
72	using namespace llvm;
73
74	#define DEBUG_TYPE"loop-unroll" "loop-unroll"
75
76	cl::opt<bool> llvm::ForgetSCEVInLoopUnroll(
77	"forget-scev-loop-unroll", cl::init(false), cl::Hidden,
78	cl::desc("Forget everything in SCEV when doing LoopUnroll, instead of just"
79	" the current top-most loop. This is sometimes preferred to reduce"
80	" compile time."));
81
82	static cl::opt<unsigned>
83	UnrollThreshold("unroll-threshold", cl::Hidden,
84	cl::desc("The cost threshold for loop unrolling"));
85
86	static cl::opt<unsigned>
87	UnrollOptSizeThreshold(
88	"unroll-optsize-threshold", cl::init(0), cl::Hidden,
89	cl::desc("The cost threshold for loop unrolling when optimizing for "
90	"size"));
91
92	static cl::opt<unsigned> UnrollPartialThreshold(
93	"unroll-partial-threshold", cl::Hidden,
94	cl::desc("The cost threshold for partial loop unrolling"));
95
96	static cl::opt<unsigned> UnrollMaxPercentThresholdBoost(
97	"unroll-max-percent-threshold-boost", cl::init(400), cl::Hidden,
98	cl::desc("The maximum 'boost' (represented as a percentage >= 100) applied "
99	"to the threshold when aggressively unrolling a loop due to the "
100	"dynamic cost savings. If completely unrolling a loop will reduce "
101	"the total runtime from X to Y, we boost the loop unroll "
102	"threshold to DefaultThreshold*std::min(MaxPercentThresholdBoost, "
103	"X/Y). This limit avoids excessive code bloat."));
104
105	static cl::opt<unsigned> UnrollMaxIterationsCountToAnalyze(
106	"unroll-max-iteration-count-to-analyze", cl::init(10), cl::Hidden,
107	cl::desc("Don't allow loop unrolling to simulate more than this number of"
108	"iterations when checking full unroll profitability"));
109
110	static cl::opt<unsigned> UnrollCount(
111	"unroll-count", cl::Hidden,
112	cl::desc("Use this unroll count for all loops including those with "
113	"unroll_count pragma values, for testing purposes"));
114
115	static cl::opt<unsigned> UnrollMaxCount(
116	"unroll-max-count", cl::Hidden,
117	cl::desc("Set the max unroll count for partial and runtime unrolling, for"
118	"testing purposes"));
119
120	static cl::opt<unsigned> UnrollFullMaxCount(
121	"unroll-full-max-count", cl::Hidden,
122	cl::desc(
123	"Set the max unroll count for full unrolling, for testing purposes"));
124
125	static cl::opt<bool>
126	UnrollAllowPartial("unroll-allow-partial", cl::Hidden,
127	cl::desc("Allows loops to be partially unrolled until "
128	"-unroll-threshold loop size is reached."));
129
130	static cl::opt<bool> UnrollAllowRemainder(
131	"unroll-allow-remainder", cl::Hidden,
132	cl::desc("Allow generation of a loop remainder (extra iterations) "
133	"when unrolling a loop."));
134
135	static cl::opt<bool>
136	UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::Hidden,
137	cl::desc("Unroll loops with run-time trip counts"));
138
139	static cl::opt<unsigned> UnrollMaxUpperBound(
140	"unroll-max-upperbound", cl::init(8), cl::Hidden,
141	cl::desc(
142	"The max of trip count upper bound that is considered in unrolling"));
143
144	static cl::opt<unsigned> PragmaUnrollThreshold(
145	"pragma-unroll-threshold", cl::init(16 * 1024), cl::Hidden,
146	cl::desc("Unrolled size limit for loops with an unroll(full) or "
147	"unroll_count pragma."));
148
149	static cl::opt<unsigned> FlatLoopTripCountThreshold(
150	"flat-loop-tripcount-threshold", cl::init(5), cl::Hidden,
151	cl::desc("If the runtime tripcount for the loop is lower than the "
152	"threshold, the loop is considered as flat and will be less "
153	"aggressively unrolled."));
154
155	static cl::opt<bool> UnrollUnrollRemainder(
156	"unroll-remainder", cl::Hidden,
157	cl::desc("Allow the loop remainder to be unrolled."));
158
159	// This option isn't ever intended to be enabled, it serves to allow
160	// experiments to check the assumptions about when this kind of revisit is
161	// necessary.
162	static cl::opt<bool> UnrollRevisitChildLoops(
163	"unroll-revisit-child-loops", cl::Hidden,
164	cl::desc("Enqueue and re-visit child loops in the loop PM after unrolling. "
165	"This shouldn't typically be needed as child loops (or their "
166	"clones) were already visited."));
167
168	static cl::opt<unsigned> UnrollThresholdAggressive(
169	"unroll-threshold-aggressive", cl::init(300), cl::Hidden,
170	cl::desc("Threshold (max size of unrolled loop) to use in aggressive (O3) "
171	"optimizations"));
172	static cl::opt<unsigned>
173	UnrollThresholdDefault("unroll-threshold-default", cl::init(150),
174	cl::Hidden,
175	cl::desc("Default threshold (max size of unrolled "
176	"loop), used in all but O3 optimizations"));
177
178	/// A magic value for use with the Threshold parameter to indicate
179	/// that the loop unroll should be performed regardless of how much
180	/// code expansion would result.
181	static const unsigned NoThreshold = std::numeric_limits<unsigned>::max();
182
183	/// Gather the various unrolling parameters based on the defaults, compiler
184	/// flags, TTI overrides and user specified parameters.
185	TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
186	Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI,
187	BlockFrequencyInfo BFI, ProfileSummaryInfo PSI,
188	OptimizationRemarkEmitter &ORE, int OptLevel,
189	Optional<unsigned> UserThreshold, Optional<unsigned> UserCount,
190	Optional<bool> UserAllowPartial, Optional<bool> UserRuntime,
191	Optional<bool> UserUpperBound, Optional<unsigned> UserFullUnrollMaxCount) {
192	TargetTransformInfo::UnrollingPreferences UP;
193
194	// Set up the defaults
195	UP.Threshold =
196	OptLevel > 2 ? UnrollThresholdAggressive : UnrollThresholdDefault;
197	UP.MaxPercentThresholdBoost = 400;
198	UP.OptSizeThreshold = UnrollOptSizeThreshold;
199	UP.PartialThreshold = 150;
200	UP.PartialOptSizeThreshold = UnrollOptSizeThreshold;
201	UP.Count = 0;
202	UP.DefaultUnrollRuntimeCount = 8;
203	UP.MaxCount = std::numeric_limits<unsigned>::max();
204	UP.FullUnrollMaxCount = std::numeric_limits<unsigned>::max();
205	UP.BEInsns = 2;
206	UP.Partial = false;
207	UP.Runtime = false;
208	UP.AllowRemainder = true;
209	UP.UnrollRemainder = false;
210	UP.AllowExpensiveTripCount = false;
211	UP.Force = false;
212	UP.UpperBound = false;
213	UP.UnrollAndJam = false;
214	UP.UnrollAndJamInnerLoopThreshold = 60;
215	UP.MaxIterationsCountToAnalyze = UnrollMaxIterationsCountToAnalyze;
216
217	// Override with any target specific settings
218	TTI.getUnrollingPreferences(L, SE, UP, &ORE);
219
220	// Apply size attributes
221	bool OptForSize = L->getHeader()->getParent()->hasOptSize() \|\|
222	// Let unroll hints / pragmas take precedence over PGSO.
223	(hasUnrollTransformation(L) != TM_ForcedByUser &&
224	llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI,
225	PGSOQueryType::IRPass));
226	if (OptForSize) {
227	UP.Threshold = UP.OptSizeThreshold;
228	UP.PartialThreshold = UP.PartialOptSizeThreshold;
229	UP.MaxPercentThresholdBoost = 100;
230	}
231
232	// Apply any user values specified by cl::opt
233	if (UnrollThreshold.getNumOccurrences() > 0)
234	UP.Threshold = UnrollThreshold;
235	if (UnrollPartialThreshold.getNumOccurrences() > 0)
236	UP.PartialThreshold = UnrollPartialThreshold;
237	if (UnrollMaxPercentThresholdBoost.getNumOccurrences() > 0)
238	UP.MaxPercentThresholdBoost = UnrollMaxPercentThresholdBoost;
239	if (UnrollMaxCount.getNumOccurrences() > 0)
240	UP.MaxCount = UnrollMaxCount;
241	if (UnrollFullMaxCount.getNumOccurrences() > 0)
242	UP.FullUnrollMaxCount = UnrollFullMaxCount;
243	if (UnrollAllowPartial.getNumOccurrences() > 0)
244	UP.Partial = UnrollAllowPartial;
245	if (UnrollAllowRemainder.getNumOccurrences() > 0)
246	UP.AllowRemainder = UnrollAllowRemainder;
247	if (UnrollRuntime.getNumOccurrences() > 0)
248	UP.Runtime = UnrollRuntime;
249	if (UnrollMaxUpperBound == 0)
250	UP.UpperBound = false;
251	if (UnrollUnrollRemainder.getNumOccurrences() > 0)
252	UP.UnrollRemainder = UnrollUnrollRemainder;
253	if (UnrollMaxIterationsCountToAnalyze.getNumOccurrences() > 0)
254	UP.MaxIterationsCountToAnalyze = UnrollMaxIterationsCountToAnalyze;
255
256	// Apply user values provided by argument
257	if (UserThreshold.hasValue()) {
258	UP.Threshold = *UserThreshold;
259	UP.PartialThreshold = *UserThreshold;
260	}
261	if (UserCount.hasValue())
262	UP.Count = *UserCount;
263	if (UserAllowPartial.hasValue())
264	UP.Partial = *UserAllowPartial;
265	if (UserRuntime.hasValue())
266	UP.Runtime = *UserRuntime;
267	if (UserUpperBound.hasValue())
268	UP.UpperBound = *UserUpperBound;
269	if (UserFullUnrollMaxCount.hasValue())
270	UP.FullUnrollMaxCount = *UserFullUnrollMaxCount;
271
272	return UP;
273	}
274
275	namespace {
276
277	/// A struct to densely store the state of an instruction after unrolling at
278	/// each iteration.
279	///
280	/// This is designed to work like a tuple of <Instruction *, int> for the
281	/// purposes of hashing and lookup, but to be able to associate two boolean
282	/// states with each key.
283	struct UnrolledInstState {
284	Instruction *I;
285	int Iteration : 30;
286	unsigned IsFree : 1;
287	unsigned IsCounted : 1;
288	};
289
290	/// Hashing and equality testing for a set of the instruction states.
291	struct UnrolledInstStateKeyInfo {
292	using PtrInfo = DenseMapInfo<Instruction *>;
293	using PairInfo = DenseMapInfo<std::pair<Instruction *, int>>;
294
295	static inline UnrolledInstState getEmptyKey() {
296	return {PtrInfo::getEmptyKey(), 0, 0, 0};
297	}
298
299	static inline UnrolledInstState getTombstoneKey() {
300	return {PtrInfo::getTombstoneKey(), 0, 0, 0};
301	}
302
303	static inline unsigned getHashValue(const UnrolledInstState &S) {
304	return PairInfo::getHashValue({S.I, S.Iteration});
305	}
306
307	static inline bool isEqual(const UnrolledInstState &LHS,
308	const UnrolledInstState &RHS) {
309	return PairInfo::isEqual({LHS.I, LHS.Iteration}, {RHS.I, RHS.Iteration});
310	}
311	};
312
313	struct EstimatedUnrollCost {
314	/// The estimated cost after unrolling.
315	unsigned UnrolledCost;
316
317	/// The estimated dynamic cost of executing the instructions in the
318	/// rolled form.
319	unsigned RolledDynamicCost;
320	};
321
322	struct PragmaInfo {
323	PragmaInfo(bool UUC, bool PFU, unsigned PC, bool PEU)
324	: UserUnrollCount(UUC), PragmaFullUnroll(PFU), PragmaCount(PC),
325	PragmaEnableUnroll(PEU) {}
326	const bool UserUnrollCount;
327	const bool PragmaFullUnroll;
328	const unsigned PragmaCount;
329	const bool PragmaEnableUnroll;
330	};
331
332	} // end anonymous namespace
333
334	/// Figure out if the loop is worth full unrolling.
335	///
336	/// Complete loop unrolling can make some loads constant, and we need to know
337	/// if that would expose any further optimization opportunities. This routine
338	/// estimates this optimization. It computes cost of unrolled loop
339	/// (UnrolledCost) and dynamic cost of the original loop (RolledDynamicCost). By
340	/// dynamic cost we mean that we won't count costs of blocks that are known not
341	/// to be executed (i.e. if we have a branch in the loop and we know that at the
342	/// given iteration its condition would be resolved to true, we won't add up the
343	/// cost of the 'false'-block).
344	/// \returns Optional value, holding the RolledDynamicCost and UnrolledCost. If
345	/// the analysis failed (no benefits expected from the unrolling, or the loop is
346	/// too big to analyze), the returned value is None.
347	static Optional<EstimatedUnrollCost> analyzeLoopUnrollCost(
348	const Loop *L, unsigned TripCount, DominatorTree &DT, ScalarEvolution &SE,
349	const SmallPtrSetImpl<const Value *> &EphValues,
350	const TargetTransformInfo &TTI, unsigned MaxUnrolledLoopSize,
351	unsigned MaxIterationsCountToAnalyze) {
352	// We want to be able to scale offsets by the trip count and add more offsets
353	// to them without checking for overflows, and we already don't want to
354	// analyze massive trip counts, so we force the max to be reasonably small.
355	assert(MaxIterationsCountToAnalyze <(static_cast <bool> (MaxIterationsCountToAnalyze < ( unsigned)(std::numeric_limits<int>::max() / 2) && "The unroll iterations max is too large!") ? void (0) : __assert_fail ("MaxIterationsCountToAnalyze < (unsigned)(std::numeric_limits<int>::max() / 2) && \"The unroll iterations max is too large!\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp" , 357, __extension__ __PRETTY_FUNCTION__))
356	(unsigned)(std::numeric_limits<int>::max() / 2) &&(static_cast <bool> (MaxIterationsCountToAnalyze < ( unsigned)(std::numeric_limits<int>::max() / 2) && "The unroll iterations max is too large!") ? void (0) : __assert_fail ("MaxIterationsCountToAnalyze < (unsigned)(std::numeric_limits<int>::max() / 2) && \"The unroll iterations max is too large!\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp" , 357, __extension__ __PRETTY_FUNCTION__))
357	"The unroll iterations max is too large!")(static_cast <bool> (MaxIterationsCountToAnalyze < ( unsigned)(std::numeric_limits<int>::max() / 2) && "The unroll iterations max is too large!") ? void (0) : __assert_fail ("MaxIterationsCountToAnalyze < (unsigned)(std::numeric_limits<int>::max() / 2) && \"The unroll iterations max is too large!\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp" , 357, __extension__ __PRETTY_FUNCTION__));
358
359	// Only analyze inner loops. We can't properly estimate cost of nested loops
360	// and we won't visit inner loops again anyway.
361	if (!L->isInnermost())
362	return None;
363
364	// Don't simulate loops with a big or unknown tripcount
365	if (!TripCount \|\| TripCount > MaxIterationsCountToAnalyze)
366	return None;
367
368	SmallSetVector<BasicBlock *, 16> BBWorklist;
369	SmallSetVector<std::pair<BasicBlock , BasicBlock >, 4> ExitWorklist;
370	DenseMap<Value , Value > SimplifiedValues;
371	SmallVector<std::pair<Value , Value >, 4> SimplifiedInputValues;
372
373	// The estimated cost of the unrolled form of the loop. We try to estimate
374	// this by simplifying as much as we can while computing the estimate.
375	InstructionCost UnrolledCost = 0;
376
377	// We also track the estimated dynamic (that is, actually executed) cost in
378	// the rolled form. This helps identify cases when the savings from unrolling
379	// aren't just exposing dead control flows, but actual reduced dynamic
380	// instructions due to the simplifications which we expect to occur after
381	// unrolling.
382	InstructionCost RolledDynamicCost = 0;
383
384	// We track the simplification of each instruction in each iteration. We use
385	// this to recursively merge costs into the unrolled cost on-demand so that
386	// we don't count the cost of any dead code. This is essentially a map from
387	// <instruction, int> to <bool, bool>, but stored as a densely packed struct.
388	DenseSet<UnrolledInstState, UnrolledInstStateKeyInfo> InstCostMap;
389
390	// A small worklist used to accumulate cost of instructions from each
391	// observable and reached root in the loop.
392	SmallVector<Instruction *, 16> CostWorklist;
393
394	// PHI-used worklist used between iterations while accumulating cost.
395	SmallVector<Instruction *, 4> PHIUsedList;
396
397	// Helper function to accumulate cost for instructions in the loop.
398	auto AddCostRecursively = [&](Instruction &RootI, int Iteration) {
399	assert(Iteration >= 0 && "Cannot have a negative iteration!")(static_cast <bool> (Iteration >= 0 && "Cannot have a negative iteration!" ) ? void (0) : __assert_fail ("Iteration >= 0 && \"Cannot have a negative iteration!\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp" , 399, __extension__ __PRETTY_FUNCTION__));
400	assert(CostWorklist.empty() && "Must start with an empty cost list")(static_cast <bool> (CostWorklist.empty() && "Must start with an empty cost list" ) ? void (0) : __assert_fail ("CostWorklist.empty() && \"Must start with an empty cost list\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp" , 400, __extension__ __PRETTY_FUNCTION__));
401	assert(PHIUsedList.empty() && "Must start with an empty phi used list")(static_cast <bool> (PHIUsedList.empty() && "Must start with an empty phi used list" ) ? void (0) : __assert_fail ("PHIUsedList.empty() && \"Must start with an empty phi used list\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp" , 401, __extension__ __PRETTY_FUNCTION__));
402	CostWorklist.push_back(&RootI);
403	TargetTransformInfo::TargetCostKind CostKind =
404	RootI.getFunction()->hasMinSize() ?
405	TargetTransformInfo::TCK_CodeSize :
406	TargetTransformInfo::TCK_SizeAndLatency;
407	for (;; --Iteration) {
408	do {
409	Instruction *I = CostWorklist.pop_back_val();
410
411	// InstCostMap only uses I and Iteration as a key, the other two values
412	// don't matter here.
413	auto CostIter = InstCostMap.find({I, Iteration, 0, 0});
414	if (CostIter == InstCostMap.end())
415	// If an input to a PHI node comes from a dead path through the loop
416	// we may have no cost data for it here. What that actually means is
417	// that it is free.
418	continue;
419	auto &Cost = *CostIter;
420	if (Cost.IsCounted)
421	// Already counted this instruction.
422	continue;
423
424	// Mark that we are counting the cost of this instruction now.
425	Cost.IsCounted = true;
426
427	// If this is a PHI node in the loop header, just add it to the PHI set.
428	if (auto *PhiI = dyn_cast<PHINode>(I))
429	if (PhiI->getParent() == L->getHeader()) {
430	assert(Cost.IsFree && "Loop PHIs shouldn't be evaluated as they "(static_cast <bool> (Cost.IsFree && "Loop PHIs shouldn't be evaluated as they " "inherently simplify during unrolling.") ? void (0) : __assert_fail ("Cost.IsFree && \"Loop PHIs shouldn't be evaluated as they \" \"inherently simplify during unrolling.\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp" , 431, __extension__ __PRETTY_FUNCTION__))
431	"inherently simplify during unrolling.")(static_cast <bool> (Cost.IsFree && "Loop PHIs shouldn't be evaluated as they " "inherently simplify during unrolling.") ? void (0) : __assert_fail ("Cost.IsFree && \"Loop PHIs shouldn't be evaluated as they \" \"inherently simplify during unrolling.\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp" , 431, __extension__ __PRETTY_FUNCTION__));
432	if (Iteration == 0)
433	continue;
434
435	// Push the incoming value from the backedge into the PHI used list
436	// if it is an in-loop instruction. We'll use this to populate the
437	// cost worklist for the next iteration (as we count backwards).
438	if (auto *OpI = dyn_cast<Instruction>(
439	PhiI->getIncomingValueForBlock(L->getLoopLatch())))
440	if (L->contains(OpI))
441	PHIUsedList.push_back(OpI);
442	continue;
443	}
444
445	// First accumulate the cost of this instruction.
446	if (!Cost.IsFree) {
447	UnrolledCost += TTI.getUserCost(I, CostKind);
448	LLVM_DEBUG(dbgs() << "Adding cost of instruction (iteration "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << "Adding cost of instruction (iteration " << Iteration << "): "; } } while (false)
449	<< Iteration << "): ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << "Adding cost of instruction (iteration " << Iteration << "): "; } } while (false);
450	LLVM_DEBUG(I->dump())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { I->dump(); } } while (false);
451	}
452
453	// We must count the cost of every operand which is not free,
454	// recursively. If we reach a loop PHI node, simply add it to the set
455	// to be considered on the next iteration (backwards!).
456	for (Value *Op : I->operands()) {
457	// Check whether this operand is free due to being a constant or
458	// outside the loop.
459	auto *OpI = dyn_cast<Instruction>(Op);
460	if (!OpI \|\| !L->contains(OpI))
461	continue;
462
463	// Otherwise accumulate its cost.
464	CostWorklist.push_back(OpI);
465	}
466	} while (!CostWorklist.empty());
467
468	if (PHIUsedList.empty())
469	// We've exhausted the search.
470	break;
471
472	assert(Iteration > 0 &&(static_cast <bool> (Iteration > 0 && "Cannot track PHI-used values past the first iteration!" ) ? void (0) : __assert_fail ("Iteration > 0 && \"Cannot track PHI-used values past the first iteration!\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp" , 473, __extension__ __PRETTY_FUNCTION__))
473	"Cannot track PHI-used values past the first iteration!")(static_cast <bool> (Iteration > 0 && "Cannot track PHI-used values past the first iteration!" ) ? void (0) : __assert_fail ("Iteration > 0 && \"Cannot track PHI-used values past the first iteration!\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp" , 473, __extension__ __PRETTY_FUNCTION__));
474	CostWorklist.append(PHIUsedList.begin(), PHIUsedList.end());
475	PHIUsedList.clear();
476	}
477	};
478
479	// Ensure that we don't violate the loop structure invariants relied on by
480	// this analysis.
481	assert(L->isLoopSimplifyForm() && "Must put loop into normal form first.")(static_cast <bool> (L->isLoopSimplifyForm() && "Must put loop into normal form first.") ? void (0) : __assert_fail ("L->isLoopSimplifyForm() && \"Must put loop into normal form first.\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp" , 481, __extension__ __PRETTY_FUNCTION__));
482	assert(L->isLCSSAForm(DT) &&(static_cast <bool> (L->isLCSSAForm(DT) && "Must have loops in LCSSA form to track live-out values." ) ? void (0) : __assert_fail ("L->isLCSSAForm(DT) && \"Must have loops in LCSSA form to track live-out values.\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp" , 483, __extension__ __PRETTY_FUNCTION__))
483	"Must have loops in LCSSA form to track live-out values.")(static_cast <bool> (L->isLCSSAForm(DT) && "Must have loops in LCSSA form to track live-out values." ) ? void (0) : __assert_fail ("L->isLCSSAForm(DT) && \"Must have loops in LCSSA form to track live-out values.\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp" , 483, __extension__ __PRETTY_FUNCTION__));
484
485	LLVM_DEBUG(dbgs() << "Starting LoopUnroll profitability analysis...\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << "Starting LoopUnroll profitability analysis...\n" ; } } while (false);
486
487	TargetTransformInfo::TargetCostKind CostKind =
488	L->getHeader()->getParent()->hasMinSize() ?
489	TargetTransformInfo::TCK_CodeSize : TargetTransformInfo::TCK_SizeAndLatency;
490	// Simulate execution of each iteration of the loop counting instructions,
491	// which would be simplified.
492	// Since the same load will take different values on different iterations,
493	// we literally have to go through all loop's iterations.
494	for (unsigned Iteration = 0; Iteration < TripCount; ++Iteration) {
495	LLVM_DEBUG(dbgs() << " Analyzing iteration " << Iteration << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << " Analyzing iteration " << Iteration << "\n"; } } while (false);
496
497	// Prepare for the iteration by collecting any simplified entry or backedge
498	// inputs.
499	for (Instruction &I : *L->getHeader()) {
500	auto *PHI = dyn_cast<PHINode>(&I);
501	if (!PHI)
502	break;
503
504	// The loop header PHI nodes must have exactly two input: one from the
505	// loop preheader and one from the loop latch.
506	assert((static_cast <bool> (PHI->getNumIncomingValues() == 2 && "Must have an incoming value only for the preheader and the latch." ) ? void (0) : __assert_fail ("PHI->getNumIncomingValues() == 2 && \"Must have an incoming value only for the preheader and the latch.\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp" , 508, __extension__ __PRETTY_FUNCTION__))
507	PHI->getNumIncomingValues() == 2 &&(static_cast <bool> (PHI->getNumIncomingValues() == 2 && "Must have an incoming value only for the preheader and the latch." ) ? void (0) : __assert_fail ("PHI->getNumIncomingValues() == 2 && \"Must have an incoming value only for the preheader and the latch.\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp" , 508, __extension__ __PRETTY_FUNCTION__))
508	"Must have an incoming value only for the preheader and the latch.")(static_cast <bool> (PHI->getNumIncomingValues() == 2 && "Must have an incoming value only for the preheader and the latch." ) ? void (0) : __assert_fail ("PHI->getNumIncomingValues() == 2 && \"Must have an incoming value only for the preheader and the latch.\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp" , 508, __extension__ __PRETTY_FUNCTION__));
509
510	Value *V = PHI->getIncomingValueForBlock(
511	Iteration == 0 ? L->getLoopPreheader() : L->getLoopLatch());
512	if (Iteration != 0 && SimplifiedValues.count(V))
513	V = SimplifiedValues.lookup(V);
514	SimplifiedInputValues.push_back({PHI, V});
515	}
516
517	// Now clear and re-populate the map for the next iteration.
518	SimplifiedValues.clear();
519	while (!SimplifiedInputValues.empty())
520	SimplifiedValues.insert(SimplifiedInputValues.pop_back_val());
521
522	UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, SE, L);
523
524	BBWorklist.clear();
525	BBWorklist.insert(L->getHeader());
526	// Note that we must not cache the size, this loop grows the worklist.
527	for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
528	BasicBlock *BB = BBWorklist[Idx];
529
530	// Visit all instructions in the given basic block and try to simplify
531	// it. We don't change the actual IR, just count optimization
532	// opportunities.
533	for (Instruction &I : *BB) {
534	// These won't get into the final code - don't even try calculating the
535	// cost for them.
536	if (isa<DbgInfoIntrinsic>(I) \|\| EphValues.count(&I))
537	continue;
538
539	// Track this instruction's expected baseline cost when executing the
540	// rolled loop form.
541	RolledDynamicCost += TTI.getUserCost(&I, CostKind);
542
543	// Visit the instruction to analyze its loop cost after unrolling,
544	// and if the visitor returns true, mark the instruction as free after
545	// unrolling and continue.
546	bool IsFree = Analyzer.visit(I);
547	bool Inserted = InstCostMap.insert({&I, (int)Iteration,
548	(unsigned)IsFree,
549	/IsCounted/ false}).second;
550	(void)Inserted;
551	assert(Inserted && "Cannot have a state for an unvisited instruction!")(static_cast <bool> (Inserted && "Cannot have a state for an unvisited instruction!" ) ? void (0) : __assert_fail ("Inserted && \"Cannot have a state for an unvisited instruction!\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp" , 551, __extension__ __PRETTY_FUNCTION__));
552
553	if (IsFree)
554	continue;
555
556	// Can't properly model a cost of a call.
557	// FIXME: With a proper cost model we should be able to do it.
558	if (auto *CI = dyn_cast<CallInst>(&I)) {
559	const Function *Callee = CI->getCalledFunction();
560	if (!Callee \|\| TTI.isLoweredToCall(Callee)) {
561	LLVM_DEBUG(dbgs() << "Can't analyze cost of loop with call\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << "Can't analyze cost of loop with call\n" ; } } while (false);
562	return None;
563	}
564	}
565
566	// If the instruction might have a side-effect recursively account for
567	// the cost of it and all the instructions leading up to it.
568	if (I.mayHaveSideEffects())
569	AddCostRecursively(I, Iteration);
570
571	// If unrolled body turns out to be too big, bail out.
572	if (UnrolledCost > MaxUnrolledLoopSize) {
573	LLVM_DEBUG(dbgs() << " Exceeded threshold.. exiting.\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << " Exceeded threshold.. exiting.\n" << " UnrolledCost: " << UnrolledCost << ", MaxUnrolledLoopSize: " << MaxUnrolledLoopSize << "\n"; } } while (false )
574	<< " UnrolledCost: " << UnrolledCostdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << " Exceeded threshold.. exiting.\n" << " UnrolledCost: " << UnrolledCost << ", MaxUnrolledLoopSize: " << MaxUnrolledLoopSize << "\n"; } } while (false )
575	<< ", MaxUnrolledLoopSize: " << MaxUnrolledLoopSizedo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << " Exceeded threshold.. exiting.\n" << " UnrolledCost: " << UnrolledCost << ", MaxUnrolledLoopSize: " << MaxUnrolledLoopSize << "\n"; } } while (false )
576	<< "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << " Exceeded threshold.. exiting.\n" << " UnrolledCost: " << UnrolledCost << ", MaxUnrolledLoopSize: " << MaxUnrolledLoopSize << "\n"; } } while (false );
577	return None;
578	}
579	}
580
581	Instruction *TI = BB->getTerminator();
582
583	auto getSimplifiedConstant = [&](Value V) -> Constant {
584	if (SimplifiedValues.count(V))
585	V = SimplifiedValues.lookup(V);
586	return dyn_cast<Constant>(V);
587	};
588
589	// Add in the live successors by first checking whether we have terminator
590	// that may be simplified based on the values simplified by this call.
591	BasicBlock *KnownSucc = nullptr;
592	if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
593	if (BI->isConditional()) {
594	if (auto *SimpleCond = getSimplifiedConstant(BI->getCondition())) {
595	// Just take the first successor if condition is undef
596	if (isa<UndefValue>(SimpleCond))
597	KnownSucc = BI->getSuccessor(0);
598	else if (ConstantInt *SimpleCondVal =
599	dyn_cast<ConstantInt>(SimpleCond))
600	KnownSucc = BI->getSuccessor(SimpleCondVal->isZero() ? 1 : 0);
601	}
602	}
603	} else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
604	if (auto *SimpleCond = getSimplifiedConstant(SI->getCondition())) {
605	// Just take the first successor if condition is undef
606	if (isa<UndefValue>(SimpleCond))
607	KnownSucc = SI->getSuccessor(0);
608	else if (ConstantInt *SimpleCondVal =
609	dyn_cast<ConstantInt>(SimpleCond))
610	KnownSucc = SI->findCaseValue(SimpleCondVal)->getCaseSuccessor();
611	}
612	}
613	if (KnownSucc) {
614	if (L->contains(KnownSucc))
615	BBWorklist.insert(KnownSucc);
616	else
617	ExitWorklist.insert({BB, KnownSucc});
618	continue;
619	}
620
621	// Add BB's successors to the worklist.
622	for (BasicBlock *Succ : successors(BB))
623	if (L->contains(Succ))
624	BBWorklist.insert(Succ);
625	else
626	ExitWorklist.insert({BB, Succ});
627	AddCostRecursively(*TI, Iteration);
628	}
629
630	// If we found no optimization opportunities on the first iteration, we
631	// won't find them on later ones too.
632	if (UnrolledCost == RolledDynamicCost) {
633	LLVM_DEBUG(dbgs() << " No opportunities found.. exiting.\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << " No opportunities found.. exiting.\n" << " UnrolledCost: " << UnrolledCost << "\n" ; } } while (false)
634	<< " UnrolledCost: " << UnrolledCost << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << " No opportunities found.. exiting.\n" << " UnrolledCost: " << UnrolledCost << "\n" ; } } while (false);
635	return None;
636	}
637	}
638
639	while (!ExitWorklist.empty()) {
640	BasicBlock ExitingBB, ExitBB;
641	std::tie(ExitingBB, ExitBB) = ExitWorklist.pop_back_val();
642
643	for (Instruction &I : *ExitBB) {
644	auto *PN = dyn_cast<PHINode>(&I);
645	if (!PN)
646	break;
647
648	Value *Op = PN->getIncomingValueForBlock(ExitingBB);
649	if (auto *OpI = dyn_cast<Instruction>(Op))
650	if (L->contains(OpI))
651	AddCostRecursively(*OpI, TripCount - 1);
652	}
653	}
654
655	assert(UnrolledCost.isValid() && RolledDynamicCost.isValid() &&(static_cast <bool> (UnrolledCost.isValid() && RolledDynamicCost .isValid() && "All instructions must have a valid cost, whether the " "loop is rolled or unrolled.") ? void (0) : __assert_fail ("UnrolledCost.isValid() && RolledDynamicCost.isValid() && \"All instructions must have a valid cost, whether the \" \"loop is rolled or unrolled.\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp" , 657, __extension__ __PRETTY_FUNCTION__))
656	"All instructions must have a valid cost, whether the "(static_cast <bool> (UnrolledCost.isValid() && RolledDynamicCost .isValid() && "All instructions must have a valid cost, whether the " "loop is rolled or unrolled.") ? void (0) : __assert_fail ("UnrolledCost.isValid() && RolledDynamicCost.isValid() && \"All instructions must have a valid cost, whether the \" \"loop is rolled or unrolled.\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp" , 657, __extension__ __PRETTY_FUNCTION__))
657	"loop is rolled or unrolled.")(static_cast <bool> (UnrolledCost.isValid() && RolledDynamicCost .isValid() && "All instructions must have a valid cost, whether the " "loop is rolled or unrolled.") ? void (0) : __assert_fail ("UnrolledCost.isValid() && RolledDynamicCost.isValid() && \"All instructions must have a valid cost, whether the \" \"loop is rolled or unrolled.\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp" , 657, __extension__ __PRETTY_FUNCTION__));
658
659	LLVM_DEBUG(dbgs() << "Analysis finished:\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << "Analysis finished:\n" << "UnrolledCost: " << UnrolledCost << ", " << "RolledDynamicCost: " << RolledDynamicCost << "\n" ; } } while (false)
660	<< "UnrolledCost: " << UnrolledCost << ", "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << "Analysis finished:\n" << "UnrolledCost: " << UnrolledCost << ", " << "RolledDynamicCost: " << RolledDynamicCost << "\n" ; } } while (false)
661	<< "RolledDynamicCost: " << RolledDynamicCost << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << "Analysis finished:\n" << "UnrolledCost: " << UnrolledCost << ", " << "RolledDynamicCost: " << RolledDynamicCost << "\n" ; } } while (false);
662	return {{unsigned(*UnrolledCost.getValue()),
663	unsigned(*RolledDynamicCost.getValue())}};
664	}
665
666	/// ApproximateLoopSize - Approximate the size of the loop.
667	unsigned llvm::ApproximateLoopSize(
668	const Loop *L, unsigned &NumCalls, bool &NotDuplicatable, bool &Convergent,
669	const TargetTransformInfo &TTI,
670	const SmallPtrSetImpl<const Value *> &EphValues, unsigned BEInsns) {
671	CodeMetrics Metrics;
672	for (BasicBlock *BB : L->blocks())
673	Metrics.analyzeBasicBlock(BB, TTI, EphValues);
674	NumCalls = Metrics.NumInlineCandidates;
675	NotDuplicatable = Metrics.notDuplicatable;
676	Convergent = Metrics.convergent;
677
678	unsigned LoopSize = Metrics.NumInsts;
679
680	// Don't allow an estimate of size zero. This would allows unrolling of loops
681	// with huge iteration counts, which is a compile time problem even if it's
682	// not a problem for code quality. Also, the code using this size may assume
683	// that each loop has at least three instructions (likely a conditional
684	// branch, a comparison feeding that branch, and some kind of loop increment
685	// feeding that comparison instruction).
686	LoopSize = std::max(LoopSize, BEInsns + 1);
687
688	return LoopSize;
689	}
690
691	// Returns the loop hint metadata node with the given name (for example,
692	// "llvm.loop.unroll.count"). If no such metadata node exists, then nullptr is
693	// returned.
694	static MDNode getUnrollMetadataForLoop(const Loop L, StringRef Name) {
695	if (MDNode *LoopID = L->getLoopID())
696	return GetUnrollMetadata(LoopID, Name);
697	return nullptr;
698	}
699
700	// Returns true if the loop has an unroll(full) pragma.
701	static bool hasUnrollFullPragma(const Loop *L) {
702	return getUnrollMetadataForLoop(L, "llvm.loop.unroll.full");
703	}
704
705	// Returns true if the loop has an unroll(enable) pragma. This metadata is used
706	// for both "#pragma unroll" and "#pragma clang loop unroll(enable)" directives.
707	static bool hasUnrollEnablePragma(const Loop *L) {
708	return getUnrollMetadataForLoop(L, "llvm.loop.unroll.enable");
709	}
710
711	// Returns true if the loop has an runtime unroll(disable) pragma.
712	static bool hasRuntimeUnrollDisablePragma(const Loop *L) {
713	return getUnrollMetadataForLoop(L, "llvm.loop.unroll.runtime.disable");
714	}
715
716	// If loop has an unroll_count pragma return the (necessarily
717	// positive) value from the pragma. Otherwise return 0.
718	static unsigned unrollCountPragmaValue(const Loop *L) {
719	MDNode *MD = getUnrollMetadataForLoop(L, "llvm.loop.unroll.count");
720	if (MD) {
721	assert(MD->getNumOperands() == 2 &&(static_cast <bool> (MD->getNumOperands() == 2 && "Unroll count hint metadata should have two operands.") ? void (0) : __assert_fail ("MD->getNumOperands() == 2 && \"Unroll count hint metadata should have two operands.\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp" , 722, __extension__ __PRETTY_FUNCTION__))
722	"Unroll count hint metadata should have two operands.")(static_cast <bool> (MD->getNumOperands() == 2 && "Unroll count hint metadata should have two operands.") ? void (0) : __assert_fail ("MD->getNumOperands() == 2 && \"Unroll count hint metadata should have two operands.\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp" , 722, __extension__ __PRETTY_FUNCTION__));
723	unsigned Count =
724	mdconst::extract<ConstantInt>(MD->getOperand(1))->getZExtValue();
725	assert(Count >= 1 && "Unroll count must be positive.")(static_cast <bool> (Count >= 1 && "Unroll count must be positive." ) ? void (0) : __assert_fail ("Count >= 1 && \"Unroll count must be positive.\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp" , 725, __extension__ __PRETTY_FUNCTION__));
726	return Count;
727	}
728	return 0;
729	}
730
731	// Computes the boosting factor for complete unrolling.
732	// If fully unrolling the loop would save a lot of RolledDynamicCost, it would
733	// be beneficial to fully unroll the loop even if unrolledcost is large. We
734	// use (RolledDynamicCost / UnrolledCost) to model the unroll benefits to adjust
735	// the unroll threshold.
736	static unsigned getFullUnrollBoostingFactor(const EstimatedUnrollCost &Cost,
737	unsigned MaxPercentThresholdBoost) {
738	if (Cost.RolledDynamicCost >= std::numeric_limits<unsigned>::max() / 100)
739	return 100;
740	else if (Cost.UnrolledCost != 0)
741	// The boosting factor is RolledDynamicCost / UnrolledCost
742	return std::min(100 * Cost.RolledDynamicCost / Cost.UnrolledCost,
743	MaxPercentThresholdBoost);
744	else
745	return MaxPercentThresholdBoost;
746	}
747
748	// Produce an estimate of the unrolled cost of the specified loop. This
749	// is used to a) produce a cost estimate for partial unrolling and b) to
750	// cheaply estimate cost for full unrolling when we don't want to symbolically
751	// evaluate all iterations.
752	class UnrollCostEstimator {
753	const unsigned LoopSize;
754
755	public:
756	UnrollCostEstimator(Loop &L, unsigned LoopSize) : LoopSize(LoopSize) {}
757
758	// Returns loop size estimation for unrolled loop, given the unrolling
759	// configuration specified by UP.
760	uint64_t
761	getUnrolledLoopSize(const TargetTransformInfo::UnrollingPreferences &UP,
762	const unsigned CountOverwrite = 0) const {
763	assert(LoopSize >= UP.BEInsns &&(static_cast <bool> (LoopSize >= UP.BEInsns && "LoopSize should not be less than BEInsns!") ? void (0) : __assert_fail ("LoopSize >= UP.BEInsns && \"LoopSize should not be less than BEInsns!\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp" , 764, __extension__ __PRETTY_FUNCTION__))
764	"LoopSize should not be less than BEInsns!")(static_cast <bool> (LoopSize >= UP.BEInsns && "LoopSize should not be less than BEInsns!") ? void (0) : __assert_fail ("LoopSize >= UP.BEInsns && \"LoopSize should not be less than BEInsns!\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp" , 764, __extension__ __PRETTY_FUNCTION__));
765	if (CountOverwrite)
766	return static_cast<uint64_t>(LoopSize - UP.BEInsns) * CountOverwrite +
767	UP.BEInsns;
768	else
769	return static_cast<uint64_t>(LoopSize - UP.BEInsns) * UP.Count +
770	UP.BEInsns;
771	}
772	};
773
774	static Optional<unsigned>
775	shouldPragmaUnroll(Loop *L, const PragmaInfo &PInfo,
776	const unsigned TripMultiple, const unsigned TripCount,
777	const UnrollCostEstimator UCE,
778	const TargetTransformInfo::UnrollingPreferences &UP) {
779
780	// Using unroll pragma
781	// 1st priority is unroll count set by "unroll-count" option.
782
783	if (PInfo.UserUnrollCount) {
784	if (UP.AllowRemainder &&
785	UCE.getUnrolledLoopSize(UP, (unsigned)UnrollCount) < UP.Threshold)
786	return (unsigned)UnrollCount;
787	}
788
789	// 2nd priority is unroll count set by pragma.
790	if (PInfo.PragmaCount > 0) {
791	if ((UP.AllowRemainder \|\| (TripMultiple % PInfo.PragmaCount == 0)) &&
792	UCE.getUnrolledLoopSize(UP, PInfo.PragmaCount) < PragmaUnrollThreshold)
793	return PInfo.PragmaCount;
794	}
795
796	if (PInfo.PragmaFullUnroll && TripCount != 0) {
797	if (UCE.getUnrolledLoopSize(UP, TripCount) < PragmaUnrollThreshold)
798	return TripCount;
799	}
800	// if didn't return until here, should continue to other priorties
801	return None;
802	}
803
804	static Optional<unsigned> shouldFullUnroll(
805	Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT,
806	ScalarEvolution &SE, const SmallPtrSetImpl<const Value *> &EphValues,
807	const unsigned FullUnrollTripCount, const UnrollCostEstimator UCE,
808	const TargetTransformInfo::UnrollingPreferences &UP) {
809
810	if (FullUnrollTripCount && FullUnrollTripCount <= UP.FullUnrollMaxCount) {
811	// When computing the unrolled size, note that BEInsns are not replicated
812	// like the rest of the loop body.
813	if (UCE.getUnrolledLoopSize(UP) < UP.Threshold) {
814	return FullUnrollTripCount;
815
816	} else {
817	// The loop isn't that small, but we still can fully unroll it if that
818	// helps to remove a significant number of instructions.
819	// To check that, run additional analysis on the loop.
820	if (Optional<EstimatedUnrollCost> Cost = analyzeLoopUnrollCost(
821	L, FullUnrollTripCount, DT, SE, EphValues, TTI,
822	UP.Threshold * UP.MaxPercentThresholdBoost / 100,
823	UP.MaxIterationsCountToAnalyze)) {
824	unsigned Boost =
825	getFullUnrollBoostingFactor(*Cost, UP.MaxPercentThresholdBoost);
826	if (Cost->UnrolledCost < UP.Threshold * Boost / 100) {
827	return FullUnrollTripCount;
828	}
829	}
830	}
831	}
832	return None;
833	}
834
835	static Optional<unsigned>
836	shouldPartialUnroll(const unsigned LoopSize, const unsigned TripCount,
837	const UnrollCostEstimator UCE,
838	const TargetTransformInfo::UnrollingPreferences &UP) {
839
840	unsigned count = UP.Count;
841	if (TripCount) {
842	if (!UP.Partial) {
843	LLVM_DEBUG(dbgs() << " will not try to unroll partially because "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << " will not try to unroll partially because " << "-unroll-allow-partial not given\n"; } } while (false )
844	<< "-unroll-allow-partial not given\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << " will not try to unroll partially because " << "-unroll-allow-partial not given\n"; } } while (false );
845	count = 0;
846	return count;
847	}
848	if (count == 0)
849	count = TripCount;
850	if (UP.PartialThreshold != NoThreshold) {
851	// Reduce unroll count to be modulo of TripCount for partial unrolling.
852	if (UCE.getUnrolledLoopSize(UP, count) > UP.PartialThreshold)
853	count = (std::max(UP.PartialThreshold, UP.BEInsns + 1) - UP.BEInsns) /
854	(LoopSize - UP.BEInsns);
855	if (count > UP.MaxCount)
856	count = UP.MaxCount;
857	while (count != 0 && TripCount % count != 0)
858	count--;
859	if (UP.AllowRemainder && count <= 1) {
860	// If there is no Count that is modulo of TripCount, set Count to
861	// largest power-of-two factor that satisfies the threshold limit.
862	// As we'll create fixup loop, do the type of unrolling only if
863	// remainder loop is allowed.
864	count = UP.DefaultUnrollRuntimeCount;
865	while (count != 0 &&
866	UCE.getUnrolledLoopSize(UP, count) > UP.PartialThreshold)
867	count >>= 1;
868	}
869	if (count < 2) {
870	count = 0;
871	}
872	} else {
873	count = TripCount;
874	}
875	if (count > UP.MaxCount)
876	count = UP.MaxCount;
877
878	LLVM_DEBUG(dbgs() << " partially unrolling with count: " << count << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << " partially unrolling with count: " << count << "\n"; } } while (false);
879
880	return count;
881	}
882
883	// if didn't return until here, should continue to other priorties
884	return None;
885	}
886	// Returns true if unroll count was set explicitly.
887	// Calculates unroll count and writes it to UP.Count.
888	// Unless IgnoreUser is true, will also use metadata and command-line options
889	// that are specific to to the LoopUnroll pass (which, for instance, are
890	// irrelevant for the LoopUnrollAndJam pass).
891	// FIXME: This function is used by LoopUnroll and LoopUnrollAndJam, but consumes
892	// many LoopUnroll-specific options. The shared functionality should be
893	// refactored into it own function.
894	bool llvm::computeUnrollCount(
895	Loop L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo LI,
896	ScalarEvolution &SE, const SmallPtrSetImpl<const Value *> &EphValues,
897	OptimizationRemarkEmitter *ORE, unsigned TripCount, unsigned MaxTripCount,
898	bool MaxOrZero, unsigned TripMultiple, unsigned LoopSize,
899	TargetTransformInfo::UnrollingPreferences &UP,
900	TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound) {
901
902	UnrollCostEstimator UCE(*L, LoopSize);
903	Optional<unsigned> UnrollFactor;
904
905	const bool UserUnrollCount = UnrollCount.getNumOccurrences() > 0;
906	const bool PragmaFullUnroll = hasUnrollFullPragma(L);
907	const unsigned PragmaCount = unrollCountPragmaValue(L);
908	const bool PragmaEnableUnroll = hasUnrollEnablePragma(L);
909
910	const bool ExplicitUnroll = PragmaCount > 0 \|\| PragmaFullUnroll \|\|
911	PragmaEnableUnroll \|\| UserUnrollCount;
912
913	PragmaInfo PInfo(UserUnrollCount, PragmaFullUnroll, PragmaCount,
914	PragmaEnableUnroll);
915	// Use an explicit peel count that has been specified for testing. In this
916	// case it's not permitted to also specify an explicit unroll count.
917	if (PP.PeelCount) {
918	if (UnrollCount.getNumOccurrences() > 0) {
919	report_fatal_error("Cannot specify both explicit peel count and "
920	"explicit unroll count");
921	}
922	UP.Count = 1;
923	UP.Runtime = false;
924	return true;
925	}
926	// Check for explicit Count.
927	// 1st priority is unroll count set by "unroll-count" option.
928	// 2nd priority is unroll count set by pragma.
929	UnrollFactor = shouldPragmaUnroll(L, PInfo, TripMultiple, TripCount, UCE, UP);
930
931	if (UnrollFactor) {
932	UP.Count = *UnrollFactor;
933
934	if (UserUnrollCount \|\| (PragmaCount > 0)) {
935	UP.AllowExpensiveTripCount = true;
936	UP.Force = true;
937	}
938	UP.Runtime \|= (PragmaCount > 0);
939	return ExplicitUnroll;
940	} else {
941	if (ExplicitUnroll && TripCount != 0) {
942	// If the loop has an unrolling pragma, we want to be more aggressive with
943	// unrolling limits. Set thresholds to at least the PragmaUnrollThreshold
944	// value which is larger than the default limits.
945	UP.Threshold = std::max<unsigned>(UP.Threshold, PragmaUnrollThreshold);
946	UP.PartialThreshold =
947	std::max<unsigned>(UP.PartialThreshold, PragmaUnrollThreshold);
948	}
949	}
950
951	// 3rd priority is full unroll count.
952	// Full unroll makes sense only when TripCount or its upper bound could be
953	// statically calculated.
954	// Also we need to check if we exceed FullUnrollMaxCount.
955
956	// We can unroll by the upper bound amount if it's generally allowed or if
957	// we know that the loop is executed either the upper bound or zero times.
958	// (MaxOrZero unrolling keeps only the first loop test, so the number of
959	// loop tests remains the same compared to the non-unrolled version, whereas
960	// the generic upper bound unrolling keeps all but the last loop test so the
961	// number of loop tests goes up which may end up being worse on targets with
962	// constrained branch predictor resources so is controlled by an option.)
963	// In addition we only unroll small upper bounds.
964	unsigned FullUnrollMaxTripCount = MaxTripCount;
965	if (!(UP.UpperBound \|\| MaxOrZero) \|\|
966	FullUnrollMaxTripCount > UnrollMaxUpperBound)
967	FullUnrollMaxTripCount = 0;
968
969	// UnrollByMaxCount and ExactTripCount cannot both be non zero since we only
970	// compute the former when the latter is zero.
971	unsigned ExactTripCount = TripCount;
972	assert((ExactTripCount == 0 \|\| FullUnrollMaxTripCount == 0) &&(static_cast <bool> ((ExactTripCount == 0 \|\| FullUnrollMaxTripCount == 0) && "ExtractTripCount and UnrollByMaxCount cannot both be non zero." ) ? void (0) : __assert_fail ("(ExactTripCount == 0 \|\| FullUnrollMaxTripCount == 0) && \"ExtractTripCount and UnrollByMaxCount cannot both be non zero.\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp" , 973, __extension__ __PRETTY_FUNCTION__))
973	"ExtractTripCount and UnrollByMaxCount cannot both be non zero.")(static_cast <bool> ((ExactTripCount == 0 \|\| FullUnrollMaxTripCount == 0) && "ExtractTripCount and UnrollByMaxCount cannot both be non zero." ) ? void (0) : __assert_fail ("(ExactTripCount == 0 \|\| FullUnrollMaxTripCount == 0) && \"ExtractTripCount and UnrollByMaxCount cannot both be non zero.\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp" , 973, __extension__ __PRETTY_FUNCTION__));
974
975	unsigned FullUnrollTripCount =
976	ExactTripCount ? ExactTripCount : FullUnrollMaxTripCount;
977	UP.Count = FullUnrollTripCount;
978
979	UnrollFactor =
980	shouldFullUnroll(L, TTI, DT, SE, EphValues, FullUnrollTripCount, UCE, UP);
981
982	// if shouldFullUnroll can do the unrolling, some side parameteres should be
983	// set
984	if (UnrollFactor) {
985	UP.Count = *UnrollFactor;
986	UseUpperBound = (FullUnrollMaxTripCount == FullUnrollTripCount);
987	TripCount = FullUnrollTripCount;
	Value stored to 'TripCount' is never read
988	TripMultiple = UP.UpperBound ? 1 : TripMultiple;
989	return ExplicitUnroll;
990	} else {
991	UP.Count = FullUnrollTripCount;
992	}
993
994	// 4th priority is loop peeling.
995	computePeelCount(L, LoopSize, PP, TripCount, DT, SE, UP.Threshold);
996	if (PP.PeelCount) {
997	UP.Runtime = false;
998	UP.Count = 1;
999	return ExplicitUnroll;
1000	}
1001
1002	// Before starting partial unrolling, set up.partial to true,
1003	// if user explicitly asked for unrolling
1004	if (TripCount)
1005	UP.Partial \|= ExplicitUnroll;
1006
1007	// 5th priority is partial unrolling.
1008	// Try partial unroll only when TripCount could be statically calculated.
1009	UnrollFactor = shouldPartialUnroll(LoopSize, TripCount, UCE, UP);
1010
1011	if (UnrollFactor) {
1012	UP.Count = *UnrollFactor;
1013
1014	if ((PragmaFullUnroll \|\| PragmaEnableUnroll) && TripCount &&
1015	UP.Count != TripCount)
1016	ORE->emit([&]() {
1017	return OptimizationRemarkMissed(DEBUG_TYPE"loop-unroll",
1018	"FullUnrollAsDirectedTooLarge",
1019	L->getStartLoc(), L->getHeader())
1020	<< "Unable to fully unroll loop as directed by unroll pragma "
1021	"because "
1022	"unrolled size is too large.";
1023	});
1024
1025	if (UP.PartialThreshold != NoThreshold) {
1026	if (UP.Count == 0) {
1027	if (PragmaEnableUnroll)
1028	ORE->emit([&]() {
1029	return OptimizationRemarkMissed(DEBUG_TYPE"loop-unroll",
1030	"UnrollAsDirectedTooLarge",
1031	L->getStartLoc(), L->getHeader())
1032	<< "Unable to unroll loop as directed by unroll(enable) "
1033	"pragma "
1034	"because unrolled size is too large.";
1035	});
1036	}
1037	}
1038	return ExplicitUnroll;
1039	}
1040	assert(TripCount == 0 &&(static_cast <bool> (TripCount == 0 && "All cases when TripCount is constant should be covered here." ) ? void (0) : __assert_fail ("TripCount == 0 && \"All cases when TripCount is constant should be covered here.\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp" , 1041, __extension__ __PRETTY_FUNCTION__))
1041	"All cases when TripCount is constant should be covered here.")(static_cast <bool> (TripCount == 0 && "All cases when TripCount is constant should be covered here." ) ? void (0) : __assert_fail ("TripCount == 0 && \"All cases when TripCount is constant should be covered here.\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp" , 1041, __extension__ __PRETTY_FUNCTION__));
1042	if (PragmaFullUnroll)
1043	ORE->emit([&]() {
1044	return OptimizationRemarkMissed(
1045	DEBUG_TYPE"loop-unroll", "CantFullUnrollAsDirectedRuntimeTripCount",
1046	L->getStartLoc(), L->getHeader())
1047	<< "Unable to fully unroll loop as directed by unroll(full) "
1048	"pragma "
1049	"because loop has a runtime trip count.";
1050	});
1051
1052	// 6th priority is runtime unrolling.
1053	// Don't unroll a runtime trip count loop when it is disabled.
1054	if (hasRuntimeUnrollDisablePragma(L)) {
1055	UP.Count = 0;
1056	return false;
1057	}
1058
1059	// Don't unroll a small upper bound loop unless user or TTI asked to do so.
1060	if (MaxTripCount && !UP.Force && MaxTripCount < UnrollMaxUpperBound) {
1061	UP.Count = 0;
1062	return false;
1063	}
1064
1065	// Check if the runtime trip count is too small when profile is available.
1066	if (L->getHeader()->getParent()->hasProfileData()) {
1067	if (auto ProfileTripCount = getLoopEstimatedTripCount(L)) {
1068	if (*ProfileTripCount < FlatLoopTripCountThreshold)
1069	return false;
1070	else
1071	UP.AllowExpensiveTripCount = true;
1072	}
1073	}
1074	UP.Runtime \|= PragmaEnableUnroll \|\| PragmaCount > 0 \|\| UserUnrollCount;
1075	if (!UP.Runtime) {
1076	LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << " will not try to unroll loop with runtime trip count " << "-unroll-runtime not given\n"; } } while (false)
1077	dbgs() << " will not try to unroll loop with runtime trip count "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << " will not try to unroll loop with runtime trip count " << "-unroll-runtime not given\n"; } } while (false)
1078	<< "-unroll-runtime not given\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << " will not try to unroll loop with runtime trip count " << "-unroll-runtime not given\n"; } } while (false);
1079	UP.Count = 0;
1080	return false;
1081	}
1082	if (UP.Count == 0)
1083	UP.Count = UP.DefaultUnrollRuntimeCount;
1084
1085	// Reduce unroll count to be the largest power-of-two factor of
1086	// the original count which satisfies the threshold limit.
1087	while (UP.Count != 0 &&
1088	UCE.getUnrolledLoopSize(UP) > UP.PartialThreshold)
1089	UP.Count >>= 1;
1090
1091	#ifndef NDEBUG
1092	unsigned OrigCount = UP.Count;
1093	#endif
1094
1095	if (!UP.AllowRemainder && UP.Count != 0 && (TripMultiple % UP.Count) != 0) {
1096	while (UP.Count != 0 && TripMultiple % UP.Count != 0)
1097	UP.Count >>= 1;
1098	LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << "Remainder loop is restricted (that could architecture " "specific or because the loop contains a convergent " "instruction), so unroll count must divide the trip " "multiple, " << TripMultiple << ". Reducing unroll count from " << OrigCount << " to " << UP.Count << ".\n"; } } while (false)
1099	dbgs() << "Remainder loop is restricted (that could architecture "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << "Remainder loop is restricted (that could architecture " "specific or because the loop contains a convergent " "instruction), so unroll count must divide the trip " "multiple, " << TripMultiple << ". Reducing unroll count from " << OrigCount << " to " << UP.Count << ".\n"; } } while (false)
1100	"specific or because the loop contains a convergent "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << "Remainder loop is restricted (that could architecture " "specific or because the loop contains a convergent " "instruction), so unroll count must divide the trip " "multiple, " << TripMultiple << ". Reducing unroll count from " << OrigCount << " to " << UP.Count << ".\n"; } } while (false)
1101	"instruction), so unroll count must divide the trip "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << "Remainder loop is restricted (that could architecture " "specific or because the loop contains a convergent " "instruction), so unroll count must divide the trip " "multiple, " << TripMultiple << ". Reducing unroll count from " << OrigCount << " to " << UP.Count << ".\n"; } } while (false)
1102	"multiple, "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << "Remainder loop is restricted (that could architecture " "specific or because the loop contains a convergent " "instruction), so unroll count must divide the trip " "multiple, " << TripMultiple << ". Reducing unroll count from " << OrigCount << " to " << UP.Count << ".\n"; } } while (false)
1103	<< TripMultiple << ". Reducing unroll count from " << OrigCountdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << "Remainder loop is restricted (that could architecture " "specific or because the loop contains a convergent " "instruction), so unroll count must divide the trip " "multiple, " << TripMultiple << ". Reducing unroll count from " << OrigCount << " to " << UP.Count << ".\n"; } } while (false)
1104	<< " to " << UP.Count << ".\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << "Remainder loop is restricted (that could architecture " "specific or because the loop contains a convergent " "instruction), so unroll count must divide the trip " "multiple, " << TripMultiple << ". Reducing unroll count from " << OrigCount << " to " << UP.Count << ".\n"; } } while (false);
1105
1106	using namespace ore;
1107
1108	if (unrollCountPragmaValue(L) > 0 && !UP.AllowRemainder)
1109	ORE->emit([&]() {
1110	return OptimizationRemarkMissed(DEBUG_TYPE"loop-unroll",
1111	"DifferentUnrollCountFromDirected",
1112	L->getStartLoc(), L->getHeader())
1113	<< "Unable to unroll loop the number of times directed by "
1114	"unroll_count pragma because remainder loop is restricted "
1115	"(that could architecture specific or because the loop "
1116	"contains a convergent instruction) and so must have an "
1117	"unroll "
1118	"count that divides the loop trip multiple of "
1119	<< NV("TripMultiple", TripMultiple) << ". Unrolling instead "
1120	<< NV("UnrollCount", UP.Count) << " time(s).";
1121	});
1122	}
1123
1124	if (UP.Count > UP.MaxCount)
1125	UP.Count = UP.MaxCount;
1126
1127	if (MaxTripCount && UP.Count > MaxTripCount)
1128	UP.Count = MaxTripCount;
1129
1130	LLVM_DEBUG(dbgs() << " runtime unrolling with count: " << UP.Countdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << " runtime unrolling with count: " << UP.Count << "\n"; } } while (false)
1131	<< "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << " runtime unrolling with count: " << UP.Count << "\n"; } } while (false);
1132	if (UP.Count < 2)
1133	UP.Count = 0;
1134	return ExplicitUnroll;
1135	}
1136
1137	static LoopUnrollResult tryToUnrollLoop(
1138	Loop L, DominatorTree &DT, LoopInfo LI, ScalarEvolution &SE,
1139	const TargetTransformInfo &TTI, AssumptionCache &AC,
1140	OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI,
1141	ProfileSummaryInfo *PSI, bool PreserveLCSSA, int OptLevel,
1142	bool OnlyWhenForced, bool ForgetAllSCEV, Optional<unsigned> ProvidedCount,
1143	Optional<unsigned> ProvidedThreshold, Optional<bool> ProvidedAllowPartial,
1144	Optional<bool> ProvidedRuntime, Optional<bool> ProvidedUpperBound,
1145	Optional<bool> ProvidedAllowPeeling,
1146	Optional<bool> ProvidedAllowProfileBasedPeeling,
1147	Optional<unsigned> ProvidedFullUnrollMaxCount) {
1148	LLVM_DEBUG(dbgs() << "Loop Unroll: F["do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << "Loop Unroll: F[" << L->getHeader()->getParent()->getName() << "] Loop %" << L->getHeader()->getName() << "\n"; } } while (false)
1149	<< L->getHeader()->getParent()->getName() << "] Loop %"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << "Loop Unroll: F[" << L->getHeader()->getParent()->getName() << "] Loop %" << L->getHeader()->getName() << "\n"; } } while (false)
1150	<< L->getHeader()->getName() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << "Loop Unroll: F[" << L->getHeader()->getParent()->getName() << "] Loop %" << L->getHeader()->getName() << "\n"; } } while (false);
1151	TransformationMode TM = hasUnrollTransformation(L);
1152	if (TM & TM_Disable)
1153	return LoopUnrollResult::Unmodified;
1154	if (!L->isLoopSimplifyForm()) {
1155	LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << " Not unrolling loop which is not in loop-simplify form.\n" ; } } while (false)
1156	dbgs() << " Not unrolling loop which is not in loop-simplify form.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << " Not unrolling loop which is not in loop-simplify form.\n" ; } } while (false);
1157	return LoopUnrollResult::Unmodified;
1158	}
1159
1160	// When automatic unrolling is disabled, do not unroll unless overridden for
1161	// this loop.
1162	if (OnlyWhenForced && !(TM & TM_Enable))
1163	return LoopUnrollResult::Unmodified;
1164
1165	bool OptForSize = L->getHeader()->getParent()->hasOptSize();
1166	unsigned NumInlineCandidates;
1167	bool NotDuplicatable;
1168	bool Convergent;
1169	TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences(
1170	L, SE, TTI, BFI, PSI, ORE, OptLevel, ProvidedThreshold, ProvidedCount,
1171	ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound,
1172	ProvidedFullUnrollMaxCount);
1173	TargetTransformInfo::PeelingPreferences PP = gatherPeelingPreferences(
1174	L, SE, TTI, ProvidedAllowPeeling, ProvidedAllowProfileBasedPeeling, true);
1175
1176	// Exit early if unrolling is disabled. For OptForSize, we pick the loop size
1177	// as threshold later on.
1178	if (UP.Threshold == 0 && (!UP.Partial \|\| UP.PartialThreshold == 0) &&
1179	!OptForSize)
1180	return LoopUnrollResult::Unmodified;
1181
1182	SmallPtrSet<const Value *, 32> EphValues;
1183	CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
1184
1185	unsigned LoopSize =
1186	ApproximateLoopSize(L, NumInlineCandidates, NotDuplicatable, Convergent,
1187	TTI, EphValues, UP.BEInsns);
1188	LLVM_DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << " Loop Size = " << LoopSize << "\n"; } } while (false);
1189	if (NotDuplicatable) {
1190	LLVM_DEBUG(dbgs() << " Not unrolling loop which contains non-duplicatable"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << " Not unrolling loop which contains non-duplicatable" << " instructions.\n"; } } while (false)
1191	<< " instructions.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << " Not unrolling loop which contains non-duplicatable" << " instructions.\n"; } } while (false);
1192	return LoopUnrollResult::Unmodified;
1193	}
1194
1195	// When optimizing for size, use LoopSize + 1 as threshold (we use < Threshold
1196	// later), to (fully) unroll loops, if it does not increase code size.
1197	if (OptForSize)
1198	UP.Threshold = std::max(UP.Threshold, LoopSize + 1);
1199
1200	if (NumInlineCandidates != 0) {
1201	LLVM_DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << " Not unrolling loop with inlinable calls.\n" ; } } while (false);
1202	return LoopUnrollResult::Unmodified;
1203	}
1204
1205	// Find the smallest exact trip count for any exit. This is an upper bound
1206	// on the loop trip count, but an exit at an earlier iteration is still
1207	// possible. An unroll by the smallest exact trip count guarantees that all
1208	// brnaches relating to at least one exit can be eliminated. This is unlike
1209	// the max trip count, which only guarantees that the backedge can be broken.
1210	unsigned TripCount = 0;
1211	unsigned TripMultiple = 1;
1212	SmallVector<BasicBlock *, 8> ExitingBlocks;
1213	L->getExitingBlocks(ExitingBlocks);
1214	for (BasicBlock *ExitingBlock : ExitingBlocks)
1215	if (unsigned TC = SE.getSmallConstantTripCount(L, ExitingBlock))
1216	if (!TripCount \|\| TC < TripCount)
1217	TripCount = TripMultiple = TC;
1218
1219	if (!TripCount) {
1220	// If no exact trip count is known, determine the trip multiple of either
1221	// the loop latch or the single exiting block.
1222	// TODO: Relax for multiple exits.
1223	BasicBlock *ExitingBlock = L->getLoopLatch();
1224	if (!ExitingBlock \|\| !L->isLoopExiting(ExitingBlock))
1225	ExitingBlock = L->getExitingBlock();
1226	if (ExitingBlock)
1227	TripMultiple = SE.getSmallConstantTripMultiple(L, ExitingBlock);
1228	}
1229
1230	// If the loop contains a convergent operation, the prelude we'd add
1231	// to do the first few instructions before we hit the unrolled loop
1232	// is unsafe -- it adds a control-flow dependency to the convergent
1233	// operation. Therefore restrict remainder loop (try unrolling without).
1234	//
1235	// TODO: This is quite conservative. In practice, convergent_op()
1236	// is likely to be called unconditionally in the loop. In this
1237	// case, the program would be ill-formed (on most architectures)
1238	// unless n were the same on all threads in a thread group.
1239	// Assuming n is the same on all threads, any kind of unrolling is
1240	// safe. But currently llvm's notion of convergence isn't powerful
1241	// enough to express this.
1242	if (Convergent)
1243	UP.AllowRemainder = false;
1244
1245	// Try to find the trip count upper bound if we cannot find the exact trip
1246	// count.
1247	unsigned MaxTripCount = 0;
1248	bool MaxOrZero = false;
1249	if (!TripCount) {
1250	MaxTripCount = SE.getSmallConstantMaxTripCount(L);
1251	MaxOrZero = SE.isBackedgeTakenCountMaxOrZero(L);
1252	}
1253
1254	// computeUnrollCount() decides whether it is beneficial to use upper bound to
1255	// fully unroll the loop.
1256	bool UseUpperBound = false;
1257	bool IsCountSetExplicitly = computeUnrollCount(
1258	L, TTI, DT, LI, SE, EphValues, &ORE, TripCount, MaxTripCount, MaxOrZero,
1259	TripMultiple, LoopSize, UP, PP, UseUpperBound);
1260	if (!UP.Count)
1261	return LoopUnrollResult::Unmodified;
1262
1263	if (PP.PeelCount) {
1264	assert(UP.Count == 1 && "Cannot perform peel and unroll in the same step")(static_cast <bool> (UP.Count == 1 && "Cannot perform peel and unroll in the same step" ) ? void (0) : __assert_fail ("UP.Count == 1 && \"Cannot perform peel and unroll in the same step\"" , "/build/llvm-toolchain-snapshot-14~++20211110111138+cffbfd01e37b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp" , 1264, __extension__ __PRETTY_FUNCTION__));
1265	LLVM_DEBUG(dbgs() << "PEELING loop %" << L->getHeader()->getName()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << "PEELING loop %" << L ->getHeader()->getName() << " with iteration count " << PP.PeelCount << "!\n"; } } while (false)
1266	<< " with iteration count " << PP.PeelCount << "!\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-unroll")) { dbgs() << "PEELING loop %" << L ->getHeader()->getName() << " with iteration count " << PP.PeelCount << "!\n"; } } while (false);
1267	ORE.emit([&]() {
1268	return OptimizationRemark(DEBUG_TYPE"loop-unroll", "Peeled", L->getStartLoc(),
1269	L->getHeader())
1270	<< " peeled loop by " << ore::NV("PeelCount", PP.PeelCount)
1271	<< " iterations";
1272	});
1273
1274	if (peelLoop(L, PP.PeelCount, LI, &SE, &DT, &AC, PreserveLCSSA)) {
1275	simplifyLoopAfterUnroll(L, true, LI, &SE, &DT, &AC, &TTI);
1276	// If the loop was peeled, we already "used up" the profile information
1277	// we had, so we don't want to unroll or peel again.
1278	if (PP.PeelProfiledIterations)
1279	L->setLoopAlreadyUnrolled();
1280	return LoopUnrollResult::PartiallyUnrolled;
1281	}
1282	return LoopUnrollResult::Unmodified;
1283	}
1284
1285	// At this point, UP.Runtime indicates that run-time unrolling is allowed.
1286	// However, we only want to actually perform it if we don't know the trip
1287	// count and the unroll count doesn't divide the known trip multiple.
1288	// TODO: This decision should probably be pushed up into
1289	// computeUnrollCount().
1290	UP.Runtime &= TripCount == 0 && TripMultiple % UP.Count != 0;
1291
1292	// Save loop properties before it is transformed.
1293	MDNode *OrigLoopID = L->getLoopID();
1294
1295	// Unroll the loop.
1296	Loop *RemainderLoop = nullptr;
1297	LoopUnrollResult UnrollResult = UnrollLoop(
1298	L,
1299	{UP.Count, UP.Force, UP.Runtime, UP.AllowExpensiveTripCount,
1300	UP.UnrollRemainder, ForgetAllSCEV},
1301	LI, &SE, &DT, &AC, &TTI, &ORE, PreserveLCSSA, &RemainderLoop);
1302	if (UnrollResult == LoopUnrollResult::Unmodified)
1303	return LoopUnrollResult::Unmodified;
1304
1305	if (RemainderLoop) {
1306	Optional<MDNode *> RemainderLoopID =
1307	makeFollowupLoopID(OrigLoopID, {LLVMLoopUnrollFollowupAll,
1308	LLVMLoopUnrollFollowupRemainder});
1309	if (RemainderLoopID.hasValue())
1310	RemainderLoop->setLoopID(RemainderLoopID.getValue());
1311	}
1312
1313	if (UnrollResult != LoopUnrollResult::FullyUnrolled) {
1314	Optional<MDNode *> NewLoopID =
1315	makeFollowupLoopID(OrigLoopID, {LLVMLoopUnrollFollowupAll,
1316	LLVMLoopUnrollFollowupUnrolled});
1317	if (NewLoopID.hasValue()) {
1318	L->setLoopID(NewLoopID.getValue());
1319
1320	// Do not setLoopAlreadyUnrolled if loop attributes have been specified
1321	// explicitly.
1322	return UnrollResult;
1323	}
1324	}
1325
1326	// If loop has an unroll count pragma or unrolled by explicitly set count
1327	// mark loop as unrolled to prevent unrolling beyond that requested.
1328	if (UnrollResult != LoopUnrollResult::FullyUnrolled && IsCountSetExplicitly)
1329	L->setLoopAlreadyUnrolled();
1330
1331	return UnrollResult;
1332	}
1333
1334	namespace {
1335
1336	class LoopUnroll : public LoopPass {
1337	public:
1338	static char ID; // Pass ID, replacement for typeid
1339
1340	int OptLevel;
1341
1342	/// If false, use a cost model to determine whether unrolling of a loop is
1343	/// profitable. If true, only loops that explicitly request unrolling via
1344	/// metadata are considered. All other loops are skipped.
1345	bool OnlyWhenForced;
1346
1347	/// If false, when SCEV is invalidated, only forget everything in the
1348	/// top-most loop (call forgetTopMostLoop), of the loop being processed.
1349	/// Otherwise, forgetAllLoops and rebuild when needed next.
1350	bool ForgetAllSCEV;
1351
1352	Optional<unsigned> ProvidedCount;
1353	Optional<unsigned> ProvidedThreshold;
1354	Optional<bool> ProvidedAllowPartial;
1355	Optional<bool> ProvidedRuntime;
1356	Optional<bool> ProvidedUpperBound;
1357	Optional<bool> ProvidedAllowPeeling;
1358	Optional<bool> ProvidedAllowProfileBasedPeeling;
1359	Optional<unsigned> ProvidedFullUnrollMaxCount;
1360
1361	LoopUnroll(int OptLevel = 2, bool OnlyWhenForced = false,
1362	bool ForgetAllSCEV = false, Optional<unsigned> Threshold = None,
1363	Optional<unsigned> Count = None,
1364	Optional<bool> AllowPartial = None, Optional<bool> Runtime = None,
1365	Optional<bool> UpperBound = None,
1366	Optional<bool> AllowPeeling = None,
1367	Optional<bool> AllowProfileBasedPeeling = None,
1368	Optional<unsigned> ProvidedFullUnrollMaxCount = None)
1369	: LoopPass(ID), OptLevel(OptLevel), OnlyWhenForced(OnlyWhenForced),
1370	ForgetAllSCEV(ForgetAllSCEV), ProvidedCount(std::move(Count)),
1371	ProvidedThreshold(Threshold), ProvidedAllowPartial(AllowPartial),
1372	ProvidedRuntime(Runtime), ProvidedUpperBound(UpperBound),
1373	ProvidedAllowPeeling(AllowPeeling),
1374	ProvidedAllowProfileBasedPeeling(AllowProfileBasedPeeling),
1375	ProvidedFullUnrollMaxCount(ProvidedFullUnrollMaxCount) {
1376	initializeLoopUnrollPass(*PassRegistry::getPassRegistry());
1377	}
1378
1379	bool runOnLoop(Loop *L, LPPassManager &LPM) override {
1380	if (skipLoop(L))
1381	return false;
1382
1383	Function &F = *L->getHeader()->getParent();
1384
1385	auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1386	LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1387	ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1388	const TargetTransformInfo &TTI =
1389	getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
1390	auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1391	// For the old PM, we can't use OptimizationRemarkEmitter as an analysis
1392	// pass. Function analyses need to be preserved across loop transformations
1393	// but ORE cannot be preserved (see comment before the pass definition).
1394	OptimizationRemarkEmitter ORE(&F);
1395	bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
1396
1397	LoopUnrollResult Result = tryToUnrollLoop(
1398	L, DT, LI, SE, TTI, AC, ORE, nullptr, nullptr, PreserveLCSSA, OptLevel,
1399	OnlyWhenForced, ForgetAllSCEV, ProvidedCount, ProvidedThreshold,
1400	ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound,
1401	ProvidedAllowPeeling, ProvidedAllowProfileBasedPeeling,
1402	ProvidedFullUnrollMaxCount);
1403
1404	if (Result == LoopUnrollResult::FullyUnrolled)
1405	LPM.markLoopAsDeleted(*L);
1406
1407	return Result != LoopUnrollResult::Unmodified;
1408	}
1409
1410	/// This transformation requires natural loop information & requires that
1411	/// loop preheaders be inserted into the CFG...
1412	void getAnalysisUsage(AnalysisUsage &AU) const override {
1413	AU.addRequired<AssumptionCacheTracker>();
1414	AU.addRequired<TargetTransformInfoWrapperPass>();
1415	// FIXME: Loop passes are required to preserve domtree, and for now we just
1416	// recreate dom info if anything gets unrolled.
1417	getLoopAnalysisUsage(AU);
1418	}
1419	};
1420
1421	} // end anonymous namespace
1422
1423	char LoopUnroll::ID = 0;
1424
1425	INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false)static void *initializeLoopUnrollPassOnce(PassRegistry &Registry ) {
1426	INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)initializeAssumptionCacheTrackerPass(Registry);
1427	INITIALIZE_PASS_DEPENDENCY(LoopPass)initializeLoopPassPass(Registry);
1428	INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)initializeTargetTransformInfoWrapperPassPass(Registry);
1429	INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)PassInfo PI = new PassInfo( "Unroll loops", "loop-unroll", & LoopUnroll::ID, PassInfo::NormalCtor_t(callDefaultCtor<LoopUnroll >), false, false); Registry.registerPass(PI, true); return PI; } static llvm::once_flag InitializeLoopUnrollPassFlag; void llvm::initializeLoopUnrollPass(PassRegistry &Registry) { llvm::call_once(InitializeLoopUnrollPassFlag, initializeLoopUnrollPassOnce , std::ref(Registry)); }
1430
1431	Pass *llvm::createLoopUnrollPass(int OptLevel, bool OnlyWhenForced,
1432	bool ForgetAllSCEV, int Threshold, int Count,
1433	int AllowPartial, int Runtime, int UpperBound,
1434	int AllowPeeling) {
1435	// TODO: It would make more sense for this function to take the optionals
1436	// directly, but that's dangerous since it would silently break out of tree
1437	// callers.
1438	return new LoopUnroll(
1439	OptLevel, OnlyWhenForced, ForgetAllSCEV,
1440	Threshold == -1 ? None : Optional<unsigned>(Threshold),
1441	Count == -1 ? None : Optional<unsigned>(Count),
1442	AllowPartial == -1 ? None : Optional<bool>(AllowPartial),
1443	Runtime == -1 ? None : Optional<bool>(Runtime),
1444	UpperBound == -1 ? None : Optional<bool>(UpperBound),
1445	AllowPeeling == -1 ? None : Optional<bool>(AllowPeeling));
1446	}
1447
1448	Pass *llvm::createSimpleLoopUnrollPass(int OptLevel, bool OnlyWhenForced,
1449	bool ForgetAllSCEV) {
1450	return createLoopUnrollPass(OptLevel, OnlyWhenForced, ForgetAllSCEV, -1, -1,
1451	0, 0, 0, 1);
1452	}
1453
1454	PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager &AM,
1455	LoopStandardAnalysisResults &AR,
1456	LPMUpdater &Updater) {
1457	// For the new PM, we can't use OptimizationRemarkEmitter as an analysis
1458	// pass. Function analyses need to be preserved across loop transformations
1459	// but ORE cannot be preserved (see comment before the pass definition).
1460	OptimizationRemarkEmitter ORE(L.getHeader()->getParent());
1461
1462	// Keep track of the previous loop structure so we can identify new loops
1463	// created by unrolling.
1464	Loop *ParentL = L.getParentLoop();
1465	SmallPtrSet<Loop *, 4> OldLoops;
1466	if (ParentL)
1467	OldLoops.insert(ParentL->begin(), ParentL->end());
1468	else
1469	OldLoops.insert(AR.LI.begin(), AR.LI.end());
1470
1471	std::string LoopName = std::string(L.getName());
1472
1473	bool Changed = tryToUnrollLoop(&L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, ORE,
1474	/BFI/ nullptr, /PSI/ nullptr,
1475	/PreserveLCSSA/ true, OptLevel,
1476	OnlyWhenForced, ForgetSCEV, /Count/ None,
1477	/Threshold/ None, /AllowPartial/ false,
1478	/Runtime/ false, /UpperBound/ false,
1479	/AllowPeeling/ true,
1480	/AllowProfileBasedPeeling/ false,
1481	/FullUnrollMaxCount/ None) !=
1482	LoopUnrollResult::Unmodified;
1483	if (!Changed)
1484	return PreservedAnalyses::all();
1485
1486	// The parent must not be damaged by unrolling!
1487	#ifndef NDEBUG
1488	if (ParentL)
1489	ParentL->verifyLoop();
1490	#endif
1491
1492	// Unrolling can do several things to introduce new loops into a loop nest:
1493	// - Full unrolling clones child loops within the current loop but then
1494	// removes the current loop making all of the children appear to be new
1495	// sibling loops.
1496	//
1497	// When a new loop appears as a sibling loop after fully unrolling,
1498	// its nesting structure has fundamentally changed and we want to revisit
1499	// it to reflect that.
1500	//
1501	// When unrolling has removed the current loop, we need to tell the
1502	// infrastructure that it is gone.
1503	//
1504	// Finally, we support a debugging/testing mode where we revisit child loops
1505	// as well. These are not expected to require further optimizations as either
1506	// they or the loop they were cloned from have been directly visited already.
1507	// But the debugging mode allows us to check this assumption.
1508	bool IsCurrentLoopValid = false;
1509	SmallVector<Loop *, 4> SibLoops;
1510	if (ParentL)
1511	SibLoops.append(ParentL->begin(), ParentL->end());
1512	else
1513	SibLoops.append(AR.LI.begin(), AR.LI.end());
1514	erase_if(SibLoops, [&](Loop *SibLoop) {
1515	if (SibLoop == &L) {
1516	IsCurrentLoopValid = true;
1517	return true;
1518	}
1519
1520	// Otherwise erase the loop from the list if it was in the old loops.
1521	return OldLoops.contains(SibLoop);
1522	});
1523	Updater.addSiblingLoops(SibLoops);
1524
1525	if (!IsCurrentLoopValid) {
1526	Updater.markLoopAsDeleted(L, LoopName);
1527	} else {
1528	// We can only walk child loops if the current loop remained valid.
1529	if (UnrollRevisitChildLoops) {
1530	// Walk all of the child loops.
1531	SmallVector<Loop *, 4> ChildLoops(L.begin(), L.end());
1532	Updater.addChildLoops(ChildLoops);
1533	}
1534	}
1535
1536	return getLoopPassPreservedAnalyses();
1537	}
1538
1539	PreservedAnalyses LoopUnrollPass::run(Function &F,
1540	FunctionAnalysisManager &AM) {
1541	auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
1542	auto &LI = AM.getResult<LoopAnalysis>(F);
1543	auto &TTI = AM.getResult<TargetIRAnalysis>(F);
1544	auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
1545	auto &AC = AM.getResult<AssumptionAnalysis>(F);
1546	auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
1547
1548	LoopAnalysisManager *LAM = nullptr;
1549	if (auto *LAMProxy = AM.getCachedResult<LoopAnalysisManagerFunctionProxy>(F))
1550	LAM = &LAMProxy->getManager();
1551
1552	auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
1553	ProfileSummaryInfo *PSI =
1554	MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
1555	auto *BFI = (PSI && PSI->hasProfileSummary()) ?
1556	&AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
1557
1558	bool Changed = false;
1559
1560	// The unroller requires loops to be in simplified form, and also needs LCSSA.
1561	// Since simplification may add new inner loops, it has to run before the
1562	// legality and profitability checks. This means running the loop unroller
1563	// will simplify all loops, regardless of whether anything end up being
1564	// unrolled.
1565	for (auto &L : LI) {
1566	Changed \|=
1567	simplifyLoop(L, &DT, &LI, &SE, &AC, nullptr, false /* PreserveLCSSA */);
1568	Changed \|= formLCSSARecursively(*L, DT, &LI, &SE);
1569	}
1570
1571	// Add the loop nests in the reverse order of LoopInfo. See method
1572	// declaration.
1573	SmallPriorityWorklist<Loop *, 4> Worklist;
1574	appendLoopsToWorklist(LI, Worklist);
1575
1576	while (!Worklist.empty()) {
1577	// Because the LoopInfo stores the loops in RPO, we walk the worklist
1578	// from back to front so that we work forward across the CFG, which
1579	// for unrolling is only needed to get optimization remarks emitted in
1580	// a forward order.
1581	Loop &L = *Worklist.pop_back_val();
1582	#ifndef NDEBUG
1583	Loop *ParentL = L.getParentLoop();
1584	#endif
1585
1586	// Check if the profile summary indicates that the profiled application
1587	// has a huge working set size, in which case we disable peeling to avoid
1588	// bloating it further.
1589	Optional<bool> LocalAllowPeeling = UnrollOpts.AllowPeeling;
1590	if (PSI && PSI->hasHugeWorkingSetSize())
1591	LocalAllowPeeling = false;
1592	std::string LoopName = std::string(L.getName());
1593	// The API here is quite complex to call and we allow to select some
1594	// flavors of unrolling during construction time (by setting UnrollOpts).
1595	LoopUnrollResult Result = tryToUnrollLoop(
1596	&L, DT, &LI, SE, TTI, AC, ORE, BFI, PSI,
1597	/PreserveLCSSA/ true, UnrollOpts.OptLevel, UnrollOpts.OnlyWhenForced,
1598	UnrollOpts.ForgetSCEV, /Count/ None,
1599	/Threshold/ None, UnrollOpts.AllowPartial, UnrollOpts.AllowRuntime,
1600	UnrollOpts.AllowUpperBound, LocalAllowPeeling,
1601	UnrollOpts.AllowProfileBasedPeeling, UnrollOpts.FullUnrollMaxCount);
1602	Changed \|= Result != LoopUnrollResult::Unmodified;
1603
1604	// The parent must not be damaged by unrolling!
1605	#ifndef NDEBUG
1606	if (Result != LoopUnrollResult::Unmodified && ParentL)
1607	ParentL->verifyLoop();
1608	#endif
1609
1610	// Clear any cached analysis results for L if we removed it completely.
1611	if (LAM && Result == LoopUnrollResult::FullyUnrolled)
1612	LAM->clear(L, LoopName);
1613	}
1614
1615	if (!Changed)
1616	return PreservedAnalyses::all();
1617
1618	return getLoopPassPreservedAnalyses();
1619	}
1620
1621	void LoopUnrollPass::printPipeline(
1622	raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
1623	static_cast<PassInfoMixin<LoopUnrollPass> *>(this)->printPipeline(
1624	OS, MapClassName2PassName);
1625	OS << "<";
1626	if (UnrollOpts.AllowPartial != None)
1627	OS << (UnrollOpts.AllowPartial.getValue() ? "" : "no-") << "partial;";
1628	if (UnrollOpts.AllowPeeling != None)
1629	OS << (UnrollOpts.AllowPeeling.getValue() ? "" : "no-") << "peeling;";
1630	if (UnrollOpts.AllowRuntime != None)
1631	OS << (UnrollOpts.AllowRuntime.getValue() ? "" : "no-") << "runtime;";
1632	if (UnrollOpts.AllowUpperBound != None)
1633	OS << (UnrollOpts.AllowUpperBound.getValue() ? "" : "no-") << "upperbound;";
1634	if (UnrollOpts.AllowProfileBasedPeeling != None)
1635	OS << (UnrollOpts.AllowProfileBasedPeeling.getValue() ? "" : "no-")
1636	<< "profile-peeling;";
1637	if (UnrollOpts.FullUnrollMaxCount != None)
1638	OS << "full-unroll-max=" << UnrollOpts.FullUnrollMaxCount << ";";
1639	OS << "O" << UnrollOpts.OptLevel;
1640	OS << ">";
1641	}