LLVM 20.0.0git
InlineOrder.cpp
Go to the documentation of this file.
1//===- InlineOrder.cpp - Inlining order abstraction -*- C++ ---*-----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
20
21using namespace llvm;
22
23#define DEBUG_TYPE "inline-order"
24
25enum class InlinePriorityMode : int { Size, Cost, CostBenefit, ML };
26
28 "inline-priority-mode", cl::init(InlinePriorityMode::Size), cl::Hidden,
29 cl::desc("Choose the priority mode to use in module inline"),
31 "Use callee size priority."),
33 "Use inline cost priority."),
35 "Use cost-benefit ratio."),
36 clEnumValN(InlinePriorityMode::ML, "ml", "Use ML.")));
37
39 "module-inliner-top-priority-threshold", cl::Hidden, cl::init(0),
40 cl::desc("The cost threshold for call sites that get inlined without the "
41 "cost-benefit analysis"));
42
43namespace {
44
45llvm::InlineCost getInlineCostWrapper(CallBase &CB,
47 const InlineParams &Params) {
48 Function &Caller = *CB.getCaller();
51 .getCachedResult<ProfileSummaryAnalysis>(
52 *CB.getParent()->getParent()->getParent());
53
55 auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
57 };
58 auto GetBFI = [&](Function &F) -> BlockFrequencyInfo & {
60 };
61 auto GetTLI = [&](Function &F) -> const TargetLibraryInfo & {
63 };
64
66 auto &CalleeTTI = FAM.getResult<TargetIRAnalysis>(Callee);
67 bool RemarksEnabled =
68 Callee.getContext().getDiagHandlerPtr()->isMissedOptRemarkEnabled(
70 return getInlineCost(CB, Params, CalleeTTI, GetAssumptionCache, GetTLI,
71 GetBFI, PSI, RemarksEnabled ? &ORE : nullptr);
72}
73
74class SizePriority {
75public:
76 SizePriority() = default;
77 SizePriority(const CallBase *CB, FunctionAnalysisManager &,
78 const InlineParams &) {
80 Size = Callee->getInstructionCount();
81 }
82
83 static bool isMoreDesirable(const SizePriority &P1, const SizePriority &P2) {
84 return P1.Size < P2.Size;
85 }
86
87private:
88 unsigned Size = UINT_MAX;
89};
90
91class CostPriority {
92public:
93 CostPriority() = default;
94 CostPriority(const CallBase *CB, FunctionAnalysisManager &FAM,
95 const InlineParams &Params) {
96 auto IC = getInlineCostWrapper(const_cast<CallBase &>(*CB), FAM, Params);
97 if (IC.isVariable())
98 Cost = IC.getCost();
99 else
100 Cost = IC.isNever() ? INT_MAX : INT_MIN;
101 }
102
103 static bool isMoreDesirable(const CostPriority &P1, const CostPriority &P2) {
104 return P1.Cost < P2.Cost;
105 }
106
107private:
108 int Cost = INT_MAX;
109};
110
111class CostBenefitPriority {
112public:
113 CostBenefitPriority() = default;
114 CostBenefitPriority(const CallBase *CB, FunctionAnalysisManager &FAM,
115 const InlineParams &Params) {
116 auto IC = getInlineCostWrapper(const_cast<CallBase &>(*CB), FAM, Params);
117 if (IC.isVariable())
118 Cost = IC.getCost();
119 else
120 Cost = IC.isNever() ? INT_MAX : INT_MIN;
121 StaticBonusApplied = IC.getStaticBonusApplied();
122 CostBenefit = IC.getCostBenefit();
123 }
124
125 static bool isMoreDesirable(const CostBenefitPriority &P1,
126 const CostBenefitPriority &P2) {
127 // We prioritize call sites in the dictionary order of the following
128 // priorities:
129 //
130 // 1. Those call sites that are expected to reduce the caller size when
131 // inlined. Within them, we prioritize those call sites with bigger
132 // reduction.
133 //
134 // 2. Those call sites that have gone through the cost-benefit analysis.
135 // Currently, they are limited to hot call sites. Within them, we
136 // prioritize those call sites with higher benefit-to-cost ratios.
137 //
138 // 3. Remaining call sites are prioritized according to their costs.
139
140 // We add back StaticBonusApplied to determine whether we expect the caller
141 // to shrink (even if we don't delete the callee).
142 bool P1ReducesCallerSize =
143 P1.Cost + P1.StaticBonusApplied < ModuleInlinerTopPriorityThreshold;
144 bool P2ReducesCallerSize =
145 P2.Cost + P2.StaticBonusApplied < ModuleInlinerTopPriorityThreshold;
146 if (P1ReducesCallerSize || P2ReducesCallerSize) {
147 // If one reduces the caller size while the other doesn't, then return
148 // true iff P1 reduces the caller size.
149 if (P1ReducesCallerSize != P2ReducesCallerSize)
150 return P1ReducesCallerSize;
151
152 // If they both reduce the caller size, pick the one with the smaller
153 // cost.
154 return P1.Cost < P2.Cost;
155 }
156
157 bool P1HasCB = P1.CostBenefit.has_value();
158 bool P2HasCB = P2.CostBenefit.has_value();
159 if (P1HasCB || P2HasCB) {
160 // If one has undergone the cost-benefit analysis while the other hasn't,
161 // then return true iff P1 has.
162 if (P1HasCB != P2HasCB)
163 return P1HasCB;
164
165 // If they have undergone the cost-benefit analysis, then pick the one
166 // with a higher benefit-to-cost ratio.
167 APInt LHS = P1.CostBenefit->getBenefit() * P2.CostBenefit->getCost();
168 APInt RHS = P2.CostBenefit->getBenefit() * P1.CostBenefit->getCost();
169 return LHS.ugt(RHS);
170 }
171
172 // Remaining call sites are ordered according to their costs.
173 return P1.Cost < P2.Cost;
174 }
175
176private:
177 int Cost = INT_MAX;
178 int StaticBonusApplied = 0;
179 std::optional<CostBenefitPair> CostBenefit;
180};
181
182class MLPriority {
183public:
184 MLPriority() = default;
185 MLPriority(const CallBase *CB, FunctionAnalysisManager &FAM,
186 const InlineParams &Params) {
187 auto IC = getInlineCostWrapper(const_cast<CallBase &>(*CB), FAM, Params);
188 if (IC.isVariable())
189 Cost = IC.getCost();
190 else
191 Cost = IC.isNever() ? INT_MAX : INT_MIN;
192 }
193
194 static bool isMoreDesirable(const MLPriority &P1, const MLPriority &P2) {
195 return P1.Cost < P2.Cost;
196 }
197
198private:
199 int Cost = INT_MAX;
200};
201
202template <typename PriorityT>
203class PriorityInlineOrder : public InlineOrder<std::pair<CallBase *, int>> {
204 using T = std::pair<CallBase *, int>;
205
206 bool hasLowerPriority(const CallBase *L, const CallBase *R) const {
207 const auto I1 = Priorities.find(L);
208 const auto I2 = Priorities.find(R);
209 assert(I1 != Priorities.end() && I2 != Priorities.end());
210 return PriorityT::isMoreDesirable(I2->second, I1->second);
211 }
212
213 bool updateAndCheckDecreased(const CallBase *CB) {
214 auto It = Priorities.find(CB);
215 const auto OldPriority = It->second;
216 It->second = PriorityT(CB, FAM, Params);
217 const auto NewPriority = It->second;
218 return PriorityT::isMoreDesirable(OldPriority, NewPriority);
219 }
220
221 // A call site could become less desirable for inlining because of the size
222 // growth from prior inlining into the callee. This method is used to lazily
223 // update the desirability of a call site if it's decreasing. It is only
224 // called on pop(), not every time the desirability changes. When the
225 // desirability of the front call site decreases, an updated one would be
226 // pushed right back into the heap. For simplicity, those cases where the
227 // desirability of a call site increases are ignored here.
228 void pop_heap_adjust() {
229 std::pop_heap(Heap.begin(), Heap.end(), isLess);
230 while (updateAndCheckDecreased(Heap.back())) {
231 std::push_heap(Heap.begin(), Heap.end(), isLess);
232 std::pop_heap(Heap.begin(), Heap.end(), isLess);
233 }
234 }
235
236public:
237 PriorityInlineOrder(FunctionAnalysisManager &FAM, const InlineParams &Params)
238 : FAM(FAM), Params(Params) {
239 isLess = [&](const CallBase *L, const CallBase *R) {
240 return hasLowerPriority(L, R);
241 };
242 }
243
244 size_t size() override { return Heap.size(); }
245
246 void push(const T &Elt) override {
247 CallBase *CB = Elt.first;
248 const int InlineHistoryID = Elt.second;
249
250 Heap.push_back(CB);
251 Priorities[CB] = PriorityT(CB, FAM, Params);
252 std::push_heap(Heap.begin(), Heap.end(), isLess);
253 InlineHistoryMap[CB] = InlineHistoryID;
254 }
255
256 T pop() override {
257 assert(size() > 0);
258 pop_heap_adjust();
259
260 CallBase *CB = Heap.pop_back_val();
261 T Result = std::make_pair(CB, InlineHistoryMap[CB]);
262 InlineHistoryMap.erase(CB);
263 return Result;
264 }
265
266 void erase_if(function_ref<bool(T)> Pred) override {
267 auto PredWrapper = [=](CallBase *CB) -> bool {
268 return Pred(std::make_pair(CB, InlineHistoryMap[CB]));
269 };
270 llvm::erase_if(Heap, PredWrapper);
271 std::make_heap(Heap.begin(), Heap.end(), isLess);
272 }
273
274private:
276 std::function<bool(const CallBase *L, const CallBase *R)> isLess;
277 DenseMap<CallBase *, int> InlineHistoryMap;
280 const InlineParams &Params;
281};
282
283} // namespace
284
286
287std::unique_ptr<InlineOrder<std::pair<CallBase *, int>>>
289 const InlineParams &Params,
291 switch (UseInlinePriority) {
293 LLVM_DEBUG(dbgs() << " Current used priority: Size priority ---- \n");
294 return std::make_unique<PriorityInlineOrder<SizePriority>>(FAM, Params);
295
297 LLVM_DEBUG(dbgs() << " Current used priority: Cost priority ---- \n");
298 return std::make_unique<PriorityInlineOrder<CostPriority>>(FAM, Params);
299
302 dbgs() << " Current used priority: cost-benefit priority ---- \n");
303 return std::make_unique<PriorityInlineOrder<CostBenefitPriority>>(FAM,
304 Params);
306 LLVM_DEBUG(dbgs() << " Current used priority: ML priority ---- \n");
307 return std::make_unique<PriorityInlineOrder<MLPriority>>(FAM, Params);
308 }
309 return nullptr;
310}
311
312std::unique_ptr<InlineOrder<std::pair<CallBase *, int>>>
316 LLVM_DEBUG(dbgs() << " Current used priority: plugin ---- \n");
317 return MAM.getResult<PluginInlineOrderAnalysis>(M).Factory(FAM, Params, MAM,
318 M);
319 }
320 return getDefaultInlineOrder(FAM, Params, MAM, M);
321}
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:686
#define LLVM_DEBUG(...)
Definition: Debug.h:106
This is the interface for a simple mod/ref and alias analysis over globals.
static cl::opt< int > ModuleInlinerTopPriorityThreshold("module-inliner-top-priority-threshold", cl::Hidden, cl::init(0), cl::desc("The cost threshold for call sites that get inlined without the " "cost-benefit analysis"))
InlinePriorityMode
Definition: InlineOrder.cpp:25
static cl::opt< InlinePriorityMode > UseInlinePriority("inline-priority-mode", cl::init(InlinePriorityMode::Size), cl::Hidden, cl::desc("Choose the priority mode to use in module inline"), cl::values(clEnumValN(InlinePriorityMode::Size, "size", "Use callee size priority."), clEnumValN(InlinePriorityMode::Cost, "cost", "Use inline cost priority."), clEnumValN(InlinePriorityMode::CostBenefit, "cost-benefit", "Use cost-benefit ratio."), clEnumValN(InlinePriorityMode::ML, "ml", "Use ML.")))
#define DEBUG_TYPE
Definition: InlineOrder.cpp:23
#define F(x, y, z)
Definition: MD5.cpp:55
FunctionAnalysisManager FAM
ModuleAnalysisManager MAM
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This pass exposes codegen information to IR-level passes.
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition: APInt.h:78
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
bool isPassRegistered() const
Returns true if the specified analysis pass is registered.
Definition: PassManager.h:402
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:410
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1120
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1349
Function * getCaller()
Helper to get the caller (the parent function).
Represents the cost of inlining a function.
Definition: InlineCost.h:89
virtual T pop()=0
virtual size_t size()=0
virtual void erase_if(function_ref< bool(T)> Pred)=0
virtual void push(const T &Elt)=0
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...
Definition: PassManager.h:692
Used for dynamically loading instances of InlineOrder as plugins.
Definition: InlineOrder.h:52
Analysis providing profile information.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition: ilist_node.h:32
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:711
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
InlineCost getInlineCost(CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< const TargetLibraryInfo &(Function &)> GetTLI, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Get an InlineCost object representing the cost of inlining this callsite.
std::unique_ptr< InlineOrder< std::pair< CallBase *, int > > > getDefaultInlineOrder(FunctionAnalysisManager &FAM, const InlineParams &Params, ModuleAnalysisManager &MAM, Module &M)
std::unique_ptr< InlineOrder< std::pair< CallBase *, int > > > getInlineOrder(FunctionAnalysisManager &FAM, const InlineParams &Params, ModuleAnalysisManager &MAM, Module &M)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:2099
A special type used by analysis passes to provide an address that identifies that particular analysis...
Definition: Analysis.h:28
Thresholds to tune inline cost analysis.
Definition: InlineCost.h:205