LLVM 23.0.0git
InlineCost.cpp
Go to the documentation of this file.
1//===- InlineCost.cpp - Cost analysis for inliner -------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements inline cost analysis.
10//
11//===----------------------------------------------------------------------===//
12
14#include "llvm/ADT/STLExtras.h"
15#include "llvm/ADT/SetVector.h"
18#include "llvm/ADT/Statistic.h"
33#include "llvm/Config/llvm-config.h"
35#include "llvm/IR/CallingConv.h"
36#include "llvm/IR/DataLayout.h"
37#include "llvm/IR/Dominators.h"
39#include "llvm/IR/GlobalAlias.h"
40#include "llvm/IR/InlineAsm.h"
41#include "llvm/IR/InstVisitor.h"
43#include "llvm/IR/Operator.h"
46#include "llvm/Support/Debug.h"
49#include <climits>
50#include <limits>
51#include <optional>
52
53using namespace llvm;
54
55#define DEBUG_TYPE "inline-cost"
56
57STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed");
58
// Fallback inlining threshold. Note the option is spelled
// "inlinedefault-threshold" (no hyphen between "inline" and "default").
static cl::opt<int>
    DefaultThreshold("inlinedefault-threshold", cl::Hidden, cl::init(225),
                     cl::desc("Default amount of inlining to perform"));
62
63// We introduce this option since there is a minor compile-time win by avoiding
64// addition of TTI attributes (target-features in particular) to inline
65// candidates when they are guaranteed to be the same as top level methods in
66// some use cases. If we avoid adding the attribute, we need an option to avoid
67// checking these attributes.
69 "ignore-tti-inline-compatible", cl::Hidden, cl::init(false),
70 cl::desc("Ignore TTI attributes compatibility check between callee/caller "
71 "during inline cost calculation"));
72
74 "print-instruction-comments", cl::Hidden, cl::init(false),
75 cl::desc("Prints comments for instruction based on inline cost analysis"));
76
78 "inline-threshold", cl::Hidden, cl::init(225),
79 cl::desc("Control the amount of inlining to perform (default = 225)"));
80
82 "inlinehint-threshold", cl::Hidden, cl::init(325),
83 cl::desc("Threshold for inlining functions with inline hint"));
84
// Tighter threshold applied to call sites classified as cold.
static cl::opt<int>
    ColdCallSiteThreshold("inline-cold-callsite-threshold", cl::Hidden,
                          cl::init(45),
                          cl::desc("Threshold for inlining cold callsites"));
89
91 "inline-enable-cost-benefit-analysis", cl::Hidden, cl::init(false),
92 cl::desc("Enable the cost-benefit analysis for the inliner"));
93
94// InlineSavingsMultiplier overrides per TTI multipliers iff it is
95// specified explicitly in command line options. This option is exposed
96// for tuning and testing.
98 "inline-savings-multiplier", cl::Hidden, cl::init(8),
99 cl::desc("Multiplier to multiply cycle savings by during inlining"));
100
101// InlineSavingsProfitableMultiplier overrides per TTI multipliers iff it is
102// specified explicitly in command line options. This option is exposed
103// for tuning and testing.
105 "inline-savings-profitable-multiplier", cl::Hidden, cl::init(4),
106 cl::desc("A multiplier on top of cycle savings to decide whether the "
107 "savings won't justify the cost"));
108
109static cl::opt<int>
110 InlineSizeAllowance("inline-size-allowance", cl::Hidden, cl::init(100),
111 cl::desc("The maximum size of a callee that get's "
112 "inlined without sufficient cycle savings"));
113
114// We introduce this threshold to help performance of instrumentation based
115// PGO before we actually hook up inliner with analysis passes such as BPI and
116// BFI.
118 "inlinecold-threshold", cl::Hidden, cl::init(45),
119 cl::desc("Threshold for inlining functions with cold attribute"));
120
// Much larger threshold applied to call sites classified as hot.
static cl::opt<int>
    HotCallSiteThreshold("hot-callsite-threshold", cl::Hidden, cl::init(3000),
                         cl::desc("Threshold for hot callsites "));
124
126 "locally-hot-callsite-threshold", cl::Hidden, cl::init(525),
127 cl::desc("Threshold for locally hot callsites "));
128
130 "cold-callsite-rel-freq", cl::Hidden, cl::init(2),
131 cl::desc("Maximum block frequency, expressed as a percentage of caller's "
132 "entry frequency, for a callsite to be cold in the absence of "
133 "profile information."));
134
136 "hot-callsite-rel-freq", cl::Hidden, cl::init(60),
137 cl::desc("Minimum block frequency, expressed as a multiple of caller's "
138 "entry frequency, for a callsite to be hot in the absence of "
139 "profile information."));
140
// The abstract cost unit for this file: each surviving instruction is charged
// this much, and many of the penalties below are multiples of it.
static cl::opt<int>
    InstrCost("inline-instr-cost", cl::Hidden, cl::init(5),
              cl::desc("Cost of a single instruction when inlining"));
144
146 "inline-asm-instr-cost", cl::Hidden, cl::init(0),
147 cl::desc("Cost of a single inline asm instruction when inlining"));
148
// Extra per-load/store cost, charged via onMemAccess(); defaults to 0 so it
// is a no-op unless explicitly tuned.
static cl::opt<int>
    MemAccessCost("inline-memaccess-cost", cl::Hidden, cl::init(0),
                  cl::desc("Cost of load/store instruction when inlining"));
152
154 "inline-call-penalty", cl::Hidden, cl::init(25),
155 cl::desc("Call penalty that is applied per callsite when inlining"));
156
// Hard cap on the callee's static stack size; the default (size_t max) makes
// the check effectively disabled unless the user sets a limit.
static cl::opt<size_t>
    StackSizeThreshold("inline-max-stacksize", cl::Hidden,
                       cl::init(std::numeric_limits<size_t>::max()),
                       cl::desc("Do not inline functions with a stack size "
                                "that exceeds the specified limit"));
162
164 "recursive-inline-max-stacksize", cl::Hidden,
166 cl::desc("Do not inline recursive functions with a stack "
167 "size that exceeds the specified limit"));
168
170 "inline-cost-full", cl::Hidden,
171 cl::desc("Compute the full inline cost of a call site even when the cost "
172 "exceeds the threshold."));
173
175 "inline-caller-superset-nobuiltin", cl::Hidden, cl::init(true),
176 cl::desc("Allow inlining when caller has a superset of callee's nobuiltin "
177 "attributes."));
178
180 "disable-gep-const-evaluation", cl::Hidden, cl::init(false),
181 cl::desc("Disables evaluation of GetElementPtr with constant operands"));
182
184 "inline-all-viable-calls", cl::Hidden, cl::init(false),
185 cl::desc("Inline all viable calls, even if they exceed the inlining "
186 "threshold"));
187namespace llvm {
188std::optional<int> getStringFnAttrAsInt(const Attribute &Attr) {
189 if (Attr.isValid()) {
190 int AttrValue = 0;
191 if (!Attr.getValueAsString().getAsInteger(10, AttrValue))
192 return AttrValue;
193 }
194 return std::nullopt;
195}
196
197std::optional<int> getStringFnAttrAsInt(CallBase &CB, StringRef AttrKind) {
198 return getStringFnAttrAsInt(CB.getFnAttr(AttrKind));
199}
200
201std::optional<int> getStringFnAttrAsInt(Function *F, StringRef AttrKind) {
202 return getStringFnAttrAsInt(F->getFnAttribute(AttrKind));
203}
204
namespace InlineConstants {
// Expose the -inline-instr-cost knob to code outside this file.
int getInstrCost() { return InstrCost; }

} // namespace InlineConstants
209
210} // namespace llvm
211
212namespace {
213class InlineCostCallAnalyzer;
214
/// Per-instruction record of how the running inline cost and threshold
/// changed while that instruction was analyzed. Only populated when
/// instruction-level cost comments are requested.
struct InstructionCostDetail {
  int CostBefore = 0;
  int CostAfter = 0;
  int ThresholdBefore = 0;
  int ThresholdAfter = 0;

  // Net cost charged for this instruction.
  int getCostDelta() const { return CostAfter - CostBefore; }

  // Net threshold adjustment made while analyzing this instruction.
  int getThresholdDelta() const { return ThresholdAfter - ThresholdBefore; }

  // True when analyzing this instruction adjusted the threshold at all.
  bool hasThresholdChanged() const { return getThresholdDelta() != 0; }
};
229
/// AssemblyAnnotationWriter intended to print, next to each instruction, the
/// per-instruction cost information recorded by the owning
/// InlineCostCallAnalyzer (see InstructionCostDetail).
class InlineCostAnnotationWriter : public AssemblyAnnotationWriter {
private:
  // Non-owning pointer to the analyzer whose records are printed.
  InlineCostCallAnalyzer *const ICCA;

public:
  InlineCostAnnotationWriter(InlineCostCallAnalyzer *ICCA) : ICCA(ICCA) {}
  // Defined out-of-line (after InlineCostCallAnalyzer is complete).
  void emitInstructionAnnot(const Instruction *I,
                            formatted_raw_ostream &OS) override;
};
239
/// Carry out call site analysis, in order to evaluate inlinability.
/// NOTE: the type is currently used as implementation detail of functions such
/// as llvm::getInlineCost. Note the function_ref constructor parameters - the
/// expectation is that they come from the outer scope, from the wrapper
/// functions. If we want to support constructing CallAnalyzer objects where
/// lambdas are provided inline at construction, or where the object needs to
/// otherwise survive past the scope of the provided functions, we need to
/// revisit the argument types.
class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
  typedef InstVisitor<CallAnalyzer, bool> Base;
  friend class InstVisitor<CallAnalyzer, bool>;

protected:
  virtual ~CallAnalyzer() = default;
  /// The TargetTransformInfo available for this compilation.
  const TargetTransformInfo &TTI;

  /// Getter for the cache of @llvm.assume intrinsics.
  function_ref<AssumptionCache &(Function &)> GetAssumptionCache;

  /// Getter for BlockFrequencyInfo
  function_ref<BlockFrequencyInfo &(Function &)> GetBFI;

  /// Getter for TargetLibraryInfo
  function_ref<const TargetLibraryInfo &(Function &)> GetTLI;

  /// Profile summary information.
  ProfileSummaryInfo *PSI;

  /// The called function.
  Function &F;

  // Cache the DataLayout since we use it a lot.
  const DataLayout &DL;

  /// The OptimizationRemarkEmitter available for this compilation.
  OptimizationRemarkEmitter *ORE;

  /// The candidate callsite being analyzed. Please do not use this to do
  /// analysis in the caller function; we want the inline cost query to be
  /// easily cacheable. Instead, use the cover function paramHasAttr.
  CallBase &CandidateCall;

  /// Getter for the cache of ephemeral values.
  function_ref<EphemeralValuesCache &(Function &)> GetEphValuesCache = nullptr;

  /// Extension points for handling callsite features.
  // Called before a basic block was analyzed.
  virtual void onBlockStart(const BasicBlock *BB) {}

  /// Called after a basic block was analyzed.
  virtual void onBlockAnalyzed(const BasicBlock *BB) {}

  /// Called before an instruction was analyzed
  virtual void onInstructionAnalysisStart(const Instruction *I) {}

  /// Called after an instruction was analyzed
  virtual void onInstructionAnalysisFinish(const Instruction *I) {}

  /// Called at the end of the analysis of the callsite. Return the outcome of
  /// the analysis, i.e. 'InlineResult(true)' if the inlining may happen, or
  /// the reason it can't.
  virtual InlineResult finalizeAnalysis() { return InlineResult::success(); }
  /// Called when we're about to start processing a basic block, and every time
  /// we are done processing an instruction. Return true if there is no point in
  /// continuing the analysis (e.g. we've determined already the call site is
  /// too expensive to inline)
  virtual bool shouldStop() { return false; }

  /// Called before the analysis of the callee body starts (with callsite
  /// contexts propagated). It checks callsite-specific information. Return a
  /// reason analysis can't continue if that's the case, or 'true' if it may
  /// continue.
  virtual InlineResult onAnalysisStart() { return InlineResult::success(); }
  /// Called if the analysis engine decides SROA cannot be done for the given
  /// alloca.
  virtual void onDisableSROA(AllocaInst *Arg) {}

  /// Called the analysis engine determines load elimination won't happen.
  virtual void onDisableLoadElimination() {}

  /// Called when we visit a CallBase, before the analysis starts. Return false
  /// to stop further processing of the instruction.
  virtual bool onCallBaseVisitStart(CallBase &Call) { return true; }

  /// Called to account for a call.
  virtual void onCallPenalty() {}

  /// Called to account for a load or store.
  virtual void onMemAccess(){};

  /// Called to account for the expectation the inlining would result in a load
  /// elimination.
  virtual void onLoadEliminationOpportunity() {}

  /// Called to account for the cost of argument setup for the Call in the
  /// callee's body (not the callsite currently under analysis).
  virtual void onCallArgumentSetup(const CallBase &Call) {}

  /// Called to account for a load relative intrinsic.
  virtual void onLoadRelativeIntrinsic() {}

  /// Called to account for a lowered call.
  virtual void onLoweredCall(Function *F, CallBase &Call, bool IsIndirectCall) {
  }

  /// Account for a jump table of given size. Return false to stop further
  /// processing the switch instruction
  virtual bool onJumpTable(unsigned JumpTableSize) { return true; }

  /// Account for a case cluster of given size. Return false to stop further
  /// processing of the instruction.
  virtual bool onCaseCluster(unsigned NumCaseCluster) { return true; }

  /// Called at the end of processing a switch instruction, with the given
  /// number of case clusters.
  virtual void onFinalizeSwitch(unsigned JumpTableSize, unsigned NumCaseCluster,
                                bool DefaultDestUnreachable) {}

  /// Called to account for any other instruction not specifically accounted
  /// for.
  virtual void onMissedSimplification() {}

  /// Account for inline assembly instructions.
  virtual void onInlineAsm(const InlineAsm &Arg) {}

  /// Start accounting potential benefits due to SROA for the given alloca.
  virtual void onInitializeSROAArg(AllocaInst *Arg) {}

  /// Account SROA savings for the AllocaInst value.
  virtual void onAggregateSROAUse(AllocaInst *V) {}

  /// If \p V maps to a live SROA-candidate alloca, either credit an SROA use
  /// (when \p DoNotDisable is true) or give up on SROA for that alloca.
  /// Returns true only when an SROA use was credited.
  bool handleSROA(Value *V, bool DoNotDisable) {
    // Check for SROA candidates in comparisons.
    if (auto *SROAArg = getSROAArgForValueOrNull(V)) {
      if (DoNotDisable) {
        onAggregateSROAUse(SROAArg);
        return true;
      }
      disableSROAForArg(SROAArg);
    }
    return false;
  }

  // Facts observed while walking the callee body. NOTE(review): the effect of
  // each flag on the final decision is implemented outside this class body -
  // confirm against the finalizeAnalysis()/analyze() implementations.
  bool IsCallerRecursive = false;
  bool IsRecursiveCall = false;
  bool ExposesReturnsTwice = false;
  bool HasDynamicAlloca = false;
  bool ContainsNoDuplicateCall = false;
  bool HasReturn = false;
  bool HasIndirectBr = false;
  bool HasUninlineableIntrinsic = false;
  bool InitsVargArgs = false;

  /// Number of bytes allocated statically by the callee.
  uint64_t AllocatedSize = 0;
  unsigned NumInstructions = 0;
  unsigned NumInlineAsmInstructions = 0;
  unsigned NumVectorInstructions = 0;

  /// While we walk the potentially-inlined instructions, we build up and
  /// maintain a mapping of simplified values specific to this callsite. The
  /// idea is to propagate any special information we have about arguments to
  /// this call through the inlinable section of the function, and account for
  /// likely simplifications post-inlining. The most important aspect we track
  /// is CFG altering simplifications -- when we prove a basic block dead, that
  /// can cause dramatic shifts in the cost of inlining a function.
  /// Note: The simplified Value may be owned by the caller function.
  DenseMap<Value *, Value *> SimplifiedValues;

  /// Keep track of the values which map back (through function arguments) to
  /// allocas on the caller stack which could be simplified through SROA.
  DenseMap<Value *, AllocaInst *> SROAArgValues;

  /// Keep track of Allocas for which we believe we may get SROA optimization.
  DenseSet<AllocaInst *> EnabledSROAAllocas;

  /// Keep track of values which map to a pointer base and constant offset.
  DenseMap<Value *, std::pair<Value *, APInt>> ConstantOffsetPtrs;

  /// Keep track of dead blocks due to the constant arguments.
  SmallPtrSet<BasicBlock *, 16> DeadBlocks;

  /// The mapping of the blocks to their known unique successors due to the
  /// constant arguments.
  DenseMap<BasicBlock *, BasicBlock *> KnownSuccessors;

  /// Model the elimination of repeated loads that is expected to happen
  /// whenever we simplify away the stores that would otherwise cause them to be
  /// loads.
  bool EnableLoadElimination = true;

  /// Whether we allow inlining for recursive call.
  bool AllowRecursiveCall = false;

  /// NOTE(review): appears to record addresses of loads seen so far for the
  /// load-elimination model above - confirm against visitLoad/visitStore.
  SmallPtrSet<Value *, 16> LoadAddrSet;

  /// Return the alloca behind \p V when it is still a live SROA candidate,
  /// or null otherwise.
  AllocaInst *getSROAArgForValueOrNull(Value *V) const {
    auto It = SROAArgValues.find(V);
    if (It == SROAArgValues.end() || EnabledSROAAllocas.count(It->second) == 0)
      return nullptr;
    return It->second;
  }

  /// Use a value in its given form directly if possible, otherwise try looking
  /// for it in SimplifiedValues.
  template <typename T> T *getDirectOrSimplifiedValue(Value *V) const {
    if (auto *Direct = dyn_cast<T>(V))
      return Direct;
    return getSimplifiedValue<T>(V);
  }

  // Custom simplification helper routines.
  bool isAllocaDerivedArg(Value *V);
  void disableSROAForArg(AllocaInst *SROAArg);
  void disableSROA(Value *V);
  void findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB);
  void disableLoadElimination();
  bool isGEPFree(GetElementPtrInst &GEP);
  bool canFoldInboundsGEP(GetElementPtrInst &I);
  bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);
  bool simplifyCallSite(Function *F, CallBase &Call);
  bool simplifyCmpInstForRecCall(CmpInst &Cmp);
  bool simplifyInstruction(Instruction &I);
  bool simplifyIntrinsicCallIsConstant(CallBase &CB);
  bool simplifyIntrinsicCallObjectSize(CallBase &CB);
  ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);
  bool isLoweredToCall(Function *F, CallBase &Call);

  /// Return true if the given argument to the function being considered for
  /// inlining has the given attribute set either at the call site or the
  /// function declaration. Primarily used to inspect call site specific
  /// attributes since these can be more precise than the ones on the callee
  /// itself.
  bool paramHasAttr(Argument *A, Attribute::AttrKind Attr);

  /// Return true if the given value is known non null within the callee if
  /// inlined through this particular callsite.
  bool isKnownNonNullInCallee(Value *V);

  /// Return true if size growth is allowed when inlining the callee at \p Call.
  bool allowSizeGrowth(CallBase &Call);

  // Custom analysis routines.
  InlineResult analyzeBlock(BasicBlock *BB,
                            const SmallPtrSetImpl<const Value *> &EphValues);

  // Disable several entry points to the visitor so we don't accidentally use
  // them by declaring but not defining them here.
  void visit(Module *);
  void visit(Module &);
  void visit(Function *);
  void visit(Function &);
  void visit(BasicBlock *);
  void visit(BasicBlock &);

  // Provide base case for our instruction visit.
  bool visitInstruction(Instruction &I);

  // Our visit overrides.
  bool visitAlloca(AllocaInst &I);
  bool visitPHI(PHINode &I);
  bool visitGetElementPtr(GetElementPtrInst &I);
  bool visitBitCast(BitCastInst &I);
  bool visitPtrToInt(PtrToIntInst &I);
  bool visitIntToPtr(IntToPtrInst &I);
  bool visitCastInst(CastInst &I);
  bool visitCmpInst(CmpInst &I);
  bool visitSub(BinaryOperator &I);
  bool visitBinaryOperator(BinaryOperator &I);
  bool visitFNeg(UnaryOperator &I);
  bool visitLoad(LoadInst &I);
  bool visitStore(StoreInst &I);
  bool visitExtractValue(ExtractValueInst &I);
  bool visitInsertValue(InsertValueInst &I);
  bool visitCallBase(CallBase &Call);
  bool visitReturnInst(ReturnInst &RI);
  bool visitBranchInst(BranchInst &BI);
  bool visitSelectInst(SelectInst &SI);
  bool visitSwitchInst(SwitchInst &SI);
  bool visitIndirectBrInst(IndirectBrInst &IBI);
  bool visitResumeInst(ResumeInst &RI);
  bool visitCleanupReturnInst(CleanupReturnInst &RI);
  bool visitCatchReturnInst(CatchReturnInst &RI);
  bool visitUnreachableInst(UnreachableInst &I);

public:
  /// All function_ref parameters must outlive this object (see the class
  /// NOTE above); only the callee, call site and TTI are mandatory.
  CallAnalyzer(
      Function &Callee, CallBase &Call, const TargetTransformInfo &TTI,
      function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
      function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr,
      function_ref<const TargetLibraryInfo &(Function &)> GetTLI = nullptr,
      ProfileSummaryInfo *PSI = nullptr,
      OptimizationRemarkEmitter *ORE = nullptr,
      function_ref<EphemeralValuesCache &(Function &)> GetEphValuesCache =
          nullptr)
      : TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI),
        GetTLI(GetTLI), PSI(PSI), F(Callee), DL(F.getDataLayout()), ORE(ORE),
        CandidateCall(Call), GetEphValuesCache(GetEphValuesCache) {}

  /// Walk the callee body and produce the inlinability verdict.
  InlineResult analyze();

  /// Lookup simplified Value. May return a value owned by the caller.
  Value *getSimplifiedValueUnchecked(Value *V) const {
    return SimplifiedValues.lookup(V);
  }

  /// Lookup simplified Value, but return nullptr if the simplified value is
  /// owned by the caller.
  template <typename T> T *getSimplifiedValue(Value *V) const {
    Value *SimpleV = SimplifiedValues.lookup(V);
    if (!SimpleV)
      return nullptr;

    // Skip checks if we know T is a global. This has a small, but measurable
    // impact on compile-time.
    if constexpr (std::is_base_of_v<Constant, T>)
      return dyn_cast<T>(SimpleV);

    // Make sure the simplified Value is owned by this function
    if (auto *I = dyn_cast<Instruction>(SimpleV)) {
      if (I->getFunction() != &F)
        return nullptr;
    } else if (auto *Arg = dyn_cast<Argument>(SimpleV)) {
      if (Arg->getParent() != &F)
        return nullptr;
    } else if (!isa<Constant>(SimpleV))
      return nullptr;
    return dyn_cast<T>(SimpleV);
  }

  // Keep a bunch of stats about the cost savings found so we can print them
  // out when debugging.
  unsigned NumConstantArgs = 0;
  unsigned NumConstantOffsetPtrArgs = 0;
  unsigned NumAllocaArgs = 0;
  unsigned NumConstantPtrCmps = 0;
  unsigned NumConstantPtrDiffs = 0;
  unsigned NumInstructionsSimplified = 0;

  /// Debug dump of the analyzer state; defined out-of-line.
  void dump();
};
582
// Estimate how many compare instructions a switch lowered to a binary search
// over its case clusters will need. Model the lowering as a tree with f(n)
// nodes, where f(n) = 1 + f(n/2) + f(n - n/2) for n > 3 and f(n) = n for
// n <= 3. The leaves then contribute n comparisons and the interior nodes
// roughly 2^(log2(n) - 1) - 1 = n / 2 - 1, which gives the closed form:
//   n + n / 2 - 1 = 3n/2 - 1
int64_t getExpectedNumberOfCompare(int NumCaseCluster) {
  const int64_t Clusters = NumCaseCluster;
  return Clusters * 3 / 2 - 1;
}
601
602/// FIXME: if it is necessary to derive from InlineCostCallAnalyzer, note
603/// the FIXME in onLoweredCall, when instantiating an InlineCostCallAnalyzer
604class InlineCostCallAnalyzer final : public CallAnalyzer {
605 const bool ComputeFullInlineCost;
606 int LoadEliminationCost = 0;
607 /// Bonus to be applied when percentage of vector instructions in callee is
608 /// high (see more details in updateThreshold).
609 int VectorBonus = 0;
610 /// Bonus to be applied when the callee has only one reachable basic block.
611 int SingleBBBonus = 0;
612
613 /// Tunable parameters that control the analysis.
614 const InlineParams &Params;
615
616 // This DenseMap stores the delta change in cost and threshold after
617 // accounting for the given instruction. The map is filled only with the
618 // flag PrintInstructionComments on.
619 DenseMap<const Instruction *, InstructionCostDetail> InstructionCostDetailMap;
620
621 /// Upper bound for the inlining cost. Bonuses are being applied to account
622 /// for speculative "expected profit" of the inlining decision.
623 int Threshold = 0;
624
625 /// The amount of StaticBonus applied.
626 int StaticBonusApplied = 0;
627
628 /// Attempt to evaluate indirect calls to boost its inline cost.
629 const bool BoostIndirectCalls;
630
631 /// Ignore the threshold when finalizing analysis.
632 const bool IgnoreThreshold;
633
634 // True if the cost-benefit-analysis-based inliner is enabled.
635 const bool CostBenefitAnalysisEnabled;
636
637 /// Inlining cost measured in abstract units, accounts for all the
638 /// instructions expected to be executed for a given function invocation.
639 /// Instructions that are statically proven to be dead based on call-site
640 /// arguments are not counted here.
641 int Cost = 0;
642
643 // The cumulative cost at the beginning of the basic block being analyzed. At
644 // the end of analyzing each basic block, "Cost - CostAtBBStart" represents
645 // the size of that basic block.
646 int CostAtBBStart = 0;
647
648 // The static size of live but cold basic blocks. This is "static" in the
649 // sense that it's not weighted by profile counts at all.
650 int ColdSize = 0;
651
652 // Whether inlining is decided by cost-threshold analysis.
653 bool DecidedByCostThreshold = false;
654
655 // Whether inlining is decided by cost-benefit analysis.
656 bool DecidedByCostBenefit = false;
657
658 // The cost-benefit pair computed by cost-benefit analysis.
659 std::optional<CostBenefitPair> CostBenefit;
660
661 bool SingleBB = true;
662
663 unsigned SROACostSavings = 0;
664 unsigned SROACostSavingsLost = 0;
665
666 /// The mapping of caller Alloca values to their accumulated cost savings. If
667 /// we have to disable SROA for one of the allocas, this tells us how much
668 /// cost must be added.
669 DenseMap<AllocaInst *, int> SROAArgCosts;
670
671 /// Return true if \p Call is a cold callsite.
672 bool isColdCallSite(CallBase &Call, BlockFrequencyInfo *CallerBFI);
673
674 /// Update Threshold based on callsite properties such as callee
675 /// attributes and callee hotness for PGO builds. The Callee is explicitly
676 /// passed to support analyzing indirect calls whose target is inferred by
677 /// analysis.
678 void updateThreshold(CallBase &Call, Function &Callee);
679 /// Return a higher threshold if \p Call is a hot callsite.
680 std::optional<int> getHotCallSiteThreshold(CallBase &Call,
681 BlockFrequencyInfo *CallerBFI);
682
683 /// Handle a capped 'int' increment for Cost.
684 void addCost(int64_t Inc) {
685 Inc = std::clamp<int64_t>(Inc, INT_MIN, INT_MAX);
686 Cost = std::clamp<int64_t>(Inc + Cost, INT_MIN, INT_MAX);
687 }
688
689 void onDisableSROA(AllocaInst *Arg) override {
690 auto CostIt = SROAArgCosts.find(Arg);
691 if (CostIt == SROAArgCosts.end())
692 return;
693 addCost(CostIt->second);
694 SROACostSavings -= CostIt->second;
695 SROACostSavingsLost += CostIt->second;
696 SROAArgCosts.erase(CostIt);
697 }
698
699 void onDisableLoadElimination() override {
700 addCost(LoadEliminationCost);
701 LoadEliminationCost = 0;
702 }
703
704 bool onCallBaseVisitStart(CallBase &Call) override {
705 if (std::optional<int> AttrCallThresholdBonus =
706 getStringFnAttrAsInt(Call, "call-threshold-bonus"))
707 Threshold += *AttrCallThresholdBonus;
708
709 if (std::optional<int> AttrCallCost =
710 getStringFnAttrAsInt(Call, "call-inline-cost")) {
711 addCost(*AttrCallCost);
712 // Prevent further processing of the call since we want to override its
713 // inline cost, not just add to it.
714 return false;
715 }
716 return true;
717 }
718
  /// Account for an opaque call: charge the tunable per-callsite penalty.
  void onCallPenalty() override { addCost(CallPenalty); }

  /// Account for a load or store: charge the tunable memory-access cost.
  void onMemAccess() override { addCost(MemAccessCost); }
722
  void onCallArgumentSetup(const CallBase &Call) override {
    // Pay the price of the argument setup. We account for the average 1
    // instruction per call argument setup here.
    addCost(Call.arg_size() * InstrCost);
  }
  /// Account for an llvm.load.relative intrinsic in the callee body.
  void onLoadRelativeIntrinsic() override {
    // This is normally lowered to 4 LLVM instructions.
    // NOTE(review): only 3 are charged here - presumably the 4th is covered
    // by the generic per-instruction accounting; confirm against the caller.
    addCost(3 * InstrCost);
  }
732 void onLoweredCall(Function *F, CallBase &Call,
733 bool IsIndirectCall) override {
734 // We account for the average 1 instruction per call argument setup here.
735 addCost(Call.arg_size() * InstrCost);
736
737 // If we have a constant that we are calling as a function, we can peer
738 // through it and see the function target. This happens not infrequently
739 // during devirtualization and so we want to give it a hefty bonus for
740 // inlining, but cap that bonus in the event that inlining wouldn't pan out.
741 // Pretend to inline the function, with a custom threshold.
742 if (IsIndirectCall && BoostIndirectCalls) {
743 auto IndirectCallParams = Params;
744 IndirectCallParams.DefaultThreshold =
746 /// FIXME: if InlineCostCallAnalyzer is derived from, this may need
747 /// to instantiate the derived class.
748 InlineCostCallAnalyzer CA(*F, Call, IndirectCallParams, TTI,
749 GetAssumptionCache, GetBFI, GetTLI, PSI, ORE,
750 false);
751 if (CA.analyze().isSuccess()) {
752 // We were able to inline the indirect call! Subtract the cost from the
753 // threshold to get the bonus we want to apply, but don't go below zero.
754 addCost(-std::max(0, CA.getThreshold() - CA.getCost()));
755 }
756 } else
757 // Otherwise simply add the cost for merely making the call.
758 addCost(TTI.getInlineCallPenalty(CandidateCall.getCaller(), Call,
759 CallPenalty));
760 }
761
  /// Charge the cost of lowering a switch: either a jump table, a short
  /// compare chain (<= 3 clusters), or a binary search tree over clusters.
  void onFinalizeSwitch(unsigned JumpTableSize, unsigned NumCaseCluster,
                        bool DefaultDestUnreachable) override {
    // If suitable for a jump table, consider the cost for the table size and
    // branch to destination.
    // Maximum valid cost increased in this function.
    if (JumpTableSize) {
      // Suppose a default branch includes one compare and one conditional
      // branch if it's reachable.
      if (!DefaultDestUnreachable)
        addCost(2 * InstrCost);
      // Suppose a jump table requires one load and one jump instruction.
      int64_t JTCost =
          static_cast<int64_t>(JumpTableSize) * InstrCost + 2 * InstrCost;
      addCost(JTCost);
      return;
    }

    if (NumCaseCluster <= 3) {
      // Suppose a comparison includes one compare and one conditional branch.
      // We can reduce a set of instructions if the default branch is
      // undefined.
      // NOTE(review): the subtraction is in unsigned arithmetic; this assumes
      // NumCaseCluster >= 1 whenever DefaultDestUnreachable is true - confirm
      // against the caller (visitSwitchInst).
      addCost((NumCaseCluster - DefaultDestUnreachable) * 2 * InstrCost);
      return;
    }

    // Larger switches: model the binary search tree (see
    // getExpectedNumberOfCompare), with compare + branch per node.
    int64_t ExpectedNumberOfCompare =
        getExpectedNumberOfCompare(NumCaseCluster);
    int64_t SwitchCost = ExpectedNumberOfCompare * 2 * InstrCost;

    addCost(SwitchCost);
  }
793
794 // Parses the inline assembly argument to account for its cost. Inline
795 // assembly instructions incur higher costs for inlining since they cannot be
796 // analyzed and optimized.
797 void onInlineAsm(const InlineAsm &Arg) override {
799 return;
801 Arg.collectAsmStrs(AsmStrs);
802 int SectionLevel = 0;
803 int InlineAsmInstrCount = 0;
804 for (StringRef AsmStr : AsmStrs) {
805 // Trim whitespaces and comments.
806 StringRef Trimmed = AsmStr.trim();
807 size_t hashPos = Trimmed.find('#');
808 if (hashPos != StringRef::npos)
809 Trimmed = Trimmed.substr(0, hashPos);
810 // Ignore comments.
811 if (Trimmed.empty())
812 continue;
813 // Filter out the outlined assembly instructions from the cost by keeping
814 // track of the section level and only accounting for instrutions at
815 // section level of zero. Note there will be duplication in outlined
816 // sections too, but is not accounted in the inlining cost model.
817 if (Trimmed.starts_with(".pushsection")) {
818 ++SectionLevel;
819 continue;
820 }
821 if (Trimmed.starts_with(".popsection")) {
822 --SectionLevel;
823 continue;
824 }
825 // Ignore directives and labels.
826 if (Trimmed.starts_with(".") || Trimmed.contains(":"))
827 continue;
828 if (SectionLevel == 0)
829 ++InlineAsmInstrCount;
830 }
831 NumInlineAsmInstructions += InlineAsmInstrCount;
832 addCost(InlineAsmInstrCount * InlineAsmInstrCost);
833 }
834
  /// An instruction we could not simplify away survives inlining: charge one
  /// abstract instruction for it.
  void onMissedSimplification() override { addCost(InstrCost); }
836
837 void onInitializeSROAArg(AllocaInst *Arg) override {
838 assert(Arg != nullptr &&
839 "Should not initialize SROA costs for null value.");
840 auto SROAArgCost = TTI.getCallerAllocaCost(&CandidateCall, Arg);
841 SROACostSavings += SROAArgCost;
842 SROAArgCosts[Arg] = SROAArgCost;
843 }
844
845 void onAggregateSROAUse(AllocaInst *SROAArg) override {
846 auto CostIt = SROAArgCosts.find(SROAArg);
847 assert(CostIt != SROAArgCosts.end() &&
848 "expected this argument to have a cost");
849 CostIt->second += InstrCost;
850 SROACostSavings += InstrCost;
851 }
852
853 void onBlockStart(const BasicBlock *BB) override { CostAtBBStart = Cost; }
854
855 void onBlockAnalyzed(const BasicBlock *BB) override {
856 if (CostBenefitAnalysisEnabled) {
857 // Keep track of the static size of live but cold basic blocks. For now,
858 // we define a cold basic block to be one that's never executed.
859 assert(GetBFI && "GetBFI must be available");
860 BlockFrequencyInfo *BFI = &(GetBFI(F));
861 assert(BFI && "BFI must be available");
862 auto ProfileCount = BFI->getBlockProfileCount(BB);
863 if (*ProfileCount == 0)
864 ColdSize += Cost - CostAtBBStart;
865 }
866
867 auto *TI = BB->getTerminator();
868 // If we had any successors at this point, than post-inlining is likely to
869 // have them as well. Note that we assume any basic blocks which existed
870 // due to branches or switches which folded above will also fold after
871 // inlining.
872 if (SingleBB && TI->getNumSuccessors() > 1) {
873 // Take off the bonus we applied to the threshold.
874 Threshold -= SingleBBBonus;
875 SingleBB = false;
876 }
877 }
878
  void onInstructionAnalysisStart(const Instruction *I) override {
    // This function is called to store the initial cost of inlining before
    // the given instruction was assessed.
    // NOTE(review): the guard condition for this early return is not visible
    // in this excerpt (presumably it bails out unless per-instruction cost
    // recording is enabled) -- confirm against the full source.
      return;
    auto &CostDetail = InstructionCostDetailMap[I];
    CostDetail.CostBefore = Cost;
    CostDetail.ThresholdBefore = Threshold;
  }
888
  void onInstructionAnalysisFinish(const Instruction *I) override {
    // This function is called to find new values of cost and threshold after
    // the instruction has been assessed.
    // NOTE(review): the guard condition for this early return is not visible
    // in this excerpt -- confirm against the full source.
      return;
    auto &CostDetail = InstructionCostDetailMap[I];
    CostDetail.CostAfter = Cost;
    CostDetail.ThresholdAfter = Threshold;
  }
898
  // Decide whether the cycle-savings-vs-size (cost-benefit) analysis can be
  // used for this call site. Requires a profile summary, BFI accessors, entry
  // counts on both caller and callee, and a hot call site.
  bool isCostBenefitAnalysisEnabled() {
    if (!PSI || !PSI->hasProfileSummary())
      return false;

    if (!GetBFI)
      return false;

    // NOTE(review): the condition opening this branch (an explicit user
    // override of cost-benefit analysis) is not visible in this excerpt.
      // Honor the explicit request from the user.
      return false;
    } else {
      // Otherwise, require instrumentation profile.
      if (!PSI->hasInstrumentationProfile())
        return false;
    }

    auto *Caller = CandidateCall.getParent()->getParent();
    if (!Caller->getEntryCount())
      return false;

    BlockFrequencyInfo *CallerBFI = &(GetBFI(*Caller));
    if (!CallerBFI)
      return false;

    // For now, limit to hot call site.
    if (!PSI->isHotCallSite(CandidateCall, CallerBFI))
      return false;

    // Make sure we have a nonzero entry count.
    auto EntryCount = F.getEntryCount();
    if (!EntryCount || !EntryCount->getCount())
      return false;

    BlockFrequencyInfo *CalleeBFI = &(GetBFI(F));
    if (!CalleeBFI)
      return false;

    return true;
  }
939
  // A helper function to choose between command line override and default.
  unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const {
    if (InlineSavingsMultiplier.getNumOccurrences())
    // NOTE(review): the two return statements (command-line override value and
    // the default) are not visible in this excerpt.
  }
946
  // A helper function to choose between command line override and default.
  unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const {
    if (InlineSavingsProfitableMultiplier.getNumOccurrences())
    // NOTE(review): the two return statements (command-line override value and
    // the default) are not visible in this excerpt.
  }
953
954 void OverrideCycleSavingsAndSizeForTesting(APInt &CycleSavings, int &Size) {
955 if (std::optional<int> AttrCycleSavings = getStringFnAttrAsInt(
956 CandidateCall, "inline-cycle-savings-for-test")) {
957 CycleSavings = *AttrCycleSavings;
958 }
959
960 if (std::optional<int> AttrRuntimeCost = getStringFnAttrAsInt(
961 CandidateCall, "inline-runtime-cost-for-test")) {
962 Size = *AttrRuntimeCost;
963 }
964 }
965
  // Determine whether we should inline the given call site, taking into account
  // both the size cost and the cycle savings. Return std::nullopt if we don't
  // have sufficient profiling information to determine.
  std::optional<bool> costBenefitAnalysis() {
    if (!CostBenefitAnalysisEnabled)
      return std::nullopt;

    // buildInlinerPipeline in the pass builder sets HotCallSiteThreshold to 0
    // for the prelink phase of the AutoFDO + ThinLTO build. Honor the logic by
    // falling back to the cost-based metric.
    // TODO: Improve this hacky condition.
    if (Threshold == 0)
      return std::nullopt;

    assert(GetBFI);
    BlockFrequencyInfo *CalleeBFI = &(GetBFI(F));
    assert(CalleeBFI);

    // The cycle savings expressed as the sum of InstrCost
    // multiplied by the estimated dynamic count of each instruction we can
    // avoid. Savings come from the call site cost, such as argument setup and
    // the call instruction, as well as the instructions that are folded.
    //
    // We use 128-bit APInt here to avoid potential overflow. This variable
    // should stay well below 10^^24 (or 2^^80) in practice. This "worst" case
    // assumes that we can avoid or fold a billion instructions, each with a
    // profile count of 10^^15 -- roughly the number of cycles for a 24-hour
    // period on a 4GHz machine.
    APInt CycleSavings(128, 0);

    for (auto &BB : F) {
      APInt CurrentSavings(128, 0);
      for (auto &I : BB) {
        if (BranchInst *BI = dyn_cast<BranchInst>(&I)) {
          // Count a conditional branch as savings if it becomes unconditional.
          if (BI->isConditional() &&
              getSimplifiedValue<ConstantInt>(BI->getCondition())) {
            CurrentSavings += InstrCost;
          }
        } else if (SwitchInst *SI = dyn_cast<SwitchInst>(&I)) {
          // Likewise for a switch whose condition folds to a constant.
          if (getSimplifiedValue<ConstantInt>(SI->getCondition()))
            CurrentSavings += InstrCost;
        } else if (Value *V = dyn_cast<Value>(&I)) {
          // Count an instruction as savings if we can fold it.
          if (SimplifiedValues.count(V)) {
            CurrentSavings += InstrCost;
          }
        }
      }

      // Weight the block's savings by its dynamic execution count.
      // NOTE(review): getBlockProfileCount returns an optional that is
      // dereferenced unconditionally here -- presumably guaranteed non-empty
      // by the entry-count checks in isCostBenefitAnalysisEnabled; confirm.
      auto ProfileCount = CalleeBFI->getBlockProfileCount(&BB);
      CurrentSavings *= *ProfileCount;
      CycleSavings += CurrentSavings;
    }

    // Compute the cycle savings per call.
    auto EntryProfileCount = F.getEntryCount();
    assert(EntryProfileCount && EntryProfileCount->getCount());
    auto EntryCount = EntryProfileCount->getCount();
    // Round to nearest: add half the divisor before dividing.
    CycleSavings += EntryCount / 2;
    CycleSavings = CycleSavings.udiv(EntryCount);

    // Compute the total savings for the call site.
    auto *CallerBB = CandidateCall.getParent();
    BlockFrequencyInfo *CallerBFI = &(GetBFI(*(CallerBB->getParent())));
    CycleSavings += getCallsiteCost(TTI, this->CandidateCall, DL);
    CycleSavings *= *CallerBFI->getBlockProfileCount(CallerBB);

    // Remove the cost of the cold basic blocks to model the runtime cost more
    // accurately. Both machine block placement and function splitting could
    // place cold blocks further from hot blocks.
    int Size = Cost - ColdSize;

    // Allow tiny callees to be inlined regardless of whether they meet the
    // savings threshold.
    // NOTE(review): the statement implementing the tiny-callee allowance is
    // not visible in this excerpt.

    OverrideCycleSavingsAndSizeForTesting(CycleSavings, Size);
    CostBenefit.emplace(APInt(128, Size), CycleSavings);

    // Let R be the ratio of CycleSavings to Size. We accept the inlining
    // opportunity if R is really high and reject if R is really low. If R is
    // somewhere in the middle, we fall back to the cost-based analysis.
    //
    // Specifically, let R = CycleSavings / Size, we accept the inlining
    // opportunity if:
    //
    //             PSI->getOrCompHotCountThreshold()
    // R > -------------------------------------------------
    //     getInliningCostBenefitAnalysisSavingsMultiplier()
    //
    // and reject the inlining opportunity if:
    //
    //                PSI->getOrCompHotCountThreshold()
    // R <= ----------------------------------------------------
    //      getInliningCostBenefitAnalysisProfitableMultiplier()
    //
    // Otherwise, we fall back to the cost-based analysis.
    //
    // Implementation-wise, use multiplication (CycleSavings * Multiplier,
    // HotCountThreshold * Size) rather than division to avoid precision loss.
    APInt Threshold(128, PSI->getOrCompHotCountThreshold());
    Threshold *= Size;

    APInt UpperBoundCycleSavings = CycleSavings;
    UpperBoundCycleSavings *= getInliningCostBenefitAnalysisSavingsMultiplier();
    if (UpperBoundCycleSavings.uge(Threshold))
      return true;

    APInt LowerBoundCycleSavings = CycleSavings;
    LowerBoundCycleSavings *=
        getInliningCostBenefitAnalysisProfitableMultiplier();
    if (LowerBoundCycleSavings.ult(Threshold))
      return false;

    // Otherwise, fall back to the cost-based analysis.
    return std::nullopt;
  }
1084
  InlineResult finalizeAnalysis() override {
    // Loops generally act a lot like calls in that they act like barriers to
    // movement, require a certain amount of setup, etc. So when optimising for
    // size, we penalise any call sites that perform loops. We do this after all
    // other costs here, so will likely only be dealing with relatively small
    // functions (and hence DT and LI will hopefully be cheap).
    auto *Caller = CandidateCall.getFunction();
    if (Caller->hasMinSize()) {
      DominatorTree DT(F);
      LoopInfo LI(DT);
      int NumLoops = 0;
      for (Loop *L : LI) {
        // Ignore loops that will not be executed
        if (DeadBlocks.count(L->getHeader()))
          continue;
        NumLoops++;
      }
      addCost(NumLoops * InlineConstants::LoopPenalty);
    }

    // We applied the maximum possible vector bonus at the beginning. Now,
    // subtract the excess bonus, if any, from the Threshold before
    // comparing against Cost.
    if (NumVectorInstructions <= NumInstructions / 10)
      Threshold -= VectorBonus;
    else if (NumVectorInstructions <= NumInstructions / 2)
      Threshold -= VectorBonus / 2;

    // Testing overrides: string attributes on the call can pin the final
    // cost, multiply it, or pin the threshold.
    if (std::optional<int> AttrCost =
            getStringFnAttrAsInt(CandidateCall, "function-inline-cost"))
      Cost = *AttrCost;

    if (std::optional<int> AttrCostMult = getStringFnAttrAsInt(
            CandidateCall,
            // NOTE(review): the attribute-name argument on this line is not
            // visible in this excerpt.
      Cost *= *AttrCostMult;

    if (std::optional<int> AttrThreshold =
            getStringFnAttrAsInt(CandidateCall, "function-inline-threshold"))
      Threshold = *AttrThreshold;

    // Prefer the profile-driven cost-benefit verdict when it is conclusive.
    if (auto Result = costBenefitAnalysis()) {
      DecidedByCostBenefit = true;
      if (*Result)
        return InlineResult::success();
      else
        return InlineResult::failure("Cost over threshold.");
    }

    if (IgnoreThreshold)
      return InlineResult::success();

    DecidedByCostThreshold = true;
    return Cost < std::max(1, Threshold)
               // NOTE(review): the success arm of this conditional is not
               // visible in this excerpt.
               : InlineResult::failure("Cost over threshold.");
  }
1142
1143 bool shouldStop() override {
1144 if (IgnoreThreshold || ComputeFullInlineCost)
1145 return false;
1146 // Bail out the moment we cross the threshold. This means we'll under-count
1147 // the cost, but only when undercounting doesn't matter.
1148 if (Cost < Threshold)
1149 return false;
1150 DecidedByCostThreshold = true;
1151 return true;
1152 }
1153
1154 void onLoadEliminationOpportunity() override {
1155 LoadEliminationCost += InstrCost;
1156 }
1157
  InlineResult onAnalysisStart() override {
    // Perform some tweaks to the cost and threshold based on the direct
    // callsite information.

    // We want to more aggressively inline vector-dense kernels, so up the
    // threshold, and we'll lower it if the % of vector instructions gets too
    // low. Note that these bonuses are some what arbitrary and evolved over
    // time by accident as much as because they are principled bonuses.
    //
    // FIXME: It would be nice to remove all such bonuses. At least it would be
    // nice to base the bonus values on something more scientific.
    assert(NumInstructions == 0);
    assert(NumVectorInstructions == 0);

    // Update the threshold based on callsite properties
    updateThreshold(CandidateCall, F);

    // While Threshold depends on commandline options that can take negative
    // values, we want to enforce the invariant that the computed threshold and
    // bonuses are non-negative.
    assert(Threshold >= 0);
    assert(SingleBBBonus >= 0);
    assert(VectorBonus >= 0);

    // Speculatively apply all possible bonuses to Threshold. If cost exceeds
    // this Threshold any time, and cost cannot decrease, we can stop processing
    // the rest of the function body.
    Threshold += (SingleBBBonus + VectorBonus);

    // Give out bonuses for the callsite, as the instructions setting them up
    // will be gone after inlining.
    addCost(-getCallsiteCost(TTI, this->CandidateCall, DL));

    // If this function uses the coldcc calling convention, prefer not to inline
    // it.
    if (F.getCallingConv() == CallingConv::Cold)
    // NOTE(review): the penalty statement guarded by this condition is not
    // visible in this excerpt.

    LLVM_DEBUG(dbgs() << "      Initial cost: " << Cost << "\n");

    // Check if we're done. This can happen due to bonuses and penalties.
    if (Cost >= Threshold && !ComputeFullInlineCost)
      return InlineResult::failure("high cost");

    return InlineResult::success();
  }
1204
1205public:
  // Constructor. Note the member-init order: ComputeFullInlineCost and
  // CostBenefitAnalysisEnabled both consult isCostBenefitAnalysisEnabled(),
  // which reads members set by the CallAnalyzer base constructor.
  InlineCostCallAnalyzer(
      Function &Callee, CallBase &Call, const InlineParams &Params,
      const TargetTransformInfo &TTI,
      function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
      function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr,
      function_ref<const TargetLibraryInfo &(Function &)> GetTLI = nullptr,
      ProfileSummaryInfo *PSI = nullptr,
      OptimizationRemarkEmitter *ORE = nullptr, bool BoostIndirect = true,
      bool IgnoreThreshold = false,
      function_ref<EphemeralValuesCache &(Function &)> GetEphValuesCache =
          nullptr)
      : CallAnalyzer(Callee, Call, TTI, GetAssumptionCache, GetBFI, GetTLI, PSI,
                     ORE, GetEphValuesCache),
        ComputeFullInlineCost(OptComputeFullInlineCost ||
                              Params.ComputeFullInlineCost || ORE ||
                              isCostBenefitAnalysisEnabled()),
        Params(Params), Threshold(Params.DefaultThreshold),
        BoostIndirectCalls(BoostIndirect), IgnoreThreshold(IgnoreThreshold),
        CostBenefitAnalysisEnabled(isCostBenefitAnalysisEnabled()),
        Writer(this) {
    AllowRecursiveCall = *Params.AllowRecursiveCall;
  }
1228
1229 /// Annotation Writer for instruction details
1230 InlineCostAnnotationWriter Writer;
1231
1232 void dump();
1233
1234 // Prints the same analysis as dump(), but its definition is not dependent
1235 // on the build.
1236 void print(raw_ostream &OS);
1237
1238 std::optional<InstructionCostDetail> getCostDetails(const Instruction *I) {
1239 auto It = InstructionCostDetailMap.find(I);
1240 if (It != InstructionCostDetailMap.end())
1241 return It->second;
1242 return std::nullopt;
1243 }
1244
  ~InlineCostCallAnalyzer() override = default;
  // Accessors for the final analysis results.
  int getThreshold() const { return Threshold; }
  int getCost() const { return Cost; }
  int getStaticBonusApplied() const { return StaticBonusApplied; }
  std::optional<CostBenefitPair> getCostBenefitPair() { return CostBenefit; }
  // Which mechanism produced the final inline decision.
  bool wasDecidedByCostBenefit() const { return DecidedByCostBenefit; }
  bool wasDecidedByCostThreshold() const { return DecidedByCostThreshold; }
1252};
1253
1254// Return true if CB is the sole call to local function Callee.
1255static bool isSoleCallToLocalFunction(const CallBase &CB,
1256 const Function &Callee) {
1257 return Callee.hasLocalLinkage() && Callee.hasOneLiveUse() &&
1258 &Callee == CB.getCalledFunction();
1259}
1260
// Feature-extraction twin of InlineCostCallAnalyzer: instead of a scalar
// cost, it accumulates a vector of named features (InlineCostFeatures) for
// consumption by the ML inliner.
class InlineCostFeaturesAnalyzer final : public CallAnalyzer {
private:
  InlineCostFeatures Cost = {};

  // FIXME: These constants are taken from the heuristic-based cost visitor.
  // These should be removed entirely in a later revision to avoid reliance on
  // heuristics in the ML inliner.
  static constexpr int JTCostMultiplier = 2;
  static constexpr int CaseClusterCostMultiplier = 2;
  static constexpr int SwitchDefaultDestCostMultiplier = 2;
  static constexpr int SwitchCostMultiplier = 2;

  // FIXME: These are taken from the heuristic-based cost visitor: we should
  // eventually abstract these to the CallAnalyzer to avoid duplication.
  unsigned SROACostSavingOpportunities = 0;
  int VectorBonus = 0;
  int SingleBBBonus = 0;
  int Threshold = 5;

  DenseMap<AllocaInst *, unsigned> SROACosts;

  // Add Delta to the given feature counter.
  void increment(InlineCostFeatureIndex Feature, int64_t Delta = 1) {
    Cost[static_cast<size_t>(Feature)] += Delta;
  }

  // Overwrite the given feature with Value.
  void set(InlineCostFeatureIndex Feature, int64_t Value) {
    Cost[static_cast<size_t>(Feature)] = Value;
  }

  // Retract previously-recorded SROA savings for an alloca that turned out
  // not to be SROA-able.
  void onDisableSROA(AllocaInst *Arg) override {
    auto CostIt = SROACosts.find(Arg);
    if (CostIt == SROACosts.end())
      return;

    increment(InlineCostFeatureIndex::sroa_losses, CostIt->second);
    SROACostSavingOpportunities -= CostIt->second;
    SROACosts.erase(CostIt);
  }

  void onDisableLoadElimination() override {
    set(InlineCostFeatureIndex::load_elimination, 1);
  }

  void onCallPenalty() override {
    increment(InlineCostFeatureIndex::call_penalty, CallPenalty);
  }

  void onCallArgumentSetup(const CallBase &Call) override {
    increment(InlineCostFeatureIndex::call_argument_setup,
              Call.arg_size() * InstrCost);
  }

  void onLoadRelativeIntrinsic() override {
    increment(InlineCostFeatureIndex::load_relative_intrinsic, 3 * InstrCost);
  }

  // For indirect calls, run a nested cost analysis against the speculated
  // direct callee and record the resulting cost as a feature.
  void onLoweredCall(Function *F, CallBase &Call,
                     bool IsIndirectCall) override {
    increment(InlineCostFeatureIndex::lowered_call_arg_setup,
              Call.arg_size() * InstrCost);

    if (IsIndirectCall) {
      InlineParams IndirectCallParams = {/* DefaultThreshold*/ 0,
                                         /*HintThreshold*/ {},
                                         /*ColdThreshold*/ {},
                                         /*OptSizeThreshold*/ {},
                                         /*OptMinSizeThreshold*/ {},
                                         /*HotCallSiteThreshold*/ {},
                                         /*LocallyHotCallSiteThreshold*/ {},
                                         /*ColdCallSiteThreshold*/ {},
                                         /*ComputeFullInlineCost*/ true,
                                         /*EnableDeferral*/ true};
      IndirectCallParams.DefaultThreshold =
      // NOTE(review): the right-hand side of this assignment is not visible
      // in this excerpt.

      InlineCostCallAnalyzer CA(*F, Call, IndirectCallParams, TTI,
                                GetAssumptionCache, GetBFI, GetTLI, PSI, ORE,
                                false, true);
      if (CA.analyze().isSuccess()) {
        increment(InlineCostFeatureIndex::nested_inline_cost_estimate,
                  CA.getCost());
        increment(InlineCostFeatureIndex::nested_inlines, 1);
      }
    } else {
      onCallPenalty();
    }
  }

  // Mirrors InlineCostCallAnalyzer::onFinalizeSwitch, but writes into
  // separate jump-table / case-cluster / switch features.
  void onFinalizeSwitch(unsigned JumpTableSize, unsigned NumCaseCluster,
                        bool DefaultDestUnreachable) override {
    if (JumpTableSize) {
      if (!DefaultDestUnreachable)
        increment(InlineCostFeatureIndex::switch_default_dest_penalty,
                  SwitchDefaultDestCostMultiplier * InstrCost);
      int64_t JTCost = static_cast<int64_t>(JumpTableSize) * InstrCost +
                       JTCostMultiplier * InstrCost;
      increment(InlineCostFeatureIndex::jump_table_penalty, JTCost);
      return;
    }

    if (NumCaseCluster <= 3) {
      increment(InlineCostFeatureIndex::case_cluster_penalty,
                (NumCaseCluster - DefaultDestUnreachable) *
                    CaseClusterCostMultiplier * InstrCost);
      return;
    }

    int64_t ExpectedNumberOfCompare =
        getExpectedNumberOfCompare(NumCaseCluster);

    int64_t SwitchCost =
        ExpectedNumberOfCompare * SwitchCostMultiplier * InstrCost;
    increment(InlineCostFeatureIndex::switch_penalty, SwitchCost);
  }

  void onMissedSimplification() override {
    increment(InlineCostFeatureIndex::unsimplified_common_instructions,
              InstrCost);
  }

  void onInitializeSROAArg(AllocaInst *Arg) override {
    auto SROAArgCost = TTI.getCallerAllocaCost(&CandidateCall, Arg);
    SROACosts[Arg] = SROAArgCost;
    SROACostSavingOpportunities += SROAArgCost;
  }

  void onAggregateSROAUse(AllocaInst *Arg) override {
    SROACosts.find(Arg)->second += InstrCost;
    SROACostSavingOpportunities += InstrCost;
  }

  void onBlockAnalyzed(const BasicBlock *BB) override {
    if (BB->getTerminator()->getNumSuccessors() > 1)
      set(InlineCostFeatureIndex::is_multiple_blocks, 1);
    Threshold -= SingleBBBonus;
  }

  // Final pass: record loop counts (under minsize), dead-block and
  // simplification statistics, and the adjusted threshold.
  InlineResult finalizeAnalysis() override {
    auto *Caller = CandidateCall.getFunction();
    if (Caller->hasMinSize()) {
      DominatorTree DT(F);
      LoopInfo LI(DT);
      for (Loop *L : LI) {
        // Ignore loops that will not be executed
        if (DeadBlocks.count(L->getHeader()))
          continue;
        increment(InlineCostFeatureIndex::num_loops,
        // NOTE(review): the second argument of this increment call is not
        // visible in this excerpt.
      }
    }
    set(InlineCostFeatureIndex::dead_blocks, DeadBlocks.size());
    set(InlineCostFeatureIndex::simplified_instructions,
        NumInstructionsSimplified);
    set(InlineCostFeatureIndex::constant_args, NumConstantArgs);
    set(InlineCostFeatureIndex::constant_offset_ptr_args,
        NumConstantOffsetPtrArgs);
    set(InlineCostFeatureIndex::sroa_savings, SROACostSavingOpportunities);

    if (NumVectorInstructions <= NumInstructions / 10)
      Threshold -= VectorBonus;
    else if (NumVectorInstructions <= NumInstructions / 2)
      Threshold -= VectorBonus / 2;

    set(InlineCostFeatureIndex::threshold, Threshold);

    return InlineResult::success();
  }

  // Feature extraction always visits the whole function.
  bool shouldStop() override { return false; }

  void onLoadEliminationOpportunity() override {
    increment(InlineCostFeatureIndex::load_elimination, 1);
  }

  InlineResult onAnalysisStart() override {
    increment(InlineCostFeatureIndex::callsite_cost,
              -1 * getCallsiteCost(TTI, this->CandidateCall, DL));

    set(InlineCostFeatureIndex::cold_cc_penalty,
        (F.getCallingConv() == CallingConv::Cold));

    set(InlineCostFeatureIndex::last_call_to_static_bonus,
        isSoleCallToLocalFunction(CandidateCall, F));

    // FIXME: we shouldn't repeat this logic in both the Features and Cost
    // analyzer - instead, we should abstract it to a common method in the
    // CallAnalyzer
    int SingleBBBonusPercent = 50;
    int VectorBonusPercent = TTI.getInlinerVectorBonusPercent();
    Threshold += TTI.adjustInliningThreshold(&CandidateCall);
    Threshold *= TTI.getInliningThresholdMultiplier();
    SingleBBBonus = Threshold * SingleBBBonusPercent / 100;
    VectorBonus = Threshold * VectorBonusPercent / 100;
    Threshold += (SingleBBBonus + VectorBonus);

    return InlineResult::success();
  }

public:
  InlineCostFeaturesAnalyzer(
      const TargetTransformInfo &TTI,
      function_ref<AssumptionCache &(Function &)> &GetAssumptionCache,
      function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
      function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
      ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE, Function &Callee,
      CallBase &Call)
      : CallAnalyzer(Callee, Call, TTI, GetAssumptionCache, GetBFI, GetTLI,
                     PSI) {}

  // The accumulated feature vector.
  const InlineCostFeatures &features() const { return Cost; }
};
1472
1473} // namespace
1474
1475/// Test whether the given value is an Alloca-derived function argument.
1476bool CallAnalyzer::isAllocaDerivedArg(Value *V) {
1477 return SROAArgValues.count(V);
1478}
1479
// Fully disable SROA for the given candidate alloca: notify the subclass (so
// any accrued savings can be retracted), drop it from the enabled set, and
// conservatively turn off load elimination.
void CallAnalyzer::disableSROAForArg(AllocaInst *SROAArg) {
  onDisableSROA(SROAArg);
  EnabledSROAAllocas.erase(SROAArg);
  disableLoadElimination();
}
1485
// Print the recorded cost/threshold deltas (and any simplification) for the
// given instruction as an IR annotation comment.
void InlineCostAnnotationWriter::emitInstructionAnnot(
    const Instruction *I, formatted_raw_ostream &OS) {
  // The cost of inlining of the given instruction is printed always.
  // The threshold delta is printed only when it is non-zero. It happens
  // when we decided to give a bonus at a particular instruction.
  std::optional<InstructionCostDetail> Record = ICCA->getCostDetails(I);
  if (!Record)
    OS << "; No analysis for the instruction";
  else {
    OS << "; cost before = " << Record->CostBefore
       << ", cost after = " << Record->CostAfter
       << ", threshold before = " << Record->ThresholdBefore
       << ", threshold after = " << Record->ThresholdAfter << ", ";
    OS << "cost delta = " << Record->getCostDelta();
    if (Record->hasThresholdChanged())
      OS << ", threshold delta = " << Record->getThresholdDelta();
  }
  // If the instruction was folded, show what it folded to and note when the
  // replacement lives in the caller rather than the callee.
  auto *V = ICCA->getSimplifiedValueUnchecked(const_cast<Instruction *>(I));
  if (V) {
    OS << ", simplified to ";
    V->print(OS, true);
    if (auto *VI = dyn_cast<Instruction>(V)) {
      if (VI->getFunction() != I->getFunction())
        OS << " (caller instruction)";
    } else if (auto *VArg = dyn_cast<Argument>(V)) {
      if (VArg->getParent() != I->getFunction())
        OS << " (caller argument)";
    }
  }
  OS << "\n";
}
1517
1518/// If 'V' maps to a SROA candidate, disable SROA for it.
1519void CallAnalyzer::disableSROA(Value *V) {
1520 if (auto *SROAArg = getSROAArgForValueOrNull(V)) {
1521 disableSROAForArg(SROAArg);
1522 }
1523}
1524
1525void CallAnalyzer::disableLoadElimination() {
1526 if (EnableLoadElimination) {
1527 onDisableLoadElimination();
1528 EnableLoadElimination = false;
1529 }
1530}
1531
/// Accumulate a constant GEP offset into an APInt if possible.
///
/// Returns false if unable to compute the offset for any reason. Respects any
/// simplified values known during the analysis of this callsite.
bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
  unsigned IntPtrWidth = DL.getIndexTypeSizeInBits(GEP.getType());
  assert(IntPtrWidth == Offset.getBitWidth());

  // NOTE(review): the first line of this for-loop header (declaring GTI/GTE,
  // presumably via gep_type_begin/gep_type_end) is not visible in this
  // excerpt.
       GTI != GTE; ++GTI) {
    // Each index must be a constant (directly or after simplification).
    ConstantInt *OpC =
        getDirectOrSimplifiedValue<ConstantInt>(GTI.getOperand());
    if (!OpC)
      return false;
    if (OpC->isZero())
      continue;

    // Handle a struct index, which adds its field offset to the pointer.
    if (StructType *STy = GTI.getStructTypeOrNull()) {
      unsigned ElementIdx = OpC->getZExtValue();
      const StructLayout *SL = DL.getStructLayout(STy);
      Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx));
      continue;
    }

    // Sequential (array/vector) index: scale by the element stride.
    APInt TypeSize(IntPtrWidth, GTI.getSequentialElementStride(DL));
    Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize;
  }
  return true;
}
1562
/// Use TTI to check whether a GEP is free.
///
/// Respects any simplified values known during the analysis of this callsite.
bool CallAnalyzer::isGEPFree(GetElementPtrInst &GEP) {
  // Build the operand list, substituting any indices we already simplified
  // to constants so TTI sees the post-inlining form.
  SmallVector<Value *, 4> Operands;
  Operands.push_back(GEP.getOperand(0));
  for (const Use &Op : GEP.indices())
    if (Constant *SimpleOp = getSimplifiedValue<Constant>(Op))
      Operands.push_back(SimpleOp);
    else
      Operands.push_back(Op);
  return TTI.getInstructionCost(&GEP, Operands,
  // NOTE(review): the trailing arguments of this call and the comparison that
  // produces the boolean result are not visible in this excerpt.
}
1578
// Visit an alloca in the callee: track accumulated stack size and flag
// dynamic allocas, which currently block inlining entirely.
bool CallAnalyzer::visitAlloca(AllocaInst &I) {
  disableSROA(I.getOperand(0));

  // Check whether inlining will turn a dynamic alloca into a static
  // alloca and handle that case.
  if (I.isArrayAllocation()) {
    Constant *Size = getSimplifiedValue<Constant>(I.getArraySize());
    if (auto *AllocSize = dyn_cast_or_null<ConstantInt>(Size)) {
      // Sometimes a dynamic alloca could be converted into a static alloca
      // after this constant prop, and become a huge static alloca on an
      // unconditional CFG path. Avoid inlining if this is going to happen above
      // a threshold.
      // FIXME: If the threshold is removed or lowered too much, we could end up
      // being too pessimistic and prevent inlining non-problematic code. This
      // could result in unintended perf regressions. A better overall strategy
      // is needed to track stack usage during inlining.
      Type *Ty = I.getAllocatedType();
      AllocatedSize = SaturatingMultiplyAdd(
          AllocSize->getLimitedValue(),
          DL.getTypeAllocSize(Ty).getKnownMinValue(), AllocatedSize);
      // NOTE(review): the threshold comparison guarding this branch is not
      // visible in this excerpt.
        HasDynamicAlloca = true;
      return false;
    }
  }

  if (I.isStaticAlloca()) {
    // Accumulate the allocated size if constant and executed once.
    // Note: if AllocSize is a vscale value, this is an underestimate of the
    // allocated size, and it also requires some of the cost of a dynamic
    // alloca, but is recorded here as a constant size alloca.
    TypeSize AllocSize = I.getAllocationSize(DL).value_or(TypeSize::getZero());
    AllocatedSize = SaturatingAdd(AllocSize.getKnownMinValue(), AllocatedSize);
  } else {
    // FIXME: This is overly conservative. Dynamic allocas are inefficient for
    // a variety of reasons, and so we would like to not inline them into
    // functions which don't currently have a dynamic alloca. This simply
    // disables inlining altogether in the presence of a dynamic alloca.
    HasDynamicAlloca = true;
  }

  return false;
}
1622
// Visit a phi: if every live incoming value agrees on either one constant or
// one (base pointer, constant offset) pair, propagate that fact to the phi
// itself so downstream folding and SROA tracking can continue through it.
bool CallAnalyzer::visitPHI(PHINode &I) {
  // FIXME: We need to propagate SROA *disabling* through phi nodes, even
  // though we don't want to propagate it's bonuses. The idea is to disable
  // SROA if it *might* be used in an inappropriate manner.

  // Phi nodes are always zero-cost.
  // FIXME: Pointer sizes may differ between different address spaces, so do we
  // need to use correct address space in the call to getPointerSizeInBits here?
  // Or could we skip the getPointerSizeInBits call completely? As far as I can
  // see the ZeroOffset is used as a dummy value, so we can probably use any
  // bit width for the ZeroOffset?
  APInt ZeroOffset = APInt::getZero(DL.getPointerSizeInBits(0));
  bool CheckSROA = I.getType()->isPointerTy();

  // Track the constant or pointer with constant offset we've seen so far.
  Constant *FirstC = nullptr;
  std::pair<Value *, APInt> FirstBaseAndOffset = {nullptr, ZeroOffset};
  Value *FirstV = nullptr;

  for (unsigned i = 0, e = I.getNumIncomingValues(); i != e; ++i) {
    BasicBlock *Pred = I.getIncomingBlock(i);
    // If the incoming block is dead, skip the incoming block.
    if (DeadBlocks.count(Pred))
      continue;
    // If the parent block of phi is not the known successor of the incoming
    // block, skip the incoming block.
    BasicBlock *KnownSuccessor = KnownSuccessors[Pred];
    if (KnownSuccessor && KnownSuccessor != I.getParent())
      continue;

    Value *V = I.getIncomingValue(i);
    // If the incoming value is this phi itself, skip the incoming value.
    if (&I == V)
      continue;

    Constant *C = getDirectOrSimplifiedValue<Constant>(V);

    std::pair<Value *, APInt> BaseAndOffset = {nullptr, ZeroOffset};
    if (!C && CheckSROA)
      BaseAndOffset = ConstantOffsetPtrs.lookup(V);

    if (!C && !BaseAndOffset.first)
      // The incoming value is neither a constant nor a pointer with constant
      // offset, exit early.
      return true;

    if (FirstC) {
      if (FirstC == C)
        // If we've seen a constant incoming value before and it is the same
        // constant we see this time, continue checking the next incoming value.
        continue;
      // Otherwise early exit because we either see a different constant or saw
      // a constant before but we have a pointer with constant offset this time.
      return true;
    }

    if (FirstV) {
      // The same logic as above, but check pointer with constant offset here.
      if (FirstBaseAndOffset == BaseAndOffset)
        continue;
      return true;
    }

    if (C) {
      // This is the 1st time we've seen a constant, record it.
      FirstC = C;
      continue;
    }

    // The remaining case is that this is the 1st time we've seen a pointer with
    // constant offset, record it.
    FirstV = V;
    FirstBaseAndOffset = BaseAndOffset;
  }

  // Check if we can map phi to a constant.
  if (FirstC) {
    SimplifiedValues[&I] = FirstC;
    return true;
  }

  // Check if we can map phi to a pointer with constant offset.
  if (FirstBaseAndOffset.first) {
    ConstantOffsetPtrs[&I] = FirstBaseAndOffset;

    // Keep SROA tracking alive through the phi when the base is SROA-able.
    if (auto *SROAArg = getSROAArgForValueOrNull(FirstV))
      SROAArgValues[&I] = SROAArg;
  }

  return true;
}
1714
1715/// Check we can fold GEPs of constant-offset call site argument pointers.
1716/// This requires target data and inbounds GEPs.
1717///
1718/// \return true if the specified GEP can be folded.
1719bool CallAnalyzer::canFoldInboundsGEP(GetElementPtrInst &I) {
1720 // Check if we have a base + offset for the pointer.
1721 std::pair<Value *, APInt> BaseAndOffset =
1722 ConstantOffsetPtrs.lookup(I.getPointerOperand());
1723 if (!BaseAndOffset.first)
1724 return false;
1725
1726 // Check if the offset of this GEP is constant, and if so accumulate it
1727 // into Offset.
1728 if (!accumulateGEPOffset(cast<GEPOperator>(I), BaseAndOffset.second))
1729 return false;
1730
1731 // Add the result as a new mapping to Base + Offset.
1732 ConstantOffsetPtrs[&I] = std::move(BaseAndOffset);
1733
1734 return true;
1735}
1736
bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
  // Remember whether the pointer operand is an SROA candidate so we can
  // either propagate SROA through the GEP or disable it below.
  auto *SROAArg = getSROAArgForValueOrNull(I.getPointerOperand());

  // Lambda to check whether a GEP's indices are all constant.
  auto IsGEPOffsetConstant = [&](GetElementPtrInst &GEP) {
    for (const Use &Op : GEP.indices())
      if (!getDirectOrSimplifiedValue<Constant>(Op))
        return false;
    return true;
  };

    return true;

  if ((I.isInBounds() && canFoldInboundsGEP(I)) || IsGEPOffsetConstant(I)) {
    // The GEP result aliases the operand, so it inherits SROA candidacy.
    if (SROAArg)
      SROAArgValues[&I] = SROAArg;

    // Constant GEPs are modeled as free.
    return true;
  }

  // Variable GEPs will require math and will disable SROA.
  if (SROAArg)
    disableSROAForArg(SROAArg);
  return isGEPFree(I);
}
1765
1766// Simplify \p Cmp if RHS is const and we can ValueTrack LHS.
1767// This handles the case only when the Cmp instruction is guarding a recursive
1768// call that will cause the Cmp to fail/succeed for the recursive call.
1769bool CallAnalyzer::simplifyCmpInstForRecCall(CmpInst &Cmp) {
1770 // Bail out if LHS is not a function argument or RHS is NOT const:
1771 if (!isa<Argument>(Cmp.getOperand(0)) || !isa<Constant>(Cmp.getOperand(1)))
1772 return false;
1773 auto *CmpOp = Cmp.getOperand(0);
1774 // Make sure that the callsite is recursive:
1775 if (CandidateCall.getCaller() != &F)
1776 return false;
1777 // Only handle the case when the callsite has a single predecessor:
1778 auto *CallBB = CandidateCall.getParent();
1779 auto *Predecessor = CallBB->getSinglePredecessor();
1780 if (!Predecessor)
1781 return false;
1782 // Check if the callsite is guarded by the same Cmp instruction:
1783 auto *Br = dyn_cast<BranchInst>(Predecessor->getTerminator());
1784 if (!Br || Br->isUnconditional() || Br->getCondition() != &Cmp)
1785 return false;
1786
1787 // Check if there is any arg of the recursive callsite is affecting the cmp
1788 // instr:
1789 bool ArgFound = false;
1790 Value *FuncArg = nullptr, *CallArg = nullptr;
1791 for (unsigned ArgNum = 0;
1792 ArgNum < F.arg_size() && ArgNum < CandidateCall.arg_size(); ArgNum++) {
1793 FuncArg = F.getArg(ArgNum);
1794 CallArg = CandidateCall.getArgOperand(ArgNum);
1795 if (FuncArg == CmpOp && CallArg != CmpOp) {
1796 ArgFound = true;
1797 break;
1798 }
1799 }
1800 if (!ArgFound)
1801 return false;
1802
1803 // Now we have a recursive call that is guarded by a cmp instruction.
1804 // Check if this cmp can be simplified:
1805 SimplifyQuery SQ(DL, dyn_cast<Instruction>(CallArg));
1806 CondContext CC(&Cmp);
1807 CC.Invert = (CallBB != Br->getSuccessor(0));
1808 SQ.CC = &CC;
1809 CC.AffectedValues.insert(FuncArg);
1810 Value *SimplifiedInstruction = llvm::simplifyInstructionWithOperands(
1811 cast<CmpInst>(&Cmp), {CallArg, Cmp.getOperand(1)}, SQ);
1812 if (auto *ConstVal = dyn_cast_or_null<ConstantInt>(SimplifiedInstruction)) {
1813 // Make sure that the BB of the recursive call is NOT the true successor
1814 // of the icmp. In other words, make sure that the recursion depth is 1.
1815 if ((ConstVal->isOne() && CC.Invert) ||
1816 (ConstVal->isZero() && !CC.Invert)) {
1817 SimplifiedValues[&Cmp] = ConstVal;
1818 return true;
1819 }
1820 }
1821 return false;
1822}
1823
/// Simplify \p I if its operands are constants and update SimplifiedValues.
bool CallAnalyzer::simplifyInstruction(Instruction &I) {
  for (Value *Op : I.operands()) {
    // Every operand must be a constant, either directly or via an earlier
    // simplification; otherwise we cannot constant-fold the instruction.
    Constant *COp = getDirectOrSimplifiedValue<Constant>(Op);
    if (!COp)
      return false;
    COps.push_back(COp);
  }
  auto *C = ConstantFoldInstOperands(&I, COps, DL);
  if (!C)
    return false;
  // Record the folded constant so later users of I pick it up.
  SimplifiedValues[&I] = C;
  return true;
}
1839
1840/// Try to simplify a call to llvm.is.constant.
1841///
1842/// Duplicate the argument checking from CallAnalyzer::simplifyCallSite since
1843/// we expect calls of this specific intrinsic to be infrequent.
1844///
1845/// FIXME: Given that we know CB's parent (F) caller
1846/// (CandidateCall->getParent()->getParent()), we might be able to determine
1847/// whether inlining F into F's caller would change how the call to
1848/// llvm.is.constant would evaluate.
1849bool CallAnalyzer::simplifyIntrinsicCallIsConstant(CallBase &CB) {
1850 Value *Arg = CB.getArgOperand(0);
1851 auto *C = getDirectOrSimplifiedValue<Constant>(Arg);
1852
1853 Type *RT = CB.getFunctionType()->getReturnType();
1854 SimplifiedValues[&CB] = ConstantInt::get(RT, C ? 1 : 0);
1855 return true;
1856}
1857
bool CallAnalyzer::simplifyIntrinsicCallObjectSize(CallBase &CB) {
  // As per the langref, "The fourth argument to llvm.objectsize determines if
  // the value should be evaluated at runtime."
  if (cast<ConstantInt>(CB.getArgOperand(3))->isOne())
    return false;

                                          /*MustSucceed=*/true);
  if (C)
    SimplifiedValues[&CB] = C;
  // Non-null C (converted to bool) signals the call was simplified away.
  return C;
}
1871
bool CallAnalyzer::visitBitCast(BitCastInst &I) {
  // Propagate constants through bitcasts.
    return true;

  // Track base/offsets through casts
  std::pair<Value *, APInt> BaseAndOffset =
      ConstantOffsetPtrs.lookup(I.getOperand(0));
  // Casts don't change the offset, just wrap it up.
  if (BaseAndOffset.first)
    ConstantOffsetPtrs[&I] = BaseAndOffset;

  // Also look for SROA candidates here: the cast result aliases its operand,
  // so it inherits the operand's SROA candidacy.
  if (auto *SROAArg = getSROAArgForValueOrNull(I.getOperand(0)))
    SROAArgValues[&I] = SROAArg;

  // Bitcasts are always zero cost.
  return true;
}
1891
bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
  // Propagate constants through ptrtoint.
    return true;

  // Track base/offset pairs when converted to a plain integer provided the
  // integer is large enough to represent the pointer.
  unsigned IntegerSize = I.getType()->getScalarSizeInBits();
  unsigned AS = I.getOperand(0)->getType()->getPointerAddressSpace();
  if (IntegerSize == DL.getPointerSizeInBits(AS)) {
    std::pair<Value *, APInt> BaseAndOffset =
        ConstantOffsetPtrs.lookup(I.getOperand(0));
    if (BaseAndOffset.first)
      ConstantOffsetPtrs[&I] = BaseAndOffset;
  }

  // This is really weird. Technically, ptrtoint will disable SROA. However,
  // unless that ptrtoint is *used* somewhere in the live basic blocks after
  // inlining, it will be nuked, and SROA should proceed. All of the uses which
  // would block SROA would also block SROA if applied directly to a pointer,
  // and so we can just add the integer in here. The only places where SROA is
  // preserved either cannot fire on an integer, or won't in-and-of themselves
  // disable SROA (ext) w/o some later use that we would see and disable.
  if (auto *SROAArg = getSROAArgForValueOrNull(I.getOperand(0)))
    SROAArgValues[&I] = SROAArg;

}
1921
bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
  // Propagate constants through ptrtoint.
    return true;

  // Track base/offset pairs when round-tripped through a pointer without
  // modifications provided the integer is not too large.
  Value *Op = I.getOperand(0);
  unsigned IntegerSize = Op->getType()->getScalarSizeInBits();
  if (IntegerSize <= DL.getPointerTypeSizeInBits(I.getType())) {
    std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op);
    if (BaseAndOffset.first)
      ConstantOffsetPtrs[&I] = BaseAndOffset;
  }

  // "Propagate" SROA here in the same manner as we do for ptrtoint above.
  if (auto *SROAArg = getSROAArgForValueOrNull(Op))
    SROAArgValues[&I] = SROAArg;

}
1944
bool CallAnalyzer::visitCastInst(CastInst &I) {
  // Propagate constants through casts.
    return true;

  // Disable SROA in the face of arbitrary casts we don't explicitly list
  // elsewhere.
  disableSROA(I.getOperand(0));

  // If this is a floating-point cast, and the target says this operation
  // is expensive, this may eventually become a library call. Treat the cost
  // as such.
  switch (I.getOpcode()) {
  case Instruction::FPTrunc:
  case Instruction::FPExt:
  case Instruction::UIToFP:
  case Instruction::SIToFP:
  case Instruction::FPToUI:
  case Instruction::FPToSI:
      onCallPenalty();
    break;
  default:
    break;
  }

}
1974
1975bool CallAnalyzer::paramHasAttr(Argument *A, Attribute::AttrKind Attr) {
1976 return CandidateCall.paramHasAttr(A->getArgNo(), Attr);
1977}
1978
1979bool CallAnalyzer::isKnownNonNullInCallee(Value *V) {
1980 // Does the *call site* have the NonNull attribute set on an argument? We
1981 // use the attribute on the call site to memoize any analysis done in the
1982 // caller. This will also trip if the callee function has a non-null
1983 // parameter attribute, but that's a less interesting case because hopefully
1984 // the callee would already have been simplified based on that.
1985 if (Argument *A = dyn_cast<Argument>(V))
1986 if (paramHasAttr(A, Attribute::NonNull))
1987 return true;
1988
1989 // Is this an alloca in the caller? This is distinct from the attribute case
1990 // above because attributes aren't updated within the inliner itself and we
1991 // always want to catch the alloca derived case.
1992 if (isAllocaDerivedArg(V))
1993 // We can actually predict the result of comparisons between an
1994 // alloca-derived value and null. Note that this fires regardless of
1995 // SROA firing.
1996 return true;
1997
1998 return false;
1999}
2000
2001bool CallAnalyzer::allowSizeGrowth(CallBase &Call) {
2002 // If the normal destination of the invoke or the parent block of the call
2003 // site is unreachable-terminated, there is little point in inlining this
2004 // unless there is literally zero cost.
2005 // FIXME: Note that it is possible that an unreachable-terminated block has a
2006 // hot entry. For example, in below scenario inlining hot_call_X() may be
2007 // beneficial :
2008 // main() {
2009 // hot_call_1();
2010 // ...
2011 // hot_call_N()
2012 // exit(0);
2013 // }
2014 // For now, we are not handling this corner case here as it is rare in real
2015 // code. In future, we should elaborate this based on BPI and BFI in more
2016 // general threshold adjusting heuristics in updateThreshold().
2017 if (InvokeInst *II = dyn_cast<InvokeInst>(&Call)) {
2018 if (isa<UnreachableInst>(II->getNormalDest()->getTerminator()))
2019 return false;
2020 } else if (isa<UnreachableInst>(Call.getParent()->getTerminator()))
2021 return false;
2022
2023 return true;
2024}
2025
2026bool InlineCostCallAnalyzer::isColdCallSite(CallBase &Call,
2027 BlockFrequencyInfo *CallerBFI) {
2028 // If global profile summary is available, then callsite's coldness is
2029 // determined based on that.
2030 if (PSI && PSI->hasProfileSummary())
2031 return PSI->isColdCallSite(Call, CallerBFI);
2032
2033 // Otherwise we need BFI to be available.
2034 if (!CallerBFI)
2035 return false;
2036
2037 // Determine if the callsite is cold relative to caller's entry. We could
2038 // potentially cache the computation of scaled entry frequency, but the added
2039 // complexity is not worth it unless this scaling shows up high in the
2040 // profiles.
2041 const BranchProbability ColdProb(ColdCallSiteRelFreq, 100);
2042 auto CallSiteBB = Call.getParent();
2043 auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB);
2044 auto CallerEntryFreq =
2045 CallerBFI->getBlockFreq(&(Call.getCaller()->getEntryBlock()));
2046 return CallSiteFreq < CallerEntryFreq * ColdProb;
2047}
2048
2049std::optional<int>
2050InlineCostCallAnalyzer::getHotCallSiteThreshold(CallBase &Call,
2051 BlockFrequencyInfo *CallerBFI) {
2052
2053 // If global profile summary is available, then callsite's hotness is
2054 // determined based on that.
2055 if (PSI && PSI->hasProfileSummary() && PSI->isHotCallSite(Call, CallerBFI))
2056 return Params.HotCallSiteThreshold;
2057
2058 // Otherwise we need BFI to be available and to have a locally hot callsite
2059 // threshold.
2060 if (!CallerBFI || !Params.LocallyHotCallSiteThreshold)
2061 return std::nullopt;
2062
2063 // Determine if the callsite is hot relative to caller's entry. We could
2064 // potentially cache the computation of scaled entry frequency, but the added
2065 // complexity is not worth it unless this scaling shows up high in the
2066 // profiles.
2067 const BasicBlock *CallSiteBB = Call.getParent();
2068 BlockFrequency CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB);
2069 BlockFrequency CallerEntryFreq = CallerBFI->getEntryFreq();
2070 std::optional<BlockFrequency> Limit = CallerEntryFreq.mul(HotCallSiteRelFreq);
2071 if (Limit && CallSiteFreq >= *Limit)
2072 return Params.LocallyHotCallSiteThreshold;
2073
2074 // Otherwise treat it normally.
2075 return std::nullopt;
2076}
2077
void InlineCostCallAnalyzer::updateThreshold(CallBase &Call, Function &Callee) {
  // If no size growth is allowed for this inlining, set Threshold to 0.
  if (!allowSizeGrowth(Call)) {
    Threshold = 0;
    return;
  }


  // return min(A, B) if B is valid.
  auto MinIfValid = [](int A, std::optional<int> B) {
    return B ? std::min(A, *B) : A;
  };

  // return max(A, B) if B is valid.
  auto MaxIfValid = [](int A, std::optional<int> B) {
    return B ? std::max(A, *B) : A;
  };

  // Various bonus percentages. These are multiplied by Threshold to get the
  // bonus values.
  // SingleBBBonus: This bonus is applied if the callee has a single reachable
  // basic block at the given callsite context. This is speculatively applied
  // and withdrawn if more than one basic block is seen.
  //
  // LstCallToStaticBonus: This large bonus is applied to ensure the inlining
  // of the last call to a static function as inlining such functions is
  // guaranteed to reduce code size.
  //
  // These bonus percentages may be set to 0 based on properties of the caller
  // and the callsite.
  int SingleBBBonusPercent = 50;
  int VectorBonusPercent = TTI.getInlinerVectorBonusPercent();
  int LastCallToStaticBonus = TTI.getInliningLastCallToStaticBonus();

  // Lambda to set all the above bonus and bonus percentages to 0.
  auto DisallowAllBonuses = [&]() {
    SingleBBBonusPercent = 0;
    VectorBonusPercent = 0;
    LastCallToStaticBonus = 0;
  };

  // Use the OptMinSizeThreshold or OptSizeThreshold knob if they are available
  // and reduce the threshold if the caller has the necessary attribute.
  if (Caller->hasMinSize()) {
    Threshold = MinIfValid(Threshold, Params.OptMinSizeThreshold);
    // For minsize, we want to disable the single BB bonus and the vector
    // bonuses, but not the last-call-to-static bonus. Inlining the last call to
    // a static function will, at the minimum, eliminate the parameter setup and
    // call/return instructions.
    SingleBBBonusPercent = 0;
    VectorBonusPercent = 0;
  } else if (Caller->hasOptSize())
    Threshold = MinIfValid(Threshold, Params.OptSizeThreshold);

  // Adjust the threshold based on inlinehint attribute and profile based
  // hotness information if the caller does not have MinSize attribute.
  if (!Caller->hasMinSize()) {
    if (Callee.hasFnAttribute(Attribute::InlineHint))
      Threshold = MaxIfValid(Threshold, Params.HintThreshold);

    // FIXME: After switching to the new passmanager, simplify the logic below
    // by checking only the callsite hotness/coldness as we will reliably
    // have local profile information.
    //
    // Callsite hotness and coldness can be determined if sample profile is
    // used (which adds hotness metadata to calls) or if caller's
    // BlockFrequencyInfo is available.
    BlockFrequencyInfo *CallerBFI = GetBFI ? &(GetBFI(*Caller)) : nullptr;
    auto HotCallSiteThreshold = getHotCallSiteThreshold(Call, CallerBFI);
    if (!Caller->hasOptSize() && HotCallSiteThreshold) {
      LLVM_DEBUG(dbgs() << "Hot callsite.\n");
      // FIXME: This should update the threshold only if it exceeds the
      // current threshold, but AutoFDO + ThinLTO currently relies on this
      // behavior to prevent inlining of hot callsites during ThinLTO
      // compile phase.
      Threshold = *HotCallSiteThreshold;
    } else if (isColdCallSite(Call, CallerBFI)) {
      LLVM_DEBUG(dbgs() << "Cold callsite.\n");
      // Do not apply bonuses for a cold callsite including the
      // LastCallToStatic bonus. While this bonus might result in code size
      // reduction, it can cause the size of a non-cold caller to increase
      // preventing it from being inlined.
      DisallowAllBonuses();
      Threshold = MinIfValid(Threshold, Params.ColdCallSiteThreshold);
    } else if (PSI) {
      // Use callee's global profile information only if we have no way of
      // determining this via callsite information.
      if (PSI->isFunctionEntryHot(&Callee)) {
        LLVM_DEBUG(dbgs() << "Hot callee.\n");
        // If callsite hotness can not be determined, we may still know
        // that the callee is hot and treat it as a weaker hint for threshold
        // increase.
        Threshold = MaxIfValid(Threshold, Params.HintThreshold);
      } else if (PSI->isFunctionEntryCold(&Callee)) {
        LLVM_DEBUG(dbgs() << "Cold callee.\n");
        // Do not apply bonuses for a cold callee including the
        // LastCallToStatic bonus. While this bonus might result in code size
        // reduction, it can cause the size of a non-cold caller to increase
        // preventing it from being inlined.
        DisallowAllBonuses();
        Threshold = MinIfValid(Threshold, Params.ColdThreshold);
      }
    }
  }

  Threshold += TTI.adjustInliningThreshold(&Call);

  // Finally, take the target-specific inlining threshold multiplier into
  // account.
  Threshold *= TTI.getInliningThresholdMultiplier();

  // Materialize the speculative single-BB and vector bonuses from the final
  // threshold.
  SingleBBBonus = Threshold * SingleBBBonusPercent / 100;
  VectorBonus = Threshold * VectorBonusPercent / 100;

  // If there is only one call of the function, and it has internal linkage,
  // the cost of inlining it drops dramatically. It may seem odd to update
  // Cost in updateThreshold, but the bonus depends on the logic in this method.
  if (isSoleCallToLocalFunction(Call, F)) {
    addCost(-LastCallToStaticBonus);
    StaticBonusApplied = LastCallToStaticBonus;
  }
}
2201
bool CallAnalyzer::visitCmpInst(CmpInst &I) {
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
  // First try to handle simplified comparisons.
    return true;

  // Try to handle comparison that can be simplified using ValueTracking.
  if (simplifyCmpInstForRecCall(I))
    return true;

  if (I.getOpcode() == Instruction::FCmp)
    return false;

  // Otherwise look for a comparison between constant offset pointers with
  // a common base.
  Value *LHSBase, *RHSBase;
  APInt LHSOffset, RHSOffset;
  std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
  if (LHSBase) {
    std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
    if (RHSBase && LHSBase == RHSBase) {
      // We have common bases, fold the icmp to a constant based on the
      // offsets.
      SimplifiedValues[&I] = ConstantInt::getBool(
          I.getType(),
          ICmpInst::compare(LHSOffset, RHSOffset, I.getPredicate()));
      ++NumConstantPtrCmps;
      return true;
    }
  }

  // True iff every instruction user of the compare carries make.implicit
  // metadata, i.e. the compare only feeds implicit null checks.
  auto isImplicitNullCheckCmp = [](const CmpInst &I) {
    for (auto *User : I.users())
      if (auto *Instr = dyn_cast<Instruction>(User))
        if (!Instr->getMetadata(LLVMContext::MD_make_implicit))
          return false;
    return true;
  };

  // If the comparison is an equality comparison with null, we can simplify it
  // if we know the value (argument) can't be null
  if (I.isEquality() && isa<ConstantPointerNull>(I.getOperand(1))) {
    if (isKnownNonNullInCallee(I.getOperand(0))) {
      bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE;
      SimplifiedValues[&I] = IsNotEqual ? ConstantInt::getTrue(I.getType())
                                        : ConstantInt::getFalse(I.getType());
      return true;
    }
    // Implicit null checks act as unconditional branches and their comparisons
    // should be treated as simplified and free of cost.
    if (isImplicitNullCheckCmp(I))
      return true;
  }
  return handleSROA(I.getOperand(0), isa<ConstantPointerNull>(I.getOperand(1)));
}
2257
2258bool CallAnalyzer::visitSub(BinaryOperator &I) {
2259 // Try to handle a special case: we can fold computing the difference of two
2260 // constant-related pointers.
2261 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
2262 Value *LHSBase, *RHSBase;
2263 APInt LHSOffset, RHSOffset;
2264 std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
2265 if (LHSBase) {
2266 std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
2267 if (RHSBase && LHSBase == RHSBase) {
2268 // We have common bases, fold the subtract to a constant based on the
2269 // offsets.
2270 Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
2271 Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
2272 if (Constant *C = ConstantExpr::getSub(CLHS, CRHS)) {
2273 SimplifiedValues[&I] = C;
2274 ++NumConstantPtrDiffs;
2275 return true;
2276 }
2277 }
2278 }
2279
2280 // Otherwise, fall back to the generic logic for simplifying and handling
2281 // instructions.
2282 return Base::visitSub(I);
2283}
2284
bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
  // Substitute known-constant operands when querying instsimplify.
  Constant *CLHS = getDirectOrSimplifiedValue<Constant>(LHS);
  Constant *CRHS = getDirectOrSimplifiedValue<Constant>(RHS);

  Value *SimpleV = nullptr;
  if (auto FI = dyn_cast<FPMathOperator>(&I))
    SimpleV = simplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS, CRHS ? CRHS : RHS,
                            FI->getFastMathFlags(), DL);
  else
    SimpleV =
        simplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS, CRHS ? CRHS : RHS, DL);

  if (Constant *C = dyn_cast_or_null<Constant>(SimpleV))
    SimplifiedValues[&I] = C;

  // Any simplification (even to a non-constant value) makes this op free.
  if (SimpleV)
    return true;

  // Disable any SROA on arguments to arbitrary, unsimplified binary operators.
  disableSROA(LHS);
  disableSROA(RHS);

  // If the instruction is floating point, and the target says this operation
  // is expensive, this may eventually become a library call. Treat the cost
  // as such. Unless it's fneg which can be implemented with an xor.
  using namespace llvm::PatternMatch;
  if (I.getType()->isFloatingPointTy() &&
      !match(&I, m_FNeg(m_Value())))
    onCallPenalty();

  return false;
}
2319
2320bool CallAnalyzer::visitFNeg(UnaryOperator &I) {
2321 Value *Op = I.getOperand(0);
2322 Constant *COp = getDirectOrSimplifiedValue<Constant>(Op);
2323
2324 Value *SimpleV = simplifyFNegInst(
2325 COp ? COp : Op, cast<FPMathOperator>(I).getFastMathFlags(), DL);
2326
2327 if (Constant *C = dyn_cast_or_null<Constant>(SimpleV))
2328 SimplifiedValues[&I] = C;
2329
2330 if (SimpleV)
2331 return true;
2332
2333 // Disable any SROA on arguments to arbitrary, unsimplified fneg.
2334 disableSROA(Op);
2335
2336 return false;
2337}
2338
2339bool CallAnalyzer::visitLoad(LoadInst &I) {
2340 if (handleSROA(I.getPointerOperand(), I.isSimple()))
2341 return true;
2342
2343 // If the data is already loaded from this address and hasn't been clobbered
2344 // by any stores or calls, this load is likely to be redundant and can be
2345 // eliminated.
2346 if (EnableLoadElimination &&
2347 !LoadAddrSet.insert(I.getPointerOperand()).second && I.isUnordered()) {
2348 onLoadEliminationOpportunity();
2349 return true;
2350 }
2351
2352 onMemAccess();
2353 return false;
2354}
2355
2356bool CallAnalyzer::visitStore(StoreInst &I) {
2357 if (handleSROA(I.getPointerOperand(), I.isSimple()))
2358 return true;
2359
2360 // The store can potentially clobber loads and prevent repeated loads from
2361 // being eliminated.
2362 // FIXME:
2363 // 1. We can probably keep an initial set of eliminatable loads substracted
2364 // from the cost even when we finally see a store. We just need to disable
2365 // *further* accumulation of elimination savings.
2366 // 2. We should probably at some point thread MemorySSA for the callee into
2367 // this and then use that to actually compute *really* precise savings.
2368 disableLoadElimination();
2369
2370 onMemAccess();
2371 return false;
2372}
2373
2374bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) {
2375 Value *Op = I.getAggregateOperand();
2376
2377 // Special handling, because we want to simplify extractvalue with a
2378 // potential insertvalue from the caller.
2379 if (Value *SimpleOp = getSimplifiedValueUnchecked(Op)) {
2380 SimplifyQuery SQ(DL);
2381 Value *SimpleV = simplifyExtractValueInst(SimpleOp, I.getIndices(), SQ);
2382 if (SimpleV) {
2383 SimplifiedValues[&I] = SimpleV;
2384 return true;
2385 }
2386 }
2387
2388 // SROA can't look through these, but they may be free.
2389 return Base::visitExtractValue(I);
2390}
2391
bool CallAnalyzer::visitInsertValue(InsertValueInst &I) {
  // Constant folding for insert value is trivial.
    return true;

  // SROA can't look through these, but they may be free.
  return Base::visitInsertValue(I);
}
2400
/// Try to simplify a call site.
///
/// Takes a concrete function and callsite and tries to actually simplify it by
/// analyzing the arguments and call itself with instsimplify. Returns true if
/// it has simplified the callsite to some other entity (a constant), making it
/// free.
bool CallAnalyzer::simplifyCallSite(Function *F, CallBase &Call) {
  // FIXME: Using the instsimplify logic directly for this is inefficient
  // because we have to continually rebuild the argument list even when no
  // simplifications can be performed. Until that is fixed with remapping
  // inside of instsimplify, directly constant fold calls here.
    return false;

  // Try to re-map the arguments to constants.
  SmallVector<Constant *, 4> ConstantArgs;
  ConstantArgs.reserve(Call.arg_size());
  for (Value *I : Call.args()) {
    Constant *C = getDirectOrSimplifiedValue<Constant>(I);
    if (!C)
      return false; // This argument doesn't map to a constant.

    ConstantArgs.push_back(C);
  }
  // All arguments are constants; attempt target-independent constant folding
  // of the call itself and record the result if it succeeds.
  if (Constant *C = ConstantFoldCall(&Call, F, ConstantArgs)) {
    SimplifiedValues[&Call] = C;
    return true;
  }

  return false;
}
2432
2433bool CallAnalyzer::isLoweredToCall(Function *F, CallBase &Call) {
2434 const TargetLibraryInfo *TLI = GetTLI ? &GetTLI(*F) : nullptr;
2435 LibFunc LF;
2436 if (!TLI || !TLI->getLibFunc(*F, LF) || !TLI->has(LF))
2437 return TTI.isLoweredToCall(F);
2438
2439 switch (LF) {
2440 case LibFunc_memcpy_chk:
2441 case LibFunc_memmove_chk:
2442 case LibFunc_mempcpy_chk:
2443 case LibFunc_memset_chk: {
2444 // Calls to __memcpy_chk whose length is known to fit within the object
2445 // size will eventually be replaced by inline stores. Therefore, these
2446 // should not incur a call penalty. This is only really relevant on
2447 // platforms whose headers redirect memcpy to __memcpy_chk (e.g. Darwin), as
2448 // other platforms use memcpy intrinsics, which are already exempt from the
2449 // call penalty.
2450 auto *LenOp = getDirectOrSimplifiedValue<ConstantInt>(Call.getOperand(2));
2451 auto *ObjSizeOp =
2452 getDirectOrSimplifiedValue<ConstantInt>(Call.getOperand(3));
2453 if (LenOp && ObjSizeOp &&
2454 LenOp->getLimitedValue() <= ObjSizeOp->getLimitedValue()) {
2455 return false;
2456 }
2457 break;
2458 }
2459 default:
2460 break;
2461 }
2462
2463 return TTI.isLoweredToCall(F);
2464}
2465
bool CallAnalyzer::visitCallBase(CallBase &Call) {
  if (!onCallBaseVisitStart(Call))
    return true;

  if (Call.hasFnAttr(Attribute::ReturnsTwice) &&
      !F.hasFnAttribute(Attribute::ReturnsTwice)) {
    // This aborts the entire analysis.
    ExposesReturnsTwice = true;
    return false;
  }
  if (isa<CallInst>(Call) && cast<CallInst>(Call).cannotDuplicate())
    ContainsNoDuplicateCall = true;

  if (InlineAsm *InlineAsmOp = dyn_cast<InlineAsm>(Call.getCalledOperand()))
    onInlineAsm(*InlineAsmOp);

  bool IsIndirectCall = !F;
  if (IsIndirectCall) {
    // Check if this happens to be an indirect function call to a known function
    // in this inline context. If not, we've done all we can.
    F = getSimplifiedValue<Function>(Callee);
    if (!F || F->getFunctionType() != Call.getFunctionType()) {
      onCallArgumentSetup(Call);

      if (!Call.onlyReadsMemory())
        disableLoadElimination();
      return Base::visitCallBase(Call);
    }
  }

  assert(F && "Expected a call to a known function");

  // When we have a concrete function, first try to simplify it directly.
  if (simplifyCallSite(F, Call))
    return true;

  // Next check if it is an intrinsic we know about.
  // FIXME: Lift this into part of the InstVisitor.
  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&Call)) {
    switch (II->getIntrinsicID()) {
    default:
      disableLoadElimination();
      return Base::visitCallBase(Call);

    case Intrinsic::load_relative:
      onLoadRelativeIntrinsic();
      return false;

    case Intrinsic::memset:
    case Intrinsic::memcpy:
    case Intrinsic::memmove:
      disableLoadElimination();
      // SROA can usually chew through these intrinsics, but they aren't free.
      return false;
    case Intrinsic::icall_branch_funnel:
    case Intrinsic::localescape:
      HasUninlineableIntrinsic = true;
      return false;
    case Intrinsic::vastart:
      InitsVargArgs = true;
      return false;
    case Intrinsic::launder_invariant_group:
    case Intrinsic::strip_invariant_group:
      // These are no-ops for cost purposes; propagate SROA candidacy through.
      if (auto *SROAArg = getSROAArgForValueOrNull(II->getOperand(0)))
        SROAArgValues[II] = SROAArg;
      return true;
    case Intrinsic::is_constant:
      return simplifyIntrinsicCallIsConstant(Call);
    case Intrinsic::objectsize:
      return simplifyIntrinsicCallObjectSize(Call);
    }
  }

  if (F == Call.getFunction()) {
    // This flag will fully abort the analysis, so don't bother with anything
    // else.
    IsRecursiveCall = true;
    if (!AllowRecursiveCall)
      return false;
  }

  if (isLoweredToCall(F, Call)) {
    onLoweredCall(F, Call, IsIndirectCall);
  }

  if (!(Call.onlyReadsMemory() || (IsIndirectCall && F->onlyReadsMemory())))
    disableLoadElimination();
  return Base::visitCallBase(Call);
}
2558
2559bool CallAnalyzer::visitReturnInst(ReturnInst &RI) {
2560 // At least one return instruction will be free after inlining.
2561 bool Free = !HasReturn;
2562 HasReturn = true;
2563 return Free;
2564}
2565
2566bool CallAnalyzer::visitBranchInst(BranchInst &BI) {
2567 // We model unconditional branches as essentially free -- they really
2568 // shouldn't exist at all, but handling them makes the behavior of the
2569 // inliner more regular and predictable. Interestingly, conditional branches
2570 // which will fold away are also free.
2571 return BI.isUnconditional() ||
2572 getDirectOrSimplifiedValue<ConstantInt>(BI.getCondition()) ||
2573 BI.getMetadata(LLVMContext::MD_make_implicit);
2574}
2575
2576bool CallAnalyzer::visitSelectInst(SelectInst &SI) {
2577 bool CheckSROA = SI.getType()->isPointerTy();
2578 Value *TrueVal = SI.getTrueValue();
2579 Value *FalseVal = SI.getFalseValue();
2580
2581 Constant *TrueC = getDirectOrSimplifiedValue<Constant>(TrueVal);
2582 Constant *FalseC = getDirectOrSimplifiedValue<Constant>(FalseVal);
2583 Constant *CondC = getSimplifiedValue<Constant>(SI.getCondition());
2584
2585 if (!CondC) {
2586 // Select C, X, X => X
2587 if (TrueC == FalseC && TrueC) {
2588 SimplifiedValues[&SI] = TrueC;
2589 return true;
2590 }
2591
2592 if (!CheckSROA)
2593 return Base::visitSelectInst(SI);
2594
2595 std::pair<Value *, APInt> TrueBaseAndOffset =
2596 ConstantOffsetPtrs.lookup(TrueVal);
2597 std::pair<Value *, APInt> FalseBaseAndOffset =
2598 ConstantOffsetPtrs.lookup(FalseVal);
2599 if (TrueBaseAndOffset == FalseBaseAndOffset && TrueBaseAndOffset.first) {
2600 ConstantOffsetPtrs[&SI] = TrueBaseAndOffset;
2601
2602 if (auto *SROAArg = getSROAArgForValueOrNull(TrueVal))
2603 SROAArgValues[&SI] = SROAArg;
2604 return true;
2605 }
2606
2607 return Base::visitSelectInst(SI);
2608 }
2609
2610 // Select condition is a constant.
2611 Value *SelectedV = CondC->isAllOnesValue() ? TrueVal
2612 : (CondC->isNullValue()) ? FalseVal
2613 : nullptr;
2614 if (!SelectedV) {
2615 // Condition is a vector constant that is not all 1s or all 0s. If all
2616 // operands are constants, ConstantFoldSelectInstruction() can handle the
2617 // cases such as select vectors.
2618 if (TrueC && FalseC) {
2619 if (auto *C = ConstantFoldSelectInstruction(CondC, TrueC, FalseC)) {
2620 SimplifiedValues[&SI] = C;
2621 return true;
2622 }
2623 }
2624 return Base::visitSelectInst(SI);
2625 }
2626
2627 // Condition is either all 1s or all 0s. SI can be simplified.
2628 if (Constant *SelectedC = dyn_cast<Constant>(SelectedV)) {
2629 SimplifiedValues[&SI] = SelectedC;
2630 return true;
2631 }
2632
2633 if (!CheckSROA)
2634 return true;
2635
2636 std::pair<Value *, APInt> BaseAndOffset =
2637 ConstantOffsetPtrs.lookup(SelectedV);
2638 if (BaseAndOffset.first) {
2639 ConstantOffsetPtrs[&SI] = BaseAndOffset;
2640
2641 if (auto *SROAArg = getSROAArgForValueOrNull(SelectedV))
2642 SROAArgValues[&SI] = SROAArg;
2643 }
2644
2645 return true;
2646}
2647
2648bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
2649 // We model unconditional switches as free, see the comments on handling
2650 // branches.
2651 if (getDirectOrSimplifiedValue<ConstantInt>(SI.getCondition()))
2652 return true;
2653
2654 // Assume the most general case where the switch is lowered into
2655 // either a jump table, bit test, or a balanced binary tree consisting of
2656 // case clusters without merging adjacent clusters with the same
2657 // destination. We do not consider the switches that are lowered with a mix
2658 // of jump table/bit test/binary search tree. The cost of the switch is
2659 // proportional to the size of the tree or the size of jump table range.
2660 //
2661 // NB: We convert large switches which are just used to initialize large phi
2662 // nodes to lookup tables instead in simplifycfg, so this shouldn't prevent
2663 // inlining those. It will prevent inlining in cases where the optimization
2664 // does not (yet) fire.
2665
2666 unsigned JumpTableSize = 0;
2667 BlockFrequencyInfo *BFI = GetBFI ? &(GetBFI(F)) : nullptr;
2668 unsigned NumCaseCluster =
2669 TTI.getEstimatedNumberOfCaseClusters(SI, JumpTableSize, PSI, BFI);
2670
2671 onFinalizeSwitch(JumpTableSize, NumCaseCluster, SI.defaultDestUnreachable());
2672 return false;
2673}
2674
2675bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) {
2676 // We never want to inline functions that contain an indirectbr. This is
2677 // incorrect because all the blockaddress's (in static global initializers
2678 // for example) would be referring to the original function, and this
2679 // indirect jump would jump from the inlined copy of the function into the
2680 // original function which is extremely undefined behavior.
2681 // FIXME: This logic isn't really right; we can safely inline functions with
2682 // indirectbr's as long as no other function or global references the
2683 // blockaddress of a block within the current function.
2684 HasIndirectBr = true;
2685 return false;
2686}
2687
2688bool CallAnalyzer::visitResumeInst(ResumeInst &RI) {
2689 // FIXME: It's not clear that a single instruction is an accurate model for
2690 // the inline cost of a resume instruction.
2691 return false;
2692}
2693
2694bool CallAnalyzer::visitCleanupReturnInst(CleanupReturnInst &CRI) {
2695 // FIXME: It's not clear that a single instruction is an accurate model for
2696 // the inline cost of a cleanupret instruction.
2697 return false;
2698}
2699
2700bool CallAnalyzer::visitCatchReturnInst(CatchReturnInst &CRI) {
2701 // FIXME: It's not clear that a single instruction is an accurate model for
2702 // the inline cost of a catchret instruction.
2703 return false;
2704}
2705
2706bool CallAnalyzer::visitUnreachableInst(UnreachableInst &I) {
2707 // FIXME: It might be reasonably to discount the cost of instructions leading
2708 // to unreachable as they have the lowest possible impact on both runtime and
2709 // code size.
2710 return true; // No actual code is needed for unreachable.
2711}
2712
2713 bool CallAnalyzer::visitInstruction(Instruction &I) {
2714   // Some instructions are free. All of the free intrinsics can also be
2715   // handled by SROA, etc.
// NOTE(review): the condition guarding this early return (presumably a TTI
// "instruction is free" query on the lines elided from this excerpt) is not
// visible here -- confirm against the full upstream source before editing.
2718     return true;
2719 
2720   // We found something we don't understand or can't handle. Mark any SROA-able
2721   // values in the operand list as no longer viable.
2722   for (const Use &Op : I.operands())
2723     disableSROA(Op);
2724 
2725   return false;
2726 }
2727
2728 /// Analyze a basic block for its contribution to the inline cost.
2729 ///
2730 /// This method walks the analyzer over every instruction in the given basic
2731 /// block and accounts for their cost during inlining at this callsite. It
2732 /// aborts early if the threshold has been exceeded or an impossible to inline
2733 /// construct has been detected. It returns false if inlining is no longer
2734 /// viable, and true if inlining remains viable.
/// \p EphValues is the set of ephemeral values for the callee; they are
/// skipped so they do not perturb the cost model.
2735 InlineResult
2736 CallAnalyzer::analyzeBlock(BasicBlock *BB,
2737                            const SmallPtrSetImpl<const Value *> &EphValues) {
2738   for (Instruction &I : *BB) {
2739     // FIXME: Currently, the number of instructions in a function regardless of
2740     // our ability to simplify them during inline to constants or dead code,
2741     // are actually used by the vector bonus heuristic. As long as that's true,
2742     // we have to special case debug intrinsics here to prevent differences in
2743     // inlining due to debug symbols. Eventually, the number of unsimplified
2744     // instructions shouldn't factor into the cost computation, but until then,
2745     // hack around it here.
2746     // Similarly, skip pseudo-probes.
2747     if (I.isDebugOrPseudoInst())
2748       continue;
2749 
2750     // Skip ephemeral values.
2751     if (EphValues.count(&I))
2752       continue;
2753 
2754     ++NumInstructions;
// The vector-instruction count feeds the vector bonus heuristic.
2755     if (isa<ExtractElementInst>(I) || I.getType()->isVectorTy())
2756       ++NumVectorInstructions;
2757 
2758     // If the instruction simplified to a constant, there is no cost to this
2759     // instruction. Visit the instructions using our InstVisitor to account for
2760     // all of the per-instruction logic. The visit tree returns true if we
2761     // consumed the instruction in any way, and false if the instruction's base
2762     // cost should count against inlining.
2763     onInstructionAnalysisStart(&I);
2764 
2765     if (Base::visit(&I))
2766       ++NumInstructionsSimplified;
2767     else
2768       onMissedSimplification();
2769 
2770     onInstructionAnalysisFinish(&I);
2771     using namespace ore;
2772     // If the visit this instruction detected an uninlinable pattern, abort.
2773     InlineResult IR = InlineResult::success();
2774     if (IsRecursiveCall && !AllowRecursiveCall)
2775       IR = InlineResult::failure("recursive");
2776     else if (ExposesReturnsTwice)
2777       IR = InlineResult::failure("exposes returns twice");
2778     else if (HasDynamicAlloca)
2779       IR = InlineResult::failure("dynamic alloca");
2780     else if (HasIndirectBr)
2781       IR = InlineResult::failure("indirect branch");
2782     else if (HasUninlineableIntrinsic)
2783       IR = InlineResult::failure("uninlinable intrinsic");
2784     else if (InitsVargArgs)
2785       IR = InlineResult::failure("varargs");
2786     if (!IR.isSuccess()) {
2787       if (ORE)
2788         ORE->emit([&]() {
2789           return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
2790                                           &CandidateCall)
2791                  << NV("Callee", &F) << " has uninlinable pattern ("
2792                  << NV("InlineResult", IR.getFailureReason())
2793                  << ") and cost is not fully computed";
2794         });
2795       return IR;
2796     }
2797 
2798     // If the caller is a recursive function then we don't want to inline
2799     // functions which allocate a lot of stack space because it would increase
2800     // the caller stack usage dramatically.
2801     if (IsCallerRecursive && AllocatedSize > RecurStackSizeThreshold) {
2802       auto IR =
2803           InlineResult::failure("recursive and allocates too much stack space");
2804       if (ORE)
2805         ORE->emit([&]() {
2806           return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
2807                                           &CandidateCall)
2808                  << NV("Callee", &F) << " is "
2809                  << NV("InlineResult", IR.getFailureReason())
2810                  << ". Cost is not fully computed";
2811         });
2812       return IR;
2813     }
2814 
// Bail out as soon as the analyzer decides the outcome cannot change.
2815     if (shouldStop())
2816       return InlineResult::failure(
2817           "Call site analysis is not favorable to inlining.");
2818   }
2819 
2820   return InlineResult::success();
2821 }
2822
2823/// Compute the base pointer and cumulative constant offsets for V.
2824///
2825/// This strips all constant offsets off of V, leaving it the base pointer, and
2826/// accumulates the total constant offset applied in the returned constant. It
2827/// returns 0 if V is not a pointer, and returns the constant '0' if there are
2828/// no constant offsets applied.
2829ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
2830 if (!V->getType()->isPointerTy())
2831 return nullptr;
2832
2833 unsigned AS = V->getType()->getPointerAddressSpace();
2834 unsigned IntPtrWidth = DL.getIndexSizeInBits(AS);
2835 APInt Offset = APInt::getZero(IntPtrWidth);
2836
2837 // Even though we don't look through PHI nodes, we could be called on an
2838 // instruction in an unreachable block, which may be on a cycle.
2839 SmallPtrSet<Value *, 4> Visited;
2840 Visited.insert(V);
2841 do {
2842 if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
2843 if (!GEP->isInBounds() || !accumulateGEPOffset(*GEP, Offset))
2844 return nullptr;
2845 V = GEP->getPointerOperand();
2846 } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
2847 if (GA->isInterposable())
2848 break;
2849 V = GA->getAliasee();
2850 } else {
2851 break;
2852 }
2853 assert(V->getType()->isPointerTy() && "Unexpected operand type!");
2854 } while (Visited.insert(V).second);
2855
2856 Type *IdxPtrTy = DL.getIndexType(V->getType());
2857 return cast<ConstantInt>(ConstantInt::get(IdxPtrTy, Offset));
2858}
2859
2860 /// Find dead blocks due to deleted CFG edges during inlining.
2861 ///
2862 /// If we know the successor of the current block, \p CurrBB, has to be \p
2863 /// NextBB, the other successors of \p CurrBB are dead if these successors have
2864 /// no live incoming CFG edges. If one block is found to be dead, we can
2865 /// continue growing the dead block list by checking the successors of the dead
2866 /// blocks to see if all their incoming edges are dead or not.
2867 void CallAnalyzer::findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB) {
2868   auto IsEdgeDead = [&](BasicBlock *Pred, BasicBlock *Succ) {
2869     // A CFG edge is dead if the predecessor is dead or the predecessor has a
2870     // known successor which is not the one under exam.
2871     if (DeadBlocks.count(Pred))
2872       return true;
2873     BasicBlock *KnownSucc = KnownSuccessors[Pred];
2874     return KnownSucc && KnownSucc != Succ;
2875   };
2876 
2877   auto IsNewlyDead = [&](BasicBlock *BB) {
2878     // If all the edges to a block are dead, the block is also dead.
// NOTE(review): the line applying the IsEdgeDead predicate over BB's
// predecessors (presumably an all_of over predecessors(BB)) is elided from
// this excerpt -- confirm against the full upstream source.
2879     return (!DeadBlocks.count(BB) &&
2881                    [&](BasicBlock *P) { return IsEdgeDead(P, BB); }));
2882   };
2883 
2884   for (BasicBlock *Succ : successors(CurrBB)) {
2885     if (Succ == NextBB || !IsNewlyDead(Succ))
2886       continue;
// NOTE(review): the declaration of the NewDead worklist is elided from this
// excerpt -- confirm against the full upstream source.
2888     NewDead.push_back(Succ);
// Worklist-driven flood fill: a block proven dead may make its successors
// dead in turn.
2889     while (!NewDead.empty()) {
2890       BasicBlock *Dead = NewDead.pop_back_val();
2891       if (DeadBlocks.insert(Dead).second)
2892         // Continue growing the dead block lists.
2893         for (BasicBlock *S : successors(Dead))
2894           if (IsNewlyDead(S))
2895             NewDead.push_back(S);
2896     }
2897   }
2898 }
2899
2900 /// Analyze a call site for potential inlining.
2901 ///
2902 /// Returns true if inlining this call is viable, and false if it is not
2903 /// viable. It computes the cost and adjusts the threshold based on numerous
2904 /// factors and heuristics. If this method returns false but the computed cost
2905 /// is below the computed threshold, then inlining was forcibly disabled by
2906 /// some artifact of the routine.
2907 InlineResult CallAnalyzer::analyze() {
2908   ++NumCallsAnalyzed;
2909 
2910   auto Result = onAnalysisStart();
2911   if (!Result.isSuccess())
2912     return Result;
2913 
// A function with no body has nothing to analyze; trivially succeed.
2914   if (F.empty())
2915     return InlineResult::success();
2916 
2917   Function *Caller = CandidateCall.getFunction();
2918   // Check if the caller function is recursive itself.
2919   for (User *U : Caller->users()) {
2920     CallBase *Call = dyn_cast<CallBase>(U);
2921     if (Call && Call->getFunction() == Caller) {
2922       IsCallerRecursive = true;
2923       break;
2924     }
2925   }
2926 
2927   // Populate our simplified values by mapping from function arguments to call
2928   // arguments with known important simplifications.
2929   auto CAI = CandidateCall.arg_begin();
2930   for (Argument &FAI : F.args()) {
2931     assert(CAI != CandidateCall.arg_end());
// Every formal maps to the actual argument; constants enable later folding.
2932     SimplifiedValues[&FAI] = *CAI;
2933     if (isa<Constant>(*CAI))
2934       ++NumConstantArgs;
2935 
2936     Value *PtrArg = *CAI;
2937     if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) {
2938       ConstantOffsetPtrs[&FAI] = std::make_pair(PtrArg, C->getValue());
2939 
2940       // We can SROA any pointer arguments derived from alloca instructions.
2941       if (auto *SROAArg = dyn_cast<AllocaInst>(PtrArg)) {
2942         SROAArgValues[&FAI] = SROAArg;
2943         onInitializeSROAArg(SROAArg);
2944         EnabledSROAAllocas.insert(SROAArg);
2945       }
2946     }
2947     ++CAI;
2948   }
2949   NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size();
2950   NumAllocaArgs = SROAArgValues.size();
2951 
2952   // Collecting the ephemeral values of `F` can be expensive, so use the
2953   // ephemeral values cache if available.
2954   SmallPtrSet<const Value *, 32> EphValuesStorage;
2955   const SmallPtrSetImpl<const Value *> *EphValues = &EphValuesStorage;
2956   if (GetEphValuesCache)
2957     EphValues = &GetEphValuesCache(F).ephValues();
2958   else
2959     CodeMetrics::collectEphemeralValues(&F, &GetAssumptionCache(F),
2960                                         EphValuesStorage);
2961 
2962   // The worklist of live basic blocks in the callee *after* inlining. We avoid
2963   // adding basic blocks of the callee which can be proven to be dead for this
2964   // particular call site in order to get more accurate cost estimates. This
2965   // requires a somewhat heavyweight iteration pattern: we need to walk the
2966   // basic blocks in a breadth-first order as we insert live successors. To
2967   // accomplish this, prioritizing for small iterations because we exit after
2968   // crossing our threshold, we use a small-size optimized SetVector.
2969   typedef SmallSetVector<BasicBlock *, 16> BBSetVector;
2970   BBSetVector BBWorklist;
2971   BBWorklist.insert(&F.getEntryBlock());
2972 
2973   // Note that we *must not* cache the size, this loop grows the worklist.
2974   for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
2975     if (shouldStop())
2976       break;
2977 
2978     BasicBlock *BB = BBWorklist[Idx];
2979     if (BB->empty())
2980       continue;
2981 
2982     onBlockStart(BB);
2983 
2984     // Disallow inlining a blockaddress.
2985     // A blockaddress only has defined behavior for an indirect branch in the
2986     // same function, and we do not currently support inlining indirect
2987     // branches. But, the inliner may not see an indirect branch that ends up
2988     // being dead code at a particular call site. If the blockaddress escapes
2989     // the function, e.g., via a global variable, inlining may lead to an
2990     // invalid cross-function reference.
2991     // FIXME: pr/39560: continue relaxing this overt restriction.
2992     if (BB->hasAddressTaken())
2993       return InlineResult::failure("blockaddress used");
2994 
2995     // Analyze the cost of this block. If we blow through the threshold, this
2996     // returns false, and we can bail on out.
2997     InlineResult IR = analyzeBlock(BB, *EphValues);
2998     if (!IR.isSuccess())
2999       return IR;
3000 
3001     Instruction *TI = BB->getTerminator();
3002 
3003     // Add in the live successors by first checking whether we have terminator
3004     // that may be simplified based on the values simplified by this call.
3005     if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
3006       if (BI->isConditional()) {
3007         Value *Cond = BI->getCondition();
3008         if (ConstantInt *SimpleCond = getSimplifiedValue<ConstantInt>(Cond)) {
3009           BasicBlock *NextBB = BI->getSuccessor(SimpleCond->isZero() ? 1 : 0);
3010           BBWorklist.insert(NextBB);
3011           KnownSuccessors[BB] = NextBB;
3012           findDeadBlocks(BB, NextBB);
3013           continue;
3014         }
3015       }
3016     } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
3017       Value *Cond = SI->getCondition();
3018       if (ConstantInt *SimpleCond = getSimplifiedValue<ConstantInt>(Cond)) {
3019         BasicBlock *NextBB = SI->findCaseValue(SimpleCond)->getCaseSuccessor();
3020         BBWorklist.insert(NextBB);
3021         KnownSuccessors[BB] = NextBB;
3022         findDeadBlocks(BB, NextBB);
3023         continue;
3024       }
3025     }
3026 
3027     // If we're unable to select a particular successor, just count all of
3028     // them.
3029     BBWorklist.insert_range(successors(BB));
3030 
3031     onBlockAnalyzed(BB);
3032   }
3033 
3034   // If this is a noduplicate call, we can still inline as long as
3035   // inlining this would cause the removal of the caller (so the instruction
3036   // is not actually duplicated, just moved).
3037   if (!isSoleCallToLocalFunction(CandidateCall, F) && ContainsNoDuplicateCall)
3038     return InlineResult::failure("noduplicate");
3039 
3040   // If the callee's stack size exceeds the user-specified threshold,
3041   // do not let it be inlined.
3042   // The command line option overrides a limit set in the function attributes.
3043   size_t FinalStackSizeThreshold = StackSizeThreshold;
3044   if (!StackSizeThreshold.getNumOccurrences())
3045     if (std::optional<int> AttrMaxStackSize = getStringFnAttrAsInt(
// NOTE(review): the remaining arguments to getStringFnAttrAsInt (the call and
// the attribute name) are elided from this excerpt -- confirm upstream.
3047       FinalStackSizeThreshold = *AttrMaxStackSize;
3048   if (AllocatedSize > FinalStackSizeThreshold)
3049     return InlineResult::failure("stacksize");
3050 
3051   return finalizeAnalysis();
3052 }
3053
// Print the analyzed function with per-instruction cost annotations, followed
// by the analyzer's summary statistics. Debugging aid only.
3054 void InlineCostCallAnalyzer::print(raw_ostream &OS) {
3055 #define DEBUG_PRINT_STAT(x) OS << " " #x ": " << x << "\n"
// NOTE(review): the declaration of the annotation writer passed to F.print()
// is elided from this excerpt -- confirm against the full upstream source.
3057   F.print(OS, &Writer);
3058   DEBUG_PRINT_STAT(NumConstantArgs);
3059   DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs);
3060   DEBUG_PRINT_STAT(NumAllocaArgs);
3061   DEBUG_PRINT_STAT(NumConstantPtrCmps);
3062   DEBUG_PRINT_STAT(NumConstantPtrDiffs);
3063   DEBUG_PRINT_STAT(NumInstructionsSimplified);
3064   DEBUG_PRINT_STAT(NumInstructions);
3065   DEBUG_PRINT_STAT(NumInlineAsmInstructions);
3066   DEBUG_PRINT_STAT(SROACostSavings);
3067   DEBUG_PRINT_STAT(SROACostSavingsLost);
3068   DEBUG_PRINT_STAT(LoadEliminationCost);
3069   DEBUG_PRINT_STAT(ContainsNoDuplicateCall);
// NOTE(review): at least one stat line (elided here) sits between the two
// lines above and below -- confirm upstream.
3071   DEBUG_PRINT_STAT(Threshold);
3072 #undef DEBUG_PRINT_STAT
3073 }
3074
3075 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3076 /// Dump stats about this call's analysis to the debug stream (dbgs()).
3077 LLVM_DUMP_METHOD void InlineCostCallAnalyzer::dump() { print(dbgs()); }
3078 #endif
3079
3080 /// Test that there are no attribute conflicts between Caller and Callee
3081 /// that prevent inlining.
// NOTE(review): the first line of the function signature (return type and
// name) is elided from this excerpt -- confirm against the full upstream
// source.
3083     Function *Caller, Function *Callee, TargetTransformInfo &TTI,
3084     function_ref<const TargetLibraryInfo &(Function &)> &GetTLI) {
3085   // Note that CalleeTLI must be a copy not a reference. The legacy pass manager
3086   // caches the most recently created TLI in the TargetLibraryInfoWrapperPass
3087   // object, and always returns the same object (which is overwritten on each
3088   // GetTLI call). Therefore we copy the first result.
3089   auto CalleeTLI = GetTLI(*Callee);
// Compatibility must hold at the TTI, TLI, and IR-attribute levels; the TTI
// check can be suppressed with -ignore-tti-inline-compatible.
3090   return (IgnoreTTIInlineCompatible ||
3091           TTI.areInlineCompatible(Caller, Callee)) &&
3092          GetTLI(*Caller).areInlineCompatible(CalleeTLI,
// NOTE(review): the second argument of TLI areInlineCompatible is elided from
// this excerpt -- confirm upstream.
3094          AttributeFuncs::areInlineCompatible(*Caller, *Callee);
3095 }
3096
// Approximate the code-size savings from removing this call site after
// inlining: byval argument copies, per-argument setup instructions, the call
// instruction itself, and any target-specific inline call penalty.
// NOTE(review): the first line of the function signature (return type, name,
// and leading parameters) is elided from this excerpt -- confirm upstream.
3098                                   const DataLayout &DL) {
3099   int64_t Cost = 0;
3100   for (unsigned I = 0, E = Call.arg_size(); I != E; ++I) {
3101     if (Call.isByValArgument(I)) {
3102       // We approximate the number of loads and stores needed by dividing the
3103       // size of the byval type by the target's pointer size.
3104       PointerType *PTy = cast<PointerType>(Call.getArgOperand(I)->getType());
3105       unsigned TypeSize = DL.getTypeSizeInBits(Call.getParamByValType(I));
3106       unsigned AS = PTy->getAddressSpace();
3107       unsigned PointerSize = DL.getPointerSizeInBits(AS);
3108       // Ceiling division.
3109       unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;
3110 
3111       // If it generates more than 8 stores it is likely to be expanded as an
3112       // inline memcpy so we take that as an upper bound. Otherwise we assume
3113       // one load and one store per word copied.
3114       // FIXME: The maxStoresPerMemcpy setting from the target should be used
3115       // here instead of a magic number of 8, but it's not available via
3116       // DataLayout.
3117       NumStores = std::min(NumStores, 8U);
3118 
// Two instructions per word: one load plus one store.
3119       Cost += 2 * NumStores * InstrCost;
3120     } else {
3121       // For non-byval arguments subtract off one instruction per call
3122       // argument.
3123       Cost += InstrCost;
3124     }
3125   }
3126   // The call instruction also disappears after inlining.
3127   Cost += InstrCost;
3128   Cost += TTI.getInlineCallPenalty(Call.getCaller(), Call, CallPenalty);
3129 
// Clamp to int range; callers consume this as an int-sized cost.
3130   return std::min<int64_t>(Cost, INT_MAX);
3131 }
3132
// Convenience overload: analyze \p Call against its statically called
// function (Call.getCalledFunction()) by delegating to the main overload.
// NOTE(review): the first signature line and the GetBFI/PSI/ORE parameter
// lines are elided from this excerpt -- confirm upstream.
3134     CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI,
3135     function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
3136     function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
3139     function_ref<EphemeralValuesCache &(Function &)> GetEphValuesCache) {
3140   return getInlineCost(Call, Call.getCalledFunction(), Params, CalleeTTI,
3141                        GetAssumptionCache, GetTLI, GetBFI, PSI, ORE,
3142                        GetEphValuesCache);
3143 }
3144
// Compute the raw inline cost of \p Call with thresholding effectively
// disabled: all thresholds are zeroed/empty, ComputeFullInlineCost is set,
// and the analyzer is constructed with IgnoreThreshold. Returns std::nullopt
// when the analysis concludes the call cannot be inlined at all.
// NOTE(review): the first signature line and the GetBFI/PSI/ORE parameter
// lines are elided from this excerpt -- confirm upstream.
3146     CallBase &Call, TargetTransformInfo &CalleeTTI,
3147     function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
3149     function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
3151   const InlineParams Params = {/* DefaultThreshold*/ 0,
3152                                /*HintThreshold*/ {},
3153                                /*ColdThreshold*/ {},
3154                                /*OptSizeThreshold*/ {},
3155                                /*OptMinSizeThreshold*/ {},
3156                                /*HotCallSiteThreshold*/ {},
3157                                /*LocallyHotCallSiteThreshold*/ {},
3158                                /*ColdCallSiteThreshold*/ {},
3159                                /*ComputeFullInlineCost*/ true,
3160                                /*EnableDeferral*/ true};
3161 
3162   InlineCostCallAnalyzer CA(*Call.getCalledFunction(), Call, Params, CalleeTTI,
3163                             GetAssumptionCache, GetBFI, GetTLI, PSI, ORE, true,
3164                             /*IgnoreThreshold*/ true);
3165   auto R = CA.analyze();
3166   if (!R.isSuccess())
3167     return std::nullopt;
3168   return CA.getCost();
3169 }
3170
3171 std::optional<InlineCostFeatures> llvm::getInliningCostFeatures(
3172     CallBase &Call, TargetTransformInfo &CalleeTTI,
3173     function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
// NOTE(review): the GetBFI and PSI/ORE parameter lines are elided from this
// excerpt -- confirm upstream.
3175     function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
// Run the ML-feature-collecting analyzer over the call; returns std::nullopt
// when the analysis fails (call not inlinable), otherwise the feature vector.
3177   InlineCostFeaturesAnalyzer CFA(CalleeTTI, GetAssumptionCache, GetBFI, GetTLI,
3178                                  PSI, ORE, *Call.getCalledFunction(), Call);
3179   auto R = CFA.analyze();
3180   if (!R.isSuccess())
3181     return std::nullopt;
3182   return CFA.features();
3183 }
3184
// Decide inlining purely from attributes and structural constraints, before
// any cost modeling. Returns a success/failure InlineResult when attributes
// force a decision, or std::nullopt when cost analysis should proceed.
// NOTE(review): the first line of the function signature is elided from this
// excerpt -- confirm against the full upstream source.
3186     CallBase &Call, Function *Callee, TargetTransformInfo &CalleeTTI,
3187     function_ref<const TargetLibraryInfo &(Function &)> GetTLI) {
3188 
3189   // Cannot inline indirect calls.
3190   if (!Callee)
3191     return InlineResult::failure("indirect call");
3192 
3193   // When a callee coroutine function is inlined into a caller coroutine
3194   // function before the coro-split pass,
3195   // the coro-early pass cannot handle this quite well.
3196   // So we won't inline a coroutine function that has not yet been split.
3197   if (Callee->isPresplitCoroutine())
3198     return InlineResult::failure("unsplited coroutine call");
3199 
3200   // Never inline calls with byval arguments that do not have the alloca
3201   // address space. Since byval arguments can be replaced with a copy to an
3202   // alloca, the inlined code would need to be adjusted to handle that the
3203   // argument is in the alloca address space (so it is a little bit complicated
3204   // to solve).
3205   unsigned AllocaAS = Callee->getDataLayout().getAllocaAddrSpace();
3206   for (unsigned I = 0, E = Call.arg_size(); I != E; ++I)
3207     if (Call.isByValArgument(I)) {
3208       PointerType *PTy = cast<PointerType>(Call.getArgOperand(I)->getType());
3209       if (PTy->getAddressSpace() != AllocaAS)
3210         return InlineResult::failure("byval arguments without alloca"
3211                                      " address space");
3212     }
3213 
3214   // Calls to functions with always-inline attributes should be inlined
3215   // whenever possible.
3216   if (Call.hasFnAttr(Attribute::AlwaysInline)) {
// A call-site noinline still wins over the callee's always-inline.
3217     if (Call.getAttributes().hasFnAttr(Attribute::NoInline))
3218       return InlineResult::failure("noinline call site attribute");
3219 
3220     auto IsViable = isInlineViable(*Callee);
3221     if (IsViable.isSuccess())
3222       return InlineResult::success();
3223     return InlineResult::failure(IsViable.getFailureReason());
3224   }
3225 
3226   // Never inline functions with conflicting attributes (unless callee has
3227   // always-inline attribute).
3228   Function *Caller = Call.getCaller();
3229   if (!functionsHaveCompatibleAttributes(Caller, Callee, CalleeTTI, GetTLI))
3230     return InlineResult::failure("conflicting attributes");
3231 
3232   // Don't inline this call if the caller has the optnone attribute.
3233   if (Caller->hasOptNone())
3234     return InlineResult::failure("optnone attribute");
3235 
3236   // Don't inline a function that treats null pointer as valid into a caller
3237   // that does not have this attribute.
3238   if (!Caller->nullPointerIsDefined() && Callee->nullPointerIsDefined())
3239     return InlineResult::failure("nullptr definitions incompatible");
3240 
3241   // Don't inline functions which can be interposed at link-time.
3242   if (Callee->isInterposable())
3243     return InlineResult::failure("interposable");
3244 
3245   // Don't inline functions marked noinline.
3246   if (Callee->hasFnAttribute(Attribute::NoInline))
3247     return InlineResult::failure("noinline function attribute");
3248 
3249   // Don't inline call sites marked noinline.
3250   if (Call.isNoInline())
3251     return InlineResult::failure("noinline call site attribute");
3252 
3253   // Don't inline functions that are loader replaceable.
3254   if (Callee->hasFnAttribute("loader-replaceable"))
3255     return InlineResult::failure("loader replaceable function attribute");
3256 
// No attribute forces a decision; let cost analysis decide.
3257   return std::nullopt;
3258 }
3259
// Main entry point: compute the InlineCost for \p Call against \p Callee.
// Attribute-based decisions short-circuit the cost analysis entirely.
// NOTE(review): the first line of the function signature and the GetBFI/PSI
// parameter lines are elided from this excerpt -- confirm upstream.
3261     CallBase &Call, Function *Callee, const InlineParams &Params,
3262     TargetTransformInfo &CalleeTTI,
3263     function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
3264     function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
3267     function_ref<EphemeralValuesCache &(Function &)> GetEphValuesCache) {
3268 
3269   auto UserDecision =
3270       llvm::getAttributeBasedInliningDecision(Call, Callee, CalleeTTI, GetTLI);
3271 
3272   if (UserDecision) {
3273     if (UserDecision->isSuccess())
3274       return llvm::InlineCost::getAlways("always inline attribute");
3275     return llvm::InlineCost::getNever(UserDecision->getFailureReason());
3276   }
3277 
// NOTE(review): the lines guarding and constructing this forced-inline
// result (for the -inline-all-viable-calls option) are elided from this
// excerpt -- confirm upstream.
3280                                      "Inlining forced by -inline-all-viable-calls");
3281 
3282   LLVM_DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
3283                           << "... (caller:" << Call.getCaller()->getName()
3284                           << ")\n");
3285 
3286   InlineCostCallAnalyzer CA(*Callee, Call, Params, CalleeTTI,
3287                             GetAssumptionCache, GetBFI, GetTLI, PSI, ORE,
3288                             /*BoostIndirect=*/true, /*IgnoreThreshold=*/false,
3289                             GetEphValuesCache);
3290   InlineResult ShouldInline = CA.analyze();
3291 
3292   LLVM_DEBUG(CA.dump());
3293 
3294   // Always make cost benefit based decision explicit.
3295   // We use always/never here since threshold is not meaningful,
3296   // as it's not what drives cost-benefit analysis.
3297   if (CA.wasDecidedByCostBenefit()) {
3298     if (ShouldInline.isSuccess())
3299       return InlineCost::getAlways("benefit over cost",
3300                                    CA.getCostBenefitPair());
3301     else
3302       return InlineCost::getNever("cost over benefit", CA.getCostBenefitPair());
3303   }
3304 
// Threshold-driven decisions report the concrete cost/threshold pair.
3305   if (CA.wasDecidedByCostThreshold())
3306     return InlineCost::get(CA.getCost(), CA.getThreshold(),
3307                            CA.getStaticBonusApplied());
3308 
3309   // No details on how the decision was made, simply return always or never.
3310   return ShouldInline.isSuccess()
3311              ? InlineCost::getAlways("empty function")
3312              : InlineCost::getNever(ShouldInline.getFailureReason());
3313 }
3314
// Structural viability check used by always-inline handling: scans every
// block of \p F for constructs that make inlining impossible regardless of
// cost (indirect branches, blockaddresses, recursion, returns-twice exposure,
// and specific uninlinable intrinsics).
// NOTE(review): the function's signature line is elided from this excerpt --
// confirm against the full upstream source.
3316   bool ReturnsTwice = F.hasFnAttribute(Attribute::ReturnsTwice);
3317   for (BasicBlock &BB : F) {
3318     // Disallow inlining of functions which contain indirect branches.
// NOTE(review): the condition line (presumably an isa<IndirectBrInst> check
// on the block terminator) is elided from this excerpt -- confirm upstream.
3320       return InlineResult::failure("contains indirect branches");
3321 
3322     // Disallow inlining of blockaddresses.
3323     if (BB.hasAddressTaken())
3324       return InlineResult::failure("blockaddress used");
3325 
3326     for (auto &II : BB) {
// NOTE(review): the declaration of Call (presumably a dyn_cast<CallBase> of
// the instruction) is elided from this excerpt -- confirm upstream.
3328       if (!Call)
3329         continue;
3330 
3331       // Disallow recursive calls.
3332       Function *Callee = Call->getCalledFunction();
3333       if (&F == Callee)
3334         return InlineResult::failure("recursive call");
3335 
3336       // Disallow calls which expose returns-twice to a function not previously
3337       // attributed as such.
3338       if (!ReturnsTwice && isa<CallInst>(Call) &&
3339           cast<CallInst>(Call)->canReturnTwice())
3340         return InlineResult::failure("exposes returns-twice attribute");
3341 
3342       if (Callee)
3343         switch (Callee->getIntrinsicID()) {
3344         default:
3345           break;
3346         case llvm::Intrinsic::icall_branch_funnel:
3347           // Disallow inlining of @llvm.icall.branch.funnel because current
3348           // backend can't separate call targets from call arguments.
3349           return InlineResult::failure(
3350               "disallowed inlining of @llvm.icall.branch.funnel");
3351         case llvm::Intrinsic::localescape:
3352           // Disallow inlining functions that call @llvm.localescape. Doing this
3353           // correctly would require major changes to the inliner.
3354           return InlineResult::failure(
3355               "disallowed inlining of @llvm.localescape");
3356         case llvm::Intrinsic::vastart:
3357           // Disallow inlining of functions that initialize VarArgs with
3358           // va_start.
3359           return InlineResult::failure(
3360               "contains VarArgs initialized with va_start");
3361         }
3362     }
3363   }
3364 
3365   return InlineResult::success();
3366 }
3367
3368// APIs to create InlineParams based on command line flags and/or other
3369// parameters.
3370
// Build an InlineParams whose DefaultThreshold is \p Threshold, then layer on
// the values of the various threshold command-line flags.
// NOTE(review): this excerpt elides the function's signature line and several
// assignment lines (the right-hand sides that copy flag values into Params);
// confirm against the full upstream source before editing.
3372   InlineParams Params;
3373 
3374   // This field is the threshold to use for a callee by default. This is
3375   // derived from one or more of:
3376   //  * optimization or size-optimization levels,
3377   //  * a value passed to createFunctionInliningPass function, or
3378   //  * the -inline-threshold flag.
3379   // If the -inline-threshold flag is explicitly specified, that is used
3380   // irrespective of anything else.
3381   if (InlineThreshold.getNumOccurrences() > 0)
3383   else
3384     Params.DefaultThreshold = Threshold;
3385 
3386   // Set the HintThreshold knob from the -inlinehint-threshold.
3388 
3389   // Set the HotCallSiteThreshold knob from the -hot-callsite-threshold.
3391 
3392   // If the -locally-hot-callsite-threshold is explicitly specified, use it to
3393   // populate LocallyHotCallSiteThreshold. Later, we populate
3394   // Params.LocallyHotCallSiteThreshold from -locally-hot-callsite-threshold if
3395   // we know that optimization level is O3 (in the getInlineParams variant that
3396   // takes the opt and size levels).
3397   // FIXME: Remove this check (and make the assignment unconditional) after
3398   // addressing size regression issues at O2.
3399   if (LocallyHotCallSiteThreshold.getNumOccurrences() > 0)
3401 
3402   // Set the ColdCallSiteThreshold knob from the
3403   // -inline-cold-callsite-threshold.
3405 
3406   // Set the OptMinSizeThreshold and OptSizeThreshold params only if the
3407   // -inlinehint-threshold commandline option is not explicitly given. If that
3408   // option is present, then its value applies even for callees with size and
3409   // minsize attributes.
3410   // If the -inline-threshold is not specified, set the ColdThreshold from the
3411   // -inlinecold-threshold even if it is not explicitly passed. If
3412   // -inline-threshold is specified, then -inlinecold-threshold needs to be
3413   // explicitly specified to set the ColdThreshold knob
3414   if (InlineThreshold.getNumOccurrences() == 0) {
3418   } else if (ColdThreshold.getNumOccurrences() > 0) {
3420   }
3421   return Params;
3422 }
3423
3427
3428// Compute the default threshold for inlining based on the opt level and the
3429// size opt level.
3430static int computeThresholdFromOptLevels(unsigned OptLevel,
3431 unsigned SizeOptLevel) {
3432 if (OptLevel > 2)
3434 if (SizeOptLevel == 1) // -Os
3436 if (SizeOptLevel == 2) // -Oz
3438 return DefaultThreshold;
3439}
3440
3441InlineParams llvm::getInlineParams(unsigned OptLevel, unsigned SizeOptLevel) {
3442 auto Params =
3443 getInlineParams(computeThresholdFromOptLevels(OptLevel, SizeOptLevel));
3444 // At O3, use the value of -locally-hot-callsite-threshold option to populate
3445 // Params.LocallyHotCallSiteThreshold. Below O3, this flag has effect only
3446 // when it is specified explicitly.
3447 if (OptLevel > 2)
3449 return Params;
3450}
3451
3456 std::function<AssumptionCache &(Function &)> GetAssumptionCache =
3457 [&](Function &F) -> AssumptionCache & {
3458 return FAM.getResult<AssumptionAnalysis>(F);
3459 };
3460
3461 auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
3462 ProfileSummaryInfo *PSI =
3463 MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
3464 const TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
3465
3466 // FIXME: Redesign the usage of InlineParams to expand the scope of this pass.
3467 // In the current implementation, the type of InlineParams doesn't matter as
3468 // the pass serves only for verification of inliner's decisions.
3469 // We can add a flag which determines InlineParams for this run. Right now,
3470 // the default InlineParams are used.
3471 const InlineParams Params = llvm::getInlineParams();
3472 for (BasicBlock &BB : F) {
3473 for (Instruction &I : BB) {
3474 if (auto *CB = dyn_cast<CallBase>(&I)) {
3475 Function *CalledFunction = CB->getCalledFunction();
3476 if (!CalledFunction || CalledFunction->isDeclaration())
3477 continue;
3478 OptimizationRemarkEmitter ORE(CalledFunction);
3479 InlineCostCallAnalyzer ICCA(*CalledFunction, *CB, Params, TTI,
3480 GetAssumptionCache, nullptr, nullptr, PSI,
3481 &ORE);
3482 ICCA.analyze();
3483 OS << " Analyzing call of " << CalledFunction->getName()
3484 << "... (caller:" << CB->getCaller()->getName() << ")\n";
3485 ICCA.print(OS);
3486 OS << "\n";
3487 }
3488 }
3489 }
3490 return PreservedAnalyses::all();
3491}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:661
static InstructionCost getCost(Instruction &Inst, TTI::TargetCostKind CostKind, TargetTransformInfo &TTI, TargetLibraryInfo &TLI)
Definition CostModel.cpp:74
#define DEBUG_TYPE
static bool isColdCallSite(CallBase &CB, BlockFrequencyInfo &CallerBFI)
Return true if the block containing the call site has a BlockFrequency of less than ColdCCRelFreq% of...
Hexagon Common GEP
static bool IsIndirectCall(const MachineInstr *MI)
static cl::opt< int > InlineAsmInstrCost("inline-asm-instr-cost", cl::Hidden, cl::init(0), cl::desc("Cost of a single inline asm instruction when inlining"))
static cl::opt< int > InlineSavingsMultiplier("inline-savings-multiplier", cl::Hidden, cl::init(8), cl::desc("Multiplier to multiply cycle savings by during inlining"))
static cl::opt< int > InlineThreshold("inline-threshold", cl::Hidden, cl::init(225), cl::desc("Control the amount of inlining to perform (default = 225)"))
static cl::opt< int > CallPenalty("inline-call-penalty", cl::Hidden, cl::init(25), cl::desc("Call penalty that is applied per callsite when inlining"))
static cl::opt< int > HotCallSiteThreshold("hot-callsite-threshold", cl::Hidden, cl::init(3000), cl::desc("Threshold for hot callsites "))
static cl::opt< int > ColdThreshold("inlinecold-threshold", cl::Hidden, cl::init(45), cl::desc("Threshold for inlining functions with cold attribute"))
static cl::opt< size_t > RecurStackSizeThreshold("recursive-inline-max-stacksize", cl::Hidden, cl::init(InlineConstants::TotalAllocaSizeRecursiveCaller), cl::desc("Do not inline recursive functions with a stack " "size that exceeds the specified limit"))
static cl::opt< bool > PrintInstructionComments("print-instruction-comments", cl::Hidden, cl::init(false), cl::desc("Prints comments for instruction based on inline cost analysis"))
static cl::opt< int > LocallyHotCallSiteThreshold("locally-hot-callsite-threshold", cl::Hidden, cl::init(525), cl::desc("Threshold for locally hot callsites "))
static cl::opt< bool > InlineCallerSupersetNoBuiltin("inline-caller-superset-nobuiltin", cl::Hidden, cl::init(true), cl::desc("Allow inlining when caller has a superset of callee's nobuiltin " "attributes."))
static cl::opt< int > HintThreshold("inlinehint-threshold", cl::Hidden, cl::init(325), cl::desc("Threshold for inlining functions with inline hint"))
static cl::opt< size_t > StackSizeThreshold("inline-max-stacksize", cl::Hidden, cl::init(std::numeric_limits< size_t >::max()), cl::desc("Do not inline functions with a stack size " "that exceeds the specified limit"))
static int computeThresholdFromOptLevels(unsigned OptLevel, unsigned SizeOptLevel)
static cl::opt< uint64_t > HotCallSiteRelFreq("hot-callsite-rel-freq", cl::Hidden, cl::init(60), cl::desc("Minimum block frequency, expressed as a multiple of caller's " "entry frequency, for a callsite to be hot in the absence of " "profile information."))
static cl::opt< int > InlineSavingsProfitableMultiplier("inline-savings-profitable-multiplier", cl::Hidden, cl::init(4), cl::desc("A multiplier on top of cycle savings to decide whether the " "savings won't justify the cost"))
static cl::opt< int > MemAccessCost("inline-memaccess-cost", cl::Hidden, cl::init(0), cl::desc("Cost of load/store instruction when inlining"))
static cl::opt< int > ColdCallSiteThreshold("inline-cold-callsite-threshold", cl::Hidden, cl::init(45), cl::desc("Threshold for inlining cold callsites"))
static cl::opt< bool > IgnoreTTIInlineCompatible("ignore-tti-inline-compatible", cl::Hidden, cl::init(false), cl::desc("Ignore TTI attributes compatibility check between callee/caller " "during inline cost calculation"))
static cl::opt< bool > OptComputeFullInlineCost("inline-cost-full", cl::Hidden, cl::desc("Compute the full inline cost of a call site even when the cost " "exceeds the threshold."))
#define DEBUG_PRINT_STAT(x)
static cl::opt< bool > InlineEnableCostBenefitAnalysis("inline-enable-cost-benefit-analysis", cl::Hidden, cl::init(false), cl::desc("Enable the cost-benefit analysis for the inliner"))
static cl::opt< int > InstrCost("inline-instr-cost", cl::Hidden, cl::init(5), cl::desc("Cost of a single instruction when inlining"))
static cl::opt< bool > InlineAllViableCalls("inline-all-viable-calls", cl::Hidden, cl::init(false), cl::desc("Inline all viable calls, even if they exceed the inlining " "threshold"))
static cl::opt< int > InlineSizeAllowance("inline-size-allowance", cl::Hidden, cl::init(100), cl::desc("The maximum size of a callee that get's " "inlined without sufficient cycle savings"))
static bool functionsHaveCompatibleAttributes(Function *Caller, Function *Callee, TargetTransformInfo &TTI, function_ref< const TargetLibraryInfo &(Function &)> &GetTLI)
Test that there are no attribute conflicts between Caller and Callee that prevent inlining.
static cl::opt< int > ColdCallSiteRelFreq("cold-callsite-rel-freq", cl::Hidden, cl::init(2), cl::desc("Maximum block frequency, expressed as a percentage of caller's " "entry frequency, for a callsite to be cold in the absence of " "profile information."))
static cl::opt< bool > DisableGEPConstOperand("disable-gep-const-evaluation", cl::Hidden, cl::init(false), cl::desc("Disables evaluation of GetElementPtr with constant operands"))
static cl::opt< int > DefaultThreshold("inlinedefault-threshold", cl::Hidden, cl::init(225), cl::desc("Default amount of inlining to perform"))
static Constant * getFalse(Type *Ty)
For a boolean type or a vector of boolean type, return false or a vector with every element false.
Legalize the Machine IR a function s Machine IR
Definition Legalizer.cpp:81
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Machine Check Debug Module
#define T
uint64_t IntrinsicInst * II
#define P(N)
FunctionAnalysisManager FAM
const SmallVectorImpl< MachineOperand > & Cond
void visit(MachineFunction &MF, MachineBasicBlock &Start, std::function< void(MachineBasicBlock *)> op)
This file contains some templates that are useful if you are working with the STL at all.
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
static SymbolRef::Type getType(const Symbol *Sym)
Definition TapiFile.cpp:39
This pass exposes codegen information to IR-level passes.
Value * RHS
Value * LHS
LLVM_ABI APInt udiv(const APInt &RHS) const
Unsigned division operation.
Definition APInt.cpp:1584
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition APInt.h:1118
LLVM_ABI APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
Definition APInt.cpp:1052
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1228
PointerType * getType() const
Overload to return most specific pointer type.
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
Functions, function parameters, and return types can have attributes to indicate how they should be t...
Definition Attributes.h:105
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
bool isValid() const
Return true if the attribute is any kind of attribute.
Definition Attributes.h:261
LLVM Basic Block Representation.
Definition BasicBlock.h:62
bool empty() const
Definition BasicBlock.h:492
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
Definition BasicBlock.h:696
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
LLVM_ABI std::optional< uint64_t > getBlockProfileCount(const BasicBlock *BB, bool AllowSynthetic=false) const
Returns the estimated profile count of BB.
LLVM_ABI BlockFrequency getEntryFreq() const
LLVM_ABI BlockFrequency getBlockFreq(const BasicBlock *BB) const
getBlockFreq - Return block frequency.
LLVM_ABI std::optional< BlockFrequency > mul(uint64_t Factor) const
Multiplies frequency with Factor. Returns nullopt in case of overflow.
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
Value * getCondition() const
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
bool hasFnAttr(Attribute::AttrKind Kind) const
Determine whether this call has the given attribute.
LLVM_ABI bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Determine whether the argument or parameter has the given attribute.
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
bool onlyReadsMemory(unsigned OpNo) const
Value * getCalledOperand() const
Attribute getFnAttr(StringRef Kind) const
Get the attribute of a given kind for the function.
Value * getArgOperand(unsigned i) const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
FunctionType * getFunctionType() const
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
unsigned arg_size() const
LLVM_ABI Function * getCaller()
Helper to get the caller (the parent function).
@ ICMP_NE
not equal
Definition InstrTypes.h:698
static LLVM_ABI Constant * getSub(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:219
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:159
static LLVM_ABI ConstantInt * getBool(LLVMContext &Context, bool V)
LLVM_ABI bool isAllOnesValue() const
Return true if this is the value that would be returned by getAllOnesValue.
Definition Constants.cpp:91
LLVM_ABI bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition Constants.cpp:74
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition DenseMap.h:205
unsigned size() const
Definition DenseMap.h:110
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition DenseMap.h:174
A cache of ephemeral values within a function.
Type * getReturnType() const
const BasicBlock & getEntryBlock() const
Definition Function.h:809
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition Globals.cpp:329
static LLVM_ABI bool compare(const APInt &LHS, const APInt &RHS, ICmpInst::Predicate Pred)
Return result of LHS Pred RHS comparison.
LLVM_ABI void collectAsmStrs(SmallVectorImpl< StringRef > &AsmStrs) const
Definition InlineAsm.cpp:63
Represents the cost of inlining a function.
Definition InlineCost.h:91
static InlineCost getNever(const char *Reason, std::optional< CostBenefitPair > CostBenefit=std::nullopt)
Definition InlineCost.h:132
static InlineCost getAlways(const char *Reason, std::optional< CostBenefitPair > CostBenefit=std::nullopt)
Definition InlineCost.h:127
static InlineCost get(int Cost, int Threshold, int StaticBonus=0)
Definition InlineCost.h:121
InlineResult is basically true or false.
Definition InlineCost.h:181
static InlineResult success()
Definition InlineCost.h:186
static InlineResult failure(const char *Reason)
Definition InlineCost.h:187
bool isSuccess() const
Definition InlineCost.h:190
const char * getFailureReason() const
Definition InlineCost.h:191
Base class for instruction visitors.
Definition InstVisitor.h:78
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
The optimization diagnostic interface.
LLVM_ABI void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Class to represent pointers.
unsigned getAddressSpace() const
Return the address space of the Pointer type.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
size_type size() const
Definition SmallPtrSet.h:99
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
void reserve(size_type N)
void push_back(const T &Elt)
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
static constexpr size_t npos
Definition StringRef.h:57
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition StringRef.h:472
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition StringRef.h:573
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:261
constexpr bool empty() const
empty - Check if the string is empty.
Definition StringRef.h:143
bool contains(StringRef Other) const
Return true if the given string is a substring of *this, and false otherwise.
Definition StringRef.h:426
size_t find(char C, size_t From=0) const
Search for the first character C in the string.
Definition StringRef.h:293
TypeSize getElementOffset(unsigned Idx) const
Definition DataLayout.h:754
Analysis pass providing the TargetTransformInfo.
Provides information about what library functions are available for the current target.
bool has(LibFunc F) const
Tests whether a library function is available.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
LLVM_ABI unsigned getInlineCallPenalty(const Function *F, const CallBase &Call, unsigned DefaultCallPenalty) const
Returns a penalty for invoking call Call in F.
LLVM_ABI unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const
LLVM_ABI unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) const
@ TCK_SizeAndLatency
The weighted sum of size and latency.
LLVM_ABI int getInliningLastCallToStaticBonus() const
LLVM_ABI unsigned adjustInliningThreshold(const CallBase *CB) const
LLVM_ABI unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const
LLVM_ABI int getInlinerVectorBonusPercent() const
LLVM_ABI bool isLoweredToCall(const Function *F) const
Test whether calls to a function lower to actual program function calls.
LLVM_ABI unsigned getInliningThresholdMultiplier() const
@ TCC_Expensive
The cost of a 'div' instruction on x86.
@ TCC_Free
Expected to fold away in lowering.
LLVM_ABI InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
LLVM_ABI unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const
LLVM_ABI InstructionCost getFPOpCost(Type *Ty) const
Return the expected cost of supporting the floating point operation of the specified type.
static constexpr TypeSize getZero()
Definition TypeSize.h:349
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:230
Value * getOperand(unsigned i) const
Definition User.h:207
LLVM Value Representation.
Definition Value.h:75
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:259
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
int getNumOccurrences() const
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:202
bool erase(const ValueT &V)
Definition DenseSet.h:100
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition ilist_node.h:34
CallInst * Call
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
const int ColdccPenalty
Definition InlineCost.h:52
const char FunctionInlineCostMultiplierAttributeName[]
Definition InlineCost.h:60
const int OptSizeThreshold
Use when optsize (-Os) is specified.
Definition InlineCost.h:40
const int OptMinSizeThreshold
Use when minsize (-Oz) is specified.
Definition InlineCost.h:43
const uint64_t MaxSimplifiedDynamicAllocaToInline
Do not inline dynamic allocas that have been constant propagated to be static allocas above this amou...
Definition InlineCost.h:58
const int IndirectCallThreshold
Definition InlineCost.h:50
const int OptAggressiveThreshold
Use when -O3 is specified.
Definition InlineCost.h:46
const char MaxInlineStackSizeAttributeName[]
Definition InlineCost.h:63
const unsigned TotalAllocaSizeRecursiveCaller
Do not inline functions which allocate this many bytes on the stack when the caller is recursive.
Definition InlineCost.h:55
LLVM_ABI int getInstrCost()
bool match(Val *V, const Pattern &P)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
initializer< Ty > init(const Ty &Val)
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< InstrNode * > Instr
Definition RDFGraph.h:389
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
LLVM_ABI Constant * ConstantFoldSelectInstruction(Constant *Cond, Constant *V1, Constant *V2)
Attempt to constant fold a select instruction with the specified operands.
InstructionCost Cost
@ Dead
Unused definition.
LLVM_ABI bool isAssumeLikeIntrinsic(const Instruction *I)
Return true if it is an intrinsic that cannot be speculated but also cannot trap.
LLVM_ABI bool canConstantFoldCallTo(const CallBase *Call, const Function *F)
canConstantFoldCallTo - Return true if its even possible to fold a call to the specified function.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI std::optional< int > getStringFnAttrAsInt(CallBase &CB, StringRef AttrKind)
auto successors(const MachineBasicBlock *BB)
OuterAnalysisManagerProxy< ModuleAnalysisManager, Function > ModuleAnalysisManagerFunctionProxy
Provide the ModuleAnalysisManager to Function proxy.
LLVM_ABI Value * lowerObjectSizeCall(IntrinsicInst *ObjectSize, const DataLayout &DL, const TargetLibraryInfo *TLI, bool MustSucceed)
Try to turn a call to @llvm.objectsize into an integer value of the given Type.
LLVM_ABI Value * simplifyInstructionWithOperands(Instruction *I, ArrayRef< Value * > NewOps, const SimplifyQuery &Q)
Like simplifyInstruction but the operands of I are replaced with NewOps.
LogicalResult failure(bool IsFailure=true)
Utility function to generate a LogicalResult.
gep_type_iterator gep_type_end(const User *GEP)
LLVM_ABI Constant * ConstantFoldCall(const CallBase *Call, Function *F, ArrayRef< Constant * > Operands, const TargetLibraryInfo *TLI=nullptr, bool AllowNonDeterministic=true)
ConstantFoldCall - Attempt to constant fold a call to the specified function with the specified argum...
LLVM_ABI Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
LLVM_ABI InlineResult isInlineViable(Function &Callee)
Check if it is mechanically possible to inline the function Callee, based on the contents of the func...
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
LLVM_ABI Value * simplifyFNegInst(Value *Op, FastMathFlags FMF, const SimplifyQuery &Q)
Given operand for an FNeg, fold the result or return null.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
generic_gep_type_iterator<> gep_type_iterator
std::enable_if_t< std::is_unsigned_v< T >, T > SaturatingMultiplyAdd(T X, T Y, T A, bool *ResultOverflowed=nullptr)
Multiply two unsigned integers, X and Y, and add the unsigned integer, A to the product.
Definition MathExtras.h:684
Function::ProfileCount ProfileCount
LLVM_ABI std::optional< InlineCostFeatures > getInliningCostFeatures(CallBase &Call, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, function_ref< const TargetLibraryInfo &(Function &)> GetTLI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Get the expanded cost features.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI Value * simplifyExtractValueInst(Value *Agg, ArrayRef< unsigned > Idxs, const SimplifyQuery &Q)
Given operands for an ExtractValueInst, fold the result or return null.
LLVM_ABI InlineCost getInlineCost(CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< const TargetLibraryInfo &(Function &)> GetTLI, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr, function_ref< EphemeralValuesCache &(Function &)> GetEphValuesCache=nullptr)
Get an InlineCost object representing the cost of inlining this callsite.
TargetTransformInfo TTI
LLVM_ABI std::optional< InlineResult > getAttributeBasedInliningDecision(CallBase &Call, Function *Callee, TargetTransformInfo &CalleeTTI, function_ref< const TargetLibraryInfo &(Function &)> GetTLI)
Returns InlineResult::success() if the call site should be always inlined because of user directives,...
LLVM_ABI Value * simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a BinaryOperator, fold the result or return null.
DWARFExpression::Operation Op
LLVM_ABI InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
LLVM_ABI int getCallsiteCost(const TargetTransformInfo &TTI, const CallBase &Call, const DataLayout &DL)
Return the cost associated with a callsite, including parameter passing and the call/return instructi...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
gep_type_iterator gep_type_begin(const User *GEP)
LLVM_ABI std::optional< int > getInliningCostEstimate(CallBase &Call, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, function_ref< const TargetLibraryInfo &(Function &)> GetTLI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Get the cost estimate ignoring thresholds.
auto predecessors(const MachineBasicBlock *BB)
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI Constant * ConstantFoldInstOperands(const Instruction *I, ArrayRef< Constant * > Ops, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, bool AllowNonDeterministic=true)
ConstantFoldInstOperands - Attempt to constant fold an instruction with the specified operands.
std::enable_if_t< std::is_unsigned_v< T >, T > SaturatingAdd(T X, T Y, bool *ResultOverflowed=nullptr)
Add two unsigned integers, X and Y, of type T.
Definition MathExtras.h:609
std::array< int, static_cast< size_t >(InlineCostFeatureIndex::NumberOfFeatures)> InlineCostFeatures
static LLVM_ABI void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM)
Thresholds to tune inline cost analysis.
Definition InlineCost.h:207
std::optional< int > OptMinSizeThreshold
Threshold to use when the caller is optimized for minsize.
Definition InlineCost.h:221
std::optional< int > OptSizeThreshold
Threshold to use when the caller is optimized for size.
Definition InlineCost.h:218
std::optional< int > ColdCallSiteThreshold
Threshold to use when the callsite is considered cold.
Definition InlineCost.h:231
std::optional< int > ColdThreshold
Threshold to use for cold callees.
Definition InlineCost.h:215
std::optional< int > HotCallSiteThreshold
Threshold to use when the callsite is considered hot.
Definition InlineCost.h:224
int DefaultThreshold
The default threshold to start with for a callee.
Definition InlineCost.h:209
std::optional< int > HintThreshold
Threshold to use for callees with inline hint.
Definition InlineCost.h:212
std::optional< int > LocallyHotCallSiteThreshold
Threshold to use when the callsite is considered hot relative to function entry.
Definition InlineCost.h:228