31#include "llvm/Config/llvm-config.h"
52#define DEBUG_TYPE "inline-cost"
54STATISTIC(NumCallsAnalyzed,
"Number of call sites analyzed");
58 cl::desc(
"Default amount of inlining to perform"));
67 cl::desc(
"Ignore TTI attributes compatibility check between callee/caller "
68 "during inline cost calculation"));
72 cl::desc(
"Prints comments for instruction based on inline cost analysis"));
76 cl::desc(
"Control the amount of inlining to perform (default = 225)"));
80 cl::desc(
"Threshold for inlining functions with inline hint"));
85 cl::desc(
"Threshold for inlining cold callsites"));
89 cl::desc(
"Enable the cost-benefit analysis for the inliner"));
96 cl::desc(
"Multiplier to multiply cycle savings by during inlining"));
103 cl::desc(
"A multiplier on top of cycle savings to decide whether the "
104 "savings won't justify the cost"));
108 cl::desc(
"The maximum size of a callee that get's "
109 "inlined without sufficient cycle savings"));
116 cl::desc(
"Threshold for inlining functions with cold attribute"));
120 cl::desc(
"Threshold for hot callsites "));
124 cl::desc(
"Threshold for locally hot callsites "));
128 cl::desc(
"Maximum block frequency, expressed as a percentage of caller's "
129 "entry frequency, for a callsite to be cold in the absence of "
130 "profile information."));
134 cl::desc(
"Minimum block frequency, expressed as a multiple of caller's "
135 "entry frequency, for a callsite to be hot in the absence of "
136 "profile information."));
140 cl::desc(
"Cost of a single instruction when inlining"));
144 cl::desc(
"Cost of load/store instruction when inlining"));
148 cl::desc(
"Call penalty that is applied per callsite when inlining"));
152 cl::init(std::numeric_limits<size_t>::max()),
153 cl::desc(
"Do not inline functions with a stack size "
154 "that exceeds the specified limit"));
159 cl::desc(
"Do not inline recursive functions with a stack "
160 "size that exceeds the specified limit"));
164 cl::desc(
"Compute the full inline cost of a call site even when the cost "
165 "exceeds the threshold."));
169 cl::desc(
"Allow inlining when caller has a superset of callee's nobuiltin "
174 cl::desc(
"Disables evaluation of GetElementPtr with constant operands"));
194namespace InlineConstants {
202class InlineCostCallAnalyzer;
206struct InstructionCostDetail {
209 int ThresholdBefore = 0;
210 int ThresholdAfter = 0;
212 int getThresholdDelta()
const {
return ThresholdAfter - ThresholdBefore; }
214 int getCostDelta()
const {
return CostAfter - CostBefore; }
216 bool hasThresholdChanged()
const {
return ThresholdAfter != ThresholdBefore; }
221 InlineCostCallAnalyzer *
const ICCA;
224 InlineCostAnnotationWriter(InlineCostCallAnalyzer *ICCA) : ICCA(ICCA) {}
237class CallAnalyzer :
public InstVisitor<CallAnalyzer, bool> {
242 virtual ~CallAnalyzer() =
default;
271 virtual void onBlockStart(
const BasicBlock *BB) {}
274 virtual void onBlockAnalyzed(
const BasicBlock *BB) {}
277 virtual void onInstructionAnalysisStart(
const Instruction *
I) {}
280 virtual void onInstructionAnalysisFinish(
const Instruction *
I) {}
290 virtual bool shouldStop() {
return false; }
299 virtual void onDisableSROA(
AllocaInst *Arg) {}
302 virtual void onDisableLoadElimination() {}
306 virtual bool onCallBaseVisitStart(
CallBase &Call) {
return true; }
309 virtual void onCallPenalty() {}
312 virtual void onMemAccess(){};
316 virtual void onLoadEliminationOpportunity() {}
320 virtual void onCallArgumentSetup(
const CallBase &Call) {}
323 virtual void onLoadRelativeIntrinsic() {}
331 virtual bool onJumpTable(
unsigned JumpTableSize) {
return true; }
335 virtual bool onCaseCluster(
unsigned NumCaseCluster) {
return true; }
339 virtual void onFinalizeSwitch(
unsigned JumpTableSize,
unsigned NumCaseCluster,
340 bool DefaultDestUndefined) {}
344 virtual void onMissedSimplification() {}
347 virtual void onInitializeSROAArg(
AllocaInst *Arg) {}
350 virtual void onAggregateSROAUse(
AllocaInst *V) {}
352 bool handleSROA(
Value *V,
bool DoNotDisable) {
354 if (
auto *SROAArg = getSROAArgForValueOrNull(V)) {
356 onAggregateSROAUse(SROAArg);
359 disableSROAForArg(SROAArg);
364 bool IsCallerRecursive =
false;
365 bool IsRecursiveCall =
false;
366 bool ExposesReturnsTwice =
false;
367 bool HasDynamicAlloca =
false;
368 bool ContainsNoDuplicateCall =
false;
369 bool HasReturn =
false;
370 bool HasIndirectBr =
false;
371 bool HasUninlineableIntrinsic =
false;
372 bool InitsVargArgs =
false;
376 unsigned NumInstructions = 0;
377 unsigned NumVectorInstructions = 0;
408 bool EnableLoadElimination =
true;
411 bool AllowRecursiveCall =
false;
416 auto It = SROAArgValues.
find(V);
417 if (It == SROAArgValues.
end() || EnabledSROAAllocas.
count(It->second) == 0)
423 bool isAllocaDerivedArg(
Value *V);
425 void disableSROA(
Value *V);
427 void disableLoadElimination();
433 bool simplifyIntrinsicCallIsConstant(
CallBase &CB);
434 bool simplifyIntrinsicCallObjectSize(
CallBase &CB);
446 bool isKnownNonNullInCallee(
Value *V);
449 bool allowSizeGrowth(
CallBase &Call);
500 :
TTI(
TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI),
502 CandidateCall(
Call) {}
506 std::optional<Constant *> getSimplifiedValue(
Instruction *
I) {
508 return SimplifiedValues[
I];
514 unsigned NumConstantArgs = 0;
515 unsigned NumConstantOffsetPtrArgs = 0;
516 unsigned NumAllocaArgs = 0;
517 unsigned NumConstantPtrCmps = 0;
518 unsigned NumConstantPtrDiffs = 0;
519 unsigned NumInstructionsSimplified = 0;
// Expected number of compares needed to dispatch a switch with
// NumCaseCluster case clusters: 3 * N / 2 - 1, evaluated in 64 bits so very
// large switches cannot overflow the intermediate product.
int64_t getExpectedNumberOfCompare(int NumCaseCluster) {
  const int64_t Clusters = NumCaseCluster;
  return Clusters * 3 / 2 - 1;
}
545class InlineCostCallAnalyzer final :
public CallAnalyzer {
546 const bool ComputeFullInlineCost;
547 int LoadEliminationCost = 0;
552 int SingleBBBonus = 0;
567 int StaticBonusApplied = 0;
570 const bool BoostIndirectCalls;
573 const bool IgnoreThreshold;
576 const bool CostBenefitAnalysisEnabled;
587 int CostAtBBStart = 0;
594 bool DecidedByCostThreshold =
false;
597 bool DecidedByCostBenefit =
false;
602 bool SingleBB =
true;
604 unsigned SROACostSavings = 0;
605 unsigned SROACostSavingsLost = 0;
621 std::optional<int> getHotCallSiteThreshold(
CallBase &Call,
625 void addCost(int64_t Inc) {
626 Inc = std::clamp<int64_t>(Inc, INT_MIN, INT_MAX);
627 Cost = std::clamp<int64_t>(Inc +
Cost, INT_MIN, INT_MAX);
630 void onDisableSROA(
AllocaInst *Arg)
override {
631 auto CostIt = SROAArgCosts.
find(Arg);
632 if (CostIt == SROAArgCosts.
end())
634 addCost(CostIt->second);
635 SROACostSavings -= CostIt->second;
636 SROACostSavingsLost += CostIt->second;
637 SROAArgCosts.
erase(CostIt);
640 void onDisableLoadElimination()
override {
641 addCost(LoadEliminationCost);
642 LoadEliminationCost = 0;
645 bool onCallBaseVisitStart(
CallBase &Call)
override {
646 if (std::optional<int> AttrCallThresholdBonus =
648 Threshold += *AttrCallThresholdBonus;
650 if (std::optional<int> AttrCallCost =
652 addCost(*AttrCallCost);
  // Charge the flat per-callsite penalty (the -inline-call-penalty knob) to
  // the running cost for a call that remains a real call.
  void onCallPenalty() override { addCost(CallPenalty); }
664 void onCallArgumentSetup(
const CallBase &Call)
override {
669 void onLoadRelativeIntrinsic()
override {
674 bool IsIndirectCall)
override {
683 if (IsIndirectCall && BoostIndirectCalls) {
684 auto IndirectCallParams = Params;
689 InlineCostCallAnalyzer CA(*
F, Call, IndirectCallParams,
TTI,
690 GetAssumptionCache, GetBFI, PSI, ORE,
false);
691 if (CA.analyze().isSuccess()) {
694 Cost -= std::max(0, CA.getThreshold() - CA.getCost());
702 void onFinalizeSwitch(
unsigned JumpTableSize,
unsigned NumCaseCluster,
703 bool DefaultDestUndefined)
override {
704 if (!DefaultDestUndefined)
716 if (NumCaseCluster <= 3) {
722 int64_t ExpectedNumberOfCompare =
723 getExpectedNumberOfCompare(NumCaseCluster);
724 int64_t SwitchCost = ExpectedNumberOfCompare * 2 *
InstrCost;
  // An instruction could not be simplified away during analysis; charge one
  // instruction's worth of cost for it.
  void onMissedSimplification() override { addCost(InstrCost); }
730 void onInitializeSROAArg(
AllocaInst *Arg)
override {
732 "Should not initialize SROA costs for null value.");
734 SROACostSavings += SROAArgCost;
735 SROAArgCosts[Arg] = SROAArgCost;
738 void onAggregateSROAUse(
AllocaInst *SROAArg)
override {
739 auto CostIt = SROAArgCosts.
find(SROAArg);
741 "expected this argument to have a cost");
  // Snapshot the running cost at the entry of each basic block so that
  // onBlockAnalyzed can attribute (Cost - CostAtBBStart) to this block alone.
  void onBlockStart(const BasicBlock *BB) override { CostAtBBStart = Cost; }
748 void onBlockAnalyzed(
const BasicBlock *BB)
override {
749 if (CostBenefitAnalysisEnabled) {
752 assert(GetBFI &&
"GetBFI must be available");
754 assert(BFI &&
"BFI must be available");
757 ColdSize +=
Cost - CostAtBBStart;
765 if (SingleBB && TI->getNumSuccessors() > 1) {
767 Threshold -= SingleBBBonus;
772 void onInstructionAnalysisStart(
const Instruction *
I)
override {
777 InstructionCostDetailMap[
I].CostBefore =
Cost;
778 InstructionCostDetailMap[
I].ThresholdBefore = Threshold;
781 void onInstructionAnalysisFinish(
const Instruction *
I)
override {
786 InstructionCostDetailMap[
I].CostAfter =
Cost;
787 InstructionCostDetailMap[
I].ThresholdAfter = Threshold;
790 bool isCostBenefitAnalysisEnabled() {
791 if (!PSI || !PSI->hasProfileSummary())
803 if (!(PSI->hasInstrumentationProfile() || PSI->hasSampleProfile()))
808 if (!
Caller->getEntryCount())
816 if (!PSI->isHotCallSite(CandidateCall, CallerBFI))
820 auto EntryCount =
F.getEntryCount();
821 if (!EntryCount || !EntryCount->getCount())
832 unsigned getInliningCostBenefitAnalysisSavingsMultiplier()
const {
839 unsigned getInliningCostBenefitAnalysisProfitableMultiplier()
const {
845 void OverrideCycleSavingsAndSizeForTesting(
APInt &CycleSavings,
int &
Size) {
847 CandidateCall,
"inline-cycle-savings-for-test")) {
848 CycleSavings = *AttrCycleSavings;
852 CandidateCall,
"inline-runtime-cost-for-test")) {
853 Size = *AttrRuntimeCost;
860 std::optional<bool> costBenefitAnalysis() {
861 if (!CostBenefitAnalysisEnabled)
885 APInt CycleSavings(128, 0);
888 APInt CurrentSavings(128, 0);
892 if (BI->isConditional() &&
893 isa_and_nonnull<ConstantInt>(
894 SimplifiedValues.
lookup(BI->getCondition()))) {
897 }
else if (
SwitchInst *SI = dyn_cast<SwitchInst>(&
I)) {
898 if (isa_and_present<ConstantInt>(SimplifiedValues.
lookup(
SI->getCondition())))
900 }
else if (
Value *V = dyn_cast<Value>(&
I)) {
902 if (SimplifiedValues.
count(V)) {
910 CycleSavings += CurrentSavings;
914 auto EntryProfileCount =
F.getEntryCount();
915 assert(EntryProfileCount && EntryProfileCount->getCount());
916 auto EntryCount = EntryProfileCount->getCount();
917 CycleSavings += EntryCount / 2;
918 CycleSavings = CycleSavings.
udiv(EntryCount);
921 auto *CallerBB = CandidateCall.
getParent();
935 OverrideCycleSavingsAndSizeForTesting(CycleSavings,
Size);
959 APInt Threshold(128, PSI->getOrCompHotCountThreshold());
962 APInt UpperBoundCycleSavings = CycleSavings;
963 UpperBoundCycleSavings *= getInliningCostBenefitAnalysisSavingsMultiplier();
964 if (UpperBoundCycleSavings.
uge(Threshold))
967 APInt LowerBoundCycleSavings = CycleSavings;
968 LowerBoundCycleSavings *=
969 getInliningCostBenefitAnalysisProfitableMultiplier();
970 if (LowerBoundCycleSavings.
ult(Threshold))
984 if (
Caller->hasMinSize()) {
990 if (DeadBlocks.
count(
L->getHeader()))
1000 if (NumVectorInstructions <= NumInstructions / 10)
1001 Threshold -= VectorBonus;
1002 else if (NumVectorInstructions <= NumInstructions / 2)
1003 Threshold -= VectorBonus / 2;
1005 if (std::optional<int> AttrCost =
1012 Cost *= *AttrCostMult;
1014 if (std::optional<int> AttrThreshold =
1016 Threshold = *AttrThreshold;
1018 if (
auto Result = costBenefitAnalysis()) {
1019 DecidedByCostBenefit =
true;
1026 if (IgnoreThreshold)
1029 DecidedByCostThreshold =
true;
1030 return Cost < std::max(1, Threshold)
1035 bool shouldStop()
override {
1036 if (IgnoreThreshold || ComputeFullInlineCost)
1040 if (
Cost < Threshold)
1042 DecidedByCostThreshold =
true;
1046 void onLoadEliminationOpportunity()
override {
1061 assert(NumInstructions == 0);
1062 assert(NumVectorInstructions == 0);
1065 updateThreshold(CandidateCall,
F);
1071 assert(SingleBBBonus >= 0);
1072 assert(VectorBonus >= 0);
1077 Threshold += (SingleBBBonus + VectorBonus);
1091 if (
Cost >= Threshold && !ComputeFullInlineCost)
1098 InlineCostCallAnalyzer(
1105 bool IgnoreThreshold =
false)
1106 : CallAnalyzer(
Callee,
Call,
TTI, GetAssumptionCache, GetBFI, PSI, ORE),
1108 Params.ComputeFullInlineCost || ORE ||
1109 isCostBenefitAnalysisEnabled()),
1111 BoostIndirectCalls(BoostIndirect), IgnoreThreshold(IgnoreThreshold),
1112 CostBenefitAnalysisEnabled(isCostBenefitAnalysisEnabled()),
1118 InlineCostAnnotationWriter Writer;
1126 std::optional<InstructionCostDetail> getCostDetails(
const Instruction *
I) {
1127 if (InstructionCostDetailMap.
contains(
I))
1128 return InstructionCostDetailMap[
I];
1129 return std::nullopt;
  // Virtual destructor: this class is used polymorphically via CallAnalyzer.
  virtual ~InlineCostCallAnalyzer() = default;

  /// Current inlining threshold after all adjustments.
  int getThreshold() const { return Threshold; }
  /// Accumulated inline cost so far.
  int getCost() const { return Cost; }
  /// Static bonus that was applied while analyzing this call site.
  int getStaticBonusApplied() const { return StaticBonusApplied; }
  /// Cost/benefit pair produced by the cost-benefit analysis, if it ran.
  std::optional<CostBenefitPair> getCostBenefitPair() { return CostBenefit; }
  /// True if the final decision came from the cost-benefit analysis.
  bool wasDecidedByCostBenefit() const { return DecidedByCostBenefit; }
  /// True if the final decision came from the cost-vs-threshold comparison.
  bool wasDecidedByCostThreshold() const { return DecidedByCostThreshold; }
1142static bool isSoleCallToLocalFunction(
const CallBase &CB,
1144 return Callee.hasLocalLinkage() &&
Callee.hasOneLiveUse() &&
1148class InlineCostFeaturesAnalyzer final :
public CallAnalyzer {
1155 static constexpr int JTCostMultiplier = 4;
1156 static constexpr int CaseClusterCostMultiplier = 2;
1157 static constexpr int SwitchDefaultDestCostMultiplier = 2;
1158 static constexpr int SwitchCostMultiplier = 2;
1162 unsigned SROACostSavingOpportunities = 0;
1163 int VectorBonus = 0;
1164 int SingleBBBonus = 0;
1170 Cost[
static_cast<size_t>(Feature)] += Delta;
1174 Cost[
static_cast<size_t>(Feature)] =
Value;
1177 void onDisableSROA(
AllocaInst *Arg)
override {
1178 auto CostIt = SROACosts.
find(Arg);
1179 if (CostIt == SROACosts.
end())
1182 increment(InlineCostFeatureIndex::sroa_losses, CostIt->second);
1183 SROACostSavingOpportunities -= CostIt->second;
1184 SROACosts.
erase(CostIt);
  // Load elimination was disabled for the callee: record the fact in the
  // load_elimination feature (a flag here, hence `set` not `increment`).
  void onDisableLoadElimination() override {
    set(InlineCostFeatureIndex::load_elimination, 1);
  }
  // Accumulate the flat per-callsite penalty into the call_penalty feature.
  void onCallPenalty() override {
    increment(InlineCostFeatureIndex::call_penalty, CallPenalty);
  }
1195 void onCallArgumentSetup(
const CallBase &Call)
override {
1196 increment(InlineCostFeatureIndex::call_argument_setup,
  // A llvm.load.relative intrinsic is charged as three instructions' worth
  // of cost in the load_relative_intrinsic feature.
  void onLoadRelativeIntrinsic() override {
    increment(InlineCostFeatureIndex::load_relative_intrinsic, 3 * InstrCost);
  }
1205 bool IsIndirectCall)
override {
1206 increment(InlineCostFeatureIndex::lowered_call_arg_setup,
1209 if (IsIndirectCall) {
1223 InlineCostCallAnalyzer CA(*
F, Call, IndirectCallParams,
TTI,
1224 GetAssumptionCache, GetBFI, PSI, ORE,
false,
1226 if (CA.analyze().isSuccess()) {
1227 increment(InlineCostFeatureIndex::nested_inline_cost_estimate,
1229 increment(InlineCostFeatureIndex::nested_inlines, 1);
1236 void onFinalizeSwitch(
unsigned JumpTableSize,
unsigned NumCaseCluster,
1237 bool DefaultDestUndefined)
override {
1238 if (!DefaultDestUndefined)
1239 increment(InlineCostFeatureIndex::switch_default_dest_penalty,
1240 SwitchDefaultDestCostMultiplier *
InstrCost);
1242 if (JumpTableSize) {
1243 int64_t JTCost =
static_cast<int64_t
>(JumpTableSize) *
InstrCost +
1245 increment(InlineCostFeatureIndex::jump_table_penalty, JTCost);
1249 if (NumCaseCluster <= 3) {
1250 increment(InlineCostFeatureIndex::case_cluster_penalty,
1251 NumCaseCluster * CaseClusterCostMultiplier *
InstrCost);
1255 int64_t ExpectedNumberOfCompare =
1256 getExpectedNumberOfCompare(NumCaseCluster);
1258 int64_t SwitchCost =
1259 ExpectedNumberOfCompare * SwitchCostMultiplier *
InstrCost;
1260 increment(InlineCostFeatureIndex::switch_penalty, SwitchCost);
1263 void onMissedSimplification()
override {
1264 increment(InlineCostFeatureIndex::unsimplified_common_instructions,
1268 void onInitializeSROAArg(
AllocaInst *Arg)
override {
1270 SROACosts[Arg] = SROAArgCost;
1271 SROACostSavingOpportunities += SROAArgCost;
1274 void onAggregateSROAUse(
AllocaInst *Arg)
override {
1276 SROACostSavingOpportunities +=
InstrCost;
1279 void onBlockAnalyzed(
const BasicBlock *BB)
override {
1281 set(InlineCostFeatureIndex::is_multiple_blocks, 1);
1282 Threshold -= SingleBBBonus;
1287 if (
Caller->hasMinSize()) {
1290 for (
Loop *L : LI) {
1292 if (DeadBlocks.
count(
L->getHeader()))
1294 increment(InlineCostFeatureIndex::num_loops,
1298 set(InlineCostFeatureIndex::dead_blocks, DeadBlocks.
size());
1299 set(InlineCostFeatureIndex::simplified_instructions,
1300 NumInstructionsSimplified);
1301 set(InlineCostFeatureIndex::constant_args, NumConstantArgs);
1302 set(InlineCostFeatureIndex::constant_offset_ptr_args,
1303 NumConstantOffsetPtrArgs);
1304 set(InlineCostFeatureIndex::sroa_savings, SROACostSavingOpportunities);
1306 if (NumVectorInstructions <= NumInstructions / 10)
1307 Threshold -= VectorBonus;
1308 else if (NumVectorInstructions <= NumInstructions / 2)
1309 Threshold -= VectorBonus / 2;
1311 set(InlineCostFeatureIndex::threshold, Threshold);
  // The feature extractor never requests early termination: it always scans
  // the entire callee.
  bool shouldStop() override { return false; }
  // A potentially redundant load was found; count it in the load_elimination
  // feature.
  void onLoadEliminationOpportunity() override {
    increment(InlineCostFeatureIndex::load_elimination, 1);
  }
1323 increment(InlineCostFeatureIndex::callsite_cost,
1326 set(InlineCostFeatureIndex::cold_cc_penalty,
1329 set(InlineCostFeatureIndex::last_call_to_static_bonus,
1330 isSoleCallToLocalFunction(CandidateCall,
F));
1335 int SingleBBBonusPercent = 50;
1339 SingleBBBonus = Threshold * SingleBBBonusPercent / 100;
1340 VectorBonus = Threshold * VectorBonusPercent / 100;
1341 Threshold += (SingleBBBonus + VectorBonus);
1347 InlineCostFeaturesAnalyzer(
1353 : CallAnalyzer(
Callee,
Call,
TTI, GetAssumptionCache, GetBFI, PSI) {}
1361bool CallAnalyzer::isAllocaDerivedArg(
Value *V) {
1362 return SROAArgValues.
count(V);
// Stop treating SROAArg as an SROA candidate: notify the subclass hook (for
// cost/feature accounting) first, then drop the alloca from the enabled set
// and conservatively disable load elimination.
void CallAnalyzer::disableSROAForArg(AllocaInst *SROAArg) {
  onDisableSROA(SROAArg);
  EnabledSROAAllocas.erase(SROAArg);
  disableLoadElimination();
}
1371void InlineCostAnnotationWriter::emitInstructionAnnot(
1376 std::optional<InstructionCostDetail>
Record = ICCA->getCostDetails(
I);
1378 OS <<
"; No analysis for the instruction";
1380 OS <<
"; cost before = " <<
Record->CostBefore
1381 <<
", cost after = " <<
Record->CostAfter
1382 <<
", threshold before = " <<
Record->ThresholdBefore
1383 <<
", threshold after = " <<
Record->ThresholdAfter <<
", ";
1384 OS <<
"cost delta = " <<
Record->getCostDelta();
1385 if (
Record->hasThresholdChanged())
1386 OS <<
", threshold delta = " <<
Record->getThresholdDelta();
1388 auto C = ICCA->getSimplifiedValue(
const_cast<Instruction *
>(
I));
1390 OS <<
", simplified to ";
1391 (*C)->print(
OS,
true);
1397void CallAnalyzer::disableSROA(
Value *V) {
1398 if (
auto *SROAArg = getSROAArgForValueOrNull(V)) {
1399 disableSROAForArg(SROAArg);
1403void CallAnalyzer::disableLoadElimination() {
1404 if (EnableLoadElimination) {
1405 onDisableLoadElimination();
1406 EnableLoadElimination =
false;
1415 unsigned IntPtrWidth =
DL.getIndexTypeSizeInBits(
GEP.getType());
1419 GTI != GTE; ++GTI) {
1420 ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
1422 if (
Constant *SimpleOp = SimplifiedValues.
lookup(GTI.getOperand()))
1423 OpC = dyn_cast<ConstantInt>(SimpleOp);
1430 if (
StructType *STy = GTI.getStructTypeOrNull()) {
1449 for (
const Use &
Op :
GEP.indices())
1460 disableSROA(
I.getOperand(0));
1464 if (
I.isArrayAllocation()) {
1466 if (
auto *AllocSize = dyn_cast_or_null<ConstantInt>(
Size)) {
1475 Type *Ty =
I.getAllocatedType();
1477 AllocSize->getLimitedValue(),
1478 DL.getTypeAllocSize(Ty).getKnownMinValue(), AllocatedSize);
1480 HasDynamicAlloca =
true;
1486 if (
I.isStaticAlloca()) {
1487 Type *Ty =
I.getAllocatedType();
1488 AllocatedSize =
SaturatingAdd(
DL.getTypeAllocSize(Ty).getKnownMinValue(),
1496 if (!
I.isStaticAlloca())
1497 HasDynamicAlloca =
true;
1502bool CallAnalyzer::visitPHI(
PHINode &
I) {
1514 bool CheckSROA =
I.getType()->isPointerTy();
1518 std::pair<Value *, APInt> FirstBaseAndOffset = {
nullptr, ZeroOffset};
1519 Value *FirstV =
nullptr;
1521 for (
unsigned i = 0, e =
I.getNumIncomingValues(); i != e; ++i) {
1524 if (DeadBlocks.
count(Pred))
1528 BasicBlock *KnownSuccessor = KnownSuccessors[Pred];
1529 if (KnownSuccessor && KnownSuccessor !=
I.getParent())
1532 Value *
V =
I.getIncomingValue(i);
1539 C = SimplifiedValues.
lookup(V);
1541 std::pair<Value *, APInt> BaseAndOffset = {
nullptr, ZeroOffset};
1542 if (!
C && CheckSROA)
1543 BaseAndOffset = ConstantOffsetPtrs.
lookup(V);
1545 if (!
C && !BaseAndOffset.first)
1562 if (FirstBaseAndOffset == BaseAndOffset)
1576 FirstBaseAndOffset = BaseAndOffset;
1581 SimplifiedValues[&
I] = FirstC;
1586 if (FirstBaseAndOffset.first) {
1587 ConstantOffsetPtrs[&
I] = FirstBaseAndOffset;
1589 if (
auto *SROAArg = getSROAArgForValueOrNull(FirstV))
1590 SROAArgValues[&
I] = SROAArg;
1602 std::pair<Value *, APInt> BaseAndOffset =
1603 ConstantOffsetPtrs.
lookup(
I.getPointerOperand());
1604 if (!BaseAndOffset.first)
1609 if (!accumulateGEPOffset(cast<GEPOperator>(
I), BaseAndOffset.second))
1613 ConstantOffsetPtrs[&
I] = BaseAndOffset;
1619 auto *SROAArg = getSROAArgForValueOrNull(
I.getPointerOperand());
1623 for (
const Use &
Op :
GEP.indices())
1624 if (!isa<Constant>(
Op) && !SimplifiedValues.
lookup(
Op))
1633 if ((
I.isInBounds() && canFoldInboundsGEP(
I)) || IsGEPOffsetConstant(
I)) {
1635 SROAArgValues[&
I] = SROAArg;
1643 disableSROAForArg(SROAArg);
1644 return isGEPFree(
I);
1648bool CallAnalyzer::simplifyInstruction(
Instruction &
I) {
1661 SimplifiedValues[&
I] =
C;
1674bool CallAnalyzer::simplifyIntrinsicCallIsConstant(
CallBase &CB) {
1676 auto *
C = dyn_cast<Constant>(Arg);
1679 C = dyn_cast_or_null<Constant>(SimplifiedValues.
lookup(Arg));
1682 SimplifiedValues[&CB] = ConstantInt::get(RT,
C ? 1 : 0);
1686bool CallAnalyzer::simplifyIntrinsicCallObjectSize(
CallBase &CB) {
1694 Constant *
C = dyn_cast_or_null<Constant>(V);
1696 SimplifiedValues[&CB] =
C;
1706 std::pair<Value *, APInt> BaseAndOffset =
1707 ConstantOffsetPtrs.
lookup(
I.getOperand(0));
1709 if (BaseAndOffset.first)
1710 ConstantOffsetPtrs[&
I] = BaseAndOffset;
1713 if (
auto *SROAArg = getSROAArgForValueOrNull(
I.getOperand(0)))
1714 SROAArgValues[&
I] = SROAArg;
1727 unsigned IntegerSize =
I.getType()->getScalarSizeInBits();
1728 unsigned AS =
I.getOperand(0)->getType()->getPointerAddressSpace();
1729 if (IntegerSize ==
DL.getPointerSizeInBits(AS)) {
1730 std::pair<Value *, APInt> BaseAndOffset =
1731 ConstantOffsetPtrs.
lookup(
I.getOperand(0));
1732 if (BaseAndOffset.first)
1733 ConstantOffsetPtrs[&
I] = BaseAndOffset;
1743 if (
auto *SROAArg = getSROAArgForValueOrNull(
I.getOperand(0)))
1744 SROAArgValues[&
I] = SROAArg;
1758 unsigned IntegerSize =
Op->getType()->getScalarSizeInBits();
1759 if (IntegerSize <=
DL.getPointerTypeSizeInBits(
I.getType())) {
1760 std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.
lookup(
Op);
1761 if (BaseAndOffset.first)
1762 ConstantOffsetPtrs[&
I] = BaseAndOffset;
1766 if (
auto *SROAArg = getSROAArgForValueOrNull(
Op))
1767 SROAArgValues[&
I] = SROAArg;
1773bool CallAnalyzer::visitCastInst(
CastInst &
I) {
1780 disableSROA(
I.getOperand(0));
1785 switch (
I.getOpcode()) {
1786 case Instruction::FPTrunc:
1787 case Instruction::FPExt:
1788 case Instruction::UIToFP:
1789 case Instruction::SIToFP:
1790 case Instruction::FPToUI:
1791 case Instruction::FPToSI:
1807bool CallAnalyzer::isKnownNonNullInCallee(
Value *V) {
1813 if (
Argument *
A = dyn_cast<Argument>(V))
1814 if (paramHasAttr(
A, Attribute::NonNull))
1820 if (isAllocaDerivedArg(V))
1829bool CallAnalyzer::allowSizeGrowth(
CallBase &Call) {
1845 if (
InvokeInst *II = dyn_cast<InvokeInst>(&Call)) {
1846 if (isa<UnreachableInst>(II->getNormalDest()->getTerminator()))
1848 }
else if (isa<UnreachableInst>(
Call.getParent()->getTerminator()))
1854bool InlineCostCallAnalyzer::isColdCallSite(
CallBase &Call,
1858 if (PSI && PSI->hasProfileSummary())
1859 return PSI->isColdCallSite(Call, CallerBFI);
1870 auto CallSiteBB =
Call.getParent();
1871 auto CallSiteFreq = CallerBFI->
getBlockFreq(CallSiteBB);
1872 auto CallerEntryFreq =
1874 return CallSiteFreq < CallerEntryFreq * ColdProb;
1878InlineCostCallAnalyzer::getHotCallSiteThreshold(
CallBase &Call,
1883 if (PSI && PSI->hasProfileSummary() && PSI->isHotCallSite(Call, CallerBFI))
1889 return std::nullopt;
1899 if (Limit && CallSiteFreq >= *Limit)
1903 return std::nullopt;
1906void InlineCostCallAnalyzer::updateThreshold(
CallBase &Call,
Function &Callee) {
1908 if (!allowSizeGrowth(Call)) {
1916 auto MinIfValid = [](
int A, std::optional<int>
B) {
1917 return B ? std::min(
A, *
B) :
A;
1921 auto MaxIfValid = [](
int A, std::optional<int>
B) {
1922 return B ? std::max(
A, *
B) :
A;
1937 int SingleBBBonusPercent = 50;
1942 auto DisallowAllBonuses = [&]() {
1943 SingleBBBonusPercent = 0;
1944 VectorBonusPercent = 0;
1950 if (
Caller->hasMinSize()) {
1956 SingleBBBonusPercent = 0;
1957 VectorBonusPercent = 0;
1958 }
else if (
Caller->hasOptSize())
1963 if (!
Caller->hasMinSize()) {
1964 if (
Callee.hasFnAttribute(Attribute::InlineHint))
1989 DisallowAllBonuses();
1994 if (PSI->isFunctionEntryHot(&Callee)) {
2000 }
else if (PSI->isFunctionEntryCold(&Callee)) {
2006 DisallowAllBonuses();
2018 SingleBBBonus = Threshold * SingleBBBonusPercent / 100;
2019 VectorBonus = Threshold * VectorBonusPercent / 100;
2024 if (isSoleCallToLocalFunction(Call,
F)) {
2030bool CallAnalyzer::visitCmpInst(
CmpInst &
I) {
2036 if (
I.getOpcode() == Instruction::FCmp)
2041 Value *LHSBase, *RHSBase;
2042 APInt LHSOffset, RHSOffset;
2043 std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.
lookup(LHS);
2045 std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.
lookup(RHS);
2046 if (RHSBase && LHSBase == RHSBase) {
2052 SimplifiedValues[&
I] =
C;
2053 ++NumConstantPtrCmps;
2059 auto isImplicitNullCheckCmp = [](
const CmpInst &
I) {
2060 for (
auto *
User :
I.users())
2061 if (
auto *Instr = dyn_cast<Instruction>(
User))
2062 if (!
Instr->getMetadata(LLVMContext::MD_make_implicit))
2069 if (
I.isEquality() && isa<ConstantPointerNull>(
I.getOperand(1))) {
2070 if (isKnownNonNullInCallee(
I.getOperand(0))) {
2078 if (isImplicitNullCheckCmp(
I))
2081 return handleSROA(
I.getOperand(0), isa<ConstantPointerNull>(
I.getOperand(1)));
2088 Value *LHSBase, *RHSBase;
2089 APInt LHSOffset, RHSOffset;
2090 std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.
lookup(LHS);
2092 std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.
lookup(RHS);
2093 if (RHSBase && LHSBase == RHSBase) {
2099 SimplifiedValues[&
I] =
C;
2100 ++NumConstantPtrDiffs;
2108 return Base::visitSub(
I);
2113 Constant *CLHS = dyn_cast<Constant>(LHS);
2115 CLHS = SimplifiedValues.
lookup(LHS);
2116 Constant *CRHS = dyn_cast<Constant>(RHS);
2118 CRHS = SimplifiedValues.
lookup(RHS);
2120 Value *SimpleV =
nullptr;
2121 if (
auto FI = dyn_cast<FPMathOperator>(&
I))
2122 SimpleV =
simplifyBinOp(
I.getOpcode(), CLHS ? CLHS : LHS, CRHS ? CRHS : RHS,
2123 FI->getFastMathFlags(),
DL);
2128 if (
Constant *
C = dyn_cast_or_null<Constant>(SimpleV))
2129 SimplifiedValues[&
I] =
C;
2142 if (
I.getType()->isFloatingPointTy() &&
2159 if (
Constant *
C = dyn_cast_or_null<Constant>(SimpleV))
2160 SimplifiedValues[&
I] =
C;
2171bool CallAnalyzer::visitLoad(
LoadInst &
I) {
2172 if (handleSROA(
I.getPointerOperand(),
I.isSimple()))
2178 if (EnableLoadElimination &&
2179 !LoadAddrSet.
insert(
I.getPointerOperand()).second &&
I.isUnordered()) {
2180 onLoadEliminationOpportunity();
2189 if (handleSROA(
I.getPointerOperand(),
I.isSimple()))
2200 disableLoadElimination();
2212 return Base::visitExtractValue(
I);
2221 return Base::visitInsertValue(
I);
2244 C = dyn_cast_or_null<Constant>(SimplifiedValues.
lookup(
I));
2251 SimplifiedValues[&
Call] =
C;
2258bool CallAnalyzer::visitCallBase(
CallBase &Call) {
2259 if (!onCallBaseVisitStart(Call))
2262 if (
Call.hasFnAttr(Attribute::ReturnsTwice) &&
2263 !
F.hasFnAttribute(Attribute::ReturnsTwice)) {
2265 ExposesReturnsTwice =
true;
2268 if (isa<CallInst>(Call) && cast<CallInst>(Call).cannotDuplicate())
2269 ContainsNoDuplicateCall =
true;
2272 bool IsIndirectCall = !
F;
2273 if (IsIndirectCall) {
2277 F = dyn_cast_or_null<Function>(SimplifiedValues.
lookup(Callee));
2278 if (!
F ||
F->getFunctionType() !=
Call.getFunctionType()) {
2279 onCallArgumentSetup(Call);
2281 if (!
Call.onlyReadsMemory())
2282 disableLoadElimination();
2283 return Base::visitCallBase(Call);
2287 assert(
F &&
"Expected a call to a known function");
2290 if (simplifyCallSite(
F, Call))
2296 switch (II->getIntrinsicID()) {
2299 disableLoadElimination();
2300 return Base::visitCallBase(Call);
2302 case Intrinsic::load_relative:
2303 onLoadRelativeIntrinsic();
2306 case Intrinsic::memset:
2307 case Intrinsic::memcpy:
2308 case Intrinsic::memmove:
2309 disableLoadElimination();
2312 case Intrinsic::icall_branch_funnel:
2313 case Intrinsic::localescape:
2314 HasUninlineableIntrinsic =
true;
2316 case Intrinsic::vastart:
2317 InitsVargArgs =
true;
2319 case Intrinsic::launder_invariant_group:
2320 case Intrinsic::strip_invariant_group:
2321 if (
auto *SROAArg = getSROAArgForValueOrNull(II->getOperand(0)))
2322 SROAArgValues[II] = SROAArg;
2324 case Intrinsic::is_constant:
2325 return simplifyIntrinsicCallIsConstant(Call);
2326 case Intrinsic::objectsize:
2327 return simplifyIntrinsicCallObjectSize(Call);
2331 if (
F ==
Call.getFunction()) {
2334 IsRecursiveCall =
true;
2335 if (!AllowRecursiveCall)
2340 onLoweredCall(
F, Call, IsIndirectCall);
2343 if (!(
Call.onlyReadsMemory() || (IsIndirectCall &&
F->onlyReadsMemory())))
2344 disableLoadElimination();
2345 return Base::visitCallBase(Call);
2348bool CallAnalyzer::visitReturnInst(
ReturnInst &RI) {
2350 bool Free = !HasReturn;
2355bool CallAnalyzer::visitBranchInst(
BranchInst &BI) {
2362 isa_and_nonnull<ConstantInt>(
2366bool CallAnalyzer::visitSelectInst(
SelectInst &SI) {
2367 bool CheckSROA =
SI.getType()->isPointerTy();
2371 Constant *TrueC = dyn_cast<Constant>(TrueVal);
2373 TrueC = SimplifiedValues.
lookup(TrueVal);
2374 Constant *FalseC = dyn_cast<Constant>(FalseVal);
2376 FalseC = SimplifiedValues.
lookup(FalseVal);
2378 dyn_cast_or_null<Constant>(SimplifiedValues.
lookup(
SI.getCondition()));
2382 if (TrueC == FalseC && TrueC) {
2383 SimplifiedValues[&
SI] = TrueC;
2388 return Base::visitSelectInst(SI);
2390 std::pair<Value *, APInt> TrueBaseAndOffset =
2391 ConstantOffsetPtrs.
lookup(TrueVal);
2392 std::pair<Value *, APInt> FalseBaseAndOffset =
2393 ConstantOffsetPtrs.
lookup(FalseVal);
2394 if (TrueBaseAndOffset == FalseBaseAndOffset && TrueBaseAndOffset.first) {
2395 ConstantOffsetPtrs[&
SI] = TrueBaseAndOffset;
2397 if (
auto *SROAArg = getSROAArgForValueOrNull(TrueVal))
2398 SROAArgValues[&
SI] = SROAArg;
2402 return Base::visitSelectInst(SI);
2413 if (TrueC && FalseC) {
2415 SimplifiedValues[&
SI] =
C;
2419 return Base::visitSelectInst(SI);
2423 if (
Constant *SelectedC = dyn_cast<Constant>(SelectedV)) {
2424 SimplifiedValues[&
SI] = SelectedC;
2431 std::pair<Value *, APInt> BaseAndOffset =
2432 ConstantOffsetPtrs.
lookup(SelectedV);
2433 if (BaseAndOffset.first) {
2434 ConstantOffsetPtrs[&
SI] = BaseAndOffset;
2436 if (
auto *SROAArg = getSROAArgForValueOrNull(SelectedV))
2437 SROAArgValues[&
SI] = SROAArg;
2443bool CallAnalyzer::visitSwitchInst(
SwitchInst &SI) {
2446 if (isa<ConstantInt>(
SI.getCondition()))
2449 if (isa<ConstantInt>(V))
2464 unsigned JumpTableSize = 0;
2466 unsigned NumCaseCluster =
2469 onFinalizeSwitch(JumpTableSize, NumCaseCluster,
SI.defaultDestUndefined());
2482 HasIndirectBr =
true;
2486bool CallAnalyzer::visitResumeInst(
ResumeInst &RI) {
2520 for (
const Use &
Op :
I.operands())
2545 if (
I.isDebugOrPseudoInst())
2553 if (isa<ExtractElementInst>(
I) ||
I.getType()->isVectorTy())
2554 ++NumVectorInstructions;
2561 onInstructionAnalysisStart(&
I);
2563 if (Base::visit(&
I))
2564 ++NumInstructionsSimplified;
2566 onMissedSimplification();
2568 onInstructionAnalysisFinish(&
I);
2569 using namespace ore;
2572 if (IsRecursiveCall && !AllowRecursiveCall)
2574 else if (ExposesReturnsTwice)
2576 else if (HasDynamicAlloca)
2578 else if (HasIndirectBr)
2580 else if (HasUninlineableIntrinsic)
2582 else if (InitsVargArgs)
2584 if (!
IR.isSuccess()) {
2589 <<
NV(
"Callee", &
F) <<
" has uninlinable pattern ("
2590 <<
NV(
"InlineResult",
IR.getFailureReason())
2591 <<
") and cost is not fully computed";
2606 <<
NV(
"Callee", &
F) <<
" is "
2607 <<
NV(
"InlineResult",
IR.getFailureReason())
2608 <<
". Cost is not fully computed";
2615 "Call site analysis is not favorable to inlining.");
2627ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(
Value *&V) {
2628 if (!
V->getType()->isPointerTy())
2631 unsigned AS =
V->getType()->getPointerAddressSpace();
2632 unsigned IntPtrWidth =
DL.getIndexSizeInBits(AS);
2641 if (!
GEP->isInBounds() || !accumulateGEPOffset(*
GEP,
Offset))
2643 V =
GEP->getPointerOperand();
2645 V = cast<Operator>(V)->getOperand(0);
2646 }
else if (
GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
2647 if (GA->isInterposable())
2649 V = GA->getAliasee();
2653 assert(
V->getType()->isPointerTy() &&
"Unexpected operand type!");
2654 }
while (Visited.
insert(V).second);
2656 Type *IdxPtrTy =
DL.getIndexType(
V->getType());
2657 return cast<ConstantInt>(ConstantInt::get(IdxPtrTy,
Offset));
2671 return (DeadBlocks.
count(Pred) ||
2672 (KnownSuccessors[Pred] && KnownSuccessors[Pred] != Succ));
2677 return (!DeadBlocks.
count(BB) &&
2683 if (Succ == NextBB || !IsNewlyDead(Succ))
2687 while (!NewDead.
empty()) {
2689 if (DeadBlocks.
insert(Dead).second)
2708 auto Result = onAnalysisStart();
2719 if (Call &&
Call->getFunction() == Caller) {
2720 IsCallerRecursive =
true;
2730 if (
Constant *
C = dyn_cast<Constant>(CAI))
2731 SimplifiedValues[&FAI] =
C;
2733 Value *PtrArg = *CAI;
2734 if (
ConstantInt *
C = stripAndComputeInBoundsConstantOffsets(PtrArg)) {
2735 ConstantOffsetPtrs[&FAI] = std::make_pair(PtrArg,
C->getValue());
2738 if (
auto *SROAArg = dyn_cast<AllocaInst>(PtrArg)) {
2739 SROAArgValues[&FAI] = SROAArg;
2740 onInitializeSROAArg(SROAArg);
2741 EnabledSROAAllocas.
insert(SROAArg);
2746 NumConstantArgs = SimplifiedValues.
size();
2747 NumConstantOffsetPtrArgs = ConstantOffsetPtrs.
size();
2748 NumAllocaArgs = SROAArgValues.
size();
2765 BBWorklist.
insert(&
F.getEntryBlock());
2788 if (!isa<CallBrInst>(*U))
2794 if (!
IR.isSuccess())
2801 if (
BranchInst *BI = dyn_cast<BranchInst>(TI)) {
2805 dyn_cast_or_null<ConstantInt>(SimplifiedValues.
lookup(
Cond))) {
2807 BBWorklist.
insert(NextBB);
2808 KnownSuccessors[BB] = NextBB;
2809 findDeadBlocks(BB, NextBB);
2813 }
else if (
SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
2816 dyn_cast_or_null<ConstantInt>(SimplifiedValues.
lookup(
Cond))) {
2817 BasicBlock *NextBB =
SI->findCaseValue(SimpleCond)->getCaseSuccessor();
2818 BBWorklist.
insert(NextBB);
2819 KnownSuccessors[BB] = NextBB;
2820 findDeadBlocks(BB, NextBB);
2830 onBlockAnalyzed(BB);
2836 if (!isSoleCallToLocalFunction(CandidateCall,
F) && ContainsNoDuplicateCall)
2846 FinalStackSizeThreshold = *AttrMaxStackSize;
2847 if (AllocatedSize > FinalStackSizeThreshold)
2850 return finalizeAnalysis();
2854#define DEBUG_PRINT_STAT(x) OS << " " #x ": " << x << "\n"
2856 F.print(
OS, &Writer);
2870#undef DEBUG_PRINT_STAT
2873#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2887 auto CalleeTLI = GetTLI(*Callee);
2890 GetTLI(*Caller).areInlineCompatible(CalleeTLI,
2898 for (
unsigned I = 0,
E = Call.arg_size();
I !=
E; ++
I) {
2899 if (Call.isByValArgument(
I)) {
2902 PointerType *PTy = cast<PointerType>(Call.getArgOperand(
I)->getType());
2903 unsigned TypeSize =
DL.getTypeSizeInBits(Call.getParamByValType(
I));
2905 unsigned PointerSize =
DL.getPointerSizeInBits(AS);
2907 unsigned NumStores = (
TypeSize + PointerSize - 1) / PointerSize;
2915 NumStores = std::min(NumStores, 8U);
2928 return std::min<int64_t>(
Cost, INT_MAX);
2937 return getInlineCost(Call, Call.getCalledFunction(), Params, CalleeTTI,
2938 GetAssumptionCache, GetTLI, GetBFI, PSI, ORE);
2957 InlineCostCallAnalyzer CA(*Call.getCalledFunction(), Call, Params, CalleeTTI,
2958 GetAssumptionCache, GetBFI, PSI, ORE,
true,
2960 auto R = CA.analyze();
2962 return std::nullopt;
2963 return CA.getCost();
2971 InlineCostFeaturesAnalyzer CFA(CalleeTTI, GetAssumptionCache, GetBFI, PSI,
2972 ORE, *Call.getCalledFunction(), Call);
2973 auto R = CFA.analyze();
2975 return std::nullopt;
2976 return CFA.features();
2991 if (Callee->isPresplitCoroutine())
2999 unsigned AllocaAS = Callee->getParent()->getDataLayout().getAllocaAddrSpace();
3000 for (
unsigned I = 0,
E = Call.arg_size();
I !=
E; ++
I)
3001 if (Call.isByValArgument(
I)) {
3002 PointerType *PTy = cast<PointerType>(Call.getArgOperand(
I)->getType());
3010 if (Call.hasFnAttr(Attribute::AlwaysInline)) {
3011 if (Call.getAttributes().hasFnAttr(Attribute::NoInline))
3015 if (IsViable.isSuccess())
3022 Function *Caller = Call.getCaller();
3027 if (Caller->hasOptNone())
3032 if (!Caller->nullPointerIsDefined() && Callee->nullPointerIsDefined())
3036 if (Callee->isInterposable())
3040 if (Callee->hasFnAttribute(Attribute::NoInline))
3044 if (Call.isNoInline())
3047 return std::nullopt;
3062 if (UserDecision->isSuccess())
3068 <<
"... (caller:" << Call.getCaller()->getName()
3071 InlineCostCallAnalyzer CA(*Callee, Call, Params, CalleeTTI,
3072 GetAssumptionCache, GetBFI, PSI, ORE);
3080 if (CA.wasDecidedByCostBenefit()) {
3083 CA.getCostBenefitPair());
3088 if (CA.wasDecidedByCostThreshold())
3090 CA.getStaticBonusApplied());
3099 bool ReturnsTwice =
F.hasFnAttribute(Attribute::ReturnsTwice);
3109 if (!isa<CallBrInst>(*U))
3112 for (
auto &II : BB) {
3113 CallBase *Call = dyn_cast<CallBase>(&II);
3118 Function *Callee = Call->getCalledFunction();
3124 if (!ReturnsTwice && isa<CallInst>(Call) &&
3125 cast<CallInst>(Call)->canReturnTwice())
3129 switch (Callee->getIntrinsicID()) {
3132 case llvm::Intrinsic::icall_branch_funnel:
3136 "disallowed inlining of @llvm.icall.branch.funnel");
3137 case llvm::Intrinsic::localescape:
3141 "disallowed inlining of @llvm.localescape");
3142 case llvm::Intrinsic::vastart:
3146 "contains VarArgs initialized with va_start");
3217 unsigned SizeOptLevel) {
3220 if (SizeOptLevel == 1)
3222 if (SizeOptLevel == 2)
3258 if (
CallInst *CI = dyn_cast<CallInst>(&
I)) {
3259 Function *CalledFunction = CI->getCalledFunction();
3263 InlineCostCallAnalyzer ICCA(*CalledFunction, *CI, Params,
TTI,
3264 GetAssumptionCache,
nullptr, &PSI, &ORE);
3266 OS <<
" Analyzing call of " << CalledFunction->
getName()
3267 <<
"... (caller:" << CI->getCaller()->getName() <<
")\n";
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static const Function * getParent(const Value *V)
SetVector< BasicBlock * > BBSetVector
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
static bool isColdCallSite(CallBase &CB, BlockFrequencyInfo &CallerBFI)
Return true if the block containing the call site has a BlockFrequency of less than ColdCCRelFreq% of...
static cl::opt< int > InlineSavingsMultiplier("inline-savings-multiplier", cl::Hidden, cl::init(8), cl::desc("Multiplier to multiply cycle savings by during inlining"))
static cl::opt< int > InlineThreshold("inline-threshold", cl::Hidden, cl::init(225), cl::desc("Control the amount of inlining to perform (default = 225)"))
static cl::opt< int > CallPenalty("inline-call-penalty", cl::Hidden, cl::init(25), cl::desc("Call penalty that is applied per callsite when inlining"))
static cl::opt< int > HotCallSiteThreshold("hot-callsite-threshold", cl::Hidden, cl::init(3000), cl::desc("Threshold for hot callsites "))
static cl::opt< int > ColdThreshold("inlinecold-threshold", cl::Hidden, cl::init(45), cl::desc("Threshold for inlining functions with cold attribute"))
static cl::opt< size_t > RecurStackSizeThreshold("recursive-inline-max-stacksize", cl::Hidden, cl::init(InlineConstants::TotalAllocaSizeRecursiveCaller), cl::desc("Do not inline recursive functions with a stack " "size that exceeds the specified limit"))
static cl::opt< bool > PrintInstructionComments("print-instruction-comments", cl::Hidden, cl::init(false), cl::desc("Prints comments for instruction based on inline cost analysis"))
static cl::opt< int > LocallyHotCallSiteThreshold("locally-hot-callsite-threshold", cl::Hidden, cl::init(525), cl::desc("Threshold for locally hot callsites "))
static cl::opt< bool > InlineCallerSupersetNoBuiltin("inline-caller-superset-nobuiltin", cl::Hidden, cl::init(true), cl::desc("Allow inlining when caller has a superset of callee's nobuiltin " "attributes."))
static cl::opt< int > HintThreshold("inlinehint-threshold", cl::Hidden, cl::init(325), cl::desc("Threshold for inlining functions with inline hint"))
static cl::opt< size_t > StackSizeThreshold("inline-max-stacksize", cl::Hidden, cl::init(std::numeric_limits< size_t >::max()), cl::desc("Do not inline functions with a stack size " "that exceeds the specified limit"))
static int computeThresholdFromOptLevels(unsigned OptLevel, unsigned SizeOptLevel)
static cl::opt< uint64_t > HotCallSiteRelFreq("hot-callsite-rel-freq", cl::Hidden, cl::init(60), cl::desc("Minimum block frequency, expressed as a multiple of caller's " "entry frequency, for a callsite to be hot in the absence of " "profile information."))
static cl::opt< int > InlineSavingsProfitableMultiplier("inline-savings-profitable-multiplier", cl::Hidden, cl::init(4), cl::desc("A multiplier on top of cycle savings to decide whether the " "savings won't justify the cost"))
static cl::opt< int > MemAccessCost("inline-memaccess-cost", cl::Hidden, cl::init(0), cl::desc("Cost of load/store instruction when inlining"))
static cl::opt< int > ColdCallSiteThreshold("inline-cold-callsite-threshold", cl::Hidden, cl::init(45), cl::desc("Threshold for inlining cold callsites"))
static cl::opt< bool > IgnoreTTIInlineCompatible("ignore-tti-inline-compatible", cl::Hidden, cl::init(false), cl::desc("Ignore TTI attributes compatibility check between callee/caller " "during inline cost calculation"))
static cl::opt< bool > OptComputeFullInlineCost("inline-cost-full", cl::Hidden, cl::desc("Compute the full inline cost of a call site even when the cost " "exceeds the threshold."))
#define DEBUG_PRINT_STAT(x)
static cl::opt< bool > InlineEnableCostBenefitAnalysis("inline-enable-cost-benefit-analysis", cl::Hidden, cl::init(false), cl::desc("Enable the cost-benefit analysis for the inliner"))
static cl::opt< int > InstrCost("inline-instr-cost", cl::Hidden, cl::init(5), cl::desc("Cost of a single instruction when inlining"))
static cl::opt< int > InlineSizeAllowance("inline-size-allowance", cl::Hidden, cl::init(100), cl::desc("The maximum size of a callee that get's " "inlined without sufficient cycle savings"))
static bool functionsHaveCompatibleAttributes(Function *Caller, Function *Callee, TargetTransformInfo &TTI, function_ref< const TargetLibraryInfo &(Function &)> &GetTLI)
Test that there are no attribute conflicts between Caller and Callee that prevent inlining.
static cl::opt< int > ColdCallSiteRelFreq("cold-callsite-rel-freq", cl::Hidden, cl::init(2), cl::desc("Maximum block frequency, expressed as a percentage of caller's " "entry frequency, for a callsite to be cold in the absence of " "profile information."))
static cl::opt< bool > DisableGEPConstOperand("disable-gep-const-evaluation", cl::Hidden, cl::init(false), cl::desc("Disables evaluation of GetElementPtr with constant operands"))
static cl::opt< int > DefaultThreshold("inlinedefault-threshold", cl::Hidden, cl::init(225), cl::desc("Default amount of inlining to perform"))
static Constant * getFalse(Type *Ty)
For a boolean type or a vector of boolean type, return false or a vector with every element false.
Legalize the Machine IR a function s Machine IR
mir Rename Register Operands
FunctionAnalysisManager FAM
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static unsigned getFastMathFlags(const MachineInstr &I)
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static SymbolRef::Type getType(const Symbol *Sym)
Class for arbitrary precision integers.
APInt udiv(const APInt &RHS) const
Unsigned division operation.
bool ult(const APInt &RHS) const
Unsigned less than comparison.
APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
an instruction to allocate memory on the stack
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
This class represents an incoming formal argument to a Function.
virtual void emitInstructionAnnot(const Instruction *, formatted_raw_ostream &)
emitInstructionAnnot - This may be implemented to emit a string right before an instruction is emitte...
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
StringRef getValueAsString() const
Return the attribute's value as a string.
AttrKind
This enumeration lists the attributes that can be associated with parameters, function results,...
bool isValid() const
Return true if the attribute is any kind of attribute.
LLVM Basic Block Representation.
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
const Function * getParent() const
Return the enclosing method, or null if none.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
This class represents a no-op cast from one type to another.
static BlockAddress * get(Function *F, BasicBlock *BB)
Return a BlockAddress for the specified function and basic block.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
std::optional< uint64_t > getBlockProfileCount(const BasicBlock *BB, bool AllowSynthetic=false) const
Returns the estimated profile count of BB.
BlockFrequency getEntryFreq() const
BlockFrequency getBlockFreq(const BasicBlock *BB) const
getblockFreq - Return block frequency.
std::optional< BlockFrequency > mul(uint64_t Factor) const
Multiplies frequency with Factor. Returns nullopt in case of overflow.
Conditional or Unconditional Branch instruction.
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
Value * getCondition() const
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Determine whether the argument or parameter has the given attribute.
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
Attribute getFnAttr(StringRef Kind) const
Get the attribute of a given kind for the function.
Value * getArgOperand(unsigned i) const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
FunctionType * getFunctionType() const
Function * getCaller()
Helper to get the caller (the parent function).
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
This class is the base class for the comparison instructions.
static Constant * getSub(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
static Constant * getICmp(unsigned short pred, Constant *LHS, Constant *RHS, bool OnlyIfReduced=false)
get* - Return some common constants without having to specify the full Instruction::OPCODE identifier...
This is the shared class of boolean and integer constants.
static ConstantInt * getTrue(LLVMContext &Context)
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
const APInt & getValue() const
Return the constant as an APInt value reference.
This is an important base class in LLVM.
bool isAllOnesValue() const
Return true if this is the value that would be returned by getAllOnesValue.
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
iterator find(const_arg_type_t< KeyT > Val)
bool erase(const KeyT &Val)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Implements a dense probed hash-table based set.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Type * getReturnType() const
Class to represent profile counts.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Indirect Branch Instruction.
Represents the cost of inlining a function.
static InlineCost getNever(const char *Reason, std::optional< CostBenefitPair > CostBenefit=std::nullopt)
static InlineCost getAlways(const char *Reason, std::optional< CostBenefitPair > CostBenefit=std::nullopt)
static InlineCost get(int Cost, int Threshold, int StaticBonus=0)
InlineResult is basically true or false.
static InlineResult success()
static InlineResult failure(const char *Reason)
const char * getFailureReason() const
This instruction inserts a struct field of array element value into an aggregate value.
Base class for instruction visitors.
RetTy visitIndirectBrInst(IndirectBrInst &I)
RetTy visitCmpInst(CmpInst &I)
RetTy visitCallBase(CallBase &I)
RetTy visitCleanupReturnInst(CleanupReturnInst &I)
RetTy visitUnreachableInst(UnreachableInst &I)
RetTy visitSwitchInst(SwitchInst &I)
void visit(Iterator Start, Iterator End)
RetTy visitReturnInst(ReturnInst &I)
RetTy visitBinaryOperator(BinaryOperator &I)
RetTy visitResumeInst(ResumeInst &I)
RetTy visitCatchReturnInst(CatchReturnInst &I)
RetTy visitCastInst(CastInst &I)
RetTy visitBranchInst(BranchInst &I)
RetTy visitSelectInst(SelectInst &I)
void visitInstruction(Instruction &I)
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const BasicBlock * getParent() const
const Function * getFunction() const
Return the function this instruction belongs to.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
This class represents a cast from an integer to a pointer.
A wrapper class for inspecting calls to intrinsic functions.
An instruction for reading from memory.
Represents a single loop in the control flow graph.
A Module instance is used to store all the information related to an LLVM module.
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
Class to represent pointers.
unsigned getAddressSpace() const
Return the address space of the Pointer type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Analysis providing profile information.
This class represents a cast from a pointer to an integer.
Resume the propagation of an exception.
Return a value (possibly void), from a function.
This class represents the LLVM 'select' instruction.
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Used to lazily calculate structure layout information for a target machine, based on the DataLayout s...
TypeSize getElementOffset(unsigned Idx) const
Class to represent struct types.
Provides information about what library functions are available for the current target.
The instances of the Type class are immutable: once they are created, they are never changed.
This function has undefined behavior.
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
LLVMContext & getContext() const
All values hold a context through their type.
StringRef getName() const
Return a constant reference to the value's name.
std::pair< iterator, bool > insert(const ValueT &V)
bool erase(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
An efficient, type-erasing, non-owning reference to a callable.
This class implements an extremely fast bulk output stream that can only output to a stream.
bool areInlineCompatible(const Function &Caller, const Function &Callee)
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
@ C
The default llvm calling convention, compatible with C.
const char FunctionInlineCostMultiplierAttributeName[]
const int OptSizeThreshold
Use when optsize (-Os) is specified.
const int OptMinSizeThreshold
Use when minsize (-Oz) is specified.
const uint64_t MaxSimplifiedDynamicAllocaToInline
Do not inline dynamic allocas that have been constant propagated to be static allocas above this amou...
const int IndirectCallThreshold
const int OptAggressiveThreshold
Use when -O3 is specified.
const char MaxInlineStackSizeAttributeName[]
const int LastCallToStaticBonus
const unsigned TotalAllocaSizeRecursiveCaller
Do not inline functions which allocate this many bytes on the stack when the caller is recursive.
bool match(Val *V, const Pattern &P)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
initializer< Ty > init(const Ty &Val)
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< InstrNode * > Instr
This is an optimization pass for GlobalISel generic memory operations.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Constant * ConstantFoldSelectInstruction(Constant *Cond, Constant *V1, Constant *V2)
Attempt to constant fold a select instruction with the specified operands.
bool isAssumeLikeIntrinsic(const Instruction *I)
Return true if it is an intrinsic that cannot be speculated but also cannot trap.
bool canConstantFoldCallTo(const CallBase *Call, const Function *F)
canConstantFoldCallTo - Return true if its even possible to fold a call to the specified function.
Function::ProfileCount ProfileCount
std::optional< int > getStringFnAttrAsInt(CallBase &CB, StringRef AttrKind)
auto successors(const MachineBasicBlock *BB)
Value * lowerObjectSizeCall(IntrinsicInst *ObjectSize, const DataLayout &DL, const TargetLibraryInfo *TLI, bool MustSucceed)
Try to turn a call to @llvm.objectsize into an integer value of the given Type.
std::optional< InlineCostFeatures > getInliningCostFeatures(CallBase &Call, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Get the expanded cost features.
gep_type_iterator gep_type_end(const User *GEP)
Constant * ConstantFoldCall(const CallBase *Call, Function *F, ArrayRef< Constant * > Operands, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldCall - Attempt to constant fold a call to the specified function with the specified argum...
Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
InlineResult isInlineViable(Function &Callee)
Minimal filter to detect invalid constructs for inlining.
InlineCost getInlineCost(CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< const TargetLibraryInfo &(Function &)> GetTLI, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Get an InlineCost object representing the cost of inlining this callsite.
Value * simplifyFNegInst(Value *Op, FastMathFlags FMF, const SimplifyQuery &Q)
Given operand for an FNeg, fold the result or return null.
Constant * ConstantFoldInstOperands(Instruction *I, ArrayRef< Constant * > Ops, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldInstOperands - Attempt to constant fold an instruction with the specified operands.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
std::enable_if_t< std::is_unsigned_v< T >, T > SaturatingMultiplyAdd(T X, T Y, T A, bool *ResultOverflowed=nullptr)
Multiply two unsigned integers, X and Y, and add the unsigned integer, A to the product.
std::array< int, static_cast< size_t >(InlineCostFeatureIndex::NumberOfFeatures)> InlineCostFeatures
std::optional< InlineResult > getAttributeBasedInliningDecision(CallBase &Call, Function *Callee, TargetTransformInfo &CalleeTTI, function_ref< const TargetLibraryInfo &(Function &)> GetTLI)
Returns InlineResult::success() if the call site should be always inlined because of user directives,...
Value * simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a BinaryOperator, fold the result or return null.
InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
int getCallsiteCost(const TargetTransformInfo &TTI, const CallBase &Call, const DataLayout &DL)
Return the cost associated with a callsite, including parameter passing and the call/return instructi...
std::optional< int > getInliningCostEstimate(CallBase &Call, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Get the cost estimate ignoring thresholds.
gep_type_iterator gep_type_begin(const User *GEP)
auto predecessors(const MachineBasicBlock *BB)
std::enable_if_t< std::is_unsigned_v< T >, T > SaturatingAdd(T X, T Y, bool *ResultOverflowed=nullptr)
Add two unsigned integers, X and Y, of type T.
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM)
Thresholds to tune inline cost analysis.
std::optional< int > OptMinSizeThreshold
Threshold to use when the caller is optimized for minsize.
std::optional< int > OptSizeThreshold
Threshold to use when the caller is optimized for size.
std::optional< int > ColdCallSiteThreshold
Threshold to use when the callsite is considered cold.
std::optional< int > ColdThreshold
Threshold to use for cold callees.
std::optional< int > HotCallSiteThreshold
Threshold to use when the callsite is considered hot.
std::optional< bool > AllowRecursiveCall
Indicate whether we allow inlining for recursive call.
int DefaultThreshold
The default threshold to start with for a callee.
std::optional< int > HintThreshold
Threshold to use for callees with inline hint.
std::optional< int > LocallyHotCallSiteThreshold
Threshold to use when the callsite is considered hot relative to function entry.