31#include "llvm/Config/llvm-config.h"
52#define DEBUG_TYPE "inline-cost"
54STATISTIC(NumCallsAnalyzed,
"Number of call sites analyzed");
58 cl::desc(
"Default amount of inlining to perform"));
67 cl::desc(
"Ignore TTI attributes compatibility check between callee/caller "
68 "during inline cost calculation"));
72 cl::desc(
"Prints comments for instruction based on inline cost analysis"));
76 cl::desc(
"Control the amount of inlining to perform (default = 225)"));
80 cl::desc(
"Threshold for inlining functions with inline hint"));
85 cl::desc(
"Threshold for inlining cold callsites"));
89 cl::desc(
"Enable the cost-benefit analysis for the inliner"));
96 cl::desc(
"Multiplier to multiply cycle savings by during inlining"));
103 cl::desc(
"A multiplier on top of cycle savings to decide whether the "
104 "savings won't justify the cost"));
108 cl::desc(
"The maximum size of a callee that get's "
109 "inlined without sufficient cycle savings"));
116 cl::desc(
"Threshold for inlining functions with cold attribute"));
120 cl::desc(
"Threshold for hot callsites "));
124 cl::desc(
"Threshold for locally hot callsites "));
128 cl::desc(
"Maximum block frequency, expressed as a percentage of caller's "
129 "entry frequency, for a callsite to be cold in the absence of "
130 "profile information."));
134 cl::desc(
"Minimum block frequency, expressed as a multiple of caller's "
135 "entry frequency, for a callsite to be hot in the absence of "
136 "profile information."));
140 cl::desc(
"Cost of a single instruction when inlining"));
144 cl::desc(
"Cost of load/store instruction when inlining"));
148 cl::desc(
"Call penalty that is applied per callsite when inlining"));
152 cl::init(std::numeric_limits<size_t>::max()),
153 cl::desc(
"Do not inline functions with a stack size "
154 "that exceeds the specified limit"));
159 cl::desc(
"Do not inline recursive functions with a stack "
160 "size that exceeds the specified limit"));
164 cl::desc(
"Compute the full inline cost of a call site even when the cost "
165 "exceeds the threshold."));
169 cl::desc(
"Allow inlining when caller has a superset of callee's nobuiltin "
174 cl::desc(
"Disables evaluation of GetElementPtr with constant operands"));
194namespace InlineConstants {
202class InlineCostCallAnalyzer;
/// Per-instruction bookkeeping captured during inline-cost analysis so the
/// annotation writer can print how much a single instruction moved the cost
/// and the threshold (see the CostBefore/CostAfter writes in
/// onInstructionAnalysisStart/Finish).
struct InstructionCostDetail {
  int CostBefore = 0;      // accumulated cost before visiting the instruction
  int CostAfter = 0;       // accumulated cost after visiting the instruction
  int ThresholdBefore = 0; // inline threshold before visiting the instruction
  int ThresholdAfter = 0;  // inline threshold after visiting the instruction

  /// Net threshold change caused by this instruction.
  int getThresholdDelta() const { return ThresholdAfter - ThresholdBefore; }

  /// Net cost change caused by this instruction.
  int getCostDelta() const { return CostAfter - CostBefore; }

  /// True when visiting the instruction adjusted the threshold at all.
  bool hasThresholdChanged() const { return ThresholdAfter != ThresholdBefore; }
};
221 InlineCostCallAnalyzer *
const ICCA;
224 InlineCostAnnotationWriter(InlineCostCallAnalyzer *ICCA) : ICCA(ICCA) {}
237class CallAnalyzer :
public InstVisitor<CallAnalyzer, bool> {
242 virtual ~CallAnalyzer() =
default;
274 virtual void onBlockStart(
const BasicBlock *BB) {}
277 virtual void onBlockAnalyzed(
const BasicBlock *BB) {}
280 virtual void onInstructionAnalysisStart(
const Instruction *
I) {}
283 virtual void onInstructionAnalysisFinish(
const Instruction *
I) {}
293 virtual bool shouldStop() {
return false; }
302 virtual void onDisableSROA(
AllocaInst *Arg) {}
305 virtual void onDisableLoadElimination() {}
309 virtual bool onCallBaseVisitStart(
CallBase &Call) {
return true; }
312 virtual void onCallPenalty() {}
315 virtual void onMemAccess(){};
319 virtual void onLoadEliminationOpportunity() {}
323 virtual void onCallArgumentSetup(
const CallBase &Call) {}
326 virtual void onLoadRelativeIntrinsic() {}
334 virtual bool onJumpTable(
unsigned JumpTableSize) {
return true; }
338 virtual bool onCaseCluster(
unsigned NumCaseCluster) {
return true; }
342 virtual void onFinalizeSwitch(
unsigned JumpTableSize,
unsigned NumCaseCluster,
343 bool DefaultDestUndefined) {}
347 virtual void onMissedSimplification() {}
350 virtual void onInitializeSROAArg(
AllocaInst *Arg) {}
353 virtual void onAggregateSROAUse(
AllocaInst *V) {}
355 bool handleSROA(
Value *V,
bool DoNotDisable) {
357 if (
auto *SROAArg = getSROAArgForValueOrNull(V)) {
359 onAggregateSROAUse(SROAArg);
362 disableSROAForArg(SROAArg);
367 bool IsCallerRecursive =
false;
368 bool IsRecursiveCall =
false;
369 bool ExposesReturnsTwice =
false;
370 bool HasDynamicAlloca =
false;
371 bool ContainsNoDuplicateCall =
false;
372 bool HasReturn =
false;
373 bool HasIndirectBr =
false;
374 bool HasUninlineableIntrinsic =
false;
375 bool InitsVargArgs =
false;
379 unsigned NumInstructions = 0;
380 unsigned NumVectorInstructions = 0;
411 bool EnableLoadElimination =
true;
414 bool AllowRecursiveCall =
false;
419 auto It = SROAArgValues.
find(V);
420 if (It == SROAArgValues.
end() || EnabledSROAAllocas.
count(It->second) == 0)
427 template <
typename T>
T *getDirectOrSimplifiedValue(
Value *V)
const {
428 if (
auto *Direct = dyn_cast<T>(V))
430 return dyn_cast_if_present<T>(SimplifiedValues.
lookup(V));
434 bool isAllocaDerivedArg(
Value *V);
436 void disableSROA(
Value *V);
438 void disableLoadElimination();
444 bool simplifyIntrinsicCallIsConstant(
CallBase &CB);
445 bool simplifyIntrinsicCallObjectSize(
CallBase &CB);
458 bool isKnownNonNullInCallee(
Value *V);
461 bool allowSizeGrowth(
CallBase &Call);
514 :
TTI(
TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI),
515 GetTLI(GetTLI), PSI(PSI),
F(
Callee),
DL(
F.getDataLayout()), ORE(ORE),
516 CandidateCall(
Call) {}
520 std::optional<Constant *> getSimplifiedValue(
Instruction *
I) {
521 auto It = SimplifiedValues.
find(
I);
522 if (It != SimplifiedValues.
end())
529 unsigned NumConstantArgs = 0;
530 unsigned NumConstantOffsetPtrArgs = 0;
531 unsigned NumAllocaArgs = 0;
532 unsigned NumConstantPtrCmps = 0;
533 unsigned NumConstantPtrDiffs = 0;
534 unsigned NumInstructionsSimplified = 0;
/// Heuristic estimate of the number of compares emitted when a switch with
/// \p NumCaseCluster case clusters is lowered to a tree of comparisons,
/// computed (in 64-bit arithmetic to avoid overflow) as 3*N/2 - 1.
int64_t getExpectedNumberOfCompare(int NumCaseCluster) {
  return 3 * static_cast<int64_t>(NumCaseCluster) / 2 - 1;
}
560class InlineCostCallAnalyzer final :
public CallAnalyzer {
561 const bool ComputeFullInlineCost;
562 int LoadEliminationCost = 0;
567 int SingleBBBonus = 0;
582 int StaticBonusApplied = 0;
585 const bool BoostIndirectCalls;
588 const bool IgnoreThreshold;
591 const bool CostBenefitAnalysisEnabled;
602 int CostAtBBStart = 0;
609 bool DecidedByCostThreshold =
false;
612 bool DecidedByCostBenefit =
false;
617 bool SingleBB =
true;
619 unsigned SROACostSavings = 0;
620 unsigned SROACostSavingsLost = 0;
636 std::optional<int> getHotCallSiteThreshold(
CallBase &Call,
640 void addCost(int64_t Inc) {
641 Inc = std::clamp<int64_t>(Inc, INT_MIN, INT_MAX);
642 Cost = std::clamp<int64_t>(Inc +
Cost, INT_MIN, INT_MAX);
645 void onDisableSROA(
AllocaInst *Arg)
override {
646 auto CostIt = SROAArgCosts.
find(Arg);
647 if (CostIt == SROAArgCosts.
end())
649 addCost(CostIt->second);
650 SROACostSavings -= CostIt->second;
651 SROACostSavingsLost += CostIt->second;
652 SROAArgCosts.
erase(CostIt);
655 void onDisableLoadElimination()
override {
656 addCost(LoadEliminationCost);
657 LoadEliminationCost = 0;
660 bool onCallBaseVisitStart(
CallBase &Call)
override {
661 if (std::optional<int> AttrCallThresholdBonus =
663 Threshold += *AttrCallThresholdBonus;
665 if (std::optional<int> AttrCallCost =
667 addCost(*AttrCallCost);
675 void onCallPenalty()
override { addCost(
CallPenalty); }
679 void onCallArgumentSetup(
const CallBase &Call)
override {
684 void onLoadRelativeIntrinsic()
override {
689 bool IsIndirectCall)
override {
698 if (IsIndirectCall && BoostIndirectCalls) {
699 auto IndirectCallParams = Params;
704 InlineCostCallAnalyzer CA(*
F, Call, IndirectCallParams,
TTI,
705 GetAssumptionCache, GetBFI, GetTLI, PSI, ORE,
707 if (CA.analyze().isSuccess()) {
710 Cost -= std::max(0, CA.getThreshold() - CA.getCost());
718 void onFinalizeSwitch(
unsigned JumpTableSize,
unsigned NumCaseCluster,
719 bool DefaultDestUndefined)
override {
726 if (!DefaultDestUndefined)
735 if (NumCaseCluster <= 3) {
739 addCost((NumCaseCluster - DefaultDestUndefined) * 2 *
InstrCost);
743 int64_t ExpectedNumberOfCompare =
744 getExpectedNumberOfCompare(NumCaseCluster);
745 int64_t SwitchCost = ExpectedNumberOfCompare * 2 *
InstrCost;
749 void onMissedSimplification()
override { addCost(
InstrCost); }
751 void onInitializeSROAArg(
AllocaInst *Arg)
override {
753 "Should not initialize SROA costs for null value.");
755 SROACostSavings += SROAArgCost;
756 SROAArgCosts[Arg] = SROAArgCost;
759 void onAggregateSROAUse(
AllocaInst *SROAArg)
override {
760 auto CostIt = SROAArgCosts.
find(SROAArg);
762 "expected this argument to have a cost");
767 void onBlockStart(
const BasicBlock *BB)
override { CostAtBBStart =
Cost; }
769 void onBlockAnalyzed(
const BasicBlock *BB)
override {
770 if (CostBenefitAnalysisEnabled) {
773 assert(GetBFI &&
"GetBFI must be available");
775 assert(BFI &&
"BFI must be available");
778 ColdSize +=
Cost - CostAtBBStart;
786 if (SingleBB && TI->getNumSuccessors() > 1) {
788 Threshold -= SingleBBBonus;
793 void onInstructionAnalysisStart(
const Instruction *
I)
override {
798 InstructionCostDetailMap[
I].CostBefore =
Cost;
799 InstructionCostDetailMap[
I].ThresholdBefore = Threshold;
802 void onInstructionAnalysisFinish(
const Instruction *
I)
override {
807 InstructionCostDetailMap[
I].CostAfter =
Cost;
808 InstructionCostDetailMap[
I].ThresholdAfter = Threshold;
811 bool isCostBenefitAnalysisEnabled() {
812 if (!PSI || !PSI->hasProfileSummary())
824 if (!PSI->hasInstrumentationProfile())
829 if (!
Caller->getEntryCount())
837 if (!PSI->isHotCallSite(CandidateCall, CallerBFI))
841 auto EntryCount =
F.getEntryCount();
842 if (!EntryCount || !EntryCount->getCount())
853 unsigned getInliningCostBenefitAnalysisSavingsMultiplier()
const {
860 unsigned getInliningCostBenefitAnalysisProfitableMultiplier()
const {
866 void OverrideCycleSavingsAndSizeForTesting(
APInt &CycleSavings,
int &
Size) {
868 CandidateCall,
"inline-cycle-savings-for-test")) {
869 CycleSavings = *AttrCycleSavings;
873 CandidateCall,
"inline-runtime-cost-for-test")) {
874 Size = *AttrRuntimeCost;
881 std::optional<bool> costBenefitAnalysis() {
882 if (!CostBenefitAnalysisEnabled)
906 APInt CycleSavings(128, 0);
909 APInt CurrentSavings(128, 0);
913 if (BI->isConditional() &&
914 isa_and_nonnull<ConstantInt>(
915 SimplifiedValues.
lookup(BI->getCondition()))) {
918 }
else if (
SwitchInst *SI = dyn_cast<SwitchInst>(&
I)) {
919 if (isa_and_present<ConstantInt>(SimplifiedValues.
lookup(
SI->getCondition())))
921 }
else if (
Value *V = dyn_cast<Value>(&
I)) {
923 if (SimplifiedValues.
count(V)) {
931 CycleSavings += CurrentSavings;
935 auto EntryProfileCount =
F.getEntryCount();
936 assert(EntryProfileCount && EntryProfileCount->getCount());
937 auto EntryCount = EntryProfileCount->getCount();
938 CycleSavings += EntryCount / 2;
939 CycleSavings = CycleSavings.
udiv(EntryCount);
942 auto *CallerBB = CandidateCall.
getParent();
956 OverrideCycleSavingsAndSizeForTesting(CycleSavings,
Size);
980 APInt Threshold(128, PSI->getOrCompHotCountThreshold());
983 APInt UpperBoundCycleSavings = CycleSavings;
984 UpperBoundCycleSavings *= getInliningCostBenefitAnalysisSavingsMultiplier();
985 if (UpperBoundCycleSavings.
uge(Threshold))
988 APInt LowerBoundCycleSavings = CycleSavings;
989 LowerBoundCycleSavings *=
990 getInliningCostBenefitAnalysisProfitableMultiplier();
991 if (LowerBoundCycleSavings.
ult(Threshold))
1005 if (
Caller->hasMinSize()) {
1009 for (
Loop *L : LI) {
1011 if (DeadBlocks.
count(
L->getHeader()))
1021 if (NumVectorInstructions <= NumInstructions / 10)
1022 Threshold -= VectorBonus;
1023 else if (NumVectorInstructions <= NumInstructions / 2)
1024 Threshold -= VectorBonus / 2;
1026 if (std::optional<int> AttrCost =
1033 Cost *= *AttrCostMult;
1035 if (std::optional<int> AttrThreshold =
1037 Threshold = *AttrThreshold;
1039 if (
auto Result = costBenefitAnalysis()) {
1040 DecidedByCostBenefit =
true;
1047 if (IgnoreThreshold)
1050 DecidedByCostThreshold =
true;
1051 return Cost < std::max(1, Threshold)
1056 bool shouldStop()
override {
1057 if (IgnoreThreshold || ComputeFullInlineCost)
1061 if (
Cost < Threshold)
1063 DecidedByCostThreshold =
true;
1067 void onLoadEliminationOpportunity()
override {
1082 assert(NumInstructions == 0);
1083 assert(NumVectorInstructions == 0);
1086 updateThreshold(CandidateCall,
F);
1092 assert(SingleBBBonus >= 0);
1093 assert(VectorBonus >= 0);
1098 Threshold += (SingleBBBonus + VectorBonus);
1112 if (
Cost >= Threshold && !ComputeFullInlineCost)
1119 InlineCostCallAnalyzer(
1127 bool IgnoreThreshold =
false)
1128 : CallAnalyzer(
Callee,
Call,
TTI, GetAssumptionCache, GetBFI, GetTLI, PSI,
1131 Params.ComputeFullInlineCost || ORE ||
1132 isCostBenefitAnalysisEnabled()),
1134 BoostIndirectCalls(BoostIndirect), IgnoreThreshold(IgnoreThreshold),
1135 CostBenefitAnalysisEnabled(isCostBenefitAnalysisEnabled()),
1141 InlineCostAnnotationWriter Writer;
1149 std::optional<InstructionCostDetail> getCostDetails(
const Instruction *
I) {
1150 auto It = InstructionCostDetailMap.
find(
I);
1151 if (It != InstructionCostDetailMap.
end())
1153 return std::nullopt;
1156 virtual ~InlineCostCallAnalyzer() =
default;
1157 int getThreshold()
const {
return Threshold; }
1158 int getCost()
const {
return Cost; }
1159 int getStaticBonusApplied()
const {
return StaticBonusApplied; }
1160 std::optional<CostBenefitPair> getCostBenefitPair() {
return CostBenefit; }
1161 bool wasDecidedByCostBenefit()
const {
return DecidedByCostBenefit; }
1162 bool wasDecidedByCostThreshold()
const {
return DecidedByCostThreshold; }
1166static bool isSoleCallToLocalFunction(
const CallBase &CB,
1168 return Callee.hasLocalLinkage() &&
Callee.hasOneLiveUse() &&
1172class InlineCostFeaturesAnalyzer final :
public CallAnalyzer {
1179 static constexpr int JTCostMultiplier = 2;
1180 static constexpr int CaseClusterCostMultiplier = 2;
1181 static constexpr int SwitchDefaultDestCostMultiplier = 2;
1182 static constexpr int SwitchCostMultiplier = 2;
1186 unsigned SROACostSavingOpportunities = 0;
1187 int VectorBonus = 0;
1188 int SingleBBBonus = 0;
1194 Cost[
static_cast<size_t>(Feature)] += Delta;
1198 Cost[
static_cast<size_t>(Feature)] =
Value;
1201 void onDisableSROA(
AllocaInst *Arg)
override {
1202 auto CostIt = SROACosts.
find(Arg);
1203 if (CostIt == SROACosts.
end())
1206 increment(InlineCostFeatureIndex::sroa_losses, CostIt->second);
1207 SROACostSavingOpportunities -= CostIt->second;
1208 SROACosts.
erase(CostIt);
1211 void onDisableLoadElimination()
override {
1212 set(InlineCostFeatureIndex::load_elimination, 1);
1215 void onCallPenalty()
override {
1216 increment(InlineCostFeatureIndex::call_penalty,
CallPenalty);
1219 void onCallArgumentSetup(
const CallBase &Call)
override {
1220 increment(InlineCostFeatureIndex::call_argument_setup,
1224 void onLoadRelativeIntrinsic()
override {
1225 increment(InlineCostFeatureIndex::load_relative_intrinsic, 3 *
InstrCost);
1229 bool IsIndirectCall)
override {
1230 increment(InlineCostFeatureIndex::lowered_call_arg_setup,
1233 if (IsIndirectCall) {
1247 InlineCostCallAnalyzer CA(*
F, Call, IndirectCallParams,
TTI,
1248 GetAssumptionCache, GetBFI, GetTLI, PSI, ORE,
1250 if (CA.analyze().isSuccess()) {
1251 increment(InlineCostFeatureIndex::nested_inline_cost_estimate,
1253 increment(InlineCostFeatureIndex::nested_inlines, 1);
1260 void onFinalizeSwitch(
unsigned JumpTableSize,
unsigned NumCaseCluster,
1261 bool DefaultDestUndefined)
override {
1262 if (JumpTableSize) {
1263 if (!DefaultDestUndefined)
1264 increment(InlineCostFeatureIndex::switch_default_dest_penalty,
1265 SwitchDefaultDestCostMultiplier *
InstrCost);
1266 int64_t JTCost =
static_cast<int64_t
>(JumpTableSize) *
InstrCost +
1268 increment(InlineCostFeatureIndex::jump_table_penalty, JTCost);
1272 if (NumCaseCluster <= 3) {
1273 increment(InlineCostFeatureIndex::case_cluster_penalty,
1274 (NumCaseCluster - DefaultDestUndefined) *
1279 int64_t ExpectedNumberOfCompare =
1280 getExpectedNumberOfCompare(NumCaseCluster);
1282 int64_t SwitchCost =
1283 ExpectedNumberOfCompare * SwitchCostMultiplier *
InstrCost;
1284 increment(InlineCostFeatureIndex::switch_penalty, SwitchCost);
1287 void onMissedSimplification()
override {
1288 increment(InlineCostFeatureIndex::unsimplified_common_instructions,
1292 void onInitializeSROAArg(
AllocaInst *Arg)
override {
1294 SROACosts[Arg] = SROAArgCost;
1295 SROACostSavingOpportunities += SROAArgCost;
1298 void onAggregateSROAUse(
AllocaInst *Arg)
override {
1300 SROACostSavingOpportunities +=
InstrCost;
1303 void onBlockAnalyzed(
const BasicBlock *BB)
override {
1305 set(InlineCostFeatureIndex::is_multiple_blocks, 1);
1306 Threshold -= SingleBBBonus;
1311 if (
Caller->hasMinSize()) {
1314 for (
Loop *L : LI) {
1316 if (DeadBlocks.
count(
L->getHeader()))
1318 increment(InlineCostFeatureIndex::num_loops,
1322 set(InlineCostFeatureIndex::dead_blocks, DeadBlocks.
size());
1323 set(InlineCostFeatureIndex::simplified_instructions,
1324 NumInstructionsSimplified);
1325 set(InlineCostFeatureIndex::constant_args, NumConstantArgs);
1326 set(InlineCostFeatureIndex::constant_offset_ptr_args,
1327 NumConstantOffsetPtrArgs);
1328 set(InlineCostFeatureIndex::sroa_savings, SROACostSavingOpportunities);
1330 if (NumVectorInstructions <= NumInstructions / 10)
1331 Threshold -= VectorBonus;
1332 else if (NumVectorInstructions <= NumInstructions / 2)
1333 Threshold -= VectorBonus / 2;
1335 set(InlineCostFeatureIndex::threshold, Threshold);
1340 bool shouldStop()
override {
return false; }
1342 void onLoadEliminationOpportunity()
override {
1343 increment(InlineCostFeatureIndex::load_elimination, 1);
1347 increment(InlineCostFeatureIndex::callsite_cost,
1350 set(InlineCostFeatureIndex::cold_cc_penalty,
1353 set(InlineCostFeatureIndex::last_call_to_static_bonus,
1354 isSoleCallToLocalFunction(CandidateCall,
F));
1359 int SingleBBBonusPercent = 50;
1363 SingleBBBonus = Threshold * SingleBBBonusPercent / 100;
1364 VectorBonus = Threshold * VectorBonusPercent / 100;
1365 Threshold += (SingleBBBonus + VectorBonus);
1371 InlineCostFeaturesAnalyzer(
1378 : CallAnalyzer(
Callee,
Call,
TTI, GetAssumptionCache, GetBFI, GetTLI,
1387bool CallAnalyzer::isAllocaDerivedArg(
Value *V) {
1388 return SROAArgValues.
count(V);
1391void CallAnalyzer::disableSROAForArg(
AllocaInst *SROAArg) {
1392 onDisableSROA(SROAArg);
1393 EnabledSROAAllocas.
erase(SROAArg);
1394 disableLoadElimination();
1397void InlineCostAnnotationWriter::emitInstructionAnnot(
1402 std::optional<InstructionCostDetail>
Record = ICCA->getCostDetails(
I);
1404 OS <<
"; No analysis for the instruction";
1406 OS <<
"; cost before = " <<
Record->CostBefore
1407 <<
", cost after = " <<
Record->CostAfter
1408 <<
", threshold before = " <<
Record->ThresholdBefore
1409 <<
", threshold after = " <<
Record->ThresholdAfter <<
", ";
1410 OS <<
"cost delta = " <<
Record->getCostDelta();
1411 if (
Record->hasThresholdChanged())
1412 OS <<
", threshold delta = " <<
Record->getThresholdDelta();
1414 auto C = ICCA->getSimplifiedValue(
const_cast<Instruction *
>(
I));
1416 OS <<
", simplified to ";
1417 (*C)->print(
OS,
true);
1423void CallAnalyzer::disableSROA(
Value *V) {
1424 if (
auto *SROAArg = getSROAArgForValueOrNull(V)) {
1425 disableSROAForArg(SROAArg);
1429void CallAnalyzer::disableLoadElimination() {
1430 if (EnableLoadElimination) {
1431 onDisableLoadElimination();
1432 EnableLoadElimination =
false;
1441 unsigned IntPtrWidth =
DL.getIndexTypeSizeInBits(
GEP.getType());
1445 GTI != GTE; ++GTI) {
1447 getDirectOrSimplifiedValue<ConstantInt>(GTI.getOperand());
1454 if (
StructType *STy = GTI.getStructTypeOrNull()) {
1473 for (
const Use &
Op :
GEP.indices())
1484 disableSROA(
I.getOperand(0));
1488 if (
I.isArrayAllocation()) {
1490 if (
auto *AllocSize = dyn_cast_or_null<ConstantInt>(
Size)) {
1499 Type *Ty =
I.getAllocatedType();
1501 AllocSize->getLimitedValue(),
1502 DL.getTypeAllocSize(Ty).getKnownMinValue(), AllocatedSize);
1504 HasDynamicAlloca =
true;
1510 if (
I.isStaticAlloca()) {
1511 Type *Ty =
I.getAllocatedType();
1512 AllocatedSize =
SaturatingAdd(
DL.getTypeAllocSize(Ty).getKnownMinValue(),
1520 if (!
I.isStaticAlloca())
1521 HasDynamicAlloca =
true;
1526bool CallAnalyzer::visitPHI(
PHINode &
I) {
1538 bool CheckSROA =
I.getType()->isPointerTy();
1542 std::pair<Value *, APInt> FirstBaseAndOffset = {
nullptr, ZeroOffset};
1543 Value *FirstV =
nullptr;
1545 for (
unsigned i = 0, e =
I.getNumIncomingValues(); i != e; ++i) {
1548 if (DeadBlocks.
count(Pred))
1552 BasicBlock *KnownSuccessor = KnownSuccessors[Pred];
1553 if (KnownSuccessor && KnownSuccessor !=
I.getParent())
1556 Value *
V =
I.getIncomingValue(i);
1561 Constant *
C = getDirectOrSimplifiedValue<Constant>(V);
1563 std::pair<Value *, APInt> BaseAndOffset = {
nullptr, ZeroOffset};
1564 if (!
C && CheckSROA)
1565 BaseAndOffset = ConstantOffsetPtrs.
lookup(V);
1567 if (!
C && !BaseAndOffset.first)
1584 if (FirstBaseAndOffset == BaseAndOffset)
1598 FirstBaseAndOffset = BaseAndOffset;
1603 SimplifiedValues[&
I] = FirstC;
1608 if (FirstBaseAndOffset.first) {
1609 ConstantOffsetPtrs[&
I] = FirstBaseAndOffset;
1611 if (
auto *SROAArg = getSROAArgForValueOrNull(FirstV))
1612 SROAArgValues[&
I] = SROAArg;
1624 std::pair<Value *, APInt> BaseAndOffset =
1625 ConstantOffsetPtrs.
lookup(
I.getPointerOperand());
1626 if (!BaseAndOffset.first)
1631 if (!accumulateGEPOffset(cast<GEPOperator>(
I), BaseAndOffset.second))
1635 ConstantOffsetPtrs[&
I] = BaseAndOffset;
1641 auto *SROAArg = getSROAArgForValueOrNull(
I.getPointerOperand());
1645 for (
const Use &
Op :
GEP.indices())
1646 if (!getDirectOrSimplifiedValue<Constant>(
Op))
1655 if ((
I.isInBounds() && canFoldInboundsGEP(
I)) || IsGEPOffsetConstant(
I)) {
1657 SROAArgValues[&
I] = SROAArg;
1665 disableSROAForArg(SROAArg);
1666 return isGEPFree(
I);
1670bool CallAnalyzer::simplifyInstruction(
Instruction &
I) {
1673 Constant *COp = getDirectOrSimplifiedValue<Constant>(
Op);
1681 SimplifiedValues[&
I] =
C;
1694bool CallAnalyzer::simplifyIntrinsicCallIsConstant(
CallBase &CB) {
1696 auto *
C = getDirectOrSimplifiedValue<Constant>(Arg);
1699 SimplifiedValues[&CB] = ConstantInt::get(RT,
C ? 1 : 0);
1703bool CallAnalyzer::simplifyIntrinsicCallObjectSize(
CallBase &CB) {
1711 Constant *
C = dyn_cast_or_null<Constant>(V);
1713 SimplifiedValues[&CB] =
C;
1723 std::pair<Value *, APInt> BaseAndOffset =
1724 ConstantOffsetPtrs.
lookup(
I.getOperand(0));
1726 if (BaseAndOffset.first)
1727 ConstantOffsetPtrs[&
I] = BaseAndOffset;
1730 if (
auto *SROAArg = getSROAArgForValueOrNull(
I.getOperand(0)))
1731 SROAArgValues[&
I] = SROAArg;
1744 unsigned IntegerSize =
I.getType()->getScalarSizeInBits();
1745 unsigned AS =
I.getOperand(0)->getType()->getPointerAddressSpace();
1746 if (IntegerSize ==
DL.getPointerSizeInBits(AS)) {
1747 std::pair<Value *, APInt> BaseAndOffset =
1748 ConstantOffsetPtrs.
lookup(
I.getOperand(0));
1749 if (BaseAndOffset.first)
1750 ConstantOffsetPtrs[&
I] = BaseAndOffset;
1760 if (
auto *SROAArg = getSROAArgForValueOrNull(
I.getOperand(0)))
1761 SROAArgValues[&
I] = SROAArg;
1775 unsigned IntegerSize =
Op->getType()->getScalarSizeInBits();
1776 if (IntegerSize <=
DL.getPointerTypeSizeInBits(
I.getType())) {
1777 std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.
lookup(
Op);
1778 if (BaseAndOffset.first)
1779 ConstantOffsetPtrs[&
I] = BaseAndOffset;
1783 if (
auto *SROAArg = getSROAArgForValueOrNull(
Op))
1784 SROAArgValues[&
I] = SROAArg;
1790bool CallAnalyzer::visitCastInst(
CastInst &
I) {
1797 disableSROA(
I.getOperand(0));
1802 switch (
I.getOpcode()) {
1803 case Instruction::FPTrunc:
1804 case Instruction::FPExt:
1805 case Instruction::UIToFP:
1806 case Instruction::SIToFP:
1807 case Instruction::FPToUI:
1808 case Instruction::FPToSI:
1824bool CallAnalyzer::isKnownNonNullInCallee(
Value *V) {
1830 if (
Argument *
A = dyn_cast<Argument>(V))
1831 if (paramHasAttr(
A, Attribute::NonNull))
1837 if (isAllocaDerivedArg(V))
1846bool CallAnalyzer::allowSizeGrowth(
CallBase &Call) {
1863 if (isa<UnreachableInst>(
II->getNormalDest()->getTerminator()))
1865 }
else if (isa<UnreachableInst>(
Call.getParent()->getTerminator()))
1871bool InlineCostCallAnalyzer::isColdCallSite(
CallBase &Call,
1875 if (PSI && PSI->hasProfileSummary())
1876 return PSI->isColdCallSite(Call, CallerBFI);
1887 auto CallSiteBB =
Call.getParent();
1888 auto CallSiteFreq = CallerBFI->
getBlockFreq(CallSiteBB);
1889 auto CallerEntryFreq =
1891 return CallSiteFreq < CallerEntryFreq * ColdProb;
1895InlineCostCallAnalyzer::getHotCallSiteThreshold(
CallBase &Call,
1900 if (PSI && PSI->hasProfileSummary() && PSI->isHotCallSite(Call, CallerBFI))
1906 return std::nullopt;
1916 if (Limit && CallSiteFreq >= *Limit)
1920 return std::nullopt;
1923void InlineCostCallAnalyzer::updateThreshold(
CallBase &Call,
Function &Callee) {
1925 if (!allowSizeGrowth(Call)) {
1933 auto MinIfValid = [](
int A, std::optional<int>
B) {
1934 return B ? std::min(
A, *
B) :
A;
1938 auto MaxIfValid = [](
int A, std::optional<int>
B) {
1939 return B ? std::max(
A, *
B) :
A;
1954 int SingleBBBonusPercent = 50;
1959 auto DisallowAllBonuses = [&]() {
1960 SingleBBBonusPercent = 0;
1961 VectorBonusPercent = 0;
1962 LastCallToStaticBonus = 0;
1967 if (
Caller->hasMinSize()) {
1973 SingleBBBonusPercent = 0;
1974 VectorBonusPercent = 0;
1975 }
else if (
Caller->hasOptSize())
1980 if (!
Caller->hasMinSize()) {
1981 if (
Callee.hasFnAttribute(Attribute::InlineHint))
2006 DisallowAllBonuses();
2011 if (PSI->isFunctionEntryHot(&Callee)) {
2017 }
else if (PSI->isFunctionEntryCold(&Callee)) {
2023 DisallowAllBonuses();
2035 SingleBBBonus = Threshold * SingleBBBonusPercent / 100;
2036 VectorBonus = Threshold * VectorBonusPercent / 100;
2041 if (isSoleCallToLocalFunction(Call,
F)) {
2042 Cost -= LastCallToStaticBonus;
2043 StaticBonusApplied = LastCallToStaticBonus;
2047bool CallAnalyzer::visitCmpInst(
CmpInst &
I) {
2053 if (
I.getOpcode() == Instruction::FCmp)
2058 Value *LHSBase, *RHSBase;
2059 APInt LHSOffset, RHSOffset;
2060 std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.
lookup(LHS);
2062 std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.
lookup(RHS);
2063 if (RHSBase && LHSBase == RHSBase) {
2069 ++NumConstantPtrCmps;
2074 auto isImplicitNullCheckCmp = [](
const CmpInst &
I) {
2075 for (
auto *
User :
I.users())
2076 if (
auto *Instr = dyn_cast<Instruction>(
User))
2077 if (!
Instr->getMetadata(LLVMContext::MD_make_implicit))
2084 if (
I.isEquality() && isa<ConstantPointerNull>(
I.getOperand(1))) {
2085 if (isKnownNonNullInCallee(
I.getOperand(0))) {
2093 if (isImplicitNullCheckCmp(
I))
2096 return handleSROA(
I.getOperand(0), isa<ConstantPointerNull>(
I.getOperand(1)));
2103 Value *LHSBase, *RHSBase;
2104 APInt LHSOffset, RHSOffset;
2105 std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.
lookup(LHS);
2107 std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.
lookup(RHS);
2108 if (RHSBase && LHSBase == RHSBase) {
2114 SimplifiedValues[&
I] =
C;
2115 ++NumConstantPtrDiffs;
2123 return Base::visitSub(
I);
2128 Constant *CLHS = getDirectOrSimplifiedValue<Constant>(LHS);
2129 Constant *CRHS = getDirectOrSimplifiedValue<Constant>(RHS);
2131 Value *SimpleV =
nullptr;
2132 if (
auto FI = dyn_cast<FPMathOperator>(&
I))
2133 SimpleV =
simplifyBinOp(
I.getOpcode(), CLHS ? CLHS : LHS, CRHS ? CRHS : RHS,
2134 FI->getFastMathFlags(),
DL);
2139 if (
Constant *
C = dyn_cast_or_null<Constant>(SimpleV))
2140 SimplifiedValues[&
I] =
C;
2153 if (
I.getType()->isFloatingPointTy() &&
2163 Constant *COp = getDirectOrSimplifiedValue<Constant>(
Op);
2168 if (
Constant *
C = dyn_cast_or_null<Constant>(SimpleV))
2169 SimplifiedValues[&
I] =
C;
2180bool CallAnalyzer::visitLoad(
LoadInst &
I) {
2181 if (handleSROA(
I.getPointerOperand(),
I.isSimple()))
2187 if (EnableLoadElimination &&
2188 !LoadAddrSet.
insert(
I.getPointerOperand()).second &&
I.isUnordered()) {
2189 onLoadEliminationOpportunity();
2198 if (handleSROA(
I.getPointerOperand(),
I.isSimple()))
2209 disableLoadElimination();
2221 return Base::visitExtractValue(
I);
2230 return Base::visitInsertValue(
I);
2251 Constant *
C = getDirectOrSimplifiedValue<Constant>(
I);
2258 SimplifiedValues[&
Call] =
C;
2272 case LibFunc_memcpy_chk:
2273 case LibFunc_memmove_chk:
2274 case LibFunc_mempcpy_chk:
2275 case LibFunc_memset_chk: {
2282 auto *LenOp = getDirectOrSimplifiedValue<ConstantInt>(
Call.getOperand(2));
2284 getDirectOrSimplifiedValue<ConstantInt>(
Call.getOperand(3));
2285 if (LenOp && ObjSizeOp &&
2286 LenOp->getLimitedValue() <= ObjSizeOp->getLimitedValue()) {
2298bool CallAnalyzer::visitCallBase(
CallBase &Call) {
2299 if (!onCallBaseVisitStart(Call))
2302 if (
Call.hasFnAttr(Attribute::ReturnsTwice) &&
2303 !
F.hasFnAttribute(Attribute::ReturnsTwice)) {
2305 ExposesReturnsTwice =
true;
2308 if (isa<CallInst>(Call) && cast<CallInst>(Call).cannotDuplicate())
2309 ContainsNoDuplicateCall =
true;
2312 bool IsIndirectCall = !
F;
2313 if (IsIndirectCall) {
2317 F = dyn_cast_or_null<Function>(SimplifiedValues.
lookup(Callee));
2318 if (!
F ||
F->getFunctionType() !=
Call.getFunctionType()) {
2319 onCallArgumentSetup(Call);
2321 if (!
Call.onlyReadsMemory())
2322 disableLoadElimination();
2323 return Base::visitCallBase(Call);
2327 assert(
F &&
"Expected a call to a known function");
2330 if (simplifyCallSite(
F, Call))
2336 switch (
II->getIntrinsicID()) {
2339 disableLoadElimination();
2340 return Base::visitCallBase(Call);
2342 case Intrinsic::load_relative:
2343 onLoadRelativeIntrinsic();
2346 case Intrinsic::memset:
2347 case Intrinsic::memcpy:
2348 case Intrinsic::memmove:
2349 disableLoadElimination();
2352 case Intrinsic::icall_branch_funnel:
2353 case Intrinsic::localescape:
2354 HasUninlineableIntrinsic =
true;
2356 case Intrinsic::vastart:
2357 InitsVargArgs =
true;
2359 case Intrinsic::launder_invariant_group:
2360 case Intrinsic::strip_invariant_group:
2361 if (
auto *SROAArg = getSROAArgForValueOrNull(
II->getOperand(0)))
2362 SROAArgValues[
II] = SROAArg;
2364 case Intrinsic::is_constant:
2365 return simplifyIntrinsicCallIsConstant(Call);
2366 case Intrinsic::objectsize:
2367 return simplifyIntrinsicCallObjectSize(Call);
2371 if (
F ==
Call.getFunction()) {
2374 IsRecursiveCall =
true;
2375 if (!AllowRecursiveCall)
2379 if (isLoweredToCall(
F, Call)) {
2380 onLoweredCall(
F, Call, IsIndirectCall);
2383 if (!(
Call.onlyReadsMemory() || (IsIndirectCall &&
F->onlyReadsMemory())))
2384 disableLoadElimination();
2385 return Base::visitCallBase(Call);
2388bool CallAnalyzer::visitReturnInst(
ReturnInst &RI) {
2390 bool Free = !HasReturn;
2395bool CallAnalyzer::visitBranchInst(
BranchInst &BI) {
2401 getDirectOrSimplifiedValue<ConstantInt>(BI.
getCondition()) ||
2405bool CallAnalyzer::visitSelectInst(
SelectInst &SI) {
2406 bool CheckSROA =
SI.getType()->isPointerTy();
2410 Constant *TrueC = getDirectOrSimplifiedValue<Constant>(TrueVal);
2411 Constant *FalseC = getDirectOrSimplifiedValue<Constant>(FalseVal);
2413 dyn_cast_or_null<Constant>(SimplifiedValues.
lookup(
SI.getCondition()));
2417 if (TrueC == FalseC && TrueC) {
2418 SimplifiedValues[&
SI] = TrueC;
2423 return Base::visitSelectInst(SI);
2425 std::pair<Value *, APInt> TrueBaseAndOffset =
2426 ConstantOffsetPtrs.
lookup(TrueVal);
2427 std::pair<Value *, APInt> FalseBaseAndOffset =
2428 ConstantOffsetPtrs.
lookup(FalseVal);
2429 if (TrueBaseAndOffset == FalseBaseAndOffset && TrueBaseAndOffset.first) {
2430 ConstantOffsetPtrs[&
SI] = TrueBaseAndOffset;
2432 if (
auto *SROAArg = getSROAArgForValueOrNull(TrueVal))
2433 SROAArgValues[&
SI] = SROAArg;
2437 return Base::visitSelectInst(SI);
2448 if (TrueC && FalseC) {
2450 SimplifiedValues[&
SI] =
C;
2454 return Base::visitSelectInst(SI);
2458 if (
Constant *SelectedC = dyn_cast<Constant>(SelectedV)) {
2459 SimplifiedValues[&
SI] = SelectedC;
2466 std::pair<Value *, APInt> BaseAndOffset =
2467 ConstantOffsetPtrs.
lookup(SelectedV);
2468 if (BaseAndOffset.first) {
2469 ConstantOffsetPtrs[&
SI] = BaseAndOffset;
2471 if (
auto *SROAArg = getSROAArgForValueOrNull(SelectedV))
2472 SROAArgValues[&
SI] = SROAArg;
2478bool CallAnalyzer::visitSwitchInst(
SwitchInst &SI) {
2481 if (getDirectOrSimplifiedValue<ConstantInt>(
SI.getCondition()))
2496 unsigned JumpTableSize = 0;
2498 unsigned NumCaseCluster =
2501 onFinalizeSwitch(JumpTableSize, NumCaseCluster,
SI.defaultDestUndefined());
2514 HasIndirectBr =
true;
2518bool CallAnalyzer::visitResumeInst(
ResumeInst &RI) {
2552 for (
const Use &
Op :
I.operands())
2577 if (
I.isDebugOrPseudoInst())
2585 if (isa<ExtractElementInst>(
I) ||
I.getType()->isVectorTy())
2586 ++NumVectorInstructions;
2593 onInstructionAnalysisStart(&
I);
2595 if (Base::visit(&
I))
2596 ++NumInstructionsSimplified;
2598 onMissedSimplification();
2600 onInstructionAnalysisFinish(&
I);
2601 using namespace ore;
2604 if (IsRecursiveCall && !AllowRecursiveCall)
2606 else if (ExposesReturnsTwice)
2608 else if (HasDynamicAlloca)
2610 else if (HasIndirectBr)
2612 else if (HasUninlineableIntrinsic)
2614 else if (InitsVargArgs)
2616 if (!
IR.isSuccess()) {
2621 <<
NV(
"Callee", &
F) <<
" has uninlinable pattern ("
2622 <<
NV(
"InlineResult",
IR.getFailureReason())
2623 <<
") and cost is not fully computed";
2638 <<
NV(
"Callee", &
F) <<
" is "
2639 <<
NV(
"InlineResult",
IR.getFailureReason())
2640 <<
". Cost is not fully computed";
2647 "Call site analysis is not favorable to inlining.");
2659ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(
Value *&V) {
2660 if (!
V->getType()->isPointerTy())
2663 unsigned AS =
V->getType()->getPointerAddressSpace();
2664 unsigned IntPtrWidth =
DL.getIndexSizeInBits(AS);
2673 if (!
GEP->isInBounds() || !accumulateGEPOffset(*
GEP,
Offset))
2675 V =
GEP->getPointerOperand();
2676 }
else if (
GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
2677 if (GA->isInterposable())
2679 V = GA->getAliasee();
2683 assert(
V->getType()->isPointerTy() &&
"Unexpected operand type!");
2684 }
while (Visited.
insert(V).second);
2686 Type *IdxPtrTy =
DL.getIndexType(
V->getType());
2687 return cast<ConstantInt>(ConstantInt::get(IdxPtrTy,
Offset));
2701 return (DeadBlocks.
count(Pred) ||
2702 (KnownSuccessors[Pred] && KnownSuccessors[Pred] != Succ));
2707 return (!DeadBlocks.
count(BB) &&
2713 if (Succ == NextBB || !IsNewlyDead(Succ))
2717 while (!NewDead.
empty()) {
2719 if (DeadBlocks.
insert(Dead).second)
2738 auto Result = onAnalysisStart();
2749 if (Call &&
Call->getFunction() == Caller) {
2750 IsCallerRecursive =
true;
2760 if (
Constant *
C = dyn_cast<Constant>(CAI))
2761 SimplifiedValues[&FAI] =
C;
2763 Value *PtrArg = *CAI;
2764 if (
ConstantInt *
C = stripAndComputeInBoundsConstantOffsets(PtrArg)) {
2765 ConstantOffsetPtrs[&FAI] = std::make_pair(PtrArg,
C->getValue());
2768 if (
auto *SROAArg = dyn_cast<AllocaInst>(PtrArg)) {
2769 SROAArgValues[&FAI] = SROAArg;
2770 onInitializeSROAArg(SROAArg);
2771 EnabledSROAAllocas.
insert(SROAArg);
2776 NumConstantArgs = SimplifiedValues.
size();
2777 NumConstantOffsetPtrArgs = ConstantOffsetPtrs.
size();
2778 NumAllocaArgs = SROAArgValues.
size();
2794 BBSetVector BBWorklist;
2795 BBWorklist.
insert(&
F.getEntryBlock());
2798 for (
unsigned Idx = 0;
Idx != BBWorklist.size(); ++
Idx) {
2818 if (!isa<CallBrInst>(*U))
2824 if (!
IR.isSuccess())
2831 if (
BranchInst *BI = dyn_cast<BranchInst>(TI)) {
2835 dyn_cast_or_null<ConstantInt>(SimplifiedValues.
lookup(
Cond))) {
2837 BBWorklist.insert(NextBB);
2838 KnownSuccessors[BB] = NextBB;
2839 findDeadBlocks(BB, NextBB);
2843 }
else if (
SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
2846 dyn_cast_or_null<ConstantInt>(SimplifiedValues.
lookup(
Cond))) {
2847 BasicBlock *NextBB =
SI->findCaseValue(SimpleCond)->getCaseSuccessor();
2848 BBWorklist.insert(NextBB);
2849 KnownSuccessors[BB] = NextBB;
2850 findDeadBlocks(BB, NextBB);
2858 BBWorklist.insert(Succ);
2860 onBlockAnalyzed(BB);
2866 if (!isSoleCallToLocalFunction(CandidateCall,
F) && ContainsNoDuplicateCall)
2876 FinalStackSizeThreshold = *AttrMaxStackSize;
2877 if (AllocatedSize > FinalStackSizeThreshold)
2880 return finalizeAnalysis();
2884#define DEBUG_PRINT_STAT(x) OS << " " #x ": " << x << "\n"
2886 F.print(
OS, &Writer);
2900#undef DEBUG_PRINT_STAT
2903#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2917 auto CalleeTLI = GetTLI(*Callee);
2920 GetTLI(*Caller).areInlineCompatible(CalleeTLI,
2928 for (
unsigned I = 0, E = Call.arg_size();
I != E; ++
I) {
2929 if (Call.isByValArgument(
I)) {
2932 PointerType *PTy = cast<PointerType>(Call.getArgOperand(
I)->getType());
2933 unsigned TypeSize =
DL.getTypeSizeInBits(Call.getParamByValType(
I));
2935 unsigned PointerSize =
DL.getPointerSizeInBits(AS);
2937 unsigned NumStores = (
TypeSize + PointerSize - 1) / PointerSize;
2945 NumStores = std::min(NumStores, 8U);
2958 return std::min<int64_t>(
Cost, INT_MAX);
2967 return getInlineCost(Call, Call.getCalledFunction(), Params, CalleeTTI,
2968 GetAssumptionCache, GetTLI, GetBFI, PSI, ORE);
2988 InlineCostCallAnalyzer CA(*Call.getCalledFunction(), Call, Params, CalleeTTI,
2989 GetAssumptionCache, GetBFI, GetTLI, PSI, ORE,
true,
2991 auto R = CA.analyze();
2993 return std::nullopt;
2994 return CA.getCost();
3003 InlineCostFeaturesAnalyzer CFA(CalleeTTI, GetAssumptionCache, GetBFI, GetTLI,
3004 PSI, ORE, *Call.getCalledFunction(), Call);
3005 auto R = CFA.analyze();
3007 return std::nullopt;
3008 return CFA.features();
3023 if (Callee->isPresplitCoroutine())
3031 unsigned AllocaAS = Callee->getDataLayout().getAllocaAddrSpace();
3032 for (
unsigned I = 0, E = Call.arg_size();
I != E; ++
I)
3033 if (Call.isByValArgument(
I)) {
3034 PointerType *PTy = cast<PointerType>(Call.getArgOperand(
I)->getType());
3042 if (Call.hasFnAttr(Attribute::AlwaysInline)) {
3043 if (Call.getAttributes().hasFnAttr(Attribute::NoInline))
3047 if (IsViable.isSuccess())
3054 Function *Caller = Call.getCaller();
3059 if (Caller->hasOptNone())
3064 if (!Caller->nullPointerIsDefined() && Callee->nullPointerIsDefined())
3068 if (Callee->isInterposable())
3072 if (Callee->hasFnAttribute(Attribute::NoInline))
3076 if (Call.isNoInline())
3079 return std::nullopt;
3094 if (UserDecision->isSuccess())
3100 <<
"... (caller:" << Call.getCaller()->getName()
3103 InlineCostCallAnalyzer CA(*Callee, Call, Params, CalleeTTI,
3104 GetAssumptionCache, GetBFI, GetTLI, PSI, ORE);
3112 if (CA.wasDecidedByCostBenefit()) {
3115 CA.getCostBenefitPair());
3120 if (CA.wasDecidedByCostThreshold())
3122 CA.getStaticBonusApplied());
3131 bool ReturnsTwice =
F.hasFnAttribute(Attribute::ReturnsTwice);
3141 if (!isa<CallBrInst>(*U))
3144 for (
auto &
II : BB) {
3150 Function *Callee = Call->getCalledFunction();
3156 if (!ReturnsTwice && isa<CallInst>(Call) &&
3157 cast<CallInst>(Call)->canReturnTwice())
3161 switch (Callee->getIntrinsicID()) {
3164 case llvm::Intrinsic::icall_branch_funnel:
3168 "disallowed inlining of @llvm.icall.branch.funnel");
3169 case llvm::Intrinsic::localescape:
3173 "disallowed inlining of @llvm.localescape");
3174 case llvm::Intrinsic::vastart:
3178 "contains VarArgs initialized with va_start");
3249 unsigned SizeOptLevel) {
3252 if (SizeOptLevel == 1)
3254 if (SizeOptLevel == 2)
3289 if (
auto *CB = dyn_cast<CallBase>(&
I)) {
3294 InlineCostCallAnalyzer ICCA(*CalledFunction, *CB, Params,
TTI,
3295 GetAssumptionCache,
nullptr,
nullptr, &PSI,
3298 OS <<
" Analyzing call of " << CalledFunction->
getName()
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
static bool isColdCallSite(CallBase &CB, BlockFrequencyInfo &CallerBFI)
Return true if the block containing the call site has a BlockFrequency of less than ColdCCRelFreq% of...
static cl::opt< int > InlineSavingsMultiplier("inline-savings-multiplier", cl::Hidden, cl::init(8), cl::desc("Multiplier to multiply cycle savings by during inlining"))
static cl::opt< int > InlineThreshold("inline-threshold", cl::Hidden, cl::init(225), cl::desc("Control the amount of inlining to perform (default = 225)"))
static cl::opt< int > CallPenalty("inline-call-penalty", cl::Hidden, cl::init(25), cl::desc("Call penalty that is applied per callsite when inlining"))
static cl::opt< int > HotCallSiteThreshold("hot-callsite-threshold", cl::Hidden, cl::init(3000), cl::desc("Threshold for hot callsites "))
static cl::opt< int > ColdThreshold("inlinecold-threshold", cl::Hidden, cl::init(45), cl::desc("Threshold for inlining functions with cold attribute"))
static cl::opt< size_t > RecurStackSizeThreshold("recursive-inline-max-stacksize", cl::Hidden, cl::init(InlineConstants::TotalAllocaSizeRecursiveCaller), cl::desc("Do not inline recursive functions with a stack " "size that exceeds the specified limit"))
static cl::opt< bool > PrintInstructionComments("print-instruction-comments", cl::Hidden, cl::init(false), cl::desc("Prints comments for instruction based on inline cost analysis"))
static cl::opt< int > LocallyHotCallSiteThreshold("locally-hot-callsite-threshold", cl::Hidden, cl::init(525), cl::desc("Threshold for locally hot callsites "))
static cl::opt< bool > InlineCallerSupersetNoBuiltin("inline-caller-superset-nobuiltin", cl::Hidden, cl::init(true), cl::desc("Allow inlining when caller has a superset of callee's nobuiltin " "attributes."))
static cl::opt< int > HintThreshold("inlinehint-threshold", cl::Hidden, cl::init(325), cl::desc("Threshold for inlining functions with inline hint"))
static cl::opt< size_t > StackSizeThreshold("inline-max-stacksize", cl::Hidden, cl::init(std::numeric_limits< size_t >::max()), cl::desc("Do not inline functions with a stack size " "that exceeds the specified limit"))
static int computeThresholdFromOptLevels(unsigned OptLevel, unsigned SizeOptLevel)
static cl::opt< uint64_t > HotCallSiteRelFreq("hot-callsite-rel-freq", cl::Hidden, cl::init(60), cl::desc("Minimum block frequency, expressed as a multiple of caller's " "entry frequency, for a callsite to be hot in the absence of " "profile information."))
static cl::opt< int > InlineSavingsProfitableMultiplier("inline-savings-profitable-multiplier", cl::Hidden, cl::init(4), cl::desc("A multiplier on top of cycle savings to decide whether the " "savings won't justify the cost"))
static cl::opt< int > MemAccessCost("inline-memaccess-cost", cl::Hidden, cl::init(0), cl::desc("Cost of load/store instruction when inlining"))
static cl::opt< int > ColdCallSiteThreshold("inline-cold-callsite-threshold", cl::Hidden, cl::init(45), cl::desc("Threshold for inlining cold callsites"))
static cl::opt< bool > IgnoreTTIInlineCompatible("ignore-tti-inline-compatible", cl::Hidden, cl::init(false), cl::desc("Ignore TTI attributes compatibility check between callee/caller " "during inline cost calculation"))
static cl::opt< bool > OptComputeFullInlineCost("inline-cost-full", cl::Hidden, cl::desc("Compute the full inline cost of a call site even when the cost " "exceeds the threshold."))
#define DEBUG_PRINT_STAT(x)
static cl::opt< bool > InlineEnableCostBenefitAnalysis("inline-enable-cost-benefit-analysis", cl::Hidden, cl::init(false), cl::desc("Enable the cost-benefit analysis for the inliner"))
static cl::opt< int > InstrCost("inline-instr-cost", cl::Hidden, cl::init(5), cl::desc("Cost of a single instruction when inlining"))
static cl::opt< int > InlineSizeAllowance("inline-size-allowance", cl::Hidden, cl::init(100), cl::desc("The maximum size of a callee that get's " "inlined without sufficient cycle savings"))
static bool functionsHaveCompatibleAttributes(Function *Caller, Function *Callee, TargetTransformInfo &TTI, function_ref< const TargetLibraryInfo &(Function &)> &GetTLI)
Test that there are no attribute conflicts between Caller and Callee that prevent inlining.
static cl::opt< int > ColdCallSiteRelFreq("cold-callsite-rel-freq", cl::Hidden, cl::init(2), cl::desc("Maximum block frequency, expressed as a percentage of caller's " "entry frequency, for a callsite to be cold in the absence of " "profile information."))
static cl::opt< bool > DisableGEPConstOperand("disable-gep-const-evaluation", cl::Hidden, cl::init(false), cl::desc("Disables evaluation of GetElementPtr with constant operands"))
static cl::opt< int > DefaultThreshold("inlinedefault-threshold", cl::Hidden, cl::init(225), cl::desc("Default amount of inlining to perform"))
static Constant * getFalse(Type *Ty)
For a boolean type or a vector of boolean type, return false or a vector with every element false.
Legalize the Machine IR a function s Machine IR
mir Rename Register Operands
uint64_t IntrinsicInst * II
FunctionAnalysisManager FAM
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static unsigned getFastMathFlags(const MachineInstr &I)
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static SymbolRef::Type getType(const Symbol *Sym)
Class for arbitrary precision integers.
APInt udiv(const APInt &RHS) const
Unsigned division operation.
bool ult(const APInt &RHS) const
Unsigned less than comparison.
APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
an instruction to allocate memory on the stack
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
This class represents an incoming formal argument to a Function.
virtual void emitInstructionAnnot(const Instruction *, formatted_raw_ostream &)
emitInstructionAnnot - This may be implemented to emit a string right before an instruction is emitte...
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
StringRef getValueAsString() const
Return the attribute's value as a string.
AttrKind
This enumeration lists the attributes that can be associated with parameters, function results,...
bool isValid() const
Return true if the attribute is any kind of attribute.
LLVM Basic Block Representation.
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
This class represents a no-op cast from one type to another.
static BlockAddress * get(Function *F, BasicBlock *BB)
Return a BlockAddress for the specified function and basic block.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
std::optional< uint64_t > getBlockProfileCount(const BasicBlock *BB, bool AllowSynthetic=false) const
Returns the estimated profile count of BB.
BlockFrequency getEntryFreq() const
BlockFrequency getBlockFreq(const BasicBlock *BB) const
getblockFreq - Return block frequency.
std::optional< BlockFrequency > mul(uint64_t Factor) const
Multiplies frequency with Factor. Returns nullopt in case of overflow.
Conditional or Unconditional Branch instruction.
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
Value * getCondition() const
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Determine whether the argument or parameter has the given attribute.
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
Attribute getFnAttr(StringRef Kind) const
Get the attribute of a given kind for the function.
Value * getArgOperand(unsigned i) const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
FunctionType * getFunctionType() const
Function * getCaller()
Helper to get the caller (the parent function).
This is the base class for all instructions that perform data casts.
This class is the base class for the comparison instructions.
static Constant * getSub(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
This is the shared class of boolean and integer constants.
static ConstantInt * getTrue(LLVMContext &Context)
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
const APInt & getValue() const
Return the constant as an APInt value reference.
static ConstantInt * getBool(LLVMContext &Context, bool V)
This is an important base class in LLVM.
bool isAllOnesValue() const
Return true if this is the value that would be returned by getAllOnesValue.
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
iterator find(const_arg_type_t< KeyT > Val)
bool erase(const KeyT &Val)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Implements a dense probed hash-table based set.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Type * getReturnType() const
Class to represent profile counts.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
static bool compare(const APInt &LHS, const APInt &RHS, ICmpInst::Predicate Pred)
Return result of LHS Pred RHS comparison.
Indirect Branch Instruction.
Represents the cost of inlining a function.
static InlineCost getNever(const char *Reason, std::optional< CostBenefitPair > CostBenefit=std::nullopt)
static InlineCost getAlways(const char *Reason, std::optional< CostBenefitPair > CostBenefit=std::nullopt)
static InlineCost get(int Cost, int Threshold, int StaticBonus=0)
InlineResult is basically true or false.
static InlineResult success()
static InlineResult failure(const char *Reason)
const char * getFailureReason() const
This instruction inserts a struct field of array element value into an aggregate value.
Base class for instruction visitors.
RetTy visitIndirectBrInst(IndirectBrInst &I)
RetTy visitCmpInst(CmpInst &I)
RetTy visitCallBase(CallBase &I)
RetTy visitCleanupReturnInst(CleanupReturnInst &I)
RetTy visitUnreachableInst(UnreachableInst &I)
RetTy visitSwitchInst(SwitchInst &I)
void visit(Iterator Start, Iterator End)
RetTy visitReturnInst(ReturnInst &I)
RetTy visitBinaryOperator(BinaryOperator &I)
RetTy visitResumeInst(ResumeInst &I)
RetTy visitCatchReturnInst(CatchReturnInst &I)
RetTy visitCastInst(CastInst &I)
RetTy visitBranchInst(BranchInst &I)
RetTy visitSelectInst(SelectInst &I)
void visitInstruction(Instruction &I)
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const Function * getFunction() const
Return the function this instruction belongs to.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
This class represents a cast from an integer to a pointer.
A wrapper class for inspecting calls to intrinsic functions.
An instruction for reading from memory.
Represents a single loop in the control flow graph.
A Module instance is used to store all the information related to an LLVM module.
Class to represent pointers.
unsigned getAddressSpace() const
Return the address space of the Pointer type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Analysis providing profile information.
This class represents a cast from a pointer to an integer.
Resume the propagation of an exception.
Return a value (possibly void), from a function.
This class represents the LLVM 'select' instruction.
bool insert(const value_type &X)
Insert a new element into the SetVector.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Used to lazily calculate structure layout information for a target machine, based on the DataLayout s...
TypeSize getElementOffset(unsigned Idx) const
Class to represent struct types.
Provides information about what library functions are available for the current target.
bool has(LibFunc F) const
Tests whether a library function is available.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
The instances of the Type class are immutable: once they are created, they are never changed.
This function has undefined behavior.
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
LLVMContext & getContext() const
All values hold a context through their type.
StringRef getName() const
Return a constant reference to the value's name.
std::pair< iterator, bool > insert(const ValueT &V)
bool erase(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
This class implements an extremely fast bulk output stream that can only output to a stream.
bool areInlineCompatible(const Function &Caller, const Function &Callee)
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
@ C
The default llvm calling convention, compatible with C.
const char FunctionInlineCostMultiplierAttributeName[]
const int OptSizeThreshold
Use when optsize (-Os) is specified.
const int OptMinSizeThreshold
Use when minsize (-Oz) is specified.
const uint64_t MaxSimplifiedDynamicAllocaToInline
Do not inline dynamic allocas that have been constant propagated to be static allocas above this amou...
const int IndirectCallThreshold
const int OptAggressiveThreshold
Use when -O3 is specified.
const char MaxInlineStackSizeAttributeName[]
const unsigned TotalAllocaSizeRecursiveCaller
Do not inline functions which allocate this many bytes on the stack when the caller is recursive.
bool match(Val *V, const Pattern &P)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
initializer< Ty > init(const Ty &Val)
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< InstrNode * > Instr
This is an optimization pass for GlobalISel generic memory operations.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Constant * ConstantFoldSelectInstruction(Constant *Cond, Constant *V1, Constant *V2)
Attempt to constant fold a select instruction with the specified operands.
bool isAssumeLikeIntrinsic(const Instruction *I)
Return true if it is an intrinsic that cannot be speculated but also cannot trap.
bool canConstantFoldCallTo(const CallBase *Call, const Function *F)
canConstantFoldCallTo - Return true if its even possible to fold a call to the specified function.
Function::ProfileCount ProfileCount
std::optional< int > getStringFnAttrAsInt(CallBase &CB, StringRef AttrKind)
auto successors(const MachineBasicBlock *BB)
Value * lowerObjectSizeCall(IntrinsicInst *ObjectSize, const DataLayout &DL, const TargetLibraryInfo *TLI, bool MustSucceed)
Try to turn a call to @llvm.objectsize into an integer value of the given Type.
LogicalResult failure(bool IsFailure=true)
Utility function to generate a LogicalResult.
gep_type_iterator gep_type_end(const User *GEP)
Constant * ConstantFoldCall(const CallBase *Call, Function *F, ArrayRef< Constant * > Operands, const TargetLibraryInfo *TLI=nullptr, bool AllowNonDeterministic=true)
ConstantFoldCall - Attempt to constant fold a call to the specified function with the specified argum...
Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
InlineResult isInlineViable(Function &Callee)
Minimal filter to detect invalid constructs for inlining.
InlineCost getInlineCost(CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< const TargetLibraryInfo &(Function &)> GetTLI, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Get an InlineCost object representing the cost of inlining this callsite.
Value * simplifyFNegInst(Value *Op, FastMathFlags FMF, const SimplifyQuery &Q)
Given operand for an FNeg, fold the result or return null.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
std::enable_if_t< std::is_unsigned_v< T >, T > SaturatingMultiplyAdd(T X, T Y, T A, bool *ResultOverflowed=nullptr)
Multiply two unsigned integers, X and Y, and add the unsigned integer, A to the product.
std::optional< InlineCostFeatures > getInliningCostFeatures(CallBase &Call, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, function_ref< const TargetLibraryInfo &(Function &)> GetTLI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Get the expanded cost features.
Constant * ConstantFoldInstOperands(Instruction *I, ArrayRef< Constant * > Ops, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, bool AllowNonDeterministic=true)
ConstantFoldInstOperands - Attempt to constant fold an instruction with the specified operands.
std::array< int, static_cast< size_t >(InlineCostFeatureIndex::NumberOfFeatures)> InlineCostFeatures
std::optional< InlineResult > getAttributeBasedInliningDecision(CallBase &Call, Function *Callee, TargetTransformInfo &CalleeTTI, function_ref< const TargetLibraryInfo &(Function &)> GetTLI)
Returns InlineResult::success() if the call site should be always inlined because of user directives,...
Value * simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a BinaryOperator, fold the result or return null.
InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
int getCallsiteCost(const TargetTransformInfo &TTI, const CallBase &Call, const DataLayout &DL)
Return the cost associated with a callsite, including parameter passing and the call/return instructi...
gep_type_iterator gep_type_begin(const User *GEP)
std::optional< int > getInliningCostEstimate(CallBase &Call, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, function_ref< const TargetLibraryInfo &(Function &)> GetTLI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Get the cost estimate ignoring thresholds.
auto predecessors(const MachineBasicBlock *BB)
std::enable_if_t< std::is_unsigned_v< T >, T > SaturatingAdd(T X, T Y, bool *ResultOverflowed=nullptr)
Add two unsigned integers, X and Y, of type T.
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM)
Thresholds to tune inline cost analysis.
std::optional< int > OptMinSizeThreshold
Threshold to use when the caller is optimized for minsize.
std::optional< int > OptSizeThreshold
Threshold to use when the caller is optimized for size.
std::optional< int > ColdCallSiteThreshold
Threshold to use when the callsite is considered cold.
std::optional< int > ColdThreshold
Threshold to use for cold callees.
std::optional< int > HotCallSiteThreshold
Threshold to use when the callsite is considered hot.
std::optional< bool > AllowRecursiveCall
Indicate whether we allow inlining for recursive call.
int DefaultThreshold
The default threshold to start with for a callee.
std::optional< int > HintThreshold
Threshold to use for callees with inline hint.
std::optional< int > LocallyHotCallSiteThreshold
Threshold to use when the callsite is considered hot relative to function entry.