#include "llvm/Config/llvm-config.h"

#define DEBUG_TYPE "inline-cost"

STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed");
static cl::opt<int>
    DefaultThreshold("inlinedefault-threshold", cl::Hidden, cl::init(225),
                     cl::desc("Default amount of inlining to perform"));

static cl::opt<bool> IgnoreTTIInlineCompatible(
    "ignore-tti-inline-compatible", cl::Hidden, cl::init(false),
    cl::desc("Ignore TTI attributes compatibility check between callee/caller "
             "during inline cost calculation"));

static cl::opt<bool> PrintInstructionComments(
    "print-instruction-comments", cl::Hidden, cl::init(false),
    cl::desc("Prints comments for instruction based on inline cost analysis"));

static cl::opt<int> InlineThreshold(
    "inline-threshold", cl::Hidden, cl::init(225),
    cl::desc("Control the amount of inlining to perform (default = 225)"));

static cl::opt<int> HintThreshold(
    "inlinehint-threshold", cl::Hidden, cl::init(325),
    cl::desc("Threshold for inlining functions with inline hint"));

static cl::opt<int> ColdCallSiteThreshold(
    "inline-cold-callsite-threshold", cl::Hidden, cl::init(45),
    cl::desc("Threshold for inlining cold callsites"));

static cl::opt<bool> InlineEnableCostBenefitAnalysis(
    "inline-enable-cost-benefit-analysis", cl::Hidden, cl::init(false),
    cl::desc("Enable the cost-benefit analysis for the inliner"));

static cl::opt<int> InlineSavingsMultiplier(
    "inline-savings-multiplier", cl::Hidden, cl::init(8),
    cl::desc("Multiplier to multiply cycle savings by during inlining"));

static cl::opt<int> InlineSizeAllowance(
    "inline-size-allowance", cl::Hidden, cl::init(100),
    cl::desc("The maximum size of a callee that gets "
             "inlined without sufficient cycle savings"));

static cl::opt<int> ColdThreshold(
    "inlinecold-threshold", cl::Hidden, cl::init(45),
    cl::desc("Threshold for inlining functions with cold attribute"));

static cl::opt<int> HotCallSiteThreshold(
    "hot-callsite-threshold", cl::Hidden, cl::init(3000),
    cl::desc("Threshold for hot callsites"));

static cl::opt<int> LocallyHotCallSiteThreshold(
    "locally-hot-callsite-threshold", cl::Hidden, cl::init(525),
    cl::desc("Threshold for locally hot callsites"));

static cl::opt<int> ColdCallSiteRelFreq(
    "cold-callsite-rel-freq", cl::Hidden, cl::init(2),
    cl::desc("Maximum block frequency, expressed as a percentage of caller's "
             "entry frequency, for a callsite to be cold in the absence of "
             "profile information."));

static cl::opt<uint64_t> HotCallSiteRelFreq(
    "hot-callsite-rel-freq", cl::Hidden, cl::init(60),
    cl::desc("Minimum block frequency, expressed as a multiple of caller's "
             "entry frequency, for a callsite to be hot in the absence of "
             "profile information."));

static cl::opt<int>
    InstrCost("inline-instr-cost", cl::Hidden, cl::init(5),
              cl::desc("Cost of a single instruction when inlining"));

static cl::opt<int>
    MemAccessCost("inline-memaccess-cost", cl::Hidden, cl::init(0),
                  cl::desc("Cost of load/store instruction when inlining"));

static cl::opt<int> CallPenalty(
    "inline-call-penalty", cl::Hidden, cl::init(25),
    cl::desc("Call penalty that is applied per callsite when inlining"));

static cl::opt<size_t> StackSizeThreshold(
    "inline-max-stacksize", cl::Hidden,
    cl::init(std::numeric_limits<size_t>::max()),
    cl::desc("Do not inline functions with a stack size "
             "that exceeds the specified limit"));

static cl::opt<size_t> RecurStackSizeThreshold(
    "recursive-inline-max-stacksize", cl::Hidden,
    cl::init(InlineConstants::TotalAllocaSizeRecursiveCaller),
    cl::desc("Do not inline recursive functions with a stack "
             "size that exceeds the specified limit"));

static cl::opt<bool> OptComputeFullInlineCost(
    "inline-cost-full", cl::Hidden,
    cl::desc("Compute the full inline cost of a call site even when the cost "
             "exceeds the threshold."));

static cl::opt<bool> InlineCallerSupersetNoBuiltin(
    "inline-caller-superset-nobuiltin", cl::Hidden, cl::init(true),
    cl::desc("Allow inlining when caller has a superset of callee's nobuiltin "
             "attributes."));

static cl::opt<bool> DisableGEPConstOperand(
    "disable-gep-const-evaluation", cl::Hidden, cl::init(false),
    cl::desc("Disables evaluation of GetElementPtr with constant operands"));
namespace InlineConstants {
// ...
} // namespace InlineConstants

class InlineCostCallAnalyzer;

// Cost and threshold bookkeeping around the analysis of one instruction.
struct InstructionCostDetail {
  int CostBefore = 0;
  int CostAfter = 0;
  int ThresholdBefore = 0;
  int ThresholdAfter = 0;

  int getThresholdDelta() const { return ThresholdAfter - ThresholdBefore; }

  int getCostDelta() const { return CostAfter - CostBefore; }

  bool hasThresholdChanged() const { return ThresholdAfter != ThresholdBefore; }
};

class InlineCostAnnotationWriter : public AssemblyAnnotationWriter {
private:
  InlineCostCallAnalyzer *const ICCA;

public:
  InlineCostAnnotationWriter(InlineCostCallAnalyzer *ICCA) : ICCA(ICCA) {}
  void emitInstructionAnnot(const Instruction *I,
                            formatted_raw_ostream &OS) override;
};
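// CallAnalyzer walks the callee's IR as if it had already been inlined at
// this call site, propagating constants from the actual arguments. Subclasses
// implement the on*() callbacks below to turn the observed events into a
// concrete cost model.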
class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
  typedef InstVisitor<CallAnalyzer, bool> Base;

  virtual ~CallAnalyzer() = default;

  // ...

  virtual void onBlockStart(const BasicBlock *BB) {}

  virtual void onBlockAnalyzed(const BasicBlock *BB) {}

  virtual void onInstructionAnalysisStart(const Instruction *I) {}

  virtual void onInstructionAnalysisFinish(const Instruction *I) {}

  virtual bool shouldStop() { return false; }

  virtual void onDisableSROA(AllocaInst *Arg) {}

  virtual void onDisableLoadElimination() {}

  virtual bool onCallBaseVisitStart(CallBase &Call) { return true; }

  virtual void onCallPenalty() {}

  virtual void onMemAccess() {}

  virtual void onLoadEliminationOpportunity() {}

  virtual void onCallArgumentSetup(const CallBase &Call) {}

  virtual void onLoadRelativeIntrinsic() {}

  virtual bool onJumpTable(unsigned JumpTableSize) { return true; }

  virtual bool onCaseCluster(unsigned NumCaseCluster) { return true; }

  virtual void onFinalizeSwitch(unsigned JumpTableSize,
                                unsigned NumCaseCluster) {}

  virtual void onMissedSimplification() {}

  virtual void onInitializeSROAArg(AllocaInst *Arg) {}

  virtual void onAggregateSROAUse(AllocaInst *V) {}

  bool handleSROA(Value *V, bool DoNotDisable) {
    // Check for SROA candidates in comparisons.
    if (auto *SROAArg = getSROAArgForValueOrNull(V)) {
      if (DoNotDisable) {
        onAggregateSROAUse(SROAArg);
        return true;
      }
      disableSROAForArg(SROAArg);
    }
    return false;
  }
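  // The flags below record properties observed while walking the callee.
  // Several of them (returns_twice exposure, dynamic allocas, indirectbr,
  // certain intrinsics, va_start initialization) unconditionally abort the
  // analysis.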
  bool IsCallerRecursive = false;
  bool IsRecursiveCall = false;
  bool ExposesReturnsTwice = false;
  bool HasDynamicAlloca = false;
  bool ContainsNoDuplicateCall = false;
  bool HasReturn = false;
  bool HasIndirectBr = false;
  bool HasUninlineableIntrinsic = false;
  bool InitsVargArgs = false;

  unsigned NumInstructions = 0;
  unsigned NumVectorInstructions = 0;

  // ...

  bool EnableLoadElimination = true;

  bool AllowRecursiveCall = false;
  AllocaInst *getSROAArgForValueOrNull(Value *V) const {
    auto It = SROAArgValues.find(V);
    if (It == SROAArgValues.end() || EnabledSROAAllocas.count(It->second) == 0)
      return nullptr;
    return It->second;
  }

  bool isAllocaDerivedArg(Value *V);
  void disableSROA(Value *V);
  void disableLoadElimination();
  bool simplifyIntrinsicCallIsConstant(CallBase &CB);
  bool simplifyIntrinsicCallObjectSize(CallBase &CB);
  bool isKnownNonNullInCallee(Value *V);
  bool allowSizeGrowth(CallBase &Call);
  CallAnalyzer(Function &Callee, CallBase &Call, const TargetTransformInfo &TTI,
               function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
               function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
               ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE)
      : TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI),
        PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()), ORE(ORE),
        CandidateCall(Call) {}

  std::optional<Constant *> getSimplifiedValue(Instruction *I) {
    if (SimplifiedValues.contains(I))
      return SimplifiedValues[I];
    return std::nullopt;
  }

  // Keep a bunch of stats about the cost savings found so we can print them
  // out when debugging.
  unsigned NumConstantArgs = 0;
  unsigned NumConstantOffsetPtrArgs = 0;
  unsigned NumAllocaArgs = 0;
  unsigned NumConstantPtrCmps = 0;
  unsigned NumConstantPtrDiffs = 0;
  unsigned NumInstructionsSimplified = 0;
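// Model the expected number of compares when a switch with NumCaseCluster
// case clusters is lowered as a balanced binary search: roughly half of the
// clusters are visited on average, and each visit costs about one compare
// plus one branch, which works out to 3 * NumCaseCluster / 2 - 1.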
int64_t getExpectedNumberOfCompare(int NumCaseCluster) {
  return 3 * static_cast<int64_t>(NumCaseCluster) / 2 - 1;
}
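// The default, threshold-based cost model: while CallAnalyzer walks the
// callee, this subclass accumulates a scalar Cost and compares it against a
// per-call-site Threshold.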
class InlineCostCallAnalyzer final : public CallAnalyzer {
  const bool ComputeFullInlineCost;
  int LoadEliminationCost = 0;

  // Bonus to be applied when the callee has only one reachable basic block.
  int SingleBBBonus = 0;

  // ...

  int StaticBonusApplied = 0;

  const bool BoostIndirectCalls;

  const bool IgnoreThreshold;

  const bool CostBenefitAnalysisEnabled;

  // ...

  // The accumulated cost at the beginning of the basic block being analyzed.
  int CostAtBBStart = 0;

  // ...

  bool DecidedByCostThreshold = false;

  bool DecidedByCostBenefit = false;

  bool SingleBB = true;

  unsigned SROACostSavings = 0;
  unsigned SROACostSavingsLost = 0;
  std::optional<int> getHotCallSiteThreshold(CallBase &Call,
                                             BlockFrequencyInfo *CallerBFI);

  // Handle a capped 'int' increment for Cost.
  void addCost(int64_t Inc) {
    Inc = std::max<int64_t>(std::min<int64_t>(INT_MAX, Inc), INT_MIN);
    Cost = std::max<int64_t>(std::min<int64_t>(INT_MAX, Inc + Cost), INT_MIN);
  }
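  // Note that addCost() saturates rather than overflows: Inc is clamped to
  // [INT_MIN, INT_MAX] first, and then the running sum is clamped again, so
  // repeated large penalties can never wrap the int-typed Cost.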
  void onDisableSROA(AllocaInst *Arg) override {
    auto CostIt = SROAArgCosts.find(Arg);
    if (CostIt == SROAArgCosts.end())
      return;
    addCost(CostIt->second);
    SROACostSavings -= CostIt->second;
    SROACostSavingsLost += CostIt->second;
    SROAArgCosts.erase(CostIt);
  }

  void onDisableLoadElimination() override {
    addCost(LoadEliminationCost);
    LoadEliminationCost = 0;
  }
  bool onCallBaseVisitStart(CallBase &Call) override {
    if (std::optional<int> AttrCallThresholdBonus =
            getStringFnAttrAsInt(Call, "call-threshold-bonus"))
      Threshold += *AttrCallThresholdBonus;

    if (std::optional<int> AttrCallCost =
            getStringFnAttrAsInt(Call, "call-inline-cost")) {
      addCost(*AttrCallCost);
      // Prevent further processing of the call since we want to override its
      // cost.
      return false;
    }
    return true;
  }

  void onCallPenalty() override { addCost(CallPenalty); }
  void onCallArgumentSetup(const CallBase &Call) override {
    // Pay the price of the argument setup. We account for the average 1
    // instruction per call argument setup here.
    addCost(Call.arg_size() * InstrCost);
  }

  void onLoadRelativeIntrinsic() override {
    // This intrinsic is normally lowered to a handful of instructions.
    addCost(3 * InstrCost);
  }

  void onLoweredCall(Function *F, CallBase &Call,
                     bool IsIndirectCall) override {
    // ...

    if (IsIndirectCall && BoostIndirectCalls) {
      auto IndirectCallParams = Params;
      // ...
      InlineCostCallAnalyzer CA(*F, Call, IndirectCallParams, TTI,
                                GetAssumptionCache, GetBFI, PSI, ORE, false);
      if (CA.analyze().isSuccess()) {
        // We were able to inline the indirect call! Credit the unspent
        // threshold back against this call site.
        Cost -= std::max(0, CA.getThreshold() - CA.getCost());
      }
    }
    // ...
  }
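  // When the indirect callee is known via SimplifiedValues and would itself
  // be inlined, the unspent threshold is credited back against this call:
  // Cost -= max(0, Threshold' - Cost'). This is what makes speculative
  // devirtualization look attractive to the inliner.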
  void onFinalizeSwitch(unsigned JumpTableSize,
                        unsigned NumCaseCluster) override {
    // If suitable for a jump table, consider the cost for the table size and
    // branch to destination.
    // ...

    if (NumCaseCluster <= 3) {
      // Suppose a comparison includes one compare and one conditional branch.
      addCost(NumCaseCluster * 2 * InstrCost);
      return;
    }

    int64_t ExpectedNumberOfCompare =
        getExpectedNumberOfCompare(NumCaseCluster);
    int64_t SwitchCost = ExpectedNumberOfCompare * 2 * InstrCost;

    addCost(SwitchCost);
  }

  void onMissedSimplification() override { addCost(InstrCost); }
  void onInitializeSROAArg(AllocaInst *Arg) override {
    assert(Arg != nullptr &&
           "Should not initialize SROA costs for null value.");
    auto SROAArgCost = TTI.getCallerAllocaCost(CandidateCall, Arg);
    SROACostSavings += SROAArgCost;
    SROAArgCosts[Arg] = SROAArgCost;
  }

  void onAggregateSROAUse(AllocaInst *SROAArg) override {
    auto CostIt = SROAArgCosts.find(SROAArg);
    assert(CostIt != SROAArgCosts.end() &&
           "expected this argument to have a cost");
    CostIt->second += InstrCost;
    SROACostSavings += InstrCost;
  }

  void onBlockStart(const BasicBlock *BB) override { CostAtBBStart = Cost; }
  void onBlockAnalyzed(const BasicBlock *BB) override {
    if (CostBenefitAnalysisEnabled) {
      // Keep track of the static size of live but cold basic blocks. For now,
      // a cold basic block is one that is never executed per the profile.
      assert(GetBFI && "GetBFI must be available");
      BlockFrequencyInfo *BFI = &(GetBFI(F));
      assert(BFI && "BFI must be available");
      auto ProfileCount = BFI->getBlockProfileCount(BB);
      if (*ProfileCount == 0)
        ColdSize += Cost - CostAtBBStart;
    }

    auto *TI = BB->getTerminator();
    if (SingleBB && TI->getNumSuccessors() > 1) {
      // Take off the bonus we applied to the threshold.
      Threshold -= SingleBBBonus;
      SingleBB = false;
    }
  }
  void onInstructionAnalysisStart(const Instruction *I) override {
    // Record the initial cost and threshold before the given instruction is
    // assessed.
    if (!PrintInstructionComments)
      return;
    InstructionCostDetailMap[I].CostBefore = Cost;
    InstructionCostDetailMap[I].ThresholdBefore = Threshold;
  }

  void onInstructionAnalysisFinish(const Instruction *I) override {
    // Record the new values of cost and threshold after the instruction has
    // been assessed.
    if (!PrintInstructionComments)
      return;
    InstructionCostDetailMap[I].CostAfter = Cost;
    InstructionCostDetailMap[I].ThresholdAfter = Threshold;
  }
  bool isCostBenefitAnalysisEnabled() {
    if (!PSI || !PSI->hasProfileSummary())
      return false;

    // ...

    if (!PSI->hasInstrumentationProfile())
      return false;

    // ...

    if (!Caller->getEntryCount())
      return false;

    // ...

    // For now, limit to hot call sites only.
    if (!PSI->isHotCallSite(CandidateCall, CallerBFI))
      return false;

    // Make sure we have a nonzero entry count.
    auto EntryCount = F.getEntryCount();
    if (!EntryCount || !EntryCount->getCount())
      return false;

    // ...

    return true;
  }
  std::optional<bool> costBenefitAnalysis() {
    if (!CostBenefitAnalysisEnabled)
      return std::nullopt;

    // ...

    APInt CycleSavings(128, 0);

    for (auto &BB : F) {
      APInt CurrentSavings(128, 0);
      for (auto &I : BB) {
        if (BranchInst *BI = dyn_cast<BranchInst>(&I)) {
          if (BI->isConditional() &&
              isa_and_nonnull<ConstantInt>(
                  SimplifiedValues.lookup(BI->getCondition()))) {
            CurrentSavings += InstrCost;
          }
        } else if (Value *V = dyn_cast<Value>(&I)) {
          // Count an instruction as savings if we can fold it.
          if (SimplifiedValues.count(V)) {
            CurrentSavings += InstrCost;
          }
        }
      }

      // ...
      CycleSavings += CurrentSavings;
    }

    // Compute the cycle savings per call.
    auto EntryProfileCount = F.getEntryCount();
    assert(EntryProfileCount && EntryProfileCount->getCount());
    auto EntryCount = EntryProfileCount->getCount();
    CycleSavings += EntryCount / 2;
    CycleSavings = CycleSavings.udiv(EntryCount);
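    // At this point CycleSavings holds the frequency-weighted savings summed
    // over all blocks. Adding EntryCount / 2 before the udiv rounds the
    // per-call average to nearest instead of truncating toward zero.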
    // Account for the savings at the call site itself.
    auto *CallerBB = CandidateCall.getParent();
    // ...

    // Compare the scaled savings against the hot-count threshold from the
    // profile summary.
    APInt RHS(128, PSI->getOrCompHotCountThreshold());
    // ...
  }
  InlineResult finalizeAnalysis() override {
    // Loops act a lot like calls: they are barriers to movement and require
    // setup. So when optimizing for size, penalize call sites that contain
    // loops.
    auto *Caller = CandidateCall.getFunction();
    if (Caller->hasMinSize()) {
      DominatorTree DT(F);
      LoopInfo LI(DT);
      for (Loop *L : LI) {
        // Ignore loops that will not be executed.
        if (DeadBlocks.count(L->getHeader()))
          continue;
        // ...
      }
      // ...
    }

    // We applied the maximum possible vector bonus at the beginning. Now,
    // subtract the excess bonus, if any, from the Threshold before comparing
    // against Cost.
    if (NumVectorInstructions <= NumInstructions / 10)
      Threshold -= VectorBonus;
    else if (NumVectorInstructions <= NumInstructions / 2)
      Threshold -= VectorBonus / 2;
    if (std::optional<int> AttrCost =
            getStringFnAttrAsInt(CandidateCall, "function-inline-cost"))
      Cost = *AttrCost;

    if (std::optional<int> AttrCostMult = getStringFnAttrAsInt(
            CandidateCall,
            InlineConstants::FunctionInlineCostMultiplierAttributeName))
      Cost *= *AttrCostMult;

    if (std::optional<int> AttrThreshold =
            getStringFnAttrAsInt(CandidateCall, "function-inline-threshold"))
      Threshold = *AttrThreshold;

    if (auto Result = costBenefitAnalysis()) {
      DecidedByCostBenefit = true;
      if (*Result)
        return InlineResult::success();
      return InlineResult::failure("Cost over threshold.");
    }

    // ...

    DecidedByCostThreshold = true;
    return Cost < std::max(1, Threshold)
               ? InlineResult::success()
               : InlineResult::failure("Cost over threshold.");
  }
  bool shouldStop() override {
    if (IgnoreThreshold || ComputeFullInlineCost)
      return false;
    // Bail out the moment we cross the threshold. This means we'll
    // under-count the cost, but only when undercounting doesn't matter.
    if (Cost < Threshold)
      return false;
    DecidedByCostThreshold = true;
    return true;
  }

  void onLoadEliminationOpportunity() override {
    LoadEliminationCost += InstrCost;
  }
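  // shouldStop() lets the walk bail out as soon as Cost crosses Threshold,
  // unless a complete cost is required (remarks, cost-benefit analysis, or
  // the -inline-cost-full flag).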
  InlineResult onAnalysisStart() override {
    // ...
    assert(NumInstructions == 0);
    assert(NumVectorInstructions == 0);

    // Update the threshold based on call site properties.
    updateThreshold(CandidateCall, F);

    // The computed threshold and bonuses must be non-negative here.
    assert(SingleBBBonus >= 0);
    assert(VectorBonus >= 0);

    // Speculatively apply all possible bonuses to Threshold; the excess is
    // subtracted again in finalizeAnalysis().
    Threshold += (SingleBBBonus + VectorBonus);

    // ...

    // Check if we're done; this can happen due to bonuses and penalties.
    if (Cost >= Threshold && !ComputeFullInlineCost)
      return InlineResult::failure("high cost");
    return InlineResult::success();
  }
  InlineCostCallAnalyzer(
      Function &Callee, CallBase &Call, const InlineParams &Params,
      const TargetTransformInfo &TTI,
      function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
      function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr,
      ProfileSummaryInfo *PSI = nullptr,
      OptimizationRemarkEmitter *ORE = nullptr, bool BoostIndirect = true,
      bool IgnoreThreshold = false)
      : CallAnalyzer(Callee, Call, TTI, GetAssumptionCache, GetBFI, PSI, ORE),
        ComputeFullInlineCost(OptComputeFullInlineCost ||
                              Params.ComputeFullInlineCost || ORE ||
                              isCostBenefitAnalysisEnabled()),
        Params(Params), Threshold(Params.DefaultThreshold),
        BoostIndirectCalls(BoostIndirect), IgnoreThreshold(IgnoreThreshold),
        CostBenefitAnalysisEnabled(isCostBenefitAnalysisEnabled()),
        Writer(this) {
    AllowRecursiveCall = *Params.AllowRecursiveCall;
  }
  // An annotation writer so the IR printer can interleave per-instruction
  // cost details with the callee's body.
  InlineCostAnnotationWriter Writer;

  // ...

  std::optional<InstructionCostDetail> getCostDetails(const Instruction *I) {
    if (InstructionCostDetailMap.contains(I))
      return InstructionCostDetailMap[I];
    return std::nullopt;
  }

  virtual ~InlineCostCallAnalyzer() = default;
  int getThreshold() const { return Threshold; }
  int getCost() const { return Cost; }
  int getStaticBonusApplied() const { return StaticBonusApplied; }
  std::optional<CostBenefitPair> getCostBenefitPair() { return CostBenefit; }
  bool wasDecidedByCostBenefit() const { return DecidedByCostBenefit; }
  bool wasDecidedByCostThreshold() const { return DecidedByCostThreshold; }
};
// Return true if CB is the sole call to local function Callee.
static bool isSoleCallToLocalFunction(const CallBase &CB,
                                      const Function &Callee) {
  return Callee.hasLocalLinkage() && Callee.hasOneLiveUse() &&
         &Callee == CB.getCalledFunction();
}
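// A function with local linkage and exactly one live use disappears from the
// module once that call is inlined, so the size of its body is effectively
// free; this predicate gates the last-call-to-static bonus.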
class InlineCostFeaturesAnalyzer final : public CallAnalyzer {
private:
  InlineCostFeatures Cost = {};

  // FIXME: These constants are taken from the heuristic-based cost visitor.
  static constexpr int JTCostMultiplier = 4;
  static constexpr int CaseClusterCostMultiplier = 2;
  static constexpr int SwitchCostMultiplier = 2;

  // ...

  unsigned SROACostSavingOpportunities = 0;
  int VectorBonus = 0;
  int SingleBBBonus = 0;
  int Threshold = 5;

  DenseMap<AllocaInst *, unsigned> SROACosts;

  void increment(InlineCostFeatureIndex Feature, int64_t Delta = 1) {
    Cost[static_cast<size_t>(Feature)] += Delta;
  }

  void set(InlineCostFeatureIndex Feature, int64_t Value) {
    Cost[static_cast<size_t>(Feature)] = Value;
  }
  void onDisableSROA(AllocaInst *Arg) override {
    auto CostIt = SROACosts.find(Arg);
    if (CostIt == SROACosts.end())
      return;

    increment(InlineCostFeatureIndex::sroa_losses, CostIt->second);
    SROACostSavingOpportunities -= CostIt->second;
    SROACosts.erase(CostIt);
  }

  void onDisableLoadElimination() override {
    set(InlineCostFeatureIndex::load_elimination, 1);
  }
  void onCallPenalty() override {
    increment(InlineCostFeatureIndex::call_penalty, CallPenalty);
  }

  void onCallArgumentSetup(const CallBase &Call) override {
    increment(InlineCostFeatureIndex::call_argument_setup,
              Call.arg_size() * InstrCost);
  }

  void onLoadRelativeIntrinsic() override {
    increment(InlineCostFeatureIndex::load_relative_intrinsic, 3 * InstrCost);
  }

  void onLoweredCall(Function *F, CallBase &Call,
                     bool IsIndirectCall) override {
    increment(InlineCostFeatureIndex::lowered_call_arg_setup,
              Call.arg_size() * InstrCost);

    if (IsIndirectCall) {
      // ...
      InlineCostCallAnalyzer CA(*F, Call, IndirectCallParams, TTI,
                                GetAssumptionCache, GetBFI, PSI, ORE, false,
                                true);
      if (CA.analyze().isSuccess()) {
        increment(InlineCostFeatureIndex::nested_inline_cost_estimate,
                  CA.getCost());
        increment(InlineCostFeatureIndex::nested_inlines, 1);
      }
    }
    // ...
  }
  void onFinalizeSwitch(unsigned JumpTableSize,
                        unsigned NumCaseCluster) override {
    if (JumpTableSize) {
      int64_t JTCost = static_cast<int64_t>(JumpTableSize) * InstrCost +
                       JTCostMultiplier * InstrCost;
      increment(InlineCostFeatureIndex::jump_table_penalty, JTCost);
      return;
    }

    if (NumCaseCluster <= 3) {
      increment(InlineCostFeatureIndex::case_cluster_penalty,
                NumCaseCluster * CaseClusterCostMultiplier * InstrCost);
      return;
    }

    int64_t ExpectedNumberOfCompare =
        getExpectedNumberOfCompare(NumCaseCluster);

    int64_t SwitchCost =
        ExpectedNumberOfCompare * SwitchCostMultiplier * InstrCost;
    increment(InlineCostFeatureIndex::switch_penalty, SwitchCost);
  }

  void onMissedSimplification() override {
    increment(InlineCostFeatureIndex::unsimplified_common_instructions,
              InstrCost);
  }
  void onInitializeSROAArg(AllocaInst *Arg) override {
    auto SROAArgCost = TTI.getCallerAllocaCost(CandidateCall, Arg);
    SROACosts[Arg] = SROAArgCost;
    SROACostSavingOpportunities += SROAArgCost;
  }

  void onAggregateSROAUse(AllocaInst *Arg) override {
    SROACosts.find(Arg)->second += InstrCost;
    SROACostSavingOpportunities += InstrCost;
  }
  void onBlockAnalyzed(const BasicBlock *BB) override {
    if (BB->getTerminator()->getNumSuccessors() > 1)
      set(InlineCostFeatureIndex::is_multiple_blocks, 1);
    Threshold -= SingleBBBonus;
  }

  InlineResult finalizeAnalysis() override {
    auto *Caller = CandidateCall.getFunction();
    if (Caller->hasMinSize()) {
      DominatorTree DT(F);
      LoopInfo LI(DT);
      for (Loop *L : LI) {
        // Ignore loops that will not be executed.
        if (DeadBlocks.count(L->getHeader()))
          continue;
        increment(InlineCostFeatureIndex::num_loops,
                  InlineConstants::LoopPenalty);
      }
    }
    set(InlineCostFeatureIndex::dead_blocks, DeadBlocks.size());
    set(InlineCostFeatureIndex::simplified_instructions,
        NumInstructionsSimplified);
    set(InlineCostFeatureIndex::constant_args, NumConstantArgs);
    set(InlineCostFeatureIndex::constant_offset_ptr_args,
        NumConstantOffsetPtrArgs);
    set(InlineCostFeatureIndex::sroa_savings, SROACostSavingOpportunities);

    if (NumVectorInstructions <= NumInstructions / 10)
      Threshold -= VectorBonus;
    else if (NumVectorInstructions <= NumInstructions / 2)
      Threshold -= VectorBonus / 2;

    set(InlineCostFeatureIndex::threshold, Threshold);

    return InlineResult::success();
  }
  bool shouldStop() override { return false; }

  void onLoadEliminationOpportunity() override {
    increment(InlineCostFeatureIndex::load_elimination, 1);
  }

  InlineResult onAnalysisStart() override {
    increment(InlineCostFeatureIndex::callsite_cost,
              -1 * getCallsiteCost(this->CandidateCall, DL));

    set(InlineCostFeatureIndex::cold_cc_penalty,
        (F.getCallingConv() == CallingConv::Cold));

    set(InlineCostFeatureIndex::last_call_to_static_bonus,
        isSoleCallToLocalFunction(CandidateCall, F));
    // Mirror the default bonus percentages used by the cost analyzer.
    int SingleBBBonusPercent = 50;
    int VectorBonusPercent = TTI.getInlinerVectorBonusPercent();

    SingleBBBonus = Threshold * SingleBBBonusPercent / 100;
    VectorBonus = Threshold * VectorBonusPercent / 100;
    Threshold += (SingleBBBonus + VectorBonus);

    return InlineResult::success();
  }

public:
  InlineCostFeaturesAnalyzer(
      const TargetTransformInfo &TTI,
      function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
      function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
      ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE,
      Function &Callee, CallBase &Call)
      : CallAnalyzer(Callee, Call, TTI, GetAssumptionCache, GetBFI, PSI) {}

  const InlineCostFeatures &features() const { return Cost; }
};
bool CallAnalyzer::isAllocaDerivedArg(Value *V) {
  return SROAArgValues.count(V);
}

void CallAnalyzer::disableSROAForArg(AllocaInst *SROAArg) {
  onDisableSROA(SROAArg);
  EnabledSROAAllocas.erase(SROAArg);
  disableLoadElimination();
}
void InlineCostAnnotationWriter::emitInstructionAnnot(
    const Instruction *I, formatted_raw_ostream &OS) {
  // The inline cost of the given instruction is always printed; the threshold
  // delta only when the threshold changed while analyzing it.
  std::optional<InstructionCostDetail> Record = ICCA->getCostDetails(I);
  if (!Record)
    OS << "; No analysis for the instruction";
  else {
    OS << "; cost before = " << Record->CostBefore
       << ", cost after = " << Record->CostAfter
       << ", threshold before = " << Record->ThresholdBefore
       << ", threshold after = " << Record->ThresholdAfter << ", ";
    OS << "cost delta = " << Record->getCostDelta();
    if (Record->hasThresholdChanged())
      OS << ", threshold delta = " << Record->getThresholdDelta();
  }
  auto C = ICCA->getSimplifiedValue(const_cast<Instruction *>(I));
  if (C) {
    OS << ", simplified to ";
    (*C)->print(OS, true);
  }
  OS << "\n";
}
void CallAnalyzer::disableSROA(Value *V) {
  if (auto *SROAArg = getSROAArgForValueOrNull(V)) {
    disableSROAForArg(SROAArg);
  }
}

void CallAnalyzer::disableLoadElimination() {
  if (EnableLoadElimination) {
    onDisableLoadElimination();
    EnableLoadElimination = false;
  }
}
bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
  unsigned IntPtrWidth = DL.getIndexTypeSizeInBits(GEP.getType());
  assert(IntPtrWidth == Offset.getBitWidth());

  for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
       GTI != GTE; ++GTI) {
    ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
    if (!OpC)
      if (Constant *SimpleOp = SimplifiedValues.lookup(GTI.getOperand()))
        OpC = dyn_cast<ConstantInt>(SimpleOp);
    if (!OpC)
      return false;
    if (OpC->isZero())
      continue;

    // Handle a struct index, which adds its field offset to the pointer.
    if (StructType *STy = GTI.getStructTypeOrNull()) {
      unsigned ElementIdx = OpC->getZExtValue();
      const StructLayout *SL = DL.getStructLayout(STy);
      Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx));
      continue;
    }

    APInt TypeSize(IntPtrWidth, DL.getTypeAllocSize(GTI.getIndexedType()));
    Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize;
  }
  return true;
}

bool CallAnalyzer::isGEPFree(GetElementPtrInst &GEP) {
  SmallVector<Value *, 4> Operands;
  Operands.push_back(GEP.getOperand(0));
  for (const Use &Op : GEP.indices())
    if (Constant *SimpleOp = SimplifiedValues.lookup(Op))
      Operands.push_back(SimpleOp);
    else
      Operands.push_back(Op);
  // ...
}
bool CallAnalyzer::visitAlloca(AllocaInst &I) {
  disableSROA(I.getOperand(0));

  // Check whether inlining will turn a dynamic alloca into a static alloca.
  if (I.isArrayAllocation()) {
    Constant *Size = SimplifiedValues.lookup(I.getArraySize());
    if (auto *AllocSize = dyn_cast_or_null<ConstantInt>(Size)) {
      // ...
      Type *Ty = I.getAllocatedType();
      AllocatedSize = SaturatingMultiplyAdd(
          AllocSize->getLimitedValue(),
          DL.getTypeAllocSize(Ty).getKnownMinValue(), AllocatedSize);
      // ...
    }
    HasDynamicAlloca = true;
    return false;
  }

  // Accumulate the allocated size.
  if (I.isStaticAlloca()) {
    Type *Ty = I.getAllocatedType();
    AllocatedSize = SaturatingAdd(DL.getTypeAllocSize(Ty).getKnownMinValue(),
                                  AllocatedSize);
  }

  // Dynamic allocas currently disable inlining altogether.
  if (!I.isStaticAlloca())
    HasDynamicAlloca = true;

  return false;
}
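// AllocatedSize is accumulated with SaturatingAdd / SaturatingMultiplyAdd so
// that adversarially large or overflowing alloca sizes clamp at the maximum
// instead of wrapping around and slipping past the stack-size thresholds.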
bool CallAnalyzer::visitPHI(PHINode &I) {
  // ...

  bool CheckSROA = I.getType()->isPointerTy();

  // Track the constant or pointer-with-constant-offset seen so far.
  Constant *FirstC = nullptr;
  std::pair<Value *, APInt> FirstBaseAndOffset = {nullptr, ZeroOffset};
  Value *FirstV = nullptr;

  for (unsigned i = 0, e = I.getNumIncomingValues(); i != e; ++i) {
    BasicBlock *Pred = I.getIncomingBlock(i);
    // If the incoming block is dead, skip it.
    if (DeadBlocks.count(Pred))
      continue;
    // If the parent block of the phi is not the known successor of the
    // incoming block, skip it.
    BasicBlock *KnownSuccessor = KnownSuccessors[Pred];
    if (KnownSuccessor && KnownSuccessor != I.getParent())
      continue;

    Value *V = I.getIncomingValue(i);
    // If the incoming value is this phi itself, skip it.
    if (&I == V)
      continue;

    Constant *C = dyn_cast<Constant>(V);
    if (!C)
      C = SimplifiedValues.lookup(V);

    std::pair<Value *, APInt> BaseAndOffset = {nullptr, ZeroOffset};
    if (!C && CheckSROA)
      BaseAndOffset = ConstantOffsetPtrs.lookup(V);

    if (!C && !BaseAndOffset.first)
      // Neither a constant nor a pointer with constant offset: exit early.
      return true;

    if (FirstC) {
      if (FirstC == C)
        continue;
      return true;
    }

    if (FirstV) {
      if (FirstBaseAndOffset == BaseAndOffset)
        continue;
      return true;
    }

    if (C) {
      FirstC = C;
      continue;
    }

    FirstV = V;
    FirstBaseAndOffset = BaseAndOffset;
  }

  // Check if we can simplify the PHI to a constant.
  if (FirstC) {
    SimplifiedValues[&I] = FirstC;
    return true;
  }

  // Check if we can simplify the PHI to a pointer with constant offset.
  if (FirstBaseAndOffset.first) {
    ConstantOffsetPtrs[&I] = FirstBaseAndOffset;

    if (auto *SROAArg = getSROAArgForValueOrNull(FirstV))
      SROAArgValues[&I] = SROAArg;
  }

  return true;
}
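// A PHI stays free only if every live incoming value agrees: either they all
// simplify to one constant, or they all carry the same base pointer plus the
// same constant offset. Anything mixed bails out and the PHI is charged like
// an ordinary instruction.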
bool CallAnalyzer::canFoldInboundsGEP(GetElementPtrInst &I) {
  // Check if we have a base + offset for the pointer.
  std::pair<Value *, APInt> BaseAndOffset =
      ConstantOffsetPtrs.lookup(I.getPointerOperand());
  if (!BaseAndOffset.first)
    return false;

  // Check if the offset of this GEP is constant, and if so accumulate it into
  // Offset.
  if (!accumulateGEPOffset(cast<GEPOperator>(I), BaseAndOffset.second))
    return false;

  // Add the result as a new mapping to Base + Offset.
  ConstantOffsetPtrs[&I] = BaseAndOffset;
  return true;
}

bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
  auto *SROAArg = getSROAArgForValueOrNull(I.getPointerOperand());

  // Check whether a GEP's indices are all constant (or simplified to one).
  auto IsGEPOffsetConstant = [&](GetElementPtrInst &GEP) {
    for (const Use &Op : GEP.indices())
      if (!isa<Constant>(Op) && !SimplifiedValues.lookup(Op))
        return false;
    return true;
  };

  // ...

  if ((I.isInBounds() && canFoldInboundsGEP(I)) || IsGEPOffsetConstant(I)) {
    if (SROAArg)
      SROAArgValues[&I] = SROAArg;

    // Constant GEPs are modeled as free.
    return true;
  }

  // Variable GEPs will require math and will disable SROA.
  if (SROAArg)
    disableSROAForArg(SROAArg);
  return isGEPFree(I);
}
bool CallAnalyzer::simplifyInstruction(Instruction &I) {
  SmallVector<Constant *> COps;
  for (Value *Op : I.operands()) {
    Constant *COp = dyn_cast<Constant>(Op);
    if (!COp)
      COp = SimplifiedValues.lookup(Op);
    if (!COp)
      return false;
    COps.push_back(COp);
  }
  auto *C = ConstantFoldInstOperands(&I, COps, DL);
  if (!C)
    return false;
  SimplifiedValues[&I] = C;
  return true;
}

// Try to simplify a call to llvm.is.constant.
bool CallAnalyzer::simplifyIntrinsicCallIsConstant(CallBase &CB) {
  Value *Arg = CB.getArgOperand(0);
  auto *C = dyn_cast<Constant>(Arg);

  if (!C)
    C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(Arg));

  Type *RT = CB.getFunctionType()->getReturnType();
  SimplifiedValues[&CB] = ConstantInt::get(RT, C ? 1 : 0);
  return true;
}

bool CallAnalyzer::simplifyIntrinsicCallObjectSize(CallBase &CB) {
  // Per the langref, the fourth argument to llvm.objectsize determines if the
  // value should be evaluated at runtime.
  if (cast<ConstantInt>(CB.getArgOperand(3))->isOne())
    return false;

  Value *V = lowerObjectSizeCall(&cast<IntrinsicInst>(CB), DL, nullptr,
                                 /*MustSucceed=*/true);
  Constant *C = dyn_cast_or_null<Constant>(V);
  if (C)
    SimplifiedValues[&CB] = C;
  return C;
}
bool CallAnalyzer::visitBitCast(BitCastInst &I) {
  // Propagate constants through bitcasts.
  if (simplifyInstruction(I))
    return true;

  // Track base/offsets through casts; casts don't change the offset.
  std::pair<Value *, APInt> BaseAndOffset =
      ConstantOffsetPtrs.lookup(I.getOperand(0));
  if (BaseAndOffset.first)
    ConstantOffsetPtrs[&I] = BaseAndOffset;

  // Also look for SROA candidates here.
  if (auto *SROAArg = getSROAArgForValueOrNull(I.getOperand(0)))
    SROAArgValues[&I] = SROAArg;

  // Bitcasts are always zero cost.
  return true;
}
bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
  // Propagate constants through ptrtoint.
  if (simplifyInstruction(I))
    return true;

  // Track base/offset pairs when converted to a plain integer, provided the
  // integer is large enough to represent the pointer.
  unsigned IntegerSize = I.getType()->getScalarSizeInBits();
  unsigned AS = I.getOperand(0)->getType()->getPointerAddressSpace();
  if (IntegerSize == DL.getPointerSizeInBits(AS)) {
    std::pair<Value *, APInt> BaseAndOffset =
        ConstantOffsetPtrs.lookup(I.getOperand(0));
    if (BaseAndOffset.first)
      ConstantOffsetPtrs[&I] = BaseAndOffset;
  }

  // ...

  if (auto *SROAArg = getSROAArgForValueOrNull(I.getOperand(0)))
    SROAArgValues[&I] = SROAArg;

  // ...
}
bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
  // Propagate constants through inttoptr.
  if (simplifyInstruction(I))
    return true;

  // Track base/offset pairs when round-tripped through a pointer without
  // modifications, provided the integer is not too large.
  Value *Op = I.getOperand(0);
  unsigned IntegerSize = Op->getType()->getScalarSizeInBits();
  if (IntegerSize <= DL.getPointerTypeSizeInBits(I.getType())) {
    std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op);
    if (BaseAndOffset.first)
      ConstantOffsetPtrs[&I] = BaseAndOffset;
  }

  // "Propagate" SROA here in the same manner as we do for ptrtoint above.
  if (auto *SROAArg = getSROAArgForValueOrNull(Op))
    SROAArgValues[&I] = SROAArg;

  // ...
}
bool CallAnalyzer::visitCastInst(CastInst &I) {
  // Propagate constants through casts.
  if (simplifyInstruction(I))
    return true;

  // Disable SROA in the face of arbitrary casts we don't explicitly list
  // elsewhere.
  disableSROA(I.getOperand(0));

  // If this is a floating-point cast and the target says the operation is
  // expensive, it may eventually become a library call; treat the cost as
  // such.
  switch (I.getOpcode()) {
  case Instruction::FPTrunc:
  case Instruction::FPExt:
  case Instruction::UIToFP:
  case Instruction::SIToFP:
  case Instruction::FPToUI:
  case Instruction::FPToSI:
    if (TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive)
      onCallPenalty();
    break;
  default:
    break;
  }

  // ...
}
bool CallAnalyzer::isKnownNonNullInCallee(Value *V) {
  // Arguments with the nonnull attribute are known non-null in the callee.
  if (Argument *A = dyn_cast<Argument>(V))
    if (paramHasAttr(A, Attribute::NonNull))
      return true;

  // Alloca-derived arguments are never null.
  if (isAllocaDerivedArg(V))
    return true;

  return false;
}

bool CallAnalyzer::allowSizeGrowth(CallBase &Call) {
  // If the normal destination of the invoke or the parent block of the call
  // site is unreachable-terminated, there is little point in inlining this
  // unless there is literally zero cost.
  // ...
  if (InvokeInst *II = dyn_cast<InvokeInst>(&Call)) {
    if (isa<UnreachableInst>(II->getNormalDest()->getTerminator()))
      return false;
  } else if (isa<UnreachableInst>(Call.getParent()->getTerminator()))
    return false;

  return true;
}
bool InlineCostCallAnalyzer::isColdCallSite(CallBase &Call,
                                            BlockFrequencyInfo *CallerBFI) {
  // If a global profile summary is available, the call site's coldness is
  // determined from it.
  if (PSI && PSI->hasProfileSummary())
    return PSI->isColdCallSite(Call, CallerBFI);

  // Otherwise we need BFI to be available.
  if (!CallerBFI)
    return false;

  // Determine whether the call site is cold relative to the caller's entry
  // frequency.
  const BranchProbability ColdProb(ColdCallSiteRelFreq, 100);
  auto CallSiteBB = Call.getParent();
  auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB);
  auto CallerEntryFreq =
      CallerBFI->getBlockFreq(&(Call.getCaller()->getEntryBlock()));
  return CallSiteFreq < CallerEntryFreq * ColdProb;
}

std::optional<int>
InlineCostCallAnalyzer::getHotCallSiteThreshold(CallBase &Call,
                                                BlockFrequencyInfo *CallerBFI) {
  // If a global profile summary is available, the call site's hotness is
  // determined from it.
  if (PSI && PSI->hasProfileSummary() && PSI->isHotCallSite(Call, CallerBFI))
    return Params.HotCallSiteThreshold;

  // Otherwise we need BFI and a locally hot call site threshold.
  if (!CallerBFI || !Params.LocallyHotCallSiteThreshold)
    return std::nullopt;

  // Determine whether the call site is hot relative to the caller's entry
  // frequency.
  const BasicBlock *CallSiteBB = Call.getParent();
  BlockFrequency CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB);
  BlockFrequency CallerEntryFreq(CallerBFI->getEntryFreq());
  std::optional<BlockFrequency> Limit = CallerEntryFreq.mul(HotCallSiteRelFreq);
  if (Limit && CallSiteFreq >= *Limit)
    return Params.LocallyHotCallSiteThreshold;

  // Otherwise treat it normally.
  return std::nullopt;
}
void InlineCostCallAnalyzer::updateThreshold(CallBase &Call, Function &Callee) {
  // If no size growth is allowed for this inlining, set Threshold to 0.
  if (!allowSizeGrowth(Call)) {
    Threshold = 0;
    return;
  }

  Function *Caller = Call.getCaller();

  // return min(A, B) if B is valid.
  auto MinIfValid = [](int A, std::optional<int> B) {
    return B ? std::min(A, *B) : A;
  };

  // return max(A, B) if B is valid.
  auto MaxIfValid = [](int A, std::optional<int> B) {
    return B ? std::max(A, *B) : A;
  };

  // Various bonus percentages, applied to Threshold further below.
  int SingleBBBonusPercent = 50;
  int VectorBonusPercent = TTI.getInlinerVectorBonusPercent();
  int LastCallToStaticBonus = InlineConstants::LastCallToStaticBonus;

  // Lambda to delete all of the above bonuses.
  auto DisallowAllBonuses = [&]() {
    SingleBBBonusPercent = 0;
    VectorBonusPercent = 0;
    LastCallToStaticBonus = 0;
  };

  // Use the OptMinSizeThreshold or OptSizeThreshold knob if available, and
  // reduce the threshold if the caller has the necessary attribute.
  if (Caller->hasMinSize()) {
    Threshold = MinIfValid(Threshold, Params.OptMinSizeThreshold);
    // For minsize, disable the single-BB and vector bonuses (but not the
    // last-call-to-static bonus, which is likely to reduce code size).
    SingleBBBonusPercent = 0;
    VectorBonusPercent = 0;
  } else if (Caller->hasOptSize())
    Threshold = MinIfValid(Threshold, Params.OptSizeThreshold);

  // Adjust the threshold based on the inlinehint attribute and profile-based
  // hotness information if the caller does not have the MinSize attribute.
  if (!Caller->hasMinSize()) {
    if (Callee.hasFnAttribute(Attribute::InlineHint))
      Threshold = MaxIfValid(Threshold, Params.HintThreshold);

    // ... (hot call sites raise the threshold via getHotCallSiteThreshold;
    // cold call sites clamp it via Params.ColdCallSiteThreshold and also)
      DisallowAllBonuses();
    // ...

    if (PSI) {
      if (PSI->isFunctionEntryHot(&Callee)) {
        // Even when call site hotness cannot be determined, a hot callee is a
        // weaker hint for a threshold increase.
        Threshold = MaxIfValid(Threshold, Params.HintThreshold);
      } else if (PSI->isFunctionEntryCold(&Callee)) {
        Threshold = MinIfValid(Threshold, Params.ColdThreshold);
        DisallowAllBonuses();
      }
    }
  }

  // ...

  // Finally, compute the bonuses from the resulting threshold.
  SingleBBBonus = Threshold * SingleBBBonusPercent / 100;
  VectorBonus = Threshold * VectorBonusPercent / 100;

  // If there is only one call to the function and it has internal linkage,
  // the cost of inlining it drops dramatically: the last call to a static is
  // effectively free.
  if (isSoleCallToLocalFunction(Call, F)) {
    Cost -= LastCallToStaticBonus;
    StaticBonusApplied = LastCallToStaticBonus;
  }
}
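// updateThreshold only moves Threshold via min/max against the optional
// per-situation knobs (optsize/minsize, inlinehint, hot and cold call sites);
// the single-BB and vector bonuses derived here are applied speculatively in
// onAnalysisStart() and clawed back in finalizeAnalysis() if unearned.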
bool CallAnalyzer::visitCmpInst(CmpInst &I) {
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
  // First try to handle simplified comparisons.
  if (simplifyInstruction(I))
    return true;

  if (I.getOpcode() == Instruction::FCmp)
    return false;

  // Otherwise look for a comparison between constant offset pointers with a
  // common base.
  Value *LHSBase, *RHSBase;
  APInt LHSOffset, RHSOffset;
  std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
  if (LHSBase) {
    std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
    if (RHSBase && LHSBase == RHSBase) {
      // We have common bases, fold the icmp to a constant based on the
      // offsets.
      Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
      Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
      if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) {
        SimplifiedValues[&I] = C;
        ++NumConstantPtrCmps;
        return true;
      }
    }
  }

  auto isImplicitNullCheckCmp = [](const CmpInst &I) {
    for (auto *User : I.users())
      if (auto *Instr = dyn_cast<Instruction>(User))
        if (!Instr->getMetadata(LLVMContext::MD_make_implicit))
          return false;
    return true;
  };

  // If the comparison is an equality comparison with null, we can simplify it
  // when we know the value (argument) can't be null.
  if (I.isEquality() && isa<ConstantPointerNull>(I.getOperand(1))) {
    if (isKnownNonNullInCallee(I.getOperand(0))) {
      bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE;
      SimplifiedValues[&I] = IsNotEqual ? ConstantInt::getTrue(I.getType())
                                        : ConstantInt::getFalse(I.getType());
      return true;
    }

    // Implicit null checks act as unconditional branches.
    if (isImplicitNullCheckCmp(I))
      return true;
  }
  return handleSROA(I.getOperand(0), isa<ConstantPointerNull>(I.getOperand(1)));
}
bool CallAnalyzer::visitSub(BinaryOperator &I) {
  // Try to handle a special case: we can fold computing the difference of two
  // constant-related pointers.
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
  Value *LHSBase, *RHSBase;
  APInt LHSOffset, RHSOffset;
  std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
  if (LHSBase) {
    std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
    if (RHSBase && LHSBase == RHSBase) {
      // We have common bases, fold the subtract to a constant based on the
      // offsets.
      Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
      Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
      if (Constant *C = ConstantExpr::getSub(CLHS, CRHS)) {
        SimplifiedValues[&I] = C;
        ++NumConstantPtrDiffs;
        return true;
      }
    }
  }

  // Otherwise fall back to the generic logic for simplifying and handling
  // instructions.
  return Base::visitSub(I);
}
bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
  Constant *CLHS = dyn_cast<Constant>(LHS);
  if (!CLHS)
    CLHS = SimplifiedValues.lookup(LHS);
  Constant *CRHS = dyn_cast<Constant>(RHS);
  if (!CRHS)
    CRHS = SimplifiedValues.lookup(RHS);

  Value *SimpleV = nullptr;
  if (auto FI = dyn_cast<FPMathOperator>(&I))
    SimpleV = simplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS, CRHS ? CRHS : RHS,
                            FI->getFastMathFlags(), DL);
  else
    SimpleV =
        simplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS, CRHS ? CRHS : RHS, DL);

  if (Constant *C = dyn_cast_or_null<Constant>(SimpleV))
    SimplifiedValues[&I] = C;

  if (SimpleV)
    return true;

  // Disable any SROA on arguments to arbitrary, unsimplified binary operators.
  disableSROA(LHS);
  disableSROA(RHS);

  // If the instruction is floating point, and the target says this operation
  // is expensive, this may eventually become a library call. Treat the cost
  // as such. Unless it's fneg, which can be implemented with an xor.
  using namespace llvm::PatternMatch;
  if (I.getType()->isFloatingPointTy() &&
      TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive &&
      !match(&I, m_FNeg(m_Value())))
    onCallPenalty();

  return false;
}

bool CallAnalyzer::visitFNeg(UnaryOperator &I) {
  Value *Op = I.getOperand(0);
  Constant *COp = dyn_cast<Constant>(Op);
  if (!COp)
    COp = SimplifiedValues.lookup(Op);

  Value *SimpleV = simplifyFNegInst(
      COp ? COp : Op, cast<FPMathOperator>(I).getFastMathFlags(), DL);

  if (Constant *C = dyn_cast_or_null<Constant>(SimpleV))
    SimplifiedValues[&I] = C;

  if (SimpleV)
    return true;

  // Disable any SROA on arguments to arbitrary, unsimplified fneg.
  disableSROA(Op);

  return false;
}
bool CallAnalyzer::visitLoad(LoadInst &I) {
  if (handleSROA(I.getPointerOperand(), I.isSimple()))
    return true;

  // If the data is already loaded from this address and hasn't been clobbered
  // by any stores or calls, this load is likely to be redundant and can be
  // eliminated.
  if (EnableLoadElimination &&
      !LoadAddrSet.insert(I.getPointerOperand()).second && I.isUnordered()) {
    onLoadEliminationOpportunity();
    return true;
  }

  onMemAccess();
  return false;
}

bool CallAnalyzer::visitStore(StoreInst &I) {
  if (handleSROA(I.getPointerOperand(), I.isSimple()))
    return true;

  // The store can potentially clobber loads and prevent repeated loads from
  // being eliminated.
  disableLoadElimination();

  onMemAccess();
  return false;
}

bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) {
  // ...
  return Base::visitExtractValue(I);
}

bool CallAnalyzer::visitInsertValue(InsertValueInst &I) {
  // ...
  return Base::visitInsertValue(I);
}
bool CallAnalyzer::simplifyCallSite(Function *F, CallBase &Call) {
  // Only bother if constant folding this call is possible at all.
  if (!canConstantFoldCallTo(&Call, F))
    return false;

  // Try to re-map the arguments to constants.
  SmallVector<Constant *, 4> ConstantArgs;
  ConstantArgs.reserve(Call.arg_size());
  for (Value *I : Call.args()) {
    Constant *C = dyn_cast<Constant>(I);
    if (!C)
      C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(I));
    if (!C)
      return false; // This argument doesn't map to a constant.

    ConstantArgs.push_back(C);
  }
  if (Constant *C = ConstantFoldCall(&Call, F, ConstantArgs)) {
    SimplifiedValues[&Call] = C;
    return true;
  }

  return false;
}
bool CallAnalyzer::visitCallBase(CallBase &Call) {
  if (!onCallBaseVisitStart(Call))
    return true;

  if (Call.hasFnAttr(Attribute::ReturnsTwice) &&
      !F.hasFnAttribute(Attribute::ReturnsTwice)) {
    // This aborts the entire analysis.
    ExposesReturnsTwice = true;
    return false;
  }
  if (isa<CallInst>(Call) && cast<CallInst>(Call).cannotDuplicate())
    ContainsNoDuplicateCall = true;

  Function *F = Call.getCalledFunction();
  bool IsIndirectCall = !F;
  if (IsIndirectCall) {
    // Check if this happens to be an indirect function call to a known
    // function in this inline context. If not, we've done all we can.
    Value *Callee = Call.getCalledOperand();
    F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee));
    if (!F || F->getFunctionType() != Call.getFunctionType()) {
      onCallArgumentSetup(Call);

      if (!Call.onlyReadsMemory())
        disableLoadElimination();
      return Base::visitCallBase(Call);
    }
  }

  assert(F && "Expected a call to a known function");

  // When we have a concrete function, first try to simplify it directly.
  if (simplifyCallSite(F, Call))
    return true;

  // Next check if it is an intrinsic we know about.
  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&Call)) {
    switch (II->getIntrinsicID()) {
    default:
      if (!Call.onlyReadsMemory() && !isAssumeLikeIntrinsic(II))
        disableLoadElimination();
      return Base::visitCallBase(Call);

    case Intrinsic::load_relative:
      onLoadRelativeIntrinsic();
      return false;

    case Intrinsic::memset:
    case Intrinsic::memcpy:
    case Intrinsic::memmove:
      disableLoadElimination();
      // SROA can usually chew through these intrinsics, but they aren't free.
      return false;
    case Intrinsic::icall_branch_funnel:
    case Intrinsic::localescape:
      HasUninlineableIntrinsic = true;
      return false;
    case Intrinsic::vastart:
      InitsVargArgs = true;
      return false;
    case Intrinsic::launder_invariant_group:
    case Intrinsic::strip_invariant_group:
      if (auto *SROAArg = getSROAArgForValueOrNull(II->getOperand(0)))
        SROAArgValues[II] = SROAArg;
      return true;
    case Intrinsic::is_constant:
      return simplifyIntrinsicCallIsConstant(Call);
    case Intrinsic::objectsize:
      return simplifyIntrinsicCallObjectSize(Call);
    }
  }

  if (F == Call.getFunction()) {
    // This flag will fully abort the analysis, so don't bother with anything
    // else.
    IsRecursiveCall = true;
    if (!AllowRecursiveCall)
      return false;
  }

  if (TTI.isLoweredToCall(F)) {
    onLoweredCall(F, Call, IsIndirectCall);
  }

  if (!(Call.onlyReadsMemory() || (IsIndirectCall && F->onlyReadsMemory())))
    disableLoadElimination();
  return Base::visitCallBase(Call);
}
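// Order matters in visitCallBase: first devirtualize through SimplifiedValues,
// then try to constant-fold the whole call, then special-case intrinsics, and
// only charge the generic lowered-call penalty for whatever remains.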
bool CallAnalyzer::visitReturnInst(ReturnInst &RI) {
  // The first return is free; any additional one costs like a branch.
  bool Free = !HasReturn;
  HasReturn = true;
  return Free;
}

bool CallAnalyzer::visitBranchInst(BranchInst &BI) {
  // We model unconditional branches as essentially free; conditional branches
  // whose condition will fold away are also free.
  return BI.isUnconditional() || isa<ConstantInt>(BI.getCondition()) ||
         BI.getMetadata(LLVMContext::MD_make_implicit) ||
         isa_and_nonnull<ConstantInt>(
             SimplifiedValues.lookup(BI.getCondition()));
}
bool CallAnalyzer::visitSelectInst(SelectInst &SI) {
  bool CheckSROA = SI.getType()->isPointerTy();
  Value *TrueVal = SI.getTrueValue();
  Value *FalseVal = SI.getFalseValue();

  Constant *TrueC = dyn_cast<Constant>(TrueVal);
  if (!TrueC)
    TrueC = SimplifiedValues.lookup(TrueVal);
  Constant *FalseC = dyn_cast<Constant>(FalseVal);
  if (!FalseC)
    FalseC = SimplifiedValues.lookup(FalseVal);
  Constant *CondC =
      dyn_cast_or_null<Constant>(SimplifiedValues.lookup(SI.getCondition()));

  if (!CondC) {
    // Select C, X, X => X
    if (TrueC == FalseC && TrueC) {
      SimplifiedValues[&SI] = TrueC;
      return true;
    }

    if (!CheckSROA)
      return Base::visitSelectInst(SI);

    std::pair<Value *, APInt> TrueBaseAndOffset =
        ConstantOffsetPtrs.lookup(TrueVal);
    std::pair<Value *, APInt> FalseBaseAndOffset =
        ConstantOffsetPtrs.lookup(FalseVal);
    if (TrueBaseAndOffset == FalseBaseAndOffset && TrueBaseAndOffset.first) {
      ConstantOffsetPtrs[&SI] = TrueBaseAndOffset;

      if (auto *SROAArg = getSROAArgForValueOrNull(TrueVal))
        SROAArgValues[&SI] = SROAArg;
      return true;
    }

    return Base::visitSelectInst(SI);
  }

  // Select condition is a constant.
  Value *SelectedV = CondC->isAllOnesValue()  ? TrueVal
                     : (CondC->isNullValue()) ? FalseVal
                                              : nullptr;
  if (!SelectedV) {
    // The condition is a vector constant that is neither all 1s nor all 0s.
    // If all operands are constants, ConstantFoldSelectInstruction() can
    // handle cases such as select vectors.
    if (TrueC && FalseC) {
      if (auto *C = ConstantFoldSelectInstruction(CondC, TrueC, FalseC)) {
        SimplifiedValues[&SI] = C;
        return true;
      }
    }
    return Base::visitSelectInst(SI);
  }

  // The condition is either all 1s or all 0s, so SI simplifies.
  if (Constant *SelectedC = dyn_cast<Constant>(SelectedV)) {
    SimplifiedValues[&SI] = SelectedC;
    return true;
  }

  if (!CheckSROA)
    return true;

  std::pair<Value *, APInt> BaseAndOffset =
      ConstantOffsetPtrs.lookup(SelectedV);
  if (BaseAndOffset.first) {
    ConstantOffsetPtrs[&SI] = BaseAndOffset;

    if (auto *SROAArg = getSROAArgForValueOrNull(SelectedV))
      SROAArgValues[&SI] = SROAArg;
  }

  return true;
}
bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
  // We model switches on constant (or simplified-to-constant) conditions as
  // free, like unconditional branches.
  if (isa<ConstantInt>(SI.getCondition()))
    return true;
  if (Value *V = SimplifiedValues.lookup(SI.getCondition()))
    if (isa<ConstantInt>(V))
      return true;

  // Assume the most general case: the switch is lowered into either a jump
  // table, a bit test, or a balanced binary tree of case clusters.
  unsigned JumpTableSize = 0;
  BlockFrequencyInfo *BFI = GetBFI ? &(GetBFI(F)) : nullptr;
  unsigned NumCaseCluster =
      TTI.getEstimatedNumberOfCaseClusters(SI, JumpTableSize, PSI, BFI);

  onFinalizeSwitch(JumpTableSize, NumCaseCluster);
  return false;
}

bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) {
  // We never want to inline functions that contain an indirectbr, because the
  // destination could be anywhere in the module.
  HasIndirectBr = true;
  return false;
}
bool CallAnalyzer::visitResumeInst(ResumeInst &RI) {
  // FIXME: It's not clear that a single instruction is an accurate model for
  // the inline cost of a resume instruction.
  return false;
}

// ...

bool CallAnalyzer::visitInstruction(Instruction &I) {
  // ...
  // We found no special handling, and so will squarely charge for the
  // instruction. Disable any SROA on its operands.
  for (const Use &Op : I.operands())
    disableSROA(Op);

  return false;
}

InlineResult
CallAnalyzer::analyzeBlock(BasicBlock *BB,
                           SmallPtrSetImpl<const Value *> &EphValues) {
  for (Instruction &I : *BB) {
    // Debug intrinsics and pseudo-instructions don't count against cost.
    if (I.isDebugOrPseudoInst())
      continue;

    // Skip ephemeral values.
    if (EphValues.count(&I))
      continue;

    ++NumInstructions;
    if (isa<ExtractElementInst>(I) || I.getType()->isVectorTy())
      ++NumVectorInstructions;

    // Visit the instruction with our InstVisitor; the visit returns true if
    // the instruction was consumed (simplified) in some way.
    onInstructionAnalysisStart(&I);

    if (Base::visit(&I))
      ++NumInstructionsSimplified;
    else
      onMissedSimplification();

    onInstructionAnalysisFinish(&I);
    using namespace ore;
    // If visiting this instruction detected an uninlinable pattern, abort.
    InlineResult IR = InlineResult::success();
    if (IsRecursiveCall && !AllowRecursiveCall)
      IR = InlineResult::failure("recursive");
    else if (ExposesReturnsTwice)
      IR = InlineResult::failure("exposes returns twice");
    else if (HasDynamicAlloca)
      IR = InlineResult::failure("dynamic alloca");
    else if (HasIndirectBr)
      IR = InlineResult::failure("indirect branch");
    else if (HasUninlineableIntrinsic)
      IR = InlineResult::failure("uninlinable intrinsic");
    else if (InitsVargArgs)
      IR = InlineResult::failure("varargs");
    if (!IR.isSuccess()) {
      if (ORE)
        ORE->emit([&]() {
          return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
                                          &CandidateCall)
                 << NV("Callee", &F) << " has uninlinable pattern ("
                 << NV("InlineResult", IR.getFailureReason())
                 << ") and cost is not fully computed";
        });
      return IR;
    }

    // If the caller is a recursive function, we don't want to inline
    // functions which allocate a lot of stack space because it would increase
    // the caller's stack usage dramatically.
    if (IsCallerRecursive && AllocatedSize > RecurStackSizeThreshold) {
      auto IR =
          InlineResult::failure("recursive and allocates too much stack space");
      if (ORE)
        ORE->emit([&]() {
          return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
                                          &CandidateCall)
                 << NV("Callee", &F) << " is "
                 << NV("InlineResult", IR.getFailureReason())
                 << ". Cost is not fully computed";
        });
      return IR;
    }

    // Check if we've passed the maximum possible threshold, so we don't spin
    // in huge basic blocks that will never inline.
    if (Cost >= Threshold && !ComputeFullInlineCost)
      return InlineResult::failure(
          "Call site analysis is not favorable to inlining.");
  }

  return InlineResult::success();
}
ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
  if (!V->getType()->isPointerTy())
    return nullptr;

  unsigned AS = V->getType()->getPointerAddressSpace();
  unsigned IntPtrWidth = DL.getIndexSizeInBits(AS);
  APInt Offset = APInt::getZero(IntPtrWidth);

  // Even though we don't look through PHI nodes, we could be called on an
  // instruction in an unreachable block, which may be on a cycle.
  SmallPtrSet<Value *, 4> Visited;
  Visited.insert(V);
  do {
    if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
      if (!GEP->isInBounds() || !accumulateGEPOffset(*GEP, Offset))
        return nullptr;
      V = GEP->getPointerOperand();
    } else if (Operator::getOpcode(V) == Instruction::BitCast) {
      V = cast<Operator>(V)->getOperand(0);
    } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
      if (GA->isInterposable())
        break;
      V = GA->getAliasee();
    } else {
      break;
    }
    assert(V->getType()->isPointerTy() && "Unexpected operand type!");
  } while (Visited.insert(V).second);

  Type *IdxPtrTy = DL.getIndexType(V->getType());
  return cast<ConstantInt>(ConstantInt::get(IdxPtrTy, Offset));
}
void CallAnalyzer::findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB) {
  // A CFG edge is dead if the predecessor is dead or the predecessor has a
  // known successor which is not the one under exam.
  auto IsEdgeDead = [&](BasicBlock *Pred, BasicBlock *Succ) {
    return (DeadBlocks.count(Pred) ||
            (KnownSuccessors[Pred] && KnownSuccessors[Pred] != Succ));
  };

  // A block is dead if all of its incoming edges are dead.
  auto IsNewlyDead = [&](BasicBlock *BB) {
    return (!DeadBlocks.count(BB) &&
            llvm::all_of(predecessors(BB),
                         [&](BasicBlock *P) { return IsEdgeDead(P, BB); }));
  };

  for (BasicBlock *Succ : successors(CurrBB)) {
    if (Succ == NextBB || !IsNewlyDead(Succ))
      continue;
    SmallVector<BasicBlock *, 4> NewDead;
    NewDead.push_back(Succ);
    while (!NewDead.empty()) {
      BasicBlock *Dead = NewDead.pop_back_val();
      if (DeadBlocks.insert(Dead).second)
        // Continue growing the dead block list.
        for (BasicBlock *S : successors(Dead))
          if (IsNewlyDead(S))
            NewDead.push_back(S);
    }
  }
}
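// Once a branch or switch folds to a single successor, every block reachable
// only through the untaken edges is added to DeadBlocks; dead blocks are then
// skipped by the main walk, which is how "free" control flow compounds.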
InlineResult CallAnalyzer::analyze() {
  ++NumCallsAnalyzed;

  auto Result = onAnalysisStart();
  if (!Result.isSuccess())
    return Result;

  if (F.empty())
    return InlineResult::success();

  Function *Caller = CandidateCall.getFunction();
  // Check if the caller function is recursive itself.
  for (User *U : Caller->users()) {
    CallBase *Call = dyn_cast<CallBase>(U);
    if (Call && Call->getFunction() == Caller) {
      IsCallerRecursive = true;
      break;
    }
  }

  // Populate our simplified values by mapping from function arguments to call
  // arguments with known important simplifications.
  auto CAI = CandidateCall.arg_begin();
  for (Argument &FAI : F.args()) {
    assert(CAI != CandidateCall.arg_end());
    if (Constant *C = dyn_cast<Constant>(CAI))
      SimplifiedValues[&FAI] = C;

    Value *PtrArg = *CAI;
    if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) {
      ConstantOffsetPtrs[&FAI] = std::make_pair(PtrArg, C->getValue());

      // We can SROA any pointer arguments derived from alloca instructions.
      if (auto *SROAArg = dyn_cast<AllocaInst>(PtrArg)) {
        SROAArgValues[&FAI] = SROAArg;
        onInitializeSROAArg(SROAArg);
        EnabledSROAAllocas.insert(SROAArg);
      }
    }
    ++CAI;
  }
  NumConstantArgs = SimplifiedValues.size();
  NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size();
  NumAllocaArgs = SROAArgValues.size();

  // ...

  // The worklist of live basic blocks in the callee *after* inlining. We
  // avoid adding blocks which can be proven dead for this particular call
  // site in order to get more accurate cost estimates.
  BBSetVector BBWorklist;
  BBWorklist.insert(&F.getEntryBlock());
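  // The worklist is seeded with just the entry block; successors are appended
  // only as each block's terminator is resolved, so blocks proven dead for
  // this call site are never analyzed or charged.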
  // Note that we *must not* cache the size, this loop grows the worklist.
  for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
    if (shouldStop())
      break;

    BasicBlock *BB = BBWorklist[Idx];
    if (BB->empty())
      continue;

    onBlockStart(BB);

    // Disallow inlining a blockaddress with uses other than strictly callbr.
    if (BB->hasAddressTaken())
      for (User *U : BlockAddress::get(&F, BB)->users())
        if (!isa<CallBrInst>(*U))
          return InlineResult::failure("blockaddress used outside of callbr");

    // Analyze the cost of this block. If we blow through the threshold, this
    // returns false, and we can bail out.
    InlineResult IR = analyzeBlock(BB, EphValues);
    if (!IR.isSuccess())
      return IR;

    Instruction *TI = BB->getTerminator();

    // Add in the live successors, first checking whether the terminator can
    // be simplified based on the values simplified by this call.
    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
      if (BI->isConditional()) {
        Value *Cond = BI->getCondition();
        if (ConstantInt *SimpleCond =
                dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
          BasicBlock *NextBB = BI->getSuccessor(SimpleCond->isZero() ? 1 : 0);
          BBWorklist.insert(NextBB);
          KnownSuccessors[BB] = NextBB;
          findDeadBlocks(BB, NextBB);
          continue;
        }
      }
    } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
      Value *Cond = SI->getCondition();
      if (ConstantInt *SimpleCond =
              dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
        BasicBlock *NextBB = SI->findCaseValue(SimpleCond)->getCaseSuccessor();
        BBWorklist.insert(NextBB);
        KnownSuccessors[BB] = NextBB;
        findDeadBlocks(BB, NextBB);
        continue;
      }
    }

    // If we're unable to select a particular successor, just count all of
    // them.
    for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize;
         ++TIdx)
      BBWorklist.insert(TI->getSuccessor(TIdx));

    onBlockAnalyzed(BB);
  }

  // If this is a noduplicate call, we can still inline as long as inlining
  // this would cause the removal of the caller (so the instruction is not
  // actually duplicated, just moved).
  if (!isSoleCallToLocalFunction(CandidateCall, F) && ContainsNoDuplicateCall)
    return InlineResult::failure("noduplicate");

  // If the callee's stack size exceeds the user-specified threshold, do not
  // let it be inlined.
  size_t FinalStackSizeThreshold = StackSizeThreshold;
  if (std::optional<int> AttrMaxStackSize = getStringFnAttrAsInt(
          CandidateCall, InlineConstants::MaxInlineStackSizeAttributeName))
    FinalStackSizeThreshold = *AttrMaxStackSize;
  if (AllocatedSize > FinalStackSizeThreshold)
    return InlineResult::failure("stacksize");

  return finalizeAnalysis();
}
void InlineCostCallAnalyzer::print(raw_ostream &OS) {
#define DEBUG_PRINT_STAT(x) OS << "      " #x ": " << x << "\n"
  if (PrintInstructionComments)
    F.print(OS, &Writer);
  DEBUG_PRINT_STAT(NumConstantArgs);
  DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs);
  DEBUG_PRINT_STAT(NumAllocaArgs);
  DEBUG_PRINT_STAT(NumConstantPtrCmps);
  DEBUG_PRINT_STAT(NumConstantPtrDiffs);
  DEBUG_PRINT_STAT(NumInstructionsSimplified);
  DEBUG_PRINT_STAT(NumInstructions);
  DEBUG_PRINT_STAT(SROACostSavings);
  DEBUG_PRINT_STAT(SROACostSavingsLost);
  DEBUG_PRINT_STAT(LoadEliminationCost);
  DEBUG_PRINT_STAT(ContainsNoDuplicateCall);
  DEBUG_PRINT_STAT(Cost);
  DEBUG_PRINT_STAT(Threshold);
#undef DEBUG_PRINT_STAT
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Dump stats about this call's analysis.
LLVM_DUMP_METHOD void InlineCostCallAnalyzer::dump() { print(dbgs()); }
#endif
// Test that there are no attribute conflicts between Caller and Callee that
// prevent inlining.
static bool functionsHaveCompatibleAttributes(
    Function *Caller, Function *Callee,
    function_ref<const TargetLibraryInfo &(Function &)> &GetTLI) {
  // Note that CalleeTLI must be a copy, not a reference: the legacy pass
  // manager caches the most recently created TLI and overwrites it on each
  // GetTLI call.
  auto CalleeTLI = GetTLI(*Callee);
  return GetTLI(*Caller).areInlineCompatible(CalleeTLI,
                                             InlineCallerSupersetNoBuiltin) &&
         AttributeFuncs::areInlineCompatible(*Caller, *Callee);
}
int llvm::getCallsiteCost(const CallBase &Call, const DataLayout &DL) {
  int64_t Cost = 0;
  for (unsigned I = 0, E = Call.arg_size(); I != E; ++I) {
    if (Call.isByValArgument(I)) {
      // We approximate the number of loads and stores needed by dividing the
      // size of the byval type by the target's pointer size.
      PointerType *PTy = cast<PointerType>(Call.getArgOperand(I)->getType());
      unsigned TypeSize = DL.getTypeSizeInBits(Call.getParamByValType(I));
      unsigned AS = PTy->getAddressSpace();
      unsigned PointerSize = DL.getPointerSizeInBits(AS);
      // Ceiling division.
      unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;

      // If it generates more than 8 stores it is likely to be expanded as an
      // inline memcpy, so we take that as an upper bound.
      NumStores = std::min(NumStores, 8U);

      Cost += 2 * NumStores * InstrCost;
    } else {
      // For non-byval arguments, charge one instruction per argument.
      Cost += InstrCost;
    }
  }
  // The call instruction also disappears after inlining.
  Cost += InstrCost;
  Cost += CallPenalty;
  return std::min<int64_t>(Cost, INT_MAX);
}
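// Worked example: a byval struct of 128 bytes with 64-bit pointers needs
// ceil(1024 bits / 64 bits) = 16 word copies, clamped to 8, so it is charged
// 2 * 8 * InstrCost on top of the per-call overhead.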
InlineCost llvm::getInlineCost(
    CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI,
    function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
    function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
    function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
    ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
  return getInlineCost(Call, Call.getCalledFunction(), Params, CalleeTTI,
                       GetAssumptionCache, GetTLI, GetBFI, PSI, ORE);
}

std::optional<int> llvm::getInliningCostEstimate(
    CallBase &Call, TargetTransformInfo &CalleeTTI,
    function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
    function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
    ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
  // ... (builds an InlineParams with ComputeFullInlineCost enabled)
  InlineCostCallAnalyzer CA(*Call.getCalledFunction(), Call, Params, CalleeTTI,
                            GetAssumptionCache, GetBFI, PSI, ORE, true,
                            /*IgnoreThreshold=*/true);
  auto R = CA.analyze();
  if (!R.isSuccess())
    return std::nullopt;
  return CA.getCost();
}

std::optional<InlineCostFeatures> llvm::getInliningCostFeatures(
    CallBase &Call, TargetTransformInfo &CalleeTTI,
    function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
    function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
    ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
  InlineCostFeaturesAnalyzer CFA(CalleeTTI, GetAssumptionCache, GetBFI, PSI,
                                 ORE, *Call.getCalledFunction(), Call);
  auto R = CFA.analyze();
  if (!R.isSuccess())
    return std::nullopt;
  return CFA.features();
}
std::optional<InlineResult> llvm::getAttributeBasedInliningDecision(
    CallBase &Call, Function *Callee, TargetTransformInfo &CalleeTTI,
    function_ref<const TargetLibraryInfo &(Function &)> GetTLI) {
  // Cannot inline indirect calls.
  if (!Callee)
    return InlineResult::failure("indirect call");

  // Cannot inline a coroutine before it has been split by coro-split.
  if (Callee->isPresplitCoroutine())
    return InlineResult::failure("unsplited coroutine call");

  // Never inline calls with byval arguments that do not have the alloca
  // address space.
  unsigned AllocaAS = Callee->getParent()->getDataLayout().getAllocaAddrSpace();
  for (unsigned I = 0, E = Call.arg_size(); I != E; ++I)
    if (Call.isByValArgument(I)) {
      PointerType *PTy = cast<PointerType>(Call.getArgOperand(I)->getType());
      if (PTy->getAddressSpace() != AllocaAS)
        return InlineResult::failure("byval arguments without alloca"
                                     " address space");
    }

  // Calls to functions with always-inline attributes should be inlined
  // whenever possible.
  Function *Caller = Call.getCaller();
  if (Call.hasFnAttr(Attribute::AlwaysInline)) {
    if (Call.getAttributes().hasFnAttr(Attribute::NoInline))
      return InlineResult::failure("noinline call site attribute");

    auto IsViable = isInlineViable(*Callee);
    if (IsViable.isSuccess())
      return InlineResult::success();
    return InlineResult::failure(IsViable.getFailureReason());
  }

  // Never inline functions with conflicting attributes (unless the callee
  // has the always-inline attribute).
  if (!functionsHaveCompatibleAttributes(Caller, Callee, GetTLI))
    return InlineResult::failure("conflicting attributes");

  // Don't inline this call if the caller has the optnone attribute.
  if (Caller->hasOptNone())
    return InlineResult::failure("optnone attribute");

  // Don't inline a function that treats null pointers as valid into a caller
  // that does not have this attribute.
  if (!Caller->nullPointerIsDefined() && Callee->nullPointerIsDefined())
    return InlineResult::failure("nullptr definitions incompatible");

  // Don't inline functions which can be interposed at link time.
  if (Callee->isInterposable())
    return InlineResult::failure("interposable");

  // Don't inline functions marked noinline.
  if (Callee->hasFnAttribute(Attribute::NoInline))
    return InlineResult::failure("noinline function attribute");

  // Don't inline call sites marked noinline.
  if (Call.isNoInline())
    return InlineResult::failure("noinline call site attribute");

  return std::nullopt;
}
  if (UserDecision) {
    if (UserDecision->isSuccess())
      return llvm::InlineCost::getAlways("always inline attribute");
    return llvm::InlineCost::getNever(UserDecision->getFailureReason());
  }

  LLVM_DEBUG(llvm::dbgs() << "      Analyzing call of " << Callee->getName()
                          << "... (caller:" << Call.getCaller()->getName()
                          << ")\n");

  InlineCostCallAnalyzer CA(*Callee, Call, Params, CalleeTTI,
                            GetAssumptionCache, GetBFI, PSI, ORE);
  InlineResult ShouldInline = CA.analyze();

  // Always report the cost-benefit pair if that heuristic made the decision.
  if (CA.wasDecidedByCostBenefit()) {
    if (ShouldInline.isSuccess())
      return InlineCost::getAlways("benefit over cost",
                                   CA.getCostBenefitPair());
    return InlineCost::getNever("cost over benefit", CA.getCostBenefitPair());
  }

  if (CA.wasDecidedByCostThreshold())
    return InlineCost::get(CA.getCost(), CA.getThreshold(),
                           CA.getStaticBonusApplied());

  // No details on how the decision was made; simply return always or never.
  return ShouldInline.isSuccess()
             ? InlineCost::getAlways("empty function")
             : InlineCost::getNever(ShouldInline.getFailureReason());
}
InlineResult llvm::isInlineViable(Function &F) {
  bool ReturnsTwice = F.hasFnAttribute(Attribute::ReturnsTwice);

  for (BasicBlock &BB : F) {
    // Disallow inlining of functions which contain indirect branches.
    if (isa<IndirectBrInst>(BB.getTerminator()))
      return InlineResult::failure("contains indirect branches");

    // Disallow inlining of blockaddresses which are used by non-callbr
    // instructions.
    if (BB.hasAddressTaken())
      for (User *U : BlockAddress::get(&F, &BB)->users())
        if (!isa<CallBrInst>(*U))
          return InlineResult::failure("blockaddress used outside of callbr");

    for (auto &II : BB) {
      CallBase *Call = dyn_cast<CallBase>(&II);
      if (!Call)
        continue;

      // Disallow recursive calls.
      Function *Callee = Call->getCalledFunction();
      if (&F == Callee)
        return InlineResult::failure("recursive call");

      // Disallow calls which expose returns-twice to a function not
      // previously attributed as such.
      if (!ReturnsTwice && isa<CallInst>(Call) &&
          cast<CallInst>(Call)->canReturnTwice())
        return InlineResult::failure("exposes returns-twice attribute");

      if (Callee)
        switch (Callee->getIntrinsicID()) {
        default:
          break;
        case llvm::Intrinsic::icall_branch_funnel:
          // Disallow inlining of @llvm.icall.branch.funnel because current
          // backends can't separate call targets from call arguments.
          return InlineResult::failure(
              "disallowed inlining of @llvm.icall.branch.funnel");
        case llvm::Intrinsic::localescape:
          // Disallow inlining functions that call @llvm.localescape; doing
          // this correctly would require major changes to the inliner.
          return InlineResult::failure(
              "disallowed inlining of @llvm.localescape");
        case llvm::Intrinsic::vastart:
          // Disallow inlining of functions which initialize VarArgs with
          // va_start.
          return InlineResult::failure(
              "contains VarArgs initialized with va_start");
        }
    }
  }

  return InlineResult::success();
}
static int computeThresholdFromOptLevels(unsigned OptLevel,
                                         unsigned SizeOptLevel) {
  if (OptLevel > 2)
    return InlineConstants::OptAggressiveThreshold;
  if (SizeOptLevel == 1) // -Os
    return InlineConstants::OptSizeThreshold;
  if (SizeOptLevel == 2) // -Oz
    return InlineConstants::OptMinSizeThreshold;
  return DefaultThreshold;
}
PreservedAnalyses
InlineCostAnnotationPrinterPass::run(Function &F,
                                     FunctionAnalysisManager &FAM) {
  // ...
  for (BasicBlock &BB : F) {
    for (Instruction &I : BB) {
      if (CallInst *CI = dyn_cast<CallInst>(&I)) {
        Function *CalledFunction = CI->getCalledFunction();
        if (!CalledFunction || CalledFunction->isDeclaration())
          continue;
        // ...
        InlineCostCallAnalyzer ICCA(*CalledFunction, *CI, Params, TTI,
                                    GetAssumptionCache, nullptr, &PSI, &ORE);
        ICCA.analyze();
        OS << "      Analyzing call of " << CalledFunction->getName()
           << "... (caller:" << CI->getCaller()->getName() << ")\n";
        ICCA.print(OS);
        OS << "\n";
      }
    }
  }
  return PreservedAnalyses::all();
}