#include "llvm/Config/llvm-config.h"

#define DEBUG_TYPE "inline-cost"

STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed");

static cl::opt<int>
    DefaultThreshold("inlinedefault-threshold", cl::Hidden, cl::init(225),
                     cl::desc("Default amount of inlining to perform"));

static cl::opt<bool> IgnoreTTIInlineCompatible(
    "ignore-tti-inline-compatible", cl::Hidden, cl::init(false),
    cl::desc("Ignore TTI attributes compatibility check between callee/caller "
             "during inline cost calculation"));

static cl::opt<bool> PrintInstructionComments(
    "print-instruction-comments", cl::Hidden, cl::init(false),
    cl::desc("Prints comments for instruction based on inline cost analysis"));

static cl::opt<int> InlineThreshold(
    "inline-threshold", cl::Hidden, cl::init(225),
    cl::desc("Control the amount of inlining to perform (default = 225)"));

static cl::opt<int> HintThreshold(
    "inlinehint-threshold", cl::Hidden, cl::init(325),
    cl::desc("Threshold for inlining functions with inline hint"));

static cl::opt<int>
    ColdCallSiteThreshold("inline-cold-callsite-threshold", cl::Hidden,
                          cl::init(45),
                          cl::desc("Threshold for inlining cold callsites"));

static cl::opt<bool> InlineEnableCostBenefitAnalysis(
    "inline-enable-cost-benefit-analysis", cl::Hidden, cl::init(false),
    cl::desc("Enable the cost-benefit analysis for the inliner"));

static cl::opt<int> InlineSavingsMultiplier(
    "inline-savings-multiplier", cl::Hidden, cl::init(8),
    cl::desc("Multiplier to multiply cycle savings by during inlining"));

static cl::opt<int>
    InlineSizeAllowance("inline-size-allowance", cl::Hidden, cl::init(100),
                        cl::desc("The maximum size of a callee that gets "
                                 "inlined without sufficient cycle savings"));

static cl::opt<int>
    ColdThreshold("inlinecold-threshold", cl::Hidden, cl::init(45),
                  cl::desc("Threshold for inlining functions with cold "
                           "attribute"));

static cl::opt<int>
    HotCallSiteThreshold("hot-callsite-threshold", cl::Hidden, cl::init(3000),
                         cl::desc("Threshold for hot callsites"));

static cl::opt<int> LocallyHotCallSiteThreshold(
    "locally-hot-callsite-threshold", cl::Hidden, cl::init(525),
    cl::desc("Threshold for locally hot callsites"));

static cl::opt<int> ColdCallSiteRelFreq(
    "cold-callsite-rel-freq", cl::Hidden, cl::init(2),
    cl::desc("Maximum block frequency, expressed as a percentage of caller's "
             "entry frequency, for a callsite to be cold in the absence of "
             "profile information."));

static cl::opt<int> HotCallSiteRelFreq(
    "hot-callsite-rel-freq", cl::Hidden, cl::init(60),
    cl::desc("Minimum block frequency, expressed as a multiple of caller's "
             "entry frequency, for a callsite to be hot in the absence of "
             "profile information."));

static cl::opt<int>
    InstrCost("inline-instr-cost", cl::Hidden, cl::init(5),
              cl::desc("Cost of a single instruction when inlining"));

static cl::opt<int>
    MemAccessCost("inline-memaccess-cost", cl::Hidden, cl::init(0),
                  cl::desc("Cost of load/store instruction when inlining"));

static cl::opt<int> CallPenalty(
    "inline-call-penalty", cl::Hidden, cl::init(25),
    cl::desc("Call penalty that is applied per callsite when inlining"));

static cl::opt<size_t>
    StackSizeThreshold("inline-max-stacksize", cl::Hidden,
                       cl::init(std::numeric_limits<size_t>::max()),
                       cl::desc("Do not inline functions with a stack size "
                                "that exceeds the specified limit"));

static cl::opt<size_t> RecurStackSizeThreshold(
    "recursive-inline-max-stacksize", cl::Hidden,
    cl::init(InlineConstants::TotalAllocaSizeRecursiveCaller),
    cl::desc("Do not inline recursive functions with a stack "
             "size that exceeds the specified limit"));

static cl::opt<bool> OptComputeFullInlineCost(
    "inline-cost-full", cl::Hidden,
    cl::desc("Compute the full inline cost of a call site even when the cost "
             "exceeds the threshold."));

static cl::opt<bool> InlineCallerSupersetNoBuiltin(
    "inline-caller-superset-nobuiltin", cl::Hidden, cl::init(true),
    cl::desc("Allow inlining when caller has a superset of callee's nobuiltin "
             "attributes."));

static cl::opt<bool> DisableGEPConstOperand(
    "disable-gep-const-evaluation", cl::Hidden, cl::init(false),
    cl::desc("Disables evaluation of GetElementPtr with constant operands"));
namespace llvm {
std::optional<int> getStringFnAttrAsInt(CallBase &CB, StringRef AttrKind);
} // namespace llvm

namespace InlineConstants {
int getInstrCost() { return InstrCost; }
} // namespace InlineConstants

namespace {
class InlineCostCallAnalyzer;

// This struct is used to store the cost of an instruction together with the
// threshold as observed before and after the instruction was analyzed.
struct InstructionCostDetail {
  int CostBefore = 0;
  int CostAfter = 0;
  int ThresholdBefore = 0;
  int ThresholdAfter = 0;

  int getThresholdDelta() const { return ThresholdAfter - ThresholdBefore; }

  int getCostDelta() const { return CostAfter - CostBefore; }

  bool hasThresholdChanged() const { return ThresholdAfter != ThresholdBefore; }
};

class InlineCostAnnotationWriter : public AssemblyAnnotationWriter {
private:
  InlineCostCallAnalyzer *const ICCA;

public:
  InlineCostAnnotationWriter(InlineCostCallAnalyzer *ICCA) : ICCA(ICCA) {}
  void emitInstructionAnnot(const Instruction *I,
                            formatted_raw_ostream &OS) override;
};
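
// CallAnalyzer walks the callee's IR with InstVisitor as if the function had
// already been inlined at one specific call site: arguments are mapped to
// constants, SROA-able caller allocas, or base+offset pointer pairs, and each
// visit* method reports whether an instruction would fold away. Subclasses
// override the on*() hooks below to turn those events into either a scalar
// cost (InlineCostCallAnalyzer) or a feature vector for the ML advisor
// (InlineCostFeaturesAnalyzer).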
class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
  typedef InstVisitor<CallAnalyzer, bool> Base;
  friend class InstVisitor<CallAnalyzer, bool>;

protected:
  virtual ~CallAnalyzer() = default;

  /// The TargetTransformInfo available for this compilation.
  const TargetTransformInfo &TTI;

  /// Getter for the cache of @llvm.assume intrinsics.
  function_ref<AssumptionCache &(Function &)> GetAssumptionCache;

  /// Getter for BlockFrequencyInfo.
  function_ref<BlockFrequencyInfo &(Function &)> GetBFI;

  /// Profile summary information.
  ProfileSummaryInfo *PSI;

  /// The called function.
  Function &F;

  /// The candidate callsite being analyzed.
  CallBase &CandidateCall;

  /// The DataLayout of the module and the remark emitter, if any.
  const DataLayout &DL;
  OptimizationRemarkEmitter *ORE;

  /// Called at the start and end of processing a basic block.
  virtual void onBlockStart(const BasicBlock *BB) {}
  virtual void onBlockAnalyzed(const BasicBlock *BB) {}

  /// Called before and after the analysis of an instruction.
  virtual void onInstructionAnalysisStart(const Instruction *I) {}
  virtual void onInstructionAnalysisFinish(const Instruction *I) {}

  /// Called to retrieve an early-exit decision, at the start of the analysis,
  /// and when the analysis completes.
  virtual bool shouldStop() { return false; }
  virtual InlineResult onAnalysisStart() { return InlineResult::success(); }
  virtual InlineResult finalizeAnalysis() { return InlineResult::success(); }

  /// Called when load elimination is disabled for the callee.
  virtual void onDisableLoadElimination() {}

  /// Called at the beginning of processing a call; return false to stop
  /// further processing of that call.
  virtual bool onCallBaseVisitStart(CallBase &Call) { return true; }

  /// Called to account for a call penalty or a memory access.
  virtual void onCallPenalty() {}
  virtual void onMemAccess() {}

  /// Called when a load can likely be eliminated after inlining.
  virtual void onLoadEliminationOpportunity() {}

  /// Called to account for the setup cost of a call's arguments.
  virtual void onCallArgumentSetup(const CallBase &Call) {}

  /// Called for a load_relative intrinsic and for a call that will actually
  /// be lowered to a call instruction.
  virtual void onLoadRelativeIntrinsic() {}
  virtual void onLoweredCall(Function *F, CallBase &Call,
                             bool IsIndirectCall) {}

  /// Switch-lowering hooks; the bool-returning variants return false to stop
  /// further processing of the switch.
  virtual bool onJumpTable(unsigned JumpTableSize) { return true; }
  virtual bool onCaseCluster(unsigned NumCaseCluster) { return true; }
  virtual void onFinalizeSwitch(unsigned JumpTableSize,
                                unsigned NumCaseCluster) {}

  /// Called when an instruction could not be simplified away.
  virtual void onMissedSimplification() {}

  /// SROA bookkeeping hooks.
  virtual void onInitializeSROAArg(AllocaInst *Arg) {}
  virtual void onDisableSROA(AllocaInst *Arg) {}
  virtual void onAggregateSROAUse(AllocaInst *V) {}

  bool handleSROA(Value *V, bool DoNotDisable) {
    // Check for SROA candidates in comparisons.
    if (auto *SROAArg = getSROAArgForValueOrNull(V)) {
      if (DoNotDisable) {
        onAggregateSROAUse(SROAArg);
        return true;
      }
      disableSROAForArg(SROAArg);
    }
    return false;
  }
  bool IsCallerRecursive = false;
  bool IsRecursiveCall = false;
  bool ExposesReturnsTwice = false;
  bool HasDynamicAlloca = false;
  bool ContainsNoDuplicateCall = false;
  bool HasReturn = false;
  bool HasIndirectBr = false;
  bool HasUninlineableIntrinsic = false;
  bool InitsVargArgs = false;

  /// Number of bytes allocated statically by the callee, and instruction
  /// counts.
  uint64_t AllocatedSize = 0;
  unsigned NumInstructions = 0;
  unsigned NumVectorInstructions = 0;

  /// While we walk the potentially-inlined instructions, we build up and
  /// maintain a mapping of simplified values specific to this callsite. The
  /// most important aspect we track is CFG-altering simplifications: when we
  /// prove a basic block dead, that can dramatically shift the cost of
  /// inlining a function.
  DenseMap<Value *, Constant *> SimplifiedValues;

  /// Keep track of the values which map back (through function arguments) to
  /// allocas on the caller stack which could be simplified through SROA.
  DenseMap<Value *, AllocaInst *> SROAArgValues;

  /// Keep track of Allocas for which we believe we may get SROA optimization.
  DenseSet<AllocaInst *> EnabledSROAAllocas;

  /// Keep track of values which map to a pointer base and constant offset.
  DenseMap<Value *, std::pair<Value *, APInt>> ConstantOffsetPtrs;

  /// Keep track of dead blocks due to the constant arguments.
  SmallPtrSet<BasicBlock *, 16> DeadBlocks;

  /// The mapping of blocks to their known unique successors due to the
  /// constant arguments.
  DenseMap<BasicBlock *, BasicBlock *> KnownSuccessors;

  /// The set of load addresses seen so far, used for load elimination.
  SmallPtrSet<Value *, 16> LoadAddrSet;

  /// Model the elimination of repeated loads that is expected to happen
  /// whenever we simplify away the stores that would otherwise cause them to
  /// be loads.
  bool EnableLoadElimination = true;

  /// Whether we allow inlining for recursive call.
  bool AllowRecursiveCall = false;
  AllocaInst *getSROAArgForValueOrNull(Value *V) const {
    auto It = SROAArgValues.find(V);
    if (It == SROAArgValues.end() || EnabledSROAAllocas.count(It->second) == 0)
      return nullptr;
    return It->second;
  }

  // Custom simplification and analysis helpers.
  bool isAllocaDerivedArg(Value *V);
  void disableSROAForArg(AllocaInst *SROAArg);
  void disableSROA(Value *V);
  void disableLoadElimination();
  bool isGEPFree(GetElementPtrInst &GEP);
  bool canFoldInboundsGEP(GetElementPtrInst &I);
  bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);
  bool simplifyCallSite(Function *F, CallBase &Call);
  bool simplifyInstruction(Instruction &I);
  bool simplifyIntrinsicCallIsConstant(CallBase &CB);
  bool simplifyIntrinsicCallObjectSize(CallBase &CB);
  ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);
  bool isKnownNonNullInCallee(Value *V);
  bool allowSizeGrowth(CallBase &Call);
  void findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB);
  InlineResult analyzeBlock(BasicBlock *BB,
                            SmallPtrSetImpl<const Value *> &EphValues);

  // Custom instruction visitors; each returns true if the instruction is
  // free or was simplified to a constant.
  bool visitAlloca(AllocaInst &I);
  bool visitPHI(PHINode &I);
  bool visitGetElementPtr(GetElementPtrInst &I);
  bool visitBitCast(BitCastInst &I);
  bool visitPtrToInt(PtrToIntInst &I);
  bool visitIntToPtr(IntToPtrInst &I);
  bool visitCastInst(CastInst &I);
  bool visitCmpInst(CmpInst &I);
  bool visitSub(BinaryOperator &I);
  bool visitBinaryOperator(BinaryOperator &I);
  bool visitFNeg(UnaryOperator &I);
  bool visitLoad(LoadInst &I);
  bool visitStore(StoreInst &I);
  bool visitExtractValue(ExtractValueInst &I);
  bool visitInsertValue(InsertValueInst &I);
  bool visitCallBase(CallBase &Call);
  bool visitReturnInst(ReturnInst &RI);
  bool visitBranchInst(BranchInst &BI);
  bool visitSelectInst(SelectInst &SI);
  bool visitSwitchInst(SwitchInst &SI);
  bool visitIndirectBrInst(IndirectBrInst &IBI);
  bool visitResumeInst(ResumeInst &RI);
  bool visitInstruction(Instruction &I);
public:
  CallAnalyzer(Function &Callee, CallBase &Call,
               const TargetTransformInfo &TTI,
               function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
               function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr,
               ProfileSummaryInfo *PSI = nullptr,
               OptimizationRemarkEmitter *ORE = nullptr)
      : TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI),
        PSI(PSI), F(Callee), CandidateCall(Call),
        DL(F.getParent()->getDataLayout()), ORE(ORE) {}

  InlineResult analyze();

  std::optional<Constant *> getSimplifiedValue(Instruction *I) {
    if (SimplifiedValues.contains(I))
      return SimplifiedValues[I];
    return std::nullopt;
  }

  // Keep a bunch of stats about the cost savings found so we can print them
  // out when debugging.
  unsigned NumConstantArgs = 0;
  unsigned NumConstantOffsetPtrArgs = 0;
  unsigned NumAllocaArgs = 0;
  unsigned NumConstantPtrCmps = 0;
  unsigned NumConstantPtrDiffs = 0;
  unsigned NumInstructionsSimplified = 0;
};
int64_t getExpectedNumberOfCompare(int NumCaseCluster) {
  return 3 * static_cast<int64_t>(NumCaseCluster) / 2 - 1;
}
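
// InlineCostCallAnalyzer below is the classic heuristic: it accumulates Cost,
// adjusts Threshold, and the inlining succeeds while Cost stays below
// Threshold. As a worked example of the helper above, a switch lowered to a
// balanced compare tree over 8 case clusters is expected to execute
// 3 * 8 / 2 - 1 = 11 compares on average.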
class InlineCostCallAnalyzer final : public CallAnalyzer {
  const bool ComputeFullInlineCost;
  int LoadEliminationCost = 0;

  /// Bonus to be applied when the callee has only one reachable basic block.
  int SingleBBBonus = 0;

  /// Tunable parameters that control the analysis.
  const InlineParams &Params;

  /// Upper bound for the inlining cost. Bonuses are applied to account for
  /// the speculative "expected profit" of the inlining decision.
  int Threshold = 0;

  /// The static bonus already applied to the threshold.
  int StaticBonusApplied = 0;

  /// Attempt to evaluate indirect calls to boost its inline cost.
  const bool BoostIndirectCalls;

  /// Ignore the threshold when finalizing the analysis.
  const bool IgnoreThreshold;

  /// Whether inlining is decided by the cost-benefit analysis.
  const bool CostBenefitAnalysisEnabled;

  /// Inlining cost measured in abstract units, accumulated by the visitors.
  int Cost = 0;

  /// The amount of vector bonus we may subtract back out of the threshold.
  int VectorBonus = 0;

  /// The static size of live but cold basic blocks, used by the cost-benefit
  /// analysis.
  int ColdSize = 0;

  /// The cost at the beginning of the basic block being analyzed.
  int CostAtBBStart = 0;

  bool DecidedByCostThreshold = false;
  bool DecidedByCostBenefit = false;
  std::optional<CostBenefitPair> CostBenefit;

  bool SingleBB = true;

  unsigned SROACostSavings = 0;
  unsigned SROACostSavingsLost = 0;

  /// The mapping of SROA-candidate allocas to their accumulated cost savings.
  DenseMap<AllocaInst *, int> SROAArgCosts;

  /// Per-instruction details recorded for -print-instruction-comments.
  DenseMap<const Instruction *, InstructionCostDetail> InstructionCostDetailMap;

  void updateThreshold(CallBase &Call, Function &Callee);
  bool isColdCallSite(CallBase &Call, BlockFrequencyInfo *CallerBFI);

  /// Return a higher threshold if \p Call is a hot callsite.
  std::optional<int> getHotCallSiteThreshold(CallBase &Call,
                                             BlockFrequencyInfo *CallerBFI);

  /// Handle a capped 'int' increment for Cost.
  void addCost(int64_t Inc) {
    Inc = std::max<int64_t>(std::min<int64_t>(INT_MAX, Inc), INT_MIN);
    Cost = std::max<int64_t>(std::min<int64_t>(INT_MAX, Inc + Cost), INT_MIN);
  }

  void onDisableSROA(AllocaInst *Arg) override {
    auto CostIt = SROAArgCosts.find(Arg);
    if (CostIt == SROAArgCosts.end())
      return;
    addCost(CostIt->second);
    SROACostSavings -= CostIt->second;
    SROACostSavingsLost += CostIt->second;
    SROAArgCosts.erase(CostIt);
  }
  void onDisableLoadElimination() override {
    addCost(LoadEliminationCost);
    LoadEliminationCost = 0;
  }

  bool onCallBaseVisitStart(CallBase &Call) override {
    if (std::optional<int> AttrCallThresholdBonus =
            getStringFnAttrAsInt(Call, "call-threshold-bonus"))
      Threshold += *AttrCallThresholdBonus;

    if (std::optional<int> AttrCallCost =
            getStringFnAttrAsInt(Call, "call-inline-cost")) {
      addCost(*AttrCallCost);
      // Prevent further processing of the call since we want to override its
      // cost.
      return false;
    }
    return true;
  }

  void onCallPenalty() override { addCost(CallPenalty); }

  void onMemAccess() override { addCost(MemAccessCost); }

  void onCallArgumentSetup(const CallBase &Call) override {
    // Pay the price of the argument setup. We account for the average
    // 1 instruction per call argument setup here.
    addCost(Call.arg_size() * InstrCost);
  }

  void onLoadRelativeIntrinsic() override {
    // This is normally lowered to 4 LLVM instructions.
    addCost(3 * InstrCost);
  }

  void onLoweredCall(Function *F, CallBase &Call,
                     bool IsIndirectCall) override {
    // We account for the average 1 instruction per call argument setup here.
    addCost(Call.arg_size() * InstrCost);

    // If this is an indirect call whose target we discovered through argument
    // simplification, pretend to inline the target with a custom threshold.
    if (IsIndirectCall && BoostIndirectCalls) {
      auto IndirectCallParams = Params;
      IndirectCallParams.DefaultThreshold =
          InlineConstants::IndirectCallThreshold;
      InlineCostCallAnalyzer CA(*F, Call, IndirectCallParams, TTI,
                                GetAssumptionCache, GetBFI, PSI, ORE, false);
      if (CA.analyze().isSuccess()) {
        // We were able to inline the indirect call! Subtract the cost from
        // the threshold to get the bonus we want to apply.
        Cost -= std::max(0, CA.getThreshold() - CA.getCost());
      }
    } else
      // Otherwise simply add the cost for merely making the call.
      addCost(CallPenalty);
  }
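
  // Note the trick above: any headroom the nested analyzer finds
  // (Threshold - Cost) is credited back to this call site as a speculative
  // devirtualization bonus, capped at zero so a bad nested estimate cannot
  // penalize the caller.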
  void onFinalizeSwitch(unsigned JumpTableSize,
                        unsigned NumCaseCluster) override {
    // If suitable for a jump table, consider the cost for the table size and
    // the branch to the destination.
    if (JumpTableSize) {
      int64_t JTCost =
          static_cast<int64_t>(JumpTableSize) * InstrCost + 4 * InstrCost;
      addCost(JTCost);
      return;
    }

    if (NumCaseCluster <= 3) {
      // Suppose a comparison includes one compare and one conditional branch.
      addCost(NumCaseCluster * 2 * InstrCost);
      return;
    }

    int64_t ExpectedNumberOfCompare =
        getExpectedNumberOfCompare(NumCaseCluster);
    int64_t SwitchCost = ExpectedNumberOfCompare * 2 * InstrCost;

    addCost(SwitchCost);
  }

  void onMissedSimplification() override { addCost(InstrCost); }

  void onInitializeSROAArg(AllocaInst *Arg) override {
    assert(Arg != nullptr &&
           "Should not initialize SROA costs for null value.");
    SROAArgCosts[Arg] = 0;
  }

  void onAggregateSROAUse(AllocaInst *SROAArg) override {
    auto CostIt = SROAArgCosts.find(SROAArg);
    assert(CostIt != SROAArgCosts.end() &&
           "expected this argument to have a cost");
    CostIt->second += InstrCost;
    SROACostSavings += InstrCost;
  }
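
  // Worked example, assuming the default -inline-instr-cost=5: a switch with
  // 8 case clusters and no jump table gets ExpectedNumberOfCompare = 11, so
  // SwitchCost = 11 * 2 * 5 = 110 units are added to Cost.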
  void onBlockStart(const BasicBlock *BB) override { CostAtBBStart = Cost; }

  void onBlockAnalyzed(const BasicBlock *BB) override {
    if (CostBenefitAnalysisEnabled) {
      // Keep track of the static size of live but cold basic blocks. For
      // now, we define a cold basic block to be one that's never executed.
      assert(GetBFI && "GetBFI must be available");
      BlockFrequencyInfo *BFI = &(GetBFI(F));
      assert(BFI && "BFI must be available");
      auto ProfileCount = BFI->getBlockProfileCount(BB);
      if (*ProfileCount == 0)
        ColdSize += Cost - CostAtBBStart;
    }

    auto *TI = BB->getTerminator();
    // If we had any successors at this point, then post-inlining is likely to
    // have them as well. Note that we assume any basic blocks which existed
    // due to branches or switches which folded above will also fold after
    // inlining.
    if (SingleBB && TI->getNumSuccessors() > 1) {
      // Take off the bonus we applied to the threshold.
      Threshold -= SingleBBBonus;
      SingleBB = false;
    }
  }

  void onInstructionAnalysisStart(const Instruction *I) override {
    // This function is called to store the initial cost of inlining before
    // the given instruction was assessed.
    if (!PrintInstructionComments)
      return;
    InstructionCostDetailMap[I].CostBefore = Cost;
    InstructionCostDetailMap[I].ThresholdBefore = Threshold;
  }

  void onInstructionAnalysisFinish(const Instruction *I) override {
    // This function is called to find new values of cost and threshold after
    // the instruction has been assessed.
    if (!PrintInstructionComments)
      return;
    InstructionCostDetailMap[I].CostAfter = Cost;
    InstructionCostDetailMap[I].ThresholdAfter = Threshold;
  }
  bool isCostBenefitAnalysisEnabled() {
    if (!PSI || !PSI->hasProfileSummary())
      return false;

    if (!GetBFI)
      return false;

    if (InlineEnableCostBenefitAnalysis.getNumOccurrences()) {
      // Honor the explicit request from the user.
      if (!InlineEnableCostBenefitAnalysis)
        return false;
    } else {
      // Otherwise, require instrumentation profile.
      if (!PSI->hasInstrumentationProfile())
        return false;
    }

    auto *Caller = CandidateCall.getParent()->getParent();
    if (!Caller->getEntryCount())
      return false;

    BlockFrequencyInfo *CallerBFI = &(GetBFI(*Caller));
    if (!CallerBFI)
      return false;

    // For now, limit to hot call site.
    if (!PSI->isHotCallSite(CandidateCall, CallerBFI))
      return false;

    // Make sure we have a nonzero entry count.
    auto EntryCount = F.getEntryCount();
    if (!EntryCount || !EntryCount->getCount())
      return false;

    BlockFrequencyInfo *CalleeBFI = &(GetBFI(F));
    if (!CalleeBFI)
      return false;

    return true;
  }
  // Determines whether inlining is favorable by comparing estimated cycle
  // savings against the size growth, both computed from the profile.
  std::optional<bool> costBenefitAnalysis() {
    if (!CostBenefitAnalysisEnabled)
      return std::nullopt;

    BlockFrequencyInfo *CalleeBFI = &(GetBFI(F));

    // An instruction counts toward the cycle savings if it can be folded
    // away at this call site, weighted by how often its block executes.
    APInt CycleSavings(128, 0);

    for (auto &BB : F) {
      APInt CurrentSavings(128, 0);
      for (auto &I : BB) {
        if (BranchInst *BI = dyn_cast<BranchInst>(&I)) {
          // Count a conditional branch as savings if it becomes
          // unconditional.
          if (BI->isConditional() &&
              isa_and_nonnull<ConstantInt>(
                  SimplifiedValues.lookup(BI->getCondition()))) {
            CurrentSavings += InstrCost;
          }
        } else if (Value *V = dyn_cast<Value>(&I)) {
          // Count an instruction as savings if we can fold it.
          if (SimplifiedValues.count(V)) {
            CurrentSavings += InstrCost;
          }
        }
      }

      auto ProfileCount = CalleeBFI->getBlockProfileCount(&BB);
      CurrentSavings *= *ProfileCount;
      CycleSavings += CurrentSavings;
    }

    // Compute the cycle savings per call, rounding to nearest.
    auto EntryProfileCount = F.getEntryCount();
    assert(EntryProfileCount && EntryProfileCount->getCount());
    auto EntryCount = EntryProfileCount->getCount();
    CycleSavings += EntryCount / 2;
    CycleSavings = CycleSavings.udiv(EntryCount);

    // Compute the total savings for the call site.
    auto *CallerBB = CandidateCall.getParent();
    BlockFrequencyInfo *CallerBFI = &(GetBFI(*(CallerBB->getParent())));
    CycleSavings += getCallsiteCost(this->CandidateCall, DL);
    CycleSavings *= *CallerBFI->getBlockProfileCount(CallerBB);

    // Remove the cost of the cold basic blocks, and allow tiny callees to be
    // inlined regardless of whether they meet the savings threshold.
    int Size = Cost - ColdSize;
    Size = Size > InlineSizeAllowance ? Size - InlineSizeAllowance : 1;

    CostBenefit.emplace(APInt(128, Size), CycleSavings);

    APInt LHS = CycleSavings;
    LHS *= InlineSavingsMultiplier;
    APInt RHS(128, PSI->getOrCompHotCountThreshold());
    RHS *= Size;
    return LHS.uge(RHS);
  }
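
  // The final comparison above implements, in 128-bit arithmetic to avoid
  // overflow:
  //
  //   CycleSavings / Size >= HotCountThreshold / InlineSavingsMultiplier
  //
  // i.e. inlining must save enough cycles per unit of size growth, where
  // "enough" is scaled by the profile's hot-count threshold.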
  InlineResult finalizeAnalysis() override {
    // Loops generally act a lot like calls in that they act like barriers to
    // movement and require a certain amount of setup. So when optimizing for
    // size, penalize any call sites that perform loops.
    auto *Caller = CandidateCall.getFunction();
    if (Caller->hasMinSize()) {
      DominatorTree DT(F);
      LoopInfo LI(DT);
      int NumLoops = 0;
      for (Loop *L : LI) {
        // Ignore loops that will not be executed.
        if (DeadBlocks.count(L->getHeader()))
          continue;
        NumLoops++;
      }
      addCost(NumLoops * InlineConstants::LoopPenalty);
    }

    // We applied the maximum possible vector bonus at the beginning. Now,
    // subtract the excess bonus, if any, from the Threshold before comparing
    // against Cost.
    if (NumVectorInstructions <= NumInstructions / 10)
      Threshold -= VectorBonus;
    else if (NumVectorInstructions <= NumInstructions / 2)
      Threshold -= VectorBonus / 2;

    if (std::optional<int> AttrCost =
            getStringFnAttrAsInt(CandidateCall, "function-inline-cost"))
      Cost = *AttrCost;

    if (std::optional<int> AttrCostMult = getStringFnAttrAsInt(
            CandidateCall,
            InlineConstants::FunctionInlineCostMultiplierAttributeName))
      Cost *= *AttrCostMult;

    if (std::optional<int> AttrThreshold =
            getStringFnAttrAsInt(CandidateCall, "function-inline-threshold"))
      Threshold = *AttrThreshold;

    if (auto Result = costBenefitAnalysis()) {
      DecidedByCostBenefit = true;
      if (*Result)
        return InlineResult::success();
      return InlineResult::failure("Cost over threshold.");
    }

    if (IgnoreThreshold)
      return InlineResult::success();

    DecidedByCostThreshold = true;
    return Cost < std::max(1, Threshold)
               ? InlineResult::success()
               : InlineResult::failure("Cost over threshold.");
  }
  bool shouldStop() override {
    if (IgnoreThreshold || ComputeFullInlineCost)
      return false;
    // Bail out the moment we cross the threshold. This means we'll
    // under-count the cost, but only when undercounting doesn't matter.
    if (Cost < Threshold)
      return false;
    DecidedByCostThreshold = true;
    return true;
  }

  void onLoadEliminationOpportunity() override {
    LoadEliminationCost += InstrCost;
  }

  InlineResult onAnalysisStart() override {
    // Perform some tweaks to the cost and threshold based on the direct
    // callsite information.
    assert(NumInstructions == 0);
    assert(NumVectorInstructions == 0);

    // Update the threshold based on callsite properties.
    updateThreshold(CandidateCall, F);

    // While Threshold depends on commandline options that can take negative
    // values, we want to enforce the invariant that the computed threshold
    // and bonuses are non-negative.
    assert(Threshold >= 0);
    assert(SingleBBBonus >= 0);
    assert(VectorBonus >= 0);

    // Speculatively apply all possible bonuses to Threshold. If cost exceeds
    // this threshold, then the inlining decision will be negative.
    Threshold += (SingleBBBonus + VectorBonus);

    // Give out a bonus for the callsite itself, as the instructions setting
    // it up will be gone after inlining.
    addCost(-getCallsiteCost(this->CandidateCall, DL));

    // If this function uses the coldcc calling convention, prefer not to
    // inline it.
    if (F.getCallingConv() == CallingConv::Cold)
      Cost += InlineConstants::ColdccPenalty;

    // Check if we're done. This can happen due to bonuses and penalties.
    if (Cost >= Threshold && !ComputeFullInlineCost)
      return InlineResult::failure("high cost");

    return InlineResult::success();
  }
public:
  InlineCostCallAnalyzer(
      Function &Callee, CallBase &Call, const InlineParams &Params,
      const TargetTransformInfo &TTI,
      function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
      function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr,
      ProfileSummaryInfo *PSI = nullptr,
      OptimizationRemarkEmitter *ORE = nullptr, bool BoostIndirect = true,
      bool IgnoreThreshold = false)
      : CallAnalyzer(Callee, Call, TTI, GetAssumptionCache, GetBFI, PSI, ORE),
        ComputeFullInlineCost(OptComputeFullInlineCost ||
                              Params.ComputeFullInlineCost || ORE ||
                              isCostBenefitAnalysisEnabled()),
        Params(Params), Threshold(Params.DefaultThreshold),
        BoostIndirectCalls(BoostIndirect), IgnoreThreshold(IgnoreThreshold),
        CostBenefitAnalysisEnabled(isCostBenefitAnalysisEnabled()),
        Writer(this) {
    AllowRecursiveCall = *Params.AllowRecursiveCall;
  }

  /// Annotation writer for instruction details.
  InlineCostAnnotationWriter Writer;
  /// Lookup the cost/threshold details recorded for \p I, if any.
  std::optional<InstructionCostDetail> getCostDetails(const Instruction *I) {
    if (InstructionCostDetailMap.contains(I))
      return InstructionCostDetailMap[I];
    return std::nullopt;
  }

  virtual ~InlineCostCallAnalyzer() = default;
  int getThreshold() const { return Threshold; }
  int getCost() const { return Cost; }
  int getStaticBonusApplied() const { return StaticBonusApplied; }
  std::optional<CostBenefitPair> getCostBenefitPair() { return CostBenefit; }
  bool wasDecidedByCostBenefit() const { return DecidedByCostBenefit; }
  bool wasDecidedByCostThreshold() const { return DecidedByCostThreshold; }
  void print(raw_ostream &OS);
  void dump();
};
// Return true if CB is the sole call to local function Callee.
static bool isSoleCallToLocalFunction(const CallBase &CB,
                                      const Function &Callee) {
  return Callee.hasLocalLinkage() && Callee.hasOneLiveUse() &&
         &Callee == CB.getCalledFunction();
}
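
// Rationale: if the callee has local linkage and this is its only live use,
// inlining deletes the out-of-line body entirely, so the usual size-growth
// accounting does not apply and a sizable bonus is justified.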
class InlineCostFeaturesAnalyzer final : public CallAnalyzer {
  using CallAnalyzer::handleSROA;

  InlineCostFeatures Cost = {};

  // FIXME: These constants are taken from the heuristic-based cost visitor.
  // These should be removed entirely in a later revision to avoid reliance on
  // heuristics in the ML inliner.
  static constexpr int JTCostMultiplier = 4;
  static constexpr int CaseClusterCostMultiplier = 2;
  static constexpr int SwitchCostMultiplier = 2;

  // FIXME: These are taken from the heuristic-based cost visitor; we should
  // eventually abstract these to the CallAnalyzer to avoid duplication.
  unsigned SROACostSavingOpportunities = 0;
  int VectorBonus = 0;
  int SingleBBBonus = 0;
  int Threshold = 5;

  DenseMap<AllocaInst *, unsigned> SROACosts;

  void increment(InlineCostFeatureIndex Feature, int64_t Delta = 1) {
    Cost[static_cast<size_t>(Feature)] += Delta;
  }

  void set(InlineCostFeatureIndex Feature, int64_t Value) {
    Cost[static_cast<size_t>(Feature)] = Value;
  }

  void onDisableSROA(AllocaInst *Arg) override {
    auto CostIt = SROACosts.find(Arg);
    if (CostIt == SROACosts.end())
      return;
    increment(InlineCostFeatureIndex::SROALosses, CostIt->second);
    SROACostSavingOpportunities -= CostIt->second;
    SROACosts.erase(CostIt);
  }
  void onDisableLoadElimination() override {
    set(InlineCostFeatureIndex::LoadElimination, 1);
  }

  void onCallPenalty() override {
    increment(InlineCostFeatureIndex::CallPenalty, CallPenalty);
  }

  void onCallArgumentSetup(const CallBase &Call) override {
    increment(InlineCostFeatureIndex::CallArgumentSetup,
              Call.arg_size() * InstrCost);
  }

  void onLoadRelativeIntrinsic() override {
    increment(InlineCostFeatureIndex::LoadRelativeIntrinsic, 3 * InstrCost);
  }

  void onLoweredCall(Function *F, CallBase &Call,
                     bool IsIndirectCall) override {
    increment(InlineCostFeatureIndex::LoweredCallArgSetup,
              Call.arg_size() * InstrCost);

    if (IsIndirectCall) {
      // Pretend to inline the discovered indirect target with full cost
      // computation, then record the nested estimate as a feature.
      InlineParams IndirectCallParams = {};
      IndirectCallParams.DefaultThreshold =
          InlineConstants::IndirectCallThreshold;
      IndirectCallParams.ComputeFullInlineCost = true;
      InlineCostCallAnalyzer CA(*F, Call, IndirectCallParams, TTI,
                                GetAssumptionCache, GetBFI, PSI, ORE, false,
                                /*IgnoreThreshold=*/true);
      if (CA.analyze().isSuccess()) {
        increment(InlineCostFeatureIndex::NestedInlineCostEstimate,
                  CA.getCost());
        increment(InlineCostFeatureIndex::NestedInlines, 1);
      }
    } else {
      onCallPenalty();
    }
  }
  void onFinalizeSwitch(unsigned JumpTableSize,
                        unsigned NumCaseCluster) override {
    if (JumpTableSize) {
      int64_t JTCost = static_cast<int64_t>(JumpTableSize) * InstrCost +
                       JTCostMultiplier * InstrCost;
      increment(InlineCostFeatureIndex::JumpTablePenalty, JTCost);
      return;
    }

    if (NumCaseCluster <= 3) {
      increment(InlineCostFeatureIndex::CaseClusterPenalty,
                NumCaseCluster * CaseClusterCostMultiplier * InstrCost);
      return;
    }

    int64_t ExpectedNumberOfCompare =
        getExpectedNumberOfCompare(NumCaseCluster);

    int64_t SwitchCost =
        ExpectedNumberOfCompare * SwitchCostMultiplier * InstrCost;
    increment(InlineCostFeatureIndex::SwitchPenalty, SwitchCost);
  }

  void onMissedSimplification() override {
    increment(InlineCostFeatureIndex::UnsimplifiedCommonInstructions,
              InstrCost);
  }

  void onInitializeSROAArg(AllocaInst *Arg) override { SROACosts[Arg] = 0; }

  void onAggregateSROAUse(AllocaInst *Arg) override {
    SROACosts.find(Arg)->second += InstrCost;
    SROACostSavingOpportunities += InstrCost;
  }
  void onBlockAnalyzed(const BasicBlock *BB) override {
    if (BB->getTerminator()->getNumSuccessors() > 1)
      set(InlineCostFeatureIndex::IsMultipleBlocks, 1);
    Threshold -= SingleBBBonus;
  }

  InlineResult finalizeAnalysis() override {
    auto *Caller = CandidateCall.getFunction();
    if (Caller->hasMinSize()) {
      DominatorTree DT(F);
      LoopInfo LI(DT);
      for (Loop *L : LI) {
        // Ignore loops that will not be executed.
        if (DeadBlocks.count(L->getHeader()))
          continue;
        increment(InlineCostFeatureIndex::NumLoops,
                  InlineConstants::LoopPenalty);
      }
    }
    set(InlineCostFeatureIndex::DeadBlocks, DeadBlocks.size());
    set(InlineCostFeatureIndex::SimplifiedInstructions,
        NumInstructionsSimplified);
    set(InlineCostFeatureIndex::ConstantArgs, NumConstantArgs);
    set(InlineCostFeatureIndex::ConstantOffsetPtrArgs,
        NumConstantOffsetPtrArgs);
    set(InlineCostFeatureIndex::SROASavings, SROACostSavingOpportunities);

    if (NumVectorInstructions <= NumInstructions / 10)
      Threshold -= VectorBonus;
    else if (NumVectorInstructions <= NumInstructions / 2)
      Threshold -= VectorBonus / 2;

    set(InlineCostFeatureIndex::Threshold, Threshold);

    return InlineResult::success();
  }

  bool shouldStop() override { return false; }
  void onLoadEliminationOpportunity() override {
    increment(InlineCostFeatureIndex::LoadElimination, 1);
  }

  InlineResult onAnalysisStart() override {
    increment(InlineCostFeatureIndex::CallSiteCost,
              -1 * getCallsiteCost(this->CandidateCall, DL));

    set(InlineCostFeatureIndex::ColdCcPenalty,
        (F.getCallingConv() == CallingConv::Cold));

    set(InlineCostFeatureIndex::LastCallToStaticBonus,
        isSoleCallToLocalFunction(CandidateCall, F));

    // FIXME: we shouldn't repeat this logic in both the features and the
    // heuristic-based cost visitor.
    int SingleBBBonusPercent = 50;
    int VectorBonusPercent = TTI.getInlinerVectorBonusPercent();
    Threshold += TTI.adjustInliningThreshold(&CandidateCall);
    Threshold *= TTI.getInliningThresholdMultiplier();
    SingleBBBonus = Threshold * SingleBBBonusPercent / 100;
    VectorBonus = Threshold * VectorBonusPercent / 100;
    Threshold += (SingleBBBonus + VectorBonus);

    return InlineResult::success();
  }
public:
  InlineCostFeaturesAnalyzer(
      const TargetTransformInfo &TTI,
      function_ref<AssumptionCache &(Function &)> &GetAssumptionCache,
      function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
      ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE,
      Function &Callee, CallBase &Call)
      : CallAnalyzer(Callee, Call, TTI, GetAssumptionCache, GetBFI, PSI) {}

  const InlineCostFeatures &features() const { return Cost; }
};

} // namespace
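
// InlineCostFeaturesAnalyzer mirrors the heuristic analyzer but records each
// event into an InlineCostFeatures array instead of a scalar Cost; the
// features() accessor is what getInliningCostFeatures() below hands to the
// ML-based inline advisor.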
bool CallAnalyzer::isAllocaDerivedArg(Value *V) {
  return SROAArgValues.count(V);
}

void CallAnalyzer::disableSROAForArg(AllocaInst *SROAArg) {
  onDisableSROA(SROAArg);
  EnabledSROAAllocas.erase(SROAArg);
  disableLoadElimination();
}
void InlineCostAnnotationWriter::emitInstructionAnnot(
    const Instruction *I, formatted_raw_ostream &OS) {
  // The cost of inlining the given instruction is printed always. The
  // threshold delta is printed only when it is non-zero, i.e. when the
  // threshold changed during the analysis of this instruction.
  std::optional<InstructionCostDetail> Record = ICCA->getCostDetails(I);
  if (!Record)
    OS << "; No analysis for the instruction";
  else {
    OS << "; cost before = " << Record->CostBefore
       << ", cost after = " << Record->CostAfter
       << ", threshold before = " << Record->ThresholdBefore
       << ", threshold after = " << Record->ThresholdAfter << ", ";
    OS << "cost delta = " << Record->getCostDelta();
    if (Record->hasThresholdChanged())
      OS << ", threshold delta = " << Record->getThresholdDelta();
  }
  auto C = ICCA->getSimplifiedValue(const_cast<Instruction *>(I));
  if (C) {
    OS << ", simplified to ";
    (*C)->print(OS, true);
  }
  OS << "\n";
}
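
// Usage note: these annotations are emitted when -print-instruction-comments
// is enabled, which the InlineCostAnnotationPrinterPass at the bottom of this
// file turns on. Assuming the usual new-pass-manager pass spelling, that
// looks like:
//
//   opt -passes='print<inline-cost>' input.ll -disable-output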
void CallAnalyzer::disableSROA(Value *V) {
  if (auto *SROAArg = getSROAArgForValueOrNull(V)) {
    disableSROAForArg(SROAArg);
  }
}

void CallAnalyzer::disableLoadElimination() {
  if (EnableLoadElimination) {
    onDisableLoadElimination();
    EnableLoadElimination = false;
  }
}
/// Accumulate a constant GEP offset into an APInt if possible.
///
/// Returns false if unable to compute the offset for any reason. Respects any
/// simplified values known during the analysis of this callsite.
bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
  unsigned IntPtrWidth = DL.getIndexTypeSizeInBits(GEP.getType());
  assert(IntPtrWidth == Offset.getBitWidth());

  for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
       GTI != GTE; ++GTI) {
    ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
    if (!OpC)
      if (Constant *SimpleOp = SimplifiedValues.lookup(GTI.getOperand()))
        OpC = dyn_cast<ConstantInt>(SimpleOp);
    if (!OpC)
      return false;
    if (OpC->isZero())
      continue;

    // Handle a struct index, which adds its field offset to the pointer.
    if (StructType *STy = GTI.getStructTypeOrNull()) {
      unsigned ElementIdx = OpC->getZExtValue();
      const StructLayout *SL = DL.getStructLayout(STy);
      Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx));
      continue;
    }

    APInt TypeSize(IntPtrWidth, DL.getTypeAllocSize(GTI.getIndexedType()));
    Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize;
  }
  return true;
}

/// Use TTI to check whether a GEP is free.
bool CallAnalyzer::isGEPFree(GetElementPtrInst &GEP) {
  SmallVector<const Value *, 4> Operands;
  Operands.push_back(GEP.getOperand(0));
  for (const Use &Op : GEP.indices())
    if (Constant *SimpleOp = SimplifiedValues.lookup(Op))
      Operands.push_back(SimpleOp);
    else
      Operands.push_back(Op);
  return TTI.getInstructionCost(&GEP, Operands,
                                TargetTransformInfo::TCK_SizeAndLatency) ==
         TargetTransformInfo::TCC_Free;
}
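
// Worked example for accumulateGEPOffset: given
//   %p = getelementptr {i32, i32}, ptr %base, i32 0, i32 1
// on a 64-bit target, the struct layout contributes field offset 4, so %p is
// recorded as the pair (%base, offset 4) in ConstantOffsetPtrs.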
bool CallAnalyzer::visitAlloca(AllocaInst &I) {
  disableSROA(I.getOperand(0));

  // Check whether inlining will turn a dynamic alloca into a static
  // alloca and handle that case.
  if (I.isArrayAllocation()) {
    Constant *Size = SimplifiedValues.lookup(I.getArraySize());
    if (auto *AllocSize = dyn_cast_or_null<ConstantInt>(Size)) {
      // Sometimes a dynamic alloca could be converted into a static alloca.
      // In that case, we can inline it if it's not causing stack overflow.
      Type *Ty = I.getAllocatedType();
      AllocatedSize = SaturatingMultiplyAdd(
          AllocSize->getLimitedValue(),
          DL.getTypeAllocSize(Ty).getKnownMinValue(), AllocatedSize);
      if (AllocatedSize > InlineConstants::MaxSimplifiedDynamicAllocaToInline)
        HasDynamicAlloca = true;
      return false;
    }
  }

  // Accumulate the allocated size.
  if (I.isStaticAlloca()) {
    Type *Ty = I.getAllocatedType();
    AllocatedSize = SaturatingAdd(DL.getTypeAllocSize(Ty).getKnownMinValue(),
                                  AllocatedSize);
  }

  // FIXME: This is overly conservative. Dynamic allocas are inefficient for
  // a variety of reasons, and so we would like to not inline them into
  // functions which don't currently have a dynamic alloca. This simply
  // disables inlining altogether in the presence of a dynamic alloca.
  if (!I.isStaticAlloca())
    HasDynamicAlloca = true;

  return false;
}
bool CallAnalyzer::visitPHI(PHINode &I) {
  // Phi nodes are always zero-cost. We model them as a constant or a pointer
  // with constant offset if every live incoming value agrees.
  APInt ZeroOffset = APInt::getZero(DL.getPointerSizeInBits(0));
  bool CheckSROA = I.getType()->isPointerTy();

  // Track the constant or pointer with constant offset we've seen so far.
  Constant *FirstC = nullptr;
  std::pair<Value *, APInt> FirstBaseAndOffset = {nullptr, ZeroOffset};
  Value *FirstV = nullptr;

  for (unsigned i = 0, e = I.getNumIncomingValues(); i != e; ++i) {
    BasicBlock *Pred = I.getIncomingBlock(i);
    // If the incoming block is dead, skip the incoming block.
    if (DeadBlocks.count(Pred))
      continue;
    // If the parent block of phi is not the known successor of the incoming
    // block, skip the incoming block.
    BasicBlock *KnownSuccessor = KnownSuccessors[Pred];
    if (KnownSuccessor && KnownSuccessor != I.getParent())
      continue;

    Value *V = I.getIncomingValue(i);
    // If the incoming value is this phi itself, skip it.
    if (&I == V)
      continue;

    Constant *C = dyn_cast<Constant>(V);
    if (!C)
      C = SimplifiedValues.lookup(V);

    std::pair<Value *, APInt> BaseAndOffset = {nullptr, ZeroOffset};
    if (!C && CheckSROA)
      BaseAndOffset = ConstantOffsetPtrs.lookup(V);

    if (!C && !BaseAndOffset.first)
      // The incoming value is neither a constant nor a pointer with constant
      // offset; exit early.
      return true;

    if (FirstC) {
      if (FirstC == C)
        continue;
      // We either see a different constant, or a constant mixed with a
      // pointer; either way we cannot simplify this phi.
      return true;
    }

    if (FirstV) {
      // The same logic as above, for pointers with constant offset.
      if (FirstBaseAndOffset == BaseAndOffset)
        continue;
      return true;
    }

    if (C) {
      // This is the first constant incoming value; record it.
      FirstC = C;
      continue;
    }

    // This is the first pointer with constant offset; record it.
    FirstV = V;
    FirstBaseAndOffset = BaseAndOffset;
  }

  // Check if we can map phi to a constant.
  if (FirstC) {
    SimplifiedValues[&I] = FirstC;
    return true;
  }

  // Check if we can map phi to a pointer with constant offset.
  if (FirstBaseAndOffset.first) {
    ConstantOffsetPtrs[&I] = FirstBaseAndOffset;

    if (auto *SROAArg = getSROAArgForValueOrNull(FirstV))
      SROAArgValues[&I] = SROAArg;
  }

  return true;
}
/// Check we can fold GEPs of constant-offset call site argument pointers.
/// This requires target data and inbounds GEPs.
bool CallAnalyzer::canFoldInboundsGEP(GetElementPtrInst &I) {
  // Check if we have a base + offset for the pointer so we can check the
  // consistency of the base.
  std::pair<Value *, APInt> BaseAndOffset =
      ConstantOffsetPtrs.lookup(I.getPointerOperand());
  if (!BaseAndOffset.first)
    return false;

  // Check if the offset of this GEP is constant, and if so accumulate it
  // into Offset.
  if (!accumulateGEPOffset(cast<GEPOperator>(I), BaseAndOffset.second))
    return false;

  // Add the result as a new mapping to Base + Offset.
  ConstantOffsetPtrs[&I] = BaseAndOffset;

  return true;
}

bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
  auto *SROAArg = getSROAArgForValueOrNull(I.getPointerOperand());

  // Lambda to check whether a GEP's indices are all constant.
  auto IsGEPOffsetConstant = [&](GetElementPtrInst &GEP) {
    for (const Use &Op : GEP.indices())
      if (!isa<Constant>(Op) && !SimplifiedValues.lookup(Op))
        return false;
    return true;
  };

  if (!DisableGEPConstOperand)
    if (simplifyInstruction(I))
      return true;

  if ((I.isInBounds() && canFoldInboundsGEP(I)) || IsGEPOffsetConstant(I)) {
    if (SROAArg)
      SROAArgValues[&I] = SROAArg;

    // Constant GEPs are modeled as free.
    return true;
  }

  // Variable GEPs will require math and will disable SROA.
  if (SROAArg)
    disableSROAForArg(SROAArg);
  return isGEPFree(I);
}
/// Simplify \p I if its operands are constants and update SimplifiedValues.
bool CallAnalyzer::simplifyInstruction(Instruction &I) {
  SmallVector<Constant *> COps;
  for (Value *Op : I.operands()) {
    Constant *COp = dyn_cast<Constant>(Op);
    if (!COp)
      COp = SimplifiedValues.lookup(Op);
    if (!COp)
      return false;
    COps.push_back(COp);
  }
  auto *C = ConstantFoldInstOperands(&I, COps, DL);
  if (!C)
    return false;
  SimplifiedValues[&I] = C;
  return true;
}
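
// Example of the propagation above: if argument %n is mapped to the constant
// 8 at this call site, then "%m = mul i64 %n, 2" folds via
// ConstantFoldInstOperands to 16, lands in SimplifiedValues, and every user
// of %m can fold in turn.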
/// Try to simplify a call to llvm.is.constant.
bool CallAnalyzer::simplifyIntrinsicCallIsConstant(CallBase &CB) {
  Value *Arg = CB.getArgOperand(0);
  auto *C = dyn_cast<Constant>(Arg);

  if (!C)
    C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(Arg));

  Type *RT = CB.getFunctionType()->getReturnType();
  SimplifiedValues[&CB] = ConstantInt::get(RT, C ? 1 : 0);
  return true;
}

bool CallAnalyzer::simplifyIntrinsicCallObjectSize(CallBase &CB) {
  // As per the langref, "The fourth argument to llvm.objectsize determines if
  // the value should be evaluated at compile time".
  if (cast<ConstantInt>(CB.getArgOperand(3))->isZero())
    return false;

  Value *V = lowerObjectSizeCall(&cast<IntrinsicInst>(CB), DL, nullptr,
                                 /*MustSucceed=*/true);
  Constant *C = dyn_cast_or_null<Constant>(V);
  if (C)
    SimplifiedValues[&CB] = C;
  return C;
}
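
// Note: llvm.is.constant folds to 1 only when the operand is known constant
// at *this* call site (directly or via SimplifiedValues); otherwise it is
// folded to 0, since an argument that is not constant here will not become
// constant after inlining either.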
bool CallAnalyzer::visitBitCast(BitCastInst &I) {
  // Propagate constants through bitcasts.
  if (simplifyInstruction(I))
    return true;

  // Track base/offsets through casts; casts don't change the offset, they
  // just wrap it up.
  std::pair<Value *, APInt> BaseAndOffset =
      ConstantOffsetPtrs.lookup(I.getOperand(0));
  if (BaseAndOffset.first)
    ConstantOffsetPtrs[&I] = BaseAndOffset;

  // Also look for SROA candidates here.
  if (auto *SROAArg = getSROAArgForValueOrNull(I.getOperand(0)))
    SROAArgValues[&I] = SROAArg;

  // Bitcasts are always zero cost.
  return true;
}
bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
  // Propagate constants through ptrtoint.
  if (simplifyInstruction(I))
    return true;

  // Track base/offset pairs when converted to a plain integer provided the
  // integer is large enough to represent the pointer.
  unsigned IntegerSize = I.getType()->getScalarSizeInBits();
  unsigned AS = I.getOperand(0)->getType()->getPointerAddressSpace();
  if (IntegerSize == DL.getPointerSizeInBits(AS)) {
    std::pair<Value *, APInt> BaseAndOffset =
        ConstantOffsetPtrs.lookup(I.getOperand(0));
    if (BaseAndOffset.first)
      ConstantOffsetPtrs[&I] = BaseAndOffset;
  }

  // Technically, ptrtoint disables SROA; but unless the resulting integer is
  // actually used in a live block after inlining, the cast will be removed
  // and SROA can still proceed, so we keep tracking the candidate.
  if (auto *SROAArg = getSROAArgForValueOrNull(I.getOperand(0)))
    SROAArgValues[&I] = SROAArg;

  return TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency) ==
         TargetTransformInfo::TCC_Free;
}
bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
  // Propagate constants through inttoptr.
  if (simplifyInstruction(I))
    return true;

  // Track base/offset pairs when round-tripped through a pointer without
  // modifications, provided the integer is not too large.
  Value *Op = I.getOperand(0);
  unsigned IntegerSize = Op->getType()->getScalarSizeInBits();
  if (IntegerSize <= DL.getPointerTypeSizeInBits(I.getType())) {
    std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op);
    if (BaseAndOffset.first)
      ConstantOffsetPtrs[&I] = BaseAndOffset;
  }

  // "Propagate" SROA here in the same manner as we do for ptrtoint above.
  if (auto *SROAArg = getSROAArgForValueOrNull(Op))
    SROAArgValues[&I] = SROAArg;

  return TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency) ==
         TargetTransformInfo::TCC_Free;
}
bool CallAnalyzer::visitCastInst(CastInst &I) {
  // Propagate constants through casts.
  if (simplifyInstruction(I))
    return true;

  // Disable SROA in the face of arbitrary casts we don't explicitly list
  // elsewhere.
  disableSROA(I.getOperand(0));

  // If this is a floating-point cast, and the target says this operation is
  // expensive, this may eventually become a library call. Treat the cost as
  // such.
  switch (I.getOpcode()) {
  case Instruction::FPTrunc:
  case Instruction::FPExt:
  case Instruction::UIToFP:
  case Instruction::SIToFP:
  case Instruction::FPToUI:
  case Instruction::FPToSI:
    if (TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive)
      onCallPenalty();
    break;
  default:
    break;
  }

  return TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency) ==
         TargetTransformInfo::TCC_Free;
}
bool CallAnalyzer::isKnownNonNullInCallee(Value *V) {
  // Does the *call site* have the NonNull attribute set on an argument? We
  // use the attribute on the call site to memoize any analysis done in the
  // caller.
  if (Argument *A = dyn_cast<Argument>(V))
    if (paramHasAttr(A, Attribute::NonNull))
      return true;

  // Is this an alloca in the caller? We can predict the result of comparisons
  // between an alloca-derived value and null, regardless of whether SROA
  // fires.
  if (isAllocaDerivedArg(V))
    return true;

  return false;
}
bool CallAnalyzer::allowSizeGrowth(CallBase &Call) {
  // If the normal destination of the invoke or the parent block of the call
  // site is unreachable-terminated, there is little point in inlining this
  // unless there is literally zero cost.
  if (InvokeInst *II = dyn_cast<InvokeInst>(&Call)) {
    if (isa<UnreachableInst>(II->getNormalDest()->getTerminator()))
      return false;
  } else if (isa<UnreachableInst>(Call.getParent()->getTerminator()))
    return false;

  return true;
}
bool InlineCostCallAnalyzer::isColdCallSite(CallBase &Call,
                                            BlockFrequencyInfo *CallerBFI) {
  // If global profile summary is available, then callsite's coldness is
  // determined based on that.
  if (PSI && PSI->hasProfileSummary())
    return PSI->isColdCallSite(Call, CallerBFI);

  // Otherwise we need BFI to be available.
  if (!CallerBFI)
    return false;

  // Determine if the callsite is cold relative to caller's entry frequency.
  const BranchProbability ColdProb(ColdCallSiteRelFreq, 100);
  auto CallSiteBB = Call.getParent();
  auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB);
  auto CallerEntryFreq =
      CallerBFI->getBlockFreq(&(Call.getCaller()->getEntryBlock()));
  return CallSiteFreq < CallerEntryFreq * ColdProb;
}
std::optional<int>
InlineCostCallAnalyzer::getHotCallSiteThreshold(CallBase &Call,
                                                BlockFrequencyInfo *CallerBFI) {
  // If global profile summary is available, then callsite's hotness is
  // determined based on that.
  if (PSI && PSI->hasProfileSummary() && PSI->isHotCallSite(Call, CallerBFI))
    return Params.HotCallSiteThreshold;

  // Otherwise we need BFI to be available and to have a locally hot callsite
  // threshold.
  if (!CallerBFI || !Params.LocallyHotCallSiteThreshold)
    return std::nullopt;

  // Determine if the callsite is hot relative to caller's entry frequency.
  auto CallSiteBB = Call.getParent();
  auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB).getFrequency();
  auto CallerEntryFreq = CallerBFI->getEntryFreq();
  if (CallSiteFreq >= CallerEntryFreq * HotCallSiteRelFreq)
    return Params.LocallyHotCallSiteThreshold;

  // Otherwise treat it normally.
  return std::nullopt;
}
void InlineCostCallAnalyzer::updateThreshold(CallBase &Call,
                                             Function &Callee) {
  // If no size growth is allowed for this inlining, set Threshold to 0.
  if (!allowSizeGrowth(Call)) {
    Threshold = 0;
    return;
  }

  Function *Caller = Call.getCaller();

  // return min(A, B) if B is valid.
  auto MinIfValid = [](int A, std::optional<int> B) {
    return B ? std::min(A, *B) : A;
  };

  // return max(A, B) if B is valid.
  auto MaxIfValid = [](int A, std::optional<int> B) {
    return B ? std::max(A, *B) : A;
  };

  // Various bonus percentages, multiplied by Threshold to get the bonus
  // values. The single-BB bonus is applied speculatively and withdrawn if we
  // see more than one reachable block; the vector bonus likewise rewards
  // vector-dense callees.
  int SingleBBBonusPercent = 50;
  int VectorBonusPercent = TTI.getInlinerVectorBonusPercent();
  int LastCallToStaticBonus = InlineConstants::LastCallToStaticBonus;

  // Lambda to set all the above bonus and bonus percentages to 0.
  auto DisallowAllBonuses = [&]() {
    SingleBBBonusPercent = 0;
    VectorBonusPercent = 0;
    LastCallToStaticBonus = 0;
  };

  // Use the OptMinSizeThreshold or OptSizeThreshold knob if they are
  // available and reduce the threshold if the caller has the necessary
  // attribute.
  if (Caller->hasMinSize()) {
    Threshold = MinIfValid(Threshold, Params.OptMinSizeThreshold);
    // For minsize, disable the single-BB and vector bonuses but keep the
    // last-call-to-static bonus: inlining the last call to a static function
    // deletes the function, which reduces code size.
    SingleBBBonusPercent = 0;
    VectorBonusPercent = 0;
  } else if (Caller->hasOptSize())
    Threshold = MinIfValid(Threshold, Params.OptSizeThreshold);

  // Adjust the threshold based on inlinehint attribute and profile-based
  // hotness information if the caller does not have MinSize attribute.
  if (!Caller->hasMinSize()) {
    if (Callee.hasFnAttribute(Attribute::InlineHint))
      Threshold = MaxIfValid(Threshold, Params.HintThreshold);

    // Callsite hotness and coldness can be determined if sample profile is
    // used (which adds hotness metadata to calls) or if the caller's
    // BlockFrequencyInfo is available.
    BlockFrequencyInfo *CallerBFI = GetBFI ? &(GetBFI(*Caller)) : nullptr;
    auto HotCallSiteThreshold = getHotCallSiteThreshold(Call, CallerBFI);
    if (!Caller->hasOptSize() && HotCallSiteThreshold) {
      LLVM_DEBUG(dbgs() << "Hot callsite.\n");
      Threshold = *HotCallSiteThreshold;
    } else if (isColdCallSite(Call, CallerBFI)) {
      LLVM_DEBUG(dbgs() << "Cold callsite.\n");
      // Do not apply bonuses for a cold callsite, including the
      // LastCallToStatic bonus: it can cause a non-cold caller to grow and
      // thereby prevent it from being inlined.
      DisallowAllBonuses();
      Threshold = MinIfValid(Threshold, Params.ColdCallSiteThreshold);
    } else if (PSI) {
      // Use the callee's global profile information only if we have no way
      // of determining this via callsite information.
      if (PSI->isFunctionEntryHot(&Callee)) {
        LLVM_DEBUG(dbgs() << "Hot callee.\n");
        Threshold = MaxIfValid(Threshold, Params.HintThreshold);
      } else if (PSI->isFunctionEntryCold(&Callee)) {
        LLVM_DEBUG(dbgs() << "Cold callee.\n");
        DisallowAllBonuses();
        Threshold = MinIfValid(Threshold, Params.ColdThreshold);
      }
    }
  }

  Threshold += TTI.adjustInliningThreshold(&Call);

  // Finally, take the target-specific inlining threshold multiplier into
  // account.
  Threshold *= TTI.getInliningThresholdMultiplier();

  SingleBBBonus = Threshold * SingleBBBonusPercent / 100;
  VectorBonus = Threshold * VectorBonusPercent / 100;

  // If there is only one call of the function, and it has internal linkage,
  // the cost of inlining it drops dramatically: the out-of-line body is
  // deleted after inlining.
  if (isSoleCallToLocalFunction(Call, F)) {
    Cost -= LastCallToStaticBonus;
    StaticBonusApplied = LastCallToStaticBonus;
  }
}
bool CallAnalyzer::visitCmpInst(CmpInst &I) {
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
  // First try to handle simplified comparisons.
  if (simplifyInstruction(I))
    return true;

  if (I.getOpcode() == Instruction::FCmp)
    return false;

  // Otherwise look for a comparison between constant offset pointers with a
  // common base.
  Value *LHSBase, *RHSBase;
  APInt LHSOffset, RHSOffset;
  std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
  if (LHSBase) {
    std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
    if (RHSBase && LHSBase == RHSBase) {
      // We have common bases; fold the icmp to a constant based on the
      // offsets.
      Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
      Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
      if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) {
        SimplifiedValues[&I] = C;
        ++NumConstantPtrCmps;
        return true;
      }
    }
  }

  // If the comparison is an equality comparison with null, we can simplify it
  // if we know the value (argument) can't be null.
  if (I.isEquality() && isa<ConstantPointerNull>(I.getOperand(1)) &&
      isKnownNonNullInCallee(I.getOperand(0))) {
    bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE;
    SimplifiedValues[&I] = IsNotEqual ? ConstantInt::getTrue(I.getType())
                                      : ConstantInt::getFalse(I.getType());
    return true;
  }

  return handleSROA(I.getOperand(0), isa<ConstantPointerNull>(I.getOperand(1)));
}
bool CallAnalyzer::visitSub(BinaryOperator &I) {
  // Try to handle a special case: we can fold computing the difference of two
  // constant-related pointers.
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
  Value *LHSBase, *RHSBase;
  APInt LHSOffset, RHSOffset;
  std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
  if (LHSBase) {
    std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
    if (RHSBase && LHSBase == RHSBase) {
      // We have common bases; fold the subtract to a constant based on the
      // offsets.
      Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
      Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
      if (Constant *C = ConstantExpr::getSub(CLHS, CRHS)) {
        SimplifiedValues[&I] = C;
        ++NumConstantPtrDiffs;
        return true;
      }
    }
  }

  // Otherwise, fall back to the generic logic for simplifying and handling
  // instructions.
  return Base::visitSub(I);
}
bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
  Constant *CLHS = dyn_cast<Constant>(LHS);
  if (!CLHS)
    CLHS = SimplifiedValues.lookup(LHS);
  Constant *CRHS = dyn_cast<Constant>(RHS);
  if (!CRHS)
    CRHS = SimplifiedValues.lookup(RHS);

  Value *SimpleV = nullptr;
  if (auto FI = dyn_cast<FPMathOperator>(&I))
    SimpleV = simplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS,
                            CRHS ? CRHS : RHS, FI->getFastMathFlags(), DL);
  else
    SimpleV =
        simplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS, CRHS ? CRHS : RHS, DL);

  if (Constant *C = dyn_cast_or_null<Constant>(SimpleV))
    SimplifiedValues[&I] = C;

  if (SimpleV)
    return true;

  // Disable any SROA on arguments to arbitrary, unsimplified binary
  // operators.
  disableSROA(LHS);
  disableSROA(RHS);

  // If the instruction is floating point, and the target says this operation
  // is expensive, this may eventually become a library call. Treat the cost
  // as such. Unless it's fneg, which can be implemented with an xor.
  using namespace llvm::PatternMatch;
  if (I.getType()->isFloatingPointTy() &&
      TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive &&
      !match(&I, m_FNeg(m_Value())))
    onCallPenalty();

  return false;
}
bool CallAnalyzer::visitFNeg(UnaryOperator &I) {
  Value *Op = I.getOperand(0);
  Constant *COp = dyn_cast<Constant>(Op);
  if (!COp)
    COp = SimplifiedValues.lookup(Op);

  Value *SimpleV = simplifyFNegInst(
      COp ? COp : Op, cast<FPMathOperator>(I).getFastMathFlags(), DL);

  if (Constant *C = dyn_cast_or_null<Constant>(SimpleV))
    SimplifiedValues[&I] = C;

  if (SimpleV)
    return true;

  // Disable any SROA on arguments to arbitrary, unsimplified fneg.
  disableSROA(Op);

  return false;
}
bool CallAnalyzer::visitLoad(LoadInst &I) {
  if (handleSROA(I.getPointerOperand(), I.isSimple()))
    return true;

  // If the data is already loaded from this address and hasn't been clobbered
  // by any stores or calls, this load is likely to be redundant and can be
  // eliminated.
  if (EnableLoadElimination &&
      !LoadAddrSet.insert(I.getPointerOperand()).second && I.isUnordered()) {
    onLoadEliminationOpportunity();
    return true;
  }

  onMemAccess();
  return false;
}

bool CallAnalyzer::visitStore(StoreInst &I) {
  if (handleSROA(I.getPointerOperand(), I.isSimple()))
    return true;

  // The store can potentially clobber loads and prevent repeated loads from
  // being eliminated.
  //
  // FIXME: This is primarily to catch stores to the callee's frame; a more
  // precise memory model would permit more load elimination.
  disableLoadElimination();

  onMemAccess();
  return false;
}

bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) {
  // Constant folding for extract value is trivial.
  if (simplifyInstruction(I))
    return true;

  // SROA can't look through these, but they may be free.
  return Base::visitExtractValue(I);
}

bool CallAnalyzer::visitInsertValue(InsertValueInst &I) {
  // Constant folding for insert value is trivial.
  if (simplifyInstruction(I))
    return true;

  // SROA can't look through these, but they may be free.
  return Base::visitInsertValue(I);
}
/// Try to simplify a call site.
///
/// Takes a concrete function and callsite and tries to actually simplify it
/// by analyzing the arguments and call itself with instsimplify. Returns true
/// if it has simplified the callsite to some other entity (a constant),
/// making it free.
bool CallAnalyzer::simplifyCallSite(Function *F, CallBase &Call) {
  if (!canConstantFoldCallTo(&Call, F))
    return false;

  // Try to re-map the arguments to constants.
  SmallVector<Constant *, 4> ConstantArgs;
  ConstantArgs.reserve(Call.arg_size());
  for (Value *I : Call.args()) {
    Constant *C = dyn_cast<Constant>(I);
    if (!C)
      C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(I));
    if (!C)
      return false; // This argument doesn't map to a constant.

    ConstantArgs.push_back(C);
  }
  if (Constant *C = ConstantFoldCall(&Call, F, ConstantArgs)) {
    SimplifiedValues[&Call] = C;
    return true;
  }

  return false;
}
bool CallAnalyzer::visitCallBase(CallBase &Call) {
  if (!onCallBaseVisitStart(Call))
    return true;

  if (Call.hasFnAttr(Attribute::ReturnsTwice) &&
      !F.hasFnAttribute(Attribute::ReturnsTwice)) {
    // This aborts the entire analysis.
    ExposesReturnsTwice = true;
    return false;
  }
  if (isa<CallInst>(Call) && cast<CallInst>(Call).cannotDuplicate())
    ContainsNoDuplicateCall = true;

  Function *F = Call.getCalledFunction();
  bool IsIndirectCall = !F;
  if (IsIndirectCall) {
    // Check if this happens to be an indirect function call to a known
    // function in this inline context. If not, we've done all we can.
    Value *Callee = Call.getCalledOperand();
    F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee));
    if (!F || F->getFunctionType() != Call.getFunctionType()) {
      onCallArgumentSetup(Call);

      if (!Call.onlyReadsMemory())
        disableLoadElimination();
      return Base::visitCallBase(Call);
    }
  }

  assert(F && "Expected a call to a known function");

  // When we have a concrete function, first try to simplify it directly.
  if (simplifyCallSite(F, Call))
    return true;

  // Next check if it is an intrinsic we know about.
  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&Call)) {
    switch (II->getIntrinsicID()) {
    default:
      if (!Call.onlyReadsMemory() && !isAssumeLikeIntrinsic(II))
        disableLoadElimination();
      return Base::visitCallBase(Call);

    case Intrinsic::load_relative:
      // This is normally lowered to 4 LLVM instructions.
      onLoadRelativeIntrinsic();
      return false;

    case Intrinsic::memset:
    case Intrinsic::memcpy:
    case Intrinsic::memmove:
      disableLoadElimination();
      // SROA can usually chew through these intrinsics, but they aren't free.
      return false;
    case Intrinsic::icall_branch_funnel:
    case Intrinsic::localescape:
      HasUninlineableIntrinsic = true;
      return false;
    case Intrinsic::vastart:
      InitsVargArgs = true;
      return false;
    case Intrinsic::launder_invariant_group:
    case Intrinsic::strip_invariant_group:
      if (auto *SROAArg = getSROAArgForValueOrNull(II->getOperand(0)))
        SROAArgValues[II] = SROAArg;
      return true;
    case Intrinsic::is_constant:
      return simplifyIntrinsicCallIsConstant(Call);
    case Intrinsic::objectsize:
      return simplifyIntrinsicCallObjectSize(Call);
    }
  }

  if (F == Call.getFunction()) {
    // This flag will fully abort the analysis, so don't bother with anything
    // else.
    IsRecursiveCall = true;
    if (!AllowRecursiveCall)
      return false;
  }

  if (TTI.isLoweredToCall(F)) {
    onLoweredCall(F, Call, IsIndirectCall);
  }

  if (!(Call.onlyReadsMemory() || (IsIndirectCall && F->onlyReadsMemory())))
    disableLoadElimination();
  return Base::visitCallBase(Call);
}
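
// In short, visitCallBase charges argument-setup and call penalties, lets
// known intrinsics take cheap special-case paths, aborts on patterns that
// make inlining illegal (returns_twice, va_start, recursion when
// disallowed), and defers the per-call bookkeeping to the onLoweredCall and
// onCallPenalty hooks.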
bool CallAnalyzer::visitReturnInst(ReturnInst &RI) {
  // We model the first return as free; additional returns cost an
  // instruction.
  bool Free = !HasReturn;
  HasReturn = true;
  return Free;
}

bool CallAnalyzer::visitBranchInst(BranchInst &BI) {
  // We model unconditional branches as essentially free. Conditional branches
  // whose condition folds to a constant at this call site are also free.
  return BI.isUnconditional() || isa<ConstantInt>(BI.getCondition()) ||
         isa_and_nonnull<ConstantInt>(
             SimplifiedValues.lookup(BI.getCondition()));
}
bool CallAnalyzer::visitSelectInst(SelectInst &SI) {
  bool CheckSROA = SI.getType()->isPointerTy();
  Value *TrueVal = SI.getTrueValue();
  Value *FalseVal = SI.getFalseValue();

  Constant *TrueC = dyn_cast<Constant>(TrueVal);
  if (!TrueC)
    TrueC = SimplifiedValues.lookup(TrueVal);
  Constant *FalseC = dyn_cast<Constant>(FalseVal);
  if (!FalseC)
    FalseC = SimplifiedValues.lookup(FalseVal);
  Constant *CondC =
      dyn_cast_or_null<Constant>(SimplifiedValues.lookup(SI.getCondition()));

  if (!CondC) {
    // Select C, X, X => X.
    if (TrueC == FalseC && TrueC) {
      SimplifiedValues[&SI] = TrueC;
      return true;
    }

    if (!CheckSROA)
      return Base::visitSelectInst(SI);

    std::pair<Value *, APInt> TrueBaseAndOffset =
        ConstantOffsetPtrs.lookup(TrueVal);
    std::pair<Value *, APInt> FalseBaseAndOffset =
        ConstantOffsetPtrs.lookup(FalseVal);
    if (TrueBaseAndOffset == FalseBaseAndOffset && TrueBaseAndOffset.first) {
      ConstantOffsetPtrs[&SI] = TrueBaseAndOffset;

      if (auto *SROAArg = getSROAArgForValueOrNull(TrueVal))
        SROAArgValues[&SI] = SROAArg;
      return true;
    }

    return Base::visitSelectInst(SI);
  }

  // Select condition is a constant.
  Value *SelectedV = CondC->isAllOnesValue()  ? TrueVal
                     : (CondC->isNullValue()) ? FalseVal
                                              : nullptr;
  if (!SelectedV) {
    // Condition is a vector constant that is not all 1s or all 0s. If all
    // operands are constants, ConstantFoldSelectInstruction() can handle the
    // cases such as select vectors.
    if (TrueC && FalseC) {
      if (auto *C = ConstantFoldSelectInstruction(CondC, TrueC, FalseC)) {
        SimplifiedValues[&SI] = C;
        return true;
      }
    }
    return Base::visitSelectInst(SI);
  }

  // Condition is either all 1s or all 0s; SI can be simplified.
  if (Constant *SelectedC = dyn_cast<Constant>(SelectedV)) {
    SimplifiedValues[&SI] = SelectedC;
    return true;
  }

  if (!CheckSROA)
    return true;

  std::pair<Value *, APInt> BaseAndOffset =
      ConstantOffsetPtrs.lookup(SelectedV);
  if (BaseAndOffset.first) {
    ConstantOffsetPtrs[&SI] = BaseAndOffset;

    if (auto *SROAArg = getSROAArgForValueOrNull(SelectedV))
      SROAArgValues[&SI] = SROAArg;
  }

  return true;
}
bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
  // We model unconditional switches as free; see the comments on handling
  // branches.
  if (isa<ConstantInt>(SI.getCondition()))
    return true;
  if (Value *V = SimplifiedValues.lookup(SI.getCondition()))
    if (isa<ConstantInt>(V))
      return true;

  // Assume the most general case where the switch is lowered into either a
  // jump table or a balanced binary tree of case clusters; the cost is
  // proportional to the size of the tree or of the jump table range.
  unsigned JumpTableSize = 0;
  BlockFrequencyInfo *BFI = GetBFI ? &(GetBFI(F)) : nullptr;
  unsigned NumCaseCluster =
      TTI.getEstimatedNumberOfCaseClusters(SI, JumpTableSize, PSI, BFI);

  onFinalizeSwitch(JumpTableSize, NumCaseCluster);
  return false;
}
bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) {
  // We never want to inline functions that contain an indirectbr, because the
  // blockaddresses (e.g. in static global initializers) would still refer to
  // the original function, and the indirect jump would enter the inlined copy
  // -- undefined behavior.
  HasIndirectBr = true;
  return false;
}

bool CallAnalyzer::visitResumeInst(ResumeInst &RI) {
  // FIXME: It's not clear that a single instruction is an accurate model for
  // the inline cost of a resume instruction.
  return false;
}
bool CallAnalyzer::visitInstruction(Instruction &I) {
  // Some instructions are free. All of the free intrinsics can also be
  // handled by SROA, etc.
  if (TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency) ==
      TargetTransformInfo::TCC_Free)
    return true;

  // We found something we don't understand or can't handle. Mark any
  // SROA-able values in the operand list as no longer viable.
  for (const Use &Op : I.operands())
    disableSROA(Op);

  return false;
}

/// Analyze a basic block for its contribution to the inline cost.
///
/// This method checks for uninlinable patterns, simplifies instructions, and
/// aborts early when the threshold has been crossed.
InlineResult
CallAnalyzer::analyzeBlock(BasicBlock *BB,
                           SmallPtrSetImpl<const Value *> &EphValues) {
  for (Instruction &I : *BB) {
    // Count the instructions, but skip debug/pseudo instructions and
    // ephemeral values, which do not survive inlining.
    if (I.isDebugOrPseudoInst())
      continue;

    if (EphValues.count(&I))
      continue;

    ++NumInstructions;
    if (isa<ExtractElementInst>(I) || I.getType()->isVectorTy())
      ++NumVectorInstructions;

    // Visit the instruction using our InstVisitor to account for all of the
    // per-instruction logic. The visit tree returns true if we consumed the
    // instruction in any way; false means its base cost counts against
    // inlining.
    onInstructionAnalysisStart(&I);

    if (Base::visit(&I))
      ++NumInstructionsSimplified;
    else
      onMissedSimplification();

    onInstructionAnalysisFinish(&I);
    using namespace ore;
    // If the visit of this instruction detected an uninlinable pattern,
    // abort.
    InlineResult IR = InlineResult::success();
    if (IsRecursiveCall && !AllowRecursiveCall)
      IR = InlineResult::failure("recursive");
    else if (ExposesReturnsTwice)
      IR = InlineResult::failure("exposes returns twice");
    else if (HasDynamicAlloca)
      IR = InlineResult::failure("dynamic alloca");
    else if (HasIndirectBr)
      IR = InlineResult::failure("indirect branch");
    else if (HasUninlineableIntrinsic)
      IR = InlineResult::failure("uninlinable intrinsic");
    else if (InitsVargArgs)
      IR = InlineResult::failure("varargs");
    if (!IR.isSuccess()) {
      if (ORE)
        ORE->emit([&]() {
          return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
                                          &CandidateCall)
                 << NV("Callee", &F) << " has uninlinable pattern ("
                 << NV("InlineResult", IR.getFailureReason())
                 << ") and cost is not fully computed";
        });
      return IR;
    }

    // If the caller is a recursive function, we don't want to inline
    // functions which allocate a lot of stack space, because it would
    // increase the caller's stack usage dramatically.
    if (IsCallerRecursive && AllocatedSize > RecurStackSizeThreshold) {
      auto IR =
          InlineResult::failure("recursive and allocates too much stack space");
      if (ORE)
        ORE->emit([&]() {
          return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
                                          &CandidateCall)
                 << NV("Callee", &F) << " is "
                 << NV("InlineResult", IR.getFailureReason())
                 << ". Cost is not fully computed";
        });
      return IR;
    }

    if (shouldStop())
      return InlineResult::failure(
          "Call site analysis is not favorable to inlining.");
  }

  return InlineResult::success();
}
ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
  if (!V->getType()->isPointerTy())
    return nullptr;

  unsigned AS = V->getType()->getPointerAddressSpace();
  unsigned IntPtrWidth = DL.getIndexSizeInBits(AS);
  APInt Offset = APInt::getZero(IntPtrWidth);

  // Even though we don't look through PHI nodes, we could be called on an
  // instruction in an unreachable block, which may be on a cycle.
  SmallPtrSet<Value *, 4> Visited;
  Visited.insert(V);
  do {
    if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
      if (!GEP->isInBounds() || !accumulateGEPOffset(*GEP, Offset))
        return nullptr;
      V = GEP->getPointerOperand();
    } else if (Operator::getOpcode(V) == Instruction::BitCast) {
      V = cast<Operator>(V)->getOperand(0);
    } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
      if (GA->isInterposable())
        break;
      V = GA->getAliasee();
    } else {
      break;
    }
    assert(V->getType()->isPointerTy() && "Unexpected operand type!");
  } while (Visited.insert(V).second);

  Type *IdxPtrTy = DL.getIndexType(V->getType());
  return cast<ConstantInt>(ConstantInt::get(IdxPtrTy, Offset));
}
/// Find dead blocks due to deleted CFG edges during inlining.
///
/// If we know the successor of the current block, \p CurrBB, has to be \p
/// NextBB, the other successors of \p CurrBB are dead if they have no live
/// incoming CFG edges. If one block is found to be dead, we keep growing the
/// dead block list by checking the successors of the dead blocks.
void CallAnalyzer::findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB) {
  auto IsEdgeDead = [&](BasicBlock *Pred, BasicBlock *Succ) {
    // A CFG edge is dead if the predecessor is dead or the predecessor has a
    // known successor which is not the one under exam.
    return (DeadBlocks.count(Pred) ||
            (KnownSuccessors[Pred] && KnownSuccessors[Pred] != Succ));
  };

  auto IsNewlyDead = [&](BasicBlock *BB) {
    // If all the edges to a block are dead, the block is also dead.
    return (!DeadBlocks.count(BB) &&
            llvm::all_of(predecessors(BB),
                         [&](BasicBlock *P) { return IsEdgeDead(P, BB); }));
  };

  for (BasicBlock *Succ : successors(CurrBB)) {
    if (Succ == NextBB || !IsNewlyDead(Succ))
      continue;
    SmallVector<BasicBlock *, 4> NewDead;
    NewDead.push_back(Succ);
    while (!NewDead.empty()) {
      BasicBlock *Dead = NewDead.pop_back_val();
      if (DeadBlocks.insert(Dead).second)
        // Continue growing the dead block lists.
        for (BasicBlock *S : successors(Dead))
          if (IsNewlyDead(S))
            NewDead.push_back(S);
    }
  }
}
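
// Example: if a callee contains "br i1 %flag, label %a, label %b" and %flag
// maps to the constant true at this call site, KnownSuccessors records %a,
// and findDeadBlocks marks %b (and anything reachable only through %b) dead,
// so its instructions never count toward Cost.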
/// Analyze a call site for potential inlining.
///
/// Returns an InlineResult indicating whether inlining this call is viable.
/// It computes the cost and adjusts the threshold based on numerous factors
/// and heuristics.
InlineResult CallAnalyzer::analyze() {
  ++NumCallsAnalyzed;

  auto Result = onAnalysisStart();
  if (!Result.isSuccess())
    return Result;

  if (F.empty())
    return InlineResult::success();

  Function *Caller = CandidateCall.getFunction();
  // Check if the caller function is recursive itself.
  for (User *U : Caller->users()) {
    CallBase *Call = dyn_cast<CallBase>(U);
    if (Call && Call->getFunction() == Caller) {
      IsCallerRecursive = true;
      break;
    }
  }

  // Populate our simplified values by mapping from function arguments to call
  // arguments with known important simplifications.
  auto CAI = CandidateCall.arg_begin();
  for (Argument &FAI : F.args()) {
    assert(CAI != CandidateCall.arg_end());
    if (Constant *C = dyn_cast<Constant>(CAI))
      SimplifiedValues[&FAI] = C;

    Value *PtrArg = *CAI;
    if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) {
      ConstantOffsetPtrs[&FAI] = std::make_pair(PtrArg, C->getValue());

      // We can SROA any pointer arguments derived from alloca instructions.
      if (auto *SROAArg = dyn_cast<AllocaInst>(PtrArg)) {
        SROAArgValues[&FAI] = SROAArg;
        onInitializeSROAArg(SROAArg);
        EnabledSROAAllocas.insert(SROAArg);
      }
    }
    ++CAI;
  }
  NumConstantArgs = SimplifiedValues.size();
  NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size();
  NumAllocaArgs = SROAArgValues.size();

  // Collect the ephemeral values (those used only by assume-like intrinsics)
  // so we can skip them during the walk.
  SmallPtrSet<const Value *, 32> EphValues;
  CodeMetrics::collectEphemeralValues(&F, &GetAssumptionCache(F), EphValues);

  // The worklist of live basic blocks in the callee *after* inlining. We
  // avoid adding blocks proven dead for this call site to get more accurate
  // cost estimates; live successors are inserted in breadth-first order.
  typedef SmallSetVector<BasicBlock *, 16> BBSetVector;
  BBSetVector BBWorklist;
  BBWorklist.insert(&F.getEntryBlock());
  // Note that we *must not* cache the size; this loop grows the worklist.
  for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
    if (shouldStop())
      break;

    BasicBlock *BB = BBWorklist[Idx];
    if (BB->empty())
      continue;

    onBlockStart(BB);

    // Disallow inlining a blockaddress with uses other than strictly callbr:
    // if the blockaddress escapes the function, inlining may lead to an
    // invalid cross-function reference.
    if (BB->hasAddressTaken())
      for (User *U : BlockAddress::get(&*BB)->users())
        if (!isa<CallBrInst>(*U))
          return InlineResult::failure("blockaddress used outside of callbr");

    // Analyze the cost of this block. If we blow through the threshold, this
    // returns false, and we can bail on out.
    InlineResult IR = analyzeBlock(BB, EphValues);
    if (!IR.isSuccess())
      return IR;

    Instruction *TI = BB->getTerminator();

    // Add in the live successors by first checking whether we have a
    // terminator that may be simplified based on the values simplified by
    // this call.
    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
      if (BI->isConditional()) {
        Value *Cond = BI->getCondition();
        if (ConstantInt *SimpleCond =
                dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
          BasicBlock *NextBB = BI->getSuccessor(SimpleCond->isZero() ? 1 : 0);
          BBWorklist.insert(NextBB);
          KnownSuccessors[BB] = NextBB;
          findDeadBlocks(BB, NextBB);
          continue;
        }
      }
    } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
      Value *Cond = SI->getCondition();
      if (ConstantInt *SimpleCond =
              dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
        BasicBlock *NextBB = SI->findCaseValue(SimpleCond)->getCaseSuccessor();
        BBWorklist.insert(NextBB);
        KnownSuccessors[BB] = NextBB;
        findDeadBlocks(BB, NextBB);
        continue;
      }
    }

    // If we're unable to select a particular successor, just count all of
    // them.
    for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize;
         ++TIdx)
      BBWorklist.insert(TI->getSuccessor(TIdx));

    onBlockAnalyzed(BB);
  }

  // If this is a noduplicate call, we can still inline as long as inlining
  // this would cause the removal of the caller (so the instruction is not
  // actually duplicated, just moved).
  if (!isSoleCallToLocalFunction(CandidateCall, F) && ContainsNoDuplicateCall)
    return InlineResult::failure("noduplicate");

  // If the callee's stack size exceeds the user-specified threshold, do not
  // let it be inlined. The command line option overrides a limit set in the
  // function attributes.
  size_t FinalStackSizeThreshold = StackSizeThreshold;
  if (!StackSizeThreshold.getNumOccurrences())
    if (std::optional<int> AttrMaxStackSize = getStringFnAttrAsInt(
            CandidateCall, InlineConstants::MaxInlineStackSizeAttributeName))
      FinalStackSizeThreshold = *AttrMaxStackSize;
  if (AllocatedSize > FinalStackSizeThreshold)
    return InlineResult::failure("stacksize");

  return finalizeAnalysis();
}
void InlineCostCallAnalyzer::print(raw_ostream &OS) {
#define DEBUG_PRINT_STAT(x) OS << "      " #x ": " << x << "\n"
  if (PrintInstructionComments)
    F.print(OS, &Writer);
  DEBUG_PRINT_STAT(NumConstantArgs);
  DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs);
  DEBUG_PRINT_STAT(NumAllocaArgs);
  DEBUG_PRINT_STAT(NumConstantPtrCmps);
  DEBUG_PRINT_STAT(NumConstantPtrDiffs);
  DEBUG_PRINT_STAT(NumInstructionsSimplified);
  DEBUG_PRINT_STAT(NumInstructions);
  DEBUG_PRINT_STAT(SROACostSavings);
  DEBUG_PRINT_STAT(SROACostSavingsLost);
  DEBUG_PRINT_STAT(LoadEliminationCost);
  DEBUG_PRINT_STAT(ContainsNoDuplicateCall);
  DEBUG_PRINT_STAT(Cost);
  DEBUG_PRINT_STAT(Threshold);
#undef DEBUG_PRINT_STAT
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Dump stats about this call's analysis.
LLVM_DUMP_METHOD void InlineCostCallAnalyzer::dump() { print(dbgs()); }
#endif
/// Test that there are no attribute conflicts between Caller and Callee that
/// prevent inlining.
static bool functionsHaveCompatibleAttributes(
    Function *Caller, Function *Callee, TargetTransformInfo &TTI,
    function_ref<const TargetLibraryInfo &(Function &)> &GetTLI) {
  // Note that CalleeTLI must be a copy, not a reference: the legacy pass
  // manager caches the most recently created TLI in the
  // TargetLibraryInfoWrapperPass and always returns the same object.
  auto CalleeTLI = GetTLI(*Callee);
  return (IgnoreTTIInlineCompatible ||
          TTI.areInlineCompatible(*Caller, *Callee)) &&
         GetTLI(*Caller).areInlineCompatible(CalleeTLI,
                                             InlineCallerSupersetNoBuiltin) &&
         AttributeFuncs::areInlineCompatible(*Caller, *Callee);
}

int llvm::getCallsiteCost(const CallBase &Call, const DataLayout &DL) {
  int64_t Cost = 0;
  for (unsigned I = 0, E = Call.arg_size(); I != E; ++I) {
    if (Call.isByValArgument(I)) {
      // We approximate the number of loads and stores needed by dividing the
      // size of the byval type by the target's pointer size.
      PointerType *PTy = cast<PointerType>(Call.getArgOperand(I)->getType());
      unsigned TypeSize = DL.getTypeSizeInBits(Call.getParamByValType(I));
      unsigned AS = PTy->getAddressSpace();
      unsigned PointerSize = DL.getPointerSizeInBits(AS);
      // Ceiling division.
      unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;

      // If it generates more than 8 stores it is likely to be expanded as an
      // inline memcpy, so we take that as an upper bound.
      // FIXME: The maxStoresPerMemcpy setting from the target should be used
      // here instead of a magic number of 8, but it's not available via
      // DataLayout.
      NumStores = std::min(NumStores, 8U);

      Cost += 2 * NumStores * InstrCost;
    } else {
      // For non-byval arguments subtract off one instruction per call
      // argument.
      Cost += InstrCost;
    }
  }
  // The call instruction also disappears after inlining.
  Cost += InstrCost;
  Cost += CallPenalty;
  return std::min<int64_t>(Cost, INT_MAX);
}
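
// Worked example, assuming the defaults -inline-instr-cost=5 and
// -inline-call-penalty=25: a call passing one 64-byte byval struct on a
// 64-bit target copies ceil(512 / 64) = 8 words, so getCallsiteCost returns
// 2 * 8 * 5 (copy) + 5 (the call itself) + 25 (call penalty) = 110.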
InlineCost llvm::getInlineCost(
    CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI,
    function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
    function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
    function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
    ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
  return getInlineCost(Call, Call.getCalledFunction(), Params, CalleeTTI,
                       GetAssumptionCache, GetTLI, GetBFI, PSI, ORE);
}

std::optional<int> llvm::getInliningCostEstimate(
    CallBase &Call, TargetTransformInfo &CalleeTTI,
    function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
    function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
    ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
  const InlineParams Params = {/* DefaultThreshold*/ 0,
                               /*HintThreshold*/ {},
                               /*ColdThreshold*/ {},
                               /*OptSizeThreshold*/ {},
                               /*OptMinSizeThreshold*/ {},
                               /*HotCallSiteThreshold*/ {},
                               /*LocallyHotCallSiteThreshold*/ {},
                               /*ColdCallSiteThreshold*/ {},
                               /*ComputeFullInlineCost*/ true,
                               /*EnableDeferral*/ true};

  InlineCostCallAnalyzer CA(*Call.getCalledFunction(), Call, Params, CalleeTTI,
                            GetAssumptionCache, GetBFI, PSI, ORE, true,
                            /*IgnoreThreshold*/ true);
  auto R = CA.analyze();
  if (!R.isSuccess())
    return std::nullopt;
  return CA.getCost();
}

std::optional<InlineCostFeatures> llvm::getInliningCostFeatures(
    CallBase &Call, TargetTransformInfo &CalleeTTI,
    function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
    function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
    ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
  InlineCostFeaturesAnalyzer CFA(CalleeTTI, GetAssumptionCache, GetBFI, PSI,
                                 ORE, *Call.getCalledFunction(), Call);
  auto R = CFA.analyze();
  if (!R.isSuccess())
    return std::nullopt;
  return CFA.features();
}
std::optional<InlineResult> llvm::getAttributeBasedInliningDecision(
    CallBase &Call, Function *Callee, TargetTransformInfo &CalleeTTI,
    function_ref<const TargetLibraryInfo &(Function &)> GetTLI) {
  // Cannot inline indirect calls.
  if (!Callee)
    return InlineResult::failure("indirect call");

  // When a callee coroutine is inlined into a caller coroutine before the
  // coro-split pass, coro-early cannot handle it well, so we don't inline a
  // coroutine that has not yet been split.
  if (Callee->isPresplitCoroutine())
    return InlineResult::failure("unsplited coroutine call");

  // Never inline calls with byval arguments that do not have the alloca
  // address space: byval arguments can be replaced with a copy to an alloca,
  // and the inlined code would need adjusting for the address-space change.
  unsigned AllocaAS = Callee->getParent()->getDataLayout().getAllocaAddrSpace();
  for (unsigned I = 0, E = Call.arg_size(); I != E; ++I)
    if (Call.isByValArgument(I)) {
      PointerType *PTy = cast<PointerType>(Call.getArgOperand(I)->getType());
      if (PTy->getAddressSpace() != AllocaAS)
        return InlineResult::failure("byval arguments without alloca"
                                     " address space");
    }

  // Calls to functions with always-inline attributes should be inlined
  // whenever possible.
  if (Call.hasFnAttr(Attribute::AlwaysInline)) {
    if (Call.getAttributes().hasFnAttr(Attribute::NoInline))
      return InlineResult::failure("noinline call site attribute");

    auto IsViable = isInlineViable(*Callee);
    if (IsViable.isSuccess())
      return InlineResult::success();
    return InlineResult::failure(IsViable.getFailureReason());
  }

  // Never inline functions with conflicting attributes (unless the callee
  // has the always-inline attribute).
  Function *Caller = Call.getCaller();
  if (!functionsHaveCompatibleAttributes(Caller, Callee, CalleeTTI, GetTLI))
    return InlineResult::failure("conflicting attributes");

  // Don't inline this call if the caller has the optnone attribute.
  if (Caller->hasOptNone())
    return InlineResult::failure("optnone attribute");

  // Don't inline a function that treats null pointer as valid into a caller
  // that does not have this attribute.
  if (!Caller->nullPointerIsDefined() && Callee->nullPointerIsDefined())
    return InlineResult::failure("nullptr definitions incompatible");

  // Don't inline functions which can be interposed at link-time.
  if (Callee->isInterposable())
    return InlineResult::failure("interposable");

  // Don't inline functions marked noinline.
  if (Callee->hasFnAttribute(Attribute::NoInline))
    return InlineResult::failure("noinline function attribute");

  // Don't inline call sites marked noinline.
  if (Call.isNoInline())
    return InlineResult::failure("noinline call site attribute");

  return std::nullopt;
}
InlineCost llvm::getInlineCost(
    CallBase &Call, Function *Callee, const InlineParams &Params,
    TargetTransformInfo &CalleeTTI,
    function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
    function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
    function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
    ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
  auto UserDecision =
      llvm::getAttributeBasedInliningDecision(Call, Callee, CalleeTTI, GetTLI);

  if (UserDecision) {
    if (UserDecision->isSuccess())
      return llvm::InlineCost::getAlways("always inline attribute");
    return llvm::InlineCost::getNever(UserDecision->getFailureReason());
  }

  LLVM_DEBUG(llvm::dbgs() << "      Analyzing call of " << Callee->getName()
                          << "... (caller:" << Call.getCaller()->getName()
                          << ")\n");

  InlineCostCallAnalyzer CA(*Callee, Call, Params, CalleeTTI,
                            GetAssumptionCache, GetBFI, PSI, ORE);
  InlineResult ShouldInline = CA.analyze();

  LLVM_DEBUG(CA.dump());

  // Always report the cost-benefit pair when we decided by cost-benefit.
  if (CA.wasDecidedByCostBenefit()) {
    if (ShouldInline.isSuccess())
      return InlineCost::getAlways("benefit over cost",
                                   CA.getCostBenefitPair());
    return InlineCost::getNever("cost over benefit", CA.getCostBenefitPair());
  }

  if (CA.wasDecidedByCostThreshold())
    return InlineCost::get(CA.getCost(), CA.getThreshold(),
                           CA.getStaticBonusApplied());

  // No details on how the decision was made; simply return always or never.
  return ShouldInline.isSuccess()
             ? InlineCost::getAlways("empty function")
             : InlineCost::getNever(ShouldInline.getFailureReason());
}
InlineResult llvm::isInlineViable(Function &F) {
  bool ReturnsTwice = F.hasFnAttribute(Attribute::ReturnsTwice);
  for (BasicBlock &BB : F) {
    // Disallow inlining of functions which contain indirect branches.
    if (isa<IndirectBrInst>(BB.getTerminator()))
      return InlineResult::failure("contains indirect branches");

    // Disallow inlining of blockaddresses which are used by non-callbr
    // instructions.
    if (BB.hasAddressTaken())
      for (User *U : BlockAddress::get(&BB)->users())
        if (!isa<CallBrInst>(*U))
          return InlineResult::failure("blockaddress used outside of callbr");

    for (auto &II : BB) {
      CallBase *Call = dyn_cast<CallBase>(&II);
      if (!Call)
        continue;

      // Disallow recursive calls.
      Function *Callee = Call->getCalledFunction();
      if (&F == Callee)
        return InlineResult::failure("recursive call");

      // Disallow calls which expose returns-twice to a function not
      // previously attributed as such.
      if (!ReturnsTwice && isa<CallInst>(Call) &&
          cast<CallInst>(Call)->canReturnTwice())
        return InlineResult::failure("exposes returns-twice attribute");

      if (Callee)
        switch (Callee->getIntrinsicID()) {
        default:
          break;
        case llvm::Intrinsic::icall_branch_funnel:
          // Disallow inlining of @llvm.icall.branch.funnel because the
          // current backend can't separate call targets from call arguments.
          return InlineResult::failure(
              "disallowed inlining of @llvm.icall.branch.funnel");
        case llvm::Intrinsic::localescape:
          // Disallow inlining functions that call @llvm.localescape. Doing
          // this correctly would require major changes to the inliner.
          return InlineResult::failure(
              "disallowed inlining of @llvm.localescape");
        case llvm::Intrinsic::vastart:
          // Disallow inlining of functions that initialize VarArgs with
          // va_start.
          return InlineResult::failure(
              "contains VarArgs initialized with va_start");
        }
    }
  }

  return InlineResult::success();
}
static int computeThresholdFromOptLevels(unsigned OptLevel,
                                         unsigned SizeOptLevel) {
  if (OptLevel > 2)
    return InlineConstants::OptAggressiveThreshold;
  if (SizeOptLevel == 1) // -Os
    return InlineConstants::OptSizeThreshold;
  if (SizeOptLevel == 2) // -Oz
    return InlineConstants::OptMinSizeThreshold;
  return DefaultThreshold;
}
PreservedAnalyses
InlineCostAnnotationPrinterPass::run(Function &F,
                                     FunctionAnalysisManager &FAM) {
  PrintInstructionComments = true;
  std::function<AssumptionCache &(Function &)> GetAssumptionCache =
      [&](Function &F) -> AssumptionCache & {
    return FAM.getResult<AssumptionAnalysis>(F);
  };
  Module *M = F.getParent();
  ProfileSummaryInfo PSI(*M);
  TargetTransformInfo TTI(M->getDataLayout());
  // The default InlineParams are used; this pass serves only to verify the
  // inliner's decisions.
  const InlineParams Params = llvm::getInlineParams();
  for (BasicBlock &BB : F) {
    for (Instruction &I : BB) {
      if (CallInst *CI = dyn_cast<CallInst>(&I)) {
        Function *CalledFunction = CI->getCalledFunction();
        if (!CalledFunction || CalledFunction->isDeclaration())
          continue;
        OptimizationRemarkEmitter ORE(CalledFunction);
        InlineCostCallAnalyzer ICCA(*CalledFunction, *CI, Params, TTI,
                                    GetAssumptionCache, nullptr, &PSI, &ORE);
        ICCA.analyze();
        OS << "      Analyzing call of " << CalledFunction->getName()
           << "... (caller:" << CI->getCaller()->getName() << ")\n";
        ICCA.print(OS);
        OS << "\n";
      }
    }
  }
  return PreservedAnalyses::all();
}