30 #include "llvm/Config/llvm-config.h"
48 #define DEBUG_TYPE "inline-cost"
50 STATISTIC(NumCallsAnalyzed,
"Number of call sites analyzed");
55 cl::desc(
"Default amount of inlining to perform"));
64 cl::desc(
"Ignore TTI attributes compatibility check between callee/caller "
65 "during inline cost calculation"));
69 cl::desc(
"Prints comments for instruction based on inline cost analysis"));
73 cl::desc(
"Control the amount of inlining to perform (default = 225)"));
77 cl::desc(
"Threshold for inlining functions with inline hint"));
82 cl::desc(
"Threshold for inlining cold callsites"));
86 cl::desc(
"Enable the cost-benefit analysis for the inliner"));
90 cl::desc(
"Multiplier to multiply cycle savings by during inlining"));
95 cl::desc(
"The maximum size of a callee that get's "
96 "inlined without sufficient cycle savings"));
103 cl::desc(
"Threshold for inlining functions with cold attribute"));
108 cl::desc(
"Threshold for hot callsites "));
112 cl::desc(
"Threshold for locally hot callsites "));
116 cl::desc(
"Maximum block frequency, expressed as a percentage of caller's "
117 "entry frequency, for a callsite to be cold in the absence of "
118 "profile information."));
122 cl::desc(
"Minimum block frequency, expressed as a multiple of caller's "
123 "entry frequency, for a callsite to be hot in the absence of "
124 "profile information."));
128 cl::desc(
"Call penalty that is applied per callsite when inlining"));
132 cl::desc(
"Compute the full inline cost of a call site even when the cost "
133 "exceeds the threshold."));
138 cl::desc(
"Allow inlining when caller has a superset of callee's nobuiltin "
143 cl::desc(
"Disables evaluation of GetElementPtr with constant operands"));
156 class InlineCostCallAnalyzer;
// Bookkeeping for a single analyzed instruction: the analyzer's running cost
// and threshold sampled immediately before and after the instruction is
// visited. The annotation writer prints these to show each instruction's
// individual contribution to the inline cost.
struct InstructionCostDetail {
  int CostBefore = 0;      // running cost before visiting the instruction
  int CostAfter = 0;       // running cost after visiting the instruction
  int ThresholdBefore = 0; // threshold before visiting the instruction
  int ThresholdAfter = 0;  // threshold after visiting the instruction

  // Net threshold change caused by this instruction.
  int getThresholdDelta() const { return ThresholdAfter - ThresholdBefore; }

  // Net cost contribution of this instruction.
  int getCostDelta() const { return CostAfter - CostBefore; }

  // True if visiting this instruction changed the threshold at all.
  bool hasThresholdChanged() const { return ThresholdAfter != ThresholdBefore; }
};
175 InlineCostCallAnalyzer *
const ICCA;
178 InlineCostAnnotationWriter(InlineCostCallAnalyzer *ICCA) : ICCA(ICCA) {}
191 class CallAnalyzer :
public InstVisitor<CallAnalyzer, bool> {
196 virtual ~CallAnalyzer() =
default;
231 virtual void onInstructionAnalysisStart(
const Instruction *
I) {}
234 virtual void onInstructionAnalysisFinish(
const Instruction *
I) {}
244 virtual bool shouldStop() {
return false; }
256 virtual void onDisableLoadElimination() {}
260 virtual bool onCallBaseVisitStart(
CallBase &
Call) {
return true; }
263 virtual void onCallPenalty() {}
267 virtual void onLoadEliminationOpportunity() {}
271 virtual void onCallArgumentSetup(
const CallBase &
Call) {}
274 virtual void onLoadRelativeIntrinsic() {}
282 virtual bool onJumpTable(
unsigned JumpTableSize) {
return true; }
286 virtual bool onCaseCluster(
unsigned NumCaseCluster) {
return true; }
290 virtual void onFinalizeSwitch(
unsigned JumpTableSize,
291 unsigned NumCaseCluster) {}
295 virtual void onMissedSimplification() {}
301 virtual void onAggregateSROAUse(
AllocaInst *V) {}
303 bool handleSROA(
Value *V,
bool DoNotDisable) {
305 if (
auto *SROAArg = getSROAArgForValueOrNull(V)) {
307 onAggregateSROAUse(SROAArg);
310 disableSROAForArg(SROAArg);
315 bool IsCallerRecursive =
false;
316 bool IsRecursiveCall =
false;
317 bool ExposesReturnsTwice =
false;
318 bool HasDynamicAlloca =
false;
319 bool ContainsNoDuplicateCall =
false;
320 bool HasReturn =
false;
321 bool HasIndirectBr =
false;
322 bool HasUninlineableIntrinsic =
false;
323 bool InitsVargArgs =
false;
327 unsigned NumInstructions = 0;
328 unsigned NumVectorInstructions = 0;
359 bool EnableLoadElimination =
true;
362 bool AllowRecursiveCall =
false;
367 auto It = SROAArgValues.
find(V);
368 if (It == SROAArgValues.
end() || EnabledSROAAllocas.
count(It->second) == 0)
374 bool isAllocaDerivedArg(
Value *V);
376 void disableSROA(
Value *V);
378 void disableLoadElimination();
383 template <
typename Callable>
384 bool simplifyInstruction(
Instruction &
I, Callable Evaluate);
385 bool simplifyIntrinsicCallIsConstant(
CallBase &CB);
397 bool isKnownNonNullInCallee(
Value *V);
451 :
TTI(
TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI),
453 CandidateCall(
Call) {}
458 if (SimplifiedValues.
find(
I) != SimplifiedValues.
end())
459 return SimplifiedValues[
I];
465 unsigned NumConstantArgs = 0;
466 unsigned NumConstantOffsetPtrArgs = 0;
467 unsigned NumAllocaArgs = 0;
468 unsigned NumConstantPtrCmps = 0;
469 unsigned NumConstantPtrDiffs = 0;
470 unsigned NumInstructionsSimplified = 0;
// Expected number of compares emitted when a switch with NumCaseCluster case
// clusters is lowered without a jump table: roughly 3*N/2 - 1. Widened to
// 64-bit arithmetic so large cluster counts cannot overflow the product.
int64_t getExpectedNumberOfCompare(int NumCaseCluster) {
  const int64_t Clusters = static_cast<int64_t>(NumCaseCluster);
  return 3 * Clusters / 2 - 1;
}
496 class InlineCostCallAnalyzer final :
public CallAnalyzer {
498 const bool ComputeFullInlineCost;
499 int LoadEliminationCost = 0;
504 int SingleBBBonus = 0;
519 const bool BoostIndirectCalls;
522 const bool IgnoreThreshold;
525 const bool CostBenefitAnalysisEnabled;
536 int CostAtBBStart = 0;
543 bool DecidedByCostThreshold =
false;
546 bool DecidedByCostBenefit =
false;
551 bool SingleBB =
true;
553 unsigned SROACostSavings = 0;
554 unsigned SROACostSavingsLost = 0;
574 void addCost(int64_t Inc, int64_t UpperBound = INT_MAX) {
575 assert(UpperBound > 0 && UpperBound <= INT_MAX &&
"invalid upper bound");
576 Cost = std::min<int>(UpperBound, Cost + Inc);
580 auto CostIt = SROAArgCosts.
find(
Arg);
581 if (CostIt == SROAArgCosts.
end())
583 addCost(CostIt->second);
584 SROACostSavings -= CostIt->second;
585 SROACostSavingsLost += CostIt->second;
586 SROAArgCosts.
erase(CostIt);
589 void onDisableLoadElimination()
override {
590 addCost(LoadEliminationCost);
591 LoadEliminationCost = 0;
594 bool onCallBaseVisitStart(
CallBase &Call)
override {
597 Threshold += *AttrCallThresholdBonus;
601 addCost(*AttrCallCost);
609 void onCallPenalty()
override { addCost(
CallPenalty); }
610 void onCallArgumentSetup(
const CallBase &Call)
override {
615 void onLoadRelativeIntrinsic()
override {
620 bool IsIndirectCall)
override {
629 if (IsIndirectCall && BoostIndirectCalls) {
630 auto IndirectCallParams = Params;
635 InlineCostCallAnalyzer CA(*
F, Call, IndirectCallParams,
TTI,
636 GetAssumptionCache, GetBFI, PSI, ORE,
false);
637 if (CA.analyze().isSuccess()) {
640 Cost -=
std::max(0, CA.getThreshold() - CA.getCost());
647 void onFinalizeSwitch(
unsigned JumpTableSize,
648 unsigned NumCaseCluster)
override {
657 addCost(JTCost,
static_cast<int64_t
>(CostUpperBound));
661 if (NumCaseCluster <= 3) {
667 int64_t ExpectedNumberOfCompare =
668 getExpectedNumberOfCompare(NumCaseCluster);
672 addCost(SwitchCost,
static_cast<int64_t
>(CostUpperBound));
674 void onMissedSimplification()
override {
680 "Should not initialize SROA costs for null value.");
681 SROAArgCosts[
Arg] = 0;
684 void onAggregateSROAUse(
AllocaInst *SROAArg)
override {
685 auto CostIt = SROAArgCosts.
find(SROAArg);
687 "expected this argument to have a cost");
692 void onBlockStart(
const BasicBlock *
BB)
override { CostAtBBStart = Cost; }
695 if (CostBenefitAnalysisEnabled) {
698 assert(GetBFI &&
"GetBFI must be available");
704 ColdSize += Cost - CostAtBBStart;
707 auto *TI =
BB->getTerminator();
712 if (SingleBB && TI->getNumSuccessors() > 1) {
714 Threshold -= SingleBBBonus;
719 void onInstructionAnalysisStart(
const Instruction *
I)
override {
724 InstructionCostDetailMap[
I].CostBefore = Cost;
725 InstructionCostDetailMap[
I].ThresholdBefore = Threshold;
728 void onInstructionAnalysisFinish(
const Instruction *
I)
override {
733 InstructionCostDetailMap[
I].CostAfter = Cost;
734 InstructionCostDetailMap[
I].ThresholdAfter = Threshold;
737 bool isCostBenefitAnalysisEnabled() {
738 if (!PSI || !PSI->hasProfileSummary())
750 if (!PSI->hasInstrumentationProfile())
755 if (!
Caller->getEntryCount())
763 if (!PSI->isHotCallSite(CandidateCall, CallerBFI))
767 auto EntryCount =
F.getEntryCount();
768 if (!EntryCount || !EntryCount->getCount())
782 if (!CostBenefitAnalysisEnabled)
806 APInt CycleSavings(128, 0);
809 APInt CurrentSavings(128, 0);
813 if (BI->isConditional() &&
814 isa_and_nonnull<ConstantInt>(
815 SimplifiedValues.
lookup(BI->getCondition()))) {
818 }
else if (
Value *V = dyn_cast<Value>(&
I)) {
820 if (SimplifiedValues.
count(V)) {
829 CycleSavings += CurrentSavings;
833 auto EntryProfileCount =
F.getEntryCount();
834 assert(EntryProfileCount.hasValue() && EntryProfileCount->getCount());
835 auto EntryCount = EntryProfileCount->getCount();
836 CycleSavings += EntryCount / 2;
837 CycleSavings = CycleSavings.udiv(EntryCount);
840 auto *CallerBB = CandidateCall.
getParent();
846 int Size = Cost - ColdSize;
865 APInt RHS(128, PSI->getOrCompHotCountThreshold());
877 if (
Caller->hasMinSize()) {
883 if (DeadBlocks.
count(L->getHeader()))
893 if (NumVectorInstructions <= NumInstructions / 10)
894 Threshold -= VectorBonus;
895 else if (NumVectorInstructions <= NumInstructions / 2)
896 Threshold -= VectorBonus / 2;
905 Cost *= *AttrCostMult;
909 Threshold = *AttrThreshold;
911 if (
auto Result = costBenefitAnalysis()) {
912 DecidedByCostBenefit =
true;
922 DecidedByCostThreshold =
true;
923 return Cost <
std::max(1, Threshold)
928 bool shouldStop()
override {
929 if (IgnoreThreshold || ComputeFullInlineCost)
933 if (Cost < Threshold)
935 DecidedByCostThreshold =
true;
939 void onLoadEliminationOpportunity()
override {
954 assert(NumInstructions == 0);
955 assert(NumVectorInstructions == 0);
958 updateThreshold(CandidateCall,
F);
964 assert(SingleBBBonus >= 0);
970 Threshold += (SingleBBBonus + VectorBonus);
982 if (Cost >= Threshold && !ComputeFullInlineCost)
989 InlineCostCallAnalyzer(
996 bool IgnoreThreshold =
false)
997 : CallAnalyzer(
Callee,
Call,
TTI, GetAssumptionCache, GetBFI, PSI, ORE),
999 Params.ComputeFullInlineCost || ORE ||
1000 isCostBenefitAnalysisEnabled()),
1002 BoostIndirectCalls(BoostIndirect), IgnoreThreshold(IgnoreThreshold),
1003 CostBenefitAnalysisEnabled(isCostBenefitAnalysisEnabled()),
1009 InlineCostAnnotationWriter Writer;
1018 if (InstructionCostDetailMap.
find(
I) != InstructionCostDetailMap.
end())
1019 return InstructionCostDetailMap[
I];
1023 virtual ~InlineCostCallAnalyzer() =
default;
1024 int getThreshold()
const {
return Threshold; }
1025 int getCost()
const {
return Cost; }
1027 bool wasDecidedByCostBenefit()
const {
return DecidedByCostBenefit; }
1028 bool wasDecidedByCostThreshold()
const {
return DecidedByCostThreshold; }
1031 class InlineCostFeaturesAnalyzer final :
public CallAnalyzer {
1038 static constexpr
int JTCostMultiplier = 4;
1039 static constexpr
int CaseClusterCostMultiplier = 2;
1040 static constexpr
int SwitchCostMultiplier = 2;
1044 unsigned SROACostSavingOpportunities = 0;
1045 int VectorBonus = 0;
1046 int SingleBBBonus = 0;
1052 Cost[
static_cast<size_t>(Feature)] += Delta;
1056 Cost[
static_cast<size_t>(Feature)] =
Value;
1060 auto CostIt = SROACosts.
find(
Arg);
1061 if (CostIt == SROACosts.
end())
1064 increment(InlineCostFeatureIndex::SROALosses, CostIt->second);
1065 SROACostSavingOpportunities -= CostIt->second;
1066 SROACosts.
erase(CostIt);
1069 void onDisableLoadElimination()
override {
1070 set(InlineCostFeatureIndex::LoadElimination, 1);
1073 void onCallPenalty()
override {
1077 void onCallArgumentSetup(
const CallBase &Call)
override {
1078 increment(InlineCostFeatureIndex::CallArgumentSetup,
1082 void onLoadRelativeIntrinsic()
override {
1083 increment(InlineCostFeatureIndex::LoadRelativeIntrinsic,
1088 bool IsIndirectCall)
override {
1089 increment(InlineCostFeatureIndex::LoweredCallArgSetup,
1092 if (IsIndirectCall) {
1106 InlineCostCallAnalyzer CA(*
F, Call, IndirectCallParams,
TTI,
1107 GetAssumptionCache, GetBFI, PSI, ORE,
false,
1109 if (CA.analyze().isSuccess()) {
1110 increment(InlineCostFeatureIndex::NestedInlineCostEstimate,
1112 increment(InlineCostFeatureIndex::NestedInlines, 1);
1119 void onFinalizeSwitch(
unsigned JumpTableSize,
1120 unsigned NumCaseCluster)
override {
1122 if (JumpTableSize) {
1126 increment(InlineCostFeatureIndex::JumpTablePenalty, JTCost);
1130 if (NumCaseCluster <= 3) {
1131 increment(InlineCostFeatureIndex::CaseClusterPenalty,
1132 NumCaseCluster * CaseClusterCostMultiplier *
1137 int64_t ExpectedNumberOfCompare =
1138 getExpectedNumberOfCompare(NumCaseCluster);
1140 int64_t SwitchCost = ExpectedNumberOfCompare * SwitchCostMultiplier *
1142 increment(InlineCostFeatureIndex::SwitchPenalty, SwitchCost);
1145 void onMissedSimplification()
override {
1146 increment(InlineCostFeatureIndex::UnsimplifiedCommonInstructions,
1157 if (
BB->getTerminator()->getNumSuccessors() > 1)
1158 set(InlineCostFeatureIndex::IsMultipleBlocks, 1);
1159 Threshold -= SingleBBBonus;
1164 if (
Caller->hasMinSize()) {
1167 for (
Loop *L : LI) {
1169 if (DeadBlocks.
count(L->getHeader()))
1171 increment(InlineCostFeatureIndex::NumLoops,
1175 set(InlineCostFeatureIndex::DeadBlocks, DeadBlocks.
size());
1176 set(InlineCostFeatureIndex::SimplifiedInstructions,
1177 NumInstructionsSimplified);
1178 set(InlineCostFeatureIndex::ConstantArgs, NumConstantArgs);
1179 set(InlineCostFeatureIndex::ConstantOffsetPtrArgs,
1180 NumConstantOffsetPtrArgs);
1181 set(InlineCostFeatureIndex::SROASavings, SROACostSavingOpportunities);
1183 if (NumVectorInstructions <= NumInstructions / 10)
1184 Threshold -= VectorBonus;
1185 else if (NumVectorInstructions <= NumInstructions / 2)
1186 Threshold -= VectorBonus / 2;
1188 set(InlineCostFeatureIndex::Threshold, Threshold);
1193 bool shouldStop()
override {
return false; }
1195 void onLoadEliminationOpportunity()
override {
1196 increment(InlineCostFeatureIndex::LoadElimination, 1);
1200 increment(InlineCostFeatureIndex::CallSiteCost,
1203 set(InlineCostFeatureIndex::ColdCcPenalty,
1207 (
F.hasLocalLinkage() &&
F.hasOneLiveUse() &&
1213 int SingleBBBonusPercent = 50;
1217 SingleBBBonus = Threshold * SingleBBBonusPercent / 100;
1218 VectorBonus = Threshold * VectorBonusPercent / 100;
1219 Threshold += (SingleBBBonus + VectorBonus);
1225 InlineCostFeaturesAnalyzer(
1231 : CallAnalyzer(
Callee,
Call,
TTI, GetAssumptionCache, GetBFI, PSI) {}
1239 bool CallAnalyzer::isAllocaDerivedArg(
Value *V) {
1240 return SROAArgValues.
count(V);
1243 void CallAnalyzer::disableSROAForArg(
AllocaInst *SROAArg) {
1244 onDisableSROA(SROAArg);
1245 EnabledSROAAllocas.
erase(SROAArg);
1246 disableLoadElimination();
1249 void InlineCostAnnotationWriter::emitInstructionAnnot(
1256 OS <<
"; No analysis for the instruction";
1258 OS <<
"; cost before = " <<
Record->CostBefore
1259 <<
", cost after = " <<
Record->CostAfter
1260 <<
", threshold before = " <<
Record->ThresholdBefore
1261 <<
", threshold after = " <<
Record->ThresholdAfter <<
", ";
1262 OS <<
"cost delta = " <<
Record->getCostDelta();
1263 if (
Record->hasThresholdChanged())
1264 OS <<
", threshold delta = " <<
Record->getThresholdDelta();
1266 auto C = ICCA->getSimplifiedValue(
const_cast<Instruction *
>(
I));
1268 OS <<
", simplified to ";
1269 C.getValue()->print(OS,
true);
1275 void CallAnalyzer::disableSROA(
Value *V) {
1276 if (
auto *SROAArg = getSROAArgForValueOrNull(V)) {
1277 disableSROAForArg(SROAArg);
1281 void CallAnalyzer::disableLoadElimination() {
1282 if (EnableLoadElimination) {
1283 onDisableLoadElimination();
1284 EnableLoadElimination =
false;
1293 unsigned IntPtrWidth =
DL.getIndexTypeSizeInBits(
GEP.getType());
1297 GTI != GTE; ++GTI) {
1298 ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
1300 if (
Constant *SimpleOp = SimplifiedValues.
lookup(GTI.getOperand()))
1301 OpC = dyn_cast<ConstantInt>(SimpleOp);
1308 if (
StructType *STy = GTI.getStructTypeOrNull()) {
1315 APInt TypeSize(IntPtrWidth,
DL.getTypeAllocSize(GTI.getIndexedType()));
1327 for (
const Use &
Op :
GEP.indices())
1338 disableSROA(
I.getOperand(0));
1342 if (
I.isArrayAllocation()) {
1344 if (
auto *AllocSize = dyn_cast_or_null<ConstantInt>(Size)) {
1353 Type *Ty =
I.getAllocatedType();
1355 AllocSize->getLimitedValue(),
1356 DL.getTypeAllocSize(Ty).getKnownMinSize(), AllocatedSize);
1358 HasDynamicAlloca =
true;
1364 if (
I.isStaticAlloca()) {
1365 Type *Ty =
I.getAllocatedType();
1367 SaturatingAdd(
DL.getTypeAllocSize(Ty).getKnownMinSize(), AllocatedSize);
1374 if (!
I.isStaticAlloca())
1375 HasDynamicAlloca =
true;
1380 bool CallAnalyzer::visitPHI(
PHINode &
I) {
1392 bool CheckSROA =
I.getType()->isPointerTy();
1396 std::pair<Value *, APInt> FirstBaseAndOffset = {
nullptr, ZeroOffset};
1397 Value *FirstV =
nullptr;
1399 for (
unsigned i = 0,
e =
I.getNumIncomingValues();
i !=
e; ++
i) {
1402 if (DeadBlocks.
count(Pred))
1406 BasicBlock *KnownSuccessor = KnownSuccessors[Pred];
1407 if (KnownSuccessor && KnownSuccessor !=
I.getParent())
1410 Value *V =
I.getIncomingValue(
i);
1417 C = SimplifiedValues.
lookup(V);
1419 std::pair<Value *, APInt> BaseAndOffset = {
nullptr, ZeroOffset};
1420 if (!
C && CheckSROA)
1421 BaseAndOffset = ConstantOffsetPtrs.
lookup(V);
1423 if (!
C && !BaseAndOffset.first)
1440 if (FirstBaseAndOffset == BaseAndOffset)
1454 FirstBaseAndOffset = BaseAndOffset;
1459 SimplifiedValues[&
I] = FirstC;
1464 if (FirstBaseAndOffset.first) {
1465 ConstantOffsetPtrs[&
I] = FirstBaseAndOffset;
1467 if (
auto *SROAArg = getSROAArgForValueOrNull(FirstV))
1468 SROAArgValues[&
I] = SROAArg;
1480 std::pair<Value *, APInt> BaseAndOffset =
1481 ConstantOffsetPtrs.
lookup(
I.getPointerOperand());
1482 if (!BaseAndOffset.first)
1487 if (!accumulateGEPOffset(cast<GEPOperator>(
I), BaseAndOffset.second))
1491 ConstantOffsetPtrs[&
I] = BaseAndOffset;
1497 auto *SROAArg = getSROAArgForValueOrNull(
I.getPointerOperand());
1501 for (
const Use &
Op :
GEP.indices())
1502 if (!isa<Constant>(
Op) && !SimplifiedValues.
lookup(
Op))
1510 for (
unsigned int Index = 1;
Index < COps.size(); ++
Index)
1511 Indices.push_back(COps[Index]);
1513 I.getSourceElementType(), COps[0], Indices,
I.isInBounds());
1517 if ((
I.isInBounds() && canFoldInboundsGEP(
I)) || IsGEPOffsetConstant(
I)) {
1519 SROAArgValues[&
I] = SROAArg;
1527 disableSROAForArg(SROAArg);
1528 return isGEPFree(
I);
1534 template <
typename Callable>
1535 bool CallAnalyzer::simplifyInstruction(
Instruction &
I, Callable Evaluate) {
1543 COps.push_back(COp);
1545 auto *
C = Evaluate(COps);
1548 SimplifiedValues[&
I] =
C;
1561 bool CallAnalyzer::simplifyIntrinsicCallIsConstant(
CallBase &CB) {
1563 auto *
C = dyn_cast<Constant>(
Arg);
1566 C = dyn_cast_or_null<Constant>(SimplifiedValues.
lookup(
Arg));
1581 std::pair<Value *, APInt> BaseAndOffset =
1582 ConstantOffsetPtrs.
lookup(
I.getOperand(0));
1584 if (BaseAndOffset.first)
1585 ConstantOffsetPtrs[&
I] = BaseAndOffset;
1588 if (
auto *SROAArg = getSROAArgForValueOrNull(
I.getOperand(0)))
1589 SROAArgValues[&
I] = SROAArg;
1604 unsigned IntegerSize =
I.getType()->getScalarSizeInBits();
1605 unsigned AS =
I.getOperand(0)->getType()->getPointerAddressSpace();
1606 if (IntegerSize ==
DL.getPointerSizeInBits(AS)) {
1607 std::pair<Value *, APInt> BaseAndOffset =
1608 ConstantOffsetPtrs.
lookup(
I.getOperand(0));
1609 if (BaseAndOffset.first)
1610 ConstantOffsetPtrs[&
I] = BaseAndOffset;
1620 if (
auto *SROAArg = getSROAArgForValueOrNull(
I.getOperand(0)))
1621 SROAArgValues[&
I] = SROAArg;
1637 unsigned IntegerSize =
Op->getType()->getScalarSizeInBits();
1638 if (IntegerSize <=
DL.getPointerTypeSizeInBits(
I.getType())) {
1639 std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.
lookup(
Op);
1640 if (BaseAndOffset.first)
1641 ConstantOffsetPtrs[&
I] = BaseAndOffset;
1645 if (
auto *SROAArg = getSROAArgForValueOrNull(
Op))
1646 SROAArgValues[&
I] = SROAArg;
1652 bool CallAnalyzer::visitCastInst(
CastInst &
I) {
1661 disableSROA(
I.getOperand(0));
1666 switch (
I.getOpcode()) {
1667 case Instruction::FPTrunc:
1668 case Instruction::FPExt:
1669 case Instruction::UIToFP:
1670 case Instruction::SIToFP:
1671 case Instruction::FPToUI:
1672 case Instruction::FPToSI:
1688 bool CallAnalyzer::isKnownNonNullInCallee(
Value *V) {
1694 if (
Argument *A = dyn_cast<Argument>(V))
1695 if (paramHasAttr(A, Attribute::NonNull))
1701 if (isAllocaDerivedArg(V))
1710 bool CallAnalyzer::allowSizeGrowth(
CallBase &Call) {
1726 if (
InvokeInst *II = dyn_cast<InvokeInst>(&Call)) {
1727 if (isa<UnreachableInst>(II->getNormalDest()->getTerminator()))
1729 }
else if (isa<UnreachableInst>(
Call.getParent()->getTerminator()))
1739 if (PSI && PSI->hasProfileSummary())
1740 return PSI->isColdCallSite(Call, CallerBFI);
1751 auto CallSiteBB =
Call.getParent();
1752 auto CallSiteFreq = CallerBFI->
getBlockFreq(CallSiteBB);
1753 auto CallerEntryFreq =
1755 return CallSiteFreq < CallerEntryFreq * ColdProb;
1759 InlineCostCallAnalyzer::getHotCallSiteThreshold(
CallBase &Call,
1764 if (PSI && PSI->hasProfileSummary() && PSI->isHotCallSite(Call, CallerBFI))
1776 auto CallSiteBB =
Call.getParent();
1786 void InlineCostCallAnalyzer::updateThreshold(
CallBase &Call,
Function &Callee) {
1788 if (!allowSizeGrowth(Call)) {
1817 int SingleBBBonusPercent = 50;
1822 auto DisallowAllBonuses = [&]() {
1823 SingleBBBonusPercent = 0;
1824 VectorBonusPercent = 0;
1830 if (
Caller->hasMinSize()) {
1836 SingleBBBonusPercent = 0;
1837 VectorBonusPercent = 0;
1838 }
else if (
Caller->hasOptSize())
1843 if (!
Caller->hasMinSize()) {
1844 if (
Callee.hasFnAttribute(Attribute::InlineHint))
1869 DisallowAllBonuses();
1874 if (PSI->isFunctionEntryHot(&Callee)) {
1880 }
else if (PSI->isFunctionEntryCold(&Callee)) {
1886 DisallowAllBonuses();
1898 SingleBBBonus = Threshold * SingleBBBonusPercent / 100;
1899 VectorBonus = Threshold * VectorBonusPercent / 100;
1901 bool OnlyOneCallAndLocalLinkage =
F.hasLocalLinkage() &&
F.hasOneLiveUse() &&
1902 &
F ==
Call.getCalledFunction();
1906 if (OnlyOneCallAndLocalLinkage)
1910 bool CallAnalyzer::visitCmpInst(
CmpInst &
I) {
1918 if (
I.getOpcode() == Instruction::FCmp)
1923 Value *LHSBase, *RHSBase;
1924 APInt LHSOffset, RHSOffset;
1925 std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.
lookup(
LHS);
1927 std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.
lookup(
RHS);
1928 if (RHSBase && LHSBase == RHSBase) {
1934 SimplifiedValues[&
I] =
C;
1935 ++NumConstantPtrCmps;
1943 if (
I.isEquality() && isa<ConstantPointerNull>(
I.getOperand(1)) &&
1944 isKnownNonNullInCallee(
I.getOperand(0))) {
1950 return handleSROA(
I.getOperand(0), isa<ConstantPointerNull>(
I.getOperand(1)));
1957 Value *LHSBase, *RHSBase;
1958 APInt LHSOffset, RHSOffset;
1959 std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.
lookup(
LHS);
1961 std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.
lookup(
RHS);
1962 if (RHSBase && LHSBase == RHSBase) {
1968 SimplifiedValues[&
I] =
C;
1969 ++NumConstantPtrDiffs;
1977 return Base::visitSub(
I);
1989 Value *SimpleV =
nullptr;
1990 if (
auto FI = dyn_cast<FPMathOperator>(&
I))
1992 FI->getFastMathFlags(),
DL);
1997 if (
Constant *
C = dyn_cast_or_null<Constant>(SimpleV))
1998 SimplifiedValues[&
I] =
C;
2011 if (
I.getType()->isFloatingPointTy() &&
2026 COp ? COp :
Op, cast<FPMathOperator>(
I).getFastMathFlags(),
DL);
2028 if (
Constant *
C = dyn_cast_or_null<Constant>(SimpleV))
2029 SimplifiedValues[&
I] =
C;
2040 bool CallAnalyzer::visitLoad(
LoadInst &
I) {
2041 if (handleSROA(
I.getPointerOperand(),
I.isSimple()))
2047 if (EnableLoadElimination &&
2048 !LoadAddrSet.
insert(
I.getPointerOperand()).second &&
I.isUnordered()) {
2049 onLoadEliminationOpportunity();
2056 bool CallAnalyzer::visitStore(
StoreInst &
I) {
2057 if (handleSROA(
I.getPointerOperand(),
I.isSimple()))
2068 disableLoadElimination();
2080 return Base::visitExtractValue(
I);
2093 return Base::visitInsertValue(
I);
2116 C = dyn_cast_or_null<Constant>(SimplifiedValues.
lookup(
I));
2120 ConstantArgs.push_back(
C);
2123 SimplifiedValues[&
Call] =
C;
2130 bool CallAnalyzer::visitCallBase(
CallBase &Call) {
2131 if (!onCallBaseVisitStart(Call))
2134 if (
Call.hasFnAttr(Attribute::ReturnsTwice) &&
2135 !
F.hasFnAttribute(Attribute::ReturnsTwice)) {
2137 ExposesReturnsTwice =
true;
2140 if (isa<CallInst>(Call) && cast<CallInst>(Call).cannotDuplicate())
2141 ContainsNoDuplicateCall =
true;
2144 bool IsIndirectCall = !
F;
2145 if (IsIndirectCall) {
2149 F = dyn_cast_or_null<Function>(SimplifiedValues.
lookup(Callee));
2150 if (!
F ||
F->getFunctionType() !=
Call.getFunctionType()) {
2151 onCallArgumentSetup(Call);
2153 if (!
Call.onlyReadsMemory())
2154 disableLoadElimination();
2155 return Base::visitCallBase(Call);
2159 assert(
F &&
"Expected a call to a known function");
2162 if (simplifyCallSite(
F, Call))
2168 switch (II->getIntrinsicID()) {
2171 disableLoadElimination();
2172 return Base::visitCallBase(Call);
2174 case Intrinsic::load_relative:
2175 onLoadRelativeIntrinsic();
2178 case Intrinsic::memset:
2180 case Intrinsic::memmove:
2181 disableLoadElimination();
2184 case Intrinsic::icall_branch_funnel:
2185 case Intrinsic::localescape:
2186 HasUninlineableIntrinsic =
true;
2188 case Intrinsic::vastart:
2189 InitsVargArgs =
true;
2191 case Intrinsic::launder_invariant_group:
2192 case Intrinsic::strip_invariant_group:
2193 if (
auto *SROAArg = getSROAArgForValueOrNull(II->getOperand(0)))
2194 SROAArgValues[II] = SROAArg;
2196 case Intrinsic::is_constant:
2197 return simplifyIntrinsicCallIsConstant(Call);
2201 if (
F ==
Call.getFunction()) {
2204 IsRecursiveCall =
true;
2205 if (!AllowRecursiveCall)
2210 onLoweredCall(
F, Call, IsIndirectCall);
2213 if (!(
Call.onlyReadsMemory() || (IsIndirectCall &&
F->onlyReadsMemory())))
2214 disableLoadElimination();
2215 return Base::visitCallBase(Call);
2218 bool CallAnalyzer::visitReturnInst(
ReturnInst &RI) {
2220 bool Free = !HasReturn;
2225 bool CallAnalyzer::visitBranchInst(
BranchInst &BI) {
2231 isa_and_nonnull<ConstantInt>(
2236 bool CheckSROA =
SI.getType()->isPointerTy();
2247 dyn_cast_or_null<Constant>(SimplifiedValues.
lookup(
SI.getCondition()));
2251 if (TrueC == FalseC && TrueC) {
2252 SimplifiedValues[&
SI] = TrueC;
2257 return Base::visitSelectInst(
SI);
2259 std::pair<Value *, APInt> TrueBaseAndOffset =
2261 std::pair<Value *, APInt> FalseBaseAndOffset =
2263 if (TrueBaseAndOffset == FalseBaseAndOffset && TrueBaseAndOffset.first) {
2264 ConstantOffsetPtrs[&
SI] = TrueBaseAndOffset;
2266 if (
auto *SROAArg = getSROAArgForValueOrNull(
TrueVal))
2267 SROAArgValues[&
SI] = SROAArg;
2271 return Base::visitSelectInst(
SI);
2282 if (TrueC && FalseC) {
2284 SimplifiedValues[&
SI] =
C;
2288 return Base::visitSelectInst(
SI);
2292 if (
Constant *SelectedC = dyn_cast<Constant>(SelectedV)) {
2293 SimplifiedValues[&
SI] = SelectedC;
2300 std::pair<Value *, APInt> BaseAndOffset =
2301 ConstantOffsetPtrs.
lookup(SelectedV);
2302 if (BaseAndOffset.first) {
2303 ConstantOffsetPtrs[&
SI] = BaseAndOffset;
2305 if (
auto *SROAArg = getSROAArgForValueOrNull(SelectedV))
2306 SROAArgValues[&
SI] = SROAArg;
2315 if (isa<ConstantInt>(
SI.getCondition()))
2318 if (isa<ConstantInt>(V))
2333 unsigned JumpTableSize = 0;
2335 unsigned NumCaseCluster =
2338 onFinalizeSwitch(JumpTableSize, NumCaseCluster);
2351 HasIndirectBr =
true;
2355 bool CallAnalyzer::visitResumeInst(
ResumeInst &RI) {
2389 for (
const Use &
Op :
I.operands())
2414 if (
I.isDebugOrPseudoInst())
2422 if (isa<ExtractElementInst>(
I) ||
I.getType()->isVectorTy())
2423 ++NumVectorInstructions;
2430 onInstructionAnalysisStart(&
I);
2432 if (Base::visit(&
I))
2433 ++NumInstructionsSimplified;
2435 onMissedSimplification();
2437 onInstructionAnalysisFinish(&
I);
2438 using namespace ore;
2441 if (IsRecursiveCall && !AllowRecursiveCall)
2443 else if (ExposesReturnsTwice)
2445 else if (HasDynamicAlloca)
2447 else if (HasIndirectBr)
2449 else if (HasUninlineableIntrinsic)
2451 else if (InitsVargArgs)
2453 if (!
IR.isSuccess()) {
2458 <<
NV(
"Callee", &
F) <<
" has uninlinable pattern ("
2459 <<
NV(
"InlineResult",
IR.getFailureReason())
2460 <<
") and cost is not fully computed";
2468 if (IsCallerRecursive &&
2476 <<
NV(
"Callee", &
F) <<
" is "
2477 <<
NV(
"InlineResult",
IR.getFailureReason())
2478 <<
". Cost is not fully computed";
2485 "Call site analysis is not favorable to inlining.");
2497 ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(
Value *&V) {
2502 unsigned IntPtrWidth =
DL.getIndexSizeInBits(AS);
2511 if (!
GEP->isInBounds() || !accumulateGEPOffset(*
GEP, Offset))
2513 V =
GEP->getPointerOperand();
2515 V = cast<Operator>(V)->getOperand(0);
2516 }
else if (
GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
2517 if (GA->isInterposable())
2519 V = GA->getAliasee();
2524 }
while (Visited.
insert(V).second);
2541 return (DeadBlocks.
count(Pred) ||
2542 (KnownSuccessors[Pred] && KnownSuccessors[Pred] != Succ));
2547 return (!DeadBlocks.
count(
BB) &&
2553 if (Succ == NextBB || !IsNewlyDead(Succ))
2556 NewDead.push_back(Succ);
2557 while (!NewDead.empty()) {
2563 NewDead.push_back(
S);
2578 auto Result = onAnalysisStart();
2589 if (Call &&
Call->getFunction() == Caller) {
2590 IsCallerRecursive =
true;
2600 if (
Constant *
C = dyn_cast<Constant>(CAI))
2601 SimplifiedValues[&FAI] =
C;
2603 Value *PtrArg = *CAI;
2604 if (
ConstantInt *
C = stripAndComputeInBoundsConstantOffsets(PtrArg)) {
2605 ConstantOffsetPtrs[&FAI] = std::make_pair(PtrArg,
C->getValue());
2608 if (
auto *SROAArg = dyn_cast<AllocaInst>(PtrArg)) {
2609 SROAArgValues[&FAI] = SROAArg;
2610 onInitializeSROAArg(SROAArg);
2611 EnabledSROAAllocas.
insert(SROAArg);
2616 NumConstantArgs = SimplifiedValues.
size();
2617 NumConstantOffsetPtrArgs = ConstantOffsetPtrs.
size();
2618 NumAllocaArgs = SROAArgValues.
size();
2637 BBWorklist.
insert(&
F.getEntryBlock());
2640 for (
unsigned Idx = 0; Idx != BBWorklist.
size(); ++Idx) {
2658 if (
BB->hasAddressTaken())
2660 if (!isa<CallBrInst>(*U))
2666 if (!
IR.isSuccess())
2673 if (
BranchInst *BI = dyn_cast<BranchInst>(TI)) {
2677 dyn_cast_or_null<ConstantInt>(SimplifiedValues.
lookup(
Cond))) {
2679 BBWorklist.
insert(NextBB);
2680 KnownSuccessors[
BB] = NextBB;
2681 findDeadBlocks(
BB, NextBB);
2685 }
else if (
SwitchInst *
SI = dyn_cast<SwitchInst>(TI)) {
2688 dyn_cast_or_null<ConstantInt>(SimplifiedValues.
lookup(
Cond))) {
2689 BasicBlock *NextBB =
SI->findCaseValue(SimpleCond)->getCaseSuccessor();
2690 BBWorklist.
insert(NextBB);
2691 KnownSuccessors[
BB] = NextBB;
2692 findDeadBlocks(
BB, NextBB);
2703 onBlockAnalyzed(
BB);
2706 bool OnlyOneCallAndLocalLinkage =
F.hasLocalLinkage() &&
F.hasOneLiveUse() &&
2711 if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall)
2714 return finalizeAnalysis();
2718 #define DEBUG_PRINT_STAT(x) OS << " " #x ": " << x << "\n"
2720 F.print(OS, &Writer);
2734 #undef DEBUG_PRINT_STAT
2737 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2751 auto CalleeTLI = GetTLI(*
Callee);
2754 GetTLI(*Caller).areInlineCompatible(CalleeTLI,
2761 for (
unsigned I = 0,
E = Call.arg_size();
I !=
E; ++
I) {
2762 if (Call.isByValArgument(
I)) {
2765 PointerType *PTy = cast<PointerType>(Call.getArgOperand(
I)->getType());
2766 unsigned TypeSize =
DL.getTypeSizeInBits(Call.getParamByValType(
I));
2778 NumStores =
std::min(NumStores, 8U);
2798 return getInlineCost(Call, Call.getCalledFunction(), Params, CalleeTTI,
2799 GetAssumptionCache, GetTLI, GetBFI, PSI, ORE);
2818 InlineCostCallAnalyzer CA(*Call.getCalledFunction(), Call, Params, CalleeTTI,
2819 GetAssumptionCache, GetBFI, PSI, ORE,
true,
2821 auto R = CA.analyze();
2824 return CA.getCost();
2832 InlineCostFeaturesAnalyzer CFA(CalleeTTI, GetAssumptionCache, GetBFI, PSI,
2833 ORE, *Call.getCalledFunction(), Call);
2834 auto R = CFA.analyze();
2837 return CFA.features();
2852 if (
Callee->isPresplitCoroutine())
2860 unsigned AllocaAS =
Callee->getParent()->getDataLayout().getAllocaAddrSpace();
2861 for (
unsigned I = 0,
E = Call.arg_size();
I !=
E; ++
I)
2862 if (Call.isByValArgument(
I)) {
2863 PointerType *PTy = cast<PointerType>(Call.getArgOperand(
I)->getType());
2871 if (Call.hasFnAttr(Attribute::AlwaysInline)) {
2872 if (Call.getAttributes().hasFnAttr(Attribute::NoInline))
2876 if (IsViable.isSuccess())
2883 Function *Caller = Call.getCaller();
2888 if (Caller->hasOptNone())
2893 if (!Caller->nullPointerIsDefined() &&
Callee->nullPointerIsDefined())
2897 if (
Callee->isInterposable())
2901 if (
Callee->hasFnAttribute(Attribute::NoInline))
2905 if (Call.isNoInline())
2922 if (UserDecision.hasValue()) {
2923 if (UserDecision->isSuccess())
2929 <<
"... (caller:" << Call.getCaller()->getName()
2932 InlineCostCallAnalyzer CA(*
Callee, Call, Params, CalleeTTI,
2933 GetAssumptionCache, GetBFI, PSI, ORE);
2941 if (CA.wasDecidedByCostBenefit()) {
2944 CA.getCostBenefitPair());
2949 if (CA.wasDecidedByCostThreshold())
2959 bool ReturnsTwice =
F.hasFnAttribute(Attribute::ReturnsTwice);
2962 if (isa<IndirectBrInst>(
BB.getTerminator()))
2967 if (
BB.hasAddressTaken())
2969 if (!isa<CallBrInst>(*U))
2972 for (
auto &II :
BB) {
2973 CallBase *Call = dyn_cast<CallBase>(&II);
2984 if (!ReturnsTwice && isa<CallInst>(Call) &&
2985 cast<CallInst>(Call)->canReturnTwice())
2989 switch (
Callee->getIntrinsicID()) {
2992 case llvm::Intrinsic::icall_branch_funnel:
2996 "disallowed inlining of @llvm.icall.branch.funnel");
2997 case llvm::Intrinsic::localescape:
3001 "disallowed inlining of @llvm.localescape");
3002 case llvm::Intrinsic::vastart:
3006 "contains VarArgs initialized with va_start");
3077 unsigned SizeOptLevel) {
3080 if (SizeOptLevel == 1)
3082 if (SizeOptLevel == 2)
3118 if (
CallInst *CI = dyn_cast<CallInst>(&
I)) {
3119 Function *CalledFunction = CI->getCalledFunction();
3123 InlineCostCallAnalyzer ICCA(*CalledFunction, *CI, Params,
TTI,
3124 GetAssumptionCache,
nullptr, &PSI, &ORE);
3126 OS <<
" Analyzing call of " << CalledFunction->
getName()
3127 <<
"... (caller:" << CI->getCaller()->getName() <<
")\n";