33#include "llvm/Config/llvm-config.h"
55#define DEBUG_TYPE "inline-cost"
57STATISTIC(NumCallsAnalyzed,
"Number of call sites analyzed");
61 cl::desc(
"Default amount of inlining to perform"));
70 cl::desc(
"Ignore TTI attributes compatibility check between callee/caller "
71 "during inline cost calculation"));
75 cl::desc(
"Prints comments for instruction based on inline cost analysis"));
79 cl::desc(
"Control the amount of inlining to perform (default = 225)"));
83 cl::desc(
"Threshold for inlining functions with inline hint"));
88 cl::desc(
"Threshold for inlining cold callsites"));
92 cl::desc(
"Enable the cost-benefit analysis for the inliner"));
99 cl::desc(
"Multiplier to multiply cycle savings by during inlining"));
106 cl::desc(
"A multiplier on top of cycle savings to decide whether the "
107 "savings won't justify the cost"));
111 cl::desc(
"The maximum size of a callee that get's "
112 "inlined without sufficient cycle savings"));
119 cl::desc(
"Threshold for inlining functions with cold attribute"));
123 cl::desc(
"Threshold for hot callsites "));
127 cl::desc(
"Threshold for locally hot callsites "));
131 cl::desc(
"Maximum block frequency, expressed as a percentage of caller's "
132 "entry frequency, for a callsite to be cold in the absence of "
133 "profile information."));
137 cl::desc(
"Minimum block frequency, expressed as a multiple of caller's "
138 "entry frequency, for a callsite to be hot in the absence of "
139 "profile information."));
143 cl::desc(
"Cost of a single instruction when inlining"));
147 cl::desc(
"Cost of a single inline asm instruction when inlining"));
151 cl::desc(
"Cost of load/store instruction when inlining"));
155 cl::desc(
"Call penalty that is applied per callsite when inlining"));
159 cl::init(std::numeric_limits<size_t>::max()),
160 cl::desc(
"Do not inline functions with a stack size "
161 "that exceeds the specified limit"));
166 cl::desc(
"Do not inline recursive functions with a stack "
167 "size that exceeds the specified limit"));
171 cl::desc(
"Compute the full inline cost of a call site even when the cost "
172 "exceeds the threshold."));
176 cl::desc(
"Allow inlining when caller has a superset of callee's nobuiltin "
181 cl::desc(
"Disables evaluation of GetElementPtr with constant operands"));
185 cl::desc(
"Inline all viable calls, even if they exceed the inlining "
213class InlineCostCallAnalyzer;
// Bookkeeping for the cost/threshold values of a single analyzed instruction,
// recorded by InlineCostCallAnalyzer for annotation printing. Snapshots are
// taken before and after the instruction is assessed.
struct InstructionCostDetail {
  int CostBefore = 0;
  int CostAfter = 0;
  int ThresholdBefore = 0;
  int ThresholdAfter = 0;

  // How much the inline threshold moved while analyzing this instruction.
  int getThresholdDelta() const { return ThresholdAfter - ThresholdBefore; }

  // How much the running inline cost grew while analyzing this instruction.
  int getCostDelta() const { return CostAfter - CostBefore; }

  // True iff analyzing this instruction changed the threshold at all.
  bool hasThresholdChanged() const { return ThresholdAfter != ThresholdBefore; }
};
232 InlineCostCallAnalyzer *
const ICCA;
235 InlineCostAnnotationWriter(InlineCostCallAnalyzer *ICCA) : ICCA(ICCA) {}
236 void emitInstructionAnnot(
const Instruction *
I,
237 formatted_raw_ostream &OS)
override;
248class CallAnalyzer :
public InstVisitor<CallAnalyzer, bool> {
249 typedef InstVisitor<CallAnalyzer, bool> Base;
250 friend class InstVisitor<CallAnalyzer, bool>;
253 virtual ~CallAnalyzer() =
default;
255 const TargetTransformInfo &TTI;
258 function_ref<AssumptionCache &(
Function &)> GetAssumptionCache;
261 function_ref<BlockFrequencyInfo &(
Function &)> GetBFI;
264 function_ref<
const TargetLibraryInfo &(
Function &)> GetTLI;
267 ProfileSummaryInfo *PSI;
273 const DataLayout &DL;
276 OptimizationRemarkEmitter *ORE;
281 CallBase &CandidateCall;
284 function_ref<EphemeralValuesCache &(
Function &)> GetEphValuesCache =
nullptr;
288 virtual void onBlockStart(
const BasicBlock *BB) {}
291 virtual void onBlockAnalyzed(
const BasicBlock *BB) {}
294 virtual void onInstructionAnalysisStart(
const Instruction *
I) {}
297 virtual void onInstructionAnalysisFinish(
const Instruction *
I) {}
307 virtual bool shouldStop() {
return false; }
316 virtual void onDisableSROA(AllocaInst *Arg) {}
319 virtual void onDisableLoadElimination() {}
323 virtual bool onCallBaseVisitStart(CallBase &
Call) {
return true; }
326 virtual void onCallPenalty() {}
329 virtual void onMemAccess(){};
333 virtual void onLoadEliminationOpportunity() {}
337 virtual void onCallArgumentSetup(
const CallBase &
Call) {}
340 virtual void onLoadRelativeIntrinsic() {}
348 virtual bool onJumpTable(
unsigned JumpTableSize) {
return true; }
352 virtual bool onCaseCluster(
unsigned NumCaseCluster) {
return true; }
356 virtual void onFinalizeSwitch(
unsigned JumpTableSize,
unsigned NumCaseCluster,
357 bool DefaultDestUnreachable) {}
361 virtual void onMissedSimplification() {}
364 virtual void onInlineAsm(
const InlineAsm &Arg) {}
367 virtual void onInitializeSROAArg(AllocaInst *Arg) {}
370 virtual void onAggregateSROAUse(AllocaInst *V) {}
372 bool handleSROA(
Value *V,
bool DoNotDisable) {
374 if (
auto *SROAArg = getSROAArgForValueOrNull(V)) {
376 onAggregateSROAUse(SROAArg);
379 disableSROAForArg(SROAArg);
384 bool IsCallerRecursive =
false;
385 bool IsRecursiveCall =
false;
386 bool ExposesReturnsTwice =
false;
387 bool HasDynamicAlloca =
false;
388 bool ContainsNoDuplicateCall =
false;
389 bool HasReturn =
false;
390 bool HasIndirectBr =
false;
391 bool HasUninlineableIntrinsic =
false;
392 bool InitsVargArgs =
false;
395 uint64_t AllocatedSize = 0;
396 unsigned NumInstructions = 0;
397 unsigned NumInlineAsmInstructions = 0;
398 unsigned NumVectorInstructions = 0;
408 DenseMap<Value *, Value *> SimplifiedValues;
412 DenseMap<Value *, AllocaInst *> SROAArgValues;
415 DenseSet<AllocaInst *> EnabledSROAAllocas;
418 DenseMap<Value *, std::pair<Value *, APInt>> ConstantOffsetPtrs;
421 SmallPtrSet<BasicBlock *, 16> DeadBlocks;
425 DenseMap<BasicBlock *, BasicBlock *> KnownSuccessors;
430 bool EnableLoadElimination =
true;
433 bool AllowRecursiveCall =
false;
435 SmallPtrSet<Value *, 16> LoadAddrSet;
437 AllocaInst *getSROAArgForValueOrNull(
Value *V)
const {
438 auto It = SROAArgValues.find(V);
439 if (It == SROAArgValues.end() || EnabledSROAAllocas.count(It->second) == 0)
446 template <
typename T>
T *getDirectOrSimplifiedValue(
Value *V)
const {
449 return getSimplifiedValue<T>(V);
453 bool isAllocaDerivedArg(
Value *V);
454 void disableSROAForArg(AllocaInst *SROAArg);
455 void disableSROA(
Value *V);
456 void findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB);
457 void disableLoadElimination();
458 bool isGEPFree(GetElementPtrInst &
GEP);
459 bool canFoldInboundsGEP(GetElementPtrInst &
I);
460 bool accumulateGEPOffset(GEPOperator &
GEP, APInt &
Offset);
461 bool simplifyCallSite(Function *F, CallBase &
Call);
462 bool simplifyCmpInstForRecCall(CmpInst &Cmp);
464 bool simplifyIntrinsicCallIsConstant(CallBase &CB);
465 bool simplifyIntrinsicCallObjectSize(CallBase &CB);
466 ConstantInt *stripAndComputeInBoundsConstantOffsets(
Value *&V);
467 bool isLoweredToCall(Function *F, CallBase &
Call);
474 bool paramHasAttr(Argument *
A, Attribute::AttrKind Attr);
478 bool isKnownNonNullInCallee(
Value *V);
481 bool allowSizeGrowth(CallBase &
Call);
484 InlineResult analyzeBlock(BasicBlock *BB,
485 const SmallPtrSetImpl<const Value *> &EphValues);
491 void visit(Function *);
492 void visit(Function &);
493 void visit(BasicBlock *);
494 void visit(BasicBlock &);
497 bool visitInstruction(Instruction &
I);
500 bool visitAlloca(AllocaInst &
I);
501 bool visitPHI(PHINode &
I);
502 bool visitGetElementPtr(GetElementPtrInst &
I);
503 bool visitBitCast(BitCastInst &
I);
504 bool visitPtrToInt(PtrToIntInst &
I);
505 bool visitIntToPtr(IntToPtrInst &
I);
506 bool visitCastInst(CastInst &
I);
507 bool visitCmpInst(CmpInst &
I);
508 bool visitSub(BinaryOperator &
I);
509 bool visitBinaryOperator(BinaryOperator &
I);
510 bool visitFNeg(UnaryOperator &
I);
511 bool visitLoad(LoadInst &
I);
512 bool visitStore(StoreInst &
I);
513 bool visitExtractValue(ExtractValueInst &
I);
514 bool visitInsertValue(InsertValueInst &
I);
515 bool visitCallBase(CallBase &
Call);
516 bool visitReturnInst(ReturnInst &RI);
517 bool visitBranchInst(BranchInst &BI);
518 bool visitSelectInst(SelectInst &SI);
519 bool visitSwitchInst(SwitchInst &SI);
520 bool visitIndirectBrInst(IndirectBrInst &IBI);
521 bool visitResumeInst(ResumeInst &RI);
522 bool visitCleanupReturnInst(CleanupReturnInst &RI);
523 bool visitCatchReturnInst(CatchReturnInst &RI);
524 bool visitUnreachableInst(UnreachableInst &
I);
528 Function &Callee, CallBase &
Call,
const TargetTransformInfo &TTI,
529 function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
530 function_ref<BlockFrequencyInfo &(Function &)> GetBFI =
nullptr,
531 function_ref<
const TargetLibraryInfo &(Function &)> GetTLI =
nullptr,
532 ProfileSummaryInfo *PSI =
nullptr,
533 OptimizationRemarkEmitter *ORE =
nullptr,
534 function_ref<EphemeralValuesCache &(Function &)> GetEphValuesCache =
536 : TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI),
537 GetTLI(GetTLI), PSI(PSI), F(
Callee), DL(F.getDataLayout()), ORE(ORE),
538 CandidateCall(
Call), GetEphValuesCache(GetEphValuesCache) {}
540 InlineResult analyze();
543 Value *getSimplifiedValueUnchecked(
Value *V)
const {
544 return SimplifiedValues.lookup(V);
549 template <
typename T>
T *getSimplifiedValue(
Value *V)
const {
550 Value *SimpleV = SimplifiedValues.lookup(V);
556 if constexpr (std::is_base_of_v<Constant, T>)
561 if (
I->getFunction() != &F)
564 if (Arg->getParent() != &F)
573 unsigned NumConstantArgs = 0;
574 unsigned NumConstantOffsetPtrArgs = 0;
575 unsigned NumAllocaArgs = 0;
576 unsigned NumConstantPtrCmps = 0;
577 unsigned NumConstantPtrDiffs = 0;
578 unsigned NumInstructionsSimplified = 0;
/// Heuristic for the expected number of compares needed to dispatch a switch
/// lowered to a balanced binary search over \p NumCaseCluster case clusters:
/// 3/2 * NumCaseCluster - 1. The cast widens before multiplying so a large
/// cluster count cannot overflow int arithmetic.
int64_t getExpectedNumberOfCompare(int NumCaseCluster) {
  return 3 * static_cast<int64_t>(NumCaseCluster) / 2 - 1;
}
604class InlineCostCallAnalyzer final :
public CallAnalyzer {
605 const bool ComputeFullInlineCost;
606 int LoadEliminationCost = 0;
611 int SingleBBBonus = 0;
614 const InlineParams &Params;
619 DenseMap<const Instruction *, InstructionCostDetail> InstructionCostDetailMap;
626 int StaticBonusApplied = 0;
629 const bool BoostIndirectCalls;
632 const bool IgnoreThreshold;
635 const bool CostBenefitAnalysisEnabled;
646 int CostAtBBStart = 0;
653 bool DecidedByCostThreshold =
false;
656 bool DecidedByCostBenefit =
false;
659 std::optional<CostBenefitPair> CostBenefit;
661 bool SingleBB =
true;
663 unsigned SROACostSavings = 0;
664 unsigned SROACostSavingsLost = 0;
669 DenseMap<AllocaInst *, int> SROAArgCosts;
678 void updateThreshold(CallBase &
Call, Function &Callee);
680 std::optional<int> getHotCallSiteThreshold(CallBase &
Call,
681 BlockFrequencyInfo *CallerBFI);
684 void addCost(int64_t Inc) {
685 Inc = std::clamp<int64_t>(Inc, INT_MIN, INT_MAX);
686 Cost = std::clamp<int64_t>(Inc + Cost, INT_MIN, INT_MAX);
689 void onDisableSROA(AllocaInst *Arg)
override {
690 auto CostIt = SROAArgCosts.find(Arg);
691 if (CostIt == SROAArgCosts.end())
693 addCost(CostIt->second);
694 SROACostSavings -= CostIt->second;
695 SROACostSavingsLost += CostIt->second;
696 SROAArgCosts.erase(CostIt);
699 void onDisableLoadElimination()
override {
700 addCost(LoadEliminationCost);
701 LoadEliminationCost = 0;
704 bool onCallBaseVisitStart(CallBase &
Call)
override {
705 if (std::optional<int> AttrCallThresholdBonus =
707 Threshold += *AttrCallThresholdBonus;
709 if (std::optional<int> AttrCallCost =
711 addCost(*AttrCallCost);
719 void onCallPenalty()
override { addCost(
CallPenalty); }
723 void onCallArgumentSetup(
const CallBase &
Call)
override {
728 void onLoadRelativeIntrinsic()
override {
732 void onLoweredCall(Function *
F, CallBase &
Call,
743 auto IndirectCallParams = Params;
744 IndirectCallParams.DefaultThreshold =
748 InlineCostCallAnalyzer CA(*
F,
Call, IndirectCallParams,
TTI,
749 GetAssumptionCache, GetBFI, GetTLI, PSI, ORE,
751 if (CA.analyze().isSuccess()) {
754 addCost(-std::max(0, CA.getThreshold() - CA.getCost()));
762 void onFinalizeSwitch(
unsigned JumpTableSize,
unsigned NumCaseCluster,
763 bool DefaultDestUnreachable)
override {
770 if (!DefaultDestUnreachable)
779 if (NumCaseCluster <= 3) {
783 addCost((NumCaseCluster - DefaultDestUnreachable) * 2 *
InstrCost);
787 int64_t ExpectedNumberOfCompare =
788 getExpectedNumberOfCompare(NumCaseCluster);
789 int64_t SwitchCost = ExpectedNumberOfCompare * 2 *
InstrCost;
797 void onInlineAsm(
const InlineAsm &Arg)
override {
802 int SectionLevel = 0;
803 int InlineAsmInstrCount = 0;
804 for (StringRef AsmStr : AsmStrs) {
806 StringRef Trimmed = AsmStr.trim();
807 size_t hashPos = Trimmed.
find(
'#');
809 Trimmed = Trimmed.
substr(0, hashPos);
828 if (SectionLevel == 0)
829 ++InlineAsmInstrCount;
831 NumInlineAsmInstructions += InlineAsmInstrCount;
835 void onMissedSimplification()
override { addCost(
InstrCost); }
837 void onInitializeSROAArg(AllocaInst *Arg)
override {
839 "Should not initialize SROA costs for null value.");
841 SROACostSavings += SROAArgCost;
842 SROAArgCosts[Arg] = SROAArgCost;
845 void onAggregateSROAUse(AllocaInst *SROAArg)
override {
846 auto CostIt = SROAArgCosts.find(SROAArg);
847 assert(CostIt != SROAArgCosts.end() &&
848 "expected this argument to have a cost");
853 void onBlockStart(
const BasicBlock *BB)
override { CostAtBBStart = Cost; }
855 void onBlockAnalyzed(
const BasicBlock *BB)
override {
856 if (CostBenefitAnalysisEnabled) {
859 assert(GetBFI &&
"GetBFI must be available");
860 BlockFrequencyInfo *BFI = &(GetBFI(
F));
861 assert(BFI &&
"BFI must be available");
864 ColdSize += Cost - CostAtBBStart;
872 if (SingleBB && TI->getNumSuccessors() > 1) {
874 Threshold -= SingleBBBonus;
879 void onInstructionAnalysisStart(
const Instruction *
I)
override {
884 auto &CostDetail = InstructionCostDetailMap[
I];
885 CostDetail.CostBefore = Cost;
886 CostDetail.ThresholdBefore = Threshold;
889 void onInstructionAnalysisFinish(
const Instruction *
I)
override {
894 auto &CostDetail = InstructionCostDetailMap[
I];
895 CostDetail.CostAfter = Cost;
896 CostDetail.ThresholdAfter = Threshold;
899 bool isCostBenefitAnalysisEnabled() {
900 if (!PSI || !PSI->hasProfileSummary())
912 if (!PSI->hasInstrumentationProfile())
917 if (!
Caller->getEntryCount())
920 BlockFrequencyInfo *CallerBFI = &(GetBFI(*Caller));
925 if (!PSI->isHotCallSite(CandidateCall, CallerBFI))
929 auto EntryCount =
F.getEntryCount();
930 if (!EntryCount || !EntryCount->getCount())
933 BlockFrequencyInfo *CalleeBFI = &(GetBFI(
F));
941 unsigned getInliningCostBenefitAnalysisSavingsMultiplier()
const {
948 unsigned getInliningCostBenefitAnalysisProfitableMultiplier()
const {
954 void OverrideCycleSavingsAndSizeForTesting(APInt &CycleSavings,
int &
Size) {
956 CandidateCall,
"inline-cycle-savings-for-test")) {
957 CycleSavings = *AttrCycleSavings;
961 CandidateCall,
"inline-runtime-cost-for-test")) {
962 Size = *AttrRuntimeCost;
969 std::optional<bool> costBenefitAnalysis() {
970 if (!CostBenefitAnalysisEnabled)
981 BlockFrequencyInfo *CalleeBFI = &(GetBFI(
F));
994 APInt CycleSavings(128, 0);
997 APInt CurrentSavings(128, 0);
1001 if (BI->isConditional() &&
1002 getSimplifiedValue<ConstantInt>(BI->getCondition())) {
1006 if (getSimplifiedValue<ConstantInt>(
SI->getCondition()))
1010 if (SimplifiedValues.
count(V)) {
1018 CycleSavings += CurrentSavings;
1022 auto EntryProfileCount =
F.getEntryCount();
1023 assert(EntryProfileCount && EntryProfileCount->getCount());
1024 auto EntryCount = EntryProfileCount->getCount();
1025 CycleSavings += EntryCount / 2;
1026 CycleSavings = CycleSavings.
udiv(EntryCount);
1029 auto *CallerBB = CandidateCall.
getParent();
1030 BlockFrequencyInfo *CallerBFI = &(GetBFI(*(CallerBB->getParent())));
1037 int Size = Cost - ColdSize;
1043 OverrideCycleSavingsAndSizeForTesting(CycleSavings,
Size);
1044 CostBenefit.emplace(APInt(128,
Size), CycleSavings);
1067 APInt Threshold(128, PSI->getOrCompHotCountThreshold());
1070 APInt UpperBoundCycleSavings = CycleSavings;
1071 UpperBoundCycleSavings *= getInliningCostBenefitAnalysisSavingsMultiplier();
1072 if (UpperBoundCycleSavings.
uge(Threshold))
1075 APInt LowerBoundCycleSavings = CycleSavings;
1076 LowerBoundCycleSavings *=
1077 getInliningCostBenefitAnalysisProfitableMultiplier();
1078 if (LowerBoundCycleSavings.
ult(Threshold))
1082 return std::nullopt;
1085 InlineResult finalizeAnalysis()
override {
1092 if (
Caller->hasMinSize()) {
1093 DominatorTree DT(
F);
1096 for (Loop *L : LI) {
1098 if (DeadBlocks.
count(
L->getHeader()))
1108 if (NumVectorInstructions <= NumInstructions / 10)
1109 Threshold -= VectorBonus;
1110 else if (NumVectorInstructions <= NumInstructions / 2)
1111 Threshold -= VectorBonus / 2;
1113 if (std::optional<int> AttrCost =
1120 Cost *= *AttrCostMult;
1122 if (std::optional<int> AttrThreshold =
1124 Threshold = *AttrThreshold;
1126 if (
auto Result = costBenefitAnalysis()) {
1127 DecidedByCostBenefit =
true;
1134 if (IgnoreThreshold)
1137 DecidedByCostThreshold =
true;
1138 return Cost < std::max(1, Threshold)
1140 : InlineResult::
failure(
"Cost over threshold.");
1143 bool shouldStop()
override {
1144 if (IgnoreThreshold || ComputeFullInlineCost)
1148 if (Cost < Threshold)
1150 DecidedByCostThreshold =
true;
1154 void onLoadEliminationOpportunity()
override {
1158 InlineResult onAnalysisStart()
override {
1169 assert(NumInstructions == 0);
1170 assert(NumVectorInstructions == 0);
1173 updateThreshold(CandidateCall,
F);
1179 assert(SingleBBBonus >= 0);
1180 assert(VectorBonus >= 0);
1185 Threshold += (SingleBBBonus + VectorBonus);
1193 if (
F.getCallingConv() == CallingConv::Cold)
1199 if (Cost >= Threshold && !ComputeFullInlineCost)
1206 InlineCostCallAnalyzer(
1207 Function &Callee, CallBase &
Call,
const InlineParams &Params,
1208 const TargetTransformInfo &
TTI,
1209 function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
1210 function_ref<BlockFrequencyInfo &(Function &)> GetBFI =
nullptr,
1211 function_ref<
const TargetLibraryInfo &(Function &)> GetTLI =
nullptr,
1212 ProfileSummaryInfo *PSI =
nullptr,
1213 OptimizationRemarkEmitter *ORE =
nullptr,
bool BoostIndirect =
true,
1214 bool IgnoreThreshold =
false,
1215 function_ref<EphemeralValuesCache &(Function &)> GetEphValuesCache =
1217 : CallAnalyzer(
Callee,
Call,
TTI, GetAssumptionCache, GetBFI, GetTLI, PSI,
1218 ORE, GetEphValuesCache),
1220 Params.ComputeFullInlineCost || ORE ||
1221 isCostBenefitAnalysisEnabled()),
1223 BoostIndirectCalls(BoostIndirect), IgnoreThreshold(IgnoreThreshold),
1224 CostBenefitAnalysisEnabled(isCostBenefitAnalysisEnabled()),
1226 AllowRecursiveCall = *Params.AllowRecursiveCall;
1230 InlineCostAnnotationWriter Writer;
1236 void print(raw_ostream &OS);
1238 std::optional<InstructionCostDetail> getCostDetails(
const Instruction *
I) {
1239 auto It = InstructionCostDetailMap.find(
I);
1240 if (It != InstructionCostDetailMap.end())
1242 return std::nullopt;
1245 ~InlineCostCallAnalyzer()
override =
default;
1246 int getThreshold()
const {
return Threshold; }
1247 int getCost()
const {
return Cost; }
1248 int getStaticBonusApplied()
const {
return StaticBonusApplied; }
1249 std::optional<CostBenefitPair> getCostBenefitPair() {
return CostBenefit; }
1250 bool wasDecidedByCostBenefit()
const {
return DecidedByCostBenefit; }
1251 bool wasDecidedByCostThreshold()
const {
return DecidedByCostThreshold; }
1255static bool isSoleCallToLocalFunction(
const CallBase &CB,
1257 return Callee.hasLocalLinkage() &&
Callee.hasOneLiveUse() &&
1261class InlineCostFeaturesAnalyzer final :
public CallAnalyzer {
1268 static constexpr int JTCostMultiplier = 2;
1269 static constexpr int CaseClusterCostMultiplier = 2;
1270 static constexpr int SwitchDefaultDestCostMultiplier = 2;
1271 static constexpr int SwitchCostMultiplier = 2;
1275 unsigned SROACostSavingOpportunities = 0;
1276 int VectorBonus = 0;
1277 int SingleBBBonus = 0;
1280 DenseMap<AllocaInst *, unsigned> SROACosts;
1283 Cost[
static_cast<size_t>(Feature)] += Delta;
1287 Cost[
static_cast<size_t>(Feature)] =
Value;
1290 void onDisableSROA(AllocaInst *Arg)
override {
1291 auto CostIt = SROACosts.find(Arg);
1292 if (CostIt == SROACosts.end())
1295 increment(InlineCostFeatureIndex::sroa_losses, CostIt->second);
1296 SROACostSavingOpportunities -= CostIt->second;
1297 SROACosts.erase(CostIt);
1300 void onDisableLoadElimination()
override {
1301 set(InlineCostFeatureIndex::load_elimination, 1);
1304 void onCallPenalty()
override {
1305 increment(InlineCostFeatureIndex::call_penalty,
CallPenalty);
1308 void onCallArgumentSetup(
const CallBase &
Call)
override {
1309 increment(InlineCostFeatureIndex::call_argument_setup,
1313 void onLoadRelativeIntrinsic()
override {
1314 increment(InlineCostFeatureIndex::load_relative_intrinsic, 3 *
InstrCost);
1317 void onLoweredCall(Function *
F, CallBase &
Call,
1319 increment(InlineCostFeatureIndex::lowered_call_arg_setup,
1323 InlineParams IndirectCallParams = { 0,
1336 InlineCostCallAnalyzer CA(*
F,
Call, IndirectCallParams,
TTI,
1337 GetAssumptionCache, GetBFI, GetTLI, PSI, ORE,
1339 if (CA.analyze().isSuccess()) {
1340 increment(InlineCostFeatureIndex::nested_inline_cost_estimate,
1342 increment(InlineCostFeatureIndex::nested_inlines, 1);
1349 void onFinalizeSwitch(
unsigned JumpTableSize,
unsigned NumCaseCluster,
1350 bool DefaultDestUnreachable)
override {
1351 if (JumpTableSize) {
1352 if (!DefaultDestUnreachable)
1353 increment(InlineCostFeatureIndex::switch_default_dest_penalty,
1354 SwitchDefaultDestCostMultiplier *
InstrCost);
1355 int64_t JTCost =
static_cast<int64_t
>(JumpTableSize) *
InstrCost +
1357 increment(InlineCostFeatureIndex::jump_table_penalty, JTCost);
1361 if (NumCaseCluster <= 3) {
1362 increment(InlineCostFeatureIndex::case_cluster_penalty,
1363 (NumCaseCluster - DefaultDestUnreachable) *
1368 int64_t ExpectedNumberOfCompare =
1369 getExpectedNumberOfCompare(NumCaseCluster);
1371 int64_t SwitchCost =
1372 ExpectedNumberOfCompare * SwitchCostMultiplier *
InstrCost;
1373 increment(InlineCostFeatureIndex::switch_penalty, SwitchCost);
1376 void onMissedSimplification()
override {
1377 increment(InlineCostFeatureIndex::unsimplified_common_instructions,
1381 void onInitializeSROAArg(AllocaInst *Arg)
override {
1383 SROACosts[Arg] = SROAArgCost;
1384 SROACostSavingOpportunities += SROAArgCost;
1387 void onAggregateSROAUse(AllocaInst *Arg)
override {
1388 SROACosts.find(Arg)->second +=
InstrCost;
1389 SROACostSavingOpportunities +=
InstrCost;
1392 void onBlockAnalyzed(
const BasicBlock *BB)
override {
1394 set(InlineCostFeatureIndex::is_multiple_blocks, 1);
1395 Threshold -= SingleBBBonus;
1398 InlineResult finalizeAnalysis()
override {
1400 if (
Caller->hasMinSize()) {
1401 DominatorTree DT(
F);
1403 for (Loop *L : LI) {
1405 if (DeadBlocks.
count(
L->getHeader()))
1407 increment(InlineCostFeatureIndex::num_loops,
1411 set(InlineCostFeatureIndex::dead_blocks, DeadBlocks.
size());
1412 set(InlineCostFeatureIndex::simplified_instructions,
1413 NumInstructionsSimplified);
1414 set(InlineCostFeatureIndex::constant_args, NumConstantArgs);
1415 set(InlineCostFeatureIndex::constant_offset_ptr_args,
1416 NumConstantOffsetPtrArgs);
1417 set(InlineCostFeatureIndex::sroa_savings, SROACostSavingOpportunities);
1419 if (NumVectorInstructions <= NumInstructions / 10)
1420 Threshold -= VectorBonus;
1421 else if (NumVectorInstructions <= NumInstructions / 2)
1422 Threshold -= VectorBonus / 2;
1424 set(InlineCostFeatureIndex::threshold, Threshold);
1429 bool shouldStop()
override {
return false; }
1431 void onLoadEliminationOpportunity()
override {
1432 increment(InlineCostFeatureIndex::load_elimination, 1);
1435 InlineResult onAnalysisStart()
override {
1436 increment(InlineCostFeatureIndex::callsite_cost,
1439 set(InlineCostFeatureIndex::cold_cc_penalty,
1440 (
F.getCallingConv() == CallingConv::Cold));
1442 set(InlineCostFeatureIndex::last_call_to_static_bonus,
1443 isSoleCallToLocalFunction(CandidateCall,
F));
1448 int SingleBBBonusPercent = 50;
1452 SingleBBBonus = Threshold * SingleBBBonusPercent / 100;
1453 VectorBonus = Threshold * VectorBonusPercent / 100;
1454 Threshold += (SingleBBBonus + VectorBonus);
1460 InlineCostFeaturesAnalyzer(
1461 const TargetTransformInfo &
TTI,
1462 function_ref<AssumptionCache &(Function &)> &GetAssumptionCache,
1463 function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
1464 function_ref<
const TargetLibraryInfo &(Function &)> GetTLI,
1465 ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE, Function &Callee,
1467 : CallAnalyzer(
Callee,
Call,
TTI, GetAssumptionCache, GetBFI, GetTLI,
1476bool CallAnalyzer::isAllocaDerivedArg(
Value *V) {
1477 return SROAArgValues.
count(V);
1480void CallAnalyzer::disableSROAForArg(AllocaInst *SROAArg) {
1481 onDisableSROA(SROAArg);
1482 EnabledSROAAllocas.
erase(SROAArg);
1483 disableLoadElimination();
1486void InlineCostAnnotationWriter::emitInstructionAnnot(
1487 const Instruction *
I, formatted_raw_ostream &OS) {
1491 std::optional<InstructionCostDetail>
Record = ICCA->getCostDetails(
I);
1493 OS <<
"; No analysis for the instruction";
1495 OS <<
"; cost before = " <<
Record->CostBefore
1496 <<
", cost after = " <<
Record->CostAfter
1497 <<
", threshold before = " <<
Record->ThresholdBefore
1498 <<
", threshold after = " <<
Record->ThresholdAfter <<
", ";
1499 OS <<
"cost delta = " <<
Record->getCostDelta();
1500 if (
Record->hasThresholdChanged())
1501 OS <<
", threshold delta = " <<
Record->getThresholdDelta();
1503 auto *
V = ICCA->getSimplifiedValueUnchecked(
const_cast<Instruction *
>(
I));
1505 OS <<
", simplified to ";
1508 if (
VI->getFunction() !=
I->getFunction())
1509 OS <<
" (caller instruction)";
1511 if (VArg->getParent() !=
I->getFunction())
1512 OS <<
" (caller argument)";
1519void CallAnalyzer::disableSROA(
Value *V) {
1520 if (
auto *SROAArg = getSROAArgForValueOrNull(V)) {
1521 disableSROAForArg(SROAArg);
1525void CallAnalyzer::disableLoadElimination() {
1526 if (EnableLoadElimination) {
1527 onDisableLoadElimination();
1528 EnableLoadElimination =
false;
1536bool CallAnalyzer::accumulateGEPOffset(GEPOperator &
GEP, APInt &
Offset) {
1537 unsigned IntPtrWidth =
DL.getIndexTypeSizeInBits(
GEP.getType());
1541 GTI != GTE; ++GTI) {
1543 getDirectOrSimplifiedValue<ConstantInt>(GTI.getOperand());
1550 if (StructType *STy = GTI.getStructTypeOrNull()) {
1552 const StructLayout *SL =
DL.getStructLayout(STy);
1557 APInt TypeSize(IntPtrWidth, GTI.getSequentialElementStride(
DL));
1566bool CallAnalyzer::isGEPFree(GetElementPtrInst &
GEP) {
1567 SmallVector<Value *, 4> Operands;
1569 for (
const Use &
Op :
GEP.indices())
1570 if (Constant *SimpleOp = getSimplifiedValue<Constant>(
Op))
1579bool CallAnalyzer::visitAlloca(AllocaInst &
I) {
1580 disableSROA(
I.getOperand(0));
1584 if (
I.isArrayAllocation()) {
1585 Constant *
Size = getSimplifiedValue<Constant>(
I.getArraySize());
1595 Type *Ty =
I.getAllocatedType();
1597 AllocSize->getLimitedValue(),
1598 DL.getTypeAllocSize(Ty).getKnownMinValue(), AllocatedSize);
1600 HasDynamicAlloca =
true;
1605 if (
I.isStaticAlloca()) {
1617 HasDynamicAlloca =
true;
1623bool CallAnalyzer::visitPHI(PHINode &
I) {
1635 bool CheckSROA =
I.getType()->isPointerTy();
1639 std::pair<Value *, APInt> FirstBaseAndOffset = {
nullptr, ZeroOffset};
1640 Value *FirstV =
nullptr;
1642 for (
unsigned i = 0, e =
I.getNumIncomingValues(); i != e; ++i) {
1645 if (DeadBlocks.
count(Pred))
1649 BasicBlock *KnownSuccessor = KnownSuccessors[Pred];
1650 if (KnownSuccessor && KnownSuccessor !=
I.getParent())
1653 Value *
V =
I.getIncomingValue(i);
1658 Constant *
C = getDirectOrSimplifiedValue<Constant>(V);
1660 std::pair<Value *, APInt> BaseAndOffset = {
nullptr, ZeroOffset};
1661 if (!
C && CheckSROA)
1662 BaseAndOffset = ConstantOffsetPtrs.
lookup(V);
1664 if (!
C && !BaseAndOffset.first)
1681 if (FirstBaseAndOffset == BaseAndOffset)
1695 FirstBaseAndOffset = BaseAndOffset;
1700 SimplifiedValues[&
I] = FirstC;
1705 if (FirstBaseAndOffset.first) {
1706 ConstantOffsetPtrs[&
I] = FirstBaseAndOffset;
1708 if (
auto *SROAArg = getSROAArgForValueOrNull(FirstV))
1709 SROAArgValues[&
I] = SROAArg;
1719bool CallAnalyzer::canFoldInboundsGEP(GetElementPtrInst &
I) {
1721 std::pair<Value *, APInt> BaseAndOffset =
1722 ConstantOffsetPtrs.
lookup(
I.getPointerOperand());
1723 if (!BaseAndOffset.first)
1732 ConstantOffsetPtrs[&
I] = std::move(BaseAndOffset);
1737bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &
I) {
1738 auto *SROAArg = getSROAArgForValueOrNull(
I.getPointerOperand());
1741 auto IsGEPOffsetConstant = [&](GetElementPtrInst &
GEP) {
1742 for (
const Use &
Op :
GEP.indices())
1743 if (!getDirectOrSimplifiedValue<Constant>(
Op))
1752 if ((
I.isInBounds() && canFoldInboundsGEP(
I)) || IsGEPOffsetConstant(
I)) {
1754 SROAArgValues[&
I] = SROAArg;
1762 disableSROAForArg(SROAArg);
1763 return isGEPFree(
I);
1769bool CallAnalyzer::simplifyCmpInstForRecCall(CmpInst &Cmp) {
1773 auto *CmpOp =
Cmp.getOperand(0);
1778 auto *CallBB = CandidateCall.
getParent();
1779 auto *Predecessor = CallBB->getSinglePredecessor();
1784 if (!Br || Br->isUnconditional() || Br->getCondition() != &Cmp)
1789 bool ArgFound =
false;
1790 Value *FuncArg =
nullptr, *CallArg =
nullptr;
1791 for (
unsigned ArgNum = 0;
1792 ArgNum <
F.arg_size() && ArgNum < CandidateCall.
arg_size(); ArgNum++) {
1793 FuncArg =
F.getArg(ArgNum);
1795 if (FuncArg == CmpOp && CallArg != CmpOp) {
1806 CondContext CC(&Cmp);
1807 CC.Invert = (CallBB != Br->getSuccessor(0));
1809 CC.AffectedValues.insert(FuncArg);
1815 if ((ConstVal->isOne() && CC.Invert) ||
1816 (ConstVal->isZero() && !CC.Invert)) {
1817 SimplifiedValues[&
Cmp] = ConstVal;
1825bool CallAnalyzer::simplifyInstruction(Instruction &
I) {
1828 Constant *COp = getDirectOrSimplifiedValue<Constant>(
Op);
1836 SimplifiedValues[&
I] =
C;
1849bool CallAnalyzer::simplifyIntrinsicCallIsConstant(CallBase &CB) {
1851 auto *
C = getDirectOrSimplifiedValue<Constant>(Arg);
1854 SimplifiedValues[&CB] = ConstantInt::get(RT,
C ? 1 : 0);
1858bool CallAnalyzer::simplifyIntrinsicCallObjectSize(CallBase &CB) {
1868 SimplifiedValues[&CB] =
C;
1872bool CallAnalyzer::visitBitCast(BitCastInst &
I) {
1878 std::pair<Value *, APInt> BaseAndOffset =
1879 ConstantOffsetPtrs.
lookup(
I.getOperand(0));
1881 if (BaseAndOffset.first)
1882 ConstantOffsetPtrs[&
I] = BaseAndOffset;
1885 if (
auto *SROAArg = getSROAArgForValueOrNull(
I.getOperand(0)))
1886 SROAArgValues[&
I] = SROAArg;
1892bool CallAnalyzer::visitPtrToInt(PtrToIntInst &
I) {
1900 unsigned AS =
I.getOperand(0)->getType()->getPointerAddressSpace();
1901 if (IntegerSize ==
DL.getPointerSizeInBits(AS)) {
1902 std::pair<Value *, APInt> BaseAndOffset =
1903 ConstantOffsetPtrs.
lookup(
I.getOperand(0));
1904 if (BaseAndOffset.first)
1905 ConstantOffsetPtrs[&
I] = BaseAndOffset;
1915 if (
auto *SROAArg = getSROAArgForValueOrNull(
I.getOperand(0)))
1916 SROAArgValues[&
I] = SROAArg;
1922bool CallAnalyzer::visitIntToPtr(IntToPtrInst &
I) {
1930 unsigned IntegerSize =
Op->getType()->getScalarSizeInBits();
1931 if (IntegerSize <=
DL.getPointerTypeSizeInBits(
I.getType())) {
1932 std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.
lookup(
Op);
1933 if (BaseAndOffset.first)
1934 ConstantOffsetPtrs[&
I] = BaseAndOffset;
1938 if (
auto *SROAArg = getSROAArgForValueOrNull(
Op))
1939 SROAArgValues[&
I] = SROAArg;
1945bool CallAnalyzer::visitCastInst(CastInst &
I) {
1952 disableSROA(
I.getOperand(0));
1957 switch (
I.getOpcode()) {
1958 case Instruction::FPTrunc:
1959 case Instruction::FPExt:
1960 case Instruction::UIToFP:
1961 case Instruction::SIToFP:
1962 case Instruction::FPToUI:
1963 case Instruction::FPToSI:
1975bool CallAnalyzer::paramHasAttr(Argument *
A, Attribute::AttrKind Attr) {
1979bool CallAnalyzer::isKnownNonNullInCallee(
Value *V) {
1986 if (paramHasAttr(
A, Attribute::NonNull))
1992 if (isAllocaDerivedArg(V))
2001bool CallAnalyzer::allowSizeGrowth(CallBase &
Call) {
2026bool InlineCostCallAnalyzer::isColdCallSite(CallBase &
Call,
2027 BlockFrequencyInfo *CallerBFI) {
2030 if (PSI && PSI->hasProfileSummary())
2031 return PSI->isColdCallSite(
Call, CallerBFI);
2043 auto CallSiteFreq = CallerBFI->
getBlockFreq(CallSiteBB);
2044 auto CallerEntryFreq =
2046 return CallSiteFreq < CallerEntryFreq * ColdProb;
2050InlineCostCallAnalyzer::getHotCallSiteThreshold(CallBase &
Call,
2051 BlockFrequencyInfo *CallerBFI) {
2055 if (PSI && PSI->hasProfileSummary() && PSI->isHotCallSite(
Call, CallerBFI))
2061 return std::nullopt;
2068 BlockFrequency CallSiteFreq = CallerBFI->
getBlockFreq(CallSiteBB);
2069 BlockFrequency CallerEntryFreq = CallerBFI->
getEntryFreq();
2071 if (Limit && CallSiteFreq >= *Limit)
2075 return std::nullopt;
2078void InlineCostCallAnalyzer::updateThreshold(CallBase &
Call, Function &Callee) {
2080 if (!allowSizeGrowth(
Call)) {
2088 auto MinIfValid = [](
int A, std::optional<int>
B) {
2089 return B ? std::min(
A, *
B) :
A;
2093 auto MaxIfValid = [](
int A, std::optional<int>
B) {
2094 return B ? std::max(
A, *
B) :
A;
2109 int SingleBBBonusPercent = 50;
2114 auto DisallowAllBonuses = [&]() {
2115 SingleBBBonusPercent = 0;
2116 VectorBonusPercent = 0;
2117 LastCallToStaticBonus = 0;
2122 if (
Caller->hasMinSize()) {
2128 SingleBBBonusPercent = 0;
2129 VectorBonusPercent = 0;
2130 }
else if (
Caller->hasOptSize())
2135 if (!
Caller->hasMinSize()) {
2136 if (
Callee.hasFnAttribute(Attribute::InlineHint))
2146 BlockFrequencyInfo *CallerBFI = GetBFI ? &(GetBFI(*Caller)) : nullptr;
2161 DisallowAllBonuses();
2166 if (PSI->isFunctionEntryHot(&Callee)) {
2172 }
else if (PSI->isFunctionEntryCold(&Callee)) {
2178 DisallowAllBonuses();
2190 SingleBBBonus = Threshold * SingleBBBonusPercent / 100;
2191 VectorBonus = Threshold * VectorBonusPercent / 100;
2196 if (isSoleCallToLocalFunction(
Call,
F)) {
2197 addCost(-LastCallToStaticBonus);
2198 StaticBonusApplied = LastCallToStaticBonus;
2202bool CallAnalyzer::visitCmpInst(CmpInst &
I) {
2209 if (simplifyCmpInstForRecCall(
I))
2212 if (
I.getOpcode() == Instruction::FCmp)
2217 Value *LHSBase, *RHSBase;
2218 APInt LHSOffset, RHSOffset;
2219 std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.
lookup(
LHS);
2221 std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.
lookup(
RHS);
2222 if (RHSBase && LHSBase == RHSBase) {
2228 ++NumConstantPtrCmps;
2233 auto isImplicitNullCheckCmp = [](
const CmpInst &
I) {
2234 for (
auto *User :
I.users())
2236 if (!
Instr->getMetadata(LLVMContext::MD_make_implicit))
2244 if (isKnownNonNullInCallee(
I.getOperand(0))) {
2252 if (isImplicitNullCheckCmp(
I))
2258bool CallAnalyzer::visitSub(BinaryOperator &
I) {
2262 Value *LHSBase, *RHSBase;
2263 APInt LHSOffset, RHSOffset;
2264 std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.
lookup(
LHS);
2266 std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.
lookup(
RHS);
2267 if (RHSBase && LHSBase == RHSBase) {
2273 SimplifiedValues[&
I] =
C;
2274 ++NumConstantPtrDiffs;
2282 return Base::visitSub(
I);
2285bool CallAnalyzer::visitBinaryOperator(BinaryOperator &
I) {
2287 Constant *CLHS = getDirectOrSimplifiedValue<Constant>(
LHS);
2288 Constant *CRHS = getDirectOrSimplifiedValue<Constant>(
RHS);
2290 Value *SimpleV =
nullptr;
2293 FI->getFastMathFlags(),
DL);
2299 SimplifiedValues[&
I] =
C;
2311 using namespace llvm::PatternMatch;
2312 if (
I.getType()->isFloatingPointTy() &&
2320bool CallAnalyzer::visitFNeg(UnaryOperator &
I) {
2322 Constant *COp = getDirectOrSimplifiedValue<Constant>(
Op);
2328 SimplifiedValues[&
I] =
C;
2339bool CallAnalyzer::visitLoad(LoadInst &
I) {
2340 if (handleSROA(
I.getPointerOperand(),
I.isSimple()))
2346 if (EnableLoadElimination &&
2347 !LoadAddrSet.
insert(
I.getPointerOperand()).second &&
I.isUnordered()) {
2348 onLoadEliminationOpportunity();
2356bool CallAnalyzer::visitStore(StoreInst &
I) {
2357 if (handleSROA(
I.getPointerOperand(),
I.isSimple()))
2368 disableLoadElimination();
2374bool CallAnalyzer::visitExtractValue(ExtractValueInst &
I) {
2375 Value *
Op =
I.getAggregateOperand();
2379 if (
Value *SimpleOp = getSimplifiedValueUnchecked(
Op)) {
2380 SimplifyQuery SQ(
DL);
2383 SimplifiedValues[&
I] = SimpleV;
2389 return Base::visitExtractValue(
I);
2392bool CallAnalyzer::visitInsertValue(InsertValueInst &
I) {
2398 return Base::visitInsertValue(
I);
2407bool CallAnalyzer::simplifyCallSite(Function *
F, CallBase &
Call) {
2416 SmallVector<Constant *, 4> ConstantArgs;
2419 Constant *
C = getDirectOrSimplifiedValue<Constant>(
I);
2426 SimplifiedValues[&
Call] =
C;
2433bool CallAnalyzer::isLoweredToCall(Function *
F, CallBase &
Call) {
2434 const TargetLibraryInfo *TLI = GetTLI ? &GetTLI(*
F) : nullptr;
2440 case LibFunc_memcpy_chk:
2441 case LibFunc_memmove_chk:
2442 case LibFunc_mempcpy_chk:
2443 case LibFunc_memset_chk: {
2450 auto *LenOp = getDirectOrSimplifiedValue<ConstantInt>(
Call.
getOperand(2));
2453 if (LenOp && ObjSizeOp &&
2454 LenOp->getLimitedValue() <= ObjSizeOp->getLimitedValue()) {
2466bool CallAnalyzer::visitCallBase(CallBase &
Call) {
2467 if (!onCallBaseVisitStart(
Call))
2471 !
F.hasFnAttribute(Attribute::ReturnsTwice)) {
2473 ExposesReturnsTwice =
true;
2477 ContainsNoDuplicateCall =
true;
2480 onInlineAsm(*InlineAsmOp);
2488 F = getSimplifiedValue<Function>(Callee);
2490 onCallArgumentSetup(
Call);
2493 disableLoadElimination();
2494 return Base::visitCallBase(
Call);
2498 assert(
F &&
"Expected a call to a known function");
2501 if (simplifyCallSite(
F,
Call))
2507 switch (
II->getIntrinsicID()) {
2510 disableLoadElimination();
2511 return Base::visitCallBase(
Call);
2513 case Intrinsic::load_relative:
2514 onLoadRelativeIntrinsic();
2517 case Intrinsic::memset:
2518 case Intrinsic::memcpy:
2519 case Intrinsic::memmove:
2520 disableLoadElimination();
2523 case Intrinsic::icall_branch_funnel:
2524 case Intrinsic::localescape:
2525 HasUninlineableIntrinsic =
true;
2527 case Intrinsic::vastart:
2528 InitsVargArgs =
true;
2530 case Intrinsic::launder_invariant_group:
2531 case Intrinsic::strip_invariant_group:
2532 if (
auto *SROAArg = getSROAArgForValueOrNull(
II->getOperand(0)))
2533 SROAArgValues[
II] = SROAArg;
2535 case Intrinsic::is_constant:
2536 return simplifyIntrinsicCallIsConstant(
Call);
2537 case Intrinsic::objectsize:
2538 return simplifyIntrinsicCallObjectSize(
Call);
2545 IsRecursiveCall =
true;
2546 if (!AllowRecursiveCall)
2550 if (isLoweredToCall(
F,
Call)) {
2555 disableLoadElimination();
2556 return Base::visitCallBase(
Call);
2559bool CallAnalyzer::visitReturnInst(ReturnInst &RI) {
2561 bool Free = !HasReturn;
2566bool CallAnalyzer::visitBranchInst(BranchInst &BI) {
2572 getDirectOrSimplifiedValue<ConstantInt>(BI.
getCondition()) ||
2576bool CallAnalyzer::visitSelectInst(SelectInst &SI) {
2577 bool CheckSROA =
SI.getType()->isPointerTy();
2581 Constant *TrueC = getDirectOrSimplifiedValue<Constant>(TrueVal);
2582 Constant *FalseC = getDirectOrSimplifiedValue<Constant>(FalseVal);
2583 Constant *CondC = getSimplifiedValue<Constant>(
SI.getCondition());
2587 if (TrueC == FalseC && TrueC) {
2588 SimplifiedValues[&
SI] = TrueC;
2593 return Base::visitSelectInst(SI);
2595 std::pair<Value *, APInt> TrueBaseAndOffset =
2596 ConstantOffsetPtrs.
lookup(TrueVal);
2597 std::pair<Value *, APInt> FalseBaseAndOffset =
2598 ConstantOffsetPtrs.
lookup(FalseVal);
2599 if (TrueBaseAndOffset == FalseBaseAndOffset && TrueBaseAndOffset.first) {
2600 ConstantOffsetPtrs[&
SI] = TrueBaseAndOffset;
2602 if (
auto *SROAArg = getSROAArgForValueOrNull(TrueVal))
2603 SROAArgValues[&
SI] = SROAArg;
2607 return Base::visitSelectInst(SI);
2618 if (TrueC && FalseC) {
2620 SimplifiedValues[&
SI] =
C;
2624 return Base::visitSelectInst(SI);
2629 SimplifiedValues[&
SI] = SelectedC;
2636 std::pair<Value *, APInt> BaseAndOffset =
2637 ConstantOffsetPtrs.
lookup(SelectedV);
2638 if (BaseAndOffset.first) {
2639 ConstantOffsetPtrs[&
SI] = BaseAndOffset;
2641 if (
auto *SROAArg = getSROAArgForValueOrNull(SelectedV))
2642 SROAArgValues[&
SI] = SROAArg;
2648bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
2651 if (getDirectOrSimplifiedValue<ConstantInt>(
SI.getCondition()))
2666 unsigned JumpTableSize = 0;
2667 BlockFrequencyInfo *BFI = GetBFI ? &(GetBFI(
F)) : nullptr;
2668 unsigned NumCaseCluster =
2671 onFinalizeSwitch(JumpTableSize, NumCaseCluster,
SI.defaultDestUnreachable());
2675bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) {
2684 HasIndirectBr =
true;
2688bool CallAnalyzer::visitResumeInst(ResumeInst &RI) {
2694bool CallAnalyzer::visitCleanupReturnInst(CleanupReturnInst &CRI) {
2700bool CallAnalyzer::visitCatchReturnInst(CatchReturnInst &CRI) {
2706bool CallAnalyzer::visitUnreachableInst(UnreachableInst &
I) {
2713bool CallAnalyzer::visitInstruction(Instruction &
I) {
2722 for (
const Use &
Op :
I.operands())
2736CallAnalyzer::analyzeBlock(BasicBlock *BB,
2737 const SmallPtrSetImpl<const Value *> &EphValues) {
2738 for (Instruction &
I : *BB) {
2747 if (
I.isDebugOrPseudoInst())
2756 ++NumVectorInstructions;
2763 onInstructionAnalysisStart(&
I);
2765 if (Base::visit(&
I))
2766 ++NumInstructionsSimplified;
2768 onMissedSimplification();
2770 onInstructionAnalysisFinish(&
I);
2771 using namespace ore;
2774 if (IsRecursiveCall && !AllowRecursiveCall)
2776 else if (ExposesReturnsTwice)
2778 else if (HasDynamicAlloca)
2780 else if (HasIndirectBr)
2782 else if (HasUninlineableIntrinsic)
2784 else if (InitsVargArgs)
2786 if (!
IR.isSuccess()) {
2789 return OptimizationRemarkMissed(
DEBUG_TYPE,
"NeverInline",
2791 <<
NV(
"Callee", &
F) <<
" has uninlinable pattern ("
2792 <<
NV(
"InlineResult",
IR.getFailureReason())
2793 <<
") and cost is not fully computed";
2806 return OptimizationRemarkMissed(
DEBUG_TYPE,
"NeverInline",
2808 <<
NV(
"Callee", &
F) <<
" is "
2809 <<
NV(
"InlineResult",
IR.getFailureReason())
2810 <<
". Cost is not fully computed";
2817 "Call site analysis is not favorable to inlining.");
2829ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(
Value *&V) {
2830 if (!
V->getType()->isPointerTy())
2833 unsigned AS =
V->getType()->getPointerAddressSpace();
2834 unsigned IntPtrWidth =
DL.getIndexSizeInBits(AS);
2839 SmallPtrSet<Value *, 4> Visited;
2843 if (!
GEP->isInBounds() || !accumulateGEPOffset(*
GEP,
Offset))
2845 V =
GEP->getPointerOperand();
2847 if (GA->isInterposable())
2849 V = GA->getAliasee();
2853 assert(
V->getType()->isPointerTy() &&
"Unexpected operand type!");
2854 }
while (Visited.
insert(V).second);
2856 Type *IdxPtrTy =
DL.getIndexType(
V->getType());
2867void CallAnalyzer::findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB) {
2871 if (DeadBlocks.
count(Pred))
2873 BasicBlock *KnownSucc = KnownSuccessors[Pred];
2874 return KnownSucc && KnownSucc != Succ;
2879 return (!DeadBlocks.
count(BB) &&
2881 [&](BasicBlock *
P) {
return IsEdgeDead(
P, BB); }));
2884 for (BasicBlock *Succ :
successors(CurrBB)) {
2885 if (Succ == NextBB || !IsNewlyDead(Succ))
2889 while (!NewDead.
empty()) {
2907InlineResult CallAnalyzer::analyze() {
2910 auto Result = onAnalysisStart();
2919 for (User *U :
Caller->users()) {
2922 IsCallerRecursive =
true;
2930 for (Argument &FAI :
F.args()) {
2932 SimplifiedValues[&FAI] = *CAI;
2936 Value *PtrArg = *CAI;
2937 if (ConstantInt *
C = stripAndComputeInBoundsConstantOffsets(PtrArg)) {
2938 ConstantOffsetPtrs[&FAI] = std::make_pair(PtrArg,
C->getValue());
2942 SROAArgValues[&FAI] = SROAArg;
2943 onInitializeSROAArg(SROAArg);
2944 EnabledSROAAllocas.
insert(SROAArg);
2949 NumConstantOffsetPtrArgs = ConstantOffsetPtrs.
size();
2950 NumAllocaArgs = SROAArgValues.
size();
2954 SmallPtrSet<const Value *, 32> EphValuesStorage;
2955 const SmallPtrSetImpl<const Value *> *EphValues = &EphValuesStorage;
2956 if (GetEphValuesCache)
2957 EphValues = &GetEphValuesCache(
F).ephValues();
2969 typedef SmallSetVector<BasicBlock *, 16> BBSetVector;
2970 BBSetVector BBWorklist;
2971 BBWorklist.insert(&
F.getEntryBlock());
2974 for (
unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
2997 InlineResult
IR = analyzeBlock(BB, *EphValues);
2998 if (!
IR.isSuccess())
3008 if (ConstantInt *SimpleCond = getSimplifiedValue<ConstantInt>(
Cond)) {
3010 BBWorklist.insert(NextBB);
3011 KnownSuccessors[BB] = NextBB;
3012 findDeadBlocks(BB, NextBB);
3018 if (ConstantInt *SimpleCond = getSimplifiedValue<ConstantInt>(
Cond)) {
3019 BasicBlock *NextBB =
SI->findCaseValue(SimpleCond)->getCaseSuccessor();
3020 BBWorklist.insert(NextBB);
3021 KnownSuccessors[BB] = NextBB;
3022 findDeadBlocks(BB, NextBB);
3031 onBlockAnalyzed(BB);
3037 if (!isSoleCallToLocalFunction(CandidateCall,
F) && ContainsNoDuplicateCall)
3047 FinalStackSizeThreshold = *AttrMaxStackSize;
3048 if (AllocatedSize > FinalStackSizeThreshold)
3051 return finalizeAnalysis();
3054void InlineCostCallAnalyzer::print(raw_ostream &OS) {
3055#define DEBUG_PRINT_STAT(x) OS << " " #x ": " << x << "\n"
3057 F.print(OS, &Writer);
3072#undef DEBUG_PRINT_STAT
3075#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3089 auto CalleeTLI = GetTLI(*Callee);
3091 TTI.areInlineCompatible(Caller, Callee)) &&
3092 GetTLI(*Caller).areInlineCompatible(CalleeTLI,
3094 AttributeFuncs::areInlineCompatible(*Caller, *Callee);
3100 for (
unsigned I = 0, E =
Call.arg_size();
I != E; ++
I) {
3101 if (
Call.isByValArgument(
I)) {
3107 unsigned PointerSize =
DL.getPointerSizeInBits(AS);
3109 unsigned NumStores = (
TypeSize + PointerSize - 1) / PointerSize;
3117 NumStores = std::min(NumStores, 8U);
3130 return std::min<int64_t>(
Cost, INT_MAX);
3141 GetAssumptionCache, GetTLI, GetBFI, PSI, ORE,
3162 InlineCostCallAnalyzer CA(*
Call.getCalledFunction(),
Call, Params, CalleeTTI,
3163 GetAssumptionCache, GetBFI, GetTLI, PSI, ORE,
true,
3165 auto R = CA.analyze();
3167 return std::nullopt;
3168 return CA.getCost();
3177 InlineCostFeaturesAnalyzer CFA(CalleeTTI, GetAssumptionCache, GetBFI, GetTLI,
3178 PSI, ORE, *
Call.getCalledFunction(),
Call);
3179 auto R = CFA.analyze();
3181 return std::nullopt;
3182 return CFA.features();
3197 if (Callee->isPresplitCoroutine())
3205 unsigned AllocaAS = Callee->getDataLayout().getAllocaAddrSpace();
3206 for (
unsigned I = 0, E =
Call.arg_size();
I != E; ++
I)
3207 if (
Call.isByValArgument(
I)) {
3216 if (
Call.hasFnAttr(Attribute::AlwaysInline)) {
3217 if (
Call.getAttributes().hasFnAttr(Attribute::NoInline))
3221 if (IsViable.isSuccess())
3233 if (Caller->hasOptNone())
3238 if (!Caller->nullPointerIsDefined() && Callee->nullPointerIsDefined())
3242 if (Callee->isInterposable())
3246 if (Callee->hasFnAttribute(Attribute::NoInline))
3250 if (
Call.isNoInline())
3254 if (Callee->hasFnAttribute(
"loader-replaceable"))
3257 return std::nullopt;
3273 if (UserDecision->isSuccess())
3280 "Inlining forced by -inline-all-viable-calls");
3283 <<
"... (caller:" <<
Call.getCaller()->getName()
3286 InlineCostCallAnalyzer CA(*Callee,
Call, Params, CalleeTTI,
3287 GetAssumptionCache, GetBFI, GetTLI, PSI, ORE,
3297 if (CA.wasDecidedByCostBenefit()) {
3300 CA.getCostBenefitPair());
3305 if (CA.wasDecidedByCostThreshold())
3307 CA.getStaticBonusApplied());
3316 bool ReturnsTwice =
F.hasFnAttribute(Attribute::ReturnsTwice);
3326 for (
auto &
II : BB) {
3343 switch (Callee->getIntrinsicID()) {
3346 case llvm::Intrinsic::icall_branch_funnel:
3350 "disallowed inlining of @llvm.icall.branch.funnel");
3351 case llvm::Intrinsic::localescape:
3355 "disallowed inlining of @llvm.localescape");
3356 case llvm::Intrinsic::vastart:
3360 "contains VarArgs initialized with va_start");
3431 unsigned SizeOptLevel) {
3434 if (SizeOptLevel == 1)
3436 if (SizeOptLevel == 2)
3479 InlineCostCallAnalyzer ICCA(*CalledFunction, *CB, Params,
TTI,
3480 GetAssumptionCache,
nullptr,
nullptr, PSI,
3483 OS <<
" Analyzing call of " << CalledFunction->
getName()
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
static InstructionCost getCost(Instruction &Inst, TTI::TargetCostKind CostKind, TargetTransformInfo &TTI, TargetLibraryInfo &TLI)
static bool isColdCallSite(CallBase &CB, BlockFrequencyInfo &CallerBFI)
Return true if the block containing the call site has a BlockFrequency of less than ColdCCRelFreq% of...
static bool IsIndirectCall(const MachineInstr *MI)
static cl::opt< int > InlineAsmInstrCost("inline-asm-instr-cost", cl::Hidden, cl::init(0), cl::desc("Cost of a single inline asm instruction when inlining"))
static cl::opt< int > InlineSavingsMultiplier("inline-savings-multiplier", cl::Hidden, cl::init(8), cl::desc("Multiplier to multiply cycle savings by during inlining"))
static cl::opt< int > InlineThreshold("inline-threshold", cl::Hidden, cl::init(225), cl::desc("Control the amount of inlining to perform (default = 225)"))
static cl::opt< int > CallPenalty("inline-call-penalty", cl::Hidden, cl::init(25), cl::desc("Call penalty that is applied per callsite when inlining"))
static cl::opt< int > HotCallSiteThreshold("hot-callsite-threshold", cl::Hidden, cl::init(3000), cl::desc("Threshold for hot callsites "))
static cl::opt< int > ColdThreshold("inlinecold-threshold", cl::Hidden, cl::init(45), cl::desc("Threshold for inlining functions with cold attribute"))
static cl::opt< size_t > RecurStackSizeThreshold("recursive-inline-max-stacksize", cl::Hidden, cl::init(InlineConstants::TotalAllocaSizeRecursiveCaller), cl::desc("Do not inline recursive functions with a stack " "size that exceeds the specified limit"))
static cl::opt< bool > PrintInstructionComments("print-instruction-comments", cl::Hidden, cl::init(false), cl::desc("Prints comments for instruction based on inline cost analysis"))
static cl::opt< int > LocallyHotCallSiteThreshold("locally-hot-callsite-threshold", cl::Hidden, cl::init(525), cl::desc("Threshold for locally hot callsites "))
static cl::opt< bool > InlineCallerSupersetNoBuiltin("inline-caller-superset-nobuiltin", cl::Hidden, cl::init(true), cl::desc("Allow inlining when caller has a superset of callee's nobuiltin " "attributes."))
static cl::opt< int > HintThreshold("inlinehint-threshold", cl::Hidden, cl::init(325), cl::desc("Threshold for inlining functions with inline hint"))
static cl::opt< size_t > StackSizeThreshold("inline-max-stacksize", cl::Hidden, cl::init(std::numeric_limits< size_t >::max()), cl::desc("Do not inline functions with a stack size " "that exceeds the specified limit"))
static int computeThresholdFromOptLevels(unsigned OptLevel, unsigned SizeOptLevel)
static cl::opt< uint64_t > HotCallSiteRelFreq("hot-callsite-rel-freq", cl::Hidden, cl::init(60), cl::desc("Minimum block frequency, expressed as a multiple of caller's " "entry frequency, for a callsite to be hot in the absence of " "profile information."))
static cl::opt< int > InlineSavingsProfitableMultiplier("inline-savings-profitable-multiplier", cl::Hidden, cl::init(4), cl::desc("A multiplier on top of cycle savings to decide whether the " "savings won't justify the cost"))
static cl::opt< int > MemAccessCost("inline-memaccess-cost", cl::Hidden, cl::init(0), cl::desc("Cost of load/store instruction when inlining"))
static cl::opt< int > ColdCallSiteThreshold("inline-cold-callsite-threshold", cl::Hidden, cl::init(45), cl::desc("Threshold for inlining cold callsites"))
static cl::opt< bool > IgnoreTTIInlineCompatible("ignore-tti-inline-compatible", cl::Hidden, cl::init(false), cl::desc("Ignore TTI attributes compatibility check between callee/caller " "during inline cost calculation"))
static cl::opt< bool > OptComputeFullInlineCost("inline-cost-full", cl::Hidden, cl::desc("Compute the full inline cost of a call site even when the cost " "exceeds the threshold."))
#define DEBUG_PRINT_STAT(x)
static cl::opt< bool > InlineEnableCostBenefitAnalysis("inline-enable-cost-benefit-analysis", cl::Hidden, cl::init(false), cl::desc("Enable the cost-benefit analysis for the inliner"))
static cl::opt< int > InstrCost("inline-instr-cost", cl::Hidden, cl::init(5), cl::desc("Cost of a single instruction when inlining"))
static cl::opt< bool > InlineAllViableCalls("inline-all-viable-calls", cl::Hidden, cl::init(false), cl::desc("Inline all viable calls, even if they exceed the inlining " "threshold"))
static cl::opt< int > InlineSizeAllowance("inline-size-allowance", cl::Hidden, cl::init(100), cl::desc("The maximum size of a callee that get's " "inlined without sufficient cycle savings"))
static bool functionsHaveCompatibleAttributes(Function *Caller, Function *Callee, TargetTransformInfo &TTI, function_ref< const TargetLibraryInfo &(Function &)> &GetTLI)
Test that there are no attribute conflicts between Caller and Callee that prevent inlining.
static cl::opt< int > ColdCallSiteRelFreq("cold-callsite-rel-freq", cl::Hidden, cl::init(2), cl::desc("Maximum block frequency, expressed as a percentage of caller's " "entry frequency, for a callsite to be cold in the absence of " "profile information."))
static cl::opt< bool > DisableGEPConstOperand("disable-gep-const-evaluation", cl::Hidden, cl::init(false), cl::desc("Disables evaluation of GetElementPtr with constant operands"))
static cl::opt< int > DefaultThreshold("inlinedefault-threshold", cl::Hidden, cl::init(225), cl::desc("Default amount of inlining to perform"))
static Constant * getFalse(Type *Ty)
For a boolean type or a vector of boolean type, return false or a vector with every element false.
Legalize the Machine IR a function s Machine IR
Machine Check Debug Module
uint64_t IntrinsicInst * II
FunctionAnalysisManager FAM
const SmallVectorImpl< MachineOperand > & Cond
void visit(MachineFunction &MF, MachineBasicBlock &Start, std::function< void(MachineBasicBlock *)> op)
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static SymbolRef::Type getType(const Symbol *Sym)
LLVM_ABI APInt udiv(const APInt &RHS) const
Unsigned division operation.
bool ult(const APInt &RHS) const
Unsigned less than comparison.
LLVM_ABI APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
PointerType * getType() const
Overload to return most specific pointer type.
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
Functions, function parameters, and return types can have attributes to indicate how they should be t...
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
bool isValid() const
Return true if the attribute is any kind of attribute.
LLVM Basic Block Representation.
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
LLVM_ABI std::optional< uint64_t > getBlockProfileCount(const BasicBlock *BB, bool AllowSynthetic=false) const
Returns the estimated profile count of BB.
LLVM_ABI BlockFrequency getEntryFreq() const
LLVM_ABI BlockFrequency getBlockFreq(const BasicBlock *BB) const
getblockFreq - Return block frequency.
LLVM_ABI std::optional< BlockFrequency > mul(uint64_t Factor) const
Multiplies frequency with Factor. Returns nullopt in case of overflow.
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
Value * getCondition() const
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
bool hasFnAttr(Attribute::AttrKind Kind) const
Determine whether this call has the given attribute.
LLVM_ABI bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Determine whether the argument or parameter has the given attribute.
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
bool onlyReadsMemory(unsigned OpNo) const
Value * getCalledOperand() const
Attribute getFnAttr(StringRef Kind) const
Get the attribute of a given kind for the function.
Value * getArgOperand(unsigned i) const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
FunctionType * getFunctionType() const
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
unsigned arg_size() const
LLVM_ABI Function * getCaller()
Helper to get the caller (the parent function).
static LLVM_ABI Constant * getSub(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
const APInt & getValue() const
Return the constant as an APInt value reference.
static LLVM_ABI ConstantInt * getBool(LLVMContext &Context, bool V)
LLVM_ABI bool isAllOnesValue() const
Return true if this is the value that would be returned by getAllOnesValue.
LLVM_ABI bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
A parsed version of the target data layout string in and methods for querying it.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
A cache of ephemeral values within a function.
Type * getReturnType() const
const BasicBlock & getEntryBlock() const
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
static LLVM_ABI bool compare(const APInt &LHS, const APInt &RHS, ICmpInst::Predicate Pred)
Return result of LHS Pred RHS comparison.
LLVM_ABI void collectAsmStrs(SmallVectorImpl< StringRef > &AsmStrs) const
Represents the cost of inlining a function.
static InlineCost getNever(const char *Reason, std::optional< CostBenefitPair > CostBenefit=std::nullopt)
static InlineCost getAlways(const char *Reason, std::optional< CostBenefitPair > CostBenefit=std::nullopt)
static InlineCost get(int Cost, int Threshold, int StaticBonus=0)
InlineResult is basically true or false.
static InlineResult success()
static InlineResult failure(const char *Reason)
const char * getFailureReason() const
Base class for instruction visitors.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Class to represent pointers.
unsigned getAddressSpace() const
Return the address space of the Pointer type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
void reserve(size_type N)
void push_back(const T &Elt)
StringRef - Represent a constant reference to a string, i.e.
static constexpr size_t npos
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
constexpr bool empty() const
empty - Check if the string is empty.
bool contains(StringRef Other) const
Return true if the given string is a substring of *this, and false otherwise.
size_t find(char C, size_t From=0) const
Search for the first character C in the string.
TypeSize getElementOffset(unsigned Idx) const
Analysis pass providing the TargetTransformInfo.
Provides information about what library functions are available for the current target.
bool has(LibFunc F) const
Tests whether a library function is available.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
static constexpr TypeSize getZero()
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Value * getOperand(unsigned i) const
LLVM Value Representation.
LLVMContext & getContext() const
All values hold a context through their type.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
int getNumOccurrences() const
std::pair< iterator, bool > insert(const ValueT &V)
bool erase(const ValueT &V)
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
const char FunctionInlineCostMultiplierAttributeName[]
const int OptSizeThreshold
Use when optsize (-Os) is specified.
const int OptMinSizeThreshold
Use when minsize (-Oz) is specified.
const uint64_t MaxSimplifiedDynamicAllocaToInline
Do not inline dynamic allocas that have been constant propagated to be static allocas above this amou...
const int IndirectCallThreshold
const int OptAggressiveThreshold
Use when -O3 is specified.
const char MaxInlineStackSizeAttributeName[]
const unsigned TotalAllocaSizeRecursiveCaller
Do not inline functions which allocate this many bytes on the stack when the caller is recursive.
LLVM_ABI int getInstrCost()
bool match(Val *V, const Pattern &P)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
initializer< Ty > init(const Ty &Val)
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< InstrNode * > Instr
friend class Instruction
Iterator for Instructions in a `BasicBlock.
This is an optimization pass for GlobalISel generic memory operations.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
FunctionAddr VTableAddr Value
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
LLVM_ABI Constant * ConstantFoldSelectInstruction(Constant *Cond, Constant *V1, Constant *V2)
Attempt to constant fold a select instruction with the specified operands.
LLVM_ABI bool isAssumeLikeIntrinsic(const Instruction *I)
Return true if it is an intrinsic that cannot be speculated but also cannot trap.
LLVM_ABI bool canConstantFoldCallTo(const CallBase *Call, const Function *F)
canConstantFoldCallTo - Return true if its even possible to fold a call to the specified function.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI std::optional< int > getStringFnAttrAsInt(CallBase &CB, StringRef AttrKind)
auto successors(const MachineBasicBlock *BB)
OuterAnalysisManagerProxy< ModuleAnalysisManager, Function > ModuleAnalysisManagerFunctionProxy
Provide the ModuleAnalysisManager to Function proxy.
LLVM_ABI Value * lowerObjectSizeCall(IntrinsicInst *ObjectSize, const DataLayout &DL, const TargetLibraryInfo *TLI, bool MustSucceed)
Try to turn a call to @llvm.objectsize into an integer value of the given Type.
LLVM_ABI Value * simplifyInstructionWithOperands(Instruction *I, ArrayRef< Value * > NewOps, const SimplifyQuery &Q)
Like simplifyInstruction but the operands of I are replaced with NewOps.
LogicalResult failure(bool IsFailure=true)
Utility function to generate a LogicalResult.
gep_type_iterator gep_type_end(const User *GEP)
LLVM_ABI Constant * ConstantFoldCall(const CallBase *Call, Function *F, ArrayRef< Constant * > Operands, const TargetLibraryInfo *TLI=nullptr, bool AllowNonDeterministic=true)
ConstantFoldCall - Attempt to constant fold a call to the specified function with the specified argum...
LLVM_ABI Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
LLVM_ABI InlineResult isInlineViable(Function &Callee)
Check if it is mechanically possible to inline the function Callee, based on the contents of the func...
auto dyn_cast_or_null(const Y &Val)
LLVM_ABI Value * simplifyFNegInst(Value *Op, FastMathFlags FMF, const SimplifyQuery &Q)
Given operand for an FNeg, fold the result or return null.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
generic_gep_type_iterator<> gep_type_iterator
std::enable_if_t< std::is_unsigned_v< T >, T > SaturatingMultiplyAdd(T X, T Y, T A, bool *ResultOverflowed=nullptr)
Multiply two unsigned integers, X and Y, and add the unsigned integer, A to the product.
Function::ProfileCount ProfileCount
LLVM_ABI std::optional< InlineCostFeatures > getInliningCostFeatures(CallBase &Call, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, function_ref< const TargetLibraryInfo &(Function &)> GetTLI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Get the expanded cost features.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference sizeof(SmallVector&lt;T, 0&gt;).
bool isa(const From &Val)
isa&lt;X&gt; - Return true if the parameter to the template is an instance of one of the template type arguments.
LLVM_ABI Value * simplifyExtractValueInst(Value *Agg, ArrayRef< unsigned > Idxs, const SimplifyQuery &Q)
Given operands for an ExtractValueInst, fold the result or return null.
LLVM_ABI InlineCost getInlineCost(CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< const TargetLibraryInfo &(Function &)> GetTLI, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr, function_ref< EphemeralValuesCache &(Function &)> GetEphValuesCache=nullptr)
Get an InlineCost object representing the cost of inlining this callsite.
LLVM_ABI std::optional< InlineResult > getAttributeBasedInliningDecision(CallBase &Call, Function *Callee, TargetTransformInfo &CalleeTTI, function_ref< const TargetLibraryInfo &(Function &)> GetTLI)
Returns InlineResult::success() if the call site should be always inlined because of user directives, and InlineResult::failure() if the inlining may never happen because of user directives or incompatibilities detectable without needing callee traversal.
LLVM_ABI Value * simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a BinaryOperator, fold the result or return null.
DWARFExpression::Operation Op
LLVM_ABI InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
LLVM_ABI int getCallsiteCost(const TargetTransformInfo &TTI, const CallBase &Call, const DataLayout &DL)
Return the cost associated with a callsite, including parameter passing and the call/return instruction.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
gep_type_iterator gep_type_begin(const User *GEP)
LLVM_ABI std::optional< int > getInliningCostEstimate(CallBase &Call, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, function_ref< const TargetLibraryInfo &(Function &)> GetTLI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Get the cost estimate ignoring thresholds.
auto predecessors(const MachineBasicBlock *BB)
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI Constant * ConstantFoldInstOperands(const Instruction *I, ArrayRef< Constant * > Ops, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, bool AllowNonDeterministic=true)
ConstantFoldInstOperands - Attempt to constant fold an instruction with the specified operands.
std::enable_if_t< std::is_unsigned_v< T >, T > SaturatingAdd(T X, T Y, bool *ResultOverflowed=nullptr)
Add two unsigned integers, X and Y, of type T.
std::array< int, static_cast< size_t >(InlineCostFeatureIndex::NumberOfFeatures)> InlineCostFeatures
static LLVM_ABI void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM)
Thresholds to tune inline cost analysis.
std::optional< int > OptMinSizeThreshold
Threshold to use when the caller is optimized for minsize.
std::optional< int > OptSizeThreshold
Threshold to use when the caller is optimized for size.
std::optional< int > ColdCallSiteThreshold
Threshold to use when the callsite is considered cold.
std::optional< int > ColdThreshold
Threshold to use for cold callees.
std::optional< int > HotCallSiteThreshold
Threshold to use when the callsite is considered hot.
int DefaultThreshold
The default threshold to start with for a callee.
std::optional< int > HintThreshold
Threshold to use for callees with inline hint.
std::optional< int > LocallyHotCallSiteThreshold
Threshold to use when the callsite is considered hot relative to function entry.