33#include "llvm/Config/llvm-config.h"
55#define DEBUG_TYPE "inline-cost"
57STATISTIC(NumCallsAnalyzed,
"Number of call sites analyzed");
61 cl::desc(
"Default amount of inlining to perform"));
70 cl::desc(
"Ignore TTI attributes compatibility check between callee/caller "
71 "during inline cost calculation"));
75 cl::desc(
"Prints comments for instruction based on inline cost analysis"));
79 cl::desc(
"Control the amount of inlining to perform (default = 225)"));
83 cl::desc(
"Threshold for inlining functions with inline hint"));
88 cl::desc(
"Threshold for inlining cold callsites"));
92 cl::desc(
"Enable the cost-benefit analysis for the inliner"));
99 cl::desc(
"Multiplier to multiply cycle savings by during inlining"));
106 cl::desc(
"A multiplier on top of cycle savings to decide whether the "
107 "savings won't justify the cost"));
111 cl::desc(
"The maximum size of a callee that get's "
112 "inlined without sufficient cycle savings"));
119 cl::desc(
"Threshold for inlining functions with cold attribute"));
123 cl::desc(
"Threshold for hot callsites "));
127 cl::desc(
"Threshold for locally hot callsites "));
131 cl::desc(
"Maximum block frequency, expressed as a percentage of caller's "
132 "entry frequency, for a callsite to be cold in the absence of "
133 "profile information."));
137 cl::desc(
"Minimum block frequency, expressed as a multiple of caller's "
138 "entry frequency, for a callsite to be hot in the absence of "
139 "profile information."));
143 cl::desc(
"Cost of a single instruction when inlining"));
147 cl::desc(
"Cost of a single inline asm instruction when inlining"));
151 cl::desc(
"Cost of load/store instruction when inlining"));
155 cl::desc(
"Call penalty that is applied per callsite when inlining"));
159 cl::init(std::numeric_limits<size_t>::max()),
160 cl::desc(
"Do not inline functions with a stack size "
161 "that exceeds the specified limit"));
166 cl::desc(
"Do not inline recursive functions with a stack "
167 "size that exceeds the specified limit"));
171 cl::desc(
"Compute the full inline cost of a call site even when the cost "
172 "exceeds the threshold."));
176 cl::desc(
"Allow inlining when caller has a superset of callee's nobuiltin "
181 cl::desc(
"Disables evaluation of GetElementPtr with constant operands"));
185 cl::desc(
"Inline all viable calls, even if they exceed the inlining "
213class InlineCostCallAnalyzer;
217struct InstructionCostDetail {
220 int ThresholdBefore = 0;
221 int ThresholdAfter = 0;
/// Returns the change in threshold recorded for this entry, i.e.
/// ThresholdAfter minus ThresholdBefore. The result is negative when the
/// threshold went down between the two snapshots.
223 int getThresholdDelta()
const {
return ThresholdAfter - ThresholdBefore; }
/// Returns the change in cost recorded for this entry, i.e. CostAfter minus
/// CostBefore.
225 int getCostDelta()
const {
return CostAfter - CostBefore; }
/// Returns true when the two threshold snapshots differ. The annotation
/// writer uses this to decide whether to print a "threshold delta" field.
227 bool hasThresholdChanged()
const {
return ThresholdAfter != ThresholdBefore; }
232 InlineCostCallAnalyzer *
const ICCA;
/// Binds the writer to the analyzer it will query. The pointer is stored
/// unchanged in the const member ICCA; emitInstructionAnnot later calls
/// ICCA->getCostDetails() and ICCA->getSimplifiedValueUnchecked() through it.
235 InlineCostAnnotationWriter(InlineCostCallAnalyzer *ICCA) : ICCA(ICCA) {}
236 void emitInstructionAnnot(
const Instruction *
I,
237 formatted_raw_ostream &OS)
override;
248class CallAnalyzer :
public InstVisitor<CallAnalyzer, bool> {
249 typedef InstVisitor<CallAnalyzer, bool> Base;
250 friend class InstVisitor<CallAnalyzer, bool>;
/// Virtual destructor: CallAnalyzer declares virtual hooks and serves as the
/// base of InlineCostCallAnalyzer and InlineCostFeaturesAnalyzer.
253 virtual ~CallAnalyzer() =
default;
255 const TargetTransformInfo &TTI;
258 function_ref<AssumptionCache &(
Function &)> GetAssumptionCache;
261 function_ref<BlockFrequencyInfo &(
Function &)> GetBFI;
264 function_ref<
const TargetLibraryInfo &(
Function &)> GetTLI;
267 ProfileSummaryInfo *PSI;
273 const DataLayout &DL;
276 OptimizationRemarkEmitter *ORE;
281 CallBase &CandidateCall;
284 function_ref<EphemeralValuesCache &(
Function &)> GetEphValuesCache =
nullptr;
// Per-block and per-instruction notification hooks. All four defaults are
// empty; InlineCostCallAnalyzer overrides each of them later in this file.
// NOTE(review): the call sites are not visible in this excerpt, so the exact
// point at which each hook fires is inferred from its name -- confirm against
// analyzeBlock().

/// Default: does nothing. InlineCostCallAnalyzer's override snapshots the
/// running Cost into CostAtBBStart.
288 virtual void onBlockStart(
const BasicBlock *BB) {}
/// Default: does nothing.
291 virtual void onBlockAnalyzed(
const BasicBlock *BB) {}
/// Default: does nothing. InlineCostCallAnalyzer's override records
/// CostBefore / ThresholdBefore for the instruction.
294 virtual void onInstructionAnalysisStart(
const Instruction *
I) {}
/// Default: does nothing. InlineCostCallAnalyzer's override records
/// CostAfter / ThresholdAfter for the instruction.
297 virtual void onInstructionAnalysisFinish(
const Instruction *
I) {}
/// Default implementation always returns false.
/// InlineCostCallAnalyzer overrides this with a check involving
/// IgnoreThreshold, ComputeFullInlineCost and Cost versus Threshold;
/// InlineCostFeaturesAnalyzer's override also always returns false.
/// NOTE(review): presumably a true result asks the caller to abandon the
/// analysis early -- the consumer of the return value is not visible here.
307 virtual bool shouldStop() {
return false; }
/// Default: does nothing. disableSROAForArg() calls this with the alloca
/// before erasing it from EnabledSROAAllocas.
316 virtual void onDisableSROA(AllocaInst *Arg) {}
/// Default: does nothing. disableLoadElimination() calls this only while
/// EnableLoadElimination is still true, immediately before clearing the flag.
319 virtual void onDisableLoadElimination() {}
/// Default: returns true without inspecting Call.
/// NOTE(review): the meaning of a false result is decided by the caller,
/// which is not visible in this excerpt -- confirm before relying on it.
323 virtual bool onCallBaseVisitStart(CallBase &
Call) {
return true; }
/// Default: does nothing. InlineCostCallAnalyzer's override adds CallPenalty
/// to the cost; InlineCostFeaturesAnalyzer's override adds it to the
/// call_penalty feature.
326 virtual void onCallPenalty() {}
/// Default: does nothing.
/// NOTE(review): presumably signalled for load/store instructions; the call
/// sites are not visible in this excerpt -- confirm.
// The redundant ';' that used to follow the body was dropped: it formed an
// empty declaration (-Wextra-semi) and no sibling hook has one.
329 virtual void onMemAccess() {}
/// Default: does nothing. InlineCostFeaturesAnalyzer's override bumps the
/// load_elimination feature by 1.
333 virtual void onLoadEliminationOpportunity() {}
/// Default: does nothing; Call is unused here. Both derived analyzers in this
/// file override it.
337 virtual void onCallArgumentSetup(
const CallBase &
Call) {}
/// Default: does nothing. InlineCostFeaturesAnalyzer's override charges
/// 3 * InstrCost to the load_relative_intrinsic feature.
340 virtual void onLoadRelativeIntrinsic() {}
// Switch-related hooks.
// NOTE(review): how the boolean results of onJumpTable / onCaseCluster are
// consumed is not visible in this excerpt -- confirm against visitSwitchInst.

/// Default: returns true and ignores JumpTableSize.
348 virtual bool onJumpTable(
unsigned JumpTableSize) {
return true; }
/// Default: returns true and ignores NumCaseCluster.
352 virtual bool onCaseCluster(
unsigned NumCaseCluster) {
return true; }
/// Default: does nothing. Both derived analyzers in this file override it to
/// account for the switch based on the three arguments.
356 virtual void onFinalizeSwitch(
unsigned JumpTableSize,
unsigned NumCaseCluster,
357 bool DefaultDestUnreachable) {}
/// Default: does nothing. InlineCostCallAnalyzer's override charges one
/// InstrCost.
361 virtual void onMissedSimplification() {}
/// Default: does nothing. InlineCostCallAnalyzer's override accumulates into
/// NumInlineAsmInstructions.
364 virtual void onInlineAsm(
const InlineAsm &Arg) {}
/// Default: does nothing. The derived analyzers' overrides record an
/// SROAArgCost entry for the alloca.
367 virtual void onInitializeSROAArg(AllocaInst *Arg) {}
/// Default: does nothing. handleSROA() calls this with the alloca returned by
/// getSROAArgForValueOrNull().
370 virtual void onAggregateSROAUse(AllocaInst *V) {}
372 bool handleSROA(
Value *V,
bool DoNotDisable) {
374 if (
auto *SROAArg = getSROAArgForValueOrNull(V)) {
376 onAggregateSROAUse(SROAArg);
379 disableSROAForArg(SROAArg);
384 bool IsCallerRecursive =
false;
385 bool IsRecursiveCall =
false;
386 bool ExposesReturnsTwice =
false;
387 bool HasDynamicAlloca =
false;
388 bool ContainsNoDuplicateCall =
false;
389 bool HasReturn =
false;
390 bool HasIndirectBr =
false;
391 bool HasUninlineableIntrinsic =
false;
392 bool InitsVargArgs =
false;
395 uint64_t AllocatedSize = 0;
396 unsigned NumInstructions = 0;
397 unsigned NumInlineAsmInstructions = 0;
398 unsigned NumVectorInstructions = 0;
408 DenseMap<Value *, Value *> SimplifiedValues;
412 DenseMap<Value *, AllocaInst *> SROAArgValues;
415 DenseSet<AllocaInst *> EnabledSROAAllocas;
418 DenseMap<Value *, std::pair<Value *, APInt>> ConstantOffsetPtrs;
421 SmallPtrSet<BasicBlock *, 16> DeadBlocks;
425 DenseMap<BasicBlock *, BasicBlock *> KnownSuccessors;
430 bool EnableLoadElimination =
true;
433 bool AllowRecursiveCall =
false;
435 SmallPtrSet<Value *, 16> LoadAddrSet;
437 AllocaInst *getSROAArgForValueOrNull(
Value *V)
const {
438 auto It = SROAArgValues.find(V);
439 if (It == SROAArgValues.end() || EnabledSROAAllocas.count(It->second) == 0)
446 template <
typename T>
T *getDirectOrSimplifiedValue(
Value *V)
const {
449 return getSimplifiedValue<T>(V);
453 bool isAllocaDerivedArg(
Value *V);
454 void disableSROAForArg(AllocaInst *SROAArg);
455 void disableSROA(
Value *V);
456 void findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB);
457 void disableLoadElimination();
458 bool isGEPFree(GetElementPtrInst &
GEP);
459 bool canFoldInboundsGEP(GetElementPtrInst &
I);
460 bool accumulateGEPOffset(GEPOperator &
GEP, APInt &
Offset);
461 bool simplifyCallSite(Function *F, CallBase &
Call);
462 bool simplifyCmpInstForRecCall(CmpInst &Cmp);
464 bool simplifyIntrinsicCallIsConstant(CallBase &CB);
465 bool simplifyIntrinsicCallObjectSize(CallBase &CB);
466 ConstantInt *stripAndComputeInBoundsConstantOffsets(
Value *&V);
467 bool isLoweredToCall(Function *F, CallBase &
Call);
474 bool paramHasAttr(Argument *
A, Attribute::AttrKind Attr);
478 bool isKnownNonNullInCallee(
Value *V);
481 bool allowSizeGrowth(CallBase &
Call);
484 InlineResult analyzeBlock(BasicBlock *BB,
485 const SmallPtrSetImpl<const Value *> &EphValues);
491 void visit(Function *);
492 void visit(Function &);
493 void visit(BasicBlock *);
494 void visit(BasicBlock &);
497 bool visitInstruction(Instruction &
I);
500 bool visitAlloca(AllocaInst &
I);
501 bool visitPHI(PHINode &
I);
502 bool visitGetElementPtr(GetElementPtrInst &
I);
503 bool visitBitCast(BitCastInst &
I);
504 bool visitPtrToInt(PtrToIntInst &
I);
505 bool visitIntToPtr(IntToPtrInst &
I);
506 bool visitCastInst(CastInst &
I);
507 bool visitCmpInst(CmpInst &
I);
508 bool visitSub(BinaryOperator &
I);
509 bool visitBinaryOperator(BinaryOperator &
I);
510 bool visitFNeg(UnaryOperator &
I);
511 bool visitLoad(LoadInst &
I);
512 bool visitStore(StoreInst &
I);
513 bool visitExtractValue(ExtractValueInst &
I);
514 bool visitInsertValue(InsertValueInst &
I);
515 bool visitCallBase(CallBase &
Call);
516 bool visitReturnInst(ReturnInst &RI);
517 bool visitUncondBrInst(UncondBrInst &BI);
518 bool visitCondBrInst(CondBrInst &BI);
519 bool visitSelectInst(SelectInst &SI);
520 bool visitSwitchInst(SwitchInst &SI);
521 bool visitIndirectBrInst(IndirectBrInst &IBI);
522 bool visitResumeInst(ResumeInst &RI);
523 bool visitCleanupReturnInst(CleanupReturnInst &RI);
524 bool visitCatchReturnInst(CatchReturnInst &RI);
525 bool visitUnreachableInst(UnreachableInst &
I);
529 Function &Callee, CallBase &
Call,
const TargetTransformInfo &TTI,
530 function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
531 function_ref<BlockFrequencyInfo &(Function &)> GetBFI =
nullptr,
532 function_ref<
const TargetLibraryInfo &(Function &)> GetTLI =
nullptr,
533 ProfileSummaryInfo *PSI =
nullptr,
534 OptimizationRemarkEmitter *ORE =
nullptr,
535 function_ref<EphemeralValuesCache &(Function &)> GetEphValuesCache =
537 : TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI),
538 GetTLI(GetTLI), PSI(PSI), F(
Callee), DL(F.getDataLayout()), ORE(ORE),
539 CandidateCall(
Call), GetEphValuesCache(GetEphValuesCache) {}
541 InlineResult analyze();
544 Value *getSimplifiedValueUnchecked(
Value *V)
const {
545 return SimplifiedValues.lookup(V);
550 template <
typename T>
T *getSimplifiedValue(
Value *V)
const {
551 Value *SimpleV = SimplifiedValues.lookup(V);
557 if constexpr (std::is_base_of_v<Constant, T>)
562 if (
I->getFunction() != &F)
565 if (Arg->getParent() != &F)
574 unsigned NumConstantArgs = 0;
575 unsigned NumConstantOffsetPtrArgs = 0;
576 unsigned NumAllocaArgs = 0;
577 unsigned NumConstantPtrCmps = 0;
578 unsigned NumConstantPtrDiffs = 0;
579 unsigned NumInstructionsSimplified = 0;
599int64_t getExpectedNumberOfCompare(
int NumCaseCluster) {
600 return 3 *
static_cast<int64_t
>(NumCaseCluster) / 2 - 1;
605class InlineCostCallAnalyzer final :
public CallAnalyzer {
606 const bool ComputeFullInlineCost;
607 int LoadEliminationCost = 0;
612 int SingleBBBonus = 0;
615 const InlineParams &Params;
620 DenseMap<const Instruction *, InstructionCostDetail> InstructionCostDetailMap;
627 int StaticBonusApplied = 0;
630 const bool BoostIndirectCalls;
633 const bool IgnoreThreshold;
636 const bool CostBenefitAnalysisEnabled;
647 int CostAtBBStart = 0;
654 bool DecidedByCostThreshold =
false;
657 bool DecidedByCostBenefit =
false;
660 std::optional<CostBenefitPair> CostBenefit;
662 bool SingleBB =
true;
664 unsigned SROACostSavings = 0;
665 unsigned SROACostSavingsLost = 0;
670 DenseMap<AllocaInst *, int> SROAArgCosts;
679 void updateThreshold(CallBase &
Call, Function &Callee);
681 std::optional<int> getHotCallSiteThreshold(CallBase &
Call,
682 BlockFrequencyInfo *CallerBFI);
685 void addCost(int64_t Inc) {
686 Inc = std::clamp<int64_t>(Inc, INT_MIN, INT_MAX);
687 Cost = std::clamp<int64_t>(Inc + Cost, INT_MIN, INT_MAX);
690 void onDisableSROA(AllocaInst *Arg)
override {
691 auto CostIt = SROAArgCosts.find(Arg);
692 if (CostIt == SROAArgCosts.end())
694 addCost(CostIt->second);
695 SROACostSavings -= CostIt->second;
696 SROACostSavingsLost += CostIt->second;
697 SROAArgCosts.erase(CostIt);
700 void onDisableLoadElimination()
override {
701 addCost(LoadEliminationCost);
702 LoadEliminationCost = 0;
705 bool onCallBaseVisitStart(CallBase &
Call)
override {
706 if (std::optional<int> AttrCallThresholdBonus =
708 Threshold += *AttrCallThresholdBonus;
710 if (std::optional<int> AttrCallCost =
712 addCost(*AttrCallCost);
/// Charges one CallPenalty to the running cost; addCost() clamps the sum.
720 void onCallPenalty()
override { addCost(
CallPenalty); }
724 void onCallArgumentSetup(
const CallBase &
Call)
override {
729 void onLoadRelativeIntrinsic()
override {
733 void onLoweredCall(Function *
F, CallBase &
Call,
744 auto IndirectCallParams = Params;
745 IndirectCallParams.DefaultThreshold =
749 InlineCostCallAnalyzer CA(*
F,
Call, IndirectCallParams,
TTI,
750 GetAssumptionCache, GetBFI, GetTLI, PSI, ORE,
752 if (CA.analyze().isSuccess()) {
755 addCost(-std::max(0, CA.getThreshold() - CA.getCost()));
763 void onFinalizeSwitch(
unsigned JumpTableSize,
unsigned NumCaseCluster,
764 bool DefaultDestUnreachable)
override {
771 if (!DefaultDestUnreachable)
780 if (NumCaseCluster <= 3) {
784 addCost((NumCaseCluster - DefaultDestUnreachable) * 2 *
InstrCost);
788 int64_t ExpectedNumberOfCompare =
789 getExpectedNumberOfCompare(NumCaseCluster);
790 int64_t SwitchCost = ExpectedNumberOfCompare * 2 *
InstrCost;
798 void onInlineAsm(
const InlineAsm &Arg)
override {
803 int SectionLevel = 0;
804 int InlineAsmInstrCount = 0;
805 for (StringRef AsmStr : AsmStrs) {
807 StringRef Trimmed = AsmStr.trim();
808 size_t hashPos = Trimmed.
find(
'#');
810 Trimmed = Trimmed.
substr(0, hashPos);
829 if (SectionLevel == 0)
830 ++InlineAsmInstrCount;
832 NumInlineAsmInstructions += InlineAsmInstrCount;
/// Charges one InstrCost to the running cost via addCost().
836 void onMissedSimplification()
override { addCost(
InstrCost); }
838 void onInitializeSROAArg(AllocaInst *Arg)
override {
840 "Should not initialize SROA costs for null value.");
842 SROACostSavings += SROAArgCost;
843 SROAArgCosts[Arg] = SROAArgCost;
846 void onAggregateSROAUse(AllocaInst *SROAArg)
override {
847 auto CostIt = SROAArgCosts.find(SROAArg);
848 assert(CostIt != SROAArgCosts.end() &&
849 "expected this argument to have a cost");
/// Snapshots the running Cost into CostAtBBStart. onBlockAnalyzed() later
/// uses `Cost - CostAtBBStart` as the cost contributed by the block when
/// accumulating ColdSize. BB itself is not inspected.
854 void onBlockStart(
const BasicBlock *BB)
override { CostAtBBStart = Cost; }
856 void onBlockAnalyzed(
const BasicBlock *BB)
override {
857 if (CostBenefitAnalysisEnabled) {
860 assert(GetBFI &&
"GetBFI must be available");
861 BlockFrequencyInfo *BFI = &(GetBFI(
F));
862 assert(BFI &&
"BFI must be available");
864 if (*ProfileCount == 0)
865 ColdSize += Cost - CostAtBBStart;
873 if (SingleBB && TI->getNumSuccessors() > 1) {
875 Threshold -= SingleBBBonus;
880 void onInstructionAnalysisStart(
const Instruction *
I)
override {
885 auto &CostDetail = InstructionCostDetailMap[
I];
886 CostDetail.CostBefore = Cost;
887 CostDetail.ThresholdBefore = Threshold;
890 void onInstructionAnalysisFinish(
const Instruction *
I)
override {
895 auto &CostDetail = InstructionCostDetailMap[
I];
896 CostDetail.CostAfter = Cost;
897 CostDetail.ThresholdAfter = Threshold;
900 bool isCostBenefitAnalysisEnabled() {
901 if (!PSI || !PSI->hasProfileSummary())
913 if (!PSI->hasInstrumentationProfile())
918 if (!
Caller->getEntryCount())
921 BlockFrequencyInfo *CallerBFI = &(GetBFI(*Caller));
926 if (!PSI->isHotCallSite(CandidateCall, CallerBFI))
930 auto EntryCount =
F.getEntryCount();
931 if (!EntryCount || *EntryCount == 0)
934 BlockFrequencyInfo *CalleeBFI = &(GetBFI(
F));
942 unsigned getInliningCostBenefitAnalysisSavingsMultiplier()
const {
949 unsigned getInliningCostBenefitAnalysisProfitableMultiplier()
const {
955 void OverrideCycleSavingsAndSizeForTesting(APInt &CycleSavings,
int &
Size) {
957 CandidateCall,
"inline-cycle-savings-for-test")) {
958 CycleSavings = *AttrCycleSavings;
962 CandidateCall,
"inline-runtime-cost-for-test")) {
963 Size = *AttrRuntimeCost;
970 std::optional<bool> costBenefitAnalysis() {
971 if (!CostBenefitAnalysisEnabled)
982 BlockFrequencyInfo *CalleeBFI = &(GetBFI(
F));
995 APInt CycleSavings(128, 0);
998 APInt CurrentSavings(128, 0);
1002 if (getSimplifiedValue<ConstantInt>(BI->getCondition()))
1005 if (getSimplifiedValue<ConstantInt>(
SI->getCondition()))
1007 }
else if (SimplifiedValues.
count(&
I)) {
1014 CurrentSavings *= *ProfileCount;
1015 CycleSavings += CurrentSavings;
1019 auto EntryProfileCount =
F.getEntryCount();
1020 assert(EntryProfileCount && *EntryProfileCount);
1021 CycleSavings += *EntryProfileCount / 2;
1022 CycleSavings = CycleSavings.
udiv(*EntryProfileCount);
1025 auto *CallerBB = CandidateCall.
getParent();
1026 BlockFrequencyInfo *CallerBFI = &(GetBFI(*(CallerBB->getParent())));
1033 int Size = Cost - ColdSize;
1039 OverrideCycleSavingsAndSizeForTesting(CycleSavings,
Size);
1040 CostBenefit.emplace(APInt(128,
Size), CycleSavings);
1063 APInt Threshold(128, PSI->getOrCompHotCountThreshold());
1066 APInt UpperBoundCycleSavings = CycleSavings;
1067 UpperBoundCycleSavings *= getInliningCostBenefitAnalysisSavingsMultiplier();
1068 if (UpperBoundCycleSavings.
uge(Threshold))
1071 APInt LowerBoundCycleSavings = CycleSavings;
1072 LowerBoundCycleSavings *=
1073 getInliningCostBenefitAnalysisProfitableMultiplier();
1074 if (LowerBoundCycleSavings.
ult(Threshold))
1078 return std::nullopt;
1081 InlineResult finalizeAnalysis()
override {
1088 if (
Caller->hasMinSize()) {
1089 DominatorTree DT(
F);
1092 for (Loop *L : LI) {
1094 if (DeadBlocks.
count(
L->getHeader()))
1104 if (NumVectorInstructions <= NumInstructions / 10)
1105 Threshold -= VectorBonus;
1106 else if (NumVectorInstructions <= NumInstructions / 2)
1107 Threshold -= VectorBonus / 2;
1109 if (std::optional<int> AttrCost =
1116 Cost *= *AttrCostMult;
1118 if (std::optional<int> AttrThreshold =
1120 Threshold = *AttrThreshold;
1122 if (
auto Result = costBenefitAnalysis()) {
1123 DecidedByCostBenefit =
true;
1130 if (IgnoreThreshold)
1133 DecidedByCostThreshold =
true;
1134 return Cost < std::max(1, Threshold)
1136 : InlineResult::
failure(
"Cost over threshold.");
1139 bool shouldStop()
override {
1140 if (IgnoreThreshold || ComputeFullInlineCost)
1144 if (Cost < Threshold)
1146 DecidedByCostThreshold =
true;
1150 void onLoadEliminationOpportunity()
override {
1154 InlineResult onAnalysisStart()
override {
1165 assert(NumInstructions == 0);
1166 assert(NumVectorInstructions == 0);
1169 updateThreshold(CandidateCall,
F);
1175 assert(SingleBBBonus >= 0);
1176 assert(VectorBonus >= 0);
1181 Threshold += (SingleBBBonus + VectorBonus);
1189 if (
F.getCallingConv() == CallingConv::Cold)
1195 if (Cost >= Threshold && !ComputeFullInlineCost)
1202 InlineCostCallAnalyzer(
1203 Function &Callee, CallBase &
Call,
const InlineParams &Params,
1204 const TargetTransformInfo &
TTI,
1205 function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
1206 function_ref<BlockFrequencyInfo &(Function &)> GetBFI =
nullptr,
1207 function_ref<
const TargetLibraryInfo &(Function &)> GetTLI =
nullptr,
1208 ProfileSummaryInfo *PSI =
nullptr,
1209 OptimizationRemarkEmitter *ORE =
nullptr,
bool BoostIndirect =
true,
1210 bool IgnoreThreshold =
false,
1211 function_ref<EphemeralValuesCache &(Function &)> GetEphValuesCache =
1213 : CallAnalyzer(
Callee,
Call,
TTI, GetAssumptionCache, GetBFI, GetTLI, PSI,
1214 ORE, GetEphValuesCache),
1216 Params.ComputeFullInlineCost || ORE ||
1217 isCostBenefitAnalysisEnabled()),
1219 BoostIndirectCalls(BoostIndirect), IgnoreThreshold(IgnoreThreshold),
1220 CostBenefitAnalysisEnabled(isCostBenefitAnalysisEnabled()),
1222 AllowRecursiveCall = *Params.AllowRecursiveCall;
1226 InlineCostAnnotationWriter Writer;
1232 void print(raw_ostream &OS);
1234 std::optional<InstructionCostDetail> getCostDetails(
const Instruction *
I) {
1235 auto It = InstructionCostDetailMap.find(
I);
1236 if (It != InstructionCostDetailMap.end())
1238 return std::nullopt;
/// Defaulted destructor, overriding CallAnalyzer's virtual destructor.
1241 ~InlineCostCallAnalyzer()
override =
default;
/// Returns the current value of Threshold.
1242 int getThreshold()
const {
return Threshold; }
/// Returns the current value of Cost.
1243 int getCost()
const {
return Cost; }
/// Returns StaticBonusApplied. It starts at 0 and updateThreshold() sets it
/// to LastCallToStaticBonus when isSoleCallToLocalFunction() holds for the
/// call.
1244 int getStaticBonusApplied()
const {
return StaticBonusApplied; }
/// Returns a copy of the stored cost-benefit pair. The optional is empty
/// until costBenefitAnalysis() emplaces a value into CostBenefit.
// Const-qualified to match the neighbouring read-only accessors: it only
// copies a member, and this lets it be called through a const analyzer.
1245 std::optional<CostBenefitPair> getCostBenefitPair() const {
return CostBenefit; }
/// Returns DecidedByCostBenefit, which finalizeAnalysis() sets to true when
/// costBenefitAnalysis() produces a result.
1246 bool wasDecidedByCostBenefit()
const {
return DecidedByCostBenefit; }
/// Returns DecidedByCostThreshold, which is set to true by both
/// finalizeAnalysis() and shouldStop() in this class.
1247 bool wasDecidedByCostThreshold()
const {
return DecidedByCostThreshold; }
1251static bool isSoleCallToLocalFunction(
const CallBase &CB,
1253 return Callee.hasLocalLinkage() &&
Callee.hasOneLiveUse() &&
1257class InlineCostFeaturesAnalyzer final :
public CallAnalyzer {
1264 static constexpr int JTCostMultiplier = 2;
1265 static constexpr int CaseClusterCostMultiplier = 2;
1266 static constexpr int SwitchDefaultDestCostMultiplier = 2;
1267 static constexpr int SwitchCostMultiplier = 2;
1271 unsigned SROACostSavingOpportunities = 0;
1272 int VectorBonus = 0;
1273 int SingleBBBonus = 0;
1276 DenseMap<AllocaInst *, unsigned> SROACosts;
1279 Cost[
static_cast<size_t>(Feature)] += Delta;
1283 Cost[
static_cast<size_t>(Feature)] =
Value;
1286 void onDisableSROA(AllocaInst *Arg)
override {
1287 auto CostIt = SROACosts.find(Arg);
1288 if (CostIt == SROACosts.end())
1291 increment(InlineCostFeatureIndex::sroa_losses, CostIt->second);
1292 SROACostSavingOpportunities -= CostIt->second;
1293 SROACosts.erase(CostIt);
1296 void onDisableLoadElimination()
override {
1297 set(InlineCostFeatureIndex::load_elimination, 1);
1300 void onCallPenalty()
override {
1301 increment(InlineCostFeatureIndex::call_penalty,
CallPenalty);
1304 void onCallArgumentSetup(
const CallBase &
Call)
override {
1305 increment(InlineCostFeatureIndex::call_argument_setup,
1309 void onLoadRelativeIntrinsic()
override {
1310 increment(InlineCostFeatureIndex::load_relative_intrinsic, 3 *
InstrCost);
1313 void onLoweredCall(Function *
F, CallBase &
Call,
1315 increment(InlineCostFeatureIndex::lowered_call_arg_setup,
1319 InlineParams IndirectCallParams = { 0,
1333 InlineCostCallAnalyzer CA(*
F,
Call, IndirectCallParams,
TTI,
1334 GetAssumptionCache, GetBFI, GetTLI, PSI, ORE,
1336 if (CA.analyze().isSuccess()) {
1337 increment(InlineCostFeatureIndex::nested_inline_cost_estimate,
1339 increment(InlineCostFeatureIndex::nested_inlines, 1);
1346 void onFinalizeSwitch(
unsigned JumpTableSize,
unsigned NumCaseCluster,
1347 bool DefaultDestUnreachable)
override {
1348 if (JumpTableSize) {
1349 if (!DefaultDestUnreachable)
1350 increment(InlineCostFeatureIndex::switch_default_dest_penalty,
1351 SwitchDefaultDestCostMultiplier *
InstrCost);
1352 int64_t JTCost =
static_cast<int64_t
>(JumpTableSize) *
InstrCost +
1354 increment(InlineCostFeatureIndex::jump_table_penalty, JTCost);
1358 if (NumCaseCluster <= 3) {
1359 increment(InlineCostFeatureIndex::case_cluster_penalty,
1360 (NumCaseCluster - DefaultDestUnreachable) *
1365 int64_t ExpectedNumberOfCompare =
1366 getExpectedNumberOfCompare(NumCaseCluster);
1368 int64_t SwitchCost =
1369 ExpectedNumberOfCompare * SwitchCostMultiplier *
InstrCost;
1370 increment(InlineCostFeatureIndex::switch_penalty, SwitchCost);
1373 void onMissedSimplification()
override {
1374 increment(InlineCostFeatureIndex::unsimplified_common_instructions,
1378 void onInitializeSROAArg(AllocaInst *Arg)
override {
1380 SROACosts[Arg] = SROAArgCost;
1381 SROACostSavingOpportunities += SROAArgCost;
1384 void onAggregateSROAUse(AllocaInst *Arg)
override {
1385 SROACosts.find(Arg)->second +=
InstrCost;
1386 SROACostSavingOpportunities +=
InstrCost;
1389 void onBlockAnalyzed(
const BasicBlock *BB)
override {
1391 set(InlineCostFeatureIndex::is_multiple_blocks, 1);
1392 Threshold -= SingleBBBonus;
1395 InlineResult finalizeAnalysis()
override {
1397 if (
Caller->hasMinSize()) {
1398 DominatorTree DT(
F);
1400 for (Loop *L : LI) {
1402 if (DeadBlocks.
count(
L->getHeader()))
1404 increment(InlineCostFeatureIndex::num_loops,
1408 set(InlineCostFeatureIndex::dead_blocks, DeadBlocks.
size());
1409 set(InlineCostFeatureIndex::simplified_instructions,
1410 NumInstructionsSimplified);
1411 set(InlineCostFeatureIndex::constant_args, NumConstantArgs);
1412 set(InlineCostFeatureIndex::constant_offset_ptr_args,
1413 NumConstantOffsetPtrArgs);
1414 set(InlineCostFeatureIndex::sroa_savings, SROACostSavingOpportunities);
1416 if (NumVectorInstructions <= NumInstructions / 10)
1417 Threshold -= VectorBonus;
1418 else if (NumVectorInstructions <= NumInstructions / 2)
1419 Threshold -= VectorBonus / 2;
1421 set(InlineCostFeatureIndex::threshold, Threshold);
/// Always returns false, i.e. the same answer as the CallAnalyzer default.
/// NOTE(review): presumably so feature collection never terminates early --
/// the consumer of this result is not visible in this excerpt.
1426 bool shouldStop()
override {
return false; }
1428 void onLoadEliminationOpportunity()
override {
1429 increment(InlineCostFeatureIndex::load_elimination, 1);
1432 InlineResult onAnalysisStart()
override {
1433 increment(InlineCostFeatureIndex::callsite_cost,
1436 set(InlineCostFeatureIndex::cold_cc_penalty,
1437 (
F.getCallingConv() == CallingConv::Cold));
1439 set(InlineCostFeatureIndex::last_call_to_static_bonus,
1440 isSoleCallToLocalFunction(CandidateCall,
F));
1445 int SingleBBBonusPercent = 50;
1449 SingleBBBonus = Threshold * SingleBBBonusPercent / 100;
1450 VectorBonus = Threshold * VectorBonusPercent / 100;
1451 Threshold += (SingleBBBonus + VectorBonus);
1457 InlineCostFeaturesAnalyzer(
1458 const TargetTransformInfo &
TTI,
1459 function_ref<AssumptionCache &(Function &)> &GetAssumptionCache,
1460 function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
1461 function_ref<
const TargetLibraryInfo &(Function &)> GetTLI,
1462 ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE, Function &Callee,
1464 : CallAnalyzer(
Callee,
Call,
TTI, GetAssumptionCache, GetBFI, GetTLI,
1473bool CallAnalyzer::isAllocaDerivedArg(
Value *V) {
1474 return SROAArgValues.
count(V);
1477void CallAnalyzer::disableSROAForArg(AllocaInst *SROAArg) {
1478 onDisableSROA(SROAArg);
1479 EnabledSROAAllocas.
erase(SROAArg);
1480 disableLoadElimination();
1483void InlineCostAnnotationWriter::emitInstructionAnnot(
1484 const Instruction *
I, formatted_raw_ostream &OS) {
1488 std::optional<InstructionCostDetail>
Record = ICCA->getCostDetails(
I);
1490 OS <<
"; No analysis for the instruction";
1492 OS <<
"; cost before = " <<
Record->CostBefore
1493 <<
", cost after = " <<
Record->CostAfter
1494 <<
", threshold before = " <<
Record->ThresholdBefore
1495 <<
", threshold after = " <<
Record->ThresholdAfter <<
", ";
1496 OS <<
"cost delta = " <<
Record->getCostDelta();
1497 if (
Record->hasThresholdChanged())
1498 OS <<
", threshold delta = " <<
Record->getThresholdDelta();
1500 auto *
V = ICCA->getSimplifiedValueUnchecked(
const_cast<Instruction *
>(
I));
1502 OS <<
", simplified to ";
1505 if (
VI->getFunction() !=
I->getFunction())
1506 OS <<
" (caller instruction)";
1508 if (VArg->getParent() !=
I->getFunction())
1509 OS <<
" (caller argument)";
1516void CallAnalyzer::disableSROA(
Value *V) {
1517 if (
auto *SROAArg = getSROAArgForValueOrNull(V)) {
1518 disableSROAForArg(SROAArg);
1522void CallAnalyzer::disableLoadElimination() {
1523 if (EnableLoadElimination) {
1524 onDisableLoadElimination();
1525 EnableLoadElimination =
false;
1533bool CallAnalyzer::accumulateGEPOffset(GEPOperator &
GEP, APInt &
Offset) {
1534 unsigned IntPtrWidth =
DL.getIndexTypeSizeInBits(
GEP.getType());
1538 GTI != GTE; ++GTI) {
1540 getDirectOrSimplifiedValue<ConstantInt>(GTI.getOperand());
1547 if (StructType *STy = GTI.getStructTypeOrNull()) {
1549 const StructLayout *SL =
DL.getStructLayout(STy);
1554 APInt TypeSize(IntPtrWidth, GTI.getSequentialElementStride(
DL));
1563bool CallAnalyzer::isGEPFree(GetElementPtrInst &
GEP) {
1564 SmallVector<Value *, 4> Operands;
1566 for (
const Use &
Op :
GEP.indices())
1567 if (Constant *SimpleOp = getSimplifiedValue<Constant>(
Op))
1576bool CallAnalyzer::visitAlloca(AllocaInst &
I) {
1577 disableSROA(
I.getOperand(0));
1581 if (
I.isArrayAllocation()) {
1582 Constant *
Size = getSimplifiedValue<Constant>(
I.getArraySize());
1592 Type *Ty =
I.getAllocatedType();
1594 AllocSize->getLimitedValue(),
1595 DL.getTypeAllocSize(Ty).getKnownMinValue(), AllocatedSize);
1597 HasDynamicAlloca =
true;
1602 if (
I.isStaticAlloca()) {
1614 HasDynamicAlloca =
true;
1620bool CallAnalyzer::visitPHI(PHINode &
I) {
1632 bool CheckSROA =
I.getType()->isPointerTy();
1636 std::pair<Value *, APInt> FirstBaseAndOffset = {
nullptr, ZeroOffset};
1637 Value *FirstV =
nullptr;
1639 for (
unsigned i = 0, e =
I.getNumIncomingValues(); i != e; ++i) {
1642 if (DeadBlocks.
count(Pred))
1646 BasicBlock *KnownSuccessor = KnownSuccessors[Pred];
1647 if (KnownSuccessor && KnownSuccessor !=
I.getParent())
1650 Value *
V =
I.getIncomingValue(i);
1655 Constant *
C = getDirectOrSimplifiedValue<Constant>(V);
1657 std::pair<Value *, APInt> BaseAndOffset = {
nullptr, ZeroOffset};
1658 if (!
C && CheckSROA)
1659 BaseAndOffset = ConstantOffsetPtrs.
lookup(V);
1661 if (!
C && !BaseAndOffset.first)
1678 if (FirstBaseAndOffset == BaseAndOffset)
1692 FirstBaseAndOffset = BaseAndOffset;
1697 SimplifiedValues[&
I] = FirstC;
1702 if (FirstBaseAndOffset.first) {
1703 ConstantOffsetPtrs[&
I] = std::move(FirstBaseAndOffset);
1705 if (
auto *SROAArg = getSROAArgForValueOrNull(FirstV))
1706 SROAArgValues[&
I] = SROAArg;
1716bool CallAnalyzer::canFoldInboundsGEP(GetElementPtrInst &
I) {
1718 std::pair<Value *, APInt> BaseAndOffset =
1719 ConstantOffsetPtrs.
lookup(
I.getPointerOperand());
1720 if (!BaseAndOffset.first)
1729 ConstantOffsetPtrs[&
I] = std::move(BaseAndOffset);
1734bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &
I) {
1735 auto *SROAArg = getSROAArgForValueOrNull(
I.getPointerOperand());
1738 auto IsGEPOffsetConstant = [&](GetElementPtrInst &
GEP) {
1739 for (
const Use &
Op :
GEP.indices())
1740 if (!getDirectOrSimplifiedValue<Constant>(
Op))
1749 if ((
I.isInBounds() && canFoldInboundsGEP(
I)) || IsGEPOffsetConstant(
I)) {
1751 SROAArgValues[&
I] = SROAArg;
1759 disableSROAForArg(SROAArg);
1760 return isGEPFree(
I);
1766bool CallAnalyzer::simplifyCmpInstForRecCall(CmpInst &Cmp) {
1770 auto *CmpOp =
Cmp.getOperand(0);
1775 auto *CallBB = CandidateCall.
getParent();
1776 auto *Predecessor = CallBB->getSinglePredecessor();
1781 if (!Br || Br->getCondition() != &Cmp)
1786 bool ArgFound =
false;
1787 Value *FuncArg =
nullptr, *CallArg =
nullptr;
1788 for (
unsigned ArgNum = 0;
1789 ArgNum <
F.arg_size() && ArgNum < CandidateCall.
arg_size(); ArgNum++) {
1790 FuncArg =
F.getArg(ArgNum);
1792 if (FuncArg == CmpOp && CallArg != CmpOp) {
1803 CondContext CC(&Cmp);
1804 CC.Invert = (CallBB != Br->getSuccessor(0));
1806 CC.AffectedValues.insert(FuncArg);
1812 if ((ConstVal->isOne() && CC.Invert) ||
1813 (ConstVal->isZero() && !CC.Invert)) {
1814 SimplifiedValues[&
Cmp] = ConstVal;
1822bool CallAnalyzer::simplifyInstruction(Instruction &
I) {
1825 Constant *COp = getDirectOrSimplifiedValue<Constant>(
Op);
1833 SimplifiedValues[&
I] =
C;
1846bool CallAnalyzer::simplifyIntrinsicCallIsConstant(CallBase &CB) {
1848 auto *
C = getDirectOrSimplifiedValue<Constant>(Arg);
1851 SimplifiedValues[&CB] = ConstantInt::get(RT,
C ? 1 : 0);
1855bool CallAnalyzer::simplifyIntrinsicCallObjectSize(CallBase &CB) {
1865 SimplifiedValues[&CB] =
C;
1869bool CallAnalyzer::visitBitCast(BitCastInst &
I) {
1875 std::pair<Value *, APInt> BaseAndOffset =
1876 ConstantOffsetPtrs.
lookup(
I.getOperand(0));
1878 if (BaseAndOffset.first)
1879 ConstantOffsetPtrs[&
I] = std::move(BaseAndOffset);
1882 if (
auto *SROAArg = getSROAArgForValueOrNull(
I.getOperand(0)))
1883 SROAArgValues[&
I] = SROAArg;
1889bool CallAnalyzer::visitPtrToInt(PtrToIntInst &
I) {
1897 unsigned AS =
I.getOperand(0)->getType()->getPointerAddressSpace();
1898 if (IntegerSize ==
DL.getPointerSizeInBits(AS)) {
1899 std::pair<Value *, APInt> BaseAndOffset =
1900 ConstantOffsetPtrs.
lookup(
I.getOperand(0));
1901 if (BaseAndOffset.first)
1902 ConstantOffsetPtrs[&
I] = std::move(BaseAndOffset);
1912 if (
auto *SROAArg = getSROAArgForValueOrNull(
I.getOperand(0)))
1913 SROAArgValues[&
I] = SROAArg;
1919bool CallAnalyzer::visitIntToPtr(IntToPtrInst &
I) {
1927 unsigned IntegerSize =
Op->getType()->getScalarSizeInBits();
1928 if (IntegerSize <=
DL.getPointerTypeSizeInBits(
I.getType())) {
1929 std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.
lookup(
Op);
1930 if (BaseAndOffset.first)
1931 ConstantOffsetPtrs[&
I] = std::move(BaseAndOffset);
1935 if (
auto *SROAArg = getSROAArgForValueOrNull(
Op))
1936 SROAArgValues[&
I] = SROAArg;
1942bool CallAnalyzer::visitCastInst(CastInst &
I) {
1949 disableSROA(
I.getOperand(0));
1954 switch (
I.getOpcode()) {
1955 case Instruction::FPTrunc:
1956 case Instruction::FPExt:
1957 case Instruction::UIToFP:
1958 case Instruction::SIToFP:
1959 case Instruction::FPToUI:
1960 case Instruction::FPToSI:
1972bool CallAnalyzer::paramHasAttr(Argument *
A, Attribute::AttrKind Attr) {
1976bool CallAnalyzer::isKnownNonNullInCallee(
Value *V) {
1983 if (paramHasAttr(
A, Attribute::NonNull))
1989 if (isAllocaDerivedArg(V))
1998bool CallAnalyzer::allowSizeGrowth(CallBase &
Call) {
2023bool InlineCostCallAnalyzer::isColdCallSite(CallBase &
Call,
2024 BlockFrequencyInfo *CallerBFI) {
2027 if (PSI && PSI->hasProfileSummary())
2028 return PSI->isColdCallSite(
Call, CallerBFI);
2040 auto CallSiteFreq = CallerBFI->
getBlockFreq(CallSiteBB);
2041 auto CallerEntryFreq =
2043 return CallSiteFreq < CallerEntryFreq * ColdProb;
2047InlineCostCallAnalyzer::getHotCallSiteThreshold(CallBase &
Call,
2048 BlockFrequencyInfo *CallerBFI) {
2052 if (PSI && PSI->hasProfileSummary() && PSI->isHotCallSite(
Call, CallerBFI))
2058 return std::nullopt;
2065 BlockFrequency CallSiteFreq = CallerBFI->
getBlockFreq(CallSiteBB);
2066 BlockFrequency CallerEntryFreq = CallerBFI->
getEntryFreq();
2068 if (Limit && CallSiteFreq >= *Limit)
2072 return std::nullopt;
2075void InlineCostCallAnalyzer::updateThreshold(CallBase &
Call, Function &Callee) {
2077 if (!allowSizeGrowth(
Call)) {
2085 auto MinIfValid = [](
int A, std::optional<int>
B) {
2086 return B ? std::min(
A, *
B) :
A;
2090 auto MaxIfValid = [](
int A, std::optional<int>
B) {
2091 return B ? std::max(
A, *
B) :
A;
2106 int SingleBBBonusPercent = 50;
2111 auto DisallowAllBonuses = [&]() {
2112 SingleBBBonusPercent = 0;
2113 VectorBonusPercent = 0;
2114 LastCallToStaticBonus = 0;
2119 if (
Caller->hasMinSize()) {
2125 SingleBBBonusPercent = 0;
2126 VectorBonusPercent = 0;
2127 }
else if (
Caller->hasOptSize())
2132 if (!
Caller->hasMinSize()) {
2136 if (
Callee.hasFnAttribute(Attribute::InlineHint))
2146 BlockFrequencyInfo *CallerBFI = GetBFI ? &(GetBFI(*Caller)) : nullptr;
2161 DisallowAllBonuses();
2166 if (PSI->isFunctionEntryHot(&Callee)) {
2172 }
else if (PSI->isFunctionEntryCold(&Callee)) {
2178 DisallowAllBonuses();
2190 SingleBBBonus = Threshold * SingleBBBonusPercent / 100;
2191 VectorBonus = Threshold * VectorBonusPercent / 100;
2196 if (isSoleCallToLocalFunction(
Call,
F)) {
2197 addCost(-LastCallToStaticBonus);
2198 StaticBonusApplied = LastCallToStaticBonus;
2202bool CallAnalyzer::visitCmpInst(CmpInst &
I) {
2209 if (simplifyCmpInstForRecCall(
I))
2212 if (
I.getOpcode() == Instruction::FCmp)
2217 Value *LHSBase, *RHSBase;
2218 APInt LHSOffset, RHSOffset;
2219 std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.
lookup(
LHS);
2221 std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.
lookup(
RHS);
2222 if (RHSBase && LHSBase == RHSBase) {
2228 ++NumConstantPtrCmps;
2233 auto isImplicitNullCheckCmp = [](
const CmpInst &
I) {
2234 for (
auto *User :
I.users())
2236 if (!
Instr->getMetadata(LLVMContext::MD_make_implicit))
2244 if (isKnownNonNullInCallee(
I.getOperand(0))) {
2252 if (isImplicitNullCheckCmp(
I))
2258bool CallAnalyzer::visitSub(BinaryOperator &
I) {
2262 Value *LHSBase, *RHSBase;
2263 APInt LHSOffset, RHSOffset;
2264 std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.
lookup(
LHS);
2266 std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.
lookup(
RHS);
2267 if (RHSBase && LHSBase == RHSBase) {
2273 SimplifiedValues[&
I] =
C;
2274 ++NumConstantPtrDiffs;
2282 return Base::visitSub(
I);
2285bool CallAnalyzer::visitBinaryOperator(BinaryOperator &
I) {
2287 Constant *CLHS = getDirectOrSimplifiedValue<Constant>(
LHS);
2288 Constant *CRHS = getDirectOrSimplifiedValue<Constant>(
RHS);
2290 Value *SimpleV =
nullptr;
2293 FI->getFastMathFlags(),
DL);
2299 SimplifiedValues[&
I] =
C;
2311 using namespace llvm::PatternMatch;
2312 if (
I.getType()->isFloatingPointTy() &&
2320bool CallAnalyzer::visitFNeg(UnaryOperator &
I) {
2322 Constant *COp = getDirectOrSimplifiedValue<Constant>(
Op);
2328 SimplifiedValues[&
I] =
C;
2339bool CallAnalyzer::visitLoad(LoadInst &
I) {
2340 if (handleSROA(
I.getPointerOperand(),
I.isSimple()))
2346 if (EnableLoadElimination &&
2347 !LoadAddrSet.
insert(
I.getPointerOperand()).second &&
I.isUnordered()) {
2348 onLoadEliminationOpportunity();
2356bool CallAnalyzer::visitStore(StoreInst &
I) {
2357 if (handleSROA(
I.getPointerOperand(),
I.isSimple()))
2368 disableLoadElimination();
2374bool CallAnalyzer::visitExtractValue(ExtractValueInst &
I) {
2375 Value *
Op =
I.getAggregateOperand();
2379 if (
Value *SimpleOp = getSimplifiedValueUnchecked(
Op)) {
2380 SimplifyQuery SQ(
DL);
2383 SimplifiedValues[&
I] = SimpleV;
2389 return Base::visitExtractValue(
I);
2392bool CallAnalyzer::visitInsertValue(InsertValueInst &
I) {
2398 return Base::visitInsertValue(
I);
2407bool CallAnalyzer::simplifyCallSite(Function *
F, CallBase &
Call) {
2416 SmallVector<Constant *, 4> ConstantArgs;
2419 Constant *
C = getDirectOrSimplifiedValue<Constant>(
I);
2426 SimplifiedValues[&
Call] =
C;
2433bool CallAnalyzer::isLoweredToCall(Function *
F, CallBase &
Call) {
2434 const TargetLibraryInfo *TLI = GetTLI ? &GetTLI(*
F) : nullptr;
2440 case LibFunc_memcpy_chk:
2441 case LibFunc_memmove_chk:
2442 case LibFunc_mempcpy_chk:
2443 case LibFunc_memset_chk: {
2450 auto *LenOp = getDirectOrSimplifiedValue<ConstantInt>(
Call.
getOperand(2));
2453 if (LenOp && ObjSizeOp &&
2454 LenOp->getLimitedValue() <= ObjSizeOp->getLimitedValue()) {
2466bool CallAnalyzer::visitCallBase(CallBase &
Call) {
2467 if (!onCallBaseVisitStart(
Call))
2471 !
F.hasFnAttribute(Attribute::ReturnsTwice)) {
2473 ExposesReturnsTwice =
true;
2477 ContainsNoDuplicateCall =
true;
2480 onInlineAsm(*InlineAsmOp);
2488 F = getSimplifiedValue<Function>(Callee);
2490 onCallArgumentSetup(
Call);
2493 disableLoadElimination();
2494 return Base::visitCallBase(
Call);
2498 assert(
F &&
"Expected a call to a known function");
2501 if (simplifyCallSite(
F,
Call))
2507 switch (
II->getIntrinsicID()) {
2510 disableLoadElimination();
2511 return Base::visitCallBase(
Call);
2513 case Intrinsic::load_relative:
2514 onLoadRelativeIntrinsic();
2517 case Intrinsic::memset:
2518 case Intrinsic::memcpy:
2519 case Intrinsic::memmove:
2520 disableLoadElimination();
2523 case Intrinsic::icall_branch_funnel:
2524 case Intrinsic::localescape:
2525 HasUninlineableIntrinsic =
true;
2527 case Intrinsic::vastart:
2528 InitsVargArgs =
true;
2530 case Intrinsic::launder_invariant_group:
2531 case Intrinsic::strip_invariant_group:
2532 if (
auto *SROAArg = getSROAArgForValueOrNull(
II->getOperand(0)))
2533 SROAArgValues[
II] = SROAArg;
2535 case Intrinsic::is_constant:
2536 return simplifyIntrinsicCallIsConstant(
Call);
2537 case Intrinsic::objectsize:
2538 return simplifyIntrinsicCallObjectSize(
Call);
2545 IsRecursiveCall =
true;
2546 if (!AllowRecursiveCall)
2550 if (isLoweredToCall(
F,
Call)) {
2555 disableLoadElimination();
2556 return Base::visitCallBase(
Call);
2559bool CallAnalyzer::visitReturnInst(ReturnInst &RI) {
2561 bool Free = !HasReturn;
2566bool CallAnalyzer::visitUncondBrInst(UncondBrInst &BI) {
2573bool CallAnalyzer::visitCondBrInst(CondBrInst &BI) {
2575 return getDirectOrSimplifiedValue<ConstantInt>(BI.
getCondition()) ||
2576 BI.getMetadata(LLVMContext::MD_make_implicit);
2579bool CallAnalyzer::visitSelectInst(SelectInst &SI) {
2580 bool CheckSROA =
SI.getType()->isPointerTy();
2584 Constant *TrueC = getDirectOrSimplifiedValue<Constant>(TrueVal);
2585 Constant *FalseC = getDirectOrSimplifiedValue<Constant>(FalseVal);
2586 Constant *CondC = getSimplifiedValue<Constant>(
SI.getCondition());
2590 if (TrueC == FalseC && TrueC) {
2591 SimplifiedValues[&
SI] = TrueC;
2596 return Base::visitSelectInst(SI);
2598 std::pair<Value *, APInt> TrueBaseAndOffset =
2599 ConstantOffsetPtrs.
lookup(TrueVal);
2600 std::pair<Value *, APInt> FalseBaseAndOffset =
2601 ConstantOffsetPtrs.
lookup(FalseVal);
2602 if (TrueBaseAndOffset == FalseBaseAndOffset && TrueBaseAndOffset.first) {
2603 ConstantOffsetPtrs[&
SI] = std::move(TrueBaseAndOffset);
2605 if (
auto *SROAArg = getSROAArgForValueOrNull(TrueVal))
2606 SROAArgValues[&
SI] = SROAArg;
2610 return Base::visitSelectInst(SI);
2621 if (TrueC && FalseC) {
2623 SimplifiedValues[&
SI] =
C;
2627 return Base::visitSelectInst(SI);
2632 SimplifiedValues[&
SI] = SelectedC;
2639 std::pair<Value *, APInt> BaseAndOffset =
2640 ConstantOffsetPtrs.
lookup(SelectedV);
2641 if (BaseAndOffset.first) {
2642 ConstantOffsetPtrs[&
SI] = std::move(BaseAndOffset);
2644 if (
auto *SROAArg = getSROAArgForValueOrNull(SelectedV))
2645 SROAArgValues[&
SI] = SROAArg;
2651bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
2654 if (getDirectOrSimplifiedValue<ConstantInt>(
SI.getCondition()))
2669 unsigned JumpTableSize = 0;
2670 BlockFrequencyInfo *BFI = GetBFI ? &(GetBFI(
F)) : nullptr;
2671 unsigned NumCaseCluster =
2674 onFinalizeSwitch(JumpTableSize, NumCaseCluster,
SI.defaultDestUnreachable());
2678bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) {
2687 HasIndirectBr =
true;
2691bool CallAnalyzer::visitResumeInst(ResumeInst &RI) {
2697bool CallAnalyzer::visitCleanupReturnInst(CleanupReturnInst &CRI) {
2703bool CallAnalyzer::visitCatchReturnInst(CatchReturnInst &CRI) {
2709bool CallAnalyzer::visitUnreachableInst(UnreachableInst &
I) {
2716bool CallAnalyzer::visitInstruction(Instruction &
I) {
2725 for (
const Use &
Op :
I.operands())
2739CallAnalyzer::analyzeBlock(BasicBlock *BB,
2740 const SmallPtrSetImpl<const Value *> &EphValues) {
2741 for (Instruction &
I : *BB) {
2750 if (
I.isDebugOrPseudoInst())
2759 ++NumVectorInstructions;
2766 onInstructionAnalysisStart(&
I);
2768 if (Base::visit(&
I))
2769 ++NumInstructionsSimplified;
2771 onMissedSimplification();
2773 onInstructionAnalysisFinish(&
I);
2774 using namespace ore;
2777 if (IsRecursiveCall && !AllowRecursiveCall)
2779 else if (ExposesReturnsTwice)
2781 else if (HasDynamicAlloca)
2783 else if (HasIndirectBr)
2785 else if (HasUninlineableIntrinsic)
2787 else if (InitsVargArgs)
2789 if (!
IR.isSuccess()) {
2792 return OptimizationRemarkMissed(
DEBUG_TYPE,
"NeverInline",
2794 <<
NV(
"Callee", &
F) <<
" has uninlinable pattern ("
2795 <<
NV(
"InlineResult",
IR.getFailureReason())
2796 <<
") and cost is not fully computed";
2809 return OptimizationRemarkMissed(
DEBUG_TYPE,
"NeverInline",
2811 <<
NV(
"Callee", &
F) <<
" is "
2812 <<
NV(
"InlineResult",
IR.getFailureReason())
2813 <<
". Cost is not fully computed";
2820 "Call site analysis is not favorable to inlining.");
2832ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(
Value *&V) {
2833 if (!
V->getType()->isPointerTy())
2836 unsigned AS =
V->getType()->getPointerAddressSpace();
2837 unsigned IntPtrWidth =
DL.getIndexSizeInBits(AS);
2842 SmallPtrSet<Value *, 4> Visited;
2846 if (!
GEP->isInBounds() || !accumulateGEPOffset(*
GEP,
Offset))
2848 V =
GEP->getPointerOperand();
2850 if (GA->isInterposable())
2852 V = GA->getAliasee();
2856 assert(
V->getType()->isPointerTy() &&
"Unexpected operand type!");
2857 }
while (Visited.
insert(V).second);
2859 Type *IdxPtrTy =
DL.getIndexType(
V->getType());
2870void CallAnalyzer::findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB) {
2874 if (DeadBlocks.
count(Pred))
2876 BasicBlock *KnownSucc = KnownSuccessors[Pred];
2877 return KnownSucc && KnownSucc != Succ;
2882 return (!DeadBlocks.
count(BB) &&
2884 [&](BasicBlock *
P) {
return IsEdgeDead(
P, BB); }));
2887 for (BasicBlock *Succ :
successors(CurrBB)) {
2888 if (Succ == NextBB || !IsNewlyDead(Succ))
2892 while (!NewDead.
empty()) {
2910InlineResult CallAnalyzer::analyze() {
2913 auto Result = onAnalysisStart();
2922 for (User *U :
Caller->users()) {
2925 IsCallerRecursive =
true;
2933 for (Argument &FAI :
F.args()) {
2935 SimplifiedValues[&FAI] = *CAI;
2939 Value *PtrArg = *CAI;
2940 if (ConstantInt *
C = stripAndComputeInBoundsConstantOffsets(PtrArg)) {
2941 ConstantOffsetPtrs[&FAI] = std::make_pair(PtrArg,
C->getValue());
2945 SROAArgValues[&FAI] = SROAArg;
2946 onInitializeSROAArg(SROAArg);
2947 EnabledSROAAllocas.
insert(SROAArg);
2952 NumConstantOffsetPtrArgs = ConstantOffsetPtrs.
size();
2953 NumAllocaArgs = SROAArgValues.
size();
2957 SmallPtrSet<const Value *, 32> EphValuesStorage;
2958 const SmallPtrSetImpl<const Value *> *EphValues = &EphValuesStorage;
2959 if (GetEphValuesCache)
2960 EphValues = &GetEphValuesCache(
F).ephValues();
2972 typedef SmallSetVector<BasicBlock *, 16> BBSetVector;
2973 BBSetVector BBWorklist;
2974 BBWorklist.insert(&
F.getEntryBlock());
2977 for (
unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
3000 InlineResult
IR = analyzeBlock(BB, *EphValues);
3001 if (!
IR.isSuccess())
3010 if (ConstantInt *SimpleCond = getSimplifiedValue<ConstantInt>(
Cond)) {
3012 BBWorklist.insert(NextBB);
3013 KnownSuccessors[BB] = NextBB;
3014 findDeadBlocks(BB, NextBB);
3019 if (ConstantInt *SimpleCond = getSimplifiedValue<ConstantInt>(
Cond)) {
3020 BasicBlock *NextBB =
SI->findCaseValue(SimpleCond)->getCaseSuccessor();
3021 BBWorklist.insert(NextBB);
3022 KnownSuccessors[BB] = NextBB;
3023 findDeadBlocks(BB, NextBB);
3032 onBlockAnalyzed(BB);
3038 if (!isSoleCallToLocalFunction(CandidateCall,
F) && ContainsNoDuplicateCall)
3048 FinalStackSizeThreshold = *AttrMaxStackSize;
3049 if (AllocatedSize > FinalStackSizeThreshold)
3052 return finalizeAnalysis();
3055void InlineCostCallAnalyzer::print(raw_ostream &OS) {
3056#define DEBUG_PRINT_STAT(x) OS << " " #x ": " << x << "\n"
3058 F.print(OS, &Writer);
3073#undef DEBUG_PRINT_STAT
3076#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3090 auto CalleeTLI = GetTLI(*Callee);
3092 TTI.areInlineCompatible(Caller, Callee)) &&
3093 GetTLI(*Caller).areInlineCompatible(CalleeTLI,
3095 AttributeFuncs::areInlineCompatible(*Caller, *Callee);
3101 for (
unsigned I = 0, E =
Call.arg_size();
I != E; ++
I) {
3102 if (
Call.isByValArgument(
I)) {
3108 unsigned PointerSize =
DL.getPointerSizeInBits(AS);
3110 unsigned NumStores = (
TypeSize + PointerSize - 1) / PointerSize;
3118 NumStores = std::min(NumStores, 8U);
3131 return std::min<int64_t>(
Cost, INT_MAX);
3142 GetAssumptionCache, GetTLI, GetBFI, PSI, ORE,
3164 InlineCostCallAnalyzer CA(*
Call.getCalledFunction(),
Call, Params, CalleeTTI,
3165 GetAssumptionCache, GetBFI, GetTLI, PSI, ORE,
true,
3167 auto R = CA.analyze();
3169 return std::nullopt;
3170 return CA.getCost();
3179 InlineCostFeaturesAnalyzer CFA(CalleeTTI, GetAssumptionCache, GetBFI, GetTLI,
3180 PSI, ORE, *
Call.getCalledFunction(),
Call);
3181 auto R = CFA.analyze();
3183 return std::nullopt;
3184 return CFA.features();
3199 if (Callee->isPresplitCoroutine())
3207 unsigned AllocaAS = Callee->getDataLayout().getAllocaAddrSpace();
3208 for (
unsigned I = 0, E =
Call.arg_size();
I != E; ++
I)
3209 if (
Call.isByValArgument(
I)) {
3218 if (
Call.hasFnAttr(Attribute::AlwaysInline)) {
3219 if (
Call.getAttributes().hasFnAttr(Attribute::NoInline))
3223 if (IsViable.isSuccess())
3236 if (Caller->hasFnAttribute(Attribute::Flatten)) {
3238 if (IsViable.isSuccess())
3244 if (Caller->hasOptNone())
3248 if (Callee->isInterposable(
false))
3252 if (Callee->hasFnAttribute(Attribute::NoInline))
3256 if (
Call.isNoInline())
3260 if (Callee->hasFnAttribute(
"loader-replaceable"))
3263 return std::nullopt;
3279 if (UserDecision->isSuccess())
3286 "Inlining forced by -inline-all-viable-calls");
3289 <<
"... (caller:" <<
Call.getCaller()->getName()
3292 InlineCostCallAnalyzer CA(*Callee,
Call, Params, CalleeTTI,
3293 GetAssumptionCache, GetBFI, GetTLI, PSI, ORE,
3303 if (CA.wasDecidedByCostBenefit()) {
3306 CA.getCostBenefitPair());
3311 if (CA.wasDecidedByCostThreshold())
3313 CA.getStaticBonusApplied());
3322 bool ReturnsTwice =
F.hasFnAttribute(Attribute::ReturnsTwice);
3332 for (
auto &
II : BB) {
3349 switch (Callee->getIntrinsicID()) {
3352 case llvm::Intrinsic::icall_branch_funnel:
3356 "disallowed inlining of @llvm.icall.branch.funnel");
3357 case llvm::Intrinsic::localescape:
3361 "disallowed inlining of @llvm.localescape");
3362 case llvm::Intrinsic::vastart:
3366 "contains VarArgs initialized with va_start");
3475 InlineCostCallAnalyzer ICCA(*CalledFunction, *CB, Params,
TTI,
3476 GetAssumptionCache,
nullptr,
nullptr, PSI,
3479 OS <<
" Analyzing call of " << CalledFunction->
getName()
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
static InstructionCost getCost(Instruction &Inst, TTI::TargetCostKind CostKind, TargetTransformInfo &TTI)
static bool isColdCallSite(CallBase &CB, BlockFrequencyInfo &CallerBFI)
Return true if the block containing the call site has a BlockFrequency of less than ColdCCRelFreq% of...
static bool IsIndirectCall(const MachineInstr *MI)
static cl::opt< int > InlineAsmInstrCost("inline-asm-instr-cost", cl::Hidden, cl::init(0), cl::desc("Cost of a single inline asm instruction when inlining"))
static cl::opt< int > InlineSavingsMultiplier("inline-savings-multiplier", cl::Hidden, cl::init(8), cl::desc("Multiplier to multiply cycle savings by during inlining"))
static cl::opt< int > InlineThreshold("inline-threshold", cl::Hidden, cl::init(225), cl::desc("Control the amount of inlining to perform (default = 225)"))
static cl::opt< int > CallPenalty("inline-call-penalty", cl::Hidden, cl::init(25), cl::desc("Call penalty that is applied per callsite when inlining"))
static cl::opt< int > HotCallSiteThreshold("hot-callsite-threshold", cl::Hidden, cl::init(3000), cl::desc("Threshold for hot callsites "))
static cl::opt< int > ColdThreshold("inlinecold-threshold", cl::Hidden, cl::init(45), cl::desc("Threshold for inlining functions with cold attribute"))
static cl::opt< size_t > RecurStackSizeThreshold("recursive-inline-max-stacksize", cl::Hidden, cl::init(InlineConstants::TotalAllocaSizeRecursiveCaller), cl::desc("Do not inline recursive functions with a stack " "size that exceeds the specified limit"))
static cl::opt< bool > PrintInstructionComments("print-instruction-comments", cl::Hidden, cl::init(false), cl::desc("Prints comments for instruction based on inline cost analysis"))
static cl::opt< int > LocallyHotCallSiteThreshold("locally-hot-callsite-threshold", cl::Hidden, cl::init(525), cl::desc("Threshold for locally hot callsites "))
static cl::opt< bool > InlineCallerSupersetNoBuiltin("inline-caller-superset-nobuiltin", cl::Hidden, cl::init(true), cl::desc("Allow inlining when caller has a superset of callee's nobuiltin " "attributes."))
static cl::opt< int > HintThreshold("inlinehint-threshold", cl::Hidden, cl::init(325), cl::desc("Threshold for inlining functions with inline hint"))
static cl::opt< size_t > StackSizeThreshold("inline-max-stacksize", cl::Hidden, cl::init(std::numeric_limits< size_t >::max()), cl::desc("Do not inline functions with a stack size " "that exceeds the specified limit"))
static cl::opt< uint64_t > HotCallSiteRelFreq("hot-callsite-rel-freq", cl::Hidden, cl::init(60), cl::desc("Minimum block frequency, expressed as a multiple of caller's " "entry frequency, for a callsite to be hot in the absence of " "profile information."))
static cl::opt< int > InlineSavingsProfitableMultiplier("inline-savings-profitable-multiplier", cl::Hidden, cl::init(4), cl::desc("A multiplier on top of cycle savings to decide whether the " "savings won't justify the cost"))
static cl::opt< int > MemAccessCost("inline-memaccess-cost", cl::Hidden, cl::init(0), cl::desc("Cost of load/store instruction when inlining"))
static cl::opt< int > ColdCallSiteThreshold("inline-cold-callsite-threshold", cl::Hidden, cl::init(45), cl::desc("Threshold for inlining cold callsites"))
static cl::opt< bool > IgnoreTTIInlineCompatible("ignore-tti-inline-compatible", cl::Hidden, cl::init(false), cl::desc("Ignore TTI attributes compatibility check between callee/caller " "during inline cost calculation"))
static cl::opt< bool > OptComputeFullInlineCost("inline-cost-full", cl::Hidden, cl::desc("Compute the full inline cost of a call site even when the cost " "exceeds the threshold."))
#define DEBUG_PRINT_STAT(x)
static cl::opt< bool > InlineEnableCostBenefitAnalysis("inline-enable-cost-benefit-analysis", cl::Hidden, cl::init(false), cl::desc("Enable the cost-benefit analysis for the inliner"))
static cl::opt< int > InstrCost("inline-instr-cost", cl::Hidden, cl::init(5), cl::desc("Cost of a single instruction when inlining"))
static cl::opt< bool > InlineAllViableCalls("inline-all-viable-calls", cl::Hidden, cl::init(false), cl::desc("Inline all viable calls, even if they exceed the inlining " "threshold"))
static cl::opt< int > InlineSizeAllowance("inline-size-allowance", cl::Hidden, cl::init(100), cl::desc("The maximum size of a callee that get's " "inlined without sufficient cycle savings"))
static bool functionsHaveCompatibleAttributes(Function *Caller, Function *Callee, TargetTransformInfo &TTI, function_ref< const TargetLibraryInfo &(Function &)> &GetTLI)
Test that there are no attribute conflicts between Caller and Callee that prevent inlining.
static cl::opt< int > ColdCallSiteRelFreq("cold-callsite-rel-freq", cl::Hidden, cl::init(2), cl::desc("Maximum block frequency, expressed as a percentage of caller's " "entry frequency, for a callsite to be cold in the absence of " "profile information."))
static cl::opt< bool > DisableGEPConstOperand("disable-gep-const-evaluation", cl::Hidden, cl::init(false), cl::desc("Disables evaluation of GetElementPtr with constant operands"))
static cl::opt< int > DefaultThreshold("inlinedefault-threshold", cl::Hidden, cl::init(225), cl::desc("Default amount of inlining to perform"))
static Constant * getFalse(Type *Ty)
For a boolean type or a vector of boolean type, return false or a vector with every element false.
Legalize the Machine IR a function's Machine IR
Machine Check Debug Module
uint64_t IntrinsicInst * II
FunctionAnalysisManager FAM
const SmallVectorImpl< MachineOperand > & Cond
static void visit(BasicBlock &Start, std::function< bool(BasicBlock *)> op)
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static SymbolRef::Type getType(const Symbol *Sym)
LLVM_ABI APInt udiv(const APInt &RHS) const
Unsigned division operation.
bool ult(const APInt &RHS) const
Unsigned less than comparison.
LLVM_ABI APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
PointerType * getType() const
Overload to return most specific pointer type.
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
Functions, function parameters, and return types can have attributes to indicate how they should be t...
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
bool isValid() const
Return true if the attribute is any kind of attribute.
LLVM Basic Block Representation.
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
LLVM_ABI std::optional< uint64_t > getBlockProfileCount(const BasicBlock *BB, bool AllowSynthetic=false) const
Returns the estimated profile count of BB.
LLVM_ABI BlockFrequency getEntryFreq() const
LLVM_ABI BlockFrequency getBlockFreq(const BasicBlock *BB) const
getBlockFreq - Return block frequency.
LLVM_ABI std::optional< BlockFrequency > mul(uint64_t Factor) const
Multiplies frequency with Factor. Returns nullopt in case of overflow.
Base class for all callable instructions (InvokeInst and CallInst). Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
bool hasFnAttr(Attribute::AttrKind Kind) const
Determine whether this call has the given attribute.
LLVM_ABI bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Determine whether the argument or parameter has the given attribute.
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
bool onlyReadsMemory(unsigned OpNo) const
Value * getCalledOperand() const
Attribute getFnAttr(StringRef Kind) const
Get the attribute of a given kind for the function.
Value * getArgOperand(unsigned i) const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
FunctionType * getFunctionType() const
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
unsigned arg_size() const
LLVM_ABI Function * getCaller()
Helper to get the caller (the parent function).
Value * getCondition() const
BasicBlock * getSuccessor(unsigned i) const
static LLVM_ABI Constant * getSub(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
const APInt & getValue() const
Return the constant as an APInt value reference.
static LLVM_ABI ConstantInt * getBool(LLVMContext &Context, bool V)
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
LLVM_ABI bool isAllOnesValue() const
Return true if this is the value that would be returned by getAllOnesValue.
A parsed version of the target data layout string, and methods for querying it.
ValueT lookup(const_arg_type_t< KeyT > Val) const
Return the entry for the specified key, or a default constructed value if no such entry exists.
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
A cache of ephemeral values within a function.
Type * getReturnType() const
const BasicBlock & getEntryBlock() const
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
static LLVM_ABI bool compare(const APInt &LHS, const APInt &RHS, ICmpInst::Predicate Pred)
Return result of LHS Pred RHS comparison.
LLVM_ABI void collectAsmStrs(SmallVectorImpl< StringRef > &AsmStrs) const
Represents the cost of inlining a function.
static InlineCost getNever(const char *Reason, std::optional< CostBenefitPair > CostBenefit=std::nullopt)
static InlineCost getAlways(const char *Reason, std::optional< CostBenefitPair > CostBenefit=std::nullopt)
static InlineCost get(int Cost, int Threshold, int StaticBonus=0)
InlineResult is basically true or false.
static InlineResult success()
static InlineResult failure(const char *Reason)
const char * getFailureReason() const
Base class for instruction visitors.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
Class to represent pointers.
unsigned getAddressSpace() const
Return the address space of the Pointer type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
void reserve(size_type N)
void push_back(const T &Elt)
Represent a constant reference to a string, i.e.
static constexpr size_t npos
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
constexpr bool empty() const
Check if the string is empty.
bool contains(StringRef Other) const
Return true if the given string is a substring of *this, and false otherwise.
size_t find(char C, size_t From=0) const
Search for the first character C in the string.
TypeSize getElementOffset(unsigned Idx) const
Analysis pass providing the TargetTransformInfo.
Provides information about what library functions are available for the current target.
bool has(LibFunc F) const
Tests whether a library function is available.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
static constexpr TypeSize getZero()
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Value * getOperand(unsigned i) const
LLVM Value Representation.
LLVMContext & getContext() const
All values hold a context through their type.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
int getNumOccurrences() const
std::pair< iterator, bool > insert(const ValueT &V)
bool erase(const ValueT &V)
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
const char FunctionInlineCostMultiplierAttributeName[]
const int OptSizeThreshold
Use when optsize (-Os) is specified.
const int OptMinSizeThreshold
Use when minsize (-Oz) is specified.
const uint64_t MaxSimplifiedDynamicAllocaToInline
Do not inline dynamic allocas that have been constant propagated to be static allocas above this amou...
const int IndirectCallThreshold
const int OptAggressiveThreshold
Use when -O3 is specified.
const char MaxInlineStackSizeAttributeName[]
const unsigned TotalAllocaSizeRecursiveCaller
Do not inline functions which allocate this many bytes on the stack when the caller is recursive.
LLVM_ABI int getInstrCost()
bool match(Val *V, const Pattern &P)
auto m_Value()
Match an arbitrary value and ignore it.
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
initializer< Ty > init(const Ty &Val)
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< InstrNode * > Instr
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
This is an optimization pass for GlobalISel generic memory operations.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
LLVM_ABI Constant * ConstantFoldSelectInstruction(Constant *Cond, Constant *V1, Constant *V2)
Attempt to constant fold a select instruction with the specified operands.
LLVM_ABI bool isAssumeLikeIntrinsic(const Instruction *I)
Return true if it is an intrinsic that cannot be speculated but also cannot trap.
LLVM_ABI bool canConstantFoldCallTo(const CallBase *Call, const Function *F)
canConstantFoldCallTo - Return true if it's even possible to fold a call to the specified function.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI std::optional< int > getStringFnAttrAsInt(CallBase &CB, StringRef AttrKind)
auto successors(const MachineBasicBlock *BB)
OuterAnalysisManagerProxy< ModuleAnalysisManager, Function > ModuleAnalysisManagerFunctionProxy
Provide the ModuleAnalysisManager to Function proxy.
LLVM_ABI Value * lowerObjectSizeCall(IntrinsicInst *ObjectSize, const DataLayout &DL, const TargetLibraryInfo *TLI, bool MustSucceed)
Try to turn a call to @llvm.objectsize into an integer value of the given Type.
LLVM_ABI Value * simplifyInstructionWithOperands(Instruction *I, ArrayRef< Value * > NewOps, const SimplifyQuery &Q)
Like simplifyInstruction but the operands of I are replaced with NewOps.
LogicalResult failure(bool IsFailure=true)
Utility function to generate a LogicalResult.
gep_type_iterator gep_type_end(const User *GEP)
LLVM_ABI Constant * ConstantFoldCall(const CallBase *Call, Function *F, ArrayRef< Constant * > Operands, const TargetLibraryInfo *TLI=nullptr, bool AllowNonDeterministic=true)
ConstantFoldCall - Attempt to constant fold a call to the specified function with the specified argum...
RelativeUniformCounterPtr ValuesPtrExpr VTableAddr Value
LLVM_ABI Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
LLVM_ABI InlineResult isInlineViable(Function &Callee)
Check if it is mechanically possible to inline the function Callee, based on the contents of the func...
auto dyn_cast_or_null(const Y &Val)
LLVM_ABI Value * simplifyFNegInst(Value *Op, FastMathFlags FMF, const SimplifyQuery &Q)
Given operand for an FNeg, fold the result or return null.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
generic_gep_type_iterator<> gep_type_iterator
std::enable_if_t< std::is_unsigned_v< T >, T > SaturatingMultiplyAdd(T X, T Y, T A, bool *ResultOverflowed=nullptr)
Multiply two unsigned integers, X and Y, and add the unsigned integer A to the product.
LLVM_ABI std::optional< InlineCostFeatures > getInliningCostFeatures(CallBase &Call, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, function_ref< const TargetLibraryInfo &(Function &)> GetTLI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Get the expanded cost features.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference sizeof(SmallVector<T, 0>).
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
LLVM_ABI Value * simplifyExtractValueInst(Value *Agg, ArrayRef< unsigned > Idxs, const SimplifyQuery &Q)
Given operands for an ExtractValueInst, fold the result or return null.
LLVM_ABI InlineCost getInlineCost(CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< const TargetLibraryInfo &(Function &)> GetTLI, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr, function_ref< EphemeralValuesCache &(Function &)> GetEphValuesCache=nullptr)
Get an InlineCost object representing the cost of inlining this callsite.
LLVM_ABI std::optional< InlineResult > getAttributeBasedInliningDecision(CallBase &Call, Function *Callee, TargetTransformInfo &CalleeTTI, function_ref< const TargetLibraryInfo &(Function &)> GetTLI)
Returns InlineResult::success() if the call site should be always inlined because of user directives,...
LLVM_ABI Value * simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a BinaryOperator, fold the result or return null.
DWARFExpression::Operation Op
LLVM_ABI InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
LLVM_ABI int getCallsiteCost(const TargetTransformInfo &TTI, const CallBase &Call, const DataLayout &DL)
Return the cost associated with a callsite, including parameter passing and the call/return instruction.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
gep_type_iterator gep_type_begin(const User *GEP)
LLVM_ABI std::optional< int > getInliningCostEstimate(CallBase &Call, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, function_ref< const TargetLibraryInfo &(Function &)> GetTLI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Get the cost estimate ignoring thresholds.
auto predecessors(const MachineBasicBlock *BB)
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI Constant * ConstantFoldInstOperands(const Instruction *I, ArrayRef< Constant * > Ops, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, bool AllowNonDeterministic=true)
ConstantFoldInstOperands - Attempt to constant fold an instruction with the specified operands.
LLVM_ABI InlineParams getInlineParamsFromOptLevel(unsigned OptLevel)
Generate the parameters to tune the inline cost analysis based on command line options.
std::enable_if_t< std::is_unsigned_v< T >, T > SaturatingAdd(T X, T Y, bool *ResultOverflowed=nullptr)
Add two unsigned integers, X and Y, of type T.
std::array< int, static_cast< size_t >(InlineCostFeatureIndex::NumberOfFeatures)> InlineCostFeatures
static LLVM_ABI void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM)
Thresholds to tune inline cost analysis.
std::optional< int > OptMinSizeThreshold
Threshold to use when the caller is optimized for minsize.
std::optional< int > OptSizeThreshold
Threshold to use when the caller is optimized for size.
std::optional< int > OptSizeHintThreshold
Threshold to use for callees with inline hint, when the caller is optimized for size.
std::optional< int > ColdCallSiteThreshold
Threshold to use when the callsite is considered cold.
std::optional< int > ColdThreshold
Threshold to use for cold callees.
std::optional< int > HotCallSiteThreshold
Threshold to use when the callsite is considered hot.
int DefaultThreshold
The default threshold to start with for a callee.
std::optional< int > HintThreshold
Threshold to use for callees with inline hint.
std::optional< int > LocallyHotCallSiteThreshold
Threshold to use when the callsite is considered hot relative to function entry.