33#include "llvm/Config/llvm-config.h"
55#define DEBUG_TYPE "inline-cost"
57STATISTIC(NumCallsAnalyzed,
"Number of call sites analyzed");
61 cl::desc(
"Default amount of inlining to perform"));
70 cl::desc(
"Ignore TTI attributes compatibility check between callee/caller "
71 "during inline cost calculation"));
75 cl::desc(
"Prints comments for instruction based on inline cost analysis"));
79 cl::desc(
"Control the amount of inlining to perform (default = 225)"));
83 cl::desc(
"Threshold for inlining functions with inline hint"));
88 cl::desc(
"Threshold for inlining cold callsites"));
92 cl::desc(
"Enable the cost-benefit analysis for the inliner"));
99 cl::desc(
"Multiplier to multiply cycle savings by during inlining"));
106 cl::desc(
"A multiplier on top of cycle savings to decide whether the "
107 "savings won't justify the cost"));
111 cl::desc(
"The maximum size of a callee that get's "
112 "inlined without sufficient cycle savings"));
119 cl::desc(
"Threshold for inlining functions with cold attribute"));
123 cl::desc(
"Threshold for hot callsites "));
127 cl::desc(
"Threshold for locally hot callsites "));
131 cl::desc(
"Maximum block frequency, expressed as a percentage of caller's "
132 "entry frequency, for a callsite to be cold in the absence of "
133 "profile information."));
137 cl::desc(
"Minimum block frequency, expressed as a multiple of caller's "
138 "entry frequency, for a callsite to be hot in the absence of "
139 "profile information."));
143 cl::desc(
"Cost of a single instruction when inlining"));
147 cl::desc(
"Cost of a single inline asm instruction when inlining"));
151 cl::desc(
"Cost of load/store instruction when inlining"));
155 cl::desc(
"Call penalty that is applied per callsite when inlining"));
159 cl::init(std::numeric_limits<size_t>::max()),
160 cl::desc(
"Do not inline functions with a stack size "
161 "that exceeds the specified limit"));
166 cl::desc(
"Do not inline recursive functions with a stack "
167 "size that exceeds the specified limit"));
171 cl::desc(
"Compute the full inline cost of a call site even when the cost "
172 "exceeds the threshold."));
176 cl::desc(
"Allow inlining when caller has a superset of callee's nobuiltin "
181 cl::desc(
"Disables evaluation of GetElementPtr with constant operands"));
185 cl::desc(
"Inline all viable calls, even if they exceed the inlining "
213class InlineCostCallAnalyzer;
217struct InstructionCostDetail {
220 int ThresholdBefore = 0;
221 int ThresholdAfter = 0;
/// Change in threshold recorded for the instruction: ThresholdAfter minus
/// ThresholdBefore.
223 int getThresholdDelta()
const {
return ThresholdAfter - ThresholdBefore; }
/// Change in cost recorded for the instruction: CostAfter minus CostBefore.
225 int getCostDelta()
const {
return CostAfter - CostBefore; }
/// True when the recorded threshold values before and after differ.
227 bool hasThresholdChanged()
const {
return ThresholdAfter != ThresholdBefore; }
232 InlineCostCallAnalyzer *
const ICCA;
235 InlineCostAnnotationWriter(InlineCostCallAnalyzer *ICCA) : ICCA(ICCA) {}
236 void emitInstructionAnnot(
const Instruction *
I,
237 formatted_raw_ostream &OS)
override;
248class CallAnalyzer :
public InstVisitor<CallAnalyzer, bool> {
249 typedef InstVisitor<CallAnalyzer, bool> Base;
250 friend class InstVisitor<CallAnalyzer, bool>;
253 virtual ~CallAnalyzer() =
default;
255 const TargetTransformInfo &TTI;
258 function_ref<AssumptionCache &(
Function &)> GetAssumptionCache;
261 function_ref<BlockFrequencyInfo &(
Function &)> GetBFI;
264 function_ref<
const TargetLibraryInfo &(
Function &)> GetTLI;
267 ProfileSummaryInfo *PSI;
273 const DataLayout &DL;
276 OptimizationRemarkEmitter *ORE;
281 CallBase &CandidateCall;
284 function_ref<EphemeralValuesCache &(
Function &)> GetEphValuesCache =
nullptr;
288 virtual void onBlockStart(
const BasicBlock *BB) {}
291 virtual void onBlockAnalyzed(
const BasicBlock *BB) {}
294 virtual void onInstructionAnalysisStart(
const Instruction *
I) {}
297 virtual void onInstructionAnalysisFinish(
const Instruction *
I) {}
307 virtual bool shouldStop() {
return false; }
316 virtual void onDisableSROA(AllocaInst *Arg) {}
319 virtual void onDisableLoadElimination() {}
323 virtual bool onCallBaseVisitStart(CallBase &
Call) {
return true; }
326 virtual void onCallPenalty() {}
329 virtual void onMemAccess(){};
333 virtual void onLoadEliminationOpportunity() {}
337 virtual void onCallArgumentSetup(
const CallBase &
Call) {}
340 virtual void onLoadRelativeIntrinsic() {}
348 virtual bool onJumpTable(
unsigned JumpTableSize) {
return true; }
352 virtual bool onCaseCluster(
unsigned NumCaseCluster) {
return true; }
356 virtual void onFinalizeSwitch(
unsigned JumpTableSize,
unsigned NumCaseCluster,
357 bool DefaultDestUnreachable) {}
361 virtual void onMissedSimplification() {}
364 virtual void onInlineAsm(
const InlineAsm &Arg) {}
367 virtual void onInitializeSROAArg(AllocaInst *Arg) {}
370 virtual void onAggregateSROAUse(AllocaInst *V) {}
372 bool handleSROA(
Value *V,
bool DoNotDisable) {
374 if (
auto *SROAArg = getSROAArgForValueOrNull(V)) {
376 onAggregateSROAUse(SROAArg);
379 disableSROAForArg(SROAArg);
384 bool IsCallerRecursive =
false;
385 bool IsRecursiveCall =
false;
386 bool ExposesReturnsTwice =
false;
387 bool HasDynamicAlloca =
false;
388 bool ContainsNoDuplicateCall =
false;
389 bool HasReturn =
false;
390 bool HasIndirectBr =
false;
391 bool HasUninlineableIntrinsic =
false;
392 bool InitsVargArgs =
false;
395 uint64_t AllocatedSize = 0;
396 unsigned NumInstructions = 0;
397 unsigned NumInlineAsmInstructions = 0;
398 unsigned NumVectorInstructions = 0;
408 DenseMap<Value *, Value *> SimplifiedValues;
412 DenseMap<Value *, AllocaInst *> SROAArgValues;
415 DenseSet<AllocaInst *> EnabledSROAAllocas;
418 DenseMap<Value *, std::pair<Value *, APInt>> ConstantOffsetPtrs;
421 SmallPtrSet<BasicBlock *, 16> DeadBlocks;
425 DenseMap<BasicBlock *, BasicBlock *> KnownSuccessors;
430 bool EnableLoadElimination =
true;
433 bool AllowRecursiveCall =
false;
435 SmallPtrSet<Value *, 16> LoadAddrSet;
437 AllocaInst *getSROAArgForValueOrNull(
Value *V)
const {
438 auto It = SROAArgValues.find(V);
439 if (It == SROAArgValues.end() || EnabledSROAAllocas.count(It->second) == 0)
446 template <
typename T>
T *getDirectOrSimplifiedValue(
Value *V)
const {
449 return getSimplifiedValue<T>(V);
453 bool isAllocaDerivedArg(
Value *V);
454 void disableSROAForArg(AllocaInst *SROAArg);
455 void disableSROA(
Value *V);
456 void findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB);
457 void disableLoadElimination();
458 bool isGEPFree(GetElementPtrInst &
GEP);
459 bool canFoldInboundsGEP(GetElementPtrInst &
I);
460 bool accumulateGEPOffset(GEPOperator &
GEP, APInt &
Offset);
461 bool simplifyCallSite(Function *F, CallBase &
Call);
462 bool simplifyCmpInstForRecCall(CmpInst &Cmp);
464 bool simplifyIntrinsicCallIsConstant(CallBase &CB);
465 bool simplifyIntrinsicCallObjectSize(CallBase &CB);
466 ConstantInt *stripAndComputeInBoundsConstantOffsets(
Value *&V);
467 bool isLoweredToCall(Function *F, CallBase &
Call);
474 bool paramHasAttr(Argument *
A, Attribute::AttrKind Attr);
478 bool isKnownNonNullInCallee(
Value *V);
481 bool allowSizeGrowth(CallBase &
Call);
484 InlineResult analyzeBlock(BasicBlock *BB,
485 const SmallPtrSetImpl<const Value *> &EphValues);
491 void visit(Function *);
492 void visit(Function &);
493 void visit(BasicBlock *);
494 void visit(BasicBlock &);
497 bool visitInstruction(Instruction &
I);
500 bool visitAlloca(AllocaInst &
I);
501 bool visitPHI(PHINode &
I);
502 bool visitGetElementPtr(GetElementPtrInst &
I);
503 bool visitBitCast(BitCastInst &
I);
504 bool visitPtrToInt(PtrToIntInst &
I);
505 bool visitIntToPtr(IntToPtrInst &
I);
506 bool visitCastInst(CastInst &
I);
507 bool visitCmpInst(CmpInst &
I);
508 bool visitSub(BinaryOperator &
I);
509 bool visitBinaryOperator(BinaryOperator &
I);
510 bool visitFNeg(UnaryOperator &
I);
511 bool visitLoad(LoadInst &
I);
512 bool visitStore(StoreInst &
I);
513 bool visitExtractValue(ExtractValueInst &
I);
514 bool visitInsertValue(InsertValueInst &
I);
515 bool visitCallBase(CallBase &
Call);
516 bool visitReturnInst(ReturnInst &RI);
517 bool visitUncondBrInst(UncondBrInst &BI);
518 bool visitCondBrInst(CondBrInst &BI);
519 bool visitSelectInst(SelectInst &SI);
520 bool visitSwitchInst(SwitchInst &SI);
521 bool visitIndirectBrInst(IndirectBrInst &IBI);
522 bool visitResumeInst(ResumeInst &RI);
523 bool visitCleanupReturnInst(CleanupReturnInst &RI);
524 bool visitCatchReturnInst(CatchReturnInst &RI);
525 bool visitUnreachableInst(UnreachableInst &
I);
529 Function &Callee, CallBase &
Call,
const TargetTransformInfo &TTI,
530 function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
531 function_ref<BlockFrequencyInfo &(Function &)> GetBFI =
nullptr,
532 function_ref<
const TargetLibraryInfo &(Function &)> GetTLI =
nullptr,
533 ProfileSummaryInfo *PSI =
nullptr,
534 OptimizationRemarkEmitter *ORE =
nullptr,
535 function_ref<EphemeralValuesCache &(Function &)> GetEphValuesCache =
537 : TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI),
538 GetTLI(GetTLI), PSI(PSI), F(
Callee), DL(F.getDataLayout()), ORE(ORE),
539 CandidateCall(
Call), GetEphValuesCache(GetEphValuesCache) {}
541 InlineResult analyze();
/// Returns whatever SimplifiedValues has recorded for \p V, as a raw lookup
/// in the map with no further filtering of the result.
/// NOTE(review): presumably yields a null pointer when V has no entry
/// (DenseMap::lookup returns a default-constructed value) — confirm.
544 Value *getSimplifiedValueUnchecked(
Value *V)
const {
545 return SimplifiedValues.lookup(V);
550 template <
typename T>
T *getSimplifiedValue(
Value *V)
const {
551 Value *SimpleV = SimplifiedValues.lookup(V);
557 if constexpr (std::is_base_of_v<Constant, T>)
562 if (
I->getFunction() != &F)
565 if (Arg->getParent() != &F)
574 unsigned NumConstantArgs = 0;
575 unsigned NumConstantOffsetPtrArgs = 0;
576 unsigned NumAllocaArgs = 0;
577 unsigned NumConstantPtrCmps = 0;
578 unsigned NumConstantPtrDiffs = 0;
579 unsigned NumInstructionsSimplified = 0;
/// Returns 3 * NumCaseCluster / 2 - 1, using truncating integer division.
/// NumCaseCluster is widened to int64_t before the multiplication, so the
/// product is evaluated in 64-bit arithmetic rather than in int.
/// The onFinalizeSwitch overrides in this file multiply the result by a
/// per-instruction cost to obtain the switch cost.
599int64_t getExpectedNumberOfCompare(
int NumCaseCluster) {
600 return 3 *
static_cast<int64_t
>(NumCaseCluster) / 2 - 1;
605class InlineCostCallAnalyzer final :
public CallAnalyzer {
606 const bool ComputeFullInlineCost;
607 int LoadEliminationCost = 0;
612 int SingleBBBonus = 0;
615 const InlineParams &Params;
620 DenseMap<const Instruction *, InstructionCostDetail> InstructionCostDetailMap;
627 int StaticBonusApplied = 0;
630 const bool BoostIndirectCalls;
633 const bool IgnoreThreshold;
636 const bool CostBenefitAnalysisEnabled;
647 int CostAtBBStart = 0;
654 bool DecidedByCostThreshold =
false;
657 bool DecidedByCostBenefit =
false;
660 std::optional<CostBenefitPair> CostBenefit;
662 bool SingleBB =
true;
664 unsigned SROACostSavings = 0;
665 unsigned SROACostSavingsLost = 0;
670 DenseMap<AllocaInst *, int> SROAArgCosts;
679 void updateThreshold(CallBase &
Call, Function &Callee);
681 std::optional<int> getHotCallSiteThreshold(CallBase &
Call,
682 BlockFrequencyInfo *CallerBFI);
/// Adds \p Inc to the running Cost, saturating instead of overflowing.
/// Inc is first clamped to [INT_MIN, INT_MAX]; the sum is then clamped to
/// the same range before being stored back into Cost.
685 void addCost(int64_t Inc) {
686 Inc = std::clamp<int64_t>(Inc, INT_MIN, INT_MAX);
// Inc is int64_t, so the addition below is carried out in 64 bits.
// NOTE(review): assumes Cost is an int-sized value (getCost() returns int);
// its declaration is not visible here — confirm.
687 Cost = std::clamp<int64_t>(Inc + Cost, INT_MIN, INT_MAX);
690 void onDisableSROA(AllocaInst *Arg)
override {
691 auto CostIt = SROAArgCosts.find(Arg);
692 if (CostIt == SROAArgCosts.end())
694 addCost(CostIt->second);
695 SROACostSavings -= CostIt->second;
696 SROACostSavingsLost += CostIt->second;
697 SROAArgCosts.erase(CostIt);
/// Hook run when load elimination is disabled: charges the accumulated
/// LoadEliminationCost to the running cost, then zeroes the accumulator so
/// the same amount cannot be charged again.
700 void onDisableLoadElimination()
override {
701 addCost(LoadEliminationCost);
702 LoadEliminationCost = 0;
705 bool onCallBaseVisitStart(CallBase &
Call)
override {
706 if (std::optional<int> AttrCallThresholdBonus =
708 Threshold += *AttrCallThresholdBonus;
710 if (std::optional<int> AttrCallCost =
712 addCost(*AttrCallCost);
/// Charges CallPenalty to the running cost via addCost().
720 void onCallPenalty()
override { addCost(
CallPenalty); }
724 void onCallArgumentSetup(
const CallBase &
Call)
override {
729 void onLoadRelativeIntrinsic()
override {
733 void onLoweredCall(Function *
F, CallBase &
Call,
744 auto IndirectCallParams = Params;
745 IndirectCallParams.DefaultThreshold =
749 InlineCostCallAnalyzer CA(*
F,
Call, IndirectCallParams,
TTI,
750 GetAssumptionCache, GetBFI, GetTLI, PSI, ORE,
752 if (CA.analyze().isSuccess()) {
755 addCost(-std::max(0, CA.getThreshold() - CA.getCost()));
763 void onFinalizeSwitch(
unsigned JumpTableSize,
unsigned NumCaseCluster,
764 bool DefaultDestUnreachable)
override {
771 if (!DefaultDestUnreachable)
780 if (NumCaseCluster <= 3) {
784 addCost((NumCaseCluster - DefaultDestUnreachable) * 2 *
InstrCost);
788 int64_t ExpectedNumberOfCompare =
789 getExpectedNumberOfCompare(NumCaseCluster);
790 int64_t SwitchCost = ExpectedNumberOfCompare * 2 *
InstrCost;
798 void onInlineAsm(
const InlineAsm &Arg)
override {
803 int SectionLevel = 0;
804 int InlineAsmInstrCount = 0;
805 for (StringRef AsmStr : AsmStrs) {
807 StringRef Trimmed = AsmStr.trim();
808 size_t hashPos = Trimmed.
find(
'#');
810 Trimmed = Trimmed.
substr(0, hashPos);
829 if (SectionLevel == 0)
830 ++InlineAsmInstrCount;
832 NumInlineAsmInstructions += InlineAsmInstrCount;
/// Charges one InstrCost to the running cost via addCost().
836 void onMissedSimplification()
override { addCost(
InstrCost); }
838 void onInitializeSROAArg(AllocaInst *Arg)
override {
840 "Should not initialize SROA costs for null value.");
842 SROACostSavings += SROAArgCost;
843 SROAArgCosts[Arg] = SROAArgCost;
846 void onAggregateSROAUse(AllocaInst *SROAArg)
override {
847 auto CostIt = SROAArgCosts.find(SROAArg);
848 assert(CostIt != SROAArgCosts.end() &&
849 "expected this argument to have a cost");
/// Snapshots the running Cost into CostAtBBStart at the start of a block.
/// onBlockAnalyzed() uses Cost - CostAtBBStart as the cost added by the
/// block. The BB argument itself is not used here.
854 void onBlockStart(
const BasicBlock *BB)
override { CostAtBBStart = Cost; }
856 void onBlockAnalyzed(
const BasicBlock *BB)
override {
857 if (CostBenefitAnalysisEnabled) {
860 assert(GetBFI &&
"GetBFI must be available");
861 BlockFrequencyInfo *BFI = &(GetBFI(
F));
862 assert(BFI &&
"BFI must be available");
865 ColdSize += Cost - CostAtBBStart;
873 if (SingleBB && TI->getNumSuccessors() > 1) {
875 Threshold -= SingleBBBonus;
880 void onInstructionAnalysisStart(
const Instruction *
I)
override {
885 auto &CostDetail = InstructionCostDetailMap[
I];
886 CostDetail.CostBefore = Cost;
887 CostDetail.ThresholdBefore = Threshold;
890 void onInstructionAnalysisFinish(
const Instruction *
I)
override {
895 auto &CostDetail = InstructionCostDetailMap[
I];
896 CostDetail.CostAfter = Cost;
897 CostDetail.ThresholdAfter = Threshold;
900 bool isCostBenefitAnalysisEnabled() {
901 if (!PSI || !PSI->hasProfileSummary())
913 if (!PSI->hasInstrumentationProfile())
918 if (!
Caller->getEntryCount())
921 BlockFrequencyInfo *CallerBFI = &(GetBFI(*Caller));
926 if (!PSI->isHotCallSite(CandidateCall, CallerBFI))
930 auto EntryCount =
F.getEntryCount();
931 if (!EntryCount || !EntryCount->getCount())
934 BlockFrequencyInfo *CalleeBFI = &(GetBFI(
F));
942 unsigned getInliningCostBenefitAnalysisSavingsMultiplier()
const {
949 unsigned getInliningCostBenefitAnalysisProfitableMultiplier()
const {
955 void OverrideCycleSavingsAndSizeForTesting(APInt &CycleSavings,
int &
Size) {
957 CandidateCall,
"inline-cycle-savings-for-test")) {
958 CycleSavings = *AttrCycleSavings;
962 CandidateCall,
"inline-runtime-cost-for-test")) {
963 Size = *AttrRuntimeCost;
970 std::optional<bool> costBenefitAnalysis() {
971 if (!CostBenefitAnalysisEnabled)
982 BlockFrequencyInfo *CalleeBFI = &(GetBFI(
F));
995 APInt CycleSavings(128, 0);
998 APInt CurrentSavings(128, 0);
1002 if (getSimplifiedValue<ConstantInt>(BI->getCondition()))
1005 if (getSimplifiedValue<ConstantInt>(
SI->getCondition()))
1009 if (SimplifiedValues.
count(V)) {
1017 CycleSavings += CurrentSavings;
1021 auto EntryProfileCount =
F.getEntryCount();
1022 assert(EntryProfileCount && EntryProfileCount->getCount());
1023 auto EntryCount = EntryProfileCount->getCount();
1024 CycleSavings += EntryCount / 2;
1025 CycleSavings = CycleSavings.
udiv(EntryCount);
1028 auto *CallerBB = CandidateCall.
getParent();
1029 BlockFrequencyInfo *CallerBFI = &(GetBFI(*(CallerBB->getParent())));
1036 int Size = Cost - ColdSize;
1042 OverrideCycleSavingsAndSizeForTesting(CycleSavings,
Size);
1043 CostBenefit.emplace(APInt(128,
Size), CycleSavings);
1066 APInt Threshold(128, PSI->getOrCompHotCountThreshold());
1069 APInt UpperBoundCycleSavings = CycleSavings;
1070 UpperBoundCycleSavings *= getInliningCostBenefitAnalysisSavingsMultiplier();
1071 if (UpperBoundCycleSavings.
uge(Threshold))
1074 APInt LowerBoundCycleSavings = CycleSavings;
1075 LowerBoundCycleSavings *=
1076 getInliningCostBenefitAnalysisProfitableMultiplier();
1077 if (LowerBoundCycleSavings.
ult(Threshold))
1081 return std::nullopt;
1084 InlineResult finalizeAnalysis()
override {
1091 if (
Caller->hasMinSize()) {
1092 DominatorTree DT(
F);
1095 for (Loop *L : LI) {
1097 if (DeadBlocks.
count(
L->getHeader()))
1107 if (NumVectorInstructions <= NumInstructions / 10)
1108 Threshold -= VectorBonus;
1109 else if (NumVectorInstructions <= NumInstructions / 2)
1110 Threshold -= VectorBonus / 2;
1112 if (std::optional<int> AttrCost =
1119 Cost *= *AttrCostMult;
1121 if (std::optional<int> AttrThreshold =
1123 Threshold = *AttrThreshold;
1125 if (
auto Result = costBenefitAnalysis()) {
1126 DecidedByCostBenefit =
true;
1133 if (IgnoreThreshold)
1136 DecidedByCostThreshold =
true;
1137 return Cost < std::max(1, Threshold)
1139 : InlineResult::
failure(
"Cost over threshold.");
1142 bool shouldStop()
override {
1143 if (IgnoreThreshold || ComputeFullInlineCost)
1147 if (Cost < Threshold)
1149 DecidedByCostThreshold =
true;
1153 void onLoadEliminationOpportunity()
override {
1157 InlineResult onAnalysisStart()
override {
1168 assert(NumInstructions == 0);
1169 assert(NumVectorInstructions == 0);
1172 updateThreshold(CandidateCall,
F);
1178 assert(SingleBBBonus >= 0);
1179 assert(VectorBonus >= 0);
1184 Threshold += (SingleBBBonus + VectorBonus);
1192 if (
F.getCallingConv() == CallingConv::Cold)
1198 if (Cost >= Threshold && !ComputeFullInlineCost)
1205 InlineCostCallAnalyzer(
1206 Function &Callee, CallBase &
Call,
const InlineParams &Params,
1207 const TargetTransformInfo &
TTI,
1208 function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
1209 function_ref<BlockFrequencyInfo &(Function &)> GetBFI =
nullptr,
1210 function_ref<
const TargetLibraryInfo &(Function &)> GetTLI =
nullptr,
1211 ProfileSummaryInfo *PSI =
nullptr,
1212 OptimizationRemarkEmitter *ORE =
nullptr,
bool BoostIndirect =
true,
1213 bool IgnoreThreshold =
false,
1214 function_ref<EphemeralValuesCache &(Function &)> GetEphValuesCache =
1216 : CallAnalyzer(
Callee,
Call,
TTI, GetAssumptionCache, GetBFI, GetTLI, PSI,
1217 ORE, GetEphValuesCache),
1219 Params.ComputeFullInlineCost || ORE ||
1220 isCostBenefitAnalysisEnabled()),
1222 BoostIndirectCalls(BoostIndirect), IgnoreThreshold(IgnoreThreshold),
1223 CostBenefitAnalysisEnabled(isCostBenefitAnalysisEnabled()),
1225 AllowRecursiveCall = *Params.AllowRecursiveCall;
1229 InlineCostAnnotationWriter Writer;
1235 void print(raw_ostream &OS);
1237 std::optional<InstructionCostDetail> getCostDetails(
const Instruction *
I) {
1238 auto It = InstructionCostDetailMap.find(
I);
1239 if (It != InstructionCostDetailMap.end())
1241 return std::nullopt;
1244 ~InlineCostCallAnalyzer()
override =
default;
/// Current value of Threshold.
1245 int getThreshold()
const {
return Threshold; }
/// Current value of the running Cost.
1246 int getCost()
const {
return Cost; }
/// Value of StaticBonusApplied (set by updateThreshold() when the
/// last-call-to-static bonus is applied; otherwise its initial 0).
1247 int getStaticBonusApplied()
const {
return StaticBonusApplied; }
/// Returns a copy of the optional CostBenefit pair; empty unless
/// costBenefitAnalysis() emplaced a value.
/// NOTE(review): unlike the neighbouring accessors this one is not declared
/// const, although it only reads a member.
1248 std::optional<CostBenefitPair> getCostBenefitPair() {
return CostBenefit; }
/// Value of the DecidedByCostBenefit flag.
1249 bool wasDecidedByCostBenefit()
const {
return DecidedByCostBenefit; }
/// Value of the DecidedByCostThreshold flag.
1250 bool wasDecidedByCostThreshold()
const {
return DecidedByCostThreshold; }
1254static bool isSoleCallToLocalFunction(
const CallBase &CB,
1256 return Callee.hasLocalLinkage() &&
Callee.hasOneLiveUse() &&
1260class InlineCostFeaturesAnalyzer final :
public CallAnalyzer {
1267 static constexpr int JTCostMultiplier = 2;
1268 static constexpr int CaseClusterCostMultiplier = 2;
1269 static constexpr int SwitchDefaultDestCostMultiplier = 2;
1270 static constexpr int SwitchCostMultiplier = 2;
1274 unsigned SROACostSavingOpportunities = 0;
1275 int VectorBonus = 0;
1276 int SingleBBBonus = 0;
1279 DenseMap<AllocaInst *, unsigned> SROACosts;
1282 Cost[
static_cast<size_t>(Feature)] += Delta;
1286 Cost[
static_cast<size_t>(Feature)] =
Value;
1289 void onDisableSROA(AllocaInst *Arg)
override {
1290 auto CostIt = SROACosts.find(Arg);
1291 if (CostIt == SROACosts.end())
1294 increment(InlineCostFeatureIndex::sroa_losses, CostIt->second);
1295 SROACostSavingOpportunities -= CostIt->second;
1296 SROACosts.erase(CostIt);
/// Hook run when load elimination is disabled: overwrites the
/// load_elimination feature slot with 1. This replaces (rather than adds
/// to) anything onLoadEliminationOpportunity() accumulated in that slot.
1299 void onDisableLoadElimination()
override {
1300 set(InlineCostFeatureIndex::load_elimination, 1);
1303 void onCallPenalty()
override {
1304 increment(InlineCostFeatureIndex::call_penalty,
CallPenalty);
1307 void onCallArgumentSetup(
const CallBase &
Call)
override {
1308 increment(InlineCostFeatureIndex::call_argument_setup,
1312 void onLoadRelativeIntrinsic()
override {
1313 increment(InlineCostFeatureIndex::load_relative_intrinsic, 3 *
InstrCost);
1316 void onLoweredCall(Function *
F, CallBase &
Call,
1318 increment(InlineCostFeatureIndex::lowered_call_arg_setup,
1322 InlineParams IndirectCallParams = { 0,
1335 InlineCostCallAnalyzer CA(*
F,
Call, IndirectCallParams,
TTI,
1336 GetAssumptionCache, GetBFI, GetTLI, PSI, ORE,
1338 if (CA.analyze().isSuccess()) {
1339 increment(InlineCostFeatureIndex::nested_inline_cost_estimate,
1341 increment(InlineCostFeatureIndex::nested_inlines, 1);
1348 void onFinalizeSwitch(
unsigned JumpTableSize,
unsigned NumCaseCluster,
1349 bool DefaultDestUnreachable)
override {
1350 if (JumpTableSize) {
1351 if (!DefaultDestUnreachable)
1352 increment(InlineCostFeatureIndex::switch_default_dest_penalty,
1353 SwitchDefaultDestCostMultiplier *
InstrCost);
1354 int64_t JTCost =
static_cast<int64_t
>(JumpTableSize) *
InstrCost +
1356 increment(InlineCostFeatureIndex::jump_table_penalty, JTCost);
1360 if (NumCaseCluster <= 3) {
1361 increment(InlineCostFeatureIndex::case_cluster_penalty,
1362 (NumCaseCluster - DefaultDestUnreachable) *
1367 int64_t ExpectedNumberOfCompare =
1368 getExpectedNumberOfCompare(NumCaseCluster);
1370 int64_t SwitchCost =
1371 ExpectedNumberOfCompare * SwitchCostMultiplier *
InstrCost;
1372 increment(InlineCostFeatureIndex::switch_penalty, SwitchCost);
1375 void onMissedSimplification()
override {
1376 increment(InlineCostFeatureIndex::unsimplified_common_instructions,
1380 void onInitializeSROAArg(AllocaInst *Arg)
override {
1382 SROACosts[Arg] = SROAArgCost;
1383 SROACostSavingOpportunities += SROAArgCost;
/// Adds one InstrCost both to the SROA cost recorded for \p Arg and to the
/// SROACostSavingOpportunities total.
// NOTE(review): the iterator returned by find() is dereferenced without
// comparing it against end(), so Arg must already have an entry in
// SROACosts. InlineCostCallAnalyzer::onAggregateSROAUse asserts the
// equivalent precondition; consider adding the same assert here.
1386 void onAggregateSROAUse(AllocaInst *Arg)
override {
1387 SROACosts.find(Arg)->second +=
InstrCost;
1388 SROACostSavingOpportunities +=
InstrCost;
1391 void onBlockAnalyzed(
const BasicBlock *BB)
override {
1393 set(InlineCostFeatureIndex::is_multiple_blocks, 1);
1394 Threshold -= SingleBBBonus;
1397 InlineResult finalizeAnalysis()
override {
1399 if (
Caller->hasMinSize()) {
1400 DominatorTree DT(
F);
1402 for (Loop *L : LI) {
1404 if (DeadBlocks.
count(
L->getHeader()))
1406 increment(InlineCostFeatureIndex::num_loops,
1410 set(InlineCostFeatureIndex::dead_blocks, DeadBlocks.
size());
1411 set(InlineCostFeatureIndex::simplified_instructions,
1412 NumInstructionsSimplified);
1413 set(InlineCostFeatureIndex::constant_args, NumConstantArgs);
1414 set(InlineCostFeatureIndex::constant_offset_ptr_args,
1415 NumConstantOffsetPtrArgs);
1416 set(InlineCostFeatureIndex::sroa_savings, SROACostSavingOpportunities);
1418 if (NumVectorInstructions <= NumInstructions / 10)
1419 Threshold -= VectorBonus;
1420 else if (NumVectorInstructions <= NumInstructions / 2)
1421 Threshold -= VectorBonus / 2;
1423 set(InlineCostFeatureIndex::threshold, Threshold);
/// Always returns false: this analyzer never asks for the analysis to stop.
1428 bool shouldStop()
override {
return false; }
1430 void onLoadEliminationOpportunity()
override {
1431 increment(InlineCostFeatureIndex::load_elimination, 1);
1434 InlineResult onAnalysisStart()
override {
1435 increment(InlineCostFeatureIndex::callsite_cost,
1438 set(InlineCostFeatureIndex::cold_cc_penalty,
1439 (
F.getCallingConv() == CallingConv::Cold));
1441 set(InlineCostFeatureIndex::last_call_to_static_bonus,
1442 isSoleCallToLocalFunction(CandidateCall,
F));
1447 int SingleBBBonusPercent = 50;
1451 SingleBBBonus = Threshold * SingleBBBonusPercent / 100;
1452 VectorBonus = Threshold * VectorBonusPercent / 100;
1453 Threshold += (SingleBBBonus + VectorBonus);
1459 InlineCostFeaturesAnalyzer(
1460 const TargetTransformInfo &
TTI,
1461 function_ref<AssumptionCache &(Function &)> &GetAssumptionCache,
1462 function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
1463 function_ref<
const TargetLibraryInfo &(Function &)> GetTLI,
1464 ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE, Function &Callee,
1466 : CallAnalyzer(
Callee,
Call,
TTI, GetAssumptionCache, GetBFI, GetTLI,
/// Returns true when \p V has an entry in SROAArgValues.
/// Only map membership is tested; EnabledSROAAllocas is not consulted, so
/// this differs from getSROAArgForValueOrNull(), which also checks it.
1475bool CallAnalyzer::isAllocaDerivedArg(
Value *V) {
1476 return SROAArgValues.
count(V);
/// Turns SROA off for \p SROAArg in three steps.
1479void CallAnalyzer::disableSROAForArg(AllocaInst *SROAArg) {
// Notify the subclass hook first, while the alloca is still in the enabled set.
1480 onDisableSROA(SROAArg);
// Remove the alloca from the set of SROA-enabled allocas.
1481 EnabledSROAAllocas.
erase(SROAArg);
// Disabling SROA also disables load elimination.
1482 disableLoadElimination();
1485void InlineCostAnnotationWriter::emitInstructionAnnot(
1486 const Instruction *
I, formatted_raw_ostream &OS) {
1490 std::optional<InstructionCostDetail>
Record = ICCA->getCostDetails(
I);
1492 OS <<
"; No analysis for the instruction";
1494 OS <<
"; cost before = " <<
Record->CostBefore
1495 <<
", cost after = " <<
Record->CostAfter
1496 <<
", threshold before = " <<
Record->ThresholdBefore
1497 <<
", threshold after = " <<
Record->ThresholdAfter <<
", ";
1498 OS <<
"cost delta = " <<
Record->getCostDelta();
1499 if (
Record->hasThresholdChanged())
1500 OS <<
", threshold delta = " <<
Record->getThresholdDelta();
1502 auto *
V = ICCA->getSimplifiedValueUnchecked(
const_cast<Instruction *
>(
I));
1504 OS <<
", simplified to ";
1507 if (
VI->getFunction() !=
I->getFunction())
1508 OS <<
" (caller instruction)";
1510 if (VArg->getParent() !=
I->getFunction())
1511 OS <<
" (caller argument)";
/// Disables SROA for the alloca that \p V maps to, if there is one.
/// When getSROAArgForValueOrNull() returns null, nothing happens.
1518void CallAnalyzer::disableSROA(
Value *V) {
1519 if (
auto *SROAArg = getSROAArgForValueOrNull(V)) {
1520 disableSROAForArg(SROAArg);
/// Disables load elimination. The onDisableLoadElimination() hook runs only
/// on the transition from enabled to disabled; later calls do nothing
/// because EnableLoadElimination is already false.
1524void CallAnalyzer::disableLoadElimination() {
1525 if (EnableLoadElimination) {
1526 onDisableLoadElimination();
1527 EnableLoadElimination =
false;
1535bool CallAnalyzer::accumulateGEPOffset(GEPOperator &
GEP, APInt &
Offset) {
1536 unsigned IntPtrWidth =
DL.getIndexTypeSizeInBits(
GEP.getType());
1540 GTI != GTE; ++GTI) {
1542 getDirectOrSimplifiedValue<ConstantInt>(GTI.getOperand());
1549 if (StructType *STy = GTI.getStructTypeOrNull()) {
1551 const StructLayout *SL =
DL.getStructLayout(STy);
1556 APInt TypeSize(IntPtrWidth, GTI.getSequentialElementStride(
DL));
1565bool CallAnalyzer::isGEPFree(GetElementPtrInst &
GEP) {
1566 SmallVector<Value *, 4> Operands;
1568 for (
const Use &
Op :
GEP.indices())
1569 if (Constant *SimpleOp = getSimplifiedValue<Constant>(
Op))
1578bool CallAnalyzer::visitAlloca(AllocaInst &
I) {
1579 disableSROA(
I.getOperand(0));
1583 if (
I.isArrayAllocation()) {
1584 Constant *
Size = getSimplifiedValue<Constant>(
I.getArraySize());
1594 Type *Ty =
I.getAllocatedType();
1596 AllocSize->getLimitedValue(),
1597 DL.getTypeAllocSize(Ty).getKnownMinValue(), AllocatedSize);
1599 HasDynamicAlloca =
true;
1604 if (
I.isStaticAlloca()) {
1616 HasDynamicAlloca =
true;
1622bool CallAnalyzer::visitPHI(PHINode &
I) {
1634 bool CheckSROA =
I.getType()->isPointerTy();
1638 std::pair<Value *, APInt> FirstBaseAndOffset = {
nullptr, ZeroOffset};
1639 Value *FirstV =
nullptr;
1641 for (
unsigned i = 0, e =
I.getNumIncomingValues(); i != e; ++i) {
1644 if (DeadBlocks.
count(Pred))
1648 BasicBlock *KnownSuccessor = KnownSuccessors[Pred];
1649 if (KnownSuccessor && KnownSuccessor !=
I.getParent())
1652 Value *
V =
I.getIncomingValue(i);
1657 Constant *
C = getDirectOrSimplifiedValue<Constant>(V);
1659 std::pair<Value *, APInt> BaseAndOffset = {
nullptr, ZeroOffset};
1660 if (!
C && CheckSROA)
1661 BaseAndOffset = ConstantOffsetPtrs.
lookup(V);
1663 if (!
C && !BaseAndOffset.first)
1680 if (FirstBaseAndOffset == BaseAndOffset)
1694 FirstBaseAndOffset = BaseAndOffset;
1699 SimplifiedValues[&
I] = FirstC;
1704 if (FirstBaseAndOffset.first) {
1705 ConstantOffsetPtrs[&
I] = std::move(FirstBaseAndOffset);
1707 if (
auto *SROAArg = getSROAArgForValueOrNull(FirstV))
1708 SROAArgValues[&
I] = SROAArg;
1718bool CallAnalyzer::canFoldInboundsGEP(GetElementPtrInst &
I) {
1720 std::pair<Value *, APInt> BaseAndOffset =
1721 ConstantOffsetPtrs.
lookup(
I.getPointerOperand());
1722 if (!BaseAndOffset.first)
1731 ConstantOffsetPtrs[&
I] = std::move(BaseAndOffset);
1736bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &
I) {
1737 auto *SROAArg = getSROAArgForValueOrNull(
I.getPointerOperand());
1740 auto IsGEPOffsetConstant = [&](GetElementPtrInst &
GEP) {
1741 for (
const Use &
Op :
GEP.indices())
1742 if (!getDirectOrSimplifiedValue<Constant>(
Op))
1751 if ((
I.isInBounds() && canFoldInboundsGEP(
I)) || IsGEPOffsetConstant(
I)) {
1753 SROAArgValues[&
I] = SROAArg;
1761 disableSROAForArg(SROAArg);
1762 return isGEPFree(
I);
1768bool CallAnalyzer::simplifyCmpInstForRecCall(CmpInst &Cmp) {
1772 auto *CmpOp =
Cmp.getOperand(0);
1777 auto *CallBB = CandidateCall.
getParent();
1778 auto *Predecessor = CallBB->getSinglePredecessor();
1783 if (!Br || Br->getCondition() != &Cmp)
1788 bool ArgFound =
false;
1789 Value *FuncArg =
nullptr, *CallArg =
nullptr;
1790 for (
unsigned ArgNum = 0;
1791 ArgNum <
F.arg_size() && ArgNum < CandidateCall.
arg_size(); ArgNum++) {
1792 FuncArg =
F.getArg(ArgNum);
1794 if (FuncArg == CmpOp && CallArg != CmpOp) {
1805 CondContext CC(&Cmp);
1806 CC.Invert = (CallBB != Br->getSuccessor(0));
1808 CC.AffectedValues.insert(FuncArg);
1814 if ((ConstVal->isOne() && CC.Invert) ||
1815 (ConstVal->isZero() && !CC.Invert)) {
1816 SimplifiedValues[&
Cmp] = ConstVal;
1824bool CallAnalyzer::simplifyInstruction(Instruction &
I) {
1827 Constant *COp = getDirectOrSimplifiedValue<Constant>(
Op);
1835 SimplifiedValues[&
I] =
C;
1848bool CallAnalyzer::simplifyIntrinsicCallIsConstant(CallBase &CB) {
1850 auto *
C = getDirectOrSimplifiedValue<Constant>(Arg);
1853 SimplifiedValues[&CB] = ConstantInt::get(RT,
C ? 1 : 0);
1857bool CallAnalyzer::simplifyIntrinsicCallObjectSize(CallBase &CB) {
1867 SimplifiedValues[&CB] =
C;
1871bool CallAnalyzer::visitBitCast(BitCastInst &
I) {
1877 std::pair<Value *, APInt> BaseAndOffset =
1878 ConstantOffsetPtrs.
lookup(
I.getOperand(0));
1880 if (BaseAndOffset.first)
1881 ConstantOffsetPtrs[&
I] = std::move(BaseAndOffset);
1884 if (
auto *SROAArg = getSROAArgForValueOrNull(
I.getOperand(0)))
1885 SROAArgValues[&
I] = SROAArg;
1891bool CallAnalyzer::visitPtrToInt(PtrToIntInst &
I) {
1899 unsigned AS =
I.getOperand(0)->getType()->getPointerAddressSpace();
1900 if (IntegerSize ==
DL.getPointerSizeInBits(AS)) {
1901 std::pair<Value *, APInt> BaseAndOffset =
1902 ConstantOffsetPtrs.
lookup(
I.getOperand(0));
1903 if (BaseAndOffset.first)
1904 ConstantOffsetPtrs[&
I] = std::move(BaseAndOffset);
1914 if (
auto *SROAArg = getSROAArgForValueOrNull(
I.getOperand(0)))
1915 SROAArgValues[&
I] = SROAArg;
1921bool CallAnalyzer::visitIntToPtr(IntToPtrInst &
I) {
1929 unsigned IntegerSize =
Op->getType()->getScalarSizeInBits();
1930 if (IntegerSize <=
DL.getPointerTypeSizeInBits(
I.getType())) {
1931 std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.
lookup(
Op);
1932 if (BaseAndOffset.first)
1933 ConstantOffsetPtrs[&
I] = std::move(BaseAndOffset);
1937 if (
auto *SROAArg = getSROAArgForValueOrNull(
Op))
1938 SROAArgValues[&
I] = SROAArg;
1944bool CallAnalyzer::visitCastInst(CastInst &
I) {
1951 disableSROA(
I.getOperand(0));
1956 switch (
I.getOpcode()) {
1957 case Instruction::FPTrunc:
1958 case Instruction::FPExt:
1959 case Instruction::UIToFP:
1960 case Instruction::SIToFP:
1961 case Instruction::FPToUI:
1962 case Instruction::FPToSI:
1974bool CallAnalyzer::paramHasAttr(Argument *
A, Attribute::AttrKind Attr) {
1978bool CallAnalyzer::isKnownNonNullInCallee(
Value *V) {
1985 if (paramHasAttr(
A, Attribute::NonNull))
1991 if (isAllocaDerivedArg(V))
2000bool CallAnalyzer::allowSizeGrowth(CallBase &
Call) {
2025bool InlineCostCallAnalyzer::isColdCallSite(CallBase &
Call,
2026 BlockFrequencyInfo *CallerBFI) {
2029 if (PSI && PSI->hasProfileSummary())
2030 return PSI->isColdCallSite(
Call, CallerBFI);
2042 auto CallSiteFreq = CallerBFI->
getBlockFreq(CallSiteBB);
2043 auto CallerEntryFreq =
2045 return CallSiteFreq < CallerEntryFreq * ColdProb;
2049InlineCostCallAnalyzer::getHotCallSiteThreshold(CallBase &
Call,
2050 BlockFrequencyInfo *CallerBFI) {
2054 if (PSI && PSI->hasProfileSummary() && PSI->isHotCallSite(
Call, CallerBFI))
2060 return std::nullopt;
2067 BlockFrequency CallSiteFreq = CallerBFI->
getBlockFreq(CallSiteBB);
2068 BlockFrequency CallerEntryFreq = CallerBFI->
getEntryFreq();
2070 if (Limit && CallSiteFreq >= *Limit)
2074 return std::nullopt;
2077void InlineCostCallAnalyzer::updateThreshold(CallBase &
Call, Function &Callee) {
2079 if (!allowSizeGrowth(
Call)) {
2087 auto MinIfValid = [](
int A, std::optional<int>
B) {
2088 return B ? std::min(
A, *
B) :
A;
2092 auto MaxIfValid = [](
int A, std::optional<int>
B) {
2093 return B ? std::max(
A, *
B) :
A;
2108 int SingleBBBonusPercent = 50;
2113 auto DisallowAllBonuses = [&]() {
2114 SingleBBBonusPercent = 0;
2115 VectorBonusPercent = 0;
2116 LastCallToStaticBonus = 0;
2121 if (
Caller->hasMinSize()) {
2127 SingleBBBonusPercent = 0;
2128 VectorBonusPercent = 0;
2129 }
else if (
Caller->hasOptSize())
2134 if (!
Caller->hasMinSize()) {
2135 if (
Callee.hasFnAttribute(Attribute::InlineHint))
2145 BlockFrequencyInfo *CallerBFI = GetBFI ? &(GetBFI(*Caller)) : nullptr;
2160 DisallowAllBonuses();
2165 if (PSI->isFunctionEntryHot(&Callee)) {
2171 }
else if (PSI->isFunctionEntryCold(&Callee)) {
2177 DisallowAllBonuses();
2189 SingleBBBonus = Threshold * SingleBBBonusPercent / 100;
2190 VectorBonus = Threshold * VectorBonusPercent / 100;
2195 if (isSoleCallToLocalFunction(
Call,
F)) {
2196 addCost(-LastCallToStaticBonus);
2197 StaticBonusApplied = LastCallToStaticBonus;
2201bool CallAnalyzer::visitCmpInst(CmpInst &
I) {
2208 if (simplifyCmpInstForRecCall(
I))
2211 if (
I.getOpcode() == Instruction::FCmp)
2216 Value *LHSBase, *RHSBase;
2217 APInt LHSOffset, RHSOffset;
2218 std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.
lookup(
LHS);
2220 std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.
lookup(
RHS);
2221 if (RHSBase && LHSBase == RHSBase) {
2227 ++NumConstantPtrCmps;
2232 auto isImplicitNullCheckCmp = [](
const CmpInst &
I) {
2233 for (
auto *User :
I.users())
2235 if (!
Instr->getMetadata(LLVMContext::MD_make_implicit))
2243 if (isKnownNonNullInCallee(
I.getOperand(0))) {
2251 if (isImplicitNullCheckCmp(
I))
2257bool CallAnalyzer::visitSub(BinaryOperator &
I) {
2261 Value *LHSBase, *RHSBase;
2262 APInt LHSOffset, RHSOffset;
2263 std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.
lookup(
LHS);
2265 std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.
lookup(
RHS);
2266 if (RHSBase && LHSBase == RHSBase) {
2272 SimplifiedValues[&
I] =
C;
2273 ++NumConstantPtrDiffs;
2281 return Base::visitSub(
I);
2284bool CallAnalyzer::visitBinaryOperator(BinaryOperator &
I) {
2286 Constant *CLHS = getDirectOrSimplifiedValue<Constant>(
LHS);
2287 Constant *CRHS = getDirectOrSimplifiedValue<Constant>(
RHS);
2289 Value *SimpleV =
nullptr;
2292 FI->getFastMathFlags(),
DL);
2298 SimplifiedValues[&
I] =
C;
2310 using namespace llvm::PatternMatch;
2311 if (
I.getType()->isFloatingPointTy() &&
2319bool CallAnalyzer::visitFNeg(UnaryOperator &
I) {
2321 Constant *COp = getDirectOrSimplifiedValue<Constant>(
Op);
2327 SimplifiedValues[&
I] =
C;
2338bool CallAnalyzer::visitLoad(LoadInst &
I) {
2339 if (handleSROA(
I.getPointerOperand(),
I.isSimple()))
2345 if (EnableLoadElimination &&
2346 !LoadAddrSet.
insert(
I.getPointerOperand()).second &&
I.isUnordered()) {
2347 onLoadEliminationOpportunity();
2355bool CallAnalyzer::visitStore(StoreInst &
I) {
2356 if (handleSROA(
I.getPointerOperand(),
I.isSimple()))
2367 disableLoadElimination();
2373bool CallAnalyzer::visitExtractValue(ExtractValueInst &
I) {
2374 Value *
Op =
I.getAggregateOperand();
2378 if (
Value *SimpleOp = getSimplifiedValueUnchecked(
Op)) {
2379 SimplifyQuery SQ(
DL);
2382 SimplifiedValues[&
I] = SimpleV;
2388 return Base::visitExtractValue(
I);
2391bool CallAnalyzer::visitInsertValue(InsertValueInst &
I) {
2397 return Base::visitInsertValue(
I);
2406bool CallAnalyzer::simplifyCallSite(Function *
F, CallBase &
Call) {
2415 SmallVector<Constant *, 4> ConstantArgs;
2418 Constant *
C = getDirectOrSimplifiedValue<Constant>(
I);
2425 SimplifiedValues[&
Call] =
C;
2432bool CallAnalyzer::isLoweredToCall(Function *
F, CallBase &
Call) {
2433 const TargetLibraryInfo *TLI = GetTLI ? &GetTLI(*
F) : nullptr;
2439 case LibFunc_memcpy_chk:
2440 case LibFunc_memmove_chk:
2441 case LibFunc_mempcpy_chk:
2442 case LibFunc_memset_chk: {
2449 auto *LenOp = getDirectOrSimplifiedValue<ConstantInt>(
Call.
getOperand(2));
2452 if (LenOp && ObjSizeOp &&
2453 LenOp->getLimitedValue() <= ObjSizeOp->getLimitedValue()) {
2465bool CallAnalyzer::visitCallBase(CallBase &
Call) {
2466 if (!onCallBaseVisitStart(
Call))
2470 !
F.hasFnAttribute(Attribute::ReturnsTwice)) {
2472 ExposesReturnsTwice =
true;
2476 ContainsNoDuplicateCall =
true;
2479 onInlineAsm(*InlineAsmOp);
2487 F = getSimplifiedValue<Function>(Callee);
2489 onCallArgumentSetup(
Call);
2492 disableLoadElimination();
2493 return Base::visitCallBase(
Call);
2497 assert(
F &&
"Expected a call to a known function");
2500 if (simplifyCallSite(
F,
Call))
2506 switch (
II->getIntrinsicID()) {
2509 disableLoadElimination();
2510 return Base::visitCallBase(
Call);
2512 case Intrinsic::load_relative:
2513 onLoadRelativeIntrinsic();
2516 case Intrinsic::memset:
2517 case Intrinsic::memcpy:
2518 case Intrinsic::memmove:
2519 disableLoadElimination();
2522 case Intrinsic::icall_branch_funnel:
2523 case Intrinsic::localescape:
2524 HasUninlineableIntrinsic =
true;
2526 case Intrinsic::vastart:
2527 InitsVargArgs =
true;
2529 case Intrinsic::launder_invariant_group:
2530 case Intrinsic::strip_invariant_group:
2531 if (
auto *SROAArg = getSROAArgForValueOrNull(
II->getOperand(0)))
2532 SROAArgValues[
II] = SROAArg;
2534 case Intrinsic::is_constant:
2535 return simplifyIntrinsicCallIsConstant(
Call);
2536 case Intrinsic::objectsize:
2537 return simplifyIntrinsicCallObjectSize(
Call);
2544 IsRecursiveCall =
true;
2545 if (!AllowRecursiveCall)
2549 if (isLoweredToCall(
F,
Call)) {
2554 disableLoadElimination();
2555 return Base::visitCallBase(
Call);
2558bool CallAnalyzer::visitReturnInst(ReturnInst &RI) {
2560 bool Free = !HasReturn;
2565bool CallAnalyzer::visitUncondBrInst(UncondBrInst &BI) {
2572bool CallAnalyzer::visitCondBrInst(CondBrInst &BI) {
2574 return getDirectOrSimplifiedValue<ConstantInt>(BI.
getCondition()) ||
2575 BI.getMetadata(LLVMContext::MD_make_implicit);
2578bool CallAnalyzer::visitSelectInst(SelectInst &SI) {
2579 bool CheckSROA =
SI.getType()->isPointerTy();
2583 Constant *TrueC = getDirectOrSimplifiedValue<Constant>(TrueVal);
2584 Constant *FalseC = getDirectOrSimplifiedValue<Constant>(FalseVal);
2585 Constant *CondC = getSimplifiedValue<Constant>(
SI.getCondition());
2589 if (TrueC == FalseC && TrueC) {
2590 SimplifiedValues[&
SI] = TrueC;
2595 return Base::visitSelectInst(SI);
2597 std::pair<Value *, APInt> TrueBaseAndOffset =
2598 ConstantOffsetPtrs.
lookup(TrueVal);
2599 std::pair<Value *, APInt> FalseBaseAndOffset =
2600 ConstantOffsetPtrs.
lookup(FalseVal);
2601 if (TrueBaseAndOffset == FalseBaseAndOffset && TrueBaseAndOffset.first) {
2602 ConstantOffsetPtrs[&
SI] = std::move(TrueBaseAndOffset);
2604 if (
auto *SROAArg = getSROAArgForValueOrNull(TrueVal))
2605 SROAArgValues[&
SI] = SROAArg;
2609 return Base::visitSelectInst(SI);
2620 if (TrueC && FalseC) {
2622 SimplifiedValues[&
SI] =
C;
2626 return Base::visitSelectInst(SI);
2631 SimplifiedValues[&
SI] = SelectedC;
2638 std::pair<Value *, APInt> BaseAndOffset =
2639 ConstantOffsetPtrs.
lookup(SelectedV);
2640 if (BaseAndOffset.first) {
2641 ConstantOffsetPtrs[&
SI] = std::move(BaseAndOffset);
2643 if (
auto *SROAArg = getSROAArgForValueOrNull(SelectedV))
2644 SROAArgValues[&
SI] = SROAArg;
2650bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
2653 if (getDirectOrSimplifiedValue<ConstantInt>(
SI.getCondition()))
2668 unsigned JumpTableSize = 0;
2669 BlockFrequencyInfo *BFI = GetBFI ? &(GetBFI(
F)) : nullptr;
2670 unsigned NumCaseCluster =
2673 onFinalizeSwitch(JumpTableSize, NumCaseCluster,
SI.defaultDestUnreachable());
2677bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) {
2686 HasIndirectBr =
true;
2690bool CallAnalyzer::visitResumeInst(ResumeInst &RI) {
2696bool CallAnalyzer::visitCleanupReturnInst(CleanupReturnInst &CRI) {
2702bool CallAnalyzer::visitCatchReturnInst(CatchReturnInst &CRI) {
2708bool CallAnalyzer::visitUnreachableInst(UnreachableInst &
I) {
2715bool CallAnalyzer::visitInstruction(Instruction &
I) {
2724 for (
const Use &
Op :
I.operands())
2738CallAnalyzer::analyzeBlock(BasicBlock *BB,
2739 const SmallPtrSetImpl<const Value *> &EphValues) {
2740 for (Instruction &
I : *BB) {
2749 if (
I.isDebugOrPseudoInst())
2758 ++NumVectorInstructions;
2765 onInstructionAnalysisStart(&
I);
2767 if (Base::visit(&
I))
2768 ++NumInstructionsSimplified;
2770 onMissedSimplification();
2772 onInstructionAnalysisFinish(&
I);
2773 using namespace ore;
2776 if (IsRecursiveCall && !AllowRecursiveCall)
2778 else if (ExposesReturnsTwice)
2780 else if (HasDynamicAlloca)
2782 else if (HasIndirectBr)
2784 else if (HasUninlineableIntrinsic)
2786 else if (InitsVargArgs)
2788 if (!
IR.isSuccess()) {
2791 return OptimizationRemarkMissed(
DEBUG_TYPE,
"NeverInline",
2793 <<
NV(
"Callee", &
F) <<
" has uninlinable pattern ("
2794 <<
NV(
"InlineResult",
IR.getFailureReason())
2795 <<
") and cost is not fully computed";
2808 return OptimizationRemarkMissed(
DEBUG_TYPE,
"NeverInline",
2810 <<
NV(
"Callee", &
F) <<
" is "
2811 <<
NV(
"InlineResult",
IR.getFailureReason())
2812 <<
". Cost is not fully computed";
2819 "Call site analysis is not favorable to inlining.");
2831ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(
Value *&V) {
2832 if (!
V->getType()->isPointerTy())
2835 unsigned AS =
V->getType()->getPointerAddressSpace();
2836 unsigned IntPtrWidth =
DL.getIndexSizeInBits(AS);
2841 SmallPtrSet<Value *, 4> Visited;
2845 if (!
GEP->isInBounds() || !accumulateGEPOffset(*
GEP,
Offset))
2847 V =
GEP->getPointerOperand();
2849 if (GA->isInterposable())
2851 V = GA->getAliasee();
2855 assert(
V->getType()->isPointerTy() &&
"Unexpected operand type!");
2856 }
while (Visited.
insert(V).second);
2858 Type *IdxPtrTy =
DL.getIndexType(
V->getType());
2869void CallAnalyzer::findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB) {
2873 if (DeadBlocks.
count(Pred))
2875 BasicBlock *KnownSucc = KnownSuccessors[Pred];
2876 return KnownSucc && KnownSucc != Succ;
2881 return (!DeadBlocks.
count(BB) &&
2883 [&](BasicBlock *
P) {
return IsEdgeDead(
P, BB); }));
2886 for (BasicBlock *Succ :
successors(CurrBB)) {
2887 if (Succ == NextBB || !IsNewlyDead(Succ))
2891 while (!NewDead.
empty()) {
2909InlineResult CallAnalyzer::analyze() {
2912 auto Result = onAnalysisStart();
2921 for (User *U :
Caller->users()) {
2924 IsCallerRecursive =
true;
2932 for (Argument &FAI :
F.args()) {
2934 SimplifiedValues[&FAI] = *CAI;
2938 Value *PtrArg = *CAI;
2939 if (ConstantInt *
C = stripAndComputeInBoundsConstantOffsets(PtrArg)) {
2940 ConstantOffsetPtrs[&FAI] = std::make_pair(PtrArg,
C->getValue());
2944 SROAArgValues[&FAI] = SROAArg;
2945 onInitializeSROAArg(SROAArg);
2946 EnabledSROAAllocas.
insert(SROAArg);
2951 NumConstantOffsetPtrArgs = ConstantOffsetPtrs.
size();
2952 NumAllocaArgs = SROAArgValues.
size();
2956 SmallPtrSet<const Value *, 32> EphValuesStorage;
2957 const SmallPtrSetImpl<const Value *> *EphValues = &EphValuesStorage;
2958 if (GetEphValuesCache)
2959 EphValues = &GetEphValuesCache(
F).ephValues();
2971 typedef SmallSetVector<BasicBlock *, 16> BBSetVector;
2972 BBSetVector BBWorklist;
2973 BBWorklist.insert(&
F.getEntryBlock());
2976 for (
unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
2999 InlineResult
IR = analyzeBlock(BB, *EphValues);
3000 if (!
IR.isSuccess())
3009 if (ConstantInt *SimpleCond = getSimplifiedValue<ConstantInt>(
Cond)) {
3011 BBWorklist.insert(NextBB);
3012 KnownSuccessors[BB] = NextBB;
3013 findDeadBlocks(BB, NextBB);
3018 if (ConstantInt *SimpleCond = getSimplifiedValue<ConstantInt>(
Cond)) {
3019 BasicBlock *NextBB =
SI->findCaseValue(SimpleCond)->getCaseSuccessor();
3020 BBWorklist.insert(NextBB);
3021 KnownSuccessors[BB] = NextBB;
3022 findDeadBlocks(BB, NextBB);
3031 onBlockAnalyzed(BB);
3037 if (!isSoleCallToLocalFunction(CandidateCall,
F) && ContainsNoDuplicateCall)
3047 FinalStackSizeThreshold = *AttrMaxStackSize;
3048 if (AllocatedSize > FinalStackSizeThreshold)
3051 return finalizeAnalysis();
3054void InlineCostCallAnalyzer::print(raw_ostream &OS) {
3055#define DEBUG_PRINT_STAT(x) OS << " " #x ": " << x << "\n"
3057 F.print(OS, &Writer);
3072#undef DEBUG_PRINT_STAT
3075#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3089 auto CalleeTLI = GetTLI(*Callee);
3091 TTI.areInlineCompatible(Caller, Callee)) &&
3092 GetTLI(*Caller).areInlineCompatible(CalleeTLI,
3094 AttributeFuncs::areInlineCompatible(*Caller, *Callee);
3100 for (
unsigned I = 0, E =
Call.arg_size();
I != E; ++
I) {
3101 if (
Call.isByValArgument(
I)) {
3107 unsigned PointerSize =
DL.getPointerSizeInBits(AS);
3109 unsigned NumStores = (
TypeSize + PointerSize - 1) / PointerSize;
3117 NumStores = std::min(NumStores, 8U);
3130 return std::min<int64_t>(
Cost, INT_MAX);
3141 GetAssumptionCache, GetTLI, GetBFI, PSI, ORE,
3162 InlineCostCallAnalyzer CA(*
Call.getCalledFunction(),
Call, Params, CalleeTTI,
3163 GetAssumptionCache, GetBFI, GetTLI, PSI, ORE,
true,
3165 auto R = CA.analyze();
3167 return std::nullopt;
3168 return CA.getCost();
3177 InlineCostFeaturesAnalyzer CFA(CalleeTTI, GetAssumptionCache, GetBFI, GetTLI,
3178 PSI, ORE, *
Call.getCalledFunction(),
Call);
3179 auto R = CFA.analyze();
3181 return std::nullopt;
3182 return CFA.features();
3197 if (Callee->isPresplitCoroutine())
3205 unsigned AllocaAS = Callee->getDataLayout().getAllocaAddrSpace();
3206 for (
unsigned I = 0, E =
Call.arg_size();
I != E; ++
I)
3207 if (
Call.isByValArgument(
I)) {
3216 if (
Call.hasFnAttr(Attribute::AlwaysInline)) {
3217 if (
Call.getAttributes().hasFnAttr(Attribute::NoInline))
3221 if (IsViable.isSuccess())
3234 if (Caller->hasFnAttribute(Attribute::Flatten)) {
3236 if (IsViable.isSuccess())
3242 if (Caller->hasOptNone())
3247 if (!Caller->nullPointerIsDefined() && Callee->nullPointerIsDefined())
3251 if (Callee->isInterposable())
3255 if (Callee->hasFnAttribute(Attribute::NoInline))
3259 if (
Call.isNoInline())
3263 if (Callee->hasFnAttribute(
"loader-replaceable"))
3266 return std::nullopt;
3282 if (UserDecision->isSuccess())
3289 "Inlining forced by -inline-all-viable-calls");
3292 <<
"... (caller:" <<
Call.getCaller()->getName()
3295 InlineCostCallAnalyzer CA(*Callee,
Call, Params, CalleeTTI,
3296 GetAssumptionCache, GetBFI, GetTLI, PSI, ORE,
3306 if (CA.wasDecidedByCostBenefit()) {
3309 CA.getCostBenefitPair());
3314 if (CA.wasDecidedByCostThreshold())
3316 CA.getStaticBonusApplied());
3325 bool ReturnsTwice =
F.hasFnAttribute(Attribute::ReturnsTwice);
3335 for (
auto &
II : BB) {
3352 switch (Callee->getIntrinsicID()) {
3355 case llvm::Intrinsic::icall_branch_funnel:
3359 "disallowed inlining of @llvm.icall.branch.funnel");
3360 case llvm::Intrinsic::localescape:
3364 "disallowed inlining of @llvm.localescape");
3365 case llvm::Intrinsic::vastart:
3369 "contains VarArgs initialized with va_start");
3440 unsigned SizeOptLevel) {
3443 if (SizeOptLevel == 1)
3445 if (SizeOptLevel == 2)
3488 InlineCostCallAnalyzer ICCA(*CalledFunction, *CB, Params,
TTI,
3489 GetAssumptionCache,
nullptr,
nullptr, PSI,
3492 OS <<
" Analyzing call of " << CalledFunction->
getName()
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
static InstructionCost getCost(Instruction &Inst, TTI::TargetCostKind CostKind, TargetTransformInfo &TTI)
static bool isColdCallSite(CallBase &CB, BlockFrequencyInfo &CallerBFI)
Return true if the block containing the call site has a BlockFrequency of less than ColdCCRelFreq% of...
static bool IsIndirectCall(const MachineInstr *MI)
static cl::opt< int > InlineAsmInstrCost("inline-asm-instr-cost", cl::Hidden, cl::init(0), cl::desc("Cost of a single inline asm instruction when inlining"))
static cl::opt< int > InlineSavingsMultiplier("inline-savings-multiplier", cl::Hidden, cl::init(8), cl::desc("Multiplier to multiply cycle savings by during inlining"))
static cl::opt< int > InlineThreshold("inline-threshold", cl::Hidden, cl::init(225), cl::desc("Control the amount of inlining to perform (default = 225)"))
static cl::opt< int > CallPenalty("inline-call-penalty", cl::Hidden, cl::init(25), cl::desc("Call penalty that is applied per callsite when inlining"))
static cl::opt< int > HotCallSiteThreshold("hot-callsite-threshold", cl::Hidden, cl::init(3000), cl::desc("Threshold for hot callsites "))
static cl::opt< int > ColdThreshold("inlinecold-threshold", cl::Hidden, cl::init(45), cl::desc("Threshold for inlining functions with cold attribute"))
static cl::opt< size_t > RecurStackSizeThreshold("recursive-inline-max-stacksize", cl::Hidden, cl::init(InlineConstants::TotalAllocaSizeRecursiveCaller), cl::desc("Do not inline recursive functions with a stack " "size that exceeds the specified limit"))
static cl::opt< bool > PrintInstructionComments("print-instruction-comments", cl::Hidden, cl::init(false), cl::desc("Prints comments for instruction based on inline cost analysis"))
static cl::opt< int > LocallyHotCallSiteThreshold("locally-hot-callsite-threshold", cl::Hidden, cl::init(525), cl::desc("Threshold for locally hot callsites "))
static cl::opt< bool > InlineCallerSupersetNoBuiltin("inline-caller-superset-nobuiltin", cl::Hidden, cl::init(true), cl::desc("Allow inlining when caller has a superset of callee's nobuiltin " "attributes."))
static cl::opt< int > HintThreshold("inlinehint-threshold", cl::Hidden, cl::init(325), cl::desc("Threshold for inlining functions with inline hint"))
static cl::opt< size_t > StackSizeThreshold("inline-max-stacksize", cl::Hidden, cl::init(std::numeric_limits< size_t >::max()), cl::desc("Do not inline functions with a stack size " "that exceeds the specified limit"))
static int computeThresholdFromOptLevels(unsigned OptLevel, unsigned SizeOptLevel)
static cl::opt< uint64_t > HotCallSiteRelFreq("hot-callsite-rel-freq", cl::Hidden, cl::init(60), cl::desc("Minimum block frequency, expressed as a multiple of caller's " "entry frequency, for a callsite to be hot in the absence of " "profile information."))
static cl::opt< int > InlineSavingsProfitableMultiplier("inline-savings-profitable-multiplier", cl::Hidden, cl::init(4), cl::desc("A multiplier on top of cycle savings to decide whether the " "savings won't justify the cost"))
static cl::opt< int > MemAccessCost("inline-memaccess-cost", cl::Hidden, cl::init(0), cl::desc("Cost of load/store instruction when inlining"))
static cl::opt< int > ColdCallSiteThreshold("inline-cold-callsite-threshold", cl::Hidden, cl::init(45), cl::desc("Threshold for inlining cold callsites"))
static cl::opt< bool > IgnoreTTIInlineCompatible("ignore-tti-inline-compatible", cl::Hidden, cl::init(false), cl::desc("Ignore TTI attributes compatibility check between callee/caller " "during inline cost calculation"))
static cl::opt< bool > OptComputeFullInlineCost("inline-cost-full", cl::Hidden, cl::desc("Compute the full inline cost of a call site even when the cost " "exceeds the threshold."))
#define DEBUG_PRINT_STAT(x)
static cl::opt< bool > InlineEnableCostBenefitAnalysis("inline-enable-cost-benefit-analysis", cl::Hidden, cl::init(false), cl::desc("Enable the cost-benefit analysis for the inliner"))
static cl::opt< int > InstrCost("inline-instr-cost", cl::Hidden, cl::init(5), cl::desc("Cost of a single instruction when inlining"))
static cl::opt< bool > InlineAllViableCalls("inline-all-viable-calls", cl::Hidden, cl::init(false), cl::desc("Inline all viable calls, even if they exceed the inlining " "threshold"))
static cl::opt< int > InlineSizeAllowance("inline-size-allowance", cl::Hidden, cl::init(100), cl::desc("The maximum size of a callee that get's " "inlined without sufficient cycle savings"))
static bool functionsHaveCompatibleAttributes(Function *Caller, Function *Callee, TargetTransformInfo &TTI, function_ref< const TargetLibraryInfo &(Function &)> &GetTLI)
Test that there are no attribute conflicts between Caller and Callee that prevent inlining.
static cl::opt< int > ColdCallSiteRelFreq("cold-callsite-rel-freq", cl::Hidden, cl::init(2), cl::desc("Maximum block frequency, expressed as a percentage of caller's " "entry frequency, for a callsite to be cold in the absence of " "profile information."))
static cl::opt< bool > DisableGEPConstOperand("disable-gep-const-evaluation", cl::Hidden, cl::init(false), cl::desc("Disables evaluation of GetElementPtr with constant operands"))
static cl::opt< int > DefaultThreshold("inlinedefault-threshold", cl::Hidden, cl::init(225), cl::desc("Default amount of inlining to perform"))
static Constant * getFalse(Type *Ty)
For a boolean type or a vector of boolean type, return false or a vector with every element false.
Legalize the Machine IR a function s Machine IR
Machine Check Debug Module
uint64_t IntrinsicInst * II
FunctionAnalysisManager FAM
const SmallVectorImpl< MachineOperand > & Cond
static void visit(BasicBlock &Start, std::function< bool(BasicBlock *)> op)
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static SymbolRef::Type getType(const Symbol *Sym)
LLVM_ABI APInt udiv(const APInt &RHS) const
Unsigned division operation.
bool ult(const APInt &RHS) const
Unsigned less than comparison.
LLVM_ABI APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
PointerType * getType() const
Overload to return most specific pointer type.
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
Functions, function parameters, and return types can have attributes to indicate how they should be t...
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
bool isValid() const
Return true if the attribute is any kind of attribute.
LLVM Basic Block Representation.
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
LLVM_ABI std::optional< uint64_t > getBlockProfileCount(const BasicBlock *BB, bool AllowSynthetic=false) const
Returns the estimated profile count of BB.
LLVM_ABI BlockFrequency getEntryFreq() const
LLVM_ABI BlockFrequency getBlockFreq(const BasicBlock *BB) const
getblockFreq - Return block frequency.
LLVM_ABI std::optional< BlockFrequency > mul(uint64_t Factor) const
Multiplies frequency with Factor. Returns nullopt in case of overflow.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
bool hasFnAttr(Attribute::AttrKind Kind) const
Determine whether this call has the given attribute.
LLVM_ABI bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Determine whether the argument or parameter has the given attribute.
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
bool onlyReadsMemory(unsigned OpNo) const
Value * getCalledOperand() const
Attribute getFnAttr(StringRef Kind) const
Get the attribute of a given kind for the function.
Value * getArgOperand(unsigned i) const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
FunctionType * getFunctionType() const
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
unsigned arg_size() const
LLVM_ABI Function * getCaller()
Helper to get the caller (the parent function).
Value * getCondition() const
BasicBlock * getSuccessor(unsigned i) const
static LLVM_ABI Constant * getSub(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
const APInt & getValue() const
Return the constant as an APInt value reference.
static LLVM_ABI ConstantInt * getBool(LLVMContext &Context, bool V)
LLVM_ABI bool isAllOnesValue() const
Return true if this is the value that would be returned by getAllOnesValue.
LLVM_ABI bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
A parsed version of the target data layout string in and methods for querying it.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
A cache of ephemeral values within a function.
Type * getReturnType() const
const BasicBlock & getEntryBlock() const
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
static LLVM_ABI bool compare(const APInt &LHS, const APInt &RHS, ICmpInst::Predicate Pred)
Return result of LHS Pred RHS comparison.
LLVM_ABI void collectAsmStrs(SmallVectorImpl< StringRef > &AsmStrs) const
Represents the cost of inlining a function.
static InlineCost getNever(const char *Reason, std::optional< CostBenefitPair > CostBenefit=std::nullopt)
static InlineCost getAlways(const char *Reason, std::optional< CostBenefitPair > CostBenefit=std::nullopt)
static InlineCost get(int Cost, int Threshold, int StaticBonus=0)
InlineResult is basically true or false.
static InlineResult success()
static InlineResult failure(const char *Reason)
const char * getFailureReason() const
Base class for instruction visitors.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
Class to represent pointers.
unsigned getAddressSpace() const
Return the address space of the Pointer type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
void reserve(size_type N)
void push_back(const T &Elt)
StringRef - Represent a constant reference to a string, i.e.
static constexpr size_t npos
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
constexpr bool empty() const
empty - Check if the string is empty.
bool contains(StringRef Other) const
Return true if the given string is a substring of *this, and false otherwise.
size_t find(char C, size_t From=0) const
Search for the first character C in the string.
TypeSize getElementOffset(unsigned Idx) const
Analysis pass providing the TargetTransformInfo.
Provides information about what library functions are available for the current target.
bool has(LibFunc F) const
Tests whether a library function is available.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
static constexpr TypeSize getZero()
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Value * getOperand(unsigned i) const
LLVM Value Representation.
LLVMContext & getContext() const
All values hold a context through their type.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
int getNumOccurrences() const
std::pair< iterator, bool > insert(const ValueT &V)
bool erase(const ValueT &V)
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
const char FunctionInlineCostMultiplierAttributeName[]
const int OptSizeThreshold
Use when optsize (-Os) is specified.
const int OptMinSizeThreshold
Use when minsize (-Oz) is specified.
const uint64_t MaxSimplifiedDynamicAllocaToInline
Do not inline dynamic allocas that have been constant propagated to be static allocas above this amou...
const int IndirectCallThreshold
const int OptAggressiveThreshold
Use when -O3 is specified.
const char MaxInlineStackSizeAttributeName[]
const unsigned TotalAllocaSizeRecursiveCaller
Do not inline functions which allocate this many bytes on the stack when the caller is recursive.
LLVM_ABI int getInstrCost()
bool match(Val *V, const Pattern &P)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
initializer< Ty > init(const Ty &Val)
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< InstrNode * > Instr
friend class Instruction
Iterator for Instructions in a `BasicBlock.
This is an optimization pass for GlobalISel generic memory operations.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
FunctionAddr VTableAddr Value
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
LLVM_ABI Constant * ConstantFoldSelectInstruction(Constant *Cond, Constant *V1, Constant *V2)
Attempt to constant fold a select instruction with the specified operands.
LLVM_ABI bool isAssumeLikeIntrinsic(const Instruction *I)
Return true if it is an intrinsic that cannot be speculated but also cannot trap.
LLVM_ABI bool canConstantFoldCallTo(const CallBase *Call, const Function *F)
canConstantFoldCallTo - Return true if its even possible to fold a call to the specified function.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI std::optional< int > getStringFnAttrAsInt(CallBase &CB, StringRef AttrKind)
auto successors(const MachineBasicBlock *BB)
OuterAnalysisManagerProxy< ModuleAnalysisManager, Function > ModuleAnalysisManagerFunctionProxy
Provide the ModuleAnalysisManager to Function proxy.
LLVM_ABI Value * lowerObjectSizeCall(IntrinsicInst *ObjectSize, const DataLayout &DL, const TargetLibraryInfo *TLI, bool MustSucceed)
Try to turn a call to @llvm.objectsize into an integer value of the given Type.
LLVM_ABI Value * simplifyInstructionWithOperands(Instruction *I, ArrayRef< Value * > NewOps, const SimplifyQuery &Q)
Like simplifyInstruction but the operands of I are replaced with NewOps.
LogicalResult failure(bool IsFailure=true)
Utility function to generate a LogicalResult.
gep_type_iterator gep_type_end(const User *GEP)
LLVM_ABI Constant * ConstantFoldCall(const CallBase *Call, Function *F, ArrayRef< Constant * > Operands, const TargetLibraryInfo *TLI=nullptr, bool AllowNonDeterministic=true)
ConstantFoldCall - Attempt to constant fold a call to the specified function with the specified argum...
LLVM_ABI Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
LLVM_ABI InlineResult isInlineViable(Function &Callee)
Check if it is mechanically possible to inline the function Callee, based on the contents of the func...
auto dyn_cast_or_null(const Y &Val)
LLVM_ABI Value * simplifyFNegInst(Value *Op, FastMathFlags FMF, const SimplifyQuery &Q)
Given operand for an FNeg, fold the result or return null.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
generic_gep_type_iterator<> gep_type_iterator
std::enable_if_t< std::is_unsigned_v< T >, T > SaturatingMultiplyAdd(T X, T Y, T A, bool *ResultOverflowed=nullptr)
Multiply two unsigned integers, X and Y, and add the unsigned integer, A, to the product.
Function::ProfileCount ProfileCount
LLVM_ABI std::optional< InlineCostFeatures > getInliningCostFeatures(CallBase &Call, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, function_ref< const TargetLibraryInfo &(Function &)> GetTLI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Get the expanded cost features.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ABI Value * simplifyExtractValueInst(Value *Agg, ArrayRef< unsigned > Idxs, const SimplifyQuery &Q)
Given operands for an ExtractValueInst, fold the result or return null.
LLVM_ABI InlineCost getInlineCost(CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< const TargetLibraryInfo &(Function &)> GetTLI, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr, function_ref< EphemeralValuesCache &(Function &)> GetEphValuesCache=nullptr)
Get an InlineCost object representing the cost of inlining this callsite.
LLVM_ABI std::optional< InlineResult > getAttributeBasedInliningDecision(CallBase &Call, Function *Callee, TargetTransformInfo &CalleeTTI, function_ref< const TargetLibraryInfo &(Function &)> GetTLI)
Returns InlineResult::success() if the call site should be always inlined because of user directives,...
LLVM_ABI Value * simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a BinaryOperator, fold the result or return null.
DWARFExpression::Operation Op
LLVM_ABI InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
LLVM_ABI int getCallsiteCost(const TargetTransformInfo &TTI, const CallBase &Call, const DataLayout &DL)
Return the cost associated with a callsite, including parameter passing and the call/return instructi...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
gep_type_iterator gep_type_begin(const User *GEP)
LLVM_ABI std::optional< int > getInliningCostEstimate(CallBase &Call, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, function_ref< const TargetLibraryInfo &(Function &)> GetTLI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Get the cost estimate ignoring thresholds.
auto predecessors(const MachineBasicBlock *BB)
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI Constant * ConstantFoldInstOperands(const Instruction *I, ArrayRef< Constant * > Ops, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, bool AllowNonDeterministic=true)
ConstantFoldInstOperands - Attempt to constant fold an instruction with the specified operands.
std::enable_if_t< std::is_unsigned_v< T >, T > SaturatingAdd(T X, T Y, bool *ResultOverflowed=nullptr)
Add two unsigned integers, X and Y, of type T.
std::array< int, static_cast< size_t >(InlineCostFeatureIndex::NumberOfFeatures)> InlineCostFeatures
static LLVM_ABI void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM)
Thresholds to tune inline cost analysis.
std::optional< int > OptMinSizeThreshold
Threshold to use when the caller is optimized for minsize.
std::optional< int > OptSizeThreshold
Threshold to use when the caller is optimized for size.
std::optional< int > ColdCallSiteThreshold
Threshold to use when the callsite is considered cold.
std::optional< int > ColdThreshold
Threshold to use for cold callees.
std::optional< int > HotCallSiteThreshold
Threshold to use when the callsite is considered hot.
int DefaultThreshold
The default threshold to start with for a callee.
std::optional< int > HintThreshold
Threshold to use for callees with inline hint.
std::optional< int > LocallyHotCallSiteThreshold
Threshold to use when the callsite is considered hot relative to function entry.