LLVM 19.0.0git
IndirectCallPromotion.cpp
Go to the documentation of this file.
1//===- IndirectCallPromotion.cpp - Optimizations based on value profiling -===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the transformation that promotes indirect calls to
10// conditional direct calls when the indirect-call value profile metadata is
11// available.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/ADT/ArrayRef.h"
16#include "llvm/ADT/Statistic.h"
17#include "llvm/ADT/StringRef.h"
23#include "llvm/IR/Function.h"
24#include "llvm/IR/InstrTypes.h"
26#include "llvm/IR/LLVMContext.h"
27#include "llvm/IR/MDBuilder.h"
28#include "llvm/IR/PassManager.h"
30#include "llvm/IR/Value.h"
34#include "llvm/Support/Debug.h"
35#include "llvm/Support/Error.h"
40#include <cassert>
41#include <cstdint>
42#include <memory>
43#include <string>
44#include <utility>
45#include <vector>
46
47using namespace llvm;
48
49#define DEBUG_TYPE "pgo-icall-prom"
50
51STATISTIC(NumOfPGOICallPromotion, "Number of indirect call promotions.");
52STATISTIC(NumOfPGOICallsites, "Number of indirect call candidate sites.");
53
54// Command line option to disable indirect-call promotion with the default as
55// false. This is for debug purpose.
56static cl::opt<bool> DisableICP("disable-icp", cl::init(false), cl::Hidden,
57 cl::desc("Disable indirect call promotion"));
58
59// Set the cutoff value for the promotion. If the value is other than 0, we
60// stop the transformation once the total number of promotions equals the cutoff
61// value.
62// For debug use only.
64 ICPCutOff("icp-cutoff", cl::init(0), cl::Hidden,
65 cl::desc("Max number of promotions for this compilation"));
66
67// If ICPCSSkip is non zero, the first ICPCSSkip callsites will be skipped.
68// For debug use only.
70 ICPCSSkip("icp-csskip", cl::init(0), cl::Hidden,
71 cl::desc("Skip Callsite up to this number for this compilation"));
72
73// Set if the pass is called in LTO optimization. The difference for LTO mode
74// is the pass won't prefix the source module name to the internal linkage
75// symbols.
76static cl::opt<bool> ICPLTOMode("icp-lto", cl::init(false), cl::Hidden,
77 cl::desc("Run indirect-call promotion in LTO "
78 "mode"));
79
80// Set if the pass is called in SamplePGO mode. The difference for SamplePGO
81// mode is that it will add prof metadata to the created direct call.
82static cl::opt<bool>
83 ICPSamplePGOMode("icp-samplepgo", cl::init(false), cl::Hidden,
84 cl::desc("Run indirect-call promotion in SamplePGO mode"));
85
86// If the option is set to true, only call instructions will be considered for
87// transformation -- invoke instructions will be ignored.
88static cl::opt<bool>
89 ICPCallOnly("icp-call-only", cl::init(false), cl::Hidden,
90 cl::desc("Run indirect-call promotion for call instructions "
91 "only"));
92
93// If the option is set to true, only invoke instructions will be considered for
94// transformation -- call instructions will be ignored.
95static cl::opt<bool> ICPInvokeOnly("icp-invoke-only", cl::init(false),
97 cl::desc("Run indirect-call promotion for "
98 "invoke instruction only"));
99
100// Dump the function level IR if the transformation happened in this
101// function. For debug use only.
102static cl::opt<bool>
103 ICPDUMPAFTER("icp-dumpafter", cl::init(false), cl::Hidden,
104 cl::desc("Dump IR after transformation happens"));
105
106namespace {
107
108// Promote indirect calls to conditional direct calls, keeping track of
109// thresholds.
110class IndirectCallPromoter {
111private:
112 Function &F;
113
114 // Symtab that maps indirect call profile values to function names and
115 // defines.
116 InstrProfSymtab *const Symtab;
117
118 const bool SamplePGO;
119
121
122 // A struct that records the direct target and its call count.
123 struct PromotionCandidate {
124 Function *const TargetFunction;
125 const uint64_t Count;
126
127 PromotionCandidate(Function *F, uint64_t C) : TargetFunction(F), Count(C) {}
128 };
129
130 // Collect the promotion candidates for an indirect-call call site and
131 // return them. CB is the candidate indirect call, ValueDataRef
132 // contains the array of value profile data for profiled targets,
133 // TotalCount is the total profiled count of call executions, and
134 // NumCandidates is the number of candidate entries in ValueDataRef.
135 std::vector<PromotionCandidate> getPromotionCandidatesForCallSite(
136 const CallBase &CB, const ArrayRef<InstrProfValueData> &ValueDataRef,
137 uint64_t TotalCount, uint32_t NumCandidates);
138
139 // Promote a list of targets for one indirect-call callsite. Return
140 // the number of promotions.
141 uint32_t tryToPromote(CallBase &CB,
142 const std::vector<PromotionCandidate> &Candidates,
143 uint64_t &TotalCount);
144
145public:
146 IndirectCallPromoter(Function &Func, InstrProfSymtab *Symtab, bool SamplePGO,
148 : F(Func), Symtab(Symtab), SamplePGO(SamplePGO), ORE(ORE) {}
149 IndirectCallPromoter(const IndirectCallPromoter &) = delete;
150 IndirectCallPromoter &operator=(const IndirectCallPromoter &) = delete;
151
152 bool processFunction(ProfileSummaryInfo *PSI);
153};
154
155} // end anonymous namespace
156
157// Indirect-call promotion heuristic. The direct targets are sorted based on
158// the count. Stop at the first target that is not promoted.
159std::vector<IndirectCallPromoter::PromotionCandidate>
160IndirectCallPromoter::getPromotionCandidatesForCallSite(
161 const CallBase &CB, const ArrayRef<InstrProfValueData> &ValueDataRef,
162 uint64_t TotalCount, uint32_t NumCandidates) {
163 std::vector<PromotionCandidate> Ret;
164
165 LLVM_DEBUG(dbgs() << " \nWork on callsite #" << NumOfPGOICallsites << CB
166 << " Num_targets: " << ValueDataRef.size()
167 << " Num_candidates: " << NumCandidates << "\n");
168 NumOfPGOICallsites++;
169 if (ICPCSSkip != 0 && NumOfPGOICallsites <= ICPCSSkip) {
170 LLVM_DEBUG(dbgs() << " Skip: User options.\n");
171 return Ret;
172 }
173
174 for (uint32_t I = 0; I < NumCandidates; I++) {
175 uint64_t Count = ValueDataRef[I].Count;
176 assert(Count <= TotalCount);
177 (void)TotalCount;
178 uint64_t Target = ValueDataRef[I].Value;
179 LLVM_DEBUG(dbgs() << " Candidate " << I << " Count=" << Count
180 << " Target_func: " << Target << "\n");
181
182 if (ICPInvokeOnly && isa<CallInst>(CB)) {
183 LLVM_DEBUG(dbgs() << " Not promote: User options.\n");
184 ORE.emit([&]() {
185 return OptimizationRemarkMissed(DEBUG_TYPE, "UserOptions", &CB)
186 << " Not promote: User options";
187 });
188 break;
189 }
190 if (ICPCallOnly && isa<InvokeInst>(CB)) {
191 LLVM_DEBUG(dbgs() << " Not promote: User option.\n");
192 ORE.emit([&]() {
193 return OptimizationRemarkMissed(DEBUG_TYPE, "UserOptions", &CB)
194 << " Not promote: User options";
195 });
196 break;
197 }
198 if (ICPCutOff != 0 && NumOfPGOICallPromotion >= ICPCutOff) {
199 LLVM_DEBUG(dbgs() << " Not promote: Cutoff reached.\n");
200 ORE.emit([&]() {
201 return OptimizationRemarkMissed(DEBUG_TYPE, "CutOffReached", &CB)
202 << " Not promote: Cutoff reached";
203 });
204 break;
205 }
206
207 // Don't promote if the symbol is not defined in the module. This avoids
208 // creating a reference to a symbol that doesn't exist in the module
209 // This can happen when we compile with a sample profile collected from
210 // one binary but used for another, which may have profiled targets that
211 // aren't used in the new binary. We might have a declaration initially in
212 // the case where the symbol is globally dead in the binary and removed by
213 // ThinLTO.
214 Function *TargetFunction = Symtab->getFunction(Target);
215 if (TargetFunction == nullptr || TargetFunction->isDeclaration()) {
216 LLVM_DEBUG(dbgs() << " Not promote: Cannot find the target\n");
217 ORE.emit([&]() {
218 return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToFindTarget", &CB)
219 << "Cannot promote indirect call: target with md5sum "
220 << ore::NV("target md5sum", Target) << " not found";
221 });
222 break;
223 }
224
225 const char *Reason = nullptr;
226 if (!isLegalToPromote(CB, TargetFunction, &Reason)) {
227 using namespace ore;
228
229 ORE.emit([&]() {
230 return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToPromote", &CB)
231 << "Cannot promote indirect call to "
232 << NV("TargetFunction", TargetFunction) << " with count of "
233 << NV("Count", Count) << ": " << Reason;
234 });
235 break;
236 }
237
238 Ret.push_back(PromotionCandidate(TargetFunction, Count));
239 TotalCount -= Count;
240 }
241 return Ret;
242}
243
245 uint64_t Count, uint64_t TotalCount,
246 bool AttachProfToDirectCall,
248
249 uint64_t ElseCount = TotalCount - Count;
250 uint64_t MaxCount = (Count >= ElseCount ? Count : ElseCount);
251 uint64_t Scale = calculateCountScale(MaxCount);
252 MDBuilder MDB(CB.getContext());
253 MDNode *BranchWeights = MDB.createBranchWeights(
254 scaleBranchCount(Count, Scale), scaleBranchCount(ElseCount, Scale));
255
256 CallBase &NewInst =
257 promoteCallWithIfThenElse(CB, DirectCallee, BranchWeights);
258
259 if (AttachProfToDirectCall) {
260 setBranchWeights(NewInst, {static_cast<uint32_t>(Count)});
261 }
262
263 using namespace ore;
264
265 if (ORE)
266 ORE->emit([&]() {
267 return OptimizationRemark(DEBUG_TYPE, "Promoted", &CB)
268 << "Promote indirect call to " << NV("DirectCallee", DirectCallee)
269 << " with count " << NV("Count", Count) << " out of "
270 << NV("TotalCount", TotalCount);
271 });
272 return NewInst;
273}
274
275// Promote indirect-call to conditional direct-call for one callsite.
276uint32_t IndirectCallPromoter::tryToPromote(
277 CallBase &CB, const std::vector<PromotionCandidate> &Candidates,
278 uint64_t &TotalCount) {
279 uint32_t NumPromoted = 0;
280
281 for (const auto &C : Candidates) {
282 uint64_t Count = C.Count;
283 pgo::promoteIndirectCall(CB, C.TargetFunction, Count, TotalCount, SamplePGO,
284 &ORE);
285 assert(TotalCount >= Count);
286 TotalCount -= Count;
287 NumOfPGOICallPromotion++;
288 NumPromoted++;
289 }
290 return NumPromoted;
291}
292
293// Traverse all the indirect-call callsite and get the value profile
294// annotation to perform indirect-call promotion.
295bool IndirectCallPromoter::processFunction(ProfileSummaryInfo *PSI) {
296 bool Changed = false;
297 ICallPromotionAnalysis ICallAnalysis;
298 for (auto *CB : findIndirectCalls(F)) {
299 uint32_t NumVals, NumCandidates;
300 uint64_t TotalCount;
301 auto ICallProfDataRef = ICallAnalysis.getPromotionCandidatesForInstruction(
302 CB, NumVals, TotalCount, NumCandidates);
303 if (!NumCandidates ||
304 (PSI && PSI->hasProfileSummary() && !PSI->isHotCount(TotalCount)))
305 continue;
306 auto PromotionCandidates = getPromotionCandidatesForCallSite(
307 *CB, ICallProfDataRef, TotalCount, NumCandidates);
308 uint32_t NumPromoted = tryToPromote(*CB, PromotionCandidates, TotalCount);
309 if (NumPromoted == 0)
310 continue;
311
312 Changed = true;
313 // Adjust the MD.prof metadata. First delete the old one.
314 CB->setMetadata(LLVMContext::MD_prof, nullptr);
315 // If all promoted, we don't need the MD.prof metadata.
316 if (TotalCount == 0 || NumPromoted == NumVals)
317 continue;
318 // Otherwise we need update with the un-promoted records back.
319 annotateValueSite(*F.getParent(), *CB, ICallProfDataRef.slice(NumPromoted),
320 TotalCount, IPVK_IndirectCallTarget, NumCandidates);
321 }
322 return Changed;
323}
324
325// A wrapper function that does the actual work.
326static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, bool InLTO,
327 bool SamplePGO, ModuleAnalysisManager &MAM) {
328 if (DisableICP)
329 return false;
330 InstrProfSymtab Symtab;
331 if (Error E = Symtab.create(M, InLTO)) {
332 std::string SymtabFailure = toString(std::move(E));
333 M.getContext().emitError("Failed to create symtab: " + SymtabFailure);
334 return false;
335 }
336 bool Changed = false;
337 for (auto &F : M) {
338 if (F.isDeclaration() || F.hasOptNone())
339 continue;
340
341 auto &FAM =
344
345 IndirectCallPromoter CallPromoter(F, &Symtab, SamplePGO, ORE);
346 bool FuncChanged = CallPromoter.processFunction(PSI);
347 if (ICPDUMPAFTER && FuncChanged) {
348 LLVM_DEBUG(dbgs() << "\n== IR Dump After =="; F.print(dbgs()));
349 LLVM_DEBUG(dbgs() << "\n");
350 }
351 Changed |= FuncChanged;
352 if (ICPCutOff != 0 && NumOfPGOICallPromotion >= ICPCutOff) {
353 LLVM_DEBUG(dbgs() << " Stop: Cutoff reached.\n");
354 break;
355 }
356 }
357 return Changed;
358}
359
363
364 if (!promoteIndirectCalls(M, PSI, InLTO | ICPLTOMode,
365 SamplePGO | ICPSamplePGOMode, MAM))
366 return PreservedAnalyses::all();
367
369}
#define LLVM_DEBUG(X)
Definition: Debug.h:101
Interface to identify indirect call promotion candidates.
static cl::opt< bool > ICPCallOnly("icp-call-only", cl::init(false), cl::Hidden, cl::desc("Run indirect-call promotion for call instructions " "only"))
static cl::opt< bool > ICPInvokeOnly("icp-invoke-only", cl::init(false), cl::Hidden, cl::desc("Run indirect-call promotion for " "invoke instruction only"))
static cl::opt< unsigned > ICPCSSkip("icp-csskip", cl::init(0), cl::Hidden, cl::desc("Skip Callsite up to this number for this compilation"))
static cl::opt< bool > ICPDUMPAFTER("icp-dumpafter", cl::init(false), cl::Hidden, cl::desc("Dump IR after transformation happens"))
static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, bool InLTO, bool SamplePGO, ModuleAnalysisManager &MAM)
static cl::opt< bool > ICPLTOMode("icp-lto", cl::init(false), cl::Hidden, cl::desc("Run indirect-call promotion in LTO " "mode"))
#define DEBUG_TYPE
static cl::opt< bool > DisableICP("disable-icp", cl::init(false), cl::Hidden, cl::desc("Disable indirect call promotion"))
static cl::opt< unsigned > ICPCutOff("icp-cutoff", cl::init(0), cl::Hidden, cl::desc("Max number of promotions for this compilation"))
static cl::opt< bool > ICPSamplePGOMode("icp-samplepgo", cl::init(false), cl::Hidden, cl::desc("Run indirect-call promotion in SamplePGO mode"))
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file provides the interface for IR based instrumentation passes ( (profile-gen,...
FunctionAnalysisManager FAM
ModuleAnalysisManager MAM
This header defines various interfaces for pass management in LLVM.
This file contains the declarations for profiling metadata utility functions.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:348
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:500
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1461
Lightweight error class with error context and mandatory checking.
Definition: Error.h:160
const Function & getFunction() const
Definition: Function.h:160
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:274
ArrayRef< InstrProfValueData > getPromotionCandidatesForInstruction(const Instruction *I, uint32_t &NumVals, uint64_t &TotalCount, uint32_t &NumCandidates)
Returns reference to array of InstrProfValueData for the given instruction I.
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:658
A symbol table used for function [IR]PGO name look-up with keys (such as pointers,...
Definition: InstrProf.h:429
Error create(object::SectionRef &Section)
Create InstrProfSymtab from an object file section which contains function PGO names.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1636
MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight)
Return metadata containing two branch weights.
Definition: MDBuilder.cpp:37
Metadata node.
Definition: Metadata.h:1067
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
The optimization diagnostic interface.
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for missed-optimization remarks.
Diagnostic information for applied optimization remarks.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:109
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:115
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
bool hasProfileSummary() const
Returns true if profile summary is available.
bool isHotCount(uint64_t C) const
Returns true if count C is considered hot.
Target - Wrapper for Target specific information.
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1074
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
std::optional< const char * > toString(const std::optional< DWARFFormValue > &V)
Take an optional DWARFFormValue and try to extract a string value from it.
DiagnosticInfoOptimizationBase::Argument NV
CallBase & promoteIndirectCall(CallBase &CB, Function *F, uint64_t Count, uint64_t TotalCount, bool AttachProfToDirectCall, OptimizationRemarkEmitter *ORE)
NodeAddr< FuncNode * > Func
Definition: RDFGraph.h:393
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
bool isLegalToPromote(const CallBase &CB, Function *Callee, const char **FailureReason=nullptr)
Return true if the given indirect call site can be made to call Callee.
std::vector< CallBase * > findIndirectCalls(Function &F)
CallBase & promoteCallWithIfThenElse(CallBase &CB, Function *Callee, MDNode *BranchWeights=nullptr)
Promote the given indirect call site to conditionally call Callee.
void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
Definition: InstrProf.cpp:1174
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
static uint32_t scaleBranchCount(uint64_t Count, uint64_t Scale)
Scale an individual branch count.
static uint64_t calculateCountScale(uint64_t MaxCount)
Calculate what to divide by to scale counts.