LLVM 19.0.0git
IndirectCallPromotion.cpp
Go to the documentation of this file.
1//===- IndirectCallPromotion.cpp - Optimizations based on value profiling -===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the transformation that promotes indirect calls to
10// conditional direct calls when the indirect-call value profile metadata is
11// available.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/ADT/ArrayRef.h"
16#include "llvm/ADT/Statistic.h"
17#include "llvm/ADT/StringRef.h"
23#include "llvm/IR/Function.h"
24#include "llvm/IR/InstrTypes.h"
26#include "llvm/IR/LLVMContext.h"
27#include "llvm/IR/MDBuilder.h"
28#include "llvm/IR/PassManager.h"
30#include "llvm/IR/Value.h"
34#include "llvm/Support/Debug.h"
35#include "llvm/Support/Error.h"
40#include <cassert>
41#include <cstdint>
42#include <memory>
43#include <string>
44#include <utility>
45#include <vector>
46
47using namespace llvm;
48
49#define DEBUG_TYPE "pgo-icall-prom"
50
51STATISTIC(NumOfPGOICallPromotion, "Number of indirect call promotions.");
52STATISTIC(NumOfPGOICallsites, "Number of indirect call candidate sites.");
53
54// Command line option to disable indirect-call promotion with the default as
55// false. This is for debug purpose.
56static cl::opt<bool> DisableICP("disable-icp", cl::init(false), cl::Hidden,
57 cl::desc("Disable indirect call promotion"));
58
59// Set the cutoff value for the promotion. If the value is other than 0, we
60// stop the transformation once the total number of promotions equals the cutoff
61// value.
62// For debug use only.
64 ICPCutOff("icp-cutoff", cl::init(0), cl::Hidden,
65 cl::desc("Max number of promotions for this compilation"));
66
67// If ICPCSSkip is non zero, the first ICPCSSkip callsites will be skipped.
68// For debug use only.
70 ICPCSSkip("icp-csskip", cl::init(0), cl::Hidden,
71 cl::desc("Skip Callsite up to this number for this compilation"));
72
73// Set if the pass is called in LTO optimization. The difference for LTO mode
74// is the pass won't prefix the source module name to the internal linkage
75// symbols.
76static cl::opt<bool> ICPLTOMode("icp-lto", cl::init(false), cl::Hidden,
77 cl::desc("Run indirect-call promotion in LTO "
78 "mode"));
79
80// Set if the pass is called in SamplePGO mode. The difference for SamplePGO
81// mode is it will add prof metadatato the created direct call.
82static cl::opt<bool>
83 ICPSamplePGOMode("icp-samplepgo", cl::init(false), cl::Hidden,
84 cl::desc("Run indirect-call promotion in SamplePGO mode"));
85
86// If the option is set to true, only call instructions will be considered for
87// transformation -- invoke instructions will be ignored.
88static cl::opt<bool>
89 ICPCallOnly("icp-call-only", cl::init(false), cl::Hidden,
90 cl::desc("Run indirect-call promotion for call instructions "
91 "only"));
92
93// If the option is set to true, only invoke instructions will be considered for
94// transformation -- call instructions will be ignored.
95static cl::opt<bool> ICPInvokeOnly("icp-invoke-only", cl::init(false),
97 cl::desc("Run indirect-call promotion for "
98 "invoke instruction only"));
99
100// Dump the function level IR if the transformation happened in this
101// function. For debug use only.
102static cl::opt<bool>
103 ICPDUMPAFTER("icp-dumpafter", cl::init(false), cl::Hidden,
104 cl::desc("Dump IR after transformation happens"));
105
106namespace {
107
108// Promote indirect calls to conditional direct calls, keeping track of
109// thresholds.
110class IndirectCallPromoter {
111private:
112 Function &F;
113
114 // Symtab that maps indirect call profile values to function names and
115 // defines.
116 InstrProfSymtab *const Symtab;
117
118 const bool SamplePGO;
119
121
122 // A struct that records the direct target and it's call count.
123 struct PromotionCandidate {
124 Function *const TargetFunction;
125 const uint64_t Count;
126
127 PromotionCandidate(Function *F, uint64_t C) : TargetFunction(F), Count(C) {}
128 };
129
130 // Check if the indirect-call call site should be promoted. Return the number
131 // of promotions. Inst is the candidate indirect call, ValueDataRef
132 // contains the array of value profile data for profiled targets,
133 // TotalCount is the total profiled count of call executions, and
134 // NumCandidates is the number of candidate entries in ValueDataRef.
135 std::vector<PromotionCandidate> getPromotionCandidatesForCallSite(
136 const CallBase &CB, ArrayRef<InstrProfValueData> ValueDataRef,
137 uint64_t TotalCount, uint32_t NumCandidates);
138
139 // Promote a list of targets for one indirect-call callsite by comparing
140 // indirect callee with functions. Returns true if there are IR
141 // transformations and false otherwise.
142 bool tryToPromoteWithFuncCmp(CallBase &CB,
144 uint64_t TotalCount,
145 ArrayRef<InstrProfValueData> ICallProfDataRef,
146 uint32_t NumCandidates);
147
148public:
149 IndirectCallPromoter(Function &Func, InstrProfSymtab *Symtab, bool SamplePGO,
151 : F(Func), Symtab(Symtab), SamplePGO(SamplePGO), ORE(ORE) {}
152 IndirectCallPromoter(const IndirectCallPromoter &) = delete;
153 IndirectCallPromoter &operator=(const IndirectCallPromoter &) = delete;
154
155 bool processFunction(ProfileSummaryInfo *PSI);
156};
157
158} // end anonymous namespace
159
160// Indirect-call promotion heuristic. The direct targets are sorted based on
161// the count. Stop at the first target that is not promoted.
162std::vector<IndirectCallPromoter::PromotionCandidate>
163IndirectCallPromoter::getPromotionCandidatesForCallSite(
164 const CallBase &CB, ArrayRef<InstrProfValueData> ValueDataRef,
165 uint64_t TotalCount, uint32_t NumCandidates) {
166 std::vector<PromotionCandidate> Ret;
167
168 LLVM_DEBUG(dbgs() << " \nWork on callsite #" << NumOfPGOICallsites << CB
169 << " Num_targets: " << ValueDataRef.size()
170 << " Num_candidates: " << NumCandidates << "\n");
171 NumOfPGOICallsites++;
172 if (ICPCSSkip != 0 && NumOfPGOICallsites <= ICPCSSkip) {
173 LLVM_DEBUG(dbgs() << " Skip: User options.\n");
174 return Ret;
175 }
176
177 for (uint32_t I = 0; I < NumCandidates; I++) {
178 uint64_t Count = ValueDataRef[I].Count;
179 assert(Count <= TotalCount);
180 (void)TotalCount;
181 uint64_t Target = ValueDataRef[I].Value;
182 LLVM_DEBUG(dbgs() << " Candidate " << I << " Count=" << Count
183 << " Target_func: " << Target << "\n");
184
185 if (ICPInvokeOnly && isa<CallInst>(CB)) {
186 LLVM_DEBUG(dbgs() << " Not promote: User options.\n");
187 ORE.emit([&]() {
188 return OptimizationRemarkMissed(DEBUG_TYPE, "UserOptions", &CB)
189 << " Not promote: User options";
190 });
191 break;
192 }
193 if (ICPCallOnly && isa<InvokeInst>(CB)) {
194 LLVM_DEBUG(dbgs() << " Not promote: User option.\n");
195 ORE.emit([&]() {
196 return OptimizationRemarkMissed(DEBUG_TYPE, "UserOptions", &CB)
197 << " Not promote: User options";
198 });
199 break;
200 }
201 if (ICPCutOff != 0 && NumOfPGOICallPromotion >= ICPCutOff) {
202 LLVM_DEBUG(dbgs() << " Not promote: Cutoff reached.\n");
203 ORE.emit([&]() {
204 return OptimizationRemarkMissed(DEBUG_TYPE, "CutOffReached", &CB)
205 << " Not promote: Cutoff reached";
206 });
207 break;
208 }
209
210 // Don't promote if the symbol is not defined in the module. This avoids
211 // creating a reference to a symbol that doesn't exist in the module
212 // This can happen when we compile with a sample profile collected from
213 // one binary but used for another, which may have profiled targets that
214 // aren't used in the new binary. We might have a declaration initially in
215 // the case where the symbol is globally dead in the binary and removed by
216 // ThinLTO.
217 Function *TargetFunction = Symtab->getFunction(Target);
218 if (TargetFunction == nullptr || TargetFunction->isDeclaration()) {
219 LLVM_DEBUG(dbgs() << " Not promote: Cannot find the target\n");
220 ORE.emit([&]() {
221 return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToFindTarget", &CB)
222 << "Cannot promote indirect call: target with md5sum "
223 << ore::NV("target md5sum", Target) << " not found";
224 });
225 break;
226 }
227
228 const char *Reason = nullptr;
229 if (!isLegalToPromote(CB, TargetFunction, &Reason)) {
230 using namespace ore;
231
232 ORE.emit([&]() {
233 return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToPromote", &CB)
234 << "Cannot promote indirect call to "
235 << NV("TargetFunction", TargetFunction) << " with count of "
236 << NV("Count", Count) << ": " << Reason;
237 });
238 break;
239 }
240
241 Ret.push_back(PromotionCandidate(TargetFunction, Count));
242 TotalCount -= Count;
243 }
244 return Ret;
245}
246
248 uint64_t Count, uint64_t TotalCount,
249 bool AttachProfToDirectCall,
251
252 uint64_t ElseCount = TotalCount - Count;
253 uint64_t MaxCount = (Count >= ElseCount ? Count : ElseCount);
254 uint64_t Scale = calculateCountScale(MaxCount);
255 MDBuilder MDB(CB.getContext());
256 MDNode *BranchWeights = MDB.createBranchWeights(
257 scaleBranchCount(Count, Scale), scaleBranchCount(ElseCount, Scale));
258
259 CallBase &NewInst =
260 promoteCallWithIfThenElse(CB, DirectCallee, BranchWeights);
261
262 if (AttachProfToDirectCall) {
263 setBranchWeights(NewInst, {static_cast<uint32_t>(Count)},
264 /*IsExpected=*/false);
265 }
266
267 using namespace ore;
268
269 if (ORE)
270 ORE->emit([&]() {
271 return OptimizationRemark(DEBUG_TYPE, "Promoted", &CB)
272 << "Promote indirect call to " << NV("DirectCallee", DirectCallee)
273 << " with count " << NV("Count", Count) << " out of "
274 << NV("TotalCount", TotalCount);
275 });
276 return NewInst;
277}
278
279// Promote indirect-call to conditional direct-call for one callsite.
280bool IndirectCallPromoter::tryToPromoteWithFuncCmp(
281 CallBase &CB, ArrayRef<PromotionCandidate> Candidates, uint64_t TotalCount,
282 ArrayRef<InstrProfValueData> ICallProfDataRef, uint32_t NumCandidates) {
283 uint32_t NumPromoted = 0;
284
285 for (const auto &C : Candidates) {
286 uint64_t Count = C.Count;
287 pgo::promoteIndirectCall(CB, C.TargetFunction, Count, TotalCount, SamplePGO,
288 &ORE);
289 assert(TotalCount >= Count);
290 TotalCount -= Count;
291 NumOfPGOICallPromotion++;
292 NumPromoted++;
293 }
294
295 if (NumPromoted == 0)
296 return false;
297
298 // Adjust the MD.prof metadata. First delete the old one.
299 CB.setMetadata(LLVMContext::MD_prof, nullptr);
300
301 assert(NumPromoted <= ICallProfDataRef.size() &&
302 "Number of promoted functions should not be greater than the number "
303 "of values in profile metadata");
304 // Annotate the remaining value profiles if counter is not zero.
305 if (TotalCount != 0)
306 annotateValueSite(*F.getParent(), CB, ICallProfDataRef.slice(NumPromoted),
307 TotalCount, IPVK_IndirectCallTarget, NumCandidates);
308
309 return true;
310}
311
312// Traverse all the indirect-call callsite and get the value profile
313// annotation to perform indirect-call promotion.
314bool IndirectCallPromoter::processFunction(ProfileSummaryInfo *PSI) {
315 bool Changed = false;
316 ICallPromotionAnalysis ICallAnalysis;
317 for (auto *CB : findIndirectCalls(F)) {
318 uint32_t NumCandidates;
319 uint64_t TotalCount;
320 auto ICallProfDataRef = ICallAnalysis.getPromotionCandidatesForInstruction(
321 CB, TotalCount, NumCandidates);
322 if (!NumCandidates ||
323 (PSI && PSI->hasProfileSummary() && !PSI->isHotCount(TotalCount)))
324 continue;
325 auto PromotionCandidates = getPromotionCandidatesForCallSite(
326 *CB, ICallProfDataRef, TotalCount, NumCandidates);
327 Changed |= tryToPromoteWithFuncCmp(*CB, PromotionCandidates, TotalCount,
328 ICallProfDataRef, NumCandidates);
329 }
330 return Changed;
331}
332
333// A wrapper function that does the actual work.
334static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, bool InLTO,
335 bool SamplePGO, ModuleAnalysisManager &MAM) {
336 if (DisableICP)
337 return false;
338 InstrProfSymtab Symtab;
339 if (Error E = Symtab.create(M, InLTO)) {
340 std::string SymtabFailure = toString(std::move(E));
341 M.getContext().emitError("Failed to create symtab: " + SymtabFailure);
342 return false;
343 }
344 bool Changed = false;
345 for (auto &F : M) {
346 if (F.isDeclaration() || F.hasOptNone())
347 continue;
348
349 auto &FAM =
352
353 IndirectCallPromoter CallPromoter(F, &Symtab, SamplePGO, ORE);
354 bool FuncChanged = CallPromoter.processFunction(PSI);
355 if (ICPDUMPAFTER && FuncChanged) {
356 LLVM_DEBUG(dbgs() << "\n== IR Dump After =="; F.print(dbgs()));
357 LLVM_DEBUG(dbgs() << "\n");
358 }
359 Changed |= FuncChanged;
360 if (ICPCutOff != 0 && NumOfPGOICallPromotion >= ICPCutOff) {
361 LLVM_DEBUG(dbgs() << " Stop: Cutoff reached.\n");
362 break;
363 }
364 }
365 return Changed;
366}
367
371
372 if (!promoteIndirectCalls(M, PSI, InLTO | ICPLTOMode,
373 SamplePGO | ICPSamplePGOMode, MAM))
374 return PreservedAnalyses::all();
375
377}
#define LLVM_DEBUG(X)
Definition: Debug.h:101
Interface to identify indirect call promotion candidates.
static cl::opt< bool > ICPCallOnly("icp-call-only", cl::init(false), cl::Hidden, cl::desc("Run indirect-call promotion for call instructions " "only"))
static cl::opt< bool > ICPInvokeOnly("icp-invoke-only", cl::init(false), cl::Hidden, cl::desc("Run indirect-call promotion for " "invoke instruction only"))
static cl::opt< unsigned > ICPCSSkip("icp-csskip", cl::init(0), cl::Hidden, cl::desc("Skip Callsite up to this number for this compilation"))
static cl::opt< bool > ICPDUMPAFTER("icp-dumpafter", cl::init(false), cl::Hidden, cl::desc("Dump IR after transformation happens"))
static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, bool InLTO, bool SamplePGO, ModuleAnalysisManager &MAM)
static cl::opt< bool > ICPLTOMode("icp-lto", cl::init(false), cl::Hidden, cl::desc("Run indirect-call promotion in LTO " "mode"))
#define DEBUG_TYPE
static cl::opt< bool > DisableICP("disable-icp", cl::init(false), cl::Hidden, cl::desc("Disable indirect call promotion"))
static cl::opt< unsigned > ICPCutOff("icp-cutoff", cl::init(0), cl::Hidden, cl::desc("Max number of promotions for this compilation"))
static cl::opt< bool > ICPSamplePGOMode("icp-samplepgo", cl::init(false), cl::Hidden, cl::desc("Run indirect-call promotion in SamplePGO mode"))
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file provides the interface for IR based instrumentation passes ( (profile-gen,...
FunctionAnalysisManager FAM
ModuleAnalysisManager MAM
This header defines various interfaces for pass management in LLVM.
This file contains the declarations for profiling metadata utility functions.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:405
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:195
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1236
Lightweight error class with error context and mandatory checking.
Definition: Error.h:160
const Function & getFunction() const
Definition: Function.h:163
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:290
ArrayRef< InstrProfValueData > getPromotionCandidatesForInstruction(const Instruction *I, uint64_t &TotalCount, uint32_t &NumCandidates)
Returns reference to array of InstrProfValueData for the given instruction I.
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:563
A symbol table used for function [IR]PGO name look-up with keys (such as pointers,...
Definition: InstrProf.h:450
Error create(object::SectionRef &Section)
Create InstrProfSymtab from an object file section which contains function PGO names.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1635
MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
Definition: MDBuilder.cpp:37
Metadata node.
Definition: Metadata.h:1067
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
The optimization diagnostic interface.
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for missed-optimization remarks.
Diagnostic information for applied optimization remarks.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: Analysis.h:114
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
bool hasProfileSummary() const
Returns true if profile summary is available.
bool isHotCount(uint64_t C) const
Returns true if count C is considered hot.
Target - Wrapper for Target specific information.
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1074
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
std::optional< const char * > toString(const std::optional< DWARFFormValue > &V)
Take an optional DWARFFormValue and try to extract a string value from it.
DiagnosticInfoOptimizationBase::Argument NV
CallBase & promoteIndirectCall(CallBase &CB, Function *F, uint64_t Count, uint64_t TotalCount, bool AttachProfToDirectCall, OptimizationRemarkEmitter *ORE)
NodeAddr< FuncNode * > Func
Definition: RDFGraph.h:393
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
bool isLegalToPromote(const CallBase &CB, Function *Callee, const char **FailureReason=nullptr)
Return true if the given indirect call site can be made to call Callee.
std::vector< CallBase * > findIndirectCalls(Function &F)
CallBase & promoteCallWithIfThenElse(CallBase &CB, Function *Callee, MDNode *BranchWeights=nullptr)
Promote the given indirect call site to conditionally call Callee.
void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
Definition: InstrProf.cpp:1273
void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
static uint32_t scaleBranchCount(uint64_t Count, uint64_t Scale)
Scale an individual branch count.
static uint64_t calculateCountScale(uint64_t MaxCount)
Calculate what to divide by to scale counts.