LLVM 23.0.0git
IndirectCallPromotion.cpp
Go to the documentation of this file.
1//===- IndirectCallPromotion.cpp - Optimizations based on value profiling -===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the transformation that promotes indirect calls to
10// conditional direct calls when the indirect-call value profile metadata is
11// available.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/ADT/ArrayRef.h"
16#include "llvm/ADT/DenseMap.h"
17#include "llvm/ADT/Statistic.h"
18#include "llvm/ADT/StringRef.h"
25#include "llvm/IR/Dominators.h"
26#include "llvm/IR/Function.h"
27#include "llvm/IR/InstrTypes.h"
29#include "llvm/IR/LLVMContext.h"
30#include "llvm/IR/MDBuilder.h"
31#include "llvm/IR/PassManager.h"
33#include "llvm/IR/Value.h"
37#include "llvm/Support/Debug.h"
38#include "llvm/Support/Error.h"
43#include <cassert>
44#include <cstdint>
45#include <set>
46#include <string>
47#include <utility>
48#include <vector>
49
50using namespace llvm;
51
52#define DEBUG_TYPE "pgo-icall-prom"
53
54STATISTIC(NumOfPGOICallPromotion, "Number of indirect call promotions.");
55STATISTIC(NumOfPGOICallsites, "Number of indirect call candidate sites.");
56
57namespace llvm {
59
61} // namespace llvm
62
63// Command line option to disable indirect-call promotion with the default as
64// false. This is for debug purpose.
65static cl::opt<bool> DisableICP("disable-icp", cl::init(false), cl::Hidden,
66 cl::desc("Disable indirect call promotion"));
67
68// Set the cutoff value for the promotion. If the value is other than 0, we
69// stop the transformation once the total number of promotions equals the cutoff
70// value.
71// For debug use only.
73 ICPCutOff("icp-cutoff", cl::init(0), cl::Hidden,
74 cl::desc("Max number of promotions for this compilation"));
75
76// If ICPCSSkip is non zero, the first ICPCSSkip callsites will be skipped.
77// For debug use only.
79 ICPCSSkip("icp-csskip", cl::init(0), cl::Hidden,
80 cl::desc("Skip Callsite up to this number for this compilation"));
81
82// ICP the candidate function even when only a declaration is present.
84 "icp-allow-decls", cl::init(false), cl::Hidden,
85 cl::desc("Promote the target candidate even when the definition "
86 " is not available"));
87
88// ICP hot candidate functions only. When setting to false, non-cold functions
89// (warm functions) can also be promoted.
90static cl::opt<bool>
91 ICPAllowHotOnly("icp-allow-hot-only", cl::init(true), cl::Hidden,
92 cl::desc("Promote the target candidate only if it is a "
93 "hot function. Otherwise, warm functions can "
94 "also be promoted"));
95
96// If one target cannot be ICP'd, proceed with the remaining targets instead
97// of exiting the callsite.
99 "icp-allow-candidate-skip", cl::init(false), cl::Hidden,
100 cl::desc("Continue with the remaining targets instead of exiting "
101 "when failing in a candidate"));
102
103// Set if the pass is called in LTO optimization. The difference for LTO mode
104// is the pass won't prefix the source module name to the internal linkage
105// symbols.
106static cl::opt<bool> ICPLTOMode("icp-lto", cl::init(false), cl::Hidden,
107 cl::desc("Run indirect-call promotion in LTO "
108 "mode"));
109
110// Set if the pass is called in SamplePGO mode. The difference for SamplePGO
111// mode is it will add prof metadata to the created direct call.
112static cl::opt<bool>
113 ICPSamplePGOMode("icp-samplepgo", cl::init(false), cl::Hidden,
114 cl::desc("Run indirect-call promotion in SamplePGO mode"));
115
116// If the option is set to true, only call instructions will be considered for
117// transformation -- invoke instructions will be ignored.
118static cl::opt<bool>
119 ICPCallOnly("icp-call-only", cl::init(false), cl::Hidden,
120 cl::desc("Run indirect-call promotion for call instructions "
121 "only"));
122
123// If the option is set to true, only invoke instructions will be considered for
124// transformation -- call instructions will be ignored.
125static cl::opt<bool> ICPInvokeOnly("icp-invoke-only", cl::init(false),
127 cl::desc("Run indirect-call promotion for "
128 "invoke instruction only"));
129
130// Dump the function level IR if the transformation happened in this
131// function. For debug use only.
132static cl::opt<bool>
133 ICPDUMPAFTER("icp-dumpafter", cl::init(false), cl::Hidden,
134 cl::desc("Dump IR after transformation happens"));
135
136// Indirect call promotion pass will fall back to function-based comparison if
137// vtable-count / function-count is smaller than this threshold.
139 "icp-vtable-percentage-threshold", cl::init(0.995), cl::Hidden,
140 cl::desc("The percentage threshold of vtable-count / function-count for "
141 "cost-benefit analysis."));
142
143// Although comparing vtables can save a vtable load, we may need to compare
144// vtable pointer with multiple vtable address points due to class inheritance.
145// Comparing with multiple vtables inserts additional instructions on hot code
146// path, and doing so for an earlier candidate delays the comparisons for later
147// candidates. For the last candidate, only the fallback path is affected.
148// We allow multiple vtable comparison for the last function candidate and use
149// the option below to cap the number of vtables.
151 "icp-max-num-vtable-last-candidate", cl::init(1), cl::Hidden,
152 cl::desc("The maximum number of vtable for the last candidate."));
153
155 "icp-ignored-base-types", cl::Hidden,
156 cl::desc(
157 "A list of mangled vtable type info names. Classes specified by the "
158 "type info names and their derived ones will not be vtable-ICP'ed. "
159 "Useful when the profiled types and actual types in the optimized "
160 "binary could be different due to profiling limitations. Type info "
161 "names are those string literals used in LLVM type metadata"));
162
163namespace {
164
165// The key is a vtable global variable, and the value is a map.
166// In the inner map, the key represents address point offsets and the value is a
167// constant for this address point.
168using VTableAddressPointOffsetValMap =
170
171// A struct to collect type information for a virtual call site.
172struct VirtualCallSiteInfo {
173 // The offset from the address point to virtual function in the vtable.
174 uint64_t FunctionOffset;
175 // The instruction that computes the address point of vtable.
176 Instruction *VPtr;
177 // The compatible type used in LLVM type intrinsics.
178 StringRef CompatibleTypeStr;
179};
180
181// The key is a virtual call, and value is its type information.
182using VirtualCallSiteTypeInfoMap =
184
185// The key is vtable GUID, and value is its value profile count.
186using VTableGUIDCountsMap = SmallDenseMap<uint64_t, uint64_t, 16>;
187
188// Return the address point offset of the given compatible type.
189//
190// Type metadata of a vtable specifies the types that can contain a pointer to
191// this vtable, for example, `Base*` can be a pointer to a derived type
192// but not vice versa. See also https://llvm.org/docs/TypeMetadata.html
193static std::optional<uint64_t>
194getAddressPointOffset(const GlobalVariable &VTableVar,
195 StringRef CompatibleType) {
197 VTableVar.getMetadata(LLVMContext::MD_type, Types);
198
199 for (MDNode *Type : Types)
200 if (auto *TypeId = dyn_cast<MDString>(Type->getOperand(1).get());
201 TypeId && TypeId->getString() == CompatibleType)
202 return cast<ConstantInt>(
203 cast<ConstantAsMetadata>(Type->getOperand(0))->getValue())
204 ->getZExtValue();
205
206 return std::nullopt;
207}
208
209// Return a constant representing the vtable's address point specified by the
210// offset.
211static Constant *getVTableAddressPointOffset(GlobalVariable *VTable,
212 uint32_t AddressPointOffset) {
213 Module &M = *VTable->getParent();
214 LLVMContext &Context = M.getContext();
215 assert(AddressPointOffset < VTable->getGlobalSize(M.getDataLayout()) &&
216 "Out-of-bound access");
217
219 VTable,
220 llvm::ConstantInt::get(Type::getInt32Ty(Context), AddressPointOffset));
221}
222
223// Return the basic block in which Use `U` is used via its `UserInst`.
224static BasicBlock *getUserBasicBlock(Use &U, Instruction *UserInst) {
225 if (PHINode *PN = dyn_cast<PHINode>(UserInst))
226 return PN->getIncomingBlock(U);
227
228 return UserInst->getParent();
229}
230
231// `DestBB` is a suitable basic block to sink `Inst` into when `Inst` have users
232// and all users are in `DestBB`. The caller guarantees that `Inst->getParent()`
233// is the sole predecessor of `DestBB` and `DestBB` is dominated by
234// `Inst->getParent()`.
235static bool isDestBBSuitableForSink(Instruction *Inst, BasicBlock *DestBB) {
236 // 'BB' is used only by assert.
237 [[maybe_unused]] BasicBlock *BB = Inst->getParent();
238
239 assert(BB != DestBB && BB->getTerminator()->getNumSuccessors() == 2 &&
240 DestBB->getUniquePredecessor() == BB &&
241 "Guaranteed by ICP transformation");
242
243 BasicBlock *UserBB = nullptr;
244 for (Use &Use : Inst->uses()) {
245 User *User = Use.getUser();
246 // Do checked cast since IR verifier guarantees that the user of an
247 // instruction must be an instruction. See `Verifier::visitInstruction`.
249 // We can sink debug or pseudo instructions together with Inst.
250 if (UserInst->isDebugOrPseudoInst())
251 continue;
252 UserBB = getUserBasicBlock(Use, UserInst);
253 // Do not sink if Inst is used in a basic block that is not DestBB.
254 // TODO: Sink to the common dominator of all user blocks.
255 if (UserBB != DestBB)
256 return false;
257 }
258 return UserBB != nullptr;
259}
260
261// For the virtual call dispatch sequence, try to sink vtable load instructions
262// to the cold indirect call fallback.
263// FIXME: Move the sink eligibility check below to a utility function in
264// Transforms/Utils/ directory.
265static bool tryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
266 if (!isDestBBSuitableForSink(I, DestBlock))
267 return false;
268
269 // Do not move control-flow-involving, volatile loads, vaarg, alloca
270 // instructions, etc.
271 if (isa<PHINode>(I) || I->isEHPad() || I->mayThrow() || !I->willReturn() ||
273 return false;
274
275 // Do not sink convergent call instructions.
276 if (const auto *C = dyn_cast<CallBase>(I))
277 if (C->isInlineAsm() || C->cannotMerge() || C->isConvergent())
278 return false;
279
280 // Do not move an instruction that may write to memory.
281 if (I->mayWriteToMemory())
282 return false;
283
284 // We can only sink load instructions if there is nothing between the load and
285 // the end of block that could change the value.
286 if (I->mayReadFromMemory()) {
287 // We already know that SrcBlock is the unique predecessor of DestBlock.
288 for (BasicBlock::iterator Scan = std::next(I->getIterator()),
289 E = I->getParent()->end();
290 Scan != E; ++Scan) {
291 // Note alias analysis can tell whether two pointers can point to the
292 // same object in memory or not thereby find further opportunities to
293 // sink.
294 if (Scan->mayWriteToMemory())
295 return false;
296 }
297 }
298
299 BasicBlock::iterator InsertPos = DestBlock->getFirstInsertionPt();
300 I->moveBefore(*DestBlock, InsertPos);
301
302 // TODO: Sink debug intrinsic users of I to 'DestBlock'.
303 // 'InstCombinerImpl::tryToSinkInstructionDbgValues' and
304 // 'InstCombinerImpl::tryToSinkInstructionDbgVariableRecords' already have
305 // the core logic to do this.
306 return true;
307}
308
309// Try to sink instructions after VPtr to the indirect call fallback.
310// Return the number of sunk IR instructions.
311static int tryToSinkInstructions(BasicBlock *OriginalBB,
312 BasicBlock *IndirectCallBB) {
313 int SinkCount = 0;
314 // Do not sink across a critical edge for simplicity.
315 if (IndirectCallBB->getUniquePredecessor() != OriginalBB)
316 return SinkCount;
317 // Sink all eligible instructions in OriginalBB in reverse order.
318 for (Instruction &I :
320 if (tryToSinkInstruction(&I, IndirectCallBB))
321 SinkCount++;
322
323 return SinkCount;
324}
325
326// Promote indirect calls to conditional direct calls, keeping track of
327// thresholds.
328class IndirectCallPromoter {
329private:
330 Function &F;
331 Module &M;
332
333 // Symtab that maps indirect call profile values to function names and
334 // defines.
335 InstrProfSymtab *const Symtab;
336
337 const bool SamplePGO;
338
339 // A map from a virtual call to its type information.
340 const VirtualCallSiteTypeInfoMap &VirtualCSInfo;
341
342 VTableAddressPointOffsetValMap &VTableAddressPointOffsetVal;
343
344 OptimizationRemarkEmitter &ORE;
345
346 const DenseSet<StringRef> &IgnoredBaseTypes;
347
348 // A struct that records the direct target and its call count.
349 struct PromotionCandidate {
350 Function *const TargetFunction;
351 const uint64_t Count;
352 const uint32_t Index;
353
354 // The following fields only exist for promotion candidates with vtable
355 // information.
356 //
357 // Due to class inheritance, one virtual call candidate can come from
358 // multiple vtables. `VTableGUIDAndCounts` tracks the vtable GUIDs and
359 // counts for 'TargetFunction'. `AddressPoints` stores the vtable address
360 // points for comparison.
361 VTableGUIDCountsMap VTableGUIDAndCounts;
362 SmallVector<Constant *> AddressPoints;
363
364 PromotionCandidate(Function *F, uint64_t C, uint32_t I)
365 : TargetFunction(F), Count(C), Index(I) {}
366 };
367
368 // Check if the indirect-call call site should be promoted. Return the number
369 // of promotions. Inst is the candidate indirect call, ValueDataRef
370 // contains the array of value profile data for profiled targets,
371 // TotalCount is the total profiled count of call executions, and
372 // NumCandidates is the number of candidate entries in ValueDataRef.
373 std::vector<PromotionCandidate> getPromotionCandidatesForCallSite(
374 const CallBase &CB, ArrayRef<InstrProfValueData> ValueDataRef,
375 uint64_t TotalCount, uint32_t NumCandidates);
376
377 // Promote a list of targets for one indirect-call callsite by comparing
378 // indirect callee with functions. Return true if there are IR
379 // transformations and false otherwise.
380 bool tryToPromoteWithFuncCmp(
381 CallBase &CB, Instruction *VPtr, ArrayRef<PromotionCandidate> Candidates,
382 uint64_t TotalCount, MutableArrayRef<InstrProfValueData> ICallProfDataRef,
383 uint32_t NumCandidates, VTableGUIDCountsMap &VTableGUIDCounts);
384
385 // Promote a list of targets for one indirect call by comparing vtables with
386 // functions. Return true if there are IR transformations and false
387 // otherwise.
388 bool tryToPromoteWithVTableCmp(
389 CallBase &CB, Instruction *VPtr, ArrayRef<PromotionCandidate> Candidates,
390 uint64_t TotalFuncCount, uint32_t NumCandidates,
392 VTableGUIDCountsMap &VTableGUIDCounts);
393
394 // Return true if it's profitable to compare vtables for the callsite.
395 bool isProfitableToCompareVTables(const CallBase &CB,
397
398 // Return true if the vtable corresponding to VTableGUID should be skipped
399 // for vtable-based comparison.
400 bool shouldSkipVTable(uint64_t VTableGUID);
401
402 // Given an indirect callsite and the list of function candidates, compute
403 // the following vtable information in output parameters and return vtable
404 // pointer if type profiles exist.
405 // - Populate `VTableGUIDCounts` with <vtable-guid, count> using !prof
406 // metadata attached on the vtable pointer.
407 // - For each function candidate, finds out the vtables from which it gets
408 // called and stores the <vtable-guid, count> in promotion candidate.
409 Instruction *computeVTableInfos(const CallBase *CB,
410 VTableGUIDCountsMap &VTableGUIDCounts,
411 std::vector<PromotionCandidate> &Candidates);
412
413 Constant *getOrCreateVTableAddressPointVar(GlobalVariable *GV,
414 uint64_t AddressPointOffset);
415
416 void updateFuncValueProfiles(CallBase &CB,
418 uint64_t Sum, uint32_t MaxMDCount);
419
420 void updateVPtrValueProfiles(Instruction *VPtr,
421 VTableGUIDCountsMap &VTableGUIDCounts);
422
423 bool isValidTarget(uint64_t, Function *, const CallBase &, uint64_t);
424
425public:
426 IndirectCallPromoter(
427 Function &Func, Module &M, InstrProfSymtab *Symtab, bool SamplePGO,
428 const VirtualCallSiteTypeInfoMap &VirtualCSInfo,
429 VTableAddressPointOffsetValMap &VTableAddressPointOffsetVal,
430 const DenseSet<StringRef> &IgnoredBaseTypes,
431 OptimizationRemarkEmitter &ORE)
432 : F(Func), M(M), Symtab(Symtab), SamplePGO(SamplePGO),
433 VirtualCSInfo(VirtualCSInfo),
434 VTableAddressPointOffsetVal(VTableAddressPointOffsetVal), ORE(ORE),
435 IgnoredBaseTypes(IgnoredBaseTypes) {}
436 IndirectCallPromoter(const IndirectCallPromoter &) = delete;
437 IndirectCallPromoter &operator=(const IndirectCallPromoter &) = delete;
438
439 bool processFunction(ProfileSummaryInfo *PSI);
440};
441
442} // end anonymous namespace
443
444bool IndirectCallPromoter::isValidTarget(uint64_t Target,
445 Function *TargetFunction,
446 const CallBase &CB, uint64_t Count) {
447 // Don't promote if the symbol is not defined in the module. This avoids
448 // creating a reference to a symbol that doesn't exist in the module
449 // This can happen when we compile with a sample profile collected from
450 // one binary but used for another, which may have profiled targets that
451 // aren't used in the new binary. We might have a declaration initially in
452 // the case where the symbol is globally dead in the binary and removed by
453 // ThinLTO.
454 using namespace ore;
455 if (TargetFunction == nullptr) {
456 LLVM_DEBUG(dbgs() << " Not promote: Cannot find the target\n");
457 ORE.emit([&]() {
458 return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToFindTarget", &CB)
459 << "Cannot promote indirect call: target with md5sum "
460 << NV("target md5sum", Target)
461 << " not found (count=" << NV("Count", Count) << ")";
462 });
463 return false;
464 }
465 if (!ICPAllowDecls && TargetFunction->isDeclaration()) {
466 LLVM_DEBUG(dbgs() << " Not promote: target definition is not available\n");
467 ORE.emit([&]() {
468 return OptimizationRemarkMissed(DEBUG_TYPE, "NoTargetDef", &CB)
469 << "Do not promote indirect call: target with md5sum "
470 << NV("target md5sum", Target)
471 << " definition not available (count=" << ore::NV("Count", Count)
472 << ")";
473 });
474 return false;
475 }
476
477 const char *Reason = nullptr;
478 if (!isLegalToPromote(CB, TargetFunction, &Reason)) {
479
480 ORE.emit([&]() {
481 return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToPromote", &CB)
482 << "Cannot promote indirect call to "
483 << NV("TargetFunction", TargetFunction)
484 << " (count=" << NV("Count", Count) << "): " << Reason;
485 });
486 return false;
487 }
488 return true;
489}
490
491// Indirect-call promotion heuristic. The direct targets are sorted based on
492// the count. Stop at the first target that is not promoted.
493std::vector<IndirectCallPromoter::PromotionCandidate>
494IndirectCallPromoter::getPromotionCandidatesForCallSite(
495 const CallBase &CB, ArrayRef<InstrProfValueData> ValueDataRef,
496 uint64_t TotalCount, uint32_t NumCandidates) {
497 std::vector<PromotionCandidate> Ret;
498
499 LLVM_DEBUG(dbgs() << " \nWork on callsite #" << NumOfPGOICallsites << CB
500 << " Num_targets: " << ValueDataRef.size()
501 << " Num_candidates: " << NumCandidates << "\n");
502 NumOfPGOICallsites++;
503 if (ICPCSSkip != 0 && NumOfPGOICallsites <= ICPCSSkip) {
504 LLVM_DEBUG(dbgs() << " Skip: User options.\n");
505 return Ret;
506 }
507
508 for (uint32_t I = 0; I < NumCandidates; I++) {
509 uint64_t Count = ValueDataRef[I].Count;
510 assert(Count <= TotalCount);
511 (void)TotalCount;
512 uint64_t Target = ValueDataRef[I].Value;
513 LLVM_DEBUG(dbgs() << " Candidate " << I << " Count=" << Count
514 << " Target_func: " << Target << "\n");
515
516 if (ICPInvokeOnly && isa<CallInst>(CB)) {
517 LLVM_DEBUG(dbgs() << " Not promote: User options.\n");
518 ORE.emit([&]() {
519 return OptimizationRemarkMissed(DEBUG_TYPE, "UserOptions", &CB)
520 << " Not promote: User options";
521 });
522 break;
523 }
524 if (ICPCallOnly && isa<InvokeInst>(CB)) {
525 LLVM_DEBUG(dbgs() << " Not promote: User option.\n");
526 ORE.emit([&]() {
527 return OptimizationRemarkMissed(DEBUG_TYPE, "UserOptions", &CB)
528 << " Not promote: User options";
529 });
530 break;
531 }
532 if (ICPCutOff != 0 && NumOfPGOICallPromotion >= ICPCutOff) {
533 LLVM_DEBUG(dbgs() << " Not promote: Cutoff reached.\n");
534 ORE.emit([&]() {
535 return OptimizationRemarkMissed(DEBUG_TYPE, "CutOffReached", &CB)
536 << " Not promote: Cutoff reached";
537 });
538 break;
539 }
540
541 Function *TargetFunction = Symtab->getFunction(Target);
542 if (!isValidTarget(Target, TargetFunction, CB, Count)) {
544 continue;
545 else
546 break;
547 }
548
549 Ret.push_back(PromotionCandidate(TargetFunction, Count, I));
550 TotalCount -= Count;
551 }
552 return Ret;
553}
554
555Constant *IndirectCallPromoter::getOrCreateVTableAddressPointVar(
556 GlobalVariable *GV, uint64_t AddressPointOffset) {
557 auto [Iter, Inserted] =
558 VTableAddressPointOffsetVal[GV].try_emplace(AddressPointOffset, nullptr);
559 if (Inserted)
560 Iter->second = getVTableAddressPointOffset(GV, AddressPointOffset);
561 return Iter->second;
562}
563
564Instruction *IndirectCallPromoter::computeVTableInfos(
565 const CallBase *CB, VTableGUIDCountsMap &GUIDCountsMap,
566 std::vector<PromotionCandidate> &Candidates) {
568 return nullptr;
569
570 // Take the following code sequence as an example, here is how the code works
571 // @vtable1 = {[n x ptr] [... ptr @func1]}
572 // @vtable2 = {[m x ptr] [... ptr @func2]}
573 //
574 // %vptr = load ptr, ptr %d, !prof !0
575 // %0 = tail call i1 @llvm.type.test(ptr %vptr, metadata !"vtable1")
576 // tail call void @llvm.assume(i1 %0)
577 // %vfn = getelementptr inbounds ptr, ptr %vptr, i64 1
578 // %1 = load ptr, ptr %vfn
579 // call void %1(ptr %d), !prof !1
580 //
581 // !0 = !{!"VP", i32 2, i64 100, i64 123, i64 50, i64 456, i64 50}
582 // !1 = !{!"VP", i32 0, i64 100, i64 789, i64 50, i64 579, i64 50}
583 //
584 // Step 1. Find out the %vptr instruction for indirect call and use its !prof
585 // to populate `GUIDCountsMap`.
586 // Step 2. For each vtable-guid, look up its definition from symtab. LTO can
587 // make vtable definitions visible across modules.
588 // Step 3. Compute the byte offset of the virtual call, by adding vtable
589 // address point offset and function's offset relative to vtable address
590 // point. For each function candidate, this step tells us the vtable from
591 // which it comes from, and the vtable address point to compare %vptr with.
592
593 // Only virtual calls have virtual call site info.
594 auto Iter = VirtualCSInfo.find(CB);
595 if (Iter == VirtualCSInfo.end())
596 return nullptr;
597
598 LLVM_DEBUG(dbgs() << "\nComputing vtable infos for callsite #"
599 << NumOfPGOICallsites << "\n");
600
601 const auto &VirtualCallInfo = Iter->second;
602 Instruction *VPtr = VirtualCallInfo.VPtr;
603
604 SmallDenseMap<Function *, int, 4> CalleeIndexMap;
605 for (size_t I = 0; I < Candidates.size(); I++)
606 CalleeIndexMap[Candidates[I].TargetFunction] = I;
607
608 uint64_t TotalVTableCount = 0;
609 auto VTableValueDataArray =
610 getValueProfDataFromInst(*VirtualCallInfo.VPtr, IPVK_VTableTarget,
611 MaxNumVTableAnnotations, TotalVTableCount);
612 if (VTableValueDataArray.empty())
613 return VPtr;
614
615 // Compute the functions and counts from by each vtable.
616 for (const auto &V : VTableValueDataArray) {
617 uint64_t VTableVal = V.Value;
618 GUIDCountsMap[VTableVal] = V.Count;
619 GlobalVariable *VTableVar = Symtab->getGlobalVariable(VTableVal);
620 if (!VTableVar) {
621 LLVM_DEBUG(dbgs() << " Cannot find vtable definition for " << VTableVal
622 << "; maybe the vtable isn't imported\n");
623 continue;
624 }
625
626 std::optional<uint64_t> MaybeAddressPointOffset =
627 getAddressPointOffset(*VTableVar, VirtualCallInfo.CompatibleTypeStr);
628 if (!MaybeAddressPointOffset)
629 continue;
630
631 const uint64_t AddressPointOffset = *MaybeAddressPointOffset;
632
633 Function *Callee = nullptr;
634 std::tie(Callee, std::ignore) = getFunctionAtVTableOffset(
635 VTableVar, AddressPointOffset + VirtualCallInfo.FunctionOffset, M);
636 if (!Callee)
637 continue;
638 auto CalleeIndexIter = CalleeIndexMap.find(Callee);
639 if (CalleeIndexIter == CalleeIndexMap.end())
640 continue;
641
642 auto &Candidate = Candidates[CalleeIndexIter->second];
643 // There should never be duplicate GUIDs in one !prof metadata, as this is
644 // an IR invariant enforced by the verifier. Assigning counters directly
645 // won't cause overwrite or counter loss.
646 Candidate.VTableGUIDAndCounts[VTableVal] = V.Count;
647 Candidate.AddressPoints.push_back(
648 getOrCreateVTableAddressPointVar(VTableVar, AddressPointOffset));
649 }
650
651 return VPtr;
652}
653
654// Creates 'branch_weights' prof metadata using TrueWeight and FalseWeight.
655// Scales uint64_t counters down to uint32_t if necessary to prevent overflow.
656static MDNode *createBranchWeights(LLVMContext &Context, uint64_t TrueWeight,
657 uint64_t FalseWeight) {
658 MDBuilder MDB(Context);
659 uint64_t Scale = calculateCountScale(std::max(TrueWeight, FalseWeight));
660 return MDB.createBranchWeights(scaleBranchCount(TrueWeight, Scale),
661 scaleBranchCount(FalseWeight, Scale));
662}
663
665 uint64_t Count, uint64_t TotalCount,
666 bool AttachProfToDirectCall,
669 CB, DirectCallee,
670 createBranchWeights(CB.getContext(), Count, TotalCount - Count));
671
672 if (AttachProfToDirectCall)
673 setFittedBranchWeights(NewInst, {Count},
674 /*IsExpected=*/false);
675
676 using namespace ore;
677
678 if (ORE)
679 ORE->emit([&]() {
680 return OptimizationRemark(DEBUG_TYPE, "Promoted", &CB)
681 << "Promote indirect call to " << NV("DirectCallee", DirectCallee)
682 << " with count " << NV("Count", Count) << " out of "
683 << NV("TotalCount", TotalCount);
684 });
685 return NewInst;
686}
687
688// Promote indirect-call to conditional direct-call for one callsite.
689bool IndirectCallPromoter::tryToPromoteWithFuncCmp(
691 uint64_t TotalCount, MutableArrayRef<InstrProfValueData> ICallProfDataRef,
692 uint32_t NumCandidates, VTableGUIDCountsMap &VTableGUIDCounts) {
693 uint32_t NumPromoted = 0;
694
695 for (const auto &C : Candidates) {
696 uint64_t FuncCount = C.Count;
697 pgo::promoteIndirectCall(CB, C.TargetFunction, FuncCount, TotalCount,
698 SamplePGO, &ORE);
699 assert(TotalCount >= FuncCount);
700 TotalCount -= FuncCount;
701 NumOfPGOICallPromotion++;
702 NumPromoted++;
703
704 // Update the count and this entry will be erased later.
705 ICallProfDataRef[C.Index].Count = 0;
706 if (!EnableVTableProfileUse || C.VTableGUIDAndCounts.empty())
707 continue;
708
709 // After a virtual call candidate gets promoted, update the vtable's counts
710 // proportionally. Each vtable-guid in `C.VTableGUIDAndCounts` represents
711 // a vtable from which the virtual call is loaded. Compute the sum and use
712 // 128-bit APInt to improve accuracy.
713 uint64_t SumVTableCount = 0;
714 for (const auto &[GUID, VTableCount] : C.VTableGUIDAndCounts)
715 SumVTableCount += VTableCount;
716
717 for (const auto &[GUID, VTableCount] : C.VTableGUIDAndCounts) {
718 APInt APFuncCount((unsigned)128, FuncCount, false /*signed*/);
719 APFuncCount *= VTableCount;
720 VTableGUIDCounts[GUID] -= APFuncCount.udiv(SumVTableCount).getZExtValue();
721 }
722 }
723 if (NumPromoted == 0)
724 return false;
725
726 assert(NumPromoted <= ICallProfDataRef.size() &&
727 "Number of promoted functions should not be greater than the number "
728 "of values in profile metadata");
729
730 updateFuncValueProfiles(CB, ICallProfDataRef, TotalCount, NumCandidates);
731 updateVPtrValueProfiles(VPtr, VTableGUIDCounts);
732 return true;
733}
734
735void IndirectCallPromoter::updateFuncValueProfiles(
736 CallBase &CB, MutableArrayRef<InstrProfValueData> CallVDs,
737 uint64_t TotalCount, uint32_t MaxMDCount) {
738 // First clear the existing !prof.
739 CB.setMetadata(LLVMContext::MD_prof, nullptr);
740
741 // Sort value profiles by count in descending order.
742 llvm::stable_sort(CallVDs, [](const InstrProfValueData &LHS,
743 const InstrProfValueData &RHS) {
744 return LHS.Count > RHS.Count;
745 });
746 // Drop the <target-value, count> pair if count is zero.
748 CallVDs.begin(),
749 llvm::upper_bound(CallVDs, 0U,
750 [](uint64_t Count, const InstrProfValueData &ProfData) {
751 return ProfData.Count <= Count;
752 }));
753
754 // Annotate the remaining value profiles if counter is not zero.
755 if (TotalCount != 0)
756 annotateValueSite(M, CB, VDs, TotalCount, IPVK_IndirectCallTarget,
757 MaxMDCount);
758}
759
760void IndirectCallPromoter::updateVPtrValueProfiles(
761 Instruction *VPtr, VTableGUIDCountsMap &VTableGUIDCounts) {
762 if (!EnableVTableProfileUse || VPtr == nullptr ||
763 !VPtr->getMetadata(LLVMContext::MD_prof))
764 return;
765 VPtr->setMetadata(LLVMContext::MD_prof, nullptr);
766 std::vector<InstrProfValueData> VTableValueProfiles;
767 uint64_t TotalVTableCount = 0;
768 for (auto [GUID, Count] : VTableGUIDCounts) {
769 if (Count == 0)
770 continue;
771
772 VTableValueProfiles.push_back({GUID, Count});
773 TotalVTableCount += Count;
774 }
775 llvm::sort(VTableValueProfiles,
776 [](const InstrProfValueData &LHS, const InstrProfValueData &RHS) {
777 return LHS.Count > RHS.Count;
778 });
779
780 annotateValueSite(M, *VPtr, VTableValueProfiles, TotalVTableCount,
781 IPVK_VTableTarget, VTableValueProfiles.size());
782}
783
784bool IndirectCallPromoter::tryToPromoteWithVTableCmp(
785 CallBase &CB, Instruction *VPtr, ArrayRef<PromotionCandidate> Candidates,
786 uint64_t TotalFuncCount, uint32_t NumCandidates,
788 VTableGUIDCountsMap &VTableGUIDCounts) {
789 SmallVector<std::pair<uint32_t, uint64_t>, 4> PromotedFuncCount;
790
791 for (const auto &Candidate : Candidates) {
792 for (auto &[GUID, Count] : Candidate.VTableGUIDAndCounts)
793 VTableGUIDCounts[GUID] -= Count;
794
795 // 'OriginalBB' is the basic block of indirect call. After each candidate
796 // is promoted, a new basic block is created for the indirect fallback basic
797 // block and indirect call `CB` is moved into this new BB.
798 BasicBlock *OriginalBB = CB.getParent();
800 CB, VPtr, Candidate.TargetFunction, Candidate.AddressPoints,
801 createBranchWeights(CB.getContext(), Candidate.Count,
802 TotalFuncCount - Candidate.Count));
803
804 int SinkCount = tryToSinkInstructions(OriginalBB, CB.getParent());
805
806 ORE.emit([&]() {
807 OptimizationRemark Remark(DEBUG_TYPE, "Promoted", &CB);
808
809 const auto &VTableGUIDAndCounts = Candidate.VTableGUIDAndCounts;
810 Remark << "Promote indirect call to "
811 << ore::NV("DirectCallee", Candidate.TargetFunction)
812 << " with count " << ore::NV("Count", Candidate.Count)
813 << " out of " << ore::NV("TotalCount", TotalFuncCount) << ", sink "
814 << ore::NV("SinkCount", SinkCount)
815 << " instruction(s) and compare "
816 << ore::NV("VTable", VTableGUIDAndCounts.size())
817 << " vtable(s): {";
818
819 // Sort GUIDs so remark message is deterministic.
820 std::set<uint64_t> GUIDSet;
821 for (auto [GUID, Count] : VTableGUIDAndCounts)
822 GUIDSet.insert(GUID);
823 for (auto Iter = GUIDSet.begin(); Iter != GUIDSet.end(); Iter++) {
824 if (Iter != GUIDSet.begin())
825 Remark << ", ";
826 Remark << ore::NV("VTable", Symtab->getGlobalVariable(*Iter));
827 }
828
829 Remark << "}";
830
831 return Remark;
832 });
833
834 PromotedFuncCount.push_back({Candidate.Index, Candidate.Count});
835
836 assert(TotalFuncCount >= Candidate.Count &&
837 "Within one prof metadata, total count is the sum of counts from "
838 "individual <target, count> pairs");
839 // Use std::min since 'TotalFuncCount' is the saturated sum of individual
840 // counts, see
841 // https://github.com/llvm/llvm-project/blob/abedb3b8356d5d56f1c575c4f7682fba2cb19787/llvm/lib/ProfileData/InstrProf.cpp#L1281-L1288
842 TotalFuncCount -= std::min(TotalFuncCount, Candidate.Count);
843 NumOfPGOICallPromotion++;
844 }
845
846 if (PromotedFuncCount.empty())
847 return false;
848
849 // Update value profiles for 'CB' and 'VPtr', assuming that each 'CB' has
850 // a distinct 'VPtr'.
851 // FIXME: When Clang `-fstrict-vtable-pointers` is enabled, a vtable might be
852 // used to load multiple virtual functions. The vtable profiles need to be
853 // updated properly in that case (e.g., for each indirect call annotate both
854 // type profiles and function profiles in one !prof).
855 for (size_t I = 0; I < PromotedFuncCount.size(); I++) {
856 uint32_t Index = PromotedFuncCount[I].first;
857 ICallProfDataRef[Index].Count -=
858 std::max(PromotedFuncCount[I].second, ICallProfDataRef[Index].Count);
859 }
860 updateFuncValueProfiles(CB, ICallProfDataRef, TotalFuncCount, NumCandidates);
861 updateVPtrValueProfiles(VPtr, VTableGUIDCounts);
862 return true;
863}
864
865// Traverse all the indirect-call callsites and use their value profile
866// annotations to perform indirect-call promotion.
//
// For every indirect call returned by findIndirectCalls(F) this:
//   1. fetches the promotion candidates from the value profile and skips the
//      callsite when there are none;
//   2. when a profile summary is available, skips callsites whose total count
//      is cold, and (if ICPAllowHotOnly is set) those that are not hot;
//   3. promotes the callsite using either vtable comparison or function
//      comparison, depending on isProfitableToCompareVTables().
//
// \param PSI  Profile summary info; may be null, in which case the cold/hot
//             filtering is skipped.
// \returns true if any promotion attempt reported a change.
867bool IndirectCallPromoter::processFunction(ProfileSummaryInfo *PSI) {
868 bool Changed = false;
869 ICallPromotionAnalysis ICallAnalysis;
870 for (auto *CB : findIndirectCalls(F)) {
// NumCandidates and TotalCount are handed by reference to the analysis call
// below (presumably as out-parameters that it fills in).
871 uint32_t NumCandidates;
872 uint64_t TotalCount;
873 auto ICallProfDataRef = ICallAnalysis.getPromotionCandidatesForInstruction(
874 CB, TotalCount, NumCandidates);
// Nothing to promote at this callsite.
875 if (!NumCandidates)
876 continue;
877 if (PSI && PSI->hasProfileSummary()) {
878 // Don't promote cold candidates.
879 if (PSI->isColdCount(TotalCount)) {
880 LLVM_DEBUG(dbgs() << "Don't promote the cold candidate: TotalCount="
881 << TotalCount << "\n");
882 continue;
883 }
884 // Only promote hot callsites if ICPAllowHotOnly is true.
885 if (ICPAllowHotOnly && !PSI->isHotCount(TotalCount)) {
886 LLVM_DEBUG(dbgs() << "Don't promote the non-hot candidate: TotalCount="
887 << TotalCount << "\n");
888 continue;
889 }
890 }
891
892 auto PromotionCandidates = getPromotionCandidatesForCallSite(
893 *CB, ICallProfDataRef, TotalCount, NumCandidates);
894
// computeVTableInfos receives VTableGUIDCounts and PromotionCandidates and
// yields the vtable-pointer instruction; presumably it also populates both
// arguments (its definition is outside this chunk -- confirm).
895 VTableGUIDCountsMap VTableGUIDCounts;
896 Instruction *VPtr =
897 computeVTableInfos(CB, VTableGUIDCounts, PromotionCandidates);
898
// Choose the promotion strategy: compare vtables when deemed profitable,
// otherwise fall back to comparing function addresses. Note that the two
// helpers take ICallProfDataRef / NumCandidates in a different order.
899 if (isProfitableToCompareVTables(*CB, PromotionCandidates))
900 Changed |= tryToPromoteWithVTableCmp(*CB, VPtr, PromotionCandidates,
901 TotalCount, NumCandidates,
902 ICallProfDataRef, VTableGUIDCounts);
903 else
904 Changed |= tryToPromoteWithFuncCmp(*CB, VPtr, PromotionCandidates,
905 TotalCount, ICallProfDataRef,
906 NumCandidates, VTableGUIDCounts);
907 }
908 return Changed;
909}
910
911// TODO: Return false if the function addressing and vtable load instructions
912// cannot sink to indirect fallback.
//
// Decide whether the callsite `CB` should be promoted by comparing vtables
// rather than function addresses.
//
// Returns false when:
//   - EnableVTableProfileUse is off or there are no candidates;
//   - any vtable of any candidate is rejected by shouldSkipVTable();
//   - a candidate's summed vtable count is below
//     Candidate.Count * ICPVTablePercentageThreshold;
//   - a candidate has more address points than allowed (1 for every
//     candidate except the last, ICPMaxNumVTableLastCandidate for the last).
// Returns true only if every candidate passes all of the checks.
913bool IndirectCallPromoter::isProfitableToCompareVTables(
914 const CallBase &CB, ArrayRef<PromotionCandidate> Candidates) {
915 if (!EnableVTableProfileUse || Candidates.empty())
916 return false;
917 LLVM_DEBUG(dbgs() << "\nEvaluating vtable profitability for callsite #"
918 << NumOfPGOICallsites << CB << "\n");
919 const size_t CandidateSize = Candidates.size();
920 for (size_t I = 0; I < CandidateSize; I++) {
921 auto &Candidate = Candidates[I];
922 auto &VTableGUIDAndCounts = Candidate.VTableGUIDAndCounts;
923
// Debug-only dump of the candidate's function count and per-vtable counts.
924 LLVM_DEBUG({
925 dbgs() << " Candidate " << I << " FunctionCount: " << Candidate.Count
926 << ", VTableCounts:";
927 for (const auto &[GUID, Count] : VTableGUIDAndCounts)
928 dbgs() << " {" << Symtab->getGlobalVariable(GUID)->getName() << ", "
929 << Count << "}";
930 dbgs() << "\n";
931 });
932
// Sum of the counts of all vtables recorded for this candidate.
933 uint64_t CandidateVTableCount = 0;
934
935 for (auto &[GUID, Count] : VTableGUIDAndCounts) {
936 CandidateVTableCount += Count;
937
// A single ignored vtable disables vtable comparison for the whole callsite.
938 if (shouldSkipVTable(GUID))
939 return false;
940 }
941
// Bail out when the vtable counts do not account for (nearly) all of the
// function count. The product is computed in floating point because the
// threshold option is a cl::opt<float>.
// NOTE(review): listing line 943 (the opening of the debug-print macro for the
// message below) is missing from this extract.
942 if (CandidateVTableCount < Candidate.Count * ICPVTablePercentageThreshold) {
944 dbgs() << " function count " << Candidate.Count
945 << " and its vtable sum count " << CandidateVTableCount
946 << " have discrepancies. Bail out vtable comparison.\n");
947 return false;
948 }
949
950 // 'MaxNumVTable' limits the number of vtables to make vtable comparison
951 // profitable. Comparing multiple vtables for one function candidate will
952 // insert additional instructions on the hot path, and allowing more than
953 // one vtable for non last candidates may or may not elongate the dependency
954 // chain for the subsequent candidates. Set its value to 1 for non-last
955 // candidate and allow option to override it for the last candidate.
956 int MaxNumVTable = 1;
957 if (I == CandidateSize - 1)
958 MaxNumVTable = ICPMaxNumVTableLastCandidate;
959
960 if ((int)Candidate.AddressPoints.size() > MaxNumVTable) {
961 LLVM_DEBUG(dbgs() << " allow at most " << MaxNumVTable << " and got "
962 << Candidate.AddressPoints.size()
963 << " vtables. Bail out for vtable comparison.\n");
964 return false;
965 }
966 }
967
968 return true;
969}
970
// Return true if the vtable identified by `VTableGUID` carries type metadata
// naming one of the types in IgnoredBaseTypes, i.e. its profile should not be
// used for vtable comparison. Always returns false when IgnoredBaseTypes is
// empty.
971bool IndirectCallPromoter::shouldSkipVTable(uint64_t VTableGUID) {
// Fast path: nothing is ignored, so skip the symtab lookup entirely.
972 if (IgnoredBaseTypes.empty())
973 return false;
974
975 auto *VTableVar = Symtab->getGlobalVariable(VTableGUID);
976
977 assert(VTableVar && "VTableVar must exist for GUID in VTableGUIDAndCounts");
978
// Collect the !type metadata attached to the vtable variable.
// NOTE(review): listing line 979 (the declaration of `Types`) is missing from
// this extract.
980 VTableVar->getMetadata(LLVMContext::MD_type, Types);
981
// Operand 1 of each type node is tested as the type identifier; only
// string identifiers (MDString) are matched against the ignore list.
982 for (auto *Type : Types)
983 if (auto *TypeId = dyn_cast<MDString>(Type->getOperand(1).get()))
984 if (IgnoredBaseTypes.contains(TypeId->getString())) {
985 LLVM_DEBUG(dbgs() << " vtable profiles should be ignored. Bail "
986 "out of vtable comparison.");
987 return true;
988 }
989 return false;
990}
991
992// For virtual calls in the module, collect per-callsite information which will
993// be used to associate an ICP candidate with a vtable and a specific function
994// in the vtable. With type intrinsics (llvm.type.test), we can find virtual
995// calls in a compile-time efficient manner (by iterating its users) and more
996// importantly use the compatible type later to figure out the function byte
997// offset relative to the start of vtables.
//
// Results are written into `VirtualCSInfo`, keyed by the call instruction; each
// entry holds the call's offset, the instruction loading the vtable pointer,
// and the compatible type id string. Nothing is recorded when the module has
// no (used) llvm.type.test declaration.
//
// NOTE(review): listing line 999 (the function name and its leading
// parameters) is missing from this extract; the code below uses parameters
// named `M` and `MAM` in addition to `VirtualCSInfo`.
998static void
1000 VirtualCallSiteTypeInfoMap &VirtualCSInfo) {
1001 // Right now only llvm.type.test is used to find out virtual call sites.
1002 // With ThinLTO and whole-program-devirtualization, llvm.type.test and
1003 // llvm.public.type.test are emitted, and llvm.public.type.test is either
1004 // refined to llvm.type.test or dropped before indirect-call-promotion pass.
1005 //
1006 // FIXME: For fullLTO with VFE, `llvm.type.checked.load intrinsic` is emitted.
1007 // Find out virtual calls by looking at users of llvm.type.checked.load in
1008 // that case.
1009 Function *TypeTestFunc =
1010 Intrinsic::getDeclarationIfExists(&M, Intrinsic::type_test);
1011 if (!TypeTestFunc || TypeTestFunc->use_empty())
1012 return;
1013
// Dominator trees are obtained per function through the function analysis
// manager proxied by the module analysis manager.
1014 auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
1015 auto LookupDomTree = [&FAM](Function &F) -> DominatorTree & {
1016 return FAM.getResult<DominatorTreeAnalysis>(F);
1017 };
1018 // Iterate all type.test calls to find all indirect calls.
1019 for (Use &U : llvm::make_early_inc_range(TypeTestFunc->uses())) {
1020 auto *CI = dyn_cast<CallInst>(U.getUser());
1021 if (!CI)
1022 continue;
// NOTE(review): `cast<>` asserts on a type mismatch instead of yielding null,
// so the null check right below appears to be unreachable; `dyn_cast<>` may
// have been intended -- confirm.
1023 auto *TypeMDVal = cast<MetadataAsValue>(CI->getArgOperand(1));
1024 if (!TypeMDVal)
1025 continue;
// Only type ids that are plain strings are handled.
1026 auto *CompatibleTypeId = dyn_cast<MDString>(TypeMDVal->getMetadata());
1027 if (!CompatibleTypeId)
1028 continue;
1029
1030 // Find out all devirtualizable call sites given a llvm.type.test
1031 // intrinsic call.
// NOTE(review): listing lines 1032-1033 (presumably the declarations of
// `DevirtCalls` and `Assumes`) are missing from this extract.
1034 auto &DT = LookupDomTree(*CI->getFunction());
1035 findDevirtualizableCallsForTypeTest(DevirtCalls, Assumes, CI, DT);
1036
1037 for (auto &DevirtCall : DevirtCalls) {
1038 CallBase &CB = DevirtCall.CB;
1039 // Given an indirect call, try find the instruction which loads a
1040 // pointer to virtual table.
// NOTE(review): listing line 1042 (the initializer of `VTablePtr`) is missing
// from this extract.
1041 Instruction *VTablePtr =
1043 if (!VTablePtr)
1044 continue;
// Record {offset, vtable-pointer instruction, compatible type id} for CB.
1045 VirtualCSInfo[&CB] = {DevirtCall.Offset, VTablePtr,
1046 CompatibleTypeId->getString()};
1047 }
1048 }
1049}
1050
1051// A wrapper function that does the actual work.
//
// Runs indirect-call promotion over every function definition in `M` that is
// not marked optnone, sharing one symbol table, one virtual-callsite info map
// and one vtable-address-point map across all functions.
//
// \param M          Module to transform.
// \param PSI        Profile summary forwarded to processFunction().
// \param InLTO      Forwarded to InstrProfSymtab::create().
// \param SamplePGO  Forwarded to the IndirectCallPromoter constructor.
// \param MAM        Module analysis manager used to reach function analyses.
// \returns true if any function was changed; false if ICP is disabled via
//          DisableICP or the symbol table could not be created.
1052static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, bool InLTO,
1053 bool SamplePGO, ModuleAnalysisManager &MAM) {
1054 if (DisableICP)
1055 return false;
1056 InstrProfSymtab Symtab;
// A symtab creation failure is reported through the LLVMContext and the
// module is left untouched.
1057 if (Error E = Symtab.create(M, InLTO)) {
1058 std::string SymtabFailure = toString(std::move(E));
1059 M.getContext().emitError("Failed to create symtab: " + SymtabFailure);
1060 return false;
1061 }
1062 bool Changed = false;
1063 VirtualCallSiteTypeInfoMap VirtualCSInfo;
1064
1065 DenseSet<StringRef> IgnoredBaseTypes;
1066
// NOTE(review): listing line 1067 -- the opening of the block closed at
// listing line 1071 -- is missing from this extract; presumably it is a guard
// on EnableVTableProfileUse. Confirm against the upstream file.
1068 computeVirtualCallSiteTypeInfoMap(M, MAM, VirtualCSInfo);
1069
1070 IgnoredBaseTypes.insert_range(ICPIgnoredBaseTypes);
1071 }
1072
1073 // VTableAddressPointOffsetVal stores the vtable address points. The vtable
1074 // address point of a given <vtable, address point offset> is static (doesn't
1075 // change after being computed once).
1076 // IndirectCallPromoter::getOrCreateVTableAddressPointVar creates the map
1077 // entry the first time a <vtable, offset> pair is seen, as
1078 // promoteIndirectCalls processes an IR module and calls IndirectCallPromoter
1079 // repeatedly on each function.
1080 VTableAddressPointOffsetValMap VTableAddressPointOffsetVal;
1081
1082 for (auto &F : M) {
// Skip declarations and functions that opted out of optimization.
1083 if (F.isDeclaration() || F.hasOptNone())
1084 continue;
1085
1086 auto &FAM =
1087 MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
1088 auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
1089
1090 IndirectCallPromoter CallPromoter(F, M, &Symtab, SamplePGO, VirtualCSInfo,
1091 VTableAddressPointOffsetVal,
1092 IgnoredBaseTypes, ORE);
1093 bool FuncChanged = CallPromoter.processFunction(PSI);
// Optional debug dump of the function after it has been transformed.
1094 if (ICPDUMPAFTER && FuncChanged) {
1095 LLVM_DEBUG(dbgs() << "\n== IR Dump After =="; F.print(dbgs()));
1096 LLVM_DEBUG(dbgs() << "\n");
1097 }
1098 Changed |= FuncChanged;
// Debugging aid: a non-zero ICPCutOff stops processing further functions
// once the promotion statistic reaches the cutoff.
1099 if (ICPCutOff != 0 && NumOfPGOICallPromotion >= ICPCutOff) {
1100 LLVM_DEBUG(dbgs() << " Stop: Cutoff reached.\n");
1101 break;
1102 }
1103 }
1104 return Changed;
1105}
1106
// Body of the pass entry point. NOTE(review): its signature (listing lines
// 1107-1108) is missing from this extract; the body uses `M`, `MAM`, `InLTO`
// and `SamplePGO` -- confirm the declaration against the upstream file.
1109 ProfileSummaryInfo *PSI = &MAM.getResult<ProfileSummaryAnalysis>(M);
1110
// The command-line flags ICPLTOMode / ICPSamplePGOMode can force the
// corresponding mode on in addition to the values held by the pass.
// If nothing was promoted, every analysis is reported as preserved.
1111 if (!promoteIndirectCalls(M, PSI, InLTO | ICPLTOMode,
1112 SamplePGO | ICPSamplePGOMode, MAM))
1113 return PreservedAnalyses::all();
1114
// Otherwise the IR changed, so no analyses are claimed to be preserved.
1115 return PreservedAnalyses::none();
1116}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file defines the DenseMap class.
#define DEBUG_TYPE
This header defines various interfaces for pass management in LLVM.
Interface to identify indirect call promotion candidates.
static cl::opt< bool > ICPCallOnly("icp-call-only", cl::init(false), cl::Hidden, cl::desc("Run indirect-call promotion for call instructions " "only"))
static cl::opt< bool > ICPInvokeOnly("icp-invoke-only", cl::init(false), cl::Hidden, cl::desc("Run indirect-call promotion for " "invoke instruction only"))
static cl::opt< unsigned > ICPCSSkip("icp-csskip", cl::init(0), cl::Hidden, cl::desc("Skip Callsite up to this number for this compilation"))
static cl::opt< bool > ICPDUMPAFTER("icp-dumpafter", cl::init(false), cl::Hidden, cl::desc("Dump IR after transformation happens"))
static cl::opt< bool > ICPAllowHotOnly("icp-allow-hot-only", cl::init(true), cl::Hidden, cl::desc("Promote the target candidate only if it is a " "hot function. Otherwise, warm functions can " "also be promoted"))
static cl::opt< float > ICPVTablePercentageThreshold("icp-vtable-percentage-threshold", cl::init(0.995), cl::Hidden, cl::desc("The percentage threshold of vtable-count / function-count for " "cost-benefit analysis."))
static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, bool InLTO, bool SamplePGO, ModuleAnalysisManager &MAM)
static void computeVirtualCallSiteTypeInfoMap(Module &M, ModuleAnalysisManager &MAM, VirtualCallSiteTypeInfoMap &VirtualCSInfo)
static cl::opt< bool > ICPAllowDecls("icp-allow-decls", cl::init(false), cl::Hidden, cl::desc("Promote the target candidate even when the definition " " is not available"))
static MDNode * createBranchWeights(LLVMContext &Context, uint64_t TrueWeight, uint64_t FalseWeight)
static cl::opt< bool > ICPAllowCandidateSkip("icp-allow-candidate-skip", cl::init(false), cl::Hidden, cl::desc("Continue with the remaining targets instead of exiting " "when failing in a candidate"))
static cl::list< std::string > ICPIgnoredBaseTypes("icp-ignored-base-types", cl::Hidden, cl::desc("A list of mangled vtable type info names. Classes specified by the " "type info names and their derived ones will not be vtable-ICP'ed. " "Useful when the profiled types and actual types in the optimized " "binary could be different due to profiling limitations. Type info " "names are those string literals used in LLVM type metadata"))
static cl::opt< bool > ICPLTOMode("icp-lto", cl::init(false), cl::Hidden, cl::desc("Run indirect-call promotion in LTO " "mode"))
static cl::opt< bool > DisableICP("disable-icp", cl::init(false), cl::Hidden, cl::desc("Disable indirect call promotion"))
static cl::opt< unsigned > ICPCutOff("icp-cutoff", cl::init(0), cl::Hidden, cl::desc("Max number of promotions for this compilation"))
static cl::opt< int > ICPMaxNumVTableLastCandidate("icp-max-num-vtable-last-candidate", cl::init(1), cl::Hidden, cl::desc("The maximum number of vtable for the last candidate."))
static cl::opt< bool > ICPSamplePGOMode("icp-samplepgo", cl::init(false), cl::Hidden, cl::desc("Run indirect-call promotion in SamplePGO mode"))
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Machine Check Debug Module
static bool processFunction(Function &F, NVPTXTargetMachine &TM)
This file provides the interface for IR based instrumentation passes ( (profile-gen,...
FunctionAnalysisManager FAM
ModuleAnalysisManager MAM
This file contains the declarations for profiling metadata utility functions.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:119
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition APInt.h:78
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
Get the array size.
Definition ArrayRef.h:141
bool empty() const
Check if the array is empty.
Definition ArrayRef.h:136
LLVM Basic Block Representation.
Definition BasicBlock.h:62
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
Definition BasicBlock.h:237
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
static Constant * getInBoundsPtrAdd(Constant *Ptr, Constant *Offset)
Create a getelementptr inbounds i8, ptr, offset constant expression.
Definition Constants.h:1518
This is an important base class in LLVM.
Definition Constant.h:43
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:225
iterator end()
Definition DenseMap.h:143
Implements a dense probed hash-table based set.
Definition DenseSet.h:289
Analysis pass which computes a DominatorTree.
Definition Dominators.h:270
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:151
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this GlobalObject.
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition Globals.cpp:337
LLVM_ABI MutableArrayRef< InstrProfValueData > getPromotionCandidatesForInstruction(const Instruction *I, uint64_t &TotalCount, uint32_t &NumCandidates, unsigned MaxNumValueData=0)
Returns reference to array of InstrProfValueData for the given instruction I.
A symbol table used for function [IR]PGO name look-up with keys (such as pointers,...
Definition InstrProf.h:517
GlobalVariable * getGlobalVariable(uint64_t MD5Hash) const
Return the global variable corresponding to md5 hash.
Definition InstrProf.h:809
LLVM_ABI Error create(object::SectionRef &Section)
Create InstrProfSymtab from an object file section which contains function PGO names.
Function * getFunction(uint64_t FuncMD5Hash) const
Return function from the name's md5 hash. Return nullptr if not found.
Definition InstrProf.h:799
LLVM_ABI bool isDebugOrPseudoInst() const LLVM_READONLY
Return true if the instruction is a DbgInfoIntrinsic or PseudoProbeInst.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
Definition MDBuilder.cpp:38
Metadata node.
Definition Metadata.h:1069
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
Represent a mutable reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:294
iterator begin() const
Definition ArrayRef.h:338
The optimization diagnostic interface.
LLVM_ABI void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for applied optimization remarks.
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
bool hasProfileSummary() const
Returns true if profile summary is available.
LLVM_ABI bool isColdCount(uint64_t C) const
Returns true if count C is considered cold.
LLVM_ABI bool isHotCount(uint64_t C) const
Returns true if count C is considered hot.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
Target - Wrapper for Target specific information.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:309
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:258
bool use_empty() const
Definition Value.h:346
iterator_range< use_iterator > uses()
Definition Value.h:380
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:319
void insert_range(Range &&R)
Definition DenseSet.h:238
const ParentTy * getParent() const
Definition ilist_node.h:34
Changed
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
LLVM_ABI Function * getDeclarationIfExists(const Module *M, ID id)
Look up the Function declaration of the intrinsic id in the Module M and return it if it exists.
initializer< Ty > init(const Ty &Val)
Add a small namespace to avoid name clashes with the classes used in the streaming interface.
DiagnosticInfoOptimizationBase::Argument NV
LLVM_ABI CallBase & promoteIndirectCall(CallBase &CB, Function *F, uint64_t Count, uint64_t TotalCount, bool AttachProfToDirectCall, OptimizationRemarkEmitter *ORE)
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:315
void stable_sort(R &&Range)
Definition STLExtras.h:2115
LLVM_ABI bool isLegalToPromote(const CallBase &CB, Function *Callee, const char **FailureReason=nullptr)
Return true if the given indirect call site can be made to call Callee.
std::vector< CallBase * > findIndirectCalls(Function &F)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI CallBase & promoteCallWithIfThenElse(CallBase &CB, Function *Callee, MDNode *BranchWeights=nullptr)
Promote the given indirect call site to conditionally call Callee.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:633
InnerAnalysisManagerProxy< FunctionAnalysisManager, Module > FunctionAnalysisManagerModuleProxy
Provide the FunctionAnalysisManager to Module proxy.
cl::opt< unsigned > MaxNumVTableAnnotations("icp-max-num-vtables", cl::init(6), cl::Hidden, cl::desc("Max number of vtables annotated for a vtable load instruction."))
auto upper_bound(R &&Range, T &&Value)
Provide wrappers to std::upper_bound which take ranges instead of having to pass begin/end explicitly...
Definition STLExtras.h:2064
cl::opt< bool > EnableVTableProfileUse("enable-vtable-profile-use", cl::init(false), cl::desc("If ThinLTO and WPD is enabled and this option is true, vtable " "profiles will be used by ICP pass for more efficient indirect " "call sequence. If false, type profiles won't be used."))
LLVM_ABI void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
auto reverse(ContainerTy &&C)
Definition STLExtras.h:407
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1635
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
LLVM_ABI SmallVector< InstrProfValueData, 4 > getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind, uint32_t MaxNumValueData, uint64_t &TotalC, bool GetNoICPValue=false)
Extract the value profile data from Inst and returns them if Inst is annotated with value profile dat...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
ArrayRef(const T &OneElt) -> ArrayRef< T >
std::string toString(const APInt &I, unsigned Radix, bool Signed, bool formatAsCLiteral=false, bool UpperCase=true, bool InsertSeparators=false)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
uint32_t scaleBranchCount(uint64_t Count, uint64_t Scale)
Scale an individual branch count.
LLVM_ABI void setFittedBranchWeights(Instruction &I, ArrayRef< uint64_t > Weights, bool IsExpected, bool ElideAllZero=false)
Variant of setBranchWeights where the Weights will be fit first to uint32_t by shifting right.
uint64_t calculateCountScale(uint64_t MaxCount)
Calculate what to divide by to scale counts.
LLVM_ABI CallBase & promoteCallWithVTableCmp(CallBase &CB, Instruction *VPtr, Function *Callee, ArrayRef< Constant * > AddressPoints, MDNode *BranchWeights)
This is similar to promoteCallWithIfThenElse except that the condition to promote a virtual call is t...
LLVM_ABI void findDevirtualizableCallsForTypeTest(SmallVectorImpl< DevirtCallSite > &DevirtCalls, SmallVectorImpl< CallInst * > &Assumes, const CallInst *CI, DominatorTree &DT)
Given a call to the intrinsic @llvm.type.test, find all devirtualizable call sites based on the call ...
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
Definition MIRParser.h:39
LLVM_ABI std::pair< Function *, Constant * > getFunctionAtVTableOffset(GlobalVariable *GV, uint64_t Offset, Module &M)
Given a vtable and a specified offset, returns the function and the trivial pointer at the specified ...
static Instruction * tryGetVTableInstruction(CallBase *CB)