LLVM 20.0.0git
IndirectCallPromotion.cpp
Go to the documentation of this file.
1//===- IndirectCallPromotion.cpp - Optimizations based on value profiling -===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the transformation that promotes indirect calls to
10// conditional direct calls when the indirect-call value profile metadata is
11// available.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/ADT/ArrayRef.h"
16#include "llvm/ADT/DenseMap.h"
17#include "llvm/ADT/Statistic.h"
18#include "llvm/ADT/StringRef.h"
25#include "llvm/IR/Dominators.h"
26#include "llvm/IR/Function.h"
27#include "llvm/IR/InstrTypes.h"
29#include "llvm/IR/LLVMContext.h"
30#include "llvm/IR/MDBuilder.h"
31#include "llvm/IR/PassManager.h"
33#include "llvm/IR/Value.h"
37#include "llvm/Support/Debug.h"
38#include "llvm/Support/Error.h"
43#include <cassert>
44#include <cstdint>
45#include <memory>
46#include <set>
47#include <string>
48#include <unordered_map>
49#include <utility>
50#include <vector>
51
52using namespace llvm;
53
54#define DEBUG_TYPE "pgo-icall-prom"
55
56STATISTIC(NumOfPGOICallPromotion, "Number of indirect call promotions.");
57STATISTIC(NumOfPGOICallsites, "Number of indirect call candidate sites.");
58
60
61namespace llvm {
63}
64
65// Command line option to disable indirect-call promotion with the default as
66// false. This is for debug purpose.
67static cl::opt<bool> DisableICP("disable-icp", cl::init(false), cl::Hidden,
68 cl::desc("Disable indirect call promotion"));
69
70// Set the cutoff value for the promotion. If the value is other than 0, we
71// stop the transformation once the total number of promotions equals the cutoff
72// value.
73// For debug use only.
75 ICPCutOff("icp-cutoff", cl::init(0), cl::Hidden,
76 cl::desc("Max number of promotions for this compilation"));
77
78// If ICPCSSkip is non zero, the first ICPCSSkip callsites will be skipped.
79// For debug use only.
81 ICPCSSkip("icp-csskip", cl::init(0), cl::Hidden,
82 cl::desc("Skip Callsite up to this number for this compilation"));
83
84// Set if the pass is called in LTO optimization. The difference for LTO mode
85// is the pass won't prefix the source module name to the internal linkage
86// symbols.
87static cl::opt<bool> ICPLTOMode("icp-lto", cl::init(false), cl::Hidden,
88 cl::desc("Run indirect-call promotion in LTO "
89 "mode"));
90
91// Set if the pass is called in SamplePGO mode. The difference for SamplePGO
92// mode is it will add prof metadatato the created direct call.
93static cl::opt<bool>
94 ICPSamplePGOMode("icp-samplepgo", cl::init(false), cl::Hidden,
95 cl::desc("Run indirect-call promotion in SamplePGO mode"));
96
97// If the option is set to true, only call instructions will be considered for
98// transformation -- invoke instructions will be ignored.
99static cl::opt<bool>
100 ICPCallOnly("icp-call-only", cl::init(false), cl::Hidden,
101 cl::desc("Run indirect-call promotion for call instructions "
102 "only"));
103
104// If the option is set to true, only invoke instructions will be considered for
105// transformation -- call instructions will be ignored.
106static cl::opt<bool> ICPInvokeOnly("icp-invoke-only", cl::init(false),
108 cl::desc("Run indirect-call promotion for "
109 "invoke instruction only"));
110
111// Dump the function level IR if the transformation happened in this
112// function. For debug use only.
113static cl::opt<bool>
114 ICPDUMPAFTER("icp-dumpafter", cl::init(false), cl::Hidden,
115 cl::desc("Dump IR after transformation happens"));
116
117// Indirect call promotion pass will fall back to function-based comparison if
118// vtable-count / function-count is smaller than this threshold.
120 "icp-vtable-percentage-threshold", cl::init(0.99), cl::Hidden,
121 cl::desc("The percentage threshold of vtable-count / function-count for "
122 "cost-benefit analysis."));
123
124// Although comparing vtables can save a vtable load, we may need to compare
125// vtable pointer with multiple vtable address points due to class inheritance.
126// Comparing with multiple vtables inserts additional instructions on hot code
127// path, and doing so for an earlier candidate delays the comparisons for later
128// candidates. For the last candidate, only the fallback path is affected.
129// We allow multiple vtable comparison for the last function candidate and use
130// the option below to cap the number of vtables.
132 "icp-max-num-vtable-last-candidate", cl::init(1), cl::Hidden,
133 cl::desc("The maximum number of vtable for the last candidate."));
134
135namespace {
136
137// The key is a vtable global variable, and the value is a map.
138// In the inner map, the key represents address point offsets and the value is a
139// constant for this address point.
140using VTableAddressPointOffsetValMap =
142
143// A struct to collect type information for a virtual call site.
144struct VirtualCallSiteInfo {
145 // The offset from the address point to virtual function in the vtable.
146 uint64_t FunctionOffset;
147 // The instruction that computes the address point of vtable.
148 Instruction *VPtr;
149 // The compatible type used in LLVM type intrinsics.
150 StringRef CompatibleTypeStr;
151};
152
153// The key is a virtual call, and value is its type information.
154using VirtualCallSiteTypeInfoMap =
156
157// The key is vtable GUID, and value is its value profile count.
158using VTableGUIDCountsMap = SmallDenseMap<uint64_t, uint64_t, 16>;
159
160// Return the address point offset of the given compatible type.
161//
162// Type metadata of a vtable specifies the types that can contain a pointer to
163// this vtable, for example, `Base*` can be a pointer to an derived type
164// but not vice versa. See also https://llvm.org/docs/TypeMetadata.html
165static std::optional<uint64_t>
166getAddressPointOffset(const GlobalVariable &VTableVar,
167 StringRef CompatibleType) {
169 VTableVar.getMetadata(LLVMContext::MD_type, Types);
170
171 for (MDNode *Type : Types)
172 if (auto *TypeId = dyn_cast<MDString>(Type->getOperand(1).get());
173 TypeId && TypeId->getString() == CompatibleType)
174 return cast<ConstantInt>(
175 cast<ConstantAsMetadata>(Type->getOperand(0))->getValue())
176 ->getZExtValue();
177
178 return std::nullopt;
179}
180
181// Return a constant representing the vtable's address point specified by the
182// offset.
183static Constant *getVTableAddressPointOffset(GlobalVariable *VTable,
184 uint32_t AddressPointOffset) {
185 Module &M = *VTable->getParent();
186 LLVMContext &Context = M.getContext();
187 assert(AddressPointOffset <
188 M.getDataLayout().getTypeAllocSize(VTable->getValueType()) &&
189 "Out-of-bound access");
190
192 Type::getInt8Ty(Context), VTable,
193 llvm::ConstantInt::get(Type::getInt32Ty(Context), AddressPointOffset));
194}
195
196// Return the basic block in which Use `U` is used via its `UserInst`.
197static BasicBlock *getUserBasicBlock(Use &U, Instruction *UserInst) {
198 if (PHINode *PN = dyn_cast<PHINode>(UserInst))
199 return PN->getIncomingBlock(U);
200
201 return UserInst->getParent();
202}
203
204// `DestBB` is a suitable basic block to sink `Inst` into when `Inst` have users
205// and all users are in `DestBB`. The caller guarantees that `Inst->getParent()`
206// is the sole predecessor of `DestBB` and `DestBB` is dominated by
207// `Inst->getParent()`.
208static bool isDestBBSuitableForSink(Instruction *Inst, BasicBlock *DestBB) {
209 // 'BB' is used only by assert.
210 [[maybe_unused]] BasicBlock *BB = Inst->getParent();
211
212 assert(BB != DestBB && BB->getTerminator()->getNumSuccessors() == 2 &&
213 DestBB->getUniquePredecessor() == BB &&
214 "Guaranteed by ICP transformation");
215
216 BasicBlock *UserBB = nullptr;
217 for (Use &Use : Inst->uses()) {
218 User *User = Use.getUser();
219 // Do checked cast since IR verifier guarantees that the user of an
220 // instruction must be an instruction. See `Verifier::visitInstruction`.
221 Instruction *UserInst = cast<Instruction>(User);
222 // We can sink debug or pseudo instructions together with Inst.
223 if (UserInst->isDebugOrPseudoInst())
224 continue;
225 UserBB = getUserBasicBlock(Use, UserInst);
226 // Do not sink if Inst is used in a basic block that is not DestBB.
227 // TODO: Sink to the common dominator of all user blocks.
228 if (UserBB != DestBB)
229 return false;
230 }
231 return UserBB != nullptr;
232}
233
234// For the virtual call dispatch sequence, try to sink vtable load instructions
235// to the cold indirect call fallback.
236// FIXME: Move the sink eligibility check below to a utility function in
237// Transforms/Utils/ directory.
238static bool tryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
239 if (!isDestBBSuitableForSink(I, DestBlock))
240 return false;
241
242 // Do not move control-flow-involving, volatile loads, vaarg, alloca
243 // instructions, etc.
244 if (isa<PHINode>(I) || I->isEHPad() || I->mayThrow() || !I->willReturn() ||
245 isa<AllocaInst>(I))
246 return false;
247
248 // Do not sink convergent call instructions.
249 if (const auto *C = dyn_cast<CallBase>(I))
250 if (C->isInlineAsm() || C->cannotMerge() || C->isConvergent())
251 return false;
252
253 // Do not move an instruction that may write to memory.
254 if (I->mayWriteToMemory())
255 return false;
256
257 // We can only sink load instructions if there is nothing between the load and
258 // the end of block that could change the value.
259 if (I->mayReadFromMemory()) {
260 // We already know that SrcBlock is the unique predecessor of DestBlock.
261 for (BasicBlock::iterator Scan = std::next(I->getIterator()),
262 E = I->getParent()->end();
263 Scan != E; ++Scan) {
264 // Note analysis analysis can tell whether two pointers can point to the
265 // same object in memory or not thereby find further opportunities to
266 // sink.
267 if (Scan->mayWriteToMemory())
268 return false;
269 }
270 }
271
272 BasicBlock::iterator InsertPos = DestBlock->getFirstInsertionPt();
273 I->moveBefore(*DestBlock, InsertPos);
274
275 // TODO: Sink debug intrinsic users of I to 'DestBlock'.
276 // 'InstCombinerImpl::tryToSinkInstructionDbgValues' and
277 // 'InstCombinerImpl::tryToSinkInstructionDbgVariableRecords' already have
278 // the core logic to do this.
279 return true;
280}
281
282// Try to sink instructions after VPtr to the indirect call fallback.
283// Return the number of sunk IR instructions.
284static int tryToSinkInstructions(BasicBlock *OriginalBB,
285 BasicBlock *IndirectCallBB) {
286 int SinkCount = 0;
287 // Do not sink across a critical edge for simplicity.
288 if (IndirectCallBB->getUniquePredecessor() != OriginalBB)
289 return SinkCount;
290 // Sink all eligible instructions in OriginalBB in reverse order.
291 for (Instruction &I :
293 if (tryToSinkInstruction(&I, IndirectCallBB))
294 SinkCount++;
295
296 return SinkCount;
297}
298
299// Promote indirect calls to conditional direct calls, keeping track of
300// thresholds.
301class IndirectCallPromoter {
302private:
303 Function &F;
304 Module &M;
305
306 ProfileSummaryInfo *PSI = nullptr;
307
308 // Symtab that maps indirect call profile values to function names and
309 // defines.
310 InstrProfSymtab *const Symtab;
311
312 const bool SamplePGO;
313
314 // A map from a virtual call to its type information.
315 const VirtualCallSiteTypeInfoMap &VirtualCSInfo;
316
317 VTableAddressPointOffsetValMap &VTableAddressPointOffsetVal;
318
320
321 // A struct that records the direct target and it's call count.
322 struct PromotionCandidate {
323 Function *const TargetFunction;
324 const uint64_t Count;
325
326 // The following fields only exists for promotion candidates with vtable
327 // information.
328 //
329 // Due to class inheritance, one virtual call candidate can come from
330 // multiple vtables. `VTableGUIDAndCounts` tracks the vtable GUIDs and
331 // counts for 'TargetFunction'. `AddressPoints` stores the vtable address
332 // points for comparison.
333 VTableGUIDCountsMap VTableGUIDAndCounts;
334 SmallVector<Constant *> AddressPoints;
335
336 PromotionCandidate(Function *F, uint64_t C) : TargetFunction(F), Count(C) {}
337 };
338
339 // Check if the indirect-call call site should be promoted. Return the number
340 // of promotions. Inst is the candidate indirect call, ValueDataRef
341 // contains the array of value profile data for profiled targets,
342 // TotalCount is the total profiled count of call executions, and
343 // NumCandidates is the number of candidate entries in ValueDataRef.
344 std::vector<PromotionCandidate> getPromotionCandidatesForCallSite(
345 const CallBase &CB, ArrayRef<InstrProfValueData> ValueDataRef,
346 uint64_t TotalCount, uint32_t NumCandidates);
347
348 // Promote a list of targets for one indirect-call callsite by comparing
349 // indirect callee with functions. Return true if there are IR
350 // transformations and false otherwise.
351 bool tryToPromoteWithFuncCmp(CallBase &CB, Instruction *VPtr,
353 uint64_t TotalCount,
354 ArrayRef<InstrProfValueData> ICallProfDataRef,
355 uint32_t NumCandidates,
356 VTableGUIDCountsMap &VTableGUIDCounts);
357
358 // Promote a list of targets for one indirect call by comparing vtables with
359 // functions. Return true if there are IR transformations and false
360 // otherwise.
361 bool tryToPromoteWithVTableCmp(
363 uint64_t TotalFuncCount, uint32_t NumCandidates,
365 VTableGUIDCountsMap &VTableGUIDCounts);
366
367 // Return true if it's profitable to compare vtables for the callsite.
368 bool isProfitableToCompareVTables(const CallBase &CB,
370 uint64_t TotalCount);
371
372 // Given an indirect callsite and the list of function candidates, compute
373 // the following vtable information in output parameters and return vtable
374 // pointer if type profiles exist.
375 // - Populate `VTableGUIDCounts` with <vtable-guid, count> using !prof
376 // metadata attached on the vtable pointer.
377 // - For each function candidate, finds out the vtables from which it gets
378 // called and stores the <vtable-guid, count> in promotion candidate.
379 Instruction *computeVTableInfos(const CallBase *CB,
380 VTableGUIDCountsMap &VTableGUIDCounts,
381 std::vector<PromotionCandidate> &Candidates);
382
383 Constant *getOrCreateVTableAddressPointVar(GlobalVariable *GV,
384 uint64_t AddressPointOffset);
385
386 void updateFuncValueProfiles(CallBase &CB, ArrayRef<InstrProfValueData> VDs,
387 uint64_t Sum, uint32_t MaxMDCount);
388
389 void updateVPtrValueProfiles(Instruction *VPtr,
390 VTableGUIDCountsMap &VTableGUIDCounts);
391
392public:
393 IndirectCallPromoter(
394 Function &Func, Module &M, ProfileSummaryInfo *PSI,
395 InstrProfSymtab *Symtab, bool SamplePGO,
396 const VirtualCallSiteTypeInfoMap &VirtualCSInfo,
397 VTableAddressPointOffsetValMap &VTableAddressPointOffsetVal,
399 : F(Func), M(M), PSI(PSI), Symtab(Symtab), SamplePGO(SamplePGO),
400 VirtualCSInfo(VirtualCSInfo),
401 VTableAddressPointOffsetVal(VTableAddressPointOffsetVal), ORE(ORE) {}
402 IndirectCallPromoter(const IndirectCallPromoter &) = delete;
403 IndirectCallPromoter &operator=(const IndirectCallPromoter &) = delete;
404
405 bool processFunction(ProfileSummaryInfo *PSI);
406};
407
408} // end anonymous namespace
409
410// Indirect-call promotion heuristic. The direct targets are sorted based on
411// the count. Stop at the first target that is not promoted.
412std::vector<IndirectCallPromoter::PromotionCandidate>
413IndirectCallPromoter::getPromotionCandidatesForCallSite(
414 const CallBase &CB, ArrayRef<InstrProfValueData> ValueDataRef,
415 uint64_t TotalCount, uint32_t NumCandidates) {
416 std::vector<PromotionCandidate> Ret;
417
418 LLVM_DEBUG(dbgs() << " \nWork on callsite #" << NumOfPGOICallsites << CB
419 << " Num_targets: " << ValueDataRef.size()
420 << " Num_candidates: " << NumCandidates << "\n");
421 NumOfPGOICallsites++;
422 if (ICPCSSkip != 0 && NumOfPGOICallsites <= ICPCSSkip) {
423 LLVM_DEBUG(dbgs() << " Skip: User options.\n");
424 return Ret;
425 }
426
427 for (uint32_t I = 0; I < NumCandidates; I++) {
428 uint64_t Count = ValueDataRef[I].Count;
429 assert(Count <= TotalCount);
430 (void)TotalCount;
431 uint64_t Target = ValueDataRef[I].Value;
432 LLVM_DEBUG(dbgs() << " Candidate " << I << " Count=" << Count
433 << " Target_func: " << Target << "\n");
434
435 if (ICPInvokeOnly && isa<CallInst>(CB)) {
436 LLVM_DEBUG(dbgs() << " Not promote: User options.\n");
437 ORE.emit([&]() {
438 return OptimizationRemarkMissed(DEBUG_TYPE, "UserOptions", &CB)
439 << " Not promote: User options";
440 });
441 break;
442 }
443 if (ICPCallOnly && isa<InvokeInst>(CB)) {
444 LLVM_DEBUG(dbgs() << " Not promote: User option.\n");
445 ORE.emit([&]() {
446 return OptimizationRemarkMissed(DEBUG_TYPE, "UserOptions", &CB)
447 << " Not promote: User options";
448 });
449 break;
450 }
451 if (ICPCutOff != 0 && NumOfPGOICallPromotion >= ICPCutOff) {
452 LLVM_DEBUG(dbgs() << " Not promote: Cutoff reached.\n");
453 ORE.emit([&]() {
454 return OptimizationRemarkMissed(DEBUG_TYPE, "CutOffReached", &CB)
455 << " Not promote: Cutoff reached";
456 });
457 break;
458 }
459
460 // Don't promote if the symbol is not defined in the module. This avoids
461 // creating a reference to a symbol that doesn't exist in the module
462 // This can happen when we compile with a sample profile collected from
463 // one binary but used for another, which may have profiled targets that
464 // aren't used in the new binary. We might have a declaration initially in
465 // the case where the symbol is globally dead in the binary and removed by
466 // ThinLTO.
467 Function *TargetFunction = Symtab->getFunction(Target);
468 if (TargetFunction == nullptr || TargetFunction->isDeclaration()) {
469 LLVM_DEBUG(dbgs() << " Not promote: Cannot find the target\n");
470 ORE.emit([&]() {
471 return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToFindTarget", &CB)
472 << "Cannot promote indirect call: target with md5sum "
473 << ore::NV("target md5sum", Target) << " not found";
474 });
475 break;
476 }
477
478 const char *Reason = nullptr;
479 if (!isLegalToPromote(CB, TargetFunction, &Reason)) {
480 using namespace ore;
481
482 ORE.emit([&]() {
483 return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToPromote", &CB)
484 << "Cannot promote indirect call to "
485 << NV("TargetFunction", TargetFunction) << " with count of "
486 << NV("Count", Count) << ": " << Reason;
487 });
488 break;
489 }
490
491 Ret.push_back(PromotionCandidate(TargetFunction, Count));
492 TotalCount -= Count;
493 }
494 return Ret;
495}
496
497Constant *IndirectCallPromoter::getOrCreateVTableAddressPointVar(
498 GlobalVariable *GV, uint64_t AddressPointOffset) {
499 auto [Iter, Inserted] =
500 VTableAddressPointOffsetVal[GV].try_emplace(AddressPointOffset, nullptr);
501 if (Inserted)
502 Iter->second = getVTableAddressPointOffset(GV, AddressPointOffset);
503 return Iter->second;
504}
505
506Instruction *IndirectCallPromoter::computeVTableInfos(
507 const CallBase *CB, VTableGUIDCountsMap &GUIDCountsMap,
508 std::vector<PromotionCandidate> &Candidates) {
510 return nullptr;
511
512 // Take the following code sequence as an example, here is how the code works
513 // @vtable1 = {[n x ptr] [... ptr @func1]}
514 // @vtable2 = {[m x ptr] [... ptr @func2]}
515 //
516 // %vptr = load ptr, ptr %d, !prof !0
517 // %0 = tail call i1 @llvm.type.test(ptr %vptr, metadata !"vtable1")
518 // tail call void @llvm.assume(i1 %0)
519 // %vfn = getelementptr inbounds ptr, ptr %vptr, i64 1
520 // %1 = load ptr, ptr %vfn
521 // call void %1(ptr %d), !prof !1
522 //
523 // !0 = !{!"VP", i32 2, i64 100, i64 123, i64 50, i64 456, i64 50}
524 // !1 = !{!"VP", i32 0, i64 100, i64 789, i64 50, i64 579, i64 50}
525 //
526 // Step 1. Find out the %vptr instruction for indirect call and use its !prof
527 // to populate `GUIDCountsMap`.
528 // Step 2. For each vtable-guid, look up its definition from symtab. LTO can
529 // make vtable definitions visible across modules.
530 // Step 3. Compute the byte offset of the virtual call, by adding vtable
531 // address point offset and function's offset relative to vtable address
532 // point. For each function candidate, this step tells us the vtable from
533 // which it comes from, and the vtable address point to compare %vptr with.
534
535 // Only virtual calls have virtual call site info.
536 auto Iter = VirtualCSInfo.find(CB);
537 if (Iter == VirtualCSInfo.end())
538 return nullptr;
539
540 LLVM_DEBUG(dbgs() << "\nComputing vtable infos for callsite #"
541 << NumOfPGOICallsites << "\n");
542
543 const auto &VirtualCallInfo = Iter->second;
544 Instruction *VPtr = VirtualCallInfo.VPtr;
545
547 for (size_t I = 0; I < Candidates.size(); I++)
548 CalleeIndexMap[Candidates[I].TargetFunction] = I;
549
550 uint64_t TotalVTableCount = 0;
551 auto VTableValueDataArray =
552 getValueProfDataFromInst(*VirtualCallInfo.VPtr, IPVK_VTableTarget,
553 MaxNumVTableAnnotations, TotalVTableCount);
554 if (VTableValueDataArray.empty())
555 return VPtr;
556
557 // Compute the functions and counts from by each vtable.
558 for (const auto &V : VTableValueDataArray) {
559 uint64_t VTableVal = V.Value;
560 GUIDCountsMap[VTableVal] = V.Count;
561 GlobalVariable *VTableVar = Symtab->getGlobalVariable(VTableVal);
562 if (!VTableVar) {
563 LLVM_DEBUG(dbgs() << " Cannot find vtable definition for " << VTableVal
564 << "; maybe the vtable isn't imported\n");
565 continue;
566 }
567
568 std::optional<uint64_t> MaybeAddressPointOffset =
569 getAddressPointOffset(*VTableVar, VirtualCallInfo.CompatibleTypeStr);
570 if (!MaybeAddressPointOffset)
571 continue;
572
573 const uint64_t AddressPointOffset = *MaybeAddressPointOffset;
574
575 Function *Callee = nullptr;
576 std::tie(Callee, std::ignore) = getFunctionAtVTableOffset(
577 VTableVar, AddressPointOffset + VirtualCallInfo.FunctionOffset, M);
578 if (!Callee)
579 continue;
580 auto CalleeIndexIter = CalleeIndexMap.find(Callee);
581 if (CalleeIndexIter == CalleeIndexMap.end())
582 continue;
583
584 auto &Candidate = Candidates[CalleeIndexIter->second];
585 // There shouldn't be duplicate GUIDs in one !prof metadata (except
586 // duplicated zeros), so assign counters directly won't cause overwrite or
587 // counter loss.
588 Candidate.VTableGUIDAndCounts[VTableVal] = V.Count;
589 Candidate.AddressPoints.push_back(
590 getOrCreateVTableAddressPointVar(VTableVar, AddressPointOffset));
591 }
592
593 return VPtr;
594}
595
596// Creates 'branch_weights' prof metadata using TrueWeight and FalseWeight.
597// Scales uint64_t counters down to uint32_t if necessary to prevent overflow.
598static MDNode *createBranchWeights(LLVMContext &Context, uint64_t TrueWeight,
599 uint64_t FalseWeight) {
600 MDBuilder MDB(Context);
601 uint64_t Scale = calculateCountScale(std::max(TrueWeight, FalseWeight));
602 return MDB.createBranchWeights(scaleBranchCount(TrueWeight, Scale),
603 scaleBranchCount(FalseWeight, Scale));
604}
605
607 uint64_t Count, uint64_t TotalCount,
608 bool AttachProfToDirectCall,
611 CB, DirectCallee,
612 createBranchWeights(CB.getContext(), Count, TotalCount - Count));
613
614 if (AttachProfToDirectCall)
615 setBranchWeights(NewInst, {static_cast<uint32_t>(Count)},
616 /*IsExpected=*/false);
617
618 using namespace ore;
619
620 if (ORE)
621 ORE->emit([&]() {
622 return OptimizationRemark(DEBUG_TYPE, "Promoted", &CB)
623 << "Promote indirect call to " << NV("DirectCallee", DirectCallee)
624 << " with count " << NV("Count", Count) << " out of "
625 << NV("TotalCount", TotalCount);
626 });
627 return NewInst;
628}
629
630// Promote indirect-call to conditional direct-call for one callsite.
631bool IndirectCallPromoter::tryToPromoteWithFuncCmp(
633 uint64_t TotalCount, ArrayRef<InstrProfValueData> ICallProfDataRef,
634 uint32_t NumCandidates, VTableGUIDCountsMap &VTableGUIDCounts) {
635 uint32_t NumPromoted = 0;
636
637 for (const auto &C : Candidates) {
638 uint64_t FuncCount = C.Count;
639 pgo::promoteIndirectCall(CB, C.TargetFunction, FuncCount, TotalCount,
640 SamplePGO, &ORE);
641 assert(TotalCount >= FuncCount);
642 TotalCount -= FuncCount;
643 NumOfPGOICallPromotion++;
644 NumPromoted++;
645
646 if (!EnableVTableProfileUse || C.VTableGUIDAndCounts.empty())
647 continue;
648
649 // After a virtual call candidate gets promoted, update the vtable's counts
650 // proportionally. Each vtable-guid in `C.VTableGUIDAndCounts` represents
651 // a vtable from which the virtual call is loaded. Compute the sum and use
652 // 128-bit APInt to improve accuracy.
653 uint64_t SumVTableCount = 0;
654 for (const auto &[GUID, VTableCount] : C.VTableGUIDAndCounts)
655 SumVTableCount += VTableCount;
656
657 for (const auto &[GUID, VTableCount] : C.VTableGUIDAndCounts) {
658 APInt APFuncCount((unsigned)128, FuncCount, false /*signed*/);
659 APFuncCount *= VTableCount;
660 VTableGUIDCounts[GUID] -= APFuncCount.udiv(SumVTableCount).getZExtValue();
661 }
662 }
663 if (NumPromoted == 0)
664 return false;
665
666 assert(NumPromoted <= ICallProfDataRef.size() &&
667 "Number of promoted functions should not be greater than the number "
668 "of values in profile metadata");
669
670 // Update value profiles on the indirect call.
671 updateFuncValueProfiles(CB, ICallProfDataRef.slice(NumPromoted), TotalCount,
672 NumCandidates);
673 updateVPtrValueProfiles(VPtr, VTableGUIDCounts);
674 return true;
675}
676
677void IndirectCallPromoter::updateFuncValueProfiles(
678 CallBase &CB, ArrayRef<InstrProfValueData> CallVDs, uint64_t TotalCount,
679 uint32_t MaxMDCount) {
680 // First clear the existing !prof.
681 CB.setMetadata(LLVMContext::MD_prof, nullptr);
682 // Annotate the remaining value profiles if counter is not zero.
683 if (TotalCount != 0)
684 annotateValueSite(M, CB, CallVDs, TotalCount, IPVK_IndirectCallTarget,
685 MaxMDCount);
686}
687
688void IndirectCallPromoter::updateVPtrValueProfiles(
689 Instruction *VPtr, VTableGUIDCountsMap &VTableGUIDCounts) {
690 if (!EnableVTableProfileUse || VPtr == nullptr ||
691 !VPtr->getMetadata(LLVMContext::MD_prof))
692 return;
693 VPtr->setMetadata(LLVMContext::MD_prof, nullptr);
694 std::vector<InstrProfValueData> VTableValueProfiles;
695 uint64_t TotalVTableCount = 0;
696 for (auto [GUID, Count] : VTableGUIDCounts) {
697 if (Count == 0)
698 continue;
699
700 VTableValueProfiles.push_back({GUID, Count});
701 TotalVTableCount += Count;
702 }
703 llvm::sort(VTableValueProfiles,
704 [](const InstrProfValueData &LHS, const InstrProfValueData &RHS) {
705 return LHS.Count > RHS.Count;
706 });
707
708 annotateValueSite(M, *VPtr, VTableValueProfiles, TotalVTableCount,
709 IPVK_VTableTarget, VTableValueProfiles.size());
710}
711
712bool IndirectCallPromoter::tryToPromoteWithVTableCmp(
714 uint64_t TotalFuncCount, uint32_t NumCandidates,
716 VTableGUIDCountsMap &VTableGUIDCounts) {
717 SmallVector<uint64_t, 4> PromotedFuncCount;
718
719 for (const auto &Candidate : Candidates) {
720 for (auto &[GUID, Count] : Candidate.VTableGUIDAndCounts)
721 VTableGUIDCounts[GUID] -= Count;
722
723 // 'OriginalBB' is the basic block of indirect call. After each candidate
724 // is promoted, a new basic block is created for the indirect fallback basic
725 // block and indirect call `CB` is moved into this new BB.
726 BasicBlock *OriginalBB = CB.getParent();
728 CB, VPtr, Candidate.TargetFunction, Candidate.AddressPoints,
729 createBranchWeights(CB.getContext(), Candidate.Count,
730 TotalFuncCount - Candidate.Count));
731
732 int SinkCount = tryToSinkInstructions(OriginalBB, CB.getParent());
733
734 ORE.emit([&]() {
735 OptimizationRemark Remark(DEBUG_TYPE, "Promoted", &CB);
736
737 const auto &VTableGUIDAndCounts = Candidate.VTableGUIDAndCounts;
738 Remark << "Promote indirect call to "
739 << ore::NV("DirectCallee", Candidate.TargetFunction)
740 << " with count " << ore::NV("Count", Candidate.Count)
741 << " out of " << ore::NV("TotalCount", TotalFuncCount) << ", sink "
742 << ore::NV("SinkCount", SinkCount)
743 << " instruction(s) and compare "
744 << ore::NV("VTable", VTableGUIDAndCounts.size())
745 << " vtable(s): {";
746
747 // Sort GUIDs so remark message is deterministic.
748 std::set<uint64_t> GUIDSet;
749 for (auto [GUID, Count] : VTableGUIDAndCounts)
750 GUIDSet.insert(GUID);
751 for (auto Iter = GUIDSet.begin(); Iter != GUIDSet.end(); Iter++) {
752 if (Iter != GUIDSet.begin())
753 Remark << ", ";
754 Remark << ore::NV("VTable", Symtab->getGlobalVariable(*Iter));
755 }
756
757 Remark << "}";
758
759 return Remark;
760 });
761
762 PromotedFuncCount.push_back(Candidate.Count);
763
764 assert(TotalFuncCount >= Candidate.Count &&
765 "Within one prof metadata, total count is the sum of counts from "
766 "individual <target, count> pairs");
767 // Use std::min since 'TotalFuncCount' is the saturated sum of individual
768 // counts, see
769 // https://github.com/llvm/llvm-project/blob/abedb3b8356d5d56f1c575c4f7682fba2cb19787/llvm/lib/ProfileData/InstrProf.cpp#L1281-L1288
770 TotalFuncCount -= std::min(TotalFuncCount, Candidate.Count);
771 NumOfPGOICallPromotion++;
772 }
773
774 if (PromotedFuncCount.empty())
775 return false;
776
777 // Update value profiles for 'CB' and 'VPtr', assuming that each 'CB' has a
778 // a distinct 'VPtr'.
779 // FIXME: When Clang `-fstrict-vtable-pointers` is enabled, a vtable might be
780 // used to load multiple virtual functions. The vtable profiles needs to be
781 // updated properly in that case (e.g, for each indirect call annotate both
782 // type profiles and function profiles in one !prof).
783 for (size_t I = 0; I < PromotedFuncCount.size(); I++)
784 ICallProfDataRef[I].Count -=
785 std::max(PromotedFuncCount[I], ICallProfDataRef[I].Count);
786 // Sort value profiles by count in descending order.
787 llvm::stable_sort(ICallProfDataRef, [](const InstrProfValueData &LHS,
788 const InstrProfValueData &RHS) {
789 return LHS.Count > RHS.Count;
790 });
791 // Drop the <target-value, count> pair if count is zero.
793 ICallProfDataRef.begin(),
794 llvm::upper_bound(ICallProfDataRef, 0U,
795 [](uint64_t Count, const InstrProfValueData &ProfData) {
796 return ProfData.Count <= Count;
797 }));
798 updateFuncValueProfiles(CB, VDs, TotalFuncCount, NumCandidates);
799 updateVPtrValueProfiles(VPtr, VTableGUIDCounts);
800 return true;
801}
802
803// Traverse all the indirect-call callsite and get the value profile
804// annotation to perform indirect-call promotion.
805bool IndirectCallPromoter::processFunction(ProfileSummaryInfo *PSI) {
806 bool Changed = false;
807 ICallPromotionAnalysis ICallAnalysis;
808 for (auto *CB : findIndirectCalls(F)) {
809 uint32_t NumCandidates;
810 uint64_t TotalCount;
811 auto ICallProfDataRef = ICallAnalysis.getPromotionCandidatesForInstruction(
812 CB, TotalCount, NumCandidates);
813 if (!NumCandidates ||
814 (PSI && PSI->hasProfileSummary() && !PSI->isHotCount(TotalCount)))
815 continue;
816
817 auto PromotionCandidates = getPromotionCandidatesForCallSite(
818 *CB, ICallProfDataRef, TotalCount, NumCandidates);
819
820 VTableGUIDCountsMap VTableGUIDCounts;
821 Instruction *VPtr =
822 computeVTableInfos(CB, VTableGUIDCounts, PromotionCandidates);
823
824 if (isProfitableToCompareVTables(*CB, PromotionCandidates, TotalCount))
825 Changed |= tryToPromoteWithVTableCmp(*CB, VPtr, PromotionCandidates,
826 TotalCount, NumCandidates,
827 ICallProfDataRef, VTableGUIDCounts);
828 else
829 Changed |= tryToPromoteWithFuncCmp(*CB, VPtr, PromotionCandidates,
830 TotalCount, ICallProfDataRef,
831 NumCandidates, VTableGUIDCounts);
832 }
833 return Changed;
834}
835
836// TODO: Return false if the function addressing and vtable load instructions
837// cannot sink to indirect fallback.
838bool IndirectCallPromoter::isProfitableToCompareVTables(
839 const CallBase &CB, ArrayRef<PromotionCandidate> Candidates,
840 uint64_t TotalCount) {
841 if (!EnableVTableProfileUse || Candidates.empty())
842 return false;
843 LLVM_DEBUG(dbgs() << "\nEvaluating vtable profitability for callsite #"
844 << NumOfPGOICallsites << CB << "\n");
845 uint64_t RemainingVTableCount = TotalCount;
846 const size_t CandidateSize = Candidates.size();
847 for (size_t I = 0; I < CandidateSize; I++) {
848 auto &Candidate = Candidates[I];
849 auto &VTableGUIDAndCounts = Candidate.VTableGUIDAndCounts;
850
851 LLVM_DEBUG(dbgs() << " Candidate " << I << " FunctionCount: "
852 << Candidate.Count << ", VTableCounts:");
853 // Add [[maybe_unused]] since <GUID, Count> are only used by LLVM_DEBUG.
854 for ([[maybe_unused]] auto &[GUID, Count] : VTableGUIDAndCounts)
855 LLVM_DEBUG(dbgs() << " {" << Symtab->getGlobalVariable(GUID)->getName()
856 << ", " << Count << "}");
857 LLVM_DEBUG(dbgs() << "\n");
858
859 uint64_t CandidateVTableCount = 0;
860 for (auto &[GUID, Count] : VTableGUIDAndCounts)
861 CandidateVTableCount += Count;
862
863 if (CandidateVTableCount < Candidate.Count * ICPVTablePercentageThreshold) {
865 dbgs() << " function count " << Candidate.Count
866 << " and its vtable sum count " << CandidateVTableCount
867 << " have discrepancies. Bail out vtable comparison.\n");
868 return false;
869 }
870
871 RemainingVTableCount -= Candidate.Count;
872
873 // 'MaxNumVTable' limits the number of vtables to make vtable comparison
874 // profitable. Comparing multiple vtables for one function candidate will
875 // insert additional instructions on the hot path, and allowing more than
876 // one vtable for non last candidates may or may not elongate the dependency
877 // chain for the subsequent candidates. Set its value to 1 for non-last
878 // candidate and allow option to override it for the last candidate.
879 int MaxNumVTable = 1;
880 if (I == CandidateSize - 1)
881 MaxNumVTable = ICPMaxNumVTableLastCandidate;
882
883 if ((int)Candidate.AddressPoints.size() > MaxNumVTable) {
884 LLVM_DEBUG(dbgs() << " allow at most " << MaxNumVTable << " and got "
885 << Candidate.AddressPoints.size()
886 << " vtables. Bail out for vtable comparison.\n");
887 return false;
888 }
889 }
890
891 // If the indirect fallback is not cold, don't compare vtables.
892 if (PSI && PSI->hasProfileSummary() &&
893 !PSI->isColdCount(RemainingVTableCount)) {
894 LLVM_DEBUG(dbgs() << " Indirect fallback basic block is not cold. Bail "
895 "out for vtable comparison.\n");
896 return false;
897 }
898
899 return true;
900}
901
902// For virtual calls in the module, collect per-callsite information which will
903// be used to associate an ICP candidate with a vtable and a specific function
904// in the vtable. With type intrinsics (llvm.type.test), we can find virtual
905// calls in a compile-time efficient manner (by iterating its users) and more
906// importantly use the compatible type later to figure out the function byte
907// offset relative to the start of vtables.
908static void
910 VirtualCallSiteTypeInfoMap &VirtualCSInfo) {
911 // Right now only llvm.type.test is used to find out virtual call sites.
912 // With ThinLTO and whole-program-devirtualization, llvm.type.test and
913 // llvm.public.type.test are emitted, and llvm.public.type.test is either
914 // refined to llvm.type.test or dropped before indirect-call-promotion pass.
915 //
916 // FIXME: For fullLTO with VFE, `llvm.type.checked.load intrinsic` is emitted.
917 // Find out virtual calls by looking at users of llvm.type.checked.load in
918 // that case.
919 Function *TypeTestFunc =
920 M.getFunction(Intrinsic::getName(Intrinsic::type_test));
921 if (!TypeTestFunc || TypeTestFunc->use_empty())
922 return;
923
924 auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
925 auto LookupDomTree = [&FAM](Function &F) -> DominatorTree & {
927 };
928 // Iterate all type.test calls to find all indirect calls.
929 for (Use &U : llvm::make_early_inc_range(TypeTestFunc->uses())) {
930 auto *CI = dyn_cast<CallInst>(U.getUser());
931 if (!CI)
932 continue;
933 auto *TypeMDVal = cast<MetadataAsValue>(CI->getArgOperand(1));
934 if (!TypeMDVal)
935 continue;
936 auto *CompatibleTypeId = dyn_cast<MDString>(TypeMDVal->getMetadata());
937 if (!CompatibleTypeId)
938 continue;
939
940 // Find out all devirtualizable call sites given a llvm.type.test
941 // intrinsic call.
944 auto &DT = LookupDomTree(*CI->getFunction());
945 findDevirtualizableCallsForTypeTest(DevirtCalls, Assumes, CI, DT);
946
947 for (auto &DevirtCall : DevirtCalls) {
948 CallBase &CB = DevirtCall.CB;
949 // Given an indirect call, try find the instruction which loads a
950 // pointer to virtual table.
951 Instruction *VTablePtr =
953 if (!VTablePtr)
954 continue;
955 VirtualCSInfo[&CB] = {DevirtCall.Offset, VTablePtr,
956 CompatibleTypeId->getString()};
957 }
958 }
959}
960
961// A wrapper function that does the actual work.
962static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, bool InLTO,
963 bool SamplePGO, ModuleAnalysisManager &MAM) {
964 if (DisableICP)
965 return false;
966 InstrProfSymtab Symtab;
967 if (Error E = Symtab.create(M, InLTO)) {
968 std::string SymtabFailure = toString(std::move(E));
969 M.getContext().emitError("Failed to create symtab: " + SymtabFailure);
970 return false;
971 }
972 bool Changed = false;
973 VirtualCallSiteTypeInfoMap VirtualCSInfo;
974
976 computeVirtualCallSiteTypeInfoMap(M, MAM, VirtualCSInfo);
977
978 // VTableAddressPointOffsetVal stores the vtable address points. The vtable
979 // address point of a given <vtable, address point offset> is static (doesn't
980 // change after being computed once).
981 // IndirectCallPromoter::getOrCreateVTableAddressPointVar creates the map
982 // entry the first time a <vtable, offset> pair is seen, as
983 // promoteIndirectCalls processes an IR module and calls IndirectCallPromoter
984 // repeatedly on each function.
985 VTableAddressPointOffsetValMap VTableAddressPointOffsetVal;
986
987 for (auto &F : M) {
988 if (F.isDeclaration() || F.hasOptNone())
989 continue;
990
991 auto &FAM =
994
995 IndirectCallPromoter CallPromoter(F, M, PSI, &Symtab, SamplePGO,
996 VirtualCSInfo,
997 VTableAddressPointOffsetVal, ORE);
998 bool FuncChanged = CallPromoter.processFunction(PSI);
999 if (ICPDUMPAFTER && FuncChanged) {
1000 LLVM_DEBUG(dbgs() << "\n== IR Dump After =="; F.print(dbgs()));
1001 LLVM_DEBUG(dbgs() << "\n");
1002 }
1003 Changed |= FuncChanged;
1004 if (ICPCutOff != 0 && NumOfPGOICallPromotion >= ICPCutOff) {
1005 LLVM_DEBUG(dbgs() << " Stop: Cutoff reached.\n");
1006 break;
1007 }
1008 }
1009 return Changed;
1010}
1011
1015
1016 if (!promoteIndirectCalls(M, PSI, InLTO | ICPLTOMode,
1017 SamplePGO | ICPSamplePGOMode, MAM))
1018 return PreservedAnalyses::all();
1019
1020 return PreservedAnalyses::none();
1021}
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
Interface to identify indirect call promotion candidates.
static cl::opt< bool > ICPCallOnly("icp-call-only", cl::init(false), cl::Hidden, cl::desc("Run indirect-call promotion for call instructions " "only"))
cl::opt< unsigned > MaxNumVTableAnnotations
static cl::opt< float > ICPVTablePercentageThreshold("icp-vtable-percentage-threshold", cl::init(0.99), cl::Hidden, cl::desc("The percentage threshold of vtable-count / function-count for " "cost-benefit analysis."))
static cl::opt< bool > ICPInvokeOnly("icp-invoke-only", cl::init(false), cl::Hidden, cl::desc("Run indirect-call promotion for " "invoke instruction only"))
static cl::opt< unsigned > ICPCSSkip("icp-csskip", cl::init(0), cl::Hidden, cl::desc("Skip Callsite up to this number for this compilation"))
static cl::opt< bool > ICPDUMPAFTER("icp-dumpafter", cl::init(false), cl::Hidden, cl::desc("Dump IR after transformation happens"))
static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, bool InLTO, bool SamplePGO, ModuleAnalysisManager &MAM)
static void computeVirtualCallSiteTypeInfoMap(Module &M, ModuleAnalysisManager &MAM, VirtualCallSiteTypeInfoMap &VirtualCSInfo)
static MDNode * createBranchWeights(LLVMContext &Context, uint64_t TrueWeight, uint64_t FalseWeight)
static cl::opt< bool > ICPLTOMode("icp-lto", cl::init(false), cl::Hidden, cl::desc("Run indirect-call promotion in LTO " "mode"))
#define DEBUG_TYPE
static cl::opt< bool > DisableICP("disable-icp", cl::init(false), cl::Hidden, cl::desc("Disable indirect call promotion"))
static cl::opt< unsigned > ICPCutOff("icp-cutoff", cl::init(0), cl::Hidden, cl::desc("Max number of promotions for this compilation"))
static cl::opt< int > ICPMaxNumVTableLastCandidate("icp-max-num-vtable-last-candidate", cl::init(1), cl::Hidden, cl::desc("The maximum number of vtable for the last candidate."))
static cl::opt< bool > ICPSamplePGOMode("icp-samplepgo", cl::init(false), cl::Hidden, cl::desc("Run indirect-call promotion in SamplePGO mode"))
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file provides the interface for IR based instrumentation passes ( (profile-gen,...
FunctionAnalysisManager FAM
ModuleAnalysisManager MAM
This header defines various interfaces for pass management in LLVM.
This file contains the declarations for profiling metadata utility functions.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition: APInt.h:78
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:405
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:160
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:195
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:416
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
Definition: BasicBlock.cpp:467
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:177
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:239
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1236
static Constant * getInBoundsGetElementPtr(Type *Ty, Constant *C, ArrayRef< Constant * > IdxList)
Create an "inbounds" getelementptr.
Definition: Constants.h:1280
This is an important base class in LLVM.
Definition: Constant.h:42
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
iterator end()
Definition: DenseMap.h:84
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:279
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
Lightweight error class with error context and mandatory checking.
Definition: Error.h:160
const Function & getFunction() const
Definition: Function.h:170
MDNode * getMetadata(unsigned KindID) const
Get the current metadata attachments for the given kind, if any.
Definition: Value.h:565
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:290
MutableArrayRef< InstrProfValueData > getPromotionCandidatesForInstruction(const Instruction *I, uint64_t &TotalCount, uint32_t &NumCandidates)
Returns reference to array of InstrProfValueData for the given instruction I.
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:563
A symbol table used for function [IR]PGO name look-up with keys (such as pointers,...
Definition: InstrProf.h:454
Error create(object::SectionRef &Section)
Create InstrProfSymtab from an object file section which contains function PGO names.
bool isDebugOrPseudoInst() const LLVM_READONLY
Return true if the instruction is a DbgInfoIntrinsic or PseudoProbeInst.
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:381
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1642
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
Definition: MDBuilder.cpp:37
Metadata node.
Definition: Metadata.h:1069
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:307
iterator begin() const
Definition: ArrayRef.h:356
The optimization diagnostic interface.
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for missed-optimization remarks.
Diagnostic information for applied optimization remarks.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: Analysis.h:114
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
bool hasProfileSummary() const
Returns true if profile summary is available.
bool isColdCount(uint64_t C) const
Returns true if count C is considered cold.
bool isHotCount(uint64_t C) const
Returns true if count C is considered hot.
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
Target - Wrapper for Target specific information.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
static IntegerType * getInt8Ty(LLVMContext &C)
static IntegerType * getInt32Ty(LLVMContext &C)
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:72
bool use_empty() const
Definition: Value.h:344
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1075
iterator_range< use_iterator > uses()
Definition: Value.h:376
const ParentTy * getParent() const
Definition: ilist_node.h:32
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
StringRef getName(ID id)
Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
Definition: Function.cpp:1096
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
std::optional< const char * > toString(const std::optional< DWARFFormValue > &V)
Take an optional DWARFFormValue and try to extract a string value from it.
DiagnosticInfoOptimizationBase::Argument NV
CallBase & promoteIndirectCall(CallBase &CB, Function *F, uint64_t Count, uint64_t TotalCount, bool AttachProfToDirectCall, OptimizationRemarkEmitter *ORE)
NodeAddr< FuncNode * > Func
Definition: RDFGraph.h:393
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
void stable_sort(R &&Range)
Definition: STLExtras.h:2020
bool isLegalToPromote(const CallBase &CB, Function *Callee, const char **FailureReason=nullptr)
Return true if the given indirect call site can be made to call Callee.
std::vector< CallBase * > findIndirectCalls(Function &F)
CallBase & promoteCallWithIfThenElse(CallBase &CB, Function *Callee, MDNode *BranchWeights=nullptr)
Promote the given indirect call site to conditionally call Callee.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:656
auto upper_bound(R &&Range, T &&Value)
Provide wrappers to std::upper_bound which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1974
cl::opt< bool > EnableVTableProfileUse("enable-vtable-profile-use", cl::init(false), cl::desc("If ThinLTO and WPD is enabled and this option is true, vtable " "profiles will be used by ICP pass for more efficient indirect " "call sequence. If false, type profiles won't be used."))
void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
Definition: InstrProf.cpp:1282
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1647
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
SmallVector< InstrProfValueData, 4 > getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind, uint32_t MaxNumValueData, uint64_t &TotalC, bool GetNoICPValue=false)
Extract the value profile data from Inst and returns them if Inst is annotated with value profile dat...
Definition: InstrProf.cpp:1350
static uint32_t scaleBranchCount(uint64_t Count, uint64_t Scale)
Scale an individual branch count.
static uint64_t calculateCountScale(uint64_t MaxCount)
Calculate what to divide by to scale counts.
CallBase & promoteCallWithVTableCmp(CallBase &CB, Instruction *VPtr, Function *Callee, ArrayRef< Constant * > AddressPoints, MDNode *BranchWeights)
This is similar to promoteCallWithIfThenElse except that the condition to promote a virtual call is t...
void findDevirtualizableCallsForTypeTest(SmallVectorImpl< DevirtCallSite > &DevirtCalls, SmallVectorImpl< CallInst * > &Assumes, const CallInst *CI, DominatorTree &DT)
Given a call to the intrinsic @llvm.type.test, find all devirtualizable call sites based on the call ...
std::pair< Function *, Constant * > getFunctionAtVTableOffset(GlobalVariable *GV, uint64_t Offset, Module &M)
Given a vtable and a specified offset, returns the function and the trivial pointer at the specified ...
static Instruction * tryGetVTableInstruction(CallBase *CB)