LLVM 20.0.0git
IndirectCallPromotion.cpp
Go to the documentation of this file.
1//===- IndirectCallPromotion.cpp - Optimizations based on value profiling -===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the transformation that promotes indirect calls to
10// conditional direct calls when the indirect-call value profile metadata is
11// available.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/ADT/ArrayRef.h"
16#include "llvm/ADT/DenseMap.h"
17#include "llvm/ADT/Statistic.h"
18#include "llvm/ADT/StringRef.h"
25#include "llvm/IR/Dominators.h"
26#include "llvm/IR/Function.h"
27#include "llvm/IR/InstrTypes.h"
29#include "llvm/IR/LLVMContext.h"
30#include "llvm/IR/MDBuilder.h"
31#include "llvm/IR/PassManager.h"
33#include "llvm/IR/Value.h"
37#include "llvm/Support/Debug.h"
38#include "llvm/Support/Error.h"
43#include <cassert>
44#include <cstdint>
45#include <set>
46#include <string>
47#include <unordered_map>
48#include <utility>
49#include <vector>
50
51using namespace llvm;
52
53#define DEBUG_TYPE "pgo-icall-prom"
54
55STATISTIC(NumOfPGOICallPromotion, "Number of indirect call promotions.");
56STATISTIC(NumOfPGOICallsites, "Number of indirect call candidate sites.");
57
59
60namespace llvm {
62}
63
64// Command line option to disable indirect-call promotion with the default as
65// false. This is for debug purpose.
66static cl::opt<bool> DisableICP("disable-icp", cl::init(false), cl::Hidden,
67 cl::desc("Disable indirect call promotion"));
68
69// Set the cutoff value for the promotion. If the value is other than 0, we
70// stop the transformation once the total number of promotions equals the cutoff
71// value.
72// For debug use only.
74 ICPCutOff("icp-cutoff", cl::init(0), cl::Hidden,
75 cl::desc("Max number of promotions for this compilation"));
76
77// If ICPCSSkip is non zero, the first ICPCSSkip callsites will be skipped.
78// For debug use only.
80 ICPCSSkip("icp-csskip", cl::init(0), cl::Hidden,
81 cl::desc("Skip Callsite up to this number for this compilation"));
82
83// Set if the pass is called in LTO optimization. The difference for LTO mode
84// is the pass won't prefix the source module name to the internal linkage
85// symbols.
86static cl::opt<bool> ICPLTOMode("icp-lto", cl::init(false), cl::Hidden,
87 cl::desc("Run indirect-call promotion in LTO "
88 "mode"));
89
90// Set if the pass is called in SamplePGO mode. The difference for SamplePGO
91// mode is it will add prof metadatato the created direct call.
92static cl::opt<bool>
93 ICPSamplePGOMode("icp-samplepgo", cl::init(false), cl::Hidden,
94 cl::desc("Run indirect-call promotion in SamplePGO mode"));
95
96// If the option is set to true, only call instructions will be considered for
97// transformation -- invoke instructions will be ignored.
98static cl::opt<bool>
99 ICPCallOnly("icp-call-only", cl::init(false), cl::Hidden,
100 cl::desc("Run indirect-call promotion for call instructions "
101 "only"));
102
103// If the option is set to true, only invoke instructions will be considered for
104// transformation -- call instructions will be ignored.
105static cl::opt<bool> ICPInvokeOnly("icp-invoke-only", cl::init(false),
107 cl::desc("Run indirect-call promotion for "
108 "invoke instruction only"));
109
110// Dump the function level IR if the transformation happened in this
111// function. For debug use only.
112static cl::opt<bool>
113 ICPDUMPAFTER("icp-dumpafter", cl::init(false), cl::Hidden,
114 cl::desc("Dump IR after transformation happens"));
115
116// Indirect call promotion pass will fall back to function-based comparison if
117// vtable-count / function-count is smaller than this threshold.
119 "icp-vtable-percentage-threshold", cl::init(0.995), cl::Hidden,
120 cl::desc("The percentage threshold of vtable-count / function-count for "
121 "cost-benefit analysis."));
122
123// Although comparing vtables can save a vtable load, we may need to compare
124// vtable pointer with multiple vtable address points due to class inheritance.
125// Comparing with multiple vtables inserts additional instructions on hot code
126// path, and doing so for an earlier candidate delays the comparisons for later
127// candidates. For the last candidate, only the fallback path is affected.
128// We allow multiple vtable comparison for the last function candidate and use
129// the option below to cap the number of vtables.
131 "icp-max-num-vtable-last-candidate", cl::init(1), cl::Hidden,
132 cl::desc("The maximum number of vtable for the last candidate."));
133
135 "icp-ignored-base-types", cl::Hidden,
136 cl::desc(
137 "A list of mangled vtable type info names. Classes specified by the "
138 "type info names and their derived ones will not be vtable-ICP'ed. "
139 "Useful when the profiled types and actual types in the optimized "
140 "binary could be different due to profiling limitations. Type info "
141 "names are those string literals used in LLVM type metadata"));
142
143namespace {
144
145// The key is a vtable global variable, and the value is a map.
146// In the inner map, the key represents address point offsets and the value is a
147// constant for this address point.
148using VTableAddressPointOffsetValMap =
150
151// A struct to collect type information for a virtual call site.
152struct VirtualCallSiteInfo {
153 // The offset from the address point to virtual function in the vtable.
154 uint64_t FunctionOffset;
155 // The instruction that computes the address point of vtable.
156 Instruction *VPtr;
157 // The compatible type used in LLVM type intrinsics.
158 StringRef CompatibleTypeStr;
159};
160
161// The key is a virtual call, and value is its type information.
162using VirtualCallSiteTypeInfoMap =
164
165// The key is vtable GUID, and value is its value profile count.
166using VTableGUIDCountsMap = SmallDenseMap<uint64_t, uint64_t, 16>;
167
168// Return the address point offset of the given compatible type.
169//
170// Type metadata of a vtable specifies the types that can contain a pointer to
171// this vtable, for example, `Base*` can be a pointer to an derived type
172// but not vice versa. See also https://llvm.org/docs/TypeMetadata.html
173static std::optional<uint64_t>
174getAddressPointOffset(const GlobalVariable &VTableVar,
175 StringRef CompatibleType) {
177 VTableVar.getMetadata(LLVMContext::MD_type, Types);
178
179 for (MDNode *Type : Types)
180 if (auto *TypeId = dyn_cast<MDString>(Type->getOperand(1).get());
181 TypeId && TypeId->getString() == CompatibleType)
182 return cast<ConstantInt>(
183 cast<ConstantAsMetadata>(Type->getOperand(0))->getValue())
184 ->getZExtValue();
185
186 return std::nullopt;
187}
188
189// Return a constant representing the vtable's address point specified by the
190// offset.
191static Constant *getVTableAddressPointOffset(GlobalVariable *VTable,
192 uint32_t AddressPointOffset) {
193 Module &M = *VTable->getParent();
194 LLVMContext &Context = M.getContext();
195 assert(AddressPointOffset <
196 M.getDataLayout().getTypeAllocSize(VTable->getValueType()) &&
197 "Out-of-bound access");
198
200 Type::getInt8Ty(Context), VTable,
201 llvm::ConstantInt::get(Type::getInt32Ty(Context), AddressPointOffset));
202}
203
204// Return the basic block in which Use `U` is used via its `UserInst`.
205static BasicBlock *getUserBasicBlock(Use &U, Instruction *UserInst) {
206 if (PHINode *PN = dyn_cast<PHINode>(UserInst))
207 return PN->getIncomingBlock(U);
208
209 return UserInst->getParent();
210}
211
212// `DestBB` is a suitable basic block to sink `Inst` into when `Inst` have users
213// and all users are in `DestBB`. The caller guarantees that `Inst->getParent()`
214// is the sole predecessor of `DestBB` and `DestBB` is dominated by
215// `Inst->getParent()`.
216static bool isDestBBSuitableForSink(Instruction *Inst, BasicBlock *DestBB) {
217 // 'BB' is used only by assert.
218 [[maybe_unused]] BasicBlock *BB = Inst->getParent();
219
220 assert(BB != DestBB && BB->getTerminator()->getNumSuccessors() == 2 &&
221 DestBB->getUniquePredecessor() == BB &&
222 "Guaranteed by ICP transformation");
223
224 BasicBlock *UserBB = nullptr;
225 for (Use &Use : Inst->uses()) {
226 User *User = Use.getUser();
227 // Do checked cast since IR verifier guarantees that the user of an
228 // instruction must be an instruction. See `Verifier::visitInstruction`.
229 Instruction *UserInst = cast<Instruction>(User);
230 // We can sink debug or pseudo instructions together with Inst.
231 if (UserInst->isDebugOrPseudoInst())
232 continue;
233 UserBB = getUserBasicBlock(Use, UserInst);
234 // Do not sink if Inst is used in a basic block that is not DestBB.
235 // TODO: Sink to the common dominator of all user blocks.
236 if (UserBB != DestBB)
237 return false;
238 }
239 return UserBB != nullptr;
240}
241
242// For the virtual call dispatch sequence, try to sink vtable load instructions
243// to the cold indirect call fallback.
244// FIXME: Move the sink eligibility check below to a utility function in
245// Transforms/Utils/ directory.
246static bool tryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
247 if (!isDestBBSuitableForSink(I, DestBlock))
248 return false;
249
250 // Do not move control-flow-involving, volatile loads, vaarg, alloca
251 // instructions, etc.
252 if (isa<PHINode>(I) || I->isEHPad() || I->mayThrow() || !I->willReturn() ||
253 isa<AllocaInst>(I))
254 return false;
255
256 // Do not sink convergent call instructions.
257 if (const auto *C = dyn_cast<CallBase>(I))
258 if (C->isInlineAsm() || C->cannotMerge() || C->isConvergent())
259 return false;
260
261 // Do not move an instruction that may write to memory.
262 if (I->mayWriteToMemory())
263 return false;
264
265 // We can only sink load instructions if there is nothing between the load and
266 // the end of block that could change the value.
267 if (I->mayReadFromMemory()) {
268 // We already know that SrcBlock is the unique predecessor of DestBlock.
269 for (BasicBlock::iterator Scan = std::next(I->getIterator()),
270 E = I->getParent()->end();
271 Scan != E; ++Scan) {
272 // Note analysis analysis can tell whether two pointers can point to the
273 // same object in memory or not thereby find further opportunities to
274 // sink.
275 if (Scan->mayWriteToMemory())
276 return false;
277 }
278 }
279
280 BasicBlock::iterator InsertPos = DestBlock->getFirstInsertionPt();
281 I->moveBefore(*DestBlock, InsertPos);
282
283 // TODO: Sink debug intrinsic users of I to 'DestBlock'.
284 // 'InstCombinerImpl::tryToSinkInstructionDbgValues' and
285 // 'InstCombinerImpl::tryToSinkInstructionDbgVariableRecords' already have
286 // the core logic to do this.
287 return true;
288}
289
290// Try to sink instructions after VPtr to the indirect call fallback.
291// Return the number of sunk IR instructions.
292static int tryToSinkInstructions(BasicBlock *OriginalBB,
293 BasicBlock *IndirectCallBB) {
294 int SinkCount = 0;
295 // Do not sink across a critical edge for simplicity.
296 if (IndirectCallBB->getUniquePredecessor() != OriginalBB)
297 return SinkCount;
298 // Sink all eligible instructions in OriginalBB in reverse order.
299 for (Instruction &I :
301 if (tryToSinkInstruction(&I, IndirectCallBB))
302 SinkCount++;
303
304 return SinkCount;
305}
306
307// Promote indirect calls to conditional direct calls, keeping track of
308// thresholds.
309class IndirectCallPromoter {
310private:
311 Function &F;
312 Module &M;
313
314 // Symtab that maps indirect call profile values to function names and
315 // defines.
316 InstrProfSymtab *const Symtab;
317
318 const bool SamplePGO;
319
320 // A map from a virtual call to its type information.
321 const VirtualCallSiteTypeInfoMap &VirtualCSInfo;
322
323 VTableAddressPointOffsetValMap &VTableAddressPointOffsetVal;
324
326
327 const DenseSet<StringRef> &IgnoredBaseTypes;
328
329 // A struct that records the direct target and it's call count.
330 struct PromotionCandidate {
331 Function *const TargetFunction;
332 const uint64_t Count;
333
334 // The following fields only exists for promotion candidates with vtable
335 // information.
336 //
337 // Due to class inheritance, one virtual call candidate can come from
338 // multiple vtables. `VTableGUIDAndCounts` tracks the vtable GUIDs and
339 // counts for 'TargetFunction'. `AddressPoints` stores the vtable address
340 // points for comparison.
341 VTableGUIDCountsMap VTableGUIDAndCounts;
342 SmallVector<Constant *> AddressPoints;
343
344 PromotionCandidate(Function *F, uint64_t C) : TargetFunction(F), Count(C) {}
345 };
346
347 // Check if the indirect-call call site should be promoted. Return the number
348 // of promotions. Inst is the candidate indirect call, ValueDataRef
349 // contains the array of value profile data for profiled targets,
350 // TotalCount is the total profiled count of call executions, and
351 // NumCandidates is the number of candidate entries in ValueDataRef.
352 std::vector<PromotionCandidate> getPromotionCandidatesForCallSite(
353 const CallBase &CB, ArrayRef<InstrProfValueData> ValueDataRef,
354 uint64_t TotalCount, uint32_t NumCandidates);
355
356 // Promote a list of targets for one indirect-call callsite by comparing
357 // indirect callee with functions. Return true if there are IR
358 // transformations and false otherwise.
359 bool tryToPromoteWithFuncCmp(CallBase &CB, Instruction *VPtr,
361 uint64_t TotalCount,
362 ArrayRef<InstrProfValueData> ICallProfDataRef,
363 uint32_t NumCandidates,
364 VTableGUIDCountsMap &VTableGUIDCounts);
365
366 // Promote a list of targets for one indirect call by comparing vtables with
367 // functions. Return true if there are IR transformations and false
368 // otherwise.
369 bool tryToPromoteWithVTableCmp(
371 uint64_t TotalFuncCount, uint32_t NumCandidates,
373 VTableGUIDCountsMap &VTableGUIDCounts);
374
375 // Return true if it's profitable to compare vtables for the callsite.
376 bool isProfitableToCompareVTables(const CallBase &CB,
378
379 // Return true if the vtable corresponding to VTableGUID should be skipped
380 // for vtable-based comparison.
381 bool shouldSkipVTable(uint64_t VTableGUID);
382
383 // Given an indirect callsite and the list of function candidates, compute
384 // the following vtable information in output parameters and return vtable
385 // pointer if type profiles exist.
386 // - Populate `VTableGUIDCounts` with <vtable-guid, count> using !prof
387 // metadata attached on the vtable pointer.
388 // - For each function candidate, finds out the vtables from which it gets
389 // called and stores the <vtable-guid, count> in promotion candidate.
390 Instruction *computeVTableInfos(const CallBase *CB,
391 VTableGUIDCountsMap &VTableGUIDCounts,
392 std::vector<PromotionCandidate> &Candidates);
393
394 Constant *getOrCreateVTableAddressPointVar(GlobalVariable *GV,
395 uint64_t AddressPointOffset);
396
397 void updateFuncValueProfiles(CallBase &CB, ArrayRef<InstrProfValueData> VDs,
398 uint64_t Sum, uint32_t MaxMDCount);
399
400 void updateVPtrValueProfiles(Instruction *VPtr,
401 VTableGUIDCountsMap &VTableGUIDCounts);
402
403public:
404 IndirectCallPromoter(
405 Function &Func, Module &M, InstrProfSymtab *Symtab, bool SamplePGO,
406 const VirtualCallSiteTypeInfoMap &VirtualCSInfo,
407 VTableAddressPointOffsetValMap &VTableAddressPointOffsetVal,
408 const DenseSet<StringRef> &IgnoredBaseTypes,
410 : F(Func), M(M), Symtab(Symtab), SamplePGO(SamplePGO),
411 VirtualCSInfo(VirtualCSInfo),
412 VTableAddressPointOffsetVal(VTableAddressPointOffsetVal), ORE(ORE),
413 IgnoredBaseTypes(IgnoredBaseTypes) {}
414 IndirectCallPromoter(const IndirectCallPromoter &) = delete;
415 IndirectCallPromoter &operator=(const IndirectCallPromoter &) = delete;
416
417 bool processFunction(ProfileSummaryInfo *PSI);
418};
419
420} // end anonymous namespace
421
422// Indirect-call promotion heuristic. The direct targets are sorted based on
423// the count. Stop at the first target that is not promoted.
424std::vector<IndirectCallPromoter::PromotionCandidate>
425IndirectCallPromoter::getPromotionCandidatesForCallSite(
426 const CallBase &CB, ArrayRef<InstrProfValueData> ValueDataRef,
427 uint64_t TotalCount, uint32_t NumCandidates) {
428 std::vector<PromotionCandidate> Ret;
429
430 LLVM_DEBUG(dbgs() << " \nWork on callsite #" << NumOfPGOICallsites << CB
431 << " Num_targets: " << ValueDataRef.size()
432 << " Num_candidates: " << NumCandidates << "\n");
433 NumOfPGOICallsites++;
434 if (ICPCSSkip != 0 && NumOfPGOICallsites <= ICPCSSkip) {
435 LLVM_DEBUG(dbgs() << " Skip: User options.\n");
436 return Ret;
437 }
438
439 for (uint32_t I = 0; I < NumCandidates; I++) {
440 uint64_t Count = ValueDataRef[I].Count;
441 assert(Count <= TotalCount);
442 (void)TotalCount;
443 uint64_t Target = ValueDataRef[I].Value;
444 LLVM_DEBUG(dbgs() << " Candidate " << I << " Count=" << Count
445 << " Target_func: " << Target << "\n");
446
447 if (ICPInvokeOnly && isa<CallInst>(CB)) {
448 LLVM_DEBUG(dbgs() << " Not promote: User options.\n");
449 ORE.emit([&]() {
450 return OptimizationRemarkMissed(DEBUG_TYPE, "UserOptions", &CB)
451 << " Not promote: User options";
452 });
453 break;
454 }
455 if (ICPCallOnly && isa<InvokeInst>(CB)) {
456 LLVM_DEBUG(dbgs() << " Not promote: User option.\n");
457 ORE.emit([&]() {
458 return OptimizationRemarkMissed(DEBUG_TYPE, "UserOptions", &CB)
459 << " Not promote: User options";
460 });
461 break;
462 }
463 if (ICPCutOff != 0 && NumOfPGOICallPromotion >= ICPCutOff) {
464 LLVM_DEBUG(dbgs() << " Not promote: Cutoff reached.\n");
465 ORE.emit([&]() {
466 return OptimizationRemarkMissed(DEBUG_TYPE, "CutOffReached", &CB)
467 << " Not promote: Cutoff reached";
468 });
469 break;
470 }
471
472 // Don't promote if the symbol is not defined in the module. This avoids
473 // creating a reference to a symbol that doesn't exist in the module
474 // This can happen when we compile with a sample profile collected from
475 // one binary but used for another, which may have profiled targets that
476 // aren't used in the new binary. We might have a declaration initially in
477 // the case where the symbol is globally dead in the binary and removed by
478 // ThinLTO.
479 Function *TargetFunction = Symtab->getFunction(Target);
480 if (TargetFunction == nullptr || TargetFunction->isDeclaration()) {
481 LLVM_DEBUG(dbgs() << " Not promote: Cannot find the target\n");
482 ORE.emit([&]() {
483 return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToFindTarget", &CB)
484 << "Cannot promote indirect call: target with md5sum "
485 << ore::NV("target md5sum", Target) << " not found";
486 });
487 break;
488 }
489
490 const char *Reason = nullptr;
491 if (!isLegalToPromote(CB, TargetFunction, &Reason)) {
492 using namespace ore;
493
494 ORE.emit([&]() {
495 return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToPromote", &CB)
496 << "Cannot promote indirect call to "
497 << NV("TargetFunction", TargetFunction) << " with count of "
498 << NV("Count", Count) << ": " << Reason;
499 });
500 break;
501 }
502
503 Ret.push_back(PromotionCandidate(TargetFunction, Count));
504 TotalCount -= Count;
505 }
506 return Ret;
507}
508
509Constant *IndirectCallPromoter::getOrCreateVTableAddressPointVar(
510 GlobalVariable *GV, uint64_t AddressPointOffset) {
511 auto [Iter, Inserted] =
512 VTableAddressPointOffsetVal[GV].try_emplace(AddressPointOffset, nullptr);
513 if (Inserted)
514 Iter->second = getVTableAddressPointOffset(GV, AddressPointOffset);
515 return Iter->second;
516}
517
518Instruction *IndirectCallPromoter::computeVTableInfos(
519 const CallBase *CB, VTableGUIDCountsMap &GUIDCountsMap,
520 std::vector<PromotionCandidate> &Candidates) {
522 return nullptr;
523
524 // Take the following code sequence as an example, here is how the code works
525 // @vtable1 = {[n x ptr] [... ptr @func1]}
526 // @vtable2 = {[m x ptr] [... ptr @func2]}
527 //
528 // %vptr = load ptr, ptr %d, !prof !0
529 // %0 = tail call i1 @llvm.type.test(ptr %vptr, metadata !"vtable1")
530 // tail call void @llvm.assume(i1 %0)
531 // %vfn = getelementptr inbounds ptr, ptr %vptr, i64 1
532 // %1 = load ptr, ptr %vfn
533 // call void %1(ptr %d), !prof !1
534 //
535 // !0 = !{!"VP", i32 2, i64 100, i64 123, i64 50, i64 456, i64 50}
536 // !1 = !{!"VP", i32 0, i64 100, i64 789, i64 50, i64 579, i64 50}
537 //
538 // Step 1. Find out the %vptr instruction for indirect call and use its !prof
539 // to populate `GUIDCountsMap`.
540 // Step 2. For each vtable-guid, look up its definition from symtab. LTO can
541 // make vtable definitions visible across modules.
542 // Step 3. Compute the byte offset of the virtual call, by adding vtable
543 // address point offset and function's offset relative to vtable address
544 // point. For each function candidate, this step tells us the vtable from
545 // which it comes from, and the vtable address point to compare %vptr with.
546
547 // Only virtual calls have virtual call site info.
548 auto Iter = VirtualCSInfo.find(CB);
549 if (Iter == VirtualCSInfo.end())
550 return nullptr;
551
552 LLVM_DEBUG(dbgs() << "\nComputing vtable infos for callsite #"
553 << NumOfPGOICallsites << "\n");
554
555 const auto &VirtualCallInfo = Iter->second;
556 Instruction *VPtr = VirtualCallInfo.VPtr;
557
559 for (size_t I = 0; I < Candidates.size(); I++)
560 CalleeIndexMap[Candidates[I].TargetFunction] = I;
561
562 uint64_t TotalVTableCount = 0;
563 auto VTableValueDataArray =
564 getValueProfDataFromInst(*VirtualCallInfo.VPtr, IPVK_VTableTarget,
565 MaxNumVTableAnnotations, TotalVTableCount);
566 if (VTableValueDataArray.empty())
567 return VPtr;
568
569 // Compute the functions and counts from by each vtable.
570 for (const auto &V : VTableValueDataArray) {
571 uint64_t VTableVal = V.Value;
572 GUIDCountsMap[VTableVal] = V.Count;
573 GlobalVariable *VTableVar = Symtab->getGlobalVariable(VTableVal);
574 if (!VTableVar) {
575 LLVM_DEBUG(dbgs() << " Cannot find vtable definition for " << VTableVal
576 << "; maybe the vtable isn't imported\n");
577 continue;
578 }
579
580 std::optional<uint64_t> MaybeAddressPointOffset =
581 getAddressPointOffset(*VTableVar, VirtualCallInfo.CompatibleTypeStr);
582 if (!MaybeAddressPointOffset)
583 continue;
584
585 const uint64_t AddressPointOffset = *MaybeAddressPointOffset;
586
587 Function *Callee = nullptr;
588 std::tie(Callee, std::ignore) = getFunctionAtVTableOffset(
589 VTableVar, AddressPointOffset + VirtualCallInfo.FunctionOffset, M);
590 if (!Callee)
591 continue;
592 auto CalleeIndexIter = CalleeIndexMap.find(Callee);
593 if (CalleeIndexIter == CalleeIndexMap.end())
594 continue;
595
596 auto &Candidate = Candidates[CalleeIndexIter->second];
597 // There shouldn't be duplicate GUIDs in one !prof metadata (except
598 // duplicated zeros), so assign counters directly won't cause overwrite or
599 // counter loss.
600 Candidate.VTableGUIDAndCounts[VTableVal] = V.Count;
601 Candidate.AddressPoints.push_back(
602 getOrCreateVTableAddressPointVar(VTableVar, AddressPointOffset));
603 }
604
605 return VPtr;
606}
607
608// Creates 'branch_weights' prof metadata using TrueWeight and FalseWeight.
609// Scales uint64_t counters down to uint32_t if necessary to prevent overflow.
610static MDNode *createBranchWeights(LLVMContext &Context, uint64_t TrueWeight,
611 uint64_t FalseWeight) {
612 MDBuilder MDB(Context);
613 uint64_t Scale = calculateCountScale(std::max(TrueWeight, FalseWeight));
614 return MDB.createBranchWeights(scaleBranchCount(TrueWeight, Scale),
615 scaleBranchCount(FalseWeight, Scale));
616}
617
619 uint64_t Count, uint64_t TotalCount,
620 bool AttachProfToDirectCall,
623 CB, DirectCallee,
624 createBranchWeights(CB.getContext(), Count, TotalCount - Count));
625
626 if (AttachProfToDirectCall)
627 setBranchWeights(NewInst, {static_cast<uint32_t>(Count)},
628 /*IsExpected=*/false);
629
630 using namespace ore;
631
632 if (ORE)
633 ORE->emit([&]() {
634 return OptimizationRemark(DEBUG_TYPE, "Promoted", &CB)
635 << "Promote indirect call to " << NV("DirectCallee", DirectCallee)
636 << " with count " << NV("Count", Count) << " out of "
637 << NV("TotalCount", TotalCount);
638 });
639 return NewInst;
640}
641
642// Promote indirect-call to conditional direct-call for one callsite.
643bool IndirectCallPromoter::tryToPromoteWithFuncCmp(
645 uint64_t TotalCount, ArrayRef<InstrProfValueData> ICallProfDataRef,
646 uint32_t NumCandidates, VTableGUIDCountsMap &VTableGUIDCounts) {
647 uint32_t NumPromoted = 0;
648
649 for (const auto &C : Candidates) {
650 uint64_t FuncCount = C.Count;
651 pgo::promoteIndirectCall(CB, C.TargetFunction, FuncCount, TotalCount,
652 SamplePGO, &ORE);
653 assert(TotalCount >= FuncCount);
654 TotalCount -= FuncCount;
655 NumOfPGOICallPromotion++;
656 NumPromoted++;
657
658 if (!EnableVTableProfileUse || C.VTableGUIDAndCounts.empty())
659 continue;
660
661 // After a virtual call candidate gets promoted, update the vtable's counts
662 // proportionally. Each vtable-guid in `C.VTableGUIDAndCounts` represents
663 // a vtable from which the virtual call is loaded. Compute the sum and use
664 // 128-bit APInt to improve accuracy.
665 uint64_t SumVTableCount = 0;
666 for (const auto &[GUID, VTableCount] : C.VTableGUIDAndCounts)
667 SumVTableCount += VTableCount;
668
669 for (const auto &[GUID, VTableCount] : C.VTableGUIDAndCounts) {
670 APInt APFuncCount((unsigned)128, FuncCount, false /*signed*/);
671 APFuncCount *= VTableCount;
672 VTableGUIDCounts[GUID] -= APFuncCount.udiv(SumVTableCount).getZExtValue();
673 }
674 }
675 if (NumPromoted == 0)
676 return false;
677
678 assert(NumPromoted <= ICallProfDataRef.size() &&
679 "Number of promoted functions should not be greater than the number "
680 "of values in profile metadata");
681
682 // Update value profiles on the indirect call.
683 updateFuncValueProfiles(CB, ICallProfDataRef.slice(NumPromoted), TotalCount,
684 NumCandidates);
685 updateVPtrValueProfiles(VPtr, VTableGUIDCounts);
686 return true;
687}
688
689void IndirectCallPromoter::updateFuncValueProfiles(
690 CallBase &CB, ArrayRef<InstrProfValueData> CallVDs, uint64_t TotalCount,
691 uint32_t MaxMDCount) {
692 // First clear the existing !prof.
693 CB.setMetadata(LLVMContext::MD_prof, nullptr);
694 // Annotate the remaining value profiles if counter is not zero.
695 if (TotalCount != 0)
696 annotateValueSite(M, CB, CallVDs, TotalCount, IPVK_IndirectCallTarget,
697 MaxMDCount);
698}
699
700void IndirectCallPromoter::updateVPtrValueProfiles(
701 Instruction *VPtr, VTableGUIDCountsMap &VTableGUIDCounts) {
702 if (!EnableVTableProfileUse || VPtr == nullptr ||
703 !VPtr->getMetadata(LLVMContext::MD_prof))
704 return;
705 VPtr->setMetadata(LLVMContext::MD_prof, nullptr);
706 std::vector<InstrProfValueData> VTableValueProfiles;
707 uint64_t TotalVTableCount = 0;
708 for (auto [GUID, Count] : VTableGUIDCounts) {
709 if (Count == 0)
710 continue;
711
712 VTableValueProfiles.push_back({GUID, Count});
713 TotalVTableCount += Count;
714 }
715 llvm::sort(VTableValueProfiles,
716 [](const InstrProfValueData &LHS, const InstrProfValueData &RHS) {
717 return LHS.Count > RHS.Count;
718 });
719
720 annotateValueSite(M, *VPtr, VTableValueProfiles, TotalVTableCount,
721 IPVK_VTableTarget, VTableValueProfiles.size());
722}
723
724bool IndirectCallPromoter::tryToPromoteWithVTableCmp(
726 uint64_t TotalFuncCount, uint32_t NumCandidates,
728 VTableGUIDCountsMap &VTableGUIDCounts) {
729 SmallVector<uint64_t, 4> PromotedFuncCount;
730
731 for (const auto &Candidate : Candidates) {
732 for (auto &[GUID, Count] : Candidate.VTableGUIDAndCounts)
733 VTableGUIDCounts[GUID] -= Count;
734
735 // 'OriginalBB' is the basic block of indirect call. After each candidate
736 // is promoted, a new basic block is created for the indirect fallback basic
737 // block and indirect call `CB` is moved into this new BB.
738 BasicBlock *OriginalBB = CB.getParent();
740 CB, VPtr, Candidate.TargetFunction, Candidate.AddressPoints,
741 createBranchWeights(CB.getContext(), Candidate.Count,
742 TotalFuncCount - Candidate.Count));
743
744 int SinkCount = tryToSinkInstructions(OriginalBB, CB.getParent());
745
746 ORE.emit([&]() {
747 OptimizationRemark Remark(DEBUG_TYPE, "Promoted", &CB);
748
749 const auto &VTableGUIDAndCounts = Candidate.VTableGUIDAndCounts;
750 Remark << "Promote indirect call to "
751 << ore::NV("DirectCallee", Candidate.TargetFunction)
752 << " with count " << ore::NV("Count", Candidate.Count)
753 << " out of " << ore::NV("TotalCount", TotalFuncCount) << ", sink "
754 << ore::NV("SinkCount", SinkCount)
755 << " instruction(s) and compare "
756 << ore::NV("VTable", VTableGUIDAndCounts.size())
757 << " vtable(s): {";
758
759 // Sort GUIDs so remark message is deterministic.
760 std::set<uint64_t> GUIDSet;
761 for (auto [GUID, Count] : VTableGUIDAndCounts)
762 GUIDSet.insert(GUID);
763 for (auto Iter = GUIDSet.begin(); Iter != GUIDSet.end(); Iter++) {
764 if (Iter != GUIDSet.begin())
765 Remark << ", ";
766 Remark << ore::NV("VTable", Symtab->getGlobalVariable(*Iter));
767 }
768
769 Remark << "}";
770
771 return Remark;
772 });
773
774 PromotedFuncCount.push_back(Candidate.Count);
775
776 assert(TotalFuncCount >= Candidate.Count &&
777 "Within one prof metadata, total count is the sum of counts from "
778 "individual <target, count> pairs");
779 // Use std::min since 'TotalFuncCount' is the saturated sum of individual
780 // counts, see
781 // https://github.com/llvm/llvm-project/blob/abedb3b8356d5d56f1c575c4f7682fba2cb19787/llvm/lib/ProfileData/InstrProf.cpp#L1281-L1288
782 TotalFuncCount -= std::min(TotalFuncCount, Candidate.Count);
783 NumOfPGOICallPromotion++;
784 }
785
786 if (PromotedFuncCount.empty())
787 return false;
788
789 // Update value profiles for 'CB' and 'VPtr', assuming that each 'CB' has a
790 // a distinct 'VPtr'.
791 // FIXME: When Clang `-fstrict-vtable-pointers` is enabled, a vtable might be
792 // used to load multiple virtual functions. The vtable profiles needs to be
793 // updated properly in that case (e.g, for each indirect call annotate both
794 // type profiles and function profiles in one !prof).
795 for (size_t I = 0; I < PromotedFuncCount.size(); I++)
796 ICallProfDataRef[I].Count -=
797 std::max(PromotedFuncCount[I], ICallProfDataRef[I].Count);
798 // Sort value profiles by count in descending order.
799 llvm::stable_sort(ICallProfDataRef, [](const InstrProfValueData &LHS,
800 const InstrProfValueData &RHS) {
801 return LHS.Count > RHS.Count;
802 });
803 // Drop the <target-value, count> pair if count is zero.
805 ICallProfDataRef.begin(),
806 llvm::upper_bound(ICallProfDataRef, 0U,
807 [](uint64_t Count, const InstrProfValueData &ProfData) {
808 return ProfData.Count <= Count;
809 }));
810 updateFuncValueProfiles(CB, VDs, TotalFuncCount, NumCandidates);
811 updateVPtrValueProfiles(VPtr, VTableGUIDCounts);
812 return true;
813}
814
815// Traverse all the indirect-call callsite and get the value profile
816// annotation to perform indirect-call promotion.
817bool IndirectCallPromoter::processFunction(ProfileSummaryInfo *PSI) {
818 bool Changed = false;
819 ICallPromotionAnalysis ICallAnalysis;
820 for (auto *CB : findIndirectCalls(F)) {
821 uint32_t NumCandidates;
822 uint64_t TotalCount;
823 auto ICallProfDataRef = ICallAnalysis.getPromotionCandidatesForInstruction(
824 CB, TotalCount, NumCandidates);
825 if (!NumCandidates ||
826 (PSI && PSI->hasProfileSummary() && !PSI->isHotCount(TotalCount)))
827 continue;
828
829 auto PromotionCandidates = getPromotionCandidatesForCallSite(
830 *CB, ICallProfDataRef, TotalCount, NumCandidates);
831
832 VTableGUIDCountsMap VTableGUIDCounts;
833 Instruction *VPtr =
834 computeVTableInfos(CB, VTableGUIDCounts, PromotionCandidates);
835
836 if (isProfitableToCompareVTables(*CB, PromotionCandidates))
837 Changed |= tryToPromoteWithVTableCmp(*CB, VPtr, PromotionCandidates,
838 TotalCount, NumCandidates,
839 ICallProfDataRef, VTableGUIDCounts);
840 else
841 Changed |= tryToPromoteWithFuncCmp(*CB, VPtr, PromotionCandidates,
842 TotalCount, ICallProfDataRef,
843 NumCandidates, VTableGUIDCounts);
844 }
845 return Changed;
846}
847
848// TODO: Return false if the function addressing and vtable load instructions
849// cannot sink to indirect fallback.
850bool IndirectCallPromoter::isProfitableToCompareVTables(
851 const CallBase &CB, ArrayRef<PromotionCandidate> Candidates) {
852 if (!EnableVTableProfileUse || Candidates.empty())
853 return false;
854 LLVM_DEBUG(dbgs() << "\nEvaluating vtable profitability for callsite #"
855 << NumOfPGOICallsites << CB << "\n");
856 const size_t CandidateSize = Candidates.size();
857 for (size_t I = 0; I < CandidateSize; I++) {
858 auto &Candidate = Candidates[I];
859 auto &VTableGUIDAndCounts = Candidate.VTableGUIDAndCounts;
860
861 LLVM_DEBUG(dbgs() << " Candidate " << I << " FunctionCount: "
862 << Candidate.Count << ", VTableCounts:");
863 // Add [[maybe_unused]] since <GUID, Count> are only used by LLVM_DEBUG.
864 for ([[maybe_unused]] auto &[GUID, Count] : VTableGUIDAndCounts)
865 LLVM_DEBUG(dbgs() << " {" << Symtab->getGlobalVariable(GUID)->getName()
866 << ", " << Count << "}");
867 LLVM_DEBUG(dbgs() << "\n");
868
869 uint64_t CandidateVTableCount = 0;
870
871 for (auto &[GUID, Count] : VTableGUIDAndCounts) {
872 CandidateVTableCount += Count;
873
874 if (shouldSkipVTable(GUID))
875 return false;
876 }
877
878 if (CandidateVTableCount < Candidate.Count * ICPVTablePercentageThreshold) {
880 dbgs() << " function count " << Candidate.Count
881 << " and its vtable sum count " << CandidateVTableCount
882 << " have discrepancies. Bail out vtable comparison.\n");
883 return false;
884 }
885
886 // 'MaxNumVTable' limits the number of vtables to make vtable comparison
887 // profitable. Comparing multiple vtables for one function candidate will
888 // insert additional instructions on the hot path, and allowing more than
889 // one vtable for non last candidates may or may not elongate the dependency
890 // chain for the subsequent candidates. Set its value to 1 for non-last
891 // candidate and allow option to override it for the last candidate.
892 int MaxNumVTable = 1;
893 if (I == CandidateSize - 1)
894 MaxNumVTable = ICPMaxNumVTableLastCandidate;
895
896 if ((int)Candidate.AddressPoints.size() > MaxNumVTable) {
897 LLVM_DEBUG(dbgs() << " allow at most " << MaxNumVTable << " and got "
898 << Candidate.AddressPoints.size()
899 << " vtables. Bail out for vtable comparison.\n");
900 return false;
901 }
902 }
903
904 return true;
905}
906
907bool IndirectCallPromoter::shouldSkipVTable(uint64_t VTableGUID) {
908 if (IgnoredBaseTypes.empty())
909 return false;
910
911 auto *VTableVar = Symtab->getGlobalVariable(VTableGUID);
912
913 assert(VTableVar && "VTableVar must exist for GUID in VTableGUIDAndCounts");
914
916 VTableVar->getMetadata(LLVMContext::MD_type, Types);
917
918 for (auto *Type : Types)
919 if (auto *TypeId = dyn_cast<MDString>(Type->getOperand(1).get()))
920 if (IgnoredBaseTypes.contains(TypeId->getString())) {
921 LLVM_DEBUG(dbgs() << " vtable profiles should be ignored. Bail "
922 "out of vtable comparison.");
923 return true;
924 }
925 return false;
926}
927
928// For virtual calls in the module, collect per-callsite information which will
929// be used to associate an ICP candidate with a vtable and a specific function
930// in the vtable. With type intrinsics (llvm.type.test), we can find virtual
931// calls in a compile-time efficient manner (by iterating its users) and more
932// importantly use the compatible type later to figure out the function byte
933// offset relative to the start of vtables.
934static void
936 VirtualCallSiteTypeInfoMap &VirtualCSInfo) {
937 // Right now only llvm.type.test is used to find out virtual call sites.
938 // With ThinLTO and whole-program-devirtualization, llvm.type.test and
939 // llvm.public.type.test are emitted, and llvm.public.type.test is either
940 // refined to llvm.type.test or dropped before indirect-call-promotion pass.
941 //
942 // FIXME: For fullLTO with VFE, `llvm.type.checked.load intrinsic` is emitted.
943 // Find out virtual calls by looking at users of llvm.type.checked.load in
944 // that case.
945 Function *TypeTestFunc =
946 Intrinsic::getDeclarationIfExists(&M, Intrinsic::type_test);
947 if (!TypeTestFunc || TypeTestFunc->use_empty())
948 return;
949
950 auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
951 auto LookupDomTree = [&FAM](Function &F) -> DominatorTree & {
953 };
954 // Iterate all type.test calls to find all indirect calls.
955 for (Use &U : llvm::make_early_inc_range(TypeTestFunc->uses())) {
956 auto *CI = dyn_cast<CallInst>(U.getUser());
957 if (!CI)
958 continue;
959 auto *TypeMDVal = cast<MetadataAsValue>(CI->getArgOperand(1));
960 if (!TypeMDVal)
961 continue;
962 auto *CompatibleTypeId = dyn_cast<MDString>(TypeMDVal->getMetadata());
963 if (!CompatibleTypeId)
964 continue;
965
966 // Find out all devirtualizable call sites given a llvm.type.test
967 // intrinsic call.
970 auto &DT = LookupDomTree(*CI->getFunction());
971 findDevirtualizableCallsForTypeTest(DevirtCalls, Assumes, CI, DT);
972
973 for (auto &DevirtCall : DevirtCalls) {
974 CallBase &CB = DevirtCall.CB;
975 // Given an indirect call, try find the instruction which loads a
976 // pointer to virtual table.
977 Instruction *VTablePtr =
979 if (!VTablePtr)
980 continue;
981 VirtualCSInfo[&CB] = {DevirtCall.Offset, VTablePtr,
982 CompatibleTypeId->getString()};
983 }
984 }
985}
986
987// A wrapper function that does the actual work.
988static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, bool InLTO,
989 bool SamplePGO, ModuleAnalysisManager &MAM) {
990 if (DisableICP)
991 return false;
992 InstrProfSymtab Symtab;
993 if (Error E = Symtab.create(M, InLTO)) {
994 std::string SymtabFailure = toString(std::move(E));
995 M.getContext().emitError("Failed to create symtab: " + SymtabFailure);
996 return false;
997 }
998 bool Changed = false;
999 VirtualCallSiteTypeInfoMap VirtualCSInfo;
1000
1001 DenseSet<StringRef> IgnoredBaseTypes;
1002
1004 computeVirtualCallSiteTypeInfoMap(M, MAM, VirtualCSInfo);
1005
1006 for (StringRef Str : ICPIgnoredBaseTypes)
1007 IgnoredBaseTypes.insert(Str);
1008 }
1009
1010 // VTableAddressPointOffsetVal stores the vtable address points. The vtable
1011 // address point of a given <vtable, address point offset> is static (doesn't
1012 // change after being computed once).
1013 // IndirectCallPromoter::getOrCreateVTableAddressPointVar creates the map
1014 // entry the first time a <vtable, offset> pair is seen, as
1015 // promoteIndirectCalls processes an IR module and calls IndirectCallPromoter
1016 // repeatedly on each function.
1017 VTableAddressPointOffsetValMap VTableAddressPointOffsetVal;
1018
1019 for (auto &F : M) {
1020 if (F.isDeclaration() || F.hasOptNone())
1021 continue;
1022
1023 auto &FAM =
1026
1027 IndirectCallPromoter CallPromoter(F, M, &Symtab, SamplePGO, VirtualCSInfo,
1028 VTableAddressPointOffsetVal,
1029 IgnoredBaseTypes, ORE);
1030 bool FuncChanged = CallPromoter.processFunction(PSI);
1031 if (ICPDUMPAFTER && FuncChanged) {
1032 LLVM_DEBUG(dbgs() << "\n== IR Dump After =="; F.print(dbgs()));
1033 LLVM_DEBUG(dbgs() << "\n");
1034 }
1035 Changed |= FuncChanged;
1036 if (ICPCutOff != 0 && NumOfPGOICallPromotion >= ICPCutOff) {
1037 LLVM_DEBUG(dbgs() << " Stop: Cutoff reached.\n");
1038 break;
1039 }
1040 }
1041 return Changed;
1042}
1043
1047
1048 if (!promoteIndirectCalls(M, PSI, InLTO | ICPLTOMode,
1049 SamplePGO | ICPSamplePGOMode, MAM))
1050 return PreservedAnalyses::all();
1051
1052 return PreservedAnalyses::none();
1053}
#define LLVM_DEBUG(...)
Definition: Debug.h:106
This file defines the DenseMap class.
This header defines various interfaces for pass management in LLVM.
Interface to identify indirect call promotion candidates.
static cl::opt< bool > ICPCallOnly("icp-call-only", cl::init(false), cl::Hidden, cl::desc("Run indirect-call promotion for call instructions " "only"))
cl::opt< unsigned > MaxNumVTableAnnotations
static cl::opt< bool > ICPInvokeOnly("icp-invoke-only", cl::init(false), cl::Hidden, cl::desc("Run indirect-call promotion for " "invoke instruction only"))
static cl::opt< unsigned > ICPCSSkip("icp-csskip", cl::init(0), cl::Hidden, cl::desc("Skip Callsite up to this number for this compilation"))
static cl::opt< bool > ICPDUMPAFTER("icp-dumpafter", cl::init(false), cl::Hidden, cl::desc("Dump IR after transformation happens"))
static cl::opt< float > ICPVTablePercentageThreshold("icp-vtable-percentage-threshold", cl::init(0.995), cl::Hidden, cl::desc("The percentage threshold of vtable-count / function-count for " "cost-benefit analysis."))
static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, bool InLTO, bool SamplePGO, ModuleAnalysisManager &MAM)
static void computeVirtualCallSiteTypeInfoMap(Module &M, ModuleAnalysisManager &MAM, VirtualCallSiteTypeInfoMap &VirtualCSInfo)
static MDNode * createBranchWeights(LLVMContext &Context, uint64_t TrueWeight, uint64_t FalseWeight)
static cl::list< std::string > ICPIgnoredBaseTypes("icp-ignored-base-types", cl::Hidden, cl::desc("A list of mangled vtable type info names. Classes specified by the " "type info names and their derived ones will not be vtable-ICP'ed. " "Useful when the profiled types and actual types in the optimized " "binary could be different due to profiling limitations. Type info " "names are those string literals used in LLVM type metadata"))
static cl::opt< bool > ICPLTOMode("icp-lto", cl::init(false), cl::Hidden, cl::desc("Run indirect-call promotion in LTO " "mode"))
#define DEBUG_TYPE
static cl::opt< bool > DisableICP("disable-icp", cl::init(false), cl::Hidden, cl::desc("Disable indirect call promotion"))
static cl::opt< unsigned > ICPCutOff("icp-cutoff", cl::init(0), cl::Hidden, cl::desc("Max number of promotions for this compilation"))
static cl::opt< int > ICPMaxNumVTableLastCandidate("icp-max-num-vtable-last-candidate", cl::init(1), cl::Hidden, cl::desc("The maximum number of vtable for the last candidate."))
static cl::opt< bool > ICPSamplePGOMode("icp-samplepgo", cl::init(false), cl::Hidden, cl::desc("Run indirect-call promotion in SamplePGO mode"))
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file provides the interface for IR based instrumentation passes ( (profile-gen,...
FunctionAnalysisManager FAM
ModuleAnalysisManager MAM
This file contains the declarations for profiling metadata utility functions.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition: APInt.h:78
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:410
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:168
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:163
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:198
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:416
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
Definition: BasicBlock.cpp:467
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:177
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:239
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1120
static Constant * getInBoundsGetElementPtr(Type *Ty, Constant *C, ArrayRef< Constant * > IdxList)
Create an "inbounds" getelementptr.
Definition: Constants.h:1294
This is an important base class in LLVM.
Definition: Constant.h:42
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:156
iterator end()
Definition: DenseMap.h:84
Implements a dense probed hash-table based set.
Definition: DenseSet.h:278
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:279
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
Lightweight error class with error context and mandatory checking.
Definition: Error.h:160
const Function & getFunction() const
Definition: Function.h:171
MDNode * getMetadata(unsigned KindID) const
Get the current metadata attachments for the given kind, if any.
Definition: Value.h:565
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:296
MutableArrayRef< InstrProfValueData > getPromotionCandidatesForInstruction(const Instruction *I, uint64_t &TotalCount, uint32_t &NumCandidates)
Returns reference to array of InstrProfValueData for the given instruction I.
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:567
A symbol table used for function [IR]PGO name look-up with keys (such as pointers,...
Definition: InstrProf.h:460
Error create(object::SectionRef &Section)
Create InstrProfSymtab from an object file section which contains function PGO names.
bool isDebugOrPseudoInst() const LLVM_READONLY
Return true if the instruction is a DbgInfoIntrinsic or PseudoProbeInst.
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:386
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1679
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
Definition: MDBuilder.cpp:37
Metadata node.
Definition: Metadata.h:1069
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:310
iterator begin() const
Definition: ArrayRef.h:359
The optimization diagnostic interface.
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for missed-optimization remarks.
Diagnostic information for applied optimization remarks.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: Analysis.h:114
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
bool hasProfileSummary() const
Returns true if profile summary is available.
bool isHotCount(uint64_t C) const
Returns true if count C is considered hot.
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
Target - Wrapper for Target specific information.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
static IntegerType * getInt8Ty(LLVMContext &C)
static IntegerType * getInt32Ty(LLVMContext &C)
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:72
bool use_empty() const
Definition: Value.h:344
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1075
iterator_range< use_iterator > uses()
Definition: Value.h:376
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:213
const ParentTy * getParent() const
Definition: ilist_node.h:32
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Function * getDeclarationIfExists(Module *M, ID id, ArrayRef< Type * > Tys, FunctionType *FT=nullptr)
This version supports overloaded intrinsics.
Definition: Intrinsics.cpp:746
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
DiagnosticInfoOptimizationBase::Argument NV
CallBase & promoteIndirectCall(CallBase &CB, Function *F, uint64_t Count, uint64_t TotalCount, bool AttachProfToDirectCall, OptimizationRemarkEmitter *ORE)
NodeAddr< FuncNode * > Func
Definition: RDFGraph.h:393
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
void stable_sort(R &&Range)
Definition: STLExtras.h:2037
bool isLegalToPromote(const CallBase &CB, Function *Callee, const char **FailureReason=nullptr)
Return true if the given indirect call site can be made to call Callee.
std::vector< CallBase * > findIndirectCalls(Function &F)
CallBase & promoteCallWithIfThenElse(CallBase &CB, Function *Callee, MDNode *BranchWeights=nullptr)
Promote the given indirect call site to conditionally call Callee.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:657
auto upper_bound(R &&Range, T &&Value)
Provide wrappers to std::upper_bound which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1991
cl::opt< bool > EnableVTableProfileUse("enable-vtable-profile-use", cl::init(false), cl::desc("If ThinLTO and WPD is enabled and this option is true, vtable " "profiles will be used by ICP pass for more efficient indirect " "call sequence. If false, type profiles won't be used."))
void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
Definition: InstrProf.cpp:1301
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:420
void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1664
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
SmallVector< InstrProfValueData, 4 > getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind, uint32_t MaxNumValueData, uint64_t &TotalC, bool GetNoICPValue=false)
Extract the value profile data from Inst and returns them if Inst is annotated with value profile dat...
Definition: InstrProf.cpp:1369
static uint32_t scaleBranchCount(uint64_t Count, uint64_t Scale)
Scale an individual branch count.
static uint64_t calculateCountScale(uint64_t MaxCount)
Calculate what to divide by to scale counts.
const char * toString(DWARFSectionKind Kind)
CallBase & promoteCallWithVTableCmp(CallBase &CB, Instruction *VPtr, Function *Callee, ArrayRef< Constant * > AddressPoints, MDNode *BranchWeights)
This is similar to promoteCallWithIfThenElse except that the condition to promote a virtual call is t...
void findDevirtualizableCallsForTypeTest(SmallVectorImpl< DevirtCallSite > &DevirtCalls, SmallVectorImpl< CallInst * > &Assumes, const CallInst *CI, DominatorTree &DT)
Given a call to the intrinsic @llvm.type.test, find all devirtualizable call sites based on the call ...
std::pair< Function *, Constant * > getFunctionAtVTableOffset(GlobalVariable *GV, uint64_t Offset, Module &M)
Given a vtable and a specified offset, returns the function and the trivial pointer at the specified ...
static Instruction * tryGetVTableInstruction(CallBase *CB)