LLVM 22.0.0git
PGOCtxProfFlattening.cpp
Go to the documentation of this file.
1//===- PGOCtxProfFlattening.cpp - Contextual Instr. Flattening ------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Flattens the contextual profile and lowers it to MD_prof.
10// This should happen after all IPO (which is assumed to have maintained the
11// contextual profile) happened. Flattening consists of summing the values at
12// the same index of the counters belonging to all the contexts of a function.
13// The lowering consists of materializing the counter values to function
14// entrypoint counts and branch probabilities.
15//
16// This pass also removes contextual instrumentation, which has been kept around
17// to facilitate its functionality.
18//
19//===----------------------------------------------------------------------===//
20
22#include "llvm/ADT/STLExtras.h"
23#include "llvm/ADT/ScopeExit.h"
24#include "llvm/Analysis/CFG.h"
27#include "llvm/IR/Analysis.h"
28#include "llvm/IR/CFG.h"
29#include "llvm/IR/Dominators.h"
32#include "llvm/IR/Module.h"
33#include "llvm/IR/PassManager.h"
39
40using namespace llvm;
41
42#define DEBUG_TYPE "ctx_prof_flatten"
43
44namespace {
45
46/// Assign branch weights and function entry count. Also update the PSI
47/// builder.
48void assignProfileData(Function &F, ArrayRef<uint64_t> RawCounters) {
49 assert(!RawCounters.empty());
50 ProfileAnnotator PA(F, RawCounters);
51
52 F.setEntryCount(RawCounters[0]);
53 SmallVector<uint64_t, 2> ProfileHolder;
54
55 for (auto &BB : F) {
56 for (auto &I : BB)
57 if (auto *SI = dyn_cast<SelectInst>(&I)) {
58 uint64_t TrueCount, FalseCount = 0;
59 if (!PA.getSelectInstrProfile(*SI, TrueCount, FalseCount))
60 continue;
61 setProfMetadata(SI, {TrueCount, FalseCount},
62 std::max(TrueCount, FalseCount));
63 }
64 if (succ_size(&BB) < 2)
65 continue;
66 uint64_t MaxCount = 0;
67 if (!PA.getOutgoingBranchWeights(BB, ProfileHolder, MaxCount))
68 continue;
69 assert(MaxCount > 0);
70 setProfMetadata(BB.getTerminator(), ProfileHolder, MaxCount);
71 }
72}
73
// Returns true when every basic block of F is reachable from the entry block,
// as reported by the DominatorTree that FAM provides for F.
// The const_cast is there because F is taken by const reference while the
// analysis result is requested for a non-const Function.
// NOTE(review): listing line 75 is absent from this extract. It presumably
// holds the remainder of the parameter list (the declaration of `FAM`) and the
// opening brace - confirm against the upstream file before editing.
74[[maybe_unused]] bool areAllBBsReachable(const Function &F,
76 auto &DT = FAM.getResult<DominatorTreeAnalysis>(const_cast<Function &>(F));
77 return llvm::all_of(
78 F, [&](const BasicBlock &BB) { return DT.isReachableFromEntry(&BB); });
79}
80
81void clearColdFunctionProfile(Function &F) {
82 for (auto &BB : F)
83 BB.getTerminator()->setMetadata(LLVMContext::MD_prof, nullptr);
84 F.setEntryCount(0U);
85}
86
// Erases instructions from every basic block of F.
// make_early_inc_range is used so that erasing the current instruction does
// not disturb the iteration.
// NOTE(review): listing line 90 is absent from this extract. It presumably
// holds the condition selecting which instructions are instrumentation; as
// shown here the erase would apply to every instruction - confirm against the
// upstream file.
87void removeInstrumentation(Function &F) {
88 for (auto &BB : F)
89 for (auto &I : llvm::make_early_inc_range(BB))
91 I.eraseFromParent();
92}
93
// Annotates the indirect call CB with value-profile data for its targets.
// The callsite index is read from Ins and looked up in FlatProf; when there is
// no entry for that index the call is left untouched.
// NOTE(review): listing lines 96, 103 and 110 are absent from this extract.
// They presumably hold the `FlatProf` parameter, the declaration of `Data`,
// and the opening of the sort call whose comparator follows - confirm against
// the upstream file.
94void annotateIndirectCall(
95 Module &M, CallBase &CB,
97 const InstrProfCallsite &Ins) {
98 auto Idx = Ins.getIndex()->getZExtValue();
99 auto FIt = FlatProf.find(Idx);
100 if (FIt == FlatProf.end())
101 return;
102 const auto &Targets = FIt->second;
// Copy each (Guid, Count) pair into Data and accumulate the total count.
104 uint64_t Sum = 0;
105 for (auto &[Guid, Count] : Targets) {
106 Data.push_back({/*.Value=*/Guid, /*.Count=*/Count});
107 Sum += Count;
108 }
109
// Comparator ordering entries by descending Count.
111 [](const InstrProfValueData &A, const InstrProfValueData &B) {
112 return A.Count > B.Count;
113 });
// Data.size() is passed as the last argument, so no entry is cut off by a
// smaller default limit.
114 llvm::annotateValueSite(M, CB, Data, Sum,
115 InstrProfValueKind::IPVK_IndirectCallTarget,
116 Data.size());
117 LLVM_DEBUG(dbgs() << "[ctxprof] flat indirect call prof: " << CB
118 << CB.getMetadata(LLVMContext::MD_prof) << "\n");
119}
120
// Walks every function definition in M that has an entry in the flattened
// indirect-call profile (keyed by AssignGUIDPass::getGUID) and forwards each
// indirect call in it to annotateIndirectCall.
// NOTE(review): listing line 138 is absent from this extract. It presumably
// defines `Ins`, the callsite instrumentation for CB, possibly with a null
// check - confirm against the upstream file.
121// We normally return a "Changed" bool, but the calling pass' run assumes
122// something will change - some profile will be added - so this won't add much
123// by returning false when applicable.
124void annotateIndirectCalls(Module &M, const CtxProfAnalysis::Result &CtxProf) {
125 const auto FlatIndCalls = CtxProf.flattenVirtCalls();
126 for (auto &F : M) {
// Declarations have no body to annotate.
127 if (F.isDeclaration())
128 continue;
129 auto FlatProfIter = FlatIndCalls.find(AssignGUIDPass::getGUID(F));
130 if (FlatProfIter == FlatIndCalls.end())
131 continue;
132 const auto &FlatProf = FlatProfIter->second;
133 for (auto &BB : F) {
134 for (auto &I : BB) {
// Only indirect calls are of interest here.
135 auto *CB = dyn_cast<CallBase>(&I);
136 if (!CB || !CB->isIndirectCall())
137 continue;
139 annotateIndirectCall(M, *CB, FlatProf, *Ins);
140 }
141 }
142 }
143}
144
145} // namespace
146
149 // Ensure in all cases the instrumentation is removed: if this module had no
150 // roots, the contextual profile would evaluate to false, but there would
151 // still be instrumentation.
152 // Note: in such cases we leave as-is any other profile info (if present -
153 // e.g. synthetic weights, etc) because it wouldn't interfere with the
154 // contextual - based one (which would be in other modules)
155 auto OnExit = llvm::make_scope_exit([&]() {
156 if (IsPreThinlink)
157 return;
158 for (auto &F : M)
159 removeInstrumentation(F);
160 });
161 auto &CtxProf = MAM.getResult<CtxProfAnalysis>(M);
162 // post-thinlink, we only reprocess for the module(s) containing the
163 // contextual tree. For everything else, OnExit will just clean the
164 // instrumentation.
165 if (!IsPreThinlink && !CtxProf.isInSpecializedModule())
167
168 if (IsPreThinlink)
169 annotateIndirectCalls(M, CtxProf);
170 const auto FlattenedProfile = CtxProf.flatten();
171
172 for (auto &F : M) {
173 if (F.isDeclaration())
174 continue;
175
176 assert(areAllBBsReachable(
178 .getManager()) &&
179 "Function has unreacheable basic blocks. The expectation was that "
180 "DCE was run before.");
181
182 auto It = FlattenedProfile.find(AssignGUIDPass::getGUID(F));
183 // If this function didn't appear in the contextual profile, it's cold.
184 if (It == FlattenedProfile.end())
185 clearColdFunctionProfile(F);
186 else
187 assignProfileData(F, It->second);
188 }
190 // use here the flat profiles just so the importer doesn't complain about
191 // how different the PSIs are between the module with the roots and the
192 // various modules it imports.
193 for (auto &C : FlattenedProfile) {
194 PB.addEntryCount(C.second[0]);
195 for (auto V : llvm::drop_begin(C.second))
196 PB.addInternalCount(V);
197 }
198
199 M.setProfileSummary(PB.getSummary()->getMD(M.getContext()),
203 MAM.invalidate(M, PA);
204 auto &PSI = MAM.getResult<ProfileSummaryAnalysis>(M);
205 PSI.refresh(PB.getSummary());
207}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
This file provides the interface for IR based instrumentation passes ( (profile-gen,...
FunctionAnalysisManager FAM
ModuleAnalysisManager MAM
PassBuilder PB(Machine, PassOpts->PTO, std::nullopt, &PIC)
This file contains some templates that are useful if you are working with the STL at all.
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...
#define LLVM_DEBUG(...)
Definition Debug.h:114
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:142
static LLVM_ABI uint64_t getGUID(const Function &F)
LLVM Basic Block Representation.
Definition BasicBlock.h:62
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
LLVM_ABI bool isIndirectCall() const
Return true if the callsite is an indirect call.
static LLVM_ABI InstrProfCallsite * getCallsiteInstrumentation(CallBase &CB)
Get the instruction instrumenting a callsite, or nullptr if that cannot be found.
PGOContextualProfile Result
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
iterator end()
Definition DenseMap.h:81
Analysis pass which computes a DominatorTree.
Definition Dominators.h:284
This represents the llvm.instrprof.callsite intrinsic.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
LLVM_ABI const CtxProfFlatIndirectCallProfile flattenVirtCalls() const
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition Analysis.h:115
PreservedAnalyses & abandon()
Mark an analysis as abandoned.
Definition Analysis.h:171
LLVM_ABI bool getSelectInstrProfile(SelectInst &SI, uint64_t &TrueCount, uint64_t &FalseCount) const
LLVM_ABI bool getOutgoingBranchWeights(BasicBlock &BB, SmallVectorImpl< uint64_t > &Profile, uint64_t &MaxCount) const
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
static LLVM_ABI const ArrayRef< uint32_t > DefaultCutoffs
A vector of useful cutoff values for detailed summary.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Pass manager infrastructure for declaring and invalidating analyses.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1725
LLVM_ABI void setProfMetadata(Instruction *TI, ArrayRef< uint64_t > EdgeCounts, uint64_t MaxCount)
detail::scope_exit< std::decay_t< Callable > > make_scope_exit(Callable &&F)
Definition ScopeExit.h:59
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:632
InnerAnalysisManagerProxy< FunctionAnalysisManager, Module > FunctionAnalysisManagerModuleProxy
Provide the FunctionAnalysisManager to Module proxy.
LLVM_ABI void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1622
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
auto succ_size(const MachineBasicBlock *BB)
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:189
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
Definition MIRParser.h:39