44class ProfileAnnotator final {
49 std::optional<uint64_t> Count;
51 explicit EdgeInfo(BBInfo &Src, BBInfo &Dest) : Src(&Src), Dest(&Dest) {}
55 std::optional<uint64_t> Count;
63 size_t UnknownCountOutEdges = 0;
64 size_t UnknownCountInEdges = 0;
71 bool AssumeAllKnown)
const {
72 std::optional<uint64_t> Sum;
73 for (
const auto *E : Edges) {
78 *Sum += (AssumeAllKnown ? *E->Count : E->Count.value_or(0U));
85 assert(!Count.has_value());
86 Count = getEdgeSum(Edges,
true);
87 return Count.has_value();
91 uint64_t KnownSum = getEdgeSum(Edges,
false).value_or(0U);
92 uint64_t EdgeVal = *Count > KnownSum ? *Count - KnownSum : 0
U;
93 EdgeInfo *E =
nullptr;
95 if (
I && !
I->Count.has_value()) {
101 "Expected exactly one edge to have an unknown count, "
102 "found a second one");
106 assert(E &&
"Expected exactly one edge to have an unknown count");
107 assert(!E->Count.has_value());
109 assert(E->Src->UnknownCountOutEdges > 0);
110 assert(E->Dest->UnknownCountInEdges > 0);
111 --E->Src->UnknownCountOutEdges;
112 --E->Dest->UnknownCountInEdges;
116 BBInfo(
size_t NumInEdges,
size_t NumOutEdges, std::optional<uint64_t> Count)
124 OutEdges.
resize(NumOutEdges);
127 bool tryTakeCountFromKnownOutEdges(
const BasicBlock &BB) {
128 if (!UnknownCountOutEdges) {
129 return computeCountFrom(OutEdges);
134 bool tryTakeCountFromKnownInEdges(
const BasicBlock &BB) {
135 if (!UnknownCountInEdges) {
136 return computeCountFrom(InEdges);
141 void addInEdge(EdgeInfo &Info) {
143 ++UnknownCountInEdges;
150 void addOutEdge(
size_t Index, EdgeInfo &Info) {
152 ++UnknownCountOutEdges;
155 bool hasCount()
const {
return Count.has_value(); }
157 uint64_t getCount()
const {
return *Count; }
159 bool trySetSingleUnknownInEdgeCount() {
160 if (UnknownCountInEdges == 1) {
161 setSingleUnknownEdgeCount(InEdges);
167 bool trySetSingleUnknownOutEdgeCount() {
168 if (UnknownCountOutEdges == 1) {
169 setSingleUnknownEdgeCount(OutEdges);
174 size_t getNumOutEdges()
const {
return OutEdges.
size(); }
176 uint64_t getEdgeCount(
size_t Index)
const {
177 if (
auto *E = OutEdges[Index])
186 std::map<const BasicBlock *, BBInfo> BBInfos;
187 std::vector<EdgeInfo> EdgeInfos;
193 bool KeepGoing =
true;
196 for (
const auto &BB : F) {
197 auto &
Info = getBBInfo(BB);
198 if (!
Info.hasCount())
199 KeepGoing |=
Info.tryTakeCountFromKnownOutEdges(BB) ||
200 Info.tryTakeCountFromKnownInEdges(BB);
201 if (
Info.hasCount()) {
202 KeepGoing |=
Info.trySetSingleUnknownOutEdgeCount();
203 KeepGoing |=
Info.trySetSingleUnknownInEdgeCount();
214 BBInfo &getBBInfo(
const BasicBlock &BB) {
return BBInfos.find(&BB)->second; }
216 const BBInfo &getBBInfo(
const BasicBlock &BB)
const {
217 return BBInfos.find(&BB)->second;
222 bool allCountersAreAssigned()
const {
223 for (
const auto &BBInfo : BBInfos)
224 if (!BBInfo.second.hasCount())
226 for (
const auto &EdgeInfo : EdgeInfos)
227 if (!EdgeInfo.Count.has_value())
234 bool allTakenPathsExit()
const {
235 std::deque<const BasicBlock *> Worklist;
237 Worklist.push_back(&
F.getEntryBlock());
238 bool HitExit =
false;
239 while (!Worklist.empty()) {
240 const auto *BB = Worklist.
front();
241 Worklist.pop_front();
242 if (!Visited.
insert(BB).second)
254 const auto &BBInfo = getBBInfo(*BB);
255 bool HasAWayOut =
false;
258 if (!shouldExcludeEdge(*BB, *Succ)) {
259 if (BBInfo.getEdgeCount(
I) > 0) {
261 Worklist.push_back(Succ);
271 bool allNonColdSelectsHaveProfile()
const {
272 for (
const auto &BB : F) {
273 if (getBBInfo(BB).getCount() > 0) {
274 for (
const auto &
I : BB) {
275 if (
const auto *SI = dyn_cast<SelectInst>(&
I)) {
276 if (!
SI->getMetadata(LLVMContext::MD_prof)) {
293 for (
const auto &BB : F) {
294 std::optional<uint64_t> Count;
297 auto Index =
Ins->getIndex()->getZExtValue();
299 "The index must be inside the counters vector by construction - "
300 "tripping this assertion indicates a bug in how the contextual "
301 "profile is managed by IPO transforms");
304 }
else if (isa<UnreachableInst>(BB.getTerminator())) {
311 assert(Ins &&
"We iterate through the function's BBs, no reason to "
312 "insert one more than once");
314 return !shouldExcludeEdge(BB, *Succ);
318 EdgeInfos.reserve(NrEdges);
319 for (
const auto &BB : F) {
320 auto &
Info = getBBInfo(BB);
321 for (
auto I = 0U;
I < BB.getTerminator()->getNumSuccessors(); ++
I) {
322 const auto *Succ = BB.getTerminator()->getSuccessor(
I);
323 if (!shouldExcludeEdge(BB, *Succ)) {
324 auto &EI = EdgeInfos.emplace_back(getBBInfo(BB), getBBInfo(*Succ));
325 Info.addOutEdge(
I, EI);
326 getBBInfo(*Succ).addInEdge(EI);
330 assert(EdgeInfos.capacity() == NrEdges &&
331 "The capacity of EdgeInfos should have stayed unchanged it was "
332 "populated, because we need pointers to its contents to be stable");
335 void setProfileForSelectInstructions(
BasicBlock &BB,
const BBInfo &BBInfo) {
336 if (BBInfo.getCount() == 0)
340 if (
auto *SI = dyn_cast<SelectInst>(&
I)) {
342 auto Index = Step->getIndex()->getZExtValue();
344 "The index of the step instruction must be inside the "
345 "counters vector by "
346 "construction - tripping this assertion indicates a bug in "
347 "how the contextual profile is managed by IPO transforms");
348 auto TotalCount = BBInfo.getCount();
351 (TotalCount > TrueCount ? TotalCount - TrueCount : 0
U);
353 std::max(TrueCount, FalseCount));
354 PB.addInternalCount(TrueCount);
355 PB.addInternalCount(FalseCount);
363 void assignProfileData() {
365 propagateCounterValues(Counters);
366 F.setEntryCount(Counters[0]);
367 PB.addEntryCount(Counters[0]);
370 const auto &BBInfo = getBBInfo(BB);
371 setProfileForSelectInstructions(BB, BBInfo);
374 auto *
Term = BB.getTerminator();
378 for (
unsigned SuccIdx = 0,
Size = BBInfo.getNumOutEdges(); SuccIdx <
Size;
380 uint64_t EdgeCount = BBInfo.getEdgeCount(SuccIdx);
381 if (EdgeCount > MaxCount)
382 MaxCount = EdgeCount;
383 EdgeCounts[SuccIdx] = EdgeCount;
384 PB.addInternalCount(EdgeCount);
390 assert(allCountersAreAssigned() &&
391 "[ctx-prof] Expected all counters have been assigned.");
392 assert(allTakenPathsExit() &&
393 "[ctx-prof] Encountered a BB with more than one successor, where "
394 "all outgoing edges have a 0 count. This occurs in non-exiting "
395 "functions (message pumps, usually) which are not supported in the "
396 "contextual profiling case");
397 assert(allNonColdSelectsHaveProfile() &&
398 "[ctx-prof] All non-cold select instructions were expected to have "
403[[maybe_unused]]
bool areAllBBsReachable(
const Function &
F,
410void clearColdFunctionProfile(
Function &
F) {
419 if (isa<InstrProfCntrInstBase>(
I))
435 removeInstrumentation(
F);
441 const auto FlattenedProfile = CtxProf.flatten();
445 if (
F.isDeclaration())
448 assert(areAllBBsReachable(
451 "Function has unreacheable basic blocks. The expectation was that "
452 "DCE was run before.");
456 if (It == FlattenedProfile.end())
457 clearColdFunctionProfile(
F);
459 ProfileAnnotator S(
F, It->second,
PB);
460 S.assignProfileData();
466 M.setProfileSummary(
PB.getSummary()->getMD(M.getContext()),
Analysis containing CSE Info
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
This file provides the interface for IR based instrumentation passes ( (profile-gen,...
FunctionAnalysisManager FAM
ModuleAnalysisManager MAM
PassBuilder PB(Machine, PassOpts->PTO, std::nullopt, &PIC)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
static uint64_t getGUID(const Function &F)
LLVM Basic Block Representation.
const Instruction & front() const
const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
static InstrProfIncrementInst * getBBInstrumentation(BasicBlock &BB)
Get the instruction instrumenting a BB, or nullptr if not present.
static InstrProfIncrementInstStep * getSelectInstrumentation(SelectInst &SI)
Get the step instrumentation associated with a select
Implements a dense probed hash-table based set.
Analysis pass which computes a DominatorTree.
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
A Module instance is used to store all the information related to an LLVM module.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
static const ArrayRef< uint32_t > DefaultCutoffs
A vector of useful cutoff values for detailed summary.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
std::pair< iterator, bool > insert(const ValueT &V)
Pass manager infrastructure for declaring and invalidating analyses.
This is an optimization pass for GlobalISel generic memory operations.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
detail::scope_exit< std::decay_t< Callable > > make_scope_exit(Callable &&F)
auto successors(const MachineBasicBlock *BB)
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
auto pred_size(const MachineBasicBlock *BB)
auto succ_size(const MachineBasicBlock *BB)
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
bool isPresplitCoroSuspendExitEdge(const BasicBlock &Src, const BasicBlock &Dest)
void setProfMetadata(Module *M, Instruction *TI, ArrayRef< uint64_t > EdgeCounts, uint64_t MaxCount)