LLVM  14.0.0git
SyntheticCountsPropagation.cpp
Go to the documentation of this file.
1 //=- SyntheticCountsPropagation.cpp - Propagate function counts --*- C++ -*-=//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a transformation that synthesizes entry counts for
10 // functions and attaches !prof metadata to functions with the synthesized
11 // counts. The presence of !prof metadata with counter name set to
12 // 'synthesized_function_entry_count' indicate that the value of the counter is
13 // an estimation of the likely execution count of the function. This transform
14 // is applied only in non PGO mode as functions get 'real' profile-based
15 // function entry counts in the PGO mode.
16 //
17 // The transformation works by first assigning some initial values to the entry
18 // counts of all functions and then doing a top-down traversal of the
19 // callgraph-scc to propagate the counts. For each function the set of callsites
20 // and their relative block frequency is gathered. The relative block frequency
21 // multiplied by the entry count of the caller and added to the callee's entry
22 // count. For non-trivial SCCs, the new counts are computed from the previous
23 // counts and updated in one shot.
24 //
25 //===----------------------------------------------------------------------===//
26 
28 #include "llvm/ADT/DenseSet.h"
29 #include "llvm/ADT/STLExtras.h"
34 #include "llvm/IR/Function.h"
35 #include "llvm/IR/Instructions.h"
36 #include "llvm/IR/Module.h"
38 #include "llvm/Support/Debug.h"
40 
41 using namespace llvm;
44 
45 #define DEBUG_TYPE "synthetic-counts-propagation"
46 
47 namespace llvm {
49  InitialSyntheticCount("initial-synthetic-count", cl::Hidden, cl::init(10),
51  cl::desc("Initial value of synthetic entry count"));
52 } // namespace llvm
53 
54 /// Initial synthetic count assigned to inline functions.
56  "inline-synthetic-count", cl::Hidden, cl::init(15), cl::ZeroOrMore,
57  cl::desc("Initial synthetic entry count for inline functions."));
58 
59 /// Initial synthetic count assigned to cold functions.
61  "cold-synthetic-count", cl::Hidden, cl::init(5), cl::ZeroOrMore,
62  cl::desc("Initial synthetic entry count for cold functions."));
63 
64 // Assign initial synthetic entry counts to functions.
65 static void
67  auto MayHaveIndirectCalls = [](Function &F) {
68  for (auto *U : F.users()) {
69  if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
70  return true;
71  }
72  return false;
73  };
74 
75  for (Function &F : M) {
76  uint64_t InitialCount = InitialSyntheticCount;
77  if (F.isDeclaration())
78  continue;
79  if (F.hasFnAttribute(Attribute::AlwaysInline) ||
80  F.hasFnAttribute(Attribute::InlineHint)) {
81  // Use a higher value for inline functions to account for the fact that
82  // these are usually beneficial to inline.
83  InitialCount = InlineSyntheticCount;
84  } else if (F.hasLocalLinkage() && !MayHaveIndirectCalls(F)) {
85  // Local functions without inline hints get counts only through
86  // propagation.
87  InitialCount = 0;
88  } else if (F.hasFnAttribute(Attribute::Cold) ||
89  F.hasFnAttribute(Attribute::NoInline)) {
90  // Use a lower value for noinline and cold functions.
91  InitialCount = ColdSyntheticCount;
92  }
93  SetCount(&F, InitialCount);
94  }
95 }
96 
102  // Set initial entry counts.
104  M, [&](Function *F, uint64_t Count) { Counts[F] = Scaled64(Count, 0); });
105 
106  // Edge includes information about the source. Hence ignore the first
107  // parameter.
108  auto GetCallSiteProfCount = [&](const CallGraphNode *,
109  const CallGraphNode::CallRecord &Edge) {
110  Optional<Scaled64> Res = None;
111  if (!Edge.first)
112  return Res;
113  CallBase &CB = *cast<CallBase>(*Edge.first);
114  Function *Caller = CB.getCaller();
115  auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(*Caller);
116 
117  // Now compute the callsite count from relative frequency and
118  // entry count:
119  BasicBlock *CSBB = CB.getParent();
120  Scaled64 EntryFreq(BFI.getEntryFreq(), 0);
121  Scaled64 BBCount(BFI.getBlockFreq(CSBB).getFrequency(), 0);
122  BBCount /= EntryFreq;
123  BBCount *= Counts[Caller];
124  return Optional<Scaled64>(BBCount);
125  };
126 
127  CallGraph CG(M);
128  // Propgate the entry counts on the callgraph.
130  &CG, GetCallSiteProfCount, [&](const CallGraphNode *N, Scaled64 New) {
131  auto F = N->getFunction();
132  if (!F || F->isDeclaration())
133  return;
134 
135  Counts[F] += New;
136  });
137 
138  // Set the counts as metadata.
139  for (auto Entry : Counts) {
140  Entry.first->setEntryCount(ProfileCount(
141  Entry.second.template toInt<uint64_t>(), Function::PCT_Synthetic));
142  }
143 
144  return PreservedAnalyses::all();
145 }
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AllocatorList.h:23
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:783
llvm::Function
Definition: Function.h:62
llvm::CallGraphNode::CallRecord
std::pair< Optional< WeakTrackingVH >, CallGraphNode * > CallRecord
A pair of the calling instruction (a call or invoke) and the call graph node being called.
Definition: CallGraph.h:179
llvm::CallingConv::Cold
@ Cold
Definition: CallingConv.h:48
llvm::CallGraph
The basic data container for the call graph of a Module of IR.
Definition: CallGraph.h:73
FAM
FunctionAnalysisManager FAM
Definition: PassBuilderBindings.cpp:59
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
llvm::SyntheticCountsPropagation::run
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
Definition: SyntheticCountsPropagation.cpp:97
Module.h
llvm::Optional
Definition: APInt.h:33
llvm::SyntheticCountsUtils::propagate
static void propagate(const CallGraphType &CG, GetProfCountTy GetProfCount, AddCountTy AddCount)
Propgate synthetic entry counts on a callgraph CG.
Definition: SyntheticCountsUtils.cpp:86
STLExtras.h
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
CommandLine.h
llvm::Function::PCT_Synthetic
@ PCT_Synthetic
Definition: Function.h:250
ColdSyntheticCount
static cl::opt< int > ColdSyntheticCount("cold-synthetic-count", cl::Hidden, cl::init(5), cl::ZeroOrMore, cl::desc("Initial synthetic entry count for cold functions."))
Initial synthetic count assigned to cold functions.
MAM
ModuleAnalysisManager MAM
Definition: PassBuilderBindings.cpp:61
DenseSet.h
llvm::InitialSyntheticCount
cl::opt< int > InitialSyntheticCount
llvm::CallGraphNode
A node in the call graph for a module.
Definition: CallGraph.h:167
llvm::BlockFrequencyAnalysis
Analysis pass which computes BlockFrequencyInfo.
Definition: BlockFrequencyInfo.h:112
llvm::None
const NoneType None
Definition: None.h:23
llvm::CallBase::getCaller
Function * getCaller()
Helper to get the caller (the parent function).
Definition: Instructions.cpp:282
llvm::cl::ZeroOrMore
@ ZeroOrMore
Definition: CommandLine.h:120
llvm::function_ref
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLExtras.h:168
llvm::cl::opt
Definition: CommandLine.h:1432
llvm::ProfileCount
Function::ProfileCount ProfileCount
Definition: SampleProfileLoaderBaseImpl.h:46
ProfileCount
Function::ProfileCount ProfileCount
Definition: SyntheticCountsPropagation.cpp:43
uint64_t
ProfileSummaryInfo.h
InlineSyntheticCount
static cl::opt< int > InlineSyntheticCount("inline-synthetic-count", cl::Hidden, cl::init(15), cl::ZeroOrMore, cl::desc("Initial synthetic entry count for inline functions."))
Initial synthetic count assigned to inline functions.
llvm::DenseMap
Definition: DenseMap.h:714
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:441
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
SyntheticCountsPropagation.h
SyntheticCountsUtils.h
BlockFrequencyInfo.h
llvm::AMDGPUISD::BFI
@ BFI
Definition: AMDGPUISelLowering.h:431
llvm::ScaledNumber< uint64_t >
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:161
Function.h
Scaled64
ScaledNumber< uint64_t > Scaled64
Definition: SyntheticCountsPropagation.cpp:42
CallGraph.h
Instructions.h
initializeCounts
static void initializeCounts(Module &M, function_ref< void(Function *, uint64_t)> SetCount)
Definition: SyntheticCountsPropagation.cpp:66
N
#define N
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:94
llvm::CallBase
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1176
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
llvm::InnerAnalysisManagerProxy
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:940
llvm::Function::ProfileCount
Class to represent profile counts.
Definition: Function.h:255
llvm::cl::desc
Definition: CommandLine.h:412
raw_ostream.h
Debug.h