15#include "llvm/Config/llvm-config.h"
25#include <system_error>
28using namespace sampleprof;
32 cl::desc(
"Cutoff value about how many symbols in profile symbol list "
33 "will be used. This is very useful for performance debugging"));
36 "generate-merged-base-profiles",
37 cl::desc(
"When generating nested context-sensitive profiles, always "
38 "generate extra base profile for function with all its context "
39 "profiles merged into it."));
57class SampleProfErrorCategoryType :
public std::error_category {
58 const char *
name()
const noexcept
override {
return "llvm.sampleprof"; }
60 std::string message(
int IE)
const override {
63 case sampleprof_error::success:
65 case sampleprof_error::bad_magic:
66 return "Invalid sample profile data (bad magic)";
67 case sampleprof_error::unsupported_version:
68 return "Unsupported sample profile format version";
69 case sampleprof_error::too_large:
70 return "Too much profile data";
71 case sampleprof_error::truncated:
72 return "Truncated profile data";
73 case sampleprof_error::malformed:
74 return "Malformed sample profile data";
75 case sampleprof_error::unrecognized_format:
76 return "Unrecognized sample profile encoding format";
77 case sampleprof_error::unsupported_writing_format:
78 return "Profile encoding format unsupported for writing operations";
79 case sampleprof_error::truncated_name_table:
80 return "Truncated function name table";
81 case sampleprof_error::not_implemented:
82 return "Unimplemented feature";
83 case sampleprof_error::counter_overflow:
84 return "Counter overflow";
85 case sampleprof_error::ostream_seek_unsupported:
86 return "Ostream does not support seek";
87 case sampleprof_error::uncompress_failed:
88 return "Uncompress failure";
89 case sampleprof_error::zlib_unavailable:
90 return "Zlib is unavailable";
91 case sampleprof_error::hash_mismatch:
92 return "Function hash mismatch";
101 static SampleProfErrorCategoryType ErrorCategory;
102 return ErrorCategory;
123 for (
const auto &
I :
Other.getCallTargets()) {
129#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
139 OS <<
" " <<
I.first <<
":" <<
I.second;
144#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
159 OS << TotalSamples <<
", " << TotalHeadSamples <<
", " << BodySamples.size()
160 <<
" sampled lines\n";
163 if (!BodySamples.empty()) {
164 OS <<
"Samples collected in the function's body {\n";
166 for (
const auto &
SI : SortedBodySamples.
get()) {
168 OS <<
SI->first <<
": " <<
SI->second;
173 OS <<
"No samples collected in the function's body\n";
177 if (!CallsiteSamples.empty()) {
178 OS <<
"Samples collected in inlined callsites {\n";
181 for (
const auto &CS : SortedCallsiteSamples.
get()) {
182 for (
const auto &FS : CS->second) {
184 OS << CS->first <<
": inlined callee: " << FS.second.getName() <<
": ";
185 FS.second.print(
OS, Indent + 4);
191 OS <<
"No inlined callsites in this function\n";
203 std::vector<NameFunctionSamples> &SortedProfiles) {
204 for (
const auto &
I : ProfileMap) {
205 assert(
I.first ==
I.second.getContext() &&
"Inconsistent profile map");
206 SortedProfiles.push_back(std::make_pair(
I.second.getContext(), &
I.second));
210 if (
A.second->getTotalSamples() ==
B.second->getTotalSamples())
211 return A.first <
B.first;
212 return A.second->getTotalSamples() >
B.second->getTotalSamples();
217 return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) &
229 DIL->getDiscriminator()),
232 unsigned Discriminator =
240 uint64_t NameHash = std::hash<std::string>{}(CalleeName.
str());
243 return NameHash + (LocId << 5) + LocId;
252 for (DIL = DIL->getInlinedAt(); DIL; DIL = DIL->getInlinedAt()) {
254 StringRef Name = PrevDIL->getScope()->getSubprogram()->getLinkageName();
256 Name = PrevDIL->getScope()->getSubprogram()->getName();
266 for (
int i = S.
size() - 1; i >= 0 && FS !=
nullptr; i--) {
267 FS = FS->findFunctionSamplesAt(S[i].first, S[i].second, Remapper);
274 for (
const auto &BS : BodySamples)
275 for (
const auto &TS : BS.second.getCallTargets())
276 NameSet.
insert(TS.getKey());
278 for (
const auto &CS : CallsiteSamples) {
279 for (
const auto &NameFS : CS.second) {
280 NameSet.
insert(NameFS.first);
281 NameFS.second.findAllNames(NameSet);
291 std::string CalleeGUID;
295 if (iter == CallsiteSamples.end())
297 auto FS = iter->second.find(CalleeName);
298 if (FS != iter->second.end())
302 auto FS = iter->second.find(*NameInProfile);
303 if (FS != iter->second.end())
310 if (!CalleeName.
empty())
314 for (
const auto &NameFS : iter->second)
315 if (NameFS.second.getTotalSamples() >= MaxTotalSamples) {
316 MaxTotalSamples = NameFS.second.getTotalSamples();
322#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
328 const char *ListStart =
reinterpret_cast<const char *
>(
Data);
334 Size += Str.size() + 1;
344 uint32_t ColdContextFrameLength,
bool TrimBaseProfileOnly) {
345 if (!TrimColdContext && !MergeColdContext)
355 if (MergeColdContext)
356 TrimBaseProfileOnly =
false;
360 std::vector<std::pair<SampleContext, const FunctionSamples *>> ColdProfiles;
361 for (
const auto &
I : ProfileMap) {
365 (!TrimBaseProfileOnly ||
Context.isBaseContext()))
366 ColdProfiles.emplace_back(
Context, &
I.second);
372 for (
const auto &
I : ColdProfiles) {
373 if (MergeColdContext) {
379 MergedProfile.
merge(*
I.second);
381 ProfileMap.erase(
I.first);
385 for (
const auto &
I : MergedProfileMap) {
388 ProfileMap.find(
I.first) == ProfileMap.end())
399 OrigProfile.
merge(
I.second);
404 std::vector<SampleContext> ProfilesToBeRemoved;
406 for (
auto &
I : ProfileMap) {
424 auto Ret = ProfilesToBeAdded.emplace(
Context, FProfile);
426 assert(Ret.second &&
"Context conflict during canonicalization");
427 ProfilesToBeRemoved.push_back(
I.first);
430 for (
auto &
I : ProfilesToBeRemoved) {
434 for (
auto &
I : ProfilesToBeAdded) {
435 ProfileMap.emplace(
I.first,
I.second);
442 std::vector<StringRef> SortedList(Syms.begin(), Syms.end());
445 std::string OutputString;
446 for (
auto &
Sym : SortedList) {
447 OutputString.append(
Sym.str());
448 OutputString.append(1,
'\0');
456 OS <<
"======== Dump profile symbol list ========\n";
457 std::vector<StringRef> SortedList(Syms.begin(), Syms.end());
460 for (
auto &
Sym : SortedList)
470 assert(It->second.FuncName == CalleeName &&
471 "Hash collision for child context node");
480 : ProfileMap(Profiles) {
481 for (
auto &FuncSample : Profiles) {
483 auto *NewNode = getOrCreateContextPath(FSamples->
getContext());
484 assert(!NewNode->FuncSamples &&
"New node cannot have sample profile");
485 NewNode->FuncSamples = FSamples;
490ProfileConverter::getOrCreateContextPath(
const SampleContext &Context) {
491 auto Node = &RootFrame;
493 for (
auto &Callsite :
Context.getContextFrames()) {
494 Node = Node->getOrCreateChildFrame(CallSiteLoc, Callsite.FuncName);
495 CallSiteLoc = Callsite.Location;
504 auto *NodeProfile =
Node.FuncSamples;
505 for (
auto &It :
Node.AllChildFrames) {
506 auto &ChildNode = It.second;
508 auto *ChildProfile = ChildNode.FuncSamples;
516 auto &SamplesMap = NodeProfile->functionSamplesAt(ChildNode.CallSiteLoc);
517 SamplesMap.emplace(OrigChildContext.
getName().
str(), *ChildProfile);
518 NodeProfile->addTotalSamples(ChildProfile->getTotalSamples());
521 auto Count = NodeProfile->removeCalledTargetAndBodySample(
522 ChildNode.CallSiteLoc.LineOffset, ChildNode.CallSiteLoc.Discriminator,
524 NodeProfile->removeTotalSamples(Count);
534 ProfileMap[ChildProfile->getContext()].merge(*ChildProfile);
536 ProfileMap[ChildProfile->getContext()].merge(*ChildProfile);
537 auto &SamplesMap = NodeProfile->functionSamplesAt(ChildNode.CallSiteLoc);
538 SamplesMap[ChildProfile->getName().str()].getContext().setAttribute(
543 ProfileMap.erase(OrigChildContext);
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
static cl::opt< unsigned > ColdCountThreshold("mfs-count-threshold", cl::desc("Minimum number of times a block must be executed to be retained."), cl::init(1), cl::Hidden)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static cl::opt< bool > GenerateMergedBaseProfiles("generate-merged-base-profiles", cl::desc("When generating nested context-sensitive profiles, always " "generate extra base profile for function with all its context " "profiles merged into it."))
static cl::opt< uint64_t > ProfileSymbolListCutOff("profile-symbol-list-cutoff", cl::Hidden, cl::init(-1), cl::desc("Cutoff value about how many symbols in profile symbol list " "will be used. This is very useful for performance debugging"))
unsigned getBaseDiscriminator() const
Returns the base discriminator stored in the discriminator.
Implements a dense probed hash-table based set.
reference emplace_back(ArgTypes &&... Args)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
std::string str() const
str - Get the contents as an std::string.
constexpr bool empty() const
empty - Check if the string is empty.
std::pair< iterator, bool > insert(const ValueT &V)
This class implements an extremely fast bulk output stream that can only output to a stream.
raw_ostream & indent(unsigned NumSpaces)
indent - Insert 'NumSpaces' spaces.
Representation of the samples collected for a function.
static bool ProfileIsPreInlined
const LineLocation & mapIRLocToProfileLoc(const LineLocation &IRLoc) const
uint64_t getFunctionHash() const
void findAllNames(DenseSet< StringRef > &NameSet) const
const FunctionSamples * findFunctionSamplesAt(const LineLocation &Loc, StringRef CalleeName, SampleProfileReaderItaniumRemapper *Remapper) const
Returns a pointer to FunctionSamples at the given callsite location Loc with callee CalleeName.
static bool ProfileIsProbeBased
static StringRef getCanonicalFnName(const Function &F)
Return the canonical name for a function, taking into account suffix elision policy attributes.
static uint64_t getCallSiteHash(StringRef CalleeName, const LineLocation &Callsite)
Returns a unique hash code for a combination of a callsite location and the callee function name.
static unsigned getOffset(const DILocation *DIL)
Returns the line offset to the start line of the subprogram.
static bool ProfileIsFS
If this profile uses flow sensitive discriminators.
SampleContext & getContext() const
static bool HasUniqSuffix
Whether the profile contains any ".__uniq." suffix in a name.
uint64_t getTotalSamples() const
Return the total number of samples collected inside the function.
void print(raw_ostream &OS=dbgs(), unsigned Indent=0) const
Print the samples collected for a function on stream OS.
sampleprof_error merge(const FunctionSamples &Other, uint64_t Weight=1)
Merge the samples in Other into this one.
void setContext(const SampleContext &FContext)
static LineLocation getCallSiteIdentifier(const DILocation *DIL, bool ProfileIsFS=false)
Returns a unique call site identifier for a given debug location of a call instruction.
const FunctionSamples * findFunctionSamples(const DILocation *DIL, SampleProfileReaderItaniumRemapper *Remapper=nullptr) const
Get the FunctionSamples of the inline instance where DIL originates from.
StringRef getName() const
Return the function name.
static bool UseMD5
Whether the profile uses MD5 to represent string.
ProfileConverter(SampleProfileMap &Profiles)
std::error_code write(raw_ostream &OS)
void dump(raw_ostream &OS=dbgs()) const
void add(StringRef Name, bool copy=false)
copy indicates whether we need to copy the underlying memory for the input Name.
std::error_code read(const uint8_t *Data, uint64_t ListSize)
void trimAndMergeColdContextProfiles(uint64_t ColdCountThreshold, bool TrimColdContext, bool MergeColdContext, uint32_t ColdContextFrameLength, bool TrimBaseProfileOnly)
void canonicalizeContextProfiles()
StringRef getName() const
void setName(StringRef FunctionName)
Set the name of the function and clear the current context.
SampleProfileReaderItaniumRemapper remaps the profile data from a sample profile data reader, by applying a provided set of equivalences between components of the symbol names in the profile.
std::optional< StringRef > lookUpNameInProfile(StringRef FunctionName)
Return the equivalent name in the profile for FunctionName if it exists.
Representation of a single sample record.
bool hasCalls() const
Return true if this sample record contains function calls.
sampleprof_error merge(const SampleRecord &Other, uint64_t Weight=1)
Merge the samples in Other into this record.
sampleprof_error addSamples(uint64_t S, uint64_t Weight=1)
Increment the number of samples for this record by S.
sampleprof_error addCalledTarget(StringRef F, uint64_t S, uint64_t Weight=1)
Add called function F with samples S.
const SortedCallTargetSet getSortedCallTargets() const
void print(raw_ostream &OS, unsigned Indent) const
Print the sample record to the stream OS indented by Indent.
Sort a LocationT->SampleT map by LocationT.
const SamplesWithLocList & get() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
initializer< Ty > init(const Ty &Val)
void sortFuncProfiles(const SampleProfileMap &ProfileMap, std::vector< NameFunctionSamples > &SortedProfiles)
std::unordered_map< SampleContext, FunctionSamples, SampleContext::Hash > SampleProfileMap
std::pair< SampleContext, const FunctionSamples * > NameFunctionSamples
raw_ostream & operator<<(raw_ostream &OS, const LineLocation &Loc)
@ ContextDuplicatedIntoBase
static StringRef getRepInFormat(StringRef Name, bool UseMD5, std::string &GUIDBuf)
Get the proper representation of a string according to whether the current Format uses MD5 to represent the string.
This is an optimization pass for GlobalISel generic memory operations.
void stable_sort(R &&Range)
void sort(IteratorTy Start, IteratorTy End)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
const std::error_category & sampleprof_category()
sampleprof_error MergeResult(sampleprof_error &Accumulator, sampleprof_error Result)
static uint32_t extractProbeIndex(uint32_t Value)
Represents the relative location of an instruction.
void print(raw_ostream &OS) const
FrameNode * getOrCreateChildFrame(const LineLocation &CallSite, StringRef CalleeName)
std::map< uint64_t, FrameNode > AllChildFrames