Go to the documentation of this file.
15 #include "llvm/Config/llvm-config.h"
26 #include <system_error>
29 using namespace sampleprof;
33 cl::desc(
"Cutoff value about how many symbols in profile symbol list "
34 "will be used. This is very useful for performance debugging"));
37 "generate-merged-base-profiles",
38 cl::desc(
"When generating nested context-sensitive profiles, always "
39 "generate extra base profile for function with all its context "
40 "profiles merged into it."));
43 namespace sampleprof {
// Identifies this error category: always yields the fixed string
// "llvm.sampleprof". Declared noexcept; overrides a virtual of the base class.
59 const char *
name()
const noexcept
override {
return "llvm.sampleprof"; }
61 std::string
message(
int IE)
const override {
67 return "Invalid sample profile data (bad magic)";
69 return "Unsupported sample profile format version";
71 return "Too much profile data";
73 return "Truncated profile data";
75 return "Malformed sample profile data";
77 return "Unrecognized sample profile encoding format";
79 return "Profile encoding format unsupported for writing operations";
81 return "Truncated function name table";
83 return "Unimplemented feature";
85 return "Counter overflow";
87 return "Ostream does not support seek";
89 return "Uncompress failure";
91 return "Zlib is unavailable";
93 return "Function hash mismatch";
125 for (
const auto &
I :
Other.getCallTargets()) {
131 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
141 OS <<
" " <<
I.first <<
":" <<
I.second;
146 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
161 OS << TotalSamples <<
", " << TotalHeadSamples <<
", " << BodySamples.size()
162 <<
" sampled lines\n";
165 if (!BodySamples.empty()) {
166 OS <<
"Samples collected in the function's body {\n";
168 for (
const auto &
SI : SortedBodySamples.
get()) {
170 OS <<
SI->first <<
": " <<
SI->second;
175 OS <<
"No samples collected in the function's body\n";
179 if (!CallsiteSamples.empty()) {
180 OS <<
"Samples collected in inlined callsites {\n";
183 for (
const auto &CS : SortedCallsiteSamples.
get()) {
184 for (
const auto &
FS : CS->second) {
186 OS << CS->first <<
": inlined callee: " <<
FS.second.getName() <<
": ";
187 FS.second.print(OS, Indent + 4);
193 OS <<
"No inlined callsites in this function\n";
205 std::vector<NameFunctionSamples> &SortedProfiles) {
206 for (
const auto &
I : ProfileMap) {
207 assert(
I.first ==
I.second.getContext() &&
"Inconsistent profile map");
208 SortedProfiles.push_back(std::make_pair(
I.second.getContext(), &
I.second));
212 if (A.second->getTotalSamples() ==
B.second->getTotalSamples())
213 return A.first <
B.first;
214 return A.second->getTotalSamples() >
B.second->getTotalSamples();
219 return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) &
231 DIL->getDiscriminator()),
234 unsigned Discriminator =
242 uint64_t NameHash = std::hash<std::string>{}(CalleeName.
str());
245 return NameHash + (LocId << 5) + LocId;
254 for (DIL = DIL->getInlinedAt(); DIL; DIL = DIL->getInlinedAt()) {
256 StringRef Name = PrevDIL->getScope()->getSubprogram()->getLinkageName();
258 Name = PrevDIL->getScope()->getSubprogram()->getName();
268 for (
int i =
S.size() - 1;
i >= 0 &&
FS !=
nullptr;
i--) {
269 FS =
FS->findFunctionSamplesAt(
S[
i].first,
S[
i].second, Remapper);
276 for (
const auto &BS : BodySamples)
277 for (
const auto &TS : BS.second.getCallTargets())
278 NameSet.
insert(TS.getKey());
280 for (
const auto &CS : CallsiteSamples) {
281 for (
const auto &NameFS : CS.second) {
282 NameSet.
insert(NameFS.first);
283 NameFS.second.findAllNames(NameSet);
293 std::string CalleeGUID;
296 auto iter = CallsiteSamples.find(Loc);
297 if (iter == CallsiteSamples.end())
299 auto FS = iter->second.find(CalleeName);
300 if (
FS != iter->second.end())
304 auto FS = iter->second.find(*NameInProfile);
305 if (
FS != iter->second.end())
312 if (!CalleeName.
empty())
316 for (
const auto &NameFS : iter->second)
317 if (NameFS.second.getTotalSamples() >= MaxTotalSamples) {
318 MaxTotalSamples = NameFS.second.getTotalSamples();
324 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
330 const char *ListStart =
reinterpret_cast<const char *
>(
Data);
336 Size += Str.size() + 1;
346 uint32_t ColdContextFrameLength,
bool TrimBaseProfileOnly) {
347 if (!TrimColdContext && !MergeColdContext)
357 if (MergeColdContext)
358 TrimBaseProfileOnly =
false;
362 std::vector<std::pair<SampleContext, const FunctionSamples *>> ColdProfiles;
363 for (
const auto &
I : ProfileMap) {
367 (!TrimBaseProfileOnly ||
Context.isBaseContext()))
368 ColdProfiles.emplace_back(
Context, &
I.second);
374 for (
const auto &
I : ColdProfiles) {
375 if (MergeColdContext) {
381 MergedProfile.
merge(*
I.second);
383 ProfileMap.erase(
I.first);
387 for (
const auto &
I : MergedProfileMap) {
390 ProfileMap.find(
I.first) == ProfileMap.end())
401 OrigProfile.
merge(
I.second);
406 std::vector<SampleContext> ProfilesToBeRemoved;
408 for (
auto &
I : ProfileMap) {
426 auto Ret = ProfilesToBeAdded.emplace(
Context, FProfile);
428 assert(
Ret.second &&
"Context conflict during canonicalization");
429 ProfilesToBeRemoved.push_back(
I.first);
432 for (
auto &
I : ProfilesToBeRemoved) {
436 for (
auto &
I : ProfilesToBeAdded) {
437 ProfileMap.emplace(
I.first,
I.second);
444 std::vector<StringRef> SortedList(Syms.begin(), Syms.end());
447 std::string OutputString;
448 for (
auto &Sym : SortedList) {
449 OutputString.append(Sym.str());
450 OutputString.append(1,
'\0');
458 OS <<
"======== Dump profile symbol list ========\n";
459 std::vector<StringRef> SortedList(Syms.begin(), Syms.end());
462 for (
auto &Sym : SortedList)
472 assert(It->second.FuncName == CalleeName &&
473 "Hash collision for child context node");
482 : ProfileMap(Profiles) {
483 for (
auto &FuncSample : Profiles) {
485 auto *NewNode = getOrCreateContextPath(FSamples->
getContext());
486 assert(!NewNode->FuncSamples &&
"New node cannot have sample profile");
487 NewNode->FuncSamples = FSamples;
493 auto Node = &RootFrame;
495 for (
auto &Callsite :
Context.getContextFrames()) {
496 Node = Node->getOrCreateChildFrame(CallSiteLoc, Callsite.FuncName);
497 CallSiteLoc = Callsite.Location;
506 auto *NodeProfile = Node.FuncSamples;
507 for (
auto &It : Node.AllChildFrames) {
508 auto &ChildNode = It.second;
510 auto *ChildProfile = ChildNode.FuncSamples;
518 auto &SamplesMap = NodeProfile->functionSamplesAt(ChildNode.CallSiteLoc);
519 SamplesMap.emplace(OrigChildContext.
getName().
str(), *ChildProfile);
520 NodeProfile->addTotalSamples(ChildProfile->getTotalSamples());
523 auto Count = NodeProfile->removeCalledTargetAndBodySample(
524 ChildNode.CallSiteLoc.LineOffset, ChildNode.CallSiteLoc.Discriminator,
526 NodeProfile->removeTotalSamples(Count);
536 ProfileMap[ChildProfile->getContext()].merge(*ChildProfile);
538 ProfileMap[ChildProfile->getContext()].merge(*ChildProfile);
539 auto &SamplesMap = NodeProfile->functionSamplesAt(ChildNode.CallSiteLoc);
540 SamplesMap[ChildProfile->getName().str()].getContext().setAttribute(
545 ProfileMap.erase(OrigChildContext);
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
This is an optimization pass for GlobalISel generic memory operations.
const SamplesWithLocList & get() const
@ ContextDuplicatedIntoBase
static bool ProfileIsProbeBased
static ManagedStatic< SampleProfErrorCategoryType > ErrorCategory
bool hasCalls() const
Return true if this sample record contains function calls.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
std::pair< SampleContext, const FunctionSamples * > NameFunctionSamples
std::unordered_map< SampleContext, FunctionSamples, SampleContext::Hash > SampleProfileMap
void findAllNames(DenseSet< StringRef > &NameSet) const
StringRef getName() const
Return the function name.
std::pair< iterator, bool > insert(const ValueT &V)
FrameNode * getOrCreateChildFrame(const LineLocation &CallSite, StringRef CalleeName)
cl::opt< bool > GenerateMergedBaseProfiles("generate-merged-base-profiles", cl::desc("When generating nested context-sensitive profiles, always " "generate extra base profile for function with all its context " "profiles merged into it."))
static cl::opt< uint64_t > ProfileSymbolListCutOff("profile-symbol-list-cutoff", cl::Hidden, cl::init(-1), cl::desc("Cutoff value about how many symbols in profile symbol list " "will be used. This is very useful for performance debugging"))
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
static bool HasUniqSuffix
Whether the profile contains any ".__uniq." suffix in a name.
@ unsupported_writing_format
static StringRef getRepInFormat(StringRef Name, bool UseMD5, std::string &GUIDBuf)
Get the proper representation of a string according to whether the current Format uses MD5 to represent the string.
uint64_t getTotalSamples() const
Return the total number of samples collected inside the function.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
ManagedStatic - This transparently changes the behavior of global statics to be lazily constructed on demand.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Optional< StringRef > lookUpNameInProfile(StringRef FunctionName)
Return the equivalent name in the profile for FunctionName if it exists.
const std::error_category & sampleprof_category()
static cl::opt< unsigned > ColdCountThreshold("mfs-count-threshold", cl::desc("Minimum number of times a block must be executed to be retained."), cl::init(1), cl::Hidden)
This class implements an extremely fast bulk output stream that can only output to a stream.
std::error_code read(const uint8_t *Data, uint64_t ListSize)
LLVM_NODISCARD std::string str() const
str - Get the contents as an std::string.
std::map< uint64_t, FrameNode > AllChildFrames
void sortFuncProfiles(const SampleProfileMap &ProfileMap, std::vector< NameFunctionSamples > &SortedProfiles)
static bool ProfileIsFS
If this profile uses flow sensitive discriminators.
Implements a dense probed hash-table based set.
void setName(StringRef FunctionName)
Set the name of the function and clear the current context.
constexpr LLVM_NODISCARD bool empty() const
empty - Check if the string is empty.
Representation of the samples collected for a function.
static uint64_t getCallSiteHash(StringRef CalleeName, const LineLocation &Callsite)
Returns a unique hash code for a combination of a callsite location and the callee function name.
initializer< Ty > init(const Ty &Val)
void setContext(const SampleContext &FContext)
uint64_t getFunctionHash() const
const SortedCallTargetSet getSortedCallTargets() const
message(STATUS "Targeting ${t}") add_subdirectory($
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
StringRef getName() const
StandardInstrumentations SI(Debug, VerifyEach)
raw_ostream & operator<<(raw_ostream &OS, const LineLocation &Loc)
static bool UseMD5
Whether the profile uses MD5 to represent string.
static uint32_t extractProbeIndex(uint32_t Value)
static unsigned getOffset(const DILocation *DIL)
Returns the line offset to the start line of the subprogram.
Represents the relative location of an instruction.
sampleprof_error MergeResult(sampleprof_error &Accumulator, sampleprof_error Result)
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Sort a LocationT->SampleT map by LocationT.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
void add(StringRef Name, bool copy=false)
copy indicates whether we need to copy the underlying memory for the input Name.
void print(raw_ostream &OS=dbgs(), unsigned Indent=0) const
Print the samples collected for a function on stream OS.
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
const FunctionSamples * findFunctionSamplesAt(const LineLocation &Loc, StringRef CalleeName, SampleProfileReaderItaniumRemapper *Remapper) const
Returns a pointer to FunctionSamples at the given callsite location Loc with callee CalleeName.
@ ostream_seek_unsupported
void stable_sort(R &&Range)
std::error_code write(raw_ostream &OS)
void print(raw_ostream &OS) const
void print(raw_ostream &OS, unsigned Indent) const
Print the sample record to the stream OS indented by Indent.
void sort(IteratorTy Start, IteratorTy End)
Representation of a single sample record.
FrameNode(StringRef FName=StringRef(), FunctionSamples *FSamples=nullptr, LineLocation CallLoc={0, 0})
sampleprof_error merge(const FunctionSamples &Other, uint64_t Weight=1)
Merge the samples in Other into this one.
void trimAndMergeColdContextProfiles(uint64_t ColdCountThreshold, bool TrimColdContext, bool MergeColdContext, uint32_t ColdContextFrameLength, bool TrimBaseProfileOnly)
sampleprof_error addCalledTarget(StringRef F, uint64_t S, uint64_t Weight=1)
Add called function F with samples S.
SampleContext & getContext() const
static StringRef getCanonicalFnName(const Function &F)
Return the canonical name for a function, taking into account suffix elision policy attributes.
unsigned getBaseDiscriminator() const
Returns the base discriminator stored in the discriminator.
void canonicalizeContextProfiles()
raw_ostream & indent(unsigned NumSpaces)
indent - Insert 'NumSpaces' spaces.
sampleprof_error merge(const SampleRecord &Other, uint64_t Weight=1)
Merge the samples in Other into this record.
SampleProfileReaderItaniumRemapper remaps the profile data from a sample profile data reader, by applying a provided set of equivalences between components of the symbol names in the profile.
void dump(raw_ostream &OS=dbgs()) const
static ManagedStatic< _object_error_category > error_category
sampleprof_error addSamples(uint64_t S, uint64_t Weight=1)
Increment the number of samples for this record by S.
CSProfileConverter(SampleProfileMap &Profiles)
Optional< std::vector< StOtherPiece > > Other
static LineLocation getCallSiteIdentifier(const DILocation *DIL, bool ProfileIsFS=false)
Returns a unique call site identifier for a given debug location of a call instruction.
const FunctionSamples * findFunctionSamples(const DILocation *DIL, SampleProfileReaderItaniumRemapper *Remapper=nullptr) const
Get the FunctionSamples of the inline instance where DIL originates from.
static bool ProfileIsPreInlined