41#define DEBUG_TYPE "memprof"
53 "memprof-match-hot-cold-new",
55 "Match allocation profiles onto existing hot/cold operator new calls"),
60 cl::desc(
"Print matching stats for each allocation "
61 "context in this module's profiles"),
66 cl::desc(
"Salvage stale MemProf profile"),
70 "memprof-attach-calleeguids",
72 "Attach calleeguids as value profile metadata for indirect calls."),
77 cl::desc(
"Min percent of cold bytes matched to hint allocation cold"));
81 cl::desc(
"If true, annotate the static data section prefix"));
84STATISTIC(NumOfMemProfMissing,
"Number of functions without memory profile.");
86 "Number of functions having mismatched memory profile hash.");
87STATISTIC(NumOfMemProfFunc,
"Number of functions having valid memory profile.");
89 "Number of alloc contexts in memory profile.");
91 "Number of callsites in memory profile.");
93 "Number of matched memory profile alloc contexts.");
95 "Number of matched memory profile allocs.");
97 "Number of matched memory profile callsites.");
99 "Number of global vars annotated with 'hot' section prefix.");
101 "Number of global vars annotated with 'unlikely' section prefix.");
103 "Number of global vars with unknown hotness (no section prefix).");
105 "Number of global vars with user-specified section (not annotated).");
110 I.setMetadata(LLVMContext::MD_callsite,
121 std::memcpy(&Id, Hash.data(),
sizeof(Hash));
133 for (
const auto &StackFrame :
AllocInfo->CallStack)
138 std::vector<ContextTotalSize> ContextSizeInfo;
140 auto TotalSize =
AllocInfo->Info.getTotalSize();
143 ContextSizeInfo.push_back({FullStackId, TotalSize});
155 return ProfileCallStack.
size() >= InlinedCallStack.
size() &&
158 return computeStackId(F) == StackId;
171 case LibFunc_ZnwmRKSt9nothrow_t:
172 case LibFunc_ZnwmSt11align_val_t:
173 case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t:
175 case LibFunc_ZnamRKSt9nothrow_t:
176 case LibFunc_ZnamSt11align_val_t:
177 case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t:
178 case LibFunc_size_returning_new:
179 case LibFunc_size_returning_new_aligned:
181 case LibFunc_Znwm12__hot_cold_t:
182 case LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t:
183 case LibFunc_ZnwmSt11align_val_t12__hot_cold_t:
184 case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t:
185 case LibFunc_Znam12__hot_cold_t:
186 case LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t:
187 case LibFunc_ZnamSt11align_val_t12__hot_cold_t:
188 case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t:
189 case LibFunc_size_returning_new_hot_cold:
190 case LibFunc_size_returning_new_aligned_hot_cold:
208 return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) &
213 if (
F.isDeclaration())
218 if (!isa<CallBase>(&
I) || isa<IntrinsicInst>(&
I))
221 auto *CB = dyn_cast<CallBase>(&
I);
222 auto *CalledFunction = CB->getCalledFunction();
224 if (!CalledFunction || CalledFunction->isIntrinsic())
227 StringRef CalleeName = CalledFunction->getName();
235 DIL = DIL->getInlinedAt()) {
236 StringRef CallerName = DIL->getSubprogramLinkageName();
238 "Be sure to enable -fdebug-info-for-profiling");
248 }
else if (!IsPresentInProfile(CalleeGUID)) {
260 Calls[CallerGUID].emplace_back(Loc, CalleeGUID);
261 CalleeName = CallerName;
269 for (
auto &[CallerGUID, CallList] : Calls) {
286 return CallsFromProfile.
contains(GUID);
290 for (
const auto &[CallerGUID, IRAnchors] : CallsFromIR) {
291 auto It = CallsFromProfile.
find(CallerGUID);
292 if (It == CallsFromProfile.
end())
294 const auto &ProfileAnchors = It->second;
297 longestCommonSequence<LineLocation, GlobalValue::GUID>(
298 ProfileAnchors, IRAnchors, std::equal_to<GlobalValue::GUID>(),
300 [[maybe_unused]]
bool Inserted =
301 UndriftMaps.
try_emplace(CallerGUID, std::move(Matchings)).second;
316 auto UndriftCallStack = [&](std::vector<Frame> &
CallStack) {
318 auto I = UndriftMaps.
find(
F.Function);
319 if (
I == UndriftMaps.
end())
322 if (J ==
I->second.end())
324 auto &NewLoc = J->second;
325 F.LineOffset = NewLoc.LineOffset;
326 F.Column = NewLoc.Column;
331 UndriftCallStack(AS.CallStack);
334 UndriftCallStack(CS.Frames);
343 if (
I.getMetadata(LLVMContext::MD_prof)) {
351 if (!ExistingVD.empty()) {
360 InstrProfValueData VD;
361 VD.Value = CalleeGUID;
368 TotalCount += VD.Count;
381 const std::set<const AllocationInfo *> &AllocInfoSet,
383 &FullStackIdToAllocMatchInfo) {
396 NumOfMemProfMatchedAllocContexts++;
401 TotalSize +=
AllocInfo->Info.getTotalSize();
403 TotalColdSize +=
AllocInfo->Info.getTotalSize();
408 FullStackIdToAllocMatchInfo[std::make_pair(FullStackId,
409 InlinedCallStack.
size())] = {
426 if (!AllocTrie.
empty()) {
427 NumOfMemProfMatchedAllocs++;
431 assert(MemprofMDAttached ==
I.hasMetadata(LLVMContext::MD_memprof));
432 if (MemprofMDAttached) {
473 const std::unordered_set<CallSiteEntry, CallSiteEntryHash> &CallSiteEntries,
474 Module &M, std::set<std::vector<uint64_t>> &MatchedCallSites) {
475 auto &Ctx = M.getContext();
481 NumOfMemProfMatchedCallSites++;
495 MatchedCallSites.insert(std::move(
CallStack));
506 &FullStackIdToAllocMatchInfo,
507 std::set<std::vector<uint64_t>> &MatchedCallSites,
510 auto &Ctx = M.getContext();
518 auto FuncName =
F.getName();
519 auto FuncGUID = Function::getGUIDAssumingExternalLinkage(FuncName);
520 std::optional<memprof::MemProfRecord> MemProfRec;
521 auto Err =
MemProfReader->getMemProfRecord(FuncGUID).moveInto(MemProfRec);
524 auto Err = IPE.
get();
525 bool SkipWarning =
false;
526 LLVM_DEBUG(
dbgs() <<
"Error in reading profile for Func " << FuncName
528 if (Err == instrprof_error::unknown_function) {
529 NumOfMemProfMissing++;
532 }
else if (Err == instrprof_error::hash_mismatch) {
533 NumOfMemProfMismatch++;
539 LLVM_DEBUG(
dbgs() <<
"hash mismatch (skip=" << SkipWarning <<
")");
545 std::string Msg = (IPE.
message() +
Twine(
" ") +
F.getName().str() +
546 Twine(
" Hash = ") + std::to_string(FuncGUID))
566 bool ProfileHasColumns =
false;
570 std::map<uint64_t, std::set<const AllocationInfo *>> LocHashToAllocInfo;
574 std::map<uint64_t, std::unordered_set<CallSiteEntry, CallSiteEntryHash>>
576 for (
auto &AI : MemProfRec->AllocSites) {
577 NumOfMemProfAllocContextProfiles++;
582 LocHashToAllocInfo[StackId].insert(&AI);
583 ProfileHasColumns |= AI.CallStack[0].Column;
585 for (
auto &CS : MemProfRec->CallSites) {
586 NumOfMemProfCallSiteProfiles++;
590 for (
auto &StackFrame : CS.Frames) {
594 LocHashToCallSites[StackId].insert({FrameSlice, CalleeGuids});
596 ProfileHasColumns |= StackFrame.Column;
598 if (StackFrame.Function == FuncGUID)
601 assert(
Idx <= CS.Frames.size() && CS.Frames[
Idx - 1].Function == FuncGUID);
605 return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) &
613 if (
I.isDebugOrPseudoInst())
617 auto *CI = dyn_cast<CallBase>(&
I);
620 auto *CalledFunction = CI->getCalledFunction();
621 if (CalledFunction && CalledFunction->isIntrinsic())
627 bool LeafFound =
false;
633 auto AllocInfoIter = LocHashToAllocInfo.
end();
634 auto CallSitesIter = LocHashToCallSites.end();
635 for (
const DILocation *DIL =
I.getDebugLoc(); DIL !=
nullptr;
636 DIL = DIL->getInlinedAt()) {
639 StringRef Name = DIL->getScope()->getSubprogram()->getLinkageName();
641 Name = DIL->getScope()->getSubprogram()->getName();
642 auto CalleeGUID = Function::getGUIDAssumingExternalLinkage(
Name);
644 ProfileHasColumns ? DIL->getColumn() : 0);
650 AllocInfoIter = LocHashToAllocInfo.find(StackId);
651 CallSitesIter = LocHashToCallSites.find(StackId);
652 if (AllocInfoIter != LocHashToAllocInfo.end() ||
653 CallSitesIter != LocHashToCallSites.end())
667 if (AllocInfoIter != LocHashToAllocInfo.end() &&
671 AllocInfoIter->second, FullStackIdToAllocMatchInfo);
672 else if (CallSitesIter != LocHashToCallSites.end())
677 CallSitesIter->second, M, MatchedCallSites);
684 : MemoryProfileFileName(MemoryProfileFile), FS(FS) {
692 if (M.empty() && M.globals().empty())
696 auto &Ctx = M.getContext();
698 if (
Error E = ReaderOrErr.takeError()) {
707 std::move(ReaderOrErr.get());
710 MemoryProfileFileName.data(),
StringRef(
"Cannot get MemProfReader")));
716 "Not a memory profile"));
721 annotateGlobalVariables(M,
MemProfReader->getDataAccessProfileData());
739 FullStackIdToAllocMatchInfo;
743 std::set<std::vector<uint64_t>> MatchedCallSites;
747 MaxColdSize = MemProfSum->getMaxColdTotalSize();
750 if (
F.isDeclaration())
756 MatchedCallSites, UndriftMaps, ORE, MaxColdSize);
760 for (
const auto &[IdLengthPair,
Info] : FullStackIdToAllocMatchInfo) {
761 auto [Id,
Length] = IdLengthPair;
763 <<
" context with id " << Id <<
" has total profiled size "
764 <<
Info.TotalSize <<
" is matched with " <<
Length <<
" frames\n";
767 for (
const auto &
CallStack : MatchedCallSites) {
768 errs() <<
"MemProf callsite match for inline call stack";
770 errs() <<
" " << StackId;
788 if (Attrs.hasAttribute(
"bss-section") || Attrs.hasAttribute(
"data-section") ||
789 Attrs.hasAttribute(
"relro-section") ||
790 Attrs.hasAttribute(
"rodata-section"))
795bool MemProfUsePass::annotateGlobalVariables(
800 if (!DataAccessProf) {
802 MemoryProfileFileName.data(),
803 StringRef(
"Data access profiles not found in memprof. Ignore "
804 "-memprof-annotate-static-data-prefix."),
809 bool Changed =
false;
816 assert(!GVar.getSectionPrefix().has_value() &&
817 "GVar shouldn't have section prefix yet");
818 if (GVar.isDeclarationForLinker())
822 ++NumOfMemProfExplicitSectionGlobalVars;
824 <<
" has explicit section name. Skip annotating.\n");
833 if (
Name.starts_with(
".str")) {
841 std::optional<DataAccessProfRecord>
Record =
850 ++NumOfMemProfHotGlobalVars;
851 GVar.setSectionPrefix(
"hot");
854 <<
" is annotated as hot\n");
856 ++NumOfMemProfColdGlobalVars;
857 GVar.setSectionPrefix(
"unlikely");
860 <<
" is annotated as unlikely\n");
862 ++NumOfMemProfUnknownGlobalVars;
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
std::optional< std::vector< StOtherPiece > > Other
Module.h This file contains the declarations for the Module class.
static void handleCallSite(Instruction &I, const Function *CalledFunction, ArrayRef< uint64_t > InlinedCallStack, const std::unordered_set< CallSiteEntry, CallSiteEntryHash > &CallSiteEntries, Module &M, std::set< std::vector< uint64_t > > &MatchedCallSites)
static void addCallsiteMetadata(Instruction &I, ArrayRef< uint64_t > InlinedCallStack, LLVMContext &Ctx)
static bool isAllocationWithHotColdVariant(const Function *Callee, const TargetLibraryInfo &TLI)
static cl::opt< bool > ClMemProfAttachCalleeGuids("memprof-attach-calleeguids", cl::desc("Attach calleeguids as value profile metadata for indirect calls."), cl::init(true), cl::Hidden)
static void undriftMemProfRecord(const DenseMap< uint64_t, LocToLocMap > &UndriftMaps, memprof::MemProfRecord &MemProfRec)
static uint64_t computeStackId(GlobalValue::GUID Function, uint32_t LineOffset, uint32_t Column)
static cl::opt< bool > ClPrintMemProfMatchInfo("memprof-print-match-info", cl::desc("Print matching stats for each allocation " "context in this module's profiles"), cl::Hidden, cl::init(false))
static void addVPMetadata(Module &M, Instruction &I, ArrayRef< GlobalValue::GUID > CalleeGuids)
static cl::opt< bool > AnnotateStaticDataSectionPrefix("memprof-annotate-static-data-prefix", cl::init(false), cl::Hidden, cl::desc("If true, annotate the static data section prefix"))
static cl::opt< bool > SalvageStaleProfile("memprof-salvage-stale-profile", cl::desc("Salvage stale MemProf profile"), cl::init(false), cl::Hidden)
static cl::opt< unsigned > MinMatchedColdBytePercent("memprof-matching-cold-threshold", cl::init(100), cl::Hidden, cl::desc("Min percent of cold bytes matched to hint allocation cold"))
static bool hasExplicitSectionName(const GlobalVariable &GVar)
static cl::opt< bool > ClMemProfMatchHotColdNew("memprof-match-hot-cold-new", cl::desc("Match allocation profiles onto existing hot/cold operator new calls"), cl::Hidden, cl::init(false))
static AllocationType addCallStack(CallStackTrie &AllocTrie, const AllocationInfo *AllocInfo, uint64_t FullStackId)
static void readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader, const TargetLibraryInfo &TLI, std::map< std::pair< uint64_t, unsigned >, AllocMatchInfo > &FullStackIdToAllocMatchInfo, std::set< std::vector< uint64_t > > &MatchedCallSites, DenseMap< uint64_t, LocToLocMap > &UndriftMaps, OptimizationRemarkEmitter &ORE, uint64_t MaxColdSize)
static void handleAllocSite(Instruction &I, CallBase *CI, ArrayRef< uint64_t > InlinedCallStack, LLVMContext &Ctx, OptimizationRemarkEmitter &ORE, uint64_t MaxColdSize, const std::set< const AllocationInfo * > &AllocInfoSet, std::map< std::pair< uint64_t, unsigned >, AllocMatchInfo > &FullStackIdToAllocMatchInfo)
static bool stackFrameIncludesInlinedCallStack(ArrayRef< Frame > ProfileCallStack, ArrayRef< uint64_t > InlinedCallStack)
FunctionAnalysisManager FAM
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Defines the virtual file system interface vfs::FileSystem.
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
ArrayRef< T > take_front(size_t N=1) const
Return a copy of *this with only the first N elements.
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Diagnostic information for the PGO profiler.
Base class for error info classes.
virtual std::string message() const
Return the error message as a string.
Lightweight error class with error context and mandatory checking.
bool hasSection() const
Check if this global has a custom object file section.
@ AvailableExternallyLinkage
Available for inspection, not emission.
AttributeSet getAttributes() const
Return the attribute set for this global.
HashResultTy< HasherT_ > final()
Forward to HasherT::final() if available.
Interface to help hash various types through a hasher type.
std::enable_if_t< hashbuilder_detail::IsHashableData< T >::value, HashBuilder & > add(T Value)
Implement hashing for hashable data types, e.g. integral or enum values.
Reader for the indexed binary instrprof format.
static Expected< std::unique_ptr< IndexedInstrProfReader > > create(const Twine &Path, vfs::FileSystem &FS, const Twine &RemappingPath="")
Factory method to create an indexed reader.
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
instrprof_error get() const
std::string message() const override
Return the error message as a string.
A smart pointer to a reference-counted object that inherits from RefCountedBase or ThreadSafeRefCount...
This is an important class for using LLVM in a threaded context.
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
LLVM_ABI MemProfUsePass(std::string MemoryProfileFile, IntrusiveRefCntPtr< vfs::FileSystem > FS=nullptr)
A Module instance is used to store all the information related to an LLVM module.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
constexpr bool empty() const
empty - Check if the string is empty.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
An efficient, type-erasing, non-owning reference to a callable.
Class to build a trie of call stack contexts for a particular profiled allocation call,...
LLVM_ABI void addCallStack(AllocationType AllocType, ArrayRef< uint64_t > StackIds, std::vector< ContextTotalSize > ContextSizeInfo={})
Add a call stack context with the given allocation type to the Trie.
LLVM_ABI void addSingleAllocTypeAttribute(CallBase *CI, AllocationType AT, StringRef Descriptor)
Add an attribute for the given allocation type to the call instruction.
LLVM_ABI bool buildAndAttachMIBMetadata(CallBase *CI)
Build and attach the minimal necessary MIB metadata.
Helper class to iterate through stack ids in both metadata (memprof MIB and callsite) and the corresp...
Encapsulates the data access profile data and the methods to operate on it.
LLVM_ABI std::optional< DataAccessProfRecord > getProfileRecord(const SymbolHandleRef SymID) const
Returns a profile record for SymbolID, or std::nullopt if there isn't a record.
LLVM_ABI bool isKnownColdSymbol(const SymbolHandleRef SymID) const
Returns true if SymID is seen in profiled binaries and cold.
initializer< Ty > init(const Ty &Val)
LLVM_ABI DenseMap< uint64_t, LocToLocMap > computeUndriftMap(Module &M, IndexedInstrProfReader *MemProfReader, const TargetLibraryInfo &TLI)
LLVM_ABI MDNode * buildCallstackMetadata(ArrayRef< uint64_t > CallStack, LLVMContext &Ctx)
Build callstack metadata from the provided list of call stack ids.
LLVM_ABI AllocationType getAllocType(uint64_t TotalLifetimeAccessDensity, uint64_t AllocCount, uint64_t TotalLifetime)
Return the allocation type for a given set of memory profile values.
LLVM_ABI bool recordContextSizeInfoForAnalysis()
Whether we need to record the context size info in the alloc trie used to build metadata.
LLVM_ABI uint64_t computeFullStackId(ArrayRef< Frame > CallStack)
Helper to generate a single hash id for a given callstack, used for emitting matching statistics and ...
LLVM_ABI DenseMap< uint64_t, SmallVector< CallEdgeTy, 0 > > extractCallsFromIR(Module &M, const TargetLibraryInfo &TLI, function_ref< bool(uint64_t)> IsPresentInProfile=[](uint64_t) { return true;})
LLVM_ABI GlobalValue::GUID getGUID(const StringRef FunctionName)
std::unordered_map< LineLocation, LineLocation, LineLocationHash > LocToLocMap
LLVM_ABI std::string getAllocTypeAttributeString(AllocationType Type)
Returns the string to use in attributes with the given type.
LLVM_ABI IntrusiveRefCntPtr< FileSystem > getRealFileSystem()
Gets a vfs::FileSystem for the 'real' file system, as seen by the operating system.
This is an optimization pass for GlobalISel generic memory operations.
void handleAllErrors(Error E, HandlerTs &&... Handlers)
Behaves the same as handleErrors, except that by contract all errors must be handled by the given han...
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
cl::opt< bool > PGOWarnMissing
auto unique(Range &&R, Predicate P)
std::array< uint8_t, NumBytes > BLAKE3Result
The constant LLVM_BLAKE3_OUT_LEN provides the default output length, 32 bytes, which is recommended f...
LLVM_ABI void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI SmallVector< InstrProfValueData, 4 > getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind, uint32_t MaxNumValueData, uint64_t &TotalC, bool GetNoICPValue=false)
Extract the value profile data from Inst and returns them if Inst is annotated with value profile dat...
cl::opt< bool > NoPGOWarnMismatch
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
cl::opt< bool > NoPGOWarnMismatchComdatWeak
size_t operator()(const CallSiteEntry &Entry) const
ArrayRef< GlobalValue::GUID > CalleeGuids
bool operator==(const CallSiteEntry &Other) const
Summary of memprof metadata on allocations.
GlobalValue::GUID Function
llvm::SmallVector< CallSiteInfo > CallSites
llvm::SmallVector< AllocationInfo > AllocSites