LLVM 22.0.0git
MemProfUse.cpp
Go to the documentation of this file.
1//===- MemProfUse.cpp - memory allocation profile use pass --*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the MemProfUsePass which reads memory profiling data
10// and uses it to add metadata to instructions to guide optimization.
11//
12//===----------------------------------------------------------------------===//
13
#include "llvm/Transforms/Instrumentation/MemProfUse.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/InstrProfReader.h"
#include "llvm/ProfileData/MemProf.h"
#include "llvm/Support/BLAKE3.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/HashBuilder.h"
#include "llvm/Support/VirtualFileSystem.h"
#include <map>
#include <set>
38
39using namespace llvm;
40using namespace llvm::memprof;
41
42#define DEBUG_TYPE "memprof"
43
44namespace llvm {
48} // namespace llvm
49
50// By default disable matching of allocation profiles onto operator new that
51// already explicitly pass a hot/cold hint, since we don't currently
52// override these hints anyway.
54 "memprof-match-hot-cold-new",
56 "Match allocation profiles onto existing hot/cold operator new calls"),
57 cl::Hidden, cl::init(false));
58
59static cl::opt<bool>
60 ClPrintMemProfMatchInfo("memprof-print-match-info",
61 cl::desc("Print matching stats for each allocation "
62 "context in this module's profiles"),
63 cl::Hidden, cl::init(false));
64
65static cl::opt<bool>
66 SalvageStaleProfile("memprof-salvage-stale-profile",
67 cl::desc("Salvage stale MemProf profile"),
68 cl::init(false), cl::Hidden);
69
71 "memprof-attach-calleeguids",
73 "Attach calleeguids as value profile metadata for indirect calls."),
74 cl::init(true), cl::Hidden);
75
77 "memprof-matching-cold-threshold", cl::init(100), cl::Hidden,
78 cl::desc("Min percent of cold bytes matched to hint allocation cold"));
79
81 "memprof-annotate-static-data-prefix", cl::init(false), cl::Hidden,
82 cl::desc("If true, annotate the static data section prefix"));
83
84// Matching statistics
85STATISTIC(NumOfMemProfMissing, "Number of functions without memory profile.");
86STATISTIC(NumOfMemProfMismatch,
87 "Number of functions having mismatched memory profile hash.");
88STATISTIC(NumOfMemProfFunc, "Number of functions having valid memory profile.");
89STATISTIC(NumOfMemProfAllocContextProfiles,
90 "Number of alloc contexts in memory profile.");
91STATISTIC(NumOfMemProfCallSiteProfiles,
92 "Number of callsites in memory profile.");
93STATISTIC(NumOfMemProfMatchedAllocContexts,
94 "Number of matched memory profile alloc contexts.");
95STATISTIC(NumOfMemProfMatchedAllocs,
96 "Number of matched memory profile allocs.");
97STATISTIC(NumOfMemProfMatchedCallSites,
98 "Number of matched memory profile callsites.");
99STATISTIC(NumOfMemProfHotGlobalVars,
100 "Number of global vars annotated with 'hot' section prefix.");
101STATISTIC(NumOfMemProfColdGlobalVars,
102 "Number of global vars annotated with 'unlikely' section prefix.");
103STATISTIC(NumOfMemProfUnknownGlobalVars,
104 "Number of global vars with unknown hotness (no section prefix).");
105STATISTIC(NumOfMemProfExplicitSectionGlobalVars,
106 "Number of global vars with user-specified section (not annotated).");
107
109 ArrayRef<uint64_t> InlinedCallStack,
110 LLVMContext &Ctx) {
111 I.setMetadata(LLVMContext::MD_callsite,
112 buildCallstackMetadata(InlinedCallStack, Ctx));
113}
114
116 uint32_t Column) {
119 HashBuilder.add(Function, LineOffset, Column);
121 uint64_t Id;
122 std::memcpy(&Id, Hash.data(), sizeof(Hash));
123 return Id;
124}
125
129
132 uint64_t FullStackId) {
133 SmallVector<uint64_t> StackIds;
134 for (const auto &StackFrame : AllocInfo->CallStack)
135 StackIds.push_back(computeStackId(StackFrame));
136 auto AllocType = getAllocType(AllocInfo->Info.getTotalLifetimeAccessDensity(),
137 AllocInfo->Info.getAllocCount(),
138 AllocInfo->Info.getTotalLifetime());
139 std::vector<ContextTotalSize> ContextSizeInfo;
141 auto TotalSize = AllocInfo->Info.getTotalSize();
142 assert(TotalSize);
143 assert(FullStackId != 0);
144 ContextSizeInfo.push_back({FullStackId, TotalSize});
145 }
146 AllocTrie.addCallStack(AllocType, StackIds, std::move(ContextSizeInfo));
147 return AllocType;
148}
149
150// Return true if InlinedCallStack, computed from a call instruction's debug
151// info, is a prefix of ProfileCallStack, a list of Frames from profile data
152// (either the allocation data or a callsite).
153static bool
155 ArrayRef<uint64_t> InlinedCallStack) {
156 return ProfileCallStack.size() >= InlinedCallStack.size() &&
157 llvm::equal(ProfileCallStack.take_front(InlinedCallStack.size()),
158 InlinedCallStack, [](const Frame &F, uint64_t StackId) {
159 return computeStackId(F) == StackId;
160 });
161}
162
163static bool isAllocationWithHotColdVariant(const Function *Callee,
164 const TargetLibraryInfo &TLI) {
165 if (!Callee)
166 return false;
167 LibFunc Func;
168 if (!TLI.getLibFunc(*Callee, Func))
169 return false;
170 switch (Func) {
171 case LibFunc_Znwm:
172 case LibFunc_ZnwmRKSt9nothrow_t:
173 case LibFunc_ZnwmSt11align_val_t:
174 case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t:
175 case LibFunc_Znam:
176 case LibFunc_ZnamRKSt9nothrow_t:
177 case LibFunc_ZnamSt11align_val_t:
178 case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t:
179 case LibFunc_size_returning_new:
180 case LibFunc_size_returning_new_aligned:
181 return true;
182 case LibFunc_Znwm12__hot_cold_t:
183 case LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t:
184 case LibFunc_ZnwmSt11align_val_t12__hot_cold_t:
185 case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t:
186 case LibFunc_Znam12__hot_cold_t:
187 case LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t:
188 case LibFunc_ZnamSt11align_val_t12__hot_cold_t:
189 case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t:
190 case LibFunc_size_returning_new_hot_cold:
191 case LibFunc_size_returning_new_aligned_hot_cold:
193 default:
194 return false;
195 }
196}
197
199 AnnotationKind Kind) {
201 "Should not handle AnnotationOK here");
202 SmallString<32> Reason;
203 switch (Kind) {
205 ++NumOfMemProfExplicitSectionGlobalVars;
206 Reason.append("explicit section name");
207 break;
209 Reason.append("linker declaration");
210 break;
212 Reason.append("name starts with `llvm.`");
213 break;
214 default:
215 llvm_unreachable("Unexpected annotation kind");
216 }
217 LLVM_DEBUG(dbgs() << "Skip annotation for " << GVar.getName() << " due to "
218 << Reason << ".\n");
219 return;
220}
221
226
229 function_ref<bool(uint64_t)> IsPresentInProfile) {
231
232 auto GetOffset = [](const DILocation *DIL) {
233 return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) &
234 0xffff;
235 };
236
237 for (Function &F : M) {
238 if (F.isDeclaration())
239 continue;
240
241 for (auto &BB : F) {
242 for (auto &I : BB) {
244 continue;
245
246 auto *CB = dyn_cast<CallBase>(&I);
247 auto *CalledFunction = CB->getCalledFunction();
248 // Disregard indirect calls and intrinsics.
249 if (!CalledFunction || CalledFunction->isIntrinsic())
250 continue;
251
252 StringRef CalleeName = CalledFunction->getName();
253 // True if we are calling a heap allocation function that supports
254 // hot/cold variants.
255 bool IsAlloc = isAllocationWithHotColdVariant(CalledFunction, TLI);
256 // True for the first iteration below, indicating that we are looking at
257 // a leaf node.
258 bool IsLeaf = true;
259 for (const DILocation *DIL = I.getDebugLoc(); DIL;
260 DIL = DIL->getInlinedAt()) {
261 StringRef CallerName = DIL->getSubprogramLinkageName();
262 assert(!CallerName.empty() &&
263 "Be sure to enable -fdebug-info-for-profiling");
264 uint64_t CallerGUID = memprof::getGUID(CallerName);
265 uint64_t CalleeGUID = memprof::getGUID(CalleeName);
266 // Pretend that we are calling a function with GUID == 0 if we are
267 // in the inline stack leading to a heap allocation function.
268 if (IsAlloc) {
269 if (IsLeaf) {
270 // For leaf nodes, set CalleeGUID to 0 without consulting
271 // IsPresentInProfile.
272 CalleeGUID = 0;
273 } else if (!IsPresentInProfile(CalleeGUID)) {
274 // In addition to the leaf case above, continue to set CalleeGUID
275 // to 0 as long as we don't see CalleeGUID in the profile.
276 CalleeGUID = 0;
277 } else {
278 // Once we encounter a callee that exists in the profile, stop
279 // setting CalleeGUID to 0.
280 IsAlloc = false;
281 }
282 }
283
284 LineLocation Loc = {GetOffset(DIL), DIL->getColumn()};
285 Calls[CallerGUID].emplace_back(Loc, CalleeGUID);
286 CalleeName = CallerName;
287 IsLeaf = false;
288 }
289 }
290 }
291 }
292
293 // Sort each call list by the source location.
294 for (auto &[CallerGUID, CallList] : Calls) {
295 llvm::sort(CallList);
296 CallList.erase(llvm::unique(CallList), CallList.end());
297 }
298
299 return Calls;
300}
301
304 const TargetLibraryInfo &TLI) {
306
308 MemProfReader->getMemProfCallerCalleePairs();
310 extractCallsFromIR(M, TLI, [&](uint64_t GUID) {
311 return CallsFromProfile.contains(GUID);
312 });
313
314 // Compute an undrift map for each CallerGUID.
315 for (const auto &[CallerGUID, IRAnchors] : CallsFromIR) {
316 auto It = CallsFromProfile.find(CallerGUID);
317 if (It == CallsFromProfile.end())
318 continue;
319 const auto &ProfileAnchors = It->second;
320
321 LocToLocMap Matchings;
323 ProfileAnchors, IRAnchors, std::equal_to<GlobalValue::GUID>(),
324 [&](LineLocation A, LineLocation B) { Matchings.try_emplace(A, B); });
325 [[maybe_unused]] bool Inserted =
326 UndriftMaps.try_emplace(CallerGUID, std::move(Matchings)).second;
327
328 // The insertion must succeed because we visit each GUID exactly once.
329 assert(Inserted);
330 }
331
332 return UndriftMaps;
333}
334
335// Given a MemProfRecord, undrift all the source locations present in the
336// record in place.
337static void
339 memprof::MemProfRecord &MemProfRec) {
340 // Undrift a call stack in place.
341 auto UndriftCallStack = [&](std::vector<Frame> &CallStack) {
342 for (auto &F : CallStack) {
343 auto I = UndriftMaps.find(F.Function);
344 if (I == UndriftMaps.end())
345 continue;
346 auto J = I->second.find(LineLocation(F.LineOffset, F.Column));
347 if (J == I->second.end())
348 continue;
349 auto &NewLoc = J->second;
350 F.LineOffset = NewLoc.LineOffset;
351 F.Column = NewLoc.Column;
352 }
353 };
354
355 for (auto &AS : MemProfRec.AllocSites)
356 UndriftCallStack(AS.CallStack);
357
358 for (auto &CS : MemProfRec.CallSites)
359 UndriftCallStack(CS.Frames);
360}
361
362// Helper function to process CalleeGuids and create value profile metadata
364 ArrayRef<GlobalValue::GUID> CalleeGuids) {
365 if (!ClMemProfAttachCalleeGuids || CalleeGuids.empty())
366 return;
367
368 if (I.getMetadata(LLVMContext::MD_prof)) {
369 uint64_t Unused;
370 // TODO: When merging is implemented, increase this to a typical ICP value
371 // (e.g., 3-6) For now, we only need to check if existing data exists, so 1
372 // is sufficient
373 auto ExistingVD = getValueProfDataFromInst(I, IPVK_IndirectCallTarget,
374 /*MaxNumValueData=*/1, Unused);
375 // We don't know how to merge value profile data yet.
376 if (!ExistingVD.empty()) {
377 return;
378 }
379 }
380
382 uint64_t TotalCount = 0;
383
384 for (const GlobalValue::GUID CalleeGUID : CalleeGuids) {
385 InstrProfValueData VD;
386 VD.Value = CalleeGUID;
387 // For MemProf, we don't have actual call counts, so we assign
388 // a weight of 1 to each potential target.
389 // TODO: Consider making this weight configurable or increasing it to
390 // improve effectiveness for ICP.
391 VD.Count = 1;
392 VDs.push_back(VD);
393 TotalCount += VD.Count;
394 }
395
396 if (!VDs.empty()) {
397 annotateValueSite(M, I, VDs, TotalCount, IPVK_IndirectCallTarget,
398 VDs.size());
399 }
400}
401
402static void
404 ArrayRef<uint64_t> InlinedCallStack, LLVMContext &Ctx,
405 OptimizationRemarkEmitter &ORE, uint64_t MaxColdSize,
406 const std::set<const AllocationInfo *> &AllocInfoSet,
407 std::map<std::pair<uint64_t, unsigned>, AllocMatchInfo>
408 &FullStackIdToAllocMatchInfo) {
409 // We may match this instruction's location list to multiple MIB
410 // contexts. Add them to a Trie specialized for trimming the contexts to
411 // the minimal needed to disambiguate contexts with unique behavior.
412 CallStackTrie AllocTrie(&ORE, MaxColdSize);
413 uint64_t TotalSize = 0;
414 uint64_t TotalColdSize = 0;
415 for (auto *AllocInfo : AllocInfoSet) {
416 // Check the full inlined call stack against this one.
417 // If we found and thus matched all frames on the call, include
418 // this MIB.
420 InlinedCallStack)) {
421 NumOfMemProfMatchedAllocContexts++;
422 uint64_t FullStackId = 0;
424 FullStackId = computeFullStackId(AllocInfo->CallStack);
425 auto AllocType = addCallStack(AllocTrie, AllocInfo, FullStackId);
426 TotalSize += AllocInfo->Info.getTotalSize();
428 TotalColdSize += AllocInfo->Info.getTotalSize();
429 // Record information about the allocation if match info printing
430 // was requested.
432 assert(FullStackId != 0);
433 FullStackIdToAllocMatchInfo[std::make_pair(FullStackId,
434 InlinedCallStack.size())] = {
435 AllocInfo->Info.getTotalSize(), AllocType};
436 }
437 }
438 }
439 // If the threshold for the percent of cold bytes is less than 100%,
440 // and not all bytes are cold, see if we should still hint this
441 // allocation as cold without context sensitivity.
442 if (TotalColdSize < TotalSize && MinMatchedColdBytePercent < 100 &&
443 TotalColdSize * 100 >= MinMatchedColdBytePercent * TotalSize) {
444 AllocTrie.addSingleAllocTypeAttribute(CI, AllocationType::Cold, "dominant");
445 return;
446 }
447
448 // We might not have matched any to the full inlined call stack.
449 // But if we did, create and attach metadata, or a function attribute if
450 // all contexts have identical profiled behavior.
451 if (!AllocTrie.empty()) {
452 NumOfMemProfMatchedAllocs++;
453 // MemprofMDAttached will be false if a function attribute was
454 // attached.
455 bool MemprofMDAttached = AllocTrie.buildAndAttachMIBMetadata(CI);
456 assert(MemprofMDAttached == I.hasMetadata(LLVMContext::MD_memprof));
457 if (MemprofMDAttached) {
458 // Add callsite metadata for the instruction's location list so that
459 // it simpler later on to identify which part of the MIB contexts
460 // are from this particular instruction (including during inlining,
461 // when the callsite metadata will be updated appropriately).
462 // FIXME: can this be changed to strip out the matching stack
463 // context ids from the MIB contexts and not add any callsite
464 // metadata here to save space?
465 addCallsiteMetadata(I, InlinedCallStack, Ctx);
466 }
467 }
468}
469
470// Helper struct for maintaining refs to callsite data. As an alternative we
471// could store a pointer to the CallSiteInfo struct but we also need the frame
472// index. Using ArrayRefs instead makes it a little easier to read.
474 // Subset of frames for the corresponding CallSiteInfo.
476 // Potential targets for indirect calls.
478
479 // Only compare Frame contents.
480 // Use pointer-based equality instead of ArrayRef's operator== which does
481 // element-wise comparison. We want to check if it's the same slice of the
482 // underlying array, not just equivalent content.
483 bool operator==(const CallSiteEntry &Other) const {
484 return Frames.data() == Other.Frames.data() &&
485 Frames.size() == Other.Frames.size();
486 }
487};
488
490 size_t operator()(const CallSiteEntry &Entry) const {
491 return computeFullStackId(Entry.Frames);
492 }
493};
494
495static void handleCallSite(
496 Instruction &I, const Function *CalledFunction,
497 ArrayRef<uint64_t> InlinedCallStack,
498 const std::unordered_set<CallSiteEntry, CallSiteEntryHash> &CallSiteEntries,
499 Module &M, std::set<std::vector<uint64_t>> &MatchedCallSites) {
500 auto &Ctx = M.getContext();
501 for (const auto &CallSiteEntry : CallSiteEntries) {
502 // If we found and thus matched all frames on the call, create and
503 // attach call stack metadata.
505 InlinedCallStack)) {
506 NumOfMemProfMatchedCallSites++;
507 addCallsiteMetadata(I, InlinedCallStack, Ctx);
508
509 // Try to attach indirect call metadata if possible.
510 if (!CalledFunction)
512
513 // Only need to find one with a matching call stack and add a single
514 // callsite metadata.
515
516 // Accumulate call site matching information upon request.
518 std::vector<uint64_t> CallStack;
519 append_range(CallStack, InlinedCallStack);
520 MatchedCallSites.insert(std::move(CallStack));
521 }
522 break;
523 }
524 }
525}
526
527static void readMemprof(Module &M, Function &F,
529 const TargetLibraryInfo &TLI,
530 std::map<std::pair<uint64_t, unsigned>, AllocMatchInfo>
531 &FullStackIdToAllocMatchInfo,
532 std::set<std::vector<uint64_t>> &MatchedCallSites,
534 OptimizationRemarkEmitter &ORE, uint64_t MaxColdSize) {
535 auto &Ctx = M.getContext();
536 // Previously we used getIRPGOFuncName() here. If F is local linkage,
537 // getIRPGOFuncName() returns FuncName with prefix 'FileName;'. But
538 // llvm-profdata uses FuncName in dwarf to create GUID which doesn't
539 // contain FileName's prefix. It caused local linkage function can't
540 // find MemProfRecord. So we use getName() now.
541 // 'unique-internal-linkage-names' can make MemProf work better for local
542 // linkage function.
543 auto FuncName = F.getName();
544 auto FuncGUID = Function::getGUIDAssumingExternalLinkage(FuncName);
545 std::optional<memprof::MemProfRecord> MemProfRec;
546 auto Err = MemProfReader->getMemProfRecord(FuncGUID).moveInto(MemProfRec);
547 if (Err) {
548 handleAllErrors(std::move(Err), [&](const InstrProfError &IPE) {
549 auto Err = IPE.get();
550 bool SkipWarning = false;
551 LLVM_DEBUG(dbgs() << "Error in reading profile for Func " << FuncName
552 << ": ");
554 NumOfMemProfMissing++;
555 SkipWarning = !PGOWarnMissing;
556 LLVM_DEBUG(dbgs() << "unknown function");
557 } else if (Err == instrprof_error::hash_mismatch) {
558 NumOfMemProfMismatch++;
559 SkipWarning =
562 (F.hasComdat() ||
564 LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")");
565 }
566
567 if (SkipWarning)
568 return;
569
570 std::string Msg = (IPE.message() + Twine(" ") + F.getName().str() +
571 Twine(" Hash = ") + std::to_string(FuncGUID))
572 .str();
573
574 Ctx.diagnose(
575 DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning));
576 });
577 return;
578 }
579
580 NumOfMemProfFunc++;
581
582 // If requested, undrfit MemProfRecord so that the source locations in it
583 // match those in the IR.
585 undriftMemProfRecord(UndriftMaps, *MemProfRec);
586
587 // Detect if there are non-zero column numbers in the profile. If not,
588 // treat all column numbers as 0 when matching (i.e. ignore any non-zero
589 // columns in the IR). The profiled binary might have been built with
590 // column numbers disabled, for example.
591 bool ProfileHasColumns = false;
592
593 // Build maps of the location hash to all profile data with that leaf location
594 // (allocation info and the callsites).
595 std::map<uint64_t, std::set<const AllocationInfo *>> LocHashToAllocInfo;
596
597 // For the callsites we need to record slices of the frame array (see comments
598 // below where the map entries are added) along with their CalleeGuids.
599 std::map<uint64_t, std::unordered_set<CallSiteEntry, CallSiteEntryHash>>
600 LocHashToCallSites;
601 for (auto &AI : MemProfRec->AllocSites) {
602 NumOfMemProfAllocContextProfiles++;
603 // Associate the allocation info with the leaf frame. The later matching
604 // code will match any inlined call sequences in the IR with a longer prefix
605 // of call stack frames.
606 uint64_t StackId = computeStackId(AI.CallStack[0]);
607 LocHashToAllocInfo[StackId].insert(&AI);
608 ProfileHasColumns |= AI.CallStack[0].Column;
609 }
610 for (auto &CS : MemProfRec->CallSites) {
611 NumOfMemProfCallSiteProfiles++;
612 // Need to record all frames from leaf up to and including this function,
613 // as any of these may or may not have been inlined at this point.
614 unsigned Idx = 0;
615 for (auto &StackFrame : CS.Frames) {
616 uint64_t StackId = computeStackId(StackFrame);
617 ArrayRef<Frame> FrameSlice = ArrayRef<Frame>(CS.Frames).drop_front(Idx++);
618 ArrayRef<GlobalValue::GUID> CalleeGuids(CS.CalleeGuids);
619 LocHashToCallSites[StackId].insert({FrameSlice, CalleeGuids});
620
621 ProfileHasColumns |= StackFrame.Column;
622 // Once we find this function, we can stop recording.
623 if (StackFrame.Function == FuncGUID)
624 break;
625 }
626 assert(Idx <= CS.Frames.size() && CS.Frames[Idx - 1].Function == FuncGUID);
627 }
628
629 auto GetOffset = [](const DILocation *DIL) {
630 return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) &
631 0xffff;
632 };
633
634 // Now walk the instructions, looking up the associated profile data using
635 // debug locations.
636 for (auto &BB : F) {
637 for (auto &I : BB) {
638 if (I.isDebugOrPseudoInst())
639 continue;
640 // We are only interested in calls (allocation or interior call stack
641 // context calls).
642 auto *CI = dyn_cast<CallBase>(&I);
643 if (!CI)
644 continue;
645 auto *CalledFunction = CI->getCalledFunction();
646 if (CalledFunction && CalledFunction->isIntrinsic())
647 continue;
648 // List of call stack ids computed from the location hashes on debug
649 // locations (leaf to inlined at root).
650 SmallVector<uint64_t, 8> InlinedCallStack;
651 // Was the leaf location found in one of the profile maps?
652 bool LeafFound = false;
653 // If leaf was found in a map, iterators pointing to its location in both
654 // of the maps. It might exist in neither, one, or both (the latter case
655 // can happen because we don't currently have discriminators to
656 // distinguish the case when a single line/col maps to both an allocation
657 // and another callsite).
658 auto AllocInfoIter = LocHashToAllocInfo.end();
659 auto CallSitesIter = LocHashToCallSites.end();
660 for (const DILocation *DIL = I.getDebugLoc(); DIL != nullptr;
661 DIL = DIL->getInlinedAt()) {
662 // Use C++ linkage name if possible. Need to compile with
663 // -fdebug-info-for-profiling to get linkage name.
664 StringRef Name = DIL->getScope()->getSubprogram()->getLinkageName();
665 if (Name.empty())
666 Name = DIL->getScope()->getSubprogram()->getName();
667 auto CalleeGUID = Function::getGUIDAssumingExternalLinkage(Name);
668 auto StackId = computeStackId(CalleeGUID, GetOffset(DIL),
669 ProfileHasColumns ? DIL->getColumn() : 0);
670 // Check if we have found the profile's leaf frame. If yes, collect
671 // the rest of the call's inlined context starting here. If not, see if
672 // we find a match further up the inlined context (in case the profile
673 // was missing debug frames at the leaf).
674 if (!LeafFound) {
675 AllocInfoIter = LocHashToAllocInfo.find(StackId);
676 CallSitesIter = LocHashToCallSites.find(StackId);
677 if (AllocInfoIter != LocHashToAllocInfo.end() ||
678 CallSitesIter != LocHashToCallSites.end())
679 LeafFound = true;
680 }
681 if (LeafFound)
682 InlinedCallStack.push_back(StackId);
683 }
684 // If leaf not in either of the maps, skip inst.
685 if (!LeafFound)
686 continue;
687
688 // First add !memprof metadata from allocation info, if we found the
689 // instruction's leaf location in that map, and if the rest of the
690 // instruction's locations match the prefix Frame locations on an
691 // allocation context with the same leaf.
692 if (AllocInfoIter != LocHashToAllocInfo.end() &&
693 // Only consider allocations which support hinting.
694 isAllocationWithHotColdVariant(CI->getCalledFunction(), TLI))
695 handleAllocSite(I, CI, InlinedCallStack, Ctx, ORE, MaxColdSize,
696 AllocInfoIter->second, FullStackIdToAllocMatchInfo);
697 else if (CallSitesIter != LocHashToCallSites.end())
698 // Otherwise, add callsite metadata. If we reach here then we found the
699 // instruction's leaf location in the callsites map and not the
700 // allocation map.
701 handleCallSite(I, CalledFunction, InlinedCallStack,
702 CallSitesIter->second, M, MatchedCallSites);
703 }
704 }
705}
706
707MemProfUsePass::MemProfUsePass(std::string MemoryProfileFile,
709 : MemoryProfileFileName(MemoryProfileFile), FS(FS) {
710 if (!FS)
711 this->FS = vfs::getRealFileSystem();
712}
713
715 // Return immediately if the module doesn't contain any function or global
716 // variables.
717 if (M.empty() && M.globals().empty())
718 return PreservedAnalyses::all();
719
720 LLVM_DEBUG(dbgs() << "Read in memory profile:\n");
721 auto &Ctx = M.getContext();
722 auto ReaderOrErr = IndexedInstrProfReader::create(MemoryProfileFileName, *FS);
723 if (Error E = ReaderOrErr.takeError()) {
724 handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) {
725 Ctx.diagnose(
726 DiagnosticInfoPGOProfile(MemoryProfileFileName.data(), EI.message()));
727 });
728 return PreservedAnalyses::all();
729 }
730
731 std::unique_ptr<IndexedInstrProfReader> MemProfReader =
732 std::move(ReaderOrErr.get());
733 if (!MemProfReader) {
734 Ctx.diagnose(DiagnosticInfoPGOProfile(
735 MemoryProfileFileName.data(), StringRef("Cannot get MemProfReader")));
736 return PreservedAnalyses::all();
737 }
738
739 if (!MemProfReader->hasMemoryProfile()) {
740 Ctx.diagnose(DiagnosticInfoPGOProfile(MemoryProfileFileName.data(),
741 "Not a memory profile"));
742 return PreservedAnalyses::all();
743 }
744
745 const bool Changed =
746 annotateGlobalVariables(M, MemProfReader->getDataAccessProfileData());
747
748 // If the module doesn't contain any function, return after we process all
749 // global variables.
750 if (M.empty())
752
753 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
754
755 TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(*M.begin());
758 UndriftMaps = computeUndriftMap(M, MemProfReader.get(), TLI);
759
760 // Map from the stack hash and matched frame count of each allocation context
761 // in the function profiles to the total profiled size (bytes) and allocation
762 // type.
763 std::map<std::pair<uint64_t, unsigned>, AllocMatchInfo>
764 FullStackIdToAllocMatchInfo;
765
766 // Set of the matched call sites, each expressed as a sequence of an inline
767 // call stack.
768 std::set<std::vector<uint64_t>> MatchedCallSites;
769
770 uint64_t MaxColdSize = 0;
771 if (auto *MemProfSum = MemProfReader->getMemProfSummary())
772 MaxColdSize = MemProfSum->getMaxColdTotalSize();
773
774 for (auto &F : M) {
775 if (F.isDeclaration())
776 continue;
777
778 const TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
779 auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
780 readMemprof(M, F, MemProfReader.get(), TLI, FullStackIdToAllocMatchInfo,
781 MatchedCallSites, UndriftMaps, ORE, MaxColdSize);
782 }
783
785 for (const auto &[IdLengthPair, Info] : FullStackIdToAllocMatchInfo) {
786 auto [Id, Length] = IdLengthPair;
787 errs() << "MemProf " << getAllocTypeAttributeString(Info.AllocType)
788 << " context with id " << Id << " has total profiled size "
789 << Info.TotalSize << " is matched with " << Length << " frames\n";
790 }
791
792 for (const auto &CallStack : MatchedCallSites) {
793 errs() << "MemProf callsite match for inline call stack";
794 for (uint64_t StackId : CallStack)
795 errs() << " " << StackId;
796 errs() << "\n";
797 }
798 }
799
801}
802
803bool MemProfUsePass::annotateGlobalVariables(
804 Module &M, const memprof::DataAccessProfData *DataAccessProf) {
805 if (!AnnotateStaticDataSectionPrefix || M.globals().empty())
806 return false;
807
808 if (!DataAccessProf) {
809 M.addModuleFlag(Module::Warning, "EnableDataAccessProf", 0U);
810 M.getContext().diagnose(DiagnosticInfoPGOProfile(
811 MemoryProfileFileName.data(),
812 StringRef("Data access profiles not found in memprof. Ignore "
813 "-memprof-annotate-static-data-prefix."),
814 DS_Warning));
815 return false;
816 }
817 M.addModuleFlag(Module::Warning, "EnableDataAccessProf", 1U);
818
819 bool Changed = false;
820 // Iterate all global variables in the module and annotate them based on
821 // data access profiles. Note it's up to the linker to decide how to map input
822 // sections to output sections, and one conservative practice is to map
823 // unlikely-prefixed ones to unlikely output section, and map the rest
824 // (hot-prefixed or prefix-less) to the canonical output section.
825 for (GlobalVariable &GVar : M.globals()) {
826 assert(!GVar.getSectionPrefix().has_value() &&
827 "GVar shouldn't have section prefix yet");
828 auto Kind = llvm::memprof::getAnnotationKind(GVar);
831 continue;
832 }
833
834 StringRef Name = GVar.getName();
835 // Skip string literals as their mangled names don't stay stable across
836 // binary releases.
837 // TODO: Track string content hash in the profiles and compute it inside the
838 // compiler to categeorize the hotness string literals.
839 if (Name.starts_with(".str")) {
840 LLVM_DEBUG(dbgs() << "Skip annotating string literal " << Name << "\n");
841 continue;
842 }
843
844 // DataAccessProfRecord's get* methods will canonicalize the name under the
845 // hood before looking it up, so optimizer doesn't need to do it.
846 std::optional<DataAccessProfRecord> Record =
847 DataAccessProf->getProfileRecord(Name);
848 // Annotate a global variable as hot if it has non-zero sampled count, and
849 // annotate it as cold if it's seen in the profiled binary
850 // file but doesn't have any access sample.
851 // For logging, optimization remark emitter requires a llvm::Function, but
852 // it's not well defined how to associate a global variable with a function.
853 // So we just print out the static data section prefix in LLVM_DEBUG.
854 if (Record && Record->AccessCount > 0) {
855 ++NumOfMemProfHotGlobalVars;
856 Changed |= GVar.setSectionPrefix("hot");
857 LLVM_DEBUG(dbgs() << "Global variable " << Name
858 << " is annotated as hot\n");
859 } else if (DataAccessProf->isKnownColdSymbol(Name)) {
860 ++NumOfMemProfColdGlobalVars;
861 Changed |= GVar.setSectionPrefix("unlikely");
862 Changed = true;
863 LLVM_DEBUG(dbgs() << "Global variable " << Name
864 << " is annotated as unlikely\n");
865 } else {
866 ++NumOfMemProfUnknownGlobalVars;
867 LLVM_DEBUG(dbgs() << "Global variable " << Name << " is not annotated\n");
868 }
869 }
870
871 return Changed;
872}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Module.h This file contains the declarations for the Module class.
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
static void handleCallSite(Instruction &I, const Function *CalledFunction, ArrayRef< uint64_t > InlinedCallStack, const std::unordered_set< CallSiteEntry, CallSiteEntryHash > &CallSiteEntries, Module &M, std::set< std::vector< uint64_t > > &MatchedCallSites)
static void addCallsiteMetadata(Instruction &I, ArrayRef< uint64_t > InlinedCallStack, LLVMContext &Ctx)
static bool isAllocationWithHotColdVariant(const Function *Callee, const TargetLibraryInfo &TLI)
static cl::opt< bool > ClMemProfAttachCalleeGuids("memprof-attach-calleeguids", cl::desc("Attach calleeguids as value profile metadata for indirect calls."), cl::init(true), cl::Hidden)
static void HandleUnsupportedAnnotationKinds(GlobalVariable &GVar, AnnotationKind Kind)
static void undriftMemProfRecord(const DenseMap< uint64_t, LocToLocMap > &UndriftMaps, memprof::MemProfRecord &MemProfRec)
static uint64_t computeStackId(GlobalValue::GUID Function, uint32_t LineOffset, uint32_t Column)
static cl::opt< bool > ClPrintMemProfMatchInfo("memprof-print-match-info", cl::desc("Print matching stats for each allocation " "context in this module's profiles"), cl::Hidden, cl::init(false))
static void addVPMetadata(Module &M, Instruction &I, ArrayRef< GlobalValue::GUID > CalleeGuids)
static cl::opt< bool > AnnotateStaticDataSectionPrefix("memprof-annotate-static-data-prefix", cl::init(false), cl::Hidden, cl::desc("If true, annotate the static data section prefix"))
static cl::opt< bool > SalvageStaleProfile("memprof-salvage-stale-profile", cl::desc("Salvage stale MemProf profile"), cl::init(false), cl::Hidden)
static cl::opt< unsigned > MinMatchedColdBytePercent("memprof-matching-cold-threshold", cl::init(100), cl::Hidden, cl::desc("Min percent of cold bytes matched to hint allocation cold"))
static cl::opt< bool > ClMemProfMatchHotColdNew("memprof-match-hot-cold-new", cl::desc("Match allocation profiles onto existing hot/cold operator new calls"), cl::Hidden, cl::init(false))
static AllocationType addCallStack(CallStackTrie &AllocTrie, const AllocationInfo *AllocInfo, uint64_t FullStackId)
static void readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader, const TargetLibraryInfo &TLI, std::map< std::pair< uint64_t, unsigned >, AllocMatchInfo > &FullStackIdToAllocMatchInfo, std::set< std::vector< uint64_t > > &MatchedCallSites, DenseMap< uint64_t, LocToLocMap > &UndriftMaps, OptimizationRemarkEmitter &ORE, uint64_t MaxColdSize)
static void handleAllocSite(Instruction &I, CallBase *CI, ArrayRef< uint64_t > InlinedCallStack, LLVMContext &Ctx, OptimizationRemarkEmitter &ORE, uint64_t MaxColdSize, const std::set< const AllocationInfo * > &AllocInfoSet, std::map< std::pair< uint64_t, unsigned >, AllocMatchInfo > &FullStackIdToAllocMatchInfo)
static bool stackFrameIncludesInlinedCallStack(ArrayRef< Frame > ProfileCallStack, ArrayRef< uint64_t > InlinedCallStack)
AllocType
FunctionAnalysisManager FAM
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
Defines the virtual file system interface vfs::FileSystem.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
ArrayRef< T > take_front(size_t N=1) const
Return a copy of *this with only the first N elements.
Definition ArrayRef.h:224
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
Definition ArrayRef.h:200
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:142
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:167
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:237
iterator end()
Definition DenseMap.h:81
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition DenseMap.h:158
Diagnostic information for the PGO profiler.
Base class for error info classes.
Definition Error.h:44
virtual std::string message() const
Return the error message as a string.
Definition Error.h:52
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
static LLVM_ABI GUID getGUIDAssumingExternalLinkage(StringRef GlobalName)
Return a 64-bit global unique ID constructed from the name of a global symbol.
Definition Globals.cpp:77
uint64_t GUID
Declare a type to represent a global unique identifier for a global value.
@ AvailableExternallyLinkage
Available for inspection, not emission.
Definition GlobalValue.h:54
HashResultTy< HasherT_ > final()
Forward to HasherT::final() if available.
Definition HashBuilder.h:64
Interface to help hash various types through a hasher type.
std::enable_if_t< hashbuilder_detail::IsHashableData< T >::value, HashBuilder & > add(T Value)
Implement hashing for hashable data types, e.g. integral or enum values.
Reader for the indexed binary instrprof format.
static Expected< std::unique_ptr< IndexedInstrProfReader > > create(const Twine &Path, vfs::FileSystem &FS, const Twine &RemappingPath="")
Factory method to create an indexed reader.
instrprof_error get() const
Definition InstrProf.h:465
std::string message() const override
Return the error message as a string.
A smart pointer to a reference-counted object that inherits from RefCountedBase or ThreadSafeRefCount...
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
LLVM_ABI MemProfUsePass(std::string MemoryProfileFile, IntrusiveRefCntPtr< vfs::FileSystem > FS=nullptr)
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
@ Warning
Emits a warning if two values disagree.
Definition Module.h:124
The optimization diagnostic interface.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
void append(StringRef RHS)
Append from a StringRef.
Definition SmallString.h:68
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
constexpr bool empty() const
empty - Check if the string is empty.
Definition StringRef.h:143
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
An efficient, type-erasing, non-owning reference to a callable.
Class to build a trie of call stack contexts for a particular profiled allocation call,...
LLVM_ABI void addCallStack(AllocationType AllocType, ArrayRef< uint64_t > StackIds, std::vector< ContextTotalSize > ContextSizeInfo={})
Add a call stack context with the given allocation type to the Trie.
LLVM_ABI void addSingleAllocTypeAttribute(CallBase *CI, AllocationType AT, StringRef Descriptor)
Add an attribute for the given allocation type to the call instruction.
LLVM_ABI bool buildAndAttachMIBMetadata(CallBase *CI)
Build and attach the minimal necessary MIB metadata.
Helper class to iterate through stack ids in both metadata (memprof MIB and callsite) and the corresp...
Encapsulates the data access profile data and the methods to operate on it.
LLVM_ABI std::optional< DataAccessProfRecord > getProfileRecord(const SymbolHandleRef SymID) const
Returns a profile record for SymbolID, or std::nullopt if there isn't a record.
LLVM_ABI bool isKnownColdSymbol(const SymbolHandleRef SymID) const
Returns true if SymID is seen in profiled binaries and cold.
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
initializer< Ty > init(const Ty &Val)
LLVM_ABI DenseMap< uint64_t, LocToLocMap > computeUndriftMap(Module &M, IndexedInstrProfReader *MemProfReader, const TargetLibraryInfo &TLI)
LLVM_ABI MDNode * buildCallstackMetadata(ArrayRef< uint64_t > CallStack, LLVMContext &Ctx)
Build callstack metadata from the provided list of call stack ids.
LLVM_ABI AllocationType getAllocType(uint64_t TotalLifetimeAccessDensity, uint64_t AllocCount, uint64_t TotalLifetime)
Return the allocation type for a given set of memory profile values.
LLVM_ABI bool recordContextSizeInfoForAnalysis()
Whether we need to record the context size info in the alloc trie used to build metadata.
std::unordered_map< LineLocation, LineLocation, LineLocationHash > LocToLocMap
Definition MemProfUse.h:65
LLVM_ABI uint64_t computeFullStackId(ArrayRef< Frame > CallStack)
Helper to generate a single hash id for a given callstack, used for emitting matching statistics and ...
LLVM_ABI DenseMap< uint64_t, SmallVector< CallEdgeTy, 0 > > extractCallsFromIR(Module &M, const TargetLibraryInfo &TLI, function_ref< bool(uint64_t)> IsPresentInProfile=[](uint64_t) { return true;})
AnnotationKind getAnnotationKind(const GlobalVariable &GV)
Returns the annotation kind of the global variable GV.
LLVM_ABI GlobalValue::GUID getGUID(const StringRef FunctionName)
Definition MemProf.cpp:344
LLVM_ABI std::string getAllocTypeAttributeString(AllocationType Type)
Returns the string to use in attributes with the given type.
LLVM_ABI IntrusiveRefCntPtr< FileSystem > getRealFileSystem()
Gets an vfs::FileSystem for the 'real' file system, as seen by the operating system.
This is an optimization pass for GlobalISel generic memory operations.
@ Length
Definition DWP.cpp:477
std::array< uint8_t, NumBytes > BLAKE3Result
The constant LLVM_BLAKE3_OUT_LEN provides the default output length, 32 bytes, which is recommended f...
Definition BLAKE3.h:35
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:644
void handleAllErrors(Error E, HandlerTs &&... Handlers)
Behaves the same as handleErrors, except that by contract all errors must be handled by the given han...
Definition Error.h:990
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2136
InnerAnalysisManagerProxy< FunctionAnalysisManager, Module > FunctionAnalysisManagerModuleProxy
Provide the FunctionAnalysisManager to Module proxy.
cl::opt< bool > PGOWarnMissing
auto unique(Range &&R, Predicate P)
Definition STLExtras.h:2076
LLVM_ABI void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1622
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI SmallVector< InstrProfValueData, 4 > getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind, uint32_t MaxNumValueData, uint64_t &TotalC, bool GetNoICPValue=false)
Extract the value profile data from Inst and returns them if Inst is annotated with value profile dat...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
cl::opt< bool > NoPGOWarnMismatch
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
@ Other
Any other memory.
Definition ModRef.h:68
cl::opt< bool > SalvageStaleProfile("salvage-stale-profile", cl::Hidden, cl::init(false), cl::desc("Salvage stale profile by fuzzy matching and use the remapped " "location for sample profile query."))
void longestCommonSequence(AnchorList AnchorList1, AnchorList AnchorList2, llvm::function_ref< bool(const Function &, const Function &)> FunctionMatchesProfile, llvm::function_ref< void(Loc, Loc)> InsertMatching)
ArrayRef(const T &OneElt) -> ArrayRef< T >
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Definition STLExtras.h:2088
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
Definition MIRParser.h:39
cl::opt< bool > NoPGOWarnMismatchComdatWeak
uint64_t TotalSize
AllocationType AllocType
size_t operator()(const CallSiteEntry &Entry) const
ArrayRef< GlobalValue::GUID > CalleeGuids
bool operator==(const CallSiteEntry &Other) const
ArrayRef< Frame > Frames
Summary of memprof metadata on allocations.
GlobalValue::GUID Function
Definition MemProf.h:245
uint32_t LineOffset
Definition MemProf.h:250
llvm::SmallVector< CallSiteInfo > CallSites
Definition MemProf.h:522
llvm::SmallVector< AllocationInfo > AllocSites
Definition MemProf.h:520