LLVM 23.0.0git
MemProfUse.cpp
Go to the documentation of this file.
1//===- MemProfUse.cpp - memory allocation profile use pass --*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the MemProfUsePass which reads memory profiling data
10// and uses it to add metadata to instructions to guide optimization.
11//
12//===----------------------------------------------------------------------===//
13
16#include "llvm/ADT/Statistic.h"
17#include "llvm/ADT/StringRef.h"
23#include "llvm/IR/Function.h"
25#include "llvm/IR/Module.h"
30#include "llvm/Support/BLAKE3.h"
32#include "llvm/Support/Debug.h"
36#include <map>
37#include <set>
38
39using namespace llvm;
40using namespace llvm::memprof;
41
42#define DEBUG_TYPE "memprof"
43
44namespace llvm {
48} // namespace llvm
49
50// By default disable matching of allocation profiles onto operator new that
51// already explicitly pass a hot/cold hint, since we don't currently
52// override these hints anyway.
54 "memprof-match-hot-cold-new",
56 "Match allocation profiles onto existing hot/cold operator new calls"),
57 cl::Hidden, cl::init(false));
58
59static cl::opt<bool>
60 ClPrintMemProfMatchInfo("memprof-print-match-info",
61 cl::desc("Print matching stats for each allocation "
62 "context in this module's profiles"),
63 cl::Hidden, cl::init(false));
64
66 "memprof-print-matched-alloc-stack",
67 cl::desc("Print full stack context for matched "
68 "allocations with -memprof-print-match-info."),
69 cl::Hidden, cl::init(false));
70
71static cl::opt<bool>
72 PrintFunctionGuids("memprof-print-function-guids",
73 cl::desc("Print function GUIDs computed for matching"),
74 cl::Hidden, cl::init(false));
75
76static cl::opt<bool>
77 SalvageStaleProfile("memprof-salvage-stale-profile",
78 cl::desc("Salvage stale MemProf profile"),
79 cl::init(false), cl::Hidden);
80
82 "memprof-attach-calleeguids",
84 "Attach calleeguids as value profile metadata for indirect calls."),
85 cl::init(true), cl::Hidden);
86
88 "memprof-matching-cold-threshold", cl::init(100), cl::Hidden,
89 cl::desc("Min percent of cold bytes matched to hint allocation cold"));
90
92 "memprof-annotate-static-data-prefix", cl::init(false), cl::Hidden,
93 cl::desc("If true, annotate the static data section prefix"));
94
95// FIXME: This option is added for incremental rollout purposes.
96// After the option, string literal partitioning should be implied by
97// AnnotateStaticDataSectionPrefix above and this option should be cleaned up.
99 "memprof-annotate-string-literal-section-prefix", cl::init(false),
101 cl::desc("If true, annotate the string literal data section prefix"));
102
103// Matching statistics
104STATISTIC(NumOfMemProfMissing, "Number of functions without memory profile.");
105STATISTIC(NumOfMemProfMismatch,
106 "Number of functions having mismatched memory profile hash.");
107STATISTIC(NumOfMemProfFunc, "Number of functions having valid memory profile.");
108STATISTIC(NumOfMemProfAllocContextProfiles,
109 "Number of alloc contexts in memory profile.");
110STATISTIC(NumOfMemProfCallSiteProfiles,
111 "Number of callsites in memory profile.");
112STATISTIC(NumOfMemProfMatchedAllocContexts,
113 "Number of matched memory profile alloc contexts.");
114STATISTIC(NumOfMemProfMatchedAllocs,
115 "Number of matched memory profile allocs.");
116STATISTIC(NumOfMemProfMatchedCallSites,
117 "Number of matched memory profile callsites.");
118STATISTIC(NumOfMemProfHotGlobalVars,
119 "Number of global vars annotated with 'hot' section prefix.");
120STATISTIC(NumOfMemProfColdGlobalVars,
121 "Number of global vars annotated with 'unlikely' section prefix.");
122STATISTIC(NumOfMemProfUnknownGlobalVars,
123 "Number of global vars with unknown hotness (no section prefix).");
124STATISTIC(NumOfMemProfExplicitSectionGlobalVars,
125 "Number of global vars with user-specified section (not annotated).");
126
128 ArrayRef<uint64_t> InlinedCallStack,
129 LLVMContext &Ctx) {
130 I.setMetadata(LLVMContext::MD_callsite,
131 buildCallstackMetadata(InlinedCallStack, Ctx));
132}
133
135 uint32_t Column) {
138 HashBuilder.add(Function, LineOffset, Column);
140 uint64_t Id;
141 std::memcpy(&Id, Hash.data(), sizeof(Hash));
142 return Id;
143}
144
148
150 return getAllocType(AllocInfo->Info.getTotalLifetimeAccessDensity(),
151 AllocInfo->Info.getAllocCount(),
152 AllocInfo->Info.getTotalLifetime());
153}
154
157 uint64_t FullStackId) {
158 SmallVector<uint64_t> StackIds;
159 for (const auto &StackFrame : AllocInfo->CallStack)
160 StackIds.push_back(computeStackId(StackFrame));
162 std::vector<ContextTotalSize> ContextSizeInfo;
164 auto TotalSize = AllocInfo->Info.getTotalSize();
165 assert(TotalSize);
166 assert(FullStackId != 0);
167 ContextSizeInfo.push_back({FullStackId, TotalSize});
168 }
169 AllocTrie.addCallStack(AllocType, StackIds, std::move(ContextSizeInfo));
170 return AllocType;
171}
172
173// Return true if InlinedCallStack, computed from a call instruction's debug
174// info, is a prefix of ProfileCallStack, a list of Frames from profile data
175// (either the allocation data or a callsite).
176static bool
178 ArrayRef<uint64_t> InlinedCallStack) {
179 return ProfileCallStack.size() >= InlinedCallStack.size() &&
180 llvm::equal(ProfileCallStack.take_front(InlinedCallStack.size()),
181 InlinedCallStack, [](const Frame &F, uint64_t StackId) {
182 return computeStackId(F) == StackId;
183 });
184}
185
// Returns true if \p Callee is a heap allocation function (operator new /
// operator new[] / size-returning new, per the mangled LibFunc names below)
// that has a hot/cold-hinted counterpart, i.e. a call whose memory profile
// can be used to rewrite it into a hinted variant.
// NOTE(review): this is a lossy capture of the file — original line 215
// (the result for the already-hinted `__hot_cold_t` cases) is elided here.
// Per the ClMemProfMatchHotColdNew option comment earlier in the file it
// presumably returns that flag — confirm against the upstream source.
186 static bool isAllocationWithHotColdVariant(const Function *Callee,
187 const TargetLibraryInfo &TLI) {
188 if (!Callee)
189 return false;
190 LibFunc Func;
// Only recognized library allocation functions qualify.
191 if (!TLI.getLibFunc(*Callee, Func))
192 return false;
193 switch (Func) {
// Unhinted operator new / new[] (and nothrow/aligned variants): these can
// be matched and later rewritten to a hot/cold-hinted call.
194 case LibFunc_Znwm:
195 case LibFunc_ZnwmRKSt9nothrow_t:
196 case LibFunc_ZnwmSt11align_val_t:
197 case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t:
198 case LibFunc_Znam:
199 case LibFunc_ZnamRKSt9nothrow_t:
200 case LibFunc_ZnamSt11align_val_t:
201 case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t:
202 case LibFunc_size_returning_new:
203 case LibFunc_size_returning_new_aligned:
204 return true;
// Variants that already take an explicit __hot_cold_t hint.
205 case LibFunc_Znwm12__hot_cold_t:
206 case LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t:
207 case LibFunc_ZnwmSt11align_val_t12__hot_cold_t:
208 case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t:
209 case LibFunc_Znam12__hot_cold_t:
210 case LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t:
211 case LibFunc_ZnamSt11align_val_t12__hot_cold_t:
212 case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t:
213 case LibFunc_size_returning_new_hot_cold:
214 case LibFunc_size_returning_new_aligned_hot_cold:
// (elided line 215 — see NOTE(review) above.)
216 default:
217 return false;
218 }
219 }
220
222 AnnotationKind Kind) {
224 "Should not handle AnnotationOK here");
225 SmallString<32> Reason;
226 switch (Kind) {
228 ++NumOfMemProfExplicitSectionGlobalVars;
229 Reason.append("explicit section name");
230 break;
232 Reason.append("linker declaration");
233 break;
235 Reason.append("name starts with `llvm.`");
236 break;
237 default:
238 llvm_unreachable("Unexpected annotation kind");
239 }
240 LLVM_DEBUG(dbgs() << "Skip annotation for " << GVar.getName() << " due to "
241 << Reason << ".\n");
242}
243
244// Computes the LLVM version of MD5 hash for the content of a string
245// literal.
246static std::optional<uint64_t>
248 auto *Initializer = GVar.getInitializer();
249 if (!Initializer)
250 return std::nullopt;
251 if (auto *C = dyn_cast<ConstantDataSequential>(Initializer))
252 if (C->isString()) {
253 // Note the hash computed for the literal would include the null byte.
254 return llvm::MD5Hash(C->getAsString());
255 }
256 return std::nullopt;
257}
258
259// Structure for tracking info about matched allocation contexts for use with
260// -memprof-print-match-info and -memprof-print-matched-alloc-stack.
262 // Total size in bytes of matched context.
264 // Matched allocation's type.
266 // Number of frames matched to the allocation itself (values will be >1 in
267 // cases where allocation was already inlined). Use a set because there can
268 // be multiple inlined instances and each may have a different inline depth.
269 // Use std::set to iterate in sorted order when printing.
270 std::set<unsigned> MatchedFramesSet;
271 // The full call stack of the allocation, for cases where requested via
272 // -memprof-print-matched-alloc-stack.
273 std::vector<Frame> CallStack;
274
275 // Caller responsible for inserting the matched frames and the call stack when
276 // appropriate.
279};
280
283 function_ref<bool(uint64_t)> IsPresentInProfile) {
285
286 auto GetOffset = [](const DILocation *DIL) {
287 return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) &
288 0xffff;
289 };
290
291 for (Function &F : M) {
292 if (F.isDeclaration())
293 continue;
294
295 for (auto &BB : F) {
296 for (auto &I : BB) {
298 continue;
299
300 auto *CB = dyn_cast<CallBase>(&I);
301 auto *CalledFunction = CB->getCalledFunction();
302 // Disregard indirect calls and intrinsics.
303 if (!CalledFunction || CalledFunction->isIntrinsic())
304 continue;
305
306 StringRef CalleeName = CalledFunction->getName();
307 // True if we are calling a heap allocation function that supports
308 // hot/cold variants.
309 bool IsAlloc = isAllocationWithHotColdVariant(CalledFunction, TLI);
310 // True for the first iteration below, indicating that we are looking at
311 // a leaf node.
312 bool IsLeaf = true;
313 for (const DILocation *DIL = I.getDebugLoc(); DIL;
314 DIL = DIL->getInlinedAt()) {
315 StringRef CallerName = DIL->getSubprogramLinkageName();
316 assert(!CallerName.empty() &&
317 "Be sure to enable -fdebug-info-for-profiling");
318 uint64_t CallerGUID = memprof::getGUID(CallerName);
319 uint64_t CalleeGUID = memprof::getGUID(CalleeName);
320 // Pretend that we are calling a function with GUID == 0 if we are
321 // in the inline stack leading to a heap allocation function.
322 if (IsAlloc) {
323 if (IsLeaf) {
324 // For leaf nodes, set CalleeGUID to 0 without consulting
325 // IsPresentInProfile.
326 CalleeGUID = 0;
327 } else if (!IsPresentInProfile(CalleeGUID)) {
328 // In addition to the leaf case above, continue to set CalleeGUID
329 // to 0 as long as we don't see CalleeGUID in the profile.
330 CalleeGUID = 0;
331 } else {
332 // Once we encounter a callee that exists in the profile, stop
333 // setting CalleeGUID to 0.
334 IsAlloc = false;
335 }
336 }
337
338 LineLocation Loc = {GetOffset(DIL), DIL->getColumn()};
339 Calls[CallerGUID].emplace_back(Loc, CalleeGUID);
340 CalleeName = CallerName;
341 IsLeaf = false;
342 }
343 }
344 }
345 }
346
347 // Sort each call list by the source location.
348 for (auto &[CallerGUID, CallList] : Calls) {
349 llvm::sort(CallList);
350 CallList.erase(llvm::unique(CallList), CallList.end());
351 }
352
353 return Calls;
354}
355
358 const TargetLibraryInfo &TLI) {
360
362 MemProfReader->getMemProfCallerCalleePairs();
364 extractCallsFromIR(M, TLI, [&](uint64_t GUID) {
365 return CallsFromProfile.contains(GUID);
366 });
367
368 // Compute an undrift map for each CallerGUID.
369 for (const auto &[CallerGUID, IRAnchors] : CallsFromIR) {
370 auto It = CallsFromProfile.find(CallerGUID);
371 if (It == CallsFromProfile.end())
372 continue;
373 const auto &ProfileAnchors = It->second;
374
375 LocToLocMap Matchings;
377 ProfileAnchors, IRAnchors, std::equal_to<GlobalValue::GUID>(),
378 [&](LineLocation A, LineLocation B) { Matchings.try_emplace(A, B); });
379 [[maybe_unused]] bool Inserted =
380 UndriftMaps.try_emplace(CallerGUID, std::move(Matchings)).second;
381
382 // The insertion must succeed because we visit each GUID exactly once.
383 assert(Inserted);
384 }
385
386 return UndriftMaps;
387}
388
389// Given a MemProfRecord, undrift all the source locations present in the
390// record in place.
391static void
393 memprof::MemProfRecord &MemProfRec) {
394 // Undrift a call stack in place.
395 auto UndriftCallStack = [&](std::vector<Frame> &CallStack) {
396 for (auto &F : CallStack) {
397 auto I = UndriftMaps.find(F.Function);
398 if (I == UndriftMaps.end())
399 continue;
400 auto J = I->second.find(LineLocation(F.LineOffset, F.Column));
401 if (J == I->second.end())
402 continue;
403 auto &NewLoc = J->second;
404 F.LineOffset = NewLoc.LineOffset;
405 F.Column = NewLoc.Column;
406 }
407 };
408
409 for (auto &AS : MemProfRec.AllocSites)
410 UndriftCallStack(AS.CallStack);
411
412 for (auto &CS : MemProfRec.CallSites)
413 UndriftCallStack(CS.Frames);
414}
415
416// Helper function to process CalleeGuids and create value profile metadata
418 ArrayRef<GlobalValue::GUID> CalleeGuids) {
419 if (!ClMemProfAttachCalleeGuids || CalleeGuids.empty())
420 return;
421
422 // Prepare the vector of value data, initializing from any existing
423 // value-profile metadata present on the instruction so that we merge the
424 // new CalleeGuids into the existing entries.
426 uint64_t TotalCount = 0;
427
428 if (I.getMetadata(LLVMContext::MD_prof)) {
429 // Read all existing entries so we can merge them. Use a large
430 // MaxNumValueData to retrieve all existing entries.
431 VDs = getValueProfDataFromInst(I, IPVK_IndirectCallTarget,
432 /*MaxNumValueData=*/UINT32_MAX, TotalCount);
433 }
434
435 // Save the original size for use later in detecting whether any were added.
436 const size_t OriginalSize = VDs.size();
437
438 // Initialize the set of existing guids with the original list.
439 DenseSet<uint64_t> ExistingValues(
442 VDs, [](const InstrProfValueData &Entry) { return Entry.Value; }));
443
444 // Merge CalleeGuids into list of existing VDs, by appending any that are not
445 // already included.
446 VDs.reserve(OriginalSize + CalleeGuids.size());
447 for (auto G : CalleeGuids) {
448 if (!ExistingValues.insert(G).second)
449 continue;
450 InstrProfValueData NewEntry;
451 NewEntry.Value = G;
452 // For MemProf, we don't have actual call counts, so we assign
453 // a weight of 1 to each potential target.
454 // TODO: Consider making this weight configurable or increasing it to
455 // improve effectiveness for ICP.
456 NewEntry.Count = 1;
457 TotalCount += NewEntry.Count;
458 VDs.push_back(NewEntry);
459 }
460
461 // Update the VP metadata if we added any new callee GUIDs to the list.
462 assert(VDs.size() >= OriginalSize);
463 if (VDs.size() == OriginalSize)
464 return;
465
466 // First clear the existing !prof.
467 I.setMetadata(LLVMContext::MD_prof, nullptr);
468
469 // No need to sort the updated VDs as all appended entries have the same count
470 // of 1, which is no larger than any existing entries. The incoming list of
471 // CalleeGuids should already be deterministic for a given profile.
472 annotateValueSite(M, I, VDs, TotalCount, IPVK_IndirectCallTarget, VDs.size());
473}
474
// Matches the profiled allocation contexts in \p AllocInfoSet against the
// inlined debug-location stack (\p InlinedCallStack) of allocation call
// \p CI, builds a CallStackTrie of the matching contexts, and attaches
// !memprof MIB metadata (plus !callsite metadata) or a single hot/cold
// attribute to the call. Match details are recorded in
// \p FullStackIdToAllocMatchInfo for optional printing.
// NOTE(review): this capture is lossy — several original lines (496, 510,
// 515, 519, 536) are elided below and marked inline; verify against the
// upstream source before relying on the exact conditions.
475 static void handleAllocSite(
476 Instruction &I, CallBase *CI, ArrayRef<uint64_t> InlinedCallStack,
477 LLVMContext &Ctx, OptimizationRemarkEmitter &ORE, uint64_t MaxColdSize,
478 const std::set<const AllocationInfo *> &AllocInfoSet,
479 std::map<uint64_t, AllocMatchInfo> &FullStackIdToAllocMatchInfo) {
480 // TODO: Remove this once the profile creation logic deduplicates contexts
481 // that are the same other than the IsInlineFrame bool. Until then, keep the
482 // largest.
483 DenseMap<uint64_t, const AllocationInfo *> UniqueFullContextIdAllocInfo;
// Deduplicate by full-stack hash, preferring the larger-sized entry.
484 for (auto *AllocInfo : AllocInfoSet) {
485 auto FullStackId = computeFullStackId(AllocInfo->CallStack);
486 auto [It, Inserted] =
487 UniqueFullContextIdAllocInfo.insert({FullStackId, AllocInfo});
488 // If inserted entry, done.
489 if (Inserted)
490 continue;
491 // Keep the larger one, or the noncold one if they are the same size.
492 auto CurSize = It->second->Info.getTotalSize();
493 auto NewSize = AllocInfo->Info.getTotalSize();
494 if ((CurSize > NewSize) ||
495 (CurSize == NewSize &&
// (elided line 496 — presumably the term preferring the noncold entry on a
// size tie, per the comment above; confirm upstream.)
497 continue;
498 It->second = AllocInfo;
499 }
500 // We may match this instruction's location list to multiple MIB
501 // contexts. Add them to a Trie specialized for trimming the contexts to
502 // the minimal needed to disambiguate contexts with unique behavior.
503 CallStackTrie AllocTrie(&ORE, MaxColdSize);
504 uint64_t TotalSize = 0;
505 uint64_t TotalColdSize = 0;
506 for (auto &[FullStackId, AllocInfo] : UniqueFullContextIdAllocInfo) {
507 // Check the full inlined call stack against this one.
508 // If we found and thus matched all frames on the call, include
509 // this MIB.
// (elided line 510 — the `if (...` condition matching the profile frames
// against InlinedCallStack; confirm upstream.)
511 InlinedCallStack)) {
512 NumOfMemProfMatchedAllocContexts++;
513 auto AllocType = addCallStack(AllocTrie, AllocInfo, FullStackId);
514 TotalSize += AllocInfo->Info.getTotalSize();
// (elided line 515 — presumably a guard restricting the cold-size tally to
// cold contexts; confirm upstream.)
516 TotalColdSize += AllocInfo->Info.getTotalSize();
517 // Record information about the allocation if match info printing
518 // was requested.
// (elided line 519 — presumably the ClPrintMemProfMatchInfo guard implied
// by the comment above; confirm upstream.)
520 assert(FullStackId != 0);
521 auto [Iter, Inserted] = FullStackIdToAllocMatchInfo.try_emplace(
522 FullStackId,
523 AllocMatchInfo(AllocInfo->Info.getTotalSize(), AllocType));
524 // Always insert the new matched frame count, since it may differ.
525 Iter->second.MatchedFramesSet.insert(InlinedCallStack.size());
// Save the full profiled stack only once, on first insertion.
526 if (Inserted && PrintMatchedAllocStack)
527 Iter->second.CallStack.insert(Iter->second.CallStack.begin(),
528 AllocInfo->CallStack.begin(),
529 AllocInfo->CallStack.end());
530 }
// Emit an optimization remark describing the matched context.
531 ORE.emit(
532 OptimizationRemark(DEBUG_TYPE, "MemProfUse", CI)
533 << ore::NV("AllocationCall", CI) << " in function "
534 << ore::NV("Caller", CI->getFunction())
535 << " matched alloc context with alloc type "
// (elided line 536 — the remark argument naming the alloc type.)
537 << " total size " << ore::NV("Size", AllocInfo->Info.getTotalSize())
538 << " full context id " << ore::NV("Context", FullStackId)
539 << " frame count " << ore::NV("Frames", InlinedCallStack.size()));
540 }
541 }
542 // If the threshold for the percent of cold bytes is less than 100%,
543 // and not all bytes are cold, see if we should still hint this
544 // allocation as cold without context sensitivity.
545 if (TotalColdSize < TotalSize && MinMatchedColdBytePercent < 100 &&
546 TotalColdSize * 100 >= MinMatchedColdBytePercent * TotalSize) {
547 AllocTrie.addSingleAllocTypeAttribute(CI, AllocationType::Cold, "dominant");
548 return;
549 }
550
551 // We might not have matched any to the full inlined call stack.
552 // But if we did, create and attach metadata, or a function attribute if
553 // all contexts have identical profiled behavior.
554 if (!AllocTrie.empty()) {
555 NumOfMemProfMatchedAllocs++;
556 // MemprofMDAttached will be false if a function attribute was
557 // attached.
558 bool MemprofMDAttached = AllocTrie.buildAndAttachMIBMetadata(CI);
559 assert(MemprofMDAttached == I.hasMetadata(LLVMContext::MD_memprof));
560 if (MemprofMDAttached) {
561 // Add callsite metadata for the instruction's location list so that
562 // it simpler later on to identify which part of the MIB contexts
563 // are from this particular instruction (including during inlining,
564 // when the callsite metadata will be updated appropriately).
565 // FIXME: can this be changed to strip out the matching stack
566 // context ids from the MIB contexts and not add any callsite
567 // metadata here to save space?
568 addCallsiteMetadata(I, InlinedCallStack, Ctx);
569 }
570 }
571 }
572
573// Helper struct for maintaining refs to callsite data. As an alternative we
574// could store a pointer to the CallSiteInfo struct but we also need the frame
575// index. Using ArrayRefs instead makes it a little easier to read.
577 // Subset of frames for the corresponding CallSiteInfo.
579 // Potential targets for indirect calls.
581};
582
// Matches a (non-allocation) call instruction \p I against the profiled
// call-site entries sharing its leaf location: attaches !callsite metadata
// on the first frame-prefix match, records the match in \p MatchedCallSites
// when printing is enabled, and for indirect calls accumulates callee GUIDs
// from all matching entries to attach as value-profile metadata.
// NOTE(review): lossy capture — original lines 588, 594, 599, 608, and
// 628-629 are elided below and marked inline; confirm against upstream.
583 static void handleCallSite(Instruction &I, const Function *CalledFunction,
584 ArrayRef<uint64_t> InlinedCallStack,
585 const std::vector<CallSiteEntry> &CallSiteEntries,
586 Module &M,
587 std::set<std::vector<uint64_t>> &MatchedCallSites,
// (elided line 588 — the trailing `OptimizationRemarkEmitter &ORE) {`
// parameter, as ORE is used below; confirm upstream.)
589 auto &Ctx = M.getContext();
590 // Set of Callee GUIDs to attach to indirect calls. We accumulate all of them
591 // to support cases where the instuction's inlined frames match multiple call
592 // site entries, which can happen if the profile was collected from a binary
593 // where this instruction was eventually inlined into multiple callers.
// (elided line 594 — the CalleeGuids set declaration used at the end.)
595 bool CallsiteMDAdded = false;
596 for (const auto &CallSiteEntry : CallSiteEntries) {
597 // If we found and thus matched all frames on the call, create and
598 // attach call stack metadata.
// (elided line 599 — the `if (...` frame-prefix match condition.)
600 InlinedCallStack)) {
601 NumOfMemProfMatchedCallSites++;
602 // Only need to find one with a matching call stack and add a single
603 // callsite metadata.
604 if (!CallsiteMDAdded) {
605 addCallsiteMetadata(I, InlinedCallStack, Ctx);
606
607 // Accumulate call site matching information upon request.
// (elided line 608 — presumably the print-match-info guard implied by the
// comment above; confirm upstream.)
609 std::vector<uint64_t> CallStack;
610 append_range(CallStack, InlinedCallStack);
611 MatchedCallSites.insert(std::move(CallStack));
612 }
// Emit a remark for the matched call site.
613 ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemProfUse", &I)
614 << ore::NV("CallSite", &I) << " in function "
615 << ore::NV("Caller", I.getFunction())
616 << " matched callsite with frame count "
617 << ore::NV("Frames", InlinedCallStack.size()));
618
619 // If this is a direct call, we're done.
620 if (CalledFunction)
621 break;
622 CallsiteMDAdded = true;
623 }
624
625 assert(!CalledFunction && "Didn't expect direct call");
626
627 // Collect Callee GUIDs from all matching CallSiteEntries.
// (elided lines 628-629 — the insertion of CallSiteEntry.CalleeGuids into
// the accumulated set; confirm upstream.)
630 }
631 }
632 // Try to attach indirect call metadata if possible.
633 addVPMetadata(M, I, CalleeGuids.getArrayRef());
634 }
635
636static void
638 const TargetLibraryInfo &TLI,
639 std::map<uint64_t, AllocMatchInfo> &FullStackIdToAllocMatchInfo,
640 std::set<std::vector<uint64_t>> &MatchedCallSites,
642 OptimizationRemarkEmitter &ORE, uint64_t MaxColdSize) {
643 auto &Ctx = M.getContext();
644 // Previously we used getIRPGOFuncName() here. If F is local linkage,
645 // getIRPGOFuncName() returns FuncName with prefix 'FileName;'. But
646 // llvm-profdata uses FuncName in dwarf to create GUID which doesn't
647 // contain FileName's prefix. It caused local linkage function can't
648 // find MemProfRecord. So we use getName() now.
649 // 'unique-internal-linkage-names' can make MemProf work better for local
650 // linkage function.
651 auto FuncName = F.getName();
652 auto FuncGUID = Function::getGUIDAssumingExternalLinkage(FuncName);
654 errs() << "MemProf: Function GUID " << FuncGUID << " is " << FuncName
655 << "\n";
656 std::optional<memprof::MemProfRecord> MemProfRec;
657 auto Err = MemProfReader->getMemProfRecord(FuncGUID).moveInto(MemProfRec);
658 if (Err) {
659 handleAllErrors(std::move(Err), [&](const InstrProfError &IPE) {
660 auto Err = IPE.get();
661 bool SkipWarning = false;
662 LLVM_DEBUG(dbgs() << "Error in reading profile for Func " << FuncName
663 << ": ");
665 NumOfMemProfMissing++;
666 SkipWarning = !PGOWarnMissing;
667 LLVM_DEBUG(dbgs() << "unknown function");
668 } else if (Err == instrprof_error::hash_mismatch) {
669 NumOfMemProfMismatch++;
670 SkipWarning =
673 (F.hasComdat() ||
675 LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")");
676 }
677
678 if (SkipWarning)
679 return;
680
681 std::string Msg = (IPE.message() + Twine(" ") + F.getName().str() +
682 Twine(" Hash = ") + std::to_string(FuncGUID))
683 .str();
684
685 Ctx.diagnose(
686 DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning));
687 });
688 return;
689 }
690
691 NumOfMemProfFunc++;
692
693 // If requested, undrfit MemProfRecord so that the source locations in it
694 // match those in the IR.
696 undriftMemProfRecord(UndriftMaps, *MemProfRec);
697
698 // Detect if there are non-zero column numbers in the profile. If not,
699 // treat all column numbers as 0 when matching (i.e. ignore any non-zero
700 // columns in the IR). The profiled binary might have been built with
701 // column numbers disabled, for example.
702 bool ProfileHasColumns = false;
703
704 // Build maps of the location hash to all profile data with that leaf location
705 // (allocation info and the callsites).
706 std::map<uint64_t, std::set<const AllocationInfo *>> LocHashToAllocInfo;
707
708 // For the callsites we need to record slices of the frame array (see comments
709 // below where the map entries are added) along with their CalleeGuids.
710 std::map<uint64_t, std::vector<CallSiteEntry>> LocHashToCallSites;
711 for (auto &AI : MemProfRec->AllocSites) {
712 NumOfMemProfAllocContextProfiles++;
713 // Associate the allocation info with the leaf frame. The later matching
714 // code will match any inlined call sequences in the IR with a longer prefix
715 // of call stack frames.
716 uint64_t StackId = computeStackId(AI.CallStack[0]);
717 LocHashToAllocInfo[StackId].insert(&AI);
718 ProfileHasColumns |= AI.CallStack[0].Column;
719 }
720 for (auto &CS : MemProfRec->CallSites) {
721 NumOfMemProfCallSiteProfiles++;
722 // Need to record all frames from leaf up to and including this function,
723 // as any of these may or may not have been inlined at this point.
724 unsigned Idx = 0;
725 for (auto &StackFrame : CS.Frames) {
726 uint64_t StackId = computeStackId(StackFrame);
727 ArrayRef<Frame> FrameSlice = ArrayRef<Frame>(CS.Frames).drop_front(Idx++);
728 // The callee guids for the slice containing all frames (due to the
729 // increment above Idx is now 1) comes from the CalleeGuids recorded in
730 // the CallSite. For the slices not containing the leaf-most frame, the
731 // callee guid is simply the function GUID of the prior frame.
732 LocHashToCallSites[StackId].push_back(
733 {FrameSlice, (Idx == 1 ? CS.CalleeGuids
735 CS.Frames[Idx - 2].Function))});
736
737 ProfileHasColumns |= StackFrame.Column;
738 // Once we find this function, we can stop recording.
739 if (StackFrame.Function == FuncGUID)
740 break;
741 }
742 assert(Idx <= CS.Frames.size() && CS.Frames[Idx - 1].Function == FuncGUID);
743 }
744
745 auto GetOffset = [](const DILocation *DIL) {
746 return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) &
747 0xffff;
748 };
749
750 // Now walk the instructions, looking up the associated profile data using
751 // debug locations.
752 for (auto &BB : F) {
753 for (auto &I : BB) {
754 if (I.isDebugOrPseudoInst())
755 continue;
756 // We are only interested in calls (allocation or interior call stack
757 // context calls).
758 auto *CI = dyn_cast<CallBase>(&I);
759 if (!CI)
760 continue;
761 auto *CalledFunction = CI->getCalledFunction();
762 if (CalledFunction && CalledFunction->isIntrinsic())
763 continue;
764 // List of call stack ids computed from the location hashes on debug
765 // locations (leaf to inlined at root).
766 SmallVector<uint64_t, 8> InlinedCallStack;
767 // Was the leaf location found in one of the profile maps?
768 bool LeafFound = false;
769 // If leaf was found in a map, iterators pointing to its location in both
770 // of the maps. It might exist in neither, one, or both (the latter case
771 // can happen because we don't currently have discriminators to
772 // distinguish the case when a single line/col maps to both an allocation
773 // and another callsite).
774 auto AllocInfoIter = LocHashToAllocInfo.end();
775 auto CallSitesIter = LocHashToCallSites.end();
776 for (const DILocation *DIL = I.getDebugLoc(); DIL != nullptr;
777 DIL = DIL->getInlinedAt()) {
778 // Use C++ linkage name if possible. Need to compile with
779 // -fdebug-info-for-profiling to get linkage name.
780 StringRef Name = DIL->getScope()->getSubprogram()->getLinkageName();
781 if (Name.empty())
782 Name = DIL->getScope()->getSubprogram()->getName();
783 auto CalleeGUID = Function::getGUIDAssumingExternalLinkage(Name);
784 auto StackId = computeStackId(CalleeGUID, GetOffset(DIL),
785 ProfileHasColumns ? DIL->getColumn() : 0);
786 // Check if we have found the profile's leaf frame. If yes, collect
787 // the rest of the call's inlined context starting here. If not, see if
788 // we find a match further up the inlined context (in case the profile
789 // was missing debug frames at the leaf).
790 if (!LeafFound) {
791 AllocInfoIter = LocHashToAllocInfo.find(StackId);
792 CallSitesIter = LocHashToCallSites.find(StackId);
793 if (AllocInfoIter != LocHashToAllocInfo.end() ||
794 CallSitesIter != LocHashToCallSites.end())
795 LeafFound = true;
796 }
797 if (LeafFound)
798 InlinedCallStack.push_back(StackId);
799 }
800 // If leaf not in either of the maps, skip inst.
801 if (!LeafFound)
802 continue;
803
804 // First add !memprof metadata from allocation info, if we found the
805 // instruction's leaf location in that map, and if the rest of the
806 // instruction's locations match the prefix Frame locations on an
807 // allocation context with the same leaf.
808 if (AllocInfoIter != LocHashToAllocInfo.end() &&
809 // Only consider allocations which support hinting.
810 isAllocationWithHotColdVariant(CI->getCalledFunction(), TLI))
811 handleAllocSite(I, CI, InlinedCallStack, Ctx, ORE, MaxColdSize,
812 AllocInfoIter->second, FullStackIdToAllocMatchInfo);
813 else if (CallSitesIter != LocHashToCallSites.end())
814 // Otherwise, add callsite metadata. If we reach here then we found the
815 // instruction's leaf location in the callsites map and not the
816 // allocation map.
817 handleCallSite(I, CalledFunction, InlinedCallStack,
818 CallSitesIter->second, M, MatchedCallSites, ORE);
819 }
820 }
821}
822
// Constructor: stores the memory profile path and, when no virtual file
// system was supplied, falls back to the real file system.
// (elided original line 824 — the second parameter line declaring `FS`,
// which the member initializer below consumes; confirm upstream.)
823 MemProfUsePass::MemProfUsePass(std::string MemoryProfileFile,
825 : MemoryProfileFileName(MemoryProfileFile), FS(FS) {
826 if (!FS)
827 this->FS = vfs::getRealFileSystem();
828 }
829
831 // Return immediately if the module doesn't contain any function or global
832 // variables.
833 if (M.empty() && M.globals().empty())
834 return PreservedAnalyses::all();
835
836 LLVM_DEBUG(dbgs() << "Read in memory profile:\n");
837 auto &Ctx = M.getContext();
838 auto ReaderOrErr = IndexedInstrProfReader::create(MemoryProfileFileName, *FS);
839 if (Error E = ReaderOrErr.takeError()) {
840 handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) {
841 Ctx.diagnose(
842 DiagnosticInfoPGOProfile(MemoryProfileFileName.data(), EI.message()));
843 });
844 return PreservedAnalyses::all();
845 }
846
847 std::unique_ptr<IndexedInstrProfReader> MemProfReader =
848 std::move(ReaderOrErr.get());
849 if (!MemProfReader) {
850 Ctx.diagnose(DiagnosticInfoPGOProfile(
851 MemoryProfileFileName.data(), StringRef("Cannot get MemProfReader")));
852 return PreservedAnalyses::all();
853 }
854
855 if (!MemProfReader->hasMemoryProfile()) {
856 Ctx.diagnose(DiagnosticInfoPGOProfile(MemoryProfileFileName.data(),
857 "Not a memory profile"));
858 return PreservedAnalyses::all();
859 }
860
861 const bool Changed =
862 annotateGlobalVariables(M, MemProfReader->getDataAccessProfileData());
863
864 // If the module doesn't contain any function, return after we process all
865 // global variables.
866 if (M.empty())
868
869 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
870
871 TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(*M.begin());
874 UndriftMaps = computeUndriftMap(M, MemProfReader.get(), TLI);
875
876 // Map from the stack hash of each matched allocation context in the function
877 // profiles to match info such as the total profiled size (bytes), allocation
878 // type, number of frames matched to the allocation itself, and the full array
879 // of call stack ids.
880 std::map<uint64_t, AllocMatchInfo> FullStackIdToAllocMatchInfo;
881
882 // Set of the matched call sites, each expressed as a sequence of an inline
883 // call stack.
884 std::set<std::vector<uint64_t>> MatchedCallSites;
885
886 uint64_t MaxColdSize = 0;
887 if (auto *MemProfSum = MemProfReader->getMemProfSummary())
888 MaxColdSize = MemProfSum->getMaxColdTotalSize();
889
890 for (auto &F : M) {
891 if (F.isDeclaration())
892 continue;
893
894 const TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
895 auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
896 readMemprof(M, F, MemProfReader.get(), TLI, FullStackIdToAllocMatchInfo,
897 MatchedCallSites, UndriftMaps, ORE, MaxColdSize);
898 }
899
901 for (const auto &[Id, Info] : FullStackIdToAllocMatchInfo) {
902 for (auto Frames : Info.MatchedFramesSet) {
903 // TODO: To reduce verbosity, should we change the existing message
904 // so that we emit a list of matched frame counts in a single message
 905 // about the context (instead of one message per frame count)?
906 errs() << "MemProf " << getAllocTypeAttributeString(Info.AllocType)
907 << " context with id " << Id << " has total profiled size "
908 << Info.TotalSize << " is matched with " << Frames << " frames";
910 errs() << " and call stack";
911 for (auto &F : Info.CallStack)
912 errs() << " " << computeStackId(F);
913 }
914 errs() << "\n";
915 }
916 }
917
918 for (const auto &CallStack : MatchedCallSites) {
919 errs() << "MemProf callsite match for inline call stack";
920 for (uint64_t StackId : CallStack)
921 errs() << " " << StackId;
922 errs() << "\n";
923 }
924 }
925
927}
928
929bool MemProfUsePass::annotateGlobalVariables(
930 Module &M, const memprof::DataAccessProfData *DataAccessProf) {
931 if (!AnnotateStaticDataSectionPrefix || M.globals().empty())
932 return false;
933
934 if (!DataAccessProf) {
935 M.addModuleFlag(Module::Warning, "EnableDataAccessProf", 0U);
936 // FIXME: Add a diagnostic message without failing the compilation when
937 // data access profile payload is not available.
938 return false;
939 }
940 M.addModuleFlag(Module::Warning, "EnableDataAccessProf", 1U);
941
942 bool Changed = false;
943 // Iterate all global variables in the module and annotate them based on
944 // data access profiles. Note it's up to the linker to decide how to map input
945 // sections to output sections, and one conservative practice is to map
946 // unlikely-prefixed ones to unlikely output section, and map the rest
947 // (hot-prefixed or prefix-less) to the canonical output section.
948 for (GlobalVariable &GVar : M.globals()) {
949 assert(!GVar.getSectionPrefix().has_value() &&
950 "GVar shouldn't have section prefix yet");
951 auto Kind = llvm::memprof::getAnnotationKind(GVar);
954 continue;
955 }
956
957 StringRef Name = GVar.getName();
958 SymbolHandleRef Handle = SymbolHandleRef(Name);
959 // Skip string literals as their mangled names don't stay stable across
960 // binary releases.
962 if (Name.starts_with(".str"))
963 continue;
964
965 if (Name.starts_with(".str")) {
966 std::optional<uint64_t> Hash = getStringContentHash(GVar);
967 if (!Hash) {
968 LLVM_DEBUG(dbgs() << "Cannot compute content hash for string literal "
969 << Name << "\n");
970 continue;
971 }
972 Handle = SymbolHandleRef(Hash.value());
973 }
974
975 // DataAccessProfRecord's get* methods will canonicalize the name under the
976 // hood before looking it up, so optimizer doesn't need to do it.
977 std::optional<DataAccessProfRecord> Record =
978 DataAccessProf->getProfileRecord(Handle);
979 // Annotate a global variable as hot if it has non-zero sampled count, and
980 // annotate it as cold if it's seen in the profiled binary
981 // file but doesn't have any access sample.
982 // For logging, optimization remark emitter requires a llvm::Function, but
983 // it's not well defined how to associate a global variable with a function.
984 // So we just print out the static data section prefix in LLVM_DEBUG.
985 if (Record && Record->AccessCount > 0) {
986 ++NumOfMemProfHotGlobalVars;
987 Changed |= GVar.setSectionPrefix("hot");
988 LLVM_DEBUG(dbgs() << "Global variable " << Name
989 << " is annotated as hot\n");
990 } else if (DataAccessProf->isKnownColdSymbol(Handle)) {
991 ++NumOfMemProfColdGlobalVars;
992 Changed |= GVar.setSectionPrefix("unlikely");
993 Changed = true;
994 LLVM_DEBUG(dbgs() << "Global variable " << Name
995 << " is annotated as unlikely\n");
996 } else {
997 ++NumOfMemProfUnknownGlobalVars;
998 LLVM_DEBUG(dbgs() << "Global variable " << Name << " is not annotated\n");
999 }
1000 }
1001
1002 return Changed;
1003}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define DEBUG_TYPE
Module.h This file contains the declarations for the Module class.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define G(x, y, z)
Definition MD5.cpp:55
static std::optional< uint64_t > getStringContentHash(const GlobalVariable &GVar)
static void addCallsiteMetadata(Instruction &I, ArrayRef< uint64_t > InlinedCallStack, LLVMContext &Ctx)
static bool isAllocationWithHotColdVariant(const Function *Callee, const TargetLibraryInfo &TLI)
static cl::opt< bool > ClMemProfAttachCalleeGuids("memprof-attach-calleeguids", cl::desc("Attach calleeguids as value profile metadata for indirect calls."), cl::init(true), cl::Hidden)
static void HandleUnsupportedAnnotationKinds(GlobalVariable &GVar, AnnotationKind Kind)
static void undriftMemProfRecord(const DenseMap< uint64_t, LocToLocMap > &UndriftMaps, memprof::MemProfRecord &MemProfRec)
static uint64_t computeStackId(GlobalValue::GUID Function, uint32_t LineOffset, uint32_t Column)
static cl::opt< bool > PrintMatchedAllocStack("memprof-print-matched-alloc-stack", cl::desc("Print full stack context for matched " "allocations with -memprof-print-match-info."), cl::Hidden, cl::init(false))
static void handleCallSite(Instruction &I, const Function *CalledFunction, ArrayRef< uint64_t > InlinedCallStack, const std::vector< CallSiteEntry > &CallSiteEntries, Module &M, std::set< std::vector< uint64_t > > &MatchedCallSites, OptimizationRemarkEmitter &ORE)
static cl::opt< bool > ClPrintMemProfMatchInfo("memprof-print-match-info", cl::desc("Print matching stats for each allocation " "context in this module's profiles"), cl::Hidden, cl::init(false))
static void addVPMetadata(Module &M, Instruction &I, ArrayRef< GlobalValue::GUID > CalleeGuids)
static cl::opt< bool > PrintFunctionGuids("memprof-print-function-guids", cl::desc("Print function GUIDs computed for matching"), cl::Hidden, cl::init(false))
static cl::opt< bool > AnnotateStaticDataSectionPrefix("memprof-annotate-static-data-prefix", cl::init(false), cl::Hidden, cl::desc("If true, annotate the static data section prefix"))
static void handleAllocSite(Instruction &I, CallBase *CI, ArrayRef< uint64_t > InlinedCallStack, LLVMContext &Ctx, OptimizationRemarkEmitter &ORE, uint64_t MaxColdSize, const std::set< const AllocationInfo * > &AllocInfoSet, std::map< uint64_t, AllocMatchInfo > &FullStackIdToAllocMatchInfo)
static cl::opt< bool > SalvageStaleProfile("memprof-salvage-stale-profile", cl::desc("Salvage stale MemProf profile"), cl::init(false), cl::Hidden)
static cl::opt< unsigned > MinMatchedColdBytePercent("memprof-matching-cold-threshold", cl::init(100), cl::Hidden, cl::desc("Min percent of cold bytes matched to hint allocation cold"))
static void readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader, const TargetLibraryInfo &TLI, std::map< uint64_t, AllocMatchInfo > &FullStackIdToAllocMatchInfo, std::set< std::vector< uint64_t > > &MatchedCallSites, DenseMap< uint64_t, LocToLocMap > &UndriftMaps, OptimizationRemarkEmitter &ORE, uint64_t MaxColdSize)
static cl::opt< bool > AnnotateStringLiteralSectionPrefix("memprof-annotate-string-literal-section-prefix", cl::init(false), cl::Hidden, cl::desc("If true, annotate the string literal data section prefix"))
static cl::opt< bool > ClMemProfMatchHotColdNew("memprof-match-hot-cold-new", cl::desc("Match allocation profiles onto existing hot/cold operator new calls"), cl::Hidden, cl::init(false))
static AllocationType addCallStack(CallStackTrie &AllocTrie, const AllocationInfo *AllocInfo, uint64_t FullStackId)
static bool stackFrameIncludesInlinedCallStack(ArrayRef< Frame > ProfileCallStack, ArrayRef< uint64_t > InlinedCallStack)
AllocType
FunctionAnalysisManager FAM
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
Defines the virtual file system interface vfs::FileSystem.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
ArrayRef< T > take_front(size_t N=1) const
Return a copy of *this with only the first N elements.
Definition ArrayRef.h:219
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
Definition ArrayRef.h:195
iterator end() const
Definition ArrayRef.h:131
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
iterator begin() const
Definition ArrayRef.h:130
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:137
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:256
iterator end()
Definition DenseMap.h:81
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition DenseMap.h:169
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:241
Implements a dense probed hash-table based set.
Definition DenseSet.h:279
Diagnostic information for the PGO profiler.
Base class for error info classes.
Definition Error.h:44
virtual std::string message() const
Return the error message as a string.
Definition Error.h:52
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
static LLVM_ABI GUID getGUIDAssumingExternalLinkage(StringRef GlobalName)
Return a 64-bit global unique ID constructed from the name of a global symbol.
Definition Globals.cpp:78
uint64_t GUID
Declare a type to represent a global unique identifier for a global value.
@ AvailableExternallyLinkage
Available for inspection, not emission.
Definition GlobalValue.h:54
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
HashResultTy< HasherT_ > final()
Forward to HasherT::final() if available.
Definition HashBuilder.h:64
Interface to help hash various types through a hasher type.
std::enable_if_t< hashbuilder_detail::IsHashableData< T >::value, HashBuilder & > add(T Value)
Implement hashing for hashable data types, e.g. integral or enum values.
Reader for the indexed binary instrprof format.
static Expected< std::unique_ptr< IndexedInstrProfReader > > create(const Twine &Path, vfs::FileSystem &FS, const Twine &RemappingPath="")
Factory method to create an indexed reader.
instrprof_error get() const
Definition InstrProf.h:464
std::string message() const override
Return the error message as a string.
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
A smart pointer to a reference-counted object that inherits from RefCountedBase or ThreadSafeRefCount...
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
LLVM_ABI MemProfUsePass(std::string MemoryProfileFile, IntrusiveRefCntPtr< vfs::FileSystem > FS=nullptr)
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
@ Warning
Emits a warning if two values disagree.
Definition Module.h:124
The optimization diagnostic interface.
LLVM_ABI void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for applied optimization remarks.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
A vector that has set insertion semantics.
Definition SetVector.h:57
ArrayRef< value_type > getArrayRef() const
Definition SetVector.h:91
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
void append(StringRef RHS)
Append from a StringRef.
Definition SmallString.h:68
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
constexpr bool empty() const
empty - Check if the string is empty.
Definition StringRef.h:143
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
An efficient, type-erasing, non-owning reference to a callable.
Class to build a trie of call stack contexts for a particular profiled allocation call,...
LLVM_ABI void addCallStack(AllocationType AllocType, ArrayRef< uint64_t > StackIds, std::vector< ContextTotalSize > ContextSizeInfo={})
Add a call stack context with the given allocation type to the Trie.
LLVM_ABI void addSingleAllocTypeAttribute(CallBase *CI, AllocationType AT, StringRef Descriptor)
Add an attribute for the given allocation type to the call instruction.
LLVM_ABI bool buildAndAttachMIBMetadata(CallBase *CI)
Build and attach the minimal necessary MIB metadata.
Helper class to iterate through stack ids in both metadata (memprof MIB and callsite) and the corresp...
Encapsulates the data access profile data and the methods to operate on it.
LLVM_ABI std::optional< DataAccessProfRecord > getProfileRecord(const SymbolHandleRef SymID) const
Returns a profile record for SymbolID, or std::nullopt if there isn't a record.
LLVM_ABI bool isKnownColdSymbol(const SymbolHandleRef SymID) const
Returns true if SymID is seen in profiled binaries and cold.
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
initializer< Ty > init(const Ty &Val)
LLVM_ABI DenseMap< uint64_t, LocToLocMap > computeUndriftMap(Module &M, IndexedInstrProfReader *MemProfReader, const TargetLibraryInfo &TLI)
LLVM_ABI MDNode * buildCallstackMetadata(ArrayRef< uint64_t > CallStack, LLVMContext &Ctx)
Build callstack metadata from the provided list of call stack ids.
LLVM_ABI AllocationType getAllocType(uint64_t TotalLifetimeAccessDensity, uint64_t AllocCount, uint64_t TotalLifetime)
Return the allocation type for a given set of memory profile values.
LLVM_ABI bool recordContextSizeInfoForAnalysis()
Whether we need to record the context size info in the alloc trie used to build metadata.
std::unordered_map< LineLocation, LineLocation, LineLocationHash > LocToLocMap
Definition MemProfUse.h:65
LLVM_ABI uint64_t computeFullStackId(ArrayRef< Frame > CallStack)
Helper to generate a single hash id for a given callstack, used for emitting matching statistics and ...
std::variant< StringRef, uint64_t > SymbolHandleRef
LLVM_ABI DenseMap< uint64_t, SmallVector< CallEdgeTy, 0 > > extractCallsFromIR(Module &M, const TargetLibraryInfo &TLI, function_ref< bool(uint64_t)> IsPresentInProfile=[](uint64_t) { return true;})
AnnotationKind getAnnotationKind(const GlobalVariable &GV)
Returns the annotation kind of the global variable GV.
LLVM_ABI GlobalValue::GUID getGUID(const StringRef FunctionName)
Definition MemProf.cpp:344
LLVM_ABI std::string getAllocTypeAttributeString(AllocationType Type)
Returns the string to use in attributes with the given type.
DiagnosticInfoOptimizationBase::Argument NV
uint64_t MD5Hash(const FunctionId &Obj)
Definition FunctionId.h:167
LLVM_ABI IntrusiveRefCntPtr< FileSystem > getRealFileSystem()
Gets an vfs::FileSystem for the 'real' file system, as seen by the operating system.
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
std::array< uint8_t, NumBytes > BLAKE3Result
The constant LLVM_BLAKE3_OUT_LEN provides the default output length, 32 bytes, which is recommended f...
Definition BLAKE3.h:35
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
void handleAllErrors(Error E, HandlerTs &&... Handlers)
Behaves the same as handleErrors, except that by contract all errors must be handled by the given han...
Definition Error.h:990
constexpr from_range_t from_range
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2198
InnerAnalysisManagerProxy< FunctionAnalysisManager, Module > FunctionAnalysisManagerModuleProxy
Provide the FunctionAnalysisManager to Module proxy.
cl::opt< bool > PGOWarnMissing
auto unique(Range &&R, Predicate P)
Definition STLExtras.h:2124
auto map_range(ContainerTy &&C, FuncTy F)
Definition STLExtras.h:364
LLVM_ABI void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1634
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI SmallVector< InstrProfValueData, 4 > getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind, uint32_t MaxNumValueData, uint64_t &TotalC, bool GetNoICPValue=false)
Extract the value profile data from Inst and returns them if Inst is annotated with value profile dat...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
cl::opt< bool > NoPGOWarnMismatch
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
cl::opt< bool > SalvageStaleProfile("salvage-stale-profile", cl::Hidden, cl::init(false), cl::desc("Salvage stale profile by fuzzy matching and use the remapped " "location for sample profile query."))
void longestCommonSequence(AnchorList AnchorList1, AnchorList AnchorList2, llvm::function_ref< bool(const Function &, const Function &)> FunctionMatchesProfile, llvm::function_ref< void(Loc, Loc)> InsertMatching)
ArrayRef(const T &OneElt) -> ArrayRef< T >
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Definition STLExtras.h:2136
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
Definition MIRParser.h:39
cl::opt< bool > NoPGOWarnMismatchComdatWeak
std::set< unsigned > MatchedFramesSet
uint64_t TotalSize
std::vector< Frame > CallStack
AllocMatchInfo(uint64_t TotalSize, AllocationType AllocType)
AllocationType AllocType
ArrayRef< GlobalValue::GUID > CalleeGuids
ArrayRef< Frame > Frames
Summary of memprof metadata on allocations.
GlobalValue::GUID Function
Definition MemProf.h:245
uint32_t LineOffset
Definition MemProf.h:250
llvm::SmallVector< CallSiteInfo > CallSites
Definition MemProf.h:522
llvm::SmallVector< AllocationInfo > AllocSites
Definition MemProf.h:520