LLVM 23.0.0git
MemProfUse.cpp
Go to the documentation of this file.
1//===- MemProfUse.cpp - memory allocation profile use pass --*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the MemProfUsePass which reads memory profiling data
10// and uses it to add metadata to instructions to guide optimization.
11//
12//===----------------------------------------------------------------------===//
13
15#include "llvm/ADT/DenseSet.h"
17#include "llvm/ADT/Statistic.h"
18#include "llvm/ADT/StringRef.h"
24#include "llvm/IR/Function.h"
26#include "llvm/IR/Module.h"
31#include "llvm/Support/BLAKE3.h"
33#include "llvm/Support/Debug.h"
34#include "llvm/Support/Format.h"
36#include "llvm/Support/MD5.h"
39#include <map>
40#include <set>
41
42using namespace llvm;
43using namespace llvm::memprof;
44
45#define DEBUG_TYPE "memprof"
46
47namespace llvm {
52} // namespace llvm
53
54// By default disable matching of allocation profiles onto operator new that
55// already explicitly pass a hot/cold hint, since we don't currently
56// override these hints anyway.
58 "memprof-match-hot-cold-new",
60 "Match allocation profiles onto existing hot/cold operator new calls"),
61 cl::Hidden, cl::init(false));
62
// Hidden boolean option "memprof-print-match-info", off by default. When set,
// requests printing of matching stats for each allocation context in this
// module's profiles.
63static cl::opt<bool>
64 ClPrintMemProfMatchInfo("memprof-print-match-info",
65 cl::desc("Print matching stats for each allocation "
66 "context in this module's profiles"),
67 cl::Hidden, cl::init(false));
68
70 "memprof-print-matched-alloc-stack",
71 cl::desc("Print full stack context for matched "
72 "allocations with -memprof-print-match-info."),
73 cl::Hidden, cl::init(false));
74
// Hidden boolean option "memprof-print-function-guids", off by default.
// NOTE(review): presumably this gates the "MemProf: Function GUID ... is ..."
// message written to errs() while reading a function's profile - confirm
// against the use site.
75static cl::opt<bool>
76 PrintFunctionGuids("memprof-print-function-guids",
77 cl::desc("Print function GUIDs computed for matching"),
78 cl::Hidden, cl::init(false));
79
// Hidden boolean option "memprof-salvage-stale-profile", off by default.
// NOTE(review): presumably this enables computing the undrift maps and
// undrifting each MemProfRecord before matching - confirm against the use
// sites.
80static cl::opt<bool>
81 SalvageStaleProfile("memprof-salvage-stale-profile",
82 cl::desc("Salvage stale MemProf profile"),
83 cl::init(false), cl::Hidden);
84
86 "memprof-attach-calleeguids",
88 "Attach calleeguids as value profile metadata for indirect calls."),
89 cl::init(true), cl::Hidden);
90
92 "memprof-matching-cold-threshold", cl::init(100), cl::Hidden,
93 cl::desc("Min percent of cold bytes matched to hint allocation cold"));
94
96 "memprof-annotate-static-data-prefix", cl::init(false), cl::Hidden,
97 cl::desc("If true, annotate the static data section prefix"));
98
99// Matching statistics
// Outcome of looking up each defined function's record in the profile.
100STATISTIC(NumOfMemProfMissing, "Number of functions without memory profile.");
101STATISTIC(NumOfMemProfMismatch,
102 "Number of functions having mismatched memory profile hash.");
103STATISTIC(NumOfMemProfFunc, "Number of functions having valid memory profile.");
// Amount of profile data seen: one count per alloc site / call site entry of
// every record that was read successfully.
104STATISTIC(NumOfMemProfAllocContextProfiles,
105 "Number of alloc contexts in memory profile.");
106STATISTIC(NumOfMemProfCallSiteProfiles,
107 "Number of callsites in memory profile.");
// Amount of profile data that was matched onto call instructions in the IR.
108STATISTIC(NumOfMemProfMatchedAllocContexts,
109 "Number of matched memory profile alloc contexts.");
110STATISTIC(NumOfMemProfMatchedAllocs,
111 "Number of matched memory profile allocs.");
112STATISTIC(NumOfMemProfMatchedCallSites,
113 "Number of matched memory profile callsites.");
// Global variable (static data) section prefix annotation results.
114STATISTIC(NumOfMemProfHotGlobalVars,
115 "Number of global vars annotated with 'hot' section prefix.");
116STATISTIC(NumOfMemProfColdGlobalVars,
117 "Number of global vars annotated with 'unlikely' section prefix.");
118STATISTIC(NumOfMemProfUnknownGlobalVars,
119 "Number of global vars with unknown hotness (no section prefix).");
120STATISTIC(NumOfMemProfExplicitSectionGlobalVars,
121 "Number of global vars with user-specified section (not annotated).");
122
124 ArrayRef<uint64_t> InlinedCallStack,
125 LLVMContext &Ctx) {
126 I.setMetadata(LLVMContext::MD_callsite,
127 buildCallstackMetadata(InlinedCallStack, Ctx));
128}
129
131 uint32_t Column) {
134 HashBuilder.add(Function, LineOffset, Column);
136 uint64_t Id;
137 std::memcpy(&Id, Hash.data(), sizeof(Hash));
138 return Id;
139}
140
144
146 return getAllocType(AllocInfo->Info.getTotalLifetimeAccessDensity(),
147 AllocInfo->Info.getAllocCount(),
148 AllocInfo->Info.getTotalLifetime());
149}
150
153 uint64_t FullStackId) {
154 SmallVector<uint64_t> StackIds;
155 for (const auto &StackFrame : AllocInfo->CallStack)
156 StackIds.push_back(computeStackId(StackFrame));
158 std::vector<ContextTotalSize> ContextSizeInfo;
160 auto TotalSize = AllocInfo->Info.getTotalSize();
161 assert(TotalSize);
162 assert(FullStackId != 0);
163 ContextSizeInfo.push_back({FullStackId, TotalSize});
164 }
165 AllocTrie.addCallStack(AllocType, StackIds, std::move(ContextSizeInfo));
166 return AllocType;
167}
168
169// Return true if InlinedCallStack, computed from a call instruction's debug
170// info, is a prefix of ProfileCallStack, a list of Frames from profile data
171// (either the allocation data or a callsite).
172static bool
174 ArrayRef<uint64_t> InlinedCallStack) {
175 return ProfileCallStack.size() >= InlinedCallStack.size() &&
176 llvm::equal(ProfileCallStack.take_front(InlinedCallStack.size()),
177 InlinedCallStack, [](const Frame &F, uint64_t StackId) {
178 return computeStackId(F) == StackId;
179 });
180}
181
// Returns true if Callee is a library allocation function recognized by TLI
// that has a counterpart taking a hot/cold hint. Returns false when Callee is
// null (e.g. no statically known callee) or when TLI does not recognize it as
// a library function.
182static bool isAllocationWithHotColdVariant(const Function *Callee,
183 const TargetLibraryInfo &TLI) {
184 if (!Callee)
185 return false;
186 LibFunc Func;
187 if (!TLI.getLibFunc(*Callee, Func))
188 return false;
189 switch (Func) {
// Allocation functions that do not take a hot/cold hint themselves.
190 case LibFunc_Znwm:
191 case LibFunc_ZnwmRKSt9nothrow_t:
192 case LibFunc_ZnwmSt11align_val_t:
193 case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t:
194 case LibFunc_Znam:
195 case LibFunc_ZnamRKSt9nothrow_t:
196 case LibFunc_ZnamSt11align_val_t:
197 case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t:
198 case LibFunc_size_returning_new:
199 case LibFunc_size_returning_new_aligned:
200 return true;
// Variants that already pass an explicit __hot_cold_t hint.
// NOTE(review): presumably these are matched only when
// -memprof-match-hot-cold-new is enabled - confirm.
201 case LibFunc_Znwm12__hot_cold_t:
202 case LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t:
203 case LibFunc_ZnwmSt11align_val_t12__hot_cold_t:
204 case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t:
205 case LibFunc_Znam12__hot_cold_t:
206 case LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t:
207 case LibFunc_ZnamSt11align_val_t12__hot_cold_t:
208 case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t:
209 case LibFunc_size_returning_new_hot_cold:
210 case LibFunc_size_returning_new_aligned_hot_cold:
// Any other library function is not an allocation we can hint.
212 default:
213 return false;
214 }
215}
216
218 AnnotationKind Kind) {
220 "Should not handle AnnotationOK here");
221 SmallString<32> Reason;
222 switch (Kind) {
224 ++NumOfMemProfExplicitSectionGlobalVars;
225 Reason.append("explicit section name");
226 break;
228 Reason.append("linker declaration");
229 break;
231 Reason.append("name starts with `llvm.`");
232 break;
233 default:
234 llvm_unreachable("Unexpected annotation kind");
235 }
236 LLVM_DEBUG(dbgs() << "Skip annotation for " << GVar.getName() << " due to "
237 << Reason << ".\n");
238}
239
240// Computes the LLVM version of MD5 hash for the content of a string
241// literal.
242static std::optional<uint64_t>
244 auto *Initializer = GVar.getInitializer();
245 if (!Initializer)
246 return std::nullopt;
247 if (auto *C = dyn_cast<ConstantDataSequential>(Initializer))
248 if (C->isString()) {
249 // Note the hash computed for the literal would include the null byte.
250 return llvm::MD5Hash(C->getAsString());
251 }
252 return std::nullopt;
253}
254
255// Structure for tracking info about matched allocation contexts for use with
256// -memprof-print-match-info and -memprof-print-matched-alloc-stack.
258 // Total size in bytes of matched context.
260 // Matched allocation's type.
262 // Number of frames matched to the allocation itself (values will be >1 in
263 // cases where allocation was already inlined). Use a set because there can
264 // be multiple inlined instances and each may have a different inline depth.
265 // Use std::set to iterate in sorted order when printing.
266 std::set<unsigned> MatchedFramesSet;
267 // The full call stack of the allocation, for cases where requested via
268 // -memprof-print-matched-alloc-stack.
269 std::vector<Frame> CallStack;
270
271 // Caller responsible for inserting the matched frames and the call stack when
272 // appropriate.
275};
276
279 function_ref<bool(uint64_t)> IsPresentInProfile) {
281
282 auto GetOffset = [](const DILocation *DIL) {
283 return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) &
284 0xffff;
285 };
286
287 for (Function &F : M) {
288 if (F.isDeclaration())
289 continue;
290
291 for (auto &BB : F) {
292 for (auto &I : BB) {
294 continue;
295
296 auto *CB = dyn_cast<CallBase>(&I);
297 auto *CalledFunction = CB->getCalledFunction();
298 // Disregard indirect calls and intrinsics.
299 if (!CalledFunction || CalledFunction->isIntrinsic())
300 continue;
301
302 StringRef CalleeName = CalledFunction->getName();
303 // True if we are calling a heap allocation function that supports
304 // hot/cold variants.
305 bool IsAlloc = isAllocationWithHotColdVariant(CalledFunction, TLI);
306 // True for the first iteration below, indicating that we are looking at
307 // a leaf node.
308 bool IsLeaf = true;
309 for (const DILocation *DIL = I.getDebugLoc(); DIL;
310 DIL = DIL->getInlinedAt()) {
311 StringRef CallerName = DIL->getSubprogramLinkageName();
312 assert(!CallerName.empty() &&
313 "Be sure to enable -fdebug-info-for-profiling");
314 uint64_t CallerGUID = memprof::getGUID(CallerName);
315 uint64_t CalleeGUID = memprof::getGUID(CalleeName);
316 // Pretend that we are calling a function with GUID == 0 if we are
317 // in the inline stack leading to a heap allocation function.
318 if (IsAlloc) {
319 if (IsLeaf) {
320 // For leaf nodes, set CalleeGUID to 0 without consulting
321 // IsPresentInProfile.
322 CalleeGUID = 0;
323 } else if (!IsPresentInProfile(CalleeGUID)) {
324 // In addition to the leaf case above, continue to set CalleeGUID
325 // to 0 as long as we don't see CalleeGUID in the profile.
326 CalleeGUID = 0;
327 } else {
328 // Once we encounter a callee that exists in the profile, stop
329 // setting CalleeGUID to 0.
330 IsAlloc = false;
331 }
332 }
333
334 LineLocation Loc = {GetOffset(DIL), DIL->getColumn()};
335 Calls[CallerGUID].emplace_back(Loc, CalleeGUID);
336 CalleeName = CallerName;
337 IsLeaf = false;
338 }
339 }
340 }
341 }
342
343 // Sort each call list by the source location.
344 for (auto &[CallerGUID, CallList] : Calls) {
345 llvm::sort(CallList);
346 CallList.erase(llvm::unique(CallList), CallList.end());
347 }
348
349 return Calls;
350}
351
354 const TargetLibraryInfo &TLI) {
356
358 MemProfReader->getMemProfCallerCalleePairs();
360 extractCallsFromIR(M, TLI, [&](uint64_t GUID) {
361 return CallsFromProfile.contains(GUID);
362 });
363
364 // Compute an undrift map for each CallerGUID.
365 for (const auto &[CallerGUID, IRAnchors] : CallsFromIR) {
366 auto It = CallsFromProfile.find(CallerGUID);
367 if (It == CallsFromProfile.end())
368 continue;
369 const auto &ProfileAnchors = It->second;
370
371 LocToLocMap Matchings;
373 ProfileAnchors, IRAnchors, std::equal_to<GlobalValue::GUID>(),
374 [&](LineLocation A, LineLocation B) { Matchings.try_emplace(A, B); });
375 [[maybe_unused]] bool Inserted =
376 UndriftMaps.try_emplace(CallerGUID, std::move(Matchings)).second;
377
378 // The insertion must succeed because we visit each GUID exactly once.
379 assert(Inserted);
380 }
381
382 return UndriftMaps;
383}
384
385// Given a MemProfRecord, undrift all the source locations present in the
386// record in place.
387static void
389 memprof::MemProfRecord &MemProfRec) {
390 // Undrift a call stack in place.
391 auto UndriftCallStack = [&](std::vector<Frame> &CallStack) {
392 for (auto &F : CallStack) {
393 auto I = UndriftMaps.find(F.Function);
394 if (I == UndriftMaps.end())
395 continue;
396 auto J = I->second.find(LineLocation(F.LineOffset, F.Column));
397 if (J == I->second.end())
398 continue;
399 auto &NewLoc = J->second;
400 F.LineOffset = NewLoc.LineOffset;
401 F.Column = NewLoc.Column;
402 }
403 };
404
405 for (auto &AS : MemProfRec.AllocSites)
406 UndriftCallStack(AS.CallStack);
407
408 for (auto &CS : MemProfRec.CallSites)
409 UndriftCallStack(CS.Frames);
410}
411
412// Helper function to process CalleeGuids and create value profile metadata
414 ArrayRef<GlobalValue::GUID> CalleeGuids) {
415 if (!ClMemProfAttachCalleeGuids || CalleeGuids.empty())
416 return;
417
418 // Prepare the vector of value data, initializing from any existing
419 // value-profile metadata present on the instruction so that we merge the
420 // new CalleeGuids into the existing entries.
422 uint64_t TotalCount = 0;
423
424 if (I.getMetadata(LLVMContext::MD_prof)) {
425 // Read all existing entries so we can merge them. Use a large
426 // MaxNumValueData to retrieve all existing entries.
427 VDs = getValueProfDataFromInst(I, IPVK_IndirectCallTarget,
428 /*MaxNumValueData=*/UINT32_MAX, TotalCount);
429 }
430
431 // Save the original size for use later in detecting whether any were added.
432 const size_t OriginalSize = VDs.size();
433
434 // Initialize the set of existing guids with the original list.
435 DenseSet<uint64_t> ExistingValues(
438 VDs, [](const InstrProfValueData &Entry) { return Entry.Value; }));
439
440 // Merge CalleeGuids into list of existing VDs, by appending any that are not
441 // already included.
442 VDs.reserve(OriginalSize + CalleeGuids.size());
443 for (auto G : CalleeGuids) {
444 if (!ExistingValues.insert(G).second)
445 continue;
446 InstrProfValueData NewEntry;
447 NewEntry.Value = G;
448 // For MemProf, we don't have actual call counts, so we assign
449 // a weight of 1 to each potential target.
450 // TODO: Consider making this weight configurable or increasing it to
451 // improve effectiveness for ICP.
452 NewEntry.Count = 1;
453 TotalCount += NewEntry.Count;
454 VDs.push_back(NewEntry);
455 }
456
457 // Update the VP metadata if we added any new callee GUIDs to the list.
458 assert(VDs.size() >= OriginalSize);
459 if (VDs.size() == OriginalSize)
460 return;
461
462 // First clear the existing !prof.
463 I.setMetadata(LLVMContext::MD_prof, nullptr);
464
465 // No need to sort the updated VDs as all appended entries have the same count
466 // of 1, which is no larger than any existing entries. The incoming list of
467 // CalleeGuids should already be deterministic for a given profile.
468 annotateValueSite(M, I, VDs, TotalCount, IPVK_IndirectCallTarget, VDs.size());
469}
470
// Matches the allocation call I/CI against the profiled allocation contexts in
// AllocInfoSet and annotates the call with the result.
//
// Steps:
//  1. Deduplicate the contexts by full stack id, keeping the larger one.
//  2. Add each matching context to a CallStackTrie, accumulating the total and
//     cold byte counts, recording match info in FullStackIdToAllocMatchInfo
//     and emitting an optimization remark per matched context.
//  3. If not all matched bytes are cold but the cold share reaches
//     MinMatchedColdBytePercent (when that is below 100), attach a single
//     cold allocation type attribute and return.
//  4. Otherwise, if anything matched, attach the MIB metadata built from the
//     trie (or a function attribute), plus callsite metadata when !memprof
//     metadata was attached.
//
// InlinedCallStack holds the stack ids computed from the call's debug
// locations; MaxColdSize is forwarded to the CallStackTrie.
471static void handleAllocSite(
472 Instruction &I, CallBase *CI, ArrayRef<uint64_t> InlinedCallStack,
473 LLVMContext &Ctx, OptimizationRemarkEmitter &ORE, uint64_t MaxColdSize,
474 const std::set<const AllocationInfo *> &AllocInfoSet,
475 std::map<uint64_t, AllocMatchInfo> &FullStackIdToAllocMatchInfo) {
476 // TODO: Remove this once the profile creation logic deduplicates contexts
477 // that are the same other than the IsInlineFrame bool. Until then, keep the
478 // largest.
479 DenseMap<uint64_t, const AllocationInfo *> UniqueFullContextIdAllocInfo;
480 for (auto *AllocInfo : AllocInfoSet) {
481 auto FullStackId = computeFullStackId(AllocInfo->CallStack);
482 auto [It, Inserted] =
483 UniqueFullContextIdAllocInfo.insert({FullStackId, AllocInfo});
484 // If inserted entry, done.
485 if (Inserted)
486 continue;
487 // Keep the larger one, or the noncold one if they are the same size.
488 auto CurSize = It->second->Info.getTotalSize();
489 auto NewSize = AllocInfo->Info.getTotalSize();
490 if ((CurSize > NewSize) ||
491 (CurSize == NewSize &&
493 continue;
494 It->second = AllocInfo;
495 }
496 // We may match this instruction's location list to multiple MIB
497 // contexts. Add them to a Trie specialized for trimming the contexts to
498 // the minimal needed to disambiguate contexts with unique behavior.
499 CallStackTrie AllocTrie(&ORE, MaxColdSize);
// Byte totals over all matched contexts, used for the cold-percentage check
// after the loop.
500 uint64_t TotalSize = 0;
501 uint64_t TotalColdSize = 0;
502 for (auto &[FullStackId, AllocInfo] : UniqueFullContextIdAllocInfo) {
503 // Check the full inlined call stack against this one.
504 // If we found and thus matched all frames on the call, include
505 // this MIB.
507 InlinedCallStack)) {
508 NumOfMemProfMatchedAllocContexts++;
509 auto AllocType = addCallStack(AllocTrie, AllocInfo, FullStackId);
510 TotalSize += AllocInfo->Info.getTotalSize();
512 TotalColdSize += AllocInfo->Info.getTotalSize();
513 // Record information about the allocation if match info printing
514 // was requested.
516 assert(FullStackId != 0);
517 auto [Iter, Inserted] = FullStackIdToAllocMatchInfo.try_emplace(
518 FullStackId,
519 AllocMatchInfo(AllocInfo->Info.getTotalSize(), AllocType));
520 // Always insert the new matched frame count, since it may differ.
521 Iter->second.MatchedFramesSet.insert(InlinedCallStack.size());
522 if (Inserted && PrintMatchedAllocStack)
523 Iter->second.CallStack.insert(Iter->second.CallStack.begin(),
524 AllocInfo->CallStack.begin(),
525 AllocInfo->CallStack.end());
526 }
527 ORE.emit(
528 OptimizationRemark(DEBUG_TYPE, "MemProfUse", CI)
529 << ore::NV("AllocationCall", CI) << " in function "
530 << ore::NV("Caller", CI->getFunction())
531 << " matched alloc context with alloc type "
533 << " total size " << ore::NV("Size", AllocInfo->Info.getTotalSize())
534 << " full context id " << ore::NV("Context", FullStackId)
535 << " frame count " << ore::NV("Frames", InlinedCallStack.size()));
536 }
537 }
538 // If the threshold for the percent of cold bytes is less than 100%,
539 // and not all bytes are cold, see if we should still hint this
540 // allocation as cold without context sensitivity.
541 if (TotalColdSize < TotalSize && MinMatchedColdBytePercent < 100 &&
542 TotalColdSize * 100 >= MinMatchedColdBytePercent * TotalSize) {
543 AllocTrie.addSingleAllocTypeAttribute(CI, AllocationType::Cold, "dominant");
544 return;
545 }
546
547 // We might not have matched any to the full inlined call stack.
548 // But if we did, create and attach metadata, or a function attribute if
549 // all contexts have identical profiled behavior.
550 if (!AllocTrie.empty()) {
551 NumOfMemProfMatchedAllocs++;
552 // MemprofMDAttached will be false if a function attribute was
553 // attached.
554 bool MemprofMDAttached = AllocTrie.buildAndAttachMIBMetadata(CI);
555 assert(MemprofMDAttached == I.hasMetadata(LLVMContext::MD_memprof));
556 if (MemprofMDAttached) {
557 // Add callsite metadata for the instruction's location list so that
558 // it is simpler later on to identify which part of the MIB contexts
559 // are from this particular instruction (including during inlining,
560 // when the callsite metadata will be updated appropriately).
561 // FIXME: can this be changed to strip out the matching stack
562 // context ids from the MIB contexts and not add any callsite
563 // metadata here to save space?
564 addCallsiteMetadata(I, InlinedCallStack, Ctx);
565 }
566 }
567}
568
569// Helper struct for maintaining refs to callsite data. As an alternative we
570// could store a pointer to the CallSiteInfo struct but we also need the frame
571// index. Using ArrayRefs instead makes it a little easier to read.
573 // Subset of frames for the corresponding CallSiteInfo.
575 // Potential targets for indirect calls.
577};
578
// Matches the non-allocation call I against the profiled call site entries
// that share its leaf location and annotates it with the result.
//
// For the first matching entry, callsite metadata built from InlinedCallStack
// is attached to I and the stack is recorded in MatchedCallSites; an
// optimization remark is emitted as well. A direct call (CalledFunction is
// non-null) stops at the first match. An indirect call keeps scanning all
// entries to collect callee GUIDs, which are then handed to addVPMetadata.
579static void handleCallSite(Instruction &I, const Function *CalledFunction,
580 ArrayRef<uint64_t> InlinedCallStack,
581 const std::vector<CallSiteEntry> &CallSiteEntries,
582 Module &M,
583 std::set<std::vector<uint64_t>> &MatchedCallSites,
585 auto &Ctx = M.getContext();
586 // Set of Callee GUIDs to attach to indirect calls. We accumulate all of them
587 // to support cases where the instruction's inlined frames match multiple call
588 // site entries, which can happen if the profile was collected from a binary
589 // where this instruction was eventually inlined into multiple callers.
// Ensures callsite metadata is added and reported at most once per call.
591 bool CallsiteMDAdded = false;
592 for (const auto &CallSiteEntry : CallSiteEntries) {
593 // If we found and thus matched all frames on the call, create and
594 // attach call stack metadata.
596 InlinedCallStack)) {
597 NumOfMemProfMatchedCallSites++;
598 // Only need to find one with a matching call stack and add a single
599 // callsite metadata.
600 if (!CallsiteMDAdded) {
601 addCallsiteMetadata(I, InlinedCallStack, Ctx);
602
603 // Accumulate call site matching information upon request.
605 std::vector<uint64_t> CallStack;
606 append_range(CallStack, InlinedCallStack);
607 MatchedCallSites.insert(std::move(CallStack));
608 }
609 OptimizationRemark Remark(DEBUG_TYPE, "MemProfUse", &I);
610 Remark << ore::NV("CallSite", &I) << " in function "
611 << ore::NV("Caller", I.getFunction())
612 << " matched callsite with frame count "
613 << ore::NV("Frames", InlinedCallStack.size())
614 << " and stack ids";
615 for (uint64_t StackId : InlinedCallStack)
616 Remark << " " << ore::NV("StackId", StackId);
617 ORE.emit(Remark);
618
619 // If this is a direct call, we're done.
620 if (CalledFunction)
621 break;
622 CallsiteMDAdded = true;
623 }
624
625 assert(!CalledFunction && "Didn't expect direct call");
626
627 // Collect Callee GUIDs from all matching CallSiteEntries.
630 }
631 }
632 // Try to attach indirect call metadata if possible.
633 addVPMetadata(M, I, CalleeGuids.getArrayRef());
634}
635
636// Dump inline call stack for debugging purposes.
639 DenseSet<uint64_t> &SeenFrames,
640 DenseSet<uint64_t> &SeenStacks,
641 bool ProfileHasColumns) {
642 auto GetOffset = [](const DILocation *DIL) {
643 return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) &
644 0xffff;
645 };
646
647 // Dump frame info. Frames are deduplicated using FrameID.
648 std::string CallStack;
649 raw_string_ostream CallStackOS(CallStack);
650 bool First = true;
651 for (const DILocation *DIL = I.getDebugLoc(); DIL;
652 DIL = DIL->getInlinedAt()) {
653 StringRef Name = DIL->getScope()->getSubprogram()->getLinkageName();
654 if (Name.empty())
655 Name = DIL->getScope()->getSubprogram()->getName();
656 auto CalleeGUID = Function::getGUIDAssumingExternalLinkage(Name);
657 uint64_t FrameID = computeStackId(CalleeGUID, GetOffset(DIL),
658 ProfileHasColumns ? DIL->getColumn() : 0);
659 if (SeenFrames.insert(FrameID).second) {
660 std::string DictMsg;
661 raw_string_ostream DictOS(DictMsg);
662 DictOS << "frame: " << FrameID << " " << Name << ":" << GetOffset(DIL)
663 << ":" << (ProfileHasColumns ? DIL->getColumn() : 0);
664 ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "MemProfUse", CI)
665 << DictOS.str());
666 }
667
668 if (First)
669 First = false;
670 else
671 CallStackOS << ",";
672 CallStackOS << FrameID;
673 }
674
675 // Dump inline call stack info. Stacks are deduplicated using StackHash.
676 uint64_t StackHash = llvm::MD5Hash(CallStack);
677 if (SeenStacks.insert(StackHash).second) {
678 std::string Msg;
679 raw_string_ostream OS(Msg);
680 OS << "inline call stack: " << CallStack;
681 ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "MemProfUse", CI)
682 << OS.str());
683 }
684}
685
686static void
688 const TargetLibraryInfo &TLI,
689 std::map<uint64_t, AllocMatchInfo> &FullStackIdToAllocMatchInfo,
690 std::set<std::vector<uint64_t>> &MatchedCallSites,
692 OptimizationRemarkEmitter &ORE, uint64_t MaxColdSize,
693 DenseSet<uint64_t> &SeenStacks, DenseSet<uint64_t> &SeenFrames) {
694 auto &Ctx = M.getContext();
695 // Previously we used getIRPGOFuncName() here. If F has local linkage,
696 // getIRPGOFuncName() returns FuncName with the prefix 'FileName;'. But
697 // llvm-profdata uses the FuncName from DWARF to create the GUID, which does
698 // not contain the FileName prefix. As a result, local linkage functions
699 // could not find their MemProfRecord, so we use getName() now.
700 // 'unique-internal-linkage-names' can make MemProf work better for local
701 // linkage functions.
702 auto FuncName = F.getName();
703 auto FuncGUID = Function::getGUIDAssumingExternalLinkage(FuncName);
705 errs() << "MemProf: Function GUID " << FuncGUID << " is " << FuncName
706 << "\n";
707 std::optional<memprof::MemProfRecord> MemProfRec;
708 auto Err = MemProfReader->getMemProfRecord(FuncGUID).moveInto(MemProfRec);
709 if (Err) {
710 handleAllErrors(std::move(Err), [&](const InstrProfError &IPE) {
711 auto Err = IPE.get();
712 bool SkipWarning = false;
713 LLVM_DEBUG(dbgs() << "Error in reading profile for Func " << FuncName
714 << ": ");
716 NumOfMemProfMissing++;
717 SkipWarning = !PGOWarnMissing;
718 LLVM_DEBUG(dbgs() << "unknown function");
719 } else if (Err == instrprof_error::hash_mismatch) {
720 NumOfMemProfMismatch++;
721 SkipWarning =
724 (F.hasComdat() ||
726 LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")");
727 }
728
729 if (SkipWarning)
730 return;
731
732 std::string Msg = (IPE.message() + Twine(" ") + F.getName().str() +
733 Twine(" Hash = ") + std::to_string(FuncGUID))
734 .str();
735
736 Ctx.diagnose(
737 DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning));
738 });
739 return;
740 }
741
742 NumOfMemProfFunc++;
743
744 // If requested, undrift MemProfRecord so that the source locations in it
745 // match those in the IR.
747 undriftMemProfRecord(UndriftMaps, *MemProfRec);
748
749 // Detect if there are non-zero column numbers in the profile. If not,
750 // treat all column numbers as 0 when matching (i.e. ignore any non-zero
751 // columns in the IR). The profiled binary might have been built with
752 // column numbers disabled, for example.
753 bool ProfileHasColumns = false;
754
755 // Build maps of the location hash to all profile data with that leaf location
756 // (allocation info and the callsites).
757 std::map<uint64_t, std::set<const AllocationInfo *>> LocHashToAllocInfo;
758
759 // For the callsites we need to record slices of the frame array (see comments
760 // below where the map entries are added) along with their CalleeGuids.
761 std::map<uint64_t, std::vector<CallSiteEntry>> LocHashToCallSites;
762 for (auto &AI : MemProfRec->AllocSites) {
763 NumOfMemProfAllocContextProfiles++;
764 // Associate the allocation info with the leaf frame. The later matching
765 // code will match any inlined call sequences in the IR with a longer prefix
766 // of call stack frames.
767 uint64_t StackId = computeStackId(AI.CallStack[0]);
768 LocHashToAllocInfo[StackId].insert(&AI);
769 ProfileHasColumns |= AI.CallStack[0].Column;
770 }
771 for (auto &CS : MemProfRec->CallSites) {
772 NumOfMemProfCallSiteProfiles++;
773 // Need to record all frames from leaf up to and including this function,
774 // as any of these may or may not have been inlined at this point.
775 unsigned Idx = 0;
776 for (auto &StackFrame : CS.Frames) {
777 uint64_t StackId = computeStackId(StackFrame);
778 ArrayRef<Frame> FrameSlice = ArrayRef<Frame>(CS.Frames).drop_front(Idx++);
779 // The callee guids for the slice containing all frames (due to the
780 // increment above Idx is now 1) comes from the CalleeGuids recorded in
781 // the CallSite. For the slices not containing the leaf-most frame, the
782 // callee guid is simply the function GUID of the prior frame.
783 LocHashToCallSites[StackId].push_back(
784 {FrameSlice, (Idx == 1 ? CS.CalleeGuids
786 CS.Frames[Idx - 2].Function))});
787
788 ProfileHasColumns |= StackFrame.Column;
789 // Once we find this function, we can stop recording.
790 if (StackFrame.Function == FuncGUID)
791 break;
792 }
793 assert(Idx <= CS.Frames.size() && CS.Frames[Idx - 1].Function == FuncGUID);
794 }
795
796 auto GetOffset = [](const DILocation *DIL) {
797 return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) &
798 0xffff;
799 };
800
801 // Now walk the instructions, looking up the associated profile data using
802 // debug locations.
803 for (auto &BB : F) {
804 for (auto &I : BB) {
805 if (I.isDebugOrPseudoInst())
806 continue;
807 // We are only interested in calls (allocation or interior call stack
808 // context calls).
809 auto *CI = dyn_cast<CallBase>(&I);
810 if (!CI)
811 continue;
812 auto *CalledFunction = CI->getCalledFunction();
813 if (CalledFunction && CalledFunction->isIntrinsic())
814 continue;
815
817 dumpInlineCallStack(I, CI, ORE, SeenFrames, SeenStacks,
818 ProfileHasColumns);
819
820 // List of call stack ids computed from the location hashes on debug
821 // locations (leaf to inlined at root).
822 SmallVector<uint64_t, 8> InlinedCallStack;
823 // Was the leaf location found in one of the profile maps?
824 bool LeafFound = false;
825 // If leaf was found in a map, iterators pointing to its location in both
826 // of the maps. It might exist in neither, one, or both (the latter case
827 // can happen because we don't currently have discriminators to
828 // distinguish the case when a single line/col maps to both an allocation
829 // and another callsite).
830 auto AllocInfoIter = LocHashToAllocInfo.end();
831 auto CallSitesIter = LocHashToCallSites.end();
832 for (const DILocation *DIL = I.getDebugLoc(); DIL != nullptr;
833 DIL = DIL->getInlinedAt()) {
834 // Use C++ linkage name if possible. Need to compile with
835 // -fdebug-info-for-profiling to get linkage name.
836 StringRef Name = DIL->getScope()->getSubprogram()->getLinkageName();
837 if (Name.empty())
838 Name = DIL->getScope()->getSubprogram()->getName();
839 auto CalleeGUID = Function::getGUIDAssumingExternalLinkage(Name);
840 auto StackId = computeStackId(CalleeGUID, GetOffset(DIL),
841 ProfileHasColumns ? DIL->getColumn() : 0);
842 // Check if we have found the profile's leaf frame. If yes, collect
843 // the rest of the call's inlined context starting here. If not, see if
844 // we find a match further up the inlined context (in case the profile
845 // was missing debug frames at the leaf).
846 if (!LeafFound) {
847 AllocInfoIter = LocHashToAllocInfo.find(StackId);
848 CallSitesIter = LocHashToCallSites.find(StackId);
849 if (AllocInfoIter != LocHashToAllocInfo.end() ||
850 CallSitesIter != LocHashToCallSites.end())
851 LeafFound = true;
852 }
853 if (LeafFound)
854 InlinedCallStack.push_back(StackId);
855 }
856 // If leaf not in either of the maps, skip inst.
857 if (!LeafFound)
858 continue;
859
860 // First add !memprof metadata from allocation info, if we found the
861 // instruction's leaf location in that map, and if the rest of the
862 // instruction's locations match the prefix Frame locations on an
863 // allocation context with the same leaf.
864 if (AllocInfoIter != LocHashToAllocInfo.end() &&
865 // Only consider allocations which support hinting.
866 isAllocationWithHotColdVariant(CI->getCalledFunction(), TLI))
867 handleAllocSite(I, CI, InlinedCallStack, Ctx, ORE, MaxColdSize,
868 AllocInfoIter->second, FullStackIdToAllocMatchInfo);
869 else if (CallSitesIter != LocHashToCallSites.end())
870 // Otherwise, add callsite metadata. If we reach here then we found the
871 // instruction's leaf location in the callsites map and not the
872 // allocation map.
873 handleCallSite(I, CalledFunction, InlinedCallStack,
874 CallSitesIter->second, M, MatchedCallSites, ORE);
875 }
876 }
877}
878
// Constructs the pass for the profile file named by MemoryProfileFile, reading
// it through the supplied file system FS.
// NOTE(review): MemoryProfileFile is taken by value and then copied into the
// member; std::move would avoid the extra copy.
879MemProfUsePass::MemProfUsePass(std::string MemoryProfileFile,
881 : MemoryProfileFileName(MemoryProfileFile), FS(FS) {
// The parameter FS shadows the member, so this tests the caller's argument.
// When none was given, fall back to the real file system.
882 if (!FS)
883 this->FS = vfs::getRealFileSystem();
884}
885
887 // Return immediately if the module doesn't contain any function or global
888 // variables.
889 if (M.empty() && M.globals().empty())
890 return PreservedAnalyses::all();
891
892 LLVM_DEBUG(dbgs() << "Read in memory profile:\n");
893 auto &Ctx = M.getContext();
894 auto ReaderOrErr = IndexedInstrProfReader::create(MemoryProfileFileName, *FS);
895 if (Error E = ReaderOrErr.takeError()) {
896 handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) {
897 Ctx.diagnose(
898 DiagnosticInfoPGOProfile(MemoryProfileFileName.data(), EI.message()));
899 });
900 return PreservedAnalyses::all();
901 }
902
903 std::unique_ptr<IndexedInstrProfReader> MemProfReader =
904 std::move(ReaderOrErr.get());
905 if (!MemProfReader) {
906 Ctx.diagnose(DiagnosticInfoPGOProfile(
907 MemoryProfileFileName.data(), StringRef("Cannot get MemProfReader")));
908 return PreservedAnalyses::all();
909 }
910
911 if (!MemProfReader->hasMemoryProfile()) {
912 Ctx.diagnose(DiagnosticInfoPGOProfile(MemoryProfileFileName.data(),
913 "Not a memory profile"));
914 return PreservedAnalyses::all();
915 }
916
917 const bool Changed =
918 annotateGlobalVariables(M, MemProfReader->getDataAccessProfileData());
919
920 // If the module doesn't contain any function, return after we process all
921 // global variables.
922 if (M.empty())
924
925 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
926
927 TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(*M.begin());
930 UndriftMaps = computeUndriftMap(M, MemProfReader.get(), TLI);
931
932 // Map from the stack hash of each matched allocation context in the function
933 // profiles to match info such as the total profiled size (bytes), allocation
934 // type, number of frames matched to the allocation itself, and the full array
935 // of call stack ids.
936 std::map<uint64_t, AllocMatchInfo> FullStackIdToAllocMatchInfo;
937
938 // Set of the matched call sites, each expressed as a sequence of an inline
939 // call stack.
940 std::set<std::vector<uint64_t>> MatchedCallSites;
941
942 DenseSet<uint64_t> SeenStacks;
943 DenseSet<uint64_t> SeenFrames;
944
945 uint64_t MaxColdSize = 0;
946 if (auto *MemProfSum = MemProfReader->getMemProfSummary())
947 MaxColdSize = MemProfSum->getMaxColdTotalSize();
948
949 for (auto &F : M) {
950 if (F.isDeclaration())
951 continue;
952
953 const TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
954 auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
955 readMemprof(M, F, MemProfReader.get(), TLI, FullStackIdToAllocMatchInfo,
956 MatchedCallSites, UndriftMaps, ORE, MaxColdSize, SeenStacks,
957 SeenFrames);
958 }
959
961 for (const auto &[Id, Info] : FullStackIdToAllocMatchInfo) {
962 for (auto Frames : Info.MatchedFramesSet) {
963 // TODO: To reduce verbosity, should we change the existing message
964 // so that we emit a list of matched frame counts in a single message
965 // about the context (instead of one message per frame count?
966 errs() << "MemProf " << getAllocTypeAttributeString(Info.AllocType)
967 << " context with id " << Id << " has total profiled size "
968 << Info.TotalSize << " is matched with " << Frames << " frames";
970 errs() << " and call stack";
971 for (auto &F : Info.CallStack)
972 errs() << " " << computeStackId(F);
973 }
974 errs() << "\n";
975 }
976 }
977
978 for (const auto &CallStack : MatchedCallSites) {
979 errs() << "MemProf callsite match for inline call stack";
980 for (uint64_t StackId : CallStack)
981 errs() << " " << StackId;
982 errs() << "\n";
983 }
984 }
985
987}
988
989bool MemProfUsePass::annotateGlobalVariables(
990 Module &M, const memprof::DataAccessProfData *DataAccessProf) {
991 if (!AnnotateStaticDataSectionPrefix || M.globals().empty())
992 return false;
993
994 if (!DataAccessProf) {
995 M.addModuleFlag(Module::Warning, "EnableDataAccessProf", 0U);
996 // FIXME: Add a diagnostic message without failing the compilation when
997 // data access profile payload is not available.
998 return false;
999 }
1000 M.addModuleFlag(Module::Warning, "EnableDataAccessProf", 1U);
1001
1002 bool Changed = false;
1003 // Iterate all global variables in the module and annotate them based on
1004 // data access profiles. Note it's up to the linker to decide how to map input
1005 // sections to output sections, and one conservative practice is to map
1006 // unlikely-prefixed ones to unlikely output section, and map the rest
1007 // (hot-prefixed or prefix-less) to the canonical output section.
1008 for (GlobalVariable &GVar : M.globals()) {
1009 assert(!GVar.getSectionPrefix().has_value() &&
1010 "GVar shouldn't have section prefix yet");
1011 auto Kind = llvm::memprof::getAnnotationKind(GVar);
1014 continue;
1015 }
1016
1017 StringRef Name = GVar.getName();
1018 SymbolHandleRef Handle = SymbolHandleRef(Name);
1019 // Skip string literals as their mangled names don't stay stable across
1020 // binary releases.
1022 if (Name.starts_with(".str"))
1023 continue;
1024
1025 if (Name.starts_with(".str")) {
1026 std::optional<uint64_t> Hash = getStringContentHash(GVar);
1027 if (!Hash) {
1028 LLVM_DEBUG(dbgs() << "Cannot compute content hash for string literal "
1029 << Name << "\n");
1030 continue;
1031 }
1032 Handle = SymbolHandleRef(Hash.value());
1033 }
1034
1035 // DataAccessProfRecord's get* methods will canonicalize the name under the
1036 // hood before looking it up, so optimizer doesn't need to do it.
1037 std::optional<DataAccessProfRecord> Record =
1038 DataAccessProf->getProfileRecord(Handle);
1039 // Annotate a global variable as hot if it has non-zero sampled count, and
1040 // annotate it as cold if it's seen in the profiled binary
1041 // file but doesn't have any access sample.
1042 // For logging, optimization remark emitter requires a llvm::Function, but
1043 // it's not well defined how to associate a global variable with a function.
1044 // So we just print out the static data section prefix in LLVM_DEBUG.
1045 if (Record && Record->AccessCount > 0) {
1046 ++NumOfMemProfHotGlobalVars;
1047 Changed |= GVar.setSectionPrefix("hot");
1048 LLVM_DEBUG(dbgs() << "Global variable " << Name
1049 << " is annotated as hot\n");
1050 } else if (DataAccessProf->isKnownColdSymbol(Handle)) {
1051 ++NumOfMemProfColdGlobalVars;
1052 Changed |= GVar.setSectionPrefix("unlikely");
1053 Changed = true;
1054 LLVM_DEBUG(dbgs() << "Global variable " << Name
1055 << " is annotated as unlikely\n");
1056 } else {
1057 ++NumOfMemProfUnknownGlobalVars;
1058 LLVM_DEBUG(dbgs() << "Global variable " << Name << " is not annotated\n");
1059 }
1060 }
1061
1062 return Changed;
1063}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file defines the DenseSet and SmallDenseSet classes.
#define DEBUG_TYPE
Module.h This file contains the declarations for the Module class.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define G(x, y, z)
Definition MD5.cpp:55
static std::optional< uint64_t > getStringContentHash(const GlobalVariable &GVar)
static void addCallsiteMetadata(Instruction &I, ArrayRef< uint64_t > InlinedCallStack, LLVMContext &Ctx)
static bool isAllocationWithHotColdVariant(const Function *Callee, const TargetLibraryInfo &TLI)
static cl::opt< bool > ClMemProfAttachCalleeGuids("memprof-attach-calleeguids", cl::desc("Attach calleeguids as value profile metadata for indirect calls."), cl::init(true), cl::Hidden)
static void HandleUnsupportedAnnotationKinds(GlobalVariable &GVar, AnnotationKind Kind)
static void undriftMemProfRecord(const DenseMap< uint64_t, LocToLocMap > &UndriftMaps, memprof::MemProfRecord &MemProfRec)
static uint64_t computeStackId(GlobalValue::GUID Function, uint32_t LineOffset, uint32_t Column)
static cl::opt< bool > PrintMatchedAllocStack("memprof-print-matched-alloc-stack", cl::desc("Print full stack context for matched " "allocations with -memprof-print-match-info."), cl::Hidden, cl::init(false))
static void handleCallSite(Instruction &I, const Function *CalledFunction, ArrayRef< uint64_t > InlinedCallStack, const std::vector< CallSiteEntry > &CallSiteEntries, Module &M, std::set< std::vector< uint64_t > > &MatchedCallSites, OptimizationRemarkEmitter &ORE)
static cl::opt< bool > ClPrintMemProfMatchInfo("memprof-print-match-info", cl::desc("Print matching stats for each allocation " "context in this module's profiles"), cl::Hidden, cl::init(false))
static void addVPMetadata(Module &M, Instruction &I, ArrayRef< GlobalValue::GUID > CalleeGuids)
static cl::opt< bool > PrintFunctionGuids("memprof-print-function-guids", cl::desc("Print function GUIDs computed for matching"), cl::Hidden, cl::init(false))
static cl::opt< bool > AnnotateStaticDataSectionPrefix("memprof-annotate-static-data-prefix", cl::init(false), cl::Hidden, cl::desc("If true, annotate the static data section prefix"))
static void handleAllocSite(Instruction &I, CallBase *CI, ArrayRef< uint64_t > InlinedCallStack, LLVMContext &Ctx, OptimizationRemarkEmitter &ORE, uint64_t MaxColdSize, const std::set< const AllocationInfo * > &AllocInfoSet, std::map< uint64_t, AllocMatchInfo > &FullStackIdToAllocMatchInfo)
static cl::opt< bool > SalvageStaleProfile("memprof-salvage-stale-profile", cl::desc("Salvage stale MemProf profile"), cl::init(false), cl::Hidden)
static cl::opt< unsigned > MinMatchedColdBytePercent("memprof-matching-cold-threshold", cl::init(100), cl::Hidden, cl::desc("Min percent of cold bytes matched to hint allocation cold"))
static void readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader, const TargetLibraryInfo &TLI, std::map< uint64_t, AllocMatchInfo > &FullStackIdToAllocMatchInfo, std::set< std::vector< uint64_t > > &MatchedCallSites, DenseMap< uint64_t, LocToLocMap > &UndriftMaps, OptimizationRemarkEmitter &ORE, uint64_t MaxColdSize, DenseSet< uint64_t > &SeenStacks, DenseSet< uint64_t > &SeenFrames)
static void dumpInlineCallStack(Instruction &I, CallBase *CI, OptimizationRemarkEmitter &ORE, DenseSet< uint64_t > &SeenFrames, DenseSet< uint64_t > &SeenStacks, bool ProfileHasColumns)
static cl::opt< bool > ClMemProfMatchHotColdNew("memprof-match-hot-cold-new", cl::desc("Match allocation profiles onto existing hot/cold operator new calls"), cl::Hidden, cl::init(false))
static AllocationType addCallStack(CallStackTrie &AllocTrie, const AllocationInfo *AllocInfo, uint64_t FullStackId)
static bool stackFrameIncludesInlinedCallStack(ArrayRef< Frame > ProfileCallStack, ArrayRef< uint64_t > InlinedCallStack)
AllocType
FunctionAnalysisManager FAM
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
Defines the virtual file system interface vfs::FileSystem.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
ArrayRef< T > take_front(size_t N=1) const
Return a copy of *this with only the first N elements.
Definition ArrayRef.h:219
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
Definition ArrayRef.h:195
iterator end() const
Definition ArrayRef.h:131
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
iterator begin() const
Definition ArrayRef.h:130
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:137
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:256
iterator end()
Definition DenseMap.h:81
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition DenseMap.h:169
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:241
Implements a dense probed hash-table based set.
Definition DenseSet.h:279
Diagnostic information for the PGO profiler.
Base class for error info classes.
Definition Error.h:44
virtual std::string message() const
Return the error message as a string.
Definition Error.h:52
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
static LLVM_ABI GUID getGUIDAssumingExternalLinkage(StringRef GlobalName)
Return a 64-bit global unique ID constructed from the name of a global symbol.
Definition Globals.cpp:80
uint64_t GUID
Declare a type to represent a global unique identifier for a global value.
@ AvailableExternallyLinkage
Available for inspection, not emission.
Definition GlobalValue.h:54
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
HashResultTy< HasherT_ > final()
Forward to HasherT::final() if available.
Definition HashBuilder.h:64
Interface to help hash various types through a hasher type.
std::enable_if_t< hashbuilder_detail::IsHashableData< T >::value, HashBuilder & > add(T Value)
Implement hashing for hashable data types, e.g. integral or enum values.
Reader for the indexed binary instrprof format.
static Expected< std::unique_ptr< IndexedInstrProfReader > > create(const Twine &Path, vfs::FileSystem &FS, const Twine &RemappingPath="")
Factory method to create an indexed reader.
instrprof_error get() const
Definition InstrProf.h:464
std::string message() const override
Return the error message as a string.
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
A smart pointer to a reference-counted object that inherits from RefCountedBase or ThreadSafeRefCount...
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
LLVM_ABI MemProfUsePass(std::string MemoryProfileFile, IntrusiveRefCntPtr< vfs::FileSystem > FS=nullptr)
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
@ Warning
Emits a warning if two values disagree.
Definition Module.h:124
Diagnostic information for optimization analysis remarks.
The optimization diagnostic interface.
bool allowExtraAnalysis(StringRef PassName) const
Whether we allow for extra compile-time budget to perform more analysis to produce fewer false positi...
LLVM_ABI void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for applied optimization remarks.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
A vector that has set insertion semantics.
Definition SetVector.h:57
ArrayRef< value_type > getArrayRef() const
Definition SetVector.h:91
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
void append(StringRef RHS)
Append from a StringRef.
Definition SmallString.h:68
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
constexpr bool empty() const
empty - Check if the string is empty.
Definition StringRef.h:140
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:318
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:202
An efficient, type-erasing, non-owning reference to a callable.
Class to build a trie of call stack contexts for a particular profiled allocation call,...
LLVM_ABI void addCallStack(AllocationType AllocType, ArrayRef< uint64_t > StackIds, std::vector< ContextTotalSize > ContextSizeInfo={})
Add a call stack context with the given allocation type to the Trie.
LLVM_ABI void addSingleAllocTypeAttribute(CallBase *CI, AllocationType AT, StringRef Descriptor)
Add an attribute for the given allocation type to the call instruction.
LLVM_ABI bool buildAndAttachMIBMetadata(CallBase *CI)
Build and attach the minimal necessary MIB metadata.
Helper class to iterate through stack ids in both metadata (memprof MIB and callsite) and the corresp...
Encapsulates the data access profile data and the methods to operate on it.
LLVM_ABI std::optional< DataAccessProfRecord > getProfileRecord(const SymbolHandleRef SymID) const
Returns a profile record for SymbolID, or std::nullopt if there isn't a record.
LLVM_ABI bool isKnownColdSymbol(const SymbolHandleRef SymID) const
Returns true if SymID is seen in profiled binaries and cold.
A raw_ostream that writes to an std::string.
std::string & str()
Returns the string's reference.
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
initializer< Ty > init(const Ty &Val)
LLVM_ABI DenseMap< uint64_t, LocToLocMap > computeUndriftMap(Module &M, IndexedInstrProfReader *MemProfReader, const TargetLibraryInfo &TLI)
LLVM_ABI MDNode * buildCallstackMetadata(ArrayRef< uint64_t > CallStack, LLVMContext &Ctx)
Build callstack metadata from the provided list of call stack ids.
LLVM_ABI AllocationType getAllocType(uint64_t TotalLifetimeAccessDensity, uint64_t AllocCount, uint64_t TotalLifetime)
Return the allocation type for a given set of memory profile values.
LLVM_ABI bool recordContextSizeInfoForAnalysis()
Whether we need to record the context size info in the alloc trie used to build metadata.
std::unordered_map< LineLocation, LineLocation, LineLocationHash > LocToLocMap
Definition MemProfUse.h:65
LLVM_ABI uint64_t computeFullStackId(ArrayRef< Frame > CallStack)
Helper to generate a single hash id for a given callstack, used for emitting matching statistics and ...
std::variant< StringRef, uint64_t > SymbolHandleRef
LLVM_ABI DenseMap< uint64_t, SmallVector< CallEdgeTy, 0 > > extractCallsFromIR(Module &M, const TargetLibraryInfo &TLI, function_ref< bool(uint64_t)> IsPresentInProfile=[](uint64_t) { return true;})
AnnotationKind getAnnotationKind(const GlobalVariable &GV)
Returns the annotation kind of the global variable GV.
LLVM_ABI GlobalValue::GUID getGUID(const StringRef FunctionName)
Definition MemProf.cpp:344
LLVM_ABI std::string getAllocTypeAttributeString(AllocationType Type)
Returns the string to use in attributes with the given type.
DiagnosticInfoOptimizationBase::Argument NV
uint64_t MD5Hash(const FunctionId &Obj)
Definition FunctionId.h:167
LLVM_ABI IntrusiveRefCntPtr< FileSystem > getRealFileSystem()
Gets an vfs::FileSystem for the 'real' file system, as seen by the operating system.
This is an optimization pass for GlobalISel generic memory operations.
std::array< uint8_t, NumBytes > BLAKE3Result
The constant LLVM_BLAKE3_OUT_LEN provides the default output length, 32 bytes, which is recommended f...
Definition BLAKE3.h:35
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
void handleAllErrors(Error E, HandlerTs &&... Handlers)
Behaves the same as handleErrors, except that by contract all errors must be handled by the given han...
Definition Error.h:1013
constexpr from_range_t from_range
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2208
InnerAnalysisManagerProxy< FunctionAnalysisManager, Module > FunctionAnalysisManagerModuleProxy
Provide the FunctionAnalysisManager to Module proxy.
cl::opt< bool > PGOWarnMissing
auto unique(Range &&R, Predicate P)
Definition STLExtras.h:2134
auto map_range(ContainerTy &&C, FuncTy F)
Return a range that applies F to the elements of C.
Definition STLExtras.h:366
LLVM_ABI void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1636
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
cl::opt< bool > AnnotateStringLiteralSectionPrefix("memprof-annotate-string-literal-section-prefix", cl::init(false), cl::Hidden, cl::desc("If true, annotate the string literal data section prefix"))
LLVM_ABI SmallVector< InstrProfValueData, 4 > getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind, uint32_t MaxNumValueData, uint64_t &TotalC, bool GetNoICPValue=false)
Extract the value profile data from Inst and returns them if Inst is annotated with value profile dat...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
cl::opt< bool > NoPGOWarnMismatch
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:74
cl::opt< bool > SalvageStaleProfile("salvage-stale-profile", cl::Hidden, cl::init(false), cl::desc("Salvage stale profile by fuzzy matching and use the remapped " "location for sample profile query."))
void longestCommonSequence(AnchorList AnchorList1, AnchorList AnchorList2, llvm::function_ref< bool(const Function &, const Function &)> FunctionMatchesProfile, llvm::function_ref< void(Loc, Loc)> InsertMatching)
ArrayRef(const T &OneElt) -> ArrayRef< T >
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Definition STLExtras.h:2146
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
Definition MIRParser.h:39
cl::opt< bool > NoPGOWarnMismatchComdatWeak
std::set< unsigned > MatchedFramesSet
uint64_t TotalSize
std::vector< Frame > CallStack
AllocMatchInfo(uint64_t TotalSize, AllocationType AllocType)
AllocationType AllocType
ArrayRef< GlobalValue::GUID > CalleeGuids
ArrayRef< Frame > Frames
Summary of memprof metadata on allocations.
GlobalValue::GUID Function
Definition MemProf.h:245
uint32_t LineOffset
Definition MemProf.h:250
llvm::SmallVector< CallSiteInfo > CallSites
Definition MemProf.h:522
llvm::SmallVector< AllocationInfo > AllocSites
Definition MemProf.h:520