LLVM 22.0.0git
MemProfUse.cpp
Go to the documentation of this file.
1//===- MemProfUse.cpp - memory allocation profile use pass --*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the MemProfUsePass which reads memory profiling data
10// and uses it to add metadata to instructions to guide optimization.
11//
12//===----------------------------------------------------------------------===//
13
16#include "llvm/ADT/Statistic.h"
17#include "llvm/ADT/StringRef.h"
23#include "llvm/IR/Function.h"
25#include "llvm/IR/Module.h"
30#include "llvm/Support/BLAKE3.h"
32#include "llvm/Support/Debug.h"
36#include <map>
37#include <set>
38
39using namespace llvm;
40using namespace llvm::memprof;
41
42#define DEBUG_TYPE "memprof"
43
44namespace llvm {
48} // namespace llvm
49
// Command-line flags controlling MemProf profile matching, plus matching
// statistics counters.
// NOTE(review): this extraction is missing several source lines (the fused
// numeric prefixes below are non-contiguous); in particular some
// `static cl::opt<...> Name(` declarator lines are absent. Verify each
// against upstream MemProfUse.cpp before editing.
50// By default disable matching of allocation profiles onto operator new that
51// already explicitly pass a hot/cold hint, since we don't currently
52// override these hints anyway.
// NOTE(review): declarator line missing here for flag
// "memprof-match-hot-cold-new" (presumably a static cl::opt<bool>; its
// identifier is not visible in this dump — TODO confirm).
54 "memprof-match-hot-cold-new",
56 "Match allocation profiles onto existing hot/cold operator new calls"),
57 cl::Hidden, cl::init(false));
58
// When set, print per-allocation-context matching statistics to errs().
59static cl::opt<bool>
60 ClPrintMemProfMatchInfo("memprof-print-match-info",
61 cl::desc("Print matching stats for each allocation "
62 "context in this module's profiles"),
63 cl::Hidden, cl::init(false));
64
// NOTE(review): declarator missing; this is PrintMatchedAllocStack per the
// generated index at the bottom of this dump.
66 "memprof-print-matched-alloc-stack",
67 cl::desc("Print full stack context for matched "
68 "allocations with -memprof-print-match-info."),
69 cl::Hidden, cl::init(false));
70
// When set, print the GUID computed for each function (used by readMemprof).
71static cl::opt<bool>
72 PrintFunctionGuids("memprof-print-function-guids",
73 cl::desc("Print function GUIDs computed for matching"),
74 cl::Hidden, cl::init(false));
75
// When set, compute undrift maps to match a stale profile onto moved code.
76static cl::opt<bool>
77 SalvageStaleProfile("memprof-salvage-stale-profile",
78 cl::desc("Salvage stale MemProf profile"),
79 cl::init(false), cl::Hidden);
80
// NOTE(review): declarator missing; this is ClMemProfAttachCalleeGuids per
// the generated index at the bottom of this dump.
82 "memprof-attach-calleeguids",
84 "Attach calleeguids as value profile metadata for indirect calls."),
85 cl::init(true), cl::Hidden);
86
// NOTE(review): declarator missing; identifier is MinMatchedColdBytePercent
// (referenced below when hinting dominantly-cold allocations).
88 "memprof-matching-cold-threshold", cl::init(100), cl::Hidden,
89 cl::desc("Min percent of cold bytes matched to hint allocation cold"));
90
// NOTE(review): declarator missing; this is AnnotateStaticDataSectionPrefix
// per the generated index at the bottom of this dump.
92 "memprof-annotate-static-data-prefix", cl::init(false), cl::Hidden,
93 cl::desc("If true, annotate the static data section prefix"));
94
// Counters reported with -stats; incremented throughout the matching code
// below.
95// Matching statistics
96STATISTIC(NumOfMemProfMissing, "Number of functions without memory profile.");
97STATISTIC(NumOfMemProfMismatch,
98 "Number of functions having mismatched memory profile hash.");
99STATISTIC(NumOfMemProfFunc, "Number of functions having valid memory profile.");
100STATISTIC(NumOfMemProfAllocContextProfiles,
101 "Number of alloc contexts in memory profile.");
102STATISTIC(NumOfMemProfCallSiteProfiles,
103 "Number of callsites in memory profile.");
104STATISTIC(NumOfMemProfMatchedAllocContexts,
105 "Number of matched memory profile alloc contexts.");
106STATISTIC(NumOfMemProfMatchedAllocs,
107 "Number of matched memory profile allocs.");
108STATISTIC(NumOfMemProfMatchedCallSites,
109 "Number of matched memory profile callsites.");
110STATISTIC(NumOfMemProfHotGlobalVars,
111 "Number of global vars annotated with 'hot' section prefix.");
112STATISTIC(NumOfMemProfColdGlobalVars,
113 "Number of global vars annotated with 'unlikely' section prefix.");
114STATISTIC(NumOfMemProfUnknownGlobalVars,
115 "Number of global vars with unknown hotness (no section prefix).");
116STATISTIC(NumOfMemProfExplicitSectionGlobalVars,
117 "Number of global vars with user-specified section (not annotated).");
118
// Attach !callsite metadata (the hashed inlined call-stack ids) to I.
// NOTE(review): the opening declarator line is missing from this extraction;
// the generated index at the bottom of this dump shows the full signature:
//   static void addCallsiteMetadata(Instruction &I,
//                                   ArrayRef<uint64_t> InlinedCallStack,
//                                   LLVMContext &Ctx)
120 ArrayRef<uint64_t> InlinedCallStack,
121 LLVMContext &Ctx) {
122 I.setMetadata(LLVMContext::MD_callsite,
123 buildCallstackMetadata(InlinedCallStack, Ctx));
124}
125
// Hash a (function GUID, line offset, column) triple into a 64-bit stack
// frame id used as a matching key.
// NOTE(review): lines are missing from this extraction — the declarator
// (shown in the generated index: computeStackId(GlobalValue::GUID Function,
// uint32_t LineOffset, uint32_t Column)), the HashBuilder declaration, and
// the `Hash` result; given the BLAKE3 include above this presumably uses a
// truncated BLAKE3 hash builder — TODO confirm against upstream.
127 uint32_t Column) {
130 HashBuilder.add(Function, LineOffset, Column);
132 uint64_t Id;
// Copy the (8-byte) truncated hash result into the returned integer id.
133 std::memcpy(&Id, Hash.data(), sizeof(Hash));
134 return Id;
135}
136
140
// Derive the allocation type from an AllocationInfo's profiled metrics
// (lifetime access density, allocation count, total lifetime).
// NOTE(review): the declarator line (141) is missing from this extraction;
// presumably `static AllocationType getAllocType(const AllocationInfo *)` —
// TODO confirm.
142 return getAllocType(AllocInfo->Info.getTotalLifetimeAccessDensity(),
143 AllocInfo->Info.getAllocCount(),
144 AllocInfo->Info.getTotalLifetime());
145}
146
// Add one profiled allocation context (its hashed frame ids plus optional
// context size info) to the CallStackTrie, returning the context's
// allocation type.
// NOTE(review): lines are missing from this extraction — the declarator
// (147-148), the AllocType computation (153), and the condition opening the
// block closed at "}" below (155, presumably a guard for recording context
// size info). Verify against upstream before editing.
149 uint64_t FullStackId) {
150 SmallVector<uint64_t> StackIds;
// Hash each profile frame into the 64-bit id space used by the trie.
151 for (const auto &StackFrame : AllocInfo->CallStack)
152 StackIds.push_back(computeStackId(StackFrame));
154 std::vector<ContextTotalSize> ContextSizeInfo;
156 auto TotalSize = AllocInfo->Info.getTotalSize();
157 assert(TotalSize);
158 assert(FullStackId != 0);
159 ContextSizeInfo.push_back({FullStackId, TotalSize});
160 }
161 AllocTrie.addCallStack(AllocType, StackIds, std::move(ContextSizeInfo));
162 return AllocType;
163}
164
165// Return true if InlinedCallStack, computed from a call instruction's debug
166// info, is a prefix of ProfileCallStack, a list of Frames from profile data
167// (either the allocation data or a callsite).
// NOTE(review): the line carrying the function name and first parameter
// (169) is missing from this extraction; presumably
//   stackFrameIncludesInlinedCallStack(ArrayRef<Frame> ProfileCallStack,
// — TODO confirm against upstream.
168static bool
170 ArrayRef<uint64_t> InlinedCallStack) {
// Frames compare equal when the profile Frame hashes to the IR stack id.
171 return ProfileCallStack.size() >= InlinedCallStack.size() &&
172 llvm::equal(ProfileCallStack.take_front(InlinedCallStack.size()),
173 InlinedCallStack, [](const Frame &F, uint64_t StackId) {
174 return computeStackId(F) == StackId;
175 });
176}
177
// Return true if Callee is a known heap allocation function that has a
// __hot_cold_t variant we could redirect to (operator new / new[] and the
// size-returning forms).
178static bool isAllocationWithHotColdVariant(const Function *Callee,
179 const TargetLibraryInfo &TLI) {
180 if (!Callee)
181 return false;
182 LibFunc Func;
183 if (!TLI.getLibFunc(*Callee, Func))
184 return false;
185 switch (Func) {
// Plain operator new / new[] variants: always candidates for hinting.
186 case LibFunc_Znwm:
187 case LibFunc_ZnwmRKSt9nothrow_t:
188 case LibFunc_ZnwmSt11align_val_t:
189 case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t:
190 case LibFunc_Znam:
191 case LibFunc_ZnamRKSt9nothrow_t:
192 case LibFunc_ZnamSt11align_val_t:
193 case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t:
194 case LibFunc_size_returning_new:
195 case LibFunc_size_returning_new_aligned:
196 return true;
// Variants that already take a __hot_cold_t hint.
197 case LibFunc_Znwm12__hot_cold_t:
198 case LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t:
199 case LibFunc_ZnwmSt11align_val_t12__hot_cold_t:
200 case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t:
201 case LibFunc_Znam12__hot_cold_t:
202 case LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t:
203 case LibFunc_ZnamSt11align_val_t12__hot_cold_t:
204 case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t:
205 case LibFunc_size_returning_new_hot_cold:
206 case LibFunc_size_returning_new_aligned_hot_cold:
// NOTE(review): the return for the _hot_cold_t variants (line 207) is
// missing from this extraction; per the flag comment near the top of the
// file it presumably returns the "memprof-match-hot-cold-new" option value
// (off by default) — TODO confirm against upstream.
208 default:
209 return false;
210 }
211}
212
// Log (via LLVM_DEBUG) why GVar was skipped for section-prefix annotation,
// bumping the relevant statistic. The generated index at the bottom of this
// dump shows the full signature:
//   static void HandleUnsupportedAnnotationKinds(GlobalVariable &GVar,
//                                                AnnotationKind Kind)
// NOTE(review): several lines are missing from this extraction — the
// declarator (213), an assert (215, whose message text survives below), and
// the three `case` labels (219, 223, 226) that select each Reason string.
// Verify against upstream before editing.
214 AnnotationKind Kind) {
216 "Should not handle AnnotationOK here");
217 SmallString<32> Reason;
218 switch (Kind) {
220 ++NumOfMemProfExplicitSectionGlobalVars;
221 Reason.append("explicit section name");
222 break;
224 Reason.append("linker declaration");
225 break;
227 Reason.append("name starts with `llvm.`");
228 break;
229 default:
230 llvm_unreachable("Unexpected annotation kind");
231 }
232 LLVM_DEBUG(dbgs() << "Skip annotation for " << GVar.getName() << " due to "
233 << Reason << ".\n");
234}
235
236// Structure for tracking info about matched allocation contexts for use with
237// -memprof-print-match-info and -memprof-print-matched-alloc-stack.
// NOTE(review): several lines are missing from this extraction — the
// `struct AllocMatchInfo {` opener (238), the TotalSize member (240), the
// AllocType member (242), and the constructor (254-255). Member names
// TotalSize and AllocType are grounded by their uses in MemProfUsePass::run
// below; verify declarations against upstream.
239 // Total size in bytes of matched context.
241 // Matched allocation's type.
243 // Number of frames matched to the allocation itself (values will be >1 in
244 // cases where allocation was already inlined). Use a set because there can
245 // be multiple inlined instances and each may have a different inline depth.
246 // Use std::set to iterate in sorted order when printing.
247 std::set<unsigned> MatchedFramesSet;
248 // The full call stack of the allocation, for cases where requested via
249 // -memprof-print-matched-alloc-stack.
250 std::vector<Frame> CallStack;
251
252 // Caller responsible for inserting the matched frames and the call stack when
253 // appropriate.
256};
257
// Walk every call in the module and record, per caller GUID, the list of
// (source location, callee GUID) anchors used for undrifting stale profiles.
// Frames on the inline path down to an allocation call whose callee is not
// in the profile are recorded with callee GUID 0.
// NOTE(review): lines are missing from this extraction — the declarator
// (258-259), the declaration of the `Calls` result map (261), and the
// filter condition (274) whose `continue` survives below (presumably
// skipping non-call/intrinsic instructions — TODO confirm).
260 function_ref<bool(uint64_t)> IsPresentInProfile) {
262
// Line offset of DIL relative to its subprogram start, truncated to 16 bits
// (same scheme used by the profile's LineLocation).
263 auto GetOffset = [](const DILocation *DIL) {
264 return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) &
265 0xffff;
266 };
267
268 for (Function &F : M) {
269 if (F.isDeclaration())
270 continue;
271
272 for (auto &BB : F) {
273 for (auto &I : BB) {
275 continue;
276
277 auto *CB = dyn_cast<CallBase>(&I);
278 auto *CalledFunction = CB->getCalledFunction();
279 // Disregard indirect calls and intrinsics.
280 if (!CalledFunction || CalledFunction->isIntrinsic())
281 continue;
282
283 StringRef CalleeName = CalledFunction->getName();
284 // True if we are calling a heap allocation function that supports
285 // hot/cold variants.
286 bool IsAlloc = isAllocationWithHotColdVariant(CalledFunction, TLI);
287 // True for the first iteration below, indicating that we are looking at
288 // a leaf node.
289 bool IsLeaf = true;
// Walk the inline chain from the leaf debug location outward.
290 for (const DILocation *DIL = I.getDebugLoc(); DIL;
291 DIL = DIL->getInlinedAt()) {
292 StringRef CallerName = DIL->getSubprogramLinkageName();
293 assert(!CallerName.empty() &&
294 "Be sure to enable -fdebug-info-for-profiling");
295 uint64_t CallerGUID = memprof::getGUID(CallerName);
296 uint64_t CalleeGUID = memprof::getGUID(CalleeName);
297 // Pretend that we are calling a function with GUID == 0 if we are
298 // in the inline stack leading to a heap allocation function.
299 if (IsAlloc) {
300 if (IsLeaf) {
301 // For leaf nodes, set CalleeGUID to 0 without consulting
302 // IsPresentInProfile.
303 CalleeGUID = 0;
304 } else if (!IsPresentInProfile(CalleeGUID)) {
305 // In addition to the leaf case above, continue to set CalleeGUID
306 // to 0 as long as we don't see CalleeGUID in the profile.
307 CalleeGUID = 0;
308 } else {
309 // Once we encounter a callee that exists in the profile, stop
310 // setting CalleeGUID to 0.
311 IsAlloc = false;
312 }
313 }
314
315 LineLocation Loc = {GetOffset(DIL), DIL->getColumn()};
316 Calls[CallerGUID].emplace_back(Loc, CalleeGUID);
317 CalleeName = CallerName;
318 IsLeaf = false;
319 }
320 }
321 }
322 }
323
324 // Sort each call list by the source location.
325 for (auto &[CallerGUID, CallList] : Calls) {
326 llvm::sort(CallList);
// Drop duplicate anchors so the longest-common-sequence match is stable.
327 CallList.erase(llvm::unique(CallList), CallList.end());
328 }
329
330 return Calls;
331}
332
// Build, per caller GUID, a map from profile source locations to current IR
// source locations by matching the common subsequence of call anchors from
// the profile against those extracted from the IR.
// NOTE(review): lines are missing from this extraction — the declarator
// (333-334), the `UndriftMaps` result declaration (336), the assignments
// introducing `CallsFromProfile` (338) and `CallsFromIR` (340), and the call
// opening the matching below (353, presumably a longest-common-sequence
// helper given the comment style — TODO confirm against upstream).
335 const TargetLibraryInfo &TLI) {
337
339 MemProfReader->getMemProfCallerCalleePairs();
// Only treat IR callees as "allocation path" frames when absent from the
// profile (see extractCallsFromIR).
341 extractCallsFromIR(M, TLI, [&](uint64_t GUID) {
342 return CallsFromProfile.contains(GUID);
343 });
344
345 // Compute an undrift map for each CallerGUID.
346 for (const auto &[CallerGUID, IRAnchors] : CallsFromIR) {
347 auto It = CallsFromProfile.find(CallerGUID);
348 if (It == CallsFromProfile.end())
349 continue;
350 const auto &ProfileAnchors = It->second;
351
352 LocToLocMap Matchings;
354 ProfileAnchors, IRAnchors, std::equal_to<GlobalValue::GUID>(),
355 [&](LineLocation A, LineLocation B) { Matchings.try_emplace(A, B); });
356 [[maybe_unused]] bool Inserted =
357 UndriftMaps.try_emplace(CallerGUID, std::move(Matchings)).second;
358
359 // The insertion must succeed because we visit each GUID exactly once.
360 assert(Inserted);
361 }
362
363 return UndriftMaps;
364}
365
366// Given a MemProfRecord, undrift all the source locations present in the
367// record in place.
// NOTE(review): the line with the function name and first parameter (369) is
// missing from this extraction; the generated index at the bottom of this
// dump shows the full signature:
//   static void undriftMemProfRecord(
//       const DenseMap<uint64_t, LocToLocMap> &UndriftMaps,
//       memprof::MemProfRecord &MemProfRec)
368static void
370 memprof::MemProfRecord &MemProfRec) {
371 // Undrift a call stack in place.
372 auto UndriftCallStack = [&](std::vector<Frame> &CallStack) {
373 for (auto &F : CallStack) {
// Look up the per-function map, then the frame's (line offset, column).
374 auto I = UndriftMaps.find(F.Function);
375 if (I == UndriftMaps.end())
376 continue;
377 auto J = I->second.find(LineLocation(F.LineOffset, F.Column));
378 if (J == I->second.end())
379 continue;
380 auto &NewLoc = J->second;
381 F.LineOffset = NewLoc.LineOffset;
382 F.Column = NewLoc.Column;
383 }
384 };
385
// Apply to every allocation context and every callsite context.
386 for (auto &AS : MemProfRec.AllocSites)
387 UndriftCallStack(AS.CallStack);
388
389 for (auto &CS : MemProfRec.CallSites)
390 UndriftCallStack(CS.Frames);
391}
392
393// Helper function to process CalleeGuids and create value profile metadata
// for indirect calls, merging new GUIDs into any existing
// IPVK_IndirectCallTarget value-profile entries on the instruction. The
// generated index at the bottom of this dump shows the full signature:
//   static void addVPMetadata(Module &M, Instruction &I,
//                             ArrayRef<GlobalValue::GUID> CalleeGuids)
// NOTE(review): lines are missing from this extraction — the declarator
// (394), the declaration of `VDs` (402, presumably a
// SmallVector<InstrProfValueData>), and the range adaptor opening the
// ExistingValues initialization (417-418). Verify against upstream.
395 ArrayRef<GlobalValue::GUID> CalleeGuids) {
396 if (!ClMemProfAttachCalleeGuids || CalleeGuids.empty())
397 return;
398
399 // Prepare the vector of value data, initializing from any existing
400 // value-profile metadata present on the instruction so that we merge the
401 // new CalleeGuids into the existing entries.
403 uint64_t TotalCount = 0;
404
405 if (I.getMetadata(LLVMContext::MD_prof)) {
406 // Read all existing entries so we can merge them. Use a large
407 // MaxNumValueData to retrieve all existing entries.
408 VDs = getValueProfDataFromInst(I, IPVK_IndirectCallTarget,
409 /*MaxNumValueData=*/UINT32_MAX, TotalCount);
410 }
411
412 // Save the original size for use later in detecting whether any were added.
413 const size_t OriginalSize = VDs.size();
414
415 // Initialize the set of existing guids with the original list.
416 DenseSet<uint64_t> ExistingValues(
419 VDs, [](const InstrProfValueData &Entry) { return Entry.Value; }));
420
421 // Merge CalleeGuids into list of existing VDs, by appending any that are not
422 // already included.
423 VDs.reserve(OriginalSize + CalleeGuids.size());
424 for (auto G : CalleeGuids) {
425 if (!ExistingValues.insert(G).second)
426 continue;
427 InstrProfValueData NewEntry;
428 NewEntry.Value = G;
429 // For MemProf, we don't have actual call counts, so we assign
430 // a weight of 1 to each potential target.
431 // TODO: Consider making this weight configurable or increasing it to
432 // improve effectiveness for ICP.
433 NewEntry.Count = 1;
434 TotalCount += NewEntry.Count;
435 VDs.push_back(NewEntry);
436 }
437
438 // Update the VP metadata if we added any new callee GUIDs to the list.
439 assert(VDs.size() >= OriginalSize);
440 if (VDs.size() == OriginalSize)
441 return;
442
443 // First clear the existing !prof.
444 I.setMetadata(LLVMContext::MD_prof, nullptr);
445
446 // No need to sort the updated VDs as all appended entries have the same count
447 // of 1, which is no larger than any existing entries. The incoming list of
448 // CalleeGuids should already be deterministic for a given profile.
449 annotateValueSite(M, I, VDs, TotalCount, IPVK_IndirectCallTarget, VDs.size());
450}
451
// Match the profile's allocation contexts with leaf frame at this call
// against the instruction's inlined call stack; attach !memprof/!callsite
// metadata (or a single hot/cold attribute) accordingly.
// NOTE(review): lines are missing from this extraction — the tie-break
// predicate continuation (473), the `if (stackFrameIncludesInlinedCallStack(`
// opener (487, inferred from the matching pattern in handleCallSite), the
// cold-size guard (492), the match-info-printing guard (496, presumably
// ClPrintMemProfMatchInfo), and the alloc-type remark operand (513). Verify
// against upstream before editing.
452static void handleAllocSite(
453 Instruction &I, CallBase *CI, ArrayRef<uint64_t> InlinedCallStack,
454 LLVMContext &Ctx, OptimizationRemarkEmitter &ORE, uint64_t MaxColdSize,
455 const std::set<const AllocationInfo *> &AllocInfoSet,
456 std::map<uint64_t, AllocMatchInfo> &FullStackIdToAllocMatchInfo) {
457 // TODO: Remove this once the profile creation logic deduplicates contexts
458 // that are the same other than the IsInlineFrame bool. Until then, keep the
459 // largest.
460 DenseMap<uint64_t, const AllocationInfo *> UniqueFullContextIdAllocInfo;
461 for (auto *AllocInfo : AllocInfoSet) {
462 auto FullStackId = computeFullStackId(AllocInfo->CallStack);
463 auto [It, Inserted] =
464 UniqueFullContextIdAllocInfo.insert({FullStackId, AllocInfo});
465 // If inserted entry, done.
466 if (Inserted)
467 continue;
468 // Keep the larger one, or the noncold one if they are the same size.
469 auto CurSize = It->second->Info.getTotalSize();
470 auto NewSize = AllocInfo->Info.getTotalSize();
471 if ((CurSize > NewSize) ||
472 (CurSize == NewSize &&
474 continue;
475 It->second = AllocInfo;
476 }
477 // We may match this instruction's location list to multiple MIB
478 // contexts. Add them to a Trie specialized for trimming the contexts to
479 // the minimal needed to disambiguate contexts with unique behavior.
480 CallStackTrie AllocTrie(&ORE, MaxColdSize);
481 uint64_t TotalSize = 0;
482 uint64_t TotalColdSize = 0;
483 for (auto &[FullStackId, AllocInfo] : UniqueFullContextIdAllocInfo) {
484 // Check the full inlined call stack against this one.
485 // If we found and thus matched all frames on the call, include
486 // this MIB.
488 InlinedCallStack)) {
489 NumOfMemProfMatchedAllocContexts++;
490 auto AllocType = addCallStack(AllocTrie, AllocInfo, FullStackId);
491 TotalSize += AllocInfo->Info.getTotalSize();
493 TotalColdSize += AllocInfo->Info.getTotalSize();
494 // Record information about the allocation if match info printing
495 // was requested.
497 assert(FullStackId != 0);
498 auto [Iter, Inserted] = FullStackIdToAllocMatchInfo.try_emplace(
499 FullStackId,
500 AllocMatchInfo(AllocInfo->Info.getTotalSize(), AllocType));
501 // Always insert the new matched frame count, since it may differ.
502 Iter->second.MatchedFramesSet.insert(InlinedCallStack.size());
// Only copy the full stack once (on first insertion) and only on request.
503 if (Inserted && PrintMatchedAllocStack)
504 Iter->second.CallStack.insert(Iter->second.CallStack.begin(),
505 AllocInfo->CallStack.begin(),
506 AllocInfo->CallStack.end());
507 }
508 ORE.emit(
509 OptimizationRemark(DEBUG_TYPE, "MemProfUse", CI)
510 << ore::NV("AllocationCall", CI) << " in function "
511 << ore::NV("Caller", CI->getFunction())
512 << " matched alloc context with alloc type "
514 << " total size " << ore::NV("Size", AllocInfo->Info.getTotalSize())
515 << " full context id " << ore::NV("Context", FullStackId)
516 << " frame count " << ore::NV("Frames", InlinedCallStack.size()));
517 }
518 }
519 // If the threshold for the percent of cold bytes is less than 100%,
520 // and not all bytes are cold, see if we should still hint this
521 // allocation as cold without context sensitivity.
522 if (TotalColdSize < TotalSize && MinMatchedColdBytePercent < 100 &&
523 TotalColdSize * 100 >= MinMatchedColdBytePercent * TotalSize) {
524 AllocTrie.addSingleAllocTypeAttribute(CI, AllocationType::Cold, "dominant");
525 return;
526 }
527
528 // We might not have matched any to the full inlined call stack.
529 // But if we did, create and attach metadata, or a function attribute if
530 // all contexts have identical profiled behavior.
531 if (!AllocTrie.empty()) {
532 NumOfMemProfMatchedAllocs++;
533 // MemprofMDAttached will be false if a function attribute was
534 // attached.
535 bool MemprofMDAttached = AllocTrie.buildAndAttachMIBMetadata(CI);
536 assert(MemprofMDAttached == I.hasMetadata(LLVMContext::MD_memprof));
537 if (MemprofMDAttached) {
538 // Add callsite metadata for the instruction's location list so that
539 // it simpler later on to identify which part of the MIB contexts
540 // are from this particular instruction (including during inlining,
541 // when the callsite metadata will be updated appropriately).
542 // FIXME: can this be changed to strip out the matching stack
543 // context ids from the MIB contexts and not add any callsite
544 // metadata here to save space?
545 addCallsiteMetadata(I, InlinedCallStack, Ctx);
546 }
547 }
548}
549
550// Helper struct for maintaining refs to callsite data. As an alternative we
551// could store a pointer to the CallSiteInfo struct but we also need the frame
552// index. Using ArrayRefs instead makes it a little easier to read.
// NOTE(review): the `struct CallSiteEntry {` opener (553) and both member
// declarations (555, 557) are missing from this extraction; from the usage
// in readMemprof these are presumably an ArrayRef<Frame> frame slice and an
// ArrayRef of callee GUIDs — TODO confirm names against upstream.
554 // Subset of frames for the corresponding CallSiteInfo.
556 // Potential targets for indirect calls.
558};
559
// Match the instruction's inlined call stack against the profile's callsite
// entries with the same leaf location: attach !callsite metadata on the
// first match, and for indirect calls accumulate callee GUIDs from all
// matches into value-profile metadata.
// NOTE(review): lines are missing from this extraction — the ORE parameter
// line (565), the `CalleeGuids` accumulator declaration (571, a SetVector
// judging by the .getArrayRef() call below — TODO confirm), the
// `if (stackFrameIncludesInlinedCallStack(` opener (576), the
// match-printing guard (585, presumably ClPrintMemProfMatchInfo), and the
// statement appending the entry's callee GUIDs (605-606). Verify against
// upstream before editing.
560static void handleCallSite(Instruction &I, const Function *CalledFunction,
561 ArrayRef<uint64_t> InlinedCallStack,
562 const std::vector<CallSiteEntry> &CallSiteEntries,
563 Module &M,
564 std::set<std::vector<uint64_t>> &MatchedCallSites,
566 auto &Ctx = M.getContext();
567 // Set of Callee GUIDs to attach to indirect calls. We accumulate all of them
568 // to support cases where the instuction's inlined frames match multiple call
569 // site entries, which can happen if the profile was collected from a binary
570 // where this instruction was eventually inlined into multiple callers.
572 bool CallsiteMDAdded = false;
573 for (const auto &CallSiteEntry : CallSiteEntries) {
574 // If we found and thus matched all frames on the call, create and
575 // attach call stack metadata.
577 InlinedCallStack)) {
578 NumOfMemProfMatchedCallSites++;
579 // Only need to find one with a matching call stack and add a single
580 // callsite metadata.
581 if (!CallsiteMDAdded) {
582 addCallsiteMetadata(I, InlinedCallStack, Ctx);
583
584 // Accumulate call site matching information upon request.
586 std::vector<uint64_t> CallStack;
587 append_range(CallStack, InlinedCallStack);
588 MatchedCallSites.insert(std::move(CallStack));
589 }
590 ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemProfUse", &I)
591 << ore::NV("CallSite", &I) << " in function "
592 << ore::NV("Caller", I.getFunction())
593 << " matched callsite with frame count "
594 << ore::NV("Frames", InlinedCallStack.size()));
595
596 // If this is a direct call, we're done.
597 if (CalledFunction)
598 break;
599 CallsiteMDAdded = true;
600 }
601
602 assert(!CalledFunction && "Didn't expect direct call");
603
604 // Collect Callee GUIDs from all matching CallSiteEntries.
607 }
608 }
609 // Try to attach indirect call metadata if possible.
610 addVPMetadata(M, I, CalleeGuids.getArrayRef());
611}
612
// Read the MemProf record for F from the indexed profile and match its
// allocation/callsite contexts onto F's instructions via debug locations,
// attaching !memprof / !callsite / value-profile metadata.
// NOTE(review): lines are missing from this extraction — the line carrying
// the function name and first parameters (614), the UndriftMaps parameter
// (618), the PrintFunctionGuids guard (630), the unknown_function error test
// (641), parts of the hash-mismatch SkipWarning expression (648-649, 651),
// the SalvageStaleProfile guard (672), and part of the callee-guid slice
// expression (711). Verify against upstream before editing.
static void
615 const TargetLibraryInfo &TLI,
616 std::map<uint64_t, AllocMatchInfo> &FullStackIdToAllocMatchInfo,
617 std::set<std::vector<uint64_t>> &MatchedCallSites,
619 OptimizationRemarkEmitter &ORE, uint64_t MaxColdSize) {
620 auto &Ctx = M.getContext();
621 // Previously we used getIRPGOFuncName() here. If F is local linkage,
622 // getIRPGOFuncName() returns FuncName with prefix 'FileName;'. But
623 // llvm-profdata uses FuncName in dwarf to create GUID which doesn't
624 // contain FileName's prefix. It caused local linkage function can't
625 // find MemProfRecord. So we use getName() now.
626 // 'unique-internal-linkage-names' can make MemProf work better for local
627 // linkage function.
628 auto FuncName = F.getName();
629 auto FuncGUID = Function::getGUIDAssumingExternalLinkage(FuncName);
631 errs() << "MemProf: Function GUID " << FuncGUID << " is " << FuncName
632 << "\n";
633 std::optional<memprof::MemProfRecord> MemProfRec;
634 auto Err = MemProfReader->getMemProfRecord(FuncGUID).moveInto(MemProfRec);
635 if (Err) {
// Diagnose lookup failures, optionally suppressing the warning per flags.
636 handleAllErrors(std::move(Err), [&](const InstrProfError &IPE) {
637 auto Err = IPE.get();
638 bool SkipWarning = false;
639 LLVM_DEBUG(dbgs() << "Error in reading profile for Func " << FuncName
640 << ": ");
642 NumOfMemProfMissing++;
643 SkipWarning = !PGOWarnMissing;
644 LLVM_DEBUG(dbgs() << "unknown function");
645 } else if (Err == instrprof_error::hash_mismatch) {
646 NumOfMemProfMismatch++;
647 SkipWarning =
650 (F.hasComdat() ||
652 LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")");
653 }
654
655 if (SkipWarning)
656 return;
657
658 std::string Msg = (IPE.message() + Twine(" ") + F.getName().str() +
659 Twine(" Hash = ") + std::to_string(FuncGUID))
660 .str();
661
662 Ctx.diagnose(
663 DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning));
664 });
665 return;
666 }
667
668 NumOfMemProfFunc++;
669
670 // If requested, undrfit MemProfRecord so that the source locations in it
671 // match those in the IR.
673 undriftMemProfRecord(UndriftMaps, *MemProfRec);
674
675 // Detect if there are non-zero column numbers in the profile. If not,
676 // treat all column numbers as 0 when matching (i.e. ignore any non-zero
677 // columns in the IR). The profiled binary might have been built with
678 // column numbers disabled, for example.
679 bool ProfileHasColumns = false;
680
681 // Build maps of the location hash to all profile data with that leaf location
682 // (allocation info and the callsites).
683 std::map<uint64_t, std::set<const AllocationInfo *>> LocHashToAllocInfo;
684
685 // For the callsites we need to record slices of the frame array (see comments
686 // below where the map entries are added) along with their CalleeGuids.
687 std::map<uint64_t, std::vector<CallSiteEntry>> LocHashToCallSites;
688 for (auto &AI : MemProfRec->AllocSites) {
689 NumOfMemProfAllocContextProfiles++;
690 // Associate the allocation info with the leaf frame. The later matching
691 // code will match any inlined call sequences in the IR with a longer prefix
692 // of call stack frames.
693 uint64_t StackId = computeStackId(AI.CallStack[0]);
694 LocHashToAllocInfo[StackId].insert(&AI);
695 ProfileHasColumns |= AI.CallStack[0].Column;
696 }
697 for (auto &CS : MemProfRec->CallSites) {
698 NumOfMemProfCallSiteProfiles++;
699 // Need to record all frames from leaf up to and including this function,
700 // as any of these may or may not have been inlined at this point.
701 unsigned Idx = 0;
702 for (auto &StackFrame : CS.Frames) {
703 uint64_t StackId = computeStackId(StackFrame);
704 ArrayRef<Frame> FrameSlice = ArrayRef<Frame>(CS.Frames).drop_front(Idx++);
705 // The callee guids for the slice containing all frames (due to the
706 // increment above Idx is now 1) comes from the CalleeGuids recorded in
707 // the CallSite. For the slices not containing the leaf-most frame, the
708 // callee guid is simply the function GUID of the prior frame.
709 LocHashToCallSites[StackId].push_back(
710 {FrameSlice, (Idx == 1 ? CS.CalleeGuids
712 CS.Frames[Idx - 2].Function))});
713
714 ProfileHasColumns |= StackFrame.Column;
715 // Once we find this function, we can stop recording.
716 if (StackFrame.Function == FuncGUID)
717 break;
718 }
719 assert(Idx <= CS.Frames.size() && CS.Frames[Idx - 1].Function == FuncGUID);
720 }
721
// Same 16-bit relative line-offset scheme used on the profile side.
722 auto GetOffset = [](const DILocation *DIL) {
723 return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) &
724 0xffff;
725 };
726
727 // Now walk the instructions, looking up the associated profile data using
728 // debug locations.
729 for (auto &BB : F) {
730 for (auto &I : BB) {
731 if (I.isDebugOrPseudoInst())
732 continue;
733 // We are only interested in calls (allocation or interior call stack
734 // context calls).
735 auto *CI = dyn_cast<CallBase>(&I);
736 if (!CI)
737 continue;
738 auto *CalledFunction = CI->getCalledFunction();
739 if (CalledFunction && CalledFunction->isIntrinsic())
740 continue;
741 // List of call stack ids computed from the location hashes on debug
742 // locations (leaf to inlined at root).
743 SmallVector<uint64_t, 8> InlinedCallStack;
744 // Was the leaf location found in one of the profile maps?
745 bool LeafFound = false;
746 // If leaf was found in a map, iterators pointing to its location in both
747 // of the maps. It might exist in neither, one, or both (the latter case
748 // can happen because we don't currently have discriminators to
749 // distinguish the case when a single line/col maps to both an allocation
750 // and another callsite).
751 auto AllocInfoIter = LocHashToAllocInfo.end();
752 auto CallSitesIter = LocHashToCallSites.end();
753 for (const DILocation *DIL = I.getDebugLoc(); DIL != nullptr;
754 DIL = DIL->getInlinedAt()) {
755 // Use C++ linkage name if possible. Need to compile with
756 // -fdebug-info-for-profiling to get linkage name.
757 StringRef Name = DIL->getScope()->getSubprogram()->getLinkageName();
758 if (Name.empty())
759 Name = DIL->getScope()->getSubprogram()->getName();
760 auto CalleeGUID = Function::getGUIDAssumingExternalLinkage(Name);
761 auto StackId = computeStackId(CalleeGUID, GetOffset(DIL),
762 ProfileHasColumns ? DIL->getColumn() : 0);
763 // Check if we have found the profile's leaf frame. If yes, collect
764 // the rest of the call's inlined context starting here. If not, see if
765 // we find a match further up the inlined context (in case the profile
766 // was missing debug frames at the leaf).
767 if (!LeafFound) {
768 AllocInfoIter = LocHashToAllocInfo.find(StackId);
769 CallSitesIter = LocHashToCallSites.find(StackId);
770 if (AllocInfoIter != LocHashToAllocInfo.end() ||
771 CallSitesIter != LocHashToCallSites.end())
772 LeafFound = true;
773 }
774 if (LeafFound)
775 InlinedCallStack.push_back(StackId);
776 }
777 // If leaf not in either of the maps, skip inst.
778 if (!LeafFound)
779 continue;
780
781 // First add !memprof metadata from allocation info, if we found the
782 // instruction's leaf location in that map, and if the rest of the
783 // instruction's locations match the prefix Frame locations on an
784 // allocation context with the same leaf.
785 if (AllocInfoIter != LocHashToAllocInfo.end() &&
786 // Only consider allocations which support hinting.
787 isAllocationWithHotColdVariant(CI->getCalledFunction(), TLI))
788 handleAllocSite(I, CI, InlinedCallStack, Ctx, ORE, MaxColdSize,
789 AllocInfoIter->second, FullStackIdToAllocMatchInfo)
790 else if (CallSitesIter != LocHashToCallSites.end())
791 // Otherwise, add callsite metadata. If we reach here then we found the
792 // instruction's leaf location in the callsites map and not the
793 // allocation map.
794 handleCallSite(I, CalledFunction, InlinedCallStack,
795 CallSitesIter->second, M, MatchedCallSites, ORE);
796 }
797 }
798}
799
// Construct the pass with the profile path and an optional virtual file
// system; falls back to the real file system when none is provided.
// NOTE(review): the second parameter line (801) is missing from this
// extraction; from the `FS(FS)` initializer and the vfs::getRealFileSystem()
// fallback it is presumably an IntrusiveRefCntPtr<vfs::FileSystem> FS —
// TODO confirm against upstream.
800MemProfUsePass::MemProfUsePass(std::string MemoryProfileFile,
802 : MemoryProfileFileName(MemoryProfileFile), FS(FS) {
803 if (!FS)
804 this->FS = vfs::getRealFileSystem();
805}
806
// Pass entry point: open the indexed MemProf profile, annotate global
// variables from data-access profiles, then match each function's records
// via readMemprof; optionally print match info.
// NOTE(review): lines are missing from this extraction — the run() signature
// (807), the early return after global-variable processing (844), the
// UndriftMaps declaration and SalvageStaleProfile guard (849-850), the
// ClPrintMemProfMatchInfo guard (877), the PrintMatchedAllocStack guard
// (885-886), and the final return (903). Verify against upstream.
808 // Return immediately if the module doesn't contain any function or global
809 // variables.
810 if (M.empty() && M.globals().empty())
811 return PreservedAnalyses::all();
812
813 LLVM_DEBUG(dbgs() << "Read in memory profile:\n");
814 auto &Ctx = M.getContext();
815 auto ReaderOrErr = IndexedInstrProfReader::create(MemoryProfileFileName, *FS);
816 if (Error E = ReaderOrErr.takeError()) {
817 handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) {
818 Ctx.diagnose(
819 DiagnosticInfoPGOProfile(MemoryProfileFileName.data(), EI.message()));
820 });
821 return PreservedAnalyses::all();
822 }
823
824 std::unique_ptr<IndexedInstrProfReader> MemProfReader =
825 std::move(ReaderOrErr.get());
826 if (!MemProfReader) {
827 Ctx.diagnose(DiagnosticInfoPGOProfile(
828 MemoryProfileFileName.data(), StringRef("Cannot get MemProfReader")));
829 return PreservedAnalyses::all();
830 }
831
832 if (!MemProfReader->hasMemoryProfile()) {
833 Ctx.diagnose(DiagnosticInfoPGOProfile(MemoryProfileFileName.data(),
834 "Not a memory profile"));
835 return PreservedAnalyses::all();
836 }
837
838 const bool Changed =
839 annotateGlobalVariables(M, MemProfReader->getDataAccessProfileData());
840
841 // If the module doesn't contain any function, return after we process all
842 // global variables.
843 if (M.empty())
845
846 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
847
// TLI of the first function is used only for computing the undrift map.
848 TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(*M.begin());
851 UndriftMaps = computeUndriftMap(M, MemProfReader.get(), TLI);
852
853 // Map from the stack hash of each matched allocation context in the function
854 // profiles to match info such as the total profiled size (bytes), allocation
855 // type, number of frames matched to the allocation itself, and the full array
856 // of call stack ids.
857 std::map<uint64_t, AllocMatchInfo> FullStackIdToAllocMatchInfo;
858
859 // Set of the matched call sites, each expressed as a sequence of an inline
860 // call stack.
861 std::set<std::vector<uint64_t>> MatchedCallSites;
862
863 uint64_t MaxColdSize = 0;
864 if (auto *MemProfSum = MemProfReader->getMemProfSummary())
865 MaxColdSize = MemProfSum->getMaxColdTotalSize();
866
867 for (auto &F : M) {
868 if (F.isDeclaration())
869 continue;
870
871 const TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
872 auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
873 readMemprof(M, F, MemProfReader.get(), TLI, FullStackIdToAllocMatchInfo,
874 MatchedCallSites, UndriftMaps, ORE, MaxColdSize);
875 }
876
// Optional reporting of matched allocation contexts and callsites.
878 for (const auto &[Id, Info] : FullStackIdToAllocMatchInfo) {
879 for (auto Frames : Info.MatchedFramesSet) {
880 // TODO: To reduce verbosity, should we change the existing message
881 // so that we emit a list of matched frame counts in a single message
882 // about the context (instead of one message per frame count?
883 errs() << "MemProf " << getAllocTypeAttributeString(Info.AllocType)
884 << " context with id " << Id << " has total profiled size "
885 << Info.TotalSize << " is matched with " << Frames << " frames";
887 errs() << " and call stack";
888 for (auto &F : Info.CallStack)
889 errs() << " " << computeStackId(F);
890 }
891 errs() << "\n";
892 }
893 }
894
895 for (const auto &CallStack : MatchedCallSites) {
896 errs() << "MemProf callsite match for inline call stack";
897 for (uint64_t StackId : CallStack)
898 errs() << " " << StackId;
899 errs() << "\n";
900 }
901 }
902
904}
905
// Annotate global variables with "hot"/"unlikely" section prefixes based on
// the data-access profile; returns true if any variable was changed.
// NOTE(review): the lines guarding unsupported annotation kinds (932-933,
// which presumably test the Kind and call HandleUnsupportedAnnotationKinds
// before the `continue` below) are missing from this extraction — verify
// against upstream.
906bool MemProfUsePass::annotateGlobalVariables(
907 Module &M, const memprof::DataAccessProfData *DataAccessProf) {
908 if (!AnnotateStaticDataSectionPrefix || M.globals().empty())
909 return false;
910
// Record via module flag whether data-access profiles were available, and
// warn when the user asked for annotation but the profile lacks the data.
911 if (!DataAccessProf) {
912 M.addModuleFlag(Module::Warning, "EnableDataAccessProf", 0U);
913 M.getContext().diagnose(DiagnosticInfoPGOProfile(
914 MemoryProfileFileName.data(),
915 StringRef("Data access profiles not found in memprof. Ignore "
916 "-memprof-annotate-static-data-prefix."),
917 DS_Warning));
918 return false;
919 }
920 M.addModuleFlag(Module::Warning, "EnableDataAccessProf", 1U);
921
922 bool Changed = false;
923 // Iterate all global variables in the module and annotate them based on
924 // data access profiles. Note it's up to the linker to decide how to map input
925 // sections to output sections, and one conservative practice is to map
926 // unlikely-prefixed ones to unlikely output section, and map the rest
927 // (hot-prefixed or prefix-less) to the canonical output section.
928 for (GlobalVariable &GVar : M.globals()) {
929 assert(!GVar.getSectionPrefix().has_value() &&
930 "GVar shouldn't have section prefix yet");
931 auto Kind = llvm::memprof::getAnnotationKind(GVar);
934 continue;
935 }
936
937 StringRef Name = GVar.getName();
938 // Skip string literals as their mangled names don't stay stable across
939 // binary releases.
940 // TODO: Track string content hash in the profiles and compute it inside the
941 // compiler to categeorize the hotness string literals.
942 if (Name.starts_with(".str")) {
943 LLVM_DEBUG(dbgs() << "Skip annotating string literal " << Name << "\n");
944 continue;
945 }
946
947 // DataAccessProfRecord's get* methods will canonicalize the name under the
948 // hood before looking it up, so optimizer doesn't need to do it.
949 std::optional<DataAccessProfRecord> Record =
950 DataAccessProf->getProfileRecord(Name);
951 // Annotate a global variable as hot if it has non-zero sampled count, and
952 // annotate it as cold if it's seen in the profiled binary
953 // file but doesn't have any access sample.
954 // For logging, optimization remark emitter requires a llvm::Function, but
955 // it's not well defined how to associate a global variable with a function.
956 // So we just print out the static data section prefix in LLVM_DEBUG.
957 if (Record && Record->AccessCount > 0) {
958 ++NumOfMemProfHotGlobalVars;
959 Changed |= GVar.setSectionPrefix("hot");
960 LLVM_DEBUG(dbgs() << "Global variable " << Name
961 << " is annotated as hot\n");
962 } else if (DataAccessProf->isKnownColdSymbol(Name)) {
963 ++NumOfMemProfColdGlobalVars;
964 Changed |= GVar.setSectionPrefix("unlikely");
// NOTE(review): this unconditional `Changed = true;` makes the preceding
// `Changed |=` result irrelevant and is inconsistent with the "hot" branch
// above, which relies solely on setSectionPrefix's return value —
// candidate cleanup to propose upstream.
965 Changed = true;
966 LLVM_DEBUG(dbgs() << "Global variable " << Name
967 << " is annotated as unlikely\n");
968 } else {
969 ++NumOfMemProfUnknownGlobalVars;
970 LLVM_DEBUG(dbgs() << "Global variable " << Name << " is not annotated\n");
971 }
972 }
973
974 return Changed;
975}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define DEBUG_TYPE
Module.h This file contains the declarations for the Module class.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define G(x, y, z)
Definition MD5.cpp:55
static void addCallsiteMetadata(Instruction &I, ArrayRef< uint64_t > InlinedCallStack, LLVMContext &Ctx)
static bool isAllocationWithHotColdVariant(const Function *Callee, const TargetLibraryInfo &TLI)
static cl::opt< bool > ClMemProfAttachCalleeGuids("memprof-attach-calleeguids", cl::desc("Attach calleeguids as value profile metadata for indirect calls."), cl::init(true), cl::Hidden)
static void HandleUnsupportedAnnotationKinds(GlobalVariable &GVar, AnnotationKind Kind)
static void undriftMemProfRecord(const DenseMap< uint64_t, LocToLocMap > &UndriftMaps, memprof::MemProfRecord &MemProfRec)
static uint64_t computeStackId(GlobalValue::GUID Function, uint32_t LineOffset, uint32_t Column)
static cl::opt< bool > PrintMatchedAllocStack("memprof-print-matched-alloc-stack", cl::desc("Print full stack context for matched " "allocations with -memprof-print-match-info."), cl::Hidden, cl::init(false))
static void handleCallSite(Instruction &I, const Function *CalledFunction, ArrayRef< uint64_t > InlinedCallStack, const std::vector< CallSiteEntry > &CallSiteEntries, Module &M, std::set< std::vector< uint64_t > > &MatchedCallSites, OptimizationRemarkEmitter &ORE)
static cl::opt< bool > ClPrintMemProfMatchInfo("memprof-print-match-info", cl::desc("Print matching stats for each allocation " "context in this module's profiles"), cl::Hidden, cl::init(false))
static void addVPMetadata(Module &M, Instruction &I, ArrayRef< GlobalValue::GUID > CalleeGuids)
static cl::opt< bool > PrintFunctionGuids("memprof-print-function-guids", cl::desc("Print function GUIDs computed for matching"), cl::Hidden, cl::init(false))
static cl::opt< bool > AnnotateStaticDataSectionPrefix("memprof-annotate-static-data-prefix", cl::init(false), cl::Hidden, cl::desc("If true, annotate the static data section prefix"))
static void handleAllocSite(Instruction &I, CallBase *CI, ArrayRef< uint64_t > InlinedCallStack, LLVMContext &Ctx, OptimizationRemarkEmitter &ORE, uint64_t MaxColdSize, const std::set< const AllocationInfo * > &AllocInfoSet, std::map< uint64_t, AllocMatchInfo > &FullStackIdToAllocMatchInfo)
static cl::opt< bool > SalvageStaleProfile("memprof-salvage-stale-profile", cl::desc("Salvage stale MemProf profile"), cl::init(false), cl::Hidden)
static cl::opt< unsigned > MinMatchedColdBytePercent("memprof-matching-cold-threshold", cl::init(100), cl::Hidden, cl::desc("Min percent of cold bytes matched to hint allocation cold"))
static void readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader, const TargetLibraryInfo &TLI, std::map< uint64_t, AllocMatchInfo > &FullStackIdToAllocMatchInfo, std::set< std::vector< uint64_t > > &MatchedCallSites, DenseMap< uint64_t, LocToLocMap > &UndriftMaps, OptimizationRemarkEmitter &ORE, uint64_t MaxColdSize)
static cl::opt< bool > ClMemProfMatchHotColdNew("memprof-match-hot-cold-new", cl::desc("Match allocation profiles onto existing hot/cold operator new calls"), cl::Hidden, cl::init(false))
static AllocationType addCallStack(CallStackTrie &AllocTrie, const AllocationInfo *AllocInfo, uint64_t FullStackId)
static bool stackFrameIncludesInlinedCallStack(ArrayRef< Frame > ProfileCallStack, ArrayRef< uint64_t > InlinedCallStack)
AllocType
FunctionAnalysisManager FAM
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
Defines the virtual file system interface vfs::FileSystem.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
ArrayRef< T > take_front(size_t N=1) const
Return a copy of *this with only the first N elements.
Definition ArrayRef.h:219
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
Definition ArrayRef.h:195
iterator end() const
Definition ArrayRef.h:131
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
iterator begin() const
Definition ArrayRef.h:130
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:137
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:256
iterator end()
Definition DenseMap.h:81
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition DenseMap.h:169
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:241
Implements a dense probed hash-table based set.
Definition DenseSet.h:279
Diagnostic information for the PGO profiler.
Base class for error info classes.
Definition Error.h:44
virtual std::string message() const
Return the error message as a string.
Definition Error.h:52
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
static LLVM_ABI GUID getGUIDAssumingExternalLinkage(StringRef GlobalName)
Return a 64-bit global unique ID constructed from the name of a global symbol.
Definition Globals.cpp:77
uint64_t GUID
Declare a type to represent a global unique identifier for a global value.
@ AvailableExternallyLinkage
Available for inspection, not emission.
Definition GlobalValue.h:54
HashResultTy< HasherT_ > final()
Forward to HasherT::final() if available.
Definition HashBuilder.h:64
Interface to help hash various types through a hasher type.
std::enable_if_t< hashbuilder_detail::IsHashableData< T >::value, HashBuilder & > add(T Value)
Implement hashing for hashable data types, e.g. integral or enum values.
Reader for the indexed binary instrprof format.
static Expected< std::unique_ptr< IndexedInstrProfReader > > create(const Twine &Path, vfs::FileSystem &FS, const Twine &RemappingPath="")
Factory method to create an indexed reader.
instrprof_error get() const
Definition InstrProf.h:464
std::string message() const override
Return the error message as a string.
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
A smart pointer to a reference-counted object that inherits from RefCountedBase or ThreadSafeRefCount...
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
LLVM_ABI MemProfUsePass(std::string MemoryProfileFile, IntrusiveRefCntPtr< vfs::FileSystem > FS=nullptr)
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
@ Warning
Emits a warning if two values disagree.
Definition Module.h:124
The optimization diagnostic interface.
LLVM_ABI void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for applied optimization remarks.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
A vector that has set insertion semantics.
Definition SetVector.h:57
ArrayRef< value_type > getArrayRef() const
Definition SetVector.h:91
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
void append(StringRef RHS)
Append from a StringRef.
Definition SmallString.h:68
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
constexpr bool empty() const
empty - Check if the string is empty.
Definition StringRef.h:143
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
An efficient, type-erasing, non-owning reference to a callable.
Class to build a trie of call stack contexts for a particular profiled allocation call,...
LLVM_ABI void addCallStack(AllocationType AllocType, ArrayRef< uint64_t > StackIds, std::vector< ContextTotalSize > ContextSizeInfo={})
Add a call stack context with the given allocation type to the Trie.
LLVM_ABI void addSingleAllocTypeAttribute(CallBase *CI, AllocationType AT, StringRef Descriptor)
Add an attribute for the given allocation type to the call instruction.
LLVM_ABI bool buildAndAttachMIBMetadata(CallBase *CI)
Build and attach the minimal necessary MIB metadata.
Helper class to iterate through stack ids in both metadata (memprof MIB and callsite) and the corresp...
Encapsulates the data access profile data and the methods to operate on it.
LLVM_ABI std::optional< DataAccessProfRecord > getProfileRecord(const SymbolHandleRef SymID) const
Returns a profile record for SymbolID, or std::nullopt if there isn't a record.
LLVM_ABI bool isKnownColdSymbol(const SymbolHandleRef SymID) const
Returns true if SymID is seen in profiled binaries and cold.
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
initializer< Ty > init(const Ty &Val)
LLVM_ABI DenseMap< uint64_t, LocToLocMap > computeUndriftMap(Module &M, IndexedInstrProfReader *MemProfReader, const TargetLibraryInfo &TLI)
LLVM_ABI MDNode * buildCallstackMetadata(ArrayRef< uint64_t > CallStack, LLVMContext &Ctx)
Build callstack metadata from the provided list of call stack ids.
LLVM_ABI AllocationType getAllocType(uint64_t TotalLifetimeAccessDensity, uint64_t AllocCount, uint64_t TotalLifetime)
Return the allocation type for a given set of memory profile values.
LLVM_ABI bool recordContextSizeInfoForAnalysis()
Whether we need to record the context size info in the alloc trie used to build metadata.
std::unordered_map< LineLocation, LineLocation, LineLocationHash > LocToLocMap
Definition MemProfUse.h:65
LLVM_ABI uint64_t computeFullStackId(ArrayRef< Frame > CallStack)
Helper to generate a single hash id for a given callstack, used for emitting matching statistics and ...
LLVM_ABI DenseMap< uint64_t, SmallVector< CallEdgeTy, 0 > > extractCallsFromIR(Module &M, const TargetLibraryInfo &TLI, function_ref< bool(uint64_t)> IsPresentInProfile=[](uint64_t) { return true;})
AnnotationKind getAnnotationKind(const GlobalVariable &GV)
Returns the annotation kind of the global variable GV.
LLVM_ABI GlobalValue::GUID getGUID(const StringRef FunctionName)
Definition MemProf.cpp:344
LLVM_ABI std::string getAllocTypeAttributeString(AllocationType Type)
Returns the string to use in attributes with the given type.
DiagnosticInfoOptimizationBase::Argument NV
LLVM_ABI IntrusiveRefCntPtr< FileSystem > getRealFileSystem()
Gets an vfs::FileSystem for the 'real' file system, as seen by the operating system.
This is an optimization pass for GlobalISel generic memory operations.
std::array< uint8_t, NumBytes > BLAKE3Result
The constant LLVM_BLAKE3_OUT_LEN provides the default output length, 32 bytes, which is recommended f...
Definition BLAKE3.h:35
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
void handleAllErrors(Error E, HandlerTs &&... Handlers)
Behaves the same as handleErrors, except that by contract all errors must be handled by the given han...
Definition Error.h:990
constexpr from_range_t from_range
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2157
InnerAnalysisManagerProxy< FunctionAnalysisManager, Module > FunctionAnalysisManagerModuleProxy
Provide the FunctionAnalysisManager to Module proxy.
cl::opt< bool > PGOWarnMissing
auto unique(Range &&R, Predicate P)
Definition STLExtras.h:2097
auto map_range(ContainerTy &&C, FuncTy F)
Definition STLExtras.h:364
LLVM_ABI void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1634
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI SmallVector< InstrProfValueData, 4 > getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind, uint32_t MaxNumValueData, uint64_t &TotalC, bool GetNoICPValue=false)
Extract the value profile data from Inst and returns them if Inst is annotated with value profile dat...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
cl::opt< bool > NoPGOWarnMismatch
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
cl::opt< bool > SalvageStaleProfile("salvage-stale-profile", cl::Hidden, cl::init(false), cl::desc("Salvage stale profile by fuzzy matching and use the remapped " "location for sample profile query."))
void longestCommonSequence(AnchorList AnchorList1, AnchorList AnchorList2, llvm::function_ref< bool(const Function &, const Function &)> FunctionMatchesProfile, llvm::function_ref< void(Loc, Loc)> InsertMatching)
ArrayRef(const T &OneElt) -> ArrayRef< T >
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Definition STLExtras.h:2109
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
Definition MIRParser.h:39
cl::opt< bool > NoPGOWarnMismatchComdatWeak
std::set< unsigned > MatchedFramesSet
uint64_t TotalSize
std::vector< Frame > CallStack
AllocMatchInfo(uint64_t TotalSize, AllocationType AllocType)
AllocationType AllocType
ArrayRef< GlobalValue::GUID > CalleeGuids
ArrayRef< Frame > Frames
Summary of memprof metadata on allocations.
GlobalValue::GUID Function
Definition MemProf.h:245
uint32_t LineOffset
Definition MemProf.h:250
llvm::SmallVector< CallSiteInfo > CallSites
Definition MemProf.h:522
llvm::SmallVector< AllocationInfo > AllocSites
Definition MemProf.h:520