LLVM 20.0.0git
MemProfiler.cpp
Go to the documentation of this file.
1//===- MemProfiler.cpp - memory allocation and access profiler ------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file is a part of MemProfiler. Memory accesses are instrumented
10// to increment the access count held in a shadow memory location, or
11// alternatively to call into the runtime. Memory intrinsic calls (memmove,
12// memcpy, memset) are changed to call the memory profiling runtime version
13// instead.
14//
15//===----------------------------------------------------------------------===//
16
19#include "llvm/ADT/Statistic.h"
20#include "llvm/ADT/StringRef.h"
25#include "llvm/IR/Constant.h"
26#include "llvm/IR/DataLayout.h"
28#include "llvm/IR/Function.h"
29#include "llvm/IR/GlobalValue.h"
30#include "llvm/IR/IRBuilder.h"
31#include "llvm/IR/Instruction.h"
33#include "llvm/IR/Module.h"
34#include "llvm/IR/Type.h"
35#include "llvm/IR/Value.h"
38#include "llvm/Support/BLAKE3.h"
40#include "llvm/Support/Debug.h"
47#include <map>
48#include <set>
49
50using namespace llvm;
51using namespace llvm::memprof;
52
53#define DEBUG_TYPE "memprof"
54
55namespace llvm {
59} // namespace llvm
60
61constexpr int LLVM_MEM_PROFILER_VERSION = 1;
62
63// Size of memory mapped to a single shadow location.
65
66// Size of memory mapped to a single histogram bucket.
68
69// Scale from granularity down to shadow size.
71
72constexpr char MemProfModuleCtorName[] = "memprof.module_ctor";
74// On Emscripten, the system needs more than one priorities for constructors.
76constexpr char MemProfInitName[] = "__memprof_init";
78 "__memprof_version_mismatch_check_v";
79
81 "__memprof_shadow_memory_dynamic_address";
82
83constexpr char MemProfFilenameVar[] = "__memprof_profile_filename";
84
85constexpr char MemProfHistogramFlagVar[] = "__memprof_histogram";
86
87// Command-line flags.
88
90 "memprof-guard-against-version-mismatch",
91 cl::desc("Guard against compiler/runtime version mismatch."), cl::Hidden,
92 cl::init(true));
93
94// This flag may need to be replaced with -f[no-]memprof-reads.
95static cl::opt<bool> ClInstrumentReads("memprof-instrument-reads",
96 cl::desc("instrument read instructions"),
97 cl::Hidden, cl::init(true));
98
99static cl::opt<bool>
100 ClInstrumentWrites("memprof-instrument-writes",
101 cl::desc("instrument write instructions"), cl::Hidden,
102 cl::init(true));
103
105 "memprof-instrument-atomics",
106 cl::desc("instrument atomic instructions (rmw, cmpxchg)"), cl::Hidden,
107 cl::init(true));
108
110 "memprof-use-callbacks",
111 cl::desc("Use callbacks instead of inline instrumentation sequences."),
112 cl::Hidden, cl::init(false));
113
115 ClMemoryAccessCallbackPrefix("memprof-memory-access-callback-prefix",
116 cl::desc("Prefix for memory access callbacks"),
117 cl::Hidden, cl::init("__memprof_"));
118
119// These flags allow to change the shadow mapping.
120// The shadow mapping looks like
121// Shadow = ((Mem & mask) >> scale) + offset
122
123static cl::opt<int> ClMappingScale("memprof-mapping-scale",
124 cl::desc("scale of memprof shadow mapping"),
126
127static cl::opt<int>
128 ClMappingGranularity("memprof-mapping-granularity",
129 cl::desc("granularity of memprof shadow mapping"),
131
132static cl::opt<bool> ClStack("memprof-instrument-stack",
133 cl::desc("Instrument scalar stack variables"),
134 cl::Hidden, cl::init(false));
135
136// Debug flags.
137
138static cl::opt<int> ClDebug("memprof-debug", cl::desc("debug"), cl::Hidden,
139 cl::init(0));
140
141static cl::opt<std::string> ClDebugFunc("memprof-debug-func", cl::Hidden,
142 cl::desc("Debug func"));
143
144static cl::opt<int> ClDebugMin("memprof-debug-min", cl::desc("Debug min inst"),
145 cl::Hidden, cl::init(-1));
146
147static cl::opt<int> ClDebugMax("memprof-debug-max", cl::desc("Debug max inst"),
148 cl::Hidden, cl::init(-1));
149
150// By default disable matching of allocation profiles onto operator new that
151// already explicitly pass a hot/cold hint, since we don't currently
152// override these hints anyway.
154 "memprof-match-hot-cold-new",
155 cl::desc(
156 "Match allocation profiles onto existing hot/cold operator new calls"),
157 cl::Hidden, cl::init(false));
158
159static cl::opt<bool> ClHistogram("memprof-histogram",
160 cl::desc("Collect access count histograms"),
161 cl::Hidden, cl::init(false));
162
163static cl::opt<bool>
164 ClPrintMemProfMatchInfo("memprof-print-match-info",
165 cl::desc("Print matching stats for each allocation "
166 "context in this module's profiles"),
167 cl::Hidden, cl::init(false));
168
170 MemprofRuntimeDefaultOptions("memprof-runtime-default-options",
171 cl::desc("The default memprof options"),
172 cl::Hidden, cl::init(""));
173
174static cl::opt<bool>
175 SalvageStaleProfile("memprof-salvage-stale-profile",
176 cl::desc("Salvage stale MemProf profile"),
177 cl::init(false), cl::Hidden);
178
180 "memprof-cloning-cold-threshold", cl::init(100), cl::Hidden,
181 cl::desc("Min percent of cold bytes to hint alloc cold during cloning"));
182
184
186 "memprof-matching-cold-threshold", cl::init(100), cl::Hidden,
187 cl::desc("Min percent of cold bytes matched to hint allocation cold"));
188
// Instrumentation statistics (reported via -stats).
STATISTIC(NumInstrumentedReads, "Number of instrumented reads");
STATISTIC(NumInstrumentedWrites, "Number of instrumented writes");
// Scalar stack accesses are skipped by default; see -memprof-instrument-stack.
STATISTIC(NumSkippedStackReads, "Number of non-instrumented stack reads");
STATISTIC(NumSkippedStackWrites, "Number of non-instrumented stack writes");

// Matching statistics (profile-to-IR matching during memprof use).
STATISTIC(NumOfMemProfMissing, "Number of functions without memory profile.");
STATISTIC(NumOfMemProfMismatch,
          "Number of functions having mismatched memory profile hash.");
STATISTIC(NumOfMemProfFunc, "Number of functions having valid memory profile.");
STATISTIC(NumOfMemProfAllocContextProfiles,
          "Number of alloc contexts in memory profile.");
STATISTIC(NumOfMemProfCallSiteProfiles,
          "Number of callsites in memory profile.");
STATISTIC(NumOfMemProfMatchedAllocContexts,
          "Number of matched memory profile alloc contexts.");
STATISTIC(NumOfMemProfMatchedAllocs,
          "Number of matched memory profile allocs.");
STATISTIC(NumOfMemProfMatchedCallSites,
          "Number of matched memory profile callsites.");
211namespace {
212
213/// This struct defines the shadow mapping using the rule:
214/// shadow = ((mem & mask) >> Scale) ADD DynamicShadowOffset.
215struct ShadowMapping {
216 ShadowMapping() {
217 Scale = ClMappingScale;
219 Mask = ~(Granularity - 1);
220 }
221
222 int Scale;
223 int Granularity;
224 uint64_t Mask; // Computed as ~(Granularity-1)
225};
226
227static uint64_t getCtorAndDtorPriority(Triple &TargetTriple) {
230}
231
struct InterestingMemoryAccess {
  // Pointer operand of the access; remains null when the instruction is not
  // an interesting access.
  Value *Addr = nullptr;
  // True for stores and atomics (rmw/cmpxchg/masked store), false for loads.
  bool IsWrite;
  // Type of the value being loaded or stored.
  Type *AccessTy;
  // For masked vector loads/stores, the mask operand; otherwise null.
  Value *MaybeMask = nullptr;
};
238
/// Instrument the code in module to profile memory accesses.
class MemProfiler {
public:
  MemProfiler(Module &M) {
    C = &(M.getContext());
    LongSize = M.getDataLayout().getPointerSizeInBits();
    IntptrTy = Type::getIntNTy(*C, LongSize);
    PtrTy = PointerType::getUnqual(*C);
  }

  /// If it is an interesting memory access, populate information
  /// about the access and return a InterestingMemoryAccess struct.
  /// Otherwise return std::nullopt.
  std::optional<InterestingMemoryAccess>
  isInterestingMemoryAccess(Instruction *I) const;

  /// Instrument one interesting access (scalar or masked vector).
  void instrumentMop(Instruction *I, const DataLayout &DL,
                     InterestingMemoryAccess &Access);
  /// Emit the shadow-count update (or a runtime callback) for one address.
  void instrumentAddress(Instruction *OrigIns, Instruction *InsertBefore,
                         Value *Addr, bool IsWrite);
  /// Instrument each potentially-enabled lane of a masked load/store.
  void instrumentMaskedLoadOrStore(const DataLayout &DL, Value *Mask,
                                   Instruction *I, Value *Addr, Type *AccessTy,
                                   bool IsWrite);
  /// Replace memmove/memcpy/memset with calls into the profiling runtime.
  void instrumentMemIntrinsic(MemIntrinsic *MI);
  /// Map an application address to its shadow location.
  Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);
  bool instrumentFunction(Function &F);
  bool maybeInsertMemProfInitAtFunctionEntry(Function &F);
  bool insertDynamicShadowAtFunctionEntry(Function &F);

private:
  void initializeCallbacks(Module &M);

  LLVMContext *C;     // Context of the module being instrumented.
  int LongSize;       // Pointer size in bits, from the data layout.
  Type *IntptrTy;     // Integer type with pointer width.
  PointerType *PtrTy; // Unqualified (address space 0) pointer type.
  ShadowMapping Mapping;

  // This array is indexed by AccessIsWrite (0 = load callback, 1 = store).
  FunctionCallee MemProfMemoryAccessCallback[2];

  // Runtime replacements for the memory intrinsics.
  FunctionCallee MemProfMemmove, MemProfMemcpy, MemProfMemset;
  // Loaded at function entry; added to the masked/shifted address to form
  // the shadow address.
  Value *DynamicShadowOffset = nullptr;
};
283
class ModuleMemProfiler {
public:
  ModuleMemProfiler(Module &M) { TargetTriple = Triple(M.getTargetTriple()); }

  /// Module-level setup: creates the memprof module constructor and
  /// supporting globals.
  bool instrumentModule(Module &);

private:
  Triple TargetTriple; // Used to pick the constructor priority.
  ShadowMapping Mapping;
  // The generated "memprof.module_ctor" function.
  Function *MemProfCtorFunction = nullptr;
};
295
296} // end anonymous namespace
297
299
303 "Memprof with histogram only supports default mapping granularity");
304 Module &M = *F.getParent();
305 MemProfiler Profiler(M);
306 if (Profiler.instrumentFunction(F))
308 return PreservedAnalyses::all();
309}
310
312
315
316 ModuleMemProfiler Profiler(M);
317 if (Profiler.instrumentModule(M))
319 return PreservedAnalyses::all();
320}
321
// Compute the shadow address for an application address, following the
// mapping rule: shadow = ((mem & mask) >> scale) + DynamicShadowOffset.
Value *MemProfiler::memToShadow(Value *Shadow, IRBuilder<> &IRB) {
  // (Shadow & mask) >> scale
  Shadow = IRB.CreateAnd(Shadow, Mapping.Mask);
  Shadow = IRB.CreateLShr(Shadow, Mapping.Scale);
  // ((Shadow & mask) >> scale) + offset
  assert(DynamicShadowOffset);
  return IRB.CreateAdd(Shadow, DynamicShadowOffset);
}
330
331// Instrument memset/memmove/memcpy
332void MemProfiler::instrumentMemIntrinsic(MemIntrinsic *MI) {
333 IRBuilder<> IRB(MI);
334 if (isa<MemTransferInst>(MI)) {
335 IRB.CreateCall(isa<MemMoveInst>(MI) ? MemProfMemmove : MemProfMemcpy,
336 {MI->getOperand(0), MI->getOperand(1),
337 IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)});
338 } else if (isa<MemSetInst>(MI)) {
339 IRB.CreateCall(
340 MemProfMemset,
341 {MI->getOperand(0),
342 IRB.CreateIntCast(MI->getOperand(1), IRB.getInt32Ty(), false),
343 IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)});
344 }
345 MI->eraseFromParent();
346}
347
348std::optional<InterestingMemoryAccess>
349MemProfiler::isInterestingMemoryAccess(Instruction *I) const {
350 // Do not instrument the load fetching the dynamic shadow address.
351 if (DynamicShadowOffset == I)
352 return std::nullopt;
353
354 InterestingMemoryAccess Access;
355
356 if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
358 return std::nullopt;
359 Access.IsWrite = false;
360 Access.AccessTy = LI->getType();
361 Access.Addr = LI->getPointerOperand();
362 } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
364 return std::nullopt;
365 Access.IsWrite = true;
366 Access.AccessTy = SI->getValueOperand()->getType();
367 Access.Addr = SI->getPointerOperand();
368 } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
370 return std::nullopt;
371 Access.IsWrite = true;
372 Access.AccessTy = RMW->getValOperand()->getType();
373 Access.Addr = RMW->getPointerOperand();
374 } else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) {
376 return std::nullopt;
377 Access.IsWrite = true;
378 Access.AccessTy = XCHG->getCompareOperand()->getType();
379 Access.Addr = XCHG->getPointerOperand();
380 } else if (auto *CI = dyn_cast<CallInst>(I)) {
381 auto *F = CI->getCalledFunction();
382 if (F && (F->getIntrinsicID() == Intrinsic::masked_load ||
383 F->getIntrinsicID() == Intrinsic::masked_store)) {
384 unsigned OpOffset = 0;
385 if (F->getIntrinsicID() == Intrinsic::masked_store) {
387 return std::nullopt;
388 // Masked store has an initial operand for the value.
389 OpOffset = 1;
390 Access.AccessTy = CI->getArgOperand(0)->getType();
391 Access.IsWrite = true;
392 } else {
394 return std::nullopt;
395 Access.AccessTy = CI->getType();
396 Access.IsWrite = false;
397 }
398
399 auto *BasePtr = CI->getOperand(0 + OpOffset);
400 Access.MaybeMask = CI->getOperand(2 + OpOffset);
401 Access.Addr = BasePtr;
402 }
403 }
404
405 if (!Access.Addr)
406 return std::nullopt;
407
408 // Do not instrument accesses from different address spaces; we cannot deal
409 // with them.
410 Type *PtrTy = cast<PointerType>(Access.Addr->getType()->getScalarType());
411 if (PtrTy->getPointerAddressSpace() != 0)
412 return std::nullopt;
413
414 // Ignore swifterror addresses.
415 // swifterror memory addresses are mem2reg promoted by instruction
416 // selection. As such they cannot have regular uses like an instrumentation
417 // function and it makes no sense to track them as memory.
418 if (Access.Addr->isSwiftError())
419 return std::nullopt;
420
421 // Peel off GEPs and BitCasts.
422 auto *Addr = Access.Addr->stripInBoundsOffsets();
423
424 if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) {
425 // Do not instrument PGO counter updates.
426 if (GV->hasSection()) {
427 StringRef SectionName = GV->getSection();
428 // Check if the global is in the PGO counters section.
429 auto OF = Triple(I->getModule()->getTargetTriple()).getObjectFormat();
430 if (SectionName.ends_with(
431 getInstrProfSectionName(IPSK_cnts, OF, /*AddSegmentInfo=*/false)))
432 return std::nullopt;
433 }
434
435 // Do not instrument accesses to LLVM internal variables.
436 if (GV->getName().starts_with("__llvm"))
437 return std::nullopt;
438 }
439
440 return Access;
441}
442
443void MemProfiler::instrumentMaskedLoadOrStore(const DataLayout &DL, Value *Mask,
445 Type *AccessTy, bool IsWrite) {
446 auto *VTy = cast<FixedVectorType>(AccessTy);
447 unsigned Num = VTy->getNumElements();
448 auto *Zero = ConstantInt::get(IntptrTy, 0);
449 for (unsigned Idx = 0; Idx < Num; ++Idx) {
450 Value *InstrumentedAddress = nullptr;
451 Instruction *InsertBefore = I;
452 if (auto *Vector = dyn_cast<ConstantVector>(Mask)) {
453 // dyn_cast as we might get UndefValue
454 if (auto *Masked = dyn_cast<ConstantInt>(Vector->getOperand(Idx))) {
455 if (Masked->isZero())
456 // Mask is constant false, so no instrumentation needed.
457 continue;
458 // If we have a true or undef value, fall through to instrumentAddress.
459 // with InsertBefore == I
460 }
461 } else {
462 IRBuilder<> IRB(I);
463 Value *MaskElem = IRB.CreateExtractElement(Mask, Idx);
464 Instruction *ThenTerm = SplitBlockAndInsertIfThen(MaskElem, I, false);
465 InsertBefore = ThenTerm;
466 }
467
468 IRBuilder<> IRB(InsertBefore);
469 InstrumentedAddress =
470 IRB.CreateGEP(VTy, Addr, {Zero, ConstantInt::get(IntptrTy, Idx)});
471 instrumentAddress(I, InsertBefore, InstrumentedAddress, IsWrite);
472 }
473}
474
475void MemProfiler::instrumentMop(Instruction *I, const DataLayout &DL,
476 InterestingMemoryAccess &Access) {
477 // Skip instrumentation of stack accesses unless requested.
478 if (!ClStack && isa<AllocaInst>(getUnderlyingObject(Access.Addr))) {
479 if (Access.IsWrite)
480 ++NumSkippedStackWrites;
481 else
482 ++NumSkippedStackReads;
483 return;
484 }
485
486 if (Access.IsWrite)
487 NumInstrumentedWrites++;
488 else
489 NumInstrumentedReads++;
490
491 if (Access.MaybeMask) {
492 instrumentMaskedLoadOrStore(DL, Access.MaybeMask, I, Access.Addr,
493 Access.AccessTy, Access.IsWrite);
494 } else {
495 // Since the access counts will be accumulated across the entire allocation,
496 // we only update the shadow access count for the first location and thus
497 // don't need to worry about alignment and type size.
498 instrumentAddress(I, I, Access.Addr, Access.IsWrite);
499 }
500}
501
// Emit the shadow-count update for a single address: either a call into the
// runtime (-memprof-use-callbacks) or an inline load/increment/store of the
// shadow location.
void MemProfiler::instrumentAddress(Instruction *OrigIns,
                                    Instruction *InsertBefore, Value *Addr,
                                    bool IsWrite) {
  // Note: OrigIns is not referenced in this body; only InsertBefore
  // determines where the instrumentation is emitted.
  IRBuilder<> IRB(InsertBefore);
  Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);

  if (ClUseCalls) {
    // Callback mode: the runtime performs the shadow update.
    IRB.CreateCall(MemProfMemoryAccessCallback[IsWrite], AddrLong);
    return;
  }

  // Histogram mode uses 8-bit buckets; the default is a 64-bit counter.
  Type *ShadowTy = ClHistogram ? Type::getInt8Ty(*C) : Type::getInt64Ty(*C);
  Type *ShadowPtrTy = PointerType::get(ShadowTy, 0);

  Value *ShadowPtr = memToShadow(AddrLong, IRB);
  Value *ShadowAddr = IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy);
  Value *ShadowValue = IRB.CreateLoad(ShadowTy, ShadowAddr);
  // If we are profiling with histograms, add overflow protection at 255.
  if (ClHistogram) {
    Value *MaxCount = ConstantInt::get(Type::getInt8Ty(*C), 255);
    Value *Cmp = IRB.CreateICmpULT(ShadowValue, MaxCount);
    Instruction *IncBlock =
        SplitBlockAndInsertIfThen(Cmp, InsertBefore, /*Unreachable=*/false);
    // Redirect the builder into the conditional block so the increment and
    // store below only execute when the bucket is below the cap.
    IRB.SetInsertPoint(IncBlock);
  }
  Value *Inc = ConstantInt::get(ShadowTy, 1);
  ShadowValue = IRB.CreateAdd(ShadowValue, Inc);
  IRB.CreateStore(ShadowValue, ShadowAddr);
}
531
532// Create the variable for the profile file name.
534 const MDString *MemProfFilename =
535 dyn_cast_or_null<MDString>(M.getModuleFlag("MemProfProfileFilename"));
536 if (!MemProfFilename)
537 return;
538 assert(!MemProfFilename->getString().empty() &&
539 "Unexpected MemProfProfileFilename metadata with empty string");
540 Constant *ProfileNameConst = ConstantDataArray::getString(
541 M.getContext(), MemProfFilename->getString(), true);
542 GlobalVariable *ProfileNameVar = new GlobalVariable(
543 M, ProfileNameConst->getType(), /*isConstant=*/true,
545 Triple TT(M.getTargetTriple());
546 if (TT.supportsCOMDAT()) {
548 ProfileNameVar->setComdat(M.getOrInsertComdat(MemProfFilenameVar));
549 }
550}
551
552// Set MemprofHistogramFlag as a Global veriable in IR. This makes it accessible
553// to the runtime, changing shadow count behavior.
555 const StringRef VarName(MemProfHistogramFlagVar);
556 Type *IntTy1 = Type::getInt1Ty(M.getContext());
557 auto MemprofHistogramFlag = new GlobalVariable(
558 M, IntTy1, true, GlobalValue::WeakAnyLinkage,
559 Constant::getIntegerValue(IntTy1, APInt(1, ClHistogram)), VarName);
560 Triple TT(M.getTargetTriple());
561 if (TT.supportsCOMDAT()) {
562 MemprofHistogramFlag->setLinkage(GlobalValue::ExternalLinkage);
563 MemprofHistogramFlag->setComdat(M.getOrInsertComdat(VarName));
564 }
565 appendToCompilerUsed(M, MemprofHistogramFlag);
566}
567
570 M.getContext(), MemprofRuntimeDefaultOptions, /*AddNull=*/true);
571 GlobalVariable *OptionsVar =
572 new GlobalVariable(M, OptionsConst->getType(), /*isConstant=*/true,
573 GlobalValue::WeakAnyLinkage, OptionsConst,
574 "__memprof_default_options_str");
575 Triple TT(M.getTargetTriple());
576 if (TT.supportsCOMDAT()) {
578 OptionsVar->setComdat(M.getOrInsertComdat(OptionsVar->getName()));
579 }
580}
581
582bool ModuleMemProfiler::instrumentModule(Module &M) {
583
584 // Create a module constructor.
585 std::string MemProfVersion = std::to_string(LLVM_MEM_PROFILER_VERSION);
586 std::string VersionCheckName =
588 : "";
589 std::tie(MemProfCtorFunction, std::ignore) =
591 MemProfInitName, /*InitArgTypes=*/{},
592 /*InitArgs=*/{}, VersionCheckName);
593
594 const uint64_t Priority = getCtorAndDtorPriority(TargetTriple);
595 appendToGlobalCtors(M, MemProfCtorFunction, Priority);
596
598
600
602
603 return true;
604}
605
606void MemProfiler::initializeCallbacks(Module &M) {
607 IRBuilder<> IRB(*C);
608
609 for (size_t AccessIsWrite = 0; AccessIsWrite <= 1; AccessIsWrite++) {
610 const std::string TypeStr = AccessIsWrite ? "store" : "load";
611 const std::string HistPrefix = ClHistogram ? "hist_" : "";
612
613 SmallVector<Type *, 2> Args1{1, IntptrTy};
614 MemProfMemoryAccessCallback[AccessIsWrite] = M.getOrInsertFunction(
615 ClMemoryAccessCallbackPrefix + HistPrefix + TypeStr,
616 FunctionType::get(IRB.getVoidTy(), Args1, false));
617 }
618 MemProfMemmove = M.getOrInsertFunction(
619 ClMemoryAccessCallbackPrefix + "memmove", PtrTy, PtrTy, PtrTy, IntptrTy);
620 MemProfMemcpy = M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + "memcpy",
621 PtrTy, PtrTy, PtrTy, IntptrTy);
622 MemProfMemset =
623 M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + "memset", PtrTy,
624 PtrTy, IRB.getInt32Ty(), IntptrTy);
625}
626
627bool MemProfiler::maybeInsertMemProfInitAtFunctionEntry(Function &F) {
628 // For each NSObject descendant having a +load method, this method is invoked
629 // by the ObjC runtime before any of the static constructors is called.
630 // Therefore we need to instrument such methods with a call to __memprof_init
631 // at the beginning in order to initialize our runtime before any access to
632 // the shadow memory.
633 // We cannot just ignore these methods, because they may call other
634 // instrumented functions.
635 if (F.getName().contains(" load]")) {
636 FunctionCallee MemProfInitFunction =
638 IRBuilder<> IRB(&F.front(), F.front().begin());
639 IRB.CreateCall(MemProfInitFunction, {});
640 return true;
641 }
642 return false;
643}
644
645bool MemProfiler::insertDynamicShadowAtFunctionEntry(Function &F) {
646 IRBuilder<> IRB(&F.front().front());
647 Value *GlobalDynamicAddress = F.getParent()->getOrInsertGlobal(
649 if (F.getParent()->getPICLevel() == PICLevel::NotPIC)
650 cast<GlobalVariable>(GlobalDynamicAddress)->setDSOLocal(true);
651 DynamicShadowOffset = IRB.CreateLoad(IntptrTy, GlobalDynamicAddress);
652 return true;
653}
654
655bool MemProfiler::instrumentFunction(Function &F) {
656 if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage)
657 return false;
658 if (ClDebugFunc == F.getName())
659 return false;
660 if (F.getName().starts_with("__memprof_"))
661 return false;
662
663 bool FunctionModified = false;
664
665 // If needed, insert __memprof_init.
666 // This function needs to be called even if the function body is not
667 // instrumented.
668 if (maybeInsertMemProfInitAtFunctionEntry(F))
669 FunctionModified = true;
670
671 LLVM_DEBUG(dbgs() << "MEMPROF instrumenting:\n" << F << "\n");
672
673 initializeCallbacks(*F.getParent());
674
676
677 // Fill the set of memory operations to instrument.
678 for (auto &BB : F) {
679 for (auto &Inst : BB) {
680 if (isInterestingMemoryAccess(&Inst) || isa<MemIntrinsic>(Inst))
681 ToInstrument.push_back(&Inst);
682 }
683 }
684
685 if (ToInstrument.empty()) {
686 LLVM_DEBUG(dbgs() << "MEMPROF done instrumenting: " << FunctionModified
687 << " " << F << "\n");
688
689 return FunctionModified;
690 }
691
692 FunctionModified |= insertDynamicShadowAtFunctionEntry(F);
693
694 int NumInstrumented = 0;
695 for (auto *Inst : ToInstrument) {
696 if (ClDebugMin < 0 || ClDebugMax < 0 ||
697 (NumInstrumented >= ClDebugMin && NumInstrumented <= ClDebugMax)) {
698 std::optional<InterestingMemoryAccess> Access =
699 isInterestingMemoryAccess(Inst);
700 if (Access)
701 instrumentMop(Inst, F.getDataLayout(), *Access);
702 else
703 instrumentMemIntrinsic(cast<MemIntrinsic>(Inst));
704 }
705 NumInstrumented++;
706 }
707
708 if (NumInstrumented > 0)
709 FunctionModified = true;
710
711 LLVM_DEBUG(dbgs() << "MEMPROF done instrumenting: " << FunctionModified << " "
712 << F << "\n");
713
714 return FunctionModified;
715}
716
718 ArrayRef<uint64_t> InlinedCallStack,
719 LLVMContext &Ctx) {
720 I.setMetadata(LLVMContext::MD_callsite,
721 buildCallstackMetadata(InlinedCallStack, Ctx));
722}
723
725 uint32_t Column) {
728 HashBuilder.add(Function, LineOffset, Column);
730 uint64_t Id;
731 std::memcpy(&Id, Hash.data(), sizeof(Hash));
732 return Id;
733}
734
737}
738
739// Helper to generate a single hash id for a given callstack, used for emitting
740// matching statistics and useful for uniquing such statistics across modules.
744 for (auto &F : CallStack)
745 HashBuilder.add(F.Function, F.LineOffset, F.Column);
747 uint64_t Id;
748 std::memcpy(&Id, Hash.data(), sizeof(Hash));
749 return Id;
750}
751
754 uint64_t FullStackId) {
755 SmallVector<uint64_t> StackIds;
756 for (const auto &StackFrame : AllocInfo->CallStack)
757 StackIds.push_back(computeStackId(StackFrame));
758 auto AllocType = getAllocType(AllocInfo->Info.getTotalLifetimeAccessDensity(),
759 AllocInfo->Info.getAllocCount(),
760 AllocInfo->Info.getTotalLifetime());
761 std::vector<ContextTotalSize> ContextSizeInfo;
763 auto TotalSize = AllocInfo->Info.getTotalSize();
764 assert(TotalSize);
765 assert(FullStackId != 0);
766 ContextSizeInfo.push_back({FullStackId, TotalSize});
767 }
768 AllocTrie.addCallStack(AllocType, StackIds, std::move(ContextSizeInfo));
769 return AllocType;
770}
771
772// Helper to compare the InlinedCallStack computed from an instruction's debug
773// info to a list of Frames from profile data (either the allocation data or a
774// callsite). For callsites, the StartIndex to use in the Frame array may be
775// non-zero.
776static bool
778 ArrayRef<uint64_t> InlinedCallStack) {
779 auto StackFrame = ProfileCallStack.begin();
780 auto InlCallStackIter = InlinedCallStack.begin();
781 for (; StackFrame != ProfileCallStack.end() &&
782 InlCallStackIter != InlinedCallStack.end();
783 ++StackFrame, ++InlCallStackIter) {
784 uint64_t StackId = computeStackId(*StackFrame);
785 if (StackId != *InlCallStackIter)
786 return false;
787 }
788 // Return true if we found and matched all stack ids from the call
789 // instruction.
790 return InlCallStackIter == InlinedCallStack.end();
791}
792
// Returns true when \p Callee is an allocation function for which a
// hot/cold-hinted variant exists, so the matched allocation can be rewritten.
static bool isAllocationWithHotColdVariant(const Function *Callee,
                                           const TargetLibraryInfo &TLI) {
  if (!Callee)
    return false;
  LibFunc Func;
  if (!TLI.getLibFunc(*Callee, Func))
    return false;
  switch (Func) {
  // Plain operator new / new[] (plus nothrow/aligned/size-returning forms):
  // candidates for hinting.
  case LibFunc_Znwm:
  case LibFunc_ZnwmRKSt9nothrow_t:
  case LibFunc_ZnwmSt11align_val_t:
  case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t:
  case LibFunc_Znam:
  case LibFunc_ZnamRKSt9nothrow_t:
  case LibFunc_ZnamSt11align_val_t:
  case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t:
  case LibFunc_size_returning_new:
  case LibFunc_size_returning_new_aligned:
    return true;
  // Variants that already take an explicit __hot_cold_t hint.
  case LibFunc_Znwm12__hot_cold_t:
  case LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t:
  case LibFunc_ZnwmSt11align_val_t12__hot_cold_t:
  case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t:
  case LibFunc_Znam12__hot_cold_t:
  case LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t:
  case LibFunc_ZnamSt11align_val_t12__hot_cold_t:
  case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t:
  case LibFunc_size_returning_new_hot_cold:
  case LibFunc_size_returning_new_aligned_hot_cold:
    // NOTE(review): upstream returns the -memprof-match-hot-cold-new flag for
    // these already-hinted variants (see the flag comment near the top of the
    // file); the return statement appears elided in this view, so these cases
    // currently fall through to default — confirm against the upstream file.
  default:
    return false;
  }
}
827
829 uint64_t TotalSize = 0;
830 AllocationType AllocType = AllocationType::None;
831 bool Matched = false;
832};
833
836 function_ref<bool(uint64_t)> IsPresentInProfile) {
838
839 auto GetOffset = [](const DILocation *DIL) {
840 return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) &
841 0xffff;
842 };
843
844 for (Function &F : M) {
845 if (F.isDeclaration())
846 continue;
847
848 for (auto &BB : F) {
849 for (auto &I : BB) {
850 if (!isa<CallBase>(&I) || isa<IntrinsicInst>(&I))
851 continue;
852
853 auto *CB = dyn_cast<CallBase>(&I);
854 auto *CalledFunction = CB->getCalledFunction();
855 // Disregard indirect calls and intrinsics.
856 if (!CalledFunction || CalledFunction->isIntrinsic())
857 continue;
858
859 StringRef CalleeName = CalledFunction->getName();
860 // True if we are calling a heap allocation function that supports
861 // hot/cold variants.
862 bool IsAlloc = isAllocationWithHotColdVariant(CalledFunction, TLI);
863 // True for the first iteration below, indicating that we are looking at
864 // a leaf node.
865 bool IsLeaf = true;
866 for (const DILocation *DIL = I.getDebugLoc(); DIL;
867 DIL = DIL->getInlinedAt()) {
868 StringRef CallerName = DIL->getSubprogramLinkageName();
869 assert(!CallerName.empty() &&
870 "Be sure to enable -fdebug-info-for-profiling");
871 uint64_t CallerGUID = IndexedMemProfRecord::getGUID(CallerName);
872 uint64_t CalleeGUID = IndexedMemProfRecord::getGUID(CalleeName);
873 // Pretend that we are calling a function with GUID == 0 if we are
874 // in the inline stack leading to a heap allocation function.
875 if (IsAlloc) {
876 if (IsLeaf) {
877 // For leaf nodes, set CalleeGUID to 0 without consulting
878 // IsPresentInProfile.
879 CalleeGUID = 0;
880 } else if (!IsPresentInProfile(CalleeGUID)) {
881 // In addition to the leaf case above, continue to set CalleeGUID
882 // to 0 as long as we don't see CalleeGUID in the profile.
883 CalleeGUID = 0;
884 } else {
885 // Once we encounter a callee that exists in the profile, stop
886 // setting CalleeGUID to 0.
887 IsAlloc = false;
888 }
889 }
890
891 LineLocation Loc = {GetOffset(DIL), DIL->getColumn()};
892 Calls[CallerGUID].emplace_back(Loc, CalleeGUID);
893 CalleeName = CallerName;
894 IsLeaf = false;
895 }
896 }
897 }
898 }
899
900 // Sort each call list by the source location.
901 for (auto &[CallerGUID, CallList] : Calls) {
902 llvm::sort(CallList);
903 CallList.erase(llvm::unique(CallList), CallList.end());
904 }
905
906 return Calls;
907}
908
911 const TargetLibraryInfo &TLI) {
913
915 MemProfReader->getMemProfCallerCalleePairs();
917 extractCallsFromIR(M, TLI, [&](uint64_t GUID) {
918 return CallsFromProfile.contains(GUID);
919 });
920
921 // Compute an undrift map for each CallerGUID.
922 for (const auto &[CallerGUID, IRAnchors] : CallsFromIR) {
923 auto It = CallsFromProfile.find(CallerGUID);
924 if (It == CallsFromProfile.end())
925 continue;
926 const auto &ProfileAnchors = It->second;
927
928 LocToLocMap Matchings;
929 longestCommonSequence<LineLocation, GlobalValue::GUID>(
930 ProfileAnchors, IRAnchors, std::equal_to<GlobalValue::GUID>(),
931 [&](LineLocation A, LineLocation B) { Matchings.try_emplace(A, B); });
932 bool Inserted = UndriftMaps.try_emplace(CallerGUID, Matchings).second;
933
934 // The insertion must succeed because we visit each GUID exactly once.
935 assert(Inserted);
936 (void)Inserted;
937 }
938
939 return UndriftMaps;
940}
941
942// Given a MemProfRecord, undrift all the source locations present in the
943// record in place.
944static void
946 memprof::MemProfRecord &MemProfRec) {
947 // Undrift a call stack in place.
948 auto UndriftCallStack = [&](std::vector<Frame> &CallStack) {
949 for (auto &F : CallStack) {
950 auto I = UndriftMaps.find(F.Function);
951 if (I == UndriftMaps.end())
952 continue;
953 auto J = I->second.find(LineLocation(F.LineOffset, F.Column));
954 if (J == I->second.end())
955 continue;
956 auto &NewLoc = J->second;
957 F.LineOffset = NewLoc.LineOffset;
958 F.Column = NewLoc.Column;
959 }
960 };
961
962 for (auto &AS : MemProfRec.AllocSites)
963 UndriftCallStack(AS.CallStack);
964
965 for (auto &CS : MemProfRec.CallSites)
966 UndriftCallStack(CS);
967}
968
// readMemprof: look up this function's MemProf record by GUID in the indexed
// profile reader, then walk the IR matching profiled allocation contexts and
// interior call sites (via debug-location hashes) and attach !memprof /
// !callsite metadata. Matched allocation info is also recorded into
// FullStackIdToAllocMatchInfo for optional reporting.
//
// NOTE(review): this listing is a doxygen source export with gaps — the
// original-file line numbers embedded at the start of each line skip in
// several places (e.g. 970, 992, 999-1000, 1148, 1152-1153, 1157, 1161,
// 1173), meaning continuation lines (including several `if` conditions) are
// missing here. Verify any change against the upstream MemProfiler.cpp.
969 static void
// NOTE(review): line 970 (the parameter list head, presumably
// `readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,`)
// is missing from this listing — confirm against upstream.
971 const TargetLibraryInfo &TLI,
972 std::map<uint64_t, AllocMatchInfo> &FullStackIdToAllocMatchInfo,
973 DenseMap<uint64_t, LocToLocMap> &UndriftMaps) {
974 auto &Ctx = M.getContext();
975 // Previously we used getIRPGOFuncName() here. If F is local linkage,
976 // getIRPGOFuncName() returns FuncName with prefix 'FileName;'. But
977 // llvm-profdata uses FuncName in dwarf to create GUID which doesn't
978 // contain FileName's prefix. It caused local linkage function can't
979 // find MemProfRecord. So we use getName() now.
980 // 'unique-internal-linkage-names' can make MemProf work better for local
981 // linkage function.
982 auto FuncName = F.getName();
983 auto FuncGUID = Function::getGUID(FuncName);
984 std::optional<memprof::MemProfRecord> MemProfRec;
985 auto Err = MemProfReader->getMemProfRecord(FuncGUID).moveInto(MemProfRec);
// On lookup failure, classify the error and emit a diagnostic unless the
// relevant cl::opt flags ask us to suppress the warning.
986 if (Err) {
987 handleAllErrors(std::move(Err), [&](const InstrProfError &IPE) {
988 auto Err = IPE.get();
989 bool SkipWarning = false;
990 LLVM_DEBUG(dbgs() << "Error in reading profile for Func " << FuncName
991 << ": ");
// NOTE(review): line 992 is missing from this listing — presumably the
// `if (Err == instrprof_error::unknown_function) {` guard; verify upstream.
993 NumOfMemProfMissing++;
994 SkipWarning = !PGOWarnMissing;
995 LLVM_DEBUG(dbgs() << "unknown function");
996 } else if (Err == instrprof_error::hash_mismatch) {
997 NumOfMemProfMismatch++;
998 SkipWarning =
// NOTE(review): lines 999-1000 and 1002 of the mismatch condition are
// missing from this listing — verify upstream.
1001 (F.hasComdat() ||
1003 LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")");
1004 }
1005
1006 if (SkipWarning)
1007 return;
1008
1009 std::string Msg = (IPE.message() + Twine(" ") + F.getName().str() +
1010 Twine(" Hash = ") + std::to_string(FuncGUID))
1011 .str();
1012
1013 Ctx.diagnose(
1014 DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning));
1015 });
1016 return;
1017 }
1018
1019 NumOfMemProfFunc++;
1020
1021 // If requested, undrift MemProfRecord so that the source locations in it
1022 // match those in the IR.
// NOTE(review): line 1023 (presumably the SalvageStaleProfile guard) is
// missing from this listing — verify upstream.
1024 undriftMemProfRecord(UndriftMaps, *MemProfRec);
1025
1026 // Detect if there are non-zero column numbers in the profile. If not,
1027 // treat all column numbers as 0 when matching (i.e. ignore any non-zero
1028 // columns in the IR). The profiled binary might have been built with
1029 // column numbers disabled, for example.
1030 bool ProfileHasColumns = false;
1031
1032 // Build maps of the location hash to all profile data with that leaf location
1033 // (allocation info and the callsites).
1034 std::map<uint64_t, std::set<const AllocationInfo *>> LocHashToAllocInfo;
1035 // A hash function for std::unordered_set<ArrayRef<Frame>> to work.
1036 struct CallStackHash {
1037 size_t operator()(ArrayRef<Frame> CS) const {
1038 return computeFullStackId(CS);
1039 }
1040 };
1041 // For the callsites we need to record slices of the frame array (see comments
1042 // below where the map entries are added).
1043 std::map<uint64_t, std::unordered_set<ArrayRef<Frame>, CallStackHash>>
1044 LocHashToCallSites;
1045 for (auto &AI : MemProfRec->AllocSites) {
1046 NumOfMemProfAllocContextProfiles++;
1047 // Associate the allocation info with the leaf frame. The later matching
1048 // code will match any inlined call sequences in the IR with a longer prefix
1049 // of call stack frames.
1050 uint64_t StackId = computeStackId(AI.CallStack[0]);
1051 LocHashToAllocInfo[StackId].insert(&AI);
1052 ProfileHasColumns |= AI.CallStack[0].Column;
1053 }
1054 for (auto &CS : MemProfRec->CallSites) {
1055 NumOfMemProfCallSiteProfiles++;
1056 // Need to record all frames from leaf up to and including this function,
1057 // as any of these may or may not have been inlined at this point.
1058 unsigned Idx = 0;
1059 for (auto &StackFrame : CS) {
1060 uint64_t StackId = computeStackId(StackFrame);
1061 LocHashToCallSites[StackId].insert(ArrayRef<Frame>(CS).drop_front(Idx++));
1062 ProfileHasColumns |= StackFrame.Column;
1063 // Once we find this function, we can stop recording.
1064 if (StackFrame.Function == FuncGUID)
1065 break;
1066 }
1067 assert(Idx <= CS.size() && CS[Idx - 1].Function == FuncGUID);
1068 }
1069
// Line offset within the enclosing subprogram, masked to 16 bits to match
// how the profile encodes LineOffset.
1070 auto GetOffset = [](const DILocation *DIL) {
1071 return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) &
1072 0xffff;
1073 };
1074
1075 // Now walk the instructions, looking up the associated profile data using
1076 // debug locations.
1077 for (auto &BB : F) {
1078 for (auto &I : BB) {
1079 if (I.isDebugOrPseudoInst())
1080 continue;
1081 // We are only interested in calls (allocation or interior call stack
1082 // context calls).
1083 auto *CI = dyn_cast<CallBase>(&I);
1084 if (!CI)
1085 continue;
1086 auto *CalledFunction = CI->getCalledFunction();
1087 if (CalledFunction && CalledFunction->isIntrinsic())
1088 continue;
1089 // List of call stack ids computed from the location hashes on debug
1090 // locations (leaf to inlined at root).
1091 SmallVector<uint64_t, 8> InlinedCallStack;
1092 // Was the leaf location found in one of the profile maps?
1093 bool LeafFound = false;
1094 // If leaf was found in a map, iterators pointing to its location in both
1095 // of the maps. It might exist in neither, one, or both (the latter case
1096 // can happen because we don't currently have discriminators to
1097 // distinguish the case when a single line/col maps to both an allocation
1098 // and another callsite).
1099 std::map<uint64_t, std::set<const AllocationInfo *>>::iterator
1100 AllocInfoIter;
1101 decltype(LocHashToCallSites)::iterator CallSitesIter;
// Walk the inlined-at chain from the leaf debug location outward,
// hashing each frame the same way the profile maps were keyed.
1102 for (const DILocation *DIL = I.getDebugLoc(); DIL != nullptr;
1103 DIL = DIL->getInlinedAt()) {
1104 // Use C++ linkage name if possible. Need to compile with
1105 // -fdebug-info-for-profiling to get linkage name.
1106 StringRef Name = DIL->getScope()->getSubprogram()->getLinkageName();
1107 if (Name.empty())
1108 Name = DIL->getScope()->getSubprogram()->getName();
1109 auto CalleeGUID = Function::getGUID(Name);
1110 auto StackId = computeStackId(CalleeGUID, GetOffset(DIL),
1111 ProfileHasColumns ? DIL->getColumn() : 0);
1112 // Check if we have found the profile's leaf frame. If yes, collect
1113 // the rest of the call's inlined context starting here. If not, see if
1114 // we find a match further up the inlined context (in case the profile
1115 // was missing debug frames at the leaf).
1116 if (!LeafFound) {
1117 AllocInfoIter = LocHashToAllocInfo.find(StackId);
1118 CallSitesIter = LocHashToCallSites.find(StackId);
1119 if (AllocInfoIter != LocHashToAllocInfo.end() ||
1120 CallSitesIter != LocHashToCallSites.end())
1121 LeafFound = true;
1122 }
1123 if (LeafFound)
1124 InlinedCallStack.push_back(StackId);
1125 }
1126 // If leaf not in either of the maps, skip inst.
1127 if (!LeafFound)
1128 continue;
1129
1130 // First add !memprof metadata from allocation info, if we found the
1131 // instruction's leaf location in that map, and if the rest of the
1132 // instruction's locations match the prefix Frame locations on an
1133 // allocation context with the same leaf.
1134 if (AllocInfoIter != LocHashToAllocInfo.end()) {
1135 // Only consider allocations which support hinting.
1136 if (!isAllocationWithHotColdVariant(CI->getCalledFunction(), TLI))
1137 continue;
1138 // We may match this instruction's location list to multiple MIB
1139 // contexts. Add them to a Trie specialized for trimming the contexts to
1140 // the minimal needed to disambiguate contexts with unique behavior.
1141 CallStackTrie AllocTrie;
1142 uint64_t TotalSize = 0;
1143 uint64_t TotalColdSize = 0;
1144 for (auto *AllocInfo : AllocInfoIter->second) {
1145 // Check the full inlined call stack against this one.
1146 // If we found and thus matched all frames on the call, include
1147 // this MIB.
// NOTE(review): line 1148 is missing from this listing — presumably
// `if (stackFrameIncludesInlinedCallStack(AllocInfo->CallStack,`;
// verify upstream.
1149 InlinedCallStack)) {
1150 NumOfMemProfMatchedAllocContexts++;
1151 uint64_t FullStackId = 0;
// NOTE(review): lines 1152-1153 (the guard deciding when to compute the
// full stack id) are missing from this listing — verify upstream.
1154 FullStackId = computeFullStackId(AllocInfo->CallStack);
1155 auto AllocType = addCallStack(AllocTrie, AllocInfo, FullStackId);
1156 TotalSize += AllocInfo->Info.getTotalSize();
// NOTE(review): line 1157 (presumably the AllocationType::Cold check
// gating the cold-size accumulation) is missing — verify upstream.
1158 TotalColdSize += AllocInfo->Info.getTotalSize();
1159 // Record information about the allocation if match info printing
1160 // was requested.
// NOTE(review): line 1161 (presumably the ClPrintMemProfMatchInfo
// guard) is missing from this listing — verify upstream.
1162 assert(FullStackId != 0);
1163 FullStackIdToAllocMatchInfo[FullStackId] = {
1164 AllocInfo->Info.getTotalSize(), AllocType, /*Matched=*/true};
1165 }
1166 }
1167 }
1168 // If the threshold for the percent of cold bytes is less than 100%,
1169 // and not all bytes are cold, see if we should still hint this
1170 // allocation as cold without context sensitivity.
1171 if (TotalColdSize < TotalSize && MinMatchedColdBytePercent < 100 &&
1172 TotalColdSize * 100 >= MinMatchedColdBytePercent * TotalSize) {
// NOTE(review): line 1173 (presumably the
// `AllocTrie.addSingleAllocTypeAttribute(CI, AllocationType::Cold,`
// call) is missing from this listing — verify upstream.
1174 "dominant");
1175 continue;
1176 }
1177
1178 // We might not have matched any to the full inlined call stack.
1179 // But if we did, create and attach metadata, or a function attribute if
1180 // all contexts have identical profiled behavior.
1181 if (!AllocTrie.empty()) {
1182 NumOfMemProfMatchedAllocs++;
1183 // MemprofMDAttached will be false if a function attribute was
1184 // attached.
1185 bool MemprofMDAttached = AllocTrie.buildAndAttachMIBMetadata(CI);
1186 assert(MemprofMDAttached == I.hasMetadata(LLVMContext::MD_memprof));
1187 if (MemprofMDAttached) {
1188 // Add callsite metadata for the instruction's location list so that
1189 // it is simpler later on to identify which part of the MIB contexts
1190 // are from this particular instruction (including during inlining,
1191 // when the callsite metadata will be updated appropriately).
1192 // FIXME: can this be changed to strip out the matching stack
1193 // context ids from the MIB contexts and not add any callsite
1194 // metadata here to save space?
1195 addCallsiteMetadata(I, InlinedCallStack, Ctx);
1196 }
1197 }
1198 continue;
1199 }
1200
1201 // Otherwise, add callsite metadata. If we reach here then we found the
1202 // instruction's leaf location in the callsites map and not the allocation
1203 // map.
1204 assert(CallSitesIter != LocHashToCallSites.end());
1205 for (auto CallStackIdx : CallSitesIter->second) {
1206 // If we found and thus matched all frames on the call, create and
1207 // attach call stack metadata.
1208 if (stackFrameIncludesInlinedCallStack(CallStackIdx,
1209 InlinedCallStack)) {
1210 NumOfMemProfMatchedCallSites++;
1211 addCallsiteMetadata(I, InlinedCallStack, Ctx);
1212 // Only need to find one with a matching call stack and add a single
1213 // callsite metadata.
1214 break;
1215 }
1216 }
1217 }
1218 }
1219 }
1220
// Constructor: store the profile file path and the virtual file system used
// to open it; if no FS is supplied, fall back to the real OS file system.
// NOTE(review): line 1222 (the second parameter, presumably
// `IntrusiveRefCntPtr<vfs::FileSystem> FS)`) is missing from this listing —
// verify against upstream.
1221 MemProfUsePass::MemProfUsePass(std::string MemoryProfileFile,
1223 : MemoryProfileFileName(MemoryProfileFile), FS(FS) {
1224 if (!FS)
1225 this->FS = vfs::getRealFileSystem();
1226 }
1227
// MemProfUsePass::run body: open the indexed instrprof file, validate that it
// carries a memory profile, then invoke readMemprof on every defined function
// to attach !memprof/!callsite metadata. Returns none() since metadata was
// (potentially) added, or all() on any early bail-out.
// NOTE(review): line 1228 (the function signature, presumably
// `PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {`)
// is missing from this listing — verify against upstream.
1229 // Return immediately if the module doesn't contain any function.
1230 if (M.empty())
1231 return PreservedAnalyses::all();
1232
1233 LLVM_DEBUG(dbgs() << "Read in memory profile:");
1234 auto &Ctx = M.getContext();
1235 auto ReaderOrErr = IndexedInstrProfReader::create(MemoryProfileFileName, *FS);
1236 if (Error E = ReaderOrErr.takeError()) {
1237 handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) {
1238 Ctx.diagnose(
1239 DiagnosticInfoPGOProfile(MemoryProfileFileName.data(), EI.message()));
1240 });
1241 return PreservedAnalyses::all();
1242 }
1243
1244 std::unique_ptr<IndexedInstrProfReader> MemProfReader =
1245 std::move(ReaderOrErr.get());
1246 if (!MemProfReader) {
1247 Ctx.diagnose(DiagnosticInfoPGOProfile(
1248 MemoryProfileFileName.data(), StringRef("Cannot get MemProfReader")));
1249 return PreservedAnalyses::all();
1250 }
1251
// A reader for a non-memprof profile is a usage error; diagnose and bail.
1252 if (!MemProfReader->hasMemoryProfile()) {
1253 Ctx.diagnose(DiagnosticInfoPGOProfile(MemoryProfileFileName.data(),
1254 "Not a memory profile"));
1255 return PreservedAnalyses::all();
1256 }
1257
1258 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
1259
// NOTE(review): lines 1260-1262 (presumably the UndriftMaps declaration and
// its SalvageStaleProfile guard, plus the TLI lookup) are missing from this
// listing — verify against upstream.
1263 UndriftMaps = computeUndriftMap(M, MemProfReader.get(), TLI);
1264
1265 // Map from the stack hash of each allocation context in the function profiles
1266 // to the total profiled size (bytes), allocation type, and whether we matched
1267 // it to an allocation in the IR.
1268 std::map<uint64_t, AllocMatchInfo> FullStackIdToAllocMatchInfo;
1269
1270 for (auto &F : M) {
1271 if (F.isDeclaration())
1272 continue;
1273
// NOTE(review): line 1274 (presumably the per-function TLI fetch via FAM)
// is missing from this listing — verify against upstream.
1275 readMemprof(M, F, MemProfReader.get(), TLI, FullStackIdToAllocMatchInfo,
1276 UndriftMaps);
1277 }
1278
// NOTE(review): line 1279 (presumably the ClPrintMemProfMatchInfo guard
// opening this reporting block) is missing from this listing.
1280 for (const auto &[Id, Info] : FullStackIdToAllocMatchInfo)
1281 errs() << "MemProf " << getAllocTypeAttributeString(Info.AllocType)
1282 << " context with id " << Id << " has total profiled size "
1283 << Info.TotalSize << (Info.Matched ? " is" : " not")
1284 << " matched\n";
1285 }
1286
1287 return PreservedAnalyses::none();
1288 }
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< int > ClDebugMin("asan-debug-min", cl::desc("Debug min inst"), cl::Hidden, cl::init(-1))
static cl::opt< std::string > ClMemoryAccessCallbackPrefix("asan-memory-access-callback-prefix", cl::desc("Prefix for memory access callbacks"), cl::Hidden, cl::init("__asan_"))
static cl::opt< bool > ClInsertVersionCheck("asan-guard-against-version-mismatch", cl::desc("Guard against compiler/runtime version mismatch."), cl::Hidden, cl::init(true))
static cl::opt< bool > ClInstrumentWrites("asan-instrument-writes", cl::desc("instrument write instructions"), cl::Hidden, cl::init(true))
static cl::opt< int > ClDebugMax("asan-debug-max", cl::desc("Debug max inst"), cl::Hidden, cl::init(-1))
static cl::opt< bool > ClStack("asan-stack", cl::desc("Handle stack memory"), cl::Hidden, cl::init(true))
static cl::opt< bool > ClInstrumentAtomics("asan-instrument-atomics", cl::desc("instrument atomic instructions (rmw, cmpxchg)"), cl::Hidden, cl::init(true))
static cl::opt< int > ClMappingScale("asan-mapping-scale", cl::desc("scale of asan shadow mapping"), cl::Hidden, cl::init(0))
static cl::opt< std::string > ClDebugFunc("asan-debug-func", cl::Hidden, cl::desc("Debug func"))
static cl::opt< bool > ClInstrumentReads("asan-instrument-reads", cl::desc("instrument read instructions"), cl::Hidden, cl::init(true))
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
DXIL Resource Access
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(...)
Definition: Debug.h:106
uint64_t Addr
std::string Name
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
static cl::opt< int > ClMappingGranularity("memprof-mapping-granularity", cl::desc("granularity of memprof shadow mapping"), cl::Hidden, cl::init(DefaultMemGranularity))
constexpr char MemProfVersionCheckNamePrefix[]
Definition: MemProfiler.cpp:77
static cl::opt< int > ClDebugMin("memprof-debug-min", cl::desc("Debug min inst"), cl::Hidden, cl::init(-1))
void createMemprofHistogramFlagVar(Module &M)
constexpr uint64_t MemProfEmscriptenCtorAndDtorPriority
Definition: MemProfiler.cpp:75
static uint64_t computeFullStackId(ArrayRef< Frame > CallStack)
static cl::opt< std::string > MemprofRuntimeDefaultOptions("memprof-runtime-default-options", cl::desc("The default memprof options"), cl::Hidden, cl::init(""))
static cl::opt< std::string > ClDebugFunc("memprof-debug-func", cl::Hidden, cl::desc("Debug func"))
constexpr char MemProfShadowMemoryDynamicAddress[]
Definition: MemProfiler.cpp:80
static void addCallsiteMetadata(Instruction &I, ArrayRef< uint64_t > InlinedCallStack, LLVMContext &Ctx)
static bool isAllocationWithHotColdVariant(const Function *Callee, const TargetLibraryInfo &TLI)
constexpr uint64_t MemProfCtorAndDtorPriority
Definition: MemProfiler.cpp:73
constexpr int LLVM_MEM_PROFILER_VERSION
Definition: MemProfiler.cpp:61
static cl::opt< bool > ClUseCalls("memprof-use-callbacks", cl::desc("Use callbacks instead of inline instrumentation sequences."), cl::Hidden, cl::init(false))
static cl::opt< bool > ClInstrumentAtomics("memprof-instrument-atomics", cl::desc("instrument atomic instructions (rmw, cmpxchg)"), cl::Hidden, cl::init(true))
static cl::opt< bool > ClInsertVersionCheck("memprof-guard-against-version-mismatch", cl::desc("Guard against compiler/runtime version mismatch."), cl::Hidden, cl::init(true))
constexpr char MemProfInitName[]
Definition: MemProfiler.cpp:76
constexpr char MemProfFilenameVar[]
Definition: MemProfiler.cpp:83
static void undriftMemProfRecord(const DenseMap< uint64_t, LocToLocMap > &UndriftMaps, memprof::MemProfRecord &MemProfRec)
static uint64_t computeStackId(GlobalValue::GUID Function, uint32_t LineOffset, uint32_t Column)
static cl::opt< bool > ClStack("memprof-instrument-stack", cl::desc("Instrument scalar stack variables"), cl::Hidden, cl::init(false))
static cl::opt< bool > ClHistogram("memprof-histogram", cl::desc("Collect access count histograms"), cl::Hidden, cl::init(false))
constexpr uint64_t DefaultMemGranularity
Definition: MemProfiler.cpp:64
void createMemprofDefaultOptionsVar(Module &M)
static cl::opt< bool > ClPrintMemProfMatchInfo("memprof-print-match-info", cl::desc("Print matching stats for each allocation " "context in this module's profiles"), cl::Hidden, cl::init(false))
constexpr uint64_t HistogramGranularity
Definition: MemProfiler.cpp:67
constexpr uint64_t DefaultShadowScale
Definition: MemProfiler.cpp:70
cl::opt< bool > MemProfReportHintedSizes
static cl::opt< std::string > ClMemoryAccessCallbackPrefix("memprof-memory-access-callback-prefix", cl::desc("Prefix for memory access callbacks"), cl::Hidden, cl::init("__memprof_"))
constexpr char MemProfModuleCtorName[]
Definition: MemProfiler.cpp:72
static cl::opt< bool > SalvageStaleProfile("memprof-salvage-stale-profile", cl::desc("Salvage stale MemProf profile"), cl::init(false), cl::Hidden)
static cl::opt< unsigned > MinMatchedColdBytePercent("memprof-matching-cold-threshold", cl::init(100), cl::Hidden, cl::desc("Min percent of cold bytes matched to hint allocation cold"))
static cl::opt< bool > ClInstrumentReads("memprof-instrument-reads", cl::desc("instrument read instructions"), cl::Hidden, cl::init(true))
static cl::opt< int > ClDebugMax("memprof-debug-max", cl::desc("Debug max inst"), cl::Hidden, cl::init(-1))
static cl::opt< bool > ClInstrumentWrites("memprof-instrument-writes", cl::desc("instrument write instructions"), cl::Hidden, cl::init(true))
static cl::opt< int > ClDebug("memprof-debug", cl::desc("debug"), cl::Hidden, cl::init(0))
static cl::opt< int > ClMappingScale("memprof-mapping-scale", cl::desc("scale of memprof shadow mapping"), cl::Hidden, cl::init(DefaultShadowScale))
static void readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader, const TargetLibraryInfo &TLI, std::map< uint64_t, AllocMatchInfo > &FullStackIdToAllocMatchInfo, DenseMap< uint64_t, LocToLocMap > &UndriftMaps)
static cl::opt< bool > ClMemProfMatchHotColdNew("memprof-match-hot-cold-new", cl::desc("Match allocation profiles onto existing hot/cold operator new calls"), cl::Hidden, cl::init(false))
static AllocationType addCallStack(CallStackTrie &AllocTrie, const AllocationInfo *AllocInfo, uint64_t FullStackId)
constexpr char MemProfHistogramFlagVar[]
Definition: MemProfiler.cpp:85
cl::opt< unsigned > MinClonedColdBytePercent("memprof-cloning-cold-threshold", cl::init(100), cl::Hidden, cl::desc("Min percent of cold bytes to hint alloc cold during cloning"))
static bool stackFrameIncludesInlinedCallStack(ArrayRef< Frame > ProfileCallStack, ArrayRef< uint64_t > InlinedCallStack)
AllocType
cl::opt< bool > MemProfReportHintedSizes("memprof-report-hinted-sizes", cl::init(false), cl::Hidden, cl::desc("Report total allocation sizes of hinted allocations"))
FunctionAnalysisManager FAM
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
Defines the virtual file system interface vfs::FileSystem.
Class for arbitrary precision integers.
Definition: APInt.h:78
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:410
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
iterator end() const
Definition: ArrayRef.h:157
iterator begin() const
Definition: ArrayRef.h:156
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:501
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:704
static Constant * getString(LLVMContext &Context, StringRef Initializer, bool AddNull=true)
This method constructs a CDS and initializes it with a text string.
Definition: Constants.cpp:2990
This is an important base class in LLVM.
Definition: Constant.h:42
static Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
Definition: Constants.cpp:403
Debug location.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:156
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition: DenseMap.h:226
iterator end()
Definition: DenseMap.h:84
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition: DenseMap.h:147
Diagnostic information for the PGO profiler.
Base class for error info classes.
Definition: Error.h:45
virtual std::string message() const
Return the error message as a string.
Definition: Error.h:53
Lightweight error class with error context and mandatory checking.
Definition: Error.h:160
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Definition: DerivedTypes.h:170
static FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
void setComdat(Comdat *C)
Definition: Globals.cpp:212
void setLinkage(LinkageTypes LT)
Definition: GlobalValue.h:537
GUID getGUID() const
Return a 64-bit global unique ID constructed from global value name (i.e.
Definition: GlobalValue.h:595
@ ExternalLinkage
Externally visible function.
Definition: GlobalValue.h:52
@ WeakAnyLinkage
Keep one copy of named function when linking (weak)
Definition: GlobalValue.h:56
@ AvailableExternallyLinkage
Available for inspection, not emission.
Definition: GlobalValue.h:53
HashResultTy< HasherT_ > final()
Forward to HasherT::final() if available.
Definition: HashBuilder.h:66
Interface to help hash various types through a hasher type.
Definition: HashBuilder.h:139
std::enable_if_t< hashbuilder_detail::IsHashableData< T >::value, HashBuilder & > add(T Value)
Implement hashing for hashable data types, e.g. integral or enum values.
Definition: HashBuilder.h:149
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2289
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2491
Value * CreatePointerCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2201
Value * CreateIntToPtr(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2150
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1460
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition: IRBuilder.h:523
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
Definition: IRBuilder.h:1889
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Definition: IRBuilder.h:1813
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1498
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Definition: IRBuilder.h:1826
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1350
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2444
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
Definition: IRBuilder.h:2227
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:177
Type * getVoidTy()
Fetch the type representing void.
Definition: IRBuilder.h:561
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2697
Reader for the indexed binary instrprof format.
static Expected< std::unique_ptr< IndexedInstrProfReader > > create(const Twine &Path, vfs::FileSystem &FS, const Twine &RemappingPath="")
Factory method to create an indexed reader.
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:567
instrprof_error get() const
Definition: InstrProf.h:419
std::string message() const override
Return the error message as a string.
Definition: InstrProf.cpp:255
A smart pointer to a reference-counted object that inherits from RefCountedBase or ThreadSafeRefCount...
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
An instruction for reading from memory.
Definition: Instructions.h:176
A single uniqued string.
Definition: Metadata.h:720
StringRef getString() const
Definition: Metadata.cpp:616
This is the common base class for memset/memcpy/memmove.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
MemProfUsePass(std::string MemoryProfileFile, IntrusiveRefCntPtr< vfs::FileSystem > FS=nullptr)
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: Analysis.h:114
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
bool empty() const
Definition: SmallVector.h:81
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
An instruction for storing to memory.
Definition: Instructions.h:292
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
constexpr bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:147
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
ObjectFormatType getObjectFormat() const
Get the object format for this triple.
Definition: Triple.h:409
bool isOSEmscripten() const
Tests whether the OS is Emscripten.
Definition: Triple.h:709
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
static IntegerType * getInt1Ty(LLVMContext &C)
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static IntegerType * getInt8Ty(LLVMContext &C)
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
An efficient, type-erasing, non-owning reference to a callable.
Class to build a trie of call stack contexts for a particular profiled allocation call,...
void addCallStack(AllocationType AllocType, ArrayRef< uint64_t > StackIds, std::vector< ContextTotalSize > ContextSizeInfo={})
Add a call stack context with the given allocation type to the Trie.
void addSingleAllocTypeAttribute(CallBase *CI, AllocationType AT, StringRef Descriptor)
Add an attribute for the given allocation type to the call instruction.
bool buildAndAttachMIBMetadata(CallBase *CI)
Build and attach the minimal necessary MIB metadata.
Helper class to iterate through stack ids in both metadata (memprof MIB and callsite) and the corresp...
void instrumentAddress(Module &M, IRBuilder<> &IRB, Instruction *OrigIns, Instruction *InsertBefore, Value *Addr, Align Alignment, TypeSize TypeStoreSize, bool IsWrite, Value *SizeArgument, bool UseCalls, bool Recover, int AsanScale, int AsanOffset)
Instrument the memory operand Addr.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:125
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
DenseMap< uint64_t, LocToLocMap > computeUndriftMap(Module &M, IndexedInstrProfReader *MemProfReader, const TargetLibraryInfo &TLI)
MDNode * buildCallstackMetadata(ArrayRef< uint64_t > CallStack, LLVMContext &Ctx)
Build callstack metadata from the provided list of call stack ids.
AllocationType getAllocType(uint64_t TotalLifetimeAccessDensity, uint64_t AllocCount, uint64_t TotalLifetime)
Return the allocation type for a given set of memory profile values.
DenseMap< uint64_t, SmallVector< CallEdgeTy, 0 > > extractCallsFromIR(Module &M, const TargetLibraryInfo &TLI, function_ref< bool(uint64_t)> IsPresentInProfile=[](uint64_t) { return true;})
std::unordered_map< LineLocation, LineLocation, LineLocationHash > LocToLocMap
Definition: MemProfiler.h:82
std::string getAllocTypeAttributeString(AllocationType Type)
Returns the string to use in attributes with the given type.
IntrusiveRefCntPtr< FileSystem > getRealFileSystem()
Gets an vfs::FileSystem for the 'real' file system, as seen by the operating system.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void handleAllErrors(Error E, HandlerTs &&... Handlers)
Behaves the same as handleErrors, except that by contract all errors must be handled by the given han...
Definition: Error.h:977
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
cl::opt< bool > PGOWarnMissing
auto unique(Range &&R, Predicate P)
Definition: STLExtras.h:2055
std::string getInstrProfSectionName(InstrProfSectKind IPSK, Triple::ObjectFormatType OF, bool AddSegmentInfo=true)
Return the name of the profile section corresponding to IPSK.
Definition: InstrProf.cpp:236
std::array< uint8_t, NumBytes > BLAKE3Result
The constant LLVM_BLAKE3_OUT_LEN provides the default output length, 32 bytes, which is recommended f...
Definition: BLAKE3.h:35
FunctionCallee declareSanitizerInitFunction(Module &M, StringRef InitName, ArrayRef< Type * > InitArgTypes, bool Weak=false)
std::pair< Function *, FunctionCallee > createSanitizerCtorAndInitFunctions(Module &M, StringRef CtorName, StringRef InitName, ArrayRef< Type * > InitArgTypes, ArrayRef< Value * > InitArgs, StringRef VersionCheckName=StringRef(), bool Weak=false)
Creates sanitizer constructor function, and calls sanitizer's init function from it.
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1664
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
cl::opt< bool > NoPGOWarnMismatch
Definition: MemProfiler.cpp:57
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
void appendToCompilerUsed(Module &M, ArrayRef< GlobalValue * > Values)
Adds global values to the llvm.compiler.used list.
void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput)
Definition: InstrProf.cpp:1506
@ DS_Warning
void appendToGlobalCtors(Module &M, Function *F, int Priority, Constant *Data=nullptr)
Append F to the list of global ctors of module M with the given Priority.
Definition: ModuleUtils.cpp:74
Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
cl::opt< bool > NoPGOWarnMismatchComdatWeak
Summary of memprof metadata on allocations.
GlobalValue::GUID Function
Definition: MemProf.h:222
uint32_t LineOffset
Definition: MemProf.h:227
static GlobalValue::GUID getGUID(const StringRef FunctionName)
Definition: MemProf.cpp:251
llvm::SmallVector< std::vector< Frame > > CallSites
Definition: MemProf.h:454
llvm::SmallVector< AllocationInfo > AllocSites
Definition: MemProf.h:452