LLVM 20.0.0git
InstrProfiling.cpp
Go to the documentation of this file.
1//===-- InstrProfiling.cpp - Frontend instrumentation based profiling -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass lowers instrprof_* intrinsics emitted by an instrumentor.
10// It also builds the data structures and initialization code needed for
11// updating execution counts and emitting the profile at runtime.
12//
13//===----------------------------------------------------------------------===//
14
16#include "llvm/ADT/ArrayRef.h"
17#include "llvm/ADT/STLExtras.h"
19#include "llvm/ADT/StringRef.h"
20#include "llvm/ADT/Twine.h"
25#include "llvm/IR/Attributes.h"
26#include "llvm/IR/BasicBlock.h"
27#include "llvm/IR/CFG.h"
28#include "llvm/IR/Constant.h"
29#include "llvm/IR/Constants.h"
30#include "llvm/IR/DIBuilder.h"
33#include "llvm/IR/Dominators.h"
34#include "llvm/IR/Function.h"
35#include "llvm/IR/GlobalValue.h"
37#include "llvm/IR/IRBuilder.h"
38#include "llvm/IR/Instruction.h"
41#include "llvm/IR/MDBuilder.h"
42#include "llvm/IR/Module.h"
43#include "llvm/IR/Type.h"
44#include "llvm/Pass.h"
49#include "llvm/Support/Error.h"
57#include <algorithm>
58#include <cassert>
59#include <cstdint>
60#include <string>
61
62using namespace llvm;
63
64#define DEBUG_TYPE "instrprof"
65
66namespace llvm {
67// Command line option to enable vtable value profiling. Defined in
68// ProfileData/InstrProf.cpp: -enable-vtable-value-profiling=
70// TODO: Remove -debug-info-correlate in next LLVM release, in favor of
71// -profile-correlate=debug-info.
73 "debug-info-correlate",
74 cl::desc("Use debug info to correlate profiles. (Deprecated, use "
75 "-profile-correlate=debug-info)"),
76 cl::init(false));
77
79 "profile-correlate",
80 cl::desc("Use debug info or binary file to correlate profiles."),
83 "No profile correlation"),
85 "Use debug info to correlate"),
87 "Use binary to correlate")));
88} // namespace llvm
89
90namespace {
91
92cl::opt<bool> DoHashBasedCounterSplit(
93 "hash-based-counter-split",
94 cl::desc("Rename counter variable of a comdat function based on cfg hash"),
95 cl::init(true));
96
98 RuntimeCounterRelocation("runtime-counter-relocation",
99 cl::desc("Enable relocating counters at runtime."),
100 cl::init(false));
101
102cl::opt<bool> ValueProfileStaticAlloc(
103 "vp-static-alloc",
104 cl::desc("Do static counter allocation for value profiler"),
105 cl::init(true));
106
107cl::opt<double> NumCountersPerValueSite(
108 "vp-counters-per-site",
109 cl::desc("The average number of profile counters allocated "
110 "per value profiling site."),
111 // This is set to a very small value because in real programs, only
112 // a very small percentage of value sites have non-zero targets, e.g, 1/30.
113 // For those sites with non-zero profile, the average number of targets
114 // is usually smaller than 2.
115 cl::init(1.0));
116
117cl::opt<bool> AtomicCounterUpdateAll(
118 "instrprof-atomic-counter-update-all",
119 cl::desc("Make all profile counter updates atomic (for testing only)"),
120 cl::init(false));
121
122cl::opt<bool> AtomicCounterUpdatePromoted(
123 "atomic-counter-update-promoted",
124 cl::desc("Do counter update using atomic fetch add "
125 " for promoted counters only"),
126 cl::init(false));
127
128cl::opt<bool> AtomicFirstCounter(
129 "atomic-first-counter",
130 cl::desc("Use atomic fetch add for first counter in a function (usually "
131 "the entry counter)"),
132 cl::init(false));
133
134cl::opt<bool> ConditionalCounterUpdate(
135 "conditional-counter-update",
136 cl::desc("Do conditional counter updates in single byte counters mode)"),
137 cl::init(false));
138
139// If the option is not specified, whether counter promotion is done
140// depends on how the instrumentation lowering pipeline is set up, i.e.,
141// the default value (false) of this option does not mean that promotion
142// is disabled by default. Explicitly setting this option overrides that
143// default behavior.
144cl::opt<bool> DoCounterPromotion("do-counter-promotion",
145 cl::desc("Do counter register promotion"),
146 cl::init(false));
147cl::opt<unsigned> MaxNumOfPromotionsPerLoop(
148 "max-counter-promotions-per-loop", cl::init(20),
149 cl::desc("Max number counter promotions per loop to avoid"
150 " increasing register pressure too much"));
151
152// A debug option
154 MaxNumOfPromotions("max-counter-promotions", cl::init(-1),
155 cl::desc("Max number of allowed counter promotions"));
156
157cl::opt<unsigned> SpeculativeCounterPromotionMaxExiting(
158 "speculative-counter-promotion-max-exiting", cl::init(3),
159 cl::desc("The max number of exiting blocks of a loop to allow "
160 " speculative counter promotion"));
161
162cl::opt<bool> SpeculativeCounterPromotionToLoop(
163 "speculative-counter-promotion-to-loop",
164 cl::desc("When the option is false, if the target block is in a loop, "
165 "the promotion will be disallowed unless the promoted counter "
166 " update can be further/iteratively promoted into an acyclic "
167 " region."));
168
169cl::opt<bool> IterativeCounterPromotion(
170 "iterative-counter-promotion", cl::init(true),
171 cl::desc("Allow counter promotion across the whole loop nest."));
172
173cl::opt<bool> SkipRetExitBlock(
174 "skip-ret-exit-block", cl::init(true),
175 cl::desc("Suppress counter promotion if exit blocks contain ret."));
176
177static cl::opt<bool> SampledInstr("sampled-instrumentation", cl::ZeroOrMore,
178 cl::init(false),
179 cl::desc("Do PGO instrumentation sampling"));
180
181static cl::opt<unsigned> SampledInstrPeriod(
182 "sampled-instr-period",
183 cl::desc("Set the profile instrumentation sample period. A sample period "
184 "of 0 is invalid. For each sample period, a fixed number of "
185 "consecutive samples will be recorded. The number is controlled "
186 "by 'sampled-instr-burst-duration' flag. The default sample "
187 "period of 65536 is optimized for generating efficient code that "
188 "leverages unsigned short integer wrapping in overflow, but this "
189 "is disabled under simple sampling (burst duration = 1)."),
190 cl::init(USHRT_MAX + 1));
191
192static cl::opt<unsigned> SampledInstrBurstDuration(
193 "sampled-instr-burst-duration",
194 cl::desc("Set the profile instrumentation burst duration, which can range "
195 "from 1 to the value of 'sampled-instr-period' (0 is invalid). "
196 "This number of samples will be recorded for each "
197 "'sampled-instr-period' count update. Setting to 1 enables simple "
198 "sampling, in which case it is recommended to set "
199 "'sampled-instr-period' to a prime number."),
200 cl::init(200));
201
/// Sampling parameters resolved from the sampled-instr-* command line options.
///
/// Every member carries a default so that a default-constructed object never
/// holds indeterminate values.
struct SampledInstrumentationConfig {
  /// Number of consecutive samples recorded in each period.
  unsigned BurstDuration = 0;
  /// Length of one sampling period, in counter updates.
  unsigned Period = 0;
  /// True when the sampling variable is a 16-bit integer rather than 32-bit.
  bool UseShort = false;
  /// True when the burst duration is exactly 1.
  bool IsSimpleSampling = false;
  /// True for burst sampling with a period of USHRT_MAX + 1, where the
  /// explicit period check is skipped.
  bool IsFastSampling = false;
};
209
210static SampledInstrumentationConfig getSampledInstrumentationConfig() {
211 SampledInstrumentationConfig config;
212 config.BurstDuration = SampledInstrBurstDuration.getValue();
213 config.Period = SampledInstrPeriod.getValue();
214 if (config.BurstDuration > config.Period)
216 "SampledBurstDuration must be less than or equal to SampledPeriod");
217 if (config.Period == 0 || config.BurstDuration == 0)
219 "SampledPeriod and SampledBurstDuration must be greater than 0");
220 config.IsSimpleSampling = (config.BurstDuration == 1);
221 // If (BurstDuration == 1 && Period == 65536), generate the simple sampling
222 // style code.
223 config.IsFastSampling =
224 (!config.IsSimpleSampling && config.Period == USHRT_MAX + 1);
225 config.UseShort = (config.Period <= USHRT_MAX) || config.IsFastSampling;
226 return config;
227}
228
229using LoadStorePair = std::pair<Instruction *, Instruction *>;
230
231static uint64_t getIntModuleFlagOrZero(const Module &M, StringRef Flag) {
232 auto *MD = dyn_cast_or_null<ConstantAsMetadata>(M.getModuleFlag(Flag));
233 if (!MD)
234 return 0;
235
236 // If the flag is a ConstantAsMetadata, it should be an integer representable
237 // in 64-bits.
238 return cast<ConstantInt>(MD->getValue())->getZExtValue();
239}
240
241static bool enablesValueProfiling(const Module &M) {
242 return isIRPGOFlagSet(&M) ||
243 getIntModuleFlagOrZero(M, "EnableValueProfiling") != 0;
244}
245
246// Conservatively returns true if value profiling is enabled.
247static bool profDataReferencedByCode(const Module &M) {
248 return enablesValueProfiling(M);
249}
250
251class InstrLowerer final {
252public:
253 InstrLowerer(Module &M, const InstrProfOptions &Options,
254 std::function<const TargetLibraryInfo &(Function &F)> GetTLI,
255 bool IsCS)
256 : M(M), Options(Options), TT(Triple(M.getTargetTriple())), IsCS(IsCS),
257 GetTLI(GetTLI), DataReferencedByCode(profDataReferencedByCode(M)) {}
258
259 bool lower();
260
261private:
262 Module &M;
263 const InstrProfOptions Options;
264 const Triple TT;
265 // Is this lowering for the context-sensitive instrumentation.
266 const bool IsCS;
267
268 std::function<const TargetLibraryInfo &(Function &F)> GetTLI;
269
270 const bool DataReferencedByCode;
271
272 struct PerFunctionProfileData {
273 uint32_t NumValueSites[IPVK_Last + 1] = {};
274 GlobalVariable *RegionCounters = nullptr;
275 GlobalVariable *DataVar = nullptr;
276 GlobalVariable *RegionBitmaps = nullptr;
277 uint32_t NumBitmapBytes = 0;
278
279 PerFunctionProfileData() = default;
280 };
282 // Key is virtual table variable, value is 'VTableProfData' in the form of
283 // GlobalVariable.
285 /// If runtime relocation is enabled, this maps functions to the load
286 /// instruction that produces the profile relocation bias.
287 DenseMap<const Function *, LoadInst *> FunctionToProfileBiasMap;
288 std::vector<GlobalValue *> CompilerUsedVars;
289 std::vector<GlobalValue *> UsedVars;
290 std::vector<GlobalVariable *> ReferencedNames;
291 // The list of virtual table variables of which the VTableProfData is
292 // collected.
293 std::vector<GlobalVariable *> ReferencedVTables;
294 GlobalVariable *NamesVar = nullptr;
295 size_t NamesSize = 0;
296
297 // vector of counter load/store pairs to be register promoted.
298 std::vector<LoadStorePair> PromotionCandidates;
299
300 int64_t TotalCountersPromoted = 0;
301
302 /// Lower instrumentation intrinsics in the function. Returns true if
303 /// any lowering was performed.
304 bool lowerIntrinsics(Function *F);
305
306 /// Register-promote counter loads and stores in loops.
307 void promoteCounterLoadStores(Function *F);
308
309 /// Returns true if relocating counters at runtime is enabled.
310 bool isRuntimeCounterRelocationEnabled() const;
311
312 /// Returns true if profile counter update register promotion is enabled.
313 bool isCounterPromotionEnabled() const;
314
315 /// Return true if profile sampling is enabled.
316 bool isSamplingEnabled() const;
317
318 /// Count the number of instrumented value sites for the function.
319 void computeNumValueSiteCounts(InstrProfValueProfileInst *Ins);
320
321 /// Replace instrprof.value.profile with a call to runtime library.
322 void lowerValueProfileInst(InstrProfValueProfileInst *Ins);
323
324 /// Replace instrprof.cover with a store instruction to the coverage byte.
325 void lowerCover(InstrProfCoverInst *Inc);
326
327 /// Replace instrprof.timestamp with a call to
328 /// INSTR_PROF_PROFILE_SET_TIMESTAMP.
329 void lowerTimestamp(InstrProfTimestampInst *TimestampInstruction);
330
331 /// Replace instrprof.increment with an increment of the appropriate value.
332 void lowerIncrement(InstrProfIncrementInst *Inc);
333
334 /// Force emitting of name vars for unused functions.
335 void lowerCoverageData(GlobalVariable *CoverageNamesVar);
336
337 /// Replace instrprof.mcdc.tvbitmask.update with a shift and or instruction
338 /// using the index represented by a temp value into a bitmap.
339 void lowerMCDCTestVectorBitmapUpdate(InstrProfMCDCTVBitmapUpdate *Ins);
340
341 /// Get the Bias value for data to access mmap-ed area.
342 /// Create it if it hasn't been seen.
343 GlobalVariable *getOrCreateBiasVar(StringRef VarName);
344
345 /// Compute the address of the counter value that this profiling instruction
346 /// acts on.
347 Value *getCounterAddress(InstrProfCntrInstBase *I);
348
349 /// Lower the incremental instructions under profile sampling predicates.
350 void doSampling(Instruction *I);
351
352 /// Get the region counters for an increment, creating them if necessary.
353 ///
354 /// If the counter array doesn't yet exist, the profile data variables
355 /// referring to them will also be created.
356 GlobalVariable *getOrCreateRegionCounters(InstrProfCntrInstBase *Inc);
357
358 /// Create the region counters.
359 GlobalVariable *createRegionCounters(InstrProfCntrInstBase *Inc,
362
363 /// Compute the address of the test vector bitmap that this profiling
364 /// instruction acts on.
365 Value *getBitmapAddress(InstrProfMCDCTVBitmapUpdate *I);
366
367 /// Get the region bitmaps for an increment, creating them if necessary.
368 ///
369 /// If the bitmap array doesn't yet exist, the profile data variables
370 /// referring to them will also be created.
371 GlobalVariable *getOrCreateRegionBitmaps(InstrProfMCDCBitmapInstBase *Inc);
372
373 /// Create the MC/DC bitmap as a byte-aligned array of bytes associated with
374 /// an MC/DC Decision region. The number of bytes required is indicated by
375 /// the intrinsic used (type InstrProfMCDCBitmapInstBase). This is called
376 /// as part of setupProfileSection() and is conceptually very similar to
377 /// what is done for profile data counters in createRegionCounters().
378 GlobalVariable *createRegionBitmaps(InstrProfMCDCBitmapInstBase *Inc,
381
382 /// Set Comdat property of GV, if required.
383 void maybeSetComdat(GlobalVariable *GV, GlobalObject *GO, StringRef VarName);
384
385 /// Setup the sections into which counters and bitmaps are allocated.
386 GlobalVariable *setupProfileSection(InstrProfInstBase *Inc,
387 InstrProfSectKind IPSK);
388
389 /// Create INSTR_PROF_DATA variable for counters and bitmaps.
390 void createDataVariable(InstrProfCntrInstBase *Inc);
391
392 /// Get the counters for virtual table values, creating them if necessary.
393 void getOrCreateVTableProfData(GlobalVariable *GV);
394
395 /// Emit the section with compressed function names.
396 void emitNameData();
397
398 /// Emit the section with compressed vtable names.
399 void emitVTableNames();
400
401 /// Emit value nodes section for value profiling.
402 void emitVNodes();
403
404 /// Emit runtime registration functions for each profile data variable.
405 void emitRegistration();
406
407 /// Emit the necessary plumbing to pull in the runtime initialization.
408 /// Returns true if a change was made.
409 bool emitRuntimeHook();
410
411 /// Add uses of our data variables and runtime hook.
412 void emitUses();
413
414 /// Create a static initializer for our data, on platforms that need it,
415 /// and for any profile output file that was specified.
416 void emitInitialization();
417};
418
419///
420/// A helper class to promote one counter RMW operation in the loop
421/// into register update.
422///
423/// The RMW update for the counter will be sunk out of the loop after
424/// the transformation.
425///
426class PGOCounterPromoterHelper : public LoadAndStorePromoter {
427public:
428 PGOCounterPromoterHelper(
430 BasicBlock *PH, ArrayRef<BasicBlock *> ExitBlocks,
431 ArrayRef<Instruction *> InsertPts,
433 LoopInfo &LI)
434 : LoadAndStorePromoter({L, S}, SSA), Store(S), ExitBlocks(ExitBlocks),
435 InsertPts(InsertPts), LoopToCandidates(LoopToCands), LI(LI) {
436 assert(isa<LoadInst>(L));
437 assert(isa<StoreInst>(S));
438 SSA.AddAvailableValue(PH, Init);
439 }
440
441 void doExtraRewritesBeforeFinalDeletion() override {
442 for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
443 BasicBlock *ExitBlock = ExitBlocks[i];
444 Instruction *InsertPos = InsertPts[i];
445 // Get LiveIn value into the ExitBlock. If there are multiple
446 // predecessors, the value is defined by a PHI node in this
447 // block.
448 Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
449 Value *Addr = cast<StoreInst>(Store)->getPointerOperand();
450 Type *Ty = LiveInValue->getType();
451 IRBuilder<> Builder(InsertPos);
452 if (auto *AddrInst = dyn_cast_or_null<IntToPtrInst>(Addr)) {
453 // If isRuntimeCounterRelocationEnabled() is true then the address of
454 // the store instruction is computed with two instructions in
455 // InstrProfiling::getCounterAddress(). We need to copy those
456 // instructions to this block to compute Addr correctly.
457 // %BiasAdd = add i64 ptrtoint <__profc_>, <__llvm_profile_counter_bias>
458 // %Addr = inttoptr i64 %BiasAdd to i64*
459 auto *OrigBiasInst = dyn_cast<BinaryOperator>(AddrInst->getOperand(0));
460 assert(OrigBiasInst->getOpcode() == Instruction::BinaryOps::Add);
461 Value *BiasInst = Builder.Insert(OrigBiasInst->clone());
462 Addr = Builder.CreateIntToPtr(BiasInst,
463 PointerType::getUnqual(Ty->getContext()));
464 }
465 if (AtomicCounterUpdatePromoted)
466 // automic update currently can only be promoted across the current
467 // loop, not the whole loop nest.
468 Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, LiveInValue,
469 MaybeAlign(),
470 AtomicOrdering::SequentiallyConsistent);
471 else {
472 LoadInst *OldVal = Builder.CreateLoad(Ty, Addr, "pgocount.promoted");
473 auto *NewVal = Builder.CreateAdd(OldVal, LiveInValue);
474 auto *NewStore = Builder.CreateStore(NewVal, Addr);
475
476 // Now update the parent loop's candidate list:
477 if (IterativeCounterPromotion) {
478 auto *TargetLoop = LI.getLoopFor(ExitBlock);
479 if (TargetLoop)
480 LoopToCandidates[TargetLoop].emplace_back(OldVal, NewStore);
481 }
482 }
483 }
484 }
485
486private:
488 ArrayRef<BasicBlock *> ExitBlocks;
489 ArrayRef<Instruction *> InsertPts;
491 LoopInfo &LI;
492};
493
494/// A helper class to do register promotion for all profile counter
495/// updates in a loop.
496///
497class PGOCounterPromoter {
498public:
499 PGOCounterPromoter(
501 Loop &CurLoop, LoopInfo &LI, BlockFrequencyInfo *BFI)
502 : LoopToCandidates(LoopToCands), L(CurLoop), LI(LI), BFI(BFI) {
503
504 // Skip collection of ExitBlocks and InsertPts for loops that will not be
505 // able to have counters promoted.
506 SmallVector<BasicBlock *, 8> LoopExitBlocks;
508
509 L.getExitBlocks(LoopExitBlocks);
510 if (!isPromotionPossible(&L, LoopExitBlocks))
511 return;
512
513 for (BasicBlock *ExitBlock : LoopExitBlocks) {
514 if (BlockSet.insert(ExitBlock).second &&
515 llvm::none_of(predecessors(ExitBlock), [&](const BasicBlock *Pred) {
516 return llvm::isPresplitCoroSuspendExitEdge(*Pred, *ExitBlock);
517 })) {
518 ExitBlocks.push_back(ExitBlock);
519 InsertPts.push_back(&*ExitBlock->getFirstInsertionPt());
520 }
521 }
522 }
523
524 bool run(int64_t *NumPromoted) {
525 // Skip 'infinite' loops:
526 if (ExitBlocks.size() == 0)
527 return false;
528
529 // Skip if any of the ExitBlocks contains a ret instruction.
530 // This is to prevent dumping of incomplete profile -- if the
531 // loop is a long running loop and dump is called in the middle
532 // of the loop, the result profile is incomplete.
533 // FIXME: add other heuristics to detect long running loops.
534 if (SkipRetExitBlock) {
535 for (auto *BB : ExitBlocks)
536 if (isa<ReturnInst>(BB->getTerminator()))
537 return false;
538 }
539
540 unsigned MaxProm = getMaxNumOfPromotionsInLoop(&L);
541 if (MaxProm == 0)
542 return false;
543
544 unsigned Promoted = 0;
545 for (auto &Cand : LoopToCandidates[&L]) {
546
548 SSAUpdater SSA(&NewPHIs);
549 Value *InitVal = ConstantInt::get(Cand.first->getType(), 0);
550
551 // If BFI is set, we will use it to guide the promotions.
552 if (BFI) {
553 auto *BB = Cand.first->getParent();
554 auto InstrCount = BFI->getBlockProfileCount(BB);
555 if (!InstrCount)
556 continue;
557 auto PreheaderCount = BFI->getBlockProfileCount(L.getLoopPreheader());
558 // If the average loop trip count is not greater than 1.5, we skip
559 // promotion.
560 if (PreheaderCount && (*PreheaderCount * 3) >= (*InstrCount * 2))
561 continue;
562 }
563
564 PGOCounterPromoterHelper Promoter(Cand.first, Cand.second, SSA, InitVal,
565 L.getLoopPreheader(), ExitBlocks,
566 InsertPts, LoopToCandidates, LI);
567 Promoter.run(SmallVector<Instruction *, 2>({Cand.first, Cand.second}));
568 Promoted++;
569 if (Promoted >= MaxProm)
570 break;
571
572 (*NumPromoted)++;
573 if (MaxNumOfPromotions != -1 && *NumPromoted >= MaxNumOfPromotions)
574 break;
575 }
576
577 LLVM_DEBUG(dbgs() << Promoted << " counters promoted for loop (depth="
578 << L.getLoopDepth() << ")\n");
579 return Promoted != 0;
580 }
581
582private:
583 bool allowSpeculativeCounterPromotion(Loop *LP) {
584 SmallVector<BasicBlock *, 8> ExitingBlocks;
585 L.getExitingBlocks(ExitingBlocks);
586 // Not considierered speculative.
587 if (ExitingBlocks.size() == 1)
588 return true;
589 if (ExitingBlocks.size() > SpeculativeCounterPromotionMaxExiting)
590 return false;
591 return true;
592 }
593
594 // Check whether the loop satisfies the basic conditions needed to perform
595 // Counter Promotions.
596 bool
597 isPromotionPossible(Loop *LP,
598 const SmallVectorImpl<BasicBlock *> &LoopExitBlocks) {
599 // We can't insert into a catchswitch.
600 if (llvm::any_of(LoopExitBlocks, [](BasicBlock *Exit) {
601 return isa<CatchSwitchInst>(Exit->getTerminator());
602 }))
603 return false;
604
605 if (!LP->hasDedicatedExits())
606 return false;
607
608 BasicBlock *PH = LP->getLoopPreheader();
609 if (!PH)
610 return false;
611
612 return true;
613 }
614
615 // Returns the max number of Counter Promotions for LP.
616 unsigned getMaxNumOfPromotionsInLoop(Loop *LP) {
617 SmallVector<BasicBlock *, 8> LoopExitBlocks;
618 LP->getExitBlocks(LoopExitBlocks);
619 if (!isPromotionPossible(LP, LoopExitBlocks))
620 return 0;
621
622 SmallVector<BasicBlock *, 8> ExitingBlocks;
623 LP->getExitingBlocks(ExitingBlocks);
624
625 // If BFI is set, we do more aggressive promotions based on BFI.
626 if (BFI)
627 return (unsigned)-1;
628
629 // Not considierered speculative.
630 if (ExitingBlocks.size() == 1)
631 return MaxNumOfPromotionsPerLoop;
632
633 if (ExitingBlocks.size() > SpeculativeCounterPromotionMaxExiting)
634 return 0;
635
636 // Whether the target block is in a loop does not matter:
637 if (SpeculativeCounterPromotionToLoop)
638 return MaxNumOfPromotionsPerLoop;
639
640 // Now check the target block:
641 unsigned MaxProm = MaxNumOfPromotionsPerLoop;
642 for (auto *TargetBlock : LoopExitBlocks) {
643 auto *TargetLoop = LI.getLoopFor(TargetBlock);
644 if (!TargetLoop)
645 continue;
646 unsigned MaxPromForTarget = getMaxNumOfPromotionsInLoop(TargetLoop);
647 unsigned PendingCandsInTarget = LoopToCandidates[TargetLoop].size();
648 MaxProm =
649 std::min(MaxProm, std::max(MaxPromForTarget, PendingCandsInTarget) -
650 PendingCandsInTarget);
651 }
652 return MaxProm;
653 }
654
658 Loop &L;
659 LoopInfo &LI;
661};
662
663enum class ValueProfilingCallType {
664 // Individual values are tracked. Currently used for indirect call target
665 // profiling.
666 Default,
667
668 // MemOp: the memop size value profiling.
669 MemOp
670};
671
672} // end anonymous namespace
673
678 auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
680 };
681 InstrLowerer Lowerer(M, Options, GetTLI, IsCS);
682 if (!Lowerer.lower())
683 return PreservedAnalyses::all();
684
686}
687
688//
689// Perform instrumentation sampling.
690//
691// There are 3 flavors of sampling:
692// (1) Full burst sampling: We transform:
693// Increment_Instruction;
694// to:
695// if (__llvm_profile_sampling__ <= SampledInstrBurstDuration - 1) {
696// Increment_Instruction;
697// }
698// __llvm_profile_sampling__ += 1;
699// if (__llvm_profile_sampling__ >= SampledInstrPeriod) {
700// __llvm_profile_sampling__ = 0;
701// }
702//
703// "__llvm_profile_sampling__" is a thread-local global shared by all PGO
704// counters (value-instrumentation and edge instrumentation).
705//
706// (2) Fast burst sampling:
707// "__llvm_profile_sampling__" variable is an unsigned type, meaning it will
708// wrap around to zero when overflows. In this case, the second check is
709// unnecessary, so we won't generate check2 when the SampledInstrPeriod is
710// set to 65536 (64K). The code after:
711// if (__llvm_profile_sampling__ <= SampledInstrBurstDuration - 1) {
712// Increment_Instruction;
713// }
714// __llvm_profile_sampling__ += 1;
715//
716// (3) Simple sampling:
717// When SampledInstrBurstDuration is set to 1, we do a simple sampling:
718// __llvm_profile_sampling__ += 1;
719// if (__llvm_profile_sampling__ >= SampledInstrPeriod) {
720// __llvm_profile_sampling__ = 0;
721// Increment_Instruction;
722// }
723//
724// Note that, the code snippet after the transformation can still be counter
725// promoted. However, with sampling enabled, counter updates are expected to
726// be infrequent, making the benefits of counter promotion negligible.
727// Moreover, counter promotion can potentially cause issues in server
728// applications, particularly when the counters are dumped without a clean
729// exit. To mitigate this risk, counter promotion is disabled by default when
730// sampling is enabled. This behavior can be overridden using the internal
731// option.
732void InstrLowerer::doSampling(Instruction *I) {
733 if (!isSamplingEnabled())
734 return;
735
736 SampledInstrumentationConfig config = getSampledInstrumentationConfig();
737 auto GetConstant = [&config](IRBuilder<> &Builder, uint32_t C) {
738 if (config.UseShort)
739 return Builder.getInt16(C);
740 else
741 return Builder.getInt32(C);
742 };
743
744 IntegerType *SamplingVarTy;
745 if (config.UseShort)
746 SamplingVarTy = Type::getInt16Ty(M.getContext());
747 else
748 SamplingVarTy = Type::getInt32Ty(M.getContext());
749 auto *SamplingVar =
750 M.getGlobalVariable(INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_SAMPLING_VAR));
751 assert(SamplingVar && "SamplingVar not set properly");
752
753 // Create the condition for checking the burst duration.
754 Instruction *SamplingVarIncr;
755 Value *NewSamplingVarVal;
756 MDBuilder MDB(I->getContext());
757 MDNode *BranchWeight;
758 IRBuilder<> CondBuilder(I);
759 auto *LoadSamplingVar = CondBuilder.CreateLoad(SamplingVarTy, SamplingVar);
760 if (config.IsSimpleSampling) {
761 // For the simple sampling, just create the load and increments.
762 IRBuilder<> IncBuilder(I);
763 NewSamplingVarVal =
764 IncBuilder.CreateAdd(LoadSamplingVar, GetConstant(IncBuilder, 1));
765 SamplingVarIncr = IncBuilder.CreateStore(NewSamplingVarVal, SamplingVar);
766 } else {
767 // For the burst-sampling, create the conditional update.
768 auto *DurationCond = CondBuilder.CreateICmpULE(
769 LoadSamplingVar, GetConstant(CondBuilder, config.BurstDuration - 1));
770 BranchWeight = MDB.createBranchWeights(
771 config.BurstDuration, config.Period - config.BurstDuration);
773 DurationCond, I, /* Unreachable */ false, BranchWeight);
774 IRBuilder<> IncBuilder(I);
775 NewSamplingVarVal =
776 IncBuilder.CreateAdd(LoadSamplingVar, GetConstant(IncBuilder, 1));
777 SamplingVarIncr = IncBuilder.CreateStore(NewSamplingVarVal, SamplingVar);
778 I->moveBefore(ThenTerm);
779 }
780
781 if (config.IsFastSampling)
782 return;
783
784 // Create the condition for checking the period.
785 Instruction *ThenTerm, *ElseTerm;
786 IRBuilder<> PeriodCondBuilder(SamplingVarIncr);
787 auto *PeriodCond = PeriodCondBuilder.CreateICmpUGE(
788 NewSamplingVarVal, GetConstant(PeriodCondBuilder, config.Period));
789 BranchWeight = MDB.createBranchWeights(1, config.Period - 1);
790 SplitBlockAndInsertIfThenElse(PeriodCond, SamplingVarIncr, &ThenTerm,
791 &ElseTerm, BranchWeight);
792
793 // For the simple sampling, the counter update happens in sampling var reset.
794 if (config.IsSimpleSampling)
795 I->moveBefore(ThenTerm);
796
797 IRBuilder<> ResetBuilder(ThenTerm);
798 ResetBuilder.CreateStore(GetConstant(ResetBuilder, 0), SamplingVar);
799 SamplingVarIncr->moveBefore(ElseTerm);
800}
801
802bool InstrLowerer::lowerIntrinsics(Function *F) {
803 bool MadeChange = false;
804 PromotionCandidates.clear();
806
807 // To ensure compatibility with sampling, we save the intrinsics into
808 // a buffer to prevent potential breakage of the iterator (as the
809 // intrinsics will be moved to a different BB).
810 for (BasicBlock &BB : *F) {
811 for (Instruction &Instr : llvm::make_early_inc_range(BB)) {
812 if (auto *IP = dyn_cast<InstrProfInstBase>(&Instr))
813 InstrProfInsts.push_back(IP);
814 }
815 }
816
817 for (auto *Instr : InstrProfInsts) {
818 doSampling(Instr);
819 if (auto *IPIS = dyn_cast<InstrProfIncrementInstStep>(Instr)) {
820 lowerIncrement(IPIS);
821 MadeChange = true;
822 } else if (auto *IPI = dyn_cast<InstrProfIncrementInst>(Instr)) {
823 lowerIncrement(IPI);
824 MadeChange = true;
825 } else if (auto *IPC = dyn_cast<InstrProfTimestampInst>(Instr)) {
826 lowerTimestamp(IPC);
827 MadeChange = true;
828 } else if (auto *IPC = dyn_cast<InstrProfCoverInst>(Instr)) {
829 lowerCover(IPC);
830 MadeChange = true;
831 } else if (auto *IPVP = dyn_cast<InstrProfValueProfileInst>(Instr)) {
832 lowerValueProfileInst(IPVP);
833 MadeChange = true;
834 } else if (auto *IPMP = dyn_cast<InstrProfMCDCBitmapParameters>(Instr)) {
835 IPMP->eraseFromParent();
836 MadeChange = true;
837 } else if (auto *IPBU = dyn_cast<InstrProfMCDCTVBitmapUpdate>(Instr)) {
838 lowerMCDCTestVectorBitmapUpdate(IPBU);
839 MadeChange = true;
840 }
841 }
842
843 if (!MadeChange)
844 return false;
845
846 promoteCounterLoadStores(F);
847 return true;
848}
849
850bool InstrLowerer::isRuntimeCounterRelocationEnabled() const {
851 // Mach-O don't support weak external references.
852 if (TT.isOSBinFormatMachO())
853 return false;
854
855 if (RuntimeCounterRelocation.getNumOccurrences() > 0)
856 return RuntimeCounterRelocation;
857
858 // Fuchsia uses runtime counter relocation by default.
859 return TT.isOSFuchsia();
860}
861
862bool InstrLowerer::isSamplingEnabled() const {
863 if (SampledInstr.getNumOccurrences() > 0)
864 return SampledInstr;
865 return Options.Sampling;
866}
867
868bool InstrLowerer::isCounterPromotionEnabled() const {
869 if (DoCounterPromotion.getNumOccurrences() > 0)
870 return DoCounterPromotion;
871
872 return Options.DoCounterPromotion;
873}
874
875void InstrLowerer::promoteCounterLoadStores(Function *F) {
876 if (!isCounterPromotionEnabled())
877 return;
878
879 DominatorTree DT(*F);
880 LoopInfo LI(DT);
881 DenseMap<Loop *, SmallVector<LoadStorePair, 8>> LoopPromotionCandidates;
882
883 std::unique_ptr<BlockFrequencyInfo> BFI;
884 if (Options.UseBFIInPromotion) {
885 std::unique_ptr<BranchProbabilityInfo> BPI;
886 BPI.reset(new BranchProbabilityInfo(*F, LI, &GetTLI(*F)));
887 BFI.reset(new BlockFrequencyInfo(*F, *BPI, LI));
888 }
889
890 for (const auto &LoadStore : PromotionCandidates) {
891 auto *CounterLoad = LoadStore.first;
892 auto *CounterStore = LoadStore.second;
893 BasicBlock *BB = CounterLoad->getParent();
894 Loop *ParentLoop = LI.getLoopFor(BB);
895 if (!ParentLoop)
896 continue;
897 LoopPromotionCandidates[ParentLoop].emplace_back(CounterLoad, CounterStore);
898 }
899
901
902 // Do a post-order traversal of the loops so that counter updates can be
903 // iteratively hoisted outside the loop nest.
904 for (auto *Loop : llvm::reverse(Loops)) {
905 PGOCounterPromoter Promoter(LoopPromotionCandidates, *Loop, LI, BFI.get());
906 Promoter.run(&TotalCountersPromoted);
907 }
908}
909
911 // On Fuchsia, we only need runtime hook if any counters are present.
912 if (TT.isOSFuchsia())
913 return false;
914
915 return true;
916}
917
918/// Check if the module contains uses of any profiling intrinsics.
920 auto containsIntrinsic = [&](int ID) {
921 if (auto *F = Intrinsic::getDeclarationIfExists(&M, ID))
922 return !F->use_empty();
923 return false;
924 };
925 return containsIntrinsic(Intrinsic::instrprof_cover) ||
926 containsIntrinsic(Intrinsic::instrprof_increment) ||
927 containsIntrinsic(Intrinsic::instrprof_increment_step) ||
928 containsIntrinsic(Intrinsic::instrprof_timestamp) ||
929 containsIntrinsic(Intrinsic::instrprof_value_profile);
930}
931
// Lower all profiling intrinsics of the module and emit the supporting data.
//
// Order of work, as visible below: emit the runtime hook up front when the
// target needs it unconditionally; bail out early when there is neither a
// profiling intrinsic nor a coverage-names variable; pre-scan every function
// to count value sites and create region bitmaps/counters; create vtable
// profile data; lower the intrinsics; lower coverage data; then emit value
// nodes, names, the (late) runtime hook, registration, uses and
// initialization.
//
// Returns true if the module was changed.
//
// NOTE(review): this text comes from a numbered source listing in which some
// lines were dropped; the gaps are flagged individually below.
932bool InstrLowerer::lower() {
933 bool MadeChange = false;
934 bool NeedsRuntimeHook = needsRuntimeHookUnconditionally(TT);
935 if (NeedsRuntimeHook)
936 MadeChange = emitRuntimeHook();
937
938 if (!IsCS && isSamplingEnabled())
// NOTE(review): listing line 939 is missing, so the statement guarded by the
// `if` above is not visible here. As written the `if` would swallow the
// declaration that follows - restore the missing statement before building.
940
941 bool ContainsProfiling = containsProfilingIntrinsics(M);
942 GlobalVariable *CoverageNamesVar =
943 M.getNamedGlobal(getCoverageUnusedNamesVarName());
944 // Improve compile time by avoiding linear scans when there is no work.
945 if (!ContainsProfiling && !CoverageNamesVar)
946 return MadeChange;
947
948 // We did not know how many value sites there would be inside
949 // the instrumented function. This is counting the number of instrumented
950 // target value sites to enter it as field in the profile data variable.
951 for (Function &F : M) {
952 InstrProfCntrInstBase *FirstProfInst = nullptr;
953 for (BasicBlock &BB : F) {
954 for (auto I = BB.begin(), E = BB.end(); I != E; I++) {
955 if (auto *Ind = dyn_cast<InstrProfValueProfileInst>(I))
956 computeNumValueSiteCounts(Ind);
957 else {
// Remember only the first increment/cover intrinsic of the function; it is
// used after the scan to create the counters.
958 if (FirstProfInst == nullptr &&
959 (isa<InstrProfIncrementInst>(I) || isa<InstrProfCoverInst>(I)))
960 FirstProfInst = dyn_cast<InstrProfCntrInstBase>(I);
961 // If the MCDCBitmapParameters intrinsic is seen, create the bitmaps.
962 if (const auto &Params = dyn_cast<InstrProfMCDCBitmapParameters>(I))
963 static_cast<void>(getOrCreateRegionBitmaps(Params));
964 }
965 }
966 }
967
968 // Use a profile intrinsic to create the region counters and data variable.
969 // Also create the data variable based on the MCDCParams.
970 if (FirstProfInst != nullptr) {
971 static_cast<void>(getOrCreateRegionCounters(FirstProfInst));
972 }
973 }
974
// NOTE(review): listing line 975 is missing; the loop below may originally
// have been guarded by a condition - confirm against the upstream file.
976 for (GlobalVariable &GV : M.globals())
977 // Global variables with type metadata are virtual table variables.
978 if (GV.hasMetadata(LLVMContext::MD_type))
979 getOrCreateVTableProfData(&GV);
980
981 for (Function &F : M)
982 MadeChange |= lowerIntrinsics(&F);
983
984 if (CoverageNamesVar) {
985 lowerCoverageData(CoverageNamesVar);
986 MadeChange = true;
987 }
988
// Nothing was lowered and no hook was emitted: skip all of the emission below.
989 if (!MadeChange)
990 return false;
991
992 emitVNodes();
993 emitNameData();
994 emitVTableNames();
995
996 // Emit runtime hook for the cases where the target does not unconditionally
997 // require pulling in profile runtime, and coverage is enabled on code that is
998 // not eliminated by the front-end, e.g. unused functions with internal
999 // linkage.
1000 if (!NeedsRuntimeHook && ContainsProfiling)
1001 emitRuntimeHook();
1002
1003 emitRegistration();
1004 emitUses();
1005 emitInitialization();
1006 return true;
1007}
1008
// Declare (or fetch) in module M the runtime function that value-profiling
// intrinsics are lowered to, and return the result of M.getOrInsertFunction.
//
// CallType must be Default or MemOp (asserted below). The function returns
// void, and its parameter types come from the VALUE_PROF_FUNC_PARAM
// expansion. When the target reports an extension attribute for i32
// parameters, that attribute is attached to parameter index 2.
//
// NOTE(review): listing line 1009, which holds the start of the signature
// (return type and function name), is missing here. The callers in this file
// spell the name getOrInsertValueProfilingCall.
1010 Module &M, const TargetLibraryInfo &TLI,
1011 ValueProfilingCallType CallType = ValueProfilingCallType::Default) {
// NOTE(review): Ctx has no visible use in this body; presumably the macro
// expansion below refers to it - confirm before removing.
1012 LLVMContext &Ctx = M.getContext();
1013 auto *ReturnTy = Type::getVoidTy(M.getContext());
1014
1015 AttributeList AL;
1016 if (auto AK = TLI.getExtAttrForI32Param(false))
1017 AL = AL.addParamAttribute(M.getContext(), 2, AK);
1018
1019 assert((CallType == ValueProfilingCallType::Default ||
1020 CallType == ValueProfilingCallType::MemOp) &&
1021 "Must be Default or MemOp");
1022 Type *ParamTypes[] = {
1023#define VALUE_PROF_FUNC_PARAM(ParamType, ParamName, ParamLLVMType) ParamLLVMType
// NOTE(review): listing line 1024 is missing; the macro defined above is
// never expanded in the visible text, so an #include of the file that uses
// it presumably belongs here.
1025 };
1026 auto *ValueProfilingCallTy =
1027 FunctionType::get(ReturnTy, ArrayRef(ParamTypes), false);
1028 StringRef FuncName = CallType == ValueProfilingCallType::Default
// NOTE(review): listing lines 1029-1030 are missing; they held the two
// operands of the conditional expression started above (the callee name for
// the Default case and for the MemOp case).
1031 return M.getOrInsertFunction(FuncName, ValueProfilingCallTy, AL);
1032}
1033
1034void InstrLowerer::computeNumValueSiteCounts(InstrProfValueProfileInst *Ind) {
1035 GlobalVariable *Name = Ind->getName();
1037 uint64_t Index = Ind->getIndex()->getZExtValue();
1038 auto &PD = ProfileDataMap[Name];
1039 PD.NumValueSites[ValueKind] =
1040 std::max(PD.NumValueSites[ValueKind], (uint32_t)(Index + 1));
1041}
1042
// Replace the value-profile intrinsic Ind with a call to the value-profiling
// runtime function.
//
// The call receives the profiled target value, the function's data variable
// (cast to a pointer in address space 0) and a site index. That index is the
// intrinsic's own index plus the number of value sites of every kind that
// precedes its kind. Operand bundles of the intrinsic are copied to the call.
// The intrinsic's uses are redirected to the call and the intrinsic is erased.
//
// NOTE(review): this text comes from a numbered source listing in which some
// lines were dropped; the gaps are flagged individually below.
1043void InstrLowerer::lowerValueProfileInst(InstrProfValueProfileInst *Ind) {
1044 // TODO: Value profiling heavily depends on the data section which is omitted
1045 // in lightweight mode. We need to move the value profile pointer to the
1046 // Counter struct to get this working.
1047 assert(
// NOTE(review): listing line 1048 is missing; it held the asserted condition.
// As written the assert only tests the string literal.
1049 "Value profiling is not yet supported with lightweight instrumentation");
1050 GlobalVariable *Name = Ind->getName();
1051 auto It = ProfileDataMap.find(Name);
1052 assert(It != ProfileDataMap.end() && It->second.DataVar &&
1053 "value profiling detected in function with no counter incerement");
1054
1055 GlobalVariable *DataVar = It->second.DataVar;
// NOTE(review): listing line 1056 is missing; ValueKind, used by the loop
// below, is presumably declared there from Ind->getValueKind() - confirm.
1057 uint64_t Index = Ind->getIndex()->getZExtValue();
// Offset the index by the site counts of all lower-numbered value kinds.
1058 for (uint32_t Kind = IPVK_First; Kind < ValueKind; ++Kind)
1059 Index += It->second.NumValueSites[Kind];
1060
1061 IRBuilder<> Builder(Ind);
1062 bool IsMemOpSize = (Ind->getValueKind()->getZExtValue() ==
1063 llvm::InstrProfValueKind::IPVK_MemOPSize);
1064 CallInst *Call = nullptr;
1065 auto *TLI = &GetTLI(*Ind->getFunction());
1066 auto *NormalizedDataVarPtr = ConstantExpr::getPointerBitCastOrAddrSpaceCast(
1067 DataVar, PointerType::get(M.getContext(), 0));
1068
1069 // To support value profiling calls within Windows exception handlers, funclet
1070 // information contained within operand bundles needs to be copied over to
1071 // the library call. This is required for the IR to be processed by the
1072 // WinEHPrepare pass.
// NOTE(review): listing line 1073 is missing; OpBundles, filled on the next
// line, is presumably declared there.
1074 Ind->getOperandBundlesAsDefs(OpBundles);
// Both branches build the same argument list; they differ only in which
// runtime function is called (Default vs. MemOp).
1075 if (!IsMemOpSize) {
1076 Value *Args[3] = {Ind->getTargetValue(), NormalizedDataVarPtr,
1077 Builder.getInt32(Index)};
1078 Call = Builder.CreateCall(getOrInsertValueProfilingCall(M, *TLI), Args,
1079 OpBundles);
1080 } else {
1081 Value *Args[3] = {Ind->getTargetValue(), NormalizedDataVarPtr,
1082 Builder.getInt32(Index)};
1083 Call = Builder.CreateCall(
1084 getOrInsertValueProfilingCall(M, *TLI, ValueProfilingCallType::MemOp),
1085 Args, OpBundles);
1086 }
// Mirror on the call site the i32 extension attribute that the callee
// declaration receives for parameter index 2.
1087 if (auto AK = TLI->getExtAttrForI32Param(false))
1088 Call->addParamAttr(2, AK);
1089 Ind->replaceAllUsesWith(Call);
1090 Ind->eraseFromParent();
1091}
1092
// Return the module-level bias variable called VarName, creating it if the
// module does not have one yet.
//
// A newly created variable is a non-constant i64 with linkonce_odr linkage,
// initialised to zero; on targets that support COMDAT it is placed in a
// comdat named after the variable.
1093GlobalVariable *InstrLowerer::getOrCreateBiasVar(StringRef VarName) {
1094 GlobalVariable *Bias = M.getGlobalVariable(VarName);
1095 if (Bias)
1096 return Bias;
1097
1098 Type *Int64Ty = Type::getInt64Ty(M.getContext());
1099
1100 // Compiler must define this variable when runtime counter relocation
1101 // is being used. Runtime has a weak external reference that is used
1102 // to check whether that's the case or not.
1103 Bias = new GlobalVariable(M, Int64Ty, false, GlobalValue::LinkOnceODRLinkage,
1104 Constant::getNullValue(Int64Ty), VarName);
// NOTE(review): the listing numbers jump from 1104 to 1106, so a statement
// may have been dropped here - verify against the upstream file.
1106 // A definition that's weak (linkonce_odr) without being in a COMDAT
1107 // section wouldn't lead to link errors, but it would lead to a dead
1108 // data word from every TU but one. Putting it in COMDAT ensures there
1109 // will be exactly one data slot in the link.
1110 if (TT.supportsCOMDAT())
1111 Bias->setComdat(M.getOrInsertComdat(VarName));
1112
1113 return Bias;
1114}
1115
1116Value *InstrLowerer::getCounterAddress(InstrProfCntrInstBase *I) {
1117 auto *Counters = getOrCreateRegionCounters(I);
1118 IRBuilder<> Builder(I);
1119
1120 if (isa<InstrProfTimestampInst>(I))
1121 Counters->setAlignment(Align(8));
1122
1123 auto *Addr = Builder.CreateConstInBoundsGEP2_32(
1124 Counters->getValueType(), Counters, 0, I->getIndex()->getZExtValue());
1125
1126 if (!isRuntimeCounterRelocationEnabled())
1127 return Addr;
1128
1129 Type *Int64Ty = Type::getInt64Ty(M.getContext());
1130 Function *Fn = I->getParent()->getParent();
1131 LoadInst *&BiasLI = FunctionToProfileBiasMap[Fn];
1132 if (!BiasLI) {
1133 IRBuilder<> EntryBuilder(&Fn->getEntryBlock().front());
1134 auto *Bias = getOrCreateBiasVar(getInstrProfCounterBiasVarName());
1135 BiasLI = EntryBuilder.CreateLoad(Int64Ty, Bias, "profc_bias");
1136 // Bias doesn't change after startup.
1137 BiasLI->setMetadata(LLVMContext::MD_invariant_load,
1138 MDNode::get(M.getContext(), {}));
1139 }
1140 auto *Add = Builder.CreateAdd(Builder.CreatePtrToInt(Addr, Int64Ty), BiasLI);
1141 return Builder.CreateIntToPtr(Add, Addr->getType());
1142}
1143
1144Value *InstrLowerer::getBitmapAddress(InstrProfMCDCTVBitmapUpdate *I) {
1145 auto *Bitmaps = getOrCreateRegionBitmaps(I);
1146 if (!isRuntimeCounterRelocationEnabled())
1147 return Bitmaps;
1148
1149 // Put BiasLI onto the entry block.
1150 Type *Int64Ty = Type::getInt64Ty(M.getContext());
1151 Function *Fn = I->getFunction();
1152 IRBuilder<> EntryBuilder(&Fn->getEntryBlock().front());
1153 auto *Bias = getOrCreateBiasVar(getInstrProfBitmapBiasVarName());
1154 auto *BiasLI = EntryBuilder.CreateLoad(Int64Ty, Bias, "profbm_bias");
1155 // Assume BiasLI invariant (in the function at least)
1156 BiasLI->setMetadata(LLVMContext::MD_invariant_load,
1157 MDNode::get(M.getContext(), {}));
1158
1159 // Add Bias to Bitmaps and put it before the intrinsic.
1160 IRBuilder<> Builder(I);
1161 return Builder.CreatePtrAdd(Bitmaps, BiasLI, "profbm_addr");
1162}
1163
1164void InstrLowerer::lowerCover(InstrProfCoverInst *CoverInstruction) {
1165 auto *Addr = getCounterAddress(CoverInstruction);
1166 IRBuilder<> Builder(CoverInstruction);
1167 if (ConditionalCounterUpdate) {
1168 Instruction *SplitBefore = CoverInstruction->getNextNode();
1169 auto &Ctx = CoverInstruction->getParent()->getContext();
1170 auto *Int8Ty = llvm::Type::getInt8Ty(Ctx);
1171 Value *Load = Builder.CreateLoad(Int8Ty, Addr, "pgocount");
1172 Value *Cmp = Builder.CreateIsNotNull(Load, "pgocount.ifnonzero");
1173 Instruction *ThenBranch =
1174 SplitBlockAndInsertIfThen(Cmp, SplitBefore, false);
1175 Builder.SetInsertPoint(ThenBranch);
1176 }
1177
1178 // We store zero to represent that this block is covered.
1179 Builder.CreateStore(Builder.getInt8(0), Addr);
1180 CoverInstruction->eraseFromParent();
1181}
1182
1183void InstrLowerer::lowerTimestamp(
1184 InstrProfTimestampInst *TimestampInstruction) {
1185 assert(TimestampInstruction->getIndex()->isZeroValue() &&
1186 "timestamp probes are always the first probe for a function");
1187 auto &Ctx = M.getContext();
1188 auto *TimestampAddr = getCounterAddress(TimestampInstruction);
1189 IRBuilder<> Builder(TimestampInstruction);
1190 auto *CalleeTy =
1191 FunctionType::get(Type::getVoidTy(Ctx), TimestampAddr->getType(), false);
1192 auto Callee = M.getOrInsertFunction(
1193 INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_SET_TIMESTAMP), CalleeTy);
1194 Builder.CreateCall(Callee, {TimestampAddr});
1195 TimestampInstruction->eraseFromParent();
1196}
1197
// Lower the increment intrinsic Inc to an update of its counter slot.
//
// The atomic path is taken when Options.Atomic or AtomicCounterUpdateAll is
// set, or when Inc has index zero and AtomicFirstCounter is set. Otherwise a
// plain load / add-step / store sequence is emitted, and the load/store pair
// is remembered in PromotionCandidates when counter promotion is enabled.
// The intrinsic is erased afterwards.
1198void InstrLowerer::lowerIncrement(InstrProfIncrementInst *Inc) {
1199 auto *Addr = getCounterAddress(Inc);
1200
1201 IRBuilder<> Builder(Inc);
1202 if (Options.Atomic || AtomicCounterUpdateAll ||
1203 (Inc->getIndex()->isZeroValue() && AtomicFirstCounter)) {
// NOTE(review): listing lines 1204-1205 are missing, so the body of the
// atomic branch is not visible here; presumably it emits an atomic
// read-modify-write add of the step - restore it from the upstream file.
1206 } else {
1207 Value *IncStep = Inc->getStep();
1208 Value *Load = Builder.CreateLoad(IncStep->getType(), Addr, "pgocount");
1209 auto *Count = Builder.CreateAdd(Load, Inc->getStep());
1210 auto *Store = Builder.CreateStore(Count, Addr);
1211 if (isCounterPromotionEnabled())
1212 PromotionCandidates.emplace_back(cast<Instruction>(Load), Store);
1213 }
1214 Inc->eraseFromParent();
1215}
1216
1217void InstrLowerer::lowerCoverageData(GlobalVariable *CoverageNamesVar) {
1218 ConstantArray *Names =
1219 cast<ConstantArray>(CoverageNamesVar->getInitializer());
1220 for (unsigned I = 0, E = Names->getNumOperands(); I < E; ++I) {
1221 Constant *NC = Names->getOperand(I);
1222 Value *V = NC->stripPointerCasts();
1223 assert(isa<GlobalVariable>(V) && "Missing reference to function name");
1224 GlobalVariable *Name = cast<GlobalVariable>(V);
1225
1226 Name->setLinkage(GlobalValue::PrivateLinkage);
1227 ReferencedNames.push_back(Name);
1228 if (isa<ConstantExpr>(NC))
1229 NC->dropAllReferences();
1230 }
1231 CoverageNamesVar->eraseFromParent();
1232}
1233
// Lower an MC/DC test-vector bitmap update intrinsic.
//
// The bit number is the i32 loaded from the condition-bitmap address plus the
// intrinsic's bitmap index. Its upper bits select a byte of the function's
// bitmap and its low three bits select a bit in that byte. The bit is set
// either with a plain load/or/store, or - when Options.Atomic or
// AtomicCounterUpdateAll is set - with an atomic OR that is only executed
// when the bit is not already set in the just-loaded byte. The intrinsic is
// erased afterwards.
1234void InstrLowerer::lowerMCDCTestVectorBitmapUpdate(
// NOTE(review): listing line 1235 is missing; it held the parameter list. The
// body refers to the parameter as Update.
1236 auto &Ctx = M.getContext();
1237 IRBuilder<> Builder(Update);
1238 auto *Int8Ty = Type::getInt8Ty(Ctx);
1239 auto *Int32Ty = Type::getInt32Ty(Ctx);
1240 auto *MCDCCondBitmapAddr = Update->getMCDCCondBitmapAddr();
1241 auto *BitmapAddr = getBitmapAddress(Update);
1242
1243 // Load Temp Val + BitmapIdx.
1244 // %mcdc.temp = load i32, ptr %mcdc.addr, align 4
1245 auto *Temp = Builder.CreateAdd(
1246 Builder.CreateLoad(Int32Ty, MCDCCondBitmapAddr, "mcdc.temp"),
1247 Update->getBitmapIndex());
1248
1249 // Calculate byte offset using div8.
1250 // %1 = lshr i32 %mcdc.temp, 3
1251 auto *BitmapByteOffset = Builder.CreateLShr(Temp, 0x3);
1252
1253 // Add byte offset to section base byte address.
1254 // %4 = getelementptr inbounds i8, ptr @__profbm_test, i32 %1
1255 auto *BitmapByteAddr =
1256 Builder.CreateInBoundsPtrAdd(BitmapAddr, BitmapByteOffset);
1257
1258 // Calculate bit offset into bitmap byte by using div8 remainder (AND 7)
1259 // %5 = and i32 %mcdc.temp, 7
1260 // %6 = trunc i32 %5 to i8
1261 auto *BitToSet = Builder.CreateTrunc(Builder.CreateAnd(Temp, 0x7), Int8Ty);
1262
1263 // Shift bit offset left to form a bitmap.
1264 // %7 = shl i8 1, %6
1265 auto *ShiftedVal = Builder.CreateShl(Builder.getInt8(0x1), BitToSet);
1266
1267 // Load profile bitmap byte.
1268 // %mcdc.bits = load i8, ptr %4, align 1
1269 auto *Bitmap = Builder.CreateLoad(Int8Ty, BitmapByteAddr, "mcdc.bits");
1270
1271 if (Options.Atomic || AtomicCounterUpdateAll) {
1272 // If ((Bitmap & Val) != Val), then execute atomic (Bitmap |= Val).
1273 // Note, just-loaded Bitmap might not be up-to-date. Use it just for
1274 // early testing.
1275 auto *Masked = Builder.CreateAnd(Bitmap, ShiftedVal);
1276 auto *ShouldStore = Builder.CreateICmpNE(Masked, ShiftedVal);
1277
1278 // Assume updating will be rare.
1279 auto *Unlikely = MDBuilder(Ctx).createUnlikelyBranchWeights();
1280 Instruction *ThenBranch =
1281 SplitBlockAndInsertIfThen(ShouldStore, Update, false, Unlikely);
1282
1283 // Execute if (unlikely(ShouldStore)).
1284 Builder.SetInsertPoint(ThenBranch);
1285 Builder.CreateAtomicRMW(AtomicRMWInst::Or, BitmapByteAddr, ShiftedVal,
// NOTE(review): listing line 1286 is missing; it held the remaining arguments
// of the CreateAtomicRMW call above (alignment and ordering) and closed it.
1287 } else {
1288 // Perform logical OR of profile bitmap byte and shifted bit offset.
1289 // %8 = or i8 %mcdc.bits, %7
1290 auto *Result = Builder.CreateOr(Bitmap, ShiftedVal);
1291
1292 // Store the updated profile bitmap byte.
1293 // store i8 %8, ptr %4, align 1
1294 Builder.CreateStore(Result, BitmapByteAddr);
1295 }
1296
1297 Update->eraseFromParent();
1298}
1299
1300/// Get the name of a profiling variable for a particular function.
///
/// The result is \p Prefix followed by the name of \p Inc's name variable
/// with the name-variable prefix stripped. On the renaming path the
/// function hash is appended as ".<hash>" unless the name already ends with
/// that suffix.
///
/// \param Inc the profiling intrinsic that supplies the name variable, the
/// enclosing function and the hash.
/// \param Prefix the prefix for the kind of variable being named.
/// \param Renamed set to false when the first branch below returns the plain
/// name, and to true otherwise (even if no suffix had to be appended).
1301static std::string getVarName(InstrProfInstBase *Inc, StringRef Prefix,
1302 bool &Renamed) {
1303 StringRef NamePrefix = getInstrProfNameVarPrefix();
1304 StringRef Name = Inc->getName()->getName().substr(NamePrefix.size());
1305 Function *F = Inc->getParent()->getParent();
1306 Module *M = F->getParent();
1307 if (!DoHashBasedCounterSplit || !isIRPGOFlagSet(M) ||
// NOTE(review): listing line 1308 is missing; it held the last operand of the
// condition above and the opening brace of the branch. F has no other visible
// use, so that operand presumably tests F - confirm against the upstream file.
1309 Renamed = false;
1310 return (Prefix + Name).str();
1311 }
1312 Renamed = true;
1313 uint64_t FuncHash = Inc->getHash()->getZExtValue();
1314 SmallVector<char, 24> HashPostfix;
// Do not append the hash a second time if the name already carries it.
1315 if (Name.ends_with((Twine(".") + Twine(FuncHash)).toStringRef(HashPostfix)))
1316 return (Prefix + Name).str();
1317 return (Prefix + Name + "." + Twine(FuncHash)).str();
1318}
1319
1321 // Only record function addresses if IR PGO is enabled or if clang value
1322 // profiling is enabled. Recording function addresses greatly increases object
1323 // file size, because it prevents the inliner from deleting functions that
1324 // have been inlined everywhere.
1325 if (!profDataReferencedByCode(*F->getParent()))
1326 return false;
1327
1328 // Check the linkage
1329 bool HasAvailableExternallyLinkage = F->hasAvailableExternallyLinkage();
1330 if (!F->hasLinkOnceLinkage() && !F->hasLocalLinkage() &&
1331 !HasAvailableExternallyLinkage)
1332 return true;
1333
1334 // A function marked 'alwaysinline' with available_externally linkage can't
1335 // have its address taken. Doing so would create an undefined external ref to
1336 // the function, which would fail to link.
1337 if (HasAvailableExternallyLinkage &&
1338 F->hasFnAttribute(Attribute::AlwaysInline))
1339 return false;
1340
1341 // Prohibit function address recording if the function is both internal and
1342 // COMDAT. This avoids the profile data variable referencing internal symbols
1343 // in COMDAT.
1344 if (F->hasLocalLinkage() && F->hasComdat())
1345 return false;
1346
1347 // Check uses of this function for other than direct calls or invokes to it.
1348 // Inline virtual functions have linkeOnceODR linkage. When a key method
1349 // exists, the vtable will only be emitted in the TU where the key method
1350 // is defined. In a TU where vtable is not available, the function won't
1351 // be 'addresstaken'. If its address is not recorded here, the profile data
1352 // with missing address may be picked by the linker leading to missing
1353 // indirect call target info.
1354 return F->hasAddressTaken() || F->hasLinkOnceLinkage();
1355}
1356
// Decide whether profile data must reference Fn through its own symbol rather
// than through a separate alias. Returns true when the function itself has to
// be used, and false when using an alias is acceptable.
1357static inline bool shouldUsePublicSymbol(Function *Fn) {
1358 // It isn't legal to make an alias of this function at all
1359 if (Fn->isDeclarationForLinker())
1360 return true;
1361
1362 // Symbols with local linkage can just use the symbol directly without
1363 // introducing relocations
1364 if (Fn->hasLocalLinkage())
1365 return true;
1366
1367 // PGO + ThinLTO + CFI cause duplicate symbols to be introduced due to some
1368 // unfavorable interaction between the new alias and the alias renaming done
1369 // in LowerTypeTests under ThinLTO. For comdat functions that would normally
1370 // be deduplicated, but the renaming scheme ends up preventing renaming, since
1371 // it creates unique names for each alias, resulting in duplicated symbols. In
1372 // the future, we should update the CFI related passes to migrate these
1373 // aliases to the same module as the jump-table they refer to will be defined.
1374 if (Fn->hasMetadata(LLVMContext::MD_type))
1375 return true;
1376
1377 // For comdat functions, an alias would need the same linkage as the original
1378 // function and hidden visibility. There is no point in adding an alias with
1379 // identical linkage and visibility to avoid introducing symbolic relocations.
1380 if (Fn->hasComdat() &&
// NOTE(review): listing line 1381 is missing; it held the second operand of
// the condition above. Judging by the comment it is a check on Fn's
// visibility, but its exact form cannot be determined from this listing -
// restore it from the upstream file.
1382 return true;
1383
1384 // It's OK to use an alias
1385 return false;
1386}
1387
// Produce the constant stored as the function address in Fn's profile data:
// a null pointer when the address should not be recorded, Fn itself when the
// public symbol must be used, and otherwise an alias named "<Fn>.local".
//
// NOTE(review): listing line 1388, which holds the signature, is missing
// here. The body refers to its parameter as Fn and returns constants.
1389 auto *Int8PtrTy = PointerType::getUnqual(Fn->getContext());
1390 // Store a nullptr in __llvm_profd, if we shouldn't use a real address
1391 if (!shouldRecordFunctionAddr(Fn))
1392 return ConstantPointerNull::get(Int8PtrTy);
1393
1394 // If we can't use an alias, we must use the public symbol, even though this
1395 // may require a symbolic relocation.
1396 if (shouldUsePublicSymbol(Fn))
1397 return Fn;
1398
1399 // When possible use a private alias to avoid symbolic relocations.
// NOTE(review): listing line 1400 is missing; it held the start of the
// statement that creates the alias GA (the line below is only the tail of
// that call's argument list).
1401 Fn->getName() + ".local", Fn);
1402
1403 // When the instrumented function is a COMDAT function, we cannot use a
1404 // private alias. If we did, we would create reference to a local label in
1405 // this function's section. If this version of the function isn't selected by
1406 // the linker, then the metadata would introduce a reference to a discarded
1407 // section. So, for COMDAT functions, we need to adjust the linkage of the
1408 // alias. Using hidden visibility avoids a dynamic relocation and an entry in
1409 // the dynamic symbol table.
1410 //
1411 // Note that this handles COMDAT functions with visibility other than Hidden,
1412 // since that case is covered in shouldUsePublicSymbol()
1413 if (Fn->hasComdat()) {
1414 GA->setLinkage(Fn->getLinkage());
// NOTE(review): listing line 1415 is missing; going by the comment above it
// presumably gives the alias hidden visibility - confirm.
1416 }
1417
1418 // appendToCompilerUsed(*Fn->getParent(), {GA});
1419
1420 return GA;
1421}
1422
// Tell whether profile section ranges have to be registered at run time for
// the target TT. Returns false for ELF, COFF, Mach-O, XCOFF and Wasm object
// formats and true for every other format.
//
// NOTE(review): listing line 1423, which holds the signature, is missing
// here. The body reads a triple-like parameter named TT and returns a bool.
1424 // compiler-rt uses linker support to get data/counters/name start/end for
1425 // ELF, COFF, Mach-O, XCOFF, and Wasm.
1426 if (TT.isOSBinFormatELF() || TT.isOSBinFormatCOFF() ||
1427 TT.isOSBinFormatMachO() || TT.isOSBinFormatXCOFF() ||
1428 TT.isOSBinFormatWasm())
1429 return false;
1430
1431 return true;
1432}
1433
1434void InstrLowerer::maybeSetComdat(GlobalVariable *GV, GlobalObject *GO,
1435 StringRef CounterGroupName) {
1436 // Place lowered global variables in a comdat group if the associated function
1437 // or global variable is a COMDAT. This will make sure that only one copy of
1438 // global variable (e.g. function counters) of the COMDAT function will be
1439 // emitted after linking.
1440 bool NeedComdat = needsComdatForCounter(*GO, M);
1441 bool UseComdat = (NeedComdat || TT.isOSBinFormatELF());
1442
1443 if (!UseComdat)
1444 return;
1445
1446 // Keep in mind that this pass may run before the inliner, so we need to
1447 // create a new comdat group (for counters, profiling data, etc). If we use
1448 // the comdat of the parent function, that will result in relocations against
1449 // discarded sections.
1450 //
1451 // If the data variable is referenced by code, non-counter variables (notably
1452 // profiling data) and counters have to be in different comdats for COFF
1453 // because the Visual C++ linker will report duplicate symbol errors if there
1454 // are multiple external symbols with the same name marked
1455 // IMAGE_COMDAT_SELECT_ASSOCIATIVE.
1456 StringRef GroupName = TT.isOSBinFormatCOFF() && DataReferencedByCode
1457 ? GV->getName()
1458 : CounterGroupName;
1459 Comdat *C = M.getOrInsertComdat(GroupName);
1460
1461 if (!NeedComdat) {
1462 // Object file format must be ELF since `UseComdat && !NeedComdat` is true.
1463 //
1464 // For ELF, when not using COMDAT, put counters, data and values into a
1465 // nodeduplicate COMDAT which is lowered to a zero-flag section group. This
1466 // allows -z start-stop-gc to discard the entire group when the function is
1467 // discarded.
1468 C->setSelectionKind(Comdat::NoDeduplicate);
1469 }
1470 GV->setComdat(C);
1471 // COFF doesn't allow the comdat group leader to have private linkage, so
1472 // upgrade private linkage to internal linkage to produce a symbol table
1473 // entry.
1474 if (TT.isOSBinFormatCOFF() && GV->hasPrivateLinkage())
1476}
1477
1479 if (!profDataReferencedByCode(*GV->getParent()))
1480 return false;
1481
1482 if (!GV->hasLinkOnceLinkage() && !GV->hasLocalLinkage() &&
1484 return true;
1485
1486 // This avoids the profile data from referencing internal symbols in
1487 // COMDAT.
1488 if (GV->hasLocalLinkage() && GV->hasComdat())
1489 return false;
1490
1491 return true;
1492}
1493
1494// FIXME: Introduce an internal alias like what's done for functions to reduce
1495// the number of relocation entries.
1497 auto *Int8PtrTy = PointerType::getUnqual(GV->getContext());
1498
1499 // Store a nullptr in __profvt_ if a real address shouldn't be used.
1500 if (!shouldRecordVTableAddr(GV))
1501 return ConstantPointerNull::get(Int8PtrTy);
1502
1503 return ConstantExpr::getBitCast(GV, Int8PtrTy);
1504}
1505
// Create the profile data variable for the vtable GV unless one was already
// recorded in VTableDataMap.
//
// Variables whose names start with "llvm.", "__llvm" or "__prof" are skipped.
// The new data variable is named with the vtable-variable prefix followed by
// the PGO name of GV, placed in the vtab profile section with alignment 8,
// optionally put in a comdat, recorded in VTableDataMap, and appended to
// UsedVars; GV itself is appended to ReferencedVTables.
//
// NOTE(review): this text comes from a numbered source listing in which some
// lines were dropped; the gaps are flagged individually below.
1506void InstrLowerer::getOrCreateVTableProfData(GlobalVariable *GV) {
// NOTE(review): listing line 1507 is missing; it held the start of an assert
// whose message is on the next line.
1508 "Value profiling is not supported with lightweight instrumentation");
// NOTE(review): listing line 1509 is missing; it held the condition that
// guards the early return below.
1510 return;
1511
1512 // Skip llvm internal global variable or __prof variables.
1513 if (GV->getName().starts_with("llvm.") ||
1514 GV->getName().starts_with("__llvm") ||
1515 GV->getName().starts_with("__prof"))
1516 return;
1517
1518 // VTableProfData already created
1519 auto It = VTableDataMap.find(GV);
1520 if (It != VTableDataMap.end() && It->second)
1521 return;
1522
// NOTE(review): listing lines 1523-1524 are missing; Linkage and Visibility,
// used further down, are presumably declared there.
1525
1526 // This is to keep consistent with per-function profile data
1527 // for correctness.
1528 if (TT.isOSBinFormatXCOFF()) {
// NOTE(review): listing line 1529 is missing; presumably it adjusts Linkage
// for XCOFF alongside the visibility change below - confirm.
1530 Visibility = GlobalValue::DefaultVisibility;
1531 }
1532
1533 LLVMContext &Ctx = M.getContext();
1534 Type *DataTypes[] = {
1535#define INSTR_PROF_VTABLE_DATA(Type, LLVMType, Name, Init) LLVMType,
// NOTE(review): listing line 1536 is missing; the macro defined above is never
// expanded in the visible text, so an #include of the file that uses it
// presumably belongs here.
1537#undef INSTR_PROF_VTABLE_DATA
1538 };
1539
1540 auto *DataTy = StructType::get(Ctx, ArrayRef(DataTypes));
1541
1542 // Used by INSTR_PROF_VTABLE_DATA MACRO
1543 Constant *VTableAddr = getVTableAddrForProfData(GV);
1544 const std::string PGOVTableName = getPGOName(*GV);
1545 // Record the length of the vtable. This is needed since vtable pointers
1546 // loaded from C++ objects might be from the middle of a vtable definition.
1547 uint32_t VTableSizeVal =
1548 M.getDataLayout().getTypeAllocSize(GV->getValueType());
1549
1550 Constant *DataVals[] = {
1551#define INSTR_PROF_VTABLE_DATA(Type, LLVMType, Name, Init) Init,
// NOTE(review): listing line 1552 is missing; same situation as at line 1536.
1553#undef INSTR_PROF_VTABLE_DATA
1554 };
1555
1556 auto *Data =
1557 new GlobalVariable(M, DataTy, /*constant=*/false, Linkage,
1558 ConstantStruct::get(DataTy, DataVals),
1559 getInstrProfVTableVarPrefix() + PGOVTableName);
1560
1561 Data->setVisibility(Visibility);
1562 Data->setSection(getInstrProfSectionName(IPSK_vtab, TT.getObjectFormat()));
1563 Data->setAlignment(Align(8));
1564
1565 maybeSetComdat(Data, GV, Data->getName());
1566
1567 VTableDataMap[GV] = Data;
1568
1569 ReferencedVTables.push_back(GV);
1570
1571 // VTable <Hash, Addr> is used by runtime but not referenced by other
1572 // sections. Conservatively mark it linker retained.
1573 UsedVars.push_back(Data);
1574}
1575
// Create the counters (IPSK_cnts) or bitmaps (IPSK_bitmap) variable for the
// function containing Inc and place it in the matching profile section.
//
// The variable gets the visibility chosen below, the section name for IPSK,
// the chosen linkage, and possibly a comdat. Any other IPSK value is
// unreachable. Returns the new variable.
//
// NOTE(review): this text comes from a numbered source listing in which some
// lines were dropped; the gaps are flagged individually below.
1576GlobalVariable *InstrLowerer::setupProfileSection(InstrProfInstBase *Inc,
1577 InstrProfSectKind IPSK) {
1578 GlobalVariable *NamePtr = Inc->getName();
1579
1580 // Match the linkage and visibility of the name global.
1581 Function *Fn = Inc->getParent()->getParent();
// NOTE(review): listing line 1582 is missing; Linkage, used below, is
// presumably declared there from the name global (see the comment above).
1583 GlobalValue::VisibilityTypes Visibility = NamePtr->getVisibility();
1584
1585 // Use internal rather than private linkage so the counter variable shows up
1586 // in the symbol table when using debug info for correlation.
1587 if ((DebugInfoCorrelate ||
// NOTE(review): listing line 1588 is missing; it held the second operand of
// the parenthesised `||` and its closing parenthesis.
1589 TT.isOSBinFormatMachO() && Linkage == GlobalValue::PrivateLinkage)
// NOTE(review): listing line 1590 is missing; it held the statement guarded
// by the `if` above - per the comment, a switch to internal linkage.
1591
1592 // Due to the limitation of binder as of 2021/09/28, the duplicate weak
1593 // symbols in the same csect won't be discarded. When there are duplicate weak
1594 // symbols, we can NOT guarantee that the relocations get resolved to the
1595 // intended weak symbol, so we can not ensure the correctness of the relative
1596 // CounterPtr, so we have to use private linkage for counter and data symbols.
1597 if (TT.isOSBinFormatXCOFF()) {
// NOTE(review): listing line 1598 is missing; presumably it adjusts Linkage
// for XCOFF alongside the visibility change below - confirm.
1599 Visibility = GlobalValue::DefaultVisibility;
1600 }
1601 // Move the name variable to the right section.
1602 bool Renamed;
// NOTE(review): listing line 1603 is missing; Ptr, assigned in both branches
// below, is presumably declared there.
1604 StringRef VarPrefix;
1605 std::string VarName;
1606 if (IPSK == IPSK_cnts) {
1607 VarPrefix = getInstrProfCountersVarPrefix();
1608 VarName = getVarName(Inc, VarPrefix, Renamed);
1609 InstrProfCntrInstBase *CntrIncrement = dyn_cast<InstrProfCntrInstBase>(Inc);
1610 Ptr = createRegionCounters(CntrIncrement, VarName, Linkage);
1611 } else if (IPSK == IPSK_bitmap) {
1612 VarPrefix = getInstrProfBitmapVarPrefix();
1613 VarName = getVarName(Inc, VarPrefix, Renamed);
1614 InstrProfMCDCBitmapInstBase *BitmapUpdate =
1615 dyn_cast<InstrProfMCDCBitmapInstBase>(Inc);
1616 Ptr = createRegionBitmaps(BitmapUpdate, VarName, Linkage);
1617 } else {
1618 llvm_unreachable("Profile Section must be for Counters or Bitmaps");
1619 }
1620
1621 Ptr->setVisibility(Visibility);
1622 // Put the counters and bitmaps in their own sections so linkers can
1623 // remove unneeded sections.
1624 Ptr->setSection(getInstrProfSectionName(IPSK, TT.getObjectFormat()));
1625 Ptr->setLinkage(Linkage);
1626 maybeSetComdat(Ptr, Fn, VarName);
1627 return Ptr;
1628}
1629
1631InstrLowerer::createRegionBitmaps(InstrProfMCDCBitmapInstBase *Inc,
1633 GlobalValue::LinkageTypes Linkage) {
1634 uint64_t NumBytes = Inc->getNumBitmapBytes();
1635 auto *BitmapTy = ArrayType::get(Type::getInt8Ty(M.getContext()), NumBytes);
1636 auto GV = new GlobalVariable(M, BitmapTy, false, Linkage,
1637 Constant::getNullValue(BitmapTy), Name);
1638 GV->setAlignment(Align(1));
1639 return GV;
1640}
1641
1643InstrLowerer::getOrCreateRegionBitmaps(InstrProfMCDCBitmapInstBase *Inc) {
1644 GlobalVariable *NamePtr = Inc->getName();
1645 auto &PD = ProfileDataMap[NamePtr];
1646 if (PD.RegionBitmaps)
1647 return PD.RegionBitmaps;
1648
1649 // If RegionBitmaps doesn't already exist, create it by first setting up
1650 // the corresponding profile section.
1651 auto *BitmapPtr = setupProfileSection(Inc, IPSK_bitmap);
1652 PD.RegionBitmaps = BitmapPtr;
1653 PD.NumBitmapBytes = Inc->getNumBitmapBytes();
1654 return PD.RegionBitmaps;
1655}
1656
1658InstrLowerer::createRegionCounters(InstrProfCntrInstBase *Inc, StringRef Name,
1659 GlobalValue::LinkageTypes Linkage) {
1660 uint64_t NumCounters = Inc->getNumCounters()->getZExtValue();
1661 auto &Ctx = M.getContext();
1662 GlobalVariable *GV;
1663 if (isa<InstrProfCoverInst>(Inc)) {
1664 auto *CounterTy = Type::getInt8Ty(Ctx);
1665 auto *CounterArrTy = ArrayType::get(CounterTy, NumCounters);
1666 // TODO: `Constant::getAllOnesValue()` does not yet accept an array type.
1667 std::vector<Constant *> InitialValues(NumCounters,
1668 Constant::getAllOnesValue(CounterTy));
1669 GV = new GlobalVariable(M, CounterArrTy, false, Linkage,
1670 ConstantArray::get(CounterArrTy, InitialValues),
1671 Name);
1672 GV->setAlignment(Align(1));
1673 } else {
1674 auto *CounterTy = ArrayType::get(Type::getInt64Ty(Ctx), NumCounters);
1675 GV = new GlobalVariable(M, CounterTy, false, Linkage,
1676 Constant::getNullValue(CounterTy), Name);
1677 GV->setAlignment(Align(8));
1678 }
1679 return GV;
1680}
1681
// Return the counter variable for the function named by Inc, creating it on
// first use.
//
// On creation the variable is cached in the function's ProfileDataMap entry.
// Under the correlation condition below, a debug-info global variable
// expression carrying three annotations is attached to the counter (only if
// the function has a subprogram) and the counter is added to
// CompilerUsedVars. Finally createDataVariable is called for Inc.
//
// NOTE(review): listing line 1682, presumably holding the return type, is
// missing before the first line; further gaps are flagged individually below.
1683InstrLowerer::getOrCreateRegionCounters(InstrProfCntrInstBase *Inc) {
1684 GlobalVariable *NamePtr = Inc->getName();
1685 auto &PD = ProfileDataMap[NamePtr];
1686 if (PD.RegionCounters)
1687 return PD.RegionCounters;
1688
1689 // If RegionCounters doesn't already exist, create it by first setting up
1690 // the corresponding profile section.
1691 auto *CounterPtr = setupProfileSection(Inc, IPSK_cnts);
1692 PD.RegionCounters = CounterPtr;
1693
1694 if (DebugInfoCorrelate ||
// NOTE(review): listing line 1695 is missing; it held the second operand of
// the condition above and the opening brace of the branch.
1696 LLVMContext &Ctx = M.getContext();
1697 Function *Fn = Inc->getParent()->getParent();
1698 if (auto *SP = Fn->getSubprogram()) {
1699 DIBuilder DB(M, true, SP->getUnit());
1700 Metadata *FunctionNameAnnotation[] = {
// NOTE(review): listing lines 1701-1702 are missing (the array's elements).
1703 };
1704 Metadata *CFGHashAnnotation[] = {
// NOTE(review): listing lines 1705-1706 are missing (the array's elements).
1707 };
1708 Metadata *NumCountersAnnotation[] = {
// NOTE(review): listing lines 1709-1710 are missing (the array's elements).
1711 };
1712 auto Annotations = DB.getOrCreateArray({
1713 MDNode::get(Ctx, FunctionNameAnnotation),
1714 MDNode::get(Ctx, CFGHashAnnotation),
1715 MDNode::get(Ctx, NumCountersAnnotation),
1716 });
1717 auto *DICounter = DB.createGlobalVariableExpression(
1718 SP, CounterPtr->getName(), /*LinkageName=*/StringRef(), SP->getFile(),
1719 /*LineNo=*/0, DB.createUnspecifiedType("Profile Data Type"),
1720 CounterPtr->hasLocalLinkage(), /*IsDefined=*/true, /*Expr=*/nullptr,
1721 /*Decl=*/nullptr, /*TemplateParams=*/nullptr, /*AlignInBits=*/0,
1722 Annotations);
1723 CounterPtr->addDebugInfo(DICounter);
1724 DB.finalize();
1725 }
1726
1727 // Mark the counter variable as used so that it isn't optimized out.
1728 CompilerUsedVars.push_back(PD.RegionCounters);
1729 }
1730
1731 // Create the data variable (if it doesn't already exist).
1732 createDataVariable(Inc);
1733
1734 return PD.RegionCounters;
1735}
1736
1737void InstrLowerer::createDataVariable(InstrProfCntrInstBase *Inc) {
1738 // When debug information is correlated to profile data, a data variable
1739 // is not needed.
1741 return;
1742
1743 GlobalVariable *NamePtr = Inc->getName();
1744 auto &PD = ProfileDataMap[NamePtr];
1745
1746 // Return if data variable was already created.
1747 if (PD.DataVar)
1748 return;
1749
1750 LLVMContext &Ctx = M.getContext();
1751
1752 Function *Fn = Inc->getParent()->getParent();
1754 GlobalValue::VisibilityTypes Visibility = NamePtr->getVisibility();
1755
1756 // Due to the limitation of binder as of 2021/09/28, the duplicate weak
1757 // symbols in the same csect won't be discarded. When there are duplicate weak
1758 // symbols, we can NOT guarantee that the relocations get resolved to the
1759 // intended weak symbol, so we can not ensure the correctness of the relative
1760 // CounterPtr, so we have to use private linkage for counter and data symbols.
1761 if (TT.isOSBinFormatXCOFF()) {
1763 Visibility = GlobalValue::DefaultVisibility;
1764 }
1765
1766 bool NeedComdat = needsComdatForCounter(*Fn, M);
1767 bool Renamed;
1768
1769 // The Data Variable section is anchored to profile counters.
1770 std::string CntsVarName =
1772 std::string DataVarName =
1773 getVarName(Inc, getInstrProfDataVarPrefix(), Renamed);
1774
1775 auto *Int8PtrTy = PointerType::getUnqual(Ctx);
1776 // Allocate statically the array of pointers to value profile nodes for
1777 // the current function.
1778 Constant *ValuesPtrExpr = ConstantPointerNull::get(Int8PtrTy);
1779 uint64_t NS = 0;
1780 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
1781 NS += PD.NumValueSites[Kind];
1782 if (NS > 0 && ValueProfileStaticAlloc &&
1784 ArrayType *ValuesTy = ArrayType::get(Type::getInt64Ty(Ctx), NS);
1785 auto *ValuesVar = new GlobalVariable(
1786 M, ValuesTy, false, Linkage, Constant::getNullValue(ValuesTy),
1787 getVarName(Inc, getInstrProfValuesVarPrefix(), Renamed));
1788 ValuesVar->setVisibility(Visibility);
1789 setGlobalVariableLargeSection(TT, *ValuesVar);
1790 ValuesVar->setSection(
1791 getInstrProfSectionName(IPSK_vals, TT.getObjectFormat()));
1792 ValuesVar->setAlignment(Align(8));
1793 maybeSetComdat(ValuesVar, Fn, CntsVarName);
1795 ValuesVar, PointerType::get(Fn->getContext(), 0));
1796 }
1797
1798 uint64_t NumCounters = Inc->getNumCounters()->getZExtValue();
1799 auto *CounterPtr = PD.RegionCounters;
1800
1801 uint64_t NumBitmapBytes = PD.NumBitmapBytes;
1802
1803 // Create data variable.
1804 auto *IntPtrTy = M.getDataLayout().getIntPtrType(M.getContext());
1805 auto *Int16Ty = Type::getInt16Ty(Ctx);
1806 auto *Int16ArrayTy = ArrayType::get(Int16Ty, IPVK_Last + 1);
1807 Type *DataTypes[] = {
1808#define INSTR_PROF_DATA(Type, LLVMType, Name, Init) LLVMType,
1810 };
1811 auto *DataTy = StructType::get(Ctx, ArrayRef(DataTypes));
1812
1813 Constant *FunctionAddr = getFuncAddrForProfData(Fn);
1814
1815 Constant *Int16ArrayVals[IPVK_Last + 1];
1816 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
1817 Int16ArrayVals[Kind] = ConstantInt::get(Int16Ty, PD.NumValueSites[Kind]);
1818
1819 if (isGPUProfTarget(M)) {
1822 }
1823 // If the data variable is not referenced by code (if we don't emit
1824 // @llvm.instrprof.value.profile, NS will be 0), and the counter keeps the
1825 // data variable live under linker GC, the data variable can be private. This
1826 // optimization applies to ELF.
1827 //
1828 // On COFF, a comdat leader cannot be local so we require DataReferencedByCode
1829 // to be false.
1830 //
1831 // If profd is in a deduplicate comdat, NS==0 with a hash suffix guarantees
1832 // that other copies must have the same CFG and cannot have value profiling.
1833 // If no hash suffix, other profd copies may be referenced by code.
1834 else if (NS == 0 && !(DataReferencedByCode && NeedComdat && !Renamed) &&
1835 (TT.isOSBinFormatELF() ||
1836 (!DataReferencedByCode && TT.isOSBinFormatCOFF()))) {
1838 Visibility = GlobalValue::DefaultVisibility;
1839 }
1840 auto *Data =
1841 new GlobalVariable(M, DataTy, false, Linkage, nullptr, DataVarName);
1842 Constant *RelativeCounterPtr;
1843 GlobalVariable *BitmapPtr = PD.RegionBitmaps;
1844 Constant *RelativeBitmapPtr = ConstantInt::get(IntPtrTy, 0);
1845 InstrProfSectKind DataSectionKind;
1846 // With binary profile correlation, profile data is not loaded into memory.
1847 // profile data must reference profile counter with an absolute relocation.
1849 DataSectionKind = IPSK_covdata;
1850 RelativeCounterPtr = ConstantExpr::getPtrToInt(CounterPtr, IntPtrTy);
1851 if (BitmapPtr != nullptr)
1852 RelativeBitmapPtr = ConstantExpr::getPtrToInt(BitmapPtr, IntPtrTy);
1853 } else {
1854 // Reference the counter variable with a label difference (link-time
1855 // constant).
1856 DataSectionKind = IPSK_data;
1857 RelativeCounterPtr =
1858 ConstantExpr::getSub(ConstantExpr::getPtrToInt(CounterPtr, IntPtrTy),
1859 ConstantExpr::getPtrToInt(Data, IntPtrTy));
1860 if (BitmapPtr != nullptr)
1861 RelativeBitmapPtr =
1863 ConstantExpr::getPtrToInt(Data, IntPtrTy));
1864 }
1865
1866 Constant *DataVals[] = {
1867#define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Init,
1869 };
1870 Data->setInitializer(ConstantStruct::get(DataTy, DataVals));
1871
1872 Data->setVisibility(Visibility);
1873 Data->setSection(
1874 getInstrProfSectionName(DataSectionKind, TT.getObjectFormat()));
1875 Data->setAlignment(Align(INSTR_PROF_DATA_ALIGNMENT));
1876 maybeSetComdat(Data, Fn, CntsVarName);
1877
1878 PD.DataVar = Data;
1879
1880 // Mark the data variable as used so that it isn't stripped out.
1881 CompilerUsedVars.push_back(Data);
1882 // Now that the linkage set by the FE has been passed to the data and counter
1883 // variables, reset Name variable's linkage and visibility to private so that
1884 // it can be removed later by the compiler.
1886 // Collect the referenced names to be used by emitNameData.
1887 ReferencedNames.push_back(NamePtr);
1888}
1889
1890void InstrLowerer::emitVNodes() {
1891 if (!ValueProfileStaticAlloc)
1892 return;
1893
1894 // For now only support this on platforms that do
1895 // not require runtime registration to discover
1896 // named section start/end.
1898 return;
1899
1900 size_t TotalNS = 0;
1901 for (auto &PD : ProfileDataMap) {
1902 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
1903 TotalNS += PD.second.NumValueSites[Kind];
1904 }
1905
1906 if (!TotalNS)
1907 return;
1908
1909 uint64_t NumCounters = TotalNS * NumCountersPerValueSite;
1910// Heuristic for small programs with very few total value sites.
1911// The default value of vp-counters-per-site is chosen based on
1912// the observation that large apps usually have a low percentage
1913// of value sites that actually have any profile data, and thus
1914// the average number of counters per site is low. For small
1915// apps with very few sites, this may not be true. Bump up the
1916// number of counters in this case.
1917#define INSTR_PROF_MIN_VAL_COUNTS 10
1918 if (NumCounters < INSTR_PROF_MIN_VAL_COUNTS)
1919 NumCounters = std::max(INSTR_PROF_MIN_VAL_COUNTS, (int)NumCounters * 2);
1920
1921 auto &Ctx = M.getContext();
1922 Type *VNodeTypes[] = {
1923#define INSTR_PROF_VALUE_NODE(Type, LLVMType, Name, Init) LLVMType,
1925 };
1926 auto *VNodeTy = StructType::get(Ctx, ArrayRef(VNodeTypes));
1927
1928 ArrayType *VNodesTy = ArrayType::get(VNodeTy, NumCounters);
1929 auto *VNodesVar = new GlobalVariable(
1930 M, VNodesTy, false, GlobalValue::PrivateLinkage,
1932 setGlobalVariableLargeSection(TT, *VNodesVar);
1933 VNodesVar->setSection(
1934 getInstrProfSectionName(IPSK_vnodes, TT.getObjectFormat()));
1935 VNodesVar->setAlignment(M.getDataLayout().getABITypeAlign(VNodesTy));
1936 // VNodesVar is used by runtime but not referenced via relocation by other
1937 // sections. Conservatively make it linker retained.
1938 UsedVars.push_back(VNodesVar);
1939}
1940
1941void InstrLowerer::emitNameData() {
1942 std::string UncompressedData;
1943
1944 if (ReferencedNames.empty())
1945 return;
1946
1947 std::string CompressedNameStr;
1948 if (Error E = collectPGOFuncNameStrings(ReferencedNames, CompressedNameStr,
1950 report_fatal_error(Twine(toString(std::move(E))), false);
1951 }
1952
1953 auto &Ctx = M.getContext();
1954 auto *NamesVal =
1955 ConstantDataArray::getString(Ctx, StringRef(CompressedNameStr), false);
1956 NamesVar = new GlobalVariable(M, NamesVal->getType(), true,
1959
1960 // Make names variable public if current target is a GPU
1961 if (isGPUProfTarget(M)) {
1962 NamesVar->setLinkage(GlobalValue::ExternalLinkage);
1964 }
1965
1966 NamesSize = CompressedNameStr.size();
1967 setGlobalVariableLargeSection(TT, *NamesVar);
1968 NamesVar->setSection(
1970 ? getInstrProfSectionName(IPSK_covname, TT.getObjectFormat())
1971 : getInstrProfSectionName(IPSK_name, TT.getObjectFormat()));
1972 // On COFF, it's important to reduce the alignment down to 1 to prevent the
1973 // linker from inserting padding before the start of the names section or
1974 // between names entries.
1975 NamesVar->setAlignment(Align(1));
1976 // NamesVar is used by runtime but not referenced via relocation by other
1977 // sections. Conservatively make it linker retained.
1978 UsedVars.push_back(NamesVar);
1979
1980 for (auto *NamePtr : ReferencedNames)
1981 NamePtr->eraseFromParent();
1982}
1983
1984void InstrLowerer::emitVTableNames() {
1985 if (!EnableVTableValueProfiling || ReferencedVTables.empty())
1986 return;
1987
1988 // Collect the PGO names of referenced vtables and compress them.
1989 std::string CompressedVTableNames;
1990 if (Error E = collectVTableStrings(ReferencedVTables, CompressedVTableNames,
1992 report_fatal_error(Twine(toString(std::move(E))), false);
1993 }
1994
1995 auto &Ctx = M.getContext();
1996 auto *VTableNamesVal = ConstantDataArray::getString(
1997 Ctx, StringRef(CompressedVTableNames), false /* AddNull */);
1998 GlobalVariable *VTableNamesVar =
1999 new GlobalVariable(M, VTableNamesVal->getType(), true /* constant */,
2000 GlobalValue::PrivateLinkage, VTableNamesVal,
2002 VTableNamesVar->setSection(
2003 getInstrProfSectionName(IPSK_vname, TT.getObjectFormat()));
2004 VTableNamesVar->setAlignment(Align(1));
2005 // Make VTableNames linker retained.
2006 UsedVars.push_back(VTableNamesVar);
2007}
2008
2009void InstrLowerer::emitRegistration() {
2011 return;
2012
2013 // Construct the function.
2014 auto *VoidTy = Type::getVoidTy(M.getContext());
2015 auto *VoidPtrTy = PointerType::getUnqual(M.getContext());
2016 auto *Int64Ty = Type::getInt64Ty(M.getContext());
2017 auto *RegisterFTy = FunctionType::get(VoidTy, false);
2018 auto *RegisterF = Function::Create(RegisterFTy, GlobalValue::InternalLinkage,
2020 RegisterF->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
2021 if (Options.NoRedZone)
2022 RegisterF->addFnAttr(Attribute::NoRedZone);
2023
2024 auto *RuntimeRegisterTy = FunctionType::get(VoidTy, VoidPtrTy, false);
2025 auto *RuntimeRegisterF =
2028
2029 IRBuilder<> IRB(BasicBlock::Create(M.getContext(), "", RegisterF));
2030 for (Value *Data : CompilerUsedVars)
2031 if (!isa<Function>(Data))
2032 // Check for addrspace cast when profiling GPU
2033 IRB.CreateCall(RuntimeRegisterF,
2034 IRB.CreatePointerBitCastOrAddrSpaceCast(Data, VoidPtrTy));
2035 for (Value *Data : UsedVars)
2036 if (Data != NamesVar && !isa<Function>(Data))
2037 IRB.CreateCall(RuntimeRegisterF,
2038 IRB.CreatePointerBitCastOrAddrSpaceCast(Data, VoidPtrTy));
2039
2040 if (NamesVar) {
2041 Type *ParamTypes[] = {VoidPtrTy, Int64Ty};
2042 auto *NamesRegisterTy =
2043 FunctionType::get(VoidTy, ArrayRef(ParamTypes), false);
2044 auto *NamesRegisterF =
2047 IRB.CreateCall(NamesRegisterF, {IRB.CreatePointerBitCastOrAddrSpaceCast(
2048 NamesVar, VoidPtrTy),
2049 IRB.getInt64(NamesSize)});
2050 }
2051
2052 IRB.CreateRetVoid();
2053}
2054
2055bool InstrLowerer::emitRuntimeHook() {
2056 // We expect the linker to be invoked with -u<hook_var> flag for Linux
2057 // in which case there is no need to emit the external variable.
2058 if (TT.isOSLinux() || TT.isOSAIX())
2059 return false;
2060
2061 // If the module's provided its own runtime, we don't need to do anything.
2062 if (M.getGlobalVariable(getInstrProfRuntimeHookVarName()))
2063 return false;
2064
2065 // Declare an external variable that will pull in the runtime initialization.
2066 auto *Int32Ty = Type::getInt32Ty(M.getContext());
2067 auto *Var =
2068 new GlobalVariable(M, Int32Ty, false, GlobalValue::ExternalLinkage,
2070 if (isGPUProfTarget(M))
2071 Var->setVisibility(GlobalValue::ProtectedVisibility);
2072 else
2073 Var->setVisibility(GlobalValue::HiddenVisibility);
2074
2075 if (TT.isOSBinFormatELF() && !TT.isPS()) {
2076 // Mark the user variable as used so that it isn't stripped out.
2077 CompilerUsedVars.push_back(Var);
2078 } else {
2079 // Make a function that uses it.
2080 auto *User = Function::Create(FunctionType::get(Int32Ty, false),
2083 User->addFnAttr(Attribute::NoInline);
2084 if (Options.NoRedZone)
2085 User->addFnAttr(Attribute::NoRedZone);
2086 User->setVisibility(GlobalValue::HiddenVisibility);
2087 if (TT.supportsCOMDAT())
2088 User->setComdat(M.getOrInsertComdat(User->getName()));
2089
2090 IRBuilder<> IRB(BasicBlock::Create(M.getContext(), "", User));
2091 auto *Load = IRB.CreateLoad(Int32Ty, Var);
2092 IRB.CreateRet(Load);
2093
2094 // Mark the function as used so that it isn't stripped out.
2095 CompilerUsedVars.push_back(User);
2096 }
2097 return true;
2098}
2099
2100void InstrLowerer::emitUses() {
2101 // The metadata sections are parallel arrays. Optimizers (e.g.
2102 // GlobalOpt/ConstantMerge) may not discard associated sections as a unit, so
2103 // we conservatively retain all unconditionally in the compiler.
2104 //
2105 // On ELF and Mach-O, the linker can guarantee the associated sections will be
2106 // retained or discarded as a unit, so llvm.compiler.used is sufficient.
2107 // Similarly on COFF, if prof data is not referenced by code we use one comdat
2108 // and ensure this GC property as well. Otherwise, we have to conservatively
2109 // make all of the sections retained by the linker.
2110 if (TT.isOSBinFormatELF() || TT.isOSBinFormatMachO() ||
2111 (TT.isOSBinFormatCOFF() && !DataReferencedByCode))
2112 appendToCompilerUsed(M, CompilerUsedVars);
2113 else
2114 appendToUsed(M, CompilerUsedVars);
2115
2116 // We do not add proper references from used metadata sections to NamesVar and
2117 // VNodesVar, so we have to be conservative and place them in llvm.used
2118 // regardless of the target,
2119 appendToUsed(M, UsedVars);
2120}
2121
2122void InstrLowerer::emitInitialization() {
2123 // Create ProfileFileName variable. Don't don't this for the
2124 // context-sensitive instrumentation lowering: This lowering is after
2125 // LTO/ThinLTO linking. Pass PGOInstrumentationGenCreateVar should
2126 // have already create the variable before LTO/ThinLTO linking.
2127 if (!IsCS)
2128 createProfileFileNameVar(M, Options.InstrProfileOutput);
2129 Function *RegisterF = M.getFunction(getInstrProfRegFuncsName());
2130 if (!RegisterF)
2131 return;
2132
2133 // Create the initialization function.
2134 auto *VoidTy = Type::getVoidTy(M.getContext());
2135 auto *F = Function::Create(FunctionType::get(VoidTy, false),
2138 F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
2139 F->addFnAttr(Attribute::NoInline);
2140 if (Options.NoRedZone)
2141 F->addFnAttr(Attribute::NoRedZone);
2142
2143 // Add the basic block and the necessary calls.
2144 IRBuilder<> IRB(BasicBlock::Create(M.getContext(), "", F));
2145 IRB.CreateCall(RegisterF, {});
2146 IRB.CreateRetVoid();
2147
2148 appendToGlobalCtors(M, F, 0);
2149}
2150
2151namespace llvm {
2152// Create the variable for profile sampling.
2154 const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_SAMPLING_VAR));
2155 IntegerType *SamplingVarTy;
2156 Constant *ValueZero;
2157 if (getSampledInstrumentationConfig().UseShort) {
2158 SamplingVarTy = Type::getInt16Ty(M.getContext());
2159 ValueZero = Constant::getIntegerValue(SamplingVarTy, APInt(16, 0));
2160 } else {
2161 SamplingVarTy = Type::getInt32Ty(M.getContext());
2162 ValueZero = Constant::getIntegerValue(SamplingVarTy, APInt(32, 0));
2163 }
2164 auto SamplingVar = new GlobalVariable(
2165 M, SamplingVarTy, false, GlobalValue::WeakAnyLinkage, ValueZero, VarName);
2166 SamplingVar->setVisibility(GlobalValue::DefaultVisibility);
2167 SamplingVar->setThreadLocal(true);
2168 Triple TT(M.getTargetTriple());
2169 if (TT.supportsCOMDAT()) {
2170 SamplingVar->setLinkage(GlobalValue::ExternalLinkage);
2171 SamplingVar->setComdat(M.getOrInsertComdat(VarName));
2172 }
2173 appendToCompilerUsed(M, SamplingVar);
2174}
2175} // namespace llvm
This file contains the simple types necessary to represent the attributes associated with functions a...
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:686
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static unsigned InstrCount
#define LLVM_DEBUG(...)
Definition: Debug.h:106
@ Default
Definition: DwarfDebug.cpp:87
uint64_t Addr
std::string Name
Hexagon Hardware Loops
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
Module.h This file contains the declarations for the Module class.
static bool shouldRecordVTableAddr(GlobalVariable *GV)
static bool shouldRecordFunctionAddr(Function *F)
static bool needsRuntimeHookUnconditionally(const Triple &TT)
static bool containsProfilingIntrinsics(Module &M)
Check if the module contains uses of any profiling intrinsics.
static std::string getVarName(InstrProfInstBase *Inc, StringRef Prefix, bool &Renamed)
Get the name of a profiling variable for a particular function.
#define INSTR_PROF_MIN_VAL_COUNTS
static Constant * getFuncAddrForProfData(Function *Fn)
static bool shouldUsePublicSymbol(Function *Fn)
static FunctionCallee getOrInsertValueProfilingCall(Module &M, const TargetLibraryInfo &TLI, ValueProfilingCallType CallType=ValueProfilingCallType::Default)
static Constant * getVTableAddrForProfData(GlobalVariable *GV)
static bool needsRuntimeRegistrationOfSectionRange(const Triple &TT)
This file provides the interface for LLVM's PGO Instrumentation lowering pass.
static LVOptions Options
Definition: LVOptions.cpp:25
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
Memory SSA
Definition: MemorySSA.cpp:72
This file provides the interface for IR based instrumentation passes ( (profile-gen,...
FunctionAnalysisManager FAM
if(PassOpts->AAPipeline)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
Class for arbitrary precision integers.
Definition: APInt.h:78
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:410
Annotations lets you mark points and ranges inside source code, for tests:
Definition: Annotations.h:53
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
@ Add
*p = old + v
Definition: Instructions.h:720
@ Or
*p = old | v
Definition: Instructions.h:728
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
iterator end()
Definition: BasicBlock.h:461
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:448
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:416
const Instruction & front() const
Definition: BasicBlock.h:471
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:212
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:219
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Analysis providing branch probability information.
void getOperandBundlesAsDefs(SmallVectorImpl< OperandBundleDef > &Defs) const
Return the list of operand bundles attached to this instruction as a vector of OperandBundleDefs.
This class represents a function call, abstracting a target machine's calling convention.
@ NoDeduplicate
No deduplication is performed.
Definition: Comdat.h:39
ConstantArray - Constant Array Declarations.
Definition: Constants.h:427
static Constant * get(ArrayType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1312
static ConstantAsMetadata * get(Constant *C)
Definition: Metadata.h:528
static Constant * getString(LLVMContext &Context, StringRef Initializer, bool AddNull=true)
This method constructs a CDS and initializes it with a text string.
Definition: Constants.cpp:2990
static Constant * getPointerBitCastOrAddrSpaceCast(Constant *C, Type *Ty)
Create a BitCast or AddrSpaceCast for a pointer type depending on the address space.
Definition: Constants.cpp:2268
static Constant * getSub(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
Definition: Constants.cpp:2644
static Constant * getPtrToInt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2293
static Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2321
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:157
static ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
Definition: Constants.cpp:1826
static Constant * get(StructType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1378
This is an important base class in LLVM.
Definition: Constant.h:42
static Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
Definition: Constants.cpp:403
static Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:420
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:373
bool isZeroValue() const
Return true if the value is negative zero or null value.
Definition: Constants.cpp:76
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
Lightweight error class with error context and mandatory checking.
Definition: Error.h:160
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Definition: DerivedTypes.h:170
static FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
Definition: Function.h:173
const BasicBlock & getEntryBlock() const
Definition: Function.h:809
DISubprogram * getSubprogram() const
Get the attached subprogram.
Definition: Metadata.cpp:1874
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:369
static GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
Definition: Globals.cpp:557
bool hasMetadata() const
Return true if this value has any metadata attached to it.
Definition: Value.h:589
void setAlignment(Align Align)
Sets the alignment attribute of the GlobalObject.
Definition: Globals.cpp:143
void setComdat(Comdat *C)
Definition: Globals.cpp:212
bool hasComdat() const
Definition: GlobalObject.h:127
void setSection(StringRef S)
Change the section for this global.
Definition: Globals.cpp:273
bool hasLinkOnceLinkage() const
Definition: GlobalValue.h:515
VisibilityTypes getVisibility() const
Definition: GlobalValue.h:248
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:296
LinkageTypes getLinkage() const
Definition: GlobalValue.h:546
bool hasLocalLinkage() const
Definition: GlobalValue.h:528
bool hasPrivateLinkage() const
Definition: GlobalValue.h:527
void setLinkage(LinkageTypes LT)
Definition: GlobalValue.h:537
bool isDeclarationForLinker() const
Definition: GlobalValue.h:618
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
VisibilityTypes
An enumeration for the kinds of visibility of global values.
Definition: GlobalValue.h:66
@ DefaultVisibility
The GV is visible.
Definition: GlobalValue.h:67
@ HiddenVisibility
The GV is hidden.
Definition: GlobalValue.h:68
@ ProtectedVisibility
The GV is protected.
Definition: GlobalValue.h:69
void setVisibility(VisibilityTypes V)
Definition: GlobalValue.h:254
bool hasAvailableExternallyLinkage() const
Definition: GlobalValue.h:512
LinkageTypes
An enumeration for the kinds of linkage for global values.
Definition: GlobalValue.h:51
@ PrivateLinkage
Like Internal, but omit from symbol table.
Definition: GlobalValue.h:60
@ InternalLinkage
Rename collisions when linking (static functions).
Definition: GlobalValue.h:59
@ ExternalLinkage
Externally visible function.
Definition: GlobalValue.h:52
@ WeakAnyLinkage
Keep one copy of named function when linking (weak)
Definition: GlobalValue.h:56
@ LinkOnceODRLinkage
Same, but only replaced by something equivalent.
Definition: GlobalValue.h:55
Type * getValueType() const
Definition: GlobalValue.h:296
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
void eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing module and deletes it.
Definition: Globals.cpp:488
Value * CreateIntToPtr(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2150
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1460
ConstantInt * getInt8(uint8_t C)
Get a constant 8-bit value.
Definition: IRBuilder.h:473
Value * CreatePtrAdd(Value *Ptr, Value *Offset, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
Definition: IRBuilder.h:2002
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2277
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:483
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Definition: IRBuilder.h:1813
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1439
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1498
Value * CreateConstInBoundsGEP2_32(Type *Ty, Value *Ptr, unsigned Idx0, unsigned Idx1, const Twine &Name="")
Definition: IRBuilder.h:1936
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Definition: IRBuilder.h:1826
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1350
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2145
Value * CreateIsNotNull(Value *Arg, const Twine &Name="")
Return a boolean value testing if Arg != 0.
Definition: IRBuilder.h:2580
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2444
AtomicRMWInst * CreateAtomicRMW(AtomicRMWInst::BinOp Op, Value *Ptr, Value *Val, MaybeAlign Align, AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Definition: IRBuilder.h:1877
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2034
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1520
ConstantInt * getInt16(uint16_t C)
Get a constant 16-bit value.
Definition: IRBuilder.h:478
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:177
Value * CreateInBoundsPtrAdd(Value *Ptr, Value *Offset, const Twine &Name="")
Definition: IRBuilder.h:2007
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2697
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:567
A base class for all instrprof counter intrinsics.
ConstantInt * getIndex() const
ConstantInt * getNumCounters() const
static const char * FunctionNameAttributeName
static const char * CFGHashAttributeName
static const char * NumCountersAttributeName
This represents the llvm.instrprof.cover intrinsic.
This represents the llvm.instrprof.increment intrinsic.
A base class for all instrprof intrinsics.
GlobalVariable * getName() const
ConstantInt * getHash() const
A base class for instrprof mcdc intrinsics that require global bitmap bytes.
This represents the llvm.instrprof.mcdc.tvbitmap.update intrinsic.
ConstantInt * getBitmapIndex() const
This represents the llvm.instrprof.timestamp intrinsic.
This represents the llvm.instrprof.value.profile intrinsic.
ConstantInt * getIndex() const
ConstantInt * getValueKind() const
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:92
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:70
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1679
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
Class to represent integer types.
Definition: DerivedTypes.h:42
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
Helper class for promoting a collection of loads and stores into SSA Form using the SSAUpdater.
Definition: SSAUpdater.h:151
virtual void doExtraRewritesBeforeFinalDeletion()
This hook is invoked after all the stores are found and inserted as available values.
Definition: SSAUpdater.h:176
An instruction for reading from memory.
Definition: Instructions.h:176
void getExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const
Return all of the successor blocks of this loop.
void getExitingBlocks(SmallVectorImpl< BlockT * > &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
bool hasDedicatedExits() const
Return true if no exit block for the loop has a predecessor that is outside the loop.
SmallVector< LoopT *, 4 > getLoopsInPreorder() const
Return all of the loops in the function in preorder across the loop nests, with siblings in forward p...
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:39
MDNode * createUnlikelyBranchWeights()
Return metadata containing two branch weights, with significant bias towards false destination.
Definition: MDBuilder.cpp:47
Metadata node.
Definition: Metadata.h:1069
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1543
static MDString * get(LLVMContext &Context, StringRef Str)
Definition: Metadata.cpp:606
Root of the metadata hierarchy.
Definition: Metadata.h:62
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Definition: DerivedTypes.h:686
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: Analysis.h:114
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
Helper class for SSA formation on a set of values defined in multiple blocks.
Definition: SSAUpdater.h:40
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:384
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
Definition: SmallPtrSet.h:519
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition: StringRef.h:571
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:265
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:150
static StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition: Type.cpp:406
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
static Type * getVoidTy(LLVMContext &C)
static IntegerType * getInt16Ty(LLVMContext &C)
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:128
static IntegerType * getInt8Ty(LLVMContext &C)
static IntegerType * getInt32Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
Value * getOperand(unsigned i) const
Definition: User.h:228
unsigned getNumOperands() const
Definition: User.h:250
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1075
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
const ParentTy * getParent() const
Definition: ilist_node.h:32
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition: ilist_node.h:353
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
ValueKind
Value kinds.
@ Exit
Definition: COFF.h:845
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition: CallingConv.h:24
Function * getDeclarationIfExists(Module *M, ID id, ArrayRef< Type * > Tys, FunctionType *FT=nullptr)
This version supports overloaded intrinsics.
Definition: Intrinsics.cpp:746
@ PD
PD - Prefix code for packed double precision vector floating point operations performed in the SSE re...
Definition: X86BaseInfo.h:721
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:711
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
StringRef getInstrProfNameVarPrefix()
Return the name prefix of variables containing instrumented function names.
Definition: InstrProf.h:92
StringRef getInstrProfRuntimeHookVarName()
Return the name of the hook variable defined in profile runtime library.
Definition: InstrProf.h:163
void createProfileSamplingVar(Module &M)
StringRef getInstrProfBitmapVarPrefix()
Return the name prefix of profile bitmap variables.
Definition: InstrProf.h:104
cl::opt< bool > DoInstrProfNameCompression
cl::opt< InstrProfCorrelator::ProfCorrelatorKind > ProfileCorrelate("profile-correlate", cl::desc("Use debug info or binary file to correlate profiles."), cl::init(InstrProfCorrelator::NONE), cl::values(clEnumValN(InstrProfCorrelator::NONE, "", "No profile correlation"), clEnumValN(InstrProfCorrelator::DEBUG_INFO, "debug-info", "Use debug info to correlate"), clEnumValN(InstrProfCorrelator::BINARY, "binary", "Use binary to correlate")))
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:657
StringRef getInstrProfVTableNamesVarName()
Definition: InstrProf.h:116
StringRef getInstrProfDataVarPrefix()
Return the name prefix of variables containing per-function control data.
Definition: InstrProf.h:98
StringRef getCoverageUnusedNamesVarName()
Return the name of the internal variable recording the array of PGO name vars referenced by the cover...
Definition: InstrProf.h:129
std::string getInstrProfSectionName(InstrProfSectKind IPSK, Triple::ObjectFormatType OF, bool AddSegmentInfo=true)
Return the name of the profile section corresponding to IPSK.
Definition: InstrProf.cpp:236
cl::opt< bool > DebugInfoCorrelate
bool needsComdatForCounter(const GlobalObject &GV, const Module &M)
Check if we can use Comdat for profile variables.
Definition: InstrProf.cpp:1435
std::string getPGOName(const GlobalVariable &V, bool InLTO=false)
Definition: InstrProf.cpp:395
StringRef getInstrProfInitFuncName()
Return the name of the runtime initialization method that is generated by the compiler.
Definition: InstrProf.h:158
StringRef getInstrProfValuesVarPrefix()
Return the name prefix of value profile variables.
Definition: InstrProf.h:107
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746
StringRef getInstrProfCounterBiasVarName()
Definition: InstrProf.h:173
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:420
StringRef getInstrProfRuntimeHookVarUseFuncName()
Return the name of the compiler generated function that references the runtime hook variable.
Definition: InstrProf.h:169
StringRef getInstrProfRegFuncsName()
Return the name of function that registers all the per-function control data at program startup time ...
Definition: InstrProf.h:138
Error collectPGOFuncNameStrings(ArrayRef< GlobalVariable * > NameVars, std::string &Result, bool doCompression=true)
Produce Result string with the same format described above.
Definition: InstrProf.cpp:726
InstrProfSectKind
Definition: InstrProf.h:60
void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
StringRef getInstrProfCountersVarPrefix()
Return the name prefix of profile counter variables.
Definition: InstrProf.h:101
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1753
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
StringRef getPGOFuncNameVarInitializer(GlobalVariable *NameVar)
Return the initializer in string of the PGO name var NameVar.
Definition: InstrProf.cpp:719
StringRef getInstrProfBitmapBiasVarName()
Definition: InstrProf.h:177
StringRef getInstrProfValueProfMemOpFuncName()
Return the name of the profile runtime entry point that does memop size value profiling.
Definition: InstrProf.h:87
StringRef getInstrProfNamesRegFuncName()
Return the name of the runtime interface that registers the PGO name strings.
Definition: InstrProf.h:150
void appendToCompilerUsed(Module &M, ArrayRef< GlobalValue * > Values)
Adds global values to the llvm.compiler.used list.
@ Add
Sum of integers.
Error collectVTableStrings(ArrayRef< GlobalVariable * > VTables, std::string &Result, bool doCompression)
Definition: InstrProf.cpp:736
void setGlobalVariableLargeSection(const Triple &TargetTriple, GlobalVariable &GV)
bool canRenameComdatFunc(const Function &F, bool CheckAddressTaken=false)
Check if we can safely rename this Comdat function.
Definition: InstrProf.cpp:1483
void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput)
Definition: InstrProf.cpp:1506
void appendToGlobalCtors(Module &M, Function *F, int Priority, Constant *Data=nullptr)
Append F to the list of global ctors of module M with the given Priority.
Definition: ModuleUtils.cpp:74
bool isPresplitCoroSuspendExitEdge(const BasicBlock &Src, const BasicBlock &Dest)
auto predecessors(const MachineBasicBlock *BB)
StringRef getInstrProfValueProfFuncName()
Return the name of the profile runtime entry point that does value profiling for a given site.
Definition: InstrProf.h:81
StringRef getInstrProfRegFuncName()
Return the name of the runtime interface that registers per-function control data for one instrumente...
Definition: InstrProf.h:144
const char * toString(DWARFSectionKind Kind)
Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
void appendToUsed(Module &M, ArrayRef< GlobalValue * > Values)
Adds global values to the llvm.used list.
StringRef getInstrProfNamesVarName()
Return the name of the variable holding the strings (possibly compressed) of all function's PGO names...
Definition: InstrProf.h:114
bool isGPUProfTarget(const Module &M)
Determines whether the module targets a GPU eligible for PGO instrumentation.
Definition: InstrProf.cpp:440
bool isIRPGOFlagSet(const Module *M)
Check if INSTR_PROF_RAW_VERSION_VAR is defined.
Definition: InstrProf.cpp:1461
StringRef getInstrProfVNodesVarName()
Return the name of value profile node array variables:
Definition: InstrProf.h:110
cl::opt< bool > EnableVTableValueProfiling("enable-vtable-value-profiling", cl::init(false), cl::desc("If true, the virtual table address will be instrumented to know " "the types of a C++ pointer. The information is used in indirect " "call promotion to do selective vtable-based comparison."))
StringRef getInstrProfVTableVarPrefix()
Return the name prefix of variables containing virtual table profile data.
Definition: InstrProf.h:95
#define NC
Definition: regutils.h:42
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Options for the frontend instrumentation based profiling pass.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117