LLVM 23.0.0git
PGOInstrumentation.cpp
Go to the documentation of this file.
1//===- PGOInstrumentation.cpp - MST-based PGO Instrumentation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements PGO instrumentation using a minimum spanning tree based
10// on the following paper:
11// [1] Donald E. Knuth, Francis R. Stevenson. Optimal measurement of points
12// for program frequency counts. BIT Numerical Mathematics 1973, Volume 13,
13// Issue 3, pp 313-322
14// The idea of the algorithm based on the fact that for each node (except for
15// the entry and exit), the sum of incoming edge counts equals the sum of
16// outgoing edge counts. The count of edge on spanning tree can be derived from
17// those edges not on the spanning tree. Knuth proves this method instruments
18// the minimum number of edges.
19//
20// The minimal spanning tree here is actually a maximum weight tree -- on-tree
21// edges have higher frequencies (more likely to execute). The idea is to
22// instrument those less frequently executed edges to reduce the runtime
23// overhead of instrumented binaries.
24//
25// This file contains two passes:
26// (1) Pass PGOInstrumentationGen which instruments the IR to generate edge
27// count profile, and generates the instrumentation for indirect call
28// profiling.
29// (2) Pass PGOInstrumentationUse which reads the edge count profile and
30// annotates the branch weights. It also reads the indirect call value
31// profiling records and annotate the indirect call instructions.
32//
33// To get the precise counter information, These two passes need to invoke at
34// the same compilation point (so they see the same IR). For pass
35// PGOInstrumentationGen, the real work is done in instrumentOneFunc(). For
36// pass PGOInstrumentationUse, the real work in done in class PGOUseFunc and
37// the profile is opened in module level and passed to each PGOUseFunc instance.
38// The shared code for PGOInstrumentationGen and PGOInstrumentationUse is put
39// in class FuncPGOInstrumentation.
40//
41// Class PGOEdge represents a CFG edge and some auxiliary information. Class
42// BBInfo contains auxiliary information for each BB. These two classes are used
43// in pass PGOInstrumentationGen. Class PGOUseEdge and UseBBInfo are the derived
44// class of PGOEdge and BBInfo, respectively. They contains extra data structure
45// used in populating profile counters.
46// The MST implementation is in Class CFGMST (CFGMST.h).
47//
48//===----------------------------------------------------------------------===//
49
52#include "llvm/ADT/APInt.h"
53#include "llvm/ADT/ArrayRef.h"
54#include "llvm/ADT/STLExtras.h"
56#include "llvm/ADT/Statistic.h"
57#include "llvm/ADT/StringRef.h"
58#include "llvm/ADT/StringSet.h"
59#include "llvm/ADT/Twine.h"
60#include "llvm/ADT/iterator.h"
64#include "llvm/Analysis/CFG.h"
69#include "llvm/IR/Attributes.h"
70#include "llvm/IR/BasicBlock.h"
71#include "llvm/IR/CFG.h"
72#include "llvm/IR/Comdat.h"
73#include "llvm/IR/Constant.h"
74#include "llvm/IR/Constants.h"
76#include "llvm/IR/Dominators.h"
78#include "llvm/IR/Function.h"
79#include "llvm/IR/GlobalAlias.h"
80#include "llvm/IR/GlobalValue.h"
82#include "llvm/IR/IRBuilder.h"
83#include "llvm/IR/InstVisitor.h"
84#include "llvm/IR/InstrTypes.h"
85#include "llvm/IR/Instruction.h"
88#include "llvm/IR/Intrinsics.h"
89#include "llvm/IR/LLVMContext.h"
90#include "llvm/IR/MDBuilder.h"
91#include "llvm/IR/Module.h"
92#include "llvm/IR/PassManager.h"
95#include "llvm/IR/Type.h"
96#include "llvm/IR/Value.h"
100#include "llvm/Support/CRC.h"
101#include "llvm/Support/Casting.h"
105#include "llvm/Support/Debug.h"
106#include "llvm/Support/Error.h"
118#include <algorithm>
119#include <cassert>
120#include <cstdint>
121#include <memory>
122#include <numeric>
123#include <optional>
124#include <stack>
125#include <string>
126#include <unordered_map>
127#include <utility>
128#include <vector>
129
130using namespace llvm;
132
133#define DEBUG_TYPE "pgo-instrumentation"
134
135STATISTIC(NumOfPGOInstrument, "Number of edges instrumented.");
136STATISTIC(NumOfPGOSelectInsts, "Number of select instruction instrumented.");
137STATISTIC(NumOfPGOMemIntrinsics, "Number of mem intrinsics instrumented.");
138STATISTIC(NumOfPGOEdge, "Number of edges.");
139STATISTIC(NumOfPGOBB, "Number of basic-blocks.");
140STATISTIC(NumOfPGOSplit, "Number of critical edge splits.");
141STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts.");
142STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile.");
143STATISTIC(NumOfPGOMissing, "Number of functions without profile.");
144STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations.");
145STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO.");
146STATISTIC(NumOfCSPGOSelectInsts,
147 "Number of select instruction instrumented in CSPGO.");
148STATISTIC(NumOfCSPGOMemIntrinsics,
149 "Number of mem intrinsics instrumented in CSPGO.");
150STATISTIC(NumOfCSPGOEdge, "Number of edges in CSPGO.");
151STATISTIC(NumOfCSPGOBB, "Number of basic-blocks in CSPGO.");
152STATISTIC(NumOfCSPGOSplit, "Number of critical edge splits in CSPGO.");
153STATISTIC(NumOfCSPGOFunc,
154 "Number of functions having valid profile counts in CSPGO.");
155STATISTIC(NumOfCSPGOMismatch,
156 "Number of functions having mismatch profile in CSPGO.");
157STATISTIC(NumOfCSPGOMissing, "Number of functions without profile in CSPGO.");
158STATISTIC(NumCoveredBlocks, "Number of basic blocks that were executed");
159
160// Command line option to specify the file to read profile from. This is
161// mainly used for testing.
163 "pgo-test-profile-file", cl::init(""), cl::Hidden,
164 cl::value_desc("filename"),
165 cl::desc("Specify the path of profile data file. This is "
166 "mainly for test purpose."));
168 "pgo-test-profile-remapping-file", cl::init(""), cl::Hidden,
169 cl::value_desc("filename"),
170 cl::desc("Specify the path of profile remapping file. This is mainly for "
171 "test purpose."));
172
173// Command line option to disable value profiling. The default is false:
174// i.e. value profiling is enabled by default. This is for debug purpose.
175static cl::opt<bool> DisableValueProfiling("disable-vp", cl::init(false),
177 cl::desc("Disable Value Profiling"));
178
179// Command line option to set the maximum number of VP annotations to write to
180// the metadata for a single indirect call callsite.
182 "icp-max-annotations", cl::init(3), cl::Hidden,
183 cl::desc("Max number of annotations for a single indirect "
184 "call callsite"));
185
186// Command line option to set the maximum number of value annotations
187// to write to the metadata for a single memop intrinsic.
189 "memop-max-annotations", cl::init(4), cl::Hidden,
190 cl::desc("Max number of precise value annotations for a single memop"
191 "intrinsic"));
192
193// Command line option to control appending FunctionHash to the name of a COMDAT
194// function. This is to avoid the hash mismatch caused by the preinliner.
196 "do-comdat-renaming", cl::init(false), cl::Hidden,
197 cl::desc("Append function hash to the name of COMDAT function to avoid "
198 "function hash mismatch due to the preinliner"));
199
200namespace llvm {
201// Command line option to enable/disable the warning about missing profile
202// information.
203cl::opt<bool> PGOWarnMissing("pgo-warn-missing-function", cl::init(false),
205 cl::desc("Use this option to turn on/off "
206 "warnings about missing profile data for "
207 "functions."));
208
209// Command line option to enable/disable the warning about a hash mismatch in
210// the profile data.
212 NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), cl::Hidden,
213 cl::desc("Use this option to turn off/on "
214 "warnings about profile cfg mismatch."));
215
216// Command line option to enable/disable the warning about a hash mismatch in
217// the profile data for Comdat functions, which often turns out to be false
218// positive due to the pre-instrumentation inline.
220 "no-pgo-warn-mismatch-comdat-weak", cl::init(true), cl::Hidden,
221 cl::desc("The option is used to turn on/off "
222 "warnings about hash mismatch for comdat "
223 "or weak functions."));
224
225// Command line option to enable/disable select instruction instrumentation.
226static cl::opt<bool>
227 PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden,
228 cl::desc("Use this option to turn on/off SELECT "
229 "instruction instrumentation. "));
230
231// Command line option to turn on CFG dot or text dump of raw profile counts
233 "pgo-view-raw-counts", cl::Hidden,
234 cl::desc("A boolean option to show CFG dag or text "
235 "with raw profile counts from "
236 "profile data. See also option "
237 "-pgo-view-counts. To limit graph "
238 "display to only one function, use "
239 "filtering option -view-bfi-func-name."),
240 cl::values(clEnumValN(PGOVCT_None, "none", "do not show."),
241 clEnumValN(PGOVCT_Graph, "graph", "show a graph."),
242 clEnumValN(PGOVCT_Text, "text", "show in text.")));
243
244// Command line option to enable/disable memop intrinsic call.size profiling.
245static cl::opt<bool>
246 PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden,
247 cl::desc("Use this option to turn on/off "
248 "memory intrinsic size profiling."));
249
250// Emit branch probability as optimization remarks.
251static cl::opt<bool>
252 EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden,
253 cl::desc("When this option is on, the annotated "
254 "branch probability will be emitted as "
255 "optimization remarks: -{Rpass|"
256 "pass-remarks}=pgo-instrumentation"));
257
259 "pgo-instrument-entry", cl::init(false), cl::Hidden,
260 cl::desc("Force to instrument function entry basicblock."));
261
262static cl::opt<bool>
263 PGOInstrumentLoopEntries("pgo-instrument-loop-entries", cl::init(false),
265 cl::desc("Force to instrument loop entries."));
266
268 "pgo-function-entry-coverage", cl::Hidden,
269 cl::desc(
270 "Use this option to enable function entry coverage instrumentation."));
271
273 "pgo-block-coverage",
274 cl::desc("Use this option to enable basic block coverage instrumentation"));
275
276static cl::opt<bool>
277 PGOViewBlockCoverageGraph("pgo-view-block-coverage-graph",
278 cl::desc("Create a dot file of CFGs with block "
279 "coverage inference information"));
280
282 "pgo-temporal-instrumentation",
283 cl::desc("Use this option to enable temporal instrumentation"));
284
285static cl::opt<bool>
286 PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden,
287 cl::desc("Fix function entry count in profile use."));
288
290 "pgo-verify-hot-bfi", cl::init(false), cl::Hidden,
291 cl::desc("Print out the non-match BFI count if a hot raw profile count "
292 "becomes non-hot, or a cold raw profile count becomes hot. "
293 "The print is enabled under -Rpass-analysis=pgo, or "
294 "internal option -pass-remarks-analysis=pgo."));
295
297 "pgo-verify-bfi", cl::init(false), cl::Hidden,
298 cl::desc("Print out mismatched BFI counts after setting profile metadata "
299 "The print is enabled under -Rpass-analysis=pgo, or "
300 "internal option -pass-remarks-analysis=pgo."));
301
303 "pgo-verify-bfi-ratio", cl::init(2), cl::Hidden,
304 cl::desc("Set the threshold for pgo-verify-bfi: only print out "
305 "mismatched BFI if the difference percentage is greater than "
306 "this value (in percentage)."));
307
309 "pgo-verify-bfi-cutoff", cl::init(5), cl::Hidden,
310 cl::desc("Set the threshold for pgo-verify-bfi: skip the counts whose "
311 "profile count value is below."));
312
314 "pgo-trace-func-hash", cl::init("-"), cl::Hidden,
315 cl::value_desc("function name"),
316 cl::desc("Trace the hash of the function with this name."));
317
319 "pgo-function-size-threshold", cl::Hidden,
320 cl::desc("Do not instrument functions smaller than this threshold."));
321
323 "pgo-critical-edge-threshold", cl::init(20000), cl::Hidden,
324 cl::desc("Do not instrument functions with the number of critical edges "
325 " greater than this threshold."));
326
328 "pgo-cold-instrument-entry-threshold", cl::init(0), cl::Hidden,
329 cl::desc("For cold function instrumentation, skip instrumenting functions "
330 "whose entry count is above the given value."));
331
333 "pgo-treat-unknown-as-cold", cl::init(false), cl::Hidden,
334 cl::desc("For cold function instrumentation, treat count unknown(e.g. "
335 "unprofiled) functions as cold."));
336
338 "pgo-instrument-cold-function-only", cl::init(false), cl::Hidden,
339 cl::desc("Enable cold function only instrumentation."));
340
342 "ctx-prof-skip-callsite-instr", cl::Hidden,
343 cl::desc("Do not instrument callsites to functions in this list. Intended "
344 "for testing."));
345
347
348// Command line option to turn on CFG dot dump after profile annotation.
349// Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts
351
352// Command line option to specify the name of the function for CFG dump
353// Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name=
355
356// Command line option to enable vtable value profiling. Defined in
357// ProfileData/InstrProf.cpp: -enable-vtable-value-profiling=
362} // namespace llvm
363
364namespace {
365class FunctionInstrumenter final {
366 Module &M;
367 Function &F;
369 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers;
370 BranchProbabilityInfo *const BPI;
371 BlockFrequencyInfo *const BFI;
372 LoopInfo *const LI;
373
374 const PGOInstrumentationType InstrumentationType;
375
376 // FIXME(mtrofin): re-enable this for ctx profiling, for non-indirect calls.
377 // Ctx profiling implicitly captures indirect call cases, but not other
378 // values. Supporting other values is relatively straight-forward - just
379 // another counter range within the context.
380 bool isValueProfilingDisabled() const {
381 // Value profiling is disabled for GPU targets because the device-side
382 // profiling runtime does not yet implement
383 // __llvm_profile_instrument_target. The existing compiler-rt implementation
384 // uses a linked-list with locks and eviction policy that is not efficient
385 // for massively parallel GPU execution. A GPU-optimized implementation is
386 // left as future work.
387 return DisableValueProfiling ||
388 InstrumentationType == PGOInstrumentationType::CTXPROF ||
390 }
391
392 bool shouldInstrumentEntryBB() const {
393 return PGOInstrumentEntry ||
394 InstrumentationType == PGOInstrumentationType::CTXPROF;
395 }
396
397 bool shouldInstrumentLoopEntries() const { return PGOInstrumentLoopEntries; }
398
399public:
400 FunctionInstrumenter(
402 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
403 BranchProbabilityInfo *BPI = nullptr, BlockFrequencyInfo *BFI = nullptr,
404 LoopInfo *LI = nullptr,
406 : M(M), F(F), TLI(TLI), ComdatMembers(ComdatMembers), BPI(BPI), BFI(BFI),
407 LI(LI), InstrumentationType(InstrumentationType) {}
408
409 void instrument();
410};
411} // namespace
412
413// Return a string describing the branch condition that can be
414// used in static branch probability heuristics:
415static std::string getBranchCondString(Instruction *TI) {
417 if (!BI)
418 return std::string();
419
420 Value *Cond = BI->getCondition();
422 if (!CI)
423 return std::string();
424
425 std::string result;
426 raw_string_ostream OS(result);
427 OS << CI->getPredicate() << "_";
428 CI->getOperand(0)->getType()->print(OS, true);
429
430 Value *RHS = CI->getOperand(1);
432 if (CV) {
433 if (CV->isZero())
434 OS << "_Zero";
435 else if (CV->isOne())
436 OS << "_One";
437 else if (CV->isMinusOne())
438 OS << "_MinusOne";
439 else
440 OS << "_Const";
441 }
442 return result;
443}
444
445static const char *ValueProfKindDescr[] = {
446#define VALUE_PROF_KIND(Enumerator, Value, Descr) Descr,
448};
449
450// Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime
451// aware this is an ir_level profile so it can set the version flag.
452static GlobalVariable *
454 PGOInstrumentationType InstrumentationType) {
456 Type *IntTy64 = Type::getInt64Ty(M.getContext());
458 if (InstrumentationType == PGOInstrumentationType::CSFDO)
459 ProfileVersion |= VARIANT_MASK_CSIR_PROF;
460 if (PGOInstrumentEntry ||
461 InstrumentationType == PGOInstrumentationType::CTXPROF)
462 ProfileVersion |= VARIANT_MASK_INSTR_ENTRY;
464 ProfileVersion |= VARIANT_MASK_INSTR_LOOP_ENTRIES;
466 ProfileVersion |= VARIANT_MASK_DBG_CORRELATE;
468 ProfileVersion |=
471 ProfileVersion |= VARIANT_MASK_BYTE_COVERAGE;
473 ProfileVersion |= VARIANT_MASK_TEMPORAL_PROF;
474 auto IRLevelVersionVariable = new GlobalVariable(
475 M, IntTy64, true, GlobalValue::WeakAnyLinkage,
476 Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), VarName);
477 IRLevelVersionVariable->setVisibility(GlobalValue::HiddenVisibility);
478
479 Triple TT(M.getTargetTriple());
480 if (TT.supportsCOMDAT()) {
481 IRLevelVersionVariable->setLinkage(GlobalValue::ExternalLinkage);
482 IRLevelVersionVariable->setComdat(M.getOrInsertComdat(VarName));
483 }
484 return IRLevelVersionVariable;
485}
486
487namespace {
488
489/// The select instruction visitor plays three roles specified
490/// by the mode. In \c VM_counting mode, it simply counts the number of
491/// select instructions. In \c VM_instrument mode, it inserts code to count
492/// the number times TrueValue of select is taken. In \c VM_annotate mode,
493/// it reads the profile data and annotate the select instruction with metadata.
494enum VisitMode { VM_counting, VM_instrument, VM_annotate };
495class PGOUseFunc;
496
497/// Instruction Visitor class to visit select instructions.
498struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> {
499 Function &F;
500 unsigned NSIs = 0; // Number of select instructions instrumented.
501 VisitMode Mode = VM_counting; // Visiting mode.
502 unsigned *CurCtrIdx = nullptr; // Pointer to current counter index.
503 unsigned TotalNumCtrs = 0; // Total number of counters
504 GlobalValue *FuncNameVar = nullptr;
505 uint64_t FuncHash = 0;
506 PGOUseFunc *UseFunc = nullptr;
507 bool HasSingleByteCoverage;
508
509 SelectInstVisitor(Function &Func, bool HasSingleByteCoverage)
510 : F(Func), HasSingleByteCoverage(HasSingleByteCoverage) {}
511
512 void countSelects() {
513 NSIs = 0;
514 Mode = VM_counting;
515 visit(F);
516 }
517
518 // Visit the IR stream and instrument all select instructions. \p
519 // Ind is a pointer to the counter index variable; \p TotalNC
520 // is the total number of counters; \p FNV is the pointer to the
521 // PGO function name var; \p FHash is the function hash.
522 void instrumentSelects(unsigned *Ind, unsigned TotalNC, GlobalValue *FNV,
523 uint64_t FHash) {
524 Mode = VM_instrument;
525 CurCtrIdx = Ind;
526 TotalNumCtrs = TotalNC;
527 FuncHash = FHash;
528 FuncNameVar = FNV;
529 visit(F);
530 }
531
532 // Visit the IR stream and annotate all select instructions.
533 void annotateSelects(PGOUseFunc *UF, unsigned *Ind) {
534 Mode = VM_annotate;
535 UseFunc = UF;
536 CurCtrIdx = Ind;
537 visit(F);
538 }
539
540 void instrumentOneSelectInst(SelectInst &SI);
541 void annotateOneSelectInst(SelectInst &SI);
542
543 // Visit \p SI instruction and perform tasks according to visit mode.
544 void visitSelectInst(SelectInst &SI);
545
546 // Return the number of select instructions. This needs be called after
547 // countSelects().
548 unsigned getNumOfSelectInsts() const { return NSIs; }
549};
550
551/// This class implements the CFG edges for the Minimum Spanning Tree (MST)
552/// based instrumentation.
553/// Note that the CFG can be a multi-graph. So there might be multiple edges
554/// with the same SrcBB and DestBB.
555struct PGOEdge {
556 BasicBlock *SrcBB;
557 BasicBlock *DestBB;
558 uint64_t Weight;
559 bool InMST = false;
560 bool Removed = false;
561 bool IsCritical = false;
562
563 PGOEdge(BasicBlock *Src, BasicBlock *Dest, uint64_t W = 1)
564 : SrcBB(Src), DestBB(Dest), Weight(W) {}
565
566 /// Return the information string of an edge.
567 std::string infoString() const {
568 return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") +
569 (IsCritical ? "c" : " ") + " W=" + Twine(Weight))
570 .str();
571 }
572};
573
574/// This class stores the auxiliary information for each BB in the MST.
575struct PGOBBInfo {
576 PGOBBInfo *Group;
577 uint32_t Index;
578 uint32_t Rank = 0;
579
580 PGOBBInfo(unsigned IX) : Group(this), Index(IX) {}
581
582 /// Return the information string of this object.
583 std::string infoString() const {
584 return (Twine("Index=") + Twine(Index)).str();
585 }
586};
587
588// This class implements the CFG edges. Note the CFG can be a multi-graph.
589template <class Edge, class BBInfo> class FuncPGOInstrumentation {
590private:
591 Function &F;
592
593 // Is this is context-sensitive instrumentation.
594 bool IsCS;
595
596 // A map that stores the Comdat group in function F.
597 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers;
598
599 ValueProfileCollector VPC;
600
601 void computeCFGHash();
602 void renameComdatFunction();
603
604public:
605 const TargetLibraryInfo &TLI;
606 std::vector<std::vector<VPCandidateInfo>> ValueSites;
607 SelectInstVisitor SIVisitor;
608 std::string FuncName;
609 std::string DeprecatedFuncName;
610 GlobalVariable *FuncNameVar;
611
612 // CFG hash value for this function.
613 uint64_t FunctionHash = 0;
614
615 // The Minimum Spanning Tree of function CFG.
616 CFGMST<Edge, BBInfo> MST;
617
618 const std::optional<BlockCoverageInference> BCI;
619
620 static std::optional<BlockCoverageInference>
621 constructBCI(Function &Func, bool HasSingleByteCoverage,
622 bool InstrumentFuncEntry) {
623 if (HasSingleByteCoverage)
624 return BlockCoverageInference(Func, InstrumentFuncEntry);
625 return {};
626 }
627
628 // Collect all the BBs that will be instrumented, and store them in
629 // InstrumentBBs.
630 void getInstrumentBBs(std::vector<BasicBlock *> &InstrumentBBs);
631
632 // Give an edge, find the BB that will be instrumented.
633 // Return nullptr if there is no BB to be instrumented.
635
636 // Return the auxiliary BB information.
637 BBInfo &getBBInfo(const BasicBlock *BB) const { return MST.getBBInfo(BB); }
638
639 // Return the auxiliary BB information if available.
640 BBInfo *findBBInfo(const BasicBlock *BB) const { return MST.findBBInfo(BB); }
641
642 // Dump edges and BB information.
643 void dumpInfo(StringRef Str = "") const {
644 MST.dumpEdges(dbgs(), Twine("Dump Function ") + FuncName +
645 " Hash: " + Twine(FunctionHash) + "\t" + Str);
646 }
647
648 FuncPGOInstrumentation(
649 Function &Func, TargetLibraryInfo &TLI,
650 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
651 bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr,
652 BlockFrequencyInfo *BFI = nullptr, LoopInfo *LI = nullptr,
653 bool IsCS = false, bool InstrumentFuncEntry = true,
654 bool InstrumentLoopEntries = false, bool HasSingleByteCoverage = false)
655 : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func, TLI),
656 TLI(TLI), ValueSites(IPVK_Last + 1),
657 SIVisitor(Func, HasSingleByteCoverage),
658 MST(F, InstrumentFuncEntry, InstrumentLoopEntries, BPI, BFI, LI),
659 BCI(constructBCI(Func, HasSingleByteCoverage, InstrumentFuncEntry)) {
660 if (BCI && PGOViewBlockCoverageGraph)
661 BCI->viewBlockCoverageGraph();
662 // This should be done before CFG hash computation.
663 SIVisitor.countSelects();
664 ValueSites[IPVK_MemOPSize] = VPC.get(IPVK_MemOPSize);
665 if (!IsCS) {
666 NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
667 NumOfPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
668 NumOfPGOBB += MST.bbInfoSize();
669 ValueSites[IPVK_IndirectCallTarget] = VPC.get(IPVK_IndirectCallTarget);
671 ValueSites[IPVK_VTableTarget] = VPC.get(IPVK_VTableTarget);
672 } else {
673 NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
674 NumOfCSPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
675 NumOfCSPGOBB += MST.bbInfoSize();
676 }
677
678 FuncName = getIRPGOFuncName(F);
679 DeprecatedFuncName = getPGOFuncName(F);
680 computeCFGHash();
681 if (!ComdatMembers.empty())
682 renameComdatFunction();
683 LLVM_DEBUG(dumpInfo("after CFGMST"));
684
685 for (const auto &E : MST.allEdges()) {
686 if (E->Removed)
687 continue;
688 IsCS ? NumOfCSPGOEdge++ : NumOfPGOEdge++;
689 if (!E->InMST)
690 IsCS ? NumOfCSPGOInstrument++ : NumOfPGOInstrument++;
691 }
692
693 if (CreateGlobalVar)
694 FuncNameVar = createPGOFuncNameVar(F, FuncName);
695 }
696};
697
698} // end anonymous namespace
699
700// Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index
701// value of each BB in the CFG. The higher 32 bits are the CRC32 of the numbers
702// of selects, indirect calls, mem ops and edges.
703template <class Edge, class BBInfo>
704void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
705 std::vector<uint8_t> Indexes;
706 JamCRC JC;
707 for (auto &BB : F) {
708 for (BasicBlock *Succ : successors(&BB)) {
709 auto BI = findBBInfo(Succ);
710 if (BI == nullptr)
711 continue;
712 uint32_t Index = BI->Index;
713 for (int J = 0; J < 4; J++)
714 Indexes.push_back((uint8_t)(Index >> (J * 8)));
715 }
716 }
717 JC.update(Indexes);
718
719 JamCRC JCH;
720 // The higher 32 bits.
721 auto updateJCH = [&JCH](uint64_t Num) {
722 uint8_t Data[8];
724 JCH.update(Data);
725 };
726 updateJCH((uint64_t)SIVisitor.getNumOfSelectInsts());
727 updateJCH((uint64_t)ValueSites[IPVK_IndirectCallTarget].size());
728 updateJCH((uint64_t)ValueSites[IPVK_MemOPSize].size());
729 if (BCI) {
730 updateJCH(BCI->getInstrumentedBlocksHash());
731 } else {
732 updateJCH((uint64_t)MST.numEdges());
733 }
734
735 // Hash format for context sensitive profile. Reserve 4 bits for other
736 // information.
737 FunctionHash = (((uint64_t)JCH.getCRC()) << 28) + JC.getCRC();
738
739 // Reserve bit 60-63 for other information purpose.
741 if (IsCS)
743 LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n"
744 << " CRC = " << JC.getCRC()
745 << ", Selects = " << SIVisitor.getNumOfSelectInsts()
746 << ", Edges = " << MST.numEdges() << ", ICSites = "
747 << ValueSites[IPVK_IndirectCallTarget].size()
748 << ", Memops = " << ValueSites[IPVK_MemOPSize].size()
749 << ", High32 CRC = " << JCH.getCRC()
750 << ", Hash = " << FunctionHash << "\n";);
751
752 if (PGOTraceFuncHash != "-" && F.getName().contains(PGOTraceFuncHash))
753 dbgs() << "Funcname=" << F.getName() << ", Hash=" << FunctionHash
754 << " in building " << F.getParent()->getSourceFileName() << "\n";
755}
756
757// Check if we can safely rename this Comdat function.
758static bool canRenameComdat(
759 Function &F,
760 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
761 if (!DoComdatRenaming || !canRenameComdatFunc(F, true))
762 return false;
763
764 // FIXME: Current only handle those Comdat groups that only containing one
765 // function.
766 // (1) For a Comdat group containing multiple functions, we need to have a
767 // unique postfix based on the hashes for each function. There is a
768 // non-trivial code refactoring to do this efficiently.
769 // (2) Variables can not be renamed, so we can not rename Comdat function in a
770 // group including global vars.
771 Comdat *C = F.getComdat();
772 for (auto &&CM : make_range(ComdatMembers.equal_range(C))) {
773 assert(!isa<GlobalAlias>(CM.second));
774 Function *FM = dyn_cast<Function>(CM.second);
775 if (FM != &F)
776 return false;
777 }
778 return true;
779}
780
781// Append the CFGHash to the Comdat function name.
782template <class Edge, class BBInfo>
783void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() {
784 if (!canRenameComdat(F, ComdatMembers))
785 return;
786 std::string OrigName = F.getName().str();
787 std::string NewFuncName =
788 Twine(F.getName() + "." + Twine(FunctionHash)).str();
789 F.setName(Twine(NewFuncName));
791 FuncName = Twine(FuncName + "." + Twine(FunctionHash)).str();
792 Comdat *NewComdat;
793 Module *M = F.getParent();
794 // For AvailableExternallyLinkage functions, change the linkage to
795 // LinkOnceODR and put them into comdat. This is because after renaming, there
796 // is no backup external copy available for the function.
797 if (!F.hasComdat()) {
799 NewComdat = M->getOrInsertComdat(StringRef(NewFuncName));
801 F.setComdat(NewComdat);
802 return;
803 }
804
805 // This function belongs to a single function Comdat group.
806 Comdat *OrigComdat = F.getComdat();
807 std::string NewComdatName =
808 Twine(OrigComdat->getName() + "." + Twine(FunctionHash)).str();
809 NewComdat = M->getOrInsertComdat(StringRef(NewComdatName));
810 NewComdat->setSelectionKind(OrigComdat->getSelectionKind());
811
812 for (auto &&CM : make_range(ComdatMembers.equal_range(OrigComdat))) {
813 // Must be a function.
814 cast<Function>(CM.second)->setComdat(NewComdat);
815 }
816}
817
818/// Collect all the BBs that will be instruments and add them to
819/// `InstrumentBBs`.
820template <class Edge, class BBInfo>
821void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs(
822 std::vector<BasicBlock *> &InstrumentBBs) {
823 if (BCI) {
824 for (auto &BB : F)
825 if (BCI->shouldInstrumentBlock(BB))
826 InstrumentBBs.push_back(&BB);
827 return;
828 }
829
830 // Use a worklist as we will update the vector during the iteration.
831 std::vector<Edge *> EdgeList;
832 EdgeList.reserve(MST.numEdges());
833 for (const auto &E : MST.allEdges())
834 EdgeList.push_back(E.get());
835
836 for (auto &E : EdgeList) {
837 BasicBlock *InstrBB = getInstrBB(E);
838 if (InstrBB)
839 InstrumentBBs.push_back(InstrBB);
840 }
841}
842
843// Given a CFG E to be instrumented, find which BB to place the instrumented
844// code. The function will split the critical edge if necessary.
845template <class Edge, class BBInfo>
846BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) {
847 if (E->InMST || E->Removed)
848 return nullptr;
849
850 BasicBlock *SrcBB = E->SrcBB;
851 BasicBlock *DestBB = E->DestBB;
852 // For a fake edge, instrument the real BB.
853 if (SrcBB == nullptr)
854 return DestBB;
855 if (DestBB == nullptr)
856 return SrcBB;
857
858 auto canInstrument = [](BasicBlock *BB) -> BasicBlock * {
859 // There are basic blocks (such as catchswitch) cannot be instrumented.
860 // If the returned first insertion point is the end of BB, skip this BB.
861 if (BB->getFirstNonPHIOrDbgOrAlloca() == BB->end())
862 return nullptr;
863 return BB;
864 };
865
866 // Instrument the SrcBB if it has a single successor,
867 // otherwise, the DestBB if this is not a critical edge.
868 Instruction *TI = SrcBB->getTerminator();
869 if (TI->getNumSuccessors() <= 1)
870 return canInstrument(SrcBB);
871 if (!E->IsCritical)
872 return canInstrument(DestBB);
873
874 // Some IndirectBr critical edges cannot be split by the previous
875 // SplitIndirectBrCriticalEdges call. Bail out.
876 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
877 BasicBlock *InstrBB =
878 isa<IndirectBrInst>(TI) ? nullptr : SplitCriticalEdge(TI, SuccNum);
879 if (!InstrBB) {
881 dbgs() << "Fail to split critical edge: not instrument this edge.\n");
882 return nullptr;
883 }
884 // For a critical edge, we have to split. Instrument the newly
885 // created BB.
886 IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++;
887 LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index
888 << " --> " << getBBInfo(DestBB).Index << "\n");
889 // Need to add two new edges. First one: Add new edge of SrcBB->InstrBB.
890 MST.addEdge(SrcBB, InstrBB, 0);
891 // Second one: Add new edge of InstrBB->DestBB.
892 Edge &NewEdge1 = MST.addEdge(InstrBB, DestBB, 0);
893 NewEdge1.InMST = true;
894 E->Removed = true;
895
896 return canInstrument(InstrBB);
897}
898
899// When generating value profiling calls on Windows routines that make use of
900// handler funclets for exception processing an operand bundle needs to attached
901// to the called function. This routine will set \p OpBundles to contain the
902// funclet information, if any is needed, that should be placed on the generated
903// value profiling call for the value profile candidate call.
904static void
908 auto *OrigCall = dyn_cast<CallBase>(Cand.AnnotatedInst);
909 if (!OrigCall)
910 return;
911
912 if (!isa<IntrinsicInst>(OrigCall)) {
913 // The instrumentation call should belong to the same funclet as a
914 // non-intrinsic call, so just copy the operand bundle, if any exists.
915 std::optional<OperandBundleUse> ParentFunclet =
916 OrigCall->getOperandBundle(LLVMContext::OB_funclet);
917 if (ParentFunclet)
918 OpBundles.emplace_back(OperandBundleDef(*ParentFunclet));
919 } else {
920 // Intrinsics or other instructions do not get funclet information from the
921 // front-end. Need to use the BlockColors that was computed by the routine
922 // colorEHFunclets to determine whether a funclet is needed.
923 if (!BlockColors.empty()) {
924 const ColorVector &CV = BlockColors.find(OrigCall->getParent())->second;
925 assert(CV.size() == 1 && "non-unique color for block!");
927 if (EHPadIt->isEHPad())
928 OpBundles.emplace_back("funclet", &*EHPadIt);
929 }
930 }
931}
932
933// Visit all edge and instrument the edges not in MST, and do value profiling.
934// Critical edges will be split.
935void FunctionInstrumenter::instrument() {
936 if (!PGOBlockCoverage) {
937 // Split indirectbr critical edges here before computing the MST rather than
938 // later in getInstrBB() to avoid invalidating it.
939 SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, BFI);
940 }
941
942 const bool IsCtxProf = InstrumentationType == PGOInstrumentationType::CTXPROF;
943 FuncPGOInstrumentation<PGOEdge, PGOBBInfo> FuncInfo(
944 F, TLI, ComdatMembers, /*CreateGlobalVar=*/!IsCtxProf, BPI, BFI, LI,
945 InstrumentationType == PGOInstrumentationType::CSFDO,
946 shouldInstrumentEntryBB(), shouldInstrumentLoopEntries(),
948
949 auto *const Name = IsCtxProf ? cast<GlobalValue>(&F) : FuncInfo.FuncNameVar;
950 auto *const CFGHash =
951 ConstantInt::get(Type::getInt64Ty(M.getContext()), FuncInfo.FunctionHash);
952 // Make sure that pointer to global is passed in with zero addrspace
953 // This is relevant during GPU profiling
954 auto *NormalizedNamePtr = ConstantExpr::getPointerBitCastOrAddrSpaceCast(
955 Name, PointerType::get(M.getContext(), 0));
957 auto &EntryBB = F.getEntryBlock();
958 IRBuilder<> Builder(&EntryBB, EntryBB.getFirstNonPHIOrDbgOrAlloca());
959 // llvm.instrprof.cover(i8* <name>, i64 <hash>, i32 <num-counters>,
960 // i32 <index>)
961 Builder.CreateIntrinsic(
962 Intrinsic::instrprof_cover,
963 {NormalizedNamePtr, CFGHash, Builder.getInt32(1), Builder.getInt32(0)});
964 return;
965 }
966
967 std::vector<BasicBlock *> InstrumentBBs;
968 FuncInfo.getInstrumentBBs(InstrumentBBs);
969 unsigned NumCounters =
970 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
971
972 if (IsCtxProf) {
973 StringSet<> SkipCSInstr(llvm::from_range, CtxPGOSkipCallsiteInstrument);
974
975 auto *CSIntrinsic =
976 Intrinsic::getOrInsertDeclaration(&M, Intrinsic::instrprof_callsite);
977 // We want to count the instrumentable callsites, then instrument them. This
978 // is because the llvm.instrprof.callsite intrinsic has an argument (like
979 // the other instrprof intrinsics) capturing the total number of
980 // instrumented objects (counters, or callsites, in this case). In this
981 // case, we want that value so we can readily pass it to the compiler-rt
982 // APIs that may have to allocate memory based on the nr of callsites.
983 // The traversal logic is the same for both counting and instrumentation,
984 // just needs to be done in succession.
985 auto Visit = [&](llvm::function_ref<void(CallBase * CB)> Visitor) {
986 for (auto &BB : F)
987 for (auto &Instr : BB)
988 if (auto *CS = dyn_cast<CallBase>(&Instr)) {
990 continue;
991 if (CS->getCalledFunction() &&
992 SkipCSInstr.contains(CS->getCalledFunction()->getName()))
993 continue;
994 Visitor(CS);
995 }
996 };
997 // First, count callsites.
998 uint32_t TotalNumCallsites = 0;
999 Visit([&TotalNumCallsites](auto *) { ++TotalNumCallsites; });
1000
1001 // Now instrument.
1002 uint32_t CallsiteIndex = 0;
1003 Visit([&](auto *CB) {
1004 IRBuilder<> Builder(CB);
1005 Builder.CreateCall(CSIntrinsic,
1006 {Name, CFGHash, Builder.getInt32(TotalNumCallsites),
1007 Builder.getInt32(CallsiteIndex++),
1008 CB->getCalledOperand()});
1009 });
1010 }
1011
1012 uint32_t I = 0;
1014 NumCounters += PGOBlockCoverage ? 8 : 1;
1015 auto &EntryBB = F.getEntryBlock();
1016 IRBuilder<> Builder(&EntryBB, EntryBB.getFirstNonPHIOrDbgOrAlloca());
1017 // llvm.instrprof.timestamp(i8* <name>, i64 <hash>, i32 <num-counters>,
1018 // i32 <index>)
1019 Builder.CreateIntrinsic(Intrinsic::instrprof_timestamp,
1020 {NormalizedNamePtr, CFGHash,
1021 Builder.getInt32(NumCounters),
1022 Builder.getInt32(I)});
1023 I += PGOBlockCoverage ? 8 : 1;
1024 }
1025
1026 for (auto *InstrBB : InstrumentBBs) {
1027 IRBuilder<> Builder(InstrBB, InstrBB->getFirstNonPHIOrDbgOrAlloca());
1028 assert(Builder.GetInsertPoint() != InstrBB->end() &&
1029 "Cannot get the Instrumentation point");
1030 // llvm.instrprof.increment(i8* <name>, i64 <hash>, i32 <num-counters>,
1031 // i32 <index>)
1032 Builder.CreateIntrinsic(PGOBlockCoverage ? Intrinsic::instrprof_cover
1033 : Intrinsic::instrprof_increment,
1034 {NormalizedNamePtr, CFGHash,
1035 Builder.getInt32(NumCounters),
1036 Builder.getInt32(I++)});
1037 }
1038
1039 // Now instrument select instructions:
1040 FuncInfo.SIVisitor.instrumentSelects(&I, NumCounters, Name,
1041 FuncInfo.FunctionHash);
1042 assert(I == NumCounters);
1043
1044 if (isValueProfilingDisabled())
1045 return;
1046
1047 NumOfPGOICall += FuncInfo.ValueSites[IPVK_IndirectCallTarget].size();
1048
1049 // Intrinsic function calls do not have funclet operand bundles needed for
1050 // Windows exception handling attached to them. However, if value profiling is
1051 // inserted for one of these calls, then a funclet value will need to be set
1052 // on the instrumentation call based on the funclet coloring.
1053 DenseMap<BasicBlock *, ColorVector> BlockColors;
1054 if (F.hasPersonalityFn() &&
1055 isScopedEHPersonality(classifyEHPersonality(F.getPersonalityFn())))
1056 BlockColors = colorEHFunclets(F);
1057
1058 // For each VP Kind, walk the VP candidates and instrument each one.
1059 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) {
1060 unsigned SiteIndex = 0;
1061 if (Kind == IPVK_MemOPSize && !PGOInstrMemOP)
1062 continue;
1063
1064 for (VPCandidateInfo Cand : FuncInfo.ValueSites[Kind]) {
1065 LLVM_DEBUG(dbgs() << "Instrument one VP " << ValueProfKindDescr[Kind]
1066 << " site: CallSite Index = " << SiteIndex << "\n");
1067
1068 IRBuilder<> Builder(Cand.InsertPt);
1069 assert(Builder.GetInsertPoint() != Cand.InsertPt->getParent()->end() &&
1070 "Cannot get the Instrumentation point");
1071
1072 Value *ToProfile = nullptr;
1073 if (Cand.V->getType()->isIntegerTy())
1074 ToProfile = Builder.CreateZExtOrTrunc(Cand.V, Builder.getInt64Ty());
1075 else if (Cand.V->getType()->isPointerTy())
1076 ToProfile = Builder.CreatePtrToInt(Cand.V, Builder.getInt64Ty());
1077 assert(ToProfile && "value profiling Value is of unexpected type");
1078
1079 auto *NormalizedNamePtr = ConstantExpr::getPointerBitCastOrAddrSpaceCast(
1080 Name, PointerType::get(M.getContext(), 0));
1081
1083 populateEHOperandBundle(Cand, BlockColors, OpBundles);
1084 Builder.CreateCall(
1086 Intrinsic::instrprof_value_profile),
1087 {NormalizedNamePtr, Builder.getInt64(FuncInfo.FunctionHash),
1088 ToProfile, Builder.getInt32(Kind), Builder.getInt32(SiteIndex++)},
1089 OpBundles);
1090 }
1091 } // IPVK_First <= Kind <= IPVK_Last
1092}
1093
1094namespace {
1095
1096// This class represents a CFG edge in profile use compilation.
1097struct PGOUseEdge : public PGOEdge {
1098 using PGOEdge::PGOEdge;
1099
1100 std::optional<uint64_t> Count;
1101
1102 // Set edge count value
1103 void setEdgeCount(uint64_t Value) { Count = Value; }
1104
1105 // Return the information string for this object.
1106 std::string infoString() const {
1107 if (!Count)
1108 return PGOEdge::infoString();
1109 return (Twine(PGOEdge::infoString()) + " Count=" + Twine(*Count)).str();
1110 }
1111};
1112
1113using DirectEdges = SmallVector<PGOUseEdge *, 2>;
1114
1115// This class stores the auxiliary information for each BB.
1116struct PGOUseBBInfo : public PGOBBInfo {
1117 std::optional<uint64_t> Count;
1118 int32_t UnknownCountInEdge = 0;
1119 int32_t UnknownCountOutEdge = 0;
1120 DirectEdges InEdges;
1121 DirectEdges OutEdges;
1122
1123 PGOUseBBInfo(unsigned IX) : PGOBBInfo(IX) {}
1124
1125 // Set the profile count value for this BB.
1126 void setBBInfoCount(uint64_t Value) { Count = Value; }
1127
1128 // Return the information string of this object.
1129 std::string infoString() const {
1130 if (!Count)
1131 return PGOBBInfo::infoString();
1132 return (Twine(PGOBBInfo::infoString()) + " Count=" + Twine(*Count)).str();
1133 }
1134
1135 // Add an OutEdge and update the edge count.
1136 void addOutEdge(PGOUseEdge *E) {
1137 OutEdges.push_back(E);
1138 UnknownCountOutEdge++;
1139 }
1140
1141 // Add an InEdge and update the edge count.
1142 void addInEdge(PGOUseEdge *E) {
1143 InEdges.push_back(E);
1144 UnknownCountInEdge++;
1145 }
1146};
1147
1148} // end anonymous namespace
1149
1150// Sum up the count values for all the edges.
1152 uint64_t Total = 0;
1153 for (const auto &E : Edges) {
1154 if (E->Removed)
1155 continue;
1156 if (E->Count)
1157 Total += *E->Count;
1158 }
1159 return Total;
1160}
1161
1162namespace {
1163
1164class PGOUseFunc {
1165public:
1166 PGOUseFunc(Function &Func, Module *Modu, TargetLibraryInfo &TLI,
1167 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
1168 BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFIin,
1169 LoopInfo *LI, ProfileSummaryInfo *PSI, bool IsCS,
1170 bool InstrumentFuncEntry, bool InstrumentLoopEntries,
1171 bool HasSingleByteCoverage)
1172 : F(Func), M(Modu), BFI(BFIin), PSI(PSI),
1173 FuncInfo(Func, TLI, ComdatMembers, false, BPI, BFIin, LI, IsCS,
1174 InstrumentFuncEntry, InstrumentLoopEntries,
1175 HasSingleByteCoverage),
1176 FreqAttr(FFA_Normal), IsCS(IsCS), VPC(Func, TLI) {}
1177
1178 void handleInstrProfError(Error Err, uint64_t MismatchedFuncSum);
1179
1180 /// Get the profile record, assign it to \p ProfileRecord, handle errors if
1181 /// necessary, and assign \p ProgramMaxCount. \returns true if there are no
1182 /// errors.
1183 bool getRecord(IndexedInstrProfReader *PGOReader);
1184
1185 // Read counts for the instrumented BB from profile.
1186 bool readCounters(bool &AllZeros,
1188
1189 // Populate the counts for all BBs.
1190 void populateCounters();
1191
1192 // Set block coverage based on profile coverage values.
1193 void populateCoverage();
1194
1195 // Set the branch weights based on the count values.
1196 void setBranchWeights();
1197
1198 // Annotate the value profile call sites for all value kind.
1199 void annotateValueSites();
1200
1201 // Annotate the value profile call sites for one value kind.
1202 void annotateValueSites(uint32_t Kind);
1203
1204 // Annotate the irreducible loop header weights.
1205 void annotateIrrLoopHeaderWeights();
1206
1207 // Annotate per-block uniformity info for offload profiling.
1208 void setBlockUniformityAttribute();
1209
1210 // The hotness of the function from the profile count.
1211 enum FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot };
1212
1213 // Return the function hotness from the profile.
1214 FuncFreqAttr getFuncFreqAttr() const { return FreqAttr; }
1215
1216 // Return the function hash.
1217 uint64_t getFuncHash() const { return FuncInfo.FunctionHash; }
1218
1219 // Return the profile record for this function;
1220 NamedInstrProfRecord &getProfileRecord() { return ProfileRecord; }
1221
1222 // Return the auxiliary BB information.
1223 PGOUseBBInfo &getBBInfo(const BasicBlock *BB) const {
1224 return FuncInfo.getBBInfo(BB);
1225 }
1226
1227 // Return the auxiliary BB information if available.
1228 PGOUseBBInfo *findBBInfo(const BasicBlock *BB) const {
1229 return FuncInfo.findBBInfo(BB);
1230 }
1231
1232 Function &getFunc() const { return F; }
1233
1234 void dumpInfo(StringRef Str = "") const { FuncInfo.dumpInfo(Str); }
1235
1236 uint64_t getProgramMaxCount() const { return ProgramMaxCount; }
1237
1238private:
1239 Function &F;
1240 Module *M;
1241 BlockFrequencyInfo *BFI;
1242 ProfileSummaryInfo *PSI;
1243
1244 // This member stores the shared information with class PGOGenFunc.
1245 FuncPGOInstrumentation<PGOUseEdge, PGOUseBBInfo> FuncInfo;
1246
1247 // The maximum count value in the profile. This is only used in PGO use
1248 // compilation.
1249 uint64_t ProgramMaxCount;
1250
1251 // Position of counter that remains to be read.
1252 uint32_t CountPosition = 0;
1253
1254 // Total size of the profile count for this function.
1255 uint32_t ProfileCountSize = 0;
1256
1257 // ProfileRecord for this function.
1258 NamedInstrProfRecord ProfileRecord;
1259
1260 // Function hotness info derived from profile.
1261 FuncFreqAttr FreqAttr;
1262
1263 // Is to use the context sensitive profile.
1264 bool IsCS;
1265
1266 ValueProfileCollector VPC;
1267
1268 // Find the Instrumented BB and set the value. Return false on error.
1269 bool setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile);
1270
1271 // Set the edge counter value for the unknown edge -- there should be only
1272 // one unknown edge.
1273 void setEdgeCount(DirectEdges &Edges, uint64_t Value);
1274
1275 // Set the hot/cold inline hints based on the count values.
1276 // FIXME: This function should be removed once the functionality in
1277 // the inliner is implemented.
1278 void markFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) {
1279 if (PSI->isHotCount(EntryCount))
1280 FreqAttr = FFA_Hot;
1281 else if (PSI->isColdCount(MaxCount))
1282 FreqAttr = FFA_Cold;
1283 }
1284};
1285
1286} // end anonymous namespace
1287
1288/// Set up InEdges/OutEdges for all BBs in the MST.
1290 const FuncPGOInstrumentation<PGOUseEdge, PGOUseBBInfo> &FuncInfo) {
1291 // This is not required when there is block coverage inference.
1292 if (FuncInfo.BCI)
1293 return;
1294 for (const auto &E : FuncInfo.MST.allEdges()) {
1295 if (E->Removed)
1296 continue;
1297 const BasicBlock *SrcBB = E->SrcBB;
1298 const BasicBlock *DestBB = E->DestBB;
1299 PGOUseBBInfo &SrcInfo = FuncInfo.getBBInfo(SrcBB);
1300 PGOUseBBInfo &DestInfo = FuncInfo.getBBInfo(DestBB);
1301 SrcInfo.addOutEdge(E.get());
1302 DestInfo.addInEdge(E.get());
1303 }
1304}
1305
1306// Visit all the edges and assign the count value for the instrumented
1307// edges and the BB. Return false on error.
1308bool PGOUseFunc::setInstrumentedCounts(
1309 const std::vector<uint64_t> &CountFromProfile) {
1310
1311 std::vector<BasicBlock *> InstrumentBBs;
1312 FuncInfo.getInstrumentBBs(InstrumentBBs);
1313
1314 setupBBInfoEdges(FuncInfo);
1315
1316 unsigned NumInstrumentedBBs = InstrumentBBs.size();
1317 unsigned NumSelects = FuncInfo.SIVisitor.getNumOfSelectInsts();
1318 unsigned NumCounters = NumInstrumentedBBs + NumSelects;
1319 // The number of counters here should match the number of counters
1320 // in profile. Return if they mismatch.
1321 if (NumCounters != CountFromProfile.size()) {
1322 LLVM_DEBUG({
1323 dbgs() << "PGO COUNTER MISMATCH for function " << F.getName() << ":\n";
1324 dbgs() << " Expected counters: " << NumCounters << "\n";
1325 dbgs() << " - From instrumented edges: " << NumInstrumentedBBs << "\n";
1326 for (size_t i = 0; i < InstrumentBBs.size(); ++i) {
1327 dbgs() << " " << i << ": ";
1328 InstrumentBBs[i]->printAsOperand(dbgs(), false);
1329 dbgs() << "\n";
1330 }
1331 dbgs() << " - From select instructions: " << NumSelects << "\n";
1332 dbgs() << " Actual counters from profile: " << CountFromProfile.size()
1333 << "\n";
1334 });
1335 return false;
1336 }
1337 auto *FuncEntry = &*F.begin();
1338
1339 // Set the profile count to the Instrumented BBs.
1340 uint32_t I = 0;
1341 for (BasicBlock *InstrBB : InstrumentBBs) {
1342 uint64_t CountValue = CountFromProfile[I++];
1343 PGOUseBBInfo &Info = getBBInfo(InstrBB);
1344 // If we reach here, we know that we have some nonzero count
1345 // values in this function. The entry count should not be 0.
1346 // Fix it if necessary.
1347 if (InstrBB == FuncEntry && CountValue == 0)
1348 CountValue = 1;
1349 Info.setBBInfoCount(CountValue);
1350 }
1351 ProfileCountSize = CountFromProfile.size();
1352 CountPosition = I;
1353
1354 // Set the edge count and update the count of unknown edges for BBs.
1355 auto setEdgeCount = [this](PGOUseEdge *E, uint64_t Value) -> void {
1356 E->setEdgeCount(Value);
1357 this->getBBInfo(E->SrcBB).UnknownCountOutEdge--;
1358 this->getBBInfo(E->DestBB).UnknownCountInEdge--;
1359 };
1360
1361 // Set the profile count the Instrumented edges. There are BBs that not in
1362 // MST but not instrumented. Need to set the edge count value so that we can
1363 // populate the profile counts later.
1364 for (const auto &E : FuncInfo.MST.allEdges()) {
1365 if (E->Removed || E->InMST)
1366 continue;
1367 const BasicBlock *SrcBB = E->SrcBB;
1368 PGOUseBBInfo &SrcInfo = getBBInfo(SrcBB);
1369
1370 // If only one out-edge, the edge profile count should be the same as BB
1371 // profile count.
1372 if (SrcInfo.Count && SrcInfo.OutEdges.size() == 1)
1373 setEdgeCount(E.get(), *SrcInfo.Count);
1374 else {
1375 const BasicBlock *DestBB = E->DestBB;
1376 PGOUseBBInfo &DestInfo = getBBInfo(DestBB);
1377 // If only one in-edge, the edge profile count should be the same as BB
1378 // profile count.
1379 if (DestInfo.Count && DestInfo.InEdges.size() == 1)
1380 setEdgeCount(E.get(), *DestInfo.Count);
1381 }
1382 if (E->Count)
1383 continue;
1384 // E's count should have been set from profile. If not, this meenas E skips
1385 // the instrumentation. We set the count to 0.
1386 setEdgeCount(E.get(), 0);
1387 }
1388 return true;
1389}
1390
1391// Set the count value for the unknown edge. There should be one and only one
1392// unknown edge in Edges vector.
1393void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) {
1394 for (auto &E : Edges) {
1395 if (E->Count)
1396 continue;
1397 E->setEdgeCount(Value);
1398
1399 getBBInfo(E->SrcBB).UnknownCountOutEdge--;
1400 getBBInfo(E->DestBB).UnknownCountInEdge--;
1401 return;
1402 }
1403 llvm_unreachable("Cannot find the unknown count edge");
1404}
1405
1406// Emit function metadata indicating PGO profile mismatch.
1408 const char MetadataName[] = "instr_prof_hash_mismatch";
1410 // If this metadata already exists, ignore.
1411 auto *Existing = F.getMetadata(LLVMContext::MD_annotation);
1412 if (Existing) {
1413 MDTuple *Tuple = cast<MDTuple>(Existing);
1414 for (const auto &N : Tuple->operands()) {
1415 if (N.equalsStr(MetadataName))
1416 return;
1417 Names.push_back(N.get());
1418 }
1419 }
1420
1421 MDBuilder MDB(ctx);
1422 Names.push_back(MDB.createString(MetadataName));
1423 MDNode *MD = MDTuple::get(ctx, Names);
1424 F.setMetadata(LLVMContext::MD_annotation, MD);
1425}
1426
1427void PGOUseFunc::handleInstrProfError(Error Err, uint64_t MismatchedFuncSum) {
1428 handleAllErrors(std::move(Err), [&](const InstrProfError &IPE) {
1429 auto &Ctx = M->getContext();
1430 auto Err = IPE.get();
1431 bool SkipWarning = false;
1432 LLVM_DEBUG(dbgs() << "Error in reading profile for Func "
1433 << FuncInfo.FuncName << ": ");
1434 if (Err == instrprof_error::unknown_function) {
1435 IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++;
1436 SkipWarning = !PGOWarnMissing;
1437 LLVM_DEBUG(dbgs() << "unknown function");
1438 } else if (Err == instrprof_error::hash_mismatch ||
1439 Err == instrprof_error::malformed) {
1440 IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++;
1441 SkipWarning =
1444 (F.hasComdat() || F.getLinkage() == GlobalValue::WeakAnyLinkage ||
1446 LLVM_DEBUG(dbgs() << "hash mismatch (hash= " << FuncInfo.FunctionHash
1447 << " skip=" << SkipWarning << ")");
1448 // Emit function metadata indicating PGO profile mismatch.
1449 annotateFunctionWithHashMismatch(F, M->getContext());
1450 }
1451
1452 LLVM_DEBUG(dbgs() << " IsCS=" << IsCS << "\n");
1453 if (SkipWarning)
1454 return;
1455
1456 std::string Msg =
1457 IPE.message() + std::string(" ") + F.getName().str() +
1458 std::string(" Hash = ") + std::to_string(FuncInfo.FunctionHash) +
1459 std::string(" up to ") + std::to_string(MismatchedFuncSum) +
1460 std::string(" count discarded");
1461
1462 Ctx.diagnose(
1463 DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning));
1464 });
1465}
1466
1467bool PGOUseFunc::getRecord(IndexedInstrProfReader *PGOReader) {
1468 uint64_t MismatchedFuncSum = 0;
1469 auto Result = PGOReader->getInstrProfRecord(
1470 FuncInfo.FuncName, FuncInfo.FunctionHash, FuncInfo.DeprecatedFuncName,
1471 &MismatchedFuncSum);
1472 if (Error E = Result.takeError()) {
1473 handleInstrProfError(std::move(E), MismatchedFuncSum);
1474 return false;
1475 }
1476 ProfileRecord = std::move(Result.get());
1477 ProgramMaxCount = PGOReader->getMaximumFunctionCount(IsCS);
1478 return true;
1479}
1480
1481// Read the profile from ProfileFileName and assign the value to the
1482// instrumented BB and the edges. Return true if the profile are successfully
1483// read, and false on errors.
1484bool PGOUseFunc::readCounters(bool &AllZeros,
1486 auto &Ctx = M->getContext();
1487 PseudoKind = ProfileRecord.getCountPseudoKind();
1488 if (PseudoKind != InstrProfRecord::NotPseudo) {
1489 return true;
1490 }
1491 std::vector<uint64_t> &CountFromProfile = ProfileRecord.Counts;
1492
1493 IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++;
1494 LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n");
1495
1496 uint64_t ValueSum = 0;
1497 for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) {
1498 LLVM_DEBUG(dbgs() << " " << I << ": " << CountFromProfile[I] << "\n");
1499 ValueSum += CountFromProfile[I];
1500 }
1501 AllZeros = (ValueSum == 0);
1502
1503 LLVM_DEBUG(dbgs() << "SUM = " << ValueSum << "\n");
1504
1505 getBBInfo(nullptr).UnknownCountOutEdge = 2;
1506 getBBInfo(nullptr).UnknownCountInEdge = 2;
1507
1508 if (!setInstrumentedCounts(CountFromProfile)) {
1509 LLVM_DEBUG(
1510 dbgs() << "Inconsistent number of counts, skipping this function");
1511 Ctx.diagnose(DiagnosticInfoPGOProfile(
1512 M->getName().data(),
1513 Twine("Inconsistent number of counts in ") + F.getName().str() +
1514 Twine(": the profile may be stale or there is a function name "
1515 "collision."),
1516 DS_Warning));
1517 return false;
1518 }
1519 return true;
1520}
1521
1522void PGOUseFunc::populateCoverage() {
1523 IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++;
1524
1525 ArrayRef<uint64_t> CountsFromProfile = ProfileRecord.Counts;
1526 DenseMap<const BasicBlock *, bool> Coverage;
1527 unsigned Index = 0;
1528 for (auto &BB : F)
1529 if (FuncInfo.BCI->shouldInstrumentBlock(BB))
1530 Coverage[&BB] = (CountsFromProfile[Index++] != 0);
1531 assert(Index == CountsFromProfile.size());
1532
1533 // For each B in InverseDependencies[A], if A is covered then B is covered.
1534 DenseMap<const BasicBlock *, DenseSet<const BasicBlock *>>
1535 InverseDependencies;
1536 for (auto &BB : F) {
1537 for (auto *Dep : FuncInfo.BCI->getDependencies(BB)) {
1538 // If Dep is covered then BB is covered.
1539 InverseDependencies[Dep].insert(&BB);
1540 }
1541 }
1542
1543 // Infer coverage of the non-instrumented blocks using a flood-fill algorithm.
1544 std::stack<const BasicBlock *> CoveredBlocksToProcess;
1545 for (auto &[BB, IsCovered] : Coverage)
1546 if (IsCovered)
1547 CoveredBlocksToProcess.push(BB);
1548
1549 while (!CoveredBlocksToProcess.empty()) {
1550 auto *CoveredBlock = CoveredBlocksToProcess.top();
1551 assert(Coverage[CoveredBlock]);
1552 CoveredBlocksToProcess.pop();
1553 for (auto *BB : InverseDependencies[CoveredBlock]) {
1554 // If CoveredBlock is covered then BB is covered.
1555 bool &Cov = Coverage[BB];
1556 if (Cov)
1557 continue;
1558 Cov = true;
1559 CoveredBlocksToProcess.push(BB);
1560 }
1561 }
1562
1563 // Annotate block coverage.
1564 MDBuilder MDB(F.getContext());
1565 // We set the entry count to 10000 if the entry block is covered so that BFI
1566 // can propagate a fraction of this count to the other covered blocks.
1567 F.setEntryCount(Coverage[&F.getEntryBlock()] ? 10000 : 0);
1568 for (auto &BB : F) {
1569 // For a block A and its successor B, we set the edge weight as follows:
1570 // If A is covered and B is covered, set weight=1.
1571 // If A is covered and B is uncovered, set weight=0.
1572 // If A is uncovered, set weight=1.
1573 // This setup will allow BFI to give nonzero profile counts to only covered
1574 // blocks.
1575 SmallVector<uint32_t, 4> Weights;
1576 for (auto *Succ : successors(&BB))
1577 Weights.push_back((Coverage[Succ] || !Coverage[&BB]) ? 1 : 0);
1578 if (Weights.size() >= 2)
1579 llvm::setBranchWeights(*BB.getTerminator(), Weights,
1580 /*IsExpected=*/false);
1581 }
1582
1583 unsigned NumCorruptCoverage = 0;
1584 DominatorTree DT(F);
1585 LoopInfo LI(DT);
1586 BranchProbabilityInfo BPI(F, LI);
1587 BlockFrequencyInfo BFI(F, BPI, LI);
1588 auto IsBlockDead = [&](const BasicBlock &BB) -> std::optional<bool> {
1589 if (auto C = BFI.getBlockProfileCount(&BB))
1590 return C == 0;
1591 return {};
1592 };
1593 LLVM_DEBUG(dbgs() << "Block Coverage: (Instrumented=*, Covered=X)\n");
1594 for (auto &BB : F) {
1595 LLVM_DEBUG(dbgs() << (FuncInfo.BCI->shouldInstrumentBlock(BB) ? "* " : " ")
1596 << (Coverage[&BB] ? "X " : " ") << " " << BB.getName()
1597 << "\n");
1598 // In some cases it is possible to find a covered block that has no covered
1599 // successors, e.g., when a block calls a function that may call exit(). In
1600 // those cases, BFI could find its successor to be covered while BCI could
1601 // find its successor to be dead.
1602 const bool &Cov = Coverage[&BB];
1603 if (Cov == IsBlockDead(BB).value_or(false)) {
1604 LLVM_DEBUG(
1605 dbgs() << "Found inconsistent block covearge for " << BB.getName()
1606 << ": BCI=" << (Cov ? "Covered" : "Dead") << " BFI="
1607 << (IsBlockDead(BB).value() ? "Dead" : "Covered") << "\n");
1608 ++NumCorruptCoverage;
1609 }
1610 if (Cov)
1611 ++NumCoveredBlocks;
1612 }
1613 if (PGOVerifyBFI && NumCorruptCoverage) {
1614 auto &Ctx = M->getContext();
1615 Ctx.diagnose(DiagnosticInfoPGOProfile(
1616 M->getName().data(),
1617 Twine("Found inconsistent block coverage for function ") + F.getName() +
1618 " in " + Twine(NumCorruptCoverage) + " blocks.",
1619 DS_Warning));
1620 }
1622 FuncInfo.BCI->viewBlockCoverageGraph(&Coverage);
1623}
1624
1625// Populate the counters from instrumented BBs to all BBs.
1626// In the end of this operation, all BBs should have a valid count value.
1627void PGOUseFunc::populateCounters() {
1628 bool Changes = true;
1629 unsigned NumPasses = 0;
1630 while (Changes) {
1631 NumPasses++;
1632 Changes = false;
1633
1634 // For efficient traversal, it's better to start from the end as most
1635 // of the instrumented edges are at the end.
1636 for (auto &BB : reverse(F)) {
1637 PGOUseBBInfo *UseBBInfo = findBBInfo(&BB);
1638 if (UseBBInfo == nullptr)
1639 continue;
1640 if (!UseBBInfo->Count) {
1641 if (UseBBInfo->UnknownCountOutEdge == 0) {
1642 UseBBInfo->Count = sumEdgeCount(UseBBInfo->OutEdges);
1643 Changes = true;
1644 } else if (UseBBInfo->UnknownCountInEdge == 0) {
1645 UseBBInfo->Count = sumEdgeCount(UseBBInfo->InEdges);
1646 Changes = true;
1647 }
1648 }
1649 if (UseBBInfo->Count) {
1650 if (UseBBInfo->UnknownCountOutEdge == 1) {
1651 uint64_t Total = 0;
1652 uint64_t OutSum = sumEdgeCount(UseBBInfo->OutEdges);
1653 // If the one of the successor block can early terminate (no-return),
1654 // we can end up with situation where out edge sum count is larger as
1655 // the source BB's count is collected by a post-dominated block.
1656 if (*UseBBInfo->Count > OutSum)
1657 Total = *UseBBInfo->Count - OutSum;
1658 setEdgeCount(UseBBInfo->OutEdges, Total);
1659 Changes = true;
1660 }
1661 if (UseBBInfo->UnknownCountInEdge == 1) {
1662 uint64_t Total = 0;
1663 uint64_t InSum = sumEdgeCount(UseBBInfo->InEdges);
1664 if (*UseBBInfo->Count > InSum)
1665 Total = *UseBBInfo->Count - InSum;
1666 setEdgeCount(UseBBInfo->InEdges, Total);
1667 Changes = true;
1668 }
1669 }
1670 }
1671 }
1672
1673 LLVM_DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n");
1674 (void)NumPasses;
1675#ifndef NDEBUG
1676 // Assert every BB has a valid counter.
1677 for (auto &BB : F) {
1678 auto BI = findBBInfo(&BB);
1679 if (BI == nullptr)
1680 continue;
1681 assert(BI->Count && "BB count is not valid");
1682 }
1683#endif
1684 // Now annotate select instructions. This may fixup impossible block counts.
1685 FuncInfo.SIVisitor.annotateSelects(this, &CountPosition);
1686 assert(CountPosition == ProfileCountSize);
1687
1688 uint64_t FuncEntryCount = *getBBInfo(&*F.begin()).Count;
1689 uint64_t FuncMaxCount = FuncEntryCount;
1690 for (auto &BB : F) {
1691 auto BI = findBBInfo(&BB);
1692 if (BI == nullptr)
1693 continue;
1694 FuncMaxCount = std::max(FuncMaxCount, *BI->Count);
1695 }
1696
1697 // Fix the obviously inconsistent entry count.
1698 if (FuncMaxCount > 0 && FuncEntryCount == 0)
1699 FuncEntryCount = 1;
1700 F.setEntryCount(FuncEntryCount);
1701 markFunctionAttributes(FuncEntryCount, FuncMaxCount);
1702
1703 LLVM_DEBUG(FuncInfo.dumpInfo("after reading profile."));
1704}
1705
1706// Assign the scaled count values to the BB with multiple out edges.
1707void PGOUseFunc::setBranchWeights() {
1708 // Generate MD_prof metadata for every branch instruction.
1709 LLVM_DEBUG(dbgs() << "\nSetting branch weights for func " << F.getName()
1710 << " IsCS=" << IsCS << "\n");
1711 for (auto &BB : F) {
1712 Instruction *TI = BB.getTerminator();
1713 if (TI->getNumSuccessors() < 2)
1714 continue;
1715 if (!(isa<CondBrInst>(TI) || isa<SwitchInst>(TI) ||
1717 isa<CallBrInst>(TI)))
1718 continue;
1719
1720 const PGOUseBBInfo &BBCountInfo = getBBInfo(&BB);
1721 if (!*BBCountInfo.Count)
1722 continue;
1723
1724 // We have a non-zero Branch BB.
1725
1726 // SuccessorCount can be greater than OutEdgesCount, because
1727 // removed edges don't appear in OutEdges.
1728 unsigned OutEdgesCount = BBCountInfo.OutEdges.size();
1729 unsigned SuccessorCount = BB.getTerminator()->getNumSuccessors();
1730 assert(OutEdgesCount <= SuccessorCount);
1731
1732 SmallVector<uint64_t, 2> EdgeCounts(SuccessorCount, 0);
1733 uint64_t MaxCount = 0;
1734 for (unsigned It = 0; It < OutEdgesCount; It++) {
1735 const PGOUseEdge *E = BBCountInfo.OutEdges[It];
1736 const BasicBlock *SrcBB = E->SrcBB;
1737 const BasicBlock *DestBB = E->DestBB;
1738 if (DestBB == nullptr)
1739 continue;
1740 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
1741 uint64_t EdgeCount = *E->Count;
1742 if (EdgeCount > MaxCount)
1743 MaxCount = EdgeCount;
1744 EdgeCounts[SuccNum] = EdgeCount;
1745 }
1746
1747 if (MaxCount)
1748 setProfMetadata(TI, EdgeCounts, MaxCount);
1749 else {
1750 // A zero MaxCount can come about when we have a BB with a positive
1751 // count, and whose successor blocks all have 0 count. This can happen
1752 // when there is no exit block and the code exits via a noreturn function.
1753 auto &Ctx = M->getContext();
1754 Ctx.diagnose(DiagnosticInfoPGOProfile(
1755 M->getName().data(),
1756 Twine("Profile in ") + F.getName().str() +
1757 Twine(" partially ignored") +
1758 Twine(", possibly due to the lack of a return path."),
1759 DS_Warning));
1760 }
1761 }
1762}
1763
1765 for (BasicBlock *Pred : predecessors(BB)) {
1766 if (isa<IndirectBrInst>(Pred->getTerminator()))
1767 return true;
1768 }
1769 return false;
1770}
1771
1772void PGOUseFunc::annotateIrrLoopHeaderWeights() {
1773 LLVM_DEBUG(dbgs() << "\nAnnotating irreducible loop header weights.\n");
1774 // Find irr loop headers
1775 for (auto &BB : F) {
1776 // As a heuristic also annotate indrectbr targets as they have a high chance
1777 // to become an irreducible loop header after the indirectbr tail
1778 // duplication.
1779 if (BFI->isIrrLoopHeader(&BB) || isIndirectBrTarget(&BB)) {
1780 Instruction *TI = BB.getTerminator();
1781 const PGOUseBBInfo &BBCountInfo = getBBInfo(&BB);
1782 setIrrLoopHeaderMetadata(M, TI, *BBCountInfo.Count);
1783 }
1784 }
1785}
1786
1787void PGOUseFunc::setBlockUniformityAttribute() {
1788 if (ProfileRecord.UniformityBits.empty())
1789 return;
1790
1791 // Annotate uniformity on each instrumented IR basic block so later codegen
1792 // passes (MachineFunction) can consume it without relying on fragile block
1793 // numbering heuristics.
1794 //
1795 // Metadata kind: LLVMContext::MD_block_uniformity_profile
1796 // Payload: i1 (true = uniform, false = divergent)
1797
1798 std::vector<BasicBlock *> InstrumentBBs;
1799 FuncInfo.getInstrumentBBs(InstrumentBBs);
1800
1801 LLVMContext &Ctx = F.getContext();
1802 Type *Int1Ty = Type::getInt1Ty(Ctx);
1803
1804 for (size_t I = 0, E = InstrumentBBs.size(); I < E; ++I) {
1805 BasicBlock *BB = InstrumentBBs[I];
1806 if (!BB || !BB->getTerminator())
1807 continue;
1808 bool IsUniform = ProfileRecord.isBlockUniform(I);
1809 auto *MD = MDNode::get(
1810 Ctx, ConstantAsMetadata::get(ConstantInt::get(Int1Ty, IsUniform)));
1811 BB->getTerminator()->setMetadata(LLVMContext::MD_block_uniformity_profile,
1812 MD);
1813 }
1814
1815 LLVM_DEBUG({
1816 dbgs() << "PGO: Set block uniformity profile for " << F.getName() << ": ";
1817 for (size_t I = 0, E = InstrumentBBs.size(); I < E; ++I)
1818 dbgs() << (ProfileRecord.isBlockUniform(I) ? 'U' : 'D');
1819 dbgs() << "\n";
1820 });
1821}
1822
1823void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) {
1824 Module *M = F.getParent();
1825 IRBuilder<> Builder(&SI);
1826 Type *Int64Ty = Builder.getInt64Ty();
1827 auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty);
1828 auto *NormalizedFuncNameVarPtr =
1830 FuncNameVar, PointerType::get(M->getContext(), 0));
1831 Builder.CreateIntrinsic(Intrinsic::instrprof_increment_step,
1832 {NormalizedFuncNameVarPtr, Builder.getInt64(FuncHash),
1833 Builder.getInt32(TotalNumCtrs),
1834 Builder.getInt32(*CurCtrIdx), Step});
1835 ++(*CurCtrIdx);
1836}
1837
1838void SelectInstVisitor::annotateOneSelectInst(SelectInst &SI) {
1839 std::vector<uint64_t> &CountFromProfile = UseFunc->getProfileRecord().Counts;
1840 assert(*CurCtrIdx < CountFromProfile.size() &&
1841 "Out of bound access of counters");
1842 uint64_t SCounts[2];
1843 SCounts[0] = CountFromProfile[*CurCtrIdx]; // True count
1844 ++(*CurCtrIdx);
1845 uint64_t TotalCount = 0;
1846 auto BI = UseFunc->findBBInfo(SI.getParent());
1847 if (BI != nullptr) {
1848 TotalCount = *BI->Count;
1849
1850 // Fix the block count if it is impossible.
1851 if (TotalCount < SCounts[0])
1852 BI->Count = SCounts[0];
1853 }
1854 // False Count
1855 SCounts[1] = (TotalCount > SCounts[0] ? TotalCount - SCounts[0] : 0);
1856 uint64_t MaxCount = std::max(SCounts[0], SCounts[1]);
1857 if (MaxCount)
1858 setProfMetadata(&SI, SCounts, MaxCount);
1859}
1860
1861void SelectInstVisitor::visitSelectInst(SelectInst &SI) {
1862 if (!PGOInstrSelect || PGOFunctionEntryCoverage || HasSingleByteCoverage)
1863 return;
1864 // FIXME: do not handle this yet.
1865 if (SI.getCondition()->getType()->isVectorTy())
1866 return;
1867
1868 switch (Mode) {
1869 case VM_counting:
1870 NSIs++;
1871 return;
1872 case VM_instrument:
1873 instrumentOneSelectInst(SI);
1874 return;
1875 case VM_annotate:
1876 annotateOneSelectInst(SI);
1877 return;
1878 }
1879
1880 llvm_unreachable("Unknown visiting mode");
1881}
1882
1884 if (ValueProfKind == IPVK_MemOPSize)
1886 if (ValueProfKind == llvm::IPVK_VTableTarget)
1888 return MaxNumAnnotations;
1889}
1890
1891// Traverse all valuesites and annotate the instructions for all value kind.
1892void PGOUseFunc::annotateValueSites() {
1894 return;
1895
1896 // Create the PGOFuncName meta data.
1897 createPGOFuncNameMetadata(F, FuncInfo.FuncName);
1898
1899 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
1900 annotateValueSites(Kind);
1901}
1902
1903// Annotate the instructions for a specific value kind.
1904void PGOUseFunc::annotateValueSites(uint32_t Kind) {
1905 assert(Kind <= IPVK_Last);
1906 unsigned ValueSiteIndex = 0;
1907
1908 unsigned NumValueSites = ProfileRecord.getNumValueSites(Kind);
1909
1910 // Since there isn't a reliable or fast way for profile reader to tell if a
1911 // profile is generated with `-enable-vtable-value-profiling` on, we run the
1912 // value profile collector over the function IR to find the instrumented sites
1913 // iff function profile records shows the number of instrumented vtable sites
1914 // is not zero. Function cfg already takes the number of instrumented
1915 // indirect call sites into account so it doesn't hash the number of
1916 // instrumented vtables; as a side effect it makes it easier to enable
1917 // profiling and profile use in two steps if needed.
1918 // TODO: Remove this if/when -enable-vtable-value-profiling is on by default.
1919 if (NumValueSites > 0 && Kind == IPVK_VTableTarget &&
1920 NumValueSites != FuncInfo.ValueSites[IPVK_VTableTarget].size() &&
1922 FuncInfo.ValueSites[IPVK_VTableTarget] = VPC.get(IPVK_VTableTarget);
1923 auto &ValueSites = FuncInfo.ValueSites[Kind];
1924 if (NumValueSites != ValueSites.size()) {
1925 auto &Ctx = M->getContext();
1926 Ctx.diagnose(DiagnosticInfoPGOProfile(
1927 M->getName().data(),
1928 Twine("Inconsistent number of value sites for ") +
1929 Twine(ValueProfKindDescr[Kind]) + Twine(" profiling in \"") +
1930 F.getName().str() +
1931 Twine("\", possibly due to the use of a stale profile."),
1932 DS_Warning));
1933 return;
1934 }
1935
1936 for (VPCandidateInfo &I : ValueSites) {
1937 LLVM_DEBUG(dbgs() << "Read one value site profile (kind = " << Kind
1938 << "): Index = " << ValueSiteIndex << " out of "
1939 << NumValueSites << "\n");
1941 *M, *I.AnnotatedInst, ProfileRecord,
1942 static_cast<InstrProfValueKind>(Kind), ValueSiteIndex,
1943 getMaxNumAnnotations(static_cast<InstrProfValueKind>(Kind)));
1944 ValueSiteIndex++;
1945 }
1946}
1947
1948// Collect the set of members for each Comdat in module M and store
1949// in ComdatMembers.
1951 Module &M,
1952 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
1953 if (!DoComdatRenaming)
1954 return;
1955 for (Function &F : M)
1956 if (Comdat *C = F.getComdat())
1957 ComdatMembers.insert(std::make_pair(C, &F));
1958 for (GlobalVariable &GV : M.globals())
1959 if (Comdat *C = GV.getComdat())
1960 ComdatMembers.insert(std::make_pair(C, &GV));
1961 for (GlobalAlias &GA : M.aliases())
1962 if (Comdat *C = GA.getComdat())
1963 ComdatMembers.insert(std::make_pair(C, &GA));
1964}
1965
1966// Return true if we should not find instrumentation data for this function
1967static bool skipPGOUse(const Function &F) {
1968 if (F.isDeclaration())
1969 return true;
1970 // If there are too many critical edges, PGO might cause
1971 // compiler time problem. Skip PGO if the number of
1972 // critical edges execeed the threshold.
1973 unsigned NumCriticalEdges = 0;
1974 for (auto &BB : F) {
1975 const Instruction *TI = BB.getTerminator();
1976 for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) {
1977 if (isCriticalEdge(TI, I))
1978 NumCriticalEdges++;
1979 }
1980 }
1981 if (NumCriticalEdges > PGOFunctionCriticalEdgeThreshold) {
1982 LLVM_DEBUG(dbgs() << "In func " << F.getName()
1983 << ", NumCriticalEdges=" << NumCriticalEdges
1984 << " exceed the threshold. Skip PGO.\n");
1985 return true;
1986 }
1987 return false;
1988}
1989
1990// Return true if we should not instrument this function
1991static bool skipPGOGen(const Function &F) {
1992 if (skipPGOUse(F))
1993 return true;
1994 if (F.hasFnAttribute(llvm::Attribute::Naked))
1995 return true;
1996 if (F.hasFnAttribute(llvm::Attribute::NoProfile))
1997 return true;
1998 if (F.hasFnAttribute(llvm::Attribute::SkipProfile))
1999 return true;
2000 if (F.getInstructionCount() < PGOFunctionSizeThreshold)
2001 return true;
2003 if (auto EntryCount = F.getEntryCount())
2004 return *EntryCount > PGOColdInstrumentEntryThreshold;
2005 return !PGOTreatUnknownAsCold;
2006 }
2007 return false;
2008}
2009
2011 Module &M, function_ref<TargetLibraryInfo &(Function &)> LookupTLI,
2014 function_ref<LoopInfo *(Function &)> LookupLI,
2015 PGOInstrumentationType InstrumentationType) {
2016 // For the context-sensitive instrumentation, we should have a separated pass
2017 // (before LTO/ThinLTO linking) to create these variables.
2018 if (InstrumentationType == PGOInstrumentationType::FDO)
2019 createIRLevelProfileFlagVar(M, InstrumentationType);
2020
2021 Triple TT(M.getTargetTriple());
2022 LLVMContext &Ctx = M.getContext();
2023 if (!TT.isOSBinFormatELF() && EnableVTableValueProfiling)
2025 M.getName().data(),
2026 Twine("VTable value profiling is presently not "
2027 "supported for non-ELF object formats"),
2028 DS_Warning));
2029 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
2030 collectComdatMembers(M, ComdatMembers);
2031
2032 for (auto &F : M) {
2033 if (skipPGOGen(F))
2034 continue;
2035 TargetLibraryInfo &TLI = LookupTLI(F);
2036 BranchProbabilityInfo *BPI = LookupBPI(F);
2037 BlockFrequencyInfo *BFI = LookupBFI(F);
2038 LoopInfo *LI = LookupLI(F);
2039 FunctionInstrumenter FI(M, F, TLI, ComdatMembers, BPI, BFI, LI,
2040 InstrumentationType);
2041 FI.instrument();
2042 }
2043 return true;
2044}
2045
2046PreservedAnalyses
2048 createProfileFileNameVar(M, CSInstrName);
2049 // The variable in a comdat may be discarded by LTO. Ensure the declaration
2050 // will be retained.
2053 if (ProfileSampling)
2058 return PA;
2059}
2060
2063 auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
2064 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
2065 return FAM.getResult<TargetLibraryAnalysis>(F);
2066 };
2067 auto LookupBPI = [&FAM](Function &F) {
2068 return &FAM.getResult<BranchProbabilityAnalysis>(F);
2069 };
2070 auto LookupBFI = [&FAM](Function &F) {
2071 return &FAM.getResult<BlockFrequencyAnalysis>(F);
2072 };
2073 auto LookupLI = [&FAM](Function &F) {
2074 return &FAM.getResult<LoopAnalysis>(F);
2075 };
2076
2077 if (!InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, LookupLI,
2078 InstrumentationType))
2079 return PreservedAnalyses::all();
2080
2081 return PreservedAnalyses::none();
2082}
2083
2084// Using the ratio b/w sums of profile count values and BFI count values to
2085// adjust the func entry count.
2086static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI,
2087 BranchProbabilityInfo &NBPI) {
2088 Function &F = Func.getFunc();
2089 BlockFrequencyInfo NBFI(F, NBPI, LI);
2090#ifndef NDEBUG
2091 auto BFIEntryCount = F.getEntryCount();
2092 assert(BFIEntryCount && (*BFIEntryCount > 0) && "Invalid BFI Entrycount");
2093#endif
2094 auto SumCount = APFloat::getZero(APFloat::IEEEdouble());
2095 auto SumBFICount = APFloat::getZero(APFloat::IEEEdouble());
2096 for (auto &BBI : F) {
2097 uint64_t CountValue = 0;
2098 uint64_t BFICountValue = 0;
2099 if (!Func.findBBInfo(&BBI))
2100 continue;
2101 auto BFICount = NBFI.getBlockProfileCount(&BBI);
2102 CountValue = *Func.getBBInfo(&BBI).Count;
2103 BFICountValue = *BFICount;
2104 SumCount.add(APFloat(CountValue * 1.0), APFloat::rmNearestTiesToEven);
2105 SumBFICount.add(APFloat(BFICountValue * 1.0), APFloat::rmNearestTiesToEven);
2106 }
2107 if (SumCount.isZero())
2108 return;
2109
2110 assert(SumBFICount.compare(APFloat(0.0)) == APFloat::cmpGreaterThan &&
2111 "Incorrect sum of BFI counts");
2112 if (SumBFICount.compare(SumCount) == APFloat::cmpEqual)
2113 return;
2114 double Scale = (SumCount / SumBFICount).convertToDouble();
2115 if (Scale < 1.001 && Scale > 0.999)
2116 return;
2117
2118 uint64_t FuncEntryCount = *Func.getBBInfo(&*F.begin()).Count;
2119 uint64_t NewEntryCount = 0.5 + FuncEntryCount * Scale;
2120 if (NewEntryCount == 0)
2121 NewEntryCount = 1;
2122 if (NewEntryCount != FuncEntryCount) {
2123 F.setEntryCount(NewEntryCount);
2124 LLVM_DEBUG(dbgs() << "FixFuncEntryCount: in " << F.getName()
2125 << ", entry_count " << FuncEntryCount << " --> "
2126 << NewEntryCount << "\n");
2127 }
2128}
2129
2130// Compare the profile count values with BFI count values, and print out
2131// the non-matching ones.
2132static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI,
2134 uint64_t HotCountThreshold,
2136 Function &F = Func.getFunc();
2137 BlockFrequencyInfo NBFI(F, NBPI, LI);
2138 // bool PrintFunc = false;
2139 bool HotBBOnly = PGOVerifyHotBFI;
2140 StringRef Msg;
2142
2143 unsigned BBNum = 0, BBMisMatchNum = 0, NonZeroBBNum = 0;
2144 for (auto &BBI : F) {
2145 PGOUseBBInfo *BBInfo = Func.findBBInfo(&BBI);
2146 if (!BBInfo)
2147 continue;
2148
2149 uint64_t CountValue = BBInfo->Count.value_or(CountValue);
2150 uint64_t BFICountValue = 0;
2151
2152 BBNum++;
2153 if (CountValue)
2154 NonZeroBBNum++;
2155 auto BFICount = NBFI.getBlockProfileCount(&BBI);
2156 if (BFICount)
2157 BFICountValue = *BFICount;
2158
2159 if (HotBBOnly) {
2160 bool rawIsHot = CountValue >= HotCountThreshold;
2161 bool BFIIsHot = BFICountValue >= HotCountThreshold;
2162 bool rawIsCold = CountValue <= ColdCountThreshold;
2163 bool ShowCount = false;
2164 if (rawIsHot && !BFIIsHot) {
2165 Msg = "raw-Hot to BFI-nonHot";
2166 ShowCount = true;
2167 } else if (rawIsCold && BFIIsHot) {
2168 Msg = "raw-Cold to BFI-Hot";
2169 ShowCount = true;
2170 }
2171 if (!ShowCount)
2172 continue;
2173 } else {
2174 if ((CountValue < PGOVerifyBFICutoff) &&
2175 (BFICountValue < PGOVerifyBFICutoff))
2176 continue;
2177 uint64_t Diff = (BFICountValue >= CountValue)
2178 ? BFICountValue - CountValue
2179 : CountValue - BFICountValue;
2180 if (Diff <= CountValue / 100 * PGOVerifyBFIRatio)
2181 continue;
2182 }
2183 BBMisMatchNum++;
2184
2185 ORE.emit([&]() {
2187 F.getSubprogram(), &BBI);
2188 Remark << "BB " << ore::NV("Block", BBI.getName())
2189 << " Count=" << ore::NV("Count", CountValue)
2190 << " BFI_Count=" << ore::NV("Count", BFICountValue);
2191 if (!Msg.empty())
2192 Remark << " (" << Msg << ")";
2193 return Remark;
2194 });
2195 }
2196 if (BBMisMatchNum)
2197 ORE.emit([&]() {
2198 return OptimizationRemarkAnalysis(DEBUG_TYPE, "bfi-verify",
2199 F.getSubprogram(), &F.getEntryBlock())
2200 << "In Func " << ore::NV("Function", F.getName())
2201 << ": Num_of_BB=" << ore::NV("Count", BBNum)
2202 << ", Num_of_non_zerovalue_BB=" << ore::NV("Count", NonZeroBBNum)
2203 << ", Num_of_mis_matching_BB=" << ore::NV("Count", BBMisMatchNum);
2204 });
2205}
2206
2208 Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName,
2209 vfs::FileSystem &FS,
2210 function_ref<TargetLibraryInfo &(Function &)> LookupTLI,
2213 function_ref<LoopInfo *(Function &)> LookupLI, ProfileSummaryInfo *PSI,
2214 bool IsCS) {
2215 LLVM_DEBUG(dbgs() << "Read in profile counters: ");
2216 auto &Ctx = M.getContext();
2217 // Read the counter array from file.
2218 auto ReaderOrErr = IndexedInstrProfReader::create(ProfileFileName, FS,
2219 ProfileRemappingFileName);
2220 if (Error E = ReaderOrErr.takeError()) {
2221 handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) {
2222 Ctx.diagnose(
2223 DiagnosticInfoPGOProfile(ProfileFileName.data(), EI.message()));
2224 });
2225 return false;
2226 }
2227
2228 std::unique_ptr<IndexedInstrProfReader> PGOReader =
2229 std::move(ReaderOrErr.get());
2230 if (!PGOReader) {
2231 Ctx.diagnose(DiagnosticInfoPGOProfile(ProfileFileName.data(),
2232 StringRef("Cannot get PGOReader")));
2233 return false;
2234 }
2235 if (!PGOReader->hasCSIRLevelProfile() && IsCS)
2236 return false;
2237
2238 // TODO: might need to change the warning once the clang option is finalized.
2239 if (!PGOReader->isIRLevelProfile()) {
2240 Ctx.diagnose(DiagnosticInfoPGOProfile(
2241 ProfileFileName.data(), "Not an IR level instrumentation profile"));
2242 return false;
2243 }
2244 if (PGOReader->functionEntryOnly()) {
2245 Ctx.diagnose(DiagnosticInfoPGOProfile(
2246 ProfileFileName.data(),
2247 "Function entry profiles are not yet supported for optimization"));
2248 return false;
2249 }
2250
2252 for (GlobalVariable &G : M.globals()) {
2253 if (!G.hasName() || !G.hasMetadata(LLVMContext::MD_type))
2254 continue;
2255
2256 // Create the PGOFuncName meta data.
2257 createPGONameMetadata(G, getPGOName(G, false /* InLTO*/));
2258 }
2259 }
2260
2261 // Add the profile summary (read from the header of the indexed summary) here
2262 // so that we can use it below when reading counters (which checks if the
2263 // function should be marked with a cold or inlinehint attribute).
2264 M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()),
2267 PSI->refresh();
2268
2269 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
2270 collectComdatMembers(M, ComdatMembers);
2271 std::vector<Function *> HotFunctions;
2272 std::vector<Function *> ColdFunctions;
2273
2274 // If the profile marked as always instrument the entry BB, do the
2275 // same. Note this can be overwritten by the internal option in CFGMST.h
2276 bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled();
2277 if (PGOInstrumentEntry.getNumOccurrences() > 0)
2278 InstrumentFuncEntry = PGOInstrumentEntry;
2279 bool InstrumentLoopEntries = PGOReader->instrLoopEntriesEnabled();
2280 if (PGOInstrumentLoopEntries.getNumOccurrences() > 0)
2281 InstrumentLoopEntries = PGOInstrumentLoopEntries;
2282
2283 bool HasSingleByteCoverage = PGOReader->hasSingleByteCoverage();
2284 for (auto &F : M) {
2285 if (skipPGOUse(F))
2286 continue;
2287 TargetLibraryInfo &TLI = LookupTLI(F);
2288 BranchProbabilityInfo *BPI = LookupBPI(F);
2289 BlockFrequencyInfo *BFI = LookupBFI(F);
2290 LoopInfo *LI = LookupLI(F);
2291 if (!HasSingleByteCoverage) {
2292 // Split indirectbr critical edges here before computing the MST rather
2293 // than later in getInstrBB() to avoid invalidating it.
2294 SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI,
2295 BFI);
2296 }
2297 PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, LI, PSI, IsCS,
2298 InstrumentFuncEntry, InstrumentLoopEntries,
2299 HasSingleByteCoverage);
2300 if (!Func.getRecord(PGOReader.get()))
2301 continue;
2302 if (HasSingleByteCoverage) {
2303 Func.populateCoverage();
2304 continue;
2305 }
2306 // When PseudoKind is set to a value other than InstrProfRecord::NotPseudo,
2307 // it means the profile for the function is unrepresentative and this
2308 // function is actually hot / warm. We will reset the function hot / cold
2309 // attribute and drop all the profile counters.
2311 bool AllZeros = false;
2312 if (!Func.readCounters(AllZeros, PseudoKind))
2313 continue;
2314 if (AllZeros) {
2315 F.setEntryCount(0);
2316 if (Func.getProgramMaxCount() != 0)
2317 ColdFunctions.push_back(&F);
2318 continue;
2319 }
2320 if (PseudoKind != InstrProfRecord::NotPseudo) {
2321 // Clear function attribute cold.
2322 if (F.hasFnAttribute(Attribute::Cold))
2323 F.removeFnAttr(Attribute::Cold);
2324 // Set function attribute as hot.
2325 if (PseudoKind == InstrProfRecord::PseudoHot)
2326 F.addFnAttr(Attribute::Hot);
2327 continue;
2328 }
2329 Func.populateCounters();
2330 Func.setBranchWeights();
2331 Func.annotateValueSites();
2332 Func.annotateIrrLoopHeaderWeights();
2333 Func.setBlockUniformityAttribute();
2334 PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr();
2335 if (FreqAttr == PGOUseFunc::FFA_Cold)
2336 ColdFunctions.push_back(&F);
2337 else if (FreqAttr == PGOUseFunc::FFA_Hot)
2338 HotFunctions.push_back(&F);
2339 if (PGOViewCounts != PGOVCT_None &&
2340 (ViewBlockFreqFuncName.empty() ||
2341 F.getName() == ViewBlockFreqFuncName)) {
2343 std::unique_ptr<BranchProbabilityInfo> NewBPI =
2344 std::make_unique<BranchProbabilityInfo>(F, LI);
2345 std::unique_ptr<BlockFrequencyInfo> NewBFI =
2346 std::make_unique<BlockFrequencyInfo>(F, *NewBPI, LI);
2348 NewBFI->view();
2349 else if (PGOViewCounts == PGOVCT_Text) {
2350 dbgs() << "pgo-view-counts: " << Func.getFunc().getName() << "\n";
2351 NewBFI->print(dbgs());
2352 }
2353 }
2355 (ViewBlockFreqFuncName.empty() ||
2356 F.getName() == ViewBlockFreqFuncName)) {
2358 if (ViewBlockFreqFuncName.empty())
2359 WriteGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName());
2360 else
2361 ViewGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName());
2362 else if (PGOViewRawCounts == PGOVCT_Text) {
2363 dbgs() << "pgo-view-raw-counts: " << Func.getFunc().getName() << "\n";
2364 Func.dumpInfo();
2365 }
2366 }
2367
2370 BranchProbabilityInfo NBPI(F, LI);
2371
2372 // Fix func entry count.
2373 if (PGOFixEntryCount)
2374 fixFuncEntryCount(Func, LI, NBPI);
2375
2376 // Verify BlockFrequency information.
2377 uint64_t HotCountThreshold = 0, ColdCountThreshold = 0;
2378 if (PGOVerifyHotBFI) {
2379 HotCountThreshold = PSI->getOrCompHotCountThreshold();
2381 }
2382 verifyFuncBFI(Func, LI, NBPI, HotCountThreshold, ColdCountThreshold);
2383 }
2384 }
2385
2386 // Set function hotness attribute from the profile.
2387 // We have to apply these attributes at the end because their presence
2388 // can affect the BranchProbabilityInfo of any callers, resulting in an
2389 // inconsistent MST between prof-gen and prof-use.
2390 for (auto &F : HotFunctions) {
2391 F->addFnAttr(Attribute::InlineHint);
2392 LLVM_DEBUG(dbgs() << "Set inline attribute to function: " << F->getName()
2393 << "\n");
2394 }
2395 for (auto &F : ColdFunctions) {
2396 // Only set when there is no Attribute::Hot set by the user. For Hot
2397 // attribute, user's annotation has the precedence over the profile.
2398 if (F->hasFnAttribute(Attribute::Hot)) {
2399 auto &Ctx = M.getContext();
2400 std::string Msg = std::string("Function ") + F->getName().str() +
2401 std::string(" is annotated as a hot function but"
2402 " the profile is cold");
2403 Ctx.diagnose(
2404 DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning));
2405 continue;
2406 }
2407 F->addFnAttr(Attribute::Cold);
2408 LLVM_DEBUG(dbgs() << "Set cold attribute to function: " << F->getName()
2409 << "\n");
2410 }
2411 return true;
2412}
2413
2415 std::string Filename, std::string RemappingFilename, bool IsCS,
2417 : ProfileFileName(std::move(Filename)),
2418 ProfileRemappingFileName(std::move(RemappingFilename)), IsCS(IsCS),
2419 FS(std::move(VFS)) {
2420 if (!PGOTestProfileFile.empty())
2421 ProfileFileName = PGOTestProfileFile;
2423 ProfileRemappingFileName = PGOTestProfileRemappingFile;
2424 if (!FS)
2426}
2427
2430
2431 auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
2432 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
2433 return FAM.getResult<TargetLibraryAnalysis>(F);
2434 };
2435 auto LookupBPI = [&FAM](Function &F) {
2436 return &FAM.getResult<BranchProbabilityAnalysis>(F);
2437 };
2438 auto LookupBFI = [&FAM](Function &F) {
2439 return &FAM.getResult<BlockFrequencyAnalysis>(F);
2440 };
2441 auto LookupLI = [&FAM](Function &F) {
2442 return &FAM.getResult<LoopAnalysis>(F);
2443 };
2444
2445 auto *PSI = &MAM.getResult<ProfileSummaryAnalysis>(M);
2446 if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName, *FS,
2447 LookupTLI, LookupBPI, LookupBFI, LookupLI, PSI,
2448 IsCS))
2449 return PreservedAnalyses::all();
2450
2451 return PreservedAnalyses::none();
2452}
2453
2454static std::string getSimpleNodeName(const BasicBlock *Node) {
2455 if (!Node->getName().empty())
2456 return Node->getName().str();
2457
2458 std::string SimpleNodeName;
2459 raw_string_ostream OS(SimpleNodeName);
2460 Node->printAsOperand(OS, false);
2461 return SimpleNodeName;
2462}
2463
2465 uint64_t MaxCount) {
2466 auto Weights = downscaleWeights(EdgeCounts, MaxCount);
2467
2468 LLVM_DEBUG(dbgs() << "Weight is: "; for (const auto &W : Weights) {
2469 dbgs() << W << " ";
2470 } dbgs() << "\n");
2471
2472 misexpect::checkExpectAnnotations(*TI, Weights, /*IsFrontend=*/false);
2473
2474 setBranchWeights(*TI, Weights, /*IsExpected=*/false);
2475
2477 std::string BrCondStr = getBranchCondString(TI);
2478 if (BrCondStr.empty())
2479 return;
2480
2481 uint64_t WSum =
2482 std::accumulate(Weights.begin(), Weights.end(), (uint64_t)0,
2483 [](uint64_t w1, uint64_t w2) { return w1 + w2; });
2484 uint64_t TotalCount =
2485 std::accumulate(EdgeCounts.begin(), EdgeCounts.end(), (uint64_t)0,
2486 [](uint64_t c1, uint64_t c2) { return c1 + c2; });
2487 uint64_t Scale = calculateCountScale(WSum);
2488 BranchProbability BP(scaleBranchCount(Weights[0], Scale),
2489 scaleBranchCount(WSum, Scale));
2490 std::string BranchProbStr;
2491 raw_string_ostream OS(BranchProbStr);
2492 OS << BP;
2493 OS << " (total count : " << TotalCount << ")";
2494 Function *F = TI->getParent()->getParent();
2496 ORE.emit([&]() {
2497 return OptimizationRemark(DEBUG_TYPE, "pgo-instrumentation", TI)
2498 << BrCondStr << " is true with probability : " << BranchProbStr;
2499 });
2500 }
2501}
2502
2503namespace llvm {
2504
2506 MDBuilder MDB(M->getContext());
2507 TI->setMetadata(llvm::LLVMContext::MD_irr_loop,
2509}
2510
2511template <> struct GraphTraits<PGOUseFunc *> {
2512 using NodeRef = const BasicBlock *;
2515
2516 static NodeRef getEntryNode(const PGOUseFunc *G) {
2517 return &G->getFunc().front();
2518 }
2519
2521 return succ_begin(N);
2522 }
2523
2524 static ChildIteratorType child_end(const NodeRef N) { return succ_end(N); }
2525
2526 static nodes_iterator nodes_begin(const PGOUseFunc *G) {
2527 return nodes_iterator(G->getFunc().begin());
2528 }
2529
2530 static nodes_iterator nodes_end(const PGOUseFunc *G) {
2531 return nodes_iterator(G->getFunc().end());
2532 }
2533};
2534
2535template <> struct DOTGraphTraits<PGOUseFunc *> : DefaultDOTGraphTraits {
2536 explicit DOTGraphTraits(bool isSimple = false)
2538
2539 static std::string getGraphName(const PGOUseFunc *G) {
2540 return std::string(G->getFunc().getName());
2541 }
2542
2543 std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph) {
2544 std::string Result;
2545 raw_string_ostream OS(Result);
2546
2547 OS << getSimpleNodeName(Node) << ":\\l";
2548 PGOUseBBInfo *BI = Graph->findBBInfo(Node);
2549 OS << "Count : ";
2550 if (BI && BI->Count)
2551 OS << *BI->Count << "\\l";
2552 else
2553 OS << "Unknown\\l";
2554
2555 if (!PGOInstrSelect)
2556 return Result;
2557
2558 for (const Instruction &I : *Node) {
2559 if (!isa<SelectInst>(&I))
2560 continue;
2561 // Display scaled counts for SELECT instruction:
2562 OS << "SELECT : { T = ";
2563 uint64_t TC, FC;
2564 bool HasProf = extractBranchWeights(I, TC, FC);
2565 if (!HasProf)
2566 OS << "Unknown, F = Unknown }\\l";
2567 else
2568 OS << TC << ", F = " << FC << " }\\l";
2569 }
2570 return Result;
2571 }
2572};
2573
2574} // end namespace llvm
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file implements a class to represent arbitrary precision integral constant values and operations...
Function Alias Analysis false
This file contains the simple types necessary to represent the attributes associated with functions a...
This file finds the minimum set of blocks on a CFG that must be instrumented to infer execution cover...
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
#define LLVM_ABI
Definition Compiler.h:215
This file contains the declarations for the subclasses of Constant, which represent the different fla...
post inline ee instrument
static BasicBlock * getInstrBB(CFGMST< Edge, BBInfo > &MST, Edge &E, const DenseSet< const BasicBlock * > &ExecBlocks)
#define DEBUG_TYPE
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
#define INSTR_PROF_QUOTE(x)
#define VARIANT_MASK_CSIR_PROF
#define VARIANT_MASK_DBG_CORRELATE
#define INSTR_PROF_RAW_VERSION
#define INSTR_PROF_RAW_VERSION_VAR
#define VARIANT_MASK_TEMPORAL_PROF
#define VARIANT_MASK_IR_PROF
#define VARIANT_MASK_BYTE_COVERAGE
#define VARIANT_MASK_INSTR_ENTRY
#define VARIANT_MASK_FUNCTION_ENTRY_ONLY
#define VARIANT_MASK_INSTR_LOOP_ENTRIES
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define G(x, y, z)
Definition MD5.cpp:55
Machine Check Debug Module
static cl::opt< unsigned > ColdCountThreshold("mfs-count-threshold", cl::desc("Minimum number of times a block must be executed to be retained."), cl::init(1), cl::Hidden)
static constexpr StringLiteral Filename
static GlobalVariable * createIRLevelProfileFlagVar(Module &M, PGOInstrumentationType InstrumentationType)
static cl::opt< std::string > PGOTestProfileRemappingFile("pgo-test-profile-remapping-file", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path of profile remapping file. This is mainly for " "test purpose."))
static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI, BranchProbabilityInfo &NBPI)
static void annotateFunctionWithHashMismatch(Function &F, LLVMContext &ctx)
static cl::opt< unsigned > MaxNumMemOPAnnotations("memop-max-annotations", cl::init(4), cl::Hidden, cl::desc("Max number of precise value annotations for a single memop" "intrinsic"))
static cl::opt< unsigned > MaxNumAnnotations("icp-max-annotations", cl::init(3), cl::Hidden, cl::desc("Max number of annotations for a single indirect " "call callsite"))
static bool skipPGOGen(const Function &F)
static void collectComdatMembers(Module &M, std::unordered_multimap< Comdat *, GlobalValue * > &ComdatMembers)
static void populateEHOperandBundle(VPCandidateInfo &Cand, DenseMap< BasicBlock *, ColorVector > &BlockColors, SmallVectorImpl< OperandBundleDef > &OpBundles)
static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI, BranchProbabilityInfo &NBPI, uint64_t HotCountThreshold, uint64_t ColdCountThreshold)
static cl::opt< bool > DoComdatRenaming("do-comdat-renaming", cl::init(false), cl::Hidden, cl::desc("Append function hash to the name of COMDAT function to avoid " "function hash mismatch due to the preinliner"))
static bool annotateAllFunctions(Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName, vfs::FileSystem &FS, function_ref< TargetLibraryInfo &(Function &)> LookupTLI, function_ref< BranchProbabilityInfo *(Function &)> LookupBPI, function_ref< BlockFrequencyInfo *(Function &)> LookupBFI, function_ref< LoopInfo *(Function &)> LookupLI, ProfileSummaryInfo *PSI, bool IsCS)
static void setupBBInfoEdges(const FuncPGOInstrumentation< PGOUseEdge, PGOUseBBInfo > &FuncInfo)
Set up InEdges/OutEdges for all BBs in the MST.
static bool skipPGOUse(const Function &F)
static bool canRenameComdat(Function &F, std::unordered_multimap< Comdat *, GlobalValue * > &ComdatMembers)
ValueProfileCollector::CandidateInfo VPCandidateInfo
static bool InstrumentAllFunctions(Module &M, function_ref< TargetLibraryInfo &(Function &)> LookupTLI, function_ref< BranchProbabilityInfo *(Function &)> LookupBPI, function_ref< BlockFrequencyInfo *(Function &)> LookupBFI, function_ref< LoopInfo *(Function &)> LookupLI, PGOInstrumentationType InstrumentationType)
static uint64_t sumEdgeCount(const ArrayRef< PGOUseEdge * > Edges)
static uint32_t getMaxNumAnnotations(InstrProfValueKind ValueProfKind)
static cl::opt< bool > DisableValueProfiling("disable-vp", cl::init(false), cl::Hidden, cl::desc("Disable Value Profiling"))
static std::string getSimpleNodeName(const BasicBlock *Node)
static bool isIndirectBrTarget(BasicBlock *BB)
static cl::opt< std::string > PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path of profile data file. This is " "mainly for test purpose."))
static std::string getBranchCondString(Instruction *TI)
static const char * ValueProfKindDescr[]
This file provides the interface for IR based instrumentation passes ( (profile-gen,...
FunctionAnalysisManager FAM
ModuleAnalysisManager MAM
if(PassOpts->AAPipeline)
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
static void visit(BasicBlock &Start, std::function< bool(BasicBlock *)> op)
std::pair< BasicBlock *, BasicBlock * > Edge
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
StringSet - A set-like wrapper for the StringMap.
#define LLVM_DEBUG(...)
Definition Debug.h:119
Defines the virtual file system interface vfs::FileSystem.
Value * RHS
void printAsOperand(OutputBuffer &OB, Prec P=Prec::Default, bool StrictlyWorse=false) const
static const fltSemantics & IEEEdouble()
Definition APFloat.h:298
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:345
static APFloat getZero(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Zero.
Definition APFloat.h:1144
Class for arbitrary precision integers.
Definition APInt.h:78
This templated class represents "all analyses that operate over <aparticular IR unit>" (e....
Definition Analysis.h:50
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
iterator end() const
Definition ArrayRef.h:130
size_t size() const
Get the array size.
Definition ArrayRef.h:141
iterator begin() const
Definition ArrayRef.h:129
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator end()
Definition BasicBlock.h:474
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
LLVM_ABI const_iterator getFirstNonPHIOrDbgOrAlloca() const
Returns an iterator to the first instruction in this block that is not a PHINode, a debug intrinsic,...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
Definition BasicBlock.h:237
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
LLVM_ABI bool isIrrLoopHeader(const BasicBlock *BB)
Returns true if BB is an irreducible loop header block.
LLVM_ABI std::optional< uint64_t > getBlockProfileCount(const BasicBlock *BB, bool AllowSynthetic=false) const
Returns the estimated profile count of BB.
Analysis pass which computes BranchProbabilityInfo.
Analysis providing branch probability information.
Edge & addEdge(BasicBlock *Src, BasicBlock *Dest, uint64_t W)
Definition CFGMST.h:330
const std::vector< std::unique_ptr< Edge > > & allEdges() const
Definition CFGMST.h:367
size_t numEdges() const
Definition CFGMST.h:373
Predicate getPredicate() const
Return the predicate for this instruction.
Definition InstrTypes.h:828
LLVM_ABI StringRef getName() const
Definition Comdat.cpp:28
void setSelectionKind(SelectionKind Val)
Definition Comdat.h:48
SelectionKind getSelectionKind() const
Definition Comdat.h:47
Conditional Branch instruction.
Value * getCondition() const
static ConstantAsMetadata * get(Constant *C)
Definition Metadata.h:537
static LLVM_ABI Constant * getPointerBitCastOrAddrSpaceCast(Constant *C, Type *Ty)
Create a BitCast or AddrSpaceCast for a pointer type depending on the address space.
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition Constants.h:231
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition Constants.h:225
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:219
static LLVM_ABI Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:225
bool empty() const
Definition DenseMap.h:173
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:286
Diagnostic information for the PGO profiler.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:151
Base class for error info classes.
Definition Error.h:44
virtual std::string message() const
Return the error message as a string.
Definition Error.h:52
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
static LLVM_ABI GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
Definition Globals.cpp:630
@ HiddenVisibility
The GV is hidden.
Definition GlobalValue.h:69
@ ExternalLinkage
Externally visible function.
Definition GlobalValue.h:53
@ WeakAnyLinkage
Keep one copy of named function when linking (weak)
Definition GlobalValue.h:57
@ AvailableExternallyLinkage
Available for inspection, not emission.
Definition GlobalValue.h:54
@ LinkOnceODRLinkage
Same, but only replaced by something equivalent.
Definition GlobalValue.h:56
This instruction compares its operands according to the predicate given to the constructor.
static Expected< std::unique_ptr< IndexedInstrProfReader > > create(const Twine &Path, vfs::FileSystem &FS, const Twine &RemappingPath="")
Factory method to create an indexed reader.
uint64_t getMaximumFunctionCount(bool UseCS)
Return the maximum of all known function counts.
Expected< NamedInstrProfRecord > getInstrProfRecord(StringRef FuncName, uint64_t FuncHash, StringRef DeprecatedFuncName="", uint64_t *MismatchedFuncSum=nullptr)
Return the NamedInstrProfRecord associated with FuncName and FuncHash.
Base class for instruction visitors.
Definition InstVisitor.h:78
static bool canInstrumentCallsite(const CallBase &CB)
instrprof_error get() const
Definition InstrProf.h:476
std::string message() const override
Return the error message as a string.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
A smart pointer to a reference-counted object that inherits from RefCountedBase or ThreadSafeRefCount...
uint32_t getCRC() const
Definition CRC.h:53
LLVM_ABI void update(ArrayRef< uint8_t > Data)
Definition CRC.cpp:103
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Analysis pass that exposes the LoopInfo for a function.
Definition LoopInfo.h:587
LLVM_ABI MDString * createString(StringRef Str)
Return the given string as metadata.
Definition MDBuilder.cpp:21
LLVM_ABI MDNode * createIrrLoopHeaderWeight(uint64_t Weight)
Return metadata containing an irreducible loop header weight.
Metadata node.
Definition Metadata.h:1069
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1554
Tuple of metadata.
Definition Metadata.h:1482
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1511
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
Diagnostic information for optimization analysis remarks.
The optimization diagnostic interface.
LLVM_ABI void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for applied optimization remarks.
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
LLVM_ABI PGOInstrumentationUse(std::string Filename="", std::string RemappingFilename="", bool IsCS=false, IntrusiveRefCntPtr< vfs::FileSystem > FS=nullptr)
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Definition Analysis.h:151
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Definition Analysis.h:132
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
LLVM_ABI uint64_t getOrCompColdCountThreshold() const
Returns ColdCountThreshold if set.
LLVM_ABI bool isColdCount(uint64_t C) const
Returns true if count C is considered cold.
LLVM_ABI void refresh(std::unique_ptr< ProfileSummary > &&Other=nullptr)
If a summary is provided as argument, use that.
LLVM_ABI bool isHotCount(uint64_t C) const
Returns true if count C is considered hot.
LLVM_ABI uint64_t getOrCompHotCountThreshold() const
Returns HotCountThreshold if set.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
constexpr bool empty() const
Check if the string is empty.
Definition StringRef.h:141
constexpr const char * data() const
Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:138
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
EltTy front() const
unsigned size() const
Triple - Helper class for working with autoconf configuration names.
Definition Triple.h:47
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
Definition Twine.cpp:17
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
Definition Type.cpp:310
LLVM_ABI void print(raw_ostream &O, bool IsForDebug=false, bool NoDetails=false) const
Print the current type.
Value * getOperand(unsigned i) const
Definition User.h:207
std::vector< CandidateInfo > get(InstrProfValueKind Kind) const
returns a list of value profiling candidates of the given kind
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition ilist_node.h:34
A raw_ostream that writes to an std::string.
The virtual file system interface.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
This file contains the declaration of the Comdat class, which represents a single COMDAT in LLVM.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
uint64_t getFuncHash(const FuncRecordTy *Record)
Return the structural hash associated with the function.
LLVM_ABI void checkExpectAnnotations(const Instruction &I, ArrayRef< uint32_t > ExistingWeights, bool IsFrontend)
checkExpectAnnotations - compares PGO counters to the thresholds used for llvm.expect and warns if th...
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
void write64le(void *P, uint64_t V)
Definition Endian.h:478
LLVM_ABI IntrusiveRefCntPtr< FileSystem > getRealFileSystem()
Gets an vfs::FileSystem for the 'real' file system, as seen by the operating system.
This is an optimization pass for GlobalISel generic memory operations.
static cl::opt< bool > PGOTreatUnknownAsCold("pgo-treat-unknown-as-cold", cl::init(false), cl::Hidden, cl::desc("For cold function instrumentation, treat count unknown(e.g. " "unprofiled) functions as cold."))
static cl::opt< bool > PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden, cl::desc("Use this option to turn on/off " "memory intrinsic size profiling."))
LLVM_ABI void setIrrLoopHeaderMetadata(Module *M, Instruction *TI, uint64_t Count)
LLVM_ABI void setProfMetadata(Instruction *TI, ArrayRef< uint64_t > EdgeCounts, uint64_t MaxCount)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1669
LLVM_ABI std::string getPGOFuncName(const Function &F, bool InLTO=false, uint64_t Version=INSTR_PROF_INDEX_VERSION)
Please use getIRPGOFuncName for LLVM IR instrumentation.
static cl::opt< bool > PGOViewBlockCoverageGraph("pgo-view-block-coverage-graph", cl::desc("Create a dot file of CFGs with block " "coverage inference information"))
LLVM_ABI void createPGOFuncNameMetadata(Function &F, StringRef PGOFuncName)
Create the PGOFuncName meta data if PGOFuncName is different from function's raw name.
LLVM_ABI unsigned GetSuccessorNumber(const BasicBlock *BB, const BasicBlock *Succ)
Search for the specified successor of basic block BB and return its position in the terminator instru...
Definition CFG.cpp:90
LLVM_ABI std::string getIRPGOFuncName(const Function &F, bool InLTO=false)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
RelativeUniformCounterPtr ValuesPtrExpr NumValueSites[IPVK_Last+1]
Definition InstrProf.h:95
auto successors(const MachineBasicBlock *BB)
LLVM_ABI void createProfileSamplingVar(Module &M)
void handleAllErrors(Error E, HandlerTs &&... Handlers)
Behaves the same as handleErrors, except that by contract all errors must be handled by the given han...
Definition Error.h:1013
constexpr from_range_t from_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI bool SplitIndirectBrCriticalEdges(Function &F, bool IgnoreBlocksWithoutPHI, BranchProbabilityInfo *BPI=nullptr, BlockFrequencyInfo *BFI=nullptr, DomTreeUpdater *DTU=nullptr)
LLVM_ABI DenseMap< BasicBlock *, ColorVector > colorEHFunclets(Function &F)
If an EH funclet personality is in use (see isFuncletEHPersonality), this will recompute which blocks...
LLVM_ABI void createPGONameMetadata(GlobalObject &GO, StringRef PGOName)
Create the PGOName metadata if a global object's PGO name is different from its mangled name.
InnerAnalysisManagerProxy< FunctionAnalysisManager, Module > FunctionAnalysisManagerModuleProxy
Provide the FunctionAnalysisManager to Module proxy.
static cl::opt< bool > PGOBlockCoverage("pgo-block-coverage", cl::desc("Use this option to enable basic block coverage instrumentation"))
cl::opt< bool > PGOWarnMissing
raw_ostream & WriteGraph(raw_ostream &O, const GraphType &G, bool ShortNames=false, const Twine &Title="")
cl::opt< unsigned > MaxNumVTableAnnotations("icp-max-num-vtables", cl::init(6), cl::Hidden, cl::desc("Max number of vtables annotated for a vtable load instruction."))
static cl::opt< bool > PGOTemporalInstrumentation("pgo-temporal-instrumentation", cl::desc("Use this option to enable temporal instrumentation"))
cl::opt< bool > EnableVTableProfileUse("enable-vtable-profile-use", cl::init(false), cl::desc("If ThinLTO and WPD is enabled and this option is true, vtable " "profiles will be used by ICP pass for more efficient indirect " "call sequence. If false, type profiles won't be used."))
bool isScopedEHPersonality(EHPersonality Pers)
Returns true if this personality uses scope-style EH IR instructions: catchswitch,...
RelativeUniformCounterPtr ValuesPtrExpr VTableAddr Value
Definition InstrProf.h:143
LLVM_ABI void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected, bool ElideAllZero=false)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
FuncHash
Definition InstrProf.h:78
LLVM_ABI std::string getPGOName(const GlobalVariable &V, bool InLTO=false)
cl::opt< std::string > ViewBlockFreqFuncName("view-bfi-func-name", cl::Hidden, cl::desc("The option to specify " "the name of the function " "whose CFG will be displayed."))
LLVM_ABI GlobalVariable * createPGOFuncNameVar(Function &F, StringRef PGOFuncName)
Create and return the global variable for function name used in PGO instrumentation.
LLVM_ABI void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
auto reverse(ContainerTy &&C)
Definition STLExtras.h:407
static cl::opt< bool > EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden, cl::desc("When this option is on, the annotated " "branch probability will be emitted as " "optimization remarks: -{Rpass|" "pass-remarks}=pgo-instrumentation"))
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
static cl::opt< unsigned > PGOVerifyBFIRatio("pgo-verify-bfi-ratio", cl::init(2), cl::Hidden, cl::desc("Set the threshold for pgo-verify-bfi: only print out " "mismatched BFI if the difference percentage is greater than " "this value (in percentage)."))
static cl::opt< bool > PGOInstrumentLoopEntries("pgo-instrument-loop-entries", cl::init(false), cl::Hidden, cl::desc("Force to instrument loop entries."))
static cl::opt< unsigned > PGOFunctionSizeThreshold("pgo-function-size-threshold", cl::Hidden, cl::desc("Do not instrument functions smaller than this threshold."))
LLVM_ABI EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
static cl::opt< bool > PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden, cl::desc("Fix function entry count in profile use."))
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
static cl::opt< PGOViewCountsType > PGOViewRawCounts("pgo-view-raw-counts", cl::Hidden, cl::desc("A boolean option to show CFG dag or text " "with raw profile counts from " "profile data. See also option " "-pgo-view-counts. To limit graph " "display to only one function, use " "filtering option -view-bfi-func-name."), cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), clEnumValN(PGOVCT_Graph, "graph", "show a graph."), clEnumValN(PGOVCT_Text, "text", "show in text.")))
static cl::opt< bool > PGOVerifyBFI("pgo-verify-bfi", cl::init(false), cl::Hidden, cl::desc("Print out mismatched BFI counts after setting profile metadata " "The print is enabled under -Rpass-analysis=pgo, or " "internal option -pass-remarks-analysis=pgo."))
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
cl::opt< bool > NoPGOWarnMismatch
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
static cl::opt< uint64_t > PGOColdInstrumentEntryThreshold("pgo-cold-instrument-entry-threshold", cl::init(0), cl::Hidden, cl::desc("For cold function instrumentation, skip instrumenting functions " "whose entry count is above the given value."))
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
InstrProfValueKind
Definition InstrProf.h:323
cl::opt< PGOViewCountsType > PGOViewCounts("pgo-view-counts", cl::Hidden, cl::desc("A boolean option to show CFG dag or text with " "block profile counts and branch probabilities " "right after PGO profile annotation step. The " "profile counts are computed using branch " "probabilities from the runtime profile data and " "block frequency propagation algorithm. To view " "the raw counts from the profile, use option " "-pgo-view-raw-counts instead. To limit graph " "display to only one function, use filtering option " "-view-bfi-func-name."), cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), clEnumValN(PGOVCT_Graph, "graph", "show a graph."), clEnumValN(PGOVCT_Text, "text", "show in text.")))
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
OperandBundleDefT< Value * > OperandBundleDef
Definition AutoUpgrade.h:34
LLVM_ABI void appendToCompilerUsed(Module &M, ArrayRef< GlobalValue * > Values)
Adds global values to the llvm.compiler.used list.
static cl::opt< unsigned > PGOVerifyBFICutoff("pgo-verify-bfi-cutoff", cl::init(5), cl::Hidden, cl::desc("Set the threshold for pgo-verify-bfi: skip the counts whose " "profile count value is below."))
RelativeUniformCounterPtr ValuesPtrExpr VTableAddr Count
Definition InstrProf.h:145
LLVM_ABI BasicBlock * SplitCriticalEdge(Instruction *TI, unsigned SuccNum, const CriticalEdgeSplittingOptions &Options=CriticalEdgeSplittingOptions(), const Twine &BBName="")
If this edge is a critical edge, insert a new node to split the critical edge.
void ViewGraph(const GraphType &G, const Twine &Name, bool ShortNames=false, const Twine &Title="", GraphProgram::Name Program=GraphProgram::DOT)
ViewGraph - Emit a dot graph, run 'dot', run gv on the postscript file, then cleanup.
LLVM_ABI bool isCriticalEdge(const Instruction *TI, unsigned SuccNum, bool AllowIdenticalEdges=false)
Return true if the specified edge is a critical edge.
Definition CFG.cpp:106
cl::opt< bool > PGOInstrumentColdFunctionOnly
cl::list< std::string > CtxPGOSkipCallsiteInstrument("ctx-prof-skip-callsite-instr", cl::Hidden, cl::desc("Do not instrument callsites to functions in this list. Intended " "for testing."))
LLVM_ABI bool canRenameComdatFunc(const Function &F, bool CheckAddressTaken=false)
Check if we can safely rename this Comdat function.
static cl::opt< bool > PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden, cl::desc("Use this option to turn on/off SELECT " "instruction instrumentation. "))
LLVM_ABI void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput)
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1917
TinyPtrVector< BasicBlock * > ColorVector
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto predecessors(const MachineBasicBlock *BB)
Instruction::const_succ_iterator const_succ_iterator
Definition CFG.h:127
llvm::cl::opt< llvm::InstrProfCorrelator::ProfCorrelatorKind > ProfileCorrelate
static cl::opt< bool > PGOFunctionEntryCoverage("pgo-function-entry-coverage", cl::Hidden, cl::desc("Use this option to enable function entry coverage instrumentation."))
static cl::opt< unsigned > PGOFunctionCriticalEdgeThreshold("pgo-critical-edge-threshold", cl::init(20000), cl::Hidden, cl::desc("Do not instrument functions with the number of critical edges " " greater than this threshold."))
uint32_t scaleBranchCount(uint64_t Count, uint64_t Scale)
Scale an individual branch count.
static cl::opt< bool > PGOVerifyHotBFI("pgo-verify-hot-bfi", cl::init(false), cl::Hidden, cl::desc("Print out the non-match BFI count if a hot raw profile count " "becomes non-hot, or a cold raw profile count becomes hot. " "The print is enabled under -Rpass-analysis=pgo, or " "internal option -pass-remarks-analysis=pgo."))
uint64_t calculateCountScale(uint64_t MaxCount)
Calculate what to divide by to scale counts.
LLVM_ABI SmallVector< uint32_t > downscaleWeights(ArrayRef< uint64_t > Weights, std::optional< uint64_t > KnownMaxCount=std::nullopt)
downscale the given weights preserving the ratio.
LLVM_ABI bool isGPUProfTarget(const Module &M)
Determines whether module targets a GPU eligable for PGO instrumentation.
cl::opt< bool > EnableVTableValueProfiling("enable-vtable-value-profiling", cl::init(false), cl::desc("If true, the virtual table address will be instrumented to know " "the types of a C++ pointer. The information is used in indirect " "call promotion to do selective vtable-based comparison."))
static cl::opt< bool > PGOInstrumentEntry("pgo-instrument-entry", cl::init(false), cl::Hidden, cl::desc("Force to instrument function entry basicblock."))
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
Definition MIRParser.h:39
static cl::opt< std::string > PGOTraceFuncHash("pgo-trace-func-hash", cl::init("-"), cl::Hidden, cl::value_desc("function name"), cl::desc("Trace the hash of the function with this name."))
cl::opt< bool > NoPGOWarnMismatchComdatWeak
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:860
#define N
static std::string getGraphName(const PGOUseFunc *G)
std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph)
DefaultDOTGraphTraits(bool simple=false)
static ChildIteratorType child_end(const NodeRef N)
static NodeRef getEntryNode(const PGOUseFunc *G)
static ChildIteratorType child_begin(const NodeRef N)
static nodes_iterator nodes_end(const PGOUseFunc *G)
static nodes_iterator nodes_begin(const PGOUseFunc *G)
pointer_iterator< Function::const_iterator > nodes_iterator
bool isBlockUniform(unsigned BlockIdx) const
Check if a basic block is entered via a wave-uniform branch.
Definition InstrProf.h:952
std::vector< uint64_t > Counts
Definition InstrProf.h:907
CountPseudoKind getCountPseudoKind() const
Definition InstrProf.h:1035
uint32_t getNumValueSites(uint32_t ValueKind) const
Return the number of instrumented sites for ValueKind.
Definition InstrProf.h:1147
std::vector< uint8_t > UniformityBits
For AMDGPU offload profiling: 1 bit per basic block indicating whether the block is usually entered w...
Definition InstrProf.h:915
static void setCSFlagInHash(uint64_t &FuncHash)
Definition InstrProf.h:1128
static constexpr uint64_t FUNC_HASH_MASK
Definition InstrProf.h:1103