LLVM 20.0.0git
PGOInstrumentation.cpp
Go to the documentation of this file.
1//===- PGOInstrumentation.cpp - MST-based PGO Instrumentation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements PGO instrumentation using a minimum spanning tree based
10// on the following paper:
11// [1] Donald E. Knuth, Francis R. Stevenson. Optimal measurement of points
12// for program frequency counts. BIT Numerical Mathematics 1973, Volume 13,
13// Issue 3, pp 313-322
14// The idea of the algorithm based on the fact that for each node (except for
15// the entry and exit), the sum of incoming edge counts equals the sum of
16// outgoing edge counts. The count of edge on spanning tree can be derived from
17// those edges not on the spanning tree. Knuth proves this method instruments
18// the minimum number of edges.
19//
20// The minimal spanning tree here is actually a maximum weight tree -- on-tree
21// edges have higher frequencies (more likely to execute). The idea is to
22// instrument those less frequently executed edges to reduce the runtime
23// overhead of instrumented binaries.
24//
25// This file contains two passes:
26// (1) Pass PGOInstrumentationGen which instruments the IR to generate edge
27// count profile, and generates the instrumentation for indirect call
28// profiling.
29// (2) Pass PGOInstrumentationUse which reads the edge count profile and
30// annotates the branch weights. It also reads the indirect call value
31// profiling records and annotate the indirect call instructions.
32//
33// To get the precise counter information, These two passes need to invoke at
34// the same compilation point (so they see the same IR). For pass
35// PGOInstrumentationGen, the real work is done in instrumentOneFunc(). For
36// pass PGOInstrumentationUse, the real work in done in class PGOUseFunc and
37// the profile is opened in module level and passed to each PGOUseFunc instance.
38// The shared code for PGOInstrumentationGen and PGOInstrumentationUse is put
39// in class FuncPGOInstrumentation.
40//
41// Class PGOEdge represents a CFG edge and some auxiliary information. Class
42// BBInfo contains auxiliary information for each BB. These two classes are used
43// in pass PGOInstrumentationGen. Class PGOUseEdge and UseBBInfo are the derived
44// class of PGOEdge and BBInfo, respectively. They contains extra data structure
45// used in populating profile counters.
46// The MST implementation is in Class CFGMST (CFGMST.h).
47//
48//===----------------------------------------------------------------------===//
49
52#include "llvm/ADT/APInt.h"
53#include "llvm/ADT/ArrayRef.h"
54#include "llvm/ADT/STLExtras.h"
56#include "llvm/ADT/Statistic.h"
57#include "llvm/ADT/StringRef.h"
58#include "llvm/ADT/Twine.h"
59#include "llvm/ADT/iterator.h"
63#include "llvm/Analysis/CFG.h"
68#include "llvm/IR/Attributes.h"
69#include "llvm/IR/BasicBlock.h"
70#include "llvm/IR/CFG.h"
71#include "llvm/IR/Comdat.h"
72#include "llvm/IR/Constant.h"
73#include "llvm/IR/Constants.h"
75#include "llvm/IR/Dominators.h"
77#include "llvm/IR/Function.h"
78#include "llvm/IR/GlobalAlias.h"
79#include "llvm/IR/GlobalValue.h"
81#include "llvm/IR/IRBuilder.h"
82#include "llvm/IR/InstVisitor.h"
83#include "llvm/IR/InstrTypes.h"
84#include "llvm/IR/Instruction.h"
87#include "llvm/IR/Intrinsics.h"
88#include "llvm/IR/LLVMContext.h"
89#include "llvm/IR/MDBuilder.h"
90#include "llvm/IR/Module.h"
91#include "llvm/IR/PassManager.h"
94#include "llvm/IR/Type.h"
95#include "llvm/IR/Value.h"
99#include "llvm/Support/CRC.h"
100#include "llvm/Support/Casting.h"
103#include "llvm/Support/Debug.h"
104#include "llvm/Support/Error.h"
116#include <algorithm>
117#include <cassert>
118#include <cstdint>
119#include <memory>
120#include <numeric>
121#include <optional>
122#include <stack>
123#include <string>
124#include <unordered_map>
125#include <utility>
126#include <vector>
127
128using namespace llvm;
131
132#define DEBUG_TYPE "pgo-instrumentation"
133
134STATISTIC(NumOfPGOInstrument, "Number of edges instrumented.");
135STATISTIC(NumOfPGOSelectInsts, "Number of select instruction instrumented.");
136STATISTIC(NumOfPGOMemIntrinsics, "Number of mem intrinsics instrumented.");
137STATISTIC(NumOfPGOEdge, "Number of edges.");
138STATISTIC(NumOfPGOBB, "Number of basic-blocks.");
139STATISTIC(NumOfPGOSplit, "Number of critical edge splits.");
140STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts.");
141STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile.");
142STATISTIC(NumOfPGOMissing, "Number of functions without profile.");
143STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations.");
144STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO.");
145STATISTIC(NumOfCSPGOSelectInsts,
146 "Number of select instruction instrumented in CSPGO.");
147STATISTIC(NumOfCSPGOMemIntrinsics,
148 "Number of mem intrinsics instrumented in CSPGO.");
149STATISTIC(NumOfCSPGOEdge, "Number of edges in CSPGO.");
150STATISTIC(NumOfCSPGOBB, "Number of basic-blocks in CSPGO.");
151STATISTIC(NumOfCSPGOSplit, "Number of critical edge splits in CSPGO.");
152STATISTIC(NumOfCSPGOFunc,
153 "Number of functions having valid profile counts in CSPGO.");
154STATISTIC(NumOfCSPGOMismatch,
155 "Number of functions having mismatch profile in CSPGO.");
156STATISTIC(NumOfCSPGOMissing, "Number of functions without profile in CSPGO.");
157STATISTIC(NumCoveredBlocks, "Number of basic blocks that were executed");
158
159// Command line option to specify the file to read profile from. This is
160// mainly used for testing.
162 PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden,
163 cl::value_desc("filename"),
164 cl::desc("Specify the path of profile data file. This is"
165 "mainly for test purpose."));
167 "pgo-test-profile-remapping-file", cl::init(""), cl::Hidden,
168 cl::value_desc("filename"),
169 cl::desc("Specify the path of profile remapping file. This is mainly for "
170 "test purpose."));
171
172// Command line option to disable value profiling. The default is false:
173// i.e. value profiling is enabled by default. This is for debug purpose.
174static cl::opt<bool> DisableValueProfiling("disable-vp", cl::init(false),
176 cl::desc("Disable Value Profiling"));
177
178// Command line option to set the maximum number of VP annotations to write to
179// the metadata for a single indirect call callsite.
181 "icp-max-annotations", cl::init(3), cl::Hidden,
182 cl::desc("Max number of annotations for a single indirect "
183 "call callsite"));
184
185// Command line option to set the maximum number of value annotations
186// to write to the metadata for a single memop intrinsic.
188 "memop-max-annotations", cl::init(4), cl::Hidden,
189 cl::desc("Max number of preicise value annotations for a single memop"
190 "intrinsic"));
191
192// Command line option to control appending FunctionHash to the name of a COMDAT
193// function. This is to avoid the hash mismatch caused by the preinliner.
195 "do-comdat-renaming", cl::init(false), cl::Hidden,
196 cl::desc("Append function hash to the name of COMDAT function to avoid "
197 "function hash mismatch due to the preinliner"));
198
199namespace llvm {
200// Command line option to enable/disable the warning about missing profile
201// information.
202cl::opt<bool> PGOWarnMissing("pgo-warn-missing-function", cl::init(false),
204 cl::desc("Use this option to turn on/off "
205 "warnings about missing profile data for "
206 "functions."));
207
208// Command line option to enable/disable the warning about a hash mismatch in
209// the profile data.
211 NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), cl::Hidden,
212 cl::desc("Use this option to turn off/on "
213 "warnings about profile cfg mismatch."));
214
215// Command line option to enable/disable the warning about a hash mismatch in
216// the profile data for Comdat functions, which often turns out to be false
217// positive due to the pre-instrumentation inline.
219 "no-pgo-warn-mismatch-comdat-weak", cl::init(true), cl::Hidden,
220 cl::desc("The option is used to turn on/off "
221 "warnings about hash mismatch for comdat "
222 "or weak functions."));
223} // namespace llvm
224
225// Command line option to enable/disable select instruction instrumentation.
226static cl::opt<bool>
227 PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden,
228 cl::desc("Use this option to turn on/off SELECT "
229 "instruction instrumentation. "));
230
231// Command line option to turn on CFG dot or text dump of raw profile counts
233 "pgo-view-raw-counts", cl::Hidden,
234 cl::desc("A boolean option to show CFG dag or text "
235 "with raw profile counts from "
236 "profile data. See also option "
237 "-pgo-view-counts. To limit graph "
238 "display to only one function, use "
239 "filtering option -view-bfi-func-name."),
240 cl::values(clEnumValN(PGOVCT_None, "none", "do not show."),
241 clEnumValN(PGOVCT_Graph, "graph", "show a graph."),
242 clEnumValN(PGOVCT_Text, "text", "show in text.")));
243
244// Command line option to enable/disable memop intrinsic call.size profiling.
245static cl::opt<bool>
246 PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden,
247 cl::desc("Use this option to turn on/off "
248 "memory intrinsic size profiling."));
249
250// Emit branch probability as optimization remarks.
251static cl::opt<bool>
252 EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden,
253 cl::desc("When this option is on, the annotated "
254 "branch probability will be emitted as "
255 "optimization remarks: -{Rpass|"
256 "pass-remarks}=pgo-instrumentation"));
257
259 "pgo-instrument-entry", cl::init(false), cl::Hidden,
260 cl::desc("Force to instrument function entry basicblock."));
261
262static cl::opt<bool>
263 PGOInstrumentLoopEntries("pgo-instrument-loop-entries", cl::init(false),
265 cl::desc("Force to instrument loop entries."));
266
268 "pgo-function-entry-coverage", cl::Hidden,
269 cl::desc(
270 "Use this option to enable function entry coverage instrumentation."));
271
273 "pgo-block-coverage",
274 cl::desc("Use this option to enable basic block coverage instrumentation"));
275
276static cl::opt<bool>
277 PGOViewBlockCoverageGraph("pgo-view-block-coverage-graph",
278 cl::desc("Create a dot file of CFGs with block "
279 "coverage inference information"));
280
282 "pgo-temporal-instrumentation",
283 cl::desc("Use this option to enable temporal instrumentation"));
284
285static cl::opt<bool>
286 PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden,
287 cl::desc("Fix function entry count in profile use."));
288
290 "pgo-verify-hot-bfi", cl::init(false), cl::Hidden,
291 cl::desc("Print out the non-match BFI count if a hot raw profile count "
292 "becomes non-hot, or a cold raw profile count becomes hot. "
293 "The print is enabled under -Rpass-analysis=pgo, or "
294 "internal option -pass-remakrs-analysis=pgo."));
295
297 "pgo-verify-bfi", cl::init(false), cl::Hidden,
298 cl::desc("Print out mismatched BFI counts after setting profile metadata "
299 "The print is enabled under -Rpass-analysis=pgo, or "
300 "internal option -pass-remakrs-analysis=pgo."));
301
303 "pgo-verify-bfi-ratio", cl::init(2), cl::Hidden,
304 cl::desc("Set the threshold for pgo-verify-bfi: only print out "
305 "mismatched BFI if the difference percentage is greater than "
306 "this value (in percentage)."));
307
309 "pgo-verify-bfi-cutoff", cl::init(5), cl::Hidden,
310 cl::desc("Set the threshold for pgo-verify-bfi: skip the counts whose "
311 "profile count value is below."));
312
314 "pgo-trace-func-hash", cl::init("-"), cl::Hidden,
315 cl::value_desc("function name"),
316 cl::desc("Trace the hash of the function with this name."));
317
319 "pgo-function-size-threshold", cl::Hidden,
320 cl::desc("Do not instrument functions smaller than this threshold."));
321
323 "pgo-critical-edge-threshold", cl::init(20000), cl::Hidden,
324 cl::desc("Do not instrument functions with the number of critical edges "
325 " greater than this threshold."));
326
328 "pgo-cold-instrument-entry-threshold", cl::init(0), cl::Hidden,
329 cl::desc("For cold function instrumentation, skip instrumenting functions "
330 "whose entry count is above the given value."));
331
333 "pgo-treat-unknown-as-cold", cl::init(false), cl::Hidden,
334 cl::desc("For cold function instrumentation, treat count unknown(e.g. "
335 "unprofiled) functions as cold."));
336
338 "pgo-instrument-cold-function-only", cl::init(false), cl::Hidden,
339 cl::desc("Enable cold function only instrumentation."));
340
342
343namespace llvm {
344// Command line option to turn on CFG dot dump after profile annotation.
345// Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts
347
348// Command line option to specify the name of the function for CFG dump
349// Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name=
351
352// Command line option to enable vtable value profiling. Defined in
353// ProfileData/InstrProf.cpp: -enable-vtable-value-profiling=
357} // namespace llvm
358
359namespace {
360class FunctionInstrumenter final {
361 Module &M;
362 Function &F;
364 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers;
365 BranchProbabilityInfo *const BPI;
366 BlockFrequencyInfo *const BFI;
367 LoopInfo *const LI;
368
369 const PGOInstrumentationType InstrumentationType;
370
371 // FIXME(mtrofin): re-enable this for ctx profiling, for non-indirect calls.
372 // Ctx profiling implicitly captures indirect call cases, but not other
373 // values. Supporting other values is relatively straight-forward - just
374 // another counter range within the context.
375 bool isValueProfilingDisabled() const {
376 return DisableValueProfiling ||
377 InstrumentationType == PGOInstrumentationType::CTXPROF;
378 }
379
380 bool shouldInstrumentEntryBB() const {
381 return PGOInstrumentEntry ||
382 InstrumentationType == PGOInstrumentationType::CTXPROF;
383 }
384
385 bool shouldInstrumentLoopEntries() const { return PGOInstrumentLoopEntries; }
386
387public:
388 FunctionInstrumenter(
390 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
391 BranchProbabilityInfo *BPI = nullptr, BlockFrequencyInfo *BFI = nullptr,
392 LoopInfo *LI = nullptr,
393 PGOInstrumentationType InstrumentationType = PGOInstrumentationType::FDO)
394 : M(M), F(F), TLI(TLI), ComdatMembers(ComdatMembers), BPI(BPI), BFI(BFI),
395 LI(LI), InstrumentationType(InstrumentationType) {}
396
397 void instrument();
398};
399} // namespace
400
401// Return a string describing the branch condition that can be
402// used in static branch probability heuristics:
403static std::string getBranchCondString(Instruction *TI) {
404 BranchInst *BI = dyn_cast<BranchInst>(TI);
405 if (!BI || !BI->isConditional())
406 return std::string();
407
408 Value *Cond = BI->getCondition();
409 ICmpInst *CI = dyn_cast<ICmpInst>(Cond);
410 if (!CI)
411 return std::string();
412
413 std::string result;
414 raw_string_ostream OS(result);
415 OS << CI->getPredicate() << "_";
416 CI->getOperand(0)->getType()->print(OS, true);
417
418 Value *RHS = CI->getOperand(1);
419 ConstantInt *CV = dyn_cast<ConstantInt>(RHS);
420 if (CV) {
421 if (CV->isZero())
422 OS << "_Zero";
423 else if (CV->isOne())
424 OS << "_One";
425 else if (CV->isMinusOne())
426 OS << "_MinusOne";
427 else
428 OS << "_Const";
429 }
430 return result;
431}
432
433static const char *ValueProfKindDescr[] = {
434#define VALUE_PROF_KIND(Enumerator, Value, Descr) Descr,
436};
437
438// Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime
439// aware this is an ir_level profile so it can set the version flag.
440static GlobalVariable *
442 PGOInstrumentationType InstrumentationType) {
443 const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR));
444 Type *IntTy64 = Type::getInt64Ty(M.getContext());
445 uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF);
446 if (InstrumentationType == PGOInstrumentationType::CSFDO)
447 ProfileVersion |= VARIANT_MASK_CSIR_PROF;
448 if (PGOInstrumentEntry ||
449 InstrumentationType == PGOInstrumentationType::CTXPROF)
450 ProfileVersion |= VARIANT_MASK_INSTR_ENTRY;
452 ProfileVersion |= VARIANT_MASK_INSTR_LOOP_ENTRIES;
454 ProfileVersion |= VARIANT_MASK_DBG_CORRELATE;
456 ProfileVersion |=
457 VARIANT_MASK_BYTE_COVERAGE | VARIANT_MASK_FUNCTION_ENTRY_ONLY;
459 ProfileVersion |= VARIANT_MASK_BYTE_COVERAGE;
461 ProfileVersion |= VARIANT_MASK_TEMPORAL_PROF;
462 auto IRLevelVersionVariable = new GlobalVariable(
463 M, IntTy64, true, GlobalValue::WeakAnyLinkage,
464 Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), VarName);
465 IRLevelVersionVariable->setVisibility(GlobalValue::HiddenVisibility);
466 Triple TT(M.getTargetTriple());
467 if (TT.supportsCOMDAT()) {
468 IRLevelVersionVariable->setLinkage(GlobalValue::ExternalLinkage);
469 IRLevelVersionVariable->setComdat(M.getOrInsertComdat(VarName));
470 }
471 return IRLevelVersionVariable;
472}
473
474namespace {
475
476/// The select instruction visitor plays three roles specified
477/// by the mode. In \c VM_counting mode, it simply counts the number of
478/// select instructions. In \c VM_instrument mode, it inserts code to count
479/// the number times TrueValue of select is taken. In \c VM_annotate mode,
480/// it reads the profile data and annotate the select instruction with metadata.
481enum VisitMode { VM_counting, VM_instrument, VM_annotate };
482class PGOUseFunc;
483
484/// Instruction Visitor class to visit select instructions.
485struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> {
486 Function &F;
487 unsigned NSIs = 0; // Number of select instructions instrumented.
488 VisitMode Mode = VM_counting; // Visiting mode.
489 unsigned *CurCtrIdx = nullptr; // Pointer to current counter index.
490 unsigned TotalNumCtrs = 0; // Total number of counters
491 GlobalValue *FuncNameVar = nullptr;
492 uint64_t FuncHash = 0;
493 PGOUseFunc *UseFunc = nullptr;
494 bool HasSingleByteCoverage;
495
496 SelectInstVisitor(Function &Func, bool HasSingleByteCoverage)
497 : F(Func), HasSingleByteCoverage(HasSingleByteCoverage) {}
498
499 void countSelects() {
500 NSIs = 0;
501 Mode = VM_counting;
502 visit(F);
503 }
504
505 // Visit the IR stream and instrument all select instructions. \p
506 // Ind is a pointer to the counter index variable; \p TotalNC
507 // is the total number of counters; \p FNV is the pointer to the
508 // PGO function name var; \p FHash is the function hash.
509 void instrumentSelects(unsigned *Ind, unsigned TotalNC, GlobalValue *FNV,
510 uint64_t FHash) {
511 Mode = VM_instrument;
512 CurCtrIdx = Ind;
513 TotalNumCtrs = TotalNC;
514 FuncHash = FHash;
515 FuncNameVar = FNV;
516 visit(F);
517 }
518
519 // Visit the IR stream and annotate all select instructions.
520 void annotateSelects(PGOUseFunc *UF, unsigned *Ind) {
521 Mode = VM_annotate;
522 UseFunc = UF;
523 CurCtrIdx = Ind;
524 visit(F);
525 }
526
527 void instrumentOneSelectInst(SelectInst &SI);
528 void annotateOneSelectInst(SelectInst &SI);
529
530 // Visit \p SI instruction and perform tasks according to visit mode.
531 void visitSelectInst(SelectInst &SI);
532
533 // Return the number of select instructions. This needs be called after
534 // countSelects().
535 unsigned getNumOfSelectInsts() const { return NSIs; }
536};
537
538/// This class implements the CFG edges for the Minimum Spanning Tree (MST)
539/// based instrumentation.
540/// Note that the CFG can be a multi-graph. So there might be multiple edges
541/// with the same SrcBB and DestBB.
542struct PGOEdge {
543 BasicBlock *SrcBB;
544 BasicBlock *DestBB;
545 uint64_t Weight;
546 bool InMST = false;
547 bool Removed = false;
548 bool IsCritical = false;
549
550 PGOEdge(BasicBlock *Src, BasicBlock *Dest, uint64_t W = 1)
551 : SrcBB(Src), DestBB(Dest), Weight(W) {}
552
553 /// Return the information string of an edge.
554 std::string infoString() const {
555 return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") +
556 (IsCritical ? "c" : " ") + " W=" + Twine(Weight))
557 .str();
558 }
559};
560
561/// This class stores the auxiliary information for each BB in the MST.
562struct PGOBBInfo {
563 PGOBBInfo *Group;
565 uint32_t Rank = 0;
566
567 PGOBBInfo(unsigned IX) : Group(this), Index(IX) {}
568
569 /// Return the information string of this object.
570 std::string infoString() const {
571 return (Twine("Index=") + Twine(Index)).str();
572 }
573};
574
575// This class implements the CFG edges. Note the CFG can be a multi-graph.
576template <class Edge, class BBInfo> class FuncPGOInstrumentation {
577private:
578 Function &F;
579
580 // Is this is context-sensitive instrumentation.
581 bool IsCS;
582
583 // A map that stores the Comdat group in function F.
584 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers;
585
587
588 void computeCFGHash();
589 void renameComdatFunction();
590
591public:
592 const TargetLibraryInfo &TLI;
593 std::vector<std::vector<VPCandidateInfo>> ValueSites;
594 SelectInstVisitor SIVisitor;
595 std::string FuncName;
596 std::string DeprecatedFuncName;
597 GlobalVariable *FuncNameVar;
598
599 // CFG hash value for this function.
600 uint64_t FunctionHash = 0;
601
602 // The Minimum Spanning Tree of function CFG.
604
605 const std::optional<BlockCoverageInference> BCI;
606
607 static std::optional<BlockCoverageInference>
608 constructBCI(Function &Func, bool HasSingleByteCoverage,
609 bool InstrumentFuncEntry) {
610 if (HasSingleByteCoverage)
611 return BlockCoverageInference(Func, InstrumentFuncEntry);
612 return {};
613 }
614
615 // Collect all the BBs that will be instrumented, and store them in
616 // InstrumentBBs.
617 void getInstrumentBBs(std::vector<BasicBlock *> &InstrumentBBs);
618
619 // Give an edge, find the BB that will be instrumented.
620 // Return nullptr if there is no BB to be instrumented.
621 BasicBlock *getInstrBB(Edge *E);
622
623 // Return the auxiliary BB information.
624 BBInfo &getBBInfo(const BasicBlock *BB) const { return MST.getBBInfo(BB); }
625
626 // Return the auxiliary BB information if available.
627 BBInfo *findBBInfo(const BasicBlock *BB) const { return MST.findBBInfo(BB); }
628
629 // Dump edges and BB information.
630 void dumpInfo(StringRef Str = "") const {
631 MST.dumpEdges(dbgs(), Twine("Dump Function ") + FuncName +
632 " Hash: " + Twine(FunctionHash) + "\t" + Str);
633 }
634
635 FuncPGOInstrumentation(
636 Function &Func, TargetLibraryInfo &TLI,
637 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
638 bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr,
639 BlockFrequencyInfo *BFI = nullptr, LoopInfo *LI = nullptr,
640 bool IsCS = false, bool InstrumentFuncEntry = true,
641 bool InstrumentLoopEntries = false, bool HasSingleByteCoverage = false)
642 : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func, TLI),
643 TLI(TLI), ValueSites(IPVK_Last + 1),
644 SIVisitor(Func, HasSingleByteCoverage),
645 MST(F, InstrumentFuncEntry, InstrumentLoopEntries, BPI, BFI, LI),
646 BCI(constructBCI(Func, HasSingleByteCoverage, InstrumentFuncEntry)) {
647 if (BCI && PGOViewBlockCoverageGraph)
648 BCI->viewBlockCoverageGraph();
649 // This should be done before CFG hash computation.
650 SIVisitor.countSelects();
651 ValueSites[IPVK_MemOPSize] = VPC.get(IPVK_MemOPSize);
652 if (!IsCS) {
653 NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
654 NumOfPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
655 NumOfPGOBB += MST.bbInfoSize();
656 ValueSites[IPVK_IndirectCallTarget] = VPC.get(IPVK_IndirectCallTarget);
658 ValueSites[IPVK_VTableTarget] = VPC.get(IPVK_VTableTarget);
659 } else {
660 NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
661 NumOfCSPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
662 NumOfCSPGOBB += MST.bbInfoSize();
663 }
664
665 FuncName = getIRPGOFuncName(F);
666 DeprecatedFuncName = getPGOFuncName(F);
667 computeCFGHash();
668 if (!ComdatMembers.empty())
669 renameComdatFunction();
670 LLVM_DEBUG(dumpInfo("after CFGMST"));
671
672 for (const auto &E : MST.allEdges()) {
673 if (E->Removed)
674 continue;
675 IsCS ? NumOfCSPGOEdge++ : NumOfPGOEdge++;
676 if (!E->InMST)
677 IsCS ? NumOfCSPGOInstrument++ : NumOfPGOInstrument++;
678 }
679
680 if (CreateGlobalVar)
681 FuncNameVar = createPGOFuncNameVar(F, FuncName);
682 }
683};
684
685} // end anonymous namespace
686
687// Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index
688// value of each BB in the CFG. The higher 32 bits are the CRC32 of the numbers
689// of selects, indirect calls, mem ops and edges.
690template <class Edge, class BBInfo>
691void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
692 std::vector<uint8_t> Indexes;
693 JamCRC JC;
694 for (auto &BB : F) {
695 for (BasicBlock *Succ : successors(&BB)) {
696 auto BI = findBBInfo(Succ);
697 if (BI == nullptr)
698 continue;
699 uint32_t Index = BI->Index;
700 for (int J = 0; J < 4; J++)
701 Indexes.push_back((uint8_t)(Index >> (J * 8)));
702 }
703 }
704 JC.update(Indexes);
705
706 JamCRC JCH;
707 // The higher 32 bits.
708 auto updateJCH = [&JCH](uint64_t Num) {
709 uint8_t Data[8];
711 JCH.update(Data);
712 };
713 updateJCH((uint64_t)SIVisitor.getNumOfSelectInsts());
714 updateJCH((uint64_t)ValueSites[IPVK_IndirectCallTarget].size());
715 updateJCH((uint64_t)ValueSites[IPVK_MemOPSize].size());
716 if (BCI) {
717 updateJCH(BCI->getInstrumentedBlocksHash());
718 } else {
719 updateJCH((uint64_t)MST.numEdges());
720 }
721
722 // Hash format for context sensitive profile. Reserve 4 bits for other
723 // information.
724 FunctionHash = (((uint64_t)JCH.getCRC()) << 28) + JC.getCRC();
725
726 // Reserve bit 60-63 for other information purpose.
727 FunctionHash &= 0x0FFFFFFFFFFFFFFF;
728 if (IsCS)
730 LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n"
731 << " CRC = " << JC.getCRC()
732 << ", Selects = " << SIVisitor.getNumOfSelectInsts()
733 << ", Edges = " << MST.numEdges() << ", ICSites = "
734 << ValueSites[IPVK_IndirectCallTarget].size()
735 << ", Memops = " << ValueSites[IPVK_MemOPSize].size()
736 << ", High32 CRC = " << JCH.getCRC()
737 << ", Hash = " << FunctionHash << "\n";);
738
739 if (PGOTraceFuncHash != "-" && F.getName().contains(PGOTraceFuncHash))
740 dbgs() << "Funcname=" << F.getName() << ", Hash=" << FunctionHash
741 << " in building " << F.getParent()->getSourceFileName() << "\n";
742}
743
744// Check if we can safely rename this Comdat function.
745static bool canRenameComdat(
746 Function &F,
747 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
748 if (!DoComdatRenaming || !canRenameComdatFunc(F, true))
749 return false;
750
751 // FIXME: Current only handle those Comdat groups that only containing one
752 // function.
753 // (1) For a Comdat group containing multiple functions, we need to have a
754 // unique postfix based on the hashes for each function. There is a
755 // non-trivial code refactoring to do this efficiently.
756 // (2) Variables can not be renamed, so we can not rename Comdat function in a
757 // group including global vars.
758 Comdat *C = F.getComdat();
759 for (auto &&CM : make_range(ComdatMembers.equal_range(C))) {
760 assert(!isa<GlobalAlias>(CM.second));
761 Function *FM = dyn_cast<Function>(CM.second);
762 if (FM != &F)
763 return false;
764 }
765 return true;
766}
767
768// Append the CFGHash to the Comdat function name.
769template <class Edge, class BBInfo>
770void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() {
771 if (!canRenameComdat(F, ComdatMembers))
772 return;
773 std::string OrigName = F.getName().str();
774 std::string NewFuncName =
775 Twine(F.getName() + "." + Twine(FunctionHash)).str();
776 F.setName(Twine(NewFuncName));
778 FuncName = Twine(FuncName + "." + Twine(FunctionHash)).str();
779 Comdat *NewComdat;
780 Module *M = F.getParent();
781 // For AvailableExternallyLinkage functions, change the linkage to
782 // LinkOnceODR and put them into comdat. This is because after renaming, there
783 // is no backup external copy available for the function.
784 if (!F.hasComdat()) {
786 NewComdat = M->getOrInsertComdat(StringRef(NewFuncName));
788 F.setComdat(NewComdat);
789 return;
790 }
791
792 // This function belongs to a single function Comdat group.
793 Comdat *OrigComdat = F.getComdat();
794 std::string NewComdatName =
795 Twine(OrigComdat->getName() + "." + Twine(FunctionHash)).str();
796 NewComdat = M->getOrInsertComdat(StringRef(NewComdatName));
797 NewComdat->setSelectionKind(OrigComdat->getSelectionKind());
798
799 for (auto &&CM : make_range(ComdatMembers.equal_range(OrigComdat))) {
800 // Must be a function.
801 cast<Function>(CM.second)->setComdat(NewComdat);
802 }
803}
804
805/// Collect all the BBs that will be instruments and add them to
806/// `InstrumentBBs`.
807template <class Edge, class BBInfo>
808void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs(
809 std::vector<BasicBlock *> &InstrumentBBs) {
810 if (BCI) {
811 for (auto &BB : F)
812 if (BCI->shouldInstrumentBlock(BB))
813 InstrumentBBs.push_back(&BB);
814 return;
815 }
816
817 // Use a worklist as we will update the vector during the iteration.
818 std::vector<Edge *> EdgeList;
819 EdgeList.reserve(MST.numEdges());
820 for (const auto &E : MST.allEdges())
821 EdgeList.push_back(E.get());
822
823 for (auto &E : EdgeList) {
824 BasicBlock *InstrBB = getInstrBB(E);
825 if (InstrBB)
826 InstrumentBBs.push_back(InstrBB);
827 }
828}
829
830// Given a CFG E to be instrumented, find which BB to place the instrumented
831// code. The function will split the critical edge if necessary.
832template <class Edge, class BBInfo>
833BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) {
834 if (E->InMST || E->Removed)
835 return nullptr;
836
837 BasicBlock *SrcBB = E->SrcBB;
838 BasicBlock *DestBB = E->DestBB;
839 // For a fake edge, instrument the real BB.
840 if (SrcBB == nullptr)
841 return DestBB;
842 if (DestBB == nullptr)
843 return SrcBB;
844
845 auto canInstrument = [](BasicBlock *BB) -> BasicBlock * {
846 // There are basic blocks (such as catchswitch) cannot be instrumented.
847 // If the returned first insertion point is the end of BB, skip this BB.
848 if (BB->getFirstInsertionPt() == BB->end())
849 return nullptr;
850 return BB;
851 };
852
853 // Instrument the SrcBB if it has a single successor,
854 // otherwise, the DestBB if this is not a critical edge.
855 Instruction *TI = SrcBB->getTerminator();
856 if (TI->getNumSuccessors() <= 1)
857 return canInstrument(SrcBB);
858 if (!E->IsCritical)
859 return canInstrument(DestBB);
860
861 // Some IndirectBr critical edges cannot be split by the previous
862 // SplitIndirectBrCriticalEdges call. Bail out.
863 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
864 BasicBlock *InstrBB =
865 isa<IndirectBrInst>(TI) ? nullptr : SplitCriticalEdge(TI, SuccNum);
866 if (!InstrBB) {
868 dbgs() << "Fail to split critical edge: not instrument this edge.\n");
869 return nullptr;
870 }
871 // For a critical edge, we have to split. Instrument the newly
872 // created BB.
873 IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++;
874 LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index
875 << " --> " << getBBInfo(DestBB).Index << "\n");
876 // Need to add two new edges. First one: Add new edge of SrcBB->InstrBB.
877 MST.addEdge(SrcBB, InstrBB, 0);
878 // Second one: Add new edge of InstrBB->DestBB.
879 Edge &NewEdge1 = MST.addEdge(InstrBB, DestBB, 0);
880 NewEdge1.InMST = true;
881 E->Removed = true;
882
883 return canInstrument(InstrBB);
884}
885
886// When generating value profiling calls on Windows routines that make use of
887// handler funclets for exception processing an operand bundle needs to attached
888// to the called function. This routine will set \p OpBundles to contain the
889// funclet information, if any is needed, that should be placed on the generated
890// value profiling call for the value profile candidate call.
891static void
895 auto *OrigCall = dyn_cast<CallBase>(Cand.AnnotatedInst);
896 if (!OrigCall)
897 return;
898
899 if (!isa<IntrinsicInst>(OrigCall)) {
900 // The instrumentation call should belong to the same funclet as a
901 // non-intrinsic call, so just copy the operand bundle, if any exists.
902 std::optional<OperandBundleUse> ParentFunclet =
903 OrigCall->getOperandBundle(LLVMContext::OB_funclet);
904 if (ParentFunclet)
905 OpBundles.emplace_back(OperandBundleDef(*ParentFunclet));
906 } else {
907 // Intrinsics or other instructions do not get funclet information from the
908 // front-end. Need to use the BlockColors that was computed by the routine
909 // colorEHFunclets to determine whether a funclet is needed.
910 if (!BlockColors.empty()) {
911 const ColorVector &CV = BlockColors.find(OrigCall->getParent())->second;
912 assert(CV.size() == 1 && "non-unique color for block!");
913 Instruction *EHPad = CV.front()->getFirstNonPHI();
914 if (EHPad->isEHPad())
915 OpBundles.emplace_back("funclet", EHPad);
916 }
917 }
918}
919
920// Visit all edge and instrument the edges not in MST, and do value profiling.
921// Critical edges will be split.
922void FunctionInstrumenter::instrument() {
923 if (!PGOBlockCoverage) {
924 // Split indirectbr critical edges here before computing the MST rather than
925 // later in getInstrBB() to avoid invalidating it.
926 SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, BFI);
927 }
928
929 const bool IsCtxProf = InstrumentationType == PGOInstrumentationType::CTXPROF;
930 FuncPGOInstrumentation<PGOEdge, PGOBBInfo> FuncInfo(
931 F, TLI, ComdatMembers, /*CreateGlobalVar=*/!IsCtxProf, BPI, BFI, LI,
932 InstrumentationType == PGOInstrumentationType::CSFDO,
933 shouldInstrumentEntryBB(), shouldInstrumentLoopEntries(),
935
936 auto *const Name = IsCtxProf ? cast<GlobalValue>(&F) : FuncInfo.FuncNameVar;
937 auto *const CFGHash =
938 ConstantInt::get(Type::getInt64Ty(M.getContext()), FuncInfo.FunctionHash);
939 // Make sure that pointer to global is passed in with zero addrspace
940 // This is relevant during GPU profiling
941 auto *NormalizedNamePtr = ConstantExpr::getPointerBitCastOrAddrSpaceCast(
942 Name, PointerType::get(M.getContext(), 0));
944 auto &EntryBB = F.getEntryBlock();
945 IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt());
946 // llvm.instrprof.cover(i8* <name>, i64 <hash>, i32 <num-counters>,
947 // i32 <index>)
948 Builder.CreateIntrinsic(
949 Intrinsic::instrprof_cover, {},
950 {NormalizedNamePtr, CFGHash, Builder.getInt32(1), Builder.getInt32(0)});
951 return;
952 }
953
954 std::vector<BasicBlock *> InstrumentBBs;
955 FuncInfo.getInstrumentBBs(InstrumentBBs);
956 unsigned NumCounters =
957 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
958
959 if (IsCtxProf) {
960 auto *CSIntrinsic =
961 Intrinsic::getOrInsertDeclaration(&M, Intrinsic::instrprof_callsite);
962 // We want to count the instrumentable callsites, then instrument them. This
963 // is because the llvm.instrprof.callsite intrinsic has an argument (like
964 // the other instrprof intrinsics) capturing the total number of
965 // instrumented objects (counters, or callsites, in this case). In this
966 // case, we want that value so we can readily pass it to the compiler-rt
967 // APIs that may have to allocate memory based on the nr of callsites.
968 // The traversal logic is the same for both counting and instrumentation,
969 // just needs to be done in succession.
970 auto Visit = [&](llvm::function_ref<void(CallBase * CB)> Visitor) {
971 for (auto &BB : F)
972 for (auto &Instr : BB)
973 if (auto *CS = dyn_cast<CallBase>(&Instr)) {
975 continue;
976 Visitor(CS);
977 }
978 };
979 // First, count callsites.
980 uint32_t TotalNumCallsites = 0;
981 Visit([&TotalNumCallsites](auto *) { ++TotalNumCallsites; });
982
983 // Now instrument.
984 uint32_t CallsiteIndex = 0;
985 Visit([&](auto *CB) {
986 IRBuilder<> Builder(CB);
987 Builder.CreateCall(CSIntrinsic,
988 {Name, CFGHash, Builder.getInt32(TotalNumCallsites),
989 Builder.getInt32(CallsiteIndex++),
990 CB->getCalledOperand()});
991 });
992 }
993
994 uint32_t I = 0;
996 NumCounters += PGOBlockCoverage ? 8 : 1;
997 auto &EntryBB = F.getEntryBlock();
998 IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt());
999 // llvm.instrprof.timestamp(i8* <name>, i64 <hash>, i32 <num-counters>,
1000 // i32 <index>)
1001 Builder.CreateIntrinsic(Intrinsic::instrprof_timestamp, {},
1002 {NormalizedNamePtr, CFGHash,
1003 Builder.getInt32(NumCounters),
1004 Builder.getInt32(I)});
1005 I += PGOBlockCoverage ? 8 : 1;
1006 }
1007
1008 for (auto *InstrBB : InstrumentBBs) {
1009 IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt());
1010 assert(Builder.GetInsertPoint() != InstrBB->end() &&
1011 "Cannot get the Instrumentation point");
1012 // llvm.instrprof.increment(i8* <name>, i64 <hash>, i32 <num-counters>,
1013 // i32 <index>)
1014 Builder.CreateIntrinsic(PGOBlockCoverage ? Intrinsic::instrprof_cover
1015 : Intrinsic::instrprof_increment,
1016 {},
1017 {NormalizedNamePtr, CFGHash,
1018 Builder.getInt32(NumCounters),
1019 Builder.getInt32(I++)});
1020 }
1021
1022 // Now instrument select instructions:
1023 FuncInfo.SIVisitor.instrumentSelects(&I, NumCounters, Name,
1024 FuncInfo.FunctionHash);
1025 assert(I == NumCounters);
1026
1027 if (isValueProfilingDisabled())
1028 return;
1029
1030 NumOfPGOICall += FuncInfo.ValueSites[IPVK_IndirectCallTarget].size();
1031
1032 // Intrinsic function calls do not have funclet operand bundles needed for
1033 // Windows exception handling attached to them. However, if value profiling is
1034 // inserted for one of these calls, then a funclet value will need to be set
1035 // on the instrumentation call based on the funclet coloring.
1037 if (F.hasPersonalityFn() &&
1038 isScopedEHPersonality(classifyEHPersonality(F.getPersonalityFn())))
1039 BlockColors = colorEHFunclets(F);
1040
1041 // For each VP Kind, walk the VP candidates and instrument each one.
1042 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) {
1043 unsigned SiteIndex = 0;
1044 if (Kind == IPVK_MemOPSize && !PGOInstrMemOP)
1045 continue;
1046
1047 for (VPCandidateInfo Cand : FuncInfo.ValueSites[Kind]) {
1048 LLVM_DEBUG(dbgs() << "Instrument one VP " << ValueProfKindDescr[Kind]
1049 << " site: CallSite Index = " << SiteIndex << "\n");
1050
1051 IRBuilder<> Builder(Cand.InsertPt);
1052 assert(Builder.GetInsertPoint() != Cand.InsertPt->getParent()->end() &&
1053 "Cannot get the Instrumentation point");
1054
1055 Value *ToProfile = nullptr;
1056 if (Cand.V->getType()->isIntegerTy())
1057 ToProfile = Builder.CreateZExtOrTrunc(Cand.V, Builder.getInt64Ty());
1058 else if (Cand.V->getType()->isPointerTy())
1059 ToProfile = Builder.CreatePtrToInt(Cand.V, Builder.getInt64Ty());
1060 assert(ToProfile && "value profiling Value is of unexpected type");
1061
1062 auto *NormalizedNamePtr = ConstantExpr::getPointerBitCastOrAddrSpaceCast(
1063 Name, PointerType::get(M.getContext(), 0));
1064
1066 populateEHOperandBundle(Cand, BlockColors, OpBundles);
1067 Builder.CreateCall(
1069 Intrinsic::instrprof_value_profile),
1070 {NormalizedNamePtr, Builder.getInt64(FuncInfo.FunctionHash),
1071 ToProfile, Builder.getInt32(Kind), Builder.getInt32(SiteIndex++)},
1072 OpBundles);
1073 }
1074 } // IPVK_First <= Kind <= IPVK_Last
1075}
1076
1077namespace {
1078
1079// This class represents a CFG edge in profile use compilation.
1080struct PGOUseEdge : public PGOEdge {
1081 using PGOEdge::PGOEdge;
1082
1083 std::optional<uint64_t> Count;
1084
1085 // Set edge count value
1086 void setEdgeCount(uint64_t Value) { Count = Value; }
1087
1088 // Return the information string for this object.
1089 std::string infoString() const {
1090 if (!Count)
1091 return PGOEdge::infoString();
1092 return (Twine(PGOEdge::infoString()) + " Count=" + Twine(*Count)).str();
1093 }
1094};
1095
1096using DirectEdges = SmallVector<PGOUseEdge *, 2>;
1097
1098// This class stores the auxiliary information for each BB.
1099struct PGOUseBBInfo : public PGOBBInfo {
1100 std::optional<uint64_t> Count;
1101 int32_t UnknownCountInEdge = 0;
1102 int32_t UnknownCountOutEdge = 0;
1103 DirectEdges InEdges;
1104 DirectEdges OutEdges;
1105
1106 PGOUseBBInfo(unsigned IX) : PGOBBInfo(IX) {}
1107
1108 // Set the profile count value for this BB.
1109 void setBBInfoCount(uint64_t Value) { Count = Value; }
1110
1111 // Return the information string of this object.
1112 std::string infoString() const {
1113 if (!Count)
1114 return PGOBBInfo::infoString();
1115 return (Twine(PGOBBInfo::infoString()) + " Count=" + Twine(*Count)).str();
1116 }
1117
1118 // Add an OutEdge and update the edge count.
1119 void addOutEdge(PGOUseEdge *E) {
1120 OutEdges.push_back(E);
1121 UnknownCountOutEdge++;
1122 }
1123
1124 // Add an InEdge and update the edge count.
1125 void addInEdge(PGOUseEdge *E) {
1126 InEdges.push_back(E);
1127 UnknownCountInEdge++;
1128 }
1129};
1130
1131} // end anonymous namespace
1132
1133// Sum up the count values for all the edges.
1135 uint64_t Total = 0;
1136 for (const auto &E : Edges) {
1137 if (E->Removed)
1138 continue;
1139 if (E->Count)
1140 Total += *E->Count;
1141 }
1142 return Total;
1143}
1144
1145namespace {
1146
1147class PGOUseFunc {
1148public:
1149 PGOUseFunc(Function &Func, Module *Modu, TargetLibraryInfo &TLI,
1150 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
1152 LoopInfo *LI, ProfileSummaryInfo *PSI, bool IsCS,
1153 bool InstrumentFuncEntry, bool InstrumentLoopEntries,
1154 bool HasSingleByteCoverage)
1155 : F(Func), M(Modu), BFI(BFIin), PSI(PSI),
1156 FuncInfo(Func, TLI, ComdatMembers, false, BPI, BFIin, LI, IsCS,
1157 InstrumentFuncEntry, InstrumentLoopEntries,
1158 HasSingleByteCoverage),
1159 FreqAttr(FFA_Normal), IsCS(IsCS), VPC(Func, TLI) {}
1160
1161 void handleInstrProfError(Error Err, uint64_t MismatchedFuncSum);
1162
1163 // Read counts for the instrumented BB from profile.
1164 bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
1166
1167 // Populate the counts for all BBs.
1168 void populateCounters();
1169
1170 // Set block coverage based on profile coverage values.
1171 void populateCoverage(IndexedInstrProfReader *PGOReader);
1172
1173 // Set the branch weights based on the count values.
1174 void setBranchWeights();
1175
1176 // Annotate the value profile call sites for all value kind.
1177 void annotateValueSites();
1178
1179 // Annotate the value profile call sites for one value kind.
1180 void annotateValueSites(uint32_t Kind);
1181
1182 // Annotate the irreducible loop header weights.
1183 void annotateIrrLoopHeaderWeights();
1184
1185 // The hotness of the function from the profile count.
1186 enum FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot };
1187
1188 // Return the function hotness from the profile.
1189 FuncFreqAttr getFuncFreqAttr() const { return FreqAttr; }
1190
1191 // Return the function hash.
1192 uint64_t getFuncHash() const { return FuncInfo.FunctionHash; }
1193
1194 // Return the profile record for this function;
1195 InstrProfRecord &getProfileRecord() { return ProfileRecord; }
1196
1197 // Return the auxiliary BB information.
1198 PGOUseBBInfo &getBBInfo(const BasicBlock *BB) const {
1199 return FuncInfo.getBBInfo(BB);
1200 }
1201
1202 // Return the auxiliary BB information if available.
1203 PGOUseBBInfo *findBBInfo(const BasicBlock *BB) const {
1204 return FuncInfo.findBBInfo(BB);
1205 }
1206
1207 Function &getFunc() const { return F; }
1208
1209 void dumpInfo(StringRef Str = "") const { FuncInfo.dumpInfo(Str); }
1210
1211 uint64_t getProgramMaxCount() const { return ProgramMaxCount; }
1212
1213private:
1214 Function &F;
1215 Module *M;
1217 ProfileSummaryInfo *PSI;
1218
1219 // This member stores the shared information with class PGOGenFunc.
1220 FuncPGOInstrumentation<PGOUseEdge, PGOUseBBInfo> FuncInfo;
1221
1222 // The maximum count value in the profile. This is only used in PGO use
1223 // compilation.
1224 uint64_t ProgramMaxCount;
1225
1226 // Position of counter that remains to be read.
1227 uint32_t CountPosition = 0;
1228
1229 // Total size of the profile count for this function.
1230 uint32_t ProfileCountSize = 0;
1231
1232 // ProfileRecord for this function.
1233 InstrProfRecord ProfileRecord;
1234
1235 // Function hotness info derived from profile.
1236 FuncFreqAttr FreqAttr;
1237
1238 // Is to use the context sensitive profile.
1239 bool IsCS;
1240
1242
1243 // Find the Instrumented BB and set the value. Return false on error.
1244 bool setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile);
1245
1246 // Set the edge counter value for the unknown edge -- there should be only
1247 // one unknown edge.
1248 void setEdgeCount(DirectEdges &Edges, uint64_t Value);
1249
1250 // Set the hot/cold inline hints based on the count values.
1251 // FIXME: This function should be removed once the functionality in
1252 // the inliner is implemented.
1253 void markFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) {
1254 if (PSI->isHotCount(EntryCount))
1255 FreqAttr = FFA_Hot;
1256 else if (PSI->isColdCount(MaxCount))
1257 FreqAttr = FFA_Cold;
1258 }
1259};
1260
1261} // end anonymous namespace
1262
1263/// Set up InEdges/OutEdges for all BBs in the MST.
1265 const FuncPGOInstrumentation<PGOUseEdge, PGOUseBBInfo> &FuncInfo) {
1266 // This is not required when there is block coverage inference.
1267 if (FuncInfo.BCI)
1268 return;
1269 for (const auto &E : FuncInfo.MST.allEdges()) {
1270 if (E->Removed)
1271 continue;
1272 const BasicBlock *SrcBB = E->SrcBB;
1273 const BasicBlock *DestBB = E->DestBB;
1274 PGOUseBBInfo &SrcInfo = FuncInfo.getBBInfo(SrcBB);
1275 PGOUseBBInfo &DestInfo = FuncInfo.getBBInfo(DestBB);
1276 SrcInfo.addOutEdge(E.get());
1277 DestInfo.addInEdge(E.get());
1278 }
1279}
1280
1281// Visit all the edges and assign the count value for the instrumented
1282// edges and the BB. Return false on error.
1283bool PGOUseFunc::setInstrumentedCounts(
1284 const std::vector<uint64_t> &CountFromProfile) {
1285
1286 std::vector<BasicBlock *> InstrumentBBs;
1287 FuncInfo.getInstrumentBBs(InstrumentBBs);
1288
1289 setupBBInfoEdges(FuncInfo);
1290
1291 unsigned NumCounters =
1292 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
1293 // The number of counters here should match the number of counters
1294 // in profile. Return if they mismatch.
1295 if (NumCounters != CountFromProfile.size()) {
1296 return false;
1297 }
1298 auto *FuncEntry = &*F.begin();
1299
1300 // Set the profile count to the Instrumented BBs.
1301 uint32_t I = 0;
1302 for (BasicBlock *InstrBB : InstrumentBBs) {
1303 uint64_t CountValue = CountFromProfile[I++];
1304 PGOUseBBInfo &Info = getBBInfo(InstrBB);
1305 // If we reach here, we know that we have some nonzero count
1306 // values in this function. The entry count should not be 0.
1307 // Fix it if necessary.
1308 if (InstrBB == FuncEntry && CountValue == 0)
1309 CountValue = 1;
1310 Info.setBBInfoCount(CountValue);
1311 }
1312 ProfileCountSize = CountFromProfile.size();
1313 CountPosition = I;
1314
1315 // Set the edge count and update the count of unknown edges for BBs.
1316 auto setEdgeCount = [this](PGOUseEdge *E, uint64_t Value) -> void {
1317 E->setEdgeCount(Value);
1318 this->getBBInfo(E->SrcBB).UnknownCountOutEdge--;
1319 this->getBBInfo(E->DestBB).UnknownCountInEdge--;
1320 };
1321
1322 // Set the profile count the Instrumented edges. There are BBs that not in
1323 // MST but not instrumented. Need to set the edge count value so that we can
1324 // populate the profile counts later.
1325 for (const auto &E : FuncInfo.MST.allEdges()) {
1326 if (E->Removed || E->InMST)
1327 continue;
1328 const BasicBlock *SrcBB = E->SrcBB;
1329 PGOUseBBInfo &SrcInfo = getBBInfo(SrcBB);
1330
1331 // If only one out-edge, the edge profile count should be the same as BB
1332 // profile count.
1333 if (SrcInfo.Count && SrcInfo.OutEdges.size() == 1)
1334 setEdgeCount(E.get(), *SrcInfo.Count);
1335 else {
1336 const BasicBlock *DestBB = E->DestBB;
1337 PGOUseBBInfo &DestInfo = getBBInfo(DestBB);
1338 // If only one in-edge, the edge profile count should be the same as BB
1339 // profile count.
1340 if (DestInfo.Count && DestInfo.InEdges.size() == 1)
1341 setEdgeCount(E.get(), *DestInfo.Count);
1342 }
1343 if (E->Count)
1344 continue;
1345 // E's count should have been set from profile. If not, this meenas E skips
1346 // the instrumentation. We set the count to 0.
1347 setEdgeCount(E.get(), 0);
1348 }
1349 return true;
1350}
1351
1352// Set the count value for the unknown edge. There should be one and only one
1353// unknown edge in Edges vector.
1354void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) {
1355 for (auto &E : Edges) {
1356 if (E->Count)
1357 continue;
1358 E->setEdgeCount(Value);
1359
1360 getBBInfo(E->SrcBB).UnknownCountOutEdge--;
1361 getBBInfo(E->DestBB).UnknownCountInEdge--;
1362 return;
1363 }
1364 llvm_unreachable("Cannot find the unknown count edge");
1365}
1366
1367// Emit function metadata indicating PGO profile mismatch.
1369 const char MetadataName[] = "instr_prof_hash_mismatch";
1371 // If this metadata already exists, ignore.
1372 auto *Existing = F.getMetadata(LLVMContext::MD_annotation);
1373 if (Existing) {
1374 MDTuple *Tuple = cast<MDTuple>(Existing);
1375 for (const auto &N : Tuple->operands()) {
1376 if (N.equalsStr(MetadataName))
1377 return;
1378 Names.push_back(N.get());
1379 }
1380 }
1381
1382 MDBuilder MDB(ctx);
1383 Names.push_back(MDB.createString(MetadataName));
1384 MDNode *MD = MDTuple::get(ctx, Names);
1385 F.setMetadata(LLVMContext::MD_annotation, MD);
1386}
1387
1388void PGOUseFunc::handleInstrProfError(Error Err, uint64_t MismatchedFuncSum) {
1389 handleAllErrors(std::move(Err), [&](const InstrProfError &IPE) {
1390 auto &Ctx = M->getContext();
1391 auto Err = IPE.get();
1392 bool SkipWarning = false;
1393 LLVM_DEBUG(dbgs() << "Error in reading profile for Func "
1394 << FuncInfo.FuncName << ": ");
1395 if (Err == instrprof_error::unknown_function) {
1396 IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++;
1397 SkipWarning = !PGOWarnMissing;
1398 LLVM_DEBUG(dbgs() << "unknown function");
1399 } else if (Err == instrprof_error::hash_mismatch ||
1400 Err == instrprof_error::malformed) {
1401 IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++;
1402 SkipWarning =
1405 (F.hasComdat() || F.getLinkage() == GlobalValue::WeakAnyLinkage ||
1407 LLVM_DEBUG(dbgs() << "hash mismatch (hash= " << FuncInfo.FunctionHash
1408 << " skip=" << SkipWarning << ")");
1409 // Emit function metadata indicating PGO profile mismatch.
1410 annotateFunctionWithHashMismatch(F, M->getContext());
1411 }
1412
1413 LLVM_DEBUG(dbgs() << " IsCS=" << IsCS << "\n");
1414 if (SkipWarning)
1415 return;
1416
1417 std::string Msg =
1418 IPE.message() + std::string(" ") + F.getName().str() +
1419 std::string(" Hash = ") + std::to_string(FuncInfo.FunctionHash) +
1420 std::string(" up to ") + std::to_string(MismatchedFuncSum) +
1421 std::string(" count discarded");
1422
1423 Ctx.diagnose(
1424 DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning));
1425 });
1426}
1427
1428// Read the profile from ProfileFileName and assign the value to the
1429// instrumented BB and the edges. This function also updates ProgramMaxCount.
1430// Return true if the profile are successfully read, and false on errors.
1431bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
1433 auto &Ctx = M->getContext();
1434 uint64_t MismatchedFuncSum = 0;
1436 FuncInfo.FuncName, FuncInfo.FunctionHash, FuncInfo.DeprecatedFuncName,
1437 &MismatchedFuncSum);
1438 if (Error E = Result.takeError()) {
1439 handleInstrProfError(std::move(E), MismatchedFuncSum);
1440 return false;
1441 }
1442 ProfileRecord = std::move(Result.get());
1443 PseudoKind = ProfileRecord.getCountPseudoKind();
1444 if (PseudoKind != InstrProfRecord::NotPseudo) {
1445 return true;
1446 }
1447 std::vector<uint64_t> &CountFromProfile = ProfileRecord.Counts;
1448
1449 IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++;
1450 LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n");
1451
1452 uint64_t ValueSum = 0;
1453 for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) {
1454 LLVM_DEBUG(dbgs() << " " << I << ": " << CountFromProfile[I] << "\n");
1455 ValueSum += CountFromProfile[I];
1456 }
1457 AllZeros = (ValueSum == 0);
1458
1459 LLVM_DEBUG(dbgs() << "SUM = " << ValueSum << "\n");
1460
1461 getBBInfo(nullptr).UnknownCountOutEdge = 2;
1462 getBBInfo(nullptr).UnknownCountInEdge = 2;
1463
1464 if (!setInstrumentedCounts(CountFromProfile)) {
1465 LLVM_DEBUG(
1466 dbgs() << "Inconsistent number of counts, skipping this function");
1467 Ctx.diagnose(DiagnosticInfoPGOProfile(
1468 M->getName().data(),
1469 Twine("Inconsistent number of counts in ") + F.getName().str() +
1470 Twine(": the profile may be stale or there is a function name "
1471 "collision."),
1472 DS_Warning));
1473 return false;
1474 }
1475 ProgramMaxCount = PGOReader->getMaximumFunctionCount(IsCS);
1476 return true;
1477}
1478
1479void PGOUseFunc::populateCoverage(IndexedInstrProfReader *PGOReader) {
1480 uint64_t MismatchedFuncSum = 0;
1482 FuncInfo.FuncName, FuncInfo.FunctionHash, FuncInfo.DeprecatedFuncName,
1483 &MismatchedFuncSum);
1484 if (auto Err = Result.takeError()) {
1485 handleInstrProfError(std::move(Err), MismatchedFuncSum);
1486 return;
1487 }
1488 IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++;
1489
1490 std::vector<uint64_t> &CountsFromProfile = Result.get().Counts;
1492 unsigned Index = 0;
1493 for (auto &BB : F)
1494 if (FuncInfo.BCI->shouldInstrumentBlock(BB))
1495 Coverage[&BB] = (CountsFromProfile[Index++] != 0);
1496 assert(Index == CountsFromProfile.size());
1497
1498 // For each B in InverseDependencies[A], if A is covered then B is covered.
1500 InverseDependencies;
1501 for (auto &BB : F) {
1502 for (auto *Dep : FuncInfo.BCI->getDependencies(BB)) {
1503 // If Dep is covered then BB is covered.
1504 InverseDependencies[Dep].insert(&BB);
1505 }
1506 }
1507
1508 // Infer coverage of the non-instrumented blocks using a flood-fill algorithm.
1509 std::stack<const BasicBlock *> CoveredBlocksToProcess;
1510 for (auto &[BB, IsCovered] : Coverage)
1511 if (IsCovered)
1512 CoveredBlocksToProcess.push(BB);
1513
1514 while (!CoveredBlocksToProcess.empty()) {
1515 auto *CoveredBlock = CoveredBlocksToProcess.top();
1516 assert(Coverage[CoveredBlock]);
1517 CoveredBlocksToProcess.pop();
1518 for (auto *BB : InverseDependencies[CoveredBlock]) {
1519 // If CoveredBlock is covered then BB is covered.
1520 if (Coverage[BB])
1521 continue;
1522 Coverage[BB] = true;
1523 CoveredBlocksToProcess.push(BB);
1524 }
1525 }
1526
1527 // Annotate block coverage.
1528 MDBuilder MDB(F.getContext());
1529 // We set the entry count to 10000 if the entry block is covered so that BFI
1530 // can propagate a fraction of this count to the other covered blocks.
1531 F.setEntryCount(Coverage[&F.getEntryBlock()] ? 10000 : 0);
1532 for (auto &BB : F) {
1533 // For a block A and its successor B, we set the edge weight as follows:
1534 // If A is covered and B is covered, set weight=1.
1535 // If A is covered and B is uncovered, set weight=0.
1536 // If A is uncovered, set weight=1.
1537 // This setup will allow BFI to give nonzero profile counts to only covered
1538 // blocks.
1540 for (auto *Succ : successors(&BB))
1541 Weights.push_back((Coverage[Succ] || !Coverage[&BB]) ? 1 : 0);
1542 if (Weights.size() >= 2)
1543 llvm::setBranchWeights(*BB.getTerminator(), Weights,
1544 /*IsExpected=*/false);
1545 }
1546
1547 unsigned NumCorruptCoverage = 0;
1548 DominatorTree DT(F);
1549 LoopInfo LI(DT);
1550 BranchProbabilityInfo BPI(F, LI);
1551 BlockFrequencyInfo BFI(F, BPI, LI);
1552 auto IsBlockDead = [&](const BasicBlock &BB) -> std::optional<bool> {
1553 if (auto C = BFI.getBlockProfileCount(&BB))
1554 return C == 0;
1555 return {};
1556 };
1557 LLVM_DEBUG(dbgs() << "Block Coverage: (Instrumented=*, Covered=X)\n");
1558 for (auto &BB : F) {
1559 LLVM_DEBUG(dbgs() << (FuncInfo.BCI->shouldInstrumentBlock(BB) ? "* " : " ")
1560 << (Coverage[&BB] ? "X " : " ") << " " << BB.getName()
1561 << "\n");
1562 // In some cases it is possible to find a covered block that has no covered
1563 // successors, e.g., when a block calls a function that may call exit(). In
1564 // those cases, BFI could find its successor to be covered while BCI could
1565 // find its successor to be dead.
1566 if (Coverage[&BB] == IsBlockDead(BB).value_or(false)) {
1567 LLVM_DEBUG(
1568 dbgs() << "Found inconsistent block covearge for " << BB.getName()
1569 << ": BCI=" << (Coverage[&BB] ? "Covered" : "Dead") << " BFI="
1570 << (IsBlockDead(BB).value() ? "Dead" : "Covered") << "\n");
1571 ++NumCorruptCoverage;
1572 }
1573 if (Coverage[&BB])
1574 ++NumCoveredBlocks;
1575 }
1576 if (PGOVerifyBFI && NumCorruptCoverage) {
1577 auto &Ctx = M->getContext();
1578 Ctx.diagnose(DiagnosticInfoPGOProfile(
1579 M->getName().data(),
1580 Twine("Found inconsistent block coverage for function ") + F.getName() +
1581 " in " + Twine(NumCorruptCoverage) + " blocks.",
1582 DS_Warning));
1583 }
1585 FuncInfo.BCI->viewBlockCoverageGraph(&Coverage);
1586}
1587
1588// Populate the counters from instrumented BBs to all BBs.
1589// In the end of this operation, all BBs should have a valid count value.
1590void PGOUseFunc::populateCounters() {
1591 bool Changes = true;
1592 unsigned NumPasses = 0;
1593 while (Changes) {
1594 NumPasses++;
1595 Changes = false;
1596
1597 // For efficient traversal, it's better to start from the end as most
1598 // of the instrumented edges are at the end.
1599 for (auto &BB : reverse(F)) {
1600 PGOUseBBInfo *UseBBInfo = findBBInfo(&BB);
1601 if (UseBBInfo == nullptr)
1602 continue;
1603 if (!UseBBInfo->Count) {
1604 if (UseBBInfo->UnknownCountOutEdge == 0) {
1605 UseBBInfo->Count = sumEdgeCount(UseBBInfo->OutEdges);
1606 Changes = true;
1607 } else if (UseBBInfo->UnknownCountInEdge == 0) {
1608 UseBBInfo->Count = sumEdgeCount(UseBBInfo->InEdges);
1609 Changes = true;
1610 }
1611 }
1612 if (UseBBInfo->Count) {
1613 if (UseBBInfo->UnknownCountOutEdge == 1) {
1614 uint64_t Total = 0;
1615 uint64_t OutSum = sumEdgeCount(UseBBInfo->OutEdges);
1616 // If the one of the successor block can early terminate (no-return),
1617 // we can end up with situation where out edge sum count is larger as
1618 // the source BB's count is collected by a post-dominated block.
1619 if (*UseBBInfo->Count > OutSum)
1620 Total = *UseBBInfo->Count - OutSum;
1621 setEdgeCount(UseBBInfo->OutEdges, Total);
1622 Changes = true;
1623 }
1624 if (UseBBInfo->UnknownCountInEdge == 1) {
1625 uint64_t Total = 0;
1626 uint64_t InSum = sumEdgeCount(UseBBInfo->InEdges);
1627 if (*UseBBInfo->Count > InSum)
1628 Total = *UseBBInfo->Count - InSum;
1629 setEdgeCount(UseBBInfo->InEdges, Total);
1630 Changes = true;
1631 }
1632 }
1633 }
1634 }
1635
1636 LLVM_DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n");
1637 (void)NumPasses;
1638#ifndef NDEBUG
1639 // Assert every BB has a valid counter.
1640 for (auto &BB : F) {
1641 auto BI = findBBInfo(&BB);
1642 if (BI == nullptr)
1643 continue;
1644 assert(BI->Count && "BB count is not valid");
1645 }
1646#endif
1647 // Now annotate select instructions. This may fixup impossible block counts.
1648 FuncInfo.SIVisitor.annotateSelects(this, &CountPosition);
1649 assert(CountPosition == ProfileCountSize);
1650
1651 uint64_t FuncEntryCount = *getBBInfo(&*F.begin()).Count;
1652 uint64_t FuncMaxCount = FuncEntryCount;
1653 for (auto &BB : F) {
1654 auto BI = findBBInfo(&BB);
1655 if (BI == nullptr)
1656 continue;
1657 FuncMaxCount = std::max(FuncMaxCount, *BI->Count);
1658 }
1659
1660 // Fix the obviously inconsistent entry count.
1661 if (FuncMaxCount > 0 && FuncEntryCount == 0)
1662 FuncEntryCount = 1;
1664 markFunctionAttributes(FuncEntryCount, FuncMaxCount);
1665
1666 LLVM_DEBUG(FuncInfo.dumpInfo("after reading profile."));
1667}
1668
1669// Assign the scaled count values to the BB with multiple out edges.
1670void PGOUseFunc::setBranchWeights() {
1671 // Generate MD_prof metadata for every branch instruction.
1672 LLVM_DEBUG(dbgs() << "\nSetting branch weights for func " << F.getName()
1673 << " IsCS=" << IsCS << "\n");
1674 for (auto &BB : F) {
1675 Instruction *TI = BB.getTerminator();
1676 if (TI->getNumSuccessors() < 2)
1677 continue;
1678 if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) ||
1679 isa<IndirectBrInst>(TI) || isa<InvokeInst>(TI) ||
1680 isa<CallBrInst>(TI)))
1681 continue;
1682
1683 const PGOUseBBInfo &BBCountInfo = getBBInfo(&BB);
1684 if (!*BBCountInfo.Count)
1685 continue;
1686
1687 // We have a non-zero Branch BB.
1688
1689 // SuccessorCount can be greater than OutEdgesCount, because
1690 // removed edges don't appear in OutEdges.
1691 unsigned OutEdgesCount = BBCountInfo.OutEdges.size();
1692 unsigned SuccessorCount = BB.getTerminator()->getNumSuccessors();
1693 assert(OutEdgesCount <= SuccessorCount);
1694
1695 SmallVector<uint64_t, 2> EdgeCounts(SuccessorCount, 0);
1696 uint64_t MaxCount = 0;
1697 for (unsigned It = 0; It < OutEdgesCount; It++) {
1698 const PGOUseEdge *E = BBCountInfo.OutEdges[It];
1699 const BasicBlock *SrcBB = E->SrcBB;
1700 const BasicBlock *DestBB = E->DestBB;
1701 if (DestBB == nullptr)
1702 continue;
1703 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
1704 uint64_t EdgeCount = *E->Count;
1705 if (EdgeCount > MaxCount)
1706 MaxCount = EdgeCount;
1707 EdgeCounts[SuccNum] = EdgeCount;
1708 }
1709
1710 if (MaxCount)
1711 setProfMetadata(M, TI, EdgeCounts, MaxCount);
1712 else {
1713 // A zero MaxCount can come about when we have a BB with a positive
1714 // count, and whose successor blocks all have 0 count. This can happen
1715 // when there is no exit block and the code exits via a noreturn function.
1716 auto &Ctx = M->getContext();
1717 Ctx.diagnose(DiagnosticInfoPGOProfile(
1718 M->getName().data(),
1719 Twine("Profile in ") + F.getName().str() +
1720 Twine(" partially ignored") +
1721 Twine(", possibly due to the lack of a return path."),
1722 DS_Warning));
1723 }
1724 }
1725}
1726
1728 for (BasicBlock *Pred : predecessors(BB)) {
1729 if (isa<IndirectBrInst>(Pred->getTerminator()))
1730 return true;
1731 }
1732 return false;
1733}
1734
1735void PGOUseFunc::annotateIrrLoopHeaderWeights() {
1736 LLVM_DEBUG(dbgs() << "\nAnnotating irreducible loop header weights.\n");
1737 // Find irr loop headers
1738 for (auto &BB : F) {
1739 // As a heuristic also annotate indrectbr targets as they have a high chance
1740 // to become an irreducible loop header after the indirectbr tail
1741 // duplication.
1742 if (BFI->isIrrLoopHeader(&BB) || isIndirectBrTarget(&BB)) {
1743 Instruction *TI = BB.getTerminator();
1744 const PGOUseBBInfo &BBCountInfo = getBBInfo(&BB);
1745 setIrrLoopHeaderMetadata(M, TI, *BBCountInfo.Count);
1746 }
1747 }
1748}
1749
1750void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) {
1751 Module *M = F.getParent();
1752 IRBuilder<> Builder(&SI);
1753 Type *Int64Ty = Builder.getInt64Ty();
1754 auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty);
1755 auto *NormalizedFuncNameVarPtr =
1757 FuncNameVar, PointerType::get(M->getContext(), 0));
1758 Builder.CreateIntrinsic(Intrinsic::instrprof_increment_step, {},
1759 {NormalizedFuncNameVarPtr, Builder.getInt64(FuncHash),
1760 Builder.getInt32(TotalNumCtrs),
1761 Builder.getInt32(*CurCtrIdx), Step});
1762 ++(*CurCtrIdx);
1763}
1764
1765void SelectInstVisitor::annotateOneSelectInst(SelectInst &SI) {
1766 std::vector<uint64_t> &CountFromProfile = UseFunc->getProfileRecord().Counts;
1767 assert(*CurCtrIdx < CountFromProfile.size() &&
1768 "Out of bound access of counters");
1769 uint64_t SCounts[2];
1770 SCounts[0] = CountFromProfile[*CurCtrIdx]; // True count
1771 ++(*CurCtrIdx);
1772 uint64_t TotalCount = 0;
1773 auto BI = UseFunc->findBBInfo(SI.getParent());
1774 if (BI != nullptr) {
1775 TotalCount = *BI->Count;
1776
1777 // Fix the block count if it is impossible.
1778 if (TotalCount < SCounts[0])
1779 BI->Count = SCounts[0];
1780 }
1781 // False Count
1782 SCounts[1] = (TotalCount > SCounts[0] ? TotalCount - SCounts[0] : 0);
1783 uint64_t MaxCount = std::max(SCounts[0], SCounts[1]);
1784 if (MaxCount)
1785 setProfMetadata(F.getParent(), &SI, SCounts, MaxCount);
1786}
1787
1788void SelectInstVisitor::visitSelectInst(SelectInst &SI) {
1789 if (!PGOInstrSelect || PGOFunctionEntryCoverage || HasSingleByteCoverage)
1790 return;
1791 // FIXME: do not handle this yet.
1792 if (SI.getCondition()->getType()->isVectorTy())
1793 return;
1794
1795 switch (Mode) {
1796 case VM_counting:
1797 NSIs++;
1798 return;
1799 case VM_instrument:
1800 instrumentOneSelectInst(SI);
1801 return;
1802 case VM_annotate:
1803 annotateOneSelectInst(SI);
1804 return;
1805 }
1806
1807 llvm_unreachable("Unknown visiting mode");
1808}
1809
1811 if (ValueProfKind == IPVK_MemOPSize)
1813 if (ValueProfKind == llvm::IPVK_VTableTarget)
1815 return MaxNumAnnotations;
1816}
1817
1818// Traverse all valuesites and annotate the instructions for all value kind.
1819void PGOUseFunc::annotateValueSites() {
1821 return;
1822
1823 // Create the PGOFuncName meta data.
1824 createPGOFuncNameMetadata(F, FuncInfo.FuncName);
1825
1826 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
1827 annotateValueSites(Kind);
1828}
1829
1830// Annotate the instructions for a specific value kind.
1831void PGOUseFunc::annotateValueSites(uint32_t Kind) {
1832 assert(Kind <= IPVK_Last);
1833 unsigned ValueSiteIndex = 0;
1834
1835 unsigned NumValueSites = ProfileRecord.getNumValueSites(Kind);
1836
1837 // Since there isn't a reliable or fast way for profile reader to tell if a
1838 // profile is generated with `-enable-vtable-value-profiling` on, we run the
1839 // value profile collector over the function IR to find the instrumented sites
1840 // iff function profile records shows the number of instrumented vtable sites
1841 // is not zero. Function cfg already takes the number of instrumented
1842 // indirect call sites into account so it doesn't hash the number of
1843 // instrumented vtables; as a side effect it makes it easier to enable
1844 // profiling and profile use in two steps if needed.
1845 // TODO: Remove this if/when -enable-vtable-value-profiling is on by default.
1846 if (NumValueSites > 0 && Kind == IPVK_VTableTarget &&
1847 NumValueSites != FuncInfo.ValueSites[IPVK_VTableTarget].size() &&
1849 FuncInfo.ValueSites[IPVK_VTableTarget] = VPC.get(IPVK_VTableTarget);
1850 auto &ValueSites = FuncInfo.ValueSites[Kind];
1851 if (NumValueSites != ValueSites.size()) {
1852 auto &Ctx = M->getContext();
1853 Ctx.diagnose(DiagnosticInfoPGOProfile(
1854 M->getName().data(),
1855 Twine("Inconsistent number of value sites for ") +
1856 Twine(ValueProfKindDescr[Kind]) + Twine(" profiling in \"") +
1857 F.getName().str() +
1858 Twine("\", possibly due to the use of a stale profile."),
1859 DS_Warning));
1860 return;
1861 }
1862
1863 for (VPCandidateInfo &I : ValueSites) {
1864 LLVM_DEBUG(dbgs() << "Read one value site profile (kind = " << Kind
1865 << "): Index = " << ValueSiteIndex << " out of "
1866 << NumValueSites << "\n");
1868 *M, *I.AnnotatedInst, ProfileRecord,
1869 static_cast<InstrProfValueKind>(Kind), ValueSiteIndex,
1870 getMaxNumAnnotations(static_cast<InstrProfValueKind>(Kind)));
1871 ValueSiteIndex++;
1872 }
1873}
1874
1875// Collect the set of members for each Comdat in module M and store
1876// in ComdatMembers.
1878 Module &M,
1879 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
1880 if (!DoComdatRenaming)
1881 return;
1882 for (Function &F : M)
1883 if (Comdat *C = F.getComdat())
1884 ComdatMembers.insert(std::make_pair(C, &F));
1885 for (GlobalVariable &GV : M.globals())
1886 if (Comdat *C = GV.getComdat())
1887 ComdatMembers.insert(std::make_pair(C, &GV));
1888 for (GlobalAlias &GA : M.aliases())
1889 if (Comdat *C = GA.getComdat())
1890 ComdatMembers.insert(std::make_pair(C, &GA));
1891}
1892
1893// Return true if we should not find instrumentation data for this function
1894static bool skipPGOUse(const Function &F) {
1895 if (F.isDeclaration())
1896 return true;
1897 // If there are too many critical edges, PGO might cause
1898 // compiler time problem. Skip PGO if the number of
1899 // critical edges execeed the threshold.
1900 unsigned NumCriticalEdges = 0;
1901 for (auto &BB : F) {
1902 const Instruction *TI = BB.getTerminator();
1903 for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) {
1904 if (isCriticalEdge(TI, I))
1905 NumCriticalEdges++;
1906 }
1907 }
1908 if (NumCriticalEdges > PGOFunctionCriticalEdgeThreshold) {
1909 LLVM_DEBUG(dbgs() << "In func " << F.getName()
1910 << ", NumCriticalEdges=" << NumCriticalEdges
1911 << " exceed the threshold. Skip PGO.\n");
1912 return true;
1913 }
1914 return false;
1915}
1916
1917// Return true if we should not instrument this function
1918static bool skipPGOGen(const Function &F) {
1919 if (skipPGOUse(F))
1920 return true;
1921 if (F.hasFnAttribute(llvm::Attribute::Naked))
1922 return true;
1923 if (F.hasFnAttribute(llvm::Attribute::NoProfile))
1924 return true;
1925 if (F.hasFnAttribute(llvm::Attribute::SkipProfile))
1926 return true;
1927 if (F.getInstructionCount() < PGOFunctionSizeThreshold)
1928 return true;
1930 if (auto EntryCount = F.getEntryCount())
1931 return EntryCount->getCount() > PGOColdInstrumentEntryThreshold;
1932 return !PGOTreatUnknownAsCold;
1933 }
1934 return false;
1935}
1936
1938 Module &M, function_ref<TargetLibraryInfo &(Function &)> LookupTLI,
1941 function_ref<LoopInfo *(Function &)> LookupLI,
1942 PGOInstrumentationType InstrumentationType) {
1943 // For the context-sensitve instrumentation, we should have a separated pass
1944 // (before LTO/ThinLTO linking) to create these variables.
1945 if (InstrumentationType == PGOInstrumentationType::FDO)
1946 createIRLevelProfileFlagVar(M, InstrumentationType);
1947
1948 Triple TT(M.getTargetTriple());
1949 LLVMContext &Ctx = M.getContext();
1950 if (!TT.isOSBinFormatELF() && EnableVTableValueProfiling)
1952 M.getName().data(),
1953 Twine("VTable value profiling is presently not "
1954 "supported for non-ELF object formats"),
1955 DS_Warning));
1956 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
1957 collectComdatMembers(M, ComdatMembers);
1958
1959 for (auto &F : M) {
1960 if (skipPGOGen(F))
1961 continue;
1962 TargetLibraryInfo &TLI = LookupTLI(F);
1963 BranchProbabilityInfo *BPI = LookupBPI(F);
1964 BlockFrequencyInfo *BFI = LookupBFI(F);
1965 LoopInfo *LI = LookupLI(F);
1966 FunctionInstrumenter FI(M, F, TLI, ComdatMembers, BPI, BFI, LI,
1967 InstrumentationType);
1968 FI.instrument();
1969 }
1970 return true;
1971}
1972
1975 createProfileFileNameVar(M, CSInstrName);
1976 // The variable in a comdat may be discarded by LTO. Ensure the declaration
1977 // will be retained.
1980 if (ProfileSampling)
1985 return PA;
1986}
1987
1990 auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
1991 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
1993 };
1994 auto LookupBPI = [&FAM](Function &F) {
1996 };
1997 auto LookupBFI = [&FAM](Function &F) {
1999 };
2000 auto LookupLI = [&FAM](Function &F) {
2001 return &FAM.getResult<LoopAnalysis>(F);
2002 };
2003
2004 if (!InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, LookupLI,
2005 InstrumentationType))
2006 return PreservedAnalyses::all();
2007
2008 return PreservedAnalyses::none();
2009}
2010
2011// Using the ratio b/w sums of profile count values and BFI count values to
2012// adjust the func entry count.
2013static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI,
2014 BranchProbabilityInfo &NBPI) {
2015 Function &F = Func.getFunc();
2016 BlockFrequencyInfo NBFI(F, NBPI, LI);
2017#ifndef NDEBUG
2018 auto BFIEntryCount = F.getEntryCount();
2019 assert(BFIEntryCount && (BFIEntryCount->getCount() > 0) &&
2020 "Invalid BFI Entrycount");
2021#endif
2022 auto SumCount = APFloat::getZero(APFloat::IEEEdouble());
2023 auto SumBFICount = APFloat::getZero(APFloat::IEEEdouble());
2024 for (auto &BBI : F) {
2025 uint64_t CountValue = 0;
2026 uint64_t BFICountValue = 0;
2027 if (!Func.findBBInfo(&BBI))
2028 continue;
2029 auto BFICount = NBFI.getBlockProfileCount(&BBI);
2030 CountValue = *Func.getBBInfo(&BBI).Count;
2031 BFICountValue = *BFICount;
2032 SumCount.add(APFloat(CountValue * 1.0), APFloat::rmNearestTiesToEven);
2033 SumBFICount.add(APFloat(BFICountValue * 1.0), APFloat::rmNearestTiesToEven);
2034 }
2035 if (SumCount.isZero())
2036 return;
2037
2038 assert(SumBFICount.compare(APFloat(0.0)) == APFloat::cmpGreaterThan &&
2039 "Incorrect sum of BFI counts");
2040 if (SumBFICount.compare(SumCount) == APFloat::cmpEqual)
2041 return;
2042 double Scale = (SumCount / SumBFICount).convertToDouble();
2043 if (Scale < 1.001 && Scale > 0.999)
2044 return;
2045
2046 uint64_t FuncEntryCount = *Func.getBBInfo(&*F.begin()).Count;
2047 uint64_t NewEntryCount = 0.5 + FuncEntryCount * Scale;
2048 if (NewEntryCount == 0)
2049 NewEntryCount = 1;
2050 if (NewEntryCount != FuncEntryCount) {
2051 F.setEntryCount(ProfileCount(NewEntryCount, Function::PCT_Real));
2052 LLVM_DEBUG(dbgs() << "FixFuncEntryCount: in " << F.getName()
2053 << ", entry_count " << FuncEntryCount << " --> "
2054 << NewEntryCount << "\n");
2055 }
2056}
2057
2058// Compare the profile count values with BFI count values, and print out
2059// the non-matching ones.
2060static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI,
2062 uint64_t HotCountThreshold,
2064 Function &F = Func.getFunc();
2065 BlockFrequencyInfo NBFI(F, NBPI, LI);
2066 // bool PrintFunc = false;
2067 bool HotBBOnly = PGOVerifyHotBFI;
2068 StringRef Msg;
2070
2071 unsigned BBNum = 0, BBMisMatchNum = 0, NonZeroBBNum = 0;
2072 for (auto &BBI : F) {
2073 uint64_t CountValue = 0;
2074 uint64_t BFICountValue = 0;
2075
2076 CountValue = Func.getBBInfo(&BBI).Count.value_or(CountValue);
2077
2078 BBNum++;
2079 if (CountValue)
2080 NonZeroBBNum++;
2081 auto BFICount = NBFI.getBlockProfileCount(&BBI);
2082 if (BFICount)
2083 BFICountValue = *BFICount;
2084
2085 if (HotBBOnly) {
2086 bool rawIsHot = CountValue >= HotCountThreshold;
2087 bool BFIIsHot = BFICountValue >= HotCountThreshold;
2088 bool rawIsCold = CountValue <= ColdCountThreshold;
2089 bool ShowCount = false;
2090 if (rawIsHot && !BFIIsHot) {
2091 Msg = "raw-Hot to BFI-nonHot";
2092 ShowCount = true;
2093 } else if (rawIsCold && BFIIsHot) {
2094 Msg = "raw-Cold to BFI-Hot";
2095 ShowCount = true;
2096 }
2097 if (!ShowCount)
2098 continue;
2099 } else {
2100 if ((CountValue < PGOVerifyBFICutoff) &&
2101 (BFICountValue < PGOVerifyBFICutoff))
2102 continue;
2103 uint64_t Diff = (BFICountValue >= CountValue)
2104 ? BFICountValue - CountValue
2105 : CountValue - BFICountValue;
2106 if (Diff <= CountValue / 100 * PGOVerifyBFIRatio)
2107 continue;
2108 }
2109 BBMisMatchNum++;
2110
2111 ORE.emit([&]() {
2113 F.getSubprogram(), &BBI);
2114 Remark << "BB " << ore::NV("Block", BBI.getName())
2115 << " Count=" << ore::NV("Count", CountValue)
2116 << " BFI_Count=" << ore::NV("Count", BFICountValue);
2117 if (!Msg.empty())
2118 Remark << " (" << Msg << ")";
2119 return Remark;
2120 });
2121 }
2122 if (BBMisMatchNum)
2123 ORE.emit([&]() {
2124 return OptimizationRemarkAnalysis(DEBUG_TYPE, "bfi-verify",
2125 F.getSubprogram(), &F.getEntryBlock())
2126 << "In Func " << ore::NV("Function", F.getName())
2127 << ": Num_of_BB=" << ore::NV("Count", BBNum)
2128 << ", Num_of_non_zerovalue_BB=" << ore::NV("Count", NonZeroBBNum)
2129 << ", Num_of_mis_matching_BB=" << ore::NV("Count", BBMisMatchNum);
2130 });
2131}
2132
2134 Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName,
2135 vfs::FileSystem &FS,
2136 function_ref<TargetLibraryInfo &(Function &)> LookupTLI,
2139 function_ref<LoopInfo *(Function &)> LookupLI, ProfileSummaryInfo *PSI,
2140 bool IsCS) {
2141 LLVM_DEBUG(dbgs() << "Read in profile counters: ");
2142 auto &Ctx = M.getContext();
2143 // Read the counter array from file.
2144 auto ReaderOrErr = IndexedInstrProfReader::create(ProfileFileName, FS,
2145 ProfileRemappingFileName);
2146 if (Error E = ReaderOrErr.takeError()) {
2147 handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) {
2148 Ctx.diagnose(
2149 DiagnosticInfoPGOProfile(ProfileFileName.data(), EI.message()));
2150 });
2151 return false;
2152 }
2153
2154 std::unique_ptr<IndexedInstrProfReader> PGOReader =
2155 std::move(ReaderOrErr.get());
2156 if (!PGOReader) {
2157 Ctx.diagnose(DiagnosticInfoPGOProfile(ProfileFileName.data(),
2158 StringRef("Cannot get PGOReader")));
2159 return false;
2160 }
2161 if (!PGOReader->hasCSIRLevelProfile() && IsCS)
2162 return false;
2163
2164 // TODO: might need to change the warning once the clang option is finalized.
2165 if (!PGOReader->isIRLevelProfile()) {
2166 Ctx.diagnose(DiagnosticInfoPGOProfile(
2167 ProfileFileName.data(), "Not an IR level instrumentation profile"));
2168 return false;
2169 }
2170 if (PGOReader->functionEntryOnly()) {
2171 Ctx.diagnose(DiagnosticInfoPGOProfile(
2172 ProfileFileName.data(),
2173 "Function entry profiles are not yet supported for optimization"));
2174 return false;
2175 }
2176
2178 for (GlobalVariable &G : M.globals()) {
2179 if (!G.hasName() || !G.hasMetadata(LLVMContext::MD_type))
2180 continue;
2181
2182 // Create the PGOFuncName meta data.
2183 createPGONameMetadata(G, getPGOName(G, false /* InLTO*/));
2184 }
2185 }
2186
2187 // Add the profile summary (read from the header of the indexed summary) here
2188 // so that we can use it below when reading counters (which checks if the
2189 // function should be marked with a cold or inlinehint attribute).
2190 M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()),
2193 PSI->refresh();
2194
2195 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
2196 collectComdatMembers(M, ComdatMembers);
2197 std::vector<Function *> HotFunctions;
2198 std::vector<Function *> ColdFunctions;
2199
2200 // If the profile marked as always instrument the entry BB, do the
2201 // same. Note this can be overwritten by the internal option in CFGMST.h
2202 bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled();
2203 if (PGOInstrumentEntry.getNumOccurrences() > 0)
2204 InstrumentFuncEntry = PGOInstrumentEntry;
2205 bool InstrumentLoopEntries = PGOReader->instrLoopEntriesEnabled();
2206 if (PGOInstrumentLoopEntries.getNumOccurrences() > 0)
2207 InstrumentLoopEntries = PGOInstrumentLoopEntries;
2208
2209 bool HasSingleByteCoverage = PGOReader->hasSingleByteCoverage();
2210 for (auto &F : M) {
2211 if (skipPGOUse(F))
2212 continue;
2213 TargetLibraryInfo &TLI = LookupTLI(F);
2214 BranchProbabilityInfo *BPI = LookupBPI(F);
2215 BlockFrequencyInfo *BFI = LookupBFI(F);
2216 LoopInfo *LI = LookupLI(F);
2217 if (!HasSingleByteCoverage) {
2218 // Split indirectbr critical edges here before computing the MST rather
2219 // than later in getInstrBB() to avoid invalidating it.
2220 SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI,
2221 BFI);
2222 }
2223 PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, LI, PSI, IsCS,
2224 InstrumentFuncEntry, InstrumentLoopEntries,
2225 HasSingleByteCoverage);
2226 if (HasSingleByteCoverage) {
2227 Func.populateCoverage(PGOReader.get());
2228 continue;
2229 }
2230 // When PseudoKind is set to a vaule other than InstrProfRecord::NotPseudo,
2231 // it means the profile for the function is unrepresentative and this
2232 // function is actually hot / warm. We will reset the function hot / cold
2233 // attribute and drop all the profile counters.
2235 bool AllZeros = false;
2236 if (!Func.readCounters(PGOReader.get(), AllZeros, PseudoKind))
2237 continue;
2238 if (AllZeros) {
2239 F.setEntryCount(ProfileCount(0, Function::PCT_Real));
2240 if (Func.getProgramMaxCount() != 0)
2241 ColdFunctions.push_back(&F);
2242 continue;
2243 }
2244 if (PseudoKind != InstrProfRecord::NotPseudo) {
2245 // Clear function attribute cold.
2246 if (F.hasFnAttribute(Attribute::Cold))
2247 F.removeFnAttr(Attribute::Cold);
2248 // Set function attribute as hot.
2249 if (PseudoKind == InstrProfRecord::PseudoHot)
2250 F.addFnAttr(Attribute::Hot);
2251 continue;
2252 }
2253 Func.populateCounters();
2254 Func.setBranchWeights();
2255 Func.annotateValueSites();
2256 Func.annotateIrrLoopHeaderWeights();
2257 PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr();
2258 if (FreqAttr == PGOUseFunc::FFA_Cold)
2259 ColdFunctions.push_back(&F);
2260 else if (FreqAttr == PGOUseFunc::FFA_Hot)
2261 HotFunctions.push_back(&F);
2262 if (PGOViewCounts != PGOVCT_None &&
2263 (ViewBlockFreqFuncName.empty() ||
2264 F.getName() == ViewBlockFreqFuncName)) {
2266 std::unique_ptr<BranchProbabilityInfo> NewBPI =
2267 std::make_unique<BranchProbabilityInfo>(F, LI);
2268 std::unique_ptr<BlockFrequencyInfo> NewBFI =
2269 std::make_unique<BlockFrequencyInfo>(F, *NewBPI, LI);
2271 NewBFI->view();
2272 else if (PGOViewCounts == PGOVCT_Text) {
2273 dbgs() << "pgo-view-counts: " << Func.getFunc().getName() << "\n";
2274 NewBFI->print(dbgs());
2275 }
2276 }
2278 (ViewBlockFreqFuncName.empty() ||
2279 F.getName() == ViewBlockFreqFuncName)) {
2281 if (ViewBlockFreqFuncName.empty())
2282 WriteGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName());
2283 else
2284 ViewGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName());
2285 else if (PGOViewRawCounts == PGOVCT_Text) {
2286 dbgs() << "pgo-view-raw-counts: " << Func.getFunc().getName() << "\n";
2287 Func.dumpInfo();
2288 }
2289 }
2290
2293 BranchProbabilityInfo NBPI(F, LI);
2294
2295 // Fix func entry count.
2296 if (PGOFixEntryCount)
2297 fixFuncEntryCount(Func, LI, NBPI);
2298
2299 // Verify BlockFrequency information.
2300 uint64_t HotCountThreshold = 0, ColdCountThreshold = 0;
2301 if (PGOVerifyHotBFI) {
2302 HotCountThreshold = PSI->getOrCompHotCountThreshold();
2304 }
2305 verifyFuncBFI(Func, LI, NBPI, HotCountThreshold, ColdCountThreshold);
2306 }
2307 }
2308
2309 // Set function hotness attribute from the profile.
2310 // We have to apply these attributes at the end because their presence
2311 // can affect the BranchProbabilityInfo of any callers, resulting in an
2312 // inconsistent MST between prof-gen and prof-use.
2313 for (auto &F : HotFunctions) {
2314 F->addFnAttr(Attribute::InlineHint);
2315 LLVM_DEBUG(dbgs() << "Set inline attribute to function: " << F->getName()
2316 << "\n");
2317 }
2318 for (auto &F : ColdFunctions) {
2319 // Only set when there is no Attribute::Hot set by the user. For Hot
2320 // attribute, user's annotation has the precedence over the profile.
2321 if (F->hasFnAttribute(Attribute::Hot)) {
2322 auto &Ctx = M.getContext();
2323 std::string Msg = std::string("Function ") + F->getName().str() +
2324 std::string(" is annotated as a hot function but"
2325 " the profile is cold");
2326 Ctx.diagnose(
2327 DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning));
2328 continue;
2329 }
2330 F->addFnAttr(Attribute::Cold);
2331 LLVM_DEBUG(dbgs() << "Set cold attribute to function: " << F->getName()
2332 << "\n");
2333 }
2334 return true;
2335}
2336
2338 std::string Filename, std::string RemappingFilename, bool IsCS,
2340 : ProfileFileName(std::move(Filename)),
2341 ProfileRemappingFileName(std::move(RemappingFilename)), IsCS(IsCS),
2342 FS(std::move(VFS)) {
2343 if (!PGOTestProfileFile.empty())
2344 ProfileFileName = PGOTestProfileFile;
2345 if (!PGOTestProfileRemappingFile.empty())
2346 ProfileRemappingFileName = PGOTestProfileRemappingFile;
2347 if (!FS)
2349}
2350
2353
2354 auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
2355 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
2357 };
2358 auto LookupBPI = [&FAM](Function &F) {
2360 };
2361 auto LookupBFI = [&FAM](Function &F) {
2363 };
2364 auto LookupLI = [&FAM](Function &F) {
2365 return &FAM.getResult<LoopAnalysis>(F);
2366 };
2367
2368 auto *PSI = &MAM.getResult<ProfileSummaryAnalysis>(M);
2369 if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName, *FS,
2370 LookupTLI, LookupBPI, LookupBFI, LookupLI, PSI,
2371 IsCS))
2372 return PreservedAnalyses::all();
2373
2374 return PreservedAnalyses::none();
2375}
2376
2377static std::string getSimpleNodeName(const BasicBlock *Node) {
2378 if (!Node->getName().empty())
2379 return Node->getName().str();
2380
2381 std::string SimpleNodeName;
2382 raw_string_ostream OS(SimpleNodeName);
2383 Node->printAsOperand(OS, false);
2384 return SimpleNodeName;
2385}
2386
2388 ArrayRef<uint64_t> EdgeCounts, uint64_t MaxCount) {
2389 assert(MaxCount > 0 && "Bad max count");
2390 uint64_t Scale = calculateCountScale(MaxCount);
2392 for (const auto &ECI : EdgeCounts)
2393 Weights.push_back(scaleBranchCount(ECI, Scale));
2394
2395 LLVM_DEBUG(dbgs() << "Weight is: "; for (const auto &W
2396 : Weights) {
2397 dbgs() << W << " ";
2398 } dbgs() << "\n";);
2399
2400 misexpect::checkExpectAnnotations(*TI, Weights, /*IsFrontend=*/false);
2401
2402 setBranchWeights(*TI, Weights, /*IsExpected=*/false);
2404 std::string BrCondStr = getBranchCondString(TI);
2405 if (BrCondStr.empty())
2406 return;
2407
2408 uint64_t WSum =
2409 std::accumulate(Weights.begin(), Weights.end(), (uint64_t)0,
2410 [](uint64_t w1, uint64_t w2) { return w1 + w2; });
2411 uint64_t TotalCount =
2412 std::accumulate(EdgeCounts.begin(), EdgeCounts.end(), (uint64_t)0,
2413 [](uint64_t c1, uint64_t c2) { return c1 + c2; });
2414 Scale = calculateCountScale(WSum);
2415 BranchProbability BP(scaleBranchCount(Weights[0], Scale),
2416 scaleBranchCount(WSum, Scale));
2417 std::string BranchProbStr;
2418 raw_string_ostream OS(BranchProbStr);
2419 OS << BP;
2420 OS << " (total count : " << TotalCount << ")";
2421 Function *F = TI->getParent()->getParent();
2423 ORE.emit([&]() {
2424 return OptimizationRemark(DEBUG_TYPE, "pgo-instrumentation", TI)
2425 << BrCondStr << " is true with probability : " << BranchProbStr;
2426 });
2427 }
2428}
2429
2430namespace llvm {
2431
2433 MDBuilder MDB(M->getContext());
2434 TI->setMetadata(llvm::LLVMContext::MD_irr_loop,
2435 MDB.createIrrLoopHeaderWeight(Count));
2436}
2437
2438template <> struct GraphTraits<PGOUseFunc *> {
2439 using NodeRef = const BasicBlock *;
2442
2443 static NodeRef getEntryNode(const PGOUseFunc *G) {
2444 return &G->getFunc().front();
2445 }
2446
2448 return succ_begin(N);
2449 }
2450
2451 static ChildIteratorType child_end(const NodeRef N) { return succ_end(N); }
2452
2453 static nodes_iterator nodes_begin(const PGOUseFunc *G) {
2454 return nodes_iterator(G->getFunc().begin());
2455 }
2456
2457 static nodes_iterator nodes_end(const PGOUseFunc *G) {
2458 return nodes_iterator(G->getFunc().end());
2459 }
2460};
2461
2462template <> struct DOTGraphTraits<PGOUseFunc *> : DefaultDOTGraphTraits {
2463 explicit DOTGraphTraits(bool isSimple = false)
2465
2466 static std::string getGraphName(const PGOUseFunc *G) {
2467 return std::string(G->getFunc().getName());
2468 }
2469
2470 std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph) {
2471 std::string Result;
2472 raw_string_ostream OS(Result);
2473
2474 OS << getSimpleNodeName(Node) << ":\\l";
2475 PGOUseBBInfo *BI = Graph->findBBInfo(Node);
2476 OS << "Count : ";
2477 if (BI && BI->Count)
2478 OS << *BI->Count << "\\l";
2479 else
2480 OS << "Unknown\\l";
2481
2482 if (!PGOInstrSelect)
2483 return Result;
2484
2485 for (const Instruction &I : *Node) {
2486 if (!isa<SelectInst>(&I))
2487 continue;
2488 // Display scaled counts for SELECT instruction:
2489 OS << "SELECT : { T = ";
2490 uint64_t TC, FC;
2491 bool HasProf = extractBranchWeights(I, TC, FC);
2492 if (!HasProf)
2493 OS << "Unknown, F = Unknown }\\l";
2494 else
2495 OS << TC << ", F = " << FC << " }\\l";
2496 }
2497 return Result;
2498 }
2499};
2500
2501} // end namespace llvm
This file implements a class to represent arbitrary precision integral constant values and operations...
This file contains the simple types necessary to represent the attributes associated with functions a...
This file finds the minimum set of blocks on a CFG that must be instrumented to infer execution cover...
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:686
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Given that RA is a live value
#define LLVM_DEBUG(...)
Definition: Debug.h:106
std::string Name
post inline ee instrument
static BasicBlock * getInstrBB(CFGMST< Edge, BBInfo > &MST, Edge &E, const DenseSet< const BasicBlock * > &ExecBlocks)
#define DEBUG_TYPE
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
static cl::opt< unsigned > ColdCountThreshold("mfs-count-threshold", cl::desc("Minimum number of times a block must be executed to be retained."), cl::init(1), cl::Hidden)
static cl::opt< bool > PGOInstrumentEntry("pgo-instrument-entry", cl::init(false), cl::Hidden, cl::desc("Force to instrument function entry basicblock."))
static GlobalVariable * createIRLevelProfileFlagVar(Module &M, PGOInstrumentationType InstrumentationType)
static cl::opt< std::string > PGOTestProfileRemappingFile("pgo-test-profile-remapping-file", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path of profile remapping file. This is mainly for " "test purpose."))
static cl::opt< bool > PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden, cl::desc("Fix function entry count in profile use."))
static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI, BranchProbabilityInfo &NBPI)
static cl::opt< bool > PGOVerifyHotBFI("pgo-verify-hot-bfi", cl::init(false), cl::Hidden, cl::desc("Print out the non-match BFI count if a hot raw profile count " "becomes non-hot, or a cold raw profile count becomes hot. " "The print is enabled under -Rpass-analysis=pgo, or " "internal option -pass-remakrs-analysis=pgo."))
static void annotateFunctionWithHashMismatch(Function &F, LLVMContext &ctx)
cl::opt< unsigned > MaxNumVTableAnnotations
static cl::opt< bool > PGOTemporalInstrumentation("pgo-temporal-instrumentation", cl::desc("Use this option to enable temporal instrumentation"))
static cl::opt< unsigned > PGOFunctionSizeThreshold("pgo-function-size-threshold", cl::Hidden, cl::desc("Do not instrument functions smaller than this threshold."))
static cl::opt< unsigned > MaxNumAnnotations("icp-max-annotations", cl::init(3), cl::Hidden, cl::desc("Max number of annotations for a single indirect " "call callsite"))
static bool skipPGOGen(const Function &F)
static void collectComdatMembers(Module &M, std::unordered_multimap< Comdat *, GlobalValue * > &ComdatMembers)
static cl::opt< unsigned > PGOVerifyBFICutoff("pgo-verify-bfi-cutoff", cl::init(5), cl::Hidden, cl::desc("Set the threshold for pgo-verify-bfi: skip the counts whose " "profile count value is below."))
static cl::opt< std::string > PGOTraceFuncHash("pgo-trace-func-hash", cl::init("-"), cl::Hidden, cl::value_desc("function name"), cl::desc("Trace the hash of the function with this name."))
static void populateEHOperandBundle(VPCandidateInfo &Cand, DenseMap< BasicBlock *, ColorVector > &BlockColors, SmallVectorImpl< OperandBundleDef > &OpBundles)
static cl::opt< bool > PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden, cl::desc("Use this option to turn on/off SELECT " "instruction instrumentation. "))
static cl::opt< bool > PGOFunctionEntryCoverage("pgo-function-entry-coverage", cl::Hidden, cl::desc("Use this option to enable function entry coverage instrumentation."))
static cl::opt< uint64_t > PGOColdInstrumentEntryThreshold("pgo-cold-instrument-entry-threshold", cl::init(0), cl::Hidden, cl::desc("For cold function instrumentation, skip instrumenting functions " "whose entry count is above the given value."))
static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI, BranchProbabilityInfo &NBPI, uint64_t HotCountThreshold, uint64_t ColdCountThreshold)
static cl::opt< unsigned > PGOVerifyBFIRatio("pgo-verify-bfi-ratio", cl::init(2), cl::Hidden, cl::desc("Set the threshold for pgo-verify-bfi: only print out " "mismatched BFI if the difference percentage is greater than " "this value (in percentage)."))
static cl::opt< bool > DoComdatRenaming("do-comdat-renaming", cl::init(false), cl::Hidden, cl::desc("Append function hash to the name of COMDAT function to avoid " "function hash mismatch due to the preinliner"))
static bool annotateAllFunctions(Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName, vfs::FileSystem &FS, function_ref< TargetLibraryInfo &(Function &)> LookupTLI, function_ref< BranchProbabilityInfo *(Function &)> LookupBPI, function_ref< BlockFrequencyInfo *(Function &)> LookupBFI, function_ref< LoopInfo *(Function &)> LookupLI, ProfileSummaryInfo *PSI, bool IsCS)
static cl::opt< unsigned > PGOFunctionCriticalEdgeThreshold("pgo-critical-edge-threshold", cl::init(20000), cl::Hidden, cl::desc("Do not instrument functions with the number of critical edges " " greater than this threshold."))
static void setupBBInfoEdges(const FuncPGOInstrumentation< PGOUseEdge, PGOUseBBInfo > &FuncInfo)
Set up InEdges/OutEdges for all BBs in the MST.
static cl::opt< std::string > PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path of profile data file. This is" "mainly for test purpose."))
static bool skipPGOUse(const Function &F)
static bool canRenameComdat(Function &F, std::unordered_multimap< Comdat *, GlobalValue * > &ComdatMembers)
static cl::opt< bool > PGOVerifyBFI("pgo-verify-bfi", cl::init(false), cl::Hidden, cl::desc("Print out mismatched BFI counts after setting profile metadata " "The print is enabled under -Rpass-analysis=pgo, or " "internal option -pass-remakrs-analysis=pgo."))
static cl::opt< bool > PGOBlockCoverage("pgo-block-coverage", cl::desc("Use this option to enable basic block coverage instrumentation"))
static cl::opt< bool > PGOInstrumentLoopEntries("pgo-instrument-loop-entries", cl::init(false), cl::Hidden, cl::desc("Force to instrument loop entries."))
static bool InstrumentAllFunctions(Module &M, function_ref< TargetLibraryInfo &(Function &)> LookupTLI, function_ref< BranchProbabilityInfo *(Function &)> LookupBPI, function_ref< BlockFrequencyInfo *(Function &)> LookupBFI, function_ref< LoopInfo *(Function &)> LookupLI, PGOInstrumentationType InstrumentationType)
static uint64_t sumEdgeCount(const ArrayRef< PGOUseEdge * > Edges)
static cl::opt< bool > PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden, cl::desc("Use this option to turn on/off " "memory intrinsic size profiling."))
static uint32_t getMaxNumAnnotations(InstrProfValueKind ValueProfKind)
Function::ProfileCount ProfileCount
static cl::opt< bool > EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden, cl::desc("When this option is on, the annotated " "branch probability will be emitted as " "optimization remarks: -{Rpass|" "pass-remarks}=pgo-instrumentation"))
static cl::opt< unsigned > MaxNumMemOPAnnotations("memop-max-annotations", cl::init(4), cl::Hidden, cl::desc("Max number of preicise value annotations for a single memop" "intrinsic"))
static cl::opt< bool > DisableValueProfiling("disable-vp", cl::init(false), cl::Hidden, cl::desc("Disable Value Profiling"))
static std::string getSimpleNodeName(const BasicBlock *Node)
static cl::opt< bool > PGOViewBlockCoverageGraph("pgo-view-block-coverage-graph", cl::desc("Create a dot file of CFGs with block " "coverage inference information"))
static cl::opt< bool > PGOTreatUnknownAsCold("pgo-treat-unknown-as-cold", cl::init(false), cl::Hidden, cl::desc("For cold function instrumentation, treat count unknown(e.g. " "unprofiled) functions as cold."))
static bool isIndirectBrTarget(BasicBlock *BB)
static std::string getBranchCondString(Instruction *TI)
cl::opt< bool > PGOInstrumentColdFunctionOnly("pgo-instrument-cold-function-only", cl::init(false), cl::Hidden, cl::desc("Enable cold function only instrumentation."))
static cl::opt< PGOViewCountsType > PGOViewRawCounts("pgo-view-raw-counts", cl::Hidden, cl::desc("A boolean option to show CFG dag or text " "with raw profile counts from " "profile data. See also option " "-pgo-view-counts. To limit graph " "display to only one function, use " "filtering option -view-bfi-func-name."), cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), clEnumValN(PGOVCT_Graph, "graph", "show a graph."), clEnumValN(PGOVCT_Text, "text", "show in text.")))
static const char * ValueProfKindDescr[]
This file provides the interface for IR based instrumentation passes ( (profile-gen,...
FunctionAnalysisManager FAM
ModuleAnalysisManager MAM
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isSimple(Instruction *I)
This file contains some templates that are useful if you are working with the STL at all.
raw_pwrite_stream & OS
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
Defines the virtual file system interface vfs::FileSystem.
Value * RHS
void printAsOperand(OutputBuffer &OB, Prec P=Prec::Default, bool StrictlyWorse=false) const
static APFloat getZero(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Zero.
Definition: APFloat.h:1076
Class for arbitrary precision integers.
Definition: APInt.h:78
This templated class represents "all analyses that operate over <a particular IR unit>" (e....
Definition: Analysis.h:49
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:410
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
iterator end() const
Definition: ArrayRef.h:157
iterator begin() const
Definition: ArrayRef.h:156
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
iterator end()
Definition: BasicBlock.h:461
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:416
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:239
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
std::optional< uint64_t > getBlockProfileCount(const BasicBlock *BB, bool AllowSynthetic=false) const
Returns the estimated profile count of BB.
Conditional or Unconditional Branch instruction.
bool isConditional() const
Value * getCondition() const
Analysis pass which computes BranchProbabilityInfo.
Analysis providing branch probability information.
An union-find based Minimum Spanning Tree for CFG.
Definition: CFGMST.h:40
Edge & addEdge(BasicBlock *Src, BasicBlock *Dest, uint64_t W)
Definition: CFGMST.h:304
const std::vector< std::unique_ptr< Edge > > & allEdges() const
Definition: CFGMST.h:341
size_t bbInfoSize() const
Definition: CFGMST.h:349
size_t numEdges() const
Definition: CFGMST.h:347
BBInfo * findBBInfo(const BasicBlock *BB) const
Definition: CFGMST.h:359
BBInfo & getBBInfo(const BasicBlock *BB) const
Definition: CFGMST.h:352
void dumpEdges(raw_ostream &OS, const Twine &Message) const
Definition: CFGMST.h:285
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1120
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:763
StringRef getName() const
Definition: Comdat.cpp:28
void setSelectionKind(SelectionKind Val)
Definition: Comdat.h:47
SelectionKind getSelectionKind() const
Definition: Comdat.h:46
static Constant * getPointerBitCastOrAddrSpaceCast(Constant *C, Type *Ty)
Create a BitCast or AddrSpaceCast for a pointer type depending on the address space.
Definition: Constants.cpp:2268
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition: Constants.h:220
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:214
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:208
static Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
Definition: Constants.cpp:403
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:156
bool empty() const
Definition: DenseMap.h:98
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:211
Diagnostic information for the PGO profiler.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
Base class for error info classes.
Definition: Error.h:45
virtual std::string message() const
Return the error message as a string.
Definition: Error.h:53
Lightweight error class with error context and mandatory checking.
Definition: Error.h:160
Tagged union holding either a T or a Error.
Definition: Error.h:481
Class to represent profile counts.
Definition: Function.h:292
static GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
Definition: Globals.cpp:557
@ HiddenVisibility
The GV is hidden.
Definition: GlobalValue.h:68
@ ExternalLinkage
Externally visible function.
Definition: GlobalValue.h:52
@ WeakAnyLinkage
Keep one copy of named function when linking (weak)
Definition: GlobalValue.h:56
@ AvailableExternallyLinkage
Available for inspection, not emission.
Definition: GlobalValue.h:53
@ LinkOnceODRLinkage
Same, but only replaced by something equivalent.
Definition: GlobalValue.h:55
This instruction compares its operands according to the predicate given to the constructor.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2697
Reader for the indexed binary instrprof format.
static Expected< std::unique_ptr< IndexedInstrProfReader > > create(const Twine &Path, vfs::FileSystem &FS, const Twine &RemappingPath="")
Factory method to create an indexed reader.
Expected< InstrProfRecord > getInstrProfRecord(StringRef FuncName, uint64_t FuncHash, StringRef DeprecatedFuncName="", uint64_t *MismatchedFuncSum=nullptr)
Return the NamedInstrProfRecord associated with FuncName and FuncHash.
uint64_t getMaximumFunctionCount(bool UseCS)
Return the maximum of all known function counts.
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:567
Base class for instruction visitors.
Definition: InstVisitor.h:78
void visit(Iterator Start, Iterator End)
Definition: InstVisitor.h:87
RetTy visitSelectInst(SelectInst &I)
Definition: InstVisitor.h:189
static bool canInstrumentCallsite(const CallBase &CB)
instrprof_error get() const
Definition: InstrProf.h:419
std::string message() const override
Return the error message as a string.
Definition: InstrProf.cpp:255
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
Definition: Instruction.h:833
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1679
A smart pointer to a reference-counted object that inherits from RefCountedBase or ThreadSafeRefCount...
uint32_t getCRC() const
Definition: CRC.h:52
void update(ArrayRef< uint8_t > Data)
Definition: CRC.cpp:103
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:566
MDString * createString(StringRef Str)
Return the given string as metadata.
Definition: MDBuilder.cpp:20
MDNode * createIrrLoopHeaderWeight(uint64_t Weight)
Return metadata containing an irreducible loop header weight.
Definition: MDBuilder.cpp:342
Metadata node.
Definition: Metadata.h:1069
ArrayRef< MDOperand > operands() const
Definition: Metadata.h:1428
Tuple of metadata.
Definition: Metadata.h:1473
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1500
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
Diagnostic information for optimization analysis remarks.
The optimization diagnostic interface.
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for applied optimization remarks.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
PGOInstrumentationUse(std::string Filename="", std::string RemappingFilename="", bool IsCS=false, IntrusiveRefCntPtr< vfs::FileSystem > FS=nullptr)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: Analysis.h:114
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
void preserveSet()
Mark an analysis set as preserved.
Definition: Analysis.h:146
void preserve()
Mark an analysis as preserved.
Definition: Analysis.h:131
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
uint64_t getOrCompColdCountThreshold() const
Returns ColdCountThreshold if set.
void refresh()
If no summary is present, attempt to refresh.
bool isColdCount(uint64_t C) const
Returns true if count C is considered cold.
bool isHotCount(uint64_t C) const
Returns true if count C is considered hot.
uint64_t getOrCompHotCountThreshold() const
Returns HotCountThreshold if set.
This class represents the LLVM 'select' instruction.
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:937
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
constexpr bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:147
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:144
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
TinyPtrVector - This class is specialized for cases where there are normally 0 or 1 element in a vect...
Definition: TinyPtrVector.h:29
EltTy front() const
unsigned size() const
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
std::string str() const
Return the twine contents as a std::string.
Definition: Twine.cpp:17
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
void print(raw_ostream &O, bool IsForDebug=false, bool NoDetails=false) const
Print the current type.
static IntegerType * getInt64Ty(LLVMContext &C)
Value * getOperand(unsigned i) const
Definition: User.h:228
Utility analysis that determines what values are worth profiling.
std::vector< CandidateInfo > get(InstrProfValueKind Kind) const
returns a list of value profiling candidates of the given kind
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition: ilist_node.h:32
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:661
The virtual file system interface.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
This file contains the declaration of the Comdat class, which represents a single COMDAT in LLVM.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
Definition: Intrinsics.cpp:731
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:711
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
uint64_t getFuncHash(const FuncRecordTy *Record)
Return the structural hash associated with the function.
void checkExpectAnnotations(Instruction &I, const ArrayRef< uint32_t > ExistingWeights, bool IsFrontend)
checkExpectAnnotations - compares PGO counters to the thresholds used for llvm.expect and warns if th...
Definition: MisExpect.cpp:202
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< FuncNode * > Func
Definition: RDFGraph.h:393
void write64le(void *P, uint64_t V)
Definition: Endian.h:471
IntrusiveRefCntPtr< FileSystem > getRealFileSystem()
Gets an vfs::FileSystem for the 'real' file system, as seen by the operating system.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void setIrrLoopHeaderMetadata(Module *M, Instruction *TI, uint64_t Count)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1697
std::string getPGOFuncName(const Function &F, bool InLTO=false, uint64_t Version=INSTR_PROF_INDEX_VERSION)
Please use getIRPGOFuncName for LLVM IR instrumentation.
Definition: InstrProf.cpp:379
void createPGOFuncNameMetadata(Function &F, StringRef PGOFuncName)
Create the PGOFuncName meta data if PGOFuncName is different from function's raw name.
Definition: InstrProf.cpp:1427
unsigned GetSuccessorNumber(const BasicBlock *BB, const BasicBlock *Succ)
Search for the specified successor of basic block BB and return its position in the terminator instru...
Definition: CFG.cpp:79
std::string getIRPGOFuncName(const Function &F, bool InLTO=false)
Definition: InstrProf.cpp:368
Function::ProfileCount ProfileCount
auto successors(const MachineBasicBlock *BB)
void createProfileSamplingVar(Module &M)
void handleAllErrors(Error E, HandlerTs &&... Handlers)
Behaves the same as handleErrors, except that by contract all errors must be handled by the given han...
Definition: Error.h:977
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
cl::opt< InstrProfCorrelator::ProfCorrelatorKind > ProfileCorrelate("profile-correlate", cl::desc("Use debug info or binary file to correlate profiles."), cl::init(InstrProfCorrelator::NONE), cl::values(clEnumValN(InstrProfCorrelator::NONE, "", "No profile correlation"), clEnumValN(InstrProfCorrelator::DEBUG_INFO, "debug-info", "Use debug info to correlate"), clEnumValN(InstrProfCorrelator::BINARY, "binary", "Use binary to correlate")))
DenseMap< BasicBlock *, ColorVector > colorEHFunclets(Function &F)
If an EH funclet personality is in use (see isFuncletEHPersonality), this will recompute which blocks...
void createPGONameMetadata(GlobalObject &GO, StringRef PGOName)
Create the PGOName metadata if a global object's PGO name is different from its mangled name.
Definition: InstrProf.cpp:1431
PGOInstrumentationType
cl::opt< bool > PGOWarnMissing
raw_ostream & WriteGraph(raw_ostream &O, const GraphType &G, bool ShortNames=false, const Twine &Title="")
Definition: GraphWriter.h:359
bool SplitIndirectBrCriticalEdges(Function &F, bool IgnoreBlocksWithoutPHI, BranchProbabilityInfo *BPI=nullptr, BlockFrequencyInfo *BFI=nullptr)
cl::opt< bool > EnableVTableProfileUse("enable-vtable-profile-use", cl::init(false), cl::desc("If ThinLTO and WPD is enabled and this option is true, vtable " "profiles will be used by ICP pass for more efficient indirect " "call sequence. If false, type profiles won't be used."))
bool isScopedEHPersonality(EHPersonality Pers)
Returns true if this personality uses scope-style EH IR instructions: catchswitch,...
cl::opt< bool > DebugInfoCorrelate
OperandBundleDefT< Value * > OperandBundleDef
Definition: AutoUpgrade.h:33
std::string getPGOName(const GlobalVariable &V, bool InLTO=false)
Definition: InstrProf.cpp:395
cl::opt< std::string > ViewBlockFreqFuncName("view-bfi-func-name", cl::Hidden, cl::desc("The option to specify " "the name of the function " "whose CFG will be displayed."))
GlobalVariable * createPGOFuncNameVar(Function &F, StringRef PGOFuncName)
Create and return the global variable for function name used in PGO instrumentation.
Definition: InstrProf.cpp:482
void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
Definition: InstrProf.cpp:1301
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:420
void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
cl::opt< bool > NoPGOWarnMismatch
Definition: MemProfiler.cpp:57
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
InstrProfValueKind
Definition: InstrProf.h:275
cl::opt< PGOViewCountsType > PGOViewCounts("pgo-view-counts", cl::Hidden, cl::desc("A boolean option to show CFG dag or text with " "block profile counts and branch probabilities " "right after PGO profile annotation step. The " "profile counts are computed using branch " "probabilities from the runtime profile data and " "block frequency propagation algorithm. To view " "the raw counts from the profile, use option " "-pgo-view-raw-counts instead. To limit graph " "display to only one function, use filtering option " "-view-bfi-func-name."), cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), clEnumValN(PGOVCT_Graph, "graph", "show a graph."), clEnumValN(PGOVCT_Text, "text", "show in text.")))
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
static uint32_t scaleBranchCount(uint64_t Count, uint64_t Scale)
Scale an individual branch count.
void appendToCompilerUsed(Module &M, ArrayRef< GlobalValue * > Values)
Adds global values to the llvm.compiler.used list.
BasicBlock * SplitCriticalEdge(Instruction *TI, unsigned SuccNum, const CriticalEdgeSplittingOptions &Options=CriticalEdgeSplittingOptions(), const Twine &BBName="")
If this edge is a critical edge, insert a new node to split the critical edge.
void ViewGraph(const GraphType &G, const Twine &Name, bool ShortNames=false, const Twine &Title="", GraphProgram::Name Program=GraphProgram::DOT)
ViewGraph - Emit a dot graph, run 'dot', run gv on the postscript file, then cleanup.
Definition: GraphWriter.h:427
bool isCriticalEdge(const Instruction *TI, unsigned SuccNum, bool AllowIdenticalEdges=false)
Return true if the specified edge is a critical edge.
Definition: CFG.cpp:95
static uint64_t calculateCountScale(uint64_t MaxCount)
Calculate what to divide by to scale counts.
bool canRenameComdatFunc(const Function &F, bool CheckAddressTaken=false)
Check if we can safely rename this Comdat function.
Definition: InstrProf.cpp:1483
void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput)
Definition: InstrProf.cpp:1506
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1873
@ DS_Warning
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
auto predecessors(const MachineBasicBlock *BB)
void setProfMetadata(Module *M, Instruction *TI, ArrayRef< uint64_t > EdgeCounts, uint64_t MaxCount)
cl::opt< bool > EnableVTableValueProfiling("enable-vtable-value-profiling", cl::init(false), cl::desc("If true, the virtual table address will be instrumented to know " "the types of a C++ pointer. The information is used in indirect " "call promotion to do selective vtable-based comparison."))
SuccIterator< const Instruction, const BasicBlock > const_succ_iterator
Definition: CFG.h:243
cl::opt< bool > NoPGOWarnMismatchComdatWeak
Implement std::hash so that hash_code can be used in STL containers.
Definition: BitVector.h:858
#define N
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:297
static const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:266
static std::string getGraphName(const PGOUseFunc *G)
std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph)
DOTGraphTraits - Template class that can be specialized to customize how graphs are converted to 'dot...
DefaultDOTGraphTraits - This class provides the default implementations of all of the DOTGraphTraits ...
static ChildIteratorType child_end(const NodeRef N)
static NodeRef getEntryNode(const PGOUseFunc *G)
static ChildIteratorType child_begin(const NodeRef N)
static nodes_iterator nodes_end(const PGOUseFunc *G)
static nodes_iterator nodes_begin(const PGOUseFunc *G)
Profiling information for a single function.
Definition: InstrProf.h:836
std::vector< uint64_t > Counts
Definition: InstrProf.h:837
CountPseudoKind getCountPseudoKind() const
Definition: InstrProf.h:934
uint32_t getNumValueSites(uint32_t ValueKind) const
Return the number of instrumented sites for ValueKind.
Definition: InstrProf.h:1036
static void setCSFlagInHash(uint64_t &FuncHash)
Definition: InstrProf.h:1017