LLVM 20.0.0git
PGOInstrumentation.cpp
Go to the documentation of this file.
1//===- PGOInstrumentation.cpp - MST-based PGO Instrumentation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
// This file implements PGO instrumentation using a minimum spanning tree based
// on the following paper:
//   [1] Donald E. Knuth, Francis R. Stevenson. Optimal measurement of points
//   for program frequency counts. BIT Numerical Mathematics 1973, Volume 13,
//   Issue 3, pp 313-322
// The idea of the algorithm is based on the fact that for each node (except
// for the entry and exit), the sum of incoming edge counts equals the sum of
// outgoing edge counts. The count of an edge on the spanning tree can be
// derived from those edges not on the spanning tree. Knuth proves this method
// instruments the minimum number of edges.
19//
20// The minimal spanning tree here is actually a maximum weight tree -- on-tree
21// edges have higher frequencies (more likely to execute). The idea is to
22// instrument those less frequently executed edges to reduce the runtime
23// overhead of instrumented binaries.
24//
25// This file contains two passes:
26// (1) Pass PGOInstrumentationGen which instruments the IR to generate edge
27// count profile, and generates the instrumentation for indirect call
28// profiling.
29// (2) Pass PGOInstrumentationUse which reads the edge count profile and
30// annotates the branch weights. It also reads the indirect call value
31// profiling records and annotate the indirect call instructions.
32//
// To get precise counter information, these two passes need to be invoked at
// the same compilation point (so they see the same IR). For pass
// PGOInstrumentationGen, the real work is done in instrumentOneFunc(). For
// pass PGOInstrumentationUse, the real work is done in class PGOUseFunc and
// the profile is opened at module level and passed to each PGOUseFunc instance.
// The shared code for PGOInstrumentationGen and PGOInstrumentationUse is put
// in class FuncPGOInstrumentation.
40//
// Class PGOEdge represents a CFG edge and some auxiliary information. Class
// BBInfo contains auxiliary information for each BB. These two classes are used
// in pass PGOInstrumentationGen. Class PGOUseEdge and UseBBInfo are the derived
// classes of PGOEdge and BBInfo, respectively. They contain extra data
// structures used in populating profile counters.
// The MST implementation is in Class CFGMST (CFGMST.h).
47//
48//===----------------------------------------------------------------------===//
49
52#include "llvm/ADT/APInt.h"
53#include "llvm/ADT/ArrayRef.h"
54#include "llvm/ADT/STLExtras.h"
56#include "llvm/ADT/Statistic.h"
57#include "llvm/ADT/StringRef.h"
58#include "llvm/ADT/Twine.h"
59#include "llvm/ADT/iterator.h"
63#include "llvm/Analysis/CFG.h"
68#include "llvm/IR/Attributes.h"
69#include "llvm/IR/BasicBlock.h"
70#include "llvm/IR/CFG.h"
71#include "llvm/IR/Comdat.h"
72#include "llvm/IR/Constant.h"
73#include "llvm/IR/Constants.h"
75#include "llvm/IR/Dominators.h"
77#include "llvm/IR/Function.h"
78#include "llvm/IR/GlobalAlias.h"
79#include "llvm/IR/GlobalValue.h"
81#include "llvm/IR/IRBuilder.h"
82#include "llvm/IR/InstVisitor.h"
83#include "llvm/IR/InstrTypes.h"
84#include "llvm/IR/Instruction.h"
87#include "llvm/IR/Intrinsics.h"
88#include "llvm/IR/LLVMContext.h"
89#include "llvm/IR/MDBuilder.h"
90#include "llvm/IR/Module.h"
91#include "llvm/IR/PassManager.h"
94#include "llvm/IR/Type.h"
95#include "llvm/IR/Value.h"
99#include "llvm/Support/CRC.h"
100#include "llvm/Support/Casting.h"
103#include "llvm/Support/Debug.h"
104#include "llvm/Support/Error.h"
116#include <algorithm>
117#include <cassert>
118#include <cstdint>
119#include <memory>
120#include <numeric>
121#include <optional>
122#include <stack>
123#include <string>
124#include <unordered_map>
125#include <utility>
126#include <vector>
127
128using namespace llvm;
131
132#define DEBUG_TYPE "pgo-instrumentation"
133
134STATISTIC(NumOfPGOInstrument, "Number of edges instrumented.");
135STATISTIC(NumOfPGOSelectInsts, "Number of select instruction instrumented.");
136STATISTIC(NumOfPGOMemIntrinsics, "Number of mem intrinsics instrumented.");
137STATISTIC(NumOfPGOEdge, "Number of edges.");
138STATISTIC(NumOfPGOBB, "Number of basic-blocks.");
139STATISTIC(NumOfPGOSplit, "Number of critical edge splits.");
140STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts.");
141STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile.");
142STATISTIC(NumOfPGOMissing, "Number of functions without profile.");
143STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations.");
144STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO.");
145STATISTIC(NumOfCSPGOSelectInsts,
146 "Number of select instruction instrumented in CSPGO.");
147STATISTIC(NumOfCSPGOMemIntrinsics,
148 "Number of mem intrinsics instrumented in CSPGO.");
149STATISTIC(NumOfCSPGOEdge, "Number of edges in CSPGO.");
150STATISTIC(NumOfCSPGOBB, "Number of basic-blocks in CSPGO.");
151STATISTIC(NumOfCSPGOSplit, "Number of critical edge splits in CSPGO.");
152STATISTIC(NumOfCSPGOFunc,
153 "Number of functions having valid profile counts in CSPGO.");
154STATISTIC(NumOfCSPGOMismatch,
155 "Number of functions having mismatch profile in CSPGO.");
156STATISTIC(NumOfCSPGOMissing, "Number of functions without profile in CSPGO.");
157STATISTIC(NumCoveredBlocks, "Number of basic blocks that were executed");
158
159// Command line option to specify the file to read profile from. This is
160// mainly used for testing.
162 PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden,
163 cl::value_desc("filename"),
164 cl::desc("Specify the path of profile data file. This is"
165 "mainly for test purpose."));
167 "pgo-test-profile-remapping-file", cl::init(""), cl::Hidden,
168 cl::value_desc("filename"),
169 cl::desc("Specify the path of profile remapping file. This is mainly for "
170 "test purpose."));
171
172// Command line option to disable value profiling. The default is false:
173// i.e. value profiling is enabled by default. This is for debug purpose.
174static cl::opt<bool> DisableValueProfiling("disable-vp", cl::init(false),
176 cl::desc("Disable Value Profiling"));
177
178// Command line option to set the maximum number of VP annotations to write to
179// the metadata for a single indirect call callsite.
181 "icp-max-annotations", cl::init(3), cl::Hidden,
182 cl::desc("Max number of annotations for a single indirect "
183 "call callsite"));
184
185// Command line option to set the maximum number of value annotations
186// to write to the metadata for a single memop intrinsic.
188 "memop-max-annotations", cl::init(4), cl::Hidden,
189 cl::desc("Max number of preicise value annotations for a single memop"
190 "intrinsic"));
191
192// Command line option to control appending FunctionHash to the name of a COMDAT
193// function. This is to avoid the hash mismatch caused by the preinliner.
195 "do-comdat-renaming", cl::init(false), cl::Hidden,
196 cl::desc("Append function hash to the name of COMDAT function to avoid "
197 "function hash mismatch due to the preinliner"));
198
199namespace llvm {
200// Command line option to enable/disable the warning about missing profile
201// information.
202cl::opt<bool> PGOWarnMissing("pgo-warn-missing-function", cl::init(false),
204 cl::desc("Use this option to turn on/off "
205 "warnings about missing profile data for "
206 "functions."));
207
208// Command line option to enable/disable the warning about a hash mismatch in
209// the profile data.
211 NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), cl::Hidden,
212 cl::desc("Use this option to turn off/on "
213 "warnings about profile cfg mismatch."));
214
215// Command line option to enable/disable the warning about a hash mismatch in
216// the profile data for Comdat functions, which often turns out to be false
217// positive due to the pre-instrumentation inline.
219 "no-pgo-warn-mismatch-comdat-weak", cl::init(true), cl::Hidden,
220 cl::desc("The option is used to turn on/off "
221 "warnings about hash mismatch for comdat "
222 "or weak functions."));
223} // namespace llvm
224
225// Command line option to enable/disable select instruction instrumentation.
226static cl::opt<bool>
227 PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden,
228 cl::desc("Use this option to turn on/off SELECT "
229 "instruction instrumentation. "));
230
231// Command line option to turn on CFG dot or text dump of raw profile counts
233 "pgo-view-raw-counts", cl::Hidden,
234 cl::desc("A boolean option to show CFG dag or text "
235 "with raw profile counts from "
236 "profile data. See also option "
237 "-pgo-view-counts. To limit graph "
238 "display to only one function, use "
239 "filtering option -view-bfi-func-name."),
240 cl::values(clEnumValN(PGOVCT_None, "none", "do not show."),
241 clEnumValN(PGOVCT_Graph, "graph", "show a graph."),
242 clEnumValN(PGOVCT_Text, "text", "show in text.")));
243
244// Command line option to enable/disable memop intrinsic call.size profiling.
245static cl::opt<bool>
246 PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden,
247 cl::desc("Use this option to turn on/off "
248 "memory intrinsic size profiling."));
249
250// Emit branch probability as optimization remarks.
251static cl::opt<bool>
252 EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden,
253 cl::desc("When this option is on, the annotated "
254 "branch probability will be emitted as "
255 "optimization remarks: -{Rpass|"
256 "pass-remarks}=pgo-instrumentation"));
257
259 "pgo-instrument-entry", cl::init(false), cl::Hidden,
260 cl::desc("Force to instrument function entry basicblock."));
261
263 "pgo-function-entry-coverage", cl::Hidden,
264 cl::desc(
265 "Use this option to enable function entry coverage instrumentation."));
266
268 "pgo-block-coverage",
269 cl::desc("Use this option to enable basic block coverage instrumentation"));
270
271static cl::opt<bool>
272 PGOViewBlockCoverageGraph("pgo-view-block-coverage-graph",
273 cl::desc("Create a dot file of CFGs with block "
274 "coverage inference information"));
275
277 "pgo-temporal-instrumentation",
278 cl::desc("Use this option to enable temporal instrumentation"));
279
280static cl::opt<bool>
281 PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden,
282 cl::desc("Fix function entry count in profile use."));
283
285 "pgo-verify-hot-bfi", cl::init(false), cl::Hidden,
286 cl::desc("Print out the non-match BFI count if a hot raw profile count "
287 "becomes non-hot, or a cold raw profile count becomes hot. "
288 "The print is enabled under -Rpass-analysis=pgo, or "
289 "internal option -pass-remakrs-analysis=pgo."));
290
292 "pgo-verify-bfi", cl::init(false), cl::Hidden,
293 cl::desc("Print out mismatched BFI counts after setting profile metadata "
294 "The print is enabled under -Rpass-analysis=pgo, or "
295 "internal option -pass-remakrs-analysis=pgo."));
296
298 "pgo-verify-bfi-ratio", cl::init(2), cl::Hidden,
299 cl::desc("Set the threshold for pgo-verify-bfi: only print out "
300 "mismatched BFI if the difference percentage is greater than "
301 "this value (in percentage)."));
302
304 "pgo-verify-bfi-cutoff", cl::init(5), cl::Hidden,
305 cl::desc("Set the threshold for pgo-verify-bfi: skip the counts whose "
306 "profile count value is below."));
307
309 "pgo-trace-func-hash", cl::init("-"), cl::Hidden,
310 cl::value_desc("function name"),
311 cl::desc("Trace the hash of the function with this name."));
312
314 "pgo-function-size-threshold", cl::Hidden,
315 cl::desc("Do not instrument functions smaller than this threshold."));
316
318 "pgo-critical-edge-threshold", cl::init(20000), cl::Hidden,
319 cl::desc("Do not instrument functions with the number of critical edges "
320 " greater than this threshold."));
321
323
324namespace llvm {
325// Command line option to turn on CFG dot dump after profile annotation.
326// Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts
328
329// Command line option to specify the name of the function for CFG dump
330// Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name=
332
333// Command line option to enable vtable value profiling. Defined in
334// ProfileData/InstrProf.cpp: -enable-vtable-value-profiling=
338} // namespace llvm
339
340namespace {
341class FunctionInstrumenter final {
342 Module &M;
343 Function &F;
345 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers;
346 BranchProbabilityInfo *const BPI;
347 BlockFrequencyInfo *const BFI;
348
349 const PGOInstrumentationType InstrumentationType;
350
351 // FIXME(mtrofin): re-enable this for ctx profiling, for non-indirect calls.
352 // Ctx profiling implicitly captures indirect call cases, but not other
353 // values. Supporting other values is relatively straight-forward - just
354 // another counter range within the context.
355 bool isValueProfilingDisabled() const {
356 return DisableValueProfiling ||
357 InstrumentationType == PGOInstrumentationType::CTXPROF;
358 }
359
360 bool shouldInstrumentEntryBB() const {
361 return PGOInstrumentEntry ||
362 InstrumentationType == PGOInstrumentationType::CTXPROF;
363 }
364
365public:
366 FunctionInstrumenter(
368 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
369 BranchProbabilityInfo *BPI = nullptr, BlockFrequencyInfo *BFI = nullptr,
370 PGOInstrumentationType InstrumentationType = PGOInstrumentationType::FDO)
371 : M(M), F(F), TLI(TLI), ComdatMembers(ComdatMembers), BPI(BPI), BFI(BFI),
372 InstrumentationType(InstrumentationType) {}
373
374 void instrument();
375};
376} // namespace
377
378// Return a string describing the branch condition that can be
379// used in static branch probability heuristics:
380static std::string getBranchCondString(Instruction *TI) {
381 BranchInst *BI = dyn_cast<BranchInst>(TI);
382 if (!BI || !BI->isConditional())
383 return std::string();
384
385 Value *Cond = BI->getCondition();
386 ICmpInst *CI = dyn_cast<ICmpInst>(Cond);
387 if (!CI)
388 return std::string();
389
390 std::string result;
391 raw_string_ostream OS(result);
392 OS << CI->getPredicate() << "_";
393 CI->getOperand(0)->getType()->print(OS, true);
394
395 Value *RHS = CI->getOperand(1);
396 ConstantInt *CV = dyn_cast<ConstantInt>(RHS);
397 if (CV) {
398 if (CV->isZero())
399 OS << "_Zero";
400 else if (CV->isOne())
401 OS << "_One";
402 else if (CV->isMinusOne())
403 OS << "_MinusOne";
404 else
405 OS << "_Const";
406 }
407 OS.flush();
408 return result;
409}
410
411static const char *ValueProfKindDescr[] = {
412#define VALUE_PROF_KIND(Enumerator, Value, Descr) Descr,
414};
415
416// Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime
417// aware this is an ir_level profile so it can set the version flag.
418static GlobalVariable *
420 PGOInstrumentationType InstrumentationType) {
421 const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR));
422 Type *IntTy64 = Type::getInt64Ty(M.getContext());
423 uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF);
424 if (InstrumentationType == PGOInstrumentationType::CSFDO)
425 ProfileVersion |= VARIANT_MASK_CSIR_PROF;
426 if (PGOInstrumentEntry ||
427 InstrumentationType == PGOInstrumentationType::CTXPROF)
428 ProfileVersion |= VARIANT_MASK_INSTR_ENTRY;
430 ProfileVersion |= VARIANT_MASK_DBG_CORRELATE;
432 ProfileVersion |=
433 VARIANT_MASK_BYTE_COVERAGE | VARIANT_MASK_FUNCTION_ENTRY_ONLY;
435 ProfileVersion |= VARIANT_MASK_BYTE_COVERAGE;
437 ProfileVersion |= VARIANT_MASK_TEMPORAL_PROF;
438 auto IRLevelVersionVariable = new GlobalVariable(
439 M, IntTy64, true, GlobalValue::WeakAnyLinkage,
440 Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), VarName);
441 IRLevelVersionVariable->setVisibility(GlobalValue::HiddenVisibility);
442 Triple TT(M.getTargetTriple());
443 if (TT.supportsCOMDAT()) {
444 IRLevelVersionVariable->setLinkage(GlobalValue::ExternalLinkage);
445 IRLevelVersionVariable->setComdat(M.getOrInsertComdat(VarName));
446 }
447 return IRLevelVersionVariable;
448}
449
450namespace {
451
452/// The select instruction visitor plays three roles specified
453/// by the mode. In \c VM_counting mode, it simply counts the number of
454/// select instructions. In \c VM_instrument mode, it inserts code to count
455/// the number times TrueValue of select is taken. In \c VM_annotate mode,
456/// it reads the profile data and annotate the select instruction with metadata.
457enum VisitMode { VM_counting, VM_instrument, VM_annotate };
458class PGOUseFunc;
459
460/// Instruction Visitor class to visit select instructions.
461struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> {
462 Function &F;
463 unsigned NSIs = 0; // Number of select instructions instrumented.
464 VisitMode Mode = VM_counting; // Visiting mode.
465 unsigned *CurCtrIdx = nullptr; // Pointer to current counter index.
466 unsigned TotalNumCtrs = 0; // Total number of counters
467 GlobalVariable *FuncNameVar = nullptr;
468 uint64_t FuncHash = 0;
469 PGOUseFunc *UseFunc = nullptr;
470 bool HasSingleByteCoverage;
471
472 SelectInstVisitor(Function &Func, bool HasSingleByteCoverage)
473 : F(Func), HasSingleByteCoverage(HasSingleByteCoverage) {}
474
475 void countSelects() {
476 NSIs = 0;
477 Mode = VM_counting;
478 visit(F);
479 }
480
481 // Visit the IR stream and instrument all select instructions. \p
482 // Ind is a pointer to the counter index variable; \p TotalNC
483 // is the total number of counters; \p FNV is the pointer to the
484 // PGO function name var; \p FHash is the function hash.
485 void instrumentSelects(unsigned *Ind, unsigned TotalNC, GlobalVariable *FNV,
486 uint64_t FHash) {
487 Mode = VM_instrument;
488 CurCtrIdx = Ind;
489 TotalNumCtrs = TotalNC;
490 FuncHash = FHash;
491 FuncNameVar = FNV;
492 visit(F);
493 }
494
495 // Visit the IR stream and annotate all select instructions.
496 void annotateSelects(PGOUseFunc *UF, unsigned *Ind) {
497 Mode = VM_annotate;
498 UseFunc = UF;
499 CurCtrIdx = Ind;
500 visit(F);
501 }
502
503 void instrumentOneSelectInst(SelectInst &SI);
504 void annotateOneSelectInst(SelectInst &SI);
505
506 // Visit \p SI instruction and perform tasks according to visit mode.
507 void visitSelectInst(SelectInst &SI);
508
509 // Return the number of select instructions. This needs be called after
510 // countSelects().
511 unsigned getNumOfSelectInsts() const { return NSIs; }
512};
513
514/// This class implements the CFG edges for the Minimum Spanning Tree (MST)
515/// based instrumentation.
516/// Note that the CFG can be a multi-graph. So there might be multiple edges
517/// with the same SrcBB and DestBB.
518struct PGOEdge {
519 BasicBlock *SrcBB;
520 BasicBlock *DestBB;
521 uint64_t Weight;
522 bool InMST = false;
523 bool Removed = false;
524 bool IsCritical = false;
525
526 PGOEdge(BasicBlock *Src, BasicBlock *Dest, uint64_t W = 1)
527 : SrcBB(Src), DestBB(Dest), Weight(W) {}
528
529 /// Return the information string of an edge.
530 std::string infoString() const {
531 return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") +
532 (IsCritical ? "c" : " ") + " W=" + Twine(Weight))
533 .str();
534 }
535};
536
537/// This class stores the auxiliary information for each BB in the MST.
538struct PGOBBInfo {
539 PGOBBInfo *Group;
541 uint32_t Rank = 0;
542
543 PGOBBInfo(unsigned IX) : Group(this), Index(IX) {}
544
545 /// Return the information string of this object.
546 std::string infoString() const {
547 return (Twine("Index=") + Twine(Index)).str();
548 }
549};
550
551// This class implements the CFG edges. Note the CFG can be a multi-graph.
552template <class Edge, class BBInfo> class FuncPGOInstrumentation {
553private:
554 Function &F;
555
  // Whether this is context-sensitive instrumentation.
557 bool IsCS;
558
559 // A map that stores the Comdat group in function F.
560 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers;
561
563
564 void computeCFGHash();
565 void renameComdatFunction();
566
567public:
568 const TargetLibraryInfo &TLI;
569 std::vector<std::vector<VPCandidateInfo>> ValueSites;
570 SelectInstVisitor SIVisitor;
571 std::string FuncName;
572 std::string DeprecatedFuncName;
573 GlobalVariable *FuncNameVar;
574
575 // CFG hash value for this function.
576 uint64_t FunctionHash = 0;
577
578 // The Minimum Spanning Tree of function CFG.
580
581 const std::optional<BlockCoverageInference> BCI;
582
583 static std::optional<BlockCoverageInference>
584 constructBCI(Function &Func, bool HasSingleByteCoverage,
585 bool InstrumentFuncEntry) {
586 if (HasSingleByteCoverage)
587 return BlockCoverageInference(Func, InstrumentFuncEntry);
588 return {};
589 }
590
591 // Collect all the BBs that will be instrumented, and store them in
592 // InstrumentBBs.
593 void getInstrumentBBs(std::vector<BasicBlock *> &InstrumentBBs);
594
595 // Give an edge, find the BB that will be instrumented.
596 // Return nullptr if there is no BB to be instrumented.
597 BasicBlock *getInstrBB(Edge *E);
598
599 // Return the auxiliary BB information.
600 BBInfo &getBBInfo(const BasicBlock *BB) const { return MST.getBBInfo(BB); }
601
602 // Return the auxiliary BB information if available.
603 BBInfo *findBBInfo(const BasicBlock *BB) const { return MST.findBBInfo(BB); }
604
605 // Dump edges and BB information.
606 void dumpInfo(StringRef Str = "") const {
607 MST.dumpEdges(dbgs(), Twine("Dump Function ") + FuncName +
608 " Hash: " + Twine(FunctionHash) + "\t" + Str);
609 }
610
611 FuncPGOInstrumentation(
612 Function &Func, TargetLibraryInfo &TLI,
613 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
614 bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr,
615 BlockFrequencyInfo *BFI = nullptr, bool IsCS = false,
616 bool InstrumentFuncEntry = true, bool HasSingleByteCoverage = false)
617 : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func, TLI),
618 TLI(TLI), ValueSites(IPVK_Last + 1),
619 SIVisitor(Func, HasSingleByteCoverage),
620 MST(F, InstrumentFuncEntry, BPI, BFI),
621 BCI(constructBCI(Func, HasSingleByteCoverage, InstrumentFuncEntry)) {
622 if (BCI && PGOViewBlockCoverageGraph)
623 BCI->viewBlockCoverageGraph();
624 // This should be done before CFG hash computation.
625 SIVisitor.countSelects();
626 ValueSites[IPVK_MemOPSize] = VPC.get(IPVK_MemOPSize);
627 if (!IsCS) {
628 NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
629 NumOfPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
630 NumOfPGOBB += MST.bbInfoSize();
631 ValueSites[IPVK_IndirectCallTarget] = VPC.get(IPVK_IndirectCallTarget);
633 ValueSites[IPVK_VTableTarget] = VPC.get(IPVK_VTableTarget);
634 } else {
635 NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
636 NumOfCSPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
637 NumOfCSPGOBB += MST.bbInfoSize();
638 }
639
640 FuncName = getIRPGOFuncName(F);
641 DeprecatedFuncName = getPGOFuncName(F);
642 computeCFGHash();
643 if (!ComdatMembers.empty())
644 renameComdatFunction();
645 LLVM_DEBUG(dumpInfo("after CFGMST"));
646
647 for (const auto &E : MST.allEdges()) {
648 if (E->Removed)
649 continue;
650 IsCS ? NumOfCSPGOEdge++ : NumOfPGOEdge++;
651 if (!E->InMST)
652 IsCS ? NumOfCSPGOInstrument++ : NumOfPGOInstrument++;
653 }
654
655 if (CreateGlobalVar)
656 FuncNameVar = createPGOFuncNameVar(F, FuncName);
657 }
658};
659
660} // end anonymous namespace
661
662// Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index
663// value of each BB in the CFG. The higher 32 bits are the CRC32 of the numbers
664// of selects, indirect calls, mem ops and edges.
665template <class Edge, class BBInfo>
666void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
667 std::vector<uint8_t> Indexes;
668 JamCRC JC;
669 for (auto &BB : F) {
670 for (BasicBlock *Succ : successors(&BB)) {
671 auto BI = findBBInfo(Succ);
672 if (BI == nullptr)
673 continue;
674 uint32_t Index = BI->Index;
675 for (int J = 0; J < 4; J++)
676 Indexes.push_back((uint8_t)(Index >> (J * 8)));
677 }
678 }
679 JC.update(Indexes);
680
681 JamCRC JCH;
682 // The higher 32 bits.
683 auto updateJCH = [&JCH](uint64_t Num) {
684 uint8_t Data[8];
686 JCH.update(Data);
687 };
688 updateJCH((uint64_t)SIVisitor.getNumOfSelectInsts());
689 updateJCH((uint64_t)ValueSites[IPVK_IndirectCallTarget].size());
690 updateJCH((uint64_t)ValueSites[IPVK_MemOPSize].size());
691 if (BCI) {
692 updateJCH(BCI->getInstrumentedBlocksHash());
693 } else {
694 updateJCH((uint64_t)MST.numEdges());
695 }
696
697 // Hash format for context sensitive profile. Reserve 4 bits for other
698 // information.
699 FunctionHash = (((uint64_t)JCH.getCRC()) << 28) + JC.getCRC();
700
701 // Reserve bit 60-63 for other information purpose.
702 FunctionHash &= 0x0FFFFFFFFFFFFFFF;
703 if (IsCS)
705 LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n"
706 << " CRC = " << JC.getCRC()
707 << ", Selects = " << SIVisitor.getNumOfSelectInsts()
708 << ", Edges = " << MST.numEdges() << ", ICSites = "
709 << ValueSites[IPVK_IndirectCallTarget].size()
710 << ", Memops = " << ValueSites[IPVK_MemOPSize].size()
711 << ", High32 CRC = " << JCH.getCRC()
712 << ", Hash = " << FunctionHash << "\n";);
713
714 if (PGOTraceFuncHash != "-" && F.getName().contains(PGOTraceFuncHash))
715 dbgs() << "Funcname=" << F.getName() << ", Hash=" << FunctionHash
716 << " in building " << F.getParent()->getSourceFileName() << "\n";
717}
718
719// Check if we can safely rename this Comdat function.
720static bool canRenameComdat(
721 Function &F,
722 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
723 if (!DoComdatRenaming || !canRenameComdatFunc(F, true))
724 return false;
725
726 // FIXME: Current only handle those Comdat groups that only containing one
727 // function.
728 // (1) For a Comdat group containing multiple functions, we need to have a
729 // unique postfix based on the hashes for each function. There is a
730 // non-trivial code refactoring to do this efficiently.
731 // (2) Variables can not be renamed, so we can not rename Comdat function in a
732 // group including global vars.
733 Comdat *C = F.getComdat();
734 for (auto &&CM : make_range(ComdatMembers.equal_range(C))) {
735 assert(!isa<GlobalAlias>(CM.second));
736 Function *FM = dyn_cast<Function>(CM.second);
737 if (FM != &F)
738 return false;
739 }
740 return true;
741}
742
743// Append the CFGHash to the Comdat function name.
744template <class Edge, class BBInfo>
745void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() {
746 if (!canRenameComdat(F, ComdatMembers))
747 return;
748 std::string OrigName = F.getName().str();
749 std::string NewFuncName =
750 Twine(F.getName() + "." + Twine(FunctionHash)).str();
751 F.setName(Twine(NewFuncName));
753 FuncName = Twine(FuncName + "." + Twine(FunctionHash)).str();
754 Comdat *NewComdat;
755 Module *M = F.getParent();
756 // For AvailableExternallyLinkage functions, change the linkage to
757 // LinkOnceODR and put them into comdat. This is because after renaming, there
758 // is no backup external copy available for the function.
759 if (!F.hasComdat()) {
761 NewComdat = M->getOrInsertComdat(StringRef(NewFuncName));
763 F.setComdat(NewComdat);
764 return;
765 }
766
767 // This function belongs to a single function Comdat group.
768 Comdat *OrigComdat = F.getComdat();
769 std::string NewComdatName =
770 Twine(OrigComdat->getName() + "." + Twine(FunctionHash)).str();
771 NewComdat = M->getOrInsertComdat(StringRef(NewComdatName));
772 NewComdat->setSelectionKind(OrigComdat->getSelectionKind());
773
774 for (auto &&CM : make_range(ComdatMembers.equal_range(OrigComdat))) {
775 // Must be a function.
776 cast<Function>(CM.second)->setComdat(NewComdat);
777 }
778}
779
780/// Collect all the BBs that will be instruments and add them to
781/// `InstrumentBBs`.
782template <class Edge, class BBInfo>
783void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs(
784 std::vector<BasicBlock *> &InstrumentBBs) {
785 if (BCI) {
786 for (auto &BB : F)
787 if (BCI->shouldInstrumentBlock(BB))
788 InstrumentBBs.push_back(&BB);
789 return;
790 }
791
792 // Use a worklist as we will update the vector during the iteration.
793 std::vector<Edge *> EdgeList;
794 EdgeList.reserve(MST.numEdges());
795 for (const auto &E : MST.allEdges())
796 EdgeList.push_back(E.get());
797
798 for (auto &E : EdgeList) {
799 BasicBlock *InstrBB = getInstrBB(E);
800 if (InstrBB)
801 InstrumentBBs.push_back(InstrBB);
802 }
803}
804
// Given a CFG edge E to be instrumented, find the BB in which to place the
// instrumented code. The function will split the critical edge if necessary.
807template <class Edge, class BBInfo>
808BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) {
809 if (E->InMST || E->Removed)
810 return nullptr;
811
812 BasicBlock *SrcBB = E->SrcBB;
813 BasicBlock *DestBB = E->DestBB;
814 // For a fake edge, instrument the real BB.
815 if (SrcBB == nullptr)
816 return DestBB;
817 if (DestBB == nullptr)
818 return SrcBB;
819
820 auto canInstrument = [](BasicBlock *BB) -> BasicBlock * {
821 // There are basic blocks (such as catchswitch) cannot be instrumented.
822 // If the returned first insertion point is the end of BB, skip this BB.
823 if (BB->getFirstInsertionPt() == BB->end())
824 return nullptr;
825 return BB;
826 };
827
828 // Instrument the SrcBB if it has a single successor,
829 // otherwise, the DestBB if this is not a critical edge.
830 Instruction *TI = SrcBB->getTerminator();
831 if (TI->getNumSuccessors() <= 1)
832 return canInstrument(SrcBB);
833 if (!E->IsCritical)
834 return canInstrument(DestBB);
835
836 // Some IndirectBr critical edges cannot be split by the previous
837 // SplitIndirectBrCriticalEdges call. Bail out.
838 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
839 BasicBlock *InstrBB =
840 isa<IndirectBrInst>(TI) ? nullptr : SplitCriticalEdge(TI, SuccNum);
841 if (!InstrBB) {
843 dbgs() << "Fail to split critical edge: not instrument this edge.\n");
844 return nullptr;
845 }
846 // For a critical edge, we have to split. Instrument the newly
847 // created BB.
848 IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++;
849 LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index
850 << " --> " << getBBInfo(DestBB).Index << "\n");
851 // Need to add two new edges. First one: Add new edge of SrcBB->InstrBB.
852 MST.addEdge(SrcBB, InstrBB, 0);
853 // Second one: Add new edge of InstrBB->DestBB.
854 Edge &NewEdge1 = MST.addEdge(InstrBB, DestBB, 0);
855 NewEdge1.InMST = true;
856 E->Removed = true;
857
858 return canInstrument(InstrBB);
859}
860
// When generating value profiling calls on Windows routines that make use of
// handler funclets for exception processing, an operand bundle needs to be
// attached to the called function. This routine will set \p OpBundles to
// contain the funclet information, if any is needed, that should be placed on
// the generated value profiling call for the value profile candidate call.
866static void
870 auto *OrigCall = dyn_cast<CallBase>(Cand.AnnotatedInst);
871 if (!OrigCall)
872 return;
873
874 if (!isa<IntrinsicInst>(OrigCall)) {
875 // The instrumentation call should belong to the same funclet as a
876 // non-intrinsic call, so just copy the operand bundle, if any exists.
877 std::optional<OperandBundleUse> ParentFunclet =
878 OrigCall->getOperandBundle(LLVMContext::OB_funclet);
879 if (ParentFunclet)
880 OpBundles.emplace_back(OperandBundleDef(*ParentFunclet));
881 } else {
882 // Intrinsics or other instructions do not get funclet information from the
883 // front-end. Need to use the BlockColors that was computed by the routine
884 // colorEHFunclets to determine whether a funclet is needed.
885 if (!BlockColors.empty()) {
886 const ColorVector &CV = BlockColors.find(OrigCall->getParent())->second;
887 assert(CV.size() == 1 && "non-unique color for block!");
888 Instruction *EHPad = CV.front()->getFirstNonPHI();
889 if (EHPad->isEHPad())
890 OpBundles.emplace_back("funclet", EHPad);
891 }
892 }
893}
894
895// Visit all edges and instrument the edges not in MST, and do value profiling.
896// Critical edges will be split.
// The body runs in this order: an optional entry-coverage early exit, optional
// callsite instrumentation (CTXPROF), an optional timestamp counter, one
// counter per instrumented basic block, select instrumentation, and finally
// value-profile instrumentation for every enabled value kind.
897void FunctionInstrumenter::instrument() {
898 if (!PGOBlockCoverage) {
899 // Split indirectbr critical edges here before computing the MST rather than
900 // later in getInstrBB() to avoid invalidating it.
901 SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, BFI);
902 }
903
904 FuncPGOInstrumentation<PGOEdge, PGOBBInfo> FuncInfo(
905 F, TLI, ComdatMembers, true, BPI, BFI,
906 InstrumentationType == PGOInstrumentationType::CSFDO,
907 shouldInstrumentEntryBB(), PGOBlockCoverage);
908
 // Name and CFGHash are the first two operands of every instrprof intrinsic
 // call emitted below.
909 auto Name = FuncInfo.FuncNameVar;
910 auto CFGHash =
911 ConstantInt::get(Type::getInt64Ty(M.getContext()), FuncInfo.FunctionHash);
 // NOTE(review): listing line 912 is absent from this view; the block closed
 // at listing line 921 has no visible opening condition. Confirm against the
 // original file.
913 auto &EntryBB = F.getEntryBlock();
914 IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt());
915 // llvm.instrprof.cover(i8* <name>, i64 <hash>, i32 <num-counters>,
916 // i32 <index>)
917 Builder.CreateCall(
918 Intrinsic::getDeclaration(&M, Intrinsic::instrprof_cover),
919 {Name, CFGHash, Builder.getInt32(1), Builder.getInt32(0)});
920 return;
921 }
922
 // One counter per instrumented basic block plus one per select instruction.
923 std::vector<BasicBlock *> InstrumentBBs;
924 FuncInfo.getInstrumentBBs(InstrumentBBs);
925 unsigned NumCounters =
926 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
927
928 if (InstrumentationType == PGOInstrumentationType::CTXPROF) {
929 auto *CSIntrinsic =
930 Intrinsic::getDeclaration(&M, Intrinsic::instrprof_callsite);
931 // We want to count the instrumentable callsites, then instrument them. This
932 // is because the llvm.instrprof.callsite intrinsic has an argument (like
933 // the other instrprof intrinsics) capturing the total number of
934 // instrumented objects (counters, or callsites, in this case). In this
935 // case, we want that value so we can readily pass it to the compiler-rt
936 // APIs that may have to allocate memory based on the nr of callsites.
937 // The traversal logic is the same for both counting and instrumentation,
938 // just needs to be done in succession.
939 auto Visit = [&](llvm::function_ref<void(CallBase * CB)> Visitor) {
940 for (auto &BB : F)
941 for (auto &Instr : BB)
942 if (auto *CS = dyn_cast<CallBase>(&Instr)) {
 // Calls to intrinsics and inline asm are not treated as callsites.
943 if ((CS->getCalledFunction() &&
944 CS->getCalledFunction()->isIntrinsic()) ||
945 dyn_cast<InlineAsm>(CS->getCalledOperand()))
946 continue;
947 Visitor(CS);
948 }
949 };
950 // First, count callsites.
951 uint32_t TotalNrCallsites = 0;
952 Visit([&TotalNrCallsites](auto *) { ++TotalNrCallsites; });
953
954 // Now instrument.
955 uint32_t CallsiteIndex = 0;
956 Visit([&](auto *CB) {
957 IRBuilder<> Builder(CB);
958 Builder.CreateCall(CSIntrinsic,
959 {Name, CFGHash, Builder.getInt32(TotalNrCallsites),
960 Builder.getInt32(CallsiteIndex++),
961 CB->getCalledOperand()});
962 });
963 }
964
 // I is the index of the next counter to hand out.
965 uint32_t I = 0;
 // NOTE(review): listing line 966 is absent from this view; the block closed
 // at listing line 976 has no visible opening condition. Confirm against the
 // original file.
 // The timestamp takes 8 counter slots under block coverage and 1 otherwise;
 // both NumCounters and I are advanced by that amount.
967 NumCounters += PGOBlockCoverage ? 8 : 1;
968 auto &EntryBB = F.getEntryBlock();
969 IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt());
970 // llvm.instrprof.timestamp(i8* <name>, i64 <hash>, i32 <num-counters>,
971 // i32 <index>)
972 Builder.CreateCall(
973 Intrinsic::getDeclaration(&M, Intrinsic::instrprof_timestamp),
974 {Name, CFGHash, Builder.getInt32(NumCounters), Builder.getInt32(I)});
975 I += PGOBlockCoverage ? 8 : 1;
976 }
977
978 for (auto *InstrBB : InstrumentBBs) {
979 IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt());
980 assert(Builder.GetInsertPoint() != InstrBB->end() &&
981 "Cannot get the Instrumentation point");
982 // llvm.instrprof.increment(i8* <name>, i64 <hash>, i32 <num-counters>,
983 // i32 <index>)
984 Builder.CreateCall(
 // NOTE(review): listing line 985 is absent from this view; it presumably
 // holds the start of the callee expression that picks between the two
 // intrinsics below. Confirm against the original file.
986 ? Intrinsic::instrprof_cover
987 : Intrinsic::instrprof_increment),
988 {Name, CFGHash, Builder.getInt32(NumCounters), Builder.getInt32(I++)});
989 }
990
991 // Now instrument select instructions:
992 FuncInfo.SIVisitor.instrumentSelects(&I, NumCounters, FuncInfo.FuncNameVar,
993 FuncInfo.FunctionHash);
 // Every counter index must have been handed out exactly once.
994 assert(I == NumCounters);
995
996 if (isValueProfilingDisabled())
997 return;
998
999 NumOfPGOICall += FuncInfo.ValueSites[IPVK_IndirectCallTarget].size();
1000
1001 // Intrinsic function calls do not have funclet operand bundles needed for
1002 // Windows exception handling attached to them. However, if value profiling is
1003 // inserted for one of these calls, then a funclet value will need to be set
1004 // on the instrumentation call based on the funclet coloring.
 // NOTE(review): listing line 1005 is absent from this view; BlockColors,
 // assigned below, has no visible declaration. Confirm against the original
 // file.
1006 if (F.hasPersonalityFn() &&
1007 isScopedEHPersonality(classifyEHPersonality(F.getPersonalityFn())))
1008 BlockColors = colorEHFunclets(F);
1009
1010 // For each VP Kind, walk the VP candidates and instrument each one.
1011 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) {
 // Site indices restart at 0 for every value kind.
1012 unsigned SiteIndex = 0;
1013 if (Kind == IPVK_MemOPSize && !PGOInstrMemOP)
1014 continue;
1015
1016 for (VPCandidateInfo Cand : FuncInfo.ValueSites[Kind]) {
1017 LLVM_DEBUG(dbgs() << "Instrument one VP " << ValueProfKindDescr[Kind]
1018 << " site: CallSite Index = " << SiteIndex << "\n");
1019
1020 IRBuilder<> Builder(Cand.InsertPt);
1021 assert(Builder.GetInsertPoint() != Cand.InsertPt->getParent()->end() &&
1022 "Cannot get the Instrumentation point");
1023
 // The profiled value is passed as an i64: integers are zero-extended or
 // truncated, pointers are converted with ptrtoint.
1024 Value *ToProfile = nullptr;
1025 if (Cand.V->getType()->isIntegerTy())
1026 ToProfile = Builder.CreateZExtOrTrunc(Cand.V, Builder.getInt64Ty());
1027 else if (Cand.V->getType()->isPointerTy())
1028 ToProfile = Builder.CreatePtrToInt(Cand.V, Builder.getInt64Ty());
1029 assert(ToProfile && "value profiling Value is of unexpected type");
1030
 // NOTE(review): listing line 1031 is absent from this view; OpBundles, used
 // below, has no visible declaration. Confirm against the original file.
1032 populateEHOperandBundle(Cand, BlockColors, OpBundles);
1033 Builder.CreateCall(
1034 Intrinsic::getDeclaration(&M, Intrinsic::instrprof_value_profile),
1035 {FuncInfo.FuncNameVar, Builder.getInt64(FuncInfo.FunctionHash),
1036 ToProfile, Builder.getInt32(Kind), Builder.getInt32(SiteIndex++)},
1037 OpBundles);
1038 }
1039 } // IPVK_First <= Kind <= IPVK_Last
1040}
1041
1042namespace {
1043
1044// This class represents a CFG edge in profile use compilation.
1045struct PGOUseEdge : public PGOEdge {
1046 using PGOEdge::PGOEdge;
1047
1048 std::optional<uint64_t> Count;
1049
1050 // Set edge count value
1051 void setEdgeCount(uint64_t Value) { Count = Value; }
1052
1053 // Return the information string for this object.
1054 std::string infoString() const {
1055 if (!Count)
1056 return PGOEdge::infoString();
1057 return (Twine(PGOEdge::infoString()) + " Count=" + Twine(*Count)).str();
1058 }
1059};
1060
1061using DirectEdges = SmallVector<PGOUseEdge *, 2>;
1062
1063// This class stores the auxiliary information for each BB.
1064struct PGOUseBBInfo : public PGOBBInfo {
1065 std::optional<uint64_t> Count;
1066 int32_t UnknownCountInEdge = 0;
1067 int32_t UnknownCountOutEdge = 0;
1068 DirectEdges InEdges;
1069 DirectEdges OutEdges;
1070
1071 PGOUseBBInfo(unsigned IX) : PGOBBInfo(IX) {}
1072
1073 // Set the profile count value for this BB.
1074 void setBBInfoCount(uint64_t Value) { Count = Value; }
1075
1076 // Return the information string of this object.
1077 std::string infoString() const {
1078 if (!Count)
1079 return PGOBBInfo::infoString();
1080 return (Twine(PGOBBInfo::infoString()) + " Count=" + Twine(*Count)).str();
1081 }
1082
1083 // Add an OutEdge and update the edge count.
1084 void addOutEdge(PGOUseEdge *E) {
1085 OutEdges.push_back(E);
1086 UnknownCountOutEdge++;
1087 }
1088
1089 // Add an InEdge and update the edge count.
1090 void addInEdge(PGOUseEdge *E) {
1091 InEdges.push_back(E);
1092 UnknownCountInEdge++;
1093 }
1094};
1095
1096} // end anonymous namespace
1097
1098 // Sum up the count values for all the edges.
 // Removed edges and edges whose count has not been set contribute nothing.
 // NOTE(review): listing line 1099 (the function signature) is absent from
 // this view; callers in this file pass a DirectEdges list and use the result
 // as a uint64_t. Confirm the exact signature against the original file.
1100 uint64_t Total = 0;
1101 for (const auto &E : Edges) {
1102 if (E->Removed)
1103 continue;
1104 if (E->Count)
1105 Total += *E->Count;
1106 }
1107 return Total;
1108}
1109
1110namespace {
1111
// Per-function state for applying a profile: reads the counters for one
// function, spreads them over its blocks and edges, and annotates the IR.
1112class PGOUseFunc {
1113public:
1114 PGOUseFunc(Function &Func, Module *Modu, TargetLibraryInfo &TLI,
1115 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
 // NOTE(review): listing line 1116 is absent from this view; the initializer
 // list below uses BPI and BFIin, which have no visible parameter
 // declaration. Confirm against the original file.
1117 ProfileSummaryInfo *PSI, bool IsCS, bool InstrumentFuncEntry,
1118 bool HasSingleByteCoverage)
1119 : F(Func), M(Modu), BFI(BFIin), PSI(PSI),
1120 FuncInfo(Func, TLI, ComdatMembers, false, BPI, BFIin, IsCS,
1121 InstrumentFuncEntry, HasSingleByteCoverage),
1122 FreqAttr(FFA_Normal), IsCS(IsCS), VPC(Func, TLI) {}
1123
 // Report a profile-reading error for this function as a warning diagnostic,
 // unless the warning is suppressed for that kind of error.
1124 void handleInstrProfError(Error Err, uint64_t MismatchedFuncSum);
1125
1126 // Read counts for the instrumented BB from profile.
1127 bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
 // NOTE(review): listing line 1128 is absent from this view; the declaration
 // above is cut off after AllZeros. Confirm against the original file.
1129
1130 // Populate the counts for all BBs.
1131 void populateCounters();
1132
1133 // Set block coverage based on profile coverage values.
1134 void populateCoverage(IndexedInstrProfReader *PGOReader);
1135
1136 // Set the branch weights based on the count values.
1137 void setBranchWeights();
1138
1139 // Annotate the value profile call sites for all value kind.
1140 void annotateValueSites();
1141
1142 // Annotate the value profile call sites for one value kind.
1143 void annotateValueSites(uint32_t Kind);
1144
1145 // Annotate the irreducible loop header weights.
1146 void annotateIrrLoopHeaderWeights();
1147
1148 // The hotness of the function from the profile count.
1149 enum FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot };
1150
1151 // Return the function hotness from the profile.
1152 FuncFreqAttr getFuncFreqAttr() const { return FreqAttr; }
1153
1154 // Return the function hash.
1155 uint64_t getFuncHash() const { return FuncInfo.FunctionHash; }
1156
1157 // Return the profile record for this function;
1158 InstrProfRecord &getProfileRecord() { return ProfileRecord; }
1159
1160 // Return the auxiliary BB information.
1161 PGOUseBBInfo &getBBInfo(const BasicBlock *BB) const {
1162 return FuncInfo.getBBInfo(BB);
1163 }
1164
1165 // Return the auxiliary BB information if available.
1166 PGOUseBBInfo *findBBInfo(const BasicBlock *BB) const {
1167 return FuncInfo.findBBInfo(BB);
1168 }
1169
1170 Function &getFunc() const { return F; }
1171
1172 void dumpInfo(StringRef Str = "") const { FuncInfo.dumpInfo(Str); }
1173
 // NOTE(review): ProgramMaxCount has no default initializer and is only
 // assigned at the end of readCounters(); this accessor is meaningful only
 // after readCounters() has reached that assignment.
1174 uint64_t getProgramMaxCount() const { return ProgramMaxCount; }
1175
1176private:
1177 Function &F;
1178 Module *M;
 // NOTE(review): listing line 1179 is absent from this view; the BFI member
 // initialized by the constructor has no visible declaration. Confirm
 // against the original file.
1180 ProfileSummaryInfo *PSI;
1181
1182 // This member stores the shared information with class PGOGenFunc.
1183 FuncPGOInstrumentation<PGOUseEdge, PGOUseBBInfo> FuncInfo;
1184
1185 // The maximum count value in the profile. This is only used in PGO use
1186 // compilation.
1187 uint64_t ProgramMaxCount;
1188
1189 // Position of counter that remains to be read.
1190 uint32_t CountPosition = 0;
1191
1192 // Total size of the profile count for this function.
1193 uint32_t ProfileCountSize = 0;
1194
1195 // ProfileRecord for this function.
1196 InstrProfRecord ProfileRecord;
1197
1198 // Function hotness info derived from profile.
1199 FuncFreqAttr FreqAttr;
1200
1201 // Is to use the context sensitive profile.
1202 bool IsCS;
1203
 // NOTE(review): listing line 1204 is absent from this view; the VPC member
 // initialized by the constructor has no visible declaration. Confirm
 // against the original file.
1205
1206 // Find the Instrumented BB and set the value. Return false on error.
1207 bool setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile);
1208
1209 // Set the edge counter value for the unknown edge -- there should be only
1210 // one unknown edge.
1211 void setEdgeCount(DirectEdges &Edges, uint64_t Value);
1212
1213 // Set the hot/cold inline hints based on the count values.
1214 // FIXME: This function should be removed once the functionality in
1215 // the inliner is implemented.
 // Hot is decided from the entry count and checked first; cold is decided
 // from the maximum count. FreqAttr is left unchanged when neither applies.
1216 void markFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) {
1217 if (PSI->isHotCount(EntryCount))
1218 FreqAttr = FFA_Hot;
1219 else if (PSI->isColdCount(MaxCount))
1220 FreqAttr = FFA_Cold;
1221 }
1222};
1223
1224} // end anonymous namespace
1225
1226/// Set up InEdges/OutEdges for all BBs in the MST.
/// Every edge that is not marked Removed is added to the out-edge list of its
/// source block and the in-edge list of its destination block.
// NOTE(review): listing line 1227 (return type and function name) is absent
// from this view; the name is taken from the call in setInstrumentedCounts().
// Confirm against the original file.
1228 const FuncPGOInstrumentation<PGOUseEdge, PGOUseBBInfo> &FuncInfo) {
1229 // This is not required when there is block coverage inference.
1230 if (FuncInfo.BCI)
1231 return;
1232 for (const auto &E : FuncInfo.MST.allEdges()) {
1233 if (E->Removed)
1234 continue;
1235 const BasicBlock *SrcBB = E->SrcBB;
1236 const BasicBlock *DestBB = E->DestBB;
1237 PGOUseBBInfo &SrcInfo = FuncInfo.getBBInfo(SrcBB);
1238 PGOUseBBInfo &DestInfo = FuncInfo.getBBInfo(DestBB);
1239 SrcInfo.addOutEdge(E.get());
1240 DestInfo.addInEdge(E.get());
1241 }
1242}
1243
1244// Visit all the edges and assign the count value for the instrumented
1245// edges and the BB. Return false on error.
1246bool PGOUseFunc::setInstrumentedCounts(
1247 const std::vector<uint64_t> &CountFromProfile) {
1248
1249 std::vector<BasicBlock *> InstrumentBBs;
1250 FuncInfo.getInstrumentBBs(InstrumentBBs);
1251
1252 setupBBInfoEdges(FuncInfo);
1253
1254 unsigned NumCounters =
1255 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
1256 // The number of counters here should match the number of counters
1257 // in profile. Return if they mismatch.
1258 if (NumCounters != CountFromProfile.size()) {
1259 return false;
1260 }
1261 auto *FuncEntry = &*F.begin();
1262
1263 // Set the profile count to the Instrumented BBs.
1264 uint32_t I = 0;
1265 for (BasicBlock *InstrBB : InstrumentBBs) {
1266 uint64_t CountValue = CountFromProfile[I++];
1267 PGOUseBBInfo &Info = getBBInfo(InstrBB);
1268 // If we reach here, we know that we have some nonzero count
1269 // values in this function. The entry count should not be 0.
1270 // Fix it if necessary.
1271 if (InstrBB == FuncEntry && CountValue == 0)
1272 CountValue = 1;
1273 Info.setBBInfoCount(CountValue);
1274 }
1275 ProfileCountSize = CountFromProfile.size();
1276 CountPosition = I;
1277
1278 // Set the edge count and update the count of unknown edges for BBs.
1279 auto setEdgeCount = [this](PGOUseEdge *E, uint64_t Value) -> void {
1280 E->setEdgeCount(Value);
1281 this->getBBInfo(E->SrcBB).UnknownCountOutEdge--;
1282 this->getBBInfo(E->DestBB).UnknownCountInEdge--;
1283 };
1284
1285 // Set the profile count the Instrumented edges. There are BBs that not in
1286 // MST but not instrumented. Need to set the edge count value so that we can
1287 // populate the profile counts later.
1288 for (const auto &E : FuncInfo.MST.allEdges()) {
1289 if (E->Removed || E->InMST)
1290 continue;
1291 const BasicBlock *SrcBB = E->SrcBB;
1292 PGOUseBBInfo &SrcInfo = getBBInfo(SrcBB);
1293
1294 // If only one out-edge, the edge profile count should be the same as BB
1295 // profile count.
1296 if (SrcInfo.Count && SrcInfo.OutEdges.size() == 1)
1297 setEdgeCount(E.get(), *SrcInfo.Count);
1298 else {
1299 const BasicBlock *DestBB = E->DestBB;
1300 PGOUseBBInfo &DestInfo = getBBInfo(DestBB);
1301 // If only one in-edge, the edge profile count should be the same as BB
1302 // profile count.
1303 if (DestInfo.Count && DestInfo.InEdges.size() == 1)
1304 setEdgeCount(E.get(), *DestInfo.Count);
1305 }
1306 if (E->Count)
1307 continue;
1308 // E's count should have been set from profile. If not, this meenas E skips
1309 // the instrumentation. We set the count to 0.
1310 setEdgeCount(E.get(), 0);
1311 }
1312 return true;
1313}
1314
1315// Set the count value for the unknown edge. There should be one and only one
1316// unknown edge in Edges vector.
1317void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) {
1318 for (auto &E : Edges) {
1319 if (E->Count)
1320 continue;
1321 E->setEdgeCount(Value);
1322
1323 getBBInfo(E->SrcBB).UnknownCountOutEdge--;
1324 getBBInfo(E->DestBB).UnknownCountInEdge--;
1325 return;
1326 }
1327 llvm_unreachable("Cannot find the unknown count edge");
1328}
1329
1330// Emit function metadata indicating PGO profile mismatch.
// The marker string is appended to the function's MD_annotation tuple; existing
// operands are kept, and nothing changes if the marker is already present.
// NOTE(review): listing line 1331 (the function signature) is absent from this
// view; the body uses F and ctx, and the call site passes a function and an
// LLVMContext. Confirm against the original file.
1332 const char MetadataName[] = "instr_prof_hash_mismatch";
 // NOTE(review): listing line 1333 is absent from this view; Names, filled
 // below, has no visible declaration. Confirm against the original file.
1334 // If this metadata already exists, ignore.
1335 auto *Existing = F.getMetadata(LLVMContext::MD_annotation);
1336 if (Existing) {
1337 MDTuple *Tuple = cast<MDTuple>(Existing);
1338 for (const auto &N : Tuple->operands()) {
1339 if (N.equalsStr(MetadataName))
1340 return;
 // Carry the existing operands over into the new tuple.
1341 Names.push_back(N.get());
1342 }
1343 }
1344
1345 MDBuilder MDB(ctx);
1346 Names.push_back(MDB.createString(MetadataName));
1347 MDNode *MD = MDTuple::get(ctx, Names);
1348 F.setMetadata(LLVMContext::MD_annotation, MD);
1349}
1350
// Handle an error returned while reading this function's profile record:
// update the missing/mismatch statistics, tag the function with hash-mismatch
// metadata where applicable, and emit a warning unless it is suppressed.
// \p MismatchedFuncSum is only used in the warning text.
1351void PGOUseFunc::handleInstrProfError(Error Err, uint64_t MismatchedFuncSum) {
1352 handleAllErrors(std::move(Err), [&](const InstrProfError &IPE) {
1353 auto &Ctx = M->getContext();
1354 auto Err = IPE.get();
1355 bool SkipWarning = false;
1356 LLVM_DEBUG(dbgs() << "Error in reading profile for Func "
1357 << FuncInfo.FuncName << ": ");
1358 if (Err == instrprof_error::unknown_function) {
1359 IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++;
 // Missing-function warnings are only shown when PGOWarnMissing is set.
1360 SkipWarning = !PGOWarnMissing;
1361 LLVM_DEBUG(dbgs() << "unknown function");
1362 } else if (Err == instrprof_error::hash_mismatch ||
1363 Err == instrprof_error::malformed) {
1364 IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++;
1365 SkipWarning =
 // NOTE(review): listing lines 1366, 1367 and 1369 are absent from this
 // view, so the full SkipWarning condition is not visible. Confirm against
 // the original file.
1368 (F.hasComdat() || F.getLinkage() == GlobalValue::WeakAnyLinkage ||
1370 LLVM_DEBUG(dbgs() << "hash mismatch (hash= " << FuncInfo.FunctionHash
1371 << " skip=" << SkipWarning << ")");
1372 // Emit function metadata indicating PGO profile mismatch.
1373 annotateFunctionWithHashMismatch(F, M->getContext());
1374 }
1375
1376 LLVM_DEBUG(dbgs() << " IsCS=" << IsCS << "\n");
1377 if (SkipWarning)
1378 return;
1379
1380 std::string Msg =
1381 IPE.message() + std::string(" ") + F.getName().str() +
1382 std::string(" Hash = ") + std::to_string(FuncInfo.FunctionHash) +
1383 std::string(" up to ") + std::to_string(MismatchedFuncSum) +
1384 std::string(" count discarded");
1385
1386 Ctx.diagnose(
1387 DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning));
1388 });
1389}
1390
1391// Read the profile from ProfileFileName and assign the value to the
1392// instrumented BB and the edges. This function also updates ProgramMaxCount.
1393// Return true if the profile are successfully read, and false on errors.
// \p AllZeros is set to whether all counters in the record sum to zero. It is
// left untouched on the error path and on the early return for pseudo records.
1394bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
 // NOTE(review): listing line 1395 (the rest of the parameter list) is absent
 // from this view; the body assigns to PseudoKind, which has no visible
 // declaration. Confirm against the original file.
1396 auto &Ctx = M->getContext();
1397 uint64_t MismatchedFuncSum = 0;
 // NOTE(review): listing line 1398 is absent from this view; Result, used
 // below, has no visible declaration, and the next two lines are the tail of
 // the call that produces it. Confirm against the original file.
1399 FuncInfo.FuncName, FuncInfo.FunctionHash, FuncInfo.DeprecatedFuncName,
1400 &MismatchedFuncSum);
1401 if (Error E = Result.takeError()) {
1402 handleInstrProfError(std::move(E), MismatchedFuncSum);
1403 return false;
1404 }
1405 ProfileRecord = std::move(Result.get());
1406 PseudoKind = ProfileRecord.getCountPseudoKind();
 // For pseudo records, return success before any counter is processed.
1407 if (PseudoKind != InstrProfRecord::NotPseudo) {
1408 return true;
1409 }
1410 std::vector<uint64_t> &CountFromProfile = ProfileRecord.Counts;
1411
1412 IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++;
1413 LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n");
1414
1415 uint64_t ValueSum = 0;
1416 for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) {
1417 LLVM_DEBUG(dbgs() << " " << I << ": " << CountFromProfile[I] << "\n");
1418 ValueSum += CountFromProfile[I];
1419 }
1420 AllZeros = (ValueSum == 0);
1421
1422 LLVM_DEBUG(dbgs() << "SUM = " << ValueSum << "\n");
1423
 // NOTE(review): the info entry keyed by nullptr gets both unknown-edge
 // counters preset to 2; the reason is not visible in this chunk.
1424 getBBInfo(nullptr).UnknownCountOutEdge = 2;
1425 getBBInfo(nullptr).UnknownCountInEdge = 2;
1426
1427 if (!setInstrumentedCounts(CountFromProfile)) {
1428 LLVM_DEBUG(
1429 dbgs() << "Inconsistent number of counts, skipping this function");
1430 Ctx.diagnose(DiagnosticInfoPGOProfile(
1431 M->getName().data(),
1432 Twine("Inconsistent number of counts in ") + F.getName().str() +
1433 Twine(": the profile may be stale or there is a function name "
1434 "collision."),
1435 DS_Warning));
1436 return false;
1437 }
1438 ProgramMaxCount = PGOReader->getMaximumFunctionCount(IsCS);
1439 return true;
1440}
1441
// Read the coverage counters for this function, infer coverage for the blocks
// that were not instrumented, and annotate the function with an entry count
// and 0/1 branch weights. Then cross-check the result against a freshly
// computed BlockFrequencyInfo.
1442void PGOUseFunc::populateCoverage(IndexedInstrProfReader *PGOReader) {
1443 uint64_t MismatchedFuncSum = 0;
 // NOTE(review): listing line 1444 is absent from this view; Result, used
 // below, has no visible declaration, and the next two lines are the tail of
 // the call that produces it. Confirm against the original file.
1445 FuncInfo.FuncName, FuncInfo.FunctionHash, FuncInfo.DeprecatedFuncName,
1446 &MismatchedFuncSum);
1447 if (auto Err = Result.takeError()) {
1448 handleInstrProfError(std::move(Err), MismatchedFuncSum);
1449 return;
1450 }
1451 IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++;
1452
1453 std::vector<uint64_t> &CountsFromProfile = Result.get().Counts;
 // NOTE(review): listing line 1454 is absent from this view; Coverage, used
 // below as a map from block to bool, has no visible declaration. Confirm
 // against the original file.
 // Instrumented blocks take their coverage from the profile, in function
 // order; a nonzero counter means covered.
 // NOTE(review): CountsFromProfile is indexed without a bounds check; only
 // the assert below, after the loop, verifies the counter count.
1455 unsigned Index = 0;
1456 for (auto &BB : F)
1457 if (FuncInfo.BCI->shouldInstrumentBlock(BB))
1458 Coverage[&BB] = (CountsFromProfile[Index++] != 0);
1459 assert(Index == CountsFromProfile.size());
1460
1461 // For each B in InverseDependencies[A], if A is covered then B is covered.
 // NOTE(review): listing line 1462 is absent from this view; it presumably
 // holds the type of the InverseDependencies declaration that ends on the
 // next line. Confirm against the original file.
1463 InverseDependencies;
1464 for (auto &BB : F) {
1465 for (auto *Dep : FuncInfo.BCI->getDependencies(BB)) {
1466 // If Dep is covered then BB is covered.
1467 InverseDependencies[Dep].insert(&BB);
1468 }
1469 }
1470
1471 // Infer coverage of the non-instrumented blocks using a flood-fill algorithm.
1472 std::stack<const BasicBlock *> CoveredBlocksToProcess;
1473 for (auto &[BB, IsCovered] : Coverage)
1474 if (IsCovered)
1475 CoveredBlocksToProcess.push(BB);
1476
1477 while (!CoveredBlocksToProcess.empty()) {
1478 auto *CoveredBlock = CoveredBlocksToProcess.top();
1479 assert(Coverage[CoveredBlock]);
1480 CoveredBlocksToProcess.pop();
1481 for (auto *BB : InverseDependencies[CoveredBlock]) {
1482 // If CoveredBlock is covered then BB is covered.
1483 if (Coverage[BB])
1484 continue;
1485 Coverage[BB] = true;
1486 CoveredBlocksToProcess.push(BB);
1487 }
1488 }
1489
1490 // Annotate block coverage.
1491 MDBuilder MDB(F.getContext());
1492 // We set the entry count to 10000 if the entry block is covered so that BFI
1493 // can propagate a fraction of this count to the other covered blocks.
1494 F.setEntryCount(Coverage[&F.getEntryBlock()] ? 10000 : 0);
1495 for (auto &BB : F) {
1496 // For a block A and its successor B, we set the edge weight as follows:
1497 // If A is covered and B is covered, set weight=1.
1498 // If A is covered and B is uncovered, set weight=0.
1499 // If A is uncovered, set weight=1.
1500 // This setup will allow BFI to give nonzero profile counts to only covered
1501 // blocks.
 // NOTE(review): listing line 1502 is absent from this view; Weights, filled
 // below, has no visible declaration. Confirm against the original file.
1503 for (auto *Succ : successors(&BB))
1504 Weights.push_back((Coverage[Succ] || !Coverage[&BB]) ? 1 : 0);
 // Weights are only attached to terminators with at least two successors.
1505 if (Weights.size() >= 2)
1506 llvm::setBranchWeights(*BB.getTerminator(), Weights,
1507 /*IsExpected=*/false);
1508 }
1509
 // Recompute BFI from the weights just attached and compare its view of dead
 // blocks with the inferred coverage.
1510 unsigned NumCorruptCoverage = 0;
1511 DominatorTree DT(F);
1512 LoopInfo LI(DT);
1513 BranchProbabilityInfo BPI(F, LI);
1514 BlockFrequencyInfo BFI(F, BPI, LI);
 // Returns whether BFI gives the block a zero profile count, or an empty
 // optional when BFI has no profile count for it.
1515 auto IsBlockDead = [&](const BasicBlock &BB) -> std::optional<bool> {
1516 if (auto C = BFI.getBlockProfileCount(&BB))
1517 return C == 0;
1518 return {};
1519 };
1520 LLVM_DEBUG(dbgs() << "Block Coverage: (Instrumented=*, Covered=X)\n");
1521 for (auto &BB : F) {
1522 LLVM_DEBUG(dbgs() << (FuncInfo.BCI->shouldInstrumentBlock(BB) ? "* " : " ")
1523 << (Coverage[&BB] ? "X " : " ") << " " << BB.getName()
1524 << "\n");
1525 // In some cases it is possible to find a covered block that has no covered
1526 // successors, e.g., when a block calls a function that may call exit(). In
1527 // those cases, BFI could find its successor to be covered while BCI could
1528 // find its successor to be dead.
 // NOTE(review): when IsBlockDead() returns an empty optional, value_or(false)
 // makes an uncovered block satisfy this condition, and the debug output
 // below then calls value() on that empty optional. Also, "covearge" in the
 // debug string is a typo; it is runtime text and is left untouched here.
1529 if (Coverage[&BB] == IsBlockDead(BB).value_or(false)) {
1530 LLVM_DEBUG(
1531 dbgs() << "Found inconsistent block covearge for " << BB.getName()
1532 << ": BCI=" << (Coverage[&BB] ? "Covered" : "Dead") << " BFI="
1533 << (IsBlockDead(BB).value() ? "Dead" : "Covered") << "\n");
1534 ++NumCorruptCoverage;
1535 }
1536 if (Coverage[&BB])
1537 ++NumCoveredBlocks;
1538 }
 // The mismatch warning is only emitted when PGOVerifyBFI is set.
1539 if (PGOVerifyBFI && NumCorruptCoverage) {
1540 auto &Ctx = M->getContext();
1541 Ctx.diagnose(DiagnosticInfoPGOProfile(
1542 M->getName().data(),
1543 Twine("Found inconsistent block coverage for function ") + F.getName() +
1544 " in " + Twine(NumCorruptCoverage) + " blocks.",
1545 DS_Warning));
1546 }
 // NOTE(review): listing line 1547 is absent from this view; it presumably
 // guards the call below with a condition. Confirm against the original
 // file.
1548 FuncInfo.BCI->viewBlockCoverageGraph(&Coverage);
1549}
1550
1551// Populate the counters from instrumented BBs to all BBs.
1552// In the end of this operation, all BBs should have a valid count value.
// The loop below repeats until a full sweep over the function makes no change:
// a block count is derived once all of its in-edges or all of its out-edges
// are known, and a single remaining unknown edge is derived from the block
// count.
1553void PGOUseFunc::populateCounters() {
1554 bool Changes = true;
1555 unsigned NumPasses = 0;
1556 while (Changes) {
1557 NumPasses++;
1558 Changes = false;
1559
1560 // For efficient traversal, it's better to start from the end as most
1561 // of the instrumented edges are at the end.
1562 for (auto &BB : reverse(F)) {
1563 PGOUseBBInfo *UseBBInfo = findBBInfo(&BB);
1564 if (UseBBInfo == nullptr)
1565 continue;
 // Block count unknown: it equals the sum of the out-edges (checked
 // first) or of the in-edges, once every edge on that side is known.
1566 if (!UseBBInfo->Count) {
1567 if (UseBBInfo->UnknownCountOutEdge == 0) {
1568 UseBBInfo->Count = sumEdgeCount(UseBBInfo->OutEdges);
1569 Changes = true;
1570 } else if (UseBBInfo->UnknownCountInEdge == 0) {
1571 UseBBInfo->Count = sumEdgeCount(UseBBInfo->InEdges);
1572 Changes = true;
1573 }
1574 }
 // Block count known: a single unknown edge on either side gets the block
 // count minus the known edges on that side, clamped at zero.
1575 if (UseBBInfo->Count) {
1576 if (UseBBInfo->UnknownCountOutEdge == 1) {
1577 uint64_t Total = 0;
1578 uint64_t OutSum = sumEdgeCount(UseBBInfo->OutEdges);
1579 // If one of the successor blocks can terminate early (no-return),
1580 // we can end up with a situation where the out-edge sum is larger, as
1581 // the source BB's count is collected by a post-dominated block.
1582 if (*UseBBInfo->Count > OutSum)
1583 Total = *UseBBInfo->Count - OutSum;
1584 setEdgeCount(UseBBInfo->OutEdges, Total);
1585 Changes = true;
1586 }
1587 if (UseBBInfo->UnknownCountInEdge == 1) {
1588 uint64_t Total = 0;
1589 uint64_t InSum = sumEdgeCount(UseBBInfo->InEdges);
1590 if (*UseBBInfo->Count > InSum)
1591 Total = *UseBBInfo->Count - InSum;
1592 setEdgeCount(UseBBInfo->InEdges, Total);
1593 Changes = true;
1594 }
1595 }
1596 }
1597 }
1598
1599 LLVM_DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n");
 // The cast keeps NumPasses "used" when LLVM_DEBUG compiles to nothing.
1600 (void)NumPasses;
1601#ifndef NDEBUG
1602 // Assert every BB has a valid counter.
1603 for (auto &BB : F) {
1604 auto BI = findBBInfo(&BB);
1605 if (BI == nullptr)
1606 continue;
1607 assert(BI->Count && "BB count is not valid");
1608 }
1609#endif
 // The function's maximum count is the largest block count, starting from
 // the entry count.
1610 uint64_t FuncEntryCount = *getBBInfo(&*F.begin()).Count;
1611 uint64_t FuncMaxCount = FuncEntryCount;
1612 for (auto &BB : F) {
1613 auto BI = findBBInfo(&BB);
1614 if (BI == nullptr)
1615 continue;
1616 FuncMaxCount = std::max(FuncMaxCount, *BI->Count);
1617 }
1618
1619 // Fix the obviously inconsistent entry count.
1620 if (FuncMaxCount > 0 && FuncEntryCount == 0)
1621 FuncEntryCount = 1;
 // NOTE(review): listing line 1622 is absent from this view; a statement
 // between the entry-count fix and markFunctionAttributes() may be missing.
 // Confirm against the original file.
1623 markFunctionAttributes(FuncEntryCount, FuncMaxCount);
1624
1625 // Now annotate select instructions
1626 FuncInfo.SIVisitor.annotateSelects(this, &CountPosition);
 // After the selects, every counter of the profile record must be consumed.
1627 assert(CountPosition == ProfileCountSize);
1628
1629 LLVM_DEBUG(FuncInfo.dumpInfo("after reading profile."));
1630}
1631
1632// Assign the scaled count values to the BB with multiple out edges.
1633void PGOUseFunc::setBranchWeights() {
1634 // Generate MD_prof metadata for every branch instruction.
1635 LLVM_DEBUG(dbgs() << "\nSetting branch weights for func " << F.getName()
1636 << " IsCS=" << IsCS << "\n");
1637 for (auto &BB : F) {
1638 Instruction *TI = BB.getTerminator();
1639 if (TI->getNumSuccessors() < 2)
1640 continue;
1641 if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) ||
1642 isa<IndirectBrInst>(TI) || isa<InvokeInst>(TI) ||
1643 isa<CallBrInst>(TI)))
1644 continue;
1645
1646 const PGOUseBBInfo &BBCountInfo = getBBInfo(&BB);
1647 if (!*BBCountInfo.Count)
1648 continue;
1649
1650 // We have a non-zero Branch BB.
1651 unsigned Size = BBCountInfo.OutEdges.size();
1652 SmallVector<uint64_t, 2> EdgeCounts(Size, 0);
1653 uint64_t MaxCount = 0;
1654 for (unsigned s = 0; s < Size; s++) {
1655 const PGOUseEdge *E = BBCountInfo.OutEdges[s];
1656 const BasicBlock *SrcBB = E->SrcBB;
1657 const BasicBlock *DestBB = E->DestBB;
1658 if (DestBB == nullptr)
1659 continue;
1660 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
1661 uint64_t EdgeCount = *E->Count;
1662 if (EdgeCount > MaxCount)
1663 MaxCount = EdgeCount;
1664 EdgeCounts[SuccNum] = EdgeCount;
1665 }
1666
1667 if (MaxCount)
1668 setProfMetadata(M, TI, EdgeCounts, MaxCount);
1669 else {
1670 // A zero MaxCount can come about when we have a BB with a positive
1671 // count, and whose successor blocks all have 0 count. This can happen
1672 // when there is no exit block and the code exits via a noreturn function.
1673 auto &Ctx = M->getContext();
1674 Ctx.diagnose(DiagnosticInfoPGOProfile(
1675 M->getName().data(),
1676 Twine("Profile in ") + F.getName().str() +
1677 Twine(" partially ignored") +
1678 Twine(", possibly due to the lack of a return path."),
1679 DS_Warning));
1680 }
1681 }
1682}
1683
// Returns true when at least one predecessor of BB ends in an indirectbr.
// NOTE(review): listing line 1684 (the function signature) is absent from this
// view; the name is taken from the call in annotateIrrLoopHeaderWeights().
// Confirm against the original file.
1685 for (BasicBlock *Pred : predecessors(BB)) {
1686 if (isa<IndirectBrInst>(Pred->getTerminator()))
1687 return true;
1688 }
1689 return false;
1690}
1691
1692void PGOUseFunc::annotateIrrLoopHeaderWeights() {
1693 LLVM_DEBUG(dbgs() << "\nAnnotating irreducible loop header weights.\n");
1694 // Find irr loop headers
1695 for (auto &BB : F) {
1696 // As a heuristic also annotate indrectbr targets as they have a high chance
1697 // to become an irreducible loop header after the indirectbr tail
1698 // duplication.
1699 if (BFI->isIrrLoopHeader(&BB) || isIndirectBrTarget(&BB)) {
1700 Instruction *TI = BB.getTerminator();
1701 const PGOUseBBInfo &BBCountInfo = getBBInfo(&BB);
1702 setIrrLoopHeaderMetadata(M, TI, *BBCountInfo.Count);
1703 }
1704 }
1705}
1706
1707void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) {
1708 Module *M = F.getParent();
1709 IRBuilder<> Builder(&SI);
1710 Type *Int64Ty = Builder.getInt64Ty();
1711 auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty);
1712 Builder.CreateCall(
1713 Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment_step),
1714 {FuncNameVar, Builder.getInt64(FuncHash), Builder.getInt32(TotalNumCtrs),
1715 Builder.getInt32(*CurCtrIdx), Step});
1716 ++(*CurCtrIdx);
1717}
1718
1719void SelectInstVisitor::annotateOneSelectInst(SelectInst &SI) {
1720 std::vector<uint64_t> &CountFromProfile = UseFunc->getProfileRecord().Counts;
1721 assert(*CurCtrIdx < CountFromProfile.size() &&
1722 "Out of bound access of counters");
1723 uint64_t SCounts[2];
1724 SCounts[0] = CountFromProfile[*CurCtrIdx]; // True count
1725 ++(*CurCtrIdx);
1726 uint64_t TotalCount = 0;
1727 auto BI = UseFunc->findBBInfo(SI.getParent());
1728 if (BI != nullptr)
1729 TotalCount = *BI->Count;
1730 // False Count
1731 SCounts[1] = (TotalCount > SCounts[0] ? TotalCount - SCounts[0] : 0);
1732 uint64_t MaxCount = std::max(SCounts[0], SCounts[1]);
1733 if (MaxCount)
1734 setProfMetadata(F.getParent(), &SI, SCounts, MaxCount);
1735}
1736
1737void SelectInstVisitor::visitSelectInst(SelectInst &SI) {
1738 if (!PGOInstrSelect || PGOFunctionEntryCoverage || HasSingleByteCoverage)
1739 return;
1740 // FIXME: do not handle this yet.
1741 if (SI.getCondition()->getType()->isVectorTy())
1742 return;
1743
1744 switch (Mode) {
1745 case VM_counting:
1746 NSIs++;
1747 return;
1748 case VM_instrument:
1749 instrumentOneSelectInst(SI);
1750 return;
1751 case VM_annotate:
1752 annotateOneSelectInst(SI);
1753 return;
1754 }
1755
1756 llvm_unreachable("Unknown visiting mode");
1757}
1758
// Returns a number of annotations that depends on the value profile kind;
// MaxNumAnnotations is the fallback for kinds not special-cased below.
// NOTE(review): listing lines 1759 (the signature), 1761 and 1763 (the
// statements controlled by the two conditions) are absent from this view. The
// name is taken from the call in annotateValueSites(uint32_t). Confirm against
// the original file.
1760 if (ValueProfKind == IPVK_MemOPSize)
1762 if (ValueProfKind == llvm::IPVK_VTableTarget)
1764 return MaxNumAnnotations;
1765}
1766
1767// Traverse all value sites and annotate the instructions for every value kind.
1768void PGOUseFunc::annotateValueSites() {
 // NOTE(review): listing line 1769 is absent from this view; the early return
 // below has no visible condition. Confirm against the original file.
1770 return;
1771
1772 // Create the PGOFuncName meta data.
1773 createPGOFuncNameMetadata(F, FuncInfo.FuncName);
1774
1775 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
1776 annotateValueSites(Kind);
1777}
1778
1779// Annotate the instructions for a specific value kind.
// If the number of value sites in the profile record differs from the number
// found in the IR, a warning is emitted and nothing is annotated for this kind.
1780void PGOUseFunc::annotateValueSites(uint32_t Kind) {
1781 assert(Kind <= IPVK_Last);
1782 unsigned ValueSiteIndex = 0;
1783
1784 unsigned NumValueSites = ProfileRecord.getNumValueSites(Kind);
1785
1786 // Since there isn't a reliable or fast way for profile reader to tell if a
1787 // profile is generated with `-enable-vtable-value-profiling` on, we run the
1788 // value profile collector over the function IR to find the instrumented sites
1789 // iff function profile records shows the number of instrumented vtable sites
1790 // is not zero. Function cfg already takes the number of instrumented
1791 // indirect call sites into account so it doesn't hash the number of
1792 // instrumented vtables; as a side effect it makes it easier to enable
1793 // profiling and profile use in two steps if needed.
1794 // TODO: Remove this if/when -enable-vtable-value-profiling is on by default.
1795 if (NumValueSites > 0 && Kind == IPVK_VTableTarget &&
1796 NumValueSites != FuncInfo.ValueSites[IPVK_VTableTarget].size() &&
 // NOTE(review): listing line 1797 is absent from this view; the last operand
 // of this condition and its closing parenthesis are not visible. Confirm
 // against the original file.
1798 FuncInfo.ValueSites[IPVK_VTableTarget] = VPC.get(IPVK_VTableTarget);
1799 auto &ValueSites = FuncInfo.ValueSites[Kind];
1800 if (NumValueSites != ValueSites.size()) {
1801 auto &Ctx = M->getContext();
1802 Ctx.diagnose(DiagnosticInfoPGOProfile(
1803 M->getName().data(),
1804 Twine("Inconsistent number of value sites for ") +
1805 Twine(ValueProfKindDescr[Kind]) + Twine(" profiling in \"") +
1806 F.getName().str() +
1807 Twine("\", possibly due to the use of a stale profile."),
1808 DS_Warning));
1809 return;
1810 }
1811
 // Sites are matched to profile entries by position: the n-th candidate in
 // the IR is paired with value site index n of the record.
1812 for (VPCandidateInfo &I : ValueSites) {
1813 LLVM_DEBUG(dbgs() << "Read one value site profile (kind = " << Kind
1814 << "): Index = " << ValueSiteIndex << " out of "
1815 << NumValueSites << "\n");
 // NOTE(review): listing line 1816 is absent from this view; the callee of
 // the call whose arguments follow is not visible. Confirm against the
 // original file.
1817 *M, *I.AnnotatedInst, ProfileRecord,
1818 static_cast<InstrProfValueKind>(Kind), ValueSiteIndex,
1819 getMaxNumAnnotations(static_cast<InstrProfValueKind>(Kind)));
1820 ValueSiteIndex++;
1821 }
1822}
1823
1824// Collect the set of members for each Comdat in module M and store
1825// in ComdatMembers.
1827 Module &M,
1828 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
1829 if (!DoComdatRenaming)
1830 return;
1831 for (Function &F : M)
1832 if (Comdat *C = F.getComdat())
1833 ComdatMembers.insert(std::make_pair(C, &F));
1834 for (GlobalVariable &GV : M.globals())
1835 if (Comdat *C = GV.getComdat())
1836 ComdatMembers.insert(std::make_pair(C, &GV));
1837 for (GlobalAlias &GA : M.aliases())
1838 if (Comdat *C = GA.getComdat())
1839 ComdatMembers.insert(std::make_pair(C, &GA));
1840}
1841
1842// Return true if we should not find instrumentation data for this function
1843static bool skipPGOUse(const Function &F) {
1844 if (F.isDeclaration())
1845 return true;
1846 // If there are too many critical edges, PGO might cause
1847 // compiler time problem. Skip PGO if the number of
1848 // critical edges execeed the threshold.
1849 unsigned NumCriticalEdges = 0;
1850 for (auto &BB : F) {
1851 const Instruction *TI = BB.getTerminator();
1852 for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) {
1853 if (isCriticalEdge(TI, I))
1854 NumCriticalEdges++;
1855 }
1856 }
1857 if (NumCriticalEdges > PGOFunctionCriticalEdgeThreshold) {
1858 LLVM_DEBUG(dbgs() << "In func " << F.getName()
1859 << ", NumCriticalEdges=" << NumCriticalEdges
1860 << " exceed the threshold. Skip PGO.\n");
1861 return true;
1862 }
1863 return false;
1864}
1865
1866// Return true if we should not instrument this function
1867static bool skipPGOGen(const Function &F) {
1868 if (skipPGOUse(F))
1869 return true;
1870 if (F.hasFnAttribute(llvm::Attribute::Naked))
1871 return true;
1872 if (F.hasFnAttribute(llvm::Attribute::NoProfile))
1873 return true;
1874 if (F.hasFnAttribute(llvm::Attribute::SkipProfile))
1875 return true;
1876 if (F.getInstructionCount() < PGOFunctionSizeThreshold)
1877 return true;
1878 return false;
1879}
1880
1882 Module &M, function_ref<TargetLibraryInfo &(Function &)> LookupTLI,
1885 PGOInstrumentationType InstrumentationType) {
1886 // For the context-sensitive instrumentation, we should have a separate pass
1887 // (before LTO/ThinLTO linking) to create these variables.
1888 if (InstrumentationType == PGOInstrumentationType::FDO)
1889 createIRLevelProfileFlagVar(M, InstrumentationType);
1890
1891 Triple TT(M.getTargetTriple());
1892 LLVMContext &Ctx = M.getContext();
1893 if (!TT.isOSBinFormatELF() && EnableVTableValueProfiling)
1895 M.getName().data(),
1896 Twine("VTable value profiling is presently not "
1897 "supported for non-ELF object formats"),
1898 DS_Warning));
1899 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
1900 collectComdatMembers(M, ComdatMembers);
1901
1902 for (auto &F : M) {
1903 if (skipPGOGen(F))
1904 continue;
1905 auto &TLI = LookupTLI(F);
1906 auto *BPI = LookupBPI(F);
1907 auto *BFI = LookupBFI(F);
1908 FunctionInstrumenter FI(M, F, TLI, ComdatMembers, BPI, BFI,
1909 InstrumentationType);
1910 FI.instrument();
1911 }
1912 return true;
1913}
1914
1917 createProfileFileNameVar(M, CSInstrName);
1918 // The variable in a comdat may be discarded by LTO. Ensure the declaration
1919 // will be retained.
1922 if (ProfileSampling)
1927 return PA;
1928}
1929
1932 auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
1933 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
1935 };
1936 auto LookupBPI = [&FAM](Function &F) {
1938 };
1939 auto LookupBFI = [&FAM](Function &F) {
1941 };
1942
1943 if (!InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI,
1944 InstrumentationType))
1945 return PreservedAnalyses::all();
1946
1947 return PreservedAnalyses::none();
1948}
1949
1950// Using the ratio b/w sums of profile count values and BFI count values to
1951// adjust the func entry count.
1952static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI,
1953 BranchProbabilityInfo &NBPI) {
1954 Function &F = Func.getFunc();
1955 BlockFrequencyInfo NBFI(F, NBPI, LI);
1956#ifndef NDEBUG
1957 auto BFIEntryCount = F.getEntryCount();
1958 assert(BFIEntryCount && (BFIEntryCount->getCount() > 0) &&
1959 "Invalid BFI Entrycount");
1960#endif
1961 auto SumCount = APFloat::getZero(APFloat::IEEEdouble());
1962 auto SumBFICount = APFloat::getZero(APFloat::IEEEdouble());
1963 for (auto &BBI : F) {
1964 uint64_t CountValue = 0;
1965 uint64_t BFICountValue = 0;
1966 if (!Func.findBBInfo(&BBI))
1967 continue;
1968 auto BFICount = NBFI.getBlockProfileCount(&BBI);
1969 CountValue = *Func.getBBInfo(&BBI).Count;
1970 BFICountValue = *BFICount;
1971 SumCount.add(APFloat(CountValue * 1.0), APFloat::rmNearestTiesToEven);
1972 SumBFICount.add(APFloat(BFICountValue * 1.0), APFloat::rmNearestTiesToEven);
1973 }
1974 if (SumCount.isZero())
1975 return;
1976
1977 assert(SumBFICount.compare(APFloat(0.0)) == APFloat::cmpGreaterThan &&
1978 "Incorrect sum of BFI counts");
1979 if (SumBFICount.compare(SumCount) == APFloat::cmpEqual)
1980 return;
1981 double Scale = (SumCount / SumBFICount).convertToDouble();
1982 if (Scale < 1.001 && Scale > 0.999)
1983 return;
1984
1985 uint64_t FuncEntryCount = *Func.getBBInfo(&*F.begin()).Count;
1986 uint64_t NewEntryCount = 0.5 + FuncEntryCount * Scale;
1987 if (NewEntryCount == 0)
1988 NewEntryCount = 1;
1989 if (NewEntryCount != FuncEntryCount) {
1990 F.setEntryCount(ProfileCount(NewEntryCount, Function::PCT_Real));
1991 LLVM_DEBUG(dbgs() << "FixFuncEntryCount: in " << F.getName()
1992 << ", entry_count " << FuncEntryCount << " --> "
1993 << NewEntryCount << "\n");
1994 }
1995}
1996
1997// Compare the profile count values with BFI count values, and print out
1998// the non-matching ones.
1999static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI,
2001 uint64_t HotCountThreshold,
2003 Function &F = Func.getFunc();
2004 BlockFrequencyInfo NBFI(F, NBPI, LI);
2005 // bool PrintFunc = false;
2006 bool HotBBOnly = PGOVerifyHotBFI;
2007 StringRef Msg;
2009
2010 unsigned BBNum = 0, BBMisMatchNum = 0, NonZeroBBNum = 0;
2011 for (auto &BBI : F) {
2012 uint64_t CountValue = 0;
2013 uint64_t BFICountValue = 0;
2014
2015 CountValue = Func.getBBInfo(&BBI).Count.value_or(CountValue);
2016
2017 BBNum++;
2018 if (CountValue)
2019 NonZeroBBNum++;
2020 auto BFICount = NBFI.getBlockProfileCount(&BBI);
2021 if (BFICount)
2022 BFICountValue = *BFICount;
2023
2024 if (HotBBOnly) {
2025 bool rawIsHot = CountValue >= HotCountThreshold;
2026 bool BFIIsHot = BFICountValue >= HotCountThreshold;
2027 bool rawIsCold = CountValue <= ColdCountThreshold;
2028 bool ShowCount = false;
2029 if (rawIsHot && !BFIIsHot) {
2030 Msg = "raw-Hot to BFI-nonHot";
2031 ShowCount = true;
2032 } else if (rawIsCold && BFIIsHot) {
2033 Msg = "raw-Cold to BFI-Hot";
2034 ShowCount = true;
2035 }
2036 if (!ShowCount)
2037 continue;
2038 } else {
2039 if ((CountValue < PGOVerifyBFICutoff) &&
2040 (BFICountValue < PGOVerifyBFICutoff))
2041 continue;
2042 uint64_t Diff = (BFICountValue >= CountValue)
2043 ? BFICountValue - CountValue
2044 : CountValue - BFICountValue;
2045 if (Diff <= CountValue / 100 * PGOVerifyBFIRatio)
2046 continue;
2047 }
2048 BBMisMatchNum++;
2049
2050 ORE.emit([&]() {
2052 F.getSubprogram(), &BBI);
2053 Remark << "BB " << ore::NV("Block", BBI.getName())
2054 << " Count=" << ore::NV("Count", CountValue)
2055 << " BFI_Count=" << ore::NV("Count", BFICountValue);
2056 if (!Msg.empty())
2057 Remark << " (" << Msg << ")";
2058 return Remark;
2059 });
2060 }
2061 if (BBMisMatchNum)
2062 ORE.emit([&]() {
2063 return OptimizationRemarkAnalysis(DEBUG_TYPE, "bfi-verify",
2064 F.getSubprogram(), &F.getEntryBlock())
2065 << "In Func " << ore::NV("Function", F.getName())
2066 << ": Num_of_BB=" << ore::NV("Count", BBNum)
2067 << ", Num_of_non_zerovalue_BB=" << ore::NV("Count", NonZeroBBNum)
2068 << ", Num_of_mis_matching_BB=" << ore::NV("Count", BBMisMatchNum);
2069 });
2070}
2071
2073 Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName,
2074 vfs::FileSystem &FS,
2075 function_ref<TargetLibraryInfo &(Function &)> LookupTLI,
2078 ProfileSummaryInfo *PSI, bool IsCS) {
2079 LLVM_DEBUG(dbgs() << "Read in profile counters: ");
2080 auto &Ctx = M.getContext();
2081 // Read the counter array from file.
2082 auto ReaderOrErr = IndexedInstrProfReader::create(ProfileFileName, FS,
2083 ProfileRemappingFileName);
2084 if (Error E = ReaderOrErr.takeError()) {
2085 handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) {
2086 Ctx.diagnose(
2087 DiagnosticInfoPGOProfile(ProfileFileName.data(), EI.message()));
2088 });
2089 return false;
2090 }
2091
2092 std::unique_ptr<IndexedInstrProfReader> PGOReader =
2093 std::move(ReaderOrErr.get());
2094 if (!PGOReader) {
2095 Ctx.diagnose(DiagnosticInfoPGOProfile(ProfileFileName.data(),
2096 StringRef("Cannot get PGOReader")));
2097 return false;
2098 }
2099 if (!PGOReader->hasCSIRLevelProfile() && IsCS)
2100 return false;
2101
2102 // TODO: might need to change the warning once the clang option is finalized.
2103 if (!PGOReader->isIRLevelProfile()) {
2104 Ctx.diagnose(DiagnosticInfoPGOProfile(
2105 ProfileFileName.data(), "Not an IR level instrumentation profile"));
2106 return false;
2107 }
2108 if (PGOReader->functionEntryOnly()) {
2109 Ctx.diagnose(DiagnosticInfoPGOProfile(
2110 ProfileFileName.data(),
2111 "Function entry profiles are not yet supported for optimization"));
2112 return false;
2113 }
2114
2116 for (GlobalVariable &G : M.globals()) {
2117 if (!G.hasName() || !G.hasMetadata(LLVMContext::MD_type))
2118 continue;
2119
2120 // Create the PGOFuncName meta data.
2121 createPGONameMetadata(G, getPGOName(G, false /* InLTO*/));
2122 }
2123 }
2124
2125 // Add the profile summary (read from the header of the indexed summary) here
2126 // so that we can use it below when reading counters (which checks if the
2127 // function should be marked with a cold or inlinehint attribute).
2128 M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()),
2131 PSI->refresh();
2132
2133 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
2134 collectComdatMembers(M, ComdatMembers);
2135 std::vector<Function *> HotFunctions;
2136 std::vector<Function *> ColdFunctions;
2137
2138 // If the profile marked as always instrument the entry BB, do the
2139 // same. Note this can be overwritten by the internal option in CFGMST.h
2140 bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled();
2141 if (PGOInstrumentEntry.getNumOccurrences() > 0)
2142 InstrumentFuncEntry = PGOInstrumentEntry;
2143
2144 bool HasSingleByteCoverage = PGOReader->hasSingleByteCoverage();
2145 for (auto &F : M) {
2146 if (skipPGOUse(F))
2147 continue;
2148 auto &TLI = LookupTLI(F);
2149 auto *BPI = LookupBPI(F);
2150 auto *BFI = LookupBFI(F);
2151 if (!HasSingleByteCoverage) {
2152 // Split indirectbr critical edges here before computing the MST rather
2153 // than later in getInstrBB() to avoid invalidating it.
2154 SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI,
2155 BFI);
2156 }
2157 PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS,
2158 InstrumentFuncEntry, HasSingleByteCoverage);
2159 if (HasSingleByteCoverage) {
2160 Func.populateCoverage(PGOReader.get());
2161 continue;
2162 }
2163 // When PseudoKind is set to a value other than InstrProfRecord::NotPseudo,
2164 // it means the profile for the function is unrepresentative and this
2165 // function is actually hot / warm. We will reset the function hot / cold
2166 // attribute and drop all the profile counters.
2168 bool AllZeros = false;
2169 if (!Func.readCounters(PGOReader.get(), AllZeros, PseudoKind))
2170 continue;
2171 if (AllZeros) {
2172 F.setEntryCount(ProfileCount(0, Function::PCT_Real));
2173 if (Func.getProgramMaxCount() != 0)
2174 ColdFunctions.push_back(&F);
2175 continue;
2176 }
2177 if (PseudoKind != InstrProfRecord::NotPseudo) {
2178 // Clear function attribute cold.
2179 if (F.hasFnAttribute(Attribute::Cold))
2180 F.removeFnAttr(Attribute::Cold);
2181 // Set function attribute as hot.
2182 if (PseudoKind == InstrProfRecord::PseudoHot)
2183 F.addFnAttr(Attribute::Hot);
2184 continue;
2185 }
2186 Func.populateCounters();
2187 Func.setBranchWeights();
2188 Func.annotateValueSites();
2189 Func.annotateIrrLoopHeaderWeights();
2190 PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr();
2191 if (FreqAttr == PGOUseFunc::FFA_Cold)
2192 ColdFunctions.push_back(&F);
2193 else if (FreqAttr == PGOUseFunc::FFA_Hot)
2194 HotFunctions.push_back(&F);
2195 if (PGOViewCounts != PGOVCT_None &&
2196 (ViewBlockFreqFuncName.empty() ||
2197 F.getName() == ViewBlockFreqFuncName)) {
2199 std::unique_ptr<BranchProbabilityInfo> NewBPI =
2200 std::make_unique<BranchProbabilityInfo>(F, LI);
2201 std::unique_ptr<BlockFrequencyInfo> NewBFI =
2202 std::make_unique<BlockFrequencyInfo>(F, *NewBPI, LI);
2204 NewBFI->view();
2205 else if (PGOViewCounts == PGOVCT_Text) {
2206 dbgs() << "pgo-view-counts: " << Func.getFunc().getName() << "\n";
2207 NewBFI->print(dbgs());
2208 }
2209 }
2211 (ViewBlockFreqFuncName.empty() ||
2212 F.getName() == ViewBlockFreqFuncName)) {
2214 if (ViewBlockFreqFuncName.empty())
2215 WriteGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName());
2216 else
2217 ViewGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName());
2218 else if (PGOViewRawCounts == PGOVCT_Text) {
2219 dbgs() << "pgo-view-raw-counts: " << Func.getFunc().getName() << "\n";
2220 Func.dumpInfo();
2221 }
2222 }
2223
2226 BranchProbabilityInfo NBPI(F, LI);
2227
2228 // Fix func entry count.
2229 if (PGOFixEntryCount)
2230 fixFuncEntryCount(Func, LI, NBPI);
2231
2232 // Verify BlockFrequency information.
2233 uint64_t HotCountThreshold = 0, ColdCountThreshold = 0;
2234 if (PGOVerifyHotBFI) {
2235 HotCountThreshold = PSI->getOrCompHotCountThreshold();
2237 }
2238 verifyFuncBFI(Func, LI, NBPI, HotCountThreshold, ColdCountThreshold);
2239 }
2240 }
2241
2242 // Set function hotness attribute from the profile.
2243 // We have to apply these attributes at the end because their presence
2244 // can affect the BranchProbabilityInfo of any callers, resulting in an
2245 // inconsistent MST between prof-gen and prof-use.
2246 for (auto &F : HotFunctions) {
2247 F->addFnAttr(Attribute::InlineHint);
2248 LLVM_DEBUG(dbgs() << "Set inline attribute to function: " << F->getName()
2249 << "\n");
2250 }
2251 for (auto &F : ColdFunctions) {
2252 // Only set when there is no Attribute::Hot set by the user. For Hot
2253 // attribute, user's annotation has the precedence over the profile.
2254 if (F->hasFnAttribute(Attribute::Hot)) {
2255 auto &Ctx = M.getContext();
2256 std::string Msg = std::string("Function ") + F->getName().str() +
2257 std::string(" is annotated as a hot function but"
2258 " the profile is cold");
2259 Ctx.diagnose(
2260 DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning));
2261 continue;
2262 }
2263 F->addFnAttr(Attribute::Cold);
2264 LLVM_DEBUG(dbgs() << "Set cold attribute to function: " << F->getName()
2265 << "\n");
2266 }
2267 return true;
2268}
2269
2271 std::string Filename, std::string RemappingFilename, bool IsCS,
2273 : ProfileFileName(std::move(Filename)),
2274 ProfileRemappingFileName(std::move(RemappingFilename)), IsCS(IsCS),
2275 FS(std::move(VFS)) {
2276 if (!PGOTestProfileFile.empty())
2277 ProfileFileName = PGOTestProfileFile;
2278 if (!PGOTestProfileRemappingFile.empty())
2279 ProfileRemappingFileName = PGOTestProfileRemappingFile;
2280 if (!FS)
2282}
2283
2286
2287 auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
2288 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
2290 };
2291 auto LookupBPI = [&FAM](Function &F) {
2293 };
2294 auto LookupBFI = [&FAM](Function &F) {
2296 };
2297
2298 auto *PSI = &MAM.getResult<ProfileSummaryAnalysis>(M);
2299 if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName, *FS,
2300 LookupTLI, LookupBPI, LookupBFI, PSI, IsCS))
2301 return PreservedAnalyses::all();
2302
2303 return PreservedAnalyses::none();
2304}
2305
2306static std::string getSimpleNodeName(const BasicBlock *Node) {
2307 if (!Node->getName().empty())
2308 return Node->getName().str();
2309
2310 std::string SimpleNodeName;
2311 raw_string_ostream OS(SimpleNodeName);
2312 Node->printAsOperand(OS, false);
2313 return SimpleNodeName;
2314}
2315
2317 ArrayRef<uint64_t> EdgeCounts, uint64_t MaxCount) {
2318 assert(MaxCount > 0 && "Bad max count");
2319 uint64_t Scale = calculateCountScale(MaxCount);
2321 for (const auto &ECI : EdgeCounts)
2322 Weights.push_back(scaleBranchCount(ECI, Scale));
2323
2324 LLVM_DEBUG(dbgs() << "Weight is: "; for (const auto &W
2325 : Weights) {
2326 dbgs() << W << " ";
2327 } dbgs() << "\n";);
2328
2329 misexpect::checkExpectAnnotations(*TI, Weights, /*IsFrontend=*/false);
2330
2331 setBranchWeights(*TI, Weights, /*IsExpected=*/false);
2333 std::string BrCondStr = getBranchCondString(TI);
2334 if (BrCondStr.empty())
2335 return;
2336
2337 uint64_t WSum =
2338 std::accumulate(Weights.begin(), Weights.end(), (uint64_t)0,
2339 [](uint64_t w1, uint64_t w2) { return w1 + w2; });
2340 uint64_t TotalCount =
2341 std::accumulate(EdgeCounts.begin(), EdgeCounts.end(), (uint64_t)0,
2342 [](uint64_t c1, uint64_t c2) { return c1 + c2; });
2343 Scale = calculateCountScale(WSum);
2344 BranchProbability BP(scaleBranchCount(Weights[0], Scale),
2345 scaleBranchCount(WSum, Scale));
2346 std::string BranchProbStr;
2347 raw_string_ostream OS(BranchProbStr);
2348 OS << BP;
2349 OS << " (total count : " << TotalCount << ")";
2350 OS.flush();
2351 Function *F = TI->getParent()->getParent();
2353 ORE.emit([&]() {
2354 return OptimizationRemark(DEBUG_TYPE, "pgo-instrumentation", TI)
2355 << BrCondStr << " is true with probability : " << BranchProbStr;
2356 });
2357 }
2358}
2359
2360namespace llvm {
2361
2363 MDBuilder MDB(M->getContext());
2364 TI->setMetadata(llvm::LLVMContext::MD_irr_loop,
2365 MDB.createIrrLoopHeaderWeight(Count));
2366}
2367
2368template <> struct GraphTraits<PGOUseFunc *> {
2369 using NodeRef = const BasicBlock *;
2372
2373 static NodeRef getEntryNode(const PGOUseFunc *G) {
2374 return &G->getFunc().front();
2375 }
2376
2378 return succ_begin(N);
2379 }
2380
2381 static ChildIteratorType child_end(const NodeRef N) { return succ_end(N); }
2382
2383 static nodes_iterator nodes_begin(const PGOUseFunc *G) {
2384 return nodes_iterator(G->getFunc().begin());
2385 }
2386
2387 static nodes_iterator nodes_end(const PGOUseFunc *G) {
2388 return nodes_iterator(G->getFunc().end());
2389 }
2390};
2391
2392template <> struct DOTGraphTraits<PGOUseFunc *> : DefaultDOTGraphTraits {
2393 explicit DOTGraphTraits(bool isSimple = false)
2395
2396 static std::string getGraphName(const PGOUseFunc *G) {
2397 return std::string(G->getFunc().getName());
2398 }
2399
2400 std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph) {
2401 std::string Result;
2402 raw_string_ostream OS(Result);
2403
2404 OS << getSimpleNodeName(Node) << ":\\l";
2405 PGOUseBBInfo *BI = Graph->findBBInfo(Node);
2406 OS << "Count : ";
2407 if (BI && BI->Count)
2408 OS << *BI->Count << "\\l";
2409 else
2410 OS << "Unknown\\l";
2411
2412 if (!PGOInstrSelect)
2413 return Result;
2414
2415 for (const Instruction &I : *Node) {
2416 if (!isa<SelectInst>(&I))
2417 continue;
2418 // Display scaled counts for SELECT instruction:
2419 OS << "SELECT : { T = ";
2420 uint64_t TC, FC;
2421 bool HasProf = extractBranchWeights(I, TC, FC);
2422 if (!HasProf)
2423 OS << "Unknown, F = Unknown }\\l";
2424 else
2425 OS << TC << ", F = " << FC << " }\\l";
2426 }
2427 return Result;
2428 }
2429};
2430
2431} // end namespace llvm
This file implements a class to represent arbitrary precision integral constant values and operations...
This file contains the simple types necessary to represent the attributes associated with functions a...
This file finds the minimum set of blocks on a CFG that must be instrumented to infer execution cover...
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:686
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Given that RA is a live value
#define LLVM_DEBUG(X)
Definition: Debug.h:101
std::string Name
uint64_t Size
post inline ee instrument
static BasicBlock * getInstrBB(CFGMST< Edge, BBInfo > &MST, Edge &E, const DenseSet< const BasicBlock * > &ExecBlocks)
#define DEBUG_TYPE
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
static cl::opt< unsigned > ColdCountThreshold("mfs-count-threshold", cl::desc("Minimum number of times a block must be executed to be retained."), cl::init(1), cl::Hidden)
Module.h This file contains the declarations for the Module class.
static cl::opt< bool > PGOInstrumentEntry("pgo-instrument-entry", cl::init(false), cl::Hidden, cl::desc("Force to instrument function entry basicblock."))
static GlobalVariable * createIRLevelProfileFlagVar(Module &M, PGOInstrumentationType InstrumentationType)
static cl::opt< std::string > PGOTestProfileRemappingFile("pgo-test-profile-remapping-file", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path of profile remapping file. This is mainly for " "test purpose."))
static cl::opt< bool > PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden, cl::desc("Fix function entry count in profile use."))
static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI, BranchProbabilityInfo &NBPI)
static cl::opt< bool > PGOVerifyHotBFI("pgo-verify-hot-bfi", cl::init(false), cl::Hidden, cl::desc("Print out the non-match BFI count if a hot raw profile count " "becomes non-hot, or a cold raw profile count becomes hot. " "The print is enabled under -Rpass-analysis=pgo, or " "internal option -pass-remakrs-analysis=pgo."))
static void annotateFunctionWithHashMismatch(Function &F, LLVMContext &ctx)
cl::opt< unsigned > MaxNumVTableAnnotations
static cl::opt< bool > PGOTemporalInstrumentation("pgo-temporal-instrumentation", cl::desc("Use this option to enable temporal instrumentation"))
static cl::opt< unsigned > PGOFunctionSizeThreshold("pgo-function-size-threshold", cl::Hidden, cl::desc("Do not instrument functions smaller than this threshold."))
static cl::opt< unsigned > MaxNumAnnotations("icp-max-annotations", cl::init(3), cl::Hidden, cl::desc("Max number of annotations for a single indirect " "call callsite"))
static bool skipPGOGen(const Function &F)
static void collectComdatMembers(Module &M, std::unordered_multimap< Comdat *, GlobalValue * > &ComdatMembers)
static cl::opt< unsigned > PGOVerifyBFICutoff("pgo-verify-bfi-cutoff", cl::init(5), cl::Hidden, cl::desc("Set the threshold for pgo-verify-bfi: skip the counts whose " "profile count value is below."))
static cl::opt< std::string > PGOTraceFuncHash("pgo-trace-func-hash", cl::init("-"), cl::Hidden, cl::value_desc("function name"), cl::desc("Trace the hash of the function with this name."))
static void populateEHOperandBundle(VPCandidateInfo &Cand, DenseMap< BasicBlock *, ColorVector > &BlockColors, SmallVectorImpl< OperandBundleDef > &OpBundles)
static bool InstrumentAllFunctions(Module &M, function_ref< TargetLibraryInfo &(Function &)> LookupTLI, function_ref< BranchProbabilityInfo *(Function &)> LookupBPI, function_ref< BlockFrequencyInfo *(Function &)> LookupBFI, PGOInstrumentationType InstrumentationType)
static cl::opt< bool > PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden, cl::desc("Use this option to turn on/off SELECT " "instruction instrumentation. "))
static cl::opt< bool > PGOFunctionEntryCoverage("pgo-function-entry-coverage", cl::Hidden, cl::desc("Use this option to enable function entry coverage instrumentation."))
static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI, BranchProbabilityInfo &NBPI, uint64_t HotCountThreshold, uint64_t ColdCountThreshold)
static cl::opt< unsigned > PGOVerifyBFIRatio("pgo-verify-bfi-ratio", cl::init(2), cl::Hidden, cl::desc("Set the threshold for pgo-verify-bfi: only print out " "mismatched BFI if the difference percentage is greater than " "this value (in percentage)."))
static cl::opt< bool > DoComdatRenaming("do-comdat-renaming", cl::init(false), cl::Hidden, cl::desc("Append function hash to the name of COMDAT function to avoid " "function hash mismatch due to the preinliner"))
static cl::opt< unsigned > PGOFunctionCriticalEdgeThreshold("pgo-critical-edge-threshold", cl::init(20000), cl::Hidden, cl::desc("Do not instrument functions with the number of critical edges " " greater than this threshold."))
static void setupBBInfoEdges(const FuncPGOInstrumentation< PGOUseEdge, PGOUseBBInfo > &FuncInfo)
Set up InEdges/OutEdges for all BBs in the MST.
static cl::opt< std::string > PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path of profile data file. This is" "mainly for test purpose."))
static bool skipPGOUse(const Function &F)
static bool canRenameComdat(Function &F, std::unordered_multimap< Comdat *, GlobalValue * > &ComdatMembers)
static cl::opt< bool > PGOVerifyBFI("pgo-verify-bfi", cl::init(false), cl::Hidden, cl::desc("Print out mismatched BFI counts after setting profile metadata " "The print is enabled under -Rpass-analysis=pgo, or " "internal option -pass-remakrs-analysis=pgo."))
static cl::opt< bool > PGOBlockCoverage("pgo-block-coverage", cl::desc("Use this option to enable basic block coverage instrumentation"))
static uint64_t sumEdgeCount(const ArrayRef< PGOUseEdge * > Edges)
static cl::opt< bool > PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden, cl::desc("Use this option to turn on/off " "memory intrinsic size profiling."))
static uint32_t getMaxNumAnnotations(InstrProfValueKind ValueProfKind)
Function::ProfileCount ProfileCount
static cl::opt< bool > EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden, cl::desc("When this option is on, the annotated " "branch probability will be emitted as " "optimization remarks: -{Rpass|" "pass-remarks}=pgo-instrumentation"))
static cl::opt< unsigned > MaxNumMemOPAnnotations("memop-max-annotations", cl::init(4), cl::Hidden, cl::desc("Max number of preicise value annotations for a single memop" "intrinsic"))
static cl::opt< bool > DisableValueProfiling("disable-vp", cl::init(false), cl::Hidden, cl::desc("Disable Value Profiling"))
static std::string getSimpleNodeName(const BasicBlock *Node)
static cl::opt< bool > PGOViewBlockCoverageGraph("pgo-view-block-coverage-graph", cl::desc("Create a dot file of CFGs with block " "coverage inference information"))
static bool isIndirectBrTarget(BasicBlock *BB)
static std::string getBranchCondString(Instruction *TI)
static bool annotateAllFunctions(Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName, vfs::FileSystem &FS, function_ref< TargetLibraryInfo &(Function &)> LookupTLI, function_ref< BranchProbabilityInfo *(Function &)> LookupBPI, function_ref< BlockFrequencyInfo *(Function &)> LookupBFI, ProfileSummaryInfo *PSI, bool IsCS)
static cl::opt< PGOViewCountsType > PGOViewRawCounts("pgo-view-raw-counts", cl::Hidden, cl::desc("A boolean option to show CFG dag or text " "with raw profile counts from " "profile data. See also option " "-pgo-view-counts. To limit graph " "display to only one function, use " "filtering option -view-bfi-func-name."), cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), clEnumValN(PGOVCT_Graph, "graph", "show a graph."), clEnumValN(PGOVCT_Text, "text", "show in text.")))
static const char * ValueProfKindDescr[]
This file provides the interface for IR based instrumentation passes ( (profile-gen,...
FunctionAnalysisManager FAM
ModuleAnalysisManager MAM
This header defines various interfaces for pass management in LLVM.
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isSimple(Instruction *I)
This file contains some templates that are useful if you are working with the STL at all.
raw_pwrite_stream & OS
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
Defines the virtual file system interface vfs::FileSystem.
Value * RHS
void printAsOperand(OutputBuffer &OB, Prec P=Prec::Default, bool StrictlyWorse=false) const
static APFloat getZero(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Zero.
Definition: APFloat.h:994
Class for arbitrary precision integers.
Definition: APInt.h:78
This templated class represents "all analyses that operate over <a particular IR unit>" (e....
Definition: Analysis.h:49
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:405
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
iterator end() const
Definition: ArrayRef.h:154
iterator begin() const
Definition: ArrayRef.h:153
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
iterator end()
Definition: BasicBlock.h:461
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:416
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:239
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
std::optional< uint64_t > getBlockProfileCount(const BasicBlock *BB, bool AllowSynthetic=false) const
Returns the estimated profile count of BB.
Conditional or Unconditional Branch instruction.
bool isConditional() const
Value * getCondition() const
Analysis pass which computes BranchProbabilityInfo.
Analysis providing branch probability information.
A union-find based Minimum Spanning Tree for CFG.
Definition: CFGMST.h:39
Edge & addEdge(BasicBlock *Src, BasicBlock *Dest, uint64_t W)
Definition: CFGMST.h:276
const std::vector< std::unique_ptr< Edge > > & allEdges() const
Definition: CFGMST.h:306
size_t bbInfoSize() const
Definition: CFGMST.h:314
size_t numEdges() const
Definition: CFGMST.h:312
BBInfo * findBBInfo(const BasicBlock *BB) const
Definition: CFGMST.h:324
BBInfo & getBBInfo(const BasicBlock *BB) const
Definition: CFGMST.h:317
void dumpEdges(raw_ostream &OS, const Twine &Message) const
Definition: CFGMST.h:257
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1236
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:847
StringRef getName() const
Definition: Comdat.cpp:28
void setSelectionKind(SelectionKind Val)
Definition: Comdat.h:47
SelectionKind getSelectionKind() const
Definition: Comdat.h:46
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition: Constants.h:218
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:212
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:206
static Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
Definition: Constants.cpp:400
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
bool empty() const
Definition: DenseMap.h:98
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:211
Diagnostic information for the PGO profiler.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
Base class for error info classes.
Definition: Error.h:45
virtual std::string message() const
Return the error message as a string.
Definition: Error.h:53
Lightweight error class with error context and mandatory checking.
Definition: Error.h:160
Tagged union holding either a T or a Error.
Definition: Error.h:481
Class to represent profile counts.
Definition: Function.h:296
static GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
Definition: Globals.cpp:550
@ HiddenVisibility
The GV is hidden.
Definition: GlobalValue.h:68
@ ExternalLinkage
Externally visible function.
Definition: GlobalValue.h:52
@ WeakAnyLinkage
Keep one copy of named function when linking (weak)
Definition: GlobalValue.h:56
@ AvailableExternallyLinkage
Available for inspection, not emission.
Definition: GlobalValue.h:53
@ LinkOnceODRLinkage
Same, but only replaced by something equivalent.
Definition: GlobalValue.h:55
This instruction compares its operands according to the predicate given to the constructor.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2686
Reader for the indexed binary instrprof format.
static Expected< std::unique_ptr< IndexedInstrProfReader > > create(const Twine &Path, vfs::FileSystem &FS, const Twine &RemappingPath="")
Factory method to create an indexed reader.
Expected< InstrProfRecord > getInstrProfRecord(StringRef FuncName, uint64_t FuncHash, StringRef DeprecatedFuncName="", uint64_t *MismatchedFuncSum=nullptr)
Return the NamedInstrProfRecord associated with FuncName and FuncHash.
uint64_t getMaximumFunctionCount(bool UseCS)
Return the maximum of all known function counts.
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:563
Base class for instruction visitors.
Definition: InstVisitor.h:78
void visit(Iterator Start, Iterator End)
Definition: InstVisitor.h:87
RetTy visitSelectInst(SelectInst &I)
Definition: InstVisitor.h:189
instrprof_error get() const
Definition: InstrProf.h:413
std::string message() const override
Return the error message as a string.
Definition: InstrProf.cpp:255
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
Definition: Instruction.h:824
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1642
A smart pointer to a reference-counted object that inherits from RefCountedBase or ThreadSafeRefCount...
uint32_t getCRC() const
Definition: CRC.h:52
void update(ArrayRef< uint8_t > Data)
Definition: CRC.cpp:103
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
MDString * createString(StringRef Str)
Return the given string as metadata.
Definition: MDBuilder.cpp:20
MDNode * createIrrLoopHeaderWeight(uint64_t Weight)
Return metadata containing an irreducible loop header weight.
Definition: MDBuilder.cpp:344
Metadata node.
Definition: Metadata.h:1069
ArrayRef< MDOperand > operands() const
Definition: Metadata.h:1428
Tuple of metadata.
Definition: Metadata.h:1472
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1499
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
Diagnostic information for optimization analysis remarks.
The optimization diagnostic interface.
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for applied optimization remarks.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
PGOInstrumentationUse(std::string Filename="", std::string RemappingFilename="", bool IsCS=false, IntrusiveRefCntPtr< vfs::FileSystem > FS=nullptr)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: Analysis.h:114
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
void preserveSet()
Mark an analysis set as preserved.
Definition: Analysis.h:146
void preserve()
Mark an analysis as preserved.
Definition: Analysis.h:131
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
uint64_t getOrCompColdCountThreshold() const
Returns ColdCountThreshold if set.
void refresh()
If no summary is present, attempt to refresh.
bool isColdCount(uint64_t C) const
Returns true if count C is considered cold.
bool isHotCount(uint64_t C) const
Returns true if count C is considered hot.
uint64_t getOrCompHotCountThreshold() const
Returns HotCountThreshold if set.
This class represents the LLVM 'select' instruction.
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
constexpr bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:134
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:131
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
TinyPtrVector - This class is specialized for cases where there are normally 0 or 1 element in a vect...
Definition: TinyPtrVector.h:29
EltTy front() const
unsigned size() const
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
std::string str() const
Return the twine contents as a std::string.
Definition: Twine.cpp:17
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
void print(raw_ostream &O, bool IsForDebug=false, bool NoDetails=false) const
Print the current type.
static IntegerType * getInt64Ty(LLVMContext &C)
Value * getOperand(unsigned i) const
Definition: User.h:169
Utility analysis that determines what values are worth profiling.
std::vector< CandidateInfo > get(InstrProfValueKind Kind) const
returns a list of value profiling candidates of the given kind
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition: ilist_node.h:32
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:661
The virtual file system interface.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
This file contains the declaration of the Comdat class, which represents a single COMDAT in LLVM.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1539
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:711
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
uint64_t getFuncHash(const FuncRecordTy *Record)
Return the structural hash associated with the function.
void checkExpectAnnotations(Instruction &I, const ArrayRef< uint32_t > ExistingWeights, bool IsFrontend)
checkExpectAnnotations - compares PGO counters to the thresholds used for llvm.expect and warns if th...
Definition: MisExpect.cpp:204
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< FuncNode * > Func
Definition: RDFGraph.h:393
void write64le(void *P, uint64_t V)
Definition: Endian.h:471
IntrusiveRefCntPtr< FileSystem > getRealFileSystem()
Gets a vfs::FileSystem for the 'real' file system, as seen by the operating system.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void setIrrLoopHeaderMetadata(Module *M, Instruction *TI, uint64_t Count)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1680
std::string getPGOFuncName(const Function &F, bool InLTO=false, uint64_t Version=INSTR_PROF_INDEX_VERSION)
Please use getIRPGOFuncName for LLVM IR instrumentation.
Definition: InstrProf.cpp:379
void createPGOFuncNameMetadata(Function &F, StringRef PGOFuncName)
Create the PGOFuncName metadata if PGOFuncName is different from the function's raw name.
Definition: InstrProf.cpp:1408
unsigned GetSuccessorNumber(const BasicBlock *BB, const BasicBlock *Succ)
Search for the specified successor of basic block BB and return its position in the terminator instru...
Definition: CFG.cpp:79
std::string getIRPGOFuncName(const Function &F, bool InLTO=false)
Definition: InstrProf.cpp:368
Function::ProfileCount ProfileCount
auto successors(const MachineBasicBlock *BB)
void createProfileSamplingVar(Module &M)
void handleAllErrors(Error E, HandlerTs &&... Handlers)
Behaves the same as handleErrors, except that by contract all errors must be handled by the given han...
Definition: Error.h:977
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
cl::opt< InstrProfCorrelator::ProfCorrelatorKind > ProfileCorrelate("profile-correlate", cl::desc("Use debug info or binary file to correlate profiles."), cl::init(InstrProfCorrelator::NONE), cl::values(clEnumValN(InstrProfCorrelator::NONE, "", "No profile correlation"), clEnumValN(InstrProfCorrelator::DEBUG_INFO, "debug-info", "Use debug info to correlate"), clEnumValN(InstrProfCorrelator::BINARY, "binary", "Use binary to correlate")))
DenseMap< BasicBlock *, ColorVector > colorEHFunclets(Function &F)
If an EH funclet personality is in use (see isFuncletEHPersonality), this will recompute which blocks...
void createPGONameMetadata(GlobalObject &GO, StringRef PGOName)
Create the PGOName metadata if a global object's PGO name is different from its mangled name.
Definition: InstrProf.cpp:1412
PGOInstrumentationType
cl::opt< bool > PGOWarnMissing
raw_ostream & WriteGraph(raw_ostream &O, const GraphType &G, bool ShortNames=false, const Twine &Title="")
Definition: GraphWriter.h:359
bool SplitIndirectBrCriticalEdges(Function &F, bool IgnoreBlocksWithoutPHI, BranchProbabilityInfo *BPI=nullptr, BlockFrequencyInfo *BFI=nullptr)
cl::opt< bool > EnableVTableProfileUse("enable-vtable-profile-use", cl::init(false), cl::desc("If ThinLTO and WPD is enabled and this option is true, vtable " "profiles will be used by ICP pass for more efficient indirect " "call sequence. If false, type profiles won't be used."))
bool isScopedEHPersonality(EHPersonality Pers)
Returns true if this personality uses scope-style EH IR instructions: catchswitch,...
cl::opt< bool > DebugInfoCorrelate
OperandBundleDefT< Value * > OperandBundleDef
Definition: AutoUpgrade.h:33
std::string getPGOName(const GlobalVariable &V, bool InLTO=false)
Definition: InstrProf.cpp:395
cl::opt< std::string > ViewBlockFreqFuncName("view-bfi-func-name", cl::Hidden, cl::desc("The option to specify " "the name of the function " "whose CFG will be displayed."))
GlobalVariable * createPGOFuncNameVar(Function &F, StringRef PGOFuncName)
Create and return the global variable for function name used in PGO instrumentation.
Definition: InstrProf.cpp:467
void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIndx from InstrProfR and annotate the instruction Inst w...
Definition: InstrProf.cpp:1282
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
cl::opt< bool > NoPGOWarnMismatch
Definition: MemProfiler.cpp:56
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
InstrProfValueKind
Definition: InstrProf.h:271
cl::opt< PGOViewCountsType > PGOViewCounts("pgo-view-counts", cl::Hidden, cl::desc("A boolean option to show CFG dag or text with " "block profile counts and branch probabilities " "right after PGO profile annotation step. The " "profile counts are computed using branch " "probabilities from the runtime profile data and " "block frequency propagation algorithm. To view " "the raw counts from the profile, use option " "-pgo-view-raw-counts instead. To limit graph " "display to only one function, use filtering option " "-view-bfi-func-name."), cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), clEnumValN(PGOVCT_Graph, "graph", "show a graph."), clEnumValN(PGOVCT_Text, "text", "show in text.")))
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
static uint32_t scaleBranchCount(uint64_t Count, uint64_t Scale)
Scale an individual branch count.
void appendToCompilerUsed(Module &M, ArrayRef< GlobalValue * > Values)
Adds global values to the llvm.compiler.used list.
BasicBlock * SplitCriticalEdge(Instruction *TI, unsigned SuccNum, const CriticalEdgeSplittingOptions &Options=CriticalEdgeSplittingOptions(), const Twine &BBName="")
If this edge is a critical edge, insert a new node to split the critical edge.
void ViewGraph(const GraphType &G, const Twine &Name, bool ShortNames=false, const Twine &Title="", GraphProgram::Name Program=GraphProgram::DOT)
ViewGraph - Emit a dot graph, run 'dot', run gv on the postscript file, then cleanup.
Definition: GraphWriter.h:427
bool isCriticalEdge(const Instruction *TI, unsigned SuccNum, bool AllowIdenticalEdges=false)
Return true if the specified edge is a critical edge.
Definition: CFG.cpp:95
static uint64_t calculateCountScale(uint64_t MaxCount)
Calculate what to divide by to scale counts.
bool canRenameComdatFunc(const Function &F, bool CheckAddressTaken=false)
Check if we can safely rename this Comdat function.
Definition: InstrProf.cpp:1464
void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput)
Definition: InstrProf.cpp:1487
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1856
@ DS_Warning
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
auto predecessors(const MachineBasicBlock *BB)
void setProfMetadata(Module *M, Instruction *TI, ArrayRef< uint64_t > EdgeCounts, uint64_t MaxCount)
cl::opt< bool > EnableVTableValueProfiling("enable-vtable-value-profiling", cl::init(false), cl::desc("If true, the virtual table address will be instrumented to know " "the types of a C++ pointer. The information is used in indirect " "call promotion to do selective vtable-based comparison."))
SuccIterator< const Instruction, const BasicBlock > const_succ_iterator
Definition: CFG.h:243
cl::opt< bool > NoPGOWarnMismatchComdatWeak
Implement std::hash so that hash_code can be used in STL containers.
Definition: BitVector.h:858
#define N
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:254
static const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:282
static std::string getGraphName(const PGOUseFunc *G)
std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph)
DOTGraphTraits - Template class that can be specialized to customize how graphs are converted to 'dot...
DefaultDOTGraphTraits - This class provides the default implementations of all of the DOTGraphTraits ...
static ChildIteratorType child_end(const NodeRef N)
static NodeRef getEntryNode(const PGOUseFunc *G)
static ChildIteratorType child_begin(const NodeRef N)
static nodes_iterator nodes_end(const PGOUseFunc *G)
static nodes_iterator nodes_begin(const PGOUseFunc *G)
Profiling information for a single function.
Definition: InstrProf.h:827
std::vector< uint64_t > Counts
Definition: InstrProf.h:828
CountPseudoKind getCountPseudoKind() const
Definition: InstrProf.h:925
uint32_t getNumValueSites(uint32_t ValueKind) const
Return the number of instrumented sites for ValueKind.
Definition: InstrProf.h:1027
static void setCSFlagInHash(uint64_t &FuncHash)
Definition: InstrProf.h:1008