LLVM 19.0.0git
PGOInstrumentation.cpp
Go to the documentation of this file.
1//===- PGOInstrumentation.cpp - MST-based PGO Instrumentation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements PGO instrumentation using a minimum spanning tree based
10// on the following paper:
11// [1] Donald E. Knuth, Francis R. Stevenson. Optimal measurement of points
12// for program frequency counts. BIT Numerical Mathematics 1973, Volume 13,
13// Issue 3, pp 313-322
14// The idea of the algorithm based on the fact that for each node (except for
15// the entry and exit), the sum of incoming edge counts equals the sum of
16// outgoing edge counts. The count of edge on spanning tree can be derived from
17// those edges not on the spanning tree. Knuth proves this method instruments
18// the minimum number of edges.
19//
20// The minimal spanning tree here is actually a maximum weight tree -- on-tree
21// edges have higher frequencies (more likely to execute). The idea is to
22// instrument those less frequently executed edges to reduce the runtime
23// overhead of instrumented binaries.
24//
25// This file contains two passes:
26// (1) Pass PGOInstrumentationGen which instruments the IR to generate edge
27// count profile, and generates the instrumentation for indirect call
28// profiling.
29// (2) Pass PGOInstrumentationUse which reads the edge count profile and
30// annotates the branch weights. It also reads the indirect call value
31// profiling records and annotate the indirect call instructions.
32//
33// To get the precise counter information, These two passes need to invoke at
34// the same compilation point (so they see the same IR). For pass
35// PGOInstrumentationGen, the real work is done in instrumentOneFunc(). For
36// pass PGOInstrumentationUse, the real work in done in class PGOUseFunc and
37// the profile is opened in module level and passed to each PGOUseFunc instance.
38// The shared code for PGOInstrumentationGen and PGOInstrumentationUse is put
39// in class FuncPGOInstrumentation.
40//
41// Class PGOEdge represents a CFG edge and some auxiliary information. Class
42// BBInfo contains auxiliary information for each BB. These two classes are used
43// in pass PGOInstrumentationGen. Class PGOUseEdge and UseBBInfo are the derived
44// class of PGOEdge and BBInfo, respectively. They contains extra data structure
45// used in populating profile counters.
46// The MST implementation is in Class CFGMST (CFGMST.h).
47//
48//===----------------------------------------------------------------------===//
49
52#include "llvm/ADT/APInt.h"
53#include "llvm/ADT/ArrayRef.h"
54#include "llvm/ADT/STLExtras.h"
56#include "llvm/ADT/Statistic.h"
57#include "llvm/ADT/StringRef.h"
58#include "llvm/ADT/Twine.h"
59#include "llvm/ADT/iterator.h"
63#include "llvm/Analysis/CFG.h"
68#include "llvm/IR/Attributes.h"
69#include "llvm/IR/BasicBlock.h"
70#include "llvm/IR/CFG.h"
71#include "llvm/IR/Comdat.h"
72#include "llvm/IR/Constant.h"
73#include "llvm/IR/Constants.h"
75#include "llvm/IR/Dominators.h"
77#include "llvm/IR/Function.h"
78#include "llvm/IR/GlobalAlias.h"
79#include "llvm/IR/GlobalValue.h"
81#include "llvm/IR/IRBuilder.h"
82#include "llvm/IR/InstVisitor.h"
83#include "llvm/IR/InstrTypes.h"
84#include "llvm/IR/Instruction.h"
87#include "llvm/IR/Intrinsics.h"
88#include "llvm/IR/LLVMContext.h"
89#include "llvm/IR/MDBuilder.h"
90#include "llvm/IR/Module.h"
91#include "llvm/IR/PassManager.h"
94#include "llvm/IR/Type.h"
95#include "llvm/IR/Value.h"
99#include "llvm/Support/CRC.h"
100#include "llvm/Support/Casting.h"
103#include "llvm/Support/Debug.h"
104#include "llvm/Support/Error.h"
116#include <algorithm>
117#include <cassert>
118#include <cstdint>
119#include <memory>
120#include <numeric>
121#include <optional>
122#include <string>
123#include <unordered_map>
124#include <utility>
125#include <vector>
126
127using namespace llvm;
130
131#define DEBUG_TYPE "pgo-instrumentation"
132
133STATISTIC(NumOfPGOInstrument, "Number of edges instrumented.");
134STATISTIC(NumOfPGOSelectInsts, "Number of select instruction instrumented.");
135STATISTIC(NumOfPGOMemIntrinsics, "Number of mem intrinsics instrumented.");
136STATISTIC(NumOfPGOEdge, "Number of edges.");
137STATISTIC(NumOfPGOBB, "Number of basic-blocks.");
138STATISTIC(NumOfPGOSplit, "Number of critical edge splits.");
139STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts.");
140STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile.");
141STATISTIC(NumOfPGOMissing, "Number of functions without profile.");
142STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations.");
143STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO.");
144STATISTIC(NumOfCSPGOSelectInsts,
145 "Number of select instruction instrumented in CSPGO.");
146STATISTIC(NumOfCSPGOMemIntrinsics,
147 "Number of mem intrinsics instrumented in CSPGO.");
148STATISTIC(NumOfCSPGOEdge, "Number of edges in CSPGO.");
149STATISTIC(NumOfCSPGOBB, "Number of basic-blocks in CSPGO.");
150STATISTIC(NumOfCSPGOSplit, "Number of critical edge splits in CSPGO.");
151STATISTIC(NumOfCSPGOFunc,
152 "Number of functions having valid profile counts in CSPGO.");
153STATISTIC(NumOfCSPGOMismatch,
154 "Number of functions having mismatch profile in CSPGO.");
155STATISTIC(NumOfCSPGOMissing, "Number of functions without profile in CSPGO.");
156STATISTIC(NumCoveredBlocks, "Number of basic blocks that were executed");
157
158// Command line option to specify the file to read profile from. This is
159// mainly used for testing.
161 PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden,
162 cl::value_desc("filename"),
163 cl::desc("Specify the path of profile data file. This is"
164 "mainly for test purpose."));
166 "pgo-test-profile-remapping-file", cl::init(""), cl::Hidden,
167 cl::value_desc("filename"),
168 cl::desc("Specify the path of profile remapping file. This is mainly for "
169 "test purpose."));
170
171// Command line option to disable value profiling. The default is false:
172// i.e. value profiling is enabled by default. This is for debug purpose.
173static cl::opt<bool> DisableValueProfiling("disable-vp", cl::init(false),
175 cl::desc("Disable Value Profiling"));
176
177// Command line option to set the maximum number of VP annotations to write to
178// the metadata for a single indirect call callsite.
180 "icp-max-annotations", cl::init(3), cl::Hidden,
181 cl::desc("Max number of annotations for a single indirect "
182 "call callsite"));
183
184// Command line option to set the maximum number of value annotations
185// to write to the metadata for a single memop intrinsic.
187 "memop-max-annotations", cl::init(4), cl::Hidden,
188 cl::desc("Max number of preicise value annotations for a single memop"
189 "intrinsic"));
190
191// Command line option to control appending FunctionHash to the name of a COMDAT
192// function. This is to avoid the hash mismatch caused by the preinliner.
194 "do-comdat-renaming", cl::init(false), cl::Hidden,
195 cl::desc("Append function hash to the name of COMDAT function to avoid "
196 "function hash mismatch due to the preinliner"));
197
198namespace llvm {
199// Command line option to enable/disable the warning about missing profile
200// information.
201cl::opt<bool> PGOWarnMissing("pgo-warn-missing-function", cl::init(false),
203 cl::desc("Use this option to turn on/off "
204 "warnings about missing profile data for "
205 "functions."));
206
207// Command line option to enable/disable the warning about a hash mismatch in
208// the profile data.
210 NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), cl::Hidden,
211 cl::desc("Use this option to turn off/on "
212 "warnings about profile cfg mismatch."));
213
214// Command line option to enable/disable the warning about a hash mismatch in
215// the profile data for Comdat functions, which often turns out to be false
216// positive due to the pre-instrumentation inline.
218 "no-pgo-warn-mismatch-comdat-weak", cl::init(true), cl::Hidden,
219 cl::desc("The option is used to turn on/off "
220 "warnings about hash mismatch for comdat "
221 "or weak functions."));
222} // namespace llvm
223
224// Command line option to enable/disable select instruction instrumentation.
225static cl::opt<bool>
226 PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden,
227 cl::desc("Use this option to turn on/off SELECT "
228 "instruction instrumentation. "));
229
230// Command line option to turn on CFG dot or text dump of raw profile counts
232 "pgo-view-raw-counts", cl::Hidden,
233 cl::desc("A boolean option to show CFG dag or text "
234 "with raw profile counts from "
235 "profile data. See also option "
236 "-pgo-view-counts. To limit graph "
237 "display to only one function, use "
238 "filtering option -view-bfi-func-name."),
239 cl::values(clEnumValN(PGOVCT_None, "none", "do not show."),
240 clEnumValN(PGOVCT_Graph, "graph", "show a graph."),
241 clEnumValN(PGOVCT_Text, "text", "show in text.")));
242
243// Command line option to enable/disable memop intrinsic call.size profiling.
244static cl::opt<bool>
245 PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden,
246 cl::desc("Use this option to turn on/off "
247 "memory intrinsic size profiling."));
248
249// Emit branch probability as optimization remarks.
250static cl::opt<bool>
251 EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden,
252 cl::desc("When this option is on, the annotated "
253 "branch probability will be emitted as "
254 "optimization remarks: -{Rpass|"
255 "pass-remarks}=pgo-instrumentation"));
256
258 "pgo-instrument-entry", cl::init(false), cl::Hidden,
259 cl::desc("Force to instrument function entry basicblock."));
260
262 "pgo-function-entry-coverage", cl::Hidden,
263 cl::desc(
264 "Use this option to enable function entry coverage instrumentation."));
265
267 "pgo-block-coverage",
268 cl::desc("Use this option to enable basic block coverage instrumentation"));
269
270static cl::opt<bool>
271 PGOViewBlockCoverageGraph("pgo-view-block-coverage-graph",
272 cl::desc("Create a dot file of CFGs with block "
273 "coverage inference information"));
274
276 "pgo-temporal-instrumentation",
277 cl::desc("Use this option to enable temporal instrumentation"));
278
279static cl::opt<bool>
280 PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden,
281 cl::desc("Fix function entry count in profile use."));
282
284 "pgo-verify-hot-bfi", cl::init(false), cl::Hidden,
285 cl::desc("Print out the non-match BFI count if a hot raw profile count "
286 "becomes non-hot, or a cold raw profile count becomes hot. "
287 "The print is enabled under -Rpass-analysis=pgo, or "
288 "internal option -pass-remakrs-analysis=pgo."));
289
291 "pgo-verify-bfi", cl::init(false), cl::Hidden,
292 cl::desc("Print out mismatched BFI counts after setting profile metadata "
293 "The print is enabled under -Rpass-analysis=pgo, or "
294 "internal option -pass-remakrs-analysis=pgo."));
295
297 "pgo-verify-bfi-ratio", cl::init(2), cl::Hidden,
298 cl::desc("Set the threshold for pgo-verify-bfi: only print out "
299 "mismatched BFI if the difference percentage is greater than "
300 "this value (in percentage)."));
301
303 "pgo-verify-bfi-cutoff", cl::init(5), cl::Hidden,
304 cl::desc("Set the threshold for pgo-verify-bfi: skip the counts whose "
305 "profile count value is below."));
306
308 "pgo-trace-func-hash", cl::init("-"), cl::Hidden,
309 cl::value_desc("function name"),
310 cl::desc("Trace the hash of the function with this name."));
311
313 "pgo-function-size-threshold", cl::Hidden,
314 cl::desc("Do not instrument functions smaller than this threshold."));
315
317 "pgo-critical-edge-threshold", cl::init(20000), cl::Hidden,
318 cl::desc("Do not instrument functions with the number of critical edges "
319 " greater than this threshold."));
320
321namespace llvm {
322// Command line option to turn on CFG dot dump after profile annotation.
323// Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts
325
326// Command line option to specify the name of the function for CFG dump
327// Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name=
329
331} // namespace llvm
332
333// Return a string describing the branch condition that can be
334// used in static branch probability heuristics:
335static std::string getBranchCondString(Instruction *TI) {
336 BranchInst *BI = dyn_cast<BranchInst>(TI);
337 if (!BI || !BI->isConditional())
338 return std::string();
339
340 Value *Cond = BI->getCondition();
341 ICmpInst *CI = dyn_cast<ICmpInst>(Cond);
342 if (!CI)
343 return std::string();
344
345 std::string result;
346 raw_string_ostream OS(result);
347 OS << CI->getPredicate() << "_";
348 CI->getOperand(0)->getType()->print(OS, true);
349
350 Value *RHS = CI->getOperand(1);
351 ConstantInt *CV = dyn_cast<ConstantInt>(RHS);
352 if (CV) {
353 if (CV->isZero())
354 OS << "_Zero";
355 else if (CV->isOne())
356 OS << "_One";
357 else if (CV->isMinusOne())
358 OS << "_MinusOne";
359 else
360 OS << "_Const";
361 }
362 OS.flush();
363 return result;
364}
365
366static const char *ValueProfKindDescr[] = {
367#define VALUE_PROF_KIND(Enumerator, Value, Descr) Descr,
369};
370
371// Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime
372// aware this is an ir_level profile so it can set the version flag.
374 const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR));
375 Type *IntTy64 = Type::getInt64Ty(M.getContext());
376 uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF);
377 if (IsCS)
378 ProfileVersion |= VARIANT_MASK_CSIR_PROF;
380 ProfileVersion |= VARIANT_MASK_INSTR_ENTRY;
382 ProfileVersion |= VARIANT_MASK_DBG_CORRELATE;
384 ProfileVersion |=
385 VARIANT_MASK_BYTE_COVERAGE | VARIANT_MASK_FUNCTION_ENTRY_ONLY;
387 ProfileVersion |= VARIANT_MASK_BYTE_COVERAGE;
389 ProfileVersion |= VARIANT_MASK_TEMPORAL_PROF;
390 auto IRLevelVersionVariable = new GlobalVariable(
391 M, IntTy64, true, GlobalValue::WeakAnyLinkage,
392 Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), VarName);
393 IRLevelVersionVariable->setVisibility(GlobalValue::HiddenVisibility);
394 Triple TT(M.getTargetTriple());
395 if (TT.supportsCOMDAT()) {
396 IRLevelVersionVariable->setLinkage(GlobalValue::ExternalLinkage);
397 IRLevelVersionVariable->setComdat(M.getOrInsertComdat(VarName));
398 }
399 return IRLevelVersionVariable;
400}
401
402namespace {
403
404/// The select instruction visitor plays three roles specified
405/// by the mode. In \c VM_counting mode, it simply counts the number of
406/// select instructions. In \c VM_instrument mode, it inserts code to count
407/// the number times TrueValue of select is taken. In \c VM_annotate mode,
408/// it reads the profile data and annotate the select instruction with metadata.
409enum VisitMode { VM_counting, VM_instrument, VM_annotate };
410class PGOUseFunc;
411
412/// Instruction Visitor class to visit select instructions.
413struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> {
414 Function &F;
415 unsigned NSIs = 0; // Number of select instructions instrumented.
416 VisitMode Mode = VM_counting; // Visiting mode.
417 unsigned *CurCtrIdx = nullptr; // Pointer to current counter index.
418 unsigned TotalNumCtrs = 0; // Total number of counters
419 GlobalVariable *FuncNameVar = nullptr;
420 uint64_t FuncHash = 0;
421 PGOUseFunc *UseFunc = nullptr;
422 bool HasSingleByteCoverage;
423
424 SelectInstVisitor(Function &Func, bool HasSingleByteCoverage)
425 : F(Func), HasSingleByteCoverage(HasSingleByteCoverage) {}
426
427 void countSelects() {
428 NSIs = 0;
429 Mode = VM_counting;
430 visit(F);
431 }
432
433 // Visit the IR stream and instrument all select instructions. \p
434 // Ind is a pointer to the counter index variable; \p TotalNC
435 // is the total number of counters; \p FNV is the pointer to the
436 // PGO function name var; \p FHash is the function hash.
437 void instrumentSelects(unsigned *Ind, unsigned TotalNC, GlobalVariable *FNV,
438 uint64_t FHash) {
439 Mode = VM_instrument;
440 CurCtrIdx = Ind;
441 TotalNumCtrs = TotalNC;
442 FuncHash = FHash;
443 FuncNameVar = FNV;
444 visit(F);
445 }
446
447 // Visit the IR stream and annotate all select instructions.
448 void annotateSelects(PGOUseFunc *UF, unsigned *Ind) {
449 Mode = VM_annotate;
450 UseFunc = UF;
451 CurCtrIdx = Ind;
452 visit(F);
453 }
454
455 void instrumentOneSelectInst(SelectInst &SI);
456 void annotateOneSelectInst(SelectInst &SI);
457
458 // Visit \p SI instruction and perform tasks according to visit mode.
459 void visitSelectInst(SelectInst &SI);
460
461 // Return the number of select instructions. This needs be called after
462 // countSelects().
463 unsigned getNumOfSelectInsts() const { return NSIs; }
464};
465
466/// This class implements the CFG edges for the Minimum Spanning Tree (MST)
467/// based instrumentation.
468/// Note that the CFG can be a multi-graph. So there might be multiple edges
469/// with the same SrcBB and DestBB.
470struct PGOEdge {
471 BasicBlock *SrcBB;
472 BasicBlock *DestBB;
473 uint64_t Weight;
474 bool InMST = false;
475 bool Removed = false;
476 bool IsCritical = false;
477
478 PGOEdge(BasicBlock *Src, BasicBlock *Dest, uint64_t W = 1)
479 : SrcBB(Src), DestBB(Dest), Weight(W) {}
480
481 /// Return the information string of an edge.
482 std::string infoString() const {
483 return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") +
484 (IsCritical ? "c" : " ") + " W=" + Twine(Weight))
485 .str();
486 }
487};
488
489/// This class stores the auxiliary information for each BB in the MST.
490struct PGOBBInfo {
491 PGOBBInfo *Group;
493 uint32_t Rank = 0;
494
495 PGOBBInfo(unsigned IX) : Group(this), Index(IX) {}
496
497 /// Return the information string of this object.
498 std::string infoString() const {
499 return (Twine("Index=") + Twine(Index)).str();
500 }
501};
502
503// This class implements the CFG edges. Note the CFG can be a multi-graph.
504template <class Edge, class BBInfo> class FuncPGOInstrumentation {
505private:
506 Function &F;
507
508 // Is this is context-sensitive instrumentation.
509 bool IsCS;
510
511 // A map that stores the Comdat group in function F.
512 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers;
513
515
516 void computeCFGHash();
517 void renameComdatFunction();
518
519public:
520 const TargetLibraryInfo &TLI;
521 std::vector<std::vector<VPCandidateInfo>> ValueSites;
522 SelectInstVisitor SIVisitor;
523 std::string FuncName;
524 std::string DeprecatedFuncName;
525 GlobalVariable *FuncNameVar;
526
527 // CFG hash value for this function.
528 uint64_t FunctionHash = 0;
529
530 // The Minimum Spanning Tree of function CFG.
532
533 const std::optional<BlockCoverageInference> BCI;
534
535 static std::optional<BlockCoverageInference>
536 constructBCI(Function &Func, bool HasSingleByteCoverage,
537 bool InstrumentFuncEntry) {
538 if (HasSingleByteCoverage)
539 return BlockCoverageInference(Func, InstrumentFuncEntry);
540 return {};
541 }
542
543 // Collect all the BBs that will be instrumented, and store them in
544 // InstrumentBBs.
545 void getInstrumentBBs(std::vector<BasicBlock *> &InstrumentBBs);
546
547 // Give an edge, find the BB that will be instrumented.
548 // Return nullptr if there is no BB to be instrumented.
549 BasicBlock *getInstrBB(Edge *E);
550
551 // Return the auxiliary BB information.
552 BBInfo &getBBInfo(const BasicBlock *BB) const { return MST.getBBInfo(BB); }
553
554 // Return the auxiliary BB information if available.
555 BBInfo *findBBInfo(const BasicBlock *BB) const { return MST.findBBInfo(BB); }
556
557 // Dump edges and BB information.
558 void dumpInfo(StringRef Str = "") const {
559 MST.dumpEdges(dbgs(), Twine("Dump Function ") + FuncName +
560 " Hash: " + Twine(FunctionHash) + "\t" + Str);
561 }
562
563 FuncPGOInstrumentation(
564 Function &Func, TargetLibraryInfo &TLI,
565 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
566 bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr,
567 BlockFrequencyInfo *BFI = nullptr, bool IsCS = false,
568 bool InstrumentFuncEntry = true, bool HasSingleByteCoverage = false)
569 : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func, TLI),
570 TLI(TLI), ValueSites(IPVK_Last + 1),
571 SIVisitor(Func, HasSingleByteCoverage),
572 MST(F, InstrumentFuncEntry, BPI, BFI),
573 BCI(constructBCI(Func, HasSingleByteCoverage, InstrumentFuncEntry)) {
574 if (BCI && PGOViewBlockCoverageGraph)
575 BCI->viewBlockCoverageGraph();
576 // This should be done before CFG hash computation.
577 SIVisitor.countSelects();
578 ValueSites[IPVK_MemOPSize] = VPC.get(IPVK_MemOPSize);
579 if (!IsCS) {
580 NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
581 NumOfPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
582 NumOfPGOBB += MST.bbInfoSize();
583 ValueSites[IPVK_IndirectCallTarget] = VPC.get(IPVK_IndirectCallTarget);
584 } else {
585 NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
586 NumOfCSPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
587 NumOfCSPGOBB += MST.bbInfoSize();
588 }
589
590 FuncName = getIRPGOFuncName(F);
591 DeprecatedFuncName = getPGOFuncName(F);
592 computeCFGHash();
593 if (!ComdatMembers.empty())
594 renameComdatFunction();
595 LLVM_DEBUG(dumpInfo("after CFGMST"));
596
597 for (const auto &E : MST.allEdges()) {
598 if (E->Removed)
599 continue;
600 IsCS ? NumOfCSPGOEdge++ : NumOfPGOEdge++;
601 if (!E->InMST)
602 IsCS ? NumOfCSPGOInstrument++ : NumOfPGOInstrument++;
603 }
604
605 if (CreateGlobalVar)
606 FuncNameVar = createPGOFuncNameVar(F, FuncName);
607 }
608};
609
610} // end anonymous namespace
611
612// Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index
613// value of each BB in the CFG. The higher 32 bits are the CRC32 of the numbers
614// of selects, indirect calls, mem ops and edges.
615template <class Edge, class BBInfo>
616void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
617 std::vector<uint8_t> Indexes;
618 JamCRC JC;
619 for (auto &BB : F) {
620 for (BasicBlock *Succ : successors(&BB)) {
621 auto BI = findBBInfo(Succ);
622 if (BI == nullptr)
623 continue;
624 uint32_t Index = BI->Index;
625 for (int J = 0; J < 4; J++)
626 Indexes.push_back((uint8_t)(Index >> (J * 8)));
627 }
628 }
629 JC.update(Indexes);
630
631 JamCRC JCH;
632 // The higher 32 bits.
633 auto updateJCH = [&JCH](uint64_t Num) {
634 uint8_t Data[8];
636 JCH.update(Data);
637 };
638 updateJCH((uint64_t)SIVisitor.getNumOfSelectInsts());
639 updateJCH((uint64_t)ValueSites[IPVK_IndirectCallTarget].size());
640 updateJCH((uint64_t)ValueSites[IPVK_MemOPSize].size());
641 if (BCI) {
642 updateJCH(BCI->getInstrumentedBlocksHash());
643 } else {
644 updateJCH((uint64_t)MST.numEdges());
645 }
646
647 // Hash format for context sensitive profile. Reserve 4 bits for other
648 // information.
649 FunctionHash = (((uint64_t)JCH.getCRC()) << 28) + JC.getCRC();
650
651 // Reserve bit 60-63 for other information purpose.
652 FunctionHash &= 0x0FFFFFFFFFFFFFFF;
653 if (IsCS)
655 LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n"
656 << " CRC = " << JC.getCRC()
657 << ", Selects = " << SIVisitor.getNumOfSelectInsts()
658 << ", Edges = " << MST.numEdges() << ", ICSites = "
659 << ValueSites[IPVK_IndirectCallTarget].size()
660 << ", Memops = " << ValueSites[IPVK_MemOPSize].size()
661 << ", High32 CRC = " << JCH.getCRC()
662 << ", Hash = " << FunctionHash << "\n";);
663
664 if (PGOTraceFuncHash != "-" && F.getName().contains(PGOTraceFuncHash))
665 dbgs() << "Funcname=" << F.getName() << ", Hash=" << FunctionHash
666 << " in building " << F.getParent()->getSourceFileName() << "\n";
667}
668
669// Check if we can safely rename this Comdat function.
670static bool canRenameComdat(
671 Function &F,
672 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
673 if (!DoComdatRenaming || !canRenameComdatFunc(F, true))
674 return false;
675
676 // FIXME: Current only handle those Comdat groups that only containing one
677 // function.
678 // (1) For a Comdat group containing multiple functions, we need to have a
679 // unique postfix based on the hashes for each function. There is a
680 // non-trivial code refactoring to do this efficiently.
681 // (2) Variables can not be renamed, so we can not rename Comdat function in a
682 // group including global vars.
683 Comdat *C = F.getComdat();
684 for (auto &&CM : make_range(ComdatMembers.equal_range(C))) {
685 assert(!isa<GlobalAlias>(CM.second));
686 Function *FM = dyn_cast<Function>(CM.second);
687 if (FM != &F)
688 return false;
689 }
690 return true;
691}
692
693// Append the CFGHash to the Comdat function name.
694template <class Edge, class BBInfo>
695void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() {
696 if (!canRenameComdat(F, ComdatMembers))
697 return;
698 std::string OrigName = F.getName().str();
699 std::string NewFuncName =
700 Twine(F.getName() + "." + Twine(FunctionHash)).str();
701 F.setName(Twine(NewFuncName));
703 FuncName = Twine(FuncName + "." + Twine(FunctionHash)).str();
704 Comdat *NewComdat;
705 Module *M = F.getParent();
706 // For AvailableExternallyLinkage functions, change the linkage to
707 // LinkOnceODR and put them into comdat. This is because after renaming, there
708 // is no backup external copy available for the function.
709 if (!F.hasComdat()) {
711 NewComdat = M->getOrInsertComdat(StringRef(NewFuncName));
713 F.setComdat(NewComdat);
714 return;
715 }
716
717 // This function belongs to a single function Comdat group.
718 Comdat *OrigComdat = F.getComdat();
719 std::string NewComdatName =
720 Twine(OrigComdat->getName() + "." + Twine(FunctionHash)).str();
721 NewComdat = M->getOrInsertComdat(StringRef(NewComdatName));
722 NewComdat->setSelectionKind(OrigComdat->getSelectionKind());
723
724 for (auto &&CM : make_range(ComdatMembers.equal_range(OrigComdat))) {
725 // Must be a function.
726 cast<Function>(CM.second)->setComdat(NewComdat);
727 }
728}
729
730/// Collect all the BBs that will be instruments and add them to
731/// `InstrumentBBs`.
732template <class Edge, class BBInfo>
733void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs(
734 std::vector<BasicBlock *> &InstrumentBBs) {
735 if (BCI) {
736 for (auto &BB : F)
737 if (BCI->shouldInstrumentBlock(BB))
738 InstrumentBBs.push_back(&BB);
739 return;
740 }
741
742 // Use a worklist as we will update the vector during the iteration.
743 std::vector<Edge *> EdgeList;
744 EdgeList.reserve(MST.numEdges());
745 for (const auto &E : MST.allEdges())
746 EdgeList.push_back(E.get());
747
748 for (auto &E : EdgeList) {
749 BasicBlock *InstrBB = getInstrBB(E);
750 if (InstrBB)
751 InstrumentBBs.push_back(InstrBB);
752 }
753}
754
755// Given a CFG E to be instrumented, find which BB to place the instrumented
756// code. The function will split the critical edge if necessary.
757template <class Edge, class BBInfo>
758BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) {
759 if (E->InMST || E->Removed)
760 return nullptr;
761
762 BasicBlock *SrcBB = E->SrcBB;
763 BasicBlock *DestBB = E->DestBB;
764 // For a fake edge, instrument the real BB.
765 if (SrcBB == nullptr)
766 return DestBB;
767 if (DestBB == nullptr)
768 return SrcBB;
769
770 auto canInstrument = [](BasicBlock *BB) -> BasicBlock * {
771 // There are basic blocks (such as catchswitch) cannot be instrumented.
772 // If the returned first insertion point is the end of BB, skip this BB.
773 if (BB->getFirstInsertionPt() == BB->end())
774 return nullptr;
775 return BB;
776 };
777
778 // Instrument the SrcBB if it has a single successor,
779 // otherwise, the DestBB if this is not a critical edge.
780 Instruction *TI = SrcBB->getTerminator();
781 if (TI->getNumSuccessors() <= 1)
782 return canInstrument(SrcBB);
783 if (!E->IsCritical)
784 return canInstrument(DestBB);
785
786 // Some IndirectBr critical edges cannot be split by the previous
787 // SplitIndirectBrCriticalEdges call. Bail out.
788 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
789 BasicBlock *InstrBB =
790 isa<IndirectBrInst>(TI) ? nullptr : SplitCriticalEdge(TI, SuccNum);
791 if (!InstrBB) {
793 dbgs() << "Fail to split critical edge: not instrument this edge.\n");
794 return nullptr;
795 }
796 // For a critical edge, we have to split. Instrument the newly
797 // created BB.
798 IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++;
799 LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index
800 << " --> " << getBBInfo(DestBB).Index << "\n");
801 // Need to add two new edges. First one: Add new edge of SrcBB->InstrBB.
802 MST.addEdge(SrcBB, InstrBB, 0);
803 // Second one: Add new edge of InstrBB->DestBB.
804 Edge &NewEdge1 = MST.addEdge(InstrBB, DestBB, 0);
805 NewEdge1.InMST = true;
806 E->Removed = true;
807
808 return canInstrument(InstrBB);
809}
810
811// When generating value profiling calls on Windows routines that make use of
812// handler funclets for exception processing an operand bundle needs to attached
813// to the called function. This routine will set \p OpBundles to contain the
814// funclet information, if any is needed, that should be placed on the generated
815// value profiling call for the value profile candidate call.
816static void
820 auto *OrigCall = dyn_cast<CallBase>(Cand.AnnotatedInst);
821 if (!OrigCall)
822 return;
823
824 if (!isa<IntrinsicInst>(OrigCall)) {
825 // The instrumentation call should belong to the same funclet as a
826 // non-intrinsic call, so just copy the operand bundle, if any exists.
827 std::optional<OperandBundleUse> ParentFunclet =
828 OrigCall->getOperandBundle(LLVMContext::OB_funclet);
829 if (ParentFunclet)
830 OpBundles.emplace_back(OperandBundleDef(*ParentFunclet));
831 } else {
832 // Intrinsics or other instructions do not get funclet information from the
833 // front-end. Need to use the BlockColors that was computed by the routine
834 // colorEHFunclets to determine whether a funclet is needed.
835 if (!BlockColors.empty()) {
836 const ColorVector &CV = BlockColors.find(OrigCall->getParent())->second;
837 assert(CV.size() == 1 && "non-unique color for block!");
838 Instruction *EHPad = CV.front()->getFirstNonPHI();
839 if (EHPad->isEHPad())
840 OpBundles.emplace_back("funclet", EHPad);
841 }
842 }
843}
844
845// Visit all edge and instrument the edges not in MST, and do value profiling.
846// Critical edges will be split.
850 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
851 bool IsCS) {
852 if (!PGOBlockCoverage) {
853 // Split indirectbr critical edges here before computing the MST rather than
854 // later in getInstrBB() to avoid invalidating it.
855 SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, BFI);
856 }
857
858 FuncPGOInstrumentation<PGOEdge, PGOBBInfo> FuncInfo(
859 F, TLI, ComdatMembers, true, BPI, BFI, IsCS, PGOInstrumentEntry,
861
862 auto Name = FuncInfo.FuncNameVar;
863 auto CFGHash = ConstantInt::get(Type::getInt64Ty(M->getContext()),
864 FuncInfo.FunctionHash);
866 auto &EntryBB = F.getEntryBlock();
867 IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt());
868 // llvm.instrprof.cover(i8* <name>, i64 <hash>, i32 <num-counters>,
869 // i32 <index>)
870 Builder.CreateCall(
871 Intrinsic::getDeclaration(M, Intrinsic::instrprof_cover),
872 {Name, CFGHash, Builder.getInt32(1), Builder.getInt32(0)});
873 return;
874 }
875
876 std::vector<BasicBlock *> InstrumentBBs;
877 FuncInfo.getInstrumentBBs(InstrumentBBs);
878 unsigned NumCounters =
879 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
880
881 uint32_t I = 0;
883 NumCounters += PGOBlockCoverage ? 8 : 1;
884 auto &EntryBB = F.getEntryBlock();
885 IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt());
886 // llvm.instrprof.timestamp(i8* <name>, i64 <hash>, i32 <num-counters>,
887 // i32 <index>)
888 Builder.CreateCall(
889 Intrinsic::getDeclaration(M, Intrinsic::instrprof_timestamp),
890 {Name, CFGHash, Builder.getInt32(NumCounters), Builder.getInt32(I)});
891 I += PGOBlockCoverage ? 8 : 1;
892 }
893
894 for (auto *InstrBB : InstrumentBBs) {
895 IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt());
896 assert(Builder.GetInsertPoint() != InstrBB->end() &&
897 "Cannot get the Instrumentation point");
898 // llvm.instrprof.increment(i8* <name>, i64 <hash>, i32 <num-counters>,
899 // i32 <index>)
900 Builder.CreateCall(
902 ? Intrinsic::instrprof_cover
903 : Intrinsic::instrprof_increment),
904 {Name, CFGHash, Builder.getInt32(NumCounters), Builder.getInt32(I++)});
905 }
906
907 // Now instrument select instructions:
908 FuncInfo.SIVisitor.instrumentSelects(&I, NumCounters, FuncInfo.FuncNameVar,
909 FuncInfo.FunctionHash);
910 assert(I == NumCounters);
911
913 return;
914
915 NumOfPGOICall += FuncInfo.ValueSites[IPVK_IndirectCallTarget].size();
916
917 // Intrinsic function calls do not have funclet operand bundles needed for
918 // Windows exception handling attached to them. However, if value profiling is
919 // inserted for one of these calls, then a funclet value will need to be set
920 // on the instrumentation call based on the funclet coloring.
922 if (F.hasPersonalityFn() &&
924 BlockColors = colorEHFunclets(F);
925
926 // For each VP Kind, walk the VP candidates and instrument each one.
927 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) {
928 unsigned SiteIndex = 0;
929 if (Kind == IPVK_MemOPSize && !PGOInstrMemOP)
930 continue;
931
932 for (VPCandidateInfo Cand : FuncInfo.ValueSites[Kind]) {
933 LLVM_DEBUG(dbgs() << "Instrument one VP " << ValueProfKindDescr[Kind]
934 << " site: CallSite Index = " << SiteIndex << "\n");
935
936 IRBuilder<> Builder(Cand.InsertPt);
937 assert(Builder.GetInsertPoint() != Cand.InsertPt->getParent()->end() &&
938 "Cannot get the Instrumentation point");
939
940 Value *ToProfile = nullptr;
941 if (Cand.V->getType()->isIntegerTy())
942 ToProfile = Builder.CreateZExtOrTrunc(Cand.V, Builder.getInt64Ty());
943 else if (Cand.V->getType()->isPointerTy())
944 ToProfile = Builder.CreatePtrToInt(Cand.V, Builder.getInt64Ty());
945 assert(ToProfile && "value profiling Value is of unexpected type");
946
948 populateEHOperandBundle(Cand, BlockColors, OpBundles);
949 Builder.CreateCall(
950 Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile),
951 {FuncInfo.FuncNameVar, Builder.getInt64(FuncInfo.FunctionHash),
952 ToProfile, Builder.getInt32(Kind), Builder.getInt32(SiteIndex++)},
953 OpBundles);
954 }
955 } // IPVK_First <= Kind <= IPVK_Last
956}
957
958namespace {
959
960// This class represents a CFG edge in profile use compilation.
961struct PGOUseEdge : public PGOEdge {
962 using PGOEdge::PGOEdge;
963
964 bool CountValid = false;
965 uint64_t CountValue = 0;
966
967 // Set edge count value
968 void setEdgeCount(uint64_t Value) {
969 CountValue = Value;
970 CountValid = true;
971 }
972
973 // Return the information string for this object.
974 std::string infoString() const {
975 if (!CountValid)
976 return PGOEdge::infoString();
977 return (Twine(PGOEdge::infoString()) + " Count=" + Twine(CountValue))
978 .str();
979 }
980};
981
982using DirectEdges = SmallVector<PGOUseEdge *, 2>;
983
984// This class stores the auxiliary information for each BB.
985struct PGOUseBBInfo : public PGOBBInfo {
986 uint64_t CountValue = 0;
987 bool CountValid;
988 int32_t UnknownCountInEdge = 0;
989 int32_t UnknownCountOutEdge = 0;
990 DirectEdges InEdges;
991 DirectEdges OutEdges;
992
993 PGOUseBBInfo(unsigned IX) : PGOBBInfo(IX), CountValid(false) {}
994
995 // Set the profile count value for this BB.
996 void setBBInfoCount(uint64_t Value) {
997 CountValue = Value;
998 CountValid = true;
999 }
1000
1001 // Return the information string of this object.
1002 std::string infoString() const {
1003 if (!CountValid)
1004 return PGOBBInfo::infoString();
1005 return (Twine(PGOBBInfo::infoString()) + " Count=" + Twine(CountValue))
1006 .str();
1007 }
1008
1009 // Add an OutEdge and update the edge count.
1010 void addOutEdge(PGOUseEdge *E) {
1011 OutEdges.push_back(E);
1012 UnknownCountOutEdge++;
1013 }
1014
1015 // Add an InEdge and update the edge count.
1016 void addInEdge(PGOUseEdge *E) {
1017 InEdges.push_back(E);
1018 UnknownCountInEdge++;
1019 }
1020};
1021
1022} // end anonymous namespace
1023
1024// Sum up the count values for all the edges.
1026 uint64_t Total = 0;
1027 for (const auto &E : Edges) {
1028 if (E->Removed)
1029 continue;
1030 Total += E->CountValue;
1031 }
1032 return Total;
1033}
1034
1035namespace {
1036
1037class PGOUseFunc {
1038public:
1039 PGOUseFunc(Function &Func, Module *Modu, TargetLibraryInfo &TLI,
1040 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
1042 ProfileSummaryInfo *PSI, bool IsCS, bool InstrumentFuncEntry,
1043 bool HasSingleByteCoverage)
1044 : F(Func), M(Modu), BFI(BFIin), PSI(PSI),
1045 FuncInfo(Func, TLI, ComdatMembers, false, BPI, BFIin, IsCS,
1046 InstrumentFuncEntry, HasSingleByteCoverage),
1047 FreqAttr(FFA_Normal), IsCS(IsCS) {}
1048
1049 void handleInstrProfError(Error Err, uint64_t MismatchedFuncSum);
1050
1051 // Read counts for the instrumented BB from profile.
1052 bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
1054
1055 // Populate the counts for all BBs.
1056 void populateCounters();
1057
1058 // Set block coverage based on profile coverage values.
1059 void populateCoverage(IndexedInstrProfReader *PGOReader);
1060
1061 // Set the branch weights based on the count values.
1062 void setBranchWeights();
1063
1064 // Annotate the value profile call sites for all value kind.
1065 void annotateValueSites();
1066
1067 // Annotate the value profile call sites for one value kind.
1068 void annotateValueSites(uint32_t Kind);
1069
1070 // Annotate the irreducible loop header weights.
1071 void annotateIrrLoopHeaderWeights();
1072
1073 // The hotness of the function from the profile count.
1074 enum FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot };
1075
1076 // Return the function hotness from the profile.
1077 FuncFreqAttr getFuncFreqAttr() const { return FreqAttr; }
1078
1079 // Return the function hash.
1080 uint64_t getFuncHash() const { return FuncInfo.FunctionHash; }
1081
1082 // Return the profile record for this function;
1083 InstrProfRecord &getProfileRecord() { return ProfileRecord; }
1084
1085 // Return the auxiliary BB information.
1086 PGOUseBBInfo &getBBInfo(const BasicBlock *BB) const {
1087 return FuncInfo.getBBInfo(BB);
1088 }
1089
1090 // Return the auxiliary BB information if available.
1091 PGOUseBBInfo *findBBInfo(const BasicBlock *BB) const {
1092 return FuncInfo.findBBInfo(BB);
1093 }
1094
1095 Function &getFunc() const { return F; }
1096
1097 void dumpInfo(StringRef Str = "") const { FuncInfo.dumpInfo(Str); }
1098
1099 uint64_t getProgramMaxCount() const { return ProgramMaxCount; }
1100
1101private:
1102 Function &F;
1103 Module *M;
1105 ProfileSummaryInfo *PSI;
1106
1107 // This member stores the shared information with class PGOGenFunc.
1108 FuncPGOInstrumentation<PGOUseEdge, PGOUseBBInfo> FuncInfo;
1109
1110 // The maximum count value in the profile. This is only used in PGO use
1111 // compilation.
1112 uint64_t ProgramMaxCount;
1113
1114 // Position of counter that remains to be read.
1115 uint32_t CountPosition = 0;
1116
1117 // Total size of the profile count for this function.
1118 uint32_t ProfileCountSize = 0;
1119
1120 // ProfileRecord for this function.
1121 InstrProfRecord ProfileRecord;
1122
1123 // Function hotness info derived from profile.
1124 FuncFreqAttr FreqAttr;
1125
1126 // Is to use the context sensitive profile.
1127 bool IsCS;
1128
1129 // Find the Instrumented BB and set the value. Return false on error.
1130 bool setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile);
1131
1132 // Set the edge counter value for the unknown edge -- there should be only
1133 // one unknown edge.
1134 void setEdgeCount(DirectEdges &Edges, uint64_t Value);
1135
1136 // Set the hot/cold inline hints based on the count values.
1137 // FIXME: This function should be removed once the functionality in
1138 // the inliner is implemented.
1139 void markFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) {
1140 if (PSI->isHotCount(EntryCount))
1141 FreqAttr = FFA_Hot;
1142 else if (PSI->isColdCount(MaxCount))
1143 FreqAttr = FFA_Cold;
1144 }
1145};
1146
1147} // end anonymous namespace
1148
1149/// Set up InEdges/OutEdges for all BBs in the MST.
1151 const FuncPGOInstrumentation<PGOUseEdge, PGOUseBBInfo> &FuncInfo) {
1152 // This is not required when there is block coverage inference.
1153 if (FuncInfo.BCI)
1154 return;
1155 for (const auto &E : FuncInfo.MST.allEdges()) {
1156 if (E->Removed)
1157 continue;
1158 const BasicBlock *SrcBB = E->SrcBB;
1159 const BasicBlock *DestBB = E->DestBB;
1160 PGOUseBBInfo &SrcInfo = FuncInfo.getBBInfo(SrcBB);
1161 PGOUseBBInfo &DestInfo = FuncInfo.getBBInfo(DestBB);
1162 SrcInfo.addOutEdge(E.get());
1163 DestInfo.addInEdge(E.get());
1164 }
1165}
1166
1167// Visit all the edges and assign the count value for the instrumented
1168// edges and the BB. Return false on error.
1169bool PGOUseFunc::setInstrumentedCounts(
1170 const std::vector<uint64_t> &CountFromProfile) {
1171
1172 std::vector<BasicBlock *> InstrumentBBs;
1173 FuncInfo.getInstrumentBBs(InstrumentBBs);
1174
1175 setupBBInfoEdges(FuncInfo);
1176
1177 unsigned NumCounters =
1178 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
1179 // The number of counters here should match the number of counters
1180 // in profile. Return if they mismatch.
1181 if (NumCounters != CountFromProfile.size()) {
1182 return false;
1183 }
1184 auto *FuncEntry = &*F.begin();
1185
1186 // Set the profile count to the Instrumented BBs.
1187 uint32_t I = 0;
1188 for (BasicBlock *InstrBB : InstrumentBBs) {
1189 uint64_t CountValue = CountFromProfile[I++];
1190 PGOUseBBInfo &Info = getBBInfo(InstrBB);
1191 // If we reach here, we know that we have some nonzero count
1192 // values in this function. The entry count should not be 0.
1193 // Fix it if necessary.
1194 if (InstrBB == FuncEntry && CountValue == 0)
1195 CountValue = 1;
1196 Info.setBBInfoCount(CountValue);
1197 }
1198 ProfileCountSize = CountFromProfile.size();
1199 CountPosition = I;
1200
1201 // Set the edge count and update the count of unknown edges for BBs.
1202 auto setEdgeCount = [this](PGOUseEdge *E, uint64_t Value) -> void {
1203 E->setEdgeCount(Value);
1204 this->getBBInfo(E->SrcBB).UnknownCountOutEdge--;
1205 this->getBBInfo(E->DestBB).UnknownCountInEdge--;
1206 };
1207
1208 // Set the profile count the Instrumented edges. There are BBs that not in
1209 // MST but not instrumented. Need to set the edge count value so that we can
1210 // populate the profile counts later.
1211 for (const auto &E : FuncInfo.MST.allEdges()) {
1212 if (E->Removed || E->InMST)
1213 continue;
1214 const BasicBlock *SrcBB = E->SrcBB;
1215 PGOUseBBInfo &SrcInfo = getBBInfo(SrcBB);
1216
1217 // If only one out-edge, the edge profile count should be the same as BB
1218 // profile count.
1219 if (SrcInfo.CountValid && SrcInfo.OutEdges.size() == 1)
1220 setEdgeCount(E.get(), SrcInfo.CountValue);
1221 else {
1222 const BasicBlock *DestBB = E->DestBB;
1223 PGOUseBBInfo &DestInfo = getBBInfo(DestBB);
1224 // If only one in-edge, the edge profile count should be the same as BB
1225 // profile count.
1226 if (DestInfo.CountValid && DestInfo.InEdges.size() == 1)
1227 setEdgeCount(E.get(), DestInfo.CountValue);
1228 }
1229 if (E->CountValid)
1230 continue;
1231 // E's count should have been set from profile. If not, this meenas E skips
1232 // the instrumentation. We set the count to 0.
1233 setEdgeCount(E.get(), 0);
1234 }
1235 return true;
1236}
1237
1238// Set the count value for the unknown edge. There should be one and only one
1239// unknown edge in Edges vector.
1240void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) {
1241 for (auto &E : Edges) {
1242 if (E->CountValid)
1243 continue;
1244 E->setEdgeCount(Value);
1245
1246 getBBInfo(E->SrcBB).UnknownCountOutEdge--;
1247 getBBInfo(E->DestBB).UnknownCountInEdge--;
1248 return;
1249 }
1250 llvm_unreachable("Cannot find the unknown count edge");
1251}
1252
1253// Emit function metadata indicating PGO profile mismatch.
1255 const char MetadataName[] = "instr_prof_hash_mismatch";
1257 // If this metadata already exists, ignore.
1258 auto *Existing = F.getMetadata(LLVMContext::MD_annotation);
1259 if (Existing) {
1260 MDTuple *Tuple = cast<MDTuple>(Existing);
1261 for (const auto &N : Tuple->operands()) {
1262 if (N.equalsStr(MetadataName))
1263 return;
1264 Names.push_back(N.get());
1265 }
1266 }
1267
1268 MDBuilder MDB(ctx);
1269 Names.push_back(MDB.createString(MetadataName));
1270 MDNode *MD = MDTuple::get(ctx, Names);
1271 F.setMetadata(LLVMContext::MD_annotation, MD);
1272}
1273
1274void PGOUseFunc::handleInstrProfError(Error Err, uint64_t MismatchedFuncSum) {
1275 handleAllErrors(std::move(Err), [&](const InstrProfError &IPE) {
1276 auto &Ctx = M->getContext();
1277 auto Err = IPE.get();
1278 bool SkipWarning = false;
1279 LLVM_DEBUG(dbgs() << "Error in reading profile for Func "
1280 << FuncInfo.FuncName << ": ");
1281 if (Err == instrprof_error::unknown_function) {
1282 IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++;
1283 SkipWarning = !PGOWarnMissing;
1284 LLVM_DEBUG(dbgs() << "unknown function");
1285 } else if (Err == instrprof_error::hash_mismatch ||
1286 Err == instrprof_error::malformed) {
1287 IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++;
1288 SkipWarning =
1291 (F.hasComdat() || F.getLinkage() == GlobalValue::WeakAnyLinkage ||
1293 LLVM_DEBUG(dbgs() << "hash mismatch (hash= " << FuncInfo.FunctionHash
1294 << " skip=" << SkipWarning << ")");
1295 // Emit function metadata indicating PGO profile mismatch.
1296 annotateFunctionWithHashMismatch(F, M->getContext());
1297 }
1298
1299 LLVM_DEBUG(dbgs() << " IsCS=" << IsCS << "\n");
1300 if (SkipWarning)
1301 return;
1302
1303 std::string Msg =
1304 IPE.message() + std::string(" ") + F.getName().str() +
1305 std::string(" Hash = ") + std::to_string(FuncInfo.FunctionHash) +
1306 std::string(" up to ") + std::to_string(MismatchedFuncSum) +
1307 std::string(" count discarded");
1308
1309 Ctx.diagnose(
1310 DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning));
1311 });
1312}
1313
1314// Read the profile from ProfileFileName and assign the value to the
1315// instrumented BB and the edges. This function also updates ProgramMaxCount.
1316// Return true if the profile are successfully read, and false on errors.
1317bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
1319 auto &Ctx = M->getContext();
1320 uint64_t MismatchedFuncSum = 0;
1322 FuncInfo.FuncName, FuncInfo.FunctionHash, FuncInfo.DeprecatedFuncName,
1323 &MismatchedFuncSum);
1324 if (Error E = Result.takeError()) {
1325 handleInstrProfError(std::move(E), MismatchedFuncSum);
1326 return false;
1327 }
1328 ProfileRecord = std::move(Result.get());
1329 PseudoKind = ProfileRecord.getCountPseudoKind();
1330 if (PseudoKind != InstrProfRecord::NotPseudo) {
1331 return true;
1332 }
1333 std::vector<uint64_t> &CountFromProfile = ProfileRecord.Counts;
1334
1335 IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++;
1336 LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n");
1337
1338 uint64_t ValueSum = 0;
1339 for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) {
1340 LLVM_DEBUG(dbgs() << " " << I << ": " << CountFromProfile[I] << "\n");
1341 ValueSum += CountFromProfile[I];
1342 }
1343 AllZeros = (ValueSum == 0);
1344
1345 LLVM_DEBUG(dbgs() << "SUM = " << ValueSum << "\n");
1346
1347 getBBInfo(nullptr).UnknownCountOutEdge = 2;
1348 getBBInfo(nullptr).UnknownCountInEdge = 2;
1349
1350 if (!setInstrumentedCounts(CountFromProfile)) {
1351 LLVM_DEBUG(
1352 dbgs() << "Inconsistent number of counts, skipping this function");
1353 Ctx.diagnose(DiagnosticInfoPGOProfile(
1354 M->getName().data(),
1355 Twine("Inconsistent number of counts in ") + F.getName().str() +
1356 Twine(": the profile may be stale or there is a function name "
1357 "collision."),
1358 DS_Warning));
1359 return false;
1360 }
1361 ProgramMaxCount = PGOReader->getMaximumFunctionCount(IsCS);
1362 return true;
1363}
1364
1365void PGOUseFunc::populateCoverage(IndexedInstrProfReader *PGOReader) {
1366 uint64_t MismatchedFuncSum = 0;
1368 FuncInfo.FuncName, FuncInfo.FunctionHash, FuncInfo.DeprecatedFuncName,
1369 &MismatchedFuncSum);
1370 if (auto Err = Result.takeError()) {
1371 handleInstrProfError(std::move(Err), MismatchedFuncSum);
1372 return;
1373 }
1374
1375 std::vector<uint64_t> &CountsFromProfile = Result.get().Counts;
1377 unsigned Index = 0;
1378 for (auto &BB : F)
1379 if (FuncInfo.BCI->shouldInstrumentBlock(BB))
1380 Coverage[&BB] = (CountsFromProfile[Index++] != 0);
1381 assert(Index == CountsFromProfile.size());
1382
1383 // For each B in InverseDependencies[A], if A is covered then B is covered.
1385 InverseDependencies;
1386 for (auto &BB : F) {
1387 for (auto *Dep : FuncInfo.BCI->getDependencies(BB)) {
1388 // If Dep is covered then BB is covered.
1389 InverseDependencies[Dep].insert(&BB);
1390 }
1391 }
1392
1393 // Infer coverage of the non-instrumented blocks using a flood-fill algorithm.
1394 std::stack<const BasicBlock *> CoveredBlocksToProcess;
1395 for (auto &[BB, IsCovered] : Coverage)
1396 if (IsCovered)
1397 CoveredBlocksToProcess.push(BB);
1398
1399 while (!CoveredBlocksToProcess.empty()) {
1400 auto *CoveredBlock = CoveredBlocksToProcess.top();
1401 assert(Coverage[CoveredBlock]);
1402 CoveredBlocksToProcess.pop();
1403 for (auto *BB : InverseDependencies[CoveredBlock]) {
1404 // If CoveredBlock is covered then BB is covered.
1405 if (Coverage[BB])
1406 continue;
1407 Coverage[BB] = true;
1408 CoveredBlocksToProcess.push(BB);
1409 }
1410 }
1411
1412 // Annotate block coverage.
1413 MDBuilder MDB(F.getContext());
1414 // We set the entry count to 10000 if the entry block is covered so that BFI
1415 // can propagate a fraction of this count to the other covered blocks.
1416 F.setEntryCount(Coverage[&F.getEntryBlock()] ? 10000 : 0);
1417 for (auto &BB : F) {
1418 // For a block A and its successor B, we set the edge weight as follows:
1419 // If A is covered and B is covered, set weight=1.
1420 // If A is covered and B is uncovered, set weight=0.
1421 // If A is uncovered, set weight=1.
1422 // This setup will allow BFI to give nonzero profile counts to only covered
1423 // blocks.
1425 for (auto *Succ : successors(&BB))
1426 Weights.push_back((Coverage[Succ] || !Coverage[&BB]) ? 1 : 0);
1427 if (Weights.size() >= 2)
1428 llvm::setBranchWeights(*BB.getTerminator(), Weights);
1429 }
1430
1431 unsigned NumCorruptCoverage = 0;
1432 DominatorTree DT(F);
1433 LoopInfo LI(DT);
1434 BranchProbabilityInfo BPI(F, LI);
1435 BlockFrequencyInfo BFI(F, BPI, LI);
1436 auto IsBlockDead = [&](const BasicBlock &BB) -> std::optional<bool> {
1437 if (auto C = BFI.getBlockProfileCount(&BB))
1438 return C == 0;
1439 return {};
1440 };
1441 LLVM_DEBUG(dbgs() << "Block Coverage: (Instrumented=*, Covered=X)\n");
1442 for (auto &BB : F) {
1443 LLVM_DEBUG(dbgs() << (FuncInfo.BCI->shouldInstrumentBlock(BB) ? "* " : " ")
1444 << (Coverage[&BB] ? "X " : " ") << " " << BB.getName()
1445 << "\n");
1446 // In some cases it is possible to find a covered block that has no covered
1447 // successors, e.g., when a block calls a function that may call exit(). In
1448 // those cases, BFI could find its successor to be covered while BCI could
1449 // find its successor to be dead.
1450 if (Coverage[&BB] == IsBlockDead(BB).value_or(false)) {
1451 LLVM_DEBUG(
1452 dbgs() << "Found inconsistent block covearge for " << BB.getName()
1453 << ": BCI=" << (Coverage[&BB] ? "Covered" : "Dead") << " BFI="
1454 << (IsBlockDead(BB).value() ? "Dead" : "Covered") << "\n");
1455 ++NumCorruptCoverage;
1456 }
1457 if (Coverage[&BB])
1458 ++NumCoveredBlocks;
1459 }
1460 if (PGOVerifyBFI && NumCorruptCoverage) {
1461 auto &Ctx = M->getContext();
1462 Ctx.diagnose(DiagnosticInfoPGOProfile(
1463 M->getName().data(),
1464 Twine("Found inconsistent block coverage for function ") + F.getName() +
1465 " in " + Twine(NumCorruptCoverage) + " blocks.",
1466 DS_Warning));
1467 }
1469 FuncInfo.BCI->viewBlockCoverageGraph(&Coverage);
1470}
1471
1472// Populate the counters from instrumented BBs to all BBs.
1473// In the end of this operation, all BBs should have a valid count value.
1474void PGOUseFunc::populateCounters() {
1475 bool Changes = true;
1476 unsigned NumPasses = 0;
1477 while (Changes) {
1478 NumPasses++;
1479 Changes = false;
1480
1481 // For efficient traversal, it's better to start from the end as most
1482 // of the instrumented edges are at the end.
1483 for (auto &BB : reverse(F)) {
1484 PGOUseBBInfo *Count = findBBInfo(&BB);
1485 if (Count == nullptr)
1486 continue;
1487 if (!Count->CountValid) {
1488 if (Count->UnknownCountOutEdge == 0) {
1489 Count->CountValue = sumEdgeCount(Count->OutEdges);
1490 Count->CountValid = true;
1491 Changes = true;
1492 } else if (Count->UnknownCountInEdge == 0) {
1493 Count->CountValue = sumEdgeCount(Count->InEdges);
1494 Count->CountValid = true;
1495 Changes = true;
1496 }
1497 }
1498 if (Count->CountValid) {
1499 if (Count->UnknownCountOutEdge == 1) {
1500 uint64_t Total = 0;
1501 uint64_t OutSum = sumEdgeCount(Count->OutEdges);
1502 // If the one of the successor block can early terminate (no-return),
1503 // we can end up with situation where out edge sum count is larger as
1504 // the source BB's count is collected by a post-dominated block.
1505 if (Count->CountValue > OutSum)
1506 Total = Count->CountValue - OutSum;
1507 setEdgeCount(Count->OutEdges, Total);
1508 Changes = true;
1509 }
1510 if (Count->UnknownCountInEdge == 1) {
1511 uint64_t Total = 0;
1512 uint64_t InSum = sumEdgeCount(Count->InEdges);
1513 if (Count->CountValue > InSum)
1514 Total = Count->CountValue - InSum;
1515 setEdgeCount(Count->InEdges, Total);
1516 Changes = true;
1517 }
1518 }
1519 }
1520 }
1521
1522 LLVM_DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n");
1523 (void)NumPasses;
1524#ifndef NDEBUG
1525 // Assert every BB has a valid counter.
1526 for (auto &BB : F) {
1527 auto BI = findBBInfo(&BB);
1528 if (BI == nullptr)
1529 continue;
1530 assert(BI->CountValid && "BB count is not valid");
1531 }
1532#endif
1533 uint64_t FuncEntryCount = getBBInfo(&*F.begin()).CountValue;
1534 uint64_t FuncMaxCount = FuncEntryCount;
1535 for (auto &BB : F) {
1536 auto BI = findBBInfo(&BB);
1537 if (BI == nullptr)
1538 continue;
1539 FuncMaxCount = std::max(FuncMaxCount, BI->CountValue);
1540 }
1541
1542 // Fix the obviously inconsistent entry count.
1543 if (FuncMaxCount > 0 && FuncEntryCount == 0)
1544 FuncEntryCount = 1;
1546 markFunctionAttributes(FuncEntryCount, FuncMaxCount);
1547
1548 // Now annotate select instructions
1549 FuncInfo.SIVisitor.annotateSelects(this, &CountPosition);
1550 assert(CountPosition == ProfileCountSize);
1551
1552 LLVM_DEBUG(FuncInfo.dumpInfo("after reading profile."));
1553}
1554
1555// Assign the scaled count values to the BB with multiple out edges.
1556void PGOUseFunc::setBranchWeights() {
1557 // Generate MD_prof metadata for every branch instruction.
1558 LLVM_DEBUG(dbgs() << "\nSetting branch weights for func " << F.getName()
1559 << " IsCS=" << IsCS << "\n");
1560 for (auto &BB : F) {
1561 Instruction *TI = BB.getTerminator();
1562 if (TI->getNumSuccessors() < 2)
1563 continue;
1564 if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) ||
1565 isa<IndirectBrInst>(TI) || isa<InvokeInst>(TI) ||
1566 isa<CallBrInst>(TI)))
1567 continue;
1568
1569 if (getBBInfo(&BB).CountValue == 0)
1570 continue;
1571
1572 // We have a non-zero Branch BB.
1573 const PGOUseBBInfo &BBCountInfo = getBBInfo(&BB);
1574 unsigned Size = BBCountInfo.OutEdges.size();
1575 SmallVector<uint64_t, 2> EdgeCounts(Size, 0);
1576 uint64_t MaxCount = 0;
1577 for (unsigned s = 0; s < Size; s++) {
1578 const PGOUseEdge *E = BBCountInfo.OutEdges[s];
1579 const BasicBlock *SrcBB = E->SrcBB;
1580 const BasicBlock *DestBB = E->DestBB;
1581 if (DestBB == nullptr)
1582 continue;
1583 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
1584 uint64_t EdgeCount = E->CountValue;
1585 if (EdgeCount > MaxCount)
1586 MaxCount = EdgeCount;
1587 EdgeCounts[SuccNum] = EdgeCount;
1588 }
1589
1590 if (MaxCount)
1591 setProfMetadata(M, TI, EdgeCounts, MaxCount);
1592 else {
1593 // A zero MaxCount can come about when we have a BB with a positive
1594 // count, and whose successor blocks all have 0 count. This can happen
1595 // when there is no exit block and the code exits via a noreturn function.
1596 auto &Ctx = M->getContext();
1597 Ctx.diagnose(DiagnosticInfoPGOProfile(
1598 M->getName().data(),
1599 Twine("Profile in ") + F.getName().str() +
1600 Twine(" partially ignored") +
1601 Twine(", possibly due to the lack of a return path."),
1602 DS_Warning));
1603 }
1604 }
1605}
1606
1608 for (BasicBlock *Pred : predecessors(BB)) {
1609 if (isa<IndirectBrInst>(Pred->getTerminator()))
1610 return true;
1611 }
1612 return false;
1613}
1614
1615void PGOUseFunc::annotateIrrLoopHeaderWeights() {
1616 LLVM_DEBUG(dbgs() << "\nAnnotating irreducible loop header weights.\n");
1617 // Find irr loop headers
1618 for (auto &BB : F) {
1619 // As a heuristic also annotate indrectbr targets as they have a high chance
1620 // to become an irreducible loop header after the indirectbr tail
1621 // duplication.
1622 if (BFI->isIrrLoopHeader(&BB) || isIndirectBrTarget(&BB)) {
1623 Instruction *TI = BB.getTerminator();
1624 const PGOUseBBInfo &BBCountInfo = getBBInfo(&BB);
1625 setIrrLoopHeaderMetadata(M, TI, BBCountInfo.CountValue);
1626 }
1627 }
1628}
1629
1630void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) {
1631 Module *M = F.getParent();
1632 IRBuilder<> Builder(&SI);
1633 Type *Int64Ty = Builder.getInt64Ty();
1634 auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty);
1635 Builder.CreateCall(
1636 Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment_step),
1637 {FuncNameVar, Builder.getInt64(FuncHash), Builder.getInt32(TotalNumCtrs),
1638 Builder.getInt32(*CurCtrIdx), Step});
1639 ++(*CurCtrIdx);
1640}
1641
1642void SelectInstVisitor::annotateOneSelectInst(SelectInst &SI) {
1643 std::vector<uint64_t> &CountFromProfile = UseFunc->getProfileRecord().Counts;
1644 assert(*CurCtrIdx < CountFromProfile.size() &&
1645 "Out of bound access of counters");
1646 uint64_t SCounts[2];
1647 SCounts[0] = CountFromProfile[*CurCtrIdx]; // True count
1648 ++(*CurCtrIdx);
1649 uint64_t TotalCount = 0;
1650 auto BI = UseFunc->findBBInfo(SI.getParent());
1651 if (BI != nullptr)
1652 TotalCount = BI->CountValue;
1653 // False Count
1654 SCounts[1] = (TotalCount > SCounts[0] ? TotalCount - SCounts[0] : 0);
1655 uint64_t MaxCount = std::max(SCounts[0], SCounts[1]);
1656 if (MaxCount)
1657 setProfMetadata(F.getParent(), &SI, SCounts, MaxCount);
1658}
1659
1660void SelectInstVisitor::visitSelectInst(SelectInst &SI) {
1661 if (!PGOInstrSelect || PGOFunctionEntryCoverage || HasSingleByteCoverage)
1662 return;
1663 // FIXME: do not handle this yet.
1664 if (SI.getCondition()->getType()->isVectorTy())
1665 return;
1666
1667 switch (Mode) {
1668 case VM_counting:
1669 NSIs++;
1670 return;
1671 case VM_instrument:
1672 instrumentOneSelectInst(SI);
1673 return;
1674 case VM_annotate:
1675 annotateOneSelectInst(SI);
1676 return;
1677 }
1678
1679 llvm_unreachable("Unknown visiting mode");
1680}
1681
1682// Traverse all valuesites and annotate the instructions for all value kind.
1683void PGOUseFunc::annotateValueSites() {
1685 return;
1686
1687 // Create the PGOFuncName meta data.
1688 createPGOFuncNameMetadata(F, FuncInfo.FuncName);
1689
1690 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
1691 annotateValueSites(Kind);
1692}
1693
1694// Annotate the instructions for a specific value kind.
1695void PGOUseFunc::annotateValueSites(uint32_t Kind) {
1696 assert(Kind <= IPVK_Last);
1697 unsigned ValueSiteIndex = 0;
1698 auto &ValueSites = FuncInfo.ValueSites[Kind];
1699 unsigned NumValueSites = ProfileRecord.getNumValueSites(Kind);
1700 if (NumValueSites != ValueSites.size()) {
1701 auto &Ctx = M->getContext();
1702 Ctx.diagnose(DiagnosticInfoPGOProfile(
1703 M->getName().data(),
1704 Twine("Inconsistent number of value sites for ") +
1705 Twine(ValueProfKindDescr[Kind]) + Twine(" profiling in \"") +
1706 F.getName().str() +
1707 Twine("\", possibly due to the use of a stale profile."),
1708 DS_Warning));
1709 return;
1710 }
1711
1712 for (VPCandidateInfo &I : ValueSites) {
1713 LLVM_DEBUG(dbgs() << "Read one value site profile (kind = " << Kind
1714 << "): Index = " << ValueSiteIndex << " out of "
1715 << NumValueSites << "\n");
1716 annotateValueSite(*M, *I.AnnotatedInst, ProfileRecord,
1717 static_cast<InstrProfValueKind>(Kind), ValueSiteIndex,
1718 Kind == IPVK_MemOPSize ? MaxNumMemOPAnnotations
1720 ValueSiteIndex++;
1721 }
1722}
1723
1724// Collect the set of members for each Comdat in module M and store
1725// in ComdatMembers.
1727 Module &M,
1728 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
1729 if (!DoComdatRenaming)
1730 return;
1731 for (Function &F : M)
1732 if (Comdat *C = F.getComdat())
1733 ComdatMembers.insert(std::make_pair(C, &F));
1734 for (GlobalVariable &GV : M.globals())
1735 if (Comdat *C = GV.getComdat())
1736 ComdatMembers.insert(std::make_pair(C, &GV));
1737 for (GlobalAlias &GA : M.aliases())
1738 if (Comdat *C = GA.getComdat())
1739 ComdatMembers.insert(std::make_pair(C, &GA));
1740}
1741
1742// Return true if we should not find instrumentation data for this function
1743static bool skipPGOUse(const Function &F) {
1744 if (F.isDeclaration())
1745 return true;
1746 // If there are too many critical edges, PGO might cause
1747 // compiler time problem. Skip PGO if the number of
1748 // critical edges execeed the threshold.
1749 unsigned NumCriticalEdges = 0;
1750 for (auto &BB : F) {
1751 const Instruction *TI = BB.getTerminator();
1752 for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) {
1753 if (isCriticalEdge(TI, I))
1754 NumCriticalEdges++;
1755 }
1756 }
1757 if (NumCriticalEdges > PGOFunctionCriticalEdgeThreshold) {
1758 LLVM_DEBUG(dbgs() << "In func " << F.getName()
1759 << ", NumCriticalEdges=" << NumCriticalEdges
1760 << " exceed the threshold. Skip PGO.\n");
1761 return true;
1762 }
1763 return false;
1764}
1765
1766// Return true if we should not instrument this function
1767static bool skipPGOGen(const Function &F) {
1768 if (skipPGOUse(F))
1769 return true;
1770 if (F.hasFnAttribute(llvm::Attribute::Naked))
1771 return true;
1772 if (F.hasFnAttribute(llvm::Attribute::NoProfile))
1773 return true;
1774 if (F.hasFnAttribute(llvm::Attribute::SkipProfile))
1775 return true;
1776 if (F.getInstructionCount() < PGOFunctionSizeThreshold)
1777 return true;
1778 return false;
1779}
1780
1782 Module &M, function_ref<TargetLibraryInfo &(Function &)> LookupTLI,
1784 function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS) {
1785 // For the context-sensitve instrumentation, we should have a separated pass
1786 // (before LTO/ThinLTO linking) to create these variables.
1787 if (!IsCS)
1788 createIRLevelProfileFlagVar(M, /*IsCS=*/false);
1789 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
1790 collectComdatMembers(M, ComdatMembers);
1791
1792 for (auto &F : M) {
1793 if (skipPGOGen(F))
1794 continue;
1795 auto &TLI = LookupTLI(F);
1796 auto *BPI = LookupBPI(F);
1797 auto *BFI = LookupBFI(F);
1798 instrumentOneFunc(F, &M, TLI, BPI, BFI, ComdatMembers, IsCS);
1799 }
1800 return true;
1801}
1802
1805 createProfileFileNameVar(M, CSInstrName);
1806 // The variable in a comdat may be discarded by LTO. Ensure the declaration
1807 // will be retained.
1812 return PA;
1813}
1814
1817 auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
1818 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
1820 };
1821 auto LookupBPI = [&FAM](Function &F) {
1823 };
1824 auto LookupBFI = [&FAM](Function &F) {
1826 };
1827
1828 if (!InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, IsCS))
1829 return PreservedAnalyses::all();
1830
1831 return PreservedAnalyses::none();
1832}
1833
1834// Using the ratio b/w sums of profile count values and BFI count values to
1835// adjust the func entry count.
1836static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI,
1837 BranchProbabilityInfo &NBPI) {
1838 Function &F = Func.getFunc();
1839 BlockFrequencyInfo NBFI(F, NBPI, LI);
1840#ifndef NDEBUG
1841 auto BFIEntryCount = F.getEntryCount();
1842 assert(BFIEntryCount && (BFIEntryCount->getCount() > 0) &&
1843 "Invalid BFI Entrycount");
1844#endif
1845 auto SumCount = APFloat::getZero(APFloat::IEEEdouble());
1846 auto SumBFICount = APFloat::getZero(APFloat::IEEEdouble());
1847 for (auto &BBI : F) {
1848 uint64_t CountValue = 0;
1849 uint64_t BFICountValue = 0;
1850 if (!Func.findBBInfo(&BBI))
1851 continue;
1852 auto BFICount = NBFI.getBlockProfileCount(&BBI);
1853 CountValue = Func.getBBInfo(&BBI).CountValue;
1854 BFICountValue = *BFICount;
1855 SumCount.add(APFloat(CountValue * 1.0), APFloat::rmNearestTiesToEven);
1856 SumBFICount.add(APFloat(BFICountValue * 1.0), APFloat::rmNearestTiesToEven);
1857 }
1858 if (SumCount.isZero())
1859 return;
1860
1861 assert(SumBFICount.compare(APFloat(0.0)) == APFloat::cmpGreaterThan &&
1862 "Incorrect sum of BFI counts");
1863 if (SumBFICount.compare(SumCount) == APFloat::cmpEqual)
1864 return;
1865 double Scale = (SumCount / SumBFICount).convertToDouble();
1866 if (Scale < 1.001 && Scale > 0.999)
1867 return;
1868
1869 uint64_t FuncEntryCount = Func.getBBInfo(&*F.begin()).CountValue;
1870 uint64_t NewEntryCount = 0.5 + FuncEntryCount * Scale;
1871 if (NewEntryCount == 0)
1872 NewEntryCount = 1;
1873 if (NewEntryCount != FuncEntryCount) {
1874 F.setEntryCount(ProfileCount(NewEntryCount, Function::PCT_Real));
1875 LLVM_DEBUG(dbgs() << "FixFuncEntryCount: in " << F.getName()
1876 << ", entry_count " << FuncEntryCount << " --> "
1877 << NewEntryCount << "\n");
1878 }
1879}
1880
1881// Compare the profile count values with BFI count values, and print out
1882// the non-matching ones.
1883static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI,
1885 uint64_t HotCountThreshold,
1887 Function &F = Func.getFunc();
1888 BlockFrequencyInfo NBFI(F, NBPI, LI);
1889 // bool PrintFunc = false;
1890 bool HotBBOnly = PGOVerifyHotBFI;
1891 StringRef Msg;
1893
1894 unsigned BBNum = 0, BBMisMatchNum = 0, NonZeroBBNum = 0;
1895 for (auto &BBI : F) {
1896 uint64_t CountValue = 0;
1897 uint64_t BFICountValue = 0;
1898
1899 if (Func.getBBInfo(&BBI).CountValid)
1900 CountValue = Func.getBBInfo(&BBI).CountValue;
1901
1902 BBNum++;
1903 if (CountValue)
1904 NonZeroBBNum++;
1905 auto BFICount = NBFI.getBlockProfileCount(&BBI);
1906 if (BFICount)
1907 BFICountValue = *BFICount;
1908
1909 if (HotBBOnly) {
1910 bool rawIsHot = CountValue >= HotCountThreshold;
1911 bool BFIIsHot = BFICountValue >= HotCountThreshold;
1912 bool rawIsCold = CountValue <= ColdCountThreshold;
1913 bool ShowCount = false;
1914 if (rawIsHot && !BFIIsHot) {
1915 Msg = "raw-Hot to BFI-nonHot";
1916 ShowCount = true;
1917 } else if (rawIsCold && BFIIsHot) {
1918 Msg = "raw-Cold to BFI-Hot";
1919 ShowCount = true;
1920 }
1921 if (!ShowCount)
1922 continue;
1923 } else {
1924 if ((CountValue < PGOVerifyBFICutoff) &&
1925 (BFICountValue < PGOVerifyBFICutoff))
1926 continue;
1927 uint64_t Diff = (BFICountValue >= CountValue)
1928 ? BFICountValue - CountValue
1929 : CountValue - BFICountValue;
1930 if (Diff <= CountValue / 100 * PGOVerifyBFIRatio)
1931 continue;
1932 }
1933 BBMisMatchNum++;
1934
1935 ORE.emit([&]() {
1937 F.getSubprogram(), &BBI);
1938 Remark << "BB " << ore::NV("Block", BBI.getName())
1939 << " Count=" << ore::NV("Count", CountValue)
1940 << " BFI_Count=" << ore::NV("Count", BFICountValue);
1941 if (!Msg.empty())
1942 Remark << " (" << Msg << ")";
1943 return Remark;
1944 });
1945 }
1946 if (BBMisMatchNum)
1947 ORE.emit([&]() {
1948 return OptimizationRemarkAnalysis(DEBUG_TYPE, "bfi-verify",
1949 F.getSubprogram(), &F.getEntryBlock())
1950 << "In Func " << ore::NV("Function", F.getName())
1951 << ": Num_of_BB=" << ore::NV("Count", BBNum)
1952 << ", Num_of_non_zerovalue_BB=" << ore::NV("Count", NonZeroBBNum)
1953 << ", Num_of_mis_matching_BB=" << ore::NV("Count", BBMisMatchNum);
1954 });
1955}
1956
1958 Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName,
1959 vfs::FileSystem &FS,
1960 function_ref<TargetLibraryInfo &(Function &)> LookupTLI,
1963 ProfileSummaryInfo *PSI, bool IsCS) {
1964 LLVM_DEBUG(dbgs() << "Read in profile counters: ");
1965 auto &Ctx = M.getContext();
1966 // Read the counter array from file.
1967 auto ReaderOrErr = IndexedInstrProfReader::create(ProfileFileName, FS,
1968 ProfileRemappingFileName);
1969 if (Error E = ReaderOrErr.takeError()) {
1970 handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) {
1971 Ctx.diagnose(
1972 DiagnosticInfoPGOProfile(ProfileFileName.data(), EI.message()));
1973 });
1974 return false;
1975 }
1976
1977 std::unique_ptr<IndexedInstrProfReader> PGOReader =
1978 std::move(ReaderOrErr.get());
1979 if (!PGOReader) {
1980 Ctx.diagnose(DiagnosticInfoPGOProfile(ProfileFileName.data(),
1981 StringRef("Cannot get PGOReader")));
1982 return false;
1983 }
1984 if (!PGOReader->hasCSIRLevelProfile() && IsCS)
1985 return false;
1986
1987 // TODO: might need to change the warning once the clang option is finalized.
1988 if (!PGOReader->isIRLevelProfile()) {
1989 Ctx.diagnose(DiagnosticInfoPGOProfile(
1990 ProfileFileName.data(), "Not an IR level instrumentation profile"));
1991 return false;
1992 }
1993 if (PGOReader->functionEntryOnly()) {
1994 Ctx.diagnose(DiagnosticInfoPGOProfile(
1995 ProfileFileName.data(),
1996 "Function entry profiles are not yet supported for optimization"));
1997 return false;
1998 }
1999
2000 // Add the profile summary (read from the header of the indexed summary) here
2001 // so that we can use it below when reading counters (which checks if the
2002 // function should be marked with a cold or inlinehint attribute).
2003 M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()),
2006 PSI->refresh();
2007
2008 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
2009 collectComdatMembers(M, ComdatMembers);
2010 std::vector<Function *> HotFunctions;
2011 std::vector<Function *> ColdFunctions;
2012
2013 // If the profile marked as always instrument the entry BB, do the
2014 // same. Note this can be overwritten by the internal option in CFGMST.h
2015 bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled();
2016 if (PGOInstrumentEntry.getNumOccurrences() > 0)
2017 InstrumentFuncEntry = PGOInstrumentEntry;
2018 bool HasSingleByteCoverage = PGOReader->hasSingleByteCoverage();
2019 for (auto &F : M) {
2020 if (skipPGOUse(F))
2021 continue;
2022 auto &TLI = LookupTLI(F);
2023 auto *BPI = LookupBPI(F);
2024 auto *BFI = LookupBFI(F);
2025 if (!HasSingleByteCoverage) {
2026 // Split indirectbr critical edges here before computing the MST rather
2027 // than later in getInstrBB() to avoid invalidating it.
2028 SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI,
2029 BFI);
2030 }
2031 PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS,
2032 InstrumentFuncEntry, HasSingleByteCoverage);
2033 if (HasSingleByteCoverage) {
2034 Func.populateCoverage(PGOReader.get());
2035 continue;
2036 }
2037 // When PseudoKind is set to a vaule other than InstrProfRecord::NotPseudo,
2038 // it means the profile for the function is unrepresentative and this
2039 // function is actually hot / warm. We will reset the function hot / cold
2040 // attribute and drop all the profile counters.
2042 bool AllZeros = false;
2043 if (!Func.readCounters(PGOReader.get(), AllZeros, PseudoKind))
2044 continue;
2045 if (AllZeros) {
2046 F.setEntryCount(ProfileCount(0, Function::PCT_Real));
2047 if (Func.getProgramMaxCount() != 0)
2048 ColdFunctions.push_back(&F);
2049 continue;
2050 }
2051 if (PseudoKind != InstrProfRecord::NotPseudo) {
2052 // Clear function attribute cold.
2053 if (F.hasFnAttribute(Attribute::Cold))
2054 F.removeFnAttr(Attribute::Cold);
2055 // Set function attribute as hot.
2056 if (PseudoKind == InstrProfRecord::PseudoHot)
2057 F.addFnAttr(Attribute::Hot);
2058 continue;
2059 }
2060 Func.populateCounters();
2061 Func.setBranchWeights();
2062 Func.annotateValueSites();
2063 Func.annotateIrrLoopHeaderWeights();
2064 PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr();
2065 if (FreqAttr == PGOUseFunc::FFA_Cold)
2066 ColdFunctions.push_back(&F);
2067 else if (FreqAttr == PGOUseFunc::FFA_Hot)
2068 HotFunctions.push_back(&F);
2069 if (PGOViewCounts != PGOVCT_None &&
2070 (ViewBlockFreqFuncName.empty() ||
2071 F.getName().equals(ViewBlockFreqFuncName))) {
2073 std::unique_ptr<BranchProbabilityInfo> NewBPI =
2074 std::make_unique<BranchProbabilityInfo>(F, LI);
2075 std::unique_ptr<BlockFrequencyInfo> NewBFI =
2076 std::make_unique<BlockFrequencyInfo>(F, *NewBPI, LI);
2078 NewBFI->view();
2079 else if (PGOViewCounts == PGOVCT_Text) {
2080 dbgs() << "pgo-view-counts: " << Func.getFunc().getName() << "\n";
2081 NewBFI->print(dbgs());
2082 }
2083 }
2085 (ViewBlockFreqFuncName.empty() ||
2086 F.getName().equals(ViewBlockFreqFuncName))) {
2088 if (ViewBlockFreqFuncName.empty())
2089 WriteGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName());
2090 else
2091 ViewGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName());
2092 else if (PGOViewRawCounts == PGOVCT_Text) {
2093 dbgs() << "pgo-view-raw-counts: " << Func.getFunc().getName() << "\n";
2094 Func.dumpInfo();
2095 }
2096 }
2097
2100 BranchProbabilityInfo NBPI(F, LI);
2101
2102 // Fix func entry count.
2103 if (PGOFixEntryCount)
2104 fixFuncEntryCount(Func, LI, NBPI);
2105
2106 // Verify BlockFrequency information.
2107 uint64_t HotCountThreshold = 0, ColdCountThreshold = 0;
2108 if (PGOVerifyHotBFI) {
2109 HotCountThreshold = PSI->getOrCompHotCountThreshold();
2111 }
2112 verifyFuncBFI(Func, LI, NBPI, HotCountThreshold, ColdCountThreshold);
2113 }
2114 }
2115
2116 // Set function hotness attribute from the profile.
2117 // We have to apply these attributes at the end because their presence
2118 // can affect the BranchProbabilityInfo of any callers, resulting in an
2119 // inconsistent MST between prof-gen and prof-use.
2120 for (auto &F : HotFunctions) {
2121 F->addFnAttr(Attribute::InlineHint);
2122 LLVM_DEBUG(dbgs() << "Set inline attribute to function: " << F->getName()
2123 << "\n");
2124 }
2125 for (auto &F : ColdFunctions) {
2126 // Only set when there is no Attribute::Hot set by the user. For Hot
2127 // attribute, user's annotation has the precedence over the profile.
2128 if (F->hasFnAttribute(Attribute::Hot)) {
2129 auto &Ctx = M.getContext();
2130 std::string Msg = std::string("Function ") + F->getName().str() +
2131 std::string(" is annotated as a hot function but"
2132 " the profile is cold");
2133 Ctx.diagnose(
2134 DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning));
2135 continue;
2136 }
2137 F->addFnAttr(Attribute::Cold);
2138 LLVM_DEBUG(dbgs() << "Set cold attribute to function: " << F->getName()
2139 << "\n");
2140 }
2141 return true;
2142}
2143
2145 std::string Filename, std::string RemappingFilename, bool IsCS,
2147 : ProfileFileName(std::move(Filename)),
2148 ProfileRemappingFileName(std::move(RemappingFilename)), IsCS(IsCS),
2149 FS(std::move(VFS)) {
2150 if (!PGOTestProfileFile.empty())
2151 ProfileFileName = PGOTestProfileFile;
2152 if (!PGOTestProfileRemappingFile.empty())
2153 ProfileRemappingFileName = PGOTestProfileRemappingFile;
2154 if (!FS)
2156}
2157
2160
2161 auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
2162 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
2164 };
2165 auto LookupBPI = [&FAM](Function &F) {
2167 };
2168 auto LookupBFI = [&FAM](Function &F) {
2170 };
2171
2172 auto *PSI = &MAM.getResult<ProfileSummaryAnalysis>(M);
2173
2174 if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName, *FS,
2175 LookupTLI, LookupBPI, LookupBFI, PSI, IsCS))
2176 return PreservedAnalyses::all();
2177
2178 return PreservedAnalyses::none();
2179}
2180
2181static std::string getSimpleNodeName(const BasicBlock *Node) {
2182 if (!Node->getName().empty())
2183 return Node->getName().str();
2184
2185 std::string SimpleNodeName;
2186 raw_string_ostream OS(SimpleNodeName);
2187 Node->printAsOperand(OS, false);
2188 return OS.str();
2189}
2190
2192 ArrayRef<uint64_t> EdgeCounts, uint64_t MaxCount) {
2193 assert(MaxCount > 0 && "Bad max count");
2194 uint64_t Scale = calculateCountScale(MaxCount);
2196 for (const auto &ECI : EdgeCounts)
2197 Weights.push_back(scaleBranchCount(ECI, Scale));
2198
2199 LLVM_DEBUG(dbgs() << "Weight is: "; for (const auto &W
2200 : Weights) {
2201 dbgs() << W << " ";
2202 } dbgs() << "\n";);
2203
2204 misexpect::checkExpectAnnotations(*TI, Weights, /*IsFrontend=*/false);
2205
2206 setBranchWeights(*TI, Weights);
2208 std::string BrCondStr = getBranchCondString(TI);
2209 if (BrCondStr.empty())
2210 return;
2211
2212 uint64_t WSum =
2213 std::accumulate(Weights.begin(), Weights.end(), (uint64_t)0,
2214 [](uint64_t w1, uint64_t w2) { return w1 + w2; });
2215 uint64_t TotalCount =
2216 std::accumulate(EdgeCounts.begin(), EdgeCounts.end(), (uint64_t)0,
2217 [](uint64_t c1, uint64_t c2) { return c1 + c2; });
2218 Scale = calculateCountScale(WSum);
2219 BranchProbability BP(scaleBranchCount(Weights[0], Scale),
2220 scaleBranchCount(WSum, Scale));
2221 std::string BranchProbStr;
2222 raw_string_ostream OS(BranchProbStr);
2223 OS << BP;
2224 OS << " (total count : " << TotalCount << ")";
2225 OS.flush();
2226 Function *F = TI->getParent()->getParent();
2228 ORE.emit([&]() {
2229 return OptimizationRemark(DEBUG_TYPE, "pgo-instrumentation", TI)
2230 << BrCondStr << " is true with probability : " << BranchProbStr;
2231 });
2232 }
2233}
2234
2235namespace llvm {
2236
2238 MDBuilder MDB(M->getContext());
2239 TI->setMetadata(llvm::LLVMContext::MD_irr_loop,
2240 MDB.createIrrLoopHeaderWeight(Count));
2241}
2242
2243template <> struct GraphTraits<PGOUseFunc *> {
2244 using NodeRef = const BasicBlock *;
2247
2248 static NodeRef getEntryNode(const PGOUseFunc *G) {
2249 return &G->getFunc().front();
2250 }
2251
2253 return succ_begin(N);
2254 }
2255
2256 static ChildIteratorType child_end(const NodeRef N) { return succ_end(N); }
2257
2258 static nodes_iterator nodes_begin(const PGOUseFunc *G) {
2259 return nodes_iterator(G->getFunc().begin());
2260 }
2261
2262 static nodes_iterator nodes_end(const PGOUseFunc *G) {
2263 return nodes_iterator(G->getFunc().end());
2264 }
2265};
2266
2267template <> struct DOTGraphTraits<PGOUseFunc *> : DefaultDOTGraphTraits {
2268 explicit DOTGraphTraits(bool isSimple = false)
2270
2271 static std::string getGraphName(const PGOUseFunc *G) {
2272 return std::string(G->getFunc().getName());
2273 }
2274
2275 std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph) {
2276 std::string Result;
2277 raw_string_ostream OS(Result);
2278
2279 OS << getSimpleNodeName(Node) << ":\\l";
2280 PGOUseBBInfo *BI = Graph->findBBInfo(Node);
2281 OS << "Count : ";
2282 if (BI && BI->CountValid)
2283 OS << BI->CountValue << "\\l";
2284 else
2285 OS << "Unknown\\l";
2286
2287 if (!PGOInstrSelect)
2288 return Result;
2289
2290 for (const Instruction &I : *Node) {
2291 if (!isa<SelectInst>(&I))
2292 continue;
2293 // Display scaled counts for SELECT instruction:
2294 OS << "SELECT : { T = ";
2295 uint64_t TC, FC;
2296 bool HasProf = extractBranchWeights(I, TC, FC);
2297 if (!HasProf)
2298 OS << "Unknown, F = Unknown }\\l";
2299 else
2300 OS << TC << ", F = " << FC << " }\\l";
2301 }
2302 return Result;
2303 }
2304};
2305
2306} // end namespace llvm
This file implements a class to represent arbitrary precision integral constant values and operations...
This file contains the simple types necessary to represent the attributes associated with functions a...
This file finds the minimum set of blocks on a CFG that must be instrumented to infer execution cover...
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:693
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Given that RA is a live value
#define LLVM_DEBUG(X)
Definition: Debug.h:101
std::string Name
uint64_t Size
static BasicBlock * getInstrBB(CFGMST< Edge, BBInfo > &MST, Edge &E, const DenseSet< const BasicBlock * > &ExecBlocks)
#define DEBUG_TYPE
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
static cl::opt< unsigned > ColdCountThreshold("mfs-count-threshold", cl::desc("Minimum number of times a block must be executed to be retained."), cl::init(1), cl::Hidden)
Module.h This file contains the declarations for the Module class.
static cl::opt< bool > PGOInstrumentEntry("pgo-instrument-entry", cl::init(false), cl::Hidden, cl::desc("Force to instrument function entry basicblock."))
static cl::opt< std::string > PGOTestProfileRemappingFile("pgo-test-profile-remapping-file", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path of profile remapping file. This is mainly for " "test purpose."))
static cl::opt< bool > PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden, cl::desc("Fix function entry count in profile use."))
static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI, BranchProbabilityInfo &NBPI)
static cl::opt< bool > PGOVerifyHotBFI("pgo-verify-hot-bfi", cl::init(false), cl::Hidden, cl::desc("Print out the non-match BFI count if a hot raw profile count " "becomes non-hot, or a cold raw profile count becomes hot. " "The print is enabled under -Rpass-analysis=pgo, or " "internal option -pass-remakrs-analysis=pgo."))
static void annotateFunctionWithHashMismatch(Function &F, LLVMContext &ctx)
static cl::opt< bool > PGOTemporalInstrumentation("pgo-temporal-instrumentation", cl::desc("Use this option to enable temporal instrumentation"))
static cl::opt< unsigned > PGOFunctionSizeThreshold("pgo-function-size-threshold", cl::Hidden, cl::desc("Do not instrument functions smaller than this threshold."))
static cl::opt< unsigned > MaxNumAnnotations("icp-max-annotations", cl::init(3), cl::Hidden, cl::desc("Max number of annotations for a single indirect " "call callsite"))
static bool skipPGOGen(const Function &F)
static void collectComdatMembers(Module &M, std::unordered_multimap< Comdat *, GlobalValue * > &ComdatMembers)
static cl::opt< unsigned > PGOVerifyBFICutoff("pgo-verify-bfi-cutoff", cl::init(5), cl::Hidden, cl::desc("Set the threshold for pgo-verify-bfi: skip the counts whose " "profile count value is below."))
static cl::opt< std::string > PGOTraceFuncHash("pgo-trace-func-hash", cl::init("-"), cl::Hidden, cl::value_desc("function name"), cl::desc("Trace the hash of the function with this name."))
static void instrumentOneFunc(Function &F, Module *M, TargetLibraryInfo &TLI, BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFI, std::unordered_multimap< Comdat *, GlobalValue * > &ComdatMembers, bool IsCS)
static void populateEHOperandBundle(VPCandidateInfo &Cand, DenseMap< BasicBlock *, ColorVector > &BlockColors, SmallVectorImpl< OperandBundleDef > &OpBundles)
static cl::opt< bool > PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden, cl::desc("Use this option to turn on/off SELECT " "instruction instrumentation. "))
static cl::opt< bool > PGOFunctionEntryCoverage("pgo-function-entry-coverage", cl::Hidden, cl::desc("Use this option to enable function entry coverage instrumentation."))
static bool InstrumentAllFunctions(Module &M, function_ref< TargetLibraryInfo &(Function &)> LookupTLI, function_ref< BranchProbabilityInfo *(Function &)> LookupBPI, function_ref< BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS)
static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI, BranchProbabilityInfo &NBPI, uint64_t HotCountThreshold, uint64_t ColdCountThreshold)
static cl::opt< unsigned > PGOVerifyBFIRatio("pgo-verify-bfi-ratio", cl::init(2), cl::Hidden, cl::desc("Set the threshold for pgo-verify-bfi: only print out " "mismatched BFI if the difference percentage is greater than " "this value (in percentage)."))
static cl::opt< bool > DoComdatRenaming("do-comdat-renaming", cl::init(false), cl::Hidden, cl::desc("Append function hash to the name of COMDAT function to avoid " "function hash mismatch due to the preinliner"))
static cl::opt< unsigned > PGOFunctionCriticalEdgeThreshold("pgo-critical-edge-threshold", cl::init(20000), cl::Hidden, cl::desc("Do not instrument functions with the number of critical edges " " greater than this threshold."))
static void setupBBInfoEdges(const FuncPGOInstrumentation< PGOUseEdge, PGOUseBBInfo > &FuncInfo)
Set up InEdges/OutEdges for all BBs in the MST.
static cl::opt< std::string > PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path of profile data file. This is" "mainly for test purpose."))
static bool skipPGOUse(const Function &F)
static bool canRenameComdat(Function &F, std::unordered_multimap< Comdat *, GlobalValue * > &ComdatMembers)
static cl::opt< bool > PGOVerifyBFI("pgo-verify-bfi", cl::init(false), cl::Hidden, cl::desc("Print out mismatched BFI counts after setting profile metadata " "The print is enabled under -Rpass-analysis=pgo, or " "internal option -pass-remakrs-analysis=pgo."))
static cl::opt< bool > PGOBlockCoverage("pgo-block-coverage", cl::desc("Use this option to enable basic block coverage instrumentation"))
static uint64_t sumEdgeCount(const ArrayRef< PGOUseEdge * > Edges)
static cl::opt< bool > PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden, cl::desc("Use this option to turn on/off " "memory intrinsic size profiling."))
Function::ProfileCount ProfileCount
static cl::opt< bool > EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden, cl::desc("When this option is on, the annotated " "branch probability will be emitted as " "optimization remarks: -{Rpass|" "pass-remarks}=pgo-instrumentation"))
static cl::opt< unsigned > MaxNumMemOPAnnotations("memop-max-annotations", cl::init(4), cl::Hidden, cl::desc("Max number of preicise value annotations for a single memop" "intrinsic"))
static cl::opt< bool > DisableValueProfiling("disable-vp", cl::init(false), cl::Hidden, cl::desc("Disable Value Profiling"))
static std::string getSimpleNodeName(const BasicBlock *Node)
static cl::opt< bool > PGOViewBlockCoverageGraph("pgo-view-block-coverage-graph", cl::desc("Create a dot file of CFGs with block " "coverage inference information"))
static GlobalVariable * createIRLevelProfileFlagVar(Module &M, bool IsCS)
static bool isIndirectBrTarget(BasicBlock *BB)
static std::string getBranchCondString(Instruction *TI)
static bool annotateAllFunctions(Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName, vfs::FileSystem &FS, function_ref< TargetLibraryInfo &(Function &)> LookupTLI, function_ref< BranchProbabilityInfo *(Function &)> LookupBPI, function_ref< BlockFrequencyInfo *(Function &)> LookupBFI, ProfileSummaryInfo *PSI, bool IsCS)
static cl::opt< PGOViewCountsType > PGOViewRawCounts("pgo-view-raw-counts", cl::Hidden, cl::desc("A boolean option to show CFG dag or text " "with raw profile counts from " "profile data. See also option " "-pgo-view-counts. To limit graph " "display to only one function, use " "filtering option -view-bfi-func-name."), cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), clEnumValN(PGOVCT_Graph, "graph", "show a graph."), clEnumValN(PGOVCT_Text, "text", "show in text.")))
static const char * ValueProfKindDescr[]
This file provides the interface for IR based instrumentation passes ( (profile-gen,...
FunctionAnalysisManager FAM
ModuleAnalysisManager MAM
This header defines various interfaces for pass management in LLVM.
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isSimple(Instruction *I)
This file contains some templates that are useful if you are working with the STL at all.
raw_pwrite_stream & OS
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
Defines the virtual file system interface vfs::FileSystem.
Value * RHS
void printAsOperand(OutputBuffer &OB, Prec P=Prec::Default, bool StrictlyWorse=false) const
static APFloat getZero(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Zero.
Definition: APFloat.h:957
Class for arbitrary precision integers.
Definition: APInt.h:76
This templated class represents "all analyses that operate over <a particular IR unit>" (e....
Definition: Analysis.h:47
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:348
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:500
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
iterator end() const
Definition: ArrayRef.h:154
iterator begin() const
Definition: ArrayRef.h:153
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
iterator end()
Definition: BasicBlock.h:451
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:454
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:214
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:229
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
std::optional< uint64_t > getBlockProfileCount(const BasicBlock *BB, bool AllowSynthetic=false) const
Returns the estimated profile count of BB.
Conditional or Unconditional Branch instruction.
bool isConditional() const
Value * getCondition() const
Analysis pass which computes BranchProbabilityInfo.
Analysis providing branch probability information.
An union-find based Minimum Spanning Tree for CFG.
Definition: CFGMST.h:39
Edge & addEdge(BasicBlock *Src, BasicBlock *Dest, uint64_t W)
Definition: CFGMST.h:276
const std::vector< std::unique_ptr< Edge > > & allEdges() const
Definition: CFGMST.h:306
size_t bbInfoSize() const
Definition: CFGMST.h:314
size_t numEdges() const
Definition: CFGMST.h:312
BBInfo * findBBInfo(const BasicBlock *BB) const
Definition: CFGMST.h:324
BBInfo & getBBInfo(const BasicBlock *BB) const
Definition: CFGMST.h:317
void dumpEdges(raw_ostream &OS, const Twine &Message) const
Definition: CFGMST.h:257
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:1068
StringRef getName() const
Definition: Comdat.cpp:28
void setSelectionKind(SelectionKind Val)
Definition: Comdat.h:47
SelectionKind getSelectionKind() const
Definition: Comdat.h:46
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition: Constants.h:216
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:210
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:204
static Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
Definition: Constants.cpp:400
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
bool empty() const
Definition: DenseMap.h:98
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
Diagnostic information for the PGO profiler.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
Base class for error info classes.
Definition: Error.h:45
virtual std::string message() const
Return the error message as a string.
Definition: Error.h:53
Lightweight error class with error context and mandatory checking.
Definition: Error.h:160
Tagged union holding either a T or a Error.
Definition: Error.h:474
Class to represent profile counts.
Definition: Function.h:277
static GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
Definition: Globals.cpp:518
@ HiddenVisibility
The GV is hidden.
Definition: GlobalValue.h:68
@ ExternalLinkage
Externally visible function.
Definition: GlobalValue.h:52
@ WeakAnyLinkage
Keep one copy of named function when linking (weak)
Definition: GlobalValue.h:56
@ AvailableExternallyLinkage
Available for inspection, not emission.
Definition: GlobalValue.h:53
@ LinkOnceODRLinkage
Same, but only replaced by something equivalent.
Definition: GlobalValue.h:55
This instruction compares its operands according to the predicate given to the constructor.
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
Definition: IRBuilder.h:2028
BasicBlock::iterator GetInsertPoint() const
Definition: IRBuilder.h:175
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Definition: IRBuilder.h:525
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
Definition: IRBuilder.h:485
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:480
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2100
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2395
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2649
Reader for the indexed binary instrprof format.
static Expected< std::unique_ptr< IndexedInstrProfReader > > create(const Twine &Path, vfs::FileSystem &FS, const Twine &RemappingPath="")
Factory method to create an indexed reader.
Expected< InstrProfRecord > getInstrProfRecord(StringRef FuncName, uint64_t FuncHash, StringRef DeprecatedFuncName="", uint64_t *MismatchedFuncSum=nullptr)
Return the NamedInstrProfRecord associated with FuncName and FuncHash.
uint64_t getMaximumFunctionCount(bool UseCS)
Return the maximum of all known function counts.
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:658
Base class for instruction visitors.
Definition: InstVisitor.h:78
void visit(Iterator Start, Iterator End)
Definition: InstVisitor.h:87
RetTy visitSelectInst(SelectInst &I)
Definition: InstVisitor.h:189
instrprof_error get() const
Definition: InstrProf.h:388
std::string message() const override
Return the error message as a string.
Definition: InstrProf.cpp:241
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
Definition: Instruction.h:800
const BasicBlock * getParent() const
Definition: Instruction.h:150
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1633
A smart pointer to a reference-counted object that inherits from RefCountedBase or ThreadSafeRefCount...
uint32_t getCRC() const
Definition: CRC.h:52
void update(ArrayRef< uint8_t > Data)
Definition: CRC.cpp:103
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
MDString * createString(StringRef Str)
Return the given string as metadata.
Definition: MDBuilder.cpp:20
MDNode * createIrrLoopHeaderWeight(uint64_t Weight)
Return metadata containing an irreducible loop header weight.
Definition: MDBuilder.cpp:330
Metadata node.
Definition: Metadata.h:1067
ArrayRef< MDOperand > operands() const
Definition: Metadata.h:1426
Tuple of metadata.
Definition: Metadata.h:1470
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1498
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
Diagnostic information for optimization analysis remarks.
The optimization diagnostic interface.
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for applied optimization remarks.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
PGOInstrumentationUse(std::string Filename="", std::string RemappingFilename="", bool IsCS=false, IntrusiveRefCntPtr< vfs::FileSystem > FS=nullptr)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:109
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:115
void preserveSet()
Mark an analysis set as preserved.
Definition: Analysis.h:144
void preserve()
Mark an analysis as preserved.
Definition: Analysis.h:129
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
uint64_t getOrCompColdCountThreshold() const
Returns ColdCountThreshold if set.
void refresh()
If no summary is present, attempt to refresh.
bool isColdCount(uint64_t C) const
Returns true if count C is considered cold.
bool isHotCount(uint64_t C) const
Returns true if count C is considered hot.
uint64_t getOrCompHotCountThreshold() const
Returns HotCountThreshold if set.
This class represents the LLVM 'select' instruction.
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:131
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
TinyPtrVector - This class is specialized for cases where there are normally 0 or 1 element in a vect...
Definition: TinyPtrVector.h:29
EltTy front() const
unsigned size() const
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
std::string str() const
Return the twine contents as a std::string.
Definition: Twine.cpp:17
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
void print(raw_ostream &O, bool IsForDebug=false, bool NoDetails=false) const
Print the current type.
static IntegerType * getInt64Ty(LLVMContext &C)
Value * getOperand(unsigned i) const
Definition: User.h:169
Utility analysis that determines what values are worth profiling.
std::vector< CandidateInfo > get(InstrProfValueKind Kind) const
returns a list of value profiling candidates of the given kind
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
An efficient, type-erasing, non-owning reference to a callable.
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:660
The virtual file system interface.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
This file contains the declaration of the Comdat class, which represents a single COMDAT in LLVM.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
const CustomOperand< const MCSubtargetInfo & > Msg[]
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1447
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:718
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
uint64_t getFuncHash(const FuncRecordTy *Record)
Return the structural hash associated with the function.
void checkExpectAnnotations(Instruction &I, const ArrayRef< uint32_t > ExistingWeights, bool IsFrontend)
checkExpectAnnotations - compares PGO counters to the thresholds used for llvm.expect and warns if th...
Definition: MisExpect.cpp:202
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< FuncNode * > Func
Definition: RDFGraph.h:393
void write64le(void *P, uint64_t V)
Definition: Endian.h:455
IntrusiveRefCntPtr< FileSystem > getRealFileSystem()
Gets an vfs::FileSystem for the 'real' file system, as seen by the operating system.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
Interval::succ_iterator succ_end(Interval *I)
Definition: Interval.h:102
void setIrrLoopHeaderMetadata(Module *M, Instruction *TI, uint64_t Count)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1689
std::string getPGOFuncName(const Function &F, bool InLTO=false, uint64_t Version=INSTR_PROF_INDEX_VERSION)
Please use getIRPGOFuncName for LLVM IR instrumentation.
Definition: InstrProf.cpp:365
void createPGOFuncNameMetadata(Function &F, StringRef PGOFuncName)
Create the PGOFuncName meta data if PGOFuncName is different from function's raw name.
Definition: InstrProf.cpp:1279
unsigned GetSuccessorNumber(const BasicBlock *BB, const BasicBlock *Succ)
Search for the specified successor of basic block BB and return its position in the terminator instru...
Definition: CFG.cpp:79
std::string getIRPGOFuncName(const Function &F, bool InLTO=false)
Definition: InstrProf.cpp:354
Function::ProfileCount ProfileCount
auto successors(const MachineBasicBlock *BB)
void handleAllErrors(Error E, HandlerTs &&... Handlers)
Behaves the same as handleErrors, except that by contract all errors must be handled by the given han...
Definition: Error.h:970
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Interval::succ_iterator succ_begin(Interval *I)
succ_begin/succ_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:99
cl::opt< InstrProfCorrelator::ProfCorrelatorKind > ProfileCorrelate("profile-correlate", cl::desc("Use debug info or binary file to correlate profiles."), cl::init(InstrProfCorrelator::NONE), cl::values(clEnumValN(InstrProfCorrelator::NONE, "", "No profile correlation"), clEnumValN(InstrProfCorrelator::DEBUG_INFO, "debug-info", "Use debug info to correlate"), clEnumValN(InstrProfCorrelator::BINARY, "binary", "Use binary to correlate")))
DenseMap< BasicBlock *, ColorVector > colorEHFunclets(Function &F)
If an EH funclet personality is in use (see isFuncletEHPersonality), this will recompute which blocks...
cl::opt< bool > PGOWarnMissing
raw_ostream & WriteGraph(raw_ostream &O, const GraphType &G, bool ShortNames=false, const Twine &Title="")
Definition: GraphWriter.h:359
bool SplitIndirectBrCriticalEdges(Function &F, bool IgnoreBlocksWithoutPHI, BranchProbabilityInfo *BPI=nullptr, BlockFrequencyInfo *BFI=nullptr)
cl::opt< bool > DebugInfoCorrelate
void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
OperandBundleDefT< Value * > OperandBundleDef
Definition: AutoUpgrade.h:33
cl::opt< std::string > ViewBlockFreqFuncName("view-bfi-func-name", cl::Hidden, cl::desc("The option to specify " "the name of the function " "whose CFG will be displayed."))
GlobalVariable * createPGOFuncNameVar(Function &F, StringRef PGOFuncName)
Create and return the global variable for function name used in PGO instrumentation.
Definition: InstrProf.cpp:446
void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
Definition: InstrProf.cpp:1174
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:428
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
cl::opt< bool > NoPGOWarnMismatch
Definition: MemProfiler.cpp:55
bool isFuncletEHPersonality(EHPersonality Pers)
Returns true if this is a personality function that invokes handler funclets (which must return to it...
InstrProfValueKind
Definition: InstrProf.h:258
cl::opt< PGOViewCountsType > PGOViewCounts("pgo-view-counts", cl::Hidden, cl::desc("A boolean option to show CFG dag or text with " "block profile counts and branch probabilities " "right after PGO profile annotation step. The " "profile counts are computed using branch " "probabilities from the runtime profile data and " "block frequency propagation algorithm. To view " "the raw counts from the profile, use option " "-pgo-view-raw-counts instead. To limit graph " "display to only one function, use filtering option " "-view-bfi-func-name."), cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), clEnumValN(PGOVCT_Graph, "graph", "show a graph."), clEnumValN(PGOVCT_Text, "text", "show in text.")))
static uint32_t scaleBranchCount(uint64_t Count, uint64_t Scale)
Scale an individual branch count.
void appendToCompilerUsed(Module &M, ArrayRef< GlobalValue * > Values)
Adds global values to the llvm.compiler.used list.
BasicBlock * SplitCriticalEdge(Instruction *TI, unsigned SuccNum, const CriticalEdgeSplittingOptions &Options=CriticalEdgeSplittingOptions(), const Twine &BBName="")
If this edge is a critical edge, insert a new node to split the critical edge.
void ViewGraph(const GraphType &G, const Twine &Name, bool ShortNames=false, const Twine &Title="", GraphProgram::Name Program=GraphProgram::DOT)
ViewGraph - Emit a dot graph, run 'dot', run gv on the postscript file, then cleanup.
Definition: GraphWriter.h:427
bool isCriticalEdge(const Instruction *TI, unsigned SuccNum, bool AllowIdenticalEdges=false)
Return true if the specified edge is a critical edge.
Definition: CFG.cpp:95
static uint64_t calculateCountScale(uint64_t MaxCount)
Calculate what to divide by to scale counts.
bool canRenameComdatFunc(const Function &F, bool CheckAddressTaken=false)
Check if we can safely rename this Comdat function.
Definition: InstrProf.cpp:1339
void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput)
Definition: InstrProf.cpp:1362
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1858
@ DS_Warning
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
auto predecessors(const MachineBasicBlock *BB)
void setProfMetadata(Module *M, Instruction *TI, ArrayRef< uint64_t > EdgeCounts, uint64_t MaxCount)
SuccIterator< const Instruction, const BasicBlock > const_succ_iterator
Definition: CFG.h:243
cl::opt< bool > NoPGOWarnMismatchComdatWeak
Implement std::hash so that hash_code can be used in STL containers.
Definition: BitVector.h:858
#define N
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:230
static const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:250
static std::string getGraphName(const PGOUseFunc *G)
std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph)
DOTGraphTraits - Template class that can be specialized to customize how graphs are converted to 'dot...
DefaultDOTGraphTraits - This class provides the default implementations of all of the DOTGraphTraits ...
static ChildIteratorType child_end(const NodeRef N)
static NodeRef getEntryNode(const PGOUseFunc *G)
static ChildIteratorType child_begin(const NodeRef N)
static nodes_iterator nodes_end(const PGOUseFunc *G)
static nodes_iterator nodes_begin(const PGOUseFunc *G)
Profiling information for a single function.
Definition: InstrProf.h:704
std::vector< uint64_t > Counts
Definition: InstrProf.h:705
CountPseudoKind getCountPseudoKind() const
Definition: InstrProf.h:815
uint32_t getNumValueSites(uint32_t ValueKind) const
Return the number of instrumented sites for ValueKind.
Definition: InstrProf.h:929
static void setCSFlagInHash(uint64_t &FuncHash)
Definition: InstrProf.h:910