LLVM 19.0.0git
PGOInstrumentation.cpp
Go to the documentation of this file.
1//===- PGOInstrumentation.cpp - MST-based PGO Instrumentation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements PGO instrumentation using a minimum spanning tree based
10// on the following paper:
11// [1] Donald E. Knuth, Francis R. Stevenson. Optimal measurement of points
12// for program frequency counts. BIT Numerical Mathematics 1973, Volume 13,
13// Issue 3, pp 313-322
14// The idea of the algorithm is based on the fact that for each node (except
15// for the entry and exit), the sum of incoming edge counts equals the sum of
16// outgoing edge counts. The count of an edge on the spanning tree can be
17// derived from the counts of the edges not on the spanning tree. Knuth proves
18// this method instruments the minimum number of edges.
19//
20// The minimal spanning tree here is actually a maximum weight tree -- on-tree
21// edges have higher frequencies (more likely to execute). The idea is to
22// instrument those less frequently executed edges to reduce the runtime
23// overhead of instrumented binaries.
24//
25// This file contains two passes:
26// (1) Pass PGOInstrumentationGen which instruments the IR to generate edge
27// count profile, and generates the instrumentation for indirect call
28// profiling.
29// (2) Pass PGOInstrumentationUse which reads the edge count profile and
30// annotates the branch weights. It also reads the indirect call value
31// profiling records and annotates the indirect call instructions.
32//
33// To get precise counter information, these two passes need to be invoked at
34// the same compilation point (so they see the same IR). For pass
35// PGOInstrumentationGen, the real work is done in instrumentOneFunc(). For
36// pass PGOInstrumentationUse, the real work is done in class PGOUseFunc and
37// the profile is opened at module level and passed to each PGOUseFunc instance.
38// The shared code for PGOInstrumentationGen and PGOInstrumentationUse is put
39// in class FuncPGOInstrumentation.
40//
41// Class PGOEdge represents a CFG edge and some auxiliary information. Class
42// BBInfo contains auxiliary information for each BB. These two classes are used
43// in pass PGOInstrumentationGen. Classes PGOUseEdge and UseBBInfo are derived
44// from PGOEdge and BBInfo, respectively. They contain extra data structures
45// used in populating profile counters.
46// The MST implementation is in Class CFGMST (CFGMST.h).
47//
48//===----------------------------------------------------------------------===//
49
52#include "llvm/ADT/APInt.h"
53#include "llvm/ADT/ArrayRef.h"
54#include "llvm/ADT/STLExtras.h"
56#include "llvm/ADT/Statistic.h"
57#include "llvm/ADT/StringRef.h"
58#include "llvm/ADT/Twine.h"
59#include "llvm/ADT/iterator.h"
63#include "llvm/Analysis/CFG.h"
68#include "llvm/IR/Attributes.h"
69#include "llvm/IR/BasicBlock.h"
70#include "llvm/IR/CFG.h"
71#include "llvm/IR/Comdat.h"
72#include "llvm/IR/Constant.h"
73#include "llvm/IR/Constants.h"
75#include "llvm/IR/Dominators.h"
77#include "llvm/IR/Function.h"
78#include "llvm/IR/GlobalAlias.h"
79#include "llvm/IR/GlobalValue.h"
81#include "llvm/IR/IRBuilder.h"
82#include "llvm/IR/InstVisitor.h"
83#include "llvm/IR/InstrTypes.h"
84#include "llvm/IR/Instruction.h"
87#include "llvm/IR/Intrinsics.h"
88#include "llvm/IR/LLVMContext.h"
89#include "llvm/IR/MDBuilder.h"
90#include "llvm/IR/Module.h"
91#include "llvm/IR/PassManager.h"
94#include "llvm/IR/Type.h"
95#include "llvm/IR/Value.h"
99#include "llvm/Support/CRC.h"
100#include "llvm/Support/Casting.h"
103#include "llvm/Support/Debug.h"
104#include "llvm/Support/Error.h"
117#include <algorithm>
118#include <cassert>
119#include <cstdint>
120#include <memory>
121#include <numeric>
122#include <optional>
123#include <stack>
124#include <string>
125#include <unordered_map>
126#include <utility>
127#include <vector>
128
129using namespace llvm;
132
133#define DEBUG_TYPE "pgo-instrumentation"
134
135STATISTIC(NumOfPGOInstrument, "Number of edges instrumented.");
136STATISTIC(NumOfPGOSelectInsts, "Number of select instruction instrumented.");
137STATISTIC(NumOfPGOMemIntrinsics, "Number of mem intrinsics instrumented.");
138STATISTIC(NumOfPGOEdge, "Number of edges.");
139STATISTIC(NumOfPGOBB, "Number of basic-blocks.");
140STATISTIC(NumOfPGOSplit, "Number of critical edge splits.");
141STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts.");
142STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile.");
143STATISTIC(NumOfPGOMissing, "Number of functions without profile.");
144STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations.");
145STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO.");
146STATISTIC(NumOfCSPGOSelectInsts,
147 "Number of select instruction instrumented in CSPGO.");
148STATISTIC(NumOfCSPGOMemIntrinsics,
149 "Number of mem intrinsics instrumented in CSPGO.");
150STATISTIC(NumOfCSPGOEdge, "Number of edges in CSPGO.");
151STATISTIC(NumOfCSPGOBB, "Number of basic-blocks in CSPGO.");
152STATISTIC(NumOfCSPGOSplit, "Number of critical edge splits in CSPGO.");
153STATISTIC(NumOfCSPGOFunc,
154 "Number of functions having valid profile counts in CSPGO.");
155STATISTIC(NumOfCSPGOMismatch,
156 "Number of functions having mismatch profile in CSPGO.");
157STATISTIC(NumOfCSPGOMissing, "Number of functions without profile in CSPGO.");
158STATISTIC(NumCoveredBlocks, "Number of basic blocks that were executed");
159
160// Command line option to specify the file to read profile from. This is
161// mainly used for testing.
// Adjacent string literals are concatenated as-is, so every piece of the
// description except the last must end with a space.
163 PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden,
164 cl::value_desc("filename"),
165 cl::desc("Specify the path of profile data file. This is "
166 "mainly for test purpose."));
168 "pgo-test-profile-remapping-file", cl::init(""), cl::Hidden,
169 cl::value_desc("filename"),
170 cl::desc("Specify the path of profile remapping file. This is mainly for "
171 "test purpose."));
172
173// Command line option to disable value profiling. The default is false:
174// i.e. value profiling is enabled by default. This is for debug purpose.
175static cl::opt<bool> DisableValueProfiling("disable-vp", cl::init(false),
177 cl::desc("Disable Value Profiling"));
178
179// Command line option to set the maximum number of VP annotations to write to
180// the metadata for a single indirect call callsite.
182 "icp-max-annotations", cl::init(3), cl::Hidden,
183 cl::desc("Max number of annotations for a single indirect "
184 "call callsite"));
185
186// Command line option to set the maximum number of value annotations
187// to write to the metadata for a single memop intrinsic.
// Adjacent string literals are concatenated as-is, so the first piece of the
// description must end with a space.
189 "memop-max-annotations", cl::init(4), cl::Hidden,
190 cl::desc("Max number of precise value annotations for a single memop "
191 "intrinsic"));
192
193// Command line option to control appending FunctionHash to the name of a COMDAT
194// function. This is to avoid the hash mismatch caused by the preinliner.
196 "do-comdat-renaming", cl::init(false), cl::Hidden,
197 cl::desc("Append function hash to the name of COMDAT function to avoid "
198 "function hash mismatch due to the preinliner"));
199
200namespace llvm {
201// Command line option to enable/disable the warning about missing profile
202// information.
203cl::opt<bool> PGOWarnMissing("pgo-warn-missing-function", cl::init(false),
205 cl::desc("Use this option to turn on/off "
206 "warnings about missing profile data for "
207 "functions."));
208
209// Command line option to enable/disable the warning about a hash mismatch in
210// the profile data.
212 NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), cl::Hidden,
213 cl::desc("Use this option to turn off/on "
214 "warnings about profile cfg mismatch."));
215
216// Command line option to enable/disable the warning about a hash mismatch in
217// the profile data for Comdat functions, which often turns out to be false
218// positive due to the pre-instrumentation inline.
220 "no-pgo-warn-mismatch-comdat-weak", cl::init(true), cl::Hidden,
221 cl::desc("The option is used to turn on/off "
222 "warnings about hash mismatch for comdat "
223 "or weak functions."));
224} // namespace llvm
225
226// Command line option to enable/disable select instruction instrumentation.
227static cl::opt<bool>
228 PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden,
229 cl::desc("Use this option to turn on/off SELECT "
230 "instruction instrumentation. "));
231
232// Command line option to turn on CFG dot or text dump of raw profile counts
234 "pgo-view-raw-counts", cl::Hidden,
235 cl::desc("A boolean option to show CFG dag or text "
236 "with raw profile counts from "
237 "profile data. See also option "
238 "-pgo-view-counts. To limit graph "
239 "display to only one function, use "
240 "filtering option -view-bfi-func-name."),
241 cl::values(clEnumValN(PGOVCT_None, "none", "do not show."),
242 clEnumValN(PGOVCT_Graph, "graph", "show a graph."),
243 clEnumValN(PGOVCT_Text, "text", "show in text.")));
244
245// Command line option to enable/disable memop intrinsic call.size profiling.
246static cl::opt<bool>
247 PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden,
248 cl::desc("Use this option to turn on/off "
249 "memory intrinsic size profiling."));
250
251// Emit branch probability as optimization remarks.
252static cl::opt<bool>
253 EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden,
254 cl::desc("When this option is on, the annotated "
255 "branch probability will be emitted as "
256 "optimization remarks: -{Rpass|"
257 "pass-remarks}=pgo-instrumentation"));
258
260 "pgo-instrument-entry", cl::init(false), cl::Hidden,
261 cl::desc("Force to instrument function entry basicblock."));
262
264 "pgo-function-entry-coverage", cl::Hidden,
265 cl::desc(
266 "Use this option to enable function entry coverage instrumentation."));
267
269 "pgo-block-coverage",
270 cl::desc("Use this option to enable basic block coverage instrumentation"));
271
272static cl::opt<bool>
273 PGOViewBlockCoverageGraph("pgo-view-block-coverage-graph",
274 cl::desc("Create a dot file of CFGs with block "
275 "coverage inference information"));
276
278 "pgo-temporal-instrumentation",
279 cl::desc("Use this option to enable temporal instrumentation"));
280
281static cl::opt<bool>
282 PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden,
283 cl::desc("Fix function entry count in profile use."));
284
// Option -pgo-verify-hot-bfi (off by default, hidden): report raw profile
// counts whose hot/cold classification is changed by BFI.
286 "pgo-verify-hot-bfi", cl::init(false), cl::Hidden,
287 cl::desc("Print out the non-match BFI count if a hot raw profile count "
288 "becomes non-hot, or a cold raw profile count becomes hot. "
289 "The print is enabled under -Rpass-analysis=pgo, or "
290 "internal option -pass-remarks-analysis=pgo."));
291
// Option -pgo-verify-bfi (off by default, hidden): report BFI counts that do
// not match the profile after the profile metadata has been set.
293 "pgo-verify-bfi", cl::init(false), cl::Hidden,
294 cl::desc("Print out mismatched BFI counts after setting profile metadata. "
295 "The print is enabled under -Rpass-analysis=pgo, or "
296 "internal option -pass-remarks-analysis=pgo."));
297
299 "pgo-verify-bfi-ratio", cl::init(2), cl::Hidden,
300 cl::desc("Set the threshold for pgo-verify-bfi: only print out "
301 "mismatched BFI if the difference percentage is greater than "
302 "this value (in percentage)."));
303
305 "pgo-verify-bfi-cutoff", cl::init(5), cl::Hidden,
306 cl::desc("Set the threshold for pgo-verify-bfi: skip the counts whose "
307 "profile count value is below."));
308
310 "pgo-trace-func-hash", cl::init("-"), cl::Hidden,
311 cl::value_desc("function name"),
312 cl::desc("Trace the hash of the function with this name."));
313
315 "pgo-function-size-threshold", cl::Hidden,
316 cl::desc("Do not instrument functions smaller than this threshold."));
317
319 "pgo-critical-edge-threshold", cl::init(20000), cl::Hidden,
320 cl::desc("Do not instrument functions with the number of critical edges "
321 " greater than this threshold."));
322
323namespace llvm {
324// Command line option to turn on CFG dot dump after profile annotation.
325// Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts
327
328// Command line option to specify the name of the function for CFG dump
329// Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name=
331
332// Command line option to enable vtable value profiling. Defined in
333// ProfileData/InstrProf.cpp: -enable-vtable-value-profiling=
336} // namespace llvm
337
339 return PGOInstrumentEntry ||
341}
342
343// FIXME(mtrofin): re-enable this for ctx profiling, for non-indirect calls. Ctx
344// profiling implicitly captures indirect call cases, but not other values.
345// Supporting other values is relatively straight-forward - just another counter
346// range within the context.
348 return DisableValueProfiling ||
350}
351
352// Return a string describing the branch condition that can be
353// used in static branch probability heuristics:
354static std::string getBranchCondString(Instruction *TI) {
355 BranchInst *BI = dyn_cast<BranchInst>(TI);
356 if (!BI || !BI->isConditional())
357 return std::string();
358
359 Value *Cond = BI->getCondition();
360 ICmpInst *CI = dyn_cast<ICmpInst>(Cond);
361 if (!CI)
362 return std::string();
363
364 std::string result;
365 raw_string_ostream OS(result);
366 OS << CI->getPredicate() << "_";
367 CI->getOperand(0)->getType()->print(OS, true);
368
369 Value *RHS = CI->getOperand(1);
370 ConstantInt *CV = dyn_cast<ConstantInt>(RHS);
371 if (CV) {
372 if (CV->isZero())
373 OS << "_Zero";
374 else if (CV->isOne())
375 OS << "_One";
376 else if (CV->isMinusOne())
377 OS << "_MinusOne";
378 else
379 OS << "_Const";
380 }
381 OS.flush();
382 return result;
383}
384
385static const char *ValueProfKindDescr[] = {
386#define VALUE_PROF_KIND(Enumerator, Value, Descr) Descr,
388};
389
390// Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime
391// aware this is an ir_level profile so it can set the version flag.
393 const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR));
394 Type *IntTy64 = Type::getInt64Ty(M.getContext());
395 uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF);
396 if (IsCS)
397 ProfileVersion |= VARIANT_MASK_CSIR_PROF;
399 ProfileVersion |= VARIANT_MASK_INSTR_ENTRY;
401 ProfileVersion |= VARIANT_MASK_DBG_CORRELATE;
403 ProfileVersion |=
404 VARIANT_MASK_BYTE_COVERAGE | VARIANT_MASK_FUNCTION_ENTRY_ONLY;
406 ProfileVersion |= VARIANT_MASK_BYTE_COVERAGE;
408 ProfileVersion |= VARIANT_MASK_TEMPORAL_PROF;
409 auto IRLevelVersionVariable = new GlobalVariable(
410 M, IntTy64, true, GlobalValue::WeakAnyLinkage,
411 Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), VarName);
412 IRLevelVersionVariable->setVisibility(GlobalValue::HiddenVisibility);
413 Triple TT(M.getTargetTriple());
414 if (TT.supportsCOMDAT()) {
415 IRLevelVersionVariable->setLinkage(GlobalValue::ExternalLinkage);
416 IRLevelVersionVariable->setComdat(M.getOrInsertComdat(VarName));
417 }
418 return IRLevelVersionVariable;
419}
420
421namespace {
422
423/// The select instruction visitor plays three roles specified
424/// by the mode. In \c VM_counting mode, it simply counts the number of
425/// select instructions. In \c VM_instrument mode, it inserts code to count
426/// the number times TrueValue of select is taken. In \c VM_annotate mode,
427/// it reads the profile data and annotate the select instruction with metadata.
428enum VisitMode { VM_counting, VM_instrument, VM_annotate };
429class PGOUseFunc;
430
431/// Instruction Visitor class to visit select instructions.
432struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> {
433 Function &F;
434 unsigned NSIs = 0; // Number of select instructions instrumented.
435 VisitMode Mode = VM_counting; // Visiting mode.
436 unsigned *CurCtrIdx = nullptr; // Pointer to current counter index.
437 unsigned TotalNumCtrs = 0; // Total number of counters
438 GlobalVariable *FuncNameVar = nullptr;
439 uint64_t FuncHash = 0;
440 PGOUseFunc *UseFunc = nullptr;
441 bool HasSingleByteCoverage;
442
443 SelectInstVisitor(Function &Func, bool HasSingleByteCoverage)
444 : F(Func), HasSingleByteCoverage(HasSingleByteCoverage) {}
445
446 void countSelects() {
447 NSIs = 0;
448 Mode = VM_counting;
449 visit(F);
450 }
451
452 // Visit the IR stream and instrument all select instructions. \p
453 // Ind is a pointer to the counter index variable; \p TotalNC
454 // is the total number of counters; \p FNV is the pointer to the
455 // PGO function name var; \p FHash is the function hash.
456 void instrumentSelects(unsigned *Ind, unsigned TotalNC, GlobalVariable *FNV,
457 uint64_t FHash) {
458 Mode = VM_instrument;
459 CurCtrIdx = Ind;
460 TotalNumCtrs = TotalNC;
461 FuncHash = FHash;
462 FuncNameVar = FNV;
463 visit(F);
464 }
465
466 // Visit the IR stream and annotate all select instructions.
467 void annotateSelects(PGOUseFunc *UF, unsigned *Ind) {
468 Mode = VM_annotate;
469 UseFunc = UF;
470 CurCtrIdx = Ind;
471 visit(F);
472 }
473
474 void instrumentOneSelectInst(SelectInst &SI);
475 void annotateOneSelectInst(SelectInst &SI);
476
477 // Visit \p SI instruction and perform tasks according to visit mode.
478 void visitSelectInst(SelectInst &SI);
479
480 // Return the number of select instructions. This needs be called after
481 // countSelects().
482 unsigned getNumOfSelectInsts() const { return NSIs; }
483};
484
485/// This class implements the CFG edges for the Minimum Spanning Tree (MST)
486/// based instrumentation.
487/// Note that the CFG can be a multi-graph. So there might be multiple edges
488/// with the same SrcBB and DestBB.
489struct PGOEdge {
490 BasicBlock *SrcBB;
491 BasicBlock *DestBB;
492 uint64_t Weight;
493 bool InMST = false;
494 bool Removed = false;
495 bool IsCritical = false;
496
497 PGOEdge(BasicBlock *Src, BasicBlock *Dest, uint64_t W = 1)
498 : SrcBB(Src), DestBB(Dest), Weight(W) {}
499
500 /// Return the information string of an edge.
501 std::string infoString() const {
502 return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") +
503 (IsCritical ? "c" : " ") + " W=" + Twine(Weight))
504 .str();
505 }
506};
507
508/// This class stores the auxiliary information for each BB in the MST.
509struct PGOBBInfo {
 // Group this BB belongs to; the constructor makes every BB its own group.
 // Presumably the union-find parent used while building the MST -- TODO
 // confirm against CFGMST.
510 PGOBBInfo *Group;
 // NOTE(review): `Index`, which the constructor initializes and infoString()
 // prints, is not declared in the lines visible here.
 // Starts at 0; presumably the union-by-rank value for Group -- TODO confirm.
512 uint32_t Rank = 0;
513
 // \p IX is stored as this BB's index.
514 PGOBBInfo(unsigned IX) : Group(this), Index(IX) {}
515
516 /// Return the information string of this object.
517 std::string infoString() const {
518 return (Twine("Index=") + Twine(Index)).str();
519 }
520};
521
522// Per-function state shared by the PGO passes in this file (see file header).
// \p Edge and \p BBInfo are the edge and per-BB info types used with the MST.
// The constructor counts selects, collects value-profiling sites, computes
// the CFG hash and optionally renames COMDAT functions.
523template <class Edge, class BBInfo> class FuncPGOInstrumentation {
524private:
 // The function being processed.
525 Function &F;
526
527 // Whether this is context-sensitive instrumentation.
528 bool IsCS;
529
530 // A map that stores the Comdat group in function F.
531 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers;
532
 // NOTE(review): the constructor initializes a member `VPC` from (Func, TLI);
 // its declaration is not visible in these lines.
534
 // Fill in FunctionHash from the CFG and the profiled-site counts.
535 void computeCFGHash();
 // Append the function hash to the function (and comdat) name when allowed.
536 void renameComdatFunction();
537
538public:
539 const TargetLibraryInfo &TLI;
 // Value-profiling candidates, indexed by value kind; sized IPVK_Last + 1.
540 std::vector<std::vector<VPCandidateInfo>> ValueSites;
541 SelectInstVisitor SIVisitor;
 // Set from getIRPGOFuncName(F); renameComdatFunction() may append the hash.
542 std::string FuncName;
 // Set from getPGOFuncName(F).
543 std::string DeprecatedFuncName;
 // Only assigned by the constructor when CreateGlobalVar is true.
544 GlobalVariable *FuncNameVar;
545
546 // CFG hash value for this function.
547 uint64_t FunctionHash = 0;
548
549 // The Minimum Spanning Tree of function CFG.
 // NOTE(review): the member this comment describes (used below as `MST`) is
 // not declared in the lines visible here.
551
 // Engaged only when single-byte coverage was requested (see constructBCI).
552 const std::optional<BlockCoverageInference> BCI;
553
 // Build the block coverage inference for \p Func when
 // \p HasSingleByteCoverage is set; otherwise return an empty optional.
554 static std::optional<BlockCoverageInference>
555 constructBCI(Function &Func, bool HasSingleByteCoverage,
556 bool InstrumentFuncEntry) {
557 if (HasSingleByteCoverage)
558 return BlockCoverageInference(Func, InstrumentFuncEntry);
559 return {};
560 }
561
562 // Collect all the BBs that will be instrumented, and store them in
563 // InstrumentBBs.
564 void getInstrumentBBs(std::vector<BasicBlock *> &InstrumentBBs);
565
566 // Given an edge, find the BB that will be instrumented.
567 // Return nullptr if there is no BB to be instrumented.
568 BasicBlock *getInstrBB(Edge *E);
569
570 // Return the auxiliary BB information.
571 BBInfo &getBBInfo(const BasicBlock *BB) const { return MST.getBBInfo(BB); }
572
573 // Return the auxiliary BB information if available.
574 BBInfo *findBBInfo(const BasicBlock *BB) const { return MST.findBBInfo(BB); }
575
576 // Dump edges and BB information.
577 void dumpInfo(StringRef Str = "") const {
578 MST.dumpEdges(dbgs(), Twine("Dump Function ") + FuncName +
579 " Hash: " + Twine(FunctionHash) + "\t" + Str);
580 }
581
 // Build all per-function state for \p Func. When \p CreateGlobalVar is set,
 // the PGO function name variable is created as the last step.
582 FuncPGOInstrumentation(
583 Function &Func, TargetLibraryInfo &TLI,
584 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
585 bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr,
586 BlockFrequencyInfo *BFI = nullptr, bool IsCS = false,
587 bool InstrumentFuncEntry = true, bool HasSingleByteCoverage = false)
588 : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func, TLI),
589 TLI(TLI), ValueSites(IPVK_Last + 1),
590 SIVisitor(Func, HasSingleByteCoverage),
591 MST(F, InstrumentFuncEntry, BPI, BFI),
592 BCI(constructBCI(Func, HasSingleByteCoverage, InstrumentFuncEntry)) {
593 if (BCI && PGOViewBlockCoverageGraph)
594 BCI->viewBlockCoverageGraph();
595 // This should be done before CFG hash computation.
596 SIVisitor.countSelects();
597 ValueSites[IPVK_MemOPSize] = VPC.get(IPVK_MemOPSize);
 // Statistics go to the CS or non-CS counters. Indirect-call and vtable
 // value sites are collected only in the non-CS case.
598 if (!IsCS) {
599 NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
600 NumOfPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
601 NumOfPGOBB += MST.bbInfoSize();
602 ValueSites[IPVK_IndirectCallTarget] = VPC.get(IPVK_IndirectCallTarget);
 // NOTE(review): a line appears to be missing here; it may guard the
 // vtable assignment below -- confirm against the original file.
604 ValueSites[IPVK_VTableTarget] = VPC.get(IPVK_VTableTarget);
605 } else {
606 NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
607 NumOfCSPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
608 NumOfCSPGOBB += MST.bbInfoSize();
609 }
610
611 FuncName = getIRPGOFuncName(F);
612 DeprecatedFuncName = getPGOFuncName(F);
613 computeCFGHash();
614 if (!ComdatMembers.empty())
615 renameComdatFunction();
616 LLVM_DEBUG(dumpInfo("after CFGMST"));
617
 // Count the live edges; those not in the MST are the ones to instrument.
618 for (const auto &E : MST.allEdges()) {
619 if (E->Removed)
620 continue;
621 IsCS ? NumOfCSPGOEdge++ : NumOfPGOEdge++;
622 if (!E->InMST)
623 IsCS ? NumOfCSPGOInstrument++ : NumOfPGOInstrument++;
624 }
625
626 if (CreateGlobalVar)
627 FuncNameVar = createPGOFuncNameVar(F, FuncName);
628 }
629};
630
631} // end anonymous namespace
632
633// Compute Hash value for the CFG from two CRC32 values: one over the index
634// value of each BB in the CFG, the other over the numbers of selects, indirect
635// calls, mem ops and edges; the second is added in shifted left by 28 bits.
636template <class Edge, class BBInfo>
637void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
 // First CRC: for every successor edge in the function, the successor's BB
 // index as 4 bytes, least significant byte first.
638 std::vector<uint8_t> Indexes;
639 JamCRC JC;
640 for (auto &BB : F) {
641 for (BasicBlock *Succ : successors(&BB)) {
642 auto BI = findBBInfo(Succ);
 // Successors without BB info do not contribute to the hash.
643 if (BI == nullptr)
644 continue;
645 uint32_t Index = BI->Index;
646 for (int J = 0; J < 4; J++)
647 Indexes.push_back((uint8_t)(Index >> (J * 8)));
648 }
649 }
650 JC.update(Indexes);
651
652 JamCRC JCH;
653 // The second CRC, which forms the upper part of the hash.
654 auto updateJCH = [&JCH](uint64_t Num) {
655 uint8_t Data[8];
 // NOTE(review): the statement that writes Num into Data is not visible in
 // these lines; as shown, Data would be hashed uninitialized.
657 JCH.update(Data);
658 };
659 updateJCH((uint64_t)SIVisitor.getNumOfSelectInsts());
660 updateJCH((uint64_t)ValueSites[IPVK_IndirectCallTarget].size());
661 updateJCH((uint64_t)ValueSites[IPVK_MemOPSize].size());
 // With block coverage inference the instrumented-blocks hash is used in
 // place of the MST edge count.
662 if (BCI) {
663 updateJCH(BCI->getInstrumentedBlocksHash());
664 } else {
665 updateJCH((uint64_t)MST.numEdges());
666 }
667
668 // Hash format for context sensitive profile. Reserve 4 bits for other
669 // information.
 // The shift is 28 bits, not 32, so the two CRCs overlap in bits 28-31.
670 FunctionHash = (((uint64_t)JCH.getCRC()) << 28) + JC.getCRC();
671
672 // Reserve bit 60-63 for other information purpose.
673 FunctionHash &= 0x0FFFFFFFFFFFFFFF;
 // NOTE(review): the statement controlled by this `if` is not visible in
 // these lines; as shown, the `if` would govern the LLVM_DEBUG below.
674 if (IsCS)
676 LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n"
677 << " CRC = " << JC.getCRC()
678 << ", Selects = " << SIVisitor.getNumOfSelectInsts()
679 << ", Edges = " << MST.numEdges() << ", ICSites = "
680 << ValueSites[IPVK_IndirectCallTarget].size()
681 << ", Memops = " << ValueSites[IPVK_MemOPSize].size()
682 << ", High32 CRC = " << JCH.getCRC()
683 << ", Hash = " << FunctionHash << "\n";);
684
 // Unconditional trace (not under LLVM_DEBUG) for functions whose name
 // contains the -pgo-trace-func-hash value; "-" disables it.
685 if (PGOTraceFuncHash != "-" && F.getName().contains(PGOTraceFuncHash))
686 dbgs() << "Funcname=" << F.getName() << ", Hash=" << FunctionHash
687 << " in building " << F.getParent()->getSourceFileName() << "\n";
688}
689
690// Check if we can safely rename this Comdat function.
691static bool canRenameComdat(
692 Function &F,
693 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
694 if (!DoComdatRenaming || !canRenameComdatFunc(F, true))
695 return false;
696
697 // FIXME: Current only handle those Comdat groups that only containing one
698 // function.
699 // (1) For a Comdat group containing multiple functions, we need to have a
700 // unique postfix based on the hashes for each function. There is a
701 // non-trivial code refactoring to do this efficiently.
702 // (2) Variables can not be renamed, so we can not rename Comdat function in a
703 // group including global vars.
704 Comdat *C = F.getComdat();
705 for (auto &&CM : make_range(ComdatMembers.equal_range(C))) {
706 assert(!isa<GlobalAlias>(CM.second));
707 Function *FM = dyn_cast<Function>(CM.second);
708 if (FM != &F)
709 return false;
710 }
711 return true;
712}
713
714// Append the CFGHash to the Comdat function name.
// Does nothing unless canRenameComdat() accepts F. Otherwise F and FuncName
// get a ".<FunctionHash>" suffix and F is moved into a correspondingly named
// Comdat.
715template <class Edge, class BBInfo>
716void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() {
717 if (!canRenameComdat(F, ComdatMembers))
718 return;
 // NOTE(review): OrigName is not used in the lines visible here; presumably
 // the missing line after setName() consumes it -- confirm.
719 std::string OrigName = F.getName().str();
720 std::string NewFuncName =
721 Twine(F.getName() + "." + Twine(FunctionHash)).str();
722 F.setName(Twine(NewFuncName));
 // Keep the PGO function name in step with the renamed function.
724 FuncName = Twine(FuncName + "." + Twine(FunctionHash)).str();
725 Comdat *NewComdat;
726 Module *M = F.getParent();
727 // For AvailableExternallyLinkage functions, change the linkage to
728 // LinkOnceODR and put them into comdat. This is because after renaming, there
729 // is no backup external copy available for the function.
730 if (!F.hasComdat()) {
 // NOTE(review): two lines of this branch are not visible here, including
 // whatever performs the linkage change described above.
732 NewComdat = M->getOrInsertComdat(StringRef(NewFuncName));
734 F.setComdat(NewComdat);
735 return;
736 }
737
738 // This function belongs to a single function Comdat group.
739 Comdat *OrigComdat = F.getComdat();
740 std::string NewComdatName =
741 Twine(OrigComdat->getName() + "." + Twine(FunctionHash)).str();
742 NewComdat = M->getOrInsertComdat(StringRef(NewComdatName));
 // The new Comdat keeps the selection kind of the one it replaces.
743 NewComdat->setSelectionKind(OrigComdat->getSelectionKind());
744
 // Move every recorded member of the original Comdat to the new one.
745 for (auto &&CM : make_range(ComdatMembers.equal_range(OrigComdat))) {
746 // Must be a function.
747 cast<Function>(CM.second)->setComdat(NewComdat);
748 }
749}
750
751/// Collect all the BBs that will be instruments and add them to
752/// `InstrumentBBs`.
753template <class Edge, class BBInfo>
754void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs(
755 std::vector<BasicBlock *> &InstrumentBBs) {
756 if (BCI) {
757 for (auto &BB : F)
758 if (BCI->shouldInstrumentBlock(BB))
759 InstrumentBBs.push_back(&BB);
760 return;
761 }
762
763 // Use a worklist as we will update the vector during the iteration.
764 std::vector<Edge *> EdgeList;
765 EdgeList.reserve(MST.numEdges());
766 for (const auto &E : MST.allEdges())
767 EdgeList.push_back(E.get());
768
769 for (auto &E : EdgeList) {
770 BasicBlock *InstrBB = getInstrBB(E);
771 if (InstrBB)
772 InstrumentBBs.push_back(InstrBB);
773 }
774}
775
776// Given a CFG edge E to be instrumented, find the BB in which to place the
777// instrumentation code. The function will split the critical edge if necessary.
// Returns nullptr when E needs no instrumentation (it is in the MST or was
// removed), when the chosen block has no insertion point, or when a critical
// edge cannot be split.
778template <class Edge, class BBInfo>
779BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) {
780 if (E->InMST || E->Removed)
781 return nullptr;
782
783 BasicBlock *SrcBB = E->SrcBB;
784 BasicBlock *DestBB = E->DestBB;
785 // For a fake edge, instrument the real BB.
786 if (SrcBB == nullptr)
787 return DestBB;
788 if (DestBB == nullptr)
789 return SrcBB;
790
 // Returns BB itself when it can hold instrumentation, nullptr otherwise.
791 auto canInstrument = [](BasicBlock *BB) -> BasicBlock * {
792 // There are basic blocks (such as catchswitch) that cannot be instrumented.
793 // If the returned first insertion point is the end of BB, skip this BB.
794 if (BB->getFirstInsertionPt() == BB->end())
795 return nullptr;
796 return BB;
797 };
798
799 // Instrument the SrcBB if it has a single successor,
800 // otherwise, the DestBB if this is not a critical edge.
801 Instruction *TI = SrcBB->getTerminator();
802 if (TI->getNumSuccessors() <= 1)
803 return canInstrument(SrcBB);
804 if (!E->IsCritical)
805 return canInstrument(DestBB);
806
807 // Some IndirectBr critical edges cannot be split by the previous
808 // SplitIndirectBrCriticalEdges call. Bail out.
809 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
810 BasicBlock *InstrBB =
811 isa<IndirectBrInst>(TI) ? nullptr : SplitCriticalEdge(TI, SuccNum);
812 if (!InstrBB) {
 // NOTE(review): the opening of this debug statement is not visible in
 // these lines.
814 dbgs() << "Fail to split critical edge: not instrument this edge.\n");
815 return nullptr;
816 }
817 // For a critical edge, we have to split. Instrument the newly
818 // created BB.
819 IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++;
820 LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index
821 << " --> " << getBBInfo(DestBB).Index << "\n");
822 // Need to add two new edges. First one: Add new edge of SrcBB->InstrBB.
823 MST.addEdge(SrcBB, InstrBB, 0);
824 // Second one: Add new edge of InstrBB->DestBB.
825 Edge &NewEdge1 = MST.addEdge(InstrBB, DestBB, 0);
 // Only the InstrBB->DestBB edge is marked as being in the MST; the original
 // edge is marked removed, its place taken by the two new edges.
826 NewEdge1.InMST = true;
827 E->Removed = true;
828
829 return canInstrument(InstrBB);
830}
831
832// When generating value profiling calls on Windows routines that make use of
833// handler funclets for exception processing an operand bundle needs to attached
834// to the called function. This routine will set \p OpBundles to contain the
835// funclet information, if any is needed, that should be placed on the generated
836// value profiling call for the value profile candidate call.
837static void
841 auto *OrigCall = dyn_cast<CallBase>(Cand.AnnotatedInst);
842 if (!OrigCall)
843 return;
844
845 if (!isa<IntrinsicInst>(OrigCall)) {
846 // The instrumentation call should belong to the same funclet as a
847 // non-intrinsic call, so just copy the operand bundle, if any exists.
848 std::optional<OperandBundleUse> ParentFunclet =
849 OrigCall->getOperandBundle(LLVMContext::OB_funclet);
850 if (ParentFunclet)
851 OpBundles.emplace_back(OperandBundleDef(*ParentFunclet));
852 } else {
853 // Intrinsics or other instructions do not get funclet information from the
854 // front-end. Need to use the BlockColors that was computed by the routine
855 // colorEHFunclets to determine whether a funclet is needed.
856 if (!BlockColors.empty()) {
857 const ColorVector &CV = BlockColors.find(OrigCall->getParent())->second;
858 assert(CV.size() == 1 && "non-unique color for block!");
859 Instruction *EHPad = CV.front()->getFirstNonPHI();
860 if (EHPad->isEHPad())
861 OpBundles.emplace_back("funclet", EHPad);
862 }
863 }
864}
865
866// Visit all edge and instrument the edges not in MST, and do value profiling.
867// Critical edges will be split.
// NOTE(review): the start of the signature (original lines 868-870) is missing
// from this listing; only the last two parameters are visible. The body also
// uses F, M, TLI, BPI and BFI, which are presumably the missing parameters.
871 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
872 bool IsCS) {
873 if (!PGOBlockCoverage) {
874 // Split indirectbr critical edges here before computing the MST rather than
875 // later in getInstrBB() to avoid invalidating it.
876 SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, BFI);
877 }
878
879 FuncPGOInstrumentation<PGOEdge, PGOBBInfo> FuncInfo(
880 F, TLI, ComdatMembers, true, BPI, BFI, IsCS, shouldInstrumentEntryBB(),
// NOTE(review): the end of this constructor call (original line 881) is
// missing from the listing.
882
// Name and CFGHash are the first two operands of every counter intrinsic
// emitted below.
883 auto Name = FuncInfo.FuncNameVar;
884 auto CFGHash = ConstantInt::get(Type::getInt64Ty(M->getContext()),
885 FuncInfo.FunctionHash);
// NOTE(review): the condition opening this block (original line 886) is
// missing. The block emits a single llvm.instrprof.cover call (one counter,
// index 0) at the first insertion point of the entry block and returns.
887 auto &EntryBB = F.getEntryBlock();
888 IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt());
889 // llvm.instrprof.cover(i8* <name>, i64 <hash>, i32 <num-counters>,
890 // i32 <index>)
891 Builder.CreateCall(
892 Intrinsic::getDeclaration(M, Intrinsic::instrprof_cover),
893 {Name, CFGHash, Builder.getInt32(1), Builder.getInt32(0)});
894 return;
895 }
896
// One counter per block selected for instrumentation plus one per select
// instruction counted by SIVisitor.
897 std::vector<BasicBlock *> InstrumentBBs;
898 FuncInfo.getInstrumentBBs(InstrumentBBs);
899 unsigned NumCounters =
900 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
901
// NOTE(review): the condition opening this block (original line 902) is
// missing. The block inserts an llvm.instrprof.callsite call before every
// call site that is neither an intrinsic call nor inline asm.
903 auto *CSIntrinsic =
904 Intrinsic::getDeclaration(M, Intrinsic::instrprof_callsite);
905 // We want to count the instrumentable callsites, then instrument them. This
906 // is because the llvm.instrprof.callsite intrinsic has an argument (like
907 // the other instrprof intrinsics) capturing the total number of
908 // instrumented objects (counters, or callsites, in this case). In this
909 // case, we want that value so we can readily pass it to the compiler-rt
910 // APIs that may have to allocate memory based on the nr of callsites.
911 // The traversal logic is the same for both counting and instrumentation,
912 // just needs to be done in succession.
913 auto Visit = [&](llvm::function_ref<void(CallBase * CB)> Visitor) {
914 for (auto &BB : F)
915 for (auto &Instr : BB)
916 if (auto *CS = dyn_cast<CallBase>(&Instr)) {
917 if ((CS->getCalledFunction() &&
918 CS->getCalledFunction()->isIntrinsic()) ||
919 dyn_cast<InlineAsm>(CS->getCalledOperand()))
920 continue;
921 Visitor(CS);
922 }
923 };
924 // First, count callsites.
925 uint32_t TotalNrCallsites = 0;
926 Visit([&TotalNrCallsites](auto *) { ++TotalNrCallsites; });
927
928 // Now instrument.
929 uint32_t CallsiteIndex = 0;
930 Visit([&](auto *CB) {
931 IRBuilder<> Builder(CB);
932 Builder.CreateCall(CSIntrinsic,
933 {Name, CFGHash, Builder.getInt32(TotalNrCallsites),
934 Builder.getInt32(CallsiteIndex++),
935 CB->getCalledOperand()});
936 });
937 }
938
// I is the running counter index handed to each counter intrinsic below.
939 uint32_t I = 0;
// NOTE(review): the condition opening this block (original line 940) is
// missing. The block grows NumCounters by 8 (with PGOBlockCoverage) or 1,
// emits llvm.instrprof.timestamp at the entry block with the current index,
// and then advances I by the same amount.
941 NumCounters += PGOBlockCoverage ? 8 : 1;
942 auto &EntryBB = F.getEntryBlock();
943 IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt());
944 // llvm.instrprof.timestamp(i8* <name>, i64 <hash>, i32 <num-counters>,
945 // i32 <index>)
946 Builder.CreateCall(
947 Intrinsic::getDeclaration(M, Intrinsic::instrprof_timestamp),
948 {Name, CFGHash, Builder.getInt32(NumCounters), Builder.getInt32(I)});
949 I += PGOBlockCoverage ? 8 : 1;
950 }
951
952 for (auto *InstrBB : InstrumentBBs) {
953 IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt());
954 assert(Builder.GetInsertPoint() != InstrBB->end() &&
955 "Cannot get the Instrumentation point");
956 // llvm.instrprof.increment(i8* <name>, i64 <hash>, i32 <num-counters>,
957 // i32 <index>)
// NOTE(review): the first line of the callee expression (original line 959) is
// missing; the visible part selects between instrprof_cover and
// instrprof_increment.
958 Builder.CreateCall(
960 ? Intrinsic::instrprof_cover
961 : Intrinsic::instrprof_increment),
962 {Name, CFGHash, Builder.getInt32(NumCounters), Builder.getInt32(I++)});
963 }
964
965 // Now instrument select instructions:
966 FuncInfo.SIVisitor.instrumentSelects(&I, NumCounters, FuncInfo.FuncNameVar,
967 FuncInfo.FunctionHash);
// After the selects are instrumented the running index must equal the number
// of counters announced to the intrinsics.
968 assert(I == NumCounters);
969
// NOTE(review): the condition guarding this early return (original line 970)
// is missing. Everything below is value profiling.
971 return;
972
973 NumOfPGOICall += FuncInfo.ValueSites[IPVK_IndirectCallTarget].size();
974
975 // Intrinsic function calls do not have funclet operand bundles needed for
976 // Windows exception handling attached to them. However, if value profiling is
977 // inserted for one of these calls, then a funclet value will need to be set
978 // on the instrumentation call based on the funclet coloring.
// NOTE(review): original line 979 is missing; presumably it declares
// BlockColors, which is assigned just below.
980 if (F.hasPersonalityFn() &&
981 isScopedEHPersonality(classifyEHPersonality(F.getPersonalityFn())))
982 BlockColors = colorEHFunclets(F);
983
984 // For each VP Kind, walk the VP candidates and instrument each one.
985 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) {
// Site indices restart at 0 for every value kind.
986 unsigned SiteIndex = 0;
987 if (Kind == IPVK_MemOPSize && !PGOInstrMemOP)
988 continue;
989
990 for (VPCandidateInfo Cand : FuncInfo.ValueSites[Kind]) {
991 LLVM_DEBUG(dbgs() << "Instrument one VP " << ValueProfKindDescr[Kind]
992 << " site: CallSite Index = " << SiteIndex << "\n");
993
994 IRBuilder<> Builder(Cand.InsertPt);
995 assert(Builder.GetInsertPoint() != Cand.InsertPt->getParent()->end() &&
996 "Cannot get the Instrumentation point");
997
// The profiled value is passed as an i64: integers are zero-extended or
// truncated, pointers are converted with ptrtoint.
998 Value *ToProfile = nullptr;
999 if (Cand.V->getType()->isIntegerTy())
1000 ToProfile = Builder.CreateZExtOrTrunc(Cand.V, Builder.getInt64Ty());
1001 else if (Cand.V->getType()->isPointerTy())
1002 ToProfile = Builder.CreatePtrToInt(Cand.V, Builder.getInt64Ty());
1003 assert(ToProfile && "value profiling Value is of unexpected type");
1004
// NOTE(review): original line 1005 is missing; presumably it declares
// OpBundles, which is filled in by populateEHOperandBundle.
1006 populateEHOperandBundle(Cand, BlockColors, OpBundles);
1007 Builder.CreateCall(
1008 Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile),
1009 {FuncInfo.FuncNameVar, Builder.getInt64(FuncInfo.FunctionHash),
1010 ToProfile, Builder.getInt32(Kind), Builder.getInt32(SiteIndex++)},
1011 OpBundles);
1012 }
1013 } // IPVK_First <= Kind <= IPVK_Last
1014}
1015
1016namespace {
1017
1018// This class represents a CFG edge in profile use compilation.
1019struct PGOUseEdge : public PGOEdge {
1020 using PGOEdge::PGOEdge;
1021
1022 std::optional<uint64_t> Count;
1023
1024 // Set edge count value
1025 void setEdgeCount(uint64_t Value) { Count = Value; }
1026
1027 // Return the information string for this object.
1028 std::string infoString() const {
1029 if (!Count)
1030 return PGOEdge::infoString();
1031 return (Twine(PGOEdge::infoString()) + " Count=" + Twine(*Count)).str();
1032 }
1033};
1034
1035using DirectEdges = SmallVector<PGOUseEdge *, 2>;
1036
1037// This class stores the auxiliary information for each BB.
1038struct PGOUseBBInfo : public PGOBBInfo {
1039 std::optional<uint64_t> Count;
1040 int32_t UnknownCountInEdge = 0;
1041 int32_t UnknownCountOutEdge = 0;
1042 DirectEdges InEdges;
1043 DirectEdges OutEdges;
1044
1045 PGOUseBBInfo(unsigned IX) : PGOBBInfo(IX) {}
1046
1047 // Set the profile count value for this BB.
1048 void setBBInfoCount(uint64_t Value) { Count = Value; }
1049
1050 // Return the information string of this object.
1051 std::string infoString() const {
1052 if (!Count)
1053 return PGOBBInfo::infoString();
1054 return (Twine(PGOBBInfo::infoString()) + " Count=" + Twine(*Count)).str();
1055 }
1056
1057 // Add an OutEdge and update the edge count.
1058 void addOutEdge(PGOUseEdge *E) {
1059 OutEdges.push_back(E);
1060 UnknownCountOutEdge++;
1061 }
1062
1063 // Add an InEdge and update the edge count.
1064 void addInEdge(PGOUseEdge *E) {
1065 InEdges.push_back(E);
1066 UnknownCountInEdge++;
1067 }
1068};
1069
1070} // end anonymous namespace
1071
1072// Sum up the count values for all the edges.
// NOTE(review): the signature (original line 1073) is missing from this
// listing. The visible body iterates over Edges, skips edges marked Removed
// and edges whose Count is not set, and returns the sum of the remaining
// counts as a uint64_t.
1074 uint64_t Total = 0;
1075 for (const auto &E : Edges) {
1076 if (E->Removed)
1077 continue;
1078 if (E->Count)
1079 Total += *E->Count;
1080 }
1081 return Total;
1082}
1083
1084namespace {
1085
// Per-function state for the profile-use compilation. Its methods read the
// counters for one function from the profile, populate block and edge counts,
// and annotate the IR (branch weights, value sites, irreducible loop header
// weights).
1086class PGOUseFunc {
1087public:
// NOTE(review): one line of the constructor's parameter list (original line
// 1090) is missing from this listing; the initializer list below also uses
// BPI and BFIin, which are presumably declared there.
1088 PGOUseFunc(Function &Func, Module *Modu, TargetLibraryInfo &TLI,
1089 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
1091 ProfileSummaryInfo *PSI, bool IsCS, bool InstrumentFuncEntry,
1092 bool HasSingleByteCoverage)
1093 : F(Func), M(Modu), BFI(BFIin), PSI(PSI),
1094 FuncInfo(Func, TLI, ComdatMembers, false, BPI, BFIin, IsCS,
1095 InstrumentFuncEntry, HasSingleByteCoverage),
1096 FreqAttr(FFA_Normal), IsCS(IsCS) {}
1097
// Handle an error returned while looking up the profile record for this
// function; MismatchedFuncSum is reported in the warning text.
1098 void handleInstrProfError(Error Err, uint64_t MismatchedFuncSum);
1099
1100 // Read counts for the instrumented BB from profile.
// NOTE(review): the rest of this declaration (original line 1102) is missing
// from the listing.
1101 bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
1103
1104 // Populate the counts for all BBs.
1105 void populateCounters();
1106
1107 // Set block coverage based on profile coverage values.
1108 void populateCoverage(IndexedInstrProfReader *PGOReader);
1109
1110 // Set the branch weights based on the count values.
1111 void setBranchWeights();
1112
1113 // Annotate the value profile call sites for all value kind.
1114 void annotateValueSites();
1115
1116 // Annotate the value profile call sites for one value kind.
1117 void annotateValueSites(uint32_t Kind);
1118
1119 // Annotate the irreducible loop header weights.
1120 void annotateIrrLoopHeaderWeights();
1121
1122 // The hotness of the function from the profile count.
1123 enum FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot };
1124
1125 // Return the function hotness from the profile.
1126 FuncFreqAttr getFuncFreqAttr() const { return FreqAttr; }
1127
1128 // Return the function hash.
1129 uint64_t getFuncHash() const { return FuncInfo.FunctionHash; }
1130
1131 // Return the profile record for this function;
1132 InstrProfRecord &getProfileRecord() { return ProfileRecord; }
1133
1134 // Return the auxiliary BB information.
1135 PGOUseBBInfo &getBBInfo(const BasicBlock *BB) const {
1136 return FuncInfo.getBBInfo(BB);
1137 }
1138
1139 // Return the auxiliary BB information if available.
1140 PGOUseBBInfo *findBBInfo(const BasicBlock *BB) const {
1141 return FuncInfo.findBBInfo(BB);
1142 }
1143
// Return the function this object was created for.
1144 Function &getFunc() const { return F; }
1145
// Forward to FuncInfo.dumpInfo with the given label.
1146 void dumpInfo(StringRef Str = "") const { FuncInfo.dumpInfo(Str); }
1147
// Return ProgramMaxCount. Note that it has no in-class initializer and the
// constructor does not set it.
1148 uint64_t getProgramMaxCount() const { return ProgramMaxCount; }
1149
1150private:
1151 Function &F;
1152 Module *M;
// NOTE(review): original line 1153 is missing; presumably it declares the BFI
// member initialized in the constructor.
1154 ProfileSummaryInfo *PSI;
1155
1156 // This member stores the shared information with class PGOGenFunc.
1157 FuncPGOInstrumentation<PGOUseEdge, PGOUseBBInfo> FuncInfo;
1158
1159 // The maximum count value in the profile. This is only used in PGO use
1160 // compilation.
1161 uint64_t ProgramMaxCount;
1162
1163 // Position of counter that remains to be read.
1164 uint32_t CountPosition = 0;
1165
1166 // Total size of the profile count for this function.
1167 uint32_t ProfileCountSize = 0;
1168
1169 // ProfileRecord for this function.
1170 InstrProfRecord ProfileRecord;
1171
1172 // Function hotness info derived from profile.
1173 FuncFreqAttr FreqAttr;
1174
1175 // Is to use the context sensitive profile.
1176 bool IsCS;
1177
1178 // Find the Instrumented BB and set the value. Return false on error.
1179 bool setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile);
1180
1181 // Set the edge counter value for the unknown edge -- there should be only
1182 // one unknown edge.
1183 void setEdgeCount(DirectEdges &Edges, uint64_t Value);
1184
1185 // Set the hot/cold inline hints based on the count values.
1186 // FIXME: This function should be removed once the functionality in
1187 // the inliner is implemented.
// Hot is decided from the entry count and takes precedence; cold is decided
// from the maximum count. If neither applies FreqAttr is left unchanged.
1188 void markFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) {
1189 if (PSI->isHotCount(EntryCount))
1190 FreqAttr = FFA_Hot;
1191 else if (PSI->isColdCount(MaxCount))
1192 FreqAttr = FFA_Cold;
1193 }
1194};
1195
1196} // end anonymous namespace
1197
1198/// Set up InEdges/OutEdges for all BBs in the MST.
// NOTE(review): the first line of the signature (original line 1199) is
// missing from this listing. For every edge that is not marked Removed, the
// body registers the edge as an out-edge of its source block's info and as an
// in-edge of its destination block's info.
1200 const FuncPGOInstrumentation<PGOUseEdge, PGOUseBBInfo> &FuncInfo) {
1201 // This is not required when there is block coverage inference.
1202 if (FuncInfo.BCI)
1203 return;
1204 for (const auto &E : FuncInfo.MST.allEdges()) {
1205 if (E->Removed)
1206 continue;
1207 const BasicBlock *SrcBB = E->SrcBB;
1208 const BasicBlock *DestBB = E->DestBB;
1209 PGOUseBBInfo &SrcInfo = FuncInfo.getBBInfo(SrcBB);
1210 PGOUseBBInfo &DestInfo = FuncInfo.getBBInfo(DestBB);
1211 SrcInfo.addOutEdge(E.get());
1212 DestInfo.addInEdge(E.get());
1213 }
1214}
1215
1216// Visit all the edges and assign the count value for the instrumented
1217// edges and the BB. Return false on error.
1218bool PGOUseFunc::setInstrumentedCounts(
1219 const std::vector<uint64_t> &CountFromProfile) {
1220
1221 std::vector<BasicBlock *> InstrumentBBs;
1222 FuncInfo.getInstrumentBBs(InstrumentBBs);
1223
1224 setupBBInfoEdges(FuncInfo);
1225
1226 unsigned NumCounters =
1227 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
1228 // The number of counters here should match the number of counters
1229 // in profile. Return if they mismatch.
1230 if (NumCounters != CountFromProfile.size()) {
1231 return false;
1232 }
1233 auto *FuncEntry = &*F.begin();
1234
1235 // Set the profile count to the Instrumented BBs.
1236 uint32_t I = 0;
1237 for (BasicBlock *InstrBB : InstrumentBBs) {
1238 uint64_t CountValue = CountFromProfile[I++];
1239 PGOUseBBInfo &Info = getBBInfo(InstrBB);
1240 // If we reach here, we know that we have some nonzero count
1241 // values in this function. The entry count should not be 0.
1242 // Fix it if necessary.
1243 if (InstrBB == FuncEntry && CountValue == 0)
1244 CountValue = 1;
1245 Info.setBBInfoCount(CountValue);
1246 }
1247 ProfileCountSize = CountFromProfile.size();
1248 CountPosition = I;
1249
1250 // Set the edge count and update the count of unknown edges for BBs.
1251 auto setEdgeCount = [this](PGOUseEdge *E, uint64_t Value) -> void {
1252 E->setEdgeCount(Value);
1253 this->getBBInfo(E->SrcBB).UnknownCountOutEdge--;
1254 this->getBBInfo(E->DestBB).UnknownCountInEdge--;
1255 };
1256
1257 // Set the profile count the Instrumented edges. There are BBs that not in
1258 // MST but not instrumented. Need to set the edge count value so that we can
1259 // populate the profile counts later.
1260 for (const auto &E : FuncInfo.MST.allEdges()) {
1261 if (E->Removed || E->InMST)
1262 continue;
1263 const BasicBlock *SrcBB = E->SrcBB;
1264 PGOUseBBInfo &SrcInfo = getBBInfo(SrcBB);
1265
1266 // If only one out-edge, the edge profile count should be the same as BB
1267 // profile count.
1268 if (SrcInfo.Count && SrcInfo.OutEdges.size() == 1)
1269 setEdgeCount(E.get(), *SrcInfo.Count);
1270 else {
1271 const BasicBlock *DestBB = E->DestBB;
1272 PGOUseBBInfo &DestInfo = getBBInfo(DestBB);
1273 // If only one in-edge, the edge profile count should be the same as BB
1274 // profile count.
1275 if (DestInfo.Count && DestInfo.InEdges.size() == 1)
1276 setEdgeCount(E.get(), *DestInfo.Count);
1277 }
1278 if (E->Count)
1279 continue;
1280 // E's count should have been set from profile. If not, this meenas E skips
1281 // the instrumentation. We set the count to 0.
1282 setEdgeCount(E.get(), 0);
1283 }
1284 return true;
1285}
1286
1287// Set the count value for the unknown edge. There should be one and only one
1288// unknown edge in Edges vector.
1289void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) {
1290 for (auto &E : Edges) {
1291 if (E->Count)
1292 continue;
1293 E->setEdgeCount(Value);
1294
1295 getBBInfo(E->SrcBB).UnknownCountOutEdge--;
1296 getBBInfo(E->DestBB).UnknownCountInEdge--;
1297 return;
1298 }
1299 llvm_unreachable("Cannot find the unknown count edge");
1300}
1301
1302// Emit function metadata indicating PGO profile mismatch.
// NOTE(review): the signature (original line 1303) and original line 1305
// (presumably the declaration of Names) are missing from this listing. The
// visible body uses F and ctx.
1304 const char MetadataName[] = "instr_prof_hash_mismatch";
1306 // If this metadata already exists, ignore.
// Existing annotation operands are copied into Names so that they are kept
// when the annotation node is replaced below.
1307 auto *Existing = F.getMetadata(LLVMContext::MD_annotation);
1308 if (Existing) {
1309 MDTuple *Tuple = cast<MDTuple>(Existing);
1310 for (const auto &N : Tuple->operands()) {
1311 if (N.equalsStr(MetadataName))
1312 return;
1313 Names.push_back(N.get());
1314 }
1315 }
1316
// Append the mismatch marker and install the resulting tuple as the
// function's MD_annotation metadata.
1317 MDBuilder MDB(ctx);
1318 Names.push_back(MDB.createString(MetadataName));
1319 MDNode *MD = MDTuple::get(ctx, Names);
1320 F.setMetadata(LLVMContext::MD_annotation, MD);
1321}
1322
// Handle an InstrProfError produced while looking up this function's profile
// record: bump the matching statistic, tag the function with hash-mismatch
// metadata for hash_mismatch/malformed errors, and emit a DS_Warning
// diagnostic unless the warning is suppressed. MismatchedFuncSum is only used
// in the warning text.
1323void PGOUseFunc::handleInstrProfError(Error Err, uint64_t MismatchedFuncSum) {
1324 handleAllErrors(std::move(Err), [&](const InstrProfError &IPE) {
1325 auto &Ctx = M->getContext();
1326 auto Err = IPE.get();
1327 bool SkipWarning = false;
1328 LLVM_DEBUG(dbgs() << "Error in reading profile for Func "
1329 << FuncInfo.FuncName << ": ");
1330 if (Err == instrprof_error::unknown_function) {
1331 IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++;
// Missing-profile warnings are emitted only when PGOWarnMissing is set.
1332 SkipWarning = !PGOWarnMissing;
1333 LLVM_DEBUG(dbgs() << "unknown function");
1334 } else if (Err == instrprof_error::hash_mismatch ||
1335 Err == instrprof_error::malformed) {
1336 IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++;
// NOTE(review): parts of this expression (original lines 1338, 1339 and 1341)
// are missing from the listing; the visible fragment tests whether F is in a
// comdat or has WeakAny linkage.
1337 SkipWarning =
1340 (F.hasComdat() || F.getLinkage() == GlobalValue::WeakAnyLinkage ||
1342 LLVM_DEBUG(dbgs() << "hash mismatch (hash= " << FuncInfo.FunctionHash
1343 << " skip=" << SkipWarning << ")");
1344 // Emit function metadata indicating PGO profile mismatch.
1345 annotateFunctionWithHashMismatch(F, M->getContext());
1346 }
1347
1348 LLVM_DEBUG(dbgs() << " IsCS=" << IsCS << "\n");
1349 if (SkipWarning)
1350 return;
1351
1352 std::string Msg =
1353 IPE.message() + std::string(" ") + F.getName().str() +
1354 std::string(" Hash = ") + std::to_string(FuncInfo.FunctionHash) +
1355 std::string(" up to ") + std::to_string(MismatchedFuncSum) +
1356 std::string(" count discarded");
1357
1358 Ctx.diagnose(
1359 DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning));
1360 });
1361}
1362
1363// Read the profile from ProfileFileName and assign the value to the
1364// instrumented BB and the edges. This function also updates ProgramMaxCount.
1365// Return true if the profile are successfully read, and false on errors.
// AllZeros is set to whether the sum of all counters in the record is 0. It is
// written only when a non-pseudo record was found.
// NOTE(review): the end of the signature (original line 1367) is missing from
// this listing; the body assigns to PseudoKind, which is presumably declared
// there.
1366bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
1368 auto &Ctx = M->getContext();
1369 uint64_t MismatchedFuncSum = 0;
// NOTE(review): the start of this statement (original line 1370) is missing;
// it produces Result, from which the error or the record is taken below.
1371 FuncInfo.FuncName, FuncInfo.FunctionHash, FuncInfo.DeprecatedFuncName,
1372 &MismatchedFuncSum);
1373 if (Error E = Result.takeError()) {
1374 handleInstrProfError(std::move(E), MismatchedFuncSum);
1375 return false;
1376 }
1377 ProfileRecord = std::move(Result.get());
// Records of a pseudo kind are reported as success without setting any block
// or edge counts here.
1378 PseudoKind = ProfileRecord.getCountPseudoKind();
1379 if (PseudoKind != InstrProfRecord::NotPseudo) {
1380 return true;
1381 }
1382 std::vector<uint64_t> &CountFromProfile = ProfileRecord.Counts;
1383
1384 IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++;
1385 LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n");
1386
1387 uint64_t ValueSum = 0;
1388 for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) {
1389 LLVM_DEBUG(dbgs() << " " << I << ": " << CountFromProfile[I] << "\n");
1390 ValueSum += CountFromProfile[I];
1391 }
1392 AllZeros = (ValueSum == 0);
1393
1394 LLVM_DEBUG(dbgs() << "SUM = " << ValueSum << "\n");
1395
// NOTE(review): the info record keyed by nullptr gets both unknown-edge
// counters preset to 2, presumably so that the count propagation never treats
// it as having zero or one unknown edge; confirm against FuncPGOInstrumentation.
1396 getBBInfo(nullptr).UnknownCountOutEdge = 2;
1397 getBBInfo(nullptr).UnknownCountInEdge = 2;
1398
1399 if (!setInstrumentedCounts(CountFromProfile)) {
1400 LLVM_DEBUG(
1401 dbgs() << "Inconsistent number of counts, skipping this function");
1402 Ctx.diagnose(DiagnosticInfoPGOProfile(
1403 M->getName().data(),
1404 Twine("Inconsistent number of counts in ") + F.getName().str() +
1405 Twine(": the profile may be stale or there is a function name "
1406 "collision."),
1407 DS_Warning));
1408 return false;
1409 }
1410 ProgramMaxCount = PGOReader->getMaximumFunctionCount(IsCS);
1411 return true;
1412}
1413
// Turn block-coverage counters from the profile into a per-block covered/dead
// map, extend it to non-instrumented blocks through the BCI dependencies, and
// annotate the function with an entry count and 0/1 branch weights. Finally
// cross-check the result against a freshly computed BlockFrequencyInfo.
1414void PGOUseFunc::populateCoverage(IndexedInstrProfReader *PGOReader) {
1415 uint64_t MismatchedFuncSum = 0;
// NOTE(review): the start of this statement (original line 1416) is missing;
// it produces Result, from which the error or the record is taken below.
1417 FuncInfo.FuncName, FuncInfo.FunctionHash, FuncInfo.DeprecatedFuncName,
1418 &MismatchedFuncSum);
1419 if (auto Err = Result.takeError()) {
1420 handleInstrProfError(std::move(Err), MismatchedFuncSum);
1421 return;
1422 }
1423 IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++;
1424
1425 std::vector<uint64_t> &CountsFromProfile = Result.get().Counts;
// NOTE(review): original line 1426 is missing; presumably it declares
// Coverage, a map from block to bool that is filled below.
// Counters are consumed in function block order, one per instrumented block;
// a nonzero counter marks the block as covered.
1427 unsigned Index = 0;
1428 for (auto &BB : F)
1429 if (FuncInfo.BCI->shouldInstrumentBlock(BB))
1430 Coverage[&BB] = (CountsFromProfile[Index++] != 0);
1431 assert(Index == CountsFromProfile.size());
1432
1433 // For each B in InverseDependencies[A], if A is covered then B is covered.
// NOTE(review): the first line of this declaration (original line 1434) is
// missing; the map is indexed by block and holds a set of blocks.
1435 InverseDependencies;
1436 for (auto &BB : F) {
1437 for (auto *Dep : FuncInfo.BCI->getDependencies(BB)) {
1438 // If Dep is covered then BB is covered.
1439 InverseDependencies[Dep].insert(&BB);
1440 }
1441 }
1442
1443 // Infer coverage of the non-instrumented blocks using a flood-fill algorithm.
1444 std::stack<const BasicBlock *> CoveredBlocksToProcess;
1445 for (auto &[BB, IsCovered] : Coverage)
1446 if (IsCovered)
1447 CoveredBlocksToProcess.push(BB);
1448
1449 while (!CoveredBlocksToProcess.empty()) {
1450 auto *CoveredBlock = CoveredBlocksToProcess.top();
1451 assert(Coverage[CoveredBlock]);
1452 CoveredBlocksToProcess.pop();
1453 for (auto *BB : InverseDependencies[CoveredBlock]) {
1454 // If CoveredBlock is covered then BB is covered.
1455 if (Coverage[BB])
1456 continue;
1457 Coverage[BB] = true;
1458 CoveredBlocksToProcess.push(BB);
1459 }
1460 }
1461
1462 // Annotate block coverage.
1463 MDBuilder MDB(F.getContext());
1464 // We set the entry count to 10000 if the entry block is covered so that BFI
1465 // can propagate a fraction of this count to the other covered blocks.
1466 F.setEntryCount(Coverage[&F.getEntryBlock()] ? 10000 : 0);
1467 for (auto &BB : F) {
1468 // For a block A and its successor B, we set the edge weight as follows:
1469 // If A is covered and B is covered, set weight=1.
1470 // If A is covered and B is uncovered, set weight=0.
1471 // If A is uncovered, set weight=1.
1472 // This setup will allow BFI to give nonzero profile counts to only covered
1473 // blocks.
// NOTE(review): original line 1474 is missing; presumably it declares Weights.
1475 for (auto *Succ : successors(&BB))
1476 Weights.push_back((Coverage[Succ] || !Coverage[&BB]) ? 1 : 0);
// Weights are attached only to terminators with at least two successors.
1477 if (Weights.size() >= 2)
1478 llvm::setBranchWeights(*BB.getTerminator(), Weights,
1479 /*IsExpected=*/false);
1480 }
1481
// Recompute BFI locally from the annotations just written and compare its
// verdict per block with the inferred coverage.
1482 unsigned NumCorruptCoverage = 0;
1483 DominatorTree DT(F);
1484 LoopInfo LI(DT);
1485 BranchProbabilityInfo BPI(F, LI);
1486 BlockFrequencyInfo BFI(F, BPI, LI);
// Returns whether BFI gives BB a profile count of 0, or an empty optional when
// BFI has no profile count for the block.
1487 auto IsBlockDead = [&](const BasicBlock &BB) -> std::optional<bool> {
1488 if (auto C = BFI.getBlockProfileCount(&BB))
1489 return C == 0;
1490 return {};
1491 };
1492 LLVM_DEBUG(dbgs() << "Block Coverage: (Instrumented=*, Covered=X)\n");
1493 for (auto &BB : F) {
1494 LLVM_DEBUG(dbgs() << (FuncInfo.BCI->shouldInstrumentBlock(BB) ? "* " : " ")
1495 << (Coverage[&BB] ? "X " : " ") << " " << BB.getName()
1496 << "\n");
1497 // In some cases it is possible to find a covered block that has no covered
1498 // successors, e.g., when a block calls a function that may call exit(). In
1499 // those cases, BFI could find its successor to be covered while BCI could
1500 // find its successor to be dead.
// A block is inconsistent when "covered" equals "dead according to BFI"; a
// missing BFI answer is treated as not dead.
1501 if (Coverage[&BB] == IsBlockDead(BB).value_or(false)) {
1502 LLVM_DEBUG(
1503 dbgs() << "Found inconsistent block covearge for " << BB.getName()
1504 << ": BCI=" << (Coverage[&BB] ? "Covered" : "Dead") << " BFI="
1505 << (IsBlockDead(BB).value() ? "Dead" : "Covered") << "\n");
1506 ++NumCorruptCoverage;
1507 }
1508 if (Coverage[&BB])
1509 ++NumCoveredBlocks;
1510 }
// The mismatch warning is emitted only when PGOVerifyBFI is set.
1511 if (PGOVerifyBFI && NumCorruptCoverage) {
1512 auto &Ctx = M->getContext();
1513 Ctx.diagnose(DiagnosticInfoPGOProfile(
1514 M->getName().data(),
1515 Twine("Found inconsistent block coverage for function ") + F.getName() +
1516 " in " + Twine(NumCorruptCoverage) + " blocks.",
1517 DS_Warning));
1518 }
// NOTE(review): the condition guarding the next statement (original line 1519)
// is missing from the listing.
1520 FuncInfo.BCI->viewBlockCoverageGraph(&Coverage);
1521}
1522
1523// Populate the counters from instrumented BBs to all BBs.
1524// In the end of this operation, all BBs should have a valid count value.
// The loop below repeats until a full pass over the blocks changes nothing.
// Afterwards the function's hot/cold attribute is derived and the select
// instructions are annotated.
1525void PGOUseFunc::populateCounters() {
1526 bool Changes = true;
1527 unsigned NumPasses = 0;
1528 while (Changes) {
1529 NumPasses++;
1530 Changes = false;
1531
1532 // For efficient traversal, it's better to start from the end as most
1533 // of the instrumented edges are at the end.
1534 for (auto &BB : reverse(F)) {
1535 PGOUseBBInfo *UseBBInfo = findBBInfo(&BB);
1536 if (UseBBInfo == nullptr)
1537 continue;
// A block count can be derived once all of its out-edges, or all of its
// in-edges, have known counts.
1538 if (!UseBBInfo->Count) {
1539 if (UseBBInfo->UnknownCountOutEdge == 0) {
1540 UseBBInfo->Count = sumEdgeCount(UseBBInfo->OutEdges);
1541 Changes = true;
1542 } else if (UseBBInfo->UnknownCountInEdge == 0) {
1543 UseBBInfo->Count = sumEdgeCount(UseBBInfo->InEdges);
1544 Changes = true;
1545 }
1546 }
// With a known block count and exactly one unknown edge on a side, that edge
// gets the block count minus the known edges, clamped at 0.
1547 if (UseBBInfo->Count) {
1548 if (UseBBInfo->UnknownCountOutEdge == 1) {
1549 uint64_t Total = 0;
1550 uint64_t OutSum = sumEdgeCount(UseBBInfo->OutEdges);
1551 // If the one of the successor block can early terminate (no-return),
1552 // we can end up with situation where out edge sum count is larger as
1553 // the source BB's count is collected by a post-dominated block.
1554 if (*UseBBInfo->Count > OutSum)
1555 Total = *UseBBInfo->Count - OutSum;
1556 setEdgeCount(UseBBInfo->OutEdges, Total);
1557 Changes = true;
1558 }
1559 if (UseBBInfo->UnknownCountInEdge == 1) {
1560 uint64_t Total = 0;
1561 uint64_t InSum = sumEdgeCount(UseBBInfo->InEdges);
1562 if (*UseBBInfo->Count > InSum)
1563 Total = *UseBBInfo->Count - InSum;
1564 setEdgeCount(UseBBInfo->InEdges, Total);
1565 Changes = true;
1566 }
1567 }
1568 }
1569 }
1570
1571 LLVM_DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n");
// NumPasses is otherwise only read inside LLVM_DEBUG; the cast marks it used.
1572 (void)NumPasses;
1573#ifndef NDEBUG
1574 // Assert every BB has a valid counter.
1575 for (auto &BB : F) {
1576 auto BI = findBBInfo(&BB);
1577 if (BI == nullptr)
1578 continue;
1579 assert(BI->Count && "BB count is not valid");
1580 }
1581#endif
// The maximum block count starts at the entry count and is raised by any
// larger block count.
1582 uint64_t FuncEntryCount = *getBBInfo(&*F.begin()).Count;
1583 uint64_t FuncMaxCount = FuncEntryCount;
1584 for (auto &BB : F) {
1585 auto BI = findBBInfo(&BB);
1586 if (BI == nullptr)
1587 continue;
1588 FuncMaxCount = std::max(FuncMaxCount, *BI->Count);
1589 }
1590
1591 // Fix the obviously inconsistent entry count.
1592 if (FuncMaxCount > 0 && FuncEntryCount == 0)
1593 FuncEntryCount = 1;
// NOTE(review): original line 1594 is missing from this listing.
1595 markFunctionAttributes(FuncEntryCount, FuncMaxCount);
1596
1597 // Now annotate select instructions
1598 FuncInfo.SIVisitor.annotateSelects(this, &CountPosition);
// After the selects are annotated every counter of the profile record must
// have been consumed.
1599 assert(CountPosition == ProfileCountSize);
1600
1601 LLVM_DEBUG(FuncInfo.dumpInfo("after reading profile."));
1602}
1603
1604// Assign the scaled count values to the BB with multiple out edges.
1605void PGOUseFunc::setBranchWeights() {
1606 // Generate MD_prof metadata for every branch instruction.
1607 LLVM_DEBUG(dbgs() << "\nSetting branch weights for func " << F.getName()
1608 << " IsCS=" << IsCS << "\n");
1609 for (auto &BB : F) {
1610 Instruction *TI = BB.getTerminator();
1611 if (TI->getNumSuccessors() < 2)
1612 continue;
1613 if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) ||
1614 isa<IndirectBrInst>(TI) || isa<InvokeInst>(TI) ||
1615 isa<CallBrInst>(TI)))
1616 continue;
1617
1618 const PGOUseBBInfo &BBCountInfo = getBBInfo(&BB);
1619 if (!*BBCountInfo.Count)
1620 continue;
1621
1622 // We have a non-zero Branch BB.
1623 unsigned Size = BBCountInfo.OutEdges.size();
1624 SmallVector<uint64_t, 2> EdgeCounts(Size, 0);
1625 uint64_t MaxCount = 0;
1626 for (unsigned s = 0; s < Size; s++) {
1627 const PGOUseEdge *E = BBCountInfo.OutEdges[s];
1628 const BasicBlock *SrcBB = E->SrcBB;
1629 const BasicBlock *DestBB = E->DestBB;
1630 if (DestBB == nullptr)
1631 continue;
1632 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
1633 uint64_t EdgeCount = *E->Count;
1634 if (EdgeCount > MaxCount)
1635 MaxCount = EdgeCount;
1636 EdgeCounts[SuccNum] = EdgeCount;
1637 }
1638
1639 if (MaxCount)
1640 setProfMetadata(M, TI, EdgeCounts, MaxCount);
1641 else {
1642 // A zero MaxCount can come about when we have a BB with a positive
1643 // count, and whose successor blocks all have 0 count. This can happen
1644 // when there is no exit block and the code exits via a noreturn function.
1645 auto &Ctx = M->getContext();
1646 Ctx.diagnose(DiagnosticInfoPGOProfile(
1647 M->getName().data(),
1648 Twine("Profile in ") + F.getName().str() +
1649 Twine(" partially ignored") +
1650 Twine(", possibly due to the lack of a return path."),
1651 DS_Warning));
1652 }
1653 }
1654}
1655
// NOTE(review): the signature (original line 1656) is missing from this
// listing. The visible body returns true when at least one predecessor of BB
// is terminated by an IndirectBrInst, and false otherwise.
1657 for (BasicBlock *Pred : predecessors(BB)) {
1658 if (isa<IndirectBrInst>(Pred->getTerminator()))
1659 return true;
1660 }
1661 return false;
1662}
1663
1664void PGOUseFunc::annotateIrrLoopHeaderWeights() {
1665 LLVM_DEBUG(dbgs() << "\nAnnotating irreducible loop header weights.\n");
1666 // Find irr loop headers
1667 for (auto &BB : F) {
1668 // As a heuristic also annotate indrectbr targets as they have a high chance
1669 // to become an irreducible loop header after the indirectbr tail
1670 // duplication.
1671 if (BFI->isIrrLoopHeader(&BB) || isIndirectBrTarget(&BB)) {
1672 Instruction *TI = BB.getTerminator();
1673 const PGOUseBBInfo &BBCountInfo = getBBInfo(&BB);
1674 setIrrLoopHeaderMetadata(M, TI, *BBCountInfo.Count);
1675 }
1676 }
1677}
1678
1679void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) {
1680 Module *M = F.getParent();
1681 IRBuilder<> Builder(&SI);
1682 Type *Int64Ty = Builder.getInt64Ty();
1683 auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty);
1684 Builder.CreateCall(
1685 Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment_step),
1686 {FuncNameVar, Builder.getInt64(FuncHash), Builder.getInt32(TotalNumCtrs),
1687 Builder.getInt32(*CurCtrIdx), Step});
1688 ++(*CurCtrIdx);
1689}
1690
1691void SelectInstVisitor::annotateOneSelectInst(SelectInst &SI) {
1692 std::vector<uint64_t> &CountFromProfile = UseFunc->getProfileRecord().Counts;
1693 assert(*CurCtrIdx < CountFromProfile.size() &&
1694 "Out of bound access of counters");
1695 uint64_t SCounts[2];
1696 SCounts[0] = CountFromProfile[*CurCtrIdx]; // True count
1697 ++(*CurCtrIdx);
1698 uint64_t TotalCount = 0;
1699 auto BI = UseFunc->findBBInfo(SI.getParent());
1700 if (BI != nullptr)
1701 TotalCount = *BI->Count;
1702 // False Count
1703 SCounts[1] = (TotalCount > SCounts[0] ? TotalCount - SCounts[0] : 0);
1704 uint64_t MaxCount = std::max(SCounts[0], SCounts[1]);
1705 if (MaxCount)
1706 setProfMetadata(F.getParent(), &SI, SCounts, MaxCount);
1707}
1708
1709void SelectInstVisitor::visitSelectInst(SelectInst &SI) {
1710 if (!PGOInstrSelect || PGOFunctionEntryCoverage || HasSingleByteCoverage)
1711 return;
1712 // FIXME: do not handle this yet.
1713 if (SI.getCondition()->getType()->isVectorTy())
1714 return;
1715
1716 switch (Mode) {
1717 case VM_counting:
1718 NSIs++;
1719 return;
1720 case VM_instrument:
1721 instrumentOneSelectInst(SI);
1722 return;
1723 case VM_annotate:
1724 annotateOneSelectInst(SI);
1725 return;
1726 }
1727
1728 llvm_unreachable("Unknown visiting mode");
1729}
1730
1731// Traverse all valuesites and annotate the instructions for all value kind.
1732void PGOUseFunc::annotateValueSites() {
// NOTE(review): the condition guarding this early return (original line 1733)
// is missing from the listing.
1734 return;
1735
1736 // Create the PGOFuncName meta data.
1737 createPGOFuncNameMetadata(F, FuncInfo.FuncName);
1738
// Annotate every value kind in turn through the per-kind overload.
1739 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
1740 annotateValueSites(Kind);
1741}
1742
1743// Annotate the instructions for a specific value kind.
// If the profile record holds a different number of value sites for Kind than
// this function has candidates, a warning is emitted and nothing is annotated.
1744void PGOUseFunc::annotateValueSites(uint32_t Kind) {
1745 assert(Kind <= IPVK_Last);
1746 unsigned ValueSiteIndex = 0;
1747 auto &ValueSites = FuncInfo.ValueSites[Kind];
1748 unsigned NumValueSites = ProfileRecord.getNumValueSites(Kind);
1749 if (NumValueSites != ValueSites.size()) {
1750 auto &Ctx = M->getContext();
1751 Ctx.diagnose(DiagnosticInfoPGOProfile(
1752 M->getName().data(),
1753 Twine("Inconsistent number of value sites for ") +
1754 Twine(ValueProfKindDescr[Kind]) + Twine(" profiling in \"") +
1755 F.getName().str() +
1756 Twine("\", possibly due to the use of a stale profile."),
1757 DS_Warning));
1758 return;
1759 }
1760
// Candidates and profile value sites are matched by position.
1761 for (VPCandidateInfo &I : ValueSites) {
1762 LLVM_DEBUG(dbgs() << "Read one value site profile (kind = " << Kind
1763 << "): Index = " << ValueSiteIndex << " out of "
1764 << NumValueSites << "\n");
// NOTE(review): the end of this call (original line 1768) is missing from the
// listing; the visible part passes MaxNumMemOPAnnotations for IPVK_MemOPSize.
1765 annotateValueSite(*M, *I.AnnotatedInst, ProfileRecord,
1766 static_cast<InstrProfValueKind>(Kind), ValueSiteIndex,
1767 Kind == IPVK_MemOPSize ? MaxNumMemOPAnnotations
1769 ValueSiteIndex++;
1770 }
1771}
1772
1773// Collect the set of members for each Comdat in module M and store
1774// in ComdatMembers.
// NOTE(review): the first line of the signature (original line 1775) is
// missing from this listing. Nothing is collected unless DoComdatRenaming is
// set. Functions, global variables and aliases that belong to a comdat are
// each inserted under that comdat.
1776 Module &M,
1777 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
1778 if (!DoComdatRenaming)
1779 return;
1780 for (Function &F : M)
1781 if (Comdat *C = F.getComdat())
1782 ComdatMembers.insert(std::make_pair(C, &F));
1783 for (GlobalVariable &GV : M.globals())
1784 if (Comdat *C = GV.getComdat())
1785 ComdatMembers.insert(std::make_pair(C, &GV));
1786 for (GlobalAlias &GA : M.aliases())
1787 if (Comdat *C = GA.getComdat())
1788 ComdatMembers.insert(std::make_pair(C, &GA));
1789}
1790
1791// Return true if we should not find instrumentation data for this function
1792static bool skipPGOUse(const Function &F) {
1793 if (F.isDeclaration())
1794 return true;
1795 // If there are too many critical edges, PGO might cause
1796 // compiler time problem. Skip PGO if the number of
1797 // critical edges execeed the threshold.
1798 unsigned NumCriticalEdges = 0;
1799 for (auto &BB : F) {
1800 const Instruction *TI = BB.getTerminator();
1801 for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) {
1802 if (isCriticalEdge(TI, I))
1803 NumCriticalEdges++;
1804 }
1805 }
1806 if (NumCriticalEdges > PGOFunctionCriticalEdgeThreshold) {
1807 LLVM_DEBUG(dbgs() << "In func " << F.getName()
1808 << ", NumCriticalEdges=" << NumCriticalEdges
1809 << " exceed the threshold. Skip PGO.\n");
1810 return true;
1811 }
1812 return false;
1813}
1814
1815// Return true if we should not instrument this function
1816static bool skipPGOGen(const Function &F) {
1817 if (skipPGOUse(F))
1818 return true;
1819 if (F.hasFnAttribute(llvm::Attribute::Naked))
1820 return true;
1821 if (F.hasFnAttribute(llvm::Attribute::NoProfile))
1822 return true;
1823 if (F.hasFnAttribute(llvm::Attribute::SkipProfile))
1824 return true;
1825 if (F.getInstructionCount() < PGOFunctionSizeThreshold)
1826 return true;
1827 return false;
1828}
1829
1831 Module &M, function_ref<TargetLibraryInfo &(Function &)> LookupTLI,
1833 function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS) {
1834 // For the context-sensitive instrumentation, we should have a separate pass
1835 // (before LTO/ThinLTO linking) to create these variables.
1837 createIRLevelProfileFlagVar(M, /*IsCS=*/false);
1838
1839 Triple TT(M.getTargetTriple());
1840 LLVMContext &Ctx = M.getContext();
1841 if (!TT.isOSBinFormatELF() && EnableVTableValueProfiling)
1843 M.getName().data(),
1844 Twine("VTable value profiling is presently not "
1845 "supported for non-ELF object formats"),
1846 DS_Warning));
1847 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
1848 collectComdatMembers(M, ComdatMembers);
1849
1850 for (auto &F : M) {
1851 if (skipPGOGen(F))
1852 continue;
1853 auto &TLI = LookupTLI(F);
1854 auto *BPI = LookupBPI(F);
1855 auto *BFI = LookupBFI(F);
1856 instrumentOneFunc(F, &M, TLI, BPI, BFI, ComdatMembers, IsCS);
1857 }
1858 return true;
1859}
1860
1863 createProfileFileNameVar(M, CSInstrName);
1864 // The variable in a comdat may be discarded by LTO. Ensure the declaration
1865 // will be retained.
1870 return PA;
1871}
1872
1875 auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
1876 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
1878 };
1879 auto LookupBPI = [&FAM](Function &F) {
1881 };
1882 auto LookupBFI = [&FAM](Function &F) {
1884 };
1885
1886 if (!InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, IsCS))
1887 return PreservedAnalyses::all();
1888
1889 return PreservedAnalyses::none();
1890}
1891
1892// Using the ratio b/w sums of profile count values and BFI count values to
1893// adjust the func entry count.
1894static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI,
1895 BranchProbabilityInfo &NBPI) {
1896 Function &F = Func.getFunc();
1897 BlockFrequencyInfo NBFI(F, NBPI, LI);
1898#ifndef NDEBUG
1899 auto BFIEntryCount = F.getEntryCount();
1900 assert(BFIEntryCount && (BFIEntryCount->getCount() > 0) &&
1901 "Invalid BFI Entrycount");
1902#endif
1903 auto SumCount = APFloat::getZero(APFloat::IEEEdouble());
1904 auto SumBFICount = APFloat::getZero(APFloat::IEEEdouble());
1905 for (auto &BBI : F) {
1906 uint64_t CountValue = 0;
1907 uint64_t BFICountValue = 0;
1908 if (!Func.findBBInfo(&BBI))
1909 continue;
1910 auto BFICount = NBFI.getBlockProfileCount(&BBI);
1911 CountValue = *Func.getBBInfo(&BBI).Count;
1912 BFICountValue = *BFICount;
1913 SumCount.add(APFloat(CountValue * 1.0), APFloat::rmNearestTiesToEven);
1914 SumBFICount.add(APFloat(BFICountValue * 1.0), APFloat::rmNearestTiesToEven);
1915 }
1916 if (SumCount.isZero())
1917 return;
1918
1919 assert(SumBFICount.compare(APFloat(0.0)) == APFloat::cmpGreaterThan &&
1920 "Incorrect sum of BFI counts");
1921 if (SumBFICount.compare(SumCount) == APFloat::cmpEqual)
1922 return;
1923 double Scale = (SumCount / SumBFICount).convertToDouble();
1924 if (Scale < 1.001 && Scale > 0.999)
1925 return;
1926
1927 uint64_t FuncEntryCount = *Func.getBBInfo(&*F.begin()).Count;
1928 uint64_t NewEntryCount = 0.5 + FuncEntryCount * Scale;
1929 if (NewEntryCount == 0)
1930 NewEntryCount = 1;
1931 if (NewEntryCount != FuncEntryCount) {
1932 F.setEntryCount(ProfileCount(NewEntryCount, Function::PCT_Real));
1933 LLVM_DEBUG(dbgs() << "FixFuncEntryCount: in " << F.getName()
1934 << ", entry_count " << FuncEntryCount << " --> "
1935 << NewEntryCount << "\n");
1936 }
1937}
1938
1939// Compare the profile count values with BFI count values, and print out
1940// the non-matching ones.
1941static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI,
1943 uint64_t HotCountThreshold,
1945 Function &F = Func.getFunc();
1946 BlockFrequencyInfo NBFI(F, NBPI, LI);
1947 // bool PrintFunc = false;
1948 bool HotBBOnly = PGOVerifyHotBFI;
1949 StringRef Msg;
1951
1952 unsigned BBNum = 0, BBMisMatchNum = 0, NonZeroBBNum = 0;
1953 for (auto &BBI : F) {
1954 uint64_t CountValue = 0;
1955 uint64_t BFICountValue = 0;
1956
1957 CountValue = Func.getBBInfo(&BBI).Count.value_or(CountValue);
1958
1959 BBNum++;
1960 if (CountValue)
1961 NonZeroBBNum++;
1962 auto BFICount = NBFI.getBlockProfileCount(&BBI);
1963 if (BFICount)
1964 BFICountValue = *BFICount;
1965
1966 if (HotBBOnly) {
1967 bool rawIsHot = CountValue >= HotCountThreshold;
1968 bool BFIIsHot = BFICountValue >= HotCountThreshold;
1969 bool rawIsCold = CountValue <= ColdCountThreshold;
1970 bool ShowCount = false;
1971 if (rawIsHot && !BFIIsHot) {
1972 Msg = "raw-Hot to BFI-nonHot";
1973 ShowCount = true;
1974 } else if (rawIsCold && BFIIsHot) {
1975 Msg = "raw-Cold to BFI-Hot";
1976 ShowCount = true;
1977 }
1978 if (!ShowCount)
1979 continue;
1980 } else {
1981 if ((CountValue < PGOVerifyBFICutoff) &&
1982 (BFICountValue < PGOVerifyBFICutoff))
1983 continue;
1984 uint64_t Diff = (BFICountValue >= CountValue)
1985 ? BFICountValue - CountValue
1986 : CountValue - BFICountValue;
1987 if (Diff <= CountValue / 100 * PGOVerifyBFIRatio)
1988 continue;
1989 }
1990 BBMisMatchNum++;
1991
1992 ORE.emit([&]() {
1994 F.getSubprogram(), &BBI);
1995 Remark << "BB " << ore::NV("Block", BBI.getName())
1996 << " Count=" << ore::NV("Count", CountValue)
1997 << " BFI_Count=" << ore::NV("Count", BFICountValue);
1998 if (!Msg.empty())
1999 Remark << " (" << Msg << ")";
2000 return Remark;
2001 });
2002 }
2003 if (BBMisMatchNum)
2004 ORE.emit([&]() {
2005 return OptimizationRemarkAnalysis(DEBUG_TYPE, "bfi-verify",
2006 F.getSubprogram(), &F.getEntryBlock())
2007 << "In Func " << ore::NV("Function", F.getName())
2008 << ": Num_of_BB=" << ore::NV("Count", BBNum)
2009 << ", Num_of_non_zerovalue_BB=" << ore::NV("Count", NonZeroBBNum)
2010 << ", Num_of_mis_matching_BB=" << ore::NV("Count", BBMisMatchNum);
2011 });
2012}
2013
2015 Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName,
2016 vfs::FileSystem &FS,
2017 function_ref<TargetLibraryInfo &(Function &)> LookupTLI,
2020 ProfileSummaryInfo *PSI, bool IsCS) {
2021 LLVM_DEBUG(dbgs() << "Read in profile counters: ");
2022 auto &Ctx = M.getContext();
2023 // Read the counter array from file.
2024 auto ReaderOrErr = IndexedInstrProfReader::create(ProfileFileName, FS,
2025 ProfileRemappingFileName);
2026 if (Error E = ReaderOrErr.takeError()) {
2027 handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) {
2028 Ctx.diagnose(
2029 DiagnosticInfoPGOProfile(ProfileFileName.data(), EI.message()));
2030 });
2031 return false;
2032 }
2033
2034 std::unique_ptr<IndexedInstrProfReader> PGOReader =
2035 std::move(ReaderOrErr.get());
2036 if (!PGOReader) {
2037 Ctx.diagnose(DiagnosticInfoPGOProfile(ProfileFileName.data(),
2038 StringRef("Cannot get PGOReader")));
2039 return false;
2040 }
2041 if (!PGOReader->hasCSIRLevelProfile() && IsCS)
2042 return false;
2043
2044 // TODO: might need to change the warning once the clang option is finalized.
2045 if (!PGOReader->isIRLevelProfile()) {
2046 Ctx.diagnose(DiagnosticInfoPGOProfile(
2047 ProfileFileName.data(), "Not an IR level instrumentation profile"));
2048 return false;
2049 }
2050 if (PGOReader->functionEntryOnly()) {
2051 Ctx.diagnose(DiagnosticInfoPGOProfile(
2052 ProfileFileName.data(),
2053 "Function entry profiles are not yet supported for optimization"));
2054 return false;
2055 }
2056
2057 // Add the profile summary (read from the header of the indexed summary) here
2058 // so that we can use it below when reading counters (which checks if the
2059 // function should be marked with a cold or inlinehint attribute).
2060 M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()),
2063 PSI->refresh();
2064
2065 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
2066 collectComdatMembers(M, ComdatMembers);
2067 std::vector<Function *> HotFunctions;
2068 std::vector<Function *> ColdFunctions;
2069
2070 // If the profile is marked as always instrumenting the entry BB, do the
2071 // same. Note this can be overridden by the internal option in CFGMST.h
2072 bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled();
2073 if (PGOInstrumentEntry.getNumOccurrences() > 0)
2074 InstrumentFuncEntry = PGOInstrumentEntry;
2075 InstrumentFuncEntry |= PGOCtxProfLoweringPass::isContextualIRPGOEnabled();
2076
2077 bool HasSingleByteCoverage = PGOReader->hasSingleByteCoverage();
2078 for (auto &F : M) {
2079 if (skipPGOUse(F))
2080 continue;
2081 auto &TLI = LookupTLI(F);
2082 auto *BPI = LookupBPI(F);
2083 auto *BFI = LookupBFI(F);
2084 if (!HasSingleByteCoverage) {
2085 // Split indirectbr critical edges here before computing the MST rather
2086 // than later in getInstrBB() to avoid invalidating it.
2087 SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI,
2088 BFI);
2089 }
2090 PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS,
2091 InstrumentFuncEntry, HasSingleByteCoverage);
2092 if (HasSingleByteCoverage) {
2093 Func.populateCoverage(PGOReader.get());
2094 continue;
2095 }
2096 // When PseudoKind is set to a value other than InstrProfRecord::NotPseudo,
2097 // it means the profile for the function is unrepresentative and this
2098 // function is actually hot / warm. We will reset the function hot / cold
2099 // attribute and drop all the profile counters.
2101 bool AllZeros = false;
2102 if (!Func.readCounters(PGOReader.get(), AllZeros, PseudoKind))
2103 continue;
2104 if (AllZeros) {
2105 F.setEntryCount(ProfileCount(0, Function::PCT_Real));
2106 if (Func.getProgramMaxCount() != 0)
2107 ColdFunctions.push_back(&F);
2108 continue;
2109 }
2110 if (PseudoKind != InstrProfRecord::NotPseudo) {
2111 // Clear function attribute cold.
2112 if (F.hasFnAttribute(Attribute::Cold))
2113 F.removeFnAttr(Attribute::Cold);
2114 // Set function attribute as hot.
2115 if (PseudoKind == InstrProfRecord::PseudoHot)
2116 F.addFnAttr(Attribute::Hot);
2117 continue;
2118 }
2119 Func.populateCounters();
2120 Func.setBranchWeights();
2121 Func.annotateValueSites();
2122 Func.annotateIrrLoopHeaderWeights();
2123 PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr();
2124 if (FreqAttr == PGOUseFunc::FFA_Cold)
2125 ColdFunctions.push_back(&F);
2126 else if (FreqAttr == PGOUseFunc::FFA_Hot)
2127 HotFunctions.push_back(&F);
2128 if (PGOViewCounts != PGOVCT_None &&
2129 (ViewBlockFreqFuncName.empty() ||
2130 F.getName() == ViewBlockFreqFuncName)) {
2132 std::unique_ptr<BranchProbabilityInfo> NewBPI =
2133 std::make_unique<BranchProbabilityInfo>(F, LI);
2134 std::unique_ptr<BlockFrequencyInfo> NewBFI =
2135 std::make_unique<BlockFrequencyInfo>(F, *NewBPI, LI);
2137 NewBFI->view();
2138 else if (PGOViewCounts == PGOVCT_Text) {
2139 dbgs() << "pgo-view-counts: " << Func.getFunc().getName() << "\n";
2140 NewBFI->print(dbgs());
2141 }
2142 }
2144 (ViewBlockFreqFuncName.empty() ||
2145 F.getName() == ViewBlockFreqFuncName)) {
2147 if (ViewBlockFreqFuncName.empty())
2148 WriteGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName());
2149 else
2150 ViewGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName());
2151 else if (PGOViewRawCounts == PGOVCT_Text) {
2152 dbgs() << "pgo-view-raw-counts: " << Func.getFunc().getName() << "\n";
2153 Func.dumpInfo();
2154 }
2155 }
2156
2159 BranchProbabilityInfo NBPI(F, LI);
2160
2161 // Fix func entry count.
2162 if (PGOFixEntryCount)
2163 fixFuncEntryCount(Func, LI, NBPI);
2164
2165 // Verify BlockFrequency information.
2166 uint64_t HotCountThreshold = 0, ColdCountThreshold = 0;
2167 if (PGOVerifyHotBFI) {
2168 HotCountThreshold = PSI->getOrCompHotCountThreshold();
2170 }
2171 verifyFuncBFI(Func, LI, NBPI, HotCountThreshold, ColdCountThreshold);
2172 }
2173 }
2174
2175 // Set function hotness attribute from the profile.
2176 // We have to apply these attributes at the end because their presence
2177 // can affect the BranchProbabilityInfo of any callers, resulting in an
2178 // inconsistent MST between prof-gen and prof-use.
2179 for (auto &F : HotFunctions) {
2180 F->addFnAttr(Attribute::InlineHint);
2181 LLVM_DEBUG(dbgs() << "Set inline attribute to function: " << F->getName()
2182 << "\n");
2183 }
2184 for (auto &F : ColdFunctions) {
2185 // Only set when there is no Attribute::Hot set by the user. For Hot
2186 // attribute, user's annotation has the precedence over the profile.
2187 if (F->hasFnAttribute(Attribute::Hot)) {
2188 auto &Ctx = M.getContext();
2189 std::string Msg = std::string("Function ") + F->getName().str() +
2190 std::string(" is annotated as a hot function but"
2191 " the profile is cold");
2192 Ctx.diagnose(
2193 DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning));
2194 continue;
2195 }
2196 F->addFnAttr(Attribute::Cold);
2197 LLVM_DEBUG(dbgs() << "Set cold attribute to function: " << F->getName()
2198 << "\n");
2199 }
2200 return true;
2201}
2202
2204 std::string Filename, std::string RemappingFilename, bool IsCS,
2206 : ProfileFileName(std::move(Filename)),
2207 ProfileRemappingFileName(std::move(RemappingFilename)), IsCS(IsCS),
2208 FS(std::move(VFS)) {
2209 if (!PGOTestProfileFile.empty())
2210 ProfileFileName = PGOTestProfileFile;
2211 if (!PGOTestProfileRemappingFile.empty())
2212 ProfileRemappingFileName = PGOTestProfileRemappingFile;
2213 if (!FS)
2215}
2216
2219
2220 auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
2221 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
2223 };
2224 auto LookupBPI = [&FAM](Function &F) {
2226 };
2227 auto LookupBFI = [&FAM](Function &F) {
2229 };
2230
2231 auto *PSI = &MAM.getResult<ProfileSummaryAnalysis>(M);
2232
2233 if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName, *FS,
2234 LookupTLI, LookupBPI, LookupBFI, PSI, IsCS))
2235 return PreservedAnalyses::all();
2236
2237 return PreservedAnalyses::none();
2238}
2239
2240static std::string getSimpleNodeName(const BasicBlock *Node) {
2241 if (!Node->getName().empty())
2242 return Node->getName().str();
2243
2244 std::string SimpleNodeName;
2245 raw_string_ostream OS(SimpleNodeName);
2246 Node->printAsOperand(OS, false);
2247 return OS.str();
2248}
2249
2251 ArrayRef<uint64_t> EdgeCounts, uint64_t MaxCount) {
2252 assert(MaxCount > 0 && "Bad max count");
2253 uint64_t Scale = calculateCountScale(MaxCount);
2255 for (const auto &ECI : EdgeCounts)
2256 Weights.push_back(scaleBranchCount(ECI, Scale));
2257
2258 LLVM_DEBUG(dbgs() << "Weight is: "; for (const auto &W
2259 : Weights) {
2260 dbgs() << W << " ";
2261 } dbgs() << "\n";);
2262
2263 misexpect::checkExpectAnnotations(*TI, Weights, /*IsFrontend=*/false);
2264
2265 setBranchWeights(*TI, Weights, /*IsExpected=*/false);
2267 std::string BrCondStr = getBranchCondString(TI);
2268 if (BrCondStr.empty())
2269 return;
2270
2271 uint64_t WSum =
2272 std::accumulate(Weights.begin(), Weights.end(), (uint64_t)0,
2273 [](uint64_t w1, uint64_t w2) { return w1 + w2; });
2274 uint64_t TotalCount =
2275 std::accumulate(EdgeCounts.begin(), EdgeCounts.end(), (uint64_t)0,
2276 [](uint64_t c1, uint64_t c2) { return c1 + c2; });
2277 Scale = calculateCountScale(WSum);
2278 BranchProbability BP(scaleBranchCount(Weights[0], Scale),
2279 scaleBranchCount(WSum, Scale));
2280 std::string BranchProbStr;
2281 raw_string_ostream OS(BranchProbStr);
2282 OS << BP;
2283 OS << " (total count : " << TotalCount << ")";
2284 OS.flush();
2285 Function *F = TI->getParent()->getParent();
2287 ORE.emit([&]() {
2288 return OptimizationRemark(DEBUG_TYPE, "pgo-instrumentation", TI)
2289 << BrCondStr << " is true with probability : " << BranchProbStr;
2290 });
2291 }
2292}
2293
2294namespace llvm {
2295
2297 MDBuilder MDB(M->getContext());
2298 TI->setMetadata(llvm::LLVMContext::MD_irr_loop,
2299 MDB.createIrrLoopHeaderWeight(Count));
2300}
2301
2302template <> struct GraphTraits<PGOUseFunc *> {
2303 using NodeRef = const BasicBlock *;
2306
2307 static NodeRef getEntryNode(const PGOUseFunc *G) {
2308 return &G->getFunc().front();
2309 }
2310
2312 return succ_begin(N);
2313 }
2314
2315 static ChildIteratorType child_end(const NodeRef N) { return succ_end(N); }
2316
2317 static nodes_iterator nodes_begin(const PGOUseFunc *G) {
2318 return nodes_iterator(G->getFunc().begin());
2319 }
2320
2321 static nodes_iterator nodes_end(const PGOUseFunc *G) {
2322 return nodes_iterator(G->getFunc().end());
2323 }
2324};
2325
2326template <> struct DOTGraphTraits<PGOUseFunc *> : DefaultDOTGraphTraits {
2327 explicit DOTGraphTraits(bool isSimple = false)
2329
2330 static std::string getGraphName(const PGOUseFunc *G) {
2331 return std::string(G->getFunc().getName());
2332 }
2333
2334 std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph) {
2335 std::string Result;
2336 raw_string_ostream OS(Result);
2337
2338 OS << getSimpleNodeName(Node) << ":\\l";
2339 PGOUseBBInfo *BI = Graph->findBBInfo(Node);
2340 OS << "Count : ";
2341 if (BI && BI->Count)
2342 OS << *BI->Count << "\\l";
2343 else
2344 OS << "Unknown\\l";
2345
2346 if (!PGOInstrSelect)
2347 return Result;
2348
2349 for (const Instruction &I : *Node) {
2350 if (!isa<SelectInst>(&I))
2351 continue;
2352 // Display scaled counts for SELECT instruction:
2353 OS << "SELECT : { T = ";
2354 uint64_t TC, FC;
2355 bool HasProf = extractBranchWeights(I, TC, FC);
2356 if (!HasProf)
2357 OS << "Unknown, F = Unknown }\\l";
2358 else
2359 OS << TC << ", F = " << FC << " }\\l";
2360 }
2361 return Result;
2362 }
2363};
2364
2365} // end namespace llvm
This file implements a class to represent arbitrary precision integral constant values and operations...
This file contains the simple types necessary to represent the attributes associated with functions a...
This file finds the minimum set of blocks on a CFG that must be instrumented to infer execution cover...
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:686
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Given that RA is a live value
#define LLVM_DEBUG(X)
Definition: Debug.h:101
std::string Name
uint64_t Size
static BasicBlock * getInstrBB(CFGMST< Edge, BBInfo > &MST, Edge &E, const DenseSet< const BasicBlock * > &ExecBlocks)
#define DEBUG_TYPE
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
static cl::opt< unsigned > ColdCountThreshold("mfs-count-threshold", cl::desc("Minimum number of times a block must be executed to be retained."), cl::init(1), cl::Hidden)
Module.h This file contains the declarations for the Module class.
static cl::opt< bool > PGOInstrumentEntry("pgo-instrument-entry", cl::init(false), cl::Hidden, cl::desc("Force to instrument function entry basicblock."))
static cl::opt< std::string > PGOTestProfileRemappingFile("pgo-test-profile-remapping-file", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path of profile remapping file. This is mainly for " "test purpose."))
static cl::opt< bool > PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden, cl::desc("Fix function entry count in profile use."))
static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI, BranchProbabilityInfo &NBPI)
static cl::opt< bool > PGOVerifyHotBFI("pgo-verify-hot-bfi", cl::init(false), cl::Hidden, cl::desc("Print out the non-match BFI count if a hot raw profile count " "becomes non-hot, or a cold raw profile count becomes hot. " "The print is enabled under -Rpass-analysis=pgo, or " "internal option -pass-remakrs-analysis=pgo."))
static void annotateFunctionWithHashMismatch(Function &F, LLVMContext &ctx)
static cl::opt< bool > PGOTemporalInstrumentation("pgo-temporal-instrumentation", cl::desc("Use this option to enable temporal instrumentation"))
static cl::opt< unsigned > PGOFunctionSizeThreshold("pgo-function-size-threshold", cl::Hidden, cl::desc("Do not instrument functions smaller than this threshold."))
static cl::opt< unsigned > MaxNumAnnotations("icp-max-annotations", cl::init(3), cl::Hidden, cl::desc("Max number of annotations for a single indirect " "call callsite"))
static bool skipPGOGen(const Function &F)
static void collectComdatMembers(Module &M, std::unordered_multimap< Comdat *, GlobalValue * > &ComdatMembers)
static cl::opt< unsigned > PGOVerifyBFICutoff("pgo-verify-bfi-cutoff", cl::init(5), cl::Hidden, cl::desc("Set the threshold for pgo-verify-bfi: skip the counts whose " "profile count value is below."))
static cl::opt< std::string > PGOTraceFuncHash("pgo-trace-func-hash", cl::init("-"), cl::Hidden, cl::value_desc("function name"), cl::desc("Trace the hash of the function with this name."))
static void instrumentOneFunc(Function &F, Module *M, TargetLibraryInfo &TLI, BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFI, std::unordered_multimap< Comdat *, GlobalValue * > &ComdatMembers, bool IsCS)
bool isValueProfilingDisabled()
static void populateEHOperandBundle(VPCandidateInfo &Cand, DenseMap< BasicBlock *, ColorVector > &BlockColors, SmallVectorImpl< OperandBundleDef > &OpBundles)
static cl::opt< bool > PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden, cl::desc("Use this option to turn on/off SELECT " "instruction instrumentation. "))
static cl::opt< bool > PGOFunctionEntryCoverage("pgo-function-entry-coverage", cl::Hidden, cl::desc("Use this option to enable function entry coverage instrumentation."))
static bool InstrumentAllFunctions(Module &M, function_ref< TargetLibraryInfo &(Function &)> LookupTLI, function_ref< BranchProbabilityInfo *(Function &)> LookupBPI, function_ref< BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS)
static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI, BranchProbabilityInfo &NBPI, uint64_t HotCountThreshold, uint64_t ColdCountThreshold)
static cl::opt< unsigned > PGOVerifyBFIRatio("pgo-verify-bfi-ratio", cl::init(2), cl::Hidden, cl::desc("Set the threshold for pgo-verify-bfi: only print out " "mismatched BFI if the difference percentage is greater than " "this value (in percentage)."))
static cl::opt< bool > DoComdatRenaming("do-comdat-renaming", cl::init(false), cl::Hidden, cl::desc("Append function hash to the name of COMDAT function to avoid " "function hash mismatch due to the preinliner"))
static cl::opt< unsigned > PGOFunctionCriticalEdgeThreshold("pgo-critical-edge-threshold", cl::init(20000), cl::Hidden, cl::desc("Do not instrument functions with the number of critical edges " " greater than this threshold."))
static void setupBBInfoEdges(const FuncPGOInstrumentation< PGOUseEdge, PGOUseBBInfo > &FuncInfo)
Set up InEdges/OutEdges for all BBs in the MST.
static cl::opt< std::string > PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path of profile data file. This is" "mainly for test purpose."))
static bool skipPGOUse(const Function &F)
static bool canRenameComdat(Function &F, std::unordered_multimap< Comdat *, GlobalValue * > &ComdatMembers)
static cl::opt< bool > PGOVerifyBFI("pgo-verify-bfi", cl::init(false), cl::Hidden, cl::desc("Print out mismatched BFI counts after setting profile metadata " "The print is enabled under -Rpass-analysis=pgo, or " "internal option -pass-remakrs-analysis=pgo."))
static cl::opt< bool > PGOBlockCoverage("pgo-block-coverage", cl::desc("Use this option to enable basic block coverage instrumentation"))
static uint64_t sumEdgeCount(const ArrayRef< PGOUseEdge * > Edges)
static cl::opt< bool > PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden, cl::desc("Use this option to turn on/off " "memory intrinsic size profiling."))
Function::ProfileCount ProfileCount
static cl::opt< bool > EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden, cl::desc("When this option is on, the annotated " "branch probability will be emitted as " "optimization remarks: -{Rpass|" "pass-remarks}=pgo-instrumentation"))
static cl::opt< unsigned > MaxNumMemOPAnnotations("memop-max-annotations", cl::init(4), cl::Hidden, cl::desc("Max number of preicise value annotations for a single memop" "intrinsic"))
static cl::opt< bool > DisableValueProfiling("disable-vp", cl::init(false), cl::Hidden, cl::desc("Disable Value Profiling"))
static std::string getSimpleNodeName(const BasicBlock *Node)
static cl::opt< bool > PGOViewBlockCoverageGraph("pgo-view-block-coverage-graph", cl::desc("Create a dot file of CFGs with block " "coverage inference information"))
static GlobalVariable * createIRLevelProfileFlagVar(Module &M, bool IsCS)
static bool isIndirectBrTarget(BasicBlock *BB)
static std::string getBranchCondString(Instruction *TI)
bool shouldInstrumentEntryBB()
static bool annotateAllFunctions(Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName, vfs::FileSystem &FS, function_ref< TargetLibraryInfo &(Function &)> LookupTLI, function_ref< BranchProbabilityInfo *(Function &)> LookupBPI, function_ref< BlockFrequencyInfo *(Function &)> LookupBFI, ProfileSummaryInfo *PSI, bool IsCS)
static cl::opt< PGOViewCountsType > PGOViewRawCounts("pgo-view-raw-counts", cl::Hidden, cl::desc("A boolean option to show CFG dag or text " "with raw profile counts from " "profile data. See also option " "-pgo-view-counts. To limit graph " "display to only one function, use " "filtering option -view-bfi-func-name."), cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), clEnumValN(PGOVCT_Graph, "graph", "show a graph."), clEnumValN(PGOVCT_Text, "text", "show in text.")))
static const char * ValueProfKindDescr[]
This file provides the interface for IR based instrumentation passes ( (profile-gen,...
FunctionAnalysisManager FAM
ModuleAnalysisManager MAM
This header defines various interfaces for pass management in LLVM.
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isSimple(Instruction *I)
This file contains some templates that are useful if you are working with the STL at all.
raw_pwrite_stream & OS
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
Defines the virtual file system interface vfs::FileSystem.
Value * RHS
void printAsOperand(OutputBuffer &OB, Prec P=Prec::Default, bool StrictlyWorse=false) const
static APFloat getZero(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Zero.
Definition: APFloat.h:982
Class for arbitrary precision integers.
Definition: APInt.h:77
This templated class represents "all analyses that operate over <a particular IR unit>" (e....
Definition: Analysis.h:49
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:405
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
iterator end() const
Definition: ArrayRef.h:154
iterator begin() const
Definition: ArrayRef.h:153
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
iterator end()
Definition: BasicBlock.h:451
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:414
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:229
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
std::optional< uint64_t > getBlockProfileCount(const BasicBlock *BB, bool AllowSynthetic=false) const
Returns the estimated profile count of BB.
Conditional or Unconditional Branch instruction.
bool isConditional() const
Value * getCondition() const
Analysis pass which computes BranchProbabilityInfo.
Analysis providing branch probability information.
An union-find based Minimum Spanning Tree for CFG.
Definition: CFGMST.h:39
Edge & addEdge(BasicBlock *Src, BasicBlock *Dest, uint64_t W)
Definition: CFGMST.h:276
const std::vector< std::unique_ptr< Edge > > & allEdges() const
Definition: CFGMST.h:306
size_t bbInfoSize() const
Definition: CFGMST.h:314
size_t numEdges() const
Definition: CFGMST.h:312
BBInfo * findBBInfo(const BasicBlock *BB) const
Definition: CFGMST.h:324
BBInfo & getBBInfo(const BasicBlock *BB) const
Definition: CFGMST.h:317
void dumpEdges(raw_ostream &OS, const Twine &Message) const
Definition: CFGMST.h:257
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1236
Value * getCalledOperand() const
Definition: InstrTypes.h:1458
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:847
StringRef getName() const
Definition: Comdat.cpp:28
void setSelectionKind(SelectionKind Val)
Definition: Comdat.h:47
SelectionKind getSelectionKind() const
Definition: Comdat.h:46
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition: Constants.h:218
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:212
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:206
static Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
Definition: Constants.cpp:400
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
bool empty() const
Definition: DenseMap.h:98
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
Diagnostic information for the PGO profiler.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
Base class for error info classes.
Definition: Error.h:45
virtual std::string message() const
Return the error message as a string.
Definition: Error.h:53
Lightweight error class with error context and mandatory checking.
Definition: Error.h:160
Tagged union holding either a T or a Error.
Definition: Error.h:481
Class to represent profile counts.
Definition: Function.h:289
static GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
Definition: Globals.cpp:544
@ HiddenVisibility
The GV is hidden.
Definition: GlobalValue.h:68
@ ExternalLinkage
Externally visible function.
Definition: GlobalValue.h:52
@ WeakAnyLinkage
Keep one copy of named function when linking (weak)
Definition: GlobalValue.h:56
@ AvailableExternallyLinkage
Available for inspection, not emission.
Definition: GlobalValue.h:53
@ LinkOnceODRLinkage
Same, but only replaced by something equivalent.
Definition: GlobalValue.h:55
This instruction compares its operands according to the predicate given to the constructor.
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
Definition: IRBuilder.h:2037
BasicBlock::iterator GetInsertPoint() const
Definition: IRBuilder.h:173
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Definition: IRBuilder.h:529
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
Definition: IRBuilder.h:489
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:484
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2115
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2410
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2664
Reader for the indexed binary instrprof format.
static Expected< std::unique_ptr< IndexedInstrProfReader > > create(const Twine &Path, vfs::FileSystem &FS, const Twine &RemappingPath="")
Factory method to create an indexed reader.
Expected< InstrProfRecord > getInstrProfRecord(StringRef FuncName, uint64_t FuncHash, StringRef DeprecatedFuncName="", uint64_t *MismatchedFuncSum=nullptr)
Return the NamedInstrProfRecord associated with FuncName and FuncHash.
uint64_t getMaximumFunctionCount(bool UseCS)
Return the maximum of all known function counts.
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:563
Base class for instruction visitors.
Definition: InstVisitor.h:78
void visit(Iterator Start, Iterator End)
Definition: InstVisitor.h:87
RetTy visitSelectInst(SelectInst &I)
Definition: InstVisitor.h:189
instrprof_error get() const
Definition: InstrProf.h:409
std::string message() const override
Return the error message as a string.
Definition: InstrProf.cpp:250
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
Definition: Instruction.h:834
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1635
A smart pointer to a reference-counted object that inherits from RefCountedBase or ThreadSafeRefCount...
uint32_t getCRC() const
Definition: CRC.h:52
void update(ArrayRef< uint8_t > Data)
Definition: CRC.cpp:103
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
MDString * createString(StringRef Str)
Return the given string as metadata.
Definition: MDBuilder.cpp:20
MDNode * createIrrLoopHeaderWeight(uint64_t Weight)
Return metadata containing an irreducible loop header weight.
Definition: MDBuilder.cpp:344
Metadata node.
Definition: Metadata.h:1067
ArrayRef< MDOperand > operands() const
Definition: Metadata.h:1426
Tuple of metadata.
Definition: Metadata.h:1470
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1498
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
Diagnostic information for optimization analysis remarks.
The optimization diagnostic interface.
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for applied optimization remarks.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
PGOInstrumentationUse(std::string Filename="", std::string RemappingFilename="", bool IsCS=false, IntrusiveRefCntPtr< vfs::FileSystem > FS=nullptr)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: Analysis.h:114
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
void preserveSet()
Mark an analysis set as preserved.
Definition: Analysis.h:146
void preserve()
Mark an analysis as preserved.
Definition: Analysis.h:131
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
uint64_t getOrCompColdCountThreshold() const
Returns ColdCountThreshold if set.
void refresh()
If no summary is present, attempt to refresh.
bool isColdCount(uint64_t C) const
Returns true if count C is considered cold.
bool isHotCount(uint64_t C) const
Returns true if count C is considered hot.
uint64_t getOrCompHotCountThreshold() const
Returns HotCountThreshold if set.
This class represents the LLVM 'select' instruction.
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
constexpr bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:134
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:131
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
TinyPtrVector - This class is specialized for cases where there are normally 0 or 1 element in a vect...
Definition: TinyPtrVector.h:29
EltTy front() const
unsigned size() const
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
std::string str() const
Return the twine contents as a std::string.
Definition: Twine.cpp:17
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
void print(raw_ostream &O, bool IsForDebug=false, bool NoDetails=false) const
Print the current type.
static IntegerType * getInt64Ty(LLVMContext &C)
Value * getOperand(unsigned i) const
Definition: User.h:169
Utility analysis that determines what values are worth profiling.
std::vector< CandidateInfo > get(InstrProfValueKind Kind) const
Returns a list of value profiling candidates of the given kind.
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition: ilist_node.h:32
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:661
The virtual file system interface.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
This file contains the declaration of the Comdat class, which represents a single COMDAT in LLVM.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1484
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:711
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
uint64_t getFuncHash(const FuncRecordTy *Record)
Return the structural hash associated with the function.
void checkExpectAnnotations(Instruction &I, const ArrayRef< uint32_t > ExistingWeights, bool IsFrontend)
checkExpectAnnotations - compares PGO counters to the thresholds used for llvm.expect and warns if th...
Definition: MisExpect.cpp:203
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< FuncNode * > Func
Definition: RDFGraph.h:393
void write64le(void *P, uint64_t V)
Definition: Endian.h:471
IntrusiveRefCntPtr< FileSystem > getRealFileSystem()
Gets a vfs::FileSystem for the 'real' file system, as seen by the operating system.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void setIrrLoopHeaderMetadata(Module *M, Instruction *TI, uint64_t Count)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1680
std::string getPGOFuncName(const Function &F, bool InLTO=false, uint64_t Version=INSTR_PROF_INDEX_VERSION)
Please use getIRPGOFuncName for LLVM IR instrumentation.
Definition: InstrProf.cpp:374
void createPGOFuncNameMetadata(Function &F, StringRef PGOFuncName)
Create the PGOFuncName metadata if PGOFuncName is different from the function's raw name.
Definition: InstrProf.cpp:1428
unsigned GetSuccessorNumber(const BasicBlock *BB, const BasicBlock *Succ)
Search for the specified successor of basic block BB and return its position in the terminator instru...
Definition: CFG.cpp:79
std::string getIRPGOFuncName(const Function &F, bool InLTO=false)
Definition: InstrProf.cpp:363
Function::ProfileCount ProfileCount
auto successors(const MachineBasicBlock *BB)
void handleAllErrors(Error E, HandlerTs &&... Handlers)
Behaves the same as handleErrors, except that by contract all errors must be handled by the given han...
Definition: Error.h:977
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
cl::opt< InstrProfCorrelator::ProfCorrelatorKind > ProfileCorrelate("profile-correlate", cl::desc("Use debug info or binary file to correlate profiles."), cl::init(InstrProfCorrelator::NONE), cl::values(clEnumValN(InstrProfCorrelator::NONE, "", "No profile correlation"), clEnumValN(InstrProfCorrelator::DEBUG_INFO, "debug-info", "Use debug info to correlate"), clEnumValN(InstrProfCorrelator::BINARY, "binary", "Use binary to correlate")))
DenseMap< BasicBlock *, ColorVector > colorEHFunclets(Function &F)
If an EH funclet personality is in use (see isFuncletEHPersonality), this will recompute which blocks...
cl::opt< bool > PGOWarnMissing
raw_ostream & WriteGraph(raw_ostream &O, const GraphType &G, bool ShortNames=false, const Twine &Title="")
Definition: GraphWriter.h:359
bool SplitIndirectBrCriticalEdges(Function &F, bool IgnoreBlocksWithoutPHI, BranchProbabilityInfo *BPI=nullptr, BlockFrequencyInfo *BFI=nullptr)
bool isScopedEHPersonality(EHPersonality Pers)
Returns true if this personality uses scope-style EH IR instructions: catchswitch,...
cl::opt< bool > DebugInfoCorrelate
OperandBundleDefT< Value * > OperandBundleDef
Definition: AutoUpgrade.h:33
cl::opt< std::string > ViewBlockFreqFuncName("view-bfi-func-name", cl::Hidden, cl::desc("The option to specify " "the name of the function " "whose CFG will be displayed."))
GlobalVariable * createPGOFuncNameVar(Function &F, StringRef PGOFuncName)
Create and return the global variable for function name used in PGO instrumentation.
Definition: InstrProf.cpp:462
void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIndx from InstrProfR and annotate the instruction Inst w...
Definition: InstrProf.cpp:1273
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
cl::opt< bool > NoPGOWarnMismatch
Definition: MemProfiler.cpp:55
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
InstrProfValueKind
Definition: InstrProf.h:267
cl::opt< PGOViewCountsType > PGOViewCounts("pgo-view-counts", cl::Hidden, cl::desc("A boolean option to show CFG dag or text with " "block profile counts and branch probabilities " "right after PGO profile annotation step. The " "profile counts are computed using branch " "probabilities from the runtime profile data and " "block frequency propagation algorithm. To view " "the raw counts from the profile, use option " "-pgo-view-raw-counts instead. To limit graph " "display to only one function, use filtering option " "-view-bfi-func-name."), cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), clEnumValN(PGOVCT_Graph, "graph", "show a graph."), clEnumValN(PGOVCT_Text, "text", "show in text.")))
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
static uint32_t scaleBranchCount(uint64_t Count, uint64_t Scale)
Scale an individual branch count.
void appendToCompilerUsed(Module &M, ArrayRef< GlobalValue * > Values)
Adds global values to the llvm.compiler.used list.
BasicBlock * SplitCriticalEdge(Instruction *TI, unsigned SuccNum, const CriticalEdgeSplittingOptions &Options=CriticalEdgeSplittingOptions(), const Twine &BBName="")
If this edge is a critical edge, insert a new node to split the critical edge.
void ViewGraph(const GraphType &G, const Twine &Name, bool ShortNames=false, const Twine &Title="", GraphProgram::Name Program=GraphProgram::DOT)
ViewGraph - Emit a dot graph, run 'dot', run gv on the postscript file, then cleanup.
Definition: GraphWriter.h:427
bool isCriticalEdge(const Instruction *TI, unsigned SuccNum, bool AllowIdenticalEdges=false)
Return true if the specified edge is a critical edge.
Definition: CFG.cpp:95
static uint64_t calculateCountScale(uint64_t MaxCount)
Calculate what to divide by to scale counts.
bool canRenameComdatFunc(const Function &F, bool CheckAddressTaken=false)
Check if we can safely rename this Comdat function.
Definition: InstrProf.cpp:1488
void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput)
Definition: InstrProf.cpp:1511
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1849
@ DS_Warning
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
auto predecessors(const MachineBasicBlock *BB)
void setProfMetadata(Module *M, Instruction *TI, ArrayRef< uint64_t > EdgeCounts, uint64_t MaxCount)
cl::opt< bool > EnableVTableValueProfiling("enable-vtable-value-profiling", cl::init(false), cl::desc("If true, the virtual table address will be instrumented to know " "the types of a C++ pointer. The information is used in indirect " "call promotion to do selective vtable-based comparison."))
SuccIterator< const Instruction, const BasicBlock > const_succ_iterator
Definition: CFG.h:243
cl::opt< bool > NoPGOWarnMismatchComdatWeak
Implement std::hash so that hash_code can be used in STL containers.
Definition: BitVector.h:858
#define N
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:246
static const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:272
static std::string getGraphName(const PGOUseFunc *G)
std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph)
DOTGraphTraits - Template class that can be specialized to customize how graphs are converted to 'dot...
DefaultDOTGraphTraits - This class provides the default implementations of all of the DOTGraphTraits ...
static ChildIteratorType child_end(const NodeRef N)
static NodeRef getEntryNode(const PGOUseFunc *G)
static ChildIteratorType child_begin(const NodeRef N)
static nodes_iterator nodes_end(const PGOUseFunc *G)
static nodes_iterator nodes_begin(const PGOUseFunc *G)
Profiling information for a single function.
Definition: InstrProf.h:824
std::vector< uint64_t > Counts
Definition: InstrProf.h:825
CountPseudoKind getCountPseudoKind() const
Definition: InstrProf.h:922
uint32_t getNumValueSites(uint32_t ValueKind) const
Return the number of instrumented sites for ValueKind.
Definition: InstrProf.h:1024
static void setCSFlagInHash(uint64_t &FuncHash)
Definition: InstrProf.h:1005