LLVM 19.0.0git
PGOInstrumentation.cpp
Go to the documentation of this file.
1//===- PGOInstrumentation.cpp - MST-based PGO Instrumentation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements PGO instrumentation using a minimum spanning tree based
10// on the following paper:
11// [1] Donald E. Knuth, Francis R. Stevenson. Optimal measurement of points
12// for program frequency counts. BIT Numerical Mathematics 1973, Volume 13,
13// Issue 3, pp 313-322
14// The idea of the algorithm is based on the fact that for each node (except for
15// the entry and exit), the sum of incoming edge counts equals the sum of
16// outgoing edge counts. The count of edge on spanning tree can be derived from
17// those edges not on the spanning tree. Knuth proves this method instruments
18// the minimum number of edges.
19//
20// The minimal spanning tree here is actually a maximum weight tree -- on-tree
21// edges have higher frequencies (more likely to execute). The idea is to
22// instrument those less frequently executed edges to reduce the runtime
23// overhead of instrumented binaries.
24//
25// This file contains two passes:
26// (1) Pass PGOInstrumentationGen which instruments the IR to generate edge
27// count profile, and generates the instrumentation for indirect call
28// profiling.
29// (2) Pass PGOInstrumentationUse which reads the edge count profile and
30// annotates the branch weights. It also reads the indirect call value
31// profiling records and annotate the indirect call instructions.
32//
33// To get the precise counter information, these two passes need to be invoked at
34// the same compilation point (so they see the same IR). For pass
35// PGOInstrumentationGen, the real work is done in instrumentOneFunc(). For
36// pass PGOInstrumentationUse, the real work is done in class PGOUseFunc and
37// the profile is opened in module level and passed to each PGOUseFunc instance.
38// The shared code for PGOInstrumentationGen and PGOInstrumentationUse is put
39// in class FuncPGOInstrumentation.
40//
41// Class PGOEdge represents a CFG edge and some auxiliary information. Class
42// BBInfo contains auxiliary information for each BB. These two classes are used
43// in pass PGOInstrumentationGen. Class PGOUseEdge and UseBBInfo are derived
44// classes of PGOEdge and BBInfo, respectively. They contain extra data structures
45// used in populating profile counters.
46// The MST implementation is in Class CFGMST (CFGMST.h).
47//
48//===----------------------------------------------------------------------===//
49
52#include "llvm/ADT/APInt.h"
53#include "llvm/ADT/ArrayRef.h"
54#include "llvm/ADT/STLExtras.h"
56#include "llvm/ADT/Statistic.h"
57#include "llvm/ADT/StringRef.h"
58#include "llvm/ADT/Twine.h"
59#include "llvm/ADT/iterator.h"
63#include "llvm/Analysis/CFG.h"
68#include "llvm/IR/Attributes.h"
69#include "llvm/IR/BasicBlock.h"
70#include "llvm/IR/CFG.h"
71#include "llvm/IR/Comdat.h"
72#include "llvm/IR/Constant.h"
73#include "llvm/IR/Constants.h"
75#include "llvm/IR/Dominators.h"
77#include "llvm/IR/Function.h"
78#include "llvm/IR/GlobalAlias.h"
79#include "llvm/IR/GlobalValue.h"
81#include "llvm/IR/IRBuilder.h"
82#include "llvm/IR/InstVisitor.h"
83#include "llvm/IR/InstrTypes.h"
84#include "llvm/IR/Instruction.h"
87#include "llvm/IR/Intrinsics.h"
88#include "llvm/IR/LLVMContext.h"
89#include "llvm/IR/MDBuilder.h"
90#include "llvm/IR/Module.h"
91#include "llvm/IR/PassManager.h"
94#include "llvm/IR/Type.h"
95#include "llvm/IR/Value.h"
99#include "llvm/Support/CRC.h"
100#include "llvm/Support/Casting.h"
103#include "llvm/Support/Debug.h"
104#include "llvm/Support/Error.h"
117#include <algorithm>
118#include <cassert>
119#include <cstdint>
120#include <memory>
121#include <numeric>
122#include <optional>
123#include <stack>
124#include <string>
125#include <unordered_map>
126#include <utility>
127#include <vector>
128
129using namespace llvm;
132
133#define DEBUG_TYPE "pgo-instrumentation"
134
// Pass statistics declared through the STATISTIC macro. Most counters come in
// pairs: a plain one and a "CSPGO" twin. The code in this file bumps one or
// the other depending on whether it is running in context-sensitive mode
// (the IsCS flag).
135STATISTIC(NumOfPGOInstrument, "Number of edges instrumented.");
136STATISTIC(NumOfPGOSelectInsts, "Number of select instruction instrumented.");
137STATISTIC(NumOfPGOMemIntrinsics, "Number of mem intrinsics instrumented.");
138STATISTIC(NumOfPGOEdge, "Number of edges.");
139STATISTIC(NumOfPGOBB, "Number of basic-blocks.");
140STATISTIC(NumOfPGOSplit, "Number of critical edge splits.");
141STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts.");
142STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile.");
143STATISTIC(NumOfPGOMissing, "Number of functions without profile.");
144STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations.");
// Context-sensitive (CSPGO) counterparts of the counters above.
145STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO.");
146STATISTIC(NumOfCSPGOSelectInsts,
147 "Number of select instruction instrumented in CSPGO.");
148STATISTIC(NumOfCSPGOMemIntrinsics,
149 "Number of mem intrinsics instrumented in CSPGO.");
150STATISTIC(NumOfCSPGOEdge, "Number of edges in CSPGO.");
151STATISTIC(NumOfCSPGOBB, "Number of basic-blocks in CSPGO.");
152STATISTIC(NumOfCSPGOSplit, "Number of critical edge splits in CSPGO.");
153STATISTIC(NumOfCSPGOFunc,
154 "Number of functions having valid profile counts in CSPGO.");
155STATISTIC(NumOfCSPGOMismatch,
156 "Number of functions having mismatch profile in CSPGO.");
157STATISTIC(NumOfCSPGOMissing, "Number of functions without profile in CSPGO.");
// Not split by CS / non-CS mode.
158STATISTIC(NumCoveredBlocks, "Number of basic blocks that were executed");
159
160// Command line option to specify the file to read profile from. This is
161// mainly used for testing.
163 PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden,
164 cl::value_desc("filename"),
165 cl::desc("Specify the path of profile data file. This is"
166 "mainly for test purpose."));
168 "pgo-test-profile-remapping-file", cl::init(""), cl::Hidden,
169 cl::value_desc("filename"),
170 cl::desc("Specify the path of profile remapping file. This is mainly for "
171 "test purpose."));
172
173// Command line option to disable value profiling. The default is false,
174// i.e. value profiling is enabled by default. This is for debugging purposes.
175static cl::opt<bool> DisableValueProfiling("disable-vp", cl::init(false),
177 cl::desc("Disable Value Profiling"));
178
179// Command line option to set the maximum number of VP annotations to write to
180// the metadata for a single indirect call callsite.
182 "icp-max-annotations", cl::init(3), cl::Hidden,
183 cl::desc("Max number of annotations for a single indirect "
184 "call callsite"));
185
186// Command line option to set the maximum number of value annotations
187// to write to the metadata for a single memop intrinsic.
189 "memop-max-annotations", cl::init(4), cl::Hidden,
190 cl::desc("Max number of preicise value annotations for a single memop"
191 "intrinsic"));
192
193// Command line option to control appending FunctionHash to the name of a COMDAT
194// function. This is to avoid the hash mismatch caused by the preinliner.
196 "do-comdat-renaming", cl::init(false), cl::Hidden,
197 cl::desc("Append function hash to the name of COMDAT function to avoid "
198 "function hash mismatch due to the preinliner"));
199
200namespace llvm {
201// Command line option to enable/disable the warning about missing profile
202// information. Initialized to false.
203cl::opt<bool> PGOWarnMissing("pgo-warn-missing-function", cl::init(false),
205 cl::desc("Use this option to turn on/off "
206 "warnings about missing profile data for "
207 "functions."));
208
209// Command line option to enable/disable the warning about a hash mismatch in
210// the profile data. Initialized to false. The option name is negated
// ("no-..."), so presumably setting it suppresses the warning — confirm at the
// use site.
212 NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), cl::Hidden,
213 cl::desc("Use this option to turn off/on "
214 "warnings about profile cfg mismatch."));
215
216// Command line option to enable/disable the warning about a hash mismatch in
217// the profile data for Comdat functions, which often turns out to be a false
218// positive due to the pre-instrumentation inline. Initialized to true.
220 "no-pgo-warn-mismatch-comdat-weak", cl::init(true), cl::Hidden,
221 cl::desc("The option is used to turn on/off "
222 "warnings about hash mismatch for comdat "
223 "or weak functions."));
224} // namespace llvm
225
226// Command line option to enable/disable select instruction instrumentation.
// Enabled by default (cl::init(true)).
227static cl::opt<bool>
228 PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden,
229 cl::desc("Use this option to turn on/off SELECT "
230 "instruction instrumentation. "));
231
232// Command line option to turn on CFG dot or text dump of raw profile counts
234 "pgo-view-raw-counts", cl::Hidden,
235 cl::desc("A boolean option to show CFG dag or text "
236 "with raw profile counts from "
237 "profile data. See also option "
238 "-pgo-view-counts. To limit graph "
239 "display to only one function, use "
240 "filtering option -view-bfi-func-name."),
241 cl::values(clEnumValN(PGOVCT_None, "none", "do not show."),
242 clEnumValN(PGOVCT_Graph, "graph", "show a graph."),
243 clEnumValN(PGOVCT_Text, "text", "show in text.")));
244
245// Command line option to enable/disable memop intrinsic call.size profiling.
// Enabled by default (cl::init(true)).
246static cl::opt<bool>
247 PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden,
248 cl::desc("Use this option to turn on/off "
249 "memory intrinsic size profiling."));
250
251// Emit branch probability as optimization remarks. Off by default
// (cl::init(false)).
252static cl::opt<bool>
253 EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden,
254 cl::desc("When this option is on, the annotated "
255 "branch probability will be emitted as "
256 "optimization remarks: -{Rpass|"
257 "pass-remarks}=pgo-instrumentation"));
258
260 "pgo-instrument-entry", cl::init(false), cl::Hidden,
261 cl::desc("Force to instrument function entry basicblock."));
262
264 "pgo-function-entry-coverage", cl::Hidden,
265 cl::desc(
266 "Use this option to enable function entry coverage instrumentation."));
267
269 "pgo-block-coverage",
270 cl::desc("Use this option to enable basic block coverage instrumentation"));
271
// Command line option to dump a dot file of each CFG annotated with block
// coverage inference information. When set and block coverage inference is
// active, the FuncPGOInstrumentation constructor calls
// BCI->viewBlockCoverageGraph().
272static cl::opt<bool>
273 PGOViewBlockCoverageGraph("pgo-view-block-coverage-graph",
274 cl::desc("Create a dot file of CFGs with block "
275 "coverage inference information"));
276
278 "pgo-temporal-instrumentation",
279 cl::desc("Use this option to enable temporal instrumentation"));
280
// Command line option controlling whether the function entry count is fixed up
// during profile use. Enabled by default (cl::init(true)).
281static cl::opt<bool>
282 PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden,
283 cl::desc("Fix function entry count in profile use."));
284
286 "pgo-verify-hot-bfi", cl::init(false), cl::Hidden,
287 cl::desc("Print out the non-match BFI count if a hot raw profile count "
288 "becomes non-hot, or a cold raw profile count becomes hot. "
289 "The print is enabled under -Rpass-analysis=pgo, or "
290 "internal option -pass-remakrs-analysis=pgo."));
291
293 "pgo-verify-bfi", cl::init(false), cl::Hidden,
294 cl::desc("Print out mismatched BFI counts after setting profile metadata "
295 "The print is enabled under -Rpass-analysis=pgo, or "
296 "internal option -pass-remakrs-analysis=pgo."));
297
299 "pgo-verify-bfi-ratio", cl::init(2), cl::Hidden,
300 cl::desc("Set the threshold for pgo-verify-bfi: only print out "
301 "mismatched BFI if the difference percentage is greater than "
302 "this value (in percentage)."));
303
305 "pgo-verify-bfi-cutoff", cl::init(5), cl::Hidden,
306 cl::desc("Set the threshold for pgo-verify-bfi: skip the counts whose "
307 "profile count value is below."));
308
310 "pgo-trace-func-hash", cl::init("-"), cl::Hidden,
311 cl::value_desc("function name"),
312 cl::desc("Trace the hash of the function with this name."));
313
315 "pgo-function-size-threshold", cl::Hidden,
316 cl::desc("Do not instrument functions smaller than this threshold."));
317
319 "pgo-critical-edge-threshold", cl::init(20000), cl::Hidden,
320 cl::desc("Do not instrument functions with the number of critical edges "
321 " greater than this threshold."));
322
324
325namespace llvm {
326// Command line option to turn on CFG dot dump after profile annotation.
327// Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts
329
330// Command line option to specify the name of the function for CFG dump
331// Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name=
333
334// Command line option to enable vtable value profiling. Defined in
335// ProfileData/InstrProf.cpp: -enable-vtable-value-profiling=
339} // namespace llvm
340
342 return PGOInstrumentEntry ||
344}
345
346// FIXME(mtrofin): re-enable this for ctx profiling, for non-indirect calls. Ctx
347// profiling implicitly captures indirect call cases, but not other values.
348// Supporting other values is relatively straight-forward - just another counter
349// range within the context.
351 return DisableValueProfiling ||
353}
354
355// Return a string describing the branch condition that can be
356// used in static branch probability heuristics:
357static std::string getBranchCondString(Instruction *TI) {
358 BranchInst *BI = dyn_cast<BranchInst>(TI);
359 if (!BI || !BI->isConditional())
360 return std::string();
361
362 Value *Cond = BI->getCondition();
363 ICmpInst *CI = dyn_cast<ICmpInst>(Cond);
364 if (!CI)
365 return std::string();
366
367 std::string result;
368 raw_string_ostream OS(result);
369 OS << CI->getPredicate() << "_";
370 CI->getOperand(0)->getType()->print(OS, true);
371
372 Value *RHS = CI->getOperand(1);
373 ConstantInt *CV = dyn_cast<ConstantInt>(RHS);
374 if (CV) {
375 if (CV->isZero())
376 OS << "_Zero";
377 else if (CV->isOne())
378 OS << "_One";
379 else if (CV->isMinusOne())
380 OS << "_MinusOne";
381 else
382 OS << "_Const";
383 }
384 OS.flush();
385 return result;
386}
387
388static const char *ValueProfKindDescr[] = {
389#define VALUE_PROF_KIND(Enumerator, Value, Descr) Descr,
391};
392
393// Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime
394// aware this is an ir_level profile so it can set the version flag.
// NOTE(review): the function signature is not visible in this listing; the body
// uses a module `M` and a flag `IsCS`.
396 const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR));
397 Type *IntTy64 = Type::getInt64Ty(M.getContext());
 // Start from the raw profile version tagged as an IR-level profile, then OR
 // in the variant bits.
398 uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF);
399 if (IsCS)
400 ProfileVersion |= VARIANT_MASK_CSIR_PROF;
 // NOTE(review): as shown here, every variant bit below is set
 // unconditionally; the conditions that presumably select each one are not
 // visible in this listing — confirm against the full source.
402 ProfileVersion |= VARIANT_MASK_INSTR_ENTRY;
404 ProfileVersion |= VARIANT_MASK_DBG_CORRELATE;
406 ProfileVersion |=
407 VARIANT_MASK_BYTE_COVERAGE | VARIANT_MASK_FUNCTION_ENTRY_ONLY;
409 ProfileVersion |= VARIANT_MASK_BYTE_COVERAGE;
411 ProfileVersion |= VARIANT_MASK_TEMPORAL_PROF;
 // Emit the version word as a 64-bit global named VarName, with weak-any
 // linkage and hidden visibility.
412 auto IRLevelVersionVariable = new GlobalVariable(
413 M, IntTy64, true, GlobalValue::WeakAnyLinkage,
414 Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), VarName);
415 IRLevelVersionVariable->setVisibility(GlobalValue::HiddenVisibility);
 // On targets that support COMDAT, switch to external linkage and place the
 // variable in a comdat named after it.
416 Triple TT(M.getTargetTriple());
417 if (TT.supportsCOMDAT()) {
418 IRLevelVersionVariable->setLinkage(GlobalValue::ExternalLinkage);
419 IRLevelVersionVariable->setComdat(M.getOrInsertComdat(VarName));
420 }
421 return IRLevelVersionVariable;
422}
423
424namespace {
425
426/// The select instruction visitor plays three roles specified
427/// by the mode. In \c VM_counting mode, it simply counts the number of
428/// select instructions. In \c VM_instrument mode, it inserts code to count
429/// the number of times TrueValue of select is taken. In \c VM_annotate mode,
430/// it reads the profile data and annotates the select instruction with metadata.
431enum VisitMode { VM_counting, VM_instrument, VM_annotate };
// Forward declaration; SelectInstVisitor below only stores a pointer to it.
432class PGOUseFunc;
433
434/// Instruction Visitor class to visit select instructions.
435struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> {
436 Function &F;
437 unsigned NSIs = 0; // Number of select instructions instrumented.
438 VisitMode Mode = VM_counting; // Visiting mode.
439 unsigned *CurCtrIdx = nullptr; // Pointer to current counter index.
440 unsigned TotalNumCtrs = 0; // Total number of counters
441 GlobalVariable *FuncNameVar = nullptr;
442 uint64_t FuncHash = 0;
443 PGOUseFunc *UseFunc = nullptr;
444 bool HasSingleByteCoverage;
445
446 SelectInstVisitor(Function &Func, bool HasSingleByteCoverage)
447 : F(Func), HasSingleByteCoverage(HasSingleByteCoverage) {}
448
449 void countSelects() {
450 NSIs = 0;
451 Mode = VM_counting;
452 visit(F);
453 }
454
455 // Visit the IR stream and instrument all select instructions. \p
456 // Ind is a pointer to the counter index variable; \p TotalNC
457 // is the total number of counters; \p FNV is the pointer to the
458 // PGO function name var; \p FHash is the function hash.
459 void instrumentSelects(unsigned *Ind, unsigned TotalNC, GlobalVariable *FNV,
460 uint64_t FHash) {
461 Mode = VM_instrument;
462 CurCtrIdx = Ind;
463 TotalNumCtrs = TotalNC;
464 FuncHash = FHash;
465 FuncNameVar = FNV;
466 visit(F);
467 }
468
469 // Visit the IR stream and annotate all select instructions.
470 void annotateSelects(PGOUseFunc *UF, unsigned *Ind) {
471 Mode = VM_annotate;
472 UseFunc = UF;
473 CurCtrIdx = Ind;
474 visit(F);
475 }
476
477 void instrumentOneSelectInst(SelectInst &SI);
478 void annotateOneSelectInst(SelectInst &SI);
479
480 // Visit \p SI instruction and perform tasks according to visit mode.
481 void visitSelectInst(SelectInst &SI);
482
483 // Return the number of select instructions. This needs be called after
484 // countSelects().
485 unsigned getNumOfSelectInsts() const { return NSIs; }
486};
487
488/// This class implements the CFG edges for the Minimum Spanning Tree (MST)
489/// based instrumentation.
490/// Note that the CFG can be a multi-graph. So there might be multiple edges
491/// with the same SrcBB and DestBB.
492struct PGOEdge {
493 BasicBlock *SrcBB;
494 BasicBlock *DestBB;
495 uint64_t Weight;
496 bool InMST = false;
497 bool Removed = false;
498 bool IsCritical = false;
499
500 PGOEdge(BasicBlock *Src, BasicBlock *Dest, uint64_t W = 1)
501 : SrcBB(Src), DestBB(Dest), Weight(W) {}
502
503 /// Return the information string of an edge.
504 std::string infoString() const {
505 return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") +
506 (IsCritical ? "c" : " ") + " W=" + Twine(Weight))
507 .str();
508 }
509};
510
511/// This class stores the auxiliary information for each BB in the MST.
512struct PGOBBInfo {
513 PGOBBInfo *Group;
515 uint32_t Rank = 0;
516
517 PGOBBInfo(unsigned IX) : Group(this), Index(IX) {}
518
519 /// Return the information string of this object.
520 std::string infoString() const {
521 return (Twine("Index=") + Twine(Index)).str();
522 }
523};
524
525// Per-function PGO state shared by instrumentation generation and profile use:
// the function's PGO names, its CFG hash, the value-profiling sites, the select
// visitor and the spanning tree over the CFG. \p Edge and \p BBInfo are the
// edge and per-block record types stored in that tree.
526template <class Edge, class BBInfo> class FuncPGOInstrumentation {
527private:
528 Function &F;
529
530 // Whether this is context-sensitive instrumentation.
531 bool IsCS;
532
533 // Multimap from a Comdat to the global values that are its members.
534 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers;
535
 // NOTE(review): `VPC` is initialized in the constructor and queried for value
 // sites, but its declaration is not visible in this listing — confirm.
537
 // Fills in FunctionHash; called from the constructor.
538 void computeCFGHash();
 // Appends the hash to the name of a renamable Comdat function; called from
 // the constructor when ComdatMembers is non-empty.
539 void renameComdatFunction();
540
541public:
542 const TargetLibraryInfo &TLI;
 // Value-profiling candidates, indexed by value kind (IPVK_*); sized
 // IPVK_Last + 1.
543 std::vector<std::vector<VPCandidateInfo>> ValueSites;
544 SelectInstVisitor SIVisitor;
 // Result of getIRPGOFuncName(F); may get the hash appended by
 // renameComdatFunction().
545 std::string FuncName;
 // Result of getPGOFuncName(F).
546 std::string DeprecatedFuncName;
 // Set by the constructor only when CreateGlobalVar is true.
 // NOTE(review): there is no default initializer, so the value is
 // indeterminate otherwise.
547 GlobalVariable *FuncNameVar;
548
549 // CFG hash value for this function.
550 uint64_t FunctionHash = 0;
551
552 // The Minimum Spanning Tree of function CFG.
 // NOTE(review): the declaration of `MST` itself is not visible in this
 // listing — confirm.
554
 // Present only when single-byte coverage is requested (see constructBCI).
555 const std::optional<BlockCoverageInference> BCI;
556
 // Build the block coverage inference object when \p HasSingleByteCoverage is
 // set; otherwise return an empty optional.
557 static std::optional<BlockCoverageInference>
558 constructBCI(Function &Func, bool HasSingleByteCoverage,
559 bool InstrumentFuncEntry) {
560 if (HasSingleByteCoverage)
561 return BlockCoverageInference(Func, InstrumentFuncEntry);
562 return {};
563 }
564
565 // Collect all the BBs that will be instrumented, and store them in
566 // InstrumentBBs.
567 void getInstrumentBBs(std::vector<BasicBlock *> &InstrumentBBs);
568
569 // Given an edge, find the BB that will be instrumented.
570 // Return nullptr if there is no BB to be instrumented.
571 BasicBlock *getInstrBB(Edge *E);
572
573 // Return the auxiliary BB information.
574 BBInfo &getBBInfo(const BasicBlock *BB) const { return MST.getBBInfo(BB); }
575
576 // Return the auxiliary BB information if available.
577 BBInfo *findBBInfo(const BasicBlock *BB) const { return MST.findBBInfo(BB); }
578
579 // Dump edges and BB information.
580 void dumpInfo(StringRef Str = "") const {
581 MST.dumpEdges(dbgs(), Twine("Dump Function ") + FuncName +
582 " Hash: " + Twine(FunctionHash) + "\t" + Str);
583 }
584
 // Build all per-function state: count selects, collect value sites, compute
 // the PGO names and CFG hash, optionally rename a Comdat function, update the
 // edge statistics and, if \p CreateGlobalVar is set, create the function name
 // variable.
585 FuncPGOInstrumentation(
586 Function &Func, TargetLibraryInfo &TLI,
587 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
588 bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr,
589 BlockFrequencyInfo *BFI = nullptr, bool IsCS = false,
590 bool InstrumentFuncEntry = true, bool HasSingleByteCoverage = false)
591 : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func, TLI),
592 TLI(TLI), ValueSites(IPVK_Last + 1),
593 SIVisitor(Func, HasSingleByteCoverage),
594 MST(F, InstrumentFuncEntry, BPI, BFI),
595 BCI(constructBCI(Func, HasSingleByteCoverage, InstrumentFuncEntry)) {
596 if (BCI && PGOViewBlockCoverageGraph)
597 BCI->viewBlockCoverageGraph();
598 // This should be done before CFG hash computation.
599 SIVisitor.countSelects();
600 ValueSites[IPVK_MemOPSize] = VPC.get(IPVK_MemOPSize);
 // As shown, indirect-call and vtable sites are collected only in the non-CS
 // case; each mode updates its own set of statistics.
601 if (!IsCS) {
602 NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
603 NumOfPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
604 NumOfPGOBB += MST.bbInfoSize();
605 ValueSites[IPVK_IndirectCallTarget] = VPC.get(IPVK_IndirectCallTarget);
607 ValueSites[IPVK_VTableTarget] = VPC.get(IPVK_VTableTarget);
608 } else {
609 NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
610 NumOfCSPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
611 NumOfCSPGOBB += MST.bbInfoSize();
612 }
613
614 FuncName = getIRPGOFuncName(F);
615 DeprecatedFuncName = getPGOFuncName(F);
616 computeCFGHash();
617 if (!ComdatMembers.empty())
618 renameComdatFunction();
619 LLVM_DEBUG(dumpInfo("after CFGMST"));
620
 // Count the live edges, and those among them that are not in the MST (the
 // ones that get instrumented).
621 for (const auto &E : MST.allEdges()) {
622 if (E->Removed)
623 continue;
624 IsCS ? NumOfCSPGOEdge++ : NumOfPGOEdge++;
625 if (!E->InMST)
626 IsCS ? NumOfCSPGOInstrument++ : NumOfPGOInstrument++;
627 }
628
629 if (CreateGlobalVar)
630 FuncNameVar = createPGOFuncNameVar(F, FuncName);
631 }
632};
633
634} // end anonymous namespace
635
636// Compute the hash value for the CFG from two CRCs. The first (JC) covers the
637// index value of every successor BB in the CFG. The second (JCH) covers the
638// numbers of selects, indirect calls and mem ops, plus either the
// instrumented-blocks hash (when block coverage inference is present) or the
// number of edges. FunctionHash is (JCH << 28) + JC with bits 60-63 cleared.
639template <class Edge, class BBInfo>
640void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
641 std::vector<uint8_t> Indexes;
642 JamCRC JC;
 // Serialize the index of every successor that has BB info, least
 // significant byte first.
643 for (auto &BB : F) {
644 for (BasicBlock *Succ : successors(&BB)) {
645 auto BI = findBBInfo(Succ);
646 if (BI == nullptr)
647 continue;
648 uint32_t Index = BI->Index;
649 for (int J = 0; J < 4; J++)
650 Indexes.push_back((uint8_t)(Index >> (J * 8)));
651 }
652 }
653 JC.update(Indexes);
654
655 JamCRC JCH;
656 // The second CRC, which ends up in the upper part of the hash.
657 auto updateJCH = [&JCH](uint64_t Num) {
658 uint8_t Data[8];
 // NOTE(review): nothing visible here copies Num into Data before it is
 // hashed; a serialization step appears to be missing from this listing —
 // confirm.
660 JCH.update(Data);
661 };
662 updateJCH((uint64_t)SIVisitor.getNumOfSelectInsts());
663 updateJCH((uint64_t)ValueSites[IPVK_IndirectCallTarget].size());
664 updateJCH((uint64_t)ValueSites[IPVK_MemOPSize].size());
665 if (BCI) {
666 updateJCH(BCI->getInstrumentedBlocksHash());
667 } else {
668 updateJCH((uint64_t)MST.numEdges());
669 }
670
671 // Hash format for context sensitive profile. Reserve 4 bits for other
672 // information.
673 FunctionHash = (((uint64_t)JCH.getCRC()) << 28) + JC.getCRC();
674
675 // Reserve bit 60-63 for other information purpose.
676 FunctionHash &= 0x0FFFFFFFFFFFFFFF;
 // NOTE(review): as shown, this `if` controls only the debug output that
 // follows; a statement marking context-sensitive hashes appears to be
 // missing from this listing — confirm.
677 if (IsCS)
679 LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n"
680 << " CRC = " << JC.getCRC()
681 << ", Selects = " << SIVisitor.getNumOfSelectInsts()
682 << ", Edges = " << MST.numEdges() << ", ICSites = "
683 << ValueSites[IPVK_IndirectCallTarget].size()
684 << ", Memops = " << ValueSites[IPVK_MemOPSize].size()
685 << ", High32 CRC = " << JCH.getCRC()
686 << ", Hash = " << FunctionHash << "\n";);
687
 // Unconditional trace of the hash for functions whose name contains the
 // -pgo-trace-func-hash value ("-" disables the trace).
688 if (PGOTraceFuncHash != "-" && F.getName().contains(PGOTraceFuncHash))
689 dbgs() << "Funcname=" << F.getName() << ", Hash=" << FunctionHash
690 << " in building " << F.getParent()->getSourceFileName() << "\n";
691}
692
693// Check if we can safely rename this Comdat function.
694static bool canRenameComdat(
695 Function &F,
696 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
697 if (!DoComdatRenaming || !canRenameComdatFunc(F, true))
698 return false;
699
700 // FIXME: Current only handle those Comdat groups that only containing one
701 // function.
702 // (1) For a Comdat group containing multiple functions, we need to have a
703 // unique postfix based on the hashes for each function. There is a
704 // non-trivial code refactoring to do this efficiently.
705 // (2) Variables can not be renamed, so we can not rename Comdat function in a
706 // group including global vars.
707 Comdat *C = F.getComdat();
708 for (auto &&CM : make_range(ComdatMembers.equal_range(C))) {
709 assert(!isa<GlobalAlias>(CM.second));
710 Function *FM = dyn_cast<Function>(CM.second);
711 if (FM != &F)
712 return false;
713 }
714 return true;
715}
716
717// Append the CFGHash to the Comdat function name (and to FuncName), then move
// the function into a correspondingly named Comdat. Does nothing when
// canRenameComdat() refuses.
718template <class Edge, class BBInfo>
719void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() {
720 if (!canRenameComdat(F, ComdatMembers))
721 return;
 // NOTE(review): OrigName is not used by any statement visible in this
 // listing — confirm whether a use of it has been lost.
722 std::string OrigName = F.getName().str();
723 std::string NewFuncName =
724 Twine(F.getName() + "." + Twine(FunctionHash)).str();
725 F.setName(Twine(NewFuncName));
727 FuncName = Twine(FuncName + "." + Twine(FunctionHash)).str();
728 Comdat *NewComdat;
729 Module *M = F.getParent();
730 // For AvailableExternallyLinkage functions, change the linkage to
731 // LinkOnceODR and put them into comdat. This is because after renaming, there
732 // is no backup external copy available for the function.
 // NOTE(review): the visible statements only create and set the Comdat; the
 // linkage change described above is not shown in this listing — confirm.
733 if (!F.hasComdat()) {
735 NewComdat = M->getOrInsertComdat(StringRef(NewFuncName));
737 F.setComdat(NewComdat);
738 return;
739 }
740
741 // This function belongs to a single function Comdat group.
 // Create "<old comdat name>.<hash>" with the same selection kind as the
 // original.
742 Comdat *OrigComdat = F.getComdat();
743 std::string NewComdatName =
744 Twine(OrigComdat->getName() + "." + Twine(FunctionHash)).str();
745 NewComdat = M->getOrInsertComdat(StringRef(NewComdatName));
746 NewComdat->setSelectionKind(OrigComdat->getSelectionKind());
747
 // Move every recorded member of the original Comdat to the new one.
748 for (auto &&CM : make_range(ComdatMembers.equal_range(OrigComdat))) {
749 // Must be a function.
750 cast<Function>(CM.second)->setComdat(NewComdat);
751 }
752}
753
754/// Collect all the BBs that will be instruments and add them to
755/// `InstrumentBBs`.
756template <class Edge, class BBInfo>
757void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs(
758 std::vector<BasicBlock *> &InstrumentBBs) {
759 if (BCI) {
760 for (auto &BB : F)
761 if (BCI->shouldInstrumentBlock(BB))
762 InstrumentBBs.push_back(&BB);
763 return;
764 }
765
766 // Use a worklist as we will update the vector during the iteration.
767 std::vector<Edge *> EdgeList;
768 EdgeList.reserve(MST.numEdges());
769 for (const auto &E : MST.allEdges())
770 EdgeList.push_back(E.get());
771
772 for (auto &E : EdgeList) {
773 BasicBlock *InstrBB = getInstrBB(E);
774 if (InstrBB)
775 InstrumentBBs.push_back(InstrBB);
776 }
777}
778
779// Given a CFG E to be instrumented, find which BB to place the instrumented
780// code. The function will split the critical edge if necessary.
781template <class Edge, class BBInfo>
782BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) {
783 if (E->InMST || E->Removed)
784 return nullptr;
785
786 BasicBlock *SrcBB = E->SrcBB;
787 BasicBlock *DestBB = E->DestBB;
788 // For a fake edge, instrument the real BB.
789 if (SrcBB == nullptr)
790 return DestBB;
791 if (DestBB == nullptr)
792 return SrcBB;
793
794 auto canInstrument = [](BasicBlock *BB) -> BasicBlock * {
795 // There are basic blocks (such as catchswitch) cannot be instrumented.
796 // If the returned first insertion point is the end of BB, skip this BB.
797 if (BB->getFirstInsertionPt() == BB->end())
798 return nullptr;
799 return BB;
800 };
801
802 // Instrument the SrcBB if it has a single successor,
803 // otherwise, the DestBB if this is not a critical edge.
804 Instruction *TI = SrcBB->getTerminator();
805 if (TI->getNumSuccessors() <= 1)
806 return canInstrument(SrcBB);
807 if (!E->IsCritical)
808 return canInstrument(DestBB);
809
810 // Some IndirectBr critical edges cannot be split by the previous
811 // SplitIndirectBrCriticalEdges call. Bail out.
812 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
813 BasicBlock *InstrBB =
814 isa<IndirectBrInst>(TI) ? nullptr : SplitCriticalEdge(TI, SuccNum);
815 if (!InstrBB) {
817 dbgs() << "Fail to split critical edge: not instrument this edge.\n");
818 return nullptr;
819 }
820 // For a critical edge, we have to split. Instrument the newly
821 // created BB.
822 IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++;
823 LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index
824 << " --> " << getBBInfo(DestBB).Index << "\n");
825 // Need to add two new edges. First one: Add new edge of SrcBB->InstrBB.
826 MST.addEdge(SrcBB, InstrBB, 0);
827 // Second one: Add new edge of InstrBB->DestBB.
828 Edge &NewEdge1 = MST.addEdge(InstrBB, DestBB, 0);
829 NewEdge1.InMST = true;
830 E->Removed = true;
831
832 return canInstrument(InstrBB);
833}
834
835// When generating value profiling calls on Windows routines that make use of
836// handler funclets for exception processing an operand bundle needs to be attached
837// to the called function. This routine will set \p OpBundles to contain the
838// funclet information, if any is needed, that should be placed on the generated
839// value profiling call for the value profile candidate call.
// NOTE(review): the function name and parameter list are not visible in this
// listing; the body uses `Cand`, `BlockColors` and `OpBundles`.
840static void
 // Only candidates annotated on a call instruction need a bundle.
844 auto *OrigCall = dyn_cast<CallBase>(Cand.AnnotatedInst);
845 if (!OrigCall)
846 return;
847
848 if (!isa<IntrinsicInst>(OrigCall)) {
849 // The instrumentation call should belong to the same funclet as a
850 // non-intrinsic call, so just copy the operand bundle, if any exists.
851 std::optional<OperandBundleUse> ParentFunclet =
852 OrigCall->getOperandBundle(LLVMContext::OB_funclet);
853 if (ParentFunclet)
854 OpBundles.emplace_back(OperandBundleDef(*ParentFunclet));
855 } else {
856 // Intrinsics or other instructions do not get funclet information from the
857 // front-end. Need to use the BlockColors that was computed by the routine
858 // colorEHFunclets to determine whether a funclet is needed.
859 if (!BlockColors.empty()) {
 // NOTE(review): the find() result is dereferenced without a check, so
 // this presumably relies on every block having an entry in BlockColors —
 // confirm.
860 const ColorVector &CV = BlockColors.find(OrigCall->getParent())->second;
861 assert(CV.size() == 1 && "non-unique color for block!");
862 Instruction *EHPad = CV.front()->getFirstNonPHI();
863 if (EHPad->isEHPad())
864 OpBundles.emplace_back("funclet", EHPad);
865 }
866 }
867}
868
869// Visit all edge and instrument the edges not in MST, and do value profiling.
870// Critical edges will be split.
874 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
875 bool IsCS) {
876 if (!PGOBlockCoverage) {
877 // Split indirectbr critical edges here before computing the MST rather than
878 // later in getInstrBB() to avoid invalidating it.
879 SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, BFI);
880 }
881
882 FuncPGOInstrumentation<PGOEdge, PGOBBInfo> FuncInfo(
883 F, TLI, ComdatMembers, true, BPI, BFI, IsCS, shouldInstrumentEntryBB(),
885
886 auto Name = FuncInfo.FuncNameVar;
887 auto CFGHash = ConstantInt::get(Type::getInt64Ty(M->getContext()),
888 FuncInfo.FunctionHash);
890 auto &EntryBB = F.getEntryBlock();
891 IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt());
892 // llvm.instrprof.cover(i8* <name>, i64 <hash>, i32 <num-counters>,
893 // i32 <index>)
894 Builder.CreateCall(
895 Intrinsic::getDeclaration(M, Intrinsic::instrprof_cover),
896 {Name, CFGHash, Builder.getInt32(1), Builder.getInt32(0)});
897 return;
898 }
899
900 std::vector<BasicBlock *> InstrumentBBs;
901 FuncInfo.getInstrumentBBs(InstrumentBBs);
902 unsigned NumCounters =
903 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
904
906 auto *CSIntrinsic =
907 Intrinsic::getDeclaration(M, Intrinsic::instrprof_callsite);
908 // We want to count the instrumentable callsites, then instrument them. This
909 // is because the llvm.instrprof.callsite intrinsic has an argument (like
910 // the other instrprof intrinsics) capturing the total number of
911 // instrumented objects (counters, or callsites, in this case). In this
912 // case, we want that value so we can readily pass it to the compiler-rt
913 // APIs that may have to allocate memory based on the nr of callsites.
914 // The traversal logic is the same for both counting and instrumentation,
915 // just needs to be done in succession.
916 auto Visit = [&](llvm::function_ref<void(CallBase * CB)> Visitor) {
917 for (auto &BB : F)
918 for (auto &Instr : BB)
919 if (auto *CS = dyn_cast<CallBase>(&Instr)) {
920 if ((CS->getCalledFunction() &&
921 CS->getCalledFunction()->isIntrinsic()) ||
922 dyn_cast<InlineAsm>(CS->getCalledOperand()))
923 continue;
924 Visitor(CS);
925 }
926 };
927 // First, count callsites.
928 uint32_t TotalNrCallsites = 0;
929 Visit([&TotalNrCallsites](auto *) { ++TotalNrCallsites; });
930
931 // Now instrument.
932 uint32_t CallsiteIndex = 0;
933 Visit([&](auto *CB) {
934 IRBuilder<> Builder(CB);
935 Builder.CreateCall(CSIntrinsic,
936 {Name, CFGHash, Builder.getInt32(TotalNrCallsites),
937 Builder.getInt32(CallsiteIndex++),
938 CB->getCalledOperand()});
939 });
940 }
941
942 uint32_t I = 0;
944 NumCounters += PGOBlockCoverage ? 8 : 1;
945 auto &EntryBB = F.getEntryBlock();
946 IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt());
947 // llvm.instrprof.timestamp(i8* <name>, i64 <hash>, i32 <num-counters>,
948 // i32 <index>)
949 Builder.CreateCall(
950 Intrinsic::getDeclaration(M, Intrinsic::instrprof_timestamp),
951 {Name, CFGHash, Builder.getInt32(NumCounters), Builder.getInt32(I)});
952 I += PGOBlockCoverage ? 8 : 1;
953 }
954
955 for (auto *InstrBB : InstrumentBBs) {
956 IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt());
957 assert(Builder.GetInsertPoint() != InstrBB->end() &&
958 "Cannot get the Instrumentation point");
959 // llvm.instrprof.increment(i8* <name>, i64 <hash>, i32 <num-counters>,
960 // i32 <index>)
961 Builder.CreateCall(
963 ? Intrinsic::instrprof_cover
964 : Intrinsic::instrprof_increment),
965 {Name, CFGHash, Builder.getInt32(NumCounters), Builder.getInt32(I++)});
966 }
967
968 // Now instrument select instructions:
969 FuncInfo.SIVisitor.instrumentSelects(&I, NumCounters, FuncInfo.FuncNameVar,
970 FuncInfo.FunctionHash);
971 assert(I == NumCounters);
972
974 return;
975
976 NumOfPGOICall += FuncInfo.ValueSites[IPVK_IndirectCallTarget].size();
977
978 // Intrinsic function calls do not have funclet operand bundles needed for
979 // Windows exception handling attached to them. However, if value profiling is
980 // inserted for one of these calls, then a funclet value will need to be set
981 // on the instrumentation call based on the funclet coloring.
983 if (F.hasPersonalityFn() &&
984 isScopedEHPersonality(classifyEHPersonality(F.getPersonalityFn())))
985 BlockColors = colorEHFunclets(F);
986
987 // For each VP Kind, walk the VP candidates and instrument each one.
988 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) {
989 unsigned SiteIndex = 0;
990 if (Kind == IPVK_MemOPSize && !PGOInstrMemOP)
991 continue;
992
993 for (VPCandidateInfo Cand : FuncInfo.ValueSites[Kind]) {
994 LLVM_DEBUG(dbgs() << "Instrument one VP " << ValueProfKindDescr[Kind]
995 << " site: CallSite Index = " << SiteIndex << "\n");
996
997 IRBuilder<> Builder(Cand.InsertPt);
998 assert(Builder.GetInsertPoint() != Cand.InsertPt->getParent()->end() &&
999 "Cannot get the Instrumentation point");
1000
1001 Value *ToProfile = nullptr;
1002 if (Cand.V->getType()->isIntegerTy())
1003 ToProfile = Builder.CreateZExtOrTrunc(Cand.V, Builder.getInt64Ty());
1004 else if (Cand.V->getType()->isPointerTy())
1005 ToProfile = Builder.CreatePtrToInt(Cand.V, Builder.getInt64Ty());
1006 assert(ToProfile && "value profiling Value is of unexpected type");
1007
1009 populateEHOperandBundle(Cand, BlockColors, OpBundles);
1010 Builder.CreateCall(
1011 Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile),
1012 {FuncInfo.FuncNameVar, Builder.getInt64(FuncInfo.FunctionHash),
1013 ToProfile, Builder.getInt32(Kind), Builder.getInt32(SiteIndex++)},
1014 OpBundles);
1015 }
1016 } // IPVK_First <= Kind <= IPVK_Last
1017}
1018
1019namespace {
1020
1021// This class represents a CFG edge in profile use compilation.
1022struct PGOUseEdge : public PGOEdge {
1023 using PGOEdge::PGOEdge;
1024
1025 std::optional<uint64_t> Count;
1026
1027 // Set edge count value
1028 void setEdgeCount(uint64_t Value) { Count = Value; }
1029
1030 // Return the information string for this object.
1031 std::string infoString() const {
1032 if (!Count)
1033 return PGOEdge::infoString();
1034 return (Twine(PGOEdge::infoString()) + " Count=" + Twine(*Count)).str();
1035 }
1036};
1037
1038using DirectEdges = SmallVector<PGOUseEdge *, 2>;
1039
1040// This class stores the auxiliary information for each BB.
1041struct PGOUseBBInfo : public PGOBBInfo {
1042 std::optional<uint64_t> Count;
1043 int32_t UnknownCountInEdge = 0;
1044 int32_t UnknownCountOutEdge = 0;
1045 DirectEdges InEdges;
1046 DirectEdges OutEdges;
1047
1048 PGOUseBBInfo(unsigned IX) : PGOBBInfo(IX) {}
1049
1050 // Set the profile count value for this BB.
1051 void setBBInfoCount(uint64_t Value) { Count = Value; }
1052
1053 // Return the information string of this object.
1054 std::string infoString() const {
1055 if (!Count)
1056 return PGOBBInfo::infoString();
1057 return (Twine(PGOBBInfo::infoString()) + " Count=" + Twine(*Count)).str();
1058 }
1059
1060 // Add an OutEdge and update the edge count.
1061 void addOutEdge(PGOUseEdge *E) {
1062 OutEdges.push_back(E);
1063 UnknownCountOutEdge++;
1064 }
1065
1066 // Add an InEdge and update the edge count.
1067 void addInEdge(PGOUseEdge *E) {
1068 InEdges.push_back(E);
1069 UnknownCountInEdge++;
1070 }
1071};
1072
1073} // end anonymous namespace
1074
1075// Sum up the count values for all the edges.
1077 uint64_t Total = 0;
1078 for (const auto &E : Edges) {
1079 if (E->Removed)
1080 continue;
1081 if (E->Count)
1082 Total += *E->Count;
1083 }
1084 return Total;
1085}
1086
1087namespace {
1088
1089class PGOUseFunc {
1090public:
1091 PGOUseFunc(Function &Func, Module *Modu, TargetLibraryInfo &TLI,
1092 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
1094 ProfileSummaryInfo *PSI, bool IsCS, bool InstrumentFuncEntry,
1095 bool HasSingleByteCoverage)
1096 : F(Func), M(Modu), BFI(BFIin), PSI(PSI),
1097 FuncInfo(Func, TLI, ComdatMembers, false, BPI, BFIin, IsCS,
1098 InstrumentFuncEntry, HasSingleByteCoverage),
1099 FreqAttr(FFA_Normal), IsCS(IsCS) {}
1100
1101 void handleInstrProfError(Error Err, uint64_t MismatchedFuncSum);
1102
1103 // Read counts for the instrumented BB from profile.
1104 bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
1106
1107 // Populate the counts for all BBs.
1108 void populateCounters();
1109
1110 // Set block coverage based on profile coverage values.
1111 void populateCoverage(IndexedInstrProfReader *PGOReader);
1112
1113 // Set the branch weights based on the count values.
1114 void setBranchWeights();
1115
1116 // Annotate the value profile call sites for all value kind.
1117 void annotateValueSites();
1118
1119 // Annotate the value profile call sites for one value kind.
1120 void annotateValueSites(uint32_t Kind);
1121
1122 // Annotate the irreducible loop header weights.
1123 void annotateIrrLoopHeaderWeights();
1124
1125 // The hotness of the function from the profile count.
1126 enum FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot };
1127
1128 // Return the function hotness from the profile.
1129 FuncFreqAttr getFuncFreqAttr() const { return FreqAttr; }
1130
1131 // Return the function hash.
1132 uint64_t getFuncHash() const { return FuncInfo.FunctionHash; }
1133
1134 // Return the profile record for this function;
1135 InstrProfRecord &getProfileRecord() { return ProfileRecord; }
1136
1137 // Return the auxiliary BB information.
1138 PGOUseBBInfo &getBBInfo(const BasicBlock *BB) const {
1139 return FuncInfo.getBBInfo(BB);
1140 }
1141
1142 // Return the auxiliary BB information if available.
1143 PGOUseBBInfo *findBBInfo(const BasicBlock *BB) const {
1144 return FuncInfo.findBBInfo(BB);
1145 }
1146
1147 Function &getFunc() const { return F; }
1148
1149 void dumpInfo(StringRef Str = "") const { FuncInfo.dumpInfo(Str); }
1150
1151 uint64_t getProgramMaxCount() const { return ProgramMaxCount; }
1152
1153private:
1154 Function &F;
1155 Module *M;
1157 ProfileSummaryInfo *PSI;
1158
1159 // This member stores the shared information with class PGOGenFunc.
1160 FuncPGOInstrumentation<PGOUseEdge, PGOUseBBInfo> FuncInfo;
1161
1162 // The maximum count value in the profile. This is only used in PGO use
1163 // compilation.
1164 uint64_t ProgramMaxCount;
1165
1166 // Position of counter that remains to be read.
1167 uint32_t CountPosition = 0;
1168
1169 // Total size of the profile count for this function.
1170 uint32_t ProfileCountSize = 0;
1171
1172 // ProfileRecord for this function.
1173 InstrProfRecord ProfileRecord;
1174
1175 // Function hotness info derived from profile.
1176 FuncFreqAttr FreqAttr;
1177
1178 // Is to use the context sensitive profile.
1179 bool IsCS;
1180
1181 // Find the Instrumented BB and set the value. Return false on error.
1182 bool setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile);
1183
1184 // Set the edge counter value for the unknown edge -- there should be only
1185 // one unknown edge.
1186 void setEdgeCount(DirectEdges &Edges, uint64_t Value);
1187
1188 // Set the hot/cold inline hints based on the count values.
1189 // FIXME: This function should be removed once the functionality in
1190 // the inliner is implemented.
1191 void markFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) {
1192 if (PSI->isHotCount(EntryCount))
1193 FreqAttr = FFA_Hot;
1194 else if (PSI->isColdCount(MaxCount))
1195 FreqAttr = FFA_Cold;
1196 }
1197};
1198
1199} // end anonymous namespace
1200
1201/// Set up InEdges/OutEdges for all BBs in the MST.
1203 const FuncPGOInstrumentation<PGOUseEdge, PGOUseBBInfo> &FuncInfo) {
1204 // This is not required when there is block coverage inference.
1205 if (FuncInfo.BCI)
1206 return;
1207 for (const auto &E : FuncInfo.MST.allEdges()) {
1208 if (E->Removed)
1209 continue;
1210 const BasicBlock *SrcBB = E->SrcBB;
1211 const BasicBlock *DestBB = E->DestBB;
1212 PGOUseBBInfo &SrcInfo = FuncInfo.getBBInfo(SrcBB);
1213 PGOUseBBInfo &DestInfo = FuncInfo.getBBInfo(DestBB);
1214 SrcInfo.addOutEdge(E.get());
1215 DestInfo.addInEdge(E.get());
1216 }
1217}
1218
1219// Visit all the edges and assign the count value for the instrumented
1220// edges and the BB. Return false on error.
1221bool PGOUseFunc::setInstrumentedCounts(
1222 const std::vector<uint64_t> &CountFromProfile) {
1223
1224 std::vector<BasicBlock *> InstrumentBBs;
1225 FuncInfo.getInstrumentBBs(InstrumentBBs);
1226
1227 setupBBInfoEdges(FuncInfo);
1228
1229 unsigned NumCounters =
1230 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
1231 // The number of counters here should match the number of counters
1232 // in profile. Return if they mismatch.
1233 if (NumCounters != CountFromProfile.size()) {
1234 return false;
1235 }
1236 auto *FuncEntry = &*F.begin();
1237
1238 // Set the profile count to the Instrumented BBs.
1239 uint32_t I = 0;
1240 for (BasicBlock *InstrBB : InstrumentBBs) {
1241 uint64_t CountValue = CountFromProfile[I++];
1242 PGOUseBBInfo &Info = getBBInfo(InstrBB);
1243 // If we reach here, we know that we have some nonzero count
1244 // values in this function. The entry count should not be 0.
1245 // Fix it if necessary.
1246 if (InstrBB == FuncEntry && CountValue == 0)
1247 CountValue = 1;
1248 Info.setBBInfoCount(CountValue);
1249 }
1250 ProfileCountSize = CountFromProfile.size();
1251 CountPosition = I;
1252
1253 // Set the edge count and update the count of unknown edges for BBs.
1254 auto setEdgeCount = [this](PGOUseEdge *E, uint64_t Value) -> void {
1255 E->setEdgeCount(Value);
1256 this->getBBInfo(E->SrcBB).UnknownCountOutEdge--;
1257 this->getBBInfo(E->DestBB).UnknownCountInEdge--;
1258 };
1259
1260 // Set the profile count the Instrumented edges. There are BBs that not in
1261 // MST but not instrumented. Need to set the edge count value so that we can
1262 // populate the profile counts later.
1263 for (const auto &E : FuncInfo.MST.allEdges()) {
1264 if (E->Removed || E->InMST)
1265 continue;
1266 const BasicBlock *SrcBB = E->SrcBB;
1267 PGOUseBBInfo &SrcInfo = getBBInfo(SrcBB);
1268
1269 // If only one out-edge, the edge profile count should be the same as BB
1270 // profile count.
1271 if (SrcInfo.Count && SrcInfo.OutEdges.size() == 1)
1272 setEdgeCount(E.get(), *SrcInfo.Count);
1273 else {
1274 const BasicBlock *DestBB = E->DestBB;
1275 PGOUseBBInfo &DestInfo = getBBInfo(DestBB);
1276 // If only one in-edge, the edge profile count should be the same as BB
1277 // profile count.
1278 if (DestInfo.Count && DestInfo.InEdges.size() == 1)
1279 setEdgeCount(E.get(), *DestInfo.Count);
1280 }
1281 if (E->Count)
1282 continue;
1283 // E's count should have been set from profile. If not, this meenas E skips
1284 // the instrumentation. We set the count to 0.
1285 setEdgeCount(E.get(), 0);
1286 }
1287 return true;
1288}
1289
1290// Set the count value for the unknown edge. There should be one and only one
1291// unknown edge in Edges vector.
1292void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) {
1293 for (auto &E : Edges) {
1294 if (E->Count)
1295 continue;
1296 E->setEdgeCount(Value);
1297
1298 getBBInfo(E->SrcBB).UnknownCountOutEdge--;
1299 getBBInfo(E->DestBB).UnknownCountInEdge--;
1300 return;
1301 }
1302 llvm_unreachable("Cannot find the unknown count edge");
1303}
1304
1305// Emit function metadata indicating PGO profile mismatch.
1307 const char MetadataName[] = "instr_prof_hash_mismatch";
1309 // If this metadata already exists, ignore.
1310 auto *Existing = F.getMetadata(LLVMContext::MD_annotation);
1311 if (Existing) {
1312 MDTuple *Tuple = cast<MDTuple>(Existing);
1313 for (const auto &N : Tuple->operands()) {
1314 if (N.equalsStr(MetadataName))
1315 return;
1316 Names.push_back(N.get());
1317 }
1318 }
1319
1320 MDBuilder MDB(ctx);
1321 Names.push_back(MDB.createString(MetadataName));
1322 MDNode *MD = MDTuple::get(ctx, Names);
1323 F.setMetadata(LLVMContext::MD_annotation, MD);
1324}
1325
1326void PGOUseFunc::handleInstrProfError(Error Err, uint64_t MismatchedFuncSum) {
1327 handleAllErrors(std::move(Err), [&](const InstrProfError &IPE) {
1328 auto &Ctx = M->getContext();
1329 auto Err = IPE.get();
1330 bool SkipWarning = false;
1331 LLVM_DEBUG(dbgs() << "Error in reading profile for Func "
1332 << FuncInfo.FuncName << ": ");
1333 if (Err == instrprof_error::unknown_function) {
1334 IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++;
1335 SkipWarning = !PGOWarnMissing;
1336 LLVM_DEBUG(dbgs() << "unknown function");
1337 } else if (Err == instrprof_error::hash_mismatch ||
1338 Err == instrprof_error::malformed) {
1339 IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++;
1340 SkipWarning =
1343 (F.hasComdat() || F.getLinkage() == GlobalValue::WeakAnyLinkage ||
1345 LLVM_DEBUG(dbgs() << "hash mismatch (hash= " << FuncInfo.FunctionHash
1346 << " skip=" << SkipWarning << ")");
1347 // Emit function metadata indicating PGO profile mismatch.
1348 annotateFunctionWithHashMismatch(F, M->getContext());
1349 }
1350
1351 LLVM_DEBUG(dbgs() << " IsCS=" << IsCS << "\n");
1352 if (SkipWarning)
1353 return;
1354
1355 std::string Msg =
1356 IPE.message() + std::string(" ") + F.getName().str() +
1357 std::string(" Hash = ") + std::to_string(FuncInfo.FunctionHash) +
1358 std::string(" up to ") + std::to_string(MismatchedFuncSum) +
1359 std::string(" count discarded");
1360
1361 Ctx.diagnose(
1362 DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning));
1363 });
1364}
1365
1366// Read the profile from ProfileFileName and assign the value to the
1367// instrumented BB and the edges. This function also updates ProgramMaxCount.
1368// Return true if the profile is successfully read, and false on errors.
1369bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
1371 auto &Ctx = M->getContext();
1372 uint64_t MismatchedFuncSum = 0;
1374 FuncInfo.FuncName, FuncInfo.FunctionHash, FuncInfo.DeprecatedFuncName,
1375 &MismatchedFuncSum);
1376 if (Error E = Result.takeError()) {
1377 handleInstrProfError(std::move(E), MismatchedFuncSum);
1378 return false;
1379 }
1380 ProfileRecord = std::move(Result.get());
1381 PseudoKind = ProfileRecord.getCountPseudoKind();
1382 if (PseudoKind != InstrProfRecord::NotPseudo) {
1383 return true;
1384 }
1385 std::vector<uint64_t> &CountFromProfile = ProfileRecord.Counts;
1386
1387 IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++;
1388 LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n");
1389
1390 uint64_t ValueSum = 0;
1391 for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) {
1392 LLVM_DEBUG(dbgs() << " " << I << ": " << CountFromProfile[I] << "\n");
1393 ValueSum += CountFromProfile[I];
1394 }
1395 AllZeros = (ValueSum == 0);
1396
1397 LLVM_DEBUG(dbgs() << "SUM = " << ValueSum << "\n");
1398
1399 getBBInfo(nullptr).UnknownCountOutEdge = 2;
1400 getBBInfo(nullptr).UnknownCountInEdge = 2;
1401
1402 if (!setInstrumentedCounts(CountFromProfile)) {
1403 LLVM_DEBUG(
1404 dbgs() << "Inconsistent number of counts, skipping this function");
1405 Ctx.diagnose(DiagnosticInfoPGOProfile(
1406 M->getName().data(),
1407 Twine("Inconsistent number of counts in ") + F.getName().str() +
1408 Twine(": the profile may be stale or there is a function name "
1409 "collision."),
1410 DS_Warning));
1411 return false;
1412 }
1413 ProgramMaxCount = PGOReader->getMaximumFunctionCount(IsCS);
1414 return true;
1415}
1416
1417void PGOUseFunc::populateCoverage(IndexedInstrProfReader *PGOReader) {
1418 uint64_t MismatchedFuncSum = 0;
1420 FuncInfo.FuncName, FuncInfo.FunctionHash, FuncInfo.DeprecatedFuncName,
1421 &MismatchedFuncSum);
1422 if (auto Err = Result.takeError()) {
1423 handleInstrProfError(std::move(Err), MismatchedFuncSum);
1424 return;
1425 }
1426 IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++;
1427
1428 std::vector<uint64_t> &CountsFromProfile = Result.get().Counts;
1430 unsigned Index = 0;
1431 for (auto &BB : F)
1432 if (FuncInfo.BCI->shouldInstrumentBlock(BB))
1433 Coverage[&BB] = (CountsFromProfile[Index++] != 0);
1434 assert(Index == CountsFromProfile.size());
1435
1436 // For each B in InverseDependencies[A], if A is covered then B is covered.
1438 InverseDependencies;
1439 for (auto &BB : F) {
1440 for (auto *Dep : FuncInfo.BCI->getDependencies(BB)) {
1441 // If Dep is covered then BB is covered.
1442 InverseDependencies[Dep].insert(&BB);
1443 }
1444 }
1445
1446 // Infer coverage of the non-instrumented blocks using a flood-fill algorithm.
1447 std::stack<const BasicBlock *> CoveredBlocksToProcess;
1448 for (auto &[BB, IsCovered] : Coverage)
1449 if (IsCovered)
1450 CoveredBlocksToProcess.push(BB);
1451
1452 while (!CoveredBlocksToProcess.empty()) {
1453 auto *CoveredBlock = CoveredBlocksToProcess.top();
1454 assert(Coverage[CoveredBlock]);
1455 CoveredBlocksToProcess.pop();
1456 for (auto *BB : InverseDependencies[CoveredBlock]) {
1457 // If CoveredBlock is covered then BB is covered.
1458 if (Coverage[BB])
1459 continue;
1460 Coverage[BB] = true;
1461 CoveredBlocksToProcess.push(BB);
1462 }
1463 }
1464
1465 // Annotate block coverage.
1466 MDBuilder MDB(F.getContext());
1467 // We set the entry count to 10000 if the entry block is covered so that BFI
1468 // can propagate a fraction of this count to the other covered blocks.
1469 F.setEntryCount(Coverage[&F.getEntryBlock()] ? 10000 : 0);
1470 for (auto &BB : F) {
1471 // For a block A and its successor B, we set the edge weight as follows:
1472 // If A is covered and B is covered, set weight=1.
1473 // If A is covered and B is uncovered, set weight=0.
1474 // If A is uncovered, set weight=1.
1475 // This setup will allow BFI to give nonzero profile counts to only covered
1476 // blocks.
1478 for (auto *Succ : successors(&BB))
1479 Weights.push_back((Coverage[Succ] || !Coverage[&BB]) ? 1 : 0);
1480 if (Weights.size() >= 2)
1481 llvm::setBranchWeights(*BB.getTerminator(), Weights,
1482 /*IsExpected=*/false);
1483 }
1484
1485 unsigned NumCorruptCoverage = 0;
1486 DominatorTree DT(F);
1487 LoopInfo LI(DT);
1488 BranchProbabilityInfo BPI(F, LI);
1489 BlockFrequencyInfo BFI(F, BPI, LI);
1490 auto IsBlockDead = [&](const BasicBlock &BB) -> std::optional<bool> {
1491 if (auto C = BFI.getBlockProfileCount(&BB))
1492 return C == 0;
1493 return {};
1494 };
1495 LLVM_DEBUG(dbgs() << "Block Coverage: (Instrumented=*, Covered=X)\n");
1496 for (auto &BB : F) {
1497 LLVM_DEBUG(dbgs() << (FuncInfo.BCI->shouldInstrumentBlock(BB) ? "* " : " ")
1498 << (Coverage[&BB] ? "X " : " ") << " " << BB.getName()
1499 << "\n");
1500 // In some cases it is possible to find a covered block that has no covered
1501 // successors, e.g., when a block calls a function that may call exit(). In
1502 // those cases, BFI could find its successor to be covered while BCI could
1503 // find its successor to be dead.
1504 if (Coverage[&BB] == IsBlockDead(BB).value_or(false)) {
1505 LLVM_DEBUG(
1506 dbgs() << "Found inconsistent block covearge for " << BB.getName()
1507 << ": BCI=" << (Coverage[&BB] ? "Covered" : "Dead") << " BFI="
1508 << (IsBlockDead(BB).value() ? "Dead" : "Covered") << "\n");
1509 ++NumCorruptCoverage;
1510 }
1511 if (Coverage[&BB])
1512 ++NumCoveredBlocks;
1513 }
1514 if (PGOVerifyBFI && NumCorruptCoverage) {
1515 auto &Ctx = M->getContext();
1516 Ctx.diagnose(DiagnosticInfoPGOProfile(
1517 M->getName().data(),
1518 Twine("Found inconsistent block coverage for function ") + F.getName() +
1519 " in " + Twine(NumCorruptCoverage) + " blocks.",
1520 DS_Warning));
1521 }
1523 FuncInfo.BCI->viewBlockCoverageGraph(&Coverage);
1524}
1525
1526// Populate the counters from instrumented BBs to all BBs.
1527// In the end of this operation, all BBs should have a valid count value.
1528void PGOUseFunc::populateCounters() {
1529 bool Changes = true;
1530 unsigned NumPasses = 0;
1531 while (Changes) {
1532 NumPasses++;
1533 Changes = false;
1534
1535 // For efficient traversal, it's better to start from the end as most
1536 // of the instrumented edges are at the end.
1537 for (auto &BB : reverse(F)) {
1538 PGOUseBBInfo *UseBBInfo = findBBInfo(&BB);
1539 if (UseBBInfo == nullptr)
1540 continue;
1541 if (!UseBBInfo->Count) {
1542 if (UseBBInfo->UnknownCountOutEdge == 0) {
1543 UseBBInfo->Count = sumEdgeCount(UseBBInfo->OutEdges);
1544 Changes = true;
1545 } else if (UseBBInfo->UnknownCountInEdge == 0) {
1546 UseBBInfo->Count = sumEdgeCount(UseBBInfo->InEdges);
1547 Changes = true;
1548 }
1549 }
1550 if (UseBBInfo->Count) {
1551 if (UseBBInfo->UnknownCountOutEdge == 1) {
1552 uint64_t Total = 0;
1553 uint64_t OutSum = sumEdgeCount(UseBBInfo->OutEdges);
1554 // If the one of the successor block can early terminate (no-return),
1555 // we can end up with situation where out edge sum count is larger as
1556 // the source BB's count is collected by a post-dominated block.
1557 if (*UseBBInfo->Count > OutSum)
1558 Total = *UseBBInfo->Count - OutSum;
1559 setEdgeCount(UseBBInfo->OutEdges, Total);
1560 Changes = true;
1561 }
1562 if (UseBBInfo->UnknownCountInEdge == 1) {
1563 uint64_t Total = 0;
1564 uint64_t InSum = sumEdgeCount(UseBBInfo->InEdges);
1565 if (*UseBBInfo->Count > InSum)
1566 Total = *UseBBInfo->Count - InSum;
1567 setEdgeCount(UseBBInfo->InEdges, Total);
1568 Changes = true;
1569 }
1570 }
1571 }
1572 }
1573
1574 LLVM_DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n");
1575 (void)NumPasses;
1576#ifndef NDEBUG
1577 // Assert every BB has a valid counter.
1578 for (auto &BB : F) {
1579 auto BI = findBBInfo(&BB);
1580 if (BI == nullptr)
1581 continue;
1582 assert(BI->Count && "BB count is not valid");
1583 }
1584#endif
1585 uint64_t FuncEntryCount = *getBBInfo(&*F.begin()).Count;
1586 uint64_t FuncMaxCount = FuncEntryCount;
1587 for (auto &BB : F) {
1588 auto BI = findBBInfo(&BB);
1589 if (BI == nullptr)
1590 continue;
1591 FuncMaxCount = std::max(FuncMaxCount, *BI->Count);
1592 }
1593
1594 // Fix the obviously inconsistent entry count.
1595 if (FuncMaxCount > 0 && FuncEntryCount == 0)
1596 FuncEntryCount = 1;
1598 markFunctionAttributes(FuncEntryCount, FuncMaxCount);
1599
1600 // Now annotate select instructions
1601 FuncInfo.SIVisitor.annotateSelects(this, &CountPosition);
1602 assert(CountPosition == ProfileCountSize);
1603
1604 LLVM_DEBUG(FuncInfo.dumpInfo("after reading profile."));
1605}
1606
1607// Assign the scaled count values to the BB with multiple out edges.
1608void PGOUseFunc::setBranchWeights() {
1609 // Generate MD_prof metadata for every branch instruction.
1610 LLVM_DEBUG(dbgs() << "\nSetting branch weights for func " << F.getName()
1611 << " IsCS=" << IsCS << "\n");
1612 for (auto &BB : F) {
1613 Instruction *TI = BB.getTerminator();
1614 if (TI->getNumSuccessors() < 2)
1615 continue;
1616 if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) ||
1617 isa<IndirectBrInst>(TI) || isa<InvokeInst>(TI) ||
1618 isa<CallBrInst>(TI)))
1619 continue;
1620
1621 const PGOUseBBInfo &BBCountInfo = getBBInfo(&BB);
1622 if (!*BBCountInfo.Count)
1623 continue;
1624
1625 // We have a non-zero Branch BB.
1626 unsigned Size = BBCountInfo.OutEdges.size();
1627 SmallVector<uint64_t, 2> EdgeCounts(Size, 0);
1628 uint64_t MaxCount = 0;
1629 for (unsigned s = 0; s < Size; s++) {
1630 const PGOUseEdge *E = BBCountInfo.OutEdges[s];
1631 const BasicBlock *SrcBB = E->SrcBB;
1632 const BasicBlock *DestBB = E->DestBB;
1633 if (DestBB == nullptr)
1634 continue;
1635 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
1636 uint64_t EdgeCount = *E->Count;
1637 if (EdgeCount > MaxCount)
1638 MaxCount = EdgeCount;
1639 EdgeCounts[SuccNum] = EdgeCount;
1640 }
1641
1642 if (MaxCount)
1643 setProfMetadata(M, TI, EdgeCounts, MaxCount);
1644 else {
1645 // A zero MaxCount can come about when we have a BB with a positive
1646 // count, and whose successor blocks all have 0 count. This can happen
1647 // when there is no exit block and the code exits via a noreturn function.
1648 auto &Ctx = M->getContext();
1649 Ctx.diagnose(DiagnosticInfoPGOProfile(
1650 M->getName().data(),
1651 Twine("Profile in ") + F.getName().str() +
1652 Twine(" partially ignored") +
1653 Twine(", possibly due to the lack of a return path."),
1654 DS_Warning));
1655 }
1656 }
1657}
1658
1660 for (BasicBlock *Pred : predecessors(BB)) {
1661 if (isa<IndirectBrInst>(Pred->getTerminator()))
1662 return true;
1663 }
1664 return false;
1665}
1666
1667void PGOUseFunc::annotateIrrLoopHeaderWeights() {
1668 LLVM_DEBUG(dbgs() << "\nAnnotating irreducible loop header weights.\n");
1669 // Find irr loop headers
1670 for (auto &BB : F) {
1671 // As a heuristic also annotate indrectbr targets as they have a high chance
1672 // to become an irreducible loop header after the indirectbr tail
1673 // duplication.
1674 if (BFI->isIrrLoopHeader(&BB) || isIndirectBrTarget(&BB)) {
1675 Instruction *TI = BB.getTerminator();
1676 const PGOUseBBInfo &BBCountInfo = getBBInfo(&BB);
1677 setIrrLoopHeaderMetadata(M, TI, *BBCountInfo.Count);
1678 }
1679 }
1680}
1681
1682void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) {
1683 Module *M = F.getParent();
1684 IRBuilder<> Builder(&SI);
1685 Type *Int64Ty = Builder.getInt64Ty();
1686 auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty);
1687 Builder.CreateCall(
1688 Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment_step),
1689 {FuncNameVar, Builder.getInt64(FuncHash), Builder.getInt32(TotalNumCtrs),
1690 Builder.getInt32(*CurCtrIdx), Step});
1691 ++(*CurCtrIdx);
1692}
1693
1694void SelectInstVisitor::annotateOneSelectInst(SelectInst &SI) {
1695 std::vector<uint64_t> &CountFromProfile = UseFunc->getProfileRecord().Counts;
1696 assert(*CurCtrIdx < CountFromProfile.size() &&
1697 "Out of bound access of counters");
1698 uint64_t SCounts[2];
1699 SCounts[0] = CountFromProfile[*CurCtrIdx]; // True count
1700 ++(*CurCtrIdx);
1701 uint64_t TotalCount = 0;
1702 auto BI = UseFunc->findBBInfo(SI.getParent());
1703 if (BI != nullptr)
1704 TotalCount = *BI->Count;
1705 // False Count
1706 SCounts[1] = (TotalCount > SCounts[0] ? TotalCount - SCounts[0] : 0);
1707 uint64_t MaxCount = std::max(SCounts[0], SCounts[1]);
1708 if (MaxCount)
1709 setProfMetadata(F.getParent(), &SI, SCounts, MaxCount);
1710}
1711
1712void SelectInstVisitor::visitSelectInst(SelectInst &SI) {
1713 if (!PGOInstrSelect || PGOFunctionEntryCoverage || HasSingleByteCoverage)
1714 return;
1715 // FIXME: do not handle this yet.
1716 if (SI.getCondition()->getType()->isVectorTy())
1717 return;
1718
1719 switch (Mode) {
1720 case VM_counting:
1721 NSIs++;
1722 return;
1723 case VM_instrument:
1724 instrumentOneSelectInst(SI);
1725 return;
1726 case VM_annotate:
1727 annotateOneSelectInst(SI);
1728 return;
1729 }
1730
1731 llvm_unreachable("Unknown visiting mode");
1732}
1733
1735 if (ValueProfKind == IPVK_MemOPSize)
1737 if (ValueProfKind == llvm::IPVK_VTableTarget)
1739 return MaxNumAnnotations;
1740}
1741
1742// Traverse all valuesites and annotate the instructions for all value kind.
1743void PGOUseFunc::annotateValueSites() {
1745 return;
1746
1747 // Create the PGOFuncName meta data.
1748 createPGOFuncNameMetadata(F, FuncInfo.FuncName);
1749
1750 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
1751 annotateValueSites(Kind);
1752}
1753
1754// Annotate the instructions for a specific value kind.
1755void PGOUseFunc::annotateValueSites(uint32_t Kind) {
1756 assert(Kind <= IPVK_Last);
1757 unsigned ValueSiteIndex = 0;
1758 auto &ValueSites = FuncInfo.ValueSites[Kind];
1759 unsigned NumValueSites = ProfileRecord.getNumValueSites(Kind);
1760 if (NumValueSites != ValueSites.size()) {
1761 auto &Ctx = M->getContext();
1762 Ctx.diagnose(DiagnosticInfoPGOProfile(
1763 M->getName().data(),
1764 Twine("Inconsistent number of value sites for ") +
1765 Twine(ValueProfKindDescr[Kind]) + Twine(" profiling in \"") +
1766 F.getName().str() +
1767 Twine("\", possibly due to the use of a stale profile."),
1768 DS_Warning));
1769 return;
1770 }
1771
1772 for (VPCandidateInfo &I : ValueSites) {
1773 LLVM_DEBUG(dbgs() << "Read one value site profile (kind = " << Kind
1774 << "): Index = " << ValueSiteIndex << " out of "
1775 << NumValueSites << "\n");
1777 *M, *I.AnnotatedInst, ProfileRecord,
1778 static_cast<InstrProfValueKind>(Kind), ValueSiteIndex,
1779 getMaxNumAnnotations(static_cast<InstrProfValueKind>(Kind)));
1780 ValueSiteIndex++;
1781 }
1782}
1783
1784// Collect the set of members for each Comdat in module M and store
1785// in ComdatMembers.
1787 Module &M,
1788 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
1789 if (!DoComdatRenaming)
1790 return;
1791 for (Function &F : M)
1792 if (Comdat *C = F.getComdat())
1793 ComdatMembers.insert(std::make_pair(C, &F));
1794 for (GlobalVariable &GV : M.globals())
1795 if (Comdat *C = GV.getComdat())
1796 ComdatMembers.insert(std::make_pair(C, &GV));
1797 for (GlobalAlias &GA : M.aliases())
1798 if (Comdat *C = GA.getComdat())
1799 ComdatMembers.insert(std::make_pair(C, &GA));
1800}
1801
1802// Return true if we should not find instrumentation data for this function
1803static bool skipPGOUse(const Function &F) {
1804 if (F.isDeclaration())
1805 return true;
1806 // If there are too many critical edges, PGO might cause
1807 // compiler time problem. Skip PGO if the number of
1808 // critical edges execeed the threshold.
1809 unsigned NumCriticalEdges = 0;
1810 for (auto &BB : F) {
1811 const Instruction *TI = BB.getTerminator();
1812 for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) {
1813 if (isCriticalEdge(TI, I))
1814 NumCriticalEdges++;
1815 }
1816 }
1817 if (NumCriticalEdges > PGOFunctionCriticalEdgeThreshold) {
1818 LLVM_DEBUG(dbgs() << "In func " << F.getName()
1819 << ", NumCriticalEdges=" << NumCriticalEdges
1820 << " exceed the threshold. Skip PGO.\n");
1821 return true;
1822 }
1823 return false;
1824}
1825
1826// Return true if we should not instrument this function
1827static bool skipPGOGen(const Function &F) {
1828 if (skipPGOUse(F))
1829 return true;
1830 if (F.hasFnAttribute(llvm::Attribute::Naked))
1831 return true;
1832 if (F.hasFnAttribute(llvm::Attribute::NoProfile))
1833 return true;
1834 if (F.hasFnAttribute(llvm::Attribute::SkipProfile))
1835 return true;
1836 if (F.getInstructionCount() < PGOFunctionSizeThreshold)
1837 return true;
1838 return false;
1839}
1840
1842 Module &M, function_ref<TargetLibraryInfo &(Function &)> LookupTLI,
1844 function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS) {
1845 // For the context-sensitive instrumentation, we should have a separate pass
1846 // (before LTO/ThinLTO linking) to create these variables.
1848 createIRLevelProfileFlagVar(M, /*IsCS=*/false);
1849
1850 Triple TT(M.getTargetTriple());
1851 LLVMContext &Ctx = M.getContext();
1852 if (!TT.isOSBinFormatELF() && EnableVTableValueProfiling)
1854 M.getName().data(),
1855 Twine("VTable value profiling is presently not "
1856 "supported for non-ELF object formats"),
1857 DS_Warning));
1858 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
1859 collectComdatMembers(M, ComdatMembers);
1860
1861 for (auto &F : M) {
1862 if (skipPGOGen(F))
1863 continue;
1864 auto &TLI = LookupTLI(F);
1865 auto *BPI = LookupBPI(F);
1866 auto *BFI = LookupBFI(F);
1867 instrumentOneFunc(F, &M, TLI, BPI, BFI, ComdatMembers, IsCS);
1868 }
1869 return true;
1870}
1871
1874 createProfileFileNameVar(M, CSInstrName);
1875 // The variable in a comdat may be discarded by LTO. Ensure the declaration
1876 // will be retained.
1881 return PA;
1882}
1883
1886 auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
1887 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
1889 };
1890 auto LookupBPI = [&FAM](Function &F) {
1892 };
1893 auto LookupBFI = [&FAM](Function &F) {
1895 };
1896
1897 if (!InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, IsCS))
1898 return PreservedAnalyses::all();
1899
1900 return PreservedAnalyses::none();
1901}
1902
1903// Using the ratio b/w sums of profile count values and BFI count values to
1904// adjust the func entry count.
1905static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI,
1906 BranchProbabilityInfo &NBPI) {
1907 Function &F = Func.getFunc();
1908 BlockFrequencyInfo NBFI(F, NBPI, LI);
1909#ifndef NDEBUG
1910 auto BFIEntryCount = F.getEntryCount();
1911 assert(BFIEntryCount && (BFIEntryCount->getCount() > 0) &&
1912 "Invalid BFI Entrycount");
1913#endif
1914 auto SumCount = APFloat::getZero(APFloat::IEEEdouble());
1915 auto SumBFICount = APFloat::getZero(APFloat::IEEEdouble());
1916 for (auto &BBI : F) {
1917 uint64_t CountValue = 0;
1918 uint64_t BFICountValue = 0;
1919 if (!Func.findBBInfo(&BBI))
1920 continue;
1921 auto BFICount = NBFI.getBlockProfileCount(&BBI);
1922 CountValue = *Func.getBBInfo(&BBI).Count;
1923 BFICountValue = *BFICount;
1924 SumCount.add(APFloat(CountValue * 1.0), APFloat::rmNearestTiesToEven);
1925 SumBFICount.add(APFloat(BFICountValue * 1.0), APFloat::rmNearestTiesToEven);
1926 }
1927 if (SumCount.isZero())
1928 return;
1929
1930 assert(SumBFICount.compare(APFloat(0.0)) == APFloat::cmpGreaterThan &&
1931 "Incorrect sum of BFI counts");
1932 if (SumBFICount.compare(SumCount) == APFloat::cmpEqual)
1933 return;
1934 double Scale = (SumCount / SumBFICount).convertToDouble();
1935 if (Scale < 1.001 && Scale > 0.999)
1936 return;
1937
1938 uint64_t FuncEntryCount = *Func.getBBInfo(&*F.begin()).Count;
1939 uint64_t NewEntryCount = 0.5 + FuncEntryCount * Scale;
1940 if (NewEntryCount == 0)
1941 NewEntryCount = 1;
1942 if (NewEntryCount != FuncEntryCount) {
1943 F.setEntryCount(ProfileCount(NewEntryCount, Function::PCT_Real));
1944 LLVM_DEBUG(dbgs() << "FixFuncEntryCount: in " << F.getName()
1945 << ", entry_count " << FuncEntryCount << " --> "
1946 << NewEntryCount << "\n");
1947 }
1948}
1949
1950// Compare the profile count values with BFI count values, and print out
1951// the non-matching ones.
1952static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI,
1954 uint64_t HotCountThreshold,
1956 Function &F = Func.getFunc();
1957 BlockFrequencyInfo NBFI(F, NBPI, LI);
1958 // bool PrintFunc = false;
1959 bool HotBBOnly = PGOVerifyHotBFI;
1960 StringRef Msg;
1962
1963 unsigned BBNum = 0, BBMisMatchNum = 0, NonZeroBBNum = 0;
1964 for (auto &BBI : F) {
1965 uint64_t CountValue = 0;
1966 uint64_t BFICountValue = 0;
1967
1968 CountValue = Func.getBBInfo(&BBI).Count.value_or(CountValue);
1969
1970 BBNum++;
1971 if (CountValue)
1972 NonZeroBBNum++;
1973 auto BFICount = NBFI.getBlockProfileCount(&BBI);
1974 if (BFICount)
1975 BFICountValue = *BFICount;
1976
1977 if (HotBBOnly) {
1978 bool rawIsHot = CountValue >= HotCountThreshold;
1979 bool BFIIsHot = BFICountValue >= HotCountThreshold;
1980 bool rawIsCold = CountValue <= ColdCountThreshold;
1981 bool ShowCount = false;
1982 if (rawIsHot && !BFIIsHot) {
1983 Msg = "raw-Hot to BFI-nonHot";
1984 ShowCount = true;
1985 } else if (rawIsCold && BFIIsHot) {
1986 Msg = "raw-Cold to BFI-Hot";
1987 ShowCount = true;
1988 }
1989 if (!ShowCount)
1990 continue;
1991 } else {
1992 if ((CountValue < PGOVerifyBFICutoff) &&
1993 (BFICountValue < PGOVerifyBFICutoff))
1994 continue;
1995 uint64_t Diff = (BFICountValue >= CountValue)
1996 ? BFICountValue - CountValue
1997 : CountValue - BFICountValue;
1998 if (Diff <= CountValue / 100 * PGOVerifyBFIRatio)
1999 continue;
2000 }
2001 BBMisMatchNum++;
2002
2003 ORE.emit([&]() {
2005 F.getSubprogram(), &BBI);
2006 Remark << "BB " << ore::NV("Block", BBI.getName())
2007 << " Count=" << ore::NV("Count", CountValue)
2008 << " BFI_Count=" << ore::NV("Count", BFICountValue);
2009 if (!Msg.empty())
2010 Remark << " (" << Msg << ")";
2011 return Remark;
2012 });
2013 }
2014 if (BBMisMatchNum)
2015 ORE.emit([&]() {
2016 return OptimizationRemarkAnalysis(DEBUG_TYPE, "bfi-verify",
2017 F.getSubprogram(), &F.getEntryBlock())
2018 << "In Func " << ore::NV("Function", F.getName())
2019 << ": Num_of_BB=" << ore::NV("Count", BBNum)
2020 << ", Num_of_non_zerovalue_BB=" << ore::NV("Count", NonZeroBBNum)
2021 << ", Num_of_mis_matching_BB=" << ore::NV("Count", BBMisMatchNum);
2022 });
2023}
2024
2026 Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName,
2027 vfs::FileSystem &FS,
2028 function_ref<TargetLibraryInfo &(Function &)> LookupTLI,
2031 ProfileSummaryInfo *PSI, bool IsCS) {
2032 LLVM_DEBUG(dbgs() << "Read in profile counters: ");
2033 auto &Ctx = M.getContext();
2034 // Read the counter array from file.
2035 auto ReaderOrErr = IndexedInstrProfReader::create(ProfileFileName, FS,
2036 ProfileRemappingFileName);
2037 if (Error E = ReaderOrErr.takeError()) {
2038 handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) {
2039 Ctx.diagnose(
2040 DiagnosticInfoPGOProfile(ProfileFileName.data(), EI.message()));
2041 });
2042 return false;
2043 }
2044
2045 std::unique_ptr<IndexedInstrProfReader> PGOReader =
2046 std::move(ReaderOrErr.get());
2047 if (!PGOReader) {
2048 Ctx.diagnose(DiagnosticInfoPGOProfile(ProfileFileName.data(),
2049 StringRef("Cannot get PGOReader")));
2050 return false;
2051 }
2052 if (!PGOReader->hasCSIRLevelProfile() && IsCS)
2053 return false;
2054
2055 // TODO: might need to change the warning once the clang option is finalized.
2056 if (!PGOReader->isIRLevelProfile()) {
2057 Ctx.diagnose(DiagnosticInfoPGOProfile(
2058 ProfileFileName.data(), "Not an IR level instrumentation profile"));
2059 return false;
2060 }
2061 if (PGOReader->functionEntryOnly()) {
2062 Ctx.diagnose(DiagnosticInfoPGOProfile(
2063 ProfileFileName.data(),
2064 "Function entry profiles are not yet supported for optimization"));
2065 return false;
2066 }
2067
2069 for (GlobalVariable &G : M.globals()) {
2070 if (!G.hasName() || !G.hasMetadata(LLVMContext::MD_type))
2071 continue;
2072
2073 // Create the PGOFuncName meta data.
2074 createPGONameMetadata(G, getPGOName(G, false /* InLTO*/));
2075 }
2076 }
2077
2078 // Add the profile summary (read from the header of the indexed summary) here
2079 // so that we can use it below when reading counters (which checks if the
2080 // function should be marked with a cold or inlinehint attribute).
2081 M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()),
2084 PSI->refresh();
2085
2086 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
2087 collectComdatMembers(M, ComdatMembers);
2088 std::vector<Function *> HotFunctions;
2089 std::vector<Function *> ColdFunctions;
2090
2091 // If the profile marked as always instrument the entry BB, do the
2092 // same. Note this can be overwritten by the internal option in CFGMST.h
2093 bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled();
2094 if (PGOInstrumentEntry.getNumOccurrences() > 0)
2095 InstrumentFuncEntry = PGOInstrumentEntry;
2096 InstrumentFuncEntry |= PGOCtxProfLoweringPass::isContextualIRPGOEnabled();
2097
2098 bool HasSingleByteCoverage = PGOReader->hasSingleByteCoverage();
2099 for (auto &F : M) {
2100 if (skipPGOUse(F))
2101 continue;
2102 auto &TLI = LookupTLI(F);
2103 auto *BPI = LookupBPI(F);
2104 auto *BFI = LookupBFI(F);
2105 if (!HasSingleByteCoverage) {
2106 // Split indirectbr critical edges here before computing the MST rather
2107 // than later in getInstrBB() to avoid invalidating it.
2108 SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI,
2109 BFI);
2110 }
2111 PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS,
2112 InstrumentFuncEntry, HasSingleByteCoverage);
2113 if (HasSingleByteCoverage) {
2114 Func.populateCoverage(PGOReader.get());
2115 continue;
2116 }
2117 // When PseudoKind is set to a value other than InstrProfRecord::NotPseudo,
2118 // it means the profile for the function is unrepresentative and this
2119 // function is actually hot / warm. We will reset the function hot / cold
2120 // attribute and drop all the profile counters.
2122 bool AllZeros = false;
2123 if (!Func.readCounters(PGOReader.get(), AllZeros, PseudoKind))
2124 continue;
2125 if (AllZeros) {
2126 F.setEntryCount(ProfileCount(0, Function::PCT_Real));
2127 if (Func.getProgramMaxCount() != 0)
2128 ColdFunctions.push_back(&F);
2129 continue;
2130 }
2131 if (PseudoKind != InstrProfRecord::NotPseudo) {
2132 // Clear function attribute cold.
2133 if (F.hasFnAttribute(Attribute::Cold))
2134 F.removeFnAttr(Attribute::Cold);
2135 // Set function attribute as hot.
2136 if (PseudoKind == InstrProfRecord::PseudoHot)
2137 F.addFnAttr(Attribute::Hot);
2138 continue;
2139 }
2140 Func.populateCounters();
2141 Func.setBranchWeights();
2142 Func.annotateValueSites();
2143 Func.annotateIrrLoopHeaderWeights();
2144 PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr();
2145 if (FreqAttr == PGOUseFunc::FFA_Cold)
2146 ColdFunctions.push_back(&F);
2147 else if (FreqAttr == PGOUseFunc::FFA_Hot)
2148 HotFunctions.push_back(&F);
2149 if (PGOViewCounts != PGOVCT_None &&
2150 (ViewBlockFreqFuncName.empty() ||
2151 F.getName() == ViewBlockFreqFuncName)) {
2153 std::unique_ptr<BranchProbabilityInfo> NewBPI =
2154 std::make_unique<BranchProbabilityInfo>(F, LI);
2155 std::unique_ptr<BlockFrequencyInfo> NewBFI =
2156 std::make_unique<BlockFrequencyInfo>(F, *NewBPI, LI);
2158 NewBFI->view();
2159 else if (PGOViewCounts == PGOVCT_Text) {
2160 dbgs() << "pgo-view-counts: " << Func.getFunc().getName() << "\n";
2161 NewBFI->print(dbgs());
2162 }
2163 }
2165 (ViewBlockFreqFuncName.empty() ||
2166 F.getName() == ViewBlockFreqFuncName)) {
2168 if (ViewBlockFreqFuncName.empty())
2169 WriteGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName());
2170 else
2171 ViewGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName());
2172 else if (PGOViewRawCounts == PGOVCT_Text) {
2173 dbgs() << "pgo-view-raw-counts: " << Func.getFunc().getName() << "\n";
2174 Func.dumpInfo();
2175 }
2176 }
2177
2180 BranchProbabilityInfo NBPI(F, LI);
2181
2182 // Fix func entry count.
2183 if (PGOFixEntryCount)
2184 fixFuncEntryCount(Func, LI, NBPI);
2185
2186 // Verify BlockFrequency information.
2187 uint64_t HotCountThreshold = 0, ColdCountThreshold = 0;
2188 if (PGOVerifyHotBFI) {
2189 HotCountThreshold = PSI->getOrCompHotCountThreshold();
2191 }
2192 verifyFuncBFI(Func, LI, NBPI, HotCountThreshold, ColdCountThreshold);
2193 }
2194 }
2195
2196 // Set function hotness attribute from the profile.
2197 // We have to apply these attributes at the end because their presence
2198 // can affect the BranchProbabilityInfo of any callers, resulting in an
2199 // inconsistent MST between prof-gen and prof-use.
2200 for (auto &F : HotFunctions) {
2201 F->addFnAttr(Attribute::InlineHint);
2202 LLVM_DEBUG(dbgs() << "Set inline attribute to function: " << F->getName()
2203 << "\n");
2204 }
2205 for (auto &F : ColdFunctions) {
2206 // Only set when there is no Attribute::Hot set by the user. For Hot
2207 // attribute, user's annotation has the precedence over the profile.
2208 if (F->hasFnAttribute(Attribute::Hot)) {
2209 auto &Ctx = M.getContext();
2210 std::string Msg = std::string("Function ") + F->getName().str() +
2211 std::string(" is annotated as a hot function but"
2212 " the profile is cold");
2213 Ctx.diagnose(
2214 DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning));
2215 continue;
2216 }
2217 F->addFnAttr(Attribute::Cold);
2218 LLVM_DEBUG(dbgs() << "Set cold attribute to function: " << F->getName()
2219 << "\n");
2220 }
2221 return true;
2222}
2223
2225 std::string Filename, std::string RemappingFilename, bool IsCS,
2227 : ProfileFileName(std::move(Filename)),
2228 ProfileRemappingFileName(std::move(RemappingFilename)), IsCS(IsCS),
2229 FS(std::move(VFS)) {
2230 if (!PGOTestProfileFile.empty())
2231 ProfileFileName = PGOTestProfileFile;
2232 if (!PGOTestProfileRemappingFile.empty())
2233 ProfileRemappingFileName = PGOTestProfileRemappingFile;
2234 if (!FS)
2236}
2237
2240
2241 auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
2242 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
2244 };
2245 auto LookupBPI = [&FAM](Function &F) {
2247 };
2248 auto LookupBFI = [&FAM](Function &F) {
2250 };
2251
2252 auto *PSI = &MAM.getResult<ProfileSummaryAnalysis>(M);
2253 if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName, *FS,
2254 LookupTLI, LookupBPI, LookupBFI, PSI, IsCS))
2255 return PreservedAnalyses::all();
2256
2257 return PreservedAnalyses::none();
2258}
2259
2260static std::string getSimpleNodeName(const BasicBlock *Node) {
2261 if (!Node->getName().empty())
2262 return Node->getName().str();
2263
2264 std::string SimpleNodeName;
2265 raw_string_ostream OS(SimpleNodeName);
2266 Node->printAsOperand(OS, false);
2267 return SimpleNodeName;
2268}
2269
2271 ArrayRef<uint64_t> EdgeCounts, uint64_t MaxCount) {
2272 assert(MaxCount > 0 && "Bad max count");
2273 uint64_t Scale = calculateCountScale(MaxCount);
2275 for (const auto &ECI : EdgeCounts)
2276 Weights.push_back(scaleBranchCount(ECI, Scale));
2277
2278 LLVM_DEBUG(dbgs() << "Weight is: "; for (const auto &W
2279 : Weights) {
2280 dbgs() << W << " ";
2281 } dbgs() << "\n";);
2282
2283 misexpect::checkExpectAnnotations(*TI, Weights, /*IsFrontend=*/false);
2284
2285 setBranchWeights(*TI, Weights, /*IsExpected=*/false);
2287 std::string BrCondStr = getBranchCondString(TI);
2288 if (BrCondStr.empty())
2289 return;
2290
2291 uint64_t WSum =
2292 std::accumulate(Weights.begin(), Weights.end(), (uint64_t)0,
2293 [](uint64_t w1, uint64_t w2) { return w1 + w2; });
2294 uint64_t TotalCount =
2295 std::accumulate(EdgeCounts.begin(), EdgeCounts.end(), (uint64_t)0,
2296 [](uint64_t c1, uint64_t c2) { return c1 + c2; });
2297 Scale = calculateCountScale(WSum);
2298 BranchProbability BP(scaleBranchCount(Weights[0], Scale),
2299 scaleBranchCount(WSum, Scale));
2300 std::string BranchProbStr;
2301 raw_string_ostream OS(BranchProbStr);
2302 OS << BP;
2303 OS << " (total count : " << TotalCount << ")";
2304 OS.flush();
2305 Function *F = TI->getParent()->getParent();
2307 ORE.emit([&]() {
2308 return OptimizationRemark(DEBUG_TYPE, "pgo-instrumentation", TI)
2309 << BrCondStr << " is true with probability : " << BranchProbStr;
2310 });
2311 }
2312}
2313
2314namespace llvm {
2315
2317 MDBuilder MDB(M->getContext());
2318 TI->setMetadata(llvm::LLVMContext::MD_irr_loop,
2319 MDB.createIrrLoopHeaderWeight(Count));
2320}
2321
2322template <> struct GraphTraits<PGOUseFunc *> {
2323 using NodeRef = const BasicBlock *;
2326
2327 static NodeRef getEntryNode(const PGOUseFunc *G) {
2328 return &G->getFunc().front();
2329 }
2330
2332 return succ_begin(N);
2333 }
2334
2335 static ChildIteratorType child_end(const NodeRef N) { return succ_end(N); }
2336
2337 static nodes_iterator nodes_begin(const PGOUseFunc *G) {
2338 return nodes_iterator(G->getFunc().begin());
2339 }
2340
2341 static nodes_iterator nodes_end(const PGOUseFunc *G) {
2342 return nodes_iterator(G->getFunc().end());
2343 }
2344};
2345
2346template <> struct DOTGraphTraits<PGOUseFunc *> : DefaultDOTGraphTraits {
2347 explicit DOTGraphTraits(bool isSimple = false)
2349
2350 static std::string getGraphName(const PGOUseFunc *G) {
2351 return std::string(G->getFunc().getName());
2352 }
2353
2354 std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph) {
2355 std::string Result;
2356 raw_string_ostream OS(Result);
2357
2358 OS << getSimpleNodeName(Node) << ":\\l";
2359 PGOUseBBInfo *BI = Graph->findBBInfo(Node);
2360 OS << "Count : ";
2361 if (BI && BI->Count)
2362 OS << *BI->Count << "\\l";
2363 else
2364 OS << "Unknown\\l";
2365
2366 if (!PGOInstrSelect)
2367 return Result;
2368
2369 for (const Instruction &I : *Node) {
2370 if (!isa<SelectInst>(&I))
2371 continue;
2372 // Display scaled counts for SELECT instruction:
2373 OS << "SELECT : { T = ";
2374 uint64_t TC, FC;
2375 bool HasProf = extractBranchWeights(I, TC, FC);
2376 if (!HasProf)
2377 OS << "Unknown, F = Unknown }\\l";
2378 else
2379 OS << TC << ", F = " << FC << " }\\l";
2380 }
2381 return Result;
2382 }
2383};
2384
2385} // end namespace llvm
This file implements a class to represent arbitrary precision integral constant values and operations...
This file contains the simple types necessary to represent the attributes associated with functions a...
This file finds the minimum set of blocks on a CFG that must be instrumented to infer execution cover...
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:686
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Given that RA is a live value
#define LLVM_DEBUG(X)
Definition: Debug.h:101
std::string Name
uint64_t Size
static BasicBlock * getInstrBB(CFGMST< Edge, BBInfo > &MST, Edge &E, const DenseSet< const BasicBlock * > &ExecBlocks)
#define DEBUG_TYPE
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
static cl::opt< unsigned > ColdCountThreshold("mfs-count-threshold", cl::desc("Minimum number of times a block must be executed to be retained."), cl::init(1), cl::Hidden)
Module.h This file contains the declarations for the Module class.
static cl::opt< bool > PGOInstrumentEntry("pgo-instrument-entry", cl::init(false), cl::Hidden, cl::desc("Force to instrument function entry basicblock."))
static cl::opt< std::string > PGOTestProfileRemappingFile("pgo-test-profile-remapping-file", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path of profile remapping file. This is mainly for " "test purpose."))
static cl::opt< bool > PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden, cl::desc("Fix function entry count in profile use."))
static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI, BranchProbabilityInfo &NBPI)
static cl::opt< bool > PGOVerifyHotBFI("pgo-verify-hot-bfi", cl::init(false), cl::Hidden, cl::desc("Print out the non-match BFI count if a hot raw profile count " "becomes non-hot, or a cold raw profile count becomes hot. " "The print is enabled under -Rpass-analysis=pgo, or " "internal option -pass-remakrs-analysis=pgo."))
static void annotateFunctionWithHashMismatch(Function &F, LLVMContext &ctx)
cl::opt< unsigned > MaxNumVTableAnnotations
static cl::opt< bool > PGOTemporalInstrumentation("pgo-temporal-instrumentation", cl::desc("Use this option to enable temporal instrumentation"))
static cl::opt< unsigned > PGOFunctionSizeThreshold("pgo-function-size-threshold", cl::Hidden, cl::desc("Do not instrument functions smaller than this threshold."))
static cl::opt< unsigned > MaxNumAnnotations("icp-max-annotations", cl::init(3), cl::Hidden, cl::desc("Max number of annotations for a single indirect " "call callsite"))
static bool skipPGOGen(const Function &F)
static void collectComdatMembers(Module &M, std::unordered_multimap< Comdat *, GlobalValue * > &ComdatMembers)
static cl::opt< unsigned > PGOVerifyBFICutoff("pgo-verify-bfi-cutoff", cl::init(5), cl::Hidden, cl::desc("Set the threshold for pgo-verify-bfi: skip the counts whose " "profile count value is below."))
static cl::opt< std::string > PGOTraceFuncHash("pgo-trace-func-hash", cl::init("-"), cl::Hidden, cl::value_desc("function name"), cl::desc("Trace the hash of the function with this name."))
static void instrumentOneFunc(Function &F, Module *M, TargetLibraryInfo &TLI, BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFI, std::unordered_multimap< Comdat *, GlobalValue * > &ComdatMembers, bool IsCS)
bool isValueProfilingDisabled()
static void populateEHOperandBundle(VPCandidateInfo &Cand, DenseMap< BasicBlock *, ColorVector > &BlockColors, SmallVectorImpl< OperandBundleDef > &OpBundles)
static cl::opt< bool > PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden, cl::desc("Use this option to turn on/off SELECT " "instruction instrumentation. "))
static cl::opt< bool > PGOFunctionEntryCoverage("pgo-function-entry-coverage", cl::Hidden, cl::desc("Use this option to enable function entry coverage instrumentation."))
static bool InstrumentAllFunctions(Module &M, function_ref< TargetLibraryInfo &(Function &)> LookupTLI, function_ref< BranchProbabilityInfo *(Function &)> LookupBPI, function_ref< BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS)
static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI, BranchProbabilityInfo &NBPI, uint64_t HotCountThreshold, uint64_t ColdCountThreshold)
static cl::opt< unsigned > PGOVerifyBFIRatio("pgo-verify-bfi-ratio", cl::init(2), cl::Hidden, cl::desc("Set the threshold for pgo-verify-bfi: only print out " "mismatched BFI if the difference percentage is greater than " "this value (in percentage)."))
static cl::opt< bool > DoComdatRenaming("do-comdat-renaming", cl::init(false), cl::Hidden, cl::desc("Append function hash to the name of COMDAT function to avoid " "function hash mismatch due to the preinliner"))
static cl::opt< unsigned > PGOFunctionCriticalEdgeThreshold("pgo-critical-edge-threshold", cl::init(20000), cl::Hidden, cl::desc("Do not instrument functions with the number of critical edges " " greater than this threshold."))
static void setupBBInfoEdges(const FuncPGOInstrumentation< PGOUseEdge, PGOUseBBInfo > &FuncInfo)
Set up InEdges/OutEdges for all BBs in the MST.
static cl::opt< std::string > PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path of profile data file. This is" "mainly for test purpose."))
static bool skipPGOUse(const Function &F)
static bool canRenameComdat(Function &F, std::unordered_multimap< Comdat *, GlobalValue * > &ComdatMembers)
static cl::opt< bool > PGOVerifyBFI("pgo-verify-bfi", cl::init(false), cl::Hidden, cl::desc("Print out mismatched BFI counts after setting profile metadata " "The print is enabled under -Rpass-analysis=pgo, or " "internal option -pass-remakrs-analysis=pgo."))
static cl::opt< bool > PGOBlockCoverage("pgo-block-coverage", cl::desc("Use this option to enable basic block coverage instrumentation"))
static uint64_t sumEdgeCount(const ArrayRef< PGOUseEdge * > Edges)
static cl::opt< bool > PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden, cl::desc("Use this option to turn on/off " "memory intrinsic size profiling."))
static uint32_t getMaxNumAnnotations(InstrProfValueKind ValueProfKind)
Function::ProfileCount ProfileCount
static cl::opt< bool > EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden, cl::desc("When this option is on, the annotated " "branch probability will be emitted as " "optimization remarks: -{Rpass|" "pass-remarks}=pgo-instrumentation"))
static cl::opt< unsigned > MaxNumMemOPAnnotations("memop-max-annotations", cl::init(4), cl::Hidden, cl::desc("Max number of preicise value annotations for a single memop" "intrinsic"))
static cl::opt< bool > DisableValueProfiling("disable-vp", cl::init(false), cl::Hidden, cl::desc("Disable Value Profiling"))
static std::string getSimpleNodeName(const BasicBlock *Node)
static cl::opt< bool > PGOViewBlockCoverageGraph("pgo-view-block-coverage-graph", cl::desc("Create a dot file of CFGs with block " "coverage inference information"))
static GlobalVariable * createIRLevelProfileFlagVar(Module &M, bool IsCS)
static bool isIndirectBrTarget(BasicBlock *BB)
static std::string getBranchCondString(Instruction *TI)
bool shouldInstrumentEntryBB()
static bool annotateAllFunctions(Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName, vfs::FileSystem &FS, function_ref< TargetLibraryInfo &(Function &)> LookupTLI, function_ref< BranchProbabilityInfo *(Function &)> LookupBPI, function_ref< BlockFrequencyInfo *(Function &)> LookupBFI, ProfileSummaryInfo *PSI, bool IsCS)
static cl::opt< PGOViewCountsType > PGOViewRawCounts("pgo-view-raw-counts", cl::Hidden, cl::desc("A boolean option to show CFG dag or text " "with raw profile counts from " "profile data. See also option " "-pgo-view-counts. To limit graph " "display to only one function, use " "filtering option -view-bfi-func-name."), cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), clEnumValN(PGOVCT_Graph, "graph", "show a graph."), clEnumValN(PGOVCT_Text, "text", "show in text.")))
static const char * ValueProfKindDescr[]
This file provides the interface for IR based instrumentation passes ( (profile-gen,...
FunctionAnalysisManager FAM
ModuleAnalysisManager MAM
This header defines various interfaces for pass management in LLVM.
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isSimple(Instruction *I)
This file contains some templates that are useful if you are working with the STL at all.
raw_pwrite_stream & OS
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
Defines the virtual file system interface vfs::FileSystem.
Value * RHS
void printAsOperand(OutputBuffer &OB, Prec P=Prec::Default, bool StrictlyWorse=false) const
static APFloat getZero(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Zero.
Definition: APFloat.h:988
Class for arbitrary precision integers.
Definition: APInt.h:78
This templated class represents "all analyses that operate over <a particular IR unit>" (e....
Definition: Analysis.h:49
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:405
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
iterator end() const
Definition: ArrayRef.h:154
iterator begin() const
Definition: ArrayRef.h:153
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
iterator end()
Definition: BasicBlock.h:451
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:414
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:229
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
std::optional< uint64_t > getBlockProfileCount(const BasicBlock *BB, bool AllowSynthetic=false) const
Returns the estimated profile count of BB.
Conditional or Unconditional Branch instruction.
bool isConditional() const
Value * getCondition() const
Analysis pass which computes BranchProbabilityInfo.
Analysis providing branch probability information.
An union-find based Minimum Spanning Tree for CFG.
Definition: CFGMST.h:39
Edge & addEdge(BasicBlock *Src, BasicBlock *Dest, uint64_t W)
Definition: CFGMST.h:276
const std::vector< std::unique_ptr< Edge > > & allEdges() const
Definition: CFGMST.h:306
size_t bbInfoSize() const
Definition: CFGMST.h:314
size_t numEdges() const
Definition: CFGMST.h:312
BBInfo * findBBInfo(const BasicBlock *BB) const
Definition: CFGMST.h:324
BBInfo & getBBInfo(const BasicBlock *BB) const
Definition: CFGMST.h:317
void dumpEdges(raw_ostream &OS, const Twine &Message) const
Definition: CFGMST.h:257
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1236
Value * getCalledOperand() const
Definition: InstrTypes.h:1458
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:847
StringRef getName() const
Definition: Comdat.cpp:28
void setSelectionKind(SelectionKind Val)
Definition: Comdat.h:47
SelectionKind getSelectionKind() const
Definition: Comdat.h:46
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition: Constants.h:218
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:212
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:206
static Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
Definition: Constants.cpp:400
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
bool empty() const
Definition: DenseMap.h:98
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
Diagnostic information for the PGO profiler.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
Base class for error info classes.
Definition: Error.h:45
virtual std::string message() const
Return the error message as a string.
Definition: Error.h:53
Lightweight error class with error context and mandatory checking.
Definition: Error.h:160
Tagged union holding either a T or a Error.
Definition: Error.h:481
Class to represent profile counts.
Definition: Function.h:289
static GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
Definition: Globals.cpp:544
@ HiddenVisibility
The GV is hidden.
Definition: GlobalValue.h:68
@ ExternalLinkage
Externally visible function.
Definition: GlobalValue.h:52
@ WeakAnyLinkage
Keep one copy of named function when linking (weak)
Definition: GlobalValue.h:56
@ AvailableExternallyLinkage
Available for inspection, not emission.
Definition: GlobalValue.h:53
@ LinkOnceODRLinkage
Same, but only replaced by something equivalent.
Definition: GlobalValue.h:55
This instruction compares its operands according to the predicate given to the constructor.
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
Definition: IRBuilder.h:2044
BasicBlock::iterator GetInsertPoint() const
Definition: IRBuilder.h:172
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Definition: IRBuilder.h:528
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
Definition: IRBuilder.h:488
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:483
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2122
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2417
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2671
Reader for the indexed binary instrprof format.
static Expected< std::unique_ptr< IndexedInstrProfReader > > create(const Twine &Path, vfs::FileSystem &FS, const Twine &RemappingPath="")
Factory method to create an indexed reader.
Expected< InstrProfRecord > getInstrProfRecord(StringRef FuncName, uint64_t FuncHash, StringRef DeprecatedFuncName="", uint64_t *MismatchedFuncSum=nullptr)
Return the NamedInstrProfRecord associated with FuncName and FuncHash.
uint64_t getMaximumFunctionCount(bool UseCS)
Return the maximum of all known function counts.
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:563
Base class for instruction visitors.
Definition: InstVisitor.h:78
void visit(Iterator Start, Iterator End)
Definition: InstVisitor.h:87
RetTy visitSelectInst(SelectInst &I)
Definition: InstVisitor.h:189
instrprof_error get() const
Definition: InstrProf.h:409
std::string message() const override
Return the error message as a string.
Definition: InstrProf.cpp:255
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
Definition: Instruction.h:824
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1635
A smart pointer to a reference-counted object that inherits from RefCountedBase or ThreadSafeRefCount...
uint32_t getCRC() const
Definition: CRC.h:52
void update(ArrayRef< uint8_t > Data)
Definition: CRC.cpp:103
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
MDString * createString(StringRef Str)
Return the given string as metadata.
Definition: MDBuilder.cpp:20
MDNode * createIrrLoopHeaderWeight(uint64_t Weight)
Return metadata containing an irreducible loop header weight.
Definition: MDBuilder.cpp:344
Metadata node.
Definition: Metadata.h:1067
ArrayRef< MDOperand > operands() const
Definition: Metadata.h:1426
Tuple of metadata.
Definition: Metadata.h:1470
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1498
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
Diagnostic information for optimization analysis remarks.
The optimization diagnostic interface.
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for applied optimization remarks.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
PGOInstrumentationUse(std::string Filename="", std::string RemappingFilename="", bool IsCS=false, IntrusiveRefCntPtr< vfs::FileSystem > FS=nullptr)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: Analysis.h:114
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
void preserveSet()
Mark an analysis set as preserved.
Definition: Analysis.h:146
void preserve()
Mark an analysis as preserved.
Definition: Analysis.h:131
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
uint64_t getOrCompColdCountThreshold() const
Returns ColdCountThreshold if set.
void refresh()
If no summary is present, attempt to refresh.
bool isColdCount(uint64_t C) const
Returns true if count C is considered cold.
bool isHotCount(uint64_t C) const
Returns true if count C is considered hot.
uint64_t getOrCompHotCountThreshold() const
Returns HotCountThreshold if set.
This class represents the LLVM 'select' instruction.
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
constexpr bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:134
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:131
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
TinyPtrVector - This class is specialized for cases where there are normally 0 or 1 element in a vect...
Definition: TinyPtrVector.h:29
EltTy front() const
unsigned size() const
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
std::string str() const
Return the twine contents as a std::string.
Definition: Twine.cpp:17
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
void print(raw_ostream &O, bool IsForDebug=false, bool NoDetails=false) const
Print the current type.
static IntegerType * getInt64Ty(LLVMContext &C)
Value * getOperand(unsigned i) const
Definition: User.h:169
Utility analysis that determines what values are worth profiling.
std::vector< CandidateInfo > get(InstrProfValueKind Kind) const
Returns a list of value profiling candidates of the given kind.
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition: ilist_node.h:32
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:661
The virtual file system interface.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
This file contains the declaration of the Comdat class, which represents a single COMDAT in LLVM.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1513
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:711
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
uint64_t getFuncHash(const FuncRecordTy *Record)
Return the structural hash associated with the function.
void checkExpectAnnotations(Instruction &I, const ArrayRef< uint32_t > ExistingWeights, bool IsFrontend)
checkExpectAnnotations - compares PGO counters to the thresholds used for llvm.expect and warns if th...
Definition: MisExpect.cpp:204
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< FuncNode * > Func
Definition: RDFGraph.h:393
void write64le(void *P, uint64_t V)
Definition: Endian.h:471
IntrusiveRefCntPtr< FileSystem > getRealFileSystem()
Gets a vfs::FileSystem for the 'real' file system, as seen by the operating system.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void setIrrLoopHeaderMetadata(Module *M, Instruction *TI, uint64_t Count)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1680
std::string getPGOFuncName(const Function &F, bool InLTO=false, uint64_t Version=INSTR_PROF_INDEX_VERSION)
Please use getIRPGOFuncName for LLVM IR instrumentation.
Definition: InstrProf.cpp:379
void createPGOFuncNameMetadata(Function &F, StringRef PGOFuncName)
Create the PGOFuncName metadata if PGOFuncName is different from the function's raw name.
Definition: InstrProf.cpp:1408
unsigned GetSuccessorNumber(const BasicBlock *BB, const BasicBlock *Succ)
Search for the specified successor of basic block BB and return its position in the terminator instru...
Definition: CFG.cpp:79
std::string getIRPGOFuncName(const Function &F, bool InLTO=false)
Definition: InstrProf.cpp:368
Function::ProfileCount ProfileCount
auto successors(const MachineBasicBlock *BB)
void handleAllErrors(Error E, HandlerTs &&... Handlers)
Behaves the same as handleErrors, except that by contract all errors must be handled by the given han...
Definition: Error.h:977
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
cl::opt< InstrProfCorrelator::ProfCorrelatorKind > ProfileCorrelate("profile-correlate", cl::desc("Use debug info or binary file to correlate profiles."), cl::init(InstrProfCorrelator::NONE), cl::values(clEnumValN(InstrProfCorrelator::NONE, "", "No profile correlation"), clEnumValN(InstrProfCorrelator::DEBUG_INFO, "debug-info", "Use debug info to correlate"), clEnumValN(InstrProfCorrelator::BINARY, "binary", "Use binary to correlate")))
DenseMap< BasicBlock *, ColorVector > colorEHFunclets(Function &F)
If an EH funclet personality is in use (see isFuncletEHPersonality), this will recompute which blocks...
void createPGONameMetadata(GlobalObject &GO, StringRef PGOName)
Create the PGOName metadata if a global object's PGO name is different from its mangled name.
Definition: InstrProf.cpp:1412
cl::opt< bool > PGOWarnMissing
raw_ostream & WriteGraph(raw_ostream &O, const GraphType &G, bool ShortNames=false, const Twine &Title="")
Definition: GraphWriter.h:359
bool SplitIndirectBrCriticalEdges(Function &F, bool IgnoreBlocksWithoutPHI, BranchProbabilityInfo *BPI=nullptr, BlockFrequencyInfo *BFI=nullptr)
cl::opt< bool > EnableVTableProfileUse("enable-vtable-profile-use", cl::init(false), cl::desc("If ThinLTO and WPD is enabled and this option is true, vtable " "profiles will be used by ICP pass for more efficient indirect " "call sequence. If false, type profiles won't be used."))
bool isScopedEHPersonality(EHPersonality Pers)
Returns true if this personality uses scope-style EH IR instructions: catchswitch,...
cl::opt< bool > DebugInfoCorrelate
OperandBundleDefT< Value * > OperandBundleDef
Definition: AutoUpgrade.h:33
std::string getPGOName(const GlobalVariable &V, bool InLTO=false)
Definition: InstrProf.cpp:395
cl::opt< std::string > ViewBlockFreqFuncName("view-bfi-func-name", cl::Hidden, cl::desc("The option to specify " "the name of the function " "whose CFG will be displayed."))
GlobalVariable * createPGOFuncNameVar(Function &F, StringRef PGOFuncName)
Create and return the global variable for function name used in PGO instrumentation.
Definition: InstrProf.cpp:467
void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIndx from InstrProfR and annotate the instruction Inst w...
Definition: InstrProf.cpp:1282
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
cl::opt< bool > NoPGOWarnMismatch
Definition: MemProfiler.cpp:55
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
InstrProfValueKind
Definition: InstrProf.h:267
cl::opt< PGOViewCountsType > PGOViewCounts("pgo-view-counts", cl::Hidden, cl::desc("A boolean option to show CFG dag or text with " "block profile counts and branch probabilities " "right after PGO profile annotation step. The " "profile counts are computed using branch " "probabilities from the runtime profile data and " "block frequency propagation algorithm. To view " "the raw counts from the profile, use option " "-pgo-view-raw-counts instead. To limit graph " "display to only one function, use filtering option " "-view-bfi-func-name."), cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), clEnumValN(PGOVCT_Graph, "graph", "show a graph."), clEnumValN(PGOVCT_Text, "text", "show in text.")))
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
static uint32_t scaleBranchCount(uint64_t Count, uint64_t Scale)
Scale an individual branch count.
void appendToCompilerUsed(Module &M, ArrayRef< GlobalValue * > Values)
Adds global values to the llvm.compiler.used list.
BasicBlock * SplitCriticalEdge(Instruction *TI, unsigned SuccNum, const CriticalEdgeSplittingOptions &Options=CriticalEdgeSplittingOptions(), const Twine &BBName="")
If this edge is a critical edge, insert a new node to split the critical edge.
void ViewGraph(const GraphType &G, const Twine &Name, bool ShortNames=false, const Twine &Title="", GraphProgram::Name Program=GraphProgram::DOT)
ViewGraph - Emit a dot graph, run 'dot', run gv on the postscript file, then cleanup.
Definition: GraphWriter.h:427
bool isCriticalEdge(const Instruction *TI, unsigned SuccNum, bool AllowIdenticalEdges=false)
Return true if the specified edge is a critical edge.
Definition: CFG.cpp:95
static uint64_t calculateCountScale(uint64_t MaxCount)
Calculate what to divide by to scale counts.
bool canRenameComdatFunc(const Function &F, bool CheckAddressTaken=false)
Check if we can safely rename this Comdat function.
Definition: InstrProf.cpp:1464
void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput)
Definition: InstrProf.cpp:1487
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1849
@ DS_Warning
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
auto predecessors(const MachineBasicBlock *BB)
void setProfMetadata(Module *M, Instruction *TI, ArrayRef< uint64_t > EdgeCounts, uint64_t MaxCount)
cl::opt< bool > EnableVTableValueProfiling("enable-vtable-value-profiling", cl::init(false), cl::desc("If true, the virtual table address will be instrumented to know " "the types of a C++ pointer. The information is used in indirect " "call promotion to do selective vtable-based comparison."))
SuccIterator< const Instruction, const BasicBlock > const_succ_iterator
Definition: CFG.h:243
cl::opt< bool > NoPGOWarnMismatchComdatWeak
Implement std::hash so that hash_code can be used in STL containers.
Definition: BitVector.h:858
#define N
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:250
static const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:277
static std::string getGraphName(const PGOUseFunc *G)
std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph)
DOTGraphTraits - Template class that can be specialized to customize how graphs are converted to 'dot...
DefaultDOTGraphTraits - This class provides the default implementations of all of the DOTGraphTraits ...
static ChildIteratorType child_end(const NodeRef N)
static NodeRef getEntryNode(const PGOUseFunc *G)
static ChildIteratorType child_begin(const NodeRef N)
static nodes_iterator nodes_end(const PGOUseFunc *G)
static nodes_iterator nodes_begin(const PGOUseFunc *G)
Profiling information for a single function.
Definition: InstrProf.h:823
std::vector< uint64_t > Counts
Definition: InstrProf.h:824
CountPseudoKind getCountPseudoKind() const
Definition: InstrProf.h:921
uint32_t getNumValueSites(uint32_t ValueKind) const
Return the number of instrumented sites for ValueKind.
Definition: InstrProf.h:1023
static void setCSFlagInHash(uint64_t &FuncHash)
Definition: InstrProf.h:1004