LLVM 17.0.0git
PGOInstrumentation.cpp
Go to the documentation of this file.
1//===- PGOInstrumentation.cpp - MST-based PGO Instrumentation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements PGO instrumentation using a minimum spanning tree based
10// on the following paper:
11// [1] Donald E. Knuth, Francis R. Stevenson. Optimal measurement of points
12// for program frequency counts. BIT Numerical Mathematics 1973, Volume 13,
13// Issue 3, pp 313-322
14// The idea of the algorithm is based on the fact that for each node (except for
15// the entry and exit), the sum of incoming edge counts equals the sum of
16// outgoing edge counts. The count of edge on spanning tree can be derived from
17// those edges not on the spanning tree. Knuth proves this method instruments
18// the minimum number of edges.
19//
20// The minimal spanning tree here is actually a maximum weight tree -- on-tree
21// edges have higher frequencies (more likely to execute). The idea is to
22// instrument those less frequently executed edges to reduce the runtime
23// overhead of instrumented binaries.
24//
25// This file contains two passes:
26// (1) Pass PGOInstrumentationGen which instruments the IR to generate edge
27// count profile, and generates the instrumentation for indirect call
28// profiling.
29// (2) Pass PGOInstrumentationUse which reads the edge count profile and
30// annotates the branch weights. It also reads the indirect call value
31// profiling records and annotate the indirect call instructions.
32//
33// To get the precise counter information, these two passes need to be invoked
34// at the same compilation point (so they see the same IR). For pass
35// PGOInstrumentationGen, the real work is done in instrumentOneFunc(). For
36// pass PGOInstrumentationUse, the real work is done in class PGOUseFunc and
37// the profile is opened at module level and passed to each PGOUseFunc instance.
38// The shared code for PGOInstrumentationGen and PGOInstrumentationUse is put
39// in class FuncPGOInstrumentation.
40//
41// Class PGOEdge represents a CFG edge and some auxiliary information. Class
42// BBInfo contains auxiliary information for each BB. These two classes are used
43// in pass PGOInstrumentationGen. Classes PGOUseEdge and UseBBInfo are derived
44// from PGOEdge and BBInfo, respectively. They contain extra data structures
45// used in populating profile counters.
46// The MST implementation is in Class CFGMST (CFGMST.h).
47//
48//===----------------------------------------------------------------------===//
49
51#include "CFGMST.h"
53#include "llvm/ADT/APInt.h"
54#include "llvm/ADT/ArrayRef.h"
55#include "llvm/ADT/STLExtras.h"
57#include "llvm/ADT/Statistic.h"
58#include "llvm/ADT/StringRef.h"
59#include "llvm/ADT/Twine.h"
60#include "llvm/ADT/iterator.h"
64#include "llvm/Analysis/CFG.h"
71#include "llvm/IR/Attributes.h"
72#include "llvm/IR/BasicBlock.h"
73#include "llvm/IR/CFG.h"
74#include "llvm/IR/Comdat.h"
75#include "llvm/IR/Constant.h"
76#include "llvm/IR/Constants.h"
78#include "llvm/IR/Dominators.h"
80#include "llvm/IR/Function.h"
81#include "llvm/IR/GlobalAlias.h"
82#include "llvm/IR/GlobalValue.h"
84#include "llvm/IR/IRBuilder.h"
85#include "llvm/IR/InstVisitor.h"
86#include "llvm/IR/InstrTypes.h"
87#include "llvm/IR/Instruction.h"
90#include "llvm/IR/Intrinsics.h"
91#include "llvm/IR/LLVMContext.h"
92#include "llvm/IR/MDBuilder.h"
93#include "llvm/IR/Module.h"
94#include "llvm/IR/PassManager.h"
97#include "llvm/IR/Type.h"
98#include "llvm/IR/Value.h"
101#include "llvm/Support/BLAKE3.h"
103#include "llvm/Support/CRC.h"
104#include "llvm/Support/Casting.h"
107#include "llvm/Support/Debug.h"
108#include "llvm/Support/Error.h"
119#include <algorithm>
120#include <cassert>
121#include <cstdint>
122#include <map>
123#include <memory>
124#include <numeric>
125#include <optional>
126#include <set>
127#include <string>
128#include <unordered_map>
129#include <utility>
130#include <vector>
131
132using namespace llvm;
133using namespace llvm::memprof;
136
137#define DEBUG_TYPE "pgo-instrumentation"
138
139STATISTIC(NumOfPGOInstrument, "Number of edges instrumented.");
140STATISTIC(NumOfPGOSelectInsts, "Number of select instruction instrumented.");
141STATISTIC(NumOfPGOMemIntrinsics, "Number of mem intrinsics instrumented.");
142STATISTIC(NumOfPGOEdge, "Number of edges.");
143STATISTIC(NumOfPGOBB, "Number of basic-blocks.");
144STATISTIC(NumOfPGOSplit, "Number of critical edge splits.");
145STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts.");
146STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile.");
147STATISTIC(NumOfPGOMissing, "Number of functions without profile.");
148STATISTIC(NumOfMemProfMissing, "Number of functions without memory profile.");
149STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations.");
150STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO.");
151STATISTIC(NumOfCSPGOSelectInsts,
152 "Number of select instruction instrumented in CSPGO.");
153STATISTIC(NumOfCSPGOMemIntrinsics,
154 "Number of mem intrinsics instrumented in CSPGO.");
155STATISTIC(NumOfCSPGOEdge, "Number of edges in CSPGO.");
156STATISTIC(NumOfCSPGOBB, "Number of basic-blocks in CSPGO.");
157STATISTIC(NumOfCSPGOSplit, "Number of critical edge splits in CSPGO.");
158STATISTIC(NumOfCSPGOFunc,
159 "Number of functions having valid profile counts in CSPGO.");
160STATISTIC(NumOfCSPGOMismatch,
161 "Number of functions having mismatch profile in CSPGO.");
162STATISTIC(NumOfCSPGOMissing, "Number of functions without profile in CSPGO.");
163
164// Command line option to specify the file to read profile from. This is
165// mainly used for testing.
167 PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden,
168 cl::value_desc("filename"),
169 cl::desc("Specify the path of profile data file. This is"
170 "mainly for test purpose."));
172 "pgo-test-profile-remapping-file", cl::init(""), cl::Hidden,
173 cl::value_desc("filename"),
174 cl::desc("Specify the path of profile remapping file. This is mainly for "
175 "test purpose."));
176
177// Command line option to disable value profiling. The default is false:
178// i.e. value profiling is enabled by default. This is for debug purpose.
179static cl::opt<bool> DisableValueProfiling("disable-vp", cl::init(false),
181 cl::desc("Disable Value Profiling"));
182
183// Command line option to set the maximum number of VP annotations to write to
184// the metadata for a single indirect call callsite.
186 "icp-max-annotations", cl::init(3), cl::Hidden,
187 cl::desc("Max number of annotations for a single indirect "
188 "call callsite"));
189
190// Command line option to set the maximum number of value annotations
191// to write to the metadata for a single memop intrinsic.
193 "memop-max-annotations", cl::init(4), cl::Hidden,
194 cl::desc("Max number of preicise value annotations for a single memop"
195 "intrinsic"));
196
197// Command line option to control appending FunctionHash to the name of a COMDAT
198// function. This is to avoid the hash mismatch caused by the preinliner.
200 "do-comdat-renaming", cl::init(false), cl::Hidden,
201 cl::desc("Append function hash to the name of COMDAT function to avoid "
202 "function hash mismatch due to the preinliner"));
203
204// Command line option to enable/disable the warning about missing profile
205// information.
206static cl::opt<bool>
207 PGOWarnMissing("pgo-warn-missing-function", cl::init(false), cl::Hidden,
208 cl::desc("Use this option to turn on/off "
209 "warnings about missing profile data for "
210 "functions."));
211
212namespace llvm {
213// Command line option to enable/disable the warning about a hash mismatch in
214// the profile data.
216 NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), cl::Hidden,
217 cl::desc("Use this option to turn off/on "
218 "warnings about profile cfg mismatch."));
219} // namespace llvm
220
221// Command line option to enable/disable the warning about a hash mismatch in
222// the profile data for Comdat functions, which often turns out to be false
223// positive due to the pre-instrumentation inline.
225 "no-pgo-warn-mismatch-comdat-weak", cl::init(true), cl::Hidden,
226 cl::desc("The option is used to turn on/off "
227 "warnings about hash mismatch for comdat "
228 "or weak functions."));
229
230// Command line option to enable/disable select instruction instrumentation.
231static cl::opt<bool>
232 PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden,
233 cl::desc("Use this option to turn on/off SELECT "
234 "instruction instrumentation. "));
235
236// Command line option to turn on CFG dot or text dump of raw profile counts
238 "pgo-view-raw-counts", cl::Hidden,
239 cl::desc("A boolean option to show CFG dag or text "
240 "with raw profile counts from "
241 "profile data. See also option "
242 "-pgo-view-counts. To limit graph "
243 "display to only one function, use "
244 "filtering option -view-bfi-func-name."),
245 cl::values(clEnumValN(PGOVCT_None, "none", "do not show."),
246 clEnumValN(PGOVCT_Graph, "graph", "show a graph."),
247 clEnumValN(PGOVCT_Text, "text", "show in text.")));
248
249// Command line option to enable/disable memop intrinsic call.size profiling.
250static cl::opt<bool>
251 PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden,
252 cl::desc("Use this option to turn on/off "
253 "memory intrinsic size profiling."));
254
255// Emit branch probability as optimization remarks.
256static cl::opt<bool>
257 EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden,
258 cl::desc("When this option is on, the annotated "
259 "branch probability will be emitted as "
260 "optimization remarks: -{Rpass|"
261 "pass-remarks}=pgo-instrumentation"));
262
264 "pgo-instrument-entry", cl::init(false), cl::Hidden,
265 cl::desc("Force to instrument function entry basicblock."));
266
268 "pgo-function-entry-coverage", cl::Hidden,
269 cl::desc(
270 "Use this option to enable function entry coverage instrumentation."));
271
272static cl::opt<bool>
273 PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden,
274 cl::desc("Fix function entry count in profile use."));
275
277 "pgo-verify-hot-bfi", cl::init(false), cl::Hidden,
278 cl::desc("Print out the non-match BFI count if a hot raw profile count "
279 "becomes non-hot, or a cold raw profile count becomes hot. "
280 "The print is enabled under -Rpass-analysis=pgo, or "
281 "internal option -pass-remakrs-analysis=pgo."));
282
284 "pgo-verify-bfi", cl::init(false), cl::Hidden,
285 cl::desc("Print out mismatched BFI counts after setting profile metadata "
286 "The print is enabled under -Rpass-analysis=pgo, or "
287 "internal option -pass-remakrs-analysis=pgo."));
288
290 "pgo-verify-bfi-ratio", cl::init(2), cl::Hidden,
291 cl::desc("Set the threshold for pgo-verify-bfi: only print out "
292 "mismatched BFI if the difference percentage is greater than "
293 "this value (in percentage)."));
294
296 "pgo-verify-bfi-cutoff", cl::init(5), cl::Hidden,
297 cl::desc("Set the threshold for pgo-verify-bfi: skip the counts whose "
298 "profile count value is below."));
299
301 "pgo-trace-func-hash", cl::init("-"), cl::Hidden,
302 cl::value_desc("function name"),
303 cl::desc("Trace the hash of the function with this name."));
304
306 "pgo-function-size-threshold", cl::Hidden,
307 cl::desc("Do not instrument functions smaller than this threshold."));
308
310 "pgo-match-memprof", cl::init(true), cl::Hidden,
311 cl::desc("Perform matching and annotation of memprof profiles."));
312
314 "pgo-critical-edge-threshold", cl::init(20000), cl::Hidden,
315 cl::desc("Do not instrument functions with the number of critical edges "
316 " greater than this threshold."));
317
318namespace llvm {
319// Command line option to turn on CFG dot dump after profile annotation.
320// Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts
322
323// Command line option to specify the name of the function for CFG dump
324// Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name=
326
328} // namespace llvm
329
330static cl::opt<bool>
331 PGOOldCFGHashing("pgo-instr-old-cfg-hashing", cl::init(false), cl::Hidden,
332 cl::desc("Use the old CFG function hashing"));
333
334// Return a string describing the branch condition that can be
335// used in static branch probability heuristics:
336static std::string getBranchCondString(Instruction *TI) {
337 BranchInst *BI = dyn_cast<BranchInst>(TI);
338 if (!BI || !BI->isConditional())
339 return std::string();
340
341 Value *Cond = BI->getCondition();
342 ICmpInst *CI = dyn_cast<ICmpInst>(Cond);
343 if (!CI)
344 return std::string();
345
346 std::string result;
347 raw_string_ostream OS(result);
348 OS << CI->getPredicate() << "_";
349 CI->getOperand(0)->getType()->print(OS, true);
350
351 Value *RHS = CI->getOperand(1);
352 ConstantInt *CV = dyn_cast<ConstantInt>(RHS);
353 if (CV) {
354 if (CV->isZero())
355 OS << "_Zero";
356 else if (CV->isOne())
357 OS << "_One";
358 else if (CV->isMinusOne())
359 OS << "_MinusOne";
360 else
361 OS << "_Const";
362 }
363 OS.flush();
364 return result;
365}
366
367static const char *ValueProfKindDescr[] = {
368#define VALUE_PROF_KIND(Enumerator, Value, Descr) Descr,
370};
371
372// Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime
373// aware this is an ir_level profile so it can set the version flag.
375 const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR));
376 Type *IntTy64 = Type::getInt64Ty(M.getContext());
377 uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF);
378 if (IsCS)
379 ProfileVersion |= VARIANT_MASK_CSIR_PROF;
381 ProfileVersion |= VARIANT_MASK_INSTR_ENTRY;
383 ProfileVersion |= VARIANT_MASK_DBG_CORRELATE;
385 ProfileVersion |=
386 VARIANT_MASK_BYTE_COVERAGE | VARIANT_MASK_FUNCTION_ENTRY_ONLY;
387 auto IRLevelVersionVariable = new GlobalVariable(
388 M, IntTy64, true, GlobalValue::WeakAnyLinkage,
389 Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), VarName);
390 IRLevelVersionVariable->setVisibility(GlobalValue::HiddenVisibility);
391 Triple TT(M.getTargetTriple());
392 if (TT.supportsCOMDAT()) {
393 IRLevelVersionVariable->setLinkage(GlobalValue::ExternalLinkage);
394 IRLevelVersionVariable->setComdat(M.getOrInsertComdat(VarName));
395 }
396 return IRLevelVersionVariable;
397}
398
399namespace {
400
401/// The select instruction visitor plays three roles specified
402/// by the mode. In \c VM_counting mode, it simply counts the number of
403/// select instructions. In \c VM_instrument mode, it inserts code to count
404/// the number times TrueValue of select is taken. In \c VM_annotate mode,
405/// it reads the profile data and annotate the select instruction with metadata.
406enum VisitMode { VM_counting, VM_instrument, VM_annotate };
407class PGOUseFunc;
408
409/// Instruction Visitor class to visit select instructions.
410struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> {
411 Function &F;
412 unsigned NSIs = 0; // Number of select instructions instrumented.
413 VisitMode Mode = VM_counting; // Visiting mode.
414 unsigned *CurCtrIdx = nullptr; // Pointer to current counter index.
415 unsigned TotalNumCtrs = 0; // Total number of counters
416 GlobalVariable *FuncNameVar = nullptr;
417 uint64_t FuncHash = 0;
418 PGOUseFunc *UseFunc = nullptr;
419
420 SelectInstVisitor(Function &Func) : F(Func) {}
421
422 void countSelects(Function &Func) {
423 NSIs = 0;
424 Mode = VM_counting;
425 visit(Func);
426 }
427
428 // Visit the IR stream and instrument all select instructions. \p
429 // Ind is a pointer to the counter index variable; \p TotalNC
430 // is the total number of counters; \p FNV is the pointer to the
431 // PGO function name var; \p FHash is the function hash.
432 void instrumentSelects(Function &Func, unsigned *Ind, unsigned TotalNC,
433 GlobalVariable *FNV, uint64_t FHash) {
434 Mode = VM_instrument;
435 CurCtrIdx = Ind;
436 TotalNumCtrs = TotalNC;
437 FuncHash = FHash;
438 FuncNameVar = FNV;
439 visit(Func);
440 }
441
442 // Visit the IR stream and annotate all select instructions.
443 void annotateSelects(Function &Func, PGOUseFunc *UF, unsigned *Ind) {
444 Mode = VM_annotate;
445 UseFunc = UF;
446 CurCtrIdx = Ind;
447 visit(Func);
448 }
449
450 void instrumentOneSelectInst(SelectInst &SI);
451 void annotateOneSelectInst(SelectInst &SI);
452
453 // Visit \p SI instruction and perform tasks according to visit mode.
454 void visitSelectInst(SelectInst &SI);
455
456 // Return the number of select instructions. This needs be called after
457 // countSelects().
458 unsigned getNumOfSelectInsts() const { return NSIs; }
459};
460
461} // end anonymous namespace
462
463namespace {
464
465/// An MST based instrumentation for PGO
466///
467/// Implements a Minimum Spanning Tree (MST) based instrumentation for PGO
468/// in the function level.
469struct PGOEdge {
470 // This class implements the CFG edges. Note the CFG can be a multi-graph.
471 // So there might be multiple edges with same SrcBB and DestBB.
472 const BasicBlock *SrcBB;
473 const BasicBlock *DestBB;
474 uint64_t Weight;
475 bool InMST = false;
476 bool Removed = false;
477 bool IsCritical = false;
478
479 PGOEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1)
480 : SrcBB(Src), DestBB(Dest), Weight(W) {}
481
482 // Return the information string of an edge.
483 std::string infoString() const {
484 return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") +
485 (IsCritical ? "c" : " ") + " W=" + Twine(Weight)).str();
486 }
487};
488
489// This class stores the auxiliary information for each BB.
490struct BBInfo {
491 BBInfo *Group;
493 uint32_t Rank = 0;
494
495 BBInfo(unsigned IX) : Group(this), Index(IX) {}
496
497 // Return the information string of this object.
498 std::string infoString() const {
499 return (Twine("Index=") + Twine(Index)).str();
500 }
501
502 // Empty function -- only applicable to UseBBInfo.
503 void addOutEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {}
504
505 // Empty function -- only applicable to UseBBInfo.
506 void addInEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {}
507};
508
509// This class holds the per-function code shared by the PGO gen and use passes.
510template <class Edge, class BBInfo> class FuncPGOInstrumentation {
511private:
512 Function &F;
513
514 // Whether this is context-sensitive instrumentation.
515 bool IsCS;
516
517 // A map that stores the Comdat group in function F.
518 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers;
519
521
522 void computeCFGHash();
523 void renameComdatFunction();
524
525public:
526 const TargetLibraryInfo &TLI;
527 std::vector<std::vector<VPCandidateInfo>> ValueSites;
528 SelectInstVisitor SIVisitor;
529 std::string FuncName;
530 GlobalVariable *FuncNameVar;
531
532 // CFG hash value for this function.
533 uint64_t FunctionHash = 0;
534
535 // The Minimum Spanning Tree of function CFG.
537
538 // Collect all the BBs that will be instrumented, and store them in
539 // InstrumentBBs.
540 void getInstrumentBBs(std::vector<BasicBlock *> &InstrumentBBs);
541
542 // Give an edge, find the BB that will be instrumented.
543 // Return nullptr if there is no BB to be instrumented.
544 BasicBlock *getInstrBB(Edge *E);
545
546 // Return the auxiliary BB information.
547 BBInfo &getBBInfo(const BasicBlock *BB) const { return MST.getBBInfo(BB); }
548
549 // Return the auxiliary BB information if available.
550 BBInfo *findBBInfo(const BasicBlock *BB) const { return MST.findBBInfo(BB); }
551
552 // Dump edges and BB information.
553 void dumpInfo(std::string Str = "") const {
554 MST.dumpEdges(dbgs(), Twine("Dump Function ") + FuncName + " Hash: " +
555 Twine(FunctionHash) + "\t" + Str);
556 }
557
558 FuncPGOInstrumentation(
559 Function &Func, TargetLibraryInfo &TLI,
560 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
561 bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr,
562 BlockFrequencyInfo *BFI = nullptr, bool IsCS = false,
563 bool InstrumentFuncEntry = true)
564 : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func, TLI),
565 TLI(TLI), ValueSites(IPVK_Last + 1), SIVisitor(Func),
566 MST(F, InstrumentFuncEntry, BPI, BFI) {
567 // This should be done before CFG hash computation.
568 SIVisitor.countSelects(Func);
569 ValueSites[IPVK_MemOPSize] = VPC.get(IPVK_MemOPSize);
570 if (!IsCS) {
571 NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
572 NumOfPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
573 NumOfPGOBB += MST.BBInfos.size();
574 ValueSites[IPVK_IndirectCallTarget] = VPC.get(IPVK_IndirectCallTarget);
575 } else {
576 NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
577 NumOfCSPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
578 NumOfCSPGOBB += MST.BBInfos.size();
579 }
580
581 FuncName = getPGOFuncName(F);
582 computeCFGHash();
583 if (!ComdatMembers.empty())
584 renameComdatFunction();
585 LLVM_DEBUG(dumpInfo("after CFGMST"));
586
587 for (auto &E : MST.AllEdges) {
588 if (E->Removed)
589 continue;
590 IsCS ? NumOfCSPGOEdge++ : NumOfPGOEdge++;
591 if (!E->InMST)
592 IsCS ? NumOfCSPGOInstrument++ : NumOfPGOInstrument++;
593 }
594
595 if (CreateGlobalVar)
596 FuncNameVar = createPGOFuncNameVar(F, FuncName);
597 }
598};
599
600} // end anonymous namespace
601
602// Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index
603// value of each BB in the CFG. The higher 32 bits are the CRC32 of the numbers
604// of selects, indirect calls, mem ops and edges.
605template <class Edge, class BBInfo>
606void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
607 std::vector<uint8_t> Indexes;
608 JamCRC JC;
609 for (auto &BB : F) {
610 const Instruction *TI = BB.getTerminator();
611 for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) {
612 BasicBlock *Succ = TI->getSuccessor(I);
613 auto BI = findBBInfo(Succ);
614 if (BI == nullptr)
615 continue;
616 uint32_t Index = BI->Index;
617 for (int J = 0; J < 4; J++)
618 Indexes.push_back((uint8_t)(Index >> (J * 8)));
619 }
620 }
621 JC.update(Indexes);
622
623 JamCRC JCH;
624 if (PGOOldCFGHashing) {
625 // Hash format for context sensitive profile. Reserve 4 bits for other
626 // information.
627 FunctionHash = (uint64_t)SIVisitor.getNumOfSelectInsts() << 56 |
628 (uint64_t)ValueSites[IPVK_IndirectCallTarget].size() << 48 |
629 //(uint64_t)ValueSites[IPVK_MemOPSize].size() << 40 |
630 (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC();
631 } else {
632 // The higher 32 bits.
633 auto updateJCH = [&JCH](uint64_t Num) {
634 uint8_t Data[8];
636 JCH.update(Data);
637 };
638 updateJCH((uint64_t)SIVisitor.getNumOfSelectInsts());
639 updateJCH((uint64_t)ValueSites[IPVK_IndirectCallTarget].size());
640 updateJCH((uint64_t)ValueSites[IPVK_MemOPSize].size());
641 updateJCH((uint64_t)MST.AllEdges.size());
642
643 // Hash format for context sensitive profile. Reserve 4 bits for other
644 // information.
645 FunctionHash = (((uint64_t)JCH.getCRC()) << 28) + JC.getCRC();
646 }
647
648 // Reserve bit 60-63 for other information purpose.
649 FunctionHash &= 0x0FFFFFFFFFFFFFFF;
650 if (IsCS)
652 LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n"
653 << " CRC = " << JC.getCRC()
654 << ", Selects = " << SIVisitor.getNumOfSelectInsts()
655 << ", Edges = " << MST.AllEdges.size() << ", ICSites = "
656 << ValueSites[IPVK_IndirectCallTarget].size());
657 if (!PGOOldCFGHashing) {
658 LLVM_DEBUG(dbgs() << ", Memops = " << ValueSites[IPVK_MemOPSize].size()
659 << ", High32 CRC = " << JCH.getCRC());
660 }
661 LLVM_DEBUG(dbgs() << ", Hash = " << FunctionHash << "\n";);
662
663 if (PGOTraceFuncHash != "-" && F.getName().contains(PGOTraceFuncHash))
664 dbgs() << "Funcname=" << F.getName() << ", Hash=" << FunctionHash
665 << " in building " << F.getParent()->getSourceFileName() << "\n";
666}
667
668// Check if we can safely rename this Comdat function.
669static bool canRenameComdat(
670 Function &F,
671 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
672 if (!DoComdatRenaming || !canRenameComdatFunc(F, true))
673 return false;
674
675 // FIXME: Current only handle those Comdat groups that only containing one
676 // function.
677 // (1) For a Comdat group containing multiple functions, we need to have a
678 // unique postfix based on the hashes for each function. There is a
679 // non-trivial code refactoring to do this efficiently.
680 // (2) Variables can not be renamed, so we can not rename Comdat function in a
681 // group including global vars.
682 Comdat *C = F.getComdat();
683 for (auto &&CM : make_range(ComdatMembers.equal_range(C))) {
684 assert(!isa<GlobalAlias>(CM.second));
685 Function *FM = dyn_cast<Function>(CM.second);
686 if (FM != &F)
687 return false;
688 }
689 return true;
690}
691
692// Append the CFGHash to the Comdat function name.
693template <class Edge, class BBInfo>
694void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() {
695 if (!canRenameComdat(F, ComdatMembers))
696 return;
697 std::string OrigName = F.getName().str();
698 std::string NewFuncName =
699 Twine(F.getName() + "." + Twine(FunctionHash)).str();
700 F.setName(Twine(NewFuncName));
702 FuncName = Twine(FuncName + "." + Twine(FunctionHash)).str();
703 Comdat *NewComdat;
704 Module *M = F.getParent();
705 // For AvailableExternallyLinkage functions, change the linkage to
706 // LinkOnceODR and put them into comdat. This is because after renaming, there
707 // is no backup external copy available for the function.
708 if (!F.hasComdat()) {
710 NewComdat = M->getOrInsertComdat(StringRef(NewFuncName));
712 F.setComdat(NewComdat);
713 return;
714 }
715
716 // This function belongs to a single function Comdat group.
717 Comdat *OrigComdat = F.getComdat();
718 std::string NewComdatName =
719 Twine(OrigComdat->getName() + "." + Twine(FunctionHash)).str();
720 NewComdat = M->getOrInsertComdat(StringRef(NewComdatName));
721 NewComdat->setSelectionKind(OrigComdat->getSelectionKind());
722
723 for (auto &&CM : make_range(ComdatMembers.equal_range(OrigComdat))) {
724 // Must be a function.
725 cast<Function>(CM.second)->setComdat(NewComdat);
726 }
727}
728
729// Collect all the BBs that will be instruments and return them in
730// InstrumentBBs and setup InEdges/OutEdge for UseBBInfo.
731template <class Edge, class BBInfo>
732void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs(
733 std::vector<BasicBlock *> &InstrumentBBs) {
734 // Use a worklist as we will update the vector during the iteration.
735 std::vector<Edge *> EdgeList;
736 EdgeList.reserve(MST.AllEdges.size());
737 for (auto &E : MST.AllEdges)
738 EdgeList.push_back(E.get());
739
740 for (auto &E : EdgeList) {
741 BasicBlock *InstrBB = getInstrBB(E);
742 if (InstrBB)
743 InstrumentBBs.push_back(InstrBB);
744 }
745
746 // Set up InEdges/OutEdges for all BBs.
747 for (auto &E : MST.AllEdges) {
748 if (E->Removed)
749 continue;
750 const BasicBlock *SrcBB = E->SrcBB;
751 const BasicBlock *DestBB = E->DestBB;
752 BBInfo &SrcInfo = getBBInfo(SrcBB);
753 BBInfo &DestInfo = getBBInfo(DestBB);
754 SrcInfo.addOutEdge(E.get());
755 DestInfo.addInEdge(E.get());
756 }
757}
758
759// Given a CFG edge E to be instrumented, find the BB in which to place the
760// instrumentation code. The function will split the critical edge if necessary.
761template <class Edge, class BBInfo>
762BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) {
763 if (E->InMST || E->Removed)
764 return nullptr;
765
766 BasicBlock *SrcBB = const_cast<BasicBlock *>(E->SrcBB);
767 BasicBlock *DestBB = const_cast<BasicBlock *>(E->DestBB);
768 // For a fake edge, instrument the real BB.
769 if (SrcBB == nullptr)
770 return DestBB;
771 if (DestBB == nullptr)
772 return SrcBB;
773
774 auto canInstrument = [](BasicBlock *BB) -> BasicBlock * {
775 // There are basic blocks (such as catchswitch) cannot be instrumented.
776 // If the returned first insertion point is the end of BB, skip this BB.
777 if (BB->getFirstInsertionPt() == BB->end())
778 return nullptr;
779 return BB;
780 };
781
782 // Instrument the SrcBB if it has a single successor,
783 // otherwise, the DestBB if this is not a critical edge.
784 Instruction *TI = SrcBB->getTerminator();
785 if (TI->getNumSuccessors() <= 1)
786 return canInstrument(SrcBB);
787 if (!E->IsCritical)
788 return canInstrument(DestBB);
789
790 // Some IndirectBr critical edges cannot be split by the previous
791 // SplitIndirectBrCriticalEdges call. Bail out.
792 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
793 BasicBlock *InstrBB =
794 isa<IndirectBrInst>(TI) ? nullptr : SplitCriticalEdge(TI, SuccNum);
795 if (!InstrBB) {
797 dbgs() << "Fail to split critical edge: not instrument this edge.\n");
798 return nullptr;
799 }
800 // For a critical edge, we have to split. Instrument the newly
801 // created BB.
802 IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++;
803 LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index
804 << " --> " << getBBInfo(DestBB).Index << "\n");
805 // Need to add two new edges. First one: Add new edge of SrcBB->InstrBB.
806 MST.addEdge(SrcBB, InstrBB, 0);
807 // Second one: Add new edge of InstrBB->DestBB.
808 Edge &NewEdge1 = MST.addEdge(InstrBB, DestBB, 0);
809 NewEdge1.InMST = true;
810 E->Removed = true;
811
812 return canInstrument(InstrBB);
813}
814
815// When generating value profiling calls on Windows routines that make use of
816// handler funclets for exception processing, an operand bundle needs to be attached
817// to the called function. This routine will set \p OpBundles to contain the
818// funclet information, if any is needed, that should be placed on the generated
819// value profiling call for the value profile candidate call.
820static void
824 auto *OrigCall = dyn_cast<CallBase>(Cand.AnnotatedInst);
825 if (!OrigCall)
826 return;
827
828 if (!isa<IntrinsicInst>(OrigCall)) {
829 // The instrumentation call should belong to the same funclet as a
830 // non-intrinsic call, so just copy the operand bundle, if any exists.
831 std::optional<OperandBundleUse> ParentFunclet =
832 OrigCall->getOperandBundle(LLVMContext::OB_funclet);
833 if (ParentFunclet)
834 OpBundles.emplace_back(OperandBundleDef(*ParentFunclet));
835 } else {
836 // Intrinsics or other instructions do not get funclet information from the
837 // front-end. Need to use the BlockColors that was computed by the routine
838 // colorEHFunclets to determine whether a funclet is needed.
839 if (!BlockColors.empty()) {
840 const ColorVector &CV = BlockColors.find(OrigCall->getParent())->second;
841 assert(CV.size() == 1 && "non-unique color for block!");
842 Instruction *EHPad = CV.front()->getFirstNonPHI();
843 if (EHPad->isEHPad())
844 OpBundles.emplace_back("funclet", EHPad);
845 }
846 }
847}
848
849// Visit all edges and instrument the edges not in MST, and do value profiling.
850// Critical edges will be split.
854 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
855 bool IsCS) {
856 // Split indirectbr critical edges here before computing the MST rather than
857 // later in getInstrBB() to avoid invalidating it.
858 SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, BFI);
859
860 FuncPGOInstrumentation<PGOEdge, BBInfo> FuncInfo(
861 F, TLI, ComdatMembers, true, BPI, BFI, IsCS, PGOInstrumentEntry);
862
863 Type *I8PtrTy = Type::getInt8PtrTy(M->getContext());
864 auto Name = ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy);
865 auto CFGHash = ConstantInt::get(Type::getInt64Ty(M->getContext()),
866 FuncInfo.FunctionHash);
868 auto &EntryBB = F.getEntryBlock();
869 IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt());
870 // llvm.instrprof.cover(i8* <name>, i64 <hash>, i32 <num-counters>,
871 // i32 <index>)
872 Builder.CreateCall(
873 Intrinsic::getDeclaration(M, Intrinsic::instrprof_cover),
874 {Name, CFGHash, Builder.getInt32(1), Builder.getInt32(0)});
875 return;
876 }
877
878 std::vector<BasicBlock *> InstrumentBBs;
879 FuncInfo.getInstrumentBBs(InstrumentBBs);
880 unsigned NumCounters =
881 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
882
883 uint32_t I = 0;
884 for (auto *InstrBB : InstrumentBBs) {
885 IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt());
886 assert(Builder.GetInsertPoint() != InstrBB->end() &&
887 "Cannot get the Instrumentation point");
888 // llvm.instrprof.increment(i8* <name>, i64 <hash>, i32 <num-counters>,
889 // i32 <index>)
890 Builder.CreateCall(
891 Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment),
892 {Name, CFGHash, Builder.getInt32(NumCounters), Builder.getInt32(I++)});
893 }
894
895 // Now instrument select instructions:
896 FuncInfo.SIVisitor.instrumentSelects(F, &I, NumCounters, FuncInfo.FuncNameVar,
897 FuncInfo.FunctionHash);
898 assert(I == NumCounters);
899
901 return;
902
903 NumOfPGOICall += FuncInfo.ValueSites[IPVK_IndirectCallTarget].size();
904
905 // Intrinsic function calls do not have funclet operand bundles needed for
906 // Windows exception handling attached to them. However, if value profiling is
907 // inserted for one of these calls, then a funclet value will need to be set
908 // on the instrumentation call based on the funclet coloring.
910 if (F.hasPersonalityFn() &&
912 BlockColors = colorEHFunclets(F);
913
914 // For each VP Kind, walk the VP candidates and instrument each one.
915 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) {
916 unsigned SiteIndex = 0;
917 if (Kind == IPVK_MemOPSize && !PGOInstrMemOP)
918 continue;
919
920 for (VPCandidateInfo Cand : FuncInfo.ValueSites[Kind]) {
921 LLVM_DEBUG(dbgs() << "Instrument one VP " << ValueProfKindDescr[Kind]
922 << " site: CallSite Index = " << SiteIndex << "\n");
923
924 IRBuilder<> Builder(Cand.InsertPt);
925 assert(Builder.GetInsertPoint() != Cand.InsertPt->getParent()->end() &&
926 "Cannot get the Instrumentation point");
927
928 Value *ToProfile = nullptr;
929 if (Cand.V->getType()->isIntegerTy())
930 ToProfile = Builder.CreateZExtOrTrunc(Cand.V, Builder.getInt64Ty());
931 else if (Cand.V->getType()->isPointerTy())
932 ToProfile = Builder.CreatePtrToInt(Cand.V, Builder.getInt64Ty());
933 assert(ToProfile && "value profiling Value is of unexpected type");
934
936 populateEHOperandBundle(Cand, BlockColors, OpBundles);
937 Builder.CreateCall(
938 Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile),
939 {ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy),
940 Builder.getInt64(FuncInfo.FunctionHash), ToProfile,
941 Builder.getInt32(Kind), Builder.getInt32(SiteIndex++)},
942 OpBundles);
943 }
944 } // IPVK_First <= Kind <= IPVK_Last
945}
946
947namespace {
948
949// This class represents a CFG edge in profile use compilation.
950struct PGOUseEdge : public PGOEdge {
951 bool CountValid = false;
952 uint64_t CountValue = 0;
953
954 PGOUseEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1)
955 : PGOEdge(Src, Dest, W) {}
956
957 // Set edge count value
958 void setEdgeCount(uint64_t Value) {
959 CountValue = Value;
960 CountValid = true;
961 }
962
963 // Return the information string for this object.
964 std::string infoString() const {
965 if (!CountValid)
966 return PGOEdge::infoString();
967 return (Twine(PGOEdge::infoString()) + " Count=" + Twine(CountValue))
968 .str();
969 }
970};
971
972using DirectEdges = SmallVector<PGOUseEdge *, 2>;
973
974// This class stores the auxiliary information for each BB.
975struct UseBBInfo : public BBInfo {
976 uint64_t CountValue = 0;
977 bool CountValid;
978 int32_t UnknownCountInEdge = 0;
979 int32_t UnknownCountOutEdge = 0;
980 DirectEdges InEdges;
981 DirectEdges OutEdges;
982
983 UseBBInfo(unsigned IX) : BBInfo(IX), CountValid(false) {}
984
985 UseBBInfo(unsigned IX, uint64_t C)
986 : BBInfo(IX), CountValue(C), CountValid(true) {}
987
988 // Set the profile count value for this BB.
989 void setBBInfoCount(uint64_t Value) {
990 CountValue = Value;
991 CountValid = true;
992 }
993
994 // Return the information string of this object.
995 std::string infoString() const {
996 if (!CountValid)
997 return BBInfo::infoString();
998 return (Twine(BBInfo::infoString()) + " Count=" + Twine(CountValue)).str();
999 }
1000
1001 // Add an OutEdge and update the edge count.
1002 void addOutEdge(PGOUseEdge *E) {
1003 OutEdges.push_back(E);
1004 UnknownCountOutEdge++;
1005 }
1006
1007 // Add an InEdge and update the edge count.
1008 void addInEdge(PGOUseEdge *E) {
1009 InEdges.push_back(E);
1010 UnknownCountInEdge++;
1011 }
1012};
1013
1014} // end anonymous namespace
1015
1016// Sum up the count values for all the edges in Edges, skipping edges that are
// marked Removed. Returns 0 when there is nothing to add. The CountValid flag
// of an edge is not consulted here.
// NOTE(review): the signature (embedded line 1017) is not visible in this
// listing; the body iterates a range named Edges of edge pointers.
1018 uint64_t Total = 0;
1019 for (const auto &E : Edges) {
1020 if (E->Removed)
1021 continue;
1022 Total += E->CountValue;
1023 }
1024 return Total;
1025}
1026
1027namespace {
1028
// Per-function driver of the profile-use compilation: it reads the counters
// (and memprof data) of one function from the profile, propagates the counts
// to all blocks and edges, and annotates the IR with the results.
// NOTE(review): three lines of this class are not visible in this listing
// (embedded numbers 1033, 1042 and 1094): part of the constructor parameter
// list, the tail of the readCounters declaration, and one private member
// (the constructor initializes a member named BFI).
1029class PGOUseFunc {
1030public:
1031 PGOUseFunc(Function &Func, Module *Modu, TargetLibraryInfo &TLI,
1032 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
1034 ProfileSummaryInfo *PSI, bool IsCS, bool InstrumentFuncEntry)
1035 : F(Func), M(Modu), BFI(BFIin), PSI(PSI),
1036 FuncInfo(Func, TLI, ComdatMembers, false, BPI, BFIin, IsCS,
1037 InstrumentFuncEntry),
1038 FreqAttr(FFA_Normal), IsCS(IsCS) {}
1039
1040 // Read counts for the instrumented BB from profile. Returns false on error.
1041 bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
1043
1044 // Read memprof data for the instrumented function from profile.
1045 bool readMemprof(IndexedInstrProfReader *PGOReader);
1046
1047 // Populate the counts for all BBs.
1048 void populateCounters();
1049
1050 // Set the branch weights based on the count values.
1051 void setBranchWeights();
1052
1053 // Annotate the value profile call sites for all value kind.
1054 void annotateValueSites();
1055
1056 // Annotate the value profile call sites for one value kind.
1057 void annotateValueSites(uint32_t Kind);
1058
1059 // Annotate the irreducible loop header weights.
1060 void annotateIrrLoopHeaderWeights();
1061
1062 // The hotness of the function from the profile count.
1063 enum FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot };
1064
1065 // Return the function hotness from the profile.
1066 FuncFreqAttr getFuncFreqAttr() const { return FreqAttr; }
1067
1068 // Return the function hash.
1069 uint64_t getFuncHash() const { return FuncInfo.FunctionHash; }
1070
1071 // Return the profile record for this function;
1072 InstrProfRecord &getProfileRecord() { return ProfileRecord; }
1073
1074 // Return the auxiliary BB information.
1075 UseBBInfo &getBBInfo(const BasicBlock *BB) const {
1076 return FuncInfo.getBBInfo(BB);
1077 }
1078
1079 // Return the auxiliary BB information if available; callers in this file
// check the result against nullptr.
1080 UseBBInfo *findBBInfo(const BasicBlock *BB) const {
1081 return FuncInfo.findBBInfo(BB);
1082 }
1083
// Return the function being annotated.
1084 Function &getFunc() const { return F; }
1085
// Forward to FuncInfo.dumpInfo with the given string.
1086 void dumpInfo(std::string Str = "") const {
1087 FuncInfo.dumpInfo(Str);
1088 }
1089
// Return the maximum count in the profile. The member has no in-class
// initializer and is assigned by readCounters() only after the counters were
// applied successfully.
1090 uint64_t getProgramMaxCount() const { return ProgramMaxCount; }
1091private:
1092 Function &F;
1093 Module *M;
1095 ProfileSummaryInfo *PSI;
1096
1097 // This member stores the shared information with class PGOGenFunc.
1098 FuncPGOInstrumentation<PGOUseEdge, UseBBInfo> FuncInfo;
1099
1100 // The maximum count value in the profile. This is only used in PGO use
1101 // compilation.
1102 uint64_t ProgramMaxCount;
1103
1104 // Position of counter that remains to be read.
1105 uint32_t CountPosition = 0;
1106
1107 // Total size of the profile count for this function.
1108 uint32_t ProfileCountSize = 0;
1109
1110 // ProfileRecord for this function.
1111 InstrProfRecord ProfileRecord;
1112
1113 // Function hotness info derived from profile.
1114 FuncFreqAttr FreqAttr;
1115
1116 // Is to use the context sensitive profile.
1117 bool IsCS;
1118
1119 // Find the Instrumented BB and set the value. Return false on error.
1120 bool setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile);
1121
1122 // Set the edge counter value for the unknown edge -- there should be only
1123 // one unknown edge.
1124 void setEdgeCount(DirectEdges &Edges, uint64_t Value);
1125
1126 // Return FuncName string;
1127 std::string getFuncName() const { return FuncInfo.FuncName; }
1128
1129 // Set the hot/cold inline hints based on the count values: hot when PSI
// reports EntryCount as hot, otherwise cold when PSI reports MaxCount as cold.
// FreqAttr is left unchanged when neither test matches.
1130 // FIXME: This function should be removed once the functionality in
1131 // the inliner is implemented.
1132 void markFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) {
1133 if (PSI->isHotCount(EntryCount))
1134 FreqAttr = FFA_Hot;
1135 else if (PSI->isColdCount(MaxCount))
1136 FreqAttr = FFA_Cold;
1137 }
1138};
1139
1140} // end anonymous namespace
1141
1142// Visit all the edges and assign the count value for the instrumented
1143// edges and the BB. Return false on error.
1144bool PGOUseFunc::setInstrumentedCounts(
1145 const std::vector<uint64_t> &CountFromProfile) {
1146
1147 std::vector<BasicBlock *> InstrumentBBs;
1148 FuncInfo.getInstrumentBBs(InstrumentBBs);
1149 unsigned NumCounters =
1150 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
1151 // The number of counters here should match the number of counters
1152 // in profile. Return if they mismatch.
1153 if (NumCounters != CountFromProfile.size()) {
1154 return false;
1155 }
1156 auto *FuncEntry = &*F.begin();
1157
1158 // Set the profile count to the Instrumented BBs.
1159 uint32_t I = 0;
1160 for (BasicBlock *InstrBB : InstrumentBBs) {
1161 uint64_t CountValue = CountFromProfile[I++];
1162 UseBBInfo &Info = getBBInfo(InstrBB);
1163 // If we reach here, we know that we have some nonzero count
1164 // values in this function. The entry count should not be 0.
1165 // Fix it if necessary.
1166 if (InstrBB == FuncEntry && CountValue == 0)
1167 CountValue = 1;
1168 Info.setBBInfoCount(CountValue);
1169 }
1170 ProfileCountSize = CountFromProfile.size();
1171 CountPosition = I;
1172
1173 // Set the edge count and update the count of unknown edges for BBs.
1174 auto setEdgeCount = [this](PGOUseEdge *E, uint64_t Value) -> void {
1175 E->setEdgeCount(Value);
1176 this->getBBInfo(E->SrcBB).UnknownCountOutEdge--;
1177 this->getBBInfo(E->DestBB).UnknownCountInEdge--;
1178 };
1179
1180 // Set the profile count the Instrumented edges. There are BBs that not in
1181 // MST but not instrumented. Need to set the edge count value so that we can
1182 // populate the profile counts later.
1183 for (auto &E : FuncInfo.MST.AllEdges) {
1184 if (E->Removed || E->InMST)
1185 continue;
1186 const BasicBlock *SrcBB = E->SrcBB;
1187 UseBBInfo &SrcInfo = getBBInfo(SrcBB);
1188
1189 // If only one out-edge, the edge profile count should be the same as BB
1190 // profile count.
1191 if (SrcInfo.CountValid && SrcInfo.OutEdges.size() == 1)
1192 setEdgeCount(E.get(), SrcInfo.CountValue);
1193 else {
1194 const BasicBlock *DestBB = E->DestBB;
1195 UseBBInfo &DestInfo = getBBInfo(DestBB);
1196 // If only one in-edge, the edge profile count should be the same as BB
1197 // profile count.
1198 if (DestInfo.CountValid && DestInfo.InEdges.size() == 1)
1199 setEdgeCount(E.get(), DestInfo.CountValue);
1200 }
1201 if (E->CountValid)
1202 continue;
1203 // E's count should have been set from profile. If not, this meenas E skips
1204 // the instrumentation. We set the count to 0.
1205 setEdgeCount(E.get(), 0);
1206 }
1207 return true;
1208}
1209
1210// Set the count value for the unknown edge. There should be one and only one
1211// unknown edge in Edges vector.
1212void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) {
1213 for (auto &E : Edges) {
1214 if (E->CountValid)
1215 continue;
1216 E->setEdgeCount(Value);
1217
1218 getBBInfo(E->SrcBB).UnknownCountOutEdge--;
1219 getBBInfo(E->DestBB).UnknownCountInEdge--;
1220 return;
1221 }
1222 llvm_unreachable("Cannot find the unknown count edge");
1223}
1224
1225// Emit function metadata indicating PGO profile mismatch: the string
// "instr_prof_hash_mismatch" is appended to the function's MD_annotation
// tuple, keeping the operands that are already there.
// NOTE(review): the start of the signature (embedded line 1226) and the
// declaration of Names (line 1229) are not visible in this listing; the body
// uses a function F, a context ctx and a container Names of metadata operands.
1227 LLVMContext &ctx) {
1228 const char MetadataName[] = "instr_prof_hash_mismatch";
1230 // If this metadata already exists, ignore.
1231 auto *Existing = F.getMetadata(LLVMContext::MD_annotation);
1232 if (Existing) {
// Copy the existing operands; every operand is cast to MDString, so this
// assumes the annotation tuple holds only strings.
1233 MDTuple *Tuple = cast<MDTuple>(Existing);
1234 for (const auto &N : Tuple->operands()) {
1235 if (cast<MDString>(N.get())->getString() == MetadataName)
1236 return;
1237 Names.push_back(N.get());
1238 }
1239 }
1240
// Replace the annotation with a new tuple: old operands plus the marker.
1241 MDBuilder MDB(ctx);
1242 Names.push_back(MDB.createString(MetadataName));
1243 MDNode *MD = MDTuple::get(ctx, Names);
1244 F.setMetadata(LLVMContext::MD_annotation, MD);
1245}
1246
// Attach MD_callsite metadata to I, built from the stack ids in
// InlinedCallStack via buildCallstackMetadata.
// NOTE(review): the first line of the signature (embedded line 1247) is not
// visible in this listing; the body uses an instruction named I.
1248 std::vector<uint64_t> &InlinedCallStack,
1249 LLVMContext &Ctx) {
1250 I.setMetadata(LLVMContext::MD_callsite,
1251 buildCallstackMetadata(InlinedCallStack, Ctx));
1252}
1253
// Compute a 64-bit stack id by hashing (Function, LineOffset, Column) with a
// little-endian HashBuilder over TruncatedBLAKE3<8> and copying the hash bytes
// into a uint64_t.
// NOTE(review): the first line of the signature (embedded line 1254), the
// declaration of HashBuilder (1257) and the declaration of Hash (1259) are not
// visible in this listing.
1255 uint32_t Column) {
1256 llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::support::endianness::little>
1258 HashBuilder.add(Function, LineOffset, Column);
1260 uint64_t Id;
// The copy length is sizeof(Hash) — presumably 8 bytes, matching sizeof(Id);
// confirm against the type of Hash.
1261 std::memcpy(&Id, Hash.data(), sizeof(Hash));
1262 return Id;
1263}
1264
// NOTE(review): the brace below closes a definition whose header and body
// (embedded lines 1265-1266) are not visible. Other code in this file calls
// computeStackId with a single frame argument, so this is presumably that
// overload — confirm.
1267}
1268
1269static void addCallStack(CallStackTrie &AllocTrie,
1270 const AllocationInfo *AllocInfo) {
1271 SmallVector<uint64_t> StackIds;
1272 for (auto StackFrame : AllocInfo->CallStack)
1273 StackIds.push_back(computeStackId(StackFrame));
1274 auto AllocType = getAllocType(AllocInfo->Info.getMaxAccessCount(),
1275 AllocInfo->Info.getMinSize(),
1276 AllocInfo->Info.getMinLifetime());
1277 AllocTrie.addCallStack(AllocType, StackIds);
1278}
1279
1280// Helper to compare the InlinedCallStack computed from an instruction's debug
1281// info to a list of Frames from profile data (either the allocation data or a
1282// callsite). For callsites, the StartIndex to use in the Frame array may be
1283// non-zero.
// Returns true when every id in InlinedCallStack equals the stack id of the
// corresponding profile frame, starting at StartIndex. The profile call stack
// may be longer than InlinedCallStack; if it is shorter, the result is false.
// NOTE(review): the line holding the function name and first parameter
// (embedded line 1285) is not visible in this listing; the body uses a range
// named ProfileCallStack.
1284static bool
1286 ArrayRef<uint64_t> InlinedCallStack,
1287 unsigned StartIndex = 0) {
1288 auto StackFrame = ProfileCallStack.begin() + StartIndex;
1289 auto InlCallStackIter = InlinedCallStack.begin();
// Walk both sequences in lock step until either one is exhausted.
1290 for (; StackFrame != ProfileCallStack.end() &&
1291 InlCallStackIter != InlinedCallStack.end();
1292 ++StackFrame, ++InlCallStackIter) {
1293 uint64_t StackId = computeStackId(*StackFrame);
1294 if (StackId != *InlCallStackIter)
1295 return false;
1296 }
1297 // Return true if we found and matched all stack ids from the call
1298 // instruction.
1299 return InlCallStackIter == InlinedCallStack.end();
1300}
1301
// Match the memprof record of this function against the call instructions in
// F using their debug locations, and attach memprof and/or callsite metadata
// to the calls that match.
// Returns true when MatchMemProf is off or after the instructions have been
// walked; returns false when the memprof record could not be read (a warning
// may be diagnosed in that case).
// NOTE(review): a few lines are not visible in this listing (embedded numbers
// 1323-1324, 1326, 1451 and 1484): parts of the SkipWarning condition and the
// heads of the two `if` statements that call the call-stack matching helper.
1302bool PGOUseFunc::readMemprof(IndexedInstrProfReader *PGOReader) {
1303 if (!MatchMemProf)
1304 return true;
1305
1306 auto &Ctx = M->getContext();
1307
// The memprof record is looked up by the GUID computed from the function name.
1308 auto FuncGUID = Function::getGUID(FuncInfo.FuncName);
1309 Expected<memprof::MemProfRecord> MemProfResult =
1310 PGOReader->getMemProfRecord(FuncGUID);
1311 if (Error E = MemProfResult.takeError()) {
1312 handleAllErrors(std::move(E), [&](const InstrProfError &IPE) {
1313 auto Err = IPE.get();
1314 bool SkipWarning = false;
1315 LLVM_DEBUG(dbgs() << "Error in reading profile for Func "
1316 << FuncInfo.FuncName << ": ");
1317 if (Err == instrprof_error::unknown_function) {
1318 NumOfMemProfMissing++;
1319 SkipWarning = !PGOWarnMissing;
1320 LLVM_DEBUG(dbgs() << "unknown function");
1321 } else if (Err == instrprof_error::hash_mismatch) {
1322 SkipWarning =
1325 (F.hasComdat() ||
1327 LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")");
1328 }
1329
1330 if (SkipWarning)
1331 return;
1332
1333 std::string Msg =
1334 (IPE.message() + Twine(" ") + F.getName().str() + Twine(" Hash = ") +
1335 std::to_string(FuncInfo.FunctionHash))
1336 .str();
1337
1338 Ctx.diagnose(
1339 DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning));
1340 });
1341 return false;
1342 }
1343
1344 // Build maps of the location hash to all profile data with that leaf location
1345 // (allocation info and the callsites).
1346 std::map<uint64_t, std::set<const AllocationInfo *>> LocHashToAllocInfo;
1347 // For the callsites we need to record the index of the associated frame in
1348 // the frame array (see comments below where the map entries are added).
1349 std::map<uint64_t, std::set<std::pair<const SmallVector<Frame> *, unsigned>>>
1350 LocHashToCallSites;
// The maps above store pointers into MemProfRec, which lives until the end of
// this function.
1351 const auto MemProfRec = std::move(MemProfResult.get());
1352 for (auto &AI : MemProfRec.AllocSites) {
1353 // Associate the allocation info with the leaf frame. The later matching
1354 // code will match any inlined call sequences in the IR with a longer prefix
1355 // of call stack frames.
// NOTE(review): CallStack[0] is read without an emptiness check — presumably
// every allocation site has at least one frame; confirm.
1356 uint64_t StackId = computeStackId(AI.CallStack[0]);
1357 LocHashToAllocInfo[StackId].insert(&AI);
1358 }
1359 for (auto &CS : MemProfRec.CallSites) {
1360 // Need to record all frames from leaf up to and including this function,
1361 // as any of these may or may not have been inlined at this point.
1362 unsigned Idx = 0;
1363 for (auto &StackFrame : CS) {
1364 uint64_t StackId = computeStackId(StackFrame);
1365 LocHashToCallSites[StackId].insert(std::make_pair(&CS, Idx++));
1366 // Once we find this function, we can stop recording.
1367 if (StackFrame.Function == FuncGUID)
1368 break;
1369 }
// NOTE(review): CS[Idx - 1] assumes CS has at least one frame (Idx > 0).
1370 assert(Idx <= CS.size() && CS[Idx - 1].Function == FuncGUID);
1371 }
1372
// Line offset of a location relative to the first line of its enclosing
// subprogram, truncated to the low 16 bits.
1373 auto GetOffset = [](const DILocation *DIL) {
1374 return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) &
1375 0xffff;
1376 };
1377
1378 // Now walk the instructions, looking up the associated profile data using
1379 // debug locations.
1380 for (auto &BB : F) {
1381 for (auto &I : BB) {
1382 if (I.isDebugOrPseudoInst())
1383 continue;
1384 // We are only interested in calls (allocation or interior call stack
1385 // context calls).
1386 auto *CI = dyn_cast<CallBase>(&I);
1387 if (!CI)
1388 continue;
// Calls to intrinsics are skipped; calls without a known callee are kept.
1389 auto *CalledFunction = CI->getCalledFunction();
1390 if (CalledFunction && CalledFunction->isIntrinsic())
1391 continue;
1392 // List of call stack ids computed from the location hashes on debug
1393 // locations (leaf to inlined at root).
1394 std::vector<uint64_t> InlinedCallStack;
1395 // Was the leaf location found in one of the profile maps?
1396 bool LeafFound = false;
1397 // If leaf was found in a map, iterators pointing to its location in both
1398 // of the maps. It might exist in neither, one, or both (the latter case
1399 // can happen because we don't currently have discriminators to
1400 // distinguish the case when a single line/col maps to both an allocation
1401 // and another callsite).
1402 std::map<uint64_t, std::set<const AllocationInfo *>>::iterator
1403 AllocInfoIter;
1404 std::map<uint64_t, std::set<std::pair<const SmallVector<Frame> *,
1405 unsigned>>>::iterator CallSitesIter;
// Walk the inlined-at chain of the debug location, from the leaf outwards.
1406 for (const DILocation *DIL = I.getDebugLoc(); DIL != nullptr;
1407 DIL = DIL->getInlinedAt()) {
1408 // Use C++ linkage name if possible. Need to compile with
1409 // -fdebug-info-for-profiling to get linkage name.
1410 StringRef Name = DIL->getScope()->getSubprogram()->getLinkageName();
1411 if (Name.empty())
1412 Name = DIL->getScope()->getSubprogram()->getName();
1413 auto CalleeGUID = Function::getGUID(Name);
1414 auto StackId =
1415 computeStackId(CalleeGUID, GetOffset(DIL), DIL->getColumn());
1416 // LeafFound will only be false on the first iteration, since we either
1417 // set it true or break out of the loop below.
1418 if (!LeafFound) {
1419 AllocInfoIter = LocHashToAllocInfo.find(StackId);
1420 CallSitesIter = LocHashToCallSites.find(StackId);
1421 // Check if the leaf is in one of the maps. If not, no need to look
1422 // further at this call.
1423 if (AllocInfoIter == LocHashToAllocInfo.end() &&
1424 CallSitesIter == LocHashToCallSites.end())
1425 break;
1426 LeafFound = true;
1427 }
1428 InlinedCallStack.push_back(StackId);
1429 }
1430 // If leaf not in either of the maps, skip inst.
1431 if (!LeafFound)
1432 continue;
1433
1434 // First add !memprof metadata from allocation info, if we found the
1435 // instruction's leaf location in that map, and if the rest of the
1436 // instruction's locations match the prefix Frame locations on an
1437 // allocation context with the same leaf.
1438 if (AllocInfoIter != LocHashToAllocInfo.end()) {
1439 // Only consider allocations via new, to reduce unnecessary metadata,
1440 // since those are the only allocations that will be targeted initially.
1441 if (!isNewLikeFn(CI, &FuncInfo.TLI))
1442 continue;
1443 // We may match this instruction's location list to multiple MIB
1444 // contexts. Add them to a Trie specialized for trimming the contexts to
1445 // the minimal needed to disambiguate contexts with unique behavior.
1446 CallStackTrie AllocTrie;
1447 for (auto *AllocInfo : AllocInfoIter->second) {
1448 // Check the full inlined call stack against this one.
1449 // If we found and thus matched all frames on the call, include
1450 // this MIB.
1452 InlinedCallStack))
1453 addCallStack(AllocTrie, AllocInfo);
1454 }
1455 // We might not have matched any to the full inlined call stack.
1456 // But if we did, create and attach metadata, or a function attribute if
1457 // all contexts have identical profiled behavior.
1458 if (!AllocTrie.empty()) {
1459 // MemprofMDAttached will be false if a function attribute was
1460 // attached.
1461 bool MemprofMDAttached = AllocTrie.buildAndAttachMIBMetadata(CI);
1462 assert(MemprofMDAttached == I.hasMetadata(LLVMContext::MD_memprof));
1463 if (MemprofMDAttached) {
1464 // Add callsite metadata for the instruction's location list so that
1465 // it simpler later on to identify which part of the MIB contexts
1466 // are from this particular instruction (including during inlining,
1467 // when the callsite metadata will be updated appropriately).
1468 // FIXME: can this be changed to strip out the matching stack
1469 // context ids from the MIB contexts and not add any callsite
1470 // metadata here to save space?
1471 addCallsiteMetadata(I, InlinedCallStack, Ctx);
1472 }
1473 }
// A call whose leaf matched an allocation site is never considered for the
// callsite-only handling below.
1474 continue;
1475 }
1476
1477 // Otherwise, add callsite metadata. If we reach here then we found the
1478 // instruction's leaf location in the callsites map and not the allocation
1479 // map.
1480 assert(CallSitesIter != LocHashToCallSites.end());
1481 for (auto CallStackIdx : CallSitesIter->second) {
1482 // If we found and thus matched all frames on the call, create and
1483 // attach call stack metadata.
1485 *CallStackIdx.first, InlinedCallStack, CallStackIdx.second)) {
1486 addCallsiteMetadata(I, InlinedCallStack, Ctx);
1487 // Only need to find one with a matching call stack and add a single
1488 // callsite metadata.
1489 break;
1490 }
1491 }
1492 }
1493 }
1494
1495 return true;
1496}
1497
1498// Read the profile from ProfileFileName and assign the value to the
1499// instrumented BB and the edges. This function also updates ProgramMaxCount.
1500// Return true if the profile are successfully read, and false on errors.
// AllZeros is set to whether all counters of the record sum to 0. For a
// record whose pseudo kind is not NotPseudo, the function returns true right
// after storing the kind, without touching AllZeros or any counts.
// NOTE(review): several lines are not visible in this listing (embedded
// numbers 1502, 1505, 1521-1522 and 1524): the end of the signature, the
// declaration of Result, and parts of the SkipWarning condition.
1501bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
1503 auto &Ctx = M->getContext();
// Passed by address to the profile lookup (whose first line is not visible);
// reported in the warning below as the count that is discarded.
1504 uint64_t MismatchedFuncSum = 0;
1506 FuncInfo.FuncName, FuncInfo.FunctionHash, &MismatchedFuncSum);
1507 if (Error E = Result.takeError()) {
1508 handleAllErrors(std::move(E), [&](const InstrProfError &IPE) {
1509 auto Err = IPE.get();
1510 bool SkipWarning = false;
1511 LLVM_DEBUG(dbgs() << "Error in reading profile for Func "
1512 << FuncInfo.FuncName << ": ");
1513 if (Err == instrprof_error::unknown_function) {
1514 IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++;
1515 SkipWarning = !PGOWarnMissing;
1516 LLVM_DEBUG(dbgs() << "unknown function");
1517 } else if (Err == instrprof_error::hash_mismatch ||
1518 Err == instrprof_error::malformed) {
1519 IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++;
1520 SkipWarning =
1523 (F.hasComdat() || F.getLinkage() == GlobalValue::WeakAnyLinkage ||
1525 LLVM_DEBUG(dbgs() << "hash mismatch (hash= " << FuncInfo.FunctionHash
1526 << " skip=" << SkipWarning << ")");
1527 // Emit function metadata indicating PGO profile mismatch.
1528 annotateFunctionWithHashMismatch(F, M->getContext());
1529 }
1530
1531 LLVM_DEBUG(dbgs() << " IsCS=" << IsCS << "\n");
1532 if (SkipWarning)
1533 return;
1534
1535 std::string Msg =
1536 IPE.message() + std::string(" ") + F.getName().str() +
1537 std::string(" Hash = ") + std::to_string(FuncInfo.FunctionHash) +
1538 std::string(" up to ") + std::to_string(MismatchedFuncSum) +
1539 std::string(" count discarded");
1540
1541 Ctx.diagnose(
1542 DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning));
1543 });
1544 return false;
1545 }
1546 ProfileRecord = std::move(Result.get());
// Pseudo records are reported to the caller through PseudoKind and carry no
// counts to apply here.
1547 PseudoKind = ProfileRecord.getCountPseudoKind();
1548 if (PseudoKind != InstrProfRecord::NotPseudo) {
1549 return true;
1550 }
1551 std::vector<uint64_t> &CountFromProfile = ProfileRecord.Counts;
1552
1553 IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++;
1554 LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n");
1555
// Sum all counters only to detect an all-zero profile.
1556 uint64_t ValueSum = 0;
1557 for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) {
1558 LLVM_DEBUG(dbgs() << " " << I << ": " << CountFromProfile[I] << "\n");
1559 ValueSum += CountFromProfile[I];
1560 }
1561 AllZeros = (ValueSum == 0);
1562
1563 LLVM_DEBUG(dbgs() << "SUM = " << ValueSum << "\n");
1564
// NOTE(review): the nullptr key presumably denotes a synthetic node outside
// the real CFG; why both unknown-edge tallies are set to 2 is not explained
// here — confirm.
1565 getBBInfo(nullptr).UnknownCountOutEdge = 2;
1566 getBBInfo(nullptr).UnknownCountInEdge = 2;
1567
1568 if (!setInstrumentedCounts(CountFromProfile)) {
1569 LLVM_DEBUG(
1570 dbgs() << "Inconsistent number of counts, skipping this function");
1571 Ctx.diagnose(DiagnosticInfoPGOProfile(
1572 M->getName().data(),
1573 Twine("Inconsistent number of counts in ") + F.getName().str()
1574 + Twine(": the profile may be stale or there is a function name collision."),
1575 DS_Warning));
1576 return false;
1577 }
// ProgramMaxCount is assigned only on this successful path.
1578 ProgramMaxCount = PGOReader->getMaximumFunctionCount(IsCS);
1579 return true;
1580}
1581
1582// Populate the counters from instrumented BBs to all BBs.
1583// In the end of this operation, all BBs should have a valid count value.
1584void PGOUseFunc::populateCounters() {
1585 bool Changes = true;
1586 unsigned NumPasses = 0;
1587 while (Changes) {
1588 NumPasses++;
1589 Changes = false;
1590
1591 // For efficient traversal, it's better to start from the end as most
1592 // of the instrumented edges are at the end.
1593 for (auto &BB : reverse(F)) {
1594 UseBBInfo *Count = findBBInfo(&BB);
1595 if (Count == nullptr)
1596 continue;
1597 if (!Count->CountValid) {
1598 if (Count->UnknownCountOutEdge == 0) {
1599 Count->CountValue = sumEdgeCount(Count->OutEdges);
1600 Count->CountValid = true;
1601 Changes = true;
1602 } else if (Count->UnknownCountInEdge == 0) {
1603 Count->CountValue = sumEdgeCount(Count->InEdges);
1604 Count->CountValid = true;
1605 Changes = true;
1606 }
1607 }
1608 if (Count->CountValid) {
1609 if (Count->UnknownCountOutEdge == 1) {
1610 uint64_t Total = 0;
1611 uint64_t OutSum = sumEdgeCount(Count->OutEdges);
1612 // If the one of the successor block can early terminate (no-return),
1613 // we can end up with situation where out edge sum count is larger as
1614 // the source BB's count is collected by a post-dominated block.
1615 if (Count->CountValue > OutSum)
1616 Total = Count->CountValue - OutSum;
1617 setEdgeCount(Count->OutEdges, Total);
1618 Changes = true;
1619 }
1620 if (Count->UnknownCountInEdge == 1) {
1621 uint64_t Total = 0;
1622 uint64_t InSum = sumEdgeCount(Count->InEdges);
1623 if (Count->CountValue > InSum)
1624 Total = Count->CountValue - InSum;
1625 setEdgeCount(Count->InEdges, Total);
1626 Changes = true;
1627 }
1628 }
1629 }
1630 }
1631
1632 LLVM_DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n");
1633 (void) NumPasses;
1634#ifndef NDEBUG
1635 // Assert every BB has a valid counter.
1636 for (auto &BB : F) {
1637 auto BI = findBBInfo(&BB);
1638 if (BI == nullptr)
1639 continue;
1640 assert(BI->CountValid && "BB count is not valid");
1641 }
1642#endif
1643 uint64_t FuncEntryCount = getBBInfo(&*F.begin()).CountValue;
1644 uint64_t FuncMaxCount = FuncEntryCount;
1645 for (auto &BB : F) {
1646 auto BI = findBBInfo(&BB);
1647 if (BI == nullptr)
1648 continue;
1649 FuncMaxCount = std::max(FuncMaxCount, BI->CountValue);
1650 }
1651
1652 // Fix the obviously inconsistent entry count.
1653 if (FuncMaxCount > 0 && FuncEntryCount == 0)
1654 FuncEntryCount = 1;
1655 F.setEntryCount(ProfileCount(FuncEntryCount, Function::PCT_Real));
1656 markFunctionAttributes(FuncEntryCount, FuncMaxCount);
1657
1658 // Now annotate select instructions
1659 FuncInfo.SIVisitor.annotateSelects(F, this, &CountPosition);
1660 assert(CountPosition == ProfileCountSize);
1661
1662 LLVM_DEBUG(FuncInfo.dumpInfo("after reading profile."));
1663}
1664
1665// Assign the scaled count values to the BB with multiple out edges.
1666void PGOUseFunc::setBranchWeights() {
1667 // Generate MD_prof metadata for every branch instruction.
1668 LLVM_DEBUG(dbgs() << "\nSetting branch weights for func " << F.getName()
1669 << " IsCS=" << IsCS << "\n");
1670 for (auto &BB : F) {
1671 Instruction *TI = BB.getTerminator();
1672 if (TI->getNumSuccessors() < 2)
1673 continue;
1674 if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) ||
1675 isa<IndirectBrInst>(TI) || isa<InvokeInst>(TI) ||
1676 isa<CallBrInst>(TI)))
1677 continue;
1678
1679 if (getBBInfo(&BB).CountValue == 0)
1680 continue;
1681
1682 // We have a non-zero Branch BB.
1683 const UseBBInfo &BBCountInfo = getBBInfo(&BB);
1684 unsigned Size = BBCountInfo.OutEdges.size();
1685 SmallVector<uint64_t, 2> EdgeCounts(Size, 0);
1686 uint64_t MaxCount = 0;
1687 for (unsigned s = 0; s < Size; s++) {
1688 const PGOUseEdge *E = BBCountInfo.OutEdges[s];
1689 const BasicBlock *SrcBB = E->SrcBB;
1690 const BasicBlock *DestBB = E->DestBB;
1691 if (DestBB == nullptr)
1692 continue;
1693 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
1694 uint64_t EdgeCount = E->CountValue;
1695 if (EdgeCount > MaxCount)
1696 MaxCount = EdgeCount;
1697 EdgeCounts[SuccNum] = EdgeCount;
1698 }
1699
1700 if (MaxCount)
1701 setProfMetadata(M, TI, EdgeCounts, MaxCount);
1702 else {
1703 // A zero MaxCount can come about when we have a BB with a positive
1704 // count, and whose successor blocks all have 0 count. This can happen
1705 // when there is no exit block and the code exits via a noreturn function.
1706 auto &Ctx = M->getContext();
1707 Ctx.diagnose(DiagnosticInfoPGOProfile(
1708 M->getName().data(),
1709 Twine("Profile in ") + F.getName().str() +
1710 Twine(" partially ignored") +
1711 Twine(", possibly due to the lack of a return path."),
1712 DS_Warning));
1713 }
1714 }
1715}
1716
// Returns true when at least one predecessor of BB ends in an indirectbr
// instruction, and false otherwise (including when BB has no predecessors).
// NOTE(review): the signature (embedded line 1717) is not visible in this
// listing; the body uses a basic block named BB.
1718 for (BasicBlock *Pred : predecessors(BB)) {
1719 if (isa<IndirectBrInst>(Pred->getTerminator()))
1720 return true;
1721 }
1722 return false;
1723}
1724
1725void PGOUseFunc::annotateIrrLoopHeaderWeights() {
1726 LLVM_DEBUG(dbgs() << "\nAnnotating irreducible loop header weights.\n");
1727 // Find irr loop headers
1728 for (auto &BB : F) {
1729 // As a heuristic also annotate indrectbr targets as they have a high chance
1730 // to become an irreducible loop header after the indirectbr tail
1731 // duplication.
1732 if (BFI->isIrrLoopHeader(&BB) || isIndirectBrTarget(&BB)) {
1733 Instruction *TI = BB.getTerminator();
1734 const UseBBInfo &BBCountInfo = getBBInfo(&BB);
1735 setIrrLoopHeaderMetadata(M, TI, BBCountInfo.CountValue);
1736 }
1737 }
1738}
1739
// Insert, right before SI, a call to llvm.instrprof.increment.step whose step
// operand is the select condition zero-extended to i64, then advance
// *CurCtrIdx to the next counter index.
// NOTE(review): the condition guarding the early return below (embedded line
// 1741) is not visible in this listing.
1740void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) {
1742 return;
1743 Module *M = F.getParent();
1744 IRBuilder<> Builder(&SI);
1745 Type *Int64Ty = Builder.getInt64Ty();
1746 Type *I8PtrTy = Builder.getInt8PtrTy();
// The step is 1 when the condition is true and 0 otherwise, so the counter at
// *CurCtrIdx accumulates how often the condition was true.
1747 auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty);
1748 Builder.CreateCall(
1749 Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment_step),
1750 {ConstantExpr::getBitCast(FuncNameVar, I8PtrTy),
1751 Builder.getInt64(FuncHash), Builder.getInt32(TotalNumCtrs),
1752 Builder.getInt32(*CurCtrIdx), Step});
1753 ++(*CurCtrIdx);
1754}
1755
1756void SelectInstVisitor::annotateOneSelectInst(SelectInst &SI) {
1757 std::vector<uint64_t> &CountFromProfile = UseFunc->getProfileRecord().Counts;
1758 assert(*CurCtrIdx < CountFromProfile.size() &&
1759 "Out of bound access of counters");
1760 uint64_t SCounts[2];
1761 SCounts[0] = CountFromProfile[*CurCtrIdx]; // True count
1762 ++(*CurCtrIdx);
1763 uint64_t TotalCount = 0;
1764 auto BI = UseFunc->findBBInfo(SI.getParent());
1765 if (BI != nullptr)
1766 TotalCount = BI->CountValue;
1767 // False Count
1768 SCounts[1] = (TotalCount > SCounts[0] ? TotalCount - SCounts[0] : 0);
1769 uint64_t MaxCount = std::max(SCounts[0], SCounts[1]);
1770 if (MaxCount)
1771 setProfMetadata(F.getParent(), &SI, SCounts, MaxCount);
1772}
1773
1774void SelectInstVisitor::visitSelectInst(SelectInst &SI) {
1775 if (!PGOInstrSelect)
1776 return;
1777 // FIXME: do not handle this yet.
1778 if (SI.getCondition()->getType()->isVectorTy())
1779 return;
1780
1781 switch (Mode) {
1782 case VM_counting:
1783 NSIs++;
1784 return;
1785 case VM_instrument:
1786 instrumentOneSelectInst(SI);
1787 return;
1788 case VM_annotate:
1789 annotateOneSelectInst(SI);
1790 return;
1791 }
1792
1793 llvm_unreachable("Unknown visiting mode");
1794}
1795
1796// Traverse all value sites and annotate the instructions for every value kind.
// NOTE(review): listing line 1798 (the condition guarding the early return
// below) is absent from this listing.
1797void PGOUseFunc::annotateValueSites() {
1799 return;
1800
1801 // Create the PGOFuncName meta data.
1802 createPGOFuncNameMetadata(F, FuncInfo.FuncName);
1803
// Dispatch to the per-kind overload for each kind in [IPVK_First, IPVK_Last].
1804 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
1805 annotateValueSites(Kind);
1806}
1807
1808// Annotate the instructions for a specific value kind.
// If the number of value sites in the profile record differs from the number
// collected in FuncInfo.ValueSites[Kind], a warning is diagnosed and nothing
// is annotated for this kind.
1809void PGOUseFunc::annotateValueSites(uint32_t Kind) {
1810 assert(Kind <= IPVK_Last);
1811 unsigned ValueSiteIndex = 0;
1812 auto &ValueSites = FuncInfo.ValueSites[Kind];
1813 unsigned NumValueSites = ProfileRecord.getNumValueSites(Kind);
1814 if (NumValueSites != ValueSites.size()) {
1815 auto &Ctx = M->getContext();
1816 Ctx.diagnose(DiagnosticInfoPGOProfile(
1817 M->getName().data(),
1818 Twine("Inconsistent number of value sites for ") +
1819 Twine(ValueProfKindDescr[Kind]) +
1820 Twine(" profiling in \"") + F.getName().str() +
1821 Twine("\", possibly due to the use of a stale profile."),
1822 DS_Warning));
1823 return;
1824 }
1825
// Sites are annotated in collection order; ValueSiteIndex is the running index
// of the current site.
1826 for (VPCandidateInfo &I : ValueSites) {
1827 LLVM_DEBUG(dbgs() << "Read one value site profile (kind = " << Kind
1828 << "): Index = " << ValueSiteIndex << " out of "
1829 << NumValueSites << "\n");
1830 annotateValueSite(*M, *I.AnnotatedInst, ProfileRecord,
1831 static_cast<InstrProfValueKind>(Kind), ValueSiteIndex,
1832 Kind == IPVK_MemOPSize ? MaxNumMemOPAnnotations
// NOTE(review): listing line 1833 (the remainder of the conditional expression
// and the end of this call) is absent from this listing.
1834 ValueSiteIndex++;
1835 }
1836}
1837
1838// Collect the set of members for each Comdat in module M and store
1839// in ComdatMembers.
1841 Module &M,
1842 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
1843 if (!DoComdatRenaming)
1844 return;
1845 for (Function &F : M)
1846 if (Comdat *C = F.getComdat())
1847 ComdatMembers.insert(std::make_pair(C, &F));
1848 for (GlobalVariable &GV : M.globals())
1849 if (Comdat *C = GV.getComdat())
1850 ComdatMembers.insert(std::make_pair(C, &GV));
1851 for (GlobalAlias &GA : M.aliases())
1852 if (Comdat *C = GA.getComdat())
1853 ComdatMembers.insert(std::make_pair(C, &GA));
1854}
1855
1856// Don't perform PGO instrumeatnion / profile-use.
1857static bool skipPGO(const Function &F) {
1858 if (F.isDeclaration())
1859 return true;
1860 if (F.hasFnAttribute(llvm::Attribute::NoProfile))
1861 return true;
1862 if (F.hasFnAttribute(llvm::Attribute::SkipProfile))
1863 return true;
1864 if (F.getInstructionCount() < PGOFunctionSizeThreshold)
1865 return true;
1866
1867 // If there are too many critical edges, PGO might cause
1868 // compiler time problem. Skip PGO if the number of
1869 // critical edges execeed the threshold.
1870 unsigned NumCriticalEdges = 0;
1871 for (auto &BB : F) {
1872 const Instruction *TI = BB.getTerminator();
1873 for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) {
1874 if (isCriticalEdge(TI, I))
1875 NumCriticalEdges++;
1876 }
1877 }
1878 if (NumCriticalEdges > PGOFunctionCriticalEdgeThreshold) {
1879 LLVM_DEBUG(dbgs() << "In func " << F.getName()
1880 << ", NumCriticalEdges=" << NumCriticalEdges
1881 << " exceed the threshold. Skip PGO.\n");
1882 return true;
1883 }
1884
1885 return false;
1886}
1887
1889 Module &M, function_ref<TargetLibraryInfo &(Function &)> LookupTLI,
1891 function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS) {
1892 // For the context-sensitve instrumentation, we should have a separated pass
1893 // (before LTO/ThinLTO linking) to create these variables.
1894 if (!IsCS)
1895 createIRLevelProfileFlagVar(M, /*IsCS=*/false);
1896 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
1897 collectComdatMembers(M, ComdatMembers);
1898
1899 for (auto &F : M) {
1900 if (skipPGO(F))
1901 continue;
1902 auto &TLI = LookupTLI(F);
1903 auto *BPI = LookupBPI(F);
1904 auto *BFI = LookupBFI(F);
1905 instrumentOneFunc(F, &M, TLI, BPI, BFI, ComdatMembers, IsCS);
1906 }
1907 return true;
1908}
1909
// NOTE(review): the signature of this definition (listing lines 1910-1911) and
// listing line 1915 are absent from this listing; the enclosing function is
// presumably a module pass run() method -- confirm against the full file.
// The visible part calls createProfileFileNameVar(M, CSInstrName) and reports
// all analyses as preserved.
1912 createProfileFileNameVar(M, CSInstrName);
1913 // The variable in a comdat may be discarded by LTO. Ensure the declaration
1914 // will be retained.
1916 return PreservedAnalyses::all();
1917}
1918
// NOTE(review): the signature of this definition (listing lines 1919-1920) and
// the bodies of the three lambdas (listing lines 1923, 1926, 1929) are absent
// from this listing.
// The visible part obtains the function analysis manager through the module
// proxy, builds per-function TLI/BPI/BFI lookup callbacks on top of it, and
// runs InstrumentAllFunctions(). All analyses are reported preserved only if
// that call returns false; otherwise none are.
1921 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
1922 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
1924 };
1925 auto LookupBPI = [&FAM](Function &F) {
1927 };
1928 auto LookupBFI = [&FAM](Function &F) {
1930 };
1931
1932 if (!InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, IsCS))
1933 return PreservedAnalyses::all();
1934
1935 return PreservedAnalyses::none();
1936}
1937
1938// Using the ratio b/w sums of profile count values and BFI count values to
1939// adjust the func entry count.
1940static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI,
1941 BranchProbabilityInfo &NBPI) {
1942 Function &F = Func.getFunc();
1943 BlockFrequencyInfo NBFI(F, NBPI, LI);
1944#ifndef NDEBUG
1945 auto BFIEntryCount = F.getEntryCount();
1946 assert(BFIEntryCount && (BFIEntryCount->getCount() > 0) &&
1947 "Invalid BFI Entrycount");
1948#endif
1949 auto SumCount = APFloat::getZero(APFloat::IEEEdouble());
1950 auto SumBFICount = APFloat::getZero(APFloat::IEEEdouble());
1951 for (auto &BBI : F) {
1952 uint64_t CountValue = 0;
1953 uint64_t BFICountValue = 0;
1954 if (!Func.findBBInfo(&BBI))
1955 continue;
1956 auto BFICount = NBFI.getBlockProfileCount(&BBI);
1957 CountValue = Func.getBBInfo(&BBI).CountValue;
1958 BFICountValue = *BFICount;
1959 SumCount.add(APFloat(CountValue * 1.0), APFloat::rmNearestTiesToEven);
1960 SumBFICount.add(APFloat(BFICountValue * 1.0), APFloat::rmNearestTiesToEven);
1961 }
1962 if (SumCount.isZero())
1963 return;
1964
1965 assert(SumBFICount.compare(APFloat(0.0)) == APFloat::cmpGreaterThan &&
1966 "Incorrect sum of BFI counts");
1967 if (SumBFICount.compare(SumCount) == APFloat::cmpEqual)
1968 return;
1969 double Scale = (SumCount / SumBFICount).convertToDouble();
1970 if (Scale < 1.001 && Scale > 0.999)
1971 return;
1972
1973 uint64_t FuncEntryCount = Func.getBBInfo(&*F.begin()).CountValue;
1974 uint64_t NewEntryCount = 0.5 + FuncEntryCount * Scale;
1975 if (NewEntryCount == 0)
1976 NewEntryCount = 1;
1977 if (NewEntryCount != FuncEntryCount) {
1978 F.setEntryCount(ProfileCount(NewEntryCount, Function::PCT_Real));
1979 LLVM_DEBUG(dbgs() << "FixFuncEntryCount: in " << F.getName()
1980 << ", entry_count " << FuncEntryCount << " --> "
1981 << NewEntryCount << "\n");
1982 }
1983}
1984
1985// Compare the profile count values with BFI count values, and print out
1986// the non-matching ones.
// A remark is emitted for each mismatching block, followed by a per-function
// summary remark when at least one block mismatched.
// NOTE(review): listing lines 1988 and 1990 (part of the parameter list),
// 1996 and 2040 are absent from this listing. The index at the end of the
// listing gives the signature as
//   static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI,
//                             BranchProbabilityInfo &NBPI,
//                             uint64_t HotCountThreshold,
//                             uint64_t ColdCountThreshold)
1987static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI,
1989 uint64_t HotCountThreshold,
1991 Function &F = Func.getFunc();
1992 BlockFrequencyInfo NBFI(F, NBPI, LI);
1993 // bool PrintFunc = false;
1994 bool HotBBOnly = PGOVerifyHotBFI;
1995 std::string Msg;
1997
1998 unsigned BBNum = 0, BBMisMatchNum = 0, NonZeroBBNum = 0;
1999 for (auto &BBI : F) {
2000 uint64_t CountValue = 0;
2001 uint64_t BFICountValue = 0;
2002
// Blocks without a valid profile count are treated as having count 0.
2003 if (Func.getBBInfo(&BBI).CountValid)
2004 CountValue = Func.getBBInfo(&BBI).CountValue;
2005
2006 BBNum++;
2007 if (CountValue)
2008 NonZeroBBNum++;
2009 auto BFICount = NBFI.getBlockProfileCount(&BBI);
2010 if (BFICount)
2011 BFICountValue = *BFICount;
2012
// Hot-only mode: report a block only when its hot/cold classification flips
// between the raw profile count and the BFI count.
2013 if (HotBBOnly) {
2014 bool rawIsHot = CountValue >= HotCountThreshold;
2015 bool BFIIsHot = BFICountValue >= HotCountThreshold;
2016 bool rawIsCold = CountValue <= ColdCountThreshold;
2017 bool ShowCount = false;
2018 if (rawIsHot && !BFIIsHot) {
2019 Msg = "raw-Hot to BFI-nonHot";
2020 ShowCount = true;
2021 } else if (rawIsCold && BFIIsHot) {
2022 Msg = "raw-Cold to BFI-Hot";
2023 ShowCount = true;
2024 }
2025 if (!ShowCount)
2026 continue;
2027 } else {
// Default mode: ignore blocks where both counts are below the cutoff, then
// report when the absolute difference exceeds CountValue / 100 *
// PGOVerifyBFIRatio.
2028 if ((CountValue < PGOVerifyBFICutoff) &&
2029 (BFICountValue < PGOVerifyBFICutoff))
2030 continue;
2031 uint64_t Diff = (BFICountValue >= CountValue)
2032 ? BFICountValue - CountValue
2033 : CountValue - BFICountValue;
2034 if (Diff <= CountValue / 100 * PGOVerifyBFIRatio)
2035 continue;
2036 }
2037 BBMisMatchNum++;
2038
2039 ORE.emit([&]() {
2041 F.getSubprogram(), &BBI);
2042 Remark << "BB " << ore::NV("Block", BBI.getName())
2043 << " Count=" << ore::NV("Count", CountValue)
2044 << " BFI_Count=" << ore::NV("Count", BFICountValue);
2045 if (!Msg.empty())
2046 Remark << " (" << Msg << ")";
2047 return Remark;
2048 });
2049 }
// Per-function summary, emitted only when some block mismatched.
2050 if (BBMisMatchNum)
2051 ORE.emit([&]() {
2052 return OptimizationRemarkAnalysis(DEBUG_TYPE, "bfi-verify",
2053 F.getSubprogram(), &F.getEntryBlock())
2054 << "In Func " << ore::NV("Function", F.getName())
2055 << ": Num_of_BB=" << ore::NV("Count", BBNum)
2056 << ", Num_of_non_zerovalue_BB=" << ore::NV("Count", NonZeroBBNum)
2057 << ", Num_of_mis_matching_BB=" << ore::NV("Count", BBMisMatchNum);
2058 });
2059}
2060
// Read the profile in ProfileFileName and annotate every function of M that
// skipPGO() does not reject. Returns false (after diagnosing, except for the
// missing-CS-profile case) when the profile cannot be read or is of an
// unsupported kind, and true once all functions have been processed.
// NOTE(review): several listing lines of this definition are absent from this
// listing: 2061 (start of the signature), 2065-2066, 2114-2115, 2151, 2182,
// 2187, 2194, 2197, 2208-2209 and 2220. The index at the end of the listing
// gives the signature as
//   static bool annotateAllFunctions(
//       Module &M, StringRef ProfileFileName,
//       StringRef ProfileRemappingFileName, vfs::FileSystem &FS,
//       function_ref<TargetLibraryInfo &(Function &)> LookupTLI,
//       function_ref<BranchProbabilityInfo *(Function &)> LookupBPI,
//       function_ref<BlockFrequencyInfo *(Function &)> LookupBFI,
//       ProfileSummaryInfo *PSI, bool IsCS)
2062 Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName,
2063 vfs::FileSystem &FS,
2064 function_ref<TargetLibraryInfo &(Function &)> LookupTLI,
2067 ProfileSummaryInfo *PSI, bool IsCS) {
2068 LLVM_DEBUG(dbgs() << "Read in profile counters: ");
2069 auto &Ctx = M.getContext();
2070 // Read the counter array from file.
2071 auto ReaderOrErr = IndexedInstrProfReader::create(ProfileFileName, FS,
2072 ProfileRemappingFileName);
2073 if (Error E = ReaderOrErr.takeError()) {
2074 handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) {
2075 Ctx.diagnose(
2076 DiagnosticInfoPGOProfile(ProfileFileName.data(), EI.message()));
2077 });
2078 return false;
2079 }
2080
2081 std::unique_ptr<IndexedInstrProfReader> PGOReader =
2082 std::move(ReaderOrErr.get());
2083 if (!PGOReader) {
2084 Ctx.diagnose(DiagnosticInfoPGOProfile(ProfileFileName.data(),
2085 StringRef("Cannot get PGOReader")));
2086 return false;
2087 }
// A context-sensitive use pass needs a CS profile; this case returns silently.
2088 if (!PGOReader->hasCSIRLevelProfile() && IsCS)
2089 return false;
2090
2091 // TODO: might need to change the warning once the clang option is finalized.
2092 if (!PGOReader->isIRLevelProfile() && !PGOReader->hasMemoryProfile()) {
2093 Ctx.diagnose(DiagnosticInfoPGOProfile(
2094 ProfileFileName.data(), "Not an IR level instrumentation profile"));
2095 return false;
2096 }
2097 if (PGOReader->hasSingleByteCoverage()) {
2098 Ctx.diagnose(DiagnosticInfoPGOProfile(
2099 ProfileFileName.data(),
2100 "Cannot use coverage profiles for optimization"));
2101 return false;
2102 }
2103 if (PGOReader->functionEntryOnly()) {
2104 Ctx.diagnose(DiagnosticInfoPGOProfile(
2105 ProfileFileName.data(),
2106 "Function entry profiles are not yet supported for optimization"));
2107 return false;
2108 }
2109
2110 // Add the profile summary (read from the header of the indexed summary) here
2111 // so that we can use it below when reading counters (which checks if the
2112 // function should be marked with a cold or inlinehint attribute).
2113 M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()),
// NOTE(review): listing lines 2114-2115 (the rest of this call) are absent.
2116 PSI->refresh();
2117
2118 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
2119 collectComdatMembers(M, ComdatMembers);
2120 std::vector<Function *> HotFunctions;
2121 std::vector<Function *> ColdFunctions;
2122
2123 // If the profile marked as always instrument the entry BB, do the
2124 // same. Note this can be overwritten by the internal option in CFGMST.h
2125 bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled();
2126 if (PGOInstrumentEntry.getNumOccurrences() > 0)
2127 InstrumentFuncEntry = PGOInstrumentEntry;
2128 for (auto &F : M) {
2129 if (skipPGO(F))
2130 continue;
2131 auto &TLI = LookupTLI(F);
2132 auto *BPI = LookupBPI(F);
2133 auto *BFI = LookupBFI(F);
2134 // Split indirectbr critical edges here before computing the MST rather than
2135 // later in getInstrBB() to avoid invalidating it.
2136 SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, BFI);
2137 PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS,
2138 InstrumentFuncEntry);
2139 // Read and match memprof first since we do this via debug info and can
2140 // match even if there is an IR mismatch detected for regular PGO below.
2141 if (PGOReader->hasMemoryProfile())
2142 Func.readMemprof(PGOReader.get());
2143
2144 if (!PGOReader->isIRLevelProfile())
2145 continue;
2146
2147 // When PseudoKind is set to a value other than InstrProfRecord::NotPseudo,
2148 // it means the profile for the function is unrepresentative and this
2149 // function is actually hot / warm. We will reset the function hot / cold
2150 // attribute and drop all the profile counters.
// NOTE(review): listing line 2151 (presumably the declaration of PseudoKind)
// is absent.
2152 bool AllZeros = false;
2153 if (!Func.readCounters(PGOReader.get(), AllZeros, PseudoKind))
2154 continue;
// All-zero counters: set a zero entry count and, if the program max count is
// non-zero, queue the function to be marked cold.
2155 if (AllZeros) {
2156 F.setEntryCount(ProfileCount(0, Function::PCT_Real));
2157 if (Func.getProgramMaxCount() != 0)
2158 ColdFunctions.push_back(&F);
2159 continue;
2160 }
2161 if (PseudoKind != InstrProfRecord::NotPseudo) {
2162 // Clear function attribute cold.
2163 if (F.hasFnAttribute(Attribute::Cold))
2164 F.removeFnAttr(Attribute::Cold);
2165 // Set function attribute as hot.
2166 if (PseudoKind == InstrProfRecord::PseudoHot)
2167 F.addFnAttr(Attribute::Hot);
2168 continue;
2169 }
2170 Func.populateCounters();
2171 Func.setBranchWeights();
2172 Func.annotateValueSites();
2173 Func.annotateIrrLoopHeaderWeights();
// Hot/cold attributes are only recorded here and applied after the loop.
2174 PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr();
2175 if (FreqAttr == PGOUseFunc::FFA_Cold)
2176 ColdFunctions.push_back(&F);
2177 else if (FreqAttr == PGOUseFunc::FFA_Hot)
2178 HotFunctions.push_back(&F);
// Optional viewing of the BFI computed from the annotated counts.
2179 if (PGOViewCounts != PGOVCT_None &&
2180 (ViewBlockFreqFuncName.empty() ||
2181 F.getName().equals(ViewBlockFreqFuncName))) {
// NOTE(review): listing lines 2182 and 2187 are absent.
2183 std::unique_ptr<BranchProbabilityInfo> NewBPI =
2184 std::make_unique<BranchProbabilityInfo>(F, LI);
2185 std::unique_ptr<BlockFrequencyInfo> NewBFI =
2186 std::make_unique<BlockFrequencyInfo>(F, *NewBPI, LI);
2188 NewBFI->view();
2189 else if (PGOViewCounts == PGOVCT_Text) {
2190 dbgs() << "pgo-view-counts: " << Func.getFunc().getName() << "\n";
2191 NewBFI->print(dbgs());
2192 }
2193 }
// Optional viewing of the raw profile counts.
// NOTE(review): listing lines 2194 and 2197 are absent.
2195 (ViewBlockFreqFuncName.empty() ||
2196 F.getName().equals(ViewBlockFreqFuncName))) {
2198 if (ViewBlockFreqFuncName.empty())
2199 WriteGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName());
2200 else
2201 ViewGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName());
2202 else if (PGOViewRawCounts == PGOVCT_Text) {
2203 dbgs() << "pgo-view-raw-counts: " << Func.getFunc().getName() << "\n";
2204 Func.dumpInfo();
2205 }
2206 }
2207
// NOTE(review): listing lines 2208-2209 (the condition opening the block that
// is closed at listing line 2223) are absent.
2210 BranchProbabilityInfo NBPI(F, LI);
2211
2212 // Fix func entry count.
2213 if (PGOFixEntryCount)
2214 fixFuncEntryCount(Func, LI, NBPI);
2215
2216 // Verify BlockFrequency information.
2217 uint64_t HotCountThreshold = 0, ColdCountThreshold = 0;
2218 if (PGOVerifyHotBFI) {
2219 HotCountThreshold = PSI->getOrCompHotCountThreshold();
// NOTE(review): listing line 2220 is absent.
2221 }
2222 verifyFuncBFI(Func, LI, NBPI, HotCountThreshold, ColdCountThreshold);
2223 }
2224 }
2225
2226 // Set function hotness attribute from the profile.
2227 // We have to apply these attributes at the end because their presence
2228 // can affect the BranchProbabilityInfo of any callers, resulting in an
2229 // inconsistent MST between prof-gen and prof-use.
2230 for (auto &F : HotFunctions) {
2231 F->addFnAttr(Attribute::InlineHint);
2232 LLVM_DEBUG(dbgs() << "Set inline attribute to function: " << F->getName()
2233 << "\n");
2234 }
2235 for (auto &F : ColdFunctions) {
2236 // Only set when there is no Attribute::Hot set by the user. For Hot
2237 // attribute, user's annotation has the precedence over the profile.
2238 if (F->hasFnAttribute(Attribute::Hot)) {
2239 auto &Ctx = M.getContext();
2240 std::string Msg = std::string("Function ") + F->getName().str() +
2241 std::string(" is annotated as a hot function but"
2242 " the profile is cold");
2243 Ctx.diagnose(
2244 DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning));
2245 continue;
2246 }
2247 F->addFnAttr(Attribute::Cold);
2248 LLVM_DEBUG(dbgs() << "Set cold attribute to function: " << F->getName()
2249 << "\n");
2250 }
2251 return true;
2252}
2253
// NOTE(review): the first line of this constructor's signature (listing line
// 2254), the end of its parameter list (2256) and listing line 2265 are absent
// from this listing.
// The visible part moves the file names and the file system handle into the
// members, then lets the PGOTestProfileFile / PGOTestProfileRemappingFile
// options override the two file names when they are non-empty.
2255 std::string Filename, std::string RemappingFilename, bool IsCS,
2257 : ProfileFileName(std::move(Filename)),
2258 ProfileRemappingFileName(std::move(RemappingFilename)), IsCS(IsCS),
2259 FS(std::move(VFS)) {
2260 if (!PGOTestProfileFile.empty())
2261 ProfileFileName = PGOTestProfileFile;
2262 if (!PGOTestProfileRemappingFile.empty())
2263 ProfileRemappingFileName = PGOTestProfileRemappingFile;
// NOTE(review): the statement controlled by this check (listing line 2265) is
// absent from this listing.
2264 if (!FS)
2266}
2267
// NOTE(review): the signature of this definition (listing lines 2268-2269) and
// the bodies of the three lambdas (listing lines 2273, 2276, 2279) are absent
// from this listing.
// The visible part builds per-function TLI/BPI/BFI lookup callbacks on top of
// the function analysis manager, fetches the ProfileSummaryAnalysis result,
// and runs annotateAllFunctions(). All analyses are reported preserved only if
// that call returns false; otherwise none are.
2270
2271 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
2272 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
2274 };
2275 auto LookupBPI = [&FAM](Function &F) {
2277 };
2278 auto LookupBFI = [&FAM](Function &F) {
2280 };
2281
2282 auto *PSI = &AM.getResult<ProfileSummaryAnalysis>(M);
2283
2284 if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName, *FS,
2285 LookupTLI, LookupBPI, LookupBFI, PSI, IsCS))
2286 return PreservedAnalyses::all();
2287
2288 return PreservedAnalyses::none();
2289}
2290
2291static std::string getSimpleNodeName(const BasicBlock *Node) {
2292 if (!Node->getName().empty())
2293 return std::string(Node->getName());
2294
2295 std::string SimpleNodeName;
2296 raw_string_ostream OS(SimpleNodeName);
2297 Node->printAsOperand(OS, false);
2298 return OS.str();
2299}
2300
// NOTE(review): the first line of this definition's signature (listing line
// 2301) and listing lines 2307, 2319 and 2339 are absent from this listing.
// Callers visible in this file pass (Module *, Instruction *, counts,
// MaxCount).
// The visible part scales each edge count by a scale derived from MaxCount,
// checks the resulting weights against expect annotations, and attaches them
// to TI as MD_prof branch-weight metadata. The trailing section builds a
// branch-probability string and emits it as an optimization remark.
2302 ArrayRef<uint64_t> EdgeCounts,
2303 uint64_t MaxCount) {
2304 MDBuilder MDB(M->getContext());
2305 assert(MaxCount > 0 && "Bad max count");
2306 uint64_t Scale = calculateCountScale(MaxCount);
// NOTE(review): listing line 2307 (presumably the declaration of Weights) is
// absent.
2308 for (const auto &ECI : EdgeCounts)
2309 Weights.push_back(scaleBranchCount(ECI, Scale));
2310
2311 LLVM_DEBUG(dbgs() << "Weight is: "; for (const auto &W
2312 : Weights) {
2313 dbgs() << W << " ";
2314 } dbgs() << "\n";);
2315
2316 misexpect::checkExpectAnnotations(*TI, Weights, /*IsFrontend=*/false);
2317
2318 TI->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
// NOTE(review): listing line 2319 (the condition opening the block closed at
// listing line 2344) is absent.
2320 std::string BrCondStr = getBranchCondString(TI);
2321 if (BrCondStr.empty())
2322 return;
2323
// WSum is the sum of the scaled weights; TotalCount is the sum of the raw edge
// counts.
2324 uint64_t WSum =
2325 std::accumulate(Weights.begin(), Weights.end(), (uint64_t)0,
2326 [](uint64_t w1, uint64_t w2) { return w1 + w2; });
2327 uint64_t TotalCount =
2328 std::accumulate(EdgeCounts.begin(), EdgeCounts.end(), (uint64_t)0,
2329 [](uint64_t c1, uint64_t c2) { return c1 + c2; });
// Rescale against WSum before forming the probability of the first edge.
2330 Scale = calculateCountScale(WSum);
2331 BranchProbability BP(scaleBranchCount(Weights[0], Scale),
2332 scaleBranchCount(WSum, Scale));
2333 std::string BranchProbStr;
2334 raw_string_ostream OS(BranchProbStr);
2335 OS << BP;
2336 OS << " (total count : " << TotalCount << ")";
2337 OS.flush();
2338 Function *F = TI->getParent()->getParent();
// NOTE(review): listing line 2339 (presumably the declaration of ORE) is
// absent.
2340 ORE.emit([&]() {
2341 return OptimizationRemark(DEBUG_TYPE, "pgo-instrumentation", TI)
2342 << BrCondStr << " is true with probability : " << BranchProbStr;
2343 });
2344 }
2345}
2346
// Helpers placed in namespace llvm: the irreducible-loop metadata setter and
// the graph traits that let a PGOUseFunc be written or viewed as a DOT graph.
2347namespace llvm {
2348
// NOTE(review): listing line 2349 (the signature of the function below) is
// absent from this listing; the caller visible in this file passes
// (M, TI, Count). The body attaches MD_irr_loop header-weight metadata built
// from Count to TI.
2350 MDBuilder MDB(M->getContext());
2351 TI->setMetadata(llvm::LLVMContext::MD_irr_loop,
2352 MDB.createIrrLoopHeaderWeight(Count));
2353}
2354
// Graph traits exposing the CFG of the function wrapped by a PGOUseFunc: nodes
// are its basic blocks and children are block successors.
// NOTE(review): listing lines 2357-2358 (presumably the ChildIteratorType and
// nodes_iterator aliases) and 2364 (the signature of child_begin) are absent
// from this listing.
2355template <> struct GraphTraits<PGOUseFunc *> {
2356 using NodeRef = const BasicBlock *;
2359
2360 static NodeRef getEntryNode(const PGOUseFunc *G) {
2361 return &G->getFunc().front();
2362 }
2363
2365 return succ_begin(N);
2366 }
2367
2368 static ChildIteratorType child_end(const NodeRef N) { return succ_end(N); }
2369
2370 static nodes_iterator nodes_begin(const PGOUseFunc *G) {
2371 return nodes_iterator(G->getFunc().begin());
2372 }
2373
2374 static nodes_iterator nodes_end(const PGOUseFunc *G) {
2375 return nodes_iterator(G->getFunc().end());
2376 }
2377};
2378
// DOT rendering traits for a PGOUseFunc graph.
// NOTE(review): listing line 2381 (the constructor's initializer and body) is
// absent from this listing.
2379template <> struct DOTGraphTraits<PGOUseFunc *> : DefaultDOTGraphTraits {
2380 explicit DOTGraphTraits(bool isSimple = false)
2382
// The graph is named after the wrapped function.
2383 static std::string getGraphName(const PGOUseFunc *G) {
2384 return std::string(G->getFunc().getName());
2385 }
2386
// Node label: the block name, then its profile count ("Unknown" when no valid
// count is recorded). When select instrumentation is enabled, one line is
// added per select instruction with its true/false branch weights.
2387 std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph) {
2388 std::string Result;
2389 raw_string_ostream OS(Result);
2390
2391 OS << getSimpleNodeName(Node) << ":\\l";
2392 UseBBInfo *BI = Graph->findBBInfo(Node);
2393 OS << "Count : ";
2394 if (BI && BI->CountValid)
2395 OS << BI->CountValue << "\\l";
2396 else
2397 OS << "Unknown\\l";
2398
2399 if (!PGOInstrSelect)
2400 return Result;
2401
2402 for (const Instruction &I : *Node) {
2403 if (!isa<SelectInst>(&I))
2404 continue;
2405 // Display scaled counts for SELECT instruction:
2406 OS << "SELECT : { T = ";
2407 uint64_t TC, FC;
2408 bool HasProf = extractBranchWeights(I, TC, FC);
2409 if (!HasProf)
2410 OS << "Unknown, F = Unknown }\\l";
2411 else
2412 OS << TC << ", F = " << FC << " }\\l";
2413 }
2414 return Result;
2415 }
2416};
2417
2418} // end namespace llvm
This file implements a class to represent arbitrary precision integral constant values and operations...
assume Assume Builder
This file contains the simple types necessary to represent the attributes associated with functions a...
basic Basic Alias true
SmallVector< MachineOperand, 4 > Cond
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:678
#define LLVM_ATTRIBUTE_UNUSED
Definition: Compiler.h:172
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
std::string Name
uint64_t Size
static BasicBlock * getInstrBB(CFGMST< Edge, BBInfo > &MST, Edge &E, const DenseSet< const BasicBlock * > &ExecBlocks)
#define DEBUG_TYPE
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
static cl::opt< unsigned > ColdCountThreshold("mfs-count-threshold", cl::desc("Minimum number of times a block must be executed to be retained."), cl::init(1), cl::Hidden)
AllocType
Module.h This file contains the declarations for the Module class.
return ToRemove size() > 0
static cl::opt< bool > PGOInstrumentEntry("pgo-instrument-entry", cl::init(false), cl::Hidden, cl::desc("Force to instrument function entry basicblock."))
static cl::opt< bool > PGOWarnMissing("pgo-warn-missing-function", cl::init(false), cl::Hidden, cl::desc("Use this option to turn on/off " "warnings about missing profile data for " "functions."))
static cl::opt< std::string > PGOTestProfileRemappingFile("pgo-test-profile-remapping-file", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path of profile remapping file. This is mainly for " "test purpose."))
static cl::opt< bool > PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden, cl::desc("Fix function entry count in profile use."))
static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI, BranchProbabilityInfo &NBPI)
static cl::opt< bool > PGOVerifyHotBFI("pgo-verify-hot-bfi", cl::init(false), cl::Hidden, cl::desc("Print out the non-match BFI count if a hot raw profile count " "becomes non-hot, or a cold raw profile count becomes hot. " "The print is enabled under -Rpass-analysis=pgo, or " "internal option -pass-remakrs-analysis=pgo."))
static void annotateFunctionWithHashMismatch(Function &F, LLVMContext &ctx)
static cl::opt< bool > MatchMemProf("pgo-match-memprof", cl::init(true), cl::Hidden, cl::desc("Perform matching and annotation of memprof profiles."))
static bool skipPGO(const Function &F)
static cl::opt< unsigned > PGOFunctionSizeThreshold("pgo-function-size-threshold", cl::Hidden, cl::desc("Do not instrument functions smaller than this threshold."))
static cl::opt< unsigned > MaxNumAnnotations("icp-max-annotations", cl::init(3), cl::Hidden, cl::desc("Max number of annotations for a single indirect " "call callsite"))
static void collectComdatMembers(Module &M, std::unordered_multimap< Comdat *, GlobalValue * > &ComdatMembers)
static void addCallStack(CallStackTrie &AllocTrie, const AllocationInfo *AllocInfo)
static cl::opt< unsigned > PGOVerifyBFICutoff("pgo-verify-bfi-cutoff", cl::init(5), cl::Hidden, cl::desc("Set the threshold for pgo-verify-bfi: skip the counts whose " "profile count value is below."))
static cl::opt< std::string > PGOTraceFuncHash("pgo-trace-func-hash", cl::init("-"), cl::Hidden, cl::value_desc("function name"), cl::desc("Trace the hash of the function with this name."))
static void instrumentOneFunc(Function &F, Module *M, TargetLibraryInfo &TLI, BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFI, std::unordered_multimap< Comdat *, GlobalValue * > &ComdatMembers, bool IsCS)
static void populateEHOperandBundle(VPCandidateInfo &Cand, DenseMap< BasicBlock *, ColorVector > &BlockColors, SmallVectorImpl< OperandBundleDef > &OpBundles)
static uint64_t computeStackId(GlobalValue::GUID Function, uint32_t LineOffset, uint32_t Column)
static cl::opt< bool > PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden, cl::desc("Use this option to turn on/off SELECT " "instruction instrumentation. "))
static cl::opt< bool > PGOFunctionEntryCoverage("pgo-function-entry-coverage", cl::Hidden, cl::desc("Use this option to enable function entry coverage instrumentation."))
static bool InstrumentAllFunctions(Module &M, function_ref< TargetLibraryInfo &(Function &)> LookupTLI, function_ref< BranchProbabilityInfo *(Function &)> LookupBPI, function_ref< BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS)
static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI, BranchProbabilityInfo &NBPI, uint64_t HotCountThreshold, uint64_t ColdCountThreshold)
static cl::opt< unsigned > PGOVerifyBFIRatio("pgo-verify-bfi-ratio", cl::init(2), cl::Hidden, cl::desc("Set the threshold for pgo-verify-bfi: only print out " "mismatched BFI if the difference percentage is greater than " "this value (in percentage)."))
static cl::opt< bool > DoComdatRenaming("do-comdat-renaming", cl::init(false), cl::Hidden, cl::desc("Append function hash to the name of COMDAT function to avoid " "function hash mismatch due to the preinliner"))
static cl::opt< unsigned > PGOFunctionCriticalEdgeThreshold("pgo-critical-edge-threshold", cl::init(20000), cl::Hidden, cl::desc("Do not instrument functions with the number of critical edges " " greater than this threshold."))
static cl::opt< std::string > PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path of profile data file. This is" "mainly for test purpose."))
static bool canRenameComdat(Function &F, std::unordered_multimap< Comdat *, GlobalValue * > &ComdatMembers)
static cl::opt< bool > PGOVerifyBFI("pgo-verify-bfi", cl::init(false), cl::Hidden, cl::desc("Print out mismatched BFI counts after setting profile metadata " "The print is enabled under -Rpass-analysis=pgo, or " "internal option -pass-remakrs-analysis=pgo."))
static cl::opt< bool > PGOOldCFGHashing("pgo-instr-old-cfg-hashing", cl::init(false), cl::Hidden, cl::desc("Use the old CFG function hashing"))
static uint64_t sumEdgeCount(const ArrayRef< PGOUseEdge * > Edges)
static cl::opt< bool > PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden, cl::desc("Use this option to turn on/off " "memory intrinsic size profiling."))
Function::ProfileCount ProfileCount
static void addCallsiteMetadata(Instruction &I, std::vector< uint64_t > &InlinedCallStack, LLVMContext &Ctx)
static cl::opt< bool > EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden, cl::desc("When this option is on, the annotated " "branch probability will be emitted as " "optimization remarks: -{Rpass|" "pass-remarks}=pgo-instrumentation"))
static cl::opt< unsigned > MaxNumMemOPAnnotations("memop-max-annotations", cl::init(4), cl::Hidden, cl::desc("Max number of preicise value annotations for a single memop" "intrinsic"))
static cl::opt< bool > DisableValueProfiling("disable-vp", cl::init(false), cl::Hidden, cl::desc("Disable Value Profiling"))
static std::string getSimpleNodeName(const BasicBlock *Node)
static cl::opt< bool > NoPGOWarnMismatchComdatWeak("no-pgo-warn-mismatch-comdat-weak", cl::init(true), cl::Hidden, cl::desc("The option is used to turn on/off " "warnings about hash mismatch for comdat " "or weak functions."))
static bool stackFrameIncludesInlinedCallStack(ArrayRef< Frame > ProfileCallStack, ArrayRef< uint64_t > InlinedCallStack, unsigned StartIndex=0)
static GlobalVariable * createIRLevelProfileFlagVar(Module &M, bool IsCS)
static bool isIndirectBrTarget(BasicBlock *BB)
static std::string getBranchCondString(Instruction *TI)
static bool annotateAllFunctions(Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName, vfs::FileSystem &FS, function_ref< TargetLibraryInfo &(Function &)> LookupTLI, function_ref< BranchProbabilityInfo *(Function &)> LookupBPI, function_ref< BlockFrequencyInfo *(Function &)> LookupBFI, ProfileSummaryInfo *PSI, bool IsCS)
static cl::opt< PGOViewCountsType > PGOViewRawCounts("pgo-view-raw-counts", cl::Hidden, cl::desc("A boolean option to show CFG dag or text " "with raw profile counts from " "profile data. See also option " "-pgo-view-counts. To limit graph " "display to only one function, use " "filtering option -view-bfi-func-name."), cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), clEnumValN(PGOVCT_Graph, "graph", "show a graph."), clEnumValN(PGOVCT_Text, "text", "show in text.")))
static const char * ValueProfKindDescr[]
This file provides the interface for IR based instrumentation passes ( (profile-gen,...
FunctionAnalysisManager FAM
This header defines various interfaces for pass management in LLVM.
This file contains the declarations for profiling metadata utility functions.
@ SI
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isSimple(Instruction *I)
This file contains some templates that are useful if you are working with the STL at all.
raw_pwrite_stream & OS
static void setBranchWeights(SwitchInst *SI, ArrayRef< uint32_t > Weights)
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
@ Names
Definition: TextStubV5.cpp:106
Defines the virtual file system interface vfs::FileSystem.
Value * RHS
void printAsOperand(OutputBuffer &OB, Prec P=Prec::Default, bool StrictlyWorse=false) const
static APFloat getZero(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Zero.
Definition: APFloat.h:921
Class for arbitrary precision integers.
Definition: APInt.h:75
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:620
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:774
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
iterator end() const
Definition: ArrayRef.h:152
iterator begin() const
Definition: ArrayRef.h:151
LLVM Basic Block Representation.
Definition: BasicBlock.h:56
iterator end()
Definition: BasicBlock.h:316
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:245
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:112
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:127
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
std::optional< uint64_t > getBlockProfileCount(const BasicBlock *BB, bool AllowSynthetic=false) const
Returns the estimated profile count of BB.
Conditional or Unconditional Branch instruction.
bool isConditional() const
Value * getCondition() const
Analysis pass which computes BranchProbabilityInfo.
Analysis providing branch probability information.
A union-find based Minimum Spanning Tree for CFG.
Definition: CFGMST.h:39
Edge & addEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W)
Definition: CFGMST.h:260
std::vector< std::unique_ptr< Edge > > AllEdges
Definition: CFGMST.h:45
BBInfo * findBBInfo(const BasicBlock *BB) const
Definition: CFGMST.h:90
BBInfo & getBBInfo(const BasicBlock *BB) const
Definition: CFGMST.h:83
DenseMap< const BasicBlock *, std::unique_ptr< BBInfo > > BBInfos
Definition: CFGMST.h:48
void dumpEdges(raw_ostream &OS, const Twine &Message) const
Definition: CFGMST.h:241
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:808
StringRef getName() const
Definition: Comdat.cpp:28
void setSelectionKind(SelectionKind Val)
Definition: Comdat.h:47
SelectionKind getSelectionKind() const
Definition: Comdat.h:46
static Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2228
This is the shared class of boolean and integer constants.
Definition: Constants.h:78
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition: Constants.h:205
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:199
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:193
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:888
static Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
Definition: Constants.cpp:386
Debug location.
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
bool empty() const
Definition: DenseMap.h:98
Diagnostic information for the PGO profiler.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
Base class for error info classes.
Definition: Error.h:47
virtual std::string message() const
Return the error message as a string.
Definition: Error.h:55
Lightweight error class with error context and mandatory checking.
Definition: Error.h:156
Tagged union holding either a T or a Error.
Definition: Error.h:470
Error takeError()
Take ownership of the stored error.
Definition: Error.h:597
reference get()
Returns a reference to the stored T value.
Definition: Error.h:567
Class to represent profile counts.
Definition: Function.h:252
static GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
Definition: Globals.cpp:520
@ HiddenVisibility
The GV is hidden.
Definition: GlobalValue.h:64
@ ExternalLinkage
Externally visible function.
Definition: GlobalValue.h:48
@ WeakAnyLinkage
Keep one copy of named function when linking (weak)
Definition: GlobalValue.h:52
@ AvailableExternallyLinkage
Available for inspection, not emission.
Definition: GlobalValue.h:49
@ LinkOnceODRLinkage
Same, but only replaced by something equivalent.
Definition: GlobalValue.h:51
HashResultTy< HasherT_ > final()
Forward to HasherT::final() if available.
Definition: HashBuilder.h:66
Implementation of the HashBuilder interface.
Definition: HashBuilder.h:94
std::enable_if_t< hashbuilder_detail::IsHashableData< T >::value, HashBuilderImpl & > add(T Value)
Implement hashing for hashable data types, e.g. integral or enum values.
Definition: HashBuilder.h:109
This instruction compares its operands according to the predicate given to the constructor.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2558
Reader for the indexed binary instrprof format.
static Expected< std::unique_ptr< IndexedInstrProfReader > > create(const Twine &Path, vfs::FileSystem &FS, const Twine &RemappingPath="")
Factory method to create an indexed reader.
Expected< memprof::MemProfRecord > getMemProfRecord(uint64_t FuncNameHash)
Return the memprof record for the function identified by llvm::md5(Name).
Expected< InstrProfRecord > getInstrProfRecord(StringRef FuncName, uint64_t FuncHash, uint64_t *MismatchedFuncSum=nullptr)
Return the NamedInstrProfRecord associated with FuncName and FuncHash.
uint64_t getMaximumFunctionCount(bool UseCS)
Return the maximum of all known function counts.
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:933
Base class for instruction visitors.
Definition: InstVisitor.h:78
void visit(Iterator Start, Iterator End)
Definition: InstVisitor.h:87
RetTy visitSelectInst(SelectInst &I)
Definition: InstVisitor.h:189
instrprof_error get() const
Definition: InstrProf.h:353
std::string message() const override
Return the error message as a string.
Definition: InstrProf.cpp:257
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
Definition: Instruction.h:685
const BasicBlock * getParent() const
Definition: Instruction.h:90
BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1455
A smart pointer to a reference-counted object that inherits from RefCountedBase or ThreadSafeRefCount...
uint32_t getCRC() const
Definition: CRC.h:52
void update(ArrayRef< uint8_t > Data)
Definition: CRC.cpp:103
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
MDString * createString(StringRef Str)
Return the given string as metadata.
Definition: MDBuilder.cpp:20
MDNode * createIrrLoopHeaderWeight(uint64_t Weight)
Return metadata containing an irreducible loop header weight.
Definition: MDBuilder.cpp:330
MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight)
Return metadata containing two branch weights.
Definition: MDBuilder.cpp:37
Metadata node.
Definition: Metadata.h:943
ArrayRef< MDOperand > operands() const
Definition: Metadata.h:1289
Tuple of metadata.
Definition: Metadata.h:1328
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1356
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
Diagnostic information for optimization analysis remarks.
The optimization diagnostic interface.
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for applied optimization remarks.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
PGOInstrumentationUse(std::string Filename="", std::string RemappingFilename="", bool IsCS=false, IntrusiveRefCntPtr< vfs::FileSystem > FS=nullptr)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:155
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
uint64_t getOrCompColdCountThreshold() const
Returns ColdCountThreshold if set.
void refresh()
If no summary is present, attempt to refresh.
bool isColdCount(uint64_t C) const
Returns true if count C is considered cold.
bool isHotCount(uint64_t C) const
Returns true if count C is considered hot.
uint64_t getOrCompHotCountThreshold() const
Returns HotCountThreshold if set.
This class represents the LLVM 'select' instruction.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:577
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:941
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:131
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
TinyPtrVector - This class is specialized for cases where there are normally 0 or 1 element in a vect...
Definition: TinyPtrVector.h:29
EltTy front() const
unsigned size() const
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
std::string str() const
Return the twine contents as a std::string.
Definition: Twine.cpp:17
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
void print(raw_ostream &O, bool IsForDebug=false, bool NoDetails=false) const
Print the current type.
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
static IntegerType * getInt64Ty(LLVMContext &C)
Value * getOperand(unsigned i) const
Definition: User.h:169
Utility analysis that determines what values are worth profiling.
std::vector< CandidateInfo > get(InstrProfValueKind Kind) const
Returns a list of value profiling candidates of the given kind.
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
An efficient, type-erasing, non-owning reference to a callable.
Class to build a trie of call stack contexts for a particular profiled allocation call,...
void addCallStack(AllocationType AllocType, ArrayRef< uint64_t > StackIds)
Add a call stack context with the given allocation type to the Trie.
bool buildAndAttachMIBMetadata(CallBase *CI)
Build and attach the minimal necessary MIB metadata.
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:642
The virtual file system interface.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
This file contains the declaration of the Comdat class, which represents a single COMDAT in LLVM.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
const CustomOperand< const MCSubtargetInfo & > Msg[]
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1506
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:703
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
uint64_t getFuncHash(const FuncRecordTy *Record)
Return the structural hash associated with the function.
MDNode * buildCallstackMetadata(ArrayRef< uint64_t > CallStack, LLVMContext &Ctx)
Build callstack metadata from the provided list of call stack ids.
AllocationType getAllocType(uint64_t MaxAccessCount, uint64_t MinSize, uint64_t MinLifetime)
Return the allocation type for a given set of memory profile values.
void checkExpectAnnotations(Instruction &I, const ArrayRef< uint32_t > ExistingWeights, bool IsFrontend)
checkExpectAnnotations - compares PGO counters to the thresholds used for llvm.expect and warns if th...
Definition: MisExpect.cpp:202
DiagnosticInfoOptimizationBase::Argument NV
void write64le(void *P, uint64_t V)
Definition: Endian.h:417
IntrusiveRefCntPtr< FileSystem > getRealFileSystem()
Gets a vfs::FileSystem for the 'real' file system, as seen by the operating system.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
Interval::succ_iterator succ_end(Interval *I)
Definition: Interval.h:102
void setIrrLoopHeaderMetadata(Module *M, Instruction *TI, uint64_t Count)
std::string getPGOFuncName(const Function &F, bool InLTO=false, uint64_t Version=INSTR_PROF_INDEX_VERSION)
Return the modified name for function F suitable to be used as the key for profile lookup.
Definition: InstrProf.cpp:301
void createPGOFuncNameMetadata(Function &F, StringRef PGOFuncName)
Create the PGOFuncName meta data if PGOFuncName is different from function's raw name.
Definition: InstrProf.cpp:1123
unsigned GetSuccessorNumber(const BasicBlock *BB, const BasicBlock *Succ)
Search for the specified successor of basic block BB and return its position in the terminator instru...
Definition: CFG.cpp:79
Function::ProfileCount ProfileCount
cl::opt< bool > NoPGOWarnMismatch
void handleAllErrors(Error E, HandlerTs &&... Handlers)
Behaves the same as handleErrors, except that by contract all errors must be handled by the given han...
Definition: Error.h:966
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Interval::succ_iterator succ_begin(Interval *I)
succ_begin/succ_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:99
DenseMap< BasicBlock *, ColorVector > colorEHFunclets(Function &F)
If an EH funclet personality is in use (see isFuncletEHPersonality), this will recompute which blocks...
raw_ostream & WriteGraph(raw_ostream &O, const GraphType &G, bool ShortNames=false, const Twine &Title="")
Definition: GraphWriter.h:359
bool SplitIndirectBrCriticalEdges(Function &F, bool IgnoreBlocksWithoutPHI, BranchProbabilityInfo *BPI=nullptr, BlockFrequencyInfo *BFI=nullptr)
OperandBundleDefT< Value * > OperandBundleDef
Definition: AutoUpgrade.h:33
cl::opt< std::string > ViewBlockFreqFuncName("view-bfi-func-name", cl::Hidden, cl::desc("The option to specify " "the name of the function " "whose CFG will be displayed."))
std::array< uint8_t, NumBytes > BLAKE3Result
The constant LLVM_BLAKE3_OUT_LEN provides the default output length, 32 bytes, which is recommended f...
Definition: BLAKE3.h:35
GlobalVariable * createPGOFuncNameVar(Function &F, StringRef PGOFuncName)
Create and return the global variable for function name used in PGO instrumentation.
Definition: InstrProf.cpp:380
void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
Definition: InstrProf.cpp:1018
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:495
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
HashBuilderImpl< HasherT,(Endianness==support::endianness::native ? support::endian::system_endianness() :Endianness)> HashBuilder
Interface to help hash various types through a hasher type.
Definition: HashBuilder.h:401
EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
bool isFuncletEHPersonality(EHPersonality Pers)
Returns true if this is a personality function that invokes handler funclets (which must return to it...
InstrProfValueKind
Definition: InstrProf.h:244
cl::opt< PGOViewCountsType > PGOViewCounts("pgo-view-counts", cl::Hidden, cl::desc("A boolean option to show CFG dag or text with " "block profile counts and branch probabilities " "right after PGO profile annotation step. The " "profile counts are computed using branch " "probabilities from the runtime profile data and " "block frequency propagation algorithm. To view " "the raw counts from the profile, use option " "-pgo-view-raw-counts instead. To limit graph " "display to only one function, use filtering option " "-view-bfi-func-name."), cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), clEnumValN(PGOVCT_Graph, "graph", "show a graph."), clEnumValN(PGOVCT_Text, "text", "show in text.")))
static uint32_t scaleBranchCount(uint64_t Count, uint64_t Scale)
Scale an individual branch count.
void appendToCompilerUsed(Module &M, ArrayRef< GlobalValue * > Values)
Adds global values to the llvm.compiler.used list.
BasicBlock * SplitCriticalEdge(Instruction *TI, unsigned SuccNum, const CriticalEdgeSplittingOptions &Options=CriticalEdgeSplittingOptions(), const Twine &BBName="")
If this edge is a critical edge, insert a new node to split the critical edge.
void ViewGraph(const GraphType &G, const Twine &Name, bool ShortNames=false, const Twine &Title="", GraphProgram::Name Program=GraphProgram::DOT)
ViewGraph - Emit a dot graph, run 'dot', run gv on the postscript file, then cleanup.
Definition: GraphWriter.h:427
bool isCriticalEdge(const Instruction *TI, unsigned SuccNum, bool AllowIdenticalEdges=false)
Return true if the specified edge is a critical edge.
Definition: CFG.cpp:95
static uint64_t calculateCountScale(uint64_t MaxCount)
Calculate what to divide by to scale counts.
bool canRenameComdatFunc(const Function &F, bool CheckAddressTaken=false)
Check if we can safely rename this Comdat function.
Definition: InstrProf.cpp:1183
void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput)
Definition: InstrProf.cpp:1206
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1909
@ DS_Warning
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
auto predecessors(const MachineBasicBlock *BB)
void setProfMetadata(Module *M, Instruction *TI, ArrayRef< uint64_t > EdgeCounts, uint64_t MaxCount)
cl::opt< bool > DebugInfoCorrelate("debug-info-correlate", cl::desc("Use debug info to correlate profiles."), cl::init(false))
bool isNewLikeFn(const Value *V, const TargetLibraryInfo *TLI)
Tests if a value is a call or invoke to a library function that allocates memory via new.
SuccIterator< const Instruction, const BasicBlock > const_succ_iterator
Definition: CFG.h:243
Definition: BitVector.h:851
#define N
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:217
static const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:247
Summary of memprof metadata on allocations.
static std::string getGraphName(const PGOUseFunc *G)
std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph)
DOTGraphTraits - Template class that can be specialized to customize how graphs are converted to 'dot...
DefaultDOTGraphTraits - This class provides the default implementations of all of the DOTGraphTraits ...
static ChildIteratorType child_end(const NodeRef N)
static NodeRef getEntryNode(const PGOUseFunc *G)
static ChildIteratorType child_begin(const NodeRef N)
static nodes_iterator nodes_end(const PGOUseFunc *G)
static nodes_iterator nodes_begin(const PGOUseFunc *G)
Profiling information for a single function.
Definition: InstrProf.h:730
std::vector< uint64_t > Counts
Definition: InstrProf.h:731
CountPseudoKind getCountPseudoKind() const
Definition: InstrProf.h:836
uint32_t getNumValueSites(uint32_t ValueKind) const
Return the number of instrumented sites for ValueKind.
Definition: InstrProf.h:945
static void setCSFlagInHash(uint64_t &FuncHash)
Definition: InstrProf.h:926
GlobalValue::GUID Function
Definition: MemProf.h:145
uint32_t LineOffset
Definition: MemProf.h:150