125#include <unordered_map>
133#define DEBUG_TYPE "pgo-instrumentation"
135STATISTIC(NumOfPGOInstrument,
"Number of edges instrumented.");
136STATISTIC(NumOfPGOSelectInsts,
"Number of select instruction instrumented.");
137STATISTIC(NumOfPGOMemIntrinsics,
"Number of mem intrinsics instrumented.");
140STATISTIC(NumOfPGOSplit,
"Number of critical edge splits.");
141STATISTIC(NumOfPGOFunc,
"Number of functions having valid profile counts.");
142STATISTIC(NumOfPGOMismatch,
"Number of functions having mismatch profile.");
143STATISTIC(NumOfPGOMissing,
"Number of functions without profile.");
144STATISTIC(NumOfPGOICall,
"Number of indirect call value instrumentations.");
145STATISTIC(NumOfCSPGOInstrument,
"Number of edges instrumented in CSPGO.");
147 "Number of select instruction instrumented in CSPGO.");
149 "Number of mem intrinsics instrumented in CSPGO.");
151STATISTIC(NumOfCSPGOBB,
"Number of basic-blocks in CSPGO.");
152STATISTIC(NumOfCSPGOSplit,
"Number of critical edge splits in CSPGO.");
154 "Number of functions having valid profile counts in CSPGO.");
156 "Number of functions having mismatch profile in CSPGO.");
157STATISTIC(NumOfCSPGOMissing,
"Number of functions without profile in CSPGO.");
158STATISTIC(NumCoveredBlocks,
"Number of basic blocks that were executed");
165 cl::desc(
"Specify the path of profile data file. This is"
166 "mainly for test purpose."));
170 cl::desc(
"Specify the path of profile remapping file. This is mainly for "
177 cl::desc(
"Disable Value Profiling"));
183 cl::desc(
"Max number of annotations for a single indirect "
190 cl::desc(
"Max number of preicise value annotations for a single memop"
197 cl::desc(
"Append function hash to the name of COMDAT function to avoid "
198 "function hash mismatch due to the preinliner"));
205 cl::desc(
"Use this option to turn on/off "
206 "warnings about missing profile data for "
213 cl::desc(
"Use this option to turn off/on "
214 "warnings about profile cfg mismatch."));
221 cl::desc(
"The option is used to turn on/off "
222 "warnings about hash mismatch for comdat "
223 "or weak functions."));
229 cl::desc(
"Use this option to turn on/off SELECT "
230 "instruction instrumentation. "));
235 cl::desc(
"A boolean option to show CFG dag or text "
236 "with raw profile counts from "
237 "profile data. See also option "
238 "-pgo-view-counts. To limit graph "
239 "display to only one function, use "
240 "filtering option -view-bfi-func-name."),
248 cl::desc(
"Use this option to turn on/off "
249 "memory intrinsic size profiling."));
254 cl::desc(
"When this option is on, the annotated "
255 "branch probability will be emitted as "
256 "optimization remarks: -{Rpass|"
257 "pass-remarks}=pgo-instrumentation"));
261 cl::desc(
"Force to instrument function entry basicblock."));
266 "Use this option to enable function entry coverage instrumentation."));
269 "pgo-block-coverage",
270 cl::desc(
"Use this option to enable basic block coverage instrumentation"));
274 cl::desc(
"Create a dot file of CFGs with block "
275 "coverage inference information"));
278 "pgo-temporal-instrumentation",
279 cl::desc(
"Use this option to enable temporal instrumentation"));
283 cl::desc(
"Fix function entry count in profile use."));
287 cl::desc(
"Print out the non-match BFI count if a hot raw profile count "
288 "becomes non-hot, or a cold raw profile count becomes hot. "
289 "The print is enabled under -Rpass-analysis=pgo, or "
290 "internal option -pass-remakrs-analysis=pgo."));
294 cl::desc(
"Print out mismatched BFI counts after setting profile metadata "
295 "The print is enabled under -Rpass-analysis=pgo, or "
296 "internal option -pass-remakrs-analysis=pgo."));
300 cl::desc(
"Set the threshold for pgo-verify-bfi: only print out "
301 "mismatched BFI if the difference percentage is greater than "
302 "this value (in percentage)."));
306 cl::desc(
"Set the threshold for pgo-verify-bfi: skip the counts whose "
307 "profile count value is below."));
312 cl::desc(
"Trace the hash of the function with this name."));
316 cl::desc(
"Do not instrument functions smaller than this threshold."));
320 cl::desc(
"Do not instrument functions with the number of critical edges "
321 " greater than this threshold."));
357 return std::string();
362 return std::string();
374 else if (CV->
isOne())
386#define VALUE_PROF_KIND(Enumerator, Value, Descr) Descr,
393 const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR));
395 uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF);
397 ProfileVersion |= VARIANT_MASK_CSIR_PROF;
399 ProfileVersion |= VARIANT_MASK_INSTR_ENTRY;
401 ProfileVersion |= VARIANT_MASK_DBG_CORRELATE;
404 VARIANT_MASK_BYTE_COVERAGE | VARIANT_MASK_FUNCTION_ENTRY_ONLY;
406 ProfileVersion |= VARIANT_MASK_BYTE_COVERAGE;
408 ProfileVersion |= VARIANT_MASK_TEMPORAL_PROF;
413 Triple TT(M.getTargetTriple());
414 if (TT.supportsCOMDAT()) {
416 IRLevelVersionVariable->setComdat(M.getOrInsertComdat(VarName));
418 return IRLevelVersionVariable;
428enum VisitMode { VM_counting, VM_instrument, VM_annotate };
432struct SelectInstVisitor :
public InstVisitor<SelectInstVisitor> {
435 VisitMode
Mode = VM_counting;
436 unsigned *CurCtrIdx =
nullptr;
437 unsigned TotalNumCtrs = 0;
440 PGOUseFunc *UseFunc =
nullptr;
441 bool HasSingleByteCoverage;
443 SelectInstVisitor(
Function &Func,
bool HasSingleByteCoverage)
444 :
F(
Func), HasSingleByteCoverage(HasSingleByteCoverage) {}
446 void countSelects() {
456 void instrumentSelects(
unsigned *Ind,
unsigned TotalNC,
GlobalVariable *FNV,
458 Mode = VM_instrument;
460 TotalNumCtrs = TotalNC;
467 void annotateSelects(PGOUseFunc *UF,
unsigned *Ind) {
482 unsigned getNumOfSelectInsts()
const {
return NSIs; }
494 bool Removed =
false;
495 bool IsCritical =
false;
498 : SrcBB(Src), DestBB(Dest), Weight(
W) {}
501 std::string infoString()
const {
502 return (
Twine(Removed ?
"-" :
" ") + (InMST ?
" " :
"*") +
503 (IsCritical ?
"c" :
" ") +
" W=" +
Twine(Weight))
514 PGOBBInfo(
unsigned IX) : Group(this),
Index(IX) {}
517 std::string infoString()
const {
523template <
class Edge,
class BBInfo>
class FuncPGOInstrumentation {
531 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers;
535 void computeCFGHash();
536 void renameComdatFunction();
540 std::vector<std::vector<VPCandidateInfo>> ValueSites;
541 SelectInstVisitor SIVisitor;
542 std::string FuncName;
543 std::string DeprecatedFuncName;
552 const std::optional<BlockCoverageInference> BCI;
554 static std::optional<BlockCoverageInference>
555 constructBCI(
Function &Func,
bool HasSingleByteCoverage,
556 bool InstrumentFuncEntry) {
557 if (HasSingleByteCoverage)
564 void getInstrumentBBs(std::vector<BasicBlock *> &InstrumentBBs);
577 void dumpInfo(
StringRef Str =
"")
const {
579 " Hash: " +
Twine(FunctionHash) +
"\t" + Str);
582 FuncPGOInstrumentation(
584 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
587 bool InstrumentFuncEntry =
true,
bool HasSingleByteCoverage =
false)
588 :
F(
Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(
Func, TLI),
589 TLI(TLI), ValueSites(IPVK_Last + 1),
590 SIVisitor(
Func, HasSingleByteCoverage),
591 MST(
F, InstrumentFuncEntry, BPI,
BFI),
592 BCI(constructBCI(
Func, HasSingleByteCoverage, InstrumentFuncEntry)) {
594 BCI->viewBlockCoverageGraph();
596 SIVisitor.countSelects();
597 ValueSites[IPVK_MemOPSize] = VPC.
get(IPVK_MemOPSize);
599 NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
600 NumOfPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
602 ValueSites[IPVK_IndirectCallTarget] = VPC.
get(IPVK_IndirectCallTarget);
604 ValueSites[IPVK_VTableTarget] = VPC.
get(IPVK_VTableTarget);
606 NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
607 NumOfCSPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
614 if (!ComdatMembers.empty())
615 renameComdatFunction();
618 for (
const auto &E : MST.
allEdges()) {
621 IsCS ? NumOfCSPGOEdge++ : NumOfPGOEdge++;
623 IsCS ? NumOfCSPGOInstrument++ : NumOfPGOInstrument++;
636template <
class Edge,
class BBInfo>
637void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
638 std::vector<uint8_t> Indexes;
642 auto BI = findBBInfo(Succ);
646 for (
int J = 0; J < 4; J++)
647 Indexes.push_back((uint8_t)(
Index >> (J * 8)));
654 auto updateJCH = [&JCH](
uint64_t Num) {
659 updateJCH((
uint64_t)SIVisitor.getNumOfSelectInsts());
660 updateJCH((
uint64_t)ValueSites[IPVK_IndirectCallTarget].
size());
663 updateJCH(BCI->getInstrumentedBlocksHash());
673 FunctionHash &= 0x0FFFFFFFFFFFFFFF;
676 LLVM_DEBUG(
dbgs() <<
"Function Hash Computation for " <<
F.getName() <<
":\n"
677 <<
" CRC = " << JC.
getCRC()
678 <<
", Selects = " << SIVisitor.getNumOfSelectInsts()
679 <<
", Edges = " << MST.
numEdges() <<
", ICSites = "
680 << ValueSites[IPVK_IndirectCallTarget].size()
681 <<
", Memops = " << ValueSites[IPVK_MemOPSize].size()
682 <<
", High32 CRC = " << JCH.
getCRC()
683 <<
", Hash = " << FunctionHash <<
"\n";);
686 dbgs() <<
"Funcname=" <<
F.getName() <<
", Hash=" << FunctionHash
687 <<
" in building " <<
F.getParent()->getSourceFileName() <<
"\n";
693 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
705 for (
auto &&CM :
make_range(ComdatMembers.equal_range(
C))) {
706 assert(!isa<GlobalAlias>(CM.second));
707 Function *FM = dyn_cast<Function>(CM.second);
715template <
class Edge,
class BBInfo>
716void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() {
719 std::string OrigName =
F.getName().str();
720 std::string NewFuncName =
722 F.setName(
Twine(NewFuncName));
724 FuncName =
Twine(FuncName +
"." +
Twine(FunctionHash)).
str();
730 if (!
F.hasComdat()) {
732 NewComdat =
M->getOrInsertComdat(
StringRef(NewFuncName));
734 F.setComdat(NewComdat);
739 Comdat *OrigComdat =
F.getComdat();
740 std::string NewComdatName =
742 NewComdat =
M->getOrInsertComdat(
StringRef(NewComdatName));
745 for (
auto &&CM :
make_range(ComdatMembers.equal_range(OrigComdat))) {
747 cast<Function>(CM.second)->setComdat(NewComdat);
753template <
class Edge,
class BBInfo>
754void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs(
755 std::vector<BasicBlock *> &InstrumentBBs) {
758 if (BCI->shouldInstrumentBlock(BB))
759 InstrumentBBs.push_back(&BB);
764 std::vector<Edge *> EdgeList;
766 for (
const auto &E : MST.
allEdges())
767 EdgeList.push_back(E.get());
769 for (
auto &E : EdgeList) {
772 InstrumentBBs.push_back(InstrBB);
778template <
class Edge,
class BBInfo>
779BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) {
780 if (E->InMST || E->Removed)
786 if (SrcBB ==
nullptr)
788 if (DestBB ==
nullptr)
803 return canInstrument(SrcBB);
805 return canInstrument(DestBB);
814 dbgs() <<
"Fail to split critical edge: not instrument this edge.\n");
819 IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++;
821 <<
" --> " << getBBInfo(DestBB).
Index <<
"\n");
823 MST.
addEdge(SrcBB, InstrBB, 0);
825 Edge &NewEdge1 = MST.
addEdge(InstrBB, DestBB, 0);
826 NewEdge1.InMST =
true;
829 return canInstrument(InstrBB);
845 if (!isa<IntrinsicInst>(OrigCall)) {
848 std::optional<OperandBundleUse> ParentFunclet =
856 if (!BlockColors.
empty()) {
857 const ColorVector &CV = BlockColors.
find(OrigCall->getParent())->second;
858 assert(CV.
size() == 1 &&
"non-unique color for block!");
871 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
879 FuncPGOInstrumentation<PGOEdge, PGOBBInfo> FuncInfo(
883 auto Name = FuncInfo.FuncNameVar;
885 FuncInfo.FunctionHash);
887 auto &EntryBB =
F.getEntryBlock();
888 IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt());
897 std::vector<BasicBlock *> InstrumentBBs;
898 FuncInfo.getInstrumentBBs(InstrumentBBs);
899 unsigned NumCounters =
900 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
915 for (
auto &Instr : BB)
916 if (
auto *CS = dyn_cast<CallBase>(&Instr)) {
917 if ((CS->getCalledFunction() &&
918 CS->getCalledFunction()->isIntrinsic()) ||
919 dyn_cast<InlineAsm>(CS->getCalledOperand()))
926 Visit([&TotalNrCallsites](
auto *) { ++TotalNrCallsites; });
930 Visit([&](
auto *CB) {
942 auto &EntryBB =
F.getEntryBlock();
943 IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt());
952 for (
auto *InstrBB : InstrumentBBs) {
955 "Cannot get the Instrumentation point");
960 ? Intrinsic::instrprof_cover
961 : Intrinsic::instrprof_increment),
966 FuncInfo.SIVisitor.instrumentSelects(&
I, NumCounters, FuncInfo.FuncNameVar,
967 FuncInfo.FunctionHash);
973 NumOfPGOICall += FuncInfo.ValueSites[IPVK_IndirectCallTarget].size();
980 if (
F.hasPersonalityFn() &&
985 for (
uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) {
986 unsigned SiteIndex = 0;
992 <<
" site: CallSite Index = " << SiteIndex <<
"\n");
996 "Cannot get the Instrumentation point");
998 Value *ToProfile =
nullptr;
999 if (Cand.V->getType()->isIntegerTy())
1001 else if (Cand.V->getType()->isPointerTy())
1003 assert(ToProfile &&
"value profiling Value is of unexpected type");
1009 {FuncInfo.FuncNameVar, Builder.
getInt64(FuncInfo.FunctionHash),
1019struct PGOUseEdge :
public PGOEdge {
1020 using PGOEdge::PGOEdge;
1022 std::optional<uint64_t> Count;
1028 std::string infoString()
const {
1030 return PGOEdge::infoString();
1031 return (
Twine(PGOEdge::infoString()) +
" Count=" +
Twine(*Count)).str();
1038struct PGOUseBBInfo :
public PGOBBInfo {
1039 std::optional<uint64_t> Count;
1040 int32_t UnknownCountInEdge = 0;
1041 int32_t UnknownCountOutEdge = 0;
1042 DirectEdges InEdges;
1043 DirectEdges OutEdges;
1045 PGOUseBBInfo(
unsigned IX) : PGOBBInfo(IX) {}
1051 std::string infoString()
const {
1053 return PGOBBInfo::infoString();
1054 return (
Twine(PGOBBInfo::infoString()) +
" Count=" +
Twine(*Count)).str();
1058 void addOutEdge(PGOUseEdge *E) {
1059 OutEdges.push_back(E);
1060 UnknownCountOutEdge++;
1064 void addInEdge(PGOUseEdge *E) {
1065 InEdges.push_back(E);
1066 UnknownCountInEdge++;
1075 for (
const auto &E : Edges) {
1089 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
1092 bool HasSingleByteCoverage)
1093 :
F(
Func),
M(Modu),
BFI(BFIin), PSI(PSI),
1094 FuncInfo(
Func, TLI, ComdatMembers,
false, BPI, BFIin, IsCS,
1095 InstrumentFuncEntry, HasSingleByteCoverage),
1096 FreqAttr(FFA_Normal), IsCS(IsCS) {}
1098 void handleInstrProfError(
Error Err,
uint64_t MismatchedFuncSum);
1105 void populateCounters();
1114 void annotateValueSites();
1117 void annotateValueSites(
uint32_t Kind);
1120 void annotateIrrLoopHeaderWeights();
1123 enum FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot };
1126 FuncFreqAttr getFuncFreqAttr()
const {
return FreqAttr; }
1135 PGOUseBBInfo &getBBInfo(
const BasicBlock *BB)
const {
1136 return FuncInfo.getBBInfo(BB);
1140 PGOUseBBInfo *findBBInfo(
const BasicBlock *BB)
const {
1141 return FuncInfo.findBBInfo(BB);
1146 void dumpInfo(
StringRef Str =
"")
const { FuncInfo.dumpInfo(Str); }
1148 uint64_t getProgramMaxCount()
const {
return ProgramMaxCount; }
1157 FuncPGOInstrumentation<PGOUseEdge, PGOUseBBInfo> FuncInfo;
1173 FuncFreqAttr FreqAttr;
1179 bool setInstrumentedCounts(
const std::vector<uint64_t> &CountFromProfile);
1192 FreqAttr = FFA_Cold;
1200 const FuncPGOInstrumentation<PGOUseEdge, PGOUseBBInfo> &FuncInfo) {
1204 for (
const auto &E : FuncInfo.MST.allEdges()) {
1209 PGOUseBBInfo &SrcInfo = FuncInfo.getBBInfo(SrcBB);
1210 PGOUseBBInfo &DestInfo = FuncInfo.getBBInfo(DestBB);
1211 SrcInfo.addOutEdge(E.get());
1212 DestInfo.addInEdge(E.get());
1218bool PGOUseFunc::setInstrumentedCounts(
1219 const std::vector<uint64_t> &CountFromProfile) {
1221 std::vector<BasicBlock *> InstrumentBBs;
1222 FuncInfo.getInstrumentBBs(InstrumentBBs);
1226 unsigned NumCounters =
1227 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
1230 if (NumCounters != CountFromProfile.size()) {
1233 auto *FuncEntry = &*
F.begin();
1238 uint64_t CountValue = CountFromProfile[
I++];
1239 PGOUseBBInfo &
Info = getBBInfo(InstrBB);
1243 if (InstrBB == FuncEntry && CountValue == 0)
1245 Info.setBBInfoCount(CountValue);
1247 ProfileCountSize = CountFromProfile.size();
1251 auto setEdgeCount = [
this](PGOUseEdge *E,
uint64_t Value) ->
void {
1252 E->setEdgeCount(
Value);
1253 this->getBBInfo(E->SrcBB).UnknownCountOutEdge--;
1254 this->getBBInfo(E->DestBB).UnknownCountInEdge--;
1260 for (
const auto &E : FuncInfo.MST.allEdges()) {
1261 if (E->Removed || E->InMST)
1264 PGOUseBBInfo &SrcInfo = getBBInfo(SrcBB);
1268 if (SrcInfo.Count && SrcInfo.OutEdges.size() == 1)
1269 setEdgeCount(E.get(), *SrcInfo.Count);
1272 PGOUseBBInfo &DestInfo = getBBInfo(DestBB);
1275 if (DestInfo.Count && DestInfo.InEdges.size() == 1)
1276 setEdgeCount(E.get(), *DestInfo.Count);
1282 setEdgeCount(E.get(), 0);
1289void PGOUseFunc::setEdgeCount(DirectEdges &Edges,
uint64_t Value) {
1290 for (
auto &E : Edges) {
1293 E->setEdgeCount(
Value);
1295 getBBInfo(E->SrcBB).UnknownCountOutEdge--;
1296 getBBInfo(E->DestBB).UnknownCountInEdge--;
1304 const char MetadataName[] =
"instr_prof_hash_mismatch";
1307 auto *Existing =
F.getMetadata(LLVMContext::MD_annotation);
1309 MDTuple *Tuple = cast<MDTuple>(Existing);
1310 for (
const auto &
N : Tuple->
operands()) {
1311 if (
N.equalsStr(MetadataName))
1320 F.setMetadata(LLVMContext::MD_annotation, MD);
1323void PGOUseFunc::handleInstrProfError(
Error Err,
uint64_t MismatchedFuncSum) {
1325 auto &Ctx =
M->getContext();
1326 auto Err = IPE.
get();
1327 bool SkipWarning =
false;
1329 << FuncInfo.FuncName <<
": ");
1330 if (Err == instrprof_error::unknown_function) {
1331 IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++;
1334 }
else if (Err == instrprof_error::hash_mismatch ||
1335 Err == instrprof_error::malformed) {
1336 IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++;
1342 LLVM_DEBUG(
dbgs() <<
"hash mismatch (hash= " << FuncInfo.FunctionHash
1343 <<
" skip=" << SkipWarning <<
")");
1353 IPE.
message() + std::string(
" ") +
F.getName().str() +
1354 std::string(
" Hash = ") + std::to_string(FuncInfo.FunctionHash) +
1355 std::string(
" up to ") + std::to_string(MismatchedFuncSum) +
1356 std::string(
" count discarded");
1368 auto &Ctx =
M->getContext();
1371 FuncInfo.FuncName, FuncInfo.FunctionHash, FuncInfo.DeprecatedFuncName,
1372 &MismatchedFuncSum);
1374 handleInstrProfError(std::move(E), MismatchedFuncSum);
1377 ProfileRecord = std::move(
Result.get());
1382 std::vector<uint64_t> &CountFromProfile = ProfileRecord.
Counts;
1384 IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++;
1388 for (
unsigned I = 0, S = CountFromProfile.size();
I < S;
I++) {
1390 ValueSum += CountFromProfile[
I];
1392 AllZeros = (ValueSum == 0);
1396 getBBInfo(
nullptr).UnknownCountOutEdge = 2;
1397 getBBInfo(
nullptr).UnknownCountInEdge = 2;
1399 if (!setInstrumentedCounts(CountFromProfile)) {
1401 dbgs() <<
"Inconsistent number of counts, skipping this function");
1403 M->getName().data(),
1404 Twine(
"Inconsistent number of counts in ") +
F.getName().str() +
1405 Twine(
": the profile may be stale or there is a function name "
1417 FuncInfo.FuncName, FuncInfo.FunctionHash, FuncInfo.DeprecatedFuncName,
1418 &MismatchedFuncSum);
1419 if (
auto Err =
Result.takeError()) {
1420 handleInstrProfError(std::move(Err), MismatchedFuncSum);
1423 IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++;
1425 std::vector<uint64_t> &CountsFromProfile =
Result.get().Counts;
1429 if (FuncInfo.BCI->shouldInstrumentBlock(BB))
1435 InverseDependencies;
1436 for (
auto &BB :
F) {
1437 for (
auto *Dep : FuncInfo.BCI->getDependencies(BB)) {
1439 InverseDependencies[Dep].
insert(&BB);
1444 std::stack<const BasicBlock *> CoveredBlocksToProcess;
1445 for (
auto &[BB, IsCovered] : Coverage)
1447 CoveredBlocksToProcess.push(BB);
1449 while (!CoveredBlocksToProcess.empty()) {
1450 auto *CoveredBlock = CoveredBlocksToProcess.top();
1451 assert(Coverage[CoveredBlock]);
1452 CoveredBlocksToProcess.pop();
1453 for (
auto *BB : InverseDependencies[CoveredBlock]) {
1458 CoveredBlocksToProcess.push(BB);
1466 F.setEntryCount(Coverage[&
F.getEntryBlock()] ? 10000 : 0);
1467 for (
auto &BB :
F) {
1476 Weights.
push_back((Coverage[Succ] || !Coverage[&BB]) ? 1 : 0);
1477 if (Weights.
size() >= 2)
1482 unsigned NumCorruptCoverage = 0;
1487 auto IsBlockDead = [&](
const BasicBlock &BB) -> std::optional<bool> {
1488 if (
auto C =
BFI.getBlockProfileCount(&BB))
1492 LLVM_DEBUG(
dbgs() <<
"Block Coverage: (Instrumented=*, Covered=X)\n");
1493 for (
auto &BB :
F) {
1494 LLVM_DEBUG(
dbgs() << (FuncInfo.BCI->shouldInstrumentBlock(BB) ?
"* " :
" ")
1495 << (Coverage[&BB] ?
"X " :
" ") <<
" " << BB.getName()
1501 if (Coverage[&BB] == IsBlockDead(BB).value_or(
false)) {
1503 dbgs() <<
"Found inconsistent block covearge for " << BB.getName()
1504 <<
": BCI=" << (Coverage[&BB] ?
"Covered" :
"Dead") <<
" BFI="
1505 << (IsBlockDead(BB).
value() ?
"Dead" :
"Covered") <<
"\n");
1506 ++NumCorruptCoverage;
1512 auto &Ctx =
M->getContext();
1514 M->getName().data(),
1515 Twine(
"Found inconsistent block coverage for function ") +
F.getName() +
1516 " in " +
Twine(NumCorruptCoverage) +
" blocks.",
1520 FuncInfo.BCI->viewBlockCoverageGraph(&Coverage);
1525void PGOUseFunc::populateCounters() {
1526 bool Changes =
true;
1527 unsigned NumPasses = 0;
1535 PGOUseBBInfo *UseBBInfo = findBBInfo(&BB);
1536 if (UseBBInfo ==
nullptr)
1538 if (!UseBBInfo->Count) {
1539 if (UseBBInfo->UnknownCountOutEdge == 0) {
1542 }
else if (UseBBInfo->UnknownCountInEdge == 0) {
1547 if (UseBBInfo->Count) {
1548 if (UseBBInfo->UnknownCountOutEdge == 1) {
1554 if (*UseBBInfo->Count > OutSum)
1555 Total = *UseBBInfo->Count - OutSum;
1556 setEdgeCount(UseBBInfo->OutEdges,
Total);
1559 if (UseBBInfo->UnknownCountInEdge == 1) {
1562 if (*UseBBInfo->Count > InSum)
1563 Total = *UseBBInfo->Count - InSum;
1564 setEdgeCount(UseBBInfo->InEdges,
Total);
1571 LLVM_DEBUG(
dbgs() <<
"Populate counts in " << NumPasses <<
" passes.\n");
1575 for (
auto &BB :
F) {
1576 auto BI = findBBInfo(&BB);
1579 assert(BI->Count &&
"BB count is not valid");
1584 for (
auto &BB :
F) {
1585 auto BI = findBBInfo(&BB);
1588 FuncMaxCount = std::max(FuncMaxCount, *BI->Count);
1598 FuncInfo.SIVisitor.annotateSelects(
this, &CountPosition);
1599 assert(CountPosition == ProfileCountSize);
1601 LLVM_DEBUG(FuncInfo.dumpInfo(
"after reading profile."));
1605void PGOUseFunc::setBranchWeights() {
1607 LLVM_DEBUG(
dbgs() <<
"\nSetting branch weights for func " <<
F.getName()
1608 <<
" IsCS=" << IsCS <<
"\n");
1609 for (
auto &BB :
F) {
1613 if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) ||
1614 isa<IndirectBrInst>(TI) || isa<InvokeInst>(TI) ||
1615 isa<CallBrInst>(TI)))
1618 const PGOUseBBInfo &BBCountInfo = getBBInfo(&BB);
1619 if (!*BBCountInfo.Count)
1623 unsigned Size = BBCountInfo.OutEdges.size();
1626 for (
unsigned s = 0; s <
Size; s++) {
1627 const PGOUseEdge *E = BBCountInfo.OutEdges[s];
1630 if (DestBB ==
nullptr)
1634 if (EdgeCount > MaxCount)
1635 MaxCount = EdgeCount;
1636 EdgeCounts[SuccNum] = EdgeCount;
1645 auto &Ctx =
M->getContext();
1647 M->getName().data(),
1648 Twine(
"Profile in ") +
F.getName().str() +
1649 Twine(
" partially ignored") +
1650 Twine(
", possibly due to the lack of a return path."),
1658 if (isa<IndirectBrInst>(Pred->getTerminator()))
1664void PGOUseFunc::annotateIrrLoopHeaderWeights() {
1665 LLVM_DEBUG(
dbgs() <<
"\nAnnotating irreducible loop header weights.\n");
1667 for (
auto &BB :
F) {
1673 const PGOUseBBInfo &BBCountInfo = getBBInfo(&BB);
1679void SelectInstVisitor::instrumentOneSelectInst(
SelectInst &SI) {
1683 auto *Step = Builder.CreateZExt(
SI.getCondition(), Int64Ty);
1686 {FuncNameVar, Builder.getInt64(FuncHash), Builder.getInt32(TotalNumCtrs),
1687 Builder.getInt32(*CurCtrIdx), Step});
1691void SelectInstVisitor::annotateOneSelectInst(
SelectInst &SI) {
1692 std::vector<uint64_t> &CountFromProfile = UseFunc->getProfileRecord().Counts;
1693 assert(*CurCtrIdx < CountFromProfile.size() &&
1694 "Out of bound access of counters");
1696 SCounts[0] = CountFromProfile[*CurCtrIdx];
1699 auto BI = UseFunc->findBBInfo(
SI.getParent());
1701 TotalCount = *BI->Count;
1703 SCounts[1] = (TotalCount > SCounts[0] ? TotalCount - SCounts[0] : 0);
1704 uint64_t MaxCount = std::max(SCounts[0], SCounts[1]);
1709void SelectInstVisitor::visitSelectInst(
SelectInst &SI) {
1713 if (
SI.getCondition()->getType()->isVectorTy())
1721 instrumentOneSelectInst(SI);
1724 annotateOneSelectInst(SI);
1732void PGOUseFunc::annotateValueSites() {
1740 annotateValueSites(Kind);
1744void PGOUseFunc::annotateValueSites(
uint32_t Kind) {
1745 assert(Kind <= IPVK_Last);
1746 unsigned ValueSiteIndex = 0;
1747 auto &ValueSites = FuncInfo.ValueSites[
Kind];
1749 if (NumValueSites != ValueSites.size()) {
1750 auto &Ctx =
M->getContext();
1752 M->getName().data(),
1753 Twine(
"Inconsistent number of value sites for ") +
1756 Twine(
"\", possibly due to the use of a stale profile."),
1762 LLVM_DEBUG(
dbgs() <<
"Read one value site profile (kind = " << Kind
1763 <<
"): Index = " << ValueSiteIndex <<
" out of "
1764 << NumValueSites <<
"\n");
1777 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
1782 ComdatMembers.insert(std::make_pair(
C, &
F));
1784 if (
Comdat *
C = GV.getComdat())
1785 ComdatMembers.insert(std::make_pair(
C, &GV));
1787 if (
Comdat *
C = GA.getComdat())
1788 ComdatMembers.insert(std::make_pair(
C, &GA));
1793 if (
F.isDeclaration())
1798 unsigned NumCriticalEdges = 0;
1799 for (
auto &BB :
F) {
1808 <<
", NumCriticalEdges=" << NumCriticalEdges
1809 <<
" exceed the threshold. Skip PGO.\n");
1819 if (
F.hasFnAttribute(llvm::Attribute::Naked))
1821 if (
F.hasFnAttribute(llvm::Attribute::NoProfile))
1823 if (
F.hasFnAttribute(llvm::Attribute::SkipProfile))
1839 Triple TT(M.getTargetTriple());
1844 Twine(
"VTable value profiling is presently not "
1845 "supported for non-ELF object formats"),
1847 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
1853 auto &TLI = LookupTLI(
F);
1854 auto *BPI = LookupBPI(
F);
1855 auto *BFI = LookupBFI(
F);
1899 auto BFIEntryCount =
F.getEntryCount();
1900 assert(BFIEntryCount && (BFIEntryCount->getCount() > 0) &&
1901 "Invalid BFI Entrycount");
1905 for (
auto &BBI :
F) {
1908 if (!Func.findBBInfo(&BBI))
1911 CountValue = *Func.getBBInfo(&BBI).Count;
1912 BFICountValue = *BFICount;
1916 if (SumCount.isZero())
1920 "Incorrect sum of BFI counts");
1923 double Scale = (SumCount / SumBFICount).convertToDouble();
1924 if (Scale < 1.001 && Scale > 0.999)
1929 if (NewEntryCount == 0)
1935 << NewEntryCount <<
"\n");
1952 unsigned BBNum = 0, BBMisMatchNum = 0, NonZeroBBNum = 0;
1953 for (
auto &BBI :
F) {
1957 CountValue = Func.getBBInfo(&BBI).Count.value_or(CountValue);
1964 BFICountValue = *BFICount;
1967 bool rawIsHot = CountValue >= HotCountThreshold;
1968 bool BFIIsHot = BFICountValue >= HotCountThreshold;
1970 bool ShowCount =
false;
1971 if (rawIsHot && !BFIIsHot) {
1972 Msg =
"raw-Hot to BFI-nonHot";
1974 }
else if (rawIsCold && BFIIsHot) {
1975 Msg =
"raw-Cold to BFI-Hot";
1984 uint64_t Diff = (BFICountValue >= CountValue)
1985 ? BFICountValue - CountValue
1986 : CountValue - BFICountValue;
1994 F.getSubprogram(), &BBI);
1996 <<
" Count=" <<
ore::NV(
"Count", CountValue)
1997 <<
" BFI_Count=" <<
ore::NV(
"Count", BFICountValue);
1999 Remark <<
" (" << Msg <<
")";
2006 F.getSubprogram(), &
F.getEntryBlock())
2007 <<
"In Func " <<
ore::NV(
"Function",
F.getName())
2008 <<
": Num_of_BB=" <<
ore::NV(
"Count", BBNum)
2009 <<
", Num_of_non_zerovalue_BB=" <<
ore::NV(
"Count", NonZeroBBNum)
2010 <<
", Num_of_mis_matching_BB=" <<
ore::NV(
"Count", BBMisMatchNum);
2022 auto &Ctx = M.getContext();
2025 ProfileRemappingFileName);
2026 if (
Error E = ReaderOrErr.takeError()) {
2034 std::unique_ptr<IndexedInstrProfReader> PGOReader =
2035 std::move(ReaderOrErr.get());
2041 if (!PGOReader->hasCSIRLevelProfile() && IsCS)
2045 if (!PGOReader->isIRLevelProfile()) {
2047 ProfileFileName.
data(),
"Not an IR level instrumentation profile"));
2050 if (PGOReader->functionEntryOnly()) {
2052 ProfileFileName.
data(),
2053 "Function entry profiles are not yet supported for optimization"));
2060 M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()),
2065 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
2067 std::vector<Function *> HotFunctions;
2068 std::vector<Function *> ColdFunctions;
2072 bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled();
2077 bool HasSingleByteCoverage = PGOReader->hasSingleByteCoverage();
2081 auto &TLI = LookupTLI(
F);
2082 auto *BPI = LookupBPI(
F);
2083 auto *BFI = LookupBFI(
F);
2084 if (!HasSingleByteCoverage) {
2090 PGOUseFunc Func(
F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS,
2091 InstrumentFuncEntry, HasSingleByteCoverage);
2092 if (HasSingleByteCoverage) {
2093 Func.populateCoverage(PGOReader.get());
2101 bool AllZeros =
false;
2102 if (!Func.readCounters(PGOReader.get(), AllZeros, PseudoKind))
2106 if (Func.getProgramMaxCount() != 0)
2107 ColdFunctions.push_back(&
F);
2112 if (
F.hasFnAttribute(Attribute::Cold))
2113 F.removeFnAttr(Attribute::Cold);
2116 F.addFnAttr(Attribute::Hot);
2119 Func.populateCounters();
2120 Func.setBranchWeights();
2121 Func.annotateValueSites();
2122 Func.annotateIrrLoopHeaderWeights();
2123 PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr();
2124 if (FreqAttr == PGOUseFunc::FFA_Cold)
2125 ColdFunctions.push_back(&
F);
2126 else if (FreqAttr == PGOUseFunc::FFA_Hot)
2127 HotFunctions.push_back(&
F);
2132 std::unique_ptr<BranchProbabilityInfo> NewBPI =
2133 std::make_unique<BranchProbabilityInfo>(
F, LI);
2134 std::unique_ptr<BlockFrequencyInfo> NewBFI =
2135 std::make_unique<BlockFrequencyInfo>(
F, *NewBPI, LI);
2139 dbgs() <<
"pgo-view-counts: " << Func.getFunc().getName() <<
"\n";
2140 NewBFI->print(
dbgs());
2150 ViewGraph(&Func,
Twine(
"PGORawCounts_") + Func.getFunc().getName());
2152 dbgs() <<
"pgo-view-raw-counts: " << Func.getFunc().getName() <<
"\n";
2179 for (
auto &
F : HotFunctions) {
2180 F->addFnAttr(Attribute::InlineHint);
2181 LLVM_DEBUG(
dbgs() <<
"Set inline attribute to function: " <<
F->getName()
2184 for (
auto &
F : ColdFunctions) {
2187 if (
F->hasFnAttribute(Attribute::Hot)) {
2188 auto &Ctx = M.getContext();
2189 std::string Msg = std::string(
"Function ") +
F->getName().str() +
2190 std::string(
" is annotated as a hot function but"
2191 " the profile is cold");
2196 F->addFnAttr(Attribute::Cold);
2197 LLVM_DEBUG(
dbgs() <<
"Set cold attribute to function: " <<
F->getName()
2204 std::string Filename, std::string RemappingFilename,
bool IsCS,
2206 : ProfileFileName(
std::
move(Filename)),
2207 ProfileRemappingFileName(
std::
move(RemappingFilename)), IsCS(IsCS),
2234 LookupTLI, LookupBPI, LookupBFI, PSI, IsCS))
2241 if (!
Node->getName().empty())
2242 return Node->getName().str();
2244 std::string SimpleNodeName;
2252 assert(MaxCount > 0 &&
"Bad max count");
2255 for (
const auto &ECI : EdgeCounts)
2268 if (BrCondStr.empty())
2280 std::string BranchProbStr;
2283 OS <<
" (total count : " << TotalCount <<
")";
2289 << BrCondStr <<
" is true with probability : " << BranchProbStr;
2308 return &
G->getFunc().front();
2331 return std::string(
G->getFunc().getName());
2339 PGOUseBBInfo *BI = Graph->findBBInfo(Node);
2341 if (BI && BI->Count)
2342 OS << *BI->Count <<
"\\l";
2350 if (!isa<SelectInst>(&
I))
2353 OS <<
"SELECT : { T = ";
2357 OS <<
"Unknown, F = Unknown }\\l";
2359 OS << TC <<
", F = " << FC <<
" }\\l";
This file implements a class to represent arbitrary precision integral constant values and operations...
This file contains the simple types necessary to represent the attributes associated with functions a...
This file finds the minimum set of blocks on a CFG that must be instrumented to infer execution cover...
Analysis containing CSE Info
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Given that RA is a live value
static BasicBlock * getInstrBB(CFGMST< Edge, BBInfo > &MST, Edge &E, const DenseSet< const BasicBlock * > &ExecBlocks)
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
static cl::opt< unsigned > ColdCountThreshold("mfs-count-threshold", cl::desc("Minimum number of times a block must be executed to be retained."), cl::init(1), cl::Hidden)
Module.h This file contains the declarations for the Module class.
static cl::opt< bool > PGOInstrumentEntry("pgo-instrument-entry", cl::init(false), cl::Hidden, cl::desc("Force to instrument function entry basicblock."))
static cl::opt< std::string > PGOTestProfileRemappingFile("pgo-test-profile-remapping-file", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path of profile remapping file. This is mainly for " "test purpose."))
static cl::opt< bool > PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden, cl::desc("Fix function entry count in profile use."))
static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI, BranchProbabilityInfo &NBPI)
static cl::opt< bool > PGOVerifyHotBFI("pgo-verify-hot-bfi", cl::init(false), cl::Hidden, cl::desc("Print out the non-match BFI count if a hot raw profile count " "becomes non-hot, or a cold raw profile count becomes hot. " "The print is enabled under -Rpass-analysis=pgo, or " "internal option -pass-remakrs-analysis=pgo."))
static void annotateFunctionWithHashMismatch(Function &F, LLVMContext &ctx)
static cl::opt< bool > PGOTemporalInstrumentation("pgo-temporal-instrumentation", cl::desc("Use this option to enable temporal instrumentation"))
static cl::opt< unsigned > PGOFunctionSizeThreshold("pgo-function-size-threshold", cl::Hidden, cl::desc("Do not instrument functions smaller than this threshold."))
static cl::opt< unsigned > MaxNumAnnotations("icp-max-annotations", cl::init(3), cl::Hidden, cl::desc("Max number of annotations for a single indirect " "call callsite"))
static bool skipPGOGen(const Function &F)
static void collectComdatMembers(Module &M, std::unordered_multimap< Comdat *, GlobalValue * > &ComdatMembers)
static cl::opt< unsigned > PGOVerifyBFICutoff("pgo-verify-bfi-cutoff", cl::init(5), cl::Hidden, cl::desc("Set the threshold for pgo-verify-bfi: skip the counts whose " "profile count value is below."))
static cl::opt< std::string > PGOTraceFuncHash("pgo-trace-func-hash", cl::init("-"), cl::Hidden, cl::value_desc("function name"), cl::desc("Trace the hash of the function with this name."))
static void instrumentOneFunc(Function &F, Module *M, TargetLibraryInfo &TLI, BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFI, std::unordered_multimap< Comdat *, GlobalValue * > &ComdatMembers, bool IsCS)
bool isValueProfilingDisabled()
static void populateEHOperandBundle(VPCandidateInfo &Cand, DenseMap< BasicBlock *, ColorVector > &BlockColors, SmallVectorImpl< OperandBundleDef > &OpBundles)
static cl::opt< bool > PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden, cl::desc("Use this option to turn on/off SELECT " "instruction instrumentation. "))
static cl::opt< bool > PGOFunctionEntryCoverage("pgo-function-entry-coverage", cl::Hidden, cl::desc("Use this option to enable function entry coverage instrumentation."))
static bool InstrumentAllFunctions(Module &M, function_ref< TargetLibraryInfo &(Function &)> LookupTLI, function_ref< BranchProbabilityInfo *(Function &)> LookupBPI, function_ref< BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS)
static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI, BranchProbabilityInfo &NBPI, uint64_t HotCountThreshold, uint64_t ColdCountThreshold)
static cl::opt< unsigned > PGOVerifyBFIRatio("pgo-verify-bfi-ratio", cl::init(2), cl::Hidden, cl::desc("Set the threshold for pgo-verify-bfi: only print out " "mismatched BFI if the difference percentage is greater than " "this value (in percentage)."))
static cl::opt< bool > DoComdatRenaming("do-comdat-renaming", cl::init(false), cl::Hidden, cl::desc("Append function hash to the name of COMDAT function to avoid " "function hash mismatch due to the preinliner"))
static cl::opt< unsigned > PGOFunctionCriticalEdgeThreshold("pgo-critical-edge-threshold", cl::init(20000), cl::Hidden, cl::desc("Do not instrument functions with the number of critical edges " " greater than this threshold."))
static void setupBBInfoEdges(const FuncPGOInstrumentation< PGOUseEdge, PGOUseBBInfo > &FuncInfo)
Set up InEdges/OutEdges for all BBs in the MST.
static cl::opt< std::string > PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path of profile data file. This is" "mainly for test purpose."))
static bool skipPGOUse(const Function &F)
static bool canRenameComdat(Function &F, std::unordered_multimap< Comdat *, GlobalValue * > &ComdatMembers)
static cl::opt< bool > PGOVerifyBFI("pgo-verify-bfi", cl::init(false), cl::Hidden, cl::desc("Print out mismatched BFI counts after setting profile metadata " "The print is enabled under -Rpass-analysis=pgo, or " "internal option -pass-remakrs-analysis=pgo."))
static cl::opt< bool > PGOBlockCoverage("pgo-block-coverage", cl::desc("Use this option to enable basic block coverage instrumentation"))
static uint64_t sumEdgeCount(const ArrayRef< PGOUseEdge * > Edges)
static cl::opt< bool > PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden, cl::desc("Use this option to turn on/off " "memory intrinsic size profiling."))
Function::ProfileCount ProfileCount
static cl::opt< bool > EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden, cl::desc("When this option is on, the annotated " "branch probability will be emitted as " "optimization remarks: -{Rpass|" "pass-remarks}=pgo-instrumentation"))
static cl::opt< unsigned > MaxNumMemOPAnnotations("memop-max-annotations", cl::init(4), cl::Hidden, cl::desc("Max number of preicise value annotations for a single memop" "intrinsic"))
static cl::opt< bool > DisableValueProfiling("disable-vp", cl::init(false), cl::Hidden, cl::desc("Disable Value Profiling"))
static std::string getSimpleNodeName(const BasicBlock *Node)
static cl::opt< bool > PGOViewBlockCoverageGraph("pgo-view-block-coverage-graph", cl::desc("Create a dot file of CFGs with block " "coverage inference information"))
static GlobalVariable * createIRLevelProfileFlagVar(Module &M, bool IsCS)
static bool isIndirectBrTarget(BasicBlock *BB)
static std::string getBranchCondString(Instruction *TI)
bool shouldInstrumentEntryBB()
static bool annotateAllFunctions(Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName, vfs::FileSystem &FS, function_ref< TargetLibraryInfo &(Function &)> LookupTLI, function_ref< BranchProbabilityInfo *(Function &)> LookupBPI, function_ref< BlockFrequencyInfo *(Function &)> LookupBFI, ProfileSummaryInfo *PSI, bool IsCS)
static cl::opt< PGOViewCountsType > PGOViewRawCounts("pgo-view-raw-counts", cl::Hidden, cl::desc("A boolean option to show CFG dag or text " "with raw profile counts from " "profile data. See also option " "-pgo-view-counts. To limit graph " "display to only one function, use " "filtering option -view-bfi-func-name."), cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), clEnumValN(PGOVCT_Graph, "graph", "show a graph."), clEnumValN(PGOVCT_Text, "text", "show in text.")))
static const char * ValueProfKindDescr[]
This file provides the interface for IR based instrumentation passes (profile-gen,...
FunctionAnalysisManager FAM
ModuleAnalysisManager MAM
This header defines various interfaces for pass management in LLVM.
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isSimple(Instruction *I)
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Defines the virtual file system interface vfs::FileSystem.
void printAsOperand(OutputBuffer &OB, Prec P=Prec::Default, bool StrictlyWorse=false) const
static APFloat getZero(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Zero.
Class for arbitrary precision integers.
This templated class represents "all analyses that operate over <a particular IR unit>" (e....
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
LLVM Basic Block Representation.
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
std::optional< uint64_t > getBlockProfileCount(const BasicBlock *BB, bool AllowSynthetic=false) const
Returns the estimated profile count of BB.
Conditional or Unconditional Branch instruction.
bool isConditional() const
Value * getCondition() const
Analysis pass which computes BranchProbabilityInfo.
Analysis providing branch probability information.
An union-find based Minimum Spanning Tree for CFG.
Edge & addEdge(BasicBlock *Src, BasicBlock *Dest, uint64_t W)
const std::vector< std::unique_ptr< Edge > > & allEdges() const
size_t bbInfoSize() const
BBInfo * findBBInfo(const BasicBlock *BB) const
BBInfo & getBBInfo(const BasicBlock *BB) const
void dumpEdges(raw_ostream &OS, const Twine &Message) const
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Value * getCalledOperand() const
Predicate getPredicate() const
Return the predicate for this instruction.
StringRef getName() const
void setSelectionKind(SelectionKind Val)
SelectionKind getSelectionKind() const
This is the shared class of boolean and integer constants.
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
static Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Diagnostic information for the PGO profiler.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Base class for error info classes.
virtual std::string message() const
Return the error message as a string.
Lightweight error class with error context and mandatory checking.
Tagged union holding either a T or a Error.
Class to represent profile counts.
static GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
@ HiddenVisibility
The GV is hidden.
@ ExternalLinkage
Externally visible function.
@ WeakAnyLinkage
Keep one copy of named function when linking (weak)
@ AvailableExternallyLinkage
Available for inspection, not emission.
@ LinkOnceODRLinkage
Same, but only replaced by something equivalent.
This instruction compares its operands according to the predicate given to the constructor.
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
BasicBlock::iterator GetInsertPoint() const
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Reader for the indexed binary instrprof format.
static Expected< std::unique_ptr< IndexedInstrProfReader > > create(const Twine &Path, vfs::FileSystem &FS, const Twine &RemappingPath="")
Factory method to create an indexed reader.
Expected< InstrProfRecord > getInstrProfRecord(StringRef FuncName, uint64_t FuncHash, StringRef DeprecatedFuncName="", uint64_t *MismatchedFuncSum=nullptr)
Return the NamedInstrProfRecord associated with FuncName and FuncHash.
uint64_t getMaximumFunctionCount(bool UseCS)
Return the maximum of all known function counts.
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Base class for instruction visitors.
void visit(Iterator Start, Iterator End)
RetTy visitSelectInst(SelectInst &I)
instrprof_error get() const
std::string message() const override
Return the error message as a string.
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
A smart pointer to a reference-counted object that inherits from RefCountedBase or ThreadSafeRefCount...
void update(ArrayRef< uint8_t > Data)
This is an important class for using LLVM in a threaded context.
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
MDString * createString(StringRef Str)
Return the given string as metadata.
MDNode * createIrrLoopHeaderWeight(uint64_t Weight)
Return metadata containing an irreducible loop header weight.
ArrayRef< MDOperand > operands() const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
A Module instance is used to store all the information related to an LLVM module.
static bool isContextualIRPGOEnabled()
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
PGOInstrumentationUse(std::string Filename="", std::string RemappingFilename="", bool IsCS=false, IntrusiveRefCntPtr< vfs::FileSystem > FS=nullptr)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
void preserveSet()
Mark an analysis set as preserved.
void preserve()
Mark an analysis as preserved.
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
uint64_t getOrCompColdCountThreshold() const
Returns ColdCountThreshold if set.
void refresh()
If no summary is present, attempt to refresh.
bool isColdCount(uint64_t C) const
Returns true if count C is considered cold.
bool isHotCount(uint64_t C) const
Returns true if count C is considered hot.
uint64_t getOrCompHotCountThreshold() const
Returns HotCountThreshold if set.
This class represents the LLVM 'select' instruction.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
constexpr bool empty() const
empty - Check if the string is empty.
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
TinyPtrVector - This class is specialized for cases where there are normally 0 or 1 element in a vect...
Triple - Helper class for working with autoconf configuration names.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
std::string str() const
Return the twine contents as a std::string.
The instances of the Type class are immutable: once they are created, they are never changed.
void print(raw_ostream &O, bool IsForDebug=false, bool NoDetails=false) const
Print the current type.
static IntegerType * getInt64Ty(LLVMContext &C)
Value * getOperand(unsigned i) const
Utility analysis that determines what values are worth profiling.
std::vector< CandidateInfo > get(InstrProfValueKind Kind) const
Returns a list of value profiling candidates of the given kind.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
A raw_ostream that writes to an std::string.
The virtual file system interface.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
This file contains the declaration of the Comdat class, which represents a single COMDAT in LLVM.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
uint64_t getFuncHash(const FuncRecordTy *Record)
Return the structural hash associated with the function.
void checkExpectAnnotations(Instruction &I, const ArrayRef< uint32_t > ExistingWeights, bool IsFrontend)
checkExpectAnnotations - compares PGO counters to the thresholds used for llvm.expect and warns if th...
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< FuncNode * > Func
void write64le(void *P, uint64_t V)
IntrusiveRefCntPtr< FileSystem > getRealFileSystem()
Gets a vfs::FileSystem for the 'real' file system, as seen by the operating system.
This is an optimization pass for GlobalISel generic memory operations.
void setIrrLoopHeaderMetadata(Module *M, Instruction *TI, uint64_t Count)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
std::string getPGOFuncName(const Function &F, bool InLTO=false, uint64_t Version=INSTR_PROF_INDEX_VERSION)
Please use getIRPGOFuncName for LLVM IR instrumentation.
void createPGOFuncNameMetadata(Function &F, StringRef PGOFuncName)
Create the PGOFuncName meta data if PGOFuncName is different from function's raw name.
unsigned GetSuccessorNumber(const BasicBlock *BB, const BasicBlock *Succ)
Search for the specified successor of basic block BB and return its position in the terminator instru...
std::string getIRPGOFuncName(const Function &F, bool InLTO=false)
Function::ProfileCount ProfileCount
auto successors(const MachineBasicBlock *BB)
void handleAllErrors(Error E, HandlerTs &&... Handlers)
Behaves the same as handleErrors, except that by contract all errors must be handled by the given han...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
cl::opt< InstrProfCorrelator::ProfCorrelatorKind > ProfileCorrelate("profile-correlate", cl::desc("Use debug info or binary file to correlate profiles."), cl::init(InstrProfCorrelator::NONE), cl::values(clEnumValN(InstrProfCorrelator::NONE, "", "No profile correlation"), clEnumValN(InstrProfCorrelator::DEBUG_INFO, "debug-info", "Use debug info to correlate"), clEnumValN(InstrProfCorrelator::BINARY, "binary", "Use binary to correlate")))
DenseMap< BasicBlock *, ColorVector > colorEHFunclets(Function &F)
If an EH funclet personality is in use (see isFuncletEHPersonality), this will recompute which blocks...
cl::opt< bool > PGOWarnMissing
raw_ostream & WriteGraph(raw_ostream &O, const GraphType &G, bool ShortNames=false, const Twine &Title="")
bool SplitIndirectBrCriticalEdges(Function &F, bool IgnoreBlocksWithoutPHI, BranchProbabilityInfo *BPI=nullptr, BlockFrequencyInfo *BFI=nullptr)
bool isScopedEHPersonality(EHPersonality Pers)
Returns true if this personality uses scope-style EH IR instructions: catchswitch,...
cl::opt< bool > DebugInfoCorrelate
OperandBundleDefT< Value * > OperandBundleDef
cl::opt< std::string > ViewBlockFreqFuncName("view-bfi-func-name", cl::Hidden, cl::desc("The option to specify " "the name of the function " "whose CFG will be displayed."))
GlobalVariable * createPGOFuncNameVar(Function &F, StringRef PGOFuncName)
Create and return the global variable for function name used in PGO instrumentation.
void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
auto reverse(ContainerTy &&C)
void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
cl::opt< bool > NoPGOWarnMismatch
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
cl::opt< PGOViewCountsType > PGOViewCounts("pgo-view-counts", cl::Hidden, cl::desc("A boolean option to show CFG dag or text with " "block profile counts and branch probabilities " "right after PGO profile annotation step. The " "profile counts are computed using branch " "probabilities from the runtime profile data and " "block frequency propagation algorithm. To view " "the raw counts from the profile, use option " "-pgo-view-raw-counts instead. To limit graph " "display to only one function, use filtering option " "-view-bfi-func-name."), cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), clEnumValN(PGOVCT_Graph, "graph", "show a graph."), clEnumValN(PGOVCT_Text, "text", "show in text.")))
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
static uint32_t scaleBranchCount(uint64_t Count, uint64_t Scale)
Scale an individual branch count.
void appendToCompilerUsed(Module &M, ArrayRef< GlobalValue * > Values)
Adds global values to the llvm.compiler.used list.
BasicBlock * SplitCriticalEdge(Instruction *TI, unsigned SuccNum, const CriticalEdgeSplittingOptions &Options=CriticalEdgeSplittingOptions(), const Twine &BBName="")
If this edge is a critical edge, insert a new node to split the critical edge.
void ViewGraph(const GraphType &G, const Twine &Name, bool ShortNames=false, const Twine &Title="", GraphProgram::Name Program=GraphProgram::DOT)
ViewGraph - Emit a dot graph, run 'dot', run gv on the postscript file, then cleanup.
bool isCriticalEdge(const Instruction *TI, unsigned SuccNum, bool AllowIdenticalEdges=false)
Return true if the specified edge is a critical edge.
static uint64_t calculateCountScale(uint64_t MaxCount)
Calculate what to divide by to scale counts.
bool canRenameComdatFunc(const Function &F, bool CheckAddressTaken=false)
Check if we can safely rename this Comdat function.
void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput)
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
auto predecessors(const MachineBasicBlock *BB)
void setProfMetadata(Module *M, Instruction *TI, ArrayRef< uint64_t > EdgeCounts, uint64_t MaxCount)
cl::opt< bool > EnableVTableValueProfiling("enable-vtable-value-profiling", cl::init(false), cl::desc("If true, the virtual table address will be instrumented to know " "the types of a C++ pointer. The information is used in indirect " "call promotion to do selective vtable-based comparison."))
SuccIterator< const Instruction, const BasicBlock > const_succ_iterator
cl::opt< bool > NoPGOWarnMismatchComdatWeak
Implement std::hash so that hash_code can be used in STL containers.
static constexpr roundingMode rmNearestTiesToEven
static const fltSemantics & IEEEdouble() LLVM_READNONE
DOTGraphTraits(bool isSimple=false)
static std::string getGraphName(const PGOUseFunc *G)
std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph)
DOTGraphTraits - Template class that can be specialized to customize how graphs are converted to 'dot...
DefaultDOTGraphTraits - This class provides the default implementations of all of the DOTGraphTraits ...
static ChildIteratorType child_end(const NodeRef N)
static NodeRef getEntryNode(const PGOUseFunc *G)
static ChildIteratorType child_begin(const NodeRef N)
static nodes_iterator nodes_end(const PGOUseFunc *G)
static nodes_iterator nodes_begin(const PGOUseFunc *G)
Profiling information for a single function.
std::vector< uint64_t > Counts
CountPseudoKind getCountPseudoKind() const
uint32_t getNumValueSites(uint32_t ValueKind) const
Return the number of instrumented sites for ValueKind.
static void setCSFlagInHash(uint64_t &FuncHash)
Instruction * AnnotatedInst