126#include <unordered_map>
133#define DEBUG_TYPE "pgo-instrumentation"
// Pass-wide statistic counters. Each STATISTIC invocation names one counter
// and supplies its human-readable description string. The counters without
// "CS" in their name are bumped on the non-context-sensitive path (IsCS is
// false); NumOfCSPGOInstrument is the context-sensitive counterpart of
// NumOfPGOInstrument.
135STATISTIC(NumOfPGOInstrument,
"Number of edges instrumented.");
136STATISTIC(NumOfPGOSelectInsts,
"Number of select instruction instrumented.");
137STATISTIC(NumOfPGOMemIntrinsics,
"Number of mem intrinsics instrumented.");
// Incremented in getInstrBB() each time a critical edge is split.
140STATISTIC(NumOfPGOSplit,
"Number of critical edge splits.");
// Incremented by readCounters() and populateCoverage().
141STATISTIC(NumOfPGOFunc,
"Number of functions having valid profile counts.");
// The next two are incremented by handleInstrProfError(), depending on the
// kind of instrprof_error reported for the function.
142STATISTIC(NumOfPGOMismatch,
"Number of functions having mismatch profile.");
143STATISTIC(NumOfPGOMissing,
"Number of functions without profile.");
// Accumulates the number of IPVK_IndirectCallTarget value sites per function.
144STATISTIC(NumOfPGOICall,
"Number of indirect call value instrumentations.");
145STATISTIC(NumOfCSPGOInstrument,
"Number of edges instrumented in CSPGO.");
147 "Number of select instruction instrumented in CSPGO.");
149 "Number of mem intrinsics instrumented in CSPGO.");
// Context-sensitive (CSPGO) statistic counters. NumOfCSPGOBB accumulates
// MST.bbInfoSize() per function; NumOfCSPGOSplit is bumped in getInstrBB()
// when IsCS is set and a critical edge is split.
151STATISTIC(NumOfCSPGOBB,
"Number of basic-blocks in CSPGO.");
152STATISTIC(NumOfCSPGOSplit,
"Number of critical edge splits in CSPGO.");
154 "Number of functions having valid profile counts in CSPGO.");
156 "Number of functions having mismatch profile in CSPGO.");
// NumOfCSPGOMissing is bumped by handleInstrProfError() for an
// unknown_function error when IsCS is set.
157STATISTIC(NumOfCSPGOMissing,
"Number of functions without profile in CSPGO.");
// NOTE(review): no increment of NumCoveredBlocks is visible in this listing;
// presumably it is updated by the block-coverage annotation code -- confirm.
158STATISTIC(NumCoveredBlocks,
"Number of basic blocks that were executed");
165 cl::desc(
"Specify the path of profile data file. This is "
166 "mainly for test purpose."));
170 cl::desc(
"Specify the path of profile remapping file. This is mainly for "
177 cl::desc(
"Disable Value Profiling"));
183 cl::desc(
"Max number of annotations for a single indirect "
190 cl::desc(
"Max number of precise value annotations for a single memop"
197 cl::desc(
"Append function hash to the name of COMDAT function to avoid "
198 "function hash mismatch due to the preinliner"));
205 cl::desc(
"Use this option to turn on/off "
206 "warnings about missing profile data for "
213 cl::desc(
"Use this option to turn off/on "
214 "warnings about profile cfg mismatch."));
221 cl::desc(
"The option is used to turn on/off "
222 "warnings about hash mismatch for comdat "
223 "or weak functions."));
228 cl::desc(
"Use this option to turn on/off SELECT "
229 "instruction instrumentation. "));
234 cl::desc(
"A boolean option to show CFG dag or text "
235 "with raw profile counts from "
236 "profile data. See also option "
237 "-pgo-view-counts. To limit graph "
238 "display to only one function, use "
239 "filtering option -view-bfi-func-name."),
247 cl::desc(
"Use this option to turn on/off "
248 "memory intrinsic size profiling."));
253 cl::desc(
"When this option is on, the annotated "
254 "branch probability will be emitted as "
255 "optimization remarks: -{Rpass|"
256 "pass-remarks}=pgo-instrumentation"));
260 cl::desc(
"Force to instrument function entry basicblock."));
265 cl::desc(
"Force to instrument loop entries."));
270 "Use this option to enable function entry coverage instrumentation."));
273 "pgo-block-coverage",
274 cl::desc(
"Use this option to enable basic block coverage instrumentation"));
278 cl::desc(
"Create a dot file of CFGs with block "
279 "coverage inference information"));
282 "pgo-temporal-instrumentation",
283 cl::desc(
"Use this option to enable temporal instrumentation"));
287 cl::desc(
"Fix function entry count in profile use."));
291 cl::desc(
"Print out the non-match BFI count if a hot raw profile count "
292 "becomes non-hot, or a cold raw profile count becomes hot. "
293 "The print is enabled under -Rpass-analysis=pgo, or "
294 "internal option -pass-remarks-analysis=pgo."));
298 cl::desc(
"Print out mismatched BFI counts after setting profile metadata "
299 "The print is enabled under -Rpass-analysis=pgo, or "
300 "internal option -pass-remarks-analysis=pgo."));
304 cl::desc(
"Set the threshold for pgo-verify-bfi: only print out "
305 "mismatched BFI if the difference percentage is greater than "
306 "this value (in percentage)."));
310 cl::desc(
"Set the threshold for pgo-verify-bfi: skip the counts whose "
311 "profile count value is below."));
316 cl::desc(
"Trace the hash of the function with this name."));
320 cl::desc(
"Do not instrument functions smaller than this threshold."));
324 cl::desc(
"Do not instrument functions with the number of critical edges "
325 " greater than this threshold."));
329 cl::desc(
"For cold function instrumentation, skip instrumenting functions "
330 "whose entry count is above the given value."));
334 cl::desc(
"For cold function instrumentation, treat count unknown(e.g. "
335 "unprofiled) functions as cold."));
339 cl::desc(
"Enable cold function only instrumentation."));
343 cl::desc(
"Do not instrument callsites to functions in this list. Intended "
365class FunctionInstrumenter final {
369 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers;
380 bool isValueProfilingDisabled()
const {
392 bool shouldInstrumentEntryBB()
const {
400 FunctionInstrumenter(
402 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
406 : M(M),
F(
F), TLI(TLI), ComdatMembers(ComdatMembers), BPI(BPI), BFI(BFI),
407 LI(LI), InstrumentationType(InstrumentationType) {}
418 return std::string();
423 return std::string();
435 else if (CV->
isOne())
446#define VALUE_PROF_KIND(Enumerator, Value, Descr) Descr,
479 Triple TT(M.getTargetTriple());
480 if (TT.supportsCOMDAT()) {
482 IRLevelVersionVariable->setComdat(M.getOrInsertComdat(VarName));
484 return IRLevelVersionVariable;
/// Selects what SelectInstVisitor does with the select instructions it
/// visits. VM_counting is the visitor's default Mode; instrumentSelects()
/// switches it to VM_instrument. VM_annotate is presumably set by
/// annotateSelects() -- confirm against the full definition.
494enum VisitMode { VM_counting, VM_instrument, VM_annotate };
498struct SelectInstVisitor :
public InstVisitor<SelectInstVisitor> {
501 VisitMode Mode = VM_counting;
502 unsigned *CurCtrIdx =
nullptr;
503 unsigned TotalNumCtrs = 0;
504 GlobalValue *FuncNameVar =
nullptr;
505 uint64_t FuncHash = 0;
506 PGOUseFunc *UseFunc =
nullptr;
507 bool HasSingleByteCoverage;
/// Bind the visitor to \p Func (stored in F) and remember whether
/// single-byte coverage is in use. Every other member keeps its in-class
/// default, so a freshly built visitor is in VM_counting mode with no
/// counter index, name variable, hash or PGOUseFunc attached.
509 SelectInstVisitor(Function &Func,
bool HasSingleByteCoverage)
510 : F(
Func), HasSingleByteCoverage(HasSingleByteCoverage) {}
512 void countSelects() {
522 void instrumentSelects(
unsigned *Ind,
unsigned TotalNC, GlobalValue *FNV,
524 Mode = VM_instrument;
526 TotalNumCtrs = TotalNC;
533 void annotateSelects(PGOUseFunc *UF,
unsigned *Ind) {
540 void instrumentOneSelectInst(SelectInst &SI);
541 void annotateOneSelectInst(SelectInst &SI);
544 void visitSelectInst(SelectInst &SI);
/// Return the current value of NSIs.
/// NOTE(review): presumably the number of select instructions tallied by
/// countSelects(); the code that updates NSIs is not visible here -- confirm.
548 unsigned getNumOfSelectInsts()
const {
return NSIs; }
560 bool Removed =
false;
561 bool IsCritical =
false;
/// Build an edge from \p Src to \p Dest with weight \p W (1 when omitted).
/// The arguments are stored in SrcBB, DestBB and Weight; Removed and
/// IsCritical keep their in-class default of false.
563 PGOEdge(BasicBlock *Src, BasicBlock *Dest, uint64_t W = 1)
564 : SrcBB(Src), DestBB(Dest), Weight(
W) {}
567 std::string infoString()
const {
568 return (Twine(Removed ?
"-" :
" ") + (InMST ?
" " :
"*") +
569 (IsCritical ?
"c" :
" ") +
" W=" + Twine(Weight))
/// Create the info record for the block with index \p IX. Group is
/// initialised to point at this object itself, so each record starts out as
/// the sole member of its own group.
580 PGOBBInfo(
unsigned IX) : Group(this), Index(IX) {}
583 std::string infoString()
const {
584 return (Twine(
"Index=") + Twine(Index)).str();
589template <
class Edge,
class BBInfo>
class FuncPGOInstrumentation {
597 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers;
599 ValueProfileCollector VPC;
601 void computeCFGHash();
602 void renameComdatFunction();
605 const TargetLibraryInfo &TLI;
606 std::vector<std::vector<VPCandidateInfo>> ValueSites;
607 SelectInstVisitor SIVisitor;
608 std::string FuncName;
609 std::string DeprecatedFuncName;
610 GlobalVariable *FuncNameVar;
613 uint64_t FunctionHash = 0;
616 CFGMST<Edge, BBInfo> MST;
618 const std::optional<BlockCoverageInference> BCI;
620 static std::optional<BlockCoverageInference>
621 constructBCI(Function &Func,
bool HasSingleByteCoverage,
622 bool InstrumentFuncEntry) {
623 if (HasSingleByteCoverage)
624 return BlockCoverageInference(Func, InstrumentFuncEntry);
630 void getInstrumentBBs(std::vector<BasicBlock *> &InstrumentBBs);
/// Return, by reference, the BBInfo that the MST holds for \p BB. This is a
/// plain forward to MST.getBBInfo(). Callers in this file also pass nullptr
/// as \p BB (see readCounters()).
/// NOTE(review): unlike findBBInfo() there is no "not found" result here, so
/// presumably \p BB must already be known to the MST -- confirm.
637 BBInfo &getBBInfo(
const BasicBlock *BB)
const {
return MST.getBBInfo(BB); }
/// Look up the BBInfo that the MST holds for \p BB and return it as a
/// pointer. This is a plain forward to MST.findBBInfo(). Callers in this
/// file compare the result against nullptr before using it, so a null
/// return must be expected.
640 BBInfo *findBBInfo(
const BasicBlock *BB)
const {
return MST.findBBInfo(BB); }
643 void dumpInfo(StringRef Str =
"")
const {
644 MST.dumpEdges(
dbgs(), Twine(
"Dump Function ") + FuncName +
645 " Hash: " + Twine(FunctionHash) +
"\t" + Str);
648 FuncPGOInstrumentation(
649 Function &Func, TargetLibraryInfo &TLI,
650 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
651 bool CreateGlobalVar =
false, BranchProbabilityInfo *BPI =
nullptr,
652 BlockFrequencyInfo *BFI =
nullptr, LoopInfo *LI =
nullptr,
653 bool IsCS =
false,
bool InstrumentFuncEntry =
true,
654 bool InstrumentLoopEntries =
false,
bool HasSingleByteCoverage =
false)
655 : F(
Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(
Func, TLI),
656 TLI(TLI), ValueSites(IPVK_Last + 1),
657 SIVisitor(
Func, HasSingleByteCoverage),
658 MST(F, InstrumentFuncEntry, InstrumentLoopEntries, BPI, BFI, LI),
659 BCI(constructBCI(
Func, HasSingleByteCoverage, InstrumentFuncEntry)) {
661 BCI->viewBlockCoverageGraph();
663 SIVisitor.countSelects();
664 ValueSites[IPVK_MemOPSize] = VPC.get(IPVK_MemOPSize);
666 NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
667 NumOfPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
668 NumOfPGOBB += MST.bbInfoSize();
669 ValueSites[IPVK_IndirectCallTarget] = VPC.get(IPVK_IndirectCallTarget);
671 ValueSites[IPVK_VTableTarget] = VPC.get(IPVK_VTableTarget);
673 NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
674 NumOfCSPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
675 NumOfCSPGOBB += MST.bbInfoSize();
681 if (!ComdatMembers.empty())
682 renameComdatFunction();
685 for (
const auto &
E : MST.allEdges()) {
688 IsCS ? NumOfCSPGOEdge++ : NumOfPGOEdge++;
690 IsCS ? NumOfCSPGOInstrument++ : NumOfPGOInstrument++;
703template <
class Edge,
class BBInfo>
704void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
705 std::vector<uint8_t> Indexes;
709 auto BI = findBBInfo(Succ);
713 for (
int J = 0; J < 4; J++)
714 Indexes.push_back((
uint8_t)(Index >> (J * 8)));
721 auto updateJCH = [&JCH](
uint64_t Num) {
726 updateJCH((
uint64_t)SIVisitor.getNumOfSelectInsts());
727 updateJCH((
uint64_t)ValueSites[IPVK_IndirectCallTarget].
size());
730 updateJCH(BCI->getInstrumentedBlocksHash());
743 LLVM_DEBUG(
dbgs() <<
"Function Hash Computation for " <<
F.getName() <<
":\n"
744 <<
" CRC = " << JC.
getCRC()
745 <<
", Selects = " << SIVisitor.getNumOfSelectInsts()
746 <<
", Edges = " << MST.
numEdges() <<
", ICSites = "
747 << ValueSites[IPVK_IndirectCallTarget].size()
748 <<
", Memops = " << ValueSites[IPVK_MemOPSize].size()
749 <<
", High32 CRC = " << JCH.
getCRC()
750 <<
", Hash = " << FunctionHash <<
"\n";);
753 dbgs() <<
"Funcname=" <<
F.getName() <<
", Hash=" << FunctionHash
754 <<
" in building " <<
F.getParent()->getSourceFileName() <<
"\n";
760 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
772 for (
auto &&CM :
make_range(ComdatMembers.equal_range(
C))) {
782template <
class Edge,
class BBInfo>
783void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() {
786 std::string OrigName =
F.getName().str();
787 std::string NewFuncName =
789 F.setName(
Twine(NewFuncName));
791 FuncName =
Twine(FuncName +
"." +
Twine(FunctionHash)).
str();
797 if (!
F.hasComdat()) {
799 NewComdat = M->getOrInsertComdat(
StringRef(NewFuncName));
801 F.setComdat(NewComdat);
806 Comdat *OrigComdat =
F.getComdat();
807 std::string NewComdatName =
809 NewComdat = M->getOrInsertComdat(
StringRef(NewComdatName));
812 for (
auto &&CM :
make_range(ComdatMembers.equal_range(OrigComdat))) {
820template <
class Edge,
class BBInfo>
821void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs(
822 std::vector<BasicBlock *> &InstrumentBBs) {
825 if (BCI->shouldInstrumentBlock(BB))
826 InstrumentBBs.push_back(&BB);
831 std::vector<Edge *> EdgeList;
834 EdgeList.push_back(
E.get());
836 for (
auto &
E : EdgeList) {
839 InstrumentBBs.push_back(InstrBB);
845template <
class Edge,
class BBInfo>
846BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *
E) {
847 if (
E->InMST ||
E->Removed)
853 if (SrcBB ==
nullptr)
855 if (DestBB ==
nullptr)
870 return canInstrument(SrcBB);
872 return canInstrument(DestBB);
881 dbgs() <<
"Fail to split critical edge: not instrument this edge.\n");
886 IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++;
887 LLVM_DEBUG(
dbgs() <<
"Split critical edge: " << getBBInfo(SrcBB).Index
888 <<
" --> " << getBBInfo(DestBB).Index <<
"\n");
890 MST.
addEdge(SrcBB, InstrBB, 0);
892 Edge &NewEdge1 = MST.
addEdge(InstrBB, DestBB, 0);
893 NewEdge1.InMST =
true;
896 return canInstrument(InstrBB);
915 std::optional<OperandBundleUse> ParentFunclet =
923 if (!BlockColors.
empty()) {
924 const ColorVector &CV = BlockColors.
find(OrigCall->getParent())->second;
925 assert(CV.
size() == 1 &&
"non-unique color for block!");
927 if (EHPadIt->isEHPad())
935void FunctionInstrumenter::instrument() {
942 const bool IsCtxProf = InstrumentationType == PGOInstrumentationType::CTXPROF;
943 FuncPGOInstrumentation<PGOEdge, PGOBBInfo> FuncInfo(
944 F, TLI, ComdatMembers, !IsCtxProf, BPI, BFI, LI,
945 InstrumentationType == PGOInstrumentationType::CSFDO,
946 shouldInstrumentEntryBB(), shouldInstrumentLoopEntries(),
950 auto *
const CFGHash =
951 ConstantInt::get(Type::getInt64Ty(
M.getContext()), FuncInfo.FunctionHash);
955 Name, PointerType::get(
M.getContext(), 0));
957 auto &EntryBB =
F.getEntryBlock();
958 IRBuilder<> Builder(&EntryBB, EntryBB.getFirstNonPHIOrDbgOrAlloca());
961 Builder.CreateIntrinsic(
962 Intrinsic::instrprof_cover,
963 {NormalizedNamePtr, CFGHash, Builder.getInt32(1), Builder.getInt32(0)});
967 std::vector<BasicBlock *> InstrumentBBs;
968 FuncInfo.getInstrumentBBs(InstrumentBBs);
969 unsigned NumCounters =
970 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
985 auto Visit = [&](llvm::function_ref<void(CallBase * CB)> Visitor) {
987 for (
auto &Instr : BB)
991 if (CS->getCalledFunction() &&
992 SkipCSInstr.contains(CS->getCalledFunction()->getName()))
998 uint32_t TotalNumCallsites = 0;
999 Visit([&TotalNumCallsites](
auto *) { ++TotalNumCallsites; });
1003 Visit([&](
auto *CB) {
1005 Builder.CreateCall(CSIntrinsic,
1006 {
Name, CFGHash, Builder.getInt32(TotalNumCallsites),
1008 CB->getCalledOperand()});
1015 auto &EntryBB =
F.getEntryBlock();
1016 IRBuilder<> Builder(&EntryBB, EntryBB.getFirstNonPHIOrDbgOrAlloca());
1019 Builder.CreateIntrinsic(Intrinsic::instrprof_timestamp,
1020 {NormalizedNamePtr, CFGHash,
1021 Builder.getInt32(NumCounters),
1022 Builder.getInt32(
I)});
1026 for (
auto *InstrBB : InstrumentBBs) {
1028 assert(Builder.GetInsertPoint() != InstrBB->
end() &&
1029 "Cannot get the Instrumentation point");
1033 : Intrinsic::instrprof_increment,
1034 {NormalizedNamePtr, CFGHash,
1035 Builder.getInt32(NumCounters),
1036 Builder.getInt32(
I++)});
1040 FuncInfo.SIVisitor.instrumentSelects(&
I, NumCounters, Name,
1041 FuncInfo.FunctionHash);
1044 if (isValueProfilingDisabled())
1047 NumOfPGOICall += FuncInfo.ValueSites[IPVK_IndirectCallTarget].size();
1053 DenseMap<BasicBlock *, ColorVector> BlockColors;
1054 if (
F.hasPersonalityFn() &&
1059 for (uint32_t Kind = IPVK_First;
Kind <= IPVK_Last; ++
Kind) {
1060 unsigned SiteIndex = 0;
1066 <<
" site: CallSite Index = " << SiteIndex <<
"\n");
1069 assert(Builder.GetInsertPoint() != Cand.InsertPt->getParent()->end() &&
1070 "Cannot get the Instrumentation point");
1072 Value *ToProfile =
nullptr;
1073 if (Cand.V->getType()->isIntegerTy())
1074 ToProfile = Builder.CreateZExtOrTrunc(Cand.V, Builder.getInt64Ty());
1075 else if (Cand.V->getType()->isPointerTy())
1076 ToProfile = Builder.CreatePtrToInt(Cand.V, Builder.getInt64Ty());
1077 assert(ToProfile &&
"value profiling Value is of unexpected type");
1080 Name, PointerType::get(
M.getContext(), 0));
1086 Intrinsic::instrprof_value_profile),
1087 {NormalizedNamePtr, Builder.getInt64(FuncInfo.FunctionHash),
1088 ToProfile, Builder.getInt32(Kind), Builder.getInt32(SiteIndex++)},
1097struct PGOUseEdge :
public PGOEdge {
1098 using PGOEdge::PGOEdge;
1100 std::optional<uint64_t>
Count;
1106 std::string infoString()
const {
1108 return PGOEdge::infoString();
1109 return (Twine(PGOEdge::infoString()) +
" Count=" + Twine(*
Count)).str();
1116struct PGOUseBBInfo :
public PGOBBInfo {
1117 std::optional<uint64_t>
Count;
1118 int32_t UnknownCountInEdge = 0;
1119 int32_t UnknownCountOutEdge = 0;
1120 DirectEdges InEdges;
1121 DirectEdges OutEdges;
/// Create the profile-use record for the block with index \p IX by
/// delegating to the PGOBBInfo constructor. Count starts empty and both
/// unknown-edge counters start at 0 through their in-class initialisers.
1123 PGOUseBBInfo(
unsigned IX) : PGOBBInfo(IX) {}
1129 std::string infoString()
const {
1131 return PGOBBInfo::infoString();
1132 return (Twine(PGOBBInfo::infoString()) +
" Count=" + Twine(*
Count)).str();
1136 void addOutEdge(PGOUseEdge *
E) {
1137 OutEdges.push_back(
E);
1138 UnknownCountOutEdge++;
1142 void addInEdge(PGOUseEdge *
E) {
1143 InEdges.push_back(
E);
1144 UnknownCountInEdge++;
1153 for (
const auto &
E : Edges) {
/// Set up profile-use state for \p Func in module \p Modu.
///
/// Stores \p Func, \p Modu, \p BFIin and \p PSI in F, M, BFI and PSI, and
/// builds FuncInfo (a FuncPGOInstrumentation<PGOUseEdge, PGOUseBBInfo>) from
/// \p Func, \p TLI, \p ComdatMembers, \p BPI, \p BFIin, \p LI, \p IsCS and
/// the three instrumentation flags. The literal `false` passed as the fourth
/// FuncInfo argument is that constructor's CreateGlobalVar parameter.
/// FreqAttr starts as FFA_Normal, and a separate ValueProfileCollector (VPC)
/// is constructed from \p Func and \p TLI.
///
/// NOTE(review): ProgramMaxCount is not initialised here and has no
/// in-class initialiser; confirm it is assigned before it is read.
1166 PGOUseFunc(Function &Func,
Module *Modu, TargetLibraryInfo &TLI,
1167 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
1168 BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFIin,
1169 LoopInfo *LI, ProfileSummaryInfo *PSI,
bool IsCS,
1170 bool InstrumentFuncEntry,
bool InstrumentLoopEntries,
1171 bool HasSingleByteCoverage)
1172 :
F(
Func),
M(Modu), BFI(BFIin), PSI(PSI),
1173 FuncInfo(
Func, TLI, ComdatMembers,
false, BPI, BFIin, LI, IsCS,
1174 InstrumentFuncEntry, InstrumentLoopEntries,
1175 HasSingleByteCoverage),
1176 FreqAttr(FFA_Normal), IsCS(IsCS), VPC(
Func, TLI) {}
1178 void handleInstrProfError(
Error Err, uint64_t MismatchedFuncSum);
1183 bool getRecord(IndexedInstrProfReader *PGOReader);
1186 bool readCounters(
bool &AllZeros,
1190 void populateCounters();
1193 void populateCoverage();
1199 void annotateValueSites();
1202 void annotateValueSites(uint32_t Kind);
1205 void annotateIrrLoopHeaderWeights();
1208 void setBlockUniformityAttribute();
/// Frequency classification recorded for the function in FreqAttr. The
/// constructor starts every function as FFA_Normal, and
/// markFunctionAttributes() can change it (for example to FFA_Cold).
1211 enum FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot };
/// Return the frequency attribute currently recorded in FreqAttr.
1214 FuncFreqAttr getFuncFreqAttr()
const {
return FreqAttr; }
/// Return the function hash held by FuncInfo (FuncInfo.FunctionHash).
1217 uint64_t
getFuncHash()
const {
return FuncInfo.FunctionHash; }
/// Return a mutable reference to the stored ProfileRecord. getRecord()
/// move-assigns the record it reads into ProfileRecord, and
/// SelectInstVisitor reads its Counts through this accessor.
1220 NamedInstrProfRecord &getProfileRecord() {
return ProfileRecord; }
1223 PGOUseBBInfo &getBBInfo(
const BasicBlock *BB)
const {
1224 return FuncInfo.getBBInfo(BB);
1228 PGOUseBBInfo *findBBInfo(
const BasicBlock *BB)
const {
1229 return FuncInfo.findBBInfo(BB);
/// Forward to FuncInfo.dumpInfo(), passing \p Str through unchanged
/// (\p Str is the empty string when omitted).
1234 void dumpInfo(StringRef Str =
"")
const { FuncInfo.dumpInfo(Str); }
/// Return the stored ProgramMaxCount.
/// NOTE(review): ProgramMaxCount is declared without an initialiser and the
/// constructor does not set it; the assignment is not visible in this
/// listing, so confirm it always happens before this accessor is called.
1236 uint64_t getProgramMaxCount()
const {
return ProgramMaxCount; }
1241 BlockFrequencyInfo *BFI;
1242 ProfileSummaryInfo *PSI;
1245 FuncPGOInstrumentation<PGOUseEdge, PGOUseBBInfo> FuncInfo;
1249 uint64_t ProgramMaxCount;
1252 uint32_t CountPosition = 0;
1255 uint32_t ProfileCountSize = 0;
1258 NamedInstrProfRecord ProfileRecord;
1261 FuncFreqAttr FreqAttr;
1266 ValueProfileCollector VPC;
1269 bool setInstrumentedCounts(
const std::vector<uint64_t> &CountFromProfile);
1273 void setEdgeCount(DirectEdges &Edges, uint64_t
Value);
1278 void markFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) {
1282 FreqAttr = FFA_Cold;
1290 const FuncPGOInstrumentation<PGOUseEdge, PGOUseBBInfo> &FuncInfo) {
1294 for (
const auto &
E : FuncInfo.MST.allEdges()) {
1299 PGOUseBBInfo &SrcInfo = FuncInfo.getBBInfo(SrcBB);
1300 PGOUseBBInfo &DestInfo = FuncInfo.getBBInfo(DestBB);
1301 SrcInfo.addOutEdge(
E.get());
1302 DestInfo.addInEdge(
E.get());
1308bool PGOUseFunc::setInstrumentedCounts(
1309 const std::vector<uint64_t> &CountFromProfile) {
1311 std::vector<BasicBlock *> InstrumentBBs;
1312 FuncInfo.getInstrumentBBs(InstrumentBBs);
1316 unsigned NumInstrumentedBBs = InstrumentBBs.size();
1317 unsigned NumSelects = FuncInfo.SIVisitor.getNumOfSelectInsts();
1318 unsigned NumCounters = NumInstrumentedBBs + NumSelects;
1321 if (NumCounters != CountFromProfile.size()) {
1323 dbgs() <<
"PGO COUNTER MISMATCH for function " <<
F.getName() <<
":\n";
1324 dbgs() <<
" Expected counters: " << NumCounters <<
"\n";
1325 dbgs() <<
" - From instrumented edges: " << NumInstrumentedBBs <<
"\n";
1326 for (
size_t i = 0; i < InstrumentBBs.size(); ++i) {
1327 dbgs() <<
" " << i <<
": ";
1328 InstrumentBBs[i]->printAsOperand(
dbgs(),
false);
1331 dbgs() <<
" - From select instructions: " << NumSelects <<
"\n";
1332 dbgs() <<
" Actual counters from profile: " << CountFromProfile.size()
1337 auto *FuncEntry = &*
F.begin();
1341 for (BasicBlock *InstrBB : InstrumentBBs) {
1342 uint64_t CountValue = CountFromProfile[
I++];
1343 PGOUseBBInfo &
Info = getBBInfo(InstrBB);
1347 if (InstrBB == FuncEntry && CountValue == 0)
1349 Info.setBBInfoCount(CountValue);
1351 ProfileCountSize = CountFromProfile.size();
1355 auto setEdgeCount = [
this](PGOUseEdge *
E, uint64_t
Value) ->
void {
1357 this->getBBInfo(
E->SrcBB).UnknownCountOutEdge--;
1358 this->getBBInfo(
E->DestBB).UnknownCountInEdge--;
1364 for (
const auto &
E : FuncInfo.MST.allEdges()) {
1365 if (
E->Removed ||
E->InMST)
1368 PGOUseBBInfo &SrcInfo = getBBInfo(SrcBB);
1372 if (SrcInfo.Count && SrcInfo.OutEdges.size() == 1)
1373 setEdgeCount(
E.get(), *SrcInfo.Count);
1376 PGOUseBBInfo &DestInfo = getBBInfo(DestBB);
1379 if (DestInfo.Count && DestInfo.InEdges.size() == 1)
1380 setEdgeCount(
E.get(), *DestInfo.Count);
1386 setEdgeCount(
E.get(), 0);
1393void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t
Value) {
1394 for (
auto &
E : Edges) {
1399 getBBInfo(
E->SrcBB).UnknownCountOutEdge--;
1400 getBBInfo(
E->DestBB).UnknownCountInEdge--;
1408 const char MetadataName[] =
"instr_prof_hash_mismatch";
1411 auto *Existing =
F.getMetadata(LLVMContext::MD_annotation);
1414 for (
const auto &
N : Tuple->operands()) {
1415 if (
N.equalsStr(MetadataName))
1424 F.setMetadata(LLVMContext::MD_annotation, MD);
1427void PGOUseFunc::handleInstrProfError(
Error Err, uint64_t MismatchedFuncSum) {
1429 auto &Ctx =
M->getContext();
1430 auto Err = IPE.
get();
1431 bool SkipWarning =
false;
1433 << FuncInfo.FuncName <<
": ");
1434 if (Err == instrprof_error::unknown_function) {
1435 IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++;
1438 }
else if (Err == instrprof_error::hash_mismatch ||
1439 Err == instrprof_error::malformed) {
1440 IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++;
1446 LLVM_DEBUG(
dbgs() <<
"hash mismatch (hash= " << FuncInfo.FunctionHash
1447 <<
" skip=" << SkipWarning <<
")");
1457 IPE.
message() + std::string(
" ") +
F.getName().str() +
1458 std::string(
" Hash = ") + std::to_string(FuncInfo.FunctionHash) +
1459 std::string(
" up to ") + std::to_string(MismatchedFuncSum) +
1460 std::string(
" count discarded");
1463 DiagnosticInfoPGOProfile(
M->getName().data(), Msg,
DS_Warning));
1467bool PGOUseFunc::getRecord(IndexedInstrProfReader *PGOReader) {
1468 uint64_t MismatchedFuncSum = 0;
1470 FuncInfo.FuncName, FuncInfo.FunctionHash, FuncInfo.DeprecatedFuncName,
1471 &MismatchedFuncSum);
1473 handleInstrProfError(std::move(
E), MismatchedFuncSum);
1476 ProfileRecord = std::move(
Result.get());
1484bool PGOUseFunc::readCounters(
bool &AllZeros,
1486 auto &Ctx =
M->getContext();
1491 std::vector<uint64_t> &CountFromProfile = ProfileRecord.
Counts;
1493 IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++;
1496 uint64_t ValueSum = 0;
1497 for (
unsigned I = 0, S = CountFromProfile.size();
I < S;
I++) {
1499 ValueSum += CountFromProfile[
I];
1501 AllZeros = (ValueSum == 0);
1505 getBBInfo(
nullptr).UnknownCountOutEdge = 2;
1506 getBBInfo(
nullptr).UnknownCountInEdge = 2;
1508 if (!setInstrumentedCounts(CountFromProfile)) {
1510 dbgs() <<
"Inconsistent number of counts, skipping this function");
1511 Ctx.diagnose(DiagnosticInfoPGOProfile(
1512 M->getName().data(),
1513 Twine(
"Inconsistent number of counts in ") +
F.getName().str() +
1514 Twine(
": the profile may be stale or there is a function name "
1522void PGOUseFunc::populateCoverage() {
1523 IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++;
1525 ArrayRef<uint64_t> CountsFromProfile = ProfileRecord.
Counts;
1526 DenseMap<const BasicBlock *, bool>
Coverage;
1529 if (FuncInfo.BCI->shouldInstrumentBlock(BB))
1534 DenseMap<const BasicBlock *, DenseSet<const BasicBlock *>>
1535 InverseDependencies;
1536 for (
auto &BB :
F) {
1537 for (
auto *Dep : FuncInfo.BCI->getDependencies(BB)) {
1539 InverseDependencies[Dep].
insert(&BB);
1544 std::stack<const BasicBlock *> CoveredBlocksToProcess;
1545 for (
auto &[BB, IsCovered] : Coverage)
1547 CoveredBlocksToProcess.push(BB);
1549 while (!CoveredBlocksToProcess.empty()) {
1550 auto *CoveredBlock = CoveredBlocksToProcess.top();
1551 assert(Coverage[CoveredBlock]);
1552 CoveredBlocksToProcess.pop();
1553 for (
auto *BB : InverseDependencies[CoveredBlock]) {
1559 CoveredBlocksToProcess.push(BB);
1564 MDBuilder MDB(
F.getContext());
1567 F.setEntryCount(Coverage[&
F.getEntryBlock()] ? 10000 : 0);
1568 for (
auto &BB :
F) {
1575 SmallVector<uint32_t, 4> Weights;
1577 Weights.
push_back((Coverage[Succ] || !Coverage[&BB]) ? 1 : 0);
1578 if (Weights.
size() >= 2)
1583 unsigned NumCorruptCoverage = 0;
1584 DominatorTree DT(
F);
1586 BranchProbabilityInfo BPI(
F, LI);
1587 BlockFrequencyInfo BFI(
F, BPI, LI);
1588 auto IsBlockDead = [&](
const BasicBlock &BB) -> std::optional<bool> {
1593 LLVM_DEBUG(
dbgs() <<
"Block Coverage: (Instrumented=*, Covered=X)\n");
1594 for (
auto &BB :
F) {
1595 LLVM_DEBUG(
dbgs() << (FuncInfo.BCI->shouldInstrumentBlock(BB) ?
"* " :
" ")
1596 << (Coverage[&BB] ?
"X " :
" ") <<
" " << BB.getName()
1603 if (Cov == IsBlockDead(BB).value_or(
false)) {
1605 dbgs() <<
"Found inconsistent block covearge for " << BB.getName()
1606 <<
": BCI=" << (Cov ?
"Covered" :
"Dead") <<
" BFI="
1607 << (IsBlockDead(BB).value() ?
"Dead" :
"Covered") <<
"\n");
1608 ++NumCorruptCoverage;
1614 auto &Ctx =
M->getContext();
1615 Ctx.diagnose(DiagnosticInfoPGOProfile(
1616 M->getName().data(),
1617 Twine(
"Found inconsistent block coverage for function ") +
F.getName() +
1618 " in " + Twine(NumCorruptCoverage) +
" blocks.",
1622 FuncInfo.BCI->viewBlockCoverageGraph(&Coverage);
1627void PGOUseFunc::populateCounters() {
1628 bool Changes =
true;
1629 unsigned NumPasses = 0;
1637 PGOUseBBInfo *UseBBInfo = findBBInfo(&BB);
1638 if (UseBBInfo ==
nullptr)
1640 if (!UseBBInfo->Count) {
1641 if (UseBBInfo->UnknownCountOutEdge == 0) {
1644 }
else if (UseBBInfo->UnknownCountInEdge == 0) {
1649 if (UseBBInfo->Count) {
1650 if (UseBBInfo->UnknownCountOutEdge == 1) {
1656 if (*UseBBInfo->Count > OutSum)
1657 Total = *UseBBInfo->Count - OutSum;
1658 setEdgeCount(UseBBInfo->OutEdges,
Total);
1661 if (UseBBInfo->UnknownCountInEdge == 1) {
1664 if (*UseBBInfo->Count > InSum)
1665 Total = *UseBBInfo->Count - InSum;
1666 setEdgeCount(UseBBInfo->InEdges,
Total);
1673 LLVM_DEBUG(
dbgs() <<
"Populate counts in " << NumPasses <<
" passes.\n");
1677 for (
auto &BB :
F) {
1678 auto BI = findBBInfo(&BB);
1681 assert(BI->Count &&
"BB count is not valid");
1685 FuncInfo.SIVisitor.annotateSelects(
this, &CountPosition);
1686 assert(CountPosition == ProfileCountSize);
1690 for (
auto &BB :
F) {
1691 auto BI = findBBInfo(&BB);
1694 FuncMaxCount = std::max(FuncMaxCount, *BI->Count);
1703 LLVM_DEBUG(FuncInfo.dumpInfo(
"after reading profile."));
1707void PGOUseFunc::setBranchWeights() {
1709 LLVM_DEBUG(
dbgs() <<
"\nSetting branch weights for func " <<
F.getName()
1710 <<
" IsCS=" << IsCS <<
"\n");
1711 for (
auto &BB :
F) {
1720 const PGOUseBBInfo &BBCountInfo = getBBInfo(&BB);
1721 if (!*BBCountInfo.Count)
1728 unsigned OutEdgesCount = BBCountInfo.OutEdges.size();
1729 unsigned SuccessorCount = BB.getTerminator()->getNumSuccessors();
1730 assert(OutEdgesCount <= SuccessorCount);
1733 uint64_t MaxCount = 0;
1734 for (
unsigned It = 0; It < OutEdgesCount; It++) {
1735 const PGOUseEdge *
E = BBCountInfo.OutEdges[It];
1738 if (DestBB ==
nullptr)
1741 uint64_t EdgeCount = *
E->Count;
1742 if (EdgeCount > MaxCount)
1743 MaxCount = EdgeCount;
1744 EdgeCounts[SuccNum] = EdgeCount;
1753 auto &Ctx =
M->getContext();
1754 Ctx.diagnose(DiagnosticInfoPGOProfile(
1755 M->getName().data(),
1756 Twine(
"Profile in ") +
F.getName().str() +
1757 Twine(
" partially ignored") +
1758 Twine(
", possibly due to the lack of a return path."),
1772void PGOUseFunc::annotateIrrLoopHeaderWeights() {
1773 LLVM_DEBUG(
dbgs() <<
"\nAnnotating irreducible loop header weights.\n");
1775 for (
auto &BB :
F) {
1781 const PGOUseBBInfo &BBCountInfo = getBBInfo(&BB);
1787void PGOUseFunc::setBlockUniformityAttribute() {
1798 std::vector<BasicBlock *> InstrumentBBs;
1799 FuncInfo.getInstrumentBBs(InstrumentBBs);
1801 LLVMContext &Ctx =
F.getContext();
1802 Type *Int1Ty = Type::getInt1Ty(Ctx);
1804 for (
size_t I = 0,
E = InstrumentBBs.size();
I <
E; ++
I) {
1816 dbgs() <<
"PGO: Set block uniformity profile for " <<
F.getName() <<
": ";
1817 for (
size_t I = 0,
E = InstrumentBBs.size();
I <
E; ++
I)
1823void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) {
1826 Type *Int64Ty = Builder.getInt64Ty();
1827 auto *Step = Builder.CreateZExt(
SI.getCondition(), Int64Ty);
1828 auto *NormalizedFuncNameVarPtr =
1830 FuncNameVar, PointerType::get(
M->getContext(), 0));
1831 Builder.CreateIntrinsic(Intrinsic::instrprof_increment_step,
1832 {NormalizedFuncNameVarPtr, Builder.getInt64(
FuncHash),
1833 Builder.getInt32(TotalNumCtrs),
1834 Builder.getInt32(*CurCtrIdx), Step});
1838void SelectInstVisitor::annotateOneSelectInst(SelectInst &SI) {
1839 std::vector<uint64_t> &CountFromProfile = UseFunc->getProfileRecord().Counts;
1840 assert(*CurCtrIdx < CountFromProfile.size() &&
1841 "Out of bound access of counters");
1842 uint64_t SCounts[2];
1843 SCounts[0] = CountFromProfile[*CurCtrIdx];
1845 uint64_t TotalCount = 0;
1846 auto BI = UseFunc->findBBInfo(
SI.getParent());
1847 if (BI !=
nullptr) {
1848 TotalCount = *BI->Count;
1851 if (TotalCount < SCounts[0])
1852 BI->Count = SCounts[0];
1855 SCounts[1] = (TotalCount > SCounts[0] ? TotalCount - SCounts[0] : 0);
1856 uint64_t MaxCount = std::max(SCounts[0], SCounts[1]);
1861void SelectInstVisitor::visitSelectInst(SelectInst &SI) {
1865 if (
SI.getCondition()->getType()->isVectorTy())
1873 instrumentOneSelectInst(SI);
1876 annotateOneSelectInst(SI);
1884 if (ValueProfKind == IPVK_MemOPSize)
1886 if (ValueProfKind == llvm::IPVK_VTableTarget)
1892void PGOUseFunc::annotateValueSites() {
1899 for (uint32_t Kind = IPVK_First;
Kind <= IPVK_Last; ++
Kind)
1900 annotateValueSites(Kind);
1904void PGOUseFunc::annotateValueSites(uint32_t Kind) {
1905 assert(Kind <= IPVK_Last);
1906 unsigned ValueSiteIndex = 0;
1920 NumValueSites != FuncInfo.ValueSites[IPVK_VTableTarget].size() &&
1922 FuncInfo.ValueSites[IPVK_VTableTarget] = VPC.
get(IPVK_VTableTarget);
1923 auto &ValueSites = FuncInfo.ValueSites[
Kind];
1925 auto &Ctx =
M->getContext();
1926 Ctx.
diagnose(DiagnosticInfoPGOProfile(
1927 M->getName().data(),
1928 Twine(
"Inconsistent number of value sites for ") +
1931 Twine(
"\", possibly due to the use of a stale profile."),
1937 LLVM_DEBUG(
dbgs() <<
"Read one value site profile (kind = " << Kind
1938 <<
"): Index = " << ValueSiteIndex <<
" out of "
1941 *M, *
I.AnnotatedInst, ProfileRecord,
1952 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
1957 ComdatMembers.insert(std::make_pair(
C, &
F));
1959 if (
Comdat *
C = GV.getComdat())
1960 ComdatMembers.insert(std::make_pair(
C, &GV));
1962 if (
Comdat *
C = GA.getComdat())
1963 ComdatMembers.insert(std::make_pair(
C, &GA));
1968 if (
F.isDeclaration())
1973 unsigned NumCriticalEdges = 0;
1974 for (
auto &BB :
F) {
1983 <<
", NumCriticalEdges=" << NumCriticalEdges
1984 <<
" exceed the threshold. Skip PGO.\n");
1994 if (
F.hasFnAttribute(llvm::Attribute::Naked))
1996 if (
F.hasFnAttribute(llvm::Attribute::NoProfile))
1998 if (
F.hasFnAttribute(llvm::Attribute::SkipProfile))
2003 if (
auto EntryCount =
F.getEntryCount())
2021 Triple TT(M.getTargetTriple());
2026 Twine(
"VTable value profiling is presently not "
2027 "supported for non-ELF object formats"),
2029 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
2039 FunctionInstrumenter FI(M,
F, TLI, ComdatMembers, BPI, BFI, LI,
2040 InstrumentationType);
2053 if (ProfileSampling)
2078 InstrumentationType))
2091 auto BFIEntryCount =
F.getEntryCount();
2092 assert(BFIEntryCount && (*BFIEntryCount > 0) &&
"Invalid BFI Entrycount");
2096 for (
auto &BBI :
F) {
2099 if (!Func.findBBInfo(&BBI))
2102 CountValue = *Func.getBBInfo(&BBI).Count;
2103 BFICountValue = *BFICount;
2107 if (SumCount.isZero())
2111 "Incorrect sum of BFI counts");
2114 double Scale = (SumCount / SumBFICount).convertToDouble();
2115 if (Scale < 1.001 && Scale > 0.999)
2120 if (NewEntryCount == 0)
2123 F.setEntryCount(NewEntryCount);
2126 << NewEntryCount <<
"\n");
2143 unsigned BBNum = 0, BBMisMatchNum = 0, NonZeroBBNum = 0;
2144 for (
auto &BBI :
F) {
2145 PGOUseBBInfo *BBInfo = Func.findBBInfo(&BBI);
2149 uint64_t CountValue = BBInfo->Count.value_or(CountValue);
2157 BFICountValue = *BFICount;
2160 bool rawIsHot = CountValue >= HotCountThreshold;
2161 bool BFIIsHot = BFICountValue >= HotCountThreshold;
2163 bool ShowCount =
false;
2164 if (rawIsHot && !BFIIsHot) {
2165 Msg =
"raw-Hot to BFI-nonHot";
2167 }
else if (rawIsCold && BFIIsHot) {
2168 Msg =
"raw-Cold to BFI-Hot";
2177 uint64_t Diff = (BFICountValue >= CountValue)
2178 ? BFICountValue - CountValue
2179 : CountValue - BFICountValue;
2187 F.getSubprogram(), &BBI);
2189 <<
" Count=" <<
ore::NV(
"Count", CountValue)
2190 <<
" BFI_Count=" <<
ore::NV(
"Count", BFICountValue);
2192 Remark <<
" (" << Msg <<
")";
2199 F.getSubprogram(), &
F.getEntryBlock())
2200 <<
"In Func " <<
ore::NV(
"Function",
F.getName())
2201 <<
": Num_of_BB=" <<
ore::NV(
"Count", BBNum)
2202 <<
", Num_of_non_zerovalue_BB=" <<
ore::NV(
"Count", NonZeroBBNum)
2203 <<
", Num_of_mis_matching_BB=" <<
ore::NV(
"Count", BBMisMatchNum);
2216 auto &Ctx = M.getContext();
2219 ProfileRemappingFileName);
2220 if (
Error E = ReaderOrErr.takeError()) {
2228 std::unique_ptr<IndexedInstrProfReader> PGOReader =
2229 std::move(ReaderOrErr.get());
2235 if (!PGOReader->hasCSIRLevelProfile() && IsCS)
2239 if (!PGOReader->isIRLevelProfile()) {
2241 ProfileFileName.
data(),
"Not an IR level instrumentation profile"));
2244 if (PGOReader->functionEntryOnly()) {
2246 ProfileFileName.
data(),
2247 "Function entry profiles are not yet supported for optimization"));
2253 if (!
G.hasName() || !
G.hasMetadata(LLVMContext::MD_type))
2264 M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()),
2269 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
2271 std::vector<Function *> HotFunctions;
2272 std::vector<Function *> ColdFunctions;
2276 bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled();
2279 bool InstrumentLoopEntries = PGOReader->instrLoopEntriesEnabled();
2283 bool HasSingleByteCoverage = PGOReader->hasSingleByteCoverage();
2291 if (!HasSingleByteCoverage) {
2297 PGOUseFunc Func(
F, &M, TLI, ComdatMembers, BPI, BFI, LI, PSI, IsCS,
2298 InstrumentFuncEntry, InstrumentLoopEntries,
2299 HasSingleByteCoverage);
2300 if (!Func.getRecord(PGOReader.get()))
2302 if (HasSingleByteCoverage) {
2303 Func.populateCoverage();
2311 bool AllZeros =
false;
2312 if (!Func.readCounters(AllZeros, PseudoKind))
2316 if (Func.getProgramMaxCount() != 0)
2317 ColdFunctions.push_back(&
F);
2322 if (
F.hasFnAttribute(Attribute::Cold))
2323 F.removeFnAttr(Attribute::Cold);
2326 F.addFnAttr(Attribute::Hot);
2329 Func.populateCounters();
2330 Func.setBranchWeights();
2331 Func.annotateValueSites();
2332 Func.annotateIrrLoopHeaderWeights();
2333 Func.setBlockUniformityAttribute();
2334 PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr();
2335 if (FreqAttr == PGOUseFunc::FFA_Cold)
2336 ColdFunctions.push_back(&
F);
2337 else if (FreqAttr == PGOUseFunc::FFA_Hot)
2338 HotFunctions.push_back(&
F);
2343 std::unique_ptr<BranchProbabilityInfo> NewBPI =
2344 std::make_unique<BranchProbabilityInfo>(
F, LI);
2345 std::unique_ptr<BlockFrequencyInfo> NewBFI =
2346 std::make_unique<BlockFrequencyInfo>(
F, *NewBPI, LI);
2350 dbgs() <<
"pgo-view-counts: " << Func.getFunc().getName() <<
"\n";
2351 NewBFI->print(
dbgs());
2361 ViewGraph(&Func,
Twine(
"PGORawCounts_") + Func.getFunc().getName());
2363 dbgs() <<
"pgo-view-raw-counts: " << Func.getFunc().getName() <<
"\n";
2390 for (
auto &
F : HotFunctions) {
2391 F->addFnAttr(Attribute::InlineHint);
2392 LLVM_DEBUG(
dbgs() <<
"Set inline attribute to function: " <<
F->getName()
2395 for (
auto &
F : ColdFunctions) {
2398 if (
F->hasFnAttribute(Attribute::Hot)) {
2399 auto &Ctx = M.getContext();
2400 std::string Msg = std::string(
"Function ") +
F->getName().str() +
2401 std::string(
" is annotated as a hot function but"
2402 " the profile is cold");
2407 F->addFnAttr(Attribute::Cold);
2408 LLVM_DEBUG(
dbgs() <<
"Set cold attribute to function: " <<
F->getName()
2415 std::string
Filename, std::string RemappingFilename,
bool IsCS,
2418 ProfileRemappingFileName(
std::
move(RemappingFilename)), IsCS(IsCS),
2447 LookupTLI, LookupBPI, LookupBFI, LookupLI, PSI,
2455 if (!
Node->getName().empty())
2456 return Node->getName().str();
2458 std::string SimpleNodeName;
2461 return SimpleNodeName;
2468 LLVM_DEBUG(
dbgs() <<
"Weight is: ";
for (
const auto &W : Weights) {
2478 if (BrCondStr.empty())
2482 std::accumulate(Weights.begin(), Weights.end(), (
uint64_t)0,
2490 std::string BranchProbStr;
2493 OS <<
" (total count : " << TotalCount <<
")";
2498 << BrCondStr <<
" is true with probability : " << BranchProbStr;
2517 return &
G->getFunc().front();
2540 return std::string(
G->getFunc().getName());
2548 PGOUseBBInfo *BI = Graph->findBBInfo(
Node);
2550 if (BI && BI->Count)
2551 OS << *BI->Count <<
"\\l";
2562 OS <<
"SELECT : { T = ";
2566 OS <<
"Unknown, F = Unknown }\\l";
2568 OS << TC <<
", F = " << FC <<
" }\\l";
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file implements a class to represent arbitrary precision integral constant values and operations...
Function Alias Analysis false
This file contains the simple types necessary to represent the attributes associated with functions a...
This file finds the minimum set of blocks on a CFG that must be instrumented to infer execution cover...
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
This file contains the declarations for the subclasses of Constant, which represent the different fla...
post inline ee instrument
static BasicBlock * getInstrBB(CFGMST< Edge, BBInfo > &MST, Edge &E, const DenseSet< const BasicBlock * > &ExecBlocks)
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
#define INSTR_PROF_QUOTE(x)
#define VARIANT_MASK_CSIR_PROF
#define VARIANT_MASK_DBG_CORRELATE
#define INSTR_PROF_RAW_VERSION
#define INSTR_PROF_RAW_VERSION_VAR
#define VARIANT_MASK_TEMPORAL_PROF
#define VARIANT_MASK_IR_PROF
#define VARIANT_MASK_BYTE_COVERAGE
#define VARIANT_MASK_INSTR_ENTRY
#define VARIANT_MASK_FUNCTION_ENTRY_ONLY
#define VARIANT_MASK_INSTR_LOOP_ENTRIES
Machine Check Debug Module
static cl::opt< unsigned > ColdCountThreshold("mfs-count-threshold", cl::desc("Minimum number of times a block must be executed to be retained."), cl::init(1), cl::Hidden)
static constexpr StringLiteral Filename
static GlobalVariable * createIRLevelProfileFlagVar(Module &M, PGOInstrumentationType InstrumentationType)
static cl::opt< std::string > PGOTestProfileRemappingFile("pgo-test-profile-remapping-file", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path of profile remapping file. This is mainly for " "test purpose."))
static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI, BranchProbabilityInfo &NBPI)
static void annotateFunctionWithHashMismatch(Function &F, LLVMContext &ctx)
static cl::opt< unsigned > MaxNumMemOPAnnotations("memop-max-annotations", cl::init(4), cl::Hidden, cl::desc("Max number of precise value annotations for a single memop" "intrinsic"))
static cl::opt< unsigned > MaxNumAnnotations("icp-max-annotations", cl::init(3), cl::Hidden, cl::desc("Max number of annotations for a single indirect " "call callsite"))
static bool skipPGOGen(const Function &F)
static void collectComdatMembers(Module &M, std::unordered_multimap< Comdat *, GlobalValue * > &ComdatMembers)
static void populateEHOperandBundle(VPCandidateInfo &Cand, DenseMap< BasicBlock *, ColorVector > &BlockColors, SmallVectorImpl< OperandBundleDef > &OpBundles)
static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI, BranchProbabilityInfo &NBPI, uint64_t HotCountThreshold, uint64_t ColdCountThreshold)
static cl::opt< bool > DoComdatRenaming("do-comdat-renaming", cl::init(false), cl::Hidden, cl::desc("Append function hash to the name of COMDAT function to avoid " "function hash mismatch due to the preinliner"))
static bool annotateAllFunctions(Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName, vfs::FileSystem &FS, function_ref< TargetLibraryInfo &(Function &)> LookupTLI, function_ref< BranchProbabilityInfo *(Function &)> LookupBPI, function_ref< BlockFrequencyInfo *(Function &)> LookupBFI, function_ref< LoopInfo *(Function &)> LookupLI, ProfileSummaryInfo *PSI, bool IsCS)
static void setupBBInfoEdges(const FuncPGOInstrumentation< PGOUseEdge, PGOUseBBInfo > &FuncInfo)
Set up InEdges/OutEdges for all BBs in the MST.
static bool skipPGOUse(const Function &F)
static bool canRenameComdat(Function &F, std::unordered_multimap< Comdat *, GlobalValue * > &ComdatMembers)
ValueProfileCollector::CandidateInfo VPCandidateInfo
static bool InstrumentAllFunctions(Module &M, function_ref< TargetLibraryInfo &(Function &)> LookupTLI, function_ref< BranchProbabilityInfo *(Function &)> LookupBPI, function_ref< BlockFrequencyInfo *(Function &)> LookupBFI, function_ref< LoopInfo *(Function &)> LookupLI, PGOInstrumentationType InstrumentationType)
static uint64_t sumEdgeCount(const ArrayRef< PGOUseEdge * > Edges)
static uint32_t getMaxNumAnnotations(InstrProfValueKind ValueProfKind)
static cl::opt< bool > DisableValueProfiling("disable-vp", cl::init(false), cl::Hidden, cl::desc("Disable Value Profiling"))
static std::string getSimpleNodeName(const BasicBlock *Node)
static bool isIndirectBrTarget(BasicBlock *BB)
static cl::opt< std::string > PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path of profile data file. This is " "mainly for test purpose."))
static std::string getBranchCondString(Instruction *TI)
static const char * ValueProfKindDescr[]
This file provides the interface for IR based instrumentation passes (profile-gen,...
FunctionAnalysisManager FAM
ModuleAnalysisManager MAM
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
static void visit(BasicBlock &Start, std::function< bool(BasicBlock *)> op)
std::pair< BasicBlock *, BasicBlock * > Edge
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
StringSet - A set-like wrapper for the StringMap.
Defines the virtual file system interface vfs::FileSystem.
void printAsOperand(OutputBuffer &OB, Prec P=Prec::Default, bool StrictlyWorse=false) const
static const fltSemantics & IEEEdouble()
static constexpr roundingMode rmNearestTiesToEven
static APFloat getZero(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Zero.
Class for arbitrary precision integers.
This templated class represents "all analyses that operate over <a particular IR unit>" (e....
Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
Get the array size.
LLVM Basic Block Representation.
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
InstListType::iterator iterator
Instruction iterators...
LLVM_ABI const_iterator getFirstNonPHIOrDbgOrAlloca() const
Returns an iterator to the first instruction in this block that is not a PHINode, a debug intrinsic,...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
LLVM_ABI bool isIrrLoopHeader(const BasicBlock *BB)
Returns true if BB is an irreducible loop header block.
LLVM_ABI std::optional< uint64_t > getBlockProfileCount(const BasicBlock *BB, bool AllowSynthetic=false) const
Returns the estimated profile count of BB.
Analysis pass which computes BranchProbabilityInfo.
Analysis providing branch probability information.
Edge & addEdge(BasicBlock *Src, BasicBlock *Dest, uint64_t W)
const std::vector< std::unique_ptr< Edge > > & allEdges() const
Predicate getPredicate() const
Return the predicate for this instruction.
LLVM_ABI StringRef getName() const
void setSelectionKind(SelectionKind Val)
SelectionKind getSelectionKind() const
Conditional Branch instruction.
Value * getCondition() const
static LLVM_ABI Constant * getPointerBitCastOrAddrSpaceCast(Constant *C, Type *Ty)
Create a BitCast or AddrSpaceCast for a pointer type depending on the address space.
This is the shared class of boolean and integer constants.
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
static LLVM_ABI Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Diagnostic information for the PGO profiler.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Base class for error info classes.
virtual std::string message() const
Return the error message as a string.
Lightweight error class with error context and mandatory checking.
static LLVM_ABI GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
@ HiddenVisibility
The GV is hidden.
@ ExternalLinkage
Externally visible function.
@ WeakAnyLinkage
Keep one copy of named function when linking (weak)
@ AvailableExternallyLinkage
Available for inspection, not emission.
@ LinkOnceODRLinkage
Same, but only replaced by something equivalent.
This instruction compares its operands according to the predicate given to the constructor.
static Expected< std::unique_ptr< IndexedInstrProfReader > > create(const Twine &Path, vfs::FileSystem &FS, const Twine &RemappingPath="")
Factory method to create an indexed reader.
uint64_t getMaximumFunctionCount(bool UseCS)
Return the maximum of all known function counts.
Expected< NamedInstrProfRecord > getInstrProfRecord(StringRef FuncName, uint64_t FuncHash, StringRef DeprecatedFuncName="", uint64_t *MismatchedFuncSum=nullptr)
Return the NamedInstrProfRecord associated with FuncName and FuncHash.
Base class for instruction visitors.
static bool canInstrumentCallsite(const CallBase &CB)
instrprof_error get() const
std::string message() const override
Return the error message as a string.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
A smart pointer to a reference-counted object that inherits from RefCountedBase or ThreadSafeRefCount...
LLVM_ABI void update(ArrayRef< uint8_t > Data)
This is an important class for using LLVM in a threaded context.
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Analysis pass that exposes the LoopInfo for a function.
LLVM_ABI MDString * createString(StringRef Str)
Return the given string as metadata.
LLVM_ABI MDNode * createIrrLoopHeaderWeight(uint64_t Weight)
Return metadata containing an irreducible loop header weight.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
A Module instance is used to store all the information related to an LLVM module.
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
LLVM_ABI PGOInstrumentationUse(std::string Filename="", std::string RemappingFilename="", bool IsCS=false, IntrusiveRefCntPtr< vfs::FileSystem > FS=nullptr)
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
PreservedAnalyses & preserve()
Mark an analysis as preserved.
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
LLVM_ABI uint64_t getOrCompColdCountThreshold() const
Returns ColdCountThreshold if set.
LLVM_ABI bool isColdCount(uint64_t C) const
Returns true if count C is considered cold.
LLVM_ABI void refresh(std::unique_ptr< ProfileSummary > &&Other=nullptr)
If a summary is provided as argument, use that.
LLVM_ABI bool isHotCount(uint64_t C) const
Returns true if count C is considered hot.
LLVM_ABI uint64_t getOrCompHotCountThreshold() const
Returns HotCountThreshold if set.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Represent a constant reference to a string, i.e.
constexpr bool empty() const
Check if the string is empty.
constexpr const char * data() const
Get a pointer to the start of the string (which may not be null terminated).
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
Triple - Helper class for working with autoconf configuration names.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
LLVM_ABI void print(raw_ostream &O, bool IsForDebug=false, bool NoDetails=false) const
Print the current type.
Value * getOperand(unsigned i) const
std::vector< CandidateInfo > get(InstrProfValueKind Kind) const
returns a list of value profiling candidates of the given kind
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
A raw_ostream that writes to an std::string.
The virtual file system interface.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
This file contains the declaration of the Comdat class, which represents a single COMDAT in LLVM.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
uint64_t getFuncHash(const FuncRecordTy *Record)
Return the structural hash associated with the function.
LLVM_ABI void checkExpectAnnotations(const Instruction &I, ArrayRef< uint32_t > ExistingWeights, bool IsFrontend)
checkExpectAnnotations - compares PGO counters to the thresholds used for llvm.expect and warns if th...
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< FuncNode * > Func
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
void write64le(void *P, uint64_t V)
LLVM_ABI IntrusiveRefCntPtr< FileSystem > getRealFileSystem()
Gets a vfs::FileSystem for the 'real' file system, as seen by the operating system.
This is an optimization pass for GlobalISel generic memory operations.
static cl::opt< bool > PGOTreatUnknownAsCold("pgo-treat-unknown-as-cold", cl::init(false), cl::Hidden, cl::desc("For cold function instrumentation, treat count unknown(e.g. " "unprofiled) functions as cold."))
static cl::opt< bool > PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden, cl::desc("Use this option to turn on/off " "memory intrinsic size profiling."))
LLVM_ABI void setIrrLoopHeaderMetadata(Module *M, Instruction *TI, uint64_t Count)
LLVM_ABI void setProfMetadata(Instruction *TI, ArrayRef< uint64_t > EdgeCounts, uint64_t MaxCount)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI std::string getPGOFuncName(const Function &F, bool InLTO=false, uint64_t Version=INSTR_PROF_INDEX_VERSION)
Please use getIRPGOFuncName for LLVM IR instrumentation.
static cl::opt< bool > PGOViewBlockCoverageGraph("pgo-view-block-coverage-graph", cl::desc("Create a dot file of CFGs with block " "coverage inference information"))
LLVM_ABI void createPGOFuncNameMetadata(Function &F, StringRef PGOFuncName)
Create the PGOFuncName meta data if PGOFuncName is different from function's raw name.
LLVM_ABI unsigned GetSuccessorNumber(const BasicBlock *BB, const BasicBlock *Succ)
Search for the specified successor of basic block BB and return its position in the terminator instru...
LLVM_ABI std::string getIRPGOFuncName(const Function &F, bool InLTO=false)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
RelativeUniformCounterPtr ValuesPtrExpr NumValueSites[IPVK_Last+1]
auto successors(const MachineBasicBlock *BB)
LLVM_ABI void createProfileSamplingVar(Module &M)
void handleAllErrors(Error E, HandlerTs &&... Handlers)
Behaves the same as handleErrors, except that by contract all errors must be handled by the given han...
constexpr from_range_t from_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI bool SplitIndirectBrCriticalEdges(Function &F, bool IgnoreBlocksWithoutPHI, BranchProbabilityInfo *BPI=nullptr, BlockFrequencyInfo *BFI=nullptr, DomTreeUpdater *DTU=nullptr)
LLVM_ABI DenseMap< BasicBlock *, ColorVector > colorEHFunclets(Function &F)
If an EH funclet personality is in use (see isFuncletEHPersonality), this will recompute which blocks...
LLVM_ABI void createPGONameMetadata(GlobalObject &GO, StringRef PGOName)
Create the PGOName metadata if a global object's PGO name is different from its mangled name.
InnerAnalysisManagerProxy< FunctionAnalysisManager, Module > FunctionAnalysisManagerModuleProxy
Provide the FunctionAnalysisManager to Module proxy.
static cl::opt< bool > PGOBlockCoverage("pgo-block-coverage", cl::desc("Use this option to enable basic block coverage instrumentation"))
cl::opt< bool > PGOWarnMissing
raw_ostream & WriteGraph(raw_ostream &O, const GraphType &G, bool ShortNames=false, const Twine &Title="")
cl::opt< unsigned > MaxNumVTableAnnotations("icp-max-num-vtables", cl::init(6), cl::Hidden, cl::desc("Max number of vtables annotated for a vtable load instruction."))
static cl::opt< bool > PGOTemporalInstrumentation("pgo-temporal-instrumentation", cl::desc("Use this option to enable temporal instrumentation"))
cl::opt< bool > EnableVTableProfileUse("enable-vtable-profile-use", cl::init(false), cl::desc("If ThinLTO and WPD is enabled and this option is true, vtable " "profiles will be used by ICP pass for more efficient indirect " "call sequence. If false, type profiles won't be used."))
bool isScopedEHPersonality(EHPersonality Pers)
Returns true if this personality uses scope-style EH IR instructions: catchswitch,...
RelativeUniformCounterPtr ValuesPtrExpr VTableAddr Value
LLVM_ABI void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected, bool ElideAllZero=false)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
LLVM_ABI std::string getPGOName(const GlobalVariable &V, bool InLTO=false)
cl::opt< std::string > ViewBlockFreqFuncName("view-bfi-func-name", cl::Hidden, cl::desc("The option to specify " "the name of the function " "whose CFG will be displayed."))
LLVM_ABI GlobalVariable * createPGOFuncNameVar(Function &F, StringRef PGOFuncName)
Create and return the global variable for function name used in PGO instrumentation.
LLVM_ABI void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
auto reverse(ContainerTy &&C)
static cl::opt< bool > EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden, cl::desc("When this option is on, the annotated " "branch probability will be emitted as " "optimization remarks: -{Rpass|" "pass-remarks}=pgo-instrumentation"))
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
static cl::opt< unsigned > PGOVerifyBFIRatio("pgo-verify-bfi-ratio", cl::init(2), cl::Hidden, cl::desc("Set the threshold for pgo-verify-bfi: only print out " "mismatched BFI if the difference percentage is greater than " "this value (in percentage)."))
static cl::opt< bool > PGOInstrumentLoopEntries("pgo-instrument-loop-entries", cl::init(false), cl::Hidden, cl::desc("Force to instrument loop entries."))
static cl::opt< unsigned > PGOFunctionSizeThreshold("pgo-function-size-threshold", cl::Hidden, cl::desc("Do not instrument functions smaller than this threshold."))
LLVM_ABI EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
static cl::opt< bool > PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden, cl::desc("Fix function entry count in profile use."))
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
static cl::opt< PGOViewCountsType > PGOViewRawCounts("pgo-view-raw-counts", cl::Hidden, cl::desc("A boolean option to show CFG dag or text " "with raw profile counts from " "profile data. See also option " "-pgo-view-counts. To limit graph " "display to only one function, use " "filtering option -view-bfi-func-name."), cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), clEnumValN(PGOVCT_Graph, "graph", "show a graph."), clEnumValN(PGOVCT_Text, "text", "show in text.")))
static cl::opt< bool > PGOVerifyBFI("pgo-verify-bfi", cl::init(false), cl::Hidden, cl::desc("Print out mismatched BFI counts after setting profile metadata " "The print is enabled under -Rpass-analysis=pgo, or " "internal option -pass-remarks-analysis=pgo."))
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
cl::opt< bool > NoPGOWarnMismatch
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
static cl::opt< uint64_t > PGOColdInstrumentEntryThreshold("pgo-cold-instrument-entry-threshold", cl::init(0), cl::Hidden, cl::desc("For cold function instrumentation, skip instrumenting functions " "whose entry count is above the given value."))
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
cl::opt< PGOViewCountsType > PGOViewCounts("pgo-view-counts", cl::Hidden, cl::desc("A boolean option to show CFG dag or text with " "block profile counts and branch probabilities " "right after PGO profile annotation step. The " "profile counts are computed using branch " "probabilities from the runtime profile data and " "block frequency propagation algorithm. To view " "the raw counts from the profile, use option " "-pgo-view-raw-counts instead. To limit graph " "display to only one function, use filtering option " "-view-bfi-func-name."), cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), clEnumValN(PGOVCT_Graph, "graph", "show a graph."), clEnumValN(PGOVCT_Text, "text", "show in text.")))
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
OperandBundleDefT< Value * > OperandBundleDef
LLVM_ABI void appendToCompilerUsed(Module &M, ArrayRef< GlobalValue * > Values)
Adds global values to the llvm.compiler.used list.
static cl::opt< unsigned > PGOVerifyBFICutoff("pgo-verify-bfi-cutoff", cl::init(5), cl::Hidden, cl::desc("Set the threshold for pgo-verify-bfi: skip the counts whose " "profile count value is below."))
RelativeUniformCounterPtr ValuesPtrExpr VTableAddr Count
LLVM_ABI BasicBlock * SplitCriticalEdge(Instruction *TI, unsigned SuccNum, const CriticalEdgeSplittingOptions &Options=CriticalEdgeSplittingOptions(), const Twine &BBName="")
If this edge is a critical edge, insert a new node to split the critical edge.
void ViewGraph(const GraphType &G, const Twine &Name, bool ShortNames=false, const Twine &Title="", GraphProgram::Name Program=GraphProgram::DOT)
ViewGraph - Emit a dot graph, run 'dot', run gv on the postscript file, then cleanup.
LLVM_ABI bool isCriticalEdge(const Instruction *TI, unsigned SuccNum, bool AllowIdenticalEdges=false)
Return true if the specified edge is a critical edge.
cl::opt< bool > PGOInstrumentColdFunctionOnly
cl::list< std::string > CtxPGOSkipCallsiteInstrument("ctx-prof-skip-callsite-instr", cl::Hidden, cl::desc("Do not instrument callsites to functions in this list. Intended " "for testing."))
LLVM_ABI bool canRenameComdatFunc(const Function &F, bool CheckAddressTaken=false)
Check if we can safely rename this Comdat function.
static cl::opt< bool > PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden, cl::desc("Use this option to turn on/off SELECT " "instruction instrumentation. "))
LLVM_ABI void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput)
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
TinyPtrVector< BasicBlock * > ColorVector
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto predecessors(const MachineBasicBlock *BB)
Instruction::const_succ_iterator const_succ_iterator
llvm::cl::opt< llvm::InstrProfCorrelator::ProfCorrelatorKind > ProfileCorrelate
static cl::opt< bool > PGOFunctionEntryCoverage("pgo-function-entry-coverage", cl::Hidden, cl::desc("Use this option to enable function entry coverage instrumentation."))
static cl::opt< unsigned > PGOFunctionCriticalEdgeThreshold("pgo-critical-edge-threshold", cl::init(20000), cl::Hidden, cl::desc("Do not instrument functions with the number of critical edges " " greater than this threshold."))
uint32_t scaleBranchCount(uint64_t Count, uint64_t Scale)
Scale an individual branch count.
static cl::opt< bool > PGOVerifyHotBFI("pgo-verify-hot-bfi", cl::init(false), cl::Hidden, cl::desc("Print out the non-match BFI count if a hot raw profile count " "becomes non-hot, or a cold raw profile count becomes hot. " "The print is enabled under -Rpass-analysis=pgo, or " "internal option -pass-remarks-analysis=pgo."))
uint64_t calculateCountScale(uint64_t MaxCount)
Calculate what to divide by to scale counts.
LLVM_ABI SmallVector< uint32_t > downscaleWeights(ArrayRef< uint64_t > Weights, std::optional< uint64_t > KnownMaxCount=std::nullopt)
Downscale the given weights, preserving the ratio.
LLVM_ABI bool isGPUProfTarget(const Module &M)
Determines whether the module targets a GPU eligible for PGO instrumentation.
cl::opt< bool > EnableVTableValueProfiling("enable-vtable-value-profiling", cl::init(false), cl::desc("If true, the virtual table address will be instrumented to know " "the types of a C++ pointer. The information is used in indirect " "call promotion to do selective vtable-based comparison."))
static cl::opt< bool > PGOInstrumentEntry("pgo-instrument-entry", cl::init(false), cl::Hidden, cl::desc("Force to instrument function entry basicblock."))
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
static cl::opt< std::string > PGOTraceFuncHash("pgo-trace-func-hash", cl::init("-"), cl::Hidden, cl::value_desc("function name"), cl::desc("Trace the hash of the function with this name."))
cl::opt< bool > NoPGOWarnMismatchComdatWeak
Implement std::hash so that hash_code can be used in STL containers.
DOTGraphTraits(bool isSimple=false)
static std::string getGraphName(const PGOUseFunc *G)
std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph)
DefaultDOTGraphTraits(bool simple=false)
static ChildIteratorType child_end(const NodeRef N)
static NodeRef getEntryNode(const PGOUseFunc *G)
static ChildIteratorType child_begin(const NodeRef N)
static nodes_iterator nodes_end(const PGOUseFunc *G)
const BasicBlock * NodeRef
static nodes_iterator nodes_begin(const PGOUseFunc *G)
pointer_iterator< Function::const_iterator > nodes_iterator
const_succ_iterator ChildIteratorType
bool isBlockUniform(unsigned BlockIdx) const
Check if a basic block is entered via a wave-uniform branch.
std::vector< uint64_t > Counts
CountPseudoKind getCountPseudoKind() const
uint32_t getNumValueSites(uint32_t ValueKind) const
Return the number of instrumented sites for ValueKind.
std::vector< uint8_t > UniformityBits
For AMDGPU offload profiling: 1 bit per basic block indicating whether the block is usually entered w...
static void setCSFlagInHash(uint64_t &FuncHash)
static constexpr uint64_t FUNC_HASH_MASK
Instruction * AnnotatedInst