85 #include <system_error> 90 using namespace sampleprof;
92 #define DEBUG_TYPE "sample-profile" 108 "sample-profile-max-propagate-iterations",
cl::init(100),
109 cl::desc(
"Maximum number of iterations to go through when propagating " 110 "sample block/edge weights through the CFG."));
114 cl::desc(
"Emit a warning if less than N% of records in the input profile " 115 "are matched to the IR."));
119 cl::desc(
"Emit a warning if less than N% of samples in the input profile " 120 "are matched to the IR."));
124 cl::desc(
"Use this option to turn off/on warnings about function with " 125 "samples but without debug information to use those samples. "));
129 cl::desc(
"If the sample profile is accurate, we will mark all un-sampled " 130 "callsite and function as having 0 samples. Otherwise, treat " 131 "un-sampled callsites and functions conservatively as unknown. "));
136 cl::desc(
"For symbols in profile symbol list, regard their profiles to " 137 "be accurate. It may be overriden by profile-sample-accurate. "));
143 using Edge = std::pair<const BasicBlock *, const BasicBlock *>;
148 class SampleProfileLoader;
150 class SampleCoverageTracker {
152 SampleCoverageTracker(SampleProfileLoader &SPL) : SPLoader(SPL){};
155 uint32_t Discriminator, uint64_t Samples);
156 unsigned computeCoverage(
unsigned Used,
unsigned Total)
const;
161 uint64_t getTotalUsedSamples()
const {
return TotalUsedSamples; }
166 SampleCoverage.clear();
167 TotalUsedSamples = 0;
171 using BodySampleCoverageMap = std::map<LineLocation, unsigned>;
172 using FunctionSamplesCoverageMap =
184 FunctionSamplesCoverageMap SampleCoverage;
197 uint64_t TotalUsedSamples = 0;
199 SampleProfileLoader &SPLoader;
202 class GUIDToFuncNameMapper {
206 : CurrentReader(Reader), CurrentModule(M),
207 CurrentGUIDToFuncNameMap(GUIDToFuncNameMap) {
211 for (
const auto &
F : CurrentModule) {
213 CurrentGUIDToFuncNameMap.insert(
224 if (CanonName != OrigName)
225 CurrentGUIDToFuncNameMap.insert(
230 SetGUIDToFuncNameMapForAll(&CurrentGUIDToFuncNameMap);
233 ~GUIDToFuncNameMapper() {
237 CurrentGUIDToFuncNameMap.clear();
241 SetGUIDToFuncNameMapForAll(
nullptr);
246 std::queue<FunctionSamples *> FSToUpdate;
247 for (
auto &IFS : CurrentReader.getProfiles()) {
248 FSToUpdate.push(&IFS.second);
251 while (!FSToUpdate.empty()) {
257 for (
auto &IFS : FSMap) {
259 FSToUpdate.push(&FS);
275 class SampleProfileLoader {
281 : GetAC(
std::move(GetAssumptionCache)),
282 GetTTI(
std::move(GetTargetTransformInfo)), CoverageTracker(*this),
283 Filename(Name), RemappingFilename(RemapName),
284 IsThinLTOPreLink(IsThinLTOPreLink) {}
286 bool doInitialization(
Module &M);
293 friend class SampleCoverageTracker;
301 std::vector<const FunctionSamples *>
302 findIndirectCallFunctionSamples(
const Instruction &
I, uint64_t &Sum)
const;
312 void findEquivalenceClasses(
Function &
F);
313 template <
bool IsPostDom>
318 uint64_t visitEdge(Edge
E,
unsigned *NumUnknownEdges, Edge *UnknownEdge);
320 bool propagateThroughEdges(
Function &
F,
bool UpdateBlockCount);
321 void computeDominanceAndLoopInfo(
Function &
F);
322 void clearFunctionData();
330 BlockWeightMap BlockWeights;
336 EdgeWeightMap EdgeWeights;
350 EquivalenceClassMap EquivalenceClass;
359 std::unique_ptr<DominatorTree> DT;
360 std::unique_ptr<PostDominatorTree> PDT;
361 std::unique_ptr<LoopInfo> LI;
363 std::function<AssumptionCache &(Function &)> GetAC;
364 std::function<TargetTransformInfo &(Function &)> GetTTI;
367 BlockEdgeMap Predecessors;
370 BlockEdgeMap Successors;
372 SampleCoverageTracker CoverageTracker;
375 std::unique_ptr<SampleProfileReader> Reader;
381 std::string Filename;
384 std::string RemappingFilename;
387 bool ProfileIsValid =
false;
393 bool IsThinLTOPreLink;
400 std::unique_ptr<ProfileSymbolList> PSL;
406 uint64_t TotalCollectedSamples = 0;
414 struct NotInlinedProfileInfo {
432 bool ProfAccForSymsInList;
435 class SampleProfileLoaderLegacyPass :
public ModulePass {
441 bool IsThinLTOPreLink =
false)
445 return ACT->getAssumptionCache(
F);
448 return TTIWP->getTTI(
F);
454 void dump() { SampleLoader.dump(); }
456 bool doInitialization(
Module &M)
override {
457 return SampleLoader.doInitialization(M);
460 StringRef getPassName()
const override {
return "Sample profile pass"; }
461 bool runOnModule(
Module &M)
override;
470 SampleProfileLoader SampleLoader;
494 bool SampleProfileLoader::callsiteIsHot(
const FunctionSamples *CallsiteFS,
499 assert(PSI &&
"PSI is expected to be non null");
501 if (ProfAccForSymsInList)
516 unsigned &Count = SampleCoverage[FS][Loc];
517 bool FirstTime = (++Count == 1);
519 TotalUsedSamples += Samples;
529 auto I = SampleCoverage.find(FS);
533 unsigned Count = (
I != SampleCoverage.end()) ?
I->second.size() : 0;
539 for (
const auto &J :
I.second) {
541 if (SPLoader.callsiteIsHot(CalleeSamples, PSI))
542 Count += countUsedRecords(CalleeSamples, PSI);
558 for (
const auto &J :
I.second) {
560 if (SPLoader.callsiteIsHot(CalleeSamples, PSI))
561 Count += countBodyRecords(CalleeSamples, PSI);
575 Total +=
I.second.getSamples();
579 for (
const auto &J :
I.second) {
581 if (SPLoader.callsiteIsHot(CalleeSamples, PSI))
582 Total += countBodySamples(CalleeSamples, PSI);
593 unsigned SampleCoverageTracker::computeCoverage(
unsigned Used,
594 unsigned Total)
const {
596 "number of used records cannot exceed the total number of records");
597 return Total > 0 ? Used * 100 / Total : 100;
601 void SampleProfileLoader::clearFunctionData() {
602 BlockWeights.clear();
604 VisitedBlocks.clear();
605 VisitedEdges.clear();
606 EquivalenceClass.clear();
610 Predecessors.clear();
612 CoverageTracker.clear();
620 void SampleProfileLoader::printEdgeWeight(
raw_ostream &OS, Edge
E) {
621 OS <<
"weight[" << E.first->getName() <<
"->" << E.second->getName()
622 <<
"]: " << EdgeWeights[
E] <<
"\n";
629 void SampleProfileLoader::printBlockEquivalence(
raw_ostream &OS,
631 const BasicBlock *Equiv = EquivalenceClass[BB];
632 OS <<
"equivalence[" << BB->
getName()
633 <<
"]: " << ((Equiv) ? EquivalenceClass[BB]->
getName() :
"NONE") <<
"\n";
640 void SampleProfileLoader::printBlockWeight(
raw_ostream &OS,
642 const auto &
I = BlockWeights.find(BB);
643 uint64_t
W = (
I == BlockWeights.end() ? 0 :
I->second);
644 OS <<
"weight[" << BB->
getName() <<
"]: " << W <<
"\n";
662 return std::error_code();
666 return std::error_code();
671 if (isa<BranchInst>(Inst) || isa<IntrinsicInst>(Inst) || isa<PHINode>(Inst))
672 return std::error_code();
678 if ((isa<CallInst>(Inst) || isa<InvokeInst>(Inst)) &&
680 findCalleeFunctionSamples(Inst))
689 CoverageTracker.markSamplesUsed(FS, LineOffset, Discriminator, R.
get());
694 Remark <<
" samples from profile (offset: ";
706 <<
" (line offset: " << LineOffset <<
"." 723 bool HasWeight =
false;
740 bool SampleProfileLoader::computeBlockWeights(
Function &
F) {
741 bool Changed =
false;
743 for (
const auto &BB : F) {
746 BlockWeights[&BB] = Weight.
get();
747 VisitedBlocks.insert(&BB);
769 SampleProfileLoader::findCalleeFunctionSamples(
const Instruction &Inst)
const {
776 if (
const CallInst *CI = dyn_cast<CallInst>(&Inst))
777 if (
Function *Callee = CI->getCalledFunction())
778 CalleeName =
Callee->getName();
792 std::vector<const FunctionSamples *>
793 SampleProfileLoader::findIndirectCallFunctionSamples(
796 std::vector<const FunctionSamples *> R;
812 for (
const auto &T_C :
T.get())
818 for (
const auto &NameFS : *M) {
819 Sum += NameFS.second.getEntrySamples();
820 R.push_back(&NameFS.second);
842 SampleProfileLoader::findFunctionSamples(
const Instruction &Inst)
const {
847 auto it = DILocation2SampleMap.try_emplace(DIL,
nullptr);
849 it.first->second = Samples->findFunctionSamples(DIL);
850 return it.first->second;
853 bool SampleProfileLoader::inlineCallInstruction(
Instruction *
I) {
854 assert(isa<CallInst>(I) || isa<InvokeInst>(I));
856 Function *CalledFunction = CS.getCalledFunction();
869 getInlineCost(cast<CallBase>(*I), Params, GetTTI(*CalledFunction), GetAC,
870 None,
nullptr,
nullptr);
873 <<
"incompatible inlining");
880 <<
"inlined hot callee '" <<
ore::NV(
"Callee", CalledFunction)
900 bool SampleProfileLoader::inlineHotFunctions(
906 assert((!ProfAccForSymsInList ||
909 "ProfAccForSymsInList should be false when profile-sample-accurate " 913 bool Changed =
false;
915 bool LocalChanged =
false;
922 if ((isa<CallInst>(I) || isa<InvokeInst>(I)) &&
923 !isa<IntrinsicInst>(
I) && (FS = findCalleeFunctionSamples(I))) {
926 localNotInlinedCallSites.try_emplace(&I, FS);
927 if (callsiteIsHot(FS, PSI))
938 if (CalledFunction == &F)
941 if (PromotedInsns.
count(I))
944 for (
const auto *FS : findIndirectCallFunctionSamples(*I, Sum)) {
945 if (IsThinLTOPreLink) {
956 if (CalleeFunctionName == F.getName())
959 if (!callsiteIsHot(FS, PSI))
962 const char *Reason =
"Callee function not available";
965 !R->getValue()->isDeclaration() &&
966 R->getValue()->getSubprogram() &&
974 if ((isa<CallInst>(DI) || isa<InvokeInst>(DI)) &&
975 inlineCallInstruction(DI)) {
976 localNotInlinedCallSites.erase(I);
981 <<
"\nFailed to promote indirect call to " 982 << CalleeFunctionName <<
" because " << Reason <<
"\n");
985 }
else if (CalledFunction && CalledFunction->
getSubprogram() &&
987 if (inlineCallInstruction(I)) {
988 localNotInlinedCallSites.erase(I);
991 }
else if (IsThinLTOPreLink) {
992 findCalleeFunctionSamples(*I)->findInlinedFunctions(
1004 for (
const auto &Pair : localNotInlinedCallSites) {
1011 notInlinedCallInfo.try_emplace(Callee, NotInlinedProfileInfo{0});
1040 template <
bool IsPostDom>
1041 void SampleProfileLoader::findEquivalencesFor(
1044 const BasicBlock *EC = EquivalenceClass[BB1];
1045 uint64_t Weight = BlockWeights[EC];
1046 for (
const auto *BB2 : Descendants) {
1047 bool IsDomParent = DomTree->
dominates(BB2, BB1);
1048 bool IsInSameLoop = LI->getLoopFor(BB1) == LI->getLoopFor(BB2);
1049 if (BB1 != BB2 && IsDomParent && IsInSameLoop) {
1050 EquivalenceClass[BB2] = EC;
1052 if (VisitedBlocks.count(BB2)) {
1053 VisitedBlocks.insert(EC);
1064 Weight =
std::max(Weight, BlockWeights[BB2]);
1068 BlockWeights[EC] = Samples->getHeadSamples() + 1;
1070 BlockWeights[EC] = Weight;
1083 void SampleProfileLoader::findEquivalenceClasses(
Function &F) {
1087 for (
auto &BB : F) {
1091 if (EquivalenceClass.count(BB1)) {
1097 EquivalenceClass[BB1] = BB1;
1109 DominatedBBs.
clear();
1110 DT->getDescendants(BB1, DominatedBBs);
1111 findEquivalencesFor(BB1, DominatedBBs, PDT.get());
1123 dbgs() <<
"\nAssign the same weight to all blocks in the same class\n");
1124 for (
auto &BI : F) {
1126 const BasicBlock *EquivBB = EquivalenceClass[BB];
1128 BlockWeights[BB] = BlockWeights[EquivBB];
1143 uint64_t SampleProfileLoader::visitEdge(Edge E,
unsigned *NumUnknownEdges,
1144 Edge *UnknownEdge) {
1145 if (!VisitedEdges.count(E)) {
1146 (*NumUnknownEdges)++;
1151 return EdgeWeights[
E];
1167 bool SampleProfileLoader::propagateThroughEdges(
Function &F,
1168 bool UpdateBlockCount) {
1169 bool Changed =
false;
1171 for (
const auto &BI : F) {
1180 for (
unsigned i = 0; i < 2; i++) {
1181 uint64_t TotalWeight = 0;
1182 unsigned NumUnknownEdges = 0, NumTotalEdges = 0;
1183 Edge UnknownEdge, SelfReferentialEdge, SingleEdge;
1187 NumTotalEdges = Predecessors[BB].size();
1188 for (
auto *Pred : Predecessors[BB]) {
1189 Edge E = std::make_pair(Pred, BB);
1190 TotalWeight += visitEdge(E, &NumUnknownEdges, &UnknownEdge);
1191 if (E.first == E.second)
1192 SelfReferentialEdge =
E;
1194 if (NumTotalEdges == 1) {
1195 SingleEdge = std::make_pair(Predecessors[BB][0], BB);
1199 NumTotalEdges = Successors[BB].size();
1200 for (
auto *Succ : Successors[BB]) {
1201 Edge E = std::make_pair(BB, Succ);
1202 TotalWeight += visitEdge(E, &NumUnknownEdges, &UnknownEdge);
1204 if (NumTotalEdges == 1) {
1205 SingleEdge = std::make_pair(BB, Successors[BB][0]);
1232 if (NumUnknownEdges <= 1) {
1233 uint64_t &BBWeight = BlockWeights[EC];
1234 if (NumUnknownEdges == 0) {
1235 if (!VisitedBlocks.count(EC)) {
1239 if (TotalWeight > BBWeight) {
1240 BBWeight = TotalWeight;
1243 <<
" known. Set weight for block: ";
1244 printBlockWeight(
dbgs(), BB););
1246 }
else if (NumTotalEdges == 1 &&
1247 EdgeWeights[SingleEdge] < BlockWeights[EC]) {
1250 EdgeWeights[SingleEdge] = BlockWeights[EC];
1253 }
else if (NumUnknownEdges == 1 && VisitedBlocks.count(EC)) {
1256 if (BBWeight >= TotalWeight)
1257 EdgeWeights[UnknownEdge] = BBWeight - TotalWeight;
1259 EdgeWeights[UnknownEdge] = 0;
1262 OtherEC = EquivalenceClass[UnknownEdge.first];
1264 OtherEC = EquivalenceClass[UnknownEdge.second];
1266 if (VisitedBlocks.count(OtherEC) &&
1267 EdgeWeights[UnknownEdge] > BlockWeights[OtherEC])
1268 EdgeWeights[UnknownEdge] = BlockWeights[OtherEC];
1269 VisitedEdges.insert(UnknownEdge);
1272 printEdgeWeight(
dbgs(), UnknownEdge));
1274 }
else if (VisitedBlocks.count(EC) && BlockWeights[EC] == 0) {
1277 for (
auto *Pred : Predecessors[BB]) {
1278 Edge E = std::make_pair(Pred, BB);
1280 VisitedEdges.insert(E);
1283 for (
auto *Succ : Successors[BB]) {
1284 Edge E = std::make_pair(BB, Succ);
1286 VisitedEdges.insert(E);
1289 }
else if (SelfReferentialEdge.first && VisitedBlocks.count(EC)) {
1290 uint64_t &BBWeight = BlockWeights[BB];
1292 if (BBWeight >= TotalWeight)
1293 EdgeWeights[SelfReferentialEdge] = BBWeight - TotalWeight;
1295 EdgeWeights[SelfReferentialEdge] = 0;
1296 VisitedEdges.insert(SelfReferentialEdge);
1299 printEdgeWeight(
dbgs(), SelfReferentialEdge));
1301 if (UpdateBlockCount && !VisitedBlocks.count(EC) && TotalWeight > 0) {
1302 BlockWeights[EC] = TotalWeight;
1303 VisitedBlocks.insert(EC);
1316 void SampleProfileLoader::buildEdges(
Function &F) {
1317 for (
auto &BI : F) {
1322 if (!Predecessors[B1].
empty())
1326 if (Visited.
insert(B2).second)
1327 Predecessors[B1].push_back(B2);
1332 if (!Successors[B1].
empty())
1336 if (Visited.
insert(B2).second)
1337 Successors[B1].push_back(B2);
1369 void SampleProfileLoader::propagateWeights(
Function &F) {
1370 bool Changed =
true;
1375 for (
auto &BI : F) {
1377 Loop *L = LI->getLoopFor(BB);
1382 if (Header && BlockWeights[BB] > BlockWeights[Header]) {
1383 BlockWeights[Header] = BlockWeights[BB];
1396 Changed = propagateThroughEdges(F,
false);
1402 VisitedEdges.clear();
1405 Changed = propagateThroughEdges(F,
false);
1412 Changed = propagateThroughEdges(F,
true);
1417 LLVM_DEBUG(
dbgs() <<
"\nPropagation complete. Setting branch weights\n");
1420 for (
auto &BI : F) {
1423 if (BlockWeights[BB]) {
1424 for (
auto &I : BB->getInstList()) {
1425 if (!isa<CallInst>(I) && !isa<InvokeInst>(
I))
1429 const DebugLoc &DLoc = I.getDebugLoc();
1440 if (!
T ||
T.get().empty())
1445 findIndirectCallFunctionSamples(I, Sum);
1447 SortedCallTargets, Sum, IPVK_IndirectCallTarget,
1448 SortedCallTargets.
size());
1449 }
else if (!isa<IntrinsicInst>(&I)) {
1450 I.setMetadata(LLVMContext::MD_prof,
1452 {static_cast<uint32_t>(BlockWeights[BB])}));
1459 if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI))
1465 :
Twine(
"<UNKNOWN LOCATION>"))
1472 Edge E = std::make_pair(BB, Succ);
1473 uint64_t Weight = EdgeWeights[
E];
1484 Weights.
push_back(static_cast<uint32_t>(Weight + 1));
1486 if (Weight > MaxWeight) {
1495 uint64_t TempWeight;
1507 <<
"most popular destination for conditional branches at " 1508 <<
ore::NV(
"CondBranchesLoc", BranchLoc);
1527 unsigned SampleProfileLoader::getFunctionLoc(
Function &F) {
1529 return S->getLine();
1537 "No debug information found in function " + F.
getName() +
1538 ": Function profile not used",
1543 void SampleProfileLoader::computeDominanceAndLoopInfo(
Function &F) {
1602 bool SampleProfileLoader::emitAnnotations(
Function &F) {
1603 bool Changed =
false;
1605 if (getFunctionLoc(F) == 0)
1609 << F.
getName() <<
": " << getFunctionLoc(F) <<
"\n");
1612 Changed |= inlineHotFunctions(F, InlinedGUIDs);
1615 Changed |= computeBlockWeights(F);
1628 computeDominanceAndLoopInfo(F);
1631 findEquivalenceClasses(F);
1634 propagateWeights(F);
1639 unsigned Used = CoverageTracker.countUsedRecords(Samples, PSI);
1640 unsigned Total = CoverageTracker.countBodyRecords(Samples, PSI);
1641 unsigned Coverage = CoverageTracker.computeCoverage(Used, Total);
1645 Twine(Used) +
" of " +
Twine(Total) +
" available profile records (" +
1646 Twine(Coverage) +
"%) were applied",
1652 uint64_t Used = CoverageTracker.getTotalUsedSamples();
1653 uint64_t Total = CoverageTracker.countBodySamples(Samples, PSI);
1654 unsigned Coverage = CoverageTracker.computeCoverage(Used, Total);
1658 Twine(Used) +
" of " +
Twine(Total) +
" available profile samples (" +
1659 Twine(Coverage) +
"%) were applied",
1669 "Sample Profile loader",
false,
false)
1676 bool SampleProfileLoader::doInitialization(
Module &M) {
1677 auto &Ctx = M.getContext();
1679 std::unique_ptr<SampleProfileReaderItaniumRemapper> RemapReader;
1682 if (std::error_code EC = ReaderOrErr.getError()) {
1683 std::string Msg =
"Could not open profile: " + EC.message();
1687 Reader = std::move(ReaderOrErr.get());
1693 ProfAccForSymsInList =
1695 if (ProfAccForSymsInList) {
1696 NamesInProfile.clear();
1698 NamesInProfile.insert(NameTable->begin(), NameTable->end());
1705 return new SampleProfileLoaderLegacyPass();
1709 return new SampleProfileLoaderLegacyPass(Name);
1714 GUIDToFuncNameMapper Mapper(M, *Reader, GUIDToFuncNameMap);
1715 if (!ProfileIsValid)
1725 TotalCollectedSamples += I.second.getTotalSamples();
1734 auto pos = OrigName.
find(
'.');
1743 r.first->second =
nullptr;
1747 bool retval =
false;
1749 if (!F.isDeclaration()) {
1750 clearFunctionData();
1755 for (
const std::pair<Function *, NotInlinedProfileInfo> &pair :
1762 bool SampleProfileLoaderLegacyPass::runOnModule(
Module &M) {
1763 ACT = &getAnalysis<AssumptionCacheTracker>();
1764 TTIWP = &getAnalysis<TargetTransformInfoWrapperPass>();
1766 &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
1767 return SampleLoader.runOnModule(M,
nullptr, PSI);
1772 DILocation2SampleMap.clear();
1777 uint64_t initialEntryCount = -1;
1783 initialEntryCount = 0;
1786 ProfAccForSymsInList =
false;
1793 if (ProfAccForSymsInList) {
1795 if (PSL->contains(F.
getName()))
1796 initialEntryCount = 0;
1809 if (NamesInProfile.count(CanonName))
1810 initialEntryCount = -1;
1814 std::unique_ptr<OptimizationRemarkEmitter> OwnedORE;
1821 OwnedORE = std::make_unique<OptimizationRemarkEmitter>(&
F);
1822 ORE = OwnedORE.get();
1825 if (Samples && !Samples->empty())
1826 return emitAnnotations(F);
1842 SampleProfileLoader SampleLoader(
1845 : ProfileRemappingFileName,
1846 IsThinLTOPreLink, GetAssumptionCache, GetTTI);
1848 SampleLoader.doInitialization(M);
1851 if (!SampleLoader.runOnModule(M, &AM, PSI))
const FunctionSamplesMap * findFunctionSamplesMapAt(const LineLocation &Loc) const
Returns the FunctionSamplesMap at the given Loc.
static uint64_t getGUID(StringRef Name)
reference emplace_back(ArgTypes &&... Args)
Thresholds to tune inline cost analysis.
Represents either an error or a value T.
InlineCost getInlineCost(CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, std::function< AssumptionCache &(Function &)> &GetAssumptionCache, Optional< function_ref< BlockFrequencyInfo &(Function &)>> GetBFI, ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE=nullptr)
Get an InlineCost object representing the cost of inlining this callsite.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
DiagnosticInfoOptimizationBase::Argument NV
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
This class represents lattice values for constants.
A Module instance is used to store all the information related to an LLVM module. ...
static ErrorOr< std::unique_ptr< SampleProfileReader > > create(const std::string Filename, LLVMContext &C, const std::string RemapFilename="")
Create a sample profile reader appropriate to the file format.
BasicBlock * getSuccessor(unsigned Idx) const
Return the specified successor. This instruction must be a terminator.
amdgpu Simplify well known AMD library false FunctionCallee Value const Twine & Name
virtual void collectFuncsFrom(const Module &M)
bool isColdCount(uint64_t C)
Returns true if count C is considered cold.
void push_back(const T &Elt)
const ValueSymbolTable & getValueSymbolTable() const
Get the symbol table of global variable and function identifiers.
Analysis providing profile information.
This class represents a function call, abstracting a target machine's calling convention.
An immutable pass that tracks lazily created AssumptionCache objects.
Metadata * getProfileSummary(bool IsCS)
Returns profile summary metadata.
A cache of @llvm.assume calls within a function.
Analysis pass providing the TargetTransformInfo.
static cl::opt< bool > ProfileAccurateForSymsInList("profile-accurate-for-symsinlist", cl::Hidden, cl::ZeroOrMore, cl::init(true), cl::desc("For symbols in profile symbol list, regard their profiles to " "be accurate. It may be overriden by profile-sample-accurate. "))
LLVMContext & getContext() const
All values hold a context through their type.
bool isLegalToPromote(CallSite CS, Function *Callee, const char **FailureReason=nullptr)
Return true if the given indirect call site can be made to call Callee.
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
DenseMap< uint64_t, StringRef > * GUIDToFuncNameMap
GUIDToFuncNameMap saves the mapping from GUID to the symbol name, for all the function symbols define...
ErrorOr< uint64_t > findSamplesAt(uint32_t LineOffset, uint32_t Discriminator) const
Return the number of samples collected at the given location.
uint64_t getOrCompHotCountThreshold()
Returns HotCountThreshold if set.
FunTy * getCalledFunction() const
Return the function being called if this is a direct call, otherwise return null (if it's an indirect...
void findInlinedFunctions(DenseSet< GlobalValue::GUID > &S, const Module *M, uint64_t Threshold) const
Recursively traverses all children, if the total sample count of the corresponding function is no les...
This class captures the data input to the InlineFunction call, and records the auxiliary results prod...
static SmallVector< InstrProfValueData, 2 > GetSortedValueDataFromCallTargets(const SampleRecord::CallTargetMap &M)
Returns the sorted CallTargetMap M by count in descending order.
bool isHotCount(uint64_t C)
Returns true if count C is considered hot.
Represents the cost of inlining a function.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
Metadata * getMD(LLVMContext &Context)
Return summary information as metadata.
Representation of the samples collected for a function.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
static const SortedCallTargetSet SortCallTargets(const CallTargetMap &Targets)
Sort call targets in descending order of call frequency.
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
void setEntryCount(ProfileCount Count, const DenseSet< GlobalValue::GUID > *Imports=nullptr)
Set the entry count for this function.
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
static cl::opt< std::string > SampleProfileRemappingFile("sample-profile-remapping-file", cl::init(""), cl::value_desc("filename"), cl::desc("Profile remapping file loaded by -sample-profile"), cl::Hidden)
LLVMContext & getContext() const
Get the global data context.
static StringRef getName(Value *V)
LLVM_NODISCARD StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
StringRef getFilename() const
BlockT * getHeader() const
Interval::succ_iterator succ_begin(Interval *I)
succ_begin/succ_end - define methods so that Intervals may be used just like BasicBlocks can with the...
const Instruction * getFirstNonPHIOrDbgOrLifetime() const
Returns a pointer to the first instruction in this block that is not a PHINode, a debug intrinsic...
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
void verifyMisExpect(llvm::Instruction *I, const llvm::SmallVector< uint32_t, 4 > &Weights, llvm::LLVMContext &Ctx)
verifyMisExpect - compares PGO counters to the thresholds used for llvm.expect and warns if the PGO c...
bool extractProfTotalWeight(uint64_t &TotalVal) const
Retrieve total raw weight values of a branch.
static cl::opt< std::string > SampleProfileFile("sample-profile-file", cl::init(""), cl::value_desc("filename"), cl::desc("Profile file loaded by -sample-profile"), cl::Hidden)
ModulePass * createSampleProfileLoaderPass()
Instruction * promoteIndirectCall(Instruction *Inst, Function *F, uint64_t Count, uint64_t TotalCount, bool AttachProfToDirectCall, OptimizationRemarkEmitter *ORE)
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
unsigned getNumSuccessors() const
Return the number of successors that this instruction has.
static cl::opt< unsigned > SampleProfileRecordCoverage("sample-profile-check-record-coverage", cl::init(0), cl::value_desc("N"), cl::desc("Emit a warning if less than N% of records in the input profile " "are matched to the IR."))
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Interval::succ_iterator succ_end(Interval *I)
iterator find(const_arg_type_t< KeyT > Val)
const BasicBlock & getEntryBlock() const
static bool runOnFunction(Function &F, bool PostInlining)
initializer< Ty > init(const Ty &Val)
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
A set of analyses that are preserved following a run of a transformation pass.
* if(!EatIfPresent(lltok::kw_thread_local)) return false
ParseOptionalThreadLocal := /*empty.
LLVM Basic Block Representation.
This is an important class for using LLVM in a threaded context.
static cl::opt< unsigned > SampleProfileMaxPropagateIterations("sample-profile-max-propagate-iterations", cl::init(100), cl::desc("Maximum number of iterations to go through when propagating " "sample block/edge weights through the CFG."))
DISubprogram * getSubprogram() const
Get the attached subprogram.
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
const BodySampleMap & getBodySamples() const
Return all the samples collected in the body of the function.
FunctionSamples * getSamplesFor(const Function &F)
Return the samples collected for function F.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
std::error_code read()
The interface to read sample profiles from the associated file.
std::pair< iterator, bool > insert(const ValueT &V)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Interval::pred_iterator pred_begin(Interval *I)
pred_begin/pred_end - define methods so that Intervals may be used just like BasicBlocks can with the...
static StringRef getCanonicalFnName(const Function &F)
Return the canonical name for a function, taking into account suffix elision policy attributes...
LLVM_NODISCARD size_t find(char C, size_t From=0) const
Search for the first character C in the string.
Function::ProfileCount ProfileCount
Represent the analysis usage information of a pass.
Interval::pred_iterator pred_end(Interval *I)
DenseMap< SymbolStringPtr, JITEvaluatedSymbol > SymbolMap
A map from symbol names (as SymbolStringPtrs) to JITSymbols (address/flags pairs).
void initializeSampleProfileLoaderLegacyPassPass(PassRegistry &)
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
InlineResult InlineFunction(CallBase *CB, InlineFunctionInfo &IFI, AAResults *CalleeAAR=nullptr, bool InsertLifetime=true)
This function inlines the called function into the basic block of the caller.
Used in the streaming interface as the general argument type.
void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
std::map< std::string, FunctionSamples, std::less<> > FunctionSamplesMap
Class to represent profile counts.
MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight)
Return metadata containing two branch weights.
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
StringRef getFuncNameInModule(const Module *M) const
Return the original function name if it exists in Module M.
Optional< bool > ComputeFullInlineCost
Compute inline cost even when the cost has exceeded the threshold.
void sort(IteratorTy Start, IteratorTy End)
constexpr bool empty(const T &RangeOrContainer)
Test whether RangeOrContainer is empty. Similar to C++17 std::empty.
const FunctionSamples * findFunctionSamplesAt(const LineLocation &Loc, StringRef CalleeName) const
Returns a pointer to FunctionSamples at the given callsite location Loc with callee CalleeName...
A function analysis which provides an AssumptionCache.
virtual std::unique_ptr< ProfileSymbolList > getProfileSymbolList()
const InstListType & getInstList() const
Return the underlying instruction list container.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Align max(MaybeAlign Lhs, Align Rhs)
InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Module.h This file contains the declarations for the Module class.
static unsigned getOffset(const DILocation *DIL)
Returns the line offset to the start line of the subprogram.
virtual std::vector< StringRef > * getNameTable()
It includes all the names that have samples either in outline instance or inline instance.
static cl::opt< bool > ProfileSampleAccurate("profile-sample-accurate", cl::Hidden, cl::init(false), cl::desc("If the sample profile is accurate, we will mark all un-sampled " "callsite and function as having 0 samples. Otherwise, treat " "un-sampled callsites and functions conservatively as unknown. "))
ErrorOr< SampleRecord::CallTargetMap > findCallTargetMapAt(uint32_t LineOffset, uint32_t Discriminator) const
Returns the call target map collected at a given location.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
StringMap - This is an unconventional map that is specialized for handling keys that are "strings"...
StringMap< FunctionSamples > & getProfiles()
Return all the profiles.
amdgpu Simplify well known AMD library false FunctionCallee Callee
GUID getGUID() const
Return a 64-bit global unique ID constructed from global value name (i.e.
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
static void clear(coro::Shape &Shape)
iterator insert(iterator I, T &&Elt)
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
unsigned getBaseDiscriminator() const
Returns the base discriminator stored in the discriminator.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Represents the relative location of an instruction.
static cl::opt< unsigned > SampleProfileSampleCoverage("sample-profile-check-sample-coverage", cl::init(0), cl::value_desc("N"), cl::desc("Emit a warning if less than N% of samples in the input profile " "are matched to the IR."))
Represents a single loop in the control flow graph.
StringRef getName() const
Return a constant reference to the value's name.
Establish a view to a call site for examination.
const Function * getParent() const
Return the enclosing method, or null if none.
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
StringRef getName() const
Return the function name.
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
void updateProfileCallee(Function *Callee, int64_t entryDelta, const ValueMap< const Value *, WeakTrackingVH > *VMap=nullptr)
Updates profile information by adjusting the entry count by adding entryDelta then scaling callsite i...
INITIALIZE_PASS_BEGIN(SampleProfileLoaderLegacyPass, "sample-profile", "Sample Profile loader", false, false) INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Provides ErrorOr<T> smart pointer.
ProfileSummary & getSummary() const
Return the profile summary.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Module * getParent()
Get the module that this global value is contained inside of...
uint64_t getTotalSamples() const
Return the total number of samples collected inside the function.
sample Sample Profile loader
const CallsiteSampleMap & getCallsiteSamples() const
Return all the callsite samples collected in the body of the function.
StringSet - A wrapper for StringMap that provides set-like functionality.
Sample-based profile reader.
This class implements an extremely fast bulk output stream that can only output to a stream...
print Print MemDeps of function
This file defines a set of templates that efficiently compute a dominator tree over a generic graph...
StringRef - Represent a constant reference to a string, i.e.
A container for analyses that lazily runs them and caches their results.
uint64_t getEntrySamples() const
Return the sample count of the first instruction of the function.
static cl::opt< bool > NoWarnSampleUnused("no-warn-sample-unused", cl::init(false), cl::Hidden, cl::desc("Use this option to turn off/on warnings about function with " "samples but without debug information to use those samples. "))
This header defines various interfaces for pass management in LLVM.
Diagnostic information for the sample profiler.
void setProfileSummary(Metadata *M, ProfileSummary::Kind Kind)
Attach profile summary metadata to this module.
void dump(raw_ostream &OS=dbgs())
Print all the profiles on stream OS.
This file provides the interface for the sampled PGO loader pass.
const BasicBlock * getParent() const
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...