15#ifndef LLVM_TRANSFORMS_UTILS_SAMPLEPROFILELOADERBASEIMPL_H
16#define LLVM_TRANSFORMS_UTILS_SAMPLEPROFILELOADERBASEIMPL_H
47using namespace sampleprof;
48using namespace sampleprofutil;
55#define DEBUG_TYPE "sample-profile-impl"
57namespace afdo_detail {
76 return &
F->getEntryBlock();
91 auto I = GUIDToProbeDescMap.
find(
93 return I == GUIDToProbeDescMap.
end() ? nullptr : &
I->second;
100 for (
const auto *Operand :
FuncInfo->operands()) {
101 const auto *MD = cast<MDNode>(Operand);
102 auto GUID = mdconst::dyn_extract<ConstantInt>(MD->getOperand(0))
104 auto Hash = mdconst::dyn_extract<ConstantInt>(MD->getOperand(1))
116 const auto *Desc = getDesc(
F);
119 <<
F.getName() <<
"\n");
143 using BT =
typename std::remove_pointer<NodeRef>::type;
167 using Edge = std::pair<const BasicBlockT *, const BasicBlockT *>;
266 std::unique_ptr<SampleProfileReader>
Reader;
291template <
typename BT>
293 BlockWeights.clear();
295 VisitedBlocks.clear();
296 VisitedEdges.clear();
297 EquivalenceClass.clear();
303 Predecessors.clear();
305 CoverageTracker.clear();
313template <
typename BT>
315 OS <<
"weight[" <<
E.first->getName() <<
"->" <<
E.second->getName()
316 <<
"]: " << EdgeWeights[
E] <<
"\n";
323template <
typename BT>
327 OS <<
"equivalence[" << BB->getName()
328 <<
"]: " << ((Equiv) ? EquivalenceClass[BB]->
getName() :
"NONE") <<
"\n";
335template <
typename BT>
338 const auto &
I = BlockWeights.find(BB);
339 uint64_t W = (
I == BlockWeights.end() ? 0 :
I->second);
340 OS <<
"weight[" << BB->getName() <<
"]: " << W <<
"\n";
355template <
typename BT>
359 return getProbeWeight(Inst);
360 return getInstWeightImpl(Inst);
363template <
typename BT>
368 return std::error_code();
370 const DebugLoc &DLoc = Inst.getDebugLoc();
372 return std::error_code();
378 Discriminator = DIL->getDiscriminator();
385 CoverageTracker.markSamplesUsed(FS, LineOffset, Discriminator, R.get());
390 Remark <<
" samples from profile (offset: ";
401 << Inst <<
" (line offset: " << LineOffset <<
"."
402 << Discriminator <<
" - weight: " << R.get() <<
")\n");
410template <
typename BT>
414 "Profile is not pseudo probe based");
419 return std::error_code();
434 auto R = FS->findSamplesAt(Probe->Id, Probe->Discriminator);
436 uint64_t Samples = R.get() * Probe->Factor;
437 bool FirstMark = CoverageTracker.markSamplesUsed(FS, Probe->Id, 0, Samples);
442 Remark <<
" samples from profile (ProbeId=";
444 if (Probe->Discriminator) {
450 Remark <<
", OriginalSamples=";
457 if (Probe->Discriminator)
458 dbgs() <<
"." << Probe->Discriminator;
459 dbgs() <<
":" << Inst <<
" - weight: " << R.get()
460 <<
" - factor: " <<
format(
"%0.2f", Probe->Factor) <<
")\n";});
474template <
typename BT>
478 bool HasWeight =
false;
479 for (
auto &
I : *BB) {
482 Max = std::max(Max, R.get());
495template <
typename BT>
497 bool Changed =
false;
499 for (
const auto &BB :
F) {
502 BlockWeights[&BB] = Weight.
get();
503 VisitedBlocks.insert(&BB);
521template <
typename BT>
528 auto it = DILocation2SampleMap.try_emplace(DIL,
nullptr);
530 it.first->second = Samples->findFunctionSamples(DIL, Reader->getRemapper());
532 return it.first->second;
558template <
typename BT>
564 for (
const auto *BB2 : Descendants) {
565 bool IsDomParent = DomTree->dominates(BB2, BB1);
566 bool IsInSameLoop = LI->getLoopFor(BB1) == LI->getLoopFor(BB2);
567 if (BB1 != BB2 && IsDomParent && IsInSameLoop) {
568 EquivalenceClass[BB2] = EC;
570 if (VisitedBlocks.count(BB2)) {
571 VisitedBlocks.insert(EC);
582 Weight = std::max(Weight, BlockWeights[BB2]);
585 const BasicBlockT *EntryBB = getEntryBB(EC->getParent());
587 BlockWeights[EC] = Samples->getHeadSamples() + 1;
589 BlockWeights[EC] = Weight;
602template <
typename BT>
611 if (EquivalenceClass.count(BB1)) {
617 EquivalenceClass[BB1] = BB1;
629 DominatedBBs.
clear();
630 DT->getDescendants(BB1, DominatedBBs);
631 findEquivalencesFor(BB1, DominatedBBs, &*PDT);
643 dbgs() <<
"\nAssign the same weight to all blocks in the same class\n");
648 BlockWeights[BB] = BlockWeights[EquivBB];
663template <
typename BT>
665 unsigned *NumUnknownEdges,
667 if (!VisitedEdges.count(
E)) {
668 (*NumUnknownEdges)++;
673 return EdgeWeights[
E];
689template <
typename BT>
692 bool Changed =
false;
694 for (
const auto &BI :
F) {
703 for (
unsigned i = 0; i < 2; i++) {
705 unsigned NumUnknownEdges = 0, NumTotalEdges = 0;
706 Edge UnknownEdge, SelfReferentialEdge, SingleEdge;
710 NumTotalEdges = Predecessors[BB].size();
711 for (
auto *Pred : Predecessors[BB]) {
712 Edge E = std::make_pair(Pred, BB);
713 TotalWeight += visitEdge(
E, &NumUnknownEdges, &UnknownEdge);
714 if (
E.first ==
E.second)
715 SelfReferentialEdge =
E;
717 if (NumTotalEdges == 1) {
718 SingleEdge = std::make_pair(Predecessors[BB][0], BB);
722 NumTotalEdges = Successors[BB].size();
723 for (
auto *Succ : Successors[BB]) {
724 Edge E = std::make_pair(BB, Succ);
725 TotalWeight += visitEdge(
E, &NumUnknownEdges, &UnknownEdge);
727 if (NumTotalEdges == 1) {
728 SingleEdge = std::make_pair(BB, Successors[BB][0]);
755 if (NumUnknownEdges <= 1) {
756 uint64_t &BBWeight = BlockWeights[EC];
757 if (NumUnknownEdges == 0) {
758 if (!VisitedBlocks.count(EC)) {
762 if (TotalWeight > BBWeight) {
763 BBWeight = TotalWeight;
766 <<
" known. Set weight for block: ";
767 printBlockWeight(
dbgs(), BB););
769 }
else if (NumTotalEdges == 1 &&
770 EdgeWeights[SingleEdge] < BlockWeights[EC]) {
773 EdgeWeights[SingleEdge] = BlockWeights[EC];
776 }
else if (NumUnknownEdges == 1 && VisitedBlocks.count(EC)) {
779 if (BBWeight >= TotalWeight)
780 EdgeWeights[UnknownEdge] = BBWeight - TotalWeight;
782 EdgeWeights[UnknownEdge] = 0;
785 OtherEC = EquivalenceClass[UnknownEdge.first];
787 OtherEC = EquivalenceClass[UnknownEdge.second];
789 if (VisitedBlocks.count(OtherEC) &&
790 EdgeWeights[UnknownEdge] > BlockWeights[OtherEC])
791 EdgeWeights[UnknownEdge] = BlockWeights[OtherEC];
792 VisitedEdges.insert(UnknownEdge);
795 printEdgeWeight(
dbgs(), UnknownEdge));
797 }
else if (VisitedBlocks.count(EC) && BlockWeights[EC] == 0) {
800 for (
auto *Pred : Predecessors[BB]) {
801 Edge E = std::make_pair(Pred, BB);
803 VisitedEdges.insert(
E);
806 for (
auto *Succ : Successors[BB]) {
807 Edge E = std::make_pair(BB, Succ);
809 VisitedEdges.insert(
E);
812 }
else if (SelfReferentialEdge.first && VisitedBlocks.count(EC)) {
813 uint64_t &BBWeight = BlockWeights[BB];
815 if (BBWeight >= TotalWeight)
816 EdgeWeights[SelfReferentialEdge] = BBWeight - TotalWeight;
818 EdgeWeights[SelfReferentialEdge] = 0;
819 VisitedEdges.insert(SelfReferentialEdge);
822 printEdgeWeight(
dbgs(), SelfReferentialEdge));
824 if (UpdateBlockCount && !VisitedBlocks.count(EC) && TotalWeight > 0) {
825 BlockWeights[EC] = TotalWeight;
826 VisitedBlocks.insert(EC);
839template <
typename BT>
846 if (!Predecessors[B1].empty())
848 for (
auto *B2 : getPredecessors(B1))
849 if (Visited.
insert(B2).second)
850 Predecessors[B1].push_back(B2);
854 if (!Successors[B1].empty())
856 for (
auto *B2 : getSuccessors(B1))
857 if (Visited.
insert(B2).second)
858 Successors[B1].push_back(B2);
879template <
typename BT>
886 for (
const auto &BI :
F) {
889 SampleBlockWeights[&BI] = Weight.
get();
892 applyProfi(
F, Successors, SampleBlockWeights, BlockWeights, EdgeWeights);
901 LoopT *L = LI->getLoopFor(BB);
906 if (Header && BlockWeights[BB] > BlockWeights[Header]) {
907 BlockWeights[Header] = BlockWeights[BB];
913 Changed = propagateThroughEdges(
F,
false);
919 VisitedEdges.clear();
922 Changed = propagateThroughEdges(
F,
false);
929 Changed = propagateThroughEdges(
F,
true);
934template <
typename FT>
939 Infer.apply(BlockWeights, EdgeWeights);
988template <
typename BT>
991 bool Changed = (InlinedGUIDs.
size() != 0);
994 Changed |= computeBlockWeights(
F);
998 initWeightPropagation(
F, InlinedGUIDs);
1001 propagateWeights(
F);
1004 finalizeWeightPropagation(
F, InlinedGUIDs);
1010template <
typename BT>
1024 computeDominanceAndLoopInfo(
F);
1027 findEquivalenceClasses(
F);
1038template <
typename BT>
1050 if (BlockWeights[EntryBB] > 0) {
1058template <
typename BT>
1063 unsigned Used = CoverageTracker.countUsedRecords(Samples, PSI);
1064 unsigned Total = CoverageTracker.countBodyRecords(Samples, PSI);
1065 unsigned Coverage = CoverageTracker.computeCoverage(Used,
Total);
1068 Func.getSubprogram()->getFilename(), getFunctionLoc(
F),
1069 Twine(Used) +
" of " +
Twine(
Total) +
" available profile records (" +
1070 Twine(Coverage) +
"%) were applied",
1076 uint64_t Used = CoverageTracker.getTotalUsedSamples();
1077 uint64_t Total = CoverageTracker.countBodySamples(Samples, PSI);
1078 unsigned Coverage = CoverageTracker.computeCoverage(Used,
Total);
1081 Func.getSubprogram()->getFilename(), getFunctionLoc(
F),
1082 Twine(Used) +
" of " +
Twine(
Total) +
" available profile samples (" +
1083 Twine(Coverage) +
"%) were applied",
1100template <
typename BT>
1104 return S->getLine();
1112 "No debug information found in function " + Func.getName() +
1113 ": Function profile not used",
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
static Function * getFunction(Constant *C)
This file defines a set of templates that efficiently compute a dominator tree over a generic graph.
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This file defines the RefCountedBase, ThreadSafeRefCountedBase, and IntrusiveRefCntPtr classes.
typename CallsiteContextGraph< DerivedCCG, FuncTy, CallTy >::FuncInfo FuncInfo
Module.h This file contains the declarations for the Module class.
static StringRef getName(Value *V)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file provides the interface for the profile inference algorithm, profi.
This file provides the utility functions for the sampled PGO loader base implementation.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
LLVM Basic Block Representation.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
unsigned getBaseDiscriminator() const
Returns the base discriminator stored in the discriminator.
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&... Args)
Implements a dense probed hash-table based set.
Diagnostic information for the sample profiler.
Represents either an error or a value T.
Class to represent profile counts.
void setEntryCount(ProfileCount Count, const DenseSet< GlobalValue::GUID > *Imports=nullptr)
Set the entry count for this function.
GUID getGUID() const
Return a 64-bit global unique ID constructed from global value name (i.e.
A smart pointer to a reference-counted object that inherits from RefCountedBase or ThreadSafeRefCount...
Represents a single loop in the control flow graph.
A Module instance is used to store all the information related to an LLVM module.
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
Analysis providing profile information.
bool moduleIsProbed(const Module &M) const
bool profileIsValid(const Function &F, const FunctionSamples &Samples) const
PseudoProbeManager(const Module &M)
Sample profile inference pass.
typename std::remove_pointer< NodeRef >::type BT
bool computeAndPropagateWeights(FunctionT &F, const DenseSet< GlobalValue::GUID > &InlinedGUIDs)
Generate branch weight metadata for all branches in F.
void computeDominanceAndLoopInfo(FunctionT &F)
typename afdo_detail::IRTraits< BT >::BasicBlockT BasicBlockT
IntrusiveRefCntPtr< vfs::FileSystem > FS
VirtualFileSystem to load profile files from.
typename afdo_detail::IRTraits< BT >::OptRemarkAnalysisT OptRemarkAnalysisT
typename afdo_detail::IRTraits< BT >::SuccRangeT SuccRangeT
EdgeWeightMap EdgeWeights
Map edges to their computed weights.
Function & getFunction(FunctionT &F)
SmallSet< Edge, 32 > VisitedEdges
Set of visited edges during propagation.
OptRemarkEmitterT * ORE
Optimization Remark Emitter used to emit diagnostic remarks.
const BasicBlockT * getEntryBB(const FunctionT *F)
ErrorOr< uint64_t > getBlockWeight(const BasicBlockT *BB)
Compute the weight of a basic block.
unsigned getFunctionLoc(FunctionT &Func)
Get the line number for the function header.
PredRangeT getPredecessors(BasicBlockT *BB)
ErrorOr< uint64_t > getInstWeightImpl(const InstructionT &Inst)
virtual ErrorOr< uint64_t > getInstWeight(const InstructionT &Inst)
Get the weight for an instruction.
SmallPtrSet< const BasicBlockT *, 32 > VisitedBlocks
Set of visited blocks during propagation.
EquivalenceClassMap EquivalenceClass
Equivalence classes for block weights.
typename afdo_detail::IRTraits< BT >::DominatorTreePtrT DominatorTreePtrT
SampleCoverageTracker CoverageTracker
Profile coverage tracker.
typename afdo_detail::IRTraits< BT >::LoopT LoopT
typename GraphTraits< FT * >::NodeRef NodeRef
std::unique_ptr< SampleProfileReader > Reader
Profile reader object.
typename afdo_detail::IRTraits< BT >::OptRemarkEmitterT OptRemarkEmitterT
void printBlockWeight(raw_ostream &OS, const BasicBlockT *BB) const
Print the weight of block BB on stream OS.
DominatorTreePtrT DT
Dominance, post-dominance and loop information.
void printBlockEquivalence(raw_ostream &OS, const BasicBlockT *BB)
Print the equivalence class of block BB on stream OS.
SampleProfileLoaderBaseImpl(std::string Name, std::string RemapName, IntrusiveRefCntPtr< vfs::FileSystem > FS)
std::unique_ptr< PseudoProbeManager > ProbeManager
~SampleProfileLoaderBaseImpl()=default
typename afdo_detail::IRTraits< BT >::LoopInfoPtrT LoopInfoPtrT
void emitCoverageRemarks(FunctionT &F)
SuccRangeT getSuccessors(BasicBlockT *BB)
std::string Filename
Name of the profile file to load.
bool propagateThroughEdges(FunctionT &F, bool UpdateBlockCount)
Propagate weights through incoming/outgoing edges.
PostDominatorTreePtrT PDT
typename afdo_detail::IRTraits< BT >::InstructionT InstructionT
uint64_t visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge)
Visit the given edge to decide if it has a valid weight.
typename afdo_detail::IRTraits< BT >::PostDominatorTreePtrT PostDominatorTreePtrT
void initWeightPropagation(FunctionT &F, const DenseSet< GlobalValue::GUID > &InlinedGUIDs)
BlockEdgeMap Predecessors
Predecessors for each basic block in the CFG.
void finalizeWeightPropagation(FunctionT &F, const DenseSet< GlobalValue::GUID > &InlinedGUIDs)
bool computeBlockWeights(FunctionT &F)
Compute and store the weights of every basic block.
virtual const FunctionSamples * findFunctionSamples(const InstructionT &I) const
Get the FunctionSamples for an instruction.
typename afdo_detail::IRTraits< BT >::PostDominatorTreeT PostDominatorTreeT
virtual ErrorOr< uint64_t > getProbeWeight(const InstructionT &Inst)
std::string RemappingFilename
Name of the profile remapping file to load.
typename afdo_detail::IRTraits< BT >::PredRangeT PredRangeT
void applyProfi(FunctionT &F, BlockEdgeMap &Successors, BlockWeightMap &SampleBlockWeights, BlockWeightMap &BlockWeights, EdgeWeightMap &EdgeWeights)
typename afdo_detail::IRTraits< BT >::BlockFrequencyInfoT BlockFrequencyInfoT
BlockEdgeMap Successors
Successors for each basic block in the CFG.
FunctionSamples * Samples
Samples collected for the body of this function.
void findEquivalenceClasses(FunctionT &F)
Find equivalence classes.
std::pair< const BasicBlockT *, const BasicBlockT * > Edge
ProfileSummaryInfo * PSI
Profile Summary Info computed from sample profile.
void clearFunctionData(bool ResetDT=true)
Clear all the per-function data used to load samples and propagate weights.
DenseMap< const DILocation *, const FunctionSamples * > DILocation2SampleMap
void buildEdges(FunctionT &F)
Build in/out edge lists for each basic block in the CFG.
void findEquivalencesFor(BasicBlockT *BB1, ArrayRef< BasicBlockT * > Descendants, PostDominatorTreeT *DomTree)
Find equivalence classes for the given block.
void printEdgeWeight(raw_ostream &OS, Edge E)
Print the weight of edge E on stream OS.
typename afdo_detail::IRTraits< BT >::FunctionT FunctionT
BlockWeightMap BlockWeights
Map basic blocks to their computed weights.
void propagateWeights(FunctionT &F)
Propagate weights into edges.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
Representation of the samples collected for a function.
uint64_t getFunctionHash() const
static bool ProfileIsProbeBased
static StringRef getCanonicalFnName(const Function &F)
Return the canonical name for a function, taking into account suffix elision policy attributes.
static unsigned getOffset(const DILocation *DIL)
Returns the line offset to the start line of the subprogram.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
DiagnosticInfoOptimizationBase::Argument NV
This is an optimization pass for GlobalISel generic memory operations.
Function::ProfileCount ProfileCount
auto successors(const MachineBasicBlock *BB)
cl::opt< unsigned > SampleProfileSampleCoverage
cl::opt< unsigned > SampleProfileRecordCoverage
cl::opt< unsigned > SampleProfileMaxPropagateIterations
cl::opt< bool > SampleProfileUseProfi
cl::opt< bool > EnableFSDiscriminator
std::optional< PseudoProbe > extractProbe(const Instruction &Inst)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
iterator_range< succ_iterator > succ_range
cl::opt< bool > NoWarnSampleUnused
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
iterator_range< pred_iterator > pred_range
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
auto predecessors(const MachineBasicBlock *BB)
constexpr const char * PseudoProbeDescMetadataName
typename GraphType::UnknownGraphTypeError NodeRef
std::unique_ptr< LoopInfo > LoopInfoPtrT
std::unique_ptr< PostDominatorTree > PostDominatorTreePtrT
static Function & getFunction(Function &F)
static pred_range getPredecessors(BasicBlock *BB)
static succ_range getSuccessors(BasicBlock *BB)
std::unique_ptr< DominatorTree > DominatorTreePtrT
static const BasicBlock * getEntryBB(const Function *F)