25#define DEBUG_TYPE "ctx_prof"
30 cl::desc(
"Use the specified contextual profile file"));
33 "ctx-profile-printer-level",
36 "everything",
"print everything - most verbose"),
37 clEnumValN(CtxProfAnalysisPrinterPass::PrintMode::JSON,
"json",
38 "just the json representation of the profile")),
39 cl::desc(
"Verbosity level of the contextual profile printer pass."));
45 Ret[
"Guid"] =
P.guid();
46 Ret[
"Counters"] =
Array(
P.counters());
47 if (
P.callsites().empty())
52 assert(MaxIt != AllCS.end() &&
"We should have a max value because the "
53 "callsites collection is not empty.");
56 for (
auto I = 0U, Max = *MaxIt;
I <= Max; ++
I) {
60 for (
const auto &[
_, Ctx] :
P.callsite(
I))
63 Ret[
"Callsites"] = std::move(CSites);
70 for (
const auto &[
_, Ctx] :
P)
71 Ret.push_back(
toJSON(Ctx));
80 for (
auto &
F : M.functions()) {
81 if (
F.isDeclaration())
88 {ConstantAsMetadata::get(ConstantInt::get(
89 Type::getInt64Ty(M.getContext()), GUID))}));
95 if (
F.isDeclaration()) {
100 assert(MD &&
"guid not found for defined function");
101 return cast<ConstantInt>(cast<ConstantAsMetadata>(MD->getOperand(0))
103 ->stripPointerCasts())
123 M.getContext().emitError(
"could not open contextual profile file: " +
130 M.getContext().emitError(
"contextual profile file is invalid: " +
136 for (
const auto &
F : M)
137 if (!
F.isDeclaration())
139 MaybeCtx->find(GUID) != MaybeCtx->end())
140 ProfileRootsInModule.
insert(GUID);
144 if (!ProfileRootsInModule.
contains(RootGuid))
145 MaybeCtx->erase(RootGuid);
148 if (MaybeCtx->empty())
154 for (
const auto &
F : M) {
155 if (
F.isDeclaration())
158 assert(GUID &&
"guid not found for defined function");
159 const auto &Entry =
F.begin();
161 for (
const auto &
I : *Entry)
162 if (
auto *
C = dyn_cast<InstrProfIncrementInst>(&
I)) {
164 static_cast<uint32_t>(
C->getNumCounters()->getZExtValue());
170 for (
const auto &BB :
F)
171 for (
const auto &
I : BB)
172 if (
auto *
C = dyn_cast<InstrProfCallsite>(&
I)) {
174 static_cast<uint32_t>(
C->getNumCounters()->getZExtValue());
177 auto [It, Ins] =
Result.FuncInfo.insert(
178 {GUID, PGOContextualProfile::FunctionInfo(
F.getName())});
181 It->second.NextCallsiteIndex = MaxCallsites;
182 It->second.NextCounterIndex = MaxCounters;
186 Result.Profiles = std::move(*MaybeCtx);
192PGOContextualProfile::getDefinedFunctionGUID(
const Function &
F)
const {
205 OS <<
"No contextual profile was provided.\n";
210 OS <<
"Function Info:\n";
211 for (
const auto &[
Guid, FuncInfo] :
C.FuncInfo)
212 OS <<
Guid <<
" : " << FuncInfo.Name
213 <<
". MaxCounterID: " << FuncInfo.NextCounterIndex
214 <<
". MaxCallsiteID: " << FuncInfo.NextCallsiteIndex <<
"\n";
220 OS <<
"\nCurrent Profile:\n";
221 OS <<
formatv(
"{0:2}", JSONed);
226 OS <<
"\nFlat Profile:\n";
227 auto Flat =
C.flatten();
241 if (
auto *IPC = dyn_cast<InstrProfCallsite>(Prev))
243 assert(!isa<CallBase>(Prev) &&
244 "didn't expect to find another call, that's not the callsite "
245 "instrumentation, before an instrumentable callsite");
252 if (
auto *Incr = dyn_cast<InstrProfIncrementInst>(&
I))
253 if (!isa<InstrProfIncrementInstStep>(&
I))
262 if (
auto *Step = dyn_cast<InstrProfIncrementInstStep>(Prev))
267template <
class ProfilesTy,
class ProfTy>
270 std::function<void(ProfTy &)> Traverser = [&](
auto &Ctx) {
272 for (
auto &[
_, SubCtxSet] : Ctx.callsites())
273 for (
auto &[__, Subctx] : SubCtxSet)
276 for (
auto &[
_,
P] : Profiles)
280void PGOContextualProfile::initIndex() {
284 for (
auto &[
Guid, FI] : FuncInfo)
285 InsertionPoints[
Guid] = &FI.Index;
286 preorderVisit<PGOCtxProfContext::CallTargetMapTy, PGOCtxProfContext>(
288 auto InsertIt = InsertionPoints.
find(Ctx.
guid());
289 if (InsertIt == InsertionPoints.
end())
295 InsertIt->second->Next = &Ctx;
296 Ctx.Previous = InsertIt->second;
297 InsertIt->second = &Ctx;
304 for (
auto *Node = FuncInfo.find(
G)->second.Index.Next; Node;
315 for (
const auto *Node = FuncInfo.find(
G)->second.Index.Next; Node;
321 assert(Profiles.has_value());
326 auto [It, Ins] = Flat.insert({Ctx.
guid(), {}});
332 "All contexts corresponding to a function should have the exact "
333 "same number of counters.");
334 for (
size_t I = 0, E = It->second.size();
I < E; ++
I)
342 SetVector<std::pair<CallBase *, Function *>> &Candidates) {
347 const uint32_t CallID = Instr->getIndex()->getZExtValue();
353 for (
const auto &[
Guid,
_] : Targets->second)
356 if (
Target->hasFnAttribute(Attribute::AlwaysInline))
357 Candidates.insert({&IC, Target});
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
static void preorderVisit(ProfilesTy &Profiles, function_ref< void(ProfTy &)> Visitor)
static cl::opt< CtxProfAnalysisPrinterPass::PrintMode > PrintLevel("ctx-profile-printer-level", cl::init(CtxProfAnalysisPrinterPass::PrintMode::JSON), cl::Hidden, cl::values(clEnumValN(CtxProfAnalysisPrinterPass::PrintMode::Everything, "everything", "print everything - most verbose"), clEnumValN(CtxProfAnalysisPrinterPass::PrintMode::JSON, "json", "just the json representation of the profile")), cl::desc("Verbosity level of the contextual profile printer pass."))
cl::opt< std::string > UseCtxProfile("use-ctx-profile", cl::init(""), cl::Hidden, cl::desc("Use the specified contextual profile file"))
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
This file supports working with JSON data.
Reader for contextual iFDO profile, which comes in bitstream format.
ModuleAnalysisManager MAM
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
static uint64_t getGUID(const Function &F)
static const char * GUIDMetadataName
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
Assign a GUID, if one is not already assigned, as function metadata named GUIDMetadataName.
LLVM Basic Block Representation.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Function * getCaller()
Helper to get the caller (the parent function).
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
CtxProfAnalysisPrinterPass(raw_ostream &OS)
PGOContextualProfile run(Module &M, ModuleAnalysisManager &MAM)
static InstrProfIncrementInst * getBBInstrumentation(BasicBlock &BB)
Get the instruction instrumenting a BB, or nullptr if not present.
static InstrProfIncrementInstStep * getSelectInstrumentation(SelectInst &SI)
Get the step instrumentation associated with a select.
static void collectIndirectCallPromotionList(CallBase &IC, Result &Profile, SetVector< std::pair< CallBase *, Function * > > &Candidates)
static InstrProfCallsite * getCallsiteInstrumentation(CallBase &CB)
Get the instruction instrumenting a callsite, or nullptr if that cannot be found.
PGOContextualProfile Result
CtxProfAnalysis(std::optional< StringRef > Profile=std::nullopt)
iterator find(const_arg_type_t< KeyT > Val)
Implements a dense probed hash-table based set.
Represents either an error or a value T.
std::error_code getError() const
GUID getGUID() const
Return a 64-bit global unique ID constructed from global value name (i.e.
static bool isExternalLinkage(LinkageTypes Linkage)
This represents the llvm.instrprof.callsite intrinsic.
static bool canInstrumentCallsite(const CallBase &CB)
This represents the llvm.instrprof.increment.step intrinsic.
This represents the llvm.instrprof.increment intrinsic.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
A Module instance is used to store all the information related to an LLVM module.
The instrumented contextual profile, produced by the CtxProfAnalysis.
void visit(ConstVisitor, const Function *F=nullptr) const
const CtxProfFlatProfile flatten() const
void update(Visitor, const Function &F)
bool isFunctionKnown(const Function &F) const
A node (context) in the loaded contextual profile, suitable for mutation during IPO passes.
GlobalValue::GUID guid() const
const SmallVectorImpl< uint64_t > & counters() const
std::map< GlobalValue::GUID, PGOCtxProfContext > CallTargetMapTy
const CallsiteMapTy & callsites() const
Expected< std::map< GlobalValue::GUID, PGOCtxProfContext > > loadContexts()
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
This class represents the LLVM 'select' instruction.
A vector that has set insertion semantics.
StringRef - Represent a constant reference to a string, i.e.
Target - Wrapper for Target specific information.
std::pair< iterator, bool > insert(const ValueT &V)
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
An Array is a JSON array, which contains heterogeneous JSON values.
void push_back(const Value &E)
An Object is a JSON object, which maps strings to heterogeneous JSON values.
A Value is a JSON value of unknown type.
const json::Array * getAsArray() const
This class implements an extremely fast bulk output stream that can only output to a stream.
Pass manager infrastructure for declaring and invalidating analyses.
@ C
The default llvm calling convention, compatible with C.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
Value toJSON(const std::optional< T > &Opt)
This is an optimization pass for GlobalISel generic memory operations.
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
auto map_range(ContainerTy &&C, FuncTy F)
auto formatv(bool Validate, const char *Fmt, Ts &&...Vals)
std::map< GlobalValue::GUID, SmallVector< uint64_t, 1 > > CtxProfFlatProfile
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...
const char * toString(DWARFSectionKind Kind)
Implement std::hash so that hash_code can be used in STL containers.
A special type used by analysis passes to provide an address that identifies that particular analysis...