LLVM 20.0.0git
MIRSampleProfile.cpp
Go to the documentation of this file.
1//===-------- MIRSampleProfile.cpp: MIRSampleFDO (For FSAFDO) -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file provides the implementation of the MIRSampleProfile loader, mainly
10// for flow sensitive SampleFDO.
11//
12//===----------------------------------------------------------------------===//
13
15#include "llvm/ADT/DenseMap.h"
16#include "llvm/ADT/DenseSet.h"
25#include "llvm/CodeGen/Passes.h"
26#include "llvm/IR/Function.h"
27#include "llvm/IR/PseudoProbe.h"
30#include "llvm/Support/Debug.h"
35#include <optional>
36
37using namespace llvm;
38using namespace sampleprof;
39using namespace llvm::sampleprofutil;
41
42#define DEBUG_TYPE "fs-profile-loader"
43
45 "show-fs-branchprob", cl::Hidden, cl::init(false),
46 cl::desc("Print setting flow sensitive branch probabilities"));
48 "fs-profile-debug-prob-diff-threshold", cl::init(10),
50 "Only show debug message if the branch probability is greater than "
51 "this value (in percentage)."));
52
54 "fs-profile-debug-bw-threshold", cl::init(10000),
55 cl::desc("Only show debug message if the source branch weight is greater "
56 " than this value."));
57
58static cl::opt<bool> ViewBFIBefore("fs-viewbfi-before", cl::Hidden,
59 cl::init(false),
60 cl::desc("View BFI before MIR loader"));
61static cl::opt<bool> ViewBFIAfter("fs-viewbfi-after", cl::Hidden,
62 cl::init(false),
63 cl::desc("View BFI after MIR loader"));
64
65namespace llvm {
67}
69
71 "Load MIR Sample Profile",
72 /* cfg = */ false, /* is_analysis = */ false)
79 /* cfg = */ false, /* is_analysis = */ false)
80
82
84llvm::createMIRProfileLoaderPass(std::string File, std::string RemappingFile,
86 IntrusiveRefCntPtr<vfs::FileSystem> FS) {
87 return new MIRProfileLoaderPass(File, RemappingFile, P, std::move(FS));
88}
89
90namespace llvm {
91
92// Internal option used to control BFI display only after MBP pass.
93// Defined in CodeGen/MachineBlockFrequencyInfo.cpp:
94// -view-block-layout-with-bfi={none | fraction | integer | count}
96
97// Command line option to specify the name of the function for CFG dump
98// Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name=
100
/// Build a PseudoProbe description from a machine instruction.
///
/// \param MI the machine instruction to inspect.
/// \returns a PseudoProbe when MI.isPseudoProbe() is true: Id, Type and Attr
/// are read from immediate operands 1, 2 and 3, Factor is fixed at 1, and the
/// discriminator comes from MI's debug location (0 when there is none).
/// Returns std::nullopt for every other instruction.
101std::optional<PseudoProbe> extractProbe(const MachineInstr &MI) {
102 if (MI.isPseudoProbe()) {
103 PseudoProbe Probe;
104 Probe.Id = MI.getOperand(1).getImm();
105 Probe.Type = MI.getOperand(2).getImm();
106 Probe.Attr = MI.getOperand(3).getImm();
 // The factor is not read from the instruction; it is always 1 here.
107 Probe.Factor = 1;
 // A missing debug location falls back to discriminator 0.
108 DILocation *DebugLoc = MI.getDebugLoc();
109 Probe.Discriminator = DebugLoc ? DebugLoc->getDiscriminator() : 0;
110 return Probe;
111 }
112
113 // Ignore callsite probes since they do not have FS discriminators.
114 return std::nullopt;
115}
116
117namespace afdo_detail {
118template <> struct IRTraits<MachineBasicBlock> {
134 static Function &getFunction(MachineFunction &F) { return F.getFunction(); }
137 }
139 return BB->predecessors();
140 }
142 return BB->successors();
143 }
144};
145} // namespace afdo_detail
146
148 : public SampleProfileLoaderBaseImpl<MachineFunction> {
149public:
153 DT = MDT;
154 PDT = MPDT;
155 LI = MLI;
156 BFI = MBFI;
157 ORE = MORE;
158 }
160 P = Pass;
163 assert(LowBit < HighBit && "HighBit needs to be greater than Lowbit");
164 }
165
169 std::move(FS)) {}
170
173 bool doInitialization(Module &M);
174 bool isValid() const { return ProfileIsValid; }
175
176protected:
178
179 /// Hold the information of the basic block frequency.
181
182 /// PassNum is the sequence number of this pass invocation, starting from 1.
184
185 // LowBit in the FS discriminator used by this instance. Note the number is
186 // 0-based. The base discriminator uses bit 0 to bit 11.
187 unsigned LowBit;
188 // HighBit in the FS discriminator used by this instance. Note the number
189 // is 0-based.
190 unsigned HighBit;
191
192 bool ProfileIsValid = true;
195 return getProbeWeight(MI);
196 if (ImprovedFSDiscriminator && MI.isMetaInstruction())
197 return std::error_code();
198 return getInstWeightImpl(MI);
199 }
200};
201
202template <>
204 MachineFunction &F) {}
205
207 LLVM_DEBUG(dbgs() << "\nPropagation complete. Setting branch probs\n");
208 for (auto &BI : F) {
209 MachineBasicBlock *BB = &BI;
210 if (BB->succ_size() < 2)
211 continue;
212 const MachineBasicBlock *EC = EquivalenceClass[BB];
213 uint64_t BBWeight = BlockWeights[EC];
214 uint64_t SumEdgeWeight = 0;
215 for (MachineBasicBlock *Succ : BB->successors()) {
216 Edge E = std::make_pair(BB, Succ);
217 SumEdgeWeight += EdgeWeights[E];
218 }
219
220 if (BBWeight != SumEdgeWeight) {
221 LLVM_DEBUG(dbgs() << "BBweight is not equal to SumEdgeWeight: BBWWeight="
222 << BBWeight << " SumEdgeWeight= " << SumEdgeWeight
223 << "\n");
224 BBWeight = SumEdgeWeight;
225 }
226 if (BBWeight == 0) {
227 LLVM_DEBUG(dbgs() << "SKIPPED. All branch weights are zero.\n");
228 continue;
229 }
230
231#ifndef NDEBUG
232 uint64_t BBWeightOrig = BBWeight;
233#endif
234 uint32_t MaxWeight = std::numeric_limits<uint32_t>::max();
235 uint32_t Factor = 1;
236 if (BBWeight > MaxWeight) {
237 Factor = BBWeight / MaxWeight + 1;
238 BBWeight /= Factor;
239 LLVM_DEBUG(dbgs() << "Scaling weights by " << Factor << "\n");
240 }
241
243 SE = BB->succ_end();
244 SI != SE; ++SI) {
245 MachineBasicBlock *Succ = *SI;
246 Edge E = std::make_pair(BB, Succ);
247 uint64_t EdgeWeight = EdgeWeights[E];
248 EdgeWeight /= Factor;
249
250 assert(BBWeight >= EdgeWeight &&
251 "BBweight is larger than EdgeWeight -- should not happen.\n");
252
253 BranchProbability OldProb = BFI->getMBPI()->getEdgeProbability(BB, SI);
254 BranchProbability NewProb(EdgeWeight, BBWeight);
255 if (OldProb == NewProb)
256 continue;
257 BB->setSuccProbability(SI, NewProb);
258#ifndef NDEBUG
259 if (!ShowFSBranchProb)
260 continue;
261 bool Show = false;
263 if (OldProb > NewProb)
264 Diff = OldProb - NewProb;
265 else
266 Diff = NewProb - OldProb;
268 Show &= (BBWeightOrig >= FSProfileDebugBWThreshold);
269
270 auto DIL = BB->findBranchDebugLoc();
271 auto SuccDIL = Succ->findBranchDebugLoc();
272 if (Show) {
273 dbgs() << "Set branch fs prob: MBB (" << BB->getNumber() << " -> "
274 << Succ->getNumber() << "): ";
275 if (DIL)
276 dbgs() << DIL->getFilename() << ":" << DIL->getLine() << ":"
277 << DIL->getColumn();
278 if (SuccDIL)
279 dbgs() << "-->" << SuccDIL->getFilename() << ":" << SuccDIL->getLine()
280 << ":" << SuccDIL->getColumn();
281 dbgs() << " W=" << BBWeightOrig << " " << OldProb << " --> " << NewProb
282 << "\n";
283 }
284#endif
285 }
286 }
287}
288
290 auto &Ctx = M.getContext();
291
293 Filename, Ctx, *FS, P, RemappingFilename);
294 if (std::error_code EC = ReaderOrErr.getError()) {
295 std::string Msg = "Could not open profile: " + EC.message();
296 Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
297 return false;
298 }
299
300 Reader = std::move(ReaderOrErr.get());
301 Reader->setModule(&M);
303
304 // Load pseudo probe descriptors for probe-based function samples.
305 if (Reader->profileIsProbeBased()) {
306 ProbeManager = std::make_unique<PseudoProbeManager>(M);
307 if (!ProbeManager->moduleIsProbed(M)) {
308 return false;
309 }
310 }
311
312 return true;
313}
314
316 // Do not load non-FS profiles. A line or probe can get a zero-valued
317 // discriminator at certain pass which could result in accidentally loading
318 // the corresponding base counter in the non-FS profile, while a non-zero
319 // discriminator would end up getting zero samples. This could in turn undo
320 // the sample distribution effort done by previous BFI maintenance and the
321 // probe distribution factor work for pseudo probes.
322 if (!Reader->profileIsFS())
323 return false;
324
325 Function &Func = MF.getFunction();
326 clearFunctionData(false);
327 Samples = Reader->getSamplesFor(Func);
328 if (!Samples || Samples->empty())
329 return false;
330
332 if (!ProbeManager->profileIsValid(MF.getFunction(), *Samples))
333 return false;
334 } else {
335 if (getFunctionLoc(MF) == 0)
336 return false;
337 }
338
339 DenseSet<GlobalValue::GUID> InlinedGUIDs;
340 bool Changed = computeAndPropagateWeights(MF, InlinedGUIDs);
341
342 // Set the new BPI, BFI.
343 setBranchProbs(MF);
344
345 return Changed;
346}
347
348} // namespace llvm
349
351 std::string FileName, std::string RemappingFileName, FSDiscriminatorPass P,
353 : MachineFunctionPass(ID), ProfileFileName(FileName), P(P) {
354 LowBit = getFSPassBitBegin(P);
355 HighBit = getFSPassBitEnd(P);
356
357 auto VFS = FS ? std::move(FS) : vfs::getRealFileSystem();
358 MIRSampleLoader = std::make_unique<MIRProfileLoader>(
359 FileName, RemappingFileName, std::move(VFS));
360 assert(LowBit < HighBit && "HighBit needs to be greater than Lowbit");
361}
362
/// Per-function entry point of the MIR profile loader pass.
///
/// Fetches the block-frequency, dominator, post-dominator, loop and
/// remark-emitter analyses, hands them to the sample loader, runs the loader
/// on MF and, when the loader reports a change, recomputes block frequencies.
///
/// \param MF the machine function to process.
/// \returns the value returned by MIRSampleLoader->runOnFunction(MF), or
/// false straight away when the loader reports that it is not valid.
363bool MIRProfileLoaderPass::runOnMachineFunction(MachineFunction &MF) {
 // Nothing to do if the loader is not in a valid state.
364 if (!MIRSampleLoader->isValid())
365 return false;
366
367 LLVM_DEBUG(dbgs() << "MIRProfileLoader pass working on Func: "
368 << MF.getFunction().getName() << "\n");
369 MBFI = &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI();
370 auto *MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
371 auto *MPDT =
372 &getAnalysis<MachinePostDominatorTreeWrapperPass>().getPostDomTree();
373
 // Renumber the blocks first, then let both trees pick up the new numbers.
374 MF.RenumberBlocks();
375 MDT->updateBlockNumbers();
376 MPDT->updateBlockNumbers();
377
 // Give the loader the analyses it works with for this function.
378 MIRSampleLoader->setInitVals(
379 MDT, MPDT, &getAnalysis<MachineLoopInfoWrapperPass>().getLI(), MBFI,
380 &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE());
381
 // Optional BFI view before loading ("MIR_Prof_loader_b." + function name).
 // NOTE(review): part of the guarding condition is not visible in this view;
 // confirm against the full file.
383 (ViewBlockFreqFuncName.empty() ||
385 MBFI->view("MIR_Prof_loader_b." + MF.getName(), false);
386 }
387
 // Recompute block frequencies only when the loader changed something.
 // NOTE(review): the `*&` in front of getAnalysis is a no-op pair.
388 bool Changed = MIRSampleLoader->runOnFunction(MF);
389 if (Changed)
390 MBFI->calculate(MF, *MBFI->getMBPI(),
391 *&getAnalysis<MachineLoopInfoWrapperPass>().getLI());
392
 // Optional BFI view after loading ("MIR_prof_loader_a." + function name).
 // NOTE(review): part of the guarding condition is not visible in this view;
 // confirm against the full file.
394 (ViewBlockFreqFuncName.empty() ||
396 MBFI->view("MIR_prof_loader_a." + MF.getName(), false);
397 }
398
399 return Changed;
400}
401
/// Module-level initialization for the pass.
///
/// Passes this pass's FSDiscriminatorPass value P to the sample loader, then
/// forwards to the loader's own doInitialization.
///
/// \param M the module about to be processed.
/// \returns whatever MIRSampleLoader->doInitialization(M) returns.
402bool MIRProfileLoaderPass::doInitialization(Module &M) {
403 LLVM_DEBUG(dbgs() << "MIRProfileLoader pass working on Module " << M.getName()
404 << "\n");
405
 // The loader must know its FS pass before it is initialized.
406 MIRSampleLoader->setFSPass(P);
407 return MIRSampleLoader->doInitialization(M);
408}
409
/// Declare the analysis usage of this pass.
///
/// Marks all analyses as preserved.
/// NOTE(review): the remaining statements of this body are not visible in
/// this view; presumably they declare the required analyses. Confirm against
/// the full file.
410void MIRProfileLoaderPass::getAnalysisUsage(AnalysisUsage &AU) const {
411 AU.setPreservesAll();
418}
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_DEBUG(...)
Definition: Debug.h:106
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
std::string Name
IRTranslator LLVM IR MI
#define F(x, y, z)
Definition: MD5.cpp:55
Load MIR Sample Profile
static cl::opt< bool > ShowFSBranchProb("show-fs-branchprob", cl::Hidden, cl::init(false), cl::desc("Print setting flow sensitive branch probabilities"))
static cl::opt< bool > ViewBFIAfter("fs-viewbfi-after", cl::Hidden, cl::init(false), cl::desc("View BFI after MIR loader"))
static cl::opt< unsigned > FSProfileDebugBWThreshold("fs-profile-debug-bw-threshold", cl::init(10000), cl::desc("Only show debug message if the source branch weight is greater " " than this value."))
static cl::opt< unsigned > FSProfileDebugProbDiffThreshold("fs-profile-debug-prob-diff-threshold", cl::init(10), cl::desc("Only show debug message if the branch probability is greater than " "this value (in percentage)."))
#define DEBUG_TYPE
static cl::opt< bool > ViewBFIBefore("fs-viewbfi-before", cl::Hidden, cl::init(false), cl::desc("View BFI before MIR loader"))
===- MachineOptimizationRemarkEmitter.h - Opt Diagnostics -*- C++ -*-—===//
#define P(N)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:57
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file provides the interface for the sampled PGO profile loader base implementation.
This file provides the utility functions for the sampled PGO loader base implementation.
Defines the virtual file system interface vfs::FileSystem.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesAll()
Set by analyses that do not transform their input at all.
AnalysisUsage & addRequiredTransitive()
Debug location.
A debug info location.
Definition: DebugLoc.h:33
Implements a dense probed hash-table based set.
Definition: DenseSet.h:278
Diagnostic information for the sample profiler.
Represents either an error or a value T.
Definition: ErrorOr.h:56
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
Class to represent profile counts.
Definition: Function.h:292
A smart pointer to a reference-counted object that inherits from RefCountedBase or ThreadSafeRefCount...
MIRProfileLoaderPass(std::string FileName="", std::string RemappingFileName="", FSDiscriminatorPass P=FSDiscriminatorPass::Pass1, IntrusiveRefCntPtr< vfs::FileSystem > FS=nullptr)
FS bits will only use the '1' bits in the Mask.
MIRProfileLoader(StringRef Name, StringRef RemapName, IntrusiveRefCntPtr< vfs::FileSystem > FS)
void setBranchProbs(MachineFunction &F)
ErrorOr< uint64_t > getInstWeight(const MachineInstr &MI) override
bool runOnFunction(MachineFunction &F)
MachineBlockFrequencyInfo * BFI
Hold the information of the basic block frequency.
FSDiscriminatorPass P
PassNum is the sequence number this pass is called, start from 1.
bool doInitialization(Module &M)
void setInitVals(MachineDominatorTree *MDT, MachinePostDominatorTree *MPDT, MachineLoopInfo *MLI, MachineBlockFrequencyInfo *MBFI, MachineOptimizationRemarkEmitter *MORE)
void setFSPass(FSDiscriminatorPass Pass)
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
void setSuccProbability(succ_iterator I, BranchProbability Prob)
Set successor probability of a given iterator.
unsigned succ_size() const
SmallVectorImpl< MachineBasicBlock * >::iterator succ_iterator
DebugLoc findBranchDebugLoc()
Find and return the merged DebugLoc of the branch instructions of the block.
iterator_range< succ_iterator > successors()
iterator_range< pred_iterator > predecessors()
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
void view(const Twine &Name, bool isSimple=true) const
Pop up a ghostview window with the current block frequency propagation rendered using dot.
const MachineBranchProbabilityInfo * getMBPI() const
void calculate(const MachineFunction &F, const MachineBranchProbabilityInfo &MBPI, const MachineLoopInfo &MLI)
calculate - compute block frequency info for the given function.
BranchProbability getEdgeProbability(const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
Function & getFunction()
Return the LLVM function that this machine code represents.
void RenumberBlocks(MachineBasicBlock *MBBFrom=nullptr)
RenumberBlocks - This discards all of the MachineBasicBlock numbers and recomputes them.
Representation of each machine instruction.
Definition: MachineInstr.h:69
Diagnostic information for optimization analysis remarks.
MachinePostDominatorTree - an analysis pass wrapper for DominatorTree used to compute the post-domina...
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:94
bool computeAndPropagateWeights(FunctionT &F, const DenseSet< GlobalValue::GUID > &InlinedGUIDs)
Generate branch weight metadata for all branches in F.
void computeDominanceAndLoopInfo(FunctionT &F)
IntrusiveRefCntPtr< vfs::FileSystem > FS
VirtualFileSystem to load profile files from.
EdgeWeightMap EdgeWeights
Map edges to their computed weights.
OptRemarkEmitterT * ORE
Optimization Remark Emitter used to emit diagnostic remarks.
unsigned getFunctionLoc(FunctionT &Func)
Get the line number for the function header.
ErrorOr< uint64_t > getInstWeightImpl(const InstructionT &Inst)
EquivalenceClassMap EquivalenceClass
Equivalence classes for block weights.
std::unique_ptr< SampleProfileReader > Reader
Profile reader object.
DominatorTreePtrT DT
Dominance, post-dominance and loop information.
std::string Filename
Name of the profile file to load.
virtual ErrorOr< uint64_t > getProbeWeight(const InstructionT &Inst)
std::string RemappingFilename
Name of the profile remapping file to load.
FunctionSamples * Samples
Samples collected for the body of this function.
std::pair< const BasicBlockT *, const BasicBlockT * > Edge
void clearFunctionData(bool ResetDT=true)
Clear all the per-function data used to load samples and propagate weights.
BlockWeightMap BlockWeights
Map basic blocks to their computed weights.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
A range adaptor for a pair of iterators.
static ErrorOr< std::unique_ptr< SampleProfileReader > > create(StringRef Filename, LLVMContext &C, vfs::FileSystem &FS, FSDiscriminatorPass P=FSDiscriminatorPass::Base, StringRef RemapFilename="")
Create a sample profile reader appropriate to the file format.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
IntrusiveRefCntPtr< FileSystem > getRealFileSystem()
Gets an vfs::FileSystem for the 'real' file system, as seen by the operating system.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
static unsigned getFSPassBitBegin(sampleprof::FSDiscriminatorPass P)
Definition: Discriminator.h:94
char & MIRProfileLoaderPassID
This pass reads flow sensitive profile.
static unsigned getFSPassBitEnd(sampleprof::FSDiscriminatorPass P)
Definition: Discriminator.h:87
cl::opt< std::string > ViewBlockFreqFuncName("view-bfi-func-name", cl::Hidden, cl::desc("The option to specify " "the name of the function " "whose CFG will be displayed."))
std::optional< PseudoProbe > extractProbe(const Instruction &Inst)
Definition: PseudoProbe.cpp:56
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
cl::opt< GVDAGType > ViewBlockLayoutWithBFI("view-block-layout-with-bfi", cl::Hidden, cl::desc("Pop up a window to show a dag displaying MBP layout and associated " "block frequencies of the CFG."), cl::values(clEnumValN(GVDT_None, "none", "do not display graphs."), clEnumValN(GVDT_Fraction, "fraction", "display a graph using the " "fractional block frequency representation."), clEnumValN(GVDT_Integer, "integer", "display a graph using the raw " "integer fractional block frequency representation."), clEnumValN(GVDT_Count, "count", "display a graph using the real " "profile count if available.")))
cl::opt< bool > ImprovedFSDiscriminator("improved-fs-discriminator", cl::Hidden, cl::init(false), cl::desc("New FS discriminators encoding (incompatible with the original " "encoding)"))
FunctionPass * createMIRProfileLoaderPass(std::string File, std::string RemappingFile, sampleprof::FSDiscriminatorPass P, IntrusiveRefCntPtr< vfs::FileSystem > FS)
Read Flow Sensitive Profile.
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1873
Implement std::hash so that hash_code can be used in STL containers.
Definition: BitVector.h:858
#define MORE()
Definition: regcomp.c:252
uint32_t Discriminator
Definition: PseudoProbe.h:121
static PredRangeT getPredecessors(MachineBasicBlock *BB)
static SuccRangeT getSuccessors(MachineBasicBlock *BB)
static const MachineBasicBlock * getEntryBB(const MachineFunction *F)
static Function & getFunction(MachineFunction &F)