Go to the documentation of this file.
34 using namespace sampleprof;
38 #define DEBUG_TYPE "fs-profile-loader"
42 cl::desc(
"Print setting flow sensitive branch probabilities"));
44 "fs-profile-debug-prob-diff-threshold",
cl::init(10),
45 cl::desc(
"Only show debug message if the branch probility is greater than "
46 "this value (in percentage)."));
49 "fs-profile-debug-bw-threshold",
cl::init(10000),
50 cl::desc(
"Only show debug message if the source branch weight is greater "
51 " than this value."));
55 cl::desc(
"View BFI before MIR loader"));
58 cl::desc(
"View BFI after MIR loader"));
63 "Load MIR Sample Profile",
76 std::
string RemappingFile,
92 namespace afdo_detail {
112 return BB->predecessors();
115 return BB->successors();
136 assert(LowBit < HighBit &&
"HighBit needs to be greater than Lowbit");
145 bool doInitialization(
Module &M);
146 bool isValid()
const {
return ProfileIsValid; }
164 bool ProfileIsValid =
true;
172 LLVM_DEBUG(
dbgs() <<
"\nPropagation complete. Setting branch probs\n");
175 if (
BB->succ_size() < 2)
178 uint64_t BBWeight = BlockWeights[EC];
181 Edge E = std::make_pair(
BB, Succ);
182 SumEdgeWeight += EdgeWeights[
E];
185 if (BBWeight != SumEdgeWeight) {
186 LLVM_DEBUG(
dbgs() <<
"BBweight is not equal to SumEdgeWeight: BBWWeight="
187 << BBWeight <<
" SumEdgeWeight= " << SumEdgeWeight
189 BBWeight = SumEdgeWeight;
201 if (BBWeight > MaxWeight) {
202 Factor = BBWeight / MaxWeight + 1;
211 Edge E = std::make_pair(
BB, Succ);
213 EdgeWeight /= Factor;
215 assert(BBWeight >= EdgeWeight &&
216 "BBweight is larger than EdgeWeight -- should not happen.\n");
220 if (OldProb == NewProb)
222 BB->setSuccProbability(
SI, NewProb);
228 if (OldProb > NewProb)
229 Diff = OldProb - NewProb;
231 Diff = NewProb - OldProb;
235 auto DIL =
BB->findBranchDebugLoc();
238 dbgs() <<
"Set branch fs prob: MBB (" <<
BB->getNumber() <<
" -> "
241 dbgs() << DIL->getFilename() <<
":" << DIL->getLine() <<
":"
244 dbgs() <<
"-->" << SuccDIL->getFilename() <<
":" << SuccDIL->getLine()
245 <<
":" << SuccDIL->getColumn();
246 dbgs() <<
" W=" << BBWeightOrig <<
" " << OldProb <<
" --> " << NewProb
255 auto &Ctx =
M.getContext();
259 if (std::error_code EC = ReaderOrErr.getError()) {
260 std::string
Msg =
"Could not open profile: " + EC.message();
266 Reader->setModule(&
M);
268 Reader->getSummary();
275 clearFunctionData(
false);
276 Samples = Reader->getSamplesFor(Func);
277 if (!Samples || Samples->empty())
280 if (getFunctionLoc(MF) == 0)
284 bool Changed = computeAndPropagateWeights(MF, InlinedGUIDs);
295 std::string RemappingFileName,
302 assert(LowBit < HighBit &&
"HighBit needs to be greater than Lowbit");
306 if (!MIRSampleLoader->isValid())
311 MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
312 MIRSampleLoader->setInitVals(
313 &getAnalysis<MachineDominatorTree>(),
314 &getAnalysis<MachinePostDominatorTree>(), &getAnalysis<MachineLoopInfo>(),
315 MBFI, &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE());
321 MBFI->
view(
"MIR_Prof_loader_b." + MF.
getName(),
false);
324 bool Changed = MIRSampleLoader->runOnFunction(MF);
331 MBFI->
view(
"MIR_prof_loader_a." + MF.
getName(),
false);
337 bool MIRProfileLoaderPass::doInitialization(
Module &
M) {
338 LLVM_DEBUG(
dbgs() <<
"MIRProfileLoader pass working on Module " <<
M.getName()
341 MIRSampleLoader->setFSPass(
P);
342 return MIRSampleLoader->doInitialization(
M);
345 void MIRProfileLoaderPass::getAnalysisUsage(
AnalysisUsage &AU)
const {
static unsigned getFSPassBitBegin(FSDiscriminatorPass P)
Function::ProfileCount ProfileCount
This is an optimization pass for GlobalISel generic memory operations.
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
print lazy value Lazy Value Info Printer Pass
static Function & getFunction(MachineFunction &F)
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void view(const Twine &Name, bool isSimple=true) const
Pop up a ghostview window with the current block frequency propagation rendered using dot.
static const MachineBasicBlock * getEntryBB(const MachineFunction *F)
Expected< ExpressionValue > max(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
MachineBlockFrequencyInfo * BFI
Hold the information of the basic block frequency.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
INITIALIZE_PASS_BEGIN(MIRProfileLoaderPass, DEBUG_TYPE, "Load MIR Sample Profile", false, false) INITIALIZE_PASS_END(MIRProfileLoaderPass
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
void setFSPass(FSDiscriminatorPass Pass)
Represent the analysis usage information of a pass.
void calculate(const MachineFunction &F, const MachineBranchProbabilityInfo &MBPI, const MachineLoopInfo &MLI)
calculate - compute block frequency info for the given function.
MIRProfileLoaderPass(std::string FileName="", std::string RemappingFileName="", FSDiscriminatorPass P=FSDiscriminatorPass::Pass1)
FS bits will only use the '1' bits in the Mask.
FunctionPass * createMIRProfileLoaderPass(std::string File, std::string RemappingFile, sampleprof::FSDiscriminatorPass P)
Read Flow Sensitive Profile.
static PredRangeT getPredecessors(MachineBasicBlock *BB)
static cl::opt< unsigned > FSProfileDebugBWThreshold("fs-profile-debug-bw-threshold", cl::init(10000), cl::desc("Only show debug message if the source branch weight is greater " " than this value."))
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
bool equals(StringRef RHS) const
equals - Check for string equality, this is more efficient than compare() when the relative ordering ...
Implements a dense probed hash-table based set.
std::vector< MachineBasicBlock * >::iterator succ_iterator
void setInitVals(MachineDominatorTree *MDT, MachinePostDominatorTree *MPDT, MachineLoopInfo *MLI, MachineBlockFrequencyInfo *MBFI, MachineOptimizationRemarkEmitter *MORE)
char & MIRProfileLoaderPassID
This pass reads flow sensitive profile.
Representation of each machine instruction.
cl::opt< GVDAGType > ViewBlockLayoutWithBFI("view-block-layout-with-bfi", cl::Hidden, cl::desc("Pop up a window to show a dag displaying MBP layout and associated " "block frequencies of the CFG."), cl::values(clEnumValN(GVDT_None, "none", "do not display graphs."), clEnumValN(GVDT_Fraction, "fraction", "display a graph using the " "fractional block frequency representation."), clEnumValN(GVDT_Integer, "integer", "display a graph using the raw " "integer fractional block frequency representation."), clEnumValN(GVDT_Count, "count", "display a graph using the real " "profile count if available.")))
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
initializer< Ty > init(const Ty &Val)
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
bool doInitialization(Module &M)
static ErrorOr< std::unique_ptr< SampleProfileReader > > create(const std::string Filename, LLVMContext &C, FSDiscriminatorPass P=FSDiscriminatorPass::Base, const std::string RemapFilename="")
Create a sample profile reader appropriate to the file format.
A Module instance is used to store all the information related to an LLVM module.
bool runOnFunction(MachineFunction &F)
Diagnostic information for the sample profiler.
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
StringRef - Represent a constant reference to a string, i.e.
DebugLoc findBranchDebugLoc()
Find and return the merged DebugLoc of the branch instructions of the block.
StringRef getName() const
Return a constant reference to the value's name.
static cl::opt< unsigned > FSProfileDebugProbDiffThreshold("fs-profile-debug-prob-diff-threshold", cl::init(10), cl::desc("Only show debug message if the branch probility is greater than " "this value (in percentage)."))
MachinePostDominatorTree - an analysis pass wrapper for DominatorTree used to compute the post-domina...
const CustomOperand< const MCSubtargetInfo & > Msg[]
FSDiscriminatorPass P
PassNum is the sequence number of this pass invocation, starting from 1.
static bool runOnFunction(Function &F, bool PostInlining)
Function & getFunction()
Return the LLVM function that this machine code represents.
void setPreservesAll()
Set by analyses that do not transform their input at all.
static SuccRangeT getSuccessors(MachineBasicBlock *BB)
void setBranchProbs(MachineFunction &F)
static cl::opt< bool > ViewBFIBefore("fs-viewbfi-before", cl::Hidden, cl::init(false), cl::desc("View BFI before MIR loader"))
Pass interface - Implemented by all 'passes'.
std::pair< const BasicBlockT *, const BasicBlockT * > Edge
const MachineBranchProbabilityInfo * getMBPI() const
A range adaptor for a pair of iterators.
AnalysisUsage & addRequiredTransitive()
FunctionPass class - This class is used to implement most global optimizations.
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
static unsigned getFSPassBitEnd(FSDiscriminatorPass P)
cl::opt< std::string > ViewBlockFreqFuncName("view-bfi-func-name", cl::Hidden, cl::desc("The option to specify " "the name of the function " "whose CFG will be displayed."))
AnalysisUsage & addRequired()
Class to represent profile counts.
static cl::opt< bool > ViewBFIAfter("fs-viewbfi-after", cl::Hidden, cl::init(false), cl::desc("View BFI after MIR loader"))
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MIRProfileLoader(StringRef Name, StringRef RemapName)
static cl::opt< bool > ShowFSBranchProb("show-fs-branchprob", cl::Hidden, cl::init(false), cl::desc("Print setting flow sensitive branch probabilities"))
void RenumberBlocks(MachineBasicBlock *MBBFrom=nullptr)
RenumberBlocks - This discards all of the MachineBasicBlock numbers and recomputes them.