LLVM  3.7.0
SampleProfile.cpp
Go to the documentation of this file.
1 //===- SampleProfile.cpp - Incorporate sample profiles into the IR --------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the SampleProfileLoader transformation. This pass
11 // reads a profile file generated by a sampling profiler (e.g. Linux Perf -
12 // http://perf.wiki.kernel.org/) and generates IR metadata to reflect the
13 // profile information in the given profile.
14 //
15 // This pass generates branch weight annotations on the IR:
16 //
17 // - prof: Represents branch weights. This annotation is added to branches
18 // to indicate the weights of each edge coming out of the branch.
19 // The weight of each edge is the weight of the target block for
20 // that edge. The weight of a block B is computed as the maximum
21 // number of samples found in B.
22 //
23 //===----------------------------------------------------------------------===//
24 
25 #include "llvm/Transforms/Scalar.h"
26 #include "llvm/ADT/DenseMap.h"
27 #include "llvm/ADT/SmallPtrSet.h"
28 #include "llvm/ADT/SmallSet.h"
29 #include "llvm/ADT/StringRef.h"
30 #include "llvm/Analysis/LoopInfo.h"
32 #include "llvm/IR/Constants.h"
33 #include "llvm/IR/DebugInfo.h"
34 #include "llvm/IR/DiagnosticInfo.h"
35 #include "llvm/IR/Dominators.h"
36 #include "llvm/IR/Function.h"
37 #include "llvm/IR/InstIterator.h"
38 #include "llvm/IR/Instructions.h"
39 #include "llvm/IR/LLVMContext.h"
40 #include "llvm/IR/MDBuilder.h"
41 #include "llvm/IR/Metadata.h"
42 #include "llvm/IR/Module.h"
43 #include "llvm/Pass.h"
46 #include "llvm/Support/Debug.h"
48 #include <cctype>
49 
50 using namespace llvm;
51 using namespace sampleprof;
52 
53 #define DEBUG_TYPE "sample-profile"
54 
55 // Command line option to specify the file to read samples from. This is
56 // mainly used for debugging.
58  "sample-profile-file", cl::init(""), cl::value_desc("filename"),
59  cl::desc("Profile file loaded by -sample-profile"), cl::Hidden);
61  "sample-profile-max-propagate-iterations", cl::init(100),
62  cl::desc("Maximum number of iterations to go through when propagating "
63  "sample block/edge weights through the CFG."));
64 
65 namespace {
66 typedef DenseMap<BasicBlock *, unsigned> BlockWeightMap;
67 typedef DenseMap<BasicBlock *, BasicBlock *> EquivalenceClassMap;
68 typedef std::pair<BasicBlock *, BasicBlock *> Edge;
69 typedef DenseMap<Edge, unsigned> EdgeWeightMap;
71 
72 /// \brief Sample profile pass.
73 ///
74 /// This pass reads profile data from the file specified by
75 /// -sample-profile-file and annotates every affected function with the
76 /// profile information found in that file.
77 class SampleProfileLoader : public FunctionPass {
78 public:
79  // Class identification, replacement for typeinfo
80  static char ID;
81 
82  SampleProfileLoader(StringRef Name = SampleProfileFile)
83  : FunctionPass(ID), DT(nullptr), PDT(nullptr), LI(nullptr), Ctx(nullptr),
84  Reader(), Samples(nullptr), Filename(Name), ProfileIsValid(false) {
86  }
87 
88  bool doInitialization(Module &M) override;
89 
90  void dump() { Reader->dump(); }
91 
92  const char *getPassName() const override { return "Sample profile pass"; }
93 
94  bool runOnFunction(Function &F) override;
95 
96  void getAnalysisUsage(AnalysisUsage &AU) const override {
97  AU.setPreservesCFG();
101  }
102 
103 protected:
104  unsigned getFunctionLoc(Function &F);
105  bool emitAnnotations(Function &F);
106  unsigned getInstWeight(Instruction &I);
107  unsigned getBlockWeight(BasicBlock *BB);
108  void printEdgeWeight(raw_ostream &OS, Edge E);
109  void printBlockWeight(raw_ostream &OS, BasicBlock *BB);
110  void printBlockEquivalence(raw_ostream &OS, BasicBlock *BB);
111  bool computeBlockWeights(Function &F);
112  void findEquivalenceClasses(Function &F);
113  void findEquivalencesFor(BasicBlock *BB1,
114  SmallVector<BasicBlock *, 8> Descendants,
116  void propagateWeights(Function &F);
117  unsigned visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge);
118  void buildEdges(Function &F);
119  bool propagateThroughEdges(Function &F);
120 
121  /// \brief Line number for the function header. Used to compute absolute
122  /// line numbers from the relative line numbers found in the profile.
123  unsigned HeaderLineno;
124 
125  /// \brief Map basic blocks to their computed weights.
126  ///
127  /// The weight of a basic block is defined to be the maximum
128  /// of all the instruction weights in that block.
129  BlockWeightMap BlockWeights;
130 
131  /// \brief Map edges to their computed weights.
132  ///
133  /// Edge weights are computed by propagating basic block weights in
134  /// SampleProfile::propagateWeights.
135  EdgeWeightMap EdgeWeights;
136 
137  /// \brief Set of visited blocks during propagation.
138  SmallPtrSet<BasicBlock *, 128> VisitedBlocks;
139 
140  /// \brief Set of visited edges during propagation.
141  SmallSet<Edge, 128> VisitedEdges;
142 
143  /// \brief Equivalence classes for block weights.
144  ///
145  /// Two blocks BB1 and BB2 are in the same equivalence class if they
146  /// dominate and post-dominate each other, and they are in the same loop
147  /// nest. When this happens, the two blocks are guaranteed to execute
148  /// the same number of times.
149  EquivalenceClassMap EquivalenceClass;
150 
151  /// \brief Dominance, post-dominance and loop information.
152  DominatorTree *DT;
153  PostDominatorTree *PDT;
154  LoopInfo *LI;
155 
156  /// \brief Predecessors for each basic block in the CFG.
157  BlockEdgeMap Predecessors;
158 
159  /// \brief Successors for each basic block in the CFG.
160  BlockEdgeMap Successors;
161 
162  /// \brief LLVM context holding the debug data we need.
163  LLVMContext *Ctx;
164 
165  /// \brief Profile reader object.
166  std::unique_ptr<SampleProfileReader> Reader;
167 
168  /// \brief Samples collected for the body of this function.
169  FunctionSamples *Samples;
170 
171  /// \brief Name of the profile file to load.
172  StringRef Filename;
173 
174  /// \brief Flag indicating whether the profile input loaded successfully.
175  bool ProfileIsValid;
176 };
177 }
178 
179 /// \brief Print the weight of edge \p E on stream \p OS.
180 ///
181 /// \param OS Stream to emit the output to.
182 /// \param E Edge to print.
183 void SampleProfileLoader::printEdgeWeight(raw_ostream &OS, Edge E) {
184  OS << "weight[" << E.first->getName() << "->" << E.second->getName()
185  << "]: " << EdgeWeights[E] << "\n";
186 }
187 
188 /// \brief Print the equivalence class of block \p BB on stream \p OS.
189 ///
190 /// \param OS Stream to emit the output to.
191 /// \param BB Block to print.
192 void SampleProfileLoader::printBlockEquivalence(raw_ostream &OS,
193  BasicBlock *BB) {
194  BasicBlock *Equiv = EquivalenceClass[BB];
195  OS << "equivalence[" << BB->getName()
196  << "]: " << ((Equiv) ? EquivalenceClass[BB]->getName() : "NONE") << "\n";
197 }
198 
199 /// \brief Print the weight of block \p BB on stream \p OS.
200 ///
201 /// \param OS Stream to emit the output to.
202 /// \param BB Block to print.
203 void SampleProfileLoader::printBlockWeight(raw_ostream &OS, BasicBlock *BB) {
204  OS << "weight[" << BB->getName() << "]: " << BlockWeights[BB] << "\n";
205 }
206 
207 /// \brief Get the weight for an instruction.
208 ///
209 /// The "weight" of an instruction \p Inst is the number of samples
210 /// collected on that instruction at runtime. To retrieve it, we
211 /// need to compute the line number of \p Inst relative to the start of its
212 /// function. We use HeaderLineno to compute the offset. We then
213 /// look up the samples collected for \p Inst using BodySamples.
214 ///
215 /// \param Inst Instruction to query.
216 ///
217 /// \returns The profiled weight of I.
218 unsigned SampleProfileLoader::getInstWeight(Instruction &Inst) {
219  DebugLoc DLoc = Inst.getDebugLoc();
220  if (!DLoc)
221  return 0;
222 
223  unsigned Lineno = DLoc.getLine();
224  if (Lineno < HeaderLineno)
225  return 0;
226 
227  const DILocation *DIL = DLoc;
228  int LOffset = Lineno - HeaderLineno;
229  unsigned Discriminator = DIL->getDiscriminator();
230  unsigned Weight = Samples->samplesAt(LOffset, Discriminator);
231  DEBUG(dbgs() << " " << Lineno << "." << Discriminator << ":" << Inst
232  << " (line offset: " << LOffset << "." << Discriminator
233  << " - weight: " << Weight << ")\n");
234  return Weight;
235 }
236 
237 /// \brief Compute the weight of a basic block.
238 ///
239 /// The weight of basic block \p BB is the maximum weight of all the
240 /// instructions in BB. The weight of \p BB is computed and cached in
241 /// the BlockWeights map.
242 ///
243 /// \param BB The basic block to query.
244 ///
245 /// \returns The computed weight of BB.
246 unsigned SampleProfileLoader::getBlockWeight(BasicBlock *BB) {
247  // If we've computed BB's weight before, return it.
248  std::pair<BlockWeightMap::iterator, bool> Entry =
249  BlockWeights.insert(std::make_pair(BB, 0));
250  if (!Entry.second)
251  return Entry.first->second;
252 
253  // Otherwise, compute and cache BB's weight.
254  unsigned Weight = 0;
255  for (auto &I : BB->getInstList()) {
256  unsigned InstWeight = getInstWeight(I);
257  if (InstWeight > Weight)
258  Weight = InstWeight;
259  }
260  Entry.first->second = Weight;
261  return Weight;
262 }
263 
264 /// \brief Compute and store the weights of every basic block.
265 ///
266 /// This populates the BlockWeights map by computing
267 /// the weights of every basic block in the CFG.
268 ///
269 /// \param F The function to query.
270 bool SampleProfileLoader::computeBlockWeights(Function &F) {
271  bool Changed = false;
272  DEBUG(dbgs() << "Block weights\n");
273  for (auto &BB : F) {
274  unsigned Weight = getBlockWeight(&BB);
275  Changed |= (Weight > 0);
276  DEBUG(printBlockWeight(dbgs(), &BB));
277  }
278 
279  return Changed;
280 }
281 
282 /// \brief Find equivalence classes for the given block.
283 ///
284 /// This finds all the blocks that are guaranteed to execute the same
285 /// number of times as \p BB1. To do this, it traverses all the
286 /// descendants of \p BB1 in the dominator or post-dominator tree.
287 ///
288 /// A block BB2 will be in the same equivalence class as \p BB1 if
289 /// the following holds:
290 ///
291 /// 1- \p BB1 is a descendant of BB2 in the opposite tree. So, if BB2
292 /// is a descendant of \p BB1 in the dominator tree, then BB2 should
293 /// dominate BB1 in the post-dominator tree.
294 ///
295 /// 2- Both BB2 and \p BB1 must be in the same loop.
296 ///
297 /// For every block BB2 that meets those two requirements, we set BB2's
298 /// equivalence class to \p BB1.
299 ///
300 /// \param BB1 Block to check.
301 /// \param Descendants Descendants of \p BB1 in either the dom or pdom tree.
302 /// \param DomTree Opposite dominator tree. If \p Descendants is filled
303 /// with blocks from \p BB1's dominator tree, then
304 /// this is the post-dominator tree, and vice versa.
305 void SampleProfileLoader::findEquivalencesFor(
306  BasicBlock *BB1, SmallVector<BasicBlock *, 8> Descendants,
308  for (auto *BB2 : Descendants) {
309  bool IsDomParent = DomTree->dominates(BB2, BB1);
310  bool IsInSameLoop = LI->getLoopFor(BB1) == LI->getLoopFor(BB2);
311  if (BB1 != BB2 && VisitedBlocks.insert(BB2).second && IsDomParent &&
312  IsInSameLoop) {
313  EquivalenceClass[BB2] = BB1;
314 
315  // If BB2 is heavier than BB1, make BB2 have the same weight
316  // as BB1.
317  //
318  // Note that we don't worry about the opposite situation here
319  // (when BB2 is lighter than BB1). We will deal with this
320  // during the propagation phase. Right now, we just want to
321  // make sure that BB1 has the largest weight of all the
322  // members of its equivalence set.
323  unsigned &BB1Weight = BlockWeights[BB1];
324  unsigned &BB2Weight = BlockWeights[BB2];
325  BB1Weight = std::max(BB1Weight, BB2Weight);
326  }
327  }
328 }
329 
330 /// \brief Find equivalence classes.
331 ///
332 /// Since samples may be missing from blocks, we can fill in the gaps by setting
333 /// the weights of all the blocks in the same equivalence class to the same
334 /// weight. To compute the concept of equivalence, we use dominance and loop
335 /// information. Two blocks B1 and B2 are in the same equivalence class if B1
336 /// dominates B2, B2 post-dominates B1 and both are in the same loop.
337 ///
338 /// \param F The function to query.
339 void SampleProfileLoader::findEquivalenceClasses(Function &F) {
340  SmallVector<BasicBlock *, 8> DominatedBBs;
341  DEBUG(dbgs() << "\nBlock equivalence classes\n");
342  // Find equivalence sets based on dominance and post-dominance information.
343  for (auto &BB : F) {
344  BasicBlock *BB1 = &BB;
345 
346  // Compute BB1's equivalence class once.
347  if (EquivalenceClass.count(BB1)) {
348  DEBUG(printBlockEquivalence(dbgs(), BB1));
349  continue;
350  }
351 
352  // By default, blocks are in their own equivalence class.
353  EquivalenceClass[BB1] = BB1;
354 
355  // Traverse all the blocks dominated by BB1. We are looking for
356  // every basic block BB2 such that:
357  //
358  // 1- BB1 dominates BB2.
359  // 2- BB2 post-dominates BB1.
360  // 3- BB1 and BB2 are in the same loop nest.
361  //
362  // If all those conditions hold, it means that BB2 is executed
363  // as many times as BB1, so they are placed in the same equivalence
364  // class by making BB2's equivalence class be BB1.
365  DominatedBBs.clear();
366  DT->getDescendants(BB1, DominatedBBs);
367  findEquivalencesFor(BB1, DominatedBBs, PDT->DT);
368 
369  // Repeat the same logic for all the blocks post-dominated by BB1.
370  // We are looking for every basic block BB2 such that:
371  //
372  // 1- BB1 post-dominates BB2.
373  // 2- BB2 dominates BB1.
374  // 3- BB1 and BB2 are in the same loop nest.
375  //
376  // If all those conditions hold, BB2's equivalence class is BB1.
377  DominatedBBs.clear();
378  PDT->getDescendants(BB1, DominatedBBs);
379  findEquivalencesFor(BB1, DominatedBBs, DT);
380 
381  DEBUG(printBlockEquivalence(dbgs(), BB1));
382  }
383 
384  // Assign weights to equivalence classes.
385  //
386  // All the basic blocks in the same equivalence class will execute
387  // the same number of times. Since we know that the head block in
388  // each equivalence class has the largest weight, assign that weight
389  // to all the blocks in that equivalence class.
390  DEBUG(dbgs() << "\nAssign the same weight to all blocks in the same class\n");
391  for (auto &BI : F) {
392  BasicBlock *BB = &BI;
393  BasicBlock *EquivBB = EquivalenceClass[BB];
394  if (BB != EquivBB)
395  BlockWeights[BB] = BlockWeights[EquivBB];
396  DEBUG(printBlockWeight(dbgs(), BB));
397  }
398 }
399 
400 /// \brief Visit the given edge to decide if it has a valid weight.
401 ///
402 /// If \p E has not been visited before, we copy to \p UnknownEdge
403 /// and increment the count of unknown edges.
404 ///
405 /// \param E Edge to visit.
406 /// \param NumUnknownEdges Current number of unknown edges.
407 /// \param UnknownEdge Set if E has not been visited before.
408 ///
409 /// \returns E's weight, if known. Otherwise, return 0.
410 unsigned SampleProfileLoader::visitEdge(Edge E, unsigned *NumUnknownEdges,
411  Edge *UnknownEdge) {
412  if (!VisitedEdges.count(E)) {
413  (*NumUnknownEdges)++;
414  *UnknownEdge = E;
415  return 0;
416  }
417 
418  return EdgeWeights[E];
419 }
420 
421 /// \brief Propagate weights through incoming/outgoing edges.
422 ///
423 /// If the weight of a basic block is known, and there is only one edge
424 /// with an unknown weight, we can calculate the weight of that edge.
425 ///
426 /// Similarly, if all the edges have a known count, we can calculate the
427 /// count of the basic block, if needed.
428 ///
429 /// \param F Function to process.
430 ///
431 /// \returns True if new weights were assigned to edges or blocks.
432 bool SampleProfileLoader::propagateThroughEdges(Function &F) {
433  bool Changed = false;
434  DEBUG(dbgs() << "\nPropagation through edges\n");
435  for (auto &BI : F) {
436  BasicBlock *BB = &BI;
437 
438  // Visit all the predecessor and successor edges to determine
439  // which ones have a weight assigned already. Note that it doesn't
440  // matter that we only keep track of a single unknown edge. The
441  // only case we are interested in handling is when only a single
442  // edge is unknown (see setEdgeOrBlockWeight).
443  for (unsigned i = 0; i < 2; i++) {
444  unsigned TotalWeight = 0;
445  unsigned NumUnknownEdges = 0;
446  Edge UnknownEdge, SelfReferentialEdge;
447 
448  if (i == 0) {
449  // First, visit all predecessor edges.
450  for (auto *Pred : Predecessors[BB]) {
451  Edge E = std::make_pair(Pred, BB);
452  TotalWeight += visitEdge(E, &NumUnknownEdges, &UnknownEdge);
453  if (E.first == E.second)
454  SelfReferentialEdge = E;
455  }
456  } else {
457  // On the second round, visit all successor edges.
458  for (auto *Succ : Successors[BB]) {
459  Edge E = std::make_pair(BB, Succ);
460  TotalWeight += visitEdge(E, &NumUnknownEdges, &UnknownEdge);
461  }
462  }
463 
464  // After visiting all the edges, there are three cases that we
465  // can handle immediately:
466  //
467  // - All the edge weights are known (i.e., NumUnknownEdges == 0).
468  // In this case, we simply check that the sum of all the edges
469  // is the same as BB's weight. If not, we change BB's weight
470  // to match. Additionally, if BB had not been visited before,
471  // we mark it visited.
472  //
473  // - Only one edge is unknown and BB has already been visited.
474  // In this case, we can compute the weight of the edge by
475  // subtracting the total block weight from all the known
476  // edge weights. If the edges weight more than BB, then the
477  // edge of the last remaining edge is set to zero.
478  //
479  // - There exists a self-referential edge and the weight of BB is
480  // known. In this case, this edge can be based on BB's weight.
481  // We add up all the other known edges and set the weight on
482  // the self-referential edge as we did in the previous case.
483  //
484  // In any other case, we must continue iterating. Eventually,
485  // all edges will get a weight, or iteration will stop when
486  // it reaches SampleProfileMaxPropagateIterations.
487  if (NumUnknownEdges <= 1) {
488  unsigned &BBWeight = BlockWeights[BB];
489  if (NumUnknownEdges == 0) {
490  // If we already know the weight of all edges, the weight of the
491  // basic block can be computed. It should be no larger than the sum
492  // of all edge weights.
493  if (TotalWeight > BBWeight) {
494  BBWeight = TotalWeight;
495  Changed = true;
496  DEBUG(dbgs() << "All edge weights for " << BB->getName()
497  << " known. Set weight for block: ";
498  printBlockWeight(dbgs(), BB););
499  }
500  if (VisitedBlocks.insert(BB).second)
501  Changed = true;
502  } else if (NumUnknownEdges == 1 && VisitedBlocks.count(BB)) {
503  // If there is a single unknown edge and the block has been
504  // visited, then we can compute E's weight.
505  if (BBWeight >= TotalWeight)
506  EdgeWeights[UnknownEdge] = BBWeight - TotalWeight;
507  else
508  EdgeWeights[UnknownEdge] = 0;
509  VisitedEdges.insert(UnknownEdge);
510  Changed = true;
511  DEBUG(dbgs() << "Set weight for edge: ";
512  printEdgeWeight(dbgs(), UnknownEdge));
513  }
514  } else if (SelfReferentialEdge.first && VisitedBlocks.count(BB)) {
515  unsigned &BBWeight = BlockWeights[BB];
516  // We have a self-referential edge and the weight of BB is known.
517  if (BBWeight >= TotalWeight)
518  EdgeWeights[SelfReferentialEdge] = BBWeight - TotalWeight;
519  else
520  EdgeWeights[SelfReferentialEdge] = 0;
521  VisitedEdges.insert(SelfReferentialEdge);
522  Changed = true;
523  DEBUG(dbgs() << "Set self-referential edge weight to: ";
524  printEdgeWeight(dbgs(), SelfReferentialEdge));
525  }
526  }
527  }
528 
529  return Changed;
530 }
531 
532 /// \brief Build in/out edge lists for each basic block in the CFG.
533 ///
534 /// We are interested in unique edges. If a block B1 has multiple
535 /// edges to another block B2, we only add a single B1->B2 edge.
536 void SampleProfileLoader::buildEdges(Function &F) {
537  for (auto &BI : F) {
538  BasicBlock *B1 = &BI;
539 
540  // Add predecessors for B1.
542  if (!Predecessors[B1].empty())
543  llvm_unreachable("Found a stale predecessors list in a basic block.");
544  for (pred_iterator PI = pred_begin(B1), PE = pred_end(B1); PI != PE; ++PI) {
545  BasicBlock *B2 = *PI;
546  if (Visited.insert(B2).second)
547  Predecessors[B1].push_back(B2);
548  }
549 
550  // Add successors for B1.
551  Visited.clear();
552  if (!Successors[B1].empty())
553  llvm_unreachable("Found a stale successors list in a basic block.");
554  for (succ_iterator SI = succ_begin(B1), SE = succ_end(B1); SI != SE; ++SI) {
555  BasicBlock *B2 = *SI;
556  if (Visited.insert(B2).second)
557  Successors[B1].push_back(B2);
558  }
559  }
560 }
561 
562 /// \brief Propagate weights into edges
563 ///
564 /// The following rules are applied to every block BB in the CFG:
565 ///
566 /// - If BB has a single predecessor/successor, then the weight
567 /// of that edge is the weight of the block.
568 ///
569 /// - If all incoming or outgoing edges are known except one, and the
570 /// weight of the block is already known, the weight of the unknown
571 /// edge will be the weight of the block minus the sum of all the known
572 /// edges. If the sum of all the known edges is larger than BB's weight,
573 /// we set the unknown edge weight to zero.
574 ///
575 /// - If there is a self-referential edge, and the weight of the block is
576 /// known, the weight for that edge is set to the weight of the block
577 /// minus the weight of the other incoming edges to that block (if
578 /// known).
579 void SampleProfileLoader::propagateWeights(Function &F) {
580  bool Changed = true;
581  unsigned i = 0;
582 
583  // Add an entry count to the function using the samples gathered
584  // at the function entry.
585  F.setEntryCount(Samples->getHeadSamples());
586 
587  // Before propagation starts, build, for each block, a list of
588  // unique predecessors and successors. This is necessary to handle
589  // identical edges in multiway branches. Since we visit all blocks and all
590  // edges of the CFG, it is cleaner to build these lists once at the start
591  // of the pass.
592  buildEdges(F);
593 
594  // Propagate until we converge or we go past the iteration limit.
595  while (Changed && i++ < SampleProfileMaxPropagateIterations) {
596  Changed = propagateThroughEdges(F);
597  }
598 
599  // Generate MD_prof metadata for every branch instruction using the
600  // edge weights computed during propagation.
601  DEBUG(dbgs() << "\nPropagation complete. Setting branch weights\n");
602  MDBuilder MDB(F.getContext());
603  for (auto &BI : F) {
604  BasicBlock *BB = &BI;
605  TerminatorInst *TI = BB->getTerminator();
606  if (TI->getNumSuccessors() == 1)
607  continue;
608  if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI))
609  continue;
610 
611  DEBUG(dbgs() << "\nGetting weights for branch at line "
612  << TI->getDebugLoc().getLine() << ".\n");
613  SmallVector<unsigned, 4> Weights;
614  bool AllWeightsZero = true;
615  for (unsigned I = 0; I < TI->getNumSuccessors(); ++I) {
616  BasicBlock *Succ = TI->getSuccessor(I);
617  Edge E = std::make_pair(BB, Succ);
618  unsigned Weight = EdgeWeights[E];
619  DEBUG(dbgs() << "\t"; printEdgeWeight(dbgs(), E));
620  Weights.push_back(Weight);
621  if (Weight != 0)
622  AllWeightsZero = false;
623  }
624 
625  // Only set weights if there is at least one non-zero weight.
626  // In any other case, let the analyzer set weights.
627  if (!AllWeightsZero) {
628  DEBUG(dbgs() << "SUCCESS. Found non-zero weights.\n");
630  MDB.createBranchWeights(Weights));
631  } else {
632  DEBUG(dbgs() << "SKIPPED. All branch weights are zero.\n");
633  }
634  }
635 }
636 
637 /// \brief Get the line number for the function header.
638 ///
639 /// This looks up function \p F in the current compilation unit and
640 /// retrieves the line number where the function is defined. This is
641 /// line 0 for all the samples read from the profile file. Every line
642 /// number is relative to this line.
643 ///
644 /// \param F Function object to query.
645 ///
646 /// \returns the line number where \p F is defined. If it returns 0,
647 /// it means that there is no debug information available for \p F.
648 unsigned SampleProfileLoader::getFunctionLoc(Function &F) {
649  if (DISubprogram *S = getDISubprogram(&F))
650  return S->getLine();
651 
652  // If could not find the start of \p F, emit a diagnostic to inform the user
653  // about the missed opportunity.
655  "No debug information found in function " + F.getName() +
656  ": Function profile not used",
657  DS_Warning));
658  return 0;
659 }
660 
661 /// \brief Generate branch weight metadata for all branches in \p F.
662 ///
663 /// Branch weights are computed out of instruction samples using a
664 /// propagation heuristic. Propagation proceeds in 3 phases:
665 ///
666 /// 1- Assignment of block weights. All the basic blocks in the function
667 /// are initial assigned the same weight as their most frequently
668 /// executed instruction.
669 ///
670 /// 2- Creation of equivalence classes. Since samples may be missing from
671 /// blocks, we can fill in the gaps by setting the weights of all the
672 /// blocks in the same equivalence class to the same weight. To compute
673 /// the concept of equivalence, we use dominance and loop information.
674 /// Two blocks B1 and B2 are in the same equivalence class if B1
675 /// dominates B2, B2 post-dominates B1 and both are in the same loop.
676 ///
677 /// 3- Propagation of block weights into edges. This uses a simple
678 /// propagation heuristic. The following rules are applied to every
679 /// block BB in the CFG:
680 ///
681 /// - If BB has a single predecessor/successor, then the weight
682 /// of that edge is the weight of the block.
683 ///
684 /// - If all the edges are known except one, and the weight of the
685 /// block is already known, the weight of the unknown edge will
686 /// be the weight of the block minus the sum of all the known
687 /// edges. If the sum of all the known edges is larger than BB's weight,
688 /// we set the unknown edge weight to zero.
689 ///
690 /// - If there is a self-referential edge, and the weight of the block is
691 /// known, the weight for that edge is set to the weight of the block
692 /// minus the weight of the other incoming edges to that block (if
693 /// known).
694 ///
695 /// Since this propagation is not guaranteed to finalize for every CFG, we
696 /// only allow it to proceed for a limited number of iterations (controlled
697 /// by -sample-profile-max-propagate-iterations).
698 ///
699 /// FIXME: Try to replace this propagation heuristic with a scheme
700 /// that is guaranteed to finalize. A work-list approach similar to
701 /// the standard value propagation algorithm used by SSA-CCP might
702 /// work here.
703 ///
704 /// Once all the branch weights are computed, we emit the MD_prof
705 /// metadata on BB using the computed values for each of its branches.
706 ///
707 /// \param F The function to query.
708 ///
709 /// \returns true if \p F was modified. Returns false, otherwise.
710 bool SampleProfileLoader::emitAnnotations(Function &F) {
711  bool Changed = false;
712 
713  // Initialize invariants used during computation and propagation.
714  HeaderLineno = getFunctionLoc(F);
715  if (HeaderLineno == 0)
716  return false;
717 
718  DEBUG(dbgs() << "Line number for the first instruction in " << F.getName()
719  << ": " << HeaderLineno << "\n");
720 
721  // Compute basic block weights.
722  Changed |= computeBlockWeights(F);
723 
724  if (Changed) {
725  // Find equivalence classes.
726  findEquivalenceClasses(F);
727 
728  // Propagate weights to all edges.
729  propagateWeights(F);
730  }
731 
732  return Changed;
733 }
734 
735 char SampleProfileLoader::ID = 0;
736 INITIALIZE_PASS_BEGIN(SampleProfileLoader, "sample-profile",
737  "Sample Profile loader", false, false)
741 INITIALIZE_PASS_DEPENDENCY(AddDiscriminators)
742 INITIALIZE_PASS_END(SampleProfileLoader, "sample-profile",
743  "Sample Profile loader", false, false)
744 
745 bool SampleProfileLoader::doInitialization(Module &M) {
746  auto ReaderOrErr = SampleProfileReader::create(Filename, M.getContext());
747  if (std::error_code EC = ReaderOrErr.getError()) {
748  std::string Msg = "Could not open profile: " + EC.message();
749  M.getContext().diagnose(DiagnosticInfoSampleProfile(Filename.data(), Msg));
750  return false;
751  }
752  Reader = std::move(ReaderOrErr.get());
753  ProfileIsValid = (Reader->read() == sampleprof_error::success);
754  return true;
755 }
756 
758  return new SampleProfileLoader(SampleProfileFile);
759 }
760 
762  return new SampleProfileLoader(Name);
763 }
764 
765 bool SampleProfileLoader::runOnFunction(Function &F) {
766  if (!ProfileIsValid)
767  return false;
768 
769  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
770  PDT = &getAnalysis<PostDominatorTree>();
771  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
772  Ctx = &F.getParent()->getContext();
773  Samples = Reader->getSamplesFor(F);
774  if (!Samples->empty())
775  return emitAnnotations(F);
776  return false;
777 }
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:223
sample profile
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:114
This file contains the declarations for metadata subclasses.
A debug info location.
Definition: DebugLoc.h:34
F(f)
INITIALIZE_PASS_BEGIN(SampleProfileLoader,"sample-profile","Sample Profile loader", false, false) INITIALIZE_PASS_END(SampleProfileLoader
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:188
Representation of the samples collected for a function.
Definition: SampleProf.h:167
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:70
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:98
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:75
static StringRef getName(Value *V)
DISubprogram * getDISubprogram(const MDNode *Scope)
Find subprogram that is enclosing this scope.
Definition: DebugInfo.cpp:36
Interval::succ_iterator succ_begin(Interval *I)
succ_begin/succ_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:104
Subprogram description.
static cl::opt< std::string > SampleProfileFile("sample-profile-file", cl::init(""), cl::value_desc("filename"), cl::desc("Profile file loaded by -sample-profile"), cl::Hidden)
Debug location.
static ErrorOr< std::unique_ptr< SampleProfileReader > > create(StringRef Filename, LLVMContext &C)
Create a sample profile reader appropriate to the file format.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:67
Interval::succ_iterator succ_end(Interval *I)
Definition: Interval.h:107
unsigned getNumSuccessors() const
Return the number of successors that this terminator has.
Definition: InstrTypes.h:57
unsigned getLine() const
Definition: DebugLoc.cpp:26
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:325
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
Subclasses of this class are all able to terminate a basic block.
Definition: InstrTypes.h:35
LLVM Basic Block Representation.
Definition: BasicBlock.h:65
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
BasicBlock * getSuccessor(unsigned idx) const
Return the specified successor.
Definition: InstrTypes.h:62
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:41
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:32
This file contains the declarations for the subclasses of Constant, which represent the different fla...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:264
Interval::pred_iterator pred_begin(Interval *I)
pred_begin/pred_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:114
const DebugLoc & getDebugLoc() const
getDebugLoc - Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:230
Represent the analysis usage information of a pass.
const InstListType & getInstList() const
Return the underlying instruction list container.
Definition: BasicBlock.h:252
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:294
Interval::pred_iterator pred_end(Interval *I)
Definition: Interval.h:117
void setMetadata(unsigned KindID, MDNode *Node)
setMetadata - Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1083
void setEntryCount(uint64_t Count)
Set the entry count for this function.
Definition: Function.cpp:985
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:299
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:861
Module.h This file contains the declarations for the Module class.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:263
static cl::opt< unsigned > SampleProfileMaxPropagateIterations("sample-profile-max-propagate-iterations", cl::init(100), cl::desc("Maximum number of iterations to go through when propagating ""sample block/edge weights through the CFG."))
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:123
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
FunctionPass * createSampleProfileLoaderPass()
#define I(x, y, z)
Definition: MD5.cpp:54
TerminatorInst * getTerminator()
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:124
sample Sample Profile false
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:365
sample Sample Profile loader
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:38
#define DEBUG(X)
Definition: Debug.h:92
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:737
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:40
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:203
Diagnostic information for the sample profiler.
void initializeSampleProfileLoaderPass(PassRegistry &)
LLVMContext & getContext() const
Get the global data context.
Definition: Module.h:265