clang  5.0.0
CodeGenPGO.cpp
Go to the documentation of this file.
1 //===--- CodeGenPGO.cpp - PGO Instrumentation for LLVM CodeGen --*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // Instrumentation-based profile-guided optimization
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CodeGenPGO.h"
15 #include "CodeGenFunction.h"
16 #include "CoverageMappingGen.h"
18 #include "clang/AST/StmtVisitor.h"
19 #include "llvm/IR/Intrinsics.h"
20 #include "llvm/IR/MDBuilder.h"
21 #include "llvm/Support/Endian.h"
22 #include "llvm/Support/FileSystem.h"
23 #include "llvm/Support/MD5.h"
24 
25 static llvm::cl::opt<bool> EnableValueProfiling(
26  "enable-value-profiling", llvm::cl::ZeroOrMore,
27  llvm::cl::desc("Enable value profiling"), llvm::cl::init(false));
28 
29 using namespace clang;
30 using namespace CodeGen;
31 
32 void CodeGenPGO::setFuncName(StringRef Name,
33  llvm::GlobalValue::LinkageTypes Linkage) {
34  llvm::IndexedInstrProfReader *PGOReader = CGM.getPGOReader();
35  FuncName = llvm::getPGOFuncName(
36  Name, Linkage, CGM.getCodeGenOpts().MainFileName,
37  PGOReader ? PGOReader->getVersion() : llvm::IndexedInstrProf::Version);
38 
39  // If we're generating a profile, create a variable for the name.
41  FuncNameVar = llvm::createPGOFuncNameVar(CGM.getModule(), Linkage, FuncName);
42 }
43 
44 void CodeGenPGO::setFuncName(llvm::Function *Fn) {
45  setFuncName(Fn->getName(), Fn->getLinkage());
46  // Create PGOFuncName meta data.
47  llvm::createPGOFuncNameMetadata(*Fn, FuncName);
48 }
49 
50 namespace {
51 /// \brief Stable hasher for PGO region counters.
52 ///
53 /// PGOHash produces a stable hash of a given function's control flow.
54 ///
55 /// Changing the output of this hash will invalidate all previously generated
56 /// profiles -- i.e., don't do it.
57 ///
58 /// \note When this hash does eventually change (years?), we still need to
59 /// support old hashes. We'll need to pull in the version number from the
60 /// profile data format and use the matching hash function.
61 class PGOHash {
62  uint64_t Working;
63  unsigned Count;
64  llvm::MD5 MD5;
65 
66  static const int NumBitsPerType = 6;
67  static const unsigned NumTypesPerWord = sizeof(uint64_t) * 8 / NumBitsPerType;
68  static const unsigned TooBig = 1u << NumBitsPerType;
69 
70 public:
71  /// \brief Hash values for AST nodes.
72  ///
73  /// Distinct values for AST nodes that have region counters attached.
74  ///
75  /// These values must be stable. All new members must be added at the end,
76  /// and no members should be removed. Changing the enumeration value for an
77  /// AST node will affect the hash of every function that contains that node.
78  enum HashType : unsigned char {
79  None = 0,
80  LabelStmt = 1,
81  WhileStmt,
82  DoStmt,
83  ForStmt,
86  SwitchStmt,
87  CaseStmt,
89  IfStmt,
90  CXXTryStmt,
93  BinaryOperatorLAnd,
94  BinaryOperatorLOr,
96 
97  // Keep this last. It's for the static assert that follows.
98  LastHashType
99  };
100  static_assert(LastHashType <= TooBig, "Too many types in HashType");
101 
102  // TODO: When this format changes, take in a version number here, and use the
103  // old hash calculation for file formats that used the old hash.
104  PGOHash() : Working(0), Count(0) {}
105  void combine(HashType Type);
106  uint64_t finalize();
107 };
108 const int PGOHash::NumBitsPerType;
109 const unsigned PGOHash::NumTypesPerWord;
110 const unsigned PGOHash::TooBig;
111 
112 /// A RecursiveASTVisitor that fills a map of statements to PGO counters.
113 struct MapRegionCounters : public RecursiveASTVisitor<MapRegionCounters> {
114  /// The next counter value to assign.
115  unsigned NextCounter;
116  /// The function hash.
117  PGOHash Hash;
118  /// The map of statements to counters.
119  llvm::DenseMap<const Stmt *, unsigned> &CounterMap;
120 
121  MapRegionCounters(llvm::DenseMap<const Stmt *, unsigned> &CounterMap)
122  : NextCounter(0), CounterMap(CounterMap) {}
123 
124  // Blocks and lambdas are handled as separate functions, so we need not
125  // traverse them in the parent context.
126  bool TraverseBlockExpr(BlockExpr *BE) { return true; }
127  bool TraverseLambdaBody(LambdaExpr *LE) { return true; }
128  bool TraverseCapturedStmt(CapturedStmt *CS) { return true; }
129 
130  bool VisitDecl(const Decl *D) {
131  switch (D->getKind()) {
132  default:
133  break;
134  case Decl::Function:
135  case Decl::CXXMethod:
136  case Decl::CXXConstructor:
137  case Decl::CXXDestructor:
138  case Decl::CXXConversion:
139  case Decl::ObjCMethod:
140  case Decl::Block:
141  case Decl::Captured:
142  CounterMap[D->getBody()] = NextCounter++;
143  break;
144  }
145  return true;
146  }
147 
148  bool VisitStmt(const Stmt *S) {
149  auto Type = getHashType(S);
150  if (Type == PGOHash::None)
151  return true;
152 
153  CounterMap[S] = NextCounter++;
154  Hash.combine(Type);
155  return true;
156  }
157  PGOHash::HashType getHashType(const Stmt *S) {
158  switch (S->getStmtClass()) {
159  default:
160  break;
161  case Stmt::LabelStmtClass:
162  return PGOHash::LabelStmt;
163  case Stmt::WhileStmtClass:
164  return PGOHash::WhileStmt;
165  case Stmt::DoStmtClass:
166  return PGOHash::DoStmt;
167  case Stmt::ForStmtClass:
168  return PGOHash::ForStmt;
169  case Stmt::CXXForRangeStmtClass:
170  return PGOHash::CXXForRangeStmt;
171  case Stmt::ObjCForCollectionStmtClass:
172  return PGOHash::ObjCForCollectionStmt;
173  case Stmt::SwitchStmtClass:
174  return PGOHash::SwitchStmt;
175  case Stmt::CaseStmtClass:
176  return PGOHash::CaseStmt;
177  case Stmt::DefaultStmtClass:
178  return PGOHash::DefaultStmt;
179  case Stmt::IfStmtClass:
180  return PGOHash::IfStmt;
181  case Stmt::CXXTryStmtClass:
182  return PGOHash::CXXTryStmt;
183  case Stmt::CXXCatchStmtClass:
184  return PGOHash::CXXCatchStmt;
185  case Stmt::ConditionalOperatorClass:
186  return PGOHash::ConditionalOperator;
187  case Stmt::BinaryConditionalOperatorClass:
188  return PGOHash::BinaryConditionalOperator;
189  case Stmt::BinaryOperatorClass: {
190  const BinaryOperator *BO = cast<BinaryOperator>(S);
191  if (BO->getOpcode() == BO_LAnd)
192  return PGOHash::BinaryOperatorLAnd;
193  if (BO->getOpcode() == BO_LOr)
194  return PGOHash::BinaryOperatorLOr;
195  break;
196  }
197  }
198  return PGOHash::None;
199  }
200 };
201 
202 /// A StmtVisitor that propagates the raw counts through the AST and
203 /// records the count at statements where the value may change.
204 struct ComputeRegionCounts : public ConstStmtVisitor<ComputeRegionCounts> {
205  /// PGO state.
206  CodeGenPGO &PGO;
207 
208  /// A flag that is set when the current count should be recorded on the
209  /// next statement, such as at the exit of a loop.
210  bool RecordNextStmtCount;
211 
212  /// The count at the current location in the traversal.
213  uint64_t CurrentCount;
214 
215  /// The map of statements to count values.
216  llvm::DenseMap<const Stmt *, uint64_t> &CountMap;
217 
218  /// BreakContinueStack - Keep counts of breaks and continues inside loops.
219  struct BreakContinue {
220  uint64_t BreakCount;
221  uint64_t ContinueCount;
222  BreakContinue() : BreakCount(0), ContinueCount(0) {}
223  };
224  SmallVector<BreakContinue, 8> BreakContinueStack;
225 
226  ComputeRegionCounts(llvm::DenseMap<const Stmt *, uint64_t> &CountMap,
227  CodeGenPGO &PGO)
228  : PGO(PGO), RecordNextStmtCount(false), CountMap(CountMap) {}
229 
230  void RecordStmtCount(const Stmt *S) {
231  if (RecordNextStmtCount) {
232  CountMap[S] = CurrentCount;
233  RecordNextStmtCount = false;
234  }
235  }
236 
237  /// Set and return the current count.
238  uint64_t setCount(uint64_t Count) {
239  CurrentCount = Count;
240  return Count;
241  }
242 
243  void VisitStmt(const Stmt *S) {
244  RecordStmtCount(S);
245  for (const Stmt *Child : S->children())
246  if (Child)
247  this->Visit(Child);
248  }
249 
250  void VisitFunctionDecl(const FunctionDecl *D) {
251  // Counter tracks entry to the function body.
252  uint64_t BodyCount = setCount(PGO.getRegionCount(D->getBody()));
253  CountMap[D->getBody()] = BodyCount;
254  Visit(D->getBody());
255  }
256 
257  // Skip lambda expressions. We visit these as FunctionDecls when we're
258  // generating them and aren't interested in the body when generating a
259  // parent context.
260  void VisitLambdaExpr(const LambdaExpr *LE) {}
261 
262  void VisitCapturedDecl(const CapturedDecl *D) {
263  // Counter tracks entry to the capture body.
264  uint64_t BodyCount = setCount(PGO.getRegionCount(D->getBody()));
265  CountMap[D->getBody()] = BodyCount;
266  Visit(D->getBody());
267  }
268 
269  void VisitObjCMethodDecl(const ObjCMethodDecl *D) {
270  // Counter tracks entry to the method body.
271  uint64_t BodyCount = setCount(PGO.getRegionCount(D->getBody()));
272  CountMap[D->getBody()] = BodyCount;
273  Visit(D->getBody());
274  }
275 
276  void VisitBlockDecl(const BlockDecl *D) {
277  // Counter tracks entry to the block body.
278  uint64_t BodyCount = setCount(PGO.getRegionCount(D->getBody()));
279  CountMap[D->getBody()] = BodyCount;
280  Visit(D->getBody());
281  }
282 
283  void VisitReturnStmt(const ReturnStmt *S) {
284  RecordStmtCount(S);
285  if (S->getRetValue())
286  Visit(S->getRetValue());
287  CurrentCount = 0;
288  RecordNextStmtCount = true;
289  }
290 
291  void VisitCXXThrowExpr(const CXXThrowExpr *E) {
292  RecordStmtCount(E);
293  if (E->getSubExpr())
294  Visit(E->getSubExpr());
295  CurrentCount = 0;
296  RecordNextStmtCount = true;
297  }
298 
299  void VisitGotoStmt(const GotoStmt *S) {
300  RecordStmtCount(S);
301  CurrentCount = 0;
302  RecordNextStmtCount = true;
303  }
304 
305  void VisitLabelStmt(const LabelStmt *S) {
306  RecordNextStmtCount = false;
307  // Counter tracks the block following the label.
308  uint64_t BlockCount = setCount(PGO.getRegionCount(S));
309  CountMap[S] = BlockCount;
310  Visit(S->getSubStmt());
311  }
312 
313  void VisitBreakStmt(const BreakStmt *S) {
314  RecordStmtCount(S);
315  assert(!BreakContinueStack.empty() && "break not in a loop or switch!");
316  BreakContinueStack.back().BreakCount += CurrentCount;
317  CurrentCount = 0;
318  RecordNextStmtCount = true;
319  }
320 
321  void VisitContinueStmt(const ContinueStmt *S) {
322  RecordStmtCount(S);
323  assert(!BreakContinueStack.empty() && "continue stmt not in a loop!");
324  BreakContinueStack.back().ContinueCount += CurrentCount;
325  CurrentCount = 0;
326  RecordNextStmtCount = true;
327  }
328 
329  void VisitWhileStmt(const WhileStmt *S) {
330  RecordStmtCount(S);
331  uint64_t ParentCount = CurrentCount;
332 
333  BreakContinueStack.push_back(BreakContinue());
334  // Visit the body region first so the break/continue adjustments can be
335  // included when visiting the condition.
336  uint64_t BodyCount = setCount(PGO.getRegionCount(S));
337  CountMap[S->getBody()] = CurrentCount;
338  Visit(S->getBody());
339  uint64_t BackedgeCount = CurrentCount;
340 
341  // ...then go back and propagate counts through the condition. The count
342  // at the start of the condition is the sum of the incoming edges,
343  // the backedge from the end of the loop body, and the edges from
344  // continue statements.
345  BreakContinue BC = BreakContinueStack.pop_back_val();
346  uint64_t CondCount =
347  setCount(ParentCount + BackedgeCount + BC.ContinueCount);
348  CountMap[S->getCond()] = CondCount;
349  Visit(S->getCond());
350  setCount(BC.BreakCount + CondCount - BodyCount);
351  RecordNextStmtCount = true;
352  }
353 
354  void VisitDoStmt(const DoStmt *S) {
355  RecordStmtCount(S);
356  uint64_t LoopCount = PGO.getRegionCount(S);
357 
358  BreakContinueStack.push_back(BreakContinue());
359  // The count doesn't include the fallthrough from the parent scope. Add it.
360  uint64_t BodyCount = setCount(LoopCount + CurrentCount);
361  CountMap[S->getBody()] = BodyCount;
362  Visit(S->getBody());
363  uint64_t BackedgeCount = CurrentCount;
364 
365  BreakContinue BC = BreakContinueStack.pop_back_val();
366  // The count at the start of the condition is equal to the count at the
367  // end of the body, plus any continues.
368  uint64_t CondCount = setCount(BackedgeCount + BC.ContinueCount);
369  CountMap[S->getCond()] = CondCount;
370  Visit(S->getCond());
371  setCount(BC.BreakCount + CondCount - LoopCount);
372  RecordNextStmtCount = true;
373  }
374 
375  void VisitForStmt(const ForStmt *S) {
376  RecordStmtCount(S);
377  if (S->getInit())
378  Visit(S->getInit());
379 
380  uint64_t ParentCount = CurrentCount;
381 
382  BreakContinueStack.push_back(BreakContinue());
383  // Visit the body region first. (This is basically the same as a while
384  // loop; see further comments in VisitWhileStmt.)
385  uint64_t BodyCount = setCount(PGO.getRegionCount(S));
386  CountMap[S->getBody()] = BodyCount;
387  Visit(S->getBody());
388  uint64_t BackedgeCount = CurrentCount;
389  BreakContinue BC = BreakContinueStack.pop_back_val();
390 
391  // The increment is essentially part of the body but it needs to include
392  // the count for all the continue statements.
393  if (S->getInc()) {
394  uint64_t IncCount = setCount(BackedgeCount + BC.ContinueCount);
395  CountMap[S->getInc()] = IncCount;
396  Visit(S->getInc());
397  }
398 
399  // ...then go back and propagate counts through the condition.
400  uint64_t CondCount =
401  setCount(ParentCount + BackedgeCount + BC.ContinueCount);
402  if (S->getCond()) {
403  CountMap[S->getCond()] = CondCount;
404  Visit(S->getCond());
405  }
406  setCount(BC.BreakCount + CondCount - BodyCount);
407  RecordNextStmtCount = true;
408  }
409 
410  void VisitCXXForRangeStmt(const CXXForRangeStmt *S) {
411  RecordStmtCount(S);
412  Visit(S->getLoopVarStmt());
413  Visit(S->getRangeStmt());
414  Visit(S->getBeginStmt());
415  Visit(S->getEndStmt());
416 
417  uint64_t ParentCount = CurrentCount;
418  BreakContinueStack.push_back(BreakContinue());
419  // Visit the body region first. (This is basically the same as a while
420  // loop; see further comments in VisitWhileStmt.)
421  uint64_t BodyCount = setCount(PGO.getRegionCount(S));
422  CountMap[S->getBody()] = BodyCount;
423  Visit(S->getBody());
424  uint64_t BackedgeCount = CurrentCount;
425  BreakContinue BC = BreakContinueStack.pop_back_val();
426 
427  // The increment is essentially part of the body but it needs to include
428  // the count for all the continue statements.
429  uint64_t IncCount = setCount(BackedgeCount + BC.ContinueCount);
430  CountMap[S->getInc()] = IncCount;
431  Visit(S->getInc());
432 
433  // ...then go back and propagate counts through the condition.
434  uint64_t CondCount =
435  setCount(ParentCount + BackedgeCount + BC.ContinueCount);
436  CountMap[S->getCond()] = CondCount;
437  Visit(S->getCond());
438  setCount(BC.BreakCount + CondCount - BodyCount);
439  RecordNextStmtCount = true;
440  }
441 
442  void VisitObjCForCollectionStmt(const ObjCForCollectionStmt *S) {
443  RecordStmtCount(S);
444  Visit(S->getElement());
445  uint64_t ParentCount = CurrentCount;
446  BreakContinueStack.push_back(BreakContinue());
447  // Counter tracks the body of the loop.
448  uint64_t BodyCount = setCount(PGO.getRegionCount(S));
449  CountMap[S->getBody()] = BodyCount;
450  Visit(S->getBody());
451  uint64_t BackedgeCount = CurrentCount;
452  BreakContinue BC = BreakContinueStack.pop_back_val();
453 
454  setCount(BC.BreakCount + ParentCount + BackedgeCount + BC.ContinueCount -
455  BodyCount);
456  RecordNextStmtCount = true;
457  }
458 
459  void VisitSwitchStmt(const SwitchStmt *S) {
460  RecordStmtCount(S);
461  if (S->getInit())
462  Visit(S->getInit());
463  Visit(S->getCond());
464  CurrentCount = 0;
465  BreakContinueStack.push_back(BreakContinue());
466  Visit(S->getBody());
467  // If the switch is inside a loop, add the continue counts.
468  BreakContinue BC = BreakContinueStack.pop_back_val();
469  if (!BreakContinueStack.empty())
470  BreakContinueStack.back().ContinueCount += BC.ContinueCount;
471  // Counter tracks the exit block of the switch.
472  setCount(PGO.getRegionCount(S));
473  RecordNextStmtCount = true;
474  }
475 
476  void VisitSwitchCase(const SwitchCase *S) {
477  RecordNextStmtCount = false;
478  // Counter for this particular case. This counts only jumps from the
479  // switch header and does not include fallthrough from the case before
480  // this one.
481  uint64_t CaseCount = PGO.getRegionCount(S);
482  setCount(CurrentCount + CaseCount);
483  // We need the count without fallthrough in the mapping, so it's more useful
484  // for branch probabilities.
485  CountMap[S] = CaseCount;
486  RecordNextStmtCount = true;
487  Visit(S->getSubStmt());
488  }
489 
490  void VisitIfStmt(const IfStmt *S) {
491  RecordStmtCount(S);
492  uint64_t ParentCount = CurrentCount;
493  if (S->getInit())
494  Visit(S->getInit());
495  Visit(S->getCond());
496 
497  // Counter tracks the "then" part of an if statement. The count for
498  // the "else" part, if it exists, will be calculated from this counter.
499  uint64_t ThenCount = setCount(PGO.getRegionCount(S));
500  CountMap[S->getThen()] = ThenCount;
501  Visit(S->getThen());
502  uint64_t OutCount = CurrentCount;
503 
504  uint64_t ElseCount = ParentCount - ThenCount;
505  if (S->getElse()) {
506  setCount(ElseCount);
507  CountMap[S->getElse()] = ElseCount;
508  Visit(S->getElse());
509  OutCount += CurrentCount;
510  } else
511  OutCount += ElseCount;
512  setCount(OutCount);
513  RecordNextStmtCount = true;
514  }
515 
516  void VisitCXXTryStmt(const CXXTryStmt *S) {
517  RecordStmtCount(S);
518  Visit(S->getTryBlock());
519  for (unsigned I = 0, E = S->getNumHandlers(); I < E; ++I)
520  Visit(S->getHandler(I));
521  // Counter tracks the continuation block of the try statement.
522  setCount(PGO.getRegionCount(S));
523  RecordNextStmtCount = true;
524  }
525 
526  void VisitCXXCatchStmt(const CXXCatchStmt *S) {
527  RecordNextStmtCount = false;
528  // Counter tracks the catch statement's handler block.
529  uint64_t CatchCount = setCount(PGO.getRegionCount(S));
530  CountMap[S] = CatchCount;
531  Visit(S->getHandlerBlock());
532  }
533 
534  void VisitAbstractConditionalOperator(const AbstractConditionalOperator *E) {
535  RecordStmtCount(E);
536  uint64_t ParentCount = CurrentCount;
537  Visit(E->getCond());
538 
539  // Counter tracks the "true" part of a conditional operator. The
540  // count in the "false" part will be calculated from this counter.
541  uint64_t TrueCount = setCount(PGO.getRegionCount(E));
542  CountMap[E->getTrueExpr()] = TrueCount;
543  Visit(E->getTrueExpr());
544  uint64_t OutCount = CurrentCount;
545 
546  uint64_t FalseCount = setCount(ParentCount - TrueCount);
547  CountMap[E->getFalseExpr()] = FalseCount;
548  Visit(E->getFalseExpr());
549  OutCount += CurrentCount;
550 
551  setCount(OutCount);
552  RecordNextStmtCount = true;
553  }
554 
555  void VisitBinLAnd(const BinaryOperator *E) {
556  RecordStmtCount(E);
557  uint64_t ParentCount = CurrentCount;
558  Visit(E->getLHS());
559  // Counter tracks the right hand side of a logical and operator.
560  uint64_t RHSCount = setCount(PGO.getRegionCount(E));
561  CountMap[E->getRHS()] = RHSCount;
562  Visit(E->getRHS());
563  setCount(ParentCount + RHSCount - CurrentCount);
564  RecordNextStmtCount = true;
565  }
566 
567  void VisitBinLOr(const BinaryOperator *E) {
568  RecordStmtCount(E);
569  uint64_t ParentCount = CurrentCount;
570  Visit(E->getLHS());
571  // Counter tracks the right hand side of a logical or operator.
572  uint64_t RHSCount = setCount(PGO.getRegionCount(E));
573  CountMap[E->getRHS()] = RHSCount;
574  Visit(E->getRHS());
575  setCount(ParentCount + RHSCount - CurrentCount);
576  RecordNextStmtCount = true;
577  }
578 };
579 } // end anonymous namespace
580 
581 void PGOHash::combine(HashType Type) {
582  // Check that we never combine 0 and only have six bits.
583  assert(Type && "Hash is invalid: unexpected type 0");
584  assert(unsigned(Type) < TooBig && "Hash is invalid: too many types");
585 
586  // Pass through MD5 if enough work has built up.
587  if (Count && Count % NumTypesPerWord == 0) {
588  using namespace llvm::support;
589  uint64_t Swapped = endian::byte_swap<uint64_t, little>(Working);
590  MD5.update(llvm::makeArrayRef((uint8_t *)&Swapped, sizeof(Swapped)));
591  Working = 0;
592  }
593 
594  // Accumulate the current type.
595  ++Count;
596  Working = Working << NumBitsPerType | Type;
597 }
598 
599 uint64_t PGOHash::finalize() {
600  // Use Working as the hash directly if we never used MD5.
601  if (Count <= NumTypesPerWord)
602  // No need to byte swap here, since none of the math was endian-dependent.
603  // This number will be byte-swapped as required on endianness transitions,
604  // so we will see the same value on the other side.
605  return Working;
606 
607  // Check for remaining work in Working.
608  if (Working)
609  MD5.update(Working);
610 
611  // Finalize the MD5 and return the hash.
612  llvm::MD5::MD5Result Result;
613  MD5.final(Result);
614  using namespace llvm::support;
615  return Result.low();
616 }
617 
618 void CodeGenPGO::assignRegionCounters(GlobalDecl GD, llvm::Function *Fn) {
619  const Decl *D = GD.getDecl();
620  if (!D->hasBody())
621  return;
622 
623  bool InstrumentRegions = CGM.getCodeGenOpts().hasProfileClangInstr();
624  llvm::IndexedInstrProfReader *PGOReader = CGM.getPGOReader();
625  if (!InstrumentRegions && !PGOReader)
626  return;
627  if (D->isImplicit())
628  return;
629  // Constructors and destructors may be represented by several functions in IR.
630  // If so, instrument only base variant, others are implemented by delegation
631  // to the base one, it would be counted twice otherwise.
633  if (isa<CXXDestructorDecl>(D) && GD.getDtorType() != Dtor_Base)
634  return;
635 
636  if (const auto *CCD = dyn_cast<CXXConstructorDecl>(D))
637  if (GD.getCtorType() != Ctor_Base &&
639  return;
640  }
642  setFuncName(Fn);
643 
644  mapRegionCounters(D);
645  if (CGM.getCodeGenOpts().CoverageMapping)
646  emitCounterRegionMapping(D);
647  if (PGOReader) {
649  loadRegionCounts(PGOReader, SM.isInMainFile(D->getLocation()));
650  computeRegionCounts(D);
651  applyFunctionAttributes(PGOReader, Fn);
652  }
653 }
654 
655 void CodeGenPGO::mapRegionCounters(const Decl *D) {
656  RegionCounterMap.reset(new llvm::DenseMap<const Stmt *, unsigned>);
657  MapRegionCounters Walker(*RegionCounterMap);
658  if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D))
659  Walker.TraverseDecl(const_cast<FunctionDecl *>(FD));
660  else if (const ObjCMethodDecl *MD = dyn_cast_or_null<ObjCMethodDecl>(D))
661  Walker.TraverseDecl(const_cast<ObjCMethodDecl *>(MD));
662  else if (const BlockDecl *BD = dyn_cast_or_null<BlockDecl>(D))
663  Walker.TraverseDecl(const_cast<BlockDecl *>(BD));
664  else if (const CapturedDecl *CD = dyn_cast_or_null<CapturedDecl>(D))
665  Walker.TraverseDecl(const_cast<CapturedDecl *>(CD));
666  assert(Walker.NextCounter > 0 && "no entry counter mapped for decl");
667  NumRegionCounters = Walker.NextCounter;
668  FunctionHash = Walker.Hash.finalize();
669 }
670 
671 bool CodeGenPGO::skipRegionMappingForDecl(const Decl *D) {
672  if (!D->getBody())
673  return true;
674 
675  // Don't map the functions in system headers.
676  const auto &SM = CGM.getContext().getSourceManager();
677  auto Loc = D->getBody()->getLocStart();
678  return SM.isInSystemHeader(Loc);
679 }
680 
681 void CodeGenPGO::emitCounterRegionMapping(const Decl *D) {
682  if (skipRegionMappingForDecl(D))
683  return;
684 
685  std::string CoverageMapping;
686  llvm::raw_string_ostream OS(CoverageMapping);
687  CoverageMappingGen MappingGen(*CGM.getCoverageMapping(),
689  CGM.getLangOpts(), RegionCounterMap.get());
690  MappingGen.emitCounterMapping(D, OS);
691  OS.flush();
692 
693  if (CoverageMapping.empty())
694  return;
695 
697  FuncNameVar, FuncName, FunctionHash, CoverageMapping);
698 }
699 
700 void
701 CodeGenPGO::emitEmptyCounterMapping(const Decl *D, StringRef Name,
702  llvm::GlobalValue::LinkageTypes Linkage) {
703  if (skipRegionMappingForDecl(D))
704  return;
705 
706  std::string CoverageMapping;
707  llvm::raw_string_ostream OS(CoverageMapping);
708  CoverageMappingGen MappingGen(*CGM.getCoverageMapping(),
710  CGM.getLangOpts());
711  MappingGen.emitEmptyMapping(D, OS);
712  OS.flush();
713 
714  if (CoverageMapping.empty())
715  return;
716 
717  setFuncName(Name, Linkage);
719  FuncNameVar, FuncName, FunctionHash, CoverageMapping, false);
720 }
721 
722 void CodeGenPGO::computeRegionCounts(const Decl *D) {
723  StmtCountMap.reset(new llvm::DenseMap<const Stmt *, uint64_t>);
724  ComputeRegionCounts Walker(*StmtCountMap, *this);
725  if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D))
726  Walker.VisitFunctionDecl(FD);
727  else if (const ObjCMethodDecl *MD = dyn_cast_or_null<ObjCMethodDecl>(D))
728  Walker.VisitObjCMethodDecl(MD);
729  else if (const BlockDecl *BD = dyn_cast_or_null<BlockDecl>(D))
730  Walker.VisitBlockDecl(BD);
731  else if (const CapturedDecl *CD = dyn_cast_or_null<CapturedDecl>(D))
732  Walker.VisitCapturedDecl(const_cast<CapturedDecl *>(CD));
733 }
734 
735 void
736 CodeGenPGO::applyFunctionAttributes(llvm::IndexedInstrProfReader *PGOReader,
737  llvm::Function *Fn) {
738  if (!haveRegionCounts())
739  return;
740 
741  uint64_t FunctionCount = getRegionCount(nullptr);
742  Fn->setEntryCount(FunctionCount);
743 }
744 
746  llvm::Value *StepV) {
747  if (!CGM.getCodeGenOpts().hasProfileClangInstr() || !RegionCounterMap)
748  return;
749  if (!Builder.GetInsertBlock())
750  return;
751 
752  unsigned Counter = (*RegionCounterMap)[S];
753  auto *I8PtrTy = llvm::Type::getInt8PtrTy(CGM.getLLVMContext());
754 
755  llvm::Value *Args[] = {llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy),
756  Builder.getInt64(FunctionHash),
757  Builder.getInt32(NumRegionCounters),
758  Builder.getInt32(Counter), StepV};
759  if (!StepV)
760  Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::instrprof_increment),
761  makeArrayRef(Args, 4));
762  else
763  Builder.CreateCall(
764  CGM.getIntrinsic(llvm::Intrinsic::instrprof_increment_step),
765  makeArrayRef(Args));
766 }
767 
768 // This method either inserts a call to the profile run-time during
769 // instrumentation or puts profile data into metadata for PGO use.
770 void CodeGenPGO::valueProfile(CGBuilderTy &Builder, uint32_t ValueKind,
771  llvm::Instruction *ValueSite, llvm::Value *ValuePtr) {
772 
774  return;
775 
776  if (!ValuePtr || !ValueSite || !Builder.GetInsertBlock())
777  return;
778 
779  if (isa<llvm::Constant>(ValuePtr))
780  return;
781 
782  bool InstrumentValueSites = CGM.getCodeGenOpts().hasProfileClangInstr();
783  if (InstrumentValueSites && RegionCounterMap) {
784  auto BuilderInsertPoint = Builder.saveIP();
785  Builder.SetInsertPoint(ValueSite);
786  llvm::Value *Args[5] = {
787  llvm::ConstantExpr::getBitCast(FuncNameVar, Builder.getInt8PtrTy()),
788  Builder.getInt64(FunctionHash),
789  Builder.CreatePtrToInt(ValuePtr, Builder.getInt64Ty()),
790  Builder.getInt32(ValueKind),
791  Builder.getInt32(NumValueSites[ValueKind]++)
792  };
793  Builder.CreateCall(
794  CGM.getIntrinsic(llvm::Intrinsic::instrprof_value_profile), Args);
795  Builder.restoreIP(BuilderInsertPoint);
796  return;
797  }
798 
799  llvm::IndexedInstrProfReader *PGOReader = CGM.getPGOReader();
800  if (PGOReader && haveRegionCounts()) {
801  // We record the top most called three functions at each call site.
802  // Profile metadata contains "VP" string identifying this metadata
803  // as value profiling data, then a uint32_t value for the value profiling
804  // kind, a uint64_t value for the total number of times the call is
805  // executed, followed by the function hash and execution count (uint64_t)
806  // pairs for each function.
807  if (NumValueSites[ValueKind] >= ProfRecord->getNumValueSites(ValueKind))
808  return;
809 
810  llvm::annotateValueSite(CGM.getModule(), *ValueSite, *ProfRecord,
811  (llvm::InstrProfValueKind)ValueKind,
812  NumValueSites[ValueKind]);
813 
814  NumValueSites[ValueKind]++;
815  }
816 }
817 
818 void CodeGenPGO::loadRegionCounts(llvm::IndexedInstrProfReader *PGOReader,
819  bool IsInMainFile) {
820  CGM.getPGOStats().addVisited(IsInMainFile);
821  RegionCounts.clear();
823  PGOReader->getInstrProfRecord(FuncName, FunctionHash);
824  if (auto E = RecordExpected.takeError()) {
825  auto IPE = llvm::InstrProfError::take(std::move(E));
826  if (IPE == llvm::instrprof_error::unknown_function)
827  CGM.getPGOStats().addMissing(IsInMainFile);
828  else if (IPE == llvm::instrprof_error::hash_mismatch)
829  CGM.getPGOStats().addMismatched(IsInMainFile);
830  else if (IPE == llvm::instrprof_error::malformed)
831  // TODO: Consider a more specific warning for this case.
832  CGM.getPGOStats().addMismatched(IsInMainFile);
833  return;
834  }
835  ProfRecord =
836  llvm::make_unique<llvm::InstrProfRecord>(std::move(RecordExpected.get()));
837  RegionCounts = ProfRecord->Counts;
838 }
839 
/// \brief Pick the divisor used to squeeze 64-bit weights into 32 bits.
///
/// \param MaxWeight the largest weight that will be scaled.
/// \returns 1 when \p MaxWeight is already below UINT32_MAX; otherwise
/// MaxWeight / UINT32_MAX + 1, so that every weight divided by the result is
/// strictly less than UINT32_MAX.
static uint64_t calculateWeightScale(uint64_t MaxWeight) {
  if (MaxWeight < UINT32_MAX)
    return 1;
  return MaxWeight / UINT32_MAX + 1;
}
847 
/// \brief Scale one branch weight down to 32 bits, then add 1.
///
/// The 64-bit \p Weight is divided by \p Scale. Following Laplace's Rule of
/// Succession, 1 is always added to the quotient so that no weight is zero.
///
/// \pre \p Scale is non-zero and was calculated by \a calculateWeightScale()
/// from a weight no less than \p Weight; otherwise the result may not fit in
/// 32 bits, which is asserted.
static uint32_t scaleBranchWeight(uint64_t Weight, uint64_t Scale) {
  assert(Scale && "scale by 0?");
  const uint64_t Quotient = Weight / Scale;
  const uint64_t Scaled = Quotient + 1;
  assert(Scaled <= UINT32_MAX && "overflow 32-bits");
  return static_cast<uint32_t>(Scaled);
}
863 
864 llvm::MDNode *CodeGenFunction::createProfileWeights(uint64_t TrueCount,
865  uint64_t FalseCount) {
866  // Check for empty weights.
867  if (!TrueCount && !FalseCount)
868  return nullptr;
869 
870  // Calculate how to scale down to 32-bits.
871  uint64_t Scale = calculateWeightScale(std::max(TrueCount, FalseCount));
872 
873  llvm::MDBuilder MDHelper(CGM.getLLVMContext());
874  return MDHelper.createBranchWeights(scaleBranchWeight(TrueCount, Scale),
875  scaleBranchWeight(FalseCount, Scale));
876 }
877 
878 llvm::MDNode *
879 CodeGenFunction::createProfileWeights(ArrayRef<uint64_t> Weights) {
880  // We need at least two elements to create meaningful weights.
881  if (Weights.size() < 2)
882  return nullptr;
883 
884  // Check for empty weights.
885  uint64_t MaxWeight = *std::max_element(Weights.begin(), Weights.end());
886  if (MaxWeight == 0)
887  return nullptr;
888 
889  // Calculate how to scale down to 32-bits.
890  uint64_t Scale = calculateWeightScale(MaxWeight);
891 
892  SmallVector<uint32_t, 16> ScaledWeights;
893  ScaledWeights.reserve(Weights.size());
894  for (uint64_t W : Weights)
895  ScaledWeights.push_back(scaleBranchWeight(W, Scale));
896 
897  llvm::MDBuilder MDHelper(CGM.getLLVMContext());
898  return MDHelper.createBranchWeights(ScaledWeights);
899 }
900 
901 llvm::MDNode *CodeGenFunction::createProfileWeightsForLoop(const Stmt *Cond,
902  uint64_t LoopCount) {
903  if (!PGO.haveRegionCounts())
904  return nullptr;
905  Optional<uint64_t> CondCount = PGO.getStmtCount(Cond);
906  assert(CondCount.hasValue() && "missing expected loop condition count");
907  if (*CondCount == 0)
908  return nullptr;
909  return createProfileWeights(LoopCount,
910  std::max(*CondCount, LoopCount) - LoopCount);
911 }
Expr * getInc()
Definition: Stmt.h:1213
Optional< uint64_t > getStmtCount(const Stmt *S)
Check if an execution count is known for a given statement.
Definition: CodeGenPGO.h:64
StmtClass getStmtClass() const
Definition: Stmt.h:361
FunctionDecl - An instance of this class is created to represent a function declaration or definition...
Definition: Decl.h:1618
CXXCtorType getCtorType() const
Definition: GlobalDecl.h:64
ConstStmtVisitor - This class implements a simple visitor for Stmt subclasses.
Definition: StmtVisitor.h:187
Expr * getCond()
Definition: Stmt.h:1101
llvm::Module & getModule() const
static uint64_t calculateWeightScale(uint64_t MaxWeight)
Calculate what to divide by to scale weights.
Definition: CodeGenPGO.cpp:844
llvm::LLVMContext & getLLVMContext()
Stmt - This represents one statement.
Definition: Stmt.h:60
CXXCatchStmt * getHandler(unsigned i)
Definition: StmtCXX.h:104
IfStmt - This represents an if/then/else.
Definition: Stmt.h:905
Organizes the per-function state that is used while generating code coverage mapping data...
Decl - This represents one declaration (or definition), e.g.
Definition: DeclBase.h:81
The base class of the type hierarchy.
Definition: Type.h:1303
static llvm::cl::opt< bool > EnableValueProfiling("enable-value-profiling", llvm::cl::ZeroOrMore, llvm::cl::desc("Enable value profiling"), llvm::cl::init(false))
const Stmt * getElse() const
Definition: Stmt.h:945
void emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S, llvm::Value *StepV)
Definition: CodeGenPGO.cpp:745
ObjCMethodDecl - Represents an instance or class method declaration.
Definition: DeclObjC.h:113
llvm::ImmutableMap< CountKey, unsigned > CountMap
A C++ throw-expression (C++ [except.throw]).
Definition: ExprCXX.h:928
Linkage
Describes the different kinds of linkage (C++ [basic.link], C99 6.2.2) that an entity may have...
Definition: Linkage.h:25
LabelStmt - Represents a label, which has a substatement.
Definition: Stmt.h:813
Stmt * getBody()
Definition: Stmt.h:1149
void addFunctionMappingRecord(llvm::GlobalVariable *FunctionName, StringRef FunctionNameValue, uint64_t FunctionHash, const std::string &CoverageMapping, bool IsUsed=true)
Add a function's coverage mapping record to the collection of the function mapping records...
Stmt * getBody() const override
getBody - If this Decl represents a declaration for a body of code, such as a function or method defi...
Definition: Decl.h:3637
const Decl * getDecl() const
Definition: GlobalDecl.h:62
Expr * getLHS() const
Definition: Expr.h:3011
Stmt * getBody() const override
getBody - If this Decl represents a declaration for a body of code, such as a function or method defi...
Definition: Decl.cpp:4223
ForStmt - This represents a 'for (init;cond;inc)' stmt.
Definition: Stmt.h:1179
Base object ctor.
Definition: ABI.h:27
Expr * getTrueExpr() const
Definition: Expr.h:3407
bool isImplicit() const
isImplicit - Indicates whether the declaration was implicitly generated by the implementation.
Definition: DeclBase.h:537
Stmt * getHandlerBlock() const
Definition: StmtCXX.h:52
Stmt * getBody()
Definition: Stmt.h:1214
child_range children()
Definition: Stmt.cpp:208
A builtin binary operation expression such as "x + y" or "x <= y".
Definition: Expr.h:2967
Stmt * getInit()
Definition: Stmt.h:1193
CXXForRangeStmt - This represents C++0x [stmt.ranged]'s ranged for statement, represented as 'for (ra...
Definition: StmtCXX.h:128
void addMissing(bool MainFile)
Record that a function we've visited has no profile data.
Expr * getCond()
Definition: Stmt.h:1212
void assignRegionCounters(GlobalDecl GD, llvm::Function *Fn)
Assign counters to regions and configure them for PGO of a given function.
Definition: CodeGenPGO.cpp:618
A C++ lambda expression, which produces a function object (of unspecified type) that can be invoked l...
Definition: ExprCXX.h:1519
A class that does preorder or postorder depth-first traversal on the entire Clang AST and visits each...
This represents the body of a CapturedStmt, and serves as its DeclContext.
Definition: Decl.h:3726
detail::InMemoryDirectory::const_iterator I
Stmt * getInit()
Definition: Stmt.h:938
InstrProfStats & getPGOStats()
ConditionalOperator - The ?: ternary operator.
Definition: Expr.h:3245
const TargetInfo & getTarget() const
bool hasConstructorVariants() const
Does this ABI have different entrypoints for complete-object and base-subobject constructors?
Definition: TargetCXXABI.h:222
void valueProfile(CGBuilderTy &Builder, uint32_t ValueKind, llvm::Instruction *ValueSite, llvm::Value *ValuePtr)
Definition: CodeGenPGO.cpp:770
BlockDecl - This represents a block literal declaration, which is like an unnamed FunctionDecl...
Definition: Decl.h:3557
CXXDtorType getDtorType() const
Definition: GlobalDecl.h:69
DeclStmt * getEndStmt()
Definition: StmtCXX.h:158
BlockExpr - Adaptor class for mixing a BlockDecl with expressions.
Definition: Expr.h:4820
Kind getKind() const
Definition: DeclBase.h:410
bool haveRegionCounts() const
Whether or not we have PGO region data for the current function.
Definition: CodeGenPGO.h:52
Stmt * getBody()
Definition: Stmt.h:1104
ASTContext & getContext() const
CXXTryStmt - A C++ try block, including all handlers.
Definition: StmtCXX.h:65
bool isInSystemHeader(SourceLocation Loc) const
Returns if a SourceLocation is in a system header.
Base object dtor.
Definition: ABI.h:37
void addMismatched(bool MainFile)
Record that a function we've visited has mismatched profile data.
ReturnStmt - This represents a return, optionally of an expression: return; return 4;...
Definition: Stmt.h:1392
llvm::Function * getIntrinsic(unsigned IID, ArrayRef< llvm::Type * > Tys=None)
const SourceManager & SM
Definition: Format.cpp:1293
GlobalDecl - represents a global declaration.
Definition: GlobalDecl.h:29
DoStmt - This represents a 'do/while' stmt.
Definition: Stmt.h:1128
void addVisited(bool MainFile)
Record that we've visited a function and whether or not that function was in the main source file...
Stmt * getBody(const FunctionDecl *&Definition) const
getBody - Retrieve the body (definition) of the function.
Definition: Decl.cpp:2597
bool isInMainFile(SourceLocation Loc) const
Returns whether the PresumedLoc for a given SourceLocation is in the main file.
#define false
Definition: stdbool.h:33
This captures a statement into a function.
Definition: Stmt.h:2032
const Expr * getCond() const
Definition: Stmt.h:1020
Stmt * getBody() const override
Retrieve the body of this method, if it has one.
Definition: DeclObjC.cpp:793
Per-function PGO state.
Definition: CodeGenPGO.h:29
virtual Stmt * getBody() const
getBody - If this Decl represents a declaration for a body of code, such as a function or method defi...
Definition: DeclBase.h:948
const CodeGenOptions & getCodeGenOpts() const
const LangOptions & getLangOpts() const
const Expr * getSubExpr() const
Definition: ExprCXX.h:948
StringRef Name
Definition: USRFinder.cpp:123
const Stmt * getBody() const
Definition: Stmt.h:1021
unsigned getNumHandlers() const
Definition: StmtCXX.h:103
detail::InMemoryDirectory::const_iterator E
const Expr * getRetValue() const
Definition: Stmt.cpp:905
const Stmt * getThen() const
Definition: Stmt.h:943
void emitEmptyCounterMapping(const Decl *D, StringRef FuncName, llvm::GlobalValue::LinkageTypes Linkage)
Emit a coverage mapping range with a counter zero for an unused declaration.
Definition: CodeGenPGO.cpp:701
SwitchStmt - This represents a 'switch' stmt.
Definition: Stmt.h:983
Expr * getFalseExpr() const
Definition: Expr.h:3413
Represents Objective-C's collection statement.
Definition: StmtObjC.h:24
AbstractConditionalOperator - An abstract base class for ConditionalOperator and BinaryConditionalOpe...
Definition: Expr.h:3203
Stmt * getInit()
Definition: Stmt.h:1017
bool hasProfileClangInstr() const
Check if Clang profile instrumentation is on.
Stmt * getSubStmt()
Definition: Stmt.cpp:862
char __ovld __cnfn max(char x, char y)
Returns y if x < y, otherwise it returns x.
SourceManager & getSourceManager()
Definition: ASTContext.h:616
DeclStmt * getRangeStmt()
Definition: StmtCXX.h:154
GotoStmt - This represents a direct goto.
Definition: Stmt.h:1250
static uint32_t scaleBranchWeight(uint64_t Weight, uint64_t Scale)
Scale an individual branch weight (and add 1).
Definition: CodeGenPGO.cpp:857
Expr * getCond()
Definition: Stmt.h:1146
void emitCounterMapping(const Decl *D, llvm::raw_ostream &OS)
Emit the coverage mapping data which maps the regions of code to counters that will be used to find t...
BoundNodesTreeBuilder *const Builder
TargetCXXABI getCXXABI() const
Get the C++ ABI currently in use.
Definition: TargetInfo.h:861
ContinueStmt - This represents a continue.
Definition: Stmt.h:1328
std::string MainFileName
The user provided name for the "main file", if non-empty.
Opcode getOpcode() const
Definition: Expr.h:3008
BinaryConditionalOperator - The GNU extension to the conditional operator which allows the middle ope...
Definition: Expr.h:3318
CXXCatchStmt - This represents a C++ catch block.
Definition: StmtCXX.h:29
static bool IsConstructorDelegationValid(const CXXConstructorDecl *Ctor)
Checks whether the given constructor is a valid subject for the complete-to-base constructor delegati...
Definition: CGClass.cpp:692
WhileStmt - This represents a 'while' stmt.
Definition: Stmt.h:1073
const Expr * getCond() const
Definition: Stmt.h:941
void ClearUnusedCoverageMapping(const Decl *D)
Remove the deferred empty coverage mapping as this declaration is actually instrumented.
CompoundStmt * getTryBlock()
Definition: StmtCXX.h:96
Expr * getRHS() const
Definition: Expr.h:3013
uint64_t getRegionCount(const Stmt *S)
Return the region count for the counter at the given index.
Definition: CodeGenPGO.h:109
BreakStmt - This represents a break.
Definition: Stmt.h:1354
CoverageMappingModuleGen * getCoverageMapping() const
Stmt * getSubStmt()
Definition: Stmt.h:833
DeclStmt * getLoopVarStmt()
Definition: StmtCXX.h:161
SourceLocation getLocation() const
Definition: DeclBase.h:407
llvm::IndexedInstrProfReader * getPGOReader() const
DeclStmt * getBeginStmt()
Definition: StmtCXX.h:155
void emitEmptyMapping(const Decl *D, llvm::raw_ostream &OS)
Emit the coverage mapping data for an unused function.
SourceLocation getLocStart() const LLVM_READONLY
Definition: Stmt.cpp:257
virtual bool hasBody() const
Returns true if this Decl represents a declaration for a body of code, such as a function or method d...
Definition: DeclBase.h:954
This class handles loading and caching of source files into memory.
const NamedDecl * Result
Definition: USRFinder.cpp:70