LLVM 17.0.0git
SampleProfileLoaderBaseImpl.h
Go to the documentation of this file.
1////===- SampleProfileLoadBaseImpl.h - Profile loader base impl --*- C++-*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file provides the interface for the sampled PGO profile loader base
11/// implementation.
12//
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_TRANSFORMS_UTILS_SAMPLEPROFILELOADERBASEIMPL_H
16#define LLVM_TRANSFORMS_UTILS_SAMPLEPROFILELOADERBASEIMPL_H
17
18#include "llvm/ADT/ArrayRef.h"
19#include "llvm/ADT/DenseMap.h"
20#include "llvm/ADT/DenseSet.h"
22#include "llvm/ADT/SmallSet.h"
27#include "llvm/IR/BasicBlock.h"
28#include "llvm/IR/CFG.h"
30#include "llvm/IR/DebugLoc.h"
31#include "llvm/IR/Dominators.h"
32#include "llvm/IR/Function.h"
33#include "llvm/IR/Instruction.h"
35#include "llvm/IR/Module.h"
43
44namespace llvm {
45using namespace sampleprof;
46using namespace sampleprofutil;
48
49#define DEBUG_TYPE "sample-profile-impl"
50
51namespace afdo_detail {
52
53template <typename BlockT> struct IRTraits;
54template <> struct IRTraits<BasicBlock> {
59 using LoopT = Loop;
60 using LoopInfoPtrT = std::unique_ptr<LoopInfo>;
61 using DominatorTreePtrT = std::unique_ptr<DominatorTree>;
63 using PostDominatorTreePtrT = std::unique_ptr<PostDominatorTree>;
68 static Function &getFunction(Function &F) { return F; }
69 static const BasicBlock *getEntryBB(const Function *F) {
70 return &F->getEntryBlock();
71 }
73 static succ_range getSuccessors(BasicBlock *BB) { return successors(BB); }
74};
75
76} // end namespace afdo_detail
77
78extern cl::opt<bool> SampleProfileUseProfi;
79
80template <typename BT> class SampleProfileLoaderBaseImpl {
81public:
82 SampleProfileLoaderBaseImpl(std::string Name, std::string RemapName)
83 : Filename(Name), RemappingFilename(RemapName) {}
84 void dump() { Reader->dump(); }
85
105
109 using Edge = std::pair<const BasicBlockT *, const BasicBlockT *>;
113
114protected:
117
120 }
123 }
126 }
129 }
130
131 unsigned getFunctionLoc(FunctionT &Func);
137 virtual const FunctionSamples *
140 void printBlockWeight(raw_ostream &OS, const BasicBlockT *BB) const;
145 ArrayRef<BasicBlockT *> Descendants,
146 PostDominatorTreeT *DomTree);
149 BlockWeightMap &SampleBlockWeights,
151 uint64_t visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge);
153 bool propagateThroughEdges(FunctionT &F, bool UpdateBlockCount);
154 void clearFunctionData(bool ResetDT = true);
156 bool
158 const DenseSet<GlobalValue::GUID> &InlinedGUIDs);
160 const DenseSet<GlobalValue::GUID> &InlinedGUIDs);
161 void
163 const DenseSet<GlobalValue::GUID> &InlinedGUIDs);
165
166 /// Map basic blocks to their computed weights.
167 ///
168 /// The weight of a basic block is defined to be the maximum
169 /// of all the instruction weights in that block.
171
172 /// Map edges to their computed weights.
173 ///
174 /// Edge weights are computed by propagating basic block weights in
175 /// SampleProfile::propagateWeights.
177
178 /// Set of visited blocks during propagation.
180
181 /// Set of visited edges during propagation.
183
184 /// Equivalence classes for block weights.
185 ///
186 /// Two blocks BB1 and BB2 are in the same equivalence class if they
187 /// dominate and post-dominate each other, and they are in the same loop
188 /// nest. When this happens, the two blocks are guaranteed to execute
189 /// the same number of times.
191
192 /// Dominance, post-dominance and loop information.
196
197 /// Predecessors for each basic block in the CFG.
199
200 /// Successors for each basic block in the CFG.
202
203 /// Profile coverage tracker.
205
206 /// Profile reader object.
207 std::unique_ptr<SampleProfileReader> Reader;
208
209 /// Samples collected for the body of this function.
211
212 /// Name of the profile file to load.
213 std::string Filename;
214
215 /// Name of the profile remapping file to load.
216 std::string RemappingFilename;
217
218 /// Profile Summary Info computed from sample profile.
220
221 /// Optimization Remark Emitter used to emit diagnostic remarks.
223};
224
225/// Clear all the per-function data used to load samples and propagate weights.
226template <typename BT>
228 BlockWeights.clear();
229 EdgeWeights.clear();
230 VisitedBlocks.clear();
231 VisitedEdges.clear();
232 EquivalenceClass.clear();
233 if (ResetDT) {
234 DT = nullptr;
235 PDT = nullptr;
236 LI = nullptr;
237 }
238 Predecessors.clear();
239 Successors.clear();
240 CoverageTracker.clear();
241}
242
243#ifndef NDEBUG
244/// Print the weight of edge \p E on stream \p OS.
245///
246/// \param OS Stream to emit the output to.
247/// \param E Edge to print.
248template <typename BT>
250 OS << "weight[" << E.first->getName() << "->" << E.second->getName()
251 << "]: " << EdgeWeights[E] << "\n";
252}
253
254/// Print the equivalence class of block \p BB on stream \p OS.
255///
256/// \param OS Stream to emit the output to.
257/// \param BB Block to print.
258template <typename BT>
260 raw_ostream &OS, const BasicBlockT *BB) {
261 const BasicBlockT *Equiv = EquivalenceClass[BB];
262 OS << "equivalence[" << BB->getName()
263 << "]: " << ((Equiv) ? EquivalenceClass[BB]->getName() : "NONE") << "\n";
264}
265
266/// Print the weight of block \p BB on stream \p OS.
267///
268/// \param OS Stream to emit the output to.
269/// \param BB Block to print.
270template <typename BT>
272 raw_ostream &OS, const BasicBlockT *BB) const {
273 const auto &I = BlockWeights.find(BB);
274 uint64_t W = (I == BlockWeights.end() ? 0 : I->second);
275 OS << "weight[" << BB->getName() << "]: " << W << "\n";
276}
277#endif
278
279/// Get the weight for an instruction.
280///
281/// The "weight" of an instruction \p Inst is the number of samples
282/// collected on that instruction at runtime. To retrieve it, we
283/// need to compute the line number of \p Inst relative to the start of its
284/// function. We use HeaderLineno to compute the offset. We then
285/// look up the samples collected for \p Inst using BodySamples.
286///
287/// \param Inst Instruction to query.
288///
289/// \returns the weight of \p Inst.
290template <typename BT>
293 return getInstWeightImpl(Inst);
294}
295
296template <typename BT>
299 const FunctionSamples *FS = findFunctionSamples(Inst);
300 if (!FS)
301 return std::error_code();
302
303 const DebugLoc &DLoc = Inst.getDebugLoc();
304 if (!DLoc)
305 return std::error_code();
306
307 const DILocation *DIL = DLoc;
308 uint32_t LineOffset = FunctionSamples::getOffset(DIL);
309 uint32_t Discriminator;
311 Discriminator = DIL->getDiscriminator();
312 else
313 Discriminator = DIL->getBaseDiscriminator();
314
315 ErrorOr<uint64_t> R = FS->findSamplesAt(LineOffset, Discriminator);
316 if (R) {
317 bool FirstMark =
318 CoverageTracker.markSamplesUsed(FS, LineOffset, Discriminator, R.get());
319 if (FirstMark) {
320 ORE->emit([&]() {
321 OptRemarkAnalysisT Remark(DEBUG_TYPE, "AppliedSamples", &Inst);
322 Remark << "Applied " << ore::NV("NumSamples", *R);
323 Remark << " samples from profile (offset: ";
324 Remark << ore::NV("LineOffset", LineOffset);
325 if (Discriminator) {
326 Remark << ".";
327 Remark << ore::NV("Discriminator", Discriminator);
328 }
329 Remark << ")";
330 return Remark;
331 });
332 }
333 LLVM_DEBUG(dbgs() << " " << DLoc.getLine() << "." << Discriminator << ":"
334 << Inst << " (line offset: " << LineOffset << "."
335 << Discriminator << " - weight: " << R.get() << ")\n");
336 }
337 return R;
338}
339
340/// Compute the weight of a basic block.
341///
342/// The weight of basic block \p BB is the maximum weight of all the
343/// instructions in BB.
344///
345/// \param BB The basic block to query.
346///
347/// \returns the weight for \p BB.
348template <typename BT>
351 uint64_t Max = 0;
352 bool HasWeight = false;
353 for (auto &I : *BB) {
354 const ErrorOr<uint64_t> &R = getInstWeight(I);
355 if (R) {
356 Max = std::max(Max, R.get());
357 HasWeight = true;
358 }
359 }
360 return HasWeight ? ErrorOr<uint64_t>(Max) : std::error_code();
361}
362
363/// Compute and store the weights of every basic block.
364///
365/// This populates the BlockWeights map by computing
366/// the weights of every basic block in the CFG.
367///
368/// \param F The function to query.
369template <typename BT>
371 bool Changed = false;
372 LLVM_DEBUG(dbgs() << "Block weights\n");
373 for (const auto &BB : F) {
374 ErrorOr<uint64_t> Weight = getBlockWeight(&BB);
375 if (Weight) {
376 BlockWeights[&BB] = Weight.get();
377 VisitedBlocks.insert(&BB);
378 Changed = true;
379 }
380 LLVM_DEBUG(printBlockWeight(dbgs(), &BB));
381 }
382
383 return Changed;
384}
385
386/// Get the FunctionSamples for an instruction.
387///
388/// The FunctionSamples of an instruction \p Inst is the inlined instance
389/// in which that instruction is coming from. We traverse the inline stack
390/// of that instruction, and match it with the tree nodes in the profile.
391///
392/// \param Inst Instruction to query.
393///
394/// \returns the FunctionSamples pointer to the inlined instance.
395template <typename BT>
397 const InstructionT &Inst) const {
398 const DILocation *DIL = Inst.getDebugLoc();
399 if (!DIL)
400 return Samples;
401
402 auto it = DILocation2SampleMap.try_emplace(DIL, nullptr);
403 if (it.second) {
404 it.first->second = Samples->findFunctionSamples(DIL, Reader->getRemapper());
405 }
406 return it.first->second;
407}
408
409/// Find equivalence classes for the given block.
410///
411/// This finds all the blocks that are guaranteed to execute the same
412/// number of times as \p BB1. To do this, it traverses all the
413/// descendants of \p BB1 in the dominator or post-dominator tree.
414///
415/// A block BB2 will be in the same equivalence class as \p BB1 if
416/// the following holds:
417///
418/// 1- \p BB1 is a descendant of BB2 in the opposite tree. So, if BB2
419/// is a descendant of \p BB1 in the dominator tree, then BB2 should
420/// dominate BB1 in the post-dominator tree.
421///
422/// 2- Both BB2 and \p BB1 must be in the same loop.
423///
424/// For every block BB2 that meets those two requirements, we set BB2's
425/// equivalence class to \p BB1.
426///
427/// \param BB1 Block to check.
428/// \param Descendants Descendants of \p BB1 in either the dom or pdom tree.
429/// \param DomTree Opposite dominator tree. If \p Descendants is filled
430/// with blocks from \p BB1's dominator tree, then
431/// this is the post-dominator tree, and vice versa.
432template <typename BT>
434 BasicBlockT *BB1, ArrayRef<BasicBlockT *> Descendants,
435 PostDominatorTreeT *DomTree) {
436 const BasicBlockT *EC = EquivalenceClass[BB1];
437 uint64_t Weight = BlockWeights[EC];
438 for (const auto *BB2 : Descendants) {
439 bool IsDomParent = DomTree->dominates(BB2, BB1);
440 bool IsInSameLoop = LI->getLoopFor(BB1) == LI->getLoopFor(BB2);
441 if (BB1 != BB2 && IsDomParent && IsInSameLoop) {
442 EquivalenceClass[BB2] = EC;
443 // If BB2 is visited, then the entire EC should be marked as visited.
444 if (VisitedBlocks.count(BB2)) {
445 VisitedBlocks.insert(EC);
446 }
447
448 // If BB2 is heavier than BB1, make BB2 have the same weight
449 // as BB1.
450 //
451 // Note that we don't worry about the opposite situation here
452 // (when BB2 is lighter than BB1). We will deal with this
453 // during the propagation phase. Right now, we just want to
454 // make sure that BB1 has the largest weight of all the
455 // members of its equivalence set.
456 Weight = std::max(Weight, BlockWeights[BB2]);
457 }
458 }
459 const BasicBlockT *EntryBB = getEntryBB(EC->getParent());
460 if (EC == EntryBB) {
461 BlockWeights[EC] = Samples->getHeadSamples() + 1;
462 } else {
463 BlockWeights[EC] = Weight;
464 }
465}
466
467/// Find equivalence classes.
468///
469/// Since samples may be missing from blocks, we can fill in the gaps by setting
470/// the weights of all the blocks in the same equivalence class to the same
471/// weight. To compute the concept of equivalence, we use dominance and loop
472/// information. Two blocks B1 and B2 are in the same equivalence class if B1
473/// dominates B2, B2 post-dominates B1 and both are in the same loop.
474///
475/// \param F The function to query.
476template <typename BT>
479 LLVM_DEBUG(dbgs() << "\nBlock equivalence classes\n");
480 // Find equivalence sets based on dominance and post-dominance information.
481 for (auto &BB : F) {
482 BasicBlockT *BB1 = &BB;
483
484 // Compute BB1's equivalence class once.
485 if (EquivalenceClass.count(BB1)) {
486 LLVM_DEBUG(printBlockEquivalence(dbgs(), BB1));
487 continue;
488 }
489
490 // By default, blocks are in their own equivalence class.
491 EquivalenceClass[BB1] = BB1;
492
493 // Traverse all the blocks dominated by BB1. We are looking for
494 // every basic block BB2 such that:
495 //
496 // 1- BB1 dominates BB2.
497 // 2- BB2 post-dominates BB1.
498 // 3- BB1 and BB2 are in the same loop nest.
499 //
500 // If all those conditions hold, it means that BB2 is executed
501 // as many times as BB1, so they are placed in the same equivalence
502 // class by making BB2's equivalence class be BB1.
503 DominatedBBs.clear();
504 DT->getDescendants(BB1, DominatedBBs);
505 findEquivalencesFor(BB1, DominatedBBs, &*PDT);
506
507 LLVM_DEBUG(printBlockEquivalence(dbgs(), BB1));
508 }
509
510 // Assign weights to equivalence classes.
511 //
512 // All the basic blocks in the same equivalence class will execute
513 // the same number of times. Since we know that the head block in
514 // each equivalence class has the largest weight, assign that weight
515 // to all the blocks in that equivalence class.
517 dbgs() << "\nAssign the same weight to all blocks in the same class\n");
518 for (auto &BI : F) {
519 const BasicBlockT *BB = &BI;
520 const BasicBlockT *EquivBB = EquivalenceClass[BB];
521 if (BB != EquivBB)
522 BlockWeights[BB] = BlockWeights[EquivBB];
523 LLVM_DEBUG(printBlockWeight(dbgs(), BB));
524 }
525}
526
527/// Visit the given edge to decide if it has a valid weight.
528///
529/// If \p E has not been visited before, we copy to \p UnknownEdge
530/// and increment the count of unknown edges.
531///
532/// \param E Edge to visit.
533/// \param NumUnknownEdges Current number of unknown edges.
534/// \param UnknownEdge Set if E has not been visited before.
535///
536/// \returns E's weight, if known. Otherwise, return 0.
537template <typename BT>
539 unsigned *NumUnknownEdges,
540 Edge *UnknownEdge) {
541 if (!VisitedEdges.count(E)) {
542 (*NumUnknownEdges)++;
543 *UnknownEdge = E;
544 return 0;
545 }
546
547 return EdgeWeights[E];
548}
549
550/// Propagate weights through incoming/outgoing edges.
551///
552/// If the weight of a basic block is known, and there is only one edge
553/// with an unknown weight, we can calculate the weight of that edge.
554///
555/// Similarly, if all the edges have a known count, we can calculate the
556/// count of the basic block, if needed.
557///
558/// \param F Function to process.
559/// \param UpdateBlockCount Whether we should update basic block counts that
560/// has already been annotated.
561///
562/// \returns True if new weights were assigned to edges or blocks.
563template <typename BT>
565 FunctionT &F, bool UpdateBlockCount) {
566 bool Changed = false;
567 LLVM_DEBUG(dbgs() << "\nPropagation through edges\n");
568 for (const auto &BI : F) {
569 const BasicBlockT *BB = &BI;
570 const BasicBlockT *EC = EquivalenceClass[BB];
571
572 // Visit all the predecessor and successor edges to determine
573 // which ones have a weight assigned already. Note that it doesn't
574 // matter that we only keep track of a single unknown edge. The
575 // only case we are interested in handling is when only a single
576 // edge is unknown (see setEdgeOrBlockWeight).
577 for (unsigned i = 0; i < 2; i++) {
578 uint64_t TotalWeight = 0;
579 unsigned NumUnknownEdges = 0, NumTotalEdges = 0;
580 Edge UnknownEdge, SelfReferentialEdge, SingleEdge;
581
582 if (i == 0) {
583 // First, visit all predecessor edges.
584 NumTotalEdges = Predecessors[BB].size();
585 for (auto *Pred : Predecessors[BB]) {
586 Edge E = std::make_pair(Pred, BB);
587 TotalWeight += visitEdge(E, &NumUnknownEdges, &UnknownEdge);
588 if (E.first == E.second)
589 SelfReferentialEdge = E;
590 }
591 if (NumTotalEdges == 1) {
592 SingleEdge = std::make_pair(Predecessors[BB][0], BB);
593 }
594 } else {
595 // On the second round, visit all successor edges.
596 NumTotalEdges = Successors[BB].size();
597 for (auto *Succ : Successors[BB]) {
598 Edge E = std::make_pair(BB, Succ);
599 TotalWeight += visitEdge(E, &NumUnknownEdges, &UnknownEdge);
600 }
601 if (NumTotalEdges == 1) {
602 SingleEdge = std::make_pair(BB, Successors[BB][0]);
603 }
604 }
605
606 // After visiting all the edges, there are three cases that we
607 // can handle immediately:
608 //
609 // - All the edge weights are known (i.e., NumUnknownEdges == 0).
610 // In this case, we simply check that the sum of all the edges
611 // is the same as BB's weight. If not, we change BB's weight
612 // to match. Additionally, if BB had not been visited before,
613 // we mark it visited.
614 //
615 // - Only one edge is unknown and BB has already been visited.
616 // In this case, we can compute the weight of the edge by
617 // subtracting the total block weight from all the known
618 // edge weights. If the edges weight more than BB, then the
619 // edge of the last remaining edge is set to zero.
620 //
621 // - There exists a self-referential edge and the weight of BB is
622 // known. In this case, this edge can be based on BB's weight.
623 // We add up all the other known edges and set the weight on
624 // the self-referential edge as we did in the previous case.
625 //
626 // In any other case, we must continue iterating. Eventually,
627 // all edges will get a weight, or iteration will stop when
628 // it reaches SampleProfileMaxPropagateIterations.
629 if (NumUnknownEdges <= 1) {
630 uint64_t &BBWeight = BlockWeights[EC];
631 if (NumUnknownEdges == 0) {
632 if (!VisitedBlocks.count(EC)) {
633 // If we already know the weight of all edges, the weight of the
634 // basic block can be computed. It should be no larger than the sum
635 // of all edge weights.
636 if (TotalWeight > BBWeight) {
637 BBWeight = TotalWeight;
638 Changed = true;
639 LLVM_DEBUG(dbgs() << "All edge weights for " << BB->getName()
640 << " known. Set weight for block: ";
641 printBlockWeight(dbgs(), BB););
642 }
643 } else if (NumTotalEdges == 1 &&
644 EdgeWeights[SingleEdge] < BlockWeights[EC]) {
645 // If there is only one edge for the visited basic block, use the
646 // block weight to adjust edge weight if edge weight is smaller.
647 EdgeWeights[SingleEdge] = BlockWeights[EC];
648 Changed = true;
649 }
650 } else if (NumUnknownEdges == 1 && VisitedBlocks.count(EC)) {
651 // If there is a single unknown edge and the block has been
652 // visited, then we can compute E's weight.
653 if (BBWeight >= TotalWeight)
654 EdgeWeights[UnknownEdge] = BBWeight - TotalWeight;
655 else
656 EdgeWeights[UnknownEdge] = 0;
657 const BasicBlockT *OtherEC;
658 if (i == 0)
659 OtherEC = EquivalenceClass[UnknownEdge.first];
660 else
661 OtherEC = EquivalenceClass[UnknownEdge.second];
662 // Edge weights should never exceed the BB weights it connects.
663 if (VisitedBlocks.count(OtherEC) &&
664 EdgeWeights[UnknownEdge] > BlockWeights[OtherEC])
665 EdgeWeights[UnknownEdge] = BlockWeights[OtherEC];
666 VisitedEdges.insert(UnknownEdge);
667 Changed = true;
668 LLVM_DEBUG(dbgs() << "Set weight for edge: ";
669 printEdgeWeight(dbgs(), UnknownEdge));
670 }
671 } else if (VisitedBlocks.count(EC) && BlockWeights[EC] == 0) {
672 // If a block Weights 0, all its in/out edges should weight 0.
673 if (i == 0) {
674 for (auto *Pred : Predecessors[BB]) {
675 Edge E = std::make_pair(Pred, BB);
676 EdgeWeights[E] = 0;
677 VisitedEdges.insert(E);
678 }
679 } else {
680 for (auto *Succ : Successors[BB]) {
681 Edge E = std::make_pair(BB, Succ);
682 EdgeWeights[E] = 0;
683 VisitedEdges.insert(E);
684 }
685 }
686 } else if (SelfReferentialEdge.first && VisitedBlocks.count(EC)) {
687 uint64_t &BBWeight = BlockWeights[BB];
688 // We have a self-referential edge and the weight of BB is known.
689 if (BBWeight >= TotalWeight)
690 EdgeWeights[SelfReferentialEdge] = BBWeight - TotalWeight;
691 else
692 EdgeWeights[SelfReferentialEdge] = 0;
693 VisitedEdges.insert(SelfReferentialEdge);
694 Changed = true;
695 LLVM_DEBUG(dbgs() << "Set self-referential edge weight to: ";
696 printEdgeWeight(dbgs(), SelfReferentialEdge));
697 }
698 if (UpdateBlockCount && !VisitedBlocks.count(EC) && TotalWeight > 0) {
699 BlockWeights[EC] = TotalWeight;
700 VisitedBlocks.insert(EC);
701 Changed = true;
702 }
703 }
704 }
705
706 return Changed;
707}
708
709/// Build in/out edge lists for each basic block in the CFG.
710///
711/// We are interested in unique edges. If a block B1 has multiple
712/// edges to another block B2, we only add a single B1->B2 edge.
713template <typename BT>
715 for (auto &BI : F) {
716 BasicBlockT *B1 = &BI;
717
718 // Add predecessors for B1.
720 if (!Predecessors[B1].empty())
721 llvm_unreachable("Found a stale predecessors list in a basic block.");
722 for (auto *B2 : getPredecessors(B1))
723 if (Visited.insert(B2).second)
724 Predecessors[B1].push_back(B2);
725
726 // Add successors for B1.
727 Visited.clear();
728 if (!Successors[B1].empty())
729 llvm_unreachable("Found a stale successors list in a basic block.");
730 for (auto *B2 : getSuccessors(B1))
731 if (Visited.insert(B2).second)
732 Successors[B1].push_back(B2);
733 }
734}
735
736/// Propagate weights into edges
737///
738/// The following rules are applied to every block BB in the CFG:
739///
740/// - If BB has a single predecessor/successor, then the weight
741/// of that edge is the weight of the block.
742///
743/// - If all incoming or outgoing edges are known except one, and the
744/// weight of the block is already known, the weight of the unknown
745/// edge will be the weight of the block minus the sum of all the known
746/// edges. If the sum of all the known edges is larger than BB's weight,
747/// we set the unknown edge weight to zero.
748///
749/// - If there is a self-referential edge, and the weight of the block is
750/// known, the weight for that edge is set to the weight of the block
751/// minus the weight of the other incoming edges to that block (if
752/// known).
753template <typename BT>
755 // Flow-based profile inference is only usable with BasicBlock instantiation
756 // of SampleProfileLoaderBaseImpl.
758 // Prepare block sample counts for inference.
759 BlockWeightMap SampleBlockWeights;
760 for (const auto &BI : F) {
761 ErrorOr<uint64_t> Weight = getBlockWeight(&BI);
762 if (Weight)
763 SampleBlockWeights[&BI] = Weight.get();
764 }
765 // Fill in BlockWeights and EdgeWeights using an inference algorithm.
766 applyProfi(F, Successors, SampleBlockWeights, BlockWeights, EdgeWeights);
767 } else {
768 bool Changed = true;
769 unsigned I = 0;
770
771 // If BB weight is larger than its corresponding loop's header BB weight,
772 // use the BB weight to replace the loop header BB weight.
773 for (auto &BI : F) {
774 BasicBlockT *BB = &BI;
775 LoopT *L = LI->getLoopFor(BB);
776 if (!L) {
777 continue;
778 }
779 BasicBlockT *Header = L->getHeader();
780 if (Header && BlockWeights[BB] > BlockWeights[Header]) {
781 BlockWeights[Header] = BlockWeights[BB];
782 }
783 }
784
785 // Propagate until we converge or we go past the iteration limit.
786 while (Changed && I++ < SampleProfileMaxPropagateIterations) {
787 Changed = propagateThroughEdges(F, false);
788 }
789
790 // The first propagation propagates BB counts from annotated BBs to unknown
791 // BBs. The 2nd propagation pass resets edges weights, and use all BB
792 // weights to propagate edge weights.
793 VisitedEdges.clear();
794 Changed = true;
795 while (Changed && I++ < SampleProfileMaxPropagateIterations) {
796 Changed = propagateThroughEdges(F, false);
797 }
798
799 // The 3rd propagation pass allows adjust annotated BB weights that are
800 // obviously wrong.
801 Changed = true;
802 while (Changed && I++ < SampleProfileMaxPropagateIterations) {
803 Changed = propagateThroughEdges(F, true);
804 }
805 }
806}
807
808template <typename BT>
810 FunctionT &F, BlockEdgeMap &Successors, BlockWeightMap &SampleBlockWeights,
811 BlockWeightMap &BlockWeights, EdgeWeightMap &EdgeWeights) {
812 auto Infer = SampleProfileInference<BT>(F, Successors, SampleBlockWeights);
813 Infer.apply(BlockWeights, EdgeWeights);
814}
815
816/// Generate branch weight metadata for all branches in \p F.
817///
818/// Branch weights are computed out of instruction samples using a
819/// propagation heuristic. Propagation proceeds in 3 phases:
820///
821/// 1- Assignment of block weights. All the basic blocks in the function
822/// are initial assigned the same weight as their most frequently
823/// executed instruction.
824///
825/// 2- Creation of equivalence classes. Since samples may be missing from
826/// blocks, we can fill in the gaps by setting the weights of all the
827/// blocks in the same equivalence class to the same weight. To compute
828/// the concept of equivalence, we use dominance and loop information.
829/// Two blocks B1 and B2 are in the same equivalence class if B1
830/// dominates B2, B2 post-dominates B1 and both are in the same loop.
831///
832/// 3- Propagation of block weights into edges. This uses a simple
833/// propagation heuristic. The following rules are applied to every
834/// block BB in the CFG:
835///
836/// - If BB has a single predecessor/successor, then the weight
837/// of that edge is the weight of the block.
838///
839/// - If all the edges are known except one, and the weight of the
840/// block is already known, the weight of the unknown edge will
841/// be the weight of the block minus the sum of all the known
842/// edges. If the sum of all the known edges is larger than BB's weight,
843/// we set the unknown edge weight to zero.
844///
845/// - If there is a self-referential edge, and the weight of the block is
846/// known, the weight for that edge is set to the weight of the block
847/// minus the weight of the other incoming edges to that block (if
848/// known).
849///
850/// Since this propagation is not guaranteed to finalize for every CFG, we
851/// only allow it to proceed for a limited number of iterations (controlled
852/// by -sample-profile-max-propagate-iterations).
853///
854/// FIXME: Try to replace this propagation heuristic with a scheme
855/// that is guaranteed to finalize. A work-list approach similar to
856/// the standard value propagation algorithm used by SSA-CCP might
857/// work here.
858///
859/// \param F The function to query.
860///
861/// \returns true if \p F was modified. Returns false, otherwise.
862template <typename BT>
864 FunctionT &F, const DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
865 bool Changed = (InlinedGUIDs.size() != 0);
866
867 // Compute basic block weights.
868 Changed |= computeBlockWeights(F);
869
870 if (Changed) {
871 // Initialize propagation.
872 initWeightPropagation(F, InlinedGUIDs);
873
874 // Propagate weights to all edges.
875 propagateWeights(F);
876
877 // Post-process propagated weights.
878 finalizeWeightPropagation(F, InlinedGUIDs);
879 }
880
881 return Changed;
882}
883
884template <typename BT>
886 FunctionT &F, const DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
887 // Add an entry count to the function using the samples gathered at the
888 // function entry.
889 // Sets the GUIDs that are inlined in the profiled binary. This is used
890 // for ThinLink to make correct liveness analysis, and also make the IR
891 // match the profiled binary before annotation.
893 ProfileCount(Samples->getHeadSamples() + 1, Function::PCT_Real),
894 &InlinedGUIDs);
895
897 // Compute dominance and loop info needed for propagation.
898 computeDominanceAndLoopInfo(F);
899
900 // Find equivalence classes.
901 findEquivalenceClasses(F);
902 }
903
904 // Before propagation starts, build, for each block, a list of
905 // unique predecessors and successors. This is necessary to handle
906 // identical edges in multiway branches. Since we visit all blocks and all
907 // edges of the CFG, it is cleaner to build these lists once at the start
908 // of the pass.
909 buildEdges(F);
910}
911
912template <typename BT>
914 FunctionT &F, const DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
915 // If we utilize a flow-based count inference, then we trust the computed
916 // counts and set the entry count as computed by the algorithm. This is
917 // primarily done to sync the counts produced by profi and BFI inference,
918 // which uses the entry count for mass propagation.
919 // If profi produces a zero-value for the entry count, we fallback to
920 // Samples->getHeadSamples() + 1 to avoid functions with zero count.
922 const BasicBlockT *EntryBB = getEntryBB(&F);
923 ErrorOr<uint64_t> EntryWeight = getBlockWeight(EntryBB);
924 if (BlockWeights[EntryBB] > 0) {
926 ProfileCount(BlockWeights[EntryBB], Function::PCT_Real),
927 &InlinedGUIDs);
928 }
929 }
930}
931
932template <typename BT>
934 // If coverage checking was requested, compute it now.
935 const Function &Func = getFunction(F);
937 unsigned Used = CoverageTracker.countUsedRecords(Samples, PSI);
938 unsigned Total = CoverageTracker.countBodyRecords(Samples, PSI);
939 unsigned Coverage = CoverageTracker.computeCoverage(Used, Total);
940 if (Coverage < SampleProfileRecordCoverage) {
941 Func.getContext().diagnose(DiagnosticInfoSampleProfile(
942 Func.getSubprogram()->getFilename(), getFunctionLoc(F),
943 Twine(Used) + " of " + Twine(Total) + " available profile records (" +
944 Twine(Coverage) + "%) were applied",
945 DS_Warning));
946 }
947 }
948
950 uint64_t Used = CoverageTracker.getTotalUsedSamples();
951 uint64_t Total = CoverageTracker.countBodySamples(Samples, PSI);
952 unsigned Coverage = CoverageTracker.computeCoverage(Used, Total);
953 if (Coverage < SampleProfileSampleCoverage) {
954 Func.getContext().diagnose(DiagnosticInfoSampleProfile(
955 Func.getSubprogram()->getFilename(), getFunctionLoc(F),
956 Twine(Used) + " of " + Twine(Total) + " available profile samples (" +
957 Twine(Coverage) + "%) were applied",
958 DS_Warning));
959 }
960 }
961}
962
963/// Get the line number for the function header.
964///
965/// This looks up function \p F in the current compilation unit and
966/// retrieves the line number where the function is defined. This is
967/// line 0 for all the samples read from the profile file. Every line
968/// number is relative to this line.
969///
970/// \param F Function object to query.
971///
972/// \returns the line number where \p F is defined. If it returns 0,
973/// it means that there is no debug information available for \p F.
974template <typename BT>
976 const Function &Func = getFunction(F);
977 if (DISubprogram *S = Func.getSubprogram())
978 return S->getLine();
979
981 return 0;
982
983 // If the start of \p F is missing, emit a diagnostic to inform the user
984 // about the missed opportunity.
985 Func.getContext().diagnose(DiagnosticInfoSampleProfile(
986 "No debug information found in function " + Func.getName() +
987 ": Function profile not used",
988 DS_Warning));
989 return 0;
990}
991
992template <typename BT>
994 FunctionT &F) {
995 DT.reset(new DominatorTree);
996 DT->recalculate(F);
997
998 PDT.reset(new PostDominatorTree(F));
999
1000 LI.reset(new LoopInfo);
1001 LI->analyze(*DT);
1002}
1003
1004#undef DEBUG_TYPE
1005
1006} // namespace llvm
1007#endif // LLVM_TRANSFORMS_UTILS_SAMPLEPROFILELOADERBASEIMPL_H
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
std::string Name
static Function * getFunction(Constant *C)
Definition: Evaluator.cpp:236
#define DEBUG_TYPE
This file defines a set of templates that efficiently compute a dominator tree over a generic graph.
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
Module.h This file contains the declarations for the Module class.
static StringRef getName(Value *V)
This file provides the interface for the profile inference algorithm, profi.
This file provides the utility functions for the sampled PGO loader base implementation.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
LLVM Basic Block Representation.
Definition: BasicBlock.h:56
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Debug location.
unsigned getBaseDiscriminator() const
Returns the base discriminator stored in the discriminator.
Subprogram description.
A debug info location.
Definition: DebugLoc.h:33
unsigned getLine() const
Definition: DebugLoc.cpp:24
Implements a dense probed hash-table based set.
Definition: DenseSet.h:271
Diagnostic information for the sample profiler.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
Represents either an error or a value T.
Definition: ErrorOr.h:56
reference get()
Definition: ErrorOr.h:150
Class to represent profile counts.
Definition: Function.h:252
void setEntryCount(ProfileCount Count, const DenseSet< GlobalValue::GUID > *Imports=nullptr)
Set the entry count for this function.
Definition: Function.cpp:2059
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:547
Diagnostic information for optimization analysis remarks.
The optimization diagnostic interface.
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
Analysis providing profile information.
Sample profile inference pass.
bool computeAndPropagateWeights(FunctionT &F, const DenseSet< GlobalValue::GUID > &InlinedGUIDs)
Generate branch weight metadata for all branches in F.
typename afdo_detail::IRTraits< BT >::BlockFrequencyInfoT BlockFrequencyInfoT
std::string Filename
Name of the profile file to load.
EquivalenceClassMap EquivalenceClass
Equivalence classes for block weights.
const BasicBlockT * getEntryBB(const FunctionT *F)
typename afdo_detail::IRTraits< BT >::OptRemarkEmitterT OptRemarkEmitterT
ErrorOr< uint64_t > getBlockWeight(const BasicBlockT *BB)
Compute the weight of a basic block.
ProfileSummaryInfo * PSI
Profile Summary Info computed from sample profile.
typename afdo_detail::IRTraits< BT >::PostDominatorTreeT PostDominatorTreeT
unsigned getFunctionLoc(FunctionT &Func)
Get the line number for the function header.
ErrorOr< uint64_t > getInstWeightImpl(const InstructionT &Inst)
virtual ErrorOr< uint64_t > getInstWeight(const InstructionT &Inst)
Get the weight for an instruction.
typename afdo_detail::IRTraits< BT >::DominatorTreePtrT DominatorTreePtrT
SmallPtrSet< const BasicBlockT *, 32 > VisitedBlocks
Set of visited blocks during propagation.
SampleProfileLoaderBaseImpl(std::string Name, std::string RemapName)
typename afdo_detail::IRTraits< BT >::PredRangeT PredRangeT
void applyProfi(FunctionT &F, BlockEdgeMap &Successors, BlockWeightMap &SampleBlockWeights, BlockWeightMap &BlockWeights, EdgeWeightMap &EdgeWeights)
typename afdo_detail::IRTraits< BT >::PostDominatorTreePtrT PostDominatorTreePtrT
void printBlockWeight(raw_ostream &OS, const BasicBlockT *BB) const
Print the weight of block BB on stream OS.
void printBlockEquivalence(raw_ostream &OS, const BasicBlockT *BB)
Print the equivalence class of block BB on stream OS.
std::string RemappingFilename
Name of the profile remapping file to load.
SampleCoverageTracker CoverageTracker
Profile coverage tracker.
SmallSet< Edge, 32 > VisitedEdges
Set of visited edges during propagation.
BlockEdgeMap Predecessors
Predecessors for each basic block in the CFG.
EdgeWeightMap EdgeWeights
Map edges to their computed weights.
bool propagateThroughEdges(FunctionT &F, bool UpdateBlockCount)
Propagate weights through incoming/outgoing edges.
typename afdo_detail::IRTraits< BT >::FunctionT FunctionT
uint64_t visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge)
Visit the given edge to decide if it has a valid weight.
void initWeightPropagation(FunctionT &F, const DenseSet< GlobalValue::GUID > &InlinedGUIDs)
typename afdo_detail::IRTraits< BT >::InstructionT InstructionT
BlockEdgeMap Successors
Successors for each basic block in the CFG.
void finalizeWeightPropagation(FunctionT &F, const DenseSet< GlobalValue::GUID > &InlinedGUIDs)
typename afdo_detail::IRTraits< BT >::SuccRangeT SuccRangeT
bool computeBlockWeights(FunctionT &F)
Compute and store the weights of every basic block.
typename afdo_detail::IRTraits< BT >::OptRemarkAnalysisT OptRemarkAnalysisT
virtual const FunctionSamples * findFunctionSamples(const InstructionT &I) const
Get the FunctionSamples for an instruction.
FunctionSamples * Samples
Samples collected for the body of this function.
BlockWeightMap BlockWeights
Map basic blocks to their computed weights.
typename afdo_detail::IRTraits< BT >::LoopT LoopT
void findEquivalenceClasses(FunctionT &F)
Find equivalence classes.
OptRemarkEmitterT * ORE
Optimization Remark Emitter used to emit diagnostic remarks.
void clearFunctionData(bool ResetDT=true)
Clear all the per-function data used to load samples and propagate weights.
typename afdo_detail::IRTraits< BT >::BasicBlockT BasicBlockT
void buildEdges(FunctionT &F)
Build in/out edge lists for each basic block in the CFG.
void findEquivalencesFor(BasicBlockT *BB1, ArrayRef< BasicBlockT * > Descendants, PostDominatorTreeT *DomTree)
Find equivalence classes for the given block.
DominatorTreePtrT DT
Dominance, post-dominance and loop information.
DenseMap< const DILocation *, const FunctionSamples * > DILocation2SampleMap
void printEdgeWeight(raw_ostream &OS, Edge E)
Print the weight of edge E on stream OS.
typename afdo_detail::IRTraits< BT >::LoopInfoPtrT LoopInfoPtrT
void propagateWeights(FunctionT &F)
Propagate weights into edges.
std::pair< const BasicBlockT *, const BasicBlockT * > Edge
std::unique_ptr< SampleProfileReader > Reader
Profile reader object.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
size_type size() const
Definition: DenseSet.h:81
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
Representation of the samples collected for a function.
Definition: SampleProf.h:718
static unsigned getOffset(const DILocation *DIL)
Returns the line offset to the start line of the subprogram.
Definition: SampleProf.cpp:216
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
DiagnosticInfoOptimizationBase::Argument NV
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
Function::ProfileCount ProfileCount
auto successors(const MachineBasicBlock *BB)
cl::opt< unsigned > SampleProfileSampleCoverage
cl::opt< unsigned > SampleProfileRecordCoverage
cl::opt< unsigned > SampleProfileMaxPropagateIterations
cl::opt< bool > SampleProfileUseProfi
cl::opt< bool > EnableFSDiscriminator
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
iterator_range< succ_iterator > succ_range
Definition: CFG.h:244
cl::opt< bool > NoWarnSampleUnused
iterator_range< pred_iterator > pred_range
Definition: CFG.h:107
@ DS_Warning
auto predecessors(const MachineBasicBlock *BB)
std::unique_ptr< PostDominatorTree > PostDominatorTreePtrT
static pred_range getPredecessors(BasicBlock *BB)
static succ_range getSuccessors(BasicBlock *BB)
std::unique_ptr< DominatorTree > DominatorTreePtrT
static const BasicBlock * getEntryBB(const Function *F)