LLVM 20.0.0git
SampleProfileLoaderBaseImpl.h
Go to the documentation of this file.
1////===- SampleProfileLoadBaseImpl.h - Profile loader base impl --*- C++-*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file provides the interface for the sampled PGO profile loader base
11/// implementation.
12//
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_TRANSFORMS_UTILS_SAMPLEPROFILELOADERBASEIMPL_H
16#define LLVM_TRANSFORMS_UTILS_SAMPLEPROFILELOADERBASEIMPL_H
17
18#include "llvm/ADT/ArrayRef.h"
19#include "llvm/ADT/DenseMap.h"
20#include "llvm/ADT/DenseSet.h"
23#include "llvm/ADT/SmallSet.h"
29#include "llvm/IR/BasicBlock.h"
30#include "llvm/IR/CFG.h"
32#include "llvm/IR/DebugLoc.h"
33#include "llvm/IR/Dominators.h"
34#include "llvm/IR/Function.h"
35#include "llvm/IR/Instruction.h"
37#include "llvm/IR/Module.h"
38#include "llvm/IR/PseudoProbe.h"
46
47namespace llvm {
48using namespace sampleprof;
49using namespace sampleprofutil;
51
52namespace vfs {
53class FileSystem;
54} // namespace vfs
55
56#define DEBUG_TYPE "sample-profile-impl"
57
58namespace afdo_detail {
59
60template <typename BlockT> struct IRTraits;
61template <> struct IRTraits<BasicBlock> {
66 using LoopT = Loop;
67 using LoopInfoPtrT = std::unique_ptr<LoopInfo>;
68 using DominatorTreePtrT = std::unique_ptr<DominatorTree>;
70 using PostDominatorTreePtrT = std::unique_ptr<PostDominatorTree>;
75 static Function &getFunction(Function &F) { return F; }
76 static const BasicBlock *getEntryBB(const Function *F) {
77 return &F->getEntryBlock();
78 }
80 static succ_range getSuccessors(BasicBlock *BB) { return successors(BB); }
81};
82
83} // end namespace afdo_detail
84
85// This class serves sample counts correlation for SampleProfileLoader by
86// analyzing pseudo probes and their function descriptors injected by
87// SampleProfileProber.
90
91public:
93 if (NamedMDNode *FuncInfo =
94 M.getNamedMetadata(PseudoProbeDescMetadataName)) {
95 for (const auto *Operand : FuncInfo->operands()) {
96 const auto *MD = cast<MDNode>(Operand);
97 auto GUID = mdconst::dyn_extract<ConstantInt>(MD->getOperand(0))
98 ->getZExtValue();
99 auto Hash = mdconst::dyn_extract<ConstantInt>(MD->getOperand(1))
100 ->getZExtValue();
101 GUIDToProbeDescMap.try_emplace(GUID, PseudoProbeDescriptor(GUID, Hash));
102 }
103 }
104 }
105
107 auto I = GUIDToProbeDescMap.find(GUID);
108 return I == GUIDToProbeDescMap.end() ? nullptr : &I->second;
109 }
110
111 const PseudoProbeDescriptor *getDesc(StringRef FProfileName) const {
112 return getDesc(Function::getGUID(FProfileName));
113 }
114
117 }
118
120 const FunctionSamples &Samples) const {
121 return FuncDesc.getFunctionHash() != Samples.getFunctionHash();
122 }
123
124 bool moduleIsProbed(const Module &M) const {
125 return M.getNamedMetadata(PseudoProbeDescMetadataName);
126 }
127
128 bool profileIsValid(const Function &F, const FunctionSamples &Samples) const {
129 const auto *Desc = getDesc(F);
130 bool IsAvailableExternallyLinkage =
132 // Always check the function attribute to determine checksum mismatch for
133 // `available_externally` functions even if their desc are available. This
134 // is because the desc is computed based on the original internal function
135 // and it's substituted by the `available_externally` function during link
136 // time. However, when unstable IR or ODR violation issue occurs, the
137 // definitions of the same function across different translation units could
138 // be different and result in different checksums. So we should use the
139 // state from the new (available_externally) function, which is saved in its
140 // attribute.
141 // TODO: If the function's profile only exists as nested inlinee profile in
142 // a different module, we don't have the attr mismatch state(unknown), we
143 // need to fix it later.
144 if (IsAvailableExternallyLinkage || !Desc)
145 return !F.hasFnAttribute("profile-checksum-mismatch");
146
147 return Desc && !profileIsHashMismatched(*Desc, Samples);
148 }
149};
150
151
152
153extern cl::opt<bool> SampleProfileUseProfi;
154
155static inline bool skipProfileForFunction(const Function &F) {
156 return F.isDeclaration() || !F.hasFnAttribute("use-sample-profile");
157}
158
159static inline void
161 std::vector<Function *> &FunctionOrderList) {
162 CG.buildRefSCCs();
164 for (LazyCallGraph::SCC &C : RC) {
165 for (LazyCallGraph::Node &N : C) {
166 Function &F = N.getFunction();
168 FunctionOrderList.push_back(&F);
169 }
170 }
171 }
172 std::reverse(FunctionOrderList.begin(), FunctionOrderList.end());
173}
174
175template <typename FT> class SampleProfileLoaderBaseImpl {
176public:
177 SampleProfileLoaderBaseImpl(std::string Name, std::string RemapName,
179 : Filename(Name), RemappingFilename(RemapName), FS(std::move(FS)) {}
180 void dump() { Reader->dump(); }
181
183 using BT = std::remove_pointer_t<NodeRef>;
203
207 using Edge = std::pair<const BasicBlockT *, const BasicBlockT *>;
211
212protected:
215
218 }
221 }
224 }
227 }
228
229 unsigned getFunctionLoc(FunctionT &Func);
236 virtual const FunctionSamples *
244 ArrayRef<BasicBlockT *> Descendants,
245 PostDominatorTreeT *DomTree);
248 BlockWeightMap &SampleBlockWeights,
250 uint64_t visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge);
252 bool propagateThroughEdges(FunctionT &F, bool UpdateBlockCount);
253 void clearFunctionData(bool ResetDT = true);
255 bool
257 const DenseSet<GlobalValue::GUID> &InlinedGUIDs);
259 const DenseSet<GlobalValue::GUID> &InlinedGUIDs);
260 void
262 const DenseSet<GlobalValue::GUID> &InlinedGUIDs);
264
265 /// Map basic blocks to their computed weights.
266 ///
267 /// The weight of a basic block is defined to be the maximum
268 /// of all the instruction weights in that block.
270
271 /// Map edges to their computed weights.
272 ///
273 /// Edge weights are computed by propagating basic block weights in
274 /// SampleProfile::propagateWeights.
276
277 /// Set of visited blocks during propagation.
279
280 /// Set of visited edges during propagation.
282
283 /// Equivalence classes for block weights.
284 ///
285 /// Two blocks BB1 and BB2 are in the same equivalence class if they
286 /// dominate and post-dominate each other, and they are in the same loop
287 /// nest. When this happens, the two blocks are guaranteed to execute
288 /// the same number of times.
290
291 /// Dominance, post-dominance and loop information.
295
296 /// Predecessors for each basic block in the CFG.
298
299 /// Successors for each basic block in the CFG.
301
302 /// Profile coverage tracker.
304
305 /// Profile reader object.
306 std::unique_ptr<SampleProfileReader> Reader;
307
308 /// Synthetic samples created by duplicating the samples of inlined functions
309 /// from the original profile as if they were top level sample profiles.
310 /// Use std::map because insertion may happen while its content is referenced.
311 std::map<SampleContext, FunctionSamples> OutlineFunctionSamples;
312
313 // A pseudo probe helper to correlate the imported sample counts.
314 std::unique_ptr<PseudoProbeManager> ProbeManager;
315
316 /// Samples collected for the body of this function.
318
319 /// Name of the profile file to load.
320 std::string Filename;
321
322 /// Name of the profile remapping file to load.
323 std::string RemappingFilename;
324
325 /// VirtualFileSystem to load profile files from.
327
328 /// Profile Summary Info computed from sample profile.
330
331 /// Optimization Remark Emitter used to emit diagnostic remarks.
333};
334
335/// Clear all the per-function data used to load samples and propagate weights.
336template <typename BT>
338 BlockWeights.clear();
339 EdgeWeights.clear();
340 VisitedBlocks.clear();
341 VisitedEdges.clear();
342 EquivalenceClass.clear();
343 if (ResetDT) {
344 DT = nullptr;
345 PDT = nullptr;
346 LI = nullptr;
347 }
348 Predecessors.clear();
349 Successors.clear();
350 CoverageTracker.clear();
351}
352
353#ifndef NDEBUG
354/// Print the weight of edge \p E on stream \p OS.
355///
356/// \param OS Stream to emit the output to.
357/// \param E Edge to print.
358template <typename BT>
360 OS << "weight[" << E.first->getName() << "->" << E.second->getName()
361 << "]: " << EdgeWeights[E] << "\n";
362}
363
364/// Print the equivalence class of block \p BB on stream \p OS.
365///
366/// \param OS Stream to emit the output to.
367/// \param BB Block to print.
368template <typename BT>
370 raw_ostream &OS, const BasicBlockT *BB) {
371 const BasicBlockT *Equiv = EquivalenceClass[BB];
372 OS << "equivalence[" << BB->getName()
373 << "]: " << ((Equiv) ? EquivalenceClass[BB]->getName() : "NONE") << "\n";
374}
375
376/// Print the weight of block \p BB on stream \p OS.
377///
378/// \param OS Stream to emit the output to.
379/// \param BB Block to print.
380template <typename BT>
382 raw_ostream &OS, const BasicBlockT *BB) const {
383 const auto &I = BlockWeights.find(BB);
384 uint64_t W = (I == BlockWeights.end() ? 0 : I->second);
385 OS << "weight[" << BB->getName() << "]: " << W << "\n";
386}
387#endif
388
389/// Get the weight for an instruction.
390///
391/// The "weight" of an instruction \p Inst is the number of samples
392/// collected on that instruction at runtime. To retrieve it, we
393/// need to compute the line number of \p Inst relative to the start of its
394/// function. We use HeaderLineno to compute the offset. We then
395/// look up the samples collected for \p Inst using BodySamples.
396///
397/// \param Inst Instruction to query.
398///
399/// \returns the weight of \p Inst.
400template <typename BT>
404 return getProbeWeight(Inst);
405 return getInstWeightImpl(Inst);
406}
407
408template <typename BT>
411 const FunctionSamples *FS = findFunctionSamples(Inst);
412 if (!FS)
413 return std::error_code();
414
415 const DebugLoc &DLoc = Inst.getDebugLoc();
416 if (!DLoc)
417 return std::error_code();
418
419 const DILocation *DIL = DLoc;
420 uint32_t LineOffset = FunctionSamples::getOffset(DIL);
421 uint32_t Discriminator;
423 Discriminator = DIL->getDiscriminator();
424 else
425 Discriminator = DIL->getBaseDiscriminator();
426
427 ErrorOr<uint64_t> R = FS->findSamplesAt(LineOffset, Discriminator);
428 if (R) {
429 bool FirstMark =
430 CoverageTracker.markSamplesUsed(FS, LineOffset, Discriminator, R.get());
431 if (FirstMark) {
432 ORE->emit([&]() {
433 OptRemarkAnalysisT Remark(DEBUG_TYPE, "AppliedSamples", &Inst);
434 Remark << "Applied " << ore::NV("NumSamples", *R);
435 Remark << " samples from profile (offset: ";
436 Remark << ore::NV("LineOffset", LineOffset);
437 if (Discriminator) {
438 Remark << ".";
439 Remark << ore::NV("Discriminator", Discriminator);
440 }
441 Remark << ")";
442 return Remark;
443 });
444 }
445 LLVM_DEBUG(dbgs() << " " << DLoc.getLine() << "." << Discriminator << ":"
446 << Inst << " (line offset: " << LineOffset << "."
447 << Discriminator << " - weight: " << R.get() << ")\n");
448 }
449 return R;
450}
451
452// Here use error_code to represent: 1) The dangling probe. 2) Ignore the weight
453// of non-probe instruction. So if all instructions of the BB give error_code,
454// tell the inference algorithm to infer the BB weight.
455template <typename BT>
459 "Profile is not pseudo probe based");
460 std::optional<PseudoProbe> Probe = extractProbe(Inst);
461 // Ignore the non-probe instruction. If none of the instruction in the BB is
462 // probe, we choose to infer the BB's weight.
463 if (!Probe)
464 return std::error_code();
465
466 const FunctionSamples *FS = findFunctionSamples(Inst);
467 // If none of the instruction has FunctionSample, we choose to return zero
468 // value sample to indicate the BB is cold. This could happen when the
469 // instruction is from inlinee and no profile data is found.
470 // FIXME: This should not be affected by the source drift issue as 1) if the
471 // newly added function is top-level inliner, it won't match the CFG checksum
472 // in the function profile or 2) if it's the inlinee, the inlinee should have
473 // a profile, otherwise it wouldn't be inlined. For non-probe based profile,
474 // we can improve it by adding a switch for profile-sample-block-accurate for
475 // block level counts in the future.
476 if (!FS)
477 return 0;
478
479 auto R = FS->findSamplesAt(Probe->Id, Probe->Discriminator);
480 if (R) {
481 uint64_t Samples = R.get() * Probe->Factor;
482 bool FirstMark = CoverageTracker.markSamplesUsed(FS, Probe->Id, 0, Samples);
483 if (FirstMark) {
484 ORE->emit([&]() {
485 OptRemarkAnalysisT Remark(DEBUG_TYPE, "AppliedSamples", &Inst);
486 Remark << "Applied " << ore::NV("NumSamples", Samples);
487 Remark << " samples from profile (ProbeId=";
488 Remark << ore::NV("ProbeId", Probe->Id);
489 if (Probe->Discriminator) {
490 Remark << ".";
491 Remark << ore::NV("Discriminator", Probe->Discriminator);
492 }
493 Remark << ", Factor=";
494 Remark << ore::NV("Factor", Probe->Factor);
495 Remark << ", OriginalSamples=";
496 Remark << ore::NV("OriginalSamples", R.get());
497 Remark << ")";
498 return Remark;
499 });
500 }
501 LLVM_DEBUG({dbgs() << " " << Probe->Id;
502 if (Probe->Discriminator)
503 dbgs() << "." << Probe->Discriminator;
504 dbgs() << ":" << Inst << " - weight: " << R.get()
505 << " - factor: " << format("%0.2f", Probe->Factor) << ")\n";});
506 return Samples;
507 }
508 return R;
509}
510
511/// Compute the weight of a basic block.
512///
513/// The weight of basic block \p BB is the maximum weight of all the
514/// instructions in BB.
515///
516/// \param BB The basic block to query.
517///
518/// \returns the weight for \p BB.
519template <typename BT>
522 uint64_t Max = 0;
523 bool HasWeight = false;
524 for (auto &I : *BB) {
525 const ErrorOr<uint64_t> &R = getInstWeight(I);
526 if (R) {
527 Max = std::max(Max, R.get());
528 HasWeight = true;
529 }
530 }
531 return HasWeight ? ErrorOr<uint64_t>(Max) : std::error_code();
532}
533
534/// Compute and store the weights of every basic block.
535///
536/// This populates the BlockWeights map by computing
537/// the weights of every basic block in the CFG.
538///
539/// \param F The function to query.
540template <typename BT>
542 bool Changed = false;
543 LLVM_DEBUG(dbgs() << "Block weights\n");
544 for (const auto &BB : F) {
545 ErrorOr<uint64_t> Weight = getBlockWeight(&BB);
546 if (Weight) {
547 BlockWeights[&BB] = Weight.get();
548 VisitedBlocks.insert(&BB);
549 Changed = true;
550 }
551 LLVM_DEBUG(printBlockWeight(dbgs(), &BB));
552 }
553
554 return Changed;
555}
556
557/// Get the FunctionSamples for an instruction.
558///
559/// The FunctionSamples of an instruction \p Inst is the inlined instance
560/// in which that instruction is coming from. We traverse the inline stack
561/// of that instruction, and match it with the tree nodes in the profile.
562///
563/// \param Inst Instruction to query.
564///
565/// \returns the FunctionSamples pointer to the inlined instance.
566template <typename BT>
568 const InstructionT &Inst) const {
569 const DILocation *DIL = Inst.getDebugLoc();
570 if (!DIL)
571 return Samples;
572
573 auto it = DILocation2SampleMap.try_emplace(DIL, nullptr);
574 if (it.second) {
575 it.first->second = Samples->findFunctionSamples(DIL, Reader->getRemapper());
576 }
577 return it.first->second;
578}
579
580/// Find equivalence classes for the given block.
581///
582/// This finds all the blocks that are guaranteed to execute the same
583/// number of times as \p BB1. To do this, it traverses all the
584/// descendants of \p BB1 in the dominator or post-dominator tree.
585///
586/// A block BB2 will be in the same equivalence class as \p BB1 if
587/// the following holds:
588///
589/// 1- \p BB1 is a descendant of BB2 in the opposite tree. So, if BB2
590/// is a descendant of \p BB1 in the dominator tree, then BB2 should
591/// dominate BB1 in the post-dominator tree.
592///
593/// 2- Both BB2 and \p BB1 must be in the same loop.
594///
595/// For every block BB2 that meets those two requirements, we set BB2's
596/// equivalence class to \p BB1.
597///
598/// \param BB1 Block to check.
599/// \param Descendants Descendants of \p BB1 in either the dom or pdom tree.
600/// \param DomTree Opposite dominator tree. If \p Descendants is filled
601/// with blocks from \p BB1's dominator tree, then
602/// this is the post-dominator tree, and vice versa.
603template <typename BT>
605 BasicBlockT *BB1, ArrayRef<BasicBlockT *> Descendants,
606 PostDominatorTreeT *DomTree) {
607 const BasicBlockT *EC = EquivalenceClass[BB1];
608 uint64_t Weight = BlockWeights[EC];
609 for (const auto *BB2 : Descendants) {
610 bool IsDomParent = DomTree->dominates(BB2, BB1);
611 bool IsInSameLoop = LI->getLoopFor(BB1) == LI->getLoopFor(BB2);
612 if (BB1 != BB2 && IsDomParent && IsInSameLoop) {
613 EquivalenceClass[BB2] = EC;
614 // If BB2 is visited, then the entire EC should be marked as visited.
615 if (VisitedBlocks.count(BB2)) {
616 VisitedBlocks.insert(EC);
617 }
618
619 // If BB2 is heavier than BB1, make BB2 have the same weight
620 // as BB1.
621 //
622 // Note that we don't worry about the opposite situation here
623 // (when BB2 is lighter than BB1). We will deal with this
624 // during the propagation phase. Right now, we just want to
625 // make sure that BB1 has the largest weight of all the
626 // members of its equivalence set.
627 Weight = std::max(Weight, BlockWeights[BB2]);
628 }
629 }
630 const BasicBlockT *EntryBB = getEntryBB(EC->getParent());
631 if (EC == EntryBB) {
632 BlockWeights[EC] = Samples->getHeadSamples() + 1;
633 } else {
634 BlockWeights[EC] = Weight;
635 }
636}
637
638/// Find equivalence classes.
639///
640/// Since samples may be missing from blocks, we can fill in the gaps by setting
641/// the weights of all the blocks in the same equivalence class to the same
642/// weight. To compute the concept of equivalence, we use dominance and loop
643/// information. Two blocks B1 and B2 are in the same equivalence class if B1
644/// dominates B2, B2 post-dominates B1 and both are in the same loop.
645///
646/// \param F The function to query.
647template <typename BT>
650 LLVM_DEBUG(dbgs() << "\nBlock equivalence classes\n");
651 // Find equivalence sets based on dominance and post-dominance information.
652 for (auto &BB : F) {
653 BasicBlockT *BB1 = &BB;
654
655 // Compute BB1's equivalence class once.
656 if (EquivalenceClass.count(BB1)) {
657 LLVM_DEBUG(printBlockEquivalence(dbgs(), BB1));
658 continue;
659 }
660
661 // By default, blocks are in their own equivalence class.
662 EquivalenceClass[BB1] = BB1;
663
664 // Traverse all the blocks dominated by BB1. We are looking for
665 // every basic block BB2 such that:
666 //
667 // 1- BB1 dominates BB2.
668 // 2- BB2 post-dominates BB1.
669 // 3- BB1 and BB2 are in the same loop nest.
670 //
671 // If all those conditions hold, it means that BB2 is executed
672 // as many times as BB1, so they are placed in the same equivalence
673 // class by making BB2's equivalence class be BB1.
674 DominatedBBs.clear();
675 DT->getDescendants(BB1, DominatedBBs);
676 findEquivalencesFor(BB1, DominatedBBs, &*PDT);
677
678 LLVM_DEBUG(printBlockEquivalence(dbgs(), BB1));
679 }
680
681 // Assign weights to equivalence classes.
682 //
683 // All the basic blocks in the same equivalence class will execute
684 // the same number of times. Since we know that the head block in
685 // each equivalence class has the largest weight, assign that weight
686 // to all the blocks in that equivalence class.
688 dbgs() << "\nAssign the same weight to all blocks in the same class\n");
689 for (auto &BI : F) {
690 const BasicBlockT *BB = &BI;
691 const BasicBlockT *EquivBB = EquivalenceClass[BB];
692 if (BB != EquivBB)
693 BlockWeights[BB] = BlockWeights[EquivBB];
694 LLVM_DEBUG(printBlockWeight(dbgs(), BB));
695 }
696}
697
698/// Visit the given edge to decide if it has a valid weight.
699///
700/// If \p E has not been visited before, we copy to \p UnknownEdge
701/// and increment the count of unknown edges.
702///
703/// \param E Edge to visit.
704/// \param NumUnknownEdges Current number of unknown edges.
705/// \param UnknownEdge Set if E has not been visited before.
706///
707/// \returns E's weight, if known. Otherwise, return 0.
708template <typename BT>
710 unsigned *NumUnknownEdges,
711 Edge *UnknownEdge) {
712 if (!VisitedEdges.count(E)) {
713 (*NumUnknownEdges)++;
714 *UnknownEdge = E;
715 return 0;
716 }
717
718 return EdgeWeights[E];
719}
720
721/// Propagate weights through incoming/outgoing edges.
722///
723/// If the weight of a basic block is known, and there is only one edge
724/// with an unknown weight, we can calculate the weight of that edge.
725///
726/// Similarly, if all the edges have a known count, we can calculate the
727/// count of the basic block, if needed.
728///
729/// \param F Function to process.
730/// \param UpdateBlockCount Whether we should update basic block counts that
731/// has already been annotated.
732///
733/// \returns True if new weights were assigned to edges or blocks.
734template <typename BT>
736 FunctionT &F, bool UpdateBlockCount) {
737 bool Changed = false;
738 LLVM_DEBUG(dbgs() << "\nPropagation through edges\n");
739 for (const auto &BI : F) {
740 const BasicBlockT *BB = &BI;
741 const BasicBlockT *EC = EquivalenceClass[BB];
742
743 // Visit all the predecessor and successor edges to determine
744 // which ones have a weight assigned already. Note that it doesn't
745 // matter that we only keep track of a single unknown edge. The
746 // only case we are interested in handling is when only a single
747 // edge is unknown (see setEdgeOrBlockWeight).
748 for (unsigned i = 0; i < 2; i++) {
749 uint64_t TotalWeight = 0;
750 unsigned NumUnknownEdges = 0, NumTotalEdges = 0;
751 Edge UnknownEdge, SelfReferentialEdge, SingleEdge;
752
753 if (i == 0) {
754 // First, visit all predecessor edges.
755 NumTotalEdges = Predecessors[BB].size();
756 for (auto *Pred : Predecessors[BB]) {
757 Edge E = std::make_pair(Pred, BB);
758 TotalWeight += visitEdge(E, &NumUnknownEdges, &UnknownEdge);
759 if (E.first == E.second)
760 SelfReferentialEdge = E;
761 }
762 if (NumTotalEdges == 1) {
763 SingleEdge = std::make_pair(Predecessors[BB][0], BB);
764 }
765 } else {
766 // On the second round, visit all successor edges.
767 NumTotalEdges = Successors[BB].size();
768 for (auto *Succ : Successors[BB]) {
769 Edge E = std::make_pair(BB, Succ);
770 TotalWeight += visitEdge(E, &NumUnknownEdges, &UnknownEdge);
771 }
772 if (NumTotalEdges == 1) {
773 SingleEdge = std::make_pair(BB, Successors[BB][0]);
774 }
775 }
776
777 // After visiting all the edges, there are three cases that we
778 // can handle immediately:
779 //
780 // - All the edge weights are known (i.e., NumUnknownEdges == 0).
781 // In this case, we simply check that the sum of all the edges
782 // is the same as BB's weight. If not, we change BB's weight
783 // to match. Additionally, if BB had not been visited before,
784 // we mark it visited.
785 //
786 // - Only one edge is unknown and BB has already been visited.
787 // In this case, we can compute the weight of the edge by
788 // subtracting the total block weight from all the known
789 // edge weights. If the edges weight more than BB, then the
790 // edge of the last remaining edge is set to zero.
791 //
792 // - There exists a self-referential edge and the weight of BB is
793 // known. In this case, this edge can be based on BB's weight.
794 // We add up all the other known edges and set the weight on
795 // the self-referential edge as we did in the previous case.
796 //
797 // In any other case, we must continue iterating. Eventually,
798 // all edges will get a weight, or iteration will stop when
799 // it reaches SampleProfileMaxPropagateIterations.
800 if (NumUnknownEdges <= 1) {
801 uint64_t &BBWeight = BlockWeights[EC];
802 if (NumUnknownEdges == 0) {
803 if (!VisitedBlocks.count(EC)) {
804 // If we already know the weight of all edges, the weight of the
805 // basic block can be computed. It should be no larger than the sum
806 // of all edge weights.
807 if (TotalWeight > BBWeight) {
808 BBWeight = TotalWeight;
809 Changed = true;
810 LLVM_DEBUG(dbgs() << "All edge weights for " << BB->getName()
811 << " known. Set weight for block: ";
812 printBlockWeight(dbgs(), BB););
813 }
814 } else if (NumTotalEdges == 1 &&
815 EdgeWeights[SingleEdge] < BlockWeights[EC]) {
816 // If there is only one edge for the visited basic block, use the
817 // block weight to adjust edge weight if edge weight is smaller.
818 EdgeWeights[SingleEdge] = BlockWeights[EC];
819 Changed = true;
820 }
821 } else if (NumUnknownEdges == 1 && VisitedBlocks.count(EC)) {
822 // If there is a single unknown edge and the block has been
823 // visited, then we can compute E's weight.
824 if (BBWeight >= TotalWeight)
825 EdgeWeights[UnknownEdge] = BBWeight - TotalWeight;
826 else
827 EdgeWeights[UnknownEdge] = 0;
828 const BasicBlockT *OtherEC;
829 if (i == 0)
830 OtherEC = EquivalenceClass[UnknownEdge.first];
831 else
832 OtherEC = EquivalenceClass[UnknownEdge.second];
833 // Edge weights should never exceed the BB weights it connects.
834 if (VisitedBlocks.count(OtherEC) &&
835 EdgeWeights[UnknownEdge] > BlockWeights[OtherEC])
836 EdgeWeights[UnknownEdge] = BlockWeights[OtherEC];
837 VisitedEdges.insert(UnknownEdge);
838 Changed = true;
839 LLVM_DEBUG(dbgs() << "Set weight for edge: ";
840 printEdgeWeight(dbgs(), UnknownEdge));
841 }
842 } else if (VisitedBlocks.count(EC) && BlockWeights[EC] == 0) {
843 // If a block Weights 0, all its in/out edges should weight 0.
844 if (i == 0) {
845 for (auto *Pred : Predecessors[BB]) {
846 Edge E = std::make_pair(Pred, BB);
847 EdgeWeights[E] = 0;
848 VisitedEdges.insert(E);
849 }
850 } else {
851 for (auto *Succ : Successors[BB]) {
852 Edge E = std::make_pair(BB, Succ);
853 EdgeWeights[E] = 0;
854 VisitedEdges.insert(E);
855 }
856 }
857 } else if (SelfReferentialEdge.first && VisitedBlocks.count(EC)) {
858 uint64_t &BBWeight = BlockWeights[BB];
859 // We have a self-referential edge and the weight of BB is known.
860 if (BBWeight >= TotalWeight)
861 EdgeWeights[SelfReferentialEdge] = BBWeight - TotalWeight;
862 else
863 EdgeWeights[SelfReferentialEdge] = 0;
864 VisitedEdges.insert(SelfReferentialEdge);
865 Changed = true;
866 LLVM_DEBUG(dbgs() << "Set self-referential edge weight to: ";
867 printEdgeWeight(dbgs(), SelfReferentialEdge));
868 }
869 if (UpdateBlockCount && !VisitedBlocks.count(EC) && TotalWeight > 0) {
870 BlockWeights[EC] = TotalWeight;
871 VisitedBlocks.insert(EC);
872 Changed = true;
873 }
874 }
875 }
876
877 return Changed;
878}
879
880/// Build in/out edge lists for each basic block in the CFG.
881///
882/// We are interested in unique edges. If a block B1 has multiple
883/// edges to another block B2, we only add a single B1->B2 edge.
884template <typename BT>
886 for (auto &BI : F) {
887 BasicBlockT *B1 = &BI;
888
889 // Add predecessors for B1.
891 if (!Predecessors[B1].empty())
892 llvm_unreachable("Found a stale predecessors list in a basic block.");
893 for (auto *B2 : getPredecessors(B1))
894 if (Visited.insert(B2).second)
895 Predecessors[B1].push_back(B2);
896
897 // Add successors for B1.
898 Visited.clear();
899 if (!Successors[B1].empty())
900 llvm_unreachable("Found a stale successors list in a basic block.");
901 for (auto *B2 : getSuccessors(B1))
902 if (Visited.insert(B2).second)
903 Successors[B1].push_back(B2);
904 }
905}
906
907/// Propagate weights into edges
908///
909/// The following rules are applied to every block BB in the CFG:
910///
911/// - If BB has a single predecessor/successor, then the weight
912/// of that edge is the weight of the block.
913///
914/// - If all incoming or outgoing edges are known except one, and the
915/// weight of the block is already known, the weight of the unknown
916/// edge will be the weight of the block minus the sum of all the known
917/// edges. If the sum of all the known edges is larger than BB's weight,
918/// we set the unknown edge weight to zero.
919///
920/// - If there is a self-referential edge, and the weight of the block is
921/// known, the weight for that edge is set to the weight of the block
922/// minus the weight of the other incoming edges to that block (if
923/// known).
924template <typename BT>
926 // Flow-based profile inference is only usable with BasicBlock instantiation
927 // of SampleProfileLoaderBaseImpl.
929 // Prepare block sample counts for inference.
930 BlockWeightMap SampleBlockWeights;
931 for (const auto &BI : F) {
932 ErrorOr<uint64_t> Weight = getBlockWeight(&BI);
933 if (Weight)
934 SampleBlockWeights[&BI] = Weight.get();
935 }
936 // Fill in BlockWeights and EdgeWeights using an inference algorithm.
937 applyProfi(F, Successors, SampleBlockWeights, BlockWeights, EdgeWeights);
938 } else {
939 bool Changed = true;
940 unsigned I = 0;
941
942 // If BB weight is larger than its corresponding loop's header BB weight,
943 // use the BB weight to replace the loop header BB weight.
944 for (auto &BI : F) {
945 BasicBlockT *BB = &BI;
946 LoopT *L = LI->getLoopFor(BB);
947 if (!L) {
948 continue;
949 }
950 BasicBlockT *Header = L->getHeader();
951 if (Header && BlockWeights[BB] > BlockWeights[Header]) {
952 BlockWeights[Header] = BlockWeights[BB];
953 }
954 }
955
956 // Propagate until we converge or we go past the iteration limit.
957 while (Changed && I++ < SampleProfileMaxPropagateIterations) {
958 Changed = propagateThroughEdges(F, false);
959 }
960
961 // The first propagation propagates BB counts from annotated BBs to unknown
962 // BBs. The 2nd propagation pass resets edges weights, and use all BB
963 // weights to propagate edge weights.
964 VisitedEdges.clear();
965 Changed = true;
966 while (Changed && I++ < SampleProfileMaxPropagateIterations) {
967 Changed = propagateThroughEdges(F, false);
968 }
969
970 // The 3rd propagation pass allows adjust annotated BB weights that are
971 // obviously wrong.
972 Changed = true;
973 while (Changed && I++ < SampleProfileMaxPropagateIterations) {
974 Changed = propagateThroughEdges(F, true);
975 }
976 }
977}
978
979template <typename FT>
981 FunctionT &F, BlockEdgeMap &Successors, BlockWeightMap &SampleBlockWeights,
982 BlockWeightMap &BlockWeights, EdgeWeightMap &EdgeWeights) {
983 auto Infer = SampleProfileInference<FT>(F, Successors, SampleBlockWeights);
984 Infer.apply(BlockWeights, EdgeWeights);
985}
986
987/// Generate branch weight metadata for all branches in \p F.
988///
989/// Branch weights are computed out of instruction samples using a
990/// propagation heuristic. Propagation proceeds in 3 phases:
991///
992/// 1- Assignment of block weights. All the basic blocks in the function
993/// are initial assigned the same weight as their most frequently
994/// executed instruction.
995///
996/// 2- Creation of equivalence classes. Since samples may be missing from
997/// blocks, we can fill in the gaps by setting the weights of all the
998/// blocks in the same equivalence class to the same weight. To compute
999/// the concept of equivalence, we use dominance and loop information.
1000/// Two blocks B1 and B2 are in the same equivalence class if B1
1001/// dominates B2, B2 post-dominates B1 and both are in the same loop.
1002///
1003/// 3- Propagation of block weights into edges. This uses a simple
1004/// propagation heuristic. The following rules are applied to every
1005/// block BB in the CFG:
1006///
1007/// - If BB has a single predecessor/successor, then the weight
1008/// of that edge is the weight of the block.
1009///
1010/// - If all the edges are known except one, and the weight of the
1011/// block is already known, the weight of the unknown edge will
1012/// be the weight of the block minus the sum of all the known
1013/// edges. If the sum of all the known edges is larger than BB's weight,
1014/// we set the unknown edge weight to zero.
1015///
1016/// - If there is a self-referential edge, and the weight of the block is
1017/// known, the weight for that edge is set to the weight of the block
1018/// minus the weight of the other incoming edges to that block (if
1019/// known).
1020///
1021/// Since this propagation is not guaranteed to finalize for every CFG, we
1022/// only allow it to proceed for a limited number of iterations (controlled
1023/// by -sample-profile-max-propagate-iterations).
1024///
1025/// FIXME: Try to replace this propagation heuristic with a scheme
1026/// that is guaranteed to finalize. A work-list approach similar to
1027/// the standard value propagation algorithm used by SSA-CCP might
1028/// work here.
1029///
1030/// \param F The function to query.
1031///
1032/// \returns true if \p F was modified. Returns false, otherwise.
1033template <typename BT>
1035 FunctionT &F, const DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
1036 bool Changed = (InlinedGUIDs.size() != 0);
1037
1038 // Compute basic block weights.
1039 Changed |= computeBlockWeights(F);
1040
1041 if (Changed) {
1042 // Initialize propagation.
1043 initWeightPropagation(F, InlinedGUIDs);
1044
1045 // Propagate weights to all edges.
1046 propagateWeights(F);
1047
1048 // Post-process propagated weights.
1049 finalizeWeightPropagation(F, InlinedGUIDs);
1050 }
1051
1052 return Changed;
1053}
1054
1055template <typename BT>
1057 FunctionT &F, const DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
1058 // Add an entry count to the function using the samples gathered at the
1059 // function entry.
1060 // Sets the GUIDs that are inlined in the profiled binary. This is used
1061 // for ThinLink to make correct liveness analysis, and also make the IR
1062 // match the profiled binary before annotation.
1064 ProfileCount(Samples->getHeadSamples() + 1, Function::PCT_Real),
1065 &InlinedGUIDs);
1066
1067 if (!SampleProfileUseProfi) {
1068 // Compute dominance and loop info needed for propagation.
1069 computeDominanceAndLoopInfo(F);
1070
1071 // Find equivalence classes.
1072 findEquivalenceClasses(F);
1073 }
1074
1075 // Before propagation starts, build, for each block, a list of
1076 // unique predecessors and successors. This is necessary to handle
1077 // identical edges in multiway branches. Since we visit all blocks and all
1078 // edges of the CFG, it is cleaner to build these lists once at the start
1079 // of the pass.
1080 buildEdges(F);
1081}
1082
1083template <typename BT>
1085 FunctionT &F, const DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
1086 // If we utilize a flow-based count inference, then we trust the computed
1087 // counts and set the entry count as computed by the algorithm. This is
1088 // primarily done to sync the counts produced by profi and BFI inference,
1089 // which uses the entry count for mass propagation.
1090 // If profi produces a zero-value for the entry count, we fallback to
1091 // Samples->getHeadSamples() + 1 to avoid functions with zero count.
1093 const BasicBlockT *EntryBB = getEntryBB(&F);
1094 ErrorOr<uint64_t> EntryWeight = getBlockWeight(EntryBB);
1095 if (BlockWeights[EntryBB] > 0) {
1097 ProfileCount(BlockWeights[EntryBB], Function::PCT_Real),
1098 &InlinedGUIDs);
1099 }
1100 }
1101}
1102
1103template <typename BT>
1105 // If coverage checking was requested, compute it now.
1106 const Function &Func = getFunction(F);
1108 unsigned Used = CoverageTracker.countUsedRecords(Samples, PSI);
1109 unsigned Total = CoverageTracker.countBodyRecords(Samples, PSI);
1110 unsigned Coverage = CoverageTracker.computeCoverage(Used, Total);
1111 if (Coverage < SampleProfileRecordCoverage) {
1112 Func.getContext().diagnose(DiagnosticInfoSampleProfile(
1113 Func.getSubprogram()->getFilename(), getFunctionLoc(F),
1114 Twine(Used) + " of " + Twine(Total) + " available profile records (" +
1115 Twine(Coverage) + "%) were applied",
1116 DS_Warning));
1117 }
1118 }
1119
1121 uint64_t Used = CoverageTracker.getTotalUsedSamples();
1122 uint64_t Total = CoverageTracker.countBodySamples(Samples, PSI);
1123 unsigned Coverage = CoverageTracker.computeCoverage(Used, Total);
1124 if (Coverage < SampleProfileSampleCoverage) {
1125 Func.getContext().diagnose(DiagnosticInfoSampleProfile(
1126 Func.getSubprogram()->getFilename(), getFunctionLoc(F),
1127 Twine(Used) + " of " + Twine(Total) + " available profile samples (" +
1128 Twine(Coverage) + "%) were applied",
1129 DS_Warning));
1130 }
1131 }
1132}
1133
1134/// Get the line number for the function header.
1135///
1136/// This looks up function \p F in the current compilation unit and
1137/// retrieves the line number where the function is defined. This is
1138/// line 0 for all the samples read from the profile file. Every line
1139/// number is relative to this line.
1140///
1141/// \param F Function object to query.
1142///
1143/// \returns the line number where \p F is defined. If it returns 0,
1144/// it means that there is no debug information available for \p F.
1145template <typename BT>
1147 const Function &Func = getFunction(F);
1148 if (DISubprogram *S = Func.getSubprogram())
1149 return S->getLine();
1150
1152 return 0;
1153
1154 // If the start of \p F is missing, emit a diagnostic to inform the user
1155 // about the missed opportunity.
1156 Func.getContext().diagnose(DiagnosticInfoSampleProfile(
1157 "No debug information found in function " + Func.getName() +
1158 ": Function profile not used",
1159 DS_Warning));
1160 return 0;
1161}
1162
1163#undef DEBUG_TYPE
1164
1165} // namespace llvm
1166#endif // LLVM_TRANSFORMS_UTILS_SAMPLEPROFILELOADERBASEIMPL_H
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
std::string Name
static Function * getFunction(Constant *C)
Definition: Evaluator.cpp:236
#define DEBUG_TYPE
This file defines a set of templates that efficiently compute a dominator tree over a generic graph.
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This file defines the RefCountedBase, ThreadSafeRefCountedBase, and IntrusiveRefCntPtr classes.
Implements a lazy call graph analysis and related passes for the new pass manager.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
Module.h This file contains the declarations for the Module class.
static StringRef getName(Value *V)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
This file provides the interface for the profile inference algorithm, profi.
This file provides the utility functions for the sampled PGO loader base implementation.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Debug location.
unsigned getBaseDiscriminator() const
Returns the base discriminator stored in the discriminator.
Subprogram description.
A debug info location.
Definition: DebugLoc.h:33
unsigned getLine() const
Definition: DebugLoc.cpp:24
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&... Args)
Definition: DenseMap.h:226
iterator end()
Definition: DenseMap.h:84
Implements a dense probed hash-table based set.
Definition: DenseSet.h:271
Diagnostic information for the sample profiler.
Represents either an error or a value T.
Definition: ErrorOr.h:56
reference get()
Definition: ErrorOr.h:149
Class to represent profile counts.
Definition: Function.h:296
void setEntryCount(ProfileCount Count, const DenseSet< GlobalValue::GUID > *Imports=nullptr)
Set the entry count for this function.
Definition: Function.cpp:2056
static bool isAvailableExternallyLinkage(LinkageTypes Linkage)
Definition: GlobalValue.h:379
GUID getGUID() const
Return a 64-bit global unique ID constructed from global value name (i.e.
Definition: GlobalValue.h:595
A smart pointer to a reference-counted object that inherits from RefCountedBase or ThreadSafeRefCount...
A node in the call graph.
A RefSCC of the call graph.
An SCC of the call graph.
A lazily constructed view of the call graph of a module.
iterator_range< postorder_ref_scc_iterator > postorder_ref_sccs()
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:44
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
A tuple of MDNodes.
Definition: Metadata.h:1730
Diagnostic information for optimization analysis remarks.
The optimization diagnostic interface.
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
Analysis providing profile information.
uint64_t getFunctionHash() const
Definition: PseudoProbe.h:114
const PseudoProbeDescriptor * getDesc(StringRef FProfileName) const
bool profileIsHashMismatched(const PseudoProbeDescriptor &FuncDesc, const FunctionSamples &Samples) const
const PseudoProbeDescriptor * getDesc(const Function &F) const
bool moduleIsProbed(const Module &M) const
bool profileIsValid(const Function &F, const FunctionSamples &Samples) const
const PseudoProbeDescriptor * getDesc(uint64_t GUID) const
Sample profile inference pass.
bool computeAndPropagateWeights(FunctionT &F, const DenseSet< GlobalValue::GUID > &InlinedGUIDs)
Generate branch weight metadata for all branches in F.
void computeDominanceAndLoopInfo(FunctionT &F)
typename afdo_detail::IRTraits< BT >::BasicBlockT BasicBlockT
IntrusiveRefCntPtr< vfs::FileSystem > FS
VirtualFileSystem to load profile files from.
typename afdo_detail::IRTraits< BT >::OptRemarkAnalysisT OptRemarkAnalysisT
typename afdo_detail::IRTraits< BT >::SuccRangeT SuccRangeT
EdgeWeightMap EdgeWeights
Map edges to their computed weights.
SmallSet< Edge, 32 > VisitedEdges
Set of visited edges during propagation.
std::map< SampleContext, FunctionSamples > OutlineFunctionSamples
Synthetic samples created by duplicating the samples of inlined functions from the original profile a...
OptRemarkEmitterT * ORE
Optimization Remark Emitter used to emit diagnostic remarks.
const BasicBlockT * getEntryBB(const FunctionT *F)
ErrorOr< uint64_t > getBlockWeight(const BasicBlockT *BB)
Compute the weight of a basic block.
unsigned getFunctionLoc(FunctionT &Func)
Get the line number for the function header.
ErrorOr< uint64_t > getInstWeightImpl(const InstructionT &Inst)
virtual ErrorOr< uint64_t > getInstWeight(const InstructionT &Inst)
Get the weight for an instruction.
SmallPtrSet< const BasicBlockT *, 32 > VisitedBlocks
Set of visited blocks during propagation.
EquivalenceClassMap EquivalenceClass
Equivalence classes for block weights.
typename afdo_detail::IRTraits< BT >::DominatorTreePtrT DominatorTreePtrT
SampleCoverageTracker CoverageTracker
Profile coverage tracker.
typename afdo_detail::IRTraits< BT >::LoopT LoopT
typename GraphTraits< FT * >::NodeRef NodeRef
std::unique_ptr< SampleProfileReader > Reader
Profile reader object.
typename afdo_detail::IRTraits< BT >::OptRemarkEmitterT OptRemarkEmitterT
void printBlockWeight(raw_ostream &OS, const BasicBlockT *BB) const
Print the weight of block BB on stream OS.
DominatorTreePtrT DT
Dominance, post-dominance and loop information.
void printBlockEquivalence(raw_ostream &OS, const BasicBlockT *BB)
Print the equivalence class of block BB on stream OS.
std::remove_pointer_t< NodeRef > BT
SampleProfileLoaderBaseImpl(std::string Name, std::string RemapName, IntrusiveRefCntPtr< vfs::FileSystem > FS)
std::unique_ptr< PseudoProbeManager > ProbeManager
typename afdo_detail::IRTraits< BT >::LoopInfoPtrT LoopInfoPtrT
std::string Filename
Name of the profile file to load.
bool propagateThroughEdges(FunctionT &F, bool UpdateBlockCount)
Propagate weights through incoming/outgoing edges.
typename afdo_detail::IRTraits< BT >::InstructionT InstructionT
uint64_t visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge)
Visit the given edge to decide if it has a valid weight.
typename afdo_detail::IRTraits< BT >::PostDominatorTreePtrT PostDominatorTreePtrT
void initWeightPropagation(FunctionT &F, const DenseSet< GlobalValue::GUID > &InlinedGUIDs)
BlockEdgeMap Predecessors
Predecessors for each basic block in the CFG.
void finalizeWeightPropagation(FunctionT &F, const DenseSet< GlobalValue::GUID > &InlinedGUIDs)
bool computeBlockWeights(FunctionT &F)
Compute and store the weights of every basic block.
virtual const FunctionSamples * findFunctionSamples(const InstructionT &I) const
Get the FunctionSamples for an instruction.
typename afdo_detail::IRTraits< BT >::PostDominatorTreeT PostDominatorTreeT
virtual ErrorOr< uint64_t > getProbeWeight(const InstructionT &Inst)
std::string RemappingFilename
Name of the profile remapping file to load.
typename afdo_detail::IRTraits< BT >::PredRangeT PredRangeT
void applyProfi(FunctionT &F, BlockEdgeMap &Successors, BlockWeightMap &SampleBlockWeights, BlockWeightMap &BlockWeights, EdgeWeightMap &EdgeWeights)
typename afdo_detail::IRTraits< BT >::BlockFrequencyInfoT BlockFrequencyInfoT
BlockEdgeMap Successors
Successors for each basic block in the CFG.
FunctionSamples * Samples
Samples collected for the body of this function.
void findEquivalenceClasses(FunctionT &F)
Find equivalence classes.
std::pair< const BasicBlockT *, const BasicBlockT * > Edge
ProfileSummaryInfo * PSI
Profile Summary Info computed from sample profile.
void clearFunctionData(bool ResetDT=true)
Clear all the per-function data used to load samples and propagate weights.
DenseMap< const DILocation *, const FunctionSamples * > DILocation2SampleMap
void buildEdges(FunctionT &F)
Build in/out edge lists for each basic block in the CFG.
void findEquivalencesFor(BasicBlockT *BB1, ArrayRef< BasicBlockT * > Descendants, PostDominatorTreeT *DomTree)
Find equivalence classes for the given block.
void printEdgeWeight(raw_ostream &OS, Edge E)
Print the weight of edge E on stream OS.
typename afdo_detail::IRTraits< BT >::FunctionT FunctionT
BlockWeightMap BlockWeights
Map basic blocks to their computed weights.
void propagateWeights(FunctionT &F)
Propagate weights into edges.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:368
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:503
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1210
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
size_type size() const
Definition: DenseSet.h:81
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
Representation of the samples collected for a function.
Definition: SampleProf.h:745
static StringRef getCanonicalFnName(const Function &F)
Return the canonical name for a function, taking into account suffix elision policy attributes.
Definition: SampleProf.h:1090
static unsigned getOffset(const DILocation *DIL)
Returns the line offset to the start line of the subprogram.
Definition: SampleProf.cpp:216
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
DiagnosticInfoOptimizationBase::Argument NV
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
Function::ProfileCount ProfileCount
auto successors(const MachineBasicBlock *BB)
cl::opt< unsigned > SampleProfileSampleCoverage
static void buildTopDownFuncOrder(LazyCallGraph &CG, std::vector< Function * > &FunctionOrderList)
cl::opt< unsigned > SampleProfileRecordCoverage
cl::opt< unsigned > SampleProfileMaxPropagateIterations
cl::opt< bool > SampleProfileUseProfi
cl::opt< bool > EnableFSDiscriminator
std::optional< PseudoProbe > extractProbe(const Instruction &Inst)
Definition: PseudoProbe.cpp:56
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
iterator_range< succ_iterator > succ_range
Definition: CFG.h:244
cl::opt< bool > NoWarnSampleUnused
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:125
iterator_range< pred_iterator > pred_range
Definition: CFG.h:107
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1849
@ DS_Warning
static bool skipProfileForFunction(const Function &F)
auto predecessors(const MachineBasicBlock *BB)
constexpr const char * PseudoProbeDescMetadataName
Definition: PseudoProbe.h:25
Implement std::hash so that hash_code can be used in STL containers.
Definition: BitVector.h:858
#define N
Description of the encoding of one expression Op.
typename GraphType::UnknownGraphTypeError NodeRef
Definition: GraphTraits.h:95
std::unique_ptr< PostDominatorTree > PostDominatorTreePtrT
static pred_range getPredecessors(BasicBlock *BB)
static succ_range getSuccessors(BasicBlock *BB)
std::unique_ptr< DominatorTree > DominatorTreePtrT
static const BasicBlock * getEntryBB(const Function *F)