LCOV - code coverage report
Current view: top level - include/llvm/ProfileData - SampleProfReader.h (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 16 17 94.1 %
Date: 2017-09-14 15:23:50 Functions: 8 11 72.7 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===- SampleProfReader.h - Read LLVM sample profile data -------*- C++ -*-===//
       2             : //
       3             : //                      The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : // This file contains definitions needed for reading sample profiles.
      11             : //
      12             : // NOTE: If you are making changes to this file format, please remember
      13             : //       to document them in the Clang documentation at
      14             : //       tools/clang/docs/UsersManual.rst.
      15             : //
      16             : // Text format
      17             : // -----------
      18             : //
      19             : // Sample profiles are written as ASCII text. The file is divided into
      20             : // sections, which correspond to each of the functions executed at runtime.
      21             : // Each section has the following format
      22             : //
      23             : //     function1:total_samples:total_head_samples
      24             : //      offset1[.discriminator]: number_of_samples [fn1:num fn2:num ... ]
      25             : //      offset2[.discriminator]: number_of_samples [fn3:num fn4:num ... ]
      26             : //      ...
      27             : //      offsetN[.discriminator]: number_of_samples [fn5:num fn6:num ... ]
      28             : //      offsetA[.discriminator]: fnA:num_of_total_samples
      29             : //       offsetA1[.discriminator]: number_of_samples [fn7:num fn8:num ... ]
      30             : //       ...
      31             : //
      32             : // This is a nested tree in which the indentation represents the nesting level
      33             : // of the inline stack. There are no blank lines in the file. And the spacing
      34             : // within a single line is fixed. Additional spaces will result in an error
      35             : // while reading the file.
      36             : //
      37             : // Any line starting with the '#' character is completely ignored.
      38             : //
      39             : // Inlined calls are represented with indentation. The Inline stack is a
      40             : // stack of source locations in which the top of the stack represents the
      41             : // leaf function, and the bottom of the stack represents the actual
      42             : // symbol to which the instruction belongs.
      43             : //
      44             : // Function names must be mangled in order for the profile loader to
      45             : // match them in the current translation unit. The two numbers in the
      46             : // function header specify how many total samples were accumulated in the
      47             : // function (first number), and the total number of samples accumulated
      48             : // in the prologue of the function (second number). This head sample
      49             : // count provides an indicator of how frequently the function is invoked.
      50             : //
      51             : // There are two types of lines in the function body.
      52             : //
      53             : // * Sampled line represents the profile information of a source location.
      54             : // * Callsite line represents the profile information of a callsite.
      55             : //
      56             : // Each sampled line may contain several items. Some are optional (marked
      57             : // below):
      58             : //
      59             : // a. Source line offset. This number represents the line number
      60             : //    in the function where the sample was collected. The line number is
      61             : //    always relative to the line where symbol of the function is
      62             : //    defined. So, if the function has its header at line 280, the offset
      63             : //    13 is at line 293 in the file.
      64             : //
      65             : //    Note that this offset should never be negative. A negative value
      66             : //    could arise in cases like macros. The debug machinery will register the
      67             : //    line number at the point of macro expansion. So, if the macro was
      68             : //    expanded in a line before the start of the function, the profile
      69             : //    converter should emit a 0 as the offset (this means that the optimizers
      70             : //    will not be able to associate a meaningful weight to the instructions
      71             : //    in the macro).
      72             : //
      73             : // b. [OPTIONAL] Discriminator. This is used if the sampled program
      74             : //    was compiled with DWARF discriminator support
      75             : //    (http://wiki.dwarfstd.org/index.php?title=Path_Discriminators).
      76             : //    DWARF discriminators are unsigned integer values that allow the
      77             : //    compiler to distinguish between multiple execution paths on the
      78             : //    same source line location.
      79             : //
      80             : //    For example, consider the line of code ``if (cond) foo(); else bar();``.
      81             : //    If the predicate ``cond`` is true 80% of the time, then the edge
      82             : //    into function ``foo`` should be considered to be taken most of the
      83             : //    time. But both calls to ``foo`` and ``bar`` are at the same source
      84             : //    line, so a sample count at that line is not sufficient. The
      85             : //    compiler needs to know which part of that line is taken more
      86             : //    frequently.
      87             : //
      88             : //    This is what discriminators provide. In this case, the calls to
      89             : //    ``foo`` and ``bar`` will be at the same line, but will have
      90             : //    different discriminator values. This allows the compiler to correctly
      91             : //    set edge weights into ``foo`` and ``bar``.
      92             : //
      93             : // c. Number of samples. This is an integer quantity representing the
      94             : //    number of samples collected by the profiler at this source
      95             : //    location.
      96             : //
      97             : // d. [OPTIONAL] Potential call targets and samples. If present, this
      98             : //    line contains a call instruction. This models both direct and
      99             : //    indirect calls. Each target is listed with its number of samples. For example,
     100             : //
     101             : //      130: 7  foo:3  bar:2  baz:7
     102             : //
     103             : //    The above means that at relative line offset 130 there is a call
     104             : //    instruction that calls one of ``foo()``, ``bar()`` and ``baz()``,
     105             : //    with ``baz()`` being the relatively more frequently called target.
     106             : //
     107             : // Each callsite line may contain several items. Some are optional.
     108             : //
     109             : // a. Source line offset. This number represents the line number of the
     110             : //    callsite that is inlined in the profiled binary.
     111             : //
     112             : // b. [OPTIONAL] Discriminator. Same as the discriminator for sampled line.
     113             : //
     114             : // c. Number of samples. This is an integer quantity representing the
     115             : //    total number of samples collected for the inlined instance at this
     116             : //    callsite.
     117             : //
     118             : //
     119             : // Binary format
     120             : // -------------
     121             : //
     122             : // This is a more compact encoding. Numbers are encoded as ULEB128 values
     123             : // and all strings are encoded in a name table. The file is organized in
     124             : // the following sections:
     125             : //
     126             : // MAGIC (uint64_t)
     127             : //    File identifier computed by function SPMagic() (0x5350524f463432ff)
     128             : //
     129             : // VERSION (uint32_t)
     130             : //    File format version number computed by SPVersion()
     131             : //
     132             : // SUMMARY
     133             : //    TOTAL_COUNT (uint64_t)
     134             : //        Total number of samples in the profile.
     135             : //    MAX_COUNT (uint64_t)
     136             : //        Maximum value of samples on a line.
     137             : //    MAX_FUNCTION_COUNT (uint64_t)
     138             : //        Maximum number of samples at function entry (head samples).
     139             : //    NUM_COUNTS (uint64_t)
     140             : //        Number of lines with samples.
     141             : //    NUM_FUNCTIONS (uint64_t)
     142             : //        Number of functions with samples.
     143             : //    NUM_DETAILED_SUMMARY_ENTRIES (size_t)
     144             : //        Number of entries in detailed summary
     145             : //    DETAILED_SUMMARY
     146             : //        A list of detailed summary entries. Each entry consists of
     147             : //        CUTOFF (uint32_t)
     148             : //            Required percentile of total sample count expressed as a fraction
     149             : //            multiplied by 1000000.
     150             : //        MIN_COUNT (uint64_t)
     151             : //            The minimum number of samples required to reach the target
     152             : //            CUTOFF.
     153             : //        NUM_COUNTS (uint64_t)
     154             : //            Number of samples to get to the desired percentile.
     155             : //
     156             : // NAME TABLE
     157             : //    SIZE (uint32_t)
     158             : //        Number of entries in the name table.
     159             : //    NAMES
     160             : //        A NUL-separated list of SIZE strings.
     161             : //
     162             : // FUNCTION BODY (one for each uninlined function body present in the profile)
     163             : //    HEAD_SAMPLES (uint64_t) [only for top-level functions]
     164             : //        Total number of samples collected at the head (prologue) of the
     165             : //        function.
     166             : //        NOTE: This field should only be present for top-level functions
     167             : //              (i.e., not inlined into any caller). Inlined function calls
     168             : //              have no prologue, so they don't need this.
     169             : //    NAME_IDX (uint32_t)
     170             : //        Index into the name table indicating the function name.
     171             : //    SAMPLES (uint64_t)
     172             : //        Total number of samples collected in this function.
     173             : //    NRECS (uint32_t)
     174             : //        Total number of sampling records in this function's profile.
     175             : //    BODY RECORDS
     176             : //        A list of NRECS entries. Each entry contains:
     177             : //          OFFSET (uint32_t)
     178             : //            Line offset from the start of the function.
     179             : //          DISCRIMINATOR (uint32_t)
     180             : //            Discriminator value (see description of discriminators
     181             : //            in the text format documentation above).
     182             : //          SAMPLES (uint64_t)
     183             : //            Number of samples collected at this location.
     184             : //          NUM_CALLS (uint32_t)
     185             : //            Number of non-inlined function calls made at this location. In the
     186             : //            case of direct calls, this number will always be 1. For indirect
     187             : //            calls (virtual functions and function pointers) this will
     188             : //            represent all the actual functions called at runtime.
     189             : //          CALL_TARGETS
     190             : //            A list of NUM_CALLS entries for each called function:
     191             : //               NAME_IDX (uint32_t)
     192             : //                  Index into the name table with the callee name.
     193             : //               SAMPLES (uint64_t)
     194             : //                  Number of samples collected at the call site.
     195             : //    NUM_INLINED_FUNCTIONS (uint32_t)
     196             : //      Number of callees inlined into this function.
     197             : //    INLINED FUNCTION RECORDS
     198             : //      A list of NUM_INLINED_FUNCTIONS entries describing each of the inlined
     199             : //      callees.
     200             : //        OFFSET (uint32_t)
     201             : //          Line offset from the start of the function.
     202             : //        DISCRIMINATOR (uint32_t)
     203             : //          Discriminator value (see description of discriminators
     204             : //          in the text format documentation above).
     205             : //        FUNCTION BODY
     206             : //          A FUNCTION BODY entry describing the inlined function.
     207             : //===----------------------------------------------------------------------===//
     208             : 
     209             : #ifndef LLVM_PROFILEDATA_SAMPLEPROFREADER_H
     210             : #define LLVM_PROFILEDATA_SAMPLEPROFREADER_H
     211             : 
     212             : #include "llvm/ADT/SmallVector.h"
     213             : #include "llvm/ADT/StringMap.h"
     214             : #include "llvm/ADT/StringRef.h"
     215             : #include "llvm/ADT/Twine.h"
     216             : #include "llvm/IR/DiagnosticInfo.h"
     217             : #include "llvm/IR/Function.h"
     218             : #include "llvm/IR/LLVMContext.h"
     219             : #include "llvm/IR/ProfileSummary.h"
     220             : #include "llvm/ProfileData/SampleProf.h"
     221             : #include "llvm/Support/Debug.h"
     222             : #include "llvm/Support/ErrorOr.h"
     223             : #include "llvm/Support/GCOV.h"
     224             : #include "llvm/Support/MemoryBuffer.h"
     225             : #include <algorithm>
     226             : #include <cstdint>
     227             : #include <memory>
     228             : #include <string>
     229             : #include <system_error>
     230             : #include <vector>
     231             : 
     232             : namespace llvm {
     233             : 
     234             : class raw_ostream;
     235             : 
     236             : namespace sampleprof {
     237             : 
     238             : /// \brief Sample-based profile reader.
     239             : ///
     240             : /// Each profile contains sample counts for all the functions
     241             : /// executed. Inside each function, statements are annotated with the
     242             : /// collected samples on all the instructions associated with that
     243             : /// statement.
     244             : ///
     245             : /// For this to produce meaningful data, the program needs to be
     246             : /// compiled with some debug information (at minimum, line numbers:
     247             : /// -gline-tables-only). Otherwise, it will be impossible to match IR
     248             : /// instructions to the line numbers collected by the profiler.
     249             : ///
     250             : /// From the profile file, we are interested in collecting the
     251             : /// following information:
     252             : ///
     253             : /// * A list of functions included in the profile (mangled names).
     254             : ///
     255             : /// * For each function F:
     256             : ///   1. The total number of samples collected in F.
     257             : ///
     258             : ///   2. The samples collected at each line in F. To provide some
     259             : ///      protection against source code shuffling, line numbers should
     260             : ///      be relative to the start of the function.
     261             : ///
     262             : /// The reader supports two file formats: text and binary. The text format
     263             : /// is useful for debugging and testing, while the binary format is more
     264             : /// compact and I/O efficient. They can both be used interchangeably.
     265             : class SampleProfileReader {
     266             : public:
     267             :   SampleProfileReader(std::unique_ptr<MemoryBuffer> B, LLVMContext &C)
     268         428 :       : Profiles(0), Ctx(C), Buffer(std::move(B)) {}
     269             : 
     270          99 :   virtual ~SampleProfileReader() = default;
     271             : 
     272             :   /// \brief Read and validate the file header.
     273             :   virtual std::error_code readHeader() = 0;
     274             : 
     275             :   /// \brief Read sample profiles from the associated file.
     276             :   virtual std::error_code read() = 0;
     277             : 
     278             :   /// \brief Print the profile for \p FName on stream \p OS.
     279             :   void dumpFunctionProfile(StringRef FName, raw_ostream &OS = dbgs());
     280             : 
     281             :   /// \brief Print all the profiles on stream \p OS.
     282             :   void dump(raw_ostream &OS = dbgs());
     283             : 
     284             :   /// \brief Return the samples collected for function \p F.
     285         119 :   FunctionSamples *getSamplesFor(const Function &F) {
     286             :     // The function name may have been updated by adding a suffix. In the
     287             :     // sample profile, the function names are all stripped, so we need to
     288             :     // strip the function name suffix before matching with the profile.
     289         238 :     if (Profiles.count(F.getName().split('.').first))
     290         162 :       return &Profiles[(F.getName().split('.').first)];
     291             :     return nullptr;
     292             :   }
     293             : 
     294             :   /// \brief Return all the profiles.
     295           2 :   StringMap<FunctionSamples> &getProfiles() { return Profiles; }
     296             : 
     297             :   /// \brief Report a parse error message.
     298           8 :   void reportError(int64_t LineNumber, Twine Msg) const {
     299          24 :     Ctx.diagnose(DiagnosticInfoSampleProfile(Buffer->getBufferIdentifier(),
     300             :                                              LineNumber, Msg));
     301           0 :   }
     302             : 
     303             :   /// \brief Create a sample profile reader appropriate to the file format.
     304             :   static ErrorOr<std::unique_ptr<SampleProfileReader>>
     305             :   create(const Twine &Filename, LLVMContext &C);
     306             : 
     307             :   /// \brief Create a sample profile reader from the supplied memory buffer.
     308             :   static ErrorOr<std::unique_ptr<SampleProfileReader>>
     309             :   create(std::unique_ptr<MemoryBuffer> &B, LLVMContext &C);
     310             : 
     311             :   /// \brief Return the profile summary.
     312         124 :   ProfileSummary &getSummary() { return *(Summary.get()); }
     313             : 
     314             : protected:
     315             :   /// \brief Map every function to its associated profile.
     316             :   ///
     317             :   /// The profile of every function executed at runtime is collected
     318             :   /// in the structure FunctionSamples. This maps function names
     319             :   /// to their corresponding profiles.
     320             :   StringMap<FunctionSamples> Profiles;
     321             : 
     322             :   /// \brief LLVM context used to emit diagnostics.
     323             :   LLVMContext &Ctx;
     324             : 
     325             :   /// \brief Memory buffer holding the profile file.
     326             :   std::unique_ptr<MemoryBuffer> Buffer;
     327             : 
     328             :   /// \brief Profile summary information.
     329             :   std::unique_ptr<ProfileSummary> Summary;
     330             : 
     331             :   /// \brief Compute summary for this profile.
     332             :   void computeSummary();
     333             : };
     334             : 
     335          79 : class SampleProfileReaderText : public SampleProfileReader {
     336             : public:
     337             :   SampleProfileReaderText(std::unique_ptr<MemoryBuffer> B, LLVMContext &C)
     338         348 :       : SampleProfileReader(std::move(B), C) {}
     339             : 
     340             :   /// \brief Read and validate the file header.
     341         174 :   std::error_code readHeader() override { return sampleprof_error::success; }
     342             : 
     343             :   /// \brief Read sample profiles from the associated file.
     344             :   std::error_code read() override;
     345             : 
     346             :   /// \brief Return true if \p Buffer is in the format supported by this class.
     347             :   static bool hasFormat(const MemoryBuffer &Buffer);
     348             : };
     349             : 
     350          36 : class SampleProfileReaderBinary : public SampleProfileReader {
     351             : public:
     352             :   SampleProfileReaderBinary(std::unique_ptr<MemoryBuffer> B, LLVMContext &C)
     353          60 :       : SampleProfileReader(std::move(B), C) {}
     354             : 
     355             :   /// \brief Read and validate the file header.
     356             :   std::error_code readHeader() override;
     357             : 
     358             :   /// \brief Read sample profiles from the associated file.
     359             :   std::error_code read() override;
     360             : 
     361             :   /// \brief Return true if \p Buffer is in the format supported by this class.
     362             :   static bool hasFormat(const MemoryBuffer &Buffer);
     363             : 
     364             : protected:
     365             :   /// \brief Read a numeric value of type T from the profile.
     366             :   ///
     367             :   /// If an error occurs during decoding, a diagnostic message is emitted and
     368             :   /// the error is returned in place of a value.
     369             :   ///
     370             :   /// \returns the read value.
     371             :   template <typename T> ErrorOr<T> readNumber();
     372             : 
     373             :   /// \brief Read a string from the profile.
     374             :   ///
     375             :   /// If an error occurs during decoding, a diagnostic message is emitted and
     376             :   /// the error is returned in place of a value.
     377             :   ///
     378             :   /// \returns the read value.
     379             :   ErrorOr<StringRef> readString();
     380             : 
     381             :   /// Read a string indirectly via the name table.
     382             :   ErrorOr<StringRef> readStringFromTable();
     383             : 
     384             :   /// \brief Return true if we've reached the end of file.
     385             :   bool at_eof() const { return Data >= End; }
     386             : 
     387             :   /// Read the contents of the given profile instance.
     388             :   std::error_code readProfile(FunctionSamples &FProfile);
     389             : 
     390             :   /// \brief Points to the current location in the buffer.
     391             :   const uint8_t *Data = nullptr;
     392             : 
     393             :   /// \brief Points to the end of the buffer.
     394             :   const uint8_t *End = nullptr;
     395             : 
     396             :   /// Function name table.
     397             :   std::vector<StringRef> NameTable;
     398             : 
     399             : private:
     400             :   std::error_code readSummaryEntry(std::vector<ProfileSummaryEntry> &Entries);
     401             : 
     402             :   /// \brief Read profile summary.
     403             :   std::error_code readSummary();
     404             : };
     405             : 
     406             : using InlineCallStack = SmallVector<FunctionSamples *, 10>;
     407             : 
     408             : // Supported histogram types in GCC.  Currently, we only need support for
     409             : // call target histograms.
     410             : enum HistType {
     411             :   HIST_TYPE_INTERVAL,
     412             :   HIST_TYPE_POW2,
     413             :   HIST_TYPE_SINGLE_VALUE,
     414             :   HIST_TYPE_CONST_DELTA,
     415             :   HIST_TYPE_INDIR_CALL,
     416             :   HIST_TYPE_AVERAGE,
     417             :   HIST_TYPE_IOR,
     418             :   HIST_TYPE_INDIR_CALL_TOPN
     419             : };
     420             : 
     421           8 : class SampleProfileReaderGCC : public SampleProfileReader {
     422             : public:
     423             :   SampleProfileReaderGCC(std::unique_ptr<MemoryBuffer> B, LLVMContext &C)
     424          56 :       : SampleProfileReader(std::move(B), C), GcovBuffer(Buffer.get()) {}
     425             : 
     426             :   /// \brief Read and validate the file header.
     427             :   std::error_code readHeader() override;
     428             : 
     429             :   /// \brief Read sample profiles from the associated file.
     430             :   std::error_code read() override;
     431             : 
     432             :   /// \brief Return true if \p Buffer is in the format supported by this class.
     433             :   static bool hasFormat(const MemoryBuffer &Buffer);
     434             : 
     435             : protected:
     436             :   std::error_code readNameTable();
     437             :   std::error_code readOneFunctionProfile(const InlineCallStack &InlineStack,
     438             :                                          bool Update, uint32_t Offset);
     439             :   std::error_code readFunctionProfiles();
     440             :   std::error_code skipNextWord();
     441             :   template <typename T> ErrorOr<T> readNumber();
     442             :   ErrorOr<StringRef> readString();
     443             : 
     444             :   /// \brief Read the section tag and check that it's the same as \p Expected.
     445             :   std::error_code readSectionTag(uint32_t Expected);
     446             : 
     447             :   /// GCOV buffer containing the profile.
     448             :   GCOVBuffer GcovBuffer;
     449             : 
     450             :   /// Function names in this profile.
     451             :   std::vector<std::string> Names;
     452             : 
     453             :   /// GCOV tags used to separate sections in the profile file.
     454             :   static const uint32_t GCOVTagAFDOFileNames = 0xaa000000;
     455             :   static const uint32_t GCOVTagAFDOFunction = 0xac000000;
     456             : };
     457             : 
     458             : } // end namespace sampleprof
     459             : 
     460             : } // end namespace llvm
     461             : 
     462             : #endif // LLVM_PROFILEDATA_SAMPLEPROFREADER_H

Generated by: LCOV version 1.13