LCOV - code coverage report
Current view: top level - include/llvm/ProfileData - SampleProfReader.h (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 22 23 95.7 %
Date: 2018-07-13 00:08:38 Functions: 9 13 69.2 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===- SampleProfReader.h - Read LLVM sample profile data -------*- C++ -*-===//
       2             : //
       3             : //                      The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : // This file contains definitions needed for reading sample profiles.
      11             : //
      12             : // NOTE: If you are making changes to this file format, please remember
      13             : //       to document them in the Clang documentation at
      14             : //       tools/clang/docs/UsersManual.rst.
      15             : //
      16             : // Text format
      17             : // -----------
      18             : //
      19             : // Sample profiles are written as ASCII text. The file is divided into
      20             : // sections, which correspond to each of the functions executed at runtime.
      21             : // Each section has the following format
      22             : //
      23             : //     function1:total_samples:total_head_samples
      24             : //      offset1[.discriminator]: number_of_samples [fn1:num fn2:num ... ]
      25             : //      offset2[.discriminator]: number_of_samples [fn3:num fn4:num ... ]
      26             : //      ...
      27             : //      offsetN[.discriminator]: number_of_samples [fn5:num fn6:num ... ]
      28             : //      offsetA[.discriminator]: fnA:num_of_total_samples
      29             : //       offsetA1[.discriminator]: number_of_samples [fn7:num fn8:num ... ]
      30             : //       ...
      31             : //
      32             : // This is a nested tree in which the indentation represents the nesting level
      33             : // of the inline stack. There are no blank lines in the file. And the spacing
      34             : // within a single line is fixed. Additional spaces will result in an error
      35             : // while reading the file.
      36             : //
      37             : // Any line starting with the '#' character is completely ignored.
      38             : //
      39             : // Inlined calls are represented with indentation. The Inline stack is a
      40             : // stack of source locations in which the top of the stack represents the
      41             : // leaf function, and the bottom of the stack represents the actual
      42             : // symbol to which the instruction belongs.
      43             : //
      44             : // Function names must be mangled in order for the profile loader to
      45             : // match them in the current translation unit. The two numbers in the
      46             : // function header specify how many total samples were accumulated in the
      47             : // function (first number), and the total number of samples accumulated
      48             : // in the prologue of the function (second number). This head sample
      49             : // count provides an indicator of how frequently the function is invoked.
      50             : //
      51             : // There are two types of lines in the function body.
      52             : //
      53             : // * Sampled line represents the profile information of a source location.
      54             : // * Callsite line represents the profile information of a callsite.
      55             : //
      56             : // Each sampled line may contain several items. Some are optional (marked
      57             : // below):
      58             : //
      59             : // a. Source line offset. This number represents the line number
      60             : //    in the function where the sample was collected. The line number is
      61             : //    always relative to the line where symbol of the function is
      62             : //    defined. So, if the function has its header at line 280, the offset
      63             : //    13 is at line 293 in the file.
      64             : //
      65             : //    Note that this offset should never be a negative number. A negative
      66             : //    offset could arise with macros: the debug machinery will register the
      67             : //    line number at the point of macro expansion. So, if the macro was
      68             : //    expanded in a line before the start of the function, the profile
      69             : //    converter should emit a 0 as the offset (this means that the optimizers
      70             : //    will not be able to associate a meaningful weight to the instructions
      71             : //    in the macro).
      72             : //
      73             : // b. [OPTIONAL] Discriminator. This is used if the sampled program
      74             : //    was compiled with DWARF discriminator support
      75             : //    (http://wiki.dwarfstd.org/index.php?title=Path_Discriminators).
      76             : //    DWARF discriminators are unsigned integer values that allow the
      77             : //    compiler to distinguish between multiple execution paths on the
      78             : //    same source line location.
      79             : //
      80             : //    For example, consider the line of code ``if (cond) foo(); else bar();``.
      81             : //    If the predicate ``cond`` is true 80% of the time, then the edge
      82             : //    into function ``foo`` should be considered to be taken most of the
      83             : //    time. But both calls to ``foo`` and ``bar`` are at the same source
      84             : //    line, so a sample count at that line is not sufficient. The
      85             : //    compiler needs to know which part of that line is taken more
      86             : //    frequently.
      87             : //
      88             : //    This is what discriminators provide. In this case, the calls to
      89             : //    ``foo`` and ``bar`` will be at the same line, but will have
      90             : //    different discriminator values. This allows the compiler to correctly
      91             : //    set edge weights into ``foo`` and ``bar``.
      92             : //
      93             : // c. Number of samples. This is an integer quantity representing the
      94             : //    number of samples collected by the profiler at this source
      95             : //    location.
      96             : //
      97             : // d. [OPTIONAL] Potential call targets and samples. If present, this line
      98             : //    contains a call instruction. This models both direct and indirect calls.
      99             : //    Each called target is listed with its number of samples. For example,
     100             : //
     101             : //      130: 7  foo:3  bar:2  baz:7
     102             : //
     103             : //    The above means that at relative line offset 130 there is a call
     104             : //    instruction that calls one of ``foo()``, ``bar()`` and ``baz()``,
     105             : //    with ``baz()`` being the relatively more frequently called target.
     106             : //
     107             : // Each callsite line may contain several items. Some are optional.
     108             : //
     109             : // a. Source line offset. This number represents the line number of the
     110             : //    callsite that is inlined in the profiled binary.
     111             : //
     112             : // b. [OPTIONAL] Discriminator. Same as the discriminator for sampled line.
     113             : //
     114             : // c. Number of samples. This is an integer quantity representing the
     115             : //    total number of samples collected for the inlined instance at this
     116             : //    callsite.
     117             : //
     118             : //
     119             : // Binary format
     120             : // -------------
     121             : //
     122             : // This is a more compact encoding. Numbers are encoded as ULEB128 values
     123             : // and all strings are encoded in a name table. The file is organized in
     124             : // the following sections:
     125             : //
     126             : // MAGIC (uint64_t)
     127             : //    File identifier computed by function SPMagic() (0x5350524f463432ff)
     128             : //
     129             : // VERSION (uint32_t)
     130             : //    File format version number computed by SPVersion()
     131             : //
     132             : // SUMMARY
     133             : //    TOTAL_COUNT (uint64_t)
     134             : //        Total number of samples in the profile.
     135             : //    MAX_COUNT (uint64_t)
     136             : //        Maximum value of samples on a line.
     137             : //    MAX_FUNCTION_COUNT (uint64_t)
     138             : //        Maximum number of samples at function entry (head samples).
     139             : //    NUM_COUNTS (uint64_t)
     140             : //        Number of lines with samples.
     141             : //    NUM_FUNCTIONS (uint64_t)
     142             : //        Number of functions with samples.
     143             : //    NUM_DETAILED_SUMMARY_ENTRIES (size_t)
     144             : //        Number of entries in detailed summary
     145             : //    DETAILED_SUMMARY
     146             : //        A list of detailed summary entries. Each entry consists of
     147             : //        CUTOFF (uint32_t)
     148             : //            Required percentile of total sample count expressed as a fraction
     149             : //            multiplied by 1000000.
     150             : //        MIN_COUNT (uint64_t)
     151             : //            The minimum number of samples required to reach the target
     152             : //            CUTOFF.
     153             : //        NUM_COUNTS (uint64_t)
     154             : //            Number of samples to get to the desired percentile.
     155             : //
     156             : // NAME TABLE
     157             : //    SIZE (uint32_t)
     158             : //        Number of entries in the name table.
     159             : //    NAMES
     160             : //        A NUL-separated list of SIZE strings.
     161             : //
     162             : // FUNCTION BODY (one for each uninlined function body present in the profile)
     163             : //    HEAD_SAMPLES (uint64_t) [only for top-level functions]
     164             : //        Total number of samples collected at the head (prologue) of the
     165             : //        function.
     166             : //        NOTE: This field should only be present for top-level functions
     167             : //              (i.e., not inlined into any caller). Inlined function calls
     168             : //              have no prologue, so they don't need this.
     169             : //    NAME_IDX (uint32_t)
     170             : //        Index into the name table indicating the function name.
     171             : //    SAMPLES (uint64_t)
     172             : //        Total number of samples collected in this function.
     173             : //    NRECS (uint32_t)
     174             : //        Total number of sampling records in this function's profile.
     175             : //    BODY RECORDS
     176             : //        A list of NRECS entries. Each entry contains:
     177             : //          OFFSET (uint32_t)
     178             : //            Line offset from the start of the function.
     179             : //          DISCRIMINATOR (uint32_t)
     180             : //            Discriminator value (see description of discriminators
     181             : //            in the text format documentation above).
     182             : //          SAMPLES (uint64_t)
     183             : //            Number of samples collected at this location.
     184             : //          NUM_CALLS (uint32_t)
     185             : //            Number of non-inlined function calls made at this location. In the
     186             : //            case of direct calls, this number will always be 1. For indirect
     187             : //            calls (virtual functions and function pointers) this will
     188             : //            represent all the actual functions called at runtime.
     189             : //          CALL_TARGETS
     190             : //            A list of NUM_CALLS entries for each called function:
     191             : //               NAME_IDX (uint32_t)
     192             : //                  Index into the name table with the callee name.
     193             : //               SAMPLES (uint64_t)
     194             : //                  Number of samples collected at the call site.
     195             : //    NUM_INLINED_FUNCTIONS (uint32_t)
     196             : //      Number of callees inlined into this function.
     197             : //    INLINED FUNCTION RECORDS
     198             : //      A list of NUM_INLINED_FUNCTIONS entries describing each of the inlined
     199             : //      callees.
     200             : //        OFFSET (uint32_t)
     201             : //          Line offset from the start of the function.
     202             : //        DISCRIMINATOR (uint32_t)
     203             : //          Discriminator value (see description of discriminators
     204             : //          in the text format documentation above).
     205             : //        FUNCTION BODY
     206             : //          A FUNCTION BODY entry describing the inlined function.
     207             : //===----------------------------------------------------------------------===//
     208             : 
     209             : #ifndef LLVM_PROFILEDATA_SAMPLEPROFREADER_H
     210             : #define LLVM_PROFILEDATA_SAMPLEPROFREADER_H
     211             : 
     212             : #include "llvm/ADT/SmallVector.h"
     213             : #include "llvm/ADT/StringMap.h"
     214             : #include "llvm/ADT/StringRef.h"
     215             : #include "llvm/ADT/Twine.h"
     216             : #include "llvm/IR/DiagnosticInfo.h"
     217             : #include "llvm/IR/Function.h"
     218             : #include "llvm/IR/LLVMContext.h"
     219             : #include "llvm/IR/ProfileSummary.h"
     220             : #include "llvm/ProfileData/GCOV.h"
     221             : #include "llvm/ProfileData/SampleProf.h"
     222             : #include "llvm/Support/Debug.h"
     223             : #include "llvm/Support/ErrorOr.h"
     224             : #include "llvm/Support/MemoryBuffer.h"
     225             : #include <algorithm>
     226             : #include <cstdint>
     227             : #include <memory>
     228             : #include <string>
     229             : #include <system_error>
     230             : #include <vector>
     231             : 
     232             : namespace llvm {
     233             : 
     234             : class raw_ostream;
     235             : 
     236             : namespace sampleprof {
     237             : 
     238             : /// Sample-based profile reader.
     239             : ///
     240             : /// Each profile contains sample counts for all the functions
     241             : /// executed. Inside each function, statements are annotated with the
     242             : /// collected samples on all the instructions associated with that
     243             : /// statement.
     244             : ///
     245             : /// For this to produce meaningful data, the program needs to be
     246             : /// compiled with some debug information (at minimum, line numbers:
     247             : /// -gline-tables-only). Otherwise, it will be impossible to match IR
     248             : /// instructions to the line numbers collected by the profiler.
     249             : ///
     250             : /// From the profile file, we are interested in collecting the
     251             : /// following information:
     252             : ///
     253             : /// * A list of functions included in the profile (mangled names).
     254             : ///
     255             : /// * For each function F:
     256             : ///   1. The total number of samples collected in F.
     257             : ///
     258             : ///   2. The samples collected at each line in F. To provide some
     259             : ///      protection against source code shuffling, line numbers should
     260             : ///      be relative to the start of the function.
     261             : ///
     262             : /// The reader supports two file formats: text and binary. The text format
     263             : /// is useful for debugging and testing, while the binary format is more
     264             : /// compact and I/O efficient. They can both be used interchangeably.
     265             : class SampleProfileReader {
     266             : public:
     267             :   SampleProfileReader(std::unique_ptr<MemoryBuffer> B, LLVMContext &C,
     268             :                       SampleProfileFormat Format = SPF_None)
     269         354 :       : Profiles(0), Ctx(C), Buffer(std::move(B)), Format(Format) {}
     270             : 
     271         220 :   virtual ~SampleProfileReader() = default;
     272             : 
     273             :   /// Read and validate the file header.
     274             :   virtual std::error_code readHeader() = 0;
     275             : 
     276             :   /// Read sample profiles from the associated file.
     277             :   virtual std::error_code read() = 0;
     278             : 
     279             :   /// Print the profile for \p FName on stream \p OS.
     280             :   void dumpFunctionProfile(StringRef FName, raw_ostream &OS = dbgs());
     281             : 
     282             :   /// Print all the profiles on stream \p OS.
     283             :   void dump(raw_ostream &OS = dbgs());
     284             : 
     285             :   /// Return the samples collected for function \p F.
     286         146 :   FunctionSamples *getSamplesFor(const Function &F) {
     287             :     // The function name may have been updated by adding suffix. In sample
     288             :     // profile, the function names are all stripped, so we need to strip
     289             :     // the function name suffix before matching with profile.
     290         292 :     StringRef Fname = F.getName().split('.').first;
     291             :     std::string FGUID;
     292         146 :     Fname = getRepInFormat(Fname, getFormat(), FGUID);
     293         146 :     if (Profiles.count(Fname))
     294          92 :       return &Profiles[Fname];
     295             :     return nullptr;
     296             :   }
     297             : 
     298             :   /// Return all the profiles.
     299           3 :   StringMap<FunctionSamples> &getProfiles() { return Profiles; }
     300             : 
     301             :   /// Report a parse error message.
     302           8 :   void reportError(int64_t LineNumber, Twine Msg) const {
     303          24 :     Ctx.diagnose(DiagnosticInfoSampleProfile(Buffer->getBufferIdentifier(),
     304             :                                              LineNumber, Msg));
     305           0 :   }
     306             : 
     307             :   /// Create a sample profile reader appropriate to the file format.
     308             :   static ErrorOr<std::unique_ptr<SampleProfileReader>>
     309             :   create(const Twine &Filename, LLVMContext &C);
     310             : 
     311             :   /// Create a sample profile reader from the supplied memory buffer.
     312             :   static ErrorOr<std::unique_ptr<SampleProfileReader>>
     313             :   create(std::unique_ptr<MemoryBuffer> &B, LLVMContext &C);
     314             : 
     315             :   /// Return the profile summary.
     316             :   ProfileSummary &getSummary() { return *(Summary.get()); }
     317             : 
     318             :   /// \brief Return the profile format.
     319             :   SampleProfileFormat getFormat() { return Format; }
     320             : 
     321             : protected:
     322             :   /// Map every function to its associated profile.
     323             :   ///
     324             :   /// The profile of every function executed at runtime is collected
     325             :   /// in the structure FunctionSamples. This maps function names
     326             :   /// to their corresponding profiles.
     327             :   StringMap<FunctionSamples> Profiles;
     328             : 
     329             :   /// LLVM context used to emit diagnostics.
     330             :   LLVMContext &Ctx;
     331             : 
     332             :   /// Memory buffer holding the profile file.
     333             :   std::unique_ptr<MemoryBuffer> Buffer;
     334             : 
     335             :   /// Profile summary information.
     336             :   std::unique_ptr<ProfileSummary> Summary;
     337             : 
     338             :   /// Compute summary for this profile.
     339             :   void computeSummary();
     340             : 
     341             :   /// \brief The format of sample.
     342             :   SampleProfileFormat Format = SPF_None;
     343             : };
     344             : 
     345          87 : class SampleProfileReaderText : public SampleProfileReader {
     346             : public:
     347             :   SampleProfileReaderText(std::unique_ptr<MemoryBuffer> B, LLVMContext &C)
     348          95 :       : SampleProfileReader(std::move(B), C, SPF_Text) {}
     349             : 
     350             :   /// Read and validate the file header.
     351         190 :   std::error_code readHeader() override { return sampleprof_error::success; }
     352             : 
     353             :   /// Read sample profiles from the associated file.
     354             :   std::error_code read() override;
     355             : 
     356             :   /// Return true if \p Buffer is in the format supported by this class.
     357             :   static bool hasFormat(const MemoryBuffer &Buffer);
     358             : };
     359             : 
     360          15 : class SampleProfileReaderBinary : public SampleProfileReader {
     361             : public:
     362             :   SampleProfileReaderBinary(std::unique_ptr<MemoryBuffer> B, LLVMContext &C,
     363             :                             SampleProfileFormat Format = SPF_None)
     364          30 :       : SampleProfileReader(std::move(B), C, Format) {}
     365             : 
     366             :   /// Read and validate the file header.
     367             :   std::error_code readHeader() override;
     368             : 
     369             :   /// Read sample profiles from the associated file.
     370             :   std::error_code read() override;
     371             : 
     372             : protected:
     373             :   /// Read a numeric value of type T from the profile.
     374             :   ///
     375             :   /// If an error occurs during decoding, a diagnostic message is emitted and
     376             :   /// the error is returned in the ErrorOr result.
     377             :   ///
     378             :   /// \returns the read value.
     379             :   template <typename T> ErrorOr<T> readNumber();
     380             : 
     381             :   /// Read a string from the profile.
     382             :   ///
     383             :   /// If an error occurs during decoding, a diagnostic message is emitted and
     384             :   /// the error is returned in the ErrorOr result.
     385             :   ///
     386             :   /// \returns the read value.
     387             :   ErrorOr<StringRef> readString();
     388             : 
     389             :   /// Read the string index and check whether it overflows the table.
     390             :   template <typename T> inline ErrorOr<uint32_t> readStringIndex(T &Table);
     391             : 
     392             :   /// Return true if we've reached the end of file.
     393             :   bool at_eof() const { return Data >= End; }
     394             : 
     395             :   /// Read the contents of the given profile instance.
     396             :   std::error_code readProfile(FunctionSamples &FProfile);
     397             : 
     398             :   /// Points to the current location in the buffer.
     399             :   const uint8_t *Data = nullptr;
     400             : 
     401             :   /// Points to the end of the buffer.
     402             :   const uint8_t *End = nullptr;
     403             : 
     404             : private:
     405             :   std::error_code readSummaryEntry(std::vector<ProfileSummaryEntry> &Entries);
     406             :   virtual std::error_code verifySPMagic(uint64_t Magic) = 0;
     407             : 
     408             :   /// Read profile summary.
     409             :   std::error_code readSummary();
     410             : 
     411             :   /// Read the whole name table.
     412             :   virtual std::error_code readNameTable() = 0;
     413             : 
     414             :   /// Read a string indirectly via the name table.
     415             :   virtual ErrorOr<StringRef> readStringFromTable() = 0;
     416             : };
     417             : 
     418          36 : class SampleProfileReaderRawBinary : public SampleProfileReaderBinary {
     419             : private:
     420             :   /// Function name table.
     421             :   std::vector<StringRef> NameTable;
     422             :   virtual std::error_code verifySPMagic(uint64_t Magic) override;
     423             :   virtual std::error_code readNameTable() override;
     424             :   /// Read a string indirectly via the name table.
     425             :   virtual ErrorOr<StringRef> readStringFromTable() override;
     426             : 
     427             : public:
     428             :   SampleProfileReaderRawBinary(std::unique_ptr<MemoryBuffer> B, LLVMContext &C)
     429          24 :       : SampleProfileReaderBinary(std::move(B), C, SPF_Binary) {}
     430             : 
     431             :   /// \brief Return true if \p Buffer is in the format supported by this class.
     432             :   static bool hasFormat(const MemoryBuffer &Buffer);
     433             : };
     434             : 
     435           6 : class SampleProfileReaderCompactBinary : public SampleProfileReaderBinary {
     436             : private:
     437             :   /// Function name table.
     438             :   std::vector<std::string> NameTable;
     439             :   virtual std::error_code verifySPMagic(uint64_t Magic) override;
     440             :   virtual std::error_code readNameTable() override;
     441             :   /// Read a string indirectly via the name table.
     442             :   virtual ErrorOr<StringRef> readStringFromTable() override;
     443             : 
     444             : public:
     445             :   SampleProfileReaderCompactBinary(std::unique_ptr<MemoryBuffer> B,
     446             :                                    LLVMContext &C)
     447           6 :       : SampleProfileReaderBinary(std::move(B), C, SPF_Compact_Binary) {}
     448             : 
     449             :   /// \brief Return true if \p Buffer is in the format supported by this class.
     450             :   static bool hasFormat(const MemoryBuffer &Buffer);
     451             : };
     452             : 
     453             : using InlineCallStack = SmallVector<FunctionSamples *, 10>;
     454             : 
     455             : // Supported histogram types in GCC.  Currently, we only need support for
     456             : // call target histograms.
     457             : enum HistType {
     458             :   HIST_TYPE_INTERVAL,
     459             :   HIST_TYPE_POW2,
     460             :   HIST_TYPE_SINGLE_VALUE,
     461             :   HIST_TYPE_CONST_DELTA,
     462             :   HIST_TYPE_INDIR_CALL,
     463             :   HIST_TYPE_AVERAGE,
     464             :   HIST_TYPE_IOR,
     465             :   HIST_TYPE_INDIR_CALL_TOPN
     466             : };
     467             : 
     468           8 : class SampleProfileReaderGCC : public SampleProfileReader {
     469             : public:
     470             :   SampleProfileReaderGCC(std::unique_ptr<MemoryBuffer> B, LLVMContext &C)
     471           8 :       : SampleProfileReader(std::move(B), C, SPF_GCC),
     472           8 :         GcovBuffer(Buffer.get()) {}
     473             : 
     474             :   /// Read and validate the file header.
     475             :   std::error_code readHeader() override;
     476             : 
     477             :   /// Read sample profiles from the associated file.
     478             :   std::error_code read() override;
     479             : 
     480             :   /// Return true if \p Buffer is in the format supported by this class.
     481             :   static bool hasFormat(const MemoryBuffer &Buffer);
     482             : 
     483             : protected:
     484             :   std::error_code readNameTable();
     485             :   std::error_code readOneFunctionProfile(const InlineCallStack &InlineStack,
     486             :                                          bool Update, uint32_t Offset);
     487             :   std::error_code readFunctionProfiles();
     488             :   std::error_code skipNextWord();
     489             :   template <typename T> ErrorOr<T> readNumber();
     490             :   ErrorOr<StringRef> readString();
     491             : 
     492             :   /// Read the section tag and check that it's the same as \p Expected.
     493             :   std::error_code readSectionTag(uint32_t Expected);
     494             : 
     495             :   /// GCOV buffer containing the profile.
     496             :   GCOVBuffer GcovBuffer;
     497             : 
     498             :   /// Function names in this profile.
     499             :   std::vector<std::string> Names;
     500             : 
     501             :   /// GCOV tags used to separate sections in the profile file.
     502             :   static const uint32_t GCOVTagAFDOFileNames = 0xaa000000;
     503             :   static const uint32_t GCOVTagAFDOFunction = 0xac000000;
     504             : };
     505             : 
     506             : } // end namespace sampleprof
     507             : 
     508             : } // end namespace llvm
     509             : 
     510             : #endif // LLVM_PROFILEDATA_SAMPLEPROFREADER_H

Generated by: LCOV version 1.13