Line data Source code
1 : //===- InstrProfReader.h - Instrumented profiling readers -------*- C++ -*-===//
2 : //
3 : // The LLVM Compiler Infrastructure
4 : //
5 : // This file is distributed under the University of Illinois Open Source
6 : // License. See LICENSE.TXT for details.
7 : //
8 : //===----------------------------------------------------------------------===//
9 : //
10 : // This file contains support for reading profiling data for instrumentation
11 : // based PGO and coverage.
12 : //
13 : //===----------------------------------------------------------------------===//
14 :
15 : #ifndef LLVM_PROFILEDATA_INSTRPROFREADER_H
16 : #define LLVM_PROFILEDATA_INSTRPROFREADER_H
17 :
18 : #include "llvm/ADT/ArrayRef.h"
19 : #include "llvm/ADT/StringRef.h"
20 : #include "llvm/IR/ProfileSummary.h"
21 : #include "llvm/ProfileData/InstrProf.h"
22 : #include "llvm/Support/Endian.h"
23 : #include "llvm/Support/Error.h"
24 : #include "llvm/Support/LineIterator.h"
25 : #include "llvm/Support/MemoryBuffer.h"
26 : #include "llvm/Support/OnDiskHashTable.h"
27 : #include "llvm/Support/SwapByteOrder.h"
28 : #include <algorithm>
29 : #include <cassert>
30 : #include <cstddef>
31 : #include <cstdint>
32 : #include <iterator>
33 : #include <memory>
34 : #include <utility>
35 : #include <vector>
36 :
37 : namespace llvm {
38 :
39 : class InstrProfReader;
40 :
41 : /// A file format agnostic iterator over profiling data.
42 434 : class InstrProfIterator : public std::iterator<std::input_iterator_tag,
43 : NamedInstrProfRecord> {
44 : InstrProfReader *Reader = nullptr;
45 : value_type Record;
46 :
47 : void Increment();
48 :
49 : public:
50 : InstrProfIterator() = default;
51 434 : InstrProfIterator(InstrProfReader *Reader) : Reader(Reader) { Increment(); }
52 :
53 559 : InstrProfIterator &operator++() { Increment(); return *this; }
54 5 : bool operator==(const InstrProfIterator &RHS) { return Reader == RHS.Reader; }
55 2 : bool operator!=(const InstrProfIterator &RHS) { return Reader != RHS.Reader; }
56 : value_type &operator*() { return Record; }
57 : value_type *operator->() { return &Record; }
58 : };
59 :
60 : /// Base class and interface for reading profiling data of any known instrprof
61 : /// format. Provides an iterator over NamedInstrProfRecords.
62 : class InstrProfReader {
63 : instrprof_error LastError = instrprof_error::success;
64 :
65 : public:
66 563 : InstrProfReader() = default;
67 0 : virtual ~InstrProfReader() = default;
68 :
69 : /// Read the header. Required before reading first record.
70 : virtual Error readHeader() = 0;
71 :
72 : /// Read a single record.
73 : virtual Error readNextRecord(NamedInstrProfRecord &Record) = 0;
74 :
75 : /// Iterator over profile data.
76 : InstrProfIterator begin() { return InstrProfIterator(this); }
77 218 : InstrProfIterator end() { return InstrProfIterator(); }
78 :
79 : virtual bool isIRLevelProfile() const = 0;
80 :
81 : /// Return the PGO symtab. There are three different readers:
82 : /// Raw, Text, and Indexed profile readers. The first two types
83 : /// of readers are used only by llvm-profdata tool, while the indexed
84 : /// profile reader is also used by llvm-cov tool and the compiler (
85 : /// backend or frontend). Since creating PGO symtab can create
86 : /// significant runtime and memory overhead (as it touches data
87 : /// for the whole program), InstrProfSymtab for the indexed profile
88 : /// reader should be created on demand and it is recommended to be
89 : /// only used for dumping purpose with llvm-proftool, not with the
90 : /// compiler.
91 : virtual InstrProfSymtab &getSymtab() = 0;
92 :
93 : protected:
94 : std::unique_ptr<InstrProfSymtab> Symtab;
95 :
96 : /// Set the current error and return same.
97 : Error error(instrprof_error Err) {
98 2099 : LastError = Err;
99 154 : if (Err == instrprof_error::success)
100 : return Error::success();
101 : return make_error<InstrProfError>(Err);
102 : }
103 :
104 308 : Error error(Error &&E) { return error(InstrProfError::take(std::move(E))); }
105 :
106 : /// Clear the current error and return a successful one.
107 0 : Error success() { return error(instrprof_error::success); }
108 :
109 : public:
110 : /// Return true if the reader has finished reading the profile data.
111 0 : bool isEOF() { return LastError == instrprof_error::eof; }
112 :
113 : /// Return true if the reader encountered an error reading profiling data.
114 222 : bool hasError() { return LastError != instrprof_error::success && !isEOF(); }
115 :
116 : /// Get the current error.
117 9 : Error getError() {
118 9 : if (hasError())
119 : return make_error<InstrProfError>(LastError);
120 : return Error::success();
121 : }
122 :
123 : /// Factory method to create an appropriately typed reader for the given
124 : /// instrprof file.
125 : static Expected<std::unique_ptr<InstrProfReader>> create(const Twine &Path);
126 :
127 : static Expected<std::unique_ptr<InstrProfReader>>
128 : create(std::unique_ptr<MemoryBuffer> Buffer);
129 : };
130 :
131 : /// Reader for the simple text based instrprof format.
132 : ///
133 : /// This format is a simple text format that's suitable for test data. Records
134 : /// are separated by one or more blank lines, and record fields are separated by
135 : /// new lines.
136 : ///
137 : /// Each record consists of a function name, a function hash, a number of
138 : /// counters, and then each counter value, in that order.
139 : class TextInstrProfReader : public InstrProfReader {
140 : private:
141 : /// The profile data file contents.
142 : std::unique_ptr<MemoryBuffer> DataBuffer;
143 : /// Iterator over the profile data.
144 : line_iterator Line;
145 : bool IsIRLevelProfile = false;
146 :
147 : Error readValueProfileData(InstrProfRecord &Record);
148 :
149 : public:
150 : TextInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer_)
151 432 : : DataBuffer(std::move(DataBuffer_)), Line(*DataBuffer, true, '#') {}
152 : TextInstrProfReader(const TextInstrProfReader &) = delete;
153 : TextInstrProfReader &operator=(const TextInstrProfReader &) = delete;
154 :
155 : /// Return true if the given buffer is in text instrprof format.
156 : static bool hasFormat(const MemoryBuffer &Buffer);
157 :
158 147 : bool isIRLevelProfile() const override { return IsIRLevelProfile; }
159 :
160 : /// Read the header.
161 : Error readHeader() override;
162 :
163 : /// Read a single record.
164 : Error readNextRecord(NamedInstrProfRecord &Record) override;
165 :
166 14 : InstrProfSymtab &getSymtab() override {
167 : assert(Symtab.get());
168 14 : return *Symtab.get();
169 : }
170 : };
171 :
172 : /// Reader for the raw instrprof binary format from runtime.
173 : ///
174 : /// This format is a raw memory dump of the instrumentation-baed profiling data
175 : /// from the runtime. It has no index.
176 : ///
177 : /// Templated on the unsigned type whose size matches pointers on the platform
178 : /// that wrote the profile.
179 : template <class IntPtrT>
180 : class RawInstrProfReader : public InstrProfReader {
181 : private:
182 : /// The profile data file contents.
183 : std::unique_ptr<MemoryBuffer> DataBuffer;
184 : bool ShouldSwapBytes;
185 : // The value of the version field of the raw profile data header. The lower 56
186 : // bits specifies the format version and the most significant 8 bits specify
187 : // the variant types of the profile.
188 : uint64_t Version;
189 : uint64_t CountersDelta;
190 : uint64_t NamesDelta;
191 : const RawInstrProf::ProfileData<IntPtrT> *Data;
192 : const RawInstrProf::ProfileData<IntPtrT> *DataEnd;
193 : const uint64_t *CountersStart;
194 : const char *NamesStart;
195 : uint64_t NamesSize;
196 : // After value profile is all read, this pointer points to
197 : // the header of next profile data (if exists)
198 : const uint8_t *ValueDataStart;
199 : uint32_t ValueKindLast;
200 : uint32_t CurValueDataSize;
201 :
202 : public:
203 10 : RawInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer)
204 10 : : DataBuffer(std::move(DataBuffer)) {}
205 0 : RawInstrProfReader(const RawInstrProfReader &) = delete;
206 0 : RawInstrProfReader &operator=(const RawInstrProfReader &) = delete;
207 0 :
208 0 : static bool hasFormat(const MemoryBuffer &DataBuffer);
209 : Error readHeader() override;
210 : Error readNextRecord(NamedInstrProfRecord &Record) override;
211 :
212 : bool isIRLevelProfile() const override {
213 : return (Version & VARIANT_MASK_IR_PROF) != 0;
214 : }
215 :
216 16 : InstrProfSymtab &getSymtab() override {
217 16 : assert(Symtab.get());
218 : return *Symtab.get();
219 12 : }
220 12 :
221 : private:
222 4 : Error createSymtab(InstrProfSymtab &Symtab);
223 4 : Error readNextHeader(const char *CurrentPos);
224 : Error readHeader(const RawInstrProf::Header &Header);
225 :
226 0 : template <class IntT> IntT swap(IntT Int) const {
227 : return ShouldSwapBytes ? sys::getSwappedBytes(Int) : Int;
228 0 : }
229 :
230 0 : support::endianness getDataEndianness() const {
231 : support::endianness HostEndian = getHostEndianness();
232 0 : if (!ShouldSwapBytes)
233 : return HostEndian;
234 0 : if (HostEndian == support::little)
235 : return support::big;
236 0 : else
237 : return support::little;
238 : }
239 :
240 : inline uint8_t getNumPaddingBytes(uint64_t SizeInBytes) {
241 : return 7 & (sizeof(uint64_t) - SizeInBytes % sizeof(uint64_t));
242 : }
243 :
244 0 : Error readName(NamedInstrProfRecord &Record);
245 219 : Error readFuncHash(NamedInstrProfRecord &Record);
246 : Error readRawCounts(InstrProfRecord &Record);
247 0 : Error readValueProfilingData(InstrProfRecord &Record);
248 0 : bool atEnd() const { return Data == DataEnd; }
249 :
250 0 : void advanceData() {
251 0 : Data++;
252 : ValueDataStart += CurValueDataSize;
253 0 : }
254 0 :
255 : const char *getNextHeaderPos() const {
256 0 : assert(atEnd());
257 0 : return (const char *)ValueDataStart;
258 : }
259 :
260 0 : const uint64_t *getCounter(IntPtrT CounterPtr) const {
261 : ptrdiff_t Offset = (swap(CounterPtr) - CountersDelta) / sizeof(uint64_t);
262 0 : return CountersStart + Offset;
263 : }
264 :
265 0 : StringRef getName(uint64_t NameRef) const {
266 : return Symtab->getFuncName(swap(NameRef));
267 : }
268 : };
269 0 :
270 : using RawInstrProfReader32 = RawInstrProfReader<uint32_t>;
271 0 : using RawInstrProfReader64 = RawInstrProfReader<uint64_t>;
272 :
273 : namespace IndexedInstrProf {
274 0 :
275 : enum class HashT : uint32_t;
276 :
277 : } // end namespace IndexedInstrProf
278 0 :
279 : /// Trait for lookups into the on-disk hash table for the binary instrprof
280 0 : /// format.
281 : class InstrProfLookupTrait {
282 : std::vector<NamedInstrProfRecord> DataBuffer;
283 0 : IndexedInstrProf::HashT HashType;
284 : unsigned FormatVersion;
285 : // Endianness of the input value profile data.
286 : // It should be LE by default, but can be changed
287 : // for testing purpose.
288 0 : support::endianness ValueProfDataEndianness = support::little;
289 9 :
290 : public:
291 0 : InstrProfLookupTrait(IndexedInstrProf::HashT HashType, unsigned FormatVersion)
292 0 : : HashType(HashType), FormatVersion(FormatVersion) {}
293 :
294 0 : using data_type = ArrayRef<NamedInstrProfRecord>;
295 0 :
296 : using internal_key_type = StringRef;
297 : using external_key_type = StringRef;
298 : using hash_value_type = uint64_t;
299 : using offset_type = uint64_t;
300 :
301 : static bool EqualKey(StringRef A, StringRef B) { return A == B; }
302 54 : static StringRef GetInternalKey(StringRef K) { return K; }
303 : static StringRef GetExternalKey(StringRef K) { return K; }
304 0 :
305 46 : hash_value_type ComputeHash(StringRef K);
306 46 :
307 0 : static std::pair<offset_type, offset_type>
308 0 : ReadKeyDataLength(const unsigned char *&D) {
309 0 : using namespace support;
310 0 :
311 0 : offset_type KeyLen = endian::readNext<offset_type, little, unaligned>(D);
312 0 : offset_type DataLen = endian::readNext<offset_type, little, unaligned>(D);
313 0 : return std::make_pair(KeyLen, DataLen);
314 0 : }
315 0 :
316 : StringRef ReadKey(const unsigned char *D, offset_type N) {
317 0 : return StringRef((const char *)D, N);
318 : }
319 9 :
320 : bool readValueProfilingData(const unsigned char *&D,
321 0 : const unsigned char *const End);
322 : data_type ReadData(StringRef K, const unsigned char *D, offset_type N);
323 0 :
324 : // Used for testing purpose only.
325 0 : void setValueProfDataEndianness(support::endianness Endianness) {
326 : ValueProfDataEndianness = Endianness;
327 0 : }
328 : };
329 :
330 0 : struct InstrProfReaderIndexBase {
331 46 : virtual ~InstrProfReaderIndexBase() = default;
332 46 :
333 : // Read all the profile records with the same key pointed to the current
334 0 : // iterator.
335 0 : virtual Error getRecords(ArrayRef<NamedInstrProfRecord> &Data) = 0;
336 0 :
337 : // Read all the profile records with the key equal to FuncName
338 0 : virtual Error getRecords(StringRef FuncName,
339 0 : ArrayRef<NamedInstrProfRecord> &Data) = 0;
340 0 : virtual void advanceToNextKey() = 0;
341 : virtual bool atEnd() const = 0;
342 : virtual void setValueProfDataEndianness(support::endianness Endianness) = 0;
343 0 : virtual uint64_t getVersion() const = 0;
344 50 : virtual bool isIRLevelProfile() const = 0;
345 : virtual Error populateSymtab(InstrProfSymtab &) = 0;
346 0 : };
347 0 :
348 : using OnDiskHashTableImplV3 =
349 0 : OnDiskIterableChainedHashTable<InstrProfLookupTrait>;
350 0 :
351 : template <typename HashTableImpl>
352 : class InstrProfReaderItaniumRemapper;
353 :
354 : template <typename HashTableImpl>
355 : class InstrProfReaderIndex : public InstrProfReaderIndexBase {
356 : private:
357 : std::unique_ptr<HashTableImpl> HashTable;
358 : typename HashTableImpl::data_iterator RecordIterator;
359 : uint64_t FormatVersion;
360 :
361 : friend class InstrProfReaderItaniumRemapper<HashTableImpl>;
362 :
363 : public:
364 : InstrProfReaderIndex(const unsigned char *Buckets,
365 1225 : const unsigned char *const Payload,
366 : const unsigned char *const Base,
367 : IndexedInstrProf::HashT HashType, uint64_t Version);
368 : ~InstrProfReaderIndex() override = default;
369 :
370 : Error getRecords(ArrayRef<NamedInstrProfRecord> &Data) override;
371 : Error getRecords(StringRef FuncName,
372 : ArrayRef<NamedInstrProfRecord> &Data) override;
373 : void advanceToNextKey() override { RecordIterator++; }
374 :
375 : bool atEnd() const override {
376 818 : return RecordIterator == HashTable->data_end();
377 : }
378 :
379 : void setValueProfDataEndianness(support::endianness Endianness) override {
380 : HashTable->getInfoObj().setValueProfDataEndianness(Endianness);
381 : }
382 :
383 : uint64_t getVersion() const override { return GET_VERSION(FormatVersion); }
384 :
385 : bool isIRLevelProfile() const override {
386 : return (FormatVersion & VARIANT_MASK_IR_PROF) != 0;
387 : }
388 :
389 : Error populateSymtab(InstrProfSymtab &Symtab) override {
390 : return Symtab.create(HashTable->keys());
391 : }
392 : };
393 :
394 : /// Name matcher supporting fuzzy matching of symbol names to names in profiles.
395 : class InstrProfReaderRemapper {
396 : public:
397 : virtual ~InstrProfReaderRemapper() {}
398 : virtual Error populateRemappings() { return Error::success(); }
399 : virtual Error getRecords(StringRef FuncName,
400 0 : ArrayRef<NamedInstrProfRecord> &Data) = 0;
401 0 : };
402 :
403 : /// Reader for the indexed binary instrprof format.
404 : class IndexedInstrProfReader : public InstrProfReader {
405 : private:
406 : /// The profile data file contents.
407 : std::unique_ptr<MemoryBuffer> DataBuffer;
408 : /// The profile remapping file contents.
409 0 : std::unique_ptr<MemoryBuffer> RemappingBuffer;
410 2 : /// The index into the profile data.
411 0 : std::unique_ptr<InstrProfReaderIndexBase> Index;
412 : /// The profile remapping file contents.
413 : std::unique_ptr<InstrProfReaderRemapper> Remapper;
414 : /// Profile summary data.
415 0 : std::unique_ptr<ProfileSummary> Summary;
416 : // Index to the current record in the record array.
417 : unsigned RecordIndex;
418 :
419 : // Read the profile summary. Return a pointer pointing to one byte past the
420 : // end of the summary data if it exists or the input \c Cur.
421 : const unsigned char *readSummary(IndexedInstrProf::ProfVersion Version,
422 : const unsigned char *Cur);
423 :
424 : public:
425 : IndexedInstrProfReader(
426 : std::unique_ptr<MemoryBuffer> DataBuffer,
427 : std::unique_ptr<MemoryBuffer> RemappingBuffer = nullptr)
428 : : DataBuffer(std::move(DataBuffer)),
429 : RemappingBuffer(std::move(RemappingBuffer)), RecordIndex(0) {}
430 : IndexedInstrProfReader(const IndexedInstrProfReader &) = delete;
431 : IndexedInstrProfReader &operator=(const IndexedInstrProfReader &) = delete;
432 :
433 : /// Return the profile version.
434 304 : uint64_t getVersion() const { return Index->getVersion(); }
435 380 : bool isIRLevelProfile() const override { return Index->isIRLevelProfile(); }
436 :
437 : /// Return true if the given buffer is in an indexed instrprof format.
438 : static bool hasFormat(const MemoryBuffer &DataBuffer);
439 :
440 : /// Read the file header.
441 : Error readHeader() override;
442 : /// Read a single record.
443 : Error readNextRecord(NamedInstrProfRecord &Record) override;
444 :
445 : /// Return the NamedInstrProfRecord associated with FuncName and FuncHash
446 : Expected<InstrProfRecord> getInstrProfRecord(StringRef FuncName,
447 : uint64_t FuncHash);
448 :
449 : /// Fill Counts with the profile data for the given function name.
450 : Error getFunctionCounts(StringRef FuncName, uint64_t FuncHash,
451 : std::vector<uint64_t> &Counts);
452 407 :
453 : /// Return the maximum of all known function counts.
454 59 : uint64_t getMaximumFunctionCount() { return Summary->getMaxFunctionCount(); }
455 :
456 : /// Factory method to create an indexed reader.
457 202 : static Expected<std::unique_ptr<IndexedInstrProfReader>>
458 : create(const Twine &Path, const Twine &RemappingPath = "");
459 284 :
460 568 : static Expected<std::unique_ptr<IndexedInstrProfReader>>
461 : create(std::unique_ptr<MemoryBuffer> Buffer,
462 : std::unique_ptr<MemoryBuffer> RemappingBuffer = nullptr);
463 2 :
464 : // Used for testing purpose only.
465 2 : void setValueProfDataEndianness(support::endianness Endianness) {
466 2 : Index->setValueProfDataEndianness(Endianness);
467 304 : }
468 :
469 190 : // See description in the base class. This interface is designed
470 190 : // to be used by llvm-profdata (for dumping). Avoid using this when
471 : // the client is the compiler.
472 : InstrProfSymtab &getSymtab() override;
473 4 : ProfileSummary &getSummary() { return *(Summary.get()); }
474 4 : };
475 :
476 : } // end namespace llvm
477 :
478 : #endif // LLVM_PROFILEDATA_INSTRPROFREADER_H
|