Line data Source code
1 : //===- InstrProfReader.cpp - Instrumented profiling reader ----------------===//
2 : //
3 : // The LLVM Compiler Infrastructure
4 : //
5 : // This file is distributed under the University of Illinois Open Source
6 : // License. See LICENSE.TXT for details.
7 : //
8 : //===----------------------------------------------------------------------===//
9 : //
10 : // This file contains support for reading profiling data for clang's
11 : // instrumentation based PGO and coverage.
12 : //
13 : //===----------------------------------------------------------------------===//
14 :
15 : #include "llvm/ProfileData/InstrProfReader.h"
16 : #include "llvm/ADT/ArrayRef.h"
17 : #include "llvm/ADT/DenseMap.h"
18 : #include "llvm/ADT/STLExtras.h"
19 : #include "llvm/ADT/StringRef.h"
20 : #include "llvm/IR/ProfileSummary.h"
21 : #include "llvm/ProfileData/InstrProf.h"
22 : #include "llvm/ProfileData/ProfileCommon.h"
23 : #include "llvm/Support/Endian.h"
24 : #include "llvm/Support/Error.h"
25 : #include "llvm/Support/ErrorOr.h"
26 : #include "llvm/Support/MemoryBuffer.h"
27 : #include "llvm/Support/SymbolRemappingReader.h"
28 : #include "llvm/Support/SwapByteOrder.h"
29 : #include <algorithm>
30 : #include <cctype>
31 : #include <cstddef>
32 : #include <cstdint>
33 : #include <limits>
34 : #include <memory>
35 : #include <system_error>
36 : #include <utility>
37 : #include <vector>
38 :
39 : using namespace llvm;
40 :
41 : static Expected<std::unique_ptr<MemoryBuffer>>
42 442 : setupMemoryBuffer(const Twine &Path) {
43 : ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
44 442 : MemoryBuffer::getFileOrSTDIN(Path);
45 442 : if (std::error_code EC = BufferOrErr.getError())
46 4 : return errorCodeToError(EC);
47 : return std::move(BufferOrErr.get());
48 : }
49 :
50 : static Error initializeReader(InstrProfReader &Reader) {
51 215 : return Reader.readHeader();
52 : }
53 :
54 : Expected<std::unique_ptr<InstrProfReader>>
55 219 : InstrProfReader::create(const Twine &Path) {
56 : // Set up the buffer to read.
57 438 : auto BufferOrError = setupMemoryBuffer(Path);
58 219 : if (Error E = BufferOrError.takeError())
59 : return std::move(E);
60 438 : return InstrProfReader::create(std::move(BufferOrError.get()));
61 : }
62 :
63 : Expected<std::unique_ptr<InstrProfReader>>
64 219 : InstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer) {
65 : // Sanity check the buffer.
66 438 : if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits<unsigned>::max())
67 : return make_error<InstrProfError>(instrprof_error::too_large);
68 :
69 219 : if (Buffer->getBufferSize() == 0)
70 : return make_error<InstrProfError>(instrprof_error::empty_raw_profile);
71 :
72 : std::unique_ptr<InstrProfReader> Result;
73 : // Create the reader.
74 216 : if (IndexedInstrProfReader::hasFormat(*Buffer))
75 61 : Result.reset(new IndexedInstrProfReader(std::move(Buffer)));
76 155 : else if (RawInstrProfReader64::hasFormat(*Buffer))
77 8 : Result.reset(new RawInstrProfReader64(std::move(Buffer)));
78 147 : else if (RawInstrProfReader32::hasFormat(*Buffer))
79 2 : Result.reset(new RawInstrProfReader32(std::move(Buffer)));
80 145 : else if (TextInstrProfReader::hasFormat(*Buffer))
81 144 : Result.reset(new TextInstrProfReader(std::move(Buffer)));
82 : else
83 : return make_error<InstrProfError>(instrprof_error::unrecognized_format);
84 :
85 : // Initialize the reader and return the result.
86 215 : if (Error E = initializeReader(*Result))
87 : return std::move(E);
88 :
89 : return std::move(Result);
90 : }
91 :
92 : Expected<std::unique_ptr<IndexedInstrProfReader>>
93 221 : IndexedInstrProfReader::create(const Twine &Path, const Twine &RemappingPath) {
94 : // Set up the buffer to read.
95 442 : auto BufferOrError = setupMemoryBuffer(Path);
96 221 : if (Error E = BufferOrError.takeError())
97 : return std::move(E);
98 :
99 : // Set up the remapping buffer if requested.
100 : std::unique_ptr<MemoryBuffer> RemappingBuffer;
101 217 : std::string RemappingPathStr = RemappingPath.str();
102 217 : if (!RemappingPathStr.empty()) {
103 4 : auto RemappingBufferOrError = setupMemoryBuffer(RemappingPathStr);
104 2 : if (Error E = RemappingBufferOrError.takeError())
105 : return std::move(E);
106 : RemappingBuffer = std::move(RemappingBufferOrError.get());
107 : }
108 :
109 : return IndexedInstrProfReader::create(std::move(BufferOrError.get()),
110 434 : std::move(RemappingBuffer));
111 : }
112 :
113 : Expected<std::unique_ptr<IndexedInstrProfReader>>
114 348 : IndexedInstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer,
115 : std::unique_ptr<MemoryBuffer> RemappingBuffer) {
116 : // Sanity check the buffer.
117 696 : if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits<unsigned>::max())
118 : return make_error<InstrProfError>(instrprof_error::too_large);
119 :
120 : // Create the reader.
121 348 : if (!IndexedInstrProfReader::hasFormat(*Buffer))
122 : return make_error<InstrProfError>(instrprof_error::bad_magic);
123 : auto Result = llvm::make_unique<IndexedInstrProfReader>(
124 348 : std::move(Buffer), std::move(RemappingBuffer));
125 :
126 : // Initialize the reader and return the result.
127 696 : if (Error E = initializeReader(*Result))
128 : return std::move(E);
129 :
130 : return std::move(Result);
131 : }
132 :
133 777 : void InstrProfIterator::Increment() {
134 1554 : if (auto E = Reader->readNextRecord(Record)) {
135 : // Handle errors in the reader.
136 218 : InstrProfError::take(std::move(E));
137 436 : *this = InstrProfIterator();
138 : }
139 777 : }
140 :
141 145 : bool TextInstrProfReader::hasFormat(const MemoryBuffer &Buffer) {
142 : // Verify that this really looks like plain ASCII text by checking a
143 : // 'reasonable' number of characters (up to profile magic size).
144 290 : size_t count = std::min(Buffer.getBufferSize(), sizeof(uint64_t));
145 : StringRef buffer = Buffer.getBufferStart();
146 145 : return count == 0 ||
147 145 : std::all_of(buffer.begin(), buffer.begin() + count,
148 0 : [](char c) { return isPrint(c) || ::isspace(c); });
149 : }
150 :
151 : // Read the profile variant flag from the header: ":FE" means this is a FE
152 : // generated profile. ":IR" means this is an IR level profile. Other strings
153 : // with a leading ':' will be reported an error format.
154 144 : Error TextInstrProfReader::readHeader() {
155 144 : Symtab.reset(new InstrProfSymtab());
156 : bool IsIRInstr = false;
157 : if (!Line->startswith(":")) {
158 106 : IsIRLevelProfile = false;
159 : return success();
160 : }
161 38 : StringRef Str = (Line)->substr(1);
162 38 : if (Str.equals_lower("ir"))
163 : IsIRInstr = true;
164 1 : else if (Str.equals_lower("fe"))
165 : IsIRInstr = false;
166 : else
167 : return error(instrprof_error::bad_header);
168 :
169 38 : ++Line;
170 38 : IsIRLevelProfile = IsIRInstr;
171 : return success();
172 : }
173 :
174 : Error
175 299 : TextInstrProfReader::readValueProfileData(InstrProfRecord &Record) {
176 :
177 : #define CHECK_LINE_END(Line) \
178 : if (Line.is_at_end()) \
179 : return error(instrprof_error::truncated);
180 : #define READ_NUM(Str, Dst) \
181 : if ((Str).getAsInteger(10, (Dst))) \
182 : return error(instrprof_error::malformed);
183 : #define VP_READ_ADVANCE(Val) \
184 : CHECK_LINE_END(Line); \
185 : uint32_t Val; \
186 : READ_NUM((*Line), (Val)); \
187 : Line++;
188 :
189 299 : if (Line.is_at_end())
190 : return success();
191 :
192 : uint32_t NumValueKinds;
193 158 : if (Line->getAsInteger(10, NumValueKinds)) {
194 : // No value profile data
195 : return success();
196 : }
197 15 : if (NumValueKinds == 0 || NumValueKinds > IPVK_Last + 1)
198 : return error(instrprof_error::malformed);
199 15 : Line++;
200 :
201 31 : for (uint32_t VK = 0; VK < NumValueKinds; VK++) {
202 40 : VP_READ_ADVANCE(ValueKind);
203 20 : if (ValueKind > IPVK_Last)
204 : return error(instrprof_error::malformed);
205 40 : VP_READ_ADVANCE(NumValueSites);
206 20 : if (!NumValueSites)
207 : continue;
208 :
209 20 : Record.reserveSites(VK, NumValueSites);
210 57 : for (uint32_t S = 0; S < NumValueSites; S++) {
211 85 : VP_READ_ADVANCE(NumValueData);
212 :
213 : std::vector<InstrProfValueData> CurrentValues;
214 127 : for (uint32_t V = 0; V < NumValueData; V++) {
215 93 : CHECK_LINE_END(Line);
216 90 : std::pair<StringRef, StringRef> VD = Line->rsplit(':');
217 : uint64_t TakenCount, Value;
218 90 : if (ValueKind == IPVK_IndirectCallTarget) {
219 : if (InstrProfSymtab::isExternalSymbol(VD.first)) {
220 : Value = 0;
221 : } else {
222 70 : if (Error E = Symtab->addFuncName(VD.first))
223 : return E;
224 : Value = IndexedInstrProf::ComputeHash(VD.first);
225 : }
226 : } else {
227 54 : READ_NUM(VD.first, Value);
228 : }
229 2 : READ_NUM(VD.second, TakenCount);
230 87 : CurrentValues.push_back({Value, TakenCount});
231 : Line++;
232 : }
233 37 : Record.addValueData(ValueKind, S, CurrentValues.data(), NumValueData,
234 : nullptr);
235 : }
236 : }
237 : return success();
238 :
239 : #undef CHECK_LINE_END
240 : #undef READ_NUM
241 : #undef VP_READ_ADVANCE
242 : }
243 :
244 439 : Error TextInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) {
245 : // Skip empty lines and comments.
246 439 : while (!Line.is_at_end() && (Line->empty() || Line->startswith("#")))
247 0 : ++Line;
248 : // If we hit EOF while looking for a name, we're done.
249 439 : if (Line.is_at_end()) {
250 : return error(instrprof_error::eof);
251 : }
252 :
253 : // Read the function name.
254 304 : Record.Name = *Line++;
255 608 : if (Error E = Symtab->addFuncName(Record.Name))
256 0 : return error(std::move(E));
257 :
258 : // Read the function hash.
259 304 : if (Line.is_at_end())
260 : return error(instrprof_error::truncated);
261 304 : if ((Line++)->getAsInteger(0, Record.Hash))
262 : return error(instrprof_error::malformed);
263 :
264 : // Read the number of counters.
265 : uint64_t NumCounters;
266 302 : if (Line.is_at_end())
267 : return error(instrprof_error::truncated);
268 0 : if ((Line++)->getAsInteger(10, NumCounters))
269 : return error(instrprof_error::malformed);
270 302 : if (NumCounters == 0)
271 : return error(instrprof_error::malformed);
272 :
273 : // Read each counter and fill our internal storage with the values.
274 301 : Record.Clear();
275 301 : Record.Counts.reserve(NumCounters);
276 1083 : for (uint64_t I = 0; I < NumCounters; ++I) {
277 784 : if (Line.is_at_end())
278 2 : return error(instrprof_error::truncated);
279 : uint64_t Count;
280 2 : if ((Line++)->getAsInteger(10, Count))
281 : return error(instrprof_error::malformed);
282 782 : Record.Counts.push_back(Count);
283 : }
284 :
285 : // Check if value profile data exists and read it if so.
286 598 : if (Error E = readValueProfileData(Record))
287 4 : return error(std::move(E));
288 :
289 : return success();
290 : }
291 :
292 : template <class IntPtrT>
293 312 : bool RawInstrProfReader<IntPtrT>::hasFormat(const MemoryBuffer &DataBuffer) {
294 624 : if (DataBuffer.getBufferSize() < sizeof(uint64_t))
295 : return false;
296 306 : uint64_t Magic =
297 : *reinterpret_cast<const uint64_t *>(DataBuffer.getBufferStart());
298 306 : return RawInstrProf::getMagic<IntPtrT>() == Magic ||
299 : sys::getSwappedBytes(RawInstrProf::getMagic<IntPtrT>()) == Magic;
300 : }
301 163 :
302 326 : template <class IntPtrT>
303 : Error RawInstrProfReader<IntPtrT>::readHeader() {
304 160 : if (!hasFormat(*DataBuffer))
305 : return error(instrprof_error::bad_magic);
306 160 : if (DataBuffer->getBufferSize() < sizeof(RawInstrProf::Header))
307 : return error(instrprof_error::bad_header);
308 : auto *Header = reinterpret_cast<const RawInstrProf::Header *>(
309 149 : DataBuffer->getBufferStart());
310 298 : ShouldSwapBytes = Header->Magic != RawInstrProf::getMagic<IntPtrT>();
311 : return readHeader(*Header);
312 146 : }
313 :
314 146 : template <class IntPtrT>
315 : Error RawInstrProfReader<IntPtrT>::readNextHeader(const char *CurrentPos) {
316 : const char *End = DataBuffer->getBufferEnd();
317 : // Skip zero padding between profiles.
318 : while (CurrentPos != End && *CurrentPos == 0)
319 10 : ++CurrentPos;
320 10 : // If there's nothing left, we're done.
321 : if (CurrentPos == End)
322 20 : return make_error<InstrProfError>(instrprof_error::eof);
323 : // If there isn't enough space for another header, this is probably just
324 : // garbage at the end of the file.
325 : if (CurrentPos + sizeof(RawInstrProf::Header) > End)
326 8 : return make_error<InstrProfError>(instrprof_error::malformed);
327 8 : // The writer ensures each profile is padded to start at an aligned address.
328 : if (reinterpret_cast<size_t>(CurrentPos) % alignof(uint64_t))
329 8 : return make_error<InstrProfError>(instrprof_error::malformed);
330 8 : // The magic should have the same byte order as in the previous header.
331 : uint64_t Magic = *reinterpret_cast<const uint64_t *>(CurrentPos);
332 16 : if (Magic != swap(RawInstrProf::getMagic<IntPtrT>()))
333 : return make_error<InstrProfError>(instrprof_error::bad_magic);
334 :
335 : // There's another profile to read, so we need to process the header.
336 6 : auto *Header = reinterpret_cast<const RawInstrProf::Header *>(CurrentPos);
337 6 : return readHeader(*Header);
338 : }
339 2 :
340 2 : template <class IntPtrT>
341 : Error RawInstrProfReader<IntPtrT>::createSymtab(InstrProfSymtab &Symtab) {
342 4 : if (Error E = Symtab.create(StringRef(NamesStart, NamesSize)))
343 : return error(std::move(E));
344 : for (const RawInstrProf::ProfileData<IntPtrT> *I = Data; I != DataEnd; ++I) {
345 : const IntPtrT FPtr = swap(I->FunctionPointer);
346 2 : if (!FPtr)
347 2 : continue;
348 : Symtab.mapAddress(FPtr, I->NameRef);
349 : }
350 : return success();
351 9 : }
352 9 :
353 : template <class IntPtrT>
354 9 : Error RawInstrProfReader<IntPtrT>::readHeader(
355 0 : const RawInstrProf::Header &Header) {
356 : Version = swap(Header.Version);
357 9 : if (GET_VERSION(Version) != RawInstrProf::Version)
358 : return error(instrprof_error::unsupported_version);
359 :
360 : CountersDelta = swap(Header.CountersDelta);
361 1 : NamesDelta = swap(Header.NamesDelta);
362 : auto DataSize = swap(Header.DataSize);
363 : auto CountersSize = swap(Header.CountersSize);
364 1 : NamesSize = swap(Header.NamesSize);
365 : ValueKindLast = swap(Header.ValueKindLast);
366 :
367 1 : auto DataSizeInBytes = DataSize * sizeof(RawInstrProf::ProfileData<IntPtrT>);
368 1 : auto PaddingSize = getNumPaddingBytes(NamesSize);
369 :
370 : ptrdiff_t DataOffset = sizeof(RawInstrProf::Header);
371 : ptrdiff_t CountersOffset = DataOffset + DataSizeInBytes;
372 : ptrdiff_t NamesOffset = CountersOffset + sizeof(uint64_t) * CountersSize;
373 1 : ptrdiff_t ValueDataOffset = NamesOffset + NamesSize + PaddingSize;
374 :
375 7 : auto *Start = reinterpret_cast<const char *>(&Header);
376 7 : if (Start + ValueDataOffset > DataBuffer->getBufferEnd())
377 : return error(instrprof_error::bad_header);
378 7 :
379 0 : Data = reinterpret_cast<const RawInstrProf::ProfileData<IntPtrT> *>(
380 : Start + DataOffset);
381 7 : DataEnd = Data + DataSize;
382 : CountersStart = reinterpret_cast<const uint64_t *>(Start + CountersOffset);
383 : NamesStart = Start + NamesOffset;
384 : ValueDataStart = reinterpret_cast<const uint8_t *>(Start + ValueDataOffset);
385 1 :
386 : std::unique_ptr<InstrProfSymtab> NewSymtab = make_unique<InstrProfSymtab>();
387 : if (Error E = createSymtab(*NewSymtab.get()))
388 1 : return E;
389 :
390 : Symtab = std::move(NewSymtab);
391 1 : return success();
392 1 : }
393 :
394 : template <class IntPtrT>
395 : Error RawInstrProfReader<IntPtrT>::readName(NamedInstrProfRecord &Record) {
396 : Record.Name = getName(Data->NameRef);
397 1 : return success();
398 : }
399 2 :
400 2 : template <class IntPtrT>
401 : Error RawInstrProfReader<IntPtrT>::readFuncHash(NamedInstrProfRecord &Record) {
402 2 : Record.Hash = swap(Data->FuncHash);
403 0 : return success();
404 : }
405 2 :
406 : template <class IntPtrT>
407 : Error RawInstrProfReader<IntPtrT>::readRawCounts(
408 : InstrProfRecord &Record) {
409 0 : uint32_t NumCounters = swap(Data->NumCounters);
410 : IntPtrT CounterPtr = Data->CounterPtr;
411 : if (NumCounters == 0)
412 0 : return error(instrprof_error::malformed);
413 :
414 : auto RawCounts = makeArrayRef(getCounter(CounterPtr), NumCounters);
415 0 : auto *NamesStartAsCounter = reinterpret_cast<const uint64_t *>(NamesStart);
416 0 :
417 : // Check bounds.
418 : if (RawCounts.data() < CountersStart ||
419 : RawCounts.data() + RawCounts.size() > NamesStartAsCounter)
420 : return error(instrprof_error::malformed);
421 0 :
422 : if (ShouldSwapBytes) {
423 : Record.Counts.clear();
424 : Record.Counts.reserve(RawCounts.size());
425 9 : for (uint64_t Count : RawCounts)
426 9 : Record.Counts.push_back(swap(Count));
427 0 : } else
428 55 : Record.Counts = RawCounts;
429 46 :
430 46 : return success();
431 : }
432 36 :
433 : template <class IntPtrT>
434 : Error RawInstrProfReader<IntPtrT>::readValueProfilingData(
435 : InstrProfRecord &Record) {
436 7 : Record.clearValueData();
437 7 : CurValueDataSize = 0;
438 0 : // Need to match the logic in value profile dumper code in compiler-rt:
439 49 : uint32_t NumValueKinds = 0;
440 42 : for (uint32_t I = 0; I < IPVK_Last + 1; I++)
441 42 : NumValueKinds += (Data->NumValueSites[I] != 0);
442 :
443 36 : if (!NumValueKinds)
444 : return success();
445 :
446 : Expected<std::unique_ptr<ValueProfData>> VDataPtrOrErr =
447 2 : ValueProfData::getValueProfData(
448 2 : ValueDataStart, (const unsigned char *)DataBuffer->getBufferEnd(),
449 0 : getDataEndianness());
450 6 :
451 4 : if (Error E = VDataPtrOrErr.takeError())
452 4 : return E;
453 :
454 0 : // Note that besides deserialization, this also performs the conversion for
455 : // indirect call targets. The function pointers from the raw profile are
456 : // remapped into function name hashes.
457 : VDataPtrOrErr.get()->deserializeTo(Record, Symtab.get());
458 : CurValueDataSize = VDataPtrOrErr.get()->getSize();
459 : return success();
460 9 : }
461 :
462 9 : template <class IntPtrT>
463 9 : Error RawInstrProfReader<IntPtrT>::readNextRecord(NamedInstrProfRecord &Record) {
464 : if (atEnd())
465 : // At this point, ValueDataStart field points to the next header.
466 9 : if (Error E = readNextHeader(getNextHeaderPos()))
467 9 : return error(std::move(E));
468 9 :
469 9 : // Read name ad set it in Record.
470 9 : if (Error E = readName(Record))
471 9 : return error(std::move(E));
472 :
473 9 : // Read FuncHash and set it in Record.
474 : if (Error E = readFuncHash(Record))
475 : return error(std::move(E));
476 :
477 9 : // Read raw counts and set Record.
478 9 : if (Error E = readRawCounts(Record))
479 9 : return error(std::move(E));
480 :
481 : // Read value data and set Record.
482 18 : if (Error E = readValueProfilingData(Record))
483 : return error(std::move(E));
484 :
485 9 : // Iterate.
486 9 : advanceData();
487 9 : return success();
488 9 : }
489 9 :
490 9 : namespace llvm {
491 :
492 18 : template class RawInstrProfReader<uint32_t>;
493 18 : template class RawInstrProfReader<uint64_t>;
494 :
495 : } // end namespace llvm
496 9 :
497 : InstrProfLookupTrait::hash_value_type
498 : InstrProfLookupTrait::ComputeHash(StringRef K) {
499 7 : return IndexedInstrProf::ComputeHash(HashType, K);
500 : }
501 7 :
502 7 : using data_type = InstrProfLookupTrait::data_type;
503 : using offset_type = InstrProfLookupTrait::offset_type;
504 :
505 7 : bool InstrProfLookupTrait::readValueProfilingData(
506 7 : const unsigned char *&D, const unsigned char *const End) {
507 7 : Expected<std::unique_ptr<ValueProfData>> VDataPtrOrErr =
508 7 : ValueProfData::getValueProfData(D, End, ValueProfDataEndianness);
509 7 :
510 7 : if (VDataPtrOrErr.takeError())
511 : return false;
512 7 :
513 : VDataPtrOrErr.get()->deserializeTo(DataBuffer.back(), nullptr);
514 : D += VDataPtrOrErr.get()->TotalSize;
515 :
516 7 : return true;
517 7 : }
518 7 :
519 : data_type InstrProfLookupTrait::ReadData(StringRef K, const unsigned char *D,
520 : offset_type N) {
521 14 : using namespace support;
522 :
523 : // Check if the data is corrupt. If so, don't try to read it.
524 7 : if (N % sizeof(uint64_t))
525 7 : return data_type();
526 7 :
527 7 : DataBuffer.clear();
528 7 : std::vector<uint64_t> CounterBuffer;
529 7 :
530 : const unsigned char *End = D + N;
531 14 : while (D < End) {
532 14 : // Read hash.
533 : if (D + sizeof(uint64_t) >= End)
534 : return data_type();
535 7 : uint64_t Hash = endian::readNext<uint64_t, little, unaligned>(D);
536 :
537 : // Initialize number of counters for GET_VERSION(FormatVersion) == 1.
538 2 : uint64_t CountsSize = N / sizeof(uint64_t) - 1;
539 : // If format version is different then read the number of counters.
540 2 : if (GET_VERSION(FormatVersion) != IndexedInstrProf::ProfVersion::Version1) {
541 2 : if (D + sizeof(uint64_t) > End)
542 : return data_type();
543 : CountsSize = endian::readNext<uint64_t, little, unaligned>(D);
544 2 : }
545 2 : // Read counter values.
546 2 : if (D + CountsSize * sizeof(uint64_t) > End)
547 2 : return data_type();
548 2 :
549 2 : CounterBuffer.clear();
550 : CounterBuffer.reserve(CountsSize);
551 2 : for (uint64_t J = 0; J < CountsSize; ++J)
552 : CounterBuffer.push_back(endian::readNext<uint64_t, little, unaligned>(D));
553 :
554 : DataBuffer.emplace_back(K, Hash, std::move(CounterBuffer));
555 2 :
556 2 : // Read value profiling data.
557 2 : if (GET_VERSION(FormatVersion) > IndexedInstrProf::ProfVersion::Version2 &&
558 : !readValueProfilingData(D, End)) {
559 : DataBuffer.clear();
560 4 : return data_type();
561 : }
562 : }
563 2 : return DataBuffer;
564 2 : }
565 2 :
566 2 : template <typename HashTableImpl>
567 2 : Error InstrProfReaderIndex<HashTableImpl>::getRecords(
568 2 : StringRef FuncName, ArrayRef<NamedInstrProfRecord> &Data) {
569 : auto Iter = HashTable->find(FuncName);
570 4 : if (Iter == HashTable->end())
571 4 : return make_error<InstrProfError>(instrprof_error::unknown_function);
572 :
573 : Data = (*Iter);
574 2 : if (Data.empty())
575 : return make_error<InstrProfError>(instrprof_error::malformed);
576 :
577 : return Error::success();
578 : }
579 46 :
580 46 : template <typename HashTableImpl>
581 46 : Error InstrProfReaderIndex<HashTableImpl>::getRecords(
582 : ArrayRef<NamedInstrProfRecord> &Data) {
583 42 : if (atEnd())
584 42 : return make_error<InstrProfError>(instrprof_error::eof);
585 42 :
586 : Data = *RecordIterator;
587 4 :
588 4 : if (Data.empty())
589 4 : return make_error<InstrProfError>(instrprof_error::malformed);
590 :
591 : return Error::success();
592 : }
593 46 :
594 46 : template <typename HashTableImpl>
595 46 : InstrProfReaderIndex<HashTableImpl>::InstrProfReaderIndex(
596 : const unsigned char *Buckets, const unsigned char *const Payload,
597 42 : const unsigned char *const Base, IndexedInstrProf::HashT HashType,
598 42 : uint64_t Version) {
599 42 : FormatVersion = Version;
600 : HashTable.reset(HashTableImpl::Create(
601 4 : Buckets, Payload, Base,
602 4 : typename HashTableImpl::InfoType(HashType, Version)));
603 4 : RecordIterator = HashTable->data_begin();
604 : }
605 :
606 : namespace {
607 46 : /// A remapper that does not apply any remappings.
608 : class InstrProfReaderNullRemapper : public InstrProfReaderRemapper {
609 46 : InstrProfReaderIndexBase &Underlying;
610 46 :
611 46 : public:
612 : InstrProfReaderNullRemapper(InstrProfReaderIndexBase &Underlying)
613 : : Underlying(Underlying) {}
614 46 :
615 46 : Error getRecords(StringRef FuncName,
616 : ArrayRef<NamedInstrProfRecord> &Data) override {
617 : return Underlying.getRecords(FuncName, Data);
618 46 : }
619 46 : };
620 : }
621 :
622 46 : /// A remapper that applies remappings based on a symbol remapping file.
623 4 : template <typename HashTableImpl>
624 4 : class llvm::InstrProfReaderItaniumRemapper
625 10 : : public InstrProfReaderRemapper {
626 12 : public:
627 : InstrProfReaderItaniumRemapper(
628 84 : std::unique_ptr<MemoryBuffer> RemapBuffer,
629 : InstrProfReaderIndex<HashTableImpl> &Underlying)
630 : : RemapBuffer(std::move(RemapBuffer)), Underlying(Underlying) {
631 : }
632 42 :
633 : /// Extract the original function name from a PGO function name.
634 42 : static StringRef extractName(StringRef Name) {
635 42 : // We can have multiple :-separated pieces; there can be pieces both
636 42 : // before and after the mangled name. Find the first part that starts
637 : // with '_Z'; we'll assume that's the mangled name we want.
638 : std::pair<StringRef, StringRef> Parts = {StringRef(), Name};
639 42 : while (true) {
640 42 : Parts = Parts.second.split(':');
641 : if (Parts.first.startswith("_Z"))
642 : return Parts.first;
643 42 : if (Parts.second.empty())
644 42 : return Name;
645 : }
646 : }
647 42 :
648 2 : /// Given a mangled name extracted from a PGO function name, and a new
649 2 : /// form for that mangled name, reconstitute the name.
650 5 : static void reconstituteName(StringRef OrigName, StringRef ExtractedName,
651 6 : StringRef Replacement,
652 : SmallVectorImpl<char> &Out) {
653 80 : Out.reserve(OrigName.size() + Replacement.size() - ExtractedName.size());
654 : Out.insert(Out.end(), OrigName.begin(), ExtractedName.begin());
655 : Out.insert(Out.end(), Replacement.begin(), Replacement.end());
656 : Out.insert(Out.end(), ExtractedName.end(), OrigName.end());
657 4 : }
658 :
659 4 : Error populateRemappings() override {
660 4 : if (Error E = Remappings.read(*RemapBuffer))
661 4 : return E;
662 : for (StringRef Name : Underlying.HashTable->keys()) {
663 : StringRef RealName = extractName(Name);
664 4 : if (auto Key = Remappings.insert(RealName)) {
665 4 : // FIXME: We could theoretically map the same equivalence class to
666 : // multiple names in the profile data. If that happens, we should
667 : // return NamedInstrProfRecords from all of them.
668 4 : MappedNames.insert({Key, RealName});
669 4 : }
670 : }
671 : return Error::success();
672 4 : }
673 2 :
674 2 : Error getRecords(StringRef FuncName,
675 5 : ArrayRef<NamedInstrProfRecord> &Data) override {
676 6 : StringRef RealName = extractName(FuncName);
677 : if (auto Key = Remappings.lookup(RealName)) {
678 4 : StringRef Remapped = MappedNames.lookup(Key);
679 : if (!Remapped.empty()) {
680 : if (RealName.begin() == FuncName.begin() &&
681 : RealName.end() == FuncName.end())
682 : FuncName = Remapped;
683 : else {
684 46 : // Try rebuilding the name from the given remapping.
685 : SmallString<256> Reconstituted;
686 : reconstituteName(FuncName, RealName, Remapped, Reconstituted);
687 46 : Error E = Underlying.getRecords(Reconstituted, Data);
688 : if (!E)
689 : return E;
690 138 :
691 92 : // If we failed because the name doesn't exist, fall back to asking
692 : // about the original name.
693 46 : if (Error Unhandled = handleErrors(
694 : std::move(E), [](std::unique_ptr<InstrProfError> Err) {
695 : return Err->get() == instrprof_error::unknown_function
696 0 : ? Error::success()
697 : : Error(std::move(Err));
698 0 : }))
699 : return Unhandled;
700 : }
701 0 : }
702 : }
703 : return Underlying.getRecords(FuncName, Data);
704 : }
705 :
706 : private:
707 0 : /// The memory buffer containing the remapping configuration. Remappings
708 0 : /// holds pointers into this buffer.
709 : std::unique_ptr<MemoryBuffer> RemapBuffer;
710 :
711 42 : /// The mangling remapper.
712 : SymbolRemappingReader Remappings;
713 :
714 42 : /// Mapping from mangled name keys to the name used for the key in the
715 : /// profile data.
716 : /// FIXME: Can we store a location within the on-disk hash table instead of
717 126 : /// redoing lookup?
718 84 : DenseMap<SymbolRemappingReader::Key, StringRef> MappedNames;
719 :
720 42 : /// The real profile data reader.
721 : InstrProfReaderIndex<HashTableImpl> &Underlying;
722 : };
723 0 :
724 : bool IndexedInstrProfReader::hasFormat(const MemoryBuffer &DataBuffer) {
725 0 : using namespace support;
726 :
727 : if (DataBuffer.getBufferSize() < 8)
728 0 : return false;
729 : uint64_t Magic =
730 : endian::read<uint64_t, little, aligned>(DataBuffer.getBufferStart());
731 : // Verify that it's magical.
732 : return Magic == IndexedInstrProf::Magic;
733 : }
734 0 :
735 0 : const unsigned char *
736 : IndexedInstrProfReader::readSummary(IndexedInstrProf::ProfVersion Version,
737 : const unsigned char *Cur) {
738 4 : using namespace IndexedInstrProf;
739 : using namespace support;
740 :
741 4 : if (Version >= IndexedInstrProf::Version4) {
742 : const IndexedInstrProf::Summary *SummaryInLE =
743 : reinterpret_cast<const IndexedInstrProf::Summary *>(Cur);
744 12 : uint64_t NFields =
745 8 : endian::byte_swap<uint64_t, little>(SummaryInLE->NumSummaryFields);
746 : uint64_t NEntries =
747 4 : endian::byte_swap<uint64_t, little>(SummaryInLE->NumCutoffEntries);
748 : uint32_t SummarySize =
749 : IndexedInstrProf::Summary::getSize(NFields, NEntries);
750 0 : std::unique_ptr<IndexedInstrProf::Summary> SummaryData =
751 : IndexedInstrProf::allocSummary(SummarySize);
752 0 :
753 : const uint64_t *Src = reinterpret_cast<const uint64_t *>(SummaryInLE);
754 : uint64_t *Dst = reinterpret_cast<uint64_t *>(SummaryData.get());
755 0 : for (unsigned I = 0; I < SummarySize / sizeof(uint64_t); I++)
756 : Dst[I] = endian::byte_swap<uint64_t, little>(Src[I]);
757 :
758 : SummaryEntryVector DetailedSummary;
759 : for (unsigned I = 0; I < SummaryData->NumCutoffEntries; I++) {
760 : const IndexedInstrProf::Summary::Entry &Ent = SummaryData->getEntry(I);
761 0 : DetailedSummary.emplace_back((uint32_t)Ent.Cutoff, Ent.MinBlockCount,
762 0 : Ent.NumBlocks);
763 : }
764 : // initialize InstrProfSummary using the SummaryData from disk.
765 : this->Summary = llvm::make_unique<ProfileSummary>(
766 : ProfileSummary::PSK_Instr, DetailedSummary,
767 54 : SummaryData->get(Summary::TotalBlockCount),
768 54 : SummaryData->get(Summary::MaxBlockCount),
769 : SummaryData->get(Summary::MaxInternalBlockCount),
770 18 : SummaryData->get(Summary::MaxFunctionCount),
771 8 : SummaryData->get(Summary::TotalNumBlocks),
772 : SummaryData->get(Summary::TotalNumFunctions));
773 : return Cur + SummarySize;
774 92 : } else {
775 0 : // For older version of profile data, we need to compute on the fly:
776 : using namespace IndexedInstrProf;
777 :
778 92 : InstrProfSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
779 0 : // FIXME: This only computes an empty summary. Need to call addRecord for
780 : // all NamedInstrProfRecords to get the correct summary.
781 : this->Summary = Builder.getSummary();
782 92 : return Cur;
783 0 : }
784 : }
785 :
786 92 : Error IndexedInstrProfReader::readHeader() {
787 0 : using namespace support;
788 :
789 : const unsigned char *Start =
790 : (const unsigned char *)DataBuffer->getBufferStart();
791 : const unsigned char *Cur = Start;
792 : if ((const unsigned char *)DataBuffer->getBufferEnd() - Cur < 24)
793 48 : return error(instrprof_error::truncated);
794 48 :
795 : auto *Header = reinterpret_cast<const IndexedInstrProf::Header *>(Cur);
796 14 : Cur += sizeof(IndexedInstrProf::Header);
797 6 :
798 : // Check the magic number.
799 : uint64_t Magic = endian::byte_swap<uint64_t, little>(Header->Magic);
800 84 : if (Magic != IndexedInstrProf::Magic)
801 0 : return error(instrprof_error::bad_magic);
802 :
803 : // Read the version.
804 84 : uint64_t FormatVersion = endian::byte_swap<uint64_t, little>(Header->Version);
805 0 : if (GET_VERSION(FormatVersion) >
806 : IndexedInstrProf::ProfVersion::CurrentVersion)
807 : return error(instrprof_error::unsupported_version);
808 84 :
809 0 : Cur = readSummary((IndexedInstrProf::ProfVersion)FormatVersion, Cur);
810 :
811 : // Read the hash type and start offset.
812 84 : IndexedInstrProf::HashT HashType = static_cast<IndexedInstrProf::HashT>(
813 0 : endian::byte_swap<uint64_t, little>(Header->HashType));
814 : if (HashType > IndexedInstrProf::HashT::Last)
815 : return error(instrprof_error::unsupported_hash_type);
816 :
817 : uint64_t HashOffset = endian::byte_swap<uint64_t, little>(Header->HashOffset);
818 :
819 6 : // The rest of the file is an on disk hash table.
820 6 : auto IndexPtr =
821 : llvm::make_unique<InstrProfReaderIndex<OnDiskHashTableImplV3>>(
822 4 : Start + HashOffset, Cur, Start, HashType, FormatVersion);
823 2 :
824 : // Load the remapping table now if requested.
825 : if (RemappingBuffer) {
826 8 : Remapper = llvm::make_unique<
827 0 : InstrProfReaderItaniumRemapper<OnDiskHashTableImplV3>>(
828 : std::move(RemappingBuffer), *IndexPtr);
829 : if (Error E = Remapper->populateRemappings())
830 8 : return E;
831 0 : } else {
832 : Remapper = llvm::make_unique<InstrProfReaderNullRemapper>(*IndexPtr);
833 : }
834 8 : Index = std::move(IndexPtr);
835 0 :
836 : return success();
837 : }
838 8 :
839 0 : InstrProfSymtab &IndexedInstrProfReader::getSymtab() {
840 : if (Symtab.get())
841 : return *Symtab.get();
842 :
843 : std::unique_ptr<InstrProfSymtab> NewSymtab = make_unique<InstrProfSymtab>();
844 : if (Error E = Index->populateSymtab(*NewSymtab.get())) {
845 : consumeError(error(InstrProfError::take(std::move(E))));
846 : }
847 :
848 : Symtab = std::move(NewSymtab);
849 : return *Symtab.get();
850 : }
851 :
852 : Expected<InstrProfRecord>
853 : IndexedInstrProfReader::getInstrProfRecord(StringRef FuncName,
854 790 : uint64_t FuncHash) {
855 1580 : ArrayRef<NamedInstrProfRecord> Data;
856 : Error Err = Remapper->getRecords(FuncName, Data);
857 : if (Err)
858 : return std::move(Err);
859 : // Found it. Look for counters with the right hash.
860 : for (unsigned I = 0, E = Data.size(); I < E; ++I) {
861 904 : // Check for a match and fill the vector if there is one.
862 : if (Data[I].Hash == FuncHash) {
863 : return std::move(Data[I]);
864 1808 : }
865 : }
866 904 : return error(instrprof_error::hash_mismatch);
867 : }
868 :
869 904 : Error IndexedInstrProfReader::getFunctionCounts(StringRef FuncName,
870 904 : uint64_t FuncHash,
871 : std::vector<uint64_t> &Counts) {
872 904 : Expected<InstrProfRecord> Record = getInstrProfRecord(FuncName, FuncHash);
873 : if (Error E = Record.takeError())
874 : return error(std::move(E));
875 917 :
876 : Counts = Record.get().Counts;
877 : return success();
878 : }
879 :
880 917 : Error IndexedInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) {
881 0 : ArrayRef<NamedInstrProfRecord> Data;
882 :
883 917 : Error E = Index->getRecords(Data);
884 : if (E)
885 : return error(std::move(E));
886 917 :
887 1914 : Record = Data[RecordIndex++];
888 : if (RecordIndex >= Data.size()) {
889 997 : Index->advanceToNextKey();
890 0 : RecordIndex = 0;
891 997 : }
892 : return success();
893 : }
|